diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c10135 --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +*.png +*.jpg +*.csv +*.mp4 + +*.env + +*.zip + +__pycache__/ +*.pyc + +data/ + +*.pt +*.pth + +*.wav +*.tar +*.bin + +.conda/ +.venv/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..6cdee9f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "fairseq"] + path = fairseq + url = https://github.com/facebookresearch/fairseq.git \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 77200ba..0000000 --- a/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# NetfLips -[2025-2] textless direct audio-video speech translation ---- - -This repository is built upon [AV2AV](https://github.com/choijeongsoo/av2av?tab=readme-ov-file) and [Fairseq](https://github.com/pytorch/fairseq). We appreciate the open-source of the projects. - diff --git a/README_environment.md b/README_environment.md new file mode 100644 index 0000000..b239083 --- /dev/null +++ b/README_environment.md @@ -0,0 +1,28 @@ +# 1. 환경 설정 +```bash +# 1. 레포지토리 클론 +git clone https://github.com/Prometheus-AI-3team/NetfLips.git + +cd NetfLips + +# 2. 서브모듈(fairseq) update +git submodule init +git submodule update + +# 3. Conda 기본 환경 생성 +conda env create -f environment.yml +conda activate unit2a + +# 4. Pip 다운그레이드 (메타데이터 에러 방지) +pip install "pip<24.1" + +# 5. PyTorch 설치 (CUDA 11.7 기준) +pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117 + +# 6. 나머지 라이브러리 설치 +pip install -r requirements.txt + +# 7. Fairseq 설치 +cd av2av-main/fairseq +pip install -e . +``` \ No newline at end of file diff --git a/av2unit/avhubert/__init__.py b/av2unit/avhubert/__init__.py new file mode 100644 index 0000000..6cb0629 --- /dev/null +++ b/av2unit/avhubert/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .hubert import * # noqa +# from .hubert_asr import * # noqa +from .hubert_dataset import * +from .hubert_pretraining import * +# from .hubert_criterion import * diff --git a/av2unit/avhubert/hubert.py b/av2unit/avhubert/hubert.py new file mode 100644 index 0000000..30830ea --- /dev/null +++ b/av2unit/avhubert/hubert.py @@ -0,0 +1,797 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import os,sys +import logging +from typing import Dict, List, Optional, Tuple + +import numpy as np + +import torch +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.wav2vec.wav2vec2 import ( + LAYER_TYPE_CHOICES, + ConvFeatureExtractionModel, + TransformerEncoder, +) +from fairseq.modules import GradMultiply, LayerNorm +from copy import deepcopy + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + from hubert_pretraining import ( + AVHubertPretrainingConfig, + AVHubertPretrainingTask, + ) + from resnet import ResEncoder + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, + ) + from utils import compute_mask_indices + # from decoder import TransformerDecoder + +else: + from .hubert_pretraining import ( + AVHubertPretrainingConfig, + AVHubertPretrainingTask, + ) + from .resnet import ResEncoder + from .utils import compute_mask_indices + # from .decoder import 
TransformerDecoder + +from omegaconf import II + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum( + ["static", "uniform", "normal", "poisson"] +) + + +@dataclass +class AVHubertConfig(FairseqDataclass): + label_rate: int = II("task.label_rate") + input_modality: str = II("task.input_modality") + extractor_mode: EXTRACTOR_MODE_CHOICES = field( + default="default", + metadata={ + "help": "mode for feature extractor. default has a single group " + "norm with d groups in the first conv block, whereas layer_norm " + "has layer norms in every block (meant to use with normalize=True)" + }, + ) + encoder_layers: int = field( + default=12, metadata={"help": "num encoder layers in the transformer"} + ) + encoder_embed_dim: int = field( + default=768, metadata={"help": "encoder embedding dimension"} + ) + encoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "encoder embedding dimension for FFN"} + ) + encoder_attention_heads: int = field( + default=12, metadata={"help": "num encoder attention heads"} + ) + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="gelu", metadata={"help": "activation function to use"} + ) + + # dropouts + dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for the transformer"}, + ) + attention_dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for attention weights"}, + ) + activation_dropout: float = field( + default=0.0, + metadata={"help": "dropout probability after activation in FFN"}, + ) + encoder_layerdrop: float = field( + default=0.0, + metadata={"help": "probability of dropping a tarnsformer layer"}, + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + dropout_features: float = field( + default=0.0, + metadata={ + "help": "dropout to apply to the features (after 
feat extr)" + }, + ) + + final_dim: int = field( + default=0, + metadata={ + "help": "project final representations and targets to this many " + "dimensions. set to encoder_embed_dim is <= 0" + }, + ) + untie_final_proj: bool = field( + default=False, + metadata={"help": "use separate projection for each target"}, + ) + layer_norm_first: bool = field( + default=False, + metadata={"help": "apply layernorm first in the transformer"}, + ) + conv_feature_layers: str = field( + default="[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2", + metadata={ + "help": "string describing convolutional feature extraction " + "layers in form of a python list that contains " + "[(dim, kernel_size, stride), ...]" + }, + ) + conv_bias: bool = field( + default=False, metadata={"help": "include bias in conv encoder"} + ) + logit_temp: float = field( + default=0.1, metadata={"help": "temperature to divide logits by"} + ) + target_glu: bool = field( + default=False, metadata={"help": "adds projection + glu to targets"} + ) + feature_grad_mult: float = field( + default=1.0, + metadata={"help": "multiply feature extractor var grads by this"}, + ) + + # masking + mask_length_audio: int = field(default=10, metadata={"help": "mask length"}) + mask_prob_audio: float = field( + default=0.65, + metadata={"help": "probability of replacing a token with mask"}, + ) + mask_length_image: int = field(default=10, metadata={"help": "mask length"}) + mask_prob_image: float = field( + default=0.65, + metadata={"help": "probability of replacing a token with mask"}, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose mask length"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + mask_min_space: int = 
field( + default=1, + metadata={ + "help": "min space between spans (if no overlap is enabled)" + }, + ) + + # channel masking + mask_channel_length: int = field( + default=10, + metadata={"help": "length of the mask for features (channels)"}, + ) + mask_channel_prob: float = field( + default=0.0, + metadata={"help": "probability of replacing a feature with 0"}, + ) + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, + metadata={"help": "whether to allow channel masks to overlap"}, + ) + mask_channel_min_space: int = field( + default=1, + metadata={ + "help": "min space between spans (if no overlap is enabled)" + }, + ) + + # positional embeddings + conv_pos: int = field( + default=128, + metadata={ + "help": "number of filters for convolutional positional embeddings" + }, + ) + conv_pos_groups: int = field( + default=16, + metadata={ + "help": "number of groups for convolutional positional embedding" + }, + ) + + latent_temp: Tuple[float, float, float] = field( + default=(2, 0.5, 0.999995), + metadata={"help": "legacy (to be removed)"}, + ) + + # loss computation + skip_masked: bool = field( + default=False, + metadata={"help": "skip computing losses over masked frames"}, + ) + skip_nomask: bool = field( + default=False, + metadata={"help": "skip computing losses over unmasked frames"}, + ) + resnet_relu_type: str = field(default='prelu', metadata={"help": 'relu type for resnet'}) + resnet_weights: Optional[str] = field(default=None, metadata={"help": 'resnet weights'}) + sim_type: str = field(default='cosine', metadata={"help": 'similarity type'}) + + sub_encoder_layers: int = field(default=0, metadata={'help': 
'number of transformer layers for single modality'}) + audio_feat_dim: int = field(default=-1, metadata={'help': 'audio feature dimension'}) + modality_dropout: float = field(default=0, metadata={'help': 'drop one modality'}) + audio_dropout: float = field(default=0, metadata={'help': 'drop audio feature'}) + modality_fuse: str = field(default='concat', metadata={'help': 'fusing two modalities: add,concat'}) + selection_type : str = field(default='same_other_seq', metadata={'help': 'type of selectig images, same_other_seq: replace masked span with span from another sequence, same_seq: repace masked span with span of the same sequence'}) + masking_type : str = field(default='input', metadata={'help': 'input or feature masking'}) + + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field( + default=6, metadata={"help": "num of decoder layers"} + ) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "decoder layerdrop chance"} + ) + decoder_attention_heads: int = field( + default=4, metadata={"help": "num decoder attention heads"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + decoder_normalize_before: bool = field( + default=False, + metadata={"help": "apply layernorm before each decoder block"}, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings " + "(outside self attention)" + }, + ) + decoder_dropout: float = field( + default=0.1, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.1, + metadata={ + "help": "dropout probability for attention weights " + "inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + 
class SubModel(nn.Module):
    """Front-end for a single modality.

    Pipeline: optional ``resnet`` trunk -> linear projection to
    ``cfg.encoder_embed_dim`` -> optional ``TransformerEncoder`` (only built
    when ``cfg.encoder_layers > 0``).

    Args:
        resnet: optional callable applied to the raw input first; skipped
            when ``None``.
        input_dim: size of the feature axis fed to the linear projection.
        cfg: config object; ``encoder_embed_dim`` and ``encoder_layers`` are
            read here, and the whole object is handed to ``TransformerEncoder``.
    """

    def __init__(self, resnet=None, input_dim=None, cfg=None):
        super().__init__()
        self.resnet = resnet
        self.proj = nn.Linear(input_dim, cfg.encoder_embed_dim)
        if cfg.encoder_layers > 0:
            self.encoder = TransformerEncoder(cfg)
        else:
            self.encoder = None

    def forward(self, x):
        """Return projected (and optionally encoded) features.

        The projection is applied on the last axis after swapping axes 1 and
        2, and the axes are swapped back before returning.
        NOTE(review): presumably input/output are channel-first
        ``[B, F, T]`` - confirm against the callers.
        """
        feats = x if self.resnet is None else self.resnet(x)
        projected = self.proj(feats.transpose(1, 2))
        if self.encoder is not None:
            # The encoder returns a tuple; only its first element is used.
            projected = self.encoder(projected)[0]
        return projected.transpose(1, 2)
SubModel(resnet=None, input_dim=cfg.audio_feat_dim, cfg=sub_cfg) + self.feature_extractor_video = SubModel(resnet=resnet, input_dim=resnet.backend_out, cfg=sub_cfg) + self.modality_dropout, self.audio_dropout = cfg.modality_dropout, cfg.audio_dropout + self.modality_fuse = cfg.modality_fuse + self.encoder_embed_dim = cfg.encoder_embed_dim + if self.modality_fuse == 'concat': + self.embed = cfg.encoder_embed_dim * 2 + elif self.modality_fuse == 'add': + self.embed = cfg.encoder_embed_dim + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob_image, self.mask_prob_audio = cfg.mask_prob_image, cfg.mask_prob_audio + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length_image, self.mask_length_audio = cfg.mask_length_image, cfg.mask_length_audio + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + self.sim_type = cfg.sim_type + self.selection_type = cfg.selection_type + self.masking_type = cfg.masking_type + + final_dim = ( + cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + ) + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.audio_feat_dim).uniform_() if self.masking_type == 'input' else torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = 
def apply_input_mask(self, x, padding_mask, target_list):
    """Mask time spans of one raw input stream before feature extraction.

    A 3-D ``x`` is treated as audio and uses the audio mask
    probability/length; any other rank is treated as video and uses the
    image settings. The first three dims of ``x`` are read as ``(B, C, T)``.

    How masked frames are filled:
      * batch size 1: zeros;
      * audio: ``self.mask_emb``;
      * video, ``selection_type == 'same_other_seq'``: the frames at the
        same time steps of another sequence in the batch (a random cyclic
        shift of the batch);
      * video, ``selection_type == 'same_seq'``: frames copied from another
        span of the same sequence.

    Args:
        x: input tensor, channel axis before time axis.
        padding_mask: forwarded unchanged to ``compute_mask_indices``.
        target_list: unused; kept for interface compatibility.

    Returns:
        ``(x, mask_indices)``. ``mask_indices`` is the ``(B, T)`` mask
        returned by ``compute_mask_indices`` converted to a tensor on
        ``x.device``, or ``None`` when the mask probability is not positive.
    """
    B, C, T = x.shape[:3]
    is_audio = len(x.shape) == 3
    if is_audio:
        mask_prob, mask_length = self.mask_prob_audio, self.mask_length_audio
    else:
        mask_prob, mask_length = self.mask_prob_image, self.mask_length_image
    if mask_prob > 0:
        # NOTE(review): this is the project-local compute_mask_indices, which
        # returns span starts/ends/batch ids in addition to the mask.
        mask_indices, starts, ends, batch_indexes = compute_mask_indices(
            (B, T),
            padding_mask,
            mask_prob,
            mask_length,
            self.mask_selection,
            self.mask_other,
            min_masks=2,
            no_overlap=self.no_mask_overlap,
            min_space=self.mask_min_space,
        )
        mask_indices = torch.from_numpy(mask_indices).to(x.device)
        x = x.transpose(1, 2).contiguous()  # [B, T, C, H, W]
        if B == 1:
            x[mask_indices] = 0
        elif is_audio:
            x[mask_indices] = self.mask_emb
        elif self.selection_type == 'same_other_seq':
            # Shift by 1..B-1 so a sequence is never paired with itself.
            perm = (torch.arange(B) + torch.randint(low=1, high=B, size=(1,))) % B
            x_perm = x[perm]
            x[mask_indices] = x_perm[mask_indices]
        elif self.selection_type == 'same_seq':
            span_batch_ids, span_source_ids = [], []
            for batch_index, start, end in zip(batch_indexes, starts, ends):
                length = end - start
                # Candidate source starts exclude [start - length, end).
                candidates = np.setdiff1d(
                    np.arange(T), np.arange(max(0, start - length), end)
                )
                if len(candidates) > 0:
                    # Same random draw as before (size=1), unwrapped to a
                    # plain int so np.arange below never receives a
                    # 1-element array (deprecated array->scalar conversion).
                    other_start = int(np.random.choice(candidates, size=1)[0])
                else:
                    other_start = 0
                other_end = other_start + length
                # Clip so a source span running past the end reuses frame T-1.
                span_source_ids.append(
                    np.arange(other_start, other_end).clip(max=T - 1)
                )
                span_batch_ids.append(
                    np.zeros([length], dtype=np.int64) + batch_index
                )
            flat_batch_ids = np.concatenate(span_batch_ids)
            flat_source_ids = np.concatenate(span_source_ids)
            x[mask_indices] = x[flat_batch_ids, flat_source_ids]

        x = x.transpose(1, 2).contiguous()
    else:
        mask_indices = None

    if self.mask_channel_prob > 0:
        # Channel masking is not applied here; it is only reported.
        logger.info("No mask channel prob for input masking")
    return x, mask_indices
def forward_features(self, source: torch.Tensor, modality: str) -> torch.Tensor:
    """Run the feature extractor of one modality on ``source``.

    Args:
        source: input handed to the extractor unchanged.
        modality: suffix selecting ``self.feature_extractor_<modality>``
            (this class defines ``audio`` and ``video``).

    Returns:
        The extractor output. With ``feature_grad_mult > 0`` gradients flow
        (scaled through ``GradMultiply`` when the multiplier is not 1.0);
        otherwise the extractor runs under ``torch.no_grad()``.

    Raises:
        AttributeError: no extractor exists for ``modality``.
    """
    # Plain attribute lookup: unlike eval(), the modality string can never
    # be executed as code.
    extractor = getattr(self, f"feature_extractor_{modality}")
    if self.feature_grad_mult > 0:
        features = extractor(source)
        if self.feature_grad_mult != 1.0:
            features = GradMultiply.apply(features, self.feature_grad_mult)
    else:
        with torch.no_grad():
            features = extractor(source)
    return features
def forward(
    self,
    source: Dict[str, torch.Tensor],
    target_list: Optional[List[torch.Tensor]] = None,
    padding_mask: Optional[torch.Tensor] = None,
    mask: bool = True,
    features_only: bool = False,
    output_layer: Optional[int] = None
) -> Dict[str, torch.Tensor]:
    """Fuse audio and video features, encode them, and build masked/unmasked logits.

    Args:
        source: mapping with the keys ``'audio'`` and ``'video'``; both
            entries are passed through ``forward_features``.
        target_list: label tensors, one per dictionary. When given, features
            and labels are aligned via ``forward_targets``.
        padding_mask: padding mask on the input time axis; reduced to the
            feature time axis by ``forward_padding_mask``.
        mask: whether to apply masking (at input or feature level, depending
            on ``self.masking_type``).
        features_only: return encoder outputs without computing logits.
        output_layer: 1-based index of the encoder layer to read from;
            ``None`` uses the encoder's default output.

    Returns:
        With ``features_only``: a dict with ``"x"``, ``"padding_mask"`` and
        ``"features"``. Otherwise a dict with masked/unmasked logits and
        targets, the padding mask, and ``"features_pen"``.
    """
    src_audio, src_video = source['audio'], source['video']
    if mask and self.masking_type == 'input':
        # Video is masked before audio; each call draws its own mask.
        src_video, mask_indices_video = self.apply_input_mask(src_video, padding_mask, target_list)
        src_audio, mask_indices_audio = self.apply_input_mask(src_audio, padding_mask, target_list)
        # A frame counts as masked when it is masked in either stream.
        mask_indices = torch.logical_or(mask_indices_audio, mask_indices_video)
    else:
        src_audio, src_video, mask_indices = src_audio, src_video, None

    features_audio = self.forward_features(src_audio, modality='audio') # features: [B, F, T]
    features_video = self.forward_features(src_video, modality='video')
    # Both random numbers are drawn on every call, also outside training.
    modality_drop_prob, audio_drop_prob = np.random.random(), np.random.random()
    if self.training:
        # Modality dropout: zero out exactly one stream for the whole batch.
        if modality_drop_prob < self.modality_dropout:
            if audio_drop_prob < self.audio_dropout:
                features_audio = 0 * features_audio
            else:
                features_video = 0 * features_video
    if self.modality_fuse == 'concat':
        features = torch.cat([features_audio, features_video], dim=1)
    elif self.modality_fuse == 'add':
        features = features_audio + features_video
    if target_list is not None:
        features, mask_indices, target_list = self.forward_targets(features, mask_indices, target_list)

    # Mean squared activation of the fused features, returned as "features_pen".
    features_pen = features.float().pow(2).mean()

    features = features.transpose(1, 2)
    features = self.layer_norm(features)

    if padding_mask is not None:
        padding_mask = self.forward_padding_mask(features, padding_mask)

    if self.post_extract_proj is not None:
        features = self.post_extract_proj(features)

    features = self.dropout_input(features)
    if self.masking_type == 'feature' and mask:
        x, mask_indices = self.apply_feature_mask(features, padding_mask, target_list)
    else:
        x = features

    # feature: (B, T, D), float
    # target: (B, T), long
    # x: (B, T, D), float
    # padding_mask: (B, T), bool
    # mask_indices: (B, T), bool
    x, _ = self.encoder(
        x,
        padding_mask=padding_mask,
        layer=None if output_layer is None else output_layer - 1
    )

    if features_only:
        return {"x": x, "padding_mask": padding_mask, "features": features}

    # From here on mask_indices, padding_mask and target_list are used
    # unconditionally, so all three must be non-None on this path.
    label_embs_list = self.label_embs_concat.split(self.num_classes, 0)
    proj_x = self.final_proj(x)
    if self.untie_final_proj:
        proj_x_list = proj_x.chunk(len(self.num_classes), dim=-1)
    else:
        proj_x_list = [proj_x for _ in self.num_classes]
    logit_list = [self.compute_logits(proj, emb).view(-1, num_class) for proj, emb, num_class in zip(proj_x_list, label_embs_list, self.num_classes)] # [[B*T, V]]
    # NOTE: `mask` is rebound here from the bool argument to a flat selector
    # of masked, non-padded frames.
    mask, unmask = torch.logical_and(mask_indices, ~padding_mask).view(-1), torch.logical_and(~mask_indices, ~padding_mask).view(-1) # [B*T]
    logit_m_list, logit_u_list = [logit[mask] for logit in logit_list], [logit[unmask] for logit in logit_list]
    target_m_list, target_u_list = [target.view(-1)[mask].long() for target in target_list], [target.view(-1)[unmask].long() for target in target_list]
    result = {
        "logit_m_list": logit_m_list,
        "logit_u_list": logit_u_list,
        "target_m_list": target_m_list,
        "target_u_list": target_u_list,
        "padding_mask": padding_mask,
        "features_pen": features_pen,
    }
    return result
def extract_finetune(self, source, padding_mask=None, mask=False, ret_conv=False, output_layer=None):
    """Encode audio and/or video for fine-tuning; either modality may be absent.

    Args:
        source: mapping with the keys ``'audio'`` and ``'video'``. One of
            the two values may be ``None``; the missing stream is replaced
            by zeros of width ``self.encoder_embed_dim``.
        padding_mask: optional padding mask on the input time axis.
        mask: apply input masking (only when ``self.masking_type`` is
            ``'input'``). Only the streams that are present are masked.
        ret_conv: accepted for interface compatibility; not used here.
        output_layer: 1-based encoder layer to read from, or ``None``.

    Returns:
        ``(x, padding_mask)``: the encoder output and the padding mask
        reduced to the feature time axis (``None`` if none was given).

    Raises:
        ValueError: both modalities are ``None``, or ``self.modality_fuse``
            is neither ``'concat'`` nor ``'add'``.
    """
    src_audio, src_video = source['audio'], source['video']
    if src_audio is None and src_video is None:
        raise ValueError(
            "extract_finetune needs at least one of source['audio'] / source['video']"
        )

    if mask and self.masking_type == 'input':
        # Video first, then audio - same order as before, so random draws
        # are consumed in the same sequence. The mask indices themselves
        # are not needed for fine-tuning.
        if src_video is not None:
            src_video, _ = self.apply_input_mask(src_video, padding_mask, target_list=None)
        if src_audio is not None:
            src_audio, _ = self.apply_input_mask(src_audio, padding_mask, target_list=None)

    if src_video is None:
        features_audio = self.forward_features(src_audio, modality='audio')  # features: [B, F, T]
        features_video = features_audio.new_zeros(
            features_audio.size(0), self.encoder_embed_dim, features_audio.size(-1)
        )
    elif src_audio is None:
        features_video = self.forward_features(src_video, modality='video')
        features_audio = features_video.new_zeros(
            features_video.size(0), self.encoder_embed_dim, features_video.size(-1)
        )
    else:
        features_video = self.forward_features(src_video, modality='video')
        features_audio = self.forward_features(src_audio, modality='audio')  # features: [B, F, T]

    if self.modality_fuse == 'concat':
        features = torch.cat([features_audio, features_video], dim=1)
    elif self.modality_fuse == 'add':
        features = features_audio + features_video
    else:
        raise ValueError(f"unsupported modality_fuse: {self.modality_fuse!r}")

    features = features.transpose(1, 2)
    features = self.layer_norm(features)
    normed_features = features

    if padding_mask is not None:
        padding_mask = self.forward_padding_mask(features, padding_mask)

    if self.post_extract_proj is not None:
        features = self.post_extract_proj(features)

    features = self.dropout_input(features)
    # The result is discarded; the call is kept so the amount of randomness
    # consumed in training mode stays the same as before.
    self.dropout_features(normed_features)

    # features: (B, T, D), float
    # padding_mask: (B, T), bool
    x, _ = self.encoder(
        features,
        padding_mask=padding_mask,
        layer=None if output_layer is None else output_layer - 1
    )

    return x, padding_mask
def load_label(label_path, inds, tot):
    """Read a label file and return the labels at the positions in ``inds``.

    Each line is one label; trailing whitespace is stripped.

    Args:
        label_path: path of the label file.
        inds: line indices to keep, in the order they should be returned.
        tot: number of lines the file is expected to contain.

    Returns:
        The selected labels as a list of strings.

    Raises:
        AssertionError: the file does not contain exactly ``tot`` lines.
    """
    with open(label_path) as handle:
        all_labels = list(map(str.rstrip, handle))
    assert (
        len(all_labels) == tot
    ), f"number of labels does not match ({len(all_labels)} != {tot})"
    return [all_labels[idx] for idx in inds]
def verify_label_lengths(
    audio_sizes,
    audio_rate,
    label_path,
    label_rate,
    inds,
    tot,
    tol=0.1,  # tolerance in seconds
):
    """Warn about samples whose audio and label durations disagree.

    The audio duration is ``audio_sizes[i] / audio_rate`` and the label
    duration is the whitespace-separated token count of the matching label
    line divided by ``label_rate``. Nothing is returned; mismatches are only
    logged.

    Args:
        audio_sizes: sizes of the kept samples, aligned with ``inds``.
        audio_rate: rate used to turn a size into seconds.
        label_path: text file with one label sequence per line.
        label_rate: labels per second; a negative value marks sequence-level
            labels, for which the check is skipped.
        inds: indices of the kept lines in ``label_path``.
        tot: expected number of lines in ``label_path``.
        tol: largest accepted duration difference, in seconds.
    """
    if label_rate < 0:
        logger.info(f"{label_path} is sequence label. skipped")
        return

    with open(label_path) as label_file:
        tokens_per_line = [len(row.rstrip().split()) for row in label_file]
    assert len(tokens_per_line) == tot
    kept_lengths = [tokens_per_line[k] for k in inds]

    mismatched = 0
    for pos, line_no in enumerate(inds):
        n_samples = audio_sizes[pos]
        n_labels = kept_lengths[pos]
        dur_from_audio = n_samples / audio_rate
        dur_from_label = n_labels / label_rate
        if abs(dur_from_audio - dur_from_label) > tol:
            logger.warning(
                f"audio and label duration differ too much "
                f"(|{dur_from_audio} - {dur_from_label}| > {tol}) "
                f"in line {line_no+1} of {label_path}. Check if `label_rate` "
                f"is correctly set (currently {label_rate}). "
                f"num. of samples = {n_samples}; "
                f"label length = {n_labels}"
            )
            mismatched += 1

    if mismatched > 0:
        logger.warning(
            f"total {mismatched} (audio, label) pairs with mismatched lengths"
        )
def get_label(self, index, label_idx):
    """Return the (optionally processed) label of sample ``index``.

    Args:
        index: position of the sample among the kept samples.
        label_idx: which label stream to read from.

    Returns:
        The raw label string, or the output of
        ``self.label_processors[label_idx]`` when processors are configured.
    """
    if self.store_labels:
        label = self.label_list[label_idx][index]
    else:
        offset_s, offset_e = self.label_offsets_list[label_idx][index]
        # The offsets are UTF-8 *byte* positions (load_label_offset sums
        # len(line.encode("utf-8"))). Reading in text mode would count
        # characters instead and over-read for non-ASCII labels, so read
        # the raw bytes and decode them explicitly.
        with open(self.label_paths[label_idx], "rb") as f:
            f.seek(offset_s)
            label = f.read(offset_e - offset_s).decode("utf-8")

    if self.label_processors is not None:
        label = self.label_processors[label_idx](label)
    return label
def add_noise(self, clean_wav):
    """Mix a noise clip into ``clean_wav`` at the configured SNR.

    The noise returned by ``self.select_noise()`` is repeated or truncated to
    the length of the clean signal and scaled so that the ratio of clean RMS
    to noise RMS matches the requested SNR (in dB). If the mixture exceeds
    the int16 range it is rescaled to fit before the final cast.

    ``self.noise_snr`` may be a single number, or a ``(low, high)`` tuple or
    list from which an integer SNR is drawn uniformly (``high`` inclusive).

    Args:
        clean_wav: 1-D waveform array.

    Returns:
        The mixed waveform as an ``int16`` array.

    Raises:
        TypeError: if ``self.noise_snr`` is neither a number nor a
            tuple/list.
    """
    clean_wav = clean_wav.astype(np.float32)
    noise_wav = self.select_noise()

    noise_snr = self.noise_snr
    if isinstance(noise_snr, (int, float, np.integer, np.floating)):
        snr = noise_snr
    elif isinstance(noise_snr, (tuple, list)):
        snr = np.random.randint(noise_snr[0], noise_snr[1] + 1)
    else:
        # Previously this fell through and failed later with an unbound `snr`.
        raise TypeError(
            f"noise_snr must be a number or a (low, high) pair, got {noise_snr!r}"
        )

    clean_rms = np.sqrt(np.mean(np.square(clean_wav), axis=-1))

    # Make the noise exactly as long as the clean signal.
    if len(clean_wav) > len(noise_wav):
        ratio = int(np.ceil(len(clean_wav) / len(noise_wav)))
        noise_wav = np.tile(noise_wav, ratio)
    if len(clean_wav) < len(noise_wav):
        noise_wav = noise_wav[:len(clean_wav)]

    noise_rms = np.sqrt(np.mean(np.square(noise_wav), axis=-1))
    if noise_rms > 0:
        adjusted_noise_rms = clean_rms / (10 ** (snr / 20))
        mixed = clean_wav + noise_wav * (adjusted_noise_rms / noise_rms)
    else:
        # Silent noise cannot be scaled to a target RMS (0/0 would give NaN);
        # leave the clean signal untouched.
        mixed = clean_wav

    # Avoid clipping noise
    max_int16 = np.iinfo(np.int16).max
    min_int16 = np.iinfo(np.int16).min
    peak = mixed.max(axis=0)
    trough = mixed.min(axis=0)
    if peak > max_int16 or trough < min_int16:
        if peak >= abs(trough):
            reduction_rate = max_int16 / peak
        else:
            reduction_rate = min_int16 / trough
        mixed = mixed * reduction_rate
    return mixed.astype(np.int16)
def crop_to_max_size(self, wav, target_size, start=None):
    """Cut ``wav`` down to at most ``target_size`` leading-axis items.

    Args:
        wav: sequence to crop (anything supporting ``len`` and slicing).
        target_size: maximum length of the returned window.
        start: optional window start. When omitted, the window starts at 0,
            or at a random valid offset if ``self.random_crop`` is set.

    Returns:
        ``(cropped, start)``. Inputs that already fit are returned unchanged
        with a start of 0.
    """
    surplus = len(wav) - target_size
    if surplus <= 0:
        # Nothing to cut: the input already fits.
        return wav, 0

    if start is None:
        # Choose the window start ourselves.
        start = np.random.randint(0, surplus + 1) if self.random_crop else 0
    return wav[start:start + target_size], start
def collater_audio(self, audios, audio_size, audio_starts=None):
    """Batch variable-length feature tensors into one tensor of length ``audio_size``.

    Shorter items are zero-padded at the end (only allowed when
    ``self.pad_audio`` is set) and flagged in the padding mask; longer items
    are cropped through ``self.crop_to_max_size``.

    Args:
        audios: list of tensors whose first axis is time.
        audio_size: target length of the time axis.
        audio_starts: optional crop starts, one per item; when omitted, the
            starts chosen while cropping are recorded and returned.

    Returns:
        ``(collated, padding_mask, audio_starts)``. 2-D items come back as
        [B, F, T]; otherwise the batch is permuted from [B, T, H, W, C] to
        [B, C, T, H, W].
    """
    feat_dims = list(audios[0].shape[1:])
    batch = audios[0].new_zeros([len(audios), audio_size] + feat_dims)
    padding_mask = torch.BoolTensor(len(audios), audio_size).fill_(False)

    start_known = audio_starts is not None
    if not start_known:
        audio_starts = [0] * len(audios)

    for row, clip in enumerate(audios):
        excess = len(clip) - audio_size
        if excess > 0:
            # Too long: crop, reusing the caller's start when one was given.
            hint = audio_starts[row] if start_known else None
            batch[row], audio_starts[row] = self.crop_to_max_size(
                clip, audio_size, hint
            )
        elif excess < 0:
            # Too short: pad with zeros and mark the padded tail.
            assert self.pad_audio
            filler = clip.new_full([-excess] + feat_dims, 0.0)
            batch[row] = torch.cat([clip, filler])
            padding_mask[row, excess:] = True
        else:
            batch[row] = clip

    if len(audios[0].shape) == 2:
        batch = batch.transpose(1, 2)  # [B, T, F] -> [B, F, T]
    else:
        # [B, T, H, W, C] -> [B, C, T, H, W]
        batch = batch.permute((0, 4, 1, 2, 3)).contiguous()
    return batch, padding_mask, audio_starts
def collater_label(self, targets_by_label, audio_size, audio_starts):
    """Collate every label stream with the collater matching its label rate.

    A label rate of -1 selects a sequence-level collater (the seq2seq variant
    when ``self.is_s2s`` is set); any other rate selects the frame-level
    collater, which also receives the batch size and crop starts.

    Args:
        targets_by_label: one list of per-sample targets per label stream.
        audio_size: time length of the collated batch.
        audio_starts: crop start of every sample in the batch.

    Returns:
        Three parallel lists, one entry per label stream: collated targets,
        target lengths and token counts.
    """
    collated, sizes, counts = [], [], []
    streams = zip(targets_by_label, self.label_rates, self.pad_list)
    for raw_targets, rate, pad_idx in streams:
        if rate != -1:
            result = self.collater_frm_label(
                raw_targets, audio_size, audio_starts, rate, pad_idx
            )
        elif self.is_s2s:
            result = self.collater_seq_label_s2s(raw_targets, pad_idx)
        else:
            result = self.collater_seq_label(raw_targets, pad_idx)
        stream_targets, stream_lengths, stream_ntokens = result
        collated.append(stream_targets)
        sizes.append(stream_lengths)
        counts.append(stream_ntokens)
    return collated, sizes, counts
__init__(self, dictionary: Dictionary, bpe_tokenizer) -> None: + self.bpe_tokenizer = bpe_tokenizer + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + label = self.bpe_tokenizer.encode(label.lower()) + return self.dictionary.encode_line( + label, append_eos=True, add_if_not_exist=False, + ).long() + + def decode(self, tok, symbols_ignore=None): + tok = self.dictionary.string(tok, extra_symbols_to_ignore=symbols_ignore) + if self.bpe_tokenizer: + tok = self.bpe_tokenizer.decode(tok) + return tok + +@dataclass +class AVHubertPretrainingConfig(FairseqDataclass): + data: str = field( + default=MISSING, metadata={"help": "path to data directory"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: int = field( + default=-1, + metadata={"help": "label frame rate. -1 for sequence label"}, + ) + + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
audio files will be up/down " + "sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={ + "help": "if set, normalizes input to have 0 mean and unit variance" + }, + ) + enable_padding: bool = field( + default=False, + metadata={"help": "pad shorter samples instead of cropping"}, + ) + max_sample_size: Optional[int] = field( + default=None, + metadata={"help": "max sample size to keep in training"}, + ) + min_sample_size: Optional[int] = field( + default=None, + metadata={"help": "min sample size to keep in training"}, + ) + max_trim_sample_size: Optional[int] = field( + default=II("task.max_sample_size"), + metadata={"help": "max sample size to trim to for batching"}, + ) + single_target: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset" + }, + ) + random_crop: Optional[bool] = field( + default=True, + metadata={"help": "always crop from the beginning if false"}, + ) + pad_audio: Optional[bool] = field( + default=False, + metadata={"help": "pad audio to the longest one in the batch if true"}, + ) + pdb: Optional[bool] = field( + default=False, + metadata={"help": "pdb"}, + ) + stack_order_audio: int = field( + default=1, + metadata={"help": "concatenate n consecutive audio frames for one step"}, + ) + skip_verify: Optional[bool] = field( + default=False, + metadata={"help": "skip verifying label-audio alignment"}, + ) + image_aug: bool = field(default=False, metadata={'help': 'image data augmentation'}) + image_crop_size: int = field( + default=88, metadata={"help": "image ROI size"}) + image_mean: float = field( + default=0.421, metadata={"help": "image mean"}) + image_std: float = field( + default=0.165, metadata={"help": "image std"}) + modalities: Optional[List[str]] = field(default_factory=lambda: ["audio", "video"], metadata={'help': 'modalities to load'}) + is_s2s: bool=field(default=False, metadata={'help': 'seq2seq fine-tuning only'}) + 
@register_task("av_hubert_pretraining", dataclass=AVHubertPretrainingConfig)
class AVHubertPretrainingTask(FairseqTask):
    """Fairseq task that builds ``AVHubertDataset`` splits and generators.

    When ``cfg.fine_tuning`` is set, a single target dictionary is registered
    (plus a BPE tokenizer when ``cfg.is_s2s`` is set); otherwise one
    dictionary per entry of ``cfg.labels`` is registered.
    """

    cfg: AVHubertPretrainingConfig

    def __init__(
        self,
        cfg: AVHubertPretrainingConfig,
    ) -> None:
        super().__init__(cfg)

        logger.info(f"current directory is {os.getcwd()}")
        logger.info(f"AVHubertPretrainingTask Config {cfg}")

        self.fine_tuning = cfg.fine_tuning
        # Dictionaries/tokenizer are registered as factories on the task
        # state rather than loaded here.
        if cfg.fine_tuning:
            self.state.add_factory("target_dictionary", self.load_dictionaries)
            if cfg.is_s2s:
                self.state.add_factory("s2s_tokenizer", self.load_tokenizer)
        else:
            self.state.add_factory("dictionaries", self.load_dictionaries)

        # NOTE(review): upstream AV-HuBERT appears to use "<s>" here; confirm
        # that the empty string is intended and not an extraction artifact.
        self.blank_symbol = ""

    @property
    def source_dictionary(self) -> Optional[Dictionary]:
        """This task has no source dictionary."""
        return None

    @property
    def target_dictionary(self) -> Optional[Dictionary]:
        """Target dictionary stored on the task state."""
        return self.state.target_dictionary

    @property
    def dictionaries(self) -> List[Dictionary]:
        """Per-label dictionaries stored on the task state."""
        return self.state.dictionaries

    def load_dictionaries(self):
        """Load ``dict.<label>.txt`` for every configured label.

        Returns:
            The first dictionary when fine-tuning, otherwise the full list.
        """
        label_dir = self.get_label_dir()
        dictionaries = [
            Dictionary.load(f"{label_dir}/dict.{label}.txt")
            for label in self.cfg.labels
        ]
        return dictionaries[0] if self.cfg.fine_tuning else dictionaries

    def load_tokenizer(self):
        """Build the BPE tokenizer named by ``cfg.tokenizer_bpe_name``."""
        bpe_args = Namespace(
            **{
                'bpe': self.cfg.tokenizer_bpe_name,
                f"{self.cfg.tokenizer_bpe_name}_model": self.cfg.tokenizer_bpe_model,
            }
        )
        return encoders.build_bpe(bpe_args)

    @property
    def s2s_tokenizer(self):
        """Tokenizer stored on the task state (seq2seq fine-tuning only)."""
        return self.state.s2s_tokenizer

    @classmethod
    def setup_task(
        cls, cfg: AVHubertPretrainingConfig, **kwargs
    ) -> "AVHubertPretrainingTask":
        """Create the task, dropping into ``pdb`` first when ``cfg.pdb`` is set."""
        if cfg.pdb:
            import pdb
            pdb.set_trace()
        return cls(cfg)

    def get_label_dir(self) -> str:
        """Return ``cfg.label_dir``, falling back to ``cfg.data`` when unset."""
        if self.cfg.label_dir is None:
            return self.cfg.data
        return self.cfg.label_dir

    def load_dataset(self, split: str, **kwargs) -> None:
        """Build the ``AVHubertDataset`` for ``split`` and store it in ``self.datasets``.

        Image augmentation is only enabled for the ``train`` split.

        Raises:
            ValueError / SyntaxError: if ``cfg.noise_snr`` is not a Python
                literal (a number, or a tuple/list of numbers).
        """
        import ast

        manifest = f"{self.cfg.data}/{split}.tsv"
        dictionaries = [self.target_dictionary] if self.fine_tuning else self.dictionaries
        pad_list = [dictionary.pad() for dictionary in dictionaries]
        eos_list = [dictionary.eos() for dictionary in dictionaries]
        if not self.cfg.is_s2s:
            procs = [LabelEncoder(dictionary) for dictionary in dictionaries]
        else:
            logger.info("Using tokenizer")
            bpe_tokenizer = self.s2s_tokenizer
            procs = [LabelEncoderS2SToken(dictionary, bpe_tokenizer) for dictionary in dictionaries]
        paths = [
            f"{self.get_label_dir()}/{split}.{label}" for label in self.cfg.labels
        ]
        image_aug = self.cfg.image_aug if split == 'train' else False
        noise_fn = (
            f"{self.cfg.noise_wav}/{split}.tsv"
            if self.cfg.noise_wav is not None
            else None
        )
        # The SNR arrives as a config string; parse it as a literal instead
        # of eval() so arbitrary expressions in a config cannot be executed.
        noise_snr = ast.literal_eval(self.cfg.noise_snr)
        noise_num = self.cfg.noise_num
        self.datasets[split] = AVHubertDataset(
            manifest,
            sample_rate=self.cfg.sample_rate,
            label_paths=paths,
            label_rates=self.cfg.label_rate,
            pad_list=pad_list,
            eos_list=eos_list,
            label_processors=procs,
            max_keep_sample_size=self.cfg.max_sample_size,
            min_keep_sample_size=self.cfg.min_sample_size,
            max_sample_size=self.cfg.max_trim_sample_size,
            pad_audio=self.cfg.pad_audio,
            normalize=self.cfg.normalize,
            store_labels=False,
            random_crop=self.cfg.random_crop,
            single_target=self.cfg.single_target,
            stack_order_audio=self.cfg.stack_order_audio,
            skip_verify=self.cfg.skip_verify,
            image_mean=self.cfg.image_mean,
            image_std=self.cfg.image_std,
            image_crop_size=self.cfg.image_crop_size,
            image_aug=image_aug,
            modalities=self.cfg.modalities,
            is_s2s=self.cfg.is_s2s,
            noise_fn=noise_fn,
            noise_prob=self.cfg.noise_prob,
            noise_snr=noise_snr,
            noise_num=noise_num
        )

    def max_positions(self) -> Tuple[int, int]:
        """No length limit is imposed by the task."""
        return (sys.maxsize, sys.maxsize)

    def filter_indices_by_size(
        self, indices: np.array, *args, **kwargs
    ) -> np.array:
        """Return ``indices`` unchanged; no size filtering is done here."""
        return indices

    def build_generator(
        self, models, args, seq_gen_cls=None, extra_gen_cls_kwargs=None, prefix_allowed_tokens_fn=None,
    ):
        """
        Build a :class:`~fairseq.SequenceGenerator` instance for this
        task.
        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models
            args (fairseq.dataclass.configs.GenerationConfig):
                configuration object (dataclass) for generation
            seq_gen_cls: generator class to instantiate; when omitted,
                fairseq's ``SequenceGenerator`` is used (or
                ``SequenceGeneratorWithAlignment`` when
                ``args.print_alignment`` is set)
            extra_gen_cls_kwargs (Dict[str, Any]): extra options to pass
                through to SequenceGenerator
            prefix_allowed_tokens_fn (Callable[[int, torch.Tensor], List[int]]):
                If provided, this function constrains the beam search to
                allowed tokens only at each step. The provided function
                should take 2 arguments: the batch ID (`batch_id: int`)
                and a unidimensional tensor of token ids (`inputs_ids:
                torch.Tensor`). It has to return a `List[int]` with the
                allowed tokens for the next generation step conditioned
                on the previously generated tokens (`inputs_ids`) and
                the batch ID (`batch_id`). This argument is useful for
                constrained generation conditioned on the prefix, as
                described in "Autoregressive Entity Retrieval"
                (https://arxiv.org/abs/2010.00904) and
                https://github.com/facebookresearch/GENRE.
        Raises:
            ValueError: if more than one mutually exclusive search option
                is enabled.
        """
        if getattr(args, "score_reference", False):
            from fairseq.sequence_scorer import SequenceScorer

            return SequenceScorer(
                self.target_dictionary,
                compute_alignment=getattr(args, "print_alignment", False),
            )

        # Choose search strategy. Defaults to Beam Search.
        sampling = getattr(args, "sampling", False)
        sampling_topk = getattr(args, "sampling_topk", -1)
        sampling_topp = getattr(args, "sampling_topp", -1.0)
        diverse_beam_groups = getattr(args, "diverse_beam_groups", -1)
        diverse_beam_strength = getattr(args, "diverse_beam_strength", 0.5)
        match_source_len = getattr(args, "match_source_len", False)
        diversity_rate = getattr(args, "diversity_rate", -1)
        constrained = getattr(args, "constraints", False)
        if prefix_allowed_tokens_fn is None:
            prefix_allowed_tokens_fn = getattr(args, "prefix_allowed_tokens_fn", None)
        if (
            sum(
                int(cond)
                for cond in [
                    sampling,
                    diverse_beam_groups > 0,
                    match_source_len,
                    diversity_rate > 0,
                ]
            )
            > 1
        ):
            raise ValueError("Provided Search parameters are mutually exclusive.")
        assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling"
        assert sampling_topp < 0 or sampling, "--sampling-topp requires --sampling"

        if sampling:
            search_strategy = search.Sampling(
                self.target_dictionary, sampling_topk, sampling_topp
            )
        elif diverse_beam_groups > 0:
            search_strategy = search.DiverseBeamSearch(
                self.target_dictionary, diverse_beam_groups, diverse_beam_strength
            )
        elif match_source_len:
            # this is useful for tagging applications where the output
            # length should match the input length, so we hardcode the
            # length constraints for simplicity
            search_strategy = search.LengthConstrainedBeamSearch(
                self.target_dictionary,
                min_len_a=1,
                min_len_b=0,
                max_len_a=1,
                max_len_b=0,
            )
        elif diversity_rate > -1:
            search_strategy = search.DiverseSiblingsSearch(
                self.target_dictionary, diversity_rate
            )
        elif constrained:
            search_strategy = search.LexicallyConstrainedBeamSearch(
                self.target_dictionary, args.constraints
            )
        elif prefix_allowed_tokens_fn:
            search_strategy = search.PrefixConstrainedBeamSearch(
                self.target_dictionary, prefix_allowed_tokens_fn
            )
        else:
            search_strategy = search.BeamSearch(self.target_dictionary)

        extra_gen_cls_kwargs = extra_gen_cls_kwargs or {}
        if seq_gen_cls is None:
            # These classes are not imported at module level in this file,
            # so resolve the default generators here to avoid a NameError.
            from fairseq.sequence_generator import (
                SequenceGenerator,
                SequenceGeneratorWithAlignment,
            )

            if getattr(args, "print_alignment", False):
                seq_gen_cls = SequenceGeneratorWithAlignment
                extra_gen_cls_kwargs["print_alignment"] = args.print_alignment
            else:
                seq_gen_cls = SequenceGenerator

        return seq_gen_cls(
            models,
            self.target_dictionary,
            beam_size=getattr(args, "beam", 5),
            max_len_a=getattr(args, "max_len_a", 0),
            max_len_b=getattr(args, "max_len_b", 200),
            min_len=getattr(args, "min_len", 1),
            normalize_scores=(not getattr(args, "unnormalized", False)),
            len_penalty=getattr(args, "lenpen", 1),
            unk_penalty=getattr(args, "unkpen", 0),
            temperature=getattr(args, "temperature", 1.0),
            match_source_len=getattr(args, "match_source_len", False),
            no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
            search_strategy=search_strategy,
            **extra_gen_cls_kwargs,
        )
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages with a skip connection.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            back as the residual.
        relu_type: ``'relu'`` or ``'prelu'``.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, relu_type = 'relu' ):
        super().__init__()

        assert relu_type in ['relu','prelu']

        # First stage; the stride (if any) is applied here.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)

        # Each activation gets its own module instance.
        if relu_type == 'relu':
            make_activation = lambda: nn.ReLU(inplace=True)
        elif relu_type == 'prelu':
            make_activation = lambda: nn.PReLU(num_parameters=planes)
        else:
            raise Exception('relu type not implemented')
        self.relu1 = make_activation()
        self.relu2 = make_activation()

        # Second stage keeps the channel count and resolution.
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply both conv stages, add the residual, then the last activation."""
        out = self.relu1(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu2(out)
class ResEncoder(nn.Module):
    """3-D convolutional front-end followed by a 2-D ResNet trunk.

    Args:
        relu_type: ``'prelu'`` selects PReLU activations; any other value
            selects ReLU in the front-end.
        weights: optional checkpoint path. The file must hold a
            ``'model_state_dict'`` mapping; entries whose key contains
            ``frontend3D`` / ``trunk`` are loaded into the matching
            sub-module after dropping the first dotted key component.
    """

    def __init__(self, relu_type, weights):
        super(ResEncoder, self).__init__()
        self.frontend_nout = 64
        self.backend_out = 512
        frontend_relu = nn.PReLU(num_parameters=self.frontend_nout) if relu_type == 'prelu' else nn.ReLU()
        self.frontend3D = nn.Sequential(
            nn.Conv3d(1, self.frontend_nout, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False),
            nn.BatchNorm3d(self.frontend_nout),
            frontend_relu,
            nn.MaxPool3d( kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)))
        self.trunk = ResNet(BasicBlock, [2, 2, 2, 2], relu_type=relu_type)
        if weights is not None:
            # The module only binds `torch.nn as nn`, so `torch` and
            # `OrderedDict` must be imported here; without this, loading
            # weights raised NameError.
            import torch
            from collections import OrderedDict

            logger.info(f"Load {weights} for resnet")
            std = torch.load(weights, map_location=torch.device('cpu'))['model_state_dict']
            frontend_std, trunk_std = OrderedDict(), OrderedDict()
            for key, val in std.items():
                # Drop the leading component so keys match the sub-module.
                new_key = '.'.join(key.split('.')[1:])
                if 'frontend3D' in key:
                    frontend_std[new_key] = val
                if 'trunk' in key:
                    trunk_std[new_key] = val
            self.frontend3D.load_state_dict(frontend_std)
            self.trunk.load_state_dict(trunk_std)

    def forward(self, x):
        """Encode a 5-D batch [B, C, T, H, W] into [B, D, T'] features."""
        B, C, T, H, W = x.size()
        x = self.frontend3D(x)
        Tnew = x.shape[2]
        # Fold time into the batch axis so the 2-D trunk sees single frames.
        x = self.threeD_to_2D_tensor(x)
        x = self.trunk(x)
        x = x.view(B, Tnew, x.size(1))
        x = x.transpose(1, 2).contiguous()
        return x

    def threeD_to_2D_tensor(self, x):
        """Reshape [N, C, T, H, W] into [N*T, C, H, W] (time folded into batch)."""
        n_batch, n_channels, s_time, sx, sy = x.shape
        x = x.transpose(1, 2).contiguous()
        return x.reshape(n_batch*s_time, n_channels, sx, sy)
+ Args: + preprocess (list of ``Preprocess`` objects): list of preprocess to compose. + """ + + def __init__(self, preprocess): + self.preprocess = preprocess + + def __call__(self, sample): + for t in self.preprocess: + sample = t(sample) + return sample + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.preprocess: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class Normalize(object): + """Normalize a ndarray image with mean and standard deviation. + """ + + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, frames): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + Returns: + Tensor: Normalized Tensor image. + """ + frames = (frames - self.mean) / self.std + return frames + + def __repr__(self): + return self.__class__.__name__+'(mean={0}, std={1})'.format(self.mean, self.std) + +class CenterCrop(object): + """Crop the given image at the center + """ + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = int(round((w - tw))/2.) + delta_h = int(round((h - th))/2.) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + +class RandomCrop(object): + """Crop the given image at the center + """ + + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. 
+ """ + t, h, w = frames.shape + th, tw = self.size + delta_w = random.randint(0, w-tw) + delta_h = random.randint(0, h-th) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + +class HorizontalFlip(object): + """Flip image horizontally. + """ + + def __init__(self, flip_ratio): + self.flip_ratio = flip_ratio + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be flipped with a probability flip_ratio + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + if random.random() < self.flip_ratio: + for index in range(t): + frames[index] = cv2.flip(frames[index], 1) + return frames + +def compute_mask_indices( + shape: Tuple[int, int], + padding_mask: Optional[torch.Tensor], + mask_prob: float, + mask_length: int, + mask_type: str = "static", + mask_other: float = 0.0, + min_masks: int = 0, + no_overlap: bool = False, + min_space: int = 0, +) -> np.ndarray: + """ + Computes random mask spans for a given shape + Args: + shape: the the shape for which to compute masks. + should be of size 2 where first element is batch size and 2nd is timesteps + padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements + mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by + number of timesteps divided by length of mask span to mask approximately this percentage of all elements. + however due to overlaps, the actual number will be smaller (unless no_overlap is True) + mask_type: how to compute mask lengths + static = fixed size + uniform = sample from uniform distribution [mask_other, mask_length*2] + normal = sample from normal distribution with mean mask_length and stdev mask_other. 
mask is min 1 element + poisson = sample from possion distribution with lambda = mask length + min_masks: minimum number of masked spans + no_overlap: if false, will switch to an alternative recursive algorithm that prevents spans from overlapping + min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans + """ + + bsz, all_sz = shape + mask = np.full((bsz, all_sz), False) + + all_num_mask = int( + # add a random number for probabilistic rounding + mask_prob * all_sz / float(mask_length) + + np.random.rand() + ) + + all_num_mask = max(min_masks, all_num_mask) + + mask_idcs = [] + for i in range(bsz): + if padding_mask is not None: + sz = all_sz - padding_mask[i].long().sum().item() + num_mask = int( + # add a random number for probabilistic rounding + mask_prob * sz / float(mask_length) + + np.random.rand() + ) + num_mask = max(min_masks, num_mask) + else: + sz = all_sz + num_mask = all_num_mask + + if mask_type == "static": + lengths = np.full(num_mask, mask_length) + elif mask_type == "uniform": + lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask) + elif mask_type == "normal": + lengths = np.random.normal(mask_length, mask_other, size=num_mask) + lengths = [max(1, int(round(x))) for x in lengths] + elif mask_type == "poisson": + lengths = np.random.poisson(mask_length, size=num_mask) + lengths = [int(round(x)) for x in lengths] + else: + raise Exception("unknown mask selection " + mask_type) + + if sum(lengths) == 0: + lengths[0] = min(mask_length, sz - 1) + + if no_overlap: + mask_idc = [] + + def arrange(s, e, length, keep_length): + span_start = np.random.randint(s, e - length) + mask_idc.extend(span_start + i for i in range(length)) + + new_parts = [] + if span_start - s - min_space >= keep_length: + new_parts.append((s, span_start - min_space + 1)) + if e - span_start - keep_length - min_space > keep_length: + new_parts.append((span_start + length + min_space, e)) + return new_parts + + 
parts = [(0, sz)] + min_length = min(lengths) + for length in sorted(lengths, reverse=True): + lens = np.fromiter( + (e - s if e - s >= length + min_space else 0 for s, e in parts), + np.int, + ) + l_sum = np.sum(lens) + if l_sum == 0: + break + probs = lens / np.sum(lens) + c = np.random.choice(len(parts), p=probs) + s, e = parts.pop(c) + parts.extend(arrange(s, e, length, min_length)) + mask_idc = np.asarray(mask_idc) + else: + min_len = min(lengths) + if sz - min_len <= num_mask: + min_len = sz - num_mask - 1 + + mask_idc = np.random.choice(sz - min_len, num_mask, replace=False) + + mask_idc = np.asarray( + [ + mask_idc[j] + offset + for j in range(len(mask_idc)) + for offset in range(lengths[j]) + ] + ) + + mask_idcs.append(np.unique(mask_idc[mask_idc < sz])) + + min_len = min([len(m) for m in mask_idcs]) + batch_indexes, starts, ends = [], [], [] + for i, mask_idc in enumerate(mask_idcs): + if len(mask_idc) > min_len: + mask_idc = np.random.choice(mask_idc, min_len, replace=False) + mask[i, mask_idc] = True + vals, run_starts, run_lengths = find_runs(mask[i]) + start_indices, lengths = run_starts[vals == True], run_lengths[vals == True] + starts.append(start_indices) + ends.append(start_indices+lengths) + batch_indexes.append(np.zeros([len(start_indices)])+i) + return mask, np.concatenate(starts).astype(np.int64), np.concatenate(ends).astype(np.int64), np.concatenate(batch_indexes).astype(np.int64) + +def find_runs(x): + """Find runs of consecutive items in an array.""" + + # ensure array + x = np.asanyarray(x) + if x.ndim != 1: + raise ValueError('only 1D array supported') + n = x.shape[0] + + # handle empty array + if n == 0: + return np.array([]), np.array([]), np.array([]) + + else: + # find run starts + loc_run_start = np.empty(n, dtype=bool) + loc_run_start[0] = True + np.not_equal(x[:-1], x[1:], out=loc_run_start[1:]) + run_starts = np.nonzero(loc_run_start)[0] + + # find run values + run_values = x[loc_run_start] + + # find run lengths + run_lengths = 
np.diff(np.append(run_starts, n)) + + return run_values, run_starts, run_lengths diff --git a/av2unit/inference.py b/av2unit/inference.py new file mode 100644 index 0000000..823d184 --- /dev/null +++ b/av2unit/inference.py @@ -0,0 +1,79 @@ +import os +import argparse +import numpy as np +import torch +import torch.nn.functional as F + +from fairseq import checkpoint_utils, utils + +from util import process_units, save_unit, extract_audio_from_video +from av2unit.task import AVHubertUnitPretrainingTask + +def load_model(model_path, modalities, use_cuda=False): + models, cfg, task = checkpoint_utils.load_model_ensemble_and_task([model_path]) + + for model in models: + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + task.cfg.modalities = modalities.split(",") + task.load_dataset() + + return models[0], task + +def main(args): + use_cuda = torch.cuda.is_available() and not args.cpu + + model, task = load_model(args.av2unit_path, args.modalities, use_cuda=use_cuda) + + temp_audio_path = os.path.splitext(args.in_vid_path)[0]+".temp.wav" + lip_video_path = os.path.splitext(args.in_vid_path)[0]+".lip.mp4" + extract_audio_from_video(args.in_vid_path, temp_audio_path) + + video_feats, audio_feats = task.dataset.load_feature((lip_video_path, temp_audio_path)) + audio_feats, video_feats = torch.from_numpy(audio_feats.astype(np.float32)) if audio_feats is not None else None, torch.from_numpy(video_feats.astype(np.float32)) if video_feats is not None else None + if task.dataset.normalize and 'audio' in task.dataset.modalities: + with torch.no_grad(): + audio_feats = F.layer_norm(audio_feats, audio_feats.shape[1:]) + + collated_audios, _, _ = task.dataset.collater_audio([audio_feats], len(audio_feats)) + collated_videos, _, _ = task.dataset.collater_audio([video_feats], len(video_feats)) + + sample = {"source": { + "audio": collated_audios, "video": collated_videos, + }} + sample = 
utils.move_to_cuda(sample) if use_cuda else sample + + pred = task.inference( + model, + sample, + ) + pred_str = task.dictionaries[0].string(pred.int().cpu()) + + save_unit(pred_str, args.out_unit_path) + os.remove(temp_audio_path) + +def cli_main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--in-vid-path", type=str, required=True, help="File path of source video input" + ) + parser.add_argument( + "--out-unit-path", type=str, required=True, help="File path of target unit output" + ) + parser.add_argument( + "--av2unit-path", type=str, required=True, help="path to the mAV-HuBERT pre-trained model" + ) + parser.add_argument( + "--modalities", type=str, default="audio,video", help="input modalities", + choices=["audio,video","audio","video"], + ) + parser.add_argument("--cpu", action="store_true", help="run on CPU") + + args = parser.parse_args() + + main(args) + +if __name__ == "__main__": + cli_main() diff --git a/av2unit/task.py b/av2unit/task.py new file mode 100644 index 0000000..330fb80 --- /dev/null +++ b/av2unit/task.py @@ -0,0 +1,68 @@ +import torch + +from av2unit.avhubert.hubert_pretraining import * +from av2unit.avhubert.hubert_dataset import * + +class AVHubertUnitDataset(AVHubertDataset): + def __init__( + self, + sample_rate: float, + normalize: bool = False, + stack_order_audio: int=1, + image_mean: float=0, + image_std: float=1, + image_crop_size: int=88, + image_aug: bool=False, + modalities: Optional[List[str]]=None, + noise_prob=0, + ): + self.audio_root = "" + + self.modalities = set(modalities) + self.sample_rate = sample_rate + self.stack_order_audio = stack_order_audio + + self.noise_prob = noise_prob + + self.normalize = normalize + if image_aug: + self.transform = custom_utils.Compose([ + custom_utils.Normalize( 0.0,255.0 ), + custom_utils.RandomCrop((image_crop_size, image_crop_size)), + custom_utils.HorizontalFlip(0.5), + custom_utils.Normalize(image_mean, image_std) ]) + else: + self.transform = 
custom_utils.Compose([ + custom_utils.Normalize( 0.0,255.0 ), + custom_utils.CenterCrop((image_crop_size, image_crop_size)), + custom_utils.Normalize(image_mean, image_std) ]) + logger.info(f"image transform: {self.transform}") + +@register_task("av_hubert_unit_pretraining", dataclass=AVHubertPretrainingConfig) +class AVHubertUnitPretrainingTask(AVHubertPretrainingTask): + def load_dataset(self) -> None: + self.dataset = AVHubertUnitDataset( + sample_rate=self.cfg.sample_rate, + normalize=self.cfg.normalize, + stack_order_audio=self.cfg.stack_order_audio, + image_mean=self.cfg.image_mean, + image_std=self.cfg.image_std, + image_crop_size=self.cfg.image_crop_size, + modalities=self.cfg.modalities, + ) + def inference(self, model, sample): + x, padding_mask = model.extract_finetune(**sample) + + label_embs_list = model.label_embs_concat.split(model.num_classes, 0) + proj_x = model.final_proj(x) + if model.untie_final_proj: + proj_x_list = proj_x.chunk(len(model.num_classes), dim=-1) + else: + proj_x_list = [proj_x for _ in model.num_classes] + logit_list = [model.compute_logits(proj, emb).view(-1, num_class) for proj, emb, num_class in zip(proj_x_list, label_embs_list, model.num_classes)] # [[B*T, V]] + + pred_even = logit_list[0].argmax(dim=-1).cpu() + pred_odd = logit_list[1].argmax(dim=-1).cpu() + pred = torch.stack([pred_even, pred_odd]).transpose(0,1).reshape(-1) + + return pred diff --git a/dict.txt b/dict.txt new file mode 100644 index 0000000..dd3cccd --- /dev/null +++ b/dict.txt @@ -0,0 +1,1000 @@ +0 1 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +10 1 +11 1 +12 1 +13 1 +14 1 +15 1 +16 1 +17 1 +18 1 +19 1 +20 1 +21 1 +22 1 +23 1 +24 1 +25 1 +26 1 +27 1 +28 1 +29 1 +30 1 +31 1 +32 1 +33 1 +34 1 +35 1 +36 1 +37 1 +38 1 +39 1 +40 1 +41 1 +42 1 +43 1 +44 1 +45 1 +46 1 +47 1 +48 1 +49 1 +50 1 +51 1 +52 1 +53 1 +54 1 +55 1 +56 1 +57 1 +58 1 +59 1 +60 1 +61 1 +62 1 +63 1 +64 1 +65 1 +66 1 +67 1 +68 1 +69 1 +70 1 +71 1 +72 1 +73 1 +74 1 +75 1 +76 1 +77 1 +78 1 +79 1 
+80 1 +81 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +88 1 +89 1 +90 1 +91 1 +92 1 +93 1 +94 1 +95 1 +96 1 +97 1 +98 1 +99 1 +100 1 +101 1 +102 1 +103 1 +104 1 +105 1 +106 1 +107 1 +108 1 +109 1 +110 1 +111 1 +112 1 +113 1 +114 1 +115 1 +116 1 +117 1 +118 1 +119 1 +120 1 +121 1 +122 1 +123 1 +124 1 +125 1 +126 1 +127 1 +128 1 +129 1 +130 1 +131 1 +132 1 +133 1 +134 1 +135 1 +136 1 +137 1 +138 1 +139 1 +140 1 +141 1 +142 1 +143 1 +144 1 +145 1 +146 1 +147 1 +148 1 +149 1 +150 1 +151 1 +152 1 +153 1 +154 1 +155 1 +156 1 +157 1 +158 1 +159 1 +160 1 +161 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +170 1 +171 1 +172 1 +173 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +180 1 +181 1 +182 1 +183 1 +184 1 +185 1 +186 1 +187 1 +188 1 +189 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +198 1 +199 1 +200 1 +201 1 +202 1 +203 1 +204 1 +205 1 +206 1 +207 1 +208 1 +209 1 +210 1 +211 1 +212 1 +213 1 +214 1 +215 1 +216 1 +217 1 +218 1 +219 1 +220 1 +221 1 +222 1 +223 1 +224 1 +225 1 +226 1 +227 1 +228 1 +229 1 +230 1 +231 1 +232 1 +233 1 +234 1 +235 1 +236 1 +237 1 +238 1 +239 1 +240 1 +241 1 +242 1 +243 1 +244 1 +245 1 +246 1 +247 1 +248 1 +249 1 +250 1 +251 1 +252 1 +253 1 +254 1 +255 1 +256 1 +257 1 +258 1 +259 1 +260 1 +261 1 +262 1 +263 1 +264 1 +265 1 +266 1 +267 1 +268 1 +269 1 +270 1 +271 1 +272 1 +273 1 +274 1 +275 1 +276 1 +277 1 +278 1 +279 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +290 1 +291 1 +292 1 +293 1 +294 1 +295 1 +296 1 +297 1 +298 1 +299 1 +300 1 +301 1 +302 1 +303 1 +304 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +312 1 +313 1 +314 1 +315 1 +316 1 +317 1 +318 1 +319 1 +320 1 +321 1 +322 1 +323 1 +324 1 +325 1 +326 1 +327 1 +328 1 +329 1 +330 1 +331 1 +332 1 +333 1 +334 1 +335 1 +336 1 +337 1 +338 1 +339 1 +340 1 +341 1 +342 1 +343 1 +344 1 +345 1 +346 1 +347 1 +348 1 +349 1 +350 1 +351 1 +352 1 +353 1 +354 1 +355 1 +356 1 +357 1 +358 1 +359 1 +360 1 +361 1 +362 1 +363 1 +364 1 +365 1 +366 1 +367 1 
+368 1 +369 1 +370 1 +371 1 +372 1 +373 1 +374 1 +375 1 +376 1 +377 1 +378 1 +379 1 +380 1 +381 1 +382 1 +383 1 +384 1 +385 1 +386 1 +387 1 +388 1 +389 1 +390 1 +391 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +398 1 +399 1 +400 1 +401 1 +402 1 +403 1 +404 1 +405 1 +406 1 +407 1 +408 1 +409 1 +410 1 +411 1 +412 1 +413 1 +414 1 +415 1 +416 1 +417 1 +418 1 +419 1 +420 1 +421 1 +422 1 +423 1 +424 1 +425 1 +426 1 +427 1 +428 1 +429 1 +430 1 +431 1 +432 1 +433 1 +434 1 +435 1 +436 1 +437 1 +438 1 +439 1 +440 1 +441 1 +442 1 +443 1 +444 1 +445 1 +446 1 +447 1 +448 1 +449 1 +450 1 +451 1 +452 1 +453 1 +454 1 +455 1 +456 1 +457 1 +458 1 +459 1 +460 1 +461 1 +462 1 +463 1 +464 1 +465 1 +466 1 +467 1 +468 1 +469 1 +470 1 +471 1 +472 1 +473 1 +474 1 +475 1 +476 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +486 1 +487 1 +488 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +499 1 +500 1 +501 1 +502 1 +503 1 +504 1 +505 1 +506 1 +507 1 +508 1 +509 1 +510 1 +511 1 +512 1 +513 1 +514 1 +515 1 +516 1 +517 1 +518 1 +519 1 +520 1 +521 1 +522 1 +523 1 +524 1 +525 1 +526 1 +527 1 +528 1 +529 1 +530 1 +531 1 +532 1 +533 1 +534 1 +535 1 +536 1 +537 1 +538 1 +539 1 +540 1 +541 1 +542 1 +543 1 +544 1 +545 1 +546 1 +547 1 +548 1 +549 1 +550 1 +551 1 +552 1 +553 1 +554 1 +555 1 +556 1 +557 1 +558 1 +559 1 +560 1 +561 1 +562 1 +563 1 +564 1 +565 1 +566 1 +567 1 +568 1 +569 1 +570 1 +571 1 +572 1 +573 1 +574 1 +575 1 +576 1 +577 1 +578 1 +579 1 +580 1 +581 1 +582 1 +583 1 +584 1 +585 1 +586 1 +587 1 +588 1 +589 1 +590 1 +591 1 +592 1 +593 1 +594 1 +595 1 +596 1 +597 1 +598 1 +599 1 +600 1 +601 1 +602 1 +603 1 +604 1 +605 1 +606 1 +607 1 +608 1 +609 1 +610 1 +611 1 +612 1 +613 1 +614 1 +615 1 +616 1 +617 1 +618 1 +619 1 +620 1 +621 1 +622 1 +623 1 +624 1 +625 1 +626 1 +627 1 +628 1 +629 1 +630 1 +631 1 +632 1 +633 1 +634 1 +635 1 +636 1 +637 1 +638 1 +639 1 +640 1 +641 1 +642 1 +643 1 +644 1 +645 1 +646 1 +647 1 +648 1 +649 1 +650 1 +651 1 +652 1 +653 
1 +654 1 +655 1 +656 1 +657 1 +658 1 +659 1 +660 1 +661 1 +662 1 +663 1 +664 1 +665 1 +666 1 +667 1 +668 1 +669 1 +670 1 +671 1 +672 1 +673 1 +674 1 +675 1 +676 1 +677 1 +678 1 +679 1 +680 1 +681 1 +682 1 +683 1 +684 1 +685 1 +686 1 +687 1 +688 1 +689 1 +690 1 +691 1 +692 1 +693 1 +694 1 +695 1 +696 1 +697 1 +698 1 +699 1 +700 1 +701 1 +702 1 +703 1 +704 1 +705 1 +706 1 +707 1 +708 1 +709 1 +710 1 +711 1 +712 1 +713 1 +714 1 +715 1 +716 1 +717 1 +718 1 +719 1 +720 1 +721 1 +722 1 +723 1 +724 1 +725 1 +726 1 +727 1 +728 1 +729 1 +730 1 +731 1 +732 1 +733 1 +734 1 +735 1 +736 1 +737 1 +738 1 +739 1 +740 1 +741 1 +742 1 +743 1 +744 1 +745 1 +746 1 +747 1 +748 1 +749 1 +750 1 +751 1 +752 1 +753 1 +754 1 +755 1 +756 1 +757 1 +758 1 +759 1 +760 1 +761 1 +762 1 +763 1 +764 1 +765 1 +766 1 +767 1 +768 1 +769 1 +770 1 +771 1 +772 1 +773 1 +774 1 +775 1 +776 1 +777 1 +778 1 +779 1 +780 1 +781 1 +782 1 +783 1 +784 1 +785 1 +786 1 +787 1 +788 1 +789 1 +790 1 +791 1 +792 1 +793 1 +794 1 +795 1 +796 1 +797 1 +798 1 +799 1 +800 1 +801 1 +802 1 +803 1 +804 1 +805 1 +806 1 +807 1 +808 1 +809 1 +810 1 +811 1 +812 1 +813 1 +814 1 +815 1 +816 1 +817 1 +818 1 +819 1 +820 1 +821 1 +822 1 +823 1 +824 1 +825 1 +826 1 +827 1 +828 1 +829 1 +830 1 +831 1 +832 1 +833 1 +834 1 +835 1 +836 1 +837 1 +838 1 +839 1 +840 1 +841 1 +842 1 +843 1 +844 1 +845 1 +846 1 +847 1 +848 1 +849 1 +850 1 +851 1 +852 1 +853 1 +854 1 +855 1 +856 1 +857 1 +858 1 +859 1 +860 1 +861 1 +862 1 +863 1 +864 1 +865 1 +866 1 +867 1 +868 1 +869 1 +870 1 +871 1 +872 1 +873 1 +874 1 +875 1 +876 1 +877 1 +878 1 +879 1 +880 1 +881 1 +882 1 +883 1 +884 1 +885 1 +886 1 +887 1 +888 1 +889 1 +890 1 +891 1 +892 1 +893 1 +894 1 +895 1 +896 1 +897 1 +898 1 +899 1 +900 1 +901 1 +902 1 +903 1 +904 1 +905 1 +906 1 +907 1 +908 1 +909 1 +910 1 +911 1 +912 1 +913 1 +914 1 +915 1 +916 1 +917 1 +918 1 +919 1 +920 1 +921 1 +922 1 +923 1 +924 1 +925 1 +926 1 +927 1 +928 1 +929 1 +930 1 +931 1 +932 1 +933 1 +934 1 +935 1 +936 1 +937 1 +938 1 
+939 1 +940 1 +941 1 +942 1 +943 1 +944 1 +945 1 +946 1 +947 1 +948 1 +949 1 +950 1 +951 1 +952 1 +953 1 +954 1 +955 1 +956 1 +957 1 +958 1 +959 1 +960 1 +961 1 +962 1 +963 1 +964 1 +965 1 +966 1 +967 1 +968 1 +969 1 +970 1 +971 1 +972 1 +973 1 +974 1 +975 1 +976 1 +977 1 +978 1 +979 1 +980 1 +981 1 +982 1 +983 1 +984 1 +985 1 +986 1 +987 1 +988 1 +989 1 +990 1 +991 1 +992 1 +993 1 +994 1 +995 1 +996 1 +997 1 +998 1 +999 1 diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..9b48f0a --- /dev/null +++ b/environment.yml @@ -0,0 +1,9 @@ +name: netflips +channels: + - conda-forge + - defaults +dependencies: + - python=3.8 + - pip + - ffmpeg + - ninja \ No newline at end of file diff --git a/fairseq/.github/CODEOWNERS b/fairseq/.github/CODEOWNERS new file mode 100644 index 0000000..b79aa2f --- /dev/null +++ b/fairseq/.github/CODEOWNERS @@ -0,0 +1,21 @@ +# Setting up CODEOWNERS for UST related codebase +# Documentation for open sourced models relevant to UST +examples/speech_to_text @kahne @sravyapopuri388 @jmp84 +examples/speech_to_speech @an918tw @sravyapopuri388 @jmp84 +examples/speech_synthesis @kahne @jmp84 +examples/simultaneous_translation @kahne @jmp84 +examples/speech_text_joint_to_text @yuntang @jmp84 + +# Speech related models relevant to UST +fairseq/models/speech_to_speech @sravyapopuri388 @jmp84 +fairseq/models/speech_to_text @kahne @sravyapopuri388 @jmp84 +fairseq/models/text_to_speech @kahne @jmp84 + +# CONFORMER IMPLEMENTATION +fairseq/modules/conformer_layer.py @sravyapopuri388 @jmp84 +fairseq/modules/espnet_multihead_attention.py @sravyapopuri388 @jmp84 +fairseq/modules/rotary_positional_embedding.py @sravyapopuri388 @jmp84 +fairseq/modules/positional_encoding.py @sravyapopuri388 @jmp84 + +# Machine Translation/NLLB +fairseq/tasks/translation.py @gwenzek diff --git a/fairseq/.github/ISSUE_TEMPLATE.md b/fairseq/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..5c4c449 --- /dev/null +++ 
b/fairseq/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,3 @@ +## 👉 [Please follow one of these issue templates](https://github.com/pytorch/fairseq/issues/new/choose) 👈 + +Note: to keep the backlog clean and actionable, issues may be immediately closed if they do not follow one of the above issue templates. diff --git a/fairseq/.github/ISSUE_TEMPLATE/bug_report.md b/fairseq/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..aa15123 --- /dev/null +++ b/fairseq/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,43 @@ +--- +name: 🐛 Bug Report +about: Submit a bug report to help us improve +labels: 'bug, needs triage' +--- + +## 🐛 Bug + + + +### To Reproduce + +Steps to reproduce the behavior (**always include the command you ran**): + +1. Run cmd '....' +2. See error + + + + +#### Code sample + + +### Expected behavior + + + +### Environment + + - fairseq Version (e.g., 1.0 or main): + - PyTorch Version (e.g., 1.0) + - OS (e.g., Linux): + - How you installed fairseq (`pip`, source): + - Build command you used (if compiling from source): + - Python version: + - CUDA/cuDNN version: + - GPU models and configuration: + - Any other relevant information: + +### Additional context + + diff --git a/fairseq/.github/ISSUE_TEMPLATE/documentation.md b/fairseq/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 0000000..3a6e2e9 --- /dev/null +++ b/fairseq/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,15 @@ +--- +name: 📚 Documentation/Typos +about: Report an issue related to documentation or a typo +labels: 'documentation, needs triage' +--- + +## 📚 Documentation + +For typos and doc fixes, please go ahead and: + +1. Create an issue. +2. Fix the typo. +3. Submit a PR. + +Thanks! 
diff --git a/fairseq/.github/ISSUE_TEMPLATE/feature_request.md b/fairseq/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..93c8668 --- /dev/null +++ b/fairseq/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,24 @@ +--- +name: 🚀 Feature Request +about: Submit a proposal/request for a new feature +labels: 'enhancement, help wanted, needs triage' +--- + +## 🚀 Feature Request + + +### Motivation + + + +### Pitch + + + +### Alternatives + + + +### Additional context + + diff --git a/fairseq/.github/ISSUE_TEMPLATE/how-to-question.md b/fairseq/.github/ISSUE_TEMPLATE/how-to-question.md new file mode 100644 index 0000000..04f3f15 --- /dev/null +++ b/fairseq/.github/ISSUE_TEMPLATE/how-to-question.md @@ -0,0 +1,33 @@ +--- +name: ❓ Questions/Help +about: If you have questions, please first search existing issues and docs +labels: 'question, needs triage' +--- + +## ❓ Questions and Help + +### Before asking: +1. search the issues. +2. search the docs. + + + +#### What is your question? + +#### Code + + + +#### What have you tried? + +#### What's your environment? + + - fairseq Version (e.g., 1.0 or main): + - PyTorch Version (e.g., 1.0) + - OS (e.g., Linux): + - How you installed fairseq (`pip`, source): + - Build command you used (if compiling from source): + - Python version: + - CUDA/cuDNN version: + - GPU models and configuration: + - Any other relevant information: diff --git a/fairseq/.github/PULL_REQUEST_TEMPLATE.md b/fairseq/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..d005e2d --- /dev/null +++ b/fairseq/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,16 @@ +# Before submitting + +- [ ] Was this discussed/approved via a Github issue? (no need for typos, doc improvements) +- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)? +- [ ] Did you make sure to update the docs? +- [ ] Did you write any new necessary tests? + +## What does this PR do? +Fixes # (issue). 
+ +## PR review +Anyone in the community is free to review the PR once the tests have passed. +If we didn't discuss your PR in Github issues there's a high chance it will not be merged. + +## Did you have fun? +Make sure you had fun coding 🙃 diff --git a/fairseq/.github/stale.yml b/fairseq/.github/stale.yml new file mode 100644 index 0000000..b12867d --- /dev/null +++ b/fairseq/.github/stale.yml @@ -0,0 +1,30 @@ +# Configuration for probot-stale - https://github.com/probot/stale +# Mostly copied from github.com/facebook/react/blob/master/.github/stale.yml +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 90 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 7 +# Issues with these labels will never be considered stale +exemptLabels: + - bug +# Label to use when marking an issue as stale +staleLabel: stale +issues: + # Comment to post when marking an issue as stale. + markComment: > + This issue has been automatically marked as stale. + **If this issue is still affecting you, please leave any comment** (for example, "bump"), and we'll keep it open. + We are sorry that we haven't been able to prioritize it yet. If you have any new additional information, please include it with your comment! + # Comment to post when closing a stale issue. + closeComment: > + Closing this issue after a prolonged period of inactivity. If this issue is still present in the latest release, please create a new issue with up-to-date information. Thank you! +pulls: + # Comment to post when marking a pull request as stale. + markComment: > + This pull request has been automatically marked as stale. + **If this pull request is still relevant, please leave any comment** (for example, "bump"), and we'll keep it open. + We are sorry that we haven't been able to prioritize reviewing it yet. Your contribution is very much appreciated. + # Comment to post when closing a stale pull request. 
+ closeComment: > + Closing this pull request after a prolonged period of inactivity. If this issue is still present in the latest release, please ask for this pull request to be reopened. Thank you! + diff --git a/fairseq/.github/workflows/build.yml b/fairseq/.github/workflows/build.yml new file mode 100644 index 0000000..036233d --- /dev/null +++ b/fairseq/.github/workflows/build.yml @@ -0,0 +1,81 @@ +name: build + +on: + # Trigger the workflow on push to main or any pull request + push: + branches: + - main + pull_request: + +jobs: + build: + + strategy: + max-parallel: 4 + matrix: + platform: [ubuntu-latest, macos-latest] + python-version: [3.8, 3.9] + + runs-on: ${{ matrix.platform }} + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Conditionally install pytorch + if: matrix.platform == 'windows-latest' + run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html + + - name: Install locally + run: | + python -m pip install --upgrade pip + git submodule update --init --recursive + python -m pip install . 
+ + - name: Check installation + working-directory: /tmp + run: python $GITHUB_WORKSPACE/scripts/check_installation.py + + - name: Install optional test requirements + run: | + python -m pip install '.[dev,docs]' + python -m pip install iopath transformers pyarrow + python -m pip install git+https://github.com/facebookresearch/fairscale.git@main + python -m pip install pygit2 pgzip + + - name: Install xformers for Macos + if: matrix.platform == 'macos-latest' + run: | + brew install llvm libomp + CC=/usr/local/opt/llvm/bin/clang CXX=clang++ pip install git+https://github.com/facebookresearch/xformers.git@main + + - name: Install xformers for non-MacOS + if: matrix.platform != 'macos-latest' + run: | + python -m pip install --progress-bar off git+https://github.com/facebookresearch/xformers.git@main + + - name: Lint with black + run: black --check --diff . + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Build doc + run: make singlehtml + working-directory: docs/ + + - name: Run tests + # When installing in non-editable mode, the .so files will be generated in 'site-packages/fairseq'. + # But by default, pytest import machinery will load local fairseq, and won't see the .so. + # Use --import-mode=append to favorize the 'site-packages/fairseq'. 
+ # https://docs.pytest.org/en/7.1.x/explanation/pythonpath.html + run: pytest --import-mode=append -vvv tests/ + diff --git a/fairseq/.github/workflows/depreview.yml b/fairseq/.github/workflows/depreview.yml new file mode 100644 index 0000000..032edde --- /dev/null +++ b/fairseq/.github/workflows/depreview.yml @@ -0,0 +1,14 @@ +name: 'Dependency Review' +on: [pull_request] + +permissions: + contents: read + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout Repository' + uses: actions/checkout@v4 + - name: Dependency Review + uses: actions/dependency-review-action@v4 diff --git a/fairseq/.github/workflows/release.yml b/fairseq/.github/workflows/release.yml new file mode 100644 index 0000000..241b74b --- /dev/null +++ b/fairseq/.github/workflows/release.yml @@ -0,0 +1,161 @@ +name: Fairseq Release + +on: + workflow_dispatch: + inputs: + name: + description: 'Release Type' + default: 'patch' + required: true + +jobs: + + get_next_version: + runs-on: ubuntu-latest + steps: + - name: checkout-repo-content + uses: actions/checkout@v2 + + - name: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: get next version and tag + id: get-next-version-and-tag + run: | + output=$(python3 release_utils.py --release-type ${{ github.event.inputs.name }}) + echo $output + new_version=$(echo $output | awk '{print $1}') + new_tag=$(echo $output | awk '{print $2}') + echo "new version is $new_version" + echo "new tag is $new_tag" + echo ::set-output name=version::$new_version + echo ::set-output name=tag::$new_tag + echo ::set-output name=branch_name::$new_version-release + echo "NEW_TAG=$new_tag" >> $GITHUB_ENV + echo "NEW_BRANCH=$new_version-release" >> $GITHUB_ENV + + + # update the version number in version.txt + - name: update version + id: update-version + run : | + echo "current folder = $PWD" + echo "current branch = $(git branch --show-current)" + output=$(python3 release_utils.py --release-type ${{ 
github.event.inputs.name }} --update-version) + + - name: add and commit + uses: EndBug/add-and-commit@v9 + with: + author_name: ${{ secrets.AUTHOR_NAME }} + author_email: ${{ secrets.AUTHOR_EMAIL }} + + # TODO: change this to main once shipit is disabled. + new_branch: '${{ env.NEW_BRANCH }}' + default_author: github_actor + message: '${{ env.NEW_TAG }} release' + pathspec_error_handling: exitAtEnd + + # Arguments for the git pull command. Use NO-PULL to avoid the action pulling at all. + # pull: 'NO-PULL' + tag: '${{ env.NEW_TAG }}' + + outputs: + new_version: ${{ steps.get-next-version-and-tag.outputs.version }} + new_tag: ${{ steps.get-next-version-and-tag.outputs.tag }} + branch_name: ${{ steps.get-next-version-and-tag.outputs.branch_name }} + + create_sdist: + runs-on: ubuntu-latest + name: Create Source Distribution + needs: get_next_version + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ needs.get_next_version.outputs.branch_name }} + + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: Upgrade pip + run: | + python3 -m pip install --upgrade pip + + - name: Create Source Distribution + run: | + python3 -m pip install setuptools wheel twine torch + python3 setup.py sdist + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + needs: get_next_version + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ needs.get_next_version.outputs.branch_name }} + + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: Upgrade pip + run: | + python3 -m pip install --upgrade pip + + - name: Install cibuildwheel + run: | + python3 -m pip install cibuildwheel + + - name: Build wheels for CPython + run: | + python3 -m cibuildwheel --output-dir dist + env: + CIBW_BUILD: "cp38-*64" + CIBW_MANYLINUX_X86_64_IMAGE: 
manylinux1 + CIBW_BEFORE_BUILD: git submodule update --init --recursive && pip install . + # Install system library + CIBW_BEFORE_BUILD_LINUX: (yum install -y libffi-devel || apt-get install -y libffi-devel || apk add --update --no-cache libffi-devel || true) && (yum install -y libc6 || apt-get install -y libc6 || apk add --update --no-cache libc6 || true) + CIBW_ENVIRONMENT: "PIP_ONLY_BINARY=numpy" + CIBW_SKIP: "*musllinux*" + + - uses: actions/upload-artifact@v2 + with: + path: dist + + upload: + name: Upload to PyPi and create release + runs-on: ubuntu-latest + needs: [build_wheels, create_sdist, get_next_version] + steps: + - uses: actions/download-artifact@v2 + with: + name: artifact + path: dist + + # build the PyPI package and upload it + - name: upload + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + pip install setuptools wheel twine + python3 -m twine upload --repository pypi dist/* + + # create the release on github + - name: create release on github + uses: ncipollo/release-action@v1 + with: + tag: '${{ needs.get_next_version.outputs.new_tag }}' diff --git a/fairseq/.gitignore b/fairseq/.gitignore new file mode 100644 index 0000000..4be1363 --- /dev/null +++ b/fairseq/.gitignore @@ -0,0 +1,141 @@ +# JetBrains PyCharm IDE +.idea/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# macOS dir files +.DS_Store + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Checkpoints +checkpoints + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Generated files +/fairseq/temporal_convolution_tbc +/fairseq/modules/*_layer/*_forward.cu +/fairseq/modules/*_layer/*_backward.cu +/fairseq/version.py + +# data +data-bin/ + +# reranking +/examples/reranking/rerank_data + +# Cython-generated C++ source files +/fairseq/data/data_utils_fast.cpp +/fairseq/data/token_block_utils_fast.cpp + +# VSCODE +.vscode/ftp-sync.json +.vscode/settings.json + +# Experimental Folder +experimental/* + +# Weights and Biases logs +wandb/ + +# Hydra artifacts +nohup.out +multirun +outputs diff --git a/fairseq/.gitmodules b/fairseq/.gitmodules new file mode 100644 index 0000000..07a55d4 --- /dev/null +++ b/fairseq/.gitmodules @@ -0,0 +1,4 @@ +[submodule "fairseq/model_parallel/megatron"] + path = fairseq/model_parallel/megatron + url = https://github.com/ngoyal2707/Megatron-LM + branch = fairseq diff --git a/fairseq/.pre-commit-config.yaml b/fairseq/.pre-commit-config.yaml new file mode 100644 index 0000000..6b1d6ae --- /dev/null +++ b/fairseq/.pre-commit-config.yaml @@ -0,0 +1,40 @@ +exclude: 'build|stubs' + +default_language_version: + python: python3 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - 
id: trailing-whitespace + - id: check-ast + - id: check-merge-conflict + - id: no-commit-to-branch + args: ['--branch=master'] + - id: check-added-large-files + args: ['--maxkb=500'] + - id: end-of-file-fixer + +- repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: + - id: black + language_version: python3.8 + +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + args: [ + # only error for syntax errors and undefined names + "--select=E9,F63,F7,F82", + ] + +- repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + exclude: README.md + additional_dependencies: [toml] + args: ["--profile", "black"] diff --git a/fairseq/CODE_OF_CONDUCT.md b/fairseq/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..a0cbeaa --- /dev/null +++ b/fairseq/CODE_OF_CONDUCT.md @@ -0,0 +1,77 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq + diff --git a/fairseq/CONTRIBUTING.md b/fairseq/CONTRIBUTING.md new file mode 100644 index 0000000..60e9025 --- /dev/null +++ b/fairseq/CONTRIBUTING.md @@ -0,0 +1,82 @@ +# Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq) +We want to make contributing to this project as easy and transparent as +possible. + +## Pull Requests +We actively welcome your pull requests. + +1. Fork the repo and create your branch from `main`. +2. If you've added code that should be tested, add tests. +3. If you've changed APIs, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. If you haven't already, complete the Contributor License Agreement ("CLA"). + +## Contributor License Agreement ("CLA") +In order to accept your pull request, we need you to submit a CLA. You only need +to do this once to work on any of Facebook's open source projects. + +Complete your CLA here: + +## Issues +We use GitHub issues to track public bugs. 
Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## License +By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq), +you agree that your contributions will be licensed under the LICENSE file in +the root directory of this source tree. + +## Pre-commit hooks +In order to ensure your code lints, there are pre-commit hooks configured in the repository which you can install. +After installation, they will automatically run each time you commit. +An abbreviated guide is given below; for more information, refer to [the official pre-commit documentation](https://pre-commit.com/). + +### Installation +``` +pip install pre-commit +pre-commit install +``` + +### Usage +Just commit your changes: +``` +git commit -m "My informative commit message" +``` + +If there was a failure, you will get feedback +``` +[INFO] Initializing environment for https://github.com/PyCQA/flake8. +[INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks. +[INFO] Once installed this environment will be reused. +[INFO] This may take a few minutes... +[INFO] Installing environment for https://github.com/PyCQA/flake8. +[INFO] Once installed this environment will be reused. +[INFO] This may take a few minutes... +Trim Trailing Whitespace.................................................Failed +- hook id: trailing-whitespace +- exit code: 1 +- files were modified by this hook +Fixing examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh +Fix End of Files.........................................................Failed +- hook id: end-of-file-fixer +- exit code: 1 +- files were modified by this hook +Fixing examples/few_shot/scripts/schedule_jobs_few_shot.py +flake8...................................................................Passed +``` + +Certain hooks modify your files to comply. +To include these modifications, you will need to add them (i.e. `git add ...`) and commit again.
+ +If all is well, you should see something like: +``` +Trim Trailing Whitespace.................................................Passed +Fix End of Files.........................................................Passed +flake8...................................................................Passed +[gshard-fix-ci 8698644e1] Fix lint, add pre-commit hooks + 10 files changed, 148 insertions(+), 110 deletions(-) + create mode 100644 .flake8 + create mode 100644 .pre-commit-config.yaml + rename examples/nllb/modeling/wmt15_benchmark/{eval_langs2.py => eval_langs2.sh} (99%) + ``` diff --git a/fairseq/LICENSE b/fairseq/LICENSE new file mode 100644 index 0000000..b96dcb0 --- /dev/null +++ b/fairseq/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Facebook, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/fairseq/MANIFEST.in b/fairseq/MANIFEST.in new file mode 100644 index 0000000..4f719da --- /dev/null +++ b/fairseq/MANIFEST.in @@ -0,0 +1 @@ +include fairseq/version.txt diff --git a/fairseq/README.md b/fairseq/README.md new file mode 100644 index 0000000..1150c66 --- /dev/null +++ b/fairseq/README.md @@ -0,0 +1,242 @@ +

+ +
+
+ Support Ukraine + MIT License + Latest Release + Build Status + Documentation Status + CircleCI Status +

+ +-------------------------------------------------------------------------------- + +Fairseq(-py) is a sequence modeling toolkit that allows researchers and +developers to train custom models for translation, summarization, language +modeling and other text generation tasks. + +We provide reference implementations of various sequence modeling papers: + +
List of implemented papers

+ +* **Convolutional Neural Networks (CNN)** + + [Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)](examples/language_model/conv_lm/README.md) + + [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](examples/conv_seq2seq/README.md) + + [Classical Structured Prediction Losses for Sequence to Sequence Learning (Edunov et al., 2018)](https://github.com/pytorch/fairseq/tree/classic_seqlevel) + + [Hierarchical Neural Story Generation (Fan et al., 2018)](examples/stories/README.md) + + [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](examples/wav2vec/README.md) +* **LightConv and DynamicConv models** + + [Pay Less Attention with Lightweight and Dynamic Convolutions (Wu et al., 2019)](examples/pay_less_attention_paper/README.md) +* **Long Short-Term Memory (LSTM) networks** + + Effective Approaches to Attention-based Neural Machine Translation (Luong et al., 2015) +* **Transformer (self-attention) networks** + + Attention Is All You Need (Vaswani et al., 2017) + + [Scaling Neural Machine Translation (Ott et al., 2018)](examples/scaling_nmt/README.md) + + [Understanding Back-Translation at Scale (Edunov et al., 2018)](examples/backtranslation/README.md) + + [Adaptive Input Representations for Neural Language Modeling (Baevski and Auli, 2018)](examples/language_model/README.adaptive_inputs.md) + + [Lexically constrained decoding with dynamic beam allocation (Post & Vilar, 2018)](examples/constrained_decoding/README.md) + + [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context (Dai et al., 2019)](examples/truncated_bptt/README.md) + + [Adaptive Attention Span in Transformers (Sukhbaatar et al., 2019)](examples/adaptive_span/README.md) + + [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](examples/translation_moe/README.md) + + [RoBERTa: A Robustly Optimized BERT Pretraining Approach (Liu et al., 2019)](examples/roberta/README.md) + + 
[Facebook FAIR's WMT19 News Translation Task Submission (Ng et al., 2019)](examples/wmt19/README.md) + + [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](examples/joint_alignment_translation/README.md ) + + [Multilingual Denoising Pre-training for Neural Machine Translation (Liu et al., 2020)](examples/mbart/README.md) + + [Neural Machine Translation with Byte-Level Subwords (Wang et al., 2020)](examples/byte_level_bpe/README.md) + + [Unsupervised Quality Estimation for Neural Machine Translation (Fomicheva et al., 2020)](examples/unsupervised_quality_estimation/README.md) + + [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](examples/wav2vec/README.md) + + [Generating Medical Reports from Patient-Doctor Conversations Using Sequence-to-Sequence Models (Enarvi et al., 2020)](examples/pointer_generator/README.md) + + [Linformer: Self-Attention with Linear Complexity (Wang et al., 2020)](examples/linformer/README.md) + + [Cross-lingual Retrieval for Iterative Self-Supervised Training (Tran et al., 2020)](examples/criss/README.md) + + [Deep Transformers with Latent Depth (Li et al., 2020)](examples/latent_depth/README.md) + + [Unsupervised Cross-lingual Representation Learning for Speech Recognition (Conneau et al., 2020)](https://arxiv.org/abs/2006.13979) + + [Self-training and Pre-training are Complementary for Speech Recognition (Xu et al., 2020)](https://arxiv.org/abs/2010.11430) + + [Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training (Hsu, et al., 2021)](https://arxiv.org/abs/2104.01027) + + [Unsupervised Speech Recognition (Baevski, et al., 2021)](https://arxiv.org/abs/2105.11084) + + [Simple and Effective Zero-shot Cross-lingual Phoneme Recognition (Xu et al., 2021)](https://arxiv.org/abs/2109.11680) + + [VideoCLIP: Contrastive Pre-training for Zero-shot Video-Text Understanding (Xu et.
al., 2021)](https://arxiv.org/pdf/2109.14084.pdf) + + [VLM: Task-agnostic Video-Language Model Pre-training for Video Understanding (Xu et. al., 2021)](https://aclanthology.org/2021.findings-acl.370.pdf) + + [NormFormer: Improved Transformer Pretraining with Extra Normalization (Shleifer et. al, 2021)](examples/normformer/README.md) +* **Non-autoregressive Transformers** + + Non-Autoregressive Neural Machine Translation (Gu et al., 2017) + + Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement (Lee et al. 2018) + + Insertion Transformer: Flexible Sequence Generation via Insertion Operations (Stern et al. 2019) + + Mask-Predict: Parallel Decoding of Conditional Masked Language Models (Ghazvininejad et al., 2019) + + [Levenshtein Transformer (Gu et al., 2019)](examples/nonautoregressive_translation/README.md) +* **Finetuning** + + [Better Fine-Tuning by Reducing Representational Collapse (Aghajanyan et al. 2020)](examples/rxf/README.md) + +

+ +### What's New: +* May 2023 [Released models for Scaling Speech Technology to 1,000+ Languages (Pratap, et al., 2023)](examples/mms/README.md) +* June 2022 [Released code for wav2vec-U 2.0 from Towards End-to-end Unsupervised Speech Recognition (Liu, et al., 2022)](examples/wav2vec/unsupervised/README.md) +* May 2022 [Integration with xFormers](https://github.com/facebookresearch/xformers) +* December 2021 [Released Direct speech-to-speech translation code](examples/speech_to_speech/README.md) +* October 2021 [Released VideoCLIP and VLM models](examples/MMPT/README.md) +* October 2021 [Released multilingual finetuned XLSR-53 model](examples/wav2vec/README.md) +* September 2021 [`master` branch renamed to `main`](https://github.com/github/renaming). +* July 2021 [Released DrNMT code](examples/discriminative_reranking_nmt/README.md) +* July 2021 [Released Robust wav2vec 2.0 model](examples/wav2vec/README.md) +* June 2021 [Released XLMR-XL and XLMR-XXL models](examples/xlmr/README.md) +* May 2021 [Released Unsupervised Speech Recognition code](examples/wav2vec/unsupervised/README.md) +* March 2021 [Added full parameter and optimizer state sharding + CPU offloading](examples/fully_sharded_data_parallel/README.md) +* February 2021 [Added LASER training code](examples/laser/README.md) +* December 2020: [Added Adaptive Attention Span code](examples/adaptive_span/README.md) +* December 2020: [GottBERT model and code released](examples/gottbert/README.md) +* November 2020: Adopted the [Hydra](https://github.com/facebookresearch/hydra) configuration framework + * [see documentation explaining how to use it for new and existing projects](docs/hydra_integration.md) +* November 2020: [fairseq 0.10.0 released](https://github.com/pytorch/fairseq/releases/tag/v0.10.0) +* October 2020: [Added R3F/R4F (Better Fine-Tuning) code](examples/rxf/README.md) +* October 2020: [Deep Transformer with Latent Depth code released](examples/latent_depth/README.md) +* October 2020: [Added CRISS 
models and code](examples/criss/README.md) + +
Previous updates

+ +* September 2020: [Added Linformer code](examples/linformer/README.md) +* September 2020: [Added pointer-generator networks](examples/pointer_generator/README.md) +* August 2020: [Added lexically constrained decoding](examples/constrained_decoding/README.md) +* August 2020: [wav2vec2 models and code released](examples/wav2vec/README.md) +* July 2020: [Unsupervised Quality Estimation code released](examples/unsupervised_quality_estimation/README.md) +* May 2020: [Follow fairseq on Twitter](https://twitter.com/fairseq) +* April 2020: [Monotonic Multihead Attention code released](examples/simultaneous_translation/README.md) +* April 2020: [Quant-Noise code released](examples/quant_noise/README.md) +* April 2020: [Initial model parallel support and 11B parameters unidirectional LM released](examples/megatron_11b/README.md) +* March 2020: [Byte-level BPE code released](examples/byte_level_bpe/README.md) +* February 2020: [mBART model and code released](examples/mbart/README.md) +* February 2020: [Added tutorial for back-translation](https://github.com/pytorch/fairseq/tree/main/examples/backtranslation#training-your-own-model-wmt18-english-german) +* December 2019: [fairseq 0.9.0 released](https://github.com/pytorch/fairseq/releases/tag/v0.9.0) +* November 2019: [VizSeq released (a visual analysis toolkit for evaluating fairseq models)](https://facebookresearch.github.io/vizseq/docs/getting_started/fairseq_example) +* November 2019: [CamemBERT model and code released](examples/camembert/README.md) +* November 2019: [BART model and code released](examples/bart/README.md) +* November 2019: [XLM-R models and code released](examples/xlmr/README.md) +* September 2019: [Nonautoregressive translation code released](examples/nonautoregressive_translation/README.md) +* August 2019: [WMT'19 models released](examples/wmt19/README.md) +* July 2019: fairseq relicensed under MIT license +* July 2019: [RoBERTa models and code released](examples/roberta/README.md) +* June 2019: 
[wav2vec models and code released](examples/wav2vec/README.md) + +

+ +### Features: + +* multi-GPU training on one machine or across multiple machines (data and model parallel) +* fast generation on both CPU and GPU with multiple search algorithms implemented: + + beam search + + Diverse Beam Search ([Vijayakumar et al., 2016](https://arxiv.org/abs/1610.02424)) + + sampling (unconstrained, top-k and top-p/nucleus) + + [lexically constrained decoding](examples/constrained_decoding/README.md) (Post & Vilar, 2018) +* [gradient accumulation](https://fairseq.readthedocs.io/en/latest/getting_started.html#large-mini-batch-training-with-delayed-updates) enables training with large mini-batches even on a single GPU +* [mixed precision training](https://fairseq.readthedocs.io/en/latest/getting_started.html#training-with-half-precision-floating-point-fp16) (trains faster with less GPU memory on [NVIDIA tensor cores](https://developer.nvidia.com/tensor-cores)) +* [extensible](https://fairseq.readthedocs.io/en/latest/overview.html): easily register new models, criterions, tasks, optimizers and learning rate schedulers +* [flexible configuration](docs/hydra_integration.md) based on [Hydra](https://github.com/facebookresearch/hydra) allowing a combination of code, command-line and file based configuration +* [full parameter and optimizer state sharding](examples/fully_sharded_data_parallel/README.md) +* [offloading parameters to CPU](examples/fully_sharded_data_parallel/README.md) + +We also provide [pre-trained models for translation and language modeling](#pre-trained-models-and-examples) +with a convenient `torch.hub` interface: + +``` python +en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model') +en2de.translate('Hello world', beam=5) +# 'Hallo Welt' +``` + +See the PyTorch Hub tutorials for [translation](https://pytorch.org/hub/pytorch_fairseq_translation/) +and [RoBERTa](https://pytorch.org/hub/pytorch_fairseq_roberta/) for more examples. 
+ +# Requirements and Installation + +* [PyTorch](http://pytorch.org/) version >= 1.10.0 +* Python version >= 3.8 +* For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl) +* **To install fairseq** and develop locally: + +``` bash +git clone https://github.com/pytorch/fairseq +cd fairseq +pip install --editable ./ + +# on MacOS: +# CFLAGS="-stdlib=libc++" pip install --editable ./ + +# to install the latest stable release (0.10.x) +# pip install fairseq +``` + +* **For faster training** install NVIDIA's [apex](https://github.com/NVIDIA/apex) library: + +``` bash +git clone https://github.com/NVIDIA/apex +cd apex +pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" \ + --global-option="--deprecated_fused_adam" --global-option="--xentropy" \ + --global-option="--fast_multihead_attn" ./ +``` + +* **For large datasets** install [PyArrow](https://arrow.apache.org/docs/python/install.html#using-pip): `pip install pyarrow` +* If you use Docker make sure to increase the shared memory size either with `--ipc=host` or `--shm-size` + as command line options to `nvidia-docker run` . + +# Getting Started + +The [full documentation](https://fairseq.readthedocs.io/) contains instructions +for getting started, training new models and extending fairseq with new model +types and tasks. + +# Pre-trained models and examples + +We provide pre-trained models and pre-processed, binarized test sets for several tasks listed below, +as well as example training and evaluation commands. 
+ +* [Translation](examples/translation/README.md): convolutional and transformer models are available +* [Language Modeling](examples/language_model/README.md): convolutional and transformer models are available + +We also have more detailed READMEs to reproduce results from specific papers: + +* [XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale (Babu et al., 2021)](examples/wav2vec/xlsr/README.md) +* [Cross-lingual Retrieval for Iterative Self-Supervised Training (Tran et al., 2020)](examples/criss/README.md) +* [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](examples/wav2vec/README.md) +* [Unsupervised Quality Estimation for Neural Machine Translation (Fomicheva et al., 2020)](examples/unsupervised_quality_estimation/README.md) +* [Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)](examples/quant_noise/README.md) +* [Neural Machine Translation with Byte-Level Subwords (Wang et al., 2020)](examples/byte_level_bpe/README.md) +* [Multilingual Denoising Pre-training for Neural Machine Translation (Liu et al., 2020)](examples/mbart/README.md) +* [Reducing Transformer Depth on Demand with Structured Dropout (Fan et al., 2019)](examples/layerdrop/README.md) +* [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](examples/joint_alignment_translation/README.md) +* [Levenshtein Transformer (Gu et al., 2019)](examples/nonautoregressive_translation/README.md) +* [Facebook FAIR's WMT19 News Translation Task Submission (Ng et al., 2019)](examples/wmt19/README.md) +* [RoBERTa: A Robustly Optimized BERT Pretraining Approach (Liu et al., 2019)](examples/roberta/README.md) +* [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](examples/wav2vec/README.md) +* [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](examples/translation_moe/README.md) +* [Pay
Less Attention with Lightweight and Dynamic Convolutions (Wu et al., 2019)](examples/pay_less_attention_paper/README.md) +* [Understanding Back-Translation at Scale (Edunov et al., 2018)](examples/backtranslation/README.md) +* [Classical Structured Prediction Losses for Sequence to Sequence Learning (Edunov et al., 2018)](https://github.com/pytorch/fairseq/tree/classic_seqlevel) +* [Hierarchical Neural Story Generation (Fan et al., 2018)](examples/stories/README.md) +* [Scaling Neural Machine Translation (Ott et al., 2018)](examples/scaling_nmt/README.md) +* [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](examples/conv_seq2seq/README.md) +* [Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)](examples/language_model/README.conv.md) + +# Join the fairseq community + +* Twitter: https://twitter.com/fairseq +* Facebook page: https://www.facebook.com/groups/fairseq.users +* Google group: https://groups.google.com/forum/#!forum/fairseq-users + +# License + +fairseq(-py) is MIT-licensed. +The license applies to the pre-trained models as well. + +# Citation + +Please cite as: + +``` bibtex +@inproceedings{ott2019fairseq, + title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling}, + author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli}, + booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations}, + year = {2019}, +} +``` diff --git a/fairseq/RELEASE.md b/fairseq/RELEASE.md new file mode 100644 index 0000000..79480a1 --- /dev/null +++ b/fairseq/RELEASE.md @@ -0,0 +1,13 @@ +# Creating a New Release + +In order to create a new release: + +1. Navigate to the [Fairseq Workflows](https://github.com/facebookresearch/fairseq/actions) and find the one named _Fairseq Release_. + +2. Under _Run Workflow_ choose the branch `main` and for _Release Type_ enter either `major`, `minor`, or `patch`. + +3. 
A branch named `$new_version-release` will be created where the `version.txt` file is updated. Merge those changes into `main`. + +4. Make sure that a [new PYPI package](https://pypi.org/project/fairseq/) has been uploaded. + +5. Make sure that a [new github release](https://github.com/facebookresearch/fairseq/releases) has been created. diff --git a/fairseq/docs/Makefile b/fairseq/docs/Makefile new file mode 100644 index 0000000..c2f5b1a --- /dev/null +++ b/fairseq/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = fairseq +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/fairseq/docs/command_line_tools.rst b/fairseq/docs/command_line_tools.rst new file mode 100644 index 0000000..c16300f --- /dev/null +++ b/fairseq/docs/command_line_tools.rst @@ -0,0 +1,85 @@ +.. _Command-line Tools: + +Command-line Tools +================== + +Fairseq provides several command-line tools for training and evaluating models: + +- :ref:`fairseq-preprocess`: Data pre-processing: build vocabularies and binarize training data +- :ref:`fairseq-train`: Train a new model on one or multiple GPUs +- :ref:`fairseq-generate`: Translate pre-processed data with a trained model +- :ref:`fairseq-interactive`: Translate raw text with a trained model +- :ref:`fairseq-score`: BLEU scoring of generated translations against reference translations +- :ref:`fairseq-eval-lm`: Language model evaluation + + +.. 
_fairseq-preprocess: + +fairseq-preprocess +~~~~~~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.preprocess + + .. argparse:: + :module: fairseq.options + :func: get_preprocessing_parser + :prog: fairseq-preprocess + + +.. _fairseq-train: + +fairseq-train +~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.train + + .. argparse:: + :module: fairseq.options + :func: get_training_parser + :prog: fairseq-train + + +.. _fairseq-generate: + +fairseq-generate +~~~~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.generate + + .. argparse:: + :module: fairseq.options + :func: get_generation_parser + :prog: fairseq-generate + + +.. _fairseq-interactive: + +fairseq-interactive +~~~~~~~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.interactive + + .. argparse:: + :module: fairseq.options + :func: get_interactive_generation_parser + :prog: fairseq-interactive + + +.. _fairseq-score: + +fairseq-score +~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.score + + .. argparse:: + :module: fairseq_cli.score + :func: get_parser + :prog: fairseq-score + + +.. _fairseq-eval-lm: + +fairseq-eval-lm +~~~~~~~~~~~~~~~ +.. automodule:: fairseq_cli.eval_lm + + .. argparse:: + :module: fairseq.options + :func: get_eval_lm_parser + :prog: fairseq-eval-lm diff --git a/fairseq/docs/conf.py b/fairseq/docs/conf.py new file mode 100644 index 0000000..0bc049f --- /dev/null +++ b/fairseq/docs/conf.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# fairseq documentation build configuration file, created by +# sphinx-quickstart on Fri Aug 17 21:45:30 2018. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +import os +import sys +from fairseq import __version__ + + +# source code directory, relative to this file, for sphinx-autobuild +sys.path.insert(0, os.path.abspath("..")) + +source_suffix = [".rst"] + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinxarg.ext", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "fairseq" +copyright = "Facebook AI Research (FAIR)" +author = "Facebook AI Research (FAIR)" + +github_doc_root = "https://github.com/pytorch/fairseq/tree/main/docs/" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = __version__ +# The full version, including alpha/beta/rc tags. +release = __version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# These patterns also affect html_static_path and html_extra_path +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" +highlight_language = "python" + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +html_theme = "classic" + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + "numpy": ("http://docs.scipy.org/doc/numpy/", None), + "python": ("https://docs.python.org/", None), + "torch": ("https://pytorch.org/docs/master/", None), +} diff --git a/fairseq/docs/criterions.rst b/fairseq/docs/criterions.rst new file mode 100644 index 0000000..d6b8ca6 --- /dev/null +++ b/fairseq/docs/criterions.rst @@ -0,0 +1,31 @@ +.. role:: hidden + :class: hidden-section + +.. _Criterions: + +Criterions +========== + +Criterions compute the loss function given the model and batch, roughly:: + + loss = criterion(model, batch) + +.. automodule:: fairseq.criterions + :members: + +.. autoclass:: fairseq.criterions.FairseqCriterion + :members: + :undoc-members: + +.. autoclass:: fairseq.criterions.adaptive_loss.AdaptiveLoss + :members: + :undoc-members: +.. autoclass:: fairseq.criterions.composite_loss.CompositeLoss + :members: + :undoc-members: +.. autoclass:: fairseq.criterions.cross_entropy.CrossEntropyCriterion + :members: + :undoc-members: +.. autoclass:: fairseq.criterions.label_smoothed_cross_entropy.LabelSmoothedCrossEntropyCriterion + :members: + :undoc-members: diff --git a/fairseq/docs/data.rst b/fairseq/docs/data.rst new file mode 100644 index 0000000..6a390cb --- /dev/null +++ b/fairseq/docs/data.rst @@ -0,0 +1,58 @@ +.. role:: hidden + :class: hidden-section + +.. module:: fairseq.data + +Data Loading and Utilities +========================== + +.. 
_datasets: + +Datasets +-------- + +**Datasets** define the data format and provide helpers for creating +mini-batches. + +.. autoclass:: fairseq.data.FairseqDataset + :members: +.. autoclass:: fairseq.data.LanguagePairDataset + :members: +.. autoclass:: fairseq.data.MonolingualDataset + :members: + +**Helper Datasets** + +These datasets wrap other :class:`fairseq.data.FairseqDataset` instances and +provide additional functionality: + +.. autoclass:: fairseq.data.BacktranslationDataset + :members: +.. autoclass:: fairseq.data.ConcatDataset + :members: +.. autoclass:: fairseq.data.ResamplingDataset + :members: +.. autoclass:: fairseq.data.RoundRobinZipDatasets + :members: +.. autoclass:: fairseq.data.TransformEosDataset + :members: + + +Dictionary +---------- + +.. autoclass:: fairseq.data.Dictionary + :members: + + +Iterators +--------- + +.. autoclass:: fairseq.data.CountingIterator + :members: +.. autoclass:: fairseq.data.EpochBatchIterator + :members: +.. autoclass:: fairseq.data.GroupedIterator + :members: +.. autoclass:: fairseq.data.ShardedIterator + :members: diff --git a/fairseq/docs/docutils.conf b/fairseq/docs/docutils.conf new file mode 100644 index 0000000..526acff --- /dev/null +++ b/fairseq/docs/docutils.conf @@ -0,0 +1,2 @@ +[writers] +option-limit=0 diff --git a/fairseq/docs/fairseq.gif b/fairseq/docs/fairseq.gif new file mode 100644 index 0000000..5782fdb Binary files /dev/null and b/fairseq/docs/fairseq.gif differ diff --git a/fairseq/docs/getting_started.rst b/fairseq/docs/getting_started.rst new file mode 100644 index 0000000..09cc21e --- /dev/null +++ b/fairseq/docs/getting_started.rst @@ -0,0 +1,230 @@ +Evaluating Pre-trained Models +============================= + +First, download a pre-trained model along with its vocabularies: + +.. 
code-block:: console + + > curl https://dl.fbaipublicfiles.com/fairseq/models/wmt14.v2.en-fr.fconv-py.tar.bz2 | tar xvjf - + +This model uses a `Byte Pair Encoding (BPE) +vocabulary `__, so we'll have to apply +the encoding to the source text before it can be translated. This can be +done with the +`apply\_bpe.py `__ +script using the ``wmt14.en-fr.fconv-cuda/bpecodes`` file. ``@@`` is +used as a continuation marker and the original text can be easily +recovered with e.g. ``sed s/@@ //g`` or by passing the ``--remove-bpe`` +flag to :ref:`fairseq-generate`. Prior to BPE, input text needs to be tokenized +using ``tokenizer.perl`` from +`mosesdecoder `__. + +Let's use :ref:`fairseq-interactive` to generate translations interactively. +Here, we use a beam size of 5 and preprocess the input with the Moses +tokenizer and the given Byte-Pair Encoding vocabulary. It will automatically +remove the BPE continuation markers and detokenize the output. + +.. code-block:: console + + > MODEL_DIR=wmt14.en-fr.fconv-py + > fairseq-interactive \ + --path $MODEL_DIR/model.pt $MODEL_DIR \ + --beam 5 --source-lang en --target-lang fr \ + --tokenizer moses \ + --bpe subword_nmt --bpe-codes $MODEL_DIR/bpecodes + | loading model(s) from wmt14.en-fr.fconv-py/model.pt + | [en] dictionary: 44206 types + | [fr] dictionary: 44463 types + | Type the input sentence and press return: + Why is it rare to discover new marine mammal species? + S-0 Why is it rare to discover new marine mam@@ mal species ? + H-0 -0.0643349438905716 Pourquoi est-il rare de découvrir de nouvelles espèces de mammifères marins? 
+ P-0 -0.0763 -0.1849 -0.0956 -0.0946 -0.0735 -0.1150 -0.1301 -0.0042 -0.0321 -0.0171 -0.0052 -0.0062 -0.0015 + +This generation script produces three types of outputs: a line prefixed +with *S* is a copy of the original source sentence; *H* is the +hypothesis along with an average log-likelihood; and *P* is the +positional score per token position, including the +end-of-sentence marker which is omitted from the text. + +Other types of output lines you might see are *D*, the detokenized hypothesis, +*T*, the reference target, *A*, alignment info, *E*, the history of generation steps. + +See the `README `__ for a +full list of pre-trained models available. + +Training a New Model +==================== + +The following tutorial is for machine translation. For an example of how +to use Fairseq for other tasks, such as :ref:`language modeling`, please see the +``examples/`` directory. + +Data Pre-processing +------------------- + +Fairseq contains example pre-processing scripts for several translation +datasets: IWSLT 2014 (German-English), WMT 2014 (English-French) and WMT +2014 (English-German). To pre-process and binarize the IWSLT dataset: + +.. code-block:: console + + > cd examples/translation/ + > bash prepare-iwslt14.sh + > cd ../.. + > TEXT=examples/translation/iwslt14.tokenized.de-en + > fairseq-preprocess --source-lang de --target-lang en \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/iwslt14.tokenized.de-en + +This will write binarized data that can be used for model training to +``data-bin/iwslt14.tokenized.de-en``. + +Training +-------- + +Use :ref:`fairseq-train` to train a new model. Here are a few example settings that work +well for the IWSLT 2014 dataset: + +.. 
code-block:: console + + > mkdir -p checkpoints/fconv + > CUDA_VISIBLE_DEVICES=0 fairseq-train data-bin/iwslt14.tokenized.de-en \ + --optimizer nag --lr 0.25 --clip-norm 0.1 --dropout 0.2 --max-tokens 4000 \ + --arch fconv_iwslt_de_en --save-dir checkpoints/fconv + +By default, :ref:`fairseq-train` will use all available GPUs on your machine. Use the +``CUDA_VISIBLE_DEVICES`` environment variable to select specific GPUs and/or to +change the number of GPU devices that will be used. + +Also note that the batch size is specified in terms of the maximum +number of tokens per batch (``--max-tokens``). You may need to use a +smaller value depending on the available GPU memory on your system. + +Generation +---------- + +Once your model is trained, you can generate translations using +:ref:`fairseq-generate` **(for binarized data)** or +:ref:`fairseq-interactive` **(for raw text)**: + +.. code-block:: console + + > fairseq-generate data-bin/iwslt14.tokenized.de-en \ + --path checkpoints/fconv/checkpoint_best.pt \ + --batch-size 128 --beam 5 + | [de] dictionary: 35475 types + | [en] dictionary: 24739 types + | data-bin/iwslt14.tokenized.de-en test 6750 examples + | model fconv + | loaded checkpoint trainings/fconv/checkpoint_best.pt + S-721 danke . + T-721 thank you . + ... + +To generate translations with only a CPU, use the ``--cpu`` flag. BPE +continuation markers can be removed with the ``--remove-bpe`` flag. + +Advanced Training Options +========================= + +Large mini-batch training with delayed updates +---------------------------------------------- + +The ``--update-freq`` option can be used to accumulate gradients from +multiple mini-batches and delay updating, creating a larger effective +batch size. Delayed updates can also improve training speed by reducing +inter-GPU communication costs and by saving idle time caused by variance +in workload across GPUs. See `Ott et al. +(2018) `__ for more details. 
+ +To train on a single GPU with an effective batch size that is equivalent +to training on 8 GPUs: + +.. code-block:: console + + > CUDA_VISIBLE_DEVICES=0 fairseq-train --update-freq 8 (...) + +Training with half precision floating point (FP16) +-------------------------------------------------- + +.. note:: + + FP16 training requires a Volta GPU and CUDA 9.1 or greater + +Recent GPUs enable efficient half precision floating point computation, +e.g., using `Nvidia Tensor Cores +`__. +Fairseq supports FP16 training with the ``--fp16`` flag: + +.. code-block:: console + + > fairseq-train --fp16 (...) + +Distributed training +-------------------- + +Distributed training in fairseq is implemented on top of ``torch.distributed``. +The easiest way to launch jobs is with the `torch.distributed.launch +`__ tool. + +For example, to train a large English-German Transformer model on 2 nodes each +with 8 GPUs (in total 16 GPUs), run the following command on each node, +replacing ``node_rank=0`` with ``node_rank=1`` on the second node and making +sure to update ``--master_addr`` to the IP address of the first node: + +.. code-block:: console + + > python -m torch.distributed.launch --nproc_per_node=8 \ + --nnodes=2 --node_rank=0 --master_addr="192.168.1.1" \ + --master_port=12345 \ + $(which fairseq-train) data-bin/wmt16_en_de_bpe32k \ + --arch transformer_vaswani_wmt_en_de_big --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 --warmup-updates 4000 \ + --lr 0.0005 \ + --dropout 0.3 --weight-decay 0.0 --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --max-tokens 3584 \ + --max-epoch 70 \ + --fp16 + +On SLURM clusters, fairseq will automatically detect the number of nodes and +GPUs, but a port number must be provided: + +.. code-block:: console + + > salloc --gpus=16 --nodes 2 (...) + > srun fairseq-train --distributed-port 12345 (...). + + +.. 
warning:: + + PyTorch Distributed features used in fairseq are intended for internal + communication only. They are not built for use in untrusted environments or + networks. + + For performance reasons, none of the PyTorch Distributed primitives include + any authorization protocol and will send messages unencrypted. They accept + connections from anywhere, and execute the workload sent without performing + any checks. Therefore, if you run a distributed fairseq job on your network, + anybody with access to the network can execute arbitrary code with the + privileges of the user running the job. + +Sharding very large datasets +---------------------------- + +It can be challenging to train over very large datasets, particularly if your +machine does not have much system RAM. Most tasks in fairseq support training +over "sharded" datasets, in which the original dataset has been preprocessed +into non-overlapping chunks (or "shards"). + +For example, instead of preprocessing all your data into a single "data-bin" +directory, you can split the data and create "data-bin1", "data-bin2", etc. +Then you can adapt your training command like so: + +.. code-block:: console + + > fairseq-train data-bin1:data-bin2:data-bin3 (...) + +Training will now iterate over each shard, one by one, with each shard +corresponding to an "epoch", thus reducing system memory usage. diff --git a/fairseq/docs/hydra_integration.md b/fairseq/docs/hydra_integration.md new file mode 100644 index 0000000..6a15298 --- /dev/null +++ b/fairseq/docs/hydra_integration.md @@ -0,0 +1,284 @@ +## Hydra + +[Hydra](https://github.com/facebookresearch/hydra) is an open-source Python +framework that simplifies the development of research and other complex +applications. The key feature is the ability to dynamically create a +hierarchical configuration by composition and override it through config files +and the command line. 
The name Hydra comes from its ability to run multiple +similar jobs - much like a Hydra with multiple heads. + +## Motivation + +Until recently, all components in fairseq were configured through a shared +`args` namespace that was created at application startup. Components declared +their own `add_args` method to update the argparse parser, hoping that the names +would not clash with arguments from other components. While this model works for +smaller applications, as fairseq grew and became integrated into other +applications, this became problematic. In order to determine how to configure +each component, one needed to a) examine what args were added by this component, +and b) read the code to figure out what shared arguments it is using that were +added in other places. Reproducing models involved sharing commands that often +contained dozens of command line switches. + +The model described above is still supported by fairseq for backward +compatibility, but will be deprecated some time in the future. + +New components in fairseq should now create a dataclass that encapsulates all +parameters required to configure this component. The dataclass is registered +along with the component, and fairseq takes care of constructing and providing +this configuration object to the component's constructor. Note that sharing +parameters can optionally still work, but one has to explicitly point to the +"source of truth" (see inheritance example below). These changes make components +in fairseq more independent and re-usable by other applications: all that is +needed to create a component is to initialize its dataclass and overwrite some +of the defaults. + +While configuring fairseq through command line (using either the legacy argparse +based or the new Hydra based entry points) is still fully supported, you can now +take advantage of configuring fairseq completely or piece-by-piece through +hierarchical YAML configuration files. 
These files can also be shipped as +examples that others can use to run an identically configured job. + +Additionally, Hydra has a rich and growing [library of +plugins](https://github.com/facebookresearch/hydra/tree/master/plugins) that +provide functionality such as hyperparameter sweeping (including using Bayesian +optimization through the [Ax](https://github.com/facebook/Ax) library), job +launching across various platforms, and more. + +## Creating or migrating components + +In general, each new (or updated) component should provide a companion +[dataclass](https://www.python.org/dev/peps/pep-0557/). These dataclasses are +typically located in the same file as the component and are passed as arguments +to the `register_*()` functions. Top-level configs that should be present in +every fairseq application are placed in the +[global](fairseq/dataclass/configs.py) config file and added to the +`FairseqConfig` object. + +Each dataclass is a plain-old-data object, similar to a `NamedTuple`. These +classes are decorated with a `@dataclass` decorator, and typically inherit from +`FairseqDataclass` (which adds some functionality for backward compatibility). +Each field must have a type, and generally has metadata (such as a help string) +and a default value. Only primitive types or other config objects are allowed as +data types for each field. + +#### Example: + +```python +from dataclasses import dataclass, field +from fairseq.dataclass import FairseqDataclass + +@dataclass +class InteractiveConfig(FairseqDataclass): + buffer_size: int = field( + default=0, + metadata={ + "help": "read this many sentences into a buffer before processing them" + }, + ) + input: str = field( + default="-", + metadata={"help": "file to read from; use - for stdin"}, + ) +``` + +### Inheriting values + +Some components require sharing a value. For example, a learning rate scheduler +and an optimizer may both need to know the initial learning rate value. 
One can +declare a field that, by default, will inherit its value from another config +node in the same hierarchy: + +```python +@dataclass +class FairseqAdamConfig(FairseqDataclass): + ... + lr: List[float] = II("optimization.lr") + ... +``` + +`II("optimization.lr")` is syntactic sugar for `"${optimization.lr}"`, which is +the value one can use in a YAML config file or through command line to achieve +the same effect. Note that this assumes that there is an "optimization" config +object in the root config and it has a field called "lr". + +### Tasks and Models + +Creating Tasks and Models works the same as before, except that legacy +implementations now inherit from `LegacyFairseq*` base classes, while new +components inherit from `FairseqTask` and `FairseqModel` and provide a dataclass +to the `register_*()` functions. + +#### Task example: + +```python +@dataclass +class LanguageModelingConfig(FairseqDataclass): + data: Optional[str] = field( + default=None, metadata={"help": "path to data directory"} + ) + ... + +@register_task("language_modeling", dataclass=LanguageModelingConfig) +class LanguageModelingTask(FairseqTask): + ... + @classmethod + def setup_task(cls, cfg: LanguageModelingConfig): + ... +``` + +#### Model example: + +```python +@dataclass +class TransformerLanguageModelConfig(FairseqDataclass): + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="relu", metadata={"help": "activation function to use"} + ) + dropout: float = field(default=0.1, metadata={"help": "dropout probability"}) + ... + +@register_model("transformer_lm", dataclass=TransformerLanguageModelConfig) +class TransformerLanguageModel(FairseqLanguageModel): + ... + @classmethod + def build_model(cls, cfg: TransformerLanguageModelConfig, task: FairseqTask): + ... 
+``` + +### Other components + +Other components work as before, but they now take their configuration dataclass +as the only constructor argument: + +```python +@dataclass +class MosesTokenizerConfig(FairseqDataclass): + source_lang: str = field(default="en", metadata={"help": "source language"}) + ... + +@register_tokenizer("moses", dataclass=MosesTokenizerConfig) +class MosesTokenizer(object): + def __init__(self, cfg: MosesTokenizerConfig): + ... +``` + +Note that if you are adding a new registry for a new set of components, you need +to add it to the `FairseqConfig` object in `fairseq/dataclass/configs.py`: + +```python +@dataclass +class FairseqConfig(object): + ... + my_new_registry: Any = None +``` + +## Training with `fairseq-hydra-train` + +To fully take advantage of configuration flexibility offered by Hydra, you may +want to train new models using the `fairseq-hydra-train` entry point. Legacy CLI +tools such as `fairseq-train` will remain supported for the foreseeable future +but will be deprecated eventually. + +On startup, Hydra will create a configuration object that contains a hierarchy +of all the necessary dataclasses populated with their default values in the +code. The default values are overwritten by values found in YAML files in +`fairseq/config` directory (which currently sets minimal defaults) and then +further overwritten by values provided through command line arguments. + +Some of the most common use cases are shown below: + +### 1. 
Override default values through command line: + +```shell script +$ fairseq-hydra-train \ + distributed_training.distributed_world_size=1 \ + dataset.batch_size=2 \ + task.data=data-bin \ + model=transformer_lm/transformer_lm_gpt \ + task=language_modeling \ + optimization.max_update=5000 +``` + +Note that along with explicitly providing values for parameters such as +`dataset.batch_size`, this also tells Hydra to overlay configuration found in +`fairseq/config/model/transformer_lm/transformer_lm_gpt.yaml` over the default +values in the dataclass. If you want to train a model without specifying a +particular architecture you can simply specify `model=transformer_lm`. This only +works for migrated tasks and models. + +### 2. Replace bundled configs with an external config: + +```shell script +$ fairseq-hydra-train \ + --config-dir /path/to/external/configs \ + --config-name wiki103 +``` + +where `/path/to/external/configs/wiki103.yaml` contains: + +```yaml +# @package _group_ + +model: + _name: transformer_lm +distributed_training: + distributed_world_size: 1 +dataset: + batch_size: 2 +task: + _name: language_modeling + data: /path/to/data + add_bos_token: false + max_target_positions: 1024 +optimization: + max_update: 50000 + lr: [ 0.25 ] +criterion: cross_entropy +optimizer: adam +lr_scheduler: + _name: cosine +``` + +Note that here bundled configs from `fairseq/config` directory are not used, +however the defaults from each dataclass will still be used (unless overwritten +by your external config). + +Additionally you can choose to break up your configs by creating a directory +structure in the same location as your main config file, with the names of the +top-level fields (such as "model", "dataset", etc), and placing config files +with meaningful names that would populate that specific section of your +top-level config file (for example, you might have +`model/small_transformer_lm.yaml`, `model/big_transformer_lm.yaml`, etc). 
You +can then specify the correct configuration via command line, defaults in the +main config, or even launch all of them as a sweep (see Hydra documentation on +how to do this). + +### 3. Add an external config directory to Hydra search path: + +This allows combining default configuration (including using any bundled config +files), while specifying your own config files for some parts of the +configuration. + +```shell script +$ fairseq-hydra-train \ + distributed_training.distributed_world_size=1 \ + dataset.batch_size=2 \ + task.data=/path/to/data/ \ + model=transformer_lm/2_layers \ + task=language_modeling \ + optimization.max_update=5000 \ + --config-dir /path/to/external/configs +``` + +where `/path/to/external/configs` has the following structure: +``` +. ++-- model +| +-- transformer_lm +| | +-- 2_layers.yaml +``` + +and `2_layers.yaml` contains a copy of `transformer_lm_gpt.yaml` but with +`decoder_layers` set to 2. You can add other configs to configure other +components as well. diff --git a/fairseq/docs/index.rst b/fairseq/docs/index.rst new file mode 100644 index 0000000..591db86 --- /dev/null +++ b/fairseq/docs/index.rst @@ -0,0 +1,49 @@ +.. fairseq documentation master file, created by + sphinx-quickstart on Fri Aug 17 21:45:30 2018. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +:github_url: https://github.com/pytorch/fairseq + + +fairseq documentation +===================== + +Fairseq is a sequence modeling toolkit written in `PyTorch +`_ that allows researchers and developers to +train custom models for translation, summarization, language modeling and other +text generation tasks. + +.. toctree:: + :maxdepth: 1 + :caption: Getting Started + + getting_started + command_line_tools + +.. toctree:: + :maxdepth: 1 + :caption: Extending Fairseq + + overview + tutorial_simple_lstm + tutorial_classifying_names + +.. 
toctree:: + :maxdepth: 2 + :caption: Library Reference + + tasks + models + criterions + optim + lr_scheduler + data + modules + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/fairseq/docs/lr_scheduler.rst b/fairseq/docs/lr_scheduler.rst new file mode 100644 index 0000000..bbc09dc --- /dev/null +++ b/fairseq/docs/lr_scheduler.rst @@ -0,0 +1,34 @@ +.. role:: hidden + :class: hidden-section + +.. _Learning Rate Schedulers: + +Learning Rate Schedulers +======================== + +Learning Rate Schedulers update the learning rate over the course of training. +Learning rates can be updated after each update via :func:`step_update` or at +epoch boundaries via :func:`step`. + +.. automodule:: fairseq.optim.lr_scheduler + :members: + +.. autoclass:: fairseq.optim.lr_scheduler.FairseqLRScheduler + :members: + :undoc-members: + +.. autoclass:: fairseq.optim.lr_scheduler.cosine_lr_scheduler.CosineSchedule + :members: + :undoc-members: +.. autoclass:: fairseq.optim.lr_scheduler.fixed_schedule.FixedSchedule + :members: + :undoc-members: +.. autoclass:: fairseq.optim.lr_scheduler.inverse_square_root_schedule.InverseSquareRootSchedule + :members: + :undoc-members: +.. autoclass:: fairseq.optim.lr_scheduler.reduce_lr_on_plateau.ReduceLROnPlateau + :members: + :undoc-members: +.. autoclass:: fairseq.optim.lr_scheduler.triangular_lr_scheduler.TriangularSchedule + :members: + :undoc-members: diff --git a/fairseq/docs/make.bat b/fairseq/docs/make.bat new file mode 100644 index 0000000..baa9d02 --- /dev/null +++ b/fairseq/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=fairseq + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. 
Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/fairseq/docs/models.rst b/fairseq/docs/models.rst new file mode 100644 index 0000000..054622d --- /dev/null +++ b/fairseq/docs/models.rst @@ -0,0 +1,104 @@ +.. role:: hidden + :class: hidden-section + +.. module:: fairseq.models + +.. _Models: + +Models +====== + +A Model defines the neural network's ``forward()`` method and encapsulates all +of the learnable parameters in the network. Each model also provides a set of +named *architectures* that define the precise network configuration (e.g., +embedding dimension, number of layers, etc.). + +Both the model type and architecture are selected via the ``--arch`` +command-line argument. Once selected, a model may expose additional command-line +arguments for further configuration. + +.. note:: + + All fairseq Models extend :class:`BaseFairseqModel`, which in turn extends + :class:`torch.nn.Module`. Thus any fairseq Model can be used as a + stand-alone Module in other PyTorch code. + + +Convolutional Neural Networks (CNN) +----------------------------------- + +.. module:: fairseq.models.fconv +.. autoclass:: fairseq.models.fconv.FConvModel + :members: +.. autoclass:: fairseq.models.fconv.FConvEncoder + :members: + :undoc-members: +.. autoclass:: fairseq.models.fconv.FConvDecoder + :members: + + +Long Short-Term Memory (LSTM) networks +-------------------------------------- + +.. module:: fairseq.models.lstm +.. autoclass:: fairseq.models.lstm.LSTMModel + :members: +.. autoclass:: fairseq.models.lstm.LSTMEncoder + :members: +.. 
autoclass:: fairseq.models.lstm.LSTMDecoder + :members: + + +Transformer (self-attention) networks +------------------------------------- + +.. module:: fairseq.models.transformer +.. autoclass:: fairseq.models.transformer.TransformerModel + :members: +.. autoclass:: fairseq.models.transformer.TransformerEncoder + :members: +.. autoclass:: fairseq.models.transformer.TransformerEncoderLayer + :members: +.. autoclass:: fairseq.models.transformer.TransformerDecoder + :members: +.. autoclass:: fairseq.models.transformer.TransformerDecoderLayer + :members: + + +Adding new models +----------------- + +.. currentmodule:: fairseq.models +.. autofunction:: fairseq.models.register_model +.. autofunction:: fairseq.models.register_model_architecture +.. autoclass:: fairseq.models.BaseFairseqModel + :members: + :undoc-members: +.. autoclass:: fairseq.models.FairseqEncoderDecoderModel + :members: + :undoc-members: +.. autoclass:: fairseq.models.FairseqEncoderModel + :members: + :undoc-members: +.. autoclass:: fairseq.models.FairseqLanguageModel + :members: + :undoc-members: +.. autoclass:: fairseq.models.FairseqMultiModel + :members: + :undoc-members: +.. autoclass:: fairseq.models.FairseqEncoder + :members: +.. autoclass:: fairseq.models.CompositeEncoder + :members: +.. autoclass:: fairseq.models.FairseqDecoder + :members: + + +.. _Incremental decoding: + +Incremental decoding +-------------------- + +.. autoclass:: fairseq.models.FairseqIncrementalDecoder + :members: + :undoc-members: diff --git a/fairseq/docs/modules.rst b/fairseq/docs/modules.rst new file mode 100644 index 0000000..9631c93 --- /dev/null +++ b/fairseq/docs/modules.rst @@ -0,0 +1,9 @@ +Modules +======= + +Fairseq provides several stand-alone :class:`torch.nn.Module` classes that may +be helpful when implementing a new :class:`~fairseq.models.BaseFairseqModel`. + +.. 
automodule:: fairseq.modules + :members: + :undoc-members: diff --git a/fairseq/docs/optim.rst b/fairseq/docs/optim.rst new file mode 100644 index 0000000..c332645 --- /dev/null +++ b/fairseq/docs/optim.rst @@ -0,0 +1,38 @@ +.. role:: hidden + :class: hidden-section + +.. _optimizers: + +Optimizers +========== + +Optimizers update the Model parameters based on the gradients. + +.. automodule:: fairseq.optim + :members: + +.. autoclass:: fairseq.optim.FairseqOptimizer + :members: + :undoc-members: + +.. autoclass:: fairseq.optim.adadelta.Adadelta + :members: + :undoc-members: +.. autoclass:: fairseq.optim.adagrad.Adagrad + :members: + :undoc-members: +.. autoclass:: fairseq.optim.adafactor.FairseqAdafactor + :members: + :undoc-members: +.. autoclass:: fairseq.optim.adam.FairseqAdam + :members: + :undoc-members: +.. autoclass:: fairseq.optim.fp16_optimizer.FP16Optimizer + :members: + :undoc-members: +.. autoclass:: fairseq.optim.nag.FairseqNAG + :members: + :undoc-members: +.. autoclass:: fairseq.optim.sgd.SGD + :members: + :undoc-members: diff --git a/fairseq/docs/overview.rst b/fairseq/docs/overview.rst new file mode 100644 index 0000000..026b3b5 --- /dev/null +++ b/fairseq/docs/overview.rst @@ -0,0 +1,74 @@ +Overview +======== + +Fairseq can be extended through user-supplied `plug-ins +<https://en.wikipedia.org/wiki/Plug-in_(computing)>`_. We support five kinds of +plug-ins: + +- :ref:`Models` define the neural network architecture and encapsulate all of the + learnable parameters. +- :ref:`Criterions` compute the loss function given the model outputs and targets. +- :ref:`Tasks` store dictionaries and provide helpers for loading/iterating over + Datasets, initializing the Model/Criterion and calculating the loss. +- :ref:`Optimizers` update the Model parameters based on the gradients. +- :ref:`Learning Rate Schedulers` update the learning rate over the course of + training.
+ +**Training Flow** + +Given a ``model``, ``criterion``, ``task``, ``optimizer`` and ``lr_scheduler``, +fairseq implements the following high-level training flow:: + + for epoch in range(num_epochs): + itr = task.get_batch_iterator(task.dataset('train')) + for num_updates, batch in enumerate(itr): + task.train_step(batch, model, criterion, optimizer) + average_and_clip_gradients() + optimizer.step() + lr_scheduler.step_update(num_updates) + lr_scheduler.step(epoch) + +where the default implementation for ``task.train_step`` is roughly:: + + def train_step(self, batch, model, criterion, optimizer, **unused): + loss = criterion(model, batch) + optimizer.backward(loss) + return loss + +**Registering new plug-ins** + +New plug-ins are *registered* through a set of ``@register`` function +decorators, for example:: + + @register_model('my_lstm') + class MyLSTM(FairseqEncoderDecoderModel): + (...) + +Once registered, new plug-ins can be used with the existing :ref:`Command-line +Tools`. See the Tutorial sections for more detailed walkthroughs of how to add +new plug-ins. + +**Loading plug-ins from another directory** + +New plug-ins can be defined in a custom module stored in the user system. In +order to import the module, and make the plugin available to *fairseq*, the +command line supports the ``--user-dir`` flag that can be used to specify a +custom location for additional modules to load into *fairseq*. + +For example, assuming this directory tree:: + + /home/user/my-module/ + └── __init__.py + +with ``__init__.py``:: + + from fairseq.models import register_model_architecture + from fairseq.models.transformer import transformer_vaswani_wmt_en_de_big + + @register_model_architecture('transformer', 'my_transformer') + def transformer_mmt_big(args): + transformer_vaswani_wmt_en_de_big(args) + +it is possible to invoke the :ref:`fairseq-train` script with the new architecture with:: + + fairseq-train ... 
--user-dir /home/user/my-module -a my_transformer --task translation diff --git a/fairseq/docs/tasks.rst b/fairseq/docs/tasks.rst new file mode 100644 index 0000000..5f65c3c --- /dev/null +++ b/fairseq/docs/tasks.rst @@ -0,0 +1,61 @@ +.. role:: hidden + :class: hidden-section + +.. module:: fairseq.tasks + +.. _Tasks: + +Tasks +===== + +Tasks store dictionaries and provide helpers for loading/iterating over +Datasets, initializing the Model/Criterion and calculating the loss. + +Tasks can be selected via the ``--task`` command-line argument. Once selected, a +task may expose additional command-line arguments for further configuration. + +Example usage:: + + # setup the task (e.g., load dictionaries) + task = fairseq.tasks.setup_task(args) + + # build model and criterion + model = task.build_model(args) + criterion = task.build_criterion(args) + + # load datasets + task.load_dataset('train') + task.load_dataset('valid') + + # iterate over mini-batches of data + batch_itr = task.get_batch_iterator( + task.dataset('train'), max_tokens=4096, + ) + for batch in batch_itr: + # compute the loss + loss, sample_size, logging_output = task.get_loss( + model, criterion, batch, + ) + loss.backward() + + +Translation +----------- + +.. autoclass:: fairseq.tasks.translation.TranslationTask + +.. _language modeling: + +Language Modeling +----------------- + +.. autoclass:: fairseq.tasks.language_modeling.LanguageModelingTask + + +Adding new tasks +---------------- + +.. autofunction:: fairseq.tasks.register_task +.. 
autoclass:: fairseq.tasks.FairseqTask + :members: + :undoc-members: diff --git a/fairseq/docs/tutorial_classifying_names.rst b/fairseq/docs/tutorial_classifying_names.rst new file mode 100644 index 0000000..de099f0 --- /dev/null +++ b/fairseq/docs/tutorial_classifying_names.rst @@ -0,0 +1,415 @@ +Tutorial: Classifying Names with a Character-Level RNN +====================================================== + +In this tutorial we will extend fairseq to support *classification* tasks. In +particular we will re-implement the PyTorch tutorial for `Classifying Names with +a Character-Level RNN <https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html>`_ +in fairseq. It is recommended to quickly skim that tutorial before beginning +this one. + +This tutorial covers: + +1. **Preprocessing the data** to create dictionaries. +2. **Registering a new Model** that encodes an input sentence with a simple RNN + and predicts the output label. +3. **Registering a new Task** that loads our dictionaries and dataset. +4. **Training the Model** using the existing command-line tools. +5. **Writing an evaluation script** that imports fairseq and allows us to + interactively evaluate our model on new inputs. + + +1. Preprocessing the data +------------------------- + +The original tutorial provides raw data, but we'll work with a modified version +of the data that is already tokenized into characters and split into separate +train, valid and test sets. + +Download and extract the data from here: +`tutorial_names.tar.gz <https://dl.fbaipublicfiles.com/fairseq/data/tutorial_names.tar.gz>`_ + +Once extracted, let's preprocess the data using the :ref:`fairseq-preprocess` +command-line tool to create the dictionaries. While this tool is primarily +intended for sequence-to-sequence problems, we're able to reuse it here by +treating the label as a "target" sequence of length 1. We'll also output the +preprocessed files in "raw" format using the ``--dataset-impl`` option to +enhance readability: + +..
code-block:: console + + > fairseq-preprocess \ + --trainpref names/train --validpref names/valid --testpref names/test \ + --source-lang input --target-lang label \ + --destdir names-bin --dataset-impl raw + +After running the above command you should see a new directory, +:file:`names-bin/`, containing the dictionaries for *inputs* and *labels*. + + +2. Registering a new Model +-------------------------- + +Next we'll register a new model in fairseq that will encode an input sentence +with a simple RNN and predict the output label. Compared to the original PyTorch +tutorial, our version will also work with batches of data and GPU Tensors. + +First let's copy the simple RNN module implemented in the `PyTorch tutorial +<https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html#creating-the-network>`_. +Create a new file named :file:`fairseq/models/rnn_classifier.py` with the +following contents:: + + import torch + import torch.nn as nn + + class RNN(nn.Module): + + def __init__(self, input_size, hidden_size, output_size): + super(RNN, self).__init__() + + self.hidden_size = hidden_size + + self.i2h = nn.Linear(input_size + hidden_size, hidden_size) + self.i2o = nn.Linear(input_size + hidden_size, output_size) + self.softmax = nn.LogSoftmax(dim=1) + + def forward(self, input, hidden): + combined = torch.cat((input, hidden), 1) + hidden = self.i2h(combined) + output = self.i2o(combined) + output = self.softmax(output) + return output, hidden + + def initHidden(self): + return torch.zeros(1, self.hidden_size) + +We must also *register* this model with fairseq using the +:func:`~fairseq.models.register_model` function decorator. Once the model is +registered we'll be able to use it with the existing :ref:`Command-line Tools`.
+ +All registered models must implement the :class:`~fairseq.models.BaseFairseqModel` +interface, so we'll create a small wrapper class in the same file and register +it in fairseq with the name ``'rnn_classifier'``:: + + from fairseq.models import BaseFairseqModel, register_model + + # Note: the register_model "decorator" should immediately precede the + # definition of the Model class. + + @register_model('rnn_classifier') + class FairseqRNNClassifier(BaseFairseqModel): + + @staticmethod + def add_args(parser): + # Models can override this method to add new command-line arguments. + # Here we'll add a new command-line argument to configure the + # dimensionality of the hidden state. + parser.add_argument( + '--hidden-dim', type=int, metavar='N', + help='dimensionality of the hidden state', + ) + + @classmethod + def build_model(cls, args, task): + # Fairseq initializes models by calling the ``build_model()`` + # function. This provides more flexibility, since the returned model + # instance can be of a different type than the one that was called. + # In this case we'll just return a FairseqRNNClassifier instance. + + # Initialize our RNN module + rnn = RNN( + # We'll define the Task in the next section, but for now just + # notice that the task holds the dictionaries for the "source" + # (i.e., the input sentence) and "target" (i.e., the label). + input_size=len(task.source_dictionary), + hidden_size=args.hidden_dim, + output_size=len(task.target_dictionary), + ) + + # Return the wrapped version of the module + return FairseqRNNClassifier( + rnn=rnn, + input_vocab=task.source_dictionary, + ) + + def __init__(self, rnn, input_vocab): + super(FairseqRNNClassifier, self).__init__() + + self.rnn = rnn + self.input_vocab = input_vocab + + # The RNN module in the tutorial expects one-hot inputs, so we can + # precompute the identity matrix to help convert from indices to + # one-hot vectors. 
We register it as a buffer so that it is moved to + # the GPU when ``cuda()`` is called. + self.register_buffer('one_hot_inputs', torch.eye(len(input_vocab))) + + def forward(self, src_tokens, src_lengths): + # The inputs to the ``forward()`` function are determined by the + # Task, and in particular the ``'net_input'`` key in each + # mini-batch. We'll define the Task in the next section, but for + # now just know that *src_tokens* has shape `(batch, src_len)` and + # *src_lengths* has shape `(batch)`. + bsz, max_src_len = src_tokens.size() + + # Initialize the RNN hidden state. Compared to the original PyTorch + # tutorial we'll also handle batched inputs and work on the GPU. + hidden = self.rnn.initHidden() + hidden = hidden.repeat(bsz, 1) # expand for batched inputs + hidden = hidden.to(src_tokens.device) # move to GPU + + for i in range(max_src_len): + # WARNING: The inputs have padding, so we should mask those + # elements here so that padding doesn't affect the results. + # This is left as an exercise for the reader. The padding symbol + # is given by ``self.input_vocab.pad()`` and the unpadded length + # of each input is given by *src_lengths*. + + # One-hot encode a batch of input characters. + input = self.one_hot_inputs[src_tokens[:, i].long()] + + # Feed the input to our RNN. + output, hidden = self.rnn(input, hidden) + + # Return the final output state for making a prediction + return output + +Finally let's define a *named architecture* with the configuration for our +model. This is done with the :func:`~fairseq.models.register_model_architecture` +function decorator. Thereafter this named architecture can be used with the +``--arch`` command-line argument, e.g., ``--arch pytorch_tutorial_rnn``:: + + from fairseq.models import register_model_architecture + + # The first argument to ``register_model_architecture()`` should be the name + # of the model we registered above (i.e., 'rnn_classifier'). 
The function we + # register here should take a single argument *args* and modify it in-place + # to match the desired architecture. + + @register_model_architecture('rnn_classifier', 'pytorch_tutorial_rnn') + def pytorch_tutorial_rnn(args): + # We use ``getattr()`` to prioritize arguments that are explicitly given + # on the command-line, so that the defaults defined below are only used + # when no other value has been specified. + args.hidden_dim = getattr(args, 'hidden_dim', 128) + + +3. Registering a new Task +------------------------- + +Now we'll register a new :class:`~fairseq.tasks.FairseqTask` that will load our +dictionaries and dataset. Tasks can also control how the data is batched into +mini-batches, but in this tutorial we'll reuse the batching provided by +:class:`fairseq.data.LanguagePairDataset`. + +Create a new file named :file:`fairseq/tasks/simple_classification.py` with the +following contents:: + + import os + import torch + + from fairseq.data import Dictionary, LanguagePairDataset + from fairseq.tasks import LegacyFairseqTask, register_task + + + @register_task('simple_classification') + class SimpleClassificationTask(LegacyFairseqTask): + + @staticmethod + def add_args(parser): + # Add some command-line arguments for specifying where the data is + # located and the maximum supported input length. + parser.add_argument('data', metavar='FILE', + help='file prefix for data') + parser.add_argument('--max-positions', default=1024, type=int, + help='max input length') + + @classmethod + def setup_task(cls, args, **kwargs): + # Here we can perform any setup required for the task. This may include + # loading Dictionaries, initializing shared Embedding layers, etc. + # In this case we'll just load the Dictionaries. 
+ input_vocab = Dictionary.load(os.path.join(args.data, 'dict.input.txt')) + label_vocab = Dictionary.load(os.path.join(args.data, 'dict.label.txt')) + print('| [input] dictionary: {} types'.format(len(input_vocab))) + print('| [label] dictionary: {} types'.format(len(label_vocab))) + + return SimpleClassificationTask(args, input_vocab, label_vocab) + + def __init__(self, args, input_vocab, label_vocab): + super().__init__(args) + self.input_vocab = input_vocab + self.label_vocab = label_vocab + + def load_dataset(self, split, **kwargs): + """Load a given dataset split (e.g., train, valid, test).""" + + prefix = os.path.join(self.args.data, '{}.input-label'.format(split)) + + # Read input sentences. + sentences, lengths = [], [] + with open(prefix + '.input', encoding='utf-8') as file: + for line in file: + sentence = line.strip() + + # Tokenize the sentence, splitting on spaces + tokens = self.input_vocab.encode_line( + sentence, add_if_not_exist=False, + ) + + sentences.append(tokens) + lengths.append(tokens.numel()) + + # Read labels. + labels = [] + with open(prefix + '.label', encoding='utf-8') as file: + for line in file: + label = line.strip() + labels.append( + # Convert label to a numeric ID. + torch.LongTensor([self.label_vocab.add_symbol(label)]) + ) + + assert len(sentences) == len(labels) + print('| {} {} {} examples'.format(self.args.data, split, len(sentences))) + + # We reuse LanguagePairDataset since classification can be modeled as a + # sequence-to-sequence task where the target sequence has length 1. + self.datasets[split] = LanguagePairDataset( + src=sentences, + src_sizes=lengths, + src_dict=self.input_vocab, + tgt=labels, + tgt_sizes=torch.ones(len(labels)), # targets have length 1 + tgt_dict=self.label_vocab, + left_pad_source=False, + # Since our target is a single class label, there's no need for + # teacher forcing. 
If we set this to ``True`` then our Model's + # ``forward()`` method would receive an additional argument called + # *prev_output_tokens* that would contain a shifted version of the + # target sequence. + input_feeding=False, + ) + + def max_positions(self): + """Return the max input length allowed by the task.""" + # The source should be less than *args.max_positions* and the "target" + # has max length 1. + return (self.args.max_positions, 1) + + @property + def source_dictionary(self): + """Return the source :class:`~fairseq.data.Dictionary`.""" + return self.input_vocab + + @property + def target_dictionary(self): + """Return the target :class:`~fairseq.data.Dictionary`.""" + return self.label_vocab + + # We could override this method if we wanted more control over how batches + # are constructed, but it's not necessary for this tutorial since we can + # reuse the batching provided by LanguagePairDataset. + # + # def get_batch_iterator( + # self, dataset, max_tokens=None, max_sentences=None, max_positions=None, + # ignore_invalid_inputs=False, required_batch_size_multiple=1, + # seed=1, num_shards=1, shard_id=0, num_workers=0, epoch=1, + # data_buffer_size=0, disable_iterator_cache=False, + # ): + # (...) + + +4. Training the Model +--------------------- + +Now we're ready to train the model. We can use the existing :ref:`fairseq-train` +command-line tool for this, making sure to specify our new Task (``--task +simple_classification``) and Model architecture (``--arch +pytorch_tutorial_rnn``): + +.. note:: + + You can also configure the dimensionality of the hidden state by passing the + ``--hidden-dim`` argument to :ref:`fairseq-train`. + +.. code-block:: console + + > fairseq-train names-bin \ + --task simple_classification \ + --arch pytorch_tutorial_rnn \ + --optimizer adam --lr 0.001 --lr-shrink 0.5 \ + --max-tokens 1000 + (...) 
+ | epoch 027 | loss 1.200 | ppl 2.30 | wps 15728 | ups 119.4 | wpb 116 | bsz 116 | num_updates 3726 | lr 1.5625e-05 | gnorm 1.290 | clip 0% | oom 0 | wall 32 | train_wall 21 + | epoch 027 | valid on 'valid' subset | valid_loss 1.41304 | valid_ppl 2.66 | num_updates 3726 | best 1.41208 + | done training in 31.6 seconds + +The model files should appear in the :file:`checkpoints/` directory. + + +5. Writing an evaluation script +------------------------------- + +Finally we can write a short script to evaluate our model on new inputs. Create +a new file named :file:`eval_classifier.py` with the following contents:: + + from fairseq import checkpoint_utils, data, options, tasks + + # Parse command-line arguments for generation + parser = options.get_generation_parser(default_task='simple_classification') + args = options.parse_args_and_arch(parser) + + # Setup task + task = tasks.setup_task(args) + + # Load model + print('| loading model from {}'.format(args.path)) + models, _model_args = checkpoint_utils.load_model_ensemble([args.path], task=task) + model = models[0] + + while True: + sentence = input('\nInput: ') + + # Tokenize into characters + chars = ' '.join(list(sentence.strip())) + tokens = task.source_dictionary.encode_line( + chars, add_if_not_exist=False, + ) + + # Build mini-batch to feed to the model + batch = data.language_pair_dataset.collate( + samples=[{'id': -1, 'source': tokens}], # bsz = 1 + pad_idx=task.source_dictionary.pad(), + eos_idx=task.source_dictionary.eos(), + left_pad_source=False, + input_feeding=False, + ) + + # Feed batch to the model and get predictions + preds = model(**batch['net_input']) + + # Print top 3 predictions and their log-probabilities + top_scores, top_labels = preds[0].topk(k=3) + for score, label_idx in zip(top_scores, top_labels): + label_name = task.target_dictionary.string([label_idx]) + print('({:.2f})\t{}'.format(score, label_name)) + +Now we can evaluate our model interactively. 
Note that we have included the +original data path (:file:`names-bin/`) so that the dictionaries can be loaded: + +.. code-block:: console + + > python eval_classifier.py names-bin --path checkpoints/checkpoint_best.pt + | [input] dictionary: 64 types + | [label] dictionary: 24 types + | loading model from checkpoints/checkpoint_best.pt + + Input: Satoshi + (-0.61) Japanese + (-1.20) Arabic + (-2.86) Italian + + Input: Sinbad + (-0.30) Arabic + (-1.76) English + (-4.08) Russian diff --git a/fairseq/docs/tutorial_simple_lstm.rst b/fairseq/docs/tutorial_simple_lstm.rst new file mode 100644 index 0000000..f529885 --- /dev/null +++ b/fairseq/docs/tutorial_simple_lstm.rst @@ -0,0 +1,518 @@ +Tutorial: Simple LSTM +===================== + +In this tutorial we will extend fairseq by adding a new +:class:`~fairseq.models.FairseqEncoderDecoderModel` that encodes a source +sentence with an LSTM and then passes the final hidden state to a second LSTM +that decodes the target sentence (without attention). + +This tutorial covers: + +1. **Writing an Encoder and Decoder** to encode/decode the source/target + sentence, respectively. +2. **Registering a new Model** so that it can be used with the existing + :ref:`Command-line tools`. +3. **Training the Model** using the existing command-line tools. +4. **Making generation faster** by modifying the Decoder to use + :ref:`Incremental decoding`. + + +1. Building an Encoder and Decoder +---------------------------------- + +In this section we'll define a simple LSTM Encoder and Decoder. All Encoders +should implement the :class:`~fairseq.models.FairseqEncoder` interface and +Decoders should implement the :class:`~fairseq.models.FairseqDecoder` interface. +These interfaces themselves extend :class:`torch.nn.Module`, so FairseqEncoders +and FairseqDecoders can be written and used in the same ways as ordinary PyTorch +Modules. 
+ + +Encoder +~~~~~~~ + +Our Encoder will embed the tokens in the source sentence, feed them to a +:class:`torch.nn.LSTM` and return the final hidden state. To create our encoder +save the following in a new file named :file:`fairseq/models/simple_lstm.py`:: + + import torch.nn as nn + from fairseq import utils + from fairseq.models import FairseqEncoder + + class SimpleLSTMEncoder(FairseqEncoder): + + def __init__( + self, args, dictionary, embed_dim=128, hidden_dim=128, dropout=0.1, + ): + super().__init__(dictionary) + self.args = args + + # Our encoder will embed the inputs before feeding them to the LSTM. + self.embed_tokens = nn.Embedding( + num_embeddings=len(dictionary), + embedding_dim=embed_dim, + padding_idx=dictionary.pad(), + ) + self.dropout = nn.Dropout(p=dropout) + + # We'll use a single-layer, unidirectional LSTM for simplicity. + self.lstm = nn.LSTM( + input_size=embed_dim, + hidden_size=hidden_dim, + num_layers=1, + bidirectional=False, + batch_first=True, + ) + + def forward(self, src_tokens, src_lengths): + # The inputs to the ``forward()`` function are determined by the + # Task, and in particular the ``'net_input'`` key in each + # mini-batch. We discuss Tasks in the next tutorial, but for now just + # know that *src_tokens* has shape `(batch, src_len)` and *src_lengths* + # has shape `(batch)`. + + # Note that the source is typically padded on the left. This can be + # configured by adding the `--left-pad-source "False"` command-line + # argument, but here we'll make the Encoder handle either kind of + # padding by converting everything to be right-padded. + if self.args.left_pad_source: + # Convert left-padding to right-padding. + src_tokens = utils.convert_padding_direction( + src_tokens, + padding_idx=self.dictionary.pad(), + left_to_right=True + ) + + # Embed the source. + x = self.embed_tokens(src_tokens) + + # Apply dropout. + x = self.dropout(x) + + # Pack the sequence into a PackedSequence object to feed to the LSTM. 
+ x = nn.utils.rnn.pack_padded_sequence(x, src_lengths, batch_first=True) + + # Get the output from the LSTM. + _outputs, (final_hidden, _final_cell) = self.lstm(x) + + # Return the Encoder's output. This can be any object and will be + # passed directly to the Decoder. + return { + # this will have shape `(bsz, hidden_dim)` + 'final_hidden': final_hidden.squeeze(0), + } + + # Encoders are required to implement this method so that we can rearrange + # the order of the batch elements during inference (e.g., beam search). + def reorder_encoder_out(self, encoder_out, new_order): + """ + Reorder encoder output according to `new_order`. + + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + `encoder_out` rearranged according to `new_order` + """ + final_hidden = encoder_out['final_hidden'] + return { + 'final_hidden': final_hidden.index_select(0, new_order), + } + + +Decoder +~~~~~~~ + +Our Decoder will predict the next word, conditioned on the Encoder's final +hidden state and an embedded representation of the previous target word -- which +is sometimes called *teacher forcing*. More specifically, we'll use a +:class:`torch.nn.LSTM` to produce a sequence of hidden states that we'll project +to the size of the output vocabulary to predict each target word. + +:: + + import torch + from fairseq.models import FairseqDecoder + + class SimpleLSTMDecoder(FairseqDecoder): + + def __init__( + self, dictionary, encoder_hidden_dim=128, embed_dim=128, hidden_dim=128, + dropout=0.1, + ): + super().__init__(dictionary) + + # Our decoder will embed the inputs before feeding them to the LSTM. + self.embed_tokens = nn.Embedding( + num_embeddings=len(dictionary), + embedding_dim=embed_dim, + padding_idx=dictionary.pad(), + ) + self.dropout = nn.Dropout(p=dropout) + + # We'll use a single-layer, unidirectional LSTM for simplicity. 
+ self.lstm = nn.LSTM( + # For the first layer we'll concatenate the Encoder's final hidden + # state with the embedded target tokens. + input_size=encoder_hidden_dim + embed_dim, + hidden_size=hidden_dim, + num_layers=1, + bidirectional=False, + ) + + # Define the output projection. + self.output_projection = nn.Linear(hidden_dim, len(dictionary)) + + # During training Decoders are expected to take the entire target sequence + # (shifted right by one position) and produce logits over the vocabulary. + # The *prev_output_tokens* tensor begins with the end-of-sentence symbol, + # ``dictionary.eos()``, followed by the target sequence. + def forward(self, prev_output_tokens, encoder_out): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + + Returns: + tuple: + - the last decoder layer's output of shape + `(batch, tgt_len, vocab)` + - the last decoder layer's attention weights of shape + `(batch, tgt_len, src_len)` + """ + bsz, tgt_len = prev_output_tokens.size() + + # Extract the final hidden state from the Encoder. + final_encoder_hidden = encoder_out['final_hidden'] + + # Embed the target sequence, which has been shifted right by one + # position and now starts with the end-of-sentence symbol. + x = self.embed_tokens(prev_output_tokens) + + # Apply dropout. + x = self.dropout(x) + + # Concatenate the Encoder's final hidden state to *every* embedded + # target token. + x = torch.cat( + [x, final_encoder_hidden.unsqueeze(1).expand(bsz, tgt_len, -1)], + dim=2, + ) + + # Using PackedSequence objects in the Decoder is harder than in the + # Encoder, since the targets are not sorted in descending length order, + # which is a requirement of ``pack_padded_sequence()``. Instead we'll + # feed nn.LSTM directly. 
+ initial_state = ( + final_encoder_hidden.unsqueeze(0), # hidden + torch.zeros_like(final_encoder_hidden).unsqueeze(0), # cell + ) + output, _ = self.lstm( + x.transpose(0, 1), # convert to shape `(tgt_len, bsz, dim)` + initial_state, + ) + x = output.transpose(0, 1) # convert to shape `(bsz, tgt_len, hidden)` + + # Project the outputs to the size of the vocabulary. + x = self.output_projection(x) + + # Return the logits and ``None`` for the attention weights + return x, None + + +2. Registering the Model +------------------------ + +Now that we've defined our Encoder and Decoder we must *register* our model with +fairseq using the :func:`~fairseq.models.register_model` function decorator. +Once the model is registered we'll be able to use it with the existing +:ref:`Command-line Tools`. + +All registered models must implement the +:class:`~fairseq.models.BaseFairseqModel` interface. For sequence-to-sequence +models (i.e., any model with a single Encoder and Decoder), we can instead +implement the :class:`~fairseq.models.FairseqEncoderDecoderModel` interface. + +Create a small wrapper class in the same file and register it in fairseq with +the name ``'simple_lstm'``:: + + from fairseq.models import FairseqEncoderDecoderModel, register_model + + # Note: the register_model "decorator" should immediately precede the + # definition of the Model class. + + @register_model('simple_lstm') + class SimpleLSTMModel(FairseqEncoderDecoderModel): + + @staticmethod + def add_args(parser): + # Models can override this method to add new command-line arguments. + # Here we'll add some new command-line arguments to configure dropout + # and the dimensionality of the embeddings and hidden states. 
+ parser.add_argument( + '--encoder-embed-dim', type=int, metavar='N', + help='dimensionality of the encoder embeddings', + ) + parser.add_argument( + '--encoder-hidden-dim', type=int, metavar='N', + help='dimensionality of the encoder hidden state', + ) + parser.add_argument( + '--encoder-dropout', type=float, default=0.1, + help='encoder dropout probability', + ) + parser.add_argument( + '--decoder-embed-dim', type=int, metavar='N', + help='dimensionality of the decoder embeddings', + ) + parser.add_argument( + '--decoder-hidden-dim', type=int, metavar='N', + help='dimensionality of the decoder hidden state', + ) + parser.add_argument( + '--decoder-dropout', type=float, default=0.1, + help='decoder dropout probability', + ) + + @classmethod + def build_model(cls, args, task): + # Fairseq initializes models by calling the ``build_model()`` + # function. This provides more flexibility, since the returned model + # instance can be of a different type than the one that was called. + # In this case we'll just return a SimpleLSTMModel instance. + + # Initialize our Encoder and Decoder. + encoder = SimpleLSTMEncoder( + args=args, + dictionary=task.source_dictionary, + embed_dim=args.encoder_embed_dim, + hidden_dim=args.encoder_hidden_dim, + dropout=args.encoder_dropout, + ) + decoder = SimpleLSTMDecoder( + dictionary=task.target_dictionary, + encoder_hidden_dim=args.encoder_hidden_dim, + embed_dim=args.decoder_embed_dim, + hidden_dim=args.decoder_hidden_dim, + dropout=args.decoder_dropout, + ) + model = SimpleLSTMModel(encoder, decoder) + + # Print the model architecture. 
+ print(model) + + return model + + # We could override the ``forward()`` if we wanted more control over how + # the encoder and decoder interact, but it's not necessary for this + # tutorial since we can inherit the default implementation provided by + # the FairseqEncoderDecoderModel base class, which looks like: + # + # def forward(self, src_tokens, src_lengths, prev_output_tokens): + # encoder_out = self.encoder(src_tokens, src_lengths) + # decoder_out = self.decoder(prev_output_tokens, encoder_out) + # return decoder_out + +Finally let's define a *named architecture* with the configuration for our +model. This is done with the :func:`~fairseq.models.register_model_architecture` +function decorator. Thereafter this named architecture can be used with the +``--arch`` command-line argument, e.g., ``--arch tutorial_simple_lstm``:: + + from fairseq.models import register_model_architecture + + # The first argument to ``register_model_architecture()`` should be the name + # of the model we registered above (i.e., 'simple_lstm'). The function we + # register here should take a single argument *args* and modify it in-place + # to match the desired architecture. + + @register_model_architecture('simple_lstm', 'tutorial_simple_lstm') + def tutorial_simple_lstm(args): + # We use ``getattr()`` to prioritize arguments that are explicitly given + # on the command-line, so that the defaults defined below are only used + # when no other value has been specified. + args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256) + args.encoder_hidden_dim = getattr(args, 'encoder_hidden_dim', 256) + args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 256) + args.decoder_hidden_dim = getattr(args, 'decoder_hidden_dim', 256) + + +3. Training the Model +--------------------- + +Now we're ready to train the model. We can use the existing :ref:`fairseq-train` +command-line tool for this, making sure to specify our new Model architecture +(``--arch tutorial_simple_lstm``). 
+ +.. note:: + + Make sure you've already preprocessed the data from the IWSLT example in the + :file:`examples/translation/` directory. + +.. code-block:: console + + > fairseq-train data-bin/iwslt14.tokenized.de-en \ + --arch tutorial_simple_lstm \ + --encoder-dropout 0.2 --decoder-dropout 0.2 \ + --optimizer adam --lr 0.005 --lr-shrink 0.5 \ + --max-tokens 12000 + (...) + | epoch 052 | loss 4.027 | ppl 16.30 | wps 420805 | ups 39.7 | wpb 9841 | bsz 400 | num_updates 20852 | lr 1.95313e-05 | gnorm 0.218 | clip 0% | oom 0 | wall 529 | train_wall 396 + | epoch 052 | valid on 'valid' subset | valid_loss 4.74989 | valid_ppl 26.91 | num_updates 20852 | best 4.74954 + +The model files should appear in the :file:`checkpoints/` directory. While this +model architecture is not very good, we can use the :ref:`fairseq-generate` script to +generate translations and compute our BLEU score over the test set: + +.. code-block:: console + + > fairseq-generate data-bin/iwslt14.tokenized.de-en \ + --path checkpoints/checkpoint_best.pt \ + --beam 5 \ + --remove-bpe + (...) + | Translated 6750 sentences (153132 tokens) in 17.3s (389.12 sentences/s, 8827.68 tokens/s) + | Generate test with beam=5: BLEU4 = 8.18, 38.8/12.1/4.7/2.0 (BP=1.000, ratio=1.066, syslen=139865, reflen=131146) + + +4. Making generation faster +--------------------------- + +While autoregressive generation from sequence-to-sequence models is inherently +slow, our implementation above is especially slow because it recomputes the +entire sequence of Decoder hidden states for every output token (i.e., it is +``O(n^2)``). We can make this significantly faster by instead caching the +previous hidden states. + +In fairseq this is called :ref:`Incremental decoding`. Incremental decoding is a +special mode at inference time where the Model only receives a single timestep +of input corresponding to the immediately previous output token (for teacher +forcing) and must produce the next output incrementally. 
Thus the model must +cache any long-term state that is needed about the sequence, e.g., hidden +states, convolutional states, etc. + +To implement incremental decoding we will modify our model to implement the +:class:`~fairseq.models.FairseqIncrementalDecoder` interface. Compared to the +standard :class:`~fairseq.models.FairseqDecoder` interface, the incremental +decoder interface allows ``forward()`` methods to take an extra keyword argument +(*incremental_state*) that can be used to cache state across time-steps. + +Let's replace our ``SimpleLSTMDecoder`` with an incremental one:: + + import torch + from fairseq.models import FairseqIncrementalDecoder + + class SimpleLSTMDecoder(FairseqIncrementalDecoder): + + def __init__( + self, dictionary, encoder_hidden_dim=128, embed_dim=128, hidden_dim=128, + dropout=0.1, + ): + # This remains the same as before. + super().__init__(dictionary) + self.embed_tokens = nn.Embedding( + num_embeddings=len(dictionary), + embedding_dim=embed_dim, + padding_idx=dictionary.pad(), + ) + self.dropout = nn.Dropout(p=dropout) + self.lstm = nn.LSTM( + input_size=encoder_hidden_dim + embed_dim, + hidden_size=hidden_dim, + num_layers=1, + bidirectional=False, + ) + self.output_projection = nn.Linear(hidden_dim, len(dictionary)) + + # We now take an additional kwarg (*incremental_state*) for caching the + # previous hidden and cell states. + def forward(self, prev_output_tokens, encoder_out, incremental_state=None): + if incremental_state is not None: + # If the *incremental_state* argument is not ``None`` then we are + # in incremental inference mode. While *prev_output_tokens* will + # still contain the entire decoded prefix, we will only use the + # last step and assume that the rest of the state is cached. + prev_output_tokens = prev_output_tokens[:, -1:] + + # This remains the same as before. 
+ bsz, tgt_len = prev_output_tokens.size() + final_encoder_hidden = encoder_out['final_hidden'] + x = self.embed_tokens(prev_output_tokens) + x = self.dropout(x) + x = torch.cat( + [x, final_encoder_hidden.unsqueeze(1).expand(bsz, tgt_len, -1)], + dim=2, + ) + + # We will now check the cache and load the cached previous hidden and + # cell states, if they exist, otherwise we will initialize them to + # zeros (as before). We will use the ``utils.get_incremental_state()`` + # and ``utils.set_incremental_state()`` helpers. + initial_state = utils.get_incremental_state( + self, incremental_state, 'prev_state', + ) + if initial_state is None: + # first time initialization, same as the original version + initial_state = ( + final_encoder_hidden.unsqueeze(0), # hidden + torch.zeros_like(final_encoder_hidden).unsqueeze(0), # cell + ) + + # Run one step of our LSTM. + output, latest_state = self.lstm(x.transpose(0, 1), initial_state) + + # Update the cache with the latest hidden and cell states. + utils.set_incremental_state( + self, incremental_state, 'prev_state', latest_state, + ) + + # This remains the same as before + x = output.transpose(0, 1) + x = self.output_projection(x) + return x, None + + # The ``FairseqIncrementalDecoder`` interface also requires implementing a + # ``reorder_incremental_state()`` method, which is used during beam search + # to select and reorder the incremental state. + def reorder_incremental_state(self, incremental_state, new_order): + # Load the cached state. + prev_state = utils.get_incremental_state( + self, incremental_state, 'prev_state', + ) + + # Reorder batches according to *new_order*. + reordered_state = ( + prev_state[0].index_select(1, new_order), # hidden + prev_state[1].index_select(1, new_order), # cell + ) + + # Update the cached state. + utils.set_incremental_state( + self, incremental_state, 'prev_state', reordered_state, + ) + +Finally, we can rerun generation and observe the speedup: + +.. 
code-block:: console + + # Before + + > fairseq-generate data-bin/iwslt14.tokenized.de-en \ + --path checkpoints/checkpoint_best.pt \ + --beam 5 \ + --remove-bpe + (...) + | Translated 6750 sentences (153132 tokens) in 17.3s (389.12 sentences/s, 8827.68 tokens/s) + | Generate test with beam=5: BLEU4 = 8.18, 38.8/12.1/4.7/2.0 (BP=1.000, ratio=1.066, syslen=139865, reflen=131146) + + # After + + > fairseq-generate data-bin/iwslt14.tokenized.de-en \ + --path checkpoints/checkpoint_best.pt \ + --beam 5 \ + --remove-bpe + (...) + | Translated 6750 sentences (153132 tokens) in 5.5s (1225.54 sentences/s, 27802.94 tokens/s) + | Generate test with beam=5: BLEU4 = 8.18, 38.8/12.1/4.7/2.0 (BP=1.000, ratio=1.066, syslen=139865, reflen=131146) diff --git a/fairseq/examples/.gitignore b/fairseq/examples/.gitignore new file mode 100644 index 0000000..1ef816f --- /dev/null +++ b/fairseq/examples/.gitignore @@ -0,0 +1,2 @@ +!*/*.sh +!*/*.md diff --git a/fairseq/examples/MMPT/.gitignore b/fairseq/examples/MMPT/.gitignore new file mode 100644 index 0000000..70a255d --- /dev/null +++ b/fairseq/examples/MMPT/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +runs +data +pretrained_models +projects/mmfusion_* +log_test +third-party +python_log +slurm_snapshot_code +lightning_logs +demos diff --git a/fairseq/examples/MMPT/CONFIG.md b/fairseq/examples/MMPT/CONFIG.md new file mode 100644 index 0000000..bbd1403 --- /dev/null +++ b/fairseq/examples/MMPT/CONFIG.md @@ -0,0 +1,41 @@ +### Config Files Explained + +Taking `projects/mfmmlm.yaml` for example, which run pretraining using masked frame model (MFM) and masked language model (MLM) on a single BERT: + +```yaml +project_dir: mfmmlm # specify the project dir for this baseline. 
+run_task: + - how2.yaml # run pretraining on how2 when launching `projects/taskmfmmlm.yaml` + - [vtt.yaml, vttcap.yaml, vttqa.yaml, youcook.yaml, youcookcap.yaml, crosstask.yaml, coin.yaml] # run fine-tuning tasks. +base_dir: task # a global template folder to specify each training task. +task_group: + pretrain: # section for pretraining. Most baselines differ in this section. + task_list: + - how2.yaml # reconfig `projects/task/how2.yaml` + dataset: + aligner: MFMMLMAligner # overwrite the aligner for MFMMLM training task. + model: + model_cls: MMFusionMFMMLM # overwrite the model, which constructs negative examples for MFM on-the-fly. + loss: + loss_cls: MFMMLM # overwrite the loss as MFMMLM, which combines MFM and MLM together. + fairseq: # all fairseq args can be specified under this name. + dataset: + batch_size: 128 + finetune: # section for fine-tuning tasks, we don't need to change anything here mostly since we want to see how pretraining can contribute to finetuning. + task_list: # specify the list of downstream tasks, e.g., copy `projects/task/vtt.yaml` to `projects/mfmmlm`. + - vtt.yaml + - vttqa.yaml + - youcook.yaml + - youcookcap.yaml + - crosstask.yaml + - coin.yaml + test: # section for testing. + task_list: + - test_vtt.yaml + - test_vttqa.yaml + - test_youcook.yaml + - test_youcookcap.yaml + - test_crosstask.yaml + - test_crosstask_zs.yaml + - test_coin.yaml +``` diff --git a/fairseq/examples/MMPT/DATASET.md b/fairseq/examples/MMPT/DATASET.md new file mode 100644 index 0000000..930403e --- /dev/null +++ b/fairseq/examples/MMPT/DATASET.md @@ -0,0 +1,34 @@ +# Dataset + +We understand video data are challenging to download and process. For videos, we provide our preprocessing scripts under `scripts/video_feature_extractor` (deeply adapted from `https://github.com/antoine77340/video_feature_extractor`); for text, we provide pre-tokenizing scripts under `scripts/text_token_extractor`.
+ +### S3D Feature Extraction +We use pre-trained [S3D](https://github.com/antoine77340/S3D_HowTo100M) for video feature extraction. Please place the models as `pretrained_models/s3d_dict.npy` and `pretrained_models/s3d_howto100m.pth`. + +We implement a `PathBuilder` to automatically track video ids, source video paths to their feature locations (you may need `conda install -c anaconda pandas`). Decoding may need `pip install ffmpeg-python`. + +### Howto100M +[Howto100M](https://www.di.ens.fr/willow/research/howto100m/) is a large-scale video pre-training dataset. You may download videos by yourself and run our preprocessing scripts. + +Several key differences of our preprocessing from existing papers: (1) we use `raw_caption.json` instead of `caption.json` to have pure self-supervision on text (`caption.json` has manual removal of stop words); (2) we remove partially duplicated texts that are originally designed for real-time readability (see `mmpt/processors/dedupprocessor.py`); (3) then we shard video/text features using `ShardedTensor` in `mmpt/utils/shardedtensor.py` for fast loading during training (faster than `h5py`). + +#### Steps +##### video +To extract video features: edit and run `bash scripts/video_feature_extractor/how2/s3d.sh`. (consider running this on multiple machines; by default, we store features in fp16 to save space and also for faster training). + +Split available video ids as `data/how2/how2_s3d_train.lst` and `data/how2/how2_s3d_val.lst`. + +Lastly, pack video features into `ShardedTensor` using `python scripts/video_feature_extractor/shard_feature.py`. + +##### text +Clean captions using `python -m mmpt.processors.dedupprocessor`. + +Tokenize deduplicated captions `data/how2/raw_caption_dedup.pkl` into sharded numpy arrays: +``` +python scripts/text_token_extractor/pretokenization.py scripts/text_token_extractor/configs/bert-base-uncased.yaml +``` + +### Youcook, MSRVTT etc.
+We use the version of Youcook and MSRVTT that come with Howto100M and MILNCE. Please download the data to `data/youcook` and `data/msrvtt` accordingly, you can also check `projects/task/youcook.yaml` and `projects/task/vtt.yaml` etc. in detail. +We extract features for Youcook, MSRVTT similar to the first step of Howto100M but we read text from meta data directly and perform on-the-fly tokenization. + diff --git a/fairseq/examples/MMPT/README.md b/fairseq/examples/MMPT/README.md new file mode 100644 index 0000000..4a84819 --- /dev/null +++ b/fairseq/examples/MMPT/README.md @@ -0,0 +1,166 @@ +# VideoCLIP and VLM + +You just found this toolkit for multimodal video understanding! It contains implementation of two recent multi-modal video understanding papers [VideoCLIP](https://arxiv.org/pdf/2109.14084.pdf) (EMNLP, 2021) and [VLM](https://aclanthology.org/2021.findings-acl.370.pdf) (ACL Findings, 2021), along with high-performance toolkits that are typically lacking in existing codebases. The toolkit is designed to contain generic performance-tuned components that can be potentially adapted to other frameworks (we initially use fairseq). + +VideoCLIP is a contrastive learning model for zero-shot transfer to retrieval/classification/sequence labeling style tasks. + + + +VLM is a masked language model style pre-training using only one encoder with masked modality model (MMM) for retrieval/generation/sequence labeling style tasks. + + + +### News +[Oct. 2021] Initial release of implementation for the following papers: +[VideoCLIP: Contrastive Pre-training for Zero-shot Video-Text Understanding](https://arxiv.org/pdf/2109.14084.pdf) (Xu et. al., EMNLP 2021) +[VLM: Task-agnostic Video-Language Model Pre-training for Video Understanding](https://aclanthology.org/2021.findings-acl.370.pdf) (Xu et. al., ACL Findings 2021) + + +### Installation +We aim to minimize the dependency of this repo on other packages.
+We use fairseq as the main trainer (no models/datasets dependency on fairseq. We will support other trainer in future): +``` +git clone https://github.com/pytorch/fairseq +cd fairseq +pip install -e . # also optionally follow fairseq README for apex installation for fp16 training. +export MKL_THREADING_LAYER=GNU # fairseq may need this for numpy. +``` + +Then install this toolkit: +``` +cd examples/MMPT # MMPT can be in any folder, not necessarily under fairseq/examples. +pip install -e . +``` + +The code is developed under Python=3.8.8, Pytorch=1.8, cuda=11.0 with fairseq=1.0.0a0+af0389f and tested under Python=3.8.8 pytorch=1.9 cuda=11.0 fairseq=1.0.0a0+8e7bc73 during code release. +Most models require `transformers==3.4` for API compatibility `pip install transformers==3.4`. +In addition, some downstream tasks may need `conda install pandas`. + + +### Usage +#### Download Checkpoints +We use pre-trained [S3D](https://github.com/antoine77340/S3D_HowTo100M) for video feature extraction. Please place the models as `pretrained_models/s3d_dict.npy` and `pretrained_models/s3d_howto100m.pth`. + +Download VideoCLIP checkpoint `https://dl.fbaipublicfiles.com/MMPT/retri/videoclip/checkpoint_best.pt` to `runs/retri/videoclip` or VLM checkpoint `https://dl.fbaipublicfiles.com/MMPT/mtm/vlm/checkpoint_best.pt` to `runs/mtm/vlm`. + +#### Demo of Inference +run `python locallaunch.py projects/retri/videoclip.yaml --dryrun` to get all `.yaml`s for VideoCLIP. 
+ +```python +import torch + +from mmpt.models import MMPTModel + + +model, tokenizer, aligner = MMPTModel.from_pretrained( + "projects/retri/videoclip/how2.yaml") + +model.eval() + + +# B, T, FPS, H, W, C (VideoCLIP is trained on 30 fps of s3d) +video_frames = torch.randn(1, 2, 30, 224, 224, 3) +caps, cmasks = aligner._build_text_seq( + tokenizer("some text", add_special_tokens=False)["input_ids"] +) + +caps, cmasks = caps[None, :], cmasks[None, :] # bsz=1 + +with torch.no_grad(): + output = model(video_frames, caps, cmasks, return_score=True) +print(output["score"]) # dot-product +``` + +#### Data Preparation +See [dataset](DATASET.md) for each dataset. + +#### Global Config for Training Pipeline +We organize a global config file for a training/testing pipeline under projects (see a detailed [explanation](CONFIG.md)). For example, VideoCLIP in `projects/retri/videoclip.yaml` and VLM is in `projects/mtm/vlm.yaml`. + +We wrap all cmds into `locallaunch.py` and `mmpt_cli/localjob.py`. You can check concrete cmds by `--dryrun` and then drop it for actual run. + +First, run `python locallaunch.py projects/retri/videoclip.yaml --dryrun` will generate configs for all configs of pre-training, zero-shot evaluation, fine-tuning and testing, for VideoCLIP under `projects/retri/videoclip`. + +Then each (either training or evaluation) process will be configed by a concrete config file (we save all complex arguments into the concrete config file for reproducibility, including fairseq args). For example, run zero-shot evaluation on youcook, +``` +python locallaunch.py projects/retri/videoclip/test_youcook_zs.yaml --jobtype local_predict # zero-shot evaluation. +python locallaunch.py projects/retri/videoclip/youcook_videoclip.yaml --jobtype local_single --dryrun # fine-tuning: use --dryrun to check cmds and drop it to make an actual run; local_small will run on two gpus (as in paper). 
+python locallaunch.py projects/retri/videoclip/test_youcook_videoclip.yaml --jobtype local_predict # testing on fine-tuned model. +``` + +Pretraining can be run as: +``` +python locallaunch.py projects/retri/videoclip/how2.yaml --jobtype local_single --dryrun # check then drop dryrun; paper is ran on local_big as 8 gpus. +``` +You may need to change `--jobtype`, check/extend `LocalJob` in `mmpt_cli/localjob.py` for multi-gpu/multi-node pre-training. + +The detailed instructions of pretraining and fine-tuning can be found at [pretraining instruction](pretraining.md) and [finetuning instruction](endtask.md). + + +### Development +Several components of this toolkit can be re-used for future research (and also our ongoing research). + +#### Framework Wrapper +We currently only support fairseq, but most components can be easily fit into other frameworks like huggingface. This repo is a `--user-dir` of fairseq with fairseq wrapper. For example, `mmpt/tasks` includes a `FairseqMMTTask`, which manages `mmpt/datasets` with `FairseqDataset`, `mmpt/models` with `FairseqModel`, `mmpt/losses` with `FairseqCriterion`. + +#### Processors +**Multi**modal research introduces the complexity on modality alignment from different input sources to losses. Inspired by [MMF](https://github.com/facebookresearch/mmf), this toolkit leverages `mmpt/processors` to handle various needs of data preprocessing and loading, **alleviating** the needs of multiple `torch.data.utils.Dataset` (that can be tricky for ablation study). +Processors can also be decoupled from `torch.data.utils.Dataset` for offline preprocessing instead of on-the-fly data preprocessing. + +We decouple a `mmpt.MMDataset` as 3 types of processors: `MetaProcessor`, `VideoProcessor`, `TextProcessor` and `Aligner`. They can be configed in `dataset` field of a config file (e.g., see `projects/task/how2.yaml`). +`MetaProcessor` is used to load the meta data about a dataset, aka, all video_ids of how2 dataset. 
+`VideoProcessor` is used to load the video features about a dataset. For example, S3D features for each second of a video. +`TextProcessor` is used to load the text (feature). For example, BERT pre-tokenized text clips for how2 dataset (with `start`s, `end`s of timestamps and `cap` for `token_ids`). +`Aligner` is the core class for different baselines that prepares the training data. For example, sampling a clip, masking tokens for MLM, etc. + +#### Performance-tuned Components +To speed up pre-training, this toolkit uses sharded features stored in mmaped numpy, backed by `ShardedTensor` in `mmpt/utils/shardedtensor.py` (adopted from MARGE paper). This reduces the loads of IO for multi-GPU training without loading all features for a video into the memory each time and `ShardedTensor` ensure features are stored in continuous disk space for near random access. This is used for both How2 video features and texts in `mmpt/processors/how2processor.py`. + + +### Citation +If this codebase is useful for your work, please cite the following papers: + +```BibTeX +@inproceedings{xu-etal-2021-videoclip, + title = "{VideoCLIP}: Contrastive Pre-training for\\Zero-shot Video-Text Understanding", + author = "Xu, Hu and + Ghosh, Gargi and + Huang, Po-Yao and + Okhonko, Dmytro and + Aghajanyan, Armen and + Metze, Florian and + Zettlemoyer, Luke and + Feichtenhofer, Christoph", + booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP)", + month = nov, + year = "2021", + address = "Online", + publisher = "Association for Computational Linguistics", +} + +@inproceedings{xu-etal-2021-vlm, + title = "{VLM}: Task-agnostic Video-Language Model Pre-training for Video Understanding", + author = "Xu, Hu and + Ghosh, Gargi and + Huang, Po-Yao and + Arora, Prahal and + Aminzadeh, Masoumeh and + Feichtenhofer, Christoph and + Metze, Florian and + Zettlemoyer, Luke", + booktitle = "Findings of the Association for Computational Linguistics: 
ACL-IJCNLP 2021", + month = aug, + year = "2021", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2021.findings-acl.370", + doi = "10.18653/v1/2021.findings-acl.370", + pages = "4227--4239", +} +``` + +### Bug Reports +This repo is in its initial stage, welcome bug reports to huxu@fb.com + +### Copyright +The majority of Multimodal Pre-training (MMPT) is licensed under CC-BY-NC, however portions of the project are available under separate license terms: Evaluation Codes/Models: Howto100M and HuggingFace Transformers are licensed under the Apache2.0 license; COIN and NLG-eval are licensed under the MIT license; CrossTask is licensed under the BSD-3; DiDeMo is licensed under the BSD-2 license. diff --git a/fairseq/examples/MMPT/endtask.md b/fairseq/examples/MMPT/endtask.md new file mode 100644 index 0000000..7690955 --- /dev/null +++ b/fairseq/examples/MMPT/endtask.md @@ -0,0 +1,41 @@ +# Zero-shot Transfer and Finetuning + +(If you are new to the ideas of `mmpt.processors`, see [README](README.md) first.) +All finetuning datasets (specifically `processors`) are defined in `mmpt.processors.dsprocessor`. +Given the complexity of different types of finetuning tasks, each task may have their own meta/video/text/aligner processors and `mmpt/evaluators/{Predictor,Metric}`. + +### Tasks + +Currently, we support 5 end datasets: `MSRVTT`, `Youcook`, `COIN`, `Crosstask` and `DiDeMo` with the following tasks: +text-video retrieval: `MSRVTT`, `Youcook`, `DiDeMo`; +video captioning: `Youcook`; +Video Question and Answering: `MSRVTT-QA`. + +To add your own dataset, you can specify the corresponding processors and config them in the `dataset` field of a config file, such as `projects/task/vtt.yaml`. + +### Zero-shot Transfer (no Training) +Zero-shot transfer will run the pre-trained model (e.g., VideoCLIP) directly on testing data. 
Configs with pattern: `projects/task/*_zs_*.yaml` are dedicated to zero-shot transfer. + +### Fine-tuning + +The training of a downstream task is similar to pretraining, except you may need to specify the `restore_file` in `fairseq.checkpoint` and reset optimizers, see `projects/task/ft.yaml` that is included by `projects/task/vtt.yaml`. + +We typically do finetuning on 2 gpus (`local_small`). + +### Testing +For each finetuning dataset, you may need to specify a testing config, similar to `projects/task/test_vtt.yaml`. + +We define `mmpt.evaluators.Predictor` for different types of prediction. For example, `MSRVTT` and `Youcook` are video-retrieval tasks and are expected to use `RetrievalPredictor`. You may need to define your new type of predictors and specify that in `predictor` field of a testing config. + +Each task may also have their own metric for evaluation. This can be created in `mmpt.evaluators.Metric` and specified in the `metric` field of a testing config. + +Launching a test is as simple as training by specifying the path of a testing config: +```python locallaunch.py projects/mfmmlm/test_vtt.yaml``` +Testing will be launched locally by default since prediction is computationally less expensive. + +### Third-party Libraries +We list the following finetuning tasks that require third-party libraries. + +Youcook captioning: `https://github.com/Maluuba/nlg-eval` + +CrossTask: `https://github.com/DmZhukov/CrossTask`'s `dp` under `third-party/CrossTask` (`python setup.py build_ext --inplace`) diff --git a/fairseq/examples/MMPT/locallaunch.py b/fairseq/examples/MMPT/locallaunch.py new file mode 100644 index 0000000..e20fd81 --- /dev/null +++ b/fairseq/examples/MMPT/locallaunch.py @@ -0,0 +1,148 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
+import argparse +import os + +from omegaconf import OmegaConf + +from mmpt.utils import recursive_config, overwrite_dir +from mmpt_cli.localjob import LocalJob + + +class JobLauncher(object): + JOB_CONFIG = { + "local": LocalJob, + } + + def __init__(self, yaml_file): + self.yaml_file = yaml_file + job_key = "local" + + if yaml_file.endswith(".yaml"): + config = recursive_config(yaml_file) + if config.task_type is not None: + job_key = config.task_type.split("_")[0] + else: + raise ValueError("unknown extension of job file:", yaml_file) + self.job_key = job_key + + def __call__(self, job_type=None, dryrun=False): + if job_type is not None: + self.job_key = job_type.split("_")[0] + print("[JobLauncher] job_key", self.job_key) + job = JobLauncher.JOB_CONFIG[self.job_key]( + self.yaml_file, job_type=job_type, dryrun=dryrun) + return job.submit() + + +class Pipeline(object): + """a job that loads yaml config.""" + + def __init__(self, fn): + """ + load a yaml config of a job and save generated configs as yaml for each task. + return: a list of files to run as specified by `run_task`. + """ + if fn.endswith(".py"): + # a python command. + self.backend = "python" + self.run_yamls = [fn] + return + + job_config = recursive_config(fn) + if job_config.base_dir is None: # single file job config. + self.run_yamls = [fn] + return + + self.project_dir = os.path.join("projects", job_config.project_dir) + self.run_dir = os.path.join("runs", job_config.project_dir) + + if job_config.run_task is not None: + run_yamls = [] + for stage in job_config.run_task: + # each stage can have multiple tasks running in parallel. 
+ if OmegaConf.is_list(stage): + stage_yamls = [] + for task_file in stage: + stage_yamls.append( + os.path.join(self.project_dir, task_file)) + run_yamls.append(stage_yamls) + else: + run_yamls.append(os.path.join(self.project_dir, stage)) + self.run_yamls = run_yamls + configs_to_save = self._overwrite_task(job_config) + self._save_configs(configs_to_save) + + def __getitem__(self, idx): + yaml_files = self.run_yamls[idx] + if isinstance(yaml_files, list): + return [JobLauncher(yaml_file) for yaml_file in yaml_files] + return [JobLauncher(yaml_files)] + + def __len__(self): + return len(self.run_yamls) + + def _save_configs(self, configs_to_save: dict): + # save + os.makedirs(self.project_dir, exist_ok=True) + for config_file in configs_to_save: + config = configs_to_save[config_file] + print("saving", config_file) + OmegaConf.save(config=config, f=config_file) + + def _overwrite_task(self, job_config): + configs_to_save = {} + self.base_project_dir = os.path.join("projects", job_config.base_dir) + self.base_run_dir = os.path.join("runs", job_config.base_dir) + + for config_sets in job_config.task_group: + overwrite_config = job_config.task_group[config_sets] + if ( + overwrite_config.task_list is None + or len(overwrite_config.task_list) == 0 + ): + print( + "[warning]", + job_config.task_group, + "has no task_list specified.") + # we don't want this added to a final config. + task_list = overwrite_config.pop("task_list", None) + for config_file in task_list: + config_file_path = os.path.join( + self.base_project_dir, config_file) + config = recursive_config(config_file_path) + # overwrite it. + if overwrite_config: + config = OmegaConf.merge(config, overwrite_config) + overwrite_dir(config, self.run_dir, basedir=self.base_run_dir) + save_file_path = os.path.join(self.project_dir, config_file) + configs_to_save[save_file_path] = config + return configs_to_save + + +def main(args): + job_type = args.jobtype if args.jobtype else None + # parse multiple pipelines. 
+ pipelines = [Pipeline(fn) for fn in args.yamls.split(",")] + + for pipe_id, pipeline in enumerate(pipelines): + if not hasattr(pipeline, "project_dir"): + for job in pipeline[0]: + job(job_type=job_type, dryrun=args.dryrun) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("yamls", type=str) + parser.add_argument( + "--dryrun", + action="store_true", + help="run config and prepare to submit without launch the job.", + ) + parser.add_argument( + "--jobtype", type=str, default="", + help="force to run jobs as specified.") + args = parser.parse_args() + main(args) diff --git a/fairseq/examples/MMPT/mmpt/__init__.py b/fairseq/examples/MMPT/mmpt/__init__.py new file mode 100644 index 0000000..6ff86dd --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +try: + # fairseq user dir + from .datasets import FairseqMMDataset + from .losses import FairseqCriterion + from .models import FairseqMMModel + from .tasks import FairseqMMTask +except ImportError: + pass diff --git a/fairseq/examples/MMPT/mmpt/datasets/__init__.py b/fairseq/examples/MMPT/mmpt/datasets/__init__.py new file mode 100644 index 0000000..2578235 --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/datasets/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class FairseqMMDataset(FairseqDataset):
    """
    A wrapper class exposing an MMDataset-style torch dataset to fairseq.

    Items are fetched from the wrapped dataset under a numpy seed derived
    from the current epoch and the item index.
    """

    def __init__(self, mmdataset):
        """Wrap ``mmdataset``.

        Raises:
            TypeError: if ``mmdataset`` is not a ``torch.utils.data.Dataset``.
        """
        if not isinstance(mmdataset, Dataset):
            raise TypeError("mmdataset must be of type `torch.utils.data.dataset`.")
        self.mmdataset = mmdataset
        # __getitem__ reads self.epoch; give it a value up front so that
        # indexing also works before set_epoch() has been called.
        self.epoch = 1

    def set_epoch(self, epoch, **unused):
        """Record the epoch used to seed item loading."""
        super().set_epoch(epoch)
        self.epoch = epoch

    def __getitem__(self, idx):
        """Return item ``idx`` of the wrapped dataset under a fixed numpy seed."""
        with data_utils.numpy_seed(43211, self.epoch, idx):
            return self.mmdataset[idx]

    def __len__(self):
        return len(self.mmdataset)

    def collater(self, samples):
        """Merge a list of samples into a batch.

        Delegates to ``mmdataset.collator`` when the wrapped dataset defines
        one. Otherwise dict samples are collated key by key (keys whose value
        is None in the first sample are dropped) and any other sample type is
        handed to ``default_collate``. An empty list yields ``{}``.
        """
        if hasattr(self.mmdataset, "collator"):
            return self.mmdataset.collator(samples)
        if len(samples) == 0:
            return {}
        if isinstance(samples[0], dict):
            batch = OrderedDict()
            for key in samples[0]:
                if samples[0][key] is not None:
                    batch[key] = default_collate([sample[key] for sample in samples])
            return batch
        else:
            return default_collate(samples)

    def size(self, index):
        """dummy implementation: we don't use --max-tokens"""
        return 1

    def num_tokens(self, index):
        """dummy implementation: we don't use --max-tokens"""
        return 1
class MMDataset(Dataset):
    """
    A generic multi-modal dataset assembled from four processors.

    Args:
        `meta_processor`: indexable by example id, returns
            ``(video_id, text_id)``; also provides ``split`` and ``len()``.
        `video_processor`: callable turning a video_id into a video feature.
        `text_processor`: callable turning a text_id into a text feature.
        `align_processor`: callable combining
            ``(video_id, video_feature, text_feature)`` into one example.
    """

    def __init__(
        self,
        meta_processor,
        video_processor,
        text_processor,
        align_processor,
    ):
        self.meta_processor = meta_processor
        self.split = meta_processor.split
        self.video_processor = video_processor
        self.text_processor = text_processor
        self.align_processor = align_processor

    def __len__(self):
        return len(self.meta_processor)

    def __getitem__(self, idx):
        """Build example ``idx``; on the test split the seed is set to ``idx`` first."""
        if self.split == "test":
            set_seed(idx)
        video_id, text_id = self.meta_processor[idx]
        example = self.align_processor(
            video_id,
            self.video_processor(video_id),
            self.text_processor(text_id),
        )
        # TODO (huxu): the index is attached for debugging purposes.
        example.update({"idx": idx})
        return example

    def collater(self, samples):
        """This collator is deprecated.
        set self.collator = MMDataset.collater.
        see collator in FairseqMMDataset.

        Dict samples are collated per key (keys that are None in the first
        sample are skipped); other samples go to ``default_collate``.
        """
        if len(samples) == 0:
            return {}
        first = samples[0]
        if not isinstance(first, dict):
            return default_collate(samples)
        batch = OrderedDict()
        for key, first_value in first.items():
            if first_value is None:
                continue
            batch[key] = default_collate([sample[key] for sample in samples])
        return batch

    def print_example(self, output):
        """Print one example for debugging.

        Note: when the aligner subsamples (``subsampling > 1``) every tensor
        in ``output`` is replaced in place by its first element.
        """
        print("[one example]", output["video_id"])
        subsampling = getattr(self.align_processor, "subsampling", None)
        if subsampling is not None and subsampling > 1:
            for key in output:
                if torch.is_tensor(output[key]):
                    output[key] = output[key][0]

        # search tokenizer to translate ids back: the text processor first,
        # then the aligner.
        tokenizer = None
        for processor in (self.text_processor, self.align_processor):
            if hasattr(processor, "tokenizer"):
                tokenizer = processor.tokenizer
                break
        if tokenizer is not None:
            caps = output["caps"].tolist()
            if isinstance(caps[0], list):
                caps = caps[0]
            print("caps", tokenizer.decode(caps))
            print("caps", tokenizer.convert_ids_to_tokens(caps))

        for key, value in output.items():
            if not torch.is_tensor(value):
                continue
            if len(value.size()) >= 3:  # attention_mask.
                print(key, value.size())
                print(key, "first", value[0, :, :])
                print(key, "last", value[-1, :, :])
            else:
                print(key, value)
        print("[end of one example]")
+try: + from .expmetric import * +except ImportError: + pass diff --git a/fairseq/examples/MMPT/mmpt/evaluators/evaluator.py b/fairseq/examples/MMPT/mmpt/evaluators/evaluator.py new file mode 100644 index 0000000..94d9c5e --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/evaluators/evaluator.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import os +import glob +import numpy as np + +from . import metric as metric_path +from . import predictor as predictor_path + + +class Evaluator(object): + """ + perform evaluation on a single (downstream) task. + make this both offline and online. + TODO(huxu) saving evaluation results. + """ + + def __init__(self, config, eval_dataloader=None): + if config.metric is None: + raise ValueError("config.metric is", config.metric) + metric_cls = getattr(metric_path, config.metric) + self.metric = metric_cls(config) + if config.predictor is None: + raise ValueError("config.predictor is", config.predictor) + predictor_cls = getattr(predictor_path, config.predictor) + self.predictor = predictor_cls(config) + self.eval_dataloader = eval_dataloader + + def __call__(self): + try: + print(self.predictor.pred_dir) + for pred_file in glob.glob( + self.predictor.pred_dir + "/*_merged.npy"): + outputs = np.load(pred_file) + results = self.metric.compute_metrics(outputs) + self.metric.print_computed_metrics(results) + + outputs = np.load(os.path.join( + self.predictor.pred_dir, "merged.npy")) + results = self.metric.compute_metrics(outputs) + return {"results": results, "metric": self.metric} + except FileNotFoundError: + print("\n[missing]", self.predictor.pred_dir) + return {} + + def evaluate(self, model, eval_dataloader=None, output_file="merged"): + if eval_dataloader is None: + eval_dataloader = self.eval_dataloader + outputs = self.predictor.predict_loop( + model, eval_dataloader, 
class Metric(object):
    """Base class of the evaluation metrics.

    ``metric_names`` lists the keys a metric reports; the first name is the
    one returned by ``best_metric``.
    """

    def __init__(self, config, metric_names):
        # copy, so the list defaults declared by subclasses are never shared.
        self.metric_names = list(metric_names)

    def best_metric(self, metric):
        """Return the value of the first metric name from ``metric``."""
        return metric[self.metric_names[0]]

    def save_metrics(self, fn, metrics):
        """Write ``metrics`` as JSON to the file ``fn``."""
        with open(fn, "w") as fw:
            # json.dump takes the object first and the file second.
            json.dump(metrics, fw)

    def print_computed_metrics(self, metrics):
        raise NotImplementedError


class RetrievalMetric(Metric):
    """
    this is modified from `howto100m/metrics.py`.
    History of changes:
    refactor as a class.
    add metric_key in __init__
    """

    def __init__(self, config, metric_names=["R1", "R5", "R10", "MR"]):
        super().__init__(config, metric_names)
        self.error = False  # TODO(huxu): add to config to print error.

    def compute_metrics(self, outputs, texts, **kwargs):
        """Compute R@1/5/10 and the median rank from a square score matrix.

        The diagonal of ``outputs`` holds the score of each matching pair;
        the rank of an example is the position of that score within its row
        sorted in descending order.
        """
        x = outputs
        sx = np.sort(-x, axis=1)
        d = np.diag(-x)
        d = d[:, np.newaxis]
        ind = sx - d
        ind = np.where(ind == 0)
        ind = ind[1]
        metrics = {}
        metrics["R1"] = float(np.sum(ind == 0)) / len(ind)
        metrics["R5"] = float(np.sum(ind < 5)) / len(ind)
        metrics["R10"] = float(np.sum(ind < 10)) / len(ind)
        metrics["MR"] = np.median(ind) + 1

        if self.error:
            # pair the first (up to 20) texts with their top-scored text;
            # bounded so that small evaluation sets do not raise IndexError.
            max_idx = np.argmax(outputs, axis=1)
            n_examples = min(20, len(max_idx), len(texts))
            metrics["error"] = [
                (texts[ex_idx], texts[max_idx[ex_idx]])
                for ex_idx in range(n_examples)
            ]
        return metrics

    def print_computed_metrics(self, metrics):
        r1 = metrics["R1"]
        r5 = metrics["R5"]
        r10 = metrics["R10"]
        mr = metrics["MR"]
        print(
            "R@1: {:.4f} - R@5: {:.4f} - R@10: {:.4f} - Median R: {}".format(
                r1, r5, r10, mr
            )
        )
        if "error" in metrics:
            print(metrics["error"])


class DiDeMoMetric(Metric):
    """
    History of changes:
    python 2.x to python 3.x.
    merge utils.py into eval to save one file.
    reference: https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/eval.py
    Code to evaluate your results on the DiDeMo dataset.
    """
    def __init__(self, config, metric_names=["rank1", "rank5", "miou"]):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        assert len(outputs) == len(targets)
        rank1, rank5, miou = self._eval_predictions(outputs, targets)
        metrics = {
            "rank1": rank1,
            "rank5": rank5,
            "miou": miou
        }
        return metrics

    def print_computed_metrics(self, metrics):
        rank1 = metrics["rank1"]
        rank5 = metrics["rank5"]
        miou = metrics["miou"]

        print(
            "Average rank@1: {:.4f} Average rank@5: {:.4f} Average iou: {:.4f}".format(
                rank1, rank5, miou
            )
        )

    def _iou(self, pred, gt):
        """Intersection over union of two inclusive (start, end) segments."""
        intersection = max(0, min(pred[1], gt[1]) + 1 - max(pred[0], gt[0]))
        union = max(pred[1], gt[1]) + 1 - min(pred[0], gt[0])
        return float(intersection)/union

    def _rank(self, pred, gt):
        """1-based position of segment ``gt`` in the ranked list ``pred``."""
        return pred.index(tuple(gt)) + 1

    def _eval_predictions(self, segments, data):
        '''
        Inputs:
        segments: For each item in the ground truth data, rank possible video segments given the description and video.
            In DiDeMo, there are 21 possible moments extracted for each video so the list of video segments will be of length 21.
            The first video segment should be the video segment that best corresponds to the text query.
            There are 4180 sentences in the validation data, so when evaluating a model on the val dataset,
            segments should be a list of length 4180, and each item in segments should be a list of length 21.
        data: ground truth data
        '''
        average_ranks = []
        average_iou = []
        for s, d in zip(segments, data):
            pred = s[0]
            ious = [self._iou(pred, t) for t in d['times']]
            # mean over the (up to) three best-matching annotations.
            average_iou.append(np.mean(np.sort(ious)[-3:]))
            # annotations whose segment is not among the predictions are skipped.
            ranks = [self._rank(s, t) for t in d['times'] if tuple(t) in s]
            average_ranks.append(np.mean(np.sort(ranks)[:3]))
        rank1 = np.sum(np.array(average_ranks) <= 1)/float(len(average_ranks))
        rank5 = np.sum(np.array(average_ranks) <= 5)/float(len(average_ranks))
        miou = np.mean(average_iou)
        return rank1, rank5, miou


class NLGMetric(Metric):
    """Text generation metrics computed by NLGEval."""

    def __init__(
        self,
        config,
        metric_names=[
            "Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4",
            "METEOR", "ROUGE_L", "CIDEr"
        ]
    ):
        super().__init__(config, metric_names)
        # please install NLGEval from `https://github.com/Maluuba/nlg-eval`
        from nlgeval import NLGEval
        self.nlg = NLGEval()

    def compute_metrics(self, outputs, targets, **kwargs):
        return self.nlg.compute_metrics(
            hyp_list=outputs, ref_list=targets)

    def print_computed_metrics(self, metrics):
        Bleu_1 = metrics["Bleu_1"]
        Bleu_2 = metrics["Bleu_2"]
        Bleu_3 = metrics["Bleu_3"]
        Bleu_4 = metrics["Bleu_4"]
        METEOR = metrics["METEOR"]
        ROUGE_L = metrics["ROUGE_L"]
        CIDEr = metrics["CIDEr"]

        print(
            "Bleu_1: {:.4f} - Bleu_2: {:.4f} - Bleu_3: {:.4f} - Bleu_4: {:.4f} - METEOR: {:.4f} - ROUGE_L: {:.4f} - CIDEr: {:.4f}".format(
                Bleu_1, Bleu_2, Bleu_3, Bleu_4, METEOR, ROUGE_L, CIDEr
            )
        )


class QAMetric(Metric):
    """Accuracy of predicted answers."""

    def __init__(
        self,
        config,
        metric_names=["acc"]
    ):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        from sklearn.metrics import accuracy_score
        return {"acc": accuracy_score(targets, outputs)}

    def print_computed_metrics(self, metrics):
        print("acc: {:.4f}".format(metrics["acc"]))


class COINActionSegmentationMetric(Metric):
    """
    COIN dataset listed 3 repos for Action Segmentation.
    Action Sets, NeuralNetwork-Viterbi, TCFPN-ISBA.
    The first and second are the same.
    https://github.com/alexanderrichard/action-sets/blob/master/eval.py

    Future reference for the third:
    `https://github.com/Zephyr-D/TCFPN-ISBA/blob/master/utils/metrics.py`
    """
    def __init__(self, config, metric_name=["frame_acc"]):
        super().__init__(config, metric_name)

    def compute_metrics(self, outputs, targets, **kwargs):
        """Return the fraction of frames whose predicted label equals the target.

        Extra keyword arguments are accepted and ignored, like the other
        metrics do, so a predictor may return additional fields.
        """
        n_errors = np.sum(np.asarray(outputs) != np.asarray(targets))
        n_frames = len(targets)
        return {"frame_acc": 1.0 - float(n_errors) / n_frames}

    def print_computed_metrics(self, metrics):
        fa = metrics["frame_acc"]
        print("frame accuracy:", fa)


class CrossTaskMetric(Metric):
    """Per-task and average step recall for CrossTask."""

    def __init__(self, config, metric_names=["recall"]):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        """refactored from line 166:
        https://github.com/DmZhukov/CrossTask/blob/master/train.py"""

        recalls = self._get_recalls(Y_true=targets, Y_pred=outputs)
        results = {}
        for task, rec in recalls.items():
            results[str(task)] = rec

        avg_recall = np.mean(list(recalls.values()))
        results["recall"] = avg_recall
        return results

    def print_computed_metrics(self, metrics):
        print('Recall: {0:0.3f}'.format(metrics["recall"]))
        for task in metrics:
            if task != "recall":
                print('Task {0}. Recall = {1:0.3f}'.format(
                    task, metrics[task]))

    def _get_recalls(self, Y_true, Y_pred):
        """refactored from
        https://github.com/DmZhukov/CrossTask/blob/master/train.py

        Both arguments map task -> video id -> assignment matrix; only
        videos present in both are counted.
        """

        step_match = {task: 0 for task in Y_true.keys()}
        step_total = {task: 0 for task in Y_true.keys()}
        for task, ys_true in Y_true.items():
            ys_pred = Y_pred[task]
            for vid in set(ys_pred.keys()).intersection(set(ys_true.keys())):
                y_true = ys_true[vid]
                y_pred = ys_pred[vid]
                # columns with at least one positive entry are annotated steps.
                step_total[task] += (y_true.sum(axis=0) > 0).sum()
                step_match[task] += (y_true*y_pred).sum()
        recalls = {
            task: step_match[task] / n for task, n in step_total.items()}
        return recalls


class ActionRecognitionMetric(Metric):
    """Accuracy and recall@k over three data splits."""

    def __init__(
        self,
        config,
        metric_names=["acc", "acc_splits", "r1_splits", "r5_splits", "r10_splits"]
    ):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, splits, **kwargs):
        """Evaluate on the rows marked ``2`` in each of the three splits.

        ``acc`` is the accuracy on the first split; the ``*_splits`` entries
        hold one value per split.
        """
        all_video_embd = outputs
        labels = targets
        # exactly three split markers are expected.
        split1, split2, split3 = splits
        accs = []
        r1s = []
        r5s = []
        r10s = []
        for s in (split1, split2, split3):
            test_rows = np.where(s == 2)[0]
            logits = all_video_embd[test_rows]
            label_test = labels[test_rows]
            X_pred = np.argmax(logits, axis=1)
            acc = np.sum(X_pred == label_test) / float(len(X_pred))
            accs.append(acc)
            # compute recall.
            sorted_pred = (-logits).argsort(axis=-1)
            label_test_sp = label_test.reshape(-1, 1)

            r1 = np.mean((sorted_pred[:, :1] == label_test_sp).sum(axis=1), axis=0)
            r5 = np.mean((sorted_pred[:, :5] == label_test_sp).sum(axis=1), axis=0)
            r10 = np.mean((sorted_pred[:, :10] == label_test_sp).sum(axis=1), axis=0)
            r1s.append(r1)
            r5s.append(r5)
            r10s.append(r10)

        return {"acc": accs[0], "acc_splits": accs, "r1_splits": r1s, "r5_splits": r5s, "r10_splits": r10s}

    def print_computed_metrics(self, metrics):
        for split, acc in enumerate(metrics["acc_splits"]):
            print("Top 1 accuracy on split {}: {}; r1 {}; r5 {}; r10 {}".format(
                split + 1, acc,
                metrics["r1_splits"][split],
                metrics["r5_splits"][split],
                metrics["r10_splits"][split],
                )
            )
+ if hasattr(config, "eval") and config.eval is not None: + self.pred_dir = config.eval.save_path + os.makedirs(self.pred_dir, exist_ok=True) + + def __call__(self, outputs): + """extract the prediction and save it.""" + raise NotImplementedError + + def predict_loop(self, model, eval_dataloader, output_file=None): + """on-the-fly prediction on a single gpu.""" + self.full_scores = [] + model.eval() + model = model.to(0) + with torch.no_grad(): + for data in eval_dataloader: + data = self.to_ctx(data) + outputs = model(**data) + outputs.update(data) + self(outputs) + return self.finalize(output_file) + + def finalize(self, output_file): + pass + + def to_ctx(self, data, ctx=0, dtype=None): + if isinstance(data, dict): + for key in data: + if torch.is_tensor(data[key]): + if dtype is not None and data[key].dtype == torch.float32: + data[key] = data[key].to(dtype) + data[key] = data[key].to(ctx) + return data + else: + raise ValueError("non-dict type of batch is not supported yet.") + + +class NLGPredictor(Predictor): + """Predicting Text from MMFusion models.""" + """TODO: make a context.""" + def __init__(self, config): + super().__init__(config) + from transformers import AutoTokenizer + + self.tokenizer = AutoTokenizer.from_pretrained( + config.dataset.bert_name, + bos_token="[CLS]", eos_token="[SEP]") + self.bos_token_id = self.tokenizer.bos_token_id + self.eos_token_id = self.tokenizer.eos_token_id + + def predict_loop(self, model, eval_dataloader, output_file=None): + """TODO: refactor base classes.""" + ctx = 0 + outputs = {"outputs": [], "targets": [[]]} + model.eval() + model = model.to(ctx) + with torch.no_grad(): + for data in tqdm(eval_dataloader): + data = self.to_ctx(data, ctx) + self(data, model, outputs) + return self.finalize(outputs, output_file) + + def __call__(self, data, model, outputs): + data.update({ + "bos_token_id": self.bos_token_id, + "eos_token_id": self.eos_token_id + }) + + output = model.generate(**data) + assert len(output) == 
class RetrievalPredictor(Predictor):
    """generated `pooled_video` and `pooled_text`.

    Collects both pooled embeddings over the whole dataloader and turns them
    into a text-by-video similarity matrix.
    """
    def __init__(self, config):
        super().__init__(config)
        from transformers import AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            config.dataset.bert_name)

    def predict_loop(
        self,
        model,
        eval_dataloader,
        output_file="retrieval.npy"
    ):
        """on-the-fly prediction on a single gpu.

        Returns:
            dict with ``outputs`` (the similarity matrix) and ``texts`` (the
            decoded captions, in dataloader order).
        """
        full_scores = []
        texts = []
        model.eval()
        model = model.cuda()
        with torch.no_grad():
            for data in eval_dataloader:
                # convert positional batches to a dict.
                if not isinstance(data, dict):
                    data = {
                        "caps": data[0],
                        "cmasks": data[1],
                        "vfeats": data[2],
                        "vmasks": data[3],
                        "video_id": data[4]
                    }
                data = self.to_ctx(data)
                outputs = model(**data)
                outputs.update(data)
                self(outputs, full_scores)
                for _cap in data["caps"]:
                    texts.append(
                        self.tokenizer.decode(_cap, skip_special_tokens=True)
                    )

        return self.finalize(full_scores, texts, output_file)

    def __call__(self, sample, full_scores):
        """Append the pooled embeddings of one batch to ``full_scores``."""
        scores = self._get_pooled_outputs(sample)
        self._append_scores(scores, full_scores)

    def finalize(self, full_scores, texts, output_file=None):
        """Aggregate the scores and optionally save them as ``<output_file>.npy``.

        Nothing is written when ``self.pred_dir`` is None, which is the
        on-the-fly mode of the base predictor: there is no directory to save
        to and joining a path with None would raise.
        """
        outputs = self._aggregate_scores(full_scores)
        # NOTE(review): ".npy" is always appended, so the predict_loop
        # default "retrieval.npy" is stored as "retrieval.npy.npy".
        if output_file is not None and self.pred_dir is not None:
            np.save(os.path.join(self.pred_dir, output_file + ".npy"), outputs)
        return {"outputs": outputs, "texts": texts}

    def _get_pooled_outputs(self, outputs):
        """Return ``(pooled_video, pooled_text)`` from the model outputs."""
        if "pooled_video" in outputs:
            return outputs["pooled_video"], outputs["pooled_text"]
        else:
            raise ValueError("unknown format of outputs.")

    def _append_scores(self, scores, full_scores):
        """Store the video/text embeddings as numpy arrays in two parallel lists."""
        assert len(scores) == 2
        if len(full_scores) == 0:
            full_scores.append([])
            full_scores.append([])
        full_scores[0].append(scores[0].cpu().detach().numpy())
        full_scores[1].append(scores[1].cpu().detach().numpy())

    def _aggregate_scores(self, scores):
        """Concatenate all batches and return ``text_hidden @ video_hidden.T``."""
        assert len(scores) == 2
        video_hidden = np.concatenate(scores[0], axis=0)
        text_hidden = np.concatenate(scores[1], axis=0)
        # clear up.
        self.full_scores = []
        return np.matmul(text_hidden, video_hidden.T)
class CrossTaskPredictor(Predictor):
    """
    CrossTaskPredictor needs to compute the average of logits
    for overlapped sliding-window.
    """
    def __init__(self, config):
        super().__init__(config)
        self.lsm = torch.nn.LogSoftmax(dim=1)
        self.max_video_len = config.dataset.max_video_len
        self.sliding_window = config.dataset.sliding_window
        self.sliding_window_size = config.dataset.sliding_window_size
        self.annotation_path = config.dataset.annotation_path

    def predict_loop(self, model, eval_dataloader, output_file="result.pkl"):
        """refactored from line 144:
        https://github.com/DmZhukov/CrossTask/blob/master/train.py

        Returns the dict produced by ``finalize``.
        """
        ctx = 0
        model.eval()
        model = model.to(ctx)
        # this is not a loss but just compute neg_log_prob.
        Y_pred = {}
        Y_true = {}
        with torch.no_grad():
            for batch in eval_dataloader:
                self(batch, model, Y_pred, Y_true)
        return self.finalize(Y_pred, Y_true, output_file)

    def __call__(self, sample, model, Y_pred, Y_true):
        """Predict the step assignment of one video.

        The result is stored in ``Y_pred[task][vid]``; when an annotation csv
        exists for the video, the ground truth goes to ``Y_true[task][vid]``.
        """
        # please install dp from `https://github.com/DmZhukov/CrossTask`
        from dp import dp
        vid, task = sample['video_id'][0], sample['task'][0]
        sample = self.to_ctx(sample)
        # compute the average logits over sliding windows.
        output = model(**sample)
        batch_logits = output["logits"].cpu()

        video_len = sample["video_len"][0]

        # the following version is slow.
        logits = torch.zeros((video_len, batch_logits.size(1)))
        logits_counts = torch.zeros((video_len, 1), dtype=torch.long)
        # use the same loop as aligner to recover.
        batch_logit_idx = 0
        for window_start in range(0, video_len, self.sliding_window):
            video_end = min(video_len - window_start, self.sliding_window_size)
            logits[window_start: window_start + video_end] += batch_logits[
                batch_logit_idx: batch_logit_idx + video_end]
            batch_logit_idx += video_end
            # count how many windows covered each frame, for the average.
            logits_counts[window_start: window_start + video_end] += torch.ones((video_end, 1), dtype=torch.long)

            # the window starting here already reaches the end of the video.
            if (video_len - window_start) <= self.sliding_window_size:
                break

        logits /= logits_counts
        assert logits.size() == (video_len, batch_logits.size(1)), "{}, {}".format(logits.size(), video_len)

        O = self.lsm(logits)
        y = np.zeros(O.size(), dtype=np.float32)
        # dp fills y in place from the negative log-probabilities.
        dp(y, -O.detach().cpu().numpy())
        if task not in Y_pred:
            Y_pred[task] = {}
        Y_pred[task][vid] = y
        annot_path = os.path.join(
            self.annotation_path, task+'_'+vid+'.csv')
        if os.path.exists(annot_path):
            if task not in Y_true:
                Y_true[task] = {}
            Y_true[task][vid] = self._read_assignment(
                *y.shape, annot_path)

    def finalize(self, Y_pred, Y_true, output_file=None):
        """Optionally pickle the predictions and return them.

        Nothing is written when ``self.pred_dir`` is None, which is the
        on-the-fly mode of the base predictor: there is no directory to save
        to and joining a path with None would raise.
        """
        if output_file is not None and self.pred_dir is not None:
            with open(
                    os.path.join(self.pred_dir, output_file + ".pkl"),
                    "wb") as fw:
                pickle.dump(
                    {"Y_pred": Y_pred, "Y_true": Y_true}, fw,
                    protocol=pickle.HIGHEST_PROTOCOL)
        return {"outputs": Y_pred, "targets": Y_true}

    def _read_assignment(self, T, K, path):
        """
        refactored from https://github.com/DmZhukov/CrossTask/blob/master/data.py
        How to interpret constraints on loss that is going to be minimized:
        lambd is a big number;
        self.lambd * C is a big number for all valid position (csv stores invalids)

        def forward(self, O, Y, C):
            return (Y*(self.lambd * C - self.lsm(O))).mean(dim=0).sum()

        This will load the csv file and fill-in the step col from start to end rows.

        Each csv row is ``step,start,end``; ``step`` is 1-based, ``start`` is
        rounded down and ``end`` rounded up. Returns a ``[T, K]`` uint8 matrix.
        """

        Y = np.zeros([T, K], dtype=np.uint8)
        with open(path, 'r') as f:
            for line in f:
                step, start, end = line.strip().split(',')
                start = int(math.floor(float(start)))
                end = int(math.ceil(float(end)))
                step = int(step) - 1
                Y[start:end, step] = 1
        return Y
+ batch_logit_idx = 0 + for window_start in range(0, video_len, self.sliding_window): + video_end = min(video_len - window_start, self.sliding_window_size) + logits[window_start: window_start + video_end] += batch_logits[ + batch_logit_idx: batch_logit_idx + video_end] + batch_logit_idx += video_end + logits_counts[window_start: window_start + video_end] += torch.ones((video_end, 1), dtype=torch.long) + if (video_len - window_start) <= self.sliding_window_size: + break + logits /= logits_counts + assert logits.size() == (video_len, batch_logits.size(1)), "{}, {}".format(logits.size(), video_len) + return logits + + def finalize(self, Y_pred, Y_true, output_file=None): + Y_pred = torch.cat(Y_pred, dim=0).numpy() + Y_true = torch.cat(Y_true, dim=0).numpy() + assert len(Y_pred) == len(Y_true) + + error_mask = Y_pred != Y_true + print("sample error", Y_pred[error_mask][:10], Y_true[error_mask][:10]) + print("sample error", Y_pred[error_mask][10:20], Y_true[error_mask][10:20]) + + if output_file is not None: + with open( + os.path.join(self.pred_dir, output_file + ".pkl"), + "wb") as fw: + pickle.dump( + {"Y_pred": Y_pred, "Y_true": Y_true}, fw, + protocol=pickle.HIGHEST_PROTOCOL) + return {"outputs": Y_pred, "targets": Y_true} + + +class COINZSPredictor(COINPredictor): + """ + COINZSPredictor for COIN zero-shot prediction. 
+ """ + + def __init__(self, config): + super().__init__(config) + self.dataset_config = config.dataset + + def predict_loop(self, model, eval_dataloader, output_file="result.pkl"): + """refactored from line 144: + https://github.com/DmZhukov/CrossTask/blob/master/train.py + """ + ctx = 0 + model.eval() + model = model.to(ctx) + + with torch.no_grad(): + outputs = eval_dataloader.dataset.meta_processor.meta_text_labels( + self.dataset_config) + outputs = self.to_ctx(outputs, ctx) + label_hidden_states = model.forward_text(**outputs).cpu() + label_sim = label_hidden_states @ label_hidden_states.t() + num_labels = label_sim.size(0) + eye_mask = ~torch.eye(num_labels, dtype=torch.bool) + label_sim = label_sim.masked_select(eye_mask).view(num_labels, num_labels - 1) + lbd = label_sim.max() + + # this is not a loss but just compute neg_log_prob. + Y_pred = [] + Y_true = [] + with torch.no_grad(): + for batch in eval_dataloader: + self(batch, label_hidden_states, model, lbd, Y_pred, Y_true) + return self.finalize(Y_pred, Y_true, output_file) + + def reshape_subsample(self, sample): + for key in sample: + if torch.is_tensor(sample[key]): + sample[key] = self.flat_subsample(sample[key]) + return sample + + def flat_subsample(self, tensor): + if len(tensor.size()) > 1 and tensor.size(0) == 1: + tensor = tensor.squeeze(0) + return tensor + + def __call__(self, sample, label_hidden_states, model, lbd, Y_pred, Y_true): + sample = self.reshape_subsample(sample) + sample = self.to_ctx(sample) + # compute the average logits over sliding windows. + sample["output_hidden_states"] = True + video_outputs = model.forward_video(**sample).cpu() + output = {"logits": video_outputs[:, 1:sample["vmasks"].size(1)+1] @ label_hidden_states.t()} + logits = self._merge_windows(sample, output) + # logic of zero-shot for sequence labeling. + logits_argmax = logits.argmax(dim=1) + 1 # 0 is "O" label. 
class DiDeMoPredictor(Predictor):
    """reference: https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/eval.py
    https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/data_processing.py

    Ranks the 21 candidate (start, end) chunk segments of each video against
    its text query.
    """

    def __init__(self, config):
        super().__init__(config)
        # load targets.
        with open(config.dataset.test_path) as data_file:
            self.test_data = json.load(data_file)

    def predict_loop(self, model, eval_dataloader, output_file="didemo.npy"):
        """Rank segments for every batch and return ``finalize``'s result.

        TODO: two solutions here.
        """
        import itertools

        # 21 chunks: six single-chunk segments followed by every (s, e)
        # pair with s < e.
        self.possible_segments = [(idx, idx) for idx in range(6)]
        self.possible_segments.extend(itertools.combinations(range(6), 2))

        # on-the-fly prediction on a single gpu.
        self.full_scores = []
        model.eval()
        model = model.cuda()
        with torch.no_grad():
            for data in eval_dataloader:
                # TODO special forwarding logic here.
                data = self.to_ctx(data)
                data["output_hidden_states"] = True
                hidden_video = model.forward_video(**data)
                data["output_hidden_states"] = False
                pooled_text = model.forward_text(**data)
                self({
                    "hidden_video": hidden_video,
                    "pooled_text": pooled_text,
                    **data,
                })
        return self.finalize(output_file)

    def __call__(self, sample):
        """Append, per batch item, its segments ordered by descending score.

        A segment is kept only if at least one entry of ``vmasks`` inside
        its frame range is set.
        """
        # TODO: make an index select from self.possible_segments.
        pooled_text = sample["pooled_text"]
        vmasks = sample["vmasks"]

        # Drop the first and last positions of the hidden sequence.
        hidden_video = sample["hidden_video"][:, 1:-1, :]

        def frame_range(start, end):
            # Each chunk spans 5 consecutive positions.
            return slice(int(start * 5), int((end + 1) * 5))

        pooled_video = torch.cat(
            [
                torch.mean(
                    hidden_video[:, frame_range(s, e), :],
                    dim=1, keepdim=True)
                for s, e in self.possible_segments
            ],
            dim=1,
        )
        scores = torch.bmm(
            pooled_video, pooled_text.unsqueeze(-1)).squeeze(-1).cpu()

        ranks = scores.argsort(dim=-1, descending=True)

        for batch_idx, rank in enumerate(ranks):
            ordered = (
                self.possible_segments[moment.item()] for moment in rank)
            self.full_scores.append([
                (s, e) for s, e in ordered
                if torch.any(vmasks[batch_idx, frame_range(s, e)])
            ])

    def finalize(self, output_file=None):
        """Return outputs/targets, saving outputs with ``np.save`` if asked."""
        outputs = self._aggregate_scores(self.full_scores)
        if output_file is not None:
            target_path = os.path.join(self.pred_dir, output_file + ".npy")
            np.save(target_path, outputs)
        return {"outputs": outputs, "targets": self.test_data}

    def _aggregate_scores(self, scores):
        """Reset the internal score buffer and hand ``scores`` back as is."""
        self.full_scores = []
        return scores
@register_criterion("mmloss")
class MMCriterion(FairseqCriterion):
    """fairseq criterion that delegates the loss computation to ``task.mmtask``."""

    def __init__(self, task):
        super().__init__(task)
        # TODO (huxu): wrap forward call of loss_fn and eval_fn into task.
        self.mmtask = task.mmtask

    def forward(self, model, sample):
        """Compute the loss for the given sample.
        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        outputs = self.mmtask(model, sample)

        loss, loss_scalar, max_len, batch_size, sample_size = (
            outputs["loss"],
            outputs["loss_scalar"],
            outputs["max_len"],
            outputs["batch_size"],
            outputs["sample_size"],
        )

        logging_output = {
            "loss": loss_scalar,
            "ntokens": max_len * batch_size,  # dummy report.
            "nsentences": batch_size,  # dummy report.
            "sample_size": sample_size,
        }

        # The sample size returned to the trainer is the constant 1; the
        # task's own ``sample_size`` is only reported through the logs.
        return loss, 1, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training.

        since we use NCE, our actual batch_size is 1 per GPU.
        Then we take the mean of each worker.
        """
        loss_sum = sum(log.get("loss", 0.0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
        # An empty list or all-zero sample sizes would otherwise raise
        # ZeroDivisionError; fall back to a denominator of 1 in that case.
        metrics.log_scalar("loss", loss_sum / (sample_size or 1), round=3)

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return True
class DummyLoss(Loss):
    """Cross-entropy between ``logits`` and ``targets``; a dummy for testing."""

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, logits, targets, **kwargs):
        criterion = self.loss
        return criterion(logits, targets)


class DummyK400Loss(Loss):
    """dummy k400 loss for MViT.

    ``targets`` is ignored: a random class in [0, 400) is drawn per row.
    """

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, logits, targets, **kwargs):
        random_targets = torch.randint(
            0, 400, (logits.size(0),), device=logits.device)
        return self.loss(logits, random_targets)


class CrossEntropy(Loss):
    """Cross-entropy over logits flattened to 2-D and targets flattened to 1-D."""

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, logits, targets, **kwargs):
        flat_logits = logits.reshape(-1, logits.size(-1))
        flat_targets = targets.reshape(-1)
        return self.loss(flat_logits, flat_targets)


class ArgmaxCrossEntropy(Loss):
    """Cross-entropy against the arg-max (along dim 1) of ``targets``."""

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, logits, targets, **kwargs):
        class_index = targets.argmax(dim=1)
        return self.loss(logits, class_index)


class BCE(Loss):
    """``BCEWithLogitsLoss`` after squeezing dim 0 of ``targets``."""

    def __init__(self):
        self.loss = nn.BCEWithLogitsLoss()

    def __call__(self, logits, targets, **kwargs):
        return self.loss(logits, targets.squeeze(0))


class NLGLoss(Loss):
    """Cross-entropy over the entries of ``text_label`` that are not -100."""

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, logits, text_label, **kwargs):
        keep = text_label != -100
        return self.loss(logits, text_label[keep])


class MSE(Loss):
    """Thin wrapper around ``nn.MSELoss``."""

    def __init__(self):
        self.loss = nn.MSELoss()

    def __call__(self, logits, targets, **kwargs):
        criterion = self.loss
        return criterion(logits, targets)


class L1(Loss):
    """Thin wrapper around ``nn.L1Loss``."""

    def __init__(self):
        self.loss = nn.L1Loss()

    def __call__(self, logits, targets, **kwargs):
        criterion = self.loss
        return criterion(logits, targets)


class SmoothL1(Loss):
    """Thin wrapper around ``nn.SmoothL1Loss``."""

    def __init__(self):
        self.loss = nn.SmoothL1Loss()

    def __call__(self, logits, targets, **kwargs):
        criterion = self.loss
        return criterion(logits, targets)
class NCE(Loss):
    """Softmax NCE over alignment scores.

    The first half of the batch is read as positives and the second half as
    negatives; every positive is contrasted with all negatives.
    """

    def __init__(self):
        # TODO (huxu): define temperature.
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, align_scores, **kargs):
        # note: we reuse the same shape as cls head in BERT (batch_size, 2)
        # but NCE only needs one logits.
        # (so we drop all weights in the second neg logits.)
        first_logit = align_scores[:, :1]
        batch_size = first_logit.size(0) // 2
        positives = first_logit[:batch_size]
        # duplicate negative examples
        negatives = first_logit[batch_size:].view(1, batch_size).repeat(
            batch_size, 1)
        scores = torch.cat([positives, negatives], dim=1)
        # The positive score sits in column 0 of every row.
        targets = torch.zeros(
            (batch_size,), dtype=torch.long, device=first_logit.device)
        return self.loss(scores, targets)


class T2VContraLoss(Loss):
    """NCE for MM joint space, on softmax text2video matrix.
    """

    def __init__(self):
        # TODO (huxu): define temperature.
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, pooled_video, pooled_text, **kargs):
        num_pairs = pooled_video.size(0)
        text_to_video = torch.mm(pooled_text, pooled_video.transpose(0, 1))
        # Row i is matched with column i.
        diagonal = torch.arange(
            num_pairs, dtype=torch.long, device=pooled_video.device)
        return self.loss(text_to_video, diagonal)


class V2TContraLoss(Loss):
    """NCE for MM joint space, with softmax on video2text matrix."""

    def __init__(self):
        # TODO (huxu): define temperature.
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, pooled_video, pooled_text, **kargs):
        num_pairs = pooled_video.size(0)
        video_to_text = torch.mm(pooled_video, pooled_text.transpose(0, 1))
        diagonal = torch.arange(
            num_pairs, dtype=torch.long, device=pooled_video.device)
        return self.loss(video_to_text, diagonal)


class MMContraLoss(Loss):
    """Sum of the video-to-text and text-to-video contrastive losses."""

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(self, pooled_video, pooled_text, **kwargs):
        video_to_text = torch.matmul(pooled_video, pooled_text.t())
        text_to_video = torch.matmul(pooled_text, pooled_video.t())

        diagonal = torch.arange(
            pooled_video.size(0),
            dtype=torch.long,
            device=pooled_video.device)
        loss_video = self.loss(video_to_text, diagonal)
        loss_text = self.loss(text_to_video, diagonal)
        return loss_video + loss_text


class MTM(Loss):
    """Combination of MFM and MLM.

    Video and text logits are stacked into one matrix and scored by a
    single cross-entropy.
    """

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(
        self,
        video_logits,
        text_logits,
        video_label,
        text_label,
        **kwargs
    ):
        # Append one zero column to the text logits before stacking.
        extra_column = torch.zeros(
            (text_logits.size(0), 1), device=text_logits.device)
        padded_text_logits = torch.cat([text_logits, extra_column], dim=1)
        vt_logits = torch.cat([video_logits, padded_text_logits], dim=0)

        # loss for video: the incoming ``video_label`` is not used, every
        # video row targets class 0.
        video_targets = torch.zeros(
            (video_logits.size(0),),
            dtype=torch.long,
            device=video_logits.device
        )

        # loss for text: keep only labels different from -100.
        flat_text_label = text_label.reshape(-1)
        text_targets = flat_text_label[flat_text_label != -100]

        vt_label = torch.cat([video_targets, text_targets], dim=0)
        return self.loss(vt_logits, vt_label)


class MFMMLM(Loss):
    """Combination of MFM and MLM.

    The two cross-entropy terms are computed separately and added.
    """

    def __init__(self):
        self.loss = nn.CrossEntropyLoss()

    def __call__(
        self,
        video_logits,
        text_logits,
        video_label,
        text_label,
        **kwargs
    ):
        # loss for video: the incoming ``video_label`` is not used, every
        # video row targets class 0.
        video_targets = torch.zeros(
            (video_logits.size(0),),
            dtype=torch.long,
            device=video_logits.device
        )
        masked_frame_loss = self.loss(video_logits, video_targets)

        # loss for text: keep only labels different from -100.
        flat_text_label = text_label.reshape(-1)
        text_targets = flat_text_label[flat_text_label != -100]
        masked_lm_loss = self.loss(text_logits, text_targets)
        return masked_frame_loss + masked_lm_loss
@register_model("mmmodel")
class FairseqMMModel(BaseFairseqModel):
    """a fairseq wrapper of model built by `task`."""

    @classmethod
    def build_model(cls, args, task):
        """Wrap the model already built by ``task.mmtask``."""
        return FairseqMMModel(task.mmtask.model)

    def __init__(self, mmmodel):
        super().__init__()
        self.mmmodel = mmmodel

    def forward(self, *args, **kwargs):
        """Forward every argument to the wrapped model unchanged."""
        return self.mmmodel(*args, **kwargs)

    def upgrade_state_dict_named(self, state_dict, name):
        """Reconcile a loaded ``state_dict`` with this model, in place.

        Keys absent from the current model are removed, and keys the current
        model defines but the checkpoint lacks are filled from the model.
        """
        super().upgrade_state_dict_named(state_dict, name)

        # Take one snapshot instead of calling ``self.state_dict()`` once per
        # key: each call builds a fresh mapping over all parameters.
        own_state = self.state_dict()

        stale_keys = [key for key in state_dict if key not in own_state]
        for key in stale_keys:
            print("[INFO]", key, "not used anymore.")
            del state_dict[key]

        # copy any newly defined parameters.
        for key, value in own_state.items():
            if key not in state_dict:
                print("[INFO] adding", key)
                state_dict[key] = value


# a dummy arch, we config the model.
@register_model_architecture("mmmodel", "mmarch")
def mmarch(args):
    pass
class MMPTModel(nn.Module):
    """An e2e wrapper of inference model.

    Encodes raw video frames with ``video_encoder``, pads the features to
    ``max_video_len`` and feeds them with the caption tensors to ``model``.
    """

    @classmethod
    def from_pretrained(cls, config, checkpoint="checkpoint_best.pt"):
        """Build ``(model, tokenizer, aligner)`` from a config and checkpoint.

        The checkpoint is resolved relative to ``config.eval.save_path``; the
        S3D weights are read from fixed paths under ``pretrained_models/``.
        """
        import os
        from ..utils import recursive_config
        from ..tasks import Task
        config = recursive_config(config)
        mmtask = Task.config_task(config)
        checkpoint_path = os.path.join(config.eval.save_path, checkpoint)
        mmtask.build_model(checkpoint=checkpoint_path)
        # TODO(huxu): make the video encoder configurable.
        from ..processors.models.s3dg import S3D
        video_encoder = S3D('pretrained_models/s3d_dict.npy', 512)
        video_encoder.load_state_dict(
            torch.load('pretrained_models/s3d_howto100m.pth'))
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            config.dataset.bert_name, use_fast=config.dataset.use_fast
        )
        from ..processors import Aligner
        aligner = Aligner(config.dataset)
        return (
            MMPTModel(config, mmtask.model, video_encoder),
            tokenizer,
            aligner
        )

    def __init__(self, config, model, video_encoder, **kwargs):
        super().__init__()
        self.max_video_len = config.dataset.max_video_len
        self.video_encoder = video_encoder
        self.model = model

    def forward(self, video_frames, caps, cmasks, return_score=False):
        """Encode the frames, pad to ``max_video_len`` and run the model.

        Only a batch size of 1 is supported. With ``return_score=True`` the
        result is ``{"score": ...}``, the dot product of the model's
        ``pooled_video`` and ``pooled_text``; otherwise the model output is
        returned unchanged.
        """
        bsz = video_frames.size(0)
        assert bsz == 1, "only bsz=1 is supported now."
        seq_len = video_frames.size(1)
        # Fold the clip axis into the batch axis and move the last axis to
        # position 1 before calling the video encoder.
        video_frames = video_frames.view(-1, *video_frames.size()[2:])
        vfeats = self.video_encoder(video_frames.permute(0, 4, 1, 2, 3))
        vfeats = vfeats['video_embedding']
        vfeats = vfeats.view(bsz, seq_len, vfeats.size(-1))

        # Build the padding and mask on the device (and dtype) of the video
        # features; default-device tensors cannot be concatenated with
        # features produced on another device.
        device = vfeats.device
        pad_len = self.max_video_len - seq_len
        padding = vfeats.new_zeros((bsz, pad_len, vfeats.size(-1)))
        vfeats = torch.cat([vfeats, padding], dim=1)
        vmasks = torch.cat(
            [
                torch.ones((bsz, seq_len), dtype=torch.bool, device=device),
                torch.zeros((bsz, pad_len), dtype=torch.bool, device=device),
            ],
            dim=1,
        )
        output = self.model(caps, cmasks, vfeats, vmasks)
        if return_score:
            output = {"score": torch.bmm(
                output["pooled_video"][:, None, :],
                output["pooled_text"][:, :, None]
            ).squeeze(-1).squeeze(-1)}
        return output
+ model_config.use_seg_emb = config.model.use_seg_emb + self.mm_encoder = mm_encoder_cls.from_pretrained( + config.dataset.bert_name, config=model_config) + elif config.model.video_encoder_cls is not None\ + and config.model.text_encoder_cls is not None: + video_encoder_cls = getattr(transformermodel, config.model.video_encoder_cls) + model_config = AutoConfig.from_pretrained(config.dataset.bert_name) + model_config.max_video_len = config.dataset.max_video_len + # TODO: make each model a set of config class. + if hasattr(model_config, "num_layers"): + model_config.num_layers = config.model.num_hidden_video_layers + else: + model_config.num_hidden_layers = config.model.num_hidden_video_layers + self.video_encoder = video_encoder_cls.from_pretrained( + config.dataset.bert_name, config=model_config) + # exact same NLP model from Huggingface. + text_encoder_cls = getattr(transformermodel, config.model.text_encoder_cls) + self.text_encoder = text_encoder_cls.from_pretrained( + config.dataset.bert_name) + else: + raise ValueError("the encoder must be either MM or two backbones.") + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + **kwargs + ): + raise NotImplementedError( + "Please derive MMFusion module." 
+ ) + + def _mm_on_the_fly( + self, + cmasks, + vmasks, + attention_mask + ): + """helper function for mask, seg_ids and token_type_ids.""" + if attention_mask is None: + attention_mask = self._mm_attention_mask(cmasks, vmasks) + + """ + 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence | + """ + token_type_ids = torch.cat( + [ + torch.zeros( + (vmasks.size(0), vmasks.size(1) + 2), + dtype=torch.long, + device=vmasks.device, + ), + torch.ones( + (cmasks.size(0), cmasks.size(1) - 2), + dtype=torch.long, + device=cmasks.device, + ), + ], + dim=1, + ) + return attention_mask, token_type_ids + + def _mm_attention_mask(self, cmasks, vmasks): + assert cmasks.size(0) == vmasks.size(0), "{}, {}, {}, {}".format( + str(cmasks.size()), + str(vmasks.size()), + str(cmasks.size(0)), + str(vmasks.size(0)), + ) + + mm_mask = torch.cat([cmasks[:, :1], vmasks, cmasks[:, 1:]], dim=1) + if self.last_iso_layer == 0: + # hard attention mask. + return mm_mask + else: + # a gpu iso mask; 0 : num_iso_layer is isolated; + # num_iso_layer: are MM-fused. + # make an iso layer + batch_size = cmasks.size(0) + iso_mask = self._make_iso_mask(batch_size, cmasks, vmasks) + mm_mask = mm_mask[:, None, :].repeat(1, mm_mask.size(-1), 1) + iso_mm_masks = [] + # hard attention mask. 
+ iso_mask = iso_mask[:, None, :, :].repeat( + 1, self.last_iso_layer, 1, 1) + iso_mm_masks.append(iso_mask) + if self.last_iso_layer < self.num_hidden_layers: + mm_mask = mm_mask[:, None, :, :].repeat( + 1, self.num_hidden_layers - self.last_iso_layer, 1, 1 + ) + iso_mm_masks.append(mm_mask) + iso_mm_masks = torch.cat(iso_mm_masks, dim=1) + return iso_mm_masks + + def _make_iso_mask(self, batch_size, cmasks, vmasks): + cls_self_mask = torch.cat( + [ + torch.ones( + (batch_size, 1), dtype=torch.bool, device=cmasks.device), + torch.zeros( + (batch_size, cmasks.size(1) + vmasks.size(1) - 1), + dtype=torch.bool, device=cmasks.device) + ], dim=1) + + iso_video_mask = torch.cat( + [ + # [CLS] is not used. + torch.zeros( + (batch_size, 1), dtype=torch.bool, device=cmasks.device + ), + vmasks, + # assume to be 1. + cmasks[:, 1:2], + # 2 means [CLS] + [SEP] + torch.zeros( + (batch_size, cmasks.size(1) - 2), + dtype=torch.bool, + device=cmasks.device, + ), + ], + dim=1, + ) + iso_text_mask = torch.cat( + [ + torch.zeros( + (batch_size, 2 + vmasks.size(1)), + dtype=torch.bool, + device=cmasks.device, + ), # [CLS] is not used. + cmasks[:, 2:], # assume to be 1. + ], + dim=1, + ) + cls_self_mask = cls_self_mask[:, None, :] + iso_video_mask = iso_video_mask[:, None, :].repeat( + 1, vmasks.size(1) + 1, 1) + iso_text_mask = iso_text_mask[:, None, :].repeat( + 1, cmasks.size(1) - 2, 1) + return torch.cat([cls_self_mask, iso_video_mask, iso_text_mask], dim=1) + + def _pooling_vt_layer( + self, + layered_sequence_output, + cmasks, + vmasks + ): + layer_idx = self.last_iso_layer \ + if self.last_iso_layer > 0 else self.num_hidden_layers + hidden_state = layered_sequence_output[layer_idx] + # also output pooled_video and pooled_text. + batch_size = cmasks.size(0) + # pool the modality. 
class MMFusionMFMMLM(MMFusion):
    """forward function for MFM and MLM."""

    def forward(
        self,
        caps,
        cmasks,
        vfeats,
        vmasks,
        attention_mask=None,
        video_label=None,
        text_label=None,
        **kwargs
    ):
        """Run the multimodal encoder on masked video/text inputs.

        In training mode the raw ``video_logits``/``text_logits`` are
        returned; otherwise the pooled video and text representations are.
        Note that ``vfeats`` is modified in place when ``video_label`` is
        given.
        """
        # Hidden states are only needed for pooling outside of training.
        output_hidden_states = not self.is_train

        target_vfeats = None
        non_masked_frame_mask = None
        if video_label is not None:
            # Collect the features of the frames selected by ``video_label``
            # before they are zeroed out.
            feat_dim = vfeats.size(-1)
            target_vfeats = vfeats.masked_select(
                video_label.unsqueeze(-1)).view(-1, feat_dim)
            # mask video token.
            vfeats[video_label] = 0.0
            non_masked_frame_mask = vmasks.clone()
            non_masked_frame_mask[video_label] = False

        attention_mask, token_type_ids = self._mm_on_the_fly(
            cmasks, vmasks, attention_mask)

        encoder_kwargs = dict(
            input_ids=caps,
            input_video_embeds=vfeats,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            masked_frame_labels=video_label,
            target_video_hidden_states=target_vfeats,
            non_masked_frame_mask=non_masked_frame_mask,
            masked_lm_labels=text_label,
            output_hidden_states=output_hidden_states,
        )
        outputs = self.mm_encoder(**encoder_kwargs)

        video_logits, text_logits = outputs[0], outputs[1]

        # return earlier for training.
        if self.is_train:
            return {
                "video_logits": video_logits,
                "text_logits": text_logits,
            }

        pooled_video, pooled_text = self._pooling_vt_layer(
            outputs[2], cmasks, vmasks)
        return {"pooled_video": pooled_video, "pooled_text": pooled_text}
+ """ + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask=None, + video_label=None, + text_label=None, + output_hidden_states=False, + **kwargs + ): + pooled_video = self.forward_video( + vfeats, + vmasks, + caps, + cmasks, + output_hidden_states + ) + + pooled_text = self.forward_text( + caps, + cmasks, + output_hidden_states + ) + + return {"pooled_video": pooled_video, "pooled_text": pooled_text} + + def forward_video( + self, + vfeats, + vmasks, + caps, + cmasks, + output_hidden_states=False, + **kwargs + ): + input_ids = caps[:, :2] + + attention_mask = torch.cat([ + cmasks[:, :1], + vmasks, + cmasks[:, 1:2] + ], dim=1) + + token_type_ids = torch.zeros( + (vmasks.size(0), vmasks.size(1) + 2), + dtype=torch.long, + device=vmasks.device) + + outputs = self.mm_encoder( + input_ids=input_ids, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=True + ) + video_outputs = outputs[0] + + if output_hidden_states: + return video_outputs + + batch_size = cmasks.size(0) + + video_attention_mask = torch.cat( + [ + torch.zeros( + (batch_size, 1), dtype=torch.bool, device=vmasks.device), + vmasks, + torch.ones( + (batch_size, 1), dtype=torch.bool, device=vmasks.device), + ], + dim=1, + ) + assert video_outputs.size(1) == video_attention_mask.size(1) + + video_attention_mask = video_attention_mask.type(video_outputs.dtype) \ + / video_attention_mask.sum(1, keepdim=True) + + pooled_video = torch.bmm( + video_outputs.transpose(2, 1), + video_attention_mask.unsqueeze(2) + ).squeeze(-1) + return pooled_video # video_outputs + + def forward_text( + self, + caps, + cmasks, + output_hidden_states=False, + **kwargs + ): + input_ids = torch.cat([ + caps[:, :1], caps[:, 2:], + ], dim=1) + + attention_mask = torch.cat([ + cmasks[:, :1], + cmasks[:, 2:] + ], dim=1) + + token_type_ids = torch.cat([ + torch.zeros( + (cmasks.size(0), 1), + dtype=torch.long, + device=cmasks.device), + torch.ones( + 
(cmasks.size(0), cmasks.size(1) - 2), + dtype=torch.long, + device=cmasks.device) + ], dim=1) + + outputs = self.mm_encoder( + input_ids=input_ids, + input_video_embeds=None, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=True + ) + text_outputs = outputs[0] + + if output_hidden_states: + return text_outputs + + batch_size = caps.size(0) + # text tokens + [SEP] + text_attention_mask = torch.cat([ + torch.zeros( + (batch_size, 1), dtype=torch.bool, device=cmasks.device), + cmasks[:, 2:] + ], dim=1) + + assert text_outputs.size(1) == text_attention_mask.size(1) + + text_attention_mask = text_attention_mask.type(text_outputs.dtype) \ + / text_attention_mask.sum(1, keepdim=True) + + pooled_text = torch.bmm( + text_outputs.transpose(2, 1), + text_attention_mask.unsqueeze(2) + ).squeeze(-1) + return pooled_text # text_outputs + + +class MMFusionSeparate(MMFusionShare): + def forward_video( + self, + vfeats, + vmasks, + caps, + cmasks, + output_hidden_states=False, + **kwargs + ): + input_ids = caps[:, :2] + + attention_mask = torch.cat([ + cmasks[:, :1], + vmasks, + cmasks[:, 1:2] + ], dim=1) + + token_type_ids = torch.zeros( + (vmasks.size(0), vmasks.size(1) + 2), + dtype=torch.long, + device=vmasks.device) + + outputs = self.video_encoder( + input_ids=input_ids, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=True + ) + video_outputs = outputs[0] + + if output_hidden_states: + return video_outputs + + batch_size = cmasks.size(0) + + video_attention_mask = torch.cat( + [ + torch.zeros( + (batch_size, 1), dtype=torch.bool, device=vmasks.device), + vmasks, + torch.ones( + (batch_size, 1), dtype=torch.bool, device=vmasks.device), + ], + dim=1, + ) + assert video_outputs.size(1) == video_attention_mask.size(1) + + video_attention_mask = video_attention_mask.type(video_outputs.dtype) \ + / video_attention_mask.sum(1, keepdim=True) + + pooled_video = torch.bmm( + 
video_outputs.transpose(2, 1), + video_attention_mask.unsqueeze(2) + ).squeeze(-1) + return pooled_video # video_outputs + + def forward_text( + self, + caps, + cmasks, + output_hidden_states=False, + **kwargs + ): + input_ids = torch.cat([ + caps[:, :1], caps[:, 2:], + ], dim=1) + + attention_mask = torch.cat([ + cmasks[:, :1], + cmasks[:, 2:] + ], dim=1) + # different from sharing, we use all-0 type. + token_type_ids = torch.zeros( + (cmasks.size(0), cmasks.size(1) - 1), + dtype=torch.long, + device=cmasks.device) + + outputs = self.text_encoder( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=True + ) + text_outputs = outputs[0] + + if output_hidden_states: + return text_outputs + + batch_size = caps.size(0) + # text tokens + [SEP] + text_attention_mask = torch.cat([ + torch.zeros( + (batch_size, 1), dtype=torch.bool, device=cmasks.device), + cmasks[:, 2:] + ], dim=1) + + assert text_outputs.size(1) == text_attention_mask.size(1) + + text_attention_mask = text_attention_mask.type(text_outputs.dtype) \ + / text_attention_mask.sum(1, keepdim=True) + + pooled_text = torch.bmm( + text_outputs.transpose(2, 1), + text_attention_mask.unsqueeze(2) + ).squeeze(-1) + return pooled_text # text_outputs + + +class MMFusionJoint(MMFusion): + """fine-tuning wrapper for retrival task.""" + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask=None, + video_label=None, + text_label=None, + **kwargs + ): + # TODO (huxu): other ways to do negative examples; move the following + # into your criterion forward. 
+ output_hidden_states = True + + attention_mask, token_type_ids = self._mm_on_the_fly( + cmasks, vmasks, attention_mask) + + separate_forward_split = ( + None if self.is_train else vmasks.size(1) + 2 + ) # [CLS] + [SEP] + + outputs = self.mm_encoder( + input_ids=caps, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=output_hidden_states, + separate_forward_split=separate_forward_split, + ) + + pooled_video, pooled_text = self._pooling_vt_layer( + outputs[2], cmasks, vmasks) + return {"pooled_video": pooled_video, "pooled_text": pooled_text} + + +class MMFusionActionSegmentation(MMFusion): + """Fine-tuning wrapper for action segmentation. + TODO: rename this for VLM. + """ + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask=None, + **kwargs + ): + # ActionLocalization assume of batch_size=1, squeeze it. + caps = caps.view(-1, caps.size(-1)) + cmasks = cmasks.view(-1, cmasks.size(-1)) + vfeats = vfeats.view(-1, vfeats.size(2), vfeats.size(3)) + vmasks = vmasks.view(-1, vmasks.size(-1)) + + # this may not cover all shapes of attention_mask. + attention_mask = attention_mask.view( + -1, attention_mask.size(2), attention_mask.size(3)) \ + if attention_mask is not None else None + + # TODO (huxu): other ways to do negative examples; move the following + # into your criterion forward. + output_hidden_states = True + + # video forwarding, text is dummy; never use attention_mask. 
+ attention_mask, token_type_ids = self._mm_on_the_fly( + cmasks, vmasks, attention_mask) + + logits = self.mm_encoder( + input_ids=caps, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=output_hidden_states, + ) + return {"logits": logits[0][:, 1:vmasks.size(1)+1]} + + +class MMFusionActionLocalization(MMFusion): + """fine-tuning model for retrival task.""" + + def __init__(self, config, **kwargs): + super().__init__(config) + tokenizer = AutoTokenizer.from_pretrained( + config.dataset.bert_name) + self.cls_token_id = tokenizer.cls_token_id + self.sep_token_id = tokenizer.sep_token_id + self.pad_token_id = tokenizer.pad_token_id + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask=None, + **kwargs + ): + # ActionLocalization assume of batch_size=1, squeeze it. + caps = caps.squeeze(0) + cmasks = cmasks.squeeze(0) + vfeats = vfeats.squeeze(0) + vmasks = vmasks.squeeze(0) + attention_mask = attention_mask.squeeze(0) if attention_mask is not None else None + + # TODO (huxu): other ways to do negative examples; move the following + # into your criterion forward. + output_hidden_states = True + + # a len1 dummy video token. + dummy_vfeats = torch.zeros( + (caps.size(0), 1, vfeats.size(-1)), device=vfeats.device, dtype=vfeats.dtype) + dummy_vmasks = torch.ones( + (caps.size(0), 1), dtype=torch.bool, + device=vfeats.device) + + dummy_caps = torch.LongTensor( + [[self.cls_token_id, self.sep_token_id, + self.pad_token_id, self.sep_token_id]], + ).to(caps.device).repeat(vfeats.size(0), 1) + dummy_cmasks = torch.BoolTensor( + [[0, 1, 0, 1]] # pad are valid for attention. + ).to(caps.device).repeat(vfeats.size(0), 1) + + # video forwarding, text is dummy; never use attention_mask. 
+ attention_mask, token_type_ids = self._mm_on_the_fly( + dummy_cmasks, vmasks, None) + + outputs = self.mm_encoder( + input_ids=dummy_caps, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=output_hidden_states, + ) + + layer_idx = self.last_iso_layer \ + if self.last_iso_layer > 0 else self.num_hidden_layers + + video_seq = outputs[2][layer_idx][:, 1:vmasks.size(1)+1].masked_select( + vmasks.unsqueeze(-1) + ).view(-1, self.hidden_size) + + # text forwarding, video is dummy + attention_mask, token_type_ids = self._mm_on_the_fly( + cmasks, dummy_vmasks, None) + + outputs = self.mm_encoder( + input_ids=caps, + input_video_embeds=dummy_vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + output_hidden_states=output_hidden_states, + ) + + _, pooled_text = self._pooling_vt_layer( + outputs[2], cmasks, dummy_vmasks) + # this line is not right. + logits = torch.mm(video_seq, pooled_text.transpose(1, 0)) + return {"logits": logits} + + +# --------------- MMFusionSeparate for end tasks --------------- + +class MMFusionSeparateActionSegmentation(MMFusionSeparate): + """Fine-tuning wrapper for action segmentation.""" + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask=None, + **kwargs + ): + # ActionLocalization assume of batch_size=1, squeeze it. 
+ caps = caps.view(-1, caps.size(-1)) + cmasks = cmasks.view(-1, cmasks.size(-1)) + vfeats = vfeats.view(-1, vfeats.size(2), vfeats.size(3)) + vmasks = vmasks.view(-1, vmasks.size(-1)) + logits = self.forward_video( + vfeats, + vmasks, + caps, + cmasks, + output_hidden_states=True + ) + return {"logits": logits[:, 1:vmasks.size(1)+1]} + + +class MMFusionSeparateActionLocalization(MMFusionSeparate): + def __init__(self, config, **kwargs): + super().__init__(config) + tokenizer = AutoTokenizer.from_pretrained( + config.dataset.bert_name) + self.cls_token_id = tokenizer.cls_token_id + self.sep_token_id = tokenizer.sep_token_id + self.pad_token_id = tokenizer.pad_token_id + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + **kwargs + ): + # ActionLocalization assume of batch_size=1, squeeze it. + caps = caps.squeeze(0) + cmasks = cmasks.squeeze(0) + vfeats = vfeats.squeeze(0) + vmasks = vmasks.squeeze(0) + + # TODO (huxu): other ways to do negative examples; move the following + # into your criterion forward. + dummy_caps = torch.LongTensor( + [[self.cls_token_id, self.sep_token_id, + self.pad_token_id, self.sep_token_id]], + ).to(caps.device).repeat(vfeats.size(0), 1) + dummy_cmasks = torch.BoolTensor( + [[0, 1, 0, 1]] # pad are valid for attention. + ).to(caps.device).repeat(vfeats.size(0), 1) + + outputs = self.forward_video( + vfeats, + vmasks, + dummy_caps, + dummy_cmasks, + output_hidden_states=True + ) + + video_seq = outputs[:, 1:vmasks.size(1)+1].masked_select( + vmasks.unsqueeze(-1) + ).view(-1, self.hidden_size) + + pooled_text = self.forward_text( + caps, + cmasks, + output_hidden_states=False + ) + + # this line is not right. 
+ logits = torch.mm(video_seq, pooled_text.transpose(1, 0)) + return {"logits": logits} + + +class MMFusionShareActionLocalization(MMFusionShare): + def __init__(self, config, **kwargs): + super().__init__(config) + tokenizer = AutoTokenizer.from_pretrained( + config.dataset.bert_name) + self.cls_token_id = tokenizer.cls_token_id + self.sep_token_id = tokenizer.sep_token_id + self.pad_token_id = tokenizer.pad_token_id + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + **kwargs + ): + # ActionLocalization assume of batch_size=1, squeeze it. + caps = caps.squeeze(0) + cmasks = cmasks.squeeze(0) + vfeats = vfeats.squeeze(0) + vmasks = vmasks.squeeze(0) + + # TODO (huxu): other ways to do negative examples; move the following + # into your criterion forward. + dummy_caps = torch.LongTensor( + [[self.cls_token_id, self.sep_token_id, + self.pad_token_id, self.sep_token_id]], + ).to(caps.device).repeat(vfeats.size(0), 1) + dummy_cmasks = torch.BoolTensor( + [[0, 1, 0, 1]] # pad are valid for attention. + ).to(caps.device).repeat(vfeats.size(0), 1) + + outputs = self.forward_video( + vfeats, + vmasks, + dummy_caps, + dummy_cmasks, + output_hidden_states=True + ) + + video_seq = outputs[:, 1:vmasks.size(1)+1].masked_select( + vmasks.unsqueeze(-1) + ).view(-1, self.hidden_size) + + pooled_text = self.forward_text( + caps, + cmasks, + output_hidden_states=False + ) + + # this line is not right. + logits = torch.mm(video_seq, pooled_text.transpose(1, 0)) + return {"logits": logits} diff --git a/fairseq/examples/MMPT/mmpt/models/mmfusionnlg.py b/fairseq/examples/MMPT/mmpt/models/mmfusionnlg.py new file mode 100644 index 0000000..9207e77 --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/models/mmfusionnlg.py @@ -0,0 +1,999 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors, Facebook AI Research authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright (c) Facebook, Inc. All Rights Reserved + + +import torch + +from torch.nn import functional as F + +from typing import Optional, Iterable + +try: + from transformers import BertPreTrainedModel + from transformers.modeling_bert import BertOnlyMLMHead + + from transformers.file_utils import ModelOutput + from transformers.modeling_outputs import CausalLMOutput + from transformers.generation_utils import ( + BeamHypotheses, + top_k_top_p_filtering + ) +except ImportError: + pass + +from .mmfusion import MMFusion +from .transformermodel import MMBertModel +from ..modules import VideoTokenMLP + + +class MMFusionNLG(MMFusion): + def __init__(self, config, **kwargs): + super().__init__(config) + if config.model.max_decode_length is not None: + self.max_length = min( + config.model.max_decode_length, + config.dataset.max_len - config.dataset.max_video_len - 3 + ) + else: + self.max_length = \ + config.dataset.max_len - config.dataset.max_video_len - 3 + self.gen_param = config.gen_param if config.gen_param is not None \ + else {} + + def forward( + self, + caps, + cmasks, + vfeats, + vmasks, + attention_mask, + video_label=None, + text_label=None, + **kwargs + ): + """use pre-trained LM header for generation.""" + attention_mask, token_type_ids = self._mm_on_the_fly( + cmasks, vmasks, attention_mask) + + outputs = self.mm_encoder( + input_ids=caps, + input_video_embeds=vfeats, + attention_mask=attention_mask, + 
token_type_ids=token_type_ids, + masked_lm_labels=text_label, + ) + return {"logits": outputs[0]} + + @torch.no_grad() + def generate( + self, + caps, cmasks, vfeats, vmasks, + attention_mask=None, + bos_token_id=None, + eos_token_id=None, + **kwargs + ): + # a simplified interface from + # https://huggingface.co/transformers/v3.4.0/_modules/transformers/generation_utils.html#GenerationMixin.generate + + # caps now only have + # [CLS], [SEP] (for video) and [CLS] (as bos_token) + assert caps.size(1) == 3 + + attention_mask, token_type_ids = self._mm_on_the_fly( + cmasks, vmasks, attention_mask) + + output = self.mm_encoder.generate( + input_ids=caps, + input_video_embeds=vfeats, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + max_length=self.max_length, + **self.gen_param + ) + return output + + +class MMBertForNLG(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.bert = MMBertModel(config) + self.videomlp = VideoTokenMLP(config) + # we do not use `BertGenerationOnlyLMHead` + # because we can reuse pretraining. + self.cls = BertOnlyMLMHead(config) + self.hidden_size = config.hidden_size + self.init_weights() + + def get_output_embeddings(self): + return self.cls.predictions.decoder + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + masked_lm_labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + # similar to MMBertForMFMMLM without MFM. 
+ video_tokens = self.videomlp(input_video_embeds) + outputs = self.bert( + input_ids, + video_tokens, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + + prediction_scores = None + if masked_lm_labels is not None: + text_offset = input_video_embeds.size(1) + 1 # [CLS] + # recover caps format: [CLS] [SEP] text [SEP] + text_sequence_output = torch.cat( + [sequence_output[:, :1], sequence_output[:, text_offset:]], + dim=1 + ) + + # only compute select tokens to training to speed up. + hidden_size = text_sequence_output.size(-1) + # masked_lm_labels = masked_lm_labels.reshape(-1) + labels_mask = masked_lm_labels != -100 + + selected_text_output = text_sequence_output.masked_select( + labels_mask.unsqueeze(-1) + ).view(-1, hidden_size) + prediction_scores = self.cls(selected_text_output) + + if not return_dict: + output = ( + prediction_scores, + ) + outputs[2:] + return output + + # for generation. + text_offset = input_video_embeds.size(1) + 2 # [CLS] + text_sequence_output = sequence_output[:, text_offset:] + prediction_scores = self.cls(text_sequence_output) + return CausalLMOutput( + loss=None, + logits=prediction_scores, + ) + + def prepare_inputs_for_generation( + self, + input_ids, + input_video_embeds, + attention_mask=None, + token_type_ids=None, + **model_kwargs + ): + # must return a dictionary. 
+ seq_len = input_ids.size(1) + input_video_embeds.size(1) + if attention_mask is not None: + if len(attention_mask.size()) == 4: + attention_mask = attention_mask[:, :, :seq_len, :seq_len] + elif len(attention_mask.size()) == 3: + attention_mask = attention_mask[:, :seq_len, :seq_len] + else: + attention_mask = attention_mask[:, :seq_len] + if token_type_ids is not None: + token_type_ids = token_type_ids[:, :seq_len] + + return { + "input_ids": input_ids, + "input_video_embeds": input_video_embeds, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } + + @torch.no_grad() + def generate( + self, + input_ids: Optional[torch.LongTensor] = None, + decoder_input_ids: Optional[torch.LongTensor] = None, + max_length: Optional[int] = None, + min_length: Optional[int] = None, + do_sample: Optional[bool] = None, + early_stopping: Optional[bool] = None, + num_beams: Optional[int] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + repetition_penalty: Optional[float] = None, + bad_words_ids: Optional[Iterable[int]] = None, + bos_token_id: Optional[int] = None, + pad_token_id: Optional[int] = None, + eos_token_id: Optional[int] = None, + length_penalty: Optional[float] = None, + no_repeat_ngram_size: Optional[int] = None, + num_return_sequences: Optional[int] = None, + attention_mask: Optional[torch.LongTensor] = None, + decoder_start_token_id: Optional[int] = None, + use_cache: Optional[bool] = None, + **model_kwargs + ) -> torch.LongTensor: + r""" + Generates sequences for models with a language modeling head. The method currently supports greedy decoding, + beam-search decoding, sampling with temperature, sampling with top-k or nucleus sampling. + Adapted in part from `Facebook's XLM beam search code + `__. 
+ Apart from :obj:`input_ids` and :obj:`attention_mask`, all the arguments below will default to the value of the + attribute of the same name inside the :class:`~transformers.PretrainedConfig` of the model. The default values + indicated are the default values of those config. + Most of these parameters are explained in more detail in `this blog post + `__. + Parameters: + input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + The sequence used as a prompt for the generation. If :obj:`None` the method initializes + it as an empty :obj:`torch.LongTensor` of shape :obj:`(1,)`. + decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + initial input_ids for the decoder of encoder-decoder type models. If :obj:`None` then only + decoder_start_token_id is passed as the first token to the decoder. + max_length (:obj:`int`, `optional`, defaults to 20): + The maximum length of the sequence to be generated. + min_length (:obj:`int`, `optional`, defaults to 10): + The minimum length of the sequence to be generated. + do_sample (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use sampling ; use greedy decoding otherwise. + early_stopping (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether to stop the beam search when at least ``num_beams`` sentences are finished per batch or not. + num_beams (:obj:`int`, `optional`, defaults to 1): + Number of beams for beam search. 1 means no beam search. + temperature (:obj:`float`, `optional`, defaults tp 1.0): + The value used to module the next token probabilities. + top_k (:obj:`int`, `optional`, defaults to 50): + The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p (:obj:`float`, `optional`, defaults to 1.0): + If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or + higher are kept for generation. 
+ repetition_penalty (:obj:`float`, `optional`, defaults to 1.0): + The parameter for repetition penalty. 1.0 means no penalty. See `this paper + `__ for more details. + pad_token_id (:obj:`int`, `optional`): + The id of the `padding` token. + bos_token_id (:obj:`int`, `optional`): + The id of the `beginning-of-sequence` token. + eos_token_id (:obj:`int`, `optional`): + The id of the `end-of-sequence` token. + length_penalty (:obj:`float`, `optional`, defaults to 1.0): + Exponential penalty to the length. 1.0 means no penalty. + Set to values < 1.0 in order to encourage the model to generate shorter sequences, to a value > 1.0 in + order to encourage the model to produce longer sequences. + no_repeat_ngram_size (:obj:`int`, `optional`, defaults to 0): + If set to int > 0, all ngrams of that size can only occur once. + bad_words_ids(:obj:`List[int]`, `optional`): + List of token ids that are not allowed to be generated. In order to get the tokens of the words that + should not appear in the generated text, use :obj:`tokenizer.encode(bad_word, add_prefix_space=True)`. + num_return_sequences(:obj:`int`, `optional`, defaults to 1): + The number of independently computed returned sequences for each element in the batch. + attention_mask (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on padding token indices. Mask values are in ``[0, 1]``, 1 for + tokens that are not masked, and 0 for masked tokens. + If not provided, will default to a tensor the same shape as :obj:`input_ids` that masks the pad token. + `What are attention masks? <../glossary.html#attention-mask>`__ + decoder_start_token_id (:obj:`int`, `optional`): + If an encoder-decoder model starts decoding with a different token than `bos`, the id of that token. 
+ use_cache: (:obj:`bool`, `optional`, defaults to :obj:`True`): + Whether or not the model should use the past last key/values attentions (if applicable to the model) to + speed up decoding. + model_kwargs: + Additional model specific kwargs will be forwarded to the :obj:`forward` function of the model. + Return: + :obj:`torch.LongTensor` of shape :obj:`(batch_size * num_return_sequences, sequence_length)`: + The generated sequences. The second dimension (sequence_length) is either equal to :obj:`max_length` or + shorter if all batches finished early due to the :obj:`eos_token_id`. + Examples:: + tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache. + outputs = model.generate(max_length=40) # do greedy decoding + print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + tokenizer = AutoTokenizer.from_pretrained('openai-gpt') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('openai-gpt') # Download model and configuration from S3 and cache. + input_context = 'The dog' + input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context + outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5) # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog' + for i in range(3): # 3 output sequences were generated + print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True))) + tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache. 
+ input_context = 'The dog' + input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context + outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3, do_sample=True) # generate 3 candidates using sampling + for i in range(3): # 3 output sequences were generated + print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True))) + tokenizer = AutoTokenizer.from_pretrained('ctrl') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('ctrl') # Download model and configuration from S3 and cache. + input_context = 'Legal My neighbor is' # "Legal" is one of the control codes for ctrl + input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context + outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2) # generate sequences + print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + tokenizer = AutoTokenizer.from_pretrained('gpt2') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('gpt2') # Download model and configuration from S3 and cache. + input_context = 'My cute dog' # "Legal" is one of the control codes for ctrl + bad_words_ids = [tokenizer.encode(bad_word, add_prefix_space=True) for bad_word in ['idiot', 'stupid', 'shut up']] + input_ids = tokenizer.encode(input_context, return_tensors='pt') # encode input context + outputs = model.generate(input_ids=input_ids, max_length=100, do_sample=True, bad_words_ids=bad_words_ids) # generate sequences without allowing bad_words to be generated + """ + + # We cannot generate if the model does not have a LM head + if self.get_output_embeddings() is None: + raise AttributeError( + "You tried to generate sequences with a model that does not have a LM Head." + "Please use another model class (e.g. 
`OpenAIGPTLMHeadModel`, `XLNetLMHeadModel`, `GPT2LMHeadModel`, `CTRLLMHeadModel`, `T5WithLMHeadModel`, `TransfoXLLMHeadModel`, `XLMWithLMHeadModel`, `BartForConditionalGeneration` )" + ) + + max_length = max_length if max_length is not None else self.config.max_length + min_length = min_length if min_length is not None else self.config.min_length + do_sample = do_sample if do_sample is not None else self.config.do_sample + early_stopping = early_stopping if early_stopping is not None else self.config.early_stopping + use_cache = use_cache if use_cache is not None else self.config.use_cache + num_beams = num_beams if num_beams is not None else self.config.num_beams + temperature = temperature if temperature is not None else self.config.temperature + top_k = top_k if top_k is not None else self.config.top_k + top_p = top_p if top_p is not None else self.config.top_p + repetition_penalty = repetition_penalty if repetition_penalty is not None else self.config.repetition_penalty + bos_token_id = bos_token_id if bos_token_id is not None else self.config.bos_token_id + pad_token_id = pad_token_id if pad_token_id is not None else self.config.pad_token_id + eos_token_id = eos_token_id if eos_token_id is not None else self.config.eos_token_id + length_penalty = length_penalty if length_penalty is not None else self.config.length_penalty + no_repeat_ngram_size = ( + no_repeat_ngram_size if no_repeat_ngram_size is not None else self.config.no_repeat_ngram_size + ) + bad_words_ids = bad_words_ids if bad_words_ids is not None else self.config.bad_words_ids + num_return_sequences = ( + num_return_sequences if num_return_sequences is not None else self.config.num_return_sequences + ) + decoder_start_token_id = ( + decoder_start_token_id if decoder_start_token_id is not None else self.config.decoder_start_token_id + ) + + if input_ids is not None: + batch_size = input_ids.shape[0] # overriden by the input batch_size + else: + batch_size = 1 + + assert isinstance(max_length, int) 
and max_length > 0, "`max_length` should be a strictly positive integer." + assert isinstance(min_length, int) and min_length >= 0, "`min_length` should be a positive integer." + assert isinstance(do_sample, bool), "`do_sample` should be a boolean." + assert isinstance(early_stopping, bool), "`early_stopping` should be a boolean." + assert isinstance(use_cache, bool), "`use_cache` should be a boolean." + assert isinstance(num_beams, int) and num_beams > 0, "`num_beams` should be a strictly positive integer." + assert temperature > 0, "`temperature` should be strictly positive." + assert isinstance(top_k, int) and top_k >= 0, "`top_k` should be a positive integer." + assert 0 <= top_p <= 1, "`top_p` should be between 0 and 1." + assert repetition_penalty >= 1.0, "`repetition_penalty` should be >= 1." + assert input_ids is not None or ( + isinstance(bos_token_id, int) and bos_token_id >= 0 + ), "If input_ids is not defined, `bos_token_id` should be a positive integer." + assert pad_token_id is None or ( + isinstance(pad_token_id, int) and (pad_token_id >= 0) + ), "`pad_token_id` should be a positive integer." + assert (eos_token_id is None) or ( + isinstance(eos_token_id, int) and (eos_token_id >= 0) + ), "`eos_token_id` should be a positive integer." + assert length_penalty > 0, "`length_penalty` should be strictly positive." + assert ( + isinstance(no_repeat_ngram_size, int) and no_repeat_ngram_size >= 0 + ), "`no_repeat_ngram_size` should be a positive integer." + assert ( + isinstance(num_return_sequences, int) and num_return_sequences > 0 + ), "`num_return_sequences` should be a strictly positive integer." 
+ assert ( + bad_words_ids is None or isinstance(bad_words_ids, list) and isinstance(bad_words_ids[0], list) + ), "`bad_words_ids` is either `None` or a list of lists of tokens that should not be generated" + + if input_ids is None: + assert isinstance(bos_token_id, int) and bos_token_id >= 0, ( + "you should either supply a context to complete as `input_ids` input " + "or a `bos_token_id` (integer >= 0) as a first token to start the generation." + ) + input_ids = torch.full( + (batch_size, 1), + bos_token_id, + dtype=torch.long, + device=next(self.parameters()).device, + ) + else: + assert input_ids.dim() == 2, "Input prompt should be of shape (batch_size, sequence length)." + + # not allow to duplicate outputs when greedy decoding + if do_sample is False: + if num_beams == 1: + # no_beam_search greedy generation conditions + assert ( + num_return_sequences == 1 + ), "Greedy decoding will always produce the same output for num_beams == 1 and num_return_sequences > 1. Please set num_return_sequences = 1" + + else: + # beam_search greedy generation conditions + assert ( + num_beams >= num_return_sequences + ), "Greedy beam search decoding cannot return more sequences than it has beams. Please set num_beams >= num_return_sequences" + + # create attention mask if necessary + # TODO (PVP): this should later be handled by the forward fn() in each model in the future see PR 3140 + if (attention_mask is None) and (pad_token_id is not None) and (pad_token_id in input_ids): + attention_mask = input_ids.ne(pad_token_id).long() + elif attention_mask is None: + attention_mask = input_ids.new_ones(input_ids.shape) + + # set pad_token_id to eos_token_id if not set. 
Important that this is done after + # attention_mask is created + if pad_token_id is None and eos_token_id is not None: + print( + "Setting `pad_token_id` to {} (first `eos_token_id`) to generate sequence".format(eos_token_id) + ) + pad_token_id = eos_token_id + + # vocab size + if hasattr(self.config, "vocab_size"): + vocab_size = self.config.vocab_size + elif ( + self.config.is_encoder_decoder + and hasattr(self.config, "decoder") + and hasattr(self.config.decoder, "vocab_size") + ): + vocab_size = self.config.decoder.vocab_size + else: + raise ValueError("either self.config.vocab_size or self.config.decoder.vocab_size needs to be defined") + + # set effective batch size and effective batch multiplier according to do_sample + if do_sample: + effective_batch_size = batch_size * num_return_sequences + effective_batch_mult = num_return_sequences + else: + effective_batch_size = batch_size + effective_batch_mult = 1 + + if self.config.is_encoder_decoder: + if decoder_start_token_id is None: + # see if BOS token can be used for decoder_start_token_id + if bos_token_id is not None: + decoder_start_token_id = bos_token_id + elif ( + hasattr(self.config, "decoder") + and hasattr(self.config.decoder, "bos_token_id") + and self.config.decoder.bos_token_id is not None + ): + decoder_start_token_id = self.config.decoder.bos_token_id + else: + raise ValueError( + "decoder_start_token_id or bos_token_id has to be defined for encoder-decoder generation" + ) + + assert hasattr(self, "get_encoder"), "{} should have a 'get_encoder' function defined".format(self) + assert callable(self.get_encoder), "{} should be a method".format(self.get_encoder) + + # get encoder and store encoder outputs + encoder = self.get_encoder() + encoder_outputs: ModelOutput = encoder(input_ids, attention_mask=attention_mask, return_dict=True) + + # Expand input ids if num_beams > 1 or num_return_sequences > 1 + if num_return_sequences > 1 or num_beams > 1: + # TODO: make this a call-back function. 
+ # input_ids=caps, + # input_video_embeds=vfeats, + # attention_mask=attention_mask, + # token_type_ids=token_type_ids, + input_video_embeds = model_kwargs.pop("input_video_embeds", None) + token_type_ids = model_kwargs.pop("token_type_ids", None) + + input_ids_len = input_ids.shape[-1] + input_ids = input_ids.unsqueeze(1).expand( + batch_size, effective_batch_mult * num_beams, input_ids_len) + + input_video_embeds_len, input_video_embeds_hidden = input_video_embeds.size(1), input_video_embeds.size(2) + input_video_embeds = input_video_embeds.unsqueeze(1).expand( + batch_size, effective_batch_mult * num_beams, input_video_embeds_len, input_video_embeds_hidden) + + attention_mask_from_len, attention_mask_to_len = attention_mask.size(1), attention_mask.size(2) + attention_mask = attention_mask.unsqueeze(1).expand( + batch_size, effective_batch_mult * num_beams, attention_mask_from_len, attention_mask_to_len + ) + + token_type_ids_len = token_type_ids.size(1) + token_type_ids = token_type_ids.unsqueeze(1).expand( + batch_size, effective_batch_mult * num_beams, token_type_ids_len + ) + + # contiguous ... 
+ input_ids = input_ids.contiguous().view( + effective_batch_size * num_beams, input_ids_len + ) # shape: (batch_size * num_return_sequences * num_beams, cur_len) + + input_video_embeds = input_video_embeds.contiguous().view( + effective_batch_size * num_beams, input_video_embeds_len, input_video_embeds_hidden) + + attention_mask = attention_mask.contiguous().view( + effective_batch_size * num_beams, attention_mask_from_len, attention_mask_to_len + ) # shape: (batch_size * num_return_sequences * num_beams, cur_len) + + token_type_ids = token_type_ids.contiguous().view( + effective_batch_size * num_beams, token_type_ids_len + ) + + model_kwargs["input_video_embeds"] = input_video_embeds + model_kwargs["token_type_ids"] = token_type_ids + + if self.config.is_encoder_decoder: + device = next(self.parameters()).device + if decoder_input_ids is not None: + # give initial decoder input ids + input_ids = decoder_input_ids.repeat(effective_batch_size * num_beams, 1).to(device) + else: + # create empty decoder input_ids + input_ids = torch.full( + (effective_batch_size * num_beams, 1), + decoder_start_token_id, + dtype=torch.long, + device=device, + ) + cur_len = input_ids.shape[-1] + + assert ( + batch_size == encoder_outputs.last_hidden_state.shape[0] + ), f"expected encoder_outputs.last_hidden_state to have 1st dimension bs={batch_size}, got {encoder_outputs.last_hidden_state.shape[0]} " + + # expand batch_idx to assign correct encoder output for expanded input_ids (due to num_beams > 1 and num_return_sequences > 1) + expanded_batch_idxs = ( + torch.arange(batch_size) + .view(-1, 1) + .repeat(1, num_beams * effective_batch_mult) + .view(-1) + .to(input_ids.device) + ) + + # expand encoder_outputs + encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.index_select( + 0, expanded_batch_idxs + ) + + # save encoder_outputs in `model_kwargs` + model_kwargs["encoder_outputs"] = encoder_outputs + + else: + cur_len = input_ids.shape[-1] + + assert ( + cur_len 
< max_length + ), f"The context has {cur_len} number of tokens, but `max_length` is only {max_length}. Please make sure that `max_length` is bigger than the number of tokens, by setting either `generate(max_length=...,...)` or `config.max_length = ...`" + + if num_beams > 1: + output = self._generate_beam_search( + input_ids, + cur_len=cur_len, + max_length=max_length, + min_length=min_length, + do_sample=do_sample, + early_stopping=early_stopping, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + no_repeat_ngram_size=no_repeat_ngram_size, + bad_words_ids=bad_words_ids, + pad_token_id=pad_token_id, + eos_token_id=eos_token_id, + batch_size=effective_batch_size, + num_return_sequences=num_return_sequences, + length_penalty=length_penalty, + num_beams=num_beams, + vocab_size=vocab_size, + attention_mask=attention_mask, + use_cache=use_cache, + model_kwargs=model_kwargs, + ) + else: + output = self._generate_no_beam_search( + input_ids, + cur_len=cur_len, + max_length=max_length, + min_length=min_length, + do_sample=do_sample, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + no_repeat_ngram_size=no_repeat_ngram_size, + bad_words_ids=bad_words_ids, + pad_token_id=pad_token_id, + eos_token_id=eos_token_id, + batch_size=effective_batch_size, + attention_mask=attention_mask, + use_cache=use_cache, + model_kwargs=model_kwargs, + ) + + return output + + def _generate_beam_search( + self, + input_ids, + cur_len, + max_length, + min_length, + do_sample, + early_stopping, + temperature, + top_k, + top_p, + repetition_penalty, + no_repeat_ngram_size, + bad_words_ids, + pad_token_id, + eos_token_id, + batch_size, + num_return_sequences, + length_penalty, + num_beams, + vocab_size, + attention_mask, + use_cache, + model_kwargs, + ): + """Generate sequences for each example with beam search.""" + + # generated hypotheses + generated_hyps = [ + BeamHypotheses(num_beams, max_length, 
length_penalty, early_stopping=early_stopping) + for _ in range(batch_size) + ] + + # scores for each sentence in the beam + beam_scores = torch.zeros((batch_size, num_beams), dtype=torch.float, device=input_ids.device) + + # for greedy decoding it is made sure that only tokens of the first beam are considered to avoid sampling the exact same tokens three times + if do_sample is False: + beam_scores[:, 1:] = -1e9 + beam_scores = beam_scores.view(-1) # shape (batch_size * num_beams,) + + # cache compute states + past = None + + # done sentences + done = [False for _ in range(batch_size)] + + while cur_len < max_length: + model_inputs = self.prepare_inputs_for_generation( + input_ids, past=past, attention_mask=attention_mask, use_cache=use_cache, **model_kwargs + ) + outputs = self(**model_inputs, return_dict=True) # (batch_size * num_beams, cur_len, vocab_size) + next_token_logits = outputs.logits[:, -1, :] # (batch_size * num_beams, vocab_size) + + # if model has past, then set the past variable to speed up decoding + if "past_key_values" in outputs: + past = outputs.past_key_values + elif "mems" in outputs: + past = outputs.mems + + if self.config.is_encoder_decoder and do_sample is False: + # TODO (PVP) still a bit hacky here - there might be a better solution + next_token_logits = self.adjust_logits_during_generation( + next_token_logits, cur_len=cur_len, max_length=max_length + ) + + scores = F.log_softmax(next_token_logits, dim=-1) # (batch_size * num_beams, vocab_size) + + scores = self.postprocess_next_token_scores( + scores=scores, + input_ids=input_ids, + no_repeat_ngram_size=no_repeat_ngram_size, + bad_words_ids=bad_words_ids, + cur_len=cur_len, + min_length=min_length, + max_length=max_length, + eos_token_id=eos_token_id, + repetition_penalty=repetition_penalty, + batch_size=batch_size, + num_beams=num_beams, + ) + + assert scores.shape == (batch_size * num_beams, vocab_size), "Shapes of scores: {} != {}".format( + scores.shape, (batch_size * num_beams, 
vocab_size) + ) + + if do_sample: + _scores = scores + beam_scores[:, None].expand_as(scores) # (batch_size * num_beams, vocab_size) + # Temperature + if temperature != 1.0: + _scores = _scores / temperature + # Top-p/top-k filtering + _scores = top_k_top_p_filtering( + _scores, top_k=top_k, top_p=top_p, min_tokens_to_keep=2 + ) # (batch_size * num_beams, vocab_size) + # re-organize to group the beam together to sample from all beam_idxs + _scores = _scores.contiguous().view( + batch_size, num_beams * vocab_size + ) # (batch_size, num_beams * vocab_size) + + # Sample 2 next tokens for each beam (so we have some spare tokens and match output of greedy beam search) + probs = F.softmax(_scores, dim=-1) + next_tokens = torch.multinomial(probs, num_samples=2 * num_beams) # (batch_size, num_beams * 2) + # Compute next scores + next_scores = torch.gather(_scores, -1, next_tokens) # (batch_size, num_beams * 2) + # sort the sampled vector to make sure that the first num_beams samples are the best + next_scores, next_scores_indices = torch.sort(next_scores, descending=True, dim=1) + next_tokens = torch.gather(next_tokens, -1, next_scores_indices) # (batch_size, num_beams * 2) + + else: + next_scores = scores + beam_scores[:, None].expand_as(scores) # (batch_size * num_beams, vocab_size) + + # re-organize to group the beam together (we are keeping top hypothesis accross beams) + next_scores = next_scores.view( + batch_size, num_beams * vocab_size + ) # (batch_size, num_beams * vocab_size) + + next_scores, next_tokens = torch.topk(next_scores, 2 * num_beams, dim=1, largest=True, sorted=True) + + assert next_scores.size() == next_tokens.size() == (batch_size, 2 * num_beams) + + # next batch beam content + next_batch_beam = [] + + # for each sentence + for batch_idx in range(batch_size): + + # if we are done with this sentence, add a pad token + if done[batch_idx]: + assert ( + len(generated_hyps[batch_idx]) >= num_beams + ), "Batch can only be done if at least {} beams have 
been generated".format(num_beams) + assert ( + eos_token_id is not None and pad_token_id is not None + ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined" + next_batch_beam.extend([(0, pad_token_id, 0)] * num_beams) # pad the batch + continue + + # next sentence beam content, this will get added to next_batch_beam + next_sent_beam = [] + + # next tokens for this sentence + for beam_token_rank, (beam_token_id, beam_token_score) in enumerate( + zip(next_tokens[batch_idx], next_scores[batch_idx]) + ): + # get beam and token IDs + beam_id = beam_token_id // vocab_size + token_id = beam_token_id % vocab_size + + effective_beam_id = batch_idx * num_beams + beam_id + # add to generated hypotheses if end of sentence + if (eos_token_id is not None) and (token_id.item() == eos_token_id): + # if beam_token does not belong to top num_beams tokens, it should not be added + is_beam_token_worse_than_top_num_beams = beam_token_rank >= num_beams + if is_beam_token_worse_than_top_num_beams: + continue + generated_hyps[batch_idx].add( + input_ids[effective_beam_id].clone(), + beam_token_score.item(), + ) + else: + # add next predicted token since it is not eos_token + next_sent_beam.append((beam_token_score, token_id, effective_beam_id)) + + # once the beam for next step is full, don't add more tokens to it. 
+ if len(next_sent_beam) == num_beams: + break + + # Check if we are done so that we can save a pad step if all(done) + done[batch_idx] = done[batch_idx] or generated_hyps[batch_idx].is_done( + next_scores[batch_idx].max().item(), cur_len + ) + + # update next beam content + assert len(next_sent_beam) == num_beams, "Beam should always be full" + next_batch_beam.extend(next_sent_beam) + assert len(next_batch_beam) == num_beams * (batch_idx + 1), "We should have added num_beams each step" + + # stop when we are done with each sentence + if all(done): + break + + # sanity check / prepare next batch + assert len(next_batch_beam) == batch_size * num_beams + beam_scores = beam_scores.new([x[0] for x in next_batch_beam]) + beam_tokens = input_ids.new([x[1] for x in next_batch_beam]) + beam_idx = input_ids.new([x[2] for x in next_batch_beam]) + + # re-order batch and update current length + input_ids = input_ids[beam_idx, :] + input_ids = torch.cat([input_ids, beam_tokens.unsqueeze(1)], dim=-1) + cur_len = cur_len + 1 + + # re-order internal states + if past is not None: + past = self._reorder_cache(past, beam_idx) + + # extend attention_mask for new generated input if only decoder + # (huxu): move out since we trim attention_mask by ourselves. 
+ # if self.config.is_encoder_decoder is False: + # attention_mask = torch.cat( + # [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 + # ) + + # finalize all open beam hypotheses and add to generated hypotheses + for batch_idx in range(batch_size): + if done[batch_idx]: + continue + + # test that beam scores match previously calculated scores if not eos and batch_idx not done + if eos_token_id is not None and all( + (token_id % vocab_size).item() != eos_token_id for token_id in next_tokens[batch_idx] + ): + assert torch.all( + next_scores[batch_idx, :num_beams] == beam_scores.view(batch_size, num_beams)[batch_idx] + ), "If batch_idx is not done, final next scores: {} have to equal to accumulated beam_scores: {}".format( + next_scores[:, :num_beams][batch_idx], + beam_scores.view(batch_size, num_beams)[batch_idx], + ) + + # need to add best num_beams hypotheses to generated hyps + for beam_id in range(num_beams): + effective_beam_id = batch_idx * num_beams + beam_id + final_score = beam_scores[effective_beam_id].item() + final_tokens = input_ids[effective_beam_id] + generated_hyps[batch_idx].add(final_tokens, final_score) + + # depending on whether greedy generation is wanted or not define different output_batch_size and output_num_return_sequences_per_batch + output_batch_size = batch_size if do_sample else batch_size * num_return_sequences + output_num_return_sequences_per_batch = 1 if do_sample else num_return_sequences + + # select the best hypotheses + sent_lengths = input_ids.new(output_batch_size) + best = [] + + # retrieve best hypotheses + for i, hypotheses in enumerate(generated_hyps): + sorted_hyps = sorted(hypotheses.beams, key=lambda x: x[0]) + for j in range(output_num_return_sequences_per_batch): + effective_batch_idx = output_num_return_sequences_per_batch * i + j + best_hyp = sorted_hyps.pop()[1] + sent_lengths[effective_batch_idx] = len(best_hyp) + best.append(best_hyp) + + # prepare for adding eos + sent_max_len = 
min(sent_lengths.max().item() + 1, max_length) + decoded = input_ids.new(output_batch_size, sent_max_len) + # shorter batches are padded if needed + if sent_lengths.min().item() != sent_lengths.max().item(): + assert pad_token_id is not None, "`pad_token_id` has to be defined" + decoded.fill_(pad_token_id) + + # fill with hypotheses and eos_token_id if the latter fits in + for i, hypo in enumerate(best): + decoded[i, : sent_lengths[i]] = hypo + if sent_lengths[i] < max_length: + decoded[i, sent_lengths[i]] = eos_token_id + + return decoded + + def _generate_no_beam_search( + self, + input_ids, + cur_len, + max_length, + min_length, + do_sample, + temperature, + top_k, + top_p, + repetition_penalty, + no_repeat_ngram_size, + bad_words_ids, + pad_token_id, + eos_token_id, + batch_size, + attention_mask, + use_cache, + model_kwargs, + ): + """Generate sequences for each example without beam search (num_beams == 1). + All returned sequence are generated independantly. + """ + # length of generated sentences / unfinished sentences + unfinished_sents = input_ids.new(batch_size).fill_(1) + sent_lengths = input_ids.new(batch_size).fill_(max_length) + + past = None + while cur_len < max_length: + model_inputs = self.prepare_inputs_for_generation( + input_ids, past=past, attention_mask=attention_mask, use_cache=use_cache, **model_kwargs + ) + + outputs = self(**model_inputs, return_dict=True) + next_token_logits = outputs.logits[:, -1, :] + scores = self.postprocess_next_token_scores( + scores=next_token_logits, + input_ids=input_ids, + no_repeat_ngram_size=no_repeat_ngram_size, + bad_words_ids=bad_words_ids, + cur_len=cur_len, + min_length=min_length, + max_length=max_length, + eos_token_id=eos_token_id, + repetition_penalty=repetition_penalty, + batch_size=batch_size, + num_beams=1, + ) + + # if model has past, then set the past variable to speed up decoding + if "past_key_values" in outputs: + past = outputs.past_key_values + elif "mems" in outputs: + past = outputs.mems 
+ + if do_sample: + # Temperature (higher temperature => more likely to sample low probability tokens) + if temperature != 1.0: + scores = scores / temperature + # Top-p/top-k filtering + next_token_logscores = top_k_top_p_filtering(scores, top_k=top_k, top_p=top_p) + # Sample + probs = F.softmax(next_token_logscores, dim=-1) + next_token = torch.multinomial(probs, num_samples=1).squeeze(1) + else: + # Greedy decoding + next_token = torch.argmax(next_token_logits, dim=-1) + + # print(next_token_logits[0,next_token[0]], next_token_logits[0,eos_token_id]) + + # update generations and finished sentences + if eos_token_id is not None: + # pad finished sentences if eos_token_id exist + tokens_to_add = next_token * unfinished_sents + (pad_token_id) * (1 - unfinished_sents) + else: + tokens_to_add = next_token + + # add token and increase length by one + input_ids = torch.cat([input_ids, tokens_to_add.unsqueeze(-1)], dim=-1) + cur_len = cur_len + 1 + + if eos_token_id is not None: + eos_in_sents = tokens_to_add == eos_token_id + # if sentence is unfinished and the token to add is eos, sent_lengths is filled with current length + is_sents_unfinished_and_token_to_add_is_eos = unfinished_sents.mul(eos_in_sents.long()).bool() + sent_lengths.masked_fill_(is_sents_unfinished_and_token_to_add_is_eos, cur_len) + # unfinished_sents is set to zero if eos in sentence + unfinished_sents.mul_((~eos_in_sents).long()) + + # stop when there is a
in each sentence, or if we exceed the maximul length + if unfinished_sents.max() == 0: + break + + + # extend attention_mask for new generated input if only decoder + # if self.config.is_encoder_decoder is False: + # attention_mask = torch.cat( + # [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 + # ) + + return input_ids diff --git a/fairseq/examples/MMPT/mmpt/models/transformermodel.py b/fairseq/examples/MMPT/mmpt/models/transformermodel.py new file mode 100644 index 0000000..6acc419 --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/models/transformermodel.py @@ -0,0 +1,734 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright (c) Facebook, Inc. 
All Rights Reserved + +import torch + +from torch import nn + +try: + from transformers.modeling_bert import ( + BertPreTrainedModel, + BertModel, + BertEncoder, + BertPredictionHeadTransform, + ) +except ImportError: + pass + +from ..modules import VideoTokenMLP, MMBertEmbeddings + + +# --------------- fine-tuning models --------------- +class MMBertForJoint(BertPreTrainedModel): + """A BertModel with isolated attention mask to separate modality.""" + + def __init__(self, config): + super().__init__(config) + self.videomlp = VideoTokenMLP(config) + self.bert = MMBertModel(config) + self.init_weights() + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + next_sentence_label=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + separate_forward_split=None, + ): + return_dict = ( + return_dict if return_dict is not None + else self.config.use_return_dict + ) + video_tokens = self.videomlp(input_video_embeds) + + outputs = self.bert( + input_ids, + video_tokens, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + separate_forward_split=separate_forward_split, + ) + + return outputs + + +class MMBertForTokenClassification(BertPreTrainedModel): + """A BertModel similar to MMJointUni, with extra wrapper layer + to be fine-tuned from other pretrained MMFusion model.""" + + def __init__(self, config): + super().__init__(config) + self.videomlp = VideoTokenMLP(config) + self.bert = MMBertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + # TODO(huxu): 779 is the number of classes for COIN: move to config? 
+ self.classifier = nn.Linear(config.hidden_size, 779) + self.init_weights() + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + next_sentence_label=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + separate_forward_split=None, + ): + return_dict = ( + return_dict if return_dict is not None + else self.config.use_return_dict + ) + + video_tokens = self.videomlp(input_video_embeds) + outputs = self.bert( + input_ids, + video_tokens, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + separate_forward_split=separate_forward_split, + ) + + return (self.classifier(outputs[0]),) + + +# ------------ pre-training models ---------------- + +class MMBertForEncoder(BertPreTrainedModel): + """A BertModel for Contrastive Learning.""" + def __init__(self, config): + super().__init__(config) + self.videomlp = VideoTokenMLP(config) + self.bert = MMBertModel(config) + self.init_weights() + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + return_dict = ( + return_dict if return_dict is not None + else self.config.use_return_dict + ) + if input_video_embeds is not None: + video_tokens = self.videomlp(input_video_embeds) + else: + video_tokens = None + + outputs = self.bert( + input_ids, + video_tokens, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + return outputs + + +class MMBertForMFMMLM(BertPreTrainedModel): + """A BertModel with shared prediction head on MFM-MLM.""" + def __init__(self, config): + super().__init__(config) + self.videomlp = VideoTokenMLP(config) + self.bert = MMBertModel(config) + self.cls = MFMMLMHead(config) + self.hidden_size = config.hidden_size + self.init_weights() + + def get_output_embeddings(self): + return self.cls.predictions.decoder + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + masked_frame_labels=None, + target_video_hidden_states=None, + non_masked_frame_mask=None, + masked_lm_labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + return_dict = ( + return_dict if return_dict is not None + else self.config.use_return_dict + ) + if input_video_embeds is not None: + video_tokens = self.videomlp(input_video_embeds) + else: + video_tokens = None + + if target_video_hidden_states is not None: + target_video_hidden_states = self.videomlp( + target_video_hidden_states) + + non_masked_frame_hidden_states = video_tokens.masked_select( + non_masked_frame_mask.unsqueeze(-1) + ).view(-1, self.hidden_size) + + outputs = self.bert( + input_ids, + video_tokens, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + + mfm_scores, prediction_scores = None, None + if masked_frame_labels is not None and masked_lm_labels is not None: + # split the sequence. 
+ text_offset = masked_frame_labels.size(1) + 1 # [CLS] + video_sequence_output = sequence_output[ + :, 1:text_offset + ] # remove [SEP] as not in video_label. + text_sequence_output = torch.cat( + [sequence_output[:, :1], sequence_output[:, text_offset:]], + dim=1 + ) + + hidden_size = video_sequence_output.size(-1) + selected_video_output = video_sequence_output.masked_select( + masked_frame_labels.unsqueeze(-1) + ).view(-1, hidden_size) + + # only compute select tokens to training to speed up. + hidden_size = text_sequence_output.size(-1) + # masked_lm_labels = masked_lm_labels.reshape(-1) + labels_mask = masked_lm_labels != -100 + + selected_text_output = text_sequence_output.masked_select( + labels_mask.unsqueeze(-1) + ).view(-1, hidden_size) + mfm_scores, prediction_scores = self.cls( + selected_video_output, + target_video_hidden_states, + non_masked_frame_hidden_states, + selected_text_output, + ) + + output = ( + mfm_scores, + prediction_scores, + ) + outputs + return output + + +class BertMFMMLMPredictionHead(nn.Module): + def __init__(self, config): + super().__init__() + self.transform = BertPredictionHeadTransform(config) + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
+ self.decoder = nn.Linear( + config.hidden_size, config.vocab_size, bias=False) + + self.bias = nn.Parameter(torch.zeros(config.vocab_size)) + + # Need a link between the two variables so that the bias is correctly + # resized with `resize_token_embeddings` + self.decoder.bias = self.bias + + def forward( + self, + video_hidden_states=None, + target_video_hidden_states=None, + non_masked_frame_hidden_states=None, + text_hidden_states=None, + ): + video_logits, text_logits = None, None + if video_hidden_states is not None: + video_hidden_states = self.transform(video_hidden_states) + non_masked_frame_logits = torch.mm( + video_hidden_states, + non_masked_frame_hidden_states.transpose(1, 0) + ) + masked_frame_logits = torch.bmm( + video_hidden_states.unsqueeze(1), + target_video_hidden_states.unsqueeze(-1), + ).squeeze(-1) + video_logits = torch.cat( + [masked_frame_logits, non_masked_frame_logits], dim=1 + ) + + if text_hidden_states is not None: + text_hidden_states = self.transform(text_hidden_states) + text_logits = self.decoder(text_hidden_states) + return video_logits, text_logits + + +class MFMMLMHead(nn.Module): + def __init__(self, config): + super().__init__() + self.predictions = BertMFMMLMPredictionHead(config) + + def forward( + self, + video_hidden_states=None, + target_video_hidden_states=None, + non_masked_frame_hidden_states=None, + text_hidden_states=None, + ): + video_logits, text_logits = self.predictions( + video_hidden_states, + target_video_hidden_states, + non_masked_frame_hidden_states, + text_hidden_states, + ) + return video_logits, text_logits + + +class MMBertForMTM(MMBertForMFMMLM): + def __init__(self, config): + BertPreTrainedModel.__init__(self, config) + self.videomlp = VideoTokenMLP(config) + self.bert = MMBertModel(config) + self.cls = MTMHead(config) + self.hidden_size = config.hidden_size + self.init_weights() + + +class BertMTMPredictionHead(nn.Module): + def __init__(self, config): + super().__init__() + self.transform = 
BertPredictionHeadTransform(config) + self.decoder = nn.Linear( + config.hidden_size, config.vocab_size, bias=False) + + def forward( + self, + video_hidden_states=None, + target_video_hidden_states=None, + non_masked_frame_hidden_states=None, + text_hidden_states=None, + ): + non_masked_frame_hidden_states = non_masked_frame_hidden_states.transpose(1, 0) + video_logits, text_logits = None, None + if video_hidden_states is not None: + video_hidden_states = self.transform(video_hidden_states) + + masked_frame_logits = torch.bmm( + video_hidden_states.unsqueeze(1), + target_video_hidden_states.unsqueeze(-1), + ).squeeze(-1) + + non_masked_frame_logits = torch.mm( + video_hidden_states, + non_masked_frame_hidden_states + ) + video_on_vocab_logits = self.decoder(video_hidden_states) + video_logits = torch.cat([ + masked_frame_logits, + non_masked_frame_logits, + video_on_vocab_logits], dim=1) + + if text_hidden_states is not None: + text_hidden_states = self.transform(text_hidden_states) + # text first so label does not need to be shifted. 
+ text_on_vocab_logits = self.decoder(text_hidden_states) + text_on_video_logits = torch.mm( + text_hidden_states, + non_masked_frame_hidden_states + ) + text_logits = torch.cat([ + text_on_vocab_logits, + text_on_video_logits + ], dim=1) + + return video_logits, text_logits + + +class MTMHead(nn.Module): + def __init__(self, config): + super().__init__() + self.predictions = BertMTMPredictionHead(config) + + def forward( + self, + video_hidden_states=None, + target_video_hidden_states=None, + non_masked_frame_hidden_states=None, + text_hidden_states=None, + ): + video_logits, text_logits = self.predictions( + video_hidden_states, + target_video_hidden_states, + non_masked_frame_hidden_states, + text_hidden_states, + ) + return video_logits, text_logits + + +class MMBertModel(BertModel): + """MMBertModel has MMBertEmbedding to support video tokens.""" + + def __init__(self, config, add_pooling_layer=True): + super().__init__(config) + # overwrite embedding + self.embeddings = MMBertEmbeddings(config) + self.encoder = MultiLayerAttentionMaskBertEncoder(config) + self.init_weights() + + def forward( + self, + input_ids=None, + input_video_embeds=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + separate_forward_split=None, + ): + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + return_dict = ( + return_dict if return_dict is not None + else self.config.use_return_dict + ) + + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both input_ids " + "and inputs_embeds at the same time" + ) + elif input_ids is not None: + if 
input_video_embeds is not None: + input_shape = ( + input_ids.size(0), + input_ids.size(1) + input_video_embeds.size(1), + ) + else: + input_shape = ( + input_ids.size(0), + input_ids.size(1), + ) + elif inputs_embeds is not None: + if input_video_embeds is not None: + input_shape = ( + inputs_embeds.size(0), + inputs_embeds.size(1) + input_video_embeds.size(1), + ) + else: + input_shape = ( + input_ids.size(0), + input_ids.size(1), + ) + else: + raise ValueError( + "You have to specify either input_ids or inputs_embeds") + + device = input_ids.device if input_ids is not None \ + else inputs_embeds.device + + if attention_mask is None: + attention_mask = torch.ones(input_shape, device=device) + if token_type_ids is None: + token_type_ids = torch.zeros( + input_shape, dtype=torch.long, device=device) + + # We can provide a self-attention mask of dimensions + # [batch_size, from_seq_length, to_seq_length] + # ourselves in which case + # we just need to make it broadcastable to all heads. + extended_attention_mask: torch.Tensor = \ + self.get_extended_attention_mask( + attention_mask, input_shape, device) + + # If a 2D or 3D attention mask is provided for the cross-attention + # we need to make broadcastable to + # [batch_size, num_heads, seq_length, seq_length] + if self.config.is_decoder and encoder_hidden_states is not None: + ( + encoder_batch_size, + encoder_sequence_length, + _, + ) = encoder_hidden_states.size() + encoder_hidden_shape = ( + encoder_batch_size, encoder_sequence_length) + if encoder_attention_mask is None: + encoder_attention_mask = torch.ones( + encoder_hidden_shape, device=device) + encoder_extended_attention_mask = self.invert_attention_mask( + encoder_attention_mask + ) + else: + encoder_extended_attention_mask = None + + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # input head_mask has shape [num_heads] or + # [num_hidden_layers x num_heads] + # and 
head_mask is converted to shape + # [num_hidden_layers x batch x num_heads x seq_length x seq_length] + + head_mask = self.get_head_mask( + head_mask, self.config.num_hidden_layers) + + embedding_output = self.embeddings( + input_ids, + input_video_embeds, + position_ids=position_ids, + token_type_ids=token_type_ids, + inputs_embeds=inputs_embeds, + ) + + if separate_forward_split is not None: + split_embedding_output = \ + embedding_output[:, :separate_forward_split] + split_extended_attention_mask = extended_attention_mask[ + :, :, :, :separate_forward_split, :separate_forward_split + ] + split_encoder_outputs = self.encoder( + split_embedding_output, + attention_mask=split_extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + assert ( + len(split_encoder_outputs) <= 2 + ), "we do not support merge on attention for now." + encoder_outputs = [] + encoder_outputs.append([split_encoder_outputs[0]]) + if len(split_encoder_outputs) == 2: + encoder_outputs.append([]) + for _all_hidden_states in split_encoder_outputs[1]: + encoder_outputs[-1].append([_all_hidden_states]) + + split_embedding_output = \ + embedding_output[:, separate_forward_split:] + split_extended_attention_mask = extended_attention_mask[ + :, :, :, separate_forward_split:, separate_forward_split: + ] + + split_encoder_outputs = self.encoder( + split_embedding_output, + attention_mask=split_extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + assert ( + len(split_encoder_outputs) <= 2 + ), "we do not support merge on attention for now." 
+ encoder_outputs[0].append(split_encoder_outputs[0]) + encoder_outputs[0] = torch.cat(encoder_outputs[0], dim=1) + if len(split_encoder_outputs) == 2: + for layer_idx, _all_hidden_states in enumerate( + split_encoder_outputs[1] + ): + encoder_outputs[1][layer_idx].append(_all_hidden_states) + encoder_outputs[1][layer_idx] = torch.cat( + encoder_outputs[1][layer_idx], dim=1 + ) + encoder_outputs = tuple(encoder_outputs) + else: + encoder_outputs = self.encoder( + embedding_output, + attention_mask=extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = encoder_outputs[0] + pooled_output = ( + self.pooler(sequence_output) if self.pooler is not None else None + ) + + return (sequence_output, pooled_output) + encoder_outputs[1:] + + def get_extended_attention_mask(self, attention_mask, input_shape, device): + """This is borrowed from `modeling_utils.py` with the support of + multi-layer attention masks. + The second dim is expected to be number of layers. + See `MMAttentionMaskProcessor`. + Makes broadcastable attention and causal masks so that future + and masked tokens are ignored. + + Arguments: + attention_mask (:obj:`torch.Tensor`): + Mask with ones indicating tokens to attend to, + zeros for tokens to ignore. + input_shape (:obj:`Tuple[int]`): + The shape of the input to the model. + device: (:obj:`torch.device`): + The device of the input to the model. + + Returns: + :obj:`torch.Tensor` The extended attention mask, \ + with a the same dtype as :obj:`attention_mask.dtype`. + """ + # We can provide a self-attention mask of dimensions + # [batch_size, from_seq_length, to_seq_length] + # ourselves in which case we just need to make it broadcastable + # to all heads. 
class MultiLayerAttentionMaskBertEncoder(BertEncoder):
    """BertEncoder variant that accepts one attention mask per layer.

    A 5-D ``attention_mask`` is indexed on its second dimension with the
    layer index, so each layer receives its own mask. Any other mask,
    including ``None``, is passed to every layer unchanged.
    """

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):
        """Run every module in ``self.layer`` over ``hidden_states``.

        Args:
            hidden_states: input to the first layer.
            attention_mask: ``None``, a mask shared by all layers, or a 5-D
                mask whose dim 1 is indexed by layer.
            head_mask: optional per-layer head masks, indexed by layer.
            encoder_hidden_states: forwarded to every layer as-is.
            encoder_attention_mask: forwarded to every layer as-is.
            output_attentions: also collect ``layer_outputs[1]`` per layer.
            output_hidden_states: also collect the input of every layer
                plus the final output.
            return_dict: accepted for signature compatibility; not used.

        Returns:
            tuple: the final hidden states, followed by the collected hidden
            states and attentions when they were requested.
        """
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        # The signature allows attention_mask=None, so guard before .dim().
        per_layer_mask = (
            attention_mask is not None and attention_mask.dim() == 5
        )

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)
            layer_head_mask = head_mask[i] if head_mask is not None else None
            layer_attention_mask = (
                attention_mask[:, i, :, :, :]
                if per_layer_mask
                else attention_mask
            )

            if getattr(self.config, "gradient_checkpointing", False):

                def create_custom_forward(module):
                    # checkpoint() only passes positional tensors through, so
                    # output_attentions is bound via the closure.
                    def custom_forward(*inputs):
                        return module(*inputs, output_attentions)

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(layer_module),
                    hidden_states,
                    layer_attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    layer_attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    output_attentions,
                )
            hidden_states = layer_outputs[0]
            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        return tuple(
            v
            for v in [hidden_states, all_hidden_states, all_attentions]
            if v is not None
        )
class MMBertEmbeddings(BertEmbeddings):
    """BERT embeddings that splice video embeddings in after ``[CLS]``."""

    def __init__(self, config):
        super().__init__(config)
        self.max_video_len = config.max_video_len
        if getattr(config, "use_seg_emb", False):
            # The original VLM paper uses seg_embeddings for temporal space.
            # Although unused, creating it changes the randomness of
            # initialization, so it is kept for reproducibility.
            self.seg_embeddings = nn.Embedding(256, config.hidden_size)

    def forward(
        self,
        input_ids,
        input_video_embeds,
        token_type_ids=None,
        position_ids=None,
        inputs_embeds=None,
    ):
        """Embed text (and optionally video) tokens.

        Args:
            input_ids: text token ids, or ``None`` when ``inputs_embeds`` is
                given. The expected format is
                ``[CLS] [SEP] caption [SEP] padding``.
            input_video_embeds: video embeddings inserted right after the
                first text position, or ``None`` for text-only input.
            token_type_ids: optional; defaults to all zeros.
            position_ids: optional; derived from ``self.position_ids`` when
                omitted (see the comment below).
            inputs_embeds: optional pre-computed text embeddings used instead
                of looking up ``input_ids``.

        Returns:
            The sum of input, position and token-type embeddings after
            LayerNorm and dropout.
        """
        input_tensor = input_ids if input_ids is not None else inputs_embeds
        # Length of the text part. Taken from input_tensor so that calls
        # providing only inputs_embeds (input_ids=None) also work.
        text_len = input_tensor.size(1)
        if input_video_embeds is not None:
            input_shape = (
                input_tensor.size(0),
                text_len + input_video_embeds.size(1),
            )
        else:
            input_shape = (input_tensor.size(0), text_len)

        if position_ids is None:
            # Position ids are two slices of the ``self.position_ids`` buffer:
            #   [0, video_len]  -> [CLS] followed by the video tokens;
            #   [start, end)    -> the text part, located after the slots
            #                      reserved for ``max_video_len`` video tokens.
            # With video, the text slice starts at the video [SEP]
            # (max_video_len + 1). For text-only input (e.g. `forward_text`
            # of MMFusionShare) the video [SEP] is skipped, so the slice
            # starts at the first text token (max_video_len + 2).
            if input_video_embeds is not None:
                video_len = input_video_embeds.size(1)
                starting_offset = self.max_video_len + 1  # video [SEP]
                ending_offset = self.max_video_len + text_len
            else:
                video_len = 0
                starting_offset = self.max_video_len + 2  # first text token.
                ending_offset = self.max_video_len + text_len + 1
            position_ids = torch.cat([
                self.position_ids[:, :video_len + 1],
                self.position_ids[:, starting_offset:ending_offset]
            ], dim=1)

        if token_type_ids is None:
            token_type_ids = torch.zeros(
                input_shape, dtype=torch.long, device=self.position_ids.device
            )

        # Build [CLS] video tokens [SEP] caption [SEP] from the text input.
        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        if input_video_embeds is not None:
            inputs_mm_embeds = torch.cat([
                inputs_embeds[:, :1], input_video_embeds, inputs_embeds[:, 1:]
            ], dim=1)
        else:
            # text only for `MMFusionShare`.
            inputs_mm_embeds = inputs_embeds

        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        embeddings = inputs_mm_embeds + position_embeddings
        embeddings += token_type_embeddings

        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
def add(self, hidden_states, video_ids, last=False):
    """Register new videos and store (or cache for training) their vectors.

    Rows whose video id is already present in ``self.videoid_to_vectoridx``
    are dropped. Each new id is assigned the next free vector index.

    Args:
        hidden_states: 2-D float32 numpy array, one row per entry of
            ``video_ids``.
        video_ids: ids aligned with the rows of ``hidden_states``.
        last: accepted but not used by this method.
    """
    n_rows, n_ids = len(hidden_states), len(video_ids)
    assert n_rows == n_ids, "{}, {}".format(str(n_rows), str(n_ids))
    assert len(hidden_states.shape) == 2
    assert hidden_states.dtype == np.float32

    id_to_idx = self.videoid_to_vectoridx
    fresh_rows = []
    for row, vid in enumerate(video_ids):
        if vid in id_to_idx:
            continue
        id_to_idx[vid] = len(id_to_idx)
        fresh_rows.append(row)
    new_vectors = hidden_states[fresh_rows]

    if self.db.is_trained:
        self.db.add(new_vectors)
        return

    # Index not trained yet: buffer vectors until the threshold is reached,
    # then let finalize_training() consume the cache.
    self.train_cache.append(new_vectors)
    self.train_len += new_vectors.shape[0]
    if self.train_len >= self.train_thres:
        self.finalize_training()
def search_by_video_ids(
    self,
    video_ids,
    retri_factor
):
    """Return, for every given video, itself followed by its neighbours.

    Each video's stored vector is reconstructed from ``self.db`` and used
    as a query for ``retri_factor`` results.

    Args:
        video_ids: ids that are keys of ``self.videoid_to_vectoridx``.
        retri_factor: number of results requested from the index per query.

    Returns:
        list[list]: one candidate list per input id. The first element is
        always the queried id itself, followed by the ids of the returned
        vectors (negative indices and the query's own vector are skipped).

    Raises:
        ValueError: if the id table and the index disagree in size.
    """
    if len(self.videoid_to_vectoridx) != self.db.ntotal:
        raise ValueError(
            len(self.videoid_to_vectoridx),
            self.db.ntotal
        )

    if len(video_ids) == 0:
        # np.stack below cannot handle an empty list of queries.
        return []

    if not self.make_direct_maps_done:
        self.make_direct_maps()
        # make_direct_maps() of this class does not record completion, so
        # set the flag here; otherwise this guard never becomes false.
        self.make_direct_maps_done = True

    # (Re)build the reverse table when missing or when videos were added
    # after it was cached.
    reverse = self.vectoridx_to_videoid
    if reverse is None or len(reverse) != len(self.videoid_to_vectoridx):
        reverse = {
            vector_idx: videoid
            for videoid, vector_idx in self.videoid_to_vectoridx.items()
        }
        assert len(reverse) == len(self.videoid_to_vectoridx)
        self.vectoridx_to_videoid = reverse

    vector_ids = [self.videoid_to_vectoridx[vid] for vid in video_ids]
    query_hidden_states = np.stack(
        [self.db.reconstruct(vector_id) for vector_id in vector_ids]
    )

    # MultilingualFaissDataset uses the following; not sure the reason.
    # faiss.ParameterSpace().set_index_parameter(self.db, "nprobe", 10)
    _, index = self.db.search(query_hidden_states, retri_factor)
    outputs = []
    for sample_idx, sample in enumerate(index):
        # the first video_id is always the video itself.
        cands = [video_ids[sample_idx]]
        own_idx = vector_ids[sample_idx]
        cands.extend(
            reverse[vector_idx]
            for vector_idx in sample
            if vector_idx >= 0 and own_idx != vector_idx
        )
        outputs.append(cands)
    return outputs
def search(
    self,
    query_hidden_states,
    orig_dist,
):
    """Find, per query, the nearest stored video if it beats ``orig_dist``.

    Args:
        query_hidden_states: query vectors passed to ``self.db.search``.
        orig_dist: per-query reference distances, indexed by query position.

    Returns:
        list: for each query, the video id of the single nearest vector when
        one was found and its distance is strictly smaller than the
        reference distance; ``None`` otherwise.

    Raises:
        ValueError: if the id table and the index disagree in size.
    """
    if len(self.videoid_to_vectoridx) != self.db.ntotal:
        raise ValueError(
            len(self.videoid_to_vectoridx),
            self.db.ntotal
        )

    if not self.make_direct_maps_done:
        self.make_direct_maps()
    if self.vectoridx_to_videoid is None:
        self.vectoridx_to_videoid = {
            self.videoid_to_vectoridx[videoid]: videoid
            for videoid in self.videoid_to_vectoridx
        }
        assert len(self.vectoridx_to_videoid) \
            == len(self.videoid_to_vectoridx)

    # MultilingualFaissDataset uses the following; not sure the reason.
    # faiss.ParameterSpace().set_index_parameter(self.db, "nprobe", 10)
    queried_dist, index = self.db.search(query_hidden_states, 1)
    # Only one neighbour is requested, so flatten to one scalar per query
    # (VectorRetriever.search does the same with ``[:, 0]``). Without this
    # every row is a 1-element array, which cannot be used as a dict key.
    queried_dist = np.asarray(queried_dist).reshape(-1)
    index = np.asarray(index).reshape(-1)

    outputs = []
    for sample_idx, vector_idx in enumerate(index):
        # and queried_dist[sample_idx] < thres \
        if vector_idx >= 0 \
                and queried_dist[sample_idx] < orig_dist[sample_idx]:
            outputs.append(self.vectoridx_to_videoid[int(vector_idx)])
        else:
            outputs.append(None)
    return outputs
def make_direct_maps(self):
    """Build the direct map of both the video and the text index.

    Also records completion in ``self.make_direct_maps_done`` (as
    ``VectorRetrieverDM.make_direct_maps`` does), so callers guarding on
    that flag do not request the maps again on every search.
    """
    faiss.downcast_index(self.db["video"]).make_direct_map()
    faiss.downcast_index(self.db["text"]).make_direct_map()
    self.make_direct_maps_done = True
def get_clips_by_video_id(self, video_id):
    """Return all stored clip keys that belong to ``video_id``.

    The keys of ``self.videoid_to_vectoridx`` are unpacked as
    ``(video_id, video_clip, text_clip)`` triples. On first use they are
    grouped by their first element into ``self.video_to_videoid``.

    Args:
        video_id: id of the video whose clips are requested.

    Returns:
        list: the ``(video_id, video_clip, text_clip)`` triples of that
        video, in table order; an empty list for an unknown id.
    """
    if not self.video_to_videoid:
        # Use distinct loop names: reusing ``video_id`` here would overwrite
        # the argument and make the first call return the wrong video.
        for clip_video_id, video_clip, text_clip in self.videoid_to_vectoridx:
            self.video_to_videoid[clip_video_id].append(
                (clip_video_id, video_clip, text_clip))
    return self.video_to_videoid[video_id]
class VectorPool(object):
    """
    Base class of retrieval space.
    """

    def __init__(self, config):
        """Read the hidden size and retriever class named by ``config``.

        ``config.dataset.bert_name`` selects the pretrained configuration
        whose ``hidden_size`` is used; ``config.retriever_cls`` names a class
        in the ``retri`` module.
        """
        from transformers import AutoConfig
        self.hidden_size = AutoConfig.from_pretrained(
            config.dataset.bert_name).hidden_size
        self.retriever_cls = getattr(retri, config.retriever_cls)

    def __call__(self, sample, **kwargs):
        """Consume one batch; must be implemented by subclasses."""
        raise NotImplementedError

    def build_retriver(
        self,
        retriever_cls=None,
        hidden_size=None,
        centroids=512,
        db_type="flatl2",
        examples_per_cent_to_train=48
    ):
        """Create the retriever, store it on ``self.retriver``, return it.

        Args:
            retriever_cls: class to instantiate; defaults to
                ``self.retriever_cls`` when ``None``.
            hidden_size: vector size; defaults to ``self.hidden_size`` when
                ``None``.
            centroids: forwarded to the retriever constructor.
            db_type: forwarded to the retriever constructor.
            examples_per_cent_to_train: forwarded to the retriever
                constructor.
        """
        # The defaults are None, which cannot be called/used directly: fall
        # back to the values gathered in __init__, mirroring
        # DistributedVectorPool.build_retriver.
        if retriever_cls is None:
            retriever_cls = self.retriever_cls
        if hidden_size is None:
            hidden_size = self.hidden_size
        self.retriver = retriever_cls(
            hidden_size, centroids, db_type, examples_per_cent_to_train)
        return self.retriver

    def __repr__(self):
        if hasattr(self, "retriver"):
            retriver_name = str(len(self.retriver))
        else:
            retriver_name = "no retriver field yet"
        return self.__class__.__name__ \
            + "(" + retriver_name + ")"
def __call__(self, sample, subsampling, **kwargs):
    """Add one averaged vector per video of ``sample`` to the retriever.

    The pooled video and text vectors are averaged element-wise, grouped
    into chunks of ``subsampling`` consecutive rows, and each chunk is
    reduced to its mean. Video ids given as 3-tuples (sharded ids) are
    replaced by their first element.
    """
    fused = (sample["pooled_video"] + sample["pooled_text"]) / 2.
    per_video = fused.view(-1, subsampling, fused.size(-1)).mean(dim=1)
    vectors = per_video.cpu().detach().numpy()

    video_ids = [
        # a sharded video_id keeps only its leading component.
        vid[0] if isinstance(vid, tuple) and len(vid) == 3 else vid
        for vid in sample["video_id"]
    ]
    assert len(video_ids) == len(vectors)
    self.retriver.add(vectors.astype("float32"), video_ids)
def save(self):
    """Write this process's collected vectors and video ids to ``out_dir``.

    Produces ``hidden_state<rank>.npy`` and ``video_id<rank>.pkl``, where
    ``<rank>`` is the torch.distributed rank, or 0 when distributed mode is
    not initialized.
    """
    stacked = np.vstack(self.hidden_states)
    n_rows, n_ids = len(stacked), len(self.video_ids)
    assert n_rows == n_ids, "{}, {}".format(n_rows, n_ids)

    if torch.distributed.is_initialized():
        rank = torch.distributed.get_rank()
    else:
        rank = 0
    suffix = str(rank)

    vectors_path = os.path.join(
        self.out_dir, "hidden_state" + suffix + ".npy")
    ids_path = os.path.join(self.out_dir, "video_id" + suffix + ".pkl")

    np.save(vectors_path, stacked)
    with open(ids_path, "wb") as fw:
        pickle.dump(
            self.video_ids,
            fw,
            protocol=pickle.HIGHEST_PROTOCOL
        )
class TextClipVectorPool(VectorPool):
    """Deprecated pool that stores pooled text vectors keyed by clip meta."""

    def __init__(self, config):
        # The base initializer is not called; the hidden size and retriever
        # class are resolved locally and the retriever is built right away.
        from transformers import AutoConfig
        bert_config = AutoConfig.from_pretrained(config.dataset.bert_name)
        hidden_size = bert_config.hidden_size
        retriever_cls = getattr(retri, config.retriever_cls)
        self.build_retriver(retriever_cls, hidden_size)

    def __call__(self, sample, **kwargs):
        """Add ``sample["pooled_text"]`` to the retriever.

        Each row is keyed by the tuple made of columns 3 and onwards of
        ``sample["clip_meta"]``; column 4 must not exceed column 5.
        """
        meta = sample["clip_meta"].cpu()
        assert torch.all(torch.le(meta[:, 4], meta[:, 5]))
        keys = [tuple(row.tolist()) for row in meta[:, 3:]]

        if not hasattr(self, "retriver"):
            # build_retriver has not been called.
            raise NotImplementedError
        vectors = sample["pooled_text"].cpu().numpy().astype("float32")
        self.retriver.add(vectors, keys)
def __call__(self, sample, **kwargs):
    """Buffer the (video, text) vector pairs and clip keys of one batch.

    Appends an array pairing each pooled video vector with its pooled text
    vector along a new axis 1 to ``self.hidden_states``. Appends one
    ``(video_id, (video_start, video_end), (text_start, text_end))`` key per
    clip to ``self.video_ids``. Every video id is repeated
    ``len(pooled_video) // len(video_id)`` times to line up with the clips.
    """
    video_vecs = sample["pooled_video"].cpu().numpy()
    text_vecs = sample["pooled_text"].cpu().numpy()
    self.hidden_states.append(np.stack([video_vecs, text_vecs], axis=1))

    v_starts = sample["video_start"].cpu()
    v_ends = sample["video_end"].cpu()
    assert torch.all(torch.le(v_starts, v_ends))

    t_starts = sample["text_start"].cpu()
    t_ends = sample["text_end"].cpu()
    assert torch.all(torch.le(t_starts, t_ends))

    clips_per_video = \
        sample["pooled_video"].size(0) // len(sample["video_id"])
    expanded_ids = []
    for vid in sample["video_id"]:
        expanded_ids.extend([vid] * clips_per_video)

    self.video_ids.extend(
        (vid, (int(vs), int(ve)), (int(ts), int(te)))
        for vid, vs, ve, ts, te in zip(
            expanded_ids, v_starts, v_ends, t_starts, t_ends)
    )
+from .processor import * + +from .how2processor import * +from .how2retriprocessor import * + +from .dsprocessor import * + +try: + from .rawvideoprocessor import * + from .codecprocessor import * + from .webvidprocessor import * + from .expprocessor import * + from .exphow2processor import * + from .exphow2retriprocessor import * + from .expcodecprocessor import * + from .expfeatureencoder import * + from .expdsprocessor import * +except ImportError: + pass diff --git a/fairseq/examples/MMPT/mmpt/processors/dedupprocessor.py b/fairseq/examples/MMPT/mmpt/processors/dedupprocessor.py new file mode 100644 index 0000000..8a1ad40 --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/processors/dedupprocessor.py @@ -0,0 +1,242 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import random +import json +import pickle +from tqdm import tqdm +import os +import numpy as np + + +class CaptionDedupProcessor(object): + """remove overlapping of caption sentences(clip). 
def save_stat(self, video_id, caption, feat_dir="data/feat/feat_how2_s3d"):
    """Record length and token-rate statistics for one video's caption.

    The video length is the first dimension of the array stored in
    ``<feat_dir>/<video_id>.npy``; only the file header is read. Nothing is
    recorded when that file does not exist, or when the total clip length
    or the video length is not positive (the rates would divide by zero).

    Args:
        video_id: name of the feature file without the ``.npy`` suffix.
        caption: dict with parallel lists under ``"start"``, ``"end"`` and
            ``"text"``.
        feat_dir: directory holding the feature files.
    """
    video_fn = os.path.join(feat_dir, video_id + ".npy")
    if not os.path.isfile(video_fn):
        return

    # Read just the .npy header to get the shape without loading the data.
    with open(video_fn, "rb") as fr:
        version = np.lib.format.read_magic(fr)
        shape, _fortran, _dtype = np.lib.format._read_array_header(
            fr, version)
    video_len = shape[0]

    t_clip_len = 0.0
    t_tokens = 0
    clip_lens = []
    for idx, (start, end, text) in enumerate(
        zip(caption["start"], caption["end"], caption["text"])
    ):
        # A clip that starts before the previous one ended only counts
        # from the previous end.
        clip_len = (
            (end - max(caption["end"][idx - 1], start))
            if idx > 0
            else end - start
        )
        t_clip_len += clip_len
        t_tokens += len(text.split(" "))
        clip_lens.append(clip_len)

    if t_clip_len <= 0 or video_len <= 0:
        # Skip the whole video so all stat lists stay aligned.
        return

    self.stat["clip_len"].extend(clip_lens)
    self.stat["t_clip_len"].append(t_clip_len)
    self.stat["video_len"].append(video_len)
    self.stat["clip_tps"].append(t_tokens / t_clip_len)
    self.stat["video_tps"].append(t_tokens / video_len)
+ left_text = texts[-1][:-end_idx].strip() + if left_text: + # print(clip_idx, "[PREV PARTIAL INTO CUR]", end_idx) + ends[-1] = min(ends[-1], start) + texts[-1] = left_text + else: + # print(clip_idx, "[PREV LEFT NOTHING ALL INTO CUR]", end_idx) + starts.pop(-1) + ends.pop(-1) + texts.pop(-1) + starts.append(start) + ends.append(end) + texts.append(text) + + starts, ends, texts = [], [], [] + for clip_idx, (start, end, text) in enumerate( + zip(caption["start"], caption["end"], caption["text"]) + ): + if not isinstance(text, str): + continue + text = text.replace("\n", " ").strip() + if len(text) == 0: + continue + starts.append(start) + ends.append(end) + texts.append(text) + break + + for clip_idx, (start, end, text) in enumerate( + zip( + caption["start"][clip_idx + 1:], + caption["end"][clip_idx + 1:], + caption["text"][clip_idx + 1:], + ) + ): + if not isinstance(text, str): + continue + text = text.replace("\n", " ").strip() + if len(text) == 0: + continue + + # print(clip_idx, texts[-5:]) + # print(clip_idx, start, end, text) + if texts[-1].endswith(text): # subset of prev caption -> merge + # print(clip_idx, "[MERGE INTO PREV]") + ends[-1] = max(ends[-1], end) + elif text.startswith(texts[-1]): # superset of prev caption -> merge + # print(clip_idx, "[PREV MERGE INTO CUR]") + texts[-1] = text + starts[-1] = min(starts[-1], start) + ends[-1] = max(ends[-1], end) + else: # overlapping or non-overlapping. 
+ for end_idx in range(1, len(text) + 1): + if texts[-1].endswith(text[:end_idx]): + random_merge(end_idx, start, end, text, starts, ends, texts) + break + else: + starts.append(start) + ends.append(end) + texts.append(text) + + assert (ends[-1] + 0.001) >= starts[-1] and len( + texts[-1] + ) > 0, "{} {} {} <- {} {} {}, {} {} {}".format( + str(starts[-1]), + str(ends[-1]), + texts[-1], + caption["start"][clip_idx - 1], + caption["end"][clip_idx - 1], + caption["text"][clip_idx - 1], + str(start), + str(end), + text, + ) + + return {"start": starts, "end": ends, "text": texts} + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="dedup how2 caption") + parser.add_argument('--how2dir', default="data/how2") + args = parser.parse_args() + + raw_caption_json = os.path.join(args.how2dir, "raw_caption.json") + raw_caption_pickle = os.path.join(args.how2dir, "raw_caption.pkl") + raw_caption_dedup_pickle = os.path.join(args.how2dir, "raw_caption_dedup.pkl") + + def convert_to_pickle(src_fn, tgt_fn): + with open(src_fn) as fd: + captions = json.load(fd) + + for video_id in captions: + captions[video_id] = json.dumps(captions[video_id]) + + with open(tgt_fn, "wb") as fw: + pickle.dump(captions, fw, pickle.HIGHEST_PROTOCOL) + + if not os.path.isfile(raw_caption_pickle): + convert_to_pickle(raw_caption_json, raw_caption_pickle) + + deduper = CaptionDedupProcessor(raw_caption_pickle) + deduper() + deduper.finalize(raw_caption_dedup_pickle) + + """ + # demo + deduper = CaptionDedupProcessor("data/how2/raw_caption.pkl") + deduper.single("HfIeQ9pzL5U") + """ diff --git a/fairseq/examples/MMPT/mmpt/processors/dsprocessor.py b/fairseq/examples/MMPT/mmpt/processors/dsprocessor.py new file mode 100644 index 0000000..ecebf0e --- /dev/null +++ b/fairseq/examples/MMPT/mmpt/processors/dsprocessor.py @@ -0,0 +1,848 @@ +# Copyright (c) Facebook, Inc. All Rights Reserved + +""" +Processors for all downstream (ds) tasks. 
class DSAligner(Aligner):
    """
    Aligner shared by all downstream (DS) datasets: the whole video and the
    whole text are each treated as one single clip.
    """

    def __call__(self, video_id, video_feature, text_feature, wps=0.7):
        # the video clip always starts at 0 and is capped at max_video_len.
        clip_start = 0
        clip_end = min(len(video_feature), self.max_video_len)
        single_video_clip = {"start": [clip_start], "end": [clip_end]}

        # text duration is estimated from the token count and `wps`.
        text_end = len(text_feature) / wps
        single_text_clip = {
            "cap": [text_feature],
            "start": [clip_start],
            "end": [text_end],
        }

        vfeats, vmasks = self._build_video_seq(video_feature, single_video_clip)
        caps, cmasks = self._build_text_seq(single_text_clip, [0])

        result = {}
        result["caps"] = caps
        result["cmasks"] = cmasks
        result["vfeats"] = vfeats
        result["vmasks"] = vmasks
        result["video_id"] = video_id
        return result
class MSRVTTMetaProcessor(MetaProcessor):
    """MSRVTT dataset.
    reference: `howto100m/msrvtt_dataloader.py`

    ``self.data`` is a pandas DataFrame read from the split file.  For the
    train split it is optionally extended with the videos of
    ``config.full_test_path`` that are not listed in ``config.jsfusion_path``
    and then repeated ``config.dup`` times.
    """

    def __init__(self, config):
        super().__init__(config)
        import pandas as pd
        data = pd.read_csv(self._get_split_path(config))
        # TODO: add a text1ka flag.
        if config.split == "train" \
                and config.full_test_path is not None \
                and config.jsfusion_path is not None:
            # add testing videos from full_test_path not used by jfusion.
            additional_data = pd.read_csv(config.full_test_path)
            jsfusion_data = pd.read_csv(config.jsfusion_path)

            jsfusion_ids = set(jsfusion_data["video_id"].values)
            extra_ids = [
                video_id for video_id in additional_data["video_id"]
                if video_id not in jsfusion_ids]
            if extra_ids:
                # DataFrame.append was removed in pandas 2.0; one concat also
                # avoids re-copying the frame for every added row.
                data = pd.concat(
                    [data, pd.DataFrame({"video_id": extra_ids})],
                    ignore_index=True)

        if config.dup is not None and config.split == "train" \
                and config.dup > 1:
            # `dup` copies in total (the original frame plus dup - 1 more).
            data = pd.concat([data] * config.dup, ignore_index=True)
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """slightly modify with if condition to combine train/test.

        Returns ``(video_id, sentence)``; when the split file has no
        ``sentence`` column the video id is returned in its place.
        """
        vid = self.data["video_id"].values[idx]
        if "sentence" in self.data:  # for testing.
            sentence = self.data["sentence"].values[idx]
        else:  # for training.
            sentence = vid
        return vid, sentence
class MSRVTTQAAligner(DSAligner):
    """MSRVTT-QA aligner.
    Runs the parent aligner once per candidate answer and stacks the text
    outputs; `text_feature` is `(answer, [a1, a2, ...])`.
    """

    def __call__(self, video_id, video_feature, text_feature, wps=0.7):
        answer, candidates = text_feature[0], text_feature[1]
        align_one = super().__call__

        all_caps, all_cmasks = [], []
        for candidate in candidates:
            # video outputs are identical for every candidate; the last
            # result is reused as the returned dict.
            output = align_one(video_id, video_feature, candidate, wps)
            all_caps.append(output["caps"])
            all_cmasks.append(output["cmasks"])

        output.update({
            "caps": torch.stack(all_caps),
            "cmasks": torch.stack(all_cmasks),
            "answers": torch.LongTensor([answer]),
        })
        return output
+ reference: `howto100m/youcook_dataloader.py` + note that the data can be different as the + (1) some videos already in Howto100m are removed. + (2) stop words are removed from caption + TODO (huxu): make a flag to load the original caption. + (see youcookii_annotations_trainval.json). + + The max_video_len can be 264 and text can be 64 tokens. + In reality we may not need that long. see projects/task/youcook.yaml + """ + + def __init__(self, config): + super().__init__(config) + vfeat_dir = config.vfeat_dir + print(self._get_split_path(config)) + with open(self._get_split_path(config), "rb") as fd: + data = pickle.load(fd) + all_valid_video_ids = set( + [os.path.splitext(fn)[0] for fn in os.listdir(vfeat_dir)] + ) + recs = [] + video_ids = set() + valid_video_ids = set() + for rec in data: # filter videos not available. + udl_idx = rec["id"].rindex("_") + video_id = rec["id"][:udl_idx] + video_ids.add(video_id) + if video_id in all_valid_video_ids: + valid_video_ids.add(video_id) + recs.append(rec) + print("total video_ids in .pkl", len(video_ids)) + print("valid video_ids in .pkl", len(valid_video_ids)) + print("please verify {train,val}_list.txt") + data = recs + self.data = data + + with open(config.trainval_annotation) as fd: + self.youcook_annotation = json.load(fd)["database"] + if config.use_annotation_text is True: + print("using text in annotation.") + self.use_annotation_caption = True + else: + self.use_annotation_caption = False + + def __getitem__(self, idx): + def _get_video_and_caption(rec): + vid = rec["id"] + udl_idx = vid.rindex("_") + video_id, clip_id = vid[:udl_idx], int(vid[udl_idx + 1:]) + clip = self.youcook_annotation[video_id]["annotations"][clip_id] + start, end = clip["segment"] + if self.use_annotation_caption: + caption = clip["sentence"] + else: + caption = rec["caption"] + return (video_id, start, end), caption + + rec = self.data[idx] + video_info, text_info = _get_video_and_caption(rec) + return video_info, text_info + + +class 
class CrossTaskMetaProcessor(MetaProcessor):
    """CrossTask meta data: a list of `(task, vid)` pairs for one split.

    Training videos that also appear in the validation csv are removed,
    tasks are restricted to those described in the primary/related task
    files, and the remaining videos are randomly split into train/test.
    """

    def __init__(self, config):
        super().__init__(config)
        np.random.seed(0)  # deterministic random split.
        task_vids = self._get_vids(
            config.train_csv_path,
            config.vfeat_dir,
            config.annotation_path)

        val_vids = self._get_vids(
            config.val_csv_path,
            config.vfeat_dir,
            config.annotation_path)

        # filter out those task and vids appear in val_vids.
        val_vid_sets = {task: set(vids) for task, vids in val_vids.items()}
        task_vids = {
            task: [
                vid for vid in vids
                if task not in val_vid_sets or vid not in val_vid_sets[task]]
            for task, vids in task_vids.items()}

        primary_info = self._read_task_info(config.primary_path)
        test_tasks = set(primary_info['steps'].keys())

        # if args.use_related:
        related_info = self._read_task_info(config.related_path)
        task_steps = {**primary_info['steps'], **related_info['steps']}
        n_steps = {**primary_info['n_steps'], **related_info['n_steps']}
        # else:
        #     task_steps = primary_info['steps']
        #     n_steps = primary_info['n_steps']
        all_tasks = set(n_steps.keys())
        # filter and keep task in primary or related.
        task_vids = {
            task: vids for task, vids in task_vids.items()
            if task in all_tasks}
        # vocab-by-step matrix (A) and vocab (M)
        # (huxu): we do not use BoW.
        # A, M = self._get_A(task_steps, share="words")

        train_vids, test_vids = self._random_split(
            task_vids, test_tasks, config.n_train)
        print("train_num_videos", sum(len(vids) for vids in train_vids.values()))
        print("test_num_videos", sum(len(vids) for vids in test_vids.values()))
        # added by huxu to automatically determine the split.
        split_map = {
            "train": train_vids,
            "valid": test_vids,
            "test": test_vids
        }
        task_vids = split_map[config.split]

        self.vids = []
        for task, vids in task_vids.items():
            self.vids.extend([(task, vid) for vid in vids])
        self.task_steps = task_steps
        self.n_steps = n_steps

    def __getitem__(self, idx):
        """Return the same `(task, vid, steps, n_steps)` tuple twice
        (once as the video key and once as the text key)."""
        task, vid = self.vids[idx]
        n_steps = self.n_steps[task]
        steps = self.task_steps[task]
        assert len(steps) == n_steps
        return (task, vid, steps, n_steps), (task, vid, steps, n_steps)

    def __len__(self):
        return len(self.vids)

    def _random_split(self, task_vids, test_tasks, n_train):
        """Split videos per task into train/test dicts.

        Tasks in `test_tasks` with more than `n_train` videos get `n_train`
        randomly chosen training videos and the rest as test videos; every
        other task goes to train entirely.
        """
        train_vids = {}
        test_vids = {}
        for task, vids in task_vids.items():
            if task in test_tasks and len(vids) > n_train:
                train_vids[task] = np.random.choice(
                    vids, n_train, replace=False).tolist()
                # set lookup keeps the filter linear in the number of videos.
                chosen = set(train_vids[task])
                test_vids[task] = [vid for vid in vids if vid not in chosen]
            else:
                train_vids[task] = vids
        return train_vids, test_vids

    def _get_vids(self, path, vfeat_dir, annotation_path):
        """refactored from
        https://github.com/DmZhukov/CrossTask/blob/master/data.py
        changes: add `vfeat_dir` to check if the video is available.
        add `annotation_path` to check if the video is available.

        Returns a dict mapping task to the list of its available videos.
        """

        task_vids = {}
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:  # tolerate blank (e.g., trailing) lines.
                    continue
                task, vid, url = line.split(',')
                # double check the video is available.
                if not os.path.exists(
                        os.path.join(vfeat_dir, vid + ".npy")):
                    continue
                # double check the annotation is available.
                if not os.path.exists(os.path.join(
                        annotation_path,
                        task + "_" + vid + ".csv")):
                    continue
                if task not in task_vids:
                    task_vids[task] = []
                task_vids[task].append(vid)
        return task_vids

    def _read_task_info(self, path):
        """Parse a task description file.

        Each record is five lines (task id, title, url, number of steps,
        comma separated steps) followed by one separator line.
        """
        titles = {}
        urls = {}
        n_steps = {}
        steps = {}
        with open(path, 'r') as f:
            idx = f.readline()
            while idx != '':
                idx = idx.strip()
                titles[idx] = f.readline().strip()
                urls[idx] = f.readline().strip()
                n_steps[idx] = int(f.readline().strip())
                steps[idx] = f.readline().strip().split(',')
                # skip the separator line; unlike next(f), readline() does
                # not raise StopIteration when the last record is not
                # followed by a separator.
                f.readline()
                idx = f.readline()
        return {
            'title': titles,
            'url': urls,
            'n_steps': n_steps,
            'steps': steps
        }

    def _get_A(self, task_steps, share="words"):
        """Step-to-component matrices (bag-of-words); disabled for BERT.

        Always raises ValueError.  The unreachable reference implementation
        that used to follow the raise was removed; see the upstream
        CrossTask repository if it is ever needed again.
        """
        raise ValueError("running get_A is not allowed for BERT.")
+ for window_start in range(0, video_len, self.sliding_window): + video_start = 0 + video_end = min(video_len - window_start, self.sliding_window_size) + video_clip = {"start": [video_start], "end": [video_end]} + + vfeat, vmask = self._build_video_seq( + video_feature[window_start: window_start + video_end], + video_clip + ) + + target = labels[window_start: window_start + video_end] + assert len(vfeat) >= len(target), "{},{}".format(len(vfeat), len(target)) + # TODO: randomly drop all zero targets for training ? + # if self.split == "train" and target.sum() == 0: + # continue + vfeats.append(vfeat) + vmasks.append(vmask) + targets.append(target) + + if (video_len - window_start) <= self.sliding_window_size: + break + + vfeats = torch.stack(vfeats) + vmasks = torch.stack(vmasks) + targets = torch.cat(targets, dim=0) + + caps, cmasks = [], [] + for step in text_feature: + step_text_feature = {"start": [0], "end": [1], "cap": [step]} + step_text_clip_index = [0] + cap, cmask = self._build_text_seq( + step_text_feature, step_text_clip_index + ) + caps.append(cap) + cmasks.append(cmask) + caps = torch.stack(caps) + cmasks = torch.stack(cmasks) + + return { + "caps": caps, + "cmasks": cmasks, + "vfeats": vfeats, # X for original code. + "vmasks": vmasks, + "targets": targets, + "video_id": vid, + "task": task, + "video_len": video_len # for later checking. + } + + def _read_assignment(self, T, K, path): + """ + refactored from https://github.com/DmZhukov/CrossTask/blob/master/data.py + Howto interpret contraints on loss that is going to be minimized: + lambd is a big number; + self.lambd * C is a big number for all valid position (csv stores invalids) + + def forward(self, O, Y, C): + return (Y*(self.lambd * C - self.lsm(O))).mean(dim=0).sum() + + This will load the csv file and fill-in the step col from start to end rows. 
+ """ + + Y = np.zeros([T, K], dtype=np.uint8) + with open(path, 'r') as f: + for line in f: + step, start, end = line.strip().split(',') + start = int(math.floor(float(start))) + end = int(math.ceil(float(end))) + step = int(step) - 1 + Y[start:end, step] = 1 + return Y + + +# --------------------- COIN ------------------------- + +class MetaTextBinarizer(Aligner): + def __call__(self, text_feature): + text_feature = { + "cap": [text_feature], + "start": [0.], + "end": [100.], + } + text_clip_indexs = [0] + + caps, cmasks = self._build_text_seq( + text_feature, text_clip_indexs + ) + return {"caps": caps, "cmasks": cmasks} + + +class COINActionSegmentationMetaProcessor(MetaProcessor): + split_map = { + "train": "training", + "valid": "testing", + "test": "testing", + } + + def __init__(self, config): + super().__init__(config) + with open(self._get_split_path(config)) as fr: + database = json.load(fr)["database"] + id2label = {} + data = [] + # filter the data by split. + for video_id, rec in database.items(): + # always use testing to determine label_set + if rec["subset"] == "testing": + for segment in rec["annotation"]: + id2label[int(segment["id"])] = segment["label"] + # text_labels is used for ZS setting + self.text_labels = ["none"] * len(id2label) + for label_id in id2label: + self.text_labels[label_id-1] = id2label[label_id] + + id2label[0] = "O" + print("num of labels", len(id2label)) + + for video_id, rec in database.items(): + if not os.path.isfile(os.path.join(config.vfeat_dir, video_id + ".npy")): + continue + if rec["subset"] == COINActionSegmentationMetaProcessor.split_map[self.split]: + starts, ends, labels = [], [], [] + for segment in rec["annotation"]: + start, end = segment["segment"] + label = int(segment["id"]) + starts.append(start) + ends.append(end) + labels.append(label) + data.append( + (video_id, {"start": starts, "end": ends, "label": labels})) + self.data = data + + def meta_text_labels(self, config): + from transformers import 
class COINActionSegmentationAligner(Aligner):
    """Cut a video into sliding windows and build per-frame label targets."""

    def __init__(self, config):
        super().__init__(config)
        self.sliding_window = config.sliding_window
        self.sliding_window_size = config.sliding_window_size

    def __call__(self, video_id, video_feature, text_feature):
        segments = list(zip(
            text_feature["start"], text_feature["end"], text_feature["label"]
        ))
        video_len = len(video_feature)
        stride, window_size = self.sliding_window, self.sliding_window_size

        vfeats, vmasks, targets = [], [], []
        # sliding window on video features and targets.
        for offset in range(0, video_len, stride):
            span = min(video_len - offset, window_size)
            vfeat, vmask = self._build_video_seq(
                video_feature[offset: offset + span],
                {"start": [0], "end": [span]},
            )
            # -100 everywhere, 0 on positions selected by the video mask.
            target = torch.full_like(vmask, -100, dtype=torch.long)
            target[vmask] = 0
            for seg_start, seg_end, label_id in segments:
                # only segments intersecting the current window contribute.
                if not ((offset < seg_end) and (seg_start < (offset + span))):
                    continue
                lo = max(0, math.floor(seg_start) - offset)
                hi = min(span, math.ceil(seg_end) - offset)
                target[lo:hi] = label_id
            vfeats.append(vfeat)
            vmasks.append(vmask)
            targets.append(target)
            if (video_len - offset) <= window_size:
                # this window already reaches the end of the video.
                break

        vfeats = torch.stack(vfeats)
        vmasks = torch.stack(vmasks)
        targets = torch.stack(targets)

        # labels over the whole (un-windowed) video.
        video_targets = torch.full((video_len,), 0)
        for seg_start, seg_end, label_id in segments:
            lo = max(0, math.floor(seg_start))
            hi = min(video_len, math.ceil(seg_end))
            video_targets[lo:hi] = label_id

        num_windows = vfeats.size(0)
        caps = torch.LongTensor(
            [[self.cls_token_id, self.sep_token_id,
              self.pad_token_id, self.sep_token_id]],
        ).repeat(num_windows, 1)
        cmasks = torch.BoolTensor(
            [[0, 1, 0, 1]]  # pad are valid for attention.
        ).repeat(num_windows, 1)
        return {
            "caps": caps,
            "cmasks": cmasks,
            "vfeats": vfeats,  # X for original code.
            "vmasks": vmasks,
            "targets": targets,
            "video_id": video_id,
            "video_len": video_len,  # for later checking.
            "video_targets": video_targets
        }
class DiDeMoTextProcessor(TextProcessor):
    """Tokenize a DiDeMo description into input ids (no special tokens).
    reference: https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/eval.py
    https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/data_processing.py
    """

    def __call__(self, text):
        encoded = self.tokenizer(text, add_special_tokens=False)
        return encoded["input_ids"]
class ShardedHow2MetaProcessor(How2MetaProcessor):
    """How2 meta data that also locates every video inside feature shards."""

    def __init__(self, config):
        super().__init__(config)
        self.split = str(config.split)
        self.vfeat_dir = config.vfeat_dir
        self._init_shard()

    def _init_shard(self):
        """Build `video_id -> (shard_id, index inside the shard)`."""
        # valid and test both read the `val` meta file.
        prefix_by_split = {"train": "train", "valid": "val", "test": "val"}
        if self.split not in prefix_by_split:
            raise ValueError("unsupported for MetaProcessor:", self.split)
        if self.split == "test":
            print("use how2 val as test.")

        meta_fn = os.path.join(
            self.vfeat_dir, prefix_by_split[self.split] + "_meta.pkl")
        with open(meta_fn, "rb") as fr:
            meta = pickle.load(fr)

        self.video_id_to_shard = {
            video_id: (shard_id, video_idx)
            for shard_id in meta
            for video_idx, video_id in enumerate(meta[shard_id])
        }

    def __getitem__(self, idx):
        """Return `(video_id, idx, shard_id, shard_idx)` twice."""
        _, video_id = super().__getitem__(idx)
        shard_id, shard_idx = self.video_id_to_shard[video_id]
        meta = (video_id, idx, shard_id, shard_idx)
        return meta, meta
class FixedLenAligner(Aligner):
    """
    In the model we assume text is on the left (closer to BERT formulation)
    and video is on the right.
    We fix the total length of text + video.
    max_video_len is in number of secs.
    max_text_len is in number of tokens.

    special tokens formats:
    we use the format [CLS] [SEP] text tokens [SEP] [PAD] ...
    [CLS] will be split out into:
    [CLS] video tokens [SEP] text tokens [SEP] [PAD] ...
    token_type_ids will be generated by the model (for now).
    0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
    | first sequence      | second sequence |
    so each sequence owns a [SEP] token for no-ops.
    """

    def __init__(self, config):
        super().__init__(config)
        # Text budget = total length minus the video budget minus 3 slots.
        # NOTE(review): the 3 presumably accounts for [CLS] and the two
        # [SEP] tokens described in the class docstring; confirm.
        self.text_clip_sampler = TextClipSamplingProcessor(
            self.max_len - self.max_video_len - 3
        )
        # Decide subsampling:
        # `config.subsampling` will change batch_size in trainer.
        # `config.clip_per_video` (used by RetriTask) doesn't
        # change batch_size in trainer, and takes precedence when set.
        subsampling = config.subsampling
        if config.clip_per_video is not None:
            subsampling = config.clip_per_video
        self.subsampling = subsampling

    def _get_text_maxlen(self):
        """Return the text-token budget used for each sampled clip."""
        # use max text len
        return self.text_clip_sampler.max_text_len

    def __call__(self, video_id, video_feature, text_feature):
        """Draw `self.subsampling` aligned clips from one video and collate them.

        Args:
            video_id: indexable meta record; element 1 is forwarded to
                `sampling` and element 0 is stored as the batch's
                "video_id" (a plain `str` is stored as-is).
            video_feature: video features of the whole video.
            text_feature: dict with at least "start", "end" and "cap".

        Returns:
            The collated batch dict with an added "video_id" entry.

        Raises:
            ValueError: if `self.subsampling` is None or smaller than 1.
        """
        from transformers import default_data_collator

        video_idx = video_id[1]
        if self.subsampling is None or self.subsampling < 1:
            # The non-subsampled path that used to follow this raise was
            # unreachable and has been removed.
            raise ValueError(
                "dataset.subsampling must be >= 1 for efficient video loading.")

        batch = []
        for _ in range(self.subsampling):
            # Center is drawn before the text budget so that subclasses
            # overriding `_get_text_maxlen` with a random draw keep the
            # same RNG consumption order.
            centerclip_idx = random.randint(
                0, len(text_feature["start"]) - 1)
            batch.append(
                self.sampling(
                    video_idx,
                    video_feature,
                    text_feature,
                    centerclip_idx,
                    self._get_text_maxlen()
                ))
        batch = self.batch_post_processing(batch, video_feature)
        batch = default_data_collator(batch)

        batch["video_id"] = video_id if isinstance(video_id, str) \
            else video_id[0]
        # e2e: make sure frame ids is into tensor.
        assert torch.is_tensor(batch["vfeats"])
        return batch

    def sampling(
        self,
        video_idx,
        video_feature,
        text_feature,
        centerclip_idx=None,
        sampled_max_text_len=None,
    ):
        """Sample a run of text clips and the video span that covers them.

        Args:
            video_idx: unused here; kept for interface compatibility.
            video_feature: video features; when it is a numpy array its
                length bounds the video span, otherwise the end time of
                the last caption is used.
            text_feature: dict with "start", "end" and "cap" lists.
            centerclip_idx: clip index to grow the text window from
                (random when None).
            sampled_max_text_len: text-token budget for this sample
                (sampler default when None).

        Returns:
            dict with "caps", "cmasks", "vfeats", "vmasks", "video_start",
            "video_end", "text_start" and "text_end" (`text_end` is
            exclusive).
        """
        text_clip_indexs = self.text_clip_sampler(
            text_feature, centerclip_idx,
            sampled_max_text_len
        )
        if isinstance(video_feature, np.ndarray):
            video_len = len(video_feature)
        else:
            video_len = math.ceil(text_feature["end"][-1])

        # Clamp the video window to [0, video_len] and keep start <= end.
        video_end = min(
            math.ceil(text_feature["end"][text_clip_indexs[-1]]),
            video_len
        )
        video_start = max(
            min(
                math.floor(text_feature["start"][text_clip_indexs[0]]),
                video_end),
            0
        )

        video_clips = {"start": [video_start], "end": [video_end]}

        # tensorize.
        vfeats, vmasks = self._build_video_seq(
            video_feature, video_clips
        )
        caps, cmasks = self._build_text_seq(
            text_feature, text_clip_indexs
        )

        text_start = text_clip_indexs[0]
        text_end = text_clip_indexs[-1] + 1

        return {
            "caps": caps,
            "cmasks": cmasks,
            "vfeats": vfeats,
            "vmasks": vmasks,
            "video_start": video_start,
            "video_end": video_end,
            "text_start": text_start,
            "text_end": text_end,
        }
class MFMMLMAligner(FixedLenAligner):
    """
    `FixedLenAligner` that additionally produces Masked Language Model and
    Masked Frame Model labels for every sampled clip.
    """

    def __init__(self, config):
        super().__init__(config)
        keep_prob = 1.0 if config.keep_prob is None else config.keep_prob
        text_budget = self.max_len - self.max_video_len - 3
        self.text_clip_sampler = TextClipSamplingProcessor(
            text_budget, keep_prob)
        self.sampled_min_len = config.sampled_min_len
        self.sampled_max_len = config.sampled_max_len
        self.masked_token_sampler = TextMaskingProcessor(config)
        self.mm_type = "full" if config.mm_type is None else config.mm_type
        # A 2D attention mask is only built for text generation.
        if self.mm_type == "textgen":
            self.attnmasker = MMAttentionMask2DProcessor()
        else:
            self.attnmasker = None
        self.masked_frame_sampler = FrameMaskingProcessor(config)
        if config.lazy_vfeat_mask is None:
            self.lazy_vfeat_mask = False
        else:
            self.lazy_vfeat_mask = config.lazy_vfeat_mask
        self.mm_prob = 0. if config.mm_prob is None else config.mm_prob

    def __call__(self, video_id, video_feature, text_feature):
        """Build one (possibly sub-sampled) masked batch for a video.

        With `self.subsampling > 1` that many clips are sampled and
        collated; otherwise a single clip is sampled and only
        post-processed. The "video_id" entry is `video_id` itself when it
        is a `str`, else its first element.
        """
        from transformers import default_data_collator

        multi_clip = self.subsampling is not None and self.subsampling > 1
        if not multi_clip:
            batch = self.sampling(video_id, video_feature, text_feature)
            batch = self.batch_post_processing(batch, video_feature)
        else:
            samples = []
            for _ in range(self.subsampling):
                # Draw order (center first, then text length) is kept so
                # the random stream is consumed exactly as before.
                center = random.randint(
                    0, len(text_feature["start"]) - 1)
                text_len = random.randint(
                    self.sampled_min_len, self.sampled_max_len)
                samples.append(
                    self.sampling(
                        video_id, video_feature, text_feature,
                        center, text_len))
            samples = self.batch_post_processing(samples, video_feature)
            batch = default_data_collator(samples)

        if isinstance(video_id, str):
            batch["video_id"] = video_id
        else:
            batch["video_id"] = video_id[0]
        return batch

    def sampling(
        self,
        video_id,
        video_feature,
        text_feature,
        centerclip_idx=None,
        sampled_max_text_len=None,
    ):
        """Sample an aligned clip, then attach text/video masking labels.

        With probability `self.mm_prob` a whole modality is masked: either
        text (using `self.mm_type`) with video untouched, or video fully
        masked with text untouched. Otherwise both samplers receive
        `None` as the modality-masking mode.
        """
        output = FixedLenAligner.sampling(
            self, video_id, video_feature, text_feature,
            centerclip_idx, sampled_max_text_len)

        masking_text = masking_video = None
        if random.random() < self.mm_prob:
            if random.random() > 0.5:
                masking_text, masking_video = self.mm_type, "no"
            else:
                masking_text, masking_video = "no", "full"

        # With lazy masking the frame sampler only returns labels and the
        # features are left for the model to mask.
        video_feats = None if self.lazy_vfeat_mask else output["vfeats"]
        video_label = self.masked_frame_sampler(
            output["vmasks"], masking_video, vfeats=video_feats)
        caps, text_label = self.masked_token_sampler(
            output["caps"], masking_text)

        output["caps"] = caps
        output["video_label"] = video_label
        output["text_label"] = text_label

        if self.attnmasker is not None:
            output["attention_mask"] = self.attnmasker(
                output["vmasks"], output["cmasks"], masking_text)
        return output
class TextMaskingProcessor(Processor):
    """BERT-style token masking with extra whole-modality masking modes.

    The masking recipe is borrowed from
    `transformers/data/data_collator.DataCollatorForLanguageModeling`.
    """

    def __init__(self, config):
        self.mlm_probability = (
            0.15 if config.mlm_probability is None
            else config.mlm_probability
        )
        self.bert_name = config.bert_name
        # [CLS] is used as bos_token and [SEP] is used as eos_token.
        # https://huggingface.co/transformers/master/model_doc/bertgeneration.html
        from transformers import AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.bert_name, bos_token="[CLS]", eos_token="[SEP]")
        self.textgen = TextGenerationProcessor(self.tokenizer)

    def __call__(
        self, inputs: torch.Tensor,
        modality_masking=None,
        special_tokens_mask: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return `(inputs, labels)` for the requested masking mode.

        `modality_masking` selects the mode:
            None: traditional BERT masking at `mlm_probability`.
            "no": no token is selected for masking.
            "full": every non-special token is selected.
            "inverse": tokens are selected with `1 - mlm_probability`.
            "mask": inputs are corrupted, all labels are -100.
            "textgen...": autoregressive generation targets; when the
                mode string also contains "mask" the shifted inputs are
                corrupted as well.

        Selected tokens follow the usual split: 80% [MASK], 10% random
        token, 10% unchanged. `inputs` is modified in place.

        Raises:
            ValueError: for an unrecognised `modality_masking` string.
        """
        labels = inputs.clone()
        # Pick the per-token selection probability, or return early for
        # the modes that do not use the generic MLM path.
        if modality_masking is None:
            select_prob = self.mlm_probability
        elif modality_masking == "full":
            select_prob = 1.
        elif modality_masking == "no":
            select_prob = 0.
        elif modality_masking.startswith("textgen"):
            # [CLS] [SEP] ...
            inputs, labels = self.textgen(inputs)
            if "mask" in modality_masking:
                inputs = self.mask_input(inputs, special_tokens_mask)
            return inputs, labels
        elif modality_masking == "mask":
            inputs = self.mask_input(inputs, special_tokens_mask)
            return inputs, torch.full(inputs.shape, -100)
        elif modality_masking == "inverse":
            select_prob = 1. - self.mlm_probability
        else:
            raise ValueError("unknown modality masking.", modality_masking)

        probability_matrix = torch.full(labels.shape, select_prob)
        special = self._special_mask_as_tensor(labels, special_tokens_mask)
        probability_matrix.masked_fill_(special, value=0.0)
        masked_indices = torch.bernoulli(probability_matrix).bool()
        labels[~masked_indices] = -100  # loss only on selected tokens

        self._corrupt_selected(inputs, masked_indices)
        return inputs, labels

    def mask_input(self, inputs, special_tokens_mask=None):
        """Corrupt `inputs` in place at `mlm_probability`; return `inputs`.

        Used by the masked autoregressive modes, where labels are produced
        elsewhere.
        """
        probability_matrix = torch.full(
            inputs.shape, self.mlm_probability)
        special = self._special_mask_as_tensor(inputs, special_tokens_mask)
        probability_matrix.masked_fill_(special, value=0.0)
        masked_indices = torch.bernoulli(probability_matrix).bool()
        return self._corrupt_selected(inputs, masked_indices)

    def _special_mask_as_tensor(self, token_ids, special_tokens_mask):
        """Return a bool tensor flagging special tokens of `token_ids`."""
        if special_tokens_mask is not None:
            return special_tokens_mask.bool()
        flags = self.get_special_tokens_mask(
            token_ids.tolist(), already_has_special_tokens=True
        )
        return torch.tensor(flags, dtype=torch.bool)

    def _corrupt_selected(self, inputs, masked_indices):
        """Apply the 80/10/10 replacement to the selected positions in place."""
        # 80% of the selected tokens become tokenizer.mask_token ([MASK]).
        to_mask_token = (
            torch.bernoulli(torch.full(inputs.shape, 0.8)).bool()
            & masked_indices
        )
        inputs[to_mask_token] = self.tokenizer.convert_tokens_to_ids(
            self.tokenizer.mask_token
        )

        # Half of the remainder (10% overall) become a random word; the
        # rest are left unchanged.
        to_random_word = (
            torch.bernoulli(torch.full(inputs.shape, 0.5)).bool()
            & masked_indices
            & ~to_mask_token
        )
        random_words = torch.randint(
            len(self.tokenizer), inputs.shape, dtype=torch.long
        )
        inputs[to_random_word] = random_words[to_random_word]
        return inputs

    def get_special_tokens_mask(
        self, token_ids_0: List[int],
        token_ids_1: Optional[List[int]] = None,
        already_has_special_tokens: bool = False
    ) -> List[int]:
        """Return a 0/1 list marking special-token positions.

        Note: the version from transformers does not consider pad
        as a special token; this one does.

        Raises:
            ValueError: if `token_ids_1` is given together with
                `already_has_special_tokens=True`.
        """
        if not already_has_special_tokens:
            single = [1] + ([0] * len(token_ids_0)) + [1]
            if token_ids_1 is None:
                return single
            return single + ([0] * len(token_ids_1)) + [1]

        if token_ids_1 is not None:
            raise ValueError(
                "You should not supply a second sequence if"
                "the provided sequence of "
                "ids is already formated with special tokens "
                "for the model."
            )
        special_ids = [
            self.tokenizer.sep_token_id,
            self.tokenizer.cls_token_id,
            self.tokenizer.pad_token_id,
        ]
        return [1 if token in special_ids else 0 for token in token_ids_0]
class VideoClipSamplingProcessor(Processor):
    """Grow a video window around a center index, one step at a time."""

    def __call__(self, video_len, max_video_len, center):
        """
        `video_len`: length of the video.
        `max_video_len`: maximum video tokens allowed in a sequence.
        `center`: initial starting index.

        Returns `{"start": [start], "end": [end]}`. The window grows by
        one position per step, to the left or right at random, and only in
        the direction that still has room once one side hits a boundary.
        """
        assert 0 <= center < video_len
        left = right = center
        steps = 0
        while steps < max_video_len and (left > 0 or right < video_len):
            if left <= 0:
                grow_right = True
            elif right >= video_len:
                grow_right = False
            else:
                # Both directions are open: flip a coin.
                grow_right = random.random() > 0.5
            if grow_right:
                right += 1
            else:
                left -= 1
            steps += 1
        return {"start": [left], "end": [right]}
+ ): + text, start, end = self._get_text(text_feature) + video = self._get_video(video_feature, start, end) + + vfeats = torch.zeros((self.max_video_len, video_feature.shape[1])) + vmasks = torch.zeros((self.max_video_len,), dtype=torch.bool) + vfeats[: video.shape[0]] = torch.from_numpy(np.array(video)) + vmasks[: video.shape[0]] = 1 + + caps, cmasks = [], [] + for words in text: + cap, cmask = self._build_text_seq(text_feature, words) + caps.append(cap) + cmasks.append(cmask) + caps = torch.stack(caps) + cmasks = torch.stack(cmasks) + # video of shape: (video_len) + # text of shape (num_candidates, max_text_len) + + return { + "caps": caps, + "cmasks": cmasks, + "vfeats": vfeats, + "vmasks": vmasks, + # "video_id": video_id, + } + + def _get_video(self, video_feature, start, end): + start_seek = random.randint(start, int(max(start, end - self.num_sec))) + # duration = self.num_sec + 0.1 + return video_feature[start_seek : int(start_seek + self.num_sec)] + + def _get_text(self, cap): + ind = random.randint(0, len(cap["start"]) - 1) + if self.num_candidates == 1: + words = [ind] + else: + words = [] + cap_start = self._find_nearest_candidates(cap, ind) + for i in range(self.num_candidates): + words.append([max(0, min(len(cap["cap"]) - 1, cap_start + i))]) + + start, end = cap["start"][ind], cap["end"][ind] + # TODO: May need to be improved for edge cases. + # expand the min time. + if end - start < self.min_time: + diff = self.min_time - end + start + start = max(0, start - diff / 2) + end = start + self.min_time + return words, int(start), int(end) + + def _find_nearest_candidates(self, caption, ind): + """find the range of the clips.""" + start, end = ind, ind + #diff = caption["end"][end] - caption["start"][start] + n_candidate = 1 + while n_candidate < self.num_candidates: + # the first clip + if start == 0: + return 0 + # we add () in the following condition to fix the bug. 
class PKLJSONStrTextProcessor(TextProcessor):
    """`caption.json` from howto100m are preprocessed as a
    dict `[video_id, json_str]`.
    Json parsing and tokenization are conducted on-the-fly and the result
    is cached back into the dict.
    """

    def __init__(self, config, max_clip_text_len=96):
        print("[Warning] PKLJSONStrTextProcessor is slow for num_workers > 0.")
        self.caption_pkl_path = str(config.caption_pkl_path)
        # NOTE(review): pickle.load can execute arbitrary code; the
        # caption pickle must come from a trusted source.
        with open(self.caption_pkl_path, "rb") as fd:
            self.data = pickle.load(fd)
        self.max_clip_text_len = max_clip_text_len
        from transformers import AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            str(config.bert_name), use_fast=config.use_fast
        )

    def __call__(self, video_id):
        """Return the caption dict of `video_id` with a tokenized "cap" list.

        On first access the raw "text" clips are tokenized (each clip is
        truncated to `max_clip_text_len` characters before tokenization;
        non-string clips become empty lists), "text" is dropped and the
        dict is cached. Later accesses return the cached dict.

        Raises:
            KeyError: if `video_id` is unknown.
        """
        caption = self.data[video_id]
        if isinstance(caption, str):
            import json
            caption = json.loads(caption)
        elif "cap" in caption and "text" not in caption:
            # Already tokenized by an earlier call. "text" was popped
            # then, so re-processing would raise KeyError.
            return caption

        cap = []
        for text_clip in caption["text"]:
            clip_ids = []
            if isinstance(text_clip, str):
                clip_ids = self.tokenizer(
                    text_clip[: self.max_clip_text_len],
                    add_special_tokens=False
                )["input_ids"]
            cap.append(clip_ids)
        caption["cap"] = cap
        caption.pop("text")  # save space.
        self.data[video_id] = caption
        return caption
class ShardedHow2VideoRetriMetaProcessor(ShardedHow2MetaProcessor):
    """Meta processor that serves fixed-size groups of videos per item.

    `self.data` is cut into consecutive groups of `num_video_per_batch`
    ids; trailing ids that do not fill a multiple of
    `8 * num_video_per_batch` are dropped.
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_video_per_batch = config.num_video_per_batch
        per_batch = self.num_video_per_batch
        # NOTE(review): the factor 8 is hard-coded in the original;
        # presumably a device count, confirm before changing.
        usable = (len(self.data) // (8 * per_batch)) * 8 * per_batch
        self.cands = [
            self.data[offset:offset + per_batch]
            for offset in range(0, usable, per_batch)
        ]

    def __len__(self):
        return len(self.cands)

    def set_candidates(self, cands):
        """Replace the candidate groups, logging the old and new counts."""
        # no changes on num of batches.
        print(len(self.cands), "->", len(cands))
        # assert len(self.cands) == len(cands)
        self.cands = cands

    def __getitem__(self, idx):
        """Return the group at `idx` as `(video_id, -1, shard_id, video_idx)` tuples, twice."""
        video_ids = self.cands[idx]
        assert isinstance(video_ids, list)
        located = (
            (video_id, self.video_id_to_shard[video_id])
            for video_id in video_ids
        )
        sharded_video_idxs = [
            (video_id, -1, shard_id, video_idx)
            for video_id, (shard_id, video_idx) in located
        ]
        return sharded_video_idxs, sharded_video_idxs
class VideoRetriOverlappedAligner(OverlappedAligner):
    """`OverlappedAligner` applied to a group of videos and collated."""

    # Retritask will trim dim-0.
    def __call__(self, sharded_video_idxs, video_features, text_features):
        """Align every video of the group and collate the per-video batches.

        The three arguments are parallel sequences. The returned batch
        carries the list of video ids under "video_id"; when a meta record
        is a tuple, its first element is used as the id.
        """
        from transformers import default_data_collator

        align_one = super().__call__
        sub_batches = []
        video_ids = []
        triples = zip(sharded_video_idxs, video_features, text_features)
        for meta, video_feature, text_feature in triples:
            sub_batches.append(align_one(meta, video_feature, text_feature))
            video_ids.append(meta[0] if isinstance(meta, tuple) else meta)

        batch = default_data_collator(sub_batches)
        batch["video_id"] = video_ids
        return batch
+ +S3D-G was proposed by: + S. Xie, C. Sun, J. Huang, Z. Tu and K. Murphy, + Rethinking Spatiotemporal Feature Learning For Video Understanding. + https://arxiv.org/abs/1712.04851. + Tensorflow code: https://github.com/tensorflow/models/blob/master/research/slim/nets/s3dg.py + +The S3D architecture was slightly modified with a space to depth trick for TPU +optimization. +""" + +import torch as th +import torch.nn.functional as F +import torch.nn as nn +import os +import numpy as np +import re + + +class InceptionBlock(nn.Module): + def __init__( + self, + input_dim, + num_outputs_0_0a, + num_outputs_1_0a, + num_outputs_1_0b, + num_outputs_2_0a, + num_outputs_2_0b, + num_outputs_3_0b, + gating=True, + ): + super(InceptionBlock, self).__init__() + self.conv_b0 = STConv3D(input_dim, num_outputs_0_0a, [1, 1, 1]) + self.conv_b1_a = STConv3D(input_dim, num_outputs_1_0a, [1, 1, 1]) + self.conv_b1_b = STConv3D( + num_outputs_1_0a, num_outputs_1_0b, [3, 3, 3], padding=1, separable=True + ) + self.conv_b2_a = STConv3D(input_dim, num_outputs_2_0a, [1, 1, 1]) + self.conv_b2_b = STConv3D( + num_outputs_2_0a, num_outputs_2_0b, [3, 3, 3], padding=1, separable=True + ) + self.maxpool_b3 = th.nn.MaxPool3d((3, 3, 3), stride=1, padding=1) + self.conv_b3_b = STConv3D(input_dim, num_outputs_3_0b, [1, 1, 1]) + self.gating = gating + self.output_dim = ( + num_outputs_0_0a + num_outputs_1_0b + num_outputs_2_0b + num_outputs_3_0b + ) + if gating: + self.gating_b0 = SelfGating(num_outputs_0_0a) + self.gating_b1 = SelfGating(num_outputs_1_0b) + self.gating_b2 = SelfGating(num_outputs_2_0b) + self.gating_b3 = SelfGating(num_outputs_3_0b) + + def forward(self, input): + """Inception block + """ + b0 = self.conv_b0(input) + b1 = self.conv_b1_a(input) + b1 = self.conv_b1_b(b1) + b2 = self.conv_b2_a(input) + b2 = self.conv_b2_b(b2) + b3 = self.maxpool_b3(input) + b3 = self.conv_b3_b(b3) + if self.gating: + b0 = self.gating_b0(b0) + b1 = self.gating_b1(b1) + b2 = self.gating_b2(b2) + b3 = 
class SelfGating(nn.Module):
    """Channel-wise feature gating as used in S3D-G."""

    def __init__(self, input_dim):
        super().__init__()
        self.fc = nn.Linear(input_dim, input_dim)

    def forward(self, input_tensor):
        """Scale each channel by a sigmoid gate computed from its mean.

        The input is averaged over dims 2, 3 and 4, passed through the
        linear layer and a sigmoid, and the resulting per-channel weights
        are broadcast back over those three dims.
        """
        pooled = input_tensor.mean(dim=(2, 3, 4))
        gate = th.sigmoid(self.fc(pooled))
        # Append three singleton dims so the gate broadcasts over dims 2-4.
        gate = gate.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
        return gate * input_tensor
self.relu(self.bn1(self.conv1(input))) + if self.separable: + out = self.relu(self.bn2(self.conv2(out))) + return out + + +class MaxPool3dTFPadding(th.nn.Module): + def __init__(self, kernel_size, stride=None, padding="SAME"): + super(MaxPool3dTFPadding, self).__init__() + if padding == "SAME": + padding_shape = self._get_padding_shape(kernel_size, stride) + self.padding_shape = padding_shape + self.pad = th.nn.ConstantPad3d(padding_shape, 0) + self.pool = th.nn.MaxPool3d(kernel_size, stride, ceil_mode=True) + + def _get_padding_shape(self, filter_shape, stride): + def _pad_top_bottom(filter_dim, stride_val): + pad_along = max(filter_dim - stride_val, 0) + pad_top = pad_along // 2 + pad_bottom = pad_along - pad_top + return pad_top, pad_bottom + + padding_shape = [] + for filter_dim, stride_val in zip(filter_shape, stride): + pad_top, pad_bottom = _pad_top_bottom(filter_dim, stride_val) + padding_shape.append(pad_top) + padding_shape.append(pad_bottom) + depth_top = padding_shape.pop(0) + depth_bottom = padding_shape.pop(0) + padding_shape.append(depth_top) + padding_shape.append(depth_bottom) + return tuple(padding_shape) + + def forward(self, inp): + inp = self.pad(inp) + out = self.pool(inp) + return out + + +class Sentence_Embedding(nn.Module): + def __init__( + self, + embd_dim, + num_embeddings=66250, + word_embedding_dim=300, + token_to_word_path="dict.npy", + max_words=16, + output_dim=2048, + ): + super(Sentence_Embedding, self).__init__() + self.word_embd = nn.Embedding(num_embeddings, word_embedding_dim) + self.fc1 = nn.Linear(word_embedding_dim, output_dim) + self.fc2 = nn.Linear(output_dim, embd_dim) + self.word_to_token = {} + self.max_words = max_words + token_to_word = np.load(token_to_word_path) + for i, t in enumerate(token_to_word): + self.word_to_token[t] = i + 1 + + def _zero_pad_tensor_token(self, tensor, size): + if len(tensor) >= size: + return tensor[:size] + else: + zero = th.zeros(size - len(tensor)).long() + return th.cat((tensor, zero), 
dim=0) + + def _split_text(self, sentence): + w = re.findall(r"[\w']+", str(sentence)) + return w + + def _words_to_token(self, words): + words = [ + self.word_to_token[word] for word in words if word in self.word_to_token + ] + if words: + we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) + return we + else: + return th.zeros(self.max_words).long() + + def _words_to_ids(self, x): + split_x = [self._words_to_token(self._split_text(sent.lower())) for sent in x] + return th.stack(split_x, dim=0) + + def forward(self, x): + x = self._words_to_ids(x) + x = self.word_embd(x) + x = F.relu(self.fc1(x)) + x = th.max(x, dim=1)[0] + x = self.fc2(x) + return {'text_embedding': x} + + +class S3D(nn.Module): + def __init__(self, dict_path, num_classes=512, gating=True, space_to_depth=True): + super(S3D, self).__init__() + self.num_classes = num_classes + self.gating = gating + self.space_to_depth = space_to_depth + if space_to_depth: + self.conv1 = STConv3D( + 24, 64, [2, 4, 4], stride=1, padding=(1, 2, 2), separable=False + ) + else: + self.conv1 = STConv3D( + 3, 64, [3, 7, 7], stride=2, padding=(1, 3, 3), separable=False + ) + self.conv_2b = STConv3D(64, 64, [1, 1, 1], separable=False) + self.conv_2c = STConv3D(64, 192, [3, 3, 3], padding=1, separable=True) + self.gating = SelfGating(192) + self.maxpool_2a = MaxPool3dTFPadding( + kernel_size=(1, 3, 3), stride=(1, 2, 2), padding="SAME" + ) + self.maxpool_3a = MaxPool3dTFPadding( + kernel_size=(1, 3, 3), stride=(1, 2, 2), padding="SAME" + ) + self.mixed_3b = InceptionBlock(192, 64, 96, 128, 16, 32, 32) + self.mixed_3c = InceptionBlock( + self.mixed_3b.output_dim, 128, 128, 192, 32, 96, 64 + ) + self.maxpool_4a = MaxPool3dTFPadding( + kernel_size=(3, 3, 3), stride=(2, 2, 2), padding="SAME" + ) + self.mixed_4b = InceptionBlock( + self.mixed_3c.output_dim, 192, 96, 208, 16, 48, 64 + ) + self.mixed_4c = InceptionBlock( + self.mixed_4b.output_dim, 160, 112, 224, 24, 64, 64 + ) + self.mixed_4d = InceptionBlock( + 
self.mixed_4c.output_dim, 128, 128, 256, 24, 64, 64 + ) + self.mixed_4e = InceptionBlock( + self.mixed_4d.output_dim, 112, 144, 288, 32, 64, 64 + ) + self.mixed_4f = InceptionBlock( + self.mixed_4e.output_dim, 256, 160, 320, 32, 128, 128 + ) + self.maxpool_5a = self.maxPool3d_5a_2x2 = MaxPool3dTFPadding( + kernel_size=(2, 2, 2), stride=(2, 2, 2), padding="SAME" + ) + self.mixed_5b = InceptionBlock( + self.mixed_4f.output_dim, 256, 160, 320, 32, 128, 128 + ) + self.mixed_5c = InceptionBlock( + self.mixed_5b.output_dim, 384, 192, 384, 48, 128, 128 + ) + self.fc = nn.Linear(self.mixed_5c.output_dim, num_classes) + self.text_module = Sentence_Embedding(num_classes, + token_to_word_path=dict_path) + + def _space_to_depth(self, input): + """3D space to depth trick for TPU optimization. + """ + B, C, T, H, W = input.shape + input = input.view(B, C, T // 2, 2, H // 2, 2, W // 2, 2) + input = input.permute(0, 3, 5, 7, 1, 2, 4, 6) + input = input.contiguous().view(B, 8 * C, T // 2, H // 2, W // 2) + return input + + def forward(self, inputs): + """Defines the S3DG base architecture.""" + if self.space_to_depth: + inputs = self._space_to_depth(inputs) + net = self.conv1(inputs) + if self.space_to_depth: + # we need to replicate 'SAME' tensorflow padding + net = net[:, :, 1:, 1:, 1:] + net = self.maxpool_2a(net) + net = self.conv_2b(net) + net = self.conv_2c(net) + if self.gating: + net = self.gating(net) + net = self.maxpool_3a(net) + net = self.mixed_3b(net) + net = self.mixed_3c(net) + net = self.maxpool_4a(net) + net = self.mixed_4b(net) + net = self.mixed_4c(net) + net = self.mixed_4d(net) + net = self.mixed_4e(net) + net = self.mixed_4f(net) + net = self.maxpool_5a(net) + net = self.mixed_5b(net) + net = self.mixed_5c(net) + net = th.mean(net, dim=[2, 3, 4]) + return {'video_embedding': self.fc(net), 'mixed_5c': net} diff --git a/fairseq/examples/MMPT/mmpt/processors/processor.py b/fairseq/examples/MMPT/mmpt/processors/processor.py new file mode 100644 index 
0000000..98edb05
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/processors/processor.py
@@ -0,0 +1,274 @@
+# Copyright (c) Facebook, Inc. All Rights Reserved
+
+import numpy as np
+import os
+import torch
+
+
+class Processor(object):
+    """
+    A generic processor for video (codec, feature etc.) and text.
+    """
+
+    def __call__(self, **kwargs):
+        raise NotImplementedError
+
+
+class MetaProcessor(Processor):
+    """
+    A meta processor is expected to load the metadata of a dataset:
+        (e.g., video_ids, or captions).
+    You must implement the `__getitem__` (meta datasets are rather diverse.).
+    """
+
+    def __init__(self, config):
+        self.split = config.split
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        raise NotImplementedError
+
+    def _get_split_path(self, config):
+        splits = {
+            "train": config.train_path,
+            "valid": config.val_path,
+            "test": config.test_path,
+        }
+        if config.split is not None:
+            return splits[config.split]
+        return config.train_path
+
+
+class TextProcessor(Processor):
+    """
+    A generic Text processor: rename this as `withTokenizer`.
+    tokenize a string of text on-the-fly.
+    Warning: mostly used for end tasks.
+        (on-the-fly tokenization is slow for how2.)
+    TODO(huxu): move this class as a subclass.
+    """
+
+    def __init__(self, config):
+        self.bert_name = str(config.bert_name)
+        self.use_fast = config.use_fast
+        from transformers import AutoTokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.bert_name, use_fast=self.use_fast
+        )
+
+    def __call__(self, text_id):
+        caption = self.tokenizer(text_id, add_special_tokens=False)
+        return caption["input_ids"]
+
+
+class VideoProcessor(Processor):
+    """
+    A generic video processor: load a numpy video tokens by default.
+    """
+
+    def __init__(self, config):
+        self.vfeat_dir = config.vfeat_dir
+
+    def __call__(self, video_fn):
+        if isinstance(video_fn, tuple):
+            video_fn = video_fn[0]
+        assert isinstance(video_fn, str)
+        video_fn = os.path.join(self.vfeat_dir, video_fn + ".npy")
+        feat = np.load(video_fn)
+        return feat
+
+
+class Aligner(object):
+    """
+    An aligner aligns video and text and outputs a dict of tensors (for a model).
+    """
+    def __init__(self, config):
+        """__init__ needs to be light weight for more workers/threads."""
+        self.split = config.split
+        self.max_video_len = config.max_video_len
+        self.max_len = config.max_len
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            str(config.bert_name), use_fast=config.use_fast
+        )
+        self.cls_token_id = tokenizer.cls_token_id
+        self.sep_token_id = tokenizer.sep_token_id
+        self.pad_token_id = tokenizer.pad_token_id
+        self.mask_token_id = tokenizer.mask_token_id
+
+    def __call__(self, video_id, video_feature, text_feature):
+        raise NotImplementedError
+
+    def _build_video_seq(self, video_feature, video_clips=None):
+        """
+        `video_feature`: available video tokens.
+        `video_clips`: video clip sequence to build.
+        """
+        if not isinstance(video_feature, np.ndarray):
+            raise ValueError(
+                "unsupported type of video_feature", type(video_feature)
+            )
+
+        if video_clips is None:
+            # this is borrowed from DSAligner
+            video_start = 0
+            video_end = min(len(video_feature), self.max_video_len)
+            # the whole sequence is a single clip.
+            video_clips = {"start": [video_start], "end": [video_end]}
+
+        vfeats = np.zeros(
+            (self.max_video_len, video_feature.shape[1]), dtype=np.float32
+        )
+        vmasks = torch.zeros((self.max_video_len,), dtype=torch.bool)
+        video_len = 0
+        for start, end in zip(video_clips["start"], video_clips["end"]):
+            clip_len = min(self.max_video_len - video_len, (end - start))
+            if clip_len > 0:
+                vfeats[video_len: video_len + clip_len] = video_feature[
+                    start: start + clip_len
+                ]
+                vmasks[video_len: video_len + clip_len] = 1
+                video_len += clip_len
+        vfeats = torch.from_numpy(vfeats)
+
+        return vfeats, vmasks
+
+    def _build_text_seq(self, text_feature, text_clip_indexs=None):
+        """
+        `text_feature`: all available clips.
+        `text_clip_indexs`: clip sequence to build.
+        """
+        if text_clip_indexs is None:
+            text_clip_indexs = [0]
+
+        full_caps = []
+        if isinstance(text_feature, dict):
+            for clip_idx in text_clip_indexs:
+                full_caps.extend(text_feature["cap"][clip_idx])
+        else:
+            full_caps = text_feature
+        max_text_len = self.max_len - self.max_video_len - 3
+        full_caps = full_caps[:max_text_len]
+        full_caps = (
+            [self.cls_token_id, self.sep_token_id] + full_caps + [self.sep_token_id]
+        )
+        text_pad_len = self.max_len - len(full_caps) - self.max_video_len
+        padded_full_caps = full_caps + [self.pad_token_id] * text_pad_len
+        caps = torch.LongTensor(padded_full_caps)
+        cmasks = torch.zeros((len(padded_full_caps),), dtype=torch.bool)
+        cmasks[: len(full_caps)] = 1
+
+        return caps, cmasks
+
+    def batch_post_processing(self, batch, video_feature):
+        return batch
+
+
+class MMAttentionMask2DProcessor(Processor):
+    """text generation requires 2d mask
+    that is harder to generate by GPU at this stage."""
+
+    def __call__(self, vmask, cmask, mtype):
+        if mtype == "textgen":
+            return self._build_textgeneration_mask(vmask, cmask)
+        elif mtype == "videogen":
+            return self._build_videogeneration_mask(vmask, cmask)
+        else:
+            return self._build_mm_mask(vmask, cmask)
+
+    def _build_mm_mask(self, vmask, cmask):
+        mask_1d = torch.cat([cmask[:1], vmask, cmask[1:]], dim=0)
+        return mask_1d[None, :].repeat(mask_1d.size(0), 1)
+
+    def _build_videogeneration_mask(self, vmask, cmask):
+        # cls_mask is only about text otherwise it will leak generation.
+        cls_text_mask = torch.cat([
+            # [CLS]
+            torch.ones(
+                (1,), dtype=torch.bool, device=cmask.device),
+            # video tokens and [SEP] for video.
+            torch.zeros(
+                (vmask.size(0) + 1,), dtype=torch.bool, device=cmask.device),
+            cmask[2:]
+        ], dim=0)
+
+        # concat horizontally.
+        video_len = int(vmask.sum())
+        video_masks = torch.cat([
+            # [CLS]
+            torch.ones(
+                (video_len, 1), dtype=torch.bool, device=cmask.device
+            ),
+            torch.tril(
+                torch.ones(
+                    (video_len, video_len),
+                    dtype=torch.bool, device=cmask.device)),
+            # video_padding
+            torch.zeros(
+                (video_len, vmask.size(0) - video_len),
+                dtype=torch.bool, device=cmask.device
+            ),
+            # [SEP] for video (unused).
+            torch.zeros(
+                (video_len, 1), dtype=torch.bool, device=cmask.device
+            ),
+            cmask[2:].unsqueeze(0).repeat(video_len, 1)
+        ], dim=1)
+
+        text_masks = cls_text_mask[None, :].repeat(
+            cmask.size(0) - 2, 1)
+        video_padding_masks = cls_text_mask[None, :].repeat(
+            vmask.size(0) - video_len, 1)
+
+        return torch.cat([
+            cls_text_mask[None, :],
+            video_masks,
+            video_padding_masks,
+            torch.cat([cmask[:1], vmask, cmask[1:]], dim=0)[None,:],
+            text_masks
+        ], dim=0)
+
+    def _build_textgeneration_mask(self, vmask, cmask):
+        # cls_mask is only about video otherwise it will leak generation.
+        cls_video_mask = torch.cat([
+            # [CLS]
+            torch.ones(
+                (1,), dtype=torch.bool, device=cmask.device),
+            vmask,
+            # [SEP]
+            torch.ones((1,), dtype=torch.bool, device=cmask.device),
+            torch.zeros(
+                (cmask.size(0)-2,), dtype=torch.bool, device=cmask.device)
+        ], dim=0)
+
+        # concat horizontally.
+        text_len = int(cmask[2:].sum())
+        text_masks = torch.cat([
+            # [CLS]
+            torch.ones(
+                (text_len, 1), dtype=torch.bool, device=cmask.device
+            ),
+            vmask.unsqueeze(0).repeat(text_len, 1),
+            # [SEP] for video.
+            torch.ones(
+                (text_len, 1), dtype=torch.bool, device=cmask.device
+            ),
+            torch.tril(
+                torch.ones(
+                    (text_len, text_len),
+                    dtype=torch.bool, device=cmask.device)),
+            # padding.
+            torch.zeros(
+                (text_len, cmask.size(0) - text_len - 2),
+                dtype=torch.bool, device=cmask.device
+            )
+        ], dim=1)
+
+        cls_video_masks = cls_video_mask[None, :].repeat(
+            vmask.size(0) + 2, 1)
+        text_padding_masks = cls_video_mask[None, :].repeat(
+            cmask.size(0) - text_len - 2, 1)
+        return torch.cat([
+            cls_video_masks, text_masks, text_padding_masks], dim=0)
diff --git a/fairseq/examples/MMPT/mmpt/tasks/__init__.py b/fairseq/examples/MMPT/mmpt/tasks/__init__.py
new file mode 100644
index 0000000..e2e9323
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from .task import *
+from .vlmtask import *
+from .retritask import *
+
+try:
+    from .fairseqmmtask import *
+except ImportError:
+    pass
+
+try:
+    from .milncetask import *
+except ImportError:
+    pass
+
+try:
+    from .expretritask import *
+except ImportError:
+    pass
diff --git a/fairseq/examples/MMPT/mmpt/tasks/fairseqmmtask.py b/fairseq/examples/MMPT/mmpt/tasks/fairseqmmtask.py
new file mode 100644
index 0000000..f6b6115
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/fairseqmmtask.py
@@ -0,0 +1,111 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+make a general fairseq task for MM pretraining.
+"""
+
+import random
+
+from fairseq.tasks import LegacyFairseqTask, register_task
+
+from .task import Task
+from .retritask import RetriTask
+from ..datasets import FairseqMMDataset
+from .. import utils
+
+
+@register_task("mmtask")
+class FairseqMMTask(LegacyFairseqTask):
+    @staticmethod
+    def add_args(parser):
+        # Add some command-line arguments for specifying where the data is
+        # located and the maximum supported input length.
+        parser.add_argument(
+            "taskconfig",
+            metavar="FILE",
+            help=("taskconfig to load all configurations" "outside fairseq parser."),
+        )
+
+    @classmethod
+    def setup_task(cls, args, **kwargs):
+        return FairseqMMTask(args)
+
+    def __init__(self, args):
+        super().__init__(args)
+        config = utils.load_config(args)
+        self.mmtask = Task.config_task(config)
+        self.mmtask.build_dataset()
+        self.mmtask.build_model()
+        self.mmtask.build_loss()
+
+    def load_dataset(self, split, **kwargs):
+        split_map = {
+            "train": self.mmtask.train_data,
+            "valid": self.mmtask.val_data,
+            "test": self.mmtask.test_data,
+        }
+        if split not in split_map:
+            raise ValueError("unknown split type.")
+        if split_map[split] is not None:
+            self.datasets[split] = FairseqMMDataset(split_map[split])
+
+    def get_batch_iterator(
+        self,
+        dataset,
+        max_tokens=None,
+        max_sentences=None,
+        max_positions=None,
+        ignore_invalid_inputs=False,
+        required_batch_size_multiple=1,
+        seed=1,
+        num_shards=1,
+        shard_id=0,
+        num_workers=0,
+        epoch=1,
+        data_buffer_size=0,
+        disable_iterator_cache=False,
+        skip_remainder_batch=False,
+        grouped_shuffling=False,
+        update_epoch_batch_itr=False,
+    ):
+        """Seed `random` with `epoch`, refresh retrieval candidates when
+        training a `RetriTask` at or past `config.retri_epoch`, then defer
+        to the base-class iterator with every option forwarded unchanged.
+        """
+        random.seed(epoch)
+        if dataset.mmdataset.split == "train" and isinstance(self.mmtask, RetriTask):
+            if epoch >= self.mmtask.config.retri_epoch:
+                if not hasattr(self.mmtask, "retri_dataloader"):
+                    self.mmtask.build_dataloader()
+                self.mmtask.retrive_candidates(epoch)
+
+        # Forward by keyword: the positional call omitted `skip_remainder_batch`,
+        # so the two flags after it landed one slot early (assuming base order).
+        return super().get_batch_iterator(
+            dataset,
+            max_tokens=max_tokens,
+            max_sentences=max_sentences,
+            max_positions=max_positions,
+            ignore_invalid_inputs=ignore_invalid_inputs,
+            required_batch_size_multiple=required_batch_size_multiple,
+            seed=seed,
+            num_shards=num_shards,
+            shard_id=shard_id,
+            num_workers=num_workers,
+            epoch=epoch,
+            data_buffer_size=data_buffer_size,
+            disable_iterator_cache=disable_iterator_cache,
+            skip_remainder_batch=skip_remainder_batch,
+            grouped_shuffling=grouped_shuffling,
+            update_epoch_batch_itr=update_epoch_batch_itr,
+        )
+
+    @property
+    def source_dictionary(self):
+        return None
+
+    @property
+    def target_dictionary(self):
+        return None
diff --git a/fairseq/examples/MMPT/mmpt/tasks/milncetask.py b/fairseq/examples/MMPT/mmpt/tasks/milncetask.py
new file mode 100644
index 0000000..61b6ab0
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/milncetask.py
@@ -0,0 +1,27 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from .task import Task
+
+
+class MILNCETask(Task):
+    def reshape_subsample(self, sample):
+        if (
+            hasattr(self.config.dataset, "subsampling")
+            and self.config.dataset.subsampling is not None
+            and self.config.dataset.subsampling > 1
+        ):
+            for key in sample:
+                if torch.is_tensor(sample[key]):
+                    tensor = self.flat_subsample(sample[key])
+                    if key in ["caps", "cmasks"]:
+                        size = tensor.size()
+                        batch_size = size[0] * size[1]
+                        expanded_size = (batch_size,) + size[2:]
+                        tensor = tensor.view(expanded_size)
+                    sample[key] = tensor
+        return sample
diff --git a/fairseq/examples/MMPT/mmpt/tasks/retritask.py b/fairseq/examples/MMPT/mmpt/tasks/retritask.py
new file mode 100644
index 0000000..b43f20f
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/retritask.py
@@ -0,0 +1,253 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import os
+import torch
+import pickle
+import random
+
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+
+from ..processors import (
+    ShardedHow2MetaProcessor,
+    ShardedVideoProcessor,
+    ShardedTextProcessor,
+    VariedLenAligner,
+)
+
+from ..datasets import MMDataset
+from .task import Task
+from ..modules import vectorpool
+from ..evaluators.predictor import Predictor
+from ..utils import set_seed, get_local_rank, get_world_size
+
+
+class RetriTask(Task):
+    """abstract class for task with retrieval."""
+
+    def reshape_subsample(self, sample):
+        for key in sample:
+            if torch.is_tensor(sample[key]):
+                sample[key] = self.flat_subsample(sample[key])
+        return sample
+
+    def flat_subsample(self, tensor):
+        if tensor.size(0) == 1:
+            tensor = tensor.squeeze(0)
+        return tensor
+
+    def build_dataloader(self):
+        """called by `get_batch_iterator` in fairseqmmtask. """
+        # TODO: hard-code dataloader for retri for now and configurable in .yaml.
+        # reuse the `train.lst`.
+        self.config.dataset.split = "train"
+        meta_processor = ShardedHow2MetaProcessor(self.config.dataset)
+        video_processor = ShardedVideoProcessor(self.config.dataset)
+        text_processor = ShardedTextProcessor(self.config.dataset)
+
+        aligner = VariedLenAligner(self.config.dataset)
+        aligner.subsampling = self.config.dataset.clip_per_video
+
+        self.retri_data = MMDataset(
+            meta_processor, video_processor, text_processor, aligner
+        )
+
+        retri_sampler = DistributedSampler(self.retri_data)
+        infer_scale = 16
+        batch_size = self.config.dataset.num_video_per_batch \
+            * infer_scale
+
+        self.retri_dataloader = DataLoader(
+            self.retri_data,
+            collate_fn=self.retri_data.collater,
+            batch_size=batch_size,
+            shuffle=False,
+            sampler=retri_sampler,
+            num_workers=self.config.fairseq.dataset.num_workers
+        )
+        return self.retri_dataloader
+
+    def retrive_candidates(self, epoch, dataloader=None):
+        if get_local_rank() == 0:
+            print("running retrieval model.")
+        out_dir = os.path.join(
+            self.config.fairseq.checkpoint.save_dir, "retri")
+        os.makedirs(out_dir, exist_ok=True)
+
+        if not os.path.isfile(
+                os.path.join(
+                    out_dir, "batched_e" + str(epoch) + "_videos0.pkl")
+        ):
+            if dataloader is None:
+                dataloader = self.retri_dataloader
+
+            self.model.eval()
+            self.model.is_train = False
+
+            assert self.retri_data.meta_processor.data == \
+                self.train_data.meta_processor.data  # video_ids not mutated.
+
+            self._retri_predict(epoch, dataloader)
+
+            self.model.train()
+            self.model.is_train = True
+
+        torch.distributed.barrier()
+        output = self._retri_sync(epoch, out_dir)
+        torch.distributed.barrier()
+        self.train_data.meta_processor.set_candidates(output)
+        return output
+
+
+class VideoRetriTask(RetriTask):
+    """RetriTask on video level."""
+
+    def reshape_subsample(self, sample):
+        if (
+            hasattr(self.config.dataset, "clip_per_video")
+            and self.config.dataset.clip_per_video is not None
+            and self.config.dataset.clip_per_video > 1
+        ):
+            for key in sample:
+                if torch.is_tensor(sample[key]):
+                    sample[key] = self.flat_subsample(sample[key])
+        return sample
+
+    def flat_subsample(self, tensor):
+        if tensor.size(0) == 1:
+            tensor = tensor.squeeze(0)
+        return Task.flat_subsample(self, tensor)
+
+    def _retri_predict(self, epoch, dataloader):
+        set_seed(epoch)
+        # save for retrieval.
+        predictor = VideoPredictor(self.config)
+        predictor.predict_loop(
+            self.model, dataloader)
+        set_seed(epoch)  # get the same text clips.
+        # retrieval.
+        retri_predictor = VideoRetriPredictor(
+            self.config)
+        retri_predictor.predict_loop(
+            self.model, predictor.vecpool.retriver, epoch)
+        del predictor
+        del retri_predictor
+
+    def _retri_sync(self, epoch, out_dir):
+        # every rank performs the same merge.
+        batched_videos = []
+        for local_rank in range(get_world_size()):
+            fn = os.path.join(
+                out_dir,
+                "batched_e" + str(epoch) + "_videos" + str(local_rank) + ".pkl")
+            with open(fn, "rb") as fr:
+                batched_videos.extend(pickle.load(fr))
+        print(
+            "[INFO] batched_videos",
+            len(batched_videos), len(batched_videos[0]))
+        return batched_videos
+
+
+class VideoPredictor(Predictor):
+    def __init__(self, config):
+        vectorpool_cls = getattr(vectorpool, config.vectorpool_cls)
+        self.vecpool = vectorpool_cls(config)
+
+    def predict_loop(
+        self,
+        model,
+        dataloader,
+        early_stop=-1,
+    ):
+        with torch.no_grad():
+            if get_local_rank() == 0:
+                dataloader = tqdm(dataloader)
+            for batch_idx, batch in enumerate(dataloader):
+                if batch_idx == early_stop:
+                    break
+                self(batch, model)
+        return self.finalize()
+
+    def __call__(self, sample, model, **kwargs):
+        param = next(model.parameters())
+        dtype = param.dtype
+        device = param.device
+        subsample = sample["vfeats"].size(1)
+        sample = self.to_ctx(sample, device, dtype)
+        for key in sample:
+            if torch.is_tensor(sample[key]):
+                size = sample[key].size()
+                if len(size) >= 2:
+                    batch_size = size[0] * size[1]
+                    expanded_size = (
+                        (batch_size,) + size[2:] if len(size) > 2
+                        else (batch_size,)
+                    )
+                    sample[key] = sample[key].view(expanded_size)
+
+        outputs = model(**sample)
+        sample.update(outputs)
+        self.vecpool(sample, subsample)
+
+    def finalize(self):
+        print("[INFO]", self.vecpool)
+        if not self.vecpool.retriver.db.is_trained:
+            self.vecpool.retriver.finalize_training()
+        return self.vecpool.retriver
+
+
+class VideoRetriPredictor(Predictor):
+    """
+    Online Retrieval Predictor for Clips (used by RetriTask).
+    TODO: merge this with VisPredictor?
+    """
+
+    def __init__(self, config):
+        self.pred_dir = os.path.join(
+            config.fairseq.checkpoint.save_dir,
+            "retri")
+        self.num_cands = config.num_cands
+        self.num_video_per_batch = config.dataset.num_video_per_batch
+
+    def predict_loop(
+        self,
+        model,
+        retriver,
+        epoch,
+        early_stop=-1
+    ):
+        # a fake loop that only tries to recover the video vector
+        # from video_id.
+        batched_videos = []
+        # obtain available video_ids.
+        video_ids = list(retriver.videoid_to_vectoridx.keys())
+
+        dataloader = random.sample(
+            video_ids,
+            len(video_ids) // self.num_video_per_batch
+        )
+
+        if get_local_rank() == 0:
+            dataloader = tqdm(dataloader)
+        for batch_idx, batch in enumerate(dataloader):
+            # batch is one video id.
+            if batch_idx == early_stop:
+                break
+            video_ids = retriver.search_by_video_ids(
+                [batch], self.num_cands)[0]
+            if len(video_ids) > self.num_video_per_batch:
+                # we moved the center to make cluster robust.
+                video_ids = random.sample(video_ids, self.num_video_per_batch)
+            batched_videos.append(video_ids)
+        return self.finalize(batched_videos, epoch)
+
+    def finalize(self, batched_videos, epoch):
+        fn = os.path.join(
+            self.pred_dir,
+            "batched_e" + str(epoch) + "_videos" + str(get_local_rank()) + ".pkl")
+        with open(fn, "wb") as fw:
+            pickle.dump(batched_videos, fw, pickle.HIGHEST_PROTOCOL)
+        return batched_videos
diff --git a/fairseq/examples/MMPT/mmpt/tasks/task.py b/fairseq/examples/MMPT/mmpt/tasks/task.py
new file mode 100644
index 0000000..8bb50f2
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/task.py
@@ -0,0 +1,195 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+
+from .. import tasks
+from .. import models
+from .. import losses
+from ..datasets import MMDataset
+from .. import processors
+
+
+class Task(object):
+    """
+    A task refers to one generic training task (e.g., training one model).
+    """
+
+    @classmethod
+    def config_task(cls, config):
+        """
+        determine whether to load a hard-coded task or config from a generic one.
+        via if a task string is available in config.
+        """
+        if config.task is not None:
+            # TODO (huxu): expand the search scope.
+            task_cls = getattr(tasks, config.task)
+            return task_cls(config)
+        else:
+            return Task(config)
+
+    def __init__(self, config):
+        self.config = config
+        self.train_data = None
+        self.val_data = None
+        self.test_data = None
+
+        self.model = None
+        self.loss_fn = None
+        self.eval_fn = None
+
+    def build_dataset(self):
+        """TODO (huxu): move processor breakdown to MMDataset."""
+        """fill-in `self.train_data`, `self.val_data` and `self.test_data`."""
+
+        meta_processor_cls = getattr(
+            processors, self.config.dataset.meta_processor)
+        video_processor_cls = getattr(
+            processors, self.config.dataset.video_processor)
+        text_processor_cls = getattr(
+            processors, self.config.dataset.text_processor)
+        aligner_cls = getattr(
+            processors, self.config.dataset.aligner)
+
+        if self.config.dataset.train_path is not None:
+            self.config.dataset.split = "train"
+            # may be used by meta processor.
+            # meta_processor controls different dataset.
+            meta_processor = meta_processor_cls(self.config.dataset)
+            video_processor = video_processor_cls(self.config.dataset)
+            text_processor = text_processor_cls(self.config.dataset)
+            aligner = aligner_cls(self.config.dataset)
+            self.train_data = MMDataset(
+                meta_processor, video_processor, text_processor, aligner
+            )
+            print("train_len", len(self.train_data))
+            output = self.train_data[0]
+            self.train_data.print_example(output)
+        if self.config.dataset.val_path is not None:
+            self.config.dataset.split = "valid"
+            # may be used by meta processor.
+            meta_processor = meta_processor_cls(self.config.dataset)
+            video_processor = video_processor_cls(self.config.dataset)
+            text_processor = text_processor_cls(self.config.dataset)
+            aligner = aligner_cls(self.config.dataset)
+            self.val_data = MMDataset(
+                meta_processor, video_processor, text_processor, aligner
+            )
+            print("val_len", len(self.val_data))
+            output = self.val_data[0]
+            self.val_data.print_example(output)
+
+        if self.config.dataset.split == "test":
+            # the following is run via launching fairseq-validate.
+            # The aligner must be built here too: the branches above reset
+            # `split`, so this one only runs when neither of them created one.
+            meta_processor = meta_processor_cls(self.config.dataset)
+            video_processor = video_processor_cls(self.config.dataset)
+            text_processor = text_processor_cls(self.config.dataset)
+            aligner = aligner_cls(self.config.dataset)
+            self.test_data = MMDataset(
+                meta_processor, video_processor, text_processor, aligner
+            )
+            print("test_len", len(self.test_data))
+            output = self.test_data[0]
+            self.test_data.print_example(output)
+
+    def build_model(self, checkpoint=None):
+        """Create `self.model` once from `config.model.model_cls`; optionally load `checkpoint`."""
+        if self.model is None:
+            model_cls = getattr(models, self.config.model.model_cls)
+            self.model = model_cls(self.config)
+        if checkpoint is not None:
+            self.load_checkpoint(checkpoint)
+        return self.model
+
+    def load_checkpoint(self, checkpoint):
+        """Load weights from the `checkpoint` path into `self.model` (non-strict)."""
+        if self.model is None:
+            raise ValueError("model is not initialized.")
+        # Deserialize onto CPU so a checkpoint saved on GPU loads on any host.
+        state_dict = torch.load(checkpoint, map_location="cpu")
+        state_dict = self._trim_state_dict(state_dict)
+        self.model.load_state_dict(state_dict, strict=False)
+        # if it's a fp16 model, turn it back.
+        if next(self.model.parameters()).dtype == torch.float16:
+            self.model = self.model.float()
+        return self.model
+
+    def _trim_state_dict(self, state_dict):
+        """Unwrap "state_dict"/"model" containers and strip the "mmmodel." key prefix."""
+        from collections import OrderedDict
+
+        if "state_dict" in state_dict:
+            state_dict = state_dict["state_dict"]
+        if "model" in state_dict:  # fairseq checkpoint format.
+            state_dict = state_dict["model"]
+        ret_state_dict = OrderedDict()
+        for (
+            key,
+            value,
+        ) in state_dict.items():
+            # remove fairseq wrapper since this is a task.
+            if key.startswith("mmmodel."):
+                key = key[len("mmmodel."):]
+            ret_state_dict[key] = value
+        return ret_state_dict
+
+    def build_loss(self):
+        """Create `self.loss_fn` once from `config.loss.loss_cls` when a loss is configured."""
+        if self.loss_fn is None and self.config.loss is not None:
+            loss_cls = getattr(losses, self.config.loss.loss_cls)
+            self.loss_fn = loss_cls()
+        return self.loss_fn
+
+    def flat_subsample(self, tensor):
+        """Merge the first two dimensions of `tensor` into one (no-op below 2 dims)."""
+        size = tensor.size()
+        if len(size) >= 2:
+            batch_size = size[0] * size[1]
+            expanded_size = (
+                (batch_size,) + size[2:] if len(size) > 2
+                else (batch_size,)
+            )
+            tensor = tensor.view(expanded_size)
+        return tensor
+
+    def reshape_subsample(self, sample):
+        """Apply `flat_subsample` to every tensor in `sample` when subsampling > 1."""
+        if (
+            hasattr(self.config.dataset, "subsampling")
+            and self.config.dataset.subsampling is not None
+            and self.config.dataset.subsampling > 1
+        ):
+            for key in sample:
+                if torch.is_tensor(sample[key]):
+                    sample[key] = self.flat_subsample(sample[key])
+        return sample
+
+    def __call__(self, model, sample):
+        """Run `self.model` on `sample` and return loss and size statistics.
+        NOTE(review): the `model` argument is unused here -- confirm intended."""
+        loss = None
+        loss_scalar = float("inf")
+
+        sample = self.reshape_subsample(sample)
+        outputs = self.model(**sample)
+        sample.update(outputs)
+        if self.loss_fn is not None:
+            loss = self.loss_fn(**sample)
+            loss_scalar = loss.item()
+
+        batch_size = sample["caps"].size(0)
+        sample_size = 1
+        return {
+            "loss": loss,
+            "loss_scalar": loss_scalar,
+            "max_len": self.config.dataset.max_len,
+            "batch_size": batch_size,
+            "sample_size": sample_size,
+        }
+
+    def build_dataloader(self):
+        """only used for trainer that lacks building loaders."""
+        raise NotImplementedError
diff --git a/fairseq/examples/MMPT/mmpt/tasks/vlmtask.py b/fairseq/examples/MMPT/mmpt/tasks/vlmtask.py
new file mode 100644
index 0000000..57dc4c9
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/tasks/vlmtask.py
@@ -0,0 +1,27 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+
+from .task import Task
+
+
+class VLMTask(Task):
+    """A VLM task for reproducibility.
+    the collator splits subsamples into two sub-batches.
+    This should have no logic changes.
+    but changed the randomness in frame masking.
+    """
+
+    def flat_subsample(self, tensor):
+        size = tensor.size()
+        if len(size) >= 2:
+            batch_size = size[0] * (size[1] // 2)
+            expanded_size = (
+                (batch_size, 2) + size[2:] if len(size) > 2
+                else (batch_size, 2)
+            )
+            tensor = tensor.view(expanded_size)
+            tensor = torch.cat([tensor[:, 0], tensor[:, 1]], dim=0)  # halves back to back
+        return tensor
diff --git a/fairseq/examples/MMPT/mmpt/utils/__init__.py b/fairseq/examples/MMPT/mmpt/utils/__init__.py
new file mode 100644
index 0000000..2429ee3
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/utils/__init__.py
@@ -0,0 +1,75 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import random
+import numpy as np
+import torch
+
+from .shardedtensor import *
+from .load_config import *
+
+
+def set_seed(seed=43211):
+    """Seed `random`, NumPy and torch (CPU and all CUDA devices); pin cuDNN to deterministic."""
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    if torch.backends.cudnn.enabled:
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
+
+
+def get_world_size():
+    """Return the torch.distributed world size, or 1 when it is not initialized."""
+    if torch.distributed.is_initialized():
+        world_size = torch.distributed.get_world_size()
+    else:
+        world_size = 1
+    return world_size
+
+
+def get_local_rank():
+    """Return `torch.distributed.get_rank()`, or 0 when it is not initialized."""
+    return torch.distributed.get_rank() \
+        if torch.distributed.is_initialized() else 0
+
+
+def print_on_rank0(func):
+    """Print `func` with an "[INFO]" prefix, on rank 0 only."""
+    local_rank = get_local_rank()
+    if local_rank == 0:
+        print("[INFO]", func)
+
+
+class RetriMeter(object):
+    """
+    Statistics on whether retrieval yields a better pair.
+    """
+    def __init__(self, freq=1024):
+        self.freq = freq
+        self.total = 0
+        self.replace = 0
+        self.updates = 0
+
+    def __call__(self, data):
+        """Accumulate counts from `data` (ndarray or tensor); print every `freq` updates on rank 0."""
+        if isinstance(data, np.ndarray):
+            self.replace += data.shape[0] - int((data[:, 0] == -1).sum())
+            self.total += data.shape[0]
+        elif torch.is_tensor(data):
+            self.replace += int(data.sum())
+            self.total += data.size(0)
+        else:
+            raise ValueError("unsupported RetriMeter data type.", type(data))
+
+        self.updates += 1
+        if get_local_rank() == 0 and self.updates % self.freq == 0:
+            print("[INFO]", self)
+
+    def __repr__(self):
+        # Guard the ratio: `total` is still 0 before the first update.
+        ratio = self.replace / self.total if self.total else 0.0
+        return "RetriMeter (" + str(ratio) \
+            + "/" + str(self.replace) + "/" + str(self.total) + ")"
diff --git a/fairseq/examples/MMPT/mmpt/utils/load_config.py b/fairseq/examples/MMPT/mmpt/utils/load_config.py
new file mode 100644
index 0000000..ede4f94
--- /dev/null
+++ b/fairseq/examples/MMPT/mmpt/utils/load_config.py
@@ -0,0 +1,81 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
def load_config(args=None, config_file=None, overwrite_fairseq=False):
    """Load a (possibly stacked) task config and prepare the run directory.

    TODO (huxu): move fairseq overwrite to another function.

    Steps, in order:
      * the config path is taken from ``args.taskconfig`` when ``args`` is
        given, otherwise from ``config_file``;
      * when ``dataset.subsampling`` is set, the fairseq batch size is
        divided by it;
      * unless ``dataset.split == "test"``, the checkpoint ``save_dir`` is
        created, the tensorboard log dir is given a fresh numbered suffix,
        and the resolved config is saved as ``config.yaml`` in ``save_dir``;
      * with ``overwrite_fairseq`` every field in ``config.fairseq`` is
        copied onto ``args``.

    Raises:
        ValueError: when a non-test config has no checkpoint ``save_dir``.

    Returns:
        The loaded config object.
    """
    if args is not None:
        config_file = args.taskconfig
    config = recursive_config(config_file)

    if config.dataset.subsampling is not None:
        batch_size = config.fairseq.dataset.batch_size // config.dataset.subsampling
        print(
            "adjusting batch_size to {} due to subsampling {}.".format(
                batch_size, config.dataset.subsampling
            )
        )
        config.fairseq.dataset.batch_size = batch_size

    is_test = config.dataset.split is not None and config.dataset.split == "test"
    if not is_test:
        if (
            config.fairseq.checkpoint is None
            or config.fairseq.checkpoint.save_dir is None
        ):
            raise ValueError("fairseq save_dir or save_path must be specified.")

        save_dir = config.fairseq.checkpoint.save_dir
        # Created once, before it is listed by suffix_rundir and written to.
        os.makedirs(save_dir, exist_ok=True)
        if config.fairseq.common.tensorboard_logdir is not None:
            tb_run_dir = suffix_rundir(
                save_dir, config.fairseq.common.tensorboard_logdir
            )
            config.fairseq.common.tensorboard_logdir = tb_run_dir
            print(
                "update tensorboard_logdir as", config.fairseq.common.tensorboard_logdir
            )
        OmegaConf.save(config=config, f=os.path.join(save_dir, "config.yaml"))

    if overwrite_fairseq and config.fairseq is not None and args is not None:
        # flatten fields.
        for group in config.fairseq:
            for field in config.fairseq[group]:
                print("overwrite args." + field, "as", config.fairseq[group][field])
                setattr(args, field, config.fairseq[group][field])
    return config


def recursive_config(config_path):
    """allows for stacking of configs in any depth.

    A config may name a base config under ``includes``; the base is loaded
    first (recursively) and the current config is merged on top of it.
    """
    config = OmegaConf.load(config_path)
    if config.includes is not None:
        includes = config.includes
        config.pop("includes")
        base_config = recursive_config(includes)
        config = OmegaConf.merge(base_config, config)
    return config


def suffix_rundir(save_dir, run_dir):
    """Return ``save_dir/run_dir_<n>`` with the next unused run number.

    Existing entries named exactly ``run_dir`` count as run 0 and entries
    named ``run_dir_<digits>`` count as run ``<digits>``. Anything else is
    ignored, so a ``run_dir`` that itself contains underscores, or an
    unrelated entry sharing the prefix, no longer breaks the numbering.

    Args:
        save_dir: existing directory to scan.
        run_dir: base name of the run directory.

    Returns:
        Path of the next run directory (it is not created here).
    """
    prefix = run_dir + "_"
    max_id = -1
    for entry in os.listdir(save_dir):
        if entry == run_dir:
            cur_id = 0
        elif entry.startswith(prefix) and entry[len(prefix):].isdecimal():
            cur_id = int(entry[len(prefix):])
        else:
            continue
        max_id = max(max_id, cur_id)
    return os.path.join(save_dir, run_dir + "_" + str(max_id + 1))


def overwrite_dir(config, replace, basedir):
    """Rewrite, in place, string values that start with ``basedir``.

    Every occurrence of ``basedir`` inside such a value is replaced by
    ``replace``; nested ``DictConfig`` values are processed recursively.
    """
    for key in config:
        if isinstance(config[key], str) and config[key].startswith(basedir):
            config[key] = config[key].replace(basedir, replace)
        if isinstance(config[key], omegaconf.dictconfig.DictConfig):
            overwrite_dir(config[key], replace, basedir)
class ShardedTensor(object):
    """Variable-length arrays packed into one array plus an offsets table.

    ``data`` holds all items concatenated along axis 0 and ``starts`` holds
    ``len(items) + 1`` non-decreasing offsets, so item ``i`` is
    ``data[starts[i]:starts[i + 1]]``.
    """

    def __init__(self, data, starts):
        self.data = data
        self.starts = starts
        # Sanity checks on the offsets table.
        assert self.starts[0] == 0
        assert self.starts[-1] == len(self.data)
        assert (self.starts[1:] >= self.starts[:-1]).all()
        assert (self.starts > -1).all()

    @staticmethod
    def from_list(xs):
        """Pack a list of arrays (concatenable along axis 0) into one shard."""
        # np.long was removed in NumPy 1.24 and is platform-dependent in
        # NumPy 2.x; an explicit 64-bit integer works on every version.
        starts = np.full((len(xs) + 1,), -1, dtype=np.int64)
        data = np.concatenate(xs, axis=0)
        starts[0] = 0
        for i, x in enumerate(xs):
            starts[i + 1] = starts[i] + x.shape[0]
        assert (starts > -1).all()
        return ShardedTensor(data, starts)

    def __getitem__(self, i):
        """Return item ``i`` as a slice of the packed data."""
        return self.data[self.starts[i] : self.starts[i + 1]]

    def __len__(self):
        """Number of packed items."""
        return len(self.starts) - 1

    def lengths(self):
        """Return the length (along axis 0) of every item."""
        return self.starts[1:] - self.starts[:-1]

    def save(self, path):
        """Write ``<path>_starts.npy`` and ``<path>_data.npy``."""
        np.save(path + "_starts", self.starts)
        np.save(path + "_data", self.data)

    @staticmethod
    def load(path, mmap_mode=None):
        """Load a shard written by :meth:`save`; ``mmap_mode`` is passed to ``np.load``."""
        starts = np.load(path + "_starts.npy", mmap_mode)
        data = np.load(path + "_data.npy", mmap_mode)
        return ShardedTensor(data, starts)
class BaseJob(object):
    """A job described by a yaml config file."""

    def __init__(self, yaml_file, dryrun=False):
        self.yaml_file = yaml_file
        self.config = recursive_config(yaml_file)
        self.dryrun = dryrun

    def submit(self, **kwargs):
        """Launch the job; implemented by subclasses."""
        raise NotImplementedError

    def _normalize_cmd(self, cmd_list):
        """Return a copy of ``cmd_list`` with ``"[yaml]"`` replaced by the yaml path."""
        cmd_list = list(cmd_list)
        yaml_index = cmd_list.index("[yaml]")
        cmd_list[yaml_index] = self.yaml_file
        return cmd_list


class LocalJob(BaseJob):
    """Runs a job on the local machine through a shell command."""

    CMD_CONFIG = {
        "local_single": [
            "fairseq-train", "[yaml]", "--user-dir", "mmpt",
            "--task", "mmtask", "--arch", "mmarch",
            "--criterion", "mmloss",
        ],
        "local_small": [
            "fairseq-train", "[yaml]", "--user-dir", "mmpt",
            "--task", "mmtask", "--arch", "mmarch",
            "--criterion", "mmloss",
            "--distributed-world-size", "2"
        ],
        "local_big": [
            "fairseq-train", "[yaml]", "--user-dir", "mmpt",
            "--task", "mmtask", "--arch", "mmarch",
            "--criterion", "mmloss",
            "--distributed-world-size", "8"
        ],
        "local_predict": ["python", "mmpt_cli/predict.py", "[yaml]"],
    }

    # Config keys passed as value-less switches (a list of binary flag).
    _BINARY_FLAGS = ("fp16", "reset_optimizer", "reset_dataloader", "reset_meters")

    def __init__(self, yaml_file, job_type=None, dryrun=False):
        super().__init__(yaml_file, dryrun)
        if job_type is None:
            # Fall back to the config's task_type, then to "local_single".
            self.job_type = "local_single"
            if self.config.task_type is not None:
                self.job_type = self.config.task_type
        else:
            self.job_type = job_type
        if self.job_type in ["local_single", "local_small"]:
            if self.config.fairseq.dataset.batch_size > 32:
                print("decreasing batch_size to 32 for local testing?")

    @staticmethod
    def _fairseq_args(fairseq_config):
        """Turn the two-level ``fairseq`` config into command-line arguments.

        Binary flags are emitted (without a value) only when they are truthy,
        so ``fp16: false`` no longer switches fp16 on. ``lr`` uses the first
        element of its list and ``adam_betas`` is wrapped in single quotes
        for the shell.
        """
        params = []
        for group in fairseq_config:
            for key in fairseq_config[group]:
                value = fairseq_config[group][key]
                flag = "--" + key.replace("_", "-")
                if key in LocalJob._BINARY_FLAGS:
                    if value:
                        params.append(flag)
                    continue
                if key == "lr":
                    value = str(value[0])
                elif key == "adam_betas":
                    value = "'" + str(value) + "'"
                else:
                    value = str(value)
                params.extend([flag, value])
        return params

    def submit(self):
        """Build the command, print it, and run it unless ``dryrun`` is set.

        Returns:
            A ``JobStatus`` with a fixed placeholder job id.
        """
        cmd_list = self._normalize_cmd(LocalJob.CMD_CONFIG[self.job_type])
        if "predict" not in self.job_type:
            # append fairseq args.
            from mmpt.utils import load_config

            config = load_config(config_file=self.yaml_file)
            cmd_list.extend(self._fairseq_args(config.fairseq))

        print("launching", " ".join(cmd_list))
        if not self.dryrun:
            os.system(" ".join(cmd_list))
        return JobStatus("12345678")


class JobStatus(object):
    """Minimal status object for a local job; it never reports done or running."""

    def __init__(self, job_id):
        self.job_id = job_id

    def __repr__(self):
        return self.job_id

    def __str__(self):
        return self.job_id

    def done(self):
        return False

    def running(self):
        return False

    def result(self):
        if self.done():
            return "{} is done.".format(self.job_id)
        else:
            return "{} is running.".format(self.job_id)

    def stderr(self):
        return self.result()

    def stdout(self):
        return self.result()
def get_dataloader(config):
    """Build the test ``DataLoader`` described by ``config``.

    The meta/video/text processors and the aligner are looked up by name on
    the ``processors`` module and each built from ``config.dataset``. The
    first example is printed as a sanity check.

    Returns:
        An unshuffled ``DataLoader`` over the resulting ``MMDataset``.
    """
    meta_processor_cls = getattr(processors, config.dataset.meta_processor)
    video_processor_cls = getattr(processors, config.dataset.video_processor)
    text_processor_cls = getattr(processors, config.dataset.text_processor)
    aligner_cls = getattr(processors, config.dataset.aligner)

    meta_processor = meta_processor_cls(config.dataset)
    video_processor = video_processor_cls(config.dataset)
    text_processor = text_processor_cls(config.dataset)
    aligner = aligner_cls(config.dataset)

    test_data = MMDataset(
        meta_processor,
        video_processor,
        text_processor,
        aligner,
    )
    print("test_len", len(test_data))
    output = test_data[0]
    test_data.print_example(output)

    test_dataloader = DataLoader(
        test_data,
        batch_size=config.fairseq.dataset.batch_size,
        shuffle=False,
        num_workers=6,
        collate_fn=test_data.collater,
    )
    return test_dataloader


def main(args):
    """Evaluate (``test*`` configs) or run prediction (``vis*`` configs).

    Args:
        args: object with a ``taskconfig`` attribute, the path of the yaml
            config. The basename of that path selects the mode.

    Raises:
        ValueError: when the config file name starts with neither ``test``
            nor ``vis``.
    """
    config = load_config(args)

    if isinstance(config, omegaconf.dictconfig.DictConfig):
        print(OmegaConf.to_yaml(config))
    else:
        pp = pprint.PrettyPrinter(indent=4)
        # PrettyPrinter has no ``print`` method; ``pprint`` is the printing call.
        pp.pprint(config)

    mmtask = Task.config_task(config)
    mmtask.build_model()

    test_dataloader = get_dataloader(config)
    checkpoint_search_path = os.path.dirname(config.eval.save_path)
    results = []

    prefix = os.path.basename(args.taskconfig)
    if prefix.startswith("test"):
        # loop all checkpoint for datasets without validation set.
        if "best" not in config.fairseq.common_eval.path:
            print("eval each epoch.")
            for checkpoint in glob.glob(checkpoint_search_path + "/checkpoint*"):
                model = mmtask.load_checkpoint(checkpoint)
                ckpt = os.path.basename(checkpoint)
                evaluator = Evaluator(config)
                output = evaluator.evaluate(
                    model, test_dataloader, ckpt + "_merged")
                results.append((checkpoint, output))
        # use the one specified by the config lastly.
        model = mmtask.load_checkpoint(config.fairseq.common_eval.path)
        evaluator = Evaluator(config)
        output = evaluator.evaluate(model, test_dataloader)
        results.append((config.fairseq.common_eval.path, output))

        best_result = None
        # Start below any real score so a run whose best metric is 0 (or
        # negative) still yields a best result instead of leaving it None.
        best_metric = float("-inf")
        for checkpoint, result in results:
            print(checkpoint)
            evaluator.metric.print_computed_metrics(result)
            best_score = evaluator.metric.best_metric(result)
            if best_score > best_metric:
                best_result = (checkpoint, result)
                best_metric = best_score
        print("best results:")
        if best_result is not None:
            print(best_result[0])
            evaluator.metric.print_computed_metrics(best_result[1])

    elif prefix.startswith("vis"):
        model = mmtask.load_checkpoint(config.fairseq.common_eval.path)
        predictor_cls = getattr(predictor_path, config.predictor)
        predictor = predictor_cls(config)
        predictor.predict_loop(model, test_dataloader, mmtask, None)
    else:
        raise ValueError("unknown prefix of the config file", args.taskconfig)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("taskconfig", type=str)
    args = parser.parse_args()
    main(args)
So you are unlikely to need a new `MetaProcessor`, `VideoProcessor` or `TextProcessor`; you will mostly work on a new `Aligner`, a new model and a new loss. + +### Data Sharding +Pretraining on Howto100M is heavy on IO since we have millions of videos and captions on the hard disk that cannot fit into memory. +It is desirable to have an optimized preprocessing step before the actual dataloading. + +We support data sharding to pack multiple videos into shards of training data for both videos and captions (see [dataset](DATASET.md) for preprocessing). +These shards will be mapped into memory to reduce the frequency of IO access on millions of files (see processors starting with `Sharded*`). +This is the default config for a how2 dataset: `projects/task/how2.yaml`. + +Many thanks to Dmytro Okhonko for sharing the code from the MARGE project. + +### Training +Pretraining on Howto100M is expected to run on one or multiple nodes, where each node has 8 GPUs with 32 GB of memory. +Launching pretraining on MFM+MLM can be done via: +```python locallaunch.py projects/mfmmlm/how2.yaml``` + +### Pre-training with a Retrieval Model (VideoCLIP) +This project now supports alternating between running a retrieval model and pre-training. +We implement a basic retrieval model that is built on the hidden states of a video and faiss. + +You may need to install faiss via `conda install faiss-cpu -c pytorch`. + +Right now, the hidden states of a video are computed as the average of 8 clips of their pooled visual/text hidden states. +See `mmpt/tasks/retritask.py` for more details. +The `.yaml` config for running pre-training with a retrieval model can be found at `projects/retri/videoretri.yaml`. 
diff --git a/fairseq/examples/MMPT/projects/mfmmlm.yaml b/fairseq/examples/MMPT/projects/mfmmlm.yaml new file mode 100644 index 0000000..0f3450a --- /dev/null +++ b/fairseq/examples/MMPT/projects/mfmmlm.yaml @@ -0,0 +1,59 @@ +project_dir: mfmmlm +run_task: + - how2.yaml + - [vtt.yaml, vttcap.yaml, vttqa.yaml, youcook.yaml, youcookcap.yaml, crosstask.yaml, coin.yaml] +base_dir: task +task_group: + pretrain: + task_list: + - how2.yaml + dataset: + subsampling: 32 + sampled_min_len: 10 + sampled_max_len: 64 + max_video_len: 32 + max_len: 96 + aligner: MFMMLMAligner + lazy_vfeat_mask: True + mfm_probability: 0.15 + mlm_probability: 0.15 + mm_prob: 0.5 + model: + model_cls: MMFusionMFMMLM + mm_encoder_cls: MMFusionForMFMMLM + loss: + loss_cls: MFMMLM + fairseq: + common: + fp16: true + dataset: + batch_size: 256 + optimization: + max_epoch: 15 + finetune: + task_list: + - vtt.yaml + - vttqa.yaml + - youcook.yaml + - youcookcap.yaml + - crosstask.yaml + - coin.yaml + dataset: + max_video_len: 32 + max_len: 96 + fairseq: + common: + fp16: true + # do not write any model or loss here (they are expected to be fixed in mmfusion). 
+ test: + task_list: + - test_vtt.yaml + - test_vttqa.yaml + - test_youcook.yaml + - test_youcookcap.yaml + - test_crosstask.yaml + - test_crosstask_zs.yaml + - test_coin.yaml + dataset: + max_video_len: 32 + max_len: 96 diff --git a/fairseq/examples/MMPT/projects/mtm/mmfusionmtm.yaml b/fairseq/examples/MMPT/projects/mtm/mmfusionmtm.yaml new file mode 100644 index 0000000..337d66a --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/mmfusionmtm.yaml @@ -0,0 +1,19 @@ +includes: projects/mfmmlm.yaml +project_dir: mtm/mmfusionmtm +task_group: + pretrain: + task: VLMTask # reproducible + dataset: + aligner: MFMMLMAligner + model: + use_seg_emb: True # reproducible + model_cls: MMFusionMTM + mm_encoder_cls: MMBertForMFMMLM + loss: + loss_cls: MTM + finetune: + model: + use_seg_emb: True # reproducible + test: + model: + use_seg_emb: True # reproducible diff --git a/fairseq/examples/MMPT/projects/mtm/vlm.yaml b/fairseq/examples/MMPT/projects/mtm/vlm.yaml new file mode 100644 index 0000000..022a262 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm.yaml @@ -0,0 +1,8 @@ +includes: projects/mtm/mmfusionmtm.yaml +project_dir: mtm/vlm +task_group: + pretrain: + dataset: + sampled_min_len: 8 + loss: + loss_cls: MTM diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/coin.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/coin.yaml new file mode 100644 index 0000000..48fd64a --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/coin.yaml @@ -0,0 +1,47 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: COINActionSegmentationMetaProcessor + train_path: data/coin/COIN.json + val_path: data/coin/COIN.json + vfeat_dir: data/feat/feat_coin_s3d + text_processor: COINActionSegmentationTextProcessor + aligner: COINActionSegmentationAligner + num_iso_layer: 12 + sliding_window: 8 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + 
num_workers: 4 + batch_size: 1 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 8 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/coin +task_type: sweep_big +model: + model_cls: MMFusionActionSegmentation + mm_encoder_cls: MMBertForTokenClassification + use_seg_emb: true +loss: + loss_cls: CrossEntropy diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/crosstask.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/crosstask.yaml new file mode 100644 index 0000000..4e706b5 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/crosstask.yaml @@ -0,0 +1,53 @@ +dataset: + video_processor: CrossTaskVideoProcessor + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + train_path: data/crosstask/crosstask_release/videos.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + aligner: CrossTaskAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 1 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + 
max_epoch: 5 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint11.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/crosstask +task_type: sweep_small +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +loss: + loss_cls: BCE diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/how2.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/how2.yaml new file mode 100644 index 0000000..7ca40ad --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/how2.yaml @@ -0,0 +1,55 @@ +dataset: + video_processor: ShardedVideoProcessor + bert_name: bert-base-uncased + meta_processor: ShardedHow2MetaProcessor + train_path: data/how2/how2_s3d_train.lst + val_path: data/how2/how2_s3d_val.lst + vfeat_dir: data/feat/feat_how2_s3d_shard_small + text_processor: ShardedTextProcessor + tfeat_dir: data/feat/feat_how2_s3d_shard_small/raw_caption_dedup.bert-base-uncased. + aligner: MFMMLMAligner + subsampling: 32 + sampled_min_len: 8 + sampled_max_len: 64 + max_video_len: 32 + max_len: 96 + lazy_vfeat_mask: true + mfm_probability: 0.15 + mlm_probability: 0.15 + mm_prob: 0.5 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 256 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 1000 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 15 + checkpoint: + save_dir: runs/mtm/vlm + save_interval_updates: 1024 + keep_interval_updates: 2 + keep_last_epochs: 30 +task_type: sweep_big +slurm_config: big +eval: + save_path: runs/mtm/vlm +model: + model_cls: MMFusionMTM + mm_encoder_cls: MMBertForMFMMLM + use_seg_emb: true +loss: + loss_cls: MTM +task: VLMTask diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_coin.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_coin.yaml new file mode 100644 index 
0000000..8df2e66 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_coin.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: COINActionSegmentationAligner + bert_name: bert-base-uncased + test_path: data/coin/COIN.json + meta_processor: COINActionSegmentationMetaProcessor + vfeat_dir: data/feat/feat_coin_s3d + text_processor: COINActionSegmentationTextProcessor + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/coin/checkpoint_best.pt +model: + model_cls: MMFusionActionSegmentation + mm_encoder_cls: MMBertForTokenClassification + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/coin/eval +metric: COINActionSegmentationMetric +predictor: COINPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask.yaml new file mode 100644 index 0000000..d159847 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask.yaml @@ -0,0 +1,38 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: CrossTaskVideoProcessor + aligner: CrossTaskAligner + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + num_iso_layer: 12 + sliding_window: 16 + 
sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/crosstask/checkpoint_best.pt +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/crosstask/eval +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask_zs.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask_zs.yaml new file mode 100644 index 0000000..59833c5 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_crosstask_zs.yaml @@ -0,0 +1,38 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: CrossTaskVideoProcessor + aligner: CrossTaskAligner + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/checkpoint_best.pt +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/crosstask_zs/eval +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_vtt.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_vtt.yaml 
new file mode 100644 index 0000000..a41557d --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_vtt.yaml @@ -0,0 +1,29 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + test_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/vtt/checkpoint_last.pt +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/vtt/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_vttqa.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_vttqa.yaml new file mode 100644 index 0000000..abf3309 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_vttqa.yaml @@ -0,0 +1,29 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: MSRVTTQAAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTQAMetaProcessor + test_path: data/msrvtt-qa/MSR_MC_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTQATextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/vttqa/checkpoint_last.pt +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/vttqa/eval +metric: QAMetric +predictor: QAPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_youcook.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_youcook.yaml new file mode 100644 index 0000000..3a57d25 --- /dev/null +++ 
b/fairseq/examples/MMPT/projects/mtm/vlm/test_youcook.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: YoucookVideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased + meta_processor: YoucookMetaProcessor + test_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: true + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: TextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/youcook/checkpoint_last.pt +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/youcook/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/test_youcookcap.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/test_youcookcap.yaml new file mode 100644 index 0000000..b2595d7 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/test_youcookcap.yaml @@ -0,0 +1,32 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: YoucookVideoProcessor + aligner: DSNLGAligner + bert_name: bert-base-uncased + meta_processor: YoucookNLGMetaProcessor + test_path: data/youcook/val_list.txt + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: NLGTextProcessor + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/mtm/vlm/youcookcap/checkpoint_best.pt +model: + model_cls: MMFusionNLG + mm_encoder_cls: MMBertForNLG + max_decode_length: 24 + use_seg_emb: true +eval: + save_path: runs/mtm/vlm/youcookcap/eval +metric: NLGMetric +predictor: NLGPredictor +gen_param: + num_beams: 5 diff --git 
a/fairseq/examples/MMPT/projects/mtm/vlm/vtt.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/vtt.yaml new file mode 100644 index 0000000..c6c5b1a --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/vtt.yaml @@ -0,0 +1,49 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + jsfusion_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + full_test_path: data/msrvtt/MSRVTT_FULL_test.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 256 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 10 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/vtt +task_type: sweep_small +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +loss: + loss_cls: T2VContraLoss diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/vttqa.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/vttqa.yaml new file mode 100644 index 0000000..0a440c7 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/vttqa.yaml @@ -0,0 +1,47 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: 
DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 128 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 5 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/vttqa +task_type: sweep_small +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +loss: + loss_cls: V2TContraLoss diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/youcook.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/youcook.yaml new file mode 100644 index 0000000..9ee82b8 --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/youcook.yaml @@ -0,0 +1,47 @@ +dataset: + video_processor: YoucookVideoProcessor + bert_name: bert-base-uncased + meta_processor: YoucookMetaProcessor + train_path: data/youcook/youcook_train.pkl + val_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: true + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: TextProcessor + aligner: DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 128 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 10 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/youcook +task_type: sweep_small +model: 
+ model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint + use_seg_emb: true +loss: + loss_cls: T2VContraLoss diff --git a/fairseq/examples/MMPT/projects/mtm/vlm/youcookcap.yaml b/fairseq/examples/MMPT/projects/mtm/vlm/youcookcap.yaml new file mode 100644 index 0000000..d29dfad --- /dev/null +++ b/fairseq/examples/MMPT/projects/mtm/vlm/youcookcap.yaml @@ -0,0 +1,45 @@ +dataset: + video_processor: YoucookVideoProcessor + bert_name: bert-base-uncased + meta_processor: YoucookNLGMetaProcessor + train_path: data/youcook/train_list.txt + val_path: data/youcook/val_list.txt + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: NLGTextProcessor + aligner: DSNLGAligner + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 128 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 10 + checkpoint: + restore_file: runs/mtm/vlm/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/mtm/vlm/youcookcap +task_type: sweep_small +model: + model_cls: MMFusionNLG + mm_encoder_cls: MMBertForNLG + use_seg_emb: true +loss: + loss_cls: NLGLoss diff --git a/fairseq/examples/MMPT/projects/retri/videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip.yaml new file mode 100644 index 0000000..afd040a --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip.yaml @@ -0,0 +1,10 @@ +includes: projects/retri/videoretri.yaml +project_dir: retri/videoclip +task_group: + pretrain: + model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 diff --git 
a/fairseq/examples/MMPT/projects/retri/videoclip/coin_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/coin_videoclip.yaml new file mode 100644 index 0000000..aaed5e4 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/coin_videoclip.yaml @@ -0,0 +1,49 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: COINActionSegmentationMetaProcessor + train_path: data/coin/COIN.json + val_path: data/coin/COIN.json + vfeat_dir: data/feat/feat_coin_s3d + text_processor: COINActionSegmentationTextProcessor + aligner: COINActionSegmentationAligner + num_iso_layer: 12 + sliding_window: 8 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 1 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 8 + checkpoint: + restore_file: runs/retri/videoclip/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/retri/videoclip/coin +task_type: sweep_big +model: + model_cls: MMFusionSeparateActionSegmentation + mm_encoder_cls: null + video_encoder_cls: MMBertForTokenClassification + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: CrossEntropy diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/crosstask_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/crosstask_videoclip.yaml new file mode 100644 index 0000000..758601e --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/crosstask_videoclip.yaml @@ -0,0 +1,55 @@ +dataset: + video_processor: CrossTaskVideoProcessor + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + train_path: data/crosstask/crosstask_release/videos.csv + 
train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + aligner: CrossTaskAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 1 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 5 + checkpoint: + restore_file: runs/retri/videoclip/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/retri/videoclip/crosstask +task_type: sweep_small +model: + model_cls: MMFusionSeparateActionLocalization + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: BCE diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/how2.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/how2.yaml new file mode 100644 index 0000000..b49581e --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/how2.yaml @@ -0,0 +1,65 @@ +dataset: + video_processor: ShardedVideoRetriVideoProcessor + bert_name: bert-base-uncased + meta_processor: ShardedHow2VideoRetriMetaProcessor + train_path: data/how2/how2_s3d_train.lst + val_path: data/how2/how2_s3d_val.lst + vfeat_dir: data/feat/feat_how2_s3d_shard_small + text_processor: ShardedVideoRetriTextProcessor + tfeat_dir: 
data/feat/feat_how2_s3d_shard_small/raw_caption_dedup.bert-base-uncased. + aligner: VideoRetriOverlappedAligner + subsampling: 1 + sampled_min_len: 8 + sampled_max_len: 64 + max_video_len: 32 + max_len: 96 + lazy_vfeat_mask: true + mfm_probability: 0.15 + mlm_probability: 0.15 + mm_prob: 0.5 + sampled_video_min_len: 3 + sampled_video_max_len: 32 + num_video_per_batch: 32 + clip_per_video: 16 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 1 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 1000 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 25 + checkpoint: + save_dir: runs/retri/videoclip + save_interval_updates: 1024 + keep_interval_updates: 2 + keep_last_epochs: 30 +task_type: sweep_big +slurm_config: big +eval: + save_path: runs/retri/videoclip +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: MMContraLoss +task: VideoRetriTask +retri_epoch: 1 +vectorpool_cls: VideoVectorPool +retriever_cls: VectorRetriever +num_cands: 64 diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_videoclip.yaml new file mode 100644 index 0000000..4099062 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_videoclip.yaml @@ -0,0 +1,33 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: COINActionSegmentationAligner + bert_name: bert-base-uncased + test_path: data/coin/COIN.json + meta_processor: COINActionSegmentationMetaProcessor + vfeat_dir: data/feat/feat_coin_s3d + text_processor: COINActionSegmentationTextProcessor + num_iso_layer: 12 + sliding_window: 16 + 
sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/coin/checkpoint_best.pt +model: + model_cls: MMFusionSeparateActionSegmentation + mm_encoder_cls: null + video_encoder_cls: MMBertForTokenClassification + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/coin/eval +metric: COINActionSegmentationMetric +predictor: COINPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_zs.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_zs.yaml new file mode 100644 index 0000000..b33739c --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_coin_zs.yaml @@ -0,0 +1,33 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: COINActionSegmentationAligner + bert_name: bert-base-uncased + test_path: data/coin/COIN.json + meta_processor: COINActionSegmentationMetaProcessor + vfeat_dir: data/feat/feat_coin_s3d + text_processor: COINActionSegmentationTextProcessor + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/coin_zs/eval +metric: COINActionSegmentationMetric +predictor: COINZSPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_videoclip.yaml new file mode 100644 index 0000000..e82f54f --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_videoclip.yaml @@ -0,0 +1,40 @@ 
+slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: CrossTaskVideoProcessor + aligner: CrossTaskAligner + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/crosstask/checkpoint_best.pt +model: + model_cls: MMFusionSeparateActionLocalization + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/crosstask/eval +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_zs_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_zs_videoclip.yaml new file mode 100644 index 0000000..6fc357c --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_crosstask_zs_videoclip.yaml @@ -0,0 +1,40 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: CrossTaskVideoProcessor + aligner: CrossTaskAligner + bert_name: bert-base-uncased + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: 
data/crosstask/crosstask_release/videos_val.csv + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + text_processor: CrossTaskTextProcessor + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 1 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparateActionLocalization + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/crosstask_zs/eval +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_didemo_zs.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_didemo_zs.yaml new file mode 100644 index 0000000..8dc7168 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_didemo_zs.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: DiDeMoAligner + bert_name: bert-base-uncased + meta_processor: DiDeMoMetaProcessor + test_path: data/didemo/test_data.json + vfeat_dir: data/feat/feat_didemo_s3d + text_processor: DiDeMoTextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/didemo_zs/eval +metric: DiDeMoMetric +predictor: 
DiDeMoPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_videoclip.yaml new file mode 100644 index 0000000..19321ad --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_videoclip.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + test_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/vtt/checkpoint_last.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/vtt/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_zs.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_zs.yaml new file mode 100644 index 0000000..d149fa3 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_vtt_zs.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + test_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: 
MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/vtt_zs/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_videoclip.yaml new file mode 100644 index 0000000..295aeed --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_videoclip.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: MSRVTTQAAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTQAMetaProcessor + test_path: data/msrvtt-qa/MSR_MC_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTQATextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/vttqa/checkpoint_last.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/vttqa/eval +metric: QAMetric +predictor: QAPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_zs.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_zs.yaml new file mode 100644 index 0000000..7a876c8 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_vttqa_zs.yaml @@ -0,0 +1,31 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: MSRVTTQAAligner + bert_name: bert-base-uncased + meta_processor: MSRVTTQAMetaProcessor + test_path: data/msrvtt-qa/MSR_MC_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTQATextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: 
test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/vttqa_zs/eval +metric: QAMetric +predictor: QAPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_videoclip.yaml new file mode 100644 index 0000000..86a4ab2 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_videoclip.yaml @@ -0,0 +1,33 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: YoucookVideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased + meta_processor: YoucookMetaProcessor + test_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: true + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: TextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/youcook/checkpoint_last.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/youcook/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_zs.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_zs.yaml new file mode 100644 index 0000000..fd29417 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/test_youcook_zs.yaml @@ -0,0 +1,33 @@ +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: YoucookVideoProcessor + aligner: DSAligner + bert_name: 
bert-base-uncased + meta_processor: YoucookMetaProcessor + test_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: true + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: TextProcessor + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 + common_eval: + path: runs/retri/videoclip/checkpoint_best.pt +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/retri/videoclip/youcook_zs/eval +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/vtt_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/vtt_videoclip.yaml new file mode 100644 index 0000000..d8b4079 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/vtt_videoclip.yaml @@ -0,0 +1,51 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + jsfusion_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + full_test_path: data/msrvtt/MSRVTT_FULL_test.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 224 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 10 + checkpoint: + restore_file: runs/retri/videoclip/checkpoint_best.pt + reset_optimizer: true + 
reset_dataloader: true + reset_meters: true + save_dir: runs/retri/videoclip/vtt +task_type: sweep_small +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: T2VContraLoss diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/vttqa_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/vttqa_videoclip.yaml new file mode 100644 index 0000000..f0566d7 --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoclip/vttqa_videoclip.yaml @@ -0,0 +1,49 @@ +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 128 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 5 + checkpoint: + restore_file: runs/retri/videoclip/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/retri/videoclip/vttqa +task_type: sweep_small +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: V2TContraLoss diff --git a/fairseq/examples/MMPT/projects/retri/videoclip/youcook_videoclip.yaml b/fairseq/examples/MMPT/projects/retri/videoclip/youcook_videoclip.yaml new file mode 100644 index 0000000..c2b13e5 --- /dev/null +++ 
b/fairseq/examples/MMPT/projects/retri/videoclip/youcook_videoclip.yaml @@ -0,0 +1,49 @@ +dataset: + video_processor: YoucookVideoProcessor + bert_name: bert-base-uncased + meta_processor: YoucookMetaProcessor + train_path: data/youcook/youcook_train.pkl + val_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: true + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: TextProcessor + aligner: DSAligner + num_iso_layer: 12 + max_video_len: 32 + max_len: 96 +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + fp16: true + dataset: + num_workers: 4 + batch_size: 128 + optimization: + lr: + - 5.0e-05 + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 + warmup_updates: 122 + weight_decay: 0.0 + ddp_backend: no_c10d + max_epoch: 10 + checkpoint: + restore_file: runs/retri/videoclip/checkpoint_best.pt + reset_optimizer: true + reset_dataloader: true + reset_meters: true + save_dir: runs/retri/videoclip/youcook +task_type: sweep_small +model: + model_cls: MMFusionSeparate + mm_encoder_cls: null + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +loss: + loss_cls: T2VContraLoss diff --git a/fairseq/examples/MMPT/projects/retri/videoretri.yaml b/fairseq/examples/MMPT/projects/retri/videoretri.yaml new file mode 100644 index 0000000..969e1fb --- /dev/null +++ b/fairseq/examples/MMPT/projects/retri/videoretri.yaml @@ -0,0 +1,51 @@ +includes: projects/mfmmlm.yaml +project_dir: retri/videoretri +run_task: + - how2.yaml +task_group: + pretrain: + task: VideoRetriTask + retri_epoch: 1 + vectorpool_cls: VideoVectorPool + retriever_cls: VectorRetriever + num_cands: 64 + dataset: + train_path: data/how2/how2_s3d_train.lst + meta_processor: ShardedHow2VideoRetriMetaProcessor + video_processor: ShardedVideoRetriVideoProcessor + text_processor: ShardedVideoRetriTextProcessor 
+ aligner: VideoRetriOverlappedAligner + sampled_video_min_len: 3 + sampled_video_max_len: 32 + sampled_min_len: 8 + sampled_max_len: 64 + num_video_per_batch: 32 + # do not use subsampling as it changes fairseq batch_size. + subsampling: 1 # disable subsampling + clip_per_video: 16 + fairseq: + dataset: + batch_size: 1 + optimization: + max_epoch: 25 + model: + model_cls: MMFusionShare + mm_encoder_cls: MMBertForEncoder + loss: + loss_cls: MMContraLoss + finetune: + task_list: [vtt_videoclip.yaml, youcook_videoclip.yaml, vttqa_videoclip.yaml, crosstask_videoclip.yaml, coin_videoclip.yaml] + test: + task_list: + - test_youcook_zs.yaml + - test_vtt_zs.yaml + - test_vttqa_zs.yaml + - test_crosstask_zs_videoclip.yaml + - test_coin_zs.yaml + - test_didemo_zs.yaml + - test_youcook_videoclip.yaml + - test_vtt_videoclip.yaml + - test_vttqa_videoclip.yaml + - test_crosstask_videoclip.yaml + - test_coin_videoclip.yaml + diff --git a/fairseq/examples/MMPT/projects/task/coin.yaml b/fairseq/examples/MMPT/projects/task/coin.yaml new file mode 100644 index 0000000..e777248 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/coin.yaml @@ -0,0 +1,25 @@ +includes: projects/task/ft.yaml +task_type: sweep_big +dataset: + meta_processor: COINActionSegmentationMetaProcessor + train_path: data/coin/COIN.json + val_path: data/coin/COIN.json + vfeat_dir: data/feat/feat_coin_s3d + video_processor: VideoProcessor + text_processor: COINActionSegmentationTextProcessor + aligner: COINActionSegmentationAligner + num_iso_layer: 12 + sliding_window: 8 + sliding_window_size: 32 +model: + model_cls: MMFusionActionSegmentation + mm_encoder_cls: MMBertForTokenClassification +loss: + loss_cls: CrossEntropy +fairseq: + dataset: + batch_size: 1 + optimization: + max_epoch: 8 + checkpoint: + save_dir: runs/task/coin diff --git a/fairseq/examples/MMPT/projects/task/coin_videoclip.yaml b/fairseq/examples/MMPT/projects/task/coin_videoclip.yaml new file mode 100644 index 0000000..69988bc --- /dev/null 
+++ b/fairseq/examples/MMPT/projects/task/coin_videoclip.yaml @@ -0,0 +1,7 @@ +includes: projects/task/coin.yaml +model: + model_cls: MMFusionSeparateActionSegmentation + mm_encoder_cls: + video_encoder_cls: MMBertForTokenClassification + text_encoder_cls: BertModel # dummy, not used. + num_hidden_video_layers: 6 diff --git a/fairseq/examples/MMPT/projects/task/crosstask.yaml b/fairseq/examples/MMPT/projects/task/crosstask.yaml new file mode 100644 index 0000000..cb4dbb0 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/crosstask.yaml @@ -0,0 +1,31 @@ +includes: projects/task/ft.yaml +dataset: + meta_processor: CrossTaskMetaProcessor + train_path: data/crosstask/crosstask_release/videos.csv # dummy + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv # dummy + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + video_processor: CrossTaskVideoProcessor + text_processor: CrossTaskTextProcessor + aligner: CrossTaskAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint +loss: + loss_cls: BCE +fairseq: + dataset: + batch_size: 1 + optimization: + max_epoch: 5 + checkpoint: + save_dir: runs/task/crosstask + restore_file: runs/task/checkpoint11.pt # for VLM diff --git a/fairseq/examples/MMPT/projects/task/crosstask_videoclip.yaml b/fairseq/examples/MMPT/projects/task/crosstask_videoclip.yaml new file mode 100644 index 0000000..6ec613c --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/crosstask_videoclip.yaml @@ -0,0 +1,10 @@ +includes: projects/task/crosstask.yaml +model: + model_cls: MMFusionSeparateActionLocalization + 
mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel # dummy, not used. + num_hidden_video_layers: 6 +fairseq: + checkpoint: + restore_file: runs/task/checkpoint_best.pt # overwrite the default of VLM. diff --git a/fairseq/examples/MMPT/projects/task/default.yaml b/fairseq/examples/MMPT/projects/task/default.yaml new file mode 100644 index 0000000..087fef7 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/default.yaml @@ -0,0 +1,20 @@ +# this yaml cannot be run alone. you must use `how2.yaml`, `vtt.yaml` etc for training. +dataset: + video_processor: VideoProcessor + bert_name: bert-base-uncased +fairseq: + common: + tensorboard_logdir: run + log_interval: 1000 + dataset: + num_workers: 4 + optimization: + lr: [ 0.00005 ] + clip_norm: 2.0 + optimizer: adam + adam_betas: (0.9, 0.98) + lr_scheduler: polynomial_decay + total_num_update: 1000000 # backward compatible on fairseq 1.0.0a0+af0389f for reproducibility. + warmup_updates: 1000 + weight_decay: 0.0 + ddp_backend: no_c10d diff --git a/fairseq/examples/MMPT/projects/task/ft.yaml b/fairseq/examples/MMPT/projects/task/ft.yaml new file mode 100644 index 0000000..c93b8a7 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/ft.yaml @@ -0,0 +1,13 @@ +includes: projects/task/default.yaml +# all derived config will be run by fairseq-train. +task_type: sweep_small +fairseq: + optimization: + warmup_updates: 122 # copied from roberta glue: https://github.com/pytorch/fairseq/blob/master/examples/roberta/README.glue.md + checkpoint: + # save_interval_updates: 512 + # borrowed from Roberta script. 
+ restore_file: runs/task/checkpoint_best.pt + reset_optimizer: True + reset_dataloader: True + reset_meters: True diff --git a/fairseq/examples/MMPT/projects/task/how2.yaml b/fairseq/examples/MMPT/projects/task/how2.yaml new file mode 100644 index 0000000..094dd04 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/how2.yaml @@ -0,0 +1,22 @@ +includes: projects/task/default.yaml +task_type: sweep_big +slurm_config: big +dataset: + meta_processor: ShardedHow2MetaProcessor + train_path: data/how2/how2_s3d_train.lst + val_path: data/how2/how2_s3d_val.lst + video_processor: ShardedVideoProcessor + vfeat_dir: data/feat/feat_how2_s3d_shard_small + text_processor: ShardedTextProcessor + tfeat_dir: data/feat/feat_how2_s3d_shard_small/raw_caption_dedup.bert-base-uncased. + aligner: FixedLenAligner +# disable direct running of this yaml +eval: + save_path: runs/task +fairseq: + checkpoint: + save_dir: runs/task + save_interval_updates: 1024 + keep_interval_updates: 2 + keep_last_epochs: 30 + diff --git a/fairseq/examples/MMPT/projects/task/test.yaml b/fairseq/examples/MMPT/projects/task/test.yaml new file mode 100644 index 0000000..0a98445 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test.yaml @@ -0,0 +1,13 @@ +# this yaml cannot be run alone: implement a test_${dataset}.yaml +slurm_config: big +task_type: local_predict +dataset: + split: test + video_processor: VideoProcessor + aligner: DSAligner + bert_name: bert-base-uncased +fairseq: + dataset: + batch_size: 256 + valid_subset: test + num_workers: 2 diff --git a/fairseq/examples/MMPT/projects/task/test_coin.yaml b/fairseq/examples/MMPT/projects/task/test_coin.yaml new file mode 100644 index 0000000..6d919df --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_coin.yaml @@ -0,0 +1,24 @@ +includes: projects/task/test.yaml +dataset: + split: test + test_path: data/coin/COIN.json + meta_processor: COINActionSegmentationMetaProcessor + vfeat_dir: data/feat/feat_coin_s3d + video_processor: 
VideoProcessor + text_processor: COINActionSegmentationTextProcessor + aligner: COINActionSegmentationAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 +model: + model_cls: MMFusionActionSegmentation + mm_encoder_cls: MMBertForTokenClassification +eval: + save_path: runs/task/coin/eval +fairseq: + dataset: + batch_size: 1 + common_eval: + path: runs/task/coin/checkpoint_best.pt +metric: COINActionSegmentationMetric +predictor: COINPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_coin_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_coin_videoclip.yaml new file mode 100644 index 0000000..b41f5bc --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_coin_videoclip.yaml @@ -0,0 +1,7 @@ +includes: projects/task/test_coin.yaml +model: + model_cls: MMFusionSeparateActionSegmentation + mm_encoder_cls: + video_encoder_cls: MMBertForTokenClassification + text_encoder_cls: BertModel # dummy, not used. + num_hidden_video_layers: 6 diff --git a/fairseq/examples/MMPT/projects/task/test_coin_zs.yaml b/fairseq/examples/MMPT/projects/task/test_coin_zs.yaml new file mode 100644 index 0000000..5d19b09 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_coin_zs.yaml @@ -0,0 +1,13 @@ +includes: projects/task/test_coin.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/task/coin_zs/eval +fairseq: + common_eval: + path: runs/task/checkpoint_best.pt +predictor: COINZSPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_crosstask.yaml b/fairseq/examples/MMPT/projects/task/test_crosstask.yaml new file mode 100644 index 0000000..6dd778e --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_crosstask.yaml @@ -0,0 +1,32 @@ +includes: projects/task/test.yaml +dataset: + split: test + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv 
+ train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv # dummy + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + video_processor: CrossTaskVideoProcessor + text_processor: CrossTaskTextProcessor + aligner: CrossTaskAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint +eval: + save_path: runs/task/crosstask/eval +fairseq: + # read code and find what is the checkpoint arg. + dataset: + batch_size: 1 + common_eval: + path: runs/task/crosstask/checkpoint_best.pt +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_crosstask_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_crosstask_videoclip.yaml new file mode 100644 index 0000000..df12535 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_crosstask_videoclip.yaml @@ -0,0 +1,7 @@ +includes: projects/task/test_crosstask.yaml +model: + model_cls: MMFusionSeparateActionLocalization + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel # dummy, not used. 
+ num_hidden_video_layers: 6 diff --git a/fairseq/examples/MMPT/projects/task/test_crosstask_zs.yaml b/fairseq/examples/MMPT/projects/task/test_crosstask_zs.yaml new file mode 100644 index 0000000..19386e4 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_crosstask_zs.yaml @@ -0,0 +1,32 @@ +includes: projects/task/test.yaml +dataset: + split: test + meta_processor: CrossTaskMetaProcessor + test_path: data/crosstask/crosstask_release/videos_val.csv + train_csv_path: data/crosstask/crosstask_release/videos.csv + val_path: data/crosstask/crosstask_release/videos_val.csv # dummy + val_csv_path: data/crosstask/crosstask_release/videos_val.csv + primary_path: data/crosstask/crosstask_release/tasks_primary.txt + related_path: data/crosstask/crosstask_release/tasks_related.txt + vfeat_dir: data/feat/feat_crosstask_s3d + annotation_path: data/crosstask/crosstask_release/annotations + n_train: 30 + video_processor: CrossTaskVideoProcessor + text_processor: CrossTaskTextProcessor + aligner: CrossTaskAligner + num_iso_layer: 12 + sliding_window: 16 + sliding_window_size: 32 +model: + model_cls: MMFusionActionLocalization + mm_encoder_cls: MMBertForJoint +eval: + save_path: runs/task/crosstask_zs/eval +fairseq: + # read code and find what is the checkpoint arg. + dataset: + batch_size: 1 + common_eval: + path: runs/task/checkpoint_best.pt # load the best from how2 on ACL submission: runs/task/checkpoint11.pt +metric: CrossTaskMetric +predictor: CrossTaskPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_crosstask_zs_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_crosstask_zs_videoclip.yaml new file mode 100644 index 0000000..7f01982 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_crosstask_zs_videoclip.yaml @@ -0,0 +1,7 @@ +includes: projects/task/test_crosstask_zs.yaml +model: + model_cls: MMFusionSeparateActionLocalization + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel # dummy, not used. 
+ num_hidden_video_layers: 6 diff --git a/fairseq/examples/MMPT/projects/task/test_didemo_zs.yaml b/fairseq/examples/MMPT/projects/task/test_didemo_zs.yaml new file mode 100644 index 0000000..4b53dca --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_didemo_zs.yaml @@ -0,0 +1,23 @@ +includes: projects/task/test.yaml +dataset: + meta_processor: DiDeMoMetaProcessor + test_path: data/didemo/test_data.json + video_processor: VideoProcessor + vfeat_dir: data/feat/feat_didemo_s3d + text_processor: DiDeMoTextProcessor + aligner: DiDeMoAligner + num_iso_layer: 12 +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/task/didemo_zs/eval +fairseq: + # read code and find what is the checkpoint arg. + common_eval: + path: runs/task/checkpoint_best.pt +metric: DiDeMoMetric +predictor: DiDeMoPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_vtt.yaml b/fairseq/examples/MMPT/projects/task/test_vtt.yaml new file mode 100644 index 0000000..2f809b3 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vtt.yaml @@ -0,0 +1,19 @@ +includes: projects/task/test.yaml +dataset: + meta_processor: MSRVTTMetaProcessor + test_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + video_processor: VideoProcessor + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +eval: + save_path: runs/task/vtt/eval +fairseq: + # read code and find what is the checkpoint arg. 
+ common_eval: + path: runs/task/vtt/checkpoint_last.pt +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_vtt_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_vtt_videoclip.yaml new file mode 100644 index 0000000..cb65643 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vtt_videoclip.yaml @@ -0,0 +1,8 @@ +includes: projects/task/test_vtt.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 + diff --git a/fairseq/examples/MMPT/projects/task/test_vtt_zs.yaml b/fairseq/examples/MMPT/projects/task/test_vtt_zs.yaml new file mode 100644 index 0000000..5734092 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vtt_zs.yaml @@ -0,0 +1,13 @@ +includes: projects/task/test_vtt.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/task/vtt_zs/eval +fairseq: + # read code and find what is the checkpoint arg. + common_eval: + path: runs/task/checkpoint_best.pt diff --git a/fairseq/examples/MMPT/projects/task/test_vttqa.yaml b/fairseq/examples/MMPT/projects/task/test_vttqa.yaml new file mode 100644 index 0000000..ddf813c --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vttqa.yaml @@ -0,0 +1,20 @@ +includes: projects/task/test.yaml +dataset: + meta_processor: MSRVTTQAMetaProcessor + test_path: data/msrvtt-qa/MSR_MC_test.csv + video_processor: VideoProcessor + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTQATextProcessor + aligner: MSRVTTQAAligner + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +eval: + save_path: runs/task/vttqa/eval +fairseq: + # read code and find what is the checkpoint arg. 
+ common_eval: + path: runs/task/vttqa/checkpoint_last.pt +metric: QAMetric +predictor: QAPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_vttqa_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_vttqa_videoclip.yaml new file mode 100644 index 0000000..32a41e8 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vttqa_videoclip.yaml @@ -0,0 +1,8 @@ +includes: projects/task/test_vttqa.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 + diff --git a/fairseq/examples/MMPT/projects/task/test_vttqa_zs.yaml b/fairseq/examples/MMPT/projects/task/test_vttqa_zs.yaml new file mode 100644 index 0000000..5e0e29d --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_vttqa_zs.yaml @@ -0,0 +1,13 @@ +includes: projects/task/test_vttqa.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +eval: + save_path: runs/task/vttqa_zs/eval +fairseq: + # read code and find what is the checkpoint arg. 
+ common_eval: + path: runs/task/checkpoint_best.pt diff --git a/fairseq/examples/MMPT/projects/task/test_youcook.yaml b/fairseq/examples/MMPT/projects/task/test_youcook.yaml new file mode 100644 index 0000000..092b680 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_youcook.yaml @@ -0,0 +1,22 @@ +includes: projects/task/test.yaml +dataset: + meta_processor: YoucookMetaProcessor + test_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: True + video_processor: YoucookVideoProcessor + vfeat_dir: data/feat/feat_youcook_s3d # /checkpoint/huxu/feat/youcook_vmz # /checkpoint/prarora/berniehuang/feat_youcook_vmz + text_processor: TextProcessor + aligner: DSAligner + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +eval: + save_path: runs/task/youcook/eval +fairseq: + # read code and find what is the checkpoint arg. + common_eval: + path: runs/task/youcook/checkpoint_last.pt +metric: RetrievalMetric +predictor: RetrievalPredictor diff --git a/fairseq/examples/MMPT/projects/task/test_youcook_videoclip.yaml b/fairseq/examples/MMPT/projects/task/test_youcook_videoclip.yaml new file mode 100644 index 0000000..b85ea43 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_youcook_videoclip.yaml @@ -0,0 +1,8 @@ +includes: projects/task/test_youcook.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 + diff --git a/fairseq/examples/MMPT/projects/task/test_youcook_zs.yaml b/fairseq/examples/MMPT/projects/task/test_youcook_zs.yaml new file mode 100644 index 0000000..0a5875b --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_youcook_zs.yaml @@ -0,0 +1,13 @@ +includes: projects/task/test_youcook.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + 
num_hidden_video_layers: 6 +eval: + save_path: runs/task/youcook_zs/eval +fairseq: + # read code and find what is the checkpoint arg. + common_eval: + path: runs/task/checkpoint_best.pt diff --git a/fairseq/examples/MMPT/projects/task/test_youcookcap.yaml b/fairseq/examples/MMPT/projects/task/test_youcookcap.yaml new file mode 100644 index 0000000..24f6518 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/test_youcookcap.yaml @@ -0,0 +1,23 @@ +includes: projects/task/test.yaml +dataset: + meta_processor: YoucookNLGMetaProcessor + test_path: data/youcook/val_list.txt + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + video_processor: YoucookVideoProcessor + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: NLGTextProcessor + aligner: DSNLGAligner +model: + model_cls: MMFusionNLG + mm_encoder_cls: MMBertForNLG + max_decode_length: 24 +eval: + save_path: runs/task/youcookcap/eval +fairseq: + # read code and find what is the checkpoint arg. + common_eval: + path: runs/task/youcookcap/checkpoint_best.pt +metric: NLGMetric +predictor: NLGPredictor +gen_param: + num_beams: 5 diff --git a/fairseq/examples/MMPT/projects/task/vtt.yaml b/fairseq/examples/MMPT/projects/task/vtt.yaml new file mode 100644 index 0000000..395e2ee --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/vtt.yaml @@ -0,0 +1,25 @@ +includes: projects/task/ft.yaml +dataset: + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + jsfusion_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + full_test_path: data/msrvtt/MSRVTT_FULL_test.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: DSAligner + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +loss: + loss_cls: T2VContraLoss +fairseq: + dataset: + batch_size: 256 + optimization: + max_epoch: 10 + checkpoint: + save_dir: 
runs/task/vtt diff --git a/fairseq/examples/MMPT/projects/task/vtt_videoclip.yaml b/fairseq/examples/MMPT/projects/task/vtt_videoclip.yaml new file mode 100644 index 0000000..a9892ca --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/vtt_videoclip.yaml @@ -0,0 +1,12 @@ +includes: projects/task/vtt.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 +fairseq: + dataset: + batch_size: 224 +# model_cls: MMFusionShare +# mm_encoder_cls: MMBertForEncoder diff --git a/fairseq/examples/MMPT/projects/task/vttqa.yaml b/fairseq/examples/MMPT/projects/task/vttqa.yaml new file mode 100644 index 0000000..56d578e --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/vttqa.yaml @@ -0,0 +1,23 @@ +includes: projects/task/ft.yaml +dataset: + meta_processor: MSRVTTMetaProcessor + train_path: data/msrvtt/MSRVTT_train.csv + dup: 20 + val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv + vfeat_dir: data/feat/feat_vtt_s3d + text_processor: MSRVTTTextProcessor + json_path: data/msrvtt/MSRVTT_data.json + aligner: DSAligner + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +loss: + loss_cls: V2TContraLoss +fairseq: + dataset: + batch_size: 128 + optimization: + max_epoch: 5 + checkpoint: + save_dir: runs/task/vttqa diff --git a/fairseq/examples/MMPT/projects/task/vttqa_videoclip.yaml b/fairseq/examples/MMPT/projects/task/vttqa_videoclip.yaml new file mode 100644 index 0000000..2d484ca --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/vttqa_videoclip.yaml @@ -0,0 +1,10 @@ +includes: projects/task/vttqa.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 + +# model_cls: MMFusionShare +# mm_encoder_cls: MMBertForEncoder diff --git a/fairseq/examples/MMPT/projects/task/youcook.yaml b/fairseq/examples/MMPT/projects/task/youcook.yaml new file 
mode 100644 index 0000000..e0cd841 --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/youcook.yaml @@ -0,0 +1,25 @@ +includes: projects/task/ft.yaml +dataset: + meta_processor: YoucookMetaProcessor + train_path: data/youcook/youcook_train.pkl + val_path: data/youcook/youcook_val.pkl + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + use_annotation_text: True + video_processor: YoucookVideoProcessor + vfeat_dir: data/feat/feat_youcook_s3d # /checkpoint/huxu/feat/youcook_vmz # /checkpoint/prarora/berniehuang/feat_youcook_vmz + text_processor: TextProcessor + aligner: DSAligner + num_iso_layer: 12 +model: + model_cls: MMFusionJoint + mm_encoder_cls: MMBertForJoint +loss: + loss_cls: T2VContraLoss +fairseq: + dataset: + batch_size: 128 + optimization: + max_epoch: 10 + checkpoint: + save_dir: runs/task/youcook + diff --git a/fairseq/examples/MMPT/projects/task/youcook_videoclip.yaml b/fairseq/examples/MMPT/projects/task/youcook_videoclip.yaml new file mode 100644 index 0000000..e3e901c --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/youcook_videoclip.yaml @@ -0,0 +1,9 @@ +includes: projects/task/youcook.yaml +model: + model_cls: MMFusionSeparate + mm_encoder_cls: + video_encoder_cls: MMBertForEncoder + text_encoder_cls: BertModel + num_hidden_video_layers: 6 + # model_cls: MMFusionShare + # mm_encoder_cls: MMBertForEncoder diff --git a/fairseq/examples/MMPT/projects/task/youcookcap.yaml b/fairseq/examples/MMPT/projects/task/youcookcap.yaml new file mode 100644 index 0000000..047735f --- /dev/null +++ b/fairseq/examples/MMPT/projects/task/youcookcap.yaml @@ -0,0 +1,23 @@ +# finetuning for youcook captioning. 
+includes: projects/task/ft.yaml +dataset: + meta_processor: YoucookNLGMetaProcessor + train_path: data/youcook/train_list.txt + val_path: data/youcook/val_list.txt + trainval_annotation: data/youcook/youcookii_annotations_trainval.json + video_processor: YoucookVideoProcessor + vfeat_dir: data/feat/feat_youcook_s3d + text_processor: NLGTextProcessor + aligner: DSNLGAligner +model: + model_cls: MMFusionNLG + mm_encoder_cls: MMBertForNLG +loss: + loss_cls: NLGLoss +fairseq: + dataset: + batch_size: 128 + optimization: + max_epoch: 10 + checkpoint: + save_dir: runs/task/youcookcap diff --git a/fairseq/examples/MMPT/scripts/text_token_extractor/configs/bert-base-uncased.yaml b/fairseq/examples/MMPT/scripts/text_token_extractor/configs/bert-base-uncased.yaml new file mode 100644 index 0000000..473dd9b --- /dev/null +++ b/fairseq/examples/MMPT/scripts/text_token_extractor/configs/bert-base-uncased.yaml @@ -0,0 +1,5 @@ +dataset: + bert_name: bert-base-uncased + caption_pkl_path: data/how2/raw_caption_dedup.pkl + use_fast: true + target_dir: data/feat/feat_how2_s3d_shard_small diff --git a/fairseq/examples/MMPT/scripts/text_token_extractor/pretokenization.py b/fairseq/examples/MMPT/scripts/text_token_extractor/pretokenization.py new file mode 100644 index 0000000..29ae5dc --- /dev/null +++ b/fairseq/examples/MMPT/scripts/text_token_extractor/pretokenization.py @@ -0,0 +1,106 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import pickle +import os +import argparse +import numpy as np + +from torch.utils.data import Dataset, DataLoader +from mmpt.processors import PKLJSONStrTextProcessor +from mmpt.utils import ShardedTensor, recursive_config + + +class TokenizerDataset(Dataset): + def __init__(self, config): + self.text_processor = PKLJSONStrTextProcessor(config) + self.video_ids = list(self.text_processor.data.keys()) + + def __getitem__(self, idx): + video_id = self.video_ids[idx] + return video_id, self.text_processor(video_id) + + def __len__(self): + return len(self.video_ids) + + +def numpify(shard_idx, video_ids, captions, target_dir, split, prefix, max_cap_len=32): + startends = [] + caps_ids = [] + for video_id in video_ids: + caption = captions[video_id] + startend = [] + cap_ids = [] + for start, end, cap in zip( + caption["start"], caption["end"], caption["cap"]): + startend.append(np.array([start, end]).astype("float32")) + cap_id = np.full((max_cap_len,), -1, dtype=np.int32) + cap = cap[:max_cap_len] + cap_id[:len(cap)] = cap + cap_ids.append(cap_id) + startends.append(np.stack(startend)) + caps_ids.append(np.stack(cap_ids)) + + startends = ShardedTensor.from_list(startends) + target_path = os.path.join( + target_dir, + prefix + split + "_" + str(shard_idx) + ) + print("save to", target_path) + startends.save(target_path + ".startends") + caps_ids = ShardedTensor.from_list(caps_ids) + caps_ids.save(target_path + ".caps_ids") + + +def sharding(config, out_file): + with open(out_file, "rb") as fr: + captions = pickle.load(fr) + target_dir = config.target_dir + prefix = os.path.basename( + os.path.splitext(config.caption_pkl_path)[0] + ) + "." + config.bert_name + "." 
+ for split in ["train", "val"]: + target_path = os.path.join(target_dir, split + "_meta") + with open(target_path + ".pkl", "rb") as fr: + meta = pickle.load(fr) + print("load meta", target_path, len(meta)) + for shard_id in meta: + numpify( + shard_id, meta[shard_id], captions, + target_dir, split, prefix + ) + + +def tokenize(config, out_file): + def collator(samples): + return samples + dataset = TokenizerDataset(config) + data = {} + for idx, batch in enumerate( + DataLoader(dataset, collate_fn=collator, num_workers=16)): + for video_id, caption in batch: + data[video_id] = caption + if idx % 5000 == 0: + print(idx) + with open(out_file, "wb") as fw: + pickle.dump(data, fw, pickle.HIGHEST_PROTOCOL) + + +def main(args): + config = recursive_config(args.config).dataset + + out_file = os.path.splitext(config.caption_pkl_path)[0] \ + + "." + config.bert_name + ".pkl" + if not os.path.isfile(out_file): + tokenize(config, out_file) + sharding(config, out_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="pretokenize (raw_)caption.json into pkl.") + parser.add_argument('config', type=str) + args = parser.parse_args() + main(args) diff --git a/fairseq/examples/MMPT/scripts/video_feature_extractor/extract.py b/fairseq/examples/MMPT/scripts/video_feature_extractor/extract.py new file mode 100644 index 0000000..b5ee7b7 --- /dev/null +++ b/fairseq/examples/MMPT/scripts/video_feature_extractor/extract.py @@ -0,0 +1,157 @@ +# Copyright Howto100M authors. +# Copyright (c) Facebook, Inc. 
All Rights Reserved + +import torch as th +import torch.nn.functional as F +import math +import numpy as np +import argparse + +from torch.utils.data import DataLoader +from model import get_model +from preprocessing import Preprocessing +from random_sequence_shuffler import RandomSequenceSampler + +from tqdm import tqdm +from pathbuilder import PathBuilder +from videoreader import VideoLoader + + +parser = argparse.ArgumentParser(description='Easy video feature extractor') + +parser.add_argument('--vdir', type=str) +parser.add_argument('--fdir', type=str) +parser.add_argument('--hflip', type=int, default=0) + +parser.add_argument('--batch_size', type=int, default=64, + help='batch size') +parser.add_argument('--type', type=str, default='2d', + help='CNN type') +parser.add_argument('--half_precision', type=int, default=0, + help='output half precision float') +parser.add_argument('--num_decoding_thread', type=int, default=4, + help='Num parallel thread for video decoding') +parser.add_argument('--l2_normalize', type=int, default=1, + help='l2 normalize feature') +parser.add_argument('--resnext101_model_path', type=str, default='model/resnext101.pth', + help='Resnext model path') +parser.add_argument('--vmz_model_path', type=str, default='model/r2plus1d_34_clip8_ig65m_from_scratch-9bae36ae.pth', + help='vmz model path') + +args = parser.parse_args() + + +# TODO: refactor all args into config. (current code is from different people.) 
+CONFIGS = { + "2d": { + "fps": 1, + "size": 224, + "centercrop": False, + "shards": 0, + }, + "3d": { + "fps": 24, + "size": 112, + "centercrop": True, + "shards": 0, + }, + "s3d": { + "fps": 30, + "size": 224, + "centercrop": True, + "shards": 0, + }, + "vmz": { + "fps": 24, + "size": 112, + "centercrop": True, + "shards": 0, + }, + "vae": { + "fps": 2, + "size": 256, + "centercrop": True, + "shards": 100, + } +} + +config = CONFIGS[args.type] + + +video_dirs = args.vdir +feature_dir = args.fdir + +video_dict = PathBuilder.build(video_dirs, feature_dir, ".npy", config["shards"]) + +dataset = VideoLoader( + video_dict=video_dict, + framerate=config["fps"], + size=config["size"], + centercrop=config["centercrop"], + hflip=args.hflip +) +n_dataset = len(dataset) +sampler = RandomSequenceSampler(n_dataset, 10) +loader = DataLoader( + dataset, + batch_size=1, + shuffle=False, + num_workers=args.num_decoding_thread, + sampler=sampler if n_dataset > 10 else None, +) +preprocess = Preprocessing(args.type) +model = get_model(args) + +with th.no_grad(): + for k, data in tqdm(enumerate(loader), total=loader.__len__(), ascii=True): + input_file = data['input'][0] + output_file = data['output'][0] + if len(data['video'].shape) > 3: + video = data['video'].squeeze() + if len(video.shape) == 4: + video = preprocess(video) + n_chunk = len(video) + if args.type == 'vmz': + n_chunk = math.ceil(n_chunk/float(3)) + features = th.cuda.FloatTensor(n_chunk, 512).fill_(0) + elif args.type == 's3d': + features = th.cuda.FloatTensor(n_chunk, 512).fill_(0) + elif args.type == "vae": + features = th.cuda.LongTensor(n_chunk, 1024).fill_(0) + else: + features = th.cuda.FloatTensor(n_chunk, 2048).fill_(0) + n_iter = int(math.ceil(n_chunk / float(args.batch_size))) + for i in range(n_iter): + factor = 1 + if args.type == 'vmz': + factor = 3 + min_ind = factor * i * args.batch_size + max_ind = factor * (i + 1) * args.batch_size + video_batch = video[min_ind:max_ind:factor].cuda() + if args.type 
== '2d': + batch_features = model(video_batch) # (51, 487), (51, 512) + elif args.type == 's3d': + batch_features = model(video_batch) + batch_features = batch_features['video_embedding'] + elif args.type == "vae": + # image_code. + batch_features = model(video_batch) + else: + batch_pred, batch_features = model(video_batch) # (51, 487), (51, 512) + if args.l2_normalize: + batch_features = F.normalize(batch_features, dim=1) + features[i*args.batch_size:(i+1)*args.batch_size] = batch_features + features = features.cpu().numpy() + if args.half_precision: + if args.type == "vae": + features = features.astype(np.int16) + else: + features = features.astype('float16') + else: + if args.type == "vae": + features = features.astype(np.int32) + else: + features = features.astype('float32') + np.save(output_file, features) + else: + print('Video {} error.'.format(input_file)) diff --git a/fairseq/examples/MMPT/scripts/video_feature_extractor/how2/s3d.sh b/fairseq/examples/MMPT/scripts/video_feature_extractor/how2/s3d.sh new file mode 100644 index 0000000..90102c8 --- /dev/null +++ b/fairseq/examples/MMPT/scripts/video_feature_extractor/how2/s3d.sh @@ -0,0 +1,8 @@ +#!/bin/bash + + +python scripts/video_feature_extractor/extract.py \ + --vdir \ + --fdir data/feat/feat_how2_s3d \ + --type=s3d --num_decoding_thread=4 \ + --batch_size 32 --half_precision 1 diff --git a/fairseq/examples/MMPT/scripts/video_feature_extractor/model.py b/fairseq/examples/MMPT/scripts/video_feature_extractor/model.py new file mode 100644 index 0000000..ac266e8 --- /dev/null +++ b/fairseq/examples/MMPT/scripts/video_feature_extractor/model.py @@ -0,0 +1,58 @@ +# Copyright (c) Howto100M authors and Facebook, Inc. 
All Rights Reserved + +import torch as th + +from torch import nn + + +class GlobalAvgPool(nn.Module): + def __init__(self): + super(GlobalAvgPool, self).__init__() + + def forward(self, x): + return th.mean(x, dim=[-2, -1]) + + +def get_model(args): + assert args.type in ['2d', '3d', 'vmz', 's3d', 'vae'] + if args.type == '2d': + print('Loading 2D-ResNet-152 ...') + import torchvision.models as models + model = models.resnet152(pretrained=True) + model = nn.Sequential(*list(model.children())[:-2], GlobalAvgPool()) + model = model.cuda() + elif args.type == 'vmz': + print('Loading VMZ ...') + from vmz34 import r2plus1d_34 + model = r2plus1d_34(pretrained_path=args.vmz_model_path, pretrained_num_classes=487) + model = model.cuda() + elif args.type == 's3d': + # we use one copy of s3d instead of dup another one for feature extraction. + from mmpt.processors.models.s3dg import S3D + model = S3D('pretrained_models/s3d_dict.npy', 512) + model.load_state_dict(th.load('pretrained_models/s3d_howto100m.pth')) + model = model.cuda() + + elif args.type == '3d': + print('Loading 3D-ResneXt-101 ...') + from videocnn.models import resnext + model = resnext.resnet101( + num_classes=400, + shortcut_type='B', + cardinality=32, + sample_size=112, + sample_duration=16, + last_fc=False) + model = model.cuda() + model_data = th.load(args.resnext101_model_path) + model.load_state_dict(model_data) + elif args.type == 'vae': + from openaivae import OpenAIParallelDiscreteVAE + model = OpenAIParallelDiscreteVAE() + model = model.cuda() + else: + raise ValueError("model not supported yet.") + + model.eval() + print('loaded') + return model diff --git a/fairseq/examples/MMPT/scripts/video_feature_extractor/pathbuilder.py b/fairseq/examples/MMPT/scripts/video_feature_extractor/pathbuilder.py new file mode 100644 index 0000000..2392d6d --- /dev/null +++ b/fairseq/examples/MMPT/scripts/video_feature_extractor/pathbuilder.py @@ -0,0 +1,89 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import os +import urllib.parse +import json +import pandas as pd + +from tqdm import tqdm + + +# TODO: extending to other datasets. +supported_formats = {} + + +class PathBuilder(object): + @classmethod + def build(cls, video_dirs, feature_dir, ext, shards=0, split=None): + meta_fn = os.path.join(feature_dir, "meta_plan.json") + os.makedirs(feature_dir, exist_ok=True) + if os.path.isfile(meta_fn): + with open(meta_fn) as fr: + meta = json.load(fr) + return meta + print("searching videos...") + + video_id_to_path = {} + for video_dir in video_dirs.split(","): + # TODO: add supports of recursive listdir. + if video_dir in supported_formats: + supported_formats[video_dir].load(video_dir, video_id_to_path) + else: + for idx, fn in enumerate(tqdm(os.listdir(video_dir))): + video_fn = os.path.join(video_dir, fn) + if os.path.isfile(video_fn): + video_id = os.path.splitext(fn)[0] + video_id_to_path[video_id] = video_fn + elif os.path.isdir(video_fn): + # shards of folders. + shard_dir = video_fn + for idx, fn in enumerate(os.listdir(shard_dir)): + video_fn = os.path.join(shard_dir, fn) + if os.path.isfile(video_fn): + video_id = os.path.splitext(fn)[0] + video_id_to_path[video_id] = video_fn + + video_path, feature_path = [], [] + valid_ext = set() + for idx, video_id in enumerate(video_id_to_path): + video_path.append(video_id_to_path[video_id]) + if ext is None: + # use original file ext for format compatibility. 
+ video_id_to_path[video_id] + path = urllib.parse.urlparse(video_id_to_path[video_id]).path + ext = os.path.splitext(path)[1] + if ext not in valid_ext: + valid_ext.add(ext) + print("adding", ext) + if shards: + shard_id = str(idx % shards) + feature_fn = os.path.join( + feature_dir, shard_id, video_id + ext) + else: + feature_fn = os.path.join( + feature_dir, video_id + ext) + feature_path.append(feature_fn) + + print("targeting", len(feature_path), "videos") + meta = { + "video_path": video_path, "feature_path": feature_path} + with open(meta_fn, "w") as fw: + json.dump(meta, fw) + + if split is not None: + splits = split.split("/") + assert len(splits) == 2 + cur, total = int(splits[0]), int(splits[1]) + assert cur < total + import math + chunk = math.ceil(len(meta["video_path"]) / total) + start = cur * chunk + end = (cur + 1) * chunk + meta = { + "video_path": meta["video_path"][start:end], + "feature_path": meta["feature_path"][start:end] + } + + return meta diff --git a/fairseq/examples/MMPT/scripts/video_feature_extractor/preprocessing.py b/fairseq/examples/MMPT/scripts/video_feature_extractor/preprocessing.py new file mode 100644 index 0000000..fa0cec3 --- /dev/null +++ b/fairseq/examples/MMPT/scripts/video_feature_extractor/preprocessing.py @@ -0,0 +1,57 @@ +# Copyright Howto100m authors. +# Copyright (c) Facebook, Inc. 
class Normalize(object):
    """Per-channel ``(x - mean) / (std + 1e-8)`` for 3-channel tensors."""

    def __init__(self, mean, std):
        # Shaped (1, 3, 1, 1) so they broadcast against a tensor whose
        # second dimension has size 3.
        self.mean = th.FloatTensor(mean).view(1, 3, 1, 1)
        self.std = th.FloatTensor(std).view(1, 3, 1, 1)

    def __call__(self, tensor):
        return (tensor - self.mean) / (self.std + 1e-8)


class Preprocessing(object):
    """Scale, normalise and regroup decoded frames according to ``type``.

    ``'2d'``, ``'3d'`` and ``'vmz'`` install a :class:`Normalize`; ``'s3d'``
    and any other value use none.
    """

    def __init__(self, type):
        self.type = type
        if type == '2d':
            self.norm = Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        elif type == '3d':
            self.norm = Normalize(
                mean=[110.6, 103.2, 96.3], std=[1.0, 1.0, 1.0])
        elif type == 'vmz':
            self.norm = Normalize(
                mean=[110.201, 100.64, 95.997],
                std=[58.1489, 56.4701, 55.3324])

    def _zero_pad(self, tensor, size):
        """Pad dim 0 with zero frames up to the next multiple of ``size``.

        Returns ``tensor`` itself when its length is already a multiple.
        """
        remainder = len(tensor) % size
        if remainder == 0:
            return tensor
        # new_zeros keeps the input's dtype and device (th.zeros would always
        # build a float32 CPU tensor) and works for any trailing shape.
        padding = tensor.new_zeros(
            (size - remainder,) + tuple(tensor.shape[1:]))
        return th.cat((tensor, padding), 0)

    def __call__(self, tensor):
        if self.type == '2d':
            tensor = tensor / 255.0
            tensor = self.norm(tensor)
        elif self.type == 'vmz':
            # clips of 10 frames
            tensor = self._zero_pad(tensor, 10)
            tensor = self.norm(tensor)
            tensor = tensor.view(-1, 10, 3, 112, 112)
            tensor = tensor.transpose(1, 2)
        elif self.type == '3d':
            # clips of 16 frames
            tensor = self._zero_pad(tensor, 16)
            tensor = self.norm(tensor)
            tensor = tensor.view(-1, 16, 3, 112, 112)
            tensor = tensor.transpose(1, 2)
        elif self.type == 's3d':
            tensor = tensor / 255.0
            tensor = self._zero_pad(tensor, 30)
            tensor = tensor.view(-1, 30, 3, 224, 224)  # N x 30 x 3 x H x W
            tensor = tensor.transpose(1, 2)  # N x 3 x 30 x H x W
        # for vae do nothing
        return tensor
class RandomSequenceSampler(Sampler):
    """Yield indices in shuffled groups of ``seq_len`` consecutive indices.

    The order inside a group is kept; only whole groups are shuffled. When
    ``n_sample`` is not a multiple of ``seq_len`` the last group is filled
    up with index ``0``.
    """

    def __init__(self, n_sample, seq_len):
        self.n_sample = n_sample
        self.seq_len = seq_len

    def _pad_ind(self, ind):
        zeros = np.zeros(self.seq_len - self.n_sample % self.seq_len)
        ind = np.concatenate((ind, zeros))
        return ind

    def __iter__(self):
        idx = np.arange(self.n_sample)
        if self.n_sample % self.seq_len != 0:
            idx = self._pad_ind(idx)
        idx = np.reshape(idx, (-1, self.seq_len))
        # shuffles along the first axis only, i.e. whole groups
        np.random.shuffle(idx)
        idx = np.reshape(idx, (-1))
        return iter(idx.astype(int))

    def __len__(self):
        # Must agree with __iter__, which pads only when there is a
        # remainder; the old formula added a full seq_len in the exact case.
        remainder = self.n_sample % self.seq_len
        if remainder == 0:
            return self.n_sample
        return self.n_sample + (self.seq_len - remainder)
class Shard(object):
    """Pack per-video ``.npy`` features into fixed-size shards."""

    def __init__(
        self,
        vfeat_dir,
        tfeat_dir,
        target_dir,
        file_paths,
        shard_size=4096
    ):
        self.vfeat_dir = vfeat_dir
        self.tfeat_dir = tfeat_dir
        self.target_dir = target_dir
        self.shard_size = shard_size
        # file_paths is paired positionally with ("train", "val"); each file
        # holds one video id per line.
        self.video_ids = {}
        for split_name, list_path in zip(["train", "val"], file_paths):
            with open(list_path) as fr:
                ids = []
                for line in fr.readlines():
                    ids.append(line.strip())
            self.video_ids[split_name] = ids

    def __call__(self, split="train"):
        # NOTE: the ``split`` argument is overwritten by the loop below, so
        # both splits are always processed.
        for split in ["train", "val"]:
            ids = self.video_ids[split]
            meta = {}
            shard_idx = 0
            for offset in range(0, len(ids), self.shard_size):
                print(shard_idx)
                members = []
                features = []
                for video_id in ids[offset:offset + self.shard_size]:
                    members.append(video_id)
                    features.append(
                        np.load(os.path.join(self.vfeat_dir, video_id + ".npy"))
                    )
                meta[shard_idx] = members
                shard_path = os.path.join(
                    self.target_dir, split + "_" + str(shard_idx))
                ShardedTensor.from_list(features).save(shard_path)
                shard_idx += 1

            # one pickle per split: shard index -> list of video ids
            meta_path = os.path.join(self.target_dir, split + "_meta")
            with open(meta_path + ".pkl", "wb") as fw:
                pickle.dump(meta, fw, pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":
    shard = Shard(
        "data/feat/feat_how2_s3d",
        "data/how2/raw_caption_dedup.bert-base-uncased",
        "data/feat/feat_how2_s3d_shard_small",
        ["data/how2/how2_s3d_train.lst", "data/how2/how2_s3d_val.lst"]
    )

    shard()
class VideoLoader(Dataset):
    """modified from how2's video_feature_extractor.

    Each item decodes one video with ffmpeg and returns
    ``{'video': tensor, 'input': video_path, 'output': feature_path}``.
    ``th.zeros(1)`` stands in for the video when it is skipped or fails.
    """

    def __init__(
        self,
        csv=None,
        video_dict=None,
        framerate=1,
        size=112,
        centercrop=False,
        hflip=False,
        **kwargs
    ):
        if csv is None and video_dict is None:
            raise ValueError("csv and video_dict cannot be both None.")
        if csv is not None:
            self.csv = pd.read_csv(csv)
        if video_dict is not None:
            self.csv = pd.DataFrame.from_dict(video_dict)

        self.centercrop = centercrop
        self.size = size
        self.framerate = framerate
        self.hflip = hflip
        # (height, width) of the frames requested by the latest `_decode`.
        self._frame_dim = None

    def __len__(self):
        return len(self.csv)

    def _get_video_dim(self, video_path):
        video_stream = self._get_video_info(video_path)
        width = int(video_stream['width'])
        height = int(video_stream['height'])
        return height, width

    def _get_video_info(self, video_path):
        """Return the first video stream reported by ffprobe, or None."""
        probe = ffmpeg.probe(video_path)
        video_stream = next((stream for stream in probe['streams']
                             if stream['codec_type'] == 'video'), None)
        return video_stream

    def _get_output_dim(self, h, w):
        """Return (height, width) after scaling the shorter side to `size`."""
        if isinstance(self.size, tuple) and len(self.size) == 2:
            return self.size
        elif h >= w:
            return int(h * self.size / w), self.size
        else:
            return self.size, int(w * self.size / h)

    def __getitem__(self, idx):
        video_path = self.csv['video_path'].values[idx]
        output_file = self.csv['feature_path'].values[idx]
        return self._decode(output_file, video_path)

    def _decode(self, output_file, video_path):
        # Skip when the feature already exists or the video is missing.
        if os.path.isfile(output_file) or not os.path.isfile(video_path):
            return {'video': th.zeros(1), 'input': video_path,
                    'output': output_file}
        try:
            h, w = self._get_video_dim(video_path)
        except Exception:
            print('ffprobe failed at: {}'.format(video_path))
            return {'video': th.zeros(1), 'input': video_path,
                    'output': output_file}
        try:
            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            height, width = self._get_output_dim(h, w)

            cmd = (
                ffmpeg
                .input(video_path)
                .filter('fps', fps=self.framerate)
                .filter('scale', width, height)
            )
            if self.hflip:
                cmd = cmd.filter('hflip')

            if self.centercrop:
                x = int((width - self.size) / 2.0)
                y = int((height - self.size) / 2.0)
                cmd = cmd.crop(x, y, self.size, self.size)
            # `_run` keeps its two-argument signature (subclasses override
            # it), so the frame size is handed over through the instance.
            self._frame_dim = (height, width)
            video = self._run(cmd, output_file)
        except Exception:
            # Best effort: report and fall back to the placeholder.
            print('ffmpeg failed at: {}'.format(video_path))
            video = th.zeros(1)

        return {'video': video, 'input': video_path, 'output': output_file}

    def _run(self, cmd, output_file):
        out, _ = (
            cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
            .run(capture_stdout=True, quiet=True)
        )
        if self.centercrop and isinstance(self.size, int):
            height, width = self.size, self.size
        else:
            # Without a center crop the frames keep the scaled size; this
            # used to hit undefined names, silently zeroing every video.
            frame_dim = getattr(self, '_frame_dim', None)
            if frame_dim is None:
                raise RuntimeError('output frame size is unknown.')
            height, width = frame_dim
        return self._frames_to_tensor(out, height, width)

    @staticmethod
    def _frames_to_tensor(out, height, width):
        """Turn raw rgb24 bytes into a float32 (N, 3, height, width) tensor."""
        video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
        video = th.from_numpy(video.astype('float32'))
        return video.permute(0, 3, 1, 2)


class VideoVerifier(VideoLoader):
    """Return the ffprobe video stream info of each item, or None on failure."""

    def __getitem__(self, idx):
        video_path = self.csv['video_path'].values[idx]
        try:
            return self._get_video_info(video_path)
        except Exception:
            # print('ffprobe failed at: {}'.format(video_path))
            return None


class VideoCompressor(VideoLoader):
    """Re-encode each video into `feature_path` with the given `crf`."""

    def __init__(
        self,
        csv=None,
        video_dict=None,
        framerate=1,
        size=112,
        centercrop=False,
        hflip=False,
        crf=32,
        **kwargs
    ):
        super().__init__(
            csv,
            video_dict,
            framerate,
            size,
            centercrop,
            hflip
        )
        self.crf = crf

    def _run(self, cmd, output_file):
        # Writes to disk; nothing is returned to the caller.
        cmd.output(filename=output_file, crf=self.crf).run(quiet=True)
        return None


class VideoDownloader(VideoCompressor):
    """download"""

    def __getitem__(self, idx):
        video_path = self.csv['video_path'].values[idx]
        output_file = self.csv['feature_path'].values[idx]
        if not os.path.isfile(output_file):
            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            import subprocess
            # Argument list (no shell): paths and URLs from the csv cannot be
            # interpreted as shell syntax. The exit status is ignored, as
            # with the previous os.system call.
            try:
                subprocess.run(
                    ["wget", "-O", str(output_file), str(video_path)],
                    check=False)
            except OSError:
                print('wget failed at: {}'.format(video_path))
        return {'video': None, 'input': video_path, 'output': output_file}


class AvKeyframeVideoCompressor(VideoLoader):
    """extract keyframes from a video and save it as jpg.
    TODO: consider to merge with `CodecProcessor`.
    """

    def __init__(
        self,
        csv=None,
        video_dict=None,
        framerate=1,
        size=112,
        centercrop=False,
        max_num_frames=5,
        **kwargs
    ):
        super().__init__(csv, video_dict, framerate, size, centercrop)
        self.max_num_frames = max_num_frames

    def _get_video_dim(self, video_fn):
        """decord cannot probe the size of a video, we use pyav instead."""
        import av
        with av.open(video_fn) as container:
            height = container.streams.video[0].codec_context.height
            width = container.streams.video[0].codec_context.width
        return height, width

    def _get_output_dim(self, height, width):
        """
        keep the shorter side be `self.size`, stretch the other.
        """
        if height >= width:
            return int(height * self.size / width), self.size
        else:
            return self.size, int(width * self.size / height)

    def _sample_frames(self, frames):
        """Pick at most `max_num_frames` frames at random.

        random.sample raises ValueError when asked for more items than
        exist, which used to drop every video with few frames.
        """
        return random.sample(frames, min(len(frames), self.max_num_frames))

    def __getitem__(self, idx):
        import av
        video_path = self.csv['video_path'].values[idx]
        output_file = self.csv['feature_path'].values[idx]
        # here `output_file` is a directory of jpg files.
        if not os.path.isdir(output_file) and os.path.isfile(video_path):
            try:
                h, w = self._get_video_dim(video_path)
            except Exception:
                print('probe failed at: {}'.format(video_path))
                return {'video': th.zeros(1), 'input': video_path,
                        'output': output_file}

            try:
                height, width = self._get_output_dim(h, w)

                # new for av.
                with av.open(video_path) as container:
                    stream = container.streams.video[0]
                    stream.thread_type = "AUTO"
                    stream.codec_context.height = height
                    stream.codec_context.width = width
                    if self.framerate == 0:     # keyframe.
                        stream.codec_context.skip_frame = 'NONKEY'
                    frames = list(container.decode(video=0))
                    frames = self._sample_frames(frames)

                    os.makedirs(output_file, exist_ok=True)
                    for frame in frames:
                        frame.to_image().save(
                            os.path.join(
                                output_file,
                                "%04d.jpg" % frame.index))
            except Exception:
                print('extract failed at: {}'.format(video_path))
                return {'video': th.zeros(1), 'input': video_path,
                        'output': output_file}
        video = th.zeros(1)
        return {'video': video, 'input': video_path, 'output': output_file}
+ +try: + from fairseq.version import __version__ # noqa +except ImportError: + pass diff --git a/fairseq/examples/adaptive_span/README.md b/fairseq/examples/adaptive_span/README.md new file mode 100644 index 0000000..d5224fb --- /dev/null +++ b/fairseq/examples/adaptive_span/README.md @@ -0,0 +1,90 @@ +# Adaptive Span + +Adaptive Span is a novel self-attention mechanism that can learn its optimal +attention span. This allows us to extend significantly the maximum context size +used in Transformer, while maintaining control over their memory footprint +and computational time. It uses the Truncated BPTT technique for training, +as in [transformerXL](https://github.com/pytorch/fairseq/blob/main/examples/truncated_bptt/README.md). + +Adaptive Span was introduced by the paper: +[Adaptive Attention Span in Transformers](https://arxiv.org/abs/1905.07799), +which achieved state-of-the-art language modeling results at the time of publication. + +We manage to reproduce their result in fairseq and keep most of the +[original implementation](https://github.com/facebookresearch/adaptive-span) untouched. +You can refer to their sweep file as well if any combination of hyperparameters is not clear. + +##### 0. Setup + +First you need to process the Enwik8 dataset, we use the pre-tokenized dataset +from [adaptive span paper](https://github.com/facebookresearch/adaptive-span/blob/master/get_data.sh). +You can download the dataset, and then run: +```bash +fairseq-preprocess --only-source --trainpref ~/data/enwik8/train.txt \ + --validpref ~/data/enwik8/valid.txt --testpref ~/data/enwik8/test.txt \ + --destdir ~/data/enwik8/data-bin/ --joined-dictionary --workers 20 +``` + +##### 1. Train an Adaptive Span model on Enwik8 + +We will train a 12-layer Adaptive Span model following the [hyperparameters +used in the original +paper](https://github.com/facebookresearch/adaptive-span/blob/master/experiments/enwik8.sh). 
+ +The following command assumes 4 GPUs, so that the total batch size is 64 +sequences (4 x 16). Training should take 2-3 days on 4 V100 GPUs: +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train \ + --user-dir examples/adaptive_span \ + --data ~/data/enwik8/data-bin/ \ + --fp16 --fp16-no-flatten-grads --max-update 600000 \ + --task truncated_bptt_lm --tokens-per-sample 512 --arch adaptive_span \ + --n-layer 12 --d-model 512 --n-head 8 --d-inner 2048 --dropout 0.3 \ + --attn-span 8192 --optimizer adagrad_with_grad_clip --adagrad-clip 0.03 \ + --validate-interval-updates 1000 \ + --lr-scheduler fixed --warmup-updates 32000 --batch-size-valid 32 \ + --lr 0.07 --criterion adaptive_span_loss --batch-size 16 --update-freq 1 \ + --seed 2 --log-format json --log-interval 25 --aux-loss-scaler 5e-07 +``` +This should land around 1.05 on validation, 1.03 on test. You can lower the +--aux-loss-scaler for better performance (longer span). It gives ~0.03 bpc +improvement to the transformerXL baseline here. +If training on a single GPU, set `--update-freq=4` to accumulate 4x gradients +and simulate training on 4 GPUs. +You can also reproduce the transformerXL result on enwik8 using this code base. +It should land around 1.06 on test,matching the [original paper](https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/run_enwik8_base.sh). +You can try by +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train \ + --user-dir examples/truncated_bptt \ + ~/data/enwik8/data-bin/ \ + --task truncated_bptt_lm --fp16 --max-update 400000 \ + --tokens-per-sample 512 --arch transformer_xl --n-layer 12 \ + --d-model 512 --n-head 8 --d-head 64 --d-inner 2048 --dropout 0.1 \ + --dropatt 0.0 --mem-len 512 --optimizer adam --clip-norm 0.25 \ + --lr-scheduler cosine --warmup-updates 0 \ + --lr 0.0 --lr 0.00025 --batch-size 15 \ + --update-freq 1 --seed 2 --log-format json --log-interval 25 \ + --fp16 +``` + +##### 2. 
Evaluate +For Adaptive Span: +```bash +fairseq-eval-lm ~/data/enwik8/data-bin/ --path model/checkpoint_best.pt \ + --user-dir examples/adaptive_span \ + --task truncated_bptt_lm --batch-size 8 --tokens-per-sample 512 --gen-subset test +``` +For Transformer-XL evaluation: +```bash +fairseq-eval-lm ~/data/enwik8/data-bin/ --path model/checkpoint_best.pt \ + --user-dir examples/truncated_bptt/ --task truncated_bptt_lm --batch-size 8 \ + --tokens-per-sample 80 \ + --model-overrides '{"mem_len":2100,"clamp_len":820,"same_length":True}' \ + --gen-subset valid +``` + +*Note:* During training the model saw 512 tokens of context +(``--tokens-per-sample=512``), with batch size 8. These settings match the evaluation +settings from [the original +paper](https://github.com/facebookresearch/adaptive-span/blob/master/experiments/enwik8.sh). diff --git a/fairseq/examples/adaptive_span/__init__.py b/fairseq/examples/adaptive_span/__init__.py new file mode 100644 index 0000000..e0a142a --- /dev/null +++ b/fairseq/examples/adaptive_span/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + +# automatically import any Python files in the current directory +cur_dir = os.path.dirname(__file__) +for file in os.listdir(cur_dir): + path = os.path.join(cur_dir, file) + if ( + not file.startswith("_") + and not file.startswith(".") + and (file.endswith(".py") or os.path.isdir(path)) + ): + mod_name = file[: file.find(".py")] if file.endswith(".py") else file + module = importlib.import_module(__name__ + "." 
+ mod_name) diff --git a/fairseq/examples/adaptive_span/adagrad_with_grad_clip.py b/fairseq/examples/adaptive_span/adagrad_with_grad_clip.py new file mode 100644 index 0000000..585ce18 --- /dev/null +++ b/fairseq/examples/adaptive_span/adagrad_with_grad_clip.py @@ -0,0 +1,128 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from torch.optim import Adagrad + +from fairseq.optim import LegacyFairseqOptimizer, register_optimizer + + +@register_optimizer("adagrad_with_grad_clip") +class FairseqAdagradWithGradClip(LegacyFairseqOptimizer): + def __init__(self, args, params): + super().__init__(args) + self._optimizer = AdagradWithGradClip(params, **self.optimizer_config) + + @staticmethod + def add_args(parser): + """Add optimizer-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', + help='weight decay') + parser.add_argument('--adagrad-clip', default=0.0, type=float, metavar='D', + help='internal grad clip') + # fmt: on + + @property + def optimizer_config(self): + """ + Return a kwarg dictionary that will be used to override optimizer + args stored in checkpoints. This allows us to load a checkpoint and + resume training using a different set of optimizer args, e.g., with a + different learning rate. 
+ """ + return { + "lr": self.args.lr[0], + "weight_decay": self.args.weight_decay, + "grad_clip": self.args.adagrad_clip, + } + + @property + def supports_flat_params(self): + return False + + +def _clip_grad(clr, grad, group_grad_clip): + if group_grad_clip > 0: + norm = grad.norm(2).item() + if norm > group_grad_clip: + clr *= group_grad_clip / (norm + 1e-10) + return clr + + +class AdagradWithGradClip(Adagrad): + """Adagrad algorithm with custom gradient clipping""" + + def __init__( + self, + params, + lr=1e-2, + lr_decay=0, + weight_decay=0, + initial_accumulator_value=0, + grad_clip=0, + ): + Adagrad.__init__( + self, + params, + lr=lr, + lr_decay=lr_decay, + weight_decay=weight_decay, + initial_accumulator_value=initial_accumulator_value, + ) + self.defaults["grad_clip"] = grad_clip + self.param_groups[0].setdefault("grad_clip", grad_clip) + + def step(self, closure=None): + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: + continue + + grad = p.grad.data + state = self.state[p] + + state["step"] += 1 + + if group["weight_decay"] != 0: + if p.grad.data.is_sparse: + raise RuntimeError( + "weight_decay option is " + "not compatible with sparse " + "gradients" + ) + grad = grad.add(group["weight_decay"], p.data) + + clr = group["lr"] / (1 + (state["step"] - 1) * group["lr_decay"]) + + # clip + clr = _clip_grad(clr=clr, grad=grad, group_grad_clip=group["grad_clip"]) + + if grad.is_sparse: + # the update is non-linear so indices must be unique + grad = grad.coalesce() + grad_indices = grad._indices() + grad_values = grad._values() + size = grad.size() + + def make_sparse(values): + constructor = grad.new + if grad_indices.dim() == 0 or values.dim() == 0: + return constructor().resize_as_(grad) + return constructor(grad_indices, values, size) + + state["sum"].add_(make_sparse(grad_values.pow(2))) + std = state["sum"]._sparse_mask(grad) + std_values = 
std._values().sqrt_().add_(1e-10) + p.data.add_(-clr, make_sparse(grad_values / std_values)) + else: + state["sum"].addcmul_(1, grad, grad) + std = state["sum"].sqrt().add_(1e-10) + p.data.addcdiv_(-clr, grad, std) + + return loss diff --git a/fairseq/examples/adaptive_span/adaptive_span_attention.py b/fairseq/examples/adaptive_span/adaptive_span_attention.py new file mode 100644 index 0000000..07f757b --- /dev/null +++ b/fairseq/examples/adaptive_span/adaptive_span_attention.py @@ -0,0 +1,160 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class AdaptiveMask(nn.Module): + """Soft masking function for adaptive size. + It masks out the last K values of an input. The masking value + goes from 1 to 0 gradually, so K can be learned with + back-propagation. + Args: + max_size: maximum size (i.e. 
class AdaptiveMask(nn.Module):
    """Learnable soft mask over the last dimension of its input.

    The mask ramps linearly from 0 to 1 over ``ramp_size`` positions; where
    the ramp sits is controlled by the learnable ratio ``current_val``.

    Args:
        max_size: maximum size (i.e. input dimension)
        ramp_size: size of the ramp going from 0 to 1
        init_val: initial size proportion not to be masked out
        shape: learn multiple sizes independent of each other
    """

    def __init__(self, max_size, ramp_size, init_val=0, shape=(1,)):
        nn.Module.__init__(self)
        self._max_size = max_size
        self._ramp_size = ramp_size
        self.current_val = nn.Parameter(torch.zeros(*shape) + init_val)
        self.register_buffer(
            "mask_template", torch.linspace(1 - max_size, 0, steps=max_size)
        )

    def forward(self, x):
        offset = self.current_val.float() * self._max_size
        mask = ((self.mask_template.float() + offset) / self._ramp_size + 1).clamp(0, 1)
        width = x.size(-1)
        if width < self._max_size:
            # the input could have been trimmed beforehand to save computation
            mask = mask.narrow(-1, self._max_size - width, width)
        return (x * mask).type_as(x)

    def _bounded_size(self, ratio, include_ramp):
        """Convert a ratio to a size in ``[0, max_size]``."""
        size = math.ceil(ratio * self._max_size)
        if include_ramp:
            size += self._ramp_size
        return max(0, min(self._max_size, size))

    def get_current_max_size(self, include_ramp=True):
        return self._bounded_size(self.current_val.max().item(), include_ramp)

    def get_current_avg_size(self, include_ramp=True):
        return self._bounded_size(
            self.current_val.float().mean().item(), include_ramp
        )

    def clamp_param(self):
        """this need to be called after each update"""
        self.current_val.data.clamp_(0, 1)


class AdaptiveSpan(nn.Module):
    """Adaptive attention span for Transformer self-attention.

    Holds one learnable span when ``adapt_span_layer`` is truthy, otherwise
    one span per attention head.

    Args:
        attn_span: maximum attention span
        adapt_span_ramp: length of the masking ramp
        adapt_span_init: initial size ratio
        n_head: number of attention heads
        adapt_span_layer: share a single span across all heads
    """

    def __init__(
        self,
        attn_span,
        adapt_span_ramp,
        adapt_span_init,
        n_head,
        adapt_span_layer,
        **kargs
    ):
        nn.Module.__init__(self)
        self._max_span = attn_span
        self._n_head = n_head
        self._adapt_span_layer = adapt_span_layer
        mask_args = dict(
            max_size=self._max_span,
            ramp_size=adapt_span_ramp,
            init_val=adapt_span_init,
        )
        if not self._adapt_span_layer:
            mask_args["shape"] = (n_head, 1, 1)
        self._mask = AdaptiveMask(**mask_args)

    def forward(self, attn, normalize=True):
        """mask attention with the right span"""
        self.clamp_param()
        if self._adapt_span_layer:
            return self._mask(attn)
        # batch and head dimensions are merged together, so separate them
        # before applying the per-head mask
        merged, block = attn.size(0), attn.size(1)
        per_head = attn.reshape(merged // self._n_head, self._n_head, block, -1)
        return self._mask(per_head).view(merged, block, -1)

    def get_trim_len(self):
        """how much of memory can be trimmed to reduce computation"""
        span = self._max_span
        trim_len = min(span - 1, span - self._mask.get_current_max_size())
        # too fine granularity might be bad for the memory management
        return math.floor(trim_len / 64) * 64

    def trim_memory(self, query, key, value, key_pe):
        """trim out unnecessary memory beforehand to reduce computation"""
        trim_len = self.get_trim_len()
        cache_size = key.size(1) - query.size(1)
        excess = trim_len - (self._max_span - cache_size)
        if excess > 0:
            key, value = key[:, excess:, :], value[:, excess:, :]
        elif excess < 0:
            # cache is too short! this happens when validation resumes
            # after a lot of updates.
            front_pad = [0, 0, -excess, 0]
            key, value = F.pad(key, front_pad), F.pad(value, front_pad)
        if trim_len > 0 and key_pe is not None:
            key_pe = key_pe[:, :, trim_len:]
        return key, value, key_pe

    def get_cache_size(self):
        """determine how long the cache should be"""
        # give a buffer of 64 steps since a span might increase
        # in future updates
        return min(self._max_span, self._max_span - self.get_trim_len() + 64)

    def get_loss(self):
        """a loss term for regularizing the span length"""
        return self._max_span * self._mask.current_val.float().mean()

    def get_current_max_span(self):
        return self._mask.get_current_max_size()

    def get_current_avg_span(self):
        return self._mask.get_current_avg_size()

    def clamp_param(self):
        self._mask.clamp_param()
@dataclass
class AdaptiveSpanCriterionConfig(FairseqDataclass):
    # interpolated from the optimization config
    sentence_avg: bool = II("optimization.sentence_avg")


@register_criterion("adaptive_span_loss", dataclass=AdaptiveSpanCriterionConfig)
class AdaptiveSpanCriterion(CrossEntropyCriterion):
    """Cross entropy plus the model's auxiliary adaptive-span loss."""

    def __init__(self, task, sentence_avg):
        super().__init__(task, sentence_avg)

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss here is summed, different from the adaptive span code
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(**sample["net_input"])
        loss, aux_loss, avg_span, max_span = self.compute_loss(
            model, net_output, sample, reduce=reduce
        )
        if self.sentence_avg:
            denominator = sample["target"].size(0)
        else:
            denominator = sample["ntokens"]
        loss /= denominator
        total_loss = loss + aux_loss
        # the loss is already normalised above, so report a unit sample size
        sample_size = 1

        logging_output = {
            "loss": loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["target"].size(0),
            "sample_size": sample_size,
            "total_loss": total_loss.data,
            "avg_span": avg_span * sample_size,
            "max_span": max_span * sample_size,
        }
        return total_loss, sample_size, logging_output

    def compute_loss(self, model, net_output, sample, reduce=True):
        """Return (cross-entropy loss, aux loss, average span, max span)."""
        loss, _ = super().compute_loss(model, net_output, sample, reduce)
        return (
            loss,
            model.get_aux_loss(),
            model.get_current_avg_span(),
            model.get_current_max_span(),
        )

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""

        def total(key):
            return sum(log.get(key, 0) for log in logging_outputs)

        loss_sum = total("loss")
        ntokens = total("ntokens")
        sample_size = total("sample_size")
        total_loss_sum = total("total_loss")
        avg_span_sum = total("avg_span")
        max_span_sum = total("max_span")

        # we divide by log(2) to convert the loss from base e to base 2
        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_scalar("avg_span", avg_span_sum / sample_size, sample_size, round=3)
        metrics.log_scalar("max_span", max_span_sum / sample_size, sample_size, round=3)
        # total loss contains the L1 norm on adaptive-span
        metrics.log_scalar(
            "total_loss",
            total_loss_sum / sample_size / math.log(2),
            sample_size,
            round=3,
        )
        if sample_size == ntokens:
            metrics.log_derived(
                "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)
            )
        else:
            metrics.log_scalar(
                "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
            )
            metrics.log_derived(
                "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
            )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return True
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq.modules.layer_norm import LayerNorm + +from .adaptive_span_attention import AdaptiveSpan + +# Size notations: +# B = batch_size, H = d_model, M = block_size, L = attn_span + + +def _skew(X, pad_value): + """shift every row 1 step to right""" + # X = B x M x L + B, M, L = X.size() + X = F.pad(X, (0, M + 1), value=pad_value) # B x M x (L+M+1) + X = X.view(B, -1) # B x ML+MM+M + X = X[:, :-M] # B x ML+MM + X = X.view(B, M, M + L) # B x M x L+M + return X + + +def _unskew(X): + """reverse _skew operation""" + # X = B x M x L+M + B, M, L = X.size() + L -= M + X = X.view(B, -1) # B x ML+MM + X = F.pad(X, (0, M)) # B x ML+MM+M + X = X.view(B, M, M + L + 1) # B x M x L+M+1 + X = X[:, :, :L] # B x M x L + return X + + +class SeqAttention(nn.Module): + """Sequential self-attention layer. + Each token will attend to its previous fixed number of steps. + Note that attention doesn't include the current step itself. 
+ """ + + def __init__(self, d_model, n_head, attn_span, dropout, adapt_span_layer, **kargs): + nn.Module.__init__(self) + self.dropout = nn.Dropout(dropout) + self.d_model = d_model # size of a single head + self.attn_span = attn_span + self.adaptive_span = AdaptiveSpan( + attn_span=attn_span, + n_head=n_head, + adapt_span_layer=adapt_span_layer, + **kargs + ) + + def forward(self, query, key, value, key_pe): + # query size = B x M x H + # key, value sizes = B x (M+L) x H + + key, value, key_pe = self.adaptive_span.trim_memory(query, key, value, key_pe) + + # compute attention from context + # B x M (dest) x (M+L) (src) + attn_cont = torch.matmul(query, key.transpose(-1, -2)) + attn_cont = _unskew(attn_cont) # B x M x L + + # compute the effect of position embedding + attn_pos = torch.matmul(query, key_pe) # B x M x L_pos + attn = attn_cont + attn_pos + + attn = attn / math.sqrt(self.d_model) # B x M X L_pos + + attn = F.softmax(attn.float(), dim=-1).type_as(attn) + + # trim attention lengths according to the learned span + attn = self.adaptive_span(attn) + + attn = self.dropout(attn) # B x M X L_pos + + attn_cont = _skew(attn, 0) # B x M X (L+M) + out = torch.matmul(attn_cont, value) # B x M x H + return out + + def get_cache_size(self): + return self.adaptive_span.get_cache_size() + + +class MultiHeadSeqAttention(nn.Module): + def __init__(self, d_model, n_head, **kargs): + nn.Module.__init__(self) + assert d_model % n_head == 0 + self.n_head = n_head + self.head_dim = d_model // n_head + self.attn = SeqAttention(d_model=self.head_dim, n_head=n_head, **kargs) + self.proj_query = nn.Linear(d_model, d_model, bias=False) + nn.init.xavier_normal_(self.proj_query.weight) + self.proj_out = nn.Linear(d_model, d_model, bias=False) + nn.init.xavier_normal_(self.proj_out.weight) + self.proj_val = nn.Linear(d_model, d_model, bias=False) + nn.init.xavier_normal_(self.proj_val.weight) + self.proj_key = nn.Linear(d_model, d_model, bias=False) + 
class FeedForwardLayer(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    The first projection maps ``d_model`` to ``d_inner`` and the second maps
    back to ``d_model``; both weight matrices use Xavier-uniform
    initialisation. Extra keyword arguments are accepted and ignored so the
    layer can be built from a shared kwargs dict.
    """

    def __init__(self, d_model, d_inner, dropout, **kargs):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_inner)
        self.fc2 = nn.Linear(d_inner, d_model)
        # Re-initialise the weights (biases keep nn.Linear's default init).
        for linear in (self.fc1, self.fc2):
            nn.init.xavier_uniform_(linear.weight)
        self.dropout = nn.Dropout(dropout)

    def forward(self, h):
        """Apply the block to ``h`` (last dimension ``d_model``)."""
        hidden = self.dropout(F.relu(self.fc1(h)))
        return self.fc2(hidden)
class TransformerSeq(nn.Module):
    """Adaptive-span transformer language model body.

    Embeds input tokens, runs them through ``n_layer`` ``TransformerSeqLayer``
    blocks that attend over a per-layer cache of previous hidden states, and
    projects the result to log-probabilities over the vocabulary.
    """

    def __init__(
        self,
        vocab_size,
        d_model,
        n_head,
        n_layer,
        attn_span,
        emb_dropout,
        aux_loss_scaler,
        adapt_span_layer,
        **kargs
    ):
        super().__init__()
        # Input embedding table and output (vocabulary) projection.
        self.in_emb = nn.Embedding(vocab_size, d_model)
        nn.init.normal_(self.in_emb.weight, mean=0, std=d_model ** -0.5)
        self.out_emb = nn.Linear(d_model, vocab_size)
        self.aux_loss_scaler = aux_loss_scaler
        # Dropout on embeddings is disabled entirely for a non-positive rate.
        self.emb_dropout = nn.Dropout(emb_dropout) if emb_dropout > 0 else None
        # Position embeddings; the same parameter is handed to every layer.
        self.key_pe = nn.Parameter(torch.randn(1, d_model // n_head, attn_span))

        layer_kwargs = dict(
            d_model=d_model,
            n_head=n_head,
            attn_span=attn_span,
            adapt_span_layer=adapt_span_layer,
            **kargs
        )
        self.layers = nn.ModuleList(
            TransformerSeqLayer(**layer_kwargs) for _ in range(n_layer)
        )

    def forward(self, x, h_cache, target=None):
        """Run one block of tokens through the model.

        Args:
            x: token ids, B x M.
            h_cache: sequence with one cached hidden-state tensor per layer.
            target: unused.

        Returns:
            ``(log_probs, next_cache, None)`` where ``next_cache`` holds one
            detached tensor per layer to be passed as ``h_cache`` next time.
        """
        block_size = x.size(1)
        h = self.in_emb(x)  # B x M x H
        if self.emb_dropout is not None:
            h = self.emb_dropout(h)

        h_cache_next = []
        for idx, layer in enumerate(self.layers):
            cache_size = layer.attn.attn.get_cache_size()
            if cache_size <= block_size:
                # The current block alone is long enough to fill the cache.
                next_cache = h[:, -cache_size:, :]
            else:
                # Keep the newest part of the old cache and append this block.
                kept = h_cache[idx][:, -cache_size + block_size :, :]
                next_cache = torch.cat([kept, h], dim=1)
            h_cache_next.append(next_cache.detach())
            h = layer(h, h_cache[idx], self.key_pe)  # B x M x H

        if self.emb_dropout is not None:
            h = self.emb_dropout(h)

        log_probs = F.log_softmax(self.out_emb(h).float(), dim=-1).type_as(h)
        return log_probs, h_cache_next, None

    def _adaptive_spans(self):
        """Yield the adaptive-span module of every layer, in layer order."""
        for layer in self.layers:
            yield layer.attn.attn.adaptive_span

    def get_aux_loss(self):
        """Return the summed per-layer span loss scaled by ``aux_loss_scaler``."""
        total = 0.0
        for span in self._adaptive_spans():
            total += span.get_loss()
        return self.aux_loss_scaler * total

    def get_current_max_span(self):
        """Return the largest current span over all layers (at least 0.0)."""
        largest = 0.0
        for span in self._adaptive_spans():
            largest = max(largest, span.get_current_max_span())
        return largest

    def get_current_avg_span(self):
        """Return the mean of the layers' current average spans."""
        total = 0.0
        for span in self._adaptive_spans():
            total += span.get_current_avg_span()
        return total / len(self.layers)
class AdaptiveSpanDecoder(FairseqIncrementalDecoder):
    """fairseq decoder that wraps the adaptive-span ``TransformerSeq`` model.

    The wrapped model consumes a list of per-layer hidden-state caches
    ("mems"). During training/validation (``incremental_state is None``) the
    caches are carried across calls in ``self._mems``; during incremental
    inference they are stored in ``incremental_state`` under the key
    ``"mems"``.
    """

    def __init__(self, cfg, task):

        super().__init__(task.target_dictionary)

        self.config = cfg
        # Rebuild the config with the real vocabulary size taken from the task.
        config = AdaptiveSpanSmallConfig(
            vocab_size=len(task.target_dictionary),
            d_model=cfg.d_model,
            n_head=cfg.n_head,
            d_inner=cfg.d_inner,
            n_layer=cfg.n_layer,
            attn_span=cfg.attn_span,
            dropout=cfg.dropout,
            emb_dropout=cfg.emb_dropout,
            adapt_span_ramp=cfg.adapt_span_ramp,
            adapt_span_init=cfg.adapt_span_init,
            aux_loss_scaler=cfg.aux_loss_scaler,
            adapt_span_layer=cfg.adapt_span_layer,
        )
        logger.info(config)
        self.model = AdaptiveSpanTransformerModel(**config.__dict__)

        # Hidden-state caches carried between non-incremental forward calls.
        self._mems = None

    def forward(
        self,
        src_tokens,
        incremental_state: Optional[Dict[str, List[torch.Tensor]]] = None,
        encoder_out=None,
    ):
        """Run the model on ``src_tokens`` and update the cached states.

        Args:
            src_tokens: token ids with the batch on dimension 0.
            incremental_state: when given, caches are read from / written to
                it and only the most recent token is fed to the model.
            encoder_out: unused.

        Returns:
            One-element tuple holding the model's log-probabilities.
        """
        bsz = src_tokens.size(0)
        if incremental_state is not None:  # used during inference
            # The state dict must be passed explicitly, mirroring the
            # set_incremental_state call below; omitting it raised TypeError.
            mems = self.get_incremental_state(incremental_state, "mems")
            src_tokens = src_tokens[:, -1:]  # only keep the most recent token
        else:
            mems = self._mems

        if mems is None:
            # first time init
            mems = self.init_hid_cache(bsz)
        output = self.model(x=src_tokens, h_cache=mems)
        if incremental_state is not None:
            self.set_incremental_state(incremental_state, "mems", output[1])
        else:
            self._mems = output[1]
        return (output[0],)

    def max_positions(self):
        """Return the configured attention span."""
        return self.config.attn_span

    def init_hid_cache(self, batch_sz):
        """Create one zero cache per layer, shaped (batch_sz, cache_size, d_model).

        The tensors use the dtype and device of the model's first parameter.
        """
        param = next(self.model.parameters())
        return [
            torch.zeros(
                batch_sz,
                layer.get_cache_size(),
                self.config.d_model,
                dtype=param.dtype,
                device=param.device,
            )
            for layer in self.model.layers
        ]

    def get_aux_loss(self):
        return self.model.get_aux_loss()

    def get_current_max_span(self):
        return self.model.get_current_max_span()

    def get_current_avg_span(self):
        return self.model.get_current_avg_span()

    def reorder_incremental_state(
        self,
        incremental_state: Dict[str, Dict[str, Optional[torch.Tensor]]],
        new_order: torch.Tensor,
    ):
        """Reorder incremental state.

        This will be called when the order of the input has changed from the
        previous time step. A typical use case is beam search, where the input
        order changes between time steps based on the selection of beams.

        Raises:
            NotImplementedError: always; reordering is not implemented yet.
        """
        # NOTE: an implementation would fetch "mems" from incremental_state,
        # index_select every cached tensor with new_order and store the result
        # back. The caches built by init_hid_cache keep the batch on
        # dimension 0, so that is the dimension to reorder.
        raise NotImplementedError("This is required for generation/beam search")
+ +## Data Preparation +Prepare mTEDx data as in [mTEDx example](https://github.com/fairinternal/fairseq-py/blob/0d9c5851e6fac40f9e366b3633ccd615c2901788/examples/speech_to_text/docs/mtedx_example.md) and CoVoST data as in [CoVoST example](https://github.com/fairinternal/fairseq-py/blob/0d9c5851e6fac40f9e366b3633ccd615c2901788/examples/speech_to_text/docs/covost_example.md). Similarly prepare EuroParl data. + + +## Training a multilingual ASR model with attention head selection + +```bash +data_dir= +train_subset="train_ar_ar_tedx,train_de_de_tedx,train_el_el_tedx,train_es_es_tedx,train_fr_fr_tedx,train_it_it_tedx,train_pt_pt_tedx,train_ru_ru_tedx" +valid_subset="valid_ar_ar_tedx,valid_de_de_tedx,valid_el_el_tedx,valid_es_es_tedx,valid_fr_fr_tedx,valid_it_it_tedx,valid_pt_pt_tedx,valid_ru_ru_tedx" +strategy= + +fairseq-train ${data_dir} \ + --user-dir examples/attention_head_selection/src \ + --train-subset "${train_subset}" \ + --valid-subset "${valid_subset}" \ + --config-yaml 'config_asr.yaml' \ + --arch 'head_selection_s2t_transformer_s' \ + --task 'speech_to_text_head_selection' \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --lr-scheduler 'inverse_sqrt' --stop-min-lr -1.0 --warmup-updates 10000 \ + --lr 5e-4 \ + --clip-norm 10.0 \ + --seed 1 \ + --max-epoch 400 \ + --max-tokens 32000 \ + --ignore-prefix-size 1 \ + --dropout 0.3 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --skip-invalid-size-inputs-valid-test \ + --encoder-attn-head-select \ + --total-encoder-attention-heads 8 \ + --decoder-self-attn-head-select \ + --total-decoder-attention-heads 8 \ + --attn-head-select-strategy ${strategy} \ + --task-type lang +``` + +## Training a multi-domain ASR model with attention head selection + +```bash +data_dir= 
+train_subset="train_es_es_tedx,train_fr_fr_tedx,train_pt_pt_tedx,train_it_it_tedx,train_ru_ru_tedx,train_el_el_tedx,train_ar_ar_tedx,train_de_de_tedx,train_ar_ar_cv,train_de_de_cv,train_es_es_cv,train_fr_fr_cv,train_it_it_cv,train_pt_pt_cv,train_ru_ru_cv,train_de_de_ep,train_es_es_ep,train_fr_fr_ep,train_it_it_ep,train_pt_pt_ep" +valid_subset="dev_es_es_tedx,dev_fr_fr_tedx,dev_pt_pt_tedx,dev_it_it_tedx,dev_ru_ru_tedx,dev_el_el_tedx,dev_ar_ar_tedx,dev_de_de_tedx,dev_ar_ar_cv,dev_de_de_cv,dev_es_es_cv,dev_fr_fr_cv,dev_it_it_cv,dev_pt_pt_cv,dev_ru_ru_cv,dev_de_de_ep,dev_es_es_ep,dev_fr_fr_ep,dev_it_it_ep,dev_pt_pt_ep" +strategy= + +fairseq-train ${data_dir} \ + --user-dir examples/attention_head_selection/src \ + --train-subset "${train_subset}" \ + --valid-subset "${valid_subset}" \ + --config-yaml 'config_asr.yaml' \ + --arch head_selection_s2t_transformer_s \ + --task speech_to_text_head_selection \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --lr-scheduler 'inverse_sqrt' --stop-min-lr -1.0 --warmup-updates 10000 \ + --lr 5e-4 \ + --clip-norm 10.0 \ + --seed 1 \ + --max-epoch 400 \ + --max-tokens 32000 \ + --ignore-prefix-size 1 \ + --dropout 0.3 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --skip-invalid-size-inputs-valid-test \ + --encoder-attn-head-select \ + --total-encoder-attention-heads 8 \ + --decoder-self-attn-head-select \ + --total-decoder-attention-heads 8 \ + --attn-head-select-strategy ${strategy} \ + --task-type domain +``` + +## Inference in multilingual setting + +```bash +MODEL_DIR= +data_dir= +gen_subset= +train_subset="train_ar_ar_tedx,train_de_de_tedx,train_el_el_tedx,train_es_es_tedx,train_fr_fr_tedx,train_it_it_tedx,train_pt_pt_tedx,train_ru_ru_tedx" +last_n=10 +CHECKPOINT_FILENAME="avg_last_${last_n}_checkpoint.pt" +CHECKPOINT="_avg" +RESULTS="${MODEL_DIR}/ckpt${CHECKPOINT}" +if [ ! 
-d $RESULTS ]; then + mkdir -p $RESULTS +fi; + +python scripts/average_checkpoints.py \ + --inputs ${MODEL_DIR} --num-epoch-checkpoints ${last_n} \ + --output "${MODEL_DIR}/${CHECKPOINT_FILENAME}" + +fairseq-generate ${data_dir} \ + --user-dir examples/attention_head_selection/src \ + --arch 'head_selection_s2t_transformer_s' \ + --task 'speech_to_text_head_selection' \ + --train-subset ${train_subset} \ + --gen-subset ${gen_subset} \ + --path "${MODEL_DIR}/${CHECKPOINT_FILENAME}" \ + --config-yaml 'config_asr.yaml' \ + --prefix-size 1 \ + --max-tokens 40000 --beam 5 \ + --skip-invalid-size-inputs-valid-test \ + --results-path ${RESULTS} \ + --scoring wer --wer-tokenizer 13a \ + --wer-lowercase --wer-remove-punct --remove-bpe +``` + +## Inference in multi-domain setting + +```bash +MODEL_DIR= +data_dir= +gen_subset= +train_subset="train_es_es_tedx,train_fr_fr_tedx,train_pt_pt_tedx,train_it_it_tedx,train_ru_ru_tedx,train_el_el_tedx,train_ar_ar_tedx,train_de_de_tedx,train_ar_ar_cv,train_de_de_cv,train_es_es_cv,train_fr_fr_cv,train_it_it_cv,train_pt_pt_cv,train_ru_ru_cv,train_de_de_ep,train_es_es_ep,train_fr_fr_ep,train_it_it_ep,train_pt_pt_ep" +last_n=10 +CHECKPOINT_FILENAME="avg_last_${last_n}_checkpoint.pt" +CHECKPOINT="_avg" +RESULTS="${MODEL_DIR}/ckpt${CHECKPOINT}" +if [ ! 
-d $RESULTS ]; then + mkdir -p $RESULTS +fi; + +python scripts/average_checkpoints.py \ + --inputs ${MODEL_DIR} --num-epoch-checkpoints ${last_n} \ + --output "${MODEL_DIR}/${CHECKPOINT_FILENAME}" + +fairseq-generate ${data_dir} \ + --user-dir examples/attention_head_selection/src \ + --arch 'head_selection_s2t_transformer_s' \ + --task 'speech_to_text_head_selection' \ + --train-subset ${train_subset} \ + --gen-subset ${gen_subset} \ + --path "${MODEL_DIR}/${CHECKPOINT_FILENAME}" \ + --config-yaml 'config_asr.yaml' \ + --prefix-size 1 \ + --max-tokens 40000 --beam 5 \ + --skip-invalid-size-inputs-valid-test \ + --results-path ${RESULTS} \ + --scoring wer --wer-tokenizer 13a \ + --wer-lowercase --wer-remove-punct --remove-bpe +``` + +## Citation +```bibtex +@article{gong2021pay, + title={Pay Better Attention to Attention: Head Selection in Multilingual and Multi-Domain Sequence Modeling}, + author={Gong, Hongyu and Tang, Yun and Pino, Juan and Li, Xian}, + journal={arXiv preprint arXiv:2106.10840}, + year={2021} +} +``` diff --git a/fairseq/examples/attention_head_selection/src/__init__.py b/fairseq/examples/attention_head_selection/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/attention_head_selection/src/loss/__init__.py b/fairseq/examples/attention_head_selection/src/loss/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/attention_head_selection/src/loss/attention_head_selection.py b/fairseq/examples/attention_head_selection/src/loss/attention_head_selection.py new file mode 100644 index 0000000..4ba3395 --- /dev/null +++ b/fairseq/examples/attention_head_selection/src/loss/attention_head_selection.py @@ -0,0 +1,27 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class HeadSelectionLoss(_Loss):
    """KL-style regulariser on sampled attention-head selections."""

    def __init__(self, args):
        super().__init__()
        self.args = args
        # Weight of the KL term; 0.0 when ``args`` has no ``kl_weight``.
        self.kl_weight = getattr(args, "kl_weight", 0.0)

    def forward(self, head_samples, sample_sizes, prior=0.5, eps=1e-7):
        """Compute the weighted KL term.

        Args:
            head_samples: tensor of shape (num_tasks, num_layers, num_heads).
            sample_sizes: tensor of shape (num_tasks, ).
            prior: prior value each sample is compared against.
            eps: constant added inside the logarithm to avoid ``log(0)``.

        Returns:
            ``kl_weight`` times the dot product of the per-task mean KL term
            with ``sample_sizes``.
        """
        log_ratio = torch.log(head_samples + eps) - math.log(prior)
        # Sum over heads, then over layers: one value per task.
        per_task = (head_samples * log_ratio).sum(-1).sum(-1)
        # Normalise by the number of (layer, head) entries belonging to a task.
        entries_per_task = torch.numel(head_samples) / head_samples.size(0)
        per_task = per_task / entries_per_task
        return self.kl_weight * torch.matmul(per_task, sample_sizes)
@register_model("head_selection_s2t_transformer")
class HeadSelectionS2TTransformerModel(S2TTransformerModel):
    """
    S2TTransformer whose encoder and/or decoder can use attention head
    selection, depending on the head-selection command line switches.
    """
    def __init__(self, encoder, decoder):
        super().__init__(encoder, decoder)

    @staticmethod
    def add_args(parser):
        """Register the base S2T options plus the head-selection options."""
        S2TTransformerModel.add_args(parser)

        def add_switch(flag, help_text):
            parser.add_argument(
                flag, action="store_true", default=False, help=help_text
            )

        def add_value(flag, value_type, help_text):
            parser.add_argument(flag, type=value_type, help=help_text)

        # encoder head selection
        add_switch("--encoder-attn-head-select", "encoder head selection")
        add_value(
            "--total-encoder-attention-heads", int,
            "total number of encoder attention heads",
        )
        # decoder self attention selection
        add_switch(
            "--decoder-self-attn-head-select",
            "decoder self-attention head selection",
        )
        # decoder-encoder attention selection
        add_switch(
            "--dec-enc-attn-head-select",
            "decoder-encoder attention head selection",
        )
        add_value(
            "--total-decoder-attention-heads", int,
            "total number of decoder attention heads",
        )
        # selection strategy
        add_value(
            "--attn-head-select-strategy", str,
            "attention head selection strategy, subset or group",
        )

    @classmethod
    def build_encoder(cls, args):
        """Build the encoder and optionally load pretrained weights into it."""
        select_heads = (
            safe_hasattr(args, "encoder_attn_head_select")
            and args.encoder_attn_head_select
        )
        encoder_cls = (
            HeadSelectionS2TTransformerEncoder if select_heads
            else S2TTransformerEncoder
        )
        encoder = encoder_cls(args)

        pretraining_path = getattr(args, "load_pretrained_encoder_from", None)
        if pretraining_path is None:
            return encoder
        if not Path(pretraining_path).exists():
            # A missing checkpoint is only reported, not treated as an error.
            logger.warning(
                f"skipped pretraining because {pretraining_path} does not exist"
            )
            return encoder
        encoder = checkpoint_utils.load_pretrained_component_from_model(
            component=encoder, checkpoint=pretraining_path
        )
        logger.info(f"loaded pretrained encoder from: {pretraining_path}")
        return encoder

    @classmethod
    def build_decoder(cls, args, task, embed_tokens):
        """Build a head-selection decoder if either decoder switch is set."""
        switches = ("decoder_self_attn_head_select", "dec_enc_attn_head_select")
        select_heads = any(
            safe_hasattr(args, name) and getattr(args, name) for name in switches
        )
        decoder_cls = (
            HeadSelectionTransformerDecoderScriptable if select_heads
            else TransformerDecoderScriptable
        )
        return decoder_cls(args, task.target_dictionary, embed_tokens)
@register_model_architecture(model_name="head_selection_s2t_transformer", arch_name="head_selection_s2t_transformer")
def base_architecture(args):
    """Apply the S2T base defaults, then fill in head-selection defaults.

    Options already present on ``args`` are left untouched; only missing ones
    receive the fallback value.
    """
    s2t_base_architecture(args)
    head_selection_defaults = (
        ("encoder_attn_head_select", False),
        ("decoder_self_attn_head_select", False),
        ("dec_enc_attn_head_select", False),
        ("total_encoder_attention_heads", 8),
        ("total_decoder_attention_heads", 8),
        ("attn_head_select_strategy", "group"),
    )
    for option, fallback in head_selection_defaults:
        setattr(args, option, getattr(args, option, fallback))
class HeadSelectionTransformerModel(TransformerModel):
    """Transformer whose encoder/decoder can use attention head selection."""

    def __init__(self, args, encoder, decoder):
        super().__init__(args, encoder, decoder)

    @staticmethod
    def add_args(parser):
        """Register the base Transformer options plus head-selection options."""
        TransformerModel.add_args(parser)

        def add_switch(flag, help_text):
            parser.add_argument(
                flag, action="store_true", default=False, help=help_text
            )

        def add_value(flag, value_type, help_text):
            parser.add_argument(flag, type=value_type, help=help_text)

        # encoder head selection
        add_switch("--encoder-attn-head-select", "encoder head selection")
        add_value(
            "--total-encoder-attention-heads", int,
            "total number of encoder attention heads",
        )
        # decoder self attention
        add_switch(
            "--decoder-self-attn-head-select",
            "decoder self-attention head selection",
        )
        # decoder-encoder attention
        add_switch(
            "--dec-enc-attn-head-select",
            "decoder-encoder attention head selection",
        )
        add_value(
            "--total-decoder-attention-heads", int,
            "total number of decoder attention heads",
        )
        # selection strategy
        add_value(
            "--attn-head-select-strategy", str,
            "attention head selection strategy, subset or group",
        )

    @classmethod
    def build_encoder(cls, args, src_dict, embed_tokens):
        """Return a head-selection encoder when the encoder switch is set."""
        select_heads = (
            safe_hasattr(args, "encoder_attn_head_select")
            and args.encoder_attn_head_select
        )
        encoder_cls = (
            HeadSelectionTransformerEncoder if select_heads else TransformerEncoder
        )
        return encoder_cls(args, src_dict, embed_tokens)

    @classmethod
    def build_decoder(cls, args, tgt_dict, embed_tokens):
        """Return a head-selection decoder if either decoder switch is set."""
        switches = ("decoder_self_attn_head_select", "dec_enc_attn_head_select")
        select_heads = any(
            safe_hasattr(args, name) and getattr(args, name) for name in switches
        )
        decoder_cls = (
            HeadSelectionTransformerDecoder if select_heads else TransformerDecoder
        )
        return decoder_cls(args, tgt_dict, embed_tokens)
class HeadSelectionTransformerDecoder(TransformerDecoder):
    """Transformer decoder with optional self- and encoder-attention head selection.

    After the parent constructor has run, the layer stack is rebuilt from
    ``HeadSelectionTransformerDecoderLayer`` instances that share this
    decoder's head selectors.
    """

    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        no_encoder_attn=False,
        output_projection=None,
    ):
        self.num_tasks = args.decoder_tasks
        self.num_layers = args.decoder_layers
        self.total_num_heads = args.total_decoder_attention_heads
        self.num_heads = args.decoder_attention_heads
        self.select_strategy = args.attn_head_select_strategy
        super().__init__(
            args, dictionary, embed_tokens,
            no_encoder_attn=no_encoder_attn,
            output_projection=output_projection
        )

        def new_selector():
            # Both selectors are configured identically.
            return AttnHeadSelector(
                self.num_tasks,
                self.num_layers,
                self.total_num_heads,
                self.num_heads,
                self.select_strategy
            )

        use_self_attn = (
            safe_hasattr(args, "decoder_self_attn_head_select")
            and args.decoder_self_attn_head_select
        )
        use_enc_attn = (
            safe_hasattr(args, "dec_enc_attn_head_select")
            and args.dec_enc_attn_head_select
        )
        # A selector stays None when its switch is off.
        self.self_attn_head_selector = new_selector() if use_self_attn else None
        self.enc_attn_head_selector = new_selector() if use_enc_attn else None
        self.task_ids = None
        # Replace the layers built by the parent with head-selection layers.
        head_selection_layers = [
            self.build_head_selection_decoder_layer(args, no_encoder_attn, idx)
            for idx in range(args.decoder_layers)
        ]
        self.layers = nn.ModuleList(head_selection_layers)

    def set_task_ids(self, task_ids):
        """Store the task ids passed to the selectors on the next forward."""
        self.task_ids = task_ids

    def build_head_selection_decoder_layer(self, args, no_encoder_attn=False, layer_idx=None):
        """Create one decoder layer wired to this decoder's selectors."""
        return HeadSelectionTransformerDecoderLayer(
            args,
            layer_idx,
            self.self_attn_head_selector,
            self.enc_attn_head_selector,
            no_encoder_attn=no_encoder_attn
        )

    def forward(
        self,
        prev_output_tokens,
        encoder_out: Optional[Dict[str, List[Tensor]]] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        features_only: bool = False,
        full_context_alignment: bool = False,
        alignment_layer: Optional[int] = None,
        alignment_heads: Optional[int] = None,
        src_lengths: Optional[Any] = None,
        return_all_hiddens: bool = False,
    ):
        """Refresh the head selections, then run the parent decoder forward."""
        for selector in (self.self_attn_head_selector, self.enc_attn_head_selector):
            if selector is not None:
                selector.head_select(self.task_ids)
        return super().forward(
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
            incremental_state=incremental_state,
            features_only=features_only,
            full_context_alignment=full_context_alignment,
            alignment_layer=alignment_layer,
            alignment_heads=alignment_heads,
            src_lengths=src_lengths,
            return_all_hiddens=return_all_hiddens
        )
import torch
import torch.nn as nn
import math


class AttnHeadSelector(nn.Module):
    """
    Latent variable modeling of attention head selection.

    Holds one learnable logit per (task, layer, head).  ``head_select`` draws a
    relaxed sample from those logits and keeps ``num_heads`` heads per task and
    layer; ``forward`` then returns the selection for one layer, restricted to
    the task ids given to the last ``head_select`` call.
    """
    def __init__(
        self, num_tasks, num_layers,
        total_num_heads, num_heads,
        select_strategy="group",
        head_select_temp=5.0
    ):
        super().__init__()
        self.num_tasks = num_tasks
        self.num_layers = num_layers
        self.total_num_heads = total_num_heads
        self.num_heads = num_heads
        self.select_strategy = select_strategy
        self.temp = head_select_temp

        # torch.empty replaces the legacy torch.Tensor(...) constructor; the
        # values are overwritten by the uniform init right below.
        self.head_logits = torch.nn.Parameter(
            torch.empty(self.num_tasks, self.num_layers, total_num_heads),
            requires_grad=True
        )
        nn.init.uniform_(
            self.head_logits, a=math.log(0.01),
            b=math.log(1.0)
        )

    def gumbel_sample(self, logits, tau=1.0):
        """Return sigmoid((logits + g1 - g2) / tau) with g1, g2 independent Gumbel noise."""
        gumbels1 = -torch.empty_like(logits, memory_format=torch.legacy_contiguous_format).exponential_().log()
        gumbels2 = -torch.empty_like(logits, memory_format=torch.legacy_contiguous_format).exponential_().log()
        gumbels1 = (logits + gumbels1 - gumbels2) / tau
        y_soft = gumbels1.sigmoid()
        return y_soft

    def subset_select(self, y_soft, topk, dim=-1):
        """Pick the ``topk`` highest-scoring heads along ``dim``.

        Returns ``(indices, weights)``.  The weights evaluate to 1.0 but keep a
        gradient path to the selected scores (value minus its detached copy).
        """
        top_values, top_inds = torch.topk(y_soft, k=topk, dim=dim)
        top_ret = 1.0 - top_values.detach() + top_values
        return top_inds.detach(), top_ret

    def group_select(self, y_soft, topk, dim=-1):
        """Pick one head per slot after viewing the heads as (groups, ``topk``).

        The head axis is reshaped to ``(-1, topk)`` and the best group is taken
        for each of the ``topk`` slots, so the head axis length must be a
        multiple of ``topk``.  ``dim`` is accepted for signature parity with
        ``subset_select`` but is not used.
        """
        # top_values: (num_tasks, num_layers, topk)
        top_values, top_inds = torch.max(
            y_soft.view(self.num_tasks, self.num_layers, -1, topk), dim=2
        )
        # Convert (winning group, slot) back to a flat head index.
        top_inds = top_inds * topk + torch.arange(topk, device=top_inds.device).unsqueeze(0).unsqueeze(1)
        top_ret = 1.0 - top_values.detach() + top_values
        return top_inds.detach(), top_ret

    # Backward-compatible alias for the original (misspelled) method name.
    group_selet = group_select

    def head_select(self, task_ids=None):
        """Sample heads for every task/layer and cache the rows for ``task_ids``.

        Raises:
            ValueError: if ``task_ids`` is None or the strategy is unsupported.
        """
        if task_ids is None:
            # Indexing with None would insert an axis instead of selecting
            # tasks, and forward() would then slice the wrong dimension.
            raise ValueError("task_ids must be set before head selection")
        # gumbel_sample
        self.head_samples = self.gumbel_sample(self.head_logits, tau=self.temp)
        # head select
        if self.select_strategy == "subset":
            self.subset_heads, self.subset_weights = self.subset_select(
                self.head_samples,
                topk=self.num_heads,
            )
        elif self.select_strategy == "group":
            self.subset_heads, self.subset_weights = self.group_select(
                self.head_samples,
                topk=self.num_heads,
            )
        else:
            raise ValueError("{} is not supported".format(self.select_strategy))

        self.batch_subset = self.subset_heads[task_ids, :, :]
        self.batch_weights = self.subset_weights[task_ids, :, :]

    def forward(self, layer_idx):
        """Return ``(head indices, head weights)`` of layer ``layer_idx`` for the cached task ids."""
        assert layer_idx is not None
        batch_subset = self.batch_subset[:, layer_idx, :]
        batch_weights = self.batch_weights[:, layer_idx, :]
        return batch_subset, batch_weights
# diff --git a/fairseq/examples/attention_head_selection/src/modules/head_selection_transformer_layer.py b/fairseq/examples/attention_head_selection/src/modules/head_selection_transformer_layer.py new file mode 100644 index 0000000..c792143 --- /dev/null +++ b/fairseq/examples/attention_head_selection/src/modules/head_selection_transformer_layer.py @@ -0,0 +1,92 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from fairseq.utils import safe_getattr
from fairseq.modules import TransformerEncoderLayer, TransformerDecoderLayer
from ..modules.multihead_attention_selection import MultiheadAttentionSelection


class HeadSelectionTransformerEncoderLayer(TransformerEncoderLayer):
    """Encoder layer whose self-attention can select a subset of heads.

    When ``attn_head_selector`` is given, the self-attention built by the
    parent class is replaced by a ``MultiheadAttentionSelection`` module.
    """

    def __init__(self, args, layer_idx, attn_head_selector=None):
        super().__init__(args)
        self.layer_idx = layer_idx
        # Selection attention calls its selector on every forward pass, so it
        # is only installed when a selector exists (the decoder layer below
        # applies the same guard).
        if attn_head_selector is not None:
            self.self_attn = self.build_self_attention_selection(
                self.embed_dim, args, attn_head_selector
            )

    def build_self_attention_selection(self, embed_dim, args, attn_head_selector=None):
        """Build self-attention over ``args.total_encoder_attention_heads`` candidate heads."""
        return MultiheadAttentionSelection(
            embed_dim,
            args.total_encoder_attention_heads,
            args.encoder_attention_heads,
            dropout=args.attention_dropout,
            self_attention=True,
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
            layer_idx=self.layer_idx,
            attn_head_selector=attn_head_selector
        )


class HeadSelectionTransformerDecoderLayer(TransformerDecoderLayer):
    """Decoder layer whose self- and/or encoder-attention can select heads.

    Each attention module is replaced only when the matching selector is
    provided; otherwise the module built by the parent class is kept.
    """

    def __init__(
        self,
        args,
        layer_idx,
        self_attn_head_selector=None,
        enc_attn_head_selector=None,
        no_encoder_attn=False,
        add_bias_kv=False,
        add_zero_attn=False,
    ):
        self.layer_idx = layer_idx
        super().__init__(args, no_encoder_attn, add_bias_kv, add_zero_attn)
        if self_attn_head_selector is not None:
            self.self_attn = self.build_self_attention_selection(
                self.embed_dim, args,
                self_attn_head_selector=self_attn_head_selector,
                add_bias_kv=add_bias_kv,
                add_zero_attn=add_zero_attn
            )
        if enc_attn_head_selector is not None:
            self.encoder_attn = self.build_encoder_attention_selection(
                self.embed_dim, args,
                enc_attn_head_selector=enc_attn_head_selector
            )

    def build_self_attention_selection(
        self, embed_dim, args, self_attn_head_selector=None,
        add_bias_kv=False, add_zero_attn=False
    ):
        """Build decoder self-attention over ``args.total_decoder_attention_heads`` candidate heads."""
        return MultiheadAttentionSelection(
            embed_dim,
            args.total_decoder_attention_heads,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            # Self-attention mode is switched off when cross_self_attention is set.
            self_attention=not safe_getattr(args, "cross_self_attention"),
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
            layer_idx=self.layer_idx,
            attn_head_selector=self_attn_head_selector,
        )

    def build_encoder_attention_selection(self, embed_dim, args, enc_attn_head_selector=None):
        """Build encoder-decoder attention whose keys/values use ``args.encoder_embed_dim``."""
        return MultiheadAttentionSelection(
            embed_dim,
            args.total_decoder_attention_heads,
            args.decoder_attention_heads,
            kdim=args.encoder_embed_dim,
            vdim=args.encoder_embed_dim,
            dropout=args.attention_dropout,
            encoder_decoder_attention=True,
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
            layer_idx=self.layer_idx,
            attn_head_selector=enc_attn_head_selector,
        )
# diff --git a/fairseq/examples/attention_head_selection/src/modules/multihead_attention_selection.py b/fairseq/examples/attention_head_selection/src/modules/multihead_attention_selection.py new file mode 100644 index 0000000..566ad82 --- /dev/null +++ b/fairseq/examples/attention_head_selection/src/modules/multihead_attention_selection.py @@ -0,0 +1,355 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict, Optional, Tuple
import torch
from fairseq import utils
from fairseq.modules.quant_noise import quant_noise
from torch import Tensor, nn
from torch.nn import Parameter

from fairseq.modules.multihead_attention import MultiheadAttention
from ..modules.multihead_functional import multi_head_attention_forward


class MultiheadAttentionSelection(MultiheadAttention):
    """Multi-head attention that computes ``total_num_heads`` heads and keeps ``num_heads``.

    The q/k/v projections are widened to ``head_dim * total_num_heads``.  On
    every forward pass ``attn_head_selector(layer_idx)`` supplies the head
    indices and weights used to pick ``num_heads`` of the computed heads before
    the output projection.
    """

    def __init__(
        self,
        embed_dim,
        total_num_heads,
        num_heads,
        kdim=None,
        vdim=None,
        dropout=0.0,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
        self_attention=False,
        encoder_decoder_attention=False,
        q_noise=0.0,
        qn_block_size=8,
        layer_idx=0,
        attn_head_selector=None
    ):
        super().__init__(
            embed_dim,
            num_heads,
            kdim=kdim,
            vdim=vdim,
            dropout=dropout,
            bias=bias,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            self_attention=self_attention,
            encoder_decoder_attention=encoder_decoder_attention,
            q_noise=q_noise,
            qn_block_size=qn_block_size,
        )
        self.layer_idx = layer_idx
        self.attn_head_selector = attn_head_selector
        self.total_num_heads = total_num_heads
        self.total_embed_dim = self.head_dim * total_num_heads
        # Re-create the input projections so they emit all candidate heads.
        self.k_proj = quant_noise(
            nn.Linear(self.kdim, self.total_embed_dim, bias=bias), q_noise, qn_block_size
        )
        self.v_proj = quant_noise(
            nn.Linear(self.vdim, self.total_embed_dim, bias=bias), q_noise, qn_block_size
        )
        self.q_proj = quant_noise(
            nn.Linear(embed_dim, self.total_embed_dim, bias=bias), q_noise, qn_block_size
        )
        if add_bias_kv:
            self.bias_k = Parameter(torch.Tensor(1, 1, self.total_embed_dim))
            self.bias_v = Parameter(torch.Tensor(1, 1, self.total_embed_dim))
        else:
            self.bias_k = self.bias_v = None
        # The replaced projections / biases need initialising again.
        self.reset_parameters()

    def forward(
        self,
        query,
        key: Optional[Tensor],
        value: Optional[Tensor],
        key_padding_mask: Optional[Tensor] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        need_weights: bool = True,
        static_kv: bool = False,
        attn_mask: Optional[Tensor] = None,
        before_softmax: bool = False,
        need_head_weights: bool = False,
        # subset_heads: Optional[Tensor] = None,
        # subset_weights: Optional[Tensor] = None
    ) -> Tuple[Tensor, Optional[Tensor]]:
        """Attend over all candidate heads and project the selected ones.

        ``query`` is unpacked as ``(tgt_len, bsz, embed_dim)``; ``key`` (when
        given) as ``(src_len, bsz, _)``.  Returns ``(attn, attn_weights)``;
        with ``before_softmax`` the raw scores and values are returned instead.
        ``attn_weights`` is None unless ``need_weights`` (implied by
        ``need_head_weights``) is set.
        """
        if need_head_weights:
            need_weights = True

        is_tpu = query.device.type == "xla"

        subset_heads, subset_weights = self.attn_head_selector(self.layer_idx)

        tgt_len, bsz, embed_dim = query.size()
        src_len = tgt_len
        assert list(query.size()) == [tgt_len, bsz, self.embed_dim]
        if key is not None:
            src_len, key_bsz, _ = key.size()
            if not torch.jit.is_scripting():
                assert key_bsz == bsz
                assert value is not None
                # Compare as a tuple; "assert src_len, <expr>" only tested
                # src_len and used the comparison as the failure message.
                assert (src_len, bsz) == tuple(value.shape[:2])

        if (
            not self.onnx_trace
            and not is_tpu  # don't use PyTorch version on TPUs
            and incremental_state is None
            and not static_kv
            # A workaround for quantization to work. Otherwise JIT compilation
            # treats bias in linear module as method.
            and not torch.jit.is_scripting()
        ):
            assert key is not None and value is not None
            return multi_head_attention_forward(
                query,
                key,
                value,
                self.embed_dim,
                self.total_num_heads,
                self.num_heads,
                torch.empty([0]),
                torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)),
                self.bias_k,
                self.bias_v,
                self.add_zero_attn,
                self.dropout_module.p,
                self.out_proj.weight,
                self.out_proj.bias,
                self.training or self.dropout_module.apply_during_inference,
                key_padding_mask,
                need_weights,
                attn_mask,
                use_separate_proj_weight=True,
                q_proj_weight=self.q_proj.weight,
                k_proj_weight=self.k_proj.weight,
                v_proj_weight=self.v_proj.weight,
                subset_heads=subset_heads,
                subset_weights=subset_weights
            )

        if incremental_state is not None:
            saved_state = self._get_input_buffer(incremental_state)
            if saved_state is not None and "prev_key" in saved_state:
                # previous time steps are cached - no need to recompute
                # key and value if they are static
                if static_kv:
                    assert self.encoder_decoder_attention and not self.self_attention
                    key = value = None
        else:
            saved_state = None

        if self.self_attention:
            q = self.q_proj(query)
            k = self.k_proj(query)
            v = self.v_proj(query)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                k = self.k_proj(key)
                v = self.v_proj(key)

        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q *= self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
                )
            if key_padding_mask is not None:
                key_padding_mask = torch.cat(
                    [
                        key_padding_mask,
                        key_padding_mask.new_zeros(key_padding_mask.size(0), 1),
                    ],
                    dim=1,
                )

        # Fold all candidate heads into the batch dimension.
        q = (
            q.contiguous()
            .view(tgt_len, bsz * self.total_num_heads, self.head_dim)
            .transpose(0, 1)
        )
        if k is not None:
            k = (
                k.contiguous()
                .view(-1, bsz * self.total_num_heads, self.head_dim)
                .transpose(0, 1)
            )
        if v is not None:
            v = (
                v.contiguous()
                .view(-1, bsz * self.total_num_heads, self.head_dim)
                .transpose(0, 1)
            )

        if saved_state is not None:
            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
            if "prev_key" in saved_state:
                _prev_key = saved_state["prev_key"]
                assert _prev_key is not None
                prev_key = _prev_key.view(bsz * self.total_num_heads, -1, self.head_dim)
                if static_kv:
                    k = prev_key
                else:
                    assert k is not None
                    k = torch.cat([prev_key, k], dim=1)
                src_len = k.size(1)
            if "prev_value" in saved_state:
                _prev_value = saved_state["prev_value"]
                assert _prev_value is not None
                prev_value = _prev_value.view(bsz * self.total_num_heads, -1, self.head_dim)
                if static_kv:
                    v = prev_value
                else:
                    assert v is not None
                    v = torch.cat([prev_value, v], dim=1)
            prev_key_padding_mask: Optional[Tensor] = None
            if "prev_key_padding_mask" in saved_state:
                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
            assert k is not None and v is not None
            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
                key_padding_mask=key_padding_mask,
                prev_key_padding_mask=prev_key_padding_mask,
                batch_size=bsz,
                src_len=k.size(1),
                static_kv=static_kv,
            )

            saved_state["prev_key"] = k.view(bsz, self.total_num_heads, -1, self.head_dim)
            saved_state["prev_value"] = v.view(bsz, self.total_num_heads, -1, self.head_dim)
            saved_state["prev_key_padding_mask"] = key_padding_mask
            # In this branch incremental_state is never None
            assert incremental_state is not None
            incremental_state = self._set_input_buffer(incremental_state, saved_state)
        assert k is not None
        assert k.size(1) == src_len

        # This is part of a workaround to get around fork/join parallelism
        # not supporting Optional types.
        if key_padding_mask is not None and key_padding_mask.dim() == 0:
            key_padding_mask = None

        if key_padding_mask is not None:
            assert key_padding_mask.size(0) == bsz
            assert key_padding_mask.size(1) == src_len

        if self.add_zero_attn:
            assert v is not None
            src_len += 1
            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
                )
            if key_padding_mask is not None:
                key_padding_mask = torch.cat(
                    [
                        key_padding_mask,
                        torch.zeros(key_padding_mask.size(0), 1).type_as(
                            key_padding_mask
                        ),
                    ],
                    dim=1,
                )

        attn_weights = torch.bmm(q, k.transpose(1, 2))
        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)

        assert list(attn_weights.size()) == [bsz * self.total_num_heads, tgt_len, src_len]

        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            if self.onnx_trace:
                attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1)
            attn_weights += attn_mask

        if key_padding_mask is not None:
            # don't attend to padding symbols
            attn_weights = attn_weights.view(bsz, self.total_num_heads, tgt_len, src_len)
            if not is_tpu:
                attn_weights = attn_weights.masked_fill(
                    key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool),
                    float("-inf"),
                )
            else:
                attn_weights = attn_weights.transpose(0, 2)
                attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf"))
                attn_weights = attn_weights.transpose(0, 2)
            # The scores still cover every candidate head here, so fold back
            # with total_num_heads (num_heads mismatches whenever the two differ).
            attn_weights = attn_weights.view(bsz * self.total_num_heads, tgt_len, src_len)

        if before_softmax:
            return attn_weights, v

        attn_weights_float = utils.softmax(
            attn_weights, dim=-1, onnx_trace=self.onnx_trace
        )
        attn_weights = attn_weights_float.type_as(attn_weights)
        attn_probs = self.dropout_module(attn_weights)

        assert v is not None

        # evaluation
        if subset_heads is not None and subset_heads.numel() == 1:
            subset_heads = subset_heads.repeat(bsz)
            subset_weights = subset_weights.repeat(bsz)

        if subset_heads is None:
            attn = torch.bmm(attn_probs, v)
        else:
            # training with head selection
            mixed_attn = torch.bmm(attn_probs, v).contiguous().view(bsz, self.total_num_heads, tgt_len, self.head_dim)
            # Gather, per batch element, the heads named by each selector column.
            attn = torch.stack(
                [mixed_attn[torch.arange(bsz), subset_heads[:, col], :, :] for col in range(subset_heads.size(1))], dim=1
            )
            attn = attn * subset_weights.unsqueeze(2).unsqueeze(3)
            attn = attn.contiguous().view(bsz * self.num_heads, tgt_len, self.head_dim)

        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.size(1) == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.contiguous().view(tgt_len, bsz, embed_dim)
        else:
            attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
        attn = self.out_proj(attn)
        attn_weights: Optional[Tensor] = None
        if need_weights:
            if subset_heads is None:
                attn_weights = attn_weights_float.view(
                    bsz, self.num_heads, tgt_len, src_len
                ).transpose(1, 0)
            else:
                mixed_attn_weights = attn_weights_float.view(
                    bsz, self.total_num_heads, tgt_len, src_len
                )
                attn_weights = torch.stack(
                    [mixed_attn_weights[torch.arange(bsz), subset_heads[:, col], :, :] for col in range(subset_heads.size(1))], dim=1
                ).transpose(1, 0)
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(dim=0)

        return attn, attn_weights
# diff --git a/fairseq/examples/attention_head_selection/src/modules/multihead_functional.py b/fairseq/examples/attention_head_selection/src/modules/multihead_functional.py new file mode 100644 index 0000000..d5edc77 --- /dev/null +++ b/fairseq/examples/attention_head_selection/src/modules/multihead_functional.py @@ -0,0 +1,278 @@
# Copyright (c) Facebook, Inc.
# and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Optional, Tuple
import torch
from torch import Tensor
from torch.nn.functional import (
    linear, softmax, dropout, pad,
    has_torch_function,
    handle_torch_function,
    _in_projection_packed,
)
import math
import warnings


def _scaled_dot_product_attention(
    q: Tensor,
    k: Tensor,
    v: Tensor,
    attn_mask: Optional[Tensor] = None,
    dropout_p: float = 0.0,
    bsz: int = 1,
    subset_heads: Optional[Tensor] = None,
    subset_weights: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor]:
    """Scaled dot-product attention with optional per-sample head selection.

    ``q`` is ``(B, Nt, E)`` and ``k``/``v`` are ``(B, Ns, E)`` where ``B`` is
    ``bsz * total_num_heads``.  When ``subset_heads`` (indexed as
    ``[batch, column]``) is given, only the named heads are kept in both the
    output and the returned attention weights, and the output is scaled by
    ``subset_weights`` if provided (``None`` means no scaling).

    Returns ``(output, attn)``.
    """
    B, Nt, E = q.shape
    q = q / math.sqrt(E)
    # B: bsz * total_num_heads
    # (B, Nt, E) x (B, E, Ns) -> (B, Nt, Ns)
    attn = torch.bmm(q, k.transpose(-2, -1))
    if attn_mask is not None:
        attn += attn_mask
    attn = softmax(attn, dim=-1)
    if dropout_p > 0.0:
        attn = dropout(attn, p=dropout_p)
    # (B, Nt, Ns) x (B, Ns, E) -> (B, Nt, E)
    output = torch.bmm(attn, v)
    if subset_heads is None:
        return output, attn

    batch_index = torch.arange(bsz, device=subset_heads.device)
    num_selected = subset_heads.size(1)

    # Pick the selected heads out of the per-sample head axis.
    mixed_output = output.contiguous().view(bsz, -1, Nt, E)
    output = torch.stack(
        [mixed_output[batch_index, subset_heads[:, col], :, :] for col in range(num_selected)],
        dim=1
    )
    if subset_weights is not None:
        output = output * subset_weights.unsqueeze(2).unsqueeze(3)
    output = output.contiguous().view(-1, Nt, E)

    _, Nt, Ns = attn.size()
    mixed_attn = attn.view(bsz, -1, Nt, Ns)
    attn = torch.stack(
        [mixed_attn[batch_index, subset_heads[:, col], :, :] for col in range(num_selected)], dim=1
    )
    return output, attn


def _in_projection(
    q: Tensor,
    k: Tensor,
    v: Tensor,
    w_q: Tensor,
    w_k: Tensor,
    w_v: Tensor,
    b_q: Optional[Tensor] = None,
    b_k: Optional[Tensor] = None,
    b_v: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor, Tensor]:
    """Apply three independent linear projections to ``q``, ``k`` and ``v``."""
    return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


def multi_head_attention_forward(
    query: Tensor,
    key: Tensor,
    value: Tensor,
    embed_dim_to_check: int,
    total_num_heads: int,
    num_heads: int,
    in_proj_weight: Tensor,
    in_proj_bias: Optional[Tensor],
    bias_k: Optional[Tensor],
    bias_v: Optional[Tensor],
    add_zero_attn: bool,
    dropout_p: float,
    out_proj_weight: Tensor,
    out_proj_bias: Optional[Tensor],
    training: bool = True,
    key_padding_mask: Optional[Tensor] = None,
    need_weights: bool = True,
    attn_mask: Optional[Tensor] = None,
    use_separate_proj_weight: bool = False,
    q_proj_weight: Optional[Tensor] = None,
    k_proj_weight: Optional[Tensor] = None,
    v_proj_weight: Optional[Tensor] = None,
    static_k: Optional[Tensor] = None,
    static_v: Optional[Tensor] = None,
    subset_heads: Optional[Tensor] = None,
    subset_weights: Optional[Tensor] = None,
):
    """Functional multi-head attention over ``total_num_heads`` candidate heads.

    ``query`` is ``(tgt_len, bsz, embed_dim)`` and ``key`` is
    ``(src_len, bsz, ...)``.  The head size is ``embed_dim // num_heads``; the
    projected q/k/v are split into ``total_num_heads`` heads of that size and
    ``subset_heads`` / ``subset_weights`` choose which heads reach the output
    projection.

    Returns ``(attn_output, attn_weights)`` where ``attn_weights`` is averaged
    over ``num_heads`` heads, or None when ``need_weights`` is False.

    Raises:
        RuntimeError: if ``attn_mask`` has an unsupported rank or shape.
    """
    tens_ops = (query, key, value, in_proj_weight, in_proj_bias, bias_k, bias_v, out_proj_weight, out_proj_bias)
    if has_torch_function(tens_ops):
        return handle_torch_function(
            multi_head_attention_forward,
            tens_ops,
            query,
            key,
            value,
            embed_dim_to_check,
            total_num_heads,
            num_heads,
            in_proj_weight,
            in_proj_bias,
            bias_k,
            bias_v,
            add_zero_attn,
            dropout_p,
            out_proj_weight,
            out_proj_bias,
            training=training,
            key_padding_mask=key_padding_mask,
            need_weights=need_weights,
            attn_mask=attn_mask,
            use_separate_proj_weight=use_separate_proj_weight,
            q_proj_weight=q_proj_weight,
            k_proj_weight=k_proj_weight,
            v_proj_weight=v_proj_weight,
            static_k=static_k,
            static_v=static_v,
            subset_heads=subset_heads,
            subset_weights=subset_weights
        )

    # set up shape vars
    tgt_len, bsz, embed_dim = query.shape
    src_len, _, _ = key.shape
    assert embed_dim == embed_dim_to_check, \
        f"was expecting embedding dimension of {embed_dim_to_check}, but got {embed_dim}"
    if isinstance(embed_dim, torch.Tensor):
        # embed_dim can be a tensor when JIT tracing
        head_dim = embed_dim.div(num_heads, rounding_mode='trunc')
    else:
        head_dim = embed_dim // num_heads
    assert head_dim * num_heads == embed_dim, f"embed_dim {embed_dim} not divisible by num_heads {num_heads}"
    if use_separate_proj_weight:
        # allow MHA to have different embedding dimensions when separate projection weights are used
        assert key.shape[:2] == value.shape[:2], \
            f"key's sequence and batch dims {key.shape[:2]} do not match value's {value.shape[:2]}"
    else:
        assert key.shape == value.shape, f"key shape {key.shape} does not match value shape {value.shape}"

    #
    # compute in-projection
    #
    if not use_separate_proj_weight:
        q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
    else:
        assert q_proj_weight is not None, "use_separate_proj_weight is True but q_proj_weight is None"
        assert k_proj_weight is not None, "use_separate_proj_weight is True but k_proj_weight is None"
        assert v_proj_weight is not None, "use_separate_proj_weight is True but v_proj_weight is None"
        if in_proj_bias is None:
            b_q = b_k = b_v = None
        else:
            b_q, b_k, b_v = in_proj_bias.chunk(3)
        q, k, v = _in_projection(query, key, value, q_proj_weight, k_proj_weight, v_proj_weight, b_q, b_k, b_v)

    # prep attention mask
    if attn_mask is not None:
        if attn_mask.dtype == torch.uint8:
            warnings.warn("Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.")
            attn_mask = attn_mask.to(torch.bool)
        else:
            assert attn_mask.is_floating_point() or attn_mask.dtype == torch.bool, \
                f"Only float, byte, and bool types are supported for attn_mask, not {attn_mask.dtype}"
        # ensure attn_mask's dim is 3
        if attn_mask.dim() == 2:
            correct_2d_size = (tgt_len, src_len)
            if attn_mask.shape != correct_2d_size:
                raise RuntimeError(f"The shape of the 2D attn_mask is {attn_mask.shape}, but should be {correct_2d_size}.")
            attn_mask = attn_mask.unsqueeze(0)
        elif attn_mask.dim() == 3:
            correct_3d_size = (bsz * total_num_heads, tgt_len, src_len)
            if attn_mask.shape != correct_3d_size:
                raise RuntimeError(f"The shape of the 3D attn_mask is {attn_mask.shape}, but should be {correct_3d_size}.")
        else:
            raise RuntimeError(f"attn_mask's dimension {attn_mask.dim()} is not supported")

    # prep key padding mask
    if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8:
        warnings.warn("Byte tensor for key_padding_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.")
        key_padding_mask = key_padding_mask.to(torch.bool)

    # add bias along batch dimension (currently second)
    if bias_k is not None and bias_v is not None:
        assert static_k is None, "bias cannot be added to static key."
        assert static_v is None, "bias cannot be added to static value."
        k = torch.cat([k, bias_k.repeat(1, bsz, 1)])
        v = torch.cat([v, bias_v.repeat(1, bsz, 1)])
        if attn_mask is not None:
            attn_mask = pad(attn_mask, (0, 1))
        if key_padding_mask is not None:
            key_padding_mask = pad(key_padding_mask, (0, 1))
    else:
        assert bias_k is None
        assert bias_v is None

    #
    # reshape q, k, v for multihead attention and make em batch first
    #
    q = q.contiguous().view(tgt_len, bsz * total_num_heads, head_dim).transpose(0, 1)
    if static_k is None:
        k = k.contiguous().view(k.shape[0], bsz * total_num_heads, head_dim).transpose(0, 1)
    else:
        # TODO finish disentangling control flow so we don't do in-projections when statics are passed
        assert static_k.size(0) == bsz * total_num_heads, \
            f"expecting static_k.size(0) of {bsz * total_num_heads}, but got {static_k.size(0)}"
        assert static_k.size(2) == head_dim, \
            f"expecting static_k.size(2) of {head_dim}, but got {static_k.size(2)}"
        k = static_k
    if static_v is None:
        v = v.contiguous().view(v.shape[0], bsz * total_num_heads, head_dim).transpose(0, 1)
    else:
        # TODO finish disentangling control flow so we don't do in-projections when statics are passed
        assert static_v.size(0) == bsz * total_num_heads, \
            f"expecting static_v.size(0) of {bsz * total_num_heads}, but got {static_v.size(0)}"
        assert static_v.size(2) == head_dim, \
            f"expecting static_v.size(2) of {head_dim}, but got {static_v.size(2)}"
        v = static_v

    # add zero attention along batch dimension (now first)
    if add_zero_attn:
        zero_attn_shape = (bsz * total_num_heads, 1, head_dim)
        k = torch.cat([k, torch.zeros(zero_attn_shape, dtype=k.dtype, device=k.device)], dim=1)
        v = torch.cat([v, torch.zeros(zero_attn_shape, dtype=v.dtype, device=v.device)], dim=1)
        if attn_mask is not None:
            attn_mask = pad(attn_mask, (0, 1))
        if key_padding_mask is not None:
            key_padding_mask = pad(key_padding_mask, (0, 1))

    # update source sequence length after adjustments
    src_len = k.size(1)

    # merge key padding and attention masks
    if key_padding_mask is not None:
        assert key_padding_mask.shape == (bsz, src_len), \
            f"expecting key_padding_mask shape of {(bsz, src_len)}, but got {key_padding_mask.shape}"
        key_padding_mask = key_padding_mask.view(bsz, 1, 1, src_len). \
            expand(-1, total_num_heads, -1, -1).reshape(bsz * total_num_heads, 1, src_len)
        if attn_mask is None:
            attn_mask = key_padding_mask
        elif attn_mask.dtype == torch.bool:
            attn_mask = attn_mask.logical_or(key_padding_mask)
        else:
            attn_mask = attn_mask.masked_fill(key_padding_mask, float("-inf"))

    # convert mask to float
    if attn_mask is not None and attn_mask.dtype == torch.bool:
        new_attn_mask = torch.zeros_like(attn_mask, dtype=torch.float)
        new_attn_mask.masked_fill_(attn_mask, float("-inf"))
        attn_mask = new_attn_mask

    # adjust dropout probability
    if not training:
        dropout_p = 0.0

    #
    # (deep breath) calculate attention and out projection
    #
    attn_output, attn_output_weights = _scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, bsz, subset_heads, subset_weights)
    attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
    attn_output = linear(attn_output, out_proj_weight, out_proj_bias)

    if need_weights:
        # average attention weights over heads
        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)
        return attn_output, attn_output_weights.sum(dim=1) / num_heads
    else:
        return attn_output, None
# diff --git a/fairseq/examples/attention_head_selection/src/speech_to_text_head_selection.py b/fairseq/examples/attention_head_selection/src/speech_to_text_head_selection.py new file mode 100644 index 0000000..6e0ce11 --- /dev/null +++ b/fairseq/examples/attention_head_selection/src/speech_to_text_head_selection.py @@ -0,0 +1,180 @@
# Copyright (c) Facebook, Inc. and its affiliates.
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from fairseq.optim.amp_optimizer import AMPOptimizer +from fairseq.tasks import register_task +from fairseq.tasks.speech_to_text import SpeechToTextTask + +from .data.speech_to_text_dataset_with_domain import SpeechToTextDatasetCreatorWithDomain +from .loss.attention_head_selection import HeadSelectionLoss + + +@register_task("speech_to_text_head_selection") +class SpeechToTextHeadSelectionTask(SpeechToTextTask): + + @classmethod + def add_args(cls, parser): + SpeechToTextTask.add_args(parser) + parser.add_argument( + "--task-type", + type=str, + default="lang", + help="task type for head selection, lang or domain" + ) + parser.add_argument( + "--kl-weight", + type=float, + default=0.0, + help="the weight of KL loss" + ) + + def __init__(self, args, tgt_dict): + super().__init__(args, tgt_dict) + self.task_type = args.task_type + assert self.task_type in ["lang", "domain"], "invalid task_type: {}, should be either lang or domain".format(self.task_type) + self.map_task_to_id(args.train_subset) + self.encoder_head_prior = float(args.decoder_attention_heads) / args.total_decoder_attention_heads + self.decoder_head_prior = float(args.encoder_attention_heads) / args.total_encoder_attention_heads + self.kl_loss = HeadSelectionLoss(args) + + def map_task_to_id(self, train_subset): + src_lang_set, tgt_lang_set, domain_set = set(), set(), set() + for split in train_subset.split(","): + seq = split.split("_") + assert len(seq) == 4, "subset {} should be in the format of train_src_tgt_domain".format(split) + _, src_lang, tgt_lang, domain = seq + src_lang_set.add(src_lang) + tgt_lang_set.add(tgt_lang) + domain_set.add(domain) + src_langs = sorted(src_lang_set) + tgt_langs = sorted(tgt_lang_set) + domains = sorted(domain_set) + self.src_lang_map = {src_lang: i for (i, src_lang) in enumerate(src_langs)} + self.tgt_lang_map = {tgt_lang: 
i for (i, tgt_lang) in enumerate(tgt_langs)} + self.domain_map = {domain: i for (i, domain) in enumerate(domains)} + if self.task_type == "lang": + self.encoder_tasks = len(self.src_lang_map) + self.decoder_tasks = len(self.tgt_lang_map) + elif self.task_type == "domain": + self.encoder_tasks = len(self.domain_map) + self.decoder_tasks = len(self.domain_map) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + is_train_split = split.startswith("train") + pre_tokenizer = self.build_tokenizer(self.args) + bpe_tokenizer = self.build_bpe(self.args) + self.datasets[split] = SpeechToTextDatasetCreatorWithDomain.from_tsv( + self.args.data, + self.data_cfg, + split, + self.tgt_dict, + pre_tokenizer, + bpe_tokenizer, + is_train_split=is_train_split, + epoch=epoch, + seed=self.args.seed, + src_lang_map=self.src_lang_map, + tgt_lang_map=self.tgt_lang_map, + domain_map=self.domain_map, + speaker_to_id=self.speaker_to_id + ) + + def build_model(self, args): + args.encoder_tasks = self.encoder_tasks + args.decoder_tasks = self.decoder_tasks + return super(SpeechToTextHeadSelectionTask, self).build_model(args) + + def get_sample_sizes(self, sample, task_ids, num_tasks): + """ + task_ids: (bsz,) + get sample sizes for each task + """ + bsz = task_ids.size(0) + mat = torch.zeros((num_tasks, bsz), device=task_ids.device) + mat[task_ids, torch.arange(bsz)] = 1.0 + ntokens = torch.sum(sample['target'] != 1, dim=-1) + sample_sizes = torch.matmul(mat, ntokens.float()) + return sample_sizes + + def train_step( + self, sample, model, criterion, optimizer, update_num, ignore_grad=False + ): + model.train() + model.set_num_updates(update_num) + # task ids + if self.task_type == "lang": + encoder_task_ids = sample["src_lang_ids"] + decoder_task_ids = sample["tgt_lang_ids"] + elif self.task_type == "domain": + encoder_task_ids = sample["domain_ids"] + decoder_task_ids = sample["domain_ids"] + model.encoder.set_task_ids(encoder_task_ids) + 
model.decoder.set_task_ids(decoder_task_ids) + + with torch.autograd.profiler.record_function("forward"): + with torch.cuda.amp.autocast(enabled=(isinstance(optimizer, AMPOptimizer))): + loss, sample_size, logging_output = criterion(model, sample) + # KL loss + if self.args.encoder_attn_head_select: + sample_sizes = self.get_sample_sizes(sample, encoder_task_ids, self.encoder_tasks) + loss += self.kl_loss( + model.encoder.attn_head_selector.head_samples, + sample_sizes, + self.encoder_head_prior + ) + if self.args.decoder_self_attn_head_select: + sample_sizes = self.get_sample_sizes(sample, decoder_task_ids, self.decoder_tasks) + loss += self.kl_loss( + model.decoder.self_attn_head_selector.head_samples, + sample_sizes, + self.decoder_head_prior + ) + if self.args.dec_enc_attn_head_select: + sample_sizes = self.get_sample_sizes(sample, decoder_task_ids, self.decoder_tasks) + loss += self.kl_loss( + model.decoder.enc_attn_head_selector.head_sampes, + sample_sizes, + self.decoder_head_prior + ) + + if ignore_grad: + loss *= 0 + with torch.autograd.profiler.record_function("backward"): + optimizer.backward(loss) + return loss, sample_size, logging_output + + def valid_step(self, sample, model, criterion): + model.eval() + # task ids + if self.task_type == "lang": + encoder_task_ids = sample["src_lang_ids"] + decoder_task_ids = sample["tgt_lang_ids"] + elif self.task_type == "domain": + encoder_task_ids = sample["domain_ids"] + decoder_task_ids = sample["domain_ids"] + model.encoder.set_task_ids(encoder_task_ids) + model.decoder.set_task_ids(decoder_task_ids) + with torch.no_grad(): + loss, sample_size, logging_output = criterion(model, sample) + return loss, sample_size, logging_output + + def inference_step( + self, generator, models, sample, prefix_tokens=None, constraints=None + ): + with torch.no_grad(): + # task ids + if self.task_type == "lang": + encoder_task_ids = sample["src_lang_ids"][:1] + decoder_task_ids = sample["tgt_lang_ids"][:1] + elif self.task_type 
== "domain": + encoder_task_ids = sample["domain_ids"][:1] + decoder_task_ids = sample["domain_ids"][:1] + for model in models: + model.encoder.set_task_ids(encoder_task_ids) + model.decoder.set_task_ids(decoder_task_ids) + return generator.generate( + models, sample, prefix_tokens=prefix_tokens, constraints=constraints + ) diff --git a/fairseq/examples/audio_nlp/nlu/README.md b/fairseq/examples/audio_nlp/nlu/README.md new file mode 100644 index 0000000..a11b3f3 --- /dev/null +++ b/fairseq/examples/audio_nlp/nlu/README.md @@ -0,0 +1,53 @@ +# End-to-end NLU + +End-to-end spoken language understanding (SLU) predicts intent directly from audio using a single model. It promises to improve the performance of assistant systems by leveraging acoustic information lost in the intermediate textual representation and preventing cascading errors from Automatic Speech Recognition (ASR). Further, having one unified model has efficiency advantages when deploying assistant systems on-device. + +This page releases the code for reproducing the results in [STOP: A dataset for Spoken Task Oriented Semantic Parsing](https://arxiv.org/abs/2207.10643) + +The dataset can be downloaded here: [download link](https://dl.fbaipublicfiles.com/stop/stop.tar.gz) + +The low-resource splits can be downloaded here: [download link](http://dl.fbaipublicfiles.com/stop/low_resource_splits.tar.gz) + +## Pretrained models end-to-end NLU Models + +| Speech Pretraining | ASR Pretraining | Test EM Accuracy | Test EM-Tree Accuracy | Link | +| ----------- | ----------- |----------|----------|----------| +| None | None | 36.54 | 57.01 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-none-none.pt) | +| Wav2Vec | None | 68.05 | 82.53 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-wav2vec-none.pt) | +| HuBERT | None | 68.40 | 82.85 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-hubert-none.pt) | +| Wav2Vec | STOP | 68.70 | 82.78 | 
[link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-wav2vec-stop.pt) | +| HuBERT | STOP | 69.23 | 82.87 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-hubert-stop.pt) | +| Wav2Vec | Librispeech | 68.47 | 82.49 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-wav2vec-ls.pt) | +| HuBERT | Librispeech | 68.70 | 82.78 | [link](https://dl.fbaipublicfiles.com/stop/end-to-end-nlu-hubert-ls.pt) | + +## Pretrained models ASR Models +| Speech Pre-training | ASR Dataset | STOP Eval WER | STOP Test WER | dev\_other WER | dev\_clean WER | test\_clean WER | test\_other WER | Link | +| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | +| HuBERT | Librispeech | 8.47 | 2.99 | 3.25 | 8.06 | 25.68 | 26.19 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-hubert-ls.pt) | +| Wav2Vec | Librispeech | 9.215 | 3.204 | 3.334 | 9.006 | 27.257 | 27.588 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-wav2vec-ls.pt) | +| HuBERT | STOP | 46.31 | 31.30 | 31.52 | 47.16 | 4.29 | 4.26 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-hubert-stop.pt) | +| Wav2Vec | STOP | 43.103 | 27.833 | 28.479 | 28.479 | 4.679 | 4.667 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-wav2vec-stop.pt) | +| HuBERT | Librispeech + STOP | 9.015 | 3.211 | 3.372 | 8.635 | 5.133 | 5.056 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-hubert-ls-stop.pt) | +| Wav2Vec | Librispeech + STOP | 9.549 | 3.537 | 3.625 | 9.514 | 5.59 | 5.562 | [link](https://dl.fbaipublicfiles.com/stop/ctc-asr-wav2vec-ls-stop.pt) | + +## Creating the fairseq datasets from STOP + +First, create the audio file manifests and label files: + +``` +python examples/audio_nlp/nlu/generate_manifests.py --stop_root $STOP_DOWNLOAD_DIR/stop --output $FAIRSEQ_DATASET_OUTPUT/ +``` + + +Run `./examples/audio_nlp/nlu/create_dict_stop.sh $FAIRSEQ_DATASET_OUTPUT` to generate the fairseq dictionaries. 
+ + +## Training an End-to-end NLU Model + + +Download a wav2vec or hubert model from [link](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert) or [link](https://github.com/facebookresearch/fairseq/tree/main/examples/wav2vec) + + +``` +python fairseq_cli/hydra_train.py --config-dir examples/audio_nlp/nlu/configs/ --config-name nlu_finetuning task.data=$FAIRSEQ_DATASET_OUTPUT model.w2v_path=$PRETRAINED_MODEL_PATH +``` diff --git a/fairseq/examples/audio_nlp/nlu/configs/nlu_finetuning.yaml b/fairseq/examples/audio_nlp/nlu/configs/nlu_finetuning.yaml new file mode 100644 index 0000000..bb90f45 --- /dev/null +++ b/fairseq/examples/audio_nlp/nlu/configs/nlu_finetuning.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 10 + tensorboard_logdir: tb + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: em_error + save_interval: 10 + +task: + _name: nlu_finetuning + data: ??? + labels: parse + eval_wer_parse: true + autoregressive: true + +dataset: + num_workers: 6 + max_tokens: 1600000 + skip_invalid_size_inputs_valid_test: true + valid_subset: eval,test + train_subset: train + validate_interval: 10 + +criterion: + _name: label_smoothed_cross_entropy + +optimization: + max_update: 320000 + lr: [0.0001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_seq2seq + w2v_path: ??? 
+ autoregressive: true + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 diff --git a/fairseq/examples/audio_nlp/nlu/create_dict_stop.sh b/fairseq/examples/audio_nlp/nlu/create_dict_stop.sh new file mode 100644 index 0000000..7533932 --- /dev/null +++ b/fairseq/examples/audio_nlp/nlu/create_dict_stop.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +### Script handling creation of data binaries +### for model training within fairseq + + +fairseq_root="." + +data_root=$1 +train_prefix="${data_root}/train" +valid_prefix="${data_root}/eval" +test_prefix="${data_root}/test" + +dest_dir="$data_root/" + +#echo "src dict: $src_dict" > "$dest_dir/src_dict.txt" +#echo "trg dict: $tgt_dict" > "$dest_dir/tgt_dict.txt" + + #--tgtdict $tgt_dict \ +PYTHONPATH=$fairseq_root \ + python $fairseq_root/fairseq_cli/preprocess.py \ + --source-lang "parse" \ + --trainpref "$train_prefix" \ + --validpref "$valid_prefix" \ + --destdir "$dest_dir" \ + --only-source \ + --dict-only \ + --workers 60; + +PYTHONPATH=$fairseq_root \ + python $fairseq_root/fairseq_cli/preprocess.py \ + --source-lang "ltr" \ + --trainpref "$train_prefix" \ + --validpref "$valid_prefix" \ + --destdir "$dest_dir" \ + --only-source \ + --dict-only \ + --workers 60; diff --git a/fairseq/examples/audio_nlp/nlu/generate_manifests.py b/fairseq/examples/audio_nlp/nlu/generate_manifests.py new file mode 100644 index 0000000..e217609 --- /dev/null +++ b/fairseq/examples/audio_nlp/nlu/generate_manifests.py @@ -0,0 +1,83 @@ +import argparse +from pathlib import Path +import soundfile + +def get_insl_frame(parse): + out = [] + def is_ont_token(tok): + return tok[0] in ["[", "]"]; + + res = [] + x = [] + for tok in parse.split(): + if is_ont_token(tok): + res.extend('_'.join(x)) + x = [] + res.append(tok.upper()) + else: + x.append(tok.upper()) + + return " ".join(res) + ' | ' + +def 
sequencify_utterance(utterance): + utterance = utterance.upper() + utterance = utterance.replace(' ', '|') + '|' + utterance = list(utterance) + utterance = ' '.join(utterance) + return utterance + + +def generate_fairseq_manifests(manifest, output_path, audio_root=None): + + with open(manifest, 'r') as i: + parses = [] + utterances = [] + filepaths = [] + keys = None + for (idx, line) in enumerate(i): + if idx == 0: keys = line.strip().split('\t') + else: + data = { k: v for (k, v) in zip(keys, line.split('\t'))} + parses.append(get_insl_frame(data['decoupled_normalized_seqlogical'])) + utterances.append(sequencify_utterance(data['normalized_utterance'])) + filepaths.append(data['file_id']) + + parses_fp = output_path.with_suffix('.parse') + with open(str(parses_fp), 'w') as o: + for p in parses: + o.write(p + '\n') + + utterances_fp = output_path.with_suffix('.ltr') + with open(str(utterances_fp), 'w') as o: + for u in utterances: + o.write(u + '\n') + + filepaths_fp = output_path.with_suffix('.tsv') + with open(str(filepaths_fp), 'w') as o: + o.write(str(audio_root) + '\n') + for f in filepaths: + fullpath = audio_root / f + assert fullpath.exists(), f'{fullpath}' + frames = soundfile.info(fullpath).frames + o.write(f'{f}\t{frames}\n') + +def main(args): + + splits = ['train', 'eval', 'test'] + root = Path(args.stop_root) + output_root = Path(args.output) + + for split in splits: + stop_manifest_path = root / 'manifests' / (split + '.tsv') + output_path = output_root / (split) + + generate_fairseq_manifests(stop_manifest_path, output_path, root) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Process some integers.') + parser.add_argument('--stop_root', type=str, + help='path to stop root directory') + parser.add_argument('--output', type=str, + help='output directory') + args = parser.parse_args() + main(args) diff --git a/fairseq/examples/backtranslation/README.md b/fairseq/examples/backtranslation/README.md new file mode 100644 
index 0000000..73675f1 --- /dev/null +++ b/fairseq/examples/backtranslation/README.md @@ -0,0 +1,297 @@ +# Understanding Back-Translation at Scale (Edunov et al., 2018) + +This page includes pre-trained models from the paper [Understanding Back-Translation at Scale (Edunov et al., 2018)](https://arxiv.org/abs/1808.09381). + +## Pre-trained models + +Model | Description | Dataset | Download +---|---|---|--- +`transformer.wmt18.en-de` | Transformer
([Edunov et al., 2018](https://arxiv.org/abs/1808.09381))
WMT'18 winner | [WMT'18 English-German](http://www.statmt.org/wmt18/translation-task.html) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt18.en-de.ensemble.tar.gz)
See NOTE in the archive + +## Example usage (torch.hub) + +We require a few additional Python dependencies for preprocessing: +```bash +pip install subword_nmt sacremoses +``` + +Then to generate translations from the full model ensemble: +```python +import torch + +# List available models +torch.hub.list('pytorch/fairseq') # [..., 'transformer.wmt18.en-de', ... ] + +# Load the WMT'18 En-De ensemble +en2de_ensemble = torch.hub.load( + 'pytorch/fairseq', 'transformer.wmt18.en-de', + checkpoint_file='wmt18.model1.pt:wmt18.model2.pt:wmt18.model3.pt:wmt18.model4.pt:wmt18.model5.pt', + tokenizer='moses', bpe='subword_nmt') + +# The ensemble contains 5 models +len(en2de_ensemble.models) +# 5 + +# Translate +en2de_ensemble.translate('Hello world!') +# 'Hallo Welt!' +``` + +## Training your own model (WMT'18 English-German) + +The following instructions can be adapted to reproduce the models from the paper. + + +#### Step 1. Prepare parallel data and optionally train a baseline (English-German) model + +First download and preprocess the data: +```bash +# Download and prepare the data +cd examples/backtranslation/ +bash prepare-wmt18en2de.sh +cd ../.. 
+ +# Binarize the data +TEXT=examples/backtranslation/wmt18_en_de +fairseq-preprocess \ + --joined-dictionary \ + --source-lang en --target-lang de \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/wmt18_en_de --thresholdtgt 0 --thresholdsrc 0 \ + --workers 20 + +# Copy the BPE code into the data-bin directory for future use +cp examples/backtranslation/wmt18_en_de/code data-bin/wmt18_en_de/code +``` + +(Optionally) Train a baseline model (English-German) using just the parallel data: +```bash +CHECKPOINT_DIR=checkpoints_en_de_parallel +fairseq-train --fp16 \ + data-bin/wmt18_en_de \ + --source-lang en --target-lang de \ + --arch transformer_wmt_en_de_big --share-all-embeddings \ + --dropout 0.3 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr 0.001 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --max-tokens 3584 --update-freq 16 \ + --max-update 30000 \ + --save-dir $CHECKPOINT_DIR +# Note: the above command assumes 8 GPUs. Adjust `--update-freq` if you have a +# different number of GPUs. 
+``` + +Average the last 10 checkpoints: +```bash +python scripts/average_checkpoints.py \ + --inputs $CHECKPOINT_DIR \ + --num-epoch-checkpoints 10 \ + --output $CHECKPOINT_DIR/checkpoint.avg10.pt +``` + +Evaluate BLEU: +```bash +# tokenized BLEU on newstest2017: +bash examples/backtranslation/tokenized_bleu.sh \ + wmt17 \ + en-de \ + data-bin/wmt18_en_de \ + data-bin/wmt18_en_de/code \ + $CHECKPOINT_DIR/checkpoint.avg10.pt +# BLEU4 = 29.57, 60.9/35.4/22.9/15.5 (BP=1.000, ratio=1.014, syslen=63049, reflen=62152) +# compare to 29.46 in Table 1, which is also for tokenized BLEU + +# generally it's better to report (detokenized) sacrebleu though: +bash examples/backtranslation/sacrebleu.sh \ + wmt17 \ + en-de \ + data-bin/wmt18_en_de \ + data-bin/wmt18_en_de/code \ + $CHECKPOINT_DIR/checkpoint.avg10.pt +# BLEU+case.mixed+lang.en-de+numrefs.1+smooth.exp+test.wmt17+tok.13a+version.1.4.3 = 29.0 60.6/34.7/22.4/14.9 (BP = 1.000 ratio = 1.013 hyp_len = 62099 ref_len = 61287) +``` + + +#### Step 2. Back-translate monolingual German data + +Train a reverse model (German-English) to do the back-translation: +```bash +CHECKPOINT_DIR=checkpoints_de_en_parallel +fairseq-train --fp16 \ + data-bin/wmt18_en_de \ + --source-lang de --target-lang en \ + --arch transformer_wmt_en_de_big --share-all-embeddings \ + --dropout 0.3 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr 0.001 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --max-tokens 3584 --update-freq 16 \ + --max-update 30000 \ + --save-dir $CHECKPOINT_DIR +# Note: the above command assumes 8 GPUs. Adjust `--update-freq` if you have a +# different number of GPUs. 
+``` + +Let's evaluate the back-translation (BT) model to make sure it is well trained: +```bash +bash examples/backtranslation/sacrebleu.sh \ + wmt17 \ + de-en \ + data-bin/wmt18_en_de \ + data-bin/wmt18_en_de/code \ + $CHECKPOINT_DIR/checkpoint_best.pt +# BLEU+case.mixed+lang.de-en+numrefs.1+smooth.exp+test.wmt17+tok.13a+version.1.4.3 = 34.9 66.9/41.8/28.5/19.9 (BP = 0.983 ratio = 0.984 hyp_len = 63342 ref_len = 64399) +# compare to the best system from WMT'17 which scored 35.1: http://matrix.statmt.org/matrix/systems_list/1868 +``` + +Next prepare the monolingual data: +```bash +# Download and prepare the monolingual data +# By default the script samples 25M monolingual sentences, which after +# deduplication should be just over 24M sentences. These are split into 25 +# shards, each with 1M sentences (except for the last shard). +cd examples/backtranslation/ +bash prepare-de-monolingual.sh +cd ../.. + +# Binarize each shard of the monolingual data +TEXT=examples/backtranslation/wmt18_de_mono +for SHARD in $(seq -f "%02g" 0 24); do \ + fairseq-preprocess \ + --only-source \ + --source-lang de --target-lang en \ + --joined-dictionary \ + --srcdict data-bin/wmt18_en_de/dict.de.txt \ + --testpref $TEXT/bpe.monolingual.dedup.${SHARD} \ + --destdir data-bin/wmt18_de_mono/shard${SHARD} \ + --workers 20; \ + cp data-bin/wmt18_en_de/dict.en.txt data-bin/wmt18_de_mono/shard${SHARD}/; \ +done +``` + +Now we're ready to perform back-translation over the monolingual data. 
The +following command generates via sampling, but it's possible to use greedy +decoding (`--beam 1`), beam search (`--beam 5`), +top-k sampling (`--sampling --beam 1 --sampling-topk 10`), etc.: +```bash +mkdir backtranslation_output +for SHARD in $(seq -f "%02g" 0 24); do \ + fairseq-generate --fp16 \ + data-bin/wmt18_de_mono/shard${SHARD} \ + --path $CHECKPOINT_DIR/checkpoint_best.pt \ + --skip-invalid-size-inputs-valid-test \ + --max-tokens 4096 \ + --sampling --beam 1 \ + > backtranslation_output/sampling.shard${SHARD}.out; \ +done +``` + +After BT, use the `extract_bt_data.py` script to re-combine the shards, extract +the back-translations and apply length ratio filters: +```bash +python examples/backtranslation/extract_bt_data.py \ + --minlen 1 --maxlen 250 --ratio 1.5 \ + --output backtranslation_output/bt_data --srclang en --tgtlang de \ + backtranslation_output/sampling.shard*.out + +# Ensure lengths are the same: +# wc -l backtranslation_output/bt_data.{en,de} +# 21795614 backtranslation_output/bt_data.en +# 21795614 backtranslation_output/bt_data.de +# 43591228 total +``` + +Binarize the filtered BT data and combine it with the parallel data: +```bash +TEXT=backtranslation_output +fairseq-preprocess \ + --source-lang en --target-lang de \ + --joined-dictionary \ + --srcdict data-bin/wmt18_en_de/dict.en.txt \ + --trainpref $TEXT/bt_data \ + --destdir data-bin/wmt18_en_de_bt \ + --workers 20 + +# We want to train on the combined data, so we'll symlink the parallel + BT data +# in the wmt18_en_de_para_plus_bt directory. We link the parallel data as "train" +# and the BT data as "train1", so that fairseq will combine them automatically +# and so that we can use the `--upsample-primary` option to upsample the +# parallel data (if desired). 
+PARA_DATA=$(readlink -f data-bin/wmt18_en_de) +BT_DATA=$(readlink -f data-bin/wmt18_en_de_bt) +COMB_DATA=data-bin/wmt18_en_de_para_plus_bt +mkdir -p $COMB_DATA +for LANG in en de; do \ + ln -s ${PARA_DATA}/dict.$LANG.txt ${COMB_DATA}/dict.$LANG.txt; \ + for EXT in bin idx; do \ + ln -s ${PARA_DATA}/train.en-de.$LANG.$EXT ${COMB_DATA}/train.en-de.$LANG.$EXT; \ + ln -s ${BT_DATA}/train.en-de.$LANG.$EXT ${COMB_DATA}/train1.en-de.$LANG.$EXT; \ + ln -s ${PARA_DATA}/valid.en-de.$LANG.$EXT ${COMB_DATA}/valid.en-de.$LANG.$EXT; \ + ln -s ${PARA_DATA}/test.en-de.$LANG.$EXT ${COMB_DATA}/test.en-de.$LANG.$EXT; \ + done; \ +done +``` + + +#### 3. Train an English-German model over the combined parallel + BT data + +Finally we can train a model over the parallel + BT data: +```bash +CHECKPOINT_DIR=checkpoints_en_de_parallel_plus_bt +fairseq-train --fp16 \ + data-bin/wmt18_en_de_para_plus_bt \ + --upsample-primary 16 \ + --source-lang en --target-lang de \ + --arch transformer_wmt_en_de_big --share-all-embeddings \ + --dropout 0.3 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr 0.0007 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --max-tokens 3584 --update-freq 16 \ + --max-update 100000 \ + --save-dir $CHECKPOINT_DIR +# Note: the above command assumes 8 GPUs. Adjust `--update-freq` if you have a +# different number of GPUs. 
+``` + +Average the last 10 checkpoints: +```bash +python scripts/average_checkpoints.py \ + --inputs $CHECKPOINT_DIR \ + --num-epoch-checkpoints 10 \ + --output $CHECKPOINT_DIR/checkpoint.avg10.pt +``` + +Evaluate BLEU: +```bash +# tokenized BLEU on newstest2017: +bash examples/backtranslation/tokenized_bleu.sh \ + wmt17 \ + en-de \ + data-bin/wmt18_en_de \ + data-bin/wmt18_en_de/code \ + $CHECKPOINT_DIR/checkpoint.avg10.pt +# BLEU4 = 32.35, 64.4/38.9/26.2/18.3 (BP=0.977, ratio=0.977, syslen=60729, reflen=62152) +# compare to 32.35 in Table 1, which is also for tokenized BLEU + +# generally it's better to report (detokenized) sacrebleu: +bash examples/backtranslation/sacrebleu.sh \ + wmt17 \ + en-de \ + data-bin/wmt18_en_de \ + data-bin/wmt18_en_de/code \ + $CHECKPOINT_DIR/checkpoint.avg10.pt +# BLEU+case.mixed+lang.en-de+numrefs.1+smooth.exp+test.wmt17+tok.13a+version.1.4.3 = 31.5 64.3/38.2/25.6/17.6 (BP = 0.971 ratio = 0.971 hyp_len = 59515 ref_len = 61287) +``` + + +## Citation +```bibtex +@inproceedings{edunov2018backtranslation, + title = {Understanding Back-Translation at Scale}, + author = {Edunov, Sergey and Ott, Myle and Auli, Michael and Grangier, David}, + booktitle = {Conference of the Association for Computational Linguistics (ACL)}, + year = 2018, +} +``` diff --git a/fairseq/examples/backtranslation/deduplicate_lines.py b/fairseq/examples/backtranslation/deduplicate_lines.py new file mode 100644 index 0000000..50e4583 --- /dev/null +++ b/fairseq/examples/backtranslation/deduplicate_lines.py @@ -0,0 +1,41 @@ +#!/usr/bin/python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import fileinput +import hashlib +import sys +from multiprocessing import Pool + + +def get_hashes_and_lines(raw_line): + hash = hashlib.md5(raw_line).hexdigest() + return hash, raw_line + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--workers", type=int, default=10) + parser.add_argument("files", nargs="*", help="input files") + args = parser.parse_args() + + seen = set() + with fileinput.input(args.files, mode="rb") as h: + pool = Pool(args.workers) + results = pool.imap_unordered(get_hashes_and_lines, h, 1000) + for i, (hash, raw_line) in enumerate(results): + if hash not in seen: + seen.add(hash) + sys.stdout.buffer.write(raw_line) + if i % 1000000 == 0: + print(i, file=sys.stderr, end="", flush=True) + elif i % 100000 == 0: + print(".", file=sys.stderr, end="", flush=True) + print(file=sys.stderr, flush=True) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/backtranslation/extract_bt_data.py b/fairseq/examples/backtranslation/extract_bt_data.py new file mode 100644 index 0000000..e766391 --- /dev/null +++ b/fairseq/examples/backtranslation/extract_bt_data.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import fileinput + +from tqdm import tqdm + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Extract back-translations from the stdout of fairseq-generate. " + "If there are multiply hypotheses for a source, we only keep the first one. 
" + ) + ) + parser.add_argument("--output", required=True, help="output prefix") + parser.add_argument( + "--srclang", required=True, help="source language (extracted from H-* lines)" + ) + parser.add_argument( + "--tgtlang", required=True, help="target language (extracted from S-* lines)" + ) + parser.add_argument("--minlen", type=int, help="min length filter") + parser.add_argument("--maxlen", type=int, help="max length filter") + parser.add_argument("--ratio", type=float, help="ratio filter") + parser.add_argument("files", nargs="*", help="input files") + args = parser.parse_args() + + def validate(src, tgt): + srclen = len(src.split(" ")) if src != "" else 0 + tgtlen = len(tgt.split(" ")) if tgt != "" else 0 + if ( + (args.minlen is not None and (srclen < args.minlen or tgtlen < args.minlen)) + or ( + args.maxlen is not None + and (srclen > args.maxlen or tgtlen > args.maxlen) + ) + or ( + args.ratio is not None + and (max(srclen, tgtlen) / float(min(srclen, tgtlen)) > args.ratio) + ) + ): + return False + return True + + def safe_index(toks, index, default): + try: + return toks[index] + except IndexError: + return default + + with open(args.output + "." + args.srclang, "w") as src_h, open( + args.output + "." 
+ args.tgtlang, "w" + ) as tgt_h: + for line in tqdm(fileinput.input(args.files)): + if line.startswith("S-"): + tgt = safe_index(line.rstrip().split("\t"), 1, "") + elif line.startswith("H-"): + if tgt is not None: + src = safe_index(line.rstrip().split("\t"), 2, "") + if validate(src, tgt): + print(src, file=src_h) + print(tgt, file=tgt_h) + tgt = None + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/backtranslation/prepare-de-monolingual.sh b/fairseq/examples/backtranslation/prepare-de-monolingual.sh new file mode 100644 index 0000000..5e67b2b --- /dev/null +++ b/fairseq/examples/backtranslation/prepare-de-monolingual.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl +BPEROOT=subword-nmt/subword_nmt + + +BPE_CODE=wmt18_en_de/code +SUBSAMPLE_SIZE=25000000 +LANG=de + + +OUTDIR=wmt18_${LANG}_mono +orig=orig +tmp=$OUTDIR/tmp +mkdir -p $OUTDIR $tmp + + +URLS=( + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2007.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2008.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2009.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2010.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2011.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2012.de.shuffled.gz" + "http://www.statmt.org/wmt14/training-monolingual-news-crawl/news.2013.de.shuffled.gz" + "http://www.statmt.org/wmt15/training-monolingual-news-crawl-v2/news.2014.de.shuffled.v2.gz" + "http://data.statmt.org/wmt16/translation-task/news.2015.de.shuffled.gz" + "http://data.statmt.org/wmt17/translation-task/news.2016.de.shuffled.gz" + 
"http://data.statmt.org/wmt18/translation-task/news.2017.de.shuffled.deduped.gz" +) +FILES=( + "news.2007.de.shuffled.gz" + "news.2008.de.shuffled.gz" + "news.2009.de.shuffled.gz" + "news.2010.de.shuffled.gz" + "news.2011.de.shuffled.gz" + "news.2012.de.shuffled.gz" + "news.2013.de.shuffled.gz" + "news.2014.de.shuffled.v2.gz" + "news.2015.de.shuffled.gz" + "news.2016.de.shuffled.gz" + "news.2017.de.shuffled.deduped.gz" +) + + +cd $orig +for ((i=0;i<${#URLS[@]};++i)); do + file=${FILES[i]} + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + url=${URLS[i]} + wget "$url" + fi +done +cd .. + + +if [ -f $tmp/monolingual.${SUBSAMPLE_SIZE}.${LANG} ]; then + echo "found monolingual sample, skipping shuffle/sample/tokenize" +else + gzip -c -d -k $(for FILE in "${FILES[@]}"; do echo $orig/$FILE; done) \ + | shuf -n $SUBSAMPLE_SIZE \ + | perl $NORM_PUNC $LANG \ + | perl $REM_NON_PRINT_CHAR \ + | perl $TOKENIZER -threads 8 -a -l $LANG \ + > $tmp/monolingual.${SUBSAMPLE_SIZE}.${LANG} +fi + + +if [ -f $tmp/bpe.monolingual.${SUBSAMPLE_SIZE}.${LANG} ]; then + echo "found BPE monolingual sample, skipping BPE step" +else + python $BPEROOT/apply_bpe.py -c $BPE_CODE \ + < $tmp/monolingual.${SUBSAMPLE_SIZE}.${LANG} \ + > $tmp/bpe.monolingual.${SUBSAMPLE_SIZE}.${LANG} +fi + + +if [ -f $tmp/bpe.monolingual.dedup.${SUBSAMPLE_SIZE}.${LANG} ]; then + echo "found deduplicated monolingual sample, skipping deduplication step" +else + python deduplicate_lines.py $tmp/bpe.monolingual.${SUBSAMPLE_SIZE}.${LANG} \ + > $tmp/bpe.monolingual.dedup.${SUBSAMPLE_SIZE}.${LANG} +fi + + +if [ -f $OUTDIR/bpe.monolingual.dedup.00.de ]; then + echo "found sharded data, skipping sharding step" +else + split --lines 1000000 --numeric-suffixes \ + --additional-suffix .${LANG} \ + $tmp/bpe.monolingual.dedup.${SUBSAMPLE_SIZE}.${LANG} \ + $OUTDIR/bpe.monolingual.dedup. 
+fi diff --git a/fairseq/examples/backtranslation/prepare-wmt18en2de.sh b/fairseq/examples/backtranslation/prepare-wmt18en2de.sh new file mode 100644 index 0000000..f6fd275 --- /dev/null +++ b/fairseq/examples/backtranslation/prepare-wmt18en2de.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh + +echo 'Cloning Moses github repository (for tokenization scripts)...' +git clone https://github.com/moses-smt/mosesdecoder.git + +echo 'Cloning Subword NMT repository (for BPE pre-processing)...' +git clone https://github.com/rsennrich/subword-nmt.git + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +CLEAN=$SCRIPTS/training/clean-corpus-n.perl +NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl +BPEROOT=subword-nmt/subword_nmt +BPE_TOKENS=32000 + +URLS=( + "http://statmt.org/wmt13/training-parallel-europarl-v7.tgz" + "http://statmt.org/wmt13/training-parallel-commoncrawl.tgz" + "http://data.statmt.org/wmt18/translation-task/training-parallel-nc-v13.tgz" + "http://data.statmt.org/wmt18/translation-task/rapid2016.tgz" + "http://data.statmt.org/wmt17/translation-task/dev.tgz" + "http://statmt.org/wmt14/test-full.tgz" +) +FILES=( + "training-parallel-europarl-v7.tgz" + "training-parallel-commoncrawl.tgz" + "training-parallel-nc-v13.tgz" + "rapid2016.tgz" + "dev.tgz" + "test-full.tgz" +) +CORPORA=( + "training/europarl-v7.de-en" + "commoncrawl.de-en" + "training-parallel-nc-v13/news-commentary-v13.de-en" + "rapid2016.de-en" +) + +if [ ! -d "$SCRIPTS" ]; then + echo "Please set SCRIPTS variable correctly to point to Moses scripts." 
+ exit 1 +fi + +OUTDIR=wmt18_en_de + +src=en +tgt=de +lang=en-de +prep=$OUTDIR +tmp=$prep/tmp +orig=orig + +mkdir -p $orig $tmp $prep + +cd $orig + +for ((i=0;i<${#URLS[@]};++i)); do + file=${FILES[i]} + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + url=${URLS[i]} + wget "$url" + if [ -f $file ]; then + echo "$url successfully downloaded." + else + echo "$url not successfully downloaded." + exit 1 + fi + if [ ${file: -4} == ".tgz" ]; then + tar zxvf $file + elif [ ${file: -4} == ".tar" ]; then + tar xvf $file + fi + fi +done +cd .. + +echo "pre-processing train data..." +for l in $src $tgt; do + rm $tmp/train.tags.$lang.tok.$l + for f in "${CORPORA[@]}"; do + cat $orig/$f.$l | \ + perl $NORM_PUNC $l | \ + perl $REM_NON_PRINT_CHAR | \ + perl $TOKENIZER -threads 8 -a -l $l >> $tmp/train.tags.$lang.tok.$l + done +done + +echo "pre-processing test data..." +for l in $src $tgt; do + if [ "$l" == "$src" ]; then + t="src" + else + t="ref" + fi + grep '\s*//g' | \ + sed -e 's/\s*<\/seg>\s*//g' | \ + sed -e "s/\’/\'/g" | \ + perl $TOKENIZER -threads 8 -a -l $l > $tmp/test.$l + echo "" +done + +echo "splitting train and valid..." +for l in $src $tgt; do + awk '{if (NR%100 == 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/valid.$l + awk '{if (NR%100 != 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/train.$l +done + +TRAIN=$tmp/train.de-en +BPE_CODE=$prep/code +rm -f $TRAIN +for l in $src $tgt; do + cat $tmp/train.$l >> $TRAIN +done + +echo "learn_bpe.py on ${TRAIN}..." +python $BPEROOT/learn_bpe.py -s $BPE_TOKENS < $TRAIN > $BPE_CODE + +for L in $src $tgt; do + for f in train.$L valid.$L test.$L; do + echo "apply_bpe.py to ${f}..." 
+ python $BPEROOT/apply_bpe.py -c $BPE_CODE < $tmp/$f > $tmp/bpe.$f + done +done + +perl $CLEAN -ratio 1.5 $tmp/bpe.train $src $tgt $prep/train 1 250 +perl $CLEAN -ratio 1.5 $tmp/bpe.valid $src $tgt $prep/valid 1 250 + +for L in $src $tgt; do + cp $tmp/bpe.test.$L $prep/test.$L +done diff --git a/fairseq/examples/backtranslation/sacrebleu.sh b/fairseq/examples/backtranslation/sacrebleu.sh new file mode 100644 index 0000000..a70da23 --- /dev/null +++ b/fairseq/examples/backtranslation/sacrebleu.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +if [ $# -ne 5 ]; then + echo "usage: $0 [dataset=wmt14/full] [langpair=en-de] [databin] [bpecode] [model]" + exit +fi + + +DATASET=$1 +LANGPAIR=$2 +DATABIN=$3 +BPECODE=$4 +MODEL=$5 + +SRCLANG=$(echo $LANGPAIR | cut -d '-' -f 1) +TGTLANG=$(echo $LANGPAIR | cut -d '-' -f 2) + + +BPEROOT=examples/backtranslation/subword-nmt/subword_nmt +if [ ! -e $BPEROOT ]; then + BPEROOT=subword-nmt/subword_nmt + if [ ! -e $BPEROOT ]; then + echo 'Cloning Subword NMT repository (for BPE pre-processing)...' 
+ git clone https://github.com/rsennrich/subword-nmt.git + fi +fi + + +sacrebleu -t $DATASET -l $LANGPAIR --echo src \ +| sacremoses tokenize -a -l $SRCLANG -q \ +| python $BPEROOT/apply_bpe.py -c $BPECODE \ +| fairseq-interactive $DATABIN --path $MODEL \ + -s $SRCLANG -t $TGTLANG \ + --beam 5 --remove-bpe --buffer-size 1024 --max-tokens 8000 \ +| grep ^H- | cut -f 3- \ +| sacremoses detokenize -l $TGTLANG -q \ +| sacrebleu -t $DATASET -l $LANGPAIR diff --git a/fairseq/examples/backtranslation/tokenized_bleu.sh b/fairseq/examples/backtranslation/tokenized_bleu.sh new file mode 100644 index 0000000..c6d6aaa --- /dev/null +++ b/fairseq/examples/backtranslation/tokenized_bleu.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +if [ $# -ne 5 ]; then + echo "usage: $0 [dataset=wmt14/full] [langpair=en-de] [databin] [bpecode] [model]" + exit +fi + + +DATASET=$1 +LANGPAIR=$2 +DATABIN=$3 +BPECODE=$4 +MODEL=$5 + +SRCLANG=$(echo $LANGPAIR | cut -d '-' -f 1) +TGTLANG=$(echo $LANGPAIR | cut -d '-' -f 2) + + +BPEROOT=examples/backtranslation/subword-nmt/subword_nmt +if [ ! -e $BPEROOT ]; then + BPEROOT=subword-nmt/subword_nmt + if [ ! -e $BPEROOT ]; then + echo 'Cloning Subword NMT repository (for BPE pre-processing)...' 
+ git clone https://github.com/rsennrich/subword-nmt.git + fi +fi + + +TMP_REF=$(mktemp) + +sacrebleu -t $DATASET -l $LANGPAIR --echo ref -q \ +| sacremoses normalize -l $TGTLANG -q \ +| sacremoses tokenize -a -l $TGTLANG -q \ +> $TMP_REF + +sacrebleu -t $DATASET -l $LANGPAIR --echo src -q \ +| sacremoses normalize -l $SRCLANG -q \ +| sacremoses tokenize -a -l $SRCLANG -q \ +| python $BPEROOT/apply_bpe.py -c $BPECODE \ +| fairseq-interactive $DATABIN --path $MODEL \ + -s $SRCLANG -t $TGTLANG \ + --beam 5 --remove-bpe --buffer-size 1024 --max-tokens 8000 \ +| grep ^H- | cut -f 3- \ +| fairseq-score --ref $TMP_REF + +rm -f $TMP_REF diff --git a/fairseq/examples/bart/README.glue.md b/fairseq/examples/bart/README.glue.md new file mode 100644 index 0000000..a010934 --- /dev/null +++ b/fairseq/examples/bart/README.glue.md @@ -0,0 +1,99 @@ +# Fine-tuning BART on GLUE tasks + +### 1) Download the data from GLUE website (https://gluebenchmark.com/tasks) using following commands: +```bash +wget https://gist.githubusercontent.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e/raw/17b8dd0d724281ed7c3b2aeeda662b92809aadd5/download_glue_data.py +python download_glue_data.py --data_dir glue_data --tasks all +``` + +### 2) Preprocess GLUE task data (same as RoBERTa): +```bash +./examples/roberta/preprocess_GLUE_tasks.sh glue_data +``` +`glue_task_name` is one of the following: +`{ALL, QQP, MNLI, QNLI, MRPC, RTE, STS-B, SST-2, CoLA}` +Use `ALL` for preprocessing all the glue tasks. + +### 3) Fine-tuning on GLUE task: +Example fine-tuning cmd for `RTE` task +```bash +TOTAL_NUM_UPDATES=2036 # 10 epochs through RTE for bsz 16 +WARMUP_UPDATES=61 # 6 percent of the number of updates +LR=1e-05 # Peak LR for polynomial LR scheduler. +NUM_CLASSES=2 +MAX_SENTENCES=16 # Batch size. 
+BART_PATH=/path/to/bart/model.pt + +CUDA_VISIBLE_DEVICES=0,1 fairseq-train RTE-bin/ \ + --restore-file $BART_PATH \ + --batch-size $MAX_SENTENCES \ + --max-tokens 4400 \ + --task sentence_prediction \ + --add-prev-output-tokens \ + --layernorm-embedding \ + --share-all-embeddings \ + --share-decoder-input-output-embed \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --init-token 0 \ + --arch bart_large \ + --criterion sentence_prediction \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.01 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-08 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --max-epoch 10 \ + --find-unused-parameters \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric; +``` + +For each of the GLUE task, you will need to use following cmd-line arguments: + +Model | MNLI | QNLI | QQP | RTE | SST-2 | MRPC | CoLA | STS-B +---|---|---|---|---|---|---|---|--- +`--num-classes` | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 1 +`--lr` | 5e-6 | 1e-5 | 1e-5 | 1e-5 | 5e-6 | 2e-5 | 2e-5 | 2e-5 +`bsz` | 128 | 32 | 32 | 32 | 128 | 64 | 64 | 32 +`--total-num-update` | 30968 | 33112 | 113272 | 1018 | 5233 | 1148 | 1334 | 1799 +`--warmup-updates` | 1858 | 1986 | 6796 | 61 | 314 | 68 | 80 | 107 + +For `STS-B` additionally add `--regression-target --best-checkpoint-metric loss` and remove `--maximize-best-checkpoint-metric`. + +**Note:** + +a) `--total-num-updates` is used by `--polynomial_decay` scheduler and is calculated for `--max-epoch=10` and `--batch-size=32/64/128` depending on the task. + +b) Above cmd-args and hyperparams are tested on Nvidia `V100` GPU with `32gb` of memory for each task. 
Depending on the GPU memory resources available to you, you can increase `--update-freq` and reduce `--batch-size`. + +### Inference on GLUE task +After training the model as mentioned in the previous step, you can perform inference with checkpoints in the `checkpoints/` directory using the following Python code snippet: + +```python +from fairseq.models.bart import BARTModel + +bart = BARTModel.from_pretrained( + 'checkpoints/', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='RTE-bin' +) + +label_fn = lambda label: bart.task.label_dictionary.string( + [label + bart.task.label_dictionary.nspecial] +) +ncorrect, nsamples = 0, 0 +bart.cuda() +bart.eval() +with open('glue_data/RTE/dev.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[1], tokens[2], tokens[3] + tokens = bart.encode(sent1, sent2) + prediction = bart.predict('sentence_classification_head', tokens).argmax().item() + prediction_label = label_fn(prediction) + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) +``` diff --git a/fairseq/examples/bart/README.md b/fairseq/examples/bart/README.md new file mode 100644 index 0000000..4050a72 --- /dev/null +++ b/fairseq/examples/bart/README.md @@ -0,0 +1,228 @@ +# BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension + +[https://arxiv.org/abs/1910.13461](https://arxiv.org/abs/1910.13461) + +## Introduction + +BART is a sequence-to-sequence model trained with denoising as its pretraining objective. We show that this pretraining objective is more generic and show that we can match [RoBERTa](../roberta) results on SQuAD and GLUE and gain state-of-the-art results on summarization (XSum, CNN dataset), long form generative question answering (ELI5) and dialog response generation (ConvAI2). See the associated paper for more details. 
+ +## Pre-trained models + +Model | Description | # params | Download +---|---|---|--- +`bart.base` | BART model with 6 encoder and decoder layers | 140M | [bart.base.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/bart.base.tar.gz) +`bart.large` | BART model with 12 encoder and decoder layers | 400M | [bart.large.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/bart.large.tar.gz) +`bart.large.mnli` | `bart.large` finetuned on `MNLI` | 400M | [bart.large.mnli.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/bart.large.mnli.tar.gz) +`bart.large.cnn` | `bart.large` finetuned on `CNN-DM` | 400M | [bart.large.cnn.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/bart.large.cnn.tar.gz) +`bart.large.xsum` | `bart.large` finetuned on `Xsum` | 400M | [bart.large.xsum.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/bart.large.xsum.tar.gz) + +## Results + +**[GLUE (Wang et al., 2019)](https://gluebenchmark.com/)** +_(dev set, single model, single-task finetuning)_ + +Model | MNLI | QNLI | QQP | RTE | SST-2 | MRPC | CoLA | STS-B +---|---|---|---|---|---|---|---|--- +`roberta.large` | 90.2 | 94.7 | 92.2 | 86.6 | 96.4 | 90.9 | 68.0 | 92.4 +`bart.large` | 89.9 | 94.9 | 92.5 | 87.0 | 96.6 | 90.4 | 62.8 | 91.2 + +**[SQuAD (Rajpurkar et al., 2018)](https://rajpurkar.github.io/SQuAD-explorer/)** +_(dev set, no additional data used)_ + +Model | SQuAD 1.1 EM/F1 | SQuAD 2.0 EM/F1 +---|---|--- +`roberta.large` | 88.9/94.6 | 86.5/89.4 +`bart.large` | 88.8/94.6 | 86.1/89.2 + +**[CNN/Daily Mail](http://nlpprogress.com/english/summarization.html)** +_(test set, no additional data used)_ + +Model | R1 | R2 | RL +---|---|---|--- +`BERTSUMEXTABS` | 42.13 | 19.60 | 39.18 +`bart.large` | 44.16 | 21.28 | 40.90 + +## Example usage + +##### Load BART from torch.hub (PyTorch >= 1.1): +```python +import torch +bart = torch.hub.load('pytorch/fairseq', 'bart.large') +bart.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Load BART (for PyTorch 1.0 or 
custom models): +```python +# Download bart.large model +wget https://dl.fbaipublicfiles.com/fairseq/models/bart.large.tar.gz +tar -xzvf bart.large.tar.gz + +# Load the model in fairseq +from fairseq.models.bart import BARTModel +bart = BARTModel.from_pretrained('/path/to/bart.large', checkpoint_file='model.pt') +bart.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Apply Byte-Pair Encoding (BPE) to input text: +```python +tokens = bart.encode('Hello world!') +assert tokens.tolist() == [0, 31414, 232, 328, 2] +bart.decode(tokens) # 'Hello world!' +``` + +##### Extract features from BART: +```python +# Extract the last layer's features +last_layer_features = bart.extract_features(tokens) +assert last_layer_features.size() == torch.Size([1, 5, 1024]) + +# Extract all layer's features from decoder (layer 0 is the embedding layer) +all_layers = bart.extract_features(tokens, return_all_hiddens=True) +assert len(all_layers) == 13 +assert torch.all(all_layers[-1] == last_layer_features) +``` + +##### Use BART for sentence-pair classification tasks: +```python +# Download BART already finetuned for MNLI +bart = torch.hub.load('pytorch/fairseq', 'bart.large.mnli') +bart.eval() # disable dropout for evaluation + +# Encode a pair of sentences and make a prediction +tokens = bart.encode('BART is a seq2seq model.', 'BART is not sequence to sequence.') +bart.predict('mnli', tokens).argmax() # 0: contradiction + +# Encode another pair of sentences +tokens = bart.encode('BART is denoising autoencoder.', 'BART is version of autoencoder.') +bart.predict('mnli', tokens).argmax() # 2: entailment +``` + +##### Register a new (randomly initialized) classification head: +```python +bart.register_classification_head('new_task', num_classes=3) +logprobs = bart.predict('new_task', tokens) +``` + +##### Batched prediction: +```python +import torch +from fairseq.data.data_utils import collate_tokens + +bart = torch.hub.load('pytorch/fairseq', 'bart.large.mnli') 
+bart.eval() + +batch_of_pairs = [ + ['BART is a seq2seq model.', 'BART is not sequence to sequence.'], + ['BART is denoising autoencoder.', 'BART is version of autoencoder.'], +] + +batch = collate_tokens( + [bart.encode(pair[0], pair[1]) for pair in batch_of_pairs], pad_idx=1 +) + +logprobs = bart.predict('mnli', batch) +print(logprobs.argmax(dim=1)) +# tensor([0, 2]) +``` + +##### Using the GPU: +```python +bart.cuda() +bart.predict('new_task', tokens) +``` + +#### Filling masks: + +BART can be used to fill multiple `<mask>` tokens in the input. +```python +bart = torch.hub.load('pytorch/fairseq', 'bart.base') +bart.eval() +bart.fill_mask(['The cat <mask> on the <mask>.'], topk=3, beam=10) +# [[('The cat was on the ground.', tensor(-0.6183)), ('The cat was on the floor.', tensor(-0.6798)), ('The cat sleeps on the couch.', tensor(-0.6830))]] +``` + +Note that by default we enforce the output length to match the input length. +This can be disabled by setting ``match_source_len=False``: +``` +bart.fill_mask(['The cat <mask> on the <mask>.'], topk=3, beam=10, match_source_len=False) +# [[('The cat was on the ground.', tensor(-0.6185)), ('The cat was asleep on the couch.', tensor(-0.6276)), ('The cat was on the floor.', tensor(-0.6800))]] +``` + +Example code to fill masks for a batch of sentences using GPU +``` +bart.cuda() +bart.fill_mask(['The cat <mask> on the <mask>.', 'The dog <mask> on the <mask>.'], topk=3, beam=10) +# [[('The cat was on the ground.', tensor(-0.6183)), ('The cat was on the floor.', tensor(-0.6798)), ('The cat sleeps on the couch.', tensor(-0.6830))], [('The dog was on the ground.', tensor(-0.6190)), ('The dog lay on the ground.', tensor(-0.6711)), +('The dog was asleep on the couch', tensor(-0.6796))]] +``` + +#### Evaluating the `bart.large.mnli` model: + +Example python code snippet to evaluate accuracy on the MNLI `dev_matched` set. 
+```python +label_map = {0: 'contradiction', 1: 'neutral', 2: 'entailment'} +ncorrect, nsamples = 0, 0 +bart.cuda() +bart.eval() +with open('glue_data/MNLI/dev_matched.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[8], tokens[9], tokens[-1] + tokens = bart.encode(sent1, sent2) + prediction = bart.predict('mnli', tokens).argmax().item() + prediction_label = label_map[prediction] + ncorrect += int(prediction_label == target) + nsamples += 1 + print('| Accuracy: ', float(ncorrect)/float(nsamples)) +# Expected output: 0.9010 +``` + +#### Evaluating the `bart.large.cnn` model: +- Follow instructions [here](https://github.com/abisee/cnn-dailymail) to download and process into data-files such that `test.source` and `test.target` has one line for each non-tokenized sample. +- For simpler preprocessing, you can also `wget https://cdn-datasets.huggingface.co/summarization/cnn_dm_v2.tgz`, although there is no guarantee of identical scores +- `huggingface/transformers` has a simpler interface that supports [single-gpu](https://github.com/huggingface/transformers/blob/master/examples/legacy/seq2seq/run_eval.py) and [multi-gpu](https://github.com/huggingface/transformers/blob/master/examples/legacy/seq2seq/run_distributed_eval.py) beam search. + In `huggingface/transformers`, the BART models' paths are `facebook/bart-large-cnn` and `facebook/bart-large-xsum`. + +In `fairseq`, summaries can be generated using: + +```bash +cp data-bin/cnn_dm/dict.source.txt checkpoints/ +python examples/bart/summarize.py \ + --model-dir pytorch/fairseq \ + --model-file bart.large.cnn \ + --src cnn_dm/test.source \ + --out cnn_dm/test.hypo +``` + +For calculating rouge, install `files2rouge` from [here](https://github.com/pltrdy/files2rouge). + +```bash +export CLASSPATH=/path/to/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0.jar + +# Tokenize hypothesis and target files. 
+cat test.hypo | java edu.stanford.nlp.process.PTBTokenizer -ioFileList -preserveLines > test.hypo.tokenized +cat test.target | java edu.stanford.nlp.process.PTBTokenizer -ioFileList -preserveLines > test.hypo.target +files2rouge test.hypo.tokenized test.hypo.target +# Expected output: (ROUGE-2 Average_F: 0.21238) +``` + + +## Finetuning + +- [Finetuning on GLUE](README.glue.md) +- [Finetuning on CNN-DM](README.summarization.md) + +## Citation + +```bibtex +@article{lewis2019bart, + title = {BART: Denoising Sequence-to-Sequence Pre-training for Natural +Language Generation, Translation, and Comprehension}, + author = {Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad and + Abdelrahman Mohamed and Omer Levy and Veselin Stoyanov + and Luke Zettlemoyer }, + journal={arXiv preprint arXiv:1910.13461}, + year = {2019}, +} +``` diff --git a/fairseq/examples/bart/README.summarization.md b/fairseq/examples/bart/README.summarization.md new file mode 100644 index 0000000..8727584 --- /dev/null +++ b/fairseq/examples/bart/README.summarization.md @@ -0,0 +1,102 @@ +# Fine-tuning BART on CNN-Dailymail summarization task + +### 1) Download the CNN and Daily Mail data and preprocess it into data files with non-tokenized cased samples. + +Follow the instructions [here](https://github.com/abisee/cnn-dailymail) to download the original CNN and Daily Mail datasets. To preprocess the data, refer to the pointers in [this issue](https://github.com/pytorch/fairseq/issues/1391) or check out the code [here](https://github.com/artmatsak/cnn-dailymail). + +Follow the instructions [here](https://github.com/EdinburghNLP/XSum) to download the original Extreme Summarization datasets, or check out the code [here](https://github.com/EdinburghNLP/XSum/tree/master/XSum-Dataset), Please keep the raw dataset and make sure no tokenization nor BPE on the dataset. 
+ +### 2) BPE preprocess: + +```bash +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt' + +TASK=cnn_dm +for SPLIT in train val +do + for LANG in source target + do + python -m examples.roberta.multiprocessing_bpe_encoder \ + --encoder-json encoder.json \ + --vocab-bpe vocab.bpe \ + --inputs "$TASK/$SPLIT.$LANG" \ + --outputs "$TASK/$SPLIT.bpe.$LANG" \ + --workers 60 \ + --keep-empty; + done +done +``` + +### 3) Binarize dataset: +```bash +fairseq-preprocess \ + --source-lang "source" \ + --target-lang "target" \ + --trainpref "${TASK}/train.bpe" \ + --validpref "${TASK}/val.bpe" \ + --destdir "${TASK}-bin/" \ + --workers 60 \ + --srcdict dict.txt \ + --tgtdict dict.txt; +``` + +### 4) Fine-tuning on CNN-DM summarization task: +Example fine-tuning CNN-DM +```bash +TOTAL_NUM_UPDATES=20000 +WARMUP_UPDATES=500 +LR=3e-05 +MAX_TOKENS=2048 +UPDATE_FREQ=4 +BART_PATH=/path/to/bart/model.pt + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train cnn_dm-bin \ + --restore-file $BART_PATH \ + --max-tokens $MAX_TOKENS \ + --task translation \ + --source-lang source --target-lang target \ + --truncate-source \ + --layernorm-embedding \ + --share-all-embeddings \ + --share-decoder-input-output-embed \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --arch bart_large \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.01 --optimizer adam --adam-betas "(0.9, 0.999)" --adam-eps 1e-08 \ + --clip-norm 0.1 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --update-freq $UPDATE_FREQ \ + --skip-invalid-size-inputs-valid-test \ + --find-unused-parameters; +``` +Above is expected to run on `1` node with `8 32gb-V100`. 
+Expected training time is about `5 hours`. Training time can be reduced with distributed training on `4` nodes and `--update-freq 1`. + +Use TOTAL_NUM_UPDATES=15000 UPDATE_FREQ=2 for Xsum task + +### Inference for CNN-DM test data using above trained checkpoint. +After training the model as mentioned in previous step, you can perform inference with checkpoints in `checkpoints/` directory using `eval_cnn.py`, for example + +```bash +cp data-bin/cnn_dm/dict.source.txt checkpoints/ +python examples/bart/summarize.py \ + --model-dir checkpoints \ + --model-file checkpoint_best.pt \ + --src cnn_dm/test.source \ + --out cnn_dm/test.hypo +``` +For XSUM, which uses beam=6, lenpen=1.0, max_len_b=60, min_len=10: +```bash +cp data-bin/cnn_dm/dict.source.txt checkpoints/ +python examples/bart/summarize.py \ + --model-dir checkpoints \ + --model-file checkpoint_best.pt \ + --src cnn_dm/test.source \ + --out cnn_dm/test.hypo \ + --xsum-kwargs +``` diff --git a/fairseq/examples/bart/summarize.py b/fairseq/examples/bart/summarize.py new file mode 100644 index 0000000..04435f8 --- /dev/null +++ b/fairseq/examples/bart/summarize.py @@ -0,0 +1,100 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@torch.no_grad()
def generate(bart, infile, outfile="bart_hypo.txt", bsz=32, n_obs=None, **eval_kwargs):
    """Summarize ``infile`` line by line and write one hypothesis per line.

    Args:
        bart: object exposing ``sample(list_of_str, **eval_kwargs)`` that
            returns one hypothesis string per input string.
        infile: path of the text file holding one source document per line.
        outfile: path the hypotheses are written to (overwritten).
        bsz: number of source lines passed to ``bart.sample`` per call.
        n_obs: if not ``None``, summarize at most this many leading lines.
        **eval_kwargs: forwarded unchanged to ``bart.sample``.

    Raises:
        ValueError: if ``bsz`` is smaller than 1.
    """
    # Local import: the module's import block may not provide itertools.
    from itertools import islice

    if bsz < 1:
        raise ValueError(f"bsz must be a positive integer, got {bsz}")

    def _write_batch(batch, fout):
        for hypothesis in bart.sample(batch, **eval_kwargs):
            fout.write(hypothesis + "\n")
        # Flush once per batch so partial results survive an interruption.
        fout.flush()

    with open(infile) as source, open(outfile, "w") as fout:
        # Cap the input up front: exactly n_obs lines are read, and an empty
        # file yields no batches at all (no phantom empty document).
        lines = source if n_obs is None else islice(source, max(n_obs, 0))
        batch = []
        for sline in lines:
            batch.append(sline.strip())
            if len(batch) == bsz:
                _write_batch(batch, fout)
                batch = []
        if batch:
            _write_batch(batch, fout)


def main():
    r"""Parse command-line options, load a BART model and summarize a file.

    Usage::

         python examples/bart/summarize.py \
            --model-dir $HOME/bart.large.cnn \
            --model-file model.pt \
            --src $HOME/data-bin/cnn_dm/test.source
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model-dir",
        required=True,
        type=str,
        help="path containing model file and src_dict.txt",
    )
    parser.add_argument(
        "--model-file",
        default="checkpoint_best.pt",
        help="where in model_dir are weights saved",
    )
    parser.add_argument(
        "--src", default="test.source", help="text to summarize", type=str
    )
    parser.add_argument(
        "--out", default="test.hypo", help="where to save summaries", type=str
    )
    parser.add_argument(
        "--bsz", default=32, help="number of examples summarized per batch", type=int
    )
    parser.add_argument(
        "--n", default=None, help="how many examples to summarize", type=int
    )
    parser.add_argument(
        "--xsum-kwargs",
        action="store_true",
        default=False,
        help="if true use XSUM_KWARGS else CNN_KWARGS",
    )
    args = parser.parse_args()
    eval_kwargs = XSUM_KWARGS if args.xsum_kwargs else CNN_KWARGS
    # The literal "pytorch/fairseq" selects a torch.hub download; any other
    # value is treated as a local directory holding the checkpoint.
    if args.model_dir == "pytorch/fairseq":
        bart = torch.hub.load("pytorch/fairseq", args.model_file)
    else:
        bart = BARTModel.from_pretrained(
            args.model_dir,
            checkpoint_file=args.model_file,
            data_name_or_path=args.model_dir,
        )
    bart = bart.eval()
    if torch.cuda.is_available():
        bart = bart.cuda().half()
    generate(
        bart, args.src, bsz=args.bsz, n_obs=args.n, outfile=args.out, **eval_kwargs
    )
+ +## Data +Get data and generate fairseq binary dataset: +```bash +bash ./get_data.sh +``` + +## Model Training +Train Transformer model with Bi-GRU embedding contextualization (implemented in `gru_transformer.py`): +```bash +# VOCAB=bytes +# VOCAB=chars +VOCAB=bbpe2048 +# VOCAB=bpe2048 +# VOCAB=bbpe4096 +# VOCAB=bpe4096 +# VOCAB=bpe16384 +``` +```bash +fairseq-train "data/bin_${VOCAB}" --task translation --user-dir examples/byte_level_bpe/gru_transformer \ + --arch gru_transformer --encoder-layers 2 --decoder-layers 2 --dropout 0.3 --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9, 0.98)' \ + --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --log-format 'simple' --log-interval 100 --save-dir "checkpoints/${VOCAB}" \ + --batch-size 100 --max-update 100000 --update-freq 2 +``` + +## Generation +`fairseq-generate` requires bytes (BBPE) decoder to convert byte-level representation back to characters: +```bash +# BPE=--bpe bytes +# BPE=--bpe characters +BPE=--bpe byte_bpe --sentencepiece-model-path data/spm_bbpe2048.model +# BPE=--bpe sentencepiece --sentencepiece-model data/spm_bpe2048.model +# BPE=--bpe byte_bpe --sentencepiece-model-path data/spm_bbpe4096.model +# BPE=--bpe sentencepiece --sentencepiece-model data/spm_bpe4096.model +# BPE=--bpe sentencepiece --sentencepiece-model data/spm_bpe16384.model +``` + +```bash +fairseq-generate "data/bin_${VOCAB}" --task translation --user-dir examples/byte_level_bpe/gru_transformer \ + --source-lang fr --gen-subset test --sacrebleu --path "checkpoints/${VOCAB}/checkpoint_last.pt" \ + --tokenizer moses --moses-target-lang en ${BPE} +``` +When using `fairseq-interactive`, bytes (BBPE) encoder/decoder is required to tokenize input data and detokenize model predictions: +```bash +fairseq-interactive "data/bin_${VOCAB}" --task translation --user-dir examples/byte_level_bpe/gru_transformer \ + --path 
"checkpoints/${VOCAB}/checkpoint_last.pt" --input data/test.fr --tokenizer moses --moses-source-lang fr \ + --moses-target-lang en ${BPE} --buffer-size 1000 --max-tokens 10000 +``` + +## Results +| Vocabulary | Model | BLEU | +|:-------------:|:-------------:|:-------------:| +| Joint BPE 16k ([Kudo, 2018](https://arxiv.org/abs/1804.10959)) | 512d LSTM 2+2 | 33.81 | +| Joint BPE 16k | Transformer base 2+2 (w/ GRU) | 36.64 (36.72) | +| Joint BPE 4k | Transformer base 2+2 (w/ GRU) | 35.49 (36.10) | +| Joint BBPE 4k | Transformer base 2+2 (w/ GRU) | 35.61 (35.82) | +| Joint BPE 2k | Transformer base 2+2 (w/ GRU) | 34.87 (36.13) | +| Joint BBPE 2k | Transformer base 2+2 (w/ GRU) | 34.98 (35.43) | +| Characters | Transformer base 2+2 (w/ GRU) | 31.78 (33.30) | +| Bytes | Transformer base 2+2 (w/ GRU) | 31.57 (33.62) | + + +## Citation +``` +@misc{wang2019neural, + title={Neural Machine Translation with Byte-Level Subwords}, + author={Changhan Wang and Kyunghyun Cho and Jiatao Gu}, + year={2019}, + eprint={1909.03341}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + +## Contact +Changhan Wang ([changhan@fb.com](mailto:changhan@fb.com)), +Kyunghyun Cho ([kyunghyuncho@fb.com](mailto:kyunghyuncho@fb.com)), +Jiatao Gu ([jgu@fb.com](mailto:jgu@fb.com)) diff --git a/fairseq/examples/byte_level_bpe/get_bitext.py b/fairseq/examples/byte_level_bpe/get_bitext.py new file mode 100644 index 0000000..6ac1eee --- /dev/null +++ b/fairseq/examples/byte_level_bpe/get_bitext.py @@ -0,0 +1,254 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import argparse +import os +import os.path as op +from collections import namedtuple +from multiprocessing import cpu_count +from typing import List, Optional + +import sentencepiece as sp +from fairseq.data.encoders.byte_bpe import ByteBPE +from fairseq.data.encoders.byte_utils import byte_encode +from fairseq.data.encoders.bytes import Bytes +from fairseq.data.encoders.characters import Characters +from fairseq.data.encoders.moses_tokenizer import MosesTokenizer +from fairseq.data.encoders.sentencepiece_bpe import SentencepieceBPE + + +SPLITS = ["train", "valid", "test"] + + +def _convert_xml(in_path: str, out_path: str): + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + ss = s.strip() + if not ss.startswith("", "").split('">') + assert len(ss) == 2 + f_o.write(ss[1].strip() + "\n") + + +def _convert_train(in_path: str, out_path: str): + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + ss = s.strip() + if ss.startswith("<"): + continue + f_o.write(ss.strip() + "\n") + + +def _get_bytes(in_path: str, out_path: str): + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + f_o.write(Bytes.encode(s.strip()) + "\n") + + +def _get_chars(in_path: str, out_path: str): + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + f_o.write(Characters.encode(s.strip()) + "\n") + + +def pretokenize(in_path: str, out_path: str, src: str, tgt: str): + Args = namedtuple( + "Args", + [ + "moses_source_lang", + "moses_target_lang", + "moses_no_dash_splits", + "moses_no_escape", + ], + ) + args = Args( + moses_source_lang=src, + moses_target_lang=tgt, + moses_no_dash_splits=False, + moses_no_escape=False, + ) + pretokenizer = MosesTokenizer(args) + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + f_o.write(pretokenizer.encode(s.strip()) + "\n") + + +def _convert_to_bchar(in_path_prefix: str, src: str, tgt: str, out_path: str): + with open(out_path, "w") as f_o: + for lang in [src, tgt]: + 
with open(f"{in_path_prefix}.{lang}") as f: + for s in f: + f_o.write(byte_encode(s.strip()) + "\n") + + +def _get_bpe(in_path: str, model_prefix: str, vocab_size: int): + arguments = [ + f"--input={in_path}", + f"--model_prefix={model_prefix}", + f"--model_type=bpe", + f"--vocab_size={vocab_size}", + "--character_coverage=1.0", + "--normalization_rule_name=identity", + f"--num_threads={cpu_count()}", + ] + sp.SentencePieceTrainer.Train(" ".join(arguments)) + + +def _apply_bbpe(model_path: str, in_path: str, out_path: str): + Args = namedtuple("Args", ["sentencepiece_model_path"]) + args = Args(sentencepiece_model_path=model_path) + tokenizer = ByteBPE(args) + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + f_o.write(tokenizer.encode(s.strip()) + "\n") + + +def _apply_bpe(model_path: str, in_path: str, out_path: str): + Args = namedtuple("Args", ["sentencepiece_model"]) + args = Args(sentencepiece_model=model_path) + tokenizer = SentencepieceBPE(args) + with open(in_path) as f, open(out_path, "w") as f_o: + for s in f: + f_o.write(tokenizer.encode(s.strip()) + "\n") + + +def _concat_files(in_paths: List[str], out_path: str): + with open(out_path, "w") as f_o: + for p in in_paths: + with open(p) as f: + for r in f: + f_o.write(r) + + +def preprocess_iwslt17( + root: str, + src: str, + tgt: str, + bpe_size: Optional[int], + need_chars: bool, + bbpe_size: Optional[int], + need_bytes: bool, +): + # extract bitext + in_root = op.join(root, f"{src}-{tgt}") + for lang in [src, tgt]: + _convert_train( + op.join(in_root, f"train.tags.{src}-{tgt}.{lang}"), + op.join(root, f"train.{lang}"), + ) + _convert_xml( + op.join(in_root, f"IWSLT17.TED.dev2010.{src}-{tgt}.{lang}.xml"), + op.join(root, f"valid.{lang}"), + ) + _convert_xml( + op.join(in_root, f"IWSLT17.TED.tst2015.{src}-{tgt}.{lang}.xml"), + op.join(root, f"test.{lang}"), + ) + # pre-tokenize + for lang in [src, tgt]: + for split in SPLITS: + pretokenize( + op.join(root, f"{split}.{lang}"), + 
op.join(root, f"{split}.moses.{lang}"), + src, + tgt, + ) + # tokenize with BPE vocabulary + if bpe_size is not None: + # learn vocabulary + concated_train_path = op.join(root, "train.all") + _concat_files( + [op.join(root, "train.moses.fr"), op.join(root, "train.moses.en")], + concated_train_path, + ) + bpe_model_prefix = op.join(root, f"spm_bpe{bpe_size}") + _get_bpe(concated_train_path, bpe_model_prefix, bpe_size) + os.remove(concated_train_path) + # apply + for lang in [src, tgt]: + for split in SPLITS: + _apply_bpe( + bpe_model_prefix + ".model", + op.join(root, f"{split}.moses.{lang}"), + op.join(root, f"{split}.moses.bpe{bpe_size}.{lang}"), + ) + # tokenize with bytes vocabulary + if need_bytes: + for lang in [src, tgt]: + for split in SPLITS: + _get_bytes( + op.join(root, f"{split}.moses.{lang}"), + op.join(root, f"{split}.moses.bytes.{lang}"), + ) + # tokenize with characters vocabulary + if need_chars: + for lang in [src, tgt]: + for split in SPLITS: + _get_chars( + op.join(root, f"{split}.moses.{lang}"), + op.join(root, f"{split}.moses.chars.{lang}"), + ) + # tokenize with byte-level BPE vocabulary + if bbpe_size is not None: + # learn vocabulary + bchar_path = op.join(root, "train.bchar") + _convert_to_bchar(op.join(root, "train.moses"), src, tgt, bchar_path) + bbpe_model_prefix = op.join(root, f"spm_bbpe{bbpe_size}") + _get_bpe(bchar_path, bbpe_model_prefix, bbpe_size) + os.remove(bchar_path) + # apply + for lang in [src, tgt]: + for split in SPLITS: + _apply_bbpe( + bbpe_model_prefix + ".model", + op.join(root, f"{split}.moses.{lang}"), + op.join(root, f"{split}.moses.bbpe{bbpe_size}.{lang}"), + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--root", type=str, default="data") + parser.add_argument( + "--bpe-vocab", + default=None, + type=int, + help="Generate tokenized bitext with BPE of size K." 
#!/bin/bash

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Download the IWSLT17 fr-en archive, build the tokenized bitext variants
# with get_bitext.py and binarize each of them with fairseq-preprocess.

# Stop at the first failing step so a failed download or tokenization does
# not lead into deleting the archive or binarizing missing files.
set -e

# Optional directory that holds pip / python / fairseq-preprocess.
# Leave empty to resolve all three through PATH.
PY_BIN_ROOT=
# Normalize to either "" or "<dir>/" so "${PY_BIN_ROOT}tool" is always valid;
# the previous "${PY_BIN_ROOT}/fairseq-preprocess" became "/fairseq-preprocess"
# whenever the prefix was empty.
PY_BIN_ROOT=${PY_BIN_ROOT:+${PY_BIN_ROOT%/}/}

# PyPI dependency
"${PY_BIN_ROOT}pip" install sentencepiece sacremoses

# Get data
mkdir -p data

if [ ! -f "data/fr-en.tgz" ]; then
  wget https://wit3.fbk.eu/archive/2017-01-trnted/texts/fr/en/fr-en.tgz -P data
  tar xvf data/fr-en.tgz -C data
fi
"${PY_BIN_ROOT}python" get_bitext.py --bpe-vocab 16384 --byte-vocab --char-vocab
for VOCAB_SIZE in 2048 4096; do
  "${PY_BIN_ROOT}python" get_bitext.py --bpe-vocab "${VOCAB_SIZE}" --bbpe-vocab "${VOCAB_SIZE}"
done
rm -r data/fr-en data/fr-en.tgz

# Generate binary dataset: one joined-dictionary binarization per vocabulary.
for VOCAB in bpe16384 bytes chars bbpe2048 bpe2048 bbpe4096 bpe4096; do
  "${PY_BIN_ROOT}fairseq-preprocess" --source-lang fr --target-lang en --destdir "data/bin_${VOCAB}" \
    --joined-dictionary --workers "$(nproc)" --trainpref "data/train.moses.${VOCAB}" \
    --validpref "data/valid.moses.${VOCAB}" --testpref "data/test.moses.${VOCAB}"
done
+ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch.nn as nn +import torch.nn.functional as F +from fairseq.models import register_model, register_model_architecture +from fairseq.models.transformer import TransformerEncoder, TransformerModel + + +@register_model("gru_transformer") +class GRUTransformerModel(TransformerModel): + @classmethod + def build_encoder(cls, args, src_dict, embed_tokens): + return GRUTransformerEncoder(args, src_dict, embed_tokens) + + +class GRUTransformerEncoder(TransformerEncoder): + def __init__(self, args, dictionary, embed_tokens): + super().__init__(args, dictionary, embed_tokens) + self.emb_ctx = nn.GRU( + input_size=embed_tokens.embedding_dim, + hidden_size=embed_tokens.embedding_dim // 2, + num_layers=1, + bidirectional=True, + ) + + def forward_embedding(self, src_tokens): + # embed tokens and positions + x = embed = self.embed_scale * self.embed_tokens(src_tokens) + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + + # contextualize embeddings + x = x.transpose(0, 1) + x = self.dropout_module(x) + x, _ = self.emb_ctx.forward(x) + x = x.transpose(0, 1) + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + return x, embed + + +@register_model_architecture("gru_transformer", "gru_transformer") +def gru_transformer_base_architecture(args): + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.encoder_learned_pos = 
getattr(args, "encoder_learned_pos", False) + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.no_cross_attention = getattr(args, "no_cross_attention", False) + args.cross_self_attention = getattr(args, "cross_self_attention", False) + args.layer_wise_attention = getattr(args, "layer_wise_attention", False) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + + +@register_model_architecture("gru_transformer", "gru_transformer_big") +def 
gru_transformer_big(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.3) + gru_transformer_base_architecture(args) diff --git a/fairseq/examples/camembert/README.md b/fairseq/examples/camembert/README.md new file mode 100644 index 0000000..5ef4fe3 --- /dev/null +++ b/fairseq/examples/camembert/README.md @@ -0,0 +1,75 @@ +# CamemBERT: a Tasty French Language Model + +## Introduction + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a pretrained language model trained on 138GB of French text based on RoBERTa. + +Also available in [github.com/huggingface/transformers](https://github.com/huggingface/transformers/). + +## Pre-trained models + +| Model | #params | Download | Arch. 
| Training data | +|--------------------------------|---------|--------------------------------------------------------------------------------------------------------------------------|-------|-----------------------------------| +| `camembert` / `camembert-base` | 110M | [camembert-base.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-base.tar.gz) | Base | OSCAR (138 GB of text) | +| `camembert-large` | 335M | [camembert-large.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-large.tar.gz) | Large | CCNet (135 GB of text) | +| `camembert-base-ccnet` | 110M | [camembert-base-ccnet.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-base-ccnet.tar.gz) | Base | CCNet (135 GB of text) | +| `camembert-base-wikipedia-4gb` | 110M | [camembert-base-wikipedia-4gb.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-base-wikipedia-4gb.tar.gz) | Base | Wikipedia (4 GB of text) | +| `camembert-base-oscar-4gb` | 110M | [camembert-base-oscar-4gb.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-base-oscar-4gb.tar.gz) | Base | Subsample of OSCAR (4 GB of text) | +| `camembert-base-ccnet-4gb` | 110M | [camembert-base-ccnet-4gb.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/camembert-base-ccnet-4gb.tar.gz) | Base | Subsample of CCNet (4 GB of text) | + +## Example usage + +### fairseq +##### Load CamemBERT from torch.hub (PyTorch >= 1.1): +```python +import torch +camembert = torch.hub.load('pytorch/fairseq', 'camembert') +camembert.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Load CamemBERT (for PyTorch 1.0 or custom models): +```python +# Download camembert model +wget https://dl.fbaipublicfiles.com/fairseq/models/camembert-base.tar.gz +tar -xzvf camembert-base.tar.gz + +# Load the model in fairseq +from fairseq.models.roberta import CamembertModel +camembert = CamembertModel.from_pretrained('/path/to/camembert') +camembert.eval() # disable dropout (or leave in train mode to 
finetune) +``` + +##### Filling masks: +```python +masked_line = 'Le camembert est :)' +camembert.fill_mask(masked_line, topk=3) +# [('Le camembert est délicieux :)', 0.4909118115901947, ' délicieux'), +# ('Le camembert est excellent :)', 0.10556942224502563, ' excellent'), +# ('Le camembert est succulent :)', 0.03453322499990463, ' succulent')] +``` + +##### Extract features from Camembert: +```python +# Extract the last layer's features +line = "J'aime le camembert !" +tokens = camembert.encode(line) +last_layer_features = camembert.extract_features(tokens) +assert last_layer_features.size() == torch.Size([1, 10, 768]) + +# Extract all layer's features (layer 0 is the embedding layer) +all_layers = camembert.extract_features(tokens, return_all_hiddens=True) +assert len(all_layers) == 13 +assert torch.all(all_layers[-1] == last_layer_features) +``` + +## Citation +If you use our work, please cite: + +```bibtex +@inproceedings{martin2020camembert, + title={CamemBERT: a Tasty French Language Model}, + author={Martin, Louis and Muller, Benjamin and Su{\'a}rez, Pedro Javier Ortiz and Dupont, Yoann and Romary, Laurent and de la Clergerie, {\'E}ric Villemonte and Seddah, Djam{\'e} and Sagot, Beno{\^\i}t}, + booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + year={2020} +} +``` diff --git a/fairseq/examples/constrained_decoding/README.md b/fairseq/examples/constrained_decoding/README.md new file mode 100644 index 0000000..e04b8b6 --- /dev/null +++ b/fairseq/examples/constrained_decoding/README.md @@ -0,0 +1,123 @@ +# (Vectorized) Lexically constrained decoding with dynamic beam allocation + +This page provides instructions for how to use lexically constrained decoding in Fairseq. 
+Fairseq implements the code described in the following papers: + +* [Fast Lexically Constrained Decoding With Dynamic Beam Allocation](https://www.aclweb.org/anthology/N18-1119/) (Post & Vilar, 2018) +* [Improved Lexically Constrained Decoding for Translation and Monolingual Rewriting](https://www.aclweb.org/anthology/N19-1090/) (Hu et al., 2019) + +## Quick start + +Constrained search is enabled by adding the command-line argument `--constraints` to `fairseq-interactive`. +Constraints are appended to each line of input, separated by tabs. Each constraint (one or more tokens) +is a separate field. + +The following command, using [Fairseq's WMT19 German--English model](https://github.com/pytorch/fairseq/blob/main/examples/wmt19/README.md), +translates the sentence *Die maschinelle Übersetzung ist schwer zu kontrollieren.* with the constraints +"hard" and "to influence". + + echo -e "Die maschinelle Übersetzung ist schwer zu kontrollieren.\thard\ttoinfluence" \ + | normalize.py | tok.py \ + | fairseq-interactive /path/to/model \ + --path /path/to/model/model1.pt \ + --bpe fastbpe \ + --bpe-codes /path/to/model/bpecodes \ + --constraints \ + -s de -t en \ + --beam 10 + +(tok.py and normalize.py can be found in the same directory as this README; they are just shortcuts around Fairseq's WMT19 preprocessing). +This will generate the following output: + + [snip] + S-0 Die masch@@ in@@ elle Über@@ setzung ist schwer zu kontrollieren . + W-0 1.844 seconds + C-0 hard + C-0 influence + H-0 -1.5333266258239746 Mach@@ ine trans@@ lation is hard to influence . + D-0 -1.5333266258239746 Machine translation is hard to influence . + P-0 -0.5434 -0.1423 -0.1930 -0.1415 -0.2346 -1.8031 -0.1701 -11.7727 -0.1815 -0.1511 + +By default, constraints are generated in the order supplied, with any number (zero or more) of tokens generated +between constraints. If you wish for the decoder to order the constraints, then use `--constraints unordered`. 
+Note that you may want to use a larger beam. + +## Implementation details + +The heart of the implementation is in `fairseq/search.py`, which adds a `LexicallyConstrainedBeamSearch` instance. +This instance of beam search tracks the progress of each hypothesis in the beam through the set of constraints +provided for each input sentence. It does this using one of two classes, both found in `fairseq/token_generation_constraints.py`: + +* OrderedConstraintState: assumes the `C` input constraints will be generated in the provided order +* UnorderedConstraintState: tries to apply `C` (phrasal) constraints in all `C!` orders + +## Differences from Sockeye + +There are a number of [differences from Sockeye's implementation](https://awslabs.github.io/sockeye/inference.html#lexical-constraints). + +* Generating constraints in the order supplied (the default option here) is not available in Sockeye. +* Due to an improved beam allocation method, there is no need to prune the beam. +* Again due to better allocation, beam sizes as low as 10 or even 5 are often sufficient. +* [The vector extensions described in Hu et al.](https://github.com/edwardjhu/sockeye/tree/trie_constraints) (NAACL 2019) were never merged + into the main Sockeye branch. 
+ +## Citation + +The paper first describing lexical constraints for seq2seq decoding is: + +```bibtex +@inproceedings{hokamp-liu-2017-lexically, + title = "Lexically Constrained Decoding for Sequence Generation Using Grid Beam Search", + author = "Hokamp, Chris and + Liu, Qun", + booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", + month = jul, + year = "2017", + address = "Vancouver, Canada", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/P17-1141", + doi = "10.18653/v1/P17-1141", + pages = "1535--1546", +} +``` + +The fairseq implementation uses the extensions described in + +```bibtex +@inproceedings{post-vilar-2018-fast, + title = "Fast Lexically Constrained Decoding with Dynamic Beam Allocation for Neural Machine Translation", + author = "Post, Matt and + Vilar, David", + booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", + month = jun, + year = "2018", + address = "New Orleans, Louisiana", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/N18-1119", + doi = "10.18653/v1/N18-1119", + pages = "1314--1324", +} +``` + +and + +```bibtex +@inproceedings{hu-etal-2019-improved, + title = "Improved Lexically Constrained Decoding for Translation and Monolingual Rewriting", + author = "Hu, J. 
Edward and + Khayrallah, Huda and + Culkin, Ryan and + Xia, Patrick and + Chen, Tongfei and + Post, Matt and + Van Durme, Benjamin", + booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)", + month = jun, + year = "2019", + address = "Minneapolis, Minnesota", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/N19-1090", + doi = "10.18653/v1/N19-1090", + pages = "839--850", +} +``` diff --git a/fairseq/examples/constrained_decoding/normalize.py b/fairseq/examples/constrained_decoding/normalize.py new file mode 100644 index 0000000..4ae2b51 --- /dev/null +++ b/fairseq/examples/constrained_decoding/normalize.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import sys + +from sacremoses.normalize import MosesPunctNormalizer + + +def main(args): + normalizer = MosesPunctNormalizer(lang=args.lang, penn=args.penn) + for line in sys.stdin: + print(normalizer.normalize(line.rstrip()), flush=True) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--lang", "-l", default="en") + parser.add_argument("--penn", "-p", action="store_true") + args = parser.parse_args() + + main(args) diff --git a/fairseq/examples/constrained_decoding/tok.py b/fairseq/examples/constrained_decoding/tok.py new file mode 100644 index 0000000..b1f888a --- /dev/null +++ b/fairseq/examples/constrained_decoding/tok.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import sys + +import sacremoses + + +def main(args): + """Tokenizes, preserving tabs""" + mt = sacremoses.MosesTokenizer(lang=args.lang) + + def tok(s): + return mt.tokenize(s, return_str=True) + + for line in sys.stdin: + parts = list(map(tok, line.split("\t"))) + print(*parts, sep="\t", flush=True) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--lang", "-l", default="en") + parser.add_argument("--penn", "-p", action="store_true") + parser.add_argument("--fields", "-f", help="fields to tokenize") + args = parser.parse_args() + + main(args) diff --git a/fairseq/examples/conv_seq2seq/README.md b/fairseq/examples/conv_seq2seq/README.md new file mode 100644 index 0000000..95fe7e7 --- /dev/null +++ b/fairseq/examples/conv_seq2seq/README.md @@ -0,0 +1,25 @@ +# Convolutional Sequence to Sequence Learning (Gehring et al., 2017) + +## Pre-trained models + +Description | Dataset | Model | Test set(s) +---|---|---|--- +Convolutional
([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.v2.en-fr.fconv-py.tar.bz2) | newstest2014:
[download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.v2.en-fr.newstest2014.tar.bz2)
newstest2012/2013:
[download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.v2.en-fr.ntst1213.tar.bz2) +Convolutional
([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT14 English-German](http://statmt.org/wmt14/translation-task.html#Download) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-de.fconv-py.tar.bz2) | newstest2014:
[download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-de.newstest2014.tar.bz2) +Convolutional
([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT17 English-German](http://statmt.org/wmt17/translation-task.html#Download) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt17.v2.en-de.fconv-py.tar.bz2) | newstest2014:
[download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt17.v2.en-de.newstest2014.tar.bz2) + +## Example usage + +See the [translation README](../translation/README.md) for instructions on reproducing results for WMT'14 En-De and +WMT'14 En-Fr using the `fconv_wmt_en_de` and `fconv_wmt_en_fr` model architectures. + +## Citation + +```bibtex +@inproceedings{gehring2017convs2s, + title = {Convolutional Sequence to Sequence Learning}, + author = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N}, + booktitle = {Proc. of ICML}, + year = 2017, +} +``` diff --git a/fairseq/examples/criss/README.md b/fairseq/examples/criss/README.md new file mode 100644 index 0000000..4689ed7 --- /dev/null +++ b/fairseq/examples/criss/README.md @@ -0,0 +1,61 @@ +# Cross-lingual Retrieval for Iterative Self-Supervised Training + +https://arxiv.org/pdf/2006.09526.pdf + +## Introduction + +CRISS is a multilingual sequence-to-sequence pretraining method where mining and training processes are applied iteratively, improving cross-lingual alignment and translation ability at the same time. + +## Requirements: + +* faiss: https://github.com/facebookresearch/faiss +* mosesdecoder: https://github.com/moses-smt/mosesdecoder +* flores: https://github.com/facebookresearch/flores +* LASER: https://github.com/facebookresearch/LASER + +## Unsupervised Machine Translation +##### 1. Download and decompress CRISS checkpoints +``` +cd examples/criss +wget https://dl.fbaipublicfiles.com/criss/criss_3rd_checkpoints.tar.gz +tar -xf criss_3rd_checkpoints.tar.gz +``` +##### 2. Download and preprocess Flores test dataset +Make sure to run all scripts from examples/criss directory +``` +bash download_and_preprocess_flores_test.sh +``` + +##### 3. Run Evaluation on Sinhala-English +``` +bash unsupervised_mt/eval.sh +``` + +## Sentence Retrieval +##### 1. Download and preprocess Tatoeba dataset +``` +bash download_and_preprocess_tatoeba.sh +``` + +##### 2. 
Run Sentence Retrieval on Tatoeba Kazakh-English +``` +bash sentence_retrieval/sentence_retrieval_tatoeba.sh +``` + +## Mining +##### 1. Install faiss +Follow instructions on https://github.com/facebookresearch/faiss/blob/master/INSTALL.md +##### 2. Mine pseudo-parallel data between Kazakh and English +``` +bash mining/mine_example.sh +``` + +## Citation +```bibtex +@article{tran2020cross, + title={Cross-lingual retrieval for iterative self-supervised training}, + author={Tran, Chau and Tang, Yuqing and Li, Xian and Gu, Jiatao}, + journal={arXiv preprint arXiv:2006.09526}, + year={2020} +} +``` diff --git a/fairseq/examples/criss/download_and_preprocess_flores_test.sh b/fairseq/examples/criss/download_and_preprocess_flores_test.sh new file mode 100644 index 0000000..ed4b390 --- /dev/null +++ b/fairseq/examples/criss/download_and_preprocess_flores_test.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +SPM_ENCODE=flores/scripts/spm_encode.py +DATA=data_tmp +SPM_MODEL=criss_checkpoints/sentence.bpe.model +DICT=criss_checkpoints/dict.txt + +download_data() { + CORPORA=$1 + URL=$2 + + if [ -f $CORPORA ]; then + echo "$CORPORA already exists, skipping download" + else + echo "Downloading $URL" + wget $URL -O $CORPORA --no-check-certificate || rm -f $CORPORA + if [ -f $CORPORA ]; then + echo "$URL successfully downloaded." + else + echo "$URL not successfully downloaded." 
+ rm -f $CORPORA + fi + fi +} + +if [[ -f flores ]]; then + echo "flores already cloned" +else + git clone https://github.com/facebookresearch/flores +fi + +mkdir -p $DATA +download_data $DATA/wikipedia_en_ne_si_test_sets.tgz "https://github.com/facebookresearch/flores/raw/master/data/wikipedia_en_ne_si_test_sets.tgz" +pushd $DATA +pwd +tar -vxf wikipedia_en_ne_si_test_sets.tgz +popd + + +for lang in ne_NP si_LK; do + datadir=$DATA/${lang}-en_XX-flores + rm -rf $datadir + mkdir -p $datadir + TEST_PREFIX=$DATA/wikipedia_en_ne_si_test_sets/wikipedia.test + python $SPM_ENCODE \ + --model ${SPM_MODEL} \ + --output_format=piece \ + --inputs ${TEST_PREFIX}.${lang:0:2}-en.${lang:0:2} ${TEST_PREFIX}.${lang:0:2}-en.en \ + --outputs $datadir/test.bpe.${lang}-en_XX.${lang} $datadir/test.bpe.${lang}-en_XX.en_XX + + # binarize data + fairseq-preprocess \ + --source-lang ${lang} --target-lang en_XX \ + --testpref $datadir/test.bpe.${lang}-en_XX \ + --destdir $datadir \ + --srcdict ${DICT} \ + --joined-dictionary \ + --workers 4 +done diff --git a/fairseq/examples/criss/download_and_preprocess_tatoeba.sh b/fairseq/examples/criss/download_and_preprocess_tatoeba.sh new file mode 100644 index 0000000..7ed64f0 --- /dev/null +++ b/fairseq/examples/criss/download_and_preprocess_tatoeba.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +SPM_ENCODE=flores/scripts/spm_encode.py +DATA=data_tmp +SPM_MODEL=criss_checkpoints/sentence.bpe.model +DICT=criss_checkpoints/dict.txt + +if [[ -f flores ]]; then + echo "flores already cloned" +else + git clone https://github.com/facebookresearch/flores +fi +if [[ -f LASER ]]; then + echo "LASER already cloned" +else + git clone https://github.com/facebookresearch/LASER +fi +mkdir -p data_tmp +declare -A lang_tatoeba_map=( ["ar_AR"]="ara" ["de_DE"]="deu" ["es_XX"]="spa" ["et_EE"]="est" ["fi_FI"]="fin" ["fr_XX"]="fra" ["hi_IN"]="hin" ["it_IT"]="ita" ["ja_XX"]="jpn" ["ko_KR"]="kor" ["kk_KZ"]="kaz" ["nl_XX"]="nld" ["ru_RU"]="rus" ["tr_TR"]="tur" ["vi_VN"]="vie" ["zh_CN"]="cmn") +for lang in ar_AR de_DE es_XX et_EE fi_FI fr_XX hi_IN it_IT ja_XX kk_KZ ko_KR nl_XX ru_RU tr_TR vi_VN zh_CN; do + lang_tatoeba=${lang_tatoeba_map[$lang]} + echo $lang_tatoeba + datadir=$DATA/${lang}-en_XX-tatoeba + rm -rf $datadir + mkdir -p $datadir + TEST_PREFIX=LASER/data/tatoeba/v1/tatoeba + python $SPM_ENCODE \ + --model ${SPM_MODEL} \ + --output_format=piece \ + --inputs ${TEST_PREFIX}.${lang_tatoeba}-eng.${lang_tatoeba} ${TEST_PREFIX}.${lang_tatoeba}-eng.eng \ + --outputs $datadir/test.bpe.${lang}-en_XX.${lang} $datadir/test.bpe.${lang}-en_XX.en_XX + + # binarize data + fairseq-preprocess \ + --source-lang ${lang} --target-lang en_XX \ + --testpref $datadir/test.bpe.${lang}-en_XX \ + --destdir $datadir \ + --srcdict ${DICT} \ + --joined-dictionary \ + --workers 4 +done diff --git a/fairseq/examples/criss/mining/mine.py b/fairseq/examples/criss/mining/mine.py new file mode 100644 index 0000000..c872da1 --- /dev/null +++ b/fairseq/examples/criss/mining/mine.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+import argparse +import glob +from subprocess import check_call + +try: + import faiss + + has_faiss = True +except ImportError: + has_faiss = False +import numpy as np + + +GB = 1024 * 1024 * 1024 + + +def call(cmd): + print(cmd) + check_call(cmd, shell=True) + + +def get_batches(directory, lang, prefix="all_avg_pool"): + print(f"Finding in {directory}/{prefix}.{lang}*") + files = glob.glob(f"{directory}/{prefix}.{lang}*") + emb_files = [] + txt_files = [] + for emb_fi in files: + emb_files.append(emb_fi) + txt_fi = emb_fi.replace(prefix, "sentences") + txt_files.append(txt_fi) + return emb_files, txt_files + + +def load_batch(emb_file, dim): + embeddings = np.fromfile(emb_file, dtype=np.float32) + num_rows = int(embeddings.shape[0] / dim) + embeddings = embeddings.reshape((num_rows, dim)) + faiss.normalize_L2(embeddings) + return embeddings + + +def knnGPU_sharded(x_batches_f, y_batches_f, dim, k, direction="x2y"): + if not has_faiss: + raise ImportError("Please install Faiss") + sims = [] + inds = [] + xfrom = 0 + xto = 0 + for x_batch_f in x_batches_f: + yfrom = 0 + yto = 0 + x_batch = load_batch(x_batch_f, dim) + xto = xfrom + x_batch.shape[0] + bsims, binds = [], [] + for y_batch_f in y_batches_f: + y_batch = load_batch(y_batch_f, dim) + neighbor_size = min(k, y_batch.shape[0]) + yto = yfrom + y_batch.shape[0] + print("{}-{} -> {}-{}".format(xfrom, xto, yfrom, yto)) + idx = faiss.IndexFlatIP(dim) + idx = faiss.index_cpu_to_all_gpus(idx) + idx.add(y_batch) + bsim, bind = idx.search(x_batch, neighbor_size) + + bsims.append(bsim) + binds.append(bind + yfrom) + yfrom += y_batch.shape[0] + del idx + del y_batch + bsims = np.concatenate(bsims, axis=1) + binds = np.concatenate(binds, axis=1) + aux = np.argsort(-bsims, axis=1) + sim_batch = np.zeros((x_batch.shape[0], k), dtype=np.float32) + ind_batch = np.zeros((x_batch.shape[0], k), dtype=np.int64) + for i in range(x_batch.shape[0]): + for j in range(k): + sim_batch[i, j] = bsims[i, aux[i, j]] + ind_batch[i, j] = 
binds[i, aux[i, j]] + sims.append(sim_batch) + inds.append(ind_batch) + xfrom += x_batch.shape[0] + del x_batch + sim = np.concatenate(sims, axis=0) + ind = np.concatenate(inds, axis=0) + return sim, ind + + +def score(sim, fwd_mean, bwd_mean, margin): + return margin(sim, (fwd_mean + bwd_mean) / 2) + + +def score_candidates( + sim_mat, candidate_inds, fwd_mean, bwd_mean, margin, verbose=False +): + print(" - scoring {:d} candidates".format(sim_mat.shape[0])) + scores = np.zeros(candidate_inds.shape) + for i in range(scores.shape[0]): + for j in range(scores.shape[1]): + k = int(candidate_inds[i, j]) + scores[i, j] = score(sim_mat[i, j], fwd_mean[i], bwd_mean[k], margin) + return scores + + +def load_text(files): + all_sentences = [] + for fi in files: + with open(fi) as sentence_fi: + for line in sentence_fi: + all_sentences.append(line.strip()) + print(f"Read {len(all_sentences)} sentences") + return all_sentences + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Mine bitext") + parser.add_argument("--src-lang", help="Source language") + parser.add_argument("--tgt-lang", help="Target language") + parser.add_argument( + "--dict-path", help="Path to dictionary file", default="dict.txt" + ) + parser.add_argument( + "--spm-path", help="Path to SPM model file", default="sentence.bpe.model" + ) + parser.add_argument("--dim", type=int, default=1024, help="Embedding dimension") + parser.add_argument("--mem", type=int, default=5, help="Memory in GB") + parser.add_argument("--src-dir", help="Source directory") + parser.add_argument("--tgt-dir", help="Target directory") + parser.add_argument("--output", help="Output path") + parser.add_argument( + "--neighborhood", type=int, default=4, help="Embedding dimension" + ) + parser.add_argument( + "--threshold", type=float, default=1.06, help="Threshold on mined bitext" + ) + parser.add_argument( + "--valid-size", + type=int, + default=2000, + help="Number of sentences used for validation set", + ) + 
parser.add_argument( + "--min-count", + type=int, + default=50000, + help="Min num sentences used for each language", + ) + args = parser.parse_args() + + x_batches_f, x_sents_f = get_batches(args.src_dir, args.src_lang) + y_batches_f, y_sents_f = get_batches(args.tgt_dir, args.tgt_lang) + margin = lambda a, b: a / b + y2x_sim, y2x_ind = knnGPU_sharded( + y_batches_f, x_batches_f, args.dim, args.neighborhood, direction="y2x" + ) + x2y_sim, x2y_ind = knnGPU_sharded( + x_batches_f, y_batches_f, args.dim, args.neighborhood, direction="x2y" + ) + + x2y_mean = x2y_sim.mean(axis=1) + y2x_mean = y2x_sim.mean(axis=1) + fwd_scores = score_candidates(x2y_sim, x2y_ind, x2y_mean, y2x_mean, margin) + bwd_scores = score_candidates(y2x_sim, y2x_ind, y2x_mean, x2y_mean, margin) + fwd_best = x2y_ind[np.arange(x2y_sim.shape[0]), fwd_scores.argmax(axis=1)] + bwd_best = y2x_ind[np.arange(y2x_sim.shape[0]), bwd_scores.argmax(axis=1)] + indices = np.stack( + ( + np.concatenate((np.arange(x2y_ind.shape[0]), bwd_best)), + np.concatenate((fwd_best, np.arange(y2x_ind.shape[0]))), + ), + axis=1, + ) + scores = np.concatenate((fwd_scores.max(axis=1), bwd_scores.max(axis=1))) + + x_sentences = load_text(x_sents_f) + y_sentences = load_text(y_sents_f) + + threshold = args.threshold + min_count = args.min_count + seen_src, seen_trg = set(), set() + directory = args.output + call(f"mkdir -p {directory}") + src_out = open( + f"{directory}/all.{args.src_lang}", + mode="w", + encoding="utf-8", + errors="surrogateescape", + ) + tgt_out = open( + f"{directory}/all.{args.tgt_lang}", + mode="w", + encoding="utf-8", + errors="surrogateescape", + ) + scores_out = open( + f"{directory}/all.scores", mode="w", encoding="utf-8", errors="surrogateescape" + ) + count = 0 + for i in np.argsort(-scores): + src_ind, trg_ind = indices[i] + if src_ind not in seen_src and trg_ind not in seen_trg: + seen_src.add(src_ind) + seen_trg.add(trg_ind) + if scores[i] > threshold or count < min_count: + if 
x_sentences[src_ind]: + print(scores[i], file=scores_out) + print(x_sentences[src_ind], file=src_out) + print(y_sentences[trg_ind], file=tgt_out) + count += 1 + else: + print(f"Ignoring sentence: {x_sentences[src_ind]}") + src_out.close() + tgt_out.close() + scores_out.close() + + print(f"Found {count} pairs for threshold={threshold}") + with open(f"{directory}/all.{args.src_lang}") as all_s, open( + f"{directory}/all.{args.tgt_lang}" + ) as all_t, open(f"{directory}/valid.{args.src_lang}", "w") as valid_s, open( + f"{directory}/valid.{args.tgt_lang}", "w" + ) as valid_t, open( + f"{directory}/train.{args.src_lang}", "w" + ) as train_s, open( + f"{directory}/train.{args.tgt_lang}", "w" + ) as train_t: + count = 0 + for s_line, t_line in zip(all_s, all_t): + s_line = s_line.split("\t")[1] + t_line = t_line.split("\t")[1] + if count >= args.valid_size: + train_s.write(s_line) + train_t.write(t_line) + else: + valid_s.write(s_line) + valid_t.write(t_line) + count += 1 diff --git a/fairseq/examples/criss/mining/mine_example.sh b/fairseq/examples/criss/mining/mine_example.sh new file mode 100644 index 0000000..ace995a --- /dev/null +++ b/fairseq/examples/criss/mining/mine_example.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+# +source_lang=kk_KZ +target_lang=en_XX +MODEL=criss_checkpoints/criss.3rd.pt +SPM=criss_checkpoints/sentence.bpe.model +SPLIT=test +LANG_DICT=criss_checkpoints/lang_dict.txt +SPM_ENCODE=flores/scripts/spm_encode.py +SAVE_ENCODER=save_encoder.py +ENCODER_SAVE_ROOT=sentence_embeddings/$MODEL +DICT=criss_checkpoints/dict.txt +THRESHOLD=1.02 +MIN_COUNT=500 + +DATA_DIR=data_tmp +SAVE_DIR=mining/${source_lang}_${target_lang}_mined +ENCODER_SAVE_DIR=${ENCODER_SAVE_ROOT}/${source_lang}-${target_lang} +INPUT_DIR=$DATA_DIR/${source_lang}-${target_lang}-tatoeba + +mkdir -p $ENCODER_SAVE_DIR/${target_lang} +mkdir -p $ENCODER_SAVE_DIR/${source_lang} +mkdir -p $SAVE_DIR + +## Save encoder outputs + +# Save encoder outputs for source sentences +python $SAVE_ENCODER \ + ${INPUT_DIR} \ + --path ${MODEL} \ + --task translation_multi_simple_epoch \ + --lang-pairs ${source_lang}-${target_lang} \ + --lang-dict ${LANG_DICT} \ + --gen-subset ${SPLIT} \ + --bpe 'sentencepiece' \ + -s ${source_lang} -t ${target_lang} \ + --sentencepiece-model ${SPM} \ + --remove-bpe 'sentencepiece' \ + --beam 1 \ + --lang-tok-style mbart \ + --encoder-save-dir ${ENCODER_SAVE_DIR}/${source_lang} + +## Save encoder outputs for target sentences +python $SAVE_ENCODER \ + ${INPUT_DIR} \ + --path ${MODEL} \ + --lang-pairs ${source_lang}-${target_lang} \ + --lang-dict ${LANG_DICT} \ + --task translation_multi_simple_epoch \ + --gen-subset ${SPLIT} \ + --bpe 'sentencepiece' \ + -t ${source_lang} -s ${target_lang} \ + --sentencepiece-model ${SPM} \ + --remove-bpe 'sentencepiece' \ + --beam 1 \ + --lang-tok-style mbart \ + --encoder-save-dir ${ENCODER_SAVE_DIR}/${target_lang} + +## Mining +python mining/mine.py \ + --src-lang ${source_lang} \ + --tgt-lang ${target_lang} \ + --dim 1024 \ + --mem 10 \ + --neighborhood 4 \ + --src-dir ${ENCODER_SAVE_DIR}/${source_lang} \ + --tgt-dir ${ENCODER_SAVE_DIR}/${target_lang} \ + --output $SAVE_DIR \ + --threshold ${THRESHOLD} \ + --min-count ${MIN_COUNT} \ + --valid-size 100 
\ + --dict-path ${DICT} \ + --spm-path ${SPM} \ + + +## Process and binarize mined data +python $SPM_ENCODE \ + --model ${SPM} \ + --output_format=piece \ + --inputs mining/${source_lang}_${target_lang}_mined/train.${source_lang} mining/${source_lang}_${target_lang}_mined/train.${target_lang} \ + --outputs mining/${source_lang}_${target_lang}_mined/train.bpe.${source_lang} mining/${source_lang}_${target_lang}_mined/train.bpe.${target_lang} + +python $SPM_ENCODE \ + --model ${SPM} \ + --output_format=piece \ + --inputs mining/${source_lang}_${target_lang}_mined/valid.${source_lang} mining/${source_lang}_${target_lang}_mined/valid.${target_lang} \ + --outputs mining/${source_lang}_${target_lang}_mined/valid.bpe.${source_lang} mining/${source_lang}_${target_lang}_mined/valid.bpe.${target_lang} + + +fairseq-preprocess \ + --source-lang ${source_lang} \ + --target-lang ${target_lang} \ + --trainpref mining/${source_lang}_${target_lang}_mined/train.bpe \ + --validpref mining/${source_lang}_${target_lang}_mined/valid.bpe \ + --destdir mining/${source_lang}_${target_lang}_mined \ + --srcdict ${DICT} \ + --joined-dictionary \ + --workers 8 diff --git a/fairseq/examples/criss/save_encoder.py b/fairseq/examples/criss/save_encoder.py new file mode 100644 index 0000000..24a842e --- /dev/null +++ b/fairseq/examples/criss/save_encoder.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Translate pre-processed data with a trained model. 
+""" + +import numpy as np +import torch +from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq.sequence_generator import EnsembleModel +from fairseq.utils import safe_hasattr + + +def get_avg_pool( + models, sample, prefix_tokens, src_dict, remove_bpe, has_langtok=False +): + model = EnsembleModel(models) + + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in sample["net_input"].items() if k != "prev_output_tokens" + } + + # compute the encoder output for each beam + encoder_outs = model.forward_encoder(encoder_input) + np_encoder_outs = encoder_outs[0].encoder_out.cpu().numpy().astype(np.float32) + encoder_mask = 1 - encoder_outs[0].encoder_padding_mask.cpu().numpy().astype( + np.float32 + ) + encoder_mask = np.expand_dims(encoder_mask.T, axis=2) + if has_langtok: + encoder_mask = encoder_mask[1:, :, :] + np_encoder_outs = np_encoder_outs[1, :, :] + masked_encoder_outs = encoder_mask * np_encoder_outs + avg_pool = (masked_encoder_outs / encoder_mask.sum(axis=0)).sum(axis=0) + return avg_pool + + +def main(args): + assert args.path is not None, "--path required for generation!" 
+ assert ( + not args.sampling or args.nbest == args.beam + ), "--sampling requires --nbest to be equal to --beam" + assert ( + args.replace_unk is None or args.raw_text + ), "--replace-unk requires a raw text dataset (--raw-text)" + + args.beam = 1 + utils.import_user_module(args) + + if args.max_tokens is None: + args.max_tokens = 12000 + print(args) + use_cuda = torch.cuda.is_available() and not args.cpu + + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + + # Set dictionaries + try: + src_dict = getattr(task, "source_dictionary", None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + # Load ensemble + print("| loading model(s) from {}".format(args.path)) + models, _model_args = checkpoint_utils.load_model_ensemble( + args.path.split(":"), + arg_overrides=eval(args.model_overrides), + task=task, + ) + + # Optimize ensemble for generation + for model in models: + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(args.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_positions=utils.resolve_max_positions( + task.max_positions(), + ), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + ).next_epoch_itr(shuffle=False) + + num_sentences = 0 + source_sentences = [] + shard_id = 0 + all_avg_pool = None + encoder_has_langtok = ( + safe_hasattr(task.args, "encoder_langtok") + and 
task.args.encoder_langtok is not None + and safe_hasattr(task.args, "lang_tok_replacing_bos_eos") + and not task.args.lang_tok_replacing_bos_eos + ) + with progress_bar.build_progress_bar(args, itr) as t: + for sample in t: + if sample is None: + print("Skipping None") + continue + sample = utils.move_to_cuda(sample) if use_cuda else sample + if "net_input" not in sample: + continue + + prefix_tokens = None + if args.prefix_size > 0: + prefix_tokens = sample["target"][:, : args.prefix_size] + + with torch.no_grad(): + avg_pool = get_avg_pool( + models, + sample, + prefix_tokens, + src_dict, + args.post_process, + has_langtok=encoder_has_langtok, + ) + if all_avg_pool is not None: + all_avg_pool = np.concatenate((all_avg_pool, avg_pool)) + else: + all_avg_pool = avg_pool + + if not isinstance(sample["id"], list): + sample_ids = sample["id"].tolist() + else: + sample_ids = sample["id"] + for i, sample_id in enumerate(sample_ids): + # Remove padding + src_tokens = utils.strip_pad( + sample["net_input"]["src_tokens"][i, :], tgt_dict.pad() + ) + + # Either retrieve the original sentences or regenerate them from tokens. 
+ if align_dict is not None: + src_str = task.dataset(args.gen_subset).src.get_original_text( + sample_id + ) + else: + if src_dict is not None: + src_str = src_dict.string(src_tokens, args.post_process) + else: + src_str = "" + + if not args.quiet: + if src_dict is not None: + print("S-{}\t{}".format(sample_id, src_str)) + + source_sentences.append(f"{sample_id}\t{src_str}") + + num_sentences += sample["nsentences"] + if all_avg_pool.shape[0] >= 1000000: + with open( + f"{args.encoder_save_dir}/all_avg_pool.{args.source_lang}.{shard_id}", + "w", + ) as avg_pool_file: + all_avg_pool.tofile(avg_pool_file) + with open( + f"{args.encoder_save_dir}/sentences.{args.source_lang}.{shard_id}", + "w", + ) as sentence_file: + sentence_file.writelines(f"{line}\n" for line in source_sentences) + all_avg_pool = None + source_sentences = [] + shard_id += 1 + + if all_avg_pool is not None: + with open( + f"{args.encoder_save_dir}/all_avg_pool.{args.source_lang}.{shard_id}", "w" + ) as avg_pool_file: + all_avg_pool.tofile(avg_pool_file) + with open( + f"{args.encoder_save_dir}/sentences.{args.source_lang}.{shard_id}", "w" + ) as sentence_file: + sentence_file.writelines(f"{line}\n" for line in source_sentences) + return None + + +def cli_main(): + parser = options.get_generation_parser() + parser.add_argument( + "--encoder-save-dir", + default="", + type=str, + metavar="N", + help="directory to save encoder outputs", + ) + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/criss/sentence_retrieval/encoder_analysis.py b/fairseq/examples/criss/sentence_retrieval/encoder_analysis.py new file mode 100644 index 0000000..b41bfbe --- /dev/null +++ b/fairseq/examples/criss/sentence_retrieval/encoder_analysis.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import argparse +import glob + +import numpy as np + + +DIM = 1024 + + +def compute_dist(source_embs, target_embs, k=5, return_sim_mat=False): + target_ids = [tid for tid in target_embs] + source_mat = np.stack(source_embs.values(), axis=0) + normalized_source_mat = source_mat / np.linalg.norm( + source_mat, axis=1, keepdims=True + ) + target_mat = np.stack(target_embs.values(), axis=0) + normalized_target_mat = target_mat / np.linalg.norm( + target_mat, axis=1, keepdims=True + ) + sim_mat = normalized_source_mat.dot(normalized_target_mat.T) + if return_sim_mat: + return sim_mat + neighbors_map = {} + for i, sentence_id in enumerate(source_embs): + idx = np.argsort(sim_mat[i, :])[::-1][:k] + neighbors_map[sentence_id] = [target_ids[tid] for tid in idx] + return neighbors_map + + +def load_embeddings(directory, LANGS): + sentence_embeddings = {} + sentence_texts = {} + for lang in LANGS: + sentence_embeddings[lang] = {} + sentence_texts[lang] = {} + lang_dir = f"{directory}/{lang}" + embedding_files = glob.glob(f"{lang_dir}/all_avg_pool.{lang}.*") + for embed_file in embedding_files: + shard_id = embed_file.split(".")[-1] + embeddings = np.fromfile(embed_file, dtype=np.float32) + num_rows = embeddings.shape[0] // DIM + embeddings = embeddings.reshape((num_rows, DIM)) + + with open(f"{lang_dir}/sentences.{lang}.{shard_id}") as sentence_file: + for idx, line in enumerate(sentence_file): + sentence_id, sentence = line.strip().split("\t") + sentence_texts[lang][sentence_id] = sentence + sentence_embeddings[lang][sentence_id] = embeddings[idx, :] + + return sentence_embeddings, sentence_texts + + +def compute_accuracy(directory, LANGS): + sentence_embeddings, sentence_texts = load_embeddings(directory, LANGS) + + top_1_accuracy = {} + + top1_str = " ".join(LANGS) + "\n" + for source_lang in LANGS: + top_1_accuracy[source_lang] = {} + top1_str 
+= f"{source_lang} " + for target_lang in LANGS: + top1 = 0 + top5 = 0 + neighbors_map = compute_dist( + sentence_embeddings[source_lang], sentence_embeddings[target_lang] + ) + for sentence_id, neighbors in neighbors_map.items(): + if sentence_id == neighbors[0]: + top1 += 1 + if sentence_id in neighbors[:5]: + top5 += 1 + n = len(sentence_embeddings[target_lang]) + top1_str += f"{top1/n} " + top1_str += "\n" + + print(top1_str) + print(top1_str, file=open(f"{directory}/accuracy", "w")) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Analyze encoder outputs") + parser.add_argument("directory", help="Source language corpus") + parser.add_argument("--langs", help="List of langs") + args = parser.parse_args() + langs = args.langs.split(",") + compute_accuracy(args.directory, langs) diff --git a/fairseq/examples/criss/sentence_retrieval/sentence_retrieval_tatoeba.sh b/fairseq/examples/criss/sentence_retrieval/sentence_retrieval_tatoeba.sh new file mode 100644 index 0000000..0428d8b --- /dev/null +++ b/fairseq/examples/criss/sentence_retrieval/sentence_retrieval_tatoeba.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+# +source_lang=kk_KZ +target_lang=en_XX +MODEL=criss_checkpoints/criss.3rd.pt +SPM=criss_checkpoints/sentence.bpe.model +SPLIT=test +LANG_DICT=criss_checkpoints/lang_dict.txt +ENCODER_ANALYSIS=sentence_retrieval/encoder_analysis.py +SAVE_ENCODER=save_encoder.py +ENCODER_SAVE_ROOT=sentence_embeddings/$MODEL + + + +DATA_DIR=data_tmp +INPUT_DIR=$DATA_DIR/${source_lang}-${target_lang}-tatoeba +ENCODER_SAVE_DIR=${ENCODER_SAVE_ROOT}/${source_lang}-${target_lang} +mkdir -p $ENCODER_SAVE_DIR/${target_lang} +mkdir -p $ENCODER_SAVE_DIR/${source_lang} + +# Save encoder outputs for source sentences +python $SAVE_ENCODER \ + ${INPUT_DIR} \ + --path ${MODEL} \ + --task translation_multi_simple_epoch \ + --lang-dict ${LANG_DICT} \ + --gen-subset ${SPLIT} \ + --bpe 'sentencepiece' \ + --lang-pairs ${source_lang}-${target_lang} \ + -s ${source_lang} -t ${target_lang} \ + --sentencepiece-model ${SPM} \ + --remove-bpe 'sentencepiece' \ + --beam 1 \ + --lang-tok-style mbart \ + --encoder-save-dir ${ENCODER_SAVE_DIR}/${source_lang} + +# Save encoder outputs for target sentences +python $SAVE_ENCODER \ + ${INPUT_DIR} \ + --path ${MODEL} \ + --lang-dict ${LANG_DICT} \ + --task translation_multi_simple_epoch \ + --gen-subset ${SPLIT} \ + --bpe 'sentencepiece' \ + --lang-pairs ${target_lang}-${source_lang} \ + -t ${source_lang} -s ${target_lang} \ + --sentencepiece-model ${SPM} \ + --remove-bpe 'sentencepiece' \ + --beam 1 \ + --lang-tok-style mbart \ + --encoder-save-dir ${ENCODER_SAVE_DIR}/${target_lang} + +# Analyze sentence retrieval accuracy +python $ENCODER_ANALYSIS --langs "${source_lang},${target_lang}" ${ENCODER_SAVE_DIR} diff --git a/fairseq/examples/criss/unsupervised_mt/eval.sh b/fairseq/examples/criss/unsupervised_mt/eval.sh new file mode 100644 index 0000000..03b773e --- /dev/null +++ b/fairseq/examples/criss/unsupervised_mt/eval.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. 
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# +SRC=si_LK +TGT=en_XX +MODEL=criss_checkpoints/criss.3rd.pt + +MULTIBLEU=mosesdecoder/scripts/generic/multi-bleu.perl +MOSES=mosesdecoder +REPLACE_UNICODE_PUNCT=$MOSES/scripts/tokenizer/replace-unicode-punctuation.perl +NORM_PUNC=$MOSES/scripts/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$MOSES/scripts/tokenizer/remove-non-printing-char.perl +TOKENIZER=$MOSES/scripts/tokenizer/tokenizer.perl +GEN_TMP_DIR=gen_tmp +LANG_DICT=criss_checkpoints/lang_dict.txt + +if [ ! -d "mosesdecoder" ]; then + git clone https://github.com/moses-smt/mosesdecoder +fi +mkdir -p $GEN_TMP_DIR +fairseq-generate data_tmp/${SRC}-${TGT}-flores \ + --task translation_multi_simple_epoch \ + --max-tokens 2000 \ + --path ${MODEL} \ + --skip-invalid-size-inputs-valid-test \ + --beam 5 --lenpen 1.0 --gen-subset test \ + --remove-bpe=sentencepiece \ + --source-lang ${SRC} --target-lang ${TGT} \ + --decoder-langtok --lang-pairs 
'en_XX-ar_AR,en_XX-de_DE,en_XX-es_XX,en_XX-fr_XX,en_XX-hi_IN,en_XX-it_IT,en_XX-ja_XX,en_XX-ko_KR,en_XX-nl_XX,en_XX-ru_RU,en_XX-zh_CN,en_XX-tr_TR,en_XX-vi_VN,en_XX-ro_RO,en_XX-my_MM,en_XX-ne_NP,en_XX-si_LK,en_XX-cs_CZ,en_XX-lt_LT,en_XX-kk_KZ,en_XX-gu_IN,en_XX-fi_FI,en_XX-et_EE,en_XX-lv_LV,ar_AR-en_XX,cs_CZ-en_XX,de_DE-en_XX,es_XX-en_XX,et_EE-en_XX,fi_FI-en_XX,fr_XX-en_XX,gu_IN-en_XX,hi_IN-en_XX,it_IT-en_XX,ja_XX-en_XX,kk_KZ-en_XX,ko_KR-en_XX,lt_LT-en_XX,lv_LV-en_XX,my_MM-en_XX,ne_NP-en_XX,nl_XX-en_XX,ro_RO-en_XX,ru_RU-en_XX,si_LK-en_XX,tr_TR-en_XX,vi_VN-en_XX,zh_CN-en_XX,ar_AR-es_XX,es_XX-ar_AR,ar_AR-hi_IN,hi_IN-ar_AR,ar_AR-zh_CN,zh_CN-ar_AR,cs_CZ-es_XX,es_XX-cs_CZ,cs_CZ-hi_IN,hi_IN-cs_CZ,cs_CZ-zh_CN,zh_CN-cs_CZ,de_DE-es_XX,es_XX-de_DE,de_DE-hi_IN,hi_IN-de_DE,de_DE-zh_CN,zh_CN-de_DE,es_XX-hi_IN,hi_IN-es_XX,es_XX-zh_CN,zh_CN-es_XX,et_EE-es_XX,es_XX-et_EE,et_EE-hi_IN,hi_IN-et_EE,et_EE-zh_CN,zh_CN-et_EE,fi_FI-es_XX,es_XX-fi_FI,fi_FI-hi_IN,hi_IN-fi_FI,fi_FI-zh_CN,zh_CN-fi_FI,fr_XX-es_XX,es_XX-fr_XX,fr_XX-hi_IN,hi_IN-fr_XX,fr_XX-zh_CN,zh_CN-fr_XX,gu_IN-es_XX,es_XX-gu_IN,gu_IN-hi_IN,hi_IN-gu_IN,gu_IN-zh_CN,zh_CN-gu_IN,hi_IN-zh_CN,zh_CN-hi_IN,it_IT-es_XX,es_XX-it_IT,it_IT-hi_IN,hi_IN-it_IT,it_IT-zh_CN,zh_CN-it_IT,ja_XX-es_XX,es_XX-ja_XX,ja_XX-hi_IN,hi_IN-ja_XX,ja_XX-zh_CN,zh_CN-ja_XX,kk_KZ-es_XX,es_XX-kk_KZ,kk_KZ-hi_IN,hi_IN-kk_KZ,kk_KZ-zh_CN,zh_CN-kk_KZ,ko_KR-es_XX,es_XX-ko_KR,ko_KR-hi_IN,hi_IN-ko_KR,ko_KR-zh_CN,zh_CN-ko_KR,lt_LT-es_XX,es_XX-lt_LT,lt_LT-hi_IN,hi_IN-lt_LT,lt_LT-zh_CN,zh_CN-lt_LT,lv_LV-es_XX,es_XX-lv_LV,lv_LV-hi_IN,hi_IN-lv_LV,lv_LV-zh_CN,zh_CN-lv_LV,my_MM-es_XX,es_XX-my_MM,my_MM-hi_IN,hi_IN-my_MM,my_MM-zh_CN,zh_CN-my_MM,ne_NP-es_XX,es_XX-ne_NP,ne_NP-hi_IN,hi_IN-ne_NP,ne_NP-zh_CN,zh_CN-ne_NP,nl_XX-es_XX,es_XX-nl_XX,nl_XX-hi_IN,hi_IN-nl_XX,nl_XX-zh_CN,zh_CN-nl_XX,ro_RO-es_XX,es_XX-ro_RO,ro_RO-hi_IN,hi_IN-ro_RO,ro_RO-zh_CN,zh_CN-ro_RO,ru_RU-es_XX,es_XX-ru_RU,ru_RU-hi_IN,hi_IN-ru_RU,ru_RU-zh_CN,zh_CN-ru_RU,si_LK-es_XX,es_XX-si_LK,si_LK-hi_IN,hi_IN-si_LK,si_LK-z
h_CN,zh_CN-si_LK,tr_TR-es_XX,es_XX-tr_TR,tr_TR-hi_IN,hi_IN-tr_TR,tr_TR-zh_CN,zh_CN-tr_TR,vi_VN-es_XX,es_XX-vi_VN,vi_VN-hi_IN,hi_IN-vi_VN,vi_VN-zh_CN,zh_CN-vi_VN' \ + --lang-dict ${LANG_DICT} --lang-tok-style 'mbart' --sampling-method 'temperature' --sampling-temperature '1.0' > $GEN_TMP_DIR/${SRC}_${TGT}.gen +cat $GEN_TMP_DIR/${SRC}_${TGT}.gen | grep -P "^T-" | cut -f2 | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l ${TGT:0:2} | $REM_NON_PRINT_CHAR | $TOKENIZER -no-escape ${TGT:0:2} > $GEN_TMP_DIR/${SRC}_${TGT}.hyp +cat $GEN_TMP_DIR/${SRC}_${TGT}.gen | grep -P "^H-" | cut -f3 | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l ${TGT:0:2} | $REM_NON_PRINT_CHAR | $TOKENIZER -no-escape ${TGT:0:2} > $GEN_TMP_DIR/${SRC}_${TGT}.ref +${MULTIBLEU} $GEN_TMP_DIR/${SRC}_${TGT}.ref < $GEN_TMP_DIR/${SRC}_${TGT}.hyp diff --git a/fairseq/examples/cross_lingual_language_model/README.md b/fairseq/examples/cross_lingual_language_model/README.md new file mode 100644 index 0000000..af9128e --- /dev/null +++ b/fairseq/examples/cross_lingual_language_model/README.md @@ -0,0 +1,77 @@ +# Cross-Lingual Language Model Pre-training + +Below are some details for training Cross-Lingual Language Models (XLM) - similar to the ones presented in [Lample & Conneau, 2019](https://arxiv.org/pdf/1901.07291.pdf) - in Fairseq. The current implementation only supports the Masked Language Model (MLM) from the paper above. + +## Downloading and Tokenizing Monolingual Data + +Pointers to the monolingual data from wikipedia, used for training the XLM-style MLM model as well as details on processing (tokenization and BPE) it can be found in the [XLM Github Repository](https://github.com/facebookresearch/XLM#download--preprocess-monolingual-data). + +Let's assume the following for the code snippets in later sections to work +- Processed data is in the folder: monolingual_data/processed +- Each language has 3 files for train, test and validation. 
For example we have the following files for English: + train.en, test.en, valid.en +- We are training a model for 5 languages: Arabic (ar), German (de), English (en), Hindi (hi) and French (fr) +- The vocabulary file is monolingual_data/processed/vocab_mlm + + +## Fairseq Pre-processing and Binarization + +Pre-process and binarize the data with the MaskedLMDictionary and cross_lingual_lm task + +```bash +# Ensure the output directory exists +DATA_DIR=monolingual_data/fairseq_processed +mkdir -p "$DATA_DIR" + +for lg in ar de en hi fr +do + + fairseq-preprocess \ + --task cross_lingual_lm \ + --srcdict monolingual_data/processed/vocab_mlm \ + --only-source \ + --trainpref monolingual_data/processed/train \ + --validpref monolingual_data/processed/valid \ + --testpref monolingual_data/processed/test \ + --destdir monolingual_data/fairseq_processed \ + --workers 20 \ + --source-lang $lg + + # Since we only have a source language, the output file has a None for the + # target language. Remove this + + for stage in train test valid + do + sudo mv "$DATA_DIR/$stage.$lg-None.$lg.bin" "$DATA_DIR/$stage.$lg.bin" + sudo mv "$DATA_DIR/$stage.$lg-None.$lg.idx" "$DATA_DIR/$stage.$lg.idx" + + done + +done +``` + +## Train a Cross-lingual Language Model similar to the XLM MLM model + +Use the following command to train the model on 5 languages. + +``` +fairseq-train \ +--task cross_lingual_lm monolingual_data/fairseq_processed \ +--save-dir checkpoints/mlm \ +--max-update 2400000 --save-interval 1 --no-epoch-checkpoints \ +--arch xlm_base \ +--optimizer adam --lr-scheduler reduce_lr_on_plateau \ +--lr-shrink 0.5 --lr 0.0001 --stop-min-lr 1e-09 \ +--dropout 0.1 \ +--criterion legacy_masked_lm_loss \ +--max-tokens 2048 --tokens-per-sample 256 --attention-dropout 0.1 \ +--dataset-impl lazy --seed 0 \ +--masked-lm-only \ +--monolingual-langs 'ar,de,en,hi,fr' --num-segment 5 \ +--ddp-backend=legacy_ddp +``` + +Some Notes: +- Using tokens_per_sample greater than 256 can cause OOM (out-of-memory) issues.
Usually, since MLM packs in streams of text, this parameter doesn't need much tuning. +- The Evaluation workflow for computing MLM Perplexity on test data is in progress. +- Finetuning this model on a downstream task is something which is not currently available. diff --git a/fairseq/examples/data2vec/README.md b/fairseq/examples/data2vec/README.md new file mode 100644 index 0000000..a0ff21b --- /dev/null +++ b/fairseq/examples/data2vec/README.md @@ -0,0 +1,261 @@ +# data2vec 2.0 + +data2vec 2.0 improves the training efficiency of the original data2vec algorithm. We make the following improvements for efficiency considerations - we forward only the unmasked timesteps through the encoder, we use a convolutional decoder, and we use multimasking to amortize the compute overhead of the teacher model. You can find details in the paper [Efficient Self-supervised Learning with Contextualized Target Representations for Vision, Speech and Language](https://arxiv.org/abs/2212.07525) and our [blog post](https://ai.facebook.com/blog/ai-self-supervised-learning-data2vec/). + +## Pretrained and finetuned models +### Vision +| Model | Finetuning split | Link +|---|---|--- +data2vec ViT-B | No fine-tuning | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/base_imagenet.pt) +data2vec ViT-B | Imagenet-1K | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/base_imagenet_ft.pt) +data2vec ViT-L | No fine-tuning | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/large_imagenet.pt) +data2vec ViT-L | Imagenet-1K | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/large_imagenet_ft.pt) +data2vec ViT-H | No fine-tuning | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/huge_imagenet.pt) +data2vec ViT-H | Imagenet-1K | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/huge_imagenet_ft.pt) + +Only the vision models are licensed under CC-BY-NC.
+### Speech + +| Model | Finetuning split | Dataset | Link +|---|---|---|--- +data2vec Base | No fine-tuning | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/base_libri.pt) +data2vec Base | 960 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/base_libri_960h.pt) +data2vec Large | No fine-tuning | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/large_vox.pt) +data2vec Large | 960 hours | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/large_vox_960h.pt) + +### NLP + +| Model | Fine-tuning data | Dataset | Link | Dict | BPE +|---|---|---|---|---|--- +data2vec Base | No fine-tuning | Books + Wiki | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec2/nlp_base.pt) | [dict](https://dl.fbaipublicfiles.com/fairseq/data2vec2/dict.txt) | [encoder](https://dl.fbaipublicfiles.com/fairseq/data2vec2/encoder.json) / [vocab](https://dl.fbaipublicfiles.com/fairseq/data2vec2/vocab.bpe) + +[//]: # (## Data Preparation) + +[//]: # () +[//]: # (### Vision) + +[//]: # (add details) + +[//]: # (### Speech) + +[//]: # (add details) + +[//]: # () +[//]: # (### NLP) + +[//]: # (add details) + + +## Commands to train different models using data2vec 2.0 + +### Vision + +Commands to pretrain different model configurations +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name base_images_only_task task.data=/path/to/dir +``` + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name large_images_only_task task.data=/path/to/dir +``` + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name huge_images14_only_task task.data=/path/to/dir +``` + 
+Commands to finetune different model configurations + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/vision/finetuning \ +--config-name mae_imagenet_clean task.data=/path/to/dir model.model_path=/path/to/pretrained/model +``` + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/vision/finetuning \ +--config-name mae_imagenet_large_clean task.data=/path/to/dir model.model_path=/path/to/pretrained/model +``` + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/vision/finetuning \ +--config-name mae_imagenet_huge_clean task.data=/path/to/dir model.model_path=/path/to/pretrained/model +``` + +### Speech + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name base_audio_only_task task.data=/path/to/manifests +``` + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name large_audio_only_task task.data=/path/to/manifests +``` + +Finetuning: + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/wav2vec/config/finetuning --config-name vox_10h \ +task.data=/path/to/manifests model.w2v_path=/path/to/pretrained/model common.user_dir=examples/data2vec +``` + +Replace vox_10h with the right config depending on your model and fine-tuning split. +See examples/wav2vec/config/finetuning for all available configs. 
+ +### NLP + +Commands to pretrain +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2 \ +--config-name base_text_only_task task.data=/path/to/file +``` + +Commands to fine-tune all GLUE tasks +```shell script +$ task=cola # choose from [cola|qnli|mrpc|rte|sst_2|mnli|qqp|sts_b] +$ lr=1e-5 # sweep [1e-5|2e-5|4e-5|6e-5] for each task +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/v2/text_finetuning \ +--config-name $task task.data=/path/to/file model.model_path=/path/to/pretrained/model "optimization.lr=[${lr}]" +``` + +# data2vec + +data2vec is a framework for self-supervised representation learning for images, speech, and text as described in [data2vec: A General Framework for Self-supervised Learning in Speech, Vision and Language (Baevski et al., 2022)](https://ai.facebook.com/research/data2vec-a-general-framework-for-self-supervised-learning-in-speech-vision-and-language). The algorithm uses the same learning mechanism for different modalities. + + +## Pre-trained models + +### Vision + +Code and pre-trained models for data2vec visions can be found [here](https://github.com/facebookresearch/data2vec_vision/tree/main/beit). 
+ +### Speech + +| Model | Finetuning split | Dataset | Link +|---|---|---|--- +data2vec Base | No fine-tuning | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/audio_base_ls.pt) +data2vec Base | 10 minutes | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/audio_base_ls_10m.pt) +data2vec Base | 100 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/audio_base_ls_100h.pt) +data2vec Base | 960 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/audio_base_ls_960h.pt) +data2vec Large | No fine-tuning | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/vox_pretrained.pt) +data2vec Large | 10 minutes | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/vox_10m.pt) +data2vec Large | 100 hours | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/vox_100h.pt) +data2vec Large | 960 hours | [Libri-light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/vox_960h.pt) +--- + +### NLP + +Model | Fine-tuning data | Dataset | Link +|---|---|---|---| +data2vec Base | No fine-tuning | Books + Wiki | [download](https://dl.fbaipublicfiles.com/fairseq/data2vec/nlp_base.pt) + +## Training a new speech model with the CLI tools + +Given a directory containing wav files to be used for pretraining (we recommend splitting each file into separate files, each 10 to 30 seconds in length) + +### Prepare training data manifest: + +First, install the `soundfile` library: +```shell script +pip install soundfile +``` + +Next, run: + +```shell script +$ python examples/wav2vec/wav2vec_manifest.py /path/to/waves
--dest /manifest/path --ext $ext --valid-percent $valid +``` + +$ext should be set to flac, wav, or whatever format your dataset happens to use that soundfile can read. + +$valid should be set to some reasonable percentage (like 0.01) of training data to use for validation. +To use a pre-defined validation set (like dev-other from librispeech), set it to 0 and then overwrite valid.tsv with a +separately pre-processed manifest file. + +### Train a data2vec Base model: + +This configuration was used for the base model trained on the Librispeech dataset in the data2vec paper. + +Note that the input is expected to be single channel, sampled at 16 kHz. + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/audio/pretraining \ +--config-name base_librispeech task.data=/path/to/manifests common.user_dir=examples/data2vec +``` + +Note: you can simulate 16 GPUs by using k GPUs and adding command line parameters +`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 16/k + +### Fine-tune a pre-trained model with CTC: + +Fine-tuning a model requires parallel audio and label files, as well as a vocabulary file in fairseq format. +A letter vocabulary can be downloaded [here](https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt). 
+An example [script](../wav2vec/libri_labels.py) that generates labels for the Librispeech dataset from the tsv file produced by wav2vec_manifest.py can be used as follows: + +```shell script +split=train +$ python libri_labels.py /path/to/tsv --output-dir /output/dir --output-name $split +``` + +Fine-tuning on 100h of Librispeech with letter targets: +```shell script +$ fairseq-hydra-train \ +    distributed_training.distributed_port=$PORT \ +    task.data=/path/to/data \ +    model.w2v_path=/path/to/model.pt \ +    --config-dir /path/to/fairseq-py/examples/wav2vec/config/finetuning \ +    --config-name base_100h common.user_dir=examples/data2vec +``` + +There are other config files in the config/finetuning directory that can be used to fine-tune on other splits. +You can specify the right config via the `--config-name` parameter. + +Decoding with a language model during training requires flashlight [python bindings](https://github.com/facebookresearch/flashlight/tree/master/bindings/python) (previously called [wav2letter](https://github.com/facebookresearch/wav2letter)). +If you want to use a language model, add `+criterion.wer_args='[/path/to/kenlm, /path/to/lexicon, 2, -1]'` to the command line. + +### Evaluating a CTC model: + +Evaluating a CTC model with a language model requires [flashlight python bindings](https://github.com/facebookresearch/flashlight/tree/master/bindings/python) (previously called [wav2letter](https://github.com/facebookresearch/wav2letter)) to be installed. + +The Fairseq transformer language model used in the wav2vec 2.0 paper can be obtained from the [wav2letter model repository](https://github.com/facebookresearch/wav2letter/tree/master/recipes/sota/2019). +Be sure to upper-case the language model vocab after downloading it. + +The letter dictionary for pre-trained models can be found [here](https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt). 
+ +Next, run the evaluation command: + +```shell script +python examples/speech_recognition/new/infer.py --config-dir examples/speech_recognition/new/conf \ +--config-name infer task=audio_finetuning task.data=/path/to/manifests common.user_dir=examples/data2vec \ +task.labels=ltr decoding.type=kenlm \ +decoding.lmweight=${lmweight} decoding.wordscore=${wordscore} decoding.silweight=${silscore} \ +decoding.lexicon=/path/to/lexicon \ +decoding.lmpath=/path/to/lm decoding.unique_wer_file=True \ +dataset.gen_subset=dev_clean,dev_other,test_clean,test_other \ +common_eval.path=/path/to/checkpoint.pt decoding.beam=1500 distributed_training.distributed_world_size=${num_gpus} +``` + +To get raw numbers, use decoding.type=viterbi and omit the lexicon. To use the transformer language model, use decoding.type=fairseqlm. + +## Training a new NLP model with the CLI tools + +Please follow the [RoBERTa](../roberta/README.md) instructions to preprocess your data. To train a data2vec model on it, run: + +```shell script +$ python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/text/pretraining \ +--config-name base task.data=/path/to/data common.user_dir=examples/data2vec +``` + +As for speech models, you can simulate 16 GPUs by using the update_freq parameter. 
+ +### Finetuning data2vec-text on GLUE + +Please use a command similar to this: + +```shell +$ python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task task.data=$data_path checkpoint.restore_file="${/path/to/pretrained/model.pt}" +``` diff --git a/fairseq/examples/data2vec/__init__.py b/fairseq/examples/data2vec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/data2vec/config/audio/classification/base_classification.yaml b/fairseq/examples/data2vec/config/audio/classification/base_classification.yaml new file mode 100644 index 0000000..fdb9c8d --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/classification/base_classification.yaml @@ -0,0 +1,70 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + all_gather_list_size: 70000 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: mAP + maximize_best_checkpoint_metric: true + +task: + _name: audio_classification + data: ??? + normalize: true + labels: lbl + +dataset: + num_workers: 6 + max_tokens: 2560000 + skip_invalid_size_inputs_valid_test: true + valid_subset: eval + validate_interval: 5 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: model + can_sum: false + log_keys: + - _predictions + - _targets + +optimization: + max_update: 30000 + lr: [0.00006] # scratch 53-5 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 5000 + +model: + _name: audio_classification + model_path: ??? 
+ apply_mask: true + mask_prob: 0.6 + mask_length: 5 # scratch 1 + mask_channel_prob: 0 + mask_channel_length: 64 + layerdrop: 0.1 + dropout: 0.1 + activation_dropout: 0.1 + attention_dropout: 0.2 + feature_grad_mult: 0 # scratch 1 + label_mixup: true + source_mixup: 0.5 + prediction_mode: lin_softmax # scratch average_sigmoid + diff --git a/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1.yaml b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1.yaml new file mode 100644 index 0000000..881a158 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1.yaml @@ -0,0 +1,35 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1g.yaml b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1g.yaml new file mode 100644 index 0000000..de7894d --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_1g.yaml @@ -0,0 +1,35 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + 
exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 100 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_2.yaml b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_2.yaml new file mode 100644 index 0000000..b016cac --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/classification/run_config/slurm_2.yaml @@ -0,0 +1,35 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + 
max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/audioset.yaml b/fairseq/examples/data2vec/config/audio/pretraining/audioset.yaml new file mode 100644 index 0000000..dd30fbe --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/audioset.yaml @@ -0,0 +1,91 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: /private/home/abaevski/data/audioset + max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + +dataset: + num_workers: 6 + max_tokens: 3400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 24 + ddp_backend: legacy_ddp + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var +# - avg_self_attn +# - weights + +optimization: + max_update: 200000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: cosine + warmup_updates: 10000 + +model: + _name: data2vec_audio + extractor_mode: layer_norm + encoder_layerdrop: 0.05 + dropout_input: 0.0 + dropout_features: 0.0 + feature_grad_mult: 1.0 + encoder_embed_dim: 768 + + mask_prob: 0.65 + mask_length: 10 + + loss_beta: 0 + loss_scale: null + + instance_norm_target_layer: true + layer_norm_targets: true + average_top_k_layers: 12 + + self_attn_norm_type: deepnorm + final_norm_type: deepnorm + + pos_conv_depth: 5 + conv_pos: 95 + + ema_decay: 0.999 + ema_end_decay: 0.9999 + ema_anneal_end_step: 30000 + ema_transformer_only: true + ema_layers_only: false + + require_same_masks: true + mask_dropout: 0 diff --git 
a/fairseq/examples/data2vec/config/audio/pretraining/base_librispeech.yaml b/fairseq/examples/data2vec/config/audio/pretraining/base_librispeech.yaml new file mode 100644 index 0000000..c332c5a --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/base_librispeech.yaml @@ -0,0 +1,83 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? + max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + +dataset: + num_workers: 6 + max_tokens: 3800000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: legacy_ddp + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + +optimization: + max_update: 400000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.03,0.9,0.07] + +model: + _name: data2vec_audio + extractor_mode: layer_norm + encoder_layerdrop: 0.05 + dropout_input: 0.0 + dropout_features: 0.0 + feature_grad_mult: 1.0 + encoder_embed_dim: 768 + + mask_prob: 0.65 + mask_length: 10 + + loss_beta: 0 + loss_scale: null + + instance_norm_target_layer: true + average_top_k_layers: 8 + + pos_conv_depth: 5 + conv_pos: 95 + + ema_decay: 0.999 + ema_end_decay: 0.9999 + ema_anneal_end_step: 30000 + ema_transformer_only: true + ema_layers_only: true + + require_same_masks: true + mask_dropout: 0 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/local.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ 
b/fairseq/examples/data2vec/config/audio/pretraining/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1.yaml new file mode 100644 index 0000000..732f018 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1_aws.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1_aws.yaml new file mode 100644 index 0000000..e2bab56 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_1_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: 
':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2.yaml new file mode 100644 index 0000000..ec53dc2 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: 
devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2_aws.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..70cc8cb --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_2_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - task.post_save_script + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_3.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_3.yaml new file mode 100644 index 0000000..14b47d1 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_3.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - 
task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4_aws.yaml 
b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4_aws.yaml new file mode 100644 index 0000000..0231b26 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_4_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - task.post_save_script + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_6_aws.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_6_aws.yaml new file mode 100644 index 0000000..9a4e43a --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_6_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: 
+ dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 6 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_8_aws.yaml b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_8_aws.yaml new file mode 100644 index 0000000..78c9f57 --- /dev/null +++ b/fairseq/examples/data2vec/config/audio/pretraining/run_config/slurm_8_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/text/pretraining/base.yaml b/fairseq/examples/data2vec/config/text/pretraining/base.yaml new file mode 100644 index 0000000..c6b07c4 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/base.yaml @@ -0,0 +1,77 @@ +# @package _group_ +common: + fp16: true + log_format: json + log_interval: 200 
+ tensorboard_logdir: tb + +checkpoint: + no_epoch_checkpoints: true + save_interval_updates: 50000 + keep_interval_updates: 1 + +distributed_training: + distributed_world_size: 16 + ddp_backend: legacy_ddp + +task: + _name: masked_lm + data: ??? + sample_break_mode: complete_doc + tokens_per_sample: 512 + include_target_tokens: true + random_token_prob: 0 + leave_unmasked_prob: 0 + mask_prob: 0.35 + mask_multiple_length: 4 + +criterion: model + +dataset: + max_tokens: 8192 + ignore_unused_valid_subsets: true + skip_invalid_size_inputs_valid_test: true + +optimizer: + _name: adam + weight_decay: 0.01 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: cosine + warmup_updates: 10000 + +optimization: + clip_norm: 5 + lr: [0.0002] + max_update: 1000000 + update_freq: [1] + +model: + _name: data2vec_text + head_layers: 2 + average_top_k_layers: 10 + layer_norm_target_layer: true + loss_scale: 1 + ema_decay: 0.999 + ema_end_decay: 0.9999 + ema_anneal_end_step: 300000 + loss_beta: 4 + ema_transformer_layers_only: true + + transformer: + dropout: 0.1 + attention_dropout: 0.1 + layernorm_embedding: true + activation_fn: gelu + no_scale_embedding: true + max_source_positions: 512 + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 12 + attention_heads: 12 + normalize_before: false + learned_pos: true + layerdrop: 0 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/local.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_1_aws.yaml 
b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_1_aws.yaml new file mode 100644 index 0000000..4bac45a --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_1_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 + exclude: a100-st-p4d24xlarge-471 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2.yaml new file mode 100644 index 0000000..006a0f2 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: 
/checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2_aws.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..4292198 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_2_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 + exclude: a100-st-p4d24xlarge-471 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_3.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_3.yaml new file mode 100644 index 0000000..0e1555d --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_3.yaml @@ 
-0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 
8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4_aws.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4_aws.yaml new file mode 100644 index 0000000..5df84cd --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_4_aws.yaml @@ -0,0 +1,41 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 + exclude: a100-st-p4d24xlarge-471 + +distributed_training: + distributed_world_size: 32 + ddp_backend: legacy_ddp diff --git a/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_8_aws.yaml b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_8_aws.yaml new file mode 100644 index 0000000..5b32c23 --- /dev/null +++ b/fairseq/examples/data2vec/config/text/pretraining/run_config/slurm_8_aws.yaml @@ -0,0 +1,41 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - 
distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 8 + name: pt + partition: wav2vec + max_num_timeout: 30 + exclude: a100-st-p4d24xlarge-471 + +distributed_training: + distributed_world_size: 64 + ddp_backend: legacy_ddp diff --git a/fairseq/examples/data2vec/config/v2/base_audio_only_task.yaml b/fairseq/examples/data2vec/config/v2/base_audio_only_task.yaml new file mode 100644 index 0000000..65a9ab3 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/base_audio_only_task.yaml @@ -0,0 +1,113 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: false + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: /private/home/abaevski/data/librispeech/full + max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + precompute_mask_config: {} + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 8 + ddp_backend: legacy_ddp + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - 
ema_norm + - masked_pct + +optimization: + max_update: 400000 + lr: [0.00075] + debug_param_names: true + +optimizer: + _name: adam + adam_betas: [ 0.9,0.98 ] + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: cosine + warmup_updates: 8000 + +model: + _name: data2vec_multi + + loss_beta: 0 + loss_scale: null + + depth: 12 + embed_dim: 768 + clone_batch: 8 + + ema_decay: 0.999 + ema_end_decay: 0.99999 + ema_anneal_end_step: 75000 + ema_encoder_only: false + + average_top_k_layers: 8 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: false + + layerdrop: 0.05 + norm_eps: 1e-5 + + supported_modality: AUDIO + + modalities: + audio: + feature_encoder_spec: '[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]' + conv_pos_depth: 5 + conv_pos_width: 95 + conv_pos_groups: 16 + prenet_depth: 0 + mask_prob: 0.5 + mask_prob_adjust: 0.05 + inverse_mask: false + mask_length: 5 + mask_noise_std: 0.01 + mask_dropout: 0 + add_masks: false + ema_local_encoder: false + use_alibi_encoder: true + prenet_layerdrop: 0.05 + prenet_dropout: 0.1 + learned_alibi_scale: true + learned_alibi_scale_per_head: true + decoder: + input_dropout: 0.1 + decoder_dim: 384 + decoder_groups: 16 + decoder_kernel: 7 + decoder_layers: 4 diff --git a/fairseq/examples/data2vec/config/v2/base_images_only_task.yaml b/fairseq/examples/data2vec/config/v2/base_images_only_task.yaml new file mode 100644 index 0000000..ff0c247 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/base_images_only_task.yaml @@ -0,0 +1,116 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: mae_image_pretraining + data: /datasets01/imagenet_full_size/061417/ + rebuild_batches: true + 
local_cache_path: /scratch/cache_abaevski/imagenet + key: source + precompute_mask_config: {} + +dataset: + num_workers: 10 + batch_size: 16 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 375300 + lr: [ 0.001 ] + debug_param_names: true + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 1e-3 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 50040 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + ema_decay: 0.9998 + ema_end_decay: 0.99999 + ema_anneal_end_step: 100000 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: true + end_of_block_targets: false + + depth: 10 + average_top_k_layers: 10 + clone_batch: 16 + + norm_eps: 1e-6 + + min_target_var: 0 + min_pred_var: 0 + + encoder_dropout: 0 + post_mlp_drop: 0 + attention_dropout: 0 + activation_dropout: 0 + + supported_modality: IMAGE + cls_loss: 0.01 + + ema_encoder_only: false + + modalities: + image: + inverse_mask: true + mask_prob: 0.8 + mask_prob_adjust: 0.07 + mask_length: 3 + mask_noise_std: 0.01 + prenet_depth: 2 + ema_local_encoder: true + num_extra_tokens: 1 + init_extra_token_zero: false + use_alibi_encoder: false + decoder: + decoder_dim: 768 + decoder_groups: 16 + decoder_kernel: 3 + decoder_layers: 6 + input_dropout: 0 \ No newline at end of file diff --git a/fairseq/examples/data2vec/config/v2/base_text_only_task.yaml b/fairseq/examples/data2vec/config/v2/base_text_only_task.yaml new file mode 100644 index 0000000..62f22eb --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/base_text_only_task.yaml @@ -0,0 +1,112 @@ +# @package 
_group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + no_epoch_checkpoints: true + save_interval_updates: 50000 + keep_interval_updates: 1 + +distributed_training: + distributed_world_size: 16 + ddp_backend: legacy_ddp + +task: + _name: masked_lm + data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin + sample_break_mode: none + tokens_per_sample: 512 + include_target_tokens: true + random_token_prob: 0 + leave_unmasked_prob: 0 + include_index: True + skip_masking: True + d2v2_multi: True + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +dataset: + batch_size: 4 + ignore_unused_valid_subsets: true + skip_invalid_size_inputs_valid_test: true + disable_validation: true + +optimization: + clip_norm: 1 + lr: [0.0002] + max_update: 1000000 + update_freq: [1] + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.0002 + optimizer: + _name: adam + adam_betas: [0.9,0.98] + adam_eps: 1e-06 + weight_decay: 0.01 + lr_scheduler: + _name: cosine + warmup_updates: 4000 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + loss_beta: 0 + loss_scale: 1 + + depth: 12 + embed_dim: 768 + clone_batch: 8 + + ema_decay: 0.9999 + ema_end_decay: 0.99999 + ema_anneal_end_step: 100000 + ema_encoder_only: true + + average_top_k_layers: 12 + layer_norm_target_layer: false + instance_norm_target_layer: true + batch_norm_target_layer: false + instance_norm_targets: false + layer_norm_targets: false + + layerdrop: 0 + norm_eps: 1e-5 + + supported_modality: TEXT + + modalities: + text: + mask_prob: 0.48 + mask_length: 1 + mask_noise_std: 0.01 + prenet_depth: 0 + decoder: + input_dropout: 0.1 + decoder_dim: 768 + decoder_groups: 1 + decoder_kernel: 9 + decoder_layers: 5 + decoder_residual: false + projection_layers: 2 + 
projection_ratio: 2.0 diff --git a/fairseq/examples/data2vec/config/v2/huge_images14_only_task.yaml b/fairseq/examples/data2vec/config/v2/huge_images14_only_task.yaml new file mode 100644 index 0000000..a8a1525 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/huge_images14_only_task.yaml @@ -0,0 +1,122 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: mae_image_pretraining + data: /datasets01/imagenet_full_size/061417/ + rebuild_batches: true + local_cache_path: /scratch/cache_abaevski/imagenet + key: source + precompute_mask_config: {} + +dataset: + num_workers: 10 + batch_size: 8 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 32 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 500000 + lr: [ 0.0004 ] + debug_param_names: true + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 4e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 50040 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + ema_decay: 0.9998 + ema_end_decay: 1 + ema_anneal_end_step: 300000 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: true + end_of_block_targets: false + + depth: 32 + embed_dim: 1280 + num_heads: 16 + + average_top_k_layers: 24 + clone_batch: 16 + + norm_eps: 1e-6 + + min_target_var: 0 + min_pred_var: 0 + + encoder_dropout: 0 + post_mlp_drop: 0 + attention_dropout: 0 + 
activation_dropout: 0 + + supported_modality: IMAGE + cls_loss: 0.01 + + ema_encoder_only: false + + modalities: + image: + patch_size: 14 + inverse_mask: true + mask_prob: 0.75 + mask_prob_adjust: 0.1 + mask_length: 3 + mask_noise_std: 0.01 + prenet_depth: 0 + ema_local_encoder: true + num_extra_tokens: 1 + init_extra_token_zero: false + use_alibi_encoder: false + embed_dim: 1280 + decoder: + decoder_dim: 1024 + decoder_groups: 16 + decoder_kernel: 5 + decoder_layers: 3 + final_layer_norm: false + input_dropout: 0 \ No newline at end of file diff --git a/fairseq/examples/data2vec/config/v2/huge_images_only_task.yaml b/fairseq/examples/data2vec/config/v2/huge_images_only_task.yaml new file mode 100644 index 0000000..7a352ac --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/huge_images_only_task.yaml @@ -0,0 +1,120 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: mae_image_pretraining + data: /datasets01/imagenet_full_size/061417/ + rebuild_batches: true + local_cache_path: /scratch/cache_abaevski/imagenet + key: source + precompute_mask_config: {} + +dataset: + num_workers: 10 + batch_size: 8 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 375300 + lr: [ 0.0004 ] + debug_param_names: true + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 4e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + 
_name: cosine + warmup_updates: 50040 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + ema_decay: 0.9998 + ema_end_decay: 0.99995 + ema_anneal_end_step: 150000 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: true + end_of_block_targets: false + + depth: 32 + embed_dim: 1280 + num_heads: 16 + + average_top_k_layers: 24 + clone_batch: 16 + + norm_eps: 1e-6 + + min_target_var: 0 + min_pred_var: 0 + + encoder_dropout: 0 + post_mlp_drop: 0 + attention_dropout: 0 + activation_dropout: 0 + + supported_modality: IMAGE + cls_loss: 0.01 + + ema_encoder_only: false + + modalities: + image: + inverse_mask: true + mask_prob: 0.75 + mask_prob_adjust: 0.1 + mask_length: 3 + mask_noise_std: 0.01 + prenet_depth: 0 + ema_local_encoder: true + num_extra_tokens: 1 + init_extra_token_zero: false + use_alibi_encoder: false + embed_dim: 1280 + decoder: + decoder_dim: 1024 + decoder_groups: 16 + decoder_kernel: 5 + decoder_layers: 3 + input_dropout: 0 \ No newline at end of file diff --git a/fairseq/examples/data2vec/config/v2/large_audio_only_task.yaml b/fairseq/examples/data2vec/config/v2/large_audio_only_task.yaml new file mode 100644 index 0000000..3f61589 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/large_audio_only_task.yaml @@ -0,0 +1,122 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: /fsx-wav2vec/abaevski/data/librivox/no_silence + max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + precompute_mask_config: {} + +dataset: + num_workers: 8 + max_tokens: 320000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + required_batch_size_multiple: 1 + 
disable_validation: true + +distributed_training: + distributed_world_size: 48 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 600000 + debug_param_names: true + clip_norm: 1 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.0004 + optimizer: + _name: adam + adam_betas: [0.9,0.98] + adam_eps: 1e-06 + weight_decay: 0.01 + lr_scheduler: + _name: cosine + warmup_updates: 10000 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + loss_beta: 0 + loss_scale: null + + depth: 16 + embed_dim: 1024 + num_heads: 16 + + clone_batch: 12 + + ema_decay: 0.9997 + ema_end_decay: 1 + ema_anneal_end_step: 300000 + ema_encoder_only: false + + average_top_k_layers: 16 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: false + + layerdrop: 0 + norm_eps: 1e-5 + + supported_modality: AUDIO + + modalities: + audio: + feature_encoder_spec: '[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]' + conv_pos_depth: 5 + conv_pos_width: 95 + conv_pos_groups: 16 + prenet_depth: 8 + mask_prob: 0.55 + mask_prob_adjust: 0.1 + inverse_mask: false + mask_length: 5 + mask_noise_std: 0.01 + mask_dropout: 0 + add_masks: false + ema_local_encoder: false + use_alibi_encoder: true + prenet_layerdrop: 0 + prenet_dropout: 0.1 + learned_alibi_scale: true + learned_alibi_scale_per_head: true + decoder: + input_dropout: 0.1 + decoder_dim: 768 + decoder_groups: 16 + decoder_kernel: 7 + decoder_layers: 4 diff --git a/fairseq/examples/data2vec/config/v2/large_images_only_task.yaml b/fairseq/examples/data2vec/config/v2/large_images_only_task.yaml new file mode 100644 index 0000000..6b957fc --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/large_images_only_task.yaml @@ -0,0 +1,120 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + 
tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: mae_image_pretraining + data: /datasets01/imagenet_full_size/061417/ + rebuild_batches: true + local_cache_path: /scratch/cache_abaevski/imagenet + key: source + precompute_mask_config: {} + +dataset: + num_workers: 10 + batch_size: 8 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 375300 + lr: [ 0.0004 ] + debug_param_names: true + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 4e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 50040 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + ema_decay: 0.9998 + ema_end_decay: 0.99999 + ema_anneal_end_step: 150000 + instance_norm_target_layer: true + layer_norm_target_layer: false + layer_norm_targets: true + end_of_block_targets: false + + depth: 24 + embed_dim: 1024 + num_heads: 16 + + average_top_k_layers: 18 + clone_batch: 16 + + norm_eps: 1e-6 + + min_target_var: 0 + min_pred_var: 0 + + encoder_dropout: 0 + post_mlp_drop: 0 + attention_dropout: 0 + activation_dropout: 0 + + supported_modality: IMAGE + cls_loss: 0.01 + + ema_encoder_only: false + + modalities: + image: + inverse_mask: true + mask_prob: 0.75 + mask_prob_adjust: 0.1 + mask_length: 3 + mask_noise_std: 0.01 + prenet_depth: 0 + ema_local_encoder: true + num_extra_tokens: 1 + init_extra_token_zero: false + use_alibi_encoder: false + embed_dim: 1024 + decoder: + decoder_dim: 1024 
+ decoder_groups: 16 + decoder_kernel: 5 + decoder_layers: 3 + input_dropout: 0 \ No newline at end of file diff --git a/fairseq/examples/data2vec/config/v2/large_text_only_task.yaml b/fairseq/examples/data2vec/config/v2/large_text_only_task.yaml new file mode 100644 index 0000000..fd69048 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/large_text_only_task.yaml @@ -0,0 +1,112 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + min_loss_scale: 1e-6 + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + save_interval_updates: 50000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: masked_lm + data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin + sample_break_mode: none + tokens_per_sample: 512 + include_target_tokens: true + random_token_prob: 0 + leave_unmasked_prob: 0 + include_index: True + skip_masking: True + d2v2_multi: True + +dataset: + batch_size: 2 + ignore_unused_valid_subsets: true + skip_invalid_size_inputs_valid_test: true + disable_validation: true + +distributed_training: + distributed_world_size: 32 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +optimization: + max_update: 600000 + clip_norm: 1 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.0001 + optimizer: + _name: adam + adam_betas: [0.9,0.98] + adam_eps: 1e-06 + weight_decay: 0.01 + lr_scheduler: + _name: cosine + warmup_updates: 4000 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + loss_beta: 0 + loss_scale: 1 + + depth: 24 + num_heads: 16 + embed_dim: 1024 + clone_batch: 8 + + ema_decay: 0.9999 + ema_end_decay: 0.99999 + ema_anneal_end_step: 100000 + ema_encoder_only: true + + average_top_k_layers: 24 + layer_norm_target_layer: true + instance_norm_target_layer: false + batch_norm_target_layer: 
false + instance_norm_targets: true + layer_norm_targets: false + + layerdrop: 0 + norm_eps: 1e-5 + + supported_modality: TEXT + + modalities: + text: + mask_prob: 0.5 + mask_length: 1 + mask_noise_std: 0.01 + prenet_depth: 0 + decoder: + input_dropout: 0.1 + decoder_dim: 768 + decoder_groups: 1 + decoder_kernel: 9 + decoder_layers: 5 + decoder_residual: false + projection_layers: 2 + projection_ratio: 2.0 diff --git a/fairseq/examples/data2vec/config/v2/large_text_only_task_pgrp_1M.yaml b/fairseq/examples/data2vec/config/v2/large_text_only_task_pgrp_1M.yaml new file mode 100644 index 0000000..739e6f6 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/large_text_only_task_pgrp_1M.yaml @@ -0,0 +1,123 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + user_dir: ${env:PWD}/examples/data2vec + +checkpoint: + no_epoch_checkpoints: true + save_interval_updates: 50000 + keep_interval_updates: 1 + +distributed_training: + distributed_world_size: 32 + ddp_backend: legacy_ddp + +task: + _name: masked_lm + data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin + sample_break_mode: none + tokens_per_sample: 512 + include_target_tokens: true + random_token_prob: 0 + leave_unmasked_prob: 0 + include_index: True + skip_masking: True + d2v2_multi: True + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + - model_norm + - ema_norm + - masked_pct + +dataset: + batch_size: 2 + ignore_unused_valid_subsets: true + skip_invalid_size_inputs_valid_test: true + disable_validation: true + +optimization: + clip_norm: 1 + lr: [3e-4] + max_update: 1000000 + update_freq: [1] + +optimizer: + _name: composite + groups: + default: + lr_float: 1e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.98] + adam_eps: 1e-06 + weight_decay: 0.01 + lr_scheduler: + _name: cosine + warmup_updates: 4000 + decoder: + lr_float: 1e-4 + optimizer: + _name: adam + adam_betas: 
[0.9,0.98] + adam_eps: 1e-06 + weight_decay: 0.01 + lr_scheduler: + _name: cosine + warmup_updates: 4000 + +lr_scheduler: pass_through + +model: + _name: data2vec_multi + + loss_beta: 4 + loss_scale: 1 + + depth: 24 + num_heads: 16 + embed_dim: 1024 + clone_batch: 8 + + ema_decay: 0.9999 + ema_end_decay: 0.99999 + ema_anneal_end_step: 100000 + ema_encoder_only: true + + average_top_k_layers: 24 + layer_norm_target_layer: true + instance_norm_target_layer: false + batch_norm_target_layer: false + instance_norm_targets: true + layer_norm_targets: false + + layerdrop: 0 + norm_eps: 1e-5 + + supported_modality: TEXT + decoder_group: true + + modalities: + text: + mask_prob: 0.5 + mask_length: 1 + mask_noise_std: 0.01 + prenet_depth: 0 + decoder: + input_dropout: 0.1 + decoder_dim: 768 + decoder_groups: 1 + decoder_kernel: 9 + decoder_layers: 5 + decoder_residual: false + projection_layers: 2 + projection_ratio: 2.0 diff --git a/fairseq/examples/data2vec/config/v2/run_config/local.yaml b/fairseq/examples/data2vec/config/v2/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_1.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_1.yaml new file mode 100644 index 0000000..732f018 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_1.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - 
next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_1_aws.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_1_aws.yaml new file mode 100644 index 0000000..b2184f8 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_1_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_2.yaml 
b/fairseq/examples/data2vec/config/v2/run_config/slurm_2.yaml new file mode 100644 index 0000000..ec53dc2 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_2.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_2_aws.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..5537655 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_2_aws.yaml @@ -0,0 +1,39 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - task.post_save_script + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - model.model_path + sweep: 
+ dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 12 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_3.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_3.yaml new file mode 100644 index 0000000..14b47d1 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_3.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_4.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - 
distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_4_aws.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_4_aws.yaml new file mode 100644 index 0000000..a77f62a --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_4_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - task.post_save_script + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 12 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 diff --git 
a/fairseq/examples/data2vec/config/v2/run_config/slurm_6_aws.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_6_aws.yaml new file mode 100644 index 0000000..20e0658 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_6_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 12 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 6 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_8.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_8.yaml new file mode 100644 index 0000000..e3ec2c2 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_8.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: 
/checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/run_config/slurm_8_aws.yaml b/fairseq/examples/data2vec/config/v2/run_config/slurm_8_aws.yaml new file mode 100644 index 0000000..a9dce87 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/run_config/slurm_8_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 12 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/cola.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/cola.yaml new file mode 100644 index 0000000..d4ac4ec --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/cola.yaml @@ -0,0 +1,60 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 
1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: mcc + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + report_mcc: True + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 320 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 5336 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/mnli.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/mnli.yaml new file mode 100644 index 0000000..1a9d6e5 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/mnli.yaml @@ -0,0 +1,60 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 3 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + valid_subset: valid,valid1 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 7432 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 123873 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/mrpc.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/mrpc.yaml new file mode 100644 index 0000000..8f93d9d --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/mrpc.yaml @@ -0,0 +1,60 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: acc_and_f1 + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + report_acc_and_f1: True + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 137 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 2296 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/qnli.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/qnli.yaml new file mode 100644 index 0000000..739fb53 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/qnli.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 1986 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 33112 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/qqp.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/qqp.yaml new file mode 100644 index 0000000..9accbaa --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/qqp.yaml @@ -0,0 +1,60 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: acc_and_f1 + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + report_acc_and_f1: True + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 28318 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 113272 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/rte.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/rte.yaml new file mode 100644 index 0000000..ea07764 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/rte.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 122 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 2036 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/run_config/local.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/sst_2.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/sst_2.yaml new file mode 100644 index 0000000..a273e5b --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/sst_2.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 1256 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 20935 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/v2/text_finetuning/sts_b.yaml b/fairseq/examples/data2vec/config/v2/text_finetuning/sts_b.yaml new file mode 100644 index 0000000..fb009ab --- /dev/null +++ b/fairseq/examples/data2vec/config/v2/text_finetuning/sts_b.yaml @@ -0,0 +1,61 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + user_dir: ${env:PWD}/examples/data2vec + +task: + _name: sentence_prediction + data: ??? 
+ init_token: 0 + separator_token: 2 + num_classes: 1 + max_positions: 512 + d2v2_multi: True + +checkpoint: + best_checkpoint_metric: pearson_and_spearman + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +criterion: + _name: sentence_prediction + regression_target: true + report_pearson_and_spearman: True + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + num_workers: 1 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 214 + +optimization: + clip_norm: 0.0 + lr: [4e-05] + max_update: 3598 + max_epoch: 10 + +model: + _name: data2vec_text_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/vision/finetuning/imagenet.yaml b/fairseq/examples/data2vec/config/vision/finetuning/imagenet.yaml new file mode 100644 index 0000000..d6d4864 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/imagenet.yaml @@ -0,0 +1,52 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + +task: + _name: image_classification + data: /datasets01/imagenet_full_size/061417 + +dataset: + num_workers: 6 + batch_size: 64 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + valid_subset: val + +distributed_training: + distributed_world_size: 8 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - correct + +optimization: + max_update: 100000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: cosine + warmup_updates: 10000 + +model: + _name: 
data2vec_image_classification + model_path: ??? diff --git a/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_clean.yaml b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_clean.yaml new file mode 100644 index 0000000..17d4c0a --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_clean.yaml @@ -0,0 +1,65 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +task: + _name: mae_image_classification + data: /datasets01/imagenet_full_size/061417 + +dataset: + num_workers: 6 + batch_size: 32 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 2 + valid_subset: val + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - correct + +optimization: + max_update: 250200 + lr: [0.001] + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.001 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 16000 + min_lr: 1e-6 + + +lr_scheduler: pass_through + +model: + _name: mae_image_classification + mixup: 0.7 + mixup_prob: 0.9 + + model_path: ??? 
diff --git a/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_huge_clean.yaml b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_huge_clean.yaml new file mode 100644 index 0000000..2d2eb57 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_huge_clean.yaml @@ -0,0 +1,68 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +task: + _name: mae_image_classification + data: /datasets01/imagenet_full_size/061417 + +dataset: + num_workers: 6 + batch_size: 32 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 2 + valid_subset: val + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - correct + +optimization: + max_update: 125200 + lr: [0.0005] + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.0005 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 16000 + min_lr: 1e-20 + + +lr_scheduler: pass_through + +model: + _name: mae_image_classification + mixup: 0.7 + mixup_prob: 0.9 + layer_decay: 0.75 + drop_path_rate: 0.2 + + model_path: ??? 
diff --git a/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_large_clean.yaml b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_large_clean.yaml new file mode 100644 index 0000000..3a9413c --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/mae_imagenet_large_clean.yaml @@ -0,0 +1,68 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + +checkpoint: + save_interval: 1 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +task: + _name: mae_image_classification + data: /datasets01/imagenet_full_size/061417 + +dataset: + num_workers: 6 + batch_size: 32 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 2 + valid_subset: val + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - correct + +optimization: + max_update: 125200 + lr: [0.0005] + clip_norm: 4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 0.0005 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 16000 + min_lr: 1e-7 + + +lr_scheduler: pass_through + +model: + _name: mae_image_classification + mixup: 0.7 + mixup_prob: 0.9 + layer_decay: 0.75 + drop_path_rate: 0.2 + + model_path: ??? 
diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/local.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1.yaml new file mode 100644 index 0000000..732f018 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1_aws.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1_aws.yaml new file mode 
100644 index 0000000..e2bab56 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_1_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2.yaml new file mode 100644 index 0000000..c8b0f02 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - task.local_cache_path + sweep: + dir: 
/checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2_aws.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..93d0d9c --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_2_aws.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - task.local_cache_path + - model.model_path + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_3.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_3.yaml new file mode 100644 index 0000000..14b47d1 --- /dev/null +++ 
b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_3.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: 
${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4_aws.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4_aws.yaml new file mode 100644 index 0000000..d5d11cb --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_4_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_6_aws.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_6_aws.yaml new file mode 100644 index 0000000..906f08a --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_6_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - 
distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 6 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_8_aws.yaml b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_8_aws.yaml new file mode 100644 index 0000000..d60e13f --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/finetuning/run_config/slurm_8_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + 
max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet.yaml b/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet.yaml new file mode 100644 index 0000000..9bfc0f3 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet.yaml @@ -0,0 +1,52 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: image_pretraining + data: /datasets01/imagenet_full_size/061417/ + +dataset: + num_workers: 6 + batch_size: 64 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + +optimization: + max_update: 400000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: cosine + warmup_updates: 10000 + +model: + _name: data2vec_vision diff --git a/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet_d2v1.yaml b/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet_d2v1.yaml new file mode 100644 index 0000000..5fd399b --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/base_imagenet_d2v1.yaml @@ -0,0 +1,64 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: image_pretraining + data: /datasets01/imagenet_full_size/061417 + +dataset: + num_workers: 6 + batch_size: 128 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 2 + disable_validation: true + +distributed_training: + 
distributed_world_size: 16 + ddp_backend: legacy_ddp + +criterion: + _name: model + log_keys: + - ema_decay + - target_var + - pred_var + +optimization: + max_update: 375300 #300*1251 + lr: [0.0005] + clip_norm: 3.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.999) + adam_eps: 1e-08 + weight_decay: 0.05 + +lr_scheduler: + _name: cosine + warmup_updates: 12510 # it should be 10 epochs + +model: + _name: data2vec_vision + + attention_dropout: 0.05 + + ema_decay: 0.999 + ema_end_decay: 0.9998 + layer_norm_targets: True + average_top_k_layers: 6 + + loss_beta: 2.0 + + drop_path: 0.25 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/base_mae_imagenet.yaml b/fairseq/examples/data2vec/config/vision/pretraining/base_mae_imagenet.yaml new file mode 100644 index 0000000..d7872b5 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/base_mae_imagenet.yaml @@ -0,0 +1,64 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + fp16_no_flatten_grads: true + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: mae_image_pretraining + data: /datasets01/imagenet_full_size/061417/ + rebuild_batches: true + +dataset: + num_workers: 6 + batch_size: 64 + skip_invalid_size_inputs_valid_test: true + required_batch_size_multiple: 1 + disable_validation: true + +distributed_training: + distributed_world_size: 16 + ddp_backend: c10d + +criterion: + _name: model + +optimization: + max_update: 375300 + lr: [0.0006] + +optimizer: + _name: composite + groups: + with_decay: + lr_float: 6e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0.05 + lr_scheduler: + _name: cosine + warmup_updates: 50040 + no_decay: + lr_float: 6e-4 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + weight_decay: 0 + lr_scheduler: + _name: cosine + warmup_updates: 50040 + +lr_scheduler: pass_through + +model: + _name: mae 
diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/local.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1.yaml new file mode 100644 index 0000000..732f018 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1_aws.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1_aws.yaml new 
file mode 100644 index 0000000..e2bab56 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_1_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2.yaml new file mode 100644 index 0000000..c8b0f02 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - task.local_cache_path + sweep: + dir: 
/checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2_aws.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..032e53a --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_2_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - task.local_cache_path + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_3.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_3.yaml new file mode 100644 index 0000000..14b47d1 --- /dev/null +++ 
b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_3.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: 
${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4_aws.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4_aws.yaml new file mode 100644 index 0000000..d5d11cb --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_4_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_6_aws.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_6_aws.yaml new file mode 100644 index 0000000..906f08a --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_6_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - 
distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 6 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_8_aws.yaml b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_8_aws.yaml new file mode 100644 index 0000000..d60e13f --- /dev/null +++ b/fairseq/examples/data2vec/config/vision/pretraining/run_config/slurm_8_aws.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + 
def get_parser():
    """Create the command-line parser for the BEiT -> data2vec converter.

    Arguments: a positional ``checkpoint`` path, a required ``--output`` path,
    ``--type`` (``vision`` or ``image_classification``, the latter being the
    default) and the ``--inception_norms`` switch.
    """
    cli = argparse.ArgumentParser(
        description="convert beit checkpoint into data2vec - vision checkpoint"
    )
    # fmt: off
    cli.add_argument('checkpoint', help='checkpoint to convert')
    cli.add_argument('--output', required=True, metavar='PATH', help='where to output converted checkpoint')
    cli.add_argument('--type', type=str, choices=['vision', 'image_classification'], default='image_classification', help='type of model to upgrade')
    cli.add_argument('--inception_norms', action='store_true', default=False)
    # fmt: on
    return cli


def update_checkpoint(model_dict, prefix, is_nested):
    """Rename the keys of ``model_dict`` in place and return the same dict.

    Three kinds of rules are applied, later ones overriding earlier ones for
    the same key:

    * exact-key renames (``cls_token``, ``patch_embed``, ``mask_token``);
    * keys starting with a known name get that name substituted
      (no ``prefix`` is added);
    * keys containing ``mlp.fc1`` / ``mlp.fc2`` get the substitution and
      ``prefix`` prepended.

    When ``prefix`` is not the empty string, every key without a rule is
    simply prepended with ``prefix``.  ``is_nested`` selects the ``model.``
    variants of the embedding-related target names.
    """
    nest = "model." if is_nested else ""

    # Exact key -> new key. Per-key entries discovered below are added here too.
    renames = {
        "cls_token": nest + "cls_emb",
        "patch_embed": nest + "patch_embed",
        "mask_token": "mask_emb",
    }
    # Ordered (old, new) rules; order matters because a later hit overrides.
    leading_rules = (
        ("patch_embed.proj", nest + "patch_embed.conv"),
        ("lm_head", "final_proj"),
        ("fc_norm", "fc_norm"),
        ("head", "head"),
    )
    infix_rules = (
        ("mlp.fc1", "mlp.0"),
        ("mlp.fc2", "mlp.2"),
    )

    # Snapshot of the keys: the dict is mutated while we walk them.
    source_keys = list(model_dict)

    for key in source_keys:
        for old, new in leading_rules:
            if key.startswith(old):
                renames[key] = key.replace(old, new)
        for old, new in infix_rules:
            if old in key:
                renames[key] = prefix + key.replace(old, new)

    if prefix != "":
        for key in source_keys:
            renames.setdefault(key, prefix + key)

    for key in source_keys:
        if key not in renames:
            continue
        target = renames[key]
        if target != key:
            # Move the entry: the new key is written, the old one removed.
            model_dict[target] = model_dict.pop(key)

    return model_dict


def main():
    """Convert the checkpoint named on the command line and save the result.

    The weights are read from ``cp["module"]`` for ``image_classification``
    and from ``cp["model"]`` for ``vision``; their keys are renamed with
    ``update_checkpoint`` and loaded strictly into a freshly built model
    before the new state is written to ``--output``.
    """
    args = get_parser().parse_args()

    cp = torch.load(args.checkpoint, map_location="cpu")

    cfg = FairseqConfig(
        criterion=ModelCriterionConfig(_name="model", log_keys=["correct"]),
    )

    def use_inception_norms(task_cfg):
        # Each attribute gets its own list object, as in a literal assignment.
        task_cfg.normalization_mean = [0.5, 0.5, 0.5]
        task_cfg.normalization_std = [0.5, 0.5, 0.5]

    # Per-type setup; everything after the branch is shared.
    if args.type == "image_classification":
        cfg.task = ImageClassificationConfig(
            _name="image_classification",
            data=".",
        )
        if args.inception_norms:
            use_inception_norms(cfg.task)

        cfg.model = Data2VecImageClassificationConfig(
            _name="data2vec_image_classification",
        )
        cfg.model.pretrained_model_args = FairseqConfig(
            model=Data2VecVisionConfig(
                _name="data2vec_vision", shared_rel_pos_bias=False
            ),
            task=ImagePretrainingConfig(
                _name="image_pretraining",
            ),
        )

        weights_key = "module"
        model_cls = Data2VecImageClassificationModel
        key_prefix, nested = "model.encoder.", True
    elif args.type == "vision":
        cfg.task = ImagePretrainingConfig(
            _name="image_pretraining",
            data=".",
        )
        if args.inception_norms:
            use_inception_norms(cfg.task)

        cfg.model = Data2VecVisionConfig(
            _name="data2vec_vision",
        )

        weights_key = "model"
        model_cls = Data2VecVisionModel
        key_prefix, nested = "encoder.", False
    else:
        raise Exception("unsupported type " + args.type)

    cfg = OmegaConf.create(cfg)

    state = {
        "cfg": OmegaConf.to_container(cfg, resolve=True, enum_to_str=True),
        "model": cp[weights_key],
        "best_loss": None,
        "optimizer": None,
        "extra_state": {},
    }

    # update_checkpoint renames the keys of state["model"] in place, so the
    # dict that is validated here is also the one that gets saved below.
    model = model_cls(cfg.model)
    model.load_state_dict(
        update_checkpoint(state["model"], prefix=key_prefix, is_nested=nested),
        strict=True,
    )

    print(state["cfg"], state.keys())
    torch.save(state, args.output)


if __name__ == "__main__":
    main()
@dataclass
class AudioClassificationConfig(FairseqDataclass):
    """Configuration for the ``audio_classification`` fine-tuning model.

    The dropout, masking, layerdrop and activation-checkpointing fields are
    forwarded as overrides to the pretrained model that is loaded from
    ``model_path``; the remaining fields configure the classification head
    (``prediction_mode``, ``conv_*``), mixup and gain handling.
    """

    model_path: str = field(
        default=MISSING, metadata={"help": "path to wav2vec 2.0 model"}
    )
    no_pretrained_weights: bool = field(
        default=False, metadata={"help": "if true, does not load pretrained weights"}
    )
    dropout_input: float = field(
        default=0.0,
        metadata={"help": "dropout to apply to the input (after feat extr)"},
    )
    final_dropout: float = field(
        default=0.0,
        metadata={"help": "dropout after transformer and before final projection"},
    )
    dropout: float = field(
        default=0.0, metadata={"help": "dropout probability inside wav2vec 2.0 model"}
    )
    attention_dropout: float = field(
        default=0.0,
        metadata={
            "help": "dropout probability for attention weights inside wav2vec 2.0 model"
        },
    )
    activation_dropout: float = field(
        default=0.0,
        metadata={
            "help": "dropout probability after activation in FFN inside wav2vec 2.0 model"
        },
    )

    # masking
    apply_mask: bool = field(
        default=False, metadata={"help": "apply masking during fine-tuning"}
    )
    mask_length: int = field(
        default=10, metadata={"help": "repeat the mask indices multiple times"}
    )
    mask_prob: float = field(
        default=0.5,
        metadata={
            "help": "probability of replacing a token with mask (normalized by length)"
        },
    )
    mask_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose masks"}
    )
    mask_other: float = field(
        default=0,
        metadata={
            "help": "secondary mask argument (used for more complex distributions), "
            "see help in compute_mask_indices"
        },
    )
    # The flag is negative ("no overlap"), so the help text is phrased that way.
    no_mask_overlap: bool = field(
        default=False, metadata={"help": "if true, masks are not allowed to overlap"}
    )
    mask_min_space: Optional[int] = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )
    require_same_masks: bool = field(
        default=True,
        metadata={
            "help": "whether the number of masked timesteps must be the same across all "
            "examples in a batch"
        },
    )
    mask_dropout: float = field(
        default=0.0,
        metadata={"help": "percent of masks to unmask for each sample"},
    )

    # channel masking
    mask_channel_length: int = field(
        default=10, metadata={"help": "length of the mask for features (channels)"}
    )
    mask_channel_prob: float = field(
        default=0.0, metadata={"help": "probability of replacing a feature with 0"}
    )
    mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static",
        metadata={"help": "how to choose mask length for channel masking"},
    )
    mask_channel_other: float = field(
        default=0,
        metadata={
            "help": "secondary mask argument (used for more complex distributions), "
            "see help in compute_mask_indices"
        },
    )
    no_mask_channel_overlap: bool = field(
        default=False,
        metadata={"help": "if true, channel masks are not allowed to overlap"},
    )
    freeze_finetune_updates: int = field(
        default=0, metadata={"help": "dont finetune wav2vec for this many updates"}
    )
    feature_grad_mult: float = field(
        default=0.0, metadata={"help": "reset feature grad mult in wav2vec 2.0 to this"}
    )
    layerdrop: float = field(
        default=0.0, metadata={"help": "probability of dropping a layer in wav2vec 2.0"}
    )
    mask_channel_min_space: Optional[int] = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )
    mask_channel_before: bool = False
    # interpolated from the task config
    normalize: bool = II("task.normalize")
    data: str = II("task.data")
    # this holds the loaded wav2vec args
    d2v_args: Any = None
    offload_activations: bool = field(
        default=False, metadata={"help": "offload_activations"}
    )
    min_params_to_wrap: int = field(
        default=int(1e8),
        metadata={
            "help": "minimum number of params for a layer to be wrapped with FSDP() when "
            "training with --ddp-backend=fully_sharded. Smaller values will "
            "improve memory efficiency, but may make torch.distributed "
            "communication less efficient due to smaller input sizes. This option "
            "is set to 0 (i.e., always wrap) when --checkpoint-activations or "
            "--offload-activations are passed."
        },
    )

    checkpoint_activations: bool = field(
        default=False,
        metadata={"help": "recompute activations and save memory for extra compute"},
    )
    ddp_backend: str = II("distributed_training.ddp_backend")

    # classification head
    prediction_mode: str = "lin_softmax"
    eval_prediction_mode: Optional[str] = None
    conv_kernel: int = -1
    conv_stride: int = 1
    two_convs: bool = False
    extreme_factor: float = 1.0

    conv_feature_layers: Optional[str] = field(
        default=None,
        metadata={
            "help": "string describing convolutional feature extraction layers in form of a python list that contains "
            "[(dim, kernel_size, stride), ...]"
        },
    )

    # mixup
    mixup_prob: float = 1.0
    source_mixup: float = -1
    same_mixup: bool = True
    label_mixup: bool = False

    gain_mode: str = "none"
cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "require_same_masks": getattr(cfg, "require_same_masks", True), + "mask_dropout": getattr(cfg, "mask_dropout", 0), + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_before": cfg.mask_channel_before, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "checkpoint_activations": cfg.checkpoint_activations, + "offload_activations": cfg.offload_activations, + "min_params_to_wrap": cfg.min_params_to_wrap, + "mixup": -1, + } + + if cfg.conv_feature_layers is not None: + arg_overrides["conv_feature_layers"] = cfg.conv_feature_layers + + if cfg.d2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.model_path, arg_overrides + ) + d2v_args = state.get("cfg", None) + if d2v_args is None: + d2v_args = convert_namespace_to_omegaconf(state["args"]) + d2v_args.criterion = None + d2v_args.lr_scheduler = None + cfg.d2v_args = d2v_args + + logger.info(d2v_args) + + else: + state = None + d2v_args = cfg.d2v_args + + model_normalized = d2v_args.task.get( + "normalize", d2v_args.model.get("normalize", False) + ) + assert cfg.normalize == model_normalized, ( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for both pre-training and here" + ) + + if hasattr(cfg, "checkpoint_activations") and cfg.checkpoint_activations: + with open_dict(d2v_args): + d2v_args.model.checkpoint_activations = cfg.checkpoint_activations + + d2v_args.task.data = cfg.data + task = tasks.setup_task(d2v_args.task) + model = task.build_model(d2v_args.model, from_checkpoint=True) + + model.remove_pretraining_modules() + + if state is not None and not cfg.no_pretrained_weights: + self.load_model_weights(state, model, cfg) + + d = d2v_args.model.encoder_embed_dim + + self.d2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + for p in self.parameters(): + p.param_group = "pretrained" + + if cfg.prediction_mode == "proj_avg_proj": + self.proj = nn.Linear(d, d * 2) + self.proj2 = nn.Linear(d * 2, num_classes) + + for p in self.proj.parameters(): + p.param_group = "projection" + for p in self.proj2.parameters(): + p.param_group = "projection" + elif self.cfg.prediction_mode == "summary_proj": + self.proj = nn.Linear(d // 3, num_classes) + for p in self.proj.parameters(): + p.param_group = "projection" + elif self.cfg.conv_kernel > 1 and not self.cfg.two_convs: + self.proj = nn.Sequential( + TransposeLast(), + nn.Conv1d(d, num_classes, kernel_size=self.cfg.conv_kernel, stride=self.cfg.conv_stride), + TransposeLast(), + ) + for p in self.proj.parameters(): + p.param_group = "projection" + elif self.cfg.conv_kernel > 0 and self.cfg.two_convs: + self.proj = nn.Sequential( + TransposeLast(), + nn.Conv1d(d, d, kernel_size=self.cfg.conv_kernel, stride=self.cfg.conv_stride), + TransposeLast(), + nn.GELU(), + nn.Linear(d, num_classes), + ) + for p in self.proj.parameters(): + p.param_group = "projection" + else: + self.proj = nn.Linear(d, num_classes) + for p in self.proj.parameters(): + p.param_group = "projection" + + def upgrade_state_dict_named(self, 
state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: AudioClassificationConfig, task: FairseqTask): + """Build a new model instance.""" + + assert hasattr(task, "labels"), f"Task {task} must have an attribute 'labels'" + + return cls(cfg, len(task.labels)) + + def load_model_weights(self, state, model, cfg): + if cfg.ddp_backend == "fully_sharded": + from fairseq.distributed import FullyShardedDataParallel + + for name, module in model.named_modules(): + if "encoder.layers" in name and len(name.split(".")) == 3: + # Only for layers, we do a special handling and load the weights one by one + # We dont load all weights together as that wont be memory efficient and may + # cause oom + new_dict = { + k.replace(name + ".", ""): v + for (k, v) in state["model"].items() + if name + "." in k + } + assert isinstance(module, FullyShardedDataParallel) + with module.summon_full_params(): + module.load_state_dict(new_dict, strict=True) + module._reset_lazy_init() + + # Once layers are loaded, filter them out and load everything else. 
# adapted from https://github.com/mil-tokyo/bc_learning_sound/blob/master/utils.py
def compute_gain_torch(self, sound, fs=16_000, min_db=-80.0, mode="A_weighting"):
    """Return the per-window gain of ``sound`` in dB.

    The last dimension of ``sound`` is cut into windows of ``n_fft`` samples
    with a hop of ``n_fft // 2`` (``n_fft`` is 2048 for 16 kHz and 4096 for
    44.1 kHz). Each window is reduced to one gain value, floored at
    ``10 ** (min_db / 10)`` and converted to dB.

    Args:
        sound: tensor whose last dimension holds at least ``n_fft`` samples.
        fs: sampling rate; only 16000 and 44100 are supported.
        min_db: floor (in dB) applied to the A-weighting curve and to the
            returned gain.
        mode: ``"RMSE"`` (mean squared amplitude per window) or
            ``"A_weighting"`` (A-weighted power spectrum summed per window).

    Returns:
        Tensor of gains in dB with one value per window in the last dimension.

    Raises:
        Exception: if ``fs`` or ``mode`` is not supported.
    """
    if fs == 16000:
        n_fft = 2048
    elif fs == 44100:
        n_fft = 4096
    else:
        raise Exception("Invalid fs {}".format(fs))

    # Reject a bad mode before doing any work instead of after unfolding.
    if mode not in ("RMSE", "A_weighting"):
        raise Exception("Invalid mode {}".format(mode))

    weights = None
    if mode == "A_weighting":
        cache = getattr(self, "a_weight", None)
        if not isinstance(cache, dict):
            cache = {}
            self.a_weight = cache

        # The curve depends on min_db and must live on the input's device, so
        # both are part of the key; keying on fs alone returned stale weights
        # when either of them changed between calls.
        key = (fs, float(min_db), sound.device)
        if key not in cache:
            freq = np.linspace(0, fs // 2, n_fft // 2 + 1)
            freq_sq = freq ** 2
            freq_sq[0] = 1.0  # avoid log10(0) for the DC bin
            weight_db = 2.0 + 20.0 * (
                2 * np.log10(12194)
                + 2 * np.log10(freq_sq)
                - np.log10(freq_sq + 12194 ** 2)
                - np.log10(freq_sq + 20.6 ** 2)
                - 0.5 * np.log10(freq_sq + 107.7 ** 2)
                - 0.5 * np.log10(freq_sq + 737.9 ** 2)
            )
            weight_db = np.maximum(weight_db, min_db)
            cache[key] = torch.from_numpy(np.power(10, weight_db / 10)).to(
                device=sound.device
            )
        weights = cache[key]

    frames = sound.unfold(-1, n_fft, n_fft // 2)

    if mode == "RMSE":
        g = (frames ** 2).mean(-1)
    else:
        windowed = torch.hann_window(n_fft, device=frames.device) * frames
        power_spec = torch.fft.rfft(windowed).abs() ** 2
        g = (power_spec * weights).sum(-1)

    gain = torch.maximum(g, torch.tensor(10 ** (min_db / 10), device=g.device))
    gain_db = 10 * torch.log10(gain)

    return gain_db
s2[mix_mask] + else: + if self.cfg.gain_mode == "naive_rms": + G1 = source.pow(2).mean(dim=-1).sqrt() + else: + G1, _ = self.compute_gain_torch( + source, mode=self.cfg.gain_mode + ).max(-1) + G1 = G1.to(dtype=source.dtype) + + G2 = G1[mixup_perm] + + if mix_mask is not None: + G1 = G1[mix_mask] + G2 = G2[mix_mask] + s2 = s2[mix_mask] + + p = 1 / (1 + 10 ** ((G1 - G2) / 20) * (1 - r) / r) + p = p.unsqueeze(-1) + + mixed = (p * mixed_source) + (1 - p) * s2 + + if mix_mask is None: + source = mixed / torch.sqrt(p ** 2 + (1 - p) ** 2) + else: + source[mix_mask] = mixed / torch.sqrt(p ** 2 + (1 - p) ** 2) + + if label is not None and self.cfg.label_mixup: + r = r.unsqueeze(-1) + if mix_mask is None: + label = label * r + (1 - r) * label[mixup_perm] + else: + label[mix_mask] = ( + label[mix_mask] * r + (1 - r) * label[mixup_perm][mix_mask] + ) + + d2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + res = self.d2v_model.extract_features(**d2v_args) + + x = res["x"] + padding_mask = res["padding_mask"] + if padding_mask is not None: + x[padding_mask] = 0 + + x = self.final_dropout(x) + + if self.training or ( + self.cfg.eval_prediction_mode is None or self.cfg.eval_prediction_mode == "" + ): + prediction_mode = self.cfg.prediction_mode + else: + prediction_mode = self.cfg.eval_prediction_mode + + if prediction_mode == "average_before": + x = x.mean(dim=1) + + if prediction_mode != "summary_mha" and prediction_mode != "summary_proj" and prediction_mode != "cls": + x = self.proj(x) + + logits = True + if prediction_mode == "lin_softmax": + x = F.logsigmoid(x.float()) + x = torch.logsumexp(x + x, dim=1) - torch.logsumexp(x, dim=1) + x = x.clamp(max=0) + x = x - torch.log(-(torch.expm1(x))) + elif prediction_mode == "extremized_odds": + x = x.float().sum(dim=1) + x = x * 
self.cfg.extreme_factor + elif prediction_mode == "average_before": + x = x.float() + elif prediction_mode == "average": + x = x.float().mean(dim=1) + elif prediction_mode == "average_sigmoid": + x = torch.sigmoid(x.float()) + x = x.mean(dim=1) + logits = False + elif prediction_mode == "max": + x, _ = x.float().max(dim=1) + elif prediction_mode == "max_sigmoid": + x = torch.sigmoid(x.float()) + x, _ = x.float().max(dim=1) + logits = False + elif prediction_mode == "proj_avg_proj": + x = x.mean(dim=1) + x = self.proj2(x) + elif prediction_mode == "summary_mha" or prediction_mode == "summary_proj": + x = self.d2v_model.summary( + x, padding_mask, proj=prediction_mode == "summary_proj" + ) + x = x.type_as(source) + x = self.proj(x) + elif prediction_mode == "cls": + x = x[:,0] + x = self.proj(x) + else: + raise Exception(f"unknown prediction mode {prediction_mode}") + + if label is None: + return torch.sigmoid(x) if logits else x + + x = torch.nan_to_num(x) + + if logits: + loss = F.binary_cross_entropy_with_logits( + x, label.float(), reduction="none" + ) + else: + loss = F.binary_cross_entropy(x, label.float(), reduction="none") + + result = { + "losses": { + "main": loss, + }, + "sample_size": label.sum(), + } + + if not self.training: + result["_predictions"] = torch.sigmoid(x) if logits else x + result["_targets"] = label + + return result diff --git a/fairseq/examples/data2vec/models/data2vec2.py b/fairseq/examples/data2vec/models/data2vec2.py new file mode 100644 index 0000000..0c61b37 --- /dev/null +++ b/fairseq/examples/data2vec/models/data2vec2.py @@ -0,0 +1,813 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@dataclass
class D2vModalitiesConfig(FairseqDataclass):
    """Groups the per-modality encoder configs (audio, image, text)."""

    # default_factory gives every config instance its own sub-config instead
    # of sharing one module-level object, and avoids the "mutable default"
    # ValueError that dataclasses raise for unhashable defaults on
    # Python >= 3.11.
    audio: D2vAudioConfig = field(default_factory=D2vAudioConfig)
    image: D2vImageConfig = field(default_factory=D2vImageConfig)
    text: D2vTextConfig = field(default_factory=D2vTextConfig)
if None then scales by 1/sqrt(dim)" + }, + ) + + depth: int = 8 + start_drop_path_rate: float = 0 + end_drop_path_rate: float = 0 + num_heads: int = 12 + norm_eps: float = 1e-6 + norm_affine: bool = True + encoder_dropout: float = 0.1 + post_mlp_drop: float = 0.1 + attention_dropout: float = 0.1 + activation_dropout: float = 0.0 + dropout_input: float = 0.0 + layerdrop: float = 0.0 + embed_dim: int = 768 + mlp_ratio: float = 4 + layer_norm_first: bool = False + + average_top_k_layers: int = field( + default=8, metadata={"help": "how many layers to average"} + ) + + end_of_block_targets: bool = False + + clone_batch: int = 1 + + layer_norm_target_layer: bool = False + batch_norm_target_layer: bool = False + instance_norm_target_layer: bool = False + instance_norm_targets: bool = False + layer_norm_targets: bool = False + + ema_decay: float = field(default=0.999, metadata={"help": "initial ema decay rate"}) + ema_same_dtype: bool = True + log_norms: bool = True + ema_end_decay: float = field( + default=0.9999, metadata={"help": "final ema decay rate"} + ) + + # when to finish annealing ema decay rate + ema_anneal_end_step: int = II("optimization.max_update") + + ema_encoder_only: bool = field( + default=True, + metadata={ + "help": "whether to momentum update only the shared transformer encoder" + }, + ) + + max_update: int = II("optimization.max_update") + + modalities: D2vModalitiesConfig = D2vModalitiesConfig() + + shared_decoder: Optional[D2vDecoderConfig] = None + + min_target_var: float = field( + default=0.1, metadata={"help": "stop training if target var falls below this"} + ) + min_pred_var: float = field( + default=0.01, + metadata={"help": "stop training if prediction var falls below this"}, + ) + + supported_modality: Optional[Modality] = None + mae_init: bool = False + + seed: int = II("common.seed") + + skip_ema: bool = False + + cls_loss: float = 0 + recon_loss: float = 0 + d2v_loss: float = 1 + + decoder_group: bool = False + + 
def make_modality_encoder(
    self,
    cfg: D2vModalityConfig,
    embed_dim: int,
    make_block: Callable[[float], nn.ModuleList],
    norm_layer: Callable[[int], nn.LayerNorm],
    layer_norm_first: bool,
    alibi_biases,
    task,
) -> ModalitySpecificEncoder:
    """Instantiate the encoder class that matches ``cfg.type``.

    For the text modality, a task exposing ``text_task`` is replaced by that
    inner task before being handed to the encoder.

    Raises:
        Exception: if ``cfg.type`` is not audio, image or text.
    """
    known_encoders = (
        (Modality.AUDIO, AudioEncoder),
        (Modality.IMAGE, ImageEncoder),
        (Modality.TEXT, TextEncoder),
    )

    encoder_type = None
    for modality, candidate in known_encoders:
        if cfg.type == modality:
            encoder_type = candidate
            break

    if encoder_type is None:
        raise Exception(f"unsupported modality {cfg.type}")

    if encoder_type is TextEncoder and hasattr(task, "text_task"):
        task = task.text_task

    return encoder_type(
        cfg,
        embed_dim,
        make_block,
        norm_layer,
        layer_norm_first,
        alibi_biases,
        task,
    )
self.loss_scale = cfg.loss_scale + + self.dropout_input = nn.Dropout(cfg.dropout_input) + + dpr = np.linspace(cfg.start_drop_path_rate, cfg.end_drop_path_rate, cfg.depth) + + self.blocks = nn.ModuleList([make_block(dpr[i]) for i in range(cfg.depth)]) + + self.norm = None + if cfg.layer_norm_first: + self.norm = make_layer_norm(cfg.embed_dim) + + if self.cfg.mae_init: + self.apply(self._init_weights) + else: + from fairseq.modules.transformer_sentence_encoder import init_bert_params + + self.apply(init_bert_params) + + for mod_enc in self.modality_encoders.values(): + mod_enc.reset_parameters() + + if not skip_ema: + self.ema = self.make_ema_teacher(cfg.ema_decay) + self.shared_decoder = ( + Decoder1d(cfg.shared_decoder, cfg.embed_dim) + if self.cfg.shared_decoder is not None + else None + ) + if self.shared_decoder is not None: + self.shared_decoder.apply(self._init_weights) + + self.recon_proj = None + if cfg.recon_loss > 0: + self.recon_proj = nn.Linear(cfg.embed_dim, cfg.embed_dim) + + for pn, p in self.named_parameters(): + if len(p.shape) == 1 or pn.endswith(".bias") or "alibi_scale" in pn: + p.optim_overrides = {"optimizer": {"weight_decay_scale": 0}} + if cfg.decoder_group and "decoder" in pn: + p.param_group = "decoder" + + self.num_updates = 0 + + def _init_weights(self, m): + + try: + from apex.normalization import FusedLayerNorm + + fn = FusedLayerNorm + except: + fn = nn.LayerNorm + + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm) or isinstance(m, fn): + if m.bias is not None: + nn.init.constant_(m.bias, 0) + if m.weight is not None: + nn.init.constant_(m.weight, 1.0) + + @torch.no_grad() + def make_ema_teacher(self, ema_decay): + ema_config = EMAModuleConfig( + ema_decay=ema_decay, + ema_fp32=True, + log_norms=self.cfg.log_norms, + add_missing_params=False, + ) + + model_copy = self.make_target_model() + + 
def set_num_updates(self, num_updates):
    """Record the update counter and, while training, advance the EMA teacher.

    After forwarding the counter to the parent class, the EMA decay is
    re-annealed (when start and end decay differ) and one EMA step is taken,
    unless the counter indicates that no regular training step happened.
    """
    super().set_num_updates(num_updates)

    # Skip the EMA step when the counter jumps from 0 to a value > 1 or does
    # not advance. NOTE(review): presumably this covers restoring from a
    # checkpoint and repeated calls for the same step - confirm with callers.
    if self.ema is not None and (
        (self.num_updates == 0 and num_updates > 1)
        or self.num_updates >= num_updates
    ):
        pass
    elif self.training and self.ema is not None:
        # Always None here, i.e. no explicit weight decay is passed on.
        ema_weight_decay = None
        if self.cfg.ema_decay != self.cfg.ema_end_decay:
            if num_updates >= self.cfg.ema_anneal_end_step:
                # Annealing finished: stay at the final decay.
                decay = self.cfg.ema_end_decay
            else:
                decay = get_annealed_rate(
                    self.cfg.ema_decay,
                    self.cfg.ema_end_decay,
                    num_updates,
                    self.cfg.ema_anneal_end_step,
                )
            self.ema.set_decay(decay, weight_decay=ema_weight_decay)
        if self.ema.get_decay() < 1:
            # The teacher tracks either only the shared blocks or the whole
            # model, matching what make_target_model copied.
            self.ema.step(self.blocks if self.cfg.ema_encoder_only else self)

    self.num_updates = num_updates
def forward(
    self,
    source,
    target=None,
    id=None,
    mode=None,
    padding_mask=None,
    mask=True,
    features_only=False,
    force_remove_masked=False,
    remove_extra_tokens=True,
    precomputed_mask=None,
):
    """Run the student (and, for pre-training, the EMA teacher) on ``source``.

    Args:
        source: input passed to the modality-specific encoder.
        target: optional separate teacher input; only consumed when the
            modality's ``ema_local_encoder`` is set, otherwise it must be None.
        id: optional sample ids used to build a ``MaskSeed``.
        mode: ``Modality`` or its name; defaults to ``cfg.supported_modality``.
        padding_mask: padding mask forwarded to the encoders.
        mask: whether the modality encoder should mask its input.
        features_only: return encoder features instead of losses.
        force_remove_masked: remove masked positions even with
            ``features_only``.
        remove_extra_tokens: with ``features_only``, strip the modality's
            ``num_extra_tokens`` leading positions from the output.
        precomputed_mask: forwarded to the modality encoder.

    Returns:
        With ``features_only``: a dict with ``x``, ``padding_mask``,
        ``layer_results`` and ``mask``. Otherwise a dict with ``losses``,
        ``sample_size`` and logging statistics (``masked_pct``, prediction
        and target variance, EMA logs, ``ema_decay``).

    Raises:
        Exception: after 5000 updates, if target or prediction variance falls
            below the configured minimum.
    """
    if mode is None:
        assert self.cfg.supported_modality is not None
        mode = self.cfg.supported_modality

    if isinstance(mode, Modality):
        mode = mode.name

    feature_extractor = self.modality_encoders[mode]

    mask_seeds = None
    if id is not None:
        mask_seeds = MaskSeed(seed=self.cfg.seed, update=self.num_updates, ids=id)

    extractor_out = feature_extractor(
        source,
        padding_mask,
        mask,
        remove_masked=not features_only or force_remove_masked,
        clone_batch=self.cfg.clone_batch if not features_only else 1,
        mask_seeds=mask_seeds,
        precomputed_mask=precomputed_mask,
    )

    x = extractor_out["x"]
    encoder_mask = extractor_out["encoder_mask"]
    masked_padding_mask = extractor_out["padding_mask"]
    masked_alibi_bias = extractor_out.get("alibi_bias", None)
    alibi_scale = extractor_out.get("alibi_scale", None)

    if self.dropout_input is not None:
        x = self.dropout_input(x)

    # ---- student: shared transformer blocks (with layerdrop in training) ----
    layer_results = []
    for i, blk in enumerate(self.blocks):
        if (
            not self.training
            or self.cfg.layerdrop == 0
            or (np.random.random() > self.cfg.layerdrop)
        ):
            ab = masked_alibi_bias
            if ab is not None and alibi_scale is not None:
                scale = (
                    alibi_scale[i]
                    if alibi_scale.size(0) > 1
                    else alibi_scale.squeeze(0)
                )
                ab = ab * scale.type_as(ab)

            x, lr = blk(
                x,
                padding_mask=masked_padding_mask,
                alibi_bias=ab,
            )
            if features_only:
                layer_results.append(lr)

    if self.norm is not None:
        x = self.norm(x)

    if features_only:
        if remove_extra_tokens:
            num_extra = feature_extractor.modality_cfg.num_extra_tokens
            x = x[:, num_extra:]
            if masked_padding_mask is not None:
                masked_padding_mask = masked_padding_mask[:, num_extra:]

        return {
            "x": x,
            "padding_mask": masked_padding_mask,
            "layer_results": layer_results,
            "mask": encoder_mask,
        }

    # ---- student: decoder predictions ----
    xs = []

    if self.shared_decoder is not None:
        dx = self.forward_decoder(
            x,
            feature_extractor,
            self.shared_decoder,
            encoder_mask,
        )
        xs.append(dx)
    if feature_extractor.decoder is not None:
        dx = self.forward_decoder(
            x,
            feature_extractor,
            feature_extractor.decoder,
            encoder_mask,
        )
        xs.append(dx)

    assert len(xs) > 0

    # ---- teacher: keep the EMA copy on the student's device / dtype ----
    p = next(self.ema.model.parameters())
    device = x.device
    dtype = x.dtype
    ema_device = p.device
    ema_dtype = p.dtype

    if not self.cfg.ema_same_dtype:
        dtype = ema_dtype

    if ema_device != device or ema_dtype != dtype:
        logger.info(f"adjusting ema dtype to {dtype} and device to {device}")
        self.ema.model = self.ema.model.to(dtype=dtype, device=device)
        ema_dtype = dtype

        def to_device(d):
            for k, p in d.items():
                if isinstance(d[k], dict):
                    to_device(d[k])
                else:
                    d[k] = p.to(device=device)

        to_device(self.ema.fp32_params)
    tm = self.ema.model

    with torch.no_grad():
        tm.eval()

        if self.cfg.ema_encoder_only:
            # The teacher is only the block stack; reuse the student's local
            # features and contextualize them without masking.
            assert target is None
            ema_input = extractor_out["local_features"]
            ema_input = feature_extractor.contextualized_features(
                ema_input.to(dtype=ema_dtype),
                padding_mask,
                mask=False,
                remove_masked=False,
            )
            ema_blocks = tm
        else:
            ema_blocks = tm.blocks
            if feature_extractor.modality_cfg.ema_local_encoder:
                inp = (
                    target.to(dtype=ema_dtype)
                    if target is not None
                    else source.to(dtype=ema_dtype)
                )
                ema_input = tm.modality_encoders[mode](
                    inp,
                    padding_mask,
                    mask=False,
                    remove_masked=False,
                )
            else:
                assert target is None
                ema_input = extractor_out["local_features"]
                ema_feature_enc = tm.modality_encoders[mode]
                ema_input = ema_feature_enc.contextualized_features(
                    ema_input.to(dtype=ema_dtype),
                    padding_mask,
                    mask=False,
                    remove_masked=False,
                )

        ema_padding_mask = ema_input["padding_mask"]
        ema_alibi_bias = ema_input.get("alibi_bias", None)
        ema_alibi_scale = ema_input.get("alibi_scale", None)
        ema_input = ema_input["x"]

        y = []
        extra_tokens = feature_extractor.modality_cfg.num_extra_tokens
        for i, blk in enumerate(ema_blocks):
            ab = ema_alibi_bias
            # Guard on the teacher's own scale: the previous check looked at
            # the student's ``alibi_scale`` while indexing ``ema_alibi_scale``.
            if ab is not None and ema_alibi_scale is not None:
                scale = (
                    ema_alibi_scale[i]
                    if ema_alibi_scale.size(0) > 1
                    else ema_alibi_scale.squeeze(0)
                )
                ab = ab * scale.type_as(ab)

            ema_input, lr = blk(
                ema_input,
                padding_mask=ema_padding_mask,
                alibi_bias=ab,
            )
            y.append(lr[:, extra_tokens:])

    y = self.make_targets(y, self.average_top_k_layers)
    orig_targets = y

    if self.cfg.clone_batch > 1:
        y = y.repeat_interleave(self.cfg.clone_batch, 0)

    masked = encoder_mask.mask.unsqueeze(-1)
    masked_b = encoder_mask.mask.bool()
    y = y[masked_b]

    # Predictions spanning the full sequence are reduced to the masked
    # positions; otherwise they are just flattened.
    # NOTE(review): presumably the second case means the decoder already
    # emitted masked positions only - confirm against decoder_input.
    if xs[0].size(1) == masked_b.size(1):
        xs = [pred[masked_b] for pred in xs]
    else:
        xs = [pred.reshape(-1, pred.size(-1)) for pred in xs]

    sample_size = masked.sum().long()

    result = {
        "losses": {},
        "sample_size": sample_size,
    }

    if self.cfg.cls_loss > 0:
        assert extra_tokens > 0
        cls_target = orig_targets.mean(dim=1)
        if self.cfg.clone_batch > 1:
            cls_target = cls_target.repeat_interleave(self.cfg.clone_batch, 0)
        cls_pred = x[:, extra_tokens - 1]
        result["losses"]["cls"] = self.d2v_loss(cls_pred, cls_target) * (
            self.cfg.cls_loss * sample_size
        )

    if self.cfg.recon_loss > 0:

        with torch.no_grad():
            # Per-patch normalized input serves as the reconstruction target.
            target = feature_extractor.patchify(source)
            mean = target.mean(dim=-1, keepdim=True)
            var = target.var(dim=-1, keepdim=True)
            target = (target - mean) / (var + 1.0e-6) ** 0.5

            if self.cfg.clone_batch > 1:
                target = target.repeat_interleave(self.cfg.clone_batch, 0)

            target = target[masked_b]

        recon = xs[0]
        if self.recon_proj is not None:
            recon = self.recon_proj(recon)

        result["losses"]["recon"] = (
            self.d2v_loss(recon, target.float()) * self.cfg.recon_loss
        )

    if self.cfg.d2v_loss > 0:
        for i, pred in enumerate(xs):
            reg_loss = self.d2v_loss(pred, y)
            n = f"{mode}_regression_{i}" if len(xs) > 1 else f"{mode}_regression"
            result["losses"][n] = reg_loss * self.cfg.d2v_loss

    # ---- logging statistics and collapse detection ----
    suffix = "" if len(self.modalities) == 1 else f"_{mode}"
    with torch.no_grad():
        if encoder_mask is not None:
            result["masked_pct"] = 1 - (
                encoder_mask.ids_keep.size(1) / encoder_mask.ids_restore.size(1)
            )
        for i, pred in enumerate(xs):
            n = f"pred_var{suffix}_{i}" if len(xs) > 1 else f"pred_var{suffix}"
            result[n] = self.compute_var(pred.float())
        if self.ema is not None:
            for k, v in self.ema.logs.items():
                result[k] = v

        y = y.float()
        result[f"target_var{suffix}"] = self.compute_var(y)

        if self.num_updates > 5000:
            if result[f"target_var{suffix}"] < self.cfg.min_target_var:
                message = f"target var is {result[f'target_var{suffix}'].item()} < {self.cfg.min_target_var}, exiting ({mode})"
                logger.error(message)
                raise Exception(message)

            for k in result.keys():
                if k.startswith("pred_var") and result[k] < self.cfg.min_pred_var:
                    message = f"{k} is {result[k].item()} < {self.cfg.min_pred_var}, exiting ({mode})"
                    logger.error(message)
                    raise Exception(message)

        result["ema_decay"] = self.ema.get_decay() * 1000

    return result
def make_targets(self, y, num_layers):
    """Average the last ``num_layers`` teacher layer outputs into one target.

    Each selected layer output is optionally batch-, instance- or
    layer-normalized (per ``self.cfg``), the results are averaged in float32,
    and the average is optionally layer- and/or instance-normalized again.

    Args:
        y: list of per-layer tensors; the code transposes dims 1 and 2 for
            the batch/instance norms and treats the last dim as features.
        num_layers: how many trailing entries of ``y`` to average.

    Returns:
        A new float32 tensor; the tensors in ``y`` are never modified.
    """
    with torch.no_grad():
        target_layer_results = y[-num_layers:]

        permuted = False
        if self.cfg.instance_norm_target_layer or self.cfg.batch_norm_target_layer:
            target_layer_results = [
                tl.transpose(1, 2) for tl in target_layer_results  # BTC -> BCT
            ]
            permuted = True
        if self.cfg.batch_norm_target_layer:
            target_layer_results = [
                F.batch_norm(
                    tl.float(), running_mean=None, running_var=None, training=True
                )
                for tl in target_layer_results
            ]
        if self.cfg.instance_norm_target_layer:
            target_layer_results = [
                F.instance_norm(tl.float()) for tl in target_layer_results
            ]
        if permuted:
            target_layer_results = [
                tl.transpose(1, 2) for tl in target_layer_results  # BCT -> BTC
            ]
        if self.cfg.layer_norm_target_layer:
            target_layer_results = [
                F.layer_norm(tl.float(), tl.shape[-1:])
                for tl in target_layer_results
            ]

    # Accumulate into a private copy: ``.float()`` returns the very same
    # tensor for float32 inputs, so the in-place add/div below used to
    # overwrite the caller's first layer output when no norm was enabled.
    y = target_layer_results[0].to(dtype=torch.float32, copy=True)
    for tl in target_layer_results[1:]:
        y.add_(tl.float())
    y = y.div_(len(target_layer_results))

    if self.cfg.layer_norm_targets:
        y = F.layer_norm(y, y.shape[-1:])

    if self.cfg.instance_norm_targets:
        y = F.instance_norm(y.transpose(1, 2)).transpose(1, 2)

    return y
def remove_pretraining_modules(self, modality=None, keep_decoder=False):
    """Strip everything that is only needed for pre-training.

    Clears the EMA teacher and the reconstruction projection, resets
    ``cfg.clone_batch`` to 1 and, unless ``keep_decoder`` is set, drops the
    shared and per-modality decoders. When ``modality`` is given, encoders of
    every other modality are deleted (names compared case-insensitively).
    """
    discard_decoder = not keep_decoder

    self.ema = None
    self.cfg.clone_batch = 1
    self.recon_proj = None
    if discard_decoder:
        self.shared_decoder = None

    wanted = None if modality is None else modality.lower()
    encoders = self.modality_encoders

    # Iterate over a snapshot of the names because entries may be deleted.
    for name in tuple(encoders.keys()):
        if wanted is not None and name.lower() != wanted:
            del encoders[name]
            continue

        encoders[name].remove_pretraining_modules(keep_decoder=keep_decoder)
        if discard_decoder:
            encoders[name].decoder = None
+ +import logging +import math +from dataclasses import dataclass, field +from typing import Optional + +from omegaconf import II + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributed as dist + +from fairseq.modules import EMAModule, EMAModuleConfig +from fairseq.data.data_utils import compute_mask_indices +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.wav2vec import ( + ConvFeatureExtractionModel, + Wav2Vec2Config, + TransformerEncoder, +) +from fairseq.modules import ( + GradMultiply, + LayerNorm, +) +from fairseq.utils import index_put + + +logger = logging.getLogger(__name__) + + +@dataclass +class Data2VecAudioConfig(Wav2Vec2Config): + + loss_beta: float = field( + default=0, metadata={"help": "beta for smooth l1 loss. 0 means use l2 loss"} + ) + loss_scale: Optional[float] = field( + default=None, + metadata={ + "help": "scale the reconstruction loss by this constant. if None then scales by 1/sqrt(dim)" + }, + ) + average_top_k_layers: int = field( + default=8, metadata={"help": "how many layers to average"} + ) + + layer_norm_target_layer: bool = False + instance_norm_target_layer: bool = False + instance_norm_targets: bool = False + layer_norm_targets: bool = False + batch_norm_target_layer: bool = False + group_norm_target_layer: bool = False + + ema_decay: float = field(default=0.999, metadata={"help": "initial ema decay rate"}) + ema_end_decay: float = field( + default=0.9999, metadata={"help": "final ema decay rate"} + ) + + # when to finish annealing ema decay rate + ema_anneal_end_step: int = II("optimization.max_update") + + ema_transformer_only: bool = field( + default=True, + metadata={"help": "whether to momentum update only the transformer"}, + ) + ema_layers_only: bool = field( + default=True, + metadata={"help": "whether to momentum update only the transformer layers"}, + ) + + max_update: int = II("optimization.max_update") + + min_target_var: float = field( + 
def get_annealed_rate(start, end, curr_step, total_steps):
    """Linearly anneal a rate from ``start`` to ``end`` over ``total_steps``.

    Args:
        start: rate at step 0.
        end: rate at ``total_steps`` and beyond.
        curr_step: current step; values outside ``[0, total_steps]`` are
            clamped so the result never leaves the ``start``..``end`` range.
        total_steps: number of steps the schedule spans. A non-positive value
            means there is nothing to anneal and ``end`` is returned.

    Returns:
        The interpolated rate.
    """
    # Covers both a degenerate schedule (would divide by zero) and steps past
    # the end of the schedule (would extrapolate beyond ``end``).
    if total_steps <= 0 or curr_step >= total_steps:
        return end

    r = end - start
    pct_remaining = 1 - max(curr_step, 0) / total_steps
    return end - r * pct_remaining
self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.extractor_embed) + + self.final_proj = nn.Linear(self.embed, self.embed) + + self.num_updates = 0 + + def make_ema_teacher(self): + ema_config = EMAModuleConfig( + ema_decay=self.cfg.ema_decay, + ema_fp32=True, + ) + skip_keys = set() + if self.cfg.ema_layers_only: + self.cfg.ema_transformer_only = True + for k, _ in self.encoder.pos_conv.named_parameters(): + skip_keys.add(f"pos_conv.{k}") + + self.ema = EMAModule( + self.encoder if self.cfg.ema_transformer_only else self, + ema_config, + skip_keys=skip_keys, + ) + + def set_num_updates(self, num_updates): + super().set_num_updates(num_updates) + + if self.ema is None and self.final_proj is not None: + logger.info(f"making ema teacher") + self.make_ema_teacher() + elif self.training and self.ema is not None: + if self.cfg.ema_decay != self.cfg.ema_end_decay: + if num_updates >= self.cfg.ema_anneal_end_step: + decay = self.cfg.ema_end_decay + else: + decay = get_annealed_rate( + self.cfg.ema_decay, + self.cfg.ema_end_decay, + num_updates, + self.cfg.ema_anneal_end_step, + ) + self.ema.set_decay(decay) + if self.ema.get_decay() < 1: + self.ema.step(self.encoder if self.cfg.ema_transformer_only else self) + + self.num_updates = num_updates + + def state_dict(self, destination=None, prefix="", keep_vars=False): + state = super().state_dict(destination, prefix, keep_vars) + + if self.ema is not None: + state[prefix + "_ema"] = self.ema.fp32_params + + return state + + def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs): + if self.ema is not None: + k = prefix + "_ema" + assert k in state_dict + self.ema.restore(state_dict[k], True) + del state_dict[k] + return super()._load_from_state_dict(state_dict, prefix, *args, **kwargs) + + @classmethod + def build_model(cls, cfg: Data2VecAudioConfig, task=None): + """Build a new model instance.""" + + return cls(cfg) + + def apply_mask( + self, + x, + padding_mask, + mask_indices=None, + 
mask_channel_indices=None, + ): + B, T, C = x.shape + + if self.mask_channel_prob > 0 and self.mask_channel_before: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + if self.mask_prob > 0: + if mask_indices is None: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=1, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + require_same_masks=self.cfg.require_same_masks, + mask_dropout=self.cfg.mask_dropout, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x = index_put(x, mask_indices, self.mask_emb) + else: + mask_indices = None + + if self.mask_channel_prob > 0 and not self.mask_channel_before: + if mask_channel_indices is None: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x = index_put(x, mask_channel_indices, 0) + + return x, mask_indices + + def _get_feat_extract_output_lengths(self, input_lengths: torch.LongTensor): + """ + Computes the output length of the convolutional layers + """ + + def _conv_out_length(input_length, kernel_size, stride): + return torch.floor((input_length - kernel_size) / stride + 1) + + conv_cfg_list = eval(self.cfg.conv_feature_layers) + + for i in range(len(conv_cfg_list)): + input_lengths 
= _conv_out_length( + input_lengths, conv_cfg_list[i][1], conv_cfg_list[i][2] + ) + + return input_lengths.to(torch.long) + + def forward( + self, + source, + padding_mask=None, + mask=True, + features_only=False, + layer=None, + mask_indices=None, + mask_channel_indices=None, + padding_count=None, + ): + features = source + + if self.feature_grad_mult > 0: + features = self.feature_extractor(features) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(features) + + features = features.transpose(1, 2) + + features = self.layer_norm(features) + + orig_padding_mask = padding_mask + + if padding_mask is not None and padding_mask.any(): + input_lengths = (1 - padding_mask.long()).sum(-1) + # apply conv formula to get real output_lengths + output_lengths = self._get_feat_extract_output_lengths(input_lengths) + + padding_mask = torch.zeros( + features.shape[:2], dtype=features.dtype, device=features.device + ) + + # these two operations makes sure that all values + # before the output lengths indices are attended to + padding_mask[ + ( + torch.arange(padding_mask.shape[0], device=padding_mask.device), + output_lengths - 1, + ) + ] = 1 + padding_mask = (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])).bool() + else: + padding_mask = None + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + pre_encoder_features = None + if self.cfg.ema_transformer_only: + pre_encoder_features = features.clone() + + features = self.dropout_input(features) + + if mask: + x, mask_indices = self.apply_mask( + features, + padding_mask, + mask_indices=mask_indices, + mask_channel_indices=mask_channel_indices, + ) + else: + x = features + mask_indices = None + + x, layer_results = self.encoder( + x, + padding_mask=padding_mask, + layer=layer, + ) + + if features_only: + return { + "x": x, + "padding_mask": padding_mask, + "layer_results": 
layer_results, + } + + result = { + "losses": {}, + } + + with torch.no_grad(): + self.ema.model.eval() + + if self.cfg.ema_transformer_only: + y, layer_results = self.ema.model.extract_features( + pre_encoder_features, + padding_mask=padding_mask, + min_layer=self.cfg.encoder_layers - self.average_top_k_layers, + ) + y = { + "x": y, + "padding_mask": padding_mask, + "layer_results": layer_results, + } + else: + y = self.ema.model.extract_features( + source=source, + padding_mask=orig_padding_mask, + mask=False, + ) + + target_layer_results = [l[2] for l in y["layer_results"]] + + permuted = False + if self.cfg.instance_norm_target_layer or self.cfg.batch_norm_target_layer: + target_layer_results = [ + tl.permute(1, 2, 0) for tl in target_layer_results # TBC -> BCT + ] + permuted = True + + if self.cfg.batch_norm_target_layer: + target_layer_results = [ + F.batch_norm( + tl.float(), running_mean=None, running_var=None, training=True + ) + for tl in target_layer_results + ] + + if self.cfg.instance_norm_target_layer: + target_layer_results = [ + F.instance_norm(tl.float()) for tl in target_layer_results + ] + + if permuted: + target_layer_results = [ + tl.transpose(1, 2) for tl in target_layer_results # BCT -> BTC + ] + + if self.cfg.group_norm_target_layer: + target_layer_results = [ + F.layer_norm(tl.float(), tl.shape[-2:]) + for tl in target_layer_results + ] + + if self.cfg.layer_norm_target_layer: + target_layer_results = [ + F.layer_norm(tl.float(), tl.shape[-1:]) + for tl in target_layer_results + ] + + y = sum(target_layer_results) / len(target_layer_results) + + if self.cfg.layer_norm_targets: + y = F.layer_norm(y.float(), y.shape[-1:]) + + if self.cfg.instance_norm_targets: + y = F.instance_norm(y.float().transpose(1, 2)).transpose(1, 2) + + if not permuted: + y = y.transpose(0, 1) + + y = y[mask_indices] + + x = x[mask_indices] + x = self.final_proj(x) + + sz = x.size(-1) + + if self.loss_beta == 0: + loss = F.mse_loss(x.float(), y.float(), 
reduction="none").sum(dim=-1) + else: + loss = F.smooth_l1_loss( + x.float(), y.float(), reduction="none", beta=self.loss_beta + ).sum(dim=-1) + + if self.loss_scale is not None: + scale = self.loss_scale + else: + scale = 1 / math.sqrt(sz) + + result["losses"]["regression"] = loss.sum() * scale + + if "sample_size" not in result: + result["sample_size"] = loss.numel() + + with torch.no_grad(): + result["target_var"] = self.compute_var(y) + result["pred_var"] = self.compute_var(x.float()) + + if self.num_updates > 5000 and result["target_var"] < self.cfg.min_target_var: + logger.error( + f"target var is {result['target_var'].item()} < {self.cfg.min_target_var}, exiting" + ) + raise Exception( + f"target var is {result['target_var'].item()} < {self.cfg.min_target_var}, exiting" + ) + if self.num_updates > 5000 and result["pred_var"] < self.cfg.min_pred_var: + logger.error( + f"pred var is {result['pred_var'].item()} < {self.cfg.min_pred_var}, exiting" + ) + raise Exception( + f"pred var is {result['pred_var'].item()} < {self.cfg.min_pred_var}, exiting" + ) + + if self.ema is not None: + result["ema_decay"] = self.ema.get_decay() * 1000 + + return result + + @staticmethod + def compute_var(y): + y = y.view(-1, y.size(-1)) + if dist.is_initialized(): + zc = torch.tensor(y.size(0)).cuda() + zs = y.sum(dim=0) + zss = (y ** 2).sum(dim=0) + + dist.all_reduce(zc) + dist.all_reduce(zs) + dist.all_reduce(zss) + + var = zss / (zc - 1) - (zs ** 2) / (zc * (zc - 1)) + return torch.sqrt(var + 1e-6).mean() + else: + return torch.sqrt(y.var(dim=0) + 1e-6).mean() + + def extract_features( + self, source, padding_mask, mask=False, layer=None + ): + res = self.forward( + source, + padding_mask, + mask=mask, + features_only=True, + layer=layer, + ) + return res + + def remove_pretraining_modules(self, last_layer=None): + self.final_proj = None + self.ema = None + if last_layer is not None: + self.encoder.layers = nn.ModuleList( + l for i, l in enumerate(self.encoder.layers) if i <= 
last_layer + ) diff --git a/fairseq/examples/data2vec/models/data2vec_image_classification.py b/fairseq/examples/data2vec/models/data2vec_image_classification.py new file mode 100644 index 0000000..851c9ce --- /dev/null +++ b/fairseq/examples/data2vec/models/data2vec_image_classification.py @@ -0,0 +1,143 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# The code in this file is adapted from the BeiT implementation which can be found here: +# https://github.com/microsoft/unilm/tree/master/beit + +import logging + +from dataclasses import dataclass +from typing import Any + +from omegaconf import II, MISSING + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import checkpoint_utils, tasks + +from fairseq.dataclass import FairseqDataclass +from fairseq.models import BaseFairseqModel, register_model + + +logger = logging.getLogger(__name__) + + +@dataclass +class Data2VecImageClassificationConfig(FairseqDataclass): + model_path: str = MISSING + no_pretrained_weights: bool = False + num_classes: int = 1000 + mixup: float = 0.8 + cutmix: float = 1.0 + label_smoothing: float = 0.1 + + pretrained_model_args: Any = None + data: str = II("task.data") + + +@register_model( + "data2vec_image_classification", dataclass=Data2VecImageClassificationConfig +) +class Data2VecImageClassificationModel(BaseFairseqModel): + def __init__(self, cfg: Data2VecImageClassificationConfig): + super().__init__() + self.cfg = cfg + + if cfg.pretrained_model_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.model_path, {}) + pretrained_args = state.get("cfg", None) + pretrained_args.criterion = None + pretrained_args.lr_scheduler = None + cfg.pretrained_model_args = pretrained_args + + logger.info(pretrained_args) + else: + state = None + pretrained_args = cfg.pretrained_model_args + + 
pretrained_args.task.data = cfg.data + task = tasks.setup_task(pretrained_args.task) + model = task.build_model(pretrained_args.model, from_checkpoint=True) + + model.remove_pretraining_modules() + + self.model = model + + if state is not None and not cfg.no_pretrained_weights: + self.load_model_weights(state, model, cfg) + + self.fc_norm = nn.LayerNorm(pretrained_args.model.embed_dim) + self.head = nn.Linear(pretrained_args.model.embed_dim, cfg.num_classes) + + self.head.weight.data.mul_(1e-3) + self.head.bias.data.mul_(1e-3) + + self.mixup_fn = None + + if cfg.mixup > 0 or cfg.cutmix > 0: + from timm.data import Mixup + + self.mixup_fn = Mixup( + mixup_alpha=cfg.mixup, + cutmix_alpha=cfg.cutmix, + cutmix_minmax=None, + prob=1.0, + switch_prob=0.5, + mode="batch", + label_smoothing=cfg.label_smoothing, + num_classes=cfg.num_classes, + ) + + def load_model_weights(self, state, model, cfg): + if "_ema" in state["model"]: + del state["model"]["_ema"] + model.load_state_dict(state["model"], strict=True) + + @classmethod + def build_model(cls, cfg: Data2VecImageClassificationConfig, task=None): + """Build a new model instance.""" + + return cls(cfg) + + def forward( + self, + img, + label=None, + ): + if self.training and self.mixup_fn is not None and label is not None: + img, label = self.mixup_fn(img, label) + + x = self.model(img, mask=False) + x = x[:, 1:] + x = self.fc_norm(x.mean(1)) + x = self.head(x) + + if label is None: + return x + + if self.training and self.mixup_fn is not None: + loss = -label * F.log_softmax(x.float(), dim=-1) + else: + loss = F.cross_entropy( + x.float(), + label, + label_smoothing=self.cfg.label_smoothing if self.training else 0, + reduction="none", + ) + + result = { + "losses": {"regression": loss}, + "sample_size": img.size(0), + } + + if not self.training: + with torch.no_grad(): + pred = x.argmax(-1) + correct = (pred == label).sum() + result["correct"] = correct + + return result diff --git 
a/fairseq/examples/data2vec/models/data2vec_text.py b/fairseq/examples/data2vec/models/data2vec_text.py new file mode 100644 index 0000000..cb3c8b3 --- /dev/null +++ b/fairseq/examples/data2vec/models/data2vec_text.py @@ -0,0 +1,517 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass, field +from typing import Optional +import logging +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from omegaconf import II + +from fairseq.dataclass import FairseqDataclass +from fairseq.modules import EMAModule, EMAModuleConfig +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderModel, + register_model, +) +from fairseq.models.roberta.model import RobertaLMHead, RobertaClassificationHead +from fairseq.models.transformer import TransformerEncoder, TransformerConfig +from fairseq.modules.transformer_sentence_encoder import init_bert_params + +logger = logging.getLogger(__name__) + + +@dataclass +class Data2VecTextConfig(FairseqDataclass): + max_positions: int = II("task.tokens_per_sample") + + head_layers: int = 1 + + transformer: TransformerConfig = TransformerConfig() + + load_checkpoint_heads: bool = field( + default=False, + metadata={"help": "(re-)register and load heads when loading checkpoints"}, + ) + + loss_beta: float = field( + default=0, metadata={"help": "beta for smooth l1 loss. 0 means use l2 loss"} + ) + loss_scale: Optional[float] = field( + default=None, + metadata={ + "help": "scale the reconstruction loss by this constant. 
if None then scales by 1/sqrt(dim)" + }, + ) + average_top_k_layers: int = field( + default=8, metadata={"help": "how many layers to average"} + ) + + layer_norm_target_layer: bool = False + instance_norm_target_layer: bool = False + batch_norm_target_layer: bool = False + instance_norm_targets: bool = False + layer_norm_targets: bool = False + + ema_decay: float = field(default=0.999, metadata={"help": "initial ema decay rate"}) + ema_end_decay: float = field( + default=0.9999, metadata={"help": "final ema decay rate"} + ) + + # when to finish annealing ema decay rate + ema_anneal_end_step: int = II("optimization.max_update") + + ema_transformer_layers_only: bool = field( + default=True, + metadata={"help": "whether to momentum update only the transformer layers"}, + ) + + +def get_annealed_rate(start, end, curr_step, total_steps): + r = end - start + pct_remaining = 1 - curr_step / total_steps + return end - r * pct_remaining + + +@register_model("data2vec_text", dataclass=Data2VecTextConfig) +class Data2VecTextModel(FairseqEncoderModel): + def __init__(self, cfg: Data2VecTextConfig, encoder): + super().__init__(encoder) + self.cfg = cfg + + # We follow BERT's random weight initialization + self.apply(init_bert_params) + + self.classification_heads = nn.ModuleDict() + + @classmethod + def build_model(cls, cfg, task): + """Build a new model instance.""" + + encoder = Data2VecTextEncoder(cfg, task.source_dictionary, task.cfg.data) + + return cls(cfg, encoder) + + def forward( + self, + src_tokens, + target_tokens=None, + features_only=False, + return_all_hiddens=False, + classification_head_name=None, + **kwargs, + ): + if classification_head_name is not None: + features_only = True + + res = self.encoder( + src_tokens, target_tokens, features_only, return_all_hiddens, **kwargs + ) + + if isinstance(res, tuple): + x, extra = res + else: + return res + + if classification_head_name is not None: + x = self.classification_heads[classification_head_name](x) + return x, 
extra + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + logits = net_output[0].float() + if log_probs: + return F.log_softmax(logits, dim=-1) + else: + return F.softmax(logits, dim=-1) + + def register_classification_head( + self, name, num_classes=None, inner_dim=None, **kwargs + ): + """Register a classification head.""" + if name in self.classification_heads: + prev_num_classes = self.classification_heads[name].out_proj.out_features + prev_inner_dim = self.classification_heads[name].dense.out_features + if num_classes != prev_num_classes or inner_dim != prev_inner_dim: + logger.warning( + 're-registering head "{}" with num_classes {} (prev: {}) ' + "and inner_dim {} (prev: {})".format( + name, num_classes, prev_num_classes, inner_dim, prev_inner_dim + ) + ) + self.classification_heads[name] = RobertaClassificationHead( + input_dim=self.cfg.transformer.encoder.embed_dim, + inner_dim=inner_dim or self.cfg.transformer.encoder.embed_dim, + num_classes=num_classes, + activation_fn="tanh", + pooler_dropout=0, + ) + + @property + def supported_targets(self): + return {"self"} + + def upgrade_state_dict_named(self, state_dict, name): + prefix = name + "." if name != "" else "" + + # rename decoder -> encoder before upgrading children modules + for k in list(state_dict.keys()): + if k.startswith(prefix + "decoder"): + new_k = prefix + "encoder" + k[len(prefix + "decoder") :] + state_dict[new_k] = state_dict[k] + del state_dict[k] + + # rename emb_layer_norm -> layernorm_embedding + for k in list(state_dict.keys()): + if ".emb_layer_norm." in k: + new_k = k.replace(".emb_layer_norm.", ".layernorm_embedding.") + state_dict[new_k] = state_dict[k] + del state_dict[k] + + if self.encoder.regression_head is not None: + if ".lm_head." in k: + new_k = k.replace(".lm_head.", ".regression_head.") + state_dict[new_k] = state_dict[k] + del state_dict[k] + else: + if ".regression_head." 
in k: + del state_dict[k] + + # upgrade children modules + super().upgrade_state_dict_named(state_dict, name) + + # Handle new classification heads present in the state dict. + current_head_names = ( + [] + if not hasattr(self, "classification_heads") + or self.classification_heads is None + else self.classification_heads.keys() + ) + keys_to_delete = [] + for k in state_dict.keys(): + if not k.startswith(prefix + "classification_heads."): + continue + + head_name = k[len(prefix + "classification_heads.") :].split(".")[0] + num_classes = state_dict[ + prefix + "classification_heads." + head_name + ".out_proj.weight" + ].size(0) + inner_dim = state_dict[ + prefix + "classification_heads." + head_name + ".dense.weight" + ].size(0) + + if self.cfg.load_checkpoint_heads: + if head_name not in current_head_names: + self.register_classification_head(head_name, num_classes, inner_dim) + else: + if head_name not in current_head_names: + logger.warning( + "deleting classification head ({}) from checkpoint " + "not present in current model: {}".format(head_name, k) + ) + keys_to_delete.append(k) + elif ( + num_classes + != self.classification_heads[head_name].out_proj.out_features + or inner_dim + != self.classification_heads[head_name].dense.out_features + ): + logger.warning( + "deleting classification head ({}) from checkpoint " + "with different dimensions than current model: {}".format( + head_name, k + ) + ) + keys_to_delete.append(k) + for k in keys_to_delete: + del state_dict[k] + + # Copy any newly-added classification heads into the state dict + # with their current weights. + if ( + hasattr(self, "classification_heads") + and self.classification_heads is not None + and len(self.classification_heads) > 0 + ): + cur_state = self.classification_heads.state_dict() + for k, v in cur_state.items(): + if prefix + "classification_heads." + k not in state_dict: + logger.info("Overwriting " + prefix + "classification_heads." 
+ k) + state_dict[prefix + "classification_heads." + k] = v + + for k in list(state_dict.keys()): + if k.startswith(prefix + "encoder.lm_head.") or k.startswith( + prefix + "encoder.emb_head." + ): + del state_dict[k] + + self.encoder.lm_head = None + + if self.encoder.target_model is None: + for k in list(state_dict.keys()): + if k.startswith(prefix + "encoder.target_model."): + del state_dict[k] + + if (self.encoder.ema is None) and (prefix + "encoder._ema" in state_dict): + del state_dict[prefix + "encoder._ema"] + + def remove_pretraining_modules(self, last_layer=None): + self.encoder.lm_head = None + self.encoder.regression_head = None + self.encoder.ema = None + self.classification_heads = None + + if last_layer is not None: + self.encoder.sentence_encoder.layers = nn.ModuleList( + l + for i, l in enumerate(self.encoder.sentence_encoder.layers) + if i <= last_layer + ) + self.encoder.sentence_encoder.layer_norm = None + + +class Data2VecTextEncoder(FairseqEncoder): + def __init__(self, cfg: Data2VecTextConfig, dictionary, task_data): + super().__init__(dictionary) + + self.cfg = cfg + + embed_tokens = self.build_embedding( + len(dictionary), cfg.transformer.encoder.embed_dim, dictionary.pad() + ) + + self.sentence_encoder = self.build_encoder(cfg, dictionary, embed_tokens) + self.mask_idx = dictionary.index("<mask>") + assert self.mask_idx != dictionary.unk(), dictionary.symbols + + self.ema = None + self.average_top_k_layers = cfg.average_top_k_layers + self.loss_scale = cfg.loss_scale + + assert self.cfg.head_layers >= 1 + + embed_dim = cfg.transformer.encoder.embed_dim + curr_dim = embed_dim + projs = [] + for i in range(self.cfg.head_layers - 1): + next_dim = embed_dim * 2 if i == 0 else curr_dim + projs.append(nn.Linear(curr_dim, next_dim)) + projs.append(nn.GELU()) + curr_dim = next_dim + + projs.append(nn.Linear(curr_dim, embed_dim)) + self.regression_head = nn.Sequential(*projs) + + self.num_updates = 0 + + def build_embedding(self, vocab_size, 
embedding_dim, padding_idx): + return nn.Embedding(vocab_size, embedding_dim, padding_idx) + + def build_encoder(self, cfg, dictionary, embed_tokens): + encoder = TransformerEncoder(cfg.transformer, dictionary, embed_tokens, return_fc=True) + encoder.apply(init_bert_params) + return encoder + + def build_lm_head(self, embed_dim, output_dim, activation_fn, weight): + return RobertaLMHead(embed_dim, output_dim, activation_fn, weight) + + def make_ema_teacher(self): + ema_config = EMAModuleConfig( + ema_decay=self.cfg.ema_decay, + ema_fp32=True, + ) + skip_keys = set() + if self.cfg.ema_transformer_layers_only: + for k, _ in self.sentence_encoder.embed_tokens.named_parameters(): + skip_keys.add(f"embed_tokens.{k}") + for k, _ in self.sentence_encoder.embed_positions.named_parameters(): + skip_keys.add(f"embed_positions.{k}") + if self.sentence_encoder.layernorm_embedding is not None: + for ( + k, + _, + ) in self.sentence_encoder.layernorm_embedding.named_parameters(): + skip_keys.add(f"layernorm_embedding.{k}") + if self.sentence_encoder.layer_norm is not None: + for k, _ in self.sentence_encoder.layer_norm.named_parameters(): + skip_keys.add(f"layer_norm.{k}") + + self.ema = EMAModule( + self.sentence_encoder, + ema_config, + skip_keys=skip_keys, + ) + + def set_num_updates(self, num_updates): + super().set_num_updates(num_updates) + + if self.ema is None and self.regression_head is not None: + logger.info(f"making ema teacher") + self.make_ema_teacher() + elif self.training and self.ema is not None: + if self.cfg.ema_decay != self.cfg.ema_end_decay: + if num_updates >= self.cfg.ema_anneal_end_step: + decay = self.cfg.ema_end_decay + else: + decay = get_annealed_rate( + self.cfg.ema_decay, + self.cfg.ema_end_decay, + num_updates, + self.cfg.ema_anneal_end_step, + ) + self.ema.set_decay(decay) + if self.ema.get_decay() < 1: + self.ema.step(self.sentence_encoder) + + def state_dict(self, destination=None, prefix="", keep_vars=False): + state = 
super().state_dict(destination, prefix, keep_vars) + if self.ema is not None: + state[prefix + "_ema"] = self.ema.fp32_params + return state + + def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs): + if self.ema is not None: + k = prefix + "_ema" + assert k in state_dict + self.ema.restore(state_dict[k], True) + del state_dict[k] + return super()._load_from_state_dict(state_dict, prefix, *args, **kwargs) + + def forward( + self, + src_tokens, + target_tokens=None, + features_only=False, + return_all_hiddens=False, + masked_tokens=None, + **unused, + ): + """ + Args: + src_tokens (LongTensor): input tokens of shape `(batch, src_len)` + features_only (bool, optional): skip LM head and just return + features. If True, the output will be of shape + `(batch, src_len, embed_dim)`. + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + + Returns: + tuple: + - the LM output of shape `(batch, src_len, vocab)` + - a dictionary of additional data, where 'inner_states' + is a list of hidden states. Note that the hidden + states have shape `(src_len, batch, vocab)`. 
+ """ + + x, extra = self.extract_features( + src_tokens, return_all_hiddens=return_all_hiddens + ) + + if features_only: + return x, extra + + assert target_tokens is not None + + with torch.no_grad(): + # use EMA parameter as the teacher + self.ema.model.eval() + + encoder_out = self.ema.model( + target_tokens, + return_all_hiddens=True, + ) + y = encoder_out["fc_results"] + + y = y[-self.average_top_k_layers :] + + permuted = False + if self.cfg.instance_norm_target_layer or self.cfg.batch_norm_target_layer: + y = [tl.permute(1, 2, 0) for tl in y] # TBC -> BCT + permuted = True + + if self.cfg.batch_norm_target_layer: + y = [ + F.batch_norm( + tl.float(), running_mean=None, running_var=None, training=True + ) + for tl in y + ] + + if self.cfg.instance_norm_target_layer: + y = [F.instance_norm(tl.float()) for tl in y] + + if permuted: + y = [tl.transpose(1, 2) for tl in y] # BCT -> BTC + + if self.cfg.layer_norm_target_layer: + y = [F.layer_norm(tl.float(), tl.shape[-1:]) for tl in y] + + y = sum(y) / len(y) + + if not permuted: + y = y.transpose(0, 1) + + if self.cfg.layer_norm_targets: + y = F.layer_norm(y.float(), y.shape[-1:]) + + if self.cfg.instance_norm_targets: + y = F.instance_norm(y.transpose(1, 2)).transpose(1, 2) + + masked_indices = src_tokens.eq(self.mask_idx) + + x = x[masked_indices] + y = y[masked_indices] + + x = self.regression_head(x) + + sz = x.size(-1) + if self.cfg.loss_beta == 0: + loss = F.mse_loss(x.float(), y.float(), reduction="none").sum(dim=-1) + else: + loss = F.smooth_l1_loss( + x.float(), y.float(), reduction="none", beta=self.cfg.loss_beta + ).sum(dim=-1) + + result = { + "losses": { + "main": loss.sum() / math.sqrt(sz) + if self.loss_scale is None or self.loss_scale <= 0 + else loss.sum() * self.loss_scale, + }, + "sample_size": loss.numel(), + } + + # logging other values + other_logs = { + "ema_decay": self.ema.get_decay() * 1000 + } + result["logs"] = other_logs + return result + + def extract_features(self, src_tokens, return_all_hiddens=False, 
**kwargs): + encoder_out = self.sentence_encoder( + src_tokens, + return_all_hiddens=return_all_hiddens, + token_embeddings=kwargs.get("token_embeddings", None), + ) + # T x B x C -> B x T x C + features = encoder_out["encoder_out"][0].transpose(0, 1) + inner_states = encoder_out["encoder_states"] if return_all_hiddens else None + return features, { + "inner_states": inner_states, + "encoder_embedding": encoder_out["encoder_embedding"][0], + } + + def output_layer(self, features, masked_tokens=None, **unused): + return self.lm_head(features, masked_tokens) + + def max_positions(self): + """Maximum output length supported by the encoder.""" + return self.cfg.max_positions diff --git a/fairseq/examples/data2vec/models/data2vec_text_classification.py b/fairseq/examples/data2vec/models/data2vec_text_classification.py new file mode 100644 index 0000000..e787b91 --- /dev/null +++ b/fairseq/examples/data2vec/models/data2vec_text_classification.py @@ -0,0 +1,141 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
# The code in this file is adapted from the BeiT implementation which can be found here:
# https://github.com/microsoft/unilm/tree/master/beit

import logging

from dataclasses import dataclass
from typing import Any

from omegaconf import II, MISSING

import torch
import torch.nn as nn
import torch.nn.functional as F

from fairseq import checkpoint_utils, tasks

from fairseq.dataclass import FairseqDataclass
from fairseq.models import BaseFairseqModel, register_model
from fairseq.models.roberta.model import RobertaClassificationHead

from examples.data2vec.data.modality import Modality


logger = logging.getLogger(__name__)


@dataclass
class Data2VecTextClassificationConfig(FairseqDataclass):
    """Options for the classification wrapper around a pretrained model."""

    # settings forwarded to RobertaClassificationHead
    pooler_dropout: float = 0.0
    pooler_activation_fn: str = "tanh"
    quant_noise_pq: int = 0
    quant_noise_pq_block_size: int = 8
    spectral_norm_classification_head: bool = False

    # checkpoint to read the pretrained cfg (and, unless disabled, weights) from
    model_path: str = MISSING
    no_pretrained_weights: bool = False

    # filled in from the checkpoint on first construction when left as None
    pretrained_model_args: Any = None


@register_model(
    "data2vec_text_classification", dataclass=Data2VecTextClassificationConfig
)
class Data2VecTextClassificationModel(BaseFairseqModel):
    """Pretrained model plus a dictionary of named classification heads."""

    def __init__(self, cfg: Data2VecTextClassificationConfig):
        super().__init__()
        self.cfg = cfg

        checkpoint = None
        pretrained_cfg = cfg.pretrained_model_args
        if pretrained_cfg is None:
            # No stored args: read them from the checkpoint and cache them on cfg.
            checkpoint = checkpoint_utils.load_checkpoint_to_cpu(cfg.model_path, {})
            pretrained_cfg = checkpoint.get("cfg", None)
            pretrained_cfg.criterion = None
            pretrained_cfg.lr_scheduler = None
            cfg.pretrained_model_args = pretrained_cfg

            logger.info(pretrained_cfg)

        pretrain_task = tasks.setup_task(pretrained_cfg.task)
        backbone = pretrain_task.build_model(
            pretrained_cfg.model, from_checkpoint=True
        )
        backbone.remove_pretraining_modules()

        self.model = backbone

        # Weights are only copied when the checkpoint was loaded above.
        if checkpoint is not None and not cfg.no_pretrained_weights:
            self.load_model_weights(checkpoint, backbone, cfg)

        self.classification_heads = nn.ModuleDict()

    def load_model_weights(self, state, model, cfg):
        """Drop decoder/EMA entries from ``state["model"]`` and load the rest strictly."""
        weights = state["model"]
        for key in list(weights.keys()):
            unwanted = key.startswith(("shared_decoder", "_ema")) or "decoder" in key
            if unwanted:
                logger.info(f"Deleting {key} from checkpoint")
                del weights[key]
        model.load_state_dict(weights, strict=True)

    @classmethod
    def build_model(cls, cfg: Data2VecTextClassificationConfig, task=None):
        """Build a new model instance."""
        return cls(cfg)

    def register_classification_head(
        self, name, num_classes=None, inner_dim=None, **kwargs
    ):
        """Register a classification head.

        An existing head with the same name is replaced; a warning is logged
        when its output or inner size differs from the requested one.
        """
        if name in self.classification_heads:
            old_head = self.classification_heads[name]
            prev_num_classes = old_head.out_proj.out_features
            prev_inner_dim = old_head.dense.out_features
            if (num_classes, inner_dim) != (prev_num_classes, prev_inner_dim):
                logger.warning(
                    're-registering head "{}" with num_classes {} (prev: {}) '
                    "and inner_dim {} (prev: {})".format(
                        name, num_classes, prev_num_classes, inner_dim, prev_inner_dim
                    )
                )

        head_cfg = self.cfg
        embed_dim = head_cfg.pretrained_model_args.model.embed_dim
        self.classification_heads[name] = RobertaClassificationHead(
            input_dim=embed_dim,
            inner_dim=inner_dim or embed_dim,
            num_classes=num_classes,
            activation_fn=head_cfg.pooler_activation_fn,
            pooler_dropout=head_cfg.pooler_dropout,
            q_noise=head_cfg.quant_noise_pq,
            qn_block_size=head_cfg.quant_noise_pq_block_size,
            do_spectral_norm=head_cfg.spectral_norm_classification_head,
        )

    def forward(
        self,
        source,
        id,
        padding_mask,
        features_only=True,
        remove_extra_tokens=True,
        classification_head_name=None,
    ):
        """Run the wrapped model in text mode without masking, then the named head.

        Returns ``(logits, encoder_out)`` where ``logits`` is the head applied
        to ``encoder_out["x"]``.
        """
        encoder_out = self.model(
            source,
            id=id,
            mode=Modality.TEXT,
            padding_mask=padding_mask,
            mask=False,
            features_only=features_only,
            remove_extra_tokens=remove_extra_tokens,
        )
        head = self.classification_heads[classification_head_name]
        logits = head(encoder_out["x"])
        return logits, encoder_out


# Patch-header fragment carried over verbatim from the collapsed diff (continues on the next line):
# diff --git a/fairseq/examples/data2vec/models/data2vec_vision.py
# Patch-header fragment carried over verbatim from the collapsed diff (continued from the previous line):
# b/fairseq/examples/data2vec/models/data2vec_vision.py new file mode 100644 index 0000000..2f89894 --- /dev/null +++ b/fairseq/examples/data2vec/models/data2vec_vision.py @@ -0,0 +1,727 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# The code in this file is adapted from the BeiT implementation which can be found here:
# https://github.com/microsoft/unilm/tree/master/beit

import logging
import math
import numpy as np
import random

from dataclasses import dataclass, field
from typing import Optional

from omegaconf import II

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist

from fairseq.modules import EMAModule, EMAModuleConfig
from fairseq.dataclass import FairseqDataclass
from fairseq.models import BaseFairseqModel, register_model


logger = logging.getLogger(__name__)


@dataclass
class Data2VecVisionConfig(FairseqDataclass):
    """Hyper-parameters of the data2vec vision model."""

    layer_scale_init_value: float = field(
        default=1e-4, metadata={"help": "rescale layer outputs, 0 to disable"}
    )
    num_mask_patches: int = field(
        default=75,
        metadata={"help": "number of the visual tokens/patches need be masked"},
    )
    min_mask_patches_per_block: int = 16
    max_mask_patches_per_block: int = 196
    image_size: int = 224
    patch_size: int = 16
    in_channels: int = 3

    shared_rel_pos_bias: bool = True

    drop_path: float = 0.1
    attention_dropout: float = 0.0

    depth: int = 12
    embed_dim: int = 768
    num_heads: int = 12
    # NOTE(review): TransformerEncoder below never forwards this value to Block,
    # so blocks always use Block's own default — confirm whether that is intended.
    mlp_ratio: int = 4

    loss_beta: float = field(
        default=0, metadata={"help": "beta for smooth l1 loss. 0 means use l2 loss"}
    )
    loss_scale: Optional[float] = field(
        default=None,
        metadata={
            "help": "scale the reconstruction loss by this constant. if None then scales by 1/sqrt(dim)"
        },
    )
    average_top_k_layers: int = field(
        default=8, metadata={"help": "how many layers to average"}
    )

    end_of_block_targets: bool = True
    layer_norm_target_layer: bool = False
    instance_norm_target_layer: bool = False
    batch_norm_target_layer: bool = False
    instance_norm_targets: bool = False
    layer_norm_targets: bool = False

    ema_decay: float = field(default=0.999, metadata={"help": "initial ema decay rate"})
    ema_end_decay: float = field(
        default=0.9999, metadata={"help": "final ema decay rate"}
    )

    # when to finish annealing ema decay rate
    ema_anneal_end_step: int = II("optimization.max_update")

    ema_transformer_only: bool = field(
        default=True,
        metadata={"help": "whether to momentum update only the transformer layers"},
    )


def get_annealed_rate(start, end, curr_step, total_steps):
    """Linearly interpolate from ``start`` (step 0) to ``end`` (step ``total_steps``)."""
    r = end - start
    pct_remaining = 1 - curr_step / total_steps
    return end - r * pct_remaining


def _build_relative_position_index(window_size):
    """Build the lookup index into a relative-position-bias table.

    Returns ``(num_relative_distance, index)`` where ``index`` is a square
    integer tensor with ``window_size[0] * window_size[1] + 1`` rows; row and
    column 0 belong to the CLS token and use the last three table entries.
    """
    num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3

    # get pair-wise relative position index for each token inside the window
    coords_h = torch.arange(window_size[0])
    coords_w = torch.arange(window_size[1])
    coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
    coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
    relative_coords = (
        coords_flatten[:, :, None] - coords_flatten[:, None, :]
    )  # 2, Wh*Ww, Wh*Ww
    relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
    relative_coords[:, :, 0] += window_size[0] - 1  # shift to start from 0
    relative_coords[:, :, 1] += window_size[1] - 1
    relative_coords[:, :, 0] *= 2 * window_size[1] - 1
    relative_position_index = torch.zeros(
        size=(window_size[0] * window_size[1] + 1,) * 2,
        dtype=relative_coords.dtype,
    )
    relative_position_index[1:, 1:] = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
    # cls to token & token 2 cls & cls to cls
    relative_position_index[0, 0:] = num_relative_distance - 3
    relative_position_index[0:, 0] = num_relative_distance - 2
    relative_position_index[0, 0] = num_relative_distance - 1
    return num_relative_distance, relative_position_index


@register_model("data2vec_vision", dataclass=Data2VecVisionConfig)
class Data2VecVisionModel(BaseFairseqModel):
    """Student encoder trained to regress the outputs of an EMA teacher on masked patches."""

    def __init__(self, cfg: Data2VecVisionConfig):
        super().__init__()
        self.cfg = cfg

        # created lazily by set_num_updates()
        self.ema = None

        self.average_top_k_layers = cfg.average_top_k_layers
        self.loss_beta = cfg.loss_beta
        self.loss_scale = (
            cfg.loss_scale
            if cfg.loss_scale is not None
            else 1 / math.sqrt(cfg.embed_dim)
        )

        self.patch_embed = PatchEmbed(
            img_size=cfg.image_size,
            patch_size=cfg.patch_size,
            in_chans=cfg.in_channels,
            embed_dim=cfg.embed_dim,
        )

        patch_size = self.patch_embed.patch_size
        self.window_size = (
            cfg.image_size // patch_size[0],
            cfg.image_size // patch_size[1],
        )

        self.cls_emb = nn.Parameter(torch.FloatTensor(1, 1, cfg.embed_dim))
        self.mask_emb = nn.Parameter(torch.FloatTensor(1, 1, cfg.embed_dim))

        # NOTE(review): the second positional argument of trunc_normal_ is `mean`,
        # so 0.02 is used as the mean here, not the std. Left unchanged because
        # altering the initialisation would change from-scratch training — confirm.
        nn.init.trunc_normal_(self.cls_emb, 0.02)
        nn.init.trunc_normal_(self.mask_emb, 0.02)

        self.encoder = TransformerEncoder(cfg, self.patch_embed.patch_shape)

        self.final_proj = nn.Linear(cfg.embed_dim, cfg.embed_dim)
        self.num_updates = 0

    def make_ema_teacher(self):
        """Create the EMA copy of the encoder (or of the whole model)."""
        ema_config = EMAModuleConfig(
            ema_decay=self.cfg.ema_decay,
            ema_fp32=True,
        )
        self.ema = EMAModule(
            self.encoder if self.cfg.ema_transformer_only else self,
            ema_config,
        )

    def set_num_updates(self, num_updates):
        """Create the teacher on first call; afterwards anneal its decay and step it."""
        super().set_num_updates(num_updates)

        if self.ema is None and self.final_proj is not None:
            logger.info("making ema teacher")
            self.make_ema_teacher()
        elif self.training and self.ema is not None:
            if self.cfg.ema_decay != self.cfg.ema_end_decay:
                if num_updates >= self.cfg.ema_anneal_end_step:
                    decay = self.cfg.ema_end_decay
                else:
                    decay = get_annealed_rate(
                        self.cfg.ema_decay,
                        self.cfg.ema_end_decay,
                        num_updates,
                        self.cfg.ema_anneal_end_step,
                    )
                self.ema.set_decay(decay)
            if self.ema.get_decay() < 1:
                self.ema.step(self.encoder if self.cfg.ema_transformer_only else self)

        self.num_updates = num_updates

    def state_dict(self, destination=None, prefix="", keep_vars=False):
        """Return the module state, with the teacher's fp32 params under ``prefix + "_ema"``."""
        state = super().state_dict(destination, prefix, keep_vars)

        if self.ema is not None:
            state[prefix + "_ema"] = self.ema.fp32_params

        return state

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """Restore the teacher from ``prefix + "_ema"`` and remove that key before the default load."""
        if self.ema is not None:
            k = prefix + "_ema"
            assert k in state_dict
            self.ema.restore(state_dict[k], True)
            del state_dict[k]
        return super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)

    @classmethod
    def build_model(cls, cfg: Data2VecVisionConfig, task=None):
        """Build a new model instance."""

        return cls(cfg)

    def make_mask(self, bsz, num_masks, min_masks, max_masks):
        """Sample block-wise patch masks.

        Returns an integer tensor of shape ``(bsz, height, width)`` (the patch
        grid in ``self.window_size``) with 1 at masked positions. Each sample
        receives rectangular blocks until ``num_masks`` patches are masked or a
        round of 10 attempts fails to add any.
        """
        height, width = self.window_size

        # `np.int` was removed in NumPy 1.24; the builtin `int` selects the same dtype.
        masks = np.zeros(shape=(bsz, height, width), dtype=int)

        min_aspect = 0.3
        max_aspect = 1 / min_aspect
        log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect))

        def _mask(mask, max_mask_patches):
            """Try up to 10 rectangles; return how many new patches were masked."""
            delta = 0
            for _attempt in range(10):
                target_area = random.uniform(min_masks, max_mask_patches)
                aspect_ratio = math.exp(random.uniform(*log_aspect_ratio))
                h = int(round(math.sqrt(target_area * aspect_ratio)))
                w = int(round(math.sqrt(target_area / aspect_ratio)))
                if w < width and h < height:
                    top = random.randint(0, height - h)
                    left = random.randint(0, width - w)

                    region = mask[top : top + h, left : left + w]
                    # Entries are 0/1, so the sum counts already-masked patches
                    # and h * w - sum is the number this rectangle would add.
                    newly_masked = int(h * w - region.sum())
                    if 0 < newly_masked <= max_mask_patches:
                        region[...] = 1  # `region` is a view, so this writes into `mask`
                        delta += newly_masked

                    if delta > 0:
                        break
            return delta

        for sample in range(bsz):
            mask = masks[sample]
            mask_count = 0

            while mask_count < num_masks:
                max_mask_patches = min(num_masks - mask_count, max_masks)

                delta = _mask(mask, max_mask_patches)
                if delta == 0:
                    break
                mask_count += delta

        return torch.from_numpy(masks)

    def forward(
        self,
        img,
        mask: bool = True,
        layer_results: bool = False,
    ):
        """Embed ``img``, optionally mask patches, and regress the teacher targets.

        Returns the encoder output when ``layer_results`` is true or ``mask`` is
        false; otherwise a dict with the summed regression loss, the sample
        size and monitoring statistics.
        """
        x = self.patch_embed(img)
        batch_size, seq_len, _ = x.size()

        if mask:
            mask_indices = self.make_mask(
                img.size(0),
                self.cfg.num_mask_patches,
                self.cfg.min_mask_patches_per_block,
                self.cfg.max_mask_patches_per_block,
            )
            bool_mask = mask_indices.view(mask_indices.size(0), -1).bool()
        else:
            mask_indices = bool_mask = None

        cls_tokens = self.cls_emb.expand(batch_size, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        if self.ema is not None:
            # Teacher targets are computed from the unmasked input, without gradients.
            with torch.no_grad():
                self.ema.model.eval()

                if self.cfg.ema_transformer_only:
                    y = self.ema.model(
                        x,
                        layer_results="end" if self.cfg.end_of_block_targets else "fc",
                    )
                else:
                    y = self.ema.model(
                        img,
                        mask=False,
                        layer_results=True,
                    )

            y = y[-self.cfg.average_top_k_layers :]

            permuted = False
            if self.cfg.instance_norm_target_layer or self.cfg.batch_norm_target_layer:
                y = [tl.transpose(1, 2) for tl in y]  # BTC -> BCT
                permuted = True

            if self.cfg.batch_norm_target_layer:
                y = [
                    F.batch_norm(
                        tl.float(), running_mean=None, running_var=None, training=True
                    )
                    for tl in y
                ]

            if self.cfg.instance_norm_target_layer:
                y = [F.instance_norm(tl.float()) for tl in y]

            if permuted:
                y = [tl.transpose(1, 2) for tl in y]  # BCT -> BTC

            if self.cfg.layer_norm_target_layer:
                y = [F.layer_norm(tl.float(), tl.shape[-1:]) for tl in y]

            y = sum(y) / len(y)

            if self.cfg.layer_norm_targets:
                y = F.layer_norm(y.float(), y.shape[-1:])

            if self.cfg.instance_norm_targets:
                y = F.instance_norm(y.float().transpose(1, 2)).transpose(1, 2)

            y = y[bool_mask].float()

        if mask_indices is not None:
            # Replace masked patch embeddings (CLS at position 0 is never masked).
            mask_token = self.mask_emb.expand(batch_size, seq_len, -1)
            w = mask_indices.view(mask_indices.size(0), -1, 1).type_as(mask_token)
            x[:, 1:] = x[:, 1:] * (1 - w) + mask_token * w

        if layer_results:
            enc_layer_results = "end" if self.cfg.end_of_block_targets else "fc"
        else:
            enc_layer_results = None

        x = self.encoder(x, layer_results=enc_layer_results)
        if layer_results or mask_indices is None:
            return x

        x = x[bool_mask].float()

        if self.loss_beta == 0:
            loss = F.mse_loss(x, y, reduction="none").sum(dim=-1)
        else:
            loss = F.smooth_l1_loss(x, y, reduction="none", beta=self.loss_beta).sum(
                dim=-1
            )

        if self.loss_scale > 0:
            loss = loss * self.loss_scale

        result = {
            "losses": {"regression": loss.sum()},
            "sample_size": loss.numel(),
            "target_var": self.compute_var(y),
            "pred_var": self.compute_var(x),
            "ema_decay": self.ema.get_decay() * 1000,
        }
        return result

    @staticmethod
    def compute_var(y):
        """Return the mean over features of ``sqrt(var + 1e-6)``.

        When ``torch.distributed`` is initialised the count, sum and sum of
        squares are all-reduced first, so the variance covers every worker.
        """
        y = y.view(-1, y.size(-1))
        if dist.is_initialized():
            zc = torch.tensor(y.size(0)).cuda()
            zs = y.sum(dim=0)
            zss = (y ** 2).sum(dim=0)

            dist.all_reduce(zc)
            dist.all_reduce(zs)
            dist.all_reduce(zss)

            var = zss / (zc - 1) - (zs ** 2) / (zc * (zc - 1))
            return torch.sqrt(var + 1e-6).mean()
        else:
            return torch.sqrt(y.var(dim=0) + 1e-6).mean()

    def remove_pretraining_modules(self, last_layer=None):
        """Drop the projection, teacher, final norm and mask embedding.

        When ``last_layer`` is given, only encoder blocks with index
        ``<= last_layer`` are kept.
        """
        self.final_proj = None
        self.ema = None
        self.encoder.norm = nn.Identity()
        self.mask_emb = None
        if last_layer is not None:
            # TransformerEncoder keeps its layers in `blocks`.
            self.encoder.blocks = nn.ModuleList(
                l for i, l in enumerate(self.encoder.blocks) if i <= last_layer
            )


class PatchEmbed(nn.Module):
    """Image to Patch Embedding"""

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        if isinstance(img_size, int):
            img_size = img_size, img_size
        if isinstance(patch_size, int):
            patch_size = patch_size, patch_size
        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
        self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        self.conv = nn.Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        # BCHW -> BTC
        x = self.conv(x).flatten(2).transpose(1, 2)
        return x


class Attention(nn.Module):
    """Multi-head self-attention with optional own and/or externally supplied relative position bias."""

    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=True,
        attn_drop=0.0,
        proj_drop=0.0,
        window_size=None,
        attn_head_dim=None,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            # only q and v carry a bias; forward() pads zeros for k
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        if window_size:
            self.window_size = window_size
            (
                self.num_relative_distance,
                relative_position_index,
            ) = _build_relative_position_index(window_size)
            self.relative_position_bias_table = nn.Parameter(
                torch.zeros(self.num_relative_distance, num_heads)
            )  # 2*Wh-1 * 2*Ww-1, nH
            self.register_buffer("relative_position_index", relative_position_index)
        else:
            self.window_size = None
            self.relative_position_bias_table = None
            self.relative_position_index = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x, rel_pos_bias=None):
        """Apply attention to ``x`` (B, N, C); ``rel_pos_bias`` is added to the logits when given."""
        B, N, C = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat(
                (
                    self.q_bias,
                    torch.zeros_like(self.v_bias, requires_grad=False),
                    self.v_bias,
                )
            )
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k, v = (
            qkv[0],
            qkv[1],
            qkv[2],
        )  # make torchscript happy (cannot use tensor as tuple)

        q = q * self.scale
        attn = q @ k.transpose(-2, -1)

        if self.relative_position_bias_table is not None:
            # per-layer bias, only present when a window_size was given
            relative_position_bias = self.relative_position_bias_table[
                self.relative_position_index.view(-1)
            ].view(
                self.window_size[0] * self.window_size[1] + 1,
                self.window_size[0] * self.window_size[1] + 1,
                -1,
            )  # Wh*Ww,Wh*Ww,nH
            relative_position_bias = relative_position_bias.permute(
                2, 0, 1
            ).contiguous()  # nH, Wh*Ww, Wh*Ww
            attn = attn + relative_position_bias.unsqueeze(0)

        if rel_pos_bias is not None:
            attn = attn + rel_pos_bias
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class RelativePositionBias(nn.Module):
    """Learned relative position bias table shared by all encoder blocks."""

    def __init__(self, window_size, num_heads):
        super().__init__()
        self.window_size = window_size
        (
            self.num_relative_distance,
            relative_position_index,
        ) = _build_relative_position_index(window_size)
        self.relative_position_bias_table = nn.Parameter(
            torch.zeros(self.num_relative_distance, num_heads)
        )

        self.register_buffer("relative_position_index", relative_position_index)

    def forward(self):
        """Gather the bias for every token pair (including CLS), heads first."""
        relative_position_bias = self.relative_position_bias_table[
            self.relative_position_index.view(-1)
        ].view(
            self.window_size[0] * self.window_size[1] + 1,
            self.window_size[0] * self.window_size[1] + 1,
            -1,
        )  # Wh*Ww,Wh*Ww,nH
        return relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        if self.drop_prob == 0.0 or not self.training:
            return x
        keep_prob = 1 - self.drop_prob
        shape = (x.shape[0],) + (1,) * (
            x.ndim - 1
        )  # work with diff dim tensors, not just 2D ConvNets
        random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
        random_tensor.floor_()
        output = x.div(keep_prob) * random_tensor
        return output

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)


class Block(nn.Module):
    """Pre-norm transformer block with optional layer scale (``gamma_1`` / ``gamma_2``)."""

    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        init_values=None,
        window_size=None,
    ):
        super().__init__()

        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            attn_drop=attn_drop,
            proj_drop=drop,
            window_size=window_size,
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = nn.LayerNorm(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)

        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop),
        )

        # The default init_values=None must disable layer scale rather than
        # fail on the `> 0` comparison.
        if init_values is not None and init_values > 0:
            self.gamma_1 = nn.Parameter(
                init_values * torch.ones((dim)), requires_grad=True
            )
            self.gamma_2 = nn.Parameter(
                init_values * torch.ones((dim)), requires_grad=True
            )
        else:
            self.gamma_1, self.gamma_2 = None, None

    def forward(self, x, rel_pos_bias=None):
        """Return ``(x, fc_feature)``: the block output and the MLP branch that was added to it."""
        if self.gamma_1 is None:
            x = x + self.drop_path(self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias))
            fc_feature = self.drop_path(self.mlp(self.norm2(x)))
            x = x + fc_feature
        else:
            x = x + self.drop_path(
                self.gamma_1 * self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
            )
            fc_feature = self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
            x = x + fc_feature
        return x, fc_feature


class TransformerEncoder(nn.Module):
    """Stack of ``Block`` layers with an optional shared relative position bias."""

    def __init__(self, cfg: Data2VecVisionConfig, patch_shape):
        super().__init__()

        self.rel_pos_bias = None
        if cfg.shared_rel_pos_bias:
            self.rel_pos_bias = RelativePositionBias(
                window_size=patch_shape, num_heads=cfg.num_heads
            )

        dpr = [
            x.item() for x in torch.linspace(0, cfg.drop_path, cfg.depth)
        ]  # stochastic depth decay rule

        self.blocks = nn.ModuleList(
            Block(
                dim=cfg.embed_dim,
                num_heads=cfg.num_heads,
                attn_drop=cfg.attention_dropout,
                drop_path=dpr[i],
                init_values=cfg.layer_scale_init_value,
                # blocks own a bias table only when none is shared
                window_size=patch_shape if not cfg.shared_rel_pos_bias else None,
            )
            for i in range(cfg.depth)
        )

        self.norm = nn.LayerNorm(cfg.embed_dim)

        self.apply(self.init_weights)
        self.fix_init_weight()

    def init_weights(self, m):
        """Truncated-normal (std 0.02) weights for Linear/Conv2d, zero biases, unit LayerNorm weight."""
        std = 0.02
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=std)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            nn.init.trunc_normal_(m.weight, std=std)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

    def fix_init_weight(self):
        """Divide each block's output projections by ``sqrt(2 * layer_number)`` (1-based)."""

        def rescale(param, layer_id):
            param.div_(math.sqrt(2.0 * layer_id))

        for layer_id, layer in enumerate(self.blocks):
            rescale(layer.attn.proj.weight.data, layer_id + 1)
            rescale(layer.mlp[2].weight.data, layer_id + 1)

    def extract_features(self, x, layer_results):
        """Run all blocks.

        Returns the per-block outputs (``"end"``) or MLP branches (``"fc"``) as
        a list when ``layer_results`` is set, otherwise the normalised final
        output.
        """
        rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None

        z = []
        for blk in self.blocks:
            x, fc_feature = blk(x, rel_pos_bias=rel_pos_bias)
            if layer_results == "end":
                z.append(x)
            elif layer_results == "fc":
                z.append(fc_feature)

        return z if layer_results else self.norm(x)

    def forward(self, x, layer_results=None):
        """Same as ``extract_features`` but with the CLS position (index 0) stripped."""
        x = self.extract_features(x, layer_results=layer_results)
        if layer_results:
            return [z[:, 1:] for z in x]

        x = x[:, 1:]
        return x


# Patch header and licence banner of the next file, carried over verbatim from the collapsed diff:
# diff --git a/fairseq/examples/data2vec/models/mae.py b/fairseq/examples/data2vec/models/mae.py new file mode 100644 index 0000000..a3b5f72 --- /dev/null +++ b/fairseq/examples/data2vec/models/mae.py @@ -0,0 +1,829 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +# The code in this file is adapted from the BeiT implementation which can be found here: +# https://github.com/microsoft/unilm/tree/master/beit + +import logging +from dataclasses import dataclass +from functools import partial + +from timm.models.vision_transformer import PatchEmbed, Block + +import torch +import torch.nn as nn + +import numpy as np + +from fairseq.dataclass import FairseqDataclass +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.wav2vec.wav2vec2 import TransformerSentenceEncoderLayer + +try: + from apex.normalization import FusedLayerNorm +except: + FusedLayerNorm = nn.LayerNorm + +import torch.nn.functional as F + + +logger = logging.getLogger(__name__) + + +@dataclass +class MaeConfig(FairseqDataclass): + input_size: int = 224 + in_chans: int = 3 + patch_size: int = 16 + embed_dim: int = 768 + depth: int = 12 + num_heads: int = 12 + decoder_embed_dim: int = 512 + decoder_depth: int = 8 + decoder_num_heads: int = 16 + mlp_ratio: int = 4 + norm_eps: float = 1e-6 + + drop_path_rate: float = 0.0 + + mask_ratio: float = 0.75 + norm_pix_loss: bool = True + + w2v_block: bool = False + alt_block: bool = False + alt_block2: bool = False + alt_attention: bool = False + block_dropout: float = 0 + attention_dropout: float = 0 + activation_dropout: float = 0 + layer_norm_first: bool = False + + fused_ln: bool = True + end_of_block_targets: bool = True + + no_decoder_embed: bool = False + no_decoder_pos_embed: bool = False + mask_noise_std: float = 0 + + single_qkv: bool = False + use_rel_pos_bias: bool = False + no_cls: bool = False + + +def modify_relative_position_bias(orig_bias, bsz, mask): + if mask is None: + return orig_bias.unsqueeze(0).repeat( + bsz, 1, 1, 1 + ) # heads x seq_len x seq_len => bsz x heads x seq_len x seq_len + heads, max_seq_len, max_seq_len = orig_bias.shape # includes CLS token + mask_for_rel_pos_bias = torch.cat( + (torch.zeros(bsz, 1, dtype=mask.dtype, device=mask.device), mask), dim=1 + 
).bool() # bsz x seqlen (add CLS token) + unmasked_for_rel_pos_bias = ~mask_for_rel_pos_bias + unmasked_for_rel_pos_bias = unmasked_for_rel_pos_bias.unsqueeze(1).repeat( + 1, heads, 1 + ) # bsz x seq_len => bsz x heads x seq_len + b_t_t_rel_pos_bias = orig_bias.unsqueeze(0).repeat( + bsz, 1, 1, 1 + ) # heads x seq_len x seq_len => bsz x heads x seq_len x seq_len + b_t_t_rel_pos_bias = b_t_t_rel_pos_bias.masked_select( + unmasked_for_rel_pos_bias.unsqueeze(-1) + ) + b_t_t_rel_pos_bias = b_t_t_rel_pos_bias.view(bsz, heads, -1, max_seq_len) + new_len = b_t_t_rel_pos_bias.size(-2) + b_t_t_rel_pos_bias = b_t_t_rel_pos_bias.masked_select( + unmasked_for_rel_pos_bias.unsqueeze(-2) + ) + b_t_t_rel_pos_bias = b_t_t_rel_pos_bias.view(bsz, heads, new_len, new_len) + return b_t_t_rel_pos_bias + + +class AltBlock(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + layer_norm_first=True, + ffn_targets=False, + use_rel_pos_bias=False, + window_size=None, + alt_attention=False, + ): + super().__init__() + + self.layer_norm_first = layer_norm_first + self.ffn_targets = ffn_targets + + from timm.models.vision_transformer import Attention, DropPath, Mlp + + self.norm1 = norm_layer(dim) + self.use_rel_pos_bias = use_rel_pos_bias + if use_rel_pos_bias: + self.attn = AltAttention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + window_size=window_size, + ) + else: + if alt_attention: + from .multi.modules import AltAttention as AltAttention2 + self.attn = AltAttention2( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + else: + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + # NOTE: drop path for stochastic 
depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + def forward(self, x, rel_pos_bias=None, pos_mask=None): + if self.layer_norm_first: + if self.use_rel_pos_bias: + x = x + self.drop_path( + self.attn( + self.norm1(x), rel_pos_bias=rel_pos_bias, pos_mask=pos_mask + ) + ) + else: + x = x + self.drop_path(self.attn(self.norm1(x))) + t = self.mlp(self.norm2(x)) + x = x + self.drop_path(t) + if not self.ffn_targets: + t = x + return x, t + else: + if self.use_rel_pos_bias: + x = x + self.drop_path( + self.attn(x, rel_pos_bias=rel_pos_bias, pos_mask=pos_mask) + ) + else: + x = x + self.drop_path(self.attn(x)) + r = x = self.norm1(x) + x = self.mlp(x) + t = x + x = self.norm2(r + self.drop_path(x)) + if not self.ffn_targets: + t = x + return x, t + + +class AltAttention(nn.Module): + def __init__( + self, + dim, + num_heads=8, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + window_size=None, + attn_head_dim=None, + ): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + if attn_head_dim is not None: + head_dim = attn_head_dim + all_head_dim = head_dim * self.num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) + self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) + else: + self.q_bias = None + self.v_bias = None + + if window_size: + self.window_size = window_size + self.num_relative_distance = (2 * window_size[0] - 1) * ( + 2 * window_size[1] - 1 + ) + 3 + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + # cls to token & token 2 cls & cls to cls 
+ + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(window_size[0]) + coords_w = torch.arange(window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * window_size[1] - 1 + relative_position_index = torch.zeros( + size=(window_size[0] * window_size[1] + 1,) * 2, + dtype=relative_coords.dtype, + ) + relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + relative_position_index[0, 0:] = self.num_relative_distance - 3 + relative_position_index[0:, 0] = self.num_relative_distance - 2 + relative_position_index[0, 0] = self.num_relative_distance - 1 + + self.register_buffer("relative_position_index", relative_position_index) + else: + self.window_size = None + self.relative_position_bias_table = None + self.relative_position_index = None + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(all_head_dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x, rel_pos_bias=None, pos_mask=None): + B, N, C = x.shape + qkv_bias = None + if self.q_bias is not None: + qkv_bias = torch.cat( + ( + self.q_bias, + torch.zeros_like(self.v_bias, requires_grad=False), + self.v_bias, + ) + ) + # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = q @ 
k.transpose(-2, -1) + + if self.relative_position_bias_table is not None: + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1) + ].view( + self.window_size[0] * self.window_size[1] + 1, + self.window_size[0] * self.window_size[1] + 1, + -1, + ) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1 + ).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + modify_relative_position_bias( + relative_position_bias, x.size(0), pos_mask + ) + + if rel_pos_bias is not None: + attn = attn + rel_pos_bias + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class RelativePositionBias(nn.Module): + def __init__(self, window_size, num_heads): + super().__init__() + self.window_size = window_size + self.num_relative_distance = (2 * window_size[0] - 1) * ( + 2 * window_size[1] - 1 + ) + 3 + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, num_heads) + ) + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(window_size[0]) + coords_w = torch.arange(window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * window_size[1] - 1 + relative_position_index = torch.zeros( + size=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype + ) + relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + relative_position_index[0, 0:] = 
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    Encode scalar positions with fixed sine/cosine features.

    embed_dim: output dimension for each position (must be even)
    pos: positions to be encoded; an array (or array-like) of any shape,
         flattened to size (M,)
    out: (M, D) float64 array whose first D/2 columns are the sines and
         whose last D/2 columns are the cosines
    """
    assert embed_dim % 2 == 0
    # ``np.float`` was only an alias of the builtin float (i.e. float64) and
    # was removed in NumPy 1.24, so spell the dtype out explicitly.
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega /= embed_dim / 2.0
    omega = 1.0 / 10000 ** omega  # (D/2,)

    pos = np.asarray(pos).reshape(-1)  # (M,)
    out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)

    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
    return emb
cfg.embed_dim)) if not cfg.no_cls else None + self.pos_embed = nn.Parameter( + torch.zeros(1, num_patches + int(not cfg.no_cls), cfg.embed_dim), requires_grad=False + ) # fixed sin-cos embedding + + norm_layer = partial(nn.LayerNorm, eps=cfg.norm_eps) + + dpr = [ + x.item() for x in torch.linspace(0, cfg.drop_path_rate, cfg.depth) + ] # stochastic depth decay rule + + def make_block(drop_path): + if cfg.w2v_block: + return TransformerSentenceEncoderLayer( + embedding_dim=cfg.embed_dim, + ffn_embedding_dim=cfg.embed_dim * cfg.mlp_ratio, + num_attention_heads=cfg.num_heads, + dropout=cfg.block_dropout, + attention_dropout=cfg.attention_dropout, + activation_dropout=cfg.activation_dropout, + activation_fn="gelu", + layer_norm_first=cfg.layer_norm_first, + drop_path=drop_path, + norm_eps=1e-6, + single_qkv=cfg.single_qkv, + fused_ln=cfg.fused_ln, + ) + elif cfg.alt_block: + window_size = ( + cfg.input_size // self.patch_embed.patch_size[0], + cfg.input_size // self.patch_embed.patch_size[1], + ) + return AltBlock( + cfg.embed_dim, + cfg.num_heads, + cfg.mlp_ratio, + qkv_bias=True, + qk_scale=None, + norm_layer=norm_layer, + drop_path=drop_path, + layer_norm_first=cfg.layer_norm_first, + ffn_targets=not cfg.end_of_block_targets, + use_rel_pos_bias=cfg.use_rel_pos_bias, + window_size=window_size + if (self.cfg.use_rel_pos_bias and not self.cfg.shared_rel_pos_bias) + else None, + alt_attention=cfg.alt_attention, + ) + elif cfg.alt_block2: + from .multi.modules import AltBlock as AltBlock2 + return AltBlock2( + cfg.embed_dim, + cfg.num_heads, + cfg.mlp_ratio, + qkv_bias=True, + qk_scale=None, + norm_layer=norm_layer, + drop_path=drop_path, + layer_norm_first=cfg.layer_norm_first, + ffn_targets=not cfg.end_of_block_targets, + ) + else: + return Block( + cfg.embed_dim, + cfg.num_heads, + cfg.mlp_ratio, + qkv_bias=True, + qk_scale=None, + norm_layer=norm_layer, + drop_path=drop_path, + ) + + self.blocks = nn.ModuleList([make_block(dpr[i]) for i in range(cfg.depth)]) + 
self.norm = norm_layer(cfg.embed_dim) + # -------------------------------------------------------------------------- + + # -------------------------------------------------------------------------- + # MAE decoder specifics + self.decoder_embed = ( + nn.Linear(cfg.embed_dim, cfg.decoder_embed_dim, bias=True) + if not cfg.no_decoder_embed + else None + ) + + self.mask_token = ( + nn.Parameter( + torch.zeros( + 1, + 1, + cfg.decoder_embed_dim + if not cfg.no_decoder_embed + else cfg.embed_dim, + ) + ) + if cfg.mask_noise_std <= 0 + else None + ) + + self.decoder_pos_embed = ( + nn.Parameter( + torch.zeros( + 1, + num_patches + 1, + cfg.decoder_embed_dim + if not cfg.no_decoder_embed + else cfg.embed_dim, + ), + requires_grad=False, + ) + if not cfg.no_decoder_pos_embed + else None + ) + + self.decoder_blocks = nn.ModuleList( + [ + Block( + cfg.decoder_embed_dim, + cfg.decoder_num_heads, + cfg.mlp_ratio, + qkv_bias=True, + qk_scale=None, + norm_layer=norm_layer, + ) + for _ in range(cfg.decoder_depth) + ] + ) + + self.decoder_norm = norm_layer(cfg.decoder_embed_dim) + self.decoder_pred = nn.Linear( + cfg.decoder_embed_dim, cfg.patch_size ** 2 * cfg.in_chans, bias=True + ) # decoder to patch + # -------------------------------------------------------------------------- + + self.norm_pix_loss = cfg.norm_pix_loss + + self.initialize_weights() + + for pn, p in self.named_parameters(): + if len(p.shape) == 1 or pn.endswith(".bias"): + p.param_group = "no_decay" + else: + p.param_group = "with_decay" + + def initialize_weights(self): + # initialization + # initialize (and freeze) pos_embed by sin-cos embedding + pos_embed = get_2d_sincos_pos_embed( + self.pos_embed.shape[-1], + int(self.patch_embed.num_patches ** 0.5), + cls_token=not self.cfg.no_cls, + ) + self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0)) + + if self.decoder_pos_embed is not None: + decoder_pos_embed = get_2d_sincos_pos_embed( + self.decoder_pos_embed.shape[-1], + 
def patchify(self, imgs):
    """
    Cut a batch of square images into flattened, non-overlapping patches.

    imgs: (N, C, H, W) with H == W and H divisible by the patch size
    x: (N, L, patch_size**2 * C) with L = (H // patch_size) ** 2

    The channel count is read from ``imgs`` rather than hard-coded to 3, so
    the result lines up with ``decoder_pred`` (which is sized with
    ``cfg.in_chans``) for non-RGB inputs as well. For 3-channel inputs the
    output is unchanged.
    """
    p = self.patch_embed.patch_size[0]
    assert imgs.shape[2] == imgs.shape[3] and imgs.shape[2] % p == 0

    n, c = imgs.shape[0], imgs.shape[1]
    h = w = imgs.shape[2] // p
    # Split both spatial axes into (patch index, offset inside the patch).
    x = imgs.reshape(shape=(n, c, h, p, w, p))
    # Bring the two patch-index axes forward and the channel axis last, so
    # each patch is laid out as (row offset, column offset, channel).
    x = torch.einsum("nchpwq->nhwpqc", x)
    x = x.reshape(shape=(n, h * w, p ** 2 * c))
    return x
def forward_encoder(self, x, mask_ratio):
    """
    Run the encoder: patch embedding, positional embedding, optional random
    masking, optional cls token, transformer blocks and final norm.

    x: batch of images accepted by ``self.patch_embed``
    mask_ratio: fraction of patches to drop; a value <= 0 disables masking

    Returns ``(x, mask, ids_restore)``. ``mask`` and ``ids_restore`` are
    ``None`` when masking is disabled.
    """
    # embed patches
    x = self.patch_embed(x)

    # ``pos_embed`` only reserves a leading slot for the cls token when the
    # model has one. Without a cls token every row belongs to a patch, so
    # slicing off the first row would leave one position too few and the
    # addition would fail to broadcast.
    if self.cls_token is not None:
        x = x + self.pos_embed[:, 1:, :]
    else:
        x = x + self.pos_embed

    # masking: length -> length * (1 - mask_ratio)
    if mask_ratio > 0:
        x, mask, ids_restore = self.random_masking(x, mask_ratio)
    else:
        mask = ids_restore = None

    # prepend the cls token together with its own positional embedding
    if self.cls_token is not None:
        cls_token = self.cls_token + self.pos_embed[:, :1, :]
        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

    # apply Transformer blocks
    for blk in self.blocks:
        x = blk(x)

    # the final norm may have been removed by remove_pretraining_modules()
    if self.norm is not None:
        x = self.norm(x)

    return x, mask, ids_restore
def forward_loss(self, imgs, pred, mask):
    """
    Masked mean-squared reconstruction loss.

    imgs: [N, 3, H, W]
    pred: [N, L, p*p*3]
    mask: [N, L], 0 is keep, 1 is remove

    Returns ``(loss, sample_size)``: the per-patch MSE summed over the
    removed patches, and the number of removed patches (``mask.sum()``).
    The loss is returned as a sum, not an average.
    """
    patches = self.patchify(imgs)
    if self.norm_pix_loss:
        # normalise every target patch to zero mean / unit variance
        patch_mean = patches.mean(dim=-1, keepdim=True)
        patch_var = patches.var(dim=-1, keepdim=True)
        patches = (patches - patch_mean) / (patch_var + 1.0e-6) ** 0.5

    squared_error = (pred - patches) ** 2
    per_patch = squared_error.mean(dim=-1)  # [N, L], mean loss per patch

    masked_total = (per_patch * mask).sum()
    return masked_total, mask.sum()
def get_layer_id_for_vit(name, num_layers):
    """
    Map a parameter name to the index of the layer it belongs to.

    Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33

    Embedding parameters map to 0, ``blocks.<i>.*`` maps to ``i + 1``,
    ``rel_pos_bias*`` maps to ``num_layers - 1`` and anything else maps to
    ``num_layers``.
    """
    # input-side parameters all share the first layer slot
    if name in ("cls_token", "pos_embed") or name.startswith("patch_embed"):
        return 0

    if name.startswith("rel_pos_bias"):
        return num_layers - 1

    if name.startswith("blocks"):
        # "blocks.<index>.<rest>" -> index of the block, shifted by one
        block_index = name.split(".")[1]
        return int(block_index) + 1

    return num_layers
pretrained_args.model["modalities"]["image"]: + del pretrained_args.model["modalities"]["image"]["mae_masking"] + + if cfg.remove_alibi: + pretrained_args.model["modalities"]["image"][ + "use_alibi_encoder" + ] = False + if ( + state is not None + and "modality_encoders.IMAGE.alibi_bias" in state["model"] + ): + del state["model"]["modality_encoders.IMAGE.alibi_bias"] + + pretrained_args.model["encoder_dropout"] = cfg.encoder_dropout + pretrained_args.model["post_mlp_drop"] = cfg.post_mlp_drop + pretrained_args.model["attention_dropout"] = cfg.attention_dropout + pretrained_args.model["activation_dropout"] = cfg.activation_dropout + pretrained_args.model["dropout_input"] = cfg.dropout_input + pretrained_args.model["layerdrop"] = cfg.layerdrop + + pretrained_args.model["modalities"]["image"][ + "prenet_layerdrop" + ] = cfg.prenet_layerdrop + pretrained_args.model["modalities"]["image"][ + "prenet_dropout" + ] = cfg.prenet_dropout + else: + # not d2v multi + with open_dict(pretrained_args): + pretrained_args.model["drop_path_rate"] = cfg.drop_path_rate + pretrained_args.model["block_dropout"] = cfg.encoder_dropout + pretrained_args.model["attention_dropout"] = cfg.attention_dropout + pretrained_args.model["activation_dropout"] = cfg.activation_dropout + + task = tasks.setup_task(pretrained_args.task) + model = task.build_model(pretrained_args.model, from_checkpoint=True) + + self.d2v_multi = "data2vec_multi" in pretrained_args.model._name + self.linear_classifier = cfg.linear_classifier + + self.model = model + + if state is not None and not cfg.no_pretrained_weights: + interpolate_pos_embed(model, state) + + if "modality_encoders.IMAGE.positional_encoder.pos_embed" in state["model"]: + state["model"][ + "modality_encoders.IMAGE.positional_encoder.positions" + ] = state["model"][ + "modality_encoders.IMAGE.positional_encoder.pos_embed" + ] + del state["model"][ + "modality_encoders.IMAGE.positional_encoder.pos_embed" + ] + if "modality_encoders.IMAGE.encoder_mask" in 
state["model"]: + del state["model"]["modality_encoders.IMAGE.encoder_mask"] + + model.load_state_dict(state["model"], strict=True) + + if self.d2v_multi: + model.remove_pretraining_modules(modality="image") + else: + model.remove_pretraining_modules() + + if self.linear_classifier: + model.requires_grad_(False) + + self.fc_norm = None + if self.cfg.use_fc_norm: + self.fc_norm = nn.LayerNorm(pretrained_args.model.embed_dim, eps=1e-6) + nn.init.constant_(self.fc_norm.bias, 0) + nn.init.constant_(self.fc_norm.weight, 1.0) + + self.head = nn.Linear(pretrained_args.model.embed_dim, cfg.num_classes) + + nn.init.trunc_normal_(self.head.weight, std=0.02) + nn.init.constant_(self.head.bias, 0) + + self.mixup_fn = None + + if cfg.mixup > 0 or cfg.cutmix > 0: + from timm.data import Mixup + + self.mixup_fn = Mixup( + mixup_alpha=cfg.mixup, + cutmix_alpha=cfg.cutmix, + cutmix_minmax=None, + prob=cfg.mixup_prob, + switch_prob=cfg.mixup_switch_prob, + mode=cfg.mixup_mode, + label_smoothing=cfg.label_smoothing, + num_classes=cfg.num_classes, + ) + + if self.model.norm is not None: + for pn, p in self.model.norm.named_parameters(): + if len(p.shape) == 1 or pn.endswith(".bias"): + p.optim_overrides = {"optimizer": {"weight_decay_scale": 0}} + + if self.fc_norm is not None: + for pn, p in self.fc_norm.named_parameters(): + if len(p.shape) == 1 or pn.endswith(".bias"): + p.optim_overrides = {"optimizer": {"weight_decay_scale": 0}} + + for pn, p in self.head.named_parameters(): + if len(p.shape) == 1 or pn.endswith(".bias"): + p.optim_overrides = {"optimizer": {"weight_decay_scale": 0}} + + if self.d2v_multi: + mod_encs = list(model.modality_encoders.values()) + assert len(mod_encs) == 1, len(mod_encs) + blocks = list(mod_encs[0].context_encoder.blocks) + list(model.blocks) + else: + blocks = model.blocks + + num_layers = len(blocks) + 1 + layer_scales = list( + cfg.layer_decay ** (num_layers - i) for i in range(num_layers + 1) + ) + + if self.d2v_multi: + for n, p in 
def forward(
    self,
    imgs,
    labels=None,
):
    """Classify a batch of images and, when labels are given, compute the loss.

    Args:
        imgs: batch of images passed to ``self.model_forward``.
        labels: optional targets. When omitted, the raw logits are returned.

    Returns:
        The logits tensor when ``labels`` is ``None``. Otherwise a dict with
        ``"losses"`` (unreduced loss stored under the key ``"regression"``),
        ``"sample_size"`` (the batch size) and, in eval mode only,
        ``"correct"`` (number of argmax predictions equal to ``labels``).

    Raises:
        Exception: if ``cfg.prediction_mode`` is not a handled mode.
    """
    # Mixup/cutmix is applied only while training and only when labels exist;
    # it replaces both the images and the labels.
    if self.training and self.mixup_fn is not None and labels is not None:
        imgs, labels = self.mixup_fn(imgs, labels)

    # Linear probing: run the backbone without tracking gradients.
    if self.linear_classifier:
        with torch.no_grad():
            x = self.model_forward(imgs)
    else:
        x = self.model_forward(imgs)

    # Reduce the token sequence (dim 1) to a single vector per sample.
    if self.cfg.prediction_mode == PredictionMode.MEAN_POOLING:
        x = x.mean(dim=1)
    elif self.cfg.prediction_mode == PredictionMode.CLS_TOKEN:
        x = x[:, 0]
    elif self.cfg.prediction_mode == PredictionMode.LIN_SOFTMAX:
        # Computed in float32 in log space, then cast back to the input dtype.
        dtype = x.dtype
        x = F.logsigmoid(x.float())
        x = torch.logsumexp(x + x, dim=1) - torch.logsumexp(x + 1e-6, dim=1)
        x = x.clamp(max=0)
        x = x - torch.log(-(torch.expm1(x)))
        # Replace NaN/inf produced by the log/expm1 step with zeros.
        x = torch.nan_to_num(x, nan=0, posinf=0, neginf=0)
        x = x.to(dtype=dtype)
    else:
        raise Exception(f"unknown prediction mode {self.cfg.prediction_mode.name}")

    if self.fc_norm is not None:
        x = self.fc_norm(x)

    x = self.head(x)

    if labels is None:
        return x

    if self.training and self.mixup_fn is not None:
        # Presumably ``labels`` are soft, per-class targets produced by
        # mixup_fn here (TODO confirm); the product is left unreduced.
        loss = -labels * F.log_softmax(x.float(), dim=-1)
    else:
        # Label smoothing is applied during training only.
        loss = F.cross_entropy(
            x.float(),
            labels,
            label_smoothing=self.cfg.label_smoothing if self.training else 0,
            reduction="none",
        )

    result = {
        "losses": {"regression": loss},
        "sample_size": imgs.size(0),
    }

    # Accuracy statistics are only collected in eval mode.
    if not self.training:
        with torch.no_grad():
            pred = x.argmax(-1)
            correct = (pred == labels).sum()
            result["correct"] = correct

    return result
@dataclass
class D2vAudioConfig(D2vModalityConfig):
    """Audio-specific options layered on top of ``D2vModalityConfig``."""

    type: Modality = Modality.AUDIO
    # passed as ``mode`` to ConvFeatureExtractionModel
    extractor_mode: str = "layer_norm"
    # NOTE(review): AudioEncoder passes this string to ``eval``; it must only
    # ever come from trusted configuration.
    feature_encoder_spec: str = field(
        default="[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
        metadata={
            "help": "string describing convolutional feature extraction layers in form of a python list that contains "
            "[(dim, kernel_size, stride), ...]"
        },
    )
    # NOTE(review): despite the help text, AudioEncoder uses this as a total
    # kernel width: each positional conv layer gets a kernel of
    # max(3, conv_pos_width // conv_pos_depth).
    conv_pos_width: int = field(
        default=95,
        metadata={"help": "number of filters for convolutional positional embeddings"},
    )
    conv_pos_groups: int = field(
        default=16,
        metadata={"help": "number of groups for convolutional positional embedding"},
    )
    # number of conv layers stacked in the positional encoder
    conv_pos_depth: int = field(
        default=5,
        metadata={"help": "depth of positional encoder network"},
    )
    # when True, a LayerNorm is placed in front of the positional encoder
    conv_pos_pre_ln: bool = False
def convert_padding_mask(self, x, padding_mask):
    """
    Convert a sample-level padding mask into a feature-frame-level one.

    x: extracted features; only its first two dimensions are used to size
       the new mask
    padding_mask: mask over the raw input (truthy = padded), or None

    Returns None when no mask was given, otherwise a bool mask of shape
    ``x.shape[:2]`` that is True on padded frames.
    """
    if padding_mask is None:
        return padding_mask

    def conv_output_lengths(lengths: torch.LongTensor):
        """Apply the length formula of every conv layer of the feature encoder."""
        for layer in self.feature_enc_layers:
            kernel_size, stride = layer[1], layer[2]
            lengths = torch.floor((lengths - kernel_size) / stride + 1)
        return lengths.to(torch.long)

    # number of real (unpadded) input samples per sequence
    valid_inputs = (1 - padding_mask.long()).sum(-1)
    # apply conv formula to get real output_lengths
    valid_frames = conv_output_lengths(valid_inputs)

    if not padding_mask.any():
        # nothing is padded: every frame is attended to
        return torch.zeros(x.shape[:2], dtype=torch.bool, device=x.device)

    frame_mask = torch.zeros(x.shape[:2], dtype=x.dtype, device=x.device)

    # mark the last valid frame of every sequence, then spread that mark to
    # all earlier frames so everything up to the output length is attended to
    rows = torch.arange(frame_mask.shape[0], device=frame_mask.device)
    frame_mask[(rows, valid_frames - 1)] = 1
    attended = frame_mask.flip([-1]).cumsum(-1).flip([-1])
    return (1 - attended).bool()
@dataclass
class D2vModalityConfig:
    """Options shared by the modality-specific encoders.

    The comments describe only how the encoder code that reads this config
    consumes each field; uncommented fields are used elsewhere.
    """

    type: Modality = MISSING  # required; no default
    # number of blocks built for the modality's context encoder
    prenet_depth: int = 4
    prenet_layerdrop: float = 0
    prenet_dropout: float = 0
    # end points of the drop-path schedule spread over the prenet blocks
    start_drop_path_rate: float = 0
    end_drop_path_rate: float = 0

    # when > 0, a learned (1, num_extra_tokens, embed_dim) parameter is created
    num_extra_tokens: int = 0
    # True: the first extra token stays zero and any further ones are drawn
    # from a normal distribution; False: all of them are
    init_extra_token_zero: bool = True

    # std of the normal noise that fills masked positions in the decoder input
    mask_noise_std: float = 0.01
    mask_prob_min: Optional[float] = None
    mask_prob: float = 0.7
    inverse_mask: bool = False
    mask_prob_adjust: float = 0
    keep_masked_pct: float = 0

    mask_length: int = 5
    add_masks: bool = False
    remove_masks: bool = False
    mask_dropout: float = 0.0
    encoder_zero_mask: bool = True

    mask_channel_prob: float = 0.0
    mask_channel_length: int = 64

    ema_local_encoder: bool = False  # used in data2vec_multi
    # gradient multiplier for the local encoder: 1.0 leaves gradients as they
    # are, values <= 0 run the local encoder under torch.no_grad()
    local_grad_mult: float = 1.0

    # the alibi bias function is only kept when this is True
    use_alibi_encoder: bool = False
    # initial fill value of the alibi scale parameter
    alibi_scale: float = 1.0
    # turn the alibi bias itself into a parameter; requires alibi_max_pos
    learned_alibi: bool = False
    alibi_max_pos: Optional[int] = None
    # whether the alibi scale parameter receives gradients
    learned_alibi_scale: bool = False
    # give the scale one entry per head / per layer instead of a single one
    learned_alibi_scale_per_head: bool = False
    learned_alibi_scale_per_layer: bool = False

    # interpolated from the enclosing model config
    num_alibi_heads: int = II("model.num_heads")
    model_depth: int = II("model.depth")

    # NOTE(review): the default is one shared instance created at import time.
    # If D2vDecoderConfig is a regular (unhashable) dataclass, Python 3.11+
    # rejects such defaults with "ValueError: mutable default ... use
    # default_factory" -- confirm against the supported Python versions.
    decoder: Optional[D2vDecoderConfig] = D2vDecoderConfig()
D2vModalityConfig, + embed_dim: int, + local_encoder: nn.Module, + project_features: nn.Module, + fixed_positional_encoder: Optional[nn.Module], + relative_positional_encoder: Optional[nn.Module], + context_encoder: nn.Module, + decoder: nn.Module, + get_alibi_bias: Optional[Callable[[int, int, str, str], torch.Tensor]], + ): + super().__init__() + + self.modality_cfg = modality_cfg + self.local_encoder = local_encoder + self.project_features = project_features + self.fixed_positional_encoder = fixed_positional_encoder + self.relative_positional_encoder = relative_positional_encoder + self.context_encoder = context_encoder + + self.decoder = decoder + self.get_alibi_bias = get_alibi_bias if modality_cfg.use_alibi_encoder else None + + self.local_grad_mult = self.modality_cfg.local_grad_mult + + self.extra_tokens = None + if modality_cfg.num_extra_tokens > 0: + self.extra_tokens = nn.Parameter( + torch.zeros(1, modality_cfg.num_extra_tokens, embed_dim) + ) + if not modality_cfg.init_extra_token_zero: + nn.init.normal_(self.extra_tokens) + elif self.extra_tokens.size(1) > 1: + nn.init.normal_(self.extra_tokens[:, 1:]) + + self.alibi_scale = None + if self.get_alibi_bias is not None: + self.alibi_scale = nn.Parameter( + torch.full( + ( + (modality_cfg.prenet_depth + modality_cfg.model_depth) + if modality_cfg.learned_alibi_scale_per_layer + else 1, + 1, + self.modality_cfg.num_alibi_heads + if modality_cfg.learned_alibi_scale_per_head + else 1, + 1, + 1, + ), + modality_cfg.alibi_scale, + dtype=torch.float, + ), + requires_grad=modality_cfg.learned_alibi_scale, + ) + + if modality_cfg.learned_alibi and self.get_alibi_bias is not None: + assert modality_cfg.alibi_max_pos is not None + alibi_bias = self.get_alibi_bias( + batch_size=1, + time_steps=modality_cfg.alibi_max_pos, + heads=modality_cfg.num_alibi_heads, + scale=1.0, + dtype=torch.float, + device="cpu", + ) + self.alibi_bias = nn.Parameter(alibi_bias) + self.get_alibi_bias = partial( + _learned_alibi_bias, 
alibi_bias=self.alibi_bias + ) + + def upgrade_state_dict_named(self, state_dict, name): + k = f"{name}.alibi_scale" + if k in state_dict and state_dict[k].dim() == 4: + state_dict[k] = state_dict[k].unsqueeze(0) + + return state_dict + + def convert_padding_mask(self, x, padding_mask): + return padding_mask + + def decoder_input(self, x, mask_info: MaskInfo): + inp_drop = self.modality_cfg.decoder.input_dropout + if inp_drop > 0: + x = F.dropout(x, inp_drop, training=self.training, inplace=True) + + num_extra = self.modality_cfg.num_extra_tokens + + if mask_info is not None: + num_masked = mask_info.ids_restore.shape[1] - x.shape[1] + num_extra + + mask_tokens = x.new_empty( + x.size(0), + num_masked, + x.size(-1), + ).normal_(0, self.modality_cfg.mask_noise_std) + + x_ = torch.cat([x[:, num_extra:], mask_tokens], dim=1) + x = torch.gather(x_, dim=1, index=mask_info.ids_restore) + + if self.modality_cfg.decoder.add_positions_masked: + assert self.fixed_positional_encoder is not None + pos = self.fixed_positional_encoder(x, None) + x = x + (pos * mask_info.mask.unsqueeze(-1)) + else: + x = x[:, num_extra:] + + if self.modality_cfg.decoder.add_positions_all: + assert self.fixed_positional_encoder is not None + x = x + self.fixed_positional_encoder(x, None) + + return x, mask_info + + def local_features(self, features): + if self.local_grad_mult > 0: + if self.local_grad_mult == 1.0: + x = self.local_encoder(features) + else: + x = GradMultiply.apply( + self.local_encoder(features), self.local_grad_mult + ) + else: + with torch.no_grad(): + x = self.local_encoder(features) + + x = self.project_features(x) + return x + + def contextualized_features( + self, + x, + padding_mask, + mask, + remove_masked, + clone_batch: int = 1, + mask_seeds: Optional[torch.Tensor] = None, + precomputed_mask=None, + ): + + if padding_mask is not None: + padding_mask = self.convert_padding_mask(x, padding_mask) + + local_features = x + if mask and clone_batch == 1: + local_features = 
local_features.clone() + + orig_B, orig_T, _ = x.shape + pre_mask_B = orig_B + mask_info = None + + x_pos = None + if self.fixed_positional_encoder is not None: + x = x + self.fixed_positional_encoder(x, padding_mask) + + if mask: + if clone_batch > 1: + x = x.repeat_interleave(clone_batch, 0) + if mask_seeds is not None: + clone_hash = [ + int(hash((mask_seeds.seed, ind)) % 1e10) + for ind in range(clone_batch - 1) + ] + clone_hash = torch.tensor([0] + clone_hash).long().view(1, -1) + + id = mask_seeds.ids + id = id.repeat_interleave(clone_batch, 0) + id = id.view(-1, clone_batch) + clone_hash.to(id) + id = id.view(-1) + mask_seeds = MaskSeed( + seed=mask_seeds.seed, update=mask_seeds.update, ids=id + ) + if padding_mask is not None: + padding_mask = padding_mask.repeat_interleave(clone_batch, 0) + + x, mask_info = self.compute_mask( + x, + padding_mask, + mask_seed=mask_seeds, + apply=self.relative_positional_encoder is not None or not remove_masked, + precomputed_mask=precomputed_mask, + ) + + if self.relative_positional_encoder is not None: + x_pos = self.relative_positional_encoder(x) + + masked_padding_mask = padding_mask + if mask and remove_masked: + x = mask_info.x_unmasked + if x_pos is not None: + x = x + gather_unmasked(x_pos, mask_info) + + if padding_mask is not None and padding_mask.any(): + masked_padding_mask = gather_unmasked_mask(padding_mask, mask_info) + if not masked_padding_mask.any(): + masked_padding_mask = None + else: + masked_padding_mask = None + + elif x_pos is not None: + x = x + x_pos + + alibi_bias = None + alibi_scale = self.alibi_scale + + if self.get_alibi_bias is not None: + alibi_bias = self.get_alibi_bias( + batch_size=pre_mask_B, + time_steps=orig_T, + heads=self.modality_cfg.num_alibi_heads, + dtype=torch.float32, + device=x.device, + ) + + if alibi_scale is not None: + alibi_scale = alibi_scale.clamp_min(0) + if alibi_scale.size(0) == 1: + alibi_bias = alibi_bias * alibi_scale.squeeze(0).type_as(alibi_bias) + alibi_scale = 
None + + if clone_batch > 1: + alibi_bias = alibi_bias.repeat_interleave(clone_batch, 0) + + if mask_info is not None and remove_masked: + alibi_bias = masked_alibi(alibi_bias, mask_info) + + if self.extra_tokens is not None: + num = self.extra_tokens.size(1) + x = torch.cat([self.extra_tokens.expand(x.size(0), -1, -1), x], dim=1) + if masked_padding_mask is not None: + # B x T + masked_padding_mask = F.pad(masked_padding_mask, (num, 0)) + if alibi_bias is not None: + # B x H x T x T + alibi_bias = F.pad(alibi_bias, (num, 0, num, 0)) + + x = self.context_encoder( + x, + masked_padding_mask, + alibi_bias, + alibi_scale[: self.modality_cfg.prenet_depth] + if alibi_scale is not None + else None, + ) + + return { + "x": x, + "local_features": local_features, + "padding_mask": masked_padding_mask, + "alibi_bias": alibi_bias, + "alibi_scale": alibi_scale[self.modality_cfg.prenet_depth :] + if alibi_scale is not None and alibi_scale.size(0) > 1 + else alibi_scale, + "encoder_mask": mask_info, + } + + def forward( + self, + features, + padding_mask, + mask: bool, + remove_masked: bool, + clone_batch: int = 1, + mask_seeds: Optional[torch.Tensor] = None, + precomputed_mask=None, + ): + x = self.local_features(features) + return self.contextualized_features( + x, + padding_mask, + mask, + remove_masked, + clone_batch, + mask_seeds, + precomputed_mask, + ) + + def reset_parameters(self): + pass + + def compute_mask( + self, + x, + padding_mask, + mask_seed: Optional[MaskSeed], + apply, + precomputed_mask, + ): + if precomputed_mask is not None: + mask = precomputed_mask + mask_info = self.make_maskinfo(x, mask) + else: + B, T, C = x.shape + cfg = self.modality_cfg + + mask_prob = cfg.mask_prob + + if ( + cfg.mask_prob_min is not None + and cfg.mask_prob_min >= 0 + and cfg.mask_prob_min < mask_prob + ): + mask_prob = np.random.uniform(cfg.mask_prob_min, mask_prob) + + if mask_prob > 0: + if cfg.mask_length == 1: + mask_info = random_masking(x, mask_prob, mask_seed) + else: + 
if self.modality_cfg.inverse_mask: + mask_prob = 1 - mask_prob + + mask = compute_mask_indices( + (B, T), + padding_mask, + mask_prob, + cfg.mask_length, + min_masks=1, + require_same_masks=True, + mask_dropout=cfg.mask_dropout, + add_masks=cfg.add_masks, + seed=mask_seed.seed if mask_seed is not None else None, + epoch=mask_seed.update if mask_seed is not None else None, + indices=mask_seed.ids if mask_seed is not None else None, + ) + + mask = torch.from_numpy(mask).to(device=x.device) + if self.modality_cfg.inverse_mask: + mask = 1 - mask + mask_info = self.make_maskinfo(x, mask) + else: + mask_info = None + + if apply: + x = self.apply_mask(x, mask_info) + + return x, mask_info + + def make_maskinfo(self, x, mask, shape=None): + if shape is None: + B, T, D = x.shape + else: + B, T, D = shape + + mask = mask.to(torch.uint8) + ids_shuffle = mask.argsort(dim=1) + ids_restore = ids_shuffle.argsort(dim=1).unsqueeze(-1).expand(-1, -1, D) + + len_keep = T - mask[0].sum() + if self.modality_cfg.keep_masked_pct > 0: + len_keep += round((T - int(len_keep)) * self.modality_cfg.keep_masked_pct) + + ids_keep = ids_shuffle[:, :len_keep] + + if shape is not None: + x_unmasked = None + else: + ids_keep = ids_keep.unsqueeze(-1).expand(-1, -1, D) + x_unmasked = torch.gather(x, dim=1, index=ids_keep) + + mask_info = MaskInfo( + x_unmasked=x_unmasked, + mask=mask, + ids_restore=ids_restore, + ids_keep=ids_keep, + ) + return mask_info + + def apply_mask(self, x, mask_info): + cfg = self.modality_cfg + B, T, C = x.shape + + if mask_info is not None: + mask = mask_info.mask + if cfg.encoder_zero_mask: + x = x * (1 - mask.type_as(x).unsqueeze(-1)) + else: + num_masks = mask.sum().item() + masks = x.new_empty(num_masks, x.size(-1)).normal_( + 0, cfg.mask_noise_std + ) + x = index_put(x, mask, masks) + if cfg.mask_channel_prob > 0: + mask_channel = compute_mask_indices( + (B, C), + None, + cfg.mask_channel_prob, + cfg.mask_channel_length, + ) + mask_channel = ( + 
torch.from_numpy(mask_channel) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x = index_put(x, mask_channel, 0) + return x + + def remove_pretraining_modules(self, keep_decoder=False): + if not keep_decoder: + self.decoder = None + + +def get_annealed_rate(start, end, curr_step, total_steps): + if curr_step >= total_steps: + return end + r = end - start + pct_remaining = 1 - curr_step / total_steps + return end - r * pct_remaining + + +# adapted from MAE +def random_masking(x, mask_ratio, mask_seed: Optional[MaskSeed]): + N, L, D = x.shape # batch, length, dim + len_keep = int(L * (1 - mask_ratio)) + + generator = None + if mask_seed is not None: + seed = int( + hash((mask_seed.seed, mask_seed.update, mask_seed.ids.sum().item())) % 1e6 + ) + generator = torch.Generator(device=x.device) + generator.manual_seed(seed) + + noise = torch.rand(N, L, generator=generator, device=x.device) # noise in [0, 1] + + # sort noise for each sample + ids_shuffle = noise.argsort(dim=1) # ascend: small is keep, large is remove + ids_restore = ids_shuffle.argsort(dim=1) + + # keep the first subset + ids_keep = ids_shuffle[:, :len_keep] + ids_keep = ids_keep.unsqueeze(-1).expand(-1, -1, D) + x_unmasked = torch.gather(x, dim=1, index=ids_keep) + + # generate the binary mask: 0 is keep, 1 is remove + mask = torch.ones([N, L], dtype=x.dtype, device=x.device) + mask[:, :len_keep] = 0 + # unshuffle to get the binary mask + mask = torch.gather(mask, dim=1, index=ids_restore) + + ids_restore = ids_restore.unsqueeze(-1).expand(-1, -1, D) + + return MaskInfo( + x_unmasked=x_unmasked, mask=mask, ids_restore=ids_restore, ids_keep=ids_keep + ) + + +def gather_unmasked(x: torch.Tensor, mask_info: MaskInfo) -> torch.Tensor: + return torch.gather( + x, + dim=1, + index=mask_info.ids_keep, + ) + + +def gather_unmasked_mask(x: torch.Tensor, mask_info: MaskInfo) -> torch.Tensor: + return torch.gather( + x, + dim=1, + index=mask_info.ids_keep[..., 0], # ignore the feature dimension + ) + + 
def get_alibi(
    max_positions: int,
    attention_heads: int,
    dims: int = 1,
    distance: str = "manhattan",
):
    """Build the fixed (non-learned) ALiBi attention bias.

    Args:
        max_positions: number of positions. For ``dims == 2`` this must be a
            perfect square; position ``p`` is treated as the grid cell
            ``(p // n, p % n)`` with ``n = sqrt(max_positions)``.
        attention_heads: number of heads; each head gets its own slope.
        dims: ``1`` for a sequence, ``2`` for a square grid.
        distance: ``"manhattan"`` or ``"euclidean"``; only read when
            ``dims == 2``.

    Returns:
        Float tensor of shape ``(attention_heads, max_positions,
        max_positions)`` holding ``slope[h] * -distance(i, j)``.

    Raises:
        ValueError: if ``dims`` is not 1 or 2, or if ``dims == 2`` and
            ``distance`` is not a supported metric.
    """

    def get_slopes(n):
        def get_slopes_power_of_2(n):
            start = 2 ** (-(2 ** -(math.log2(n) - 3)))
            ratio = start
            return [start * ratio**i for i in range(n)]

        # In the paper, we only train models that have 2^a heads for some
        # a. This function has some good properties that only occur when
        # the input is a power of 2. To maintain that even when the number
        # of heads is not a power of 2, we use this workaround.
        if math.log2(n).is_integer():
            return get_slopes_power_of_2(n)
        closest_power_of_2 = 2 ** math.floor(math.log2(n))
        return (
            get_slopes_power_of_2(closest_power_of_2)
            + get_slopes(2 * closest_power_of_2)[0::2][: n - closest_power_of_2]
        )

    slopes = torch.Tensor(get_slopes(attention_heads))

    if dims == 1:
        # prepare alibi position linear bias. Note that wav2vec2 is a non
        # autoregressive model so we want a symmetric mask with 0 on the
        # diagonal and otherwise linearly decreasing values
        positions = torch.arange(max_positions)
        pos_bias = torch.abs(positions.unsqueeze(0) - positions.unsqueeze(1)) * -1
    elif dims == 2:
        # Fail early with a clear message; previously an unknown metric left
        # the distance function unbound and crashed inside the loop.
        if distance not in ("manhattan", "euclidean"):
            raise ValueError(f"unsupported alibi distance: {distance}")

        n = math.sqrt(max_positions)
        assert n.is_integer(), n
        n = int(n)

        # Flattened position p = row * n + col.
        rows = torch.arange(n).repeat_interleave(n)
        cols = torch.arange(n).repeat(n)
        d_row = rows.unsqueeze(1) - rows.unsqueeze(0)
        d_col = cols.unsqueeze(1) - cols.unsqueeze(0)

        # Vectorized replacement for the former 4-deep Python loop that
        # filled the (max_positions, max_positions) matrix one cell at a time.
        if distance == "manhattan":
            pos_bias = (-(d_row.abs() + d_col.abs())).float()
        else:
            pos_bias = -torch.sqrt((d_row**2 + d_col**2).float())
    else:
        raise ValueError(f"unsupported number of alibi dims: {dims}")

    alibi_bias = slopes.unsqueeze(1).unsqueeze(1) * pos_bias.unsqueeze(0).expand(
        attention_heads, -1, -1
    )

    return alibi_bias


def get_alibi_bias(
    alibi_biases,
    batch_size,
    time_steps,
    heads,
    dtype,
    device,
    dims=1,
    distance="manhattan",
):
    """Return a ``(batch_size, heads, time_steps, time_steps)`` ALiBi bias.

    Results are cached in ``alibi_biases`` (a dict mutated in place) under the
    key ``f"{dims}_{heads}_{distance}"``. The cached buffer is rebuilt only
    when it is missing, too small, or has a different dtype/device.

    Args:
        alibi_biases: dict used as the cache.
        batch_size: number of batch entries to return.
        time_steps: number of positions to return per axis.
        heads: number of attention heads.
        dtype: dtype of the returned tensor.
        device: device of the returned tensor; a ``torch.device``, a device
            string or a device index.
        dims: forwarded to ``get_alibi``.
        distance: forwarded to ``get_alibi``.
    """
    # A device given as a string/index never compares equal to
    # ``tensor.device``; normalise it so the cache check below can hit.
    if isinstance(device, (str, int)):
        device = torch.device(device)

    cache_key = f"{dims}_{heads}_{distance}"

    buffered = alibi_biases.get(cache_key, None)

    target_size = heads * batch_size
    if (
        buffered is None
        or buffered.size(0) < target_size
        or buffered.size(1) < time_steps
        or buffered.dtype != dtype
        or buffered.device != device
    ):
        # Never shrink an existing buffer: grow to the max of old and new.
        bt = max(time_steps, buffered.size(1) if buffered is not None else 0)
        bn = max(target_size, buffered.size(0) if buffered is not None else 0) // heads

        buffered = (
            get_alibi(bt, heads, dims=dims, distance=distance)
            .to(dtype=dtype, device=device)
            .repeat(bn, 1, 1)
        )

        alibi_biases[cache_key] = buffered

    # NOTE(review): for dims == 2 a buffer built for a larger grid is sliced
    # here as if positions were 1-D; confirm callers always request the same
    # number of positions per cache key.
    b = buffered[:target_size, :time_steps, :time_steps]
    b = b.view(batch_size, heads, time_steps, time_steps)
    return b


def _learned_alibi_bias(
    alibi_bias,
    batch_size,
    time_steps,
    heads,
    scale=1.0,
    dtype=None,
    device=None,
):
    """Expand a learned bias parameter to the requested batch and length.

    ``scale`` now defaults to ``1.0`` (and ``dtype``/``device`` to ``None``,
    meaning "do not check") so the function can be called with only
    ``batch_size``, ``time_steps``, ``heads``, ``dtype`` and ``device``
    keywords without raising ``TypeError``.

    Args:
        alibi_bias: 4-D tensor whose dim 1 is the head dimension.
        batch_size: size to expand dim 0 to.
        time_steps: number of positions kept on the last two dims.
        heads: expected size of dim 1.
        scale: multiplier applied to the bias.
        dtype: if given, ``alibi_bias`` must already have this dtype.
        device: if given, ``alibi_bias`` must already live on this device.

    Returns:
        ``alibi_bias`` expanded on dim 0, scaled, and cropped to
        ``[..., :time_steps, :time_steps]``.
    """
    if isinstance(device, (str, int)):
        device = torch.device(device)

    assert alibi_bias.size(1) == heads, alibi_bias.shape
    assert dtype is None or alibi_bias.dtype == dtype, alibi_bias.dtype
    assert device is None or alibi_bias.device == device, alibi_bias.device

    if alibi_bias.size(-1) < time_steps:
        # Grow the table symmetrically by repeating its border values.
        psz = math.ceil((time_steps - alibi_bias.size(-1)) / 2)
        alibi_bias = F.pad(alibi_bias, (psz, psz, psz, psz), mode="replicate")

    alibi_bias = alibi_bias.expand(batch_size, -1, -1, -1) * scale
    return alibi_bias[..., :time_steps, :time_steps]


def masked_alibi(alibi_bias, mask_info):
    """Keep only the rows and columns of the bias that belong to kept tokens.

    Args:
        alibi_bias: 4-D bias tensor; dim 1 is read as the head dimension and
            the last two dims are gathered.
        mask_info: object exposing ``ids_keep`` and ``ids_restore``.
            Assumes ``ids_keep`` is 3-D with the kept position indices
            repeated along its last dim - TODO confirm against the caller.

    Returns:
        The bias gathered along dim -2 and then dim -1 with the kept indices.
    """
    H = alibi_bias.size(1)

    # (B, K, D) -> (B, 1, K, 1): one kept-position index per row.
    index = mask_info.ids_keep.unsqueeze(1)[..., 0].unsqueeze(-1)

    # Select the kept rows ...
    alibi_bias = torch.gather(
        alibi_bias,
        dim=-2,
        index=index.expand(-1, H, -1, mask_info.ids_restore.size(1)),
    )
    # ... then the kept columns.
    alibi_bias = torch.gather(
        alibi_bias,
        dim=-1,
        index=index.transpose(-1, -2).expand(-1, H, alibi_bias.size(-2), -1),
    )

    return alibi_bias


# --- end of fairseq/examples/data2vec/models/modalities/base.py ---
# Patch header for the next file, retained from the original span:
# diff --git a/fairseq/examples/data2vec/models/modalities/images.py b/fairseq/examples/data2vec/models/modalities/images.py
# new file mode 100644
# index 0000000..a6b738c
# --- /dev/null
+++ b/fairseq/examples/data2vec/models/modalities/images.py @@ -0,0 +1,256 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from functools import partial +from dataclasses import dataclass +from typing import Callable, Dict, Optional +from timm.models.layers import to_2tuple +from fairseq.tasks import FairseqTask +from examples.data2vec.models.mae import get_2d_sincos_pos_embed, PatchEmbed +from .base import ( + D2vModalityConfig, + ModalitySpecificEncoder, + get_alibi_bias, + MaskSeed, +) +from .modules import ( + BlockEncoder, + Decoder2d, + FixedPositionalEncoder, + TransformerDecoder, + EncDecTransformerDecoder, +) +from examples.data2vec.data.modality import Modality + + +@dataclass +class D2vImageConfig(D2vModalityConfig): + type: Modality = Modality.IMAGE + + input_size: int = 224 + in_chans: int = 3 + patch_size: int = 16 + embed_dim: int = 768 + + alibi_dims: int = 2 + alibi_distance: str = "manhattan" + + fixed_positions: bool = True + + transformer_decoder: bool = False + enc_dec_transformer: bool = False + + +class ImageEncoder(ModalitySpecificEncoder): + + modality_cfg: D2vImageConfig + + def __init__( + self, + modality_cfg: D2vImageConfig, + embed_dim: int, + make_block: Callable[[float, Optional[int], Optional[int]], nn.ModuleList], + norm_layer: Callable[[int], nn.LayerNorm], + layer_norm_first: bool, + alibi_biases: Dict, + task: Optional[FairseqTask], + ): + + img_size = to_2tuple(modality_cfg.input_size) + patch_size = to_2tuple(modality_cfg.patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + + local_encoder = PatchEmbed( + modality_cfg.input_size, + modality_cfg.patch_size, + modality_cfg.in_chans, + modality_cfg.embed_dim, + ) + + w = local_encoder.proj.weight.data + 
torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1])) + + if modality_cfg.embed_dim != embed_dim: + local_encoder = nn.Sequential( + local_encoder, + nn.Linear(modality_cfg.embed_dim, embed_dim), + ) + + project_features = nn.Identity() + + pos_embed = nn.Parameter( + torch.zeros(1, num_patches, embed_dim), requires_grad=False + ) + + side_n = int(num_patches ** 0.5) + + emb = get_2d_sincos_pos_embed( + pos_embed.shape[-1], + side_n, + cls_token=False, + ) + pos_embed.data.copy_(torch.from_numpy(emb).float().unsqueeze(0)) + fixed_positional_encoder = ( + FixedPositionalEncoder(pos_embed) if modality_cfg.fixed_positions else None + ) + + dpr = np.linspace( + modality_cfg.start_drop_path_rate, + modality_cfg.end_drop_path_rate, + modality_cfg.prenet_depth, + ) + + context_encoder = BlockEncoder( + nn.ModuleList(make_block(dpr[i]) for i in range(modality_cfg.prenet_depth)), + norm_layer(embed_dim) if not layer_norm_first else None, + layer_norm_first, + modality_cfg.prenet_layerdrop, + modality_cfg.prenet_dropout, + ) + + if modality_cfg.transformer_decoder: + if modality_cfg.enc_dec_transformer: + decoder = EncDecTransformerDecoder(modality_cfg.decoder, embed_dim) + else: + dec_enc = BlockEncoder( + nn.ModuleList( + make_block(0, modality_cfg.decoder.decoder_dim, 8) + for _ in range(modality_cfg.decoder.decoder_layers) + ), + None, + layer_norm_first, + 0, + 0, + ) + decoder = TransformerDecoder(modality_cfg.decoder, embed_dim, dec_enc) + else: + decoder = ( + Decoder2d(modality_cfg.decoder, embed_dim, side_n, side_n) + if modality_cfg.decoder is not None + else None + ) + + alibi_bias_fn = partial( + get_alibi_bias, + alibi_biases=alibi_biases, + heads=modality_cfg.num_alibi_heads, + dims=modality_cfg.alibi_dims, + distance=modality_cfg.alibi_distance, + ) + + super().__init__( + modality_cfg=modality_cfg, + embed_dim=embed_dim, + local_encoder=local_encoder, + project_features=project_features, + fixed_positional_encoder=fixed_positional_encoder, + 
relative_positional_encoder=None, + context_encoder=context_encoder, + decoder=decoder, + get_alibi_bias=alibi_bias_fn, + ) + + def reset_parameters(self): + super().reset_parameters() + if self.decoder is not None: + self.decoder.reset_parameters() + + @torch.no_grad() + def patchify(self, imgs): + """ + imgs: (N, 3, H, W) + x: (N, L, patch_size**2 *3) + """ + p = self.modality_cfg.patch_size + h = w = imgs.shape[2] // p + x = imgs.reshape(shape=(imgs.shape[0], 3, h, p, w, p)) + x = torch.einsum("nchpwq->nhwpqc", x) + x = x.reshape(shape=(imgs.shape[0], h * w, p ** 2 * 3)) + + return x + + @torch.no_grad() + def unpatchify(self, x): + """ + x: (N, L, patch_size**2 *3) + imgs: (N, 3, H, W) + """ + p = self.modality_cfg.patch_size + h = w = int(x.shape[1] ** 0.5) + assert h * w == x.shape[1] + + x = x.reshape(shape=(x.shape[0], h, w, p, p, 3)) + x = torch.einsum("nhwpqc->nchpwq", x) + imgs = x.reshape(shape=(x.shape[0], 3, h * p, h * p)) + return imgs + + def compute_mask( + self, + x, + padding_mask, + mask_seed: Optional[MaskSeed], + apply, + shape=None, + precomputed_mask=None, + ): + mlen = self.modality_cfg.mask_length + if mlen <= 1: + return super().compute_mask( + x, padding_mask, mask_seed, apply, precomputed_mask + ) + + if precomputed_mask is not None: + mask = precomputed_mask + else: + from fairseq.data.data_utils import compute_block_mask_2d + + if shape is not None: + B, L, D = shape + else: + B, L, D = x.shape + + mask = compute_block_mask_2d( + shape=(B, L), + mask_prob=self.modality_cfg.mask_prob, + mask_length=self.modality_cfg.mask_length, + mask_prob_adjust=self.modality_cfg.mask_prob_adjust, + inverse_mask=self.modality_cfg.inverse_mask, + require_same_masks=True, + mask_dropout=self.modality_cfg.mask_dropout, + ) + + mask_info = self.make_maskinfo(x, mask, shape) + if apply: + x = self.apply_mask(x, mask_info) + + return x, mask_info + + def decoder_input(self, x, mask_info): + if ( + not self.modality_cfg.transformer_decoder + or not 
self.modality_cfg.enc_dec_transformer + ): + return super().decoder_input(x, mask_info) + + inp_drop = self.modality_cfg.decoder.input_dropout + if inp_drop > 0: + x = F.dropout(x, inp_drop, training=self.training, inplace=True) + + kv = x[:, self.modality_cfg.num_extra_tokens :] + + assert self.fixed_positional_encoder is not None + pos = self.fixed_positional_encoder(x, None).expand(x.size(0), -1, -1) + + mask = mask_info.mask.bool() + if self.modality_cfg.decoder.add_positions_all: + kv = kv + pos[~mask].view(kv.shape) + + q = pos[mask].view(x.size(0), -1, x.size(-1)) + + return q, kv diff --git a/fairseq/examples/data2vec/models/modalities/modules.py b/fairseq/examples/data2vec/models/modalities/modules.py new file mode 100644 index 0000000..a4e1a4e --- /dev/null +++ b/fairseq/examples/data2vec/models/modalities/modules.py @@ -0,0 +1,589 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from dataclasses import dataclass +from fairseq.modules import ( + LayerNorm, + SamePad, + SamePad2d, + TransposeLast, +) + + +@dataclass +class D2vDecoderConfig: + decoder_dim: int = 384 + decoder_groups: int = 16 + decoder_kernel: int = 5 + decoder_layers: int = 5 + input_dropout: float = 0.1 + + add_positions_masked: bool = False + add_positions_all: bool = False + + decoder_residual: bool = True + projection_layers: int = 1 + projection_ratio: float = 2.0 + + +class FixedPositionalEncoder(nn.Module): + def __init__(self, pos_embed): + super().__init__() + self.positions = pos_embed + + def forward(self, x, padding_mask): + return self.positions + + +class TextFeatPositionalEncoder(nn.Module): + """ + Original encoder expects (B, T) long input. 
This module wraps it to take + local_encoder output which are (B, T, D) float tensors + """ + + def __init__(self, pos_encoder): + super().__init__() + self.pos_encoder = pos_encoder + + def forward(self, x, padding_mask): + # assume padded token embeddings are 0s + # TODO: consider using padding_mask as input + return self.pos_encoder(x[..., 0]) + + +class BlockEncoder(nn.Module): + def __init__(self, blocks, norm_layer, layer_norm_first, layerdrop, dropout): + super().__init__() + self.blocks = blocks + self.norm = norm_layer + self.layer_norm_first = layer_norm_first + self.layerdrop = layerdrop + self.dropout = nn.Dropout(dropout, inplace=True) + + def forward(self, x, padding_mask, alibi_bias, alibi_scale): + if self.norm is not None and not self.layer_norm_first: + x = self.norm(x) + + x = self.dropout(x) + + for i, blk in enumerate(self.blocks): + if ( + not self.training + or self.layerdrop == 0 + or (np.random.random() > self.layerdrop) + ): + ab = alibi_bias + if ab is not None and alibi_scale is not None: + scale = ( + alibi_scale[i] + if alibi_scale.size(0) > 1 + else alibi_scale.squeeze(0) + ) + ab = ab * scale.type_as(ab) + x, _ = blk(x, padding_mask, ab) + + if self.norm is not None and self.layer_norm_first: + x = self.norm(x) + + return x + + +class DecoderBase(nn.Module): + decoder_cfg: D2vDecoderConfig + + def __init__(self, cfg: D2vDecoderConfig): + super().__init__() + + self.decoder_cfg = cfg + + def reset_parameters(self): + for mod in self.proj.modules(): + if isinstance(mod, nn.Linear): + mod.reset_parameters() + + def add_residual(self, x, residual, i, mask_info): + if ( + residual is None + or not self.decoder_cfg.decoder_residual + or residual.size(1) != x.size(1) + ): + return x + + ret = x + residual + + return ret + + +class Decoder1d(DecoderBase): + def __init__(self, cfg: D2vDecoderConfig, input_dim): + super().__init__(cfg) + + def make_block(in_dim): + block = [ + nn.Conv1d( + in_dim, + cfg.decoder_dim, + 
kernel_size=cfg.decoder_kernel, + padding=cfg.decoder_kernel // 2, + groups=cfg.decoder_groups, + ), + SamePad(cfg.decoder_kernel), + TransposeLast(), + LayerNorm(cfg.decoder_dim, elementwise_affine=False), + TransposeLast(), + nn.GELU(), + ] + + return nn.Sequential(*block) + + self.blocks = nn.Sequential( + *[ + make_block(input_dim if i == 0 else cfg.decoder_dim) + for i in range(cfg.decoder_layers) + ] + ) + + projs = [] + curr_dim = cfg.decoder_dim + for i in range(cfg.projection_layers - 1): + next_dim = int(curr_dim * cfg.projection_ratio) if i == 0 else curr_dim + projs.append(nn.Linear(curr_dim, next_dim)) + projs.append(nn.GELU()) + curr_dim = next_dim + projs.append(nn.Linear(curr_dim, input_dim)) + if len(projs) == 1: + self.proj = projs[0] + else: + self.proj = nn.Sequential(*projs) + + def forward(self, x, mask_info): + + x = x.transpose(1, 2) + + residual = x + + for i, layer in enumerate(self.blocks): + x = layer(x) + x = self.add_residual(x, residual, i, mask_info) + residual = x + + x = x.transpose(1, 2) + x = self.proj(x) + return x + + +class Decoder2d(DecoderBase): + def __init__(self, cfg: D2vDecoderConfig, input_dim, h_size, w_size): + super().__init__(cfg) + + self.h_size = h_size + self.w_size = w_size + + def make_block(in_dim): + block = [ + nn.Conv2d( + in_dim, + cfg.decoder_dim, + kernel_size=cfg.decoder_kernel, + padding=cfg.decoder_kernel // 2, + groups=cfg.decoder_groups, + ), + SamePad2d(cfg.decoder_kernel), + TransposeLast(tranpose_dim=-3), + LayerNorm(cfg.decoder_dim, elementwise_affine=False), + TransposeLast(tranpose_dim=-3), + nn.GELU(), + ] + + return nn.Sequential(*block) + + self.blocks = nn.Sequential( + *[ + make_block(input_dim if i == 0 else cfg.decoder_dim) + for i in range(cfg.decoder_layers) + ] + ) + + self.proj = nn.Linear(cfg.decoder_dim, input_dim) + + def forward(self, x, mask_info): + B, T, C = x.shape + + x = x.transpose(1, 2).reshape(B, C, self.h_size, self.w_size) + + residual = x + + for i, layer in 
enumerate(self.blocks): + x = layer(x) + x = self.add_residual(x, residual, i, mask_info) + residual = x + + x = x.reshape(B, -1, T).transpose(1, 2) + x = self.proj(x) + return x + + +class TransformerDecoder(nn.Module): + decoder_cfg: D2vDecoderConfig + + def __init__(self, cfg: D2vDecoderConfig, input_dim, encoder): + super().__init__() + + self.decoder_cfg = cfg + + self.input_proj = nn.Linear(input_dim, cfg.decoder_dim) + + self.encoder = encoder + + self.proj = nn.Linear(cfg.decoder_dim, input_dim) + + def reset_parameters(self): + from fairseq.modules.transformer_sentence_encoder import init_bert_params + + self.apply(init_bert_params) + + def forward(self, x, mask_info): + x = self.input_proj(x) + x = self.encoder(x, None, None, 1) + x = self.proj(x) + return x + + +class AltBlock(nn.Module): + def __init__( + self, + dim, + num_heads, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + mlp_drop=0.0, + post_mlp_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + layer_norm_first=True, + ffn_targets=False, + cosine_attention=False, + ): + super().__init__() + + self.layer_norm_first = layer_norm_first + self.ffn_targets = ffn_targets + + from timm.models.vision_transformer import DropPath, Mlp + + self.norm1 = norm_layer(dim) + self.attn = AltAttention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + cosine_attention=cosine_attention, + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=mlp_drop, + ) + self.post_mlp_dropout = nn.Dropout(post_mlp_drop, inplace=False) + + def forward(self, x, padding_mask=None, alibi_bias=None): + if self.layer_norm_first: + x = x + self.drop_path(self.attn(self.norm1(x), padding_mask, alibi_bias)) + r = x = 
self.mlp(self.norm2(x)) + t = x + x = r + self.drop_path(self.post_mlp_dropout(x)) + if not self.ffn_targets: + t = x + else: + x = x + self.drop_path(self.attn(x, padding_mask, alibi_bias)) + r = x = self.norm1(x) + x = self.mlp(x) + t = x + x = self.norm2(r + self.drop_path(self.post_mlp_dropout(x))) + if not self.ffn_targets: + t = x + + return x, t + + +class AltAttention(nn.Module): + def __init__( + self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + cosine_attention=False, + ): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.cosine_attention = cosine_attention + + if cosine_attention: + self.logit_scale = nn.Parameter( + torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True + ) + + def forward(self, x, padding_mask=None, alibi_bias=None): + B, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) # qkv x B x H x L x D + ) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + dtype = q.dtype + + if self.cosine_attention: + # cosine attention + attn = F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1) + logit_scale = torch.clamp( + self.logit_scale, max=torch.log(torch.tensor(1.0 / 0.01)) + ).exp() + attn = attn * logit_scale + else: + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + if alibi_bias is not None: + attn = attn.type_as(alibi_bias) + attn[:, : alibi_bias.size(1)] += alibi_bias + + if padding_mask is not None and padding_mask.any(): + attn = attn.masked_fill( + padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + + attn = attn.softmax(dim=-1, 
dtype=torch.float32).to(dtype=dtype) + attn = self.attn_drop(attn) + x = (attn @ v).transpose(1, 2) # + x = x.reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class EncDecAttention(nn.Module): + def __init__( + self, + q_dim, + kv_dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + cosine_attention=False, + ): + super().__init__() + self.num_heads = num_heads + head_dim = q_dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q_proj = nn.Linear(q_dim, q_dim, bias=qkv_bias) + self.kv_proj = nn.Linear(kv_dim, 2 * q_dim, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(q_dim, q_dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.cosine_attention = cosine_attention + + if cosine_attention: + self.logit_scale = nn.Parameter( + torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True + ) + + def forward(self, q, kv, padding_mask=None, alibi_bias=None): + B, N, C = q.shape + + q = ( + self.q_proj(q) + .reshape(B, N, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) # B x H x L x D + kv = ( + self.kv_proj(kv) + .reshape(B, -1, 2, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) # kv x B x H x L x D + k, v = ( + kv[0], + kv[1], + ) # make torchscript happy (cannot use tensor as tuple) + + dtype = q.dtype + + if self.cosine_attention: + # cosine attention + attn = F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1) + logit_scale = torch.clamp( + self.logit_scale, max=torch.log(torch.tensor(1.0 / 0.01)) + ).exp() + attn = attn * logit_scale + else: + q = q * self.scale + attn = q @ k.transpose(-2, -1) + + if alibi_bias is not None: + attn = attn.type_as(alibi_bias) + attn[:, : alibi_bias.size(1)] += alibi_bias + + if padding_mask is not None and padding_mask.any(): + attn = attn.masked_fill( + padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + + attn = attn.softmax(dim=-1, 
class EncDecBlock(nn.Module):
    """Cross-attention transformer block: ``q`` attends to ``kv``, then an MLP.

    ``layer_norm_first`` selects the pre-norm or post-norm ordering, and
    ``first_residual=False`` removes the skip connection around the
    attention sub-layer.
    """

    def __init__(
        self,
        q_dim,
        kv_dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        mlp_drop=0.0,
        post_mlp_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        layer_norm_first=True,
        cosine_attention=False,
        first_residual=True,
    ):
        super().__init__()

        self.layer_norm_first = layer_norm_first

        # Imported here so timm is only needed when a block is constructed.
        from timm.models.vision_transformer import DropPath, Mlp

        self.norm1 = norm_layer(q_dim)
        self.attn = EncDecAttention(
            q_dim,
            kv_dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            cosine_attention=cosine_attention,
        )

        # Stochastic depth is only instantiated for a positive rate.
        if drop_path > 0.0:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = nn.Identity()

        self.norm2 = norm_layer(q_dim)
        self.mlp = Mlp(
            in_features=q_dim,
            hidden_features=int(q_dim * mlp_ratio),
            act_layer=act_layer,
            drop=mlp_drop,
        )
        self.post_mlp_dropout = nn.Dropout(post_mlp_drop, inplace=False)
        self.first_residual = first_residual

    def forward(self, q, kv, padding_mask=None, alibi_bias=None):
        """Run cross-attention from ``q`` onto ``kv`` followed by the MLP."""
        skip = q if self.first_residual else 0

        if not self.layer_norm_first:
            # Post-norm: normalise after each residual sum.
            attended = skip + self.drop_path(
                self.attn(q, kv, padding_mask, alibi_bias)
            )
            normed = self.norm1(attended)
            hidden = self.mlp(normed)
            return self.norm2(
                normed + self.drop_path(self.post_mlp_dropout(hidden))
            )

        # Pre-norm: normalise the input of each sub-layer.
        attended = skip + self.drop_path(
            self.attn(self.norm1(q), kv, padding_mask, alibi_bias)
        )
        hidden = self.mlp(self.norm2(attended))
        # NOTE: the residual added here is the MLP output itself, not the
        # MLP input; this is kept exactly as is so existing weights behave
        # identically.
        return hidden + self.drop_path(self.post_mlp_dropout(hidden))
EncDecBlock( + q_dim=cfg.decoder_dim, + kv_dim=input_dim, + num_heads=8, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + mlp_drop=0.0, + post_mlp_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + layer_norm_first=False, + cosine_attention=False, + first_residual=i > 0, + ) + for i in range(cfg.decoder_layers) + ] + ) + + self.proj = nn.Linear(cfg.decoder_dim, input_dim) + + def reset_parameters(self): + from fairseq.modules.transformer_sentence_encoder import init_bert_params + + self.apply(init_bert_params) + + def forward(self, x, kv): + x = self.input_proj(x) + for i, layer in enumerate(self.blocks): + x = layer(x, kv) + + x = self.proj(x) + return x diff --git a/fairseq/examples/data2vec/models/modalities/text.py b/fairseq/examples/data2vec/models/modalities/text.py new file mode 100644 index 0000000..adfac1c --- /dev/null +++ b/fairseq/examples/data2vec/models/modalities/text.py @@ -0,0 +1,161 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from dataclasses import dataclass +from functools import partial +from typing import Callable, Dict, Optional + +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from fairseq.modules import PositionalEmbedding, FairseqDropout, LayerNorm +from fairseq.tasks import FairseqTask +from .base import D2vModalityConfig, ModalitySpecificEncoder, get_alibi_bias +from .modules import BlockEncoder, Decoder1d +from examples.data2vec.data.modality import Modality + + +@dataclass +class D2vTextConfig(D2vModalityConfig): + type: Modality = Modality.TEXT + max_source_positions: int = 512 + learned_pos: bool = True + dropout: float = 0.1 # used for both local_encoder and contextualized encoder. 
tied with global transformer in data2vec_text + + no_scale_embedding: bool = True + layernorm_embedding: bool = True + no_token_positional_embeddings: bool = False + + +class TextEncoder(ModalitySpecificEncoder): + + modality_cfg: D2vTextConfig + + def __init__( + self, + modality_cfg: D2vTextConfig, + embed_dim: int, + make_block: Callable[[float], nn.ModuleList], + norm_layer: Callable[[int], nn.LayerNorm], + layer_norm_first: bool, + alibi_biases: Dict, + task: Optional[FairseqTask], + ): + self.pad_idx = task.source_dictionary.pad() + self.vocab_size = len(task.source_dictionary) + + local_encoder = TextLocalEncoder( + vocab_size=self.vocab_size, + embed_dim=embed_dim, + max_source_positions=modality_cfg.max_source_positions, + pad_idx=self.pad_idx, + no_scale_embedding=modality_cfg.no_scale_embedding, + layernorm_embedding=modality_cfg.layernorm_embedding, + dropout=modality_cfg.dropout, + no_token_positional_embeddings=modality_cfg.no_token_positional_embeddings, + learned_pos=modality_cfg.learned_pos, + ) + dpr = np.linspace( + modality_cfg.start_drop_path_rate, + modality_cfg.end_drop_path_rate, + modality_cfg.prenet_depth, + ) + context_encoder = BlockEncoder( + nn.ModuleList(make_block(dpr[i]) for i in range(modality_cfg.prenet_depth)), + norm_layer(embed_dim) + if not layer_norm_first and modality_cfg.prenet_depth > 0 + else None, + layer_norm_first, + modality_cfg.prenet_layerdrop, + modality_cfg.prenet_dropout if modality_cfg.prenet_depth > 0 else 0.0, + ) + decoder = ( + Decoder1d(modality_cfg.decoder, embed_dim) + if modality_cfg.decoder is not None + else None + ) + + alibi_bias_fn = partial(get_alibi_bias, alibi_biases=alibi_biases) + + super().__init__( + modality_cfg=modality_cfg, + embed_dim=embed_dim, + local_encoder=local_encoder, + project_features=nn.Identity(), + fixed_positional_encoder=None, + relative_positional_encoder=None, + context_encoder=context_encoder, + decoder=decoder, + get_alibi_bias=alibi_bias_fn, + ) + + def 
class TextLocalEncoder(nn.Module):
    """Embedding front end for text: token embedding, positions, norm, dropout.

    Args:
        vocab_size: number of rows in the token embedding table.
        embed_dim: embedding width.
        max_source_positions: passed to ``PositionalEmbedding`` as the number
            of positions.
        pad_idx: padding index for both the token and positional embeddings.
        no_scale_embedding: when true, token embeddings are not multiplied by
            ``sqrt(embed_dim)``.
        layernorm_embedding: when true, a ``LayerNorm`` is applied to the
            summed embeddings.
        dropout: dropout probability applied to the final embeddings.
        no_token_positional_embeddings: when true, no positional embedding is
            created or added.
        learned_pos: forwarded to ``PositionalEmbedding`` as ``learned``.
    """

    def __init__(
        self,
        vocab_size,
        embed_dim,
        max_source_positions,
        pad_idx,
        no_scale_embedding,
        layernorm_embedding,
        dropout,
        no_token_positional_embeddings,
        learned_pos,
    ):
        super().__init__()
        self.pad_idx = pad_idx
        self.dropout_module = FairseqDropout(dropout)

        self.embed_tokens = nn.Embedding(vocab_size, embed_dim, pad_idx)
        # Assigned exactly once (a redundant second assignment was removed).
        self.embed_scale = 1.0 if no_scale_embedding else math.sqrt(embed_dim)

        if no_token_positional_embeddings:
            self.embed_positions = None
        else:
            self.embed_positions = PositionalEmbedding(
                max_source_positions,
                embed_dim,
                pad_idx,
                learned=learned_pos,
            )

        self.layernorm_embedding = None
        if layernorm_embedding:
            self.layernorm_embedding = LayerNorm(embed_dim)

    def forward(self, src_tokens):
        """Embed ``src_tokens`` (presumably integer token ids - TODO confirm).

        Returns the scaled token embeddings plus positional embeddings (when
        enabled), optionally layer-normalised, with dropout applied last.
        """
        x = self.embed_scale * self.embed_tokens(src_tokens)
        if self.embed_positions is not None:
            x = x + self.embed_positions(src_tokens)

        if self.layernorm_embedding is not None:
            x = self.layernorm_embedding(x)
        x = self.dropout_module(x)
        return x
def get_alibi(
    max_positions: int,
    attention_heads: int,
):
    """Build a symmetric ALiBi bias of shape ``(heads, max_positions, max_positions)``.

    Entry ``[h, i, j]`` equals ``-slope_h * |i - j|``: zero on the diagonal
    and linearly decreasing with distance.
    """

    def _geometric_slopes(count):
        # Geometric sequence whose first term and ratio are both
        # 2 ** (-(2 ** -(log2(count) - 3))).
        base = 2 ** (-(2 ** -(math.log2(count) - 3)))
        return [base * base ** i for i in range(count)]

    def _head_slopes(count):
        # The geometric schedule is defined for power-of-two head counts.
        # For any other count, take the schedule of the largest power of two
        # below it and fill the remainder with every other slope from the
        # schedule of twice that power.
        if math.log2(count).is_integer():
            return _geometric_slopes(count)
        lower = 2 ** math.floor(math.log2(count))
        filler = _head_slopes(2 * lower)[0::2][: count - lower]
        return _geometric_slopes(lower) + filler

    slopes = torch.Tensor(_head_slopes(attention_heads))

    # The bias is symmetric rather than causal: 0 on the diagonal and
    # otherwise linearly decreasing values.
    positions = torch.arange(max_positions)
    neg_distance = torch.abs(positions.unsqueeze(0) - positions.unsqueeze(1)) * -1

    per_head = neg_distance.unsqueeze(0).expand(attention_heads, -1, -1)
    return slopes.unsqueeze(1).unsqueeze(1) * per_head


def masked_alibi(alibi_bias, mask_indices, orig_B, orig_T):
    """Keep only the rows and columns of each bias matrix chosen by ``mask_indices``.

    ``alibi_bias`` is viewed as ``(orig_B, H, orig_T, orig_T)`` and the result
    has shape ``(orig_B * H, M, M)``, where ``M`` is the number of selected
    positions.  The reshapes presumably rely on every sample selecting the
    same number of positions (TODO confirm against callers).
    """
    per_sample = alibi_bias.view(orig_B, -1, orig_T, orig_T)
    num_heads = per_sample.size(1)
    # Broadcast the per-sample selection over the head axis.
    head_mask = mask_indices.unsqueeze(1)

    # First drop the unselected rows ...
    rows_kept = per_sample.masked_select(head_mask.unsqueeze(-1))
    rows_kept = rows_kept.view(orig_B, num_heads, -1, orig_T)
    kept = rows_kept.size(-2)

    # ... then the unselected columns.
    both_kept = rows_kept.masked_select(head_mask.unsqueeze(-2))
    return both_kept.view(-1, kept, kept)
def get_parser():
    """Build the command-line parser for the AudioSet label converter."""
    parser = argparse.ArgumentParser(description="convert audioset labels")
    # fmt: off
    parser.add_argument('in_file', help='audioset csv file to convert')
    parser.add_argument('--manifest', required=True, metavar='PATH', help='wav2vec-like manifest')
    parser.add_argument('--descriptors', required=True, metavar='PATH', help='path to label descriptor file')
    parser.add_argument('--output', required=True, metavar='PATH', help='where to output converted labels')
    # fmt: on

    return parser


def main(argv=None):
    """Convert an AudioSet CSV into one label line per manifest entry.

    Args:
        argv: optional argument list; ``None`` makes argparse read
            ``sys.argv``, which is what a plain ``main()`` call does.

    Raises:
        ValueError: on a malformed or duplicated descriptor line, or on a CSV
            row with fewer than three fields.
        KeyError: when a CSV row uses an unknown label, or a manifest entry
            has no matching CSV row.
    """
    args = get_parser().parse_args(argv)

    # Map label code (second column) -> label index (first column).
    label_descriptors = {}
    with open(args.descriptors, "r") as ldf:
        next(ldf, None)  # skip the first (header) line; tolerate an empty file
        for line in ldf:
            if not line.strip():
                continue

            items = line.split(",")
            # Explicit checks instead of asserts, which vanish under -O.
            if len(items) <= 2:
                raise ValueError(f"malformed descriptor line: {line!r}")
            idx, lbl = items[0], items[1]
            if lbl in label_descriptors:
                raise ValueError(f"duplicate label descriptor: {lbl!r}")
            label_descriptors[lbl] = idx

    # Map clip id -> [start, end, comma-joined label indices].
    labels = {}
    with open(args.in_file, "r") as ifd:
        for line in ifd:
            stripped = line.strip()
            # Skip comment lines and blank lines (e.g. a trailing newline).
            if not stripped or stripped.startswith("#"):
                continue
            items = line.rstrip().split(",")
            if len(items) < 3:
                raise ValueError(f"malformed label line: {line!r}")
            clip_id = items[0].strip()
            start = items[1].strip()
            end = items[2].strip()
            # Remaining fields are label codes, possibly wrapped in quotes.
            lbls = [label_descriptors[it.strip(' "')] for it in items[3:]]
            labels[clip_id] = [start, end, ",".join(lbls)]

    with open(args.manifest, "r") as mf, open(args.output, "w") as of:
        next(mf, None)  # skip the manifest's first line
        for line in mf:
            if not line.strip():
                continue
            # Only the first tab-separated column (the path) is needed, so
            # any number of trailing columns is accepted.
            path = line.rstrip("\n").split("\t", 1)[0]
            clip_id = os.path.splitext(os.path.basename(path))[0]
            print("\t".join(labels[clip_id]), file=of)


if __name__ == "__main__":
    main()
b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr.sh new file mode 100644 index 0000000..41bcd31 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -eu + +job_id="$1" +task_id="$2" +dir="$3" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh $dir + diff --git a/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr_nodep.sh b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr_nodep.sh new file mode 100644 index 0000000..fc85908 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_aws_local_lr_nodep.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -eu + +dir="$1" + +echo "dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh $dir + diff --git a/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh new file mode 100644 index 0000000..1212269 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/multi/finetune_all_fair_local_lr.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" 
+tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" +tasks[mnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MNLI-bin" +tasks[qqp]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QQP-bin" +tasks[sts_b]="/fsx-wav2vec/abaevski/data/nlp/GLUE/STS-B-bin" + +lrs=(5e-6 8e-6 1e-5 2e-5) + +for task data_path in ${(kv)tasks}; do + for lr in $lrs; do + echo $lr $task + PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" \ + python fairseq_cli/hydra_train.py -m --config-dir examples/data2vec/config/multi/text_finetuning \ + --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \ + model.model_path="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" +model=text_wrap + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_char_fair_aws_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_char_fair_aws_local_lr.sh new file mode 100644 index 0000000..18b862c --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_char_fair_aws_local_lr.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +job_id="$1" +task_id="$2" +dir="$3" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/ft_%A.out" +sbatch_args="$sbatch_args -e $dir/log/ft_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_char_fair_local_lr.sh $dir diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair.sh new file mode 100644 index 0000000..34a2df3 --- /dev/null +++ 
b/fairseq/examples/data2vec/scripts/text/finetune_all_fair.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env zsh + +job_id=$1 +task_id=$2 +dir="$3" +cp="$dir/$task_id/checkpoints/checkpoint_last.pt" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +declare -A tasks +tasks[cola]="/private/home/jgu/data/GLUE/CoLA-bin" +tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin" +tasks[mrpc]="/private/home/jgu/data/GLUE/MRPC-bin" +tasks[rte]="/private/home/jgu/data/GLUE/RTE-bin" +tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune/$task" & +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws.sh new file mode 100644 index 0000000..b417c20 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env zsh + +job_id=$1 +task_id=$2 +dir="$3" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune/$task" & +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_local_lr.sh new file mode 100644 index 0000000..64dbcb1 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_local_lr.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +job_id="$1" +task_id="$2" +dir="$3" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh $dir diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_lr.sh new file mode 100644 index 0000000..d75c549 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_aws_lr.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env zsh + +job_id=$1 +task_id=$2 +dir="$3" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" 
+tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do + PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" +hydra.launcher.additional_parameters.dependency="afterok:$job_id" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" & + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh new file mode 100644 index 0000000..8be98c0 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +lrs=(5e-6 8e-6 1e-5 2e-5) + +for task data_path in ${(kv)tasks}; do + for lr in $lrs; do + echo $lr $task + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" \ + python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep.sh new file mode 100644 index 0000000..d02bcc0 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/private/home/jgu/data/GLUE/CoLA-bin" +tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin" +tasks[mrpc]="/private/home/jgu/data/GLUE/MRPC-bin" +tasks[rte]="/private/home/jgu/data/GLUE/RTE-bin" +tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune/$task" & +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws.sh new file mode 100644 index 0000000..7553835 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune/$task" & +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_local_lr.sh new file mode 100644 index 0000000..16c1358 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_local_lr.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -eu + +dir="$1" + +echo "dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_fair_local_lr.sh $dir diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr.sh new file mode 100644 index 0000000..fb5ddbe --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" & + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr_nopos.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr_nopos.sh new file mode 100644 index 0000000..1ffab1c --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_fair_nodep_aws_lr_nopos.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" +tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +for task data_path in ${(kv)tasks}; do + for lr in 5e-6 8e-6 1e-5 2e-5 5e-5 8e-5 1e-4 2e-4; do + PYTHONPATH=. 
PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g_aws task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" +model.encoder_learned_pos=False & + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_aws_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_aws_local_lr.sh new file mode 100644 index 0000000..c3c58ad --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_aws_local_lr.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +job_id="$1" +task_id="$2" +dir="$3" + +echo "job_id: $job_id, task_id: $task_id, dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -d afterok:$job_id -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh $dir diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh new file mode 100644 index 0000000..5efb00e --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[cola]="/fsx-wav2vec/abaevski/data/nlp/GLUE/CoLA-bin" +tasks[qnli]="/fsx-wav2vec/abaevski/data/nlp/GLUE/QNLI-bin" +tasks[mrpc]="/fsx-wav2vec/abaevski/data/nlp/GLUE/MRPC-bin" +tasks[rte]="/fsx-wav2vec/abaevski/data/nlp/GLUE/RTE-bin" 
+tasks[sst_2]="/fsx-wav2vec/abaevski/data/nlp/GLUE/SST-2-bin" + +lrs=(5e-6 8e-6 1e-5 2e-5) + +for task data_path in ${(kv)tasks}; do + for lr in $lrs; do + echo $lr $task + PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" \ + python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task +run_config=local task.data="$data_path" common.log_interval=200 dataset.num_workers=1 \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_lr/$task/$lr" "optimization.lr=[${lr}]" \ + model._name=roberta_large + done +done diff --git a/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_nodep_aws_local_lr.sh b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_nodep_aws_local_lr.sh new file mode 100644 index 0000000..4fb21bc --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_all_large_fair_nodep_aws_local_lr.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -eu + +dir="$1" + +echo "dir: $dir" + +mkdir -p "$dir/log" +sbatch_args="-p wav2vec --nodes=1 --ntasks-per-node=1" +sbatch_args="$sbatch_args --gpus-per-node=1 --cpus-per-task=8 --mem=0 --time=24:00:00" +sbatch_args="$sbatch_args -o $dir/log/decode_sweep_%A.out" +sbatch_args="$sbatch_args -e $dir/log/decode_sweep_%A.err" + +sbatch $sbatch_args examples/data2vec/scripts/text/finetune_all_large_fair_local_lr.sh $dir diff --git a/fairseq/examples/data2vec/scripts/text/finetune_sst2_qnli_sweep_fair_nodep.sh b/fairseq/examples/data2vec/scripts/text/finetune_sst2_qnli_sweep_fair_nodep.sh new file mode 100644 index 0000000..d7b43be --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/finetune_sst2_qnli_sweep_fair_nodep.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env zsh + +dir="$1" +cp="$dir/checkpoints/checkpoint_last.pt" + +echo "dir: $dir" + +declare -A tasks +tasks[qnli]="/private/home/jgu/data/GLUE/QNLI-bin" +tasks[sst_2]="/private/home/jgu/data/GLUE/SST-2-bin" + +lrs="5e-6 1e-5 2e-5 5e-5 1e-4 2e-4 5e-4 1e-3" + +for task data_path in ${(kv)tasks}; do + for 
lr in $(echo "$lrs"); do + PYTHONPATH=. PREFIX="${PREFIX}" SUFFIX="" nohup python fairseq_cli/hydra_train.py -m --config-dir examples/roberta/config/finetuning \ + --config-name $task hydra/launcher=submitit_slurm +run_config=slurm_1g task.data="$data_path" hydra.launcher.name=finetune_${task}_${PREFIX} \ + checkpoint.restore_file="$cp" hydra.sweep.dir="$dir/finetune_sweep/$task/lr_$lr" "optimization.lr=[${lr}]" & + done +done diff --git a/fairseq/examples/data2vec/scripts/text/glue.py b/fairseq/examples/data2vec/scripts/text/glue.py new file mode 100644 index 0000000..5382d31 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/glue.py @@ -0,0 +1,34 @@ +from valids import parser, main as valids_main +import os.path as osp + + +args = parser.parse_args() +args.target = "valid_accuracy" +args.best_biggest = True +args.best = True +args.last = 0 +args.path_contains = None + +res = valids_main(args, print_output=False) + +grouped = {} +for k, v in res.items(): + k = osp.dirname(k) + run = osp.dirname(k) + task = osp.basename(k) + val = v["valid_accuracy"] + + if run not in grouped: + grouped[run] = {} + + grouped[run][task] = val + +for run, tasks in grouped.items(): + print(run) + avg = sum(float(v) for v in tasks.values()) / len(tasks) + avg_norte = sum(float(v) for k,v in tasks.items() if k != 'rte') / (len(tasks) -1) + try: + print(f"{tasks['cola']}\t{tasks['qnli']}\t{tasks['mrpc']}\t{tasks['rte']}\t{tasks['sst_2']}\t{avg:.2f}\t{avg_norte:.2f}") + except: + print(tasks) + print() diff --git a/fairseq/examples/data2vec/scripts/text/glue_lr.py b/fairseq/examples/data2vec/scripts/text/glue_lr.py new file mode 100644 index 0000000..75bdfe0 --- /dev/null +++ b/fairseq/examples/data2vec/scripts/text/glue_lr.py @@ -0,0 +1,143 @@ +import os.path as osp +import re +from collections import defaultdict + +from valids import parser, main as valids_main + + +TASK_TO_METRIC = { + "cola": "mcc", + "qnli": "accuracy", + "mrpc": "acc_and_f1", + "rte": "accuracy", + "sst_2": 
# Column order used by every GLUE summary printed from this module.
TASKS = ["cola", "qnli", "mrpc", "rte", "sst_2", "mnli", "qqp", "sts_b"]


def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when there are none."""
    values = list(values)
    return sum(values) / len(values) if values else float("nan")


def _param_sort_key(param):
    """Sort numeric parameter strings by value, ahead of non-numeric ones."""
    try:
        return (0, float(param), param)
    except ValueError:
        # e.g. sweep directories named ``lr_1e-5``: order them as text.
        return (1, 0.0, param)


def get_best_stat_str(task_vals, show_subdir):
    """Format the best value of each task as one tab-separated row.

    Args:
        task_vals: mapping ``task -> {subdir: value}``.
        show_subdir: if true, each cell is ``(best_subdir, value)`` instead
            of just the value.

    Returns:
        One cell per entry of ``TASKS`` (``-100`` / ``null`` for tasks without
        results), followed by the mean over all tasks that have results and
        the mean over those tasks excluding ``rte``. A mean with no
        contributing task is printed as ``nan``.
    """
    task_to_best_val = {}
    task_to_best_dir = {}
    for task, subdir_to_val in task_vals.items():
        if not subdir_to_val:
            # A task can be registered without any parsed result.
            continue
        best_dir = max(subdir_to_val, key=subdir_to_val.get)
        task_to_best_dir[task] = best_dir
        task_to_best_val[task] = subdir_to_val[best_dir]

    avg1 = _mean_or_nan(task_to_best_val.values())
    avg2 = _mean_or_nan(
        v for task, v in task_to_best_val.items() if task != "rte"
    )

    try:
        msg = ""
        for task in TASKS:
            best_dir = task_to_best_dir.get(task, "null")
            val = task_to_best_val.get(task, -100)
            msg += f"({best_dir}, {val})\t" if show_subdir else f"{val}\t"
        msg += f"{avg1:.2f}\t{avg2:.2f}"
    except (TypeError, ValueError) as e:
        # Best effort: report the formatting problem together with the data.
        msg = str(e)
        msg += str(sorted(task_vals.items()))
    return msg


def get_all_stat_str(task_vals):
    """List every ``subdir -> value`` pair, grouped by task in ``TASKS`` order.

    Tasks that are not in ``TASKS`` are ignored.
    """
    lines = []
    for task in TASKS:
        if task not in task_vals:
            continue
        lines.append(f"=== {task}\n")
        for subdir in sorted(task_vals[task]):
            lines.append(f"\t{subdir}\t{task_vals[task][subdir]}\n")
    return "".join(lines)


def get_tabular_stat_str(task_vals):
    """Format results as one ``param x run`` table per task.

    Every subdir is assumed to look like ``<param>/run_<id>/0``. Rows follow
    the sorted parameters (numeric parameters by value, others as text) and
    columns the sorted run names; a missing cell is printed as ``None``.

    Raises:
        AssertionError: if a subdir does not follow the assumed layout.
    """
    msg = ""
    for task in [task for task in TASKS if task in task_vals]:
        msg += f"=== {task}\n"
        param_to_runs = defaultdict(dict)
        for subdir in task_vals[task]:
            match = re.match(r"(.*)/(run_.*)/0", subdir)
            if match is None:
                raise AssertionError(
                    f"subdir {subdir!r} does not match '<param>/run_*/0'"
                )
            param, run = match.groups()
            param_to_runs[param][run] = task_vals[task][subdir]
        params = sorted(param_to_runs, key=_param_sort_key)
        runs = sorted({run for runs in param_to_runs.values() for run in runs})
        msg += "runs:" + "\t".join(runs) + "\n"
        msg += "params:" + "\t".join(params) + "\n"
        for param in params:
            msg += "\t".join(str(param_to_runs[param].get(run, None)) for run in runs)
            msg += "\n"
    return msg
def load_dictionary(dict_path):
    """Load the fairseq ``Dictionary`` stored at ``dict_path``."""
    return Dictionary.load(dict_path)


def load_dataset(split_path, src_dict):
    """Load the indexed dataset at ``split_path``.

    Raises:
        FileNotFoundError: if no dataset exists at ``split_path``.
    """
    dataset = data_utils.load_indexed_dataset(
        split_path,
        src_dict,
        combine=False,  # set to true for loading `train*`
    )
    if dataset is None:
        raise FileNotFoundError(f"Dataset not found: {split_path}")
    return dataset


def load_bpe(enc_path):
    """Read the BPE vocabulary JSON and return ``(bpe2idx, idx2bpe)``.

    The file is read as UTF-8 explicitly: the vocabulary contains non-ASCII
    symbols (such as U+0120), which must not depend on the locale encoding.
    """
    with open(enc_path, encoding="utf-8") as f:
        bpe2idx = json.load(f)
    idx2bpe = {v: k for k, v in bpe2idx.items()}
    return bpe2idx, idx2bpe


def detokenize(tokens, src_dict, idx2bpe):
    """Turn one tensor of dictionary indices back into a character string.

    ``src_dict.string`` yields space-separated BPE ids; each id is mapped to
    its BPE symbol and the U+0120 marker is rendered as a plain space.
    """
    bpe_ids = src_dict.string(tokens).split()
    raw_chrs = "".join(idx2bpe[int(bpe_id)] for bpe_id in bpe_ids)
    return raw_chrs.replace("\u0120", " ")


def _main(src_root, src_dict_path, src_bpe_path, src_splits, tgt_root, tgt_splits):
    """Dump every binarized split under ``src_root`` as text under ``tgt_root``.

    ``src_splits[i]`` is written to ``{tgt_root}/{tgt_splits[i]}.txt``, one
    detokenized example per line.

    Raises:
        ValueError: if ``src_splits`` and ``tgt_splits`` differ in length.
    """
    if len(src_splits) != len(tgt_splits):
        # zip() below would otherwise drop the unmatched splits silently.
        raise ValueError(
            f"got {len(src_splits)} source splits but {len(tgt_splits)} target splits"
        )

    src_dict = load_dictionary(src_dict_path)
    _, idx2bpe = load_bpe(src_bpe_path)

    for src_split, tgt_split in zip(src_splits, tgt_splits):
        src_dataset = load_dataset(f"{src_root}/{src_split}", src_dict)
        tgt_path = f"{tgt_root}/{tgt_split}.txt"
        print(f"processing {src_split} (dump to {tgt_path})...")
        os.makedirs(os.path.dirname(tgt_path), exist_ok=True)
        # Written as UTF-8 for the same reason the vocabulary is read as UTF-8.
        with open(tgt_path, "w", encoding="utf-8") as f:
            for tokens in tqdm.tqdm(src_dataset):
                f.write(detokenize(tokens, src_dict, idx2bpe) + "\n")
# Command-line interface shared by this script and by the scripts that import
# ``parser`` from it to add their own options.
parser = argparse.ArgumentParser(description='Summarize metrics found in training logs.')
parser.add_argument('base', help='base log path')
parser.add_argument('--file_name', default='train.log', help='the log file name')
parser.add_argument('--target', default='valid_loss', help='target metric')
parser.add_argument('--last', type=int, default=999999999, help='print last n matches')
parser.add_argument('--last_files', type=int, default=None, help='print last x files')
parser.add_argument('--everything', action='store_true', help='print everything instead of only last match')
parser.add_argument('--path_contains', help='only consider matching file pattern')
parser.add_argument('--group_on', help='if set, groups by this metric and shows table of differences')
parser.add_argument('--epoch', help='epoch for comparison', type=int)
parser.add_argument('--skip_empty', action='store_true', help='skip empty results')
parser.add_argument('--skip_containing', help='skips entries containing this attribute')
parser.add_argument('--unique_epochs', action='store_true', help='only consider the last line for each epoch')
parser.add_argument('--best', action='store_true', help='print the last best result')
parser.add_argument('--avg_params', help='average these params through entire log')
parser.add_argument('--extract_prev', help='extracts this metric from previous line')

# Only used together with --group_on: the named parameter is dropped from the
# row key and its value becomes part of the column label.
parser.add_argument('--remove_metric', help='with --group_on: drop this param from the row key and append its value to the column label')

parser.add_argument('--compact', action='store_true', help='if true, just prints checkpoint best val')
parser.add_argument('--hydra', action='store_true', help='if true, uses hydra param conventions')

parser.add_argument('--best_biggest', action='store_true', help='if true, best is the biggest number, not smallest')
parser.add_argument('--key_len', type=int, default=10, help='max length of key')

parser.add_argument('--best_only', action='store_true', help='if set, only prints the best value')
parser.add_argument('--flat', action='store_true', help='just print the best results')
ret = {} + + entries = [] + + def extract_metric(s, metric): + try: + j = json.loads(s) + except: + return None + if args.epoch is not None and ('epoch' not in j or j['epoch'] != args.epoch): + return None + return j[metric] if metric in j else None + + + def extract_params(s): + s = s.replace(args.base, '', 1) + if args.path_contains is not None: + s = s.replace(args.path_contains, '', 1) + + if args.hydra: + num_matches = re.findall(r'(?:/|__)([^/:]+):(\d+\.?\d*)', s) + # str_matches = re.findall(r'(?:/|__)([^/:]+):([^\.]*[^\d\.]+)(?:/|__)', s) + str_matches = re.findall(r'(?:/|__)?((?:(?!(?:\:|__)).)+):([^\.]*[^\d\.]+\d*)(?:/|__)', s) + lr_matches = re.findall(r'optimization.(lr):\[([\d\.,]+)\]', s) + task_matches = re.findall(r'.*/(\d+)$', s) + else: + num_matches = re.findall(r'\.?([^\.]+?)(\d+(e\-\d+)?(?:\.\d+)?)(\.|$)', s) + str_matches = re.findall(r'[/\.]([^\.]*[^\d\.]+\d*)(?=\.)', s) + lr_matches = [] + task_matches = [] + + cp_matches = re.findall(r'checkpoint(?:_\d+)?_(\d+).pt', s) + + items = OrderedDict() + for m in str_matches: + if isinstance(m, tuple): + if 'checkpoint' not in m[0]: + items[m[0]] = m[1] + else: + items[m] = '' + + for m in num_matches: + items[m[0]] = m[1] + + for m in lr_matches: + items[m[0]] = m[1] + + for m in task_matches: + items["hydra_task"] = m + + for m in cp_matches: + items['checkpoint'] = m + + return items + + abs_best = None + + sources = [] + for root, _, files in os.walk(args.base): + if args.path_contains is not None and not args.path_contains in root: + continue + for f in files: + if f.endswith(args.file_name): + sources.append((root, f)) + + if args.last_files is not None: + sources = sources[-args.last_files:] + + for root, file in sources: + with open(os.path.join(root, file), 'r') as fin: + found = [] + avg = {} + prev = None + for line in fin: + line = line.rstrip() + if line.find(args.target) != -1 and ( + args.skip_containing is None or line.find(args.skip_containing) == -1): + try: + idx = 
line.index("{") + line = line[idx:] + line_json = json.loads(line) + except: + continue + if prev is not None: + try: + prev.update(line_json) + line_json = prev + except: + pass + if args.target in line_json: + found.append(line_json) + if args.avg_params: + avg_params = args.avg_params.split(',') + for p in avg_params: + m = extract_metric(line, p) + if m is not None: + prev_v, prev_c = avg.get(p, (0, 0)) + avg[p] = prev_v + float(m), prev_c + 1 + if args.extract_prev: + try: + prev = json.loads(line) + except: + pass + best = None + if args.best: + curr_best = None + for i in range(len(found)): + cand_best = found[i][args.target] if args.target in found[i] else None + + def cmp(a, b): + a = float(a) + b = float(b) + if args.best_biggest: + return a > b + return a < b + + if cand_best is not None and not math.isnan(float(cand_best)) and ( + curr_best is None or cmp(cand_best, curr_best)): + curr_best = cand_best + if abs_best is None or cmp(curr_best, abs_best): + abs_best = curr_best + best = found[i] + if args.unique_epochs or args.epoch: + last_found = [] + last_epoch = None + for i in reversed(range(len(found))): + epoch = found[i]['epoch'] + if args.epoch and args.epoch != epoch: + continue + if epoch != last_epoch: + last_epoch = epoch + last_found.append(found[i]) + found = list(reversed(last_found)) + + if len(found) == 0: + if print_output and (args.last_files is not None or not args.skip_empty): + # print(root.split('/')[-1]) + print(root[len(args.base):]) + print('Nothing') + else: + if not print_output: + ret[root[len(args.base):]] = best + continue + + if args.compact: + # print('{}\t{}'.format(root.split('/')[-1], curr_best)) + print('{}\t{}'.format(root[len(args.base)+1:], curr_best)) + continue + + if args.group_on is None and not args.best_only: + # print(root.split('/')[-1]) + print(root[len(args.base):]) + if not args.everything: + if best is not None and args.group_on is None and not args.best_only and not args.flat: + print(best, '(best)') + 
if args.group_on is None and args.last and not args.best_only and not args.flat: + for f in found[-args.last:]: + if args.extract_prev is not None: + try: + print('{}\t{}'.format(f[args.extract_prev], f[args.target])) + except Exception as e: + print('Exception!', e) + else: + print(f) + try: + metric = found[-1][args.target] if not args.best or best is None else best[args.target] + except: + print(found[-1]) + raise + if metric is not None: + entries.append((extract_params(root), metric)) + else: + for f in found: + print(f) + if not args.group_on and print_output: + print() + + if len(avg) > 0: + for k, (v, c) in avg.items(): + print(f'{k}: {v/c}') + + if args.best_only: + print(abs_best) + + if args.flat: + print("\t".join(m for _, m in entries)) + + if args.group_on is not None: + by_val = OrderedDict() + for e, m in entries: + k = args.group_on + if k not in e: + m_keys = [x for x in e.keys() if x.startswith(k)] + if len(m_keys) == 0: + val = "False" + else: + assert len(m_keys) == 1 + k = m_keys[0] + val = m_keys[0] + else: + val = e[args.group_on] + if val == "": + val = "True" + scrubbed_entry = copy.deepcopy(e) + if k in scrubbed_entry: + del scrubbed_entry[k] + if args.remove_metric and args.remove_metric in scrubbed_entry: + val += '_' + scrubbed_entry[args.remove_metric] + del scrubbed_entry[args.remove_metric] + by_val.setdefault(tuple(scrubbed_entry.items()), dict())[val] = m + distinct_vals = set() + for v in by_val.values(): + distinct_vals.update(v.keys()) + try: + distinct_vals = {int(d) for d in distinct_vals} + except: + print(distinct_vals) + print() + print("by_val", len(by_val)) + for k,v in by_val.items(): + print(k, '=>', v) + print() + + # , by_val, entries) + raise + from natsort import natsorted + svals = list(map(str, natsorted(distinct_vals))) + print('{}\t{}'.format(args.group_on, '\t'.join(svals))) + sums = OrderedDict({n:[] for n in svals}) + for k, v in by_val.items(): + kstr = '.'.join(':'.join(x) for x in k) + vstr = '' + for mv 
in svals: + x = v[mv] if mv in v else '' + vstr += '\t{}'.format(round(x, 5) if isinstance(x, float) else x) + try: + sums[mv].append(float(x)) + except: + pass + print('{}{}'.format(kstr[:args.key_len], vstr)) + if any(len(x) > 0 for x in sums.values()): + print('min:', end='') + for v in sums.values(): + min = np.min(v) + print(f'\t{round(min, 5)}', end='') + print() + print('max:', end='') + for v in sums.values(): + max = np.max(v) + print(f'\t{round(max, 5)}', end='') + print() + print('avg:', end='') + for v in sums.values(): + mean = np.mean(v) + print(f'\t{round(mean, 5)}', end='') + print() + print('median:', end='') + for v in sums.values(): + median = np.median(v) + print(f'\t{round(median, 5)}', end='') + print() + + return ret + +if __name__ == "__main__": + args = parser.parse_args() + main(args, print_output=True) \ No newline at end of file diff --git a/fairseq/examples/data2vec/tasks/__init__.py b/fairseq/examples/data2vec/tasks/__init__.py new file mode 100644 index 0000000..a7422e4 --- /dev/null +++ b/fairseq/examples/data2vec/tasks/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from .image_pretraining import ImagePretrainingTask, ImagePretrainingConfig +from .image_classification import ImageClassificationTask, ImageClassificationConfig +from .mae_image_pretraining import MaeImagePretrainingTask, MaeImagePretrainingConfig + + +__all__ = [ + "ImageClassificationTask", + "ImageClassificationConfig", + "ImagePretrainingTask", + "ImagePretrainingConfig", + "MaeImagePretrainingTask", + "MaeImagePretrainingConfig", +] \ No newline at end of file diff --git a/fairseq/examples/data2vec/tasks/audio_classification.py b/fairseq/examples/data2vec/tasks/audio_classification.py new file mode 100644 index 0000000..2925a04 --- /dev/null +++ b/fairseq/examples/data2vec/tasks/audio_classification.py @@ -0,0 +1,167 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. 
@register_task("audio_classification", dataclass=AudioClassificationConfig)
class AudioClassificationTask(AudioPretrainingTask):
    """Audio pretraining task extended with multi-label class targets.

    The label vocabulary is read from ``cfg.label_descriptors`` and the
    per-example targets from ``<split>.<cfg.labels>``, both under ``cfg.data``.
    """

    cfg: AudioClassificationConfig

    def __init__(
        self,
        cfg: AudioClassificationConfig,
    ):
        super().__init__(cfg)

        # The label vocabulary is built through the task state factory.
        self.state.add_factory("labels", self.load_labels)

    def load_labels(self):
        """Read the label descriptor CSV into a ``{label: index}`` dict.

        The first two comma-separated fields of every non-blank line are the
        index and the label, in that order. A repeated label fails an assert.
        """
        descriptor_path = os.path.join(self.cfg.data, self.cfg.label_descriptors)
        label_to_index = {}
        with open(descriptor_path, "r") as descriptor_file:
            for row in descriptor_file:
                if not row.strip():
                    continue
                fields = row.split(",")
                index, label = fields[0], fields[1]
                assert label not in label_to_index, label
                label_to_index[label] = index
        return label_to_index

    @property
    def labels(self):
        """Label vocabulary held by the task state."""
        return self.state.labels

    def load_dataset(
        self, split: str, task_cfg: AudioClassificationConfig = None, **kwargs
    ):
        """Load ``split`` via the parent task and attach its class targets."""
        super().load_dataset(split, task_cfg, **kwargs)

        task_cfg = task_cfg or self.cfg
        dataset = self.datasets[split]

        label_path = os.path.join(self.cfg.data, f"{split}.{task_cfg.labels}")
        # Rows of examples that the audio dataset skipped carry no target.
        skipped = getattr(dataset, "skipped_indices", set())
        with open(label_path, "r") as label_file:
            labels = [
                # The third tab-separated column holds comma-separated class ids.
                [int(class_id) for class_id in row.rstrip().split("\t")[2].split(",")]
                for row_idx, row in enumerate(label_file)
                if row_idx not in skipped
            ]

        assert len(labels) == len(dataset), (
            f"labels length ({len(labels)}) and dataset length "
            f"({len(dataset)}) do not match"
        )

        self.datasets[split] = AddClassTargetDataset(
            dataset,
            labels,
            multi_class=True,
            add_to_input=True,
            num_classes=len(self.labels),
        )

    def calculate_stats(self, output, target):
        """Return one ``{"AP": average_precision}`` dict per class column.

        Only the per-class average precision is computed; accuracy, AUC and
        precision/recall curves are not.
        """
        return [
            {
                "AP": sklearn_metrics.average_precision_score(
                    target[:, k], output[:, k], average=None
                )
            }
            for k in range(target.shape[-1])
        ]

    def valid_step(self, sample, model, criterion):
        """Run the parent validation step and return its result unchanged."""
        loss, sample_size, logging_output = super().valid_step(sample, model, criterion)
        return loss, sample_size, logging_output

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate logging outputs and register the derived ``mAP`` metric."""
        super().reduce_metrics(logging_outputs, criterion)
        if "_predictions" not in logging_outputs[0]:
            return

        for key in ("_predictions", "_targets"):
            metrics.log_concat_tensor(
                key,
                torch.cat([l[key].cpu() for l in logging_outputs], dim=0),
            )

        def compute_stats(meters):
            predictions = meters["_predictions"].tensor
            # Fewer than 100 accumulated rows: report 0 rather than an mAP.
            if predictions.shape[0] < 100:
                return 0
            stats = self.calculate_stats(predictions, meters["_targets"].tensor)
            return np.nanmean([stat["AP"] for stat in stats])

        metrics.log_derived("mAP", compute_stats)
path_with_split = osp.join(data_path, split) + if osp.exists(path_with_split): + data_path = path_with_split + + from timm.data import create_transform + + if split == "train": + # this should always dispatch to transforms_imagenet_train + transform = create_transform( + input_size=cfg.input_size, + is_training=True, + auto_augment="rand-m9-mstd0.5-inc1", + interpolation="bicubic", + re_prob=0.25, + re_mode="pixel", + re_count=1, + mean=cfg.normalization_mean, + std=cfg.normalization_std, + ) + if not cfg.input_size > 32: + transform.transforms[0] = transforms.RandomCrop( + cfg.input_size, padding=4 + ) + else: + t = [] + if cfg.input_size > 32: + crop_pct = 1 + if cfg.input_size < 384: + crop_pct = 224 / 256 + size = int(cfg.input_size / crop_pct) + t.append( + transforms.Resize( + size, interpolation=3 + ), # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(cfg.input_size)) + + t.append(transforms.ToTensor()) + t.append( + transforms.Normalize(cfg.normalization_mean, cfg.normalization_std) + ) + transform = transforms.Compose(t) + logger.info(transform) + + self.datasets[split] = ImageDataset( + root=data_path, + extensions=IMG_EXTENSIONS, + load_classes=True, + transform=transform, + ) + for k in self.datasets.keys(): + if k != split: + assert self.datasets[k].classes == self.datasets[split].classes + + def build_model(self, model_cfg: FairseqDataclass, from_checkpoint=False): + model = super().build_model(model_cfg, from_checkpoint) + + actualized_cfg = getattr(model, "cfg", None) + if actualized_cfg is not None: + if hasattr(actualized_cfg, "pretrained_model_args"): + model_cfg.pretrained_model_args = actualized_cfg.pretrained_model_args + + return model + + def reduce_metrics(self, logging_outputs, criterion): + super().reduce_metrics(logging_outputs, criterion) + + if "correct" in logging_outputs[0]: + zero = torch.scalar_tensor(0.0) + correct = sum(log.get("correct", zero) for log in logging_outputs) + 
@register_task("image_pretraining", dataclass=ImagePretrainingConfig)
class ImagePretrainingTask(FairseqTask):
    """Image pretraining task: augmented images, no labels, no dictionaries."""

    cfg: ImagePretrainingConfig

    @classmethod
    def setup_task(cls, cfg: ImagePretrainingConfig, **kwargs):
        """Create the task from its configuration.

        Args:
            cfg (ImagePretrainingConfig): configuration of this task
        """
        return cls(cfg)

    def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs):
        """Create the augmented ``ImageDataset`` for ``split``.

        Images are read from ``<cfg.data>/<split>`` when that path exists and
        from ``cfg.data`` itself otherwise.
        """
        cfg = task_cfg or self.cfg

        split_dir = osp.join(self.cfg.data, split)
        data_root = split_dir if osp.exists(split_dir) else self.cfg.data

        augmentations = [
            transforms.ColorJitter(0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomResizedCrop(
                size=cfg.input_size,
                interpolation=transforms.InterpolationMode.BICUBIC,
            ),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=torch.tensor(cfg.normalization_mean),
                std=torch.tensor(cfg.normalization_std),
            ),
        ]
        transform = transforms.Compose(augmentations)

        logger.info(transform)

        self.datasets[split] = ImageDataset(
            root=data_root,
            extensions=IMG_EXTENSIONS,
            load_classes=False,
            transform=transform,
        )

    @property
    def source_dictionary(self):
        """This task has no source dictionary."""
        return None

    @property
    def target_dictionary(self):
        """This task has no target dictionary."""
        return None

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        unbounded = sys.maxsize
        return unbounded, unbounded
@register_task("mae_image_classification", dataclass=MaeImageClassificationConfig)
class MaeImageClassificationTask(FairseqTask):
    """Image classification fine-tuning task built on ``MaeFinetuningImageDataset``."""

    cfg: MaeImageClassificationConfig

    @classmethod
    def setup_task(cls, cfg: MaeImageClassificationConfig, **kwargs):
        """Create the task from its configuration.

        Args:
            cfg (MaeImageClassificationConfig): configuration of this task
        """
        return cls(cfg)

    def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs):
        """Create the dataset for ``split``; only ``"train"`` is shuffled and
        flagged as training data."""
        cfg = task_cfg or self.cfg
        training = split == "train"

        self.datasets[split] = MaeFinetuningImageDataset(
            root=self.cfg.data,
            split=split,
            is_train=training,
            input_size=cfg.input_size,
            local_cache_path=cfg.local_cache_path,
            shuffle=training,
        )

    def build_model(self, model_cfg: FairseqDataclass, from_checkpoint=False):
        """Build the model and copy its pretrained-model args onto ``model_cfg``."""
        model = super().build_model(model_cfg, from_checkpoint)

        actualized_cfg = getattr(model, "cfg", None)
        has_pretrained_args = actualized_cfg is not None and hasattr(
            actualized_cfg, "pretrained_model_args"
        )
        if has_pretrained_args:
            model_cfg.pretrained_model_args = actualized_cfg.pretrained_model_args

        return model

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate logging outputs and register the derived ``accuracy``."""
        super().reduce_metrics(logging_outputs, criterion)

        if "correct" not in logging_outputs[0]:
            return

        zero = torch.scalar_tensor(0.0)
        correct = sum(log.get("correct", zero) for log in logging_outputs)
        metrics.log_scalar_sum("_correct", correct)

        def accuracy(meters):
            # Percentage of correct predictions over all logged samples.
            return 100 * meters["_correct"].sum / meters["sample_size"].sum

        metrics.log_derived("accuracy", accuracy)

    @property
    def source_dictionary(self):
        """This task has no source dictionary."""
        return None

    @property
    def target_dictionary(self):
        """This task has no target dictionary."""
        return None

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        unbounded = sys.maxsize
        return unbounded, unbounded
An additional grant of patent rights +# can be found in the PATENTS file in the same directory. + +import logging +import sys + +from typing import Optional, List +from dataclasses import dataclass, field +from omegaconf import MISSING, II + +from fairseq.data import SubsampleDataset +from fairseq.dataclass import FairseqDataclass +from fairseq.tasks import FairseqTask, register_task + +try: + from ..data import MaeImageDataset +except: + sys.path.append("..") + from data import MaeImageDataset + +logger = logging.getLogger(__name__) + + +@dataclass +class ImageMaskingConfig: + patch_size: int = II("model.modalities.image.patch_size") + mask_prob: float = II("model.modalities.image.mask_prob") + mask_prob_adjust: float = II("model.modalities.image.mask_prob_adjust") + mask_length: int = II("model.modalities.image.mask_length") + inverse_mask: bool = II("model.modalities.image.inverse_mask") + mask_dropout: float = II("model.modalities.image.mask_dropout") + clone_batch: int = II("model.clone_batch") + expand_adjacent: bool = False + non_overlapping: bool = False + + +@dataclass +class MaeImagePretrainingConfig(FairseqDataclass): + data: str = field(default=MISSING, metadata={"help": "path to data directory"}) + multi_data: Optional[List[str]] = None + input_size: int = 224 + local_cache_path: Optional[str] = None + key: str = "imgs" + + beit_transforms: bool = False + target_transform: bool = False + no_transform: bool = False + + rebuild_batches: bool = True + + precompute_mask_config: Optional[ImageMaskingConfig] = None + + subsample: float = 1 + seed: int = II("common.seed") + dataset_type: str = "imagefolder" + + +@register_task("mae_image_pretraining", dataclass=MaeImagePretrainingConfig) +class MaeImagePretrainingTask(FairseqTask): + """ """ + + cfg: MaeImagePretrainingConfig + + @classmethod + def setup_task(cls, cfg: MaeImagePretrainingConfig, **kwargs): + """Setup the task (e.g., load dictionaries). 
+ + Args: + cfg (AudioPretrainingConfig): configuration of this task + """ + + return cls(cfg) + + def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs): + data_path = self.cfg.data + cfg = task_cfg or self.cfg + + compute_mask = cfg.precompute_mask_config is not None + mask_args = {} + if compute_mask: + mask_args = cfg.precompute_mask_config + + self.datasets[split] = MaeImageDataset( + root=data_path if cfg.multi_data is None else cfg.multi_data, + split=split, + input_size=cfg.input_size, + local_cache_path=cfg.local_cache_path, + key=cfg.key, + beit_transforms=cfg.beit_transforms, + target_transform=cfg.target_transform, + no_transform=cfg.no_transform, + compute_mask=compute_mask, + dataset_type=cfg.dataset_type, + **mask_args, + ) + + if cfg.subsample < 1: + self.datasets[split] = SubsampleDataset( + self.datasets[split], + cfg.subsample, + shuffle=True, + seed=cfg.seed, + ) + + @property + def source_dictionary(self): + return None + + @property + def target_dictionary(self): + return None + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return sys.maxsize, sys.maxsize diff --git a/fairseq/examples/data2vec/tasks/multimodal.py b/fairseq/examples/data2vec/tasks/multimodal.py new file mode 100644 index 0000000..74648e9 --- /dev/null +++ b/fairseq/examples/data2vec/tasks/multimodal.py @@ -0,0 +1,165 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. 
+ +import sys + +from dataclasses import dataclass +from typing import Optional, List +from omegaconf import II + +from fairseq.data.iterators import GroupedEpochBatchIterator + +from fairseq.dataclass import FairseqDataclass +from fairseq.tasks import FairseqTask, register_task +from fairseq.tasks.audio_pretraining import AudioPretrainingConfig, AudioPretrainingTask +from fairseq.tasks.masked_lm import MaskedLMConfig, MaskedLMTask +from .mae_image_pretraining import MaeImagePretrainingConfig, MaeImagePretrainingTask +from examples.data2vec.data.modality import Modality + +from fairseq.data.audio.multi_modality_dataset import ( + MultiModalityDataset, + ModalityDatasetItem, +) + + +@dataclass +class MultimodalPretrainingConfig(FairseqDataclass): + audio: Optional[AudioPretrainingConfig] = None + image: Optional[MaeImagePretrainingConfig] = None + text: Optional[MaskedLMConfig] = None + + audio_ratio: float = 1 + image_ratio: float = 1 + text_ratio: float = 1 + + max_tokens: Optional[int] = II("dataset.max_tokens") + batch_size: Optional[int] = II("dataset.batch_size") + update_freq: List[int] = II("optimization.update_freq") + + rebuild_batches: bool = True + + +@register_task("multimodal_pretraining", dataclass=MultimodalPretrainingConfig) +class MultimodalPretrainingTask(FairseqTask): + """ """ + + cfg: MultimodalPretrainingConfig + + def __init__(self, cfg: MultimodalPretrainingConfig): + super().__init__(cfg) + self.audio_task = ( + AudioPretrainingTask(cfg.audio) if cfg.audio is not None else None + ) + self.image_task = ( + MaeImagePretrainingTask(cfg.image) if cfg.image is not None else None + ) + self.text_task = MaskedLMTask(cfg.text) if cfg.text is not None else None + + self.mult_ratios = [] + + @classmethod + def setup_task(cls, cfg: MultimodalPretrainingConfig, **kwargs): + """Setup the task (e.g., load dictionaries). 
+ + Args: + cfg (AudioPretrainingConfig): configuration of this task + """ + + return cls(cfg) + + def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs): + datasets = [] + self.mult_ratios = [] + + def load_ds(task, name, ratio): + if task is not None: + task.load_dataset(split) + ds = ModalityDatasetItem( + datasetname=name, + dataset=task.dataset(split), + max_positions=task.max_positions(), + max_tokens=self.cfg.max_tokens, + max_sentences=self.cfg.batch_size, + ) + datasets.append(ds) + self.mult_ratios.append(ratio) + + load_ds(self.audio_task, Modality.AUDIO, self.cfg.audio_ratio) + load_ds(self.image_task, Modality.IMAGE, self.cfg.image_ratio) + load_ds(self.text_task, Modality.TEXT, self.cfg.text_ratio) + + assert len(datasets) > 0 + + self.datasets[split] = MultiModalityDataset(datasets) + + @property + def supported_modalities(self): + modalities = [] + if self.cfg.text is not None: + modalities.append(Modality.TEXT) + if self.cfg.audio is not None: + modalities.append(Modality.AUDIO) + if self.cfg.image is not None: + modalities.append(Modality.IMAGE) + + return modalities + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=0, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + batch_samplers = dataset.get_batch_samplers( + self.mult_ratios, required_batch_size_multiple, seed + ) + + # return a reusable, sharded iterator + epoch_iter = GroupedEpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_samplers=batch_samplers, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + 
mult_rate=max(self.cfg.update_freq), + buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + ) + self.dataset_to_epoch_iter[dataset] = {} # refresh it every epoch + return epoch_iter + + @property + def source_dictionary(self): + return None + + @property + def target_dictionary(self): + return None + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return sys.maxsize, sys.maxsize diff --git a/fairseq/examples/discriminative_reranking_nmt/README.md b/fairseq/examples/discriminative_reranking_nmt/README.md new file mode 100644 index 0000000..b155e85 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/README.md @@ -0,0 +1,202 @@ +# Discriminative Reranking for Neural Machine Translation +https://aclanthology.org/2021.acl-long.563/ + +This folder contains source code for training DrNMT, a discriminatively trained reranker for neural machine translation. + +## Data preparation +1. Follow the instructions under `examples/translation` to build a base MT model. Prepare three files, one with source sentences, one with ground truth target sentences, and one with hypotheses generated from the base MT model. Each line in the file contains one sentence in raw text (i.e. no sentencepiece, etc.). Below is an example of the files with _N_ hypotheses for each source sentence. + +``` +# Example of the source sentence file: (The file should contain L lines.) + +source_sentence_1 +source_sentence_2 +source_sentence_3 +... +source_sentence_L + +# Example of the target sentence file: (The file should contain L lines.) + +target_sentence_1 +target_sentence_2 +target_sentence_3 +... +target_sentence_L + +# Example of the hypotheses file: (The file should contain L*N lines.) + +source_sentence_1_hypo_1 +source_sentence_1_hypo_2 +... +source_sentence_1_hypo_N +source_sentence_2_hypo_1 +... +source_sentence_2_hypo_N +... +source_sentence_L_hypo_1 +... +source_sentence_L_hypo_N +``` + +2. 
Download the [XLMR model](https://github.com/facebookresearch/fairseq/tree/main/examples/xlmr#pre-trained-models). +``` +wget https://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz +tar zxvf xlmr.base.tar.gz + +# The folder should contain dict.txt, model.pt and sentencepiece.bpe.model. +``` + +3. Prepare scores and BPE data. +* `N`: Number of hypotheses per source sentence. We use 50 in the paper. +* `SPLIT`: Name of the data split, i.e. train, valid, test. Use split_name, split_name1, split_name2, ..., if there are multiple datasets for a split, e.g. train, train1, valid, valid1. +* `NUM_SHARDS`: Number of shards. Set this to 1 for non-train splits. +* `METRIC`: The metric for DrNMT to optimize for. We support either `bleu` or `ter`. +``` +# For each data split, e.g. train, valid, test, etc., run the following: + +SOURCE_FILE=/path/to/source_sentence_file +TARGET_FILE=/path/to/target_sentence_file +HYPO_FILE=/path/to/hypo_file +XLMR_DIR=/path/to/xlmr +OUTPUT_DIR=/path/to/output + +python scripts/prep_data.py \ + --input-source ${SOURCE_FILE} \ + --input-target ${TARGET_FILE} \ + --input-hypo ${HYPO_FILE} \ + --output-dir ${OUTPUT_DIR} \ + --split $SPLIT \ + --beam $N \ + --sentencepiece-model ${XLMR_DIR}/sentencepiece.bpe.model \ + --metric $METRIC \ + --num-shards ${NUM_SHARDS} + +# The script will create ${OUTPUT_DIR}/$METRIC with ${NUM_SHARDS} splits. +# Under split*/input_src, split*/input_tgt and split*/$METRIC, there will be $SPLIT.bpe and $SPLIT.$METRIC files, respectively. + +``` + +4. Pre-process the data into fairseq format. 
+``` +# use comma to separate if there is more than one train or valid set +for suffix in src tgt ; do + fairseq-preprocess --only-source \ + --trainpref ${OUTPUT_DIR}/$METRIC/split1/input_${suffix}/train.bpe \ + --validpref ${OUTPUT_DIR}/$METRIC/split1/input_${suffix}/valid.bpe \ + --destdir ${OUTPUT_DIR}/$METRIC/split1/input_${suffix} \ + --workers 60 \ + --srcdict ${XLMR_DIR}/dict.txt +done + +for i in `seq 2 ${NUM_SHARDS}`; do + for suffix in src tgt ; do + fairseq-preprocess --only-source \ + --trainpref ${OUTPUT_DIR}/$METRIC/split${i}/input_${suffix}/train.bpe \ + --destdir ${OUTPUT_DIR}/$METRIC/split${i}/input_${suffix} \ + --workers 60 \ + --srcdict ${XLMR_DIR}/dict.txt + + ln -s ${OUTPUT_DIR}/$METRIC/split1/input_${suffix}/valid* ${OUTPUT_DIR}/$METRIC/split${i}/input_${suffix}/. + done + + ln -s ${OUTPUT_DIR}/$METRIC/split1/$METRIC/valid* ${OUTPUT_DIR}/$METRIC/split${i}/$METRIC/. +done +``` + +## Training + +``` +EXP_DIR=/path/to/exp + +# An example of training the model with the config for the De-En experiment in the paper. +# The config uses 16 GPUs and 50 hypotheses. +# For training with fewer GPUs, set +# distributed_training.distributed_world_size=k +optimization.update_freq='[x]' where x = 16/k +# For training with fewer hypotheses, set +# task.mt_beam=N dataset.batch_size=N dataset.required_batch_size_multiple=N + +fairseq-hydra-train -m \ + --config-dir config/ --config-name deen \ + task.data=${OUTPUT_DIR}/$METRIC/split1/ \ + task.num_data_splits=${NUM_SHARDS} \ + model.pretrained_model=${XLMR_DIR}/model.pt \ + common.user_dir=${FAIRSEQ_ROOT}/examples/discriminative_reranking_nmt \ + checkpoint.save_dir=${EXP_DIR} + +``` + +## Inference & scoring +Perform DrNMT reranking (fw + reranker score) +1. Tune weights on valid sets. 
+``` +# generate N hypotheses with the base MT model (fw score) +VALID_SOURCE_FILE=/path/to/source_sentences # one sentence per line, converted to the sentencepiece used by the base MT model +VALID_TARGET_FILE=/path/to/target_sentences # one sentence per line in raw text, i.e. no sentencepiece or tokenization +MT_MODEL=/path/to/mt_model +MT_DATA_PATH=/path/to/mt_data + +cat ${VALID_SOURCE_FILE} | \ + fairseq-interactive ${MT_DATA_PATH} \ + --max-tokens 4000 --buffer-size 16 \ + --num-workers 32 --path ${MT_MODEL} \ + --beam $N --nbest $N \ + --post-process sentencepiece &> valid-hypo.out + +# replace "bleu" with "ter" to optimize for TER +python drnmt_rerank.py \ + ${OUTPUT_DIR}/$METRIC/split1/ \ + --path ${EXP_DIR}/checkpoint_best.pt \ + --in-text valid-hypo.out \ + --results-path ${EXP_DIR} \ + --gen-subset valid \ + --target-text ${VALID_TARGET_FILE} \ + --user-dir ${FAIRSEQ_ROOT}/examples/discriminative_reranking_nmt \ + --bpe sentencepiece \ + --sentencepiece-model ${XLMR_DIR}/sentencepiece.bpe.model \ + --beam $N \ + --batch-size $N \ + --metric bleu \ + --tune + +``` + +2. Apply best weights on test sets. +``` +# generate N hypotheses with the base MT model (fw score) +TEST_SOURCE_FILE=/path/to/source_sentences # one sentence per line, converted to the sentencepiece used by the base MT model + +cat ${TEST_SOURCE_FILE} | \ + fairseq-interactive ${MT_DATA_PATH} \ + --max-tokens 4000 --buffer-size 16 \ + --num-workers 32 --path ${MT_MODEL} \ + --beam $N --nbest $N \ + --post-process sentencepiece &> test-hypo.out + +# replace "bleu" with "ter" to evaluate TER +# Add --target-text for evaluating BLEU/TER, +# otherwise the script will only generate the hypotheses with the highest scores. 
+python drnmt_rerank.py \ + ${OUTPUT_DIR}/$METRIC/split1/ \ + --path ${EXP_DIR}/checkpoint_best.pt \ + --in-text test-hypo.out \ + --results-path ${EXP_DIR} \ + --gen-subset test \ + --user-dir ${FAIRSEQ_ROOT}/examples/discriminative_reranking_nmt \ + --bpe sentencepiece \ + --sentencepiece-model ${XLMR_DIR}/sentencepiece.bpe.model \ + --beam $N \ + --batch-size $N \ + --metric bleu \ + --fw-weight ${BEST_FW_WEIGHT} \ + --lenpen ${BEST_LENPEN} +``` + +## Citation +```bibtex +@inproceedings{lee2021discriminative, + title={Discriminative Reranking for Neural Machine Translation}, + author={Lee, Ann and Auli, Michael and Ranzato, Marc'Aurelio}, + booktitle={ACL}, + year={2021} +} +``` diff --git a/fairseq/examples/discriminative_reranking_nmt/__init__.py b/fairseq/examples/discriminative_reranking_nmt/__init__.py new file mode 100644 index 0000000..0278f6a --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/__init__.py @@ -0,0 +1 @@ +from . import criterions, models, tasks # noqa diff --git a/fairseq/examples/discriminative_reranking_nmt/config/deen.yaml b/fairseq/examples/discriminative_reranking_nmt/config/deen.yaml new file mode 100644 index 0000000..3fc2d5f --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/config/deen.yaml @@ -0,0 +1,56 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 50 + seed: 2 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: bleu + maximize_best_checkpoint_metric: true + +task: + _name: discriminative_reranking_nmt + data: ??? + num_data_splits: ??? + include_src: true + mt_beam: 50 + eval_target_metric: true + target_metric: bleu + +dataset: + batch_size: 50 + num_workers: 6 + required_batch_size_multiple: 50 + valid_subset: ??? 
+ +criterion: + _name: kl_divergence_rereanking + target_dist_norm: minmax + temperature: 0.5 + +optimization: + max_epoch: 200 + lr: [0.00005] + update_freq: [32] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 8000 + total_num_update: 320000 + +model: + _name: discriminative_nmt_reranker + pretrained_model: ??? + classifier_dropout: 0.2 + +distributed_training: + ddp_backend: no_c10d + distributed_world_size: 16 diff --git a/fairseq/examples/discriminative_reranking_nmt/criterions/__init__.py b/fairseq/examples/discriminative_reranking_nmt/criterions/__init__.py new file mode 100644 index 0000000..7c257c2 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/criterions/__init__.py @@ -0,0 +1,6 @@ +from .discriminative_reranking_criterion import KLDivergenceRerankingCriterion + + +__all__ = [ + "KLDivergenceRerankingCriterion", +] diff --git a/fairseq/examples/discriminative_reranking_nmt/criterions/discriminative_reranking_criterion.py b/fairseq/examples/discriminative_reranking_nmt/criterions/discriminative_reranking_criterion.py new file mode 100644 index 0000000..c8f19e3 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/criterions/discriminative_reranking_criterion.py @@ -0,0 +1,139 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +from dataclasses import dataclass, field + +import torch +import torch.nn.functional as F + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import ChoiceEnum, FairseqDataclass + + +_EPSILON = torch.finfo(torch.float32).eps +TARGET_DIST_NORM_CHOICES = ChoiceEnum(["none", "minmax"]) + + +@dataclass +class KLDivergenceRerankingCriterionConfig(FairseqDataclass): + target_dist_norm: TARGET_DIST_NORM_CHOICES = field( + default="none", + metadata={"help": "method to normalize the range of target scores"}, + ) + temperature: float = field( + default=1.0, + metadata={"help": "temperature in softmax for target distributions"}, + ) + forward_batch_size: int = field( + default=32, + metadata={ + "help": "number of hypotheses per batch for model forward (set a value smaller than --mt-beam to avoid OOM when training with a large beam size)" + }, + ) + + +@register_criterion( + "kl_divergence_rereanking", dataclass=KLDivergenceRerankingCriterionConfig +) +class KLDivergenceRerankingCriterion(FairseqCriterion): + def __init__( + self, task, target_dist_norm, temperature, forward_batch_size, + ): + super().__init__(task) + self.target_dist_norm = target_dist_norm + self.temperature = temperature + self.forward_batch_size = forward_batch_size + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + sample_size = sample["id"].numel() + assert sample_size % self.task.cfg.mt_beam == 0, ( + f"sample_size ({sample_size}) cannot be divided by beam size ({self.task.cfg.mt_beam})." + f"Please set --required-batch-size-multiple={self.task.cfg.mt_beam}." 
+ ) + + # split into smaller batches for model forward + batch_out = [] + for i in range(0, sample_size, self.forward_batch_size): + j = min(i + self.forward_batch_size, sample_size) + + out = model( + src_tokens=sample["net_input"]["src_tokens"][i:j, :], + src_lengths=sample["net_input"]["src_lengths"][i:j], + ) + + batch_out.append( + model.sentence_forward(out, sample["net_input"]["src_tokens"][i:j, :]) + ) + + batch_out = torch.cat(batch_out, dim=0).view( + self.task.cfg.mt_beam, sample_size // self.task.cfg.mt_beam, -1 + ) # T x B x C + if model.joint_classification == "sent": + batch_out = model.joint_forward(batch_out) + scores = model.classification_forward(batch_out.view(sample_size, 1, -1)).view( + -1, self.task.cfg.mt_beam + ) # input: B x T x C + + loss = self.compute_kl_loss( + scores, sample["target"][:, 0].view(-1, self.task.cfg.mt_beam) + ) + + sample_size = sample_size // self.task.cfg.mt_beam + + logging_output = { + "loss": loss.detach(), + "ntokens": sample["ntokens"], + "nsentences": sample_size * self.task.cfg.mt_beam, + "sample_size": sample_size, + "scores": scores.detach(), + } + + return loss, sample_size, logging_output + + def compute_kl_loss(self, logits, target): + norm_target = target + if self.target_dist_norm == "minmax": + min_v = torch.min(target, 1, keepdim=True).values + max_v = torch.max(target, 1, keepdim=True).values + norm_target = (target - min_v) / (max_v - min_v + _EPSILON) + + target_dist = F.softmax( + norm_target / self.temperature, dim=-1, dtype=torch.float32 + ) + model_dist = F.log_softmax(logits, dim=-1, dtype=torch.float32) + loss = -(target_dist * model_dist - target_dist * target_dist.log()).sum() + return loss + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + loss 
= loss_sum / sample_size / math.log(2) + metrics.log_scalar("loss", loss, sample_size, round=3) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/examples/discriminative_reranking_nmt/drnmt_rerank.py b/fairseq/examples/discriminative_reranking_nmt/drnmt_rerank.py new file mode 100644 index 0000000..2e0fc2b --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/drnmt_rerank.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Score raw text with a trained model. +""" + +from collections import namedtuple +import logging +from multiprocessing import Pool +import sys +import os +import random + +import numpy as np +import sacrebleu +import torch + +from fairseq import checkpoint_utils, options, utils + + +logger = logging.getLogger("fairseq_cli.drnmt_rerank") +logger.setLevel(logging.INFO) + +Batch = namedtuple("Batch", "ids src_tokens src_lengths") + + +pool_init_variables = {} + + +def init_loaded_scores(mt_scores, model_scores, hyp, ref): + global pool_init_variables + pool_init_variables["mt_scores"] = mt_scores + pool_init_variables["model_scores"] = model_scores + pool_init_variables["hyp"] = hyp + pool_init_variables["ref"] = ref + + +def parse_fairseq_gen(filename, task): + source = {} + hypos = {} + scores = {} + with open(filename, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line.startswith("S-"): # source + uid, text = line.split("\t", 1) + uid = int(uid[2:]) + source[uid] = text + elif line.startswith("D-"): # hypo + uid, score, text = line.split("\t", 2) + uid = int(uid[2:]) + if uid not in hypos: 
+ hypos[uid] = [] + scores[uid] = [] + hypos[uid].append(text) + scores[uid].append(float(score)) + else: + continue + + source_out = [source[i] for i in range(len(hypos))] + hypos_out = [h for i in range(len(hypos)) for h in hypos[i]] + scores_out = [s for i in range(len(scores)) for s in scores[i]] + + return source_out, hypos_out, scores_out + + +def read_target(filename): + with open(filename, "r", encoding="utf-8") as f: + output = [line.strip() for line in f] + return output + + +def make_batches(args, src, hyp, task, max_positions, encode_fn): + assert len(src) * args.beam == len( + hyp + ), f"Expect {len(src) * args.beam} hypotheses for {len(src)} source sentences with beam size {args.beam}. Got {len(hyp)} hypotheses intead." + hyp_encode = [ + task.source_dictionary.encode_line(encode_fn(h), add_if_not_exist=False).long() + for h in hyp + ] + if task.cfg.include_src: + src_encode = [ + task.source_dictionary.encode_line( + encode_fn(s), add_if_not_exist=False + ).long() + for s in src + ] + tokens = [(src_encode[i // args.beam], h) for i, h in enumerate(hyp_encode)] + lengths = [(t1.numel(), t2.numel()) for t1, t2 in tokens] + else: + tokens = [(h,) for h in hyp_encode] + lengths = [(h.numel(),) for h in hyp_encode] + + itr = task.get_batch_iterator( + dataset=task.build_dataset_for_inference(tokens, lengths), + max_tokens=args.max_tokens, + max_sentences=args.batch_size, + max_positions=max_positions, + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + ).next_epoch_itr(shuffle=False) + + for batch in itr: + yield Batch( + ids=batch["id"], + src_tokens=batch["net_input"]["src_tokens"], + src_lengths=batch["net_input"]["src_lengths"], + ) + + +def decode_rerank_scores(args): + if args.max_tokens is None and args.batch_size is None: + args.batch_size = 1 + + logger.info(args) + + use_cuda = torch.cuda.is_available() and not args.cpu + + # Load ensemble + logger.info("loading model(s) from {}".format(args.path)) + models, _model_args, task = 
checkpoint_utils.load_model_ensemble_and_task( + [args.path], arg_overrides=eval(args.model_overrides), + ) + + for model in models: + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + # Initialize generator + generator = task.build_generator(args) + + # Handle tokenization and BPE + tokenizer = task.build_tokenizer(args) + bpe = task.build_bpe(args) + + def encode_fn(x): + if tokenizer is not None: + x = tokenizer.encode(x) + if bpe is not None: + x = bpe.encode(x) + return x + + max_positions = utils.resolve_max_positions( + task.max_positions(), *[model.max_positions() for model in models] + ) + + src, hyp, mt_scores = parse_fairseq_gen(args.in_text, task) + model_scores = {} + logger.info("decode reranker score") + for batch in make_batches(args, src, hyp, task, max_positions, encode_fn): + src_tokens = batch.src_tokens + src_lengths = batch.src_lengths + if use_cuda: + src_tokens = src_tokens.cuda() + src_lengths = src_lengths.cuda() + + sample = { + "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths}, + } + scores = task.inference_step(generator, models, sample) + + for id, sc in zip(batch.ids.tolist(), scores.tolist()): + model_scores[id] = sc[0] + + model_scores = [model_scores[i] for i in range(len(model_scores))] + + return src, hyp, mt_scores, model_scores + + +def get_score(mt_s, md_s, w1, lp, tgt_len): + return mt_s / (tgt_len ** lp) * w1 + md_s + + +def get_best_hyps(mt_scores, md_scores, hypos, fw_weight, lenpen, beam): + assert len(mt_scores) == len(md_scores) and len(mt_scores) == len(hypos) + hypo_scores = [] + best_hypos = [] + best_scores = [] + offset = 0 + for i in range(len(hypos)): + tgt_len = len(hypos[i].split()) + hypo_scores.append( + get_score(mt_scores[i], md_scores[i], fw_weight, lenpen, tgt_len) + ) + + if (i + 1) % beam == 0: + max_i = np.argmax(hypo_scores) + best_hypos.append(hypos[offset + max_i]) + best_scores.append(hypo_scores[max_i]) + hypo_scores = [] + offset += beam + return best_hypos, 
best_scores + + +def eval_metric(args, hypos, ref): + if args.metric == "bleu": + score = sacrebleu.corpus_bleu(hypos, [ref]).score + else: + score = sacrebleu.corpus_ter(hypos, [ref]).score + + return score + + +def score_target_hypo(args, fw_weight, lp): + mt_scores = pool_init_variables["mt_scores"] + model_scores = pool_init_variables["model_scores"] + hyp = pool_init_variables["hyp"] + ref = pool_init_variables["ref"] + best_hypos, _ = get_best_hyps( + mt_scores, model_scores, hyp, fw_weight, lp, args.beam + ) + rerank_eval = None + if ref: + rerank_eval = eval_metric(args, best_hypos, ref) + print(f"fw_weight {fw_weight}, lenpen {lp}, eval {rerank_eval}") + + return rerank_eval + + +def print_result(best_scores, best_hypos, output_file): + for i, (s, h) in enumerate(zip(best_scores, best_hypos)): + print(f"{i}\t{s}\t{h}", file=output_file) + + +def main(args): + utils.import_user_module(args) + + src, hyp, mt_scores, model_scores = decode_rerank_scores(args) + + assert ( + not args.tune or args.target_text is not None + ), "--target-text has to be set when tuning weights" + if args.target_text: + ref = read_target(args.target_text) + assert len(src) == len( + ref + ), f"different numbers of source and target sentences ({len(src)} vs. 
{len(ref)})" + + orig_best_hypos = [hyp[i] for i in range(0, len(hyp), args.beam)] + orig_eval = eval_metric(args, orig_best_hypos, ref) + + if args.tune: + logger.info("tune weights for reranking") + + random_params = np.array( + [ + [ + random.uniform( + args.lower_bound_fw_weight, args.upper_bound_fw_weight + ), + random.uniform(args.lower_bound_lenpen, args.upper_bound_lenpen), + ] + for k in range(args.num_trials) + ] + ) + + logger.info("launching pool") + with Pool( + 32, + initializer=init_loaded_scores, + initargs=(mt_scores, model_scores, hyp, ref), + ) as p: + rerank_scores = p.starmap( + score_target_hypo, + [ + (args, random_params[i][0], random_params[i][1],) + for i in range(args.num_trials) + ], + ) + if args.metric == "bleu": + best_index = np.argmax(rerank_scores) + else: + best_index = np.argmin(rerank_scores) + best_fw_weight = random_params[best_index][0] + best_lenpen = random_params[best_index][1] + else: + assert ( + args.lenpen is not None and args.fw_weight is not None + ), "--lenpen and --fw-weight should be set" + best_fw_weight, best_lenpen = args.fw_weight, args.lenpen + + best_hypos, best_scores = get_best_hyps( + mt_scores, model_scores, hyp, best_fw_weight, best_lenpen, args.beam + ) + + if args.results_path is not None: + os.makedirs(args.results_path, exist_ok=True) + output_path = os.path.join( + args.results_path, "generate-{}.txt".format(args.gen_subset), + ) + with open(output_path, "w", buffering=1, encoding="utf-8") as o: + print_result(best_scores, best_hypos, o) + else: + print_result(best_scores, best_hypos, sys.stdout) + + if args.target_text: + rerank_eval = eval_metric(args, best_hypos, ref) + print(f"before reranking, {args.metric.upper()}:", orig_eval) + print( + f"after reranking with fw_weight={best_fw_weight}, lenpen={best_lenpen}, {args.metric.upper()}:", + rerank_eval, + ) + + +def cli_main(): + parser = options.get_generation_parser(interactive=True) + + parser.add_argument( + "--in-text", + default=None, + 
required=True, + help="text from fairseq-interactive output, containing source sentences and hypotheses", + ) + parser.add_argument("--target-text", default=None, help="reference text") + parser.add_argument("--metric", type=str, choices=["bleu", "ter"], default="bleu") + parser.add_argument( + "--tune", + action="store_true", + help="if set, tune weights on fw scores and lenpen instead of applying fixed weights for reranking", + ) + parser.add_argument( + "--lower-bound-fw-weight", + default=0.0, + type=float, + help="lower bound of search space", + ) + parser.add_argument( + "--upper-bound-fw-weight", + default=3, + type=float, + help="upper bound of search space", + ) + parser.add_argument( + "--lower-bound-lenpen", + default=0.0, + type=float, + help="lower bound of search space", + ) + parser.add_argument( + "--upper-bound-lenpen", + default=3, + type=float, + help="upper bound of search space", + ) + parser.add_argument( + "--fw-weight", type=float, default=None, help="weight on the fw model score" + ) + parser.add_argument( + "--num-trials", + default=1000, + type=int, + help="number of trials to do for random search", + ) + + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/discriminative_reranking_nmt/models/__init__.py b/fairseq/examples/discriminative_reranking_nmt/models/__init__.py new file mode 100644 index 0000000..c593ea5 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/models/__init__.py @@ -0,0 +1,6 @@ +from .discriminative_reranking_model import DiscriminativeNMTReranker + + +__all__ = [ + "DiscriminativeNMTReranker", +] diff --git a/fairseq/examples/discriminative_reranking_nmt/models/discriminative_reranking_model.py b/fairseq/examples/discriminative_reranking_nmt/models/discriminative_reranking_model.py new file mode 100644 index 0000000..e4b5887 --- /dev/null +++ 
b/fairseq/examples/discriminative_reranking_nmt/models/discriminative_reranking_model.py @@ -0,0 +1,365 @@ +from dataclasses import dataclass, field +import os + +import torch +import torch.nn as nn + +from fairseq import utils +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.models import ( + BaseFairseqModel, + register_model, +) + +from fairseq.models.roberta.model import RobertaClassificationHead + +from fairseq.modules import ( + LayerNorm, + TransformerSentenceEncoder, + TransformerSentenceEncoderLayer, +) + + +ACTIVATION_FN_CHOICES = ChoiceEnum(utils.get_available_activation_fns()) +JOINT_CLASSIFICATION_CHOICES = ChoiceEnum(["none", "sent"]) +SENTENCE_REP_CHOICES = ChoiceEnum(["head", "meanpool", "maxpool"]) + + +def update_init_roberta_model_state(state): + """ + update the state_dict of a Roberta model for initializing + weights of the BertRanker + """ + for k in list(state.keys()): + if ".lm_head." in k or "version" in k: + del state[k] + continue + # remove 'encoder/decoder.sentence_encoder.' from the key + assert k.startswith("encoder.sentence_encoder.") or k.startswith( + "decoder.sentence_encoder." 
+ ), f"Cannot recognize parameter name {k}" + if "layernorm_embedding" in k: + new_k = k.replace(".layernorm_embedding.", ".emb_layer_norm.") + state[new_k[25:]] = state[k] + else: + state[k[25:]] = state[k] + del state[k] + + +class BaseRanker(nn.Module): + def __init__(self, args, task): + super().__init__() + + self.separator_token = task.dictionary.eos() + self.padding_idx = task.dictionary.pad() + + def forward(self, src_tokens): + raise NotImplementedError + + def get_segment_labels(self, src_tokens): + segment_boundary = (src_tokens == self.separator_token).long() + segment_labels = ( + segment_boundary.cumsum(dim=1) + - segment_boundary + - (src_tokens == self.padding_idx).long() + ) + + return segment_labels + + def get_positions(self, src_tokens, segment_labels): + segment_positions = ( + torch.arange(src_tokens.shape[1]) + .to(src_tokens.device) + .repeat(src_tokens.shape[0], 1) + ) + segment_boundary = (src_tokens == self.separator_token).long() + _, col_idx = (segment_positions * segment_boundary).nonzero(as_tuple=True) + col_idx = torch.cat([torch.zeros(1).type_as(col_idx), col_idx]) + offset = torch.cat( + [ + torch.zeros(1).type_as(segment_boundary), + segment_boundary.sum(dim=1).cumsum(dim=0)[:-1], + ] + ) + segment_positions -= col_idx[segment_labels + offset.unsqueeze(1)] * ( + segment_labels != 0 + ) + + padding_mask = src_tokens.ne(self.padding_idx) + segment_positions = (segment_positions + 1) * padding_mask.type_as( + segment_positions + ) + self.padding_idx + + return segment_positions + + +class BertRanker(BaseRanker): + def __init__(self, args, task): + super(BertRanker, self).__init__(args, task) + + init_model = getattr(args, "pretrained_model", "") + self.joint_layers = nn.ModuleList() + if os.path.isfile(init_model): + print(f"initialize weight from {init_model}") + + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + os.path.dirname(init_model), + checkpoint_file=os.path.basename(init_model), + ) + + in_state_dict = 
x["models"][0].state_dict() + init_args = x["args"].model + + num_positional_emb = init_args.max_positions + task.dictionary.pad() + 1 + + # follow the setup in roberta + self.model = TransformerSentenceEncoder( + padding_idx=task.dictionary.pad(), + vocab_size=len(task.dictionary), + num_encoder_layers=getattr( + args, "encoder_layers", init_args.encoder_layers + ), + embedding_dim=init_args.encoder_embed_dim, + ffn_embedding_dim=init_args.encoder_ffn_embed_dim, + num_attention_heads=init_args.encoder_attention_heads, + dropout=init_args.dropout, + attention_dropout=init_args.attention_dropout, + activation_dropout=init_args.activation_dropout, + num_segments=2, # add language embeddings + max_seq_len=num_positional_emb, + offset_positions_by_padding=False, + encoder_normalize_before=True, + apply_bert_init=True, + activation_fn=init_args.activation_fn, + freeze_embeddings=args.freeze_embeddings, + n_trans_layers_to_freeze=args.n_trans_layers_to_freeze, + ) + + # still need to learn segment embeddings as we added a second language embedding + if args.freeze_embeddings: + for p in self.model.segment_embeddings.parameters(): + p.requires_grad = False + + update_init_roberta_model_state(in_state_dict) + print("loading weights from the pretrained model") + self.model.load_state_dict( + in_state_dict, strict=False + ) # ignore mismatch in language embeddings + + ffn_embedding_dim = init_args.encoder_ffn_embed_dim + num_attention_heads = init_args.encoder_attention_heads + dropout = init_args.dropout + attention_dropout = init_args.attention_dropout + activation_dropout = init_args.activation_dropout + activation_fn = init_args.activation_fn + + classifier_embed_dim = getattr( + args, "embed_dim", init_args.encoder_embed_dim + ) + if classifier_embed_dim != init_args.encoder_embed_dim: + self.transform_layer = nn.Linear( + init_args.encoder_embed_dim, classifier_embed_dim + ) + else: + self.model = TransformerSentenceEncoder( + padding_idx=task.dictionary.pad(), + 
vocab_size=len(task.dictionary), + num_encoder_layers=args.encoder_layers, + embedding_dim=args.embed_dim, + ffn_embedding_dim=args.ffn_embed_dim, + num_attention_heads=args.attention_heads, + dropout=args.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + max_seq_len=task.max_positions() + if task.max_positions() + else args.tokens_per_sample, + num_segments=2, + offset_positions_by_padding=False, + encoder_normalize_before=args.encoder_normalize_before, + apply_bert_init=args.apply_bert_init, + activation_fn=args.activation_fn, + ) + + classifier_embed_dim = args.embed_dim + ffn_embedding_dim = args.ffn_embed_dim + num_attention_heads = args.attention_heads + dropout = args.dropout + attention_dropout = args.attention_dropout + activation_dropout = args.activation_dropout + activation_fn = args.activation_fn + + self.joint_classification = args.joint_classification + if args.joint_classification == "sent": + if args.joint_normalize_before: + self.joint_layer_norm = LayerNorm(classifier_embed_dim) + else: + self.joint_layer_norm = None + + self.joint_layers = nn.ModuleList( + [ + TransformerSentenceEncoderLayer( + embedding_dim=classifier_embed_dim, + ffn_embedding_dim=ffn_embedding_dim, + num_attention_heads=num_attention_heads, + dropout=dropout, + attention_dropout=attention_dropout, + activation_dropout=activation_dropout, + activation_fn=activation_fn, + ) + for _ in range(args.num_joint_layers) + ] + ) + + self.classifier = RobertaClassificationHead( + classifier_embed_dim, + classifier_embed_dim, + 1, # num_classes + "tanh", + args.classifier_dropout, + ) + + def forward(self, src_tokens, src_lengths): + segment_labels = self.get_segment_labels(src_tokens) + positions = self.get_positions(src_tokens, segment_labels) + + inner_states, _ = self.model( + tokens=src_tokens, + segment_labels=segment_labels, + last_state_only=True, + positions=positions, + ) + + return inner_states[-1].transpose(0, 1) # T x B x C 
-> B x T x C + + def sentence_forward(self, encoder_out, src_tokens=None, sentence_rep="head"): + # encoder_out: B x T x C + if sentence_rep == "head": + x = encoder_out[:, :1, :] + else: # 'meanpool', 'maxpool' + assert src_tokens is not None, "meanpool requires src_tokens input" + segment_labels = self.get_segment_labels(src_tokens) + padding_mask = src_tokens.ne(self.padding_idx) + encoder_mask = segment_labels * padding_mask.type_as(segment_labels) + + if sentence_rep == "meanpool": + ntokens = torch.sum(encoder_mask, dim=1, keepdim=True) + x = torch.sum( + encoder_out * encoder_mask.unsqueeze(2), dim=1, keepdim=True + ) / ntokens.unsqueeze(2).type_as(encoder_out) + else: # 'maxpool' + encoder_out[ + (encoder_mask == 0).unsqueeze(2).repeat(1, 1, encoder_out.shape[-1]) + ] = -float("inf") + x, _ = torch.max(encoder_out, dim=1, keepdim=True) + + if hasattr(self, "transform_layer"): + x = self.transform_layer(x) + + return x # B x 1 x C + + def joint_forward(self, x): + # x: T x B x C + if self.joint_layer_norm: + x = self.joint_layer_norm(x.transpose(0, 1)) + x = x.transpose(0, 1) + + for layer in self.joint_layers: + x, _ = layer(x, self_attn_padding_mask=None) + return x + + def classification_forward(self, x): + # x: B x T x C + return self.classifier(x) + + +@dataclass +class DiscriminativeNMTRerankerConfig(FairseqDataclass): + pretrained_model: str = field( + default="", metadata={"help": "pretrained model to load"} + ) + sentence_rep: SENTENCE_REP_CHOICES = field( + default="head", + metadata={ + "help": "method to transform the output of the transformer stack to a sentence-level representation" + }, + ) + + dropout: float = field(default=0.1, metadata={"help": "dropout probability"}) + attention_dropout: float = field( + default=0.0, metadata={"help": "dropout probability for attention weights"} + ) + activation_dropout: float = field( + default=0.0, metadata={"help": "dropout probability after activation in FFN"} + ) + classifier_dropout: float = field( + 
default=0.0, metadata={"help": "classifier dropout probability"} + ) + embed_dim: int = field(default=768, metadata={"help": "embedding dimension"}) + ffn_embed_dim: int = field( + default=2048, metadata={"help": "embedding dimension for FFN"} + ) + encoder_layers: int = field(default=12, metadata={"help": "num encoder layers"}) + attention_heads: int = field(default=8, metadata={"help": "num attention heads"}) + encoder_normalize_before: bool = field( + default=False, metadata={"help": "apply layernorm before each encoder block"} + ) + apply_bert_init: bool = field( + default=False, metadata={"help": "use custom param initialization for BERT"} + ) + activation_fn: ACTIVATION_FN_CHOICES = field( + default="relu", metadata={"help": "activation function to use"} + ) + freeze_embeddings: bool = field( + default=False, metadata={"help": "freeze embeddings in the pretrained model"} + ) + n_trans_layers_to_freeze: int = field( + default=0, + metadata={ + "help": "number of layers to freeze in the pretrained transformer model" + }, + ) + + # joint classfication + joint_classification: JOINT_CLASSIFICATION_CHOICES = field( + default="none", + metadata={"help": "method to compute joint features for classification"}, + ) + num_joint_layers: int = field( + default=1, metadata={"help": "number of joint layers"} + ) + joint_normalize_before: bool = field( + default=False, + metadata={"help": "apply layer norm on the input to the joint layer"}, + ) + + +@register_model( + "discriminative_nmt_reranker", dataclass=DiscriminativeNMTRerankerConfig +) +class DiscriminativeNMTReranker(BaseFairseqModel): + @classmethod + def build_model(cls, args, task): + model = BertRanker(args, task) + return DiscriminativeNMTReranker(args, model) + + def __init__(self, args, model): + super().__init__() + + self.model = model + self.sentence_rep = args.sentence_rep + self.joint_classification = args.joint_classification + + def forward(self, src_tokens, src_lengths, **kwargs): + return 
self.model(src_tokens, src_lengths) + + def sentence_forward(self, encoder_out, src_tokens): + return self.model.sentence_forward(encoder_out, src_tokens, self.sentence_rep) + + def joint_forward(self, x): + return self.model.joint_forward(x) + + def classification_forward(self, x): + return self.model.classification_forward(x) diff --git a/fairseq/examples/discriminative_reranking_nmt/scripts/prep_data.py b/fairseq/examples/discriminative_reranking_nmt/scripts/prep_data.py new file mode 100644 index 0000000..7aa7d37 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/scripts/prep_data.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python + +import argparse +from multiprocessing import Pool +from pathlib import Path + +import sacrebleu +import sentencepiece as spm + + +def read_text_file(filename): + with open(filename, "r") as f: + output = [line.strip() for line in f] + + return output + + +def get_bleu(in_sent, target_sent): + bleu = sacrebleu.corpus_bleu([in_sent], [[target_sent]]) + out = " ".join( + map(str, [bleu.score, bleu.sys_len, bleu.ref_len] + bleu.counts + bleu.totals) + ) + return out + + +def get_ter(in_sent, target_sent): + ter = sacrebleu.corpus_ter([in_sent], [[target_sent]]) + out = " ".join(map(str, [ter.score, ter.num_edits, ter.ref_length])) + return out + + +def init(sp_model): + global sp + sp = spm.SentencePieceProcessor() + sp.Load(sp_model) + + +def process(source_sent, target_sent, hypo_sent, metric): + source_bpe = " ".join(sp.EncodeAsPieces(source_sent)) + hypo_bpe = [" ".join(sp.EncodeAsPieces(h)) for h in hypo_sent] + + if metric == "bleu": + score_str = [get_bleu(h, target_sent) for h in hypo_sent] + else: # ter + score_str = [get_ter(h, target_sent) for h in hypo_sent] + + return source_bpe, hypo_bpe, score_str + + +def main(args): + assert ( + args.split.startswith("train") or args.num_shards == 1 + ), "--num-shards should be set to 1 for valid and test sets" + assert ( + args.split.startswith("train") + or 
args.split.startswith("valid") + or args.split.startswith("test") + ), "--split should be set to train[n]/valid[n]/test[n]" + + source_sents = read_text_file(args.input_source) + target_sents = read_text_file(args.input_target) + + num_sents = len(source_sents) + assert num_sents == len( + target_sents + ), f"{args.input_source} and {args.input_target} should have the same number of sentences." + + hypo_sents = read_text_file(args.input_hypo) + assert ( + len(hypo_sents) % args.beam == 0 + ), f"Number of hypotheses ({len(hypo_sents)}) cannot be divided by beam size ({args.beam})." + + hypo_sents = [ + hypo_sents[i : i + args.beam] for i in range(0, len(hypo_sents), args.beam) + ] + assert num_sents == len( + hypo_sents + ), f"{args.input_hypo} should contain {num_sents * args.beam} hypotheses but only has {len(hypo_sents) * args.beam}. (--beam={args.beam})" + + output_dir = args.output_dir / args.metric + for ns in range(args.num_shards): + print(f"processing shard {ns+1}/{args.num_shards}") + shard_output_dir = output_dir / f"split{ns+1}" + source_output_dir = shard_output_dir / "input_src" + hypo_output_dir = shard_output_dir / "input_tgt" + metric_output_dir = shard_output_dir / args.metric + + source_output_dir.mkdir(parents=True, exist_ok=True) + hypo_output_dir.mkdir(parents=True, exist_ok=True) + metric_output_dir.mkdir(parents=True, exist_ok=True) + + if args.n_proc > 1: + with Pool( + args.n_proc, initializer=init, initargs=(args.sentencepiece_model,) + ) as p: + output = p.starmap( + process, + [ + (source_sents[i], target_sents[i], hypo_sents[i], args.metric) + for i in range(ns, num_sents, args.num_shards) + ], + ) + else: + init(args.sentencepiece_model) + output = [ + process(source_sents[i], target_sents[i], hypo_sents[i], args.metric) + for i in range(ns, num_sents, args.num_shards) + ] + + with open(source_output_dir / f"{args.split}.bpe", "w") as s_o, open( + hypo_output_dir / f"{args.split}.bpe", "w" + ) as h_o, open(metric_output_dir / 
f"{args.split}.{args.metric}", "w") as m_o: + for source_bpe, hypo_bpe, score_str in output: + assert len(hypo_bpe) == len(score_str) + for h, m in zip(hypo_bpe, score_str): + s_o.write(f"{source_bpe}\n") + h_o.write(f"{h}\n") + m_o.write(f"{m}\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input-source", type=Path, required=True) + parser.add_argument("--input-target", type=Path, required=True) + parser.add_argument("--input-hypo", type=Path, required=True) + parser.add_argument("--output-dir", type=Path, required=True) + parser.add_argument("--split", type=str, required=True) + parser.add_argument("--beam", type=int, required=True) + parser.add_argument("--sentencepiece-model", type=str, required=True) + parser.add_argument("--metric", type=str, choices=["bleu", "ter"], default="bleu") + parser.add_argument("--num-shards", type=int, default=1) + parser.add_argument("--n-proc", type=int, default=8) + + args = parser.parse_args() + + main(args) diff --git a/fairseq/examples/discriminative_reranking_nmt/tasks/__init__.py b/fairseq/examples/discriminative_reranking_nmt/tasks/__init__.py new file mode 100644 index 0000000..2d78ca9 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/tasks/__init__.py @@ -0,0 +1,6 @@ +from .discriminative_reranking_task import DiscriminativeRerankingNMTTask + + +__all__ = [ + "DiscriminativeRerankingNMTTask", +] diff --git a/fairseq/examples/discriminative_reranking_nmt/tasks/discriminative_reranking_task.py b/fairseq/examples/discriminative_reranking_nmt/tasks/discriminative_reranking_task.py new file mode 100644 index 0000000..b4ed2a6 --- /dev/null +++ b/fairseq/examples/discriminative_reranking_nmt/tasks/discriminative_reranking_task.py @@ -0,0 +1,490 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from dataclasses import dataclass, field + +import itertools +import logging +import os + +import numpy as np +import torch + +from fairseq.logging import metrics +from fairseq.data import ( + ConcatDataset, + ConcatSentencesDataset, + data_utils, + Dictionary, + IdDataset, + indexed_dataset, + NestedDictionaryDataset, + NumSamplesDataset, + NumelDataset, + PrependTokenDataset, + RawLabelDataset, + RightPadDataset, + SortDataset, + TruncateDataset, + TokenBlockDataset, +) +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.tasks import FairseqTask, register_task +from omegaconf import II, MISSING + + +EVAL_BLEU_ORDER = 4 +TARGET_METRIC_CHOICES = ChoiceEnum(["bleu", "ter"]) + +logger = logging.getLogger(__name__) + + +@dataclass +class DiscriminativeRerankingNMTConfig(FairseqDataclass): + data: str = field(default=MISSING, metadata={"help": "path to data directory"}) + num_data_splits: int = field( + default=1, metadata={"help": "total number of data splits"} + ) + no_shuffle: bool = field( + default=False, metadata={"help": "do not shuffle training data"} + ) + max_positions: int = field( + default=512, metadata={"help": "number of positional embeddings to learn"} + ) + include_src: bool = field( + default=False, metadata={"help": "include source sentence"} + ) + mt_beam: int = field(default=50, metadata={"help": "beam size of input hypotheses"}) + eval_target_metric: bool = field( + default=False, + metadata={"help": "evaluation with the target metric during validation"}, + ) + target_metric: TARGET_METRIC_CHOICES = field( + default="bleu", metadata={"help": "name of the target metric to optimize for"} + ) + train_subset: str = field( + default=II("dataset.train_subset"), + metadata={"help": "data subset to use for training (e.g. 
train, valid, test)"}, + ) + seed: int = field( + default=II("common.seed"), + metadata={"help": "pseudo random number generator seed"}, + ) + + +class RerankerScorer(object): + """Scores the target for a given (source (optional), target) input.""" + + def __init__(self, args, mt_beam): + self.mt_beam = mt_beam + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + """Score a batch of translations.""" + net_input = sample["net_input"] + + assert len(models) == 1, "does not support model ensemble" + model = models[0] + + bs = net_input["src_tokens"].shape[0] + assert ( + model.joint_classification == "none" or bs % self.mt_beam == 0 + ), f"invalid batch size ({bs}) for joint classification with beam size ({self.mt_beam})" + + model.eval() + logits = model(**net_input) + + batch_out = model.sentence_forward(logits, net_input["src_tokens"]) + if model.joint_classification == "sent": + batch_out = model.joint_forward( + batch_out.view(self.mt_beam, bs // self.mt_beam, -1) + ) + scores = model.classification_forward( + batch_out.view(bs, 1, -1) + ) # input: B x T x C + + return scores + + +@register_task( + "discriminative_reranking_nmt", dataclass=DiscriminativeRerankingNMTConfig +) +class DiscriminativeRerankingNMTTask(FairseqTask): + """ + Translation rerank task. + The input can be either (src, tgt) sentence pairs or tgt sentence only. 
+ """ + + cfg: DiscriminativeRerankingNMTConfig + + def __init__(self, cfg: DiscriminativeRerankingNMTConfig, data_dictionary=None): + super().__init__(cfg) + self.dictionary = data_dictionary + self._max_positions = cfg.max_positions + # args.tokens_per_sample = self._max_positions + # self.num_classes = 1 # for model + + @classmethod + def load_dictionary(cls, cfg, filename): + """Load the dictionary from the filename""" + dictionary = Dictionary.load(filename) + dictionary.add_symbol("") # for loading pretrained XLMR model + + return dictionary + + @classmethod + def setup_task(cls, cfg: DiscriminativeRerankingNMTConfig, **kwargs): + # load data dictionary (assume joint dictionary) + data_path = cfg.data + data_dict = cls.load_dictionary( + cfg, os.path.join(data_path, "input_src/dict.txt") + ) + + logger.info("[input] src dictionary: {} types".format(len(data_dict))) + + return DiscriminativeRerankingNMTTask(cfg, data_dict) + + def load_dataset(self, split, epoch=0, combine=False, **kwargs): + """Load a given dataset split (e.g., train, valid, test).""" + if self.cfg.data.endswith("1"): + data_shard = (epoch - 1) % self.cfg.num_data_splits + 1 + data_path = self.cfg.data[:-1] + str(data_shard) + else: + data_path = self.cfg.data + + def get_path(type, data_split): + return os.path.join(data_path, str(type), data_split) + + def make_dataset(type, dictionary, data_split, combine): + split_path = get_path(type, data_split) + + dataset = data_utils.load_indexed_dataset( + split_path, + dictionary, + combine=combine, + ) + return dataset + + def load_split(data_split, metric): + input_src = None + if self.cfg.include_src: + input_src = make_dataset( + "input_src", self.dictionary, data_split, combine=False + ) + assert input_src is not None, "could not find dataset: {}".format( + get_path("input_src", data_split) + ) + + input_tgt = make_dataset( + "input_tgt", self.dictionary, data_split, combine=False + ) + assert input_tgt is not None, "could not find dataset: 
{}".format( + get_path("input_tgt", data_split) + ) + + label_path = f"{get_path(metric, data_split)}.{metric}" + assert os.path.exists(label_path), f"could not find dataset: {label_path}" + + np_labels = np.loadtxt(label_path) + if self.cfg.target_metric == "ter": + np_labels = -np_labels + label = RawLabelDataset(np_labels) + + return input_src, input_tgt, label + + src_datasets = [] + tgt_datasets = [] + label_datasets = [] + + if split == self.cfg.train_subset: + for k in itertools.count(): + split_k = "train" + (str(k) if k > 0 else "") + prefix = os.path.join(data_path, "input_tgt", split_k) + if not indexed_dataset.dataset_exists(prefix, impl=None): + if k > 0: + break + else: + raise FileNotFoundError(f"Dataset not found: {prefix}") + input_src, input_tgt, label = load_split( + split_k, self.cfg.target_metric + ) + src_datasets.append(input_src) + tgt_datasets.append(input_tgt) + label_datasets.append(label) + else: + input_src, input_tgt, label = load_split(split, self.cfg.target_metric) + src_datasets.append(input_src) + tgt_datasets.append(input_tgt) + label_datasets.append(label) + + if len(tgt_datasets) == 1: + input_tgt, label = tgt_datasets[0], label_datasets[0] + if self.cfg.include_src: + input_src = src_datasets[0] + else: + input_tgt = ConcatDataset(tgt_datasets) + label = ConcatDataset(label_datasets) + if self.cfg.include_src: + input_src = ConcatDataset(src_datasets) + + input_tgt = TruncateDataset(input_tgt, self.cfg.max_positions) + if self.cfg.include_src: + input_src = PrependTokenDataset(input_src, self.dictionary.bos()) + input_src = TruncateDataset(input_src, self.cfg.max_positions) + src_lengths = NumelDataset(input_src, reduce=False) + src_tokens = ConcatSentencesDataset(input_src, input_tgt) + else: + src_tokens = PrependTokenDataset(input_tgt, self.dictionary.bos()) + src_lengths = NumelDataset(src_tokens, reduce=False) + + dataset = { + "id": IdDataset(), + "net_input": { + "src_tokens": RightPadDataset( + src_tokens, + 
pad_idx=self.source_dictionary.pad(), + ), + "src_lengths": src_lengths, + }, + "nsentences": NumSamplesDataset(), + "ntokens": NumelDataset(src_tokens, reduce=True), + "target": label, + } + + dataset = NestedDictionaryDataset( + dataset, + sizes=[src_tokens.sizes], + ) + + assert ( + len(dataset) % self.cfg.mt_beam == 0 + ), "dataset size (%d) is not a multiple of beam size (%d)" % ( + len(dataset), + self.cfg.mt_beam, + ) + + # no need to shuffle valid/test sets + if not self.cfg.no_shuffle and split == self.cfg.train_subset: + + # need to keep all hypothese together + start_idx = np.arange(0, len(dataset), self.cfg.mt_beam) + with data_utils.numpy_seed(self.cfg.seed + epoch): + np.random.shuffle(start_idx) + + idx = np.arange(0, self.cfg.mt_beam) + shuffle = np.tile(idx, (len(start_idx), 1)).reshape(-1) + np.tile( + start_idx, (self.cfg.mt_beam, 1) + ).transpose().reshape(-1) + + dataset = SortDataset( + dataset, + sort_order=[shuffle], + ) + + logger.info(f"Loaded {split} with #samples: {len(dataset)}") + + self.datasets[split] = dataset + return self.datasets[split] + + def build_dataset_for_inference(self, src_tokens, src_lengths, **kwargs): + assert not self.cfg.include_src or len(src_tokens[0]) == 2 + input_src = None + if self.cfg.include_src: + input_src = TokenBlockDataset( + [t[0] for t in src_tokens], + [l[0] for l in src_lengths], + block_size=None, # ignored for "eos" break mode + pad=self.source_dictionary.pad(), + eos=self.source_dictionary.eos(), + break_mode="eos", + ) + input_src = PrependTokenDataset(input_src, self.dictionary.bos()) + input_src = TruncateDataset(input_src, self.cfg.max_positions) + + input_tgt = TokenBlockDataset( + [t[-1] for t in src_tokens], + [l[-1] for l in src_lengths], + block_size=None, # ignored for "eos" break mode + pad=self.source_dictionary.pad(), + eos=self.source_dictionary.eos(), + break_mode="eos", + ) + input_tgt = TruncateDataset(input_tgt, self.cfg.max_positions) + if self.cfg.include_src: + src_tokens = 
ConcatSentencesDataset(input_src, input_tgt) + src_lengths = NumelDataset(input_src, reduce=False) + else: + input_tgt = PrependTokenDataset(input_tgt, self.dictionary.bos()) + src_tokens = input_tgt + src_lengths = NumelDataset(src_tokens, reduce=False) + + dataset = { + "id": IdDataset(), + "net_input": { + "src_tokens": RightPadDataset( + src_tokens, + pad_idx=self.source_dictionary.pad(), + ), + "src_lengths": src_lengths, + }, + "nsentences": NumSamplesDataset(), + "ntokens": NumelDataset(src_tokens, reduce=True), + } + + return NestedDictionaryDataset( + dataset, + sizes=[src_tokens.sizes], + ) + + def build_model(self, cfg: FairseqDataclass, from_checkpoint: bool = False): + return super().build_model(cfg) + + def build_generator(self, args): + return RerankerScorer(args, mt_beam=self.cfg.mt_beam) + + def max_positions(self): + return self._max_positions + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary + + def create_dummy_batch(self, device): + dummy_target = ( + torch.zeros(self.cfg.mt_beam, EVAL_BLEU_ORDER * 2 + 3).long().to(device) + if not self.cfg.eval_ter + else torch.zeros(self.cfg.mt_beam, 3).long().to(device) + ) + + return { + "id": torch.zeros(self.cfg.mt_beam, 1).long().to(device), + "net_input": { + "src_tokens": torch.zeros(self.cfg.mt_beam, 4).long().to(device), + "src_lengths": torch.ones(self.cfg.mt_beam, 1).long().to(device), + }, + "nsentences": 0, + "ntokens": 0, + "target": dummy_target, + } + + def train_step( + self, sample, model, criterion, optimizer, update_num, ignore_grad=False + ): + if ignore_grad and sample is None: + sample = self.create_dummy_batch(model.device) + + return super().train_step( + sample, model, criterion, optimizer, update_num, ignore_grad + ) + + def valid_step(self, sample, model, criterion): + if sample is None: + sample = self.create_dummy_batch(model.device) + + loss, sample_size, logging_output = 
super().valid_step(sample, model, criterion) + + if not self.cfg.eval_target_metric: + return loss, sample_size, logging_output + + scores = logging_output["scores"] + + if self.cfg.target_metric == "bleu": + assert sample["target"].shape[1] == EVAL_BLEU_ORDER * 2 + 3, ( + "target does not contain enough information (" + + str(sample["target"].shape[1]) + + "for evaluating BLEU" + ) + + max_id = torch.argmax(scores, dim=1) + select_id = max_id + torch.arange( + 0, sample_size * self.cfg.mt_beam, self.cfg.mt_beam + ).to(max_id.device) + bleu_data = sample["target"][select_id, 1:].sum(0).data + + logging_output["_bleu_sys_len"] = bleu_data[0] + logging_output["_bleu_ref_len"] = bleu_data[1] + + for i in range(EVAL_BLEU_ORDER): + logging_output["_bleu_counts_" + str(i)] = bleu_data[2 + i] + logging_output["_bleu_totals_" + str(i)] = bleu_data[ + 2 + EVAL_BLEU_ORDER + i + ] + + elif self.cfg.target_metric == "ter": + assert sample["target"].shape[1] == 3, ( + "target does not contain enough information (" + + str(sample["target"].shape[1]) + + "for evaluating TER" + ) + + max_id = torch.argmax(scores, dim=1) + select_id = max_id + torch.arange( + 0, sample_size * self.cfg.mt_beam, self.cfg.mt_beam + ).to(max_id.device) + ter_data = sample["target"][select_id, 1:].sum(0).data + + logging_output["_ter_num_edits"] = -ter_data[0] + logging_output["_ter_ref_len"] = -ter_data[1] + + return loss, sample_size, logging_output + + def reduce_metrics(self, logging_outputs, criterion): + super().reduce_metrics(logging_outputs, criterion) + + if not self.cfg.eval_target_metric: + return + + def sum_logs(key): + return sum(log.get(key, 0) for log in logging_outputs) + + if self.cfg.target_metric == "bleu": + counts, totals = [], [] + for i in range(EVAL_BLEU_ORDER): + counts.append(sum_logs("_bleu_counts_" + str(i))) + totals.append(sum_logs("_bleu_totals_" + str(i))) + + if max(totals) > 0: + # log counts as numpy arrays -- log_scalar will sum them correctly + 
metrics.log_scalar("_bleu_counts", np.array(counts)) + metrics.log_scalar("_bleu_totals", np.array(totals)) + metrics.log_scalar("_bleu_sys_len", sum_logs("_bleu_sys_len")) + metrics.log_scalar("_bleu_ref_len", sum_logs("_bleu_ref_len")) + + def compute_bleu(meters): + import inspect + import sacrebleu + + fn_sig = inspect.getfullargspec(sacrebleu.compute_bleu)[0] + if "smooth_method" in fn_sig: + smooth = {"smooth_method": "exp"} + else: + smooth = {"smooth": "exp"} + bleu = sacrebleu.compute_bleu( + correct=meters["_bleu_counts"].sum, + total=meters["_bleu_totals"].sum, + sys_len=meters["_bleu_sys_len"].sum, + ref_len=meters["_bleu_ref_len"].sum, + **smooth, + ) + return round(bleu.score, 2) + + metrics.log_derived("bleu", compute_bleu) + elif self.cfg.target_metric == "ter": + num_edits = sum_logs("_ter_num_edits") + ref_len = sum_logs("_ter_ref_len") + + if ref_len > 0: + metrics.log_scalar("_ter_num_edits", num_edits) + metrics.log_scalar("_ter_ref_len", ref_len) + + def compute_ter(meters): + score = meters["_ter_num_edits"].sum / meters["_ter_ref_len"].sum + return round(score.item(), 2) + + metrics.log_derived("ter", compute_ter) diff --git a/fairseq/examples/emotion_conversion/README.md b/fairseq/examples/emotion_conversion/README.md new file mode 100644 index 0000000..caf22be --- /dev/null +++ b/fairseq/examples/emotion_conversion/README.md @@ -0,0 +1,214 @@ +# Textless speech emotion conversion using decomposed and discrete representations +[Felix Kreuk](https://felixkreuk.github.io), Adam Polyak, Jade Copet, Eugene Kharitonov, Tu-Anh Nguyen, Morgane Rivière, Wei-Ning Hsu, Abdelrahman Mohamed, Emmanuel Dupoux, [Yossi Adi](https://adiyoss.github.io) + +_abstract_: Speech emotion conversion is the task of modifying the perceived emotion of a speech utterance while preserving the lexical content and speaker identity. In this study, we cast the problem of emotion conversion as a spoken language translation task. 
We decompose speech into discrete and disentangled learned representations, consisting of content units, F0, speaker, and emotion. First, we modify the speech content by translating the content units to a target emotion, and then predict the prosodic features based on these units. Finally, the speech waveform is generated by feeding the predicted representations into a neural vocoder. Such a paradigm allows us to go beyond spectral and parametric changes of the signal, and model non-verbal vocalizations, such as laughter insertion, yawning removal, etc. We demonstrate objectively and subjectively that the proposed method is superior to the baselines in terms of perceived emotion and audio quality. We rigorously evaluate all components of such a complex system and conclude with an extensive model analysis and ablation study to better emphasize the architectural choices, strengths and weaknesses of the proposed method. Samples and code will be publicly available under the following link: https://speechbot.github.io/emotion. 
+ +## Installation +First, create a conda virtual environment and activate it: +``` +conda create -n emotion python=3.8 -y +conda activate emotion +``` + +Then, clone this repository: +``` +git clone https://github.com/facebookresearch/fairseq.git +cd fairseq/examples/emotion_conversion +git clone https://github.com/felixkreuk/speech-resynthesis +``` + +Next, download the EmoV discrete tokens: +``` +wget https://dl.fbaipublicfiles.com/textless_nlp/emotion_conversion/data.tar.gz # (still in fairseq/examples/emotion_conversion) +tar -xzvf data.tar.gz +``` + +Your `fairseq/examples/emotion_conversion` directory should look like this: +``` +drwxrwxr-x 3 felixkreuk felixkreuk 0 Feb 6 2022 data +drwxrwxr-x 3 felixkreuk felixkreuk 0 Sep 28 10:41 emotion_models +drwxr-xr-x 3 felixkreuk felixkreuk 0 Jun 29 05:43 fairseq_models +drwxr-xr-x 3 felixkreuk felixkreuk 0 Sep 28 10:41 preprocess +-rw-rw-r-- 1 felixkreuk felixkreuk 11K Dec 5 09:00 README.md +-rw-rw-r-- 1 felixkreuk felixkreuk 88 Mar 6 2022 requirements.txt +-rw-rw-r-- 1 felixkreuk felixkreuk 13K Jun 29 06:26 synthesize.py +``` + +Lastly, install fairseq and the other packages: +``` +pip install --editable ./ +pip install -r examples/emotion_conversion/requirements.txt +``` + +## Data preprocessing + +### Convert your audio to discrete representations +Please follow the steps described [here](https://github.com/pytorch/fairseq/tree/main/examples/hubert/simple_kmeans). +To generate the same discrete representations please use the following: +1. [HuBERT checkpoint](https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt) +2. k-means model at `data/hubert_base_ls960_layer9_clusters200/data_hubert_base_ls960_layer9_clusters200.bin` + +### Construct data splits +This step will use the discrete representations from the previous step and split them into train/valid/test sets for 3 tasks: +1. Translation model pre-training (BART language denoising) +2. Translation model training (content units emotion translation mechanism) +3. 
HiFiGAN model training (for synthesizing audio from discrete representations) + +Your processed data should be at `data/`: +1. `hubert_base_ls960_layer9_clusters200` - discrete representations extracted using HuBERT layer 9, clustered into 200 clusters. +2. `data.tsv` - a tsv file pointing to the EmoV dataset in your environment (Please edit the first line of this file according to your path). + +The following command will create the above splits: +``` +python examples/emotion_conversion/preprocess/create_core_manifest.py \ + --tsv data/data.tsv \ + --emov-km data/hubert_base_ls960_layer9_clusters200/data.km \ + --km data/hubert_base_ls960_layer9_clusters200/vctk.km \ + --dict data/hubert_base_ls960_layer9_clusters200/dict.txt \ + --manifests-dir $DATA +``` +* Set `$DATA` as the directory that will contain the processed data. + +### Extract F0 +To train the HiFiGAN vocoder we need to first extract the F0 curves: +``` +python examples/emotion_conversion/preprocess/extract_f0.py \ + --tsv data/data.tsv \ + --extractor pyaapt +``` + +## HiFiGAN training +Now we are all set to train the HiFiGAN vocoder: +``` +python examples/emotion_conversion/speech-resynthesis/train.py \ + --checkpoint_path \ + --config examples/emotion_conversion/speech-resynthesis/configs/EmoV/emov_hubert-layer9-cluster200_fixed-spkr-embedder_f0-raw_gst.json +``` + +## Translation Pre-training +Before translating emotions, we first need to pre-train the translation model as a denoising autoencoder (similarly to BART). 
+``` +python train.py \ + $DATA/fairseq-data/emov_multilingual_denoising_cross-speaker_dedup_nonzeroshot/tokenized \ + --save-dir \ + --tensorboard-logdir \ + --langs neutral,amused,angry,sleepy,disgusted,vctk.km \ + --dataset-impl mmap \ + --task multilingual_denoising \ + --arch transformer_small --criterion cross_entropy \ + --multilang-sampling-alpha 1.0 --sample-break-mode eos --max-tokens 16384 \ + --update-freq 1 --max-update 3000000 \ + --dropout 0.1 --attention-dropout 0.1 --relu-dropout 0.0 \ + --optimizer adam --weight-decay 0.01 --adam-eps 1e-06 \ + --clip-norm 0.1 --lr-scheduler polynomial_decay --lr 0.0003 \ + --total-num-update 3000000 --warmup-updates 10000 --fp16 \ + --poisson-lambda 3.5 --mask 0.3 --mask-length span-poisson --replace-length 1 --rotate 0 --mask-random 0.1 --insert 0 --permute-sentences 1.0 \ + --skip-invalid-size-inputs-valid-test \ + --user-dir examples/emotion_conversion/fairseq_models +``` + +## Translation Training +Now we are ready to train our emotion translation model: +``` +python train.py \ + --distributed-world-size 1 \ + $DATA/fairseq-data/emov_multilingual_translation_cross-speaker_dedup/tokenized/ \ + --save-dir \ + --tensorboard-logdir \ + --arch multilingual_small --task multilingual_translation \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --lang-pairs neutral-amused,neutral-sleepy,neutral-disgusted,neutral-angry,amused-sleepy,amused-disgusted,amused-neutral,amused-angry,angry-amused,angry-sleepy,angry-disgusted,angry-neutral,disgusted-amused,disgusted-sleepy,disgusted-neutral,disgusted-angry,sleepy-amused,sleepy-neutral,sleepy-disgusted,sleepy-angry \ + --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --lr 1e-05 --clip-norm 0 --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.01 --warmup-updates 2000 --lr-scheduler inverse_sqrt \ + --max-tokens 4096 --update-freq 1 --max-update 100000 \ + --required-batch-size-multiple 8 --fp16 --num-workers 4 \ + --seed 2 
--log-format json --log-interval 25 --save-interval-updates 1000 \ + --no-epoch-checkpoints --keep-best-checkpoints 1 --keep-interval-updates 1 \ + --finetune-from-model \ + --user-dir examples/emotion_conversion/fairseq_models +``` +* To share encoders/decoders use the `--share-encoders` and `--share-decoders` flags. +* To add source/target emotion tokens use the `--encoder-langtok {'src'|'tgt'}` and `--decoder-langtok` flags. + +## F0-predictor Training +The following command trains the F0 prediction module: +``` +cd examples/emotion_conversion +python -m emotion_models.pitch_predictor n_tokens=200 \ + train_tsv="$DATA/denoising/emov/train.tsv" \ + train_km="$DATA/denoising/emov/train.km" \ + valid_tsv="$DATA/denoising/emov/valid.tsv" \ + valid_km="$DATA/denoising/emov/valid.km" +``` +* See `hydra.run.dir` to configure directory for saving models. + +## Duration-predictor Training +The following command trains the duration prediction modules: +``` +cd examples/emotion_conversion +for emotion in "neutral" "amused" "angry" "disgusted" "sleepy"; do + python -m emotion_models.duration_predictor n_tokens=200 substring=$emotion \ + train_tsv="$DATA/denoising/emov/train.tsv" \ + train_km="$DATA/denoising/emov/train.km" \ + valid_tsv="$DATA/denoising/emov/valid.tsv" \ + valid_km="$DATA/denoising/emov/valid.km" +done +``` +* See `hydra.run.dir` to configure directory for saving models. 
+* After the above command you should have 5 duration models in your checkpoint directory: +``` +❯ ll duration_predictor/ +total 21M +-rw-rw-r-- 1 felixkreuk felixkreuk 4.1M Nov 15 2021 amused.ckpt +-rw-rw-r-- 1 felixkreuk felixkreuk 4.1M Nov 15 2021 angry.ckpt +-rw-rw-r-- 1 felixkreuk felixkreuk 4.1M Nov 15 2021 disgusted.ckpt +-rw-rw-r-- 1 felixkreuk felixkreuk 4.1M Nov 15 2021 neutral.ckpt +-rw-rw-r-- 1 felixkreuk felixkreuk 4.1M Nov 15 2021 sleepy.ckpt +``` + +## Token Generation +The following command uses `fairseq-generate` to generate the token sequences based on the source and target emotions. +``` +fairseq-generate \ + $DATA/fairseq-data/emov_multilingual_translation_cross-speaker_dedup/tokenized/ \ + --task multilingual_translation \ + --gen-subset test \ + --path \ + --beam 5 \ + --batch-size 4 --max-len-a 1.8 --max-len-b 10 --lenpen 1 --min-len 1 \ + --skip-invalid-size-inputs-valid-test --distributed-world-size 1 \ + --source-lang neutral --target-lang amused \ + --lang-pairs neutral-amused,neutral-sleepy,neutral-disgusted,neutral-angry,amused-sleepy,amused-disgusted,amused-neutral,amused-angry,angry-amused,angry-sleepy,angry-disgusted,angry-neutral,disgusted-amused,disgusted-sleepy,disgusted-neutral,disgusted-angry,sleepy-amused,sleepy-neutral,sleepy-disgusted,sleepy-angry \ + --results-path \ + --user-dir examples/emotion_conversion/fairseq_models +``` +* Modify `--source-lang` and `--target-lang` to control for the source and target emotions. +* See [fairseq documentation](https://fairseq.readthedocs.io/en/latest/command_line_tools.html#fairseq-generate) for a full overview of generation parameters (e.g., top-k/top-p sampling). 
+ +## Waveform Synthesis +Using the output of the above command, the HiFiGAN vocoder, and the prosody prediction modules (F0 and duration) we can now generate the output waveforms: +``` +python examples/emotion_conversion/synthesize.py \ + --result-path /generate-test.txt \ + --data $DATA/fairseq-data/emov_multilingual_translation_cross-speaker_dedup/neutral-amused \ + --orig-tsv examples/emotion_conversion/data/data.tsv \ + --orig-km examples/emotion_conversion/data/hubert_base_ls960_layer9_clusters200/data.km \ + --checkpoint-file /g_00400000 \ + --dur-model duration_predictor/ \ + --f0-model pitch_predictor/pitch_predictor.ckpt \ + -s neutral -t amused \ + --outdir ~/tmp/emotion_results/wavs/neutral-amused +``` +* Please make sure the source and target emotions here match those of the previous command. + +# Citation +If you find this useful in your research, please use the following BibTeX entry for citation. +``` +@article{kreuk2021textless, + title={Textless speech emotion conversion using decomposed and discrete representations}, + author={Kreuk, Felix and Polyak, Adam and Copet, Jade and Kharitonov, Eugene and Nguyen, Tu-Anh and Rivi{\`e}re, Morgane and Hsu, Wei-Ning and Mohamed, Abdelrahman and Dupoux, Emmanuel and Adi, Yossi}, + journal={Conference on Empirical Methods in Natural Language Processing (EMNLP)}, + year={2022} +} +``` diff --git a/fairseq/examples/emotion_conversion/emotion_models/__init__.py b/fairseq/examples/emotion_conversion/emotion_models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/emotion_conversion/emotion_models/duration_predictor.py b/fairseq/examples/emotion_conversion/emotion_models/duration_predictor.py new file mode 100644 index 0000000..eb47df0 --- /dev/null +++ b/fairseq/examples/emotion_conversion/emotion_models/duration_predictor.py @@ -0,0 +1,243 @@ +import logging +import os + +import hydra +import torch +import torch.nn as nn +import torch.nn.functional as F +from 
def load_ckpt(path):
    """Load a duration-predictor checkpoint and return the model on CPU in eval mode.

    The checkpoint is expected to hold the ``state_dict``, ``padding_token`` and
    ``model_class`` entries; the model is re-instantiated through hydra from
    ``model_class`` with its ``_target_`` forced to ``CnnPredictor``.

    Args:
        path: location of the checkpoint file.

    Returns:
        The restored model, moved to CPU and switched to evaluation mode.
    """
    # NOTE: torch.load unpickles arbitrary objects -- only load checkpoints
    # from a trusted source.
    # map_location="cpu": the model is returned on CPU anyway, and this lets
    # checkpoints saved from a CUDA run load on hosts without a GPU.
    ckpt = torch.load(path, map_location="cpu")
    ckpt["model_class"]["_target_"] = "emotion_models.duration_predictor.CnnPredictor"
    model = hydra.utils.instantiate(ckpt["model_class"])
    model.load_state_dict(ckpt["state_dict"])
    model.padding_token = ckpt["padding_token"]
    model = model.cpu()
    model.eval()
    return model
def l2_log_loss(input, target):
    """Return the element-wise squared error between ``input`` and ``log(target + 1)``.

    Args:
        input: predictions, already in the ``log(x + 1)`` domain.
        target: raw (non-log) targets; converted to float before the log.

    Returns:
        A tensor of un-reduced squared errors with the same shape as ``input``.
    """
    return F.mse_loss(
        input=input.float(),
        target=torch.log(target.float() + 1),
        # `reduce=False` is deprecated; `reduction="none"` is the equivalent.
        reduction="none",
    )
def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: module mapping a token batch to log-domain duration predictions.
        loader: iterable yielding ``(x, y, mask, lengths)`` batches.
        criterion: un-reduced loss ``criterion(yhat, y)``; masked and averaged here.
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batch tensors are moved to.

    Returns:
        ``(mean_loss, mean_scaled_loss)`` averaged over the number of batches,
        where the scaled loss is the masked absolute error after mapping the
        predictions back with ``round(exp(yhat) - 1)``.
    """
    model.train()
    epoch_loss = 0
    epoch_loss_scaled = 0
    for x, y, mask, _ in loader:
        x, y, mask = x.to(device), y.to(device), mask.to(device)
        # Clear gradients left over from the previous batch; without this the
        # gradients of every batch accumulate into each optimizer step.
        optimizer.zero_grad()
        yhat = model(x)
        loss = criterion(yhat, y) * mask
        loss = torch.mean(loss)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()
        # get normal scale loss (monitoring only, so no graph is needed)
        with torch.no_grad():
            yhat_scaled = torch.round(torch.exp(yhat) - 1)
            scaled_loss = torch.mean(torch.abs(yhat_scaled - y) * mask)
        epoch_loss_scaled += scaled_loss.item()
    return epoch_loss / len(loader), epoch_loss_scaled / len(loader)
n_tokens: ${n_tokens} + emb_dim: 128 + rnn_hidden: 128 + output_dim: 1 + dropout: 0 + n_layers: 1 + +optimizer: + _target_: torch.optim.Adam + lr: ${lr} + betas: [0.9, 0.98] + eps: 0.000000001 + weight_decay: 0 + +cnn: + _target_: emotion_models.duration_predictor.CnnPredictor + n_tokens: ${n_tokens} + emb_dim: 128 + channels: 256 + kernel: 3 + output_dim: 1 + dropout: 0.5 + n_layers: 1 + +hydra: + run: + dir: /checkpoint/felixkreuk/experiments/duration_predictor/${hydra.job.override_dirname} + job: + config: + # configuration for the ${hydra.job.override_dirname} runtime variable + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: ['train_tsv', 'train_km', 'valid_tsv', 'valid_km'] diff --git a/fairseq/examples/emotion_conversion/emotion_models/pitch_predictor.py b/fairseq/examples/emotion_conversion/emotion_models/pitch_predictor.py new file mode 100644 index 0000000..4314469 --- /dev/null +++ b/fairseq/examples/emotion_conversion/emotion_models/pitch_predictor.py @@ -0,0 +1,559 @@ +import logging +import os +import random +import sys +from collections import defaultdict + +import hydra +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange +from einops.layers.torch import Rearrange +from scipy.io.wavfile import read +from scipy.ndimage import gaussian_filter1d +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm + +dir_path = os.path.dirname(__file__) +resynth_path = os.path.dirname(dir_path) + "/speech-resynthesis" +sys.path.append(resynth_path) +from dataset import parse_speaker, parse_style +from .utils import F0Stat + +MAX_WAV_VALUE = 32768.0 +logger = logging.getLogger(__name__) + + +def quantize_f0(speaker_to_f0, nbins, normalize, log): + f0_all = [] + for speaker, f0 in speaker_to_f0.items(): + f0 = f0.raw_data + if log: + f0 = f0.log() + mean = speaker_to_f0[speaker].mean_log if log else speaker_to_f0[speaker].mean + std = speaker_to_f0[speaker].std_log if 
def bin2freq(x, f0_min, f0_max, bins, mode):
    """Convert per-bin scores back to one F0 value per position.

    Args:
        x: tensor whose last axis has ``len(bins) + 1`` entries (one per bin).
        f0_min: value assigned to the lowest bin.
        f0_max: unused in this function; kept for interface compatibility.
        bins: 1-D tensor of bin boundaries (``len(bins) + 1`` bins in total).
        mode: ``"mean"`` for the score-weighted average of the bin values,
            ``"argmax"`` for the value of the highest-scoring bin.

    Returns:
        Tensor shaped like ``x`` without its last axis.

    Raises:
        AssertionError: if the last axis of ``x`` does not match the bin count.
        NotImplementedError: for any other ``mode``.
    """
    n_bins = len(bins) + 1
    assert x.shape[-1] == n_bins
    # Create the lowest bin value directly on x's device and with the dtype of
    # `bins`, so the concatenation cannot fail on a device mismatch.
    bins = bins.to(x.device)
    lowest = torch.tensor([f0_min], dtype=bins.dtype, device=bins.device)
    bins = torch.cat([lowest, bins])
    if mode == "mean":
        f0 = (x * bins).sum(-1, keepdim=True) / x.sum(-1, keepdim=True)
    elif mode == "argmax":
        idx = F.one_hot(x.argmax(-1), num_classes=n_bins)
        f0 = (idx * bins).sum(-1, keepdim=True)
    else:
        raise NotImplementedError(f"unsupported mode: {mode!r}")
    return f0[..., 0]
class Collator:
    """Batch ``(tokens, f0, f0_raw, spk, gst)`` samples into padded tensors."""

    def __init__(self, padding_idx):
        # value used both to pad every sequence and to derive the mask
        self.padding_idx = padding_idx

    def __call__(self, batch):
        """Pad the variable-length fields and stack the per-sample ids.

        Returns:
            ``(tokens, f0, f0_raw, spk, gst, mask, lengths)`` where ``mask`` is
            ``tokens != padding_idx`` and ``lengths`` holds the unpadded token
            sequence lengths.
        """

        def column(index):
            return [sample[index] for sample in batch]

        def pad(sequences):
            return torch.nn.utils.rnn.pad_sequence(
                sequences, batch_first=True, padding_value=self.padding_idx
            )

        token_seqs = column(0)
        lengths = [len(seq) for seq in token_seqs]
        tokens = pad(token_seqs)
        f0 = pad(column(1))
        f0_raw = pad(column(2))
        spk = torch.LongTensor(column(3))
        gst = torch.LongTensor(column(4))
        mask = tokens != self.padding_idx
        return tokens, f0, f0_raw, spk, gst, mask, lengths
+ ] + for _ in range(n_layers - 1): + layers += [ + nn.Sequential( + Rearrange("b t c -> b c t"), + nn.Conv1d( + channels, + channels, + kernel_size=kernel, + padding=(kernel - 1) // 2, + ), + Rearrange("b c t -> b t c"), + nn.ReLU(), + nn.LayerNorm(channels), + nn.Dropout(dropout), + ) + ] + self.conv_layer = nn.ModuleList(layers) + self.proj = nn.Linear(channels, n_bins) + + def forward(self, x, gst=None): + x = self.token_emb(x) + feats = [x] + + if gst is not None: + gst = self.gst_emb(gst) + gst = rearrange(gst, "b c -> b c 1") + gst = F.interpolate(gst, x.shape[1]) + gst = rearrange(gst, "b c t -> b t c") + feats.append(gst) + + x = torch.cat(feats, dim=-1) + + for i, conv in enumerate(self.conv_layer): + if i != 0: + x = conv(x) + x + else: + x = conv(x) + + x = self.proj(x) + x = x.squeeze(-1) + + if self.f0_pred == "mean": + x = torch.sigmoid(x) + elif self.f0_pred == "argmax": + x = torch.softmax(x, dim=-1) + else: + raise NotImplementedError + return x + + def setup_f0_stats(self, f0_min, f0_max, f0_bins, speaker_stats): + self.f0_min = f0_min + self.f0_max = f0_max + self.f0_bins = f0_bins + self.speaker_stats = speaker_stats + self.setup = True + + def inference(self, x, spk_id=None, gst=None): + assert ( + self.setup == True + ), "make sure that `setup_f0_stats` was called before inference!" 
class PitchDataset(Dataset):
    """Dataset yielding unit sequences with quantized and raw F0 tracks.

    Each item is ``(x, f0, f0_raw, spk, gst)``: the unit ids from the ``.km``
    file, the one-hot (optionally smoothed) F0 bin targets, the raw F0
    resampled to the unit length, and the speaker / style ids.
    """

    def __init__(
        self,
        tsv_path,
        km_path,
        substring,
        spk,
        spk2id,
        gst,
        gst2id,
        f0_bins,
        f0_bin_type,
        f0_smoothing,
        f0_norm,
        f0_log,
    ):
        """Load the manifests and derive speaker statistics and F0 bins.

        Args:
            tsv_path: manifest whose first line is the root directory and whose
                remaining lines describe one utterance each.
            km_path: file with one space-separated unit sequence per line.
            substring: if non-empty, keep only entries whose tsv line contains
                it (case-insensitive).
            spk, gst: forwarded to ``parse_speaker`` / ``parse_style``.
            spk2id, gst2id: mappings from parsed speaker / style to integer ids.
            f0_bins: number of F0 bins.
            f0_bin_type: ``"adaptive"`` or ``"uniform"``.
            f0_smoothing: sigma of the Gaussian smoothing of the one-hot
                targets; ``0`` disables it.
            f0_norm: ``"mean"`` or ``"meanstd"`` to normalize per speaker.
            f0_log: whether F0 is processed in the log domain.

        Raises:
            NotImplementedError: for an unknown ``f0_bin_type``.
        """
        # Context managers make sure both manifest handles are closed.
        with open(tsv_path, "r") as tsv_file:
            lines = tsv_file.readlines()
        self.root, self.tsv = lines[0].strip(), lines[1:]
        with open(km_path, "r") as km_file:
            self.km = km_file.readlines()
        print(f"loaded {len(self.km)} files")

        self.spk = spk
        self.spk2id = spk2id
        self.gst = gst
        self.gst2id = gst2id

        self.f0_bins = f0_bins
        self.f0_smoothing = f0_smoothing
        self.f0_norm = f0_norm
        self.f0_log = f0_log

        if substring != "":
            tsv, km = [], []
            for tsv_line, km_line in zip(self.tsv, self.km):
                if substring.lower() in tsv_line.lower():
                    tsv.append(tsv_line)
                    km.append(km_line)
            self.tsv, self.km = tsv, km
            print(f"after filtering: {len(self.km)} files")

        # Speaker stats must exist before min/max, which normalizes with them.
        self.speaker_stats = self._compute_f0_stats()
        self.f0_min, self.f0_max = self._compute_f0_minmax()
        if f0_bin_type == "adaptive":
            self.f0_bins = quantize_f0(
                self.speaker_stats, self.f0_bins, self.f0_norm, self.f0_log
            )
        elif f0_bin_type == "uniform":
            # drop both end points: only the inner boundaries are kept
            self.f0_bins = torch.linspace(self.f0_min, self.f0_max, self.f0_bins + 1)[
                1:-1
            ]
        else:
            raise NotImplementedError
        print(f"f0 min: {self.f0_min}, f0 max: {self.f0_max}")
        print(f"bins: {self.f0_bins} (shape: {self.f0_bins.shape})")

    def __len__(self):
        """Return the number of unit sequences."""
        return len(self.km)

    def _load_f0(self, tsv_line):
        """Load the ``.yaapt.f0.npy`` track stored next to the utterance's wav."""
        tsv_line = tsv_line.split("\t")[0]
        f0 = self.root + "/" + tsv_line.replace(".wav", ".yaapt.f0.npy")
        f0 = np.load(f0)
        f0 = torch.FloatTensor(f0)
        return f0

    def _preprocess_f0(self, f0, spk):
        """Apply the optional log and per-speaker normalization to ``f0``.

        ``f0`` is modified in place and also returned.
        """
        mask = f0 != -999999  # process all frames
        # mask = (f0 != 0) # only process voiced frames
        mean = (
            self.speaker_stats[spk].mean_log
            if self.f0_log
            else self.speaker_stats[spk].mean
        )
        std = (
            self.speaker_stats[spk].std_log
            if self.f0_log
            else self.speaker_stats[spk].std
        )
        if self.f0_log:
            # avoid log(0) on zero-valued frames
            f0[f0 == 0] = 1e-5
            f0[mask] = f0[mask].log()
        if self.f0_norm == "mean":
            f0[mask] = f0[mask] - mean
        if self.f0_norm == "meanstd":
            f0[mask] = (f0[mask] - mean) / std
        return f0

    def _compute_f0_minmax(self):
        """Return the global min and max of the preprocessed F0 over all files."""
        f0_min, f0_max = float("inf"), -float("inf")
        for tsv_line in tqdm(self.tsv, desc="computing f0 minmax"):
            spk = self.spk2id[parse_speaker(tsv_line, self.spk)]
            f0 = self._load_f0(tsv_line)
            f0 = self._preprocess_f0(f0, spk)
            f0_min = min(f0_min, f0.min().item())
            f0_max = max(f0_max, f0.max().item())
        return f0_min, f0_max

    def _compute_f0_stats(self):
        """Accumulate one ``F0Stat`` per speaker id from the non-zero frames."""
        from functools import partial

        speaker_stats = defaultdict(partial(F0Stat, True))
        for tsv_line in tqdm(self.tsv, desc="computing speaker stats"):
            spk = self.spk2id[parse_speaker(tsv_line, self.spk)]
            f0 = self._load_f0(tsv_line)
            mask = f0 != 0
            f0 = f0[mask]  # compute stats only on voiced parts
            speaker_stats[spk].update(f0)
        return speaker_stats

    def __getitem__(self, i):
        """Return ``(x, f0, f0_raw, spk, gst)`` for item ``i``."""
        x = self.km[i]
        x = x.split(" ")
        x = list(map(int, x))
        x = torch.LongTensor(x)

        gst = parse_style(self.tsv[i], self.gst)
        gst = self.gst2id[gst]
        spk = parse_speaker(self.tsv[i], self.spk)
        spk = self.spk2id[spk]

        f0_raw = self._load_f0(self.tsv[i])
        # clone: _preprocess_f0 works in place and f0_raw must stay untouched
        f0 = self._preprocess_f0(f0_raw.clone(), spk)

        # resample both tracks to the number of units
        f0 = F.interpolate(f0.unsqueeze(0).unsqueeze(0), x.shape[0])[0, 0]
        f0_raw = F.interpolate(f0_raw.unsqueeze(0).unsqueeze(0), x.shape[0])[0, 0]

        f0 = freq2bin(f0, f0_min=self.f0_min, f0_max=self.f0_max, bins=self.f0_bins)
        f0 = F.one_hot(f0.long(), num_classes=len(self.f0_bins) + 1).float()
        if self.f0_smoothing > 0:
            f0 = torch.tensor(
                gaussian_filter1d(f0.float().numpy(), sigma=self.f0_smoothing)
            )
        return x, f0, f0_raw, spk, gst
def run_epoch(model, loader, optimizer, device, cfg, mode):
    """Run one pass over ``loader`` and return the per-batch averaged metrics.

    Args:
        model: pitch predictor; called as ``model(x, gst)`` for the binned
            prediction and ``model.inference(x, spk_id, gst)`` for raw F0.
        loader: iterable of 7-tuples
            ``(x, f0_bin, f0_raw, spk_id, gst, mask, _)``.
        optimizer: optimizer stepped when ``mode == "train"``; unused
            otherwise (may be ``None``).
        device: device the batch tensors are moved to.
        cfg: config; only ``cfg.f0_pred`` (``"mean"`` or ``"argmax"``) is read.
        mode: ``"train"`` enables training; any other value evaluates.

    Returns:
        ``(loss, l1, l1_voiced)``, each summed over batches and divided by
        ``len(loader)``: the training objective, the L1 error of the raw F0
        over masked-in frames, and the same restricted to frames whose
        reference F0 is non-zero.

    Raises:
        NotImplementedError: for an unknown ``cfg.f0_pred``.
    """
    is_train = mode == "train"
    if is_train:
        model.train()
    else:
        model.eval()

    epoch_loss = 0
    l1 = 0
    l1_voiced = 0
    for x, f0_bin, f0_raw, spk_id, gst, mask, _ in tqdm(loader):
        x, f0_bin, f0_raw, spk_id, gst, mask = (
            x.to(device),
            f0_bin.to(device),
            f0_raw.to(device),
            spk_id.to(device),
            gst.to(device),
            mask.to(device),
        )
        b, t, n_bins = f0_bin.shape

        if is_train:
            # Without this, gradients from every previous batch are summed
            # into the current step.
            optimizer.zero_grad()

        # Autograd graphs are only needed when we are going to backprop.
        with torch.set_grad_enabled(is_train):
            yhat = model(x, gst)
            expanded_mask = mask.unsqueeze(-1).expand(-1, -1, n_bins)
            if cfg.f0_pred == "mean":
                loss = F.binary_cross_entropy(
                    yhat[expanded_mask], f0_bin[expanded_mask]
                ).mean()
            elif cfg.f0_pred == "argmax":
                # reduction="none" is the supported spelling of the
                # deprecated reduce=False.
                loss = F.cross_entropy(
                    rearrange(yhat, "b t d -> (b t) d"),
                    rearrange(f0_bin.argmax(-1), "b t -> (b t)"),
                    reduction="none",
                )
                loss = rearrange(loss, "(b t) -> b t", b=b, t=t)
                # Average the per-frame loss over masked-in frames only.
                loss = (loss * mask).sum() / mask.float().sum()
            else:
                raise NotImplementedError

        # Raw-F0 metrics are for reporting only; never backpropagated.
        with torch.no_grad():
            nonzero_mask = (f0_raw != 0).logical_and(mask)
            yhat_raw = model.inference(x, spk_id, gst)
            l1 += F.l1_loss(yhat_raw[mask], f0_raw[mask]).item()
            l1_voiced += F.l1_loss(
                yhat_raw[nonzero_mask], f0_raw[nonzero_mask]
            ).item()
        epoch_loss += loss.item()

        if is_train:
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

    # Show a short slice of the last batch as a sanity check.
    print(f"{mode} example y: {f0_bin.argmax(-1)[0, 50:60].tolist()}")
    print(f"{mode} example yhat: {yhat.argmax(-1)[0, 50:60].tolist()}")
    print(f"{mode} example y: {f0_raw[0, 50:60].round().tolist()}")
    print(f"{mode} example yhat: {yhat_raw[0, 50:60].round().tolist()}")
    return epoch_loss / len(loader), l1 / len(loader), l1_voiced / len(loader)
class Stat:
    """Running first/second-moment statistics of a stream of 1-D tensors.

    Sums are kept for both the raw values and their natural logarithm, so the
    mean and standard deviation are available in either domain.
    """

    def __init__(self, keep_raw=False):
        self.x = 0.0  # sum of values
        self.x2 = 0.0  # sum of squared values
        self.z = 0.0  # z = logx
        self.z2 = 0.0  # sum of squared log-values
        self.n = 0.0  # number of values seen
        self.u = 0.0  # number of update() calls
        self.keep_raw = keep_raw
        self.raw = []  # every tensor passed to update() when keep_raw is set

    def update(self, new_x):
        """Fold the values of tensor ``new_x`` into the running sums."""
        new_z = new_x.log()

        self.x += new_x.sum()
        self.x2 += (new_x**2).sum()
        self.z += new_z.sum()
        self.z2 += (new_z**2).sum()
        self.n += len(new_x)
        self.u += 1

        if self.keep_raw:
            self.raw.append(new_x)

    @staticmethod
    def _sqrt_of_variance(variance):
        """Return sqrt(variance), treating tiny negative rounding as zero.

        ``E[x^2] - E[x]^2`` can come out slightly negative in floating point
        when the data is (nearly) constant; a plain ``** 0.5`` then yields
        NaN, which would propagate into every normalized F0 value.
        """
        return torch.as_tensor(variance).clamp(min=0) ** 0.5

    @property
    def mean(self):
        return self.x / self.n

    @property
    def std(self):
        return self._sqrt_of_variance(self.x2 / self.n - self.mean**2)

    @property
    def mean_log(self):
        return self.z / self.n

    @property
    def std_log(self):
        return self._sqrt_of_variance(self.z2 / self.n - self.mean_log**2)

    @property
    def n_frms(self):
        return self.n

    @property
    def n_utts(self):
        return self.u

    @property
    def raw_data(self):
        """Concatenation of every tensor seen; requires ``keep_raw=True``."""
        assert self.keep_raw, "does not support storing raw data!"
        return torch.cat(self.raw)


class F0Stat(Stat):
    """``Stat`` that ignores zero-valued frames and ``None`` inputs."""

    def update(self, new_x):
        # assume unvoiced frames are 0 and consider only voiced frames
        if new_x is not None:
            super().update(new_x[new_x != 0])


class Accuracy:
    """Collects prediction/target tensors and scores them with a tolerance."""

    def __init__(self):
        self.y, self.yhat = [], []

    def update(self, yhat, y):
        """Store one batch of predictions ``yhat`` and targets ``y``."""
        self.yhat.append(yhat)
        self.y.append(y)

    def acc(self, tol):
        """Return the fraction of stored pairs with ``|yhat - y| <= tol``."""
        yhat = torch.cat(self.yhat)
        y = torch.cat(self.y)
        within_tol = torch.abs(yhat - y) <= tol
        return within_tol.float().mean().item()
args.max_target_positions = 1024 + + src_langs = [lang_pair.split("-")[0] for lang_pair in task.model_lang_pairs] + tgt_langs = [lang_pair.split("-")[1] for lang_pair in task.model_lang_pairs] + + if args.share_encoders: + args.share_encoder_embeddings = True + if args.share_decoders: + args.share_decoder_embeddings = True + + def build_embedding(dictionary, embed_dim, path=None): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + return emb + + # build shared embeddings (if applicable) + shared_encoder_embed_tokens, shared_decoder_embed_tokens = None, None + if args.share_all_embeddings: + if args.encoder_embed_dim != args.decoder_embed_dim: + raise ValueError( + "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim" + ) + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise ValueError( + "--share-all-embeddings not compatible with --decoder-embed-path" + ) + shared_encoder_embed_tokens = FairseqMultiModel.build_shared_embeddings( + dicts=task.dicts, + langs=task.langs, + embed_dim=args.encoder_embed_dim, + build_embedding=build_embedding, + pretrained_embed_path=args.encoder_embed_path, + ) + shared_decoder_embed_tokens = shared_encoder_embed_tokens + args.share_decoder_input_output_embed = True + else: + if args.share_encoder_embeddings: + shared_encoder_embed_tokens = FairseqMultiModel.build_shared_embeddings( + dicts=task.dicts, + langs=src_langs, + embed_dim=args.encoder_embed_dim, + build_embedding=build_embedding, + pretrained_embed_path=args.encoder_embed_path, + ) + if args.share_decoder_embeddings: + shared_decoder_embed_tokens = FairseqMultiModel.build_shared_embeddings( + dicts=task.dicts, + langs=tgt_langs, + embed_dim=args.decoder_embed_dim, + 
def load_state_dict(self, state_dict, strict=True, model_cfg=None):
    """Load ``state_dict``, adapting its keys to this multi-model first.

    Two cases are distinguished by looking at the second dot-separated
    component of every key: if none of them contains ``"neutral"`` the
    checkpoint is treated as a single pre-trained (m)BART model whose weights
    are replicated under ``models.<lang_pair>.`` for every pair in
    ``self.models``; otherwise it is treated as an emotion translation
    checkpoint and entries of language pairs unknown to this model are
    dropped.  The adapted dictionary is then passed to the parent
    ``load_state_dict``.

    Raises:
        ValueError: in the first case, when a tensor's shape differs from the
            model's and it is not one of the embedding / output projection
            weights.
    """
    # Work on a shallow copy so the caller's dictionary is not modified.
    state_dict_subset = state_dict.copy()
    lang_pairs = set([x.split(".")[1] for x in state_dict.keys()])
    finetune_mode = not any("neutral" in lp for lp in lang_pairs)

    if finetune_mode:
        # load a pre-trained mBART/BART model
        # we need this code because mBART/BART are not of type FairseqMultiModel but FairseqModel
        # so we hackishly load the weights by replicating them for all lang pairs
        print("loading pre-trained BART")
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            for lang_pair in self.models:
                new_key = k if "models." in k else f"models.{lang_pair}.{k}"
                # print(new_key)
                if self_state_dict[new_key].shape == v.shape:
                    state_dict_subset[new_key] = v
                elif any(
                    w in k
                    for w in [
                        "encoder.embed_tokens.weight",
                        "decoder.embed_tokens.weight",
                        "decoder.output_projection.weight",
                    ]
                ):
                    # why vocab_size - 5? because there are `vocab_size` tokens from the language
                    # and 5 additional tokens in the denoising task: eos,bos,pad,unk,mask.
                    # but in the translation task there are only `vocab_size` + 4 (no mask).
                    print(
                        f"{k}: {self_state_dict[new_key].shape} != {v.shape}",
                        end="",
                        flush=True,
                    )
                    vocab_size = v.shape[0] - 5
                    # The next assignment is immediately overwritten by the
                    # one after it; only the truncated tensor is kept.
                    state_dict_subset[new_key] = self_state_dict[new_key]
                    state_dict_subset[new_key] = v[: vocab_size + 4]
                    print(f" => fixed by using first {vocab_size + 4} dims")
                else:
                    raise ValueError("unable to load model due to mimatched dims!")
            # Drop the original key once it has been copied for every pair.
            # NOTE(review): when k already contains "models." new_key equals
            # k, so the entry written above is removed here - confirm intended.
            del state_dict_subset[k]
    else:
        print("loading pre-trained emotion translation model")
        for k, _ in state_dict.items():
            assert k.startswith("models.")
            lang_pair = k.split(".")[1]
            # Keep only the weights of language pairs this model has.
            if lang_pair not in self.models:
                del state_dict_subset[k]

    super().load_state_dict(state_dict_subset, strict=strict, model_cfg=model_cfg)
def main():
    """Print one JSON manifest line per utterance to stdout.

    Reads a fairseq ``.tsv`` manifest (root directory on the first line, then
    ``<relative wav path>\\t<num frames>`` per line) and a ``.km`` file with
    one unit sequence per utterance.  For each pair it emits
    ``{"audio": <wav path>, "duration": <seconds>, <km_type>: <units>}``.

    Raises:
        ValueError: if the two files disagree in length, or if the frame
            count recorded in the tsv differs from the one torchaudio reports.
    """
    parser = argparse.ArgumentParser(description="example: python create_hifigan_manifest.py --tsv /checkpoint/felixkreuk/datasets/vctk/splits/vctk_16khz/train.tsv --km /checkpoint/felixkreuk/experiments/hubert/hubert_feats/vctk_16khz_km_100/train.km --km_type hubert_100km > ~/tmp/tmp_mani.txt")
    parser.add_argument("--tsv", required=True, help="path to fairseq tsv file")
    parser.add_argument("--km", required=True, help="path to a km file generated by HuBERT clustering")
    parser.add_argument("--km_type", required=True, help="name of the codes in the output json (for example: 'cpc_100km')")
    args = parser.parse_args()

    # Context managers so both handles are closed before the (slow) loop.
    with open(args.km, "r") as km_file:
        km_lines = km_file.readlines()
    with open(args.tsv, "r") as tsv_file:
        tsv_lines = tsv_file.readlines()
    # Explicit raise instead of assert: the check must survive `python -O`.
    if len(km_lines) != len(tsv_lines) - 1:
        raise ValueError("tsv and km files are not of the same length!")

    wav_root = tsv_lines[0].strip()
    tsv_lines = tsv_lines[1:]

    for tsv_line, km_line in zip(tsv_lines, km_lines):
        tsv_line, km_line = tsv_line.strip(), km_line.strip()
        wav_basename, wav_num_frames = tsv_line.split("\t")
        wav_path = wav_root + "/" + wav_basename
        wav_info = torchaudio.info(wav_path)
        if int(wav_num_frames) != wav_info.num_frames:
            raise ValueError(
                f"tsv duration and actual duration don't match! ({wav_path})"
            )
        wav_duration = wav_info.num_frames / wav_info.sample_rate
        manifest_line = {"audio": wav_path, "duration": wav_duration, args.km_type: km_line}
        print(json.dumps(manifest_line))
def get_fname(s):
    """Return the first tab-separated field of a manifest line (the path)."""
    return s.split("\t")[0]


def get_emotion(s):
    """Return the lower-cased emotion encoded in a manifest line.

    The emotion is the text between the first ``/`` and the first ``_`` of
    the path, e.g. ``"bea/Amused_1-15_0001.wav"`` -> ``"amused"``.
    """
    return get_fname(s).split("_")[0].split("/")[1].lower()


def get_utt_id(s):
    """Return the utterance id: the last ``_``-separated part of the stem.

    e.g. ``"bea/amused_1-15_0001.wav"`` -> ``"0001"``.
    """
    return get_fname(s).split(".")[0].split("_")[-1]


def dedup(seq):
    """Collapse runs of identical consecutive tokens into a single token.

    ``seq`` is a space-separated string; the result always ends with a
    newline.

    >>> dedup("1 2 2 3 100 2 2 1")
    '1 2 3 100 2 1\\n'
    """
    tokens = seq.strip().split(" ")
    # groupby yields one key per run of equal neighbours.
    return " ".join(token for token, _ in groupby(tokens)) + "\n"


def remove_under_k(seq, k):
    """Drop every run of a token that is repeated ``k`` times or fewer.

    Runs longer than ``k`` are kept in full; the result always ends with a
    newline.

    >>> remove_under_k("a a a a b c c c", 1)
    'a a a a c c c\\n'
    """
    tokens = seq.strip().split(" ")
    kept = []
    for token, run in groupby(tokens):
        run_length = len(list(run))
        if run_length > k:
            kept.extend([token] * run_length)
    return " ".join(kept) + "\n"


def call(cmd):
    """Echo ``cmd`` and run it, raising ``CalledProcessError`` on failure.

    A string is executed through the shell (the original behaviour); a
    sequence of arguments is executed directly, so arguments containing
    spaces or shell metacharacters are passed through verbatim.
    """
    import shlex

    if isinstance(cmd, str):
        print(cmd)
        check_call(cmd, shell=True)
    else:
        argv = [str(arg) for arg in cmd]
        print(shlex.join(argv))
        check_call(argv)


def denoising_preprocess(path, lang, dict):
    """Run ``fairseq-preprocess`` on the ``{train,valid,test}.<lang>`` files.

    Output goes to ``<path>/tokenized/<lang>``; ``dict`` is passed as
    ``--srcdict`` unless it is the empty string.
    """
    argv = [
        "fairseq-preprocess",
        "--trainpref", f"{path}/train.{lang}",
        "--validpref", f"{path}/valid.{lang}",
        "--testpref", f"{path}/test.{lang}",
        "--destdir", f"{path}/tokenized/{lang}",
        "--only-source",
        "--task", "multilingual_denoising",
        "--workers", "40",
    ]
    if dict != "":
        argv += ["--srcdict", f"{dict}"]
    # Argument list instead of a shell string: paths with spaces keep working.
    call(argv)


def translation_preprocess(path, src_lang, trg_lang, dict, only_train=False):
    """Run ``fairseq-preprocess`` for the ``src_lang`` -> ``trg_lang`` pair.

    Output goes to ``<path>/tokenized``.  The valid/test prefixes are added
    unless ``only_train`` is set; ``dict`` is used as both source and target
    dictionary unless it is the empty string.
    """
    argv = [
        "fairseq-preprocess",
        "--source-lang", f"{src_lang}",
        "--target-lang", f"{trg_lang}",
        "--trainpref", f"{path}/train",
        "--destdir", f"{path}/tokenized",
        "--workers", "40",
    ]
    if not only_train:
        argv += ["--validpref", f"{path}/valid", "--testpref", f"{path}/test"]
    if dict != "":
        argv += ["--srcdict", f"{dict}", "--tgtdict", f"{dict}"]
    call(argv)
tsv_path.exists() and km_path.exists() + tsv_lines = open(tsv_path, "r").readlines() + root, tsv_lines = tsv_lines[0], tsv_lines[1:] + km_lines = open(km_path, "r").readlines() + assert len(tsv_lines) == len(km_lines), ".tsv and .km should be the same length!" + return root, tsv_lines, km_lines + + +def main(): + desc = """ + this script takes as input .tsv and .km files for EMOV dataset, and a pairs of emotions. + it generates parallel .tsv and .km files for these emotions. for exmaple: + ❯ python build_emov_translation_manifests.py \ + /checkpoint/felixkreuk/datasets/emov/manifests/emov_16khz/train.tsv \ + /checkpoint/felixkreuk/datasets/emov/manifests/emov_16khz/emov_16khz_km_100/train.km \ + ~/tmp/emov_pairs \ + --src-emotion amused --trg-emotion neutral \ + --dedup --shuffle --cross-speaker --dry-run + """ + parser = argparse.ArgumentParser(description=desc) + parser.add_argument("data", type=Path, help="path to a dir containing .tsv and .km files containing emov dataset") + parser.add_argument("output_path", type=Path, help="output directory with the manifests will be created") + parser.add_argument("-cs", "--cross-speaker", action='store_true', help="if set then translation will occur also between speakers, meaning the same sentence can be translated between different speakers (default: false)") + parser.add_argument("-dd", "--dedup", action='store_true', help="remove repeated tokens (example: 'aaabc=>abc')") + parser.add_argument("-sh", "--shuffle", action='store_true', help="shuffle the data") + parser.add_argument("-ae", "--autoencode", action='store_true', help="include training pairs from the same emotion (this includes examples of the same sentence uttered by different people and examples where the src and trg are the exact same seq)") + parser.add_argument("-dr", "--dry-run", action='store_true', help="don't write anything to disk") + parser.add_argument("-zs", "--zero-shot", action='store_true', help="if true, the denoising task will train on the 
same splits as the translation task (split by utterance id). if false, the denoising task will train on randomly sampled splits (not split by utterance id)") + parser.add_argument("--km-ext", default="km", help="") + parser.add_argument("--dict", default="/checkpoint/felixkreuk/datasets/emov/manifests/emov_16khz/fairseq.dict.txt", help="") + args = parser.parse_args() + SPEAKERS = ["bea", "jenie", "josh", "sam", "SAME"] + EMOTIONS = ['neutral', 'amused', 'angry', 'disgusted', 'sleepy'] + + suffix = "" + if args.cross_speaker: suffix += "_cross-speaker" + if args.dedup: suffix += "_dedup" + translation_suffix = "" + if args.autoencode: translation_suffix += "_autoencode" + denoising_suffix = "" + denoising_suffix += "_zeroshot" if args.zero_shot else "_nonzeroshot" + + translation_dir = Path(args.output_path) / ("emov_multilingual_translation" + suffix + translation_suffix) + os.makedirs(translation_dir, exist_ok=True) + denoising_dir = Path(args.output_path) / ("emov_multilingual_denoising" + suffix + denoising_suffix) + os.makedirs(denoising_dir, exist_ok=True) + + denoising_data = [p.name for p in (args.data / "denoising").glob("*") if "emov" not in p.name] + + for split in ["train", "valid", "test"]: + root, tsv_lines, km_lines = load_tsv_km( + tsv_path = args.data / "denoising" / "emov" / f"{split}.tsv", + km_path = args.data / "denoising" / "emov" / f"{split}.{args.km_ext}" + ) + + # generate data for the multilingual denoising task + for EMOTION in EMOTIONS: + print("---") + print(split) + print(f"denoising: {EMOTION}") + emotion_tsv, emotion_km = [], [] + for tsv_line, km_line in zip(tsv_lines, km_lines): + if EMOTION.lower() in tsv_line.lower(): + km_line = km_line if not args.dedup else dedup(km_line) + emotion_tsv.append(tsv_line) + emotion_km.append(km_line) + print(f"{len(emotion_km)} samples") + open(denoising_dir / f"files.{split}.{EMOTION}", "w").writelines([root] + emotion_tsv) + open(denoising_dir / f"{split}.{EMOTION}", "w").writelines(emotion_km) 
+ + for data in denoising_data: + with open(args.data / "denoising" / data / f"{split}.{args.km_ext}", "r") as f1: + with open(denoising_dir / f"{split}.{data}", "w") as f2: + f2.writelines([l if not args.dedup else dedup(l) for l in f1.readlines()]) + + # start of translation preprocessing + root, tsv_lines, km_lines = load_tsv_km( + tsv_path = args.data / "translation" / f"{split}.tsv", + km_path = args.data / "translation" / f"{split}.{args.km_ext}" + ) + + # generate data for the multilingual translation task + for SRC_EMOTION in EMOTIONS: + TRG_EMOTIONS = EMOTIONS if args.autoencode else set(EMOTIONS) - set([SRC_EMOTION]) + for TRG_EMOTION in TRG_EMOTIONS: + # when translating back to the same emotion - we dont want these emotion + # pairs to be part of the validation/test sets (because its not really emotion conversino) + # if SRC_EMOTION == TRG_EMOTION and split in ["valid", "test"]: continue + print("---") + print(split) + print(f"src emotions: {SRC_EMOTION}\ntrg emotions: {TRG_EMOTION}") + + # create a dictionary with the following structure: + # output[SPEAKER][UTT_ID] = list with indexes of line from the tsv file + # that match the speaker and utterance id. for exmaple: + # output = {'sam': {'0493': [875, 1608, 1822], ...}, ...} + # meaning, for speaker 'sam', utterance id '0493', the indexes in tsv_lines + # are 875, 1608, 1822 + spkr2utts = defaultdict(lambda: defaultdict(list)) + for i, tsv_line in enumerate(tsv_lines): + speaker = tsv_line.split("/")[0] + if args.cross_speaker: speaker = "SAME" + assert speaker in SPEAKERS, "unknown speaker! 
make sure the .tsv contains EMOV data" + utt_id = get_utt_id(tsv_line) + spkr2utts[speaker][utt_id].append(i) + + # create a tsv and km files with all the combinations for translation + src_tsv, trg_tsv, src_km, trg_km = [], [], [], [] + for speaker, utt_ids in spkr2utts.items(): + for utt_id, indices in utt_ids.items(): + # generate all pairs + pairs = [(x,y) for x in indices for y in indices] + # self-translation + if SRC_EMOTION == TRG_EMOTION: + pairs = [(x,y) for (x,y) in pairs if x == y] + # filter according to src and trg emotions + pairs = [(x,y) for (x,y) in pairs + if get_emotion(tsv_lines[x]) == SRC_EMOTION and get_emotion(tsv_lines[y]) == TRG_EMOTION] + + for idx1, idx2 in pairs: + assert get_utt_id(tsv_lines[idx1]) == get_utt_id(tsv_lines[idx2]) + src_tsv.append(tsv_lines[idx1]) + trg_tsv.append(tsv_lines[idx2]) + km_line_idx1 = km_lines[idx1] + km_line_idx2 = km_lines[idx2] + km_line_idx1 = km_line_idx1 if not args.dedup else dedup(km_line_idx1) + km_line_idx2 = km_line_idx2 if not args.dedup else dedup(km_line_idx2) + src_km.append(km_line_idx1) + trg_km.append(km_line_idx2) + assert len(src_tsv) == len(trg_tsv) == len(src_km) == len(trg_km) + print(f"{len(src_tsv)} pairs") + + if len(src_tsv) == 0: + raise Exception("ERROR: generated 0 pairs!") + + if args.dry_run: continue + + # create files + os.makedirs(translation_dir / f"{SRC_EMOTION}-{TRG_EMOTION}", exist_ok=True) + open(translation_dir / f"{SRC_EMOTION}-{TRG_EMOTION}" / f"files.{split}.{SRC_EMOTION}", "w").writelines([root] + src_tsv) + open(translation_dir / f"{SRC_EMOTION}-{TRG_EMOTION}" / f"files.{split}.{TRG_EMOTION}", "w").writelines([root] + trg_tsv) + open(translation_dir / f"{SRC_EMOTION}-{TRG_EMOTION}" / f"{split}.{SRC_EMOTION}", "w").writelines(src_km) + open(translation_dir / f"{SRC_EMOTION}-{TRG_EMOTION}" / f"{split}.{TRG_EMOTION}", "w").writelines(trg_km) + + + # fairseq-preprocess the denoising data + for EMOTION in EMOTIONS + denoising_data: + 
# Module logger used by the helpers below.
logger = logging.getLogger(__name__)


def verify_dict_size(km, dict):
    """Return True if ``dict`` has as many lines as ``km`` has distinct units.

    ``km`` is a whitespace-separated unit file; ``dict`` is a dictionary file
    with one entry per line.
    """
    logger.info(f"verifying: {km}")
    with open(dict, "r") as dict_file:
        dict_size = sum(1 for _ in dict_file)
    with open(km, "r") as km_file:
        # split() with no argument splits on any whitespace and never yields
        # empty tokens, so no post-filtering of "" is needed.
        km_vocab = set(km_file.read().split())
    return dict_size == len(km_vocab)


def verify_files_exist(l):
    """Return True if every path in ``l`` exists; log the first missing one."""
    for f in l:
        if not f.exists():
            # Use the module logger like the rest of the file.
            logger.error(f"{f} doesn't exist!")
            return False
    return True


def run_cmd(cmd, print_output=True):
    """Run ``cmd`` through the shell and return its combined stdout/stderr.

    Failures are best-effort: a non-zero exit status is logged together with
    the command's output and ``None`` is returned instead of raising.
    """
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True, shell=True)
    except subprocess.CalledProcessError as err:
        # Error level so a failed pipeline step is not lost among info lines.
        logger.error(f"error executing command!:\n{cmd}")
        logger.error(err.output)
        return None
    if print_output:
        logger.info(f"command output:\n{out}")
    return out
+ outdir.mkdir(parents=True, exist_ok=True) + + logger.info("generating denoising split (emov)") + run_cmd(f"python preprocess/split_km_tsv.py {args.tsv} {args.emov_km} --destdir {outdir}/denoising/emov -sh --seed {args.seed}") + for codes in args.km: + codes_name = os.path.basename(codes) + run_cmd(f"python preprocess/split_km.py {codes} --destdir {outdir}/denoising/{codes_name} -sh --seed {args.seed}") + + logger.info("generating translation split") + run_cmd(f"python preprocess/split_emov_km_tsv_by_uttid.py {args.tsv} {args.emov_km} --destdir {outdir}/translation --seed {args.seed}") + + emov_code_name = os.path.basename(args.emov_km) + logger.info("generating hifigan split") + run_cmd( + f"mkdir -p {outdir}/hifigan &&" + f"python preprocess/build_hifigan_manifest.py --km_type hubert --tsv {outdir}/denoising/emov/train.tsv --km {outdir}/denoising/emov/train.km > {outdir}/hifigan/train.txt &&" + f"python preprocess/build_hifigan_manifest.py --km_type hubert --tsv {outdir}/denoising/emov/valid.tsv --km {outdir}/denoising/emov/valid.km > {outdir}/hifigan/valid.txt &&" + f"python preprocess/build_hifigan_manifest.py --km_type hubert --tsv {outdir}/denoising/emov/test.tsv --km {outdir}/denoising/emov/test.km > {outdir}/hifigan/test.txt" + ) + + logger.info("generating fairseq manifests") + run_cmd(f"python preprocess/build_translation_manifests.py {outdir} {outdir}/fairseq-data -dd -cs --dict {args.dict}") + + logger.info(f"finished processing data at:\n{outdir}") + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/emotion_conversion/preprocess/extract_f0.py b/fairseq/examples/emotion_conversion/preprocess/extract_f0.py new file mode 100644 index 0000000..4204aa4 --- /dev/null +++ b/fairseq/examples/emotion_conversion/preprocess/extract_f0.py @@ -0,0 +1,57 @@ +import argparse +from tqdm import tqdm +from multiprocessing import Manager, Pool + +from scipy.io.wavfile import read +from librosa.util import normalize +import numpy as np +import 
def extract_f0(tsv_line):
    """Extract the YAAPT pitch track of one manifest entry and save it to disk.

    Args:
        tsv_line: one manifest line of exactly two tab-separated fields; the
            first is the wav path relative to the manifest root (module-level
            ``root``), the second is ignored.

    The result is written with ``np.save`` next to the audio, using the wav
    path with ``.wav`` replaced by ``.yaapt`` plus ``.interp.f0`` (when
    ``--interp`` is set) or ``.f0``; ``np.save`` appends ``.npy`` itself.

    Raises:
        NotImplementedError: if ``--extractor`` is anything but ``pyaapt``.
            The option is accepted on the command line, but only the YAAPT
            path is implemented.
    """
    if args.extractor != "pyaapt":
        raise NotImplementedError(
            f"f0 extractor '{args.extractor}' is not implemented; use 'pyaapt'"
        )

    wav_path, _ = tsv_line.split("\t")
    wav_path = root.strip() + "/" + wav_path
    sr, wav = read(wav_path)
    wav = wav / MAX_WAV_VALUE
    wav = normalize(wav) * 0.95

    frame_length = 20.0  # milliseconds
    # Pad half a frame of silence on both sides of the signal.
    pad = int(frame_length / 1000 * sr) // 2
    wav = np.pad(wav.squeeze(), (pad, pad), "constant", constant_values=0)
    signal = basic.SignalObj(wav, sr)
    pitch = pYAAPT.yaapt(
        signal,
        frame_length=frame_length,
        frame_space=5.0,
        nccf_thresh1=0.25,
        tda_frame_length=25.0,
    )
    # Interpolated track when requested, raw per-frame values otherwise.
    pitch = pitch.samp_interp if args.interp else pitch.samp_values

    f0_path = wav_path.replace(".wav", ".yaapt")
    f0_path += ".interp.f0" if args.interp else ".f0"
    np.save(f0_path, pitch)
def train_val_test_split(tsv_lines, km_lines, valid_percent, test_percent, seed=42):
    """Split parallel tsv/km lines into train/valid/test by utterance id.

    All lines sharing an utterance id (as returned by ``get_utt_id``) land in
    the same split.

    Args:
        tsv_lines: manifest lines (without the root header line).
        km_lines: unit lines, parallel to ``tsv_lines``.
        valid_percent: ``test_size`` for the first split, i.e. the share of
            utterance ids held out for validation.
        test_percent: ``test_size`` for the second split; it is applied to the
            ids that remain after the validation ids were removed.
        seed: random state used for both splits.

    Returns:
        ``(train_tsv, train_km, valid_tsv, valid_km, test_tsv, test_km)``,
        each list keeping the original line order.
    """
    # Compute each line's utterance id once instead of once per split.
    line_utt_ids = [get_utt_id(line) for line in tsv_lines]
    utt_ids = sorted(set(line_utt_ids))

    utt_ids, valid_utt_ids, _, _ = train_test_split(utt_ids, utt_ids, test_size=valid_percent, shuffle=True, random_state=seed)
    train_utt_ids, test_utt_ids, _, _ = train_test_split(utt_ids, utt_ids, test_size=test_percent, shuffle=True, random_state=seed)

    def select(wanted_utt_ids):
        # Set membership keeps the per-line lookup O(1).
        wanted = set(wanted_utt_ids)
        idx = [i for i, utt_id in enumerate(line_utt_ids) if utt_id in wanted]
        return [tsv_lines[i] for i in idx], [km_lines[i] for i in idx]

    train_tsv, train_km = select(train_utt_ids)
    valid_tsv, valid_km = select(valid_utt_ids)
    test_tsv, test_km = select(test_utt_ids)

    print(f"train {len(train_km)}")
    print(f"valid {len(valid_km)}")
    print(f"test {len(test_km)}")

    return train_tsv, train_km, valid_tsv, valid_km, test_tsv, test_km
if __name__ == "__main__":
    # Standalone script: split the lines of a km file into train.km, valid.km
    # and test.km under --destdir, optionally shuffling the lines first.
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("km", type=str, help="path to km file")
    parser.add_argument("--destdir", required=True, type=str)
    parser.add_argument("--valid-percent", type=float, default=0.05, help="percent to allocate to validation set")
    parser.add_argument("--test-percent", type=float, default=0.05, help="percent to allocate to test set")
    parser.add_argument("-sh", "--shuffle", action="store_true", help="shuffle the lines before splitting")
    parser.add_argument("--seed", type=int, default=42, help="")
    args = parser.parse_args()

    # Seed both RNGs up front; presumably this is what makes --shuffle
    # reproducible, since shuffle() is called without its own random state.
    np.random.seed(args.seed)
    random.seed(args.seed)

    os.makedirs(args.destdir, exist_ok=True)
    with open(args.km, "r") as km_file:
        km = km_file.readlines()

    if args.shuffle:
        km = shuffle(km)
        print(f"shuffled")

    # The "percent" options are fractions of the line count (0.05 == 5%);
    # whatever is left after valid and test goes to train.
    N = len(km)
    N_tt = int(N * args.test_percent)
    N_cv = int(N * args.valid_percent)
    N_tr = N - N_tt - N_cv

    train_km = km[:N_tr]
    valid_km = km[N_tr:N_tr + N_cv]
    test_km = km[N_tr + N_cv:]

    destdir = Path(args.destdir)
    for split_name, split_lines in (("train", train_km), ("valid", valid_km), ("test", test_km)):
        with open(destdir / f"{split_name}.km", "w") as out_file:
            out_file.writelines(split_lines)
    print(f"train: {len(train_km)}")
    print(f"valid: {len(valid_km)}")
    print(f"test: {len(test_km)}")
    print("done")
N_cv + + train_tsv = tsv[:N_tr] + valid_tsv = tsv[N_tr:N_tr + N_cv] + test_tsv = tsv[N_tr + N_cv:] + train_km = km[:N_tr] + valid_km = km[N_tr:N_tr + N_cv] + test_km = km[N_tr + N_cv:] + + assert len(train_tsv) + len(valid_tsv) + len(test_tsv) == len(tsv) + assert len(train_tsv) == len(train_km) and len(valid_tsv) == len(valid_km) and len(test_tsv) == len(test_km) + + dir = Path(args.destdir) + open(dir / f"train.tsv", "w").writelines([root] + train_tsv) + open(dir / f"valid.tsv", "w").writelines([root] + valid_tsv) + open(dir / f"test.tsv", "w").writelines([root] + test_tsv) + open(dir / f"train.km", "w").writelines(train_km) + open(dir / f"valid.km", "w").writelines(valid_km) + open(dir / f"test.km", "w").writelines(test_km) + print(f"train: {len(train_km)}") + print(f"valid: {len(valid_km)}") + print(f"test: {len(test_km)}") + print("done") diff --git a/fairseq/examples/emotion_conversion/requirements.txt b/fairseq/examples/emotion_conversion/requirements.txt new file mode 100644 index 0000000..fc94c5a --- /dev/null +++ b/fairseq/examples/emotion_conversion/requirements.txt @@ -0,0 +1,11 @@ +scipy +einops +amfm_decompy +joblib +numba +decorator +requests +appdirs +packaging +six +sklearn diff --git a/fairseq/examples/emotion_conversion/synthesize.py b/fairseq/examples/emotion_conversion/synthesize.py new file mode 100644 index 0000000..327fdaf --- /dev/null +++ b/fairseq/examples/emotion_conversion/synthesize.py @@ -0,0 +1,322 @@ +import logging +import argparse +import random +import sys +import os +import numpy as np +import torch +import soundfile as sf +import shutil +import librosa +import json +from pathlib import Path +from tqdm import tqdm +import amfm_decompy.basic_tools as basic +import amfm_decompy.pYAAPT as pYAAPT + +dir_path = os.path.dirname(__file__) +resynth_path = os.path.dirname(os.path.abspath(__file__)) + "/speech-resynthesis" +sys.path.append(resynth_path) + +from models import CodeGenerator +from inference import scan_checkpoint, 
def parse_generation_file(fname):
    """Parse a generation log into per-sample source/target/hypothesis tokens.

    Only lines whose first character is ``S``, ``T`` or ``H`` are used; the
    first two characters are dropped and the rest is split on tabs:

    * ``S``: ``<id>\\t<tokens>`` -> tokens kept as strings
    * ``T``: ``<id>\\t<tokens>`` -> tokens converted to int
    * ``H``: ``<id>\\t<tokens>`` or ``<id>\\t<ignored>\\t<tokens>`` -> ints

    Args:
        fname: path to the generation output file.

    Returns:
        dict mapping the integer sample id to a dict with the keys found for
        it among ``'S'``, ``'T'`` and ``'H'``.  A sample without an ``H`` line
        gets ``'H'`` set to its ``'S'`` list (same object, string tokens).

    Raises:
        ValueError: on a malformed S/T/H line (wrong field count or a
            non-integer id/token).
        KeyError: if a sample has neither an ``H`` nor an ``S`` line.
    """
    with open(fname) as f:  # closed promptly; the handle used to leak
        lines = f.read().split('\n')

    results = {}
    for line in lines:
        if not line:
            continue

        kind = line[0]
        if kind not in ('H', 'S', 'T'):
            continue

        parts = line[2:].split('\t')
        if kind == 'H' and len(parts) != 2:
            # Three-field hypothesis line: the middle field is ignored.
            sid, _, utt = parts
        else:
            sid, utt = parts
        sid = int(sid)

        tokens = utt.split()
        if kind != 'S':
            # Source tokens stay strings; they may not all be integers.
            tokens = [int(x) for x in tokens]
        results.setdefault(sid, {})[kind] = tokens

    for result in results.values():
        if 'H' not in result:
            result['H'] = result['S']

    return results
@torch.no_grad()
def synth(argv, interactive=False):
    """Synthesize audio for sampled translation results under several setups.

    For up to ``-N`` randomly chosen samples of the generation file
    (``--result-path``) this writes one wav per sample into each of these
    sub-directories of ``--outdir``:

    * ``0-source``: copy of the source recording
    * ``1-src-tokens-src-style-src-f0``: original source tokens, source style,
      f0 extracted from the source audio
    * ``2-src-tokens-trg-style-src-f0``: as above with the target style
    * ``2.5-src-tokens-trg-style-src-f0``: source tokens re-inflated by the
      target-emotion duration model, target style, extracted f0
    * ``3-src-tokens-trg-style-pred-f0``: same tokens with predicted f0
    * ``4-gen-tokens-trg-style-pred-f0``: generated tokens with predicted f0
    * ``5-target``: copy of the target recording

    Args:
        argv: command-line arguments (without the program name).
        interactive: unused; kept for interface compatibility.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--result-path', type=Path, help='Translation Model Output', required=True)
    parser.add_argument('--data', type=Path, help='a directory with the files: src.tsv, src.km, trg.tsv, trg.km, orig.tsv, orig.km')
    parser.add_argument("--orig-tsv", default="/checkpoint/felixkreuk/datasets/emov/manifests/emov_16khz/data.tsv")
    parser.add_argument("--orig-km", default="/checkpoint/felixkreuk/datasets/emov/manifests/emov_16khz/core_manifests/emov_16khz_km_100/data.km")

    parser.add_argument('--checkpoint-file', type=Path, help='Generator Checkpoint', required=True)
    parser.add_argument('--dur-model', type=Path, help='a token duration prediction model (if tokens were deduped)')
    parser.add_argument('--f0-model', type=Path, help='a f0 prediction model')

    parser.add_argument('-s', '--src-emotion', default=None)
    parser.add_argument('-t', '--trg-emotion', default=None)
    parser.add_argument('-N', type=int, default=10)
    parser.add_argument('--split', default="test")

    parser.add_argument('--outdir', type=Path, default=Path('results'))
    parser.add_argument('--orig-filename', action='store_true')

    parser.add_argument('--device', type=int, default=0)
    a = parser.parse_args(argv)

    # These options default to None but are used unconditionally below; fail
    # here with a usage message instead of deep inside model/file loading.
    needed = (
        ('--data', a.data),
        ('--dur-model', a.dur_model),
        ('--f0-model', a.f0_model),
        ('-s/--src-emotion', a.src_emotion),
        ('-t/--trg-emotion', a.trg_emotion),
    )
    for flag, value in needed:
        if value is None:
            parser.error(f"{flag} is required")

    seed = 52
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # config.json lives inside the checkpoint dir, or next to a checkpoint file.
    if os.path.isdir(a.checkpoint_file):
        config_file = os.path.join(a.checkpoint_file, 'config.json')
    else:
        config_file = os.path.join(os.path.split(a.checkpoint_file)[0], 'config.json')
    with open(config_file) as f:
        data = f.read()
    json_config = json.loads(data)
    h = AttrDict(json_config)

    generator = CodeGenerator(h).to(a.device)
    if os.path.isdir(a.checkpoint_file):
        cp_g = scan_checkpoint(a.checkpoint_file, 'g_')
    else:
        cp_g = a.checkpoint_file
    state_dict_g = load_checkpoint(cp_g)
    generator.load_state_dict(state_dict_g['generator'])

    generator.eval()
    generator.remove_weight_norm()

    # One duration predictor per emotion, loaded from <dur-model>/<emotion>.ckpt.
    dur_models = {
        emotion: load_duration_predictor(f"{a.dur_model}/{emotion}.ckpt")
        for emotion in ("neutral", "amused", "disgusted", "angry", "sleepy")
    }
    logger.info(f"loaded duration prediction model from {a.dur_model}")

    f0_model = load_pitch_predictor(a.f0_model).to(a.device)
    logger.info(f"loaded f0 prediction model from {a.f0_model}")

    # we need to know how to map code back to the filename
    # (if we want the original files names as output)
    results = parse_generation_file(a.result_path)
    _, src_code_to_fname = get_code_to_fname(f'{a.data}/files.{a.split}.{a.src_emotion}', f'{a.data}/{a.split}.{a.src_emotion}')
    _, tgt_code_to_fname = get_code_to_fname(f'{a.data}/files.{a.split}.{a.trg_emotion}', f'{a.data}/{a.split}.{a.trg_emotion}')

    # we need the originals (before dedup) to get the ground-truth durations
    with open(a.orig_tsv, 'r') as f:
        orig_tsv = f.readlines()
    orig_tsv_root, orig_tsv = orig_tsv[0].strip(), orig_tsv[1:]
    with open(a.orig_km, 'r') as f:
        orig_km = f.readlines()
    fname_to_idx = {orig_tsv_root + "/" + line.split("\t")[0]: i for i, line in enumerate(orig_tsv)}

    outdir = a.outdir
    outdir.mkdir(parents=True, exist_ok=True)
    (outdir / '0-source').mkdir(exist_ok=True)
    (outdir / '1-src-tokens-src-style-src-f0').mkdir(exist_ok=True)
    (outdir / '2-src-tokens-trg-style-src-f0').mkdir(exist_ok=True)
    (outdir / '2.5-src-tokens-trg-style-src-f0').mkdir(exist_ok=True)
    (outdir / '3-src-tokens-trg-style-pred-f0').mkdir(exist_ok=True)
    (outdir / '4-gen-tokens-trg-style-pred-f0').mkdir(exist_ok=True)
    (outdir / '5-target').mkdir(exist_ok=True)

    N = 0
    results = list(results.items())
    random.shuffle(results)
    for i, (sid, result) in tqdm(enumerate(results)):
        N += 1
        # -N -1 means "process every sample".
        if N > a.N and a.N != -1:
            break

        # Drop a leading token containing '[' and a trailing token containing
        # '_' (presumably tags added during preprocessing; verify) so the
        # remaining tokens match the keys of the code -> filename maps.
        if '[' in result['S'][0]:
            result['S'] = result['S'][1:]
        if '_' in result['S'][-1]:
            result['S'] = result['S'][:-1]
        src_ref = src_code_to_fname[code_to_str(result['S'])]
        trg_ref = tgt_code_to_fname[code_to_str(result['T'])]

        src_audio = (load_audio(src_ref)[0] / MAX_WAV_VALUE) * 0.95
        trg_audio = (load_audio(trg_ref)[0] / MAX_WAV_VALUE) * 0.95
        # Honour --device instead of the default CUDA device.
        src_audio = torch.FloatTensor(src_audio).unsqueeze(0).to(a.device)
        trg_audio = torch.FloatTensor(trg_audio).unsqueeze(0).to(a.device)

        # Speakers unknown to EMOV_SPK2ID are replaced by a random known one.
        src_spkr = parse_speaker(src_ref, h.multispkr)
        src_spkr = src_spkr if src_spkr in EMOV_SPK2ID else random.choice(list(EMOV_SPK2ID.keys()))
        src_spkr = EMOV_SPK2ID[src_spkr]
        src_spkr = torch.LongTensor([src_spkr])
        trg_spkr = parse_speaker(trg_ref, h.multispkr)
        trg_spkr = trg_spkr if trg_spkr in EMOV_SPK2ID else random.choice(list(EMOV_SPK2ID.keys()))
        trg_spkr = EMOV_SPK2ID[trg_spkr]
        trg_spkr = torch.LongTensor([trg_spkr])

        src_style = EMOV_STYLE2ID[a.src_emotion]
        src_style = torch.LongTensor([src_style]).to(a.device)
        trg_style_str = a.trg_emotion
        trg_style = EMOV_STYLE2ID[a.trg_emotion]
        trg_style = torch.LongTensor([trg_style]).to(a.device)

        # Source tokens from the original km file (with their real durations)...
        src_tokens = list(map(int, orig_km[fname_to_idx[src_ref]].strip().split(" ")))
        src_tokens = torch.LongTensor(src_tokens).unsqueeze(0)
        # ...and the token sequences from the generation file, each expanded
        # by the target emotion's duration model.
        src_tokens_dur_pred = torch.LongTensor(list(map(int, result['S']))).unsqueeze(0)
        src_tokens_dur_pred = dur_models[trg_style_str].inflate_input(src_tokens_dur_pred)
        gen_tokens = torch.LongTensor(result['H']).unsqueeze(0)
        gen_tokens = dur_models[trg_style_str].inflate_input(gen_tokens)
        trg_tokens = torch.LongTensor(result['T']).unsqueeze(0)
        trg_tokens = dur_models[trg_style_str].inflate_input(trg_tokens)

        src_f0 = get_praat_f0(src_audio.unsqueeze(0).cpu().numpy())
        src_f0 = torch.FloatTensor(src_f0).to(a.device)

        pred_src_f0 = f0_model.inference(torch.LongTensor(src_tokens).to(a.device), src_spkr, trg_style).unsqueeze(0)
        pred_src_dur_pred_f0 = f0_model.inference(torch.LongTensor(src_tokens_dur_pred).to(a.device), src_spkr, trg_style).unsqueeze(0)
        pred_gen_f0 = f0_model.inference(torch.LongTensor(gen_tokens).to(a.device), src_spkr, trg_style).unsqueeze(0)
        pred_trg_f0 = f0_model.inference(torch.LongTensor(trg_tokens).to(a.device), src_spkr, trg_style).unsqueeze(0)

        if a.orig_filename:
            # Name outputs "<sample id>__<source file stem>".
            path = src_code_to_fname[code_to_str(result['S'])]
            sid = str(sid) + "__" + Path(path).stem
        shutil.copy(src_code_to_fname[code_to_str(result['S'])], outdir / '0-source' / f'{sid}.wav')

        audio = generate_from_code(generator, h, src_tokens, spkr=src_spkr, f0=src_f0, gst=src_style, device=a.device)
        sf.write(outdir / '1-src-tokens-src-style-src-f0' / f'{sid}.wav', audio, samplerate=h.sampling_rate)

        audio = generate_from_code(generator, h, src_tokens, spkr=src_spkr, f0=src_f0, gst=trg_style, device=a.device)
        sf.write(outdir / '2-src-tokens-trg-style-src-f0' / f'{sid}.wav', audio, samplerate=h.sampling_rate)

        audio = generate_from_code(generator, h, src_tokens_dur_pred, spkr=src_spkr, f0=src_f0, gst=trg_style, device=a.device)
        sf.write(outdir / '2.5-src-tokens-trg-style-src-f0' / f'{sid}.wav', audio, samplerate=h.sampling_rate)

        audio = generate_from_code(generator, h, src_tokens_dur_pred, spkr=src_spkr, f0=pred_src_dur_pred_f0, gst=trg_style, device=a.device)
        sf.write(outdir / '3-src-tokens-trg-style-pred-f0' / f'{sid}.wav', audio, samplerate=h.sampling_rate)

        audio = generate_from_code(generator, h, gen_tokens, spkr=src_spkr, f0=pred_gen_f0, gst=trg_style, device=a.device)
        sf.write(outdir / '4-gen-tokens-trg-style-pred-f0' / f'{sid}.wav', audio, samplerate=h.sampling_rate)

        shutil.copy(tgt_code_to_fname[code_to_str(result['T'])], outdir / '5-target' / f'{sid}.wav')

    logger.info("Done.")
(2020)](http://www.statmt.org/wmt20/pdf/2020.wmt-1.68.pdf) introduces 3 simple approximations to make this approach very fast and practical without much loss in accuracy. +- This README provides instructions on how to run online decoding or generation with the noisy channel modeling approach, including ways to make it very fast without much loss in accuracy. + +## Noisy Channel Modeling + +[Yee et al. (2019)](https://www.aclweb.org/anthology/D19-1571.pdf) applies the Bayes Rule to predict `P(y|x)`, the probability of the target `y` given the source `x`. +```P(y|x) = P(x|y) * P(y) / P(x)``` +- `P(x|y)` predicts the source `x` given the target `y` and is referred to as the **channel model** +- `P(y)` is a **language model** over the target `y` +- `P(x)` is generally not modeled since it is constant for all `y`. + +We use Transformer models to parameterize the direct model `P(y|x)`, the channel model `P(x|y)` and the language model `P(y)`. + +During online decoding with beam search, we generate the top `K2` candidates per beam and score them with the following linear combination of the channel model, the language model as well as the direct model scores. + +```(1 / t) * log(P(y|x)) + (1 / s) * ( λ1 * log(P(x|y)) + λ2 * log(P(y)) )``` +- `t` - Target Prefix Length +- `s` - Source Length +- `λ1` - Channel Model Weight +- `λ2` - Language Model Weight + +The top `beam_size` candidates based on the above combined scores are chosen to continue the beams in beam search. In beam search with a direct model alone, the scores from the direct model `P(y|x)` are used to choose the top candidates in beam search. + +This framework provides a great way to utilize strong target language models trained on large amounts of unlabeled data. Language models can prefer targets unrelated to the source, so we also need a channel model whose role is to ensure that the target preferred by the language model also translates back to the source. 
+ +### Training Translation Models and Language Models + +For training Transformer models in fairseq for machine translation, refer to instructions [here](https://github.com/pytorch/fairseq/tree/main/examples/translation) + +For training Transformer models in fairseq for language modeling, refer to instructions [here](https://github.com/pytorch/fairseq/tree/main/examples/language_model) + +### Generation with Language Model for German-English translation with fairseq + +Here are instructions to generate using a direct model and a target-side language model. + +Note: +- Download and install fairseq as per instructions [here](https://github.com/pytorch/fairseq) +- Preprocess and binarize the dataset as per instructions in section [Test Data Preprocessing](#test-data-preprocessing) + +```sh +binarized_data=data_dir/binarized +direct_model=de_en_seed4.pt +lm_model=en_lm.pt +lm_data=lm_data +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed4.pt -O ${direct_model} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/transformer_lm.pt -O ${lm_model} +mkdir -p ${lm_data} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/lm_dict/dict.txt -O ${lm_data}/dict.txt + +k2=10 +lenpen=0.16 +lm_wt=0.14 +fairseq-generate ${binarized_data} \ + --user-dir examples/fast_noisy_channel \ + --beam 5 \ + --path ${direct_model} \ + --lm-model ${lm_model} \ + --lm-data ${lm_data} \ + --k2 ${k2} \ + --combine-method lm_only \ + --task noisy_channel_translation \ + --lenpen ${lenpen} \ + --lm-wt ${lm_wt} \ + --gen-subset valid \ + --remove-bpe \ + --fp16 \ + --batch-size 10 +``` +### Noisy Channel Generation for German-English translation with fairseq + +Here are instructions for noisy channel generation with a direct model, channel model and language model as explained in section [Noisy Channel Modeling](#noisy-channel-modeling). 
+ +Note: +- Download and install fairseq as per instructions [here](https://github.com/pytorch/fairseq) +- Preprocess and binarize the dataset as per instructions in section [Test Data Preprocessing](#test-data-preprocessing) + +```sh +binarized_data=data_dir/binarized +direct_model=de_en_seed4.pt +lm_model=en_lm.pt +lm_data=lm_data +ch_model=en_de.big.seed4.pt +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed4.pt -O ${direct_model} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/transformer_lm.pt -O ${lm_model} +mkdir -p ${lm_data} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/lm_dict/dict.txt -O ${lm_data}/dict.txt +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big.seed4.pt -O ${ch_model} + +k2=10 +lenpen=0.21 +lm_wt=0.50 +bw_wt=0.30 +fairseq-generate ${binarized_data} \ + --user-dir examples/fast_noisy_channel \ + --beam 5 \ + --path ${direct_model} \ + --lm-model ${lm_model} \ + --lm-data ${lm_data} \ + --channel-model ${ch_model} \ + --k2 ${k2} \ + --combine-method noisy_channel \ + --task noisy_channel_translation \ + --lenpen ${lenpen} \ + --lm-wt ${lm_wt} \ + --ch-wt ${bw_wt} \ + --gen-subset test \ + --remove-bpe \ + --fp16 \ + --batch-size 1 +``` +## Fast Noisy Channel Modeling + +[Bhosale et al. (2020)](http://www.statmt.org/wmt20/pdf/2020.wmt-1.68.pdf) introduces 3 approximations that speed up online noisy channel decoding - +- Smaller channel models (`Transformer Base` with 1 encoder and decoder layer each vs. `Transformer Big`) + - This involves training a channel model that is possibly smaller and less accurate in terms of BLEU than a channel model of the same size as the direct model. + - Since the role of the channel model is mainly to assign low scores to generations from the language model if they don't translate back to the source, we may not need the most accurate channel model for this purpose. 
+- Smaller output vocabulary size for the channel model (~30,000 -> ~1000) + - The channel model doesn't need to score the full output vocabulary, it just needs to score the source tokens, which are completely known. + - This is specified using the arguments `--channel-scoring-type src_vocab --top-k-vocab 500` + - This means that the output vocabulary for the channel model will be the source tokens for all examples in the batch and the top-K most frequent tokens in the vocabulary + - This reduces the memory consumption needed to store channel model scores significantly +- Smaller number of candidates (`k2`) scored per beam + - This is specified by reducing the argument `--k2` + + +### Fast Noisy Channel Generation for German-English translation with fairseq + +Here are instructions for **fast** noisy channel generation with a direct model, channel model and language model as explained in section [Fast Noisy Channel Modeling](#fast-noisy-channel-modeling). The main differences are that we use a smaller channel model, reduce `--k2`, set `--channel-scoring-type src_vocab --top-k-vocab 500` and increase the `--batch-size`. 
+ +Note: +- Download and install fairseq as per instructions [here](https://github.com/pytorch/fairseq) +- Preprocess and binarize the dataset as per instructions in section [Test Data Preprocessing](#test-data-preprocessing) + +```sh +binarized_data=data_dir/binarized +direct_model=de_en_seed4.pt +lm_model=en_lm.pt +lm_data=lm_data +small_ch_model=en_de.base_1_1.seed4.pt +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed4.pt -O ${direct_model} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/transformer_lm.pt -O ${lm_model} +mkdir -p ${lm_data} +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/lm_dict/dict.txt -O ${lm_data}/dict.txt +wget https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base_1_1.seed4.pt -O ${small_ch_model} + +k2=3 +lenpen=0.23 +lm_wt=0.58 +bw_wt=0.26 +fairseq-generate ${binarized_data} \ + --user-dir examples/fast_noisy_channel \ + --beam 5 \ + --path ${direct_model} \ + --lm-model ${lm_model} \ + --lm-data ${lm_data} \ + --channel-model ${small_ch_model} \ + --k2 ${k2} \ + --combine-method noisy_channel \ + --task noisy_channel_translation \ + --lenpen ${lenpen} \ + --lm-wt ${lm_wt} \ + --ch-wt ${bw_wt} \ + --gen-subset test \ + --remove-bpe \ + --fp16 \ + --batch-size 50 \ + --channel-scoring-type src_vocab --top-k-vocab 500 +``` + +## Test Data Preprocessing + +For preprocessing and binarizing the test sets for Romanian-English and German-English translation, we use the following script - + +```sh +FAIRSEQ=/path/to/fairseq +cd $FAIRSEQ +SCRIPTS=$FAIRSEQ/mosesdecoder/scripts +if [ ! -d "${SCRIPTS}" ]; then + echo 'Cloning Moses github repository (for tokenization scripts)...' 
+ git clone https://github.com/moses-smt/mosesdecoder.git +fi +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +NORMALIZE=$SCRIPTS/tokenizer/normalize-punctuation.perl + +s=de +t=en +test=wmt18 + +mkdir -p data_dir + +# Tokenization +if [ $s == "ro" ] ; then + # Note: Get normalise-romanian.py and remove-diacritics.py from + # https://github.com/rsennrich/wmt16-scripts/tree/master/preprocess + sacrebleu -t $test -l $s-$t --echo src | \ + $NORMALIZE -l $s | \ + python normalise-romanian.py | \ + python remove-diacritics.py | \ + $TOKENIZER -l $s -a -q > data_dir/$test.$s-$t.$s +else + sacrebleu -t $test -l $s-$t --echo src | perl $NORMALIZE -l $s | perl $TOKENIZER -threads 8 -a -l $s > data_dir/$test.$s-$t.$s +fi + +sacrebleu -t $test -l $s-$t --echo ref | perl $NORMALIZE -l $t | perl $TOKENIZER -threads 8 -a -l $t > data_dir/$test.$s-$t.$t + + +# Applying BPE +src_bpe_code=/path/to/source/language/bpe/code +tgt_bpe_code=/path/to/target/language/bpe/code +src_dict=/path/to/source/language/dict +tgt_dict=/path/to/target/language/dict + +FASTBPE=$FAIRSEQ/fastBPE +if [ ! 
-d "${FASTBPE}" ] ; then + git clone https://github.com/glample/fastBPE.git + # Follow compilation instructions at https://github.com/glample/fastBPE + g++ -std=c++11 -pthread -O3 fastBPE/main.cc -IfastBPE -o fastBPE/fast +fi + +${FASTBPE}/fast applybpe data_dir/bpe.$test.$s-$t.$s data_dir/$test.$s-$t.$s ${src_bpe_code} +${FASTBPE}/fast applybpe data_dir/bpe.$test.$s-$t.$t data_dir/$test.$s-$t.$t ${tgt_bpe_code} + +fairseq-preprocess -s $s -t $t \ + --testpref data_dir/bpe.$test.$s-$t \ + --destdir data_dir/binarized \ + --srcdict ${src_dict} \ + --tgtdict ${tgt_dict} +``` + +## Calculating BLEU + +```sh +DETOKENIZER=$SCRIPTS/tokenizer/detokenizer.perl +cat ${generation_output} | grep -P "^H" | sort -V | cut -f 3- | $DETOKENIZER -l $t -q -a | sacrebleu -t $test -l $s-$t +``` + + +## Romanian-English Translation + +The direct and channel models are trained using bitext data (WMT16) combined with backtranslated data (The monolingual data used for backtranslation comes from http://data.statmt.org/rsennrich/wmt16_backtranslations/ (Sennrich et al., 2016c)). + +The backtranslated data is generated using an ensemble of 3 English-Romanian models trained on bitext training data (WMT16) with unrestricted sampling. + +### BPE Codes and Dictionary + +We learn a joint BPE vocabulary of 18K types on the bitext training data which is used for both the source and target. +||Path| +|----------|------| +| BPE Code | [joint_bpe_18k](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/bpe_18k) | +| Dictionary | [dict](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/dict) | + +### Direct Models +For Ro-En with backtranslation, the direct and channel models use a Transformer-Big architecture.
+ +| Seed | Model | +|----|----| +| 2 | [ro_en_seed2.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/direct_models/seed2.pt) +| 4 | [ro_en_seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/direct_models/seed4.pt) +| 6 | [ro_en_seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/direct_models/seed6.pt) + +### Channel Models +For channel models, we follow the same steps as for the direct models. But backtranslated data is generated in the opposite direction using [this Romanian monolingual data](http://data.statmt.org/rsennrich/wmt16_backtranslations/). +The best lenpen, LM weight and CH weight are obtained by sweeping over the validation set (wmt16/dev) using beam 5. +| Model Size | Lenpen | LM Weight | CH Weight | Seed 2 | Seed 4 | Seed 6 | +|----|----|----|----|----|----|----| +| `big` | 0.84 | 0.64 | 0.56 | [big.seed2.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/big.seed2.pt) | [big.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/big.seed4.pt) | [big.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/big.seed6.pt) | +| `base_1_1` | 0.63 | 0.40 | 0.37 | [base_1_1.seed2.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/base_1_1.seed2.pt) | [base_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/base_1_1.seed4.pt) | [base_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/channel_models/base_1_1.seed6.pt) | + +### Language Model +The model is trained on de-duplicated English Newscrawl data from 2007-2018 comprising 186 million sentences or 4.5B words after normalization and tokenization.
+| | Path | +|----|----| +| `--lm-model` | [transformer_en_lm](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/lm_model/transformer_lm.pt) | +| `--lm-data` | [lm_data](https://dl.fbaipublicfiles.com/fast_noisy_channel/ro_en/lm_model/lm_dict) + +## German-English Translation + +### BPE Codes and Dictionaries + +| | Path| +|----------|------| +| Source BPE Code | [de_bpe_code_24K](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/de_bpe_code_24K) | +| Target BPE Code | [en_bpe_code_24K](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/en_bpe_code_24K) +| Source Dictionary | [de_dict](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/de_dict) | +| Target Dictionary | [en_dict](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/en_dict) | + +### Direct Models +We train on WMT’19 training data. Following [Ng et al., 2019](http://statmt.org/wmt19/pdf/53/WMT33.pdf), we apply language identification filtering and remove sentences longer than 250 tokens as well as sentence pairs with a source/target length ratio exceeding 1.5. This results in 26.8M sentence pairs. +We use the Transformer-Big architecture for the direct model. + +| Seed | Model | +|:----:|----| +| 4 | [de_en_seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed4.pt) +| 5 | [de_en_seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed5.pt) +| 6 | [de_en_seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/direct_models/seed6.pt) + +### Channel Models + +We train on WMT’19 training data. Following [Ng et al., 2019](http://statmt.org/wmt19/pdf/53/WMT33.pdf), we apply language identification filtering and remove sentences longer than 250 tokens as well as sentence pairs with a source/target length ratio exceeding 1.5. This results in 26.8M sentence pairs. 
+ +| Model Size | Seed 4 | Seed 5 | Seed 6 | +|----|----|----|----| +| `big` | [big.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big.seed4.pt) | [big.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big.seed5.pt) | [big.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big.seed6.pt) | +| `big_1_1` | [big_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big_1_1.seed4.pt) | [big_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big_1_1.seed5.pt) | [big_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/big_1_1.seed6.pt) | +| `base` | [base.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base.seed4.pt) | [base.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base.seed5.pt) | [base.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base.seed6.pt) | +| `base_1_1` | [base_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base_1_1.seed4.pt) | [base_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base_1_1.seed5.pt) | [base_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/base_1_1.seed6.pt) | +| `half` | [half.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half.seed4.pt) | [half.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half.seed5.pt) | [half.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half.seed6.pt) | +| `half_1_1` | [half_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half_1_1.seed4.pt) | [half_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half_1_1.seed5.pt) | 
[half_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/half_1_1.seed6.pt) | +| `quarter` | [quarter.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter.seed4.pt) | [quarter.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter.seed5.pt) | [quarter.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter.seed6.pt) | +| `quarter_1_1` | [quarter_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter_1_1.seed4.pt) | [quarter_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter_1_1.seed5.pt) | [quarter_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/quarter_1_1.seed6.pt) | +| `8th` | [8th.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th.seed4.pt) | [8th.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th.seed5.pt) | [8th.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th.seed6.pt) | +| `8th_1_1` | [8th_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th_1_1.seed4.pt) | [8th_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th_1_1.seed5.pt) | [8th_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/8th_1_1.seed6.pt) | +| `16th` | [16th.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th.seed4.pt) | [16th.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th.seed5.pt) | [16th.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th.seed6.pt) | +| `16th_1_1` | [16th_1_1.seed4.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th_1_1.seed4.pt) | 
[16th_1_1.seed5.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th_1_1.seed5.pt) | [16th_1_1.seed6.pt](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/channel_models/16th_1_1.seed6.pt) | + +### Language Model +The model is trained on de-duplicated English Newscrawl data from 2007-2018 comprising 186 million sentences or 4.5B words after normalization and tokenization. +| | Path | +|----|----| +| `--lm-model` | [transformer_en_lm](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/transformer_lm.pt) | +| `--lm-data` | [lm_data](https://dl.fbaipublicfiles.com/fast_noisy_channel/de_en/lm_model/lm_dict/) + + +## Citation + +```bibtex +@inproceedings{bhosale2020language, + title={Language Models not just for Pre-training: Fast Online Neural Noisy Channel Modeling}, + author={Shruti Bhosale and Kyra Yee and Sergey Edunov and Michael Auli}, + booktitle={Proceedings of the Fifth Conference on Machine Translation (WMT)}, + year={2020}, +} + +@inproceedings{yee2019simple, + title={Simple and Effective Noisy Channel Modeling for Neural Machine Translation}, + author={Yee, Kyra and Dauphin, Yann and Auli, Michael}, + booktitle={Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)}, + pages={5700--5705}, + year={2019} +} +``` diff --git a/fairseq/examples/fast_noisy_channel/__init__.py b/fairseq/examples/fast_noisy_channel/__init__.py new file mode 100644 index 0000000..9b248c3 --- /dev/null +++ b/fairseq/examples/fast_noisy_channel/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import noisy_channel_translation # noqa +from . import noisy_channel_sequence_generator # noqa +from . 
class NoisyChannelBeamSearch(Search):
    """Beam-search step that ranks candidates by a combined noisy-channel score.

    Besides the combined score used for ranking, the search records the
    direct-model ("fw") and language-model ("lm") scores of every selected
    candidate so the caller can track them separately.
    """

    def __init__(self, tgt_dict):
        super().__init__(tgt_dict)
        # Allocated lazily by ``_init_buffers``; ``fw_scores_buf is None`` is
        # the marker for "no buffers allocated yet".
        self.fw_scores_buf = None
        self.lm_scores_buf = None

    def _init_buffers(self, t):
        """Create the output buffers on first use, on the same device as ``t``.

        The parent's ``_init_buffers`` is not called; every buffer this class
        uses is created here.
        """
        # super()._init_buffers(t)
        if self.fw_scores_buf is None:
            self.scores_buf = t.new()
            self.indices_buf = torch.LongTensor().to(device=t.device)
            self.beams_buf = torch.LongTensor().to(device=t.device)
            self.fw_scores_buf = t.new()
            self.lm_scores_buf = t.new()

    def combine_fw_bw(self, combine_method, fw_cum, bw, step):
        """Combine cumulative forward scores with the backward-side scores.

        Args:
            combine_method (str): ``"noisy_channel"`` adds ``bw`` to the
                forward score divided by ``step + 1``; ``"lm_only"`` adds
                ``bw`` to the unnormalized forward score.
            fw_cum (Tensor): cumulative forward (direct model) scores.
            bw (Tensor): scores to add to the forward scores.
            step (int): 0-based decoding step.

        Returns:
            Tensor: the combined scores.

        Raises:
            ValueError: if ``combine_method`` is not one of the two
                supported values.
        """
        if combine_method == "noisy_channel":
            # normalize the cumulative forward score by the number of target
            # tokens produced so far (step is 0-based, hence the +1)
            return bw + fw_cum.div(step + 1)
        if combine_method == "lm_only":
            return bw + fw_cum
        # Previously fell through with ``lprobs`` unbound and failed with an
        # opaque UnboundLocalError; report the actual problem instead.
        raise ValueError(
            "Unsupported combine_method {!r}; expected 'noisy_channel' or "
            "'lm_only'".format(combine_method)
        )

    def step(self, step, fw_lprobs, scores, bw_lprobs, lm_lprobs, combine_method):
        """Select the best candidates for one decoding step.

        Args:
            step (int): 0-based decoding step.
            fw_lprobs (Tensor): forward scores of size
                ``(bsz, beam_size, vocab_size)``.
            scores (Tensor): cumulative scores of previous steps, read at
                ``scores[:, :, step - 1]`` when ``step > 0``.
            bw_lprobs (Tensor): scores combined with the forward scores via
                ``combine_fw_bw``; sliced like ``fw_lprobs`` at step 0.
            lm_lprobs (Tensor): language-model scores, gathered at the
                selected candidate positions.
            combine_method (str): forwarded to ``combine_fw_bw``.

        Returns:
            tuple: ``(scores_buf, fw_scores_buf, lm_scores_buf, indices_buf,
            beams_buf)`` holding the combined, forward and LM scores of the
            selected candidates, their vocabulary indices and the beams they
            came from.
        """
        self._init_buffers(fw_lprobs)
        bsz, beam_size, vocab_size = fw_lprobs.size()

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            fw_lprobs = fw_lprobs[:, ::beam_size, :].contiguous()
            bw_lprobs = bw_lprobs[:, ::beam_size, :].contiguous()
            # nothing to add since we are at the first step
            fw_lprobs_cum = fw_lprobs
        else:
            # make probs contain cumulative scores for each hypothesis
            raw_scores = scores[:, :, step - 1].unsqueeze(-1)
            fw_lprobs_cum = fw_lprobs.add(raw_scores)

        combined_lprobs = self.combine_fw_bw(combine_method, fw_lprobs_cum, bw_lprobs, step)

        # choose the top k according to the combined noisy channel model score
        torch.topk(
            combined_lprobs.view(bsz, -1),
            k=min(
                # Take the best 2 x beam_size predictions. We'll choose the first
                # beam_size of these which don't predict eos to continue with.
                beam_size * 2,
                combined_lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
            out=(self.scores_buf, self.indices_buf),
        )
        # save corresponding fw and lm scores
        self.fw_scores_buf = torch.gather(fw_lprobs_cum.view(bsz, -1), 1, self.indices_buf)
        self.lm_scores_buf = torch.gather(lm_lprobs.view(bsz, -1), 1, self.indices_buf)
        # Project back into relative indices and beams: the flat index is
        # beam * vocab_size + token, so split it into its two components.
        self.beams_buf = self.indices_buf // vocab_size
        self.indices_buf.fmod_(vocab_size)
        return self.scores_buf, self.fw_scores_buf, self.lm_scores_buf, self.indices_buf, self.beams_buf
def __init__(
    self,
    combine_method,
    tgt_dict,
    src_dict=None,
    beam_size=1,
    max_len_a=0,
    max_len_b=200,
    min_len=1,
    len_penalty=1.0,
    unk_penalty=0.0,
    retain_dropout=False,
    temperature=1.0,
    match_source_len=False,
    no_repeat_ngram_size=0,
    normalize_scores=True,
    channel_models=None,
    k2=10,
    ch_weight=1.0,
    channel_scoring_type='log_norm',
    top_k_vocab=0,
    lm_models=None,
    lm_dict=None,
    lm_weight=1.0,
    normalize_lm_scores_by_tgt_len=False,
):
    """Generates translations of a given source sentence,
    using beam search with noisy channel decoding.

    Args:
        combine_method (str): Method to combine direct, LM and channel
            model scores, e.g. ``"noisy_channel"`` or ``"lm_only"``
            (required, no default)
        tgt_dict (~fairseq.data.Dictionary): target dictionary
        src_dict (~fairseq.data.Dictionary): source dictionary
        beam_size (int, optional): beam width (default: 1)
        max_len_a/b (int, optional): generate sequences of maximum length
            ax + b, where x is the source length
        min_len (int, optional): the minimum length of the generated output
            (not including end-of-sentence)
        len_penalty (float, optional): length penalty, where <1.0 favors
            shorter, >1.0 favors longer sentences (default: 1.0)
        unk_penalty (float, optional): unknown word penalty, where <0
            produces more unks, >0 produces fewer (default: 0.0)
        retain_dropout (bool, optional): use dropout when generating
            (default: False)
        temperature (float, optional): temperature, where values
            >1.0 produce more uniform samples and values <1.0 produce
            sharper samples; must be > 0 (default: 1.0)
        match_source_len (bool, optional): outputs should match the source
            length (default: False)
        no_repeat_ngram_size (int, optional): Size of n-grams that we avoid
            repeating in the generation (default: 0)
        normalize_scores (bool, optional): normalize scores by the length
            of the output (default: True)
        channel_models (List[~fairseq.models.FairseqModel]): ensemble of models
            translating from the target to the source
        k2 (int, optional): Top K2 candidates to score per beam at each step
            (default: 10)
        ch_weight (float, optional): Weight associated with the channel model
            score assuming that the direct model score has weight 1.0
            (default: 1.0)
        channel_scoring_type (str, optional): String specifying how to score
            the channel model, e.g. ``'unnormalized'``, ``'k2_separate'``,
            ``'src_vocab'`` or ``'src_vocab_batched'``; any other value uses
            the fully normalized channel model output (default: 'log_norm')
        top_k_vocab (int, optional): If `channel_scoring_type` is `'src_vocab'` or
            `'src_vocab_batched'`, then this parameter specifies the number of
            most frequent tokens to include in the channel model output vocabulary,
            in addition to the source tokens in the input batch (default: 0)
        lm_models (List[~fairseq.models.FairseqModel]): ensemble of models
            generating text in the target language
        lm_dict (~fairseq.data.Dictionary): LM Model dictionary
        lm_weight (float, optional): Weight associated with the LM model score
            assuming that the direct model score has weight 1.0 (default: 1.0)
        normalize_lm_scores_by_tgt_len (bool, optional): Should we normalize LM scores
            by the target length? By default, we normalize the combination of
            LM and channel model scores by the source length

    Raises:
        AssertionError: if ``temperature`` is not greater than 0.
    """
    self.pad = tgt_dict.pad()
    self.unk = tgt_dict.unk()
    self.eos = tgt_dict.eos()
    self.vocab_size = len(tgt_dict)
    # the max beam size is the dictionary size - 1, since we never select pad
    self.beam_size = min(beam_size, self.vocab_size - 1)
    self.max_len_a = max_len_a
    self.max_len_b = max_len_b
    self.min_len = min_len
    self.normalize_scores = normalize_scores
    self.len_penalty = len_penalty
    self.unk_penalty = unk_penalty
    self.retain_dropout = retain_dropout
    self.temperature = temperature
    self.match_source_len = match_source_len
    self.no_repeat_ngram_size = no_repeat_ngram_size
    self.channel_models = channel_models
    self.src_dict = src_dict
    self.tgt_dict = tgt_dict
    self.combine_method = combine_method
    self.k2 = k2
    self.ch_weight = ch_weight
    self.channel_scoring_type = channel_scoring_type
    self.top_k_vocab = top_k_vocab
    self.lm_models = lm_models
    self.lm_dict = lm_dict
    self.lm_weight = lm_weight
    self.log_softmax_fn = torch.nn.LogSoftmax(dim=1)
    self.normalize_lm_scores_by_tgt_len = normalize_lm_scores_by_tgt_len

    # When the LM and the target side share one dictionary, token ids can be
    # passed to the LM unchanged; otherwise they go through ``tgt_to_lm``.
    self.share_tgt_dict = (self.lm_dict == self.tgt_dict)
    self.tgt_to_lm = make_dict2dict(tgt_dict, lm_dict)

    # chunk size used when channel scoring is done in batches
    # (``channel_scoring_type == 'src_vocab_batched'``)
    self.ch_scoring_bsz = 3072

    assert temperature > 0, '--temperature must be greater than 0'

    self.search = NoisyChannelBeamSearch(tgt_dict)
+ Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + """ + model = EnsembleModel(models) + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(model.models_size) + ], + ) + if not self.retain_dropout: + model.eval() + + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in sample['net_input'].items() + if k != 'prev_output_tokens' + } + src_tokens = encoder_input['src_tokens'] + src_lengths_no_eos = (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + input_size = src_tokens.size() + # batch dimension goes first followed by source lengths + bsz = input_size[0] + src_len = input_size[1] + beam_size = self.beam_size + + if self.match_source_len: + max_len = src_lengths_no_eos.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + # exclude the EOS marker + model.max_decoder_positions() - 1, + ) + + # compute the encoder output for each beam + encoder_outs = model.forward_encoder(encoder_input) + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = model.reorder_encoder_out(encoder_outs, new_order) + + src_lengths = encoder_input['src_lengths'] + # initialize buffers + scores = src_tokens.new(bsz * beam_size, max_len + 1).float().fill_(0) + lm_prefix_scores = src_tokens.new(bsz * beam_size).float().fill_(0) + + scores_buf = scores.clone() + tokens = src_tokens.new(bsz * beam_size, max_len + 2).long().fill_(self.pad) + tokens_buf = tokens.clone() + tokens[:, 0] = self.eos if bos_token is None else bos_token + + # reorder source tokens so they may be used 
as a reference in generating P(S|T) + src_tokens = reorder_all_tokens(src_tokens, src_lengths, self.src_dict.eos_index) + + src_tokens = src_tokens.repeat(1, beam_size).view(-1, src_len) + src_lengths = src_lengths.view(bsz, -1).repeat(1, beam_size).view(bsz*beam_size, -1) + + attn, attn_buf = None, None + nonpad_idxs = None + + # The cands_to_ignore indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then the cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. + cands_to_ignore = src_tokens.new_zeros(bsz, beam_size).eq(-1) # forward and backward-compatible False mask + + # list of completed sentences + finalized = [[] for i in range(bsz)] + finished = [False for i in range(bsz)] + num_remaining_sent = bsz + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens) + cand_offsets = torch.arange(0, cand_size).type_as(tokens) + + # helper function for allocating buffers on the fly + buffers = {} + + def buffer(name, type_of=tokens): # noqa + if name not in buffers: + buffers[name] = type_of.new() + return buffers[name] + + def is_finished(sent, step, unfin_idx): + """ + Check whether we've finished generation for a given sentence, by + comparing the worst score among finalized hypotheses to the best + possible score among unfinalized hypotheses. + """ + assert len(finalized[sent]) <= beam_size + if len(finalized[sent]) == beam_size: + return True + return False + + def finalize_hypos(step, bbsz_idx, eos_scores, combined_noisy_channel_eos_scores): + """ + Finalize the given hypotheses at this step, while keeping the total + number of finalized hypotheses per sentence <= beam_size. 
+ + Note: the input must be in the desired finalization order, so that + hypotheses that appear earlier in the input are preferred to those + that appear later. + + Args: + step: current time step + bbsz_idx: A vector of indices in the range [0, bsz*beam_size), + indicating which hypotheses to finalize + eos_scores: A vector of the same size as bbsz_idx containing + fw scores for each hypothesis + combined_noisy_channel_eos_scores: A vector of the same size as bbsz_idx containing + combined noisy channel scores for each hypothesis + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors + tokens_clone = tokens.index_select(0, bbsz_idx) + tokens_clone = tokens_clone[:, 1:step + 2] # skip the first index, which is EOS + assert not tokens_clone.eq(self.eos).any() + tokens_clone[:, step] = self.eos + attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step+2] if attn is not None else None + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, :step+1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + combined_noisy_channel_eos_scores /= (step + 1) ** self.len_penalty + + cum_unfin = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + + sents_seen = set() + for i, (idx, score) in enumerate(zip(bbsz_idx.tolist(), combined_noisy_channel_eos_scores.tolist())): + unfin_idx = idx // beam_size + sent = unfin_idx + cum_unfin[unfin_idx] + + sents_seen.add((sent, unfin_idx)) + + if self.match_source_len and step > src_lengths_no_eos[unfin_idx]: + score = -math.inf + + def get_hypo(): + + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i][nonpad_idxs[sent]] + _, alignment = hypo_attn.max(dim=0) + else: + hypo_attn = None + alignment = None + + return { + 
'tokens': tokens_clone[i], + 'score': score, + 'attention': hypo_attn, # src_len x tgt_len + 'alignment': alignment, + 'positional_scores': pos_scores[i], + } + + if len(finalized[sent]) < beam_size: + finalized[sent].append(get_hypo()) + + newly_finished = [] + for sent, unfin_idx in sents_seen: + # check termination conditions for this sentence + if not finished[sent] and is_finished(sent, step, unfin_idx): + finished[sent] = True + newly_finished.append(unfin_idx) + return newly_finished + + def noisy_channel_rescoring(lprobs, beam_size, bsz, src_tokens, tokens, k): + """Rescore the top k hypothesis from each beam using noisy channel modeling + Returns: + new_fw_lprobs: the direct model probabilities after pruning the top k + new_ch_lm_lprobs: the combined channel and language model probabilities + new_lm_lprobs: the language model probabilities after pruning the top k + """ + with torch.no_grad(): + lprobs_size = lprobs.size() + if prefix_tokens is not None and step < prefix_tokens.size(1): + probs_slice = lprobs.view(bsz, -1, lprobs.size(-1))[:, 0, :] + cand_scores = torch.gather( + probs_slice, dim=1, + index=prefix_tokens[:, step].view(-1, 1).data + ).expand(-1, beam_size).contiguous().view(bsz*beam_size, 1) + cand_indices = prefix_tokens[:, step].view(-1, 1).expand(bsz, beam_size).data.contiguous().view(bsz*beam_size, 1) + + # need to calculate and save fw and lm probs for prefix tokens + fw_top_k = cand_scores + fw_top_k_idx = cand_indices + k = 1 + else: + # take the top k best words for every sentence in batch*beam + fw_top_k, fw_top_k_idx = torch.topk(lprobs.view(beam_size*bsz, -1), k=k) + eos_idx = torch.nonzero(fw_top_k_idx.view(bsz*beam_size*k, -1) == self.eos)[:, 0] + ch_scores = fw_top_k.new_full((beam_size*bsz*k, ), 0) + src_size = torch.sum(src_tokens[:, :] != self.src_dict.pad_index, dim=1, keepdim=True, dtype=fw_top_k.dtype) + + if self.combine_method != "lm_only": + temp_src_tokens_full = src_tokens[:, :].repeat(1, k).view(bsz*beam_size*k, -1) 
+ not_padding = temp_src_tokens_full[:, 1:] != self.src_dict.pad_index + cur_tgt_size = step+2 + + # add eos to all candidate sentences except those that already end in eos + eos_tokens = tokens[:, 0].repeat(1, k).view(-1, 1) + eos_tokens[eos_idx] = self.tgt_dict.pad_index + + if step == 0: + channel_input = torch.cat((fw_top_k_idx.view(-1, 1), eos_tokens), 1) + else: + # move eos from beginning to end of target sentence + channel_input = torch.cat((tokens[:, 1:step + 1].repeat(1, k).view(-1, step), fw_top_k_idx.view(-1, 1), eos_tokens), 1) + + ch_input_lengths = torch.tensor(np.full(channel_input.size(0), cur_tgt_size)) + ch_input_lengths[eos_idx] = cur_tgt_size-1 + if self.channel_scoring_type == "unnormalized": + ch_encoder_output = channel_model.encoder(channel_input, src_lengths=ch_input_lengths) + ch_decoder_output, _ = channel_model.decoder(temp_src_tokens_full, encoder_out=ch_encoder_output, features_only=True) + del ch_encoder_output + ch_intermed_scores = channel_model.decoder.unnormalized_scores_given_target(ch_decoder_output, target_ids=temp_src_tokens_full[:, 1:]) + ch_intermed_scores = ch_intermed_scores.float() + ch_intermed_scores *= not_padding.float() + ch_scores = torch.sum(ch_intermed_scores, dim=1) + elif self.channel_scoring_type == "k2_separate": + for k_idx in range(k): + k_eos_tokens = eos_tokens[k_idx::k, :] + if step == 0: + k_ch_input = torch.cat((fw_top_k_idx[:, k_idx:k_idx+1], k_eos_tokens), 1) + else: + # move eos from beginning to end of target sentence + k_ch_input = torch.cat((tokens[:, 1:step + 1], fw_top_k_idx[:, k_idx:k_idx+1], k_eos_tokens), 1) + k_ch_input_lengths = ch_input_lengths[k_idx::k] + k_ch_output = channel_model(k_ch_input, k_ch_input_lengths, src_tokens) + k_ch_lprobs = channel_model.get_normalized_probs(k_ch_output, log_probs=True) + k_ch_intermed_scores = torch.gather(k_ch_lprobs[:, :-1, :], 2, src_tokens[:, 1:].unsqueeze(2)).squeeze(2) + k_ch_intermed_scores *= not_padding.float() + ch_scores[k_idx::k] = 
torch.sum(k_ch_intermed_scores, dim=1) + elif self.channel_scoring_type == "src_vocab": + ch_encoder_output = channel_model.encoder(channel_input, src_lengths=ch_input_lengths) + ch_decoder_output, _ = channel_model.decoder(temp_src_tokens_full, encoder_out=ch_encoder_output, features_only=True) + + del ch_encoder_output + ch_lprobs = normalized_scores_with_batch_vocab( + channel_model.decoder, + ch_decoder_output, src_tokens, k, bsz, beam_size, + self.src_dict.pad_index, top_k=self.top_k_vocab) + ch_scores = torch.sum(ch_lprobs, dim=1) + elif self.channel_scoring_type == "src_vocab_batched": + ch_bsz_size = temp_src_tokens_full.shape[0] + ch_lprobs_list = [None] * len(range(0, ch_bsz_size, self.ch_scoring_bsz)) + for i, start_idx in enumerate(range(0, ch_bsz_size, self.ch_scoring_bsz)): + end_idx = min(start_idx + self.ch_scoring_bsz, ch_bsz_size) + temp_src_tokens_full_batch = temp_src_tokens_full[start_idx:end_idx, :] + channel_input_batch = channel_input[start_idx:end_idx, :] + ch_input_lengths_batch = ch_input_lengths[start_idx:end_idx] + ch_encoder_output_batch = channel_model.encoder(channel_input_batch, src_lengths=ch_input_lengths_batch) + ch_decoder_output_batch, _ = channel_model.decoder(temp_src_tokens_full_batch, encoder_out=ch_encoder_output_batch, features_only=True) + ch_lprobs_list[i] = normalized_scores_with_batch_vocab( + channel_model.decoder, + ch_decoder_output_batch, src_tokens, k, bsz, beam_size, + self.src_dict.pad_index, top_k=self.top_k_vocab, + start_idx=start_idx, end_idx=end_idx) + ch_lprobs = torch.cat(ch_lprobs_list, dim=0) + ch_scores = torch.sum(ch_lprobs, dim=1) + else: + ch_output = channel_model(channel_input, ch_input_lengths, temp_src_tokens_full) + ch_lprobs = channel_model.get_normalized_probs(ch_output, log_probs=True) + ch_intermed_scores = torch.gather(ch_lprobs[:, :-1, :], 2, temp_src_tokens_full[:, 1:].unsqueeze(2)).squeeze().view(bsz*beam_size*k, -1) + ch_intermed_scores *= not_padding.float() + ch_scores = 
torch.sum(ch_intermed_scores, dim=1) + + else: + cur_tgt_size = 0 + ch_scores = ch_scores.view(bsz*beam_size, k) + expanded_lm_prefix_scores = lm_prefix_scores.unsqueeze(1).expand(-1, k).flatten() + + if self.share_tgt_dict: + lm_scores = get_lm_scores(lm, tokens[:, :step + 1].view(-1, step+1), lm_incremental_states, fw_top_k_idx.view(-1, 1), torch.tensor(np.full(tokens.size(0), step+1)), k) + else: + new_lm_input = dict2dict(tokens[:, :step + 1].view(-1, step+1), self.tgt_to_lm) + new_cands = dict2dict(fw_top_k_idx.view(-1, 1), self.tgt_to_lm) + lm_scores = get_lm_scores(lm, new_lm_input, lm_incremental_states, new_cands, torch.tensor(np.full(tokens.size(0), step+1)), k) + + lm_scores.add_(expanded_lm_prefix_scores) + ch_lm_scores = combine_ch_lm(self.combine_method, ch_scores, lm_scores, src_size, cur_tgt_size) + # initialize all as min value + new_fw_lprobs = ch_scores.new(lprobs_size).fill_(-1e17).view(bsz*beam_size, -1) + new_ch_lm_lprobs = ch_scores.new(lprobs_size).fill_(-1e17).view(bsz*beam_size, -1) + new_lm_lprobs = ch_scores.new(lprobs_size).fill_(-1e17).view(bsz*beam_size, -1) + new_fw_lprobs[:, self.pad] = -math.inf + new_ch_lm_lprobs[:, self.pad] = -math.inf + new_lm_lprobs[:, self.pad] = -math.inf + + new_fw_lprobs.scatter_(1, fw_top_k_idx, fw_top_k) + new_ch_lm_lprobs.scatter_(1, fw_top_k_idx, ch_lm_scores) + new_lm_lprobs.scatter_(1, fw_top_k_idx, lm_scores.view(-1, k)) + return new_fw_lprobs, new_ch_lm_lprobs, new_lm_lprobs + + def combine_ch_lm(combine_type, ch_scores, lm_scores1, src_size, tgt_size): + if self.channel_scoring_type == "unnormalized": + ch_scores = self.log_softmax_fn( + ch_scores.view(-1, self.beam_size * self.k2) + ).view(ch_scores.shape) + ch_scores = ch_scores * self.ch_weight + lm_scores1 = lm_scores1 * self.lm_weight + + if combine_type == "lm_only": + # log P(T|S) + log P(T) + ch_scores = lm_scores1.view(ch_scores.size()) + elif combine_type == "noisy_channel": + # 1/t log P(T|S) + 1/s log P(S|T) + 1/t log P(T) + if 
self.normalize_lm_scores_by_tgt_len: + ch_scores.div_(src_size) + lm_scores_norm = lm_scores1.view(ch_scores.size()).div(tgt_size) + ch_scores.add_(lm_scores_norm) + # 1/t log P(T|S) + 1/s log P(S|T) + 1/s log P(T) + else: + ch_scores.add_(lm_scores1.view(ch_scores.size())) + ch_scores.div_(src_size) + + return ch_scores + + if self.channel_models is not None: + channel_model = self.channel_models[0] # assume only one channel_model model + else: + channel_model = None + + lm = EnsembleModel(self.lm_models) + lm_incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(lm.models_size) + ], + ) + + reorder_state = None + batch_idxs = None + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs) + reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size) + model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = model.reorder_encoder_out(encoder_outs, reorder_state) + + lm.reorder_incremental_state(lm_incremental_states, reorder_state) + + fw_lprobs, avg_attn_scores = model.forward_decoder( + tokens[:, :step + 1], encoder_outs, incremental_states, temperature=self.temperature, + ) + + fw_lprobs[:, self.pad] = -math.inf # never select pad + fw_lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + fw_lprobs, ch_lm_lprobs, lm_lprobs = noisy_channel_rescoring(fw_lprobs, beam_size, bsz, src_tokens, tokens, self.k2) + + # handle min and max length constraints + if step >= max_len: + fw_lprobs[:, :self.eos] = -math.inf + fw_lprobs[:, self.eos + 1:] = -math.inf + elif step < self.min_len: + fw_lprobs[:, self.eos] = -math.inf + + # handle 
prefix tokens (possibly with different lengths) + if prefix_tokens is not None and step < prefix_tokens.size(1): + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_mask = prefix_toks.ne(self.pad) + + prefix_fw_lprobs = fw_lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + fw_lprobs[prefix_mask] = -math.inf + fw_lprobs[prefix_mask] = fw_lprobs[prefix_mask].scatter_( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_fw_lprobs + ) + + prefix_ch_lm_lprobs = ch_lm_lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + ch_lm_lprobs[prefix_mask] = -math.inf + ch_lm_lprobs[prefix_mask] = ch_lm_lprobs[prefix_mask].scatter_( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_ch_lm_lprobs + ) + + prefix_lm_lprobs = lm_lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + lm_lprobs[prefix_mask] = -math.inf + lm_lprobs[prefix_mask] = lm_lprobs[prefix_mask].scatter_( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lm_lprobs + ) + + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[:, 0, 1:step + 1] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + def replicate_first_beam(tensor, mask): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = replicate_first_beam(tokens, eos_mask_batch_dim) + scores = replicate_first_beam(scores, eos_mask_batch_dim) + + fw_lprobs = replicate_first_beam(fw_lprobs, eos_mask_batch_dim) + ch_lm_lprobs = replicate_first_beam(ch_lm_lprobs, eos_mask_batch_dim) + lm_lprobs = replicate_first_beam(lm_lprobs, 
eos_mask_batch_dim) + + if self.no_repeat_ngram_size > 0: + # for each beam and batch sentence, generate a list of previous ngrams + gen_ngrams = [{} for bbsz_idx in range(bsz * beam_size)] + for bbsz_idx in range(bsz * beam_size): + gen_tokens = tokens[bbsz_idx].tolist() + for ngram in zip(*[gen_tokens[i:] for i in range(self.no_repeat_ngram_size)]): + gen_ngrams[bbsz_idx][tuple(ngram[:-1])] = \ + gen_ngrams[bbsz_idx].get(tuple(ngram[:-1]), []) + [ngram[-1]] + + # Record attention scores + if avg_attn_scores is not None: + if attn is None: + attn = scores.new(bsz * beam_size, src_tokens.size(1), max_len + 2) + attn_buf = attn.clone() + nonpad_idxs = src_tokens.ne(self.pad) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(fw_lprobs) + scores_buf = scores_buf.type_as(fw_lprobs) + + self.search.set_src_lengths(src_lengths_no_eos) + + if self.no_repeat_ngram_size > 0: + def calculate_banned_tokens(bbsz_idx): + # before decoding the next token, prevent decoding of ngrams that have already appeared + ngram_index = tuple(tokens[bbsz_idx, step + 2 - self.no_repeat_ngram_size:step + 1].tolist()) + return gen_ngrams[bbsz_idx].get(ngram_index, []) + + if step + 2 - self.no_repeat_ngram_size >= 0: + # no banned tokens if we haven't generated no_repeat_ngram_size tokens yet + banned_tokens = [calculate_banned_tokens(bbsz_idx) for bbsz_idx in range(bsz * beam_size)] + else: + banned_tokens = [[] for bbsz_idx in range(bsz * beam_size)] + + for bbsz_idx in range(bsz * beam_size): + fw_lprobs[bbsz_idx, banned_tokens[bbsz_idx]] = -math.inf + + combined_noisy_channel_scores, fw_lprobs_top_k, lm_lprobs_top_k, cand_indices, cand_beams = self.search.step( + step, + fw_lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], ch_lm_lprobs.view(bsz, -1, self.vocab_size), + lm_lprobs.view(bsz, -1, self.vocab_size), self.combine_method + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of 
values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos (except for candidates to be ignored) + eos_mask = cand_indices.eq(self.eos) + eos_mask[:, :beam_size] &= ~cands_to_ignore + + # only consider eos when it's among the top beam_size indices + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = set() + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + fw_lprobs_top_k[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + combined_noisy_channel_eos_scores = torch.masked_select( + combined_noisy_channel_scores[:, :beam_size], + mask=eos_mask[:, :beam_size], + ) + + # finalize hypo using channel model score + finalized_sents = finalize_hypos( + step, eos_bbsz_idx, eos_scores, combined_noisy_channel_eos_scores) + + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + + if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = cand_indices.new_ones(bsz) + batch_mask[cand_indices.new(finalized_sents)] = 0 + batch_idxs = torch.nonzero(batch_mask).squeeze(-1) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + lm_lprobs_top_k = lm_lprobs_top_k[batch_idxs] + + fw_lprobs_top_k = fw_lprobs_top_k[batch_idxs] + cand_indices = cand_indices[batch_idxs] + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths_no_eos = src_lengths_no_eos[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + scores_buf.resize_as_(scores) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + 
tokens_buf.resize_as_(tokens) + src_tokens = src_tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + src_lengths = src_lengths.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + lm_prefix_scores = lm_prefix_scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1).squeeze() + + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, attn.size(1), -1) + attn_buf.resize_as_(attn) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos or + # ignored hypos and values < cand_size indicate candidate + # active hypos. After this, the min values per row are the top + # candidate active hypos. + eos_mask[:, :beam_size] |= cands_to_ignore + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just the hypos + # with the smallest values in active_mask + active_hypos, new_cands_to_ignore = buffer('active_hypos'), buffer('new_cands_to_ignore') + torch.topk( + active_mask, k=beam_size, dim=1, largest=False, + out=(new_cands_to_ignore, active_hypos) + ) + + # update cands_to_ignore to ignore any finalized hypos + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + assert (~cands_to_ignore).any(dim=1).all() + + active_bbsz_idx = buffer('active_bbsz_idx') + torch.gather( + cand_bbsz_idx, dim=1, index=active_hypos, + out=active_bbsz_idx, + ) + active_scores = torch.gather( + fw_lprobs_top_k, dim=1, index=active_hypos, + out=scores[:, step].view(bsz, beam_size), + ) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + torch.index_select( + tokens[:, :step + 1], dim=0, index=active_bbsz_idx, + out=tokens_buf[:, :step + 1], + ) + torch.gather( + cand_indices, dim=1, index=active_hypos, + out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1], + ) + if step > 0: + 
torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx, + out=scores_buf[:, :step], + ) + torch.gather( + fw_lprobs_top_k, dim=1, index=active_hypos, + out=scores_buf.view(bsz, beam_size, -1)[:, :, step], + ) + torch.gather( + lm_lprobs_top_k, dim=1, index=active_hypos, + out=lm_prefix_scores.view(bsz, beam_size) + ) + + # copy attention for active hypotheses + if attn is not None: + torch.index_select( + attn[:, :, :step + 2], dim=0, index=active_bbsz_idx, + out=attn_buf[:, :, :step + 2], + ) + + # swap buffers + tokens, tokens_buf = tokens_buf, tokens + scores, scores_buf = scores_buf, scores + if attn is not None: + attn, attn_buf = attn_buf, attn + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True) + + return finalized + + +def get_lm_scores(model, input_tokens, incremental_states, cand_tokens, input_len, k): + with torch.no_grad(): + lm_lprobs, avg_attn_scores = model.forward_decoder( + input_tokens, encoder_outs=None, incremental_states=incremental_states, + ) + + lm_lprobs_size = lm_lprobs.size(0) + probs_next_wrd = torch.gather(lm_lprobs.repeat(1, k).view(lm_lprobs_size*k, -1), 1, cand_tokens).squeeze().view(-1) + + return probs_next_wrd + + +def make_dict2dict(old_dict, new_dict): + dict2dict_map = {} + for sym in old_dict.symbols: + dict2dict_map[old_dict.index(sym)] = new_dict.index(sym) + return dict2dict_map + + +def dict2dict(tokens, dict2dict_map): + if tokens.device == torch.device('cpu'): + tokens_tmp = tokens + else: + tokens_tmp = tokens.cpu() + return tokens_tmp.map_( + tokens_tmp, + lambda _, val, dict2dict_map=dict2dict_map : dict2dict_map[float(val)] + ).to(tokens.device) + + +def reorder_tokens(tokens, lengths, eos): + # reorder source tokens so they may be used as reference for P(S|T) + return torch.cat((tokens.new([eos]), tokens[-lengths:-1], 
tokens[:-lengths]), 0) + + +def reorder_all_tokens(tokens, lengths, eos): + # used to reorder src tokens from [ .. ] to [ ...] + # so source tokens can be used to predict P(S|T) + return torch.stack([reorder_tokens(token, length, eos) for token, length in zip(tokens, lengths)]) + + +def normalized_scores_with_batch_vocab( + model_decoder, features, target_ids, k, bsz, beam_size, + pad_idx, top_k=0, vocab_size_meter=None, start_idx=None, + end_idx=None, **kwargs): + """ + Get normalized probabilities (or log probs) from a net's output + w.r.t. vocab consisting of target IDs in the batch + """ + if model_decoder.adaptive_softmax is None: + weight = model_decoder.output_projection.weight + vocab_ids = torch.unique( + torch.cat( + (torch.unique(target_ids), torch.arange(top_k, device=target_ids.device)) + ) + ) + id_map = dict(zip(vocab_ids.tolist(), range(len(vocab_ids)))) + mapped_target_ids = target_ids.cpu().apply_( + lambda x, id_map=id_map: id_map[x] + ).to(target_ids.device) + expanded_target_ids = mapped_target_ids[:, :].repeat(1, k).view(bsz*beam_size*k, -1) + if start_idx is not None and end_idx is not None: + expanded_target_ids = expanded_target_ids[start_idx:end_idx, :] + logits = F.linear(features, weight[vocab_ids, :]) + log_softmax = F.log_softmax(logits, dim=-1, dtype=torch.float32) + intermed_scores = torch.gather( + log_softmax[:, :-1, :], + 2, + expanded_target_ids[:, 1:].unsqueeze(2), + ).squeeze() + not_padding = expanded_target_ids[:, 1:] != pad_idx + intermed_scores *= not_padding.float() + return intermed_scores + else: + raise ValueError("adaptive softmax doesn't work with " + + "`normalized_scores_with_batch_vocab()`") diff --git a/fairseq/examples/fast_noisy_channel/noisy_channel_translation.py b/fairseq/examples/fast_noisy_channel/noisy_channel_translation.py new file mode 100644 index 0000000..b74bdfd --- /dev/null +++ b/fairseq/examples/fast_noisy_channel/noisy_channel_translation.py @@ -0,0 +1,127 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq.tasks.translation import TranslationTask +from fairseq.tasks.language_modeling import LanguageModelingTask +from fairseq import checkpoint_utils +import argparse +from fairseq.tasks import register_task +import torch + + +@register_task("noisy_channel_translation") +class NoisyChannelTranslation(TranslationTask): + """ + Rescore the top k candidates from each beam using noisy channel modeling + """ + + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + TranslationTask.add_args(parser) + # fmt: off + parser.add_argument('--channel-model', metavar='FILE', + help='path to P(S|T) model. P(S|T) and P(T|S) must share source and target dictionaries.') + parser.add_argument('--combine-method', default='lm_only', + choices=['lm_only', 'noisy_channel'], + help="""method for combining direct and channel model scores. + lm_only: decode with P(T|S)P(T) + noisy_channel: decode with 1/t P(T|S) + 1/s(P(S|T)P(T))""") + parser.add_argument('--normalize-lm-scores-by-tgt-len', action='store_true', default=False, + help='normalize lm score by target length instead of source length') + parser.add_argument('--channel-scoring-type', default='log_norm', choices=['unnormalized', 'log_norm', 'k2_separate', 'src_vocab', 'src_vocab_batched'], + help="Normalize bw scores with log softmax or return bw scores without log softmax") + parser.add_argument('--top-k-vocab', default=0, type=int, + help='top k vocab IDs to use with `src_vocab` in channel model scoring') + parser.add_argument('--k2', default=50, type=int, + help='the top k2 candidates to rescore with the noisy channel model for each beam') + parser.add_argument('--ch-wt', default=1, type=float, + help='weight for the channel model') + parser.add_argument('--lm-model', metavar='FILE', + help='path to lm model file, to model P(T). 
P(T) must share the same vocab as the direct model on the target side') + parser.add_argument('--lm-data', metavar='FILE', + help='path to lm model training data for target language, used to properly load LM with correct dictionary') + parser.add_argument('--lm-wt', default=1, type=float, + help='the weight of the lm in joint decoding') + # fmt: on + + def build_generator( + self, models, args, seq_gen_cls=None, extra_gen_cls_kwargs=None + ): + if getattr(args, "score_reference", False): + raise NotImplementedError() + else: + from .noisy_channel_sequence_generator import NoisyChannelSequenceGenerator + use_cuda = torch.cuda.is_available() and not self.args.cpu + assert self.args.lm_model is not None, '--lm-model required for noisy channel generation!' + assert self.args.lm_data is not None, '--lm-data required for noisy channel generation to map between LM and bitext vocabs' + if self.args.channel_model is not None: + import copy + ch_args_task = copy.deepcopy(self.args) + tmp = ch_args_task.source_lang + ch_args_task.source_lang = ch_args_task.target_lang + ch_args_task.target_lang = tmp + ch_args_task._name = 'translation' + channel_task = TranslationTask.setup_task(ch_args_task) + + arg_dict = {} + arg_dict['task'] = 'language_modeling' + arg_dict['sample_break_mode'] = 'eos' + arg_dict['data'] = self.args.lm_data + arg_dict['output_dictionary_size'] = -1 + lm_args = argparse.Namespace(**arg_dict) + lm_task = LanguageModelingTask.setup_task(lm_args) + lm_dict = lm_task.output_dictionary + + if self.args.channel_model is not None: + channel_models, _ = checkpoint_utils.load_model_ensemble(self.args.channel_model.split(':'), task=channel_task) + + for model in channel_models: + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if self.args.fp16: + model.half() + if use_cuda: + model.cuda() + else: + channel_models = None + + lm_models, _ = 
checkpoint_utils.load_model_ensemble(self.args.lm_model.split(':'), task=lm_task) + + for model in lm_models: + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if self.args.fp16: + model.half() + if use_cuda: + model.cuda() + return NoisyChannelSequenceGenerator( + combine_method=self.args.combine_method, + tgt_dict=self.target_dictionary, + src_dict=self.source_dictionary, + beam_size=getattr(args, 'beam', 5), + max_len_a=getattr(args, 'max_len_a', 0), + max_len_b=getattr(args, 'max_len_b', 200), + min_len=getattr(args, 'min_len', 1), + len_penalty=getattr(args, 'lenpen', 1), + unk_penalty=getattr(args, 'unkpen', 0), + temperature=getattr(args, 'temperature', 1.), + match_source_len=getattr(args, 'match_source_len', False), + no_repeat_ngram_size=getattr(args, 'no_repeat_ngram_size', 0), + normalize_scores=(not getattr(args, 'unnormalized', False)), + channel_models=channel_models, + k2=getattr(self.args, 'k2', 50), + ch_weight=getattr(self.args, 'ch_wt', 1), + channel_scoring_type=self.args.channel_scoring_type, + top_k_vocab=self.args.top_k_vocab, + lm_models=lm_models, + lm_dict=lm_dict, + lm_weight=getattr(self.args, 'lm_wt', 1), + normalize_lm_scores_by_tgt_len=getattr(self.args, 'normalize_lm_scores_by_tgt_len', False), + ) diff --git a/fairseq/examples/flores101/README.md b/fairseq/examples/flores101/README.md new file mode 100644 index 0000000..635c13f --- /dev/null +++ b/fairseq/examples/flores101/README.md @@ -0,0 +1,223 @@ +

+ +

+ +# Flores101: Large-Scale Multilingual Machine Translation + +## Introduction + +Baseline pretrained models for small and large tracks of WMT 21 Large-Scale Multilingual Machine Translation competition. + +Flores Task at WMT 21: http://www.statmt.org/wmt21/large-scale-multilingual-translation-task.html + +Flores announcement blog post: https://ai.facebook.com/blog/flores-researchers-kick-off-multilingual-translation-challenge-at-wmt-and-call-for-compute-grants/ + + + +## Pretrained models + +Model | Num layers | Embed dimension | FFN dimension| Vocab Size | #params | Download +---|---|---|---|---|---|--- +`flores101_mm100_615M` | 12 | 1024 | 4096 | 256,000 | 615M | https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_615M.tar.gz +`flores101_mm100_175M` | 6 | 512 | 2048 | 256,000 | 175M | https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_175M.tar.gz + + +These models are trained similarly to [M2M-100](https://arxiv.org/abs/2010.11125) with additional support for the languages that are part of the WMT Large-Scale Multilingual Machine Translation track. Full list of languages can be found at the bottom. + + +## Example Generation code + +### Download model, sentencepiece vocab + +```bash +fairseq=/path/to/fairseq +cd $fairseq + +# Download 615M param model. 
+wget https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_615M.tar.gz + +# Extract +tar -xvzf flores101_mm100_615M.tar.gz +``` + +### Encode using our SentencePiece Model +Note: Install SentencePiece from [here](https://github.com/google/sentencepiece) + + +```bash +fairseq=/path/to/fairseq +cd $fairseq + +# Download example dataset From German to French +sacrebleu --echo src -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.de +sacrebleu --echo ref -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.fr + +for lang in de fr ; do + python scripts/spm_encode.py \ + --model flores101_mm100_615M/sentencepiece.bpe.model \ + --output_format=piece \ + --inputs=raw_input.de-fr.${lang} \ + --outputs=spm.de-fr.${lang} +done +``` + +### Binarization + +```bash +fairseq-preprocess \ + --source-lang de --target-lang fr \ + --testpref spm.de-fr \ + --thresholdsrc 0 --thresholdtgt 0 \ + --destdir data_bin \ + --srcdict flores101_mm100_615M/dict.txt --tgtdict flores101_mm100_615M/dict.txt +``` + +### Generation + + +```bash +fairseq-generate \ + data_bin \ + --batch-size 1 \ + --path flores101_mm100_615M/model.pt \ + --fixed-dictionary flores101_mm100_615M/dict.txt \ + -s de -t fr \ + --remove-bpe 'sentencepiece' \ + --beam 5 \ + --task translation_multi_simple_epoch \ + --lang-pairs flores101_mm100_615M/language_pairs.txt \ + --decoder-langtok --encoder-langtok src \ + --gen-subset test \ + --fp16 \ + --dataset-impl mmap \ + --distributed-world-size 1 --distributed-no-spawn +``` + +### Supported Languages and lang code + +Language | lang code +---|--- +Afrikaans | af +Amharic | am +Arabic | ar +Assamese | as +Asturian | ast +Aymara | ay +Azerbaijani | az +Bashkir | ba +Belarusian | be +Bulgarian | bg +Bengali | bn +Breton | br +Bosnian | bs +Catalan | ca +Cebuano | ceb +Chokwe | cjk +Czech | cs +Welsh | cy +Danish | da +German | de +Dyula| dyu +Greek | el +English | en +Spanish | es +Estonian | et +Persian | fa +Fulah | ff +Finnish | fi +French | fr +Western 
Frisian | fy +Irish | ga +Scottish Gaelic | gd +Galician | gl +Gujarati | gu +Hausa | ha +Hebrew | he +Hindi | hi +Croatian | hr +Haitian Creole | ht +Hungarian | hu +Armenian | hy +Indonesian | id +Igbo | ig +Iloko | ilo +Icelandic | is +Italian | it +Japanese | ja +Javanese | jv +Georgian | ka +Kachin | kac +Kamba | kam +Kabuverdianu | kea +Kongo | kg +Kazakh | kk +Central Khmer | km +Kimbundu | kmb +Northern Kurdish | kmr +Kannada | kn +Korean | ko +Kurdish | ku +Kyrgyz | ky +Luxembourgish | lb +Ganda | lg +Lingala | ln +Lao | lo +Lithuanian | lt +Luo | luo +Latvian | lv +Malagasy | mg +Maori | mi +Macedonian | mk +Malayalam | ml +Mongolian | mn +Marathi | mr +Malay | ms +Maltese | mt +Burmese | my +Nepali | ne +Dutch | nl +Norwegian | no +Northern Sotho | ns +Nyanja | ny +Occitan | oc +Oromo | om +Oriya | or +Punjabi | pa +Polish | pl +Pashto | ps +Portuguese | pt +Quechua | qu +Romanian | ro +Russian | ru +Sindhi | sd +Shan | shn +Sinhala | si +Slovak | sk +Slovenian | sl +Shona | sn +Somali | so +Albanian | sq +Serbian | sr +Swati | ss +Sundanese | su +Swedish | sv +Swahili | sw +Tamil | ta +Telugu | te +Tajik | tg +Thai | th +Tigrinya | ti +Tagalog | tl +Tswana | tn +Turkish | tr +Ukrainian | uk +Umbundu | umb +Urdu | ur +Uzbek | uz +Vietnamese | vi +Wolof | wo +Xhosa | xh +Yiddish | yi +Yoruba | yo +Chinese| zh +Zulu | zu diff --git a/fairseq/examples/fully_sharded_data_parallel/README.md b/fairseq/examples/fully_sharded_data_parallel/README.md new file mode 100644 index 0000000..b9e44fe --- /dev/null +++ b/fairseq/examples/fully_sharded_data_parallel/README.md @@ -0,0 +1,177 @@ +# Fully Sharded Data Parallel (FSDP) + +## Overview +Recent work by [Microsoft](https://arxiv.org/abs/1910.02054) and +[Google](https://arxiv.org/abs/2004.13336) has shown that data parallel +training can be made significantly more efficient by sharding the model +parameters and optimizer state across data parallel workers. 
These ideas are +encapsulated in the new **`FullyShardedDataParallel` (FSDP)** wrapper provided +by [fairscale](https://github.com/facebookresearch/fairscale/). + +Compared to PyTorch DDP: +* FSDP produces identical results as PyTorch DDP (it's still synchronous data parallel training) +* FSDP shards parameters (FP16 + FP32) and optimizer state across data parallel GPUs +* FSDP is faster than PyTorch DDP because the optimizer step is sharded, and the communication can be overlapped with the forward pass +* FSDP enables training 13B parameter models on 8 GPUs and 175B parameter models on 128 GPUs + +FSDP is fully supported in fairseq via the following new arguments: +* `--ddp-backend=fully_sharded`: enables full sharding via FSDP +* `--cpu-offload`: offloads the optimizer state and FP32 model copy to CPU (combine with `--optimizer=cpu_adam`) +* `--no-reshard-after-forward`: increases training speed for large models (1B+ params) and is similar to ZeRO stage 2 +* other popular options (`--fp16`, `--update-freq`, `--checkpoint-activations`, `--offload-activations`, etc.) continue to work as normal + +
Limitations

+ +FSDP currently has several limitations compared to fairseq's default DDP backend (PyTorch DDP): +* while FSDP is fully compatible with pointwise Optimizers (e.g., Adam, AdamW, Adadelta, Adamax, SGD, etc.), it is not currently compatible with non-pointwise Optimizers (e.g., Adagrad, Adafactor, LAMB, etc.) +* FSDP depends on flattening the parameters, so models that currently require `--fp16-no-flatten-grads` may not be supported + +See the [fairscale docs](https://fairscale.readthedocs.io/en/latest/api/nn/fsdp_tips.html) for a more detailed +explanation of these and other limitations. + +

+ +
How it works

+ +Fully Sharded Data Parallel + +See the [fairscale docs](https://fairscale.readthedocs.io/en/latest/api/nn/fsdp_tips.html) for a more detailed +explanation of how FSDP works. + +

+ +## Example usage + +The following examples illustrate how to train a very large language model with +13 billion parameters on 1 GPU by offloading parameters and optimizer states to +CPU, or on 8 GPUs by fully sharding the params and optimizer states across GPUs. + +These examples use the WikiText-103 dataset for demonstration purposes, but +in practice a much larger dataset will be needed to achieve good results. +Follow the [instructions here](https://github.com/pytorch/fairseq/blob/main/examples/roberta/README.pretraining.md#1-preprocess-the-data) +to preprocess the WikiText-103 dataset using the GPT-2/RoBERTa vocabulary. + +### 13B params on 1 V100 GPU (with CPU offloading) + +The following command trains a 13B parameter GPT-3 model on a single V100 GPU +using the `--cpu-offload` feature to offload parameters and optimizer states to +CPU. In this setting, the optimizer step (Adam) happens on CPU. We also use the +`--checkpoint-activations` feature (sometimes called [gradient checkpointing](https://pytorch.org/docs/stable/checkpoint.html)), +which further saves memory in exchange for a small increase in computation. + +**Requirements:** +- Install the latest master version of fairscale: `pip install git+https://github.com/facebookresearch/fairscale.git@master` +- You'll need 32GB of GPU memory and ~256GB of system memory to train the 13B param model. +- If you have less system memory, the 6.7B param model can be trained with ~128GB of system memory, just set `--arch transformer_lm_gpt3_6_7` +- We use the CPU Adam optimizer from [DeepSpeed](https://github.com/microsoft/DeepSpeed), so you'll need to `pip install deepspeed` before running the command. + +**Notes:** +- The command will take ~5 minutes to start training, during which time it will appear to be hung, since randomly initializing 13B weights can be slow. +- The `--cpu-offload` feature requires training in mixed precision (`--fp16`). 
+- Tune the `OMP_NUM_THREADS` env variable for best performance with CPU offloading. +- The example command below stops training after 10 steps (`--max-update 10`) and does not save checkpoints (`--no-save`). + +```bash +OMP_NUM_THREADS=20 CUDA_VISIBLE_DEVICES=0 \ + fairseq-train data-bin/wikitext-103-roberta-bpe-bin \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 2048 --batch-size 8 \ + --arch transformer_lm_gpt3_13 \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 10 --no-save --log-format json --log-interval 1 +``` + +
Example output

+ +``` +(...) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | num. model params: 13,110,865,920 (num. trained: 13,110,865,920) +(...) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | training on 1 devices (GPUs/TPUs) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | max tokens per GPU = None and batch size per GPU = 8 +(...) +Adam Optimizer #0 is created with AVX2 arithmetic capability. +Config: alpha=0.000100, betas=(0.900000, 0.980000), weight_decay=0.000000, adam_w=1 +(...) +2021-03-08 12:31:36 | INFO | train_inner | {"epoch": 1, "update": 0.0, "loss": "16.475", "ppl": "91120.8", "wps": "0", "ups": "0", "wpb": "16384", "bsz": "8", "num_updates": "1", "lr": "2e-05", "gnorm": "20.751", "loss_scale": "4", "train_wall": "99", "gb_free": "9.3", "wall": "105"} +2021-03-08 12:32:33 | INFO | train_inner | {"epoch": 1, "update": 0.0, "loss": "16.446", "ppl": "89281.6", "wps": "288.7", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "2", "lr": "4e-05", "gnorm": "19.777", "loss_scale": "4", "train_wall": "57", "gb_free": "9.3", "wall": "161"} +2021-03-08 12:33:12 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +2021-03-08 12:33:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +2021-03-08 12:34:45 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "25.22", "ppl": "3.90691e+07", "wps": "123.4", "ups": "0.01", "wpb": "16384", "bsz": "8", "num_updates": "3", "lr": "6e-05", "gnorm": "131.281", "loss_scale": "1", "train_wall": "133", "gb_free": "9.3", "wall": "294"} +2021-03-08 12:35:43 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.079", "ppl": "276809", "wps": "285.5", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "4", "lr": "8e-05", "gnorm": "13.776", "loss_scale": "1", "train_wall": "57", "gb_free": "9.3", "wall": "351"} +2021-03-08 12:36:35 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": 
"23.729", "ppl": "1.39088e+07", "wps": "316.7", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "5", "lr": "0.0001", "gnorm": "72.774", "loss_scale": "1", "train_wall": "52", "gb_free": "9.3", "wall": "403"} +2021-03-08 12:37:28 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "20.429", "ppl": "1.41203e+06", "wps": "307.6", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "6", "lr": "8e-05", "gnorm": "60.846", "loss_scale": "1", "train_wall": "53", "gb_free": "9.3", "wall": "456"} +2021-03-08 12:38:27 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.965", "ppl": "511684", "wps": "279.4", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "7", "lr": "6e-05", "gnorm": "22.687", "loss_scale": "1", "train_wall": "59", "gb_free": "9.3", "wall": "515"} +2021-03-08 12:39:18 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.345", "ppl": "332887", "wps": "319.1", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "8", "lr": "4e-05", "gnorm": "8.451", "loss_scale": "1", "train_wall": "51", "gb_free": "9.3", "wall": "566"} +2021-03-08 12:40:11 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "18.262", "ppl": "314336", "wps": "305.9", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "9", "lr": "2e-05", "gnorm": "6.457", "loss_scale": "1", "train_wall": "54", "gb_free": "9.3", "wall": "620"} +2021-03-08 12:41:04 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "17.556", "ppl": "192686", "wps": "311.8", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "10", "lr": "0", "gnorm": "5.796", "loss_scale": "1", "train_wall": "53", "gb_free": "9.3", "wall": "673"} +2021-03-08 12:41:04 | INFO | fairseq_cli.train | Stopping training due to num_updates: 10 >= max_update: 10 +2021-03-08 12:41:04 | INFO | fairseq_cli.train | begin validation on "valid" subset +2021-03-08 12:43:15 | INFO | valid | {"epoch": 1, "valid_loss": "17.953", "valid_ppl": "253807", "valid_wps": 
"1868.4", "valid_wpb": "15400.2", "valid_bsz": "7.6", "valid_num_updates": "10"} +2021-03-08 12:43:15 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below) +2021-03-08 12:43:15 | INFO | train | {"epoch": 1, "train_loss": "19.351", "train_ppl": "668509", "train_wps": "210.9", "train_ups": "0.01", "train_wpb": "16384", "train_bsz": "8", "train_num_updates": "10", "train_lr": "0", "train_gnorm": "36.26", "train_loss_scale": "1", "train_train_wall": "667", "train_gb_free": "9.3", "train_wall": "804"} +2021-03-08 12:43:15 | INFO | fairseq_cli.train | done training in 798.6 seconds +``` + +

+ +### 13B params on 8 V100 GPUs (with full parameter + optimizer state sharding) + +FSDP can also shard the parameters and optimizer states across multiple GPUs, +reducing memory requirements significantly. On 8 x 32GB GPUs, sharding enables +training the same 13B parameter model *without offloading the parameters to +CPU*. However, without CPU offloading we'd only be able to fit a batch size of +1 per GPU, which would cause training speed to suffer. + +We obtain the best performance on 8 GPUs by combining full sharding and CPU +offloading. The following command trains the same 13B parameter GPT-3 model as +before on 8 x 32GB V100 GPUs; training speed increases superlinearly from ~310 +words per second to ~3200 words per second. + +```bash +OMP_NUM_THREADS=20 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ + fairseq-train data-bin/wikitext-103-roberta-bpe-bin \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 2048 --batch-size 8 \ + --arch transformer_lm_gpt3_13 \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 10 --no-save --log-format json --log-interval 1 +``` + +
Example output

+ +``` +(...) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | num. model params: 13,110,865,920 (num. trained: 13,110,865,920) +(...) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | training on 8 devices (GPUs/TPUs) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | max tokens per GPU = None and batch size per GPU = 8 +(...) +Adam Optimizer #0 is created with AVX2 arithmetic capability. +Config: alpha=0.000100, betas=(0.900000, 0.980000), weight_decay=0.000000, adam_w=1 +(...) +2021-03-08 18:05:06 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "16.408", "ppl": "86945.6", "wps": "0", "ups": "0", "wpb": "131072", "bsz": "64", "num_updates": "1", "lr": "2e-05", "gnorm": "18.27", "loss_scale": "4", "train_wall": "47", "gb_free": "9.3", "wall": "56"} +2021-03-08 18:05:45 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "16.352", "ppl": "83644.3", "wps": "3283.4", "ups": "0.03", "wpb": "131072", "bsz": "64", "num_updates": "2", "lr": "4e-05", "gnorm": "18.411", "loss_scale": "4", "train_wall": "40", "gb_free": "9.3", "wall": "96"} +2021-03-08 18:06:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +2021-03-08 18:06:56 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +2021-03-08 18:07:37 | INFO | train_inner | {"epoch": 1, "update": 0.006, "loss": "23.682", "ppl": "1.34537e+07", "wps": "1176.6", "ups": "0.01", "wpb": "131072", "bsz": "64", "num_updates": "3", "lr": "6e-05", "gnorm": "119.682", "loss_scale": "1", "train_wall": "111", "gb_free": "9.3", "wall": "208"} +2021-03-08 18:08:18 | INFO | train_inner | {"epoch": 1, "update": 0.007, "loss": "18.988", "ppl": "519921", "wps": "3189.1", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "4", "lr": "8e-05", "gnorm": "14.934", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "249"} +2021-03-08 18:08:59 | INFO | train_inner | {"epoch": 1, "update": 
0.008, "loss": "20.08", "ppl": "1.10798e+06", "wps": "3223.1", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "5", "lr": "0.0001", "gnorm": "59.92", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "289"} +2021-03-08 18:09:39 | INFO | train_inner | {"epoch": 1, "update": 0.009, "loss": "18.323", "ppl": "327980", "wps": "3256.6", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "6", "lr": "8e-05", "gnorm": "37.425", "loss_scale": "1", "train_wall": "40", "gb_free": "9.3", "wall": "330"} +2021-03-08 18:10:20 | INFO | train_inner | {"epoch": 1, "update": 0.01, "loss": "17.264", "ppl": "157354", "wps": "3188.7", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "7", "lr": "6e-05", "gnorm": "10.824", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "371"} +2021-03-08 18:11:01 | INFO | train_inner | {"epoch": 1, "update": 0.011, "loss": "16.794", "ppl": "113647", "wps": "3230", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "8", "lr": "4e-05", "gnorm": "5.616", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "411"} +2021-03-08 18:11:39 | INFO | train_inner | {"epoch": 1, "update": 0.012, "loss": "16.706", "ppl": "106938", "wps": "3384", "ups": "0.03", "wpb": "131072", "bsz": "64", "num_updates": "9", "lr": "2e-05", "gnorm": "5.318", "loss_scale": "1", "train_wall": "39", "gb_free": "9.3", "wall": "450"} +2021-03-08 18:12:19 | INFO | train_inner | {"epoch": 1, "update": 0.013, "loss": "16.548", "ppl": "95796.2", "wps": "3274.4", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "10", "lr": "0", "gnorm": "5.22", "loss_scale": "1", "train_wall": "40", "gb_free": "9.3", "wall": "490"} +2021-03-08 18:12:19 | INFO | fairseq_cli.train | Stopping training due to num_updates: 10 >= max_update: 10 +2021-03-08 18:12:19 | INFO | fairseq_cli.train | begin validation on "valid" subset +2021-03-08 18:12:45 | INFO | valid | {"epoch": 1, "valid_loss": "16.624", "valid_ppl": "101000", 
"valid_wps": "10855.9", "valid_wpb": "123202", "valid_bsz": "60.5", "valid_num_updates": "10"} +2021-03-08 18:12:45 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below) +2021-03-08 18:12:45 | INFO | train | {"epoch": 1, "train_loss": "18.114", "train_ppl": "283776", "train_wps": "2567.8", "train_ups": "0.02", "train_wpb": "131072", "train_bsz": "64", "train_num_updates": "10", "train_lr": "0", "train_gnorm": "29.562", "train_loss_scale": "1", "train_train_wall": "480", "train_gb_free": "9.3", "train_wall": "516"} +2021-03-08 18:12:45 | INFO | fairseq_cli.train | done training in 509.9 seconds +``` + +

diff --git a/fairseq/examples/gottbert/README.md b/fairseq/examples/gottbert/README.md new file mode 100644 index 0000000..1d58feb --- /dev/null +++ b/fairseq/examples/gottbert/README.md @@ -0,0 +1,64 @@ +# GottBERT: a pure German language model + +## Introduction + +[GottBERT](http://arxiv.org/abs/2012.02110) is a pretrained language model trained on 145GB of German text based on RoBERTa. + +## Example usage + +### fairseq +##### Load GottBERT from torch.hub (PyTorch >= 1.1): +```python +import torch +gottbert = torch.hub.load('pytorch/fairseq', 'gottbert-base') +gottbert.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Load GottBERT (for PyTorch 1.0 or custom models): +```python +# Download gottbert model +wget https://dl.gottbert.de/fairseq/models/gottbert-base.tar.gz +tar -xzvf gottbert-base.tar.gz + +# Load the model in fairseq +from fairseq.models.roberta import GottbertModel +gottbert = GottbertModel.from_pretrained('/path/to/gottbert') +gottbert.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Filling masks: +```python +masked_line = 'Gott ist <mask> ! :)' +gottbert.fill_mask(masked_line, topk=3) +# [('Gott ist gut ! :)', 0.3642110526561737, ' gut'), +# ('Gott ist überall ! :)', 0.06009674072265625, ' überall'), +# ('Gott ist großartig ! :)', 0.0370681993663311, ' großartig')] +``` + +##### Extract features from GottBERT + +```python +# Extract the last layer's features +line = "Der erste Schluck aus dem Becher der Naturwissenschaft macht atheistisch , aber auf dem Grunde des Bechers wartet Gott !" 
+tokens = gottbert.encode(line) +last_layer_features = gottbert.extract_features(tokens) +assert last_layer_features.size() == torch.Size([1, 27, 768]) + +# Extract all layer's features (layer 0 is the embedding layer) +all_layers = gottbert.extract_features(tokens, return_all_hiddens=True) +assert len(all_layers) == 13 +assert torch.all(all_layers[-1] == last_layer_features) +``` +## Citation +If you use our work, please cite: + +```bibtex +@misc{scheible2020gottbert, + title={GottBERT: a pure German Language Model}, + author={Raphael Scheible and Fabian Thomczyk and Patric Tippmann and Victor Jaravine and Martin Boeker}, + year={2020}, + eprint={2012.02110}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` diff --git a/fairseq/examples/hubert/README.md b/fairseq/examples/hubert/README.md new file mode 100644 index 0000000..6695d81 --- /dev/null +++ b/fairseq/examples/hubert/README.md @@ -0,0 +1,116 @@ +# HuBERT + +## Pre-trained and fine-tuned (ASR) models +Model | Pretraining Data | Finetuning Dataset | Model | Quantizer +|---|---|---|---|--- +HuBERT Base (~95M params) | [Librispeech](http://www.openslr.org/12) 960 hr | No finetuning (Pretrained Model) | [download](https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt) | [L9 km500](https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960_L9_km500.bin) +HuBERT Large (~316M params) | [Libri-Light](https://github.com/facebookresearch/libri-light) 60k hr | No finetuning (Pretrained Model) | [download](https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k.pt) +HuBERT Extra Large (~1B params) | [Libri-Light](https://github.com/facebookresearch/libri-light) 60k hr | No finetuning (Pretrained Model) | [download](https://dl.fbaipublicfiles.com/hubert/hubert_xtralarge_ll60k.pt) +HuBERT Large | [Libri-Light](https://github.com/facebookresearch/libri-light) 60k hr | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k_finetune_ls960.pt) +HuBERT 
Extra Large | [Libri-Light](https://github.com/facebookresearch/libri-light) 60k hr | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/hubert/hubert_xtralarge_ll60k_finetune_ls960.pt) + +## Load a model +``` +ckpt_path = "/path/to/the/checkpoint.pt" +models, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([ckpt_path]) +model = models[0] +``` + +## Train a new model + +### Data preparation + +Follow the steps in `./simple_kmeans` to create: +- `{train,valid}.tsv` waveform list files +- `{train,valid}.km` frame-aligned pseudo label files. +- `dict.km.txt` a dummy dictionary +The `label_rate` is the same as the feature frame rate used for clustering, +which is 100Hz for MFCC features and 50Hz for HuBERT features by default. + +### Pre-train a HuBERT model + +Suppose `{train,valid}.tsv` are saved at `/path/to/data`, `{train,valid}.km` +are saved at `/path/to/labels`, and the label rate is 100Hz. + +To train a base model (12 layer transformer), run: +```sh +$ python fairseq_cli/hydra_train.py \ + --config-dir /path/to/fairseq-py/examples/hubert/config/pretrain \ + --config-name hubert_base_librispeech \ + task.data=/path/to/data task.label_dir=/path/to/labels task.labels='["km"]' model.label_rate=100 +``` + +### Fine-tune a HuBERT model with a CTC loss + +Suppose `{train,valid}.tsv` are saved at `/path/to/data`, and their +corresponding character transcripts `{train,valid}.ltr` are saved at +`/path/to/trans`. 
+ +To fine-tune a pre-trained HuBERT model at `/path/to/checkpoint`, run +```sh +$ python fairseq_cli/hydra_train.py \ + --config-dir /path/to/fairseq-py/examples/hubert/config/finetune \ + --config-name base_10h \ + task.data=/path/to/data task.label_dir=/path/to/trans \ + model.w2v_path=/path/to/checkpoint +``` + +### Decode a HuBERT model + +Suppose the `test.tsv` and `test.ltr` are the waveform list and transcripts of +the split to be decoded, saved at `/path/to/data`, and the fine-tuned model is +saved at `/path/to/checkpoint`. We support three decoding modes: +- Viterbi decoding: greedy decoding without a language model +- KenLM decoding: decoding with an arpa-format KenLM n-gram language model +- Fairseq-LM decoding: decoding with a Fairseq neural language model + + +#### Viterbi decoding + +`task.normalize` needs to be consistent with the value used during fine-tuning. +Decoding results will be saved at +`/path/to/experiment/directory/decode/viterbi/test`. + +```sh +$ python examples/speech_recognition/new/infer.py \ + --config-dir /path/to/fairseq-py/examples/hubert/config/decode \ + --config-name infer_viterbi \ + task.data=/path/to/data \ + task.normalize=[true|false] \ + decoding.exp_dir=/path/to/experiment/directory \ + common_eval.path=/path/to/checkpoint \ + dataset.gen_subset=test +``` + +#### KenLM / Fairseq-LM decoding + +Suppose the pronunciation lexicon and the n-gram LM are saved at +`/path/to/lexicon` and `/path/to/arpa`, respectively. Decoding results will be +saved at `/path/to/experiment/directory/decode/kenlm/test`. 
+ +```sh +$ python examples/speech_recognition/new/infer.py \ + --config-dir /path/to/fairseq-py/examples/hubert/config/decode \ + --config-name infer_kenlm \ + task.data=/path/to/data \ + task.normalize=[true|false] \ + decoding.exp_dir=/path/to/experiment/directory \ + common_eval.path=/path/to/checkpoint \ + dataset.gen_subset=test \ + decoding.decoder.lexicon=/path/to/lexicon \ + decoding.decoder.lmpath=/path/to/arpa +``` + +The command above uses the default decoding hyperparameters, which can be found +in `examples/speech_recognition/hydra/decoder.py`. These parameters can be +configured from the command line. For example, to search with a beam size of +500, we can append the command above with `decoding.decoder.beam=500`. +Important parameters include: +- decoding.decoder.beam +- decoding.decoder.beamthreshold +- decoding.decoder.lmweight +- decoding.decoder.wordscore +- decoding.decoder.silweight + +To decode with a Fairseq LM, use `--config-name infer_fsqlm` instead, and +change the path of lexicon and LM accordingly. 
diff --git a/fairseq/examples/hubert/config/decode/ax_sweep/ngram.yaml b/fairseq/examples/hubert/config/decode/ax_sweep/ngram.yaml new file mode 100644 index 0000000..5a02df1 --- /dev/null +++ b/fairseq/examples/hubert/config/decode/ax_sweep/ngram.yaml @@ -0,0 +1,33 @@ +# @package _global_ + +common_eval: + results_path: ${decoding.exp_dir}/decode/${decoding.decoder.name}_ax/${dataset.gen_subset} + +hydra: + sweeper: + ax_config: + max_trials: 60 + early_stop: + minimize: true + max_epochs_without_improvement: 10 + epsilon: 0.025 + experiment: + name: ${dataset.gen_subset} + objective_name: wer + minimize: true + parameter_constraints: null + outcome_constraints: null + status_quo: null + client: + verbose_logging: false + random_seed: null + params: + decoding.decoder.lmweight: + type: range + bounds: [0.0, 8.0] + decoding.decoder.wordscore: + type: range + bounds: [-5.0, 5.0] + decoding.decoder.silweight: + type: range + bounds: [-10.0, 0.0] diff --git a/fairseq/examples/hubert/config/decode/ax_sweep/transformer.yaml b/fairseq/examples/hubert/config/decode/ax_sweep/transformer.yaml new file mode 100644 index 0000000..85ed3bd --- /dev/null +++ b/fairseq/examples/hubert/config/decode/ax_sweep/transformer.yaml @@ -0,0 +1,33 @@ +# @package _global_ + +common_eval: + results_path: ${decoding.exp_dir}/decode/${decoding.decoder.name}_ax/${dataset.gen_subset} + +hydra: + sweeper: + ax_config: + max_trials: 60 + early_stop: + minimize: true + max_epochs_without_improvement: 10 + epsilon: 0.025 + experiment: + name: ${dataset.gen_subset} + objective_name: wer + minimize: true + parameter_constraints: null + outcome_constraints: null + status_quo: null + client: + verbose_logging: false + random_seed: null + params: + decoding.decoder.lmweight: + type: range + bounds: [0.0, 4.0] + decoding.decoder.wordscore: + type: range + bounds: [-5.0, 5.0] + decoding.decoder.silweight: + type: range + bounds: [-8.0, 0.0] diff --git 
a/fairseq/examples/hubert/config/decode/infer_fsqlm.yaml b/fairseq/examples/hubert/config/decode/infer_fsqlm.yaml new file mode 100644 index 0000000..026ad8d --- /dev/null +++ b/fairseq/examples/hubert/config/decode/infer_fsqlm.yaml @@ -0,0 +1,36 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + sweep: + dir: ${common_eval.results_path} + subdir: beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + +task: + _name: hubert_pretraining + single_target: true + fine_tuning: true + data: ??? + normalize: ??? + +decoding: + type: fairseqlm + lexicon: ??? + lmpath: ??? + beamthreshold: 25 + beam: 500 + lmweight: 2 + wordscore: -1 + silweight: 0 + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/fairseq/examples/hubert/config/decode/infer_kenlm.yaml b/fairseq/examples/hubert/config/decode/infer_kenlm.yaml new file mode 100644 index 0000000..04642ae --- /dev/null +++ b/fairseq/examples/hubert/config/decode/infer_kenlm.yaml @@ -0,0 +1,36 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + sweep: + dir: ${common_eval.results_path} + subdir: beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + +task: + _name: hubert_pretraining + single_target: true + fine_tuning: true + data: ??? + normalize: ??? + +decoding: + type: kenlm + lexicon: ??? + lmpath: ??? 
+ beamthreshold: 100 + beam: 500 + lmweight: 2 + wordscore: -1 + silweight: 0 + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/fairseq/examples/hubert/config/decode/infer_viterbi.yaml b/fairseq/examples/hubert/config/decode/infer_viterbi.yaml new file mode 100644 index 0000000..4afc74c --- /dev/null +++ b/fairseq/examples/hubert/config/decode/infer_viterbi.yaml @@ -0,0 +1,29 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/viterbi + sweep: + dir: ${common_eval.results_path} + subdir: viterbi + +task: + _name: hubert_pretraining + single_target: true + fine_tuning: true + data: ??? + normalize: ??? + +decoding: + type: viterbi + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/fairseq/examples/hubert/config/decode/run/submitit_slurm.yaml b/fairseq/examples/hubert/config/decode/run/submitit_slurm.yaml new file mode 100644 index 0000000..0b80658 --- /dev/null +++ b/fairseq/examples/hubert/config/decode/run/submitit_slurm.yaml @@ -0,0 +1,17 @@ +# @package _global_ +hydra: + launcher: + cpus_per_task: ${distributed_training.distributed_world_size} + gpus_per_node: ${distributed_training.distributed_world_size} + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 1 + mem_gb: 200 + timeout_min: 4320 + max_num_timeout: 50 + name: ${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/submitit + +distributed_training: + distributed_world_size: 1 + distributed_no_spawn: true + distributed_port: 29761 diff --git a/fairseq/examples/hubert/config/decode/run/submitit_slurm_8gpu.yaml b/fairseq/examples/hubert/config/decode/run/submitit_slurm_8gpu.yaml new file mode 100644 index 0000000..2f669f3 --- /dev/null +++ b/fairseq/examples/hubert/config/decode/run/submitit_slurm_8gpu.yaml @@ -0,0 +1,17 @@ +# 
@package _global_ +hydra: + launcher: + cpus_per_task: ${distributed_training.distributed_world_size} + gpus_per_node: ${distributed_training.distributed_world_size} + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 1 + mem_gb: 200 + timeout_min: 4320 + max_num_timeout: 50 + name: ${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/submitit + +distributed_training: + distributed_world_size: 8 + distributed_no_spawn: true + distributed_port: 29761 diff --git a/fairseq/examples/hubert/config/finetune/base_10h.yaml b/fairseq/examples/hubert/config/finetune/base_10h.yaml new file mode 100644 index 0000000..a22c7c0 --- /dev/null +++ b/fairseq/examples/hubert/config/finetune/base_10h.yaml @@ -0,0 +1,100 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 5 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 3200000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train + valid_subset: valid + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 25000 + lr: [2e-5] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: hubert_ctc + w2v_path: ??? 
+ apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/hubert/config/finetune/ckpt/it1.yaml b/fairseq/examples/hubert/config/finetune/ckpt/it1.yaml new file mode 100644 index 0000000..2af96b3 --- /dev/null +++ b/fairseq/examples/hubert/config/finetune/ckpt/it1.yaml @@ -0,0 +1,7 @@ +# @package _global_ + +task: + normalize: false + +model: + w2v_path: /checkpoint/wnhsu/w2v/hubert_final/iter1/hubert.km.randcrop.pmw1_0.puw0_0.grpnorm.ml10.mp0_8.untie.mxsz250000.ufreq1.maxtok1400000.MU400k.s1337.ngpu32/checkpoint_last.pt diff --git a/fairseq/examples/hubert/config/finetune/lm/ls_4gram.yaml b/fairseq/examples/hubert/config/finetune/lm/ls_4gram.yaml new file mode 100644 index 0000000..8c7728a --- /dev/null +++ b/fairseq/examples/hubert/config/finetune/lm/ls_4gram.yaml @@ -0,0 +1,7 @@ +# @package _global_ + +criterion: + wer_kenlm_model: /checkpoint/abdo/old_checkpoint02/datasets/librispeech/4-gram.bin + wer_lexicon: /checkpoint/abdo/old_checkpoint02/datasets/librispeech/10h/raw/lexicon_ltr.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 diff --git a/fairseq/examples/hubert/config/finetune/run/submitit_reg.yaml b/fairseq/examples/hubert/config/finetune/run/submitit_reg.yaml new file mode 100644 index 0000000..2750950 --- /dev/null +++ b/fairseq/examples/hubert/config/finetune/run/submitit_reg.yaml @@ 
-0,0 +1,20 @@ +# @package _global_ + +hydra: + launcher: + cpus_per_task: 8 + gpus_per_node: 8 + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 1 + comment: null + mem_gb: 384 + timeout_min: 4320 + max_num_timeout: 100 + constraint: volta32gb + name: ${hydra.job.config_name}/${hydra.job.override_dirname} + submitit_folder: ${hydra.sweep.dir}/submitit/%j + +distributed_training: + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 diff --git a/fairseq/examples/hubert/config/pretrain/hubert_base_librispeech.yaml b/fairseq/examples/hubert/config/pretrain/hubert_base_librispeech.yaml new file mode 100644 index 0000000..bd84461 --- /dev/null +++ b/fairseq/examples/hubert/config/pretrain/hubert_base_librispeech.yaml @@ -0,0 +1,97 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: hubert_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: hubert + label_rate: ??? 
+ skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/hubert/config/pretrain/hubert_large_librivox.yaml b/fairseq/examples/hubert/config/pretrain/hubert_large_librivox.yaml new file mode 100644 index 0000000..a5192b5 --- /dev/null +++ b/fairseq/examples/hubert/config/pretrain/hubert_large_librivox.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 128 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: hubert_pretraining + data: ??? + label_dir: ??? + labels: ??? 
+ label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + +dataset: + num_workers: 6 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0015] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: hubert + label_rate: ??? + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + final_dim: 768 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + run: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + sweep: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/hubert/config/pretrain/hubert_xlarge_librivox.yaml b/fairseq/examples/hubert/config/pretrain/hubert_xlarge_librivox.yaml new file mode 100644 index 0000000..34e8f2b --- /dev/null +++ b/fairseq/examples/hubert/config/pretrain/hubert_xlarge_librivox.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_interval_updates: 25000 + 
keep_interval_updates: 1 + no_epoch_checkpoints: true + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 256 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: hubert_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + +dataset: + num_workers: 6 + max_tokens: 360000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.003] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: hubert + label_rate: ??? 
+ encoder_layers: 48 + encoder_embed_dim: 1280 + encoder_ffn_embed_dim: 5120 + encoder_attention_heads: 16 + final_dim: 1024 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + run: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + sweep: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/hubert/config/pretrain/run/submitit_reg.yaml b/fairseq/examples/hubert/config/pretrain/run/submitit_reg.yaml new file mode 100644 index 0000000..46c979c --- /dev/null +++ b/fairseq/examples/hubert/config/pretrain/run/submitit_reg.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +hydra: + launcher: + cpus_per_task: 8 + gpus_per_node: 8 + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 4 + comment: null + mem_gb: 384 + timeout_min: 4320 + max_num_timeout: 100 + constraint: volta32gb + name: ${hydra.job.config_name}/${hydra.job.override_dirname} + submitit_folder: ${hydra.sweep.dir}/submitit/%j + +distributed_training: + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 diff --git a/fairseq/examples/hubert/measure_teacher_quality.py b/fairseq/examples/hubert/measure_teacher_quality.py new file mode 100644 index 0000000..92279b2 --- /dev/null +++ b/fairseq/examples/hubert/measure_teacher_quality.py @@ -0,0 +1,241 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import numpy as np
import os.path as op
import re
from collections import Counter


def comp_purity(p_xy, axis):
    """Return per-slice and aggregate purity of a joint distribution.

    Args:
        p_xy: 2-D array of joint probabilities.
        axis: axis that is collapsed when taking the maximum and the marginal.

    Returns:
        ``(indv_pur, aggr_pur)``: for every slice along the remaining axis,
        the largest joint probability divided by the slice's marginal, and
        the sum of those largest joint probabilities.
    """
    max_p = p_xy.max(axis=axis)
    marg_p = p_xy.sum(axis=axis)
    indv_pur = max_p / marg_p
    aggr_pur = max_p.sum()
    return indv_pur, aggr_pur


def comp_entropy(p):
    """Return the entropy (natural log) of probabilities ``p``.

    A constant 1e-8 is added inside the log so zero entries do not yield
    ``-inf``.
    """
    return (-p * np.log(p + 1e-8)).sum()


def comp_norm_mutual_info(p_xy):
    """Return mutual information of ``p_xy`` and its normalized variants.

    Returns:
        ``(mi, mi / h_x, mi / h_y, h_x, h_y)`` where ``h_x`` is the entropy of
        the row marginal and ``h_y`` the entropy of the column marginal.
    """
    p_x = p_xy.sum(axis=1, keepdims=True)
    p_y = p_xy.sum(axis=0, keepdims=True)
    # The 1e-8 keeps log() finite for cells with zero joint probability;
    # those cells are then multiplied by p_xy == 0 below.
    pmi = np.log(p_xy / np.matmul(p_x, p_y) + 1e-8)
    mi = (p_xy * pmi).sum()
    h_x = comp_entropy(p_x)
    h_y = comp_entropy(p_y)
    return mi, mi / h_x, mi / h_y, h_x, h_y


def pad(labs, n):
    """Repeat the first and the last label ``n`` times on each side.

    Args:
        labs: sequence of labels.
        n: number of copies of the edge labels to add on each side.

    Returns:
        A numpy array with the padded labels. An empty ``labs`` has no edge
        label to repeat, so it is returned as an empty array (previously this
        raised ``IndexError`` when ``n > 0``).
    """
    if n == 0 or len(labs) == 0:
        return np.array(labs)
    return np.concatenate([[labs[0]] * n, labs, [labs[-1]] * n])


def comp_avg_seg_dur(labs_list):
    """Return the average number of frames per run of identical labels.

    Args:
        labs_list: iterable of label sequences, one per utterance.

    Returns:
        Total frame count divided by total segment count. Empty utterances
        are skipped, and ``0.0`` is returned when there is no segment at all
        (both cases previously raised an exception).
    """
    n_frms = 0
    n_segs = 0
    for labs in labs_list:
        labs = np.array(labs)
        if labs.size == 0:
            continue
        # A segment starts at frame 0 and wherever the label changes.
        edges = np.zeros(len(labs)).astype(bool)
        edges[0] = True
        edges[1:] = labs[1:] != labs[:-1]
        n_frms += len(edges)
        n_segs += edges.astype(int).sum()
    if n_segs == 0:
        return 0.0
    return n_frms / n_segs


def comp_joint_prob(uid2refs, uid2hyps):
    """Estimate the joint distribution of reference and hypothesis labels.

    Sequences of the same utterance are truncated to the shorter of the two
    before frame-level pairs are counted.

    Args:
        uid2refs: mapping from utterance id to its reference label sequence.
        uid2hyps: mapping from utterance id to its hypothesis label sequence.

    Returns:
        ``(p_xy, ref2pid, hyp2lid, tot, abs_frmdiff, skipped)``:
        the normalized count matrix (rows are references, columns are
        hypotheses), the label-to-row and label-to-column index maps, the
        number of counted frame pairs, the summed absolute length difference,
        and the utterance ids in ``uid2refs`` that are missing in ``uid2hyps``.
    """
    cnts = Counter()
    skipped = []
    abs_frmdiff = 0
    for uid in uid2refs:
        if uid not in uid2hyps:
            skipped.append(uid)
            continue
        refs = uid2refs[uid]
        hyps = uid2hyps[uid]
        abs_frmdiff += abs(len(refs) - len(hyps))
        min_len = min(len(refs), len(hyps))
        refs = refs[:min_len]
        hyps = hyps[:min_len]
        cnts.update(zip(refs, hyps))
    tot = sum(cnts.values())

    ref_set = sorted({ref for ref, _ in cnts.keys()})
    hyp_set = sorted({hyp for _, hyp in cnts.keys()})
    ref2pid = {ref: idx for idx, ref in enumerate(ref_set)}
    hyp2lid = {hyp: idx for idx, hyp in enumerate(hyp_set)}
    p_xy = np.zeros((len(ref2pid), len(hyp2lid)), dtype=float)
    for (ref, hyp), cnt in cnts.items():
        p_xy[ref2pid[ref], hyp2lid[hyp]] = cnt
    p_xy /= p_xy.sum()
    return p_xy, ref2pid, hyp2lid, tot, abs_frmdiff, skipped


def read_phn(tsv_path, rm_stress=True):
    """Read ``uid<TAB>phn1,phn2,...`` lines into a ``{uid: [phn, ...]}`` dict.

    Args:
        tsv_path: path of the tab-separated phone file.
        rm_stress: when true, digits are removed from every phone symbol.
    """
    uid2phns = {}
    with open(tsv_path) as f:
        for line in f:
            uid, phns = line.rstrip().split("\t")
            phns = phns.split(",")
            if rm_stress:
                phns = [re.sub("[0-9]", "", phn) for phn in phns]
            uid2phns[uid] = phns
    return uid2phns


def read_lab(tsv_path, lab_path, pad_len=0, upsample=1):
    """Read a label file and key its lines by the utterance ids of a tsv.

    The first line of the tsv is skipped; the uid of every following line is
    the basename (without extension) of its first whitespace-separated field.

    Args:
        tsv_path: manifest used to retrieve the uids for the labels.
        lab_path: file with one whitespace-separated label sequence per line.
        pad_len: number of edge-label copies added on each side (see ``pad``).
        upsample: number of times every label is repeated.

    Returns:
        Dict mapping uid to a numpy array of labels.
    """
    with open(tsv_path) as f:
        f.readline()
        uids = [op.splitext(op.basename(line.rstrip().split()[0]))[0] for line in f]
    with open(lab_path) as f:
        labs_list = [pad(line.rstrip().split(), pad_len).repeat(upsample) for line in f]
    assert len(uids) == len(labs_list)
    return dict(zip(uids, labs_list))


def main_lab_lab(
    tsv_dir,
    lab_dir,
    lab_name,
    lab_sets,
    ref_dir,
    ref_name,
    pad_len=0,
    upsample=1,
    verbose=False,
):
    """Score hypothesis labels against another set of labels.

    ``pad_len`` and ``upsample`` are applied to the hypotheses only.
    """
    # assume tsv_dir is the same for both the reference and the hypotheses
    tsv_dir = lab_dir if tsv_dir is None else tsv_dir

    uid2refs = {}
    for s in lab_sets:
        uid2refs.update(read_lab(f"{tsv_dir}/{s}.tsv", f"{ref_dir}/{s}.{ref_name}"))

    uid2hyps = {}
    for s in lab_sets:
        uid2hyps.update(
            read_lab(
                f"{tsv_dir}/{s}.tsv", f"{lab_dir}/{s}.{lab_name}", pad_len, upsample
            )
        )
    _main(uid2refs, uid2hyps, verbose)


def main_phn_lab(
    tsv_dir,
    lab_dir,
    lab_name,
    lab_sets,
    phn_dir,
    phn_sets,
    pad_len=0,
    upsample=1,
    verbose=False,
):
    """Score hypothesis labels against phone sequences read with ``read_phn``."""
    uid2refs = {}
    for s in phn_sets:
        uid2refs.update(read_phn(f"{phn_dir}/{s}.tsv"))

    uid2hyps = {}
    tsv_dir = lab_dir if tsv_dir is None else tsv_dir
    for s in lab_sets:
        uid2hyps.update(
            read_lab(
                f"{tsv_dir}/{s}.tsv", f"{lab_dir}/{s}.{lab_name}", pad_len, upsample
            )
        )
    _main(uid2refs, uid2hyps, verbose)


def _main(uid2refs, uid2hyps, verbose):
    """Compute all metrics and print them as a single-row table.

    ``verbose`` is accepted for interface compatibility and is not used.
    """
    # Imported here because tabulate is only needed for this printout; the
    # metric helpers above stay importable without it.
    from tabulate import tabulate

    (p_xy, ref2pid, hyp2lid, tot, frmdiff, skipped) = comp_joint_prob(
        uid2refs, uid2hyps
    )
    ref_pur_by_hyp, ref_pur = comp_purity(p_xy, axis=0)
    hyp_pur_by_ref, hyp_pur = comp_purity(p_xy, axis=1)
    (mi, mi_norm_by_ref, mi_norm_by_hyp, h_ref, h_hyp) = comp_norm_mutual_info(p_xy)
    outputs = {
        "ref pur": ref_pur,
        "hyp pur": hyp_pur,
        "H(ref)": h_ref,
        "H(hyp)": h_hyp,
        "MI": mi,
        "MI/H(ref)": mi_norm_by_ref,
        "ref segL": comp_avg_seg_dur(uid2refs.values()),
        "hyp segL": comp_avg_seg_dur(uid2hyps.values()),
        "p_xy shape": p_xy.shape,
        "frm tot": tot,
        "frm diff": frmdiff,
        "utt tot": len(uid2refs),
        "utt miss": len(skipped),
    }
    print(tabulate([outputs.values()], outputs.keys(), floatfmt=".4f"))


if __name__ == "__main__":
    # compute quality of labels with respect to phone or another labels if set
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("tsv_dir")
    parser.add_argument("lab_dir")
    parser.add_argument("lab_name")
    parser.add_argument("--lab_sets", default=["valid"], type=str, nargs="+")
    parser.add_argument(
        "--phn_dir",
        default="/checkpoint/wnhsu/data/librispeech/960h/fa/raw_phn/phone_frame_align_v1",
    )
    parser.add_argument(
        "--phn_sets", default=["dev-clean", "dev-other"], type=str, nargs="+"
    )
    parser.add_argument("--pad_len", default=0, type=int, help="padding for hypotheses")
    parser.add_argument(
        "--upsample", default=1, type=int, help="upsample factor for hypotheses"
    )
    parser.add_argument("--ref_lab_dir", default="")
    parser.add_argument("--ref_lab_name", default="")
    parser.add_argument("--verbose", action="store_true")
    args = parser.parse_args()

    if args.ref_lab_dir and args.ref_lab_name:
        main_lab_lab(
            args.tsv_dir,
            args.lab_dir,
            args.lab_name,
            args.lab_sets,
            args.ref_lab_dir,
            args.ref_lab_name,
            args.pad_len,
            args.upsample,
            args.verbose,
        )
    else:
        main_phn_lab(
            args.tsv_dir,
            args.lab_dir,
            args.lab_name,
            args.lab_sets,
            args.phn_dir,
            args.phn_sets,
            args.pad_len,
            args.upsample,
            args.verbose,
        )
100644 index 0000000..847475c --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/README.md @@ -0,0 +1,80 @@ +# Sharded Feature Extraction and K-means Application + +This folder contains scripts for preparing HUBERT labels from tsv files, the +steps are: +1. feature extraction +2. k-means clustering +3. k-means application + + +## Data preparation + +`*.tsv` files contain a list of audio files, where the first line is the root, and +the following lines are the subpaths for each audio: +``` +<root-dir> +<audio-path-1> +<audio-path-2> +... +``` + + +## Feature extraction + +### MFCC feature +Suppose the tsv file is at `${tsv_dir}/${split}.tsv`. To extract 39-D +mfcc+delta+ddelta features for the 1st iteration HUBERT training, run: +```sh +python dump_mfcc_feature.py ${tsv_dir} ${split} ${nshard} ${rank} ${feat_dir} +``` +This would shard the tsv file into `${nshard}` shards and extract features for the +`${rank}`-th shard, where rank is an integer in `[0, nshard-1]`. Features would +be saved at `${feat_dir}/${split}_${rank}_${nshard}.{npy,len}`. + + +### HUBERT feature +To extract features from the `${layer}`-th transformer layer of a trained +HUBERT model saved at `${ckpt_path}`, run: +```sh +python dump_hubert_feature.py ${tsv_dir} ${split} ${ckpt_path} ${layer} ${nshard} ${rank} ${feat_dir} +``` +Features would also be saved at `${feat_dir}/${split}_${rank}_${nshard}.{npy,len}`. + +- if out-of-memory, decrease the chunk size with `--max_chunk` + + +## K-means clustering +To fit a k-means model with `${n_clusters}` clusters on 10% of the `${split}` data, run +```sh +python learn_kmeans.py ${feat_dir} ${split} ${nshard} ${km_path} ${n_clusters} --percent 0.1 +``` +This saves the k-means model to `${km_path}`. 
+ +- set `--percent -1` to use all data +- more kmeans options can be found with `-h` flag + + +## K-means application +To apply a trained k-means model `${km_path}` to obtain labels for `${split}`, run +```sh +python dump_km_label.py ${feat_dir} ${split} ${km_path} ${nshard} ${rank} ${lab_dir} +``` +This would extract labels for the `${rank}`-th shard out of `${nshard}` shards +and dump them to `${lab_dir}/${split}_${rank}_${nshard}.km` + + +Finally, merge shards for `${split}` by running +```sh +for rank in $(seq 0 $((nshard - 1))); do + cat $lab_dir/${split}_${rank}_${nshard}.km +done > $lab_dir/${split}.km +``` + + +## Create a dummy dict +To create a dummy dictionary, run +```sh +for x in $(seq 0 $((n_clusters - 1))); do + echo "$x 1" +done >> $lab_dir/dict.km.txt +``` diff --git a/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py b/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py new file mode 100644 index 0000000..7ea4ea0 --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature.py @@ -0,0 +1,93 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +import sys + +import fairseq +import soundfile as sf +import torch +import torch.nn.functional as F + +from feature_utils import get_path_iterator, dump_feature +from fairseq.data.audio.audio_utils import get_features_or_waveform + + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("dump_hubert_feature") + + +class HubertFeatureReader(object): + def __init__(self, ckpt_path, layer, max_chunk=1600000): + ( + model, + cfg, + task, + ) = fairseq.checkpoint_utils.load_model_ensemble_and_task([ckpt_path]) + self.model = model[0].eval().cuda() + self.task = task + self.layer = layer + self.max_chunk = max_chunk + logger.info(f"TASK CONFIG:\n{self.task.cfg}") + logger.info(f" max_chunk = {self.max_chunk}") + + def read_audio(self, path, ref_len=None): + wav = get_features_or_waveform(path, need_waveform=True, use_sample_rate=self.task.cfg.sample_rate) + if wav.ndim == 2: + wav = wav.mean(-1) + assert wav.ndim == 1, wav.ndim + if ref_len is not None and abs(ref_len - len(wav)) > 160: + logging.warning(f"ref {ref_len} != read {len(wav)} ({path})") + return wav + + def get_feats(self, path, ref_len=None): + x = self.read_audio(path, ref_len=ref_len) + with torch.no_grad(): + x = torch.from_numpy(x).float().cuda() + if self.task.cfg.normalize: + x = F.layer_norm(x, x.shape) + x = x.view(1, -1) + + feat = [] + for start in range(0, x.size(1), self.max_chunk): + x_chunk = x[:, start : start + self.max_chunk] + feat_chunk, _ = self.model.extract_features( + source=x_chunk, + padding_mask=None, + mask=False, + output_layer=self.layer, + ) + feat.append(feat_chunk) + return torch.cat(feat, 1).squeeze(0) + + +def main(tsv_dir, split, ckpt_path, layer, nshard, rank, feat_dir, max_chunk): + reader = HubertFeatureReader(ckpt_path, layer, max_chunk) + generator, num = 
get_path_iterator(f"{tsv_dir}/{split}.tsv", nshard, rank) + dump_feature(reader, generator, num, split, nshard, rank, feat_dir) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("tsv_dir") + parser.add_argument("split") + parser.add_argument("ckpt_path") + parser.add_argument("layer", type=int) + parser.add_argument("nshard", type=int) + parser.add_argument("rank", type=int) + parser.add_argument("feat_dir") + parser.add_argument("--max_chunk", type=int, default=1600000) + args = parser.parse_args() + logger.info(args) + + main(**vars(args)) diff --git a/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature_s2t.py b/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature_s2t.py new file mode 100644 index 0000000..941bc1b --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/dump_hubert_feature_s2t.py @@ -0,0 +1,95 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import csv
import io
import logging
import os
import os.path as op
import sys

from dump_hubert_feature import HubertFeatureReader
from feature_utils import get_shard_range, dump_feature
from fairseq.data.audio.audio_utils import get_features_or_waveform


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("dump_hubert_feature_s2t")


class HubertFeatureReaderS2T(HubertFeatureReader):
    """HuBERT feature reader used with speech-to-text style tsv manifests."""

    def read_audio(self, path, ref_len=None):
        """Load ``path`` as a 1-D waveform at the task's sample rate.

        A 2-D result is averaged over its last axis. When ``ref_len`` is
        given and differs from the loaded length by more than 160 samples,
        a warning is logged.
        """
        wav = get_features_or_waveform(
            path, need_waveform=True, use_sample_rate=self.task.cfg.sample_rate
        )
        if wav.ndim == 2:
            wav = wav.mean(-1)
        assert wav.ndim == 1, wav.ndim
        if ref_len is not None and abs(ref_len - len(wav)) > 160:
            logging.warning(f"ref {ref_len} != read {len(wav)} ({path})")
        return wav


def get_path_iterator(root, tsv, nshard, rank, audio_col_name):
    """Build an iterator factory over the audio paths of one shard.

    Args:
        root: directory prepended to every value of the audio column.
        tsv: tab-separated manifest with a header row.
        nshard: total number of shards.
        rank: index of the shard to iterate.
        audio_col_name: name of the column holding the audio path.

    Returns:
        ``(iterate, num)`` where ``iterate()`` yields ``(path, None)`` pairs
        for the selected shard and ``num`` is the number of pairs.
    """
    with open(tsv) as f:
        reader = csv.DictReader(
            f,
            delimiter="\t",
            quotechar=None,
            doublequote=False,
            lineterminator="\n",
            quoting=csv.QUOTE_NONE,
        )
        paths = [op.join(root, e[audio_col_name]) for e in reader]
    start, end = get_shard_range(len(paths), nshard, rank)
    paths = paths[start:end]

    def iterate():
        for path in paths:
            # ``root`` is already part of ``path``; joining it a second time
            # produced ``root/root/...`` whenever ``root`` was relative.
            yield path, None

    return iterate, len(paths)


def main(
    root,
    tsv_path,
    ckpt_path,
    layer,
    nshard,
    rank,
    feat_dir,
    split,
    max_chunk,
    audio_col_name,
):
    """Dump HuBERT features for one shard of ``tsv_path`` into ``feat_dir``."""
    reader = HubertFeatureReaderS2T(ckpt_path, layer, max_chunk)
    generator, num = get_path_iterator(root, tsv_path, nshard, rank, audio_col_name)
    dump_feature(reader, generator, num, split, nshard, rank, feat_dir)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("root")
    parser.add_argument("tsv_path")
    parser.add_argument("ckpt_path")
parser.add_argument("layer", type=int) + parser.add_argument("nshard", type=int) + parser.add_argument("rank", type=int) + parser.add_argument("feat_dir") + parser.add_argument("split") + parser.add_argument("--audio_col_name", type=str, default="audio") + parser.add_argument("--max_chunk", type=int, default=1600000) + args = parser.parse_args() + logger.info(args) + + main(**vars(args)) diff --git a/fairseq/examples/hubert/simple_kmeans/dump_km_label.py b/fairseq/examples/hubert/simple_kmeans/dump_km_label.py new file mode 100644 index 0000000..8871307 --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/dump_km_label.py @@ -0,0 +1,98 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os +import sys + +import numpy as np + +import joblib +import torch +import tqdm + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("dump_km_label") + + +class ApplyKmeans(object): + def __init__(self, km_path): + self.km_model = joblib.load(km_path) + self.C_np = self.km_model.cluster_centers_.transpose() + self.Cnorm_np = (self.C_np ** 2).sum(0, keepdims=True) + + self.C = torch.from_numpy(self.C_np) + self.Cnorm = torch.from_numpy(self.Cnorm_np) + if torch.cuda.is_available(): + self.C = self.C.cuda() + self.Cnorm = self.Cnorm.cuda() + + def __call__(self, x): + if isinstance(x, torch.Tensor): + dist = ( + x.pow(2).sum(1, keepdim=True) + - 2 * torch.matmul(x, self.C) + + self.Cnorm + ) + return dist.argmin(dim=1).cpu().numpy() + else: + dist = ( + (x ** 2).sum(1, keepdims=True) + - 2 * np.matmul(x, self.C_np) + + self.Cnorm_np + ) + return np.argmin(dist, axis=1) + + +def get_feat_iterator(feat_dir, split, nshard, rank): + feat_path = 
f"{feat_dir}/{split}_{rank}_{nshard}.npy" + leng_path = f"{feat_dir}/{split}_{rank}_{nshard}.len" + with open(leng_path, "r") as f: + lengs = [int(line.rstrip()) for line in f] + offsets = [0] + np.cumsum(lengs[:-1]).tolist() + + def iterate(): + feat = np.load(feat_path, mmap_mode="r") + assert feat.shape[0] == (offsets[-1] + lengs[-1]) + for offset, leng in zip(offsets, lengs): + yield feat[offset: offset + leng] + + return iterate, len(lengs) + + +def dump_label(feat_dir, split, km_path, nshard, rank, lab_dir): + apply_kmeans = ApplyKmeans(km_path) + generator, num = get_feat_iterator(feat_dir, split, nshard, rank) + iterator = generator() + + lab_path = f"{lab_dir}/{split}_{rank}_{nshard}.km" + os.makedirs(lab_dir, exist_ok=True) + with open(lab_path, "w") as f: + for feat in tqdm.tqdm(iterator, total=num): + # feat = torch.from_numpy(feat).cuda() + lab = apply_kmeans(feat).tolist() + f.write(" ".join(map(str, lab)) + "\n") + logger.info("finished successfully") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("feat_dir") + parser.add_argument("split") + parser.add_argument("km_path") + parser.add_argument("nshard", type=int) + parser.add_argument("rank", type=int) + parser.add_argument("lab_dir") + args = parser.parse_args() + logging.info(str(args)) + + dump_label(**vars(args)) diff --git a/fairseq/examples/hubert/simple_kmeans/dump_mfcc_feature.py b/fairseq/examples/hubert/simple_kmeans/dump_mfcc_feature.py new file mode 100644 index 0000000..c353778 --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/dump_mfcc_feature.py @@ -0,0 +1,74 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import logging
import os
import sys

import soundfile as sf
import torch
import torchaudio

from feature_utils import get_path_iterator, dump_feature
from fairseq.data.audio.audio_utils import get_features_or_waveform

logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("dump_mfcc_feature")


class MfccFeatureReader(object):
    """Reads audio files and turns them into MFCC + delta + delta-delta features."""

    def __init__(self, sample_rate):
        # Sample rate requested when loading audio and passed to the MFCC call.
        self.sample_rate = sample_rate

    def read_audio(self, path, ref_len=None):
        """Load ``path`` as a waveform at ``self.sample_rate``.

        When ``ref_len`` is given and differs from the loaded length by more
        than 160 samples, a warning is logged; the waveform is returned either
        way.
        """
        wav = get_features_or_waveform(path, need_waveform=True, use_sample_rate=self.sample_rate)
        if ref_len is not None and abs(ref_len - len(wav)) > 160:
            logging.warning(f"ref {ref_len} != read {len(wav)} ({path})")
        return wav

    def get_feats(self, path, ref_len=None):
        """Return the concatenated MFCC, delta and delta-delta features of ``path``.

        The three feature sets are stacked along the feature axis and the
        result is transposed back so that time is the first axis.
        """
        x = self.read_audio(path, ref_len=ref_len)
        with torch.no_grad():
            x = torch.from_numpy(x).float()
            x = x.view(1, -1)

            mfccs = torchaudio.compliance.kaldi.mfcc(
                waveform=x,
                sample_frequency=self.sample_rate,
                use_energy=False,
            )  # (time, freq)
            # compute_deltas is applied with time as the last axis
            mfccs = mfccs.transpose(0, 1)  # (freq, time)
            deltas = torchaudio.functional.compute_deltas(mfccs)
            ddeltas = torchaudio.functional.compute_deltas(deltas)
            concat = torch.cat([mfccs, deltas, ddeltas], dim=0)
            concat = concat.transpose(0, 1).contiguous()  # (time, freq)
            return concat


def main(tsv_dir, split, nshard, rank, feat_dir, sample_rate):
    """Dump MFCC features for shard ``rank`` of ``{tsv_dir}/{split}.tsv``."""
    reader = MfccFeatureReader(sample_rate)
    generator, num = get_path_iterator(f"{tsv_dir}/{split}.tsv", nshard, rank)
    dump_feature(reader, generator, num, split, nshard, rank, feat_dir)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("tsv_dir")
    parser.add_argument("split")
    parser.add_argument("nshard", type=int)
    parser.add_argument("rank", type=int)
    parser.add_argument("feat_dir")
parser.add_argument("--sample_rate", type=int, default=16000) + args = parser.parse_args() + logger.info(args) + + main(**vars(args)) diff --git a/fairseq/examples/hubert/simple_kmeans/dump_w2v2_feature.py b/fairseq/examples/hubert/simple_kmeans/dump_w2v2_feature.py new file mode 100644 index 0000000..a1f0d90 --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/dump_w2v2_feature.py @@ -0,0 +1,95 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os +import sys + +import fairseq +import soundfile as sf +import torch +import torch.nn.functional as F + +from feature_utils import get_path_iterator, dump_feature + + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("dump_w2v2_feature") + + +class Wav2Vec2FeatureReader(object): + def __init__(self, ckpt_path, layer, max_chunk=1600000): + ( + model, + cfg, + task, + ) = fairseq.checkpoint_utils.load_model_ensemble_and_task([ckpt_path]) + self.model = model[0].eval().cuda() + self.task = task + self.layer = layer # assume this is 1-based like HuBERT + self.max_chunk = max_chunk + logger.info(f"TASK CONFIG:\n{self.task.cfg}") + logger.info(f" max_chunk = {self.max_chunk}") + logger.info(f" model:\n{self.model}") + + def read_audio(self, path, ref_len=None): + wav, sr = sf.read(path) + assert sr == self.task.cfg.sample_rate, sr + if wav.ndim == 2: + wav = wav.mean(-1) + assert wav.ndim == 1, wav.ndim + if ref_len is not None and abs(ref_len - len(wav)) > 160: + logging.warning(f"ref {ref_len} != read {len(wav)} ({path})") + return wav + + def get_feats(self, path, ref_len=None): + x = self.read_audio(path, ref_len) + with torch.no_grad(): + x = torch.from_numpy(x).float().cuda() + if 
self.task.cfg.normalize: + x = F.layer_norm(x, x.shape) + x = x.view(1, -1) + + feat = [] + for start in range(0, x.size(1), self.max_chunk): + x_chunk = x[:, start: start + self.max_chunk] + res = self.model.extract_features( + source=x_chunk, + padding_mask=None, + mask=False, + layer=self.layer - 1, + ) + feat_chunk = res["x"] + feat.append(feat_chunk) + return torch.cat(feat, 1).squeeze(0) + + +def main(tsv_dir, split, ckpt_path, layer, nshard, rank, feat_dir, max_chunk): + reader = Wav2Vec2FeatureReader(ckpt_path, layer, max_chunk) + generator, num = get_path_iterator(f"{tsv_dir}/{split}.tsv", nshard, rank) + dump_feature(reader, generator, num, split, nshard, rank, feat_dir) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("tsv_dir") + parser.add_argument("split") + parser.add_argument("ckpt_path") + parser.add_argument("layer", type=int) + parser.add_argument("nshard", type=int) + parser.add_argument("rank", type=int) + parser.add_argument("feat_dir") + parser.add_argument("--max_chunk", type=int, default=1600000) + args = parser.parse_args() + logger.info(args) + + main(**vars(args)) diff --git a/fairseq/examples/hubert/simple_kmeans/feature_utils.py b/fairseq/examples/hubert/simple_kmeans/feature_utils.py new file mode 100644 index 0000000..f80bc45 --- /dev/null +++ b/fairseq/examples/hubert/simple_kmeans/feature_utils.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import logging
import os
import sys

import tqdm
from npy_append_array import NpyAppendArray


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("feature_utils")


def get_shard_range(tot, nshard, rank):
    """Return the ``[start, end)`` item range handled by shard ``rank``.

    Args:
        tot: total number of items.
        nshard: number of shards the items are split into.
        rank: zero-based shard index; must satisfy ``0 <= rank < nshard``.

    Returns:
        ``(start, end)`` computed by rounding ``tot / nshard * rank`` and
        ``tot / nshard * (rank + 1)``. An empty range fails the assertion.
    """
    assert rank < nshard and rank >= 0, f"invaid rank/nshard {rank}/{nshard}"
    start = round(tot / nshard * rank)
    end = round(tot / nshard * (rank + 1))
    assert start < end, f"start={start}, end={end}"
    logger.info(
        f"rank {rank} of {nshard}, process {end-start} "
        f"({start}-{end}) out of {tot}"
    )
    return start, end


def get_path_iterator(tsv, nshard, rank):
    """Build an iterator factory over one shard of a tsv manifest.

    The first line of ``tsv`` is the root directory; every following line is
    ``subpath<TAB>nsample``.

    Returns:
        ``(iterate, num)`` where ``iterate()`` yields
        ``(f"{root}/{subpath}", int(nsample))`` for the selected shard and
        ``num`` is the number of lines in that shard.
    """
    with open(tsv, "r") as f:
        root = f.readline().rstrip()
        lines = [line.rstrip() for line in f]
    start, end = get_shard_range(len(lines), nshard, rank)
    lines = lines[start:end]

    def iterate():
        for line in lines:
            subpath, nsample = line.split("\t")
            yield f"{root}/{subpath}", int(nsample)

    return iterate, len(lines)


def dump_feature(reader, generator, num, split, nshard, rank, feat_dir):
    """Extract features for every item of ``generator()`` and write them out.

    Features are appended to ``{feat_dir}/{split}_{rank}_{nshard}.npy`` and the
    per-item lengths are written, one per line, to the matching ``.len`` file.
    An existing ``.npy`` file is removed first so the run starts from scratch.

    Args:
        reader: object whose ``get_feats(path, nsample)`` returns a tensor.
        generator: zero-argument callable returning ``(path, nsample)`` pairs.
        num: number of pairs, used only for the progress bar.
        split, nshard, rank: used to build the output file names.
        feat_dir: output directory; created when missing.
    """
    iterator = generator()

    feat_path = f"{feat_dir}/{split}_{rank}_{nshard}.npy"
    leng_path = f"{feat_dir}/{split}_{rank}_{nshard}.len"

    os.makedirs(feat_dir, exist_ok=True)
    if os.path.exists(feat_path):
        os.remove(feat_path)

    # Close the append array deterministically (also on errors) so the .npy
    # file is finalised; it was previously left open.
    with NpyAppendArray(feat_path) as feat_f, open(leng_path, "w") as leng_f:
        for path, nsample in tqdm.tqdm(iterator, total=num):
            feat = reader.get_feats(path, nsample)
            feat_f.append(feat.cpu().numpy())
            leng_f.write(f"{len(feat)}\n")
    logger.info("finished successfully")
# and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import sys

import numpy as np

logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("learn_kmeans")


def get_km_model(
    n_clusters,
    init,
    max_iter,
    batch_size,
    tol,
    max_no_improvement,
    n_init,
    reassignment_ratio,
):
    """Build an unfitted ``MiniBatchKMeans`` from the given hyper-parameters.

    All arguments are forwarded to the estimator under the same names. The
    estimator is additionally created with ``verbose=1``,
    ``compute_labels=False`` and ``init_size=None``.

    Returns:
        The configured, unfitted ``sklearn.cluster.MiniBatchKMeans``.
    """
    # Imported here so the feature-loading helpers below can be used
    # without scikit-learn installed.
    from sklearn.cluster import MiniBatchKMeans

    return MiniBatchKMeans(
        n_clusters=n_clusters,
        init=init,
        max_iter=max_iter,
        batch_size=batch_size,
        verbose=1,
        compute_labels=False,
        tol=tol,
        max_no_improvement=max_no_improvement,
        init_size=None,
        n_init=n_init,
        reassignment_ratio=reassignment_ratio,
    )


def load_feature_shard(feat_dir, split, nshard, rank, percent):
    """Load the features of one shard, optionally sub-sampling utterances.

    Reads ``{feat_dir}/{split}_{rank}_{nshard}.npy`` together with the
    matching ``.len`` file, which holds one utterance length per line.

    Args:
        feat_dir: directory holding the dumped shard files.
        split: dataset split name used in the file names.
        nshard: total number of shards, used in the file names.
        rank: index of the shard to load.
        percent: if negative, the whole shard is returned as a read-only
            memory map. Otherwise ``ceil(num_utterances * percent)``
            utterances are drawn without replacement from numpy's global
            random state and their frames are concatenated.

    Returns:
        The feature array; rows (axis 0) are frames.
    """
    feat_path = f"{feat_dir}/{split}_{rank}_{nshard}.npy"
    leng_path = f"{feat_dir}/{split}_{rank}_{nshard}.len"
    with open(leng_path, "r") as f:
        lengs = [int(line.rstrip()) for line in f]
        # offsets[i] is the first row of utterance i in the feature array.
        offsets = [0] + np.cumsum(lengs[:-1]).tolist()

    if percent < 0:
        return np.load(feat_path, mmap_mode="r")

    nsample = int(np.ceil(len(lengs) * percent))
    indices = np.random.choice(len(lengs), nsample, replace=False)
    feat = np.load(feat_path, mmap_mode="r")
    sampled_feat = np.concatenate(
        [feat[offsets[i]: offsets[i] + lengs[i]] for i in indices], axis=0
    )
    logger.info(
        (
            f"sampled {nsample} utterances, {len(sampled_feat)} frames "
            f"from shard {rank}/{nshard}"
        )
    )
    return sampled_feat


def load_feature(feat_dir, split, nshard, seed, percent):
    """Load and concatenate the features of all ``nshard`` shards.

    Args:
        feat_dir: directory holding the dumped shard files.
        split: dataset split name used in the file names.
        nshard: number of shards to load (ranks ``0 .. nshard - 1``).
        seed: unused here; sampling draws from numpy's global random state,
            which ``learn_kmeans`` seeds before calling this function.
        percent: fraction of utterances to sample per shard, or a negative
            value to load everything; must not exceed 1.0.

    Returns:
        A single array with the rows of every shard stacked along axis 0.
    """
    assert percent <= 1.0
    feat = np.concatenate(
        [
            load_feature_shard(feat_dir, split, nshard, r, percent)
            for r in range(nshard)
        ],
        axis=0,
    )
    # Use the module logger (not the root logger) like the rest of the file.
    logger.info(f"loaded feature with dimension {feat.shape}")
    return feat


def learn_kmeans(
    feat_dir,
    split,
    nshard,
    km_path,
    n_clusters,
    seed,
    percent,
    init,
    max_iter,
    batch_size,
    tol,
    n_init,
    reassignment_ratio,
    max_no_improvement,
):
    """Fit a mini-batch k-means model on dumped features and save it.

    Seeds numpy's global random state with ``seed``, loads the features of
    all shards, fits the model, writes it to ``km_path`` with ``joblib`` and
    logs the negated model score divided by the number of feature rows.
    The remaining arguments are passed on to ``load_feature`` and
    ``get_km_model``.
    """
    # Imported here so the feature-loading helpers can be used without joblib.
    import joblib

    np.random.seed(seed)
    feat = load_feature(feat_dir, split, nshard, seed, percent)
    km_model = get_km_model(
        n_clusters,
        init,
        max_iter,
        batch_size,
        tol,
        max_no_improvement,
        n_init,
        reassignment_ratio,
    )
    km_model.fit(feat)
    joblib.dump(km_model, km_path)

    inertia = -km_model.score(feat) / len(feat)
    logger.info("total inertia: %.5f", inertia)
    logger.info("finished successfully")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("feat_dir", type=str)
    parser.add_argument("split", type=str)
    parser.add_argument("nshard", type=int)
    parser.add_argument("km_path", type=str)
    parser.add_argument("n_clusters", type=int)
    parser.add_argument("--seed", default=0, type=int)
    parser.add_argument(
        "--percent", default=-1, type=float, help="sample a subset; -1 for all"
    )
    parser.add_argument("--init", default="k-means++")
    parser.add_argument("--max_iter", default=100, type=int)
    parser.add_argument("--batch_size", default=10000, type=int)
    parser.add_argument("--tol", default=0.0, type=float)
    parser.add_argument("--max_no_improvement", default=100, type=int)
    parser.add_argument("--n_init", default=20, type=int)
    parser.add_argument("--reassignment_ratio", default=0.0, type=float)
    args = parser.parse_args()
    logger.info(str(args))

    # The parsed argument names match the keyword names of learn_kmeans.
    learn_kmeans(**vars(args))

# diff --git a/fairseq/examples/hubert/tests/6313-76958-0021.flac b/fairseq/examples/hubert/tests/6313-76958-0021.flac new file mode 100644 index 0000000..e644b19 Binary files /dev/null and b/fairseq/examples/hubert/tests/6313-76958-0021.flac differ
# diff --git a/fairseq/examples/hubert/tests/sample.base.L9.km500.km b/fairseq/examples/hubert/tests/sample.base.L9.km500.km new file mode 100644 index
0000000..656eef9 --- /dev/null +++ b/fairseq/examples/hubert/tests/sample.base.L9.km500.km @@ -0,0 +1 @@ +17 17 17 17 296 296 20 20 20 461 461 20 184 20 20 20 184 289 144 445 445 213 213 213 213 252 215 129 401 20 354 180 494 44 416 416 416 192 192 180 180 84 84 84 16 88 88 88 88 319 242 240 348 35 35 117 404 197 226 209 83 55 55 55 322 67 94 199 118 118 118 118 118 118 402 219 219 219 222 222 222 353 59 245 245 251 251 241 241 431 367 367 178 35 35 35 458 192 351 41 324 324 324 252 464 464 139 139 424 424 424 497 497 497 122 90 42 42 147 380 380 499 319 319 319 348 348 33 33 394 90 76 465 74 425 425 386 386 431 319 319 319 319 319 240 203 53 473 34 340 340 340 340 116 64 212 384 377 123 123 123 216 216 216 114 114 57 57 57 203 381 381 117 48 13 47 80 20 80 80 320 7 7 364 345 141 141 141 141 281 281 9 86 221 198 198 22 283 455 236 239 239 107 107 395 286 286 286 468 468 406 406 467 176 176 176 328 200 200 248 464 145 365 365 365 365 330 385 457 77 77 77 54 224 300 334 334 382 304 304 271 186 31 342 342 342 198 22 283 5 38 162 232 232 482 68 26 26 359 359 81 444 213 213 252 143 458 41 324 324 324 422 143 445 445 445 351 180 486 315 315 450 450 450 203 53 473 291 89 116 379 243 478 478 66 482 482 105 105 336 336 354 29 498 498 498 498 396 396 313 37 314 198 22 222 222 222 222 245 129 74 74 437 437 496 496 496 413 94 199 41 41 324 324 318 318 269 342 9 168 106 106 284 426 426 426 426 348 64 76 401 259 108 123 153 153 153 153 372 372 396 313 24 314 90 401 259 445 445 351 351 365 365 365 365 282 282 215 233 233 229 427 20 247 126 126 126 326 326 326 326 326 326 326 101 101 101 149 228 228 20 289 20 7 217 70 65 189 189 151 240 285 300 300 495 406 467 176 135 135 339 248 466 114 222 222 222 313 313 239 384 371 490 490 38 31 54 54 224 494 494 236 129 259 74 190 487 288 288 288 288 374 173 173 280 280 302 302 175 175 69 69 223 130 129 401 75 108 119 295 295 295 295 143 192 192 135 135 135 135 200 200 464 255 255 255 251 251 241 431 235 235 235 348 348 465 192 44 44 236 8 8 
354 319 319 383 348 36 310 107 107 395 462 462 8 32 32 32 354 153 153 153 153 153 387 387 387 387 85 207 318 318 318 49 453 9 168 125 125 125 125 125 466 199 44 44 143 129 144 445 351 351 351 486 486 460 285 285 302 302 497 497 122 239 161 161 79 79 499 499 499 265 265 265 85 85 85 299 299 173 352 352 427 229 170 247 15 15 15 15 15 15 193 193 193 17 diff --git a/fairseq/examples/hubert/tests/sample.base.L9.len b/fairseq/examples/hubert/tests/sample.base.L9.len new file mode 100644 index 0000000..7d3028f --- /dev/null +++ b/fairseq/examples/hubert/tests/sample.base.L9.len @@ -0,0 +1 @@ +596 diff --git a/fairseq/examples/hubert/tests/sample.base.L9.npy b/fairseq/examples/hubert/tests/sample.base.L9.npy new file mode 100644 index 0000000..574bef9 Binary files /dev/null and b/fairseq/examples/hubert/tests/sample.base.L9.npy differ diff --git a/fairseq/examples/hubert/tests/sample.large.L20.len b/fairseq/examples/hubert/tests/sample.large.L20.len new file mode 100644 index 0000000..7d3028f --- /dev/null +++ b/fairseq/examples/hubert/tests/sample.large.L20.len @@ -0,0 +1 @@ +596 diff --git a/fairseq/examples/hubert/tests/sample.large.L20.npy b/fairseq/examples/hubert/tests/sample.large.L20.npy new file mode 100644 index 0000000..c58d221 Binary files /dev/null and b/fairseq/examples/hubert/tests/sample.large.L20.npy differ diff --git a/fairseq/examples/hubert/tests/sample.large.hypo.word b/fairseq/examples/hubert/tests/sample.large.hypo.word new file mode 100644 index 0000000..d77a4cf --- /dev/null +++ b/fairseq/examples/hubert/tests/sample.large.hypo.word @@ -0,0 +1 @@ +KEEP A GOING AN IF YOU'RE LUCKY YOU'LL RUN PLUMB INTO THEM WAS THE JEERING ANSWER AS THE SLEEPY COWMEN SPURRED THEIR PONIES ON TOWARD CAMP MUTTERING THEIR DISAPPROVAL OF TAKING ALONG A BUNCH OF BOYS ON A CATTLE DRIVE (None-0) diff --git a/fairseq/examples/hubert/tests/sample.xlarge.L30.len b/fairseq/examples/hubert/tests/sample.xlarge.L30.len new file mode 100644 index 0000000..7d3028f --- /dev/null +++ 
b/fairseq/examples/hubert/tests/sample.xlarge.L30.len @@ -0,0 +1 @@ +596 diff --git a/fairseq/examples/hubert/tests/sample.xlarge.L30.npy b/fairseq/examples/hubert/tests/sample.xlarge.L30.npy new file mode 100644 index 0000000..29d8c0d Binary files /dev/null and b/fairseq/examples/hubert/tests/sample.xlarge.L30.npy differ diff --git a/fairseq/examples/hubert/tests/sample.xlarge.hypo.word b/fairseq/examples/hubert/tests/sample.xlarge.hypo.word new file mode 100644 index 0000000..53e402d --- /dev/null +++ b/fairseq/examples/hubert/tests/sample.xlarge.hypo.word @@ -0,0 +1 @@ +KEEP A GOIN AND IF YOU'RE LUCKY YOU'LL RUN PLUMB INTO THEM WAS THE JEERING ANSWER AS THE SLEEPY COWMEN SPURRED THEIR PONIES ON TOWARD CAMP MUTTERING THEIR DISAPPROVAL OF TAKING ALONG A BUNCH OF BOYS ON A CATTLE DRIVE (None-0) diff --git a/fairseq/examples/hubert/tests/test_feature_and_unit.sh b/fairseq/examples/hubert/tests/test_feature_and_unit.sh new file mode 100644 index 0000000..8cddb27 --- /dev/null +++ b/fairseq/examples/hubert/tests/test_feature_and_unit.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +set -e + +sizes="base large xlarge" + +declare -A ckpt_urls +ckpt_urls[base]="https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt" +ckpt_urls[large]="https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k.pt" +ckpt_urls[xlarge]="https://dl.fbaipublicfiles.com/hubert/hubert_xtralarge_ll60k.pt" + +declare -A km_layers +km_layers[base]=9 +km_layers[large]=20 +km_layers[xlarge]=30 + +declare -A km_urls +km_urls[base]="https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960_L9_km500.bin" + +declare -A km_nunits +km_nunits[base]=500 + +test_dir=./examples/hubert/tests +split=sample + +echo -e "${test_dir}\n6313-76958-0021.flac\t190800" > "${test_dir}/${split}.tsv" + +check_feature () { + echo "checking features..." + + size=$1 + ckpt_url=$2 + km_layer=$3 + ckpt_path="$test_dir/$(basename "$ckpt_url")" + + if [ ! 
-f "$ckpt_path" ]; then + echo "downloading $ckpt_url to $ckpt_path" + wget "$ckpt_url" -O "$ckpt_path" + fi + + python ./examples/hubert/simple_kmeans/dump_hubert_feature.py \ + "${test_dir}" "${split}" "${ckpt_path}" "${km_layer}" 1 0 "${test_dir}" + + if diff -q "${test_dir}/${split}.${size}.L${km_layer}.npy" "${test_dir}/${split}_0_1.npy" &>/dev/null; then + echo "...passed npy check" + else + echo "...failed npy check" + fi + + if diff -q "${test_dir}/${split}.${size}.L${km_layer}.len" "${test_dir}/${split}_0_1.len" &>/dev/null; then + echo "...passed len check" + else + echo "...failed len check" + fi +} + + +check_unit () { + echo "checking units..." + + size=$1 + km_url=$2 + km_layer=$3 + km_nunit=$4 + km_path="$test_dir/$(basename "$km_url")" + + if [ ! -f "$km_path" ]; then + echo "downloading $km_url to $km_path" + wget "$km_url" -O "$km_path" + fi + + python ./examples/hubert/simple_kmeans/dump_km_label.py \ + "${test_dir}" "${split}" "${km_path}" 1 0 "${test_dir}" + + if diff -q "${test_dir}/${split}.${size}.L${km_layer}.km${km_nunit}.km" "${test_dir}/${split}_0_1.km" &>/dev/null; then + echo "...passed unit check" + else + echo "...failed unit check" + fi +} + + +for size in $sizes; do + echo "=== Running unit test for HuBERT $size ===" + check_feature "$size" "${ckpt_urls[$size]}" "${km_layers[$size]}" + + if [ -n "${km_urls[$size]}" ]; then + check_unit "$size" "${km_urls[$size]}" "${km_layers[$size]}" "${km_nunits[$size]}" + fi + + rm -f $test_dir/${split}_0_1.* +done diff --git a/fairseq/examples/hubert/tests/test_finetuned_asr.sh b/fairseq/examples/hubert/tests/test_finetuned_asr.sh new file mode 100644 index 0000000..3c0538b --- /dev/null +++ b/fairseq/examples/hubert/tests/test_finetuned_asr.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + +sizes="large xlarge" + +declare -A ckpt_urls +ckpt_urls[large]="https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k_finetune_ls960.pt" 
+ckpt_urls[xlarge]="https://dl.fbaipublicfiles.com/hubert/hubert_xtralarge_ll60k_finetune_ls960.pt" + +test_dir=$(pwd)/examples/hubert/tests +split=sample + +echo -e "${test_dir}\n6313-76958-0021.flac\t190800" > "${test_dir}/${split}.tsv" +echo -e "K E E P | A | G O I N G | A N D | I F | Y O U ' R E | L U C K Y | Y O U ' L L | R U N | P L U M B | I N T O | T H E M | W A S | T H E | J E E R I N G | A N S W E R | A S | T H E | S L E E P Y | C O W M E N | S P U R R E D | T H E I R | P O N I E S | O N | T O W A R D | C A M P | M U T T E R I N G | T H E I R | D I S A P P R O V A L | O F | T A K I N G | A L O N G | A | B U N C H | O F | B O Y S | O N | A | C A T T L E | D R I V E |" > "${test_dir}/${split}.ltr" + +check_asr () { + echo "checking asr outputs..." + + size=$1 + ckpt_url=$2 + ckpt_path="$test_dir/$(basename "$ckpt_url")" + + if [ ! -f "$ckpt_path" ]; then + echo "downloading $ckpt_url to $ckpt_path" + wget "$ckpt_url" -O "$ckpt_path" + fi + + python examples/speech_recognition/new/infer.py \ + --config-dir examples/hubert/config/decode --config-name infer_viterbi \ + common_eval.path="${ckpt_path}" task.data="${test_dir}" task.normalize=true \ + decoding.results_path="${test_dir}/pred" \ + common_eval.results_path="${test_dir}/pred" \ + common_eval.quiet=false dataset.gen_subset="${split}" + + if diff -q "${test_dir}/pred/hypo.word" "${test_dir}/${split}.${size}.hypo.word" &>/dev/null; then + echo "...passed word check" + else + echo "...failed word check" + fi + rm -rf "${test_dir}/pred" +} + +for size in $sizes; do + check_asr "$size" "${ckpt_urls[$size]}" +done diff --git a/fairseq/examples/hubert/update_ckpt.py b/fairseq/examples/hubert/update_ckpt.py new file mode 100644 index 0000000..53c9e74 --- /dev/null +++ b/fairseq/examples/hubert/update_ckpt.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch + +src_ckpt = "/checkpoint/wnhsu/w2v/archived/hubert_base_ls960_it2.pt" +ref_ckpt = "/checkpoint/wnhsu/w2v/hubert_icassp_oss_v3/iter2_km100-400k-grp-L6/oss.km500_p0_1_s334.pmw1_0.puw0_0.grpnorm.ml10.mp0_8.untie.mxsz250000.ufreq1.maxtok1400000.MU100k.s1337.ngpu32/checkpoint_last.pt" +new_ckpt = "/checkpoint/wnhsu/w2v/archived/hubert_base_ls960_it2_updated.pt" + + +def update_state(state): + state["model"]["label_embs_concat"] = state["model"].pop("label_embs") + state["args"].task = "hubert_pretraining" + state["args"].labels = f"['{state['args'].labels}']" + return state + + +src_state = torch.load(src_ckpt) +src_state = update_state(src_state) +torch.save(src_state, new_ckpt) diff --git a/fairseq/examples/joint_alignment_translation/README.md b/fairseq/examples/joint_alignment_translation/README.md new file mode 100644 index 0000000..cd9c0ea --- /dev/null +++ b/fairseq/examples/joint_alignment_translation/README.md @@ -0,0 +1,89 @@ +# Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019) + +This page includes instructions for training models described in [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](https://arxiv.org/abs/1909.02074). + +## Training a joint alignment-translation model on WMT'18 En-De + +##### 1. Extract and preprocess the WMT'18 En-De data +```bash +./prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh +``` + +##### 2. Generate alignments from statistical alignment toolkits e.g. Giza++/FastAlign. +In this example, we use FastAlign. +```bash +git clone git@github.com:clab/fast_align.git +pushd fast_align +mkdir build +cd build +cmake .. +make +popd +ALIGN=fast_align/build/fast_align +paste bpe.32k/train.en bpe.32k/train.de | awk -F '\t' '{print $1 " ||| " $2}' > bpe.32k/train.en-de +$ALIGN -i bpe.32k/train.en-de -d -o -v > bpe.32k/train.align +``` + +##### 3. Preprocess the dataset with the above generated alignments. 
+```bash +fairseq-preprocess \ + --source-lang en --target-lang de \ + --trainpref bpe.32k/train \ + --validpref bpe.32k/valid \ + --testpref bpe.32k/test \ + --align-suffix align \ + --destdir binarized/ \ + --joined-dictionary \ + --workers 32 +``` + +##### 4. Train a model +```bash +fairseq-train \ + binarized \ + --arch transformer_wmt_en_de_big_align --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 --activation-fn relu\ + --lr 0.0002 --lr-scheduler inverse_sqrt --warmup-updates 4000 --warmup-init-lr 1e-07 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 3500 --label-smoothing 0.1 \ + --save-dir ./checkpoints --log-interval 1000 --max-update 60000 \ + --keep-interval-updates -1 --save-interval-updates 0 \ + --load-alignments --criterion label_smoothed_cross_entropy_with_alignment \ + --fp16 +``` + +Note that the `--fp16` flag requires you have CUDA 9.1 or greater and a Volta GPU or newer. + +If you want to train the above model with big batches (assuming your machine has 8 GPUs): +- add `--update-freq 8` to simulate training on 8x8=64 GPUs +- increase the learning rate; 0.0007 works well for big batches + +##### 5. Evaluate and generate the alignments (BPE level) +```bash +fairseq-generate \ + binarized --gen-subset test --print-alignment \ + --source-lang en --target-lang de \ + --path checkpoints/checkpoint_best.pt --beam 5 --nbest 1 +``` + +##### 6. Other resources. +The code for: +1. preparing alignment test sets +2. converting BPE level alignments to token level alignments +3. symmetrizing bidirectional alignments +4. 
evaluating alignments using AER metric +can be found [here](https://github.com/lilt/alignment-scripts) + +## Citation + +```bibtex +@inproceedings{garg2019jointly, + title = {Jointly Learning to Align and Translate with Transformer Models}, + author = {Garg, Sarthak and Peitz, Stephan and Nallasamy, Udhyakumar and Paulik, Matthias}, + booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)}, + address = {Hong Kong}, + month = {November}, + url = {https://arxiv.org/abs/1909.02074}, + year = {2019}, +} +``` diff --git a/fairseq/examples/joint_alignment_translation/prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh b/fairseq/examples/joint_alignment_translation/prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh new file mode 100644 index 0000000..e3efeb2 --- /dev/null +++ b/fairseq/examples/joint_alignment_translation/prepare-wmt18en2de_no_norm_no_escape_no_agressive.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +echo 'Cloning Moses github repository (for tokenization scripts)...' +git clone https://github.com/moses-smt/mosesdecoder.git + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +CLEAN=$SCRIPTS/training/clean-corpus-n.perl +REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl + +URLS=( + "http://statmt.org/wmt13/training-parallel-europarl-v7.tgz" + "http://statmt.org/wmt13/training-parallel-commoncrawl.tgz" + "http://data.statmt.org/wmt18/translation-task/training-parallel-nc-v13.tgz" + "http://data.statmt.org/wmt18/translation-task/rapid2016.tgz" + "http://data.statmt.org/wmt17/translation-task/dev.tgz" + "http://statmt.org/wmt14/test-full.tgz" +) +CORPORA=( + "training/europarl-v7.de-en" + "commoncrawl.de-en" + "training-parallel-nc-v13/news-commentary-v13.de-en" + "rapid2016.de-en" +) + +if [ ! 
-d "$SCRIPTS" ]; then + echo "Please set SCRIPTS variable correctly to point to Moses scripts." + exit +fi + +src=en +tgt=de +lang=en-de +prep=wmt18_en_de +tmp=$prep/tmp +orig=orig +dev=dev/newstest2012 +codes=32000 +bpe=bpe.32k + +mkdir -p $orig $tmp $prep $bpe + +cd $orig + +for ((i=0;i<${#URLS[@]};++i)); do + url=${URLS[i]} + file=$(basename $url) + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + wget "$url" + if [ -f $file ]; then + echo "$url successfully downloaded." + else + echo "$url not successfully downloaded." + exit 1 + fi + if [ ${file: -4} == ".tgz" ]; then + tar zxvf $file + elif [ ${file: -4} == ".tar" ]; then + tar xvf $file + fi + fi +done +cd .. + +echo "pre-processing train data..." +for l in $src $tgt; do + rm -rf $tmp/train.tags.$lang.tok.$l + for f in "${CORPORA[@]}"; do + cat $orig/$f.$l | \ + perl $REM_NON_PRINT_CHAR | \ + perl $TOKENIZER -threads 8 -l $l -no-escape >> $tmp/train.tags.$lang.tok.$l + done +done + +echo "pre-processing test data..." +for l in $src $tgt; do + if [ "$l" == "$src" ]; then + t="src" + else + t="ref" + fi + grep '\s*//g' | \ + sed -e 's/\s*<\/seg>\s*//g' | \ + sed -e "s/\’/\'/g" | \ + perl $TOKENIZER -threads 8 -l $l -no-escape > $tmp/test.$l + echo "" +done + +# apply length filtering before BPE +perl $CLEAN -ratio 1.5 $tmp/train.tags.$lang.tok $src $tgt $tmp/train 1 100 + +# use newstest2012 for valid +echo "pre-processing valid data..." 
+for l in $src $tgt; do + rm -rf $tmp/valid.$l + cat $orig/$dev.$l | \ + perl $REM_NON_PRINT_CHAR | \ + perl $TOKENIZER -threads 8 -l $l -no-escape >> $tmp/valid.$l +done + +mkdir output +mv $tmp/{train,valid,test}.{$src,$tgt} output + +#BPE +git clone https://github.com/glample/fastBPE.git +pushd fastBPE +g++ -std=c++11 -pthread -O3 fastBPE/main.cc -IfastBPE -o fast +popd +fastBPE/fast learnbpe $codes output/train.$src output/train.$tgt > $bpe/codes +for split in {train,valid,test}; do for lang in {en,de}; do fastBPE/fast applybpe $bpe/$split.$lang output/$split.$lang $bpe/codes; done; done diff --git a/fairseq/examples/language_model/README.adaptive_inputs.md b/fairseq/examples/language_model/README.adaptive_inputs.md new file mode 100644 index 0000000..6650d58 --- /dev/null +++ b/fairseq/examples/language_model/README.adaptive_inputs.md @@ -0,0 +1,39 @@ +# Adaptive Input Representations for Neural Language Modeling (Baevski and Auli, 2018) + +## Pre-trained models + +Description | Parameters | Dataset | Model and Test set(s) +---|---:|---|--- +Adaptive Inputs
([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853)) | 1026M | [Google Billion Words](https://github.com/ciprian-chelba/1-billion-word-language-modeling-benchmark) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_gbw_huge.tar.bz2) +Adaptive Inputs
([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853)) | 247M | [WikiText-103](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_wiki103.v2.tar.bz2) + +## Training an LM with adaptive inputs + +First, see the general [language modeling README](README.md) for instructions on +preprocessing the WikiText-103 data. + +Then use the following training command to train a model with adaptive inputs +using the `transformer_lm_wiki103` model architecture: +```bash +fairseq-train --task language_modeling \ + data-bin/wikitext-103 \ + --save-dir checkpoints/transformer_wikitext-103 \ + --arch transformer_lm_wiki103 \ + --max-update 286000 --lr 1.0 --t-mult 2 --lr-period-updates 270000 --lr-scheduler cosine --lr-shrink 0.75 \ + --warmup-updates 16000 --warmup-init-lr 1e-07 --stop-min-lr 1e-09 --optimizer nag --min-lr 0.0001 --clip-norm 0.1 \ + --criterion adaptive_loss --max-tokens 3072 --update-freq 3 --tokens-per-sample 3072 --seed 1 \ + --sample-break-mode none --skip-invalid-size-inputs-valid-test --ddp-backend=legacy_ddp +``` + +## Citation + +```bibtex +@inproceedings{ + baevski2018adaptive, + title={Adaptive Input Representations for Neural Language Modeling}, + author={Alexei Baevski and Michael Auli}, + booktitle={International Conference on Learning Representations}, + year={2019}, + url={https://openreview.net/forum?id=ByxZX20qFQ}, +} +``` diff --git a/fairseq/examples/language_model/README.conv.md b/fairseq/examples/language_model/README.conv.md new file mode 100644 index 0000000..1ff8635 --- /dev/null +++ b/fairseq/examples/language_model/README.conv.md @@ -0,0 +1,40 @@ +# Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017) + +## Example usage + +First download and preprocess the data following the main [language modeling README](README.md). 
+ +Then to train a convolutional LM using the `fconv_lm_dauphin_wikitext103` +architecture: +```bash +fairseq-train --task language_modeling \ + data-bin/wikitext-103 \ + --save-dir checkpoints/fconv_wikitext-103 \ + --arch fconv_lm_dauphin_wikitext103 \ + --adaptive-softmax-cutoff 10000,20000,200000 \ + --dropout 0.2 \ + --criterion adaptive_loss \ + --optimizer nag --clip-norm 0.1 --weight-decay 5e-06 \ + --lr 1.0 --lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \ + --max-tokens 1024 --tokens-per-sample 1024 \ + --ddp-backend legacy_ddp \ + --max-epoch 35 +``` + +And evaluate with: +```bash +fairseq-eval-lm data-bin/wikitext-103 --path checkpoints/fconv_wiki103/checkpoint_best.pt +``` + +## Citation + +```bibtex +@inproceedings{dauphin2017language, + title={Language Modeling with Gated Convolutional Networks}, + author={Dauphin, Yann N and Fan, Angela and Auli, Michael and Grangier, David}, + booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70}, + pages={933--941}, + year={2017}, + organization={JMLR} +} +``` diff --git a/fairseq/examples/language_model/README.md b/fairseq/examples/language_model/README.md new file mode 100644 index 0000000..e78ea48 --- /dev/null +++ b/fairseq/examples/language_model/README.md @@ -0,0 +1,123 @@ +# Neural Language Modeling + +## Pre-trained models + +Model | Description | Dataset | Download +---|---|---|--- +`transformer_lm.gbw.adaptive_huge` | Adaptive Inputs
([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853))
1026M params | [Google Billion Words](https://github.com/ciprian-chelba/1-billion-word-language-modeling-benchmark) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_gbw_huge.tar.bz2) +`transformer_lm.wiki103.adaptive` | Adaptive Inputs
([Baevski and Auli, 2018](https://arxiv.org/abs/1809.10853))
247M params | [WikiText-103](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_wiki103.v2.tar.bz2) +`transformer_lm.wmt19.en` | English LM
([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) | [WMT News Crawl](http://data.statmt.org/news-crawl/) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.en.tar.gz) +`transformer_lm.wmt19.de` | German LM
([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) | [WMT News Crawl](http://data.statmt.org/news-crawl/) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.de.tar.gz) +`transformer_lm.wmt19.ru` | Russian LM
([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) | [WMT News Crawl](http://data.statmt.org/news-crawl/) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.ru.tar.gz) + +## Example usage + +We require a few additional Python dependencies for preprocessing: +```bash +pip install fastBPE sacremoses +``` + +To sample from a language model using PyTorch Hub: +```python +import torch + +# List available models +torch.hub.list('pytorch/fairseq') # [..., 'transformer_lm.wmt19.en', ...] + +# Load an English LM trained on WMT'19 News Crawl data +en_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.en', tokenizer='moses', bpe='fastbpe') +en_lm.eval() # disable dropout + +# Move model to GPU +en_lm.cuda() + +# Sample from the language model +en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8) +# "Barack Obama is coming to Sydney and New Zealand (...)" + +# Compute perplexity for a sequence +en_lm.score('Barack Obama is coming to Sydney and New Zealand')['positional_scores'].mean().neg().exp() +# tensor(15.1474) + +# The same interface can be used with custom models as well +from fairseq.models.transformer_lm import TransformerLanguageModel +custom_lm = TransformerLanguageModel.from_pretrained('/path/to/model/dir', 'checkpoint100.pt', tokenizer='moses', bpe='fastbpe') +custom_lm.sample('Barack Obama', beam=5) +# "Barack Obama (...)" +``` + +## Training a transformer language model with the CLI tools + +### 1) Preprocess the data + +First download and prepare the [WikiText-103 dataset](https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/): +```bash +cd examples/language_model/ +bash prepare-wikitext-103.sh +cd ../.. 
+``` + +Next preprocess/binarize the data: +```bash +TEXT=examples/language_model/wikitext-103 +fairseq-preprocess \ + --only-source \ + --trainpref $TEXT/wiki.train.tokens \ + --validpref $TEXT/wiki.valid.tokens \ + --testpref $TEXT/wiki.test.tokens \ + --destdir data-bin/wikitext-103 \ + --workers 20 +``` + +### 2) Train a language model + +Next we'll train a basic transformer language model on wikitext-103. For more +advanced usage, see the [adaptive inputs README](README.adaptive_inputs.md). + +To train a basic LM (assumes 2 GPUs): +``` +$ fairseq-train --task language_modeling \ + data-bin/wikitext-103 \ + --save-dir checkpoints/transformer_wikitext-103 \ + --arch transformer_lm --share-decoder-input-output-embed \ + --dropout 0.1 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --weight-decay 0.01 --clip-norm 0.0 \ + --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-updates 4000 --warmup-init-lr 1e-07 \ + --tokens-per-sample 512 --sample-break-mode none \ + --max-tokens 2048 --update-freq 16 \ + --fp16 \ + --max-update 50000 +``` + +If you run out of memory, try reducing `--max-tokens` (max number of tokens per +batch) or `--tokens-per-sample` (max sequence length). You can also adjust +`--update-freq` to accumulate gradients and simulate training on a different +number of GPUs. + +### 3) Evaluate + +```bash +fairseq-eval-lm data-bin/wikitext-103 \ + --path checkpoints/transformer_wiki103/checkpoint_best.pt \ + --batch-size 2 \ + --tokens-per-sample 512 \ + --context-window 400 +# | Evaluated 245569 tokens in 56.1s (4379.02 tokens/s) +# | Loss: 3.4164, Perplexity: 30.46 +``` + +*Note:* The `--context-window` option controls how much context is provided to +each token when computing perplexity. When the window size is 0, the dataset is +chunked into segments of length 512 and perplexity is computed over each segment +normally. However, this results in worse (higher) perplexity since tokens that +appear earlier in each segment have less conditioning. 
When the maximum window +size is used (511 in this case), then we compute perplexity for each token +fully conditioned on 511 tokens of context. This slows down evaluation +significantly, since we must run a separate forward pass for every token in the +dataset, but results in better (lower) perplexity. + + +## Convolutional language models + +Please see the [convolutional LM README](README.conv.md) for instructions on +training convolutional language models. diff --git a/fairseq/examples/language_model/prepare-wikitext-103.sh b/fairseq/examples/language_model/prepare-wikitext-103.sh new file mode 100644 index 0000000..7513021 --- /dev/null +++ b/fairseq/examples/language_model/prepare-wikitext-103.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh + +URLS=( + "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip" +) +FILES=( + "wikitext-103-v1.zip" +) + +for ((i=0;i<${#URLS[@]};++i)); do + file=${FILES[i]} + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + url=${URLS[i]} + wget "$url" + if [ -f $file ]; then + echo "$url successfully downloaded." + else + echo "$url not successfully downloaded." + exit -1 + fi + if [ ${file: -4} == ".tgz" ]; then + tar zxvf $file + elif [ ${file: -4} == ".tar" ]; then + tar xvf $file + elif [ ${file: -4} == ".zip" ]; then + unzip $file + fi + fi +done +cd .. diff --git a/fairseq/examples/laser/README.md b/fairseq/examples/laser/README.md new file mode 100644 index 0000000..66acada --- /dev/null +++ b/fairseq/examples/laser/README.md @@ -0,0 +1,144 @@ +# LASER Language-Agnostic SEntence Representations + +LASER is a library to calculate and use multilingual sentence embeddings. + +You can find more information about LASER and how to use it on the official [LASER repository](https://github.com/facebookresearch/LASER). + +This folder contains source code for training LASER embeddings. 
+ + +## Prepare data and configuration file + +Binarize your data with fairseq, as described [here](https://fairseq.readthedocs.io/en/latest/getting_started.html#data-pre-processing). + +Create a json config file with this format: +``` +{ + "src_vocab": "/path/to/spm.src.cvocab", + "tgt_vocab": "/path/to/spm.tgt.cvocab", + "train": [ + { + "type": "translation", + "id": 0, + "src": "/path/to/srclang1-tgtlang0/train.srclang1", + "tgt": "/path/to/srclang1-tgtlang0/train.tgtlang0" + }, + { + "type": "translation", + "id": 1, + "src": "/path/to/srclang1-tgtlang1/train.srclang1", + "tgt": "/path/to/srclang1-tgtlang1/train.tgtlang1" + }, + { + "type": "translation", + "id": 0, + "src": "/path/to/srclang2-tgtlang0/train.srclang2", + "tgt": "/path/to/srclang2-tgtlang0/train.tgtlang0" + }, + { + "type": "translation", + "id": 1, + "src": "/path/to/srclang2-tgtlang1/train.srclang2", + "tgt": "/path/to/srclang2-tgtlang1/train.tgtlang1" + }, + ... + ], + "valid": [ + { + "type": "translation", + "id": 0, + "src": "/unused", + "tgt": "/unused" + } + ] +} +``` +where paths are paths to binarized indexed fairseq dataset files. +`id` represents the target language id. + + +## Training Command Line Example + +``` +fairseq-train \ + /path/to/configfile_described_above.json \ + --user-dir examples/laser/laser_src \ + --log-interval 100 --log-format simple \ + --task laser --arch laser_lstm \ + --save-dir . 
\ + --optimizer adam \ + --lr 0.001 \ + --lr-scheduler inverse_sqrt \ + --clip-norm 5 \ + --warmup-updates 90000 \ + --update-freq 2 \ + --dropout 0.0 \ + --encoder-dropout-out 0.1 \ + --max-tokens 2000 \ + --max-epoch 50 \ + --encoder-bidirectional \ + --encoder-layers 5 \ + --encoder-hidden-size 512 \ + --decoder-layers 1 \ + --decoder-hidden-size 2048 \ + --encoder-embed-dim 320 \ + --decoder-embed-dim 320 \ + --decoder-lang-embed-dim 32 \ + --warmup-init-lr 0.001 \ + --disable-validation +``` + + +## Applications + +We showcase several applications of multilingual sentence embeddings +with code to reproduce our results (in the directory "tasks"). + +* [**Cross-lingual document classification**](https://github.com/facebookresearch/LASER/tree/master/tasks/mldoc) using the + [*MLDoc*](https://github.com/facebookresearch/MLDoc) corpus [2,6] +* [**WikiMatrix**](https://github.com/facebookresearch/LASER/tree/master/tasks/WikiMatrix) + Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia [7] +* [**Bitext mining**](https://github.com/facebookresearch/LASER/tree/master/tasks/bucc) using the + [*BUCC*](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) corpus [3,5] +* [**Cross-lingual NLI**](https://github.com/facebookresearch/LASER/tree/master/tasks/xnli) + using the [*XNLI*](https://www.nyu.edu/projects/bowman/xnli/) corpus [4,5,6] +* [**Multilingual similarity search**](https://github.com/facebookresearch/LASER/tree/master/tasks/similarity) [1,6] +* [**Sentence embedding of text files**](https://github.com/facebookresearch/LASER/tree/master/tasks/embed) + example how to calculate sentence embeddings for arbitrary text files in any of the supported language. 
+ +**For all tasks, we use exactly the same multilingual encoder, without any task specific optimization or fine-tuning.** + + + +## References + +[1] Holger Schwenk and Matthijs Douze, + [*Learning Joint Multilingual Sentence Representations with Neural Machine Translation*](https://aclanthology.info/papers/W17-2619/w17-2619), + ACL workshop on Representation Learning for NLP, 2017 + +[2] Holger Schwenk and Xian Li, + [*A Corpus for Multilingual Document Classification in Eight Languages*](http://www.lrec-conf.org/proceedings/lrec2018/pdf/658.pdf), + LREC, pages 3548-3551, 2018. + +[3] Holger Schwenk, + [*Filtering and Mining Parallel Data in a Joint Multilingual Space*](http://aclweb.org/anthology/P18-2037) + ACL, July 2018 + +[4] Alexis Conneau, Guillaume Lample, Ruty Rinott, Adina Williams, Samuel R. Bowman, Holger Schwenk and Veselin Stoyanov, + [*XNLI: Cross-lingual Sentence Understanding through Inference*](https://aclweb.org/anthology/D18-1269), + EMNLP, 2018. + +[5] Mikel Artetxe and Holger Schwenk, + [*Margin-based Parallel Corpus Mining with Multilingual Sentence Embeddings*](https://arxiv.org/abs/1811.01136) + arXiv, Nov 3 2018. + +[6] Mikel Artetxe and Holger Schwenk, + [*Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond*](https://arxiv.org/abs/1812.10464) + arXiv, Dec 26 2018. + +[7] Holger Schwenk, Vishrav Chaudhary, Shuo Sun, Hongyu Gong and Paco Guzman, + [*WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia*](https://arxiv.org/abs/1907.05791) + arXiv, July 11 2019. 
+ +[8] Holger Schwenk, Guillaume Wenzek, Sergey Edunov, Edouard Grave and Armand Joulin + [*CCMatrix: Mining Billions of High-Quality Parallel Sentences on the WEB*](https://arxiv.org/abs/1911.04944) diff --git a/fairseq/examples/laser/laser_src/__init__.py b/fairseq/examples/laser/laser_src/__init__.py new file mode 100644 index 0000000..9ffbd65 --- /dev/null +++ b/fairseq/examples/laser/laser_src/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .laser_task import * # noqa +from .laser_lstm import * # noqa +from .laser_transformer import * # noqa diff --git a/fairseq/examples/laser/laser_src/laser_lstm.py b/fairseq/examples/laser/laser_src/laser_lstm.py new file mode 100644 index 0000000..10df90e --- /dev/null +++ b/fairseq/examples/laser/laser_src/laser_lstm.py @@ -0,0 +1,585 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import torch
import torch.nn as nn
import torch.nn.functional as F

from fairseq import options, utils

from fairseq.models import (
    FairseqEncoder,
    FairseqIncrementalDecoder,
    FairseqEncoderDecoderModel,
    register_model,
    register_model_architecture,
)


@register_model("laser_lstm")
class LSTMModel(FairseqEncoderDecoderModel):
    """LSTM encoder-decoder registered with fairseq under the name ``laser_lstm``.

    The encoder's output dict (including its max-pooled ``sentemb``) is passed
    to the decoder together with the id of the target language.
    """

    def __init__(self, encoder, decoder):
        super().__init__(encoder, decoder)

    def forward(
        self,
        src_tokens,
        src_lengths,
        prev_output_tokens=None,
        tgt_tokens=None,
        tgt_lengths=None,
        target_language_id=None,
        dataset_name="",
    ):
        """Encode the source batch and decode towards ``target_language_id``.

        ``tgt_tokens`` and ``tgt_lengths`` are accepted but not used here.
        ``dataset_name`` is forwarded to the encoder, which only uses it in
        its packing error message.

        Returns:
            whatever ``self.decoder`` returns for this batch.
        """
        assert target_language_id is not None

        src_encoder_out = self.encoder(src_tokens, src_lengths, dataset_name)
        return self.decoder(
            prev_output_tokens, src_encoder_out, lang_id=target_language_id
        )

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        parser.add_argument(
            "--dropout",
            default=0.1,
            type=float,
            metavar="D",
            help="dropout probability",
        )
        parser.add_argument(
            "--encoder-embed-dim",
            type=int,
            metavar="N",
            help="encoder embedding dimension",
        )
        parser.add_argument(
            "--encoder-embed-path",
            default=None,
            type=str,
            metavar="STR",
            help="path to pre-trained encoder embedding",
        )
        parser.add_argument(
            "--encoder-hidden-size", type=int, metavar="N", help="encoder hidden size"
        )
        parser.add_argument(
            "--encoder-layers", type=int, metavar="N", help="number of encoder layers"
        )
        parser.add_argument(
            "--encoder-bidirectional",
            action="store_true",
            help="make all layers of encoder bidirectional",
        )
        parser.add_argument(
            "--decoder-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension",
        )
        parser.add_argument(
            "--decoder-embed-path",
            default=None,
            type=str,
            metavar="STR",
            help="path to pre-trained decoder embedding",
        )
        parser.add_argument(
            "--decoder-hidden-size", type=int, metavar="N", help="decoder hidden size"
        )
        parser.add_argument(
            "--decoder-layers", type=int, metavar="N", help="number of decoder layers"
        )
        parser.add_argument(
            "--decoder-out-embed-dim",
            type=int,
            metavar="N",
            help="decoder output embedding dimension",
        )
        parser.add_argument(
            "--decoder-zero-init",
            type=str,
            metavar="BOOL",
            help="initialize the decoder hidden/cell state to zero",
        )
        parser.add_argument(
            "--decoder-lang-embed-dim",
            type=int,
            metavar="N",
            help="decoder language embedding dimension",
        )
        parser.add_argument(
            "--fixed-embeddings",
            action="store_true",
            help="keep embeddings fixed (ENCODER ONLY)",
        )  # TODO Also apply to decoder embeddings?

        # Granular dropout settings (if not specified these default to --dropout)
        parser.add_argument(
            "--encoder-dropout-in",
            type=float,
            metavar="D",
            help="dropout probability for encoder input embedding",
        )
        parser.add_argument(
            "--encoder-dropout-out",
            type=float,
            metavar="D",
            help="dropout probability for encoder output",
        )
        parser.add_argument(
            "--decoder-dropout-in",
            type=float,
            metavar="D",
            help="dropout probability for decoder input embedding",
        )
        parser.add_argument(
            "--decoder-dropout-out",
            type=float,
            metavar="D",
            help="dropout probability for decoder output",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        pretrained_encoder_embed = None
        if args.encoder_embed_path:
            pretrained_encoder_embed = load_pretrained_embedding_from_file(
                args.encoder_embed_path, task.source_dictionary, args.encoder_embed_dim
            )
        pretrained_decoder_embed = None
        if args.decoder_embed_path:
            pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args.decoder_embed_path, task.target_dictionary, args.decoder_embed_dim
            )

        # Tasks that do not expose ``num_tasks`` get zero language embeddings.
        num_langs = getattr(task, "num_tasks", 0)

        encoder = LSTMEncoder(
            dictionary=task.source_dictionary,
            embed_dim=args.encoder_embed_dim,
            hidden_size=args.encoder_hidden_size,
            num_layers=args.encoder_layers,
            dropout_in=args.encoder_dropout_in,
            dropout_out=args.encoder_dropout_out,
            bidirectional=args.encoder_bidirectional,
            pretrained_embed=pretrained_encoder_embed,
            fixed_embeddings=args.fixed_embeddings,
        )
        decoder = LSTMDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            hidden_size=args.decoder_hidden_size,
            out_embed_dim=args.decoder_out_embed_dim,
            num_layers=args.decoder_layers,
            dropout_in=args.decoder_dropout_in,
            dropout_out=args.decoder_dropout_out,
            zero_init=options.eval_bool(args.decoder_zero_init),
            encoder_embed_dim=args.encoder_embed_dim,
            encoder_output_units=encoder.output_units,
            pretrained_embed=pretrained_decoder_embed,
            num_langs=num_langs,
            lang_embed_dim=args.decoder_lang_embed_dim,
        )
        return cls(encoder, decoder)


class LSTMEncoder(FairseqEncoder):
    """LSTM encoder."""

    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        bidirectional=False,
        left_pad=True,
        pretrained_embed=None,
        padding_value=0.0,
        fixed_embeddings=False,
    ):
        super().__init__(dictionary)
        self.num_layers = num_layers
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        else:
            self.embed_tokens = pretrained_embed
        if fixed_embeddings:
            self.embed_tokens.weight.requires_grad = False

        self.lstm = LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # inter-layer dropout only exists when there is more than one layer
            dropout=self.dropout_out if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
        )
        self.left_pad = left_pad
        self.padding_value = padding_value

        # a bidirectional LSTM concatenates both directions
        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2

    def forward(self, src_tokens, src_lengths, dataset_name):
        """Encode a batch and max-pool the outputs into a sentence embedding.

        Args:
            src_tokens: token ids, unpacked below as ``(bsz, seqlen)``.
            src_lengths: per-sentence lengths handed to
                ``pack_padded_sequence``.
            dataset_name: only used in the error message raised when
                packing fails.

        Returns:
            dict with ``sentemb`` (max over the time axis of the encoder
            outputs), ``encoder_out`` (``(outputs, final_hiddens,
            final_cells)``) and ``encoder_padding_mask`` (``None`` when the
            batch contains no padding).

        Raises:
            Exception: if the batch cannot be packed.
        """
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = utils.convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        try:
            packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())
        except Exception as err:
            # Only wrap ordinary errors: KeyboardInterrupt / SystemExit must
            # propagate untouched. Chain the cause so the real packing error
            # is still visible in the traceback.
            raise Exception(f"Packing failed in dataset {dataset_name}") from err

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.data.new(*state_size).zero_()
        c0 = x.data.new(*state_size).zero_()
        packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outs, padding_value=self.padding_value
        )
        x = F.dropout(x, p=self.dropout_out, training=self.training)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                # merge the forward/backward state of each layer into one
                # (1, bsz, output_units) slice
                return torch.cat(
                    [
                        torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(
                            1, bsz, self.output_units
                        )
                        for i in range(self.num_layers)
                    ],
                    dim=0,
                )

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        # Set padded outputs to -inf so they are not selected by max-pooling
        padding_mask = encoder_padding_mask.unsqueeze(-1)
        if padding_mask.any():
            x = x.float().masked_fill_(padding_mask, float("-inf")).type_as(x)

        # Build the sentence embedding by max-pooling over the encoder outputs
        sentemb = x.max(dim=0)[0]

        return {
            "sentemb": sentemb,
            "encoder_out": (x, final_hiddens, final_cells),
            "encoder_padding_mask": encoder_padding_mask
            if encoder_padding_mask.any()
            else None,
        }

    def reorder_encoder_out(self, encoder_out_dict, new_order):
        """Reorder every entry of ``encoder_out_dict`` along its batch axis.

        The dict is modified in place and also returned.
        """
        encoder_out_dict["sentemb"] = encoder_out_dict["sentemb"].index_select(
            0, new_order
        )
        encoder_out_dict["encoder_out"] = tuple(
            eo.index_select(1, new_order) for eo in encoder_out_dict["encoder_out"]
        )
        if encoder_out_dict["encoder_padding_mask"] is not None:
            encoder_out_dict["encoder_padding_mask"] = encoder_out_dict[
                "encoder_padding_mask"
            ].index_select(1, new_order)
        return encoder_out_dict

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        return int(1e5)  # an arbitrary large number


class LSTMDecoder(FairseqIncrementalDecoder):
    """LSTM decoder."""

    def __init__(
        self,
        dictionary,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        zero_init=False,
        encoder_embed_dim=512,
        encoder_output_units=512,
        pretrained_embed=None,
        num_langs=1,
        lang_embed_dim=0,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        if pretrained_embed is None:
            self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        else:
            self.embed_tokens = pretrained_embed

        # The first cell sees [token embedding; sentence embedding; language
        # embedding]; deeper cells see the previous layer's hidden state.
        self.layers = nn.ModuleList(
            [
                LSTMCell(
                    input_size=encoder_output_units + embed_dim + lang_embed_dim
                    if layer == 0
                    else hidden_size,
                    hidden_size=hidden_size,
                )
                for layer in range(num_layers)
            ]
        )
        if hidden_size != out_embed_dim:
            self.additional_fc = Linear(hidden_size, out_embed_dim)
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)

        if zero_init:
            self.sentemb2init = None
        else:
            # projects the sentence embedding to one (hidden, cell) pair per layer
            self.sentemb2init = Linear(
                encoder_output_units, 2 * num_layers * hidden_size
            )

        if lang_embed_dim == 0:
            self.embed_lang = None
        else:
            self.embed_lang = nn.Embedding(num_langs, lang_embed_dim)
            nn.init.uniform_(self.embed_lang.weight, -0.1, 0.1)

    def forward(
        self, prev_output_tokens, encoder_out_dict, incremental_state=None, lang_id=0
    ):
        """Decode conditioned on the sentence embedding and a language id.

        Args:
            prev_output_tokens: previous target tokens, unpacked below as
                ``(bsz, seqlen)``; only the last step is used when
                ``incremental_state`` is given.
            encoder_out_dict: dict produced by the encoder; ``sentemb`` and
                ``encoder_out`` are read.
            incremental_state: fairseq incremental-decoding cache, or ``None``.
            lang_id: target language id fed to the language embedding.

        Returns:
            tuple ``(logits, attn_scores)``. ``attn_scores`` is an all-zero
            tensor, since no attention is computed in this decoder.
        """
        sentemb = encoder_out_dict["sentemb"]
        encoder_out = encoder_out_dict["encoder_out"]

        if incremental_state is not None:
            prev_output_tokens = prev_output_tokens[:, -1:]
        bsz, seqlen = prev_output_tokens.size()

        # get outputs from encoder
        encoder_outs, _, _ = encoder_out[:3]
        srclen = encoder_outs.size(0)

        # embed tokens
        x = self.embed_tokens(prev_output_tokens)
        x = F.dropout(x, p=self.dropout_in, training=self.training)

        # embed language identifier
        if self.embed_lang is not None:
            lang_ids = prev_output_tokens.data.new_full((bsz,), lang_id)
            langemb = self.embed_lang(lang_ids)
            # TODO Should we dropout here???

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # initialize previous states (or get from cache during incremental generation)
        cached_state = utils.get_incremental_state(
            self, incremental_state, "cached_state"
        )
        if cached_state is not None:
            prev_hiddens, prev_cells, input_feed = cached_state
        else:
            num_layers = len(self.layers)
            if self.sentemb2init is None:
                prev_hiddens = [
                    x.data.new(bsz, self.hidden_size).zero_() for _ in range(num_layers)
                ]
                prev_cells = [
                    x.data.new(bsz, self.hidden_size).zero_() for _ in range(num_layers)
                ]
            else:
                # slice the projection into alternating hidden / cell chunks
                init = self.sentemb2init(sentemb)
                prev_hiddens = [
                    init[:, (2 * i) * self.hidden_size : (2 * i + 1) * self.hidden_size]
                    for i in range(num_layers)
                ]
                prev_cells = [
                    init[
                        :,
                        (2 * i + 1) * self.hidden_size : (2 * i + 2) * self.hidden_size,
                    ]
                    for i in range(num_layers)
                ]
            input_feed = x.data.new(bsz, self.hidden_size).zero_()

        attn_scores = x.data.new(srclen, seqlen, bsz).zero_()
        outs = []
        for j in range(seqlen):
            # the sentence (and language) embedding is re-fed at every step
            if self.embed_lang is None:
                step_input = torch.cat((x[j, :, :], sentemb), dim=1)
            else:
                step_input = torch.cat((x[j, :, :], sentemb, langemb), dim=1)

            for i, rnn in enumerate(self.layers):
                # recurrent cell
                hidden, cell = rnn(step_input, (prev_hiddens[i], prev_cells[i]))

                # hidden state becomes the input to the next layer
                step_input = F.dropout(
                    hidden, p=self.dropout_out, training=self.training
                )

                # save state for next time step
                prev_hiddens[i] = hidden
                prev_cells[i] = cell

            out = hidden
            out = F.dropout(out, p=self.dropout_out, training=self.training)

            # input feeding
            input_feed = out

            # save final output
            outs.append(out)

        # cache previous states (no-op except during incremental generation)
        utils.set_incremental_state(
            self,
            incremental_state,
            "cached_state",
            (prev_hiddens, prev_cells, input_feed),
        )

        # collect outputs across time steps
        x = torch.cat(outs, dim=0).view(seqlen, bsz, self.hidden_size)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # srclen x tgtlen x bsz -> bsz x tgtlen x srclen
        attn_scores = attn_scores.transpose(0, 2)

        # project back to size of vocabulary
        if hasattr(self, "additional_fc"):
            x = self.additional_fc(x)
            x = F.dropout(x, p=self.dropout_out, training=self.training)
        x = self.fc_out(x)

        return x, attn_scores

    def reorder_incremental_state(self, incremental_state, new_order):
        """Reorder the cached hidden/cell/input-feed tensors along the batch axis."""
        super().reorder_incremental_state(incremental_state, new_order)
        cached_state = utils.get_incremental_state(
            self, incremental_state, "cached_state"
        )
        if cached_state is None:
            return

        def reorder_state(state):
            # the cache mixes plain tensors with per-layer lists of tensors
            if isinstance(state, list):
                return [reorder_state(state_i) for state_i in state]
            return state.index_select(0, new_order)

        new_state = tuple(map(reorder_state, cached_state))
        utils.set_incremental_state(self, incremental_state, "cached_state", new_state)

    def max_positions(self):
        """Maximum output length supported by the decoder."""
        return int(1e5)  # an arbitrary large number


def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Return an ``nn.Embedding`` initialised uniformly in [-0.1, 0.1].

    The row at ``padding_idx`` is set to zero.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m


def LSTM(input_size, hidden_size, **kwargs):
    """Return an ``nn.LSTM`` whose weights and biases are uniform in [-0.1, 0.1]."""
    m = nn.LSTM(input_size, hidden_size, **kwargs)
    for name, param in m.named_parameters():
        if "weight" in name or "bias" in name:
            param.data.uniform_(-0.1, 0.1)
    return m


def LSTMCell(input_size, hidden_size, **kwargs):
    """Return an ``nn.LSTMCell`` whose weights and biases are uniform in [-0.1, 0.1]."""
    m = nn.LSTMCell(input_size, hidden_size, **kwargs)
    for name, param in m.named_parameters():
        if "weight" in name or "bias" in name:
            param.data.uniform_(-0.1, 0.1)
    return m


def Linear(in_features, out_features, bias=True, dropout=0):
    """Return an ``nn.Linear`` initialised uniformly in [-0.1, 0.1] (input: N x T x C).

    NOTE: no weight normalization is applied, and ``dropout`` is accepted
    only for call-site compatibility; it has no effect on the returned layer.
    """
    m = nn.Linear(in_features, out_features, bias=bias)
    m.weight.data.uniform_(-0.1, 0.1)
    if bias:
        m.bias.data.uniform_(-0.1, 0.1)
    return m


@register_model_architecture("laser_lstm", "laser_lstm")
def base_architecture(args):
    """Fill in a default for every ``laser_lstm`` option missing from ``args``.

    Hidden sizes default to the matching embedding dimension and the granular
    dropout options default to ``args.dropout``. ``args`` is modified in place.
    """
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512)
    args.encoder_embed_path = getattr(args, "encoder_embed_path", None)
    args.encoder_hidden_size = getattr(
        args, "encoder_hidden_size", args.encoder_embed_dim
    )
    args.encoder_layers = getattr(args, "encoder_layers", 1)
    args.encoder_bidirectional = getattr(args, "encoder_bidirectional", False)
    args.encoder_dropout_in = getattr(args, "encoder_dropout_in", args.dropout)
    args.encoder_dropout_out = getattr(args, "encoder_dropout_out", args.dropout)
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512)
    args.decoder_embed_path = getattr(args, "decoder_embed_path", None)
    args.decoder_hidden_size = getattr(
        args, "decoder_hidden_size", args.decoder_embed_dim
    )
    args.decoder_layers = getattr(args, "decoder_layers", 1)
    args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 512)
    args.decoder_dropout_in = getattr(args, "decoder_dropout_in", args.dropout)
    args.decoder_dropout_out = getattr(args, "decoder_dropout_out", args.dropout)
    args.decoder_zero_init = getattr(args, "decoder_zero_init", "0")
    args.decoder_lang_embed_dim = getattr(args, "decoder_lang_embed_dim", 0)
    args.fixed_embeddings = getattr(args, "fixed_embeddings", False)


# --- patch boundary: header of the next file in this diff, kept verbatim ---
# diff --git a/fairseq/examples/laser/laser_src/laser_task.py b/fairseq/examples/laser/laser_src/laser_task.py
# new file mode 100644
# index 0000000..9bf2d7a
# --- /dev/null
# +++ b/fairseq/examples/laser/laser_src/laser_task.py
# @@ -0,0 +1,334 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from collections import OrderedDict, defaultdict
import json
import os
import logging
from argparse import ArgumentError

from fairseq import options, models
from fairseq.data import (
    data_utils,
    Dictionary,
    LanguagePairDataset,
    IndexedDataset,
    FairseqDataset,
)
from .multitask_data_utils import (
    MultitaskDatasetWrapper,
    MultidatasetEpochBatchIterator,
)


from fairseq.tasks import LegacyFairseqTask, register_task

logger = logging.getLogger(__name__)


@register_task("laser")
class LaserTask(LegacyFairseqTask):
    """fairseq task ``laser``: multi-dataset translation driven by a JSON config.

    The config supplies the two vocabularies and, per split, a list of
    ``src``/``tgt`` dataset paths, each tagged with a target-language ``id``.
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument(
            "configfile", metavar="PATH", help="dataset configuration file in json"
        )
        parser.add_argument(
            "--weighting-alpha",
            type=float,
            default=None,
            help="alpha for automatic weighting",
        )
        parser.add_argument(
            "--raw-text", action="store_true", help="load raw text dataset"
        )
        parser.add_argument(
            "--left-pad-source",
            default="True",
            type=str,
            metavar="BOOL",
            help="pad the source on the left (default: True)",
        )
        parser.add_argument(
            "--left-pad-target",
            default="False",
            type=str,
            metavar="BOOL",
            help="pad the target on the left (default: False)",
        )
        try:
            parser.add_argument(
                "--max-source-positions",
                default=1024,
                type=int,
                metavar="N",
                help="max number of tokens in the source sequence",
            )
            parser.add_argument(
                "--max-target-positions",
                default=1024,
                type=int,
                metavar="N",
                help="max number of tokens in the target sequence",
            )
        except ArgumentError:
            # this might have already been defined. Once we transition this to hydra it should be fine to add it here.
            pass

    def __init__(self, args, config, src_dictionary, tgt_dictionary, num_tasks):
        super().__init__(args)
        self.config = config
        self.src_dictionary = src_dictionary
        self.tgt_dictionary = tgt_dictionary
        self.num_tasks = num_tasks

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Read the JSON config, load both dictionaries and build the task.

        ``num_tasks`` is one more than the largest ``id`` found in the
        config's ``train`` list.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(args.configfile, "r", encoding="utf-8") as f:
            config = json.load(f)
        num_tasks = max(dataset["id"] for dataset in config["train"]) + 1

        args.left_pad_source = options.eval_bool(args.left_pad_source)
        args.left_pad_target = options.eval_bool(args.left_pad_target)

        src_dictionary = Dictionary.load(config["src_vocab"])
        tgt_dictionary = Dictionary.load(config["tgt_vocab"])

        logger.info(
            "| src Dictionary {} : {} types".format(
                config["src_vocab"], len(src_dictionary)
            )
        )
        logger.info(
            "| tgt Dictionary {} : {} types".format(
                config["tgt_vocab"], len(tgt_dictionary)
            )
        )

        return cls(args, config, src_dictionary, tgt_dictionary, num_tasks)

    # Experimental overriding for backtranslation
    def build_model(self, args, from_checkpoint=False):
        """Build the model via ``fairseq.models.build_model``.

        ``from_checkpoint`` is accepted but not used here.
        """
        model = models.build_model(args, self)
        return model

    def dataset(self, split):
        """Return the loaded dataset for ``split``; raise ``KeyError`` if not loaded."""
        if split not in self.datasets:
            raise KeyError("Dataset not loaded: " + split)
        return self.datasets[split]

    def load_dataset(self, split, epoch=1, **kwargs):
        """Load a dataset split.

        For ``valid`` an empty ``OrderedDict`` is stored. Otherwise every
        entry of ``self.config[split]`` is wrapped in a ``LanguagePairDataset``
        and registered under ``<corpus>-<language pair>``, both taken from the
        last two directory components of its ``src`` path. Duplicated keys are
        ignored with a warning. A dataset is registered ``round(sample)``
        times (extra copies get an ``-up`` suffix per copy). ``sample`` comes
        from the config entry, or from ``--weighting-alpha`` when the entry
        has none.

        Raises:
            FileNotFoundError: if ``split`` is not present in the config.
            Exception: if ``--raw-text`` was requested.
        """

        def indexed_dataset(path, dictionary):
            if self.args.raw_text:
                raise Exception("Unable to handle raw text.")
            dataset = IndexedDataset(path, fix_lua_indexing=True)

            return dataset

        pair_datasets = OrderedDict()

        if split == "valid":
            self.datasets[split] = pair_datasets
            return

        if split not in self.config:
            raise FileNotFoundError(
                "Dataset not found in config file: {}".format(split)
            )

        size_sum = 0
        size_sum_with_subsampling = 0
        init_pair_datasets = {}

        for dataset_config in self.config[split]:
            # "src" is mandatory: the dataset key is derived from its path.
            src_path = os.path.dirname(dataset_config["src"])
            corpus_name = src_path.split("/")[-2]
            language_pair_name = src_path.split("/")[-1]
            pair_datasets_key = corpus_name + "-" + language_pair_name

            logger.info(f"loading... {pair_datasets_key}")
            src_dataset = indexed_dataset(dataset_config["src"], self.src_dictionary)

            if "tgt" in dataset_config:
                tgt_dataset = indexed_dataset(
                    dataset_config["tgt"], self.tgt_dictionary
                )
                tgt_sizes = tgt_dataset.sizes
            else:
                # A missing target must not crash on ``None.sizes``.
                # NOTE(review): assumes LanguagePairDataset accepts
                # tgt=None / tgt_sizes=None - confirm against fairseq.
                tgt_dataset = None
                tgt_sizes = None

            dataset = LanguagePairDataset(
                src_dataset,
                src_dataset.sizes,
                self.src_dictionary,
                tgt_dataset,
                tgt_sizes,
                self.tgt_dictionary,
                left_pad_source=self.args.left_pad_source,
                left_pad_target=self.args.left_pad_target,
            )

            if pair_datasets_key in init_pair_datasets:
                logger.warning(
                    f"Ignoring already added {pair_datasets_key}. "
                    f"Consider using `sample` key in order to upsample."
                )
            else:
                init_pair_datasets[pair_datasets_key] = {
                    "dataset": dataset,
                    "sample": dataset_config.get("sample", None),
                    "id": dataset_config.get("id", None),
                    "len": len(dataset),
                }

        length_sum = 0
        weighted_freqs_sum = 0
        freq_per_dataset = {}
        vmax = 0
        weighted_freq_per_dataset = {}

        if self.args.weighting_alpha:
            # Only datasets without an explicit ``sample`` take part in the
            # automatic weighting.
            for key in init_pair_datasets:
                if init_pair_datasets[key]["sample"] is None:
                    length_sum += len(init_pair_datasets[key]["dataset"])

            for key in init_pair_datasets:
                if init_pair_datasets[key]["sample"] is None:
                    val = float(init_pair_datasets[key]["len"]) / length_sum
                    freq_per_dataset[key] = val
                    weighted_freqs_sum += val ** self.args.weighting_alpha

            for key in freq_per_dataset:
                val = (
                    freq_per_dataset[key] ** self.args.weighting_alpha
                    / weighted_freqs_sum
                )
                vmax = max(vmax, val)
                weighted_freq_per_dataset[key] = val

        for initial_pair_datasets_key, entry in init_pair_datasets.items():
            dataset = entry["dataset"]
            sample = entry["sample"]
            if sample is None:
                sample = 1.0

            if initial_pair_datasets_key in weighted_freq_per_dataset:
                # upsample relative to the most frequent dataset
                sample = vmax / weighted_freq_per_dataset[initial_pair_datasets_key]

            sample = round(sample)
            initial_sample = sample

            # The "id" key is always present in ``entry`` (possibly None), so
            # ``.get("id", 0)`` would never fall back; default explicitly.
            task_id = entry["id"] if entry["id"] is not None else 0

            pair_datasets_key = initial_pair_datasets_key
            while sample >= 1.0:
                assert (
                    pair_datasets_key not in pair_datasets
                ), f"{pair_datasets_key} already in"
                size_sum_with_subsampling += len(dataset)
                pair_datasets[pair_datasets_key] = MultitaskDatasetWrapper(
                    dataset, task_id, 1.0, name=pair_datasets_key
                )
                size_sum += len(dataset)
                sample -= 1.0
                pair_datasets_key += "-up"

            assert sample < 1e-6, f"sample remains > 0 {pair_datasets_key}"

            logger.info(
                f"added pair {initial_pair_datasets_key} length {len(dataset)} new_length = {len(dataset)*initial_sample}"
            )

        self.datasets[split] = pair_datasets
        logger.info(
            f"Datasets number = {len(self.datasets[split])} size = {size_sum} size_sum_with_subsampling = {size_sum_with_subsampling}"
        )

    @property
    def source_dictionary(self):
        """Source-side dictionary loaded from the config's ``src_vocab``."""
        return self.src_dictionary

    @property
    def target_dictionary(self):
        """Target-side dictionary loaded from the config's ``tgt_vocab``."""
        return self.tgt_dictionary

    def get_batch_iterator(
        self,
        dataset,
        max_tokens=None,
        max_sentences=None,
        max_positions=None,
        ignore_invalid_inputs=False,
        required_batch_size_multiple=1,
        seed=1,
        num_shards=1,
        shard_id=0,
        num_workers=0,
        epoch=1,
        data_buffer_size=0,
        disable_iterator_cache=False,
        grouped_shuffling=False,
        update_epoch_batch_itr=False,
        **kwargs,
    ):
        """Build a ``MultidatasetEpochBatchIterator`` over an ``OrderedDict`` of datasets.

        Each dataset is ordered, optionally filtered by ``max_positions`` and
        batched independently with ``data_utils.batch_by_size``.
        ``ignore_invalid_inputs``, ``data_buffer_size``,
        ``disable_iterator_cache``, ``grouped_shuffling`` and
        ``update_epoch_batch_itr`` are accepted but not used here.
        """

        assert isinstance(dataset, OrderedDict)
        assert len(dataset)
        assert isinstance(dataset[next(iter(dataset))], FairseqDataset)

        # initialize the dataset with the correct starting epoch
        for dt in dataset.values():
            dt.set_epoch(epoch)

        indices = OrderedDict()
        batch_sampler = OrderedDict()

        with data_utils.numpy_seed(seed + epoch):
            for key, dt in dataset.items():
                logger.info(f"\t ordered_indices {key}")
                indices[key] = dt.ordered_indices()

        # filter examples that are too large
        if max_positions is not None:
            for key, dt in dataset.items():
                logger.info(f"\t filter_by_size {key}")
                # the list of ignored indices is deliberately discarded
                indices[key], _ = dt.filter_indices_by_size(
                    indices[key], max_positions
                )

        for key, dt in dataset.items():
            logger.info(f"\t batch_by_size {key}")
            batch_sampler[key] = data_utils.batch_by_size(
                indices[key],
                dt.num_tokens,
                max_tokens=max_tokens,
                max_sentences=max_sentences,
                required_batch_size_multiple=required_batch_size_multiple,
            )

        epoch_iter = MultidatasetEpochBatchIterator(
            dataset=dataset,
            batch_sampler=batch_sampler,
            seed=seed,
            num_shards=num_shards,
            shard_id=shard_id,
            num_workers=num_workers,
            epoch=epoch,
        )

        return epoch_iter


# --- patch boundary: header of the next file in this diff, kept verbatim ---
# diff --git a/fairseq/examples/laser/laser_src/laser_transformer.py b/fairseq/examples/laser/laser_src/laser_transformer.py
# new file mode 100644
# index 0000000..0be0309
# --- /dev/null
# +++ b/fairseq/examples/laser/laser_src/laser_transformer.py
# @@ -0,0 +1,354 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import logging

from typing import Any, Dict, List, Optional
from torch import Tensor

import torch
import torch.nn as nn

from fairseq.models import (
    FairseqEncoderDecoderModel,
    register_model,
    register_model_architecture,
)
from fairseq.models.transformer import (
    base_architecture,
    Embedding,
    TransformerModel,
    TransformerEncoder,
    TransformerDecoder,
)
from fairseq.modules import (
    TransformerDecoderLayer,
)

logger = logging.getLogger(__name__)


@register_model("laser_transformer")
class LaserTransformerModel(FairseqEncoderDecoderModel):
    """Train Transformer for LASER task

    Requires --task laser
    """

    def __init__(self, encoder, decoder):
        super().__init__(encoder, decoder)

    def forward(
        self,
        src_tokens,
        src_lengths,
        prev_output_tokens=None,
        tgt_tokens=None,
        tgt_lengths=None,
        target_language_id=-1,
        dataset_name="",
    ):
        """Encode the source and decode towards ``target_language_id``.

        ``tgt_tokens``, ``tgt_lengths`` and ``dataset_name`` are accepted but
        not used in this method.
        """
        laser_encoder_out = self.encoder(src_tokens, src_lengths)
        return self.decoder(
            prev_output_tokens, laser_encoder_out, lang_id=target_language_id
        )

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        TransformerModel.add_args(parser)
        parser.add_argument(
            "--decoder-lang-embed-dim",
            type=int,
            metavar="N",
            help="decoder language embedding dimension",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build the LASER transformer encoder/decoder pair for ``task``."""
        # defined outside the visible part of this file
        base_laser_transformer_architecture(args)

        num_langs = task.num_tasks if hasattr(task, "num_tasks") else 0

        def load_embed_tokens(dictionary, embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()

            return Embedding(num_embeddings, embed_dim, padding_idx)

        encoder_embed_tokens = load_embed_tokens(
            task.source_dictionary, args.encoder_embed_dim
        )
        decoder_embed_tokens = load_embed_tokens(
            task.target_dictionary, args.decoder_embed_dim
        )
        # NOTE(review): same expression as the assignment above; redundant.
        num_langs = task.num_tasks if hasattr(task, "num_tasks") else 0

        encoder = LaserTransformerEncoder(
            args, task.source_dictionary, encoder_embed_tokens
        )

        decoder = LaserTransformerDecoder(
            args,
            task.target_dictionary,
            decoder_embed_tokens,
            num_langs=num_langs,
            lang_embed_dim=args.decoder_lang_embed_dim,
        )

        return cls(encoder, decoder)


class LaserTransformerEncoder(TransformerEncoder):
    """Transformer encoder whose only output is a max-pooled sentence embedding."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, src_tokens, *args, **kwargs):
        """Run the parent encoder, then max-pool its outputs over the time axis.

        Returns a dict with the single key ``sentemb`` holding a one-element
        list.
        """
        encoder_out = super().forward(src_tokens, *args, **kwargs)

        x = encoder_out["encoder_out"][0]  # T x B x C
        padding_mask = src_tokens.eq(self.padding_idx).t().unsqueeze(-1)

        # padded positions are set to -inf so max-pooling never selects them
        if padding_mask.any():
            x = x.float().masked_fill_(padding_mask, float("-inf")).type_as(x)

        # Build the sentence embedding by max-pooling over the encoder outputs
        sentemb = x.max(dim=0)[0]

        # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in
        # `foward` so we use a dictionary instead.
        # TorchScript does not support mixed values so the values are all lists.
        # The empty list is equivalent to None.
        return {"sentemb": [sentemb]}  # B x C

    @torch.jit.export
    def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order):
        """
        Same as the one in transformer.py, with new_sentemb
        """
        if len(encoder_out["sentemb"]) == 0:
            new_sentemb = []
        else:
            new_sentemb = [encoder_out["sentemb"][0].index_select(0, new_order)]

        return {
            "sentemb": new_sentemb,  # B x C
        }


class LaserTransformerDecoder(TransformerDecoder):
    def __init__(self, args, dictionary, *kargs, **kwargs):
        # Set before super().__init__(); presumably the parent constructor
        # calls build_decoder_layer, which reads self.lang_embed_dim -
        # confirm against TransformerDecoder.
        self.num_langs = kwargs.get("num_langs", 1)
        self.lang_embed_dim = kwargs.get("lang_embed_dim", 0)
        # strip the LASER-only keywords before forwarding to the parent
        kwargs.pop("num_langs", None)
        kwargs.pop("lang_embed_dim", None)

        super().__init__(args, dictionary, *kargs, **kwargs, no_encoder_attn=True)

        if self.lang_embed_dim == 0:
            self.embed_lang = None
        else:
            self.embed_lang = nn.Embedding(self.num_langs, self.lang_embed_dim)
            nn.init.uniform_(self.embed_lang.weight, -0.1, 0.1)

        if self.output_projection is not None:
            # the projection input is widened by the language embedding and
            # encoder embedding dimensions
            laser_output_embed_dim = (
                self.output_embed_dim + self.lang_embed_dim + args.encoder_embed_dim
            )
            self.output_projection = nn.Linear(
                laser_output_embed_dim, len(dictionary), bias=False
            )
            nn.init.normal_(
                self.output_projection.weight,
                mean=0,
                std=laser_output_embed_dim ** -0.5,
            )

    def build_decoder_layer(self, args, no_encoder_attn=False):
        # Temporarily widen args.decoder_embed_dim while the layer is built,
        # then restore it. The ``no_encoder_attn`` argument is ignored: the
        # layer is always built with no_encoder_attn=True.
        decoder_embed_dim = args.decoder_embed_dim
        args.decoder_embed_dim = (
            decoder_embed_dim + self.lang_embed_dim + args.encoder_embed_dim
        )
        res = TransformerDecoderLayer(args, no_encoder_attn=True)
        args.decoder_embed_dim = decoder_embed_dim

        return res

    def extract_features(
        self,
        prev_output_tokens,
        encoder_out: Optional[Dict[str, List[Tensor]]],
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        full_context_alignment: bool = False,
        alignment_layer: Optional[int] = None,
        alignment_heads: Optional[int] = None,
        lang_id: Optional[int] = None,
    ):
        """
        Similar to *forward* but only return features.

        Includes several features from "Jointly Learning to Align and
        Translate with Transformer Models" (Garg et al., EMNLP 2019).

        Args:
            full_context_alignment (bool, optional): don't apply
                auto-regressive mask to self-attention (default: False).
            alignment_layer (int, optional): return mean alignment over
                heads at this layer (default: last layer).
            alignment_heads (int, optional): only average alignment over
                this many heads (default: all heads).

        Returns:
            tuple:
                - the decoder's features of shape `(batch, tgt_len, embed_dim)`
                - a dictionary with any model-specific outputs
        """
        if alignment_layer is None:
            alignment_layer = self.num_layers - 1

        # embed positions
        positions = (
            self.embed_positions(
                prev_output_tokens, incremental_state=incremental_state
            )
            if self.embed_positions is not None
            else None
        )

        # during incremental decoding only the newest step is processed
        if incremental_state is not None:
            prev_output_tokens = prev_output_tokens[:, -1:]
            if positions is not None:
                positions = positions[:, -1:]

        bsz, seqlen = prev_output_tokens.size()

        # embed tokens and positions
        x = self.embed_scale * self.embed_tokens(prev_output_tokens)

        if self.quant_noise is not None:
            x = self.quant_noise(x)

        if self.project_in_dim is not None:
            x = self.project_in_dim(x)

        if positions is not None:
            x += positions

        if self.layernorm_embedding is not None:
            x = self.layernorm_embedding(x)

        x = self.dropout_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        if self.embed_lang is not None:
            # broadcast the per-sentence language embedding over every time
            # step and append it to the features
            lang_ids = prev_output_tokens.data.new_full((bsz,), lang_id)
            langemb = self.embed_lang(lang_ids)
            langemb = langemb.unsqueeze(0)
            repeat_vals = [x.shape[0] // langemb.shape[0]] + [-1] * (
                len(langemb.shape) - 1
            )
            x = torch.cat((x, langemb.expand(*repeat_vals)), dim=-1)

        sentemb = encoder_out["sentemb"][0]
        sentemb = sentemb.unsqueeze(0)

        repeat_vals = [x.shape[0] // sentemb.shape[0]] + [-1] * (len(sentemb.shape) - 1)
x = torch.cat((x, sentemb.expand(*repeat_vals)), dim=-1) + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + None, + None, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + lang_id: Optional[int] = None, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + 
:ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + assert lang_id is not None + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + lang_id=lang_id, + ) + if not features_only: + x = self.output_layer(x) + return x, extra + + +@register_model_architecture("laser_transformer", "laser_transformer") +def base_laser_transformer_architecture(args): + base_architecture(args) + args.decoder_lang_embed_dim = getattr(args, "decoder_lang_embed_dim", 0) diff --git a/fairseq/examples/laser/laser_src/multitask_data_utils.py b/fairseq/examples/laser/laser_src/multitask_data_utils.py new file mode 100644 index 0000000..b05caea --- /dev/null +++ b/fairseq/examples/laser/laser_src/multitask_data_utils.py @@ -0,0 +1,143 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from collections import OrderedDict + +import numpy as np + +from fairseq.data import BaseWrapperDataset, FairseqDataset, iterators + + +class MultiItr(object): + def __init__(self, itr): + self.itr = itr + self._counts = [0 for x in itr] + + def __len__(self): + return sum(len(itr) for itr in self.itr) + + def __iter__(self): + return self + + def __next__(self): + ratios = [count / len(itr) for count, itr in zip(self._counts, self.itr)] + idx = ratios.index(min(ratios)) + self._counts[idx] += 1 + return next(self.itr[idx]) + + +class MultidatasetEpochBatchIterator(iterators.EpochBatchIterating): + """A wrapper around multiple epoch batch iterators.""" + + def __init__( + self, + dataset, + batch_sampler, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + ): + + assert isinstance(dataset, OrderedDict) + assert len(dataset) + assert isinstance(dataset[next(iter(dataset))], FairseqDataset) + + self.iterators = [] + + self.epoch = epoch + for key, dt in dataset.items(): + epoch_iter = iterators.EpochBatchIterator( + dataset=dt, + collate_fn=dt.collater, + batch_sampler=batch_sampler[key], + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=0, + epoch=epoch, + ) + self.iterators.append(epoch_iter) + + def __len__(self): + return sum(len(itr) for itr in self.iterators) + + def next_epoch_itr(self, shuffle=True, fix_batches_to_gpus=False): + # `self.epoch += 1` should be handled by underlying `EpochBatchIterator`s. 
+ return MultiItr( + [ + itr.next_epoch_itr( + shuffle=shuffle, fix_batches_to_gpus=fix_batches_to_gpus + ) + for itr in self.iterators + ] + ) + + def end_of_epoch(self): + return all(itr.end_of_epoch() for itr in self.iterators) + + @property + def next_epoch_idx(self): + """Return the epoch index after *next_epoch_itr* is called.""" + + epochs = [itr.next_epoch_idx for itr in self.iterators] + self.epoch = epochs[0] + assert all(epoch == self.epoch for epoch in epochs) + + return self.epoch + + @property + def iterations_in_epoch(self): + return sum(itr.iterations_in_epoch for itr in self.iterators) + + def state_dict(self): + return { + "iterators": [it.state_dict() for it in self.iterators], + "epoch": self.epoch, + } + + def load_state_dict(self, state_dict): + self.epoch = state_dict["epoch"] + for it, d in zip(self.iterators, state_dict["iterators"]): + it.load_state_dict(d) + + +class MultitaskDatasetWrapper(BaseWrapperDataset): + """A wrapper for a multitask dataset.""" + + def __init__(self, dataset, target_language_id, sample=1.0, name=""): + super().__init__(dataset) + self.target_language_id = target_language_id + self.sample = sample + self.name = name + + def collater(self, *args, **kwargs): + ans = self.dataset.collater(*args, **kwargs) + if "net_input" in ans: + ans["net_input"]["target_language_id"] = self.target_language_id + ans["net_input"]["dataset_name"] = self.name + return ans + + def num_tokens(self, *args, **kwargs): + return self.dataset.num_tokens(*args, **kwargs) + + def ordered_indices(self, *args, **kwargs): + indices = self.dataset.ordered_indices(*args, **kwargs) + # Hacky solution for sampling + size = int(self.sample * indices.shape[0]) + + return indices.take(np.sort(np.random.permutation(indices.shape[0])[:size])) + + def size(self, index: int): + return self.dataset.size(index) + + @property + def supports_prefetch(self): + """Whether this dataset supports prefetching.""" + return getattr(self.dataset, "supports_prefetch", 
False) + + def prefetch(self, indices): + return self.dataset.prefetch(indices) diff --git a/fairseq/examples/latent_depth/README.md b/fairseq/examples/latent_depth/README.md new file mode 100644 index 0000000..7774c33 --- /dev/null +++ b/fairseq/examples/latent_depth/README.md @@ -0,0 +1,77 @@ +# Deep Transformers with Latent Depth (Li et al., 2020) + +[https://arxiv.org/abs/2009.13102](https://arxiv.org/abs/2009.13102). + +## Introduction + +We present a probabilistic framework to automatically learn which layer(s) to use by learning the posterior distributions of layer selection. As an extension of this framework, we propose a novel method to train one shared Transformer network for multilingual machine translation with different layer selection posteriors for each language pair. + +## Training a multilingual model with latent depth + +Below is an example of training with latent depth in decoder for one-to-many (O2M) related languages. We use the same preprocessed (numberized and binarized) TED8 dataset as in [Balancing Training for Multilingual Neural Machine Translation (Wang et al., 2020)](https://github.com/cindyxinyiwang/multiDDS), which could be generated by [the script](https://github.com/cindyxinyiwang/multiDDS/blob/multiDDS/util_scripts/prepare_multilingual_data.sh) the author provided. 
+```bash +lang_pairs_str="eng-aze,eng-bel,eng-ces,eng-glg,eng-por,eng-rus,eng-slk,eng-tur" +databin_dir= + +fairseq-train ${databin_dir} \ + --user-dir examples/latent_depth/latent_depth_src \ + --lang-pairs "${lang_pairs_str}" \ + --arch multilingual_transformer_iwslt_de_en \ + --task multilingual_translation_latent_depth \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --share-encoders \ + --share-decoders \ + --decoder-langtok \ + --share-decoder-input-output-embed \ + --dropout 0.3 --attention-dropout 0.3 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt --stop-min-lr 1e-9 --warmup-init-lr 1e-7 --warmup-updates 8000 \ + --max-tokens 4096 --update-freq 1 \ + --lr 0.0015 \ + --clip-norm 1.0 \ + --seed 2 \ + --ddp-backend=legacy_ddp \ + --encoder-layers 12 \ + --decoder-layers 24 \ + --decoder-latent-layer \ + --sparsity-weight 0.1 \ + --anneal-updates 5000 \ + --soft-update 500 \ + --target-layers 12 \ + --share-weight 0.1 +``` +## Inference command + +```bash +lang_pairs_str="eng-aze,eng-bel,eng-ces,eng-glg,eng-por,eng-rus,eng-slk,eng-tur" +databin_dir= +model_path= +src_lang= +tgt_lang= +gen_data= + +fairseq-generate ${databin_dir} \ + --path ${model_path} \ + --task multilingual_translation_latent_depth \ + --decoder-latent-layer \ + --lang-pairs "${lang_pairs_str}" \ + -s ${src_lang} -t ${tgt_lang} \ + --gen-subset $gen_data \ + --scoring sacrebleu \ + --remove-bpe 'sentencepiece' \ + --lenpen 1.0 \ + --beam 5 \ + --decoder-langtok \ + --max-tokens 4096 +``` + + +## Citation +```bibtex +@article{li2020deep, + title={Deep Transformers with Latent Depth}, + author={Li, Xian and Stickland, Asa Cooper and Tang, Yuqing and Kong, Xiang}, + journal={arXiv preprint arXiv:2009.13102}, + year={2020} +} +``` diff --git a/fairseq/examples/latent_depth/latent_depth_src/__init__.py b/fairseq/examples/latent_depth/latent_depth_src/__init__.py new file mode 100644 index 0000000..c5fa760 --- /dev/null +++ 
b/fairseq/examples/latent_depth/latent_depth_src/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import multilingual_translation_latent_depth # noqa +from .loss import latent_depth # noqa +from .models import latent_multilingual_transformer # noqa +from .modules import latent_layers # noqa diff --git a/fairseq/examples/latent_depth/latent_depth_src/loss/__init__.py b/fairseq/examples/latent_depth/latent_depth_src/loss/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/latent_depth/latent_depth_src/loss/latent_depth.py b/fairseq/examples/latent_depth/latent_depth_src/loss/latent_depth.py new file mode 100644 index 0000000..a3b9535 --- /dev/null +++ b/fairseq/examples/latent_depth/latent_depth_src/loss/latent_depth.py @@ -0,0 +1,99 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math + +import torch +from torch.nn.modules.loss import _Loss + + +class LatentLayersKLLoss(_Loss): + def __init__(self, args): + super().__init__() + self.args = args + + def forward(self, layer_samples, lang_idx, update_num, sample_size): + prior = self.args.prior + samples = layer_samples[lang_idx] + eps = 1e-7 + if prior == "uniform": + # uniform prior + kl_loss = (samples * (torch.log(samples + eps) - math.log(0.5))).sum(-1) + elif prior == "agged_posterior": + # aggregated posterior + y_t = torch.stack([x.detach() for x in layer_samples], dim=0) + agged_q = torch.sum(y_t, dim=0) + row_norm = agged_q.sum(-1) + normed_agg_q = agged_q / row_norm + kl_loss = ( + samples * (torch.log(samples + eps) - torch.log(normed_agg_q + eps)) + ).sum(-1) + else: + raise NotImplementedError("The specified prior is not implemented.") + + # normalized by number of layers + kl_loss /= layer_samples[0].size()[0] + kl_weight = min( + self.args.sparsity_weight, + (update_num - self.args.soft_update) + * self.args.sparsity_weight + / self.args.anneal_updates, + ) + kl_loss *= kl_weight * sample_size + return kl_loss + + +class LatentLayersSparsityLoss(_Loss): + def __init__(self, args): + super().__init__() + self.args = args + + def is_valid(self, update_num): + if self.args.target_layers <= 0: + return False + return update_num > (self.args.soft_update + self.args.anneal_updates) + + def forward(self, layer_samples_list, update_num, sample_size): + batch_loss = 0 + share_loss = 0 + global_sparsity_loss = 0 + layer_samples = torch.stack(layer_samples_list, dim=0) + if ( + self.args.target_layers > 0 or self.args.share_weight > 0 + ) and update_num > (self.args.soft_update + self.args.anneal_updates): + # anneal sparsity weight + if update_num < (self.args.anneal_updates + self.args.soft_update): + weight_anneal = 0 + elif update_num < (2 * self.args.anneal_updates + self.args.soft_update): + weight_anneal = ( + (update_num - self.args.soft_update - 
self.args.anneal_updates) + * self.args.share_weight + / self.args.anneal_updates + ) + else: + weight_anneal = 1 + # compute ratio among languages + layer_utilization = torch.sum(layer_samples, dim=0) + layer_utilization /= layer_samples.size()[0] + if self.args.share_weight > 0: + # encouraging sharing across languages + share_loss = sum( + -1.0 * v * math.log(v) for v in layer_utilization if v > 0 + ) + batch_loss += ( + weight_anneal * self.args.share_weight * sample_size * share_loss + ) + if self.args.target_layers > 0: + # computed expected number of layers selected + expeted_layers = sum(layer_utilization) + # compute l2 loss wrt target number of layers + global_sparsity_loss = (expeted_layers - self.args.target_layers) ** 2 + batch_loss += ( + weight_anneal + * self.args.share_weight + * sample_size + * global_sparsity_loss + ) + return batch_loss diff --git a/fairseq/examples/latent_depth/latent_depth_src/models/__init__.py b/fairseq/examples/latent_depth/latent_depth_src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/latent_depth/latent_depth_src/models/latent_multilingual_transformer.py b/fairseq/examples/latent_depth/latent_depth_src/models/latent_multilingual_transformer.py new file mode 100644 index 0000000..9e7b655 --- /dev/null +++ b/fairseq/examples/latent_depth/latent_depth_src/models/latent_multilingual_transformer.py @@ -0,0 +1,76 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from fairseq.models import register_model, register_model_architecture +from fairseq.models.multilingual_transformer import MultilingualTransformerModel +from fairseq.models.transformer import ( + TransformerDecoder, + TransformerEncoder, + base_architecture, +) +from fairseq.utils import safe_hasattr + +from .latent_transformer import LatentTransformerDecoder, LatentTransformerEncoder + + +@register_model("latent_multilingual_transformer") +class LatentMultilingualTransformerModel(MultilingualTransformerModel): + """A variant of standard multilingual Transformer models which encoder and/or + decoders supports latent depth, as is in "Deep Transformer with Latent Depth" + (https://arxiv.org/abs/2009.13102). + """ + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + MultilingualTransformerModel.add_args(parser) + parser.add_argument( + '--soft-select', + action='store_true', + help='use soft samples in training an inference', + ) + parser.add_argument( + '--sampling-tau', + type=float, + default=5., + help='sampling temperature', + ) + + @classmethod + def _get_module_class(cls, is_encoder, args, lang_dict, embed_tokens, langs): + if is_encoder: + if safe_hasattr(args, "encoder_latent_layer") and args.encoder_latent_layer: + return LatentTransformerEncoder( + args, lang_dict, embed_tokens, num_logits=len(langs) + ) + else: + return TransformerEncoder(args, lang_dict, embed_tokens) + else: + if safe_hasattr(args, "decoder_latent_layer") and args.decoder_latent_layer: + return LatentTransformerDecoder( + args, lang_dict, embed_tokens, num_logits=len(langs) + ) + else: + return TransformerDecoder(args, lang_dict, embed_tokens) + + +@register_model_architecture( + "latent_multilingual_transformer", "latent_multilingual_transformer" +) +def latent_multilingual_architecture(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 1024) + 
args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.encoder_layers = getattr(args, "encoder_layers", 12) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 1024) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) + args.decoder_layers = getattr(args, "decoder_layers", 24) + args.share_encoders = getattr(args, "share_encoders", True) + args.share_decoders = getattr(args, "share_decoders", True) + args.share_encoder_embeddings = getattr(args, "share_encoder_embeddings", True) + args.share_decoder_embeddings = getattr(args, "share_decoder_embeddings", True) + + base_architecture(args) diff --git a/fairseq/examples/latent_depth/latent_depth_src/models/latent_transformer.py b/fairseq/examples/latent_depth/latent_depth_src/models/latent_transformer.py new file mode 100644 index 0000000..6a82530 --- /dev/null +++ b/fairseq/examples/latent_depth/latent_depth_src/models/latent_transformer.py @@ -0,0 +1,156 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, Dict, Optional + +import torch.nn as nn +from fairseq.models.fairseq_encoder import EncoderOut +from fairseq.models.transformer import TransformerDecoder, TransformerEncoder +from fairseq.modules import TransformerDecoderLayer, TransformerEncoderLayer +from torch import Tensor + +from ..modules.latent_layers import LayerSelect + + +class LatentTransformerEncoder(TransformerEncoder): + """Latent depth (https://arxiv.org/abs/2009.13102) implemented in + TransformerEncoder. 
+ """ + + def __init__(self, args, dictionary, embed_tokens, num_logits=1): + self.num_logits = num_logits + self.num_layers = args.encoder_layers + super().__init__(args, dictionary, embed_tokens) + self.layer_select = LayerSelect( + num_layers=self.num_layers, + num_logits=self.num_logits, + soft_select=getattr(args, "soft_select", False), + sampling_tau=getattr(args, "sampling_tau", 5.), + ) + self.lang_idx = None + self.layers = nn.ModuleList( + [self._build_encoder_layer(args, idx) for idx in range(args.encoder_layers)] + ) + + def set_lang_idx(self, lang_idx): + self.lang_idx = lang_idx + + def _build_encoder_layer(self, args, idx=None): + return LatentTransformerEncoderLayer(args, idx, layer_select=self.layer_select) + + def forward(self, src_tokens, src_lengths, return_all_hiddens: bool = False): + self.layer_select.sample(self.lang_idx) + return super().forward(src_tokens, src_lengths, return_all_hiddens) + + +class LatentTransformerEncoderLayer(TransformerEncoderLayer): + """Encoder layer with each (non_residual) block weighted by samples of Bernouli + or Gumbel Signmoid samples. + + Args: + args (argparse.Namespace): parsed command-line arguments from standard + TransformerEncoderLayer. + idx (int): layer index (used to retrieve samples). + layer_select (LayerSelect, optional): instance of LayerSelect module with logits + parameters and sampling method. + """ + + def __init__(self, args, idx, layer_select=None): + super().__init__(args) + self.idx = idx + self.layer_select = layer_select + + def residual_connection(self, x, residual): + return residual + x * self.layer_select(self.idx) + + +class LatentTransformerDecoder(TransformerDecoder): + """Latent depth (https://arxiv.org/abs/2009.13102) implemented in + TransformerDecoder. 
+ """ + + def __init__( + self, args, dictionary, embed_tokens, no_encoder_attn=False, num_logits=1 + ): + self.num_logits = num_logits + self.num_layers = args.decoder_layers + super().__init__( + args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn + ) + self.layer_select = LayerSelect( + num_layers=self.num_layers, + num_logits=self.num_logits, + soft_select=getattr(args, "soft_select", False), + sampling_tau=getattr(args, "sampling_tau", 5.), + ) + self.lang_idx = None + self.layers = nn.ModuleList( + [ + self._build_decoder_layer(args, no_encoder_attn, idx) + for idx in range(args.decoder_layers) + ] + ) + + def set_lang_idx(self, lang_idx): + self.lang_idx = lang_idx + + def _build_decoder_layer(self, args, no_encoder_attn=False, idx=None): + return LatentTransformerDecoderLayer( + args, idx, layer_select=self.layer_select, no_encoder_attn=no_encoder_attn + ) + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[EncoderOut] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + self.layer_select.sample(self.lang_idx) + return super().forward( + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + features_only=features_only, + alignment_layer=alignment_layer, + src_lengths=src_lengths, + return_all_hiddens=return_all_hiddens, + ) + + +class LatentTransformerDecoderLayer(TransformerDecoderLayer): + """Decoder layer with each (non_residual) block weighted by samples of Bernouli + or Gumbel Signmoid samples. + + Args: + args (argparse.Namespace): parsed command-line arguments from standard + TransformerDecoderLayer. + idx (int): layer index (used to retrieve samples). 
+ layer_select (LayerSelect, optional): instance of LayerSelect module with logits + parameters and sampling method. + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + + """ + + def __init__( + self, + args, + idx, + layer_select=None, + no_encoder_attn=False, + add_bias_kv=False, + add_zero_attn=False, + ): + super().__init__(args, no_encoder_attn, add_bias_kv, add_zero_attn) + self.idx = idx + self.layer_select = layer_select + + def residual_connection(self, x, residual): + return residual + x * self.layer_select(self.idx) diff --git a/fairseq/examples/latent_depth/latent_depth_src/modules/__init__.py b/fairseq/examples/latent_depth/latent_depth_src/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/latent_depth/latent_depth_src/modules/latent_layers.py b/fairseq/examples/latent_depth/latent_depth_src/modules/latent_layers.py new file mode 100644 index 0000000..2be05d5 --- /dev/null +++ b/fairseq/examples/latent_depth/latent_depth_src/modules/latent_layers.py @@ -0,0 +1,75 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +import torch.nn as nn + + +class LayerSelect(nn.Module): + """Compute samples (from a Gumbel-Sigmoid distribution) which is used as + either (soft) weighting or (hard) selection of residual connection. 
+ https://arxiv.org/abs/2009.13102 + """ + def __init__(self, num_layers, num_logits, soft_select=False, sampling_tau=5.): + super(LayerSelect, self).__init__() + self.layer_logits = torch.nn.Parameter( + torch.Tensor(num_logits, num_layers), + requires_grad=True, + ) + self.hard_select = not soft_select + self.tau = sampling_tau + self.detach_grad = False + self.layer_samples = [None] * num_logits + + def sample(self, logit_idx): + """To leverage the efficiency of distributed training, samples for all + layers are computed at once for each logit_idx. Logits are parameters + learnt independent of each other. + + Args: + logit_idx: The index of logit parameters used for sampling. + """ + assert logit_idx is not None + self.samples = self._gumbel_sigmoid( + self.layer_logits[logit_idx, :].detach() + if self.detach_grad + else self.layer_logits[logit_idx, :], + dim=-1, + tau=self.tau, + hard=self.hard_select, + ) + self.layer_samples[logit_idx] = self.samples + + def forward(self, i): + sample = self.samples[i] + return sample + + def _gumbel_sigmoid( + self, logits, tau=1, hard=False, eps=1e-10, dim=-1, threshold=0.5 + ): + # ~Gumbel(0,1) + gumbels1 = ( + -torch.empty_like(logits, memory_format=torch.legacy_contiguous_format) + .exponential_() + .log() + ) + gumbels2 = ( + -torch.empty_like(logits, memory_format=torch.legacy_contiguous_format) + .exponential_() + .log() + ) + # Difference of two gumbels because we apply a sigmoid + gumbels1 = (logits + gumbels1 - gumbels2) / tau + y_soft = gumbels1.sigmoid() + if hard: + # Straight through. + y_hard = torch.zeros_like( + logits, memory_format=torch.legacy_contiguous_format + ).masked_fill(y_soft > threshold, 1.0) + ret = y_hard - y_soft.detach() + y_soft + else: + # Reparametrization trick. 
+ ret = y_soft + return ret diff --git a/fairseq/examples/latent_depth/latent_depth_src/multilingual_translation_latent_depth.py b/fairseq/examples/latent_depth/latent_depth_src/multilingual_translation_latent_depth.py new file mode 100644 index 0000000..8cc2a71 --- /dev/null +++ b/fairseq/examples/latent_depth/latent_depth_src/multilingual_translation_latent_depth.py @@ -0,0 +1,195 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq.tasks import register_task +from fairseq.tasks.multilingual_translation import MultilingualTranslationTask +from fairseq.utils import safe_hasattr + +from .loss.latent_depth import LatentLayersKLLoss, LatentLayersSparsityLoss + + +@register_task("multilingual_translation_latent_depth") +class MultilingualTranslationTaskLatentDepth(MultilingualTranslationTask): + """A task for multiple translation with latent depth. + + See `"Deep Transformer with Latent Depth" + (Li et al., 2020) `_. 
@staticmethod
def add_args(parser):
    """Register the parent task's arguments plus the latent-depth options."""
    MultilingualTranslationTask.add_args(parser)

    # Boolean switches that turn latent layer selection on per side.
    for flag, description in (
        ("--encoder-latent-layer", "latent layer selection in encoder"),
        ("--decoder-latent-layer", "latent layer selection in decoder"),
    ):
        parser.add_argument(flag, action="store_true", help=description)

    # Valued options, registered in the same order as before:
    # (flag, default, type, help text).
    valued_options = (
        (
            "--target-layers",
            -1,
            int,
            "number of effective layers to learn; -1 means no constraint",
        ),
        ("--sparsity-weight", 0.0, float, "weight for sparsity loss"),
        ("--share-weight", 0.0, float, "weight for sharing loss"),
        ("--soft-update", 1, int, "number of updates with soft sampling"),
        (
            "--anneal-updates",
            1,
            int,
            "number of updates to anneal the KL loss weight",
        ),
        ("--prior", "uniform", str, "prior used for computing KL loss"),
    )
    for flag, default, arg_type, description in valued_options:
        parser.add_argument(flag, default=default, type=arg_type, help=description)


def __init__(self, args, dicts, training):
    """Set up language bookkeeping and, when training with latent layers, the losses.

    ``src_langs`` / ``tgt_langs`` are the two halves of every ``"src-tgt"``
    entry in ``args.lang_pairs``, in the order the pairs are listed.
    """
    super().__init__(args, dicts, training)

    split_pairs = [pair.split("-") for pair in args.lang_pairs]
    self.src_langs, self.tgt_langs = zip(
        *[(parts[0], parts[1]) for parts in split_pairs]
    )

    # Latent layer selection during training requires the corresponding
    # side to be shared across language pairs.
    if self.training and self.encoder_latent_layer:
        assert self.args.share_encoders
    if self.training and self.decoder_latent_layer:
        assert self.args.share_decoders

    keep_all_pairs = (
        training or self.encoder_latent_layer or self.decoder_latent_layer
    )
    if keep_all_pairs:
        self.lang_pairs = args.lang_pairs
    else:
        self.lang_pairs = ["{}-{}".format(args.source_lang, args.target_lang)]
    self.eval_lang_pairs = self.lang_pairs
    self.model_lang_pairs = self.lang_pairs

    any_latent = self.encoder_latent_layer or self.decoder_latent_layer
    if self.training and any_latent:
        self.kl_loss = LatentLayersKLLoss(self.args)
        self.sparsity_loss = LatentLayersSparsityLoss(self.args)


def _per_lang_pair_train_loss(
    self, lang_pair, model, update_num, criterion, sample, optimizer, ignore_grad
):
    """Compute the loss for one language pair and run its backward pass.

    The criterion loss is augmented with a KL term for each side that has
    latent layer selection enabled. Returns ``(loss, sample_size,
    logging_output)``.
    """
    src, tgt = lang_pair.split("-")
    pair_model = model.models[lang_pair]

    # Point each latent side at the right language and choose between soft
    # and hard layer selection based on the update counter.
    if self.encoder_latent_layer:
        src_lang_idx = self.src_lang_idx_dict[src]
        encoder = pair_model.encoder
        encoder.set_lang_idx(src_lang_idx)
        encoder.layer_select.hard_select = update_num > self.args.soft_update
    if self.decoder_latent_layer:
        tgt_lang_idx = self.tgt_lang_idx_dict[tgt]
        decoder = pair_model.decoder
        decoder.set_lang_idx(tgt_lang_idx)
        decoder.layer_select.hard_select = update_num > self.args.soft_update

    loss, sample_size, logging_output = criterion(pair_model, sample[lang_pair])

    if self.encoder_latent_layer:
        enc_samples = pair_model.encoder.layer_select.layer_samples
        fully_sampled = all(entry is not None for entry in enc_samples)
        # With the "agged_posterior" prior the KL term is only added once
        # every layer has a sample.
        if fully_sampled or self.args.prior != "agged_posterior":
            loss += self.kl_loss(enc_samples, src_lang_idx, update_num, sample_size)
    if self.decoder_latent_layer:
        dec_samples = pair_model.decoder.layer_select.layer_samples
        fully_sampled = all(entry is not None for entry in dec_samples)
        if fully_sampled or self.args.prior != "agged_posterior":
            loss += self.kl_loss(dec_samples, tgt_lang_idx, update_num, sample_size)

    if ignore_grad:
        loss *= 0

    sparsity_pending = hasattr(
        self, "sparsity_loss"
    ) and self.sparsity_loss.is_valid(update_num)
    if sparsity_pending:
        # train_step() will backpropagate the sparsity loss afterwards, so
        # the graph has to be kept alive.
        loss.backward(retain_graph=True)
    else:
        optimizer.backward(loss)

    return loss, sample_size, logging_output


def train_step(
    self, sample, model, criterion, optimizer, update_num, ignore_grad=False
):
    """Run the parent train step, then backpropagate the layer-sparsity loss.

    The sparsity loss is computed once per step from the layer samples of
    the first model in ``model.models``, using the aggregated sample size.
    """
    agg_loss, agg_sample_size, agg_logging_output = super().train_step(
        sample, model, criterion, optimizer, update_num, ignore_grad
    )

    sparsity_active = hasattr(
        self, "sparsity_loss"
    ) and self.sparsity_loss.is_valid(update_num)
    if not sparsity_active:
        return agg_loss, agg_sample_size, agg_logging_output

    # Auxiliary loss from layer sparsity, based on all samples from all
    # languages.
    total_sparsity = 0
    if self.encoder_latent_layer:
        first_encoder = next(iter(model.models.values())).encoder
        total_sparsity += self.sparsity_loss(
            first_encoder.layer_select.layer_samples,
            update_num,
            agg_sample_size,
        )
    if self.decoder_latent_layer:
        first_decoder = next(iter(model.models.values())).decoder
        total_sparsity += self.sparsity_loss(
            first_decoder.layer_select.layer_samples,
            update_num,
            agg_sample_size,
        )
    if total_sparsity > 0:
        optimizer.backward(total_sparsity)
    return agg_loss, agg_sample_size, agg_logging_output


def _per_lang_pair_valid_loss(self, lang_pair, model, criterion, sample):
    """Select the language indices for ``lang_pair`` and evaluate the criterion."""
    src, tgt = lang_pair.split("-")
    pair_model = model.models[lang_pair]
    if self.encoder_latent_layer:
        pair_model.encoder.set_lang_idx(self.src_lang_idx_dict[src])
    if self.decoder_latent_layer:
        pair_model.decoder.set_lang_idx(self.tgt_lang_idx_dict[tgt])
    loss, sample_size, logging_output = criterion(pair_model, sample[lang_pair])
    return loss, sample_size, logging_output


def inference_step(
    self, generator, models, sample, prefix_tokens=None, constraints=None
):
    """Set the language index on every latent-layer model, then defer to the parent."""
    latent_encoder = self.encoder_latent_layer
    latent_decoder = self.decoder_latent_layer
    if latent_encoder or latent_decoder:
        for candidate in models:
            if latent_encoder:
                assert candidate.encoder.layer_select is not None
                candidate.encoder.set_lang_idx(
                    self.src_lang_idx_dict[self.args.source_lang]
                )
            if latent_decoder:
                assert candidate.decoder.layer_select is not None
                candidate.decoder.set_lang_idx(
                    self.tgt_lang_idx_dict[self.args.target_lang]
                )
    return super().inference_step(
        generator, models, sample, prefix_tokens, constraints
    )


@property
def encoder_latent_layer(self):
    """Value of ``args.encoder_latent_layer`` when the attribute is present."""
    flag_present = safe_hasattr(self.args, "encoder_latent_layer")
    return flag_present and self.args.encoder_latent_layer


@property
def decoder_latent_layer(self):
    """Value of ``args.decoder_latent_layer`` when the attribute is present."""
    flag_present = safe_hasattr(self.args, "decoder_latent_layer")
    return flag_present and self.args.decoder_latent_layer


@property
def src_lang_idx_dict(self):
    """Map each source language to its position in ``self.src_langs``.

    A language listed more than once keeps the index of its last occurrence.
    """
    index_of = {}
    for position, lang in enumerate(self.src_langs):
        index_of[lang] = position
    return index_of


@property
def tgt_lang_idx_dict(self):
    """Map each target language to its position in ``self.tgt_langs``.

    A language listed more than once keeps the index of its last occurrence.
    """
    index_of = {}
    for position, lang in enumerate(self.tgt_langs):
        index_of[lang] = position
    return index_of
[QNLI](https://arxiv.org/abs/1804.07461) | [roberta_layerdrop.large.qnli.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/roberta_layerdrop.large.qnli.tar.gz) + + +Evaluate performance of these pre-trained models: +```bash +# Example for Machine Translation +fairseq-generate /path/to/bped/wmt/data --path nmt_checkpoint.pt \ + --beam 8 --lenpen 0.4 \ + --batch-size 64 \ + --remove-bpe \ + --gen-subset test > wmt16_gen.txt +bash scripts/compound_split_bleu.sh wmt16_gen.txt +# prints BLEU4 = 30.17 +``` + +```python +# Example for RoBERTa + LayerDrop finetuned on MNLI: +from fairseq.models.roberta import RobertaModel + +roberta_layerdrop = RobertaModel.from_pretrained( + '/path/to/MNLI/model', + checkpoint_file='mnli_checkpoint.pt', + data_name_or_path='/path/to/MNLI/data/MNLI-bin' +) +label_map = {0: 'contradiction', 2: 'neutral', 1: 'entailment'} +ncorrect, nsamples = 0, 0 +roberta_layerdrop.cuda() +roberta_layerdrop.eval() +with open('/path/to/MNLI/data/dev_matched.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[8], tokens[9], tokens[-1] + tokens = roberta_layerdrop.encode(sent1, sent2) + prediction = roberta_layerdrop.predict('sentence_classification_head', tokens).argmax().item() + prediction_label = label_map[prediction] + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) +# prints | Accuracy: 0.9026999490575649 + + +# Example for RoBERTa + LayerDrop finetuned on QNLI: +roberta = RobertaModel.from_pretrained( + '/path/to/QNLI/model', + checkpoint_file='qnli_checkpoint.pt', + data_name_or_path='/path/to/QNLI/data/QNLI-bin' +) + +label_fn = lambda label: roberta.task.label_dictionary.string( + [label + roberta.task.target_dictionary.nspecial] +) +ncorrect, nsamples = 0, 0 +roberta.cuda() +roberta.eval() +with open('/path/to/QNLI/data/dev.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): +
tokens = line.strip().split('\t') + sent1, sent2, target = tokens[1], tokens[2], tokens[3] + tokens = roberta.encode(sent1, sent2) + prediction = roberta.predict('sentence_classification_head', tokens).argmax().item() + prediction_label = label_fn(prediction) + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) +# prints | Accuracy: 0.9480139117700896 +``` + + +## Example usage + +To train a model with LayerDrop, add the following flags. We recommend 0.2, a value that worked well in our experiments. For Language Models that are decoder-only, you need only the decoder flag. For RoBERTa, an encoder, you need only the encoder flag. The encoder and decoder LayerDrop values can be set differently. +``` +--encoder-layerdrop 0.2 --decoder-layerdrop 0.2 +``` + +To prune a model that has been trained with LayerDrop, add the following flags followed by a comma separated list of which layers you would like to keep. +``` +--encoder-layers-to-keep 0,2,4,6,8,10,12,14 --decoder-layers-to-keep 0,2,4,6,8,10,12,14 +``` +Setting these flags should print a message such as: +``` +| Pruning model to specified layer configuration +``` +You should also see a smaller number of parameters in the model, for example the 16-Layer Transformer Language Model prints: +``` +num. model params: 246933504 +``` +while a model pruned to 8 Layers prints: +``` +num. model params: 146163712 +``` + +If you would like to pick up training with a model that has been pruned, simply adding these flags is sufficient. If you would like to use a script that only does evaluation (no training), you may need to pass an override command. 
A specific example would be for language modeling: +```bash +fairseq-eval-lm /path/to/wikitext-103 \ + --path /path/to/model/checkpoint.pt \ + --model-overrides "{'decoder_layers_to_keep':'0,2,4,6,8,10,12,14'}" +``` +This model override command overrides the training parameters and updates the model arguments so that the pruned model is run instead of the full model. + +## Reproduce Paper Results + +Looking to reproduce the results in the paper? + +1. For Translation on WMT16 en-de, we followed this setting [here](https://github.com/pytorch/fairseq/blob/main/examples/scaling_nmt/README.md) +2. To train RoBERTa, we followed this setting [here](https://github.com/pytorch/fairseq/tree/main/examples/roberta) +3. To train Language Models on Wikitext-103, we followed this setting [here](https://github.com/pytorch/fairseq/tree/main/examples/language_model) + + +## Tips + +1. If you would like to train large models with better performance, LayerDrop should be set to a smaller value such as 0.1 or 0.2. Too much LayerDrop will mean the model has too much regularization, so may not reach the best performance. Since LayerDrop adds regularization, you may achieve the best performance by slightly reducing the amount of standard dropout (for example, reduce by 0.1). + +2. If you would like to train large models to be pruned and made smaller, LayerDrop should be set to a larger value such as 0.5 if you want to prune very aggressively (such as removing half the network or more). If you would like to prune fewer layers away, LayerDrop can be set to a smaller value such as 0.2. Our experiments were conducted with low values of LayerDrop (such as 0.1 and 0.2), for reference. + +3. When pruning layers at inference time, it is best to spread out the layers remaining so they are evenly spaced throughout the network. For example, if you want to remove 50% of the network, keeping every other layer is good. + + +## FAQ + +1. How did the sharing layers experiment work? 
In an appendix (https://openreview.net/pdf?id=SylO2yStDr) we added an experiment on Wikitext-103 language modeling that combined LayerDrop with Weight Sharing. We shared chunks of 2 layers such that every other layer had shared weights. For example, if our network has layers 1 through 6, then layer 1 and 2 are shared, layer 3 and 4 are shared, and layer 5 and 6 are shared. + +2. LayerDrop hasn't been helping in my setting? During training time, LayerDrop can help regularize your network. This is most important if your network is already overfitting - if your network is underfitting, it is possible LayerDrop is adding too much regularization. We recommend using smaller values (such as 0.1 or 0.2) and also decreasing the quantity of standard dropout (for example, reduce by 0.1). + +3. Can you train a model without LayerDrop and finetune with LayerDrop (e.g. for BERT)? In our experiments, we did not see great performance. Models such as RoBERTa have trained for a long time in the pre-training setting, so only finetuning with LayerDrop for a few epochs on a downstream task such as MNLI does not achieve the robustness required for successful pruning. + + +## Having an issue or have a question? + +Please open an issue in this repository with the details of your question. Thanks! diff --git a/fairseq/examples/linformer/README.md b/fairseq/examples/linformer/README.md new file mode 100644 index 0000000..f8b36bc --- /dev/null +++ b/fairseq/examples/linformer/README.md @@ -0,0 +1,22 @@ +# Linformer: Self-Attention with Linear Complexity (Wang et al., 2020) + +This example contains code to train Linformer models as described in our paper +[Linformer: Self-Attention with Linear Complexity](https://arxiv.org/abs/2006.04768). 
+ +## Training a new Linformer RoBERTa model + +You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md), +updating your training command with `--user-dir examples/linformer/linformer_src --arch linformer_roberta_base`. + +## Citation + +If you use our work, please cite: + +```bibtex +@article{wang2020linformer, + title={Linformer: Self-Attention with Linear Complexity}, + author={Wang, Sinong and Li, Belinda and Khabsa, Madian and Fang, Han and Ma, Hao}, + journal={arXiv preprint arXiv:2006.04768}, + year={2020} +} +``` diff --git a/fairseq/examples/linformer/linformer_src/__init__.py b/fairseq/examples/linformer/linformer_src/__init__.py new file mode 100644 index 0000000..1c52f13 --- /dev/null +++ b/fairseq/examples/linformer/linformer_src/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .models import linformer_roberta # noqa diff --git a/fairseq/examples/linformer/linformer_src/models/__init__.py b/fairseq/examples/linformer/linformer_src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/linformer/linformer_src/models/linformer_roberta.py b/fairseq/examples/linformer/linformer_src/models/linformer_roberta.py new file mode 100644 index 0000000..b7bdbb1 --- /dev/null +++ b/fairseq/examples/linformer/linformer_src/models/linformer_roberta.py @@ -0,0 +1,120 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@register_model("linformer_roberta")
class LinformerModel(RobertaModel):
    """RoBERTa model whose encoder is a :class:`LinformerEncoder`."""

    @staticmethod
    def add_args(parser):
        """Add RoBERTa's arguments followed by the integer Linformer options."""
        RobertaModel.add_args(parser)

        # (flag, help text); every Linformer option is parsed as an int.
        linformer_options = (
            ("--compressed", "compressed ratio of sequence length"),
            (
                "--shared-kv-compressed",
                "share compressed matrix between k and v, in each layer",
            ),
            (
                "--shared-layer-kv-compressed",
                "share compressed matrix between k and v and across all layers",
            ),
            ("--freeze-compress", "freeze the parameters in compressed layer"),
        )
        for flag, description in linformer_options:
            parser.add_argument(flag, type=int, help=description)

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # Fill in any architecture defaults that are still missing.
        base_architecture(args)

        # Fall back to the sample length when no explicit limit is given.
        if not safe_hasattr(args, "max_positions"):
            args.max_positions = args.tokens_per_sample

        return cls(args, LinformerEncoder(args, task.source_dictionary))


class LinformerEncoder(RobertaEncoder):
    """Linformer encoder."""

    def __init__(self, args, dictionary):
        super().__init__(args, dictionary)
        # Version marker consulted by upgrade_state_dict_named().
        self.register_buffer("version", torch.tensor(2))

    def build_encoder(self, args, dictionary, embed_tokens):
        """Create the Linformer sentence encoder and apply BERT-style init."""
        sentence_encoder = LinformerTransformerEncoder(args, dictionary, embed_tokens)
        sentence_encoder.apply(init_bert_params)
        return sentence_encoder

    def upgrade_state_dict_named(self, state_dict, name):
        """Adapt checkpoints saved before version 2 of this encoder.

        Old checkpoints are stamped with version 1; if their input and
        output embeddings differ, the LM head is rebuilt without weight
        sharing.
        """
        super().upgrade_state_dict_named(state_dict, name)
        prefix = "" if name == "" else name + "."
        version_key = f"{prefix}version"

        # some old checkpoints had weight sharing implemented incorrectly
        # (note: this was correct in the original paper code)
        if utils.item(state_dict.get(version_key, torch.tensor(1))) >= 2:
            return
        state_dict[version_key] = torch.tensor(1)

        embeddings_tied = torch.allclose(
            state_dict[f"{prefix}sentence_encoder.embed_tokens.weight"],
            state_dict[f"{prefix}lm_head.weight"],
        )
        if embeddings_tied:
            return

        # they weren't tied, re-init the LM head without weight sharing
        self.lm_head = self.build_lm_head(
            embed_dim=self.args.encoder_embed_dim,
            output_dim=len(self.dictionary),
            activation_fn=self.args.activation_fn,
            weight=None,  # don't share weights
        )


@register_model_architecture("linformer_roberta", "linformer_roberta")
def base_architecture(args):
    """Fill in Linformer defaults, then the RoBERTa base defaults."""
    linformer_defaults = (
        ("compressed", 4),
        ("shared_kv_compressed", 0),
        ("shared_layer_kv_compressed", 0),
        ("freeze_compress", 0),
    )
    for attr, fallback in linformer_defaults:
        setattr(args, attr, getattr(args, attr, fallback))
    roberta_base_architecture(args)


@register_model_architecture("linformer_roberta", "linformer_roberta_base")
def linformer_roberta_base_architecture(args):
    """Alias architecture that applies :func:`base_architecture` unchanged."""
    base_architecture(args)


@register_model_architecture("linformer_roberta", "linformer_roberta_large")
def linformer_roberta_large_architecture(args):
    """Apply the RoBERTa large defaults first, then the Linformer defaults."""
    roberta_large_architecture(args)
    base_architecture(args)
class LinformerTransformerEncoder(TransformerEncoder):
    """Transformer encoder whose layers are Linformer encoder layers.

    When ``args.shared_layer_kv_compressed == 1`` a single compression
    ``nn.Linear`` is created on first use and handed to every layer;
    otherwise each layer receives ``None``.

    NOTE(review): the forward-pass contract (inputs, returned states) is
    inherited from ``TransformerEncoder`` and is not defined in this class;
    the previous docstring described B x T token/segment inputs and a tuple
    of per-layer T x B x C states plus a B x C sentence representation.
    Confirm against the parent class before relying on it.
    """

    def __init__(self, args, dictionary, embed_tokens):
        # Must exist before the parent constructor runs, because
        # build_encoder_layer() reads it.
        self.compress_layer = None
        super().__init__(args, dictionary, embed_tokens)

    def build_encoder_layer(self, args):
        """Return a new Linformer layer, creating the shared projection if needed."""
        cfg = self.args
        create_shared = (
            cfg.shared_layer_kv_compressed == 1 and self.compress_layer is None
        )
        if create_shared:
            full_len = cfg.max_positions
            shared = nn.Linear(full_len, full_len // cfg.compressed)
            # initialize parameters for the compression layer
            nn.init.xavier_uniform_(shared.weight, gain=1 / math.sqrt(2))
            if cfg.freeze_compress == 1:
                shared.weight.requires_grad = False
            self.compress_layer = shared

        return LinformerTransformerEncoderLayer(args, self.compress_layer)
def __init__(self, args, shared_compress_layer):
    """Store the (possibly ``None``) shared compression layer and build the layer."""
    # wrap in a list so it's not automatically registered by PyTorch
    self.shared_compress_layer = [shared_compress_layer]

    super().__init__(args)

    # Version marker consulted by upgrade_state_dict_named().
    self.register_buffer("version", torch.tensor(2))


def build_self_attention(self, embed_dim, args):
    """Create the Linformer self-attention module for this layer."""
    attention_kwargs = dict(
        dropout=args.dropout,
        self_attention=True,
        q_noise=args.quant_noise_pq,
        qn_block_size=args.quant_noise_pq_block_size,
        compressed=args.compressed,
        max_seq_len=args.max_positions,
        shared_kv_compressed=args.shared_kv_compressed,
        shared_compress_layer=self.shared_compress_layer[0],
        freeze_compress=args.freeze_compress,
    )
    return MultiheadLinearAttention(
        embed_dim, args.encoder_attention_heads, **attention_kwargs
    )


def upgrade_state_dict_named(self, state_dict, name):
    """Adapt checkpoints saved before version 2 of this layer.

    Old checkpoints are stamped with version 1. If they carry a
    ``shared_compress_layer`` entry, the layer gets its own compression
    module, self-attention is rebuilt around it, and the shared entries are
    removed from ``state_dict``.
    """
    super().upgrade_state_dict_named(state_dict, name)
    prefix = "" if name == "" else name + "."
    version_key = f"{prefix}version"

    # some old checkpoints had weight sharing implemented incorrectly
    # (note: this was correct in the original paper code)
    if utils.item(state_dict.get(version_key, torch.tensor(1))) >= 2:
        return
    state_dict[version_key] = torch.tensor(1)

    # check compression layer sharing
    weight_key = f"{prefix}shared_compress_layer.weight"
    if weight_key not in state_dict:
        return

    # reinitialize block without sharing compression layer to match
    # old behavior
    previous = self.shared_compress_layer[0]
    self.shared_compress_layer = [
        torch.nn.Linear(previous.weight.size(1), previous.weight.size(0))
    ]
    self.self_attn = self.build_self_attention(self.embed_dim, self.args)

    # delete shared_compress_layer, since it's already copied to
    # self_attn.compress_k.weight
    del state_dict[weight_key]
    bias_key = f"{prefix}shared_compress_layer.bias"
    if bias_key in state_dict:
        del state_dict[bias_key]
@with_incremental_state
class MultiheadLinearAttention(nn.Module):
    """Multi-headed linformer attention.

    Projects the key and values down to the compressed dimension, before computing self-attention.

    See "Linformer: Self-Attention with Linear Complexity" for more details.
    """

    def __init__(
        self,
        embed_dim,
        num_heads,
        kdim=None,
        vdim=None,
        dropout=0.0,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
        self_attention=False,
        encoder_decoder_attention=False,
        q_noise=0.0,
        qn_block_size=8,
        compressed=1,
        max_seq_len=256,
        shared_kv_compressed=0,
        shared_compress_layer=None,
        freeze_compress=0,
    ):
        """Build the projections and the sequence-compression layers.

        Args:
            embed_dim: model dimension; must be divisible by ``num_heads``.
            num_heads: number of attention heads.
            kdim, vdim: key/value input dimensions; default to ``embed_dim``.
            dropout: dropout probability applied to the attention weights.
            bias: whether the q/k/v/out projections have a bias term.
            add_bias_kv: append learned bias vectors to keys and values.
            add_zero_attn: append an all-zero key/value position.
            self_attention: keys and values are derived from the query.
            encoder_decoder_attention: keys and values both come from ``key``.
            q_noise, qn_block_size: forwarded to ``quant_noise``.
            compressed: divisor of ``max_seq_len`` giving the compressed length.
            max_seq_len: input width of the compression layers.
            shared_kv_compressed: 0 gives values their own ``compress_v``;
                1 makes ``forward`` reuse ``compress_k`` for values.
            shared_compress_layer: externally owned compression layer; when
                given it is used instead of creating new ones.
            freeze_compress: 1 disables gradients on the compression weights.
        """
        super().__init__()
        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim

        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        assert (
            self.head_dim * num_heads == self.embed_dim
        ), "embed_dim must be divisible by num_heads"
        self.scaling = self.head_dim ** -0.5

        self.self_attention = self_attention
        self.encoder_decoder_attention = encoder_decoder_attention

        assert not self.self_attention or self.qkv_same_dim, (
            "Self-attention requires query, key and " "value to be of the same size"
        )

        self.k_proj = quant_noise(
            nn.Linear(self.kdim, embed_dim, bias=bias), q_noise, qn_block_size
        )
        self.v_proj = quant_noise(
            nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size
        )
        self.q_proj = quant_noise(
            nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size
        )

        # used for compress sequence to subsequence
        if shared_compress_layer is None:
            self.compress_seq_len = max_seq_len // compressed
            self.compress_k = nn.Linear(max_seq_len, self.compress_seq_len, bias=False)
            if shared_kv_compressed == 0:
                self.compress_v = nn.Linear(
                    max_seq_len, self.compress_seq_len, bias=False
                )
            self.layerwise_sharing = False
        else:
            # The same externally owned layer serves keys (and values).
            self.compress_k = shared_compress_layer
            if shared_kv_compressed == 0:
                self.compress_v = shared_compress_layer
            self.layerwise_sharing = True
        self.shared_kv_compressed = shared_kv_compressed

        self.out_proj = quant_noise(
            nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size
        )

        if add_bias_kv:
            self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
            self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
        else:
            self.bias_k = self.bias_v = None

        self.add_zero_attn = add_zero_attn

        self.reset_parameters()

        if freeze_compress == 1:
            self.compress_k.weight.requires_grad = False
            if shared_kv_compressed == 0:
                self.compress_v.weight.requires_grad = False

        self.onnx_trace = False

    def prepare_for_onnx_export_(self):
        """Switch the module into ONNX-tracing mode."""
        self.onnx_trace = True

    def reset_parameters(self):
        """Initialize projection (and non-shared compression) weights."""
        if self.qkv_same_dim:
            # Empirically observed the convergence to be much better with
            # the scaled initialization
            nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2))
            nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2))
            nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2))
            if (
                not self.layerwise_sharing
            ):  # otherwise, we already initialize the parameters
                nn.init.xavier_uniform_(self.compress_k.weight, gain=1 / math.sqrt(2))
                if self.shared_kv_compressed == 0:
                    nn.init.xavier_uniform_(
                        self.compress_v.weight, gain=1 / math.sqrt(2)
                    )
        else:
            nn.init.xavier_uniform_(self.k_proj.weight)
            nn.init.xavier_uniform_(self.v_proj.weight)
            nn.init.xavier_uniform_(self.q_proj.weight)
            if (
                not self.layerwise_sharing
            ):  # otherwise, we already initialize the parameters
                nn.init.xavier_uniform_(self.compress_k.weight)
                if self.shared_kv_compressed == 0:
                    nn.init.xavier_uniform_(self.compress_v.weight)

        nn.init.xavier_uniform_(self.out_proj.weight)
        if self.out_proj.bias is not None:
            nn.init.constant_(self.out_proj.bias, 0.0)
        if self.bias_k is not None:
            nn.init.xavier_normal_(self.bias_k)
        if self.bias_v is not None:
            nn.init.xavier_normal_(self.bias_v)

    def forward(
        self,
        query,
        key: Optional[Tensor],
        value: Optional[Tensor],
        key_padding_mask: Optional[Tensor] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        need_weights: bool = True,
        static_kv: bool = False,
        attn_mask: Optional[Tensor] = None,
        before_softmax: bool = False,
        need_head_weights: bool = False,
    ) -> Tuple[Tensor, Optional[Tensor]]:
        """Input shape: Time x Batch x Channel

        Args:
            key_padding_mask (ByteTensor, optional): mask to exclude
                keys that are pads, of shape `(batch, src_len)`, where
                padding elements are indicated by 1s.
            need_weights (bool, optional): return the attention weights,
                averaged over heads (default: True).
            attn_mask (ByteTensor, optional): typically used to
                implement causal attention, where the mask prevents the
                attention from looking forward in time (default: None).
            before_softmax (bool, optional): return the raw attention
                weights and values before the attention softmax.
            need_head_weights (bool, optional): return the attention
                weights for each head. Implies *need_weights*. Default:
                return the average attention weights over all heads.

        Returns:
            ``(attn, attn_weights)``, where ``attn_weights`` is ``None``
            unless *need_weights* is set; or ``(attn_weights, v)`` when
            *before_softmax* is set.
        """
        if need_head_weights:
            need_weights = True

        tgt_len, bsz, embed_dim = query.size()
        assert embed_dim == self.embed_dim
        assert list(query.size()) == [tgt_len, bsz, embed_dim]

        if incremental_state is not None:
            saved_state = self._get_input_buffer(incremental_state)
            if saved_state is not None and "prev_key" in saved_state:
                # previous time steps are cached - no need to recompute
                # key and value if they are static
                if static_kv:
                    assert self.encoder_decoder_attention and not self.self_attention
                    key = value = None
        else:
            saved_state = None

        if self.self_attention:
            q = self.q_proj(query)

            # Compress along the time axis: only the first tgt_len input
            # columns of the compression weight are used.
            k_input = query.permute(1, 2, 0).contiguous()  # B * C * T
            k_input = (
                F.linear(k_input, self.compress_k.weight[:, 0:tgt_len])
                .permute(2, 0, 1)
                .contiguous()
            )
            k = self.k_proj(k_input)

            v_input = query.permute(1, 2, 0).contiguous()  # B * C * T
            if self.shared_kv_compressed == 0:
                v_input = (
                    F.linear(v_input, self.compress_v.weight[:, 0:tgt_len])
                    .permute(2, 0, 1)
                    .contiguous()
                )
            if self.shared_kv_compressed == 1:  # use shared kv compressed linear layer
                v_input = (
                    F.linear(v_input, self.compress_k.weight[:, 0:tgt_len])
                    .permute(2, 0, 1)
                    .contiguous()
                )
            v = self.v_proj(v_input)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                # Both projections read from `key` in this branch.
                k = self.k_proj(key)
                v = self.v_proj(key)

        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q *= self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
                )
            if key_padding_mask is not None:
                key_padding_mask = torch.cat(
                    [
                        key_padding_mask,
                        key_padding_mask.new_zeros(key_padding_mask.size(0), 1),
                    ],
                    dim=1,
                )

        # Fold the heads into the batch dimension: (bsz * heads, len, head_dim).
        q = (
            q.contiguous()
            .view(tgt_len, bsz * self.num_heads, self.head_dim)
            .transpose(0, 1)
        )
        if k is not None:
            k = (
                k.contiguous()
                .view(-1, bsz * self.num_heads, self.head_dim)
                .transpose(0, 1)
            )
        if v is not None:
            v = (
                v.contiguous()
                .view(-1, bsz * self.num_heads, self.head_dim)
                .transpose(0, 1)
            )

        if saved_state is not None:
            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
            if "prev_key" in saved_state:
                _prev_key = saved_state["prev_key"]
                assert _prev_key is not None
                prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim)
                if static_kv:
                    k = prev_key
                else:
                    assert k is not None
                    k = torch.cat([prev_key, k], dim=1)
            if "prev_value" in saved_state:
                _prev_value = saved_state["prev_value"]
                assert _prev_value is not None
                prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim)
                if static_kv:
                    v = prev_value
                else:
                    assert v is not None
                    v = torch.cat([prev_value, v], dim=1)
            prev_key_padding_mask: Optional[Tensor] = None
            if "prev_key_padding_mask" in saved_state:
                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
            assert k is not None and v is not None
            key_padding_mask = MultiheadLinearAttention._append_prev_key_padding_mask(
                key_padding_mask=key_padding_mask,
                prev_key_padding_mask=prev_key_padding_mask,
                batch_size=bsz,
                src_len=k.size(1),
                static_kv=static_kv,
            )

            saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim)
            saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim)
            saved_state["prev_key_padding_mask"] = key_padding_mask
            # In this branch incremental_state is never None
            assert incremental_state is not None
            incremental_state = self._set_input_buffer(incremental_state, saved_state)
        assert k is not None
        src_len = k.size(1)

        if self.add_zero_attn:
            assert v is not None
            src_len += 1
            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
            if attn_mask is not None:
                attn_mask = torch.cat(
                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
                )

        attn_weights = torch.bmm(q, k.transpose(1, 2))
        attn_weights = MultiheadLinearAttention.apply_sparse_mask(
            attn_weights, tgt_len, src_len, bsz
        )

        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]

        # NOTE: key_padding_mask is extended and cached above but is not
        # applied to attn_weights anywhere in this method; only attn_mask is.
        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            if self.onnx_trace:
                attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1)
            attn_weights += attn_mask

        if before_softmax:
            return attn_weights, v

        attn_weights_float = utils.softmax(
            attn_weights, dim=-1, onnx_trace=self.onnx_trace
        )
        attn_weights = attn_weights_float.type_as(attn_weights)
        attn_probs = F.dropout(
            attn_weights,
            p=self.dropout,
            training=self.training,
        )
        assert v is not None
        attn = torch.bmm(attn_probs, v)
        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.size(1) == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.contiguous().view(tgt_len, bsz, embed_dim)
        else:
            attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
        attn = self.out_proj(attn)
        attn_weights: Optional[Tensor] = None
        if need_weights:
            attn_weights = attn_weights_float.view(
                bsz, self.num_heads, tgt_len, src_len
            ).transpose(1, 0)
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(dim=0)

        return attn, attn_weights

    @staticmethod
    def _append_prev_key_padding_mask(
        key_padding_mask: Optional[Tensor],
        prev_key_padding_mask: Optional[Tensor],
        batch_size: int,
        src_len: int,
        static_kv: bool,
    ) -> Optional[Tensor]:
        """Combine the cached and current key padding masks.

        Whichever mask is missing is replaced by zeros so that the result
        has ``src_len`` columns; returns ``None`` when both are ``None``.
        """
        # saved key padding masks have shape (bsz, seq_len)
        if prev_key_padding_mask is not None and static_kv:
            new_key_padding_mask = prev_key_padding_mask
        elif prev_key_padding_mask is not None and key_padding_mask is not None:
            new_key_padding_mask = torch.cat(
                [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1
            )
        # During incremental decoding, as the padding token enters and
        # leaves the frame, there will be a time when prev or current
        # is None
        elif prev_key_padding_mask is not None:
            filler = torch.zeros(
                (batch_size, src_len - prev_key_padding_mask.size(1)),
                device=prev_key_padding_mask.device,
            )
            new_key_padding_mask = torch.cat(
                [prev_key_padding_mask.float(), filler.float()], dim=1
            )
        elif key_padding_mask is not None:
            filler = torch.zeros(
                (batch_size, src_len - key_padding_mask.size(1)),
                device=key_padding_mask.device,
            )
            new_key_padding_mask = torch.cat(
                [filler.float(), key_padding_mask.float()], dim=1
            )
        else:
            new_key_padding_mask = prev_key_padding_mask
        return new_key_padding_mask

    @torch.jit.export
    def reorder_incremental_state(
        self,
        incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
        new_order: Tensor,
    ):
        """Reorder buffered internal state (for incremental generation)."""
        input_buffer = self._get_input_buffer(incremental_state)
        if input_buffer is not None:
            for k in input_buffer.keys():
                input_buffer_k = input_buffer[k]
                if input_buffer_k is not None:
                    # For encoder-decoder attention, stop as soon as a
                    # buffer's first dimension already equals len(new_order).
                    # NOTE(review): presumably such buffers need no
                    # reordering - confirm.
                    if self.encoder_decoder_attention and input_buffer_k.size(
                        0
                    ) == new_order.size(0):
                        break
                    input_buffer[k] = input_buffer_k.index_select(0, new_order)
            incremental_state = self._set_input_buffer(incremental_state, input_buffer)
        return incremental_state

    def _get_input_buffer(
        self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
    ) -> Dict[str, Optional[Tensor]]:
        """Return the cached ``attn_state`` dict, or an empty dict if none exists."""
        result = self.get_incremental_state(incremental_state, "attn_state")
        if result is not None:
            return result
        else:
            empty_result: Dict[str, Optional[Tensor]] = {}
            return empty_result

    def _set_input_buffer(
        self,
        incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
        buffer: Dict[str, Optional[Tensor]],
    ):
        """Store ``buffer`` as this module's ``attn_state``."""
        return self.set_incremental_state(incremental_state, "attn_state", buffer)

    @staticmethod
    def apply_sparse_mask(attn_weights, tgt_len: int, src_len: int, bsz: int):
        """Return ``attn_weights`` unchanged.

        Declared static because it takes no ``self``; without the decorator
        any instance-bound call would pass the module as ``attn_weights``.
        """
        return attn_weights

    def upgrade_state_dict_named(self, state_dict, name):
        """Split legacy fused ``in_proj_*`` entries into q/k/v projections in place."""
        prefix = name + "." if name != "" else ""
        items_to_add = {}
        keys_to_remove = []
        for k in state_dict.keys():
            if k.endswith(prefix + "in_proj_weight"):
                # in_proj_weight used to be q + k + v with same dimensions
                dim = int(state_dict[k].shape[0] / 3)
                items_to_add[prefix + "q_proj.weight"] = state_dict[k][:dim]
                items_to_add[prefix + "k_proj.weight"] = state_dict[k][dim : 2 * dim]
                items_to_add[prefix + "v_proj.weight"] = state_dict[k][2 * dim :]

                keys_to_remove.append(k)

                k_bias = prefix + "in_proj_bias"
                if k_bias in state_dict.keys():
                    # The split size is taken from the fused weight's first
                    # dimension (k is still the weight key here).
                    dim = int(state_dict[k].shape[0] / 3)
                    items_to_add[prefix + "q_proj.bias"] = state_dict[k_bias][:dim]
                    items_to_add[prefix + "k_proj.bias"] = state_dict[k_bias][
                        dim : 2 * dim
                    ]
                    items_to_add[prefix + "v_proj.bias"] = state_dict[k_bias][2 * dim :]

                    keys_to_remove.append(prefix + "in_proj_bias")

        # Mutate only after iteration so the dict is not resized mid-loop.
        for k in keys_to_remove:
            del state_dict[k]

        for key, value in items_to_add.items():
            state_dict[key] = value
Our focus on non-English-Centric models brings gains of more than 10 BLEU when directly translating between non-English directions while performing competitively with the best single systems of WMT. + +If you are new to using fairseq, read the following walkthrough. Otherwise, skip to the sections below. + +0. **Generation Data** + +To download the generation data, follow the below commands. Note that all datasets need to be detokenized *before* applying SPM in the data preprocessing step. If you use these evaluation datasets, please cite their associated papers. +```bash +# WMT - use sacrebleu, example here: +sacrebleu -t wmt14 -l fr-en --echo src > wmt.test.fr-en.fr +sacrebleu -t wmt14 -l fr-en --echo ref > wmt.test.fr-en.en + +# WAT +wget http://lotus.kuee.kyoto-u.ac.jp/WAT/my-en-data/wat2020.my-en.zip +unzip wat2020.my-en.zip + +# FLORES +# download from: https://github.com/facebookresearch/flores + +# TED - need to detokenize with Moses! +# from: https://github.com/neulab/word-embeddings-for-nmt +wget http://phontron.com/data/ted_talks.tar.gz + +# Autshumato +# request to download: https://repo.sadilar.org/handle/20.500.12185/397 + +# Tatoeba Challenge +# available here: https://github.com/Helsinki-NLP/Tatoeba-Challenge +``` + +1. **Training Data** + +To produce the training data, we use a combination of [CCMatrix](https://arxiv.org/abs/1911.04944) and [CCAligned](https://arxiv.org/abs/1911.06154). Check out the instructions [here](https://github.com/facebookresearch/LASER/tree/master/tasks/CCMatrix) to download the raw data. + +2. **Preprocess Data** + +After downloading raw data, you will need to postprocess the data, then apply SPM, then binarize. Note that it is very important you run the postprocessing script, because this removes any instance of the evaluation data in the mined training data. 
+ +```bash +# preprocess data + +# remove sentences with more than 50% punctuation +python /path/to/fairseq/examples/m2m_100/process_data/remove_too_much_punc.py + +# deduplicate training data +paste /path/to/datadir/train.$src /path/to/datadir/train.$tgt | awk '!x[$0]++' > /path/to/datadir/train.dedup +echo "keeping $(wc -l /path/to/datadir/train.dedup) bitext out of $(wc -l /path/to/datadir/train.$src)" +cut -f1 /path/to/datadir/train.dedup > /path/to/datadir/train.$src +cut -f2 /path/to/datadir/train.dedup > /path/to/datadir/train.$tgt + +# remove all instances of evaluation data from the training data +python /path/to/fairseq/examples/m2m_100/process_data/dedup_data.py + +# frequency cleaning +wget https://dl.fbaipublicfiles.com/m2m_100/histograms.tar.gz +tar -xvzf histograms.tar.gz +python /path/to/fairseq/examples/m2m_100/process_data/clean_histogram.py --src $src --tgt $tgt --src-file /path/to/source/file --tgt-file /path/to/output/file --src-output-file source_output.$src --tgt-output-file target_output.$tgt --histograms /path/to/histograms + +# apply SPM +wget https://dl.fbaipublicfiles.com/m2m_100/spm.128k.model +python /path/to/fairseq/scripts/spm_encode.py \ + --model spm.128k.model \ + --output_format=piece \ + --inputs=/path/to/input/file/here \ + --outputs=/path/to/output/file/here + +# length ratio cleaning +perl mosesdecoder/scripts/training/clean-corpus-n.perl --ratio 3 /path/to/training/data/train.spm.$src-$tgt $src $tgt /path/to/output/directory/train.spm.$src-$tgt 1 250 + +# binarize data +wget https://dl.fbaipublicfiles.com/m2m_100/data_dict.128k.txt +fairseq-preprocess \ + --source-lang $src --target-lang $tgt \ + --testpref spm.$src.$tgt \ + --thresholdsrc 0 --thresholdtgt 0 \ + --destdir data_bin \ + --srcdict data_dict.128k.txt --tgtdict data_dict.128k.txt +``` + +3. 
**Training Scripts** + +To reproduce the training of our models, we train with fairseq-py's multilingual translation [task](https://github.com/pytorch/fairseq/tree/main/examples/multilingual). If you are interested in model parallel training, also check out [fairscale](https://github.com/facebookresearch/fairscale). + +4. **Generation** + +To generate from our models, follow the commands in the generation section below. + + +If you use any of the resources listed here, please cite: +```bibtex +@article{fan2020beyond, + title={Beyond English-Centric Multilingual Machine Translation}, + author={Fan, Angela and Bhosale, Shruti and Schwenk, Holger and Ma, Zhiyi and El-Kishky, Ahmed and Goyal, Siddharth and Baines, Mandeep and Celebi, Onur and Wenzek, Guillaume and Chaudhary, Vishrav and Goyal, Naman and Birch, Tom and Liptchinsky, Vitaliy and Edunov, Sergey and Grave, Edouard and Auli, Michael and Joulin, Armand}, + journal={arXiv preprint}, + year={2020} +} + +@article{schwenk2019ccmatrix, + title={Ccmatrix: Mining billions of high-quality parallel sentences on the web}, + author={Schwenk, Holger and Wenzek, Guillaume and Edunov, Sergey and Grave, Edouard and Joulin, Armand}, + journal={arXiv preprint arXiv:1911.04944}, + year={2019} +} + +@article{el2019massive, + title={A Massive Collection of Cross-Lingual Web-Document Pairs}, + author={El-Kishky, Ahmed and Chaudhary, Vishrav and Guzman, Francisco and Koehn, Philipp}, + journal={arXiv preprint arXiv:1911.06154}, + year={2019} +} +``` + + +## Trained Models + +### 418M and 1.2B Model +We include the last checkpoint for both of these models. 
+ +```bash +wget https://dl.fbaipublicfiles.com/m2m_100/model_dict.128k.txt +wget https://dl.fbaipublicfiles.com/m2m_100/language_pairs_small_models.txt + +# 418M parameter model +wget https://dl.fbaipublicfiles.com/m2m_100/418M_last_checkpoint.pt + +# 1.2B parameter model +wget https://dl.fbaipublicfiles.com/m2m_100/1.2B_last_checkpoint.pt + +# Generation: +fairseq-generate $binarized_data_path --batch-size 32 --path $path_to_model --fixed-dictionary model_dict.128k.txt -s en -t fr --remove-bpe 'sentencepiece' --beam 5 --task translation_multi_simple_epoch --lang-pairs language_pairs_small_models.txt --decoder-langtok --encoder-langtok src --gen-subset test > gen_out +``` + +### 12B Model +12B parameter model trained on many-to-many training data for 100 languages. We include the last checkpoint, average of last 5 checkpoints, average of last 10 checkpoints. There isn't a universally best choice out of these three, but all three versions are pretty close in accuracy. You can either sweep over the 3 checkpoints on a dev test and use the best performing checkpoint for final testing. Or the last checkpoint can be a good default choice. 
+ +**Model Download Links** +Configuration | 2 32GB GPUs | 4 16GB GPUs | 6 12GB GPUs | 8 8GB GPUs +:--|:--|:--|:--|:-- +Last Checkpoint | [12b_last_chk_2_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_last_chk_2_gpus.pt) | [12b_last_chk_4_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_last_chk_4_gpus.pt) | [12b_last_chk_6_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_last_chk_6_gpus.pt) | [12b_last_chk_8_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_last_chk_8_gpus.pt) +Average of last 5 checkpoints | [12b_avg5_chk_2_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg5_chk_2_gpus.pt) | [12b_avg5_chk_4_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg5_chk_4_gpus.pt) | [12b_avg5_chk_6_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg5_chk_6_gpus.pt) | [12b_avg5_chk_8_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg5_chk_8_gpus.pt) +Average of last 10 checkpoints | [12b_avg10_chk_2_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg10_chk_2_gpus.pt) | [12b_avg10_chk_4_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg10_chk_4_gpus.pt) | [12b_avg10_chk_6_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg10_chk_6_gpus.pt) | [12b_avg10_chk_8_gpus.pt](https://dl.fbaipublicfiles.com/m2m_100/12b_avg10_chk_8_gpus.pt) + +**Generation Arguments** +Configuration | 2 32GB GPUs | 4 16GB GPUs | 6 12GB GPUs | 8 8GB GPUs +:--|:--|:--|:--|:-- +`--pipeline-encoder-balance` | `[26]` | `[1,15,10]` | `[1,9,9,7]` | `[1,6,6,6,7]` +`--pipeline-encoder-devices` | `[0]` | `[0,1,0]` | `[0,1,2,0]` | `[0,4,5,1,0]` +`--pipeline-decoder-balance` | `[3,22,1]` | `[3,11,11,1]` | `[3,7,7,8,1]` | `[1,6,6,6,6,1]` +`--pipeline-decoder-devices` | `[0,1,0]` | `[0,2,3,0]` | `[0,3,4,5,0]` | `[0,2,6,7,3,0]` + + +## SentencePiece Model + +```bash +wget https://dl.fbaipublicfiles.com/m2m_100/spm.128k.model +``` + +## Generation with M2M-100 + +### Encode using our SentencePiece Model + +Note: Install SentencePiece from 
[here](https://github.com/google/sentencepiece) + +```bash +fairseq=/path/to/fairseq +cd $fairseq +sacrebleu --echo src -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.de +sacrebleu --echo ref -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.fr +wget https://dl.fbaipublicfiles.com/m2m_100/spm.128k.model +for lang in de fr ; do + python scripts/spm_encode.py \ + --model spm.128k.model \ + --output_format=piece \ + --inputs=raw_input.de-fr.${lang} \ + --outputs=spm.de-fr.${lang} +done +``` + +### Binarization + +```bash +wget https://dl.fbaipublicfiles.com/m2m_100/data_dict.128k.txt +fairseq-preprocess \ + --source-lang de --target-lang fr \ + --testpref spm.de-fr \ + --thresholdsrc 0 --thresholdtgt 0 \ + --destdir data_bin \ + --srcdict data_dict.128k.txt --tgtdict data_dict.128k.txt +``` + +### Generation for the 12B model + +Note that generation can currently be run using 2 32GB / 4 16GB / 6 12GB / 8 8GB GPUs, and the corresponding model checkpoints and pipeline arguments can be found in the [12B Model Section](#12b-model). +Generation on CPUs will be added in the future. 
+ +```bash +wget https://dl.fbaipublicfiles.com/m2m_100/model_dict.128k.txt +wget https://dl.fbaipublicfiles.com/m2m_100/language_pairs.txt +wget https://dl.fbaipublicfiles.com/m2m_100/12b_last_chk_4_gpus.pt +fairseq-generate \ + data_bin \ + --batch-size 1 \ + --path 12b_last_chk_4_gpus.pt \ + --fixed-dictionary model_dict.128k.txt \ + -s de -t fr \ + --remove-bpe 'sentencepiece' \ + --beam 5 \ + --task translation_multi_simple_epoch \ + --lang-pairs language_pairs.txt \ + --decoder-langtok --encoder-langtok src \ + --gen-subset test \ + --fp16 \ + --dataset-impl mmap \ + --distributed-world-size 1 --distributed-no-spawn \ + --pipeline-model-parallel \ + --pipeline-chunks 1 \ + --pipeline-encoder-balance '[1,15,10]' \ + --pipeline-encoder-devices '[0,1,0]' \ + --pipeline-decoder-balance '[3,11,11,1]' \ + --pipeline-decoder-devices '[0,2,3,0]' > gen_out +``` +## Evaluation with M2M-100 + +### Tokenization + +Note: Refer to tokenizers/README.md for more details on tokenization. + +```bash +cd ${fairseq}/examples/m2m_100 +cat ${fairseq}/gen_out | grep -P "^H" | sort -V | cut -f 3- | sh tok.sh fr > hyp +cat ${fairseq}/raw_input.de-fr.fr | sh tok.sh fr > ref +``` + +### BLEU + +```bash +sacrebleu -tok 'none' ref < hyp +``` diff --git a/fairseq/examples/m2m_100/install_dependecies.sh b/fairseq/examples/m2m_100/install_dependecies.sh new file mode 100644 index 0000000..82a1054 --- /dev/null +++ b/fairseq/examples/m2m_100/install_dependecies.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +CWD=`pwd` +INSTALL_PATH=$CWD/tokenizers/thirdparty + +MOSES=$INSTALL_PATH/mosesdecoder +if [ ! -d $MOSES ]; then + echo 'Cloning Moses github repository (for tokenization scripts)...' 
+ git clone https://github.com/moses-smt/mosesdecoder.git $MOSES + cd $MOSES + # To deal with differences in handling ' vs " + git checkout 03578921cc1a03402 + cd - +fi + +WMT16_SCRIPTS=$INSTALL_PATH/wmt16-scripts +if [ ! -d $WMT16_SCRIPTS ]; then + echo 'Cloning Romanian tokenization scripts' + git clone https://github.com/rsennrich/wmt16-scripts.git $WMT16_SCRIPTS +fi + +KYTEA=$INSTALL_PATH/kytea +if [ ! -f $KYTEA/bin/kytea ]; then + git clone https://github.com/neubig/kytea.git $KYTEA + cd $KYTEA + autoreconf -i + ./configure --prefix=`pwd` + make + make install + cd .. +fi + +export MECAB=$INSTALL_PATH/mecab-0.996-ko-0.9.2 +if [ ! -f $MECAB/bin/mecab ]; then + cd $INSTALL_PATH + curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz + tar zxfv mecab-0.996-ko-0.9.2.tar.gz + cd mecab-0.996-ko-0.9.2/ + ./configure --prefix=`pwd` + make + make install + + cd .. + curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz + tar zxfv mecab-ko-dic-2.1.1-20180720.tar.gz + cd mecab-ko-dic-2.1.1-20180720/ + ./autogen.sh + ./configure --prefix=`pwd` --with-dicdir=$MECAB/lib/mecab/dic/mecab-ko-dic --with-mecab-config=$MECAB/bin/mecab-config + make + sh -c 'echo "dicdir=$MECAB/lib/mecab/dic/mecab-ko-dic" > $MECAB/etc/mecabrc' + make install + cd $CWD +fi + +INDIC_RESOURCES_PATH=$INSTALL_PATH/indic_nlp_resources +if [ ! -d $INDIC_RESOURCES_PATH ]; then + echo 'Cloning indic_nlp_resources' + git clone https://github.com/anoopkunchukuttan/indic_nlp_resources.git $INDIC_RESOURCES_PATH +fi + + +if [ ! 
-f $INSTALL_PATH/seg_my.py ]; then + cd $INSTALL_PATH + wget http://lotus.kuee.kyoto-u.ac.jp/WAT/my-en-data/wat2020.my-en.zip + unzip wat2020.my-en.zip + # switch to python3 + cat wat2020.my-en/myseg.py |sed 's/^sys.std/###sys.std/g' | sed 's/### sys/sys/g' | sed 's/unichr/chr/g' > seg_my.py + cd $CWD +fi + + +pip install pythainlp sacrebleu indic-nlp-library + diff --git a/fairseq/examples/m2m_100/process_data/clean_histogram.py b/fairseq/examples/m2m_100/process_data/clean_histogram.py new file mode 100644 index 0000000..e24e073 --- /dev/null +++ b/fairseq/examples/m2m_100/process_data/clean_histogram.py @@ -0,0 +1,52 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--src', type=str, help='Source language') +parser.add_argument('--tgt', type=str, help='Target language') +parser.add_argument('--src-file', type=str, help='Input source file') +parser.add_argument('--tgt-file', type=str, help='Input target file') +parser.add_argument('--src-output-file', type=str, help='Output source file') +parser.add_argument('--tgt-output-file', type=str, help='Output target file') +parser.add_argument('--threshold', type=float, default=0.5, help='Threshold') +parser.add_argument('--threshold-character', type=str, default=']', help='Threshold character') +parser.add_argument('--histograms', type=str, help='Path to histograms') + +args = parser.parse_args() + + +def read_hist(f): + ch = [] + for line in f: + c = line[0] + if c == args.threshold_character: + break + ch.append(c) + return ch + + +with(open("{}/{}".format(args.histograms, args.src), 'r', encoding='utf8')) as f: + ch1 = read_hist(f) + +with(open("{}/{}".format(args.histograms, args.tgt), 'r', encoding='utf8')) as f: + ch2 = read_hist(f) + +print("Accepted characters for {}: {}".format(args.src, ch1)) +print("Accepted characters for {}: {}".format(args.tgt, ch2)) + +with open(args.src_file, 'r', encoding='utf8') as fs1, open(args.tgt_file, 'r', encoding='utf8') as fs2, 
open(args.src_output_file, 'w', encoding='utf8') as fos1, open(args.tgt_output_file, 'w', encoding='utf8') as fos2: + ls1 = fs1.readline() + ls2 = fs2.readline() + + while ls1 or ls2: + cnt1 = len([c for c in ls1.strip() if c in ch1]) + cnt2 = len([c for c in ls2.strip() if c in ch2]) + + if cnt1 / len(ls1) > args.threshold and cnt2 / len(ls2) > args.threshold: + fos1.write(ls1) + fos2.write(ls2) + else: + print("{} {} {} \n{} {} {}".format(args.src, cnt1 / len(ls1), ls1.strip(), args.tgt, cnt2 / len(ls2), ls2.strip())) + + ls1 = fs1.readline() + ls2 = fs2.readline() + \ No newline at end of file diff --git a/fairseq/examples/m2m_100/process_data/dedup_data.py b/fairseq/examples/m2m_100/process_data/dedup_data.py new file mode 100644 index 0000000..58d9ed1 --- /dev/null +++ b/fairseq/examples/m2m_100/process_data/dedup_data.py @@ -0,0 +1,91 @@ +import argparse +from collections import namedtuple +import os + +DATADIR = "/path/to/train_data" +DEDUP_FROM_DIR = "/path/to/eval/data" +OUTPUT_DIR = "/path/to/output/data" + + +def main(args): + languages = set() + for language_directory in os.listdir(DATADIR): + if "_" in language_directory: + src, tgt = language_directory.split("_") + languages.add(LanguagePair(src=src, tgt=tgt)) + + data = existing_data() + train_languages = sorted(languages) + for language_pair in train_languages[args.start_index:args.start_index + args.size]: + print(language_pair) + dedup(language_pair, data) + + +LanguagePair = namedtuple("LanguagePair", ["src", "tgt"]) + + +def existing_data(): + data = set() + for file in os.listdir(DEDUP_FROM_DIR): + with open(os.path.join(DEDUP_FROM_DIR, file)) as f: + data |= set(f.readlines()) + return data + +def dedup(language_pair, data, verbose=True, output=True): + train_filenames = LanguagePair( + src=f"{DATADIR}/{language_pair.src}_{language_pair.tgt}/train.{language_pair.src}", + tgt=f"{DATADIR}/{language_pair.src}_{language_pair.tgt}/train.{language_pair.tgt}", + ) + + output_filenames = LanguagePair( 
+ src=f"{OUTPUT_DIR}/train.dedup.{language_pair.src}-{language_pair.tgt}.{language_pair.src}", + tgt=f"{OUTPUT_DIR}/train.dedup.{language_pair.src}-{language_pair.tgt}.{language_pair.tgt}" + ) + + # If output exists, skip this pair. It has already been done. + if (os.path.exists(output_filenames.src) and + os.path.exists(output_filenames.tgt)): + if verbose: + print(f"{language_pair.src}-{language_pair.tgt} already done.") + return + + if verbose: + print(f"{language_pair.src}-{language_pair.tgt} ready, will check dups.") + + # If there is no output, no need to actually do the loop. + if not output: + return + + if os.path.exists(train_filenames.src) and os.path.exists(train_filenames.tgt): + with open(train_filenames.src) as f: + train_source = f.readlines() + + with open(train_filenames.tgt) as f: + train_target = f.readlines() + + # do dedup + new_train_source = [] + new_train_target = [] + for i, train_line in enumerate(train_source): + if train_line not in data and train_target[i] not in data: + new_train_source.append(train_line) + new_train_target.append(train_target[i]) + + assert len(train_source) == len(train_target) + assert len(new_train_source) == len(new_train_target) + assert len(new_train_source) <= len(train_source) + + with open(output_filenames.src, "w") as o: + for line in new_train_source: + o.write(line) + + with open(output_filenames.tgt, "w") as o: + for line in new_train_target: + o.write(line) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-s", "--start-index", required=True, type=int) + parser.add_argument("-n", "--size", required=True, type=int) + main(parser.parse_args()) diff --git a/fairseq/examples/m2m_100/process_data/remove_too_much_punc.py b/fairseq/examples/m2m_100/process_data/remove_too_much_punc.py new file mode 100644 index 0000000..6c280de --- /dev/null +++ b/fairseq/examples/m2m_100/process_data/remove_too_much_punc.py @@ -0,0 +1,36 @@ +import gzip +import argparse +from string 
import punctuation + +def len_no_punc(s, punc): + return len([ch for ch in s if ch in punc]) + +def filter_overpunc(len_npunc, len_sen): + return len_npunc < 0.5*len_sen + +def main(args): + punc = punctuation + "—|–" + print('Processing file {}'.format(args.input)) + with gzip.open(args.input, 'rt', encoding=args.encoding) as tsv: + with open(args.bitext + '.' + args.src_lang, 'wt', encoding=args.encoding) as fsrc: + with open(args.bitext + '.' + args.tgt_lang, 'wt', encoding=args.encoding) as ftgt: + line = tsv.readline() + fields = line.split('\t') + + src, tgt = fields[1], fields[2] + + nchar_npunc_src = len_no_punc(src, punc) + nchar_npunc_tgt = len_no_punc(tgt, punc) + + if filter_overpunc(nchar_npunc_src, len(src)) and filter_overpunc(nchar_npunc_tgt, len(tgt)): + fsrc.write(src.strip() + '\n') + ftgt.write(tgt.strip() + '\n') + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--input", required=True, type=str) + parser.add_argument('--encoding', default='utf-8', help='character encoding for input/output') + parser.add_argument('--bitext', type=str, required=True, help='language direction') + parser.add_argument('--src-lang', type=str, required=True, help='Source language') + parser.add_argument('--tgt-lang', type=str, required=True, help='Target language') + main(parser.parse_args()) diff --git a/fairseq/examples/m2m_100/tok.sh b/fairseq/examples/m2m_100/tok.sh new file mode 100644 index 0000000..ba2ec5a --- /dev/null +++ b/fairseq/examples/m2m_100/tok.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Copyright (c) 2019-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+# + +set -e + +TOKENIZERS_SCRIPTS=tokenizers +INSTALL_PATH=$TOKENIZERS_SCRIPTS/thirdparty + +N_THREADS=8 + +lg=$1 + +MOSES=$INSTALL_PATH/mosesdecoder +REPLACE_UNICODE_PUNCT=$MOSES/scripts/tokenizer/replace-unicode-punctuation.perl +NORM_PUNC=$MOSES/scripts/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$MOSES/scripts/tokenizer/remove-non-printing-char.perl +TOKENIZER=$MOSES/scripts/tokenizer/tokenizer.perl + +# special tokenization for Romanian +WMT16_SCRIPTS=$INSTALL_PATH/wmt16-scripts + +NORMALIZE_ROMANIAN=$WMT16_SCRIPTS/preprocess/normalise-romanian.py +REMOVE_DIACRITICS=$WMT16_SCRIPTS/preprocess/remove-diacritics.py + +# Burmese +MY_SEGMENT=$INSTALL_PATH/seg_my.py + +# Arabic +AR_TOKENIZER=$TOKENIZERS_SCRIPTS/tokenizer_ar.sh + +# Korean +KO_SEGMENT=$TOKENIZERS_SCRIPTS/seg_ko.sh + +# Japanese +JA_SEGMENT=$TOKENIZERS_SCRIPTS/seg_ja.sh + +# Indic +IN_TOKENIZER=$TOKENIZERS_SCRIPTS/tokenize_indic.py +INDIC_RESOURCES_PATH=$INSTALL_PATH/indic_nlp_resources + +# Thai +THAI_TOKENIZER=$TOKENIZERS_SCRIPTS/tokenize_thai.py + +# Chinese +CHINESE_TOKENIZER=$TOKENIZERS_SCRIPTS/tokenize_zh.py + +# Chinese +if [ "$lg" = "zh" ]; then + cat - | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l $lg | $REM_NON_PRINT_CHAR | python $CHINESE_TOKENIZER +# Thai +elif [ "$lg" = "th" ]; then + cat - | python $THAI_TOKENIZER +# Japanese +elif [ "$lg" = "ja" ]; then + cat - | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l $lg | $REM_NON_PRINT_CHAR | ${JA_SEGMENT} +# Korean +elif [ "$lg" = "ko" ]; then + cat - | $REM_NON_PRINT_CHAR | ${KO_SEGMENT} +# Romanian +elif [ "$lg" = "ro" ]; then + cat - | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l $lg | $REM_NON_PRINT_CHAR | $NORMALIZE_ROMANIAN | $REMOVE_DIACRITICS | $TOKENIZER -no-escape -threads $N_THREADS -l $lg +# Burmese +elif [ "$lg" = "my" ]; then + cat - | python ${MY_SEGMENT} +# Arabic +elif [ "$lg" = "ar" ]; then + cat - | ${AR_TOKENIZER} +# Indic +elif [ "$lg" = "ne" ]; then + cat - | python ${IN_TOKENIZER} $lg +elif [ "$lg" = "si" ]; then + cat - | 
python ${IN_TOKENIZER} $lg +elif [ "$lg" = "hi" ]; then + cat - | python ${IN_TOKENIZER} $lg +# other languages +else + cat - | $REPLACE_UNICODE_PUNCT | $NORM_PUNC -l $lg | $REM_NON_PRINT_CHAR | $TOKENIZER -no-escape -threads $N_THREADS -l $lg +fi diff --git a/fairseq/examples/m2m_100/tokenizers/README.md b/fairseq/examples/m2m_100/tokenizers/README.md new file mode 100644 index 0000000..e116932 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/README.md @@ -0,0 +1,18 @@ +# M2M-100 Tokenization + +We apply different tokenization strategies for different languages following the existing literature. Here we provide tok.sh, a tokenizer that can be used to reproduce our results. + +To reproduce the results, follow these steps: + +``` +tgt_lang=... +reference_translation=... +cat generation_output | grep -P "^H" | sort -V | cut -f 3- | sh tok.sh $tgt_lang > hyp +cat $reference_translation |sh tok.sh $tgt_lang > ref +sacrebleu -tok 'none' ref < hyp +``` + +## Installation + +Tools needed for all the languages except Arabic can be installed by running install_dependecies.sh from the m2m_100 directory. +If you want to evaluate Arabic models, please follow the instructions provided here: http://alt.qcri.org/tools/arabic-normalizer/ to install the required tools. diff --git a/fairseq/examples/m2m_100/tokenizers/seg_ja.sh b/fairseq/examples/m2m_100/tokenizers/seg_ja.sh new file mode 100644 index 0000000..be6f5ca --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/seg_ja.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+SCRIPT=`realpath $0` +KYTEA=`dirname $SCRIPT`/thirdparty/kytea +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$KYTEA/lib:/usr/local/lib +export PATH=$PATH:"$KYTEA/bin" + +cat - | tr -d "[:blank:]" | kytea -notags diff --git a/fairseq/examples/m2m_100/tokenizers/seg_ko.sh b/fairseq/examples/m2m_100/tokenizers/seg_ko.sh new file mode 100644 index 0000000..c523d92 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/seg_ko.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +SCRIPT=`realpath $0` +MECAB=`dirname $SCRIPT`/thirdparty/mecab-0.996-ko-0.9.2 + +export PATH=$PATH:"$MECAB/bin":"$MECAB/lib" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:"$MECAB/lib" + +cat - | mecab -O wakati diff --git a/fairseq/examples/m2m_100/tokenizers/thirdparty/.gitignore b/fairseq/examples/m2m_100/tokenizers/thirdparty/.gitignore new file mode 100644 index 0000000..19eb6a9 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/thirdparty/.gitignore @@ -0,0 +1,12 @@ +seg_my.py +indic_nlp_library/ +indic_nlp_resources/ +kytea/ +mecab-0.996-ko-0.9.2.tar.gz +mecab-0.996-ko-0.9.2/ +mosesdecoder/ +wat2020.my-en.zip +wat2020.my-en/ +wmt16-scripts/ +mecab-ko-dic-2.1.1-20180720/ +mecab-ko-dic-2.1.1-20180720.tar.gz \ No newline at end of file diff --git a/fairseq/examples/m2m_100/tokenizers/tokenize_indic.py b/fairseq/examples/m2m_100/tokenizers/tokenize_indic.py new file mode 100644 index 0000000..a44fad0 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/tokenize_indic.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +# Use: echo {text} | python tokenize_indic.py {language} + +import sys + +from indicnlp.normalize.indic_normalize import IndicNormalizerFactory +from indicnlp.tokenize.indic_tokenize import trivial_tokenize + + +factory = IndicNormalizerFactory() +normalizer = factory.get_normalizer( + sys.argv[1], remove_nuktas=False, nasals_mode="do_nothing" +) + +for line in sys.stdin: + normalized_line = normalizer.normalize(line.strip()) + tokenized_line = " ".join(trivial_tokenize(normalized_line, sys.argv[1])) + print(tokenized_line) diff --git a/fairseq/examples/m2m_100/tokenizers/tokenize_thai.py b/fairseq/examples/m2m_100/tokenizers/tokenize_thai.py new file mode 100644 index 0000000..9c72cb8 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/tokenize_thai.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import sys + +from pythainlp import word_tokenize + + +for line in sys.stdin: + print(" ".join(word_tokenize(line.strip()))) diff --git a/fairseq/examples/m2m_100/tokenizers/tokenize_zh.py b/fairseq/examples/m2m_100/tokenizers/tokenize_zh.py new file mode 100644 index 0000000..674b584 --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/tokenize_zh.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import fileinput + +import sacrebleu + + +for line in fileinput.input(): + print(sacrebleu.tokenize_zh(line)) diff --git a/fairseq/examples/m2m_100/tokenizers/tokenizer_ar.sh b/fairseq/examples/m2m_100/tokenizers/tokenizer_ar.sh new file mode 100644 index 0000000..ad35d7a --- /dev/null +++ b/fairseq/examples/m2m_100/tokenizers/tokenizer_ar.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env sh +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +# +# Please follow the instructions here http://alt.qcri.org/tools/arabic-normalizer/ +# to install tools needed for Arabic + +echo "Please install Arabic tools: http://alt.qcri.org/tools/arabic-normalizer/" +echo "Then update environment variables in tokenizer_ar.sh" +exit 1 + +SVMTOOL=... +GOMOSESGO=... +QCRI_ARABIC_NORMALIZER=... + +export PERL5LIB="$SVMTOOL/lib":"$GOMOSESGO/bin/MADA-3.2":$PERL5LIB + + +tempfile=$(mktemp) +cat - > $tempfile + +cd $QCRI_ARABIC_NORMALIZER + +bash qcri_normalizer_mada3.2_aramorph1.2.1.sh $tempfile +cat $tempfile.mada_norm-aramorph.europarl_tok diff --git a/fairseq/examples/mbart/README.md b/fairseq/examples/mbart/README.md new file mode 100644 index 0000000..a45e372 --- /dev/null +++ b/fairseq/examples/mbart/README.md @@ -0,0 +1,123 @@ +# MBART: Multilingual Denoising Pre-training for Neural Machine Translation +[https://arxiv.org/abs/2001.08210] + +## Introduction + +MBART is a sequence-to-sequence denoising auto-encoder pre-trained on large-scale monolingual corpora in many languages using the BART objective. mBART is one of the first methods for pre-training a complete sequence-to-sequence model by denoising full texts in multiple languages, while previous approaches have focused only on the encoder, decoder, or reconstructing parts of the text. 
+ +## Pre-trained models + +Model | Description | # params | Download +---|---|---|--- +`mbart.CC25` | mBART model with 12 encoder and decoder layers trained on 25 languages' monolingual corpus | 610M | [mbart.CC25.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/mbart/mbart.cc25.v2.tar.gz) +`mbart.ft.ro_en` | finetune mBART cc25 model on ro-en language pairs | 610M | [mbart.cc25.ft.enro.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/mbart/mbart.cc25.ft.enro.tar.gz) + +## Results + +**[WMT16 EN-RO](https://www.statmt.org/wmt16/translation-task.html)** + +_(test set, no additional data used)_ + +Model | en-ro | ro-en +---|---|--- +`Random` | 34.3 | 34.0 +`mbart.cc25` | 37.7 | 37.8 +`mbart.enro.bilingual` | 38.5 | 38.5 + +## BPE data +# download model +wget https://dl.fbaipublicfiles.com/fairseq/models/mbart/mbart.cc25.v2.tar.gz +tar -xzvf mbart.cc25.v2.tar.gz +# bpe data +install SPM [here](https://github.com/google/sentencepiece) +```bash +SPM=/path/to/sentencepiece/build/src/spm_encode +MODEL=sentence.bpe.model +${SPM} --model=${MODEL} < ${DATA}/${TRAIN}.${SRC} > ${DATA}/${TRAIN}.spm.${SRC} & +${SPM} --model=${MODEL} < ${DATA}/${TRAIN}.${TGT} > ${DATA}/${TRAIN}.spm.${TGT} & +${SPM} --model=${MODEL} < ${DATA}/${VALID}.${SRC} > ${DATA}/${VALID}.spm.${SRC} & +${SPM} --model=${MODEL} < ${DATA}/${VALID}.${TGT} > ${DATA}/${VALID}.spm.${TGT} & +${SPM} --model=${MODEL} < ${DATA}/${TEST}.${SRC} > ${DATA}/${TEST}.spm.${SRC} & +${SPM} --model=${MODEL} < ${DATA}/${TEST}.${TGT} > ${DATA}/${TEST}.spm.${TGT} & +``` + +## Preprocess data + +```bash +DICT=dict.txt +fairseq-preprocess \ + --source-lang ${SRC} \ + --target-lang ${TGT} \ + --trainpref ${DATA}/${TRAIN}.spm \ + --validpref ${DATA}/${VALID}.spm \ + --testpref ${DATA}/${TEST}.spm \ + --destdir ${DEST}/${NAME} \ + --thresholdtgt 0 \ + --thresholdsrc 0 \ + --srcdict ${DICT} \ + --tgtdict ${DICT} \ + --workers 70 +``` + +## Finetune on EN-RO +Finetune on mbart CC25 + +```bash +PRETRAIN=mbart.cc25 # fix if you moved
the downloaded checkpoint +langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN + +fairseq-train path_2_data \ + --encoder-normalize-before --decoder-normalize-before \ + --arch mbart_large --layernorm-embedding \ + --task translation_from_pretrained_bart \ + --source-lang en_XX --target-lang ro_RO \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler polynomial_decay --lr 3e-05 --warmup-updates 2500 --total-num-update 40000 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 1024 --update-freq 2 \ + --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \ + --seed 222 --log-format simple --log-interval 2 \ + --restore-file $PRETRAIN \ + --reset-optimizer --reset-meters --reset-dataloader --reset-lr-scheduler \ + --langs $langs \ + --ddp-backend legacy_ddp +``` +## Generate on EN-RO +Get sacrebleu on finetuned en-ro model + +get tokenizer [here](https://github.com/rsennrich/wmt16-scripts) +```bash +wget https://dl.fbaipublicfiles.com/fairseq/models/mbart/mbart.cc25.ft.enro.tar.gz +tar -xzvf mbart.cc25.ft.enro.tar.gz +``` + +```bash +model_dir=MBART_finetuned_enro # fix if you moved the checkpoint + +fairseq-generate path_2_data \ + --path $model_dir/model.pt \ + --task translation_from_pretrained_bart \ + --gen-subset test \ + -t ro_RO -s en_XX \ + --bpe 'sentencepiece' --sentencepiece-model $model_dir/sentence.bpe.model \ + --sacrebleu --remove-bpe 'sentencepiece' \ + --batch-size 32 --langs $langs > en_ro + +cat en_ro | grep -P "^H" |sort -V |cut -f 3- | sed 's/\[ro_RO\]//g' |$TOKENIZER ro > en_ro.hyp +cat en_ro | grep -P "^T" |sort -V |cut -f 2- | sed 's/\[ro_RO\]//g' |$TOKENIZER ro > en_ro.ref +sacrebleu -tok 'none' -s 'none' en_ro.ref < en_ro.hyp +``` + +## Citation + +```bibtex 
+@article{liu2020multilingual, + title={Multilingual Denoising Pre-training for Neural Machine Translation}, + author={Yinhan Liu and Jiatao Gu and Naman Goyal and Xian Li and Sergey Edunov and Marjan Ghazvininejad and Mike Lewis and Luke Zettlemoyer}, + year={2020}, + eprint={2001.08210}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` diff --git a/fairseq/examples/megatron_11b/README.md b/fairseq/examples/megatron_11b/README.md new file mode 100644 index 0000000..945c96c --- /dev/null +++ b/fairseq/examples/megatron_11b/README.md @@ -0,0 +1,161 @@ +# Megatron-11b + +Megatron-11b is a unidirectional language model with `11B` parameters based on [Megatron-LM](https://arxiv.org/pdf/1909.08053.pdf). Following the original Megatron work, we trained the model using intra-layer model parallelism with each layer's parameters split across 8 GPUs. + +Megatron-11b is trained on the same data and uses the same byte-pair encoding (BPE) as [RoBERTa](https://arxiv.org/pdf/1907.11692.pdf). + +## Pre-trained models + +Model | Description | # params | # filesize | Download +---|---|---|---|--- +`megatron_11b` | megatron_11b unidirectional language model | 11B | 19Gb | [megatron_11b.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/model_parallel/megatron_11b.tar.gz) + +#### Architecture: + +Param | Value +---|--- +embed_dim | 3072 +ffn_dim | 3072 * 6 +layers | 72 +attention heads | 32 + +#### Training details: + +Param | value +---|--- +bsz | 512 +num_updates | 300,000 +peak_lr | 1.5e-04 +lr scheduler | inverse_sqrt +clip norm | 0.0 + + +## Example training command (model parallel) + +Megatron-11b contains too many parameters to train on a single GPU. Following +the original Megatron work, we adopt an intra-layer model parallel training +approach in which each layer's parameters are split across multiple GPUs and +activations and gradients are communicated during the forward/backward pass, +respectively. 
We similarly split the loss computation using the +`vocab_parallel_cross_entropy` criterion. + +The following training command illustrates how to do model parallel training in +fairseq. We assume that each machine (node) has 8 GPUs among which to split the +model parameters (`--model-parallel-size 8`). If you have access to multiple +nodes, you may combine this with data parallel training by increasing +`--distributed-world-size`. + +To train Megatron-11b on a single node: + + +```bash +fairseq-train \ + --distributed-world-size 8 \ + --memory-efficient-fp16 \ + --num-workers 2 \ + --model-parallel-size 8 \ + --criterion vocab_parallel_cross_entropy \ + --task language_modeling \ + --sample-break-mode none \ + --tokens-per-sample 1024 \ + --arch transformer_lm_megatron_11b \ + --share-decoder-input-output-embed \ + --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-08 --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt --lr 0.00015 \ + --warmup-updates 3000 --weight-decay 0.01 \ + --dropout 0.1 --attention-dropout 0.1 \ + --batch-size 2 \ + --max-update 300000; +``` + +Note: Above was tested on `DGX-1` box, with `8xV100-32Gb` GPUs. + +## Results + +**[Wikitext103](https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/)** + +Model | Valid perplexity | Test perplexity +---|---|--- +`megatron_11b` | 10.64 | 10.54 + + +## Evaluating `megatron_11b` on Wikitext-103 + +#### 1. Downloading Megatron-11b +```bash +# WARNING: this file is 19GB +wget https://dl.fbaipublicfiles.com/fairseq/models/model_parallel/megatron_11b.tar.gz +tar -xzvf megatron_11b.tar.gz +``` + +#### 2. Download Wikitext-103 +```bash +wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip +unzip wikitext-103-raw-v1.zip +``` + +#### 3. Detokenize test tokens +Megatron-11b uses a byte-level BPE that expects raw (untokenized) input. 
Since +the wikitext-103 dataset comes tokenized, we apply a simple detokenization +process to restore the untokenized test set: + +```bash +python -m examples.megatron_11b.detok wikitext-103-raw/wiki.test.raw > wikitext-103-raw/wiki.test.detok +``` + +#### 4. BPE encoding +```bash +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe' + +python -m examples.roberta.multiprocessing_bpe_encoder \ + --encoder-json encoder.json \ + --vocab-bpe vocab.bpe \ + --inputs "wikitext-103-raw/wiki.test.detok" \ + --outputs "wikitext-103-raw/wiki.test.bpe" \ + --workers 60; +``` + +#### 5. Fairseq binarize +```bash +fairseq-preprocess \ + --only-source \ + --testpref wikitext-103-raw/wiki.test.bpe \ + --srcdict megatron_11b/dict.txt \ + --destdir wikitext103-bin; +``` + +#### 6. Evaluating perplexity. +We can now evaluate perplexity on the test set. Note that because we've modified +the test set (via detokenization and BPE), the perplexity reported by +`fairseq-eval-lm` needs to be renormalized. + +Compute unnormalized perplexity: + +```bash +DATA_PATH=wikitext103-bin/ +fairseq-eval-lm \ + $DATA_PATH \ + --path megatron_11b/model.pt \ + --task language_modeling \ + --gen-subset test \ + --batch-size 8 \ + --criterion cross_entropy \ + --context-window 992 \ + --distributed-world-size 8 \ + --model-parallel-size 8; +# Expected PPL (unnormalized_ppl): [8.46] +# Note: the eval command needs to run on 8 GPUs for the released model +``` +Renormalizing formula: `2 ^ ( log_2(unnormalized_PPL) * (270847 / 245566))`. +PPL After normalization: `10.54` + +To renormalize the perplexity, we must account for the change in token count +after detokenizing and applying BPE. The formula for this is: +`2 ^ ( log_2(unnormalized_PPL) * (new_token_cnt / orig_token_cnt))` + +For the wikitext-103 test set, the original token count is `245566` and the +token count after detokenization and applying BPE is `270847`.
+ +The perplexity after renormalization is: +`2 ^ ( log_2(8.46) * (270847 / 245566)) = 10.54` diff --git a/fairseq/examples/megatron_11b/detok.py b/fairseq/examples/megatron_11b/detok.py new file mode 100644 index 0000000..49921b2 --- /dev/null +++ b/fairseq/examples/megatron_11b/detok.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import fileinput + +import sacremoses + + +def main(): + parser = argparse.ArgumentParser(description="") + parser.add_argument("files", nargs="*", help="input files") + args = parser.parse_args() + + detok = sacremoses.MosesDetokenizer() + + for line in fileinput.input(args.files, openhook=fileinput.hook_compressed): + print( + detok.detokenize(line.strip().split(" ")) + .replace(" @", "") + .replace("@ ", "") + .replace(" =", "=") + .replace("= ", "=") + .replace(" – ", "–") + ) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/mms/MODEL_CARD.md b/fairseq/examples/mms/MODEL_CARD.md new file mode 100644 index 0000000..63f997f --- /dev/null +++ b/fairseq/examples/mms/MODEL_CARD.md @@ -0,0 +1,63 @@ +# MMS Model Card + +## Model details + +**Organization developing the model** The FAIR team + +**Model version** This is version 1 of the model. + +**Model type** MMS is speech model, based on the transformer architecture. The pre-trained model comes in two sizes: 300M and 1B parameters. We fine-tune the model for speech recognition and make it available in the 1B variant. We also fine-tune the 1B variant for language identification. + +**License** CC BY-NC + +**Where to send questions or comments about the model** Questions and comments about MMS can be sent via the [GitHub repository](https://github.com/pytorch/fairseq/tree/master/examples/mms) of the project , by opening an issue and tagging it as MMS. 
+ +## Uses + +**Primary intended uses** The primary use of MMS is to perform speech processing research for many more languages and to perform tasks such as automatic speech recognition, language identification, and speech synthesis. + +**Primary intended users** The primary intended users of the model are researchers in speech processing, machine learning and artificial intelligence. + +**Out-of-scope use cases** Fine-tuning the pre-trained models on other labeled datasets or downstream tasks requires further risk evaluation and mitigation. + +## Bias and Risks + +The MMS models were pre-trained on a blend of data from different domains, including readings of the New Testament. In the paper, we describe two studies analyzing gender bias and the use of religious language which conclude that models perform equally well for both genders and that on average, there is little bias for religious language (section 8 of the paper). + +# Training Details + +## Training Data + +MMS is pre-trained on VoxPopuli (parliamentary speech), MLS (read audiobooks), VoxLingua-107 (YouTube speech), CommonVoice (read Wikipedia text), BABEL (telephone conversations), and MMS-lab-U (New Testament readings), MMS-unlab (various read Christian texts). +Models are fine-tuned on FLEURS, VoxLingua-107, MLS, CommonVoice, and MMS-lab. We obtained the language information for MMS-lab, MMS-lab-U and MMS-unlab from our data source and did not manually verify it for every language. + +## Training Procedure + +Please refer to the research paper for details on this. + +# Evaluation + +## Testing Data, Factors & Metrics + +We evaluate the model on different benchmarks for the downstream tasks. The evaluation details are presented in the paper. The models' performance is measured using standard metrics such as character error rate, word error rate, and classification accuracy.
+ + +# Citation + +**BibTeX:** + +``` +@article{pratap2023mms, + title={Scaling Speech Technology to 1,000+ Languages}, + author={Vineel Pratap and Andros Tjandra and Bowen Shi and Paden Tomasello and Arun Babu and Sayani Kundu and Ali Elkahky and Zhaoheng Ni and Apoorv Vyas and Maryam Fazel-Zarandi and Alexei Baevski and Yossi Adi and Xiaohui Zhang and Wei-Ning Hsu and Alexis Conneau and Michael Auli}, + journal={arXiv}, + year={2023} +} + +``` + +# Model Card Contact + +Please reach out to the authors at: [vineelkpratap@meta.com](mailto:vineelkpratap@meta.com) [androstj@meta.com](mailto:androstj@meta.com) [bshi@meta.com](mailto:bshi@meta.com) [michaelauli@meta.com](mailto:michaelauli@gmail.com) + + diff --git a/fairseq/examples/mms/README.md b/fairseq/examples/mms/README.md new file mode 100644 index 0000000..0460dd5 --- /dev/null +++ b/fairseq/examples/mms/README.md @@ -0,0 +1,215 @@ +# MMS: Scaling Speech Technology to 1000+ languages + +The Massively Multilingual Speech (MMS) project expands speech technology from about 100 languages to over 1,000 by building a single multilingual speech recognition model supporting over 1,100 languages (more than 10 times as many as before), language identification models able to identify over [4,000 languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html) (40 times more than before), pretrained models supporting over 1,400 languages, and text-to-speech models for over 1,100 languages. Our goal is to make it easier for people to access information and to use devices in their preferred language. + +You can find details in the paper [Scaling Speech Technology to 1000+ languages](https://research.facebook.com/publications/scaling-speech-technology-to-1000-languages/) and the [blog post](https://ai.facebook.com/blog/multilingual-model-speech-recognition/). + +An overview of the languages covered by MMS can be found [here](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html). 
+ +## 🤗 Transformers + +MMS has been added to Transformers. For more information, please refer to [Transformers' MMS docs](https://huggingface.co/docs/transformers/main/en/model_doc/mms). + +[Click here](https://huggingface.co/models?other=mms) to find all MMS checkpoints on the Hub. + +Checkout the demo here [![Open In HF Spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm-dark.svg)](https://huggingface.co/spaces/facebook/MMS) + +## Finetuned models +### ASR + +| Model | Languages | Dataset | Model | Dictionary* | Supported languages | | +|---|---|---|---|---|---|--- +MMS-1B:FL102 | 102 | FLEURS | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_fl102.pt) | [download](https://dl.fbaipublicfiles.com/mms/asr/dict/mms1b_fl102/eng.txt) | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_fl102_langs.html) | [🤗 Hub](https://huggingface.co/facebook/mms-1b-fl102) +MMS-1B:L1107| 1107 | MMS-lab | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_l1107.pt) | [download](https://dl.fbaipublicfiles.com/mms/asr/dict/mms1b_l1107/eng.txt) | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_l1107_langs.html) | [🤗 Hub](https://huggingface.co/facebook/mms-1b-l1107) +MMS-1B-all| 1162 | MMS-lab + FLEURS
+ CV + VP + MLS | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_all.pt) | [download](https://dl.fbaipublicfiles.com/mms/asr/dict/mms1b_all/eng.txt) | [download](https://dl.fbaipublicfiles.com/mms/asr/mms1b_all_langs.html) | [🤗 Hub](https://huggingface.co/facebook/mms-1b-all) + +\* In the `Dictionary` column, we provide the download link for token dictionary in English language. To download token dictionary for a different language supported by the model, modify the language code in the URL appropriately. For example, to get token dictionary of FL102 model for Hindi language, use [this](https://dl.fbaipublicfiles.com/mms/asr/dict/mms1b_fl102/hin.txt) link. + +### TTS +1. Download the list of [iso codes](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) of 1107 languages. +2. Find the iso code of the target language and download the checkpoint. Each folder contains 3 files: `G_100000.pth`, `config.json`, `vocab.txt`. The `G_100000.pth` is the generator trained for 100K updates, `config.json` is the training config, `vocab.txt` is the vocabulary for the TTS model. +``` +# Examples: +wget https://dl.fbaipublicfiles.com/mms/tts/eng.tar.gz # English (eng) +wget https://dl.fbaipublicfiles.com/mms/tts/azj-script_latin.tar.gz # North Azerbaijani (azj-script_latin) +``` +The above command downloads generator only, which is enough to run TTS inference. If you want the full model checkpoint which also includes the discriminator (`D_100000.pth`) and the optimizer states, download as follows. 
+``` +# Example (full checkpoint: generator + discriminator + optimizer): +wget https://dl.fbaipublicfiles.com/mms/tts/full_model/eng.tar.gz # English (eng) +``` + + +### LID + +\# Languages | Dataset | Model | Dictionary | Supported languages | | +|---|---|---|---|---|--- +126 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l126.pt) | [download](https://dl.fbaipublicfiles.com/mms/lid/dict/l126/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l126_langs.html) | [🤗 Hub](https://huggingface.co/facebook/mms-lid-126) +256 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l256.pt) | [download](https://dl.fbaipublicfiles.com/mms/lid/dict/l256/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l256_langs.html) | [🤗 Hub](https://huggingface.co/facebook/mms-lid-256) +512 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l512.pt) | [download](https://dl.fbaipublicfiles.com/mms/lid/dict/l512/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l512_langs.html)| [🤗 Hub](https://huggingface.co/facebook/mms-lid-512) +1024 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l1024.pt) | [download](https://dl.fbaipublicfiles.com/mms/lid/dict/l1024/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l1024_langs.html)| [🤗 Hub](https://huggingface.co/facebook/mms-lid-1024) +2048 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l2048.pt) | [download](https://dl.fbaipublicfiles.com/mms/lid/dict/l2048/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l2048_langs.html)| [🤗 Hub](https://huggingface.co/facebook/mms-lid-2048) +4017 | FLEURS + VL + MMS-lab-U + MMS-unlab | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l4017.pt) | 
[download](https://dl.fbaipublicfiles.com/mms/lid/dict/l4017/dict.lang.txt) | [download](https://dl.fbaipublicfiles.com/mms/lid/mms1b_l4017_langs.html)| [🤗 Hub](https://huggingface.co/facebook/mms-lid-4017) + +## Commands to run inference + +### ASR +Run this command to transcribe one or more audio files: +```shell command +cd /path/to/fairseq-py/ +python examples/mms/asr/infer/mms_infer.py --model "/path/to/asr/model" --lang lang_code \ + --audio "/path/to/audio_1.wav" "/path/to/audio_2.wav" "/path/to/audio_3.wav" +``` +We also provide an IPython notebook example inside the `asr/tutorial` folder [ipynb](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/asr/tutorial/MMS_ASR_Inference_Colab.ipynb) or [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/facebookresearch/fairseq/blob/main/examples/mms/asr/tutorial/MMS_ASR_Inference_Colab.ipynb) + + +For more advanced configuration and to calculate CER/WER, you can prepare a manifest folder by creating a folder with this format: +``` +$ ls /path/to/manifest +dev.tsv +dev.wrd +dev.ltr +dev.uid + +# dev.tsv each line contains
`), which corresponds to embedding index `2`. +Thus **the model never saw newline characters during pretraining** and newlines should not be used during few-shot prompting. + +This is more clearly illustrated in the following example, which uses fairseq's Hub Interface to tokenize two documents in the desired format: +```python +from fairseq.models.transformer_lm import TransformerLanguageModel +model_dir = '/path/to/en_dense_lm_125m' +lm = TransformerLanguageModel.from_pretrained(model_dir, bpe='gpt2') + +data = """\ +This is the first paragraph of the first document. +This is the second paragraph of the first document. + +This is the first paragraph of the second document.\ +""" + +# The following is wrong, since it will encode newlines present in `data`. +tokens_bad = lm.score(data)['tokens'] +assert '\n' in lm.decode(tokens_bad) # oops, we encoded a newline + +# Instead pass the replace_newlines_with_eos option to get the correct behavior. +tokens_good = lm.score(data, replace_newline_with_eos=True)['tokens'] +assert '\n' not in lm.decode(tokens_good) # no newlines were encoded +``` + +## Citation + +Coming soon. diff --git a/fairseq/examples/moe_lm/data_card.md b/fairseq/examples/moe_lm/data_card.md new file mode 100644 index 0000000..54e694b --- /dev/null +++ b/fairseq/examples/moe_lm/data_card.md @@ -0,0 +1,221 @@ +# Data card for the paper "Efficient Large Scale Language Modeling with Mixtures of Experts" +## Version 1.0.0 + +We follow the recommendations of Gebru et al. (2018) and provide a datacard for the dataset used to train the 1.1T parameter model. + +## Motivation +* **For what purpose was the dataset created? Was there a specific task in mind? Was there a specific gap that needed to be filled? Please provide a description.** +The pre-training data for training the 1.1 T model was created by a union of six English language datasets, including five datasets used by RoBERTa (Liu et al 2019) and the English subset of CC 100. 
The purpose of creating this dataset was to pre-train the language model. + +* **Who created the dataset (e.g., which team, research group) and on behalf of which entity (e.g., company, institution, organization)?** +FAIR (Fundamental Artificial Intelligence Research) + +* **Who funded the creation of the dataset? If there is an associated grant, please provide the name of the grantor and the grant name and number.** +FAIR (Fundamental Artificial Intelligence Research) + +* **Any other comments?** +No. + +## Composition + +* **What do the instances that comprise the dataset represent (e.g., documents, photos, people, countries)? Are there multiple types of instances (e.g., movies, users, and ratings; people and interactions between them; nodes and edges)? Please provide a description.** +The instances are textual documents. The overall dataset is composed from a union of the following datasets - + * BookCorpus (Zhu et al., 2019) consists of more than 10K unpublished books (4GB); + * English Wikipedia, excluding lists, tables and headers (12GB); + * CC-News (Nagel, 2016) contains 63 million English news articles crawled between September 2016 and February 2019 (76GB); + * OpenWebText (Gokaslan and Cohen, 2019), an open source recreation of the WebText dataset used to train GPT-2 (38GB); + * CC-Stories (Trinh and Le, 2018) contains a subset of CommonCrawl data filtered to match the story-like style of Winograd schemas (31GB); + * English CC100 (Wenzek et al., 2020), a dataset extracted from CommonCrawl snapshots between January 2018 and December 2018, filtered to match the style of Wikipedia (292GB). + +* **How many instances are there in total (of each type, if appropriate)?** +The training data contains 112B tokens corresponding to 453 GB of data. + +* **Does the dataset contain all possible instances or is it a sample (not necessarily random) of instances from a larger set? If the dataset is a sample, then what is the larger set?
Is the sample representative of the larger set (e.g., geographic coverage)? If so, please describe how this representativeness was validated/verified. If it is not representative of the larger set, please describe why not (e.g., to cover a more diverse range of instances, because instances were withheld or unavailable).** +The English CC100 section of the dataset is a subset of CommonCrawl snapshots extracted between January 2018 to December 2018, filtered to match the style of Wikipedia. The CC-stories dataset contains a subset of CommonCrawl data filtered to match the story-like style of Winograd schemas. + +* **What data does each instance consist of? “Raw” data (e.g., unprocessed text or images) or features? In either case, please provide a description.** +Each instance consists of raw text data. + +* **Is there a label or target associated with each instance? If so, please provide a description.** +No. + +* **Is any information missing from individual instances? If so, please provide a description, explaining why this information is missing (e.g., because it was unavailable). This does not include intentionally removed information, but might include, e.g., redacted text.** +No. + +* **Are relationships between individual instances made explicit (e.g., users' movie ratings, social network links)? If so, please describe how these relationships are made explicit.** +There are no explicit relationships between individual instances. + +* **Are there recommended data splits (e.g., training, development/validation, testing)? If so, please provide a description of these splits, explaining the rationale behind them.** +We hold out a random validation set of approximately 150MB from the pretraining data, sampled proportionally to each dataset's size in the pretraining corpus. + +* **Are there any errors, sources of noise, or redundancies in the dataset? 
If so, please provide a description.** +N/A + +* **Is the dataset self-contained, or does it link to or otherwise rely on external resources (e.g., websites, tweets, other datasets)?** +It's self-contained. + +* **Does the dataset contain data that might be considered confidential (e.g., data that is protected by legal privilege or by doctor-patient confidentiality, data that includes the content of individuals' non-public communications)? If so, please provide a description.** +The datasets used are publicly available, and the information in them is not considered confidential. + +* **Does the dataset contain data that, if viewed directly, might be offensive, insulting, threatening, or might otherwise cause anxiety? If so, please describe why.** +Parts of the dataset are a subset of public Common Crawl data, which could contain sentences that, if viewed directly, might be offensive, insulting, threatening, or might otherwise cause anxiety. + +* **Does the dataset relate to people? If not, you may skip the remaining questions in this section.** +Some documents of this data relate to people, such as news articles, Wikipedia descriptions, etc. + +* **Does the dataset identify any subpopulations (e.g., by age, gender)? If so, please describe how these subpopulations are identified and provide a description of their respective distributions within the dataset.** +No. + +* **Is it possible to identify individuals (i.e., one or more natural persons), either directly or indirectly (i.e., in combination with other data) from the dataset? If so, please describe how** +In addition to individuals who have Wikipedia pages (celebrities, politicians, etc.), it may be possible to identify other individuals by their names, Twitter account names, etc. if that information is present in Common Crawl. 
+ +* **Does the dataset contain data that might be considered sensitive in any way (e.g., data that reveals racial or ethnic origins, sexual orientations, religious beliefs, political opinions or union memberships, or locations; financial or health data; biometric or genetic data; forms of government identification, such as social security numbers; criminal history)? If so, please provide a description.** +The training dataset is partially derived from Common Crawl, which may contain some sensitive information. + +* **Any other comments?** +No + + +## Collection Process + +* **How was the data associated with each instance acquired? Was the data directly observable (e.g., raw text, movie ratings), reported by subjects (e.g., survey responses), or indirectly inferred/ derived from other data (e.g., part-of-speech tags, model-based guesses for age or language)? If data was reported by subjects or indirectly inferred/derived from other data, was the data validated/verified? If so, please describe how.** +N/A. The dataset is a union of six publicly available datasets. + +* **What mechanisms or procedures were used to collect the data (e.g., hardware apparatus or sensor, manual human curation, software program, software API)? How were these mechanisms or procedures validated?** +N/A + +* **If the dataset is a sample from a larger set, what was the sampling strategy (e.g., deterministic, probabilistic with specific sampling probabilities)?** +Please refer to the main document for details. + +* **Who was involved in the data collection process (e.g., students, crowdworkers, contractors) and how were they compensated (e.g., how much were crowdworkers paid)?** +This data is mined, filtered and sampled by machines. + +* **Over what timeframe was the data collected? Does this timeframe match the creation timeframe of the data associated with the instances (e.g., recent crawl of old news articles)? 
If not, please describe the timeframe in which the data associated with the instances was created.** +Different parts of the dataset were mined over different time periods. +1. The CC-News dataset contains English news articles crawled between September 2016 and February 2019. +2. The English CC-100 dataset was extracted from CommonCrawl snapshots between January 2018 and December 2018. + +* **Were any ethical review processes conducted (e.g., by an institutional review board)? If so, please provide a description of these review processes, including the outcomes, as well as a link or other access point to any supporting documentation.** +No. + +* **Does the dataset relate to people? If not, you may skip the remainder of the questions in this section.** +No. + +* **Did you collect the data from the individuals in question directly, or obtain it via third parties or other sources (e.g., websites)?** +N/A + +* **Were the individuals in question notified about the data collection? If so, please describe (or show with screenshots or other information) how notice was provided, and provide a link or other access point to, or otherwise reproduce, the exact language of the notification itself.** +N/A + +* **Did the individuals in question consent to the collection and use of their data? If so, please describe (or show with screenshots or other information) how consent was requested and provided, and provide a link or other access point to, or otherwise reproduce, the exact language to which the individuals consented.** +N/A + +* **If consent was obtained, were the consenting individuals provided with a mechanism to revoke their consent in the future or for certain uses? If so, please provide a description, as well as a link or other access point to the mechanism (if appropriate).** +N/A + +* **Has an analysis of the potential impact of the dataset and its use on data subjects (e.g., a data protection impact analysis) been conducted? 
If so, please provide a description of this analysis, including the outcomes, as well as a link or other access point to any supporting documentation.** +Some responsible AI related evaluations were performed. Please refer to the main document and the model card for the paper. + +* **Any other comments?** +No + + +## Preprocessing/cleaning/labeling + + +* **Was any preprocessing/cleaning/labeling of the data done (e.g., discretization or bucketing, tokenization, part-of-speech tagging, SIFT feature extraction, removal of instances, processing of missing values)? If so, please provide a description. If not, you may skip the remainder of the questions in this section.** +The component datasets went through standard cleaning and re-formatting practices, including removing repetitive/non-informative text like "Chapter One", or "This ebook by Project Gutenberg". + +* **Was the “raw” data saved in addition to the preprocessed/cleaned/labeled data (e.g., to support unanticipated future uses)? If so, please provide a link or other access point to the “raw” data.** +The "raw" component datasets are publicly available in their respective locations (more details can be seen in the respective papers linked in references). + +* **Is the software used to preprocess/clean/label the instances available? If so, please provide a link or other access point.** +The software is proprietary to Meta Platforms and currently unavailable publicly. + +* **Any other comments?** +No + + +## Uses + +* **Has the dataset been used for any tasks already? If so, please provide a description.** +Yes, this dataset was used to pre-train the models described in the paper. + +* **Is there a repository that links to any or all papers or systems that use the dataset? If so, please provide a link or other access point.** +No. + +* **What (other) tasks could the dataset be used for?** +This data can be used to pretrain English language models, which are foundational to many current and future language tasks.
+ +* **Is there anything about the composition of the dataset or the way it was collected and preprocessed/cleaned/labeled that might impact future uses? For example, is there anything that a future user might need to know to avoid uses that could result in unfair treatment of individuals or groups (e.g., stereotyping, quality of service issues) or other undesirable harms (e.g., financial harms, legal risks)? If so, please provide a description. Is there anything a future user could do to mitigate these undesirable harms?** +The pipeline for creating this dataset paves a way for building a scalable infrastructure for mining datasets to be used for training large-scale models. + +* **Are there tasks for which the dataset should not be used? If so, please provide a description.** +No. + +* **Any other comments?** +No. + +## Distribution + + +* **Will the dataset be distributed to third parties outside of the entity (e.g., company, institution, organization) on behalf of which the dataset was created? If so, please provide a description.** +No. + +* **How will the dataset be distributed (e.g., tarball on website, API, GitHub)? Does the dataset have a digital object identifier (DOI)?** +N/A + +* **When will the dataset be distributed?** +N/A + +* **Will the dataset be distributed under a copyright or other intellectual property (IP) license, and/or under applicable terms of use (ToU)? If so, please describe this license and/or ToU, and provide a link or other access point to, or otherwise reproduce, any relevant licensing terms or ToU, as well as any fees associated with these restrictions.** +No. + +* **Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, please describe these restrictions, and provide a link or other access point to, or otherwise reproduce, any relevant licensing terms, as well as any fees associated with these restrictions.** +No.
+ +* **Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? If so, please describe these restrictions, and provide a link or other access point to, or otherwise reproduce, any supporting documentation.** +N/A + +* **Any other comments?** +No. + +## Maintenance + +* **Who is supporting/hosting/maintaining the dataset?** +FAIR (Fundamental Artificial Intelligence Research) + +* **How can the owner/curator/manager of the dataset be contacted (e.g., email address)?** +Refer to the main document. + +* **Is there an erratum? If so, please provide a link or other access point.** +N/A + +* **Will the dataset be updated (e.g., to correct labeling errors, add new instances, delete instances)? If so, please describe how often, by whom, and how updates will be communicated to users (e.g., mailing list, GitHub)?** +No plan for updating. + +* **If the dataset relates to people, are there applicable limits on the retention of the data associated with the instances (e.g., were individuals in question told that their data would be retained for a fixed period of time and then deleted)? If so, please describe these limits and explain how they will be enforced.** +N/A + +* **Will older versions of the dataset continue to be supported/hosted/maintained? If so, please describe how. If not, please describe how its obsolescence will be communicated to users.** +N/A + +* **If others want to extend/augment/build on/contribute to the dataset, is there a mechanism for them to do so? If so, please provide a description. Will these contributions be validated/ verified? If so, please describe how. If not, why not? Is there a process for communicating/ distributing these contributions to other users? If so, please provide a description.** +No. + +* **Any other comments?** +No. + +## References +Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. 
Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692. + +Yukun Zhu, Ryan Kiros, Richard Zemel, Ruslan Salakhutdinov, Raquel Urtasun, Antonio Torralba, and Sanja Fidler. 2019. Aligning books and movies: Towards story-like visual explanations by watching movies and reading books. arXiv:1506.06724. + +Sebastian Nagel. 2016. Cc-news. http://web.archive.org/save/http://commoncrawl.org/2016/10/news-dataset-available. + +Aaron Gokaslan and Vanya Cohen. 2019. Openwebtext corpus. http://web.archive.org/save/http://Skylion007.github.io/OpenWebTextCorpus + +Trieu H Trinh and Quoc V Le. 2018. A simple method for commonsense reasoning. arXiv preprint arXiv:1806.02847. + +Guillaume Wenzek, Marie-Anne Lachaux, Alexis Conneau, Vishrav Chaudhary, Francisco Guzmán, Armand Joulin, and Edouard Grave. 2020. CCNet: Extracting high quality monolingual datasets from web crawl data. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 4003–4012, Marseille, France. European Language Resources Association. + diff --git a/fairseq/examples/moe_lm/model_card.md b/fairseq/examples/moe_lm/model_card.md new file mode 100644 index 0000000..a1cd681 --- /dev/null +++ b/fairseq/examples/moe_lm/model_card.md @@ -0,0 +1,170 @@ +# Model card for the paper "Efficient Large Scale Language Modeling with Mixtures of Experts" +## Version 1.0.0 + +### Model developer +FAIR (Fundamental Artificial Intelligence Research) + +### Model type +An autoregressive English language model trained on a union of six English language datasets. We explore dense and sparse (MoE based) architectures in the paper. +* Dense models - Our dense models range from 125M parameters to 13B parameters. +* Sparse (MoE) models - Our MoE based models range from 15B parameters to 1.1 Trillion parameters. +This model card focuses on the 1.1 Trillion parameter model, but the discussion +applies to all of the models explored in this work. + +### Citation details +Artetxe et al.
(2021): Efficient Large Scale Language Modeling with Mixtures of Experts + +### Model Feedback Channel +fairseq + +## Intended use +### Primary intended use +For research purposes only, e.g. reproducing model evaluation results. Generation is only used in a limited capacity for explanation/justification or for prompting/probing/priming for class labels. + +### Out of scope uses +The primary purpose of the model is not to generate language, although the model is capable of doing that. + +## Factors influencing model performance +This section discusses potential risks associated with using the model. + +### Relevant factors +Based on known problems with NLP technology, potential relevant factors include bias (gender, profession, race and religion). + +### Evaluation factors +The 1.1T model was evaluated on StereoSet and CrowS-Pairs datasets to quantify encoded bias in the model. + +## Metrics +### Model performance measures +The 1.1T parameter model was primarily evaluated on +1. In-domain and out-of-domain language modeling perplexity. +2. Zero-shot and few-shot priming. +3. Fully supervised finetuning. + +### Approaches to handle uncertainty +For few-shot learning, we report the average results across 25 runs, randomly sampling a different set of few-shot examples from the training set each time. + +## Evaluation data +## Zero Shot evaluation + +### HellaSwag +#### Description +HellaSwag is a dataset for evaluating commonsense reasoning. + +### PIQA +#### Description +PIQA is a dataset designed to evaluate reasoning about Physical Commonsense in Natural Language + +### ReCoRd +#### Description +Reading Comprehension with Commonsense Reasoning Dataset (ReCoRD) is a large-scale reading comprehension dataset which requires commonsense reasoning. ReCoRD consists of queries automatically generated from CNN/Daily Mail news articles; the answer to each query is a text span from a summarizing passage of the corresponding news. 
The goal of ReCoRD is to evaluate a machine's ability to perform commonsense reasoning in reading comprehension. + +## Few Shot evaluation +### Winogrande +#### Description +Winogrande is a benchmark for commonsense reasoning. The dataset contains pronoun resolution problems originally designed to be unsolvable for statistical models that rely on selectional preferences or word associations. + +### StoryCloze +#### Description +StoryCloze is a new commonsense reasoning framework for evaluating story understanding, story generation, and script learning. This test requires a system to choose the correct ending to a four-sentence story. + +### OpenBookQA +#### Description +OpenBookQA is a new kind of question-answering dataset modeled after open book exams for assessing human understanding of a subject. It consists of 5,957 multiple-choice elementary-level science questions (4,957 train, 500 dev, 500 test), which probe the understanding of a small “book” of 1,326 core science facts and the application of these facts to novel situations. + +## Fully supervised evaluation + +### BoolQ +#### Description +BoolQ is a question answering dataset for yes/no questions containing 15942 examples. These questions are naturally occurring – they are generated in unprompted and unconstrained settings. Each example is a triplet of (question, passage, answer), with the title of the page as optional additional context. + +### SST-2 +#### Description +SST-2 (or SST-binary) is a binary classification dataset where the goal is to differentiate between negative or somewhat negative vs somewhat positive or positive. + +### MNLI +#### Description +The Multi-Genre Natural Language Inference (MultiNLI) corpus is a crowd-sourced collection of 433k sentence pairs annotated with textual entailment information. The corpus is modeled on the SNLI corpus, but differs in that it covers a range of genres of spoken and written text, and supports a distinctive cross-genre generalization evaluation.
+ +## Responsible AI (RAI) evaluation +### StereoSet +#### Description +A large-scale natural dataset in English to measure stereotypical biases in four domains: gender, profession, race, and religion + +#### Motivation for dataset use +The motivation for evaluating the 1.1T parameter model on this dataset is to evaluate the model's stereotype bias in gender, profession, race, and religion + +### CrowS +#### Description +Challenge Dataset for Measuring Social Biases in Masked Language Models + +#### Motivation for dataset use +The motivation for evaluating the 1.1T parameter model on this dataset is to evaluate the model’s bias in the domains of race, religion and age + +---- + +## Training data +### BookCorpus +#### Description +A dataset consisting of more than 10K unpublished books. 4GB in size. (Zhu et al., 2019) + +### English Wikipedia +#### Description +Data from English Wikipedia, excluding lists, tables and headers. 12GB in size. + +### CC-News +#### Description +A dataset containing 63 million English news articles crawled between September 2016 and February 2019. 76GB in size. (Nagel, 2016) + +### OpenWebText +#### Description +An open source recreation of the WebText dataset used to train GPT-2. 38GB in size. (Gokaslan and Cohen, 2019) + +### CC-Stories +#### Description +A dataset containing a subset of CommonCrawl data filtered to match the story-like style of Winograd schemas. 31GB in size. (Trinh and Le, 2018) + +### English CC100 +#### Description +A dataset extracted from CommonCrawl snapshots between January 2018 and December 2018, filtered to match the style of Wikipedia following the methodology introduced in CCNet (https://arxiv.org/abs/1911.00359). 292GB in size. (Wenzek et al., 2020) + +## Responsible AI (RAI) Dimensions +### Fairness (Bias and inclusion) +The 1.1T parameter model was evaluated on the StereoSet and CrowS-Pairs datasets for inherent bias in the model, and bias as a result of the data.
Similar to StereoSet, we observe that both the dense and MoE models get worse in terms of the Stereotype Score (SS) with scale. + +### Privacy and security +The 1.1T model did not have any special Privacy and Security considerations. The training data and evaluation data were both public and went through standard Meta privacy and licensing procedures. + +### Transparency and control +In the spirit of transparency and accountability we have created this model card for the 1.1T parameter model and a data card for the training data (referenced in Artetxe et al. (2021)). + +### Efficiency (Green AI) +The 1.1T parameter model is trained as a Mixture of Experts (MoE) model. Mixture of expert (MoE) models are efficient because they leverage sparse computation, i.e., only a small fraction of parameters are active for any given input. For instance, our 1.1T parameter MoE model requires only 30% more FLOPS compared to a 6.7B parameter dense model, i.e., a 160x increase in parameters with only a 30% increase in FLOPS. Notably, MoE models achieve much better validation perplexity for a given compute budget compared to dense models. + +## References +Rowan Zellers, Ari Holtzman, Yonatan Bisk, Ali Farhadi, and Yejin Choi. 2019. HellaSwag: Can a machine really finish your sentence? In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 4791– 4800, Florence, Italy. Association for Computational Linguistics. + +Yonatan Bisk, Rowan Zellers, Ronan Le bras, Jianfeng Gao, and Yejin Choi. 2020. Piqa: Reasoning about physical commonsense in natural language. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05):7432–7439. + +Sheng Zhang, Xiaodong Liu, Jingjing Liu, Jianfeng Gao, Kevin Duh, and Benjamin Van Durme. 2018. ReCoRD: Bridging the gap between human and machine commonsense reading comprehension. arXiv preprint 1810.12885. + +Keisuke Sakaguchi, Ronan Le Bras, Chandra Bhagavatula, and Yejin Choi. 2020. 
Winogrande: An adversarial winograd schema challenge at scale. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05):8732–8740. + +Nasrin Mostafazadeh, Nathanael Chambers, Xiaodong He, Devi Parikh, Dhruv Batra, Lucy Vanderwende, Pushmeet Kohli, and James Allen. 2016. A corpus and cloze evaluation for deeper understanding of commonsense stories. In Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 839–849, San Diego, California. Association for Computational Linguistics. + +Todor Mihaylov, Peter Clark, Tushar Khot, and Ashish Sabharwal. 2018. Can a suit of armor conduct electricity? a new dataset for open book question answering. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 2381–2391, Brussels, Belgium. Association for Computational Linguistics. + +Christopher Clark and Kenton Lee and Ming-Wei Chang and Tom Kwiatkowski and Michael Collins and Kristina Toutanova. 2019. BoolQ: Exploring the Surprising Difficulty of Natural Yes/No Questions + +Moin Nadeem, Anna Bethke, and Siva Reddy. 2021. StereoSet: Measuring stereotypical bias in pretrained language models. In Association for Computational Linguistics (ACL). + +Nikita Nangia, Clara Vania, Rasika Bhalerao, and Samuel R. Bowman. 2020. CrowS-pairs: A challenge dataset for measuring social biases in masked language models. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 1953–1967, Online. Association for Computational Linguistics. + +Yukun Zhu, Ryan Kiros, Richard Zemel, Ruslan Salakhutdinov, Raquel Urtasun, Antonio Torralba, and Sanja Fidler. 2019. Aligning books and movies: Towards story-like visual explanations by watching movies and reading books. arXiv:1506.06724. + +Sebastian Nagel. 2016. Cc-news. http: //web.archive.org/save/http: //commoncrawl.org/2016/10/news-dataset-available. 
+ +Aaron Gokaslan and Vanya Cohen. 2019. Openwebtext corpus. http://web.archive.org/save/http://Skylion007.github.io/OpenWebTextCorpus + +Trieu H Trinh and Quoc V Le. 2018. A simple method for commonsense reasoning. arXiv preprint arXiv:1806.02847. + +Guillaume Wenzek, Marie-Anne Lachaux, Alexis Conneau, Vishrav Chaudhary, Francisco Guzmán, Armand Joulin, and Edouard Grave. 2020. CCNet: Extracting high quality monolingual datasets from web crawl data. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 4003–4012, Marseille, France. European Language Resources Association. diff --git a/fairseq/examples/mr_hubert/README.md b/fairseq/examples/mr_hubert/README.md new file mode 100644 index 0000000..e72c09c --- /dev/null +++ b/fairseq/examples/mr_hubert/README.md @@ -0,0 +1,187 @@ +# MR-HuBERT + +## Pre-trained models + +### Main models +Model | Pretraining Data | Model | Paper Reference +|---|---|---|--- +MR-HuBERT Base (~97M) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/mono_base/mrhubert_mono_base.pt) | mono\_base +MR-HuBERT Large (~321M) | [Libri-Light](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/mono_large/mrhubert_mono_large.pt) | mono\_large +Multilingual MR-HuBERT Base (~97M) | [Voxpopuli](https://github.com/facebookresearch/voxpopuli) 100k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/multi_base/multi_base.pt) | multi\_base +Multilingual MR-HuBERT Large (~321M) | [Voxpopuli](https://github.com/facebookresearch/voxpopuli) 100k hr | [download 400k steps](https://dl.fbaipublicfiles.com/mrhubert/multi_large/multi_large_400k.pt) or [download 600k steps](https://dl.fbaipublicfiles.com/mrhubert/multi_large/multi_large_600k.pt) | Not in the paper + + +### Ablation models +Model | Pretraining Data | Model | Paper Reference +|---|---|---|--- +MR-HuBERT Base (2-4-6 lyrs) | [Librispeech](http://www.openslr.org/12)
960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b1-a/b1-a.pt) | (B.1)-a +MR-HuBERT Base (5-2-5 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b1-b/b1-b.pt) | (B.1)-b +MR-HuBERT Base (6-4-2 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b1-c/b1-c.pt) | (B.1)-c +MR-HuBERT Base (3res 3-2-2-2-3 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b2-a/b2-a.pt) | (B.2)-a +MR-HuBERT Base (3res 2-2-4-2-2 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b2-b/b2-b.pt) | (B.2)-b +MR-HuBERT Base (3res 2-2-2-2-2 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b2-c/b2-c.pt) | (B.2)-c +MR-HuBERT Base (Simple sampling) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b3-a/b3-a.pt) | (B.3)-a +MR-HuBERT Base (Single target) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b4-a/b4-a.pt) | (B.4)-a +MR-HuBERT Base (Simple Sampling + single target) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b4-b/b4-b.pt) | (B.4)-b +MR-HuBERT Base (Mono-resolution 20ms) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b5-a/b5-a.pt) | (B.5)-a +MR-HuBERT Base (3-3-3 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b6-a/b6-a.pt) | (B.6)-a +MR-HuBERT Base (Mono-resolution 20ms, 3-3-3 lyrs) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b6-b/b6-b.pt) | (B.6)-b +MR-HuBERT Base (HuBERT 20ms&40ms units) | [Librispeech](http://www.openslr.org/12) 960 hr | 
[download](https://dl.fbaipublicfiles.com/mrhubert/b7-a/b7-a.pt) | (B.7)-a +MR-HuBERT Base (Encodec 50Hz unit) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b7-b/b7-b.pt) | (B.7)-b +MR-HuBERT Base (Encodec 50Hz units and 25Hz units) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b7-c/b7-c.pt) | (B.7)-c +MR-HuBERT Base (Encodec 50Hz units stream 0&1 ) | [Librispeech](http://www.openslr.org/12) 960 hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b7-d/b7-d.pt) | (B.7)-d +MR-HuBERT Large (no audio norm) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-a/b8-a.pt) | (B.8)-a +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-b/b8-b.pt) | (B.8)-b +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-c/b8-c.pt) | (B.8)-c +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-d/b8-d.pt) | (B.8)-d +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-e/b8-e.pt) | (B.8)-e +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-f/b8-f.pt) | (B.8)-f +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-g/b8-g.pt) | (B.8)-g +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | 
[download](https://dl.fbaipublicfiles.com/mrhubert/b8-h/b8-h.pt) | (B.8)-h +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-i/b8-i.pt) | (B.8)-i +MR-HuBERT Large (check paper ) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/b8-j/b8-j.pt) | (B.8)-j +Multilingual MR-HuBERT Large (Simple sampling) | [Voxpopuli](https://github.com/facebookresearch/voxpopuli) 100k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/multi_large_simple/multi_large_simple.pt) | Not in paper +MR-HuBERT xLarge (from HuBERT-base label) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/mono_xlarge/v1.pt) | Not in paper +MR-HuBERT xLarge (from HuBERT-large label) | [LibriLight](https://github.com/facebookresearch/libri-light) 60k hr | [download](https://dl.fbaipublicfiles.com/mrhubert/mono_xlarge/v2.pt) | Not in paper + +## Load a model +``` +ckpt_path = "/path/to/the/checkpoint.pt" +models, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([ckpt_path]) +model = models[0] +``` + +## Train a new model + +### Data preparation + +Follow the steps in `./simple_kmeans` to create: +- `{train,valid}.tsv` waveform list files with length information +``` +/path/to/your/audio/files +file1.wav\t160000 +file2.wav\t154600 +... +filen.wav\t54362 +``` +- `{train,valid}.km` frame-aligned pseudo label files (the order is the same as wavefiles in the tsv file). +``` +44 44 44 48 48 962 962 962 962 962 962 962 962 967 967 967 967 967 967 967 967 370 852 370 ... 18 18 745 745 +44 44 44 48 48 962 962 962 147 147 147 147 147 147 147 147 147 147 147 147 176 176 271 271 ... 27 27 745 745 +... +44 44 44 48 962 962 962 962 962 962 377 377 377 77 77 852 696 694 433 578 578 82 740 622 ... 
27 27 745 745 +``` +- `dict.km.txt` a dummy dictionary (first column is id, the second is dummy one) +``` +0 1 +1 1 +2 1 +... +999 1 +``` + +The `label_rate` is the same as the feature frame rate used for clustering, +which is 100Hz for MFCC features and 50Hz for HuBERT features by default. + +### Pre-train a MR-HuBERT model + +Suppose `{train,valid}.tsv` are saved at `/path/to/data`, `{train,valid}.km` +are saved at `/path/to/labels`, and the label rate is 100Hz. + +To train a base model (12 layer transformer), run: +```sh +$ python fairseq_cli/hydra_train.py \ + --config-dir /path/to/fairseq-py/examples/mr_hubert/config/pretrain \ + --config-name mrhubert_base_librispeech \ + task.data=/path/to/data task.label_dir=/path/to/labels \ + task.labels='["km"]' model.label_rate=100 \ + task.label_rate_ratios='[1, 2]' +``` + +Please see sample pre-training scripts `train.sh` for an example script. + +### Fine-tune a MR-HuBERT model with a CTC loss + +Suppose `{train,valid}.tsv` are saved at `/path/to/data`, and their +corresponding character transcripts `{train,valid}.ltr` are saved at +`/path/to/trans`. A typical ltr file lists the transcripts in the same order as the waveform files in the tsv file: +``` +HOW | ARE | YOU +... +THANK | YOU +``` + +To fine-tune a pre-trained MR-HuBERT model at `/path/to/checkpoint`, run +```sh +$ python fairseq_cli/hydra_train.py \ + --config-dir /path/to/fairseq-py/examples/mr_hubert/config/finetune \ + --config-name base_10h \ + task.data=/path/to/data task.label_dir=/path/to/trans \ + model.multires_hubert_path=/path/to/checkpoint +``` + +Please see sample fine-tuning scripts `finetune.sh` for an example script. + +### Decode a MR-HuBERT model + +Suppose the `test.tsv` and `test.ltr` are the waveform list and transcripts of +the split to be decoded, saved at `/path/to/data`, and the fine-tuned model is +saved at `/path/to/checkpoint`.
+ + +We support three decoding modes: +- Viterbi decoding: greedy decoding without a language model +- KenLM decoding: decoding with an arpa-format KenLM n-gram language model +- Fairseq-LM decoding: decoding with a Fairseq neural language model (not fully tested) + + +#### Viterbi decoding + +`task.normalize` needs to be consistent with the value used during fine-tuning. +Decoding results will be saved at +`/path/to/experiment/directory/decode/viterbi/test`. + +```sh +$ python examples/speech_recognition/new/infer.py \ + --config-dir /path/to/fairseq-py/examples/mr_hubert/config/decode \ + --config-name infer \ + task.data=/path/to/data \ + task.normalize=[true|false] \ + decoding.exp_dir=/path/to/experiment/directory \ + common_eval.path=/path/to/checkpoint \ + dataset.gen_subset=test +``` + +#### KenLM / Fairseq-LM decoding + +Suppose the pronunciation lexicon and the n-gram LM are saved at +`/path/to/lexicon` and `/path/to/arpa`, respectively. Decoding results will be +saved at `/path/to/experiment/directory/decode/kenlm/test`. + +```sh +$ python examples/speech_recognition/new/infer.py \ + --config-dir /path/to/fairseq-py/examples/mr_hubert/config/decode \ + --config-name infer_lm \ + task.data=/path/to/data \ + task.normalize=[true|false] \ + decoding.exp_dir=/path/to/experiment/directory \ + common_eval.path=/path/to/checkpoint \ + dataset.gen_subset=test \ + decoding.decoder.lexicon=/path/to/lexicon \ + decoding.decoder.lmpath=/path/to/arpa +``` + +The command above uses the default decoding hyperparameters, which can be found +in `examples/speech_recognition/hydra/decoder.py`. These parameters can be +configured from the command line. For example, to search with a beam size of +500, we can append the command above with `decoding.decoder.beam=500`.
+Important parameters include: +- decoding.decoder.beam +- decoding.decoder.beamthreshold +- decoding.decoder.lmweight +- decoding.decoder.wordscore +- decoding.decoder.silweight + +To decode with a Fairseq LM, you may check the usage examples in wav2vec2 or hubert examples. + +Please see sample decoding scripts `decode.sh` for an example script. diff --git a/fairseq/examples/mr_hubert/config/decode/infer.yaml b/fairseq/examples/mr_hubert/config/decode/infer.yaml new file mode 100644 index 0000000..eff3980 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/decode/infer.yaml @@ -0,0 +1,30 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/viterbi + sweep: + dir: ${common_eval.results_path} + subdir: viterbi + +task: + _name: multires_hubert_pretraining + single_target: true + fine_tuning: true + label_rate_ratios: ??? + data: ??? + normalize: false + +decoding: + type: viterbi + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/fairseq/examples/mr_hubert/config/decode/infer_lm.yaml b/fairseq/examples/mr_hubert/config/decode/infer_lm.yaml new file mode 100644 index 0000000..535b950 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/decode/infer_lm.yaml @@ -0,0 +1,37 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + sweep: + dir: ${common_eval.results_path} + subdir: beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + +task: + _name: multires_hubert_pretraining + single_target: true + fine_tuning: true + data: ??? + label_rate_ratios: ??? + normalize: ??? + +decoding: + type: kenlm + lexicon: ??? + lmpath: ??? 
+ beamthreshold: 100 + beam: 500 + lmweight: 1.5 + wordscore: -1 + silweight: 0 + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm.yaml b/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm.yaml new file mode 100644 index 0000000..0b80658 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm.yaml @@ -0,0 +1,17 @@ +# @package _global_ +hydra: + launcher: + cpus_per_task: ${distributed_training.distributed_world_size} + gpus_per_node: ${distributed_training.distributed_world_size} + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 1 + mem_gb: 200 + timeout_min: 4320 + max_num_timeout: 50 + name: ${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/submitit + +distributed_training: + distributed_world_size: 1 + distributed_no_spawn: true + distributed_port: 29761 diff --git a/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm_8gpu.yaml b/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm_8gpu.yaml new file mode 100644 index 0000000..2f669f3 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/decode/run/submitit_slurm_8gpu.yaml @@ -0,0 +1,17 @@ +# @package _global_ +hydra: + launcher: + cpus_per_task: ${distributed_training.distributed_world_size} + gpus_per_node: ${distributed_training.distributed_world_size} + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 1 + mem_gb: 200 + timeout_min: 4320 + max_num_timeout: 50 + name: ${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/submitit + +distributed_training: + distributed_world_size: 8 + distributed_no_spawn: true + distributed_port: 29761 diff --git a/fairseq/examples/mr_hubert/config/finetune/base_100h.yaml b/fairseq/examples/mr_hubert/config/finetune/base_100h.yaml new file mode 100644 index 0000000..c52a118 --- /dev/null +++ 
b/fairseq/examples/mr_hubert/config/finetune/base_100h.yaml @@ -0,0 +1,97 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 3200000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_100h + valid_subset: dev_other + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 80000 + lr: [3e-5] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? + apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/finetune/base_100h_large.yaml b/fairseq/examples/mr_hubert/config/finetune/base_100h_large.yaml new file mode 100644 index 0000000..1d0c0da --- /dev/null +++ b/fairseq/examples/mr_hubert/config/finetune/base_100h_large.yaml @@ -0,0 +1,97 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 1600000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_100h + valid_subset: dev_other + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 80000 + lr: [3e-5] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? 
+ apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/finetune/base_10h.yaml b/fairseq/examples/mr_hubert/config/finetune/base_10h.yaml new file mode 100644 index 0000000..25123e4 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/finetune/base_10h.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 5 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? 
+ normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 3200000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_10h + valid_subset: dev + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 25000 + lr: [2e-5] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? + apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/finetune/base_10h_large.yaml b/fairseq/examples/mr_hubert/config/finetune/base_10h_large.yaml new file mode 100644 index 0000000..65448c7 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/finetune/base_10h_large.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 5 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 3200000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_10h + valid_subset: dev + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 25000 + lr: [2e-5] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? 
+ apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/finetune/base_1h.yaml b/fairseq/examples/mr_hubert/config/finetune/base_1h.yaml new file mode 100644 index 0000000..7459c3f --- /dev/null +++ b/fairseq/examples/mr_hubert/config/finetune/base_1h.yaml @@ -0,0 +1,100 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 50 + keep_interval_updates: 1 + save_interval_updates: 1000 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? 
+ normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 3200000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 1000 + train_subset: train_1h + valid_subset: dev_other + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 13000 + lr: [5e-5] + sentence_avg: true + update_freq: [4] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? + apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/finetune/base_1h_large.yaml b/fairseq/examples/mr_hubert/config/finetune/base_1h_large.yaml new file mode 100644 index 0000000..34ef4dc --- /dev/null +++ b/fairseq/examples/mr_hubert/config/finetune/base_1h_large.yaml @@ -0,0 +1,99 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: multires_hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + label_rate_ratios: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + single_target: true + +dataset: + num_workers: 0 + max_tokens: 1280000 + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_10h + valid_subset: dev + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 25000 + lr: [3e-4] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: multires_hubert_ctc + multires_hubert_path: ??? 
+ apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.multires_hubert_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/pretrain/mrhubert_base_librispeech.yaml b/fairseq/examples/mr_hubert/config/pretrain/mrhubert_base_librispeech.yaml new file mode 100644 index 0000000..16a35d3 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/pretrain/mrhubert_base_librispeech.yaml @@ -0,0 +1,103 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + min_loss_scale: 1e-8 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: multires_hubert_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + label_rate_ratios: ??? 
+ sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + # max_keep_size: 300000 + # max_keep_size: 50000 + + +dataset: + num_workers: 0 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: multires_hubert + label_rate: ??? + label_rate_ratios: ${task.label_rate_ratios} + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layers: 4 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + conv_adapator_kernal: 1 + use_single_target: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '/' + exclude_keys: + - run + - task.data + - task.label_dir + - common.min_loss_scale + - common.log_interval + - optimization.clip_norm diff --git a/fairseq/examples/mr_hubert/config/pretrain/mrhubert_large_librilight.yaml b/fairseq/examples/mr_hubert/config/pretrain/mrhubert_large_librilight.yaml new file mode 100644 index 0000000..423f3b2 --- /dev/null +++ b/fairseq/examples/mr_hubert/config/pretrain/mrhubert_large_librilight.yaml @@ -0,0 +1,107 @@ +# @package _group_ + +common: + memory_efficient_fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + 
no_epoch_checkpoints: true + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 128 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: multires_hubert_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + label_rate_ratios: ??? + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + # max_keep_size: 50000 + +dataset: + num_workers: 0 + max_tokens: 300000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0015] + clip_norm: 1.0 + update_freq: [3] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: multires_hubert + label_rate: ??? 
+ label_rate_ratios: ${task.label_rate_ratios} + encoder_layers: 8 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + final_dim: 768 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + conv_adapator_kernal: 1 + use_single_target: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + run: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + sweep: + dir: /checkpoint/wnhsu/w2v/hubert_final/hydra_pt + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/fairseq/examples/mr_hubert/config/pretrain/run/submitit_reg.yaml b/fairseq/examples/mr_hubert/config/pretrain/run/submitit_reg.yaml new file mode 100644 index 0000000..46c979c --- /dev/null +++ b/fairseq/examples/mr_hubert/config/pretrain/run/submitit_reg.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +hydra: + launcher: + cpus_per_task: 8 + gpus_per_node: 8 + tasks_per_node: ${hydra.launcher.gpus_per_node} + nodes: 4 + comment: null + mem_gb: 384 + timeout_min: 4320 + max_num_timeout: 100 + constraint: volta32gb + name: ${hydra.job.config_name}/${hydra.job.override_dirname} + submitit_folder: ${hydra.sweep.dir}/submitit/%j + +distributed_training: + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 diff --git a/fairseq/examples/mr_hubert/decode.sh b/fairseq/examples/mr_hubert/decode.sh new file mode 100644 index 0000000..1ff423a --- /dev/null +++ b/fairseq/examples/mr_hubert/decode.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +FAIRSEQ= # Setup your fairseq directory + +config_dir=${FAIRSEQ}/examples/mr_hubert/config +config_name=mr_hubert_base_librispeech + + 
+# Prepared Data Directory + +data_dir=librispeech +# -- data_dir +# -- test.tsv +# -- test.ltr +# -- dict.ltr.txt + + +exp_dir=exp # Target experiments directory (where you have your pre-trained model with checkpoint_best.pt) +ratios="[1, 2]" # Default label rate ratios + +_opts= + +# If use slurm, uncomment this line and modify the job submission at +# _opts="${_opts} hydra/launcher=submitit_slurm +hydra.launcher.partition=${your_slurm_partition} +run=submitit_reg" + +# If want to set additional experiment tag, uncomment this line +# _opts="${_opts} hydra.sweep.subdir=${your_experiment_tag}" + +# If use un-normalized audio, uncomment this line +# _opts="${_opts} task.normalize=false" + + + +PYTHONPATH=${FAIRSEQ} +python examples/speech_recognition/new/infer.py \ + --config-dir ${config_dir} \ + --config-name infer_multires \ + ${_opts} \ + task.data=${data_dir} \ + task.label_rate_ratios='${ratios}' \ + common_eval.results_path=${exp_dir} \ + common_eval.path=${exp_dir}/checkpoint_best.pt \ + dataset.max_tokens=2000000 \ + dataset.gen_subset=test \ + dataset.skip_invalid_size_inputs_valid_test=true + diff --git a/fairseq/examples/mr_hubert/finetune.sh b/fairseq/examples/mr_hubert/finetune.sh new file mode 100644 index 0000000..31ba645 --- /dev/null +++ b/fairseq/examples/mr_hubert/finetune.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +FAIRSEQ= # Setup your fairseq directory + +config_dir=${FAIRSEQ}/examples/mr_hubert/config +config_name=mr_hubert_base_librispeech + +# override configs if need +max_tokens=3200000 +max_sample_size=1000000 +max_update=50000 + + +# Prepared Data Directory + +data_dir=librispeech +# -- data_dir +# -- train.tsv +# -- train.ltr +# -- valid.tsv +# -- valid.ltr +# -- dict.ltr.txt + + +exp_dir=exp # Target experiments directory +ratios="[1, 2]" # Default label rate ratios +hubert_path=/path/of/your/hubert.pt + +_opts= + +# If use slurm, uncomment this line and modify the job submission at +# _opts="${_opts} hydra/launcher=submitit_slurm 
+hydra.launcher.partition=${your_slurm_partition} +run=submitit_reg" + +# If want to set additional experiment tag, uncomment this line +# _opts="${_opts} hydra.sweep.subdir=${your_experiment_tag}" + + +python ${FAIRSEQ}/fairseq_cli/hydra_train.py \ + -m --config-dir ${config_dir} --config-name ${config_name} ${_opts} \ + task.data=${data_dir} +task.max_sample_size=${max_sample_size} \ + task.label_dir=${data_dir} \ + task.label_rate_ratios='${ratios}' \ + dataset.max_tokens=${max_tokens} \ + optimization.max_update=${max_update} \ + model.multires_hubert_path=${hubert_path} \ + hydra.sweep.dir=${exp_dir} & diff --git a/fairseq/examples/mr_hubert/simple_kmeans b/fairseq/examples/mr_hubert/simple_kmeans new file mode 100644 index 0000000..4f95545 --- /dev/null +++ b/fairseq/examples/mr_hubert/simple_kmeans @@ -0,0 +1 @@ +../hubert/simple_kmeans \ No newline at end of file diff --git a/fairseq/examples/mr_hubert/train.sh b/fairseq/examples/mr_hubert/train.sh new file mode 100644 index 0000000..da561eb --- /dev/null +++ b/fairseq/examples/mr_hubert/train.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +FAIRSEQ= # Setup your fairseq directory + +config_dir=${FAIRSEQ}/examples/mr_hubert/config +config_name=mr_hubert_base_librispeech + +# Prepared Data Directory +data_dir=librispeech +# -- data_dir +# -- train.tsv +# -- valid.tsv + +label_dir=labels +# -- label_dir +# -- train.km +# -- valid.km +# -- dict.km.txt + + +exp_dir=exp # Target experiments directory +ratios="[1, 2]" # Default label rate ratios +label_rate=50 # Base label rate + + +_opts= + +# If use slurm, uncomment this line and modify the job submission at +# _opts="${_opts} hydra/launcher=submitit_slurm +hydra.launcher.partition=${your_slurm_partition} +run=submitit_reg" + +# If want to set additional experiment tag, uncomment this line +# _opts="${_opts} hydra.sweep.subdir=${your_experiment_tag}" + + +python ${FAIRSEQ}/fairseq_cli/hydra_train.py \ + -m --config-dir ${config_dir} --config-name ${config_name} ${_opts} \ + 
task.data=${data_dir} \ + task.label_dir=${label_dir} \ + task.labels='["km"]' \ + model.label_rate=${label_rate} \ + task.label_rate_ratios='${ratios}' \ + hydra.sweep.dir=${exp_dir} & + + + diff --git a/fairseq/examples/multilingual/ML50_langs.txt b/fairseq/examples/multilingual/ML50_langs.txt new file mode 100644 index 0000000..558abbc --- /dev/null +++ b/fairseq/examples/multilingual/ML50_langs.txt @@ -0,0 +1,52 @@ +ar_AR +cs_CZ +de_DE +en_XX +es_XX +et_EE +fi_FI +fr_XX +gu_IN +hi_IN +it_IT +ja_XX +kk_KZ +ko_KR +lt_LT +lv_LV +my_MM +ne_NP +nl_XX +ro_RO +ru_RU +si_LK +tr_TR +vi_VN +zh_CN +af_ZA +az_AZ +bn_IN +fa_IR +he_IL +hr_HR +id_ID +ka_GE +km_KH +mk_MK +ml_IN +mn_MN +mr_IN +pl_PL +ps_AF +pt_XX +sv_SE +sw_KE +ta_IN +te_IN +th_TH +tl_XX +uk_UA +ur_PK +xh_ZA +gl_ES +sl_SI \ No newline at end of file diff --git a/fairseq/examples/multilingual/README.md b/fairseq/examples/multilingual/README.md new file mode 100644 index 0000000..46ff9c3 --- /dev/null +++ b/fairseq/examples/multilingual/README.md @@ -0,0 +1,158 @@ +# Multilingual Translation + +[[Multilingual Translation with Extensible Multilingual Pretraining and Finetuning, https://arxiv.org/abs/2008.00401]](https://arxiv.org/abs/2008.00401) + +## Introduction + +This work is for training multilingual translation models with multiple bitext datasets. 
This multilingual translation framework supports (see [[training section]](#Training) and [[finetuning section]](#Finetuning) for examples) + +* temperature based sampling over unbalanced datasets of different translation directions + - --sampling-method with + choices=['uniform', 'temperature', 'concat'] + - --sampling-temperature +* configurable to automatically add source and/or target language tokens to source/target sentences using data which are prepared in the same way as bilingual training + - --encoder-langtok with choices=['src', 'tgt', None] to specify whether to add source or target language tokens to the source sentences + - --decoder-langtok (binary option) to specify whether to add target language tokens to the target sentences or not +* finetuning mBART pretrained models for multilingual translation + - --finetune-from-model to specify the path from which to load the pretrained model + +## Preprocessing data +Multilingual training requires a joint BPE vocab. Please follow [mBART's preprocessing steps](https://github.com/pytorch/fairseq/tree/main/examples/mbart#bpe-data) to reuse our pretrained sentence-piece model. + +You can also train a joint BPE model on your own dataset and then follow the steps in [[link]](https://github.com/pytorch/fairseq/tree/main/examples/translation#multilingual-translation). 
+ +## Training + + +```bash +lang_pairs= +path_2_data= +lang_list= + +fairseq-train $path_2_data \ + --encoder-normalize-before --decoder-normalize-before \ + --arch transformer --layernorm-embedding \ + --task translation_multi_simple_epoch \ + --sampling-method "temperature" \ + --sampling-temperature 1.5 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt --lr 3e-05 --warmup-updates 2500 --max-update 40000 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 1024 --update-freq 2 \ + --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \ + --seed 222 --log-format simple --log-interval 2 +``` + +## Finetuning +We can also finetune multilingual models from a monolingual pretrained models, e.g. [mMBART](https://github.com/pytorch/fairseq/tree/main/examples/mbart). 
+```bash +lang_pairs= +path_2_data= +lang_list= +pretrained_model= + +fairseq-train $path_2_data \ + --finetune-from-model $pretrained_model \ + --encoder-normalize-before --decoder-normalize-before \ + --arch transformer --layernorm-embedding \ + --task translation_multi_simple_epoch \ + --sampling-method "temperature" \ + --sampling-temperature 1.5 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt --lr 3e-05 --warmup-updates 2500 --max-update 40000 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 1024 --update-freq 2 \ + --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \ + --seed 222 --log-format simple --log-interval 2 +``` +## Generate +The following command uses the multilingual task (translation_multi_simple_epoch) to generate translations from $source_lang to $target_lang on the test dataset. During generation, the source language tokens are added to source sentences and the target language tokens are added as the starting token to decode target sentences. Options --lang-dict and --lang-pairs are needed to tell the generation process the ordered list of languages and translation directions that the trained model is aware of; they will need to be consistent with the training. 
+ +```bash +model= +source_lang= +target_lang= + +fairseq-generate $path_2_data \ + --path $model \ + --task translation_multi_simple_epoch \ + --gen-subset test \ + --source-lang $source_lang \ + --target-lang $target_lang \ + --sacrebleu --remove-bpe 'sentencepiece'\ + --batch-size 32 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" > ${source_lang}_${target_lang}.txt +``` +Fairseq will write the translations to the file ${source_lang}_${target_lang}.txt, with the sacreBLEU score at the end. + +You can also use a customized tokenizer to compare the performance with the literature. For example, you get a tokenizer [here](https://github.com/rsennrich/wmt16-scripts) and do the following: +```bash +TOKENIZER= +TOK_CMD=<"$TOKENIZER $target_lang" or cat for sacrebleu> + +cat ${source_lang}_${target_lang}.txt | grep -P "^H" |sort -V |cut -f 3- |$TOK_CMD > ${source_lang}_${target_lang}.hyp +cat ${source_lang}_${target_lang}.txt | grep -P "^T" |sort -V |cut -f 2- |$TOK_CMD > ${source_lang}_${target_lang}.ref +sacrebleu -tok 'none' -s 'none' ${source_lang}_${target_lang}.ref < ${source_lang}_${target_lang}.hyp +``` + +# mBART50 models + +* [mMBART 50 pretrained model](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.pretrained.tar.gz). +* [mMBART 50 finetuned many-to-one](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.ft.n1.tar.gz). +* [mMBART 50 finetuned one-to-many](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.ft.1n.tar.gz). +* [mMBART 50 finetuned many-to-many](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.ft.nn.tar.gz). + +Please download and extract from the above tarballs. 
Each tarball contains +* The fairseq model checkpoint: model.pt +* The list of supported languages: ML50_langs.txt +* Sentence piece model: sentence.bpe.model +* Fairseq dictionary of each language: dict.{lang}.txt (please replace lang with a language specified in ML50_langs.txt) + +To use the trained models, +* use the tool [binarize.py](./data_scripts/binarize.py) to binarize your data using sentence.bpe.model and dict.{lang}.txt, and copy the dictionaries to your data path +* then run the generation command: +```bash +path_2_data= +model=/model.pt +lang_list=/ML50_langs.txt +source_lang= +target_lang= + +fairseq-generate $path_2_data \ + --path $model \ + --task translation_multi_simple_epoch \ + --gen-subset test \ + --source-lang $source_lang \ + --target-lang $target_lang \ + --sacrebleu --remove-bpe 'sentencepiece'\ + --batch-size 32 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" +``` + +## Citation + +```bibtex +@article{tang2020multilingual, + title={Multilingual Translation with Extensible Multilingual Pretraining and Finetuning}, + author={Yuqing Tang and Chau Tran and Xian Li and Peng-Jen Chen and Naman Goyal and Vishrav Chaudhary and Jiatao Gu and Angela Fan}, + year={2020}, + eprint={2008.00401}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` diff --git a/fairseq/examples/multilingual/data_scripts/README.md b/fairseq/examples/multilingual/data_scripts/README.md new file mode 100644 index 0000000..cc610c0 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/README.md @@ -0,0 +1,24 @@ + +# Install dependency +```bash +pip install -r requirement.txt +``` + +# Download the data set +```bash +export WORKDIR_ROOT= + +``` +The downloaded data will be at $WORKDIR_ROOT/ML50 + +# preprocess the data +Install SPM [here](https://github.com/google/sentencepiece) +```bash +export WORKDIR_ROOT= +export SPM_PATH= +``` +* $WORKDIR_ROOT/ML50/raw: extracted raw data +* $WORKDIR_ROOT/ML50/dedup: dedup data +* 
$WORKDIR_ROOT/ML50/clean: data with valid and test sentences removed from the dedup data + + diff --git a/fairseq/examples/multilingual/data_scripts/binarize.py b/fairseq/examples/multilingual/data_scripts/binarize.py new file mode 100644 index 0000000..ee54c6a --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/binarize.py @@ -0,0 +1,200 @@ +import shutil +import os, sys +from subprocess import check_call, check_output +import glob +import argparse +import shutil +import pathlib +import itertools + +def call_output(cmd): + print(f"Executing: {cmd}") + ret = check_output(cmd, shell=True) + print(ret) + return ret + +def call(cmd): + print(cmd) + check_call(cmd, shell=True) + + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. Exitting..."') + sys.exit(-1) + +SPM_PATH = os.environ.get('SPM_PATH', None) + +if SPM_PATH is None or not SPM_PATH.strip(): + print("Please install sentence piecence from https://github.com/google/sentencepiece and set SPM_PATH pointing to the installed spm_encode.py. 
Exitting...") + sys.exit(-1) + + +SPM_MODEL = f'{WORKDIR_ROOT}/sentence.bpe.model' +SPM_VOCAB = f'{WORKDIR_ROOT}/dict_250k.txt' + +SPM_ENCODE = f'{SPM_PATH}' + +if not os.path.exists(SPM_MODEL): + call(f"wget https://dl.fbaipublicfiles.com/fairseq/models/mbart50/sentence.bpe.model -O {SPM_MODEL}") + + +if not os.path.exists(SPM_VOCAB): + call(f"wget https://dl.fbaipublicfiles.com/fairseq/models/mbart50/dict_250k.txt -O {SPM_VOCAB}") + + + +def get_data_size(raw): + cmd = f'wc -l {raw}' + ret = call_output(cmd) + return int(ret.split()[0]) + +def encode_spm(model, direction, prefix='', splits=['train', 'test', 'valid'], pairs_per_shard=None): + src, tgt = direction.split('-') + + for split in splits: + src_raw, tgt_raw = f'{RAW_DIR}/{split}{prefix}.{direction}.{src}', f'{RAW_DIR}/{split}{prefix}.{direction}.{tgt}' + if os.path.exists(src_raw) and os.path.exists(tgt_raw): + cmd = f"""python {SPM_ENCODE} \ + --model {model}\ + --output_format=piece \ + --inputs {src_raw} {tgt_raw} \ + --outputs {BPE_DIR}/{direction}{prefix}/{split}.bpe.{src} {BPE_DIR}/{direction}{prefix}/{split}.bpe.{tgt} """ + print(cmd) + call(cmd) + + +def binarize_( + bpe_dir, + databin_dir, + direction, spm_vocab=SPM_VOCAB, + splits=['train', 'test', 'valid'], +): + src, tgt = direction.split('-') + + try: + shutil.rmtree(f'{databin_dir}', ignore_errors=True) + os.mkdir(f'{databin_dir}') + except OSError as error: + print(error) + cmds = [ + "fairseq-preprocess", + f"--source-lang {src} --target-lang {tgt}", + f"--destdir {databin_dir}/", + f"--workers 8", + ] + if isinstance(spm_vocab, tuple): + src_vocab, tgt_vocab = spm_vocab + cmds.extend( + [ + f"--srcdict {src_vocab}", + f"--tgtdict {tgt_vocab}", + ] + ) + else: + cmds.extend( + [ + f"--joined-dictionary", + f"--srcdict {spm_vocab}", + ] + ) + input_options = [] + if 'train' in splits and glob.glob(f"{bpe_dir}/train.bpe*"): + input_options.append( + f"--trainpref {bpe_dir}/train.bpe", + ) + if 'valid' in splits and 
glob.glob(f"{bpe_dir}/valid.bpe*"): + input_options.append(f"--validpref {bpe_dir}/valid.bpe") + if 'test' in splits and glob.glob(f"{bpe_dir}/test.bpe*"): + input_options.append(f"--testpref {bpe_dir}/test.bpe") + if len(input_options) > 0: + cmd = " ".join(cmds + input_options) + print(cmd) + call(cmd) + + +def binarize( + databin_dir, + direction, spm_vocab=SPM_VOCAB, prefix='', + splits=['train', 'test', 'valid'], + pairs_per_shard=None, +): + def move_databin_files(from_folder, to_folder): + for bin_file in glob.glob(f"{from_folder}/*.bin") \ + + glob.glob(f"{from_folder}/*.idx") \ + + glob.glob(f"{from_folder}/dict*"): + try: + shutil.move(bin_file, to_folder) + except OSError as error: + print(error) + bpe_databin_dir = f"{BPE_DIR}/{direction}{prefix}_databin" + bpe_dir = f"{BPE_DIR}/{direction}{prefix}" + if pairs_per_shard is None: + binarize_(bpe_dir, bpe_databin_dir, direction, spm_vocab=spm_vocab, splits=splits) + move_databin_files(bpe_databin_dir, databin_dir) + else: + # binarize valid and test which will not be sharded + binarize_( + bpe_dir, bpe_databin_dir, direction, + spm_vocab=spm_vocab, splits=[s for s in splits if s != "train"]) + for shard_bpe_dir in glob.glob(f"{bpe_dir}/shard*"): + path_strs = os.path.split(shard_bpe_dir) + shard_str = path_strs[-1] + shard_folder = f"{bpe_databin_dir}/{shard_str}" + databin_shard_folder = f"{databin_dir}/{shard_str}" + print(f'working from {shard_folder} to {databin_shard_folder}') + os.makedirs(databin_shard_folder, exist_ok=True) + binarize_( + shard_bpe_dir, shard_folder, direction, + spm_vocab=spm_vocab, splits=["train"]) + + for test_data in glob.glob(f"{bpe_databin_dir}/valid.*") + glob.glob(f"{bpe_databin_dir}/test.*"): + filename = os.path.split(test_data)[-1] + try: + os.symlink(test_data, f"{databin_shard_folder}/{filename}") + except OSError as error: + print(error) + move_databin_files(shard_folder, databin_shard_folder) + + +def load_langs(path): + with open(path) as fr: + langs = [l.strip() 
for l in fr] + return langs + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--data_root", default=f"{WORKDIR_ROOT}/ML50") + parser.add_argument("--raw-folder", default='raw') + parser.add_argument("--bpe-folder", default='bpe') + parser.add_argument("--databin-folder", default='databin') + + args = parser.parse_args() + + DATA_PATH = args.data_root #'/private/home/yuqtang/public_data/ML50' + RAW_DIR = f'{DATA_PATH}/{args.raw_folder}' + BPE_DIR = f'{DATA_PATH}/{args.bpe_folder}' + DATABIN_DIR = f'{DATA_PATH}/{args.databin_folder}' + os.makedirs(BPE_DIR, exist_ok=True) + + raw_files = itertools.chain( + glob.glob(f'{RAW_DIR}/train*'), + glob.glob(f'{RAW_DIR}/valid*'), + glob.glob(f'{RAW_DIR}/test*'), + ) + + directions = [os.path.split(file_path)[-1].split('.')[1] for file_path in raw_files] + + for direction in directions: + prefix = "" + splits = ['train', 'valid', 'test'] + try: + shutil.rmtree(f'{BPE_DIR}/{direction}{prefix}', ignore_errors=True) + os.mkdir(f'{BPE_DIR}/{direction}{prefix}') + os.makedirs(DATABIN_DIR, exist_ok=True) + except OSError as error: + print(error) + spm_model, spm_vocab = SPM_MODEL, SPM_VOCAB + encode_spm(spm_model, direction=direction, splits=splits) + binarize(DATABIN_DIR, direction, spm_vocab=spm_vocab, splits=splits) diff --git a/fairseq/examples/multilingual/data_scripts/check_iswlt_test_data.py b/fairseq/examples/multilingual/data_scripts/check_iswlt_test_data.py new file mode 100644 index 0000000..f8e2eb0 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/check_iswlt_test_data.py @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import os, sys +import subprocess +import re +from subprocess import check_call, check_output + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. Exitting..."') + sys.exit(-1) + + +BLEU_REGEX = re.compile("^BLEU\\S* = (\\S+) ") +def run_eval_bleu(cmd): + output = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode("utf-8").strip() + print(output) + bleu = -1.0 + for line in output.strip().split('\n'): + m = BLEU_REGEX.search(line) + if m is not None: + bleu = m.groups()[0] + bleu = float(bleu) + break + return bleu + +def check_data_test_bleu(raw_folder, data_lang_pairs): + not_matchings = [] + for sacrebleu_set, src_tgts in data_lang_pairs: + for src_tgt in src_tgts: + print(f'checking test bleus for: {src_tgt} at {sacrebleu_set}') + src, tgt = src_tgt.split('-') + ssrc, stgt = src[:2], tgt[:2] + if os.path.exists(f'{raw_folder}/test.{tgt}-{src}.{src}'): + # reversed direction may have different test set + test_src = f'{raw_folder}/test.{tgt}-{src}.{src}' + else: + test_src = f'{raw_folder}/test.{src}-{tgt}.{src}' + cmd1 = f'cat {test_src} | sacrebleu -t "{sacrebleu_set}" -l {stgt}-{ssrc}; [ $? -eq 0 ] || echo ""' + test_tgt = f'{raw_folder}/test.{src}-{tgt}.{tgt}' + cmd2 = f'cat {test_tgt} | sacrebleu -t "{sacrebleu_set}" -l {ssrc}-{stgt}; [ $? 
-eq 0 ] || echo ""' + bleu1 = run_eval_bleu(cmd1) + if bleu1 != 100.0: + not_matchings.append(f'{sacrebleu_set}:{src_tgt} source side not matching: {test_src}') + bleu2 = run_eval_bleu(cmd2) + if bleu2 != 100.0: + not_matchings.append(f'{sacrebleu_set}:{src_tgt} target side not matching: {test_tgt}') + return not_matchings + +if __name__ == "__main__": + to_data_path = f'{WORKDIR_ROOT}/iwsltv2' + not_matching = check_data_test_bleu( + f'{to_data_path}/raw', + [ + ('iwslt17', ['en_XX-ar_AR', 'en_XX-ko_KR', 'ar_AR-en_XX', 'ko_KR-en_XX']), + ('iwslt17', ['en_XX-it_IT', 'en_XX-nl_XX', 'it_IT-en_XX', 'nl_XX-en_XX']), + ('iwslt17/tst2015', ['en_XX-vi_VN', "vi_VN-en_XX"]), + ] + ) + if len(not_matching) > 0: + print('the following datasets do not have matching test datasets:\n\t', '\n\t'.join(not_matching)) + diff --git a/fairseq/examples/multilingual/data_scripts/check_self_overlaps.py b/fairseq/examples/multilingual/data_scripts/check_self_overlaps.py new file mode 100644 index 0000000..07b338d --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/check_self_overlaps.py @@ -0,0 +1,103 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import os +import glob +import argparse +from utils.dedup import deup +import sys + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. 
Exitting..."') + sys.exit(-1) + +def get_directions(folder): + raw_files = glob.glob(f'{folder}/train*') + directions = [os.path.split(file_path)[-1].split('.')[1] for file_path in raw_files] + return directions + +def diff_list(lhs, rhs): + return set(lhs).difference(set(rhs)) + +def check_diff( + from_src_file, from_tgt_file, + to_src_file, to_tgt_file, +): + seen_in_from = set() + seen_src_in_from = set() + seen_tgt_in_from = set() + from_count = 0 + with open(from_src_file, encoding='utf-8') as fsrc, \ + open(from_tgt_file, encoding='utf-8') as ftgt: + for s, t in zip(fsrc, ftgt): + seen_in_from.add((s, t)) + seen_src_in_from.add(s) + seen_tgt_in_from.add(t) + from_count += 1 + common = 0 + common_src = 0 + common_tgt = 0 + to_count = 0 + seen = set() + + with open(to_src_file, encoding='utf-8') as fsrc, \ + open(to_tgt_file, encoding='utf-8') as ftgt: + for s, t in zip(fsrc, ftgt): + to_count += 1 + if (s, t) not in seen: + if (s, t) in seen_in_from: + common += 1 + if s in seen_src_in_from: + common_src += 1 + seen_src_in_from.remove(s) + if t in seen_tgt_in_from: + common_tgt += 1 + seen_tgt_in_from.remove(t) + seen.add((s, t)) + return common, common_src, common_tgt, from_count, to_count + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--folder", type=str, required=True, + help="the data folder ") + parser.add_argument("--split", type=str, default='test', + help="split (valid, test) to check against training data") + parser.add_argument('--directions', type=str, default=None, required=False) + + args = parser.parse_args() + + if args.directions is None: + directions = set(get_directions(args.folder)) + directions = sorted(directions) + else: + directions = args.directions.split(',') + directions = sorted(set(directions)) + + results = [] + print(f'checking where {args.split} split data are in training') + print(f'direction\tcommon_count\tsrc common\ttgt common\tfrom_size\tto_size') + + for direction in directions: + src, tgt = 
direction.split('-') + from_src_file = f'{args.folder}/{args.split}.{src}-{tgt}.{src}' + from_tgt_file = f'{args.folder}/{args.split}.{src}-{tgt}.{tgt}' + if not os.path.exists(from_src_file): + # some test/valid data might in reverse directinos: + from_src_file = f'{args.folder}/{args.split}.{tgt}-{src}.{src}' + from_tgt_file = f'{args.folder}/{args.split}.{tgt}-{src}.{tgt}' + to_src_file = f'{args.folder}/train.{src}-{tgt}.{src}' + to_tgt_file = f'{args.folder}/train.{src}-{tgt}.{tgt}' + if not os.path.exists(to_src_file) or not os.path.exists(from_src_file): + continue + r = check_diff(from_src_file, from_tgt_file, to_src_file, to_tgt_file) + results.append(r) + print(f'{direction}\t', '\t'.join(map(str, r))) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/multilingual/data_scripts/check_valid_test_overlaps.py b/fairseq/examples/multilingual/data_scripts/check_valid_test_overlaps.py new file mode 100644 index 0000000..40fa9ae --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/check_valid_test_overlaps.py @@ -0,0 +1,124 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import os +import argparse +import pandas as pd +import sys + + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. 
Exitting..."') + sys.exit(-1) + +def load_langs(path): + with open(path) as fr: + langs = [l.strip() for l in fr] + return langs + + + +def load_sentences(raw_data, split, direction): + src, tgt = direction.split('-') + src_path = f"{raw_data}/{split}.{direction}.{src}" + tgt_path = f"{raw_data}/{split}.{direction}.{tgt}" + if os.path.exists(src_path) and os.path.exists(tgt_path): + return [(src, open(src_path).read().splitlines()), (tgt, open(tgt_path).read().splitlines())] + else: + return [] + +def swap_direction(d): + src, tgt = d.split('-') + return f'{tgt}-{src}' + +def get_all_test_data(raw_data, directions, split='test'): + test_data = [ + x + for dd in directions + for d in [dd, swap_direction(dd)] + for x in load_sentences(raw_data, split, d) + ] + # all_test_data = {s for _, d in test_data for s in d} + all_test_data = {} + for lang, d in test_data: + for s in d: + s = s.strip() + lgs = all_test_data.get(s, set()) + lgs.add(lang) + all_test_data[s] = lgs + return all_test_data, test_data + + +def check_train_sentences(src_path, tgt_path, direction, all_test_data, mess_up_train={}): + # src, tgt = direction.split('-') + print(f'check training data for {direction} in {src_path} and {tgt_path}') + size = 0 + overlapped_size_counted_dup = 0 + if not os.path.exists(tgt_path) or not os.path.exists(src_path): + return mess_up_train, size, overlapped_size_counted_dup + + with open(src_path) as f, open(tgt_path) as g: + for src_line, tgt_line in zip(f, g): + s = src_line.strip() + t = tgt_line.strip() + size += 1 + if s in all_test_data: + langs = mess_up_train.get(s, set()) + langs.add(direction) + mess_up_train[s] = langs + overlapped_size_counted_dup += 1 + if t in all_test_data: + langs = mess_up_train.get(t, set()) + langs.add(direction) + mess_up_train[t] = langs + overlapped_size_counted_dup += 1 + print(f'{direction}: size={size}, overlapped={overlapped_size_counted_dup}') + return mess_up_train, size, overlapped_size_counted_dup + +def 
check_train_all(raw_data, directions, all_test_data): + mess_up_train = {} + data_sizes = {} + # raw_data = '~chau/data-bin/MineBART/multilingual_mined_100M/en_XX/et_EE-en_XX/all.{en_XX, et_EE}' + print(f'checking training data againsts # {len(all_test_data)} sentences') + print(f'example test data: ', [s for i, s in enumerate(all_test_data.keys()) if i < 10]) + for direction in directions: + src, tgt = direction.split('-') + path = f'{raw_data}/en_XX/{direction}/all' + src_path = f'{path}.{src}' + tgt_path = f'{path}.{tgt}' + print(f'checking {src_path} {tgt_path}') + _, size, overlapped_size_counted_dup = check_train_sentences(src_path, tgt_path, direction, all_test_data, mess_up_train) + data_sizes[direction] = (size, overlapped_size_counted_dup) + return mess_up_train, data_sizes + + + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--folder", type=str, required=True, + help="the data folder ") + parser.add_argument("--test-data", type=str, required=True, + help="the test data folder ") + parser.add_argument('--directions', type=str, default=None, required=False) + + args = parser.parse_args() + directions = args.directions.split(',') + directions = sorted(set(directions)) + + results = [] + # print(f'checking where {args.split} split data are in training') + # print(f'direction\tcommon_count\tsrc common\ttgt common\tfrom_size\tto_size') + raw_data = args.folder + all_test_data, test_data = get_all_test_data(args.test_data, directions, split='test') + mess_up_train, data_sizes = check_train_all(raw_data, directions, all_test_data) + print(data_sizes) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/multilingual/data_scripts/dedup_all.py b/fairseq/examples/multilingual/data_scripts/dedup_all.py new file mode 100644 index 0000000..ef39c05 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/dedup_all.py @@ -0,0 +1,52 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + + +import os +import glob +import argparse +from utils.dedup import deup + +import sys +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. Exitting..."') + sys.exit(-1) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--from-folder", type=str, required=True, + help="the data folder to be dedup") + parser.add_argument("--to-folder", type=str, required=True, + help="the data folder to save deduped data") + parser.add_argument('--directions', type=str, default=None, required=False) + + args = parser.parse_args() + + if args.directions is None: + raw_files = glob.glob(f'{args.from_folder}/train*') + + directions = [os.path.split(file_path)[-1].split('.')[1] for file_path in raw_files] + else: + directions = args.directions.split(',') + directions = sorted(set(directions)) + + for direction in directions: + src, tgt = direction.split('-') + src_file = f'{args.from_folder}/train.{src}-{tgt}.{src}' + tgt_file = f'{args.from_folder}/train.{src}-{tgt}.{tgt}' + src_file_out = f'{args.to_folder}/train.{src}-{tgt}.{src}' + tgt_file_out = f'{args.to_folder}/train.{src}-{tgt}.{tgt}' + assert src_file != src_file_out + assert tgt_file != tgt_file_out + print(f'deduping {src_file}, {tgt_file}') + deup(src_file, tgt_file, src_file_out, tgt_file_out) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/multilingual/data_scripts/download_ML50_v1.sh b/fairseq/examples/multilingual/data_scripts/download_ML50_v1.sh new file mode 100644 index 0000000..99fbc75 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_ML50_v1.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. 
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +if [ -z $WORKDIR_ROOT ] ; +then + echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." + exit +fi + +# first run download_wmt20.sh; it will install a few useful tools for other scripts +# TODO: need to print out instructions on downloading a few files which requires manually authentication from the websites +bash ./download_wmt20.sh + +python ./download_wmt19_and_before.py +bash ./download_wat19_my.sh +python ./download_ted_and_extract.py +bash ./download_lotus.sh +bash ./download_iitb.sh +bash ./download_af_xh.sh + + +# IWSLT downloading URLs have changed in between; TODO: fix them: +bash ./download_iwslt_and_extract.sh + +# TODO: globalvoices URLs changed; need to be fixed +bash ./download_flores_data.sh diff --git a/fairseq/examples/multilingual/data_scripts/download_af_xh.sh b/fairseq/examples/multilingual/data_scripts/download_af_xh.sh new file mode 100644 index 0000000..a78fbbb --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_af_xh.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# set -x -e + +if [ -z $WORKDIR_ROOT ] ; +then + echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." 
+ exit +fi + + +# put intermediate files +TMP_DIR=$WORKDIR_ROOT/temp/af_xhv2 +# output {train,valid,test} files to dest +DEST=${WORKDIR_ROOT}/ML50/raw + + + +ROOT=${WORKDIR_ROOT} +UTILS=$PWD/utils +TMX2CORPUS="${UTILS}/tmx2corpus" +TMX_TOOL="python ${TMX2CORPUS}/tmx2corpus.py" + +mkdir -p $TMP_DIR +mkdir -p $DEST +mkdir -p $UTILS + +function download_opus(){ + src=$1 + tgt=$2 + subset=$3 + url=$4 + + mkdir -p extract_$subset.$src-$tgt + pushd extract_$subset.$src-$tgt + if [ ! -f "$subset.$src-$tgt.tmx.gz" ]; then + wget "$url" -O "$subset.$src-$tgt.tmx.gz" + gzip -d "$subset.$src-$tgt.tmx.gz" + f=$subset.$src-$tgt.tmx + $TMX_TOOL $f + mv bitext.$src ../$subset.$src-$tgt.$src + mv bitext.$tgt ../$subset.$src-$tgt.$tgt + fi + popd +} + +function concat_subsets(){ + src=$1 + tgt=$2 + subsets=$3 + src_train=raw_train.$src-$tgt.$src + tgt_train=raw_train.$src-$tgt.$tgt + > $src_train + > $tgt_train + for subset in $subsets; do + cat $subset.$src-$tgt.$src >> $src_train + cat $subset.$src-$tgt.$tgt >> $tgt_train + done +} + + + +function get_seeded_random() +{ + seed="$1" + openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \ + </dev/zero 2>/dev/null +} + +function split_train_valid(){ + src=$1 + tgt=$2 + raw_src_train=raw_train.$src-$tgt.$src + raw_tgt_train=raw_train.$src-$tgt.$tgt + + shuf --random-source=<(get_seeded_random 43) $raw_src_train > shuffled.$src-$tgt.$src + shuf --random-source=<(get_seeded_random 43) $raw_tgt_train > shuffled.$src-$tgt.$tgt + + head -n 1500 shuffled.$src-$tgt.$src > valid.$src-$tgt.$src + head -n 1500 shuffled.$src-$tgt.$tgt > valid.$src-$tgt.$tgt + + tail -n +1501 shuffled.$src-$tgt.$src > train.$src-$tgt.$src + tail -n +1501 shuffled.$src-$tgt.$tgt > train.$src-$tgt.$tgt +} + +function copy2dst(){ + lsrc=$1 + ltgt=$2 + src=${lsrc:0:2} + tgt=${ltgt:0:2} + + + cp valid.$src-$tgt.$src $DEST/valid.$lsrc-$ltgt.$lsrc + cp valid.$src-$tgt.$tgt $DEST/valid.$lsrc-$ltgt.$ltgt + + cp train.$src-$tgt.$src 
$DEST/train.$lsrc-$ltgt.$lsrc + cp train.$src-$tgt.$tgt 
$DEST/train.$lsrc-$ltgt.$ltgt +} + + + + +#for xh-en +declare -A xh_en_urls +xh_en_urls=( + [Tatoeba]=https://object.pouta.csc.fi/OPUS-Tatoeba/v20190709/tmx/en-xh.tmx.gz + [wikimedia]=https://object.pouta.csc.fi/OPUS-wikimedia/v20190628/tmx/en-xh.tmx.gz + [memat]=https://object.pouta.csc.fi/OPUS-memat/v1/tmx/en-xh.tmx.gz + [uedin]=https://object.pouta.csc.fi/OPUS-bible-uedin/v1/tmx/en-xh.tmx.gz + [GNOME]=https://object.pouta.csc.fi/OPUS-GNOME/v1/tmx/en-xh.tmx.gz + [XhosaNavy]=https://object.pouta.csc.fi/OPUS-XhosaNavy/v1/tmx/en-xh.tmx.gz + [KDE4]=https://object.pouta.csc.fi/OPUS-KDE4/v2/tmx/en-xh.tmx.gz + [Ubuntu]=https://object.pouta.csc.fi/OPUS-Ubuntu/v14.10/tmx/en-xh.tmx.gz +) + +mkdir -p $TMP_DIR/xh-en +pushd $TMP_DIR/xh-en +for k in "${!xh_en_urls[@]}" +do + name=$k + url=${xh_en_urls[$k]} + echo "$name: $url" + download_opus xh en $name "$url" +done +concat_subsets xh en "${!xh_en_urls[*]}" +split_train_valid xh en +copy2dst xh_ZA en_XX +popd + + +## +#for af-en +declare -A af_en_urls +af_en_urls=( + [Tatoeba]=https://object.pouta.csc.fi/OPUS-Tatoeba/v20190709/tmx/af-en.tmx.gz + [uedin]=https://object.pouta.csc.fi/OPUS-bible-uedin/v1/tmx/af-en.tmx.gz + [GNOME]=https://object.pouta.csc.fi/OPUS-GNOME/v1/tmx/af-en.tmx.gz + [QED]=https://object.pouta.csc.fi/OPUS-QED/v2.0a/tmx/af-en.tmx.gz + [KDE4]=https://object.pouta.csc.fi/OPUS-KDE4/v2/tmx/af-en.tmx.gz + [OpenSubtitles]=https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/tmx/af-en.tmx.gz + [SPC]=https://object.pouta.csc.fi/OPUS-SPC/v1/tmx/af-en.tmx.gz + [Ubuntu]=https://object.pouta.csc.fi/OPUS-Ubuntu/v14.10/tmx/af-en.tmx.gz +) + +mkdir -p $TMP_DIR/af-en +pushd $TMP_DIR/af-en +for k in "${!af_en_urls[@]}" +do + name=$k + url=${af_en_urls[$k]} + echo "$name: $url" + download_opus af en $name "$url" +done +concat_subsets af en "${!af_en_urls[*]}" +split_train_valid af en +copy2dst af_ZA en_XX +popd + + diff --git a/fairseq/examples/multilingual/data_scripts/download_flores_data.sh 
b/fairseq/examples/multilingual/data_scripts/download_flores_data.sh new file mode 100644 index 0000000..e6175ce --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_flores_data.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# + +if [ -z $WORKDIR_ROOT ] ; +then + echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." + exit +fi + + +set -e +set -o pipefail + +SRC=en +SI_TGT=si +NE_TGT=ne + +DESTDIR=${WORKDIR_ROOT}/ML50/raw/ + +ROOT=${WORKDIR_ROOT}/tmp +mkdir -p $ROOT +DATA=$ROOT/data +NE_ROOT=$DATA/all-clean-ne +SI_ROOT=$DATA/all-clean-si + +mkdir -p $DATA $NE_ROOT $SI_ROOT + +SI_OPUS_DATASETS=( + "$SI_ROOT/GNOME.en-si" + "$SI_ROOT/Ubuntu.en-si" + "$SI_ROOT/KDE4.en-si" + "$SI_ROOT/OpenSubtitles.en-si" +) + +SI_OPUS_URLS=( + "https://object.pouta.csc.fi/OPUS-GNOME/v1/moses/en-si.txt.zip" + "https://object.pouta.csc.fi/OPUS-Ubuntu/v14.10/moses/en-si.txt.zip" + "https://object.pouta.csc.fi/OPUS-KDE4/v2/moses/en-si.txt.zip" + "https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/moses/en-si.txt.zip" +) + +NE_OPUS_DATASETS=( + "$NE_ROOT/GNOME.en-ne" + "$NE_ROOT/Ubuntu.en-ne" + "$NE_ROOT/KDE4.en-ne" +) + +NE_OPUS_URLS=( + "https://object.pouta.csc.fi/OPUS-GNOME/v1/moses/en-ne.txt.zip" + "https://object.pouta.csc.fi/OPUS-Ubuntu/v14.10/moses/en-ne.txt.zip" + "https://object.pouta.csc.fi/OPUS-KDE4/v2/moses/en-ne.txt.zip" +) + +REMOVE_FILE_PATHS=() + +# Download data +download_data() { + CORPORA=$1 + URL=$2 + + if [ -f $CORPORA ]; then + echo "$CORPORA already exists, skipping download" + else + echo "Downloading $URL" + wget $URL -O $CORPORA --no-check-certificate || rm -f $CORPORA + if [ -f $CORPORA ]; then + echo "$URL successfully downloaded." + else + echo "$URL not successfully downloaded." 
+ rm -f $CORPORA + exit -1 + fi + fi +} + +# Example: download_opus_data $LANG_ROOT $TGT +download_opus_data() { + LANG_ROOT=$1 + TGT=$2 + + if [ "$TGT" = "si" ]; then + URLS=("${SI_OPUS_URLS[@]}") + DATASETS=("${SI_OPUS_DATASETS[@]}") + else + URLS=("${NE_OPUS_URLS[@]}") + DATASETS=("${NE_OPUS_DATASETS[@]}") + fi + + # Download and extract data + for ((i=0;i<${#URLS[@]};++i)); do + URL=${URLS[i]} + CORPORA=${DATASETS[i]} + + download_data $CORPORA $URL + unzip -o $CORPORA -d $LANG_ROOT + REMOVE_FILE_PATHS+=( $CORPORA $CORPORA.xml $CORPORA.ids $LANG_ROOT/README $LANG_ROOT/LICENSE ) + done + + cat ${DATASETS[0]}.$SRC ${DATASETS[1]}.$SRC ${DATASETS[2]}.$SRC > $LANG_ROOT/GNOMEKDEUbuntu.$SRC-$TGT.$SRC + cat ${DATASETS[0]}.$TGT ${DATASETS[1]}.$TGT ${DATASETS[2]}.$TGT > $LANG_ROOT/GNOMEKDEUbuntu.$SRC-$TGT.$TGT + + REMOVE_FILE_PATHS+=( ${DATASETS[0]}.$SRC ${DATASETS[1]}.$SRC ${DATASETS[2]}.$SRC ) + REMOVE_FILE_PATHS+=( ${DATASETS[0]}.$TGT ${DATASETS[1]}.$TGT ${DATASETS[2]}.$TGT ) +} + +download_opus_data $SI_ROOT $SI_TGT +cp ${SI_OPUS_DATASETS[3]}.$SRC $SI_ROOT/OpenSubtitles2018.$SRC-$SI_TGT.$SRC +cp ${SI_OPUS_DATASETS[3]}.$SI_TGT $SI_ROOT/OpenSubtitles2018.$SRC-$SI_TGT.$SI_TGT +REMOVE_FILE_PATHS+=( ${SI_OPUS_DATASETS[3]}.$SRC ${SI_OPUS_DATASETS[3]}.$SI_TGT ) + +download_opus_data $NE_ROOT $NE_TGT + + +# Download and extract Global Voices data +GLOBAL_VOICES="$NE_ROOT/globalvoices.2018q4.ne-en" +GLOBAL_VOICES_URL="http://www.casmacat.eu/corpus/global-voices/globalvoices.ne-en.xliff.gz" + +download_data $GLOBAL_VOICES.gz $GLOBAL_VOICES_URL +gunzip -Nf $GLOBAL_VOICES.gz + +sed -ne 's?.*<source>\(.*\)</source>.*?\1?p' $GLOBAL_VOICES > $GLOBAL_VOICES.$NE_TGT +sed -ne 's?.*<target[^>]*>\(.*\)</target>.*?\1?p' $GLOBAL_VOICES > $GLOBAL_VOICES.$SRC + +REMOVE_FILE_PATHS+=( $GLOBAL_VOICES ) + +# Download and extract the bible dataset +BIBLE_TOOLS=bible-corpus-tools +XML_BIBLES=XML_Bibles +XML_BIBLES_DUP=XML_Bibles_dup + +if [ ! 
-e $BIBLE_TOOLS ]; then + echo "Cloning bible-corpus-tools repository..." 
+ git clone https://github.com/christos-c/bible-corpus-tools.git +fi + +mkdir -p $BIBLE_TOOLS/bin $XML_BIBLES $XML_BIBLES_DUP +javac -cp "$BIBLE_TOOLS/lib/*" -d $BIBLE_TOOLS/bin $BIBLE_TOOLS/src/bible/readers/*.java $BIBLE_TOOLS/src/bible/*.java + +download_data bible.tar.gz "https://github.com/christos-c/bible-corpus/archive/v1.2.1.tar.gz" +tar xvzf bible.tar.gz + +cp bible-corpus-1.2.1/bibles/{Greek.xml,English.xml,Nepali.xml} $XML_BIBLES/ +cp bible-corpus-1.2.1/bibles/{Greek.xml,English-WEB.xml,Nepali.xml} $XML_BIBLES_DUP/ + +java -cp $BIBLE_TOOLS/lib/*:$BIBLE_TOOLS/bin bible.CreateMLBooks $XML_BIBLES +java -cp $BIBLE_TOOLS/lib/*:$BIBLE_TOOLS/bin bible.CreateMLBooks $XML_BIBLES_DUP +java -cp $BIBLE_TOOLS/lib/*:$BIBLE_TOOLS/bin bible.CreateVerseAlignedBooks $XML_BIBLES +java -cp $BIBLE_TOOLS/lib/*:$BIBLE_TOOLS/bin bible.CreateVerseAlignedBooks $XML_BIBLES_DUP + +cat $XML_BIBLES/aligned/*/English.txt > $NE_ROOT/bible.$SRC-$NE_TGT.$SRC +cat $XML_BIBLES/aligned/*/Nepali.txt > $NE_ROOT/bible.$SRC-$NE_TGT.$NE_TGT +cat $XML_BIBLES_DUP/aligned/*/English-WEB.txt > $NE_ROOT/bible_dup.$SRC-$NE_TGT.$SRC +cat $XML_BIBLES_DUP/aligned/*/Nepali.txt > $NE_ROOT/bible_dup.$SRC-$NE_TGT.$NE_TGT +REMOVE_FILE_PATHS+=( bible-corpus-1.2.1 bible.tar.gz $BIBLE_TOOLS $XML_BIBLES $XML_BIBLES_DUP ) + +# Download and extract the Penn Treebank dataset +NE_TAGGED=$ROOT/new_submissions_parallel_corpus_project_Nepal +NE_TAGGED_URL="http://www.cle.org.pk/Downloads/ling_resources/parallelcorpus/NepaliTaggedCorpus.zip" +EN_TAGGED_PATCH_URL="https://dl.fbaipublicfiles.com/fairseq/data/nepali-penn-treebank.en.patch" +NE_TAGGED_PATCH_URL="https://dl.fbaipublicfiles.com/fairseq/data/nepali-penn-treebank.ne.patch" +MOSES=mosesdecoder +MOSES_TOK=$MOSES/scripts/tokenizer +EN_PATCH_REGEX="{s:\\\/:\/:g;s/\*\T\*\-\n+//g;s/\-LCB\-/\{/g;s/\-RCB\-/\}/g; s/\-LSB\-/\[/g; s/\-RSB\-/\]/g;s/\-LRB\-/\(/g; s/\-RRB\-/\)/g; s/\'\'/\"/g; s/\`\`/\"/g; s/\ +\'s\ +/\'s /g; s/\ +\'re\ +/\'re /g; s/\"\ +/\"/g; s/\ +\"/\"/g; 
s/\ n't([\ \.\"])/n't\1/g; s/\r+(.)/\1/g;}" +NE_PATCH_REGEX="{s:\p{Cf}::g;s:\\\/:\/:g;s/\*\T\*\-\n+//g;s/\-LCB\-/\{/g;s/\-RCB\-/\}/g; s/\-LSB\-/\[/g; s/\-RSB\-/\]/g;s/\-LRB\-/\(/g; s/\-RRB\-/\)/g; s/\'\'/\"/g; s/\`\`/\"/g; s/\ +\'s\ +/\'s /g; s/\ +\'re\ +/\'re /g; s/\"\ +/\"/g; s/\ +\"/\"/g; s/\ n't([\ \.\"])/n't\1/g; s/\r+(.)/\1/g;}" + +download_data $DATA/nepali-penn-treebank.$SRC.patch $EN_TAGGED_PATCH_URL +download_data $DATA/nepali-penn-treebank.$NE_TGT.patch $NE_TAGGED_PATCH_URL +download_data original.zip $NE_TAGGED_URL +unzip -o original.zip -d $ROOT + +cat $NE_TAGGED/00.txt $NE_TAGGED/01.txt $NE_TAGGED/02.txt > $NE_TAGGED/nepali-penn-treebank.$SRC +cat $NE_TAGGED/00ne_revised.txt $NE_TAGGED/01ne_revised.txt $NE_TAGGED/02ne_revised.txt > $NE_TAGGED/nepali-penn-treebank.$NE_TGT + +patch $NE_TAGGED/nepali-penn-treebank.$SRC -i $DATA/nepali-penn-treebank.$SRC.patch -o $NE_TAGGED/nepali-penn-treebank-patched.$SRC +patch $NE_TAGGED/nepali-penn-treebank.$NE_TGT -i $DATA/nepali-penn-treebank.$NE_TGT.patch -o $NE_TAGGED/nepali-penn-treebank-patched.$NE_TGT + +if [ ! -e $MOSES ]; then + echo "Cloning moses repository..." 
+ git clone https://github.com/moses-smt/mosesdecoder.git +fi + +cat $NE_TAGGED/nepali-penn-treebank-patched.$SRC | \ + perl -anpe "$EN_PATCH_REGEX" | \ + $MOSES_TOK/tokenizer.perl -l $SRC | \ + $MOSES_TOK/detokenizer.perl -l $SRC > $NE_ROOT/nepali-penn-treebank.$SRC + +cat $NE_TAGGED/nepali-penn-treebank-patched.$NE_TGT | \ + perl -CIO -anpe "$NE_PATCH_REGEX" | \ + $MOSES_TOK/detokenizer.perl -l $SRC > $NE_ROOT/nepali-penn-treebank.$NE_TGT + + +# Download nepali dictionary data +NE_DICT=$NE_ROOT/dictionaries +download_data $NE_DICT "http://www.seas.upenn.edu/~nlp/resources/TACL-data-release/dictionaries.tar.gz" +tar xvzf $NE_DICT +cp dictionaries/dict.ne $NE_ROOT/dictionary.$NE_TGT-$SRC +REMOVE_FILE_PATHS+=( $NE_DICT dictionaries ) + +REMOVE_FILE_PATHS+=( $MOSES $NE_TAGGED original.zip $DATA/nepali-penn-treebank.$SRC.patch $DATA/nepali-penn-treebank.$NE_TGT.patch ) + + +# Remove the temporary files +for ((i=0;i<${#REMOVE_FILE_PATHS[@]};++i)); do + rm -rf ${REMOVE_FILE_PATHS[i]} +done + +# Copy the training data +si=si_LK +ne=ne_NP +en=en_XX +cat $SI_ROOT/GNOMEKDEUbuntu.en-si.si $SI_ROOT/OpenSubtitles2018.en-si.si > $DESTDIR/train.$si-$en.$si +cat $SI_ROOT/GNOMEKDEUbuntu.en-si.en $SI_ROOT/OpenSubtitles2018.en-si.en > $DESTDIR/train.$si-$en.$en + +cat $NE_ROOT/bible_dup.en-ne.ne $NE_ROOT/bible.en-ne.ne $NE_ROOT/globalvoices.2018q4.ne-en.ne $NE_ROOT/GNOMEKDEUbuntu.en-ne.ne $NE_ROOT/nepali-penn-treebank.ne > $DESTDIR/train.$ne-$en.$ne +cat $NE_ROOT/bible_dup.en-ne.en $NE_ROOT/bible.en-ne.en $NE_ROOT/globalvoices.2018q4.ne-en.en $NE_ROOT/GNOMEKDEUbuntu.en-ne.en $NE_ROOT/nepali-penn-treebank.en > $DESTDIR/train.$ne-$en.$en + + +#Download the test sets +wget https://github.com/facebookresearch/flores/raw/master/data/wikipedia_en_ne_si_test_sets.tgz +tar -xvzf wikipedia_en_ne_si_test_sets.tgz + +cp wikipedia_en_ne_si_test_sets/wikipedia.dev.ne-en.ne $DESTDIR/valid.$ne-$en.$ne +cp wikipedia_en_ne_si_test_sets/wikipedia.dev.ne-en.en $DESTDIR/valid.$ne-$en.$en + +cp 
wikipedia_en_ne_si_test_sets/wikipedia.dev.si-en.si $DESTDIR/valid.$si-$en.$si +cp wikipedia_en_ne_si_test_sets/wikipedia.dev.si-en.en $DESTDIR/valid.$si-$en.$en + +cp wikipedia_en_ne_si_test_sets/wikipedia.devtest.ne-en.ne $DESTDIR/devtest.$ne-$en.$ne +cp wikipedia_en_ne_si_test_sets/wikipedia.devtest.ne-en.en $DESTDIR/devtest.$ne-$en.$en + +cp wikipedia_en_ne_si_test_sets/wikipedia.devtest.si-en.si $DESTDIR/devtest.$si-$en.$si +cp wikipedia_en_ne_si_test_sets/wikipedia.devtest.si-en.en $DESTDIR/devtest.$si-$en.$en + +cp wikipedia_en_ne_si_test_sets/wikipedia.test.ne-en.ne $DESTDIR/test.$ne-$en.$ne +cp wikipedia_en_ne_si_test_sets/wikipedia.test.ne-en.en $DESTDIR/test.$ne-$en.$en + +cp wikipedia_en_ne_si_test_sets/wikipedia.test.si-en.si $DESTDIR/test.$si-$en.$si +cp wikipedia_en_ne_si_test_sets/wikipedia.test.si-en.en $DESTDIR/test.$si-$en.$en + +rm -rf wikipedia_en_ne_si_test_sets.tgz wikipedia_en_ne_si_test_sets diff --git a/fairseq/examples/multilingual/data_scripts/download_iitb.sh b/fairseq/examples/multilingual/data_scripts/download_iitb.sh new file mode 100644 index 0000000..a884e20 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_iitb.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +if [ -z $WORKDIR_ROOT ] ; +then + echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." 
# Download one IWSLT archive into $download_path unless it is already there.
#   $1 iwslt_key   - release key used in the wit3.fbk.eu archive URL
#   $2 src, $3 tgt - language codes used in the URL and the local file name
#   $4 save_prefix - optional prefix for the local file name (e.g. "test.")
# Returns 0 when the archive already exists or was downloaded, otherwise
# wget's exit status.
download_iwslt(){
    iwslt_key=$1
    src=$2
    tgt=$3
    save_prefix=$4
    local archive="${save_prefix}$src-$tgt.tgz"
    local status=0
    pushd "${download_path}"
    if [[ ! -f "$archive" ]]; then
        wget "https://wit3.fbk.eu/archive/${iwslt_key}/texts/$src/$tgt/$src-$tgt.tgz" -O "$archive"
        status=$?
        if [ $status -ne 0 ]; then
            # wget -O creates the output file even when the transfer fails;
            # remove it so the next run does not mistake it for a finished download.
            rm -f "$archive"
        fi
    fi
    # Always restore the caller's directory. The previous version returned
    # right after a successful download and skipped this popd, leaving the
    # directory stack unbalanced for every later pushd/popd pair.
    popd
    return $status
}
+download_iwslt "2017-01-mted-test" nl en "test." +download_iwslt "2017-01-mted-test" en nl "test." + +download_iwslt "2017-01-ted-test" ar en "test." +download_iwslt "2017-01-ted-test" en ar "test." +download_iwslt "2017-01-ted-test" ko en "test." +download_iwslt "2017-01-ted-test" en ko "test." +download_iwslt "2015-01-test" vi en "test." +download_iwslt "2015-01-test" en vi "test." + +echo "extract training data tar balls" +extract_iwslt DeEnItNlRo DeEnItNlRo +extract_iwslt ar en +extract_iwslt en ar +extract_iwslt ko en +extract_iwslt en ko +extract_iwslt vi en +extract_iwslt en vi + + +echo "extracting iwslt test data" +for lang in $langs; do + l=${lang:0:2} + extract_iwslt $l en "test." + extract_iwslt en $l "test." +done + +echo "convert dev and test data" +for lang in $langs; do + s_lang=${lang:0:2} + convert_valid_test $s_lang en + convert_valid_test en $s_lang +done + + + +echo "creating training data into $raw" +for lang in $langs; do + generate_train $lang en_XX + generate_train en_XX $lang +done + +echo "creating iwslt dev data into raw" +generate_subset en_XX vi_VN valid "IWSLT15.TED.tst2013" +generate_subset vi_VN en_XX valid "IWSLT15.TED.tst2013" + +generate_subset en_XX ar_AR valid "IWSLT17.TED.tst2016" +generate_subset ar_AR en_XX valid "IWSLT17.TED.tst2016" +generate_subset en_XX ko_KR valid "IWSLT17.TED.tst2016" +generate_subset ko_KR en_XX valid "IWSLT17.TED.tst2016" + + +generate_subset en_XX it_IT valid "IWSLT17.TED.tst2010" +generate_subset it_IT en_XX valid "IWSLT17.TED.tst2010" +generate_subset en_XX nl_XX valid "IWSLT17.TED.tst2010" +generate_subset nl_XX en_XX valid "IWSLT17.TED.tst2010" + +echo "creating iswslt test data into raw" +generate_subset en_XX vi_VN test "IWSLT15.TED.tst2015" +generate_subset vi_VN en_XX test "IWSLT15.TED.tst2015" + +generate_subset en_XX ar_AR test "IWSLT17.TED.tst2017" +generate_subset ar_AR en_XX test "IWSLT17.TED.tst2017" +generate_subset en_XX ko_KR test "IWSLT17.TED.tst2017" +generate_subset ko_KR en_XX 
# Normalize IWSLT directions into x-en: when only the en-x variant of a
# valid/test file exists, copy it to the x-en name.
pushd $raw
for lang in $langs; do
    for split in test valid; do
        x_en_f1=$split.$lang-en_XX.en_XX
        x_en_f2=$split.$lang-en_XX.${lang}

        en_x_f1=$split.en_XX-$lang.en_XX
        en_x_f2=$split.en_XX-$lang.${lang}

        if [ -f $en_x_f1 ] && [ ! -f $x_en_f1 ]; then
            echo "cp $en_x_f1 $x_en_f1"
            cp $en_x_f1 $x_en_f1
        fi
        # The source of the copy is the en-x file. The previous condition
        # tested $x_en_f2 for both existence and absence, which can never be
        # true, so the non-English side was never copied.
        if [ -f $en_x_f2 ] && [ ! -f $x_en_f2 ]; then
            echo "cp $en_x_f2 $x_en_f2"
            cp $en_x_f2 $x_en_f2
        fi
    done
done
popd
+ exit +fi + + +SRCDIR=$WORKDIR_ROOT/indic_languages_corpus +DESTDIR=${WORKDIR_ROOT}/ML50/raw/ +mkdir -p $SRCDIR +mkdir -p $DESTDIR + +cd $SRCDIR +wget http://lotus.kuee.kyoto-u.ac.jp/WAT/indic-multilingual/indic_languages_corpus.tar.gz +tar -xvzf indic_languages_corpus.tar.gz + +SRC_EXTRACT_DIR=$SRCDIR/indic_languages_corpus/bilingual + +cp $SRC_EXTRACT_DIR/ml-en/train.ml $DESTDIR/train.ml_IN-en_XX.ml_IN +cp $SRC_EXTRACT_DIR/ml-en/train.en $DESTDIR/train.ml_IN-en_XX.en_XX +cp $SRC_EXTRACT_DIR/ml-en/dev.ml $DESTDIR/valid.ml_IN-en_XX.ml_IN +cp $SRC_EXTRACT_DIR/ml-en/dev.en $DESTDIR/valid.ml_IN-en_XX.en_XX +cp $SRC_EXTRACT_DIR/ml-en/test.ml $DESTDIR/test.ml_IN-en_XX.ml_IN +cp $SRC_EXTRACT_DIR/ml-en/test.en $DESTDIR/test.ml_IN-en_XX.en_XX + +cp $SRC_EXTRACT_DIR/ur-en/train.ur $DESTDIR/train.ur_PK-en_XX.ur_PK +cp $SRC_EXTRACT_DIR/ur-en/train.en $DESTDIR/train.ur_PK-en_XX.en_XX +cp $SRC_EXTRACT_DIR/ur-en/dev.ur $DESTDIR/valid.ur_PK-en_XX.ur_PK +cp $SRC_EXTRACT_DIR/ur-en/dev.en $DESTDIR/valid.ur_PK-en_XX.en_XX +cp $SRC_EXTRACT_DIR/ur-en/test.ur $DESTDIR/test.ur_PK-en_XX.ur_PK +cp $SRC_EXTRACT_DIR/ur-en/test.en $DESTDIR/test.ur_PK-en_XX.en_XX + +cp $SRC_EXTRACT_DIR/te-en/train.te $DESTDIR/train.te_IN-en_XX.te_IN +cp $SRC_EXTRACT_DIR/te-en/train.en $DESTDIR/train.te_IN-en_XX.en_XX +cp $SRC_EXTRACT_DIR/te-en/dev.te $DESTDIR/valid.te_IN-en_XX.te_IN +cp $SRC_EXTRACT_DIR/te-en/dev.en $DESTDIR/valid.te_IN-en_XX.en_XX +cp $SRC_EXTRACT_DIR/te-en/test.te $DESTDIR/test.te_IN-en_XX.te_IN +cp $SRC_EXTRACT_DIR/te-en/test.en $DESTDIR/test.te_IN-en_XX.en_XX diff --git a/fairseq/examples/multilingual/data_scripts/download_ted_and_extract.py b/fairseq/examples/multilingual/data_scripts/download_ted_and_extract.py new file mode 100644 index 0000000..eb75668 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_ted_and_extract.py @@ -0,0 +1,338 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
class MultiLingualAlignedCorpusReader(object):
    """Read aligned sentence pairs from the TED talks TSV files.

    On construction the reader loads the ``train``, ``test`` and ``dev``
    splits from ``<corpus_path>/all_talks_<split>.tsv`` and stores the
    filtered sentence lists in ``self.data[split]['source' | 'target']``.
    """

    def __init__(self, corpus_path, delimiter='\t',
                 target_token=True, bilingual=True, corpus_type='file',
                 lang_dict=None,
                 eval_lang_dict=None, zero_shot=False,
                 detok=True,
                 ):
        """Configure the reader and eagerly load all three splits.

        Args:
            corpus_path: directory containing the ``all_talks_<split>.tsv`` files.
            delimiter: column separator of the TSV files.
            target_token: prepend ``__<target lang>__`` to each source sentence.
            bilingual: pair every source language with every target language.
            corpus_type: only ``'file'`` triggers reading from the TSV files.
            lang_dict: ``{'source': [...], 'target': [...]}`` column names;
                defaults to French -> English.
            eval_lang_dict: language pairs used for non-train splits when
                ``zero_shot`` is set.
            zero_shot: pair source/target languages positionally (``zip``)
                instead of taking their product.
            detok: run the detokenizer on files written by ``save_file``.
        """
        # A fresh dict per instance; a dict literal as the default value would
        # be shared by every reader created without an explicit lang_dict.
        if lang_dict is None:
            lang_dict = {'source': ['fr'], 'target': ['en']}

        self.empty_line_flag = 'NULL'
        self.corpus_path = corpus_path
        self.delimiter = delimiter
        self.bilingual = bilingual
        self.lang_dict = lang_dict
        self.lang_set = set()
        self.target_token = target_token
        self.zero_shot = zero_shot
        self.eval_lang_dict = eval_lang_dict
        self.corpus_type = corpus_type
        self.detok = detok

        for list_ in self.lang_dict.values():
            self.lang_set.update(list_)

        self.data = dict()
        self.data['train'] = self.read_aligned_corpus(split_type='train')
        self.data['test'] = self.read_aligned_corpus(split_type='test')
        self.data['dev'] = self.read_aligned_corpus(split_type='dev')

    def read_data(self, file_loc_):
        """Return the lines of ``file_loc_`` with surrounding whitespace stripped."""
        with io.open(file_loc_, 'r', encoding='utf8') as fp:
            return [line.strip() for line in fp]

    def filter_text(self, dict_):
        """Drop sentence pairs where either side is empty or holds the NULL flag.

        When ``target_token`` is set, the first whitespace-separated token of
        the source sentence (the language tag) is ignored for the emptiness
        check; the sentence itself is kept unchanged, tag included.
        """
        field_index = 1 if self.target_token else 0
        data_dict = defaultdict(list)
        for sent1, sent2 in zip(dict_['source'], dict_['target']):
            src_sent = ' '.join(sent1.split()[field_index:])
            if self.empty_line_flag in src_sent or len(src_sent) == 0:
                continue
            if self.empty_line_flag in sent2 or len(sent2) == 0:
                continue
            data_dict['source'].append(sent1)
            data_dict['target'].append(sent2)
        return data_dict

    def read_file(self, split_type, data_type):
        """Return the loaded sentences for a split ('train'/'test'/'dev') and side."""
        return self.data[split_type][data_type]

    def save_file(self, path_, split_type, data_type, lang):
        """Write one side of a split to the ``.tok`` variant of ``path_``.

        When ``self.detok`` is set, the written file is then passed to
        ``de_tok`` together with ``lang``.
        """
        tok_file = tok_file_name(path_, lang)
        with io.open(tok_file, 'w', encoding='utf8') as fp:
            for line in self.data[split_type][data_type]:
                fp.write(line + '\n')
        if self.detok:
            de_tok(tok_file, lang)

    def add_target_token(self, list_, lang_id):
        """Return a new list with ``__<lang_id>__ `` prepended to each sentence."""
        token = '__' + lang_id + '__'
        return [token + ' ' + sent for sent in list_]

    def read_from_single_file(self, path_, s_lang, t_lang):
        """Read the ``s_lang`` and ``t_lang`` columns of one TSV file.

        Returns:
            ``(source_sentences, target_sentences)``; the source sentences
            carry the target-language tag when ``self.target_token`` is set.
        """
        data_dict = defaultdict(list)
        with io.open(path_, 'r', encoding='utf8') as fp:
            # Honour the delimiter given to the constructor; it used to be
            # stored but ignored in favour of a hard-coded tab.
            reader = csv.DictReader(fp, delimiter=self.delimiter, quoting=csv.QUOTE_NONE)
            for row in reader:
                data_dict['source'].append(row[s_lang])
                data_dict['target'].append(row[t_lang])

        if self.target_token:
            data_dict['source'] = self.add_target_token(data_dict['source'], t_lang)

        return data_dict['source'], data_dict['target']

    def read_aligned_corpus(self, split_type='train'):
        """Load and filter every configured language pair of one split.

        Language pairs come from ``zip`` (zero-shot) or the cartesian product
        (bilingual) of the configured source/target languages; pairs with
        identical languages are skipped.
        """
        data_dict = defaultdict(list)
        iterable = []
        s_list = []
        t_list = []

        if self.zero_shot:
            if split_type == "train":
                iterable = zip(self.lang_dict['source'], self.lang_dict['target'])
            else:
                iterable = zip(self.eval_lang_dict['source'], self.eval_lang_dict['target'])
        elif self.bilingual:
            iterable = itertools.product(self.lang_dict['source'], self.lang_dict['target'])

        for s_lang, t_lang in iterable:
            if s_lang == t_lang:
                continue
            if self.corpus_type == 'file':
                split_type_file_path = os.path.join(
                    self.corpus_path, "all_talks_{}.tsv".format(split_type))
                s_list, t_list = self.read_from_single_file(
                    split_type_file_path, s_lang=s_lang, t_lang=t_lang)
            data_dict['source'] += s_list
            data_dict['target'] += t_list
        return self.filter_text(data_dict)
def bar_custom(current, total, width=80):
    """Print a one-line download progress report, overwriting the previous one.

    Args:
        current: number of bytes received so far.
        total: expected size in bytes; zero or negative means "unknown".
        width: accepted for compatibility with the progress-callback
            signature; not used.
    """
    if total <= 0:
        # Without a usable total a percentage cannot be computed; dividing
        # by zero here used to abort the whole download.
        print("Downloading: [%d / ?] Ks" % (current / 1000), end='\r')
        return
    print("Downloading: %d%% [%d / %d] Ks" % (current / total * 100, current / 1000, total / 1000), end='\r')
if __name__ == "__main__":
    # Command-line entry point: download and unpack the TED talks archive,
    # then write either the English side of every split or the bitext files
    # for each requested direction.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--ted_data_path', type=str, default=WORKDIR_ROOT, required=False)
    parser.add_argument(
        '--direction-list',
        type=str,
        # default=None,
        # for ML50
        default=(
            "bn_IN-en_XX,he_IL-en_XX,fa_IR-en_XX,id_ID-en_XX,sv_SE-en_XX,pt_XX-en_XX,ka_GE-en_XX,ka_GE-en_XX,th_TH-en_XX,"
            "mr_IN-en_XX,hr_HR-en_XX,uk_UA-en_XX,az_AZ-en_XX,mk_MK-en_XX,gl_ES-en_XX,sl_SI-en_XX,mn_MN-en_XX,"
            # non-english directions
            # "fr_XX-de_DE," # replaced with wmt20
            # "ja_XX-ko_KR,es_XX-pt_XX,ru_RU-sv_SE,hi_IN-bn_IN,id_ID-ar_AR,cs_CZ-pl_PL,ar_AR-tr_TR"
        ),
        required=False)
    parser.add_argument('--target-token', action='store_true', default=False)
    parser.add_argument('--extract-all-english', action='store_true', default=False)

    args = parser.parse_args()

    # TED Talks data directory
    ted_data_path = args.ted_data_path

    download_to = f'{ted_data_path}/downloads'
    extract_to = f'{ted_data_path}/extracted'

    # DESTDIR=${WORKDIR_ROOT}/ML50/raw/
    output_path = f'{ted_data_path}/ML50/raw'
    os.makedirs(download_to, exist_ok=True)
    os.makedirs(extract_to, exist_ok=True)
    os.makedirs(output_path, exist_ok=True)
    download_and_extract(download_to, extract_to)

    if args.extract_all_english:
        for split in ['train', 'dev', 'test']:
            # The all_talks_<split>.tsv files are unpacked into extract_to,
            # not into the data root that was passed here before.
            extra_english(extract_to, split)
        sys.exit(0)
    if args.direction_list is not None:
        directions = []
        for d in args.direction_list.strip().split(','):
            d = d.strip()
            if not d:
                continue  # tolerate the trailing comma in the default list
            pair = tuple(d.split('-', 1))
            if len(pair) != 2:
                parser.error(f'invalid direction {d!r}; expected SRC-TGT')
            directions.append(pair)
        # Drop repeated directions (the default lists ka_GE-en_XX twice) while
        # keeping the order, so no pair is extracted twice.
        directions = list(dict.fromkeys(directions))
    else:
        langs = read_langs(ted_data_path)
        directions = [('en', tgt) for tgt in langs if tgt != 'en']
    print(f'num directions={len(directions)}: {directions}')

    for src_lang, trg_lang in directions:
        print('--working on {}-{}'.format(src_lang, trg_lang))
        extra_bitex(
            extract_to,
            src_lang,
            trg_lang,
            target_token=args.target_token,
            output_data_path=output_path
        )
+wget http://lotus.kuee.kyoto-u.ac.jp/WAT/my-en-data/$WAT_MY_EN +unzip $WAT_MY_EN + + +SRC_EXTRACT_DIR=$SRCDIR/wat2020.my-en/alt + +cp $SRC_EXTRACT_DIR/train.alt.en $DESTDIR/train.my_MM-en_XX.en_XX +cp $SRC_EXTRACT_DIR/train.alt.my $DESTDIR/train.my_MM-en_XX.my_MM +cp $SRC_EXTRACT_DIR/dev.alt.en $DESTDIR/valid.my_MM-en_XX.en_XX +cp $SRC_EXTRACT_DIR/dev.alt.my $DESTDIR/valid.my_MM-en_XX.my_MM +cp $SRC_EXTRACT_DIR/test.alt.en $DESTDIR/test.my_MM-en_XX.en_XX +cp $SRC_EXTRACT_DIR/test.alt.my $DESTDIR/test.my_MM-en_XX.my_MM diff --git a/fairseq/examples/multilingual/data_scripts/download_wmt19_and_before.py b/fairseq/examples/multilingual/data_scripts/download_wmt19_and_before.py new file mode 100644 index 0000000..3465731 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_wmt19_and_before.py @@ -0,0 +1,899 @@ +from typing import NamedTuple, List +from urllib.parse import urlparse +import os, sys +import subprocess +from subprocess import check_call, check_output +import glob +import wget +import re +import multiprocessing as mp +from functools import partial +import pathlib +from collections import OrderedDict + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. 
def get_downloaded_file(dl_folder, url):
    """Map a URL record to ``(url, local_path)`` inside ``dl_folder``.

    A tuple record is taken as ``(url, file_name)``. For a plain URL the file
    name is the URL path with its leading slash dropped and the remaining
    slashes replaced by underscores.
    """
    if not isinstance(url, tuple):
        path_segments = urlparse(url).path.split('/')
        local_name = '_'.join(path_segments[1:])
    else:
        url, local_name = url
    return url, '{}/{}'.format(dl_folder, local_name)
def download_files(dl_folder, urls, completed_urls=None):
    """Download every URL record and record where each one was stored.

    Args:
        dl_folder: directory the files are downloaded into.
        urls: iterable of URL records accepted by ``get_downloaded_file``.
        completed_urls: optional dict to update in place; a new dict is
            created when omitted.

    Returns:
        The dict mapping ``str(url)`` to the downloaded file name.
    """
    # A ``{}`` default would be created once and shared by all calls, leaking
    # entries from one dataset's download into the next.
    if completed_urls is None:
        completed_urls = {}
    for url_record in urls:
        url, _ = get_downloaded_file(dl_folder, url_record)
        filename = download_a_url(dl_folder, url_record)
        completed_urls[str(url)] = filename
    return completed_urls
def my_glob(folder):
    """Yield every path found one, two or three levels below ``folder``.

    Shallower levels are yielded first; anything nested deeper than three
    levels is not reported.
    """
    for depth in (1, 2, 3):
        pattern = f'{folder}' + '/*' * depth
        yield from glob.glob(pattern)
def tmx2raw(tmx, debug):
    """Convert a ``.tmx`` file with the TMX tool, run inside the file's folder.

    The conversion is skipped when ``bitext.en`` already exists next to the
    TMX file.

    Args:
        tmx: path to the ``.tmx`` file (absolute or relative).
        debug: when truthy, report skips and echo the executed command.

    Returns:
        ``tmx`` with its ``.tmx`` suffix removed.
    """
    import shlex

    to_file = tmx[0:len(tmx) - len('.tmx')]
    # Resolve the path up front: the tool runs after a ``cd`` into the file's
    # folder, so a relative path would no longer point at the file, and a bare
    # file name used to produce an empty folder (``cd ''`` and ``/bitext.en``).
    tmx_path = os.path.abspath(tmx)
    to_folder = os.path.dirname(tmx_path)
    if os.path.exists(os.path.join(to_folder, 'bitext.en')):
        if debug:
            print(f'{tmx} already extracted to {to_file}; so skip')
        return to_file
    # ``&&`` keeps the tool from running in the wrong directory if cd fails.
    cmd = f'(cd {shlex.quote(to_folder)} && {TMX_TOOL} {shlex.quote(tmx_path)})'
    call(cmd, debug)
    return to_file
def match_patts(file_path, file_patterns, src, tgt, lang):
    """Tell whether ``file_path`` contains any of the formatted patterns.

    Args:
        file_path: path to test.
        file_patterns: iterable of pattern strings, or ``(pattern, directions)``
            tuples where the pattern only applies if ``"<src>-<tgt>"`` is in
            ``directions``.
        src, tgt, lang: values substituted for ``{src}``, ``{tgt}`` and
            ``{lang}`` in the patterns.

    Returns:
        True if a formatted pattern occurs as a substring of ``file_path``.
    """
    substitutions = (('src', src), ('tgt', tgt), ('lang', lang))
    for file_pattern in file_patterns:
        # Unpack tuple entries before formatting: calling ``.format`` on the
        # tuple itself raised AttributeError for every direction-restricted
        # pattern.
        if isinstance(file_pattern, tuple):
            file_pattern, directions = file_pattern
            if f'{src}-{tgt}' not in directions:
                continue
        params = {k: v for k, v in substitutions if k in file_pattern}
        if file_pattern.format(**params) in file_path:
            return True
    return False
== tgt else '', file_pattern) + file_pattern = file_pattern.format(**params) + return file_pattern + for file_pattern in file_patterns: + if isinstance(file_pattern, tuple): + file_pattern, lang_pairs = file_pattern + if f'{src}-{tgt}' not in lang_pairs: + continue +# print('working on pattern: ', file_pattern, lang_pairs ) + matching_pattern = get_matching_pattern(file_pattern) + if matching_pattern is None: + continue + glob_patterns = f'{extracted_folder}/{matching_pattern}' +# print('glob_patterns: ', glob_patterns) + for f in glob.glob(glob_patterns): + yield f + +# for debug usage +def all_extracted_files(split, src, tgt, extracted_folders, split_urls): + def get_url(url): + if isinstance(url, tuple): + url, downloaded_file = url + return url + return [ + f + for url in split_urls + for f in my_glob(extracted_folders[str(get_url(url))]) + ] + +def concat_files(split, src, tgt, extracted_folders, split_urls, path_patterns, to_folder, debug=False): +# if debug: +# print('extracted files to be filtered by patterns: ', +# '\n\t'.join(sorted(all_extracted_files(split, src, tgt, extracted_folders, split_urls)))) + for lang in [src, tgt]: + to_file = f'{to_folder}/{split}.{src}-{tgt}.{lang}' + s_src, s_tgt, s_lang = src.split('_')[0], tgt.split('_')[0], lang.split('_')[0] + files = [] + for url in split_urls: + if isinstance(url, tuple): + url, downloaded_file = url + if str(url) not in extracted_folders: + print(f'warning: {url} not in extracted files') + for extracted_file in set( + extracted_glob( + extracted_folders[str(url)], path_patterns, + s_src, s_tgt, s_lang)): + files.append(extracted_file) + if len(files) == 0: + print('warning: ', f'No files found for split {to_file}') + continue + files = sorted(set(files)) + print(f'concating {len(files)} files into {to_file}') + cmd = ['cat'] + [f'"{f}"' for f in files] + [f'>{to_file}'] + cmd = " ".join(cmd) + call(cmd, debug=debug) + +UTILS = os.path.join(pathlib.Path(__file__).parent, 'utils') +LID_MODEL = 
f'{download_to}/lid.176.bin' +LID_MULTI = f'{UTILS}/fasttext_multi_filter.py' + +def lid_filter(split, src, tgt, from_folder, to_folder, debug=False): + if not os.path.exists(LID_MODEL): + call(f'wget -nc https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin -O {LID_MODEL}') + from_prefix = f'{from_folder}/{split}.{src}-{tgt}' + to_prefix = f'{to_folder}/{split}.{src}-{tgt}' + if os.path.exists(f'{from_prefix}.{src}') and os.path.exists(f'{from_prefix}.{tgt}'): + s_src, s_tgt = src.split('_')[0], tgt.split('_')[0] + cmd = ( + f'python {LID_MULTI} --model {LID_MODEL} --inputs {from_prefix}.{src} {from_prefix}.{tgt} ' + f'--langs {s_src} {s_tgt} --outputs {to_prefix}.{src} {to_prefix}.{tgt}' + ) + print(f'filtering {from_prefix}') + call(cmd, debug=debug) + +def concat_into_splits(dl_dataset, src, tgt, extracted_folders, to_folder, debug): + to_folder_tmp = f"{to_folder}_tmp" + os.makedirs(to_folder_tmp, exist_ok=True) + concat_files('train', src, tgt, + extracted_folders, + split_urls=dl_dataset.train_urls, + path_patterns=dl_dataset.train_files_patterns, + to_folder=to_folder_tmp, debug=debug) + lid_filter('train', src, tgt, to_folder_tmp, to_folder, debug) + + concat_files('valid', src, tgt, + extracted_folders, + split_urls=dl_dataset.valid_urls, + path_patterns=dl_dataset.valid_files_patterns, + to_folder=to_folder, debug=debug) + concat_files('test', src, tgt, + extracted_folders, + split_urls=dl_dataset.test_urls, + path_patterns=dl_dataset.test_files_patterns, + to_folder=to_folder, debug=debug) + + +def download_multi(dl_folder, extract_folder, urls, num_processes=8, debug=False): + pool = mp.Pool(processes=num_processes) + download_f = partial(download_a_url, dl_folder) + downloaded_files = pool.imap_unordered(download_f, urls) + pool.close() + pool.join() + +BLEU_REGEX = re.compile("^BLEU\\S* = (\\S+) ") +def run_eval_bleu(cmd): + output = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode("utf-8").strip() + print(output) + bleu 
= -1.0 + for line in output.strip().split('\n'): + m = BLEU_REGEX.search(line) + if m is not None: + bleu = m.groups()[0] + bleu = float(bleu) + break + return bleu + +def check_wmt_test_bleu(raw_folder, wmt_lang_pairs): + not_matchings = [] + for wmt, src_tgts in wmt_lang_pairs: + for src_tgt in src_tgts: + print(f'checking test bleus for: {src_tgt} at {wmt}') + src, tgt = src_tgt.split('-') + ssrc, stgt = src[:2], tgt[:2] + if os.path.exists(f'{raw_folder}/test.{tgt}-{src}.{src}'): + # reversed direction may have different test set + test_src = f'{raw_folder}/test.{tgt}-{src}.{src}' + else: + test_src = f'{raw_folder}/test.{src}-{tgt}.{src}' + cmd1 = f'cat {test_src} | sacrebleu -t "{wmt}" -l {stgt}-{ssrc}; [ $? -eq 0 ] || echo ""' + test_tgt = f'{raw_folder}/test.{src}-{tgt}.{tgt}' + cmd2 = f'cat {test_tgt} | sacrebleu -t "{wmt}" -l {ssrc}-{stgt}; [ $? -eq 0 ] || echo ""' + bleu1 = run_eval_bleu(cmd1) + if bleu1 != 100.0: + not_matchings.append(f'{wmt}:{src_tgt} source side not matching: {test_src}') + bleu2 = run_eval_bleu(cmd2) + if bleu2 != 100.0: + not_matchings.append(f'{wmt}:{src_tgt} target side not matching: {test_tgt}') + return not_matchings + +def download_and_extract( + to_folder, lang_pairs, dl_dataset, + to_manually_download_urls, + completed_urls={}, completed_extraction={}, + debug=False): + + dl_folder = f'{to_folder}/downloads' + extract_folder = f'{to_folder}/extracted' + raw_folder = f'{to_folder}/raw' + lid_filtered = f'{to_folder}/lid_filtered' + + os.makedirs(extract_folder, exist_ok=True) + os.makedirs(raw_folder, exist_ok=True) + os.makedirs(lid_filtered, exist_ok=True) + + + to_be_manually_dowloaded = check_need_manual_downalod(dl_folder, to_manually_download_urls) + + completed_urls = download_dataset( + dl_folder, dl_dataset, completed_urls) + if debug: + print('completed urls: ', completed_urls) + + + extracted_folders = extract_all_files( + completed_urls, + extract_folder=extract_folder, + completed_extraction=completed_extraction, 
+ debug=debug) + if debug: + print('download files have been extracted to folders: ', extracted_folders) + + converted_files = convert_files_if_needed(extracted_folders, debug=False) + for src_tgt in lang_pairs: + print(f'working on {dl_dataset.name}: {src_tgt}') + src, tgt = src_tgt.split('-') + concat_into_splits(dl_dataset, + src=src, tgt=tgt, + extracted_folders=extracted_folders, + to_folder=raw_folder, debug=debug) + print('completed data into: ', raw_folder) + +def download_czang16(download_to, username=None): + wgets = [ + f'wget --user={username} --password=czeng -P {download_to} http://ufallab.ms.mff.cuni.cz/~bojar/czeng16-data/data-plaintext-format.{i}.tar' + for i in range(10)] + cmds = [] + for i, cmd in enumerate(wgets): + filename = f'{download_to}/data-plaintext-format.{i}.tar' + if os.path.exists(filename): + print(f'{filename} has already been downloaded; so skip') + continue + cmds.append(cmd) + if cmds and username is None: + raise ValueError('No czeng username is given; please register at http://ufal.mff.cuni.cz/czeng/czeng16 to obtain username to download') + for cmd in cmds: + call(cmd) + print('done with downloading czeng1.6') + +def download_czeng17_script(download_to, extract_folder, debug=False): + url = 'http://ufal.mff.cuni.cz/czeng/download.php?f=convert_czeng16_to_17.pl.zip' + filename = f'{download_to}/convert_czeng16_to_17.pl.zip' + extract_to = f'{extract_folder}/{get_extract_name(filename)}' + script_path = f'{extract_to}/convert_czeng16_to_17.pl' + + if not os.path.exists(script_path): + wget.download(url, filename, bar=bar_custom) + extract_to = extract_file(f'{download_to}/convert_czeng16_to_17.pl.zip', extract_folder, get_extract_name=get_extract_name, debug=debug) + return script_path + +czeng17_script_path = "" +def convert2czeng17(file, debug): + en_file = f'{file}.en' + cs_file = f'{file}.cs' + + if not os.path.exists(en_file) or not os.path.exists(cs_file): + cs_cmd = f'cat {file} | perl {czeng17_script_path} | cut -f3 > 
{cs_file}' + en_cmd = f'cat {file} | perl {czeng17_script_path} | cut -f4 > {en_file}' + call(cs_cmd, debug) + call(en_cmd, debug) + else: + print(f'already extracted: {en_file} and {cs_file}') + return file + +def extract_czeng17(extract_folder, debug=False): + url = 'http://ufal.mff.cuni.cz/czeng/download.php?f=convert_czeng16_to_17.pl.zip' + filename = f'{download_to}/convert_czeng16_to_17.pl.zip' + extract_to = f'{extract_folder}/{get_extract_name(filename)}' + script_path = f'{extract_to}/convert_czeng16_to_17.pl' + + if not os.path.exists(script_path): + wget.download(url, filename, bar=bar_custom) + extract_to = extract_file(f'{download_to}/convert_czeng16_to_17.pl.zip', extract_folder, get_extract_name=get_extract_name, debug=debug) + return script_path + +######### +# definitions of wmt data sources +# for es-en +# Punctuation in the official test sets will be encoded with ASCII characters (not complex Unicode characters) as much as possible. You may want to normalize your system's output before submission. You are able able to use a rawer version of the test sets that does not have this normalization. 
+# script to normalize punctuation: http://www.statmt.org/wmt11/normalize-punctuation.perl +wmt13_es_en = DLDataset( + name='wmt13_es-en', + train_urls=[ + 'http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz', + 'http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz', + 'http://www.statmt.org/wmt13/training-parallel-un.tgz', + 'http://www.statmt.org/wmt13/training-parallel-nc-v8.tgz', + ], + valid_urls=[ + ('http://www.statmt.org/wmt13/dev.tgz', 'wmt13_dev.tgz') + ], + test_urls=[ + ('http://www.statmt.org/wmt13/test.tgz', 'wmt13_test.tgz') + ], + train_files_patterns=[ + ('*/europarl-v7.{src}-{tgt}.{lang}', ['es-en']), + ('*commoncrawl.{src}-{tgt}.{lang}', ['es-en']), + ('*/news-commentary-v8.{src}-{tgt}.{lang}', ['es-en']), + ('un/*undoc.2000.{src}-{tgt}.{lang}', ['es-en']), + ] , + valid_files_patterns=[ + ('dev/newstest2012.{lang}', ['es-en']) + ], + test_files_patterns=[ + ('test/newstest*.{lang}', ['es-en']) + ], +) + +wmt14_de_fr_en = DLDataset( + name='wmt14_de_fr_en', + train_urls=[ + 'http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz', + 'http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz', + 'http://www.statmt.org/wmt13/training-parallel-un.tgz', + 'http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz', + ('http://www.statmt.org/wmt10/training-giga-fren.tar', 'training-giga-fren.gz.tar'), #it is actuall a gz.tar + ], + valid_urls=[ + ('http://www.statmt.org/wmt14/dev.tgz', 'wmt14_dev.tgz'), + ], + test_urls=[ + ('http://www.statmt.org/wmt14/test-full.tgz', 'wmt14_test_full.tgz'), # cleaned test sets + ], + train_files_patterns=[ + ('*/europarl-v7.{src}-{tgt}.{lang}', ['fr-en', 'de-en']), + ('*commoncrawl.{src}-{tgt}.{lang}', ['fr-en', 'de-en']), + ('*/*news-commentary-v9.{src}-{tgt}.{lang}', ['fr-en', 'de-en']), + ('un/undoc.2000.{src}-{tgt}.{lang}', ['fr-en']), + ('*giga-{src}{tgt}*{lang}', ['fr-en']) + ], + valid_files_patterns=[ + ('dev/newstest2013.{lang}', ['fr-en', 'de-en']) + ], + test_files_patterns=[ 
+ ('test-full/newstest*{src}{tgt}-{src:src}{tgt:ref}.{lang}', ['en-de', 'de-en', 'fr-en', 'en-fr']), + ], +) + +# pip install git+https://github.com/amake/tmx2corpus.git +wmt16_ro_en = DLDataset( + name='wmt16_ro-en', + train_urls=[ + ('http://data.statmt.org/wmt16/translation-task/training-parallel-ep-v8.tgz', 'wmt16_training-parallel-ep-v8.tgz'), + ('http://opus.nlpl.eu/download.php?f=SETIMES/v2/tmx/en-ro.tmx.gz', 'en-ro.tmx.gz'), + ], + valid_urls=[ + ('http://data.statmt.org/wmt16/translation-task/dev-romanian-updated.tgz', 'wmt16_dev.tgz') + ], + test_urls=[ + ('http://data.statmt.org/wmt16/translation-task/test.tgz', 'wmt16_test.tgz') + ], + train_files_patterns=[ + ('*/*europarl-v8.{src}-{tgt}.{lang}', ['ro-en']), + ('bitext.{lang}', ['ro-en']) #setimes from tmux + ] , + valid_files_patterns=[ + ('dev/newsdev2016*{src}{tgt}*.{lang}', ['ro-en', 'ro-en']) + ], + test_files_patterns=[ + ('test/newstest*{src}{tgt}*.{lang}', ['ro-en', 'en-ro']) + ], +) + +cwmt_wmt_instruction = 'cwmt download instruction at: http://nlp.nju.edu.cn/cwmt-wmt' +wmt17_fi_lv_tr_zh_en_manual_downloads = [ + # fake urls to have unique keys for the data + ( ('http://nlp.nju.edu.cn/cwmt-wmt/CASIA2015.zip', 'CASIA2015.zip'), cwmt_wmt_instruction), + ( ('http://nlp.nju.edu.cn/cwmt-wmt/CASICT2011.zip', 'CASICT2011.zip'), cwmt_wmt_instruction), + ( ('http://nlp.nju.edu.cn/cwmt-wmt/CASICT2015.zip', 'CASICT2015.zip'), cwmt_wmt_instruction), + ( ('http://nlp.nju.edu.cn/cwmt-wmt/Datum2015.zip', 'Datum2015.zip'), cwmt_wmt_instruction), + ( ('http://nlp.nju.edu.cn/cwmt-wmt/Datum2017.zip', 'Datum2017.zip'), cwmt_wmt_instruction), + ( ('http://nlp.nju.edu.cn/cwmt-wmt/NEU2017.zip', 'NEU2017.zip'), cwmt_wmt_instruction), +] +wmt17_fi_lv_tr_zh_en = DLDataset( + name='wmt17_fi_lv_tr_zh_en', + train_urls=[ + ('http://data.statmt.org/wmt17/translation-task/training-parallel-ep-v8.tgz', 'wmt17_training-parallel-ep-v8.tgz'), + 'http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz', + 
'http://www.statmt.org/wmt15/wiki-titles.tgz', + ('http://opus.nlpl.eu/download.php?f=SETIMES/v2/tmx/en-tr.tmx.gz', 'en-tr.tmx.gz'), + ('http://data.statmt.org/wmt17/translation-task/rapid2016.tgz', 'wmt17_rapid2016.tgz'), + 'http://data.statmt.org/wmt17/translation-task/leta.v1.tgz', + 'http://data.statmt.org/wmt17/translation-task/dcep.lv-en.v1.tgz', + 'http://data.statmt.org/wmt17/translation-task/books.lv-en.v1.tgz', + (('https://stuncorpusprod.blob.core.windows.net/corpusfiles/UNv1.0.en-zh.tar.gz.00', + 'https://stuncorpusprod.blob.core.windows.net/corpusfiles/UNv1.0.en-zh.tar.gz.01',), 'UNv1.0.en-zh.tar.gz'), + #manually download files: + ('http://nlp.nju.edu.cn/cwmt-wmt/CASIA2015.zip', 'CASIA2015.zip'), + ('http://nlp.nju.edu.cn/cwmt-wmt/CASICT2011.zip', 'CASICT2011.zip'), + ('http://nlp.nju.edu.cn/cwmt-wmt/CASICT2015.zip', 'CASICT2015.zip'), + ('http://nlp.nju.edu.cn/cwmt-wmt/Datum2015.zip', 'Datum2015.zip'), + ('http://nlp.nju.edu.cn/cwmt-wmt/Datum2017.zip', 'Datum2017.zip'), + ('http://nlp.nju.edu.cn/cwmt-wmt/NEU2017.zip', 'NEU2017.zip'), + ], + valid_urls=[ + ('http://data.statmt.org/wmt17/translation-task/dev.tgz', 'wmt17_dev.tgz'), + ], + test_urls=[ + #NEW: Improved translations for zh test sets + ('http://data.statmt.org/wmt17/translation-task/test-update-1.tgz', 'wmt17_test_zh_en.tgz'), + ('http://data.statmt.org/wmt17/translation-task/test.tgz', 'wmt17_test_others.tgz') + ], + train_files_patterns=[ + ('casict*/cas*{src:ch}{tgt:en}.txt', ['zh-en', 'zh-en'] ), + ('casia*/cas*{src:ch}{tgt:en}.txt', ['zh-en', 'zh-en'] ), + ('dataum*/Book*{src:cn}{tgt:en}.txt', ['zh-en', 'zh-en']), + ('neu*/NEU*{src:cn}{tgt:en}.txt', ['zh-en', 'zh-en'] ), + ('*/*UNv1.0.en-zh.{src:zh}{tgt:en}', ['zh-en']), + ('training/*news-commentary-v12.{src}-{tgt}.{lang}', ['zh-en', ]), + + ('*/*europarl-v8.{src}-{tgt}.{lang}', ['fi-en', 'lv-en']), + ('wiki/fi-en/titles.{src}-{tgt}.{lang}', ['fi-en', ]), + ('rapid2016.{tgt}-{src}.{lang}', ['fi-en', 'lv-en']), + ('*/leta.{lang}', 
['lv-en']), + ('*/dcep.{lang}', ['lv-en']), + ('*/farewell.{lang}', ['lv-en']), + ('bitext.{lang}', ['tr-en']), + ] , + valid_files_patterns=[ + ('dev/newsdev2017*{src}{tgt}-{src:src}{tgt:ref}.{lang}', + [ + 'fi-en', 'lv-en', 'tr-en', 'zh-en', + 'en-fi', 'en-lv', 'en-tr', 'en-zh' + ]), + ('dev/newstest2016*{src}{tgt}-{src:src}{tgt:ref}.{lang}', + [ + 'fi-en', 'tr-en', + 'en-fi', 'en-tr', + ]), + ], + test_files_patterns=[ + ('test/newstest2017-{src}{tgt}-{src:src}{tgt:ref}.{lang}', + [ + 'fi-en', 'lv-en', 'tr-en', + 'en-fi', 'en-lv', 'en-tr', + ]), + ('newstest2017-{src}{tgt}-{src:src}{tgt:ref}.{lang}', + [ + 'zh-en', + 'en-zh' + ]), + ], +) + +czeng_instruction = 'download instruction at: http://ufal.mff.cuni.cz/czeng/czeng16' +#alternative: use the prepared data but detokenize it? +wmt18_cs_et_en_manual_downloads = [ +#for cs, need to register and download; Register and download CzEng 1.6. +#Better results can be obtained by using a subset of sentences, released under a new version name CzEng 1.7. 
+ # ((f'http://ufallab.ms.mff.cuni.cz/~bojar/czeng16-data/data-plaintext-format.{i}.tar', + # f'data-plaintext-format.{i}.tar'), czeng_instruction) + # for i in range(10) +] + +wmt18_cs_et_en = DLDataset( + name='wmt18_cs_et_en', + train_urls=[ + 'http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz', + 'http://data.statmt.org/wmt18/translation-task/training-parallel-ep-v8.tgz', + 'https://s3.amazonaws.com/web-language-models/paracrawl/release1/paracrawl-release1.en-cs.zipporah0-dedup-clean.tgz', + 'https://s3.amazonaws.com/web-language-models/paracrawl/release1/paracrawl-release1.en-et.zipporah0-dedup-clean.tgz', + 'http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz', + 'http://data.statmt.org/wmt18/translation-task/training-parallel-nc-v13.tgz', + ('http://data.statmt.org/wmt18/translation-task/rapid2016.tgz', 'wmt18_rapid2016.tgz'), + # (tuple( + # (f'http://ufallab.ms.mff.cuni.cz/~bojar/czeng16-data/data-plaintext-format.{i}.tar', + # f'data-plaintext-format.{i}.tar') + # for i in range(10) + # ), + # 'czeng16_data_plaintext.gz.tar'), + ], + valid_urls=[ + ('http://data.statmt.org/wmt18/translation-task/dev.tgz', 'wmt18_dev.tgz'), + ], + test_urls=[ + ('http://data.statmt.org/wmt18/translation-task/test.tgz', 'wmt18_test.tgz'), + ], + train_files_patterns=[ + # ('*/*europarl-v7.{src}-{tgt}.{lang}', ['cs-en']), + ('*/*europarl-v8.{src}-{tgt}.{lang}', ['et-en']), + # ('*paracrawl-release1.{tgt}-{src}.zipporah0-dedup-clean.{lang}', ['cs-en', 'et-en']), + ('*paracrawl-release1.{tgt}-{src}.zipporah0-dedup-clean.{lang}', ['et-en']), + # ('*commoncrawl.{src}-{tgt}.{lang}', ['cs-en']), + # ('*/news-commentary-v13.{src}-{tgt}.{lang}', ['cs-en']), + # ('data.plaintext-format/*train.{lang}', ['cs-en']), + ('rapid2016.{tgt}-{src}.{lang}', ['et-en']), + ] , + valid_files_patterns=[ + ('dev/newsdev2018*{src}{tgt}-{src:src}{tgt:ref}.{lang}', ['et-en']), + # ('dev/newstest2017*{src}{tgt}-{src:src}{tgt:ref}.{lang}', ['cs-en']) + ], + test_files_patterns=[ + 
('test/newstest2018-{src}{tgt}-{src:src}{tgt:ref}.{lang}', + # ['cs-en', 'et-en']), + ['et-en']), + ] +) + +ru_en_yandex_instruction = 'Yandex Corpus download instruction at: https://translate.yandex.ru/corpus?lang=en' +wmt19_ru_gu_kk_lt_manual_downloads = [ + (('https://translate.yandex.ru/corpus?lang=en', 'wmt19_1mcorpus.zip'), ru_en_yandex_instruction) +] +wmt19_ru_gu_kk_lt = DLDataset( + name='wmt19_ru_gu_kk_lt', + train_urls=[ + 'http://www.statmt.org/europarl/v9/training/europarl-v9.lt-en.tsv.gz', + 'https://s3.amazonaws.com/web-language-models/paracrawl/release3/en-lt.bicleaner07.tmx.gz', + 'https://s3.amazonaws.com/web-language-models/paracrawl/release1/paracrawl-release1.en-ru.zipporah0-dedup-clean.tgz', + 'http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz', + 'http://data.statmt.org/news-commentary/v14/training/news-commentary-v14-wmt19.en-kk.tsv.gz', + 'http://data.statmt.org/news-commentary/v14/training/news-commentary-v14.en-ru.tsv.gz', + 'http://data.statmt.org/wikititles/v1/wikititles-v1.kk-en.tsv.gz', + 'http://data.statmt.org/wikititles/v1/wikititles-v1.ru-en.tsv.gz', + 'http://data.statmt.org/wikititles/v1/wikititles-v1.kk-en.tsv.gz', + 'http://data.statmt.org/wikititles/v1/wikititles-v1.lt-en.tsv.gz', + 'http://data.statmt.org/wikititles/v1/wikititles-v1.gu-en.tsv.gz', + (('https://stuncorpusprod.blob.core.windows.net/corpusfiles/UNv1.0.en-ru.tar.gz.00', + 'https://stuncorpusprod.blob.core.windows.net/corpusfiles/UNv1.0.en-ru.tar.gz.01', + 'https://stuncorpusprod.blob.core.windows.net/corpusfiles/UNv1.0.en-ru.tar.gz.02',), + 'wmt19_UNv1.0.en-ru.tar.gz'), + 'https://tilde-model.s3-eu-west-1.amazonaws.com/rapid2016.en-lt.tmx.zip', + ('https://translate.yandex.ru/corpus?lang=en', 'wmt19_1mcorpus.zip'), + ], + valid_urls=[ + ('http://data.statmt.org/wmt19/translation-task/dev.tgz', 'wmt19_dev.tgz'), + ], + test_urls=[ + ('http://data.statmt.org/wmt19/translation-task/test.tgz', 'wmt19_test.tgz'), + ], + train_files_patterns=[ + 
('*europarl-v9.{src}-{tgt}.tsv.{lang}', ['lt-en']), + #paracrawl + ('*paracrawl-release1.{tgt}-{src}.zipporah0-dedup-clean.{lang}', ['ru-en']), + ('bitext.{lang}', ['lt-en',]), + ('*commoncrawl.{src}-{tgt}.{lang}', ['ru-en',]), + ('*news-commentary-v14-wmt19.{tgt}-{src}.tsv.{lang}', ['kk-en', ]), + ('*news-commentary-v14.{tgt}-{src}.tsv.{lang}', ['ru-en']), + #yandex + ('corpus.{tgt}_{src}.1m.{lang}', ['ru-en']), + ('wikititles_v1_wikititles-v1.{src}-{tgt}.tsv.{lang}', ['ru-en', 'kk-en', 'lt-en', 'gu-en']), + ('*/UNv1.0.{tgt}-{src}.{lang}', ['ru-en']), + #rapid + ('bitext.{lang}', ['lt-en']) + ], + valid_files_patterns=[ + ('dev/newsdev2019*{src}{tgt}-{src:src}{tgt:ref}.{lang}', ['gu-en', 'kk-en', 'lt-en']), + ('dev/newstest2018*{src}{tgt}-{src:src}{tgt:ref}.{lang}', ['ru-en']), + ], + test_files_patterns=[ + ('sgm/newstest2019-{src}{tgt}-{src:src}{tgt:ref}.{lang}', + ['ru-en', 'gu-en', 'kk-en', 'lt-en', 'en-ru', 'en-gu', 'en-kk', 'en-lt']), + ] +) + + +######### + +if __name__ == "__main__": + # speed up the downloads with multiple processing + dl_folder = f'{to_data_path}/downloads' + extract_folder = f'{to_data_path}/extracted' + + urls = [ + url + for dataset in [wmt13_es_en, wmt14_de_fr_en, wmt16_ro_en, wmt18_cs_et_en, wmt19_ru_gu_kk_lt] + for urls in [dataset.train_urls, dataset.valid_urls, dataset.test_urls] + for url in urls + ] + urls = set(urls) + download_multi(dl_folder, extract_folder, urls, num_processes=8, debug=True) + + # check manually downlaods + to_manually_download_urls = ( + wmt17_fi_lv_tr_zh_en_manual_downloads + wmt18_cs_et_en_manual_downloads + wmt19_ru_gu_kk_lt_manual_downloads + ) + to_be_manually_dowloaded = check_need_manual_downalod(dl_folder, to_manually_download_urls) + if len(to_be_manually_dowloaded) > 0: + print('Missing files that need to be downloaded manually; stop the process now.') + exit(-1) + + completed_urls = {} + completed_extraction = {} + def work_on_wmt(directions, wmt_data): + download_and_extract( + to_data_path, + 
directions, + wmt_data, + to_manually_download_urls=to_manually_download_urls, + completed_urls=completed_urls, completed_extraction=completed_extraction, debug=True) + + work_on_wmt( + ['es_XX-en_XX'], + wmt13_es_en,) + work_on_wmt( + [ + 'fr_XX-en_XX', 'en_XX-fr_XX', + # 'en_XX-de_DE', 'de_DE-en_XX', + ], + wmt14_de_fr_en,) + work_on_wmt( + ['ro_RO-en_XX', 'en_XX-ro_XX'], + wmt16_ro_en,) + work_on_wmt( + [ + # 'zh_CN-en_XX', + 'lv_LV-en_XX', 'fi_FI-en_XX', 'tr_TR-en_XX', + #in case the reversed directions have different train/valid/test data + # 'en_XX-zh_CN', + 'en_XX-lv_LV', 'en_XX-fi_FI', 'en_XX-tr_TR', + ], + wmt17_fi_lv_tr_zh_en, ) + # czeng17_script_path = download_czeng17_script(download_to, extract_to, debug=False) + # cz_username = None + work_on_wmt( + [ + # 'cs_CZ-en_XX', + 'et_EE-en_XX'], + wmt18_cs_et_en,) + work_on_wmt( + [ + # 'ru_RU-en_XX', 'en_XX-ru_RU', + 'gu_IN-en_XX', 'kk_KZ-en_XX', 'lt_LT-en_XX', + #in case the reversed directions have different train/valid/test data + 'en_XX-gu_IN', 'en_XX-kk_KZ', 'en_XX-lt_LT' + ], + wmt19_ru_gu_kk_lt,) + + not_matching = check_wmt_test_bleu( + f'{to_data_path}/raw', + [ + ('wmt13', ['es_XX-en_XX']), + ('wmt14/full', ['fr_XX-en_XX',]), + ('wmt16', ['ro_RO-en_XX',]), + # ('wmt17/improved', ['zh_CN-en_XX']), + ('wmt17', [ 'lv_LV-en_XX', 'fi_FI-en_XX', 'tr_TR-en_XX']), + ('wmt18', ['cs_CZ-en_XX', 'et_EE-en_XX']), + ('wmt19', ['gu_IN-en_XX', 'kk_KZ-en_XX', 'lt_LT-en_XX']), + #'ru_RU-en_XX', + ] + ) + if len(not_matching) > 0: + print('the following datasets do not have matching test datasets:\n\t', '\n\t'.join(not_matching)) + diff --git a/fairseq/examples/multilingual/data_scripts/download_wmt20.sh b/fairseq/examples/multilingual/data_scripts/download_wmt20.sh new file mode 100644 index 0000000..31cd5c7 --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/download_wmt20.sh @@ -0,0 +1,547 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. 
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +if [ -z $WORKDIR_ROOT ] ; +then + echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." + exit +fi + + + +set -x -e + +# TODO update the workdir and dest dir name +# put fasttext model +WORKDIR=$WORKDIR_ROOT +# put intermediate files +TMP_DIR=$WORKDIR_ROOT/tmp/tmp_wmt20_lowres_download +# output {train,valid,test} files to dest +DEST=$WORKDIR_ROOT/ML50/raw + +UTILS=$PWD/utils + +# per dataset locations +COMMONCRAWL_DIR=$TMP_DIR/commoncrawl +YANDEX_CORPUS=$WORKDIR_ROOT/wmt20/official/ru/yandex/1mcorpus.zip +# unzipped +CZENG_CORPUS=$WORKDIR_ROOT/wmt20/official/cs/czeng/czeng20-train +CCMT_DIR=$WORKDIR_ROOT/wmt20/official/zh/ccmt/parallel + +download_and_select() { + SUBFOLDER=$1 + URL=$2 + UNCOMPRESS_CMD=$3 + LANG=$4 + INPUT_FILEPATH=$5 + if [[ $# -gt 5 ]]; then + LANG_COL=$6 + EN_COL=$7 + fi + + mkdir -p $SUBFOLDER + cd $SUBFOLDER + wget -nc --content-disposition $URL + $UNCOMPRESS_CMD + + if [[ $# -gt 5 ]]; then + cut -f$LANG_COL $INPUT_FILEPATH > $INPUT_FILEPATH.$LANG + cut -f$EN_COL $INPUT_FILEPATH > $INPUT_FILEPATH.en + fi + cd .. + + ln -sf $SUBFOLDER/$INPUT_FILEPATH.$LANG $SUBFOLDER.$LANG + ln -sf $SUBFOLDER/$INPUT_FILEPATH.en $SUBFOLDER.en +} + +prepare_lid() { + pip install fasttext + + # TODO specify global workdir + MODEL=$WORKDIR/fasttext/lid.176.bin + LID_MULTI=$UTILS/fasttext_multi_filter.py + + if [ ! -f "$MODEL" ]; then + echo "downloading fasttext lid model..." + mkdir -p $WORKDIR/fasttext + wget -nc https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin -O $MODEL + fi +} + +prepare_moses() { + pushd $UTILS + echo 'Cloning Moses github repository (for tokenization scripts)...' 
+ git clone https://github.com/moses-smt/mosesdecoder.git + popd +} + +lid_filter() { + # TODO specify global workdir + MODEL=$WORKDIR/fasttext/lid.176.bin + LID_MULTI=$UTILS/fasttext_multi_filter.py + + prepare_lid + + SRC=$1 + SRC_FILE=$2 + SRC_OUTPUT=$3 + TGT=$4 + TGT_FILE=$5 + TGT_OUTPUT=$6 + python $LID_MULTI --model $MODEL --inputs $SRC_FILE $TGT_FILE --langs $SRC $TGT --outputs $SRC_OUTPUT $TGT_OUTPUT +} + +prepare_ja_ted() { + mkdir -p ted + cd ted + + wget -nc https://wit3.fbk.eu/archive/2017-01-trnted//texts/en/ja/en-ja.tgz + tar -zxvf en-ja.tgz + cat en-ja/train.tags.en-ja.en | grep -v -P "^[ ]*\<" | sed 's/^[ \t]*//g' | sed 's/[ \t]*$//g' > en-ja/train.en-ja.en + cat en-ja/train.tags.en-ja.ja | grep -v -P "^[ ]*\<" | sed 's/^[ \t]*//g' | sed 's/[ \t]*$//g' > en-ja/train.en-ja.ja + + cd .. + ln -sf ted/en-ja/train.en-ja.ja ted.ja + ln -sf ted/en-ja/train.en-ja.en ted.en +} + +prepare_ja() { + OUTPUT_DIR=$TMP_DIR/ja + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select paracrawl "http://www.kecl.ntt.co.jp/icl/lirg/jparacrawl/release/2.0/bitext/en-ja.tar.gz" "tar -zxvf en-ja.tar.gz" ja en-ja/en-ja.bicleaner05.txt 4 3 & + download_and_select newscommentary "http://data.statmt.org/news-commentary/v15/training/news-commentary-v15.en-ja.tsv.gz" "gunzip -f news-commentary-v15.en-ja.tsv.gz" ja news-commentary-v15.en-ja.tsv 2 1 & + download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.ja-en.tsv.gz" "gunzip -f wikititles-v2.ja-en.tsv.gz" ja wikititles-v2.ja-en.tsv 1 2 & + download_and_select wikimatrix "http://data.statmt.org/wmt20/translation-task/WikiMatrix/WikiMatrix.v1.en-ja.langid.tsv.gz" "gunzip -f WikiMatrix.v1.en-ja.langid.tsv.gz" ja WikiMatrix.v1.en-ja.langid.tsv 3 2 & + download_and_select subtitle "https://nlp.stanford.edu/projects/jesc/data/split.tar.gz" "tar -zxvf split.tar.gz" ja split/train 2 1 & + download_and_select kftt "http://www.phontron.com/kftt/download/kftt-data-1.0.tar.gz" "tar -zxvf 
kftt-data-1.0.tar.gz" ja kftt-data-1.0/data/orig/kyoto-train & + + prepare_ja_ted & + + # ted data needs to + + wait + + # remove previous results + rm -f all.?? + find ./ -maxdepth 1 -name "*.ja" | sort -V | xargs cat > all.ja + find ./ -maxdepth 1 -name "*.en" | sort -V | xargs cat > all.en + lid_filter ja all.ja $DEST/train.ja_XX-en_XX.ja_XX en all.en $DEST/train.ja_XX-en_XX.en_XX +} + +prepare_ta() { + OUTPUT_DIR=$TMP_DIR/ta + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.ta-en.tsv.gz" "gunzip -f wikititles-v2.ta-en.tsv.gz" ta wikititles-v2.ta-en.tsv 1 2 & + download_and_select wikimatrix "http://data.statmt.org/wmt20/translation-task/WikiMatrix/WikiMatrix.v1.en-ta.langid.tsv.gz" "gunzip -f WikiMatrix.v1.en-ta.langid.tsv.gz" ta WikiMatrix.v1.en-ta.langid.tsv 3 2 & + download_and_select pmindia "http://data.statmt.org/pmindia/v1/parallel/pmindia.v1.ta-en.tsv" "" ta pmindia.v1.ta-en.tsv 2 1 & + download_and_select tanzil "https://object.pouta.csc.fi/OPUS-Tanzil/v1/moses/en-ta.txt.zip" "unzip en-ta.txt.zip" ta Tanzil.en-ta & + download_and_select pib "http://preon.iiit.ac.in/~jerin/resources/datasets/pib-v0.tar" "tar -xvf pib-v0.tar" ta pib/en-ta/train & + download_and_select mkb "http://preon.iiit.ac.in/~jerin/resources/datasets/mkb-v0.tar" "tar -xvf mkb-v0.tar" ta mkb/en-ta/mkb & + download_and_select ufal "http://ufal.mff.cuni.cz/~ramasamy/parallel/data/v2/en-ta-parallel-v2.tar.gz" "tar -zxvf en-ta-parallel-v2.tar.gz" ta en-ta-parallel-v2/corpus.bcn.train & + + wait + + # need special handling for nlpc + mkdir -p nlpc + cd nlpc + wget -nc https://raw.githubusercontent.com/nlpc-uom/English-Tamil-Parallel-Corpus/master/En-Ta%20Corpus/En-Ta%20English.txt + wget -nc https://github.com/nlpc-uom/English-Tamil-Parallel-Corpus/raw/master/En-Ta%20Corpus/En-Ta%20Tamil.txt + tail -n +4 "En-Ta English.txt" > en-ta.en + tail -n +4 "En-Ta Tamil.txt" > en-ta.ta + cd .. 
+ ln -sf nlpc/en-ta.en nlpc.en + ln -sf nlpc/en-ta.ta nlpc.ta + + # remove previous results + rm -f all.?? + find ./ -maxdepth 1 -name "*.ta" | sort -V | xargs cat > all.ta + find ./ -maxdepth 1 -name "*.en" | sort -V | xargs cat > all.en + lid_filter ta all.ta $DEST/train.ta_IN-en_XX.ta_IN en all.en $DEST/train.ta_IN-en_XX.en_XX +} + +prepare_iu() { + OUTPUT_DIR=$TMP_DIR/iu + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select nh "https://nrc-digital-repository.canada.ca/eng/view/dataset/?id=c7e34fa7-7629-43c2-bd6d-19b32bf64f60" "tar -zxvf Nunavut-Hansard-Inuktitut-English-Parallel-Corpus-3.0.1.tgz" iu Nunavut-Hansard-Inuktitut-English-Parallel-Corpus-3.0/NunavutHansard > /dev/null & + download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.iu-en.tsv.gz" "gunzip -f wikititles-v2.iu-en.tsv.gz" iu wikititles-v2.iu-en.tsv 1 2 & + + wait + + # remove previous results + rm -f all.?? + find ./ -maxdepth 1 -name "*.iu" | sort -V | xargs cat | nh/Nunavut-Hansard-Inuktitut-English-Parallel-Corpus-3.0/scripts/normalize-iu-spelling.pl > all.iu + find ./ -maxdepth 1 -name "*.en" | sort -V | xargs cat > all.en + paste all.iu all.en | awk -F $'\t' '$1!=""&&$2!=""' > all.iuen + cut -f1 all.iuen > $DEST/train.iu_CA-en_XX.iu_CA + cut -f2 all.iuen > $DEST/train.iu_CA-en_XX.en_XX +} + +prepare_km() { + OUTPUT_DIR=$TMP_DIR/km + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select paracrawl "http://data.statmt.org/wmt20/translation-task/ps-km/wmt20-sent.en-km.xz" "unxz wmt20-sent.en-km.zx" km wmt20-sent.en-km 2 1 & + + # km-parallel has multiple sets, concat all of them together + mkdir -p opus + cd opus + wget -nc "http://data.statmt.org/wmt20/translation-task/ps-km/km-parallel.tgz" + tar -zxvf km-parallel.tgz + find ./km-parallel -maxdepth 1 -name "*.km" | sort -V | xargs cat > opus.km + find ./km-parallel -maxdepth 1 -name "*.en" | sort -V | xargs cat > opus.en + cd .. + ln -sf opus/opus.km . + ln -sf opus/opus.en . 
+ + wait + + # remove previous results + rm -f all.?? + find ./ -maxdepth 1 -name "*.km" | sort -V | xargs cat > all.km + find ./ -maxdepth 1 -name "*.en" | sort -V | xargs cat > all.en + lid_filter km all.km $DEST/train.km_KH-en_XX.km_KH en all.en $DEST/train.km_KH-en_XX.en_XX +} + +prepare_ps() { + OUTPUT_DIR=$TMP_DIR/ps + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select paracrawl "http://data.statmt.org/wmt20/translation-task/ps-km/wmt20-sent.en-ps.xz" "unxz wmt20-sent.en-ps.xz" ps wmt20-sent.en-ps 2 1 & + download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.ps-en.tsv.gz" "gunzip -f wikititles-v2.ps-en.tsv.gz" ps wikititles-v2.ps-en.tsv 1 2 & + # ps-parallel has multiple sets, concat all of them together + mkdir -p opus + cd opus + wget -nc "http://data.statmt.org/wmt20/translation-task/ps-km/ps-parallel.tgz" + tar -zxvf ps-parallel.tgz + find ./ps-parallel -maxdepth 1 -name "*.ps" | sort -V | xargs cat > opus.ps + find ./ps-parallel -maxdepth 1 -name "*.en" | sort -V | xargs cat > opus.en + cd .. + ln -sf opus/opus.ps opus.ps + ln -sf opus/opus.en opus.en + + wait + + # remove previous results + rm -f all.?? 
# Download the WMT13 Common Crawl parallel corpus into $COMMONCRAWL_DIR and unpack it.
# wget -nc leaves an already-downloaded archive untouched.
# Side effect: changes the current directory to $COMMONCRAWL_DIR.
download_commoncrawl() {
  mkdir -p $COMMONCRAWL_DIR
  cd $COMMONCRAWL_DIR

  wget -nc "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz"
  tar -zxvf training-parallel-commoncrawl.tgz
}
# Symlink the Common Crawl files of one language pair into the current directory.
#   $1: non-English language code of the pair (e.g. de, ru)
# NOTE: LANG is assigned without `local`, so the assignment is visible to the
# rest of the calling shell.
link_commoncrawl() {
  LANG=$1
  ln -sf $COMMONCRAWL_DIR/commoncrawl.$LANG-en.en commoncrawl.en
  ln -sf $COMMONCRAWL_DIR/commoncrawl.$LANG-en.$LANG commoncrawl.$LANG
}

# Extract the source and target segments of an XLIFF file into two text files.
#   $1: input .xlf file
#   $2: suffix for the extracted <source> lines (written to $1.$2)
#   $3: suffix for the extracted <target> lines (written to $1.$3)
# Only lines containing '<source xml:lang=' / '<target xml:lang=' are kept; the
# first sed removes a tag at the start of the line, the second a tag at the end.
strip_xlf() {
  INPUT_FILE=$1
  SRC=$2
  TGT=$3
  grep '<source xml:lang=' $INPUT_FILE | sed 's/^<[^<>]*>//g' | sed 's/<[^<>]*>$//g' > $INPUT_FILE.$SRC
  grep '<target xml:lang=' $INPUT_FILE | sed 's/^<[^<>]*>//g' | sed 's/<[^<>]*>$//g' > $INPUT_FILE.$TGT
}

# Download one file into ./tilde, uncompress and post-process it, then symlink
# the per-language results next to the tilde directory.
#   $1: URL to fetch
#   $2: command (with arguments) that uncompresses the download
#   $3: base file name; $3.$4 and $3.en are the files that get linked
#   $4: non-English language code
#   $5: command (with arguments) producing $3.$4 and $3.en
# $2 and $5 are expanded unquoted, i.e. word-split and executed as commands.
# NOTE: like link_commoncrawl, this assigns LANG without `local`.
download_and_process_tilde() {
  URL=$1
  UNCOMPRESS_CMD=$2
  FILENAME=$3
  LANG=$4
  PROCESS_CMD=$5

  mkdir -p tilde
  cd tilde
  wget -nc $URL
  $UNCOMPRESS_CMD
  echo "executing cmd"
  echo $PROCESS_CMD
  $PROCESS_CMD
  cd ..
  ln -sf tilde/$FILENAME.$LANG tilde.$LANG
  ln -sf tilde/$FILENAME.en tilde.en
}
+ exit + fi + cat $CZENG_CORPUS | sed '/^$/d' | cut -f5 > all.cs + cat $CZENG_CORPUS | sed '/^$/d' | cut -f6 > all.en + + lid_filter cs all.cs $DEST/train.cs_CZ-en_XX.cs_CZ en all.en $DEST/train.cs_CZ-en_XX.en_XX +} + +prepare_de() { + OUTPUT_DIR=$TMP_DIR/de + mkdir -p $OUTPUT_DIR + cd $OUTPUT_DIR + + download_and_select europarl "http://www.statmt.org/europarl/v10/training/europarl-v10.de-en.tsv.gz" "gunzip europarl-v10.de-en.tsv.gz" de europarl-v10.de-en.tsv 1 2 & + download_and_select paracrawl "https://s3.amazonaws.com/web-language-models/paracrawl/release5.1/en-de.txt.gz" "gunzip en-de.txt.gz" de en-de.txt 2 1 & + link_commoncrawl de + download_and_select newscommentary "http://data.statmt.org/news-commentary/v15/training/news-commentary-v15.de-en.tsv.gz" "gunzip news-commentary-v15.de-en.tsv.gz" de news-commentary-v15.de-en.tsv 1 2 & + download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.de-en.tsv.gz" "gunzip wikititles-v2.de-en.tsv.gz" de wikititles-v2.de-en.tsv 1 2 & + download_and_process_tilde "http://data.statmt.org/wmt20/translation-task/rapid/RAPID_2019.de-en.xlf.gz" "gunzip RAPID_2019.de-en.xlf.gz" RAPID_2019.de-en.xlf de "strip_xlf RAPID_2019.de-en.xlf de en" & + download_and_select wikimatrix "http://data.statmt.org/wmt20/translation-task/WikiMatrix/WikiMatrix.v1.de-en.langid.tsv.gz" "gunzip WikiMatrix.v1.de-en.langid.tsv.gz" de WikiMatrix.v1.de-en.langid.tsv 2 3 & + + wait + + # remove previous results + rm -f all.?? 
# Convert a TMX file into plain-text bitext with TMX2Corpus.
#   $1: path of the .tmx file
# The converter is cloned into $UTILS/tmx2corpus on every call; git clone simply
# fails (without aborting the script) when that directory already exists.
prepare_tmx() {
  TMX_FILE=$1
  git clone https://github.com/amake/TMX2Corpus $UTILS/tmx2corpus
  pip install tinysegmenter

  python $UTILS/tmx2corpus/tmx2corpus.py $TMX_FILE
}

# Build the Polish-English training set under $TMP_DIR/pl and write the
# language-id-filtered result to $DEST.
prepare_pl() {
  OUTPUT_DIR=$TMP_DIR/pl
  mkdir -p $OUTPUT_DIR
  cd $OUTPUT_DIR

  # download_and_select europarl "http://www.statmt.org/europarl/v10/training/europarl-v10.pl-en.tsv.gz" "gunzip europarl-v10.pl-en.tsv.gz" pl europarl-v10.pl-en.tsv 1 2 &
  # download_and_select paracrawl "https://s3.amazonaws.com/web-language-models/paracrawl/release5.1/en-pl.txt.gz" "gunzip en-pl.txt.gz" pl en-pl.txt 2 1 &
  # download_and_select wikititles "http://data.statmt.org/wikititles/v2/wikititles-v2.pl-en.tsv.gz" "gunzip wikititles-v2.pl-en.tsv.gz" pl wikititles-v2.pl-en.tsv 1 2 &
  # NOTE(review): this call passes a .zip to gunzip and uses a different argument
  # layout than the other download_and_select calls -- confirm against the
  # definition of download_and_select (outside this excerpt) before relying on it.
  download_and_select tilde "https://tilde-model.s3-eu-west-1.amazonaws.com/rapid2019.en-pl.tmx.zip" "gunzip rapid2019.en-pl.tmx.zip" bitext pl "prepare_tmx RAPID_2019.UNIQUE.en-pl.tmx" &
  # download_and_select wikimatrix "http://data.statmt.org/wmt20/translation-task/WikiMatrix/WikiMatrix.v1.en-pl.langid.tsv.gz" "gunzip WikiMatrix.v1.en-pl.langid.tsv.gz" pl WikiMatrix.v1.en-pl.langid.tsv 3 2 &

  wait

  # remove previous results
  rm -f all.??
  find ./ -maxdepth 1 -name "*.pl" | sort -V | xargs cat > all.pl
  find ./ -maxdepth 1 -name "*.en" | sort -V | xargs cat > all.en
  lid_filter pl all.pl $DEST/train.pl_PL-en_XX.pl_PL en all.en $DEST/train.pl_PL-en_XX.en_XX
}

# Download a multi-part UN corpus archive, reassemble and unpack it under
# ./uncorpus, then symlink the two sides into the current directory.
#   $1: space-separated list of part URLs
#   $2: space-separated list of the downloaded part file names, in order
#   $3: (optional) non-English language code; when omitted it is taken from the
#       first part name, e.g. UNv1.0.en-ru.tar.gz.00 -> ru
prepare_uncorpus() {
  # plain assignments: "$URLS=$1" would be run as a command, not assign anything
  URLS=$1
  FILES=$2

  # Do not depend on an ambient $LANG: it is only set as a side effect of other
  # helpers (or is the user's locale), so derive the language from the file name.
  local first_part=${FILES%% *}
  local un_lang=${first_part#UNv1.0.en-}
  un_lang=${3:-${un_lang%%.*}}

  mkdir -p uncorpus
  cd uncorpus

  for URL in $URLS; do
    wget -nc $URL
  done
  # the parts are plain byte splits of one tar.gz
  cat $FILES > uncorpus.tar.gz
  tar -zxvf uncorpus.tar.gz

  cd ..
  ln -sf uncorpus/en-$un_lang/UNv1.0.en-$un_lang.$un_lang uncorpus.$un_lang
  ln -sf uncorpus/en-$un_lang/UNv1.0.en-$un_lang.en uncorpus.en
}

# Unpack the manually downloaded Yandex corpus ($YANDEX_CORPUS, a zip file)
# under ./yandex and symlink both sides into the current directory.
prepare_yandex() {
  mkdir -p yandex
  cd yandex
  # -d names the extraction directory; a bare "./" would be read as a member filter
  unzip $YANDEX_CORPUS -d ./
  cd ..
  # -f so that a re-run replaces existing links, like every other helper here
  ln -sf yandex/corpus.en_ru.1m.en yandex.en
  ln -sf yandex/corpus.en_ru.1m.ru yandex.ru
}
# Assemble the CCMT Chinese-English data into ./ccmt and symlink ccmt.zh / ccmt.en
# into the current directory. Nothing is downloaded here: the data is read from
# $CCMT_DIR.
prepare_ccmt() {
  mkdir -p ccmt
  cd ccmt
  # assume ccmt data is already unzipped under CCMT_DIR folder
  # the datum2017 Chinese side has its spaces removed before use
  cat $CCMT_DIR/datum2017/Book*_cn.txt | sed 's/ //g' > datum2017.detok.zh
  cat $CCMT_DIR/datum2017/Book*_en.txt > datum2017.detok.en
  # both sides list the sub-corpora in the same order so the lines stay parallel
  cat $CCMT_DIR/casict2011/casict-A_ch.txt $CCMT_DIR/casict2011/casict-B_ch.txt $CCMT_DIR/casict2015/casict2015_ch.txt $CCMT_DIR/datum2015/datum_ch.txt $CCMT_DIR/neu2017/NEU_cn.txt datum2017.detok.zh > ccmt.zh
  cat $CCMT_DIR/casict2011/casict-A_en.txt $CCMT_DIR/casict2011/casict-B_en.txt $CCMT_DIR/casict2015/casict2015_en.txt $CCMT_DIR/datum2015/datum_en.txt $CCMT_DIR/neu2017/NEU_en.txt datum2017.detok.en > ccmt.en
  cd ..
  ln -sf ccmt/ccmt.zh ccmt.zh
  ln -sf ccmt/ccmt.en ccmt.en
}
# Strip the SGML markup from a WMT .sgm file read on stdin.
# The filter is run through bash with its full path: it lives under $UTILS
# (not on PATH) and is checked in without the executable bit.
_strip_sgm() {
  bash "$UTILS/strip_sgm.sh"
}

# newsdev2020 only ships one dev set per pair, so it is split round-robin into
# two halves: the first becomes valid, the second test.
#   $1: pair tag used in the file names (e.g. jaen)
#   $2: source language code in the file names (e.g. ja)
#   $3: source language id used in $DEST file names (e.g. ja_XX)
_prepare_newsdev2020() {
  local pair=$1 lang=$2 code=$3
  local stem=newsdev2020-${pair}
  local direction=${code}-en_XX

  cat ${stem}-src.${lang}.sgm | _strip_sgm > ${stem}.${lang}
  cat ${stem}-ref.en.sgm | _strip_sgm > ${stem}.en
  split ${stem}.${lang} -a 0 -n r/1/2 > $DEST/valid.${direction}.${code}
  split ${stem}.en -a 0 -n r/1/2 > $DEST/valid.${direction}.en_XX
  split ${stem}.${lang} -a 0 -n r/2/2 > $DEST/test.${direction}.${code}
  split ${stem}.en -a 0 -n r/2/2 > $DEST/test.${direction}.en_XX
}

# newstest2018 becomes valid and newstest2019 becomes test for one direction.
#   $1/$2: source/target language codes in the file names (e.g. de en)
#   $3/$4: source/target language ids used in $DEST file names (e.g. de_DE en_XX)
_prepare_newstest() {
  local src=$1 tgt=$2 src_code=$3 tgt_code=$4
  local direction=${src_code}-${tgt_code}
  local year_subset year subset

  for year_subset in 2018:valid 2019:test; do
    year=${year_subset%%:*}
    subset=${year_subset##*:}
    cat newstest${year}-${src}${tgt}-src.${src}.sgm | _strip_sgm > $DEST/${subset}.${direction}.${src_code}
    cat newstest${year}-${src}${tgt}-ref.${tgt}.sgm | _strip_sgm > $DEST/${subset}.${direction}.${tgt_code}
  done
}

# The wikipedia dev/devtest sets are already plain text: dev -> valid, devtest -> test.
#   $1: language code in the file names (e.g. km)
#   $2: language id used in $DEST file names (e.g. km_KH)
_copy_wikipedia_dev() {
  local lang=$1 code=$2

  cp wikipedia.dev.${lang}-en.${lang} $DEST/valid.${code}-en_XX.${code}
  cp wikipedia.dev.${lang}-en.en $DEST/valid.${code}-en_XX.en_XX
  cp wikipedia.devtest.${lang}-en.${lang} $DEST/test.${code}-en_XX.${code}
  cp wikipedia.devtest.${lang}-en.en $DEST/test.${code}-en_XX.en_XX
}

# Download the WMT20 dev archive into $TMP_DIR and write every valid/test file to $DEST.
prepare_tests() {
  OUTPUT_DIR=$TMP_DIR
  mkdir -p $OUTPUT_DIR
  cd $OUTPUT_DIR
  wget -nc http://data.statmt.org/wmt20/translation-task/dev.tgz
  tar -zxvf dev.tgz
  cd dev

  _prepare_newsdev2020 jaen ja ja_XX
  _prepare_newsdev2020 iuen iu iu_CA
  _prepare_newsdev2020 taen ta ta_IN

  _copy_wikipedia_dev km km_KH
  _copy_wikipedia_dev ps ps_AF

  _prepare_newsdev2020 plen pl pl_PL

  # Czech is only prepared in the en -> cs direction
  _prepare_newstest en cs en_XX cs_CZ

  _prepare_newstest de en de_DE en_XX
  _prepare_newstest en de en_XX de_DE

  _prepare_newstest ru en ru_RU en_XX
  _prepare_newstest en ru en_XX ru_RU

  _prepare_newstest zh en zh_CN en_XX
  _prepare_newstest en zh en_XX zh_CN
}

mkdir -p $DEST

prepare_lid
prepare_moses
download_commoncrawl

# each language is prepared in its own background job
prepare_ja &
prepare_ta &
prepare_km &
prepare_ps &
prepare_iu &
prepare_cs &
prepare_de &
prepare_pl &
prepare_ru &
prepare_zh &

# prepare valid/test set
prepare_tests &

# NOTE(review): with the wait below commented out the script returns while the
# background jobs are still running -- confirm that this is intended.
# wait

# TODO remove intermediate files
# rm -rf $TMP_DIR
+ exit +fi + +if [ -z $SPM_PATH ] ; +then + echo "Please install sentence piecence from https://github.com/google/sentencepiece and set SPM_PATH pointing to the installed spm_encode.py. Exitting..." + exit +fi + +ML50=${WORKDIR_ROOT}/ML50 + +mkdir -p $ML50/dedup +mkdir -p $ML50/cleaned_dedup + +python ./dedup_all.py --from-folder $ML50/raw --to-folder $ML50/dedup +python ./remove_valid_test_in_train.py --from-folder $ML50/dedup --to-folder $ML50/clean +python ./binarize.py --raw-folder $ML50/clean \ No newline at end of file diff --git a/fairseq/examples/multilingual/data_scripts/remove_valid_test_in_train.py b/fairseq/examples/multilingual/data_scripts/remove_valid_test_in_train.py new file mode 100644 index 0000000..ef618ad --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/remove_valid_test_in_train.py @@ -0,0 +1,290 @@ +import os, sys +import glob, itertools +import pandas as pd + +WORKDIR_ROOT = os.environ.get('WORKDIR_ROOT', None) + +if WORKDIR_ROOT is None or not WORKDIR_ROOT.strip(): + print('please specify your working directory root in OS environment variable WORKDIR_ROOT. 
def _read_lines(path):
    """Return the lines of the UTF-8 text file *path* (``str.splitlines`` semantics)."""
    with open(path, encoding='utf8') as f:
        return f.read().splitlines()


def load_langs(path):
    """Return the whitespace-stripped lines of *path*, one entry per line."""
    with open(path, encoding='utf8') as fr:
        return [line.strip() for line in fr]


def load_sentences(raw_data, split, direction):
    """Load both sides of ``{raw_data}/{split}.{direction}.*``.

    Returns ``[(src_lang, src_lines), (tgt_lang, tgt_lines)]``, or ``[]`` when
    either of the two files is missing.
    """
    src, tgt = direction.split('-')
    src_path = f"{raw_data}/{split}.{direction}.{src}"
    tgt_path = f"{raw_data}/{split}.{direction}.{tgt}"
    if os.path.exists(src_path) and os.path.exists(tgt_path):
        return [(src, _read_lines(src_path)), (tgt, _read_lines(tgt_path))]
    return []


def swap_direction(d):
    """Turn ``'a-b'`` into ``'b-a'``."""
    src, tgt = d.split('-')
    return f'{tgt}-{src}'


def get_all_test_data(raw_data, directions, split='test'):
    """Collect every sentence of *split* for *directions* and their reverses.

    Returns ``(all_test_data, test_data)``: a dict mapping each stripped
    sentence to the set of languages it appears in, and the raw list of
    ``(lang, lines)`` tuples it was built from.
    """
    test_data = [
        x
        for dd in directions
        for d in [dd, swap_direction(dd)]
        for x in load_sentences(raw_data, split, d)
    ]
    all_test_data = {}
    for lang, lines in test_data:
        for s in lines:
            all_test_data.setdefault(s.strip(), set()).add(lang)
    return all_test_data, test_data


def check_train_sentences(raw_data, direction, all_test_data, mess_up_train=None):
    """Record the training sentences of *direction* that occur in *all_test_data*.

    *mess_up_train* maps a sentence to the set of directions whose training
    data contains it; it is updated in place when given and created fresh
    otherwise (a shared ``{}`` default would leak results between calls).

    Returns ``(mess_up_train, size)`` where *size* is the number of training
    pairs read (0 when either training file is missing).
    """
    if mess_up_train is None:
        mess_up_train = {}
    src, tgt = direction.split('-')
    tgt_path = f"{raw_data}/train.{direction}.{tgt}"
    src_path = f"{raw_data}/train.{direction}.{src}"
    print(f'check training data in {raw_data}/train.{direction}')
    size = 0
    if not os.path.exists(tgt_path) or not os.path.exists(src_path):
        return mess_up_train, size
    with open(src_path, encoding='utf8') as f, open(tgt_path, encoding='utf8') as g:
        for src_line, tgt_line in zip(f, g):
            size += 1
            for sentence in (src_line.strip(), tgt_line.strip()):
                if sentence in all_test_data:
                    mess_up_train.setdefault(sentence, set()).add(direction)
    return mess_up_train, size


def check_train_all(raw_data, directions, all_test_data):
    """Run :func:`check_train_sentences` over *directions*.

    Returns ``(mess_up_train, data_sizes)``; *data_sizes* maps each direction
    to its number of training pairs.
    """
    mess_up_train = {}
    data_sizes = {}
    for direction in directions:
        _, size = check_train_sentences(raw_data, direction, all_test_data, mess_up_train)
        data_sizes[direction] = size
    return mess_up_train, data_sizes


def count_train_in_other_set(mess_up_train):
    """Return ``{direction: number of contaminated sentences}``."""
    counts = {}
    for directions in mess_up_train.values():
        for direction in directions:
            counts[direction] = counts.get(direction, 0) + 1
    return counts


def train_size_if_remove_in_otherset(data_sizes, mess_up_train):
    """Project the training size of each contaminated direction after cleaning.

    Returns a list of ``(direction, remaining, original, removed, removed_percent)``.
    """
    return [
        (direction, data_sizes[direction] - count, data_sizes[direction], count,
         100 * count / data_sizes[direction])
        for direction, count in count_train_in_other_set(mess_up_train).items()
    ]


def remove_messed_up_sentences(raw_data, direction, mess_up_train, mess_up_train_pairs,
                               corrected_langs, out_folder=None):
    """Write a copy of the training data of *direction* without contaminated pairs.

    A pair is dropped when either side is a key of *mess_up_train* or the pair
    (in either order) is in *mess_up_train_pairs*. The cleaned files go to
    *out_folder*, which defaults to the module-level ``to_folder`` set by the
    command-line entry point.

    *direction* is added to *corrected_langs* as soon as the cleaned files are
    written, so an entirely contaminated training set is not later overwritten
    by a copy of the original.

    Returns ``(line_num, keep_num)``; ``(0, 0)`` when a training file is
    missing, so callers can always unpack the result.
    """
    if out_folder is None:
        out_folder = to_folder
    split = 'train'
    src_lang, tgt_lang = direction.split('-')

    tgt = f"{raw_data}/{split}.{direction}.{tgt_lang}"
    src = f"{raw_data}/{split}.{direction}.{src_lang}"
    print(f'working on {direction}: ', src, tgt)
    if not os.path.exists(tgt) or not os.path.exists(src):
        return 0, 0

    corrected_tgt = f"{out_folder}/{split}.{direction}.{tgt_lang}"
    corrected_src = f"{out_folder}/{split}.{direction}.{src_lang}"
    line_num = 0
    keep_num = 0
    with open(src, encoding='utf8') as fsrc, \
            open(tgt, encoding='utf8') as ftgt, \
            open(corrected_src, 'w', encoding='utf8') as fsrc_corrected, \
            open(corrected_tgt, 'w', encoding='utf8') as ftgt_corrected:
        corrected_langs.add(direction)
        for s, t in zip(fsrc, ftgt):
            s = s.strip()
            t = t.strip()
            if t not in mess_up_train \
                    and s not in mess_up_train \
                    and (s, t) not in mess_up_train_pairs \
                    and (t, s) not in mess_up_train_pairs:
                print(s, file=fsrc_corrected)
                print(t, file=ftgt_corrected)
                keep_num += 1
            line_num += 1
            if line_num % 1000 == 0:
                print(f'completed {line_num} lines', end='\r')
    return line_num, keep_num


def merge_valid_test_messup(mess_up_train_valid, mess_up_train_test):
    """Union two ``{sentence: set}`` mappings, dropping the empty sentence."""
    merged = {}
    for s in set(mess_up_train_valid) | set(mess_up_train_test):
        if not s:
            continue
        merged[s] = mess_up_train_valid.get(s, set()) | mess_up_train_test.get(s, set())
    return merged


def check_train_pairs(raw_data, direction, all_test_data, mess_up_train=None):
    """Record training pairs of *direction* that are also valid/test pairs.

    *all_test_data* is keyed by ``(sentence, sentence)`` tuples; a training
    pair matches in either order. *mess_up_train* is updated in place when
    given and is also returned.
    """
    if mess_up_train is None:
        mess_up_train = {}
    src, tgt = direction.split('-')
    # a hack; TODO: check the reversed directions
    path1 = f"{raw_data}/train.{src}-{tgt}.{src}"
    path2 = f"{raw_data}/train.{src}-{tgt}.{tgt}"
    if not os.path.exists(path1) or not os.path.exists(path2):
        return mess_up_train

    with open(path1, encoding='utf8') as f1, open(path2, encoding='utf8') as f2:
        for src_line, tgt_line in zip(f1, f2):
            s = src_line.strip()
            t = tgt_line.strip()
            if (s, t) in all_test_data or (t, s) in all_test_data:
                mess_up_train.setdefault((s, t), set()).update((src, tgt))
    return mess_up_train


def load_pairs(raw_data, split, direction):
    """Load ``{split}.{direction}`` as a list of sentence pairs.

    The two files are swapped when the target language is not ``en_XX``.
    Returns ``[]`` when either file is missing.
    """
    src, tgt = direction.split('-')
    src_f = f"{raw_data}/{split}.{direction}.{src}"
    tgt_f = f"{raw_data}/{split}.{direction}.{tgt}"
    if tgt != 'en_XX':
        src_f, tgt_f = tgt_f, src_f
    if os.path.exists(src_f) and os.path.exists(tgt_f):
        return list(zip(_read_lines(src_f), _read_lines(tgt_f)))
    return []


# skip_langs = ['cs_CZ', 'en_XX', 'tl_XX', 'tr_TR']
def get_messed_up_test_pairs(split, directions, data_folder=None):
    """Find the training pairs that also occur as pairs in *split*.

    *data_folder* defaults to the module-level ``raw_data`` set by the
    command-line entry point.

    Returns ``(all_test_pairs, mess_up_train_pairs)``, both mapping a
    ``(sentence, sentence)`` tuple to a set of language ids.
    """
    folder = raw_data if data_folder is None else data_folder
    all_test_pairs = {}
    for direction in directions:
        src, tgt = direction.split('-')
        for pair in load_pairs(folder, split, direction):
            all_test_pairs.setdefault(pair, set()).update((src, tgt))
    mess_up_train_pairs = {}
    for direction in directions:
        check_train_pairs(folder, direction, all_test_pairs, mess_up_train_pairs)
    return all_test_pairs, mess_up_train_pairs


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--from-folder',
        required=True,
        type=str)
    parser.add_argument(
        '--to-folder',
        required=True,
        type=str)
    parser.add_argument(
        '--directions',
        default=None,
        type=str)

    args = parser.parse_args()
    # raw_data and to_folder stay module-level: the functions above fall back to them
    raw_data = args.from_folder
    to_folder = args.to_folder
    os.makedirs(to_folder, exist_ok=True)

    if args.directions:
        directions = args.directions.split(',')
    else:
        raw_files = itertools.chain(
            glob.glob(f'{raw_data}/train*'),
            glob.glob(f'{raw_data}/valid*'),
            glob.glob(f'{raw_data}/test*'),
        )
        # every direction shows up once per split and per side; keep the first
        # occurrence only so that no direction is processed repeatedly
        directions = list(dict.fromkeys(
            os.path.split(file_path)[-1].split('.')[1] for file_path in raw_files
        ))
    print('working on directions: ', directions)

    all_test_data, test_data = get_all_test_data(raw_data, directions, 'test')
    print('==loaded test data==')
    all_valid_data, valid_data = get_all_test_data(raw_data, directions, 'valid')
    print('==loaded valid data==')
    all_valid_test_data = merge_valid_test_messup(all_test_data, all_valid_data)
    mess_up_train, data_sizes = check_train_all(raw_data, directions, all_valid_test_data)
    print('training messing up with valid, test data:', len(mess_up_train))
    data_situation = train_size_if_remove_in_otherset(data_sizes, mess_up_train)
    df = pd.DataFrame(data_situation, columns=['direction', 'train_size_after_remove', 'orig_size', 'num_to_remove', 'remove_percent'])
    # sort_values returns a new frame; keep it so the summary is written sorted
    df = df.sort_values('remove_percent', ascending=False)
    df.to_csv(f'{raw_data}/clean_summary.tsv', sep='\t')
    print(f'projected data clean summary in: {raw_data}/clean_summary.tsv')

    # correct the dataset:
    all_test_pairs, mess_up_test_train_pairs = get_messed_up_test_pairs('test', directions)
    all_valid_pairs, mess_up_valid_train_pairs = get_messed_up_test_pairs('valid', directions)

    all_messed_pairs = set(mess_up_test_train_pairs.keys()).union(set(mess_up_valid_train_pairs.keys()))
    corrected_directions = set()

    real_data_situation = []
    for direction in directions:
        org_size, new_size = remove_messed_up_sentences(raw_data, direction, mess_up_train, all_messed_pairs, corrected_directions)
        if org_size == 0:
            print(f"{direction} has size 0")
            continue
        real_data_situation.append(
            (direction, new_size, org_size, org_size - new_size, (org_size - new_size) / org_size * 100)
        )
    print('corrected directions: ', corrected_directions)
    df = pd.DataFrame(real_data_situation, columns=['direction', 'train_size_after_remove', 'orig_size', 'num_to_remove', 'remove_percent'])
    df = df.sort_values('remove_percent', ascending=False)
    df.to_csv(f'{raw_data}/actual_clean_summary.tsv', sep='\t')
    print(f'actual data clean summary (which can be different from the projected one because of duplications) in: {raw_data}/actual_clean_summary.tsv')

    import shutil
    for direction in directions:
        src_lang, tgt_lang = direction.split('-')
        for split in ['train', 'valid', 'test']:
            # copying valid, test and uncorrected train
            if direction in corrected_directions and split == 'train':
                continue
            tgt = f"{raw_data}/{split}.{direction}.{tgt_lang}"
            src = f"{raw_data}/{split}.{direction}.{src_lang}"
            if not (os.path.exists(src) and os.path.exists(tgt)):
                continue
            corrected_tgt = f"{to_folder}/{split}.{direction}.{tgt_lang}"
            corrected_src = f"{to_folder}/{split}.{direction}.{src_lang}"
            print(f'copying {src} to {corrected_src}')
            shutil.copyfile(src, corrected_src)
            print(f'copying {tgt} to {corrected_tgt}')
            shutil.copyfile(tgt, corrected_tgt)

    print('completed')
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import argparse


def deup(src_file, tgt_file, src_file_out, tgt_file_out):
    """Copy a parallel corpus, keeping only the first occurrence of each line pair.

    The two inputs are read in lockstep; a (source line, target line) pair that
    was already written is skipped. Lines are compared and written exactly as
    read, including their line endings. The number of skipped pairs is printed.
    """
    written_pairs = set()
    dup_count = 0
    with open(src_file, encoding='utf-8') as src_in, \
            open(tgt_file, encoding='utf-8') as tgt_in, \
            open(src_file_out, 'w', encoding='utf-8') as src_out, \
            open(tgt_file_out, 'w', encoding='utf-8') as tgt_out:
        for pair in zip(src_in, tgt_in):
            if pair in written_pairs:
                dup_count += 1
                continue
            src_line, tgt_line = pair
            src_out.write(src_line)
            tgt_out.write(tgt_line)
            written_pairs.add(pair)
    print(f'number of duplication: {dup_count}')


def main():
    """Parse the four file arguments from the command line and run :func:`deup`."""
    parser = argparse.ArgumentParser()
    for flag, description in (
        ("--src-file", "src file"),
        ("--tgt-file", "tgt file"),
        ("--src-file-out", "src ouptut file"),
        ("--tgt-file-out", "tgt ouput file"),
    ):
        parser.add_argument(flag, type=str, required=True, help=description)
    opts = parser.parse_args()
    deup(opts.src_file, opts.tgt_file, opts.src_file_out, opts.tgt_file_out)


if __name__ == "__main__":
    main()
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


#!/bin/python

import fasttext
from multiprocessing import Pool
import contextlib
import sys
import argparse
from functools import partial
import io

# fastText model of the current process; set by init() in every pool worker
model = None


def init(model_path):
    """Pool initializer: load the fastText model at *model_path* into ``model``."""
    global model
    model = fasttext.load_model(model_path)


def pred(lines):
    """Predict a language id for each line of one parallel tuple.

    Returns ``(lines, labels)``; each label is the top prediction for the
    stripped line with its first 9 characters removed (presumably the
    ``__label__`` prefix of fastText labels).
    """
    return lines, [model.predict(line.strip())[0][0][9:] for line in lines]


def main():
    """Filter parallel files by language id.

    The inputs are read in lockstep; a tuple of lines is written to the outputs
    (stripped, one per file) only when the predicted language of every line
    equals the corresponding entry of ``--langs``. ``-`` stands for stdin or
    stdout respectively.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True,
                        help="model to load")
    parser.add_argument("--inputs", nargs="+", default=['-'],
                        help="input files to filter")
    parser.add_argument("--langs", nargs="+", required=True,
                        help="lang ids of each input file")
    parser.add_argument("--outputs", nargs="+", default=['-'],
                        help="path to save lid filtered outputs")
    parser.add_argument("--num-workers", type=int, metavar="N", default=10,
                        help="number of processes in parallel")
    args = parser.parse_args()

    assert len(args.inputs) == len(args.langs) and len(args.inputs) == len(args.outputs)

    with contextlib.ExitStack() as stack:
        inputs = [
            stack.enter_context(open(input, "r", encoding="utf-8", newline="\n", errors="replace"))
            if input != "-" else io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors="replace")
            for input in args.inputs
        ]
        outputs = [
            stack.enter_context(open(output, "w", encoding="utf-8", newline="\n"))
            if output != "-" else sys.stdout
            for output in args.outputs
        ]
        with Pool(args.num_workers, initializer=partial(init, args.model)) as p:
            skip_cnt = 0
            # hand imap the lazy zip itself: wrapping it in list() read every
            # input completely into memory before the first prediction
            for lines, preds in p.imap(pred, zip(*inputs), chunksize=500):
                if not all(a == b for a, b in zip(preds, args.langs)):
                    skip_cnt += 1
                    continue
                for line, output_h in zip(lines, outputs):
                    print(line.strip(), file=output_h)
            print(f"Skipped {skip_cnt} lines.")


if __name__ == "__main__":
    main()
a/fairseq/examples/multilingual/data_scripts/utils/strip_sgm.sh b/fairseq/examples/multilingual/data_scripts/utils/strip_sgm.sh new file mode 100644 index 0000000..7f4f61d --- /dev/null +++ b/fairseq/examples/multilingual/data_scripts/utils/strip_sgm.sh @@ -0,0 +1 @@ +grep "seg id" | sed 's/<seg id="[0-9]\+">//g' | sed 's/<\/seg>//g' diff --git a/fairseq/examples/multilingual/finetune_multilingual_model.sh b/fairseq/examples/multilingual/finetune_multilingual_model.sh new file mode 100644 index 0000000..25960c5 --- /dev/null +++ b/fairseq/examples/multilingual/finetune_multilingual_model.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +path_2_data=$1 # <path to data> which contains binarized data for each directions +lang_list=$2 # <path to a file which contains a list of languages separted by new lines> +lang_pairs=$3 #a list language pairs to train multilingual models, e.g. 
"en-fr,en-cs,fr-en,cs-en" +# pretrained can be an mBART pretrained model as well +pretrained_model=$4 #<path to a pretrained model> + + +fairseq-train "$path_2_data" \ + --encoder-normalize-before --decoder-normalize-before \ + --arch transformer --layernorm-embedding \ + --task translation_multi_simple_epoch \ + --finetune-from-model "$pretrained_model" \ + --sampling-method "temperature" \ + --sampling-temperature "1.5" \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt --lr 3e-05 --warmup-updates 2500 --max-update 40000 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 1024 --update-freq 2 \ + --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \ + --seed 222 --log-format simple --log-interval 2 diff --git a/fairseq/examples/multilingual/multilingual_fairseq_gen.sh b/fairseq/examples/multilingual/multilingual_fairseq_gen.sh new file mode 100644 index 0000000..65aa322 --- /dev/null +++ b/fairseq/examples/multilingual/multilingual_fairseq_gen.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +lang_pairs="en-fr,en-cs,fr-en,cs-en" +path_2_data=$1 # <path to data> +lang_list=$2 # <path to a file which contains a list of languages separated by newlines> +model=$3 # <path to a trained model> +source_lang=cs +target_lang=en + +fairseq-generate "$path_2_data" \ + --path "$model" \ + --task translation_multi_simple_epoch \ + --gen-subset test \ + --source-lang "$source_lang" \ + --target-lang "$target_lang" \ + --sacrebleu --remove-bpe 'sentencepiece'\ + --batch-size 32 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" diff --git a/fairseq/examples/multilingual/train_multilingual_model.sh b/fairseq/examples/multilingual/train_multilingual_model.sh new file mode 100644 index 0000000..cc050bd --- /dev/null +++ b/fairseq/examples/multilingual/train_multilingual_model.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +path_2_data=$1 # <path to data> which contains binarized data for each direction +lang_list=$2 # <path to a file which contains a list of languages separated by newlines> +lang_pairs=$3 # a list of language pairs to train multilingual models, e.g. 
"en-fr,en-cs,fr-en,cs-en" + +fairseq-train "$path_2_data" \ + --encoder-normalize-before --decoder-normalize-before \ + --arch transformer --layernorm-embedding \ + --task translation_multi_simple_epoch \ + --sampling-method "temperature" \ + --sampling-temperature 1.5 \ + --encoder-langtok "src" \ + --decoder-langtok \ + --lang-dict "$lang_list" \ + --lang-pairs "$lang_pairs" \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --optimizer adam --adam-eps 1e-06 --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt --lr 3e-05 --warmup-updates 2500 --max-update 40000 \ + --dropout 0.3 --attention-dropout 0.1 --weight-decay 0.0 \ + --max-tokens 1024 --update-freq 2 \ + --save-interval 1 --save-interval-updates 5000 --keep-interval-updates 10 --no-epoch-checkpoints \ + --seed 222 --log-format simple --log-interval 2 diff --git a/fairseq/examples/noisychannel/README.md b/fairseq/examples/noisychannel/README.md new file mode 100644 index 0000000..9d101aa --- /dev/null +++ b/fairseq/examples/noisychannel/README.md @@ -0,0 +1,72 @@ +# Simple and Effective Noisy Channel Modeling for Neural Machine Translation (Yee et al., 2019) +This page contains pointers to pre-trained models as well as instructions on how to run the reranking scripts. 
+ +## Citation: +```bibtex +@inproceedings{yee2019simple, + title = {Simple and Effective Noisy Channel Modeling for Neural Machine Translation}, + author = {Kyra Yee and Yann Dauphin and Michael Auli}, + booktitle = {Conference on Empirical Methods in Natural Language Processing}, + year = {2019}, +} +``` + +## Pre-trained Models: + +Model | Description | Download +---|---|--- +`transformer.noisychannel.de-en` | De->En Forward Model | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/forward_de2en.tar.bz2) +`transformer.noisychannel.en-de` | En->De Channel Model | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/backward_en2de.tar.bz2) +`transformer_lm.noisychannel.en` | En Language model | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/reranking_en_lm.tar.bz2) + +Test Data: [newstest_wmt17](https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/wmt17test.tar.bz2) + +## Example usage + +``` +mkdir rerank_example +curl https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/forward_de2en.tar.bz2 | tar xvjf - -C rerank_example +curl https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/backward_en2de.tar.bz2 | tar xvjf - -C rerank_example +curl https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/reranking_en_lm.tar.bz2 | tar xvjf - -C rerank_example +curl https://dl.fbaipublicfiles.com/fairseq/models/noisychannel/wmt17test.tar.bz2 | tar xvjf - -C rerank_example + +beam=50 +num_trials=1000 +fw_name=fw_model_ex +bw_name=bw_model_ex +lm_name=lm_ex +data_dir=rerank_example/hyphen-splitting-mixed-case-wmt17test-wmt14bpe +data_dir_name=wmt17 +lm=rerank_example/lm/checkpoint_best.pt +lm_bpe_code=rerank_example/lm/bpe32k.code +lm_dict=rerank_example/lm/dict.txt +batch_size=32 +bw=rerank_example/backward_en2de.pt +fw=rerank_example/forward_de2en.pt + +# reranking with P(T|S) P(S|T) and P(T) +python examples/noisychannel/rerank_tune.py $data_dir --tune-param lenpen 
weight1 weight3 \ + --lower-bound 0 0 0 --upper-bound 3 3 3 --data-dir-name $data_dir_name \ + --num-trials $num_trials --source-lang de --target-lang en --gen-model $fw \ + -n $beam --batch-size $batch_size --score-model2 $fw --score-model1 $bw \ + --backwards1 --weight2 1 \ + -lm $lm --lm-dict $lm_dict --lm-name en_newscrawl --lm-bpe-code $lm_bpe_code \ + --model2-name $fw_name --model1-name $bw_name --gen-model-name $fw_name + +# reranking with P(T|S) and P(T) +python examples/noisychannel/rerank_tune.py $data_dir --tune-param lenpen weight3 \ + --lower-bound 0 0 --upper-bound 3 3 --data-dir-name $data_dir_name \ + --num-trials $num_trials --source-lang de --target-lang en --gen-model $fw \ + -n $beam --batch-size $batch_size --score-model1 $fw \ + -lm $lm --lm-dict $lm_dict --lm-name en_newscrawl --lm-bpe-code $lm_bpe_code \ + --model1-name $fw_name --gen-model-name $fw_name + +# to run with a preconfigured set of hyperparameters for the lenpen and model weights, use rerank.py instead. +python examples/noisychannel/rerank.py $data_dir \ + --lenpen 0.269 --weight1 1 --weight2 0.929 --weight3 0.831 \ + --data-dir-name $data_dir_name --source-lang de --target-lang en --gen-model $fw \ + -n $beam --batch-size $batch_size --score-model2 $fw --score-model1 $bw --backwards1 \ + -lm $lm --lm-dict $lm_dict --lm-name en_newscrawl --lm-bpe-code $lm_bpe_code \ + --model2-name $fw_name --model1-name $bw_name --gen-model-name $fw_name +``` + diff --git a/fairseq/examples/noisychannel/__init__.py b/fairseq/examples/noisychannel/__init__.py new file mode 100644 index 0000000..89f1aef --- /dev/null +++ b/fairseq/examples/noisychannel/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import math
from multiprocessing import Pool

import numpy as np
from fairseq import options
from fairseq.data import dictionary
from fairseq.scoring import bleu

from examples.noisychannel import (
    rerank_generate,
    rerank_options,
    rerank_score_bw,
    rerank_score_lm,
    rerank_utils,
)


def score_target_hypo(
    args, a, b, c, lenpen, target_outfile, hypo_outfile, write_hypos, normalize
):
    """Rerank every n-best list with one weight setting and score the result.

    For each source sentence the hypothesis with the highest
    ``rerank_utils.get_score`` value is kept and added to a BLEU scorer.

    Args:
        args: parsed reranking arguments (see ``rerank_options``).
        a, b, c: weight1, weight2 and weight3, forwarded positionally to
            ``rerank_utils.get_score``.
        lenpen: length penalty forwarded to ``rerank_utils.get_score``.
        target_outfile: path the reference translations are written to.
        hypo_outfile: path the selected hypotheses are written to.
        write_hypos: when true, remember the selected hypotheses by sample id
            and print the BLEU result string.
        normalize: forwarded to ``rerank_utils.get_score``.

    Returns:
        The value ``rerank_utils.parse_bleu_scoring`` extracts from the
        scorer's result string.
    """

    print("lenpen", lenpen, "weight1", a, "weight2", b, "weight3", c)
    gen_output_lst, bitext1_lst, bitext2_lst, lm_res_lst = load_score_files(args)
    # Named tgt_dict (not ``dict``) so the builtin is not shadowed.
    tgt_dict = dictionary.Dictionary()
    scorer = bleu.Scorer(
        bleu.BleuConfig(
            pad=tgt_dict.pad(),
            eos=tgt_dict.eos(),
            unk=tgt_dict.unk(),
        )
    )

    ordered_hypos = {}
    ordered_targets = {}

    for shard_id in range(len(bitext1_lst)):
        bitext1 = bitext1_lst[shard_id]
        bitext2 = bitext2_lst[shard_id]
        gen_output = gen_output_lst[shard_id]
        lm_res = lm_res_lst[shard_id]

        total = len(bitext1.rescore_source.keys())
        source_lst = []
        hypo_lst = []
        score_lst = []
        reference_lst = []
        j = 1
        best_score = -math.inf
        # Bound up front so a first score that is not > -inf (e.g. NaN)
        # cannot leave best_hypo undefined when the group is flushed.
        best_hypo = ""

        for i in range(total):
            # length is measured in terms of words, not bpe tokens, since models may not share the same bpe
            target_len = len(bitext1.rescore_hypo[i].split())

            if lm_res is not None:
                lm_score = lm_res.score[i]
            else:
                lm_score = 0

            if bitext2 is not None:
                bitext2_score = bitext2.rescore_score[i]
                bitext2_backwards = bitext2.backwards
            else:
                bitext2_score = None
                bitext2_backwards = None

            score = rerank_utils.get_score(
                a,
                b,
                c,
                target_len,
                bitext1.rescore_score[i],
                bitext2_score,
                lm_score=lm_score,
                lenpen=lenpen,
                src_len=bitext1.source_lengths[i],
                tgt_len=bitext1.target_lengths[i],
                bitext1_backwards=bitext1.backwards,
                bitext2_backwards=bitext2_backwards,
                normalize=normalize,
            )

            if score > best_score:
                best_score = score
                best_hypo = bitext1.rescore_hypo[i]

            # j counts hypotheses of the current sentence; flush the winner
            # once the whole n-best group has been seen.
            if j == gen_output.num_hypos[i] or j == args.num_rescore:
                j = 1
                hypo_lst.append(best_hypo)
                score_lst.append(best_score)
                source_lst.append(bitext1.rescore_source[i])
                reference_lst.append(bitext1.rescore_target[i])

                best_score = -math.inf
                best_hypo = ""
            else:
                j += 1

        gen_keys = list(sorted(gen_output.no_bpe_target.keys()))

        for key, gen_key in enumerate(gen_keys):
            if args.prefix_len is None:
                # The message indexes no_bpe_hypo by sample id (gen_key), not
                # by position, so a failing assertion cannot raise KeyError.
                assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_key], (
                    "pred and rescore hypo mismatch: i: "
                    + str(key)
                    + ", "
                    + str(hypo_lst[key])
                    + str(gen_key)
                    + str(gen_output.no_bpe_hypo[gen_key])
                )
                sys_tok = tgt_dict.encode_line(hypo_lst[key])
            else:
                full_hypo = rerank_utils.get_full_from_prefix(
                    hypo_lst[key], gen_output.no_bpe_hypo[gen_key]
                )
                sys_tok = tgt_dict.encode_line(full_hypo)
            ref_tok = tgt_dict.encode_line(gen_output.no_bpe_target[gen_key])
            scorer.add(ref_tok, sys_tok)

        # if only one set of hyper parameters is provided, write the predictions to a file
        if write_hypos:
            # recover the original ids from n best list generation
            for key, gen_key in enumerate(gen_keys):
                if args.prefix_len is None:
                    assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_key], (
                        "pred and rescore hypo mismatch:"
                        + "i:"
                        + str(key)
                        + str(hypo_lst[key])
                        + str(gen_output.no_bpe_hypo[gen_key])
                    )
                    ordered_hypos[gen_key] = hypo_lst[key]
                else:
                    ordered_hypos[gen_key] = rerank_utils.get_full_from_prefix(
                        hypo_lst[key], gen_output.no_bpe_hypo[gen_key]
                    )
                ordered_targets[gen_key] = gen_output.no_bpe_target[gen_key]

    # write the hypos in the original order from nbest list generation
    if args.num_shards == (len(bitext1_lst)):
        with open(target_outfile, "w") as t, open(hypo_outfile, "w") as h:
            for key in range(len(ordered_hypos)):
                t.write(ordered_targets[key])
                h.write(ordered_hypos[key])

    res = scorer.result_string(4)
    if write_hypos:
        print(res)
    score = rerank_utils.parse_bleu_scoring(res)
    return score


def match_target_hypo(args, target_outfile, hypo_outfile):
    """combine scores from the LM and bitext models, and write the top scoring hypothesis to a file

    ``args.weight1``, ``args.weight2``, ``args.weight3`` and ``args.lenpen``
    are lists read at the same index. A single setting is scored in-process
    with hypothesis writing enabled; several settings are scored in a process
    pool.

    Returns:
        ``(lenpen, weight1, weight2, weight3, score)`` of the best setting.
    """
    if len(args.weight1) == 1:
        res = score_target_hypo(
            args,
            args.weight1[0],
            args.weight2[0],
            args.weight3[0],
            args.lenpen[0],
            target_outfile,
            hypo_outfile,
            True,
            args.normalize,
        )
        rerank_scores = [res]
    else:
        print("launching pool")
        with Pool(32) as p:
            rerank_scores = p.starmap(
                score_target_hypo,
                [
                    (
                        args,
                        args.weight1[i],
                        args.weight2[i],
                        args.weight3[i],
                        args.lenpen[i],
                        target_outfile,
                        hypo_outfile,
                        False,
                        args.normalize,
                    )
                    for i in range(len(args.weight1))
                ],
            )

    if len(rerank_scores) > 1:
        best_index = np.argmax(rerank_scores)
        best_score = rerank_scores[best_index]
        print("best score", best_score)
        print("best lenpen", args.lenpen[best_index])
        print("best weight1", args.weight1[best_index])
        print("best weight2", args.weight2[best_index])
        print("best weight3", args.weight3[best_index])
    else:
        best_index = 0
        best_score = rerank_scores[0]

    return (
        args.lenpen[best_index],
        args.weight1[best_index],
        args.weight2[best_index],
        args.weight3[best_index],
        best_score,
    )


def load_score_files(args):
    """Load generation output and rescoring results for the selected shards.

    Shards are ``range(args.num_shards)`` when ``args.all_shards`` is set,
    otherwise only ``args.shard_id``.

    Returns:
        Four parallel lists with one entry per shard: generation output,
        first rescoring output, second rescoring output (or ``None``) and
        language-model output (or ``None``).
    """
    if args.all_shards:
        shard_ids = list(range(args.num_shards))
    else:
        shard_ids = [args.shard_id]

    gen_output_lst = []
    bitext1_lst = []
    bitext2_lst = []
    lm_res1_lst = []

    for shard_id in shard_ids:
        using_nbest = args.nbest_list is not None
        (
            pre_gen,
            left_to_right_preprocessed_dir,
            right_to_left_preprocessed_dir,
            backwards_preprocessed_dir,
            lm_preprocessed_dir,
        ) = rerank_utils.get_directories(
            args.data_dir_name,
            args.num_rescore,
            args.gen_subset,
            args.gen_model_name,
            shard_id,
            args.num_shards,
            args.sampling,
            args.prefix_len,
            args.target_prefix_frac,
            args.source_prefix_frac,
        )

        rerank1_is_gen = (
            args.gen_model == args.score_model1 and args.source_prefix_frac is None
        )
        rerank2_is_gen = (
            args.gen_model == args.score_model2 and args.source_prefix_frac is None
        )
        if using_nbest:
            # Same override as rerank_generate.gen_and_reprocess_nbest: with
            # a predefined n-best list the second "model" is the generation
            # output. Without it the branch below could reach an undefined
            # score2_file.
            rerank2_is_gen = True

        score1_file = rerank_utils.rescore_file_name(
            pre_gen,
            args.prefix_len,
            args.model1_name,
            target_prefix_frac=args.target_prefix_frac,
            source_prefix_frac=args.source_prefix_frac,
            backwards=args.backwards1,
        )
        if args.score_model2 is not None:
            score2_file = rerank_utils.rescore_file_name(
                pre_gen,
                args.prefix_len,
                args.model2_name,
                target_prefix_frac=args.target_prefix_frac,
                source_prefix_frac=args.source_prefix_frac,
                backwards=args.backwards2,
            )
        if args.language_model is not None:
            lm_score_file = rerank_utils.rescore_file_name(
                pre_gen, args.prefix_len, args.lm_name, lm_file=True
            )

        # get gen output
        predictions_bpe_file = pre_gen + "/generate_output_bpe.txt"
        if using_nbest:
            print("Using predefined n-best list from interactive.py")
            predictions_bpe_file = args.nbest_list
        gen_output = rerank_utils.BitextOutputFromGen(
            predictions_bpe_file,
            bpe_symbol=args.post_process,
            nbest=using_nbest,
            prefix_len=args.prefix_len,
            target_prefix_frac=args.target_prefix_frac,
        )

        if rerank1_is_gen:
            bitext1 = gen_output
        else:
            bitext1 = rerank_utils.BitextOutput(
                score1_file,
                args.backwards1,
                args.right_to_left1,
                args.post_process,
                args.prefix_len,
                args.target_prefix_frac,
                args.source_prefix_frac,
            )

        if args.score_model2 is not None or args.nbest_list is not None:
            if rerank2_is_gen:
                bitext2 = gen_output
            else:
                bitext2 = rerank_utils.BitextOutput(
                    score2_file,
                    args.backwards2,
                    args.right_to_left2,
                    args.post_process,
                    args.prefix_len,
                    args.target_prefix_frac,
                    args.source_prefix_frac,
                )

                assert (
                    bitext2.source_lengths == bitext1.source_lengths
                ), "source lengths for rescoring models do not match"
                assert (
                    bitext2.target_lengths == bitext1.target_lengths
                ), "target lengths for rescoring models do not match"
        else:
            if args.diff_bpe:
                assert args.score_model2 is None
                bitext2 = gen_output
            else:
                bitext2 = None

        if args.language_model is not None:
            lm_res1 = rerank_utils.LMOutput(
                lm_score_file,
                args.lm_dict,
                args.prefix_len,
                args.post_process,
                args.target_prefix_frac,
            )
        else:
            lm_res1 = None

        gen_output_lst.append(gen_output)
        bitext1_lst.append(bitext1)
        bitext2_lst.append(bitext2)
        lm_res1_lst.append(lm_res1)
    return gen_output_lst, bitext1_lst, bitext2_lst, lm_res1_lst


def rerank(args):
    """Generate, score and rerank n-best lists, returning the best setting.

    Scalar ``lenpen`` / ``weight1`` / ``weight2`` / ``weight3`` values are
    wrapped in one-element lists. Generation, bitext scoring and LM scoring
    then run once per selected shard before ``match_target_hypo`` is called.

    Returns:
        ``(best_lenpen, best_weight1, best_weight2, best_weight3, best_score)``.
    """
    if not isinstance(args.lenpen, list):
        args.lenpen = [args.lenpen]
    if not isinstance(args.weight1, list):
        args.weight1 = [args.weight1]
    if not isinstance(args.weight2, list):
        args.weight2 = [args.weight2]
    if not isinstance(args.weight3, list):
        args.weight3 = [args.weight3]
    if args.all_shards:
        shard_ids = list(range(args.num_shards))
    else:
        shard_ids = [args.shard_id]

    original_shard_id = args.shard_id
    try:
        for shard_id in shard_ids:
            # The generation/scoring steps read args.shard_id, so the loop
            # variable has to be published there; otherwise --all-shards
            # reprocesses the same shard on every iteration.
            args.shard_id = shard_id
            (
                pre_gen,
                left_to_right_preprocessed_dir,
                right_to_left_preprocessed_dir,
                backwards_preprocessed_dir,
                lm_preprocessed_dir,
            ) = rerank_utils.get_directories(
                args.data_dir_name,
                args.num_rescore,
                args.gen_subset,
                args.gen_model_name,
                shard_id,
                args.num_shards,
                args.sampling,
                args.prefix_len,
                args.target_prefix_frac,
                args.source_prefix_frac,
            )
            rerank_generate.gen_and_reprocess_nbest(args)
            rerank_score_bw.score_bw(args)
            rerank_score_lm.score_lm(args)
    finally:
        args.shard_id = original_shard_id

    # pre_gen is the directory of the last processed shard.
    if args.write_hypos is None:
        write_targets = pre_gen + "/matched_targets"
        write_hypos = pre_gen + "/matched_hypos"
    else:
        write_targets = args.write_hypos + "_targets" + args.gen_subset
        write_hypos = args.write_hypos + "_hypos" + args.gen_subset

    if args.all_shards:
        write_targets += "_all_shards"
        write_hypos += "_all_shards"

    (
        best_lenpen,
        best_weight1,
        best_weight2,
        best_weight3,
        best_score,
    ) = match_target_hypo(args, write_targets, write_hypos)

    return best_lenpen, best_weight1, best_weight2, best_weight3, best_score


def cli_main():
    """Parse the reranking command line and run ``rerank``."""
    parser = rerank_options.get_reranking_parser()
    args = options.parse_args_and_arch(parser)
    rerank(args)


if __name__ == "__main__":
    cli_main()
+""" + +import os +import subprocess +from contextlib import redirect_stdout + +from fairseq import options +from fairseq_cli import generate, preprocess + +from examples.noisychannel import rerank_options, rerank_utils + + +def gen_and_reprocess_nbest(args): + if args.score_dict_dir is None: + args.score_dict_dir = args.data + if args.prefix_len is not None: + assert ( + args.right_to_left1 is False + ), "prefix length not compatible with right to left models" + assert ( + args.right_to_left2 is False + ), "prefix length not compatible with right to left models" + + if args.nbest_list is not None: + assert args.score_model2 is None + + if args.backwards1: + scorer1_src = args.target_lang + scorer1_tgt = args.source_lang + else: + scorer1_src = args.source_lang + scorer1_tgt = args.target_lang + + store_data = ( + os.path.join(os.path.dirname(__file__)) + "/rerank_data/" + args.data_dir_name + ) + if not os.path.exists(store_data): + os.makedirs(store_data) + + ( + pre_gen, + left_to_right_preprocessed_dir, + right_to_left_preprocessed_dir, + backwards_preprocessed_dir, + lm_preprocessed_dir, + ) = rerank_utils.get_directories( + args.data_dir_name, + args.num_rescore, + args.gen_subset, + args.gen_model_name, + args.shard_id, + args.num_shards, + args.sampling, + args.prefix_len, + args.target_prefix_frac, + args.source_prefix_frac, + ) + assert not ( + args.right_to_left1 and args.backwards1 + ), "backwards right to left not supported" + assert not ( + args.right_to_left2 and args.backwards2 + ), "backwards right to left not supported" + assert not ( + args.prefix_len is not None and args.target_prefix_frac is not None + ), "target prefix frac and target prefix len incompatible" + + # make directory to store generation results + if not os.path.exists(pre_gen): + os.makedirs(pre_gen) + + rerank1_is_gen = ( + args.gen_model == args.score_model1 and args.source_prefix_frac is None + ) + rerank2_is_gen = ( + args.gen_model == args.score_model2 and 
args.source_prefix_frac is None + ) + + if args.nbest_list is not None: + rerank2_is_gen = True + + # make directories to store preprossed nbest list for reranking + if not os.path.exists(left_to_right_preprocessed_dir): + os.makedirs(left_to_right_preprocessed_dir) + if not os.path.exists(right_to_left_preprocessed_dir): + os.makedirs(right_to_left_preprocessed_dir) + if not os.path.exists(lm_preprocessed_dir): + os.makedirs(lm_preprocessed_dir) + if not os.path.exists(backwards_preprocessed_dir): + os.makedirs(backwards_preprocessed_dir) + + score1_file = rerank_utils.rescore_file_name( + pre_gen, + args.prefix_len, + args.model1_name, + target_prefix_frac=args.target_prefix_frac, + source_prefix_frac=args.source_prefix_frac, + backwards=args.backwards1, + ) + if args.score_model2 is not None: + score2_file = rerank_utils.rescore_file_name( + pre_gen, + args.prefix_len, + args.model2_name, + target_prefix_frac=args.target_prefix_frac, + source_prefix_frac=args.source_prefix_frac, + backwards=args.backwards2, + ) + + predictions_bpe_file = pre_gen + "/generate_output_bpe.txt" + + using_nbest = args.nbest_list is not None + + if using_nbest: + print("Using predefined n-best list from interactive.py") + predictions_bpe_file = args.nbest_list + + else: + if not os.path.isfile(predictions_bpe_file): + print("STEP 1: generate predictions using the p(T|S) model with bpe") + print(args.data) + param1 = [ + args.data, + "--path", + args.gen_model, + "--shard-id", + str(args.shard_id), + "--num-shards", + str(args.num_shards), + "--nbest", + str(args.num_rescore), + "--batch-size", + str(args.batch_size), + "--beam", + str(args.num_rescore), + "--batch-size", + str(args.num_rescore), + "--gen-subset", + args.gen_subset, + "--source-lang", + args.source_lang, + "--target-lang", + args.target_lang, + ] + if args.sampling: + param1 += ["--sampling"] + + gen_parser = options.get_generation_parser() + input_args = options.parse_args_and_arch(gen_parser, param1) + + 
print(input_args) + with open(predictions_bpe_file, "w") as f: + with redirect_stdout(f): + generate.main(input_args) + + gen_output = rerank_utils.BitextOutputFromGen( + predictions_bpe_file, + bpe_symbol=args.post_process, + nbest=using_nbest, + prefix_len=args.prefix_len, + target_prefix_frac=args.target_prefix_frac, + ) + + if args.diff_bpe: + rerank_utils.write_reprocessed( + gen_output.no_bpe_source, + gen_output.no_bpe_hypo, + gen_output.no_bpe_target, + pre_gen + "/source_gen_bpe." + args.source_lang, + pre_gen + "/target_gen_bpe." + args.target_lang, + pre_gen + "/reference_gen_bpe." + args.target_lang, + ) + bitext_bpe = args.rescore_bpe_code + bpe_src_param = [ + "-c", + bitext_bpe, + "--input", + pre_gen + "/source_gen_bpe." + args.source_lang, + "--output", + pre_gen + "/rescore_data." + args.source_lang, + ] + bpe_tgt_param = [ + "-c", + bitext_bpe, + "--input", + pre_gen + "/target_gen_bpe." + args.target_lang, + "--output", + pre_gen + "/rescore_data." + args.target_lang, + ] + + subprocess.call( + [ + "python", + os.path.join( + os.path.dirname(__file__), "subword-nmt/subword_nmt/apply_bpe.py" + ), + ] + + bpe_src_param, + shell=False, + ) + + subprocess.call( + [ + "python", + os.path.join( + os.path.dirname(__file__), "subword-nmt/subword_nmt/apply_bpe.py" + ), + ] + + bpe_tgt_param, + shell=False, + ) + + if (not os.path.isfile(score1_file) and not rerank1_is_gen) or ( + args.score_model2 is not None + and not os.path.isfile(score2_file) + and not rerank2_is_gen + ): + print( + "STEP 2: process the output of generate.py so we have clean text files with the translations" + ) + + rescore_file = "/rescore_data" + if args.prefix_len is not None: + prefix_len_rescore_file = rescore_file + "prefix" + str(args.prefix_len) + if args.target_prefix_frac is not None: + target_prefix_frac_rescore_file = ( + rescore_file + "target_prefix_frac" + str(args.target_prefix_frac) + ) + if args.source_prefix_frac is not None: + source_prefix_frac_rescore_file = ( + 
rescore_file + "source_prefix_frac" + str(args.source_prefix_frac) + ) + + if not args.right_to_left1 or not args.right_to_left2: + if not args.diff_bpe: + rerank_utils.write_reprocessed( + gen_output.source, + gen_output.hypo, + gen_output.target, + pre_gen + rescore_file + "." + args.source_lang, + pre_gen + rescore_file + "." + args.target_lang, + pre_gen + "/reference_file", + bpe_symbol=args.post_process, + ) + if args.prefix_len is not None: + bw_rescore_file = prefix_len_rescore_file + rerank_utils.write_reprocessed( + gen_output.source, + gen_output.hypo, + gen_output.target, + pre_gen + prefix_len_rescore_file + "." + args.source_lang, + pre_gen + prefix_len_rescore_file + "." + args.target_lang, + pre_gen + "/reference_file", + prefix_len=args.prefix_len, + bpe_symbol=args.post_process, + ) + elif args.target_prefix_frac is not None: + bw_rescore_file = target_prefix_frac_rescore_file + rerank_utils.write_reprocessed( + gen_output.source, + gen_output.hypo, + gen_output.target, + pre_gen + + target_prefix_frac_rescore_file + + "." + + args.source_lang, + pre_gen + + target_prefix_frac_rescore_file + + "." + + args.target_lang, + pre_gen + "/reference_file", + bpe_symbol=args.post_process, + target_prefix_frac=args.target_prefix_frac, + ) + else: + bw_rescore_file = rescore_file + + if args.source_prefix_frac is not None: + fw_rescore_file = source_prefix_frac_rescore_file + rerank_utils.write_reprocessed( + gen_output.source, + gen_output.hypo, + gen_output.target, + pre_gen + + source_prefix_frac_rescore_file + + "." + + args.source_lang, + pre_gen + + source_prefix_frac_rescore_file + + "." + + args.target_lang, + pre_gen + "/reference_file", + bpe_symbol=args.post_process, + source_prefix_frac=args.source_prefix_frac, + ) + else: + fw_rescore_file = rescore_file + + if args.right_to_left1 or args.right_to_left2: + rerank_utils.write_reprocessed( + gen_output.source, + gen_output.hypo, + gen_output.target, + pre_gen + "/right_to_left_rescore_data." 
+ args.source_lang, + pre_gen + "/right_to_left_rescore_data." + args.target_lang, + pre_gen + "/right_to_left_reference_file", + right_to_left=True, + bpe_symbol=args.post_process, + ) + + print("STEP 3: binarize the translations") + if ( + not args.right_to_left1 + or args.score_model2 is not None + and not args.right_to_left2 + or not rerank1_is_gen + ): + + if args.backwards1 or args.backwards2: + if args.backwards_score_dict_dir is not None: + bw_dict = args.backwards_score_dict_dir + else: + bw_dict = args.score_dict_dir + bw_preprocess_param = [ + "--source-lang", + scorer1_src, + "--target-lang", + scorer1_tgt, + "--trainpref", + pre_gen + bw_rescore_file, + "--srcdict", + bw_dict + "/dict." + scorer1_src + ".txt", + "--tgtdict", + bw_dict + "/dict." + scorer1_tgt + ".txt", + "--destdir", + backwards_preprocessed_dir, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(bw_preprocess_param) + preprocess.main(input_args) + + preprocess_param = [ + "--source-lang", + scorer1_src, + "--target-lang", + scorer1_tgt, + "--trainpref", + pre_gen + fw_rescore_file, + "--srcdict", + args.score_dict_dir + "/dict." + scorer1_src + ".txt", + "--tgtdict", + args.score_dict_dir + "/dict." + scorer1_tgt + ".txt", + "--destdir", + left_to_right_preprocessed_dir, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(preprocess_param) + preprocess.main(input_args) + + if args.right_to_left1 or args.right_to_left2: + preprocess_param = [ + "--source-lang", + scorer1_src, + "--target-lang", + scorer1_tgt, + "--trainpref", + pre_gen + "/right_to_left_rescore_data", + "--srcdict", + args.score_dict_dir + "/dict." + scorer1_src + ".txt", + "--tgtdict", + args.score_dict_dir + "/dict." 
+ scorer1_tgt + ".txt", + "--destdir", + right_to_left_preprocessed_dir, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(preprocess_param) + preprocess.main(input_args) + + return gen_output + + +def cli_main(): + parser = rerank_options.get_reranking_parser() + args = options.parse_args_and_arch(parser) + gen_and_reprocess_nbest(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/noisychannel/rerank_options.py b/fairseq/examples/noisychannel/rerank_options.py new file mode 100644 index 0000000..de91939 --- /dev/null +++ b/fairseq/examples/noisychannel/rerank_options.py @@ -0,0 +1,149 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq import options + + +def get_reranking_parser(default_task="translation"): + parser = options.get_parser("Generation and reranking", default_task) + add_reranking_args(parser) + return parser + + +def get_tuning_parser(default_task="translation"): + parser = options.get_parser("Reranking tuning", default_task) + add_reranking_args(parser) + add_tuning_args(parser) + return parser + + +def add_reranking_args(parser): + group = parser.add_argument_group("Reranking") + # fmt: off + group.add_argument('--score-model1', '-s1', type=str, metavar='FILE', required=True, + help='path to first model or ensemble of models for rescoring') + group.add_argument('--score-model2', '-s2', type=str, metavar='FILE', required=False, + help='path to second model or ensemble of models for rescoring') + group.add_argument('--num-rescore', '-n', type=int, metavar='N', default=10, + help='the number of candidate hypothesis to rescore') + group.add_argument('-bz', '--batch-size', type=int, metavar='N', default=128, + help='batch size for generating the nbest list') + group.add_argument('--gen-subset', default='test', 
metavar='SET', choices=['test', 'train', 'valid'], + help='data subset to generate (train, valid, test)') + group.add_argument('--gen-model', default=None, metavar='FILE', + help='the model to generate translations') + group.add_argument('-b1', '--backwards1', action='store_true', + help='whether or not the first model group is backwards') + group.add_argument('-b2', '--backwards2', action='store_true', + help='whether or not the second model group is backwards') + group.add_argument('-a', '--weight1', default=1, nargs='+', type=float, + help='the weight(s) of the first model') + group.add_argument('-b', '--weight2', default=1, nargs='+', type=float, + help='the weight(s) of the second model, or the gen model if using nbest from interactive.py') + group.add_argument('-c', '--weight3', default=1, nargs='+', type=float, + help='the weight(s) of the third model') + + # lm arguments + group.add_argument('-lm', '--language-model', default=None, metavar='FILE', + help='language model for target language to rescore translations') + group.add_argument('--lm-dict', default=None, metavar='FILE', + help='the dict of the language model for the target language') + group.add_argument('--lm-name', default=None, + help='the name of the language model for the target language') + group.add_argument('--lm-bpe-code', default=None, metavar='FILE', + help='the bpe code for the language model for the target language') + group.add_argument('--data-dir-name', default=None, + help='name of data directory') + group.add_argument('--lenpen', default=1, nargs='+', type=float, + help='length penalty: <1.0 favors shorter, >1.0 favors longer sentences') + group.add_argument('--score-dict-dir', default=None, + help='the directory with dictionaries for the scoring models') + group.add_argument('--right-to-left1', action='store_true', + help='whether the first model group is a right to left model') + group.add_argument('--right-to-left2', action='store_true', + help='whether the second model group is 
a right to left model') + group.add_argument('--post-process', '--remove-bpe', default='@@ ', + help='the bpe symbol, used for the bitext and LM') + group.add_argument('--prefix-len', default=None, type=int, + help='the length of the target prefix to use in rescoring (in terms of words wo bpe)') + group.add_argument('--sampling', action='store_true', + help='use sampling instead of beam search for generating n best list') + group.add_argument('--diff-bpe', action='store_true', + help='bpe for rescoring and nbest list not the same') + group.add_argument('--rescore-bpe-code', default=None, + help='bpe code for rescoring models') + group.add_argument('--nbest-list', default=None, + help='use predefined nbest list in interactive.py format') + group.add_argument('--write-hypos', default=None, + help='filename prefix to write hypos to') + group.add_argument('--ref-translation', default=None, + help='reference translation to use with nbest list from interactive.py') + group.add_argument('--backwards-score-dict-dir', default=None, + help='the directory with dictionaries for the backwards model,' + 'if None then it is assumed the fw and backwards models share dictionaries') + + # extra scaling args + group.add_argument('--gen-model-name', default=None, + help='the name of the models that generated the nbest list') + group.add_argument('--model1-name', default=None, + help='the name of the set for model1 group ') + group.add_argument('--model2-name', default=None, + help='the name of the set for model2 group') + group.add_argument('--shard-id', default=0, type=int, + help='the id of the shard to generate') + group.add_argument('--num-shards', default=1, type=int, + help='the number of shards to generate across') + group.add_argument('--all-shards', action='store_true', + help='use all shards') + group.add_argument('--target-prefix-frac', default=None, type=float, + help='the fraction of the target prefix to use in rescoring (in terms of words wo bpe)') + 
def add_tuning_args(parser):
    """Attach the random-search tuning options to ``parser``.

    An argument group titled "Tuning" is created on ``parser`` and filled
    with the search-space bounds, the names of the parameters to tune, the
    subset to tune on, the number of trials and the weight-sharing switch.

    Args:
        parser: an ``argparse`` parser (or anything exposing
            ``add_argument_group``).

    Returns:
        The newly created argument group.
    """
    tuning = parser.add_argument_group("Tuning")

    # The table is rebuilt on every call so that each parser owns its own
    # default list objects.
    option_table = (
        (
            "--lower-bound",
            {
                "default": [-0.7],
                "nargs": "+",
                "type": float,
                "help": "lower bound of search space",
            },
        ),
        (
            "--upper-bound",
            {
                "default": [3],
                "nargs": "+",
                "type": float,
                "help": "upper bound of search space",
            },
        ),
        (
            "--tune-param",
            {
                "default": ["lenpen"],
                "nargs": "+",
                "choices": ["lenpen", "weight1", "weight2", "weight3"],
                "help": "the parameter(s) to tune",
            },
        ),
        (
            "--tune-subset",
            {
                "default": "valid",
                "choices": ["valid", "test", "train"],
                "help": "the subset to tune on ",
            },
        ),
        (
            "--num-trials",
            {
                "default": 1000,
                "type": int,
                "help": "number of trials to do for random search",
            },
        ),
        (
            "--share-weights",
            {"action": "store_true", "help": "share weight2 and weight 3"},
        ),
    )
    for flag, settings in option_table:
        tuning.add_argument(flag, **settings)

    return tuning
def score_bw(args):
    """Score the n-best list with the first and (optionally) second scoring model.

    For each scoring model the function picks the preprocessed data directory
    matching the model's direction, runs ``generate.main`` on it with
    ``--score-reference`` and captures everything printed to stdout in the
    model's score file. A model is skipped when its score file already exists
    or when it is the same model that generated the n-best list (and no source
    prefix fraction is in use).

    Args:
        args: parsed reranking arguments; the attributes read here are the
            ones referenced in the body (``backwards1/2``, ``score_model1/2``,
            ``right_to_left1/2``, ``model1_name``/``model2_name``, the
            language pair and the n-best location options).
    """
    # A "backwards" scorer models source given target, so its language pair
    # is swapped relative to the generation direction.
    if args.backwards1:
        scorer1_src = args.target_lang
        scorer1_tgt = args.source_lang
    else:
        scorer1_src = args.source_lang
        scorer1_tgt = args.target_lang

    # scorer2_* are only bound when a second scoring model was given; every
    # later use is guarded by the same condition.
    if args.score_model2 is not None:
        if args.backwards2:
            scorer2_src = args.target_lang
            scorer2_tgt = args.source_lang
        else:
            scorer2_src = args.source_lang
            scorer2_tgt = args.target_lang

    # When the scorer is the generation model itself, rescoring is skipped
    # (unless a source prefix fraction is set).
    rerank1_is_gen = (
        args.gen_model == args.score_model1 and args.source_prefix_frac is None
    )
    rerank2_is_gen = (
        args.gen_model == args.score_model2 and args.source_prefix_frac is None
    )

    (
        pre_gen,
        left_to_right_preprocessed_dir,
        right_to_left_preprocessed_dir,
        backwards_preprocessed_dir,
        lm_preprocessed_dir,
    ) = rerank_utils.get_directories(
        args.data_dir_name,
        args.num_rescore,
        args.gen_subset,
        args.gen_model_name,
        args.shard_id,
        args.num_shards,
        args.sampling,
        args.prefix_len,
        args.target_prefix_frac,
        args.source_prefix_frac,
    )

    score1_file = rerank_utils.rescore_file_name(
        pre_gen,
        args.prefix_len,
        args.model1_name,
        target_prefix_frac=args.target_prefix_frac,
        source_prefix_frac=args.source_prefix_frac,
        backwards=args.backwards1,
    )

    if args.score_model2 is not None:
        score2_file = rerank_utils.rescore_file_name(
            pre_gen,
            args.prefix_len,
            args.model2_name,
            target_prefix_frac=args.target_prefix_frac,
            source_prefix_frac=args.source_prefix_frac,
            backwards=args.backwards2,
        )

    # Right-to-left takes precedence over backwards when choosing the data.
    if args.right_to_left1:
        rerank_data1 = right_to_left_preprocessed_dir
    elif args.backwards1:
        rerank_data1 = backwards_preprocessed_dir
    else:
        rerank_data1 = left_to_right_preprocessed_dir

    # Options shared by both scoring runs: the subset scored is always "train".
    gen_param = ["--batch-size", str(128), "--score-reference", "--gen-subset", "train"]
    if not rerank1_is_gen and not os.path.isfile(score1_file):
        print("STEP 4: score the translations for model 1")

        model_param1 = [
            "--path",
            args.score_model1,
            "--source-lang",
            scorer1_src,
            "--target-lang",
            scorer1_tgt,
        ]
        gen_model1_param = [rerank_data1] + gen_param + model_param1

        gen_parser = options.get_generation_parser()
        input_args = options.parse_args_and_arch(gen_parser, gen_model1_param)

        # Everything generate.main prints to stdout ends up in the score file.
        with open(score1_file, "w") as f:
            with redirect_stdout(f):
                generate.main(input_args)

    if (
        args.score_model2 is not None
        and not os.path.isfile(score2_file)
        and not rerank2_is_gen
    ):
        print("STEP 4: score the translations for model 2")

        if args.right_to_left2:
            rerank_data2 = right_to_left_preprocessed_dir
        elif args.backwards2:
            rerank_data2 = backwards_preprocessed_dir
        else:
            rerank_data2 = left_to_right_preprocessed_dir

        model_param2 = [
            "--path",
            args.score_model2,
            "--source-lang",
            scorer2_src,
            "--target-lang",
            scorer2_tgt,
        ]
        gen_model2_param = [rerank_data2] + gen_param + model_param2

        gen_parser = options.get_generation_parser()
        input_args = options.parse_args_and_arch(gen_parser, gen_model2_param)

        # Same capture-stdout approach as for model 1.
        with open(score2_file, "w") as f:
            with redirect_stdout(f):
                generate.main(input_args)
def score_lm(args):
    """Score the n-best hypotheses with the target-side language model.

    The n-best list is loaded either from the generation output stored under
    the n-best directory or from ``args.nbest_list`` when one is supplied.
    If a language model is configured and its score file does not exist yet,
    ``rerank_utils.lm_scoring`` is run to produce it.

    Args:
        args: parsed reranking arguments.
    """
    nbest_supplied = args.nbest_list is not None

    pre_gen, _, _, _, lm_preprocessed_dir = rerank_utils.get_directories(
        args.data_dir_name,
        args.num_rescore,
        args.gen_subset,
        args.gen_model_name,
        args.shard_id,
        args.num_shards,
        args.sampling,
        args.prefix_len,
        args.target_prefix_frac,
        args.source_prefix_frac,
    )

    if nbest_supplied:
        print("Using predefined n-best list from interactive.py")
        predictions_bpe_file = args.nbest_list
    else:
        predictions_bpe_file = pre_gen + "/generate_output_bpe.txt"

    # The n-best list is parsed even when no language model is configured.
    gen_output = rerank_utils.BitextOutputFromGen(
        predictions_bpe_file, bpe_symbol=args.post_process, nbest=nbest_supplied
    )

    if args.language_model is None:
        return

    lm_score_file = rerank_utils.rescore_file_name(
        pre_gen, args.prefix_len, args.lm_name, lm_file=True
    )
    if os.path.isfile(lm_score_file):
        # Scores from an earlier run are reused.
        return

    print("STEP 4.5: language modeling for P(T)")
    code = args.lm_bpe_code
    bpe_status = (
        "no bpe" if code is None else "shared" if code == "shared" else "different"
    )

    rerank_utils.lm_scoring(
        lm_preprocessed_dir,
        bpe_status,
        gen_output,
        pre_gen,
        args.lm_dict,
        args.lm_name,
        args.language_model,
        args.lm_bpe_code,
        128,
        lm_score_file,
        args.target_lang,
        args.source_lang,
        prefix_len=args.prefix_len,
    )
def random_search(args):
    """Tune reranking hyperparameters by random search, then rerank with the best.

    For every parameter named in ``args.tune_param`` a value is drawn
    uniformly from ``[lower_bound[i], upper_bound[i]]`` for each of
    ``args.num_trials`` trials; the remaining parameters keep the first value
    given on the command line. All trials are handed to ``rerank.rerank`` in
    one call, and the best combination it returns is then used to rerank the
    "valid" subset (if it is not already the generation subset) and finally
    ``args.gen_subset``.

    Args:
        args: parsed tuning arguments (see the tuning and reranking options).

    Raises:
        ValueError: if fewer bounds than tuned parameters were supplied, if an
            upper bound is smaller than its lower bound, or (from
            ``list.index``) if a tuned parameter is unknown or repeated.
    """
    tuned_names = list(args.tune_param)
    num_tuned = len(tuned_names)

    # Validate with real exceptions: asserts vanish under ``python -O`` and a
    # too-short bounds list would otherwise surface as a bare IndexError.
    if len(args.lower_bound) < num_tuned or len(args.upper_bound) < num_tuned:
        raise ValueError(
            "need one lower and one upper bound per tuned parameter: got "
            f"{len(args.lower_bound)} lower / {len(args.upper_bound)} upper "
            f"bounds for {num_tuned} parameter(s)"
        )
    for name, low, high in zip(tuned_names, args.lower_bound, args.upper_bound):
        if not high >= low:
            raise ValueError(
                f"upper bound {high} is below lower bound {low} for {name!r}"
            )

    # Parameters that are NOT tuned keep their command-line value; scalars are
    # wrapped so every entry is a list.
    fixed_names = ["lenpen", "weight1", "weight2", "weight3"]
    fixed_values = [
        value if isinstance(value, list) else [value]
        for value in (args.lenpen, args.weight1, args.weight2, args.weight3)
    ]
    for name in tuned_names:
        index = fixed_names.index(name)
        del fixed_names[index]
        del fixed_values[index]

    # Column order of the trial matrix: tuned parameters first, then fixed.
    column_names = tuned_names + fixed_names

    random.seed(args.seed)
    # Draw order (trial-major, parameter-minor) is kept so that a given seed
    # reproduces the same trials as before.
    random_params = np.array(
        [
            [
                random.uniform(args.lower_bound[i], args.upper_bound[i])
                for i in range(num_tuned)
            ]
            for _ in range(args.num_trials)
        ]
    )
    set_params = np.array(
        [[values[0] for values in fixed_values] for _ in range(args.num_trials)]
    )
    random_params = np.concatenate((random_params, set_params), 1)

    rerank_args = vars(args).copy()
    # A predefined n-best list is always treated as the "test" subset.
    rerank_args["gen_subset"] = "test" if args.nbest_list else args.tune_subset

    for column, name in enumerate(column_names):
        rerank_args[name] = list(random_params[:, column])

    if args.share_weights:
        column = column_names.index("weight2")
        rerank_args["weight3"] = list(random_params[:, column])

    best_lenpen, best_weight1, best_weight2, best_weight3, best_score = rerank.rerank(
        argparse.Namespace(**rerank_args)
    )

    def best_args(gen_subset):
        """Return a copy of ``args`` using the best parameters on ``gen_subset``."""
        overrides = vars(args).copy()
        overrides["gen_subset"] = gen_subset
        overrides["lenpen"] = [best_lenpen]
        overrides["weight1"] = [best_weight1]
        overrides["weight2"] = [best_weight2]
        overrides["weight3"] = [best_weight3]
        return argparse.Namespace(**overrides)

    # write the hypothesis from the valid set from the best trial
    if args.gen_subset != "valid":
        rerank.rerank(best_args("valid"))

    # test with the best hyperparameters on gen subset
    rerank.rerank(best_args(args.gen_subset))
def reprocess_nbest(fle):
    """Parse an n-best list written in interactive.py format.

    Lines are classified by their first character:

    * ``O`` starts a new source sentence; the text after the first two
      characters is stored as the source.
    * ``H`` is a hypothesis; the first number found on the line is its score
      and the rest of the line (leading/trailing tabs stripped) is its text.
    * ``P`` holds the positional scores; every whitespace-separated field
      after the first is parsed as a float.

    Sentences are numbered from 0 in order of appearance. No references are
    available in this format, so every target is the placeholder ``"filler"``.

    Args:
        fle: path of the n-best file.

    Returns:
        ``(source_dict, hypothesis_dict, score_dict, target_dict,
        pos_score_dict)``, all keyed by sentence index; the hypothesis, score
        and positional-score values are lists with one entry per hypothesis.

    Raises:
        ValueError: if an ``H`` line contains no parsable score.
        AssertionError: if some source has no hypothesis, score or positional
            scores.
    """
    with open(fle, "r") as f:
        lines = f.read().split("\n")

    source_dict = {}
    hypothesis_dict = {}
    score_dict = {}
    target_dict = {}
    pos_score_dict = {}

    # The optional exponent keeps scores such as "-1.5e-05" in one piece;
    # without it the score was cut to "-1.5" and "e-05" leaked into the
    # hypothesis text.
    score_pattern = re.compile(r"[-]?\d+[.]?\d+(?:[eE][-+]?\d+)?")
    j = -1

    for line in lines:
        line += "\n"
        line_type = line[0]

        if line_type == "H":
            match = score_pattern.search(line)
            if match is None:
                raise ValueError(
                    "no score found on hypothesis line: {!r}".format(line)
                )
            score_dict.setdefault(j, []).append(float(match.group()))
            hypothesis_dict.setdefault(j, []).append(
                line[match.end():].strip("\t")
            )
        elif line_type == "O":
            j += 1
            source_dict[j] = line[2:]
            # we don't have the targets for interactive.py
            target_dict[j] = "filler"
        elif line_type == "P":
            pos_scores = [float(pos_score) for pos_score in line.split()[1:]]
            pos_score_dict.setdefault(j, []).append(pos_scores)

    assert source_dict.keys() == hypothesis_dict.keys()
    assert source_dict.keys() == pos_score_dict.keys()
    assert source_dict.keys() == score_dict.keys()

    return source_dict, hypothesis_dict, score_dict, target_dict, pos_score_dict
def get_prefix(sentence, prefix_len):
    """Return the first ``prefix_len`` whitespace-separated words of ``sentence``.

    The sentence is assumed to carry no BPE markers. Surrounding newlines are
    always dropped. When the sentence has no more than ``prefix_len`` words it
    is returned with its inner spacing untouched; otherwise the kept words are
    re-joined with single spaces.
    """
    trimmed = sentence.strip("\n")
    words = trimmed.split()
    if prefix_len < len(words):
        return " ".join(words[:prefix_len])
    return trimmed
def remove_bpe(line, bpe_symbol):
    """Strip BPE continuation markers from ``line``.

    All newlines are removed, every occurrence of ``bpe_symbol`` is deleted
    (a space is appended first so a marker at the very end of the line is
    matched too), trailing whitespace is trimmed and a single newline is
    appended.

    Args:
        line: the (possibly BPE-segmented) sentence.
        bpe_symbol: the marker to delete, e.g. ``"@@ "``. ``None`` means the
            text carries no BPE, so nothing is deleted; previously this
            raised ``TypeError`` inside ``str.replace``.

    Returns:
        The cleaned sentence, always terminated by exactly one ``"\\n"``.
    """
    line = line.replace("\n", "")
    if bpe_symbol is not None:
        line = (line + " ").replace(bpe_symbol, "")
    return line.rstrip() + "\n"
class BitextOutput(object):
    # Holds the parsed output of one rescoring run, re-keyed so that every
    # entry carries exactly one source, hypothesis, target and score.
    def __init__(
        self,
        output_file,
        backwards,
        right_to_left,
        bpe_symbol,
        prefix_len=None,
        target_prefix_frac=None,
        source_prefix_frac=None,
    ):
        """Process the output of a rescoring run.

        The file is parsed with ``reprocess``, the per-hypothesis scores are
        rebuilt from the positional scores, and every entry is reduced to its
        first hypothesis with BPE markers removed.

        Args:
            output_file: path of the score file to parse.
            backwards: whether the scoring model is a backwards model; if so
                the parsed sources and hypotheses trade places.
            right_to_left: whether the scored text is word-reversed; if so it
                is reversed again before BPE removal.
            bpe_symbol: BPE marker passed to ``remove_bpe``.
            prefix_len: optional prefix length forwarded to
                ``get_score_from_pos``.
            target_prefix_frac: hypothesis prefix fraction used when not
                ``backwards``.
            source_prefix_frac: hypothesis prefix fraction used when
                ``backwards``.
        """
        source, hypo, score, target, pos_score = reprocess(output_file)
        # The scored "hypothesis" side depends on the model's direction.
        if backwards:
            self.hypo_fracs = source_prefix_frac
        else:
            self.hypo_fracs = target_prefix_frac

        # remove length penalty so we can use raw scores
        score, num_bpe_tokens = get_score_from_pos(
            pos_score, prefix_len, hypo, bpe_symbol, self.hypo_fracs, backwards
        )
        source_lengths = {}
        target_lengths = {}

        assert hypo.keys() == source.keys(), "key mismatch"
        if backwards:
            # After the swap ``source`` maps to lists of strings and ``hypo``
            # maps to plain strings.
            tmp = hypo
            hypo = source
            source = tmp
        for i in source:
            # since we are reranking, there should only be one hypo per source sentence
            if backwards:
                len_src = len(source[i][0].split())
                # record length without <eos>
                if len_src == num_bpe_tokens[i][0] - 1:
                    source_lengths[i] = num_bpe_tokens[i][0] - 1
                else:
                    source_lengths[i] = num_bpe_tokens[i][0]

                target_lengths[i] = len(hypo[i].split())

                source[i] = remove_bpe(source[i][0], bpe_symbol)
                target[i] = remove_bpe(target[i], bpe_symbol)
                hypo[i] = remove_bpe(hypo[i], bpe_symbol)

                score[i] = float(score[i][0])
                pos_score[i] = pos_score[i][0]

            else:
                len_tgt = len(hypo[i][0].split())
                # record length without <eos>
                if len_tgt == num_bpe_tokens[i][0] - 1:
                    target_lengths[i] = num_bpe_tokens[i][0] - 1
                else:
                    target_lengths[i] = num_bpe_tokens[i][0]

                source_lengths[i] = len(source[i].split())

                if right_to_left:
                    # Undo the word reversal before stripping BPE markers.
                    source[i] = remove_bpe(make_right_to_left(source[i]), bpe_symbol)
                    target[i] = remove_bpe(make_right_to_left(target[i]), bpe_symbol)
                    hypo[i] = remove_bpe(make_right_to_left(hypo[i][0]), bpe_symbol)
                    score[i] = float(score[i][0])
                    pos_score[i] = pos_score[i][0]
                else:
                    assert (
                        len(hypo[i]) == 1
                    ), "expected only one hypothesis per source sentence"
                    source[i] = remove_bpe(source[i], bpe_symbol)
                    target[i] = remove_bpe(target[i], bpe_symbol)
                    hypo[i] = remove_bpe(hypo[i][0], bpe_symbol)
                    score[i] = float(score[i][0])
                    pos_score[i] = pos_score[i][0]

        # Per-entry results, keyed by the ids found in the score file.
        self.rescore_source = source
        self.rescore_hypo = hypo
        self.rescore_score = score
        self.rescore_target = target
        self.rescore_pos_score = pos_score
        self.backwards = backwards
        self.right_to_left = right_to_left
        self.target_lengths = target_lengths
        self.source_lengths = source_lengths
def get_score_from_pos(
    pos_score_dict, prefix_len, hypo_dict, bpe_symbol, hypo_frac, backwards
):
    """Rebuild sentence scores by summing positional scores.

    For every hypothesis the number of leading positional scores to keep is
    decided as follows:

    * ``prefix_len`` given and not ``backwards``: the number of BPE tokens
      covering the first ``prefix_len`` words of the hypothesis;
    * otherwise, ``hypo_frac`` given: the number of BPE tokens covering that
      fraction of the hypothesis' words;
    * otherwise: all positional scores.

    Args:
        pos_score_dict: id -> list (one per hypothesis) of positional scores.
        prefix_len: prefix length in words, or ``None``.
        hypo_dict: id -> list of hypothesis strings (only read when a prefix
            is in use).
        bpe_symbol: BPE marker used to count words.
        hypo_frac: prefix fraction, or ``None``.
        backwards: whether the scores come from a backwards model.

    Returns:
        ``(score_dict, num_bpe_tokens_dict)``: for each id, the summed score
        and the number of tokens counted, one entry per hypothesis.
    """
    assert prefix_len is None or hypo_frac is None

    use_prefix_len = prefix_len is not None and not backwards
    score_dict = {}
    num_bpe_tokens_dict = {}

    for key, per_hypo_scores in pos_score_dict.items():
        totals = score_dict[key] = []
        counts = num_bpe_tokens_dict[key] = []
        for idx, token_scores in enumerate(per_hypo_scores):
            if use_prefix_len:
                kept = get_num_bpe_tokens_from_len(
                    hypo_dict[key][idx], bpe_symbol, prefix_len
                )
            elif hypo_frac is not None:
                kept = calc_length_from_frac(
                    hypo_dict[key][idx], hypo_frac, bpe_symbol
                )[2]
            else:
                kept = len(token_scores)
            totals.append(sum(token_scores[:kept]))
            counts.append(kept)

    return score_dict, num_bpe_tokens_dict
def parse_lm(input_file, prefix_len=None, bpe_symbol=None, target_prefix_frac=None):
    """Parse the word-probability output of eval_lm.

    The first seven and the last two lines of the file are discarded. Of the
    remaining lines only those whose first field is an integer id are used;
    they are read as ``id tok [score] tok [score] ...``. The final token of
    each sentence is dropped from the text (its score is still counted).

    Args:
        input_file: path of the captured eval_lm output.
        prefix_len: if given, only the scores of the BPE tokens covering the
            first ``prefix_len`` words are summed.
        bpe_symbol: BPE marker; when given, a BPE-free copy of every sentence
            is stored as well.
        target_prefix_frac: if given (and ``prefix_len`` is not), only the
            scores covering that fraction of the words are summed.

    Returns:
        ``(sentences, sen_scores, sen_pos_scores, no_bpe_sentences,
        num_bpe_tokens_dict)``, each a dict keyed by sentence id.
        ``no_bpe_sentences`` stays empty when ``bpe_symbol`` is ``None``.
    """
    with open(input_file, "r") as f:
        text = f.readlines()
    cleaned_text = text[7:][:-2]

    sentences = {}
    sen_scores = {}
    sen_pos_scores = {}
    no_bpe_sentences = {}
    num_bpe_tokens_dict = {}
    for line in cleaned_text:
        tokens = line.split()
        # Blank lines have no fields at all; indexing tokens[0] on them used
        # to raise IndexError.
        if not tokens or not tokens[0].isdigit():
            continue

        line_id = int(tokens[0])
        # Scores are printed in brackets after each token: "[-1.23]".
        scores = [float(x[1:-1]) for x in tokens[2::2]]
        # exclude <eos> symbol to match output from generate.py
        bpe_sen = " ".join(tokens[1::2][:-1]) + "\n"
        sentences[line_id] = bpe_sen
        if bpe_symbol is not None:
            no_bpe_sentences[line_id] = remove_bpe(bpe_sen, bpe_symbol)

        # ``bpe_sen`` is now always bound, so the prefix paths no longer
        # depend on ``bpe_symbol`` having been given.
        if prefix_len is not None:
            num_bpe_tokens = get_num_bpe_tokens_from_len(
                bpe_sen, bpe_symbol, prefix_len
            )
            sen_scores[line_id] = sum(scores[:num_bpe_tokens])
            num_bpe_tokens_dict[line_id] = num_bpe_tokens
        elif target_prefix_frac is not None:
            _, _, target_prefix_len = calc_length_from_frac(
                bpe_sen, target_prefix_frac, bpe_symbol
            )
            sen_scores[line_id] = sum(scores[:target_prefix_len])
            num_bpe_tokens_dict[line_id] = target_prefix_len
        else:
            sen_scores[line_id] = sum(scores)
            num_bpe_tokens_dict[line_id] = len(scores)

        sen_pos_scores[line_id] = scores

    return sentences, sen_scores, sen_pos_scores, no_bpe_sentences, num_bpe_tokens_dict
get_directories( + data_dir_name, + num_rescore, + gen_subset, + fw_name, + shard_id, + num_shards, + sampling=False, + prefix_len=None, + target_prefix_frac=None, + source_prefix_frac=None, +): + nbest_file_id = ( + "nbest_" + + str(num_rescore) + + "_subset_" + + gen_subset + + "_fw_name_" + + fw_name + + "_shard_" + + str(shard_id) + + "_of_" + + str(num_shards) + ) + + if sampling: + nbest_file_id += "_sampling" + + # the directory containing all information for this nbest list + pre_gen = ( + os.path.join(os.path.dirname(__file__)) + + "/rerank_data/" + + data_dir_name + + "/" + + nbest_file_id + ) + # the directory to store the preprocessed nbest list, for left to right rescoring + left_to_right_preprocessed_dir = pre_gen + "/left_to_right_preprocessed" + if source_prefix_frac is not None: + left_to_right_preprocessed_dir = ( + left_to_right_preprocessed_dir + "/prefix_frac" + str(source_prefix_frac) + ) + # the directory to store the preprocessed nbest list, for right to left rescoring + right_to_left_preprocessed_dir = pre_gen + "/right_to_left_preprocessed" + # the directory to store the preprocessed nbest list, for backwards rescoring + backwards_preprocessed_dir = pre_gen + "/backwards" + if target_prefix_frac is not None: + backwards_preprocessed_dir = ( + backwards_preprocessed_dir + "/prefix_frac" + str(target_prefix_frac) + ) + elif prefix_len is not None: + backwards_preprocessed_dir = ( + backwards_preprocessed_dir + "/prefix_" + str(prefix_len) + ) + + # the directory to store the preprocessed nbest list, for rescoring with P(T) + lm_preprocessed_dir = pre_gen + "/lm_preprocessed" + + return ( + pre_gen, + left_to_right_preprocessed_dir, + right_to_left_preprocessed_dir, + backwards_preprocessed_dir, + lm_preprocessed_dir, + ) + + +def lm_scoring( + preprocess_directory, + bpe_status, + gen_output, + pre_gen, + cur_lm_dict, + cur_lm_name, + cur_language_model, + cur_lm_bpe_code, + batch_size, + lm_score_file, + target_lang, + source_lang, + 
prefix_len=None, +): + if prefix_len is not None: + assert ( + bpe_status == "different" + ), "bpe status must be different to use prefix len" + if bpe_status == "no bpe": + # run lm on output without bpe + write_reprocessed( + gen_output.no_bpe_source, + gen_output.no_bpe_hypo, + gen_output.no_bpe_target, + pre_gen + "/rescore_data_no_bpe.de", + pre_gen + "/rescore_data_no_bpe.en", + pre_gen + "/reference_file_no_bpe", + ) + + preprocess_lm_param = [ + "--only-source", + "--trainpref", + pre_gen + "/rescore_data_no_bpe." + target_lang, + "--srcdict", + cur_lm_dict, + "--destdir", + preprocess_directory, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(preprocess_lm_param) + preprocess.main(input_args) + + eval_lm_param = [ + preprocess_directory, + "--path", + cur_language_model, + "--output-word-probs", + "--batch-size", + str(batch_size), + "--max-tokens", + "1024", + "--sample-break-mode", + "eos", + "--gen-subset", + "train", + ] + + eval_lm_parser = options.get_eval_lm_parser() + input_args = options.parse_args_and_arch(eval_lm_parser, eval_lm_param) + + with open(lm_score_file, "w") as f: + with redirect_stdout(f): + eval_lm.main(input_args) + + elif bpe_status == "shared": + preprocess_lm_param = [ + "--only-source", + "--trainpref", + pre_gen + "/rescore_data." 
+ target_lang, + "--srcdict", + cur_lm_dict, + "--destdir", + preprocess_directory, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(preprocess_lm_param) + preprocess.main(input_args) + + eval_lm_param = [ + preprocess_directory, + "--path", + cur_language_model, + "--output-word-probs", + "--batch-size", + str(batch_size), + "--sample-break-mode", + "eos", + "--gen-subset", + "train", + ] + + eval_lm_parser = options.get_eval_lm_parser() + input_args = options.parse_args_and_arch(eval_lm_parser, eval_lm_param) + + with open(lm_score_file, "w") as f: + with redirect_stdout(f): + eval_lm.main(input_args) + + elif bpe_status == "different": + rescore_file = pre_gen + "/rescore_data_no_bpe" + rescore_bpe = pre_gen + "/rescore_data_new_bpe" + + rescore_file += "." + rescore_bpe += "." + + write_reprocessed( + gen_output.no_bpe_source, + gen_output.no_bpe_hypo, + gen_output.no_bpe_target, + rescore_file + source_lang, + rescore_file + target_lang, + pre_gen + "/reference_file_no_bpe", + bpe_symbol=None, + ) + + # apply LM bpe to nbest list + bpe_src_param = [ + "-c", + cur_lm_bpe_code, + "--input", + rescore_file + target_lang, + "--output", + rescore_bpe + target_lang, + ] + subprocess.call( + [ + "python", + os.path.join( + os.path.dirname(__file__), "subword-nmt/subword_nmt/apply_bpe.py" + ), + ] + + bpe_src_param, + shell=False, + ) + # uncomment to use fastbpe instead of subword-nmt bpe + # bpe_src_param = [rescore_bpe+target_lang, rescore_file+target_lang, cur_lm_bpe_code] + # subprocess.call(["/private/home/edunov/fastBPE/fast", "applybpe"] + bpe_src_param, shell=False) + + preprocess_dir = preprocess_directory + + preprocess_lm_param = [ + "--only-source", + "--trainpref", + rescore_bpe + target_lang, + "--srcdict", + cur_lm_dict, + "--destdir", + preprocess_dir, + ] + preprocess_parser = options.get_preprocessing_parser() + input_args = preprocess_parser.parse_args(preprocess_lm_param) + 
preprocess.main(input_args) + + eval_lm_param = [ + preprocess_dir, + "--path", + cur_language_model, + "--output-word-probs", + "--batch-size", + str(batch_size), + "--max-tokens", + "1024", + "--sample-break-mode", + "eos", + "--gen-subset", + "train", + ] + + eval_lm_parser = options.get_eval_lm_parser() + input_args = options.parse_args_and_arch(eval_lm_parser, eval_lm_param) + + with open(lm_score_file, "w") as f: + with redirect_stdout(f): + eval_lm.main(input_args) + + +def rescore_file_name( + nbest_dir, + prefix_len, + scorer_name, + lm_file=False, + target_prefix_frac=None, + source_prefix_frac=None, + backwards=None, +): + """Build the score-file path for one scorer under ``nbest_dir``. + + The base name is ``<nbest_dir>/lm_score_translations_model_<scorer_name>.txt`` when ``lm_file`` is truthy, otherwise ``<nbest_dir>/<scorer_name>_score_translations.txt``. A ``prefix_len<N>``, ``target_prefix_frac<F>`` or ``source_prefix_frac<F>`` tag is then appended after the ``.txt`` extension, depending on ``backwards`` and on which of those arguments is not None. + + NOTE(review): indentation is lost in this flattened diff, so confirm against the real file whether the trailing ``else`` pairs with ``if backwards`` or with the ``prefix_len`` / ``target_prefix_frac`` chain. + """ + if lm_file: + score_file = nbest_dir + "/lm_score_translations_model_" + scorer_name + ".txt" + else: + score_file = nbest_dir + "/" + scorer_name + "_score_translations.txt" + if backwards: + if prefix_len is not None: + score_file += "prefix_len" + str(prefix_len) + elif target_prefix_frac is not None: + score_file += "target_prefix_frac" + str(target_prefix_frac) + else: + if source_prefix_frac is not None: + score_file += "source_prefix_frac" + str(source_prefix_frac) + return score_file diff --git a/fairseq/examples/nonautoregressive_translation/README.md b/fairseq/examples/nonautoregressive_translation/README.md new file mode 100644 index 0000000..8793e22 --- /dev/null +++ b/fairseq/examples/nonautoregressive_translation/README.md @@ -0,0 +1,146 @@ +# Non-autoregressive Neural Machine Translation (NAT) + +This page mainly includes instructions for reproducing results from the following papers +* [Levenshtein Transformer (Gu et al., 2019)](https://arxiv.org/abs/1905.11006). +* [Understanding Knowledge Distillation in Non-autoregressive Machine Translation (Zhou et al., 2019)](https://arxiv.org/abs/1911.02727).
+ +We also provided our own implementations for several popular non-autoregressive-based models as reference:<br> +* [Non-Autoregressive Neural Machine Translation (Gu et al., 2017)](https://arxiv.org/abs/1711.02281)<br> +* [Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement (Lee et al., 2018)](https://arxiv.org/abs/1802.06901)<br> +* [Insertion Transformer: Flexible Sequence Generation via Insertion Operations (Stern et al., 2019)](https://arxiv.org/abs/1902.03249)<br> +* [Mask-Predict: Parallel Decoding of Conditional Masked Language Models (Ghazvininejad et al., 2019)](https://arxiv.org/abs/1904.09324v2)<br> +* [Fast Structured Decoding for Sequence Models (Sun et al., 2019)](https://arxiv.org/abs/1910.11555) + +## Dataset + +First, follow the [instructions to download and preprocess the WMT'14 En-De dataset](../translation#wmt14-english-to-german-convolutional). +Make sure to learn a joint vocabulary by passing the `--joined-dictionary` option to `fairseq-preprocess`. + +### Knowledge Distillation +Following [Gu et al. 2019](https://arxiv.org/abs/1905.11006), [knowledge distillation](https://arxiv.org/abs/1606.07947) from an autoregressive model can effectively simplify the training data distribution, which is sometimes essential for NAT-based models to learn good translations. +The easiest way of performing distillation is to follow the [instructions of training a standard transformer model](../translation) on the same data, and then decode the training set to produce a distillation dataset for NAT. + +### Download +We also provided the preprocessed [original](http://dl.fbaipublicfiles.com/nat/original_dataset.zip) and [distillation](http://dl.fbaipublicfiles.com/nat/distill_dataset.zip) datasets. Please build the binarized dataset on your own. + + +## Train a model + +Then we can train a nonautoregressive model using the `translation_lev` task and a new criterion `nat_loss`. 
+Use the `--noise` flag to specify the input noise used on the target sentences. +By default, we run the task for *Levenshtein Transformer*, with `--noise='random_delete'`. Full scripts to run other models can also be found [here](./scripts.md). + +The following command will train a *Levenshtein Transformer* on the binarized dataset. + +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch levenshtein_transformer \ + --noise random_delete \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + +## Translate + +Once a model is trained, we can generate translations using an `iterative_refinement_generator` which, based on the model's initial output, iteratively reads and greedily refines the translation until (1) the model predicts the same translations for two consecutive iterations; or (2) the generator reaches the maximum iterations (`--iter-decode-max-iter`). Use `--print-step` to check the actual number of iterations for each sentence. + +For *Levenshtein Transformer*, it sometimes helps to apply a `--iter-decode-eos-penalty` (typically, 0~3) to penalize the model finishing generation too early and generating too short translations.
+ +For example, to generate with `--iter-decode-max-iter=9`: +```bash +fairseq-generate \ + data-bin/wmt14_en_de_distill \ + --gen-subset test \ + --task translation_lev \ + --path checkpoints/checkpoint_best.pt \ + --iter-decode-max-iter 9 \ + --iter-decode-eos-penalty 0 \ + --beam 1 --remove-bpe \ + --print-step \ + --batch-size 400 +``` +In the end of the generation, we can see the tokenized BLEU score for the translation. + +## Advanced Decoding Methods +### Ensemble +The NAT models use special implementations of [ensembling](https://github.com/fairinternal/fairseq-py/blob/b98d88da52f2f21f1b169bab8c70c1c4ca19a768/fairseq/sequence_generator.py#L522) to support iterative refinement and a variety of parallel operations in different models, while it shares the same API as standard autoregressive models as follows: +```bash +fairseq-generate \ + data-bin/wmt14_en_de_distill \ + --gen-subset test \ + --task translation_lev \ + --path checkpoint_1.pt:checkpoint_2.pt:checkpoint_3.pt \ + --iter-decode-max-iter 9 \ + --iter-decode-eos-penalty 0 \ + --beam 1 --remove-bpe \ + --print-step \ + --batch-size 400 +``` +We use ``:`` to split multiple models. Note that, not all NAT models support ensembling for now. + + +### Length-beam +For models that predict lengths before decoding (e.g. the vanilla NAT, Mask-Predict, etc), it is possible to improve the translation quality by varying the target lengths around the predicted value, and translating the same example multiple times in parallel. We can select the best translation with the highest scores defined by your model's output. + +Note that, not all models support length beams. For models which dynamically change the lengths (e.g. *Insertion Transformer*, *Levenshtein Transformer*), the same trick does not apply. 
+ +### Re-ranking +If the model generates multiple translations with length beam, we can also introduce an autoregressive model to rerank the translations considering scoring from an autoregressive model is much faster than decoding from that. + +For example, to generate translations with length beam and reranking, +```bash +fairseq-generate \ + data-bin/wmt14_en_de_distill \ + --gen-subset test \ + --task translation_lev \ + --path checkpoints/checkpoint_best.pt:at_checkpoints/checkpoint_best.pt \ + --iter-decode-max-iter 9 \ + --iter-decode-eos-penalty 0 \ + --iter-decode-with-beam 9 \ + --iter-decode-with-external-reranker \ + --beam 1 --remove-bpe \ + --print-step \ + --batch-size 100 +``` +Note that we need to make sure the autoregressive model shares the same vocabulary as our target non-autoregressive model. + + +## Citation + +```bibtex +@incollection{NIPS2019_9297, + title = {Levenshtein Transformer}, + author = {Gu, Jiatao and Wang, Changhan and Zhao, Junbo}, + booktitle = {Advances in Neural Information Processing Systems 32}, + editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. 
Garnett}, + pages = {11179--11189}, + year = {2019}, + publisher = {Curran Associates, Inc.}, + url = {http://papers.nips.cc/paper/9297-levenshtein-transformer.pdf} +} +``` +```bibtex +@article{zhou2019understanding, + title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation}, + author={Zhou, Chunting and Neubig, Graham and Gu, Jiatao}, + journal={arXiv preprint arXiv:1911.02727}, + year={2019} +} +``` diff --git a/fairseq/examples/nonautoregressive_translation/scripts.md b/fairseq/examples/nonautoregressive_translation/scripts.md new file mode 100644 index 0000000..9d3d7b6 --- /dev/null +++ b/fairseq/examples/nonautoregressive_translation/scripts.md @@ -0,0 +1,179 @@ +# Examples of Training scripts for Non-autoregressive Machine Translation models + +### Non-autoregressive Transformer (NAT, Gu et al., 2017) +Note that we need to have an additional module to perform "length prediction" (`--length-loss-factor`) before generating the whole sequence. +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch nonautoregressive_transformer \ + --noise full_mask \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --pred-length-offset \ + --length-loss-factor 0.1 \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + +### Fast Structured Decoding for Sequence Models (NAT-CRF, Sun et al., 2019) +Note that we implemented a low-rank approximated CRF model by setting `--crf-lowrank-approx=32` and `--crf-beam-approx=64` as described in the original
paper. All other settings are the same as the vanilla NAT model. +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch nacrf_transformer \ + --noise full_mask \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --pred-length-offset \ + --length-loss-factor 0.1 \ + --word-ins-loss-factor 0.5 \ + --crf-lowrank-approx 32 \ + --crf-beam-approx 64 \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + + +### Non-autoregressive Transformer with Iterative Refinement (iNAT, Lee et al., 2018) +Note that `--train-step` means how many iterations of refinement we used during training, and `--dae-ratio` controls the ratio of denoising auto-encoder training described in the original paper. 
+```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch iterative_nonautoregressive_transformer \ + --noise full_mask \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --pred-length-offset \ + --length-loss-factor 0.1 \ + --train-step 4 \ + --dae-ratio 0.5 \ + --stochastic-approx \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + +### Insertion Transformer (InsT, Stern et al., 2019) +Note that we need to specify the "slot-loss" (uniform or balanced tree) described in the original paper. Here we use `--label-tau` to control the temperature. 
+ +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch insertion_transformer \ + --noise random_delete \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + + +### Mask Predict (CMLM, Ghazvininejad et al., 2019) +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch cmlm_transformer \ + --noise random_mask \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` + + + + +### Levenshtein Transformer (LevT, Gu et al., 2019) +```bash +fairseq-train \ + data-bin/wmt14_en_de_distill \ + --save-dir checkpoints \ + --ddp-backend=legacy_ddp \ + --task translation_lev \ + --criterion nat_loss \ + --arch levenshtein_transformer \ + --noise random_delete \ + --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9,0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --stop-min-lr '1e-09' --warmup-updates 10000 \ + --warmup-init-lr '1e-07' --label-smoothing 0.1 \ + --dropout 
0.3 --weight-decay 0.01 \ + --decoder-learned-pos \ + --encoder-learned-pos \ + --apply-bert-init \ + --log-format 'simple' --log-interval 100 \ + --fixed-validation-seed 7 \ + --max-tokens 8000 \ + --save-interval-updates 10000 \ + --max-update 300000 +``` diff --git a/fairseq/examples/normformer/README.md b/fairseq/examples/normformer/README.md new file mode 100644 index 0000000..037b453 --- /dev/null +++ b/fairseq/examples/normformer/README.md @@ -0,0 +1,70 @@ +### NormFormer +This is the code for the ["NormFormer: Improved Transformer Pretraining with Extra Normalization"](https://arxiv.org/abs/2110.09456) +- 2021-10-19: Commands for CLM Experiments +- Coming soon: Commands for MLM experiments + +If you have any issues or questions please post a github issue and tag `@sshleifer`. + + +### Data +- To preprocess language modeling data, see [here](https://github.com/pytorch/fairseq/blob/d0fbcb0baef6f6ff3425ded62d8daea0e8b12114/examples/language_model/README.md#1-preprocess-the-data). +- The replication commands below expect `$DATA` to be the path to the binarized data directory. +- Note that NormFormer results in Table 2 use a much larger private dataset, and to get good results you should adapt the pre-processing instructions to your dataset and compare to a baseline on the same data, rather than Table 2. +- The code uses `FSDP`, which requires `pip install fairscale>=0.4.0`. + + +### Modify existing Command +To modify an existing `fairseq-train` command to use NormFormer, simply add the following flags: +```bash +fairseq-train ... \ + --scale-attn --scale-fc --scale-heads +``` +- you probably also want to increase your learning rate +- if your model is small, you may want to add `--scale-resids` + +### Exact Training Commands + +- Note that NormFormer results in Table 2 use a much larger private dataset, and to get good results you should adapt the pre-processing instructions to your dataset. 
+The full commands are functions defined here, so to run them you must `source examples/normformer/train_lm.sh`. +- We default `--distributed-world-size 8`. You should adjust `--update-freq` and `--batch-size` such that the effective batch size is (1024x1024x0.5) tokens for 125M and 355M, + and (1024x1024) for 1.3B parameters and above. For small models, `--update-freq`=256/`global_bs`. For large models, `--update-freq`=512/`global_bs`, where `global_bs` = `--batch-size` * `--distributed-world-size` +- The small models will all train on as few as 8 GPUs. + +```bash +train_125M --lr 6e-4 # GPT-3 Replicated +train_125M --lr 1e-3 # stronger high-lr baseline +train_125M --lr 3e-3 --scale-attn --scale-fc --scale-heads # No scale-resids +train_125M --lr 3e-3 --scale-attn --scale-fc --scale-heads --scale-resids # Best command +``` + +```bash +train_355M --lr 6e-4 # GPT-3 Replicated +train_355M --lr 1e-3 # stronger high-lr baseline +train_355M --lr 1e-3 --scale-attn --scale-fc --scale-heads # No scale-resids +train_355M --lr 1e-3 --scale-attn --scale-fc --scale-heads --scale-resids # Slightly better +``` + +```bash +train_1.3B --lr 2e-4 # GPT-3 Replicated +train_1.3B --lr 6e-4 # stronger high-lr baseline +train_1.3B --lr 6e-4 --scale-attn --scale-fc --scale-heads # NormFormer +``` + +```bash +train_2.7B --lr 1.6e-4 # GPT-3 Replicated +train_2.7B --lr 1.6e-4 --activation-fn relu_squared # stronger Relu^2 baseline +train_2.7B --lr 6e-4 --activation-fn relu_squared --scale-attn --scale-fc --scale-heads # NormFormer 2.7B +``` + + +### Citation +```bibtex +@misc{shleifer2021normformer, + title={NormFormer: Improved Transformer Pretraining with Extra Normalization}, + author={Sam Shleifer and Jason Weston and Myle Ott}, + year={2021}, + eprint={2110.09456}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` diff --git a/fairseq/examples/normformer/train_lm.sh b/fairseq/examples/normformer/train_lm.sh new file mode 100644 index 0000000..b081f2d --- /dev/null +++
b/fairseq/examples/normformer/train_lm.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +train_common () { + fairseq-train "$DATA" \ + --combine-val \ + --train-subset train \ + --num-workers 2 \ + --validate-interval-updates 1000 \ + --save-interval-updates 1000 \ + --no-epoch-checkpoints \ + --ddp-backend fully_sharded \ + --memory-efficient-fp16 \ + --fp16-init-scale 4 \ + --checkpoint-activations \ + --arch transformer_lm_gpt \ + --activation-fn gelu \ + --share-decoder-input-output-embed \ + --task language_modeling \ + --sample-break-mode none \ + --tokens-per-sample 2048 \ + --optimizer adam --adam-betas "(0.9, 0.98)" \ + --adam-eps 1e-08 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay \ + --warmup-updates 750 \ + --dropout 0.1 \ + --attention-dropout 0.1 \ + --weight-decay 0.01 \ + --batch-size 16 \ + --update-freq 2 \ + --required-batch-size-multiple 1 \ + --total-num-update 572204 \ + --max-update 572204 \ + --seed 1 \ + --log-format json --log-interval 1 \ + --distributed-world-size 8 --distributed-port 13177 \ + "$@" +} + +train_125M () { + train_common --decoder-layers 12 \ + --decoder-embed-dim 768 \ + --decoder-ffn-embed-dim 3072 \ + --decoder-attention-heads 12 "$@" +} + +train_355M () { + train_common --decoder-layers 24 \ + --decoder-embed-dim 1024\ + --decoder-ffn-embed-dim 4096 \ + --decoder-attention-heads 16 \ + --dropout 0.0 \ + --attention-dropout 0.0 \ + "$@" +} + +train_1.3B () { + train_common --decoder-layers 24 \ + --decoder-embed-dim 2048 \ + --decoder-ffn-embed-dim 8192 \ + --decoder-attention-heads 32 \ + --batch-size 4 \ + --update-freq 16 \ + --total-num-update 286102 \ + --max-update 286102 \ + "$@" +} + +train_2.7B () { + train_common --decoder-layers 32 \ + --decoder-embed-dim 2560 \ + --decoder-ffn-embed-dim 10240 \ + --decoder-attention-heads 32 \ + --batch-size 4 \ + --update-freq 16 \ + --total-num-update 286102 \ + --max-update 286102 \ + "$@" +} diff --git a/fairseq/examples/operators/alignment_train_cpu.cpp 
b/fairseq/examples/operators/alignment_train_cpu.cpp new file mode 100644 index 0000000..13c0153 --- /dev/null +++ b/fairseq/examples/operators/alignment_train_cpu.cpp @@ -0,0 +1,166 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <torch/extension.h> // @manual=//caffe2:torch_extension +#include <algorithm> + +namespace { + +template <typename T> +void exclusiveCumprod( + const T* p_choose, + T* cumprod_1mp, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len) { + // cumprod_1mp = 1 - p_choose + for (uint32_t b = 0; b < bsz; b++) { + for (uint32_t tgt = 0; tgt < tgt_len; tgt++) { + for (uint32_t src = 0; src < src_len; src++) { + uint32_t idx = b * tgt_len * src_len + tgt * src_len + src; + cumprod_1mp[idx] = 1 - p_choose[idx]; + } + } + } + + // Implementing exclusive cumprod in the innermost dimension + // cumprod_1mp = cumprod(1 - p_choose) + // There is cumprod in pytorch, however there is no exclusive mode. 
+ // cumprod(x) = [x1, x1x2, x2x3x4, ..., prod_{i=1}^n x_i] + // exclusive means + // cumprod(x) = [1, x1, x1x2, x1x2x3, ..., prod_{i=1}^{n-1} x_i] + for (uint32_t b = 0; b < bsz; b++) { + for (uint32_t tgt = 0; tgt < tgt_len; tgt++) { + uint32_t idx_offset = b * tgt_len * src_len + tgt * src_len; + T prev = cumprod_1mp[idx_offset]; + // index [b][tgt][0] + cumprod_1mp[idx_offset] = (T)1.0; + T curr; + for (uint32_t src = 1; src < src_len; src++) { + uint32_t idx = idx_offset + src; + curr = cumprod_1mp[idx]; + cumprod_1mp[idx] = cumprod_1mp[idx - 1] * prev; + prev = curr; + } + } + } +} + +template <typename T> +void clamp( + const T* cumprod_1mp, + T* cumprod_1mp_clamp, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len, + T min_val, + T max_val) { + for (uint32_t b = 0; b < bsz; b++) { + for (uint32_t tgt = 0; tgt < tgt_len; tgt++) { + for (uint32_t src = 0; src < src_len; src++) { + uint32_t idx = b * tgt_len * src_len + tgt * src_len + src; + if (cumprod_1mp[idx] < min_val) { + cumprod_1mp_clamp[idx] = min_val; + } else if (cumprod_1mp[idx] > max_val) { + cumprod_1mp_clamp[idx] = max_val; + } else { + cumprod_1mp_clamp[idx] = cumprod_1mp[idx]; + } + } + } + } +} + +template <typename T> +void alignmentTrainCPUImpl( + const T* p_choose, + T* alpha, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len, + float eps) { + // p_choose: bsz , tgt_len, src_len + // cumprod_1mp: bsz , tgt_len, src_len + // cumprod_1mp_clamp : bsz, tgt_len, src_len + // alpha: bsz + 1, tgt_len, src_len + + uint32_t elements = bsz * tgt_len * src_len; + T* cumprod_1mp = new T[elements]; + T* cumprod_1mp_clamp = new T[elements]; + + exclusiveCumprod<T>(p_choose, cumprod_1mp, bsz, tgt_len, src_len); + clamp<T>( + cumprod_1mp, cumprod_1mp_clamp, bsz, tgt_len, src_len, (T)eps, (T)1.0); + + // ai = p_i * cumprod(1 − pi) * cumsum(a_i / cumprod(1 − pi)) + + // Initialize alpha [:, 0, 0] + for (uint32_t b = 0; b < bsz; b++) { + alpha[b * tgt_len * src_len] = 1.0; + } + + for (uint32_t tgt 
= 0; tgt < tgt_len; tgt++) { + for (uint32_t b = 0; b < bsz; b++) { + uint32_t alpha_idx, inout_idx; + T prev_scan = 0, curr_scan, out; + for (uint32_t src = 0; src < src_len; src++) { + // Apply scan/cumsum + if (tgt == 0) { + // alpha index is [b][tgt][src] + alpha_idx = b * tgt_len * src_len + src; + } else { + // alpha index is [b][tgt-1][src] + alpha_idx = b * tgt_len * src_len + (tgt - 1) * src_len + src; + } + // input index is [b][tgt][src] + inout_idx = b * tgt_len * src_len + tgt * src_len + src; + curr_scan = prev_scan + alpha[alpha_idx] / cumprod_1mp_clamp[inout_idx]; + + out = curr_scan * p_choose[inout_idx] * cumprod_1mp[inout_idx]; + alpha[inout_idx] = std::min<T>(std::max<T>(out, 0), 1.0); + prev_scan = curr_scan; + } + } + } + + free(cumprod_1mp); + free(cumprod_1mp_clamp); +} + +void alignmentTrainCPU( + const torch::Tensor& p_choose, + torch::Tensor& alpha, + float eps) { + uint32_t bsz = p_choose.size(0); + uint32_t tgt_len = p_choose.size(1); + uint32_t src_len = p_choose.size(2); + + AT_DISPATCH_FLOATING_TYPES_AND2( + torch::ScalarType::Half, + torch::ScalarType::BFloat16, + p_choose.scalar_type(), + "alignmentCPUImpl", + [&]() { + alignmentTrainCPUImpl<scalar_t>( + p_choose.data_ptr<scalar_t>(), + alpha.data_ptr<scalar_t>(), + bsz, + tgt_len, + src_len, + eps); + }); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def( + "alignment_train_cpu", + &alignmentTrainCPU, + "expected_alignment_from_p_choose (CPU)"); +} + +} // namespace diff --git a/fairseq/examples/operators/alignment_train_cuda.cpp b/fairseq/examples/operators/alignment_train_cuda.cpp new file mode 100644 index 0000000..430e048 --- /dev/null +++ b/fairseq/examples/operators/alignment_train_cuda.cpp @@ -0,0 +1,31 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "alignment_train_cuda.h" +#include "utils.h" + +namespace { + +void alignmentTrainCUDA( + const torch::Tensor& p_choose, + torch::Tensor& alpha, + float eps) { + CHECK_INPUT(p_choose); + CHECK_INPUT(alpha); + + alignmentTrainCUDAWrapper(p_choose, alpha, eps); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def( + "alignment_train_cuda", + &alignmentTrainCUDA, + "expected_alignment_from_p_choose (CUDA)"); +} + +} // namespace diff --git a/fairseq/examples/operators/alignment_train_cuda.h b/fairseq/examples/operators/alignment_train_cuda.h new file mode 100644 index 0000000..8289d1a --- /dev/null +++ b/fairseq/examples/operators/alignment_train_cuda.h @@ -0,0 +1,16 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include <torch/extension.h> // @manual=//caffe2:torch_extension + +void alignmentTrainCUDAWrapper( + const torch::Tensor& p_choose, + torch::Tensor& alpha, + float eps); diff --git a/fairseq/examples/operators/alignment_train_kernel.cu b/fairseq/examples/operators/alignment_train_kernel.cu new file mode 100644 index 0000000..efae7cc --- /dev/null +++ b/fairseq/examples/operators/alignment_train_kernel.cu @@ -0,0 +1,354 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include <ATen/ATen.h> +#include <ATen/cuda/CUDAContext.h> // @manual=//caffe2/aten:ATen-cu +#include <cuda_runtime.h> +#include <algorithm> // std::min/max +#include <cub/cub.cuh> + +#include "alignment_train_cuda.h" +#include "utils.h" + +namespace { + +// The thread block length in threads along the X dimension +constexpr int BLOCK_DIM_X = 128; +// The thread block length in threads along the Y dimension +constexpr int BLOCK_DIM_Y = 8; +// The thread block length in threads for scan operation +constexpr int SCAN_BLOCK = 512; + +#define gpuErrchk(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } + +inline void +gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) { + if (code != cudaSuccess) { + fprintf( + stderr, + "\nGPUassert: %s %s %d\n", + cudaGetErrorString(code), + file, + line); + if (abort) + exit(code); + } +} + +template <typename T> +struct Prod { + /// prod operator, returns <tt>a * b</tt> + __host__ __device__ __forceinline__ T + operator()(const T& a, const T& b) const { + return a * b; + } +}; + +template <typename T> +struct BlockPrefixProdCallbackOp { + // Running prefix + T running_total; + + // Constructor + __device__ BlockPrefixProdCallbackOp(T running_total) + : running_total(running_total) {} + + // Callback operator to be entered by the first warp of threads in the block. + // Thread-0 is responsible for returning a value for seeding the block-wide + // scan. + __device__ T operator()(const T block_aggregate) { + T old_prefix = running_total; + running_total *= block_aggregate; + return old_prefix; + } +}; + +template <typename T> +struct BlockPrefixSumCallbackOp { + // Running prefix + T running_total; + + // Constructor + __device__ BlockPrefixSumCallbackOp(T running_total) + : running_total(running_total) {} + + // Callback operator to be entered by the first warp of threads in the block. + // Thread-0 is responsible for returning a value for seeding the block-wide + // scan. 
+ __device__ T operator()(const T block_aggregate) { + T old_prefix = running_total; + running_total += block_aggregate; + return old_prefix; + } +}; + +template <typename T> +__global__ void oneMinusPKernel( + const T* __restrict__ p_choose, + T* __restrict__ cumprod_1mp, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len) { + for (uint32_t b = blockIdx.x; b < bsz; b += gridDim.x) { + for (uint32_t tgt = threadIdx.y; tgt < tgt_len; tgt += blockDim.y) { + for (uint32_t src = threadIdx.x; src < src_len; src += blockDim.x) { + uint32_t idx = b * tgt_len * src_len + tgt * src_len + src; + cumprod_1mp[idx] = 1 - p_choose[idx]; + } + } + } +} + +template <typename T, int TPB> +__global__ void innermostScanKernel( + T* __restrict__ cumprod_1mp, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len) { + for (uint32_t b = blockIdx.y; b < bsz; b += gridDim.y) { + for (uint32_t tgt = blockIdx.x; tgt < tgt_len; tgt += gridDim.x) { + // Specialize BlockScan for a 1D block of TPB threads on type T + typedef cub::BlockScan<T, TPB> BlockScan; + // Allocate shared memory for BlockScan + __shared__ typename BlockScan::TempStorage temp_storage; + // Initialize running total + BlockPrefixProdCallbackOp<T> prefix_op(1); + + const uint32_t tid = threadIdx.x; + for (uint32_t block_src = 0; block_src < src_len; + block_src += blockDim.x) { + uint32_t src = block_src + tid; + uint32_t idx = b * tgt_len * src_len + tgt * src_len + src; + T thread_data = (src < src_len) ? 
cumprod_1mp[idx] : (T)0; + + // Collectively compute the block-wide inclusive prefix sum + BlockScan(temp_storage) + .ExclusiveScan(thread_data, thread_data, Prod<T>(), prefix_op); + __syncthreads(); + + // write the scanned value to output + if (src < src_len) { + cumprod_1mp[idx] = thread_data; + } + } + } + } +} + +template <typename T> +__global__ void clampKernel( + const T* __restrict__ cumprod_1mp, + T* __restrict__ cumprod_1mp_clamp, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len, + T min_val, + T max_val) { + for (uint32_t b = blockIdx.x; b < bsz; b += gridDim.x) { + for (uint32_t tgt = threadIdx.y; tgt < tgt_len; tgt += blockDim.y) { + for (uint32_t src = threadIdx.x; src < src_len; src += blockDim.x) { + uint32_t idx = b * tgt_len * src_len + tgt * src_len + src; + if (cumprod_1mp[idx] < min_val) { + cumprod_1mp_clamp[idx] = min_val; + } else if (cumprod_1mp[idx] > max_val) { + cumprod_1mp_clamp[idx] = max_val; + } else { + cumprod_1mp_clamp[idx] = cumprod_1mp[idx]; + } + } + } + } +} + +template <typename T> +__global__ void initAlphaCUDAKernel( + T* alpha, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len) { + // alpha[:, 0, 0] = 1.0 + for (uint32_t b = blockIdx.x; b < bsz; b += gridDim.x) { + alpha[b * tgt_len * src_len] = (T)1.0; + } +} + +template <typename T, int TPB> +__global__ void alignmentTrainCUDAKernel( + const T* __restrict__ p_choose, + const T* __restrict__ cumprod_1mp, + const T* __restrict__ cumprod_1mp_clamp, + T* __restrict__ alpha, + uint32_t bsz, + uint32_t tgt_len, + uint32_t src_len, + uint32_t tgt) { + for (uint32_t b = blockIdx.x; b < bsz; b += gridDim.x) { + // Specialize BlockScan for a 1D block of TPB threads on type T + typedef cub::BlockScan<T, TPB> BlockScan; + + // Allocate shared memory for BlockScan + __shared__ typename BlockScan::TempStorage temp_storage; + // Initialize running total + BlockPrefixSumCallbackOp<T> prefix_op(0); + + uint32_t b_offset = b * tgt_len * src_len; + const uint32_t tid = 
// Host-side driver that fills `alpha` from `p_choose` on the current CUDA
// stream.
//
// Layout of every buffer (flat, row-major):
//   p_choose:          bsz, tgt_len, src_len   (input)
//   cumprod_1mp:       bsz, tgt_len, src_len   (temporary)
//   cumprod_1mp_clamp: bsz, tgt_len, src_len   (temporary)
//   alpha:             bsz, tgt_len, src_len   (output, updated in place)
//
// Steps:
//   1. cumprod_1mp = exclusive cumprod of (1 - p_choose) along src.
//   2. cumprod_1mp_clamp = cumprod_1mp clamped to [eps, 1.0].
//   3. alpha[:, 0, 0] = 1.0.
//   4. For each target step i (in order):
//        alpha_i = p_i * cumprod(1 - p_i)
//                      * cumsum(alpha_{i-1} / clamp(cumprod(1 - p_i)))
//
// NOTE(review): for step 0 the kernel reads alpha[b][0][src]; only
// alpha[b][0][0] is written here, so the caller presumably passes a
// zero-initialised alpha -- confirm against the Python caller.
template <typename T>
void alignmentTrainCUDAImpl(
    const T* p_choose,
    T* alpha,
    uint32_t bsz,
    uint32_t tgt_len,
    uint32_t src_len,
    float eps) {
  // Nothing to compute for an empty tensor. Returning early also avoids
  // launching kernels with a zero-sized grid (rejected by the CUDA runtime)
  // and the write to alpha[b * tgt_len * src_len] in initAlphaCUDAKernel,
  // which would be out of bounds when tgt_len or src_len is 0.
  if (bsz == 0 || tgt_len == 0 || src_len == 0) {
    return;
  }

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  const cudaDeviceProp* props = at::cuda::getCurrentDeviceProperties();
  uint32_t max_grid_x = props->maxGridSize[0];
  uint32_t max_grid_y = props->maxGridSize[1];

  // Implementing exclusive cumprod.
  // cumprod_1mp = cumprod(1 - p_choose)
  // There is cumprod in pytorch, however there is no exclusive mode.
  //   cumprod(x)           = [x1, x1x2, x1x2x3, ..., prod_{i=1}^{n} x_i]
  // exclusive means
  //   exclusive_cumprod(x) = [1, x1, x1x2, ..., prod_{i=1}^{n-1} x_i]
  const uint32_t elements = bsz * tgt_len * src_len;
  T* cumprod_1mp;
  gpuErrchk(cudaMalloc(&cumprod_1mp, elements * sizeof(T)));
  exclusiveCumprod<T>(
      p_choose,
      cumprod_1mp,
      bsz,
      tgt_len,
      src_len,
      max_grid_x,
      max_grid_y,
      stream);

  // One block per batch element, capped by the device limit. The size is
  // computed in an integer type: taking std::min in the element type T would
  // round-trip the grid size through Half/BFloat16 for those dtypes.
  const uint32_t batch_grid = std::min<uint32_t>(max_grid_x, bsz);

  // clamp cumprod_1mp to the range [eps, 1.0]
  T* cumprod_1mp_clamp;
  gpuErrchk(cudaMalloc(&cumprod_1mp_clamp, elements * sizeof(T)));
  dim3 grid_clamp(batch_grid, 1, 1);
  dim3 block_clamp(BLOCK_DIM_X, BLOCK_DIM_Y, 1);
  clampKernel<T><<<grid_clamp, block_clamp, 0, stream>>>(
      cumprod_1mp, cumprod_1mp_clamp, bsz, tgt_len, src_len, (T)eps, (T)1.0);
  gpuErrchk(cudaGetLastError());

  // alpha[:, 0, 0] = 1.0
  dim3 grid_init(batch_grid, 1, 1);
  initAlphaCUDAKernel<T>
      <<<grid_init, 1, 0, stream>>>(alpha, bsz, tgt_len, src_len);
  gpuErrchk(cudaGetLastError());

  // Target steps are launched one after another on the same stream because
  // step i reads the alpha row written by step i - 1.
  for (uint32_t i = 0; i < tgt_len; i++) {
    alignmentTrainCUDAKernel<T, SCAN_BLOCK>
        <<<batch_grid, SCAN_BLOCK, 0, stream>>>(
            p_choose,
            cumprod_1mp,
            cumprod_1mp_clamp,
            alpha,
            bsz,
            tgt_len,
            src_len,
            i);
    gpuErrchk(cudaGetLastError());
  }

  gpuErrchk(cudaFree(cumprod_1mp));
  gpuErrchk(cudaFree(cumprod_1mp_clamp));
}
// Argument-validation helpers for the custom operators.
//
// CHECK_CUDA(x)       - fails unless tensor `x` lives on a CUDA device.
// CHECK_CONTIGUOUS(x) - fails unless tensor `x` is contiguous.
// CHECK_INPUT(x)      - runs both checks.
//
// Tensor::is_cuda() is queried directly; going through the deprecated
// Tensor::type() accessor is unnecessary. The macro argument is
// parenthesised so that expressions can be passed safely.
//
// NOTE: CHECK_INPUT expands to two statements, so wrap it in braces when it
// is used as the body of an `if`/`for` without a block.
#define CHECK_CUDA(x) \
  TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
Setup + +Clone fairseq from source and install necessary dependencies: +```bash +git clone https://github.com/pytorch/fairseq.git +cd fairseq +pip install --editable . +pip install sacremoses sentencepiece +``` + +##### 1. Download models +```bash +wget https://dl.fbaipublicfiles.com/fairseq/models/paraphraser.en-fr.tar.gz +wget https://dl.fbaipublicfiles.com/fairseq/models/paraphraser.fr-en.hMoEup.tar.gz +tar -xzvf paraphraser.en-fr.tar.gz +tar -xzvf paraphraser.fr-en.hMoEup.tar.gz +``` + +##### 2. Paraphrase +```bash +python examples/paraphraser/paraphrase.py \ + --en2fr paraphraser.en-fr \ + --fr2en paraphraser.fr-en.hMoEup +# Example input: +# The new date for the Games, postponed for a year in response to the coronavirus pandemic, gives athletes time to recalibrate their training schedules. +# Example outputs: +# Delayed one year in response to the coronavirus pandemic, the new date of the Games gives athletes time to rebalance their training schedule. +# The new date of the Games, which was rescheduled one year in response to the coronavirus (CV) pandemic, gives athletes time to rebalance their training schedule. +# The new date of the Games, postponed one year in response to the coronavirus pandemic, provides athletes with time to rebalance their training schedule. +# The Games' new date, postponed one year in response to the coronavirus pandemic, gives athletes time to rebalance their training schedule. +# The new Games date, postponed one year in response to the coronavirus pandemic, gives the athletes time to rebalance their training schedule. +# The new date of the Games, which was postponed one year in response to the coronavirus pandemic, gives the athletes time to rebalance their training schedule. +# The new date of the Games, postponed one year in response to the coronavirus pandemic, gives athletes time to rebalance their training schedule. 
def main():
    """Paraphrase input lines via en->fr->en round-trip translation.

    Parses the command line, loads the en2fr model and the fr2en
    mixture-of-experts model, then prints one paraphrase per expert for
    every non-empty line read from the given files (or stdin for "-").

    Raises:
        RuntimeError: if ``--user-dir`` is omitted and the default
            ``examples/translation_moe/src`` directory does not exist.
    """
    arg_parser = argparse.ArgumentParser(description="")
    arg_parser.add_argument("--en2fr", required=True, help="path to en2fr model")
    arg_parser.add_argument(
        "--fr2en", required=True, help="path to fr2en mixture of experts model"
    )
    arg_parser.add_argument(
        "--user-dir", help="path to fairseq examples/translation_moe/src directory"
    )
    arg_parser.add_argument(
        "--num-experts",
        type=int,
        default=10,
        help="(keep at 10 unless using a different model)",
    )
    arg_parser.add_argument(
        "files",
        nargs="*",
        default=["-"],
        help='input files to paraphrase; "-" for stdin',
    )
    args = arg_parser.parse_args()

    if args.user_dir is None:
        # Default location: <examples>/translation_moe/src, where <examples>
        # is the parent of the directory holding this script.
        examples_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        args.user_dir = os.path.join(examples_dir, "translation_moe", "src")
        if not os.path.exists(args.user_dir):
            raise RuntimeError(
                "cannot find fairseq examples/translation_moe/src "
                "(tried looking here: {})".format(args.user_dir)
            )
        logging.info("found user_dir:" + args.user_dir)

    logging.info("loading en2fr model from:" + args.en2fr)
    en2fr_hub = TransformerModel.from_pretrained(
        model_name_or_path=args.en2fr,
        tokenizer="moses",
        bpe="sentencepiece",
    )
    en2fr = en2fr_hub.eval()

    logging.info("loading fr2en model from:" + args.fr2en)
    fr2en_hub = TransformerModel.from_pretrained(
        model_name_or_path=args.fr2en,
        tokenizer="moses",
        bpe="sentencepiece",
        user_dir=args.user_dir,
        task="translation_moe",
    )
    fr2en = fr2en_hub.eval()

    def gen_paraphrases(en):
        # Translate to French once, then back to English with each expert.
        fr = en2fr.translate(en)
        outputs = []
        for expert in range(args.num_experts):
            outputs.append(
                fr2en.translate(fr, inference_step_args={"expert": expert})
            )
        return outputs

    logging.info("Type the input sentence and press return:")
    for raw_line in fileinput.input(args.files):
        sentence = raw_line.strip()
        if sentence:
            for paraphrase in gen_paraphrases(sentence):
                print(paraphrase)
+ +Model | Description | Dataset | Download +---|---|---|--- +`lightconv.no_glu.iwslt14.de-en` | LightConv (without GLUs) | [IWSLT14 German-English](https://wit3.fbk.eu/archive/2014-01/texts/de/en/de-en.tgz) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/iwslt14.de-en.lightconv.tar.gz) <br> IWSLT14 test: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/iwslt14.de-en.test.tar.bz2) +`dynamicconv.no_glu.iwslt14.de-en` | DynamicConv (without GLUs) | [IWSLT14 German-English](https://wit3.fbk.eu/archive/2014-01/texts/de/en/de-en.tgz) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/iwslt14.de-en.dynamicconv.tar.gz) <br> IWSLT14 test: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/iwslt14.de-en.test.tar.bz2) +`lightconv.no_glu.wmt16.en-de` | LightConv (without GLUs) | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.lightconv.tar.gz) <br> newstest2014 (shared vocab): <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) +`dynamicconv.no_glu.wmt16.en-de` | DynamicConv (without GLUs) | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.dynamicconv.tar.gz) <br> newstest2014 (shared vocab): <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) +`lightconv.glu.wmt16.en-de` | LightConv | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download 
(.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.lightconv-glu.tar.gz) <br> newstest2014 (shared vocab): <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) +`dynamicconv.glu.wmt16.en-de` | DynamicConv | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.dynamicconv-glu.tar.gz) <br> newstest2014 (shared vocab): <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) +`lightconv.glu.wmt14.en-fr` | LightConv | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt14.en-fr.joined-dict.lightconv-glu.tar.gz) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-fr.joined-dict.newstest2014.tar.bz2) +`dynamicconv.glu.wmt14.en-fr` | DynamicConv | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt14.en-fr.joined-dict.dynamicconv-glu.tar.gz) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-fr.joined-dict.newstest2014.tar.bz2) +`lightconv.glu.wmt17.zh-en` | LightConv | [WMT17 Chinese-English](http://statmt.org/wmt17/translation-task.html#Download) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt17.zh-en.lightconv-glu.tar.gz) <br> newstest2017: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt17.zh-en.newstest2017.tar.bz2) +`dynamicconv.glu.wmt17.zh-en` | DynamicConv | [WMT17 
Chinese-English](http://statmt.org/wmt17/translation-task.html#Download) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt17.zh-en.dynamicconv-glu.tar.gz) <br> newstest2017: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt17.zh-en.newstest2017.tar.bz2) + +### Memory-Efficient CUDA Kernels + +Since the PyTorch implementations of Light/Dynamic conv are quite memory intensive, we have developed CUDA kernels that implement the light and dynamic convolution operator in a memory-efficient and performant manner. For large sequence lengths, these kernels save about 50% memory compared to the PyTorch equivalent. + +To install the kernels, use the commands below. Once installed, they will automatically be used in place of the PyTorch implementations whenever a light or dynamic convolution is used. + +```sh +# to install lightconv +cd fairseq/modules/lightconv_layer +python cuda_function_gen.py +python setup.py install + +# to install dynamicconv +cd fairseq/modules/dynamicconv_layer +python cuda_function_gen.py +python setup.py install +``` + +### Example usage (torch.hub) + +We require a few additional Python dependencies for preprocessing: +```bash +pip install sacremoses subword_nmt +``` + +Interactive translation via PyTorch Hub: +```python +import torch + +# List available models +torch.hub.list('pytorch/fairseq') # [..., 'lightconv.glu.wmt17.zh-en', ... 
] + +# Load a LightConv model trained on WMT'17 Zh-En +zh2en = torch.hub.load('pytorch/fairseq', 'lightconv.glu.wmt17.zh-en', tokenizer='moses', bpe='subword_nmt') + +# The underlying model is available under the *models* attribute +assert isinstance(zh2en.models[0], fairseq.models.lightconv.LightConvModel) + +# Translate a sentence +zh2en.translate('你好 世界') +# 'Hello World' +``` + +Loading custom models: +```python +from fairseq.models.lightconv import LightConvModel +en2fr = LightConvModel.from_pretrained( + '/path/to/checkpoints', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='data-bin/wmt14_en_fr', + bpe='subword_nmt', + bpe_codes='data-bin/wmt14_en_fr/en.code' +) +en2fr.translate('Hello world!') +# 'Bonjour le monde' +``` + +### Preprocessing the training datasets + +Please follow the instructions in [`examples/translation/README.md`](../translation/README.md) to preprocess the data. + +### Training and evaluation options: +To use the model without GLU, please set `--encoder-glu 0 --decoder-glu 0`. +For LightConv, please use `--encoder-conv-type lightweight --decoder-conv-type lightweight`, otherwise the default is DynamicConv. +For best BLEU results, lenpen may need to be manually tuned. + +To use the CUDA kernels, first install the PyTorch modules using the commands +above. Once the CUDA modules are installed, they will automatically be used +instead of the PyTorch modules. 
+ +### IWSLT14 De-En +Training and evaluating DynamicConv (without GLU) on a GPU: +```sh +# Training +SAVE="save/dynamic_conv_iwslt" +mkdir -p $SAVE +CUDA_VISIBLE_DEVICES=0 $(which fairseq-train) data-bin/iwslt14.tokenized.de-en \ + --clip-norm 0 --optimizer adam --lr 0.0005 \ + --source-lang de --target-lang en --max-tokens 4000 --no-progress-bar \ + --log-interval 100 --stop-min-lr '1e-09' --weight-decay 0.0001 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --lr-scheduler inverse_sqrt \ + --ddp-backend=legacy_ddp \ + --max-update 50000 --warmup-updates 4000 --warmup-init-lr '1e-07' \ + --adam-betas '(0.9, 0.98)' --keep-last-epochs 10 \ + -a lightconv_iwslt_de_en --save-dir $SAVE \ + --dropout 0.3 --attention-dropout 0.1 --weight-dropout 0.1 \ + --encoder-glu 0 --decoder-glu 0 +python scripts/average_checkpoints.py --inputs $SAVE \ + --num-epoch-checkpoints 10 --output "${SAVE}/checkpoint_last10_avg.pt" + +# Evaluation +CUDA_VISIBLE_DEVICES=0 fairseq-generate data-bin/iwslt14.tokenized.de-en --path "${SAVE}/checkpoint_last10_avg.pt" --batch-size 128 --beam 4 --remove-bpe --lenpen 1 --gen-subset test --quiet +``` + +### WMT16 En-De +Training and evaluating DynamicConv (with GLU) on WMT16 En-De using cosine scheduler on one machine with 8 V100 GPUs: +```sh +# Training +SAVE="save/dynamic_conv_wmt16en2de" +mkdir -p $SAVE +python -m torch.distributed.launch --nproc_per_node 8 $(which fairseq-train) \ + data-bin/wmt16_en_de_bpe32k --fp16 --log-interval 100 --no-progress-bar \ + --max-update 30000 --share-all-embeddings --optimizer adam \ + --adam-betas '(0.9, 0.98)' --clip-norm 0.0 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --stop-min-lr 1e-09 --update-freq 16 --attention-dropout 0.1 --keep-last-epochs 10 \ + --ddp-backend=legacy_ddp --max-tokens 3584 \ + --lr-scheduler cosine --warmup-init-lr 1e-7 --warmup-updates 10000 \ + --lr-shrink 1 --lr 0.001 --min-lr 1e-7 --warmup-init-lr 1e-07 \ + --t-mult 
1 --lr-period-updates 20000 \ + --arch lightconv_wmt_en_de_big --save-dir $SAVE \ + --dropout 0.3 --attention-dropout 0.1 --weight-dropout 0.1 \ + --encoder-glu 1 --decoder-glu 1 + +# Evaluation +CUDA_VISIBLE_DEVICES=0 fairseq-generate data-bin/wmt16.en-de.joined-dict.newstest2014 --path "${SAVE}/checkpoint_best.pt" --batch-size 128 --beam 5 --remove-bpe --lenpen 0.5 --gen-subset test > wmt16_gen.txt +bash scripts/compound_split_bleu.sh wmt16_gen.txt +``` + +### WMT14 En-Fr +Training DynamicConv (with GLU) on WMT14 En-Fr using cosine scheduler on one machine with 8 V100 GPUs: +```sh +# Training +SAVE="save/dynamic_conv_wmt14en2fr" +mkdir -p $SAVE +python -m torch.distributed.launch --nproc_per_node 8 $(which fairseq-train) \ + data-bin/wmt14_en_fr --fp16 --log-interval 100 --no-progress-bar \ + --max-update 30000 --share-all-embeddings --optimizer adam \ + --adam-betas '(0.9, 0.98)' --clip-norm 0.0 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --stop-min-lr 1e-09 --update-freq 16 --attention-dropout 0.1 --keep-last-epochs 10 \ + --ddp-backend=legacy_ddp --max-tokens 3584 \ + --lr-scheduler cosine --warmup-init-lr 1e-7 --warmup-updates 10000 \ + --lr-shrink 1 --lr 0.001 --min-lr 1e-7 --warmup-init-lr 1e-07 \ + --t-mult 1 --lr-period-updates 70000 \ + --arch lightconv_wmt_en_fr_big --save-dir $SAVE \ + --dropout 0.1 --attention-dropout 0.1 --weight-dropout 0.1 \ + --encoder-glu 1 --decoder-glu 1 + +# Evaluation +CUDA_VISIBLE_DEVICES=0 fairseq-generate data-bin/wmt14.en-fr.joined-dict.newstest2014 --path "${SAVE}/checkpoint_best.pt" --batch-size 128 --beam 5 --remove-bpe --lenpen 0.9 --gen-subset test +``` diff --git a/fairseq/examples/pointer_generator/README.md b/fairseq/examples/pointer_generator/README.md new file mode 100644 index 0000000..6096570 --- /dev/null +++ b/fairseq/examples/pointer_generator/README.md @@ -0,0 +1,82 @@ +# Transformer with Pointer-Generator Network + +This page describes the 
`transformer_pointer_generator` model that incorporates +a pointing mechanism in the Transformer model that facilitates copying of input +words to the output. This architecture is described in [Enarvi et al. (2020)](https://www.aclweb.org/anthology/2020.nlpmc-1.4/). + +## Background + +The pointer-generator network was introduced in [See et al. (2017)](https://arxiv.org/abs/1704.04368) +for RNN encoder-decoder attention models. A similar mechanism can be +incorporated in a Transformer model by reusing one of the many attention +distributions for pointing. The attention distribution over the input words is +interpolated with the normal output distribution over the vocabulary words. This +allows the model to generate words that appear in the input, even if they don't +appear in the vocabulary, helping especially with small vocabularies. + +## Implementation + +The mechanism for copying out-of-vocabulary words from the input has been +implemented differently to See et al. In their [implementation](https://github.com/abisee/pointer-generator) +they convey the word identities through the model in order to be able to produce +words that appear in the input sequence but not in the vocabulary. A different +approach was taken in the Fairseq implementation to keep it self-contained in +the model file, avoiding any changes to the rest of the code base. Copying +out-of-vocabulary words is possible by pre-processing the input and +post-processing the output. This is described in detail in the next section. + +## Usage + +The training and evaluation procedure is outlined below. You can also find a +more detailed example for the XSum dataset on [this page](README.xsum.md). + +##### 1. Create a vocabulary and extend it with source position markers + +The pointing mechanism is especially helpful with small vocabularies, if we are +able to recover the identities of any out-of-vocabulary words that are copied +from the input. 
For this purpose, the model allows extending the vocabulary with +special tokens that can be used in place of `<unk>` tokens to identify different +input positions. For example, the user may add `<unk-0>`, `<unk-1>`, `<unk-2>`, +etc. to the end of the vocabulary, after the normal words. Below is an example +of how to create a vocabulary of 10000 most common words and add 1000 input +position markers. + +```bash +vocab_size=10000 +position_markers=1000 +export LC_ALL=C +cat train.src train.tgt | + tr -s '[:space:]' '\n' | + sort | + uniq -c | + sort -k1,1bnr -k2 | + head -n "$((vocab_size - 4))" | + awk '{ print $2 " " $1 }' >dict.pg.txt +python3 -c "[print('<unk-{}> 0'.format(n)) for n in range($position_markers)]" >>dict.pg.txt +``` + +##### 2. Preprocess the text data + +The idea is that any `<unk>` tokens in the text are replaced with `<unk-0>` if +it appears in the first input position, `<unk-1>` if it appears in the second +input position, and so on. This can be achieved using the `preprocess.py` script +that is provided in this directory. + +##### 3. Train a model + +The number of these special tokens is given to the model with the +`--source-position-markers` argument—the model simply maps all of these to the +same word embedding as `<unk>`. + +The attention distribution that is used for pointing is selected using the +`--alignment-heads` and `--alignment-layer` command-line arguments in the same +way as with the `transformer_align` model. + +##### 4. Generate text and postprocess it + +When using the model to generate text, you want to preprocess the input text in +the same way that training data was processed, replacing out-of-vocabulary words +with `<unk-N>` tokens. If any of these tokens are copied to the output, the +actual words can be retrieved from the unprocessed input text. Any `<unk-N>` +token should be replaced with the word at position N in the original input +sequence. This can be achieved using the `postprocess.py` script. 
diff --git a/fairseq/examples/pointer_generator/README.xsum.md b/fairseq/examples/pointer_generator/README.xsum.md new file mode 100644 index 0000000..ac3a8c3 --- /dev/null +++ b/fairseq/examples/pointer_generator/README.xsum.md @@ -0,0 +1,180 @@ +## Training a pointer-generator model on the Extreme Summarization dataset + +##### 1. Download the Extreme Summarization data and preprocess it + +Follow the instructions [here](https://github.com/EdinburghNLP/XSum) to obtain +the original Extreme Summarization dataset. You should have six files, +{train,validation,test}.{document,summary}. + +##### 2. Create a vocabulary and extend it with source position markers + +```bash +vocab_size=10000 +position_markers=1000 +export LC_ALL=C +cat train.document train.summary | + tr -s '[:space:]' '\n' | + sort | + uniq -c | + sort -k1,1bnr -k2 | + head -n "$((vocab_size - 4))" | + awk '{ print $2 " " $1 }' >dict.pg.txt +python3 -c "[print('<unk-{}> 0'.format(n)) for n in range($position_markers)]" >>dict.pg.txt +``` + +This creates the file dict.pg.txt that contains the 10k most frequent words, +followed by 1k source position markers: + +``` +the 4954867 +. 4157552 +, 3439668 +to 2212159 +a 1916857 +of 1916820 +and 1823350 +... +<unk-0> 0 +<unk-1> 0 +<unk-2> 0 +<unk-3> 0 +<unk-4> 0 +... +``` + +##### 3. Preprocess the text data + +```bash +./preprocess.py --source train.document --target train.summary --vocab <(cut -d' ' -f1 dict.pg.txt) --source-out train.pg.src --target-out train.pg.tgt +./preprocess.py --source validation.document --target validation.summary --vocab <(cut -d' ' -f1 dict.pg.txt) --source-out valid.pg.src --target-out valid.pg.tgt +./preprocess.py --source test.document --vocab <(cut -d' ' -f1 dict.pg.txt) --source-out test.pg.src +``` + +The data should now contain `<unk-N>` tokens in place of out-of-vocabulary words. + +##### 4. 
Binarize the dataset: + +```bash +fairseq-preprocess \ + --source-lang src \ + --target-lang tgt \ + --trainpref train.pg \ + --validpref valid.pg \ + --destdir bin \ + --workers 60 \ + --srcdict dict.pg.txt \ + --joined-dictionary +``` + +##### 5. Train a model + +```bash +total_updates=20000 +warmup_updates=500 +lr=0.001 +max_tokens=4096 +update_freq=4 +pointer_layer=-2 + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train bin \ + --user-dir examples/pointer_generator/pointer_generator_src \ + --max-tokens "$max_tokens" \ + --task translation \ + --source-lang src --target-lang tgt \ + --truncate-source \ + --layernorm-embedding \ + --share-all-embeddings \ + --encoder-normalize-before \ + --decoder-normalize-before \ + --required-batch-size-multiple 1 \ + --arch transformer_pointer_generator \ + --alignment-layer "$pointer_layer" \ + --alignment-heads 1 \ + --source-position-markers 1000 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.01 --optimizer adam --adam-betas "(0.9, 0.999)" --adam-eps 1e-08 \ + --clip-norm 0.1 \ + --lr-scheduler inverse_sqrt --lr "$lr" --max-update "$total_updates" --warmup-updates "$warmup_updates" \ + --update-freq "$update_freq" \ + --skip-invalid-size-inputs-valid-test +``` + +Above we specify that our dictionary contains 1000 source position markers, and +that we want to use one attention head from the penultimate decoder layer for +pointing. It should run in 5.5 hours on one node with eight 32GB V100 GPUs. 
The +logged messages confirm that dictionary indices 10000 and above will be mapped to +the `<unk>` embedding: + +``` +2020-09-24 20:43:53 | INFO | fairseq.tasks.translation | [src] dictionary: 11000 types +2020-09-24 20:43:53 | INFO | fairseq.tasks.translation | [tgt] dictionary: 11000 types +2020-09-24 20:43:53 | INFO | fairseq.data.data_utils | loaded 11332 examples from: bin/valid.src-tgt.src +2020-09-24 20:43:53 | INFO | fairseq.data.data_utils | loaded 11332 examples from: bin/valid.src-tgt.tgt +2020-09-24 20:43:53 | INFO | fairseq.tasks.translation | bin valid src-tgt 11332 examples +2020-09-24 20:43:53 | INFO | fairseq.models.transformer_pg | dictionary indices from 10000 to 10999 will be mapped to 3 +``` + +##### 6. Summarize the test sequences + +```bash +batch_size=32 +beam_size=6 +max_length=60 +length_penalty=1.0 + +fairseq-interactive bin \ + --user-dir examples/pointer_generator/pointer_generator_src \ + --batch-size "$batch_size" \ + --task translation \ + --source-lang src --target-lang tgt \ + --path checkpoints/checkpoint_last.pt \ + --input test.pg.src \ + --buffer-size 200 \ + --max-len-a 0 \ + --max-len-b "$max_length" \ + --lenpen "$length_penalty" \ + --beam "$beam_size" \ + --skip-invalid-size-inputs-valid-test | + tee generate.out +grep ^H generate.out | cut -f 3- >generate.hyp +``` + +Now you should have the generated sequences in `generate.hyp`. They contain +`<unk-N>` tokens that the model has copied from the source sequence. In order to +retrieve the original words, we need the unprocessed source sequences from +`test.document`. + +##### 7. Process the generated output + +Since we skipped too long inputs when producing `generate.hyp`, we also have to +skip too long sequences now that we read `test.document`. 
+ +```bash +./postprocess.py \ + --source <(awk 'NF<1024' test.document) \ + --target generate.hyp \ + --target-out generate.hyp.processed +``` + +Now you'll find the final sequences in `generate.hyp.processed`, with +`<unk-N>` replaced with the original word from the source sequence. + +##### An example of a summarized sequence + +The original source document in `test.document`: + +> de roon moved to teesside in june 2016 for an initial # 8.8 m fee and played 33 premier league games last term . the netherlands international , 26 , scored five goals in 36 league and cup games during his spell at boro . meanwhile , manager garry monk confirmed the championship club 's interest in signing chelsea midfielder lewis baker . `` he 's a target and one of many that we 've had throughout the summer months , '' said monk . find all the latest football transfers on our dedicated page . + +The preprocessed source document in `test.pg.src`: + +> de \<unk-1> moved to \<unk-4> in june 2016 for an initial # \<unk-12> m fee and played 33 premier league games last term . the netherlands international , 26 , scored five goals in 36 league and cup games during his spell at boro . meanwhile , manager garry monk confirmed the championship club 's interest in signing chelsea midfielder lewis baker . `` he 's a target and one of many that we 've had throughout the summer months , '' said monk . find all the latest football transfers on our dedicated page . + +The generated summary in `generate.hyp`: + +> middlesbrough striker \<unk> de \<unk-1> has joined spanish side \<unk> on a season-long loan . + +The generated summary after postprocessing in `generate.hyp.processed`: + +> middlesbrough striker \<unk> de roon has joined spanish side \<unk> on a season-long loan . 
diff --git a/fairseq/examples/pointer_generator/pointer_generator_src/__init__.py b/fairseq/examples/pointer_generator/pointer_generator_src/__init__.py new file mode 100644 index 0000000..c361ff6 --- /dev/null +++ b/fairseq/examples/pointer_generator/pointer_generator_src/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import transformer_pg # noqa diff --git a/fairseq/examples/pointer_generator/pointer_generator_src/transformer_pg.py b/fairseq/examples/pointer_generator/pointer_generator_src/transformer_pg.py new file mode 100644 index 0000000..4ccf30f --- /dev/null +++ b/fairseq/examples/pointer_generator/pointer_generator_src/transformer_pg.py @@ -0,0 +1,518 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from typing import Any, Dict, Optional, List, Tuple + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.models import register_model, register_model_architecture +from fairseq.models.transformer import ( + DEFAULT_MAX_SOURCE_POSITIONS, + DEFAULT_MAX_TARGET_POSITIONS, + TransformerDecoder, + TransformerEncoder, + TransformerModel, + base_architecture, +) +from torch import Tensor + + +logger = logging.getLogger(__name__) + + +@register_model("transformer_pointer_generator") +class TransformerPointerGeneratorModel(TransformerModel): + """ + Transformer model from `"Attention Is All You Need" (Vaswani et al, 2017) + <https://arxiv.org/abs/1706.03762>`_, augmented with a pointer-generator + network from `"Get To The Point: Summarization with Pointer-Generator + Networks" (See et al, 2017) <https://arxiv.org/abs/1704.04368>`_. 
+ + Args: + encoder (TransformerPointerGeneratorEncoder): the encoder + decoder (TransformerPointerGeneratorDecoder): the decoder + + The Transformer pointer-generator model provides the following named + architectures and command-line arguments: + + .. argparse:: + :ref: fairseq.models.transformer_pointer_generator_parser + :prog: + """ + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # fmt: off + TransformerModel.add_args(parser) + parser.add_argument('--alignment-heads', type=int, metavar='N', + help='number of attention heads to be used for ' + 'pointing') + parser.add_argument('--alignment-layer', type=int, metavar='I', + help='layer number to be used for pointing (0 ' + 'corresponding to the bottommost layer)') + parser.add_argument('--source-position-markers', type=int, metavar='N', + help='dictionary includes N additional items that ' + 'represent an OOV token at a particular input ' + 'position') + parser.add_argument('--force-generation', type=float, metavar='P', + default=None, + help='set the vocabulary distribution weight to P, ' + 'instead of predicting it from the input (1.0 ' + 'corresponding to generation, 0.0 to pointing)') + # fmt: on + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + if args.encoder_layers_to_keep: + args.encoder_layers = len(args.encoder_layers_to_keep.split(",")) + if args.decoder_layers_to_keep: + args.decoder_layers = len(args.decoder_layers_to_keep.split(",")) + + if getattr(args, "max_source_positions", None) is None: + args.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + if getattr(args, "max_target_positions", None) is None: + args.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + if getattr(args, "source_position_markers", None) is None: + args.source_position_markers = args.max_source_positions + + src_dict, tgt_dict = 
task.source_dictionary, task.target_dictionary + if src_dict != tgt_dict: + raise ValueError("Pointer-generator requires a joined dictionary") + + def build_embedding(dictionary, embed_dim, path=None): + # The dictionary may include additional items that can be used in + # place of the normal OOV token and that all map to the same + # embedding. Using a different token for each input position allows + # one to restore the word identities from the original source text. + num_embeddings = len(dictionary) - args.source_position_markers + padding_idx = dictionary.pad() + unk_idx = dictionary.unk() + logger.info( + "dictionary indices from {0} to {1} will be mapped to {2}".format( + num_embeddings, len(dictionary) - 1, unk_idx + ) + ) + emb = Embedding(num_embeddings, embed_dim, padding_idx, unk_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + return emb + + if args.share_all_embeddings: + if args.encoder_embed_dim != args.decoder_embed_dim: + raise ValueError( + "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim" + ) + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise ValueError( + "--share-all-embeddings not compatible with --decoder-embed-path" + ) + encoder_embed_tokens = build_embedding( + src_dict, args.encoder_embed_dim, args.encoder_embed_path + ) + decoder_embed_tokens = encoder_embed_tokens + args.share_decoder_input_output_embed = True + else: + encoder_embed_tokens = build_embedding( + src_dict, args.encoder_embed_dim, args.encoder_embed_path + ) + decoder_embed_tokens = build_embedding( + tgt_dict, args.decoder_embed_dim, args.decoder_embed_path + ) + + encoder = cls.build_encoder(args, src_dict, encoder_embed_tokens) + decoder = cls.build_decoder(args, tgt_dict, decoder_embed_tokens) + return cls(args, encoder, decoder) + + @classmethod + def build_encoder(cls, 
args, src_dict, embed_tokens): + return TransformerPointerGeneratorEncoder(args, src_dict, embed_tokens) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + return TransformerPointerGeneratorDecoder(args, tgt_dict, embed_tokens) + + +class TransformerPointerGeneratorEncoder(TransformerEncoder): + """ + Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. The pointer-generator variant adds + the source tokens to the encoder output as these are otherwise not passed + to the decoder. + """ + + def forward( + self, + src_tokens, + src_lengths: Optional[Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[Tensor] = None + ): + """ + Runs the `forward()` method of the parent Transformer class. Then adds + the source tokens into the encoder output tuple. + + While it might be more elegant that the model would pass the source + tokens to the `forward()` method of the decoder too, this would require + changes to `SequenceGenerator`. + + Args: + src_tokens (torch.LongTensor): tokens in the source language of + shape `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + namedtuple: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
+ - **src_tokens** (Tensor): input token ids of shape + `(batch, src_len)` + """ + encoder_out = self.forward_scriptable(src_tokens, + src_lengths, + return_all_hiddens, + token_embeddings) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + return { + "encoder_out": encoder_out["encoder_out"], # T x B x C + "encoder_padding_mask": encoder_out["encoder_padding_mask"], # B x T + "encoder_embedding": encoder_out["encoder_embedding"], # B x T x C + "encoder_states": encoder_out["encoder_states"], # List[T x B x C] + "src_tokens": [src_tokens], # B x T + "src_lengths": [], + } + + +class TransformerPointerGeneratorDecoder(TransformerDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. The pointer-generator variant mixes + the output probabilities with an attention distribution in the output layer. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + """ + + def __init__(self, args, dictionary, embed_tokens): + super().__init__(args, dictionary, embed_tokens, no_encoder_attn=False) + + # In the pointer-generator model these arguments define the decoder + # layer and the number of attention heads that will be averaged to + # create the alignment for pointing. + self.alignment_heads = args.alignment_heads + self.alignment_layer = args.alignment_layer + + input_embed_dim = embed_tokens.embedding_dim + + # Generation probabilities / interpolation coefficients are predicted + # from the current decoder input embedding and the decoder output, which + # is the size of output_embed_dim. 
+ p_gen_input_size = input_embed_dim + self.output_embed_dim + self.project_p_gens = nn.Linear(p_gen_input_size, 1) + nn.init.zeros_(self.project_p_gens.bias) + + # The dictionary may include a separate entry for an OOV token in each + # input position, so that their identity can be restored from the + # original source text. + self.num_types = len(dictionary) + self.num_oov_types = args.source_position_markers + self.num_embeddings = self.num_types - self.num_oov_types + self.force_p_gen = args.force_generation + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + alignment_layer: Optional[int] = 0, + alignment_heads: Optional[int] = 1, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention + incremental_state (dict, optional): dictionary used for storing + state during :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False) + alignment_layer (int, optional): 0-based index of the layer to be + used for pointing (default: 0) + alignment_heads (int, optional): number of attention heads to be + used for pointing (default: 1) + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + # The normal Transformer model doesn't pass the alignment_layer and + # alignment_heads parameters correctly. We use our local variables. 
+ x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + alignment_layer=self.alignment_layer, + alignment_heads=self.alignment_heads, + ) + if not features_only: + # Embedding the tokens again for generation probability prediction, + # so that we don't have to reimplement the whole extract_features() + # method. + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + prev_output_embed = self.embed_tokens(prev_output_tokens) + prev_output_embed *= self.embed_scale + predictors = torch.cat((prev_output_embed, x), 2) + p_gens = self.project_p_gens(predictors) + p_gens = torch.sigmoid(p_gens.float()) + # Torchscript complains if encoder_out or attn are None because + # `output_layer()` signature expects tensors instead + attn: Optional[Tensor] = extra["attn"][0] + assert encoder_out is not None + assert attn is not None + x = self.output_layer(x, attn, encoder_out["src_tokens"][0], p_gens) + return x, extra + + def output_layer( + self, + features: Tensor, + attn: Tensor, + src_tokens: Tensor, + p_gens: Tensor + ) -> Tensor: + """ + Project features to the vocabulary size and mix with the attention + distributions. + """ + if self.force_p_gen is not None: + p_gens = self.force_p_gen + + # project back to size of vocabulary + if self.adaptive_softmax is None: + logits = self.output_projection(features) + else: + logits = features + + batch_size = logits.shape[0] + output_length = logits.shape[1] + assert logits.shape[2] == self.num_embeddings + assert src_tokens.shape[0] == batch_size + src_length = src_tokens.shape[1] + + # The final output distribution will be a mixture of the normal output + # distribution (softmax of logits) and attention weights. 
+ gen_dists = self.get_normalized_probs_scriptable( + (logits, None), log_probs=False, sample=None + ) + gen_dists = torch.mul(gen_dists, p_gens) + padding_size = (batch_size, output_length, self.num_oov_types) + padding = gen_dists.new_zeros(padding_size) + gen_dists = torch.cat((gen_dists, padding), 2) + assert gen_dists.shape[2] == self.num_types + + # Scatter attention distributions to distributions over the extended + # vocabulary in a tensor of shape [batch_size, output_length, + # vocab_size]. Each attention weight will be written into a location + # that is for other dimensions the same as in the index tensor, but for + # the third dimension it's the value of the index tensor (the token ID). + attn = torch.mul(attn.float(), 1 - p_gens) + index = src_tokens[:, None, :] + index = index.expand(batch_size, output_length, src_length) + attn_dists_size = (batch_size, output_length, self.num_types) + attn_dists = attn.new_zeros(attn_dists_size) + attn_dists.scatter_add_(2, index, attn.float()) + + # Final distributions, [batch_size, output_length, num_types]. + return gen_dists + attn_dists + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + """ + Get normalized probabilities (or log probs) from a net's output. + Pointer-generator network output is already normalized. + """ + probs = net_output[0] + # Make sure the probabilities are greater than zero when returning log + # probabilities. + return probs.clamp(1e-10, 1.0).log() if log_probs else probs + + +class Embedding(nn.Embedding): + r"""A simple lookup table that stores embeddings of a fixed dictionary and size. + This module is often used to store word embeddings and retrieve them using indices. + The input to the module is a list of indices, and the output is the corresponding + word embeddings. 
This subclass differs from the standard PyTorch Embedding class by + allowing additional vocabulary entries that will be mapped to the unknown token + embedding. + Args: + num_embeddings (int): size of the dictionary of embeddings + embedding_dim (int): the size of each embedding vector + padding_idx (int): Pads the output with the embedding vector at :attr:`padding_idx` + (initialized to zeros) whenever it encounters the index. + unk_idx (int): Maps all token indices that are greater than or equal to + num_embeddings to this index. + Attributes: + weight (Tensor): the learnable weights of the module of shape (num_embeddings, embedding_dim) + initialized from :math:`\mathcal{N}(0, 1)` + Shape: + - Input: :math:`(*)`, LongTensor of arbitrary shape containing the indices to extract + - Output: :math:`(*, H)`, where `*` is the input shape and :math:`H=\text{embedding\_dim}` + .. note:: + Keep in mind that only a limited number of optimizers support + sparse gradients: currently it's :class:`optim.SGD` (`CUDA` and `CPU`), + :class:`optim.SparseAdam` (`CUDA` and `CPU`) and :class:`optim.Adagrad` (`CPU`) + .. note:: + With :attr:`padding_idx` set, the embedding vector at + :attr:`padding_idx` is initialized to all zeros. However, note that this + vector can be modified afterwards, e.g., using a customized + initialization method, and thus changing the vector used to pad the + output. The gradient for this vector from :class:`~torch.nn.Embedding` + is always zero. 
+ """ + __constants__ = ["unk_idx"] + + # Torchscript: Inheriting from Embedding class produces an error when exporting to Torchscript + # -> RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details + # It's happening because max_norm attribute from nn.Embedding is None by default and it cannot be + # cast to a C++ type + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int], + unk_idx: int, + max_norm: Optional[float] = float("inf"), + ): + super().__init__(num_embeddings, embedding_dim, padding_idx=padding_idx, max_norm=max_norm) + self.unk_idx = unk_idx + nn.init.normal_(self.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(self.weight[padding_idx], 0) + + def forward(self, input): + input = torch.where( + input >= self.num_embeddings, torch.ones_like(input) * self.unk_idx, input + ) + return nn.functional.embedding( + input, self.weight, self.padding_idx, self.max_norm, + self.norm_type, self.scale_grad_by_freq, self.sparse + ) + + +@register_model_architecture( + "transformer_pointer_generator", "transformer_pointer_generator" +) +def transformer_pointer_generator(args): + args.alignment_heads = getattr(args, "alignment_heads", 1) + args.alignment_layer = getattr(args, "alignment_layer", -1) + base_architecture(args) + if args.alignment_layer < 0: + args.alignment_layer = args.decoder_layers + args.alignment_layer + + +@register_model_architecture( + "transformer_pointer_generator", "transformer_pointer_generator_iwslt_de_en" +) +def transformer_pointer_generator_iwslt_de_en(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 1024) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_ffn_embed_dim = getattr(args, 
"decoder_ffn_embed_dim", 1024) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) + args.decoder_layers = getattr(args, "decoder_layers", 6) + transformer_pointer_generator(args) + + +@register_model_architecture( + "transformer_pointer_generator", "transformer_pointer_generator_wmt_en_de" +) +def transformer_pointer_generator_wmt_en_de(args): + transformer_pointer_generator(args) + + +# Transformer pointer-generator with the base Transformer parameters as used in +# the "Attention Is All You Need" paper (Vaswani et al., 2017) +@register_model_architecture( + "transformer_pointer_generator", + "transformer_pointer_generator_vaswani_wmt_en_de_big", +) +def transformer_pointer_generator_vaswani_wmt_en_de_big(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.3) + transformer_pointer_generator(args) + + +@register_model_architecture( + "transformer_pointer_generator", + "transformer_pointer_generator_vaswani_wmt_en_fr_big", +) +def transformer_pointer_generator_vaswani_wmt_en_fr_big(args): + args.dropout = getattr(args, "dropout", 0.1) + transformer_pointer_generator_vaswani_wmt_en_de_big(args) + + +@register_model_architecture( + "transformer_pointer_generator", "transformer_pointer_generator_wmt_en_de_big" +) +def transformer_pointer_generator_wmt_en_de_big(args): + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + transformer_pointer_generator_vaswani_wmt_en_de_big(args) + + +# default parameters used in 
tensor2tensor implementation +@register_model_architecture( + "transformer_pointer_generator", "transformer_pointer_generator_wmt_en_de_big_t2t" +) +def transformer_pointer_generator_wmt_en_de_big_t2t(args): + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_dropout = getattr(args, "activation_dropout", 0.1) + transformer_pointer_generator_vaswani_wmt_en_de_big(args) diff --git a/fairseq/examples/pointer_generator/postprocess.py b/fairseq/examples/pointer_generator/postprocess.py new file mode 100644 index 0000000..b213aed --- /dev/null +++ b/fairseq/examples/pointer_generator/postprocess.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import re +import sys + + +class OOVIndexError(IndexError): + def __init__(self, pos, source_seq, target_seq): + super(OOVIndexError, self).__init__( + "A <unk-N> tag in the target sequence refers to a position that is " + "outside the source sequence. Most likely there was a mismatch in " + "provided source and target sequences. Otherwise this would mean that " + "the pointing mechanism somehow attended to a position that is past " + "the actual sequence end." + ) + self.source_pos = pos + self.source_seq = source_seq + self.target_seq = target_seq + + +def replace_oovs(source_in, target_in, target_out): + """Replaces <unk-N> tokens in the target text with the corresponding word in + the source text. 
+ """ + + oov_re = re.compile("^<unk-([0-9]+)>$") + + for source_seq, target_seq in zip(source_in, target_in): + target_seq_out = [] + + pos_to_word = source_seq.strip().split() + for token in target_seq.strip().split(): + m = oov_re.match(token) + if m: + pos = int(m.group(1)) + if pos >= len(pos_to_word): + raise OOVIndexError(pos, source_seq, target_seq) + token_out = pos_to_word[pos] + else: + token_out = token + target_seq_out.append(token_out) + target_out.write(" ".join(target_seq_out) + "\n") + + +def main(): + parser = argparse.ArgumentParser( + description="Replaces <unk-N> tokens in target sequences with words from " + "the corresponding position in the source sequence." + ) + parser.add_argument( + "--source", type=str, help="text file with source sequences", required=True + ) + parser.add_argument( + "--target", type=str, help="text file with target sequences", required=True + ) + parser.add_argument( + "--target-out", + type=str, + help="where to write target sequences without <unk-N> " "entries", + required=True, + ) + args = parser.parse_args() + + target_in = ( + open(args.target, "r", encoding="utf-8") if args.target is not None else None + ) + target_out = ( + open(args.target_out, "w", encoding="utf-8") + if args.target_out is not None + else None + ) + with open(args.source, "r", encoding="utf-8") as source_in, open( + args.target, "r", encoding="utf-8" + ) as target_in, open(args.target_out, "w", encoding="utf-8") as target_out: + replace_oovs(source_in, target_in, target_out) + + +if __name__ == "__main__": + try: + main() + except OOVIndexError as e: + print(e, file=sys.stderr) + print("Source sequence:", e.source_seq.strip(), file=sys.stderr) + print("Target sequence:", e.target_seq.strip(), file=sys.stderr) + print( + "Source sequence length:", + len(e.source_seq.strip().split()), + file=sys.stderr, + ) + print("The offending tag points to:", e.source_pos) + sys.exit(2) diff --git a/fairseq/examples/pointer_generator/preprocess.py 
b/fairseq/examples/pointer_generator/preprocess.py new file mode 100644 index 0000000..f72ca7d --- /dev/null +++ b/fairseq/examples/pointer_generator/preprocess.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +from itertools import zip_longest + + +def replace_oovs(source_in, target_in, vocabulary, source_out, target_out): + """Replaces out-of-vocabulary words in source and target text with <unk-N>, + where N in is the position of the word in the source sequence. + """ + + def format_unk(pos): + return "<unk-{}>".format(pos) + + if target_in is None: + target_in = [] + + for seq_num, (source_seq, target_seq) in enumerate( + zip_longest(source_in, target_in) + ): + source_seq_out = [] + target_seq_out = [] + + word_to_pos = dict() + for position, token in enumerate(source_seq.strip().split()): + if token in vocabulary: + token_out = token + else: + if token in word_to_pos: + oov_pos = word_to_pos[token] + else: + word_to_pos[token] = position + oov_pos = position + token_out = format_unk(oov_pos) + source_seq_out.append(token_out) + source_out.write(" ".join(source_seq_out) + "\n") + + if target_seq is not None: + for token in target_seq.strip().split(): + if token in word_to_pos: + token_out = format_unk(word_to_pos[token]) + else: + token_out = token + target_seq_out.append(token_out) + if target_out is not None: + target_out.write(" ".join(target_seq_out) + "\n") + + +def main(): + parser = argparse.ArgumentParser( + description="Replaces out-of-vocabulary words in both source and target " + "sequences with tokens that indicate the position of the word " + "in the source sequence." 
+ ) + parser.add_argument( + "--source", type=str, help="text file with source sequences", required=True + ) + parser.add_argument( + "--target", type=str, help="text file with target sequences", default=None + ) + parser.add_argument("--vocab", type=str, help="vocabulary file", required=True) + parser.add_argument( + "--source-out", + type=str, + help="where to write source sequences with <unk-N> entries", + required=True, + ) + parser.add_argument( + "--target-out", + type=str, + help="where to write target sequences with <unk-N> entries", + default=None, + ) + args = parser.parse_args() + + with open(args.vocab, encoding="utf-8") as vocab: + vocabulary = vocab.read().splitlines() + + target_in = ( + open(args.target, "r", encoding="utf-8") if args.target is not None else None + ) + target_out = ( + open(args.target_out, "w", encoding="utf-8") + if args.target_out is not None + else None + ) + with open(args.source, "r", encoding="utf-8") as source_in, open( + args.source_out, "w", encoding="utf-8" + ) as source_out: + replace_oovs(source_in, target_in, vocabulary, source_out, target_out) + if target_in is not None: + target_in.close() + if target_out is not None: + target_out.close() + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/quant_noise/README.md b/fairseq/examples/quant_noise/README.md new file mode 100644 index 0000000..a04d7e4 --- /dev/null +++ b/fairseq/examples/quant_noise/README.md @@ -0,0 +1,298 @@ +# Training with Quantization Noise for Extreme Model Compression ({Fan\*, Stock\*} *et al.*, 2020) +This page contains information for how to train and quantize models with Quantization Noise, for both scalar quantization like `int8` and Iterative Product Quantization. +Check out our paper [here](https://arxiv.org/abs/2004.07320). + +Looking for pretrained models? They will be added shortly. +Looking for code to train vision models? We are working on open sourcing our code as part of ClassyVision. 
Please check back, but note that both the Scalar and Iterative Product Quantization counterparts of the `nn.Conv2d` module are already included in this release. + +**Contents**: +- [Walk through of code](#walk-through-the-code) +- [Reproduce NLP Results](#looking-to-reproduce-the-nlp-results-in-the-paper) +- [Reproduce Vision Results](#looking-to-reproduce-the-vision-results-in-the-paper) + + +## Citation +```bibtex +@article{fan2020training, +    title={Training with Quantization Noise for Extreme Model Compression}, +    author={Angela Fan* and Pierre Stock* and Benjamin Graham and Edouard Grave and Remi Gribonval and Herve Jegou and Armand Joulin}, +    year={2020}, +    eprint={2004.07320}, +    archivePrefix={arXiv}, +    primaryClass={cs.ML} +} +``` + +## Walk through the code + +Training a model with Quant-Noise improves the performance in subsequent inference-time quantization by training models to be robust to quantization. This technique is useful for both scalar and product quantization methods, as well as multiple domains. We detail below our approach to train, quantize models and integrate our code to quantize your favorite models. + +### Scalar Quantization + +Unlike the section [Iterative Product Quantization](#iterative-product-quantization) which gives state-of-the-art compression, this section showcases the usefulness of our approach for simple scalar quantization baselines such as int8 using on-GPU Fake Quantization. + +#### Training + +Scalar quantization with Quant-Noise consists in randomly quantizing a proportion `p` of the weights during training. Scalar quantization is implemented [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quantization/scalar) under the form of Fake Quantization, meaning that we emulate int8 on GPU by quantizing and de-quantizing both the weights and the activations. We rely on PyTorch's [quantization primitives](https://github.com/pytorch/pytorch/tree/master/torch/quantization).
+ +To train a model with Quant-Noise, add the following flag: +``` +--quant-noise-scalar 0.5 +``` +Large values of noise make the network easier to quantize but may result in higher non-quantized test and validation perplexities. + +#### Quantization + +When evaluating a network, all quantized modules and activation hooks automatically switch to `p=1` so the validation accuracy reported by Fairseq is actually the quantized one, nothing more to do. + + +#### Integration with your own code + +Looking to quantize your own models with Quant-Noise + Scalar Quantization? +- Use the function `quantize_model_` implemented [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quantization/scalar/utils.py) to (1) replace all your modules by their quantized counterparts and (2) add hooks to those modules to quantize the activations. +- Then, perform your training as usual. Note that in `eval()` mode, the network is always fully quantized (weights and activations) by default (`p=1`). + + + +### Iterative Product Quantization + + +Iterative Product Quantization with Quant-Noise proceeds in two steps. First, a model must be trained uncompressed with Quant-Noise. Second, the model must be quantized with iPQ. Note that we implement here the simplest form of noise, which consists in randomly dropping a proportion `p` of blocks, and that worked as well as assigning those blocks to their current centroid. + +#### Training + +To train a model with Quant-Noise, add the following flags: +``` +--quant-noise-pq 0.1 --quant-noise-pq-block-size 8 +``` +`quant-noise-pq` controls how much dropout is applied to the blocks of the weight matrix. `quant-noise-pq-block-size` controls the size of the weight matrix blocks. +We recommend training with 0.05 to 0.2 Quant-Noise, a value that worked well in our experiments. For the block-size, we recommend training with block-size of 8. 
Note that `input_features` must be a multiple of the block size (i.e. the block size must evenly divide `input_features`), see the size checks [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quant_noise.py). Large block sizes result in higher compression ratio but may induce a loss in accuracy. + +We currently support training Transformer based models, such as sequence-to-sequence, language models, and BERT architectures. The `quant_noise` function [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quant_noise.py) wraps a module. It splits a weight matrix into blocks and applies random dropout to these blocks. +In the Transformer architectures, quant-noise is applied to the input and output embeddings, the attention, and the FFN. + +Quant-Noise can also be combined with **LayerDrop** (see [here](https://github.com/pytorch/fairseq/tree/main/examples/layerdrop)) to add its pruning effect to the quantized model and make the model even smaller. We recommend training with LayerDrop 0.1 or 0.2. + +#### Quantization + +We implement an improved version of product quantization from Stock et al, **iPQ**, described [here](https://arxiv.org/abs/1907.05686), see code with old API [here](https://github.com/facebookresearch/kill-the-bits). Note that we improved the iPQ API in terms of both compute speed and usability as described below. + +For the particular case of PQ, quantization is made sequentially. We recommend first quantizing the FFNs, then the EMBs, and finally the ATTNs. Quantization is done in two sub-steps: +- First, perform `n` steps of Product Quantization (generally `n=20` is enough). +- Then, finetune the obtained centroids. + +#### Integration with your own code + +Looking to quantize your own models with Quant-Noise + iPQ? +- First wrap your modules with the `quant_noise` function [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quant_noise.py), which is module-agnostic, and train your favorite model. 
+- Then, quantize your trained model using the code [here](https://github.com/pytorch/fairseq/tree/main/fairseq/modules/quantization/pq). This can be done *without any changes to your training loop*. Below is example code for integration. +Note that we tried our approach only on Transformers and various Convolutional Models such as EfficientNets. + +```python +from fairseq.modules.quantization.pq import quantize_model_, SizeTracker + +# get configuration parameters +n_centroids_config = config["n_centroids"] +block_sizes_config = config["block_sizes"] +layers_to_quantize = config["layers_to_quantize"] + +# size tracker for keeping track of assignments, centroids and non-compressed sizes +size_tracker = SizeTracker(model) + +# Quantize model by stages +for step in range(len(layers_to_quantize)): + + # quantize model in-place + quantized_layers = quantize_model_( + model, + size_tracker, + layers_to_quantize, + block_sizes_config, + n_centroids_config, + step=step, + ) + logger.info(f"Finetuning stage {step}, quantized layers: {quantized_layers}") + logger.info(f"{size_tracker}") + + # Don't forget to re-create/update trainer/optimizer since model parameters have changed + optimizer = ... + + # Finetune the centroids with your usual training loop for a few epochs + trainer.train_epoch() +``` + + +## Looking to reproduce the NLP results in the paper? + +We detail below how to reproduce the state-of-the-art results reported in the paper for Quant-Noise + Iterative Product Quantization. + +### Training with Quant-Noise + +To **train** RoBERTa + QuantNoise, we followed this setting [here](https://github.com/pytorch/fairseq/tree/main/examples/roberta). 
+The following command can be used to train a RoBERTa Base + QuantNoise model: + +```bash +TOTAL_UPDATES=125000 +WARMUP_UPDATES=10000 +PEAK_LR=0.0005 +TOKENS_PER_SAMPLE=512 +MAX_POSITIONS=512 +MAX_SENTENCES=16 +UPDATE_FREQ=2 +DATA_DIR=/path/to/data/here + +fairseq-train $DATA_DIR \ + --task masked_lm --criterion masked_lm --arch roberta_base \ + --sample-break-mode complete \ + --tokens-per-sample $TOKENS_PER_SAMPLE --max-positions $MAX_POSITIONS \ + --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-6 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $PEAK_LR \ + --warmup-updates $WARMUP_UPDATES --total-num-update $TOTAL_UPDATES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.01 \ + --batch-size $MAX_SENTENCES \ + --update-freq $UPDATE_FREQ --max-update $TOTAL_UPDATES \ + --save-dir checkpoint/roberta \ + --ddp-backend legacy_ddp --encoder-layerdrop 0.2 \ + --quant-noise-pq 0.2 --quant-noise-pq-block-size 8 --untie-weights-roberta +``` + +To **finetune** RoBERTa + QuantNoise, we followed this setting [here](https://github.com/pytorch/fairseq/blob/main/examples/roberta/README.glue.md). 
+The following command can be used to finetune a RoBERTa Base + QuantNoise model on the RTE dataset: + +```bash +TOTAL_NUM_UPDATES=2036 +WARMUP_UPDATES=122 +LR=2e-05 +NUM_CLASSES=2 +MAX_SENTENCES=16 +ROBERTA_PATH=/path/to/roberta_quantnoise/model.pt + +fairseq-train /path/to/rte/data/ \ + --restore-file $ROBERTA_PATH \ + --max-positions 512 \ + --batch-size $MAX_SENTENCES \ + --max-tokens 4400 \ + --task sentence_prediction \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --init-token 0 --separator-token 2 \ + --arch roberta_large \ + --criterion sentence_prediction \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.1 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --max-epoch 10 \ + --find-unused-parameters \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --ddp-backend legacy_ddp \ + --quant-noise-pq 0.2 --quant-noise-pq-block-size 8 +``` + +To **train** Language Models on Wikitext-103, we followed this setting [here](https://github.com/pytorch/fairseq/tree/main/examples/language_model). 
+The following command can be used to train a Transformer + QuantNoise model on Wikitext-103: + +```bash +fairseq-train --task language_modeling /path/to/wikitext-103/data \ + --save-dir checkpoints/transformer_wikitext-103 \ + --adaptive-input --adaptive-input-cutoff 20000,60000 --adaptive-input-factor 4 \ + --adaptive-softmax-cutoff 20000,60000 --adaptive-softmax-dropout 0.2 --adaptive-softmax-factor 4.0 \ + --tie-adaptive-proj --tie-adaptive-weights \ + --arch transformer_lm_gbw \ + --attention-dropout 0.1 --dropout 0.2 --relu-dropout 0.1 \ + --clip-norm 0.1 --criterion adaptive_loss \ + --ddp-backend legacy_ddp \ + --decoder-attention-heads 8 --decoder-embed-dim 1024 --decoder-ffn-embed-dim 4096 --decoder-input-dim 1024 \ + --decoder-layers 16 --decoder-normalize-before --decoder-output-dim 1024 \ + --min-lr 0.0001 --lr-period-updates 270000 --lr-scheduler cosine --lr-shrink 0.75 --lr 1.0 --t-mult 2.0 \ + --max-tokens 3072 --tokens-per-sample 3072 --momentum 0.99 --optimizer nag \ + --sample-break-mode none --update-freq 3 \ + --warmup-init-lr 1e-07 --warmup-updates 16000 \ + --weight-decay 0 --seed 1 --stop-min-lr 1e-09 \ + --quant-noise-pq 0.05 --quant-noise-pq-block-size 8 +``` + +To **evaluate** this model, note you need to use the `eval.py` script. The following command can be used to evaluate: + +```bash +fairseq-eval-lm /path/to/wikitext-103/data --path /path/to/model/checkpoint \ + --sample-break-mode complete \ + --max-tokens 3072 \ + --context-window 2560 \ + --softmax-batch 1024 \ + --gen-subset valid +``` +and change the `--gen-subset` to `test` if you would like to evaluate on the test set instead. + + +### Iterative Product Quantization + +To quantize the finetuned RoBERTa model, we use this command on 1 GPU. This should run in a day. 
+```bash +TOTAL_NUM_UPDATES=6108 # 2036 updates for each iteration +WARMUP_UPDATES=122 +LR=2e-05 +NUM_CLASSES=2 +MAX_SENTENCES=16 +fairseq-train --task sentence_prediction /path/to/data/ \ + --restore-file $ROBERTA_PATH \ + --save-dir checkpoints/roberta_finetuned \ + --max-positions 512 \ + --batch-size $MAX_SENTENCES \ + --max-tokens 4400 \ + --init-token 0 --separator-token 2 \ + --arch roberta_large \ + --criterion sentence_prediction \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.1 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --clip-norm 0.0 --lr-scheduler polynomial_decay \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --no-progress-bar --skip-invalid-size-inputs-valid-test --ddp-backend legacy_ddp \ + --quantization-config-path /path/to/config/yaml +``` + +To quantize the trained Language Model, we use this command on 8 V100 23GB GPUs. This should run in a couple of hours. +```bash +fairseq-train --task language_modeling /path/to/wikitext-103/data \ + --save-dir checkpoints/transformer_wikitext-103 \ + --adaptive-input --adaptive-input-cutoff 20000,60000 --adaptive-input-factor 4 \ + --adaptive-softmax-cutoff 20000,60000 --adaptive-softmax-dropout 0.2 --adaptive-softmax-factor 4.0 \ + --arch transformer_lm_gbw \ + --attention-dropout 0.1 --dropout 0.2 --relu-dropout 0.1 \ + --bucket-cap-mb 25 --char-embedder-highway-layers 2 --character-embedding-dim 4 \ + --clip-norm 0.1 --criterion adaptive_loss \ + --ddp-backend legacy_ddp \ + --decoder-attention-heads 8 --decoder-embed-dim 1024 --decoder-ffn-embed-dim 4096 --decoder-input-dim 1024 --decoder-layers 16 --decoder-normalize-before --decoder-output-dim 1024 \ + --fp16 --keep-last-epochs -1 \ + --min-lr 0.0001 --lr-period-updates 270000 --lr-scheduler cosine --lr-shrink 0.75 --lr 0.05 --stop-min-lr 1e-09 \ + --max-tokens 2944 --tokens-per-sample 2944\ + --momentum 0.99 --no-epoch-checkpoints --no-progress-bar 
--optimizer nag --required-batch-size-multiple 8 \ + --sample-break-mode none --t-mult 2.0 --skip-invalid-size-inputs-valid-test \ + --tie-adaptive-proj --tie-adaptive-weights --update-freq 3 --weight-decay 0 --seed 1 \ + --log-interval 100 --no-progress-bar --skip-invalid-size-inputs-valid-test \ + --restore-file path/to/trained/lm/with/quant/noise \ + --max-update 13500 --quantization-config-path /path/to/config/yaml +``` +If you have less capacity or if your distributed training freezes, try reducing `--max-tokens` and `--tokens-per-sample` (this may reduce the quantized accuracy a bit). + +### Remarks + +We try to keep the open-sourced code as readable and as easy-to-plug as possible. Therefore, we did not test it for the following cases: +- Scalar quantization with RoBERTa. +- Quantization with iPQ and `int8` combined. + +If you have trouble adapting it, we will be more than happy to help! + +## Looking to reproduce the Vision results in the paper? + +We are working on open sourcing our code as part of ClassyVision. Please check back. + + +## Having an issue or have a question? + +Please open an issue in this repository with the details of your question. Thanks! diff --git a/fairseq/examples/quant_noise/transformer_quantization_config.yaml b/fairseq/examples/quant_noise/transformer_quantization_config.yaml new file mode 100644 index 0000000..d4be14a --- /dev/null +++ b/fairseq/examples/quant_noise/transformer_quantization_config.yaml @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +# This file defines example configuration arguments for quantizing +# a transformer model with product quantization + +# Number of Centroids for Product Quantization, by default 256 (byte-aligned) +n_centroids: + Linear: + key: in_features + value: {"*": 256} + Embedding: + key: embedding_dim + value: {"*": 256} + +# Block Sizes for Product Quantization +# We suggest: 8 for FFN, 4 for ATTN, 4 for embedding projections, 8 for embeddings +block_sizes: + Linear: + key: fuzzy_name + value: {fc: 8, attn: 4, emb: 4} + Embedding: + key: fuzzy_name + value: {emb: 8} + +# Layers to Quantize Sequentially +# We suggest: first FFN, then EMB, then ATTN +layers_to_quantize: + - decoder\\.layers\\.\d+\\.fc[12] + - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01] + - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj) diff --git a/fairseq/examples/roberta/README.custom_classification.md b/fairseq/examples/roberta/README.custom_classification.md new file mode 100644 index 0000000..7254bb7 --- /dev/null +++ b/fairseq/examples/roberta/README.custom_classification.md @@ -0,0 +1,168 @@ +# Finetuning RoBERTa on a custom classification task + +This example shows how to finetune RoBERTa on the IMDB dataset, but should illustrate the process for most classification tasks. + +### 1) Get the data + +```bash +wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz +tar zxvf aclImdb_v1.tar.gz +``` + + +### 2) Format data + +`IMDB` data has one data-sample in each file. The Python code snippet below converts it into one file each for train and valid, for ease of processing. 
+```python +import argparse +import os +import random +from glob import glob + +random.seed(0) + +def main(args): + for split in ['train', 'test']: + samples = [] + for class_label in ['pos', 'neg']: + fnames = glob(os.path.join(args.datadir, split, class_label) + '/*.txt') + for fname in fnames: + with open(fname) as fin: + line = fin.readline() + samples.append((line, 1 if class_label == 'pos' else 0)) + random.shuffle(samples) + out_fname = 'train' if split == 'train' else 'dev' + f1 = open(os.path.join(args.datadir, out_fname + '.input0'), 'w') + f2 = open(os.path.join(args.datadir, out_fname + '.label'), 'w') + for sample in samples: + f1.write(sample[0] + '\n') + f2.write(str(sample[1]) + '\n') + f1.close() + f2.close() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--datadir', default='aclImdb') + args = parser.parse_args() + main(args) +``` + + +### 3) BPE encode + +Run `multiprocessing_bpe_encoder`, you can also do this in previous step for each sample but that might be slower. +```bash +# Download encoder.json and vocab.bpe +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe' + +for SPLIT in train dev; do + python -m examples.roberta.multiprocessing_bpe_encoder \ + --encoder-json encoder.json \ + --vocab-bpe vocab.bpe \ + --inputs "aclImdb/$SPLIT.input0" \ + --outputs "aclImdb/$SPLIT.input0.bpe" \ + --workers 60 \ + --keep-empty +done +``` + + +### 4) Preprocess data + +```bash +# Download fairseq dictionary. 
+wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt' + +fairseq-preprocess \ + --only-source \ + --trainpref "aclImdb/train.input0.bpe" \ + --validpref "aclImdb/dev.input0.bpe" \ + --destdir "IMDB-bin/input0" \ + --workers 60 \ + --srcdict dict.txt + +fairseq-preprocess \ + --only-source \ + --trainpref "aclImdb/train.label" \ + --validpref "aclImdb/dev.label" \ + --destdir "IMDB-bin/label" \ + --workers 60 + +``` + + +### 5) Run training + +```bash +TOTAL_NUM_UPDATES=7812 # 10 epochs through IMDB for bsz 32 +WARMUP_UPDATES=469 # 6 percent of the number of updates +LR=1e-05 # Peak LR for polynomial LR scheduler. +HEAD_NAME=imdb_head # Custom name for the classification head. +NUM_CLASSES=2 # Number of classes for the classification task. +MAX_SENTENCES=8 # Batch size. +ROBERTA_PATH=/path/to/roberta.large/model.pt + +CUDA_VISIBLE_DEVICES=0 fairseq-train IMDB-bin/ \ + --restore-file $ROBERTA_PATH \ + --max-positions 512 \ + --batch-size $MAX_SENTENCES \ + --max-tokens 4400 \ + --task sentence_prediction \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --init-token 0 --separator-token 2 \ + --arch roberta_large \ + --criterion sentence_prediction \ + --classification-head-name $HEAD_NAME \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.1 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --max-epoch 10 \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --shorten-method "truncate" \ + --find-unused-parameters \ + --update-freq 4 +``` + +The above command will finetune RoBERTa-large with an effective batch-size of 32 +sentences (`--batch-size=8 --update-freq=4`). 
The expected +`best-validation-accuracy` after 10 epochs is ~96.5%. + +If you run out of GPU memory, try decreasing `--batch-size` and increase +`--update-freq` to compensate. + + +### 6) Load model using hub interface + +Now we can load the trained model checkpoint using the RoBERTa hub interface. + +Assuming your checkpoints are stored in `checkpoints/`: +```python +from fairseq.models.roberta import RobertaModel +roberta = RobertaModel.from_pretrained( + 'checkpoints', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='IMDB-bin' +) +roberta.eval() # disable dropout +``` + +Finally you can make predictions using the `imdb_head` (or whatever you set +`--classification-head-name` to during training): +```python +label_fn = lambda label: roberta.task.label_dictionary.string( + [label + roberta.task.label_dictionary.nspecial] +) + +tokens = roberta.encode('Best movie this year') +pred = label_fn(roberta.predict('imdb_head', tokens).argmax().item()) +assert pred == '1' # positive + +tokens = roberta.encode('Worst movie ever') +pred = label_fn(roberta.predict('imdb_head', tokens).argmax().item()) +assert pred == '0' # negative +``` diff --git a/fairseq/examples/roberta/README.glue.md b/fairseq/examples/roberta/README.glue.md new file mode 100644 index 0000000..4f596d5 --- /dev/null +++ b/fairseq/examples/roberta/README.glue.md @@ -0,0 +1,64 @@ +# Finetuning RoBERTa on GLUE tasks + +### 1) Download the data from GLUE website (https://gluebenchmark.com/tasks) using following commands: +```bash +wget https://gist.githubusercontent.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e/raw/17b8dd0d724281ed7c3b2aeeda662b92809aadd5/download_glue_data.py +python download_glue_data.py --data_dir glue_data --tasks all +``` + +### 2) Preprocess GLUE task data: +```bash +./examples/roberta/preprocess_GLUE_tasks.sh glue_data <glue_task_name> +``` +`glue_task_name` is one of the following: +`{ALL, QQP, MNLI, QNLI, MRPC, RTE, STS-B, SST-2, CoLA}` +Use `ALL` for preprocessing all 
the glue tasks. + +### 3) Fine-tuning on GLUE task: +Example fine-tuning cmd for `RTE` task +```bash +ROBERTA_PATH=/path/to/roberta/model.pt + +CUDA_VISIBLE_DEVICES=0 fairseq-hydra-train --config-dir examples/roberta/config/finetuning --config-name rte \ +task.data=RTE-bin checkpoint.restore_file=$ROBERTA_PATH +``` + +There are additional config files for each of the GLUE tasks in the `examples/roberta/config/finetuning` directory. + +**Note:** + +a) Above cmd-args and hyperparams are tested on one Nvidia `V100` GPU with `32gb` of memory for each task. Depending on the GPU memory resources available to you, you can increase `--update-freq` and reduce `--batch-size`. + +b) All the settings in above table are suggested settings based on our hyperparam search within a fixed search space (for careful comparison across models). You might be able to find better metrics with wider hyperparam search. + +### Inference on GLUE task +After training the model as mentioned in the previous step, you can perform inference with checkpoints in the `checkpoints/` directory using the following Python code snippet: + +```python +from fairseq.models.roberta import RobertaModel + +roberta = RobertaModel.from_pretrained( + 'checkpoints/', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='RTE-bin' +) + +label_fn = lambda label: roberta.task.label_dictionary.string( + [label + roberta.task.label_dictionary.nspecial] +) +ncorrect, nsamples = 0, 0 +roberta.cuda() +roberta.eval() +with open('glue_data/RTE/dev.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[1], tokens[2], tokens[3] + tokens = roberta.encode(sent1, sent2) + prediction = roberta.predict('sentence_classification_head', tokens).argmax().item() + prediction_label = label_fn(prediction) + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) + +``` diff --git 
a/fairseq/examples/roberta/README.md b/fairseq/examples/roberta/README.md new file mode 100644 index 0000000..ed4d5df --- /dev/null +++ b/fairseq/examples/roberta/README.md @@ -0,0 +1,296 @@ +# RoBERTa: A Robustly Optimized BERT Pretraining Approach + +https://arxiv.org/abs/1907.11692 + +## Introduction + +RoBERTa iterates on BERT's pretraining procedure, including training the model longer, with bigger batches over more data; removing the next sentence prediction objective; training on longer sequences; and dynamically changing the masking pattern applied to the training data. See the associated paper for more details. + +### What's New: + +- December 2020: German model (GottBERT) is available: [GottBERT](https://github.com/pytorch/fairseq/tree/main/examples/gottbert). +- January 2020: Italian model (UmBERTo) is available from Musixmatch Research: [UmBERTo](https://github.com/musixmatchresearch/umberto). +- November 2019: French model (CamemBERT) is available: [CamemBERT](https://github.com/pytorch/fairseq/tree/main/examples/camembert). +- November 2019: Multilingual encoder (XLM-RoBERTa) is available: [XLM-R](https://github.com/pytorch/fairseq/tree/main/examples/xlmr). +- September 2019: TensorFlow and TPU support via the [transformers library](https://github.com/huggingface/transformers). +- August 2019: RoBERTa is now supported in the [pytorch-transformers library](https://github.com/huggingface/pytorch-transformers). +- August 2019: Added [tutorial for finetuning on WinoGrande](https://github.com/pytorch/fairseq/tree/main/examples/roberta/wsc#roberta-training-on-winogrande-dataset). +- August 2019: Added [tutorial for pretraining RoBERTa using your own data](README.pretraining.md). 
+ +## Pre-trained models + +Model | Description | # params | Download +---|---|---|--- +`roberta.base` | RoBERTa using the BERT-base architecture | 125M | [roberta.base.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz) +`roberta.large` | RoBERTa using the BERT-large architecture | 355M | [roberta.large.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz) +`roberta.large.mnli` | `roberta.large` finetuned on [MNLI](http://www.nyu.edu/projects/bowman/multinli) | 355M | [roberta.large.mnli.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.mnli.tar.gz) +`roberta.large.wsc` | `roberta.large` finetuned on [WSC](wsc/README.md) | 355M | [roberta.large.wsc.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.wsc.tar.gz) + +## Results + +**[GLUE (Wang et al., 2019)](https://gluebenchmark.com/)** +_(dev set, single model, single-task finetuning)_ + +Model | MNLI | QNLI | QQP | RTE | SST-2 | MRPC | CoLA | STS-B +---|---|---|---|---|---|---|---|--- +`roberta.base` | 87.6 | 92.8 | 91.9 | 78.7 | 94.8 | 90.2 | 63.6 | 91.2 +`roberta.large` | 90.2 | 94.7 | 92.2 | 86.6 | 96.4 | 90.9 | 68.0 | 92.4 +`roberta.large.mnli` | 90.2 | - | - | - | - | - | - | - + +**[SuperGLUE (Wang et al., 2019)](https://super.gluebenchmark.com/)** +_(dev set, single model, single-task finetuning)_ + +Model | BoolQ | CB | COPA | MultiRC | RTE | WiC | WSC +---|---|---|---|---|---|---|--- +`roberta.large` | 86.9 | 98.2 | 94.0 | 85.7 | 89.5 | 75.6 | - +`roberta.large.wsc` | - | - | - | - | - | - | 91.3 + +**[SQuAD (Rajpurkar et al., 2018)](https://rajpurkar.github.io/SQuAD-explorer/)** +_(dev set, no additional data used)_ + +Model | SQuAD 1.1 EM/F1 | SQuAD 2.0 EM/F1 +---|---|--- +`roberta.large` | 88.9/94.6 | 86.5/89.4 + +**[RACE (Lai et al., 2017)](http://www.qizhexie.com/data/RACE_leaderboard.html)** +_(test set)_ + +Model | Accuracy | Middle | High +---|---|---|--- +`roberta.large` | 83.2 | 86.5 | 81.3 + +**[HellaSwag (Zellers 
et al., 2019)](https://rowanzellers.com/hellaswag/)** +_(test set)_ + +Model | Overall | In-domain | Zero-shot | ActivityNet | WikiHow +---|---|---|---|---|--- +`roberta.large` | 85.2 | 87.3 | 83.1 | 74.6 | 90.9 + +**[Commonsense QA (Talmor et al., 2019)](https://www.tau-nlp.org/commonsenseqa)** +_(test set)_ + +Model | Accuracy +---|--- +`roberta.large` (single model) | 72.1 +`roberta.large` (ensemble) | 72.5 + +**[Winogrande (Sakaguchi et al., 2019)](https://arxiv.org/abs/1907.10641)** +_(test set)_ + +Model | Accuracy +---|--- +`roberta.large` | 78.1 + +**[XNLI (Conneau et al., 2018)](https://arxiv.org/abs/1809.05053)** +_(TRANSLATE-TEST)_ + +Model | en | fr | es | de | el | bg | ru | tr | ar | vi | th | zh | hi | sw | ur +---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|--- +`roberta.large.mnli` | 91.3 | 82.91 | 84.27 | 81.24 | 81.74 | 83.13 | 78.28 | 76.79 | 76.64 | 74.17 | 74.05 | 77.5 | 70.9 | 66.65 | 66.81 + +## Example usage + +##### Load RoBERTa from torch.hub (PyTorch >= 1.1): +```python +import torch +roberta = torch.hub.load('pytorch/fairseq', 'roberta.large') +roberta.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Load RoBERTa (for PyTorch 1.0 or custom models): +```python +# Download roberta.large model +wget https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz +tar -xzvf roberta.large.tar.gz + +# Load the model in fairseq +from fairseq.models.roberta import RobertaModel +roberta = RobertaModel.from_pretrained('/path/to/roberta.large', checkpoint_file='model.pt') +roberta.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Apply Byte-Pair Encoding (BPE) to input text: +```python +tokens = roberta.encode('Hello world!') +assert tokens.tolist() == [0, 31414, 232, 328, 2] +roberta.decode(tokens) # 'Hello world!' 
+``` + +##### Extract features from RoBERTa: +```python +# Extract the last layer's features +last_layer_features = roberta.extract_features(tokens) +assert last_layer_features.size() == torch.Size([1, 5, 1024]) + +# Extract all layer's features (layer 0 is the embedding layer) +all_layers = roberta.extract_features(tokens, return_all_hiddens=True) +assert len(all_layers) == 25 +assert torch.all(all_layers[-1] == last_layer_features) +``` + +##### Use RoBERTa for sentence-pair classification tasks: +```python +# Download RoBERTa already finetuned for MNLI +roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli') +roberta.eval() # disable dropout for evaluation + +# Encode a pair of sentences and make a prediction +tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.') +roberta.predict('mnli', tokens).argmax() # 0: contradiction + +# Encode another pair of sentences +tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.') +roberta.predict('mnli', tokens).argmax() # 2: entailment +``` + +##### Register a new (randomly initialized) classification head: +```python +roberta.register_classification_head('new_task', num_classes=3) +logprobs = roberta.predict('new_task', tokens) # tensor([[-1.1050, -1.0672, -1.1245]], grad_fn=<LogSoftmaxBackward>) +``` + +##### Batched prediction: +```python +import torch +from fairseq.data.data_utils import collate_tokens + +roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli') +roberta.eval() + +batch_of_pairs = [ + ['Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.'], + ['Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.'], + ['potatoes are awesome.', 'I like to run.'], + ['Mars is very far from earth.', 'Mars is very close.'], +] + +batch = collate_tokens( + [roberta.encode(pair[0], pair[1]) for pair in batch_of_pairs], pad_idx=1 +) + +logprobs = 
roberta.predict('mnli', batch) +print(logprobs.argmax(dim=1)) +# tensor([0, 2, 1, 0]) +``` + +##### Using the GPU: +```python +roberta.cuda() +roberta.predict('new_task', tokens) # tensor([[-1.1050, -1.0672, -1.1245]], device='cuda:0', grad_fn=<LogSoftmaxBackward>) +``` + +## Advanced usage + +#### Filling masks: + +RoBERTa can be used to fill `<mask>` tokens in the input. Some examples from the +[Natural Questions dataset](https://ai.google.com/research/NaturalQuestions/): +```python +roberta.fill_mask('The first Star wars movie came out in <mask>', topk=3) +# [('The first Star wars movie came out in 1977', 0.9504708051681519, ' 1977'), ('The first Star wars movie came out in 1978', 0.009986862540245056, ' 1978'), ('The first Star wars movie came out in 1979', 0.009574787691235542, ' 1979')] + +roberta.fill_mask('Vikram samvat calender is official in <mask>', topk=3) +# [('Vikram samvat calender is official in India', 0.21878819167613983, ' India'), ('Vikram samvat calender is official in Delhi', 0.08547237515449524, ' Delhi'), ('Vikram samvat calender is official in Gujarat', 0.07556215673685074, ' Gujarat')] + +roberta.fill_mask('<mask> is the common currency of the European Union', topk=3) +# [('Euro is the common currency of the European Union', 0.9456493854522705, 'Euro'), ('euro is the common currency of the European Union', 0.025748178362846375, 'euro'), ('€ is the common currency of the European Union', 0.011183084920048714, '€')] +``` + +#### Pronoun disambiguation (Winograd Schema Challenge): + +RoBERTa can be used to disambiguate pronouns. First install spaCy and download the English-language model: +```bash +pip install spacy +python -m spacy download en_core_web_lg +``` + +Next load the `roberta.large.wsc` model and call the `disambiguate_pronoun` +function. 
The pronoun should be surrounded by square brackets (`[]`) and the +query referent surrounded by underscores (`_`), or left blank to return the +predicted candidate text directly: +```python +roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.wsc', user_dir='examples/roberta/wsc') +roberta.cuda() # use the GPU (optional) + +roberta.disambiguate_pronoun('The _trophy_ would not fit in the brown suitcase because [it] was too big.') +# True +roberta.disambiguate_pronoun('The trophy would not fit in the brown _suitcase_ because [it] was too big.') +# False + +roberta.disambiguate_pronoun('The city councilmen refused the demonstrators a permit because [they] feared violence.') +# 'The city councilmen' +roberta.disambiguate_pronoun('The city councilmen refused the demonstrators a permit because [they] advocated violence.') +# 'demonstrators' +``` + +See the [RoBERTA Winograd Schema Challenge (WSC) README](wsc/README.md) for more details on how to train this model. + +#### Extract features aligned to words: + +By default RoBERTa outputs one feature vector per BPE token. You can instead +realign the features to match [spaCy's word-level tokenization](https://spacy.io/usage/linguistic-features#tokenization) +with the `extract_features_aligned_to_words` method. This will compute a +weighted average of the BPE-level features for each word and expose them in +spaCy's `Token.vector` attribute: +```python +doc = roberta.extract_features_aligned_to_words('I said, "hello RoBERTa."') +assert len(doc) == 10 +for tok in doc: + print('{:10}{} (...)'.format(str(tok), tok.vector[:5])) +# <s> tensor([-0.1316, -0.0386, -0.0832, -0.0477, 0.1943], grad_fn=<SliceBackward>) (...) +# I tensor([ 0.0559, 0.1541, -0.4832, 0.0880, 0.0120], grad_fn=<SliceBackward>) (...) +# said tensor([-0.1565, -0.0069, -0.8915, 0.0501, -0.0647], grad_fn=<SliceBackward>) (...) +# , tensor([-0.1318, -0.0387, -0.0834, -0.0477, 0.1944], grad_fn=<SliceBackward>) (...) 
+# " tensor([-0.0486, 0.1818, -0.3946, -0.0553, 0.0981], grad_fn=<SliceBackward>) (...) +# hello tensor([ 0.0079, 0.1799, -0.6204, -0.0777, -0.0923], grad_fn=<SliceBackward>) (...) +# RoBERTa tensor([-0.2339, -0.1184, -0.7343, -0.0492, 0.5829], grad_fn=<SliceBackward>) (...) +# . tensor([-0.1341, -0.1203, -0.1012, -0.0621, 0.1892], grad_fn=<SliceBackward>) (...) +# " tensor([-0.1341, -0.1203, -0.1012, -0.0621, 0.1892], grad_fn=<SliceBackward>) (...) +# </s> tensor([-0.0930, -0.0392, -0.0821, 0.0158, 0.0649], grad_fn=<SliceBackward>) (...) +``` + +#### Evaluating the `roberta.large.mnli` model: + +Example python code snippet to evaluate accuracy on the MNLI `dev_matched` set. +```python +label_map = {0: 'contradiction', 1: 'neutral', 2: 'entailment'} +ncorrect, nsamples = 0, 0 +roberta.cuda() +roberta.eval() +with open('glue_data/MNLI/dev_matched.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[8], tokens[9], tokens[-1] + tokens = roberta.encode(sent1, sent2) + prediction = roberta.predict('mnli', tokens).argmax().item() + prediction_label = label_map[prediction] + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) +# Expected output: 0.9060 +``` + +## Finetuning + +- [Finetuning on GLUE](README.glue.md) +- [Finetuning on custom classification tasks (e.g., IMDB)](README.custom_classification.md) +- [Finetuning on Winograd Schema Challenge (WSC)](wsc/README.md) +- [Finetuning on Commonsense QA (CQA)](commonsense_qa/README.md) + +## Pretraining using your own data + +See the [tutorial for pretraining RoBERTa using your own data](README.pretraining.md). 
+ +## Citation + +```bibtex +@article{liu2019roberta, + title = {RoBERTa: A Robustly Optimized BERT Pretraining Approach}, + author = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and + Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and + Luke Zettlemoyer and Veselin Stoyanov}, + journal={arXiv preprint arXiv:1907.11692}, + year = {2019}, +} +``` diff --git a/fairseq/examples/roberta/README.pretraining.md b/fairseq/examples/roberta/README.pretraining.md new file mode 100644 index 0000000..a4e7453 --- /dev/null +++ b/fairseq/examples/roberta/README.pretraining.md @@ -0,0 +1,84 @@ +# Pretraining RoBERTa using your own data + +This tutorial will walk you through pretraining RoBERTa over your own data. + +### 1) Preprocess the data + +Data should be preprocessed following the [language modeling format](/examples/language_model), i.e. each document should be separated by an empty line (only useful with `--sample-break-mode complete_doc`). Lines will be concatenated as a 1D text stream during training. + +We'll use the [WikiText-103 dataset](https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/) +to demonstrate how to preprocess raw text data with the GPT-2 BPE. Of course +this dataset is quite small, so the resulting pretrained model will perform +poorly, but it gives the general idea. 
+ +First download the dataset: +```bash +wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip +unzip wikitext-103-raw-v1.zip +``` + +Next encode it with the GPT-2 BPE: +```bash +mkdir -p gpt2_bpe +wget -O gpt2_bpe/encoder.json https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json +wget -O gpt2_bpe/vocab.bpe https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe +for SPLIT in train valid test; do \ + python -m examples.roberta.multiprocessing_bpe_encoder \ + --encoder-json gpt2_bpe/encoder.json \ + --vocab-bpe gpt2_bpe/vocab.bpe \ + --inputs wikitext-103-raw/wiki.${SPLIT}.raw \ + --outputs wikitext-103-raw/wiki.${SPLIT}.bpe \ + --keep-empty \ + --workers 60; \ +done +``` + +Finally preprocess/binarize the data using the GPT-2 fairseq dictionary: +```bash +wget -O gpt2_bpe/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt +fairseq-preprocess \ + --only-source \ + --srcdict gpt2_bpe/dict.txt \ + --trainpref wikitext-103-raw/wiki.train.bpe \ + --validpref wikitext-103-raw/wiki.valid.bpe \ + --testpref wikitext-103-raw/wiki.test.bpe \ + --destdir data-bin/wikitext-103 \ + --workers 60 +``` + +### 2) Train RoBERTa base +```bash +DATA_DIR=data-bin/wikitext-103 + +fairseq-hydra-train -m --config-dir examples/roberta/config/pretraining \ +--config-name base task.data=$DATA_DIR +``` + +**Note:** You can optionally resume training the released RoBERTa base model by +adding `checkpoint.restore_file=/path/to/roberta.base/model.pt`. + +**Note:** The above command assumes training on 8x32GB V100 GPUs. Each GPU uses +a batch size of 16 sequences (`dataset.batch_size`) and accumulates gradients to +further increase the batch size by 16x (`optimization.update_freq`), for a total batch size +of 2048 sequences. If you have fewer GPUs or GPUs with less memory you may need +to reduce `dataset.batch_size` and increase `optimization.update_freq` to compensate.
+Alternatively if you have more GPUs you can decrease `optimization.update_freq` accordingly +to increase training speed. + +**Note:** The learning rate and batch size are tightly connected and need to be +adjusted together. We generally recommend increasing the learning rate as you +increase the batch size according to the following table (although it's also +dataset dependent, so don't rely on the following values too closely): + +batch size | peak learning rate +---|--- +256 | 0.0001 +2048 | 0.0005 +8192 | 0.0007 + +### 3) Load your pretrained model +```python +from fairseq.models.roberta import RobertaModel +roberta = RobertaModel.from_pretrained('checkpoints', 'checkpoint_best.pt', 'path/to/data') +assert isinstance(roberta.model, torch.nn.Module) +``` diff --git a/fairseq/examples/roberta/README.race.md b/fairseq/examples/roberta/README.race.md new file mode 100644 index 0000000..13c917e --- /dev/null +++ b/fairseq/examples/roberta/README.race.md @@ -0,0 +1,68 @@ +# Finetuning RoBERTa on RACE tasks + +### 1) Download the data from RACE website (http://www.cs.cmu.edu/~glai1/data/race/) + +### 2) Preprocess RACE data: +```bash +python ./examples/roberta/preprocess_RACE.py --input-dir <input-dir> --output-dir <extracted-data-dir> +./examples/roberta/preprocess_RACE.sh <extracted-data-dir> <output-dir> +``` + +### 3) Fine-tuning on RACE: + +```bash +MAX_EPOCH=5 # Number of training epochs. +LR=1e-05 # Peak LR for fixed LR scheduler. +NUM_CLASSES=4 +MAX_SENTENCES=1 # Batch size per GPU. +UPDATE_FREQ=8 # Accumulate gradients to simulate training on 8 GPUs. 
+DATA_DIR=/path/to/race-output-dir +ROBERTA_PATH=/path/to/roberta/model.pt + +CUDA_VISIBLE_DEVICES=0,1 fairseq-train $DATA_DIR --ddp-backend=legacy_ddp \ + --restore-file $ROBERTA_PATH \ + --reset-optimizer --reset-dataloader --reset-meters \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --task sentence_ranking \ + --num-classes $NUM_CLASSES \ + --init-token 0 --separator-token 2 \ + --max-option-length 128 \ + --max-positions 512 \ + --shorten-method "truncate" \ + --arch roberta_large \ + --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \ + --criterion sentence_ranking \ + --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler fixed --lr $LR \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --batch-size $MAX_SENTENCES \ + --required-batch-size-multiple 1 \ + --update-freq $UPDATE_FREQ \ + --max-epoch $MAX_EPOCH +``` + +**Note:** + +a) As contexts in RACE are relatively long, we are using smaller batch size per GPU while increasing update-freq to achieve larger effective batch size. + +b) Above cmd-args and hyperparams are tested on one Nvidia `V100` GPU with `32gb` of memory for each task. Depending on the GPU memory resources available to you, you can increase `--update-freq` and reduce `--batch-size`. + +c) The setting in above command is based on our hyperparam search within a fixed search space (for careful comparison across models). You might be able to find better metrics with wider hyperparam search. 
+ +### 4) Evaluation: + +``` +DATA_DIR=/path/to/race-output-dir # data directory used during training +MODEL_PATH=/path/to/checkpoint_best.pt # path to the finetuned model checkpoint +PREDS_OUT=preds.tsv # output file path to save prediction +TEST_SPLIT=test # can be test (Middle) or test1 (High) +fairseq-validate \ + $DATA_DIR \ + --valid-subset $TEST_SPLIT \ + --path $MODEL_PATH \ + --batch-size 1 \ + --task sentence_ranking \ + --criterion sentence_ranking \ + --save-predictions $PREDS_OUT +``` diff --git a/fairseq/examples/roberta/commonsense_qa/README.md b/fairseq/examples/roberta/commonsense_qa/README.md new file mode 100644 index 0000000..7f386de --- /dev/null +++ b/fairseq/examples/roberta/commonsense_qa/README.md @@ -0,0 +1,99 @@ +# Finetuning RoBERTa on Commonsense QA + +We follow a similar approach to [finetuning RACE](../README.race.md). Specifically +for each question we construct five inputs, one for each of the five candidate +answer choices. Each input is constructed by concatenating the question and +candidate answer. We then encode each input and pass the resulting "[CLS]" +representations through a fully-connected layer to predict the correct answer. +We train with a standard cross-entropy loss. + +We also found it helpful to prepend a prefix of `Q:` to the question and `A:` to +the answer. The complete input format is: +``` +<s> Q: Where would I not want a fox? </s> A: hen house </s> +``` + +Our final submission is based on a hyperparameter search over the learning rate +(1e-5, 2e-5, 3e-5), batch size (8, 16), number of training steps (2000, 3000, +4000) and random seed. We selected the model with the best performance on the +development set after 100 trials. + +### 1) Download data from the Commonsense QA website (https://www.tau-nlp.org/commonsenseqa) +```bash +bash examples/roberta/commonsense_qa/download_cqa_data.sh +``` + +### 2) Finetune + +```bash +MAX_UPDATES=3000 # Number of training steps. 
+WARMUP_UPDATES=150 # Linearly increase LR over this many steps. +LR=1e-05 # Peak LR for polynomial LR scheduler. +MAX_SENTENCES=16 # Batch size. +SEED=1 # Random seed. +ROBERTA_PATH=/path/to/roberta/model.pt +DATA_DIR=data/CommonsenseQA + +# we use the --user-dir option to load the task from +# the examples/roberta/commonsense_qa directory: +FAIRSEQ_PATH=/path/to/fairseq +FAIRSEQ_USER_DIR=${FAIRSEQ_PATH}/examples/roberta/commonsense_qa + +CUDA_VISIBLE_DEVICES=0 fairseq-train --fp16 --ddp-backend=legacy_ddp \ + $DATA_DIR \ + --user-dir $FAIRSEQ_USER_DIR \ + --restore-file $ROBERTA_PATH \ + --reset-optimizer --reset-dataloader --reset-meters \ + --no-epoch-checkpoints --no-last-checkpoints --no-save-optimizer-state \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --task commonsense_qa --init-token 0 --bpe gpt2 \ + --arch roberta_large --max-positions 512 \ + --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \ + --criterion sentence_ranking --num-classes 5 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR \ + --warmup-updates $WARMUP_UPDATES --total-num-update $MAX_UPDATES \ + --batch-size $MAX_SENTENCES \ + --max-update $MAX_UPDATES \ + --log-format simple --log-interval 25 \ + --seed $SEED +``` + +The above command assumes training on 1 GPU with 32GB of RAM. For GPUs with +less memory, decrease `--batch-size` and increase `--update-freq` +accordingly to compensate. 
+ +### 3) Evaluate +```python +import json +import torch +from fairseq.models.roberta import RobertaModel +from examples.roberta import commonsense_qa # load the Commonsense QA task +roberta = RobertaModel.from_pretrained('checkpoints', 'checkpoint_best.pt', 'data/CommonsenseQA') +roberta.eval() # disable dropout +roberta.cuda() # use the GPU (optional) +nsamples, ncorrect = 0, 0 +with open('data/CommonsenseQA/valid.jsonl') as h: + for line in h: + example = json.loads(line) + scores = [] + for choice in example['question']['choices']: + input = roberta.encode( + 'Q: ' + example['question']['stem'], + 'A: ' + choice['text'], + no_separator=True + ) + score = roberta.predict('sentence_classification_head', input, return_logits=True) + scores.append(score) + pred = torch.cat(scores).argmax() + answer = ord(example['answerKey']) - ord('A') + nsamples += 1 + if pred == answer: + ncorrect += 1 + +print('Accuracy: ' + str(ncorrect / float(nsamples))) +# Accuracy: 0.7846027846027847 +``` + +The above snippet is not batched, which makes it quite slow. See [instructions +for batched prediction with RoBERTa](https://github.com/pytorch/fairseq/tree/main/examples/roberta#batched-prediction). diff --git a/fairseq/examples/roberta/commonsense_qa/__init__.py b/fairseq/examples/roberta/commonsense_qa/__init__.py new file mode 100644 index 0000000..42d21f3 --- /dev/null +++ b/fairseq/examples/roberta/commonsense_qa/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import commonsense_qa_task # noqa diff --git a/fairseq/examples/roberta/commonsense_qa/commonsense_qa_task.py b/fairseq/examples/roberta/commonsense_qa/commonsense_qa_task.py new file mode 100644 index 0000000..7d8f813 --- /dev/null +++ b/fairseq/examples/roberta/commonsense_qa/commonsense_qa_task.py @@ -0,0 +1,190 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import json +import os + +import numpy as np +import torch +from fairseq.data import ( + Dictionary, + IdDataset, + ListDataset, + NestedDictionaryDataset, + NumelDataset, + NumSamplesDataset, + RawLabelDataset, + RightPadDataset, + SortDataset, + data_utils, + encoders, +) +from fairseq.tasks import LegacyFairseqTask, register_task + + +@register_task("commonsense_qa") +class CommonsenseQATask(LegacyFairseqTask): + """Task to finetune RoBERTa for Commonsense QA; each answer choice becomes its own `Q: ... A: ...` input.""" + + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + parser.add_argument( + "data", metavar="DIR", help="path to data directory; we load <split>.jsonl" + ) + parser.add_argument( + "--init-token", + type=int, + default=None, + help="add token at the beginning of each batch item", + ) + parser.add_argument("--num-classes", type=int, default=5) + + def __init__(self, args, vocab): + super().__init__(args) + self.vocab = vocab + self.mask = vocab.add_symbol("<mask>") + + self.bpe = encoders.build_bpe(args) + + @classmethod + def load_dictionary(cls, filename): + """Load the dictionary from the filename and add the `<mask>` symbol to it + + Args: + filename (str): path of the dictionary file to load + """ + dictionary = Dictionary.load(filename) + dictionary.add_symbol("<mask>") + return dictionary + + @classmethod + def setup_task(cls, args, **kwargs): + assert ( + args.criterion == "sentence_ranking" + ), "Must set --criterion=sentence_ranking" + + # load the vocabulary from dict.txt inside the data directory + vocab = cls.load_dictionary(os.path.join(args.data, "dict.txt")) + print("| dictionary: {} types".format(len(vocab))) + + return cls(args, vocab) + + def load_dataset( + self, split, epoch=1, combine=False, data_path=None, return_only=False, **kwargs + ): + """Load a given dataset split from a .jsonl file and store it in self.datasets[split].
+ + Args: + split (str): name of the split (e.g., train, valid, test); read from <data>/<split>.jsonl unless data_path is given + """ + + def binarize(s, append_bos=False): + if self.bpe is not None: + s = self.bpe.encode(s) + tokens = self.vocab.encode_line( + s, + append_eos=True, + add_if_not_exist=False, + ).long() + if append_bos and self.args.init_token is not None: + tokens = torch.cat([tokens.new([self.args.init_token]), tokens]) + return tokens + + if data_path is None: + data_path = os.path.join(self.args.data, split + ".jsonl") + if not os.path.exists(data_path): + raise FileNotFoundError("Cannot find data: {}".format(data_path)) + + src_tokens = [[] for i in range(self.args.num_classes)] + src_lengths = [[] for i in range(self.args.num_classes)] + labels = [] + + with open(data_path) as h: + for line in h: + example = json.loads(line.strip()) + if "answerKey" in example: + label = ord(example["answerKey"]) - ord("A") + labels.append(label) + question = example["question"]["stem"] + assert len(example["question"]["choices"]) == self.args.num_classes + # format: `<s> Q: Where would I not want a fox?
</s> A: hen house </s>` + question = "Q: " + question + question_toks = binarize(question, append_bos=True) + for i, choice in enumerate(example["question"]["choices"]): + src = "A: " + choice["text"] + src_bin = torch.cat([question_toks, binarize(src)]) + src_tokens[i].append(src_bin) + src_lengths[i].append(len(src_bin)) + assert all( + len(src_tokens[0]) == len(src_tokens[i]) + for i in range(self.args.num_classes) + ) + assert len(src_tokens[0]) == len(src_lengths[0]) + assert len(labels) == 0 or len(labels) == len(src_tokens[0]) + + for i in range(self.args.num_classes): + src_lengths[i] = np.array(src_lengths[i]) + src_tokens[i] = ListDataset(src_tokens[i], src_lengths[i]) + src_lengths[i] = ListDataset(src_lengths[i]) + + dataset = { + "id": IdDataset(), + "nsentences": NumSamplesDataset(), + "ntokens": NumelDataset(src_tokens[0], reduce=True), + } + + for i in range(self.args.num_classes): + dataset.update( + { + "net_input{}".format(i + 1): { + "src_tokens": RightPadDataset( + src_tokens[i], + pad_idx=self.source_dictionary.pad(), + ), + "src_lengths": src_lengths[i], + } + } + ) + + if len(labels) > 0: + dataset.update({"target": RawLabelDataset(labels)}) + + dataset = NestedDictionaryDataset( + dataset, + sizes=[np.maximum.reduce([src_token.sizes for src_token in src_tokens])], + ) + + with data_utils.numpy_seed(self.args.seed): + dataset = SortDataset( + dataset, + # shuffle: random permutation drawn inside numpy_seed(self.args.seed) + sort_order=[np.random.permutation(len(dataset))], + ) + + print("| Loaded {} with {} samples".format(split, len(dataset))) + + self.datasets[split] = dataset + return self.datasets[split] + + def build_model(self, args, from_checkpoint=False): + from fairseq import models + + model = models.build_model(args, self) + + model.register_classification_head( + "sentence_classification_head", + num_classes=1, + ) + + return model + + @property + def source_dictionary(self): + return self.vocab + + @property + def target_dictionary(self): + return self.vocab diff --git
a/fairseq/examples/roberta/commonsense_qa/download_cqa_data.sh b/fairseq/examples/roberta/commonsense_qa/download_cqa_data.sh new file mode 100644 index 0000000..5f30009 --- /dev/null +++ b/fairseq/examples/roberta/commonsense_qa/download_cqa_data.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +OUTDIR=data/CommonsenseQA + +mkdir -p $OUTDIR + +wget -O $OUTDIR/train.jsonl https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl +wget -O $OUTDIR/valid.jsonl https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl +wget -O $OUTDIR/test.jsonl https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl +wget -O $OUTDIR/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt diff --git a/fairseq/examples/roberta/config/finetuning/cola.yaml b/fairseq/examples/roberta/config/finetuning/cola.yaml new file mode 100644 index 0000000..ac76611 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/cola.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 320 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 5336 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/mnli.yaml b/fairseq/examples/roberta/config/finetuning/mnli.yaml new file mode 100644 index 0000000..5be10c3 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/mnli.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 3 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 7432 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 123873 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/mrpc.yaml b/fairseq/examples/roberta/config/finetuning/mrpc.yaml new file mode 100644 index 0000000..aa8b7db --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/mrpc.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 137 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 2296 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/qnli.yaml b/fairseq/examples/roberta/config/finetuning/qnli.yaml new file mode 100644 index 0000000..b4595b0 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/qnli.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 1986 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 33112 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/qqp.yaml b/fairseq/examples/roberta/config/finetuning/qqp.yaml new file mode 100644 index 0000000..5a2b2ed --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/qqp.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 28318 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 113272 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/rte.yaml b/fairseq/examples/roberta/config/finetuning/rte.yaml new file mode 100644 index 0000000..7318465 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/rte.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 122 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 2036 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/run_config/local.yaml b/fairseq/examples/roberta/config/finetuning/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g.yaml b/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g.yaml new file mode 100644 index 0000000..8bc2185 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g.yaml @@ -0,0 +1,28 @@ + +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/roberta_ft/${env:PREFIX}/${hydra.job.config_name}/${env:SUFFIX} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 1000 + cpus_per_task: 8 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 60 + nodes: 1 + name: 
${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 + exclude: learnfair1381,learnfair5192,learnfair2304 diff --git a/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g_aws.yaml b/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g_aws.yaml new file mode 100644 index 0000000..085391c --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/run_config/slurm_1g_aws.yaml @@ -0,0 +1,25 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: '_' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /fsx-wav2vec/${env:USER}/roberta_ft/${env:PREFIX}/${hydra.job.config_name}/${env:SUFFIX} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir}/submitit + timeout_min: 1000 + cpus_per_task: 8 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: learnfair,wav2vec + max_num_timeout: 30 diff --git a/fairseq/examples/roberta/config/finetuning/sst_2.yaml b/fairseq/examples/roberta/config/finetuning/sst_2.yaml new file mode 100644 index 0000000..a93ad2f --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/sst_2.yaml @@ -0,0 +1,59 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 2 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + +dataset: + batch_size: 32 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 1256 + +optimization: + clip_norm: 0.0 + lr: [1e-05] + max_update: 20935 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/finetuning/sts_b.yaml b/fairseq/examples/roberta/config/finetuning/sts_b.yaml new file mode 100644 index 0000000..2d49522 --- /dev/null +++ b/fairseq/examples/roberta/config/finetuning/sts_b.yaml @@ -0,0 +1,58 @@ +# @package _group_ + +common: + fp16: true + fp16_init_scale: 4 + threshold_loss_scale: 1 + fp16_scale_window: 128 + log_format: json + log_interval: 200 + +task: + _name: sentence_prediction + data: ??? + init_token: 0 + separator_token: 2 + num_classes: 1 + max_positions: 512 + +checkpoint: + restore_file: ??? 
+ reset_optimizer: true + reset_dataloader: true + reset_meters: true + no_epoch_checkpoints: true + +distributed_training: + find_unused_parameters: true + distributed_world_size: 1 + +criterion: + _name: sentence_prediction + regression_target: true + +dataset: + batch_size: 16 + required_batch_size_multiple: 1 + max_tokens: 4400 + +optimizer: + _name: adam + weight_decay: 0.1 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 214 + +optimization: + clip_norm: 0.0 + lr: [2e-05] + max_update: 3598 + max_epoch: 10 + +model: + _name: roberta + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/pretraining/base.yaml b/fairseq/examples/roberta/config/pretraining/base.yaml new file mode 100644 index 0000000..9782990 --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/base.yaml @@ -0,0 +1,42 @@ +# @package _group_ +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + no_epoch_checkpoints: true + +task: + _name: masked_lm + data: ??? 
+ sample_break_mode: complete + tokens_per_sample: 512 + +criterion: masked_lm + +dataset: + batch_size: 16 + ignore_unused_valid_subsets: true + +optimizer: + _name: adam + weight_decay: 0.01 + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 10000 + +optimization: + clip_norm: 0 + lr: [0.0005] + max_update: 125000 + update_freq: [16] + +model: + _name: roberta + max_positions: 512 + dropout: 0.1 + attention_dropout: 0.1 diff --git a/fairseq/examples/roberta/config/pretraining/run_config/local.yaml b/fairseq/examples/roberta/config/pretraining/run_config/local.yaml new file mode 100644 index 0000000..45595f9 --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/run_config/local.yaml @@ -0,0 +1,15 @@ +# @package _global_ +hydra: + sweep: + dir: ${env:PWD}/tmp_dbg/${now:%H-%M-%S} + +distributed_training: + distributed_world_size: 1 + nprocs_per_node: 1 + distributed_port: -1 + +common: + log_interval: 1 + +dataset: + num_workers: 0 diff --git a/fairseq/examples/roberta/config/pretraining/run_config/slurm_2.yaml b/fairseq/examples/roberta/config/pretraining/run_config/slurm_2.yaml new file mode 100644 index 0000000..006a0f2 --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/run_config/slurm_2.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} 
+ timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/roberta/config/pretraining/run_config/slurm_2_aws.yaml b/fairseq/examples/roberta/config/pretraining/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..a5937ea --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/run_config/slurm_2_aws.yaml @@ -0,0 +1,39 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - task.post_save_script + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + - model.model_path + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec + max_num_timeout: 30 diff --git a/fairseq/examples/roberta/config/pretraining/run_config/slurm_3.yaml b/fairseq/examples/roberta/config/pretraining/run_config/slurm_3.yaml new file mode 100644 index 0000000..0e1555d --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/run_config/slurm_3.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - 
distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git a/fairseq/examples/roberta/config/pretraining/run_config/slurm_4.yaml b/fairseq/examples/roberta/config/pretraining/run_config/slurm_4.yaml new file mode 100644 index 0000000..c54d735 --- /dev/null +++ b/fairseq/examples/roberta/config/pretraining/run_config/slurm_4.yaml @@ -0,0 +1,36 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 4 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb,ib4 + max_num_timeout: 30 diff --git 
def main():
    """
    Helper script to encode raw text with the GPT-2 BPE using multiple processes.

    The encoder.json and vocab.bpe files can be obtained here:
    - https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json
    - https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe

    The i-th lines of all ``--inputs`` files are read in lockstep and encoded
    together by a worker pool; each encoded line is written to the output at
    the same position in ``--outputs``. A group of lines is dropped as a whole
    when any of them is empty, unless ``--keep-empty`` is given. Progress and
    filter statistics go to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder-json",
        help="path to encoder.json",
    )
    parser.add_argument(
        "--vocab-bpe",
        type=str,
        help="path to vocab.bpe",
    )
    parser.add_argument(
        "--inputs",
        nargs="+",
        default=["-"],
        help="input files to filter/encode",
    )
    parser.add_argument(
        "--outputs",
        nargs="+",
        default=["-"],
        help="path to save encoded outputs",
    )
    parser.add_argument(
        "--keep-empty",
        action="store_true",
        help="keep empty lines",
    )
    parser.add_argument("--workers", type=int, default=20)
    args = parser.parse_args()

    # Validate with parser.error rather than `assert`: assertions vanish
    # under `python -O`, which would silently mis-pair inputs and outputs.
    if len(args.inputs) != len(args.outputs):
        parser.error("number of input and output paths should match")

    with contextlib.ExitStack() as stack:
        inputs = [
            stack.enter_context(open(in_path, "r", encoding="utf-8"))
            if in_path != "-"
            else sys.stdin
            for in_path in args.inputs
        ]
        outputs = [
            stack.enter_context(open(out_path, "w", encoding="utf-8"))
            if out_path != "-"
            else sys.stdout
            for out_path in args.outputs
        ]

        encoder = MultiprocessingEncoder(args)
        # Register the pool on the stack so the workers are always torn down,
        # including when encoding raises. The stack unwinds in reverse order:
        # the pool is terminated first, then the files are closed.
        pool = stack.enter_context(
            Pool(args.workers, initializer=encoder.initializer)
        )
        encoded_lines = pool.imap(encoder.encode_lines, zip(*inputs), 100)

        stats = Counter()
        for i, (filt, enc_lines) in enumerate(encoded_lines, start=1):
            if filt == "PASS":
                for enc_line, output_h in zip(enc_lines, outputs):
                    print(enc_line, file=output_h)
            else:
                stats["num_filtered_" + filt] += 1
            if i % 10000 == 0:
                print("processed {} lines".format(i), file=sys.stderr)

        for k, v in stats.most_common():
            print("[{}] filtered {} lines".format(k, v), file=sys.stderr)


class MultiprocessingEncoder(object):
    """Picklable wrapper that runs the GPT-2 BPE inside pool workers.

    The BPE object itself lives in the module-level global ``bpe`` that
    ``initializer`` creates once per worker process; only ``args`` travels
    with the instance.
    """

    def __init__(self, args):
        self.args = args

    def initializer(self):
        """Build the per-process BPE encoder from the paths in ``args``."""
        global bpe
        bpe = get_encoder(self.args.encoder_json, self.args.vocab_bpe)

    def encode(self, line):
        """Return the BPE ids of ``line`` as a list of strings."""
        return [str(token_id) for token_id in bpe.encode(line)]

    def decode(self, tokens):
        """Return the text for an iterable of BPE ids."""
        return bpe.decode(tokens)

    def encode_lines(self, lines):
        """
        Encode a set of lines. All lines will be encoded together.

        Returns ``["PASS", encoded]`` with one space-joined id string per
        input line, or ``["EMPTY", None]`` as soon as a line is empty after
        stripping and ``keep_empty`` is not set.
        """
        enc_lines = []
        for line in lines:
            line = line.strip()
            if not line and not self.args.keep_empty:
                return ["EMPTY", None]
            enc_lines.append(" ".join(self.encode(line)))
        return ["PASS", enc_lines]

    def decode_lines(self, lines):
        """Decode lines of space-separated ids; returns ``["PASS", texts]``."""
        dec_lines = [
            self.decode(map(int, line.strip().split())) for line in lines
        ]
        return ["PASS", dec_lines]


if __name__ == "__main__":
    main()
# Preprocess raw GLUE data for RoBERTa finetuning:
#   1. strip headers / malformed rows into <task>/processed/<split>.tsv
#   2. split every row into raw input columns and a label column
#   3. BPE-encode the inputs with the GPT-2 encoder
#   4. binarize inputs and labels with fairseq-preprocess into <TASK>-bin/
#
# raw glue data as downloaded by glue download script (https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e)
if [[ $# -ne 2 ]]; then
  echo "Run as following:"
  echo "./examples/roberta/preprocess_GLUE_tasks.sh <glue_data_folder> <task_name>"
  exit 1
fi

GLUE_DATA_FOLDER=$1

# download bpe encoder.json, vocabulary and fairseq dictionary
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json'
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe'
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt'

TASKS=$2 # QQP

if [ "$TASKS" = "ALL" ]
then
  TASKS="QQP MNLI QNLI MRPC RTE STS-B SST-2 CoLA"
fi

# $TASKS is intentionally unquoted: it is a space-separated list of task names.
for TASK in $TASKS
do
  echo "Preprocessing $TASK"

  TASK_DATA_FOLDER="$GLUE_DATA_FOLDER/$TASK"
  echo "Raw data as downloaded from glue website: $TASK_DATA_FOLDER"

  # Per-task layout of the TSV files (1-based column numbers for `cut`).
  SPLITS="train dev test"
  INPUT_COUNT=2
  if [ "$TASK" = "QQP" ]
  then
    INPUT_COLUMNS=( 4 5 )
    TEST_INPUT_COLUMNS=( 2 3 )
    LABEL_COLUMN=6
  elif [ "$TASK" = "MNLI" ]
  then
    SPLITS="train dev_matched dev_mismatched test_matched test_mismatched"
    INPUT_COLUMNS=( 9 10 )
    TEST_INPUT_COLUMNS=( 9 10 )
    DEV_LABEL_COLUMN=16
    LABEL_COLUMN=12
  elif [ "$TASK" = "QNLI" ]
  then
    INPUT_COLUMNS=( 2 3 )
    TEST_INPUT_COLUMNS=( 2 3 )
    LABEL_COLUMN=4
  elif [ "$TASK" = "MRPC" ]
  then
    INPUT_COLUMNS=( 4 5 )
    TEST_INPUT_COLUMNS=( 4 5 )
    LABEL_COLUMN=1
  elif [ "$TASK" = "RTE" ]
  then
    INPUT_COLUMNS=( 2 3 )
    TEST_INPUT_COLUMNS=( 2 3 )
    LABEL_COLUMN=4
  elif [ "$TASK" = "STS-B" ]
  then
    INPUT_COLUMNS=( 8 9 )
    TEST_INPUT_COLUMNS=( 8 9 )
    LABEL_COLUMN=10
  # Following are single sentence tasks.
  elif [ "$TASK" = "SST-2" ]
  then
    INPUT_COLUMNS=( 1 )
    TEST_INPUT_COLUMNS=( 2 )
    LABEL_COLUMN=2
    INPUT_COUNT=1
  elif [ "$TASK" = "CoLA" ]
  then
    INPUT_COLUMNS=( 4 )
    TEST_INPUT_COLUMNS=( 2 )
    LABEL_COLUMN=2
    INPUT_COUNT=1
  else
    # Fail fast: without this branch an unrecognised task would go on to
    # `rm -rf` and `cut` with column settings left over from a previous
    # iteration (or unset altogether).
    echo "Unknown task: $TASK" >&2
    exit 1
  fi

  # Strip out header and filter lines that don't have expected number of fields.
  rm -rf "$TASK_DATA_FOLDER/processed"
  mkdir -p "$TASK_DATA_FOLDER/processed"
  for SPLIT in $SPLITS
  do
    # CoLA train and dev doesn't have header.
    if [[ ( "$TASK" = "CoLA") && ( "$SPLIT" != "test" ) ]]
    then
      cp "$TASK_DATA_FOLDER/$SPLIT.tsv" "$TASK_DATA_FOLDER/processed/$SPLIT.tsv.temp";
    else
      tail -n +2 "$TASK_DATA_FOLDER/$SPLIT.tsv" > "$TASK_DATA_FOLDER/processed/$SPLIT.tsv.temp";
    fi

    # Remove unformatted lines from train and dev files for QQP dataset.
    if [[ ( "$TASK" = "QQP") && ( "$SPLIT" != "test" ) ]]
    then
      awk -F '\t' -v NUM_FIELDS=6 'NF==NUM_FIELDS{print}{}' "$TASK_DATA_FOLDER/processed/$SPLIT.tsv.temp" > "$TASK_DATA_FOLDER/processed/$SPLIT.tsv";
    else
      cp "$TASK_DATA_FOLDER/processed/$SPLIT.tsv.temp" "$TASK_DATA_FOLDER/processed/$SPLIT.tsv";
    fi
    rm "$TASK_DATA_FOLDER/processed/$SPLIT.tsv.temp";
  done

  # Split into input0, input1 and label
  for SPLIT in $SPLITS
  do
    for INPUT_TYPE in $(seq 0 $((INPUT_COUNT-1)))
    do
      # Test files use a different column layout than train/dev.
      if [[ "$SPLIT" != test* ]]
      then
        COLUMN_NUMBER=${INPUT_COLUMNS[$INPUT_TYPE]}
      else
        COLUMN_NUMBER=${TEST_INPUT_COLUMNS[$INPUT_TYPE]}
      fi
      cut -f"$COLUMN_NUMBER" "$TASK_DATA_FOLDER/processed/$SPLIT.tsv" > "$TASK_DATA_FOLDER/processed/$SPLIT.raw.input$INPUT_TYPE";
    done

    # Test splits carry no labels.
    if [[ "$SPLIT" != test* ]]
    then
      if [ "$TASK" = "MNLI" ] && [ "$SPLIT" != "train" ]
      then
        cut -f"$DEV_LABEL_COLUMN" "$TASK_DATA_FOLDER/processed/$SPLIT.tsv" > "$TASK_DATA_FOLDER/processed/$SPLIT.label";
      else
        cut -f"$LABEL_COLUMN" "$TASK_DATA_FOLDER/processed/$SPLIT.tsv" > "$TASK_DATA_FOLDER/processed/$SPLIT.label";
      fi
    fi

    # BPE encode.
    for INPUT_TYPE in $(seq 0 $((INPUT_COUNT-1)))
    do
      LANG="input$INPUT_TYPE"
      echo "BPE encoding $SPLIT/$LANG"
      python -m examples.roberta.multiprocessing_bpe_encoder \
        --encoder-json encoder.json \
        --vocab-bpe vocab.bpe \
        --inputs "$TASK_DATA_FOLDER/processed/$SPLIT.raw.$LANG" \
        --outputs "$TASK_DATA_FOLDER/processed/$SPLIT.$LANG" \
        --workers 60 \
        --keep-empty;
    done
  done

  # Remove output directory.
  rm -rf "$TASK-bin"

  # "LANG" below is a literal placeholder that is substituted per input
  # (or with "label") via ${VAR//LANG/...} further down.
  DEVPREF="$TASK_DATA_FOLDER/processed/dev.LANG"
  TESTPREF="$TASK_DATA_FOLDER/processed/test.LANG"
  if [ "$TASK" = "MNLI" ]
  then
    DEVPREF="$TASK_DATA_FOLDER/processed/dev_matched.LANG,$TASK_DATA_FOLDER/processed/dev_mismatched.LANG"
    TESTPREF="$TASK_DATA_FOLDER/processed/test_matched.LANG,$TASK_DATA_FOLDER/processed/test_mismatched.LANG"
  fi

  # Run fairseq preprocessing:
  for INPUT_TYPE in $(seq 0 $((INPUT_COUNT-1)))
  do
    LANG="input$INPUT_TYPE"
    fairseq-preprocess \
      --only-source \
      --trainpref "$TASK_DATA_FOLDER/processed/train.$LANG" \
      --validpref "${DEVPREF//LANG/$LANG}" \
      --testpref "${TESTPREF//LANG/$LANG}" \
      --destdir "$TASK-bin/$LANG" \
      --workers 60 \
      --srcdict dict.txt;
  done
  if [[ "$TASK" != "STS-B" ]]
  then
    fairseq-preprocess \
      --only-source \
      --trainpref "$TASK_DATA_FOLDER/processed/train.label" \
      --validpref "${DEVPREF//LANG/label}" \
      --destdir "$TASK-bin/label" \
      --workers 60;
  else
    # For STS-B output range is converted to be between: [0.0, 1.0]
    mkdir -p "$TASK-bin/label"
    awk '{print $1 / 5.0 }' "$TASK_DATA_FOLDER/processed/train.label" > "$TASK-bin/label/train.label"
    awk '{print $1 / 5.0 }' "$TASK_DATA_FOLDER/processed/dev.label" > "$TASK-bin/label/valid.label"
  fi
done
class InputExample:
    """One RACE question.

    Attributes are plain values set by the constructor:
    ``paragraph`` is the whitespace-normalised article text, ``qa_list`` the
    four question+option strings, and ``label`` the 0-based index of the
    correct option.
    """

    def __init__(self, paragraph, qa_list, label):
        self.paragraph = paragraph
        self.qa_list = qa_list
        self.label = label


def get_examples(data_dir, set_type):
    """
    Extract paragraph and question-answer list from each json file

    Args:
        data_dir: root of the RACE dataset; files are read from
            ``<data_dir>/<split>/<level>/``.
        set_type: either a split name (``"train"``), which reads both the
            ``middle`` and ``high`` levels, or ``"<split>-<level>"``
            (``"test-high"``), which reads only that level.

    Returns:
        A list of ``InputExample``, one per question. Files within a level
        are visited in sorted name order so the output is reproducible.
    """
    examples = []

    levels = ["middle", "high"]
    set_type_c = set_type.split("-")
    if len(set_type_c) == 2:
        levels = [set_type_c[1]]
        set_type = set_type_c[0]
    for level in levels:
        cur_dir = os.path.join(data_dir, set_type, level)
        # os.listdir order is arbitrary; sort for deterministic output.
        for filename in sorted(os.listdir(cur_dir)):
            cur_path = os.path.join(cur_dir, filename)
            with open(cur_path, "r", encoding="utf-8") as f:
                cur_data = json.load(f)
            answers = cur_data["answers"]
            options = cur_data["options"]
            questions = cur_data["questions"]
            context = cur_data["article"].replace("\n", " ")
            context = re.sub(r"\s+", " ", context)
            for i, answer in enumerate(answers):
                # Answers are letters "A".."D"; convert to a 0-based index.
                label = ord(answer) - ord("A")
                qa_list = []
                question = questions[i]
                # RACE questions always have four options.
                for j in range(4):
                    option = options[i][j]
                    # Cloze-style questions contain "_" as the blank to fill;
                    # otherwise the option is appended to the question.
                    if "_" in question:
                        qa_cat = question.replace("_", option)
                    else:
                        qa_cat = " ".join([question, option])
                    qa_cat = re.sub(r"\s+", " ", qa_cat)
                    qa_list.append(qa_cat)
                examples.append(InputExample(context, qa_list, label))

    return examples


def main():
    """
    Helper script to extract paragraphs questions and answers from RACE datasets.

    For each of ``train``, ``dev``, ``test-middle`` and ``test-high`` this
    writes, line-aligned, ``<set>.input0`` (article), ``<set>.input1`` ..
    ``<set>.input4`` (question combined with each option) and ``<set>.label``
    into ``--output-dir``.
    """
    from contextlib import ExitStack

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input-dir",
        required=True,
        help="input directory for downloaded RACE dataset",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="output directory for extracted data",
    )
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    for set_type in ["train", "dev", "test-middle", "test-high"]:
        examples = get_examples(args.input_dir, set_type)
        # ExitStack guarantees all six files are closed even if a write fails.
        with ExitStack() as stack:

            def open_output(suffix):
                path = os.path.join(args.output_dir, set_type + suffix)
                # UTF-8 matches what the downstream BPE encoder reads.
                return stack.enter_context(open(path, "w", encoding="utf-8"))

            qa_files = [open_output(".input" + str(i + 1)) for i in range(4)]
            outf_context = open_output(".input0")
            outf_label = open_output(".label")
            for example in examples:
                outf_context.write(example.paragraph + "\n")
                for qa_file, qa_text in zip(qa_files, example.qa_list):
                    qa_file.write(qa_text + "\n")
                outf_label.write(str(example.label) + "\n")


if __name__ == "__main__":
    main()
# BPE-encode and binarize RACE data for RoBERTa finetuning.
#
# data should be downloaded and processed with preprocess_RACE.py
if [[ $# -ne 2 ]]; then
  echo "Run as following:"
  echo "./examples/roberta/preprocess_RACE.sh <race_data_folder> <output_folder>"
  exit 1
fi

RACE_DATA_FOLDER=$1
OUT_DATA_FOLDER=$2

# download bpe encoder.json, vocabulary and fairseq dictionary
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json'
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe'
wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt'

SPLITS="train dev test-middle test-high"
# input0 is the article; input1..input4 are the question+option strings.
INPUT_TYPES="input0 input1 input2 input3 input4"

# Step 1: BPE-encode every <split>.<input> file into <split>.<input>.bpe
for INPUT_TYPE in $INPUT_TYPES
do
  for SPLIT in $SPLITS
  do
    echo "BPE encoding $SPLIT/$INPUT_TYPE"
    python -m examples.roberta.multiprocessing_bpe_encoder \
      --encoder-json encoder.json \
      --vocab-bpe vocab.bpe \
      --inputs "$RACE_DATA_FOLDER/$SPLIT.$INPUT_TYPE" \
      --outputs "$RACE_DATA_FOLDER/$SPLIT.$INPUT_TYPE.bpe" \
      --workers 10 \
      --keep-empty;

  done
done

# Step 2: binarize each input with the shared RoBERTa dictionary.
for INPUT_TYPE in $INPUT_TYPES
do
  fairseq-preprocess \
    --only-source \
    --trainpref "$RACE_DATA_FOLDER/train.$INPUT_TYPE.bpe" \
    --validpref "$RACE_DATA_FOLDER/dev.$INPUT_TYPE.bpe" \
    --testpref "$RACE_DATA_FOLDER/test-middle.$INPUT_TYPE.bpe,$RACE_DATA_FOLDER/test-high.$INPUT_TYPE.bpe" \
    --destdir "$OUT_DATA_FOLDER/$INPUT_TYPE" \
    --workers 10 \
    --srcdict dict.txt;
done

# Step 3: labels are copied as plain text; test-middle/test-high become the
# test/test1 subsets, matching the order given to --testpref above.
rm -rf "$OUT_DATA_FOLDER/label"
mkdir -p "$OUT_DATA_FOLDER/label"
cp "$RACE_DATA_FOLDER/train.label" "$OUT_DATA_FOLDER/label/"
cp "$RACE_DATA_FOLDER/dev.label" "$OUT_DATA_FOLDER/label/valid.label"
cp "$RACE_DATA_FOLDER/test-middle.label" "$OUT_DATA_FOLDER/label/test.label"
cp "$RACE_DATA_FOLDER/test-high.label" "$OUT_DATA_FOLDER/label/test1.label"
Finetuning RoBERTa on Winograd Schema Challenge (WSC) data + +The following instructions can be used to finetune RoBERTa on the WSC training +data provided by [SuperGLUE](https://super.gluebenchmark.com/). + +Note that there is high variance in the results. For our GLUE/SuperGLUE +submission we swept over the learning rate (1e-5, 2e-5, 3e-5), batch size (16, +32, 64) and total number of updates (500, 1000, 2000, 3000), as well as the +random seed. Out of ~100 runs we chose the best 7 models and ensembled them. + +**Approach:** The instructions below use a slightly different loss function than +what's described in the original RoBERTa arXiv paper. In particular, +[Kocijan et al. (2019)](https://arxiv.org/abs/1905.06290) introduce a margin +ranking loss between `(query, candidate)` pairs with tunable hyperparameters +alpha and beta. This is supported in our code as well with the `--wsc-margin-alpha` and +`--wsc-margin-beta` arguments. However, we achieved slightly better (and more robust) +results on the development set by instead using a single cross entropy loss term +over the log-probabilities for the query and all mined candidates. **The +candidates are mined using spaCy from each input sentence in isolation, so the +approach remains strictly pointwise.** This reduces the number of +hyperparameters and our best model achieved 92.3% development set accuracy, +compared to ~90% accuracy for the margin loss. Later versions of the RoBERTa +arXiv paper will describe this updated formulation. + +### 1) Download the WSC data from the SuperGLUE website: +```bash +wget https://dl.fbaipublicfiles.com/glue/superglue/data/v2/WSC.zip +unzip WSC.zip + +# we also need to copy the RoBERTa dictionary into the same directory +wget -O WSC/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt +``` + +### 2) Finetune over the provided training data: +```bash +TOTAL_NUM_UPDATES=2000 # Total number of training steps. +WARMUP_UPDATES=250 # Linearly increase LR over this many steps.
+LR=2e-05 # Peak LR for polynomial LR scheduler. +MAX_SENTENCES=16 # Batch size per GPU. +SEED=1 # Random seed. +ROBERTA_PATH=/path/to/roberta/model.pt + +# we use the --user-dir option to load the task and criterion +# from the examples/roberta/wsc directory: +FAIRSEQ_PATH=/path/to/fairseq +FAIRSEQ_USER_DIR=${FAIRSEQ_PATH}/examples/roberta/wsc + +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train WSC/ \ + --restore-file $ROBERTA_PATH \ + --reset-optimizer --reset-dataloader --reset-meters \ + --no-epoch-checkpoints --no-last-checkpoints --no-save-optimizer-state \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --valid-subset val \ + --fp16 --ddp-backend legacy_ddp \ + --user-dir $FAIRSEQ_USER_DIR \ + --task wsc --criterion wsc --wsc-cross-entropy \ + --arch roberta_large --bpe gpt2 --max-positions 512 \ + --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 \ + --lr-scheduler polynomial_decay --lr $LR \ + --warmup-updates $WARMUP_UPDATES --total-num-update $TOTAL_NUM_UPDATES \ + --batch-size $MAX_SENTENCES \ + --max-update $TOTAL_NUM_UPDATES \ + --log-format simple --log-interval 100 \ + --seed $SEED +``` + +The above command assumes training on 4 GPUs, but you can achieve the same +results on a single GPU by adding `--update-freq=4`. 
+ +### 3) Evaluate +```python +from fairseq.models.roberta import RobertaModel +from examples.roberta.wsc import wsc_utils # also loads WSC task and criterion +roberta = RobertaModel.from_pretrained('checkpoints', 'checkpoint_best.pt', 'WSC/') +roberta.cuda() +nsamples, ncorrect = 0, 0 +for sentence, label in wsc_utils.jsonl_iterator('WSC/val.jsonl', eval=True): + pred = roberta.disambiguate_pronoun(sentence) + nsamples += 1 + if pred == label: + ncorrect += 1 +print('Accuracy: ' + str(ncorrect / float(nsamples))) +# Accuracy: 0.9230769230769231 +``` + +## RoBERTa training on WinoGrande dataset +We also provide a `winogrande` task and criterion for finetuning on +[WinoGrande](https://mosaic.allenai.org/projects/winogrande)-like datasets, +where there are always two candidates and exactly one is correct. +This is a more efficient implementation for such cases. + +```bash +TOTAL_NUM_UPDATES=23750 # Total number of training steps. +WARMUP_UPDATES=2375 # Linearly increase LR over this many steps. +LR=1e-05 # Peak LR for polynomial LR scheduler. +MAX_SENTENCES=32 # Batch size per GPU. +SEED=1 # Random seed.
+ROBERTA_PATH=/path/to/roberta/model.pt + +# we use the --user-dir option to load the task and criterion +# from the examples/roberta/wsc directory: +FAIRSEQ_PATH=/path/to/fairseq +FAIRSEQ_USER_DIR=${FAIRSEQ_PATH}/examples/roberta/wsc + +cd fairseq +CUDA_VISIBLE_DEVICES=0 fairseq-train winogrande_1.0/ \ + --restore-file $ROBERTA_PATH \ + --reset-optimizer --reset-dataloader --reset-meters \ + --no-epoch-checkpoints --no-last-checkpoints --no-save-optimizer-state \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --valid-subset val \ + --fp16 --ddp-backend legacy_ddp \ + --user-dir $FAIRSEQ_USER_DIR \ + --task winogrande --criterion winogrande \ + --wsc-margin-alpha 5.0 --wsc-margin-beta 0.4 \ + --arch roberta_large --bpe gpt2 --max-positions 512 \ + --dropout 0.1 --attention-dropout 0.1 --weight-decay 0.01 \ + --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-06 \ + --lr-scheduler polynomial_decay --lr $LR \ + --warmup-updates $WARMUP_UPDATES --total-num-update $TOTAL_NUM_UPDATES \ + --batch-size $MAX_SENTENCES \ + --max-update $TOTAL_NUM_UPDATES \ + --log-format simple --log-interval 100 +``` diff --git a/fairseq/examples/roberta/wsc/__init__.py b/fairseq/examples/roberta/wsc/__init__.py new file mode 100644 index 0000000..78afa47 --- /dev/null +++ b/fairseq/examples/roberta/wsc/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import wsc_criterion # noqa +from . import wsc_task # noqa diff --git a/fairseq/examples/roberta/wsc/wsc_criterion.py b/fairseq/examples/roberta/wsc/wsc_criterion.py new file mode 100644 index 0000000..ed0251f --- /dev/null +++ b/fairseq/examples/roberta/wsc/wsc_criterion.py @@ -0,0 +1,167 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
@register_criterion("wsc")
class WSCCriterion(LegacyFairseqCriterion):
    """Criterion for the Winograd Schema Challenge task.

    Each instance compares the model's score for the query span against the
    scores of the candidate spans; the score of a span is the mean
    log-probability of its tokens when they are masked out (see
    ``get_lprobs``). Either a margin loss or a cross-entropy loss over those
    scores is used (see ``get_loss``).
    """

    def __init__(self, args, task):
        super().__init__(args, task)
        # Optional file that receives one "id<TAB>pred<TAB>label" line per
        # instance seen by forward().
        if self.args.save_predictions is not None:
            self.prediction_h = open(self.args.save_predictions, "w")
        else:
            self.prediction_h = None
        self.bpe = encoders.build_bpe(args.bpe)
        self.tokenizer = encoders.build_tokenizer(args.tokenizer)

    def __del__(self):
        # Close the predictions file, if one was opened.
        if self.prediction_h is not None:
            self.prediction_h.close()

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        parser.add_argument("--wsc-margin-alpha", type=float, metavar="A", default=1.0)
        parser.add_argument("--wsc-margin-beta", type=float, metavar="B", default=0.0)
        parser.add_argument(
            "--wsc-cross-entropy",
            action="store_true",
            help="use cross entropy formulation instead of margin loss",
        )
        parser.add_argument(
            "--save-predictions", metavar="FILE", help="file to save predictions to"
        )

    def get_masked_input(self, tokens, mask):
        """Return a copy of ``tokens`` with positions selected by ``mask``
        replaced by the task's mask token index."""
        masked_tokens = tokens.clone()
        masked_tokens[mask] = self.task.mask
        return masked_tokens

    def get_lprobs(self, model, tokens, mask):
        """Score each row of ``tokens`` by its mean masked-token log-prob.

        The masked positions are replaced by the mask token, the model is
        run, and the log-probability assigned to the original token at each
        position is gathered. Returns, per row, the average of those
        log-probabilities over the positions where ``mask`` is set.
        """
        logits, _ = model(src_tokens=self.get_masked_input(tokens, mask))
        # log-softmax is computed in float32 regardless of the logits dtype.
        lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float)
        # Pick the log-prob of the true token at every position.
        scores = lprobs.gather(2, tokens.unsqueeze(-1)).squeeze(-1)
        mask = mask.type_as(scores)
        # Zero out unmasked positions and average over the masked ones.
        scores = (scores * mask).sum(dim=-1) / mask.sum(dim=-1)
        return scores

    def get_loss(self, query_lprobs, cand_lprobs):
        """Return the loss for query scores against candidate scores.

        With ``--wsc-cross-entropy``: cross entropy over the concatenated
        scores ``[query, candidates]`` with the query (index 0) as target.
        Otherwise: the summed margin loss
        ``-query + alpha * max(0, cand - query + beta)``.
        """
        if self.args.wsc_cross_entropy:
            return F.cross_entropy(
                torch.cat([query_lprobs, cand_lprobs]).unsqueeze(0),
                query_lprobs.new([0]).long(),
            )
        else:
            return (
                -query_lprobs
                + self.args.wsc_margin_alpha
                * (cand_lprobs - query_lprobs + self.args.wsc_margin_beta).clamp(min=0)
            ).sum()

    def forward(self, model, sample, reduce=True):
        """Compute loss and accuracy for a batch, one instance at a time.

        Returns a tuple ``(loss, sample_size, logging_output)``.
        ``sample_size`` is the number of labelled instances (at least 1).
        """
        # compute loss and accuracy
        loss, nloss = 0.0, 0
        ncorrect, nqueries = 0, 0

        for i, label in enumerate(sample["labels"]):
            query_lprobs = self.get_lprobs(
                model,
                sample["query_tokens"][i].unsqueeze(0),
                sample["query_masks"][i].unsqueeze(0),
            )
            cand_lprobs = self.get_lprobs(
                model,
                sample["candidate_tokens"][i],
                sample["candidate_masks"][i],
            )

            # Predict True only if the query scores at least as high as
            # every candidate.
            pred = (query_lprobs >= cand_lprobs).all().item()

            # Unlabelled instances (label is None) are skipped for accuracy
            # and loss but still written to the predictions file below.
            if label is not None:
                label = 1 if label else 0
                ncorrect += 1 if pred == label else 0
                nqueries += 1

                if label:
                    # only compute a loss for positive instances
                    nloss += 1
                    loss += self.get_loss(query_lprobs, cand_lprobs)

            id = sample["id"][i].item()
            if self.prediction_h is not None:
                print("{}\t{}\t{}".format(id, pred, label), file=self.prediction_h)

        # No positive instance in this batch: return a zero tensor so the
        # code below can still read ``loss.data``.
        if nloss == 0:
            loss = torch.tensor(0.0, requires_grad=True)

        sample_size = nqueries if nqueries > 0 else 1
        logging_output = {
            "loss": utils.item(loss.data) if reduce else loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["nsentences"],
            "sample_size": sample_size,
            "ncorrect": ncorrect,
            "nqueries": nqueries,
        }
        return loss, sample_size, logging_output

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
        ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        nsentences = sum(log.get("nsentences", 0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

        agg_output = {
            # Per-sample loss, divided by ln(2) to report it in base 2.
            "loss": loss_sum / sample_size / math.log(2),
            "ntokens": ntokens,
            "nsentences": nsentences,
            "sample_size": sample_size,
        }

        ncorrect = sum(log.get("ncorrect", 0) for log in logging_outputs)
        nqueries = sum(log.get("nqueries", 0) for log in logging_outputs)
        # Accuracy is only reported when at least one labelled query was seen.
        if nqueries > 0:
            agg_output["accuracy"] = ncorrect / float(nqueries)

        return agg_output


@register_criterion("winogrande")
class WinograndeCriterion(WSCCriterion):
    """Batched variant of ``WSCCriterion``.

    Scores all queries and all candidates of the batch in one model call
    each and compares them element-wise.
    NOTE(review): this presumes the sample holds exactly one candidate row
    per query row; confirm against the winogrande task's collater.
    """

    def forward(self, model, sample, reduce=True):
        """Compute loss and accuracy for a whole batch at once.

        Returns a tuple ``(loss, sample_size, logging_output)`` where
        ``sample_size`` is the number of query rows.
        """
        # compute loss and accuracy
        query_lprobs = self.get_lprobs(
            model,
            sample["query_tokens"],
            sample["query_masks"],
        )
        cand_lprobs = self.get_lprobs(
            model,
            sample["candidate_tokens"],
            sample["candidate_masks"],
        )
        # A row counts as correct when its query scores at least as high as
        # its candidate.
        pred = query_lprobs >= cand_lprobs
        loss = self.get_loss(query_lprobs, cand_lprobs)

        sample_size = sample["query_tokens"].size(0)
        ncorrect = pred.sum().item()
        logging_output = {
            "loss": utils.item(loss.data) if reduce else loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["nsentences"],
            "sample_size": sample_size,
            "ncorrect": ncorrect,
            "nqueries": sample_size,
        }
        return loss, sample_size, logging_output
def binarize_with_mask(self, txt, prefix, suffix, leading_space, trailing_space):
    """Binarize a sentence and mark which positions belong to ``txt``.

    The sentence is assembled as
    ``prefix + leading_space + txt + trailing_space + suffix`` and binarized
    with an end-of-sentence symbol appended.

    Returns:
        ``(toks, mask)`` where ``mask`` is a boolean tensor shaped like
        ``toks``. It is set on the positions starting at
        ``len(binarize(prefix))`` and spanning
        ``len(binarize(leading_space + txt))`` entries.
    """
    # NOTE(review): both lengths come from separate binarize() calls, so
    # anything binarize() prepends (e.g. an init token) is counted in each
    # of them - confirm the resulting span width is the intended one.
    full_text = prefix + leading_space + txt + trailing_space + suffix
    toks = self.binarize(full_text, append_eos=True)

    span_begin = len(self.binarize(prefix))
    span_end = span_begin + len(self.binarize(leading_space + txt))

    mask = torch.zeros_like(toks, dtype=torch.bool)
    mask[span_begin:span_end] = 1
    return toks, mask
def disambiguate_pronoun(self, model, sentence, use_cuda=False):
    """Resolve the marked pronoun of ``sentence`` using ``model``.

    ``sentence`` is converted with ``wsc_utils.convert_sentence_to_json``
    and turned into a one-example batch. Every candidate span (and the
    query span, if one was marked) is replaced by the ``<mask>`` symbol and
    scored by the mean log-probability the model assigns to the original
    tokens of that span.

    Returns:
        With a query: ``True`` when the query scores at least as high as
        every candidate, else ``False``. Without a query: the decoded text
        of the highest-scoring candidate span.
    """
    record = wsc_utils.convert_sentence_to_json(sentence)
    inference_set = self.build_dataset_for_inference(record)
    batch = inference_set.collater([inference_set[0]])
    if use_cuda:
        batch = utils.move_to_cuda(batch)

    def span_score(tokens, span_mask):
        # Hide the span behind <mask> symbols, then read off the
        # log-probabilities of the tokens that were hidden.
        masked = tokens.clone()
        masked[span_mask.bool()] = self.mask
        logits, _ = model(src_tokens=masked)
        lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float)
        token_scores = lprobs.gather(2, tokens.unsqueeze(-1)).squeeze(-1)
        weights = span_mask.type_as(token_scores)
        # Mean over the masked positions only.
        return (token_scores * weights).sum(dim=-1) / weights.sum(dim=-1)

    cand_tokens = batch["candidate_tokens"][0]
    cand_masks = batch["candidate_masks"][0]
    cand_lprobs = span_score(cand_tokens, cand_masks)

    query_tokens = batch["query_tokens"][0]
    if query_tokens is None:
        # No query given: return the text of the best candidate span.
        best = cand_lprobs.argmax().item()
        chosen = cand_tokens[best][cand_masks[best].bool()]
        return self.bpe.decode(self.source_dictionary.string(chosen)).strip()

    query_lprobs = span_score(
        query_tokens.unsqueeze(0),
        batch["query_masks"][0].unsqueeze(0),
    )
    return (query_lprobs >= cand_lprobs).all().item() == 1
+ + Args: + split (str): name of the split (e.g., train, valid, test) + """ + if data_path is None: + data_path = os.path.join(self.args.data, split + ".jsonl") + if not os.path.exists(data_path): + raise FileNotFoundError("Cannot find data: {}".format(data_path)) + + query_tokens = [] + query_masks = [] + query_lengths = [] + candidate_tokens = [] + candidate_masks = [] + candidate_lengths = [] + + itr = wsc_utils.winogrande_jsonl_iterator(data_path, eval=(split == "test")) + + for sample in itr: + sentence, pronoun_span, query, cand_text = sample + prefix = sentence[: pronoun_span[0]].rstrip() + suffix = sentence[pronoun_span[1] :] + + leading_space = " " if sentence[: pronoun_span[0]].endswith(" ") else "" + trailing_space = "" + + if query is not None: + query_toks, query_mask = self.binarize_with_mask( + query, + prefix, + suffix, + leading_space, + trailing_space, + ) + query_len = len(query_toks) + else: + query_toks, query_mask, query_len = None, None, 0 + + query_tokens.append(query_toks) + query_masks.append(query_mask) + query_lengths.append(query_len) + + cand_toks, cand_mask = self.binarize_with_mask( + cand_text, + prefix, + suffix, + leading_space, + trailing_space, + ) + + candidate_tokens.append(cand_toks) + candidate_masks.append(cand_mask) + candidate_lengths.append(cand_toks.size(0)) + + query_lengths = np.array(query_lengths) + + def get_pad_dataset_fn(tokens, length, pad_idx): + return PadDataset( + ListDataset(tokens, length), + pad_idx=pad_idx, + left_pad=False, + ) + + query_tokens = get_pad_dataset_fn(query_tokens, query_lengths, self.vocab.pad()) + query_masks = get_pad_dataset_fn(query_masks, query_lengths, 0) + + candidate_lengths = np.array(candidate_lengths) + candidate_tokens = get_pad_dataset_fn( + candidate_tokens, candidate_lengths, self.vocab.pad() + ) + candidate_masks = get_pad_dataset_fn(candidate_masks, candidate_lengths, 0) + + dataset = { + "id": IdDataset(), + "query_tokens": query_tokens, + "query_masks": query_masks, + 
def convert_sentence_to_json(sentence):
    """Convert a marked-up sentence into a WSC-style JSON record.

    The pronoun must be wrapped in ``[...]``; the query may optionally be
    wrapped in ``_..._``. All three marker characters are removed from the
    returned text.

    Args:
        sentence (str): e.g. ``"The _trophy_ did not fit because [it] was big."``

    Returns:
        dict with ``idx`` (always 0), ``text`` (markers removed) and
        ``target`` holding ``span1_text``/``span1_index`` for the query
        (both ``None`` when no query is marked) and
        ``span2_text``/``span2_index`` for the pronoun. Indices count
        space-separated tokens.

    Raises:
        ValueError: if the ``[``/``]`` pair is missing or a ``_`` marker is
            not closed (the tuple unpacking of ``split`` fails).
    """

    def token_index(prefix):
        # Number of space-separated tokens in front of the marker. A marker
        # that opens the sentence has nothing in front of it; without this
        # guard "".split(" ") == [""] would yield index 1 instead of 0.
        if not prefix:
            return 0
        return len(prefix.rstrip().split(" "))

    if "_" in sentence:
        prefix, rest = sentence.split("_", 1)
        query, rest = rest.split("_", 1)
        query_index = token_index(prefix)
    else:
        query, query_index = None, None

    prefix, rest = sentence.split("[", 1)
    pronoun, rest = rest.split("]", 1)
    pronoun_index = token_index(prefix)

    sentence = sentence.replace("_", "").replace("[", "").replace("]", "")

    return {
        "idx": 0,
        "text": sentence,
        "target": {
            "span1_index": query_index,
            "span1_text": query,
            "span2_index": pronoun_index,
            "span2_text": pronoun,
        },
    }
def find_span(sentence, search_text, start=0):
    """Find the token span of ``sentence`` whose text is ``search_text``.

    Matching is case-insensitive. Each token from position ``start`` on is
    tried as the first token of the span; a candidate is accepted only when
    the matched text ends exactly at the end of a token.

    Args:
        sentence: token sequence that supports slicing, whose slices expose
            ``.text`` and whose tokens expose ``.i`` (used as slice index),
            ``.idx`` (used as character offset) and ``.text``.
        search_text (str): text to look for.
        start (int): index of the first token to consider.

    Returns:
        The slice ``sentence[first.i : last.i + 1]`` of the first match, or
        ``None`` when nothing matches.
    """
    search_text = search_text.lower()
    len_to_consume = len(search_text)  # loop-invariant
    for tok in sentence[start:]:
        remainder = sentence[tok.i :].text.lower()
        if not remainder.startswith(search_text):
            continue
        for next_tok in sentence[tok.i :]:
            consumed = next_tok.idx + len(next_tok.text) - tok.idx
            if consumed == len_to_consume:
                return sentence[tok.i : next_tok.i + 1]
            if consumed > len_to_consume:
                # Assumes character offsets grow along the sentence, so no
                # later token can end exactly at the target length.
                break
    return None
Exception("Misaligned pronoun!") + assert strip_pronoun(tokens[pronoun_idx]) == pronoun + + # split tokens before and after the pronoun + before = tokens[:pronoun_idx] + after = tokens[pronoun_idx + 1 :] + + # the GPT BPE attaches leading spaces to tokens, so we keep track + # of whether we need spaces before or after the pronoun + leading_space = " " if pronoun_idx > 0 else "" + trailing_space = " " if len(after) > 0 else "" + + # detokenize + before = detok.detokenize(before, return_str=True) + pronoun = detok.detokenize([pronoun], return_str=True) + after = detok.detokenize(after, return_str=True) + + # hack: when the pronoun ends in a period (or comma), move the + # punctuation to the "after" part + if pronoun.endswith(".") or pronoun.endswith(","): + after = pronoun[-1] + trailing_space + after + pronoun = pronoun[:-1] + + # hack: when the "after" part begins with a comma or period, remove + # the trailing space + if after.startswith(".") or after.startswith(","): + trailing_space = "" + + # parse sentence with spacy + sentence = nlp(before + leading_space + pronoun + trailing_space + after) + + # find pronoun span + start = len(before + leading_space) + first_pronoun_tok = find_token(sentence, start_pos=start) + pronoun_span = find_span(sentence, pronoun, start=first_pronoun_tok.i) + assert pronoun_span.text == pronoun + + if eval: + # convert to format where pronoun is surrounded by "[]" and + # query is surrounded by "_" + query_span = find_span(sentence, query) + query_with_ws = "_{}_{}".format( + query_span.text, + (" " if query_span.text_with_ws.endswith(" ") else ""), + ) + pronoun_with_ws = "[{}]{}".format( + pronoun_span.text, + (" " if pronoun_span.text_with_ws.endswith(" ") else ""), + ) + if query_span.start < pronoun_span.start: + first = (query_span, query_with_ws) + second = (pronoun_span, pronoun_with_ws) + else: + first = (pronoun_span, pronoun_with_ws) + second = (query_span, query_with_ws) + sentence = ( + sentence[: 
def winogrande_jsonl_iterator(input_fname, eval=False):
    """Iterate over the examples of a WinoGrande ``.jsonl`` file.

    Each non-blank line must be a JSON object with ``sentence`` (containing
    a ``_`` placeholder), ``option1`` and ``option2``; ``answer`` is read
    only when ``eval`` is false.

    Args:
        input_fname: path of the JSON-lines file.
        eval (bool): when true, the options are yielded in file order and
            ``answer`` is not consulted.

    Yields:
        ``(sentence, pronoun_span, query, cand)`` where ``pronoun_span`` is
        the ``(start, end)`` character range of the first ``_``. When
        ``eval`` is false, ``query`` is the option selected by ``answer``
        (``"1"`` selects ``option1``, anything else ``option2``) and
        ``cand`` is the other one.

    Raises:
        ValueError: if a sentence contains no ``_`` placeholder.
    """
    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(input_fname, encoding="utf-8") as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing in json.loads.
                continue
            sample = json.loads(line)
            sentence = sample["sentence"]
            option1, option2 = sample["option1"], sample["option2"]

            blank = sentence.index("_")
            pronoun_span = (blank, blank + 1)

            # Short-circuit keeps "answer" optional when eval is set.
            if eval or sample["answer"] == "1":
                query, cand = option1, option2
            else:
                query, cand = option2, option1
            yield sentence, pronoun_span, query, cand
+ +The R3F sentence prediction criterion is registered as `sentence_prediction_r3f` while the label smoothing version of it is implemented as `label_smoothed_cross_entropy_r3f`. The R4F version of the sentence prediction criterion can be achieved by applying spectral norm to the classification head via the `--spectral-norm-classification-head` parameter. + +## Hyper-parameters +Our methods introduce 3 new hyper-parameters; `--eps` which sets the standard deviation or range of the distribution we're sampling from, `--r3f-lambda` which controls the combining of logistic loss and noisy KL loss and `--noise-type` which controls which parametric distribution we use ('normal', 'uniform'). + +For example to run R3F on RTE from GLUE + +``` +TOTAL_NUM_UPDATES=3120 +WARMUP_UPDATES=187 +LR=1e-05 +NUM_CLASSES=2 +MAX_SENTENCES=8 # Batch size. +ROBERTA_PATH=/path/to/roberta/model.pt + +CUDA_VISIBLE_DEVICES=0 fairseq-train RTE-bin \ + --restore-file $ROBERTA_PATH \ + --max-positions 512 \ + --max-sentences $MAX_SENTENCES \ + --max-tokens 4400 \ + --task sentence_prediction \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --init-token 0 --separator-token 2 \ + --arch roberta_large \ + --criterion sentence_prediction_r3f \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.1 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --max-epoch 10 \ + --find-unused-parameters \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + --noise-type uniform --r3f-lambda 0.7 \ + --user-dir examples/rxf/rxf_src +``` + +## Citation +```bibtex +@article{aghajanyan2020better, + title={Better Fine-Tuning by Reducing Representational Collapse}, + author={Aghajanyan, 
Armen and Shrivastava, Akshat and Gupta, Anchit and Goyal, Naman and Zettlemoyer, Luke and Gupta, Sonal}, + journal={arXiv preprint arXiv:2008.03156}, + year={2020} +} +``` diff --git a/fairseq/examples/rxf/__init__.py b/fairseq/examples/rxf/__init__.py new file mode 100644 index 0000000..b24cb6b --- /dev/null +++ b/fairseq/examples/rxf/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import rxf_src # noqa diff --git a/fairseq/examples/rxf/rxf_src/__init__.py b/fairseq/examples/rxf/rxf_src/__init__.py new file mode 100644 index 0000000..306e232 --- /dev/null +++ b/fairseq/examples/rxf/rxf_src/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import label_smoothed_cross_entropy_r3f, sentence_prediction_r3f # noqa diff --git a/fairseq/examples/rxf/rxf_src/label_smoothed_cross_entropy_r3f.py b/fairseq/examples/rxf/rxf_src/label_smoothed_cross_entropy_r3f.py new file mode 100644 index 0000000..6191fd5 --- /dev/null +++ b/fairseq/examples/rxf/rxf_src/label_smoothed_cross_entropy_r3f.py @@ -0,0 +1,158 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@register_criterion("label_smoothed_cross_entropy_r3f")
class LabelSmoothedCrossEntropyR3FCriterion(FairseqCriterion):
    """Label-smoothed cross entropy with an added noise-consistency term.

    While the model is in training mode, the token embeddings are perturbed
    with noise from ``noise_sampler`` and the symmetric KL divergence
    between the clean and the noised logits, weighted by ``r3f_lambda``, is
    added to the label-smoothed loss.
    """

    def __init__(
        self, task, sentence_avg, label_smoothing, eps, r3f_lambda, noise_type
    ):
        """Store the hyper-parameters and build the noise distribution.

        Raises:
            Exception: if ``noise_type`` is neither "normal" nor "uniform".
        """
        super().__init__(task)
        self.sentence_avg = sentence_avg
        self.label_smoothing = label_smoothing
        self.eps = eps
        self.r3f_lambda = r3f_lambda
        self.noise_type = noise_type
        if self.noise_type in {"normal"}:
            # eps is the standard deviation of the noise
            self.noise_sampler = torch.distributions.normal.Normal(
                loc=0.0, scale=self.eps
            )
        elif self.noise_type == "uniform":
            # eps is the half-width of the noise interval
            self.noise_sampler = torch.distributions.uniform.Uniform(
                low=-self.eps, high=self.eps
            )
        else:
            raise Exception(f"unrecognized noise type {self.noise_type}")

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--label-smoothing', default=0., type=float, metavar='D',
                            help='epsilon for label smoothing, 0 means no label smoothing')
        parser.add_argument('--eps', type=float, default=1e-5,
                            help='noise eps')
        parser.add_argument('--r3f-lambda', type=float, default=1.0,
                            help='lambda for combining logistic loss and noisy KL loss')
        parser.add_argument('--noise-type', type=str, default='normal',
                            choices=['normal', 'uniform'],
                            help='type of noises')
        # fmt: on

    def _get_symm_kl(self, noised_logits, input_logits):
        """Return the symmetric KL divergence between two sets of logits.

        Both directions are summed over all elements and the total is
        divided by the size of the first dimension of ``noised_logits``.
        """
        noised_lprobs = F.log_softmax(noised_logits, dim=-1, dtype=torch.float32)
        input_probs = F.softmax(input_logits, dim=-1, dtype=torch.float32)
        input_lprobs = F.log_softmax(input_logits, dim=-1, dtype=torch.float32)
        noised_probs = F.softmax(noised_logits, dim=-1, dtype=torch.float32)
        # `reduction` is passed by keyword instead of filling the legacy
        # positional size_average/reduce slots with None.
        return (
            F.kl_div(noised_lprobs, input_probs, reduction="sum")
            + F.kl_div(input_lprobs, noised_probs, reduction="sum")
        ) / noised_logits.size(0)

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        token_embeddings = model.encoder.embed_tokens(sample["net_input"]["src_tokens"])
        input_logits, extra = model(**sample["net_input"])
        loss, nll_loss = self.compute_loss(
            model, (input_logits, extra), sample, reduce=reduce
        )
        sample_size = (
            sample["target"].size(0) if self.sentence_avg else sample["ntokens"]
        )

        if model.training:
            # Second forward pass on noise-perturbed embeddings.
            noise = self.noise_sampler.sample(sample_shape=token_embeddings.shape).to(
                token_embeddings
            )
            noised_embeddings = token_embeddings.clone() + noise

            noised_logits, _ = model(
                **sample["net_input"], token_embeddings=noised_embeddings
            )
            symm_kl = self._get_symm_kl(noised_logits, input_logits)

            # Scale to the same (summed) unit as the main loss before mixing.
            symm_kl = symm_kl * sample_size
            loss = loss + self.r3f_lambda * symm_kl

        logging_output = {
            "loss": loss.data,
            "nll_loss": nll_loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["target"].size(0),
            "sample_size": sample_size,
        }

        if model.training:
            logging_output.update(
                symm_kl=utils.item(symm_kl.data) if reduce else symm_kl.data
            )

        return loss, sample_size, logging_output

    def compute_loss(self, model, net_output, sample, reduce=True):
        """Return ``(loss, nll_loss)`` of the label-smoothed cross entropy."""
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        lprobs = lprobs.view(-1, lprobs.size(-1))
        target = model.get_targets(sample, net_output).view(-1, 1)
        loss, nll_loss = label_smoothed_nll_loss(
            lprobs,
            target,
            self.label_smoothing,
            ignore_index=self.padding_idx,
            reduce=reduce,
        )
        return loss, nll_loss

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""
        loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
        nll_loss_sum = sum(log.get("nll_loss", 0) for log in logging_outputs)
        ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)
        symm_kl_sum = sum(log.get("symm_kl", 0) for log in logging_outputs)

        metrics.log_scalar("symm_kl", symm_kl_sum / sample_size, sample_size, round=3)
        # Dividing by ln 2 reports the losses in base 2.
        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_scalar(
            "nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, round=3
        )
        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
        )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True improves distributed training speed.
        """
        return True
@register_criterion("sentence_prediction_r3f")
class SentencePredictionR3F(FairseqCriterion):
    """Sentence-prediction loss with an added noise-consistency term.

    While the model is in training mode, the token embeddings are perturbed
    with noise from ``noise_sampler``. For classification targets the
    symmetric KL divergence between clean and noised logits, weighted by
    ``r3f_lambda``, is added to the NLL loss.
    """

    def __init__(
        self,
        task,
        eps,
        r3f_lambda,
        noise_type,
        classification_head_name,
        regression_target,
    ):
        """Store the hyper-parameters and build the noise distribution.

        Raises:
            Exception: if ``noise_type`` is neither "normal" nor "uniform".
        """
        super().__init__(task)
        self.eps = eps
        self.r3f_lambda = r3f_lambda
        self.noise_type = noise_type
        self.classification_head_name = classification_head_name
        self.regression_target = regression_target
        if self.noise_type in {"normal"}:
            # eps is the standard deviation of the noise
            self.noise_sampler = torch.distributions.normal.Normal(
                loc=0.0, scale=self.eps
            )
        elif self.noise_type == "uniform":
            # eps is the half-width of the noise interval
            self.noise_sampler = torch.distributions.uniform.Uniform(
                low=-self.eps, high=self.eps
            )
        else:
            raise Exception(f"unrecognized noise type {self.noise_type}")

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--eps', type=float, default=1e-5,
                            help='noise eps')
        parser.add_argument('--r3f-lambda', type=float, default=1.0,
                            help='lambda for combining logistic loss and noisy KL loss')
        parser.add_argument('--noise-type', type=str, default='uniform',
                            choices=['normal', 'uniform'],
                            help='type of noises for RXF methods')
        parser.add_argument('--classification-head-name',
                            default='sentence_classification_head',
                            help='name of the classification head to use')
        parser.add_argument('--regression-target', action='store_true')
        # fmt: on

    def _get_symm_kl(self, noised_logits, input_logits):
        """Return the symmetric KL divergence between two sets of logits.

        Both directions are summed over all elements and the total is
        divided by the size of the first dimension of ``noised_logits``.
        """
        noised_lprobs = F.log_softmax(noised_logits, dim=-1, dtype=torch.float32)
        input_probs = F.softmax(input_logits, dim=-1, dtype=torch.float32)
        input_lprobs = F.log_softmax(input_logits, dim=-1, dtype=torch.float32)
        noised_probs = F.softmax(noised_logits, dim=-1, dtype=torch.float32)
        # `reduction` is passed by keyword instead of filling the legacy
        # positional size_average/reduce slots with None.
        return (
            F.kl_div(noised_lprobs, input_probs, reduction="sum")
            + F.kl_div(input_lprobs, noised_probs, reduction="sum")
        ) / noised_logits.size(0)

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        assert (
            hasattr(model, "classification_heads")
            and self.classification_head_name in model.classification_heads
        ), "model must provide sentence classification head for --criterion=sentence_prediction_r3f"

        token_embeddings = model.encoder.sentence_encoder.embed_tokens(
            sample["net_input"]["src_tokens"]
        )
        input_logits, _ = model(
            **sample["net_input"],
            features_only=True,
            classification_head_name=self.classification_head_name,
            token_embeddings=token_embeddings,
        )
        if model.training and self.noise_sampler:
            # Second forward pass on noise-perturbed (detached) embeddings.
            noise = self.noise_sampler.sample(sample_shape=token_embeddings.shape).to(
                token_embeddings
            )
            noised_embeddings = token_embeddings.detach().clone() + noise

            noised_logits, _ = model(
                **sample["net_input"],
                features_only=True,
                classification_head_name=self.classification_head_name,
                token_embeddings=noised_embeddings,
            )
            symm_kl = self._get_symm_kl(noised_logits, input_logits)
        else:
            symm_kl = 0

        targets = model.get_targets(sample, [input_logits]).view(-1)
        sample_size = targets.numel()

        if not self.regression_target:
            loss = F.nll_loss(
                F.log_softmax(input_logits, dim=-1, dtype=torch.float32),
                targets,
                reduction="sum",
            )
            if model.training:
                # Scale to the same (summed) unit as the NLL before mixing.
                symm_kl = symm_kl * sample_size
                loss = loss + self.r3f_lambda * symm_kl
        else:
            # NOTE(review): the KL term is logged but not added to the loss
            # for regression targets - confirm this is intended.
            # Flatten instead of a bare squeeze() so a batch of one keeps a
            # 1-D shape matching `targets`.
            logits = input_logits.view(-1).float()
            targets = targets.float()
            loss = F.mse_loss(logits, targets, reduction="sum")

        logging_output = {
            "loss": utils.item(loss.data) if reduce else loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample_size,
            "sample_size": sample_size,
        }

        if not self.regression_target:
            preds = input_logits.max(dim=1)[1]
            logging_output.update(ncorrect=(preds == targets).sum().item())

            if model.training and self.noise_sampler:
                logging_output.update(
                    symm_kl=utils.item(symm_kl.data) if reduce else symm_kl.data
                )
        return loss, sample_size, logging_output

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
        symm_kl_sum = sum(log.get("symm_kl", 0) for log in logging_outputs)
        ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        nsentences = sum(log.get("nsentences", 0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

        agg_output = {
            # Dividing by ln 2 reports the loss in base 2.
            "loss": loss_sum / sample_size / math.log(2),
            "symm_kl": symm_kl_sum / sample_size,
            "ntokens": ntokens,
            "nsentences": nsentences,
            "sample_size": sample_size,
        }

        if len(logging_outputs) > 0 and "ncorrect" in logging_outputs[0]:
            ncorrect = sum(log.get("ncorrect", 0) for log in logging_outputs)
            agg_output.update(accuracy=ncorrect / nsentences)

        if sample_size != ntokens:
            agg_output["nll_loss"] = loss_sum / ntokens / math.log(2)
        return agg_output
+ +## Pre-trained models + +Model | Description | Dataset | Download +---|---|---|--- +`transformer.wmt14.en-fr` | Transformer <br> ([Ott et al., 2018](https://arxiv.org/abs/1806.00187)) | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-fr.joined-dict.transformer.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-fr.joined-dict.newstest2014.tar.bz2) +`transformer.wmt16.en-de` | Transformer <br> ([Ott et al., 2018](https://arxiv.org/abs/1806.00187)) | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt16.en-de.joined-dict.transformer.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) + +## Training a new model on WMT'16 En-De + +First download the [preprocessed WMT'16 En-De data provided by Google](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8). + +Then: + +##### 1. Extract the WMT'16 En-De data +```bash +TEXT=wmt16_en_de_bpe32k +mkdir -p $TEXT +tar -xzvf wmt16_en_de.tar.gz -C $TEXT +``` + +##### 2. Preprocess the dataset with a joined dictionary +```bash +fairseq-preprocess \ + --source-lang en --target-lang de \ + --trainpref $TEXT/train.tok.clean.bpe.32000 \ + --validpref $TEXT/newstest2013.tok.bpe.32000 \ + --testpref $TEXT/newstest2014.tok.bpe.32000 \ + --destdir data-bin/wmt16_en_de_bpe32k \ + --nwordssrc 32768 --nwordstgt 32768 \ + --joined-dictionary \ + --workers 20 +``` + +##### 3. 
Train a model +```bash +fairseq-train \ + data-bin/wmt16_en_de_bpe32k \ + --arch transformer_vaswani_wmt_en_de_big --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-updates 4000 --warmup-init-lr 1e-07 \ + --dropout 0.3 --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --max-tokens 3584 \ + --fp16 +``` + +Note that the `--fp16` flag requires that you have CUDA 9.1 or greater and a Volta GPU or newer. + +***IMPORTANT:*** You will get better performance by training with big batches and +increasing the learning rate. If you want to train the above model with big batches +(assuming your machine has 8 GPUs): +- add `--update-freq 16` to simulate training on 8x16=128 GPUs +- increase the learning rate; 0.001 works well for big batches + +##### 4. Evaluate + +Now we can evaluate our trained model. + +Note that the original [Attention Is All You Need](https://arxiv.org/abs/1706.03762) +paper used a couple of tricks to achieve better BLEU scores. We use these same tricks in +the Scaling NMT paper, so it's important to apply them when reproducing our results. + +First, use the [average_checkpoints.py](/scripts/average_checkpoints.py) script to +average the last few checkpoints. Averaging the last 5-10 checkpoints is usually +good, but you may need to adjust this depending on how long you've trained: +```bash +python scripts/average_checkpoints.py \ + --inputs /path/to/checkpoints \ + --num-epoch-checkpoints 10 \ + --output checkpoint.avg10.pt +``` + +Next, generate translations using a beam width of 4 and length penalty of 0.6: +```bash +fairseq-generate \ + data-bin/wmt16_en_de_bpe32k \ + --path checkpoint.avg10.pt \ + --beam 4 --lenpen 0.6 --remove-bpe > gen.out +``` + +Finally, we apply the ["compound splitting" script](/scripts/compound_split_bleu.sh) to +add spaces around dashes. 
For example "Café-Liebhaber" would become three tokens: +"Café - Liebhaber". This typically results in larger BLEU scores, but it is not +appropriate to compare these inflated scores to work which does not include this trick. +This trick was used in the [original AIAYN code](https://github.com/tensorflow/tensor2tensor/blob/fc9335c0203685cbbfe2b30c92db4352d8f60779/tensor2tensor/utils/get_ende_bleu.sh), +so we used it in the Scaling NMT paper as well. That said, it's strongly advised to +report [sacrebleu](https://github.com/mjpost/sacrebleu) scores instead. + +To compute "compound split" tokenized BLEU (not recommended!): +```bash +bash scripts/compound_split_bleu.sh gen.out +# BLEU4 = 29.29, 60.3/35.0/22.8/15.3 (BP=1.000, ratio=1.004, syslen=64763, reflen=64496) +``` + +To compute detokenized BLEU with sacrebleu (preferred): +```bash +bash scripts/sacrebleu.sh wmt14/full en de gen.out +# BLEU+case.mixed+lang.en-de+numrefs.1+smooth.exp+test.wmt14/full+tok.13a+version.1.4.3 = 28.6 59.3/34.3/22.1/14.9 (BP = 1.000 ratio = 1.016 hyp_len = 63666 ref_len = 62688) +``` + +## Citation + +```bibtex +@inproceedings{ott2018scaling, + title = {Scaling Neural Machine Translation}, + author = {Ott, Myle and Edunov, Sergey and Grangier, David and Auli, Michael}, + booktitle = {Proceedings of the Third Conference on Machine Translation (WMT)}, + year = 2018, +} +``` diff --git a/fairseq/examples/shuffled_word_order/README.finetuning.md b/fairseq/examples/shuffled_word_order/README.finetuning.md new file mode 100644 index 0000000..ecbcb65 --- /dev/null +++ b/fairseq/examples/shuffled_word_order/README.finetuning.md @@ -0,0 +1,135 @@ +# Fine-tuning details + +For each task (GLUE and PAWS), we perform hyperparam search for each model, and report the mean and standard deviation across 5 seeds of the best model. First, get the datasets following the instructions in [RoBERTa fine-tuning README](../roberta/README.glue.md). 
Alternatively, you can use [huggingface datasets](https://huggingface.co/docs/datasets/) to get the task data: + +```python +from datasets import load_dataset +import pandas as pd +from pathlib import Path + +key2file = { +"paws": { + "loc": "paws_data", + "columns": ["id", "sentence1", "sentence2", "label"], + "train": "train.tsv", + "validation": "dev.tsv", + "test": "test.tsv" + } +} + +task_data = load_dataset("paws", "labeled_final") +task_config = key2file["paws"] +save_path = Path(task_config["loc"]) +save_path.mkdir(exist_ok=True, parents=True) +for key, fl in task_config.items(): + if key in ["loc", "columns"]: + continue + print(f"Reading {key}") + columns = task_config["columns"] + df = pd.DataFrame(task_data[key]) + print(df.columns) + df = df[columns] + print(f"Got {len(df)} records") + save_loc = save_path / fl + print(f"Saving to : {save_loc}") + df.to_csv(save_loc, sep="\t", header=None, index=None) + +``` + +- Preprocess using RoBERTa GLUE preprocessing script, while keeping in mind the column numbers for `sentence1`, `sentence2` and `label` (which is 0,1,2 if you save the data according to the above example.) +- Then, fine-tuning is performed similarly to RoBERTa (for example, in case of RTE): + +```bash +TOTAL_NUM_UPDATES=30875 # 10 epochs through RTE for bsz 16 +WARMUP_UPDATES=1852 # 6 percent of the number of updates +LR=2e-05 # Peak LR for polynomial LR scheduler. +NUM_CLASSES=2 +MAX_SENTENCES=16 # Batch size. 
+SHUFFLED_ROBERTA_PATH=/path/to/shuffled_roberta/model.pt + +CUDA_VISIBLE_DEVICES=0 fairseq-train RTE-bin/ \ + --restore-file $SHUFFLED_ROBERTA_PATH \ + --max-positions 512 \ + --batch-size $MAX_SENTENCES \ + --max-tokens 4400 \ + --task sentence_prediction \ + --reset-optimizer --reset-dataloader --reset-meters \ + --required-batch-size-multiple 1 \ + --init-token 0 --separator-token 2 \ + --arch roberta_large \ + --criterion sentence_prediction \ + --num-classes $NUM_CLASSES \ + --dropout 0.1 --attention-dropout 0.1 \ + --weight-decay 0.1 --optimizer adam --adam-betas "(0.9, 0.98)" --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_NUM_UPDATES --warmup-updates $WARMUP_UPDATES \ + --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \ + --max-epoch 10 \ + --find-unused-parameters \ + --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric; +``` + +- `TOTAL_NUM_UPDATES` is computed based on the `--batch_size` value and the dataset size. 
+- `WARMUP_UPDATES` is computed as 6% of `TOTAL_NUM_UPDATES` +- Best hyperparam of `--lr` and `--batch_size` is reported below: + +## `--lr` + +| | name | RTE | MRPC | SST-2 | CoLA | QQP | QNLI | MNLI | PAWS | +| --: | :----------- | ----: | ----: | ----: | ----: | ----: | ----: | ----: | ----: | +| 0 | original | 2e-05 | 2e-05 | 1e-05 | 2e-05 | 1e-05 | 1e-05 | 1e-05 | 2e-05 | +| 1 | n_1 | 2e-05 | 1e-05 | 1e-05 | 1e-05 | 3e-05 | 1e-05 | 2e-05 | 2e-05 | +| 2 | n_2 | 2e-05 | 2e-05 | 1e-05 | 1e-05 | 2e-05 | 1e-05 | 1e-05 | 3e-05 | +| 3 | n_3 | 3e-05 | 1e-05 | 2e-05 | 2e-05 | 3e-05 | 1e-05 | 1e-05 | 2e-05 | +| 4 | n_4 | 3e-05 | 1e-05 | 2e-05 | 2e-05 | 2e-05 | 1e-05 | 1e-05 | 2e-05 | +| 5 | r512 | 1e-05 | 3e-05 | 2e-05 | 2e-05 | 3e-05 | 2e-05 | 3e-05 | 2e-05 | +| 6 | rand_corpus | 2e-05 | 1e-05 | 3e-05 | 1e-05 | 3e-05 | 3e-05 | 3e-05 | 2e-05 | +| 7 | rand_uniform | 2e-05 | 1e-05 | 3e-05 | 2e-05 | 3e-05 | 3e-05 | 3e-05 | 1e-05 | +| 8 | rand_init | 1e-05 | 1e-05 | 3e-05 | 1e-05 | 1e-05 | 1e-05 | 2e-05 | 1e-05 | +| 9 | no_pos | 1e-05 | 3e-05 | 2e-05 | 1e-05 | 1e-05 | 1e-05 | 1e-05 | 1e-05 | + +## `--batch_size` + +| | name | RTE | MRPC | SST-2 | CoLA | QQP | QNLI | MNLI | PAWS | +| --: | :----------- | --: | ---: | ----: | ---: | --: | ---: | ---: | ---: | +| 0 | orig | 16 | 16 | 32 | 16 | 16 | 32 | 32 | 16 | +| 1 | n_1 | 32 | 32 | 16 | 32 | 32 | 16 | 32 | 16 | +| 2 | n_2 | 32 | 16 | 32 | 16 | 32 | 32 | 16 | 32 | +| 3 | n_3 | 32 | 32 | 16 | 32 | 32 | 16 | 32 | 32 | +| 4 | n_4 | 32 | 16 | 32 | 16 | 32 | 32 | 32 | 32 | +| 5 | r512 | 32 | 16 | 16 | 32 | 32 | 16 | 16 | 16 | +| 6 | rand_corpus | 16 | 16 | 16 | 16 | 32 | 16 | 16 | 32 | +| 7 | rand_uniform | 16 | 32 | 16 | 16 | 32 | 16 | 16 | 16 | +| 8 | rand_init | 16 | 16 | 32 | 16 | 16 | 16 | 32 | 16 | +| 9 | no_pos | 16 | 32 | 16 | 16 | 32 | 16 | 16 | 16 | + +- Perform inference similar to RoBERTa as well: + +```python +from fairseq.models.roberta import RobertaModel + +roberta = RobertaModel.from_pretrained( + 
'checkpoints/', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='PAWS-bin' +) + +label_fn = lambda label: roberta.task.label_dictionary.string( + [label + roberta.task.label_dictionary.nspecial] +) +ncorrect, nsamples = 0, 0 +roberta.cuda() +roberta.eval() +with open('paws_data/dev.tsv') as fin: + fin.readline() + for index, line in enumerate(fin): + tokens = line.strip().split('\t') + sent1, sent2, target = tokens[0], tokens[1], tokens[2] + tokens = roberta.encode(sent1, sent2) + prediction = roberta.predict('sentence_classification_head', tokens).argmax().item() + prediction_label = label_fn(prediction) + ncorrect += int(prediction_label == target) + nsamples += 1 +print('| Accuracy: ', float(ncorrect)/float(nsamples)) + +``` diff --git a/fairseq/examples/shuffled_word_order/README.md b/fairseq/examples/shuffled_word_order/README.md new file mode 100644 index 0000000..6ce0b39 --- /dev/null +++ b/fairseq/examples/shuffled_word_order/README.md @@ -0,0 +1,94 @@ +# Masked Language Modeling and the Distributional Hypothesis: Order Word Matters Pre-training for Little + +[https://arxiv.org/abs/2104.06644](https://arxiv.org/abs/2104.06644) + +## Introduction + +In this work, we pre-train [RoBERTa](../roberta) base on various word shuffled variants of BookWiki corpus (16GB). We observe that a word shuffled pre-trained model achieves surprisingly good scores on GLUE, PAWS and several parametric probing tasks. Please read our paper for more details on the experiments. 
+ +## Pre-trained models + +| Model | Description | Download | +| ------------------------------------- | -------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| `roberta.base.orig` | RoBERTa (base) trained on natural corpus | [roberta.base.orig.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.orig.tar.gz) | +| `roberta.base.shuffle.n1` | RoBERTa (base) trained on n=1 gram sentence word shuffled data | [roberta.base.shuffle.n1.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n1.tar.gz) | +| `roberta.base.shuffle.n2` | RoBERTa (base) trained on n=2 gram sentence word shuffled data | [roberta.base.shuffle.n2.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n2.tar.gz) | +| `roberta.base.shuffle.n3` | RoBERTa (base) trained on n=3 gram sentence word shuffled data | [roberta.base.shuffle.n3.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n3.tar.gz) | +| `roberta.base.shuffle.n4` | RoBERTa (base) trained on n=4 gram sentence word shuffled data | [roberta.base.shuffle.n4.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n4.tar.gz) | +| `roberta.base.shuffle.512` | RoBERTa (base) trained on unigram 512 word block shuffled data | [roberta.base.shuffle.512.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.512.tar.gz) | +| `roberta.base.shuffle.corpus` | RoBERTa (base) trained on unigram corpus word shuffled data | [roberta.base.shuffle.corpus.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.corpus.tar.gz) | +| `roberta.base.shuffle.corpus_uniform` | RoBERTa (base) trained on unigram corpus word shuffled data, where all words are uniformly sampled | 
[roberta.base.shuffle.corpus_uniform.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.corpus_uniform.tar.gz) | +| `roberta.base.nopos` | RoBERTa (base) without positional embeddings, trained on natural corpus | [roberta.base.nopos.tar.gz](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.nopos.tar.gz) | + +## Results + +[GLUE (Wang et al, 2019)](https://gluebenchmark.com/) & [PAWS (Zhang et al, 2019)](https://github.com/google-research-datasets/paws) _(dev set, single model, single-task fine-tuning, median of 5 seeds)_ + +| name | CoLA | MNLI | MRPC | PAWS | QNLI | QQP | RTE | SST-2 | +| :----------------------------------- | ----: | ----: | ----: | ----: | ----: | ----: | ----: | ----: | +| `roberta.base.orig` | 61.4 | 86.11 | 89.19 | 94.46 | 92.53 | 91.26 | 74.64 | 93.92 | +| `roberta.base.shuffle.n1` | 35.15 | 82.64 | 86 | 89.97 | 89.02 | 91.01 | 69.02 | 90.47 | +| `roberta.base.shuffle.n2` | 54.37 | 83.43 | 86.24 | 93.46 | 90.44 | 91.36 | 70.83 | 91.79 | +| `roberta.base.shuffle.n3` | 48.72 | 83.85 | 86.36 | 94.05 | 91.69 | 91.24 | 70.65 | 92.02 | +| `roberta.base.shuffle.n4` | 58.64 | 83.77 | 86.98 | 94.32 | 91.69 | 91.4 | 70.83 | 92.48 | +| `roberta.base.shuffle.512` | 12.76 | 77.52 | 79.61 | 84.77 | 85.19 | 90.2 | 56.52 | 86.34 | +| `roberta.base.shuffle.corpus` | 0 | 71.9 | 70.52 | 58.52 | 71.11 | 85.52 | 53.99 | 83.35 | +| `roberta.base.shuffle.corpus_random` | 9.19 | 72.33 | 70.76 | 58.42 | 77.76 | 85.93 | 53.99 | 84.04 | +| `roberta.base.nopos` | 0 | 63.5 | 72.73 | 57.08 | 77.72 | 87.87 | 54.35 | 83.24 | + +For more results on probing tasks, please refer to [our paper](https://arxiv.org/abs/2104.06644). 
+ +## Example Usage + +Follow the same usage as in [RoBERTa](https://github.com/pytorch/fairseq/tree/main/examples/roberta) to load and test your models: + +```python +# Download roberta.base.shuffle.n1 model +wget https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n1.tar.gz +tar -xzvf roberta.base.shuffle.n1.tar.gz +# Copy the dictionary files +cd roberta.base.shuffle.n1 +wget -O dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt && wget -O encoder.json https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json && wget -O vocab.bpe https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe +cd .. + +# Load the model in fairseq +from fairseq.models.roberta import RobertaModel +roberta = RobertaModel.from_pretrained('/path/to/roberta.base.shuffle.n1', checkpoint_file='model.pt') +roberta.eval() # disable dropout (or leave in train mode to finetune) +``` + +We have also provided a [Google Colab](https://colab.research.google.com/drive/1IJDVfNVWdvRfLjphQKBGzmob84t-OXpm) notebook to demonstrate the loading of the model. The models were trained on top of Fairseq from the following commit: [62cff008ebeeed855093837507d5e6bf52065ee6](https://github.com/pytorch/fairseq/commit/62cff008ebeeed855093837507d5e6bf52065ee6). + +**Note**: The model trained without positional embeddings (`roberta.base.nopos`) is a modified `RoBERTa` model, where the positional embeddings are not used. Thus, the typical `from_pretrained` method of the fairseq version of RoBERTa will not be able to load the above model weights. To load them, construct a new `RobertaModel` object by setting the flag `use_positional_embeddings` to `False` (or [in the latest code](https://github.com/pytorch/fairseq/blob/main/fairseq/models/roberta/model.py#L543), set `no_token_positional_embeddings` to `True`), and then load the individual weights. 
+ +## Fine-tuning Evaluation + +We provide the trained fine-tuned models on MNLI here for each model above for quick evaluation (1 seed for each model). Please refer to [finetuning details](README.finetuning.md) for the parameters of these models. Follow [RoBERTa](https://github.com/pytorch/fairseq/tree/main/examples/roberta) instructions to evaluate these models. + +| Model | MNLI M Dev Accuracy | Link | +| :----------------------------------------- | :------------------ | :--------------------------------------------------------------------------------------------------------------- | +| `roberta.base.orig.mnli` | 86.14 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.orig.mnli.tar.gz) | +| `roberta.base.shuffle.n1.mnli` | 82.55 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n1.mnli.tar.gz) | +| `roberta.base.shuffle.n2.mnli` | 83.21 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n2.mnli.tar.gz) | +| `roberta.base.shuffle.n3.mnli` | 83.89 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n3.mnli.tar.gz) | +| `roberta.base.shuffle.n4.mnli` | 84.00 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.n4.mnli.tar.gz) | +| `roberta.base.shuffle.512.mnli` | 77.22 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.512.mnli.tar.gz) | +| `roberta.base.shuffle.corpus.mnli` | 71.88 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.corpus.mnli.tar.gz) | +| `roberta.base.shuffle.corpus_uniform.mnli` | 72.46 | [Download](https://dl.fbaipublicfiles.com/unnatural_pretraining/roberta.base.shuffle.corpus_uniform.mnli.tar.gz) | + +## Citation + +```bibtex +@misc{sinha2021masked, + title={Masked Language Modeling and the Distributional Hypothesis: Order Word Matters Pre-training for Little}, + author={Koustuv Sinha and Robin Jia and 
Dieuwke Hupkes and Joelle Pineau and Adina Williams and Douwe Kiela}, + year={2021}, + eprint={2104.06644}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + +## Contact + +For questions and comments, please reach out to Koustuv Sinha (koustuv.sinha@mail.mcgill.ca). diff --git a/fairseq/examples/simultaneous_translation/README.md b/fairseq/examples/simultaneous_translation/README.md new file mode 100644 index 0000000..62a005e --- /dev/null +++ b/fairseq/examples/simultaneous_translation/README.md @@ -0,0 +1,5 @@ +# Simultaneous Translation +Examples of simultaneous translation in fairseq +- [English-to-Japanese text-to-text wait-k model](docs/enja-waitk.md) +- [English-to-German text-to-text monotonic multihead attention model](docs/ende-mma.md) +- [English-to-German speech-to-text simultaneous translation model](../speech_to_text/docs/simulst_mustc_example.md) diff --git a/fairseq/examples/simultaneous_translation/__init__.py b/fairseq/examples/simultaneous_translation/__init__.py new file mode 100644 index 0000000..5835316 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . 
import models # noqa diff --git a/fairseq/examples/simultaneous_translation/docs/ende-mma.md b/fairseq/examples/simultaneous_translation/docs/ende-mma.md new file mode 100644 index 0000000..241d604 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/docs/ende-mma.md @@ -0,0 +1,74 @@ +# Simultaneous Machine Translation + +This directory contains the code for the paper [Monotonic Multihead Attention](https://openreview.net/forum?id=Hyg96gBKPS) + +## Prepare Data + +[Please follow the instructions to download and preprocess the WMT'15 En-De dataset.](https://github.com/pytorch/fairseq/tree/simulastsharedtask/examples/translation#prepare-wmt14en2desh) + +Another example of training an English to Japanese model can be found [here](enja-waitk.md) + +## Training + +- MMA-IL + +```shell +fairseq-train \ + data-bin/wmt15_en_de_32k \ + --simul-type infinite_lookback \ + --user-dir $FAIRSEQ/examples/simultaneous_translation \ + --mass-preservation \ + --criterion latency_augmented_label_smoothed_cross_entropy \ + --latency-weight-avg 0.1 \ + --max-update 50000 \ + --arch transformer_monotonic_iwslt_de_en save_dir_key=lambda \ + --optimizer adam --adam-betas '(0.9, 0.98)' \ + --lr-scheduler 'inverse_sqrt' \ + --warmup-init-lr 1e-7 --warmup-updates 4000 \ + --lr 5e-4 --stop-min-lr 1e-9 --clip-norm 0.0 --weight-decay 0.0001\ + --dropout 0.3 \ + --label-smoothing 0.1\ + --max-tokens 3584 +``` + +- MMA-H + +```shell +fairseq-train \ + data-bin/wmt15_en_de_32k \ + --simul-type hard_aligned \ + --user-dir $FAIRSEQ/examples/simultaneous_translation \ + --mass-preservation \ + --criterion latency_augmented_label_smoothed_cross_entropy \ + --latency-weight-var 0.1 \ + --max-update 50000 \ + --arch transformer_monotonic_iwslt_de_en save_dir_key=lambda \ + --optimizer adam --adam-betas '(0.9, 0.98)' \ + --lr-scheduler 'inverse_sqrt' \ + --warmup-init-lr 1e-7 --warmup-updates 4000 \ + --lr 5e-4 --stop-min-lr 1e-9 --clip-norm 0.0 --weight-decay 0.0001\ + --dropout 0.3 \ + 
--label-smoothing 0.1\ + --max-tokens 3584 +``` + +- wait-k + +```shell +fairseq-train \ + data-bin/wmt15_en_de_32k \ + --simul-type waitk \ + --waitk-lagging 3 \ + --user-dir $FAIRSEQ/examples/simultaneous_translation \ + --mass-preservation \ + --criterion latency_augmented_label_smoothed_cross_entropy \ + --max-update 50000 \ + --arch transformer_monotonic_iwslt_de_en save_dir_key=lambda \ + --optimizer adam --adam-betas '(0.9, 0.98)' \ + --lr-scheduler 'inverse_sqrt' \ + --warmup-init-lr 1e-7 --warmup-updates 4000 \ + --lr 5e-4 --stop-min-lr 1e-9 --clip-norm 0.0 --weight-decay 0.0001\ + --dropout 0.3 \ + --label-smoothing 0.1\ + --max-tokens 3584 +``` diff --git a/fairseq/examples/simultaneous_translation/docs/enja-waitk.md b/fairseq/examples/simultaneous_translation/docs/enja-waitk.md new file mode 100644 index 0000000..fb9d825 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/docs/enja-waitk.md @@ -0,0 +1,106 @@ +# An example of English to Japanese Simultaneous Translation System + +This is an example of training and evaluating a transformer *wait-k* English to Japanese simultaneous text-to-text translation model. + +## Data Preparation +This section introduces the data preparation for training and evaluation. +If you only want to evaluate the model, please jump to [Inference & Evaluation](#inference-&-evaluation). + +For illustration, we only use the following subsets of the available data from [WMT20 news translation task](http://www.statmt.org/wmt20/translation-task.html), which results in 7,815,391 sentence pairs. +- News Commentary v16 +- Wiki Titles v3 +- WikiMatrix V1 +- Japanese-English Subtitle Corpus +- The Kyoto Free Translation Task Corpus + +We use the WMT20 development data as the development set. Training the `transformer_vaswani_wmt_en_de_big` model on this amount of data will result in 17.3 BLEU with greedy search and 19.7 with beam (10) search. Note that better performance can be achieved with the full WMT training data. 
+ +We use the [sentencepiece](https://github.com/google/sentencepiece) toolkit to tokenize the data with a vocabulary size of 32000. +Additionally, we filtered out the sentences longer than 200 words after tokenization. +Assuming the tokenized text data is saved at `${DATA_DIR}`, +we prepare the data binary with the following command. + +```bash +fairseq-preprocess \ + --source-lang en --target-lang ja \ + --trainpref ${DATA_DIR}/train \ + --validpref ${DATA_DIR}/dev \ + --testpref ${DATA_DIR}/test \ + --destdir ${WMT20_ENJA_DATA_BIN} \ + --nwordstgt 32000 --nwordssrc 32000 \ + --workers 20 +``` + +## Simultaneous Translation Model Training +To train a wait-k `(k=10)` model: +```bash +fairseq-train ${WMT20_ENJA_DATA_BIN} \ + --save-dir ${SAVE_DIR} \ + --simul-type waitk \ + --waitk-lagging 10 \ + --max-epoch 70 \ + --arch transformer_monotonic_vaswani_wmt_en_de_big \ + --optimizer adam \ + --adam-betas '(0.9, 0.98)' \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 1e-07 \ + --warmup-updates 4000 \ + --lr 0.0005 \ + --stop-min-lr 1e-09 \ + --clip-norm 10.0 \ + --dropout 0.3 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-tokens 3584 +``` +This command is for training on 8 GPUs. Equivalently, the model can be trained on one GPU with `--update-freq 8`. + +## Inference & Evaluation +First of all, install [SimulEval](https://github.com/facebookresearch/SimulEval) for evaluation. + +```bash +git clone https://github.com/facebookresearch/SimulEval.git +cd SimulEval +pip install -e . +``` + +The following command is for the evaluation. 
+Assuming the source and reference files are `${SRC_FILE}` and `${TGT_FILE}`, and the sentencepiece model file for English is saved at `${SRC_SPM_PATH}`: + + +```bash +simuleval \ + --source ${SRC_FILE} \ + --target ${TGT_FILE} \ + --data-bin ${WMT20_ENJA_DATA_BIN} \ + --sacrebleu-tokenizer ja-mecab \ + --eval-latency-unit char \ + --no-space \ + --src-splitter-type sentencepiecemodel \ + --src-splitter-path ${SRC_SPM_PATH} \ + --agent ${FAIRSEQ}/examples/simultaneous_translation/eval/agents/simul_t2t_enja.py \ + --model-path ${SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --output ${OUTPUT} \ + --scores +``` + +The `--data-bin` should be the same as in the previous sections if you prepared the data from scratch. +If you only want to run the evaluation, a prepared data directory can be found [here](https://dl.fbaipublicfiles.com/simultaneous_translation/wmt20_enja_medium_databin.tgz) and a pretrained checkpoint (wait-k=10 model) can be downloaded from [here](https://dl.fbaipublicfiles.com/simultaneous_translation/wmt20_enja_medium_wait10_ckpt.pt). + +The output should look like this: +```bash +{ + "Quality": { + "BLEU": 11.442253287568398 + }, + "Latency": { + "AL": 8.6587861866951, + "AP": 0.7863304776251316, + "DAL": 9.477850951194764 + } +} +``` +The latency is evaluated by characters (`--eval-latency-unit`) on the target side. The quality (BLEU) is evaluated with `sacrebleu` using the `MeCab` tokenizer (`--sacrebleu-tokenizer ja-mecab`). `--no-space` indicates that no space is added when merging the predicted words. + +If the `--output ${OUTPUT}` option is used, the detailed log and scores will be stored under the `${OUTPUT}` directory. diff --git a/fairseq/examples/simultaneous_translation/eval/agents/simul_t2t_enja.py b/fairseq/examples/simultaneous_translation/eval/agents/simul_t2t_enja.py new file mode 100644 index 0000000..8f3c870 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/eval/agents/simul_t2t_enja.py @@ -0,0 +1,226 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
class SimulTransTextAgentJA(TextAgent):
    """
    Simultaneous Translation
    Text agent for Japanese

    Source words are split into SentencePiece pieces, the encoder is re-run
    over the whole source prefix after every read, and the action returned by
    the decoder decides between reading more source and writing a token.
    """

    def __init__(self, args):
        """Read agent options from ``args`` and load the model and splitters.

        ``args`` must provide ``max_len``, ``model_path`` and ``data_bin``;
        ``gpu`` and the ``{src,tgt}_splitter_{type,path}`` options are read
        with ``getattr`` and may be absent.
        """

        # Whether use gpu
        self.gpu = getattr(args, "gpu", False)

        # Max len
        self.max_len = args.max_len

        # Load Model
        self.load_model_vocab(args)

        # build word splitter
        self.build_word_splitter(args)

        self.eos = DEFAULT_EOS

    def initialize_states(self, states):
        """Attach empty incremental-decoding bookkeeping to ``states``."""
        states.incremental_states = dict()
        states.incremental_states["online"] = dict()

    def to_device(self, tensor):
        """Return ``tensor`` on the GPU when ``self.gpu`` is set, else on CPU."""
        if self.gpu:
            return tensor.cuda()
        else:
            return tensor.cpu()

    def load_model_vocab(self, args):
        """Load the checkpoint at ``args.model_path`` and its dictionaries.

        Sets ``self.model`` (in eval mode) and ``self.dict`` with the "src"
        and "tgt" dictionaries of the task rebuilt from the checkpoint config.

        Raises:
            IOError: if ``args.model_path`` does not exist.
        """

        filename = args.model_path
        if not os.path.exists(filename):
            raise IOError("Model file not found: {}".format(filename))

        state = checkpoint_utils.load_checkpoint_to_cpu(filename)

        # The task is rebuilt from the checkpoint's own config, but pointed at
        # the data directory given on the command line.
        task_args = state["cfg"]["task"]
        task_args.data = args.data_bin

        task = tasks.setup_task(task_args)

        # build model for ensemble
        # NOTE(review): presumably these are cleared so build_model does not
        # look for the pretrained component files again -- confirm.
        state["cfg"]["model"].load_pretrained_encoder_from = None
        state["cfg"]["model"].load_pretrained_decoder_from = None

        self.model = task.build_model(state["cfg"]["model"])
        self.model.load_state_dict(state["model"], strict=True)
        self.model.eval()
        self.model.share_memory()

        if self.gpu:
            self.model.cuda()

        # Set dictionary
        self.dict = {}
        self.dict["tgt"] = task.target_dictionary
        self.dict["src"] = task.source_dictionary

    @staticmethod
    def add_args(parser):
        """Register this agent's command-line options on ``parser``."""
        # fmt: off
        parser.add_argument('--model-path', type=str, required=True,
                            help='path to your pretrained model.')
        parser.add_argument("--data-bin", type=str, required=True,
                            help="Path of data binary")
        parser.add_argument("--max-len", type=int, default=100,
                            help="Max length of translation")
        parser.add_argument("--tgt-splitter-type", type=str, default="SentencePiece",
                            help="Subword splitter type for target text.")
        parser.add_argument("--tgt-splitter-path", type=str, default=None,
                            help="Subword splitter model path for target text.")
        parser.add_argument("--src-splitter-type", type=str, default="SentencePiece",
                            help="Subword splitter type for source text.")
        parser.add_argument("--src-splitter-path", type=str, default=None,
                            help="Subword splitter model path for source text.")
        # fmt: on
        return parser

    def build_word_splitter(self, args):
        """Load a SentencePiece model for each side that has a splitter path.

        ``self.spm`` only gets a "src"/"tgt" entry when both the splitter type
        and the splitter path for that side are set on ``args``.
        """
        self.spm = {}
        for lang in ['src', 'tgt']:
            if getattr(args, f'{lang}_splitter_type', None):
                path = getattr(args, f'{lang}_splitter_path', None)
                if path:
                    self.spm[lang] = spm.SentencePieceProcessor()
                    self.spm[lang].Load(path)

    def segment_to_units(self, segment, states):
        """Split a full source word (segment) into subword units."""
        # Requires a source splitter: raises KeyError when no
        # --src-splitter-path was given (see build_word_splitter).
        return self.spm['src'].EncodeAsPieces(segment)

    def update_model_encoder(self, states):
        """Re-encode all source units read so far into ``states.encoder_states``."""
        if len(states.units.source) == 0:
            return

        src_indices = [
            self.dict['src'].index(x)
            for x in states.units.source.value
        ]

        if states.finish_read():
            # Append the eos index once the whole source has been read.
            # NOTE(review): the index comes from the *target* dictionary even
            # though it is appended to source indices; presumably both
            # dictionaries share the same eos index -- confirm.
            src_indices += [self.dict["tgt"].eos_index]

        src_indices = self.to_device(
            torch.LongTensor(src_indices).unsqueeze(0)
        )
        src_lengths = self.to_device(
            torch.LongTensor([src_indices.size(1)])
        )

        states.encoder_states = self.model.encoder(src_indices, src_lengths)

        torch.cuda.empty_cache()

    def update_states_read(self, states):
        """Refresh the encoder output; happens after a read action."""
        self.update_model_encoder(states)

    def units_to_segment(self, units, states):
        """Turn the most recent target unit into a segment to emit.

        Returns ``DEFAULT_EOS`` when the unit is the target eos word or more
        than ``self.max_len`` target segments exist, ``None`` for a bare
        ``BOS_PREFIX`` unit, and otherwise the unit with a leading
        ``BOS_PREFIX`` removed.
        """
        # Merge sub words (units) to full word (segment).
        # For Japanese, we can directly send
        # the untokenized token to server except the BOS token
        # with following option
        # --sacrebleu-tokenizer MeCab
        # --eval-latency-unit char
        # --no-space
        token = units.value.pop()

        if (
            token == self.dict["tgt"].eos_word
            or len(states.segments.target) > self.max_len
        ):
            return DEFAULT_EOS

        if BOS_PREFIX == token:
            return None
        # startswith (rather than token[0]) so that an empty token string is
        # passed through unchanged instead of raising IndexError.
        if token.startswith(BOS_PREFIX):
            return token[1:]
        else:
            return token

    def policy(self, states):
        """Decide whether to read more source or write a target token.

        Returns ``READ_ACTION`` when no encoder states exist yet or when the
        decoder's returned ``action`` is 0, and ``WRITE_ACTION`` otherwise.
        The decoder output is kept in ``states.decoder_out`` for ``predict``.
        """

        if not getattr(states, "encoder_states", None):
            # No encoder states, read a token first
            return READ_ACTION

        # encode previous predicted target tokens
        tgt_indices = self.to_device(
            torch.LongTensor(
                [self.model.decoder.dictionary.eos()]
                + [
                    self.dict['tgt'].index(x)
                    for x in states.units.target.value
                    if x is not None
                ]
            ).unsqueeze(0)
        )

        # Current steps
        states.incremental_states["steps"] = {
            "src": states.encoder_states["encoder_out"][0].size(0),
            "tgt": 1 + len(states.units.target),
        }

        # Online only means the reading is not finished
        states.incremental_states["online"]["only"] = (
            torch.BoolTensor([not states.finish_read()])
        )

        x, outputs = self.model.decoder.forward(
            prev_output_tokens=tgt_indices,
            encoder_out=states.encoder_states,
            incremental_state=states.incremental_states,
        )

        states.decoder_out = x

        torch.cuda.empty_cache()

        if outputs.action == 0:
            return READ_ACTION
        else:
            return WRITE_ACTION

    def predict(self, states):
        """Return the arg-max target token of the last decoder position.

        The token is the target dictionary's string for the predicted index,
        or its ``eos_word`` when the index equals the target eos index.
        """
        # Predict target token from decoder states
        decoder_states = states.decoder_out

        lprobs = self.model.get_normalized_probs(
            [decoder_states[:, -1:]], log_probs=True
        )

        index = lprobs.argmax(dim=-1)[0, 0].item()

        if index != self.dict['tgt'].eos_index:
            token = self.dict['tgt'].string([index])
        else:
            token = self.dict['tgt'].eos_word

        return token
b/fairseq/examples/simultaneous_translation/models/__init__.py new file mode 100644 index 0000000..257a965 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/models/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + + +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + model_name = file[: file.find(".py")] + importlib.import_module( + "examples.simultaneous_translation.models." + model_name + ) diff --git a/fairseq/examples/simultaneous_translation/models/convtransformer_simul_trans.py b/fairseq/examples/simultaneous_translation/models/convtransformer_simul_trans.py new file mode 100644 index 0000000..4a26422 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/models/convtransformer_simul_trans.py @@ -0,0 +1,204 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. 
@register_model("convtransformer_simul_trans")
class SimulConvTransformerModel(ConvTransformerModel):
    """
    ConvTransformer model whose decoder is a ``TransformerMonotonicDecoder``.

    Implementation of the paper:

    SimulMT to SimulST: Adapting Simultaneous Text Translation to
    End-to-End Simultaneous Speech Translation

    https://www.aclweb.org/anthology/2020.aacl-main.58.pdf
    """

    @staticmethod
    def add_args(parser):
        """Register the parent model's options plus ``--train-monotonic-only``."""
        ConvTransformerModel.add_args(parser)
        parser.add_argument(
            "--train-monotonic-only",
            default=False,
            action="store_true",
            help="Only train monotonic attention",
        )

    @classmethod
    def build_decoder(cls, args, task, embed_tokens):
        """Build the monotonic decoder, optionally loading pretrained weights.

        Weights are loaded only when ``args.load_pretrained_decoder_from`` is
        set to a truthy value.
        """
        target_dictionary = task.tgt_dict

        # Imported at call time, exactly as the original does.
        from examples.simultaneous_translation.models.transformer_monotonic_attention import (
            TransformerMonotonicDecoder,
        )

        monotonic_decoder = TransformerMonotonicDecoder(
            args, target_dictionary, embed_tokens
        )

        if not getattr(args, "load_pretrained_decoder_from", None):
            return monotonic_decoder

        return checkpoint_utils.load_pretrained_component_from_model(
            component=monotonic_decoder,
            checkpoint=args.load_pretrained_decoder_from,
        )
@classmethod + def build_encoder(cls, args): + encoder = SequenceEncoder(args, AugmentedMemoryConvTransformerEncoder(args)) + + if getattr(args, "load_pretrained_encoder_from", None) is not None: + encoder = checkpoint_utils.load_pretrained_component_from_model( + component=encoder, checkpoint=args.load_pretrained_encoder_from + ) + + return encoder + + +@register_model_architecture( + "convtransformer_augmented_memory", "convtransformer_augmented_memory" +) +def augmented_memory_convtransformer_espnet(args): + convtransformer_espnet(args) + + +# ============================================================================ # +# Convtransformer +# with monotonic attention decoder +# with emformer encoder +# ============================================================================ # + + +class ConvTransformerEmformerEncoder(ConvTransformerEncoder): + def __init__(self, args): + super().__init__(args) + stride = self.conv_layer_stride(args) + trf_left_context = args.segment_left_context // stride + trf_right_context = args.segment_right_context // stride + context_config = [trf_left_context, trf_right_context] + self.transformer_layers = nn.ModuleList( + [ + NoSegAugmentedMemoryTransformerEncoderLayer( + input_dim=args.encoder_embed_dim, + num_heads=args.encoder_attention_heads, + ffn_dim=args.encoder_ffn_embed_dim, + num_layers=args.encoder_layers, + dropout_in_attn=args.dropout, + dropout_on_attn=args.dropout, + dropout_on_fc1=args.dropout, + dropout_on_fc2=args.dropout, + activation_fn=args.activation_fn, + context_config=context_config, + segment_size=args.segment_length, + max_memory_size=args.max_memory_size, + scaled_init=True, # TODO: use constant for now. 
@register_model("convtransformer_emformer")
class ConvtransformerEmformer(SimulConvTransformerModel):
    """Simultaneous ConvTransformer model built on ``ConvTransformerEmformerEncoder``."""

    @staticmethod
    def add_args(parser):
        """Register the parent options plus the segment / memory options."""
        super(ConvtransformerEmformer, ConvtransformerEmformer).add_args(parser)

        parser.add_argument(
            "--segment-length",
            type=int,
            metavar="N",
            help="length of each segment (not including left context / right context)",
        )
        parser.add_argument(
            "--segment-left-context",
            type=int,
            help="length of left context in a segment",
        )
        parser.add_argument(
            "--segment-right-context",
            type=int,
            help="length of right context in a segment",
        )
        parser.add_argument(
            "--max-memory-size",
            type=int,
            default=-1,
            # The previous help text ("Right context for the segment.") was a
            # copy-paste of the option above and described the wrong thing.
            help="maximum memory size passed to the emformer encoder layers",
        )
        parser.add_argument(
            "--amtrf-tanh-on-mem",
            default=False,
            action="store_true",
            help="whether to use tanh on memory vector",
        )

    @classmethod
    def build_encoder(cls, args):
        """Build the emformer encoder, optionally loading pretrained weights.

        Weights are loaded only when ``args.load_pretrained_encoder_from`` is
        set to a truthy value.
        """
        encoder = ConvTransformerEmformerEncoder(args)
        if getattr(args, "load_pretrained_encoder_from", None):
            encoder = checkpoint_utils.load_pretrained_component_from_model(
                component=encoder, checkpoint=args.load_pretrained_encoder_from
            )
        return encoder
class TransformerMonotonicEncoder(TransformerEncoder):
    """Transformer encoder whose layer stack is made of monotonic encoder layers."""

    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(args, dictionary, embed_tokens)

        self.dictionary = dictionary
        # Replace whatever ``self.layers`` the parent constructor set up with
        # ``args.encoder_layers`` monotonic encoder layers.
        monotonic_layers = [
            TransformerMonotonicEncoderLayer(args)
            for _ in range(args.encoder_layers)
        ]
        self.layers = nn.ModuleList(monotonic_layers)
+ """ + + def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False): + super().__init__(args, dictionary, embed_tokens, no_encoder_attn=False) + + self.dictionary = dictionary + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + TransformerMonotonicDecoderLayer(args) + for _ in range(args.decoder_layers) + ] + ) + self.policy_criterion = getattr(args, "policy_criterion", "any") + self.num_updates = None + + def set_num_updates(self, num_updates): + self.num_updates = num_updates + + def pre_attention( + self, + prev_output_tokens, + encoder_out_dict: Dict[str, List[Tensor]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + ): + positions = ( + self.embed_positions( + prev_output_tokens, + incremental_state=incremental_state, + ) + if self.embed_positions is not None + else None + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = encoder_out_dict["encoder_out"][0] + + if "encoder_padding_mask" in encoder_out_dict: + encoder_padding_mask = ( + encoder_out_dict["encoder_padding_mask"][0] + if encoder_out_dict["encoder_padding_mask"] + and len(encoder_out_dict["encoder_padding_mask"]) > 0 + else None + ) + else: + encoder_padding_mask = None + + return x, encoder_out, encoder_padding_mask + + def post_attention(self, x): + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x + + def clean_cache( + self, + incremental_state: Optional[Dict[str, Dict[str, 
Optional[Tensor]]]], + end_id: Optional[int] = None, + ): + """ + Clean cache in the monotonic layers. + The cache is generated because of a forward pass of decoder has run but no prediction, + so that the self attention key value in decoder is written in the incremental state. + end_id is the last idx of the layers + """ + if end_id is None: + end_id = len(self.layers) + + for index, layer in enumerate(self.layers): + if index < end_id: + layer.prune_incremental_state(incremental_state) + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, # unused + alignment_layer: Optional[int] = None, # unused + alignment_heads: Optional[int] = None, # unsed + ): + """ + Similar to *forward* but only return features. + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + # incremental_state = None + assert encoder_out is not None + (x, encoder_outs, encoder_padding_mask) = self.pre_attention( + prev_output_tokens, encoder_out, incremental_state + ) + attn = None + inner_states = [x] + attn_list: List[Optional[Dict[str, Tensor]]] = [] + + p_choose = torch.tensor([1.0]) + + for i, layer in enumerate(self.layers): + + x, attn, _ = layer( + x=x, + encoder_out=encoder_outs, + encoder_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + self_attn_mask=self.buffered_future_mask(x) + if incremental_state is None + else None, + ) + + inner_states.append(x) + attn_list.append(attn) + + if incremental_state is not None: + if_online = incremental_state["online"]["only"] + assert if_online is not None + if if_online.to(torch.bool): + # Online indicates that the encoder states are still changing + assert attn is not None + if self.policy_criterion == "any": + # Any head decide to read than read + head_read = 
@register_model_architecture(
    "transformer_monotonic", "transformer_monotonic_vaswani_wmt_en_fr_big"
)
def transformer_monotonic_vaswani_wmt_en_fr_big(args):
    """Apply the "big" WMT En-Fr defaults to ``args``.

    The previous body called this function itself, so selecting this
    architecture ended in a ``RecursionError``.
    """
    # NOTE(review): this mirrors what fairseq's non-monotonic
    # ``transformer_vaswani_wmt_en_fr_big`` preset is understood to do
    # (dropout defaulting to 0.1 on top of the En-De "big" preset); confirm
    # against the installed fairseq version.
    args.dropout = getattr(args, "dropout", 0.1)
    transformer_vaswani_wmt_en_de_big(args)
base_monotonic_architecture(args) diff --git a/fairseq/examples/simultaneous_translation/modules/__init__.py b/fairseq/examples/simultaneous_translation/modules/__init__.py new file mode 100644 index 0000000..f5ea180 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/modules/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import os +import importlib +from fairseq import registry + +( + build_monotonic_attention, + register_monotonic_attention, + MONOTONIC_ATTENTION_REGISTRY, + _, +) = registry.setup_registry("--simul-type") + +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + model_name = file[: file.find(".py")] + importlib.import_module( + "examples.simultaneous_translation.modules." + model_name + ) diff --git a/fairseq/examples/simultaneous_translation/modules/fixed_pre_decision.py b/fairseq/examples/simultaneous_translation/modules/fixed_pre_decision.py new file mode 100644 index 0000000..3991414 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/modules/fixed_pre_decision.py @@ -0,0 +1,190 @@ +from functools import partial + +import torch +from torch import Tensor +import math +import torch.nn.functional as F + +from . 
import register_monotonic_attention +from .monotonic_multihead_attention import ( + MonotonicAttention, + MonotonicInfiniteLookbackAttention, + WaitKAttention +) +from typing import Dict, Optional + + +def fixed_pooling_monotonic_attention(monotonic_attention): + def create_model(monotonic_attention, klass): + class FixedStrideMonotonicAttention(monotonic_attention): + def __init__(self, args): + self.waitk_lagging = 0 + self.num_heads = 0 + self.noise_mean = 0.0 + self.noise_var = 0.0 + super().__init__(args) + self.pre_decision_type = args.fixed_pre_decision_type + self.pre_decision_ratio = args.fixed_pre_decision_ratio + self.pre_decision_pad_threshold = args.fixed_pre_decision_pad_threshold + assert self.pre_decision_ratio > 1 + + if args.fixed_pre_decision_type == "average": + self.pooling_layer = torch.nn.AvgPool1d( + kernel_size=self.pre_decision_ratio, + stride=self.pre_decision_ratio, + ceil_mode=True, + ) + elif args.fixed_pre_decision_type == "last": + + def last(key): + if key.size(2) < self.pre_decision_ratio: + return key + else: + k = key[ + :, + :, + self.pre_decision_ratio - 1:: self.pre_decision_ratio, + ].contiguous() + if key.size(-1) % self.pre_decision_ratio != 0: + k = torch.cat([k, key[:, :, -1:]], dim=-1).contiguous() + return k + + self.pooling_layer = last + else: + raise NotImplementedError + + @staticmethod + def add_args(parser): + super( + FixedStrideMonotonicAttention, FixedStrideMonotonicAttention + ).add_args(parser) + parser.add_argument( + "--fixed-pre-decision-ratio", + type=int, + required=True, + help=( + "Ratio for the fixed pre-decision," + "indicating how many encoder steps will start" + "simultaneous decision making process." 
def insert_zeros(self, x):
    """Upsample the last axis of ``x`` by ``pre_decision_ratio`` with zeros.

    ``x`` is unpacked as a 3-D tensor. Each value is written to the last
    position of its stride window and every other new position is zero.
    """
    rows, tgt_len, pooled_len = x.size()
    ratio = self.pre_decision_ratio

    # Kernel of length ``ratio``: zeros followed by a single one.
    kernel = F.pad(torch.ones(1, 1, 1).to(x), (ratio - 1, 0))

    flat = x.view(-1, pooled_len).unsqueeze(1)
    upsampled = F.conv_transpose1d(flat, kernel, stride=ratio, padding=0)

    return upsampled.squeeze(1).view(rows, tgt_len, -1)
zeros if the upsampled p_choose is shorter than src_len + p_choose = torch.cat( + [ + p_choose, + torch.zeros( + p_choose.size(0), + tgt_len, + src_len - p_choose.size(-1) + ).to(p_choose) + ], + dim=2 + ) + else: + # can be larger than src_len because we used ceil before + p_choose = p_choose[:, :, :src_len] + p_choose[:, :, -1] = p_choose_pooled[:, :, -1] + + assert list(p_choose.size()) == [ + batch_size * self.num_heads, + tgt_len, + src_len, + ] + + return p_choose + + FixedStrideMonotonicAttention.__name__ = klass.__name__ + return FixedStrideMonotonicAttention + + return partial(create_model, monotonic_attention) + + +@register_monotonic_attention("waitk_fixed_pre_decision") +@fixed_pooling_monotonic_attention(WaitKAttention) +class WaitKAttentionFixedStride: + pass + + +@register_monotonic_attention("hard_aligned_fixed_pre_decision") +@fixed_pooling_monotonic_attention(MonotonicAttention) +class MonotonicAttentionFixedStride: + pass + + +@register_monotonic_attention("infinite_lookback_fixed_pre_decision") +@fixed_pooling_monotonic_attention(MonotonicInfiniteLookbackAttention) +class MonotonicInfiniteLookbackAttentionFixedStride: + pass diff --git a/fairseq/examples/simultaneous_translation/modules/monotonic_multihead_attention.py b/fairseq/examples/simultaneous_translation/modules/monotonic_multihead_attention.py new file mode 100644 index 0000000..06d20d8 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/modules/monotonic_multihead_attention.py @@ -0,0 +1,520 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def __init__(self, args):
    """Configure encoder-decoder attention and the monotonic-energy options.

    Reads the decoder/encoder dimensions, attention dropout, noise settings
    and energy-bias settings from ``args``.
    """
    super().__init__(
        embed_dim=args.decoder_embed_dim,
        num_heads=args.decoder_attention_heads,
        kdim=getattr(args, "encoder_embed_dim", None),
        vdim=getattr(args, "encoder_embed_dim", None),
        dropout=args.attention_dropout,
        encoder_decoder_attention=True,
    )

    self.soft_attention = False

    # Fall back to the same value as the ``--attention-eps`` CLI default; the
    # previous fallback was ``True``, i.e. an epsilon of 1.
    self.eps = getattr(args, "attention_eps", 1e-6)
    self.mass_preservation = getattr(args, "mass_preservation", True)

    self.noise_type = args.noise_type
    self.noise_mean = args.noise_mean
    self.noise_var = args.noise_var

    self.energy_bias_init = args.energy_bias_init
    # A learnable scalar bias when ``--energy-bias`` is set, else the constant 0.
    self.energy_bias = (
        nn.Parameter(self.energy_bias_init * torch.ones([1]))
        if args.energy_bias is True
        else 0
    )

    # Projections keyed by energy type; this class registers only "monotonic".
    self.k_in_proj = {"monotonic": self.k_proj}
    self.q_in_proj = {"monotonic": self.q_proj}
    self.chunk_size = None
def p_choose(self, query, key, key_padding_mask, incremental_states=None):
    """Return the stepwise selection probability for ``query`` against ``key``.

    Delegates to ``p_choose_from_qk``. The previous body passed ``self``
    explicitly to the already-bound method, which shifted every argument by
    one position, and it dropped the incremental state.
    """
    # Forwarded positionally because overrides of ``p_choose_from_qk`` in
    # this file name the fourth parameter ``incremental_state`` (singular).
    return self.p_choose_from_qk(
        query, key, key_padding_mask, incremental_states
    )
monotonic_cache["head_read"] = ( + monotonic_step.eq(max_steps) & (p_choose_i < 0.5) + ) + self._set_monotonic_buffer(incremental_state, monotonic_cache) + + # 2. Update alpha + alpha = ( + p_choose + .new_zeros([self.num_heads, src_len]) + .scatter( + 1, + (monotonic_step) + .view(self.num_heads, 1).clamp(0, src_len - 1), + 1 + ) + ) + + if not self.mass_preservation: + alpha = alpha.masked_fill( + (monotonic_step == max_steps) + .view(self.num_heads, 1), + 0 + ) + + # 4. Compute Beta + if self.soft_attention: + monotonic_step = monotonic_step.t() + beta_mask = torch.arange(src_len).expand_as(alpha).gt(monotonic_step).unsqueeze(1) + # If it's soft attention just do softmax on current context + soft_energy = self.energy_from_qk( + query, + key, + "soft" + ) + beta = torch.nn.functional.softmax( + soft_energy.masked_fill(beta_mask, -float("inf")), dim=-1 + ) + # It could happen that a head doesn't move at all + beta = beta.masked_fill(monotonic_step.eq(0).unsqueeze(1), 0) + else: + # If it's hard attention just select the last state + beta = alpha + + return p_choose, alpha, beta + + def monotonic_attention_process_train( + self, + query: Optional[Tensor], + key: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + ): + """ + Calculating monotonic attention process for training + Including: + stepwise probability: p_choose + expected hard alignment: alpha + expected soft attention: beta + """ + assert query is not None + assert key is not None + + # 1. compute stepwise probability + p_choose = self.p_choose_from_qk(query, key, key_padding_mask) + + # 2. compute expected_alignment + alpha = expected_alignment_from_p_choose( + p_choose, + key_padding_mask, + eps=self.eps, + ) + + if self.mass_preservation: + alpha = mass_preservation( + alpha, key_padding_mask + ) + + # 3. 
compute expected soft attention (soft aligned model only) + if self.soft_attention: + soft_energy = self.energy_from_qk( + query, + key, + "soft", + key_padding_mask=None, + ) + + beta = expected_soft_attention( + alpha, + soft_energy, + padding_mask=key_padding_mask, + chunk_size=self.chunk_size, + eps=self.eps, + ) + else: + beta = alpha + soft_energy = alpha + + return p_choose, alpha, beta, soft_energy + + def forward( + self, + query: Optional[Tensor], + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + attn_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, static_kv: bool = False, need_head_weights: bool = False, + ): + """ + query: tgt_len, bsz, embed_dim + key: src_len, bsz, embed_dim + value: src_len, bsz, embed_dim + """ + + assert attn_mask is None + assert query is not None + assert key is not None + assert value is not None + + tgt_len, bsz, embed_dim = query.size() + src_len = value.size(0) + + if key_padding_mask is not None: + assert not key_padding_mask[:, 0].any(), ( + "Only right padding is supported." 
+ ) + key_padding_mask = ( + key_padding_mask + .unsqueeze(1) + .expand([bsz, self.num_heads, src_len]) + .contiguous() + .view(-1, src_len) + ) + + if incremental_state is not None: + # Inference + ( + p_choose, alpha, beta + ) = self.monotonic_attention_process_infer( + query, key, incremental_state + ) + soft_energy = beta + else: + # Train + ( + p_choose, alpha, beta, soft_energy + ) = self.monotonic_attention_process_train( + query, key, key_padding_mask + ) + + v = self.v_proj(value) + length, bsz, _ = v.size() + v = ( + v.contiguous() + .view(length, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + attn = torch.bmm(beta.type_as(v), v) + + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + + attn = self.out_proj(attn) + + p_choose = p_choose.view(bsz, self.num_heads, tgt_len, src_len) + alpha = alpha.view(bsz, self.num_heads, tgt_len, src_len) + beta = beta.view(bsz, self.num_heads, tgt_len, src_len) + + return attn, { + "p_choose": p_choose, + "alpha": alpha, + "beta": beta, + "soft_energy": soft_energy, + } + + def _get_monotonic_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]): + maybe_incremental_state = self.get_incremental_state( + incremental_state, + 'monotonic', + ) + if maybe_incremental_state is None: + typed_empty_dict: Dict[str, Optional[Tensor]] = {} + return typed_empty_dict + else: + return maybe_incremental_state + + def _set_monotonic_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], buffer: Dict[str, Optional[Tensor]]): + self.set_incremental_state( + incremental_state, + 'monotonic', + buffer, + ) + + +@register_monotonic_attention("infinite_lookback") +class MonotonicInfiniteLookbackAttention( + MonotonicAttention +): + def __init__(self, args): + super().__init__(args) + self.soft_attention = True + self.init_soft_attention() + + def init_soft_attention(self): + self.k_proj_soft = nn.Linear(self.kdim, self.embed_dim, bias=True) + 
self.q_proj_soft = nn.Linear(self.embed_dim, self.embed_dim, bias=True) + self.k_in_proj["soft"] = self.k_proj_soft + self.q_in_proj["soft"] = self.q_proj_soft + + if self.qkv_same_dim: + # Empirically observed the convergence to be much better with + # the scaled initialization + nn.init.xavier_uniform_( + self.k_in_proj["soft"].weight, gain=1 / math.sqrt(2) + ) + nn.init.xavier_uniform_( + self.q_in_proj["soft"].weight, gain=1 / math.sqrt(2) + ) + else: + nn.init.xavier_uniform_(self.k_in_proj["soft"].weight) + nn.init.xavier_uniform_(self.q_in_proj["soft"].weight) + + +@register_monotonic_attention("waitk") +class WaitKAttention( + MonotonicInfiniteLookbackAttention +): + """ + STACL: Simultaneous Translation with Implicit Anticipation and + Controllable Latency using Prefix-to-Prefix Framework + https://www.aclweb.org/anthology/P19-1289/ + """ + def __init__(self, args): + super().__init__(args) + self.q_in_proj["soft"] = self.q_in_proj["monotonic"] + self.k_in_proj["soft"] = self.k_in_proj["monotonic"] + + self.waitk_lagging = args.waitk_lagging + assert self.waitk_lagging > 0, ( + f"Lagging has to been larger than 0, get {self.waitk_lagging}." 
@register_monotonic_attention("chunkwise")
class ChunkwiseAttention(
    MonotonicInfiniteLookbackAttention
):
    """
    Infinite-lookback monotonic attention restricted to a fixed-size chunk.

    The chunk size is read from ``args.mocha_chunk_size`` and must be
    larger than 1. It is stored on ``self.chunk_size``.
    """
    def __init__(self, args):
        super().__init__(args)
        self.chunk_size = args.mocha_chunk_size
        assert self.chunk_size > 1

    @staticmethod
    def add_args(parser):
        """Register the parent options plus ``--mocha-chunk-size``."""
        # A static method has no implicit class, so zero-argument super()
        # is unavailable, and the one-argument form returns an unbound
        # super object on which attribute lookup fails. The two-argument
        # form (same as WaitKAttention.add_args) resolves add_args on the
        # classes that follow MonotonicInfiniteLookbackAttention in the MRO.
        super(
            MonotonicInfiniteLookbackAttention,
            MonotonicInfiniteLookbackAttention
        ).add_args(parser)

        parser.add_argument(
            "--mocha-chunk-size", type=int,
            required=True, help="Mocha chunk size"
        )
class TransformerMonotonicEncoderLayer(TransformerEncoderLayer):
    """Encoder layer whose self-attention cannot look at later positions."""

    def forward(self, x, encoder_padding_mask):
        # x is unpacked as a 3-D tensor; its first dimension is the
        # sequence length used for the square mask.
        num_steps, _, _ = x.size()
        # Additive mask: -inf strictly above the diagonal, 0 elsewhere,
        # created with the dtype and device of x.
        causal_mask = torch.triu(
            x.new_full((num_steps, num_steps), float("-inf")),
            diagonal=1,
        )
        return super().forward(x, encoder_padding_mask, causal_mask)
+ need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + assert self.encoder_attn is not None + residual = x + if 
self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None diff --git a/fairseq/examples/simultaneous_translation/tests/test_alignment_train.py b/fairseq/examples/simultaneous_translation/tests/test_alignment_train.py new file mode 100644 index 0000000..2ad4ef1 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/tests/test_alignment_train.py @@ -0,0 +1,88 @@ +import unittest + +import numpy as np 
+import torch + +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from torch.testing._internal.common_utils import TestCase +from examples.simultaneous_translation.utils.functions import exclusive_cumprod + + +TEST_CUDA = torch.cuda.is_available() + + +class AlignmentTrainTest(TestCase): + def _test_custom_alignment_train_ref(self, p_choose, eps): + cumprod_1mp = exclusive_cumprod(1 - p_choose, dim=2, eps=eps) + cumprod_1mp_clamp = torch.clamp(cumprod_1mp, eps, 1.0) + + bsz = p_choose.size(0) + tgt_len = p_choose.size(1) + src_len = p_choose.size(2) + + alpha_0 = p_choose.new_zeros([bsz, 1, src_len]) + alpha_0[:, :, 0] = 1.0 + + previous_alpha = [alpha_0] + + for i in range(tgt_len): + # p_choose: bsz , tgt_len, src_len + # cumprod_1mp_clamp : bsz, tgt_len, src_len + # previous_alpha[i]: bsz, 1, src_len + # alpha_i: bsz, src_len + alpha_i = ( + p_choose[:, i] + * cumprod_1mp[:, i] + * torch.cumsum( + previous_alpha[i][:, 0] / cumprod_1mp_clamp[:, i], dim=1 + ) + ).clamp(0, 1.0) + + previous_alpha.append(alpha_i.unsqueeze(1)) + + # alpha: bsz * num_heads, tgt_len, src_len + alpha = torch.cat(previous_alpha[1:], dim=1) + return alpha + + def _test_custom_alignment_train_impl(self, p_choose, alpha, eps): + if p_choose.is_cuda: + from alignment_train_cuda_binding import alignment_train_cuda # @manual=//deeplearning/projects/fairseq-py:alignment_train_cuda_binding + alignment_train_cuda(p_choose, alpha, eps) + else: + from alignment_train_cpu_binding import alignment_train_cpu # @manual=//deeplearning/projects/fairseq-py:alignment_train_cpu_binding + alignment_train_cpu(p_choose, alpha, eps) + + @settings(deadline=None) + @given( + bsz=st.integers(1, 100), + tgt_len=st.integers(1, 100), + src_len=st.integers(1, 550), + device=st.sampled_from(["cpu", "cuda"]), + ) + def test_alignment_train(self, bsz, tgt_len, src_len, device): + eps = 1e-6 + + assume(device == "cpu" or TEST_CUDA) + p_choose = torch.rand(bsz, tgt_len, src_len, 
device=device) + + # run the alignment with the custom operator + alpha_act = p_choose.new_zeros([bsz, tgt_len, src_len]) + self._test_custom_alignment_train_impl(p_choose, alpha_act, eps) + + # runu the alignment with the ref implementation + alpha_ref = self._test_custom_alignment_train_ref(p_choose, eps) + + # verify the results + alpha_act = alpha_act.cpu().detach().numpy() + alpha_ref = alpha_ref.cpu().detach().numpy() + np.testing.assert_allclose( + alpha_act, + alpha_ref, + atol=1e-3, + rtol=1e-3, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/examples/simultaneous_translation/tests/test_text_models.py b/fairseq/examples/simultaneous_translation/tests/test_text_models.py new file mode 100644 index 0000000..19d6356 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/tests/test_text_models.py @@ -0,0 +1,407 @@ +import argparse +import unittest +from typing import Any, Dict + +import torch +from examples.simultaneous_translation.models import ( + transformer_monotonic_attention +) + + +from tests.test_roberta import FakeTask + + +DEFAULT_CONFIG = { + "attention_eps": 1e-6, + "mass_preservation": True, + "noise_type": "flat", + "noise_mean": 0.0, + "noise_var": 1.0, + "energy_bias_init": -2, + "energy_bias": True +} + + +PAD_INDEX = 1 + + +def generate_config(overrides_kv): + new_dict = {key: value for key, value in DEFAULT_CONFIG.items()} + for key, value in overrides_kv.items(): + new_dict[key] = value + return new_dict + + +def make_sample_with_padding(longer_src=False) -> Dict[str, Any]: + tokens_1 = torch.LongTensor( + [ + [2, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15, 2], + [ + 2, 11, 12, 14, 15, 10, 11, 12, 13, 14, 15, 2, + PAD_INDEX, PAD_INDEX + ], + ] + ) + tokens_2 = torch.LongTensor( + [ + [2, 11, 12, 13, 14, 2, PAD_INDEX, PAD_INDEX], + [2, 11, 22, 33, 2, PAD_INDEX, PAD_INDEX, PAD_INDEX] + ] + ) + if longer_src: + src_tokens = tokens_1[:, 1:] + prev_output_tokens = tokens_2 + else: + src_tokens = tokens_2[:, 
def expected_alignment_formula(
    p_choose,
    mass_perservation=True,
    padding_mask=None
):
    """
    Reference (loop-based) computation of the expected alignment.

    Online and Linear-Time Attention by Enforcing Monotonic Alignments
    https://arxiv.org/pdf/1704.00784.pdf
    Eq 18, 19

    p_choose: bsz, tgt_len, src_len
    mass_perservation: when True, the result is post-processed with
        mass_perservation_formula.
    padding_mask: optional, bsz_pad x src_len; it is repeated
        bsz // bsz_pad times per row to cover a flattened head dimension.

    Returns alpha with the same shape as p_choose.
    """
    bsz, tgt_len, src_len = p_choose.size()
    alpha = torch.zeros_like(p_choose)

    if padding_mask is not None:
        bsz_pad = padding_mask.size(0)
        num_heads = bsz // bsz_pad
        padding_mask = (
            padding_mask
            .unsqueeze(1)
            .expand([bsz_pad, num_heads, src_len])
            .contiguous()
            .view(-1, src_len)
        )

        p_choose = p_choose.masked_fill(padding_mask.unsqueeze(1), 0)

    # Deliberately naive triple loop: this is the ground truth that the
    # vectorised implementation is compared against.
    for bsz_i in range(bsz):
        for i in range(tgt_len):
            for j in range(src_len):
                if i == 0:
                    if j == 0:
                        # First source token
                        alpha[bsz_i, i, j] = p_choose[bsz_i, i, j]
                    else:
                        # First target token
                        alpha[bsz_i, i, j] = (
                            p_choose[bsz_i, i, j]
                            * torch.prod(
                                1 - p_choose[bsz_i, i, :j]
                            )
                        )
                else:
                    alpha[bsz_i, i, j] = alpha[bsz_i, i - 1, j]
                    for k in range(j):
                        alpha[bsz_i, i, j] += (
                            alpha[bsz_i, i - 1, k]
                            * torch.prod(
                                1 - p_choose[bsz_i, i, k:j]
                            )
                        )
                    alpha[bsz_i, i, j] *= p_choose[bsz_i, i, j]

    # Only mask when a mask was supplied; padding_mask defaults to None
    # and None has no unsqueeze().
    if padding_mask is not None:
        alpha = alpha.masked_fill(padding_mask.unsqueeze(1), 0)

    if mass_perservation:
        alpha = mass_perservation_formula(alpha, False, padding_mask)

    return alpha
class InfiniteLookbackTestCase(
    unittest.TestCase,
    MonotonicAttentionTestAbstractClass
):
    """Tests for the "infinite_lookback" monotonic attention model."""

    def setUp(self):
        self.model = build_transformer_monotonic_attention(
            **generate_config(
                {
                    "simul_type": "infinite_lookback"
                }
            )
        )
        self.model.train()

    # The test moves the model and its inputs to the GPU, so it is skipped
    # (rather than erroring) when no CUDA device is available.
    @unittest.skipIf(not torch.cuda.is_available(), "test requires CUDA")
    def test_fp16_for_long_input(self):
        """Half-precision forward pass on a long input must not yield NaNs."""
        sample = {
            "net_input": {
                "src_tokens": torch.LongTensor([7] * 1000 + [2]).cuda().unsqueeze(0),
                "prev_output_tokens": torch.LongTensor([7] * 1000 + [2]).cuda().unsqueeze(0),
                "src_lengths": torch.LongTensor([1000]).cuda(),
            },
            "target": torch.LongTensor([2] + [7] * 1000).unsqueeze(0).cuda()
        }
        self.model.cuda().half()
        _, extra_out = self.model.forward(**sample["net_input"])
        for item in extra_out.attn_list:
            for key in ["p_choose", "alpha", "beta", "soft_energy"]:
                self.assertFalse(torch.isnan(item[key]).any())

    def test_expected_attention(self):
        """Compare the model's beta with the loop-based reference formula."""
        for longer_src in [True, False]:
            sample = make_sample_with_padding(longer_src)
            _, extra_out = self.model.forward(**sample["net_input"])
            for item in extra_out.attn_list:
                p_choose = item["p_choose"]
                alpha_system = item["alpha"]
                beta_system = item["beta"]
                soft_energy_system = item["soft_energy"]
                self.assertTrue(beta_system.size() == alpha_system.size())
                self.assertTrue(p_choose.size() == alpha_system.size())

                bsz, num_head, tgt_len, src_len = alpha_system.size()

                # Flatten the head dimension into the batch dimension, the
                # layout the reference formulas expect.
                alpha_system = alpha_system.view(-1, tgt_len, src_len)
                beta_system = beta_system.view(-1, tgt_len, src_len)
                p_choose = p_choose.view(-1, tgt_len, src_len)
                soft_energy_system = soft_energy_system.view(-1, tgt_len, src_len)

                alpha_real = expected_alignment_formula(
                    p_choose,
                    self.model.decoder.layers[0].encoder_attn.mass_preservation,
                    sample["net_input"]["src_tokens"].eq(PAD_INDEX)
                )

                # A missing or falsy chunk_size falls back to a window
                # larger than any test sequence.
                beta_real = expected_soft_attention_formula(
                    alpha_real,
                    soft_energy_system,
                    sample["net_input"]["src_tokens"].eq(PAD_INDEX),
                    chunksize=getattr(
                        self.model.decoder.layers[0].encoder_attn,
                        "chunk_size",
                        int(1e10)
                    ) or int(1e10)
                )

                self.assertTrue(
                    torch.abs(beta_system - beta_real).le(1e-5).all(),
                )
WaitkTestCase(InfiniteLookbackTestCase): + def setUp(self): + self.model = build_transformer_monotonic_attention( + **generate_config( + { + "simul_type": "waitk", + "waitk_lagging": 3, + } + ) + ) + + def check_waitk(self, p_choose, lagging, padding_mask): + bsz, tgt_len, src_len = p_choose.size() + for bsz_i in range(bsz): + for i in range(tgt_len): + for j in range(src_len): + if not padding_mask[bsz_i, j]: + if j - i == lagging - 1: + self.assertTrue(p_choose[bsz_i, i, j] == 1) + else: + self.assertTrue(p_choose[bsz_i, i, j] == 0) + + def test_waitk_p_choose(self): + for longer_src in [True, False]: + for k in [1, 3, 10, 20, 100]: + sample = make_sample_with_padding(longer_src) + model = build_transformer_monotonic_attention( + **generate_config( + { + "simul_type": "waitk", + "waitk_lagging": k, + } + ) + ) + model.train() + _, extra_out = model.forward(**sample["net_input"]) + for item in extra_out.attn_list: + p_choose = item["p_choose"] + bsz, num_heads, tgt_len, src_len = p_choose.size() + padding_mask = sample["net_input"]["src_tokens"].eq(PAD_INDEX) + padding_mask = ( + padding_mask + .unsqueeze(1) + .expand([bsz, num_heads, src_len]) + .contiguous() + .view(-1, src_len) + ) + p_choose = p_choose.view(bsz * num_heads, tgt_len, src_len) + self.check_waitk(p_choose, k, padding_mask) diff --git a/fairseq/examples/simultaneous_translation/utils/__init__.py b/fairseq/examples/simultaneous_translation/utils/__init__.py new file mode 100644 index 0000000..1e9ce84 --- /dev/null +++ b/fairseq/examples/simultaneous_translation/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def prob_check(tensor, eps=1e-10):
    """
    Assert that ``tensor`` contains no NaN and that every value lies in
    [0, 1], with a tolerance of ``eps`` on both ends.

    Raises AssertionError when either condition is violated.
    """
    assert not torch.isnan(tensor).any(), (
        "Nan in a probability tensor."
    )
    # Add the eps here to prevent errors introduced by precision
    assert tensor.le(1.0 + eps).all() and tensor.ge(0.0 - eps).all(), (
        "Incorrect values in a probability tensor"
        ", 0.0 <= tensor <= 1.0"
    )


def exclusive_cumprod(tensor, dim: int, eps: float = 1e-10):
    """
    Implementing exclusive cumprod.
    There is cumprod in pytorch, however there is no exclusive mode.
    cumprod(x) = [x1, x1x2, x1x2x3, ..., prod_{i=1}^n x_i]
    exclusive means
    cumprod(x) = [1, x1, x1x2, x1x2x3, ..., prod_{i=1}^{n-1} x_i]

    tensor: non-negative tensor (see safe_cumprod)
    dim: dimension to accumulate over; any valid dimension of ``tensor``,
        including negative indices
    eps: forwarded to safe_cumprod

    Returns a tensor with the same shape as ``tensor``.
    """
    tensor_size = list(tensor.size())
    tensor_size[dim] = 1
    # Prepend a slice of ones so that the inclusive product is shifted by one.
    return_tensor = safe_cumprod(
        torch.cat([torch.ones(tensor_size).type_as(tensor), tensor], dim=dim),
        dim=dim,
        eps=eps,
    )

    # Drop the trailing element along `dim`. narrow() works for every
    # dimension, so there is no need for a per-dimension slicing ladder.
    return return_tensor.narrow(dim, 0, tensor.size(dim))


def safe_cumprod(tensor, dim: int, eps: float = 1e-10):
    """
    An implementation of cumprod to prevent precision issue.
    cumprod(x)
    = [x1, x1x2, x1x2x3, ....]
    = [exp(log(x1)), exp(log(x1) + log(x2)), exp(log(x1) + log(x2) + log(x3)), ...]
    = exp(cumsum(log(x)))

    ``eps`` is added to the input before taking the logarithm.

    Raises RuntimeError when ``tensor + eps`` has a negative entry.
    """

    if (tensor + eps < 0).any().item():
        raise RuntimeError(
            "Safe cumprod can only take non-negative tensors as input. "
            "Consider use torch.cumprod if you want to calculate negative values."
        )

    log_tensor = torch.log(tensor + eps)
    cumsum_log_tensor = torch.cumsum(log_tensor, dim)
    exp_cumsum_log_tensor = torch.exp(cumsum_log_tensor)
    return exp_cumsum_log_tensor


def moving_sum(x, start_idx: int, end_idx: int):
    """
    From MONOTONIC CHUNKWISE ATTENTION
    https://arxiv.org/pdf/1712.05382.pdf
    Equation (18)

    x = [x_1, x_2, ..., x_N]
    MovingSum(x, start_idx, end_idx)_n = Sigma_{m=n−(start_idx−1)}^{n+end_idx-1} x_m
    for n in {1, 2, 3, ..., N}

    x : batch_size, tgt_len, src_len (the sum runs along the last dimension)
    start_idx : start idx, must be > 0
    end_idx : end idx, must be > 0

    Returns a tensor with the same shape as ``x``.

    Example (one column of the matrix below is one sequence, i.e. the
    values along the last dimension of ``x``)
    src_len = 5
    number of sequences = 3
    x =
       [[ 0, 5, 10],
        [ 1, 6, 11],
        [ 2, 7, 12],
        [ 3, 8, 13],
        [ 4, 9, 14]]

    MovingSum(x, 3, 1) =
       [[ 0,  5, 10],
        [ 1, 11, 21],
        [ 3, 18, 33],
        [ 6, 21, 36],
        [ 9, 24, 39]]

    MovingSum(x, 1, 3) =
       [[ 3, 18, 33],
        [ 6, 21, 36],
        [ 9, 24, 39],
        [ 7, 17, 27],
        [ 4,  9, 14]]
    """
    # TODO: Make dimension configurable
    assert start_idx > 0 and end_idx > 0
    batch_size, tgt_len, src_len = x.size()
    x = x.view(-1, src_len).unsqueeze(1)
    # batch_size, 1, src_len
    moving_sum_weight = torch.ones([1, 1, end_idx + start_idx - 1]).type_as(x)

    # A convolution with an all-ones kernel is a windowed sum; the input is
    # over-padded here and the surplus is trimmed below.
    moving_sum = torch.nn.functional.conv1d(
        x, moving_sum_weight, padding=start_idx + end_idx - 1
    ).squeeze(1)

    moving_sum = moving_sum[:, end_idx:-start_idx]

    assert src_len == moving_sum.size(1)
    assert batch_size * tgt_len == moving_sum.size(0)

    moving_sum = moving_sum.view(batch_size, tgt_len, src_len)

    return moving_sum
def expected_soft_attention(
    alpha: Tensor,
    soft_energy: Tensor,
    padding_mask: Optional[Tensor] = None,
    chunk_size: Optional[int] = None,
    eps: float = 1e-10
):
    """
    Function to compute expected soft attention for
    monotonic infinite lookback attention from
    expected alignment and soft energy.

    Reference:
    Monotonic Chunkwise Attention
    https://arxiv.org/abs/1712.05382

    Monotonic Infinite Lookback Attention for Simultaneous Machine Translation
    https://arxiv.org/abs/1906.05218

    alpha: bsz, tgt_len, src_len
    soft_energy: bsz, tgt_len, src_len
    padding_mask: bsz, src_len
    chunk_size: window size for the chunkwise variant; None selects the
        infinite lookback variant
    eps: small constant added to the exponentiated energies and to the
        denominators

    Returns beta, clamped to [0, 1] and cast back to the dtype of alpha.
    """
    if padding_mask is not None:
        # Padded source positions receive no alignment mass and -inf energy.
        alpha = alpha.masked_fill(padding_mask.unsqueeze(1), 0.0)
        soft_energy = soft_energy.masked_fill(
            padding_mask.unsqueeze(1), -float("inf")
        )

    prob_check(alpha)

    dtype = alpha.dtype

    # Compute in float32; the result is cast back to `dtype` at the end.
    alpha = alpha.float()
    soft_energy = soft_energy.float()

    # Subtract the per-row maximum before exponentiating.
    soft_energy = soft_energy - soft_energy.max(dim=2, keepdim=True)[0]
    exp_soft_energy = torch.exp(soft_energy) + eps

    if chunk_size is not None:
        # Chunkwise
        beta = (
            exp_soft_energy
            * moving_sum(
                alpha / (eps + moving_sum(exp_soft_energy, chunk_size, 1)),
                1, chunk_size
            )
        )
    else:
        # Infinite lookback
        # Notice that infinite lookback is a special case of chunkwise
        # where chunksize = inf
        inner_items = alpha / (eps + torch.cumsum(exp_soft_energy, dim=2))

        # flip -> cumsum -> flip is a cumulative sum taken from the right.
        beta = (
            exp_soft_energy
            * torch.cumsum(inner_items.flip(dims=[2]), dim=2)
            .flip(dims=[2])
        )

    if padding_mask is not None:
        beta = beta.masked_fill(
            padding_mask.unsqueeze(1).to(torch.bool), 0.0)

    # Mix precision to prevent overflow for fp16
    beta = beta.type(dtype)

    beta = beta.clamp(0, 1)

    prob_check(beta)

    return beta
def waitk_p_choose(
    tgt_len: int,
    src_len: int,
    bsz: int,
    waitk_lagging: int,
    key_padding_mask: Optional[Tensor] = None,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None
):
    """
    Build the deterministic wait-k selection matrix.

    Target step n (0-based) selects source position n + waitk_lagging - 1;
    steps that would run past the source are clamped to the last valid
    position. Every other entry is 0.

    tgt_len: number of target steps (ignored when incremental_state is given)
    src_len: number of source positions
    bsz: number of rows to produce
    waitk_lagging: the k of wait-k
    key_padding_mask: optional, bsz x src_len, true at padded positions.
        If any row is padded in column 0 the batch is treated as left
        padded and each row is shifted by its number of padded positions.
    incremental_state: optional; when given, the number of target steps is
        read from incremental_state["steps"]["tgt"] and only the last
        step is returned.

    Returns a float tensor of shape (bsz, tgt_len, src_len), or
    (bsz, 1, src_len) in incremental mode. When src_len < waitk_lagging an
    all-zero tensor is returned.
    """

    max_src_len = src_len
    if incremental_state is not None:
        # Retrieve target length from incremental states
        # For inference the length of query is always 1
        max_tgt_len = incremental_state["steps"]["tgt"]
        assert max_tgt_len is not None
        max_tgt_len = int(max_tgt_len)
    else:
        max_tgt_len = tgt_len

    if max_src_len < waitk_lagging:
        if incremental_state is not None:
            max_tgt_len = 1
        return torch.zeros(
            bsz, max_tgt_len, max_src_len
        )

    # Assuming the p_choose looks like this for wait k=3
    # src_len = 7, max_tgt_len = 5
    #   [0, 0, 1, 0, 0, 0, 0]
    #   [0, 0, 0, 1, 0, 0, 0]
    #   [0, 0, 0, 0, 1, 0, 0]
    #   [0, 0, 0, 0, 0, 1, 0]
    #   [0, 0, 0, 0, 0, 0, 1]
    # linearize the p_choose matrix:
    # [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0...]
    # The indices of linearized matrix that equals 1 is
    # 2 + 7 * 0
    # 3 + 7 * 1
    # ...
    # n + src_len * n + k - 1 = n * (src_len + 1) + k - 1
    # n from 0 to max_tgt_len - 1
    #
    # First, generate the indices (activate_indices_offset: bsz, max_tgt_len)
    # Second, scatter a zeros tensor (bsz, max_tgt_len * src_len)
    # with activate_indices_offset
    # Third, resize the tensor to (bsz, max_tgt_len, src_len)

    activate_indices_offset = (
        (
            torch.arange(max_tgt_len) * (max_src_len + 1)
            + waitk_lagging - 1
        )
        .unsqueeze(0)
        .expand(bsz, max_tgt_len)
        .long()
    )

    if key_padding_mask is not None:
        if key_padding_mask[:, 0].any():
            # Left padding
            # The index tensor is an expanded view whose rows share memory,
            # so an in-place `+=` is rejected by PyTorch; add out of place.
            # The pad counts are moved to the dtype/device of the indices.
            pad_counts = key_padding_mask.sum(dim=1, keepdim=True)
            activate_indices_offset = (
                activate_indices_offset
                + pad_counts.to(activate_indices_offset)
            )

    # Need to clamp the indices that are too large
    activate_indices_offset = (
        activate_indices_offset
        .clamp(
            0,
            min(
                [
                    max_tgt_len,
                    max_src_len - waitk_lagging + 1
                ]
            ) * max_src_len - 1
        )
    )

    p_choose = torch.zeros(bsz, max_tgt_len * max_src_len)

    p_choose = p_choose.scatter(
        1,
        activate_indices_offset,
        1.0
    ).view(bsz, max_tgt_len, max_src_len)

    if key_padding_mask is not None:
        # Follow the mask's device, then zero out padded source positions.
        p_choose = p_choose.to(key_padding_mask.device)
        p_choose = p_choose.masked_fill(
            key_padding_mask.unsqueeze(1).to(torch.bool), 0
        )

    if incremental_state is not None:
        p_choose = p_choose[:, -1:]

    return p_choose.float()
""" + + noise = 0 + if training: + # add noise here to encourage discretness + noise = ( + torch.normal(noise_mean, noise_var, energy.size()) + .type_as(energy) + .to(energy.device) + ) + + p_choose = torch.sigmoid(energy + noise) + + # p_choose: bsz * self.num_heads, tgt_len, src_len + return p_choose diff --git a/fairseq/examples/speech_recognition/README.md b/fairseq/examples/speech_recognition/README.md new file mode 100644 index 0000000..5f9b278 --- /dev/null +++ b/fairseq/examples/speech_recognition/README.md @@ -0,0 +1,87 @@ +### 2021 Update: We are merging this example into the [S2T framework](../speech_to_text), which supports more generic speech-to-text tasks (e.g. speech translation) and more flexible data processing pipelines. Please stay tuned. + +# Speech Recognition +`examples/speech_recognition` is implementing ASR task in Fairseq, along with needed features, datasets, models and loss functions to train and infer model described in [Transformers with convolutional context for ASR (Abdelrahman Mohamed et al., 2019)](https://arxiv.org/abs/1904.11660). + + +## Additional dependencies +On top of main fairseq dependencies there are couple more additional requirements. + +1) Please follow the instructions to install [torchaudio](https://github.com/pytorch/audio). This is required to compute audio fbank features. +2) [Sclite](http://www1.icsi.berkeley.edu/Speech/docs/sctk-1.2/sclite.htm#sclite_name_0) is used to measure WER. Sclite can be downloaded and installed from source from sctk package [here](http://www.openslr.org/4/). Training and inference doesn't require Sclite dependency. +3) [sentencepiece](https://github.com/google/sentencepiece) is required in order to create dataset with word-piece targets. 
+ +## Preparing librispeech data +``` +./examples/speech_recognition/datasets/prepare-librispeech.sh $DIR_TO_SAVE_RAW_DATA $DIR_FOR_PREPROCESSED_DATA +``` + +## Training librispeech data +``` +python train.py $DIR_FOR_PREPROCESSED_DATA --save-dir $MODEL_PATH --max-epoch 80 --task speech_recognition --arch vggtransformer_2 --optimizer adadelta --lr 1.0 --adadelta-eps 1e-8 --adadelta-rho 0.95 --clip-norm 10.0 --max-tokens 5000 --log-format json --log-interval 1 --criterion cross_entropy_acc --user-dir examples/speech_recognition/ +``` + +## Inference for librispeech +`$SET` can be `test_clean` or `test_other` +Any checkpoint in `$MODEL_PATH` can be selected. In this example we are working with `checkpoint_last.pt` +``` +python examples/speech_recognition/infer.py $DIR_FOR_PREPROCESSED_DATA --task speech_recognition --max-tokens 25000 --nbest 1 --path $MODEL_PATH/checkpoint_last.pt --beam 20 --results-path $RES_DIR --batch-size 40 --gen-subset $SET --user-dir examples/speech_recognition/ +``` + +## Inference for librispeech +``` +sclite -r ${RES_DIR}/ref.word-checkpoint_last.pt-${SET}.txt -h ${RES_DIR}/hypo.word-checkpoint_last.pt-${SET}.txt -i rm -o all stdout > $RES_REPORT +``` +`Sum/Avg` row from first table of the report has WER + +## Using flashlight (previously called [wav2letter](https://github.com/facebookresearch/wav2letter)) components +[flashlight](https://github.com/facebookresearch/flashlight) now has integration with fairseq. Currently this includes: + +* AutoSegmentationCriterion (ASG) +* flashlight-style Conv/GLU model +* flashlight's beam search decoder + +To use these, follow the instructions on [this page](https://github.com/flashlight/flashlight/tree/e16682fa32df30cbf675c8fe010f929c61e3b833/bindings/python) to install python bindings. **Flashlight v0.3.2** must be used to install the bindings. Running: +``` +git clone --branch v0.3.2 https://github.com/flashlight/flashlight +``` +will properly clone and check out this version. 
+ +## Training librispeech data (flashlight style, Conv/GLU + ASG loss) +Training command: +``` +python train.py $DIR_FOR_PREPROCESSED_DATA --save-dir $MODEL_PATH --max-epoch 100 --task speech_recognition --arch w2l_conv_glu_enc --batch-size 4 --optimizer sgd --lr 0.3,0.8 --momentum 0.8 --clip-norm 0.2 --max-tokens 50000 --log-format json --log-interval 100 --num-workers 0 --sentence-avg --criterion asg_loss --asg-transitions-init 5 --max-replabel 2 --linseg-updates 8789 --user-dir examples/speech_recognition +``` + +Note that ASG loss currently doesn't do well with word-pieces. You should prepare a dataset with character targets by setting `nbpe=31` in `prepare-librispeech.sh`. + +## Inference for librispeech (flashlight decoder, n-gram LM) +Inference command: +``` +python examples/speech_recognition/infer.py $DIR_FOR_PREPROCESSED_DATA --task speech_recognition --seed 1 --nbest 1 --path $MODEL_PATH/checkpoint_last.pt --gen-subset $SET --results-path $RES_DIR --w2l-decoder kenlm --kenlm-model $KENLM_MODEL_PATH --lexicon $LEXICON_PATH --beam 200 --beam-threshold 15 --lm-weight 1.5 --word-score 1.5 --sil-weight -0.3 --criterion asg_loss --max-replabel 2 --user-dir examples/speech_recognition +``` + +`$KENLM_MODEL_PATH` should be a standard n-gram language model file. `$LEXICON_PATH` should be a flashlight-style lexicon (list of known words and their spellings). For ASG inference, a lexicon line should look like this (note the repetition labels): +``` +doorbell D O 1 R B E L 1 ▁ +``` +For CTC inference with word-pieces, repetition labels are not used and the lexicon should have most common spellings for each word (one can use sentencepiece's `NBestEncodeAsPieces` for this): +``` +doorbell ▁DOOR BE LL +doorbell ▁DOOR B E LL +doorbell ▁DO OR BE LL +doorbell ▁DOOR B EL L +doorbell ▁DOOR BE L L +doorbell ▁DO OR B E LL +doorbell ▁DOOR B E L L +doorbell ▁DO OR B EL L +doorbell ▁DO O R BE LL +doorbell ▁DO OR BE L L +``` +Lowercase vs. 
uppercase matters: the *word* should match the case of the n-gram language model (i.e. `$KENLM_MODEL_PATH`), while the *spelling* should match the case of the token dictionary (i.e. `$DIR_FOR_PREPROCESSED_DATA/dict.txt`). + +## Inference for librispeech (flashlight decoder, viterbi only) +Inference command: +``` +python examples/speech_recognition/infer.py $DIR_FOR_PREPROCESSED_DATA --task speech_recognition --seed 1 --nbest 1 --path $MODEL_PATH/checkpoint_last.pt --gen-subset $SET --results-path $RES_DIR --w2l-decoder viterbi --criterion asg_loss --max-replabel 2 --user-dir examples/speech_recognition +``` diff --git a/fairseq/examples/speech_recognition/__init__.py b/fairseq/examples/speech_recognition/__init__.py new file mode 100644 index 0000000..0278f6a --- /dev/null +++ b/fairseq/examples/speech_recognition/__init__.py @@ -0,0 +1 @@ +from . import criterions, models, tasks # noqa diff --git a/fairseq/examples/speech_recognition/criterions/ASG_loss.py b/fairseq/examples/speech_recognition/criterions/ASG_loss.py new file mode 100644 index 0000000..41f50bb --- /dev/null +++ b/fairseq/examples/speech_recognition/criterions/ASG_loss.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch +from examples.speech_recognition.data.replabels import pack_replabels +from fairseq import utils +from fairseq.criterions import FairseqCriterion, register_criterion + + +@register_criterion("asg_loss") +class ASGCriterion(FairseqCriterion): + @staticmethod + def add_args(parser): + group = parser.add_argument_group("ASG Loss") + group.add_argument( + "--asg-transitions-init", + help="initial diagonal value of transition matrix", + type=float, + default=0.0, + ) + group.add_argument( + "--max-replabel", help="maximum # of replabels", type=int, default=2 + ) + group.add_argument( + "--linseg-updates", + help="# of training updates to use LinSeg initialization", + type=int, + default=0, + ) + group.add_argument( + "--hide-linseg-messages", + help="hide messages about LinSeg initialization", + action="store_true", + ) + + def __init__( + self, + task, + silence_token, + asg_transitions_init, + max_replabel, + linseg_updates, + hide_linseg_messages, + ): + from flashlight.lib.sequence.criterion import ASGLoss, CriterionScaleMode + + super().__init__(task) + self.tgt_dict = task.target_dictionary + self.eos = self.tgt_dict.eos() + self.silence = ( + self.tgt_dict.index(silence_token) + if silence_token in self.tgt_dict + else None + ) + self.max_replabel = max_replabel + + num_labels = len(self.tgt_dict) + self.asg = ASGLoss(num_labels, scale_mode=CriterionScaleMode.TARGET_SZ_SQRT) + self.asg.trans = torch.nn.Parameter( + asg_transitions_init * torch.eye(num_labels), requires_grad=True + ) + + self.linseg_progress = torch.nn.Parameter( + torch.tensor([0], dtype=torch.int), requires_grad=False + ) + self.linseg_maximum = linseg_updates + self.linseg_message_state = "none" if hide_linseg_messages else "start" + + @classmethod + def build_criterion(cls, args, task): + return cls( + task, + args.silence_token, + args.asg_transitions_init, + args.max_replabel, + args.linseg_updates, + args.hide_linseg_messages, + ) + + def linseg_step(self): + if not 
self.training: + return False + if self.linseg_progress.item() < self.linseg_maximum: + if self.linseg_message_state == "start": + print("| using LinSeg to initialize ASG") + self.linseg_message_state = "finish" + self.linseg_progress.add_(1) + return True + elif self.linseg_message_state == "finish": + print("| finished LinSeg initialization") + self.linseg_message_state = "none" + return False + + def replace_eos_with_silence(self, tgt): + if tgt[-1] != self.eos: + return tgt + elif self.silence is None or (len(tgt) > 1 and tgt[-2] == self.silence): + return tgt[:-1] + else: + return tgt[:-1] + [self.silence] + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + net_output = model(**sample["net_input"]) + emissions = net_output["encoder_out"].transpose(0, 1).contiguous() + B = emissions.size(0) + T = emissions.size(1) + device = emissions.device + + target = torch.IntTensor(B, T) + target_size = torch.IntTensor(B) + using_linseg = self.linseg_step() + + for b in range(B): + initial_target_size = sample["target_lengths"][b].item() + if initial_target_size == 0: + raise ValueError("target size cannot be zero") + + tgt = sample["target"][b, :initial_target_size].tolist() + tgt = self.replace_eos_with_silence(tgt) + tgt = pack_replabels(tgt, self.tgt_dict, self.max_replabel) + tgt = tgt[:T] + + if using_linseg: + tgt = [tgt[t * len(tgt) // T] for t in range(T)] + + target[b][: len(tgt)] = torch.IntTensor(tgt) + target_size[b] = len(tgt) + + loss = self.asg.forward(emissions, target.to(device), target_size.to(device)) + + if reduce: + loss = torch.sum(loss) + + sample_size = ( + sample["target"].size(0) if self.args.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": utils.item(loss.data) if reduce else loss.data, + "ntokens": 
sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + nsentences = sum(log.get("nsentences", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + agg_output = { + "loss": loss_sum / nsentences, + "ntokens": ntokens, + "nsentences": nsentences, + "sample_size": sample_size, + } + return agg_output diff --git a/fairseq/examples/speech_recognition/criterions/__init__.py b/fairseq/examples/speech_recognition/criterions/__init__.py new file mode 100644 index 0000000..579abd2 --- /dev/null +++ b/fairseq/examples/speech_recognition/criterions/__init__.py @@ -0,0 +1,17 @@ +import importlib +import os + + +# ASG loss requires flashlight bindings +files_to_skip = set() +try: + import flashlight.lib.sequence.criterion +except ImportError: + files_to_skip.add("ASG_loss.py") + +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_") and file not in files_to_skip: + criterion_name = file[: file.find(".py")] + importlib.import_module( + "examples.speech_recognition.criterions." + criterion_name + ) diff --git a/fairseq/examples/speech_recognition/criterions/cross_entropy_acc.py b/fairseq/examples/speech_recognition/criterions/cross_entropy_acc.py new file mode 100644 index 0000000..7c4d8ba --- /dev/null +++ b/fairseq/examples/speech_recognition/criterions/cross_entropy_acc.py @@ -0,0 +1,130 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import math + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.criterions import FairseqCriterion, register_criterion + + +@register_criterion("cross_entropy_acc") +class CrossEntropyWithAccCriterion(FairseqCriterion): + def __init__(self, task, sentence_avg): + super().__init__(task) + self.sentence_avg = sentence_avg + + def compute_loss(self, model, net_output, target, reduction, log_probs): + # N, T -> N * T + target = target.view(-1) + lprobs = model.get_normalized_probs(net_output, log_probs=log_probs) + if not hasattr(lprobs, "batch_first"): + logging.warning( + "ERROR: we need to know whether " + "batch first for the net output; " + "you need to set batch_first attribute for the return value of " + "model.get_normalized_probs. Now, we assume this is true, but " + "in the future, we will raise exception instead. " + ) + batch_first = getattr(lprobs, "batch_first", True) + if not batch_first: + lprobs = lprobs.transpose(0, 1) + + # N, T, D -> N * T, D + lprobs = lprobs.view(-1, lprobs.size(-1)) + loss = F.nll_loss( + lprobs, target, ignore_index=self.padding_idx, reduction=reduction + ) + return lprobs, loss + + def get_logging_output(self, sample, target, lprobs, loss): + target = target.view(-1) + mask = target != self.padding_idx + correct = torch.sum( + lprobs.argmax(1).masked_select(mask) == target.masked_select(mask) + ) + total = torch.sum(mask) + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + "correct": utils.item(correct.data), + "total": utils.item(total.data), + "nframes": torch.sum(sample["net_input"]["src_lengths"]).item(), + } + + return sample_size, logging_output + + def 
forward(self, model, sample, reduction="sum", log_probs=True): + """Computes the cross entropy with accuracy metric for the given sample. + + This is similar to CrossEntropyCriterion in fairseq, but also + computes accuracy metrics as part of logging + + Args: + logprobs (Torch.tensor) of shape N, T, D i.e. + batchsize, timesteps, dimensions + targets (Torch.tensor) of shape N, T i.e batchsize, timesteps + + Returns: + tuple: With three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + + TODO: + * Currently this Criterion will only work with LSTMEncoderModels or + FairseqModels which have decoder, or Models which return TorchTensor + as net_output. + We need to make a change to support all FairseqEncoder models. + """ + net_output = model(**sample["net_input"]) + target = model.get_targets(sample, net_output) + lprobs, loss = self.compute_loss( + model, net_output, target, reduction, log_probs + ) + sample_size, logging_output = self.get_logging_output( + sample, target, lprobs, loss + ) + return loss, sample_size, logging_output + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + correct_sum = sum(log.get("correct", 0) for log in logging_outputs) + total_sum = sum(log.get("total", 0) for log in logging_outputs) + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + nsentences = sum(log.get("nsentences", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + nframes = sum(log.get("nframes", 0) for log in logging_outputs) + agg_output = { + "loss": loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0, + # if args.sentence_avg, then sample_size is nsentences, then loss + # is per-sentence loss; else sample_size is ntokens, the loss + # becomes per-output 
token loss + "ntokens": ntokens, + "nsentences": nsentences, + "nframes": nframes, + "sample_size": sample_size, + "acc": correct_sum * 100.0 / total_sum if total_sum > 0 else 0.0, + "correct": correct_sum, + "total": total_sum, + # total is the number of validate tokens + } + if sample_size != ntokens: + agg_output["nll_loss"] = loss_sum / ntokens / math.log(2) + # loss: per output token loss + # nll_loss: per sentence loss + return agg_output diff --git a/fairseq/examples/speech_recognition/datasets/asr_prep_json.py b/fairseq/examples/speech_recognition/datasets/asr_prep_json.py new file mode 100644 index 0000000..b8db8ff --- /dev/null +++ b/fairseq/examples/speech_recognition/datasets/asr_prep_json.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import concurrent.futures +import json +import multiprocessing +import os +from collections import namedtuple +from itertools import chain + +import sentencepiece as spm +from fairseq.data import Dictionary + + +MILLISECONDS_TO_SECONDS = 0.001 + + +def process_sample(aud_path, lable, utt_id, sp, tgt_dict): + import torchaudio + + input = {} + output = {} + si, ei = torchaudio.info(aud_path) + input["length_ms"] = int( + si.length / si.channels / si.rate / MILLISECONDS_TO_SECONDS + ) + input["path"] = aud_path + + token = " ".join(sp.EncodeAsPieces(lable)) + ids = tgt_dict.encode_line(token, append_eos=False) + output["text"] = lable + output["token"] = token + output["tokenid"] = ", ".join(map(str, [t.tolist() for t in ids])) + return {utt_id: {"input": input, "output": output}} + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--audio-dirs", + nargs="+", + default=["-"], + required=True, + help="input 
directories with audio files", + ) + parser.add_argument( + "--labels", + required=True, + help="aggregated input labels with format <ID LABEL> per line", + type=argparse.FileType("r", encoding="UTF-8"), + ) + parser.add_argument( + "--spm-model", + required=True, + help="sentencepiece model to use for encoding", + type=argparse.FileType("r", encoding="UTF-8"), + ) + parser.add_argument( + "--dictionary", + required=True, + help="file to load fairseq dictionary from", + type=argparse.FileType("r", encoding="UTF-8"), + ) + parser.add_argument("--audio-format", choices=["flac", "wav"], default="wav") + parser.add_argument( + "--output", + required=True, + type=argparse.FileType("w"), + help="path to save json output", + ) + args = parser.parse_args() + + sp = spm.SentencePieceProcessor() + sp.Load(args.spm_model.name) + + tgt_dict = Dictionary.load(args.dictionary) + + labels = {} + for line in args.labels: + (utt_id, label) = line.split(" ", 1) + labels[utt_id] = label + if len(labels) == 0: + raise Exception("No labels found in ", args.labels_path) + + Sample = namedtuple("Sample", "aud_path utt_id") + samples = [] + for path, _, files in chain.from_iterable( + os.walk(path) for path in args.audio_dirs + ): + for f in files: + if f.endswith(args.audio_format): + if len(os.path.splitext(f)) != 2: + raise Exception("Expect <utt_id.extension> file name. 
Got: ", f) + utt_id = os.path.splitext(f)[0] + if utt_id not in labels: + continue + samples.append(Sample(os.path.join(path, f), utt_id)) + + utts = {} + num_cpu = multiprocessing.cpu_count() + with concurrent.futures.ThreadPoolExecutor(max_workers=num_cpu) as executor: + future_to_sample = { + executor.submit( + process_sample, s.aud_path, labels[s.utt_id], s.utt_id, sp, tgt_dict + ): s + for s in samples + } + for future in concurrent.futures.as_completed(future_to_sample): + try: + data = future.result() + except Exception as exc: + print("generated an exception: ", exc) + else: + utts.update(data) + json.dump({"utts": utts}, args.output, indent=4) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_recognition/datasets/prepare-librispeech.sh b/fairseq/examples/speech_recognition/datasets/prepare-librispeech.sh new file mode 100644 index 0000000..9e9297f --- /dev/null +++ b/fairseq/examples/speech_recognition/datasets/prepare-librispeech.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Prepare librispeech dataset + +base_url=www.openslr.org/resources/12 +train_dir=train_960 + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <download_dir> <out_dir>" + echo "e.g.: $0 /tmp/librispeech_raw/ ~/data/librispeech_final" + exit 1 +fi + +download_dir=${1%/} +out_dir=${2%/} + +fairseq_root=~/fairseq-py/ +mkdir -p ${out_dir} +cd ${out_dir} || exit + +nbpe=5000 +bpemode=unigram + +if [ ! -d "$fairseq_root" ]; then + echo "$0: Please set correct fairseq_root" + exit 1 +fi + +echo "Data Download" +for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do + url=$base_url/$part.tar.gz + if ! wget -P $download_dir $url; then + echo "$0: wget failed for $url" + exit 1 + fi + if ! 
tar -C $download_dir -xvzf $download_dir/$part.tar.gz; then + echo "$0: error un-tarring archive $download_dir/$part.tar.gz" + exit 1 + fi +done + +echo "Merge all train packs into one" +mkdir -p ${download_dir}/LibriSpeech/${train_dir}/ +for part in train-clean-100 train-clean-360 train-other-500; do + mv ${download_dir}/LibriSpeech/${part}/* $download_dir/LibriSpeech/${train_dir}/ +done +echo "Merge train text" +find ${download_dir}/LibriSpeech/${train_dir}/ -name '*.txt' -exec cat {} \; >> ${download_dir}/LibriSpeech/${train_dir}/text + +# Use combined dev-clean and dev-other as validation set +find ${download_dir}/LibriSpeech/dev-clean/ ${download_dir}/LibriSpeech/dev-other/ -name '*.txt' -exec cat {} \; >> ${download_dir}/LibriSpeech/valid_text +find ${download_dir}/LibriSpeech/test-clean/ -name '*.txt' -exec cat {} \; >> ${download_dir}/LibriSpeech/test-clean/text +find ${download_dir}/LibriSpeech/test-other/ -name '*.txt' -exec cat {} \; >> ${download_dir}/LibriSpeech/test-other/text + + +dict=data/lang_char/${train_dir}_${bpemode}${nbpe}_units.txt +encoded=data/lang_char/${train_dir}_${bpemode}${nbpe}_encoded.txt +fairseq_dict=data/lang_char/${train_dir}_${bpemode}${nbpe}_fairseq_dict.txt +bpemodel=data/lang_char/${train_dir}_${bpemode}${nbpe} +echo "dictionary: ${dict}" +echo "Dictionary preparation" +mkdir -p data/lang_char/ +echo "<unk> 3" > ${dict} +echo "</s> 2" >> ${dict} +echo "<pad> 1" >> ${dict} +cut -f 2- -d" " ${download_dir}/LibriSpeech/${train_dir}/text > data/lang_char/input.txt +spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 --unk_id=3 --eos_id=2 --pad_id=1 --bos_id=-1 --character_coverage=1 +spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt > ${encoded} +cat ${encoded} | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+3}' >> ${dict} +cat ${encoded} | tr ' ' '\n' | sort | uniq -c | awk '{print $2 " " $1}' > 
${fairseq_dict} +wc -l ${dict} + +echo "Prepare train and test jsons" +for part in train_960 test-other test-clean; do + python ${fairseq_root}/examples/speech_recognition/datasets/asr_prep_json.py --audio-dirs ${download_dir}/LibriSpeech/${part} --labels ${download_dir}/LibriSpeech/${part}/text --spm-model ${bpemodel}.model --audio-format flac --dictionary ${fairseq_dict} --output ${part}.json +done +# fairseq expects to find train.json and valid.json during training +mv train_960.json train.json + +echo "Prepare valid json" +python ${fairseq_root}/examples/speech_recognition/datasets/asr_prep_json.py --audio-dirs ${download_dir}/LibriSpeech/dev-clean ${download_dir}/LibriSpeech/dev-other --labels ${download_dir}/LibriSpeech/valid_text --spm-model ${bpemodel}.model --audio-format flac --dictionary ${fairseq_dict} --output valid.json + +cp ${fairseq_dict} ./dict.txt +cp ${bpemodel}.model ./spm.model diff --git a/fairseq/examples/speech_recognition/infer.py b/fairseq/examples/speech_recognition/infer.py new file mode 100644 index 0000000..ce16bf4 --- /dev/null +++ b/fairseq/examples/speech_recognition/infer.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Run inference for pre-processed data with a trained model. 
+""" + +import ast +import logging +import math +import os +import sys + +import editdistance +import numpy as np +import torch +from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq.data.data_utils import post_process +from fairseq.logging.meters import StopwatchMeter, TimeMeter + + +logging.basicConfig() +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def add_asr_eval_argument(parser): + parser.add_argument("--kspmodel", default=None, help="sentence piece model") + parser.add_argument( + "--wfstlm", default=None, help="wfstlm on dictonary output units" + ) + parser.add_argument( + "--rnnt_decoding_type", + default="greedy", + help="wfstlm on dictonary\ +output units", + ) + try: + parser.add_argument( + "--lm-weight", + "--lm_weight", + type=float, + default=0.2, + help="weight for lm while interpolating with neural score", + ) + except: + pass + parser.add_argument( + "--rnnt_len_penalty", default=-0.5, help="rnnt length penalty on word level" + ) + parser.add_argument( + "--w2l-decoder", + choices=["viterbi", "kenlm", "fairseqlm"], + help="use a w2l decoder", + ) + parser.add_argument("--lexicon", help="lexicon for w2l decoder") + parser.add_argument("--unit-lm", action="store_true", help="if using a unit lm") + parser.add_argument("--kenlm-model", "--lm-model", help="lm model for w2l decoder") + parser.add_argument("--beam-threshold", type=float, default=25.0) + parser.add_argument("--beam-size-token", type=float, default=100) + parser.add_argument("--word-score", type=float, default=1.0) + parser.add_argument("--unk-weight", type=float, default=-math.inf) + parser.add_argument("--sil-weight", type=float, default=0.0) + parser.add_argument( + "--dump-emissions", + type=str, + default=None, + help="if present, dumps emissions into this file and exits", + ) + parser.add_argument( + "--dump-features", + type=str, + default=None, + help="if present, dumps 
features into this file and exits", + ) + parser.add_argument( + "--load-emissions", + type=str, + default=None, + help="if present, loads emissions from this file", + ) + return parser + + +def check_args(args): + # assert args.path is not None, "--path required for generation!" + # assert args.results_path is not None, "--results_path required for generation!" + assert ( + not args.sampling or args.nbest == args.beam + ), "--sampling requires --nbest to be equal to --beam" + assert ( + args.replace_unk is None or args.raw_text + ), "--replace-unk requires a raw text dataset (--raw-text)" + + +def get_dataset_itr(args, task, models): + return task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_sentences=args.batch_size, + max_positions=(sys.maxsize, sys.maxsize), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + data_buffer_size=args.data_buffer_size, + ).next_epoch_itr(shuffle=False) + + +def process_predictions( + args, hypos, sp, tgt_dict, target_tokens, res_files, speaker, id +): + for hypo in hypos[: min(len(hypos), args.nbest)]: + hyp_pieces = tgt_dict.string(hypo["tokens"].int().cpu()) + + if "words" in hypo: + hyp_words = " ".join(hypo["words"]) + else: + hyp_words = post_process(hyp_pieces, args.post_process) + + if res_files is not None: + print( + "{} ({}-{})".format(hyp_pieces, speaker, id), + file=res_files["hypo.units"], + ) + print( + "{} ({}-{})".format(hyp_words, speaker, id), + file=res_files["hypo.words"], + ) + + tgt_pieces = tgt_dict.string(target_tokens) + tgt_words = post_process(tgt_pieces, args.post_process) + + if res_files is not None: + print( + "{} ({}-{})".format(tgt_pieces, speaker, id), + file=res_files["ref.units"], + ) + print( + "{} ({}-{})".format(tgt_words, speaker, id), file=res_files["ref.words"] + ) + + if 
not args.quiet: + logger.info("HYPO:" + hyp_words) + logger.info("TARGET:" + tgt_words) + logger.info("___________________") + + hyp_words = hyp_words.split() + tgt_words = tgt_words.split() + return editdistance.eval(hyp_words, tgt_words), len(tgt_words) + + +def prepare_result_files(args): + def get_res_file(file_prefix): + if args.num_shards > 1: + file_prefix = f"{args.shard_id}_{file_prefix}" + path = os.path.join( + args.results_path, + "{}-{}-{}.txt".format( + file_prefix, os.path.basename(args.path), args.gen_subset + ), + ) + return open(path, "w", buffering=1) + + if not args.results_path: + return None + + return { + "hypo.words": get_res_file("hypo.word"), + "hypo.units": get_res_file("hypo.units"), + "ref.words": get_res_file("ref.word"), + "ref.units": get_res_file("ref.units"), + } + + +def optimize_models(args, use_cuda, models): + """Optimize ensemble for generation""" + for model in models: + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + +def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + + +class ExistingEmissionsDecoder(object): + def __init__(self, decoder, emissions): + self.decoder = decoder + self.emissions = emissions + + def generate(self, models, sample, **unused): + ids = sample["id"].cpu().numpy() + try: + emissions = np.stack(self.emissions[ids]) + except: + print([x.shape for x in self.emissions[ids]]) + raise Exception("invalid sizes") + emissions = torch.from_numpy(emissions) + return self.decoder.decode(emissions) + + +def main(args, task=None, model_state=None): + check_args(args) + + use_fp16 = args.fp16 + if args.max_tokens is None and args.batch_size is None: + args.max_tokens = 4000000 + logger.info(args) + + use_cuda = torch.cuda.is_available() and not args.cpu + + logger.info("| decoding with criterion {}".format(args.criterion)) 
+ + task = tasks.setup_task(args) + + # Load ensemble + if args.load_emissions: + models, criterions = [], [] + task.load_dataset(args.gen_subset) + else: + logger.info("| loading model(s) from {}".format(args.path)) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(args.path, separator="\\"), + arg_overrides=ast.literal_eval(args.model_overrides), + task=task, + suffix=args.checkpoint_suffix, + strict=(args.checkpoint_shard_count == 1), + num_shards=args.checkpoint_shard_count, + state=model_state, + ) + optimize_models(args, use_cuda, models) + task.load_dataset(args.gen_subset, task_cfg=saved_cfg.task) + + + # Set dictionary + tgt_dict = task.target_dictionary + + logger.info( + "| {} {} {} examples".format( + args.data, args.gen_subset, len(task.dataset(args.gen_subset)) + ) + ) + + # hack to pass transitions to W2lDecoder + if args.criterion == "asg_loss": + raise NotImplementedError("asg_loss is currently not supported") + # trans = criterions[0].asg.trans.data + # args.asg_transitions = torch.flatten(trans).tolist() + + # Load dataset (possibly sharded) + itr = get_dataset_itr(args, task, models) + + # Initialize generator + gen_timer = StopwatchMeter() + + def build_generator(args): + w2l_decoder = getattr(args, "w2l_decoder", None) + if w2l_decoder == "viterbi": + from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder + + return W2lViterbiDecoder(args, task.target_dictionary) + elif w2l_decoder == "kenlm": + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + return W2lKenLMDecoder(args, task.target_dictionary) + elif w2l_decoder == "fairseqlm": + from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder + + return W2lFairseqLMDecoder(args, task.target_dictionary) + else: + print( + "only flashlight decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment" + ) + + # please do not touch this unless you test both generate.py and infer.py with 
audio_pretraining task + generator = build_generator(args) + + if args.load_emissions: + generator = ExistingEmissionsDecoder( + generator, np.load(args.load_emissions, allow_pickle=True) + ) + logger.info("loaded emissions from " + args.load_emissions) + + num_sentences = 0 + + if args.results_path is not None and not os.path.exists(args.results_path): + os.makedirs(args.results_path) + + max_source_pos = ( + utils.resolve_max_positions( + task.max_positions(), *[model.max_positions() for model in models] + ), + ) + + if max_source_pos is not None: + max_source_pos = max_source_pos[0] + if max_source_pos is not None: + max_source_pos = max_source_pos[0] - 1 + + if args.dump_emissions: + emissions = {} + if args.dump_features: + features = {} + models[0].bert.proj = None + else: + res_files = prepare_result_files(args) + errs_t = 0 + lengths_t = 0 + with progress_bar.build_progress_bar(args, itr) as t: + wps_meter = TimeMeter() + for sample in t: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if use_fp16: + sample = utils.apply_to_sample(apply_half, sample) + if "net_input" not in sample: + continue + + prefix_tokens = None + if args.prefix_size > 0: + prefix_tokens = sample["target"][:, : args.prefix_size] + + gen_timer.start() + if args.dump_emissions: + with torch.no_grad(): + encoder_out = models[0](**sample["net_input"]) + emm = models[0].get_normalized_probs(encoder_out, log_probs=True) + emm = emm.transpose(0, 1).cpu().numpy() + for i, id in enumerate(sample["id"]): + emissions[id.item()] = emm[i] + continue + elif args.dump_features: + with torch.no_grad(): + encoder_out = models[0](**sample["net_input"]) + feat = encoder_out["encoder_out"].transpose(0, 1).cpu().numpy() + for i, id in enumerate(sample["id"]): + padding = ( + encoder_out["encoder_padding_mask"][i].cpu().numpy() + if encoder_out["encoder_padding_mask"] is not None + else None + ) + features[id.item()] = (feat[i], padding) + continue + hypos = task.inference_step(generator, 
models, sample, prefix_tokens) + num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i, sample_id in enumerate(sample["id"].tolist()): + speaker = None + # id = task.dataset(args.gen_subset).ids[int(sample_id)] + id = sample_id + toks = ( + sample["target"][i, :] + if "target_label" not in sample + else sample["target_label"][i, :] + ) + target_tokens = utils.strip_pad(toks, tgt_dict.pad()).int().cpu() + # Process top predictions + errs, length = process_predictions( + args, + hypos[i], + None, + tgt_dict, + target_tokens, + res_files, + speaker, + id, + ) + errs_t += errs + lengths_t += length + + wps_meter.update(num_generated_tokens) + t.log({"wps": round(wps_meter.avg)}) + num_sentences += ( + sample["nsentences"] if "nsentences" in sample else sample["id"].numel() + ) + + wer = None + if args.dump_emissions: + emm_arr = [] + for i in range(len(emissions)): + emm_arr.append(emissions[i]) + np.save(args.dump_emissions, emm_arr) + logger.info(f"saved {len(emissions)} emissions to {args.dump_emissions}") + elif args.dump_features: + feat_arr = [] + for i in range(len(features)): + feat_arr.append(features[i]) + np.save(args.dump_features, feat_arr) + logger.info(f"saved {len(features)} emissions to {args.dump_features}") + else: + if lengths_t > 0: + wer = errs_t * 100.0 / lengths_t + logger.info(f"WER: {wer}") + + logger.info( + "| Processed {} sentences ({} tokens) in {:.1f}s ({:.2f}" + "sentences/s, {:.2f} tokens/s)".format( + num_sentences, + gen_timer.n, + gen_timer.sum, + num_sentences / gen_timer.sum, + 1.0 / gen_timer.avg, + ) + ) + logger.info("| Generate {} with beam={}".format(args.gen_subset, args.beam)) + return task, wer + + +def make_parser(): + parser = options.get_generation_parser() + parser = add_asr_eval_argument(parser) + return parser + + +def cli_main(): + parser = make_parser() + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff 
--git a/fairseq/examples/speech_recognition/kaldi/__init__.py b/fairseq/examples/speech_recognition/kaldi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/speech_recognition/kaldi/add-self-loop-simple.cc b/fairseq/examples/speech_recognition/kaldi/add-self-loop-simple.cc new file mode 100644 index 0000000..e18fb62 --- /dev/null +++ b/fairseq/examples/speech_recognition/kaldi/add-self-loop-simple.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <iostream> +#include "fstext/fstext-lib.h" // @manual +#include "util/common-utils.h" // @manual + +/* + * This program is to modify a FST without self-loop by: + * for each incoming arc with non-eps input symbol, add a self-loop arc + * with that non-eps symbol as input and eps as output. + * + * This is to make sure the resultant FST can do deduplication for repeated + * symbols, which is very common in acoustic model + * + */ +namespace { +int32 AddSelfLoopsSimple(fst::StdVectorFst* fst) { + typedef fst::MutableArcIterator<fst::StdVectorFst> IterType; + + int32 num_states_before = fst->NumStates(); + fst::MakePrecedingInputSymbolsSame(false, fst); + int32 num_states_after = fst->NumStates(); + KALDI_LOG << "There are " << num_states_before + << " states in the original FST; " + << " after MakePrecedingInputSymbolsSame, there are " + << num_states_after << " states " << std::endl; + + auto weight_one = fst::StdArc::Weight::One(); + + int32 num_arc_added = 0; + + fst::StdArc self_loop_arc; + self_loop_arc.weight = weight_one; + + int32 num_states = fst->NumStates(); + std::vector<std::set<int32>> incoming_non_eps_label_per_state(num_states); + + for (int32 state = 0; state < num_states; state++) { + for (IterType aiter(fst, state); !aiter.Done(); aiter.Next()) { + fst::StdArc arc(aiter.Value()); + if (arc.ilabel != 
0) { + incoming_non_eps_label_per_state[arc.nextstate].insert(arc.ilabel); + } + } + } + + for (int32 state = 0; state < num_states; state++) { + if (!incoming_non_eps_label_per_state[state].empty()) { + auto& ilabel_set = incoming_non_eps_label_per_state[state]; + for (auto it = ilabel_set.begin(); it != ilabel_set.end(); it++) { + self_loop_arc.ilabel = *it; + self_loop_arc.olabel = 0; + self_loop_arc.nextstate = state; + fst->AddArc(state, self_loop_arc); + num_arc_added++; + } + } + } + return num_arc_added; +} + +void print_usage() { + std::cout << "add-self-loop-simple usage:\n" + "\tadd-self-loop-simple <in-fst> <out-fst> \n"; +} +} // namespace + +int main(int argc, char** argv) { + if (argc != 3) { + print_usage(); + exit(1); + } + + auto input = argv[1]; + auto output = argv[2]; + + auto fst = fst::ReadFstKaldi(input); + auto num_states = fst->NumStates(); + KALDI_LOG << "Loading FST from " << input << " with " << num_states + << " states." << std::endl; + + int32 num_arc_added = AddSelfLoopsSimple(fst); + KALDI_LOG << "Adding " << num_arc_added << " self-loop arcs " << std::endl; + + fst::WriteFstKaldi(*fst, std::string(output)); + KALDI_LOG << "Writing FST to " << output << std::endl; + + delete fst; +} diff --git a/fairseq/examples/speech_recognition/kaldi/config/kaldi_initializer.yaml b/fairseq/examples/speech_recognition/kaldi/config/kaldi_initializer.yaml new file mode 100644 index 0000000..be9ba98 --- /dev/null +++ b/fairseq/examples/speech_recognition/kaldi/config/kaldi_initializer.yaml @@ -0,0 +1,8 @@ +# @package _group_ + +data_dir: ??? +fst_dir: ??? +in_labels: ??? +kaldi_root: ??? +lm_arpa: ??? +blank_symbol: <s> diff --git a/fairseq/examples/speech_recognition/kaldi/kaldi_decoder.py b/fairseq/examples/speech_recognition/kaldi/kaldi_decoder.py new file mode 100644 index 0000000..5f62cc5 --- /dev/null +++ b/fairseq/examples/speech_recognition/kaldi/kaldi_decoder.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from concurrent.futures import ThreadPoolExecutor +import logging +from omegaconf import MISSING +import os +import torch +from typing import Optional +import warnings + + +from dataclasses import dataclass +from fairseq.dataclass import FairseqDataclass +from .kaldi_initializer import KaldiInitializerConfig, initalize_kaldi + + +logger = logging.getLogger(__name__) + + +@dataclass +class KaldiDecoderConfig(FairseqDataclass): + hlg_graph_path: Optional[str] = None + output_dict: str = MISSING + + kaldi_initializer_config: Optional[KaldiInitializerConfig] = None + + acoustic_scale: float = 0.5 + max_active: int = 10000 + beam_delta: float = 0.5 + hash_ratio: float = 2.0 + + is_lattice: bool = False + lattice_beam: float = 10.0 + prune_interval: int = 25 + determinize_lattice: bool = True + prune_scale: float = 0.1 + max_mem: int = 0 + phone_determinize: bool = True + word_determinize: bool = True + minimize: bool = True + + num_threads: int = 1 + + +class KaldiDecoder(object): + def __init__( + self, + cfg: KaldiDecoderConfig, + beam: int, + nbest: int = 1, + ): + try: + from kaldi.asr import FasterRecognizer, LatticeFasterRecognizer + from kaldi.base import set_verbose_level + from kaldi.decoder import ( + FasterDecoder, + FasterDecoderOptions, + LatticeFasterDecoder, + LatticeFasterDecoderOptions, + ) + from kaldi.lat.functions import DeterminizeLatticePhonePrunedOptions + from kaldi.fstext import read_fst_kaldi, SymbolTable + except: + warnings.warn( + "pykaldi is required for this functionality. 
Please install from https://github.com/pykaldi/pykaldi" + ) + + # set_verbose_level(2) + + self.acoustic_scale = cfg.acoustic_scale + self.nbest = nbest + + if cfg.hlg_graph_path is None: + assert ( + cfg.kaldi_initializer_config is not None + ), "Must provide hlg graph path or kaldi initializer config" + cfg.hlg_graph_path = initalize_kaldi(cfg.kaldi_initializer_config) + + assert os.path.exists(cfg.hlg_graph_path), cfg.hlg_graph_path + + if cfg.is_lattice: + self.dec_cls = LatticeFasterDecoder + opt_cls = LatticeFasterDecoderOptions + self.rec_cls = LatticeFasterRecognizer + else: + assert self.nbest == 1, "nbest > 1 requires lattice decoder" + self.dec_cls = FasterDecoder + opt_cls = FasterDecoderOptions + self.rec_cls = FasterRecognizer + + self.decoder_options = opt_cls() + self.decoder_options.beam = beam + self.decoder_options.max_active = cfg.max_active + self.decoder_options.beam_delta = cfg.beam_delta + self.decoder_options.hash_ratio = cfg.hash_ratio + + if cfg.is_lattice: + self.decoder_options.lattice_beam = cfg.lattice_beam + self.decoder_options.prune_interval = cfg.prune_interval + self.decoder_options.determinize_lattice = cfg.determinize_lattice + self.decoder_options.prune_scale = cfg.prune_scale + det_opts = DeterminizeLatticePhonePrunedOptions() + det_opts.max_mem = cfg.max_mem + det_opts.phone_determinize = cfg.phone_determinize + det_opts.word_determinize = cfg.word_determinize + det_opts.minimize = cfg.minimize + self.decoder_options.det_opts = det_opts + + self.output_symbols = {} + with open(cfg.output_dict, "r") as f: + for line in f: + items = line.rstrip().split() + assert len(items) == 2 + self.output_symbols[int(items[1])] = items[0] + + logger.info(f"Loading FST from {cfg.hlg_graph_path}") + self.fst = read_fst_kaldi(cfg.hlg_graph_path) + self.symbol_table = SymbolTable.read_text(cfg.output_dict) + + self.executor = ThreadPoolExecutor(max_workers=cfg.num_threads) + + def generate(self, models, sample, **unused): + """Generate a batch 
of inferences.""" + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in sample["net_input"].items() if k != "prev_output_tokens" + } + emissions, padding = self.get_emissions(models, encoder_input) + return self.decode(emissions, padding) + + def get_emissions(self, models, encoder_input): + """Run encoder and normalize emissions""" + model = models[0] + + all_encoder_out = [m(**encoder_input) for m in models] + + if len(all_encoder_out) > 1: + + if "encoder_out" in all_encoder_out[0]: + encoder_out = { + "encoder_out": sum(e["encoder_out"] for e in all_encoder_out) + / len(all_encoder_out), + "encoder_padding_mask": all_encoder_out[0]["encoder_padding_mask"], + } + padding = encoder_out["encoder_padding_mask"] + else: + encoder_out = { + "logits": sum(e["logits"] for e in all_encoder_out) + / len(all_encoder_out), + "padding_mask": all_encoder_out[0]["padding_mask"], + } + padding = encoder_out["padding_mask"] + else: + encoder_out = all_encoder_out[0] + padding = ( + encoder_out["padding_mask"] + if "padding_mask" in encoder_out + else encoder_out["encoder_padding_mask"] + ) + + if hasattr(model, "get_logits"): + emissions = model.get_logits(encoder_out, normalize=True) + else: + emissions = model.get_normalized_probs(encoder_out, log_probs=True) + + return ( + emissions.cpu().float().transpose(0, 1), + padding.cpu() if padding is not None and padding.any() else None, + ) + + def decode_one(self, logits, padding): + from kaldi.matrix import Matrix + + decoder = self.dec_cls(self.fst, self.decoder_options) + asr = self.rec_cls( + decoder, self.symbol_table, acoustic_scale=self.acoustic_scale + ) + + if padding is not None: + logits = logits[~padding] + + mat = Matrix(logits.numpy()) + + out = asr.decode(mat) + + if self.nbest > 1: + from kaldi.fstext import shortestpath + from kaldi.fstext.utils import ( + convert_compact_lattice_to_lattice, 
+ convert_lattice_to_std, + convert_nbest_to_list, + get_linear_symbol_sequence, + ) + + lat = out["lattice"] + + sp = shortestpath(lat, nshortest=self.nbest) + + sp = convert_compact_lattice_to_lattice(sp) + sp = convert_lattice_to_std(sp) + seq = convert_nbest_to_list(sp) + + results = [] + for s in seq: + _, o, w = get_linear_symbol_sequence(s) + words = list(self.output_symbols[z] for z in o) + results.append( + { + "tokens": words, + "words": words, + "score": w.value, + "emissions": logits, + } + ) + return results + else: + words = out["text"].split() + return [ + { + "tokens": words, + "words": words, + "score": out["likelihood"], + "emissions": logits, + } + ] + + def decode(self, emissions, padding): + if padding is None: + padding = [None] * len(emissions) + + ret = list( + map( + lambda e, p: self.executor.submit(self.decode_one, e, p), + emissions, + padding, + ) + ) + return ret diff --git a/fairseq/examples/speech_recognition/kaldi/kaldi_initializer.py b/fairseq/examples/speech_recognition/kaldi/kaldi_initializer.py new file mode 100644 index 0000000..6d2a2a4 --- /dev/null +++ b/fairseq/examples/speech_recognition/kaldi/kaldi_initializer.py @@ -0,0 +1,698 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from dataclasses import dataclass +import hydra +from hydra.core.config_store import ConfigStore +import logging +from omegaconf import MISSING, OmegaConf +import os +import os.path as osp +from pathlib import Path +import subprocess +from typing import Optional + +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import FairseqDataclass + +script_dir = Path(__file__).resolve().parent +config_path = script_dir / "config" + + +logger = logging.getLogger(__name__) + + +@dataclass +class KaldiInitializerConfig(FairseqDataclass): + data_dir: str = MISSING + fst_dir: Optional[str] = None + in_labels: str = MISSING + out_labels: Optional[str] = None + wav2letter_lexicon: Optional[str] = None + lm_arpa: str = MISSING + kaldi_root: str = MISSING + blank_symbol: str = "<s>" + silence_symbol: Optional[str] = None + + +def create_units(fst_dir: Path, in_labels: str, vocab: Dictionary) -> Path: + in_units_file = fst_dir / f"kaldi_dict.{in_labels}.txt" + if not in_units_file.exists(): + + logger.info(f"Creating {in_units_file}") + + with open(in_units_file, "w") as f: + print("<eps> 0", file=f) + i = 1 + for symb in vocab.symbols[vocab.nspecial :]: + if not symb.startswith("madeupword"): + print(f"{symb} {i}", file=f) + i += 1 + return in_units_file + + +def create_lexicon( + cfg: KaldiInitializerConfig, + fst_dir: Path, + unique_label: str, + in_units_file: Path, + out_words_file: Path, +) -> (Path, Path): + + disambig_in_units_file = fst_dir / f"kaldi_dict.{cfg.in_labels}_disambig.txt" + lexicon_file = fst_dir / f"kaldi_lexicon.{unique_label}.txt" + disambig_lexicon_file = fst_dir / f"kaldi_lexicon.{unique_label}_disambig.txt" + if ( + not lexicon_file.exists() + or not disambig_lexicon_file.exists() + or not disambig_in_units_file.exists() + ): + logger.info(f"Creating {lexicon_file} (in units file: {in_units_file})") + + assert cfg.wav2letter_lexicon is not None or cfg.in_labels == cfg.out_labels + + if cfg.wav2letter_lexicon is not None: + lm_words 
= set() + with open(out_words_file, "r") as lm_dict_f: + for line in lm_dict_f: + lm_words.add(line.split()[0]) + + num_skipped = 0 + total = 0 + with open(cfg.wav2letter_lexicon, "r") as w2l_lex_f, open( + lexicon_file, "w" + ) as out_f: + for line in w2l_lex_f: + items = line.rstrip().split("\t") + assert len(items) == 2, items + if items[0] in lm_words: + print(items[0], items[1], file=out_f) + else: + num_skipped += 1 + logger.debug( + f"Skipping word {items[0]} as it was not found in LM" + ) + total += 1 + if num_skipped > 0: + logger.warning( + f"Skipped {num_skipped} out of {total} words as they were not found in LM" + ) + else: + with open(in_units_file, "r") as in_f, open(lexicon_file, "w") as out_f: + for line in in_f: + symb = line.split()[0] + if symb != "<eps>" and symb != "<ctc_blank>" and symb != "<SIL>": + print(symb, symb, file=out_f) + + lex_disambig_path = ( + Path(cfg.kaldi_root) / "egs/wsj/s5/utils/add_lex_disambig.pl" + ) + res = subprocess.run( + [lex_disambig_path, lexicon_file, disambig_lexicon_file], + check=True, + capture_output=True, + ) + ndisambig = int(res.stdout) + disamib_path = Path(cfg.kaldi_root) / "egs/wsj/s5/utils/add_disambig.pl" + res = subprocess.run( + [disamib_path, "--include-zero", in_units_file, str(ndisambig)], + check=True, + capture_output=True, + ) + with open(disambig_in_units_file, "wb") as f: + f.write(res.stdout) + + return disambig_lexicon_file, disambig_in_units_file + + +def create_G( + kaldi_root: Path, fst_dir: Path, lm_arpa: Path, arpa_base: str +) -> (Path, Path): + + out_words_file = fst_dir / f"kaldi_dict.{arpa_base}.txt" + grammar_graph = fst_dir / f"G_{arpa_base}.fst" + if not grammar_graph.exists() or not out_words_file.exists(): + logger.info(f"Creating {grammar_graph}") + arpa2fst = kaldi_root / "src/lmbin/arpa2fst" + subprocess.run( + [ + arpa2fst, + "--disambig-symbol=#0", + f"--write-symbol-table={out_words_file}", + lm_arpa, + grammar_graph, + ], + check=True, + ) + return grammar_graph, 
out_words_file + + +def create_L( + kaldi_root: Path, + fst_dir: Path, + unique_label: str, + lexicon_file: Path, + in_units_file: Path, + out_words_file: Path, +) -> Path: + lexicon_graph = fst_dir / f"L.{unique_label}.fst" + + if not lexicon_graph.exists(): + logger.info(f"Creating {lexicon_graph} (in units: {in_units_file})") + make_lex = kaldi_root / "egs/wsj/s5/utils/make_lexicon_fst.pl" + fstcompile = kaldi_root / "tools/openfst-1.6.7/bin/fstcompile" + fstaddselfloops = kaldi_root / "src/fstbin/fstaddselfloops" + fstarcsort = kaldi_root / "tools/openfst-1.6.7/bin/fstarcsort" + + def write_disambig_symbol(file): + with open(file, "r") as f: + for line in f: + items = line.rstrip().split() + if items[0] == "#0": + out_path = str(file) + "_disamig" + with open(out_path, "w") as out_f: + print(items[1], file=out_f) + return out_path + + return None + + in_disambig_sym = write_disambig_symbol(in_units_file) + assert in_disambig_sym is not None + out_disambig_sym = write_disambig_symbol(out_words_file) + assert out_disambig_sym is not None + + try: + with open(lexicon_graph, "wb") as out_f: + res = subprocess.run( + [make_lex, lexicon_file], capture_output=True, check=True + ) + assert len(res.stderr) == 0, res.stderr.decode("utf-8") + res = subprocess.run( + [ + fstcompile, + f"--isymbols={in_units_file}", + f"--osymbols={out_words_file}", + "--keep_isymbols=false", + "--keep_osymbols=false", + ], + input=res.stdout, + capture_output=True, + ) + assert len(res.stderr) == 0, res.stderr.decode("utf-8") + res = subprocess.run( + [fstaddselfloops, in_disambig_sym, out_disambig_sym], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstarcsort, "--sort_type=olabel"], + input=res.stdout, + capture_output=True, + check=True, + ) + out_f.write(res.stdout) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + os.remove(lexicon_graph) + raise + except AssertionError: + 
os.remove(lexicon_graph) + raise + + return lexicon_graph + + +def create_LG( + kaldi_root: Path, + fst_dir: Path, + unique_label: str, + lexicon_graph: Path, + grammar_graph: Path, +) -> Path: + lg_graph = fst_dir / f"LG.{unique_label}.fst" + + if not lg_graph.exists(): + logger.info(f"Creating {lg_graph}") + + fsttablecompose = kaldi_root / "src/fstbin/fsttablecompose" + fstdeterminizestar = kaldi_root / "src/fstbin/fstdeterminizestar" + fstminimizeencoded = kaldi_root / "src/fstbin/fstminimizeencoded" + fstpushspecial = kaldi_root / "src/fstbin/fstpushspecial" + fstarcsort = kaldi_root / "tools/openfst-1.6.7/bin/fstarcsort" + + try: + with open(lg_graph, "wb") as out_f: + res = subprocess.run( + [fsttablecompose, lexicon_graph, grammar_graph], + capture_output=True, + check=True, + ) + res = subprocess.run( + [ + fstdeterminizestar, + "--use-log=true", + ], + input=res.stdout, + capture_output=True, + ) + res = subprocess.run( + [fstminimizeencoded], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstpushspecial], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstarcsort, "--sort_type=ilabel"], + input=res.stdout, + capture_output=True, + check=True, + ) + out_f.write(res.stdout) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + os.remove(lg_graph) + raise + + return lg_graph + + +def create_H( + kaldi_root: Path, + fst_dir: Path, + disambig_out_units_file: Path, + in_labels: str, + vocab: Dictionary, + blk_sym: str, + silence_symbol: Optional[str], +) -> (Path, Path, Path): + h_graph = ( + fst_dir / f"H.{in_labels}{'_' + silence_symbol if silence_symbol else ''}.fst" + ) + h_out_units_file = fst_dir / f"kaldi_dict.h_out.{in_labels}.txt" + disambig_in_units_file_int = Path(str(h_graph) + "isym_disambig.int") + disambig_out_units_file_int = Path(str(disambig_out_units_file) + ".int") + if ( + not h_graph.exists() + or not 
h_out_units_file.exists() + or not disambig_in_units_file_int.exists() + ): + logger.info(f"Creating {h_graph}") + eps_sym = "<eps>" + + num_disambig = 0 + osymbols = [] + + with open(disambig_out_units_file, "r") as f, open( + disambig_out_units_file_int, "w" + ) as out_f: + for line in f: + symb, id = line.rstrip().split() + if line.startswith("#"): + num_disambig += 1 + print(id, file=out_f) + else: + if len(osymbols) == 0: + assert symb == eps_sym, symb + osymbols.append((symb, id)) + + i_idx = 0 + isymbols = [(eps_sym, 0)] + + imap = {} + + for i, s in enumerate(vocab.symbols): + i_idx += 1 + isymbols.append((s, i_idx)) + imap[s] = i_idx + + fst_str = [] + + node_idx = 0 + root_node = node_idx + + special_symbols = [blk_sym] + if silence_symbol is not None: + special_symbols.append(silence_symbol) + + for ss in special_symbols: + fst_str.append("{} {} {} {}".format(root_node, root_node, ss, eps_sym)) + + for symbol, _ in osymbols: + if symbol == eps_sym or symbol.startswith("#"): + continue + + node_idx += 1 + # 1. from root to emitting state + fst_str.append("{} {} {} {}".format(root_node, node_idx, symbol, symbol)) + # 2. from emitting state back to root + fst_str.append("{} {} {} {}".format(node_idx, root_node, eps_sym, eps_sym)) + # 3. from emitting state to optional blank state + pre_node = node_idx + node_idx += 1 + for ss in special_symbols: + fst_str.append("{} {} {} {}".format(pre_node, node_idx, ss, eps_sym)) + # 4. 
from blank state back to root + fst_str.append("{} {} {} {}".format(node_idx, root_node, eps_sym, eps_sym)) + + fst_str.append("{}".format(root_node)) + + fst_str = "\n".join(fst_str) + h_str = str(h_graph) + isym_file = h_str + ".isym" + + with open(isym_file, "w") as f: + for sym, id in isymbols: + f.write("{} {}\n".format(sym, id)) + + with open(h_out_units_file, "w") as f: + for sym, id in osymbols: + f.write("{} {}\n".format(sym, id)) + + with open(disambig_in_units_file_int, "w") as f: + disam_sym_id = len(isymbols) + for _ in range(num_disambig): + f.write("{}\n".format(disam_sym_id)) + disam_sym_id += 1 + + fstcompile = kaldi_root / "tools/openfst-1.6.7/bin/fstcompile" + fstaddselfloops = kaldi_root / "src/fstbin/fstaddselfloops" + fstarcsort = kaldi_root / "tools/openfst-1.6.7/bin/fstarcsort" + + try: + with open(h_graph, "wb") as out_f: + res = subprocess.run( + [ + fstcompile, + f"--isymbols={isym_file}", + f"--osymbols={h_out_units_file}", + "--keep_isymbols=false", + "--keep_osymbols=false", + ], + input=str.encode(fst_str), + capture_output=True, + check=True, + ) + res = subprocess.run( + [ + fstaddselfloops, + disambig_in_units_file_int, + disambig_out_units_file_int, + ], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstarcsort, "--sort_type=olabel"], + input=res.stdout, + capture_output=True, + check=True, + ) + out_f.write(res.stdout) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + os.remove(h_graph) + raise + return h_graph, h_out_units_file, disambig_in_units_file_int + + +def create_HLGa( + kaldi_root: Path, + fst_dir: Path, + unique_label: str, + h_graph: Path, + lg_graph: Path, + disambig_in_words_file_int: Path, +) -> Path: + hlga_graph = fst_dir / f"HLGa.{unique_label}.fst" + + if not hlga_graph.exists(): + logger.info(f"Creating {hlga_graph}") + + fsttablecompose = kaldi_root / "src/fstbin/fsttablecompose" + fstdeterminizestar = 
kaldi_root / "src/fstbin/fstdeterminizestar" + fstrmsymbols = kaldi_root / "src/fstbin/fstrmsymbols" + fstrmepslocal = kaldi_root / "src/fstbin/fstrmepslocal" + fstminimizeencoded = kaldi_root / "src/fstbin/fstminimizeencoded" + + try: + with open(hlga_graph, "wb") as out_f: + res = subprocess.run( + [ + fsttablecompose, + h_graph, + lg_graph, + ], + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstdeterminizestar, "--use-log=true"], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstrmsymbols, disambig_in_words_file_int], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstrmepslocal], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstminimizeencoded], + input=res.stdout, + capture_output=True, + check=True, + ) + out_f.write(res.stdout) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + os.remove(hlga_graph) + raise + + return hlga_graph + + +def create_HLa( + kaldi_root: Path, + fst_dir: Path, + unique_label: str, + h_graph: Path, + l_graph: Path, + disambig_in_words_file_int: Path, +) -> Path: + hla_graph = fst_dir / f"HLa.{unique_label}.fst" + + if not hla_graph.exists(): + logger.info(f"Creating {hla_graph}") + + fsttablecompose = kaldi_root / "src/fstbin/fsttablecompose" + fstdeterminizestar = kaldi_root / "src/fstbin/fstdeterminizestar" + fstrmsymbols = kaldi_root / "src/fstbin/fstrmsymbols" + fstrmepslocal = kaldi_root / "src/fstbin/fstrmepslocal" + fstminimizeencoded = kaldi_root / "src/fstbin/fstminimizeencoded" + + try: + with open(hla_graph, "wb") as out_f: + res = subprocess.run( + [ + fsttablecompose, + h_graph, + l_graph, + ], + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstdeterminizestar, "--use-log=true"], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstrmsymbols, 
disambig_in_words_file_int], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstrmepslocal], + input=res.stdout, + capture_output=True, + check=True, + ) + res = subprocess.run( + [fstminimizeencoded], + input=res.stdout, + capture_output=True, + check=True, + ) + out_f.write(res.stdout) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + os.remove(hla_graph) + raise + + return hla_graph + + +def create_HLG( + kaldi_root: Path, + fst_dir: Path, + unique_label: str, + hlga_graph: Path, + prefix: str = "HLG", +) -> Path: + hlg_graph = fst_dir / f"{prefix}.{unique_label}.fst" + + if not hlg_graph.exists(): + logger.info(f"Creating {hlg_graph}") + + add_self_loop = script_dir / "add-self-loop-simple" + kaldi_src = kaldi_root / "src" + kaldi_lib = kaldi_src / "lib" + + try: + if not add_self_loop.exists(): + fst_include = kaldi_root / "tools/openfst-1.6.7/include" + add_self_loop_src = script_dir / "add-self-loop-simple.cc" + + subprocess.run( + [ + "c++", + f"-I{kaldi_src}", + f"-I{fst_include}", + f"-L{kaldi_lib}", + add_self_loop_src, + "-lkaldi-base", + "-lkaldi-fstext", + "-o", + add_self_loop, + ], + check=True, + ) + + my_env = os.environ.copy() + my_env["LD_LIBRARY_PATH"] = f"{kaldi_lib}:{my_env['LD_LIBRARY_PATH']}" + + subprocess.run( + [ + add_self_loop, + hlga_graph, + hlg_graph, + ], + check=True, + capture_output=True, + env=my_env, + ) + except subprocess.CalledProcessError as e: + logger.error(f"cmd: {e.cmd}, err: {e.stderr.decode('utf-8')}") + raise + + return hlg_graph + + +def initalize_kaldi(cfg: KaldiInitializerConfig) -> Path: + if cfg.fst_dir is None: + cfg.fst_dir = osp.join(cfg.data_dir, "kaldi") + if cfg.out_labels is None: + cfg.out_labels = cfg.in_labels + + kaldi_root = Path(cfg.kaldi_root) + data_dir = Path(cfg.data_dir) + fst_dir = Path(cfg.fst_dir) + fst_dir.mkdir(parents=True, exist_ok=True) + + arpa_base = 
osp.splitext(osp.basename(cfg.lm_arpa))[0] + unique_label = f"{cfg.in_labels}.{arpa_base}" + + with open(data_dir / f"dict.{cfg.in_labels}.txt", "r") as f: + vocab = Dictionary.load(f) + + in_units_file = create_units(fst_dir, cfg.in_labels, vocab) + + grammar_graph, out_words_file = create_G( + kaldi_root, fst_dir, Path(cfg.lm_arpa), arpa_base + ) + + disambig_lexicon_file, disambig_L_in_units_file = create_lexicon( + cfg, fst_dir, unique_label, in_units_file, out_words_file + ) + + h_graph, h_out_units_file, disambig_in_units_file_int = create_H( + kaldi_root, + fst_dir, + disambig_L_in_units_file, + cfg.in_labels, + vocab, + cfg.blank_symbol, + cfg.silence_symbol, + ) + lexicon_graph = create_L( + kaldi_root, + fst_dir, + unique_label, + disambig_lexicon_file, + disambig_L_in_units_file, + out_words_file, + ) + lg_graph = create_LG( + kaldi_root, fst_dir, unique_label, lexicon_graph, grammar_graph + ) + hlga_graph = create_HLGa( + kaldi_root, fst_dir, unique_label, h_graph, lg_graph, disambig_in_units_file_int + ) + hlg_graph = create_HLG(kaldi_root, fst_dir, unique_label, hlga_graph) + + # for debugging + # hla_graph = create_HLa(kaldi_root, fst_dir, unique_label, h_graph, lexicon_graph, disambig_in_units_file_int) + # hl_graph = create_HLG(kaldi_root, fst_dir, unique_label, hla_graph, prefix="HL_looped") + # create_HLG(kaldi_root, fst_dir, "phnc", h_graph, prefix="H_looped") + + return hlg_graph + + +@hydra.main(config_path=config_path, config_name="kaldi_initializer") +def cli_main(cfg: KaldiInitializerConfig) -> None: + container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True) + cfg = OmegaConf.create(container) + OmegaConf.set_struct(cfg, True) + initalize_kaldi(cfg) + + +if __name__ == "__main__": + + logging.root.setLevel(logging.INFO) + logging.basicConfig(level=logging.INFO) + + try: + from hydra._internal.utils import ( + get_args, + ) # pylint: disable=import-outside-toplevel + + cfg_name = get_args().config_name or "kaldi_initializer" + 
except ImportError: + logger.warning("Failed to get config name from hydra args") + cfg_name = "kaldi_initializer" + + cs = ConfigStore.instance() + cs.store(name=cfg_name, node=KaldiInitializerConfig) + + cli_main() diff --git a/fairseq/examples/speech_recognition/models/__init__.py b/fairseq/examples/speech_recognition/models/__init__.py new file mode 100644 index 0000000..54b5a1c --- /dev/null +++ b/fairseq/examples/speech_recognition/models/__init__.py @@ -0,0 +1,8 @@ +import importlib +import os + + +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + model_name = file[: file.find(".py")] + importlib.import_module("examples.speech_recognition.models." + model_name) diff --git a/fairseq/examples/speech_recognition/models/vggtransformer.py b/fairseq/examples/speech_recognition/models/vggtransformer.py new file mode 100644 index 0000000..bca0ae5 --- /dev/null +++ b/fairseq/examples/speech_recognition/models/vggtransformer.py @@ -0,0 +1,1020 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import math +from collections.abc import Iterable + +import torch +import torch.nn as nn +from examples.speech_recognition.data.data_utils import lengths_to_encoder_padding_mask +from fairseq import utils +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqEncoderModel, + FairseqIncrementalDecoder, + register_model, + register_model_architecture, +) +from fairseq.modules import ( + LinearizedConvolution, + TransformerDecoderLayer, + TransformerEncoderLayer, + VGGBlock, +) + + +@register_model("asr_vggtransformer") +class VGGTransformerModel(FairseqEncoderDecoderModel): + """ + Transformers with convolutional context for ASR + https://arxiv.org/abs/1904.11660 + """ + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--input-feat-per-channel", + type=int, + metavar="N", + help="encoder input dimension per input channel", + ) + parser.add_argument( + "--vggblock-enc-config", + type=str, + metavar="EXPR", + help=""" + an array of tuples each containing the configuration of one vggblock: + [(out_channels, + conv_kernel_size, + pooling_kernel_size, + num_conv_layers, + use_layer_norm), ...]) + """, + ) + parser.add_argument( + "--transformer-enc-config", + type=str, + metavar="EXPR", + help="""" + a tuple containing the configuration of the encoder transformer layers + configurations: + [(input_dim, + num_heads, + ffn_dim, + normalize_before, + dropout, + attention_dropout, + relu_dropout), ...]') + """, + ) + parser.add_argument( + "--enc-output-dim", + type=int, + metavar="N", + help=""" + encoder output dimension, can be None. 
If specified, projecting the + transformer output to the specified dimension""", + ) + parser.add_argument( + "--in-channels", + type=int, + metavar="N", + help="number of encoder input channels", + ) + parser.add_argument( + "--tgt-embed-dim", + type=int, + metavar="N", + help="embedding dimension of the decoder target tokens", + ) + parser.add_argument( + "--transformer-dec-config", + type=str, + metavar="EXPR", + help=""" + a tuple containing the configuration of the decoder transformer layers + configurations: + [(input_dim, + num_heads, + ffn_dim, + normalize_before, + dropout, + attention_dropout, + relu_dropout), ...] + """, + ) + parser.add_argument( + "--conv-dec-config", + type=str, + metavar="EXPR", + help=""" + an array of tuples for the decoder 1-D convolution config + [(out_channels, conv_kernel_size, use_layer_norm), ...]""", + ) + + @classmethod + def build_encoder(cls, args, task): + return VGGTransformerEncoder( + input_feat_per_channel=args.input_feat_per_channel, + vggblock_config=eval(args.vggblock_enc_config), + transformer_config=eval(args.transformer_enc_config), + encoder_output_dim=args.enc_output_dim, + in_channels=args.in_channels, + ) + + @classmethod + def build_decoder(cls, args, task): + return TransformerDecoder( + dictionary=task.target_dictionary, + embed_dim=args.tgt_embed_dim, + transformer_config=eval(args.transformer_dec_config), + conv_config=eval(args.conv_dec_config), + encoder_output_dim=args.enc_output_dim, + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + # make sure that all args are properly defaulted + # (in case there are any new ones) + base_architecture(args) + + encoder = cls.build_encoder(args, task) + decoder = cls.build_decoder(args, task) + return cls(encoder, decoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = super().get_normalized_probs(net_output, log_probs, sample) + 
lprobs.batch_first = True + return lprobs + + +DEFAULT_ENC_VGGBLOCK_CONFIG = ((32, 3, 2, 2, False),) * 2 +DEFAULT_ENC_TRANSFORMER_CONFIG = ((256, 4, 1024, True, 0.2, 0.2, 0.2),) * 2 +# 256: embedding dimension +# 4: number of heads +# 1024: FFN +# True: apply layerNorm before (dropout + resiaul) instead of after +# 0.2 (dropout): dropout after MultiheadAttention and second FC +# 0.2 (attention_dropout): dropout in MultiheadAttention +# 0.2 (relu_dropout): dropout after ReLu +DEFAULT_DEC_TRANSFORMER_CONFIG = ((256, 2, 1024, True, 0.2, 0.2, 0.2),) * 2 +DEFAULT_DEC_CONV_CONFIG = ((256, 3, True),) * 2 + + +# TODO: repace transformer encoder config from one liner +# to explicit args to get rid of this transformation +def prepare_transformer_encoder_params( + input_dim, + num_heads, + ffn_dim, + normalize_before, + dropout, + attention_dropout, + relu_dropout, +): + args = argparse.Namespace() + args.encoder_embed_dim = input_dim + args.encoder_attention_heads = num_heads + args.attention_dropout = attention_dropout + args.dropout = dropout + args.activation_dropout = relu_dropout + args.encoder_normalize_before = normalize_before + args.encoder_ffn_embed_dim = ffn_dim + return args + + +def prepare_transformer_decoder_params( + input_dim, + num_heads, + ffn_dim, + normalize_before, + dropout, + attention_dropout, + relu_dropout, +): + args = argparse.Namespace() + args.encoder_embed_dim = None + args.decoder_embed_dim = input_dim + args.decoder_attention_heads = num_heads + args.attention_dropout = attention_dropout + args.dropout = dropout + args.activation_dropout = relu_dropout + args.decoder_normalize_before = normalize_before + args.decoder_ffn_embed_dim = ffn_dim + return args + + +class VGGTransformerEncoder(FairseqEncoder): + """VGG + Transformer encoder""" + + def __init__( + self, + input_feat_per_channel, + vggblock_config=DEFAULT_ENC_VGGBLOCK_CONFIG, + transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG, + encoder_output_dim=512, + in_channels=1, + 
transformer_context=None, + transformer_sampling=None, + ): + """constructor for VGGTransformerEncoder + + Args: + - input_feat_per_channel: feature dim (not including stacked, + just base feature) + - in_channel: # input channels (e.g., if stack 8 feature vector + together, this is 8) + - vggblock_config: configuration of vggblock, see comments on + DEFAULT_ENC_VGGBLOCK_CONFIG + - transformer_config: configuration of transformer layer, see comments + on DEFAULT_ENC_TRANSFORMER_CONFIG + - encoder_output_dim: final transformer output embedding dimension + - transformer_context: (left, right) if set, self-attention will be focused + on (t-left, t+right) + - transformer_sampling: an iterable of int, must match with + len(transformer_config), transformer_sampling[i] indicates sampling + factor for i-th transformer layer, after multihead att and feedfoward + part + """ + super().__init__(None) + + self.num_vggblocks = 0 + if vggblock_config is not None: + if not isinstance(vggblock_config, Iterable): + raise ValueError("vggblock_config is not iterable") + self.num_vggblocks = len(vggblock_config) + + self.conv_layers = nn.ModuleList() + self.in_channels = in_channels + self.input_dim = input_feat_per_channel + self.pooling_kernel_sizes = [] + + if vggblock_config is not None: + for _, config in enumerate(vggblock_config): + ( + out_channels, + conv_kernel_size, + pooling_kernel_size, + num_conv_layers, + layer_norm, + ) = config + self.conv_layers.append( + VGGBlock( + in_channels, + out_channels, + conv_kernel_size, + pooling_kernel_size, + num_conv_layers, + input_dim=input_feat_per_channel, + layer_norm=layer_norm, + ) + ) + self.pooling_kernel_sizes.append(pooling_kernel_size) + in_channels = out_channels + input_feat_per_channel = self.conv_layers[-1].output_dim + + transformer_input_dim = self.infer_conv_output_dim( + self.in_channels, self.input_dim + ) + # transformer_input_dim is the output dimension of VGG part + + 
self.validate_transformer_config(transformer_config) + self.transformer_context = self.parse_transformer_context(transformer_context) + self.transformer_sampling = self.parse_transformer_sampling( + transformer_sampling, len(transformer_config) + ) + + self.transformer_layers = nn.ModuleList() + + if transformer_input_dim != transformer_config[0][0]: + self.transformer_layers.append( + Linear(transformer_input_dim, transformer_config[0][0]) + ) + self.transformer_layers.append( + TransformerEncoderLayer( + prepare_transformer_encoder_params(*transformer_config[0]) + ) + ) + + for i in range(1, len(transformer_config)): + if transformer_config[i - 1][0] != transformer_config[i][0]: + self.transformer_layers.append( + Linear(transformer_config[i - 1][0], transformer_config[i][0]) + ) + self.transformer_layers.append( + TransformerEncoderLayer( + prepare_transformer_encoder_params(*transformer_config[i]) + ) + ) + + self.encoder_output_dim = encoder_output_dim + self.transformer_layers.extend( + [ + Linear(transformer_config[-1][0], encoder_output_dim), + LayerNorm(encoder_output_dim), + ] + ) + + def forward(self, src_tokens, src_lengths, **kwargs): + """ + src_tokens: padded tensor (B, T, C * feat) + src_lengths: tensor of original lengths of input utterances (B,) + """ + bsz, max_seq_len, _ = src_tokens.size() + x = src_tokens.view(bsz, max_seq_len, self.in_channels, self.input_dim) + x = x.transpose(1, 2).contiguous() + # (B, C, T, feat) + + for layer_idx in range(len(self.conv_layers)): + x = self.conv_layers[layer_idx](x) + + bsz, _, output_seq_len, _ = x.size() + + # (B, C, T, feat) -> (B, T, C, feat) -> (T, B, C, feat) -> (T, B, C * feat) + x = x.transpose(1, 2).transpose(0, 1) + x = x.contiguous().view(output_seq_len, bsz, -1) + + input_lengths = src_lengths.clone() + for s in self.pooling_kernel_sizes: + input_lengths = (input_lengths.float() / s).ceil().long() + + encoder_padding_mask, _ = lengths_to_encoder_padding_mask( + input_lengths, batch_first=True + 
) + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + subsampling_factor = int(max_seq_len * 1.0 / output_seq_len + 0.5) + attn_mask = self.lengths_to_attn_mask(input_lengths, subsampling_factor) + + transformer_layer_idx = 0 + + for layer_idx in range(len(self.transformer_layers)): + + if isinstance(self.transformer_layers[layer_idx], TransformerEncoderLayer): + x = self.transformer_layers[layer_idx]( + x, encoder_padding_mask, attn_mask + ) + + if self.transformer_sampling[transformer_layer_idx] != 1: + sampling_factor = self.transformer_sampling[transformer_layer_idx] + x, encoder_padding_mask, attn_mask = self.slice( + x, encoder_padding_mask, attn_mask, sampling_factor + ) + + transformer_layer_idx += 1 + + else: + x = self.transformer_layers[layer_idx](x) + + # encoder_padding_maks is a (T x B) tensor, its [t, b] elements indicate + # whether encoder_output[t, b] is valid or not (valid=0, invalid=1) + + return { + "encoder_out": x, # (T, B, C) + "encoder_padding_mask": encoder_padding_mask.t() + if encoder_padding_mask is not None + else None, + # (B, T) --> (T, B) + } + + def infer_conv_output_dim(self, in_channels, input_dim): + sample_seq_len = 200 + sample_bsz = 10 + x = torch.randn(sample_bsz, in_channels, sample_seq_len, input_dim) + for i, _ in enumerate(self.conv_layers): + x = self.conv_layers[i](x) + x = x.transpose(1, 2) + mb, seq = x.size()[:2] + return x.contiguous().view(mb, seq, -1).size(-1) + + def validate_transformer_config(self, transformer_config): + for config in transformer_config: + input_dim, num_heads = config[:2] + if input_dim % num_heads != 0: + msg = ( + "ERROR in transformer config {}: ".format(config) + + "input dimension {} ".format(input_dim) + + "not dividable by number of heads {}".format(num_heads) + ) + raise ValueError(msg) + + def parse_transformer_context(self, transformer_context): + """ + transformer_context can be the following: + - None; indicates no context is used, i.e., + transformer can access 
full context + - a tuple/list of two int; indicates left and right context, + any number <0 indicates infinite context + * e.g., (5, 6) indicates that for query at x_t, transformer can + access [t-5, t+6] (inclusive) + * e.g., (-1, 6) indicates that for query at x_t, transformer can + access [0, t+6] (inclusive) + """ + if transformer_context is None: + return None + + if not isinstance(transformer_context, Iterable): + raise ValueError("transformer context must be Iterable if it is not None") + + if len(transformer_context) != 2: + raise ValueError("transformer context must have length 2") + + left_context = transformer_context[0] + if left_context < 0: + left_context = None + + right_context = transformer_context[1] + if right_context < 0: + right_context = None + + if left_context is None and right_context is None: + return None + + return (left_context, right_context) + + def parse_transformer_sampling(self, transformer_sampling, num_layers): + """ + parsing transformer sampling configuration + + Args: + - transformer_sampling, accepted input: + * None, indicating no sampling + * an Iterable with int (>0) as element + - num_layers, expected number of transformer layers, must match with + the length of transformer_sampling if it is not None + + Returns: + - A tuple with length num_layers + """ + if transformer_sampling is None: + return (1,) * num_layers + + if not isinstance(transformer_sampling, Iterable): + raise ValueError( + "transformer_sampling must be an iterable if it is not None" + ) + + if len(transformer_sampling) != num_layers: + raise ValueError( + "transformer_sampling {} does not match with the number " + "of layers {}".format(transformer_sampling, num_layers) + ) + + for layer, value in enumerate(transformer_sampling): + if not isinstance(value, int): + raise ValueError("Invalid value in transformer_sampling: ") + if value < 1: + raise ValueError( + "{} layer's subsampling is {}.".format(layer, value) + + " This is not allowed! 
" + ) + return transformer_sampling + + def slice(self, embedding, padding_mask, attn_mask, sampling_factor): + """ + embedding is a (T, B, D) tensor + padding_mask is a (B, T) tensor or None + attn_mask is a (T, T) tensor or None + """ + embedding = embedding[::sampling_factor, :, :] + if padding_mask is not None: + padding_mask = padding_mask[:, ::sampling_factor] + if attn_mask is not None: + attn_mask = attn_mask[::sampling_factor, ::sampling_factor] + + return embedding, padding_mask, attn_mask + + def lengths_to_attn_mask(self, input_lengths, subsampling_factor=1): + """ + create attention mask according to sequence lengths and transformer + context + + Args: + - input_lengths: (B, )-shape Int/Long tensor; input_lengths[b] is + the length of b-th sequence + - subsampling_factor: int + * Note that the left_context and right_context is specified in + the input frame-level while input to transformer may already + go through subsampling (e.g., the use of striding in vggblock) + we use subsampling_factor to scale the left/right context + + Return: + - a (T, T) binary tensor or None, where T is max(input_lengths) + * if self.transformer_context is None, None + * if left_context is None, + * attn_mask[t, t + right_context + 1:] = 1 + * others = 0 + * if right_context is None, + * attn_mask[t, 0:t - left_context] = 1 + * others = 0 + * elsif + * attn_mask[t, t - left_context: t + right_context + 1] = 0 + * others = 1 + """ + if self.transformer_context is None: + return None + + maxT = torch.max(input_lengths).item() + attn_mask = torch.zeros(maxT, maxT) + + left_context = self.transformer_context[0] + right_context = self.transformer_context[1] + if left_context is not None: + left_context = math.ceil(self.transformer_context[0] / subsampling_factor) + if right_context is not None: + right_context = math.ceil(self.transformer_context[1] / subsampling_factor) + + for t in range(maxT): + if left_context is not None: + st = 0 + en = max(st, t - left_context) + 
attn_mask[t, st:en] = 1 + if right_context is not None: + st = t + right_context + 1 + st = min(st, maxT - 1) + attn_mask[t, st:] = 1 + + return attn_mask.to(input_lengths.device) + + def reorder_encoder_out(self, encoder_out, new_order): + encoder_out["encoder_out"] = encoder_out["encoder_out"].index_select( + 1, new_order + ) + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(1, new_order) + return encoder_out + + +class TransformerDecoder(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs. + Default: ``False`` + left_pad (bool, optional): whether the input is left-padded. 
Default: + ``False`` + """ + + def __init__( + self, + dictionary, + embed_dim=512, + transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG, + conv_config=DEFAULT_DEC_CONV_CONFIG, + encoder_output_dim=512, + ): + + super().__init__(dictionary) + vocab_size = len(dictionary) + self.padding_idx = dictionary.pad() + self.embed_tokens = Embedding(vocab_size, embed_dim, self.padding_idx) + + self.conv_layers = nn.ModuleList() + for i in range(len(conv_config)): + out_channels, kernel_size, layer_norm = conv_config[i] + if i == 0: + conv_layer = LinearizedConv1d( + embed_dim, out_channels, kernel_size, padding=kernel_size - 1 + ) + else: + conv_layer = LinearizedConv1d( + conv_config[i - 1][0], + out_channels, + kernel_size, + padding=kernel_size - 1, + ) + self.conv_layers.append(conv_layer) + if layer_norm: + self.conv_layers.append(nn.LayerNorm(out_channels)) + self.conv_layers.append(nn.ReLU()) + + self.layers = nn.ModuleList() + if conv_config[-1][0] != transformer_config[0][0]: + self.layers.append(Linear(conv_config[-1][0], transformer_config[0][0])) + self.layers.append( + TransformerDecoderLayer( + prepare_transformer_decoder_params(*transformer_config[0]) + ) + ) + + for i in range(1, len(transformer_config)): + if transformer_config[i - 1][0] != transformer_config[i][0]: + self.layers.append( + Linear(transformer_config[i - 1][0], transformer_config[i][0]) + ) + self.layers.append( + TransformerDecoderLayer( + prepare_transformer_decoder_params(*transformer_config[i]) + ) + ) + self.fc_out = Linear(transformer_config[-1][0], vocab_size) + + def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for input feeding/teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + Returns: + tuple: 
+ - the last decoder layer's output of shape `(batch, tgt_len, + vocab)` + - the last decoder layer's attention weights of shape `(batch, + tgt_len, src_len)` + """ + target_padding_mask = ( + (prev_output_tokens == self.padding_idx).to(prev_output_tokens.device) + if incremental_state is None + else None + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + + # embed tokens + x = self.embed_tokens(prev_output_tokens) + + # B x T x C -> T x B x C + x = self._transpose_if_training(x, incremental_state) + + for layer in self.conv_layers: + if isinstance(layer, LinearizedConvolution): + x = layer(x, incremental_state) + else: + x = layer(x) + + # B x T x C -> T x B x C + x = self._transpose_if_inference(x, incremental_state) + + # decoder layers + for layer in self.layers: + if isinstance(layer, TransformerDecoderLayer): + x, *_ = layer( + x, + (encoder_out["encoder_out"] if encoder_out is not None else None), + ( + encoder_out["encoder_padding_mask"].t() + if encoder_out["encoder_padding_mask"] is not None + else None + ), + incremental_state, + self_attn_mask=( + self.buffered_future_mask(x) + if incremental_state is None + else None + ), + self_attn_padding_mask=( + target_padding_mask if incremental_state is None else None + ), + ) + else: + x = layer(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + x = self.fc_out(x) + + return x, None + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if ( + not hasattr(self, "_future_mask") + or self._future_mask is None + or self._future_mask.device != tensor.device + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1 + ) + if self._future_mask.size(0) < dim: + self._future_mask = torch.triu( + utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1 + ) + return self._future_mask[:dim, :dim] + + def _transpose_if_training(self, x, incremental_state): + if incremental_state is None: + x = x.transpose(0, 1) + return 
x + + def _transpose_if_inference(self, x, incremental_state): + if incremental_state: + x = x.transpose(0, 1) + return x + + +@register_model("asr_vggtransformer_encoder") +class VGGTransformerEncoderModel(FairseqEncoderModel): + def __init__(self, encoder): + super().__init__(encoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--input-feat-per-channel", + type=int, + metavar="N", + help="encoder input dimension per input channel", + ) + parser.add_argument( + "--vggblock-enc-config", + type=str, + metavar="EXPR", + help=""" + an array of tuples each containing the configuration of one vggblock + [(out_channels, conv_kernel_size, pooling_kernel_size,num_conv_layers), ...] + """, + ) + parser.add_argument( + "--transformer-enc-config", + type=str, + metavar="EXPR", + help=""" + a tuple containing the configuration of the Transformer layers + configurations: + [(input_dim, + num_heads, + ffn_dim, + normalize_before, + dropout, + attention_dropout, + relu_dropout), ]""", + ) + parser.add_argument( + "--enc-output-dim", + type=int, + metavar="N", + help="encoder output dimension, projecting the LSTM output", + ) + parser.add_argument( + "--in-channels", + type=int, + metavar="N", + help="number of encoder input channels", + ) + parser.add_argument( + "--transformer-context", + type=str, + metavar="EXPR", + help=""" + either None or a tuple of two ints, indicating left/right context a + transformer can have access to""", + ) + parser.add_argument( + "--transformer-sampling", + type=str, + metavar="EXPR", + help=""" + either None or a tuple of ints, indicating sampling factor in each layer""", + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + base_architecture_enconly(args) + encoder = VGGTransformerEncoderOnly( + vocab_size=len(task.target_dictionary), + input_feat_per_channel=args.input_feat_per_channel, + 
vggblock_config=eval(args.vggblock_enc_config), + transformer_config=eval(args.transformer_enc_config), + encoder_output_dim=args.enc_output_dim, + in_channels=args.in_channels, + transformer_context=eval(args.transformer_context), + transformer_sampling=eval(args.transformer_sampling), + ) + return cls(encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (T, B, D) tensor + lprobs = super().get_normalized_probs(net_output, log_probs, sample) + # lprobs is a (T, B, D) tensor + # we need to transoose to get (B, T, D) tensor + lprobs = lprobs.transpose(0, 1).contiguous() + lprobs.batch_first = True + return lprobs + + +class VGGTransformerEncoderOnly(VGGTransformerEncoder): + def __init__( + self, + vocab_size, + input_feat_per_channel, + vggblock_config=DEFAULT_ENC_VGGBLOCK_CONFIG, + transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG, + encoder_output_dim=512, + in_channels=1, + transformer_context=None, + transformer_sampling=None, + ): + super().__init__( + input_feat_per_channel=input_feat_per_channel, + vggblock_config=vggblock_config, + transformer_config=transformer_config, + encoder_output_dim=encoder_output_dim, + in_channels=in_channels, + transformer_context=transformer_context, + transformer_sampling=transformer_sampling, + ) + self.fc_out = Linear(self.encoder_output_dim, vocab_size) + + def forward(self, src_tokens, src_lengths, **kwargs): + """ + src_tokens: padded tensor (B, T, C * feat) + src_lengths: tensor of original lengths of input utterances (B,) + """ + + enc_out = super().forward(src_tokens, src_lengths) + x = self.fc_out(enc_out["encoder_out"]) + # x = F.log_softmax(x, dim=-1) + # Note: no need this line, because model.get_normalized_prob will call + # log_softmax + return { + "encoder_out": x, # (T, B, C) + "encoder_padding_mask": enc_out["encoder_padding_mask"], # (T, B) + } + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return (1e6, 1e6) # 
def Linear(in_features, out_features, bias=True, dropout=0):
    """Build a plain ``nn.Linear`` projection (input: N x T x C).

    The layer keeps PyTorch's default initialization; the custom uniform
    initialization that used to live here is disabled.  ``dropout`` is
    accepted for signature compatibility only and is not used.
    """
    return nn.Linear(in_features, out_features, bias=bias)
@register_model_architecture("asr_vggtransformer", "vggtransformer_2")
def vggtransformer_2(args):
    """Fill in the ``vggtransformer_2`` defaults for options missing on *args*.

    Options that are already set on *args* keep their value.  The layer
    configurations are stored as strings, exactly as they would be given on
    the command line.
    """
    defaults = (
        ("input_feat_per_channel", 80),
        ("vggblock_enc_config", "[(64, 3, 2, 2, True), (128, 3, 2, 2, True)]"),
        (
            "transformer_enc_config",
            "((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 16",
        ),
        ("enc_output_dim", 1024),
        ("tgt_embed_dim", 512),
        ("conv_dec_config", "((256, 3, True),) * 4"),
        (
            "transformer_dec_config",
            "((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 6",
        ),
    )
    # Same order as the explicit assignments this table replaces.
    for option, fallback in defaults:
        setattr(args, option, getattr(args, option, fallback))
def base_architecture_enconly(args):
    """Populate *args* with the baseline encoder-only (CTC) hyper-parameters.

    Every option already present on *args* is left as is; missing ones get
    the fallback below.  Config expressions are kept as strings.
    """
    fallbacks = {
        "input_feat_per_channel": 40,
        "vggblock_enc_config": "[(32, 3, 2, 2, True)] * 2",
        "transformer_enc_config": "((256, 4, 1024, True, 0.2, 0.2, 0.2),) * 2",
        "enc_output_dim": 512,
        "in_channels": 1,
        "transformer_context": "None",
        "transformer_sampling": "None",
    }
    for option, fallback in fallbacks.items():
        setattr(args, option, getattr(args, option, fallback))
args.transformer_enc_config = getattr( + args, + "transformer_enc_config", + "((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 16", + ) + args.enc_output_dim = getattr(args, "enc_output_dim", 1024) diff --git a/fairseq/examples/speech_recognition/models/w2l_conv_glu_enc.py b/fairseq/examples/speech_recognition/models/w2l_conv_glu_enc.py new file mode 100644 index 0000000..655a9b0 --- /dev/null +++ b/fairseq/examples/speech_recognition/models/w2l_conv_glu_enc.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderModel, + register_model, + register_model_architecture, +) +from fairseq.modules.fairseq_dropout import FairseqDropout + + +default_conv_enc_config = """[ + (400, 13, 170, 0.2), + (440, 14, 0, 0.214), + (484, 15, 0, 0.22898), + (532, 16, 0, 0.2450086), + (584, 17, 0, 0.262159202), + (642, 18, 0, 0.28051034614), + (706, 19, 0, 0.30014607037), + (776, 20, 0, 0.321156295296), + (852, 21, 0, 0.343637235966), + (936, 22, 0, 0.367691842484), + (1028, 23, 0, 0.393430271458), + (1130, 24, 0, 0.42097039046), + (1242, 25, 0, 0.450438317792), + (1366, 26, 0, 0.481969000038), + (1502, 27, 0, 0.51570683004), + (1652, 28, 0, 0.551806308143), + (1816, 29, 0, 0.590432749713), +]""" + + +@register_model("asr_w2l_conv_glu_encoder") +class W2lConvGluEncoderModel(FairseqEncoderModel): + def __init__(self, encoder): + super().__init__(encoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--input-feat-per-channel", + type=int, + metavar="N", + help="encoder input dimension per input channel", + ) + parser.add_argument( + "--in-channels", + type=int, + metavar="N", + help="number of encoder 
class W2lConvGluEncoder(FairseqEncoder):
    """Wav2Letter-style encoder: weight-normalized Conv1d + GLU stack.

    The convolutional stack is followed by two weight-normalized linear
    layers (with a GLU in between) that project to ``vocab_size`` outputs.

    Args:
        vocab_size: size of the output vocabulary.
        input_feat_per_channel: feature dimension of each input frame.
        in_channels: number of input channels; only ``1`` is supported.
        conv_enc_config: iterable of
            ``(out_channels, kernel_size, padding, dropout)`` tuples, one per
            conv layer.  ``out_channels`` must be even because the GLU halves
            the channel dimension.

    Raises:
        ValueError: if ``in_channels != 1`` or a layer has an odd number of
            output channels.
    """

    def __init__(
        self, vocab_size, input_feat_per_channel, in_channels, conv_enc_config
    ):
        super().__init__(None)

        self.input_dim = input_feat_per_channel
        if in_channels != 1:
            raise ValueError("only 1 input channel is currently supported")

        self.conv_layers = nn.ModuleList()
        self.linear_layers = nn.ModuleList()
        # Must be a ModuleList (not a plain list): only registered sub-modules
        # receive train()/eval(), so with a list dropout stayed active at
        # inference time.  Dropout has no parameters, so checkpoints are
        # unaffected.
        self.dropouts = nn.ModuleList()
        cur_channels = input_feat_per_channel

        for out_channels, kernel_size, padding, dropout in conv_enc_config:
            layer = nn.Conv1d(cur_channels, out_channels, kernel_size, padding=padding)
            layer.weight.data.mul_(math.sqrt(3))  # match wav2letter init
            self.conv_layers.append(nn.utils.weight_norm(layer))
            self.dropouts.append(
                FairseqDropout(dropout, module_name=self.__class__.__name__)
            )
            if out_channels % 2 != 0:
                raise ValueError("odd # of out_channels is incompatible with GLU")
            cur_channels = out_channels // 2  # halved by GLU

        for out_channels in [2 * cur_channels, vocab_size]:
            layer = nn.Linear(cur_channels, out_channels)
            layer.weight.data.mul_(math.sqrt(3))
            self.linear_layers.append(nn.utils.weight_norm(layer))
            cur_channels = out_channels // 2

    def forward(self, src_tokens, src_lengths, **kwargs):
        """Encode a padded batch of feature frames.

        Args:
            src_tokens: padded tensor (B, T, C * feat)
            src_lengths: tensor of original lengths of input utterances (B,)

        Returns:
            dict with ``encoder_out`` of shape (T, B, vocab_size) and
            ``encoder_padding_mask`` of shape (T, B), True at padded frames.
        """
        B, T, _ = src_tokens.size()
        x = src_tokens.transpose(1, 2).contiguous()  # (B, feat, T) assuming C == 1

        for conv, dropout in zip(self.conv_layers, self.dropouts):
            x = conv(x)
            x = F.glu(x, dim=1)
            x = dropout(x)

        x = x.transpose(1, 2).contiguous()  # (B, T, C); C == 908 with the default config
        x = self.linear_layers[0](x)
        x = F.glu(x, dim=2)
        # The dropout of the last conv layer is reused for the linear block.
        x = self.dropouts[-1](x)
        x = self.linear_layers[1](x)

        # The conv paddings are expected to preserve the sequence length.
        assert x.size(0) == B
        assert x.size(1) == T

        encoder_out = x.transpose(0, 1)  # (T, B, vocab_size)

        # Frame index >= utterance length marks padding; broadcasting
        # (1, T) against (B, 1) yields (B, T), transposed to (T, B).
        positions = torch.arange(T, device=x.device).view(1, T)
        encoder_padding_mask = (positions >= src_lengths.view(B, 1)).t()

        return {
            "encoder_out": encoder_out,  # (T, B, vocab_size)
            "encoder_padding_mask": encoder_padding_mask,  # (T, B)
        }

    def reorder_encoder_out(self, encoder_out, new_order):
        """Reorder the batch dimension (dim 1) of *encoder_out* in place."""
        encoder_out["encoder_out"] = encoder_out["encoder_out"].index_select(
            1, new_order
        )
        encoder_out["encoder_padding_mask"] = encoder_out[
            "encoder_padding_mask"
        ].index_select(1, new_order)
        return encoder_out

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        return (1e6, 1e6)  # an arbitrary large number
b/fairseq/examples/speech_recognition/new/README.md @@ -0,0 +1,43 @@ +# Flashlight Decoder + +This script runs decoding for pre-trained speech recognition models. + +## Usage + +Assuming a few variables: + +```bash +checkpoint=<path-to-checkpoint> +data=<path-to-data-directory> +lm_model=<path-to-language-model> +lexicon=<path-to-lexicon> +``` + +Example usage for decoding a fine-tuned Wav2Vec model: + +```bash +python $FAIRSEQ_ROOT/examples/speech_recognition/new/infer.py --multirun \ + task=audio_pretraining \ + task.data=$data \ + task.labels=ltr \ + common_eval.path=$checkpoint \ + decoding.type=kenlm \ + decoding.lexicon=$lexicon \ + decoding.lmpath=$lm_model \ + dataset.gen_subset=dev_clean,dev_other,test_clean,test_other +``` + +Example usage for using Ax to sweep WER parameters (requires `pip install hydra-ax-sweeper`): + +```bash +python $FAIRSEQ_ROOT/examples/speech_recognition/new/infer.py --multirun \ + hydra/sweeper=ax \ + task=audio_pretraining \ + task.data=$data \ + task.labels=ltr \ + common_eval.path=$checkpoint \ + decoding.type=kenlm \ + decoding.lexicon=$lexicon \ + decoding.lmpath=$lm_model \ + dataset.gen_subset=dev_other +``` diff --git a/fairseq/examples/speech_recognition/new/__init__.py b/fairseq/examples/speech_recognition/new/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax.yaml b/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax.yaml new file mode 100644 index 0000000..38e9c22 --- /dev/null +++ b/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax.yaml @@ -0,0 +1,29 @@ +# @package hydra.sweeper +_target_: hydra_plugins.hydra_ax_sweeper.ax_sweeper.AxSweeper +max_batch_size: null +ax_config: + max_trials: 128 + early_stop: + minimize: true + max_epochs_without_improvement: 10 + epsilon: 0.025 + experiment: + name: ${dataset.gen_subset} + objective_name: wer + minimize: true + parameter_constraints: null + outcome_constraints: null + 
status_quo: null + client: + verbose_logging: false + random_seed: null + params: + decoding.lmweight: + type: range + bounds: [0.0, 5.0] + decoding.wordscore: + type: range + bounds: [-5.0, 5.0] + decoding.silweight: + type: range + bounds: [ -8.0, 0.0 ] diff --git a/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax_sil.yaml b/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax_sil.yaml new file mode 100644 index 0000000..eaaebcf --- /dev/null +++ b/fairseq/examples/speech_recognition/new/conf/hydra/sweeper/ax_sil.yaml @@ -0,0 +1,29 @@ +# @package hydra.sweeper +_target_: hydra_plugins.hydra_ax_sweeper.ax_sweeper.AxSweeper +max_batch_size: null +ax_config: + max_trials: 64 + early_stop: + minimize: true + max_epochs_without_improvement: 10 + epsilon: 0.025 + experiment: + name: ${dataset.gen_subset} + objective_name: wer + minimize: true + parameter_constraints: null + outcome_constraints: null + status_quo: null + client: + verbose_logging: false + random_seed: null + params: + decoding.lmweight: + type: range + bounds: [0.0, 10.0] + decoding.wordscore: + type: range + bounds: [-10.0, 10.0] + decoding.silweight: + type: range + bounds: [ -10.0, 0.0 ] diff --git a/fairseq/examples/speech_recognition/new/conf/infer.yaml b/fairseq/examples/speech_recognition/new/conf/infer.yaml new file mode 100644 index 0000000..2d168d0 --- /dev/null +++ b/fairseq/examples/speech_recognition/new/conf/infer.yaml @@ -0,0 +1,27 @@ +# @package _group_ + +defaults: + - task: null + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/${dataset.gen_subset} + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${common_eval.results_path} + subdir: ${dataset.gen_subset} +common: + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +common_eval: + results_path: null + path: null + post_process: letter + quiet: true +dataset: + max_tokens: 3000000 + gen_subset: test +distributed_training: + distributed_world_size: 1 +decoding: + beam: 5 + type: 
viterbi diff --git a/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_1.yaml b/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_1.yaml new file mode 100644 index 0000000..d0a9b0e --- /dev/null +++ b/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_1.yaml @@ -0,0 +1,28 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - common_eval.path + sweep: + dir: /checkpoint/abaevski/asr/d2v2/decoding/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} +# subdir: ${hydra.job.override_dirname} + launcher: + cpus_per_task: 16 + gpus_per_node: 1 + tasks_per_node: 1 + nodes: 1 + partition: devlab,learnlab + mem_gb: 100 + timeout_min: 2000 + max_num_timeout: 10 + name: ${env:PREFIX}_${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/%j + constraint: volta32gb + exclude: learnfair7598 \ No newline at end of file diff --git a/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_2g.yaml b/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_2g.yaml new file mode 100644 index 0000000..c0c442f --- /dev/null +++ b/fairseq/examples/speech_recognition/new/conf/run_config/fb_slurm_2g.yaml @@ -0,0 +1,27 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - common_eval.path + sweep: + dir: /checkpoint/abaevski/asr/d2v2/decoding/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} +# subdir: ${hydra.job.override_dirname} + launcher: + cpus_per_task: 16 + gpus_per_node: 2 + tasks_per_node: 2 + nodes: 1 + partition: devlab,learnlab + mem_gb: 100 + timeout_min: 2000 + max_num_timeout: 10 + name: ${env:PREFIX}_${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir}/%j 
def __init__(self, tgt_dict: Dictionary) -> None:
    """Remember the target dictionary and resolve the blank/silence indices.

    ``blank`` is the index of ``<ctc_blank>`` when the dictionary has it,
    otherwise the BOS index.  ``silence`` is the index of ``<sep>``, else
    of ``|``, else the EOS index.
    """
    self.tgt_dict = tgt_dict
    self.vocab_size = len(tgt_dict)

    known_symbols = tgt_dict.indices

    if "<ctc_blank>" in known_symbols:
        self.blank = tgt_dict.index("<ctc_blank>")
    else:
        self.blank = tgt_dict.bos()

    # First matching word-boundary marker wins; EOS is the last resort.
    for marker in ("<sep>", "|"):
        if marker in known_symbols:
            self.silence = tgt_dict.index(marker)
            break
    else:
        self.silence = tgt_dict.eos()
def Decoder(
    cfg: Union[DecoderConfig, FlashlightDecoderConfig], tgt_dict: Dictionary
) -> BaseDecoder:
    """Instantiate the decoder selected by ``cfg.type``.

    Args:
        cfg: decoding configuration; ``cfg.type`` must be ``"viterbi"``,
            ``"kenlm"`` or ``"fairseqlm"``.
        tgt_dict: target dictionary handed to the decoder.

    Returns:
        A ``ViterbiDecoder``, ``KenLMDecoder`` or ``FairseqLMDecoder``.

    Raises:
        NotImplementedError: if ``cfg.type`` names no supported decoder.
    """
    # Decoder modules are imported on demand, so flashlight_decoder (which
    # warns when the flashlight bindings are missing) is only loaded when a
    # flashlight-based decoder is requested.
    if cfg.type == "viterbi":
        from .viterbi_decoder import ViterbiDecoder

        return ViterbiDecoder(tgt_dict)
    if cfg.type == "kenlm":
        from .flashlight_decoder import KenLMDecoder

        return KenLMDecoder(cfg, tgt_dict)
    if cfg.type == "fairseqlm":
        from .flashlight_decoder import FairseqLMDecoder

        return FairseqLMDecoder(cfg, tgt_dict)
    # Report the selector that was actually tested; the config has no
    # `name` field.
    raise NotImplementedError(f"Invalid decoder name: {cfg.type}")
@dataclass
class FlashlightDecoderConfig(FairseqDataclass):
    """Options consumed by the flashlight-based decoders.

    ``KenLMDecoder`` and ``FairseqLMDecoder`` read these fields and forward
    most of them to flashlight's ``LexiconDecoderOptions`` /
    ``LexiconFreeDecoderOptions``.
    """

    # Number of hypotheses kept per utterance (decoders slice results[:nbest]).
    nbest: int = field(
        default=1,
        metadata={"help": "Number of decodings to return"},
    )
    # Must be set for lexicon-free decoding (the decoders assert it).
    unitlm: bool = field(
        default=False,
        metadata={"help": "If set, use unit language model"},
    )
    # Required (MISSING default): path handed to KenLM, or loaded as a
    # checkpoint by FairseqLMDecoder.
    lmpath: str = field(
        default=MISSING,
        metadata={"help": "Language model for KenLM decoder"},
    )
    # When unset, the decoders take the lexicon-free branch.
    lexicon: Optional[str] = field(
        default=None,
        metadata={"help": "Lexicon for Flashlight decoder"},
    )
    # Forwarded as `beam_size`.
    beam: int = field(
        default=50,
        metadata={"help": "Number of beams to use for decoding"},
    )
    # Forwarded as `beam_threshold`.
    beamthreshold: float = field(
        default=50.0,
        metadata={"help": "Threshold for beam search decoding"},
    )
    # Forwarded as `beam_size_token`; the decoders fall back to
    # len(tgt_dict) when this is unset.
    beamsizetoken: Optional[int] = field(
        default=None, metadata={"help": "Beam size to use"}
    )
    # Forwarded as `word_score` (lexicon-based decoding only).
    wordscore: float = field(
        default=-1,
        metadata={"help": "Word score for KenLM decoder"},
    )
    # Forwarded as `unk_score` (lexicon-based decoding only).
    unkweight: float = field(
        default=-math.inf,
        metadata={"help": "Unknown weight for KenLM decoder"},
    )
    # Forwarded as `sil_score`.
    silweight: float = field(
        default=0,
        metadata={"help": "Silence weight for KenLM decoder"},
    )
    # Forwarded as `lm_weight`.
    lmweight: float = field(
        default=2,
        metadata={"help": "Weight for LM while interpolating score"},
    )
/dev/null +++ b/fairseq/examples/speech_recognition/new/decoders/flashlight_decoder.py @@ -0,0 +1,433 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import gc +import os.path as osp +import warnings +from collections import deque, namedtuple +from typing import Any, Dict, Tuple + +import numpy as np +import torch +from fairseq import tasks +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models.fairseq_model import FairseqModel +from fairseq.utils import apply_to_sample +from omegaconf import open_dict, OmegaConf + +from typing import List + +from .decoder_config import FlashlightDecoderConfig +from .base_decoder import BaseDecoder + +try: + from flashlight.lib.text.decoder import ( + LM, + CriterionType, + DecodeResult, + KenLM, + LexiconDecoder, + LexiconDecoderOptions, + LexiconFreeDecoder, + LexiconFreeDecoderOptions, + LMState, + SmearingMode, + Trie, + ) + from flashlight.lib.text.dictionary import create_word_dict, load_words + from flashlight.lib.text.dictionary import Dictionary as flDictionary +except ImportError: + warnings.warn( + "flashlight python bindings are required to use this functionality. 
" + "Please install from " + "https://github.com/facebookresearch/flashlight/tree/master/bindings/python" + ) + LM = object + LMState = object + + +class KenLMDecoder(BaseDecoder): + def __init__(self, cfg: FlashlightDecoderConfig, tgt_dict: Dictionary) -> None: + super().__init__(tgt_dict) + + self.nbest = cfg.nbest + self.unitlm = cfg.unitlm + + if cfg.lexicon: + self.lexicon = load_words(cfg.lexicon) + self.word_dict = create_word_dict(self.lexicon) + self.unk_word = self.word_dict.get_index("<unk>") + + self.lm = KenLM(cfg.lmpath, self.word_dict) + self.trie = Trie(self.vocab_size, self.silence) + + start_state = self.lm.start(False) + for word, spellings in self.lexicon.items(): + word_idx = self.word_dict.get_index(word) + _, score = self.lm.score(start_state, word_idx) + for spelling in spellings: + spelling_idxs = [tgt_dict.index(token) for token in spelling] + assert ( + tgt_dict.unk() not in spelling_idxs + ), f"{word} {spelling} {spelling_idxs}" + self.trie.insert(spelling_idxs, word_idx, score) + self.trie.smear(SmearingMode.MAX) + + self.decoder_opts = LexiconDecoderOptions( + beam_size=cfg.beam, + beam_size_token=cfg.beamsizetoken or len(tgt_dict), + beam_threshold=cfg.beamthreshold, + lm_weight=cfg.lmweight, + word_score=cfg.wordscore, + unk_score=cfg.unkweight, + sil_score=cfg.silweight, + log_add=False, + criterion_type=CriterionType.CTC, + ) + + self.decoder = LexiconDecoder( + self.decoder_opts, + self.trie, + self.lm, + self.silence, + self.blank, + self.unk_word, + [], + self.unitlm, + ) + else: + assert self.unitlm, "Lexicon-free decoding requires unit LM" + + self.word_dict = flDictionary() + for sym in tgt_dict.symbols: + self.word_dict.add_entry(sym, tgt_dict.index(sym)) + self.lm = KenLM(cfg.lmpath, self.word_dict) + self.decoder_opts = LexiconFreeDecoderOptions( + beam_size=cfg.beam, + beam_size_token=cfg.beamsizetoken or len(tgt_dict), + beam_threshold=cfg.beamthreshold, + lm_weight=cfg.lmweight, + sil_score=cfg.silweight, + 
def get_timesteps(self, token_idxs: List[int]) -> List[int]:
    """Locate the frame at which every emitted (non-blank) token starts.

    A frame is reported when its token is not the blank symbol and either
    it is the very first frame or its token differs from the token of the
    previous frame.

    Parameters
    ----------
    token_idxs : List[int]
        IDs of decoded tokens, one per frame.

    Returns
    -------
    List[int]
        Frame numbers corresponding to every non-blank token.
    """
    return [
        frame
        for frame, token in enumerate(token_idxs)
        if token != self.blank
        and (frame == 0 or token != token_idxs[frame - 1])
    ]
def score(
    self,
    state: LMState,
    token_index: int,
    no_cache: bool = False,
) -> Tuple[LMState, float]:
    """
    Evaluate language model based on the current lm state and new word.

    The log-probabilities for a state are computed lazily (one forward
    pass over the state's full prefix) and cached in ``self.states``; the
    cache is bounded by ``self.max_cache``.

    Parameters:
    -----------
    state: current lm state
    token_index: index of the word
                 (can be lexicon index then you should store inside LM the
                  mapping between indices of lexicon and lm, or lm index of a word)
    no_cache: when True, neither the computed probabilities nor the child
              state are stored in the cache

    Returns:
    --------
    (LMState, float): pair of (new state, score for the current word);
    the score is -inf when ``token_index`` is the unknown symbol
    """
    curr_state = self.states[state]

    def trim_cache(targ_size: int) -> None:
        # Evict the oldest entries: their probabilities and incremental
        # state are dropped, only the prefix is kept so that they can be
        # recomputed on demand.
        while len(self.stateq) > targ_size:
            rem_k = self.stateq.popleft()
            rem_st = self.states[rem_k]
            rem_st = FairseqLMState(rem_st.prefix, None, None)
            self.states[rem_k] = rem_st

    if curr_state.probs is None:
        # Probabilities for this state were never computed (or were evicted).
        new_incremental_state = (
            curr_state.incremental_state.copy()
            if curr_state.incremental_state is not None
            else None
        )
        with torch.no_grad():
            # NOTE(review): the .cuda() calls below are unconditional, while
            # __init__ only moves the model to the GPU when CUDA is available;
            # presumably this fails on CPU-only hosts -- confirm.
            if new_incremental_state is not None:
                new_incremental_state = apply_to_sample(
                    lambda x: x.cuda(), new_incremental_state
                )
            elif self.save_incremental:
                new_incremental_state = {}

            res = self.model(
                torch.from_numpy(curr_state.prefix).cuda(),
                incremental_state=new_incremental_state,
            )
            probs = self.model.get_normalized_probs(
                res, log_probs=True, sample=None
            )

            # Keep cached incremental state on the CPU.
            if new_incremental_state is not None:
                new_incremental_state = apply_to_sample(
                    lambda x: x.cpu(), new_incremental_state
                )

            # Only the distribution at the last position of the prefix is kept.
            curr_state = FairseqLMState(
                curr_state.prefix, new_incremental_state, probs[0, -1].cpu().numpy()
            )

        if not no_cache:
            self.states[state] = curr_state
            self.stateq.append(state)

    score = curr_state.probs[token_index].item()

    trim_cache(self.max_cache)

    outstate = state.child(token_index)
    if outstate not in self.states and not no_cache:
        # Register the child lazily: prefix only, probabilities computed on
        # its first score() call.
        # NOTE(review): concatenates a NumPy array with a torch tensor and
        # relies on NumPy converting the tensor -- confirm.
        prefix = np.concatenate(
            [curr_state.prefix, torch.LongTensor([[token_index]])], -1
        )
        incr_state = curr_state.incremental_state

        self.states[outstate] = FairseqLMState(prefix, incr_state, None)

    # Unknown tokens are never allowed by the LM.
    if token_index == self.unk:
        score = float("-inf")

    return outstate, score
self.lm.start(False) + for i, (word, spellings) in enumerate(self.lexicon.items()): + if self.unitlm: + word_idx = i + self.idx_to_wrd[i] = word + score = 0 + else: + word_idx = self.word_dict.index(word) + _, score = self.lm.score(start_state, word_idx, no_cache=True) + + for spelling in spellings: + spelling_idxs = [tgt_dict.index(token) for token in spelling] + assert ( + tgt_dict.unk() not in spelling_idxs + ), f"{spelling} {spelling_idxs}" + self.trie.insert(spelling_idxs, word_idx, score) + self.trie.smear(SmearingMode.MAX) + + self.decoder_opts = LexiconDecoderOptions( + beam_size=cfg.beam, + beam_size_token=cfg.beamsizetoken or len(tgt_dict), + beam_threshold=cfg.beamthreshold, + lm_weight=cfg.lmweight, + word_score=cfg.wordscore, + unk_score=cfg.unkweight, + sil_score=cfg.silweight, + log_add=False, + criterion_type=CriterionType.CTC, + ) + + self.decoder = LexiconDecoder( + self.decoder_opts, + self.trie, + self.lm, + self.silence, + self.blank, + self.unk_word, + [], + self.unitlm, + ) + else: + assert self.unitlm, "Lexicon-free decoding requires unit LM" + + d = {w: [[w]] for w in tgt_dict.symbols} + self.word_dict = create_word_dict(d) + self.lm = KenLM(cfg.lmpath, self.word_dict) + self.decoder_opts = LexiconFreeDecoderOptions( + beam_size=cfg.beam, + beam_size_token=cfg.beamsizetoken or len(tgt_dict), + beam_threshold=cfg.beamthreshold, + lm_weight=cfg.lmweight, + sil_score=cfg.silweight, + log_add=False, + criterion_type=CriterionType.CTC, + ) + self.decoder = LexiconFreeDecoder( + self.decoder_opts, self.lm, self.silence, self.blank, [] + ) + + def decode( + self, + emissions: torch.FloatTensor, + ) -> List[List[Dict[str, torch.LongTensor]]]: + B, T, N = emissions.size() + hypos = [] + + def make_hypo(result: DecodeResult) -> Dict[str, Any]: + hypo = { + "tokens": self.get_tokens(result.tokens), + "score": result.score, + } + if self.lexicon: + hypo["words"] = [ + self.idx_to_wrd[x] if self.unitlm else self.word_dict[x] + for x in result.words + if 
x >= 0 + ] + return hypo + + for b in range(B): + emissions_ptr = emissions.data_ptr() + 4 * b * emissions.stride(0) + results = self.decoder.decode(emissions_ptr, T, N) + + nbest_results = results[: self.nbest] + hypos.append([make_hypo(result) for result in nbest_results]) + self.lm.empty_cache() + + return hypos diff --git a/fairseq/examples/speech_recognition/new/decoders/viterbi_decoder.py b/fairseq/examples/speech_recognition/new/decoders/viterbi_decoder.py new file mode 100644 index 0000000..a35d95e --- /dev/null +++ b/fairseq/examples/speech_recognition/new/decoders/viterbi_decoder.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch + +from typing import List, Dict + +from .base_decoder import BaseDecoder + + +class ViterbiDecoder(BaseDecoder): + def decode( + self, + emissions: torch.FloatTensor, + ) -> List[List[Dict[str, torch.LongTensor]]]: + def get_pred(e): + score = e.log_softmax(dim=-1).max(dim=-1)[0].sum() + toks = e.argmax(dim=-1).unique_consecutive() + return {"tokens":toks[toks != self.blank], "score":score} + return [[get_pred(x)] for x in emissions] diff --git a/fairseq/examples/speech_recognition/new/infer.py b/fairseq/examples/speech_recognition/new/infer.py new file mode 100644 index 0000000..ca5cea4 --- /dev/null +++ b/fairseq/examples/speech_recognition/new/infer.py @@ -0,0 +1,502 @@ +#!/usr/bin/env python -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import ast
import hashlib
import logging
import os
import shutil
import sys
import re
from dataclasses import dataclass, field, is_dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, TextIO, Tuple, Union

import editdistance
import torch
import torch.distributed as dist
from examples.speech_recognition.new.decoders.decoder_config import (
    DecoderConfig,
    FlashlightDecoderConfig,
)
from examples.speech_recognition.new.decoders.decoder import Decoder
from fairseq import checkpoint_utils, distributed_utils, progress_bar, tasks, utils
from fairseq.data.data_utils import post_process
from fairseq.dataclass.configs import (
    CheckpointConfig,
    CommonConfig,
    CommonEvalConfig,
    DatasetConfig,
    DistributedTrainingConfig,
    FairseqDataclass,
)
from fairseq.logging.meters import StopwatchMeter, TimeMeter
from fairseq.logging.progress_bar import BaseProgressBar
from fairseq.models.fairseq_model import FairseqModel
from omegaconf import OmegaConf

import hydra
from hydra.core.config_store import ConfigStore

logging.root.setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

config_path = Path(__file__).resolve().parent / "conf"


@dataclass
class DecodingConfig(DecoderConfig, FlashlightDecoderConfig):
    """Decoder options plus the output-location switches used by this script."""

    unique_wer_file: bool = field(
        default=False,
        metadata={"help": "If set, use a unique file for storing WER"},
    )
    results_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "If set, write hypothesis and reference sentences into this directory"
        },
    )


@dataclass
class InferConfig(FairseqDataclass):
    """Top-level hydra config for inference.

    NOTE(review): the sub-config defaults below are dataclass *instances*.
    Python >= 3.11 rejects unhashable dataclass defaults at class-creation
    time; switching to ``default_factory`` would also require changing how
    ``cli_main`` reads ``.default`` -- confirm the supported Python version
    before touching this.
    """

    task: Any = None
    decoding: DecodingConfig = DecodingConfig()
    common: CommonConfig = CommonConfig()
    common_eval: CommonEvalConfig = CommonEvalConfig()
    checkpoint: CheckpointConfig = CheckpointConfig()
    distributed_training: DistributedTrainingConfig = DistributedTrainingConfig()
    dataset: DatasetConfig = DatasetConfig()
    is_ax: bool = field(
        default=False,
        metadata={
            "help": "if true, assumes we are using ax for tuning and returns a tuple for ax to consume"
        },
    )


def reset_logging():
    """Replace every root-logger handler with a single stdout handler."""
    root = logging.getLogger()
    # Iterate over a copy: removeHandler() mutates root.handlers, and looping
    # over the live list skips every other handler.
    for handler in list(root.handlers):
        root.removeHandler(handler)
    root.setLevel(os.environ.get("LOGLEVEL", "INFO").upper())
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    root.addHandler(handler)


class InferenceProcessor:
    """Loads the model/dataset, decodes batches and accumulates WER statistics.

    Used as a context manager so that the optional result files are opened on
    entry and closed on exit.
    """

    cfg: InferConfig

    def __init__(self, cfg: InferConfig) -> None:
        self.cfg = cfg
        self.task = tasks.setup_task(cfg.task)

        models, saved_cfg = self.load_model_ensemble()

        ### LOAD ADAPTER ####
        ckpt_obj = checkpoint_utils.load_checkpoint_to_cpu(self.cfg.common_eval.path)
        if "adapter" in ckpt_obj:
            # The language id is the prefix of "<lang>:<subset>".
            target_lang = self.cfg.dataset.gen_subset.split(":")[0]
            assert target_lang in ckpt_obj["adapter"]

            logger.info(f">>> LOADING ADAPTER: {target_lang}")
            ft_obj = ckpt_obj["adapter"][target_lang]
            ft_model = ft_obj["model"]
            cdevice = models[0].w2v_encoder.proj.weight.device
            cdtype = models[0].w2v_encoder.proj.weight.dtype
            # Rebuild the output projection with the adapter's own shape
            # before copying the adapter weights in.
            ft_proj_out, ft_proj_in = ft_model["w2v_encoder.proj.weight"].shape
            ft_proj = torch.nn.Linear(ft_proj_in, ft_proj_out, bias=True)
            ft_proj.to(device=cdevice, dtype=cdtype)
            models[0].w2v_encoder.proj = ft_proj
            with torch.no_grad():
                for kk, vv in models[0].named_parameters():
                    if kk in ft_model:
                        vv.copy_(ft_model[kk])
            self.task.load_state_dict(ft_obj["task_state"])
            # overwrite gen_subset with master config
            # (raw string: "\w" is an invalid escape in a normal literal)
            self.cfg.dataset.gen_subset = re.sub(
                r"^[\w-]+:",
                saved_cfg["task"]["multi_corpus_keys"] + ":",
                self.cfg.dataset.gen_subset,
            )
        self.models = models
        self.saved_cfg = saved_cfg
        self.tgt_dict = self.task.target_dictionary

        self.task.load_dataset(
            self.cfg.dataset.gen_subset,
            task_cfg=saved_cfg.task,
        )
        self.generator = Decoder(cfg.decoding, self.tgt_dict)
        self.gen_timer = StopwatchMeter()
        self.wps_meter = TimeMeter()
        self.num_sentences = 0
        self.total_errors = 0
        self.total_length = 0

        self.hypo_words_file = None
        self.hypo_units_file = None
        self.ref_words_file = None
        self.ref_units_file = None
        self.score_file = None

        self.progress_bar = self.build_progress_bar()

    def __enter__(self) -> "InferenceProcessor":
        if self.cfg.decoding.results_path is not None:
            self.hypo_words_file = self.get_res_file("hypo.word")
            self.hypo_units_file = self.get_res_file("hypo.units")
            self.ref_words_file = self.get_res_file("ref.word")
            self.ref_units_file = self.get_res_file("ref.units")
            self.score_file = self.get_res_file("asr_score")
        return self

    def __exit__(self, *exc) -> bool:
        if self.cfg.decoding.results_path is not None:
            self.hypo_words_file.close()
            self.hypo_units_file.close()
            self.ref_words_file.close()
            self.ref_units_file.close()
            self.score_file.close()
        # Never suppress an in-flight exception.
        return False

    def __iter__(self) -> Any:
        for sample in self.progress_bar:
            if not self.cfg.common.cpu:
                sample = utils.move_to_cuda(sample)

            # Happens on the last batch.
            if "net_input" not in sample:
                continue
            yield sample

    def log(self, *args, **kwargs):
        self.progress_bar.log(*args, **kwargs)

    def print(self, *args, **kwargs):
        self.progress_bar.print(*args, **kwargs)

    def get_res_file(self, fname: str) -> TextIO:
        """Open ``fname`` under ``results_path`` for line-buffered writing.

        When running data-parallel the rank is appended as a suffix so each
        shard writes to its own file.
        """
        fname = os.path.join(self.cfg.decoding.results_path, fname)
        if self.data_parallel_world_size > 1:
            fname = f"{fname}.{self.data_parallel_rank}"
        return open(fname, "w", buffering=1)

    def merge_shards(self) -> None:
        """Merges all shard files into shard 0, then removes shard suffix."""

        shard_id = self.data_parallel_rank
        num_shards = self.data_parallel_world_size

        if self.data_parallel_world_size > 1:

            def merge_shards_with_root(fname: str) -> None:
                fname = os.path.join(self.cfg.decoding.results_path, fname)
                logger.info("Merging %s on shard %d", fname, shard_id)
                base_fpath = Path(f"{fname}.0")
                with open(base_fpath, "a") as out_file:
                    for s in range(1, num_shards):
                        shard_fpath = Path(f"{fname}.{s}")
                        with open(shard_fpath, "r") as in_file:
                            for line in in_file:
                                out_file.write(line)
                        shard_fpath.unlink()
                shutil.move(f"{fname}.0", fname)

            dist.barrier()  # ensure all shards finished writing
            # Spread the merges over ranks; each file is merged by one rank.
            if shard_id == (0 % num_shards):
                merge_shards_with_root("hypo.word")
            if shard_id == (1 % num_shards):
                merge_shards_with_root("hypo.units")
            if shard_id == (2 % num_shards):
                merge_shards_with_root("ref.word")
            if shard_id == (3 % num_shards):
                merge_shards_with_root("ref.units")
            # __enter__ also opens per-rank "asr_score" files; merge them too
            # so no ".<rank>" leftovers remain.
            if shard_id == (4 % num_shards):
                merge_shards_with_root("asr_score")
            dist.barrier()

    def optimize_model(self, model: FairseqModel) -> None:
        model.make_generation_fast_()
        if self.cfg.common.fp16:
            model.half()
        if not self.cfg.common.cpu:
            model.cuda()

    def load_model_ensemble(self) -> Tuple[List[FairseqModel], FairseqDataclass]:
        arg_overrides = ast.literal_eval(self.cfg.common_eval.model_overrides)
        models, saved_cfg = checkpoint_utils.load_model_ensemble(
            utils.split_paths(self.cfg.common_eval.path, separator="\\"),
            arg_overrides=arg_overrides,
            task=self.task,
            suffix=self.cfg.checkpoint.checkpoint_suffix,
            strict=(self.cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=self.cfg.checkpoint.checkpoint_shard_count,
        )
        for model in models:
            self.optimize_model(model)
        return models, saved_cfg

    def get_dataset_itr(self, disable_iterator_cache: bool = False) -> Any:
        """Return an unshuffled epoch iterator over ``gen_subset``."""
        return self.task.get_batch_iterator(
            dataset=self.task.dataset(self.cfg.dataset.gen_subset),
            max_tokens=self.cfg.dataset.max_tokens,
            max_sentences=self.cfg.dataset.batch_size,
            max_positions=(sys.maxsize, sys.maxsize),
            ignore_invalid_inputs=self.cfg.dataset.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=self.cfg.dataset.required_batch_size_multiple,
            seed=self.cfg.common.seed,
            num_shards=self.data_parallel_world_size,
            shard_id=self.data_parallel_rank,
            num_workers=self.cfg.dataset.num_workers,
            data_buffer_size=self.cfg.dataset.data_buffer_size,
            disable_iterator_cache=disable_iterator_cache,
        ).next_epoch_itr(shuffle=False)

    def build_progress_bar(
        self,
        epoch: Optional[int] = None,
        prefix: Optional[str] = None,
        default_log_format: str = "tqdm",
    ) -> BaseProgressBar:
        return progress_bar.progress_bar(
            iterator=self.get_dataset_itr(),
            log_format=self.cfg.common.log_format,
            log_interval=self.cfg.common.log_interval,
            epoch=epoch,
            prefix=prefix,
            tensorboard_logdir=self.cfg.common.tensorboard_logdir,
            default_log_format=default_log_format,
        )

    @property
    def data_parallel_world_size(self):
        if self.cfg.distributed_training.distributed_world_size == 1:
            return 1
        return distributed_utils.get_data_parallel_world_size()

    @property
    def data_parallel_rank(self):
        if self.cfg.distributed_training.distributed_world_size == 1:
            return 0
        return distributed_utils.get_data_parallel_rank()

    def process_sentence(
        self,
        sample: Dict[str, Any],
        hypo: Dict[str, Any],
        sid: int,
        batch_id: int,
    ) -> Tuple[int, int]:
        """Score one hypothesis against its reference.

        Optionally writes hypothesis/reference/score lines to the result files.

        Returns:
            ``(word edit distance, number of reference words)``.
        """
        speaker = None  # Speaker can't be parsed from dataset.
        if "target_label" in sample:
            toks = sample["target_label"]
        else:
            toks = sample["target"]
        toks = toks[batch_id, :]

        # Processes hypothesis.
        hyp_pieces = self.tgt_dict.string(hypo["tokens"].int().cpu())
        if "words" in hypo:
            hyp_words = " ".join(hypo["words"])
        else:
            hyp_words = post_process(hyp_pieces, self.cfg.common_eval.post_process)

        # Processes target.
        target_tokens = utils.strip_pad(toks, self.tgt_dict.pad())
        tgt_pieces = self.tgt_dict.string(target_tokens.int().cpu())
        tgt_words = post_process(tgt_pieces, self.cfg.common_eval.post_process)

        if self.cfg.decoding.results_path is not None:
            print(f"{hyp_pieces} ({speaker}-{sid})", file=self.hypo_units_file)
            print(f"{hyp_words} ({speaker}-{sid})", file=self.hypo_words_file)
            print(f"{tgt_pieces} ({speaker}-{sid})", file=self.ref_units_file)
            print(f"{tgt_words} ({speaker}-{sid})", file=self.ref_words_file)
            # The score may be a 0-d tensor or an ordinary number depending
            # on the decoder; only call .item() when it exists.
            score = hypo["score"]
            score = score.item() if hasattr(score, "item") else score
            print(f"{score} ({speaker}-{sid})", file=self.score_file)

        if not self.cfg.common_eval.quiet:
            logger.info(f"HYPO: {hyp_words}")
            logger.info(f"REF: {tgt_words}")
            logger.info("---------------------")

        hyp_words, tgt_words = hyp_words.split(), tgt_words.split()

        return editdistance.eval(hyp_words, tgt_words), len(tgt_words)

    def process_sample(self, sample: Dict[str, Any]) -> None:
        """Decode one batch and fold its errors into the running totals."""
        self.gen_timer.start()
        hypos = self.task.inference_step(
            generator=self.generator,
            models=self.models,
            sample=sample,
        )
        num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos)
        self.gen_timer.stop(num_generated_tokens)
        self.wps_meter.update(num_generated_tokens)

        for batch_id, sample_id in enumerate(sample["id"].tolist()):
            errs, length = self.process_sentence(
                sample=sample,
                sid=sample_id,
                batch_id=batch_id,
                hypo=hypos[batch_id][0],
            )
            self.total_errors += errs
            self.total_length += length

        self.log({"wps": round(self.wps_meter.avg)})
        if "nsentences" in sample:
            self.num_sentences += sample["nsentences"]
        else:
            self.num_sentences += sample["id"].numel()

    def log_generation_time(self) -> None:
        logger.info(
            "Processed %d sentences (%d tokens) in %.1fs %.2f "
            "sentences per second, %.2f tokens per second)",
            self.num_sentences,
            self.gen_timer.n,
            self.gen_timer.sum,
            self.num_sentences / (self.gen_timer.sum + 1e-6),
            1.0 / (self.gen_timer.avg + 1e-6),
        )


def parse_wer(wer_file: Path) -> float:
    """Read the WER back from the first line (``"WER: <value>"``) of ``wer_file``."""
    with open(wer_file, "r") as f:
        return float(f.readline().strip().split(" ")[1])


def get_wer_file(cfg: InferConfig) -> Path:
    """Hashes the decoding parameters to a unique file ID."""
    base_path = "wer"
    if cfg.decoding.results_path is not None:
        base_path = os.path.join(cfg.decoding.results_path, base_path)

    if cfg.decoding.unique_wer_file:
        yaml_str = OmegaConf.to_yaml(cfg.decoding)
        fid = int(hashlib.md5(yaml_str.encode("utf-8")).hexdigest(), 16)
        return Path(f"{base_path}.{fid % 1000000}")
    else:
        return Path(base_path)


def main(cfg: InferConfig) -> float:
    """Entry point for main processing logic.

    Args:
        cfg: The inference configuration to use.

    Returns:
        The final WER in percent. The master process additionally writes it,
        together with the decoding config, to the file from ``get_wer_file``.
        ``inf`` is returned when no reference words were seen.
    """

    yaml_str, wer_file = OmegaConf.to_yaml(cfg.decoding), get_wer_file(cfg)

    # Validates the provided configuration.
    if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
        cfg.dataset.max_tokens = 4000000
    if not cfg.common.cpu and not torch.cuda.is_available():
        raise ValueError("CUDA not found; set `cpu=True` to run without CUDA")

    logger.info(cfg.common_eval.path)

    with InferenceProcessor(cfg) as processor:
        for sample in processor:
            processor.process_sample(sample)

        processor.log_generation_time()

        if cfg.decoding.results_path is not None:
            processor.merge_shards()

        errs_t, leng_t = processor.total_errors, processor.total_length

        if cfg.common.cpu:
            logger.warning("Merging WER requires CUDA.")
        elif processor.data_parallel_world_size > 1:
            stats = torch.LongTensor([errs_t, leng_t]).cuda()
            dist.all_reduce(stats, op=dist.ReduceOp.SUM)
            errs_t, leng_t = stats[0].item(), stats[1].item()

        # An empty subset (or empty references) would otherwise raise
        # ZeroDivisionError after all decoding work has been done.
        wer = errs_t * 100.0 / leng_t if leng_t > 0 else float("inf")

        if distributed_utils.is_master(cfg.distributed_training):
            with open(wer_file, "w") as f:
                f.write(
                    (
                        f"WER: {wer}\n"
                        f"err / num_ref_words = {errs_t} / {leng_t}\n\n"
                        f"{yaml_str}"
                    )
                )

        return wer


@hydra.main(config_path=config_path, config_name="infer")
def hydra_main(cfg: InferConfig) -> Union[float, Tuple[float, Optional[float]]]:
    """Hydra entry point: run ``main`` (possibly distributed) and report the WER."""
    container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True)
    cfg = OmegaConf.create(container)
    OmegaConf.set_struct(cfg, True)

    if cfg.common.reset_logging:
        reset_logging()

    utils.import_user_module(cfg.common)

    # logger.info("Config:\n%s", OmegaConf.to_yaml(cfg))
    wer = float("inf")

    try:
        if cfg.common.profile:
            with torch.cuda.profiler.profile():
                with torch.autograd.profiler.emit_nvtx():
                    distributed_utils.call_main(cfg, main)
        else:
            distributed_utils.call_main(cfg, main)

        # The WER is read back from disk rather than taken from call_main.
        wer = parse_wer(get_wer_file(cfg))
    except BaseException as e:  # pylint: disable=broad-except
        if not cfg.common.suppress_crashes:
            raise
        else:
            logger.error("Crashed! %s", str(e))

    logger.info("Word error rate: %.4f", wer)
    if cfg.is_ax:
        return wer, None

    return wer


def cli_main() -> None:
    """Register the config schema with hydra's ConfigStore and launch."""
    try:
        from hydra._internal.utils import (
            get_args,
        )  # pylint: disable=import-outside-toplevel

        cfg_name = get_args().config_name or "infer"
    except ImportError:
        logger.warning("Failed to get config name from hydra args")
        cfg_name = "infer"

    cs = ConfigStore.instance()
    cs.store(name=cfg_name, node=InferConfig)

    for k in InferConfig.__dataclass_fields__:
        if is_dataclass(InferConfig.__dataclass_fields__[k].type):
            v = InferConfig.__dataclass_fields__[k].default
            cs.store(name=k, node=v)

    hydra_main()  # pylint: disable=no-value-for-parameter


if __name__ == "__main__":
    cli_main()


# ---- fairseq/examples/speech_recognition/tasks/__init__.py (new file) ----
import importlib
import os


# Import every public module in this package so its tasks get registered.
for file in sorted(os.listdir(os.path.dirname(__file__))):
    if file.endswith(".py") and not file.startswith("_"):
        task_name = file[: file.find(".py")]
        importlib.import_module("examples.speech_recognition.tasks." + task_name)


# ---- fairseq/examples/speech_recognition/tasks/speech_recognition.py (new file) ----
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import json
import os
import re
import sys

import torch
from examples.speech_recognition.data import AsrDataset
from examples.speech_recognition.data.replabels import replabel_symbol
from fairseq.data import Dictionary
from fairseq.tasks import LegacyFairseqTask, register_task


def get_asr_dataset_from_json(data_json_path, tgt_dict):
    """
    Parse data json and create dataset.
    See scripts/asr_prep_json.py which pack json from raw files

    Json example:
    {
    "utts": {
        "4771-29403-0025": {
            "input": {
                "length_ms": 170,
                "path": "/tmp/file1.flac"
            },
            "output": {
                "text": "HELLO \\n",
                "token": "HE LLO",
                "tokenid": "4815, 861"
            }
        },
        "1564-142299-0096": {
            ...
        }
    }

    Utterances are ordered longest first (by ``length_ms``) and an EOS index
    is appended to every target sequence.

    Raises:
        FileNotFoundError: ``data_json_path`` does not exist.
        ValueError: the file holds no utterances, or an utterance id does not
            have the ``<a>-<b>-<c>`` form the speaker name is derived from.
    """
    if not os.path.isfile(data_json_path):
        raise FileNotFoundError("Dataset not found: {}".format(data_json_path))
    # Only the parse needs the file handle; release it before the processing.
    with open(data_json_path, "rb") as f:
        data_samples = json.load(f)["utts"]
    if len(data_samples) == 0:
        # Explicit raise instead of assert so the check survives ``python -O``.
        raise ValueError("Dataset is empty: {}".format(data_json_path))

    sorted_samples = sorted(
        data_samples.items(),
        key=lambda sample: int(sample[1]["input"]["length_ms"]),
        reverse=True,
    )
    aud_paths = [s[1]["input"]["path"] for s in sorted_samples]
    ids = [s[0] for s in sorted_samples]
    speakers = []
    for s in sorted_samples:
        m = re.search("(.+?)-(.+?)-(.+?)", s[0])
        if m is None:
            # Previously this surfaced as an AttributeError on ``None``.
            raise ValueError(
                "Cannot derive speaker from utterance id {!r} in {}".format(
                    s[0], data_json_path
                )
            )
        speakers.append(m.group(1) + "_" + m.group(2))
    frame_sizes = [s[1]["input"]["length_ms"] for s in sorted_samples]
    tgt = [
        [int(i) for i in s[1]["output"]["tokenid"].split(", ")]
        for s in sorted_samples
    ]
    # append eos
    tgt = [[*t, tgt_dict.eos()] for t in tgt]
    return AsrDataset(aud_paths, frame_sizes, tgt, tgt_dict, ids, speakers)


@register_task("speech_recognition")
class SpeechRecognitionTask(LegacyFairseqTask):
    """
    Task for training speech recognition model.
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument("data", help="path to data directory")
        parser.add_argument(
            "--silence-token", default="\u2581", help="token for silence (used by w2l)"
        )
        parser.add_argument(
            "--max-source-positions",
            default=sys.maxsize,
            type=int,
            metavar="N",
            help="max number of frames in the source sequence",
        )
        parser.add_argument(
            "--max-target-positions",
            default=1024,
            type=int,
            metavar="N",
            help="max number of tokens in the target sequence",
        )

    def __init__(self, args, tgt_dict):
        super().__init__(args)
        self.tgt_dict = tgt_dict

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Setup the task (e.g., load dictionaries)."""
        dict_path = os.path.join(args.data, "dict.txt")
        if not os.path.isfile(dict_path):
            raise FileNotFoundError("Dict not found: {}".format(dict_path))
        tgt_dict = Dictionary.load(dict_path)

        # Criterion-specific extra symbols.
        if args.criterion == "ctc_loss":
            tgt_dict.add_symbol("<ctc_blank>")
        elif args.criterion == "asg_loss":
            for i in range(1, args.max_replabel + 1):
                tgt_dict.add_symbol(replabel_symbol(i))

        print("| dictionary: {} types".format(len(tgt_dict)))
        return cls(args, tgt_dict)

    def load_dataset(self, split, combine=False, **kwargs):
        """Load a given dataset split.

        Args:
            split (str): name of the split (e.g., train, valid, test)
        """
        data_json_path = os.path.join(self.args.data, "{}.json".format(split))
        self.datasets[split] = get_asr_dataset_from_json(data_json_path, self.tgt_dict)

    def build_generator(self, models, args, **unused):
        """Return the decoder selected by ``args.w2l_decoder``.

        Falls back to the parent implementation when the option is absent or
        not one of ``viterbi`` / ``kenlm`` / ``fairseqlm``.
        """
        w2l_decoder = getattr(args, "w2l_decoder", None)
        # Decoder classes are imported lazily, only for the branch taken.
        if w2l_decoder == "viterbi":
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

            return W2lViterbiDecoder(args, self.target_dictionary)
        elif w2l_decoder == "kenlm":
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            return W2lKenLMDecoder(args, self.target_dictionary)
        elif w2l_decoder == "fairseqlm":
            from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

            return W2lFairseqLMDecoder(args, self.target_dictionary)
        else:
            return super().build_generator(models, args)

    @property
    def target_dictionary(self):
        """Return the :class:`~fairseq.data.Dictionary` for the language
        model."""
        return self.tgt_dict

    @property
    def source_dictionary(self):
        """Return the source :class:`~fairseq.data.Dictionary` (if applicable
        for this task)."""
        return None

    def max_positions(self):
        """Return the max speech and sentence length allowed by the task."""
        return (self.args.max_source_positions, self.args.max_target_positions)


# ---- fairseq/examples/speech_recognition/utils/wer_utils.py (new file) ----
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +import re +from collections import deque +from enum import Enum + +import numpy as np + + +""" + Utility modules for computation of Word Error Rate, + Alignments, as well as more granular metrics like + deletion, insersion and substitutions. +""" + + +class Code(Enum): + match = 1 + substitution = 2 + insertion = 3 + deletion = 4 + + +class Token(object): + def __init__(self, lbl="", st=np.nan, en=np.nan): + if np.isnan(st): + self.label, self.start, self.end = "", 0.0, 0.0 + else: + self.label, self.start, self.end = lbl, st, en + + +class AlignmentResult(object): + def __init__(self, refs, hyps, codes, score): + self.refs = refs # std::deque<int> + self.hyps = hyps # std::deque<int> + self.codes = codes # std::deque<Code> + self.score = score # float + + +def coordinate_to_offset(row, col, ncols): + return int(row * ncols + col) + + +def offset_to_row(offset, ncols): + return int(offset / ncols) + + +def offset_to_col(offset, ncols): + return int(offset % ncols) + + +def trimWhitespace(str): + return re.sub(" +", " ", re.sub(" *$", "", re.sub("^ *", "", str))) + + +def str2toks(str): + pieces = trimWhitespace(str).split(" ") + toks = [] + for p in pieces: + toks.append(Token(p, 0.0, 0.0)) + return toks + + +class EditDistance(object): + def __init__(self, time_mediated): + self.time_mediated_ = time_mediated + self.scores_ = np.nan # Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic> + self.backtraces_ = ( + np.nan + ) # Eigen::Matrix<size_t, Eigen::Dynamic, Eigen::Dynamic> backtraces_; + self.confusion_pairs_ = {} + + def cost(self, ref, hyp, code): + if self.time_mediated_: + if code == Code.match: + return abs(ref.start - hyp.start) + abs(ref.end - hyp.end) + elif code == Code.insertion: + return hyp.end - hyp.start + elif code == Code.deletion: + return ref.end - ref.start + else: # substitution + return abs(ref.start - hyp.start) + abs(ref.end - hyp.end) + 0.1 + else: 
+ if code == Code.match: + return 0 + elif code == Code.insertion or code == Code.deletion: + return 3 + else: # substitution + return 4 + + def get_result(self, refs, hyps): + res = AlignmentResult(refs=deque(), hyps=deque(), codes=deque(), score=np.nan) + + num_rows, num_cols = self.scores_.shape + res.score = self.scores_[num_rows - 1, num_cols - 1] + + curr_offset = coordinate_to_offset(num_rows - 1, num_cols - 1, num_cols) + + while curr_offset != 0: + curr_row = offset_to_row(curr_offset, num_cols) + curr_col = offset_to_col(curr_offset, num_cols) + + prev_offset = self.backtraces_[curr_row, curr_col] + + prev_row = offset_to_row(prev_offset, num_cols) + prev_col = offset_to_col(prev_offset, num_cols) + + res.refs.appendleft(curr_row - 1) # Note: this was .push_front() in C++ + res.hyps.appendleft(curr_col - 1) + if curr_row - 1 == prev_row and curr_col == prev_col: + res.codes.appendleft(Code.deletion) + elif curr_row == prev_row and curr_col - 1 == prev_col: + res.codes.appendleft(Code.insertion) + else: + # assert(curr_row - 1 == prev_row and curr_col - 1 == prev_col) + ref_str = refs[res.refs[0]].label + hyp_str = hyps[res.hyps[0]].label + + if ref_str == hyp_str: + res.codes.appendleft(Code.match) + else: + res.codes.appendleft(Code.substitution) + + confusion_pair = "%s -> %s" % (ref_str, hyp_str) + if confusion_pair not in self.confusion_pairs_: + self.confusion_pairs_[confusion_pair] = 1 + else: + self.confusion_pairs_[confusion_pair] += 1 + + curr_offset = prev_offset + + return res + + def align(self, refs, hyps): + if len(refs) == 0 and len(hyps) == 0: + return np.nan + + # NOTE: we're not resetting the values in these matrices because every value + # will be overridden in the loop below. If this assumption doesn't hold, + # be sure to set all entries in self.scores_ and self.backtraces_ to 0. 
+ self.scores_ = np.zeros((len(refs) + 1, len(hyps) + 1)) + self.backtraces_ = np.zeros((len(refs) + 1, len(hyps) + 1)) + + num_rows, num_cols = self.scores_.shape + + for i in range(num_rows): + for j in range(num_cols): + if i == 0 and j == 0: + self.scores_[i, j] = 0.0 + self.backtraces_[i, j] = 0 + continue + + if i == 0: + self.scores_[i, j] = self.scores_[i, j - 1] + self.cost( + None, hyps[j - 1], Code.insertion + ) + self.backtraces_[i, j] = coordinate_to_offset(i, j - 1, num_cols) + continue + + if j == 0: + self.scores_[i, j] = self.scores_[i - 1, j] + self.cost( + refs[i - 1], None, Code.deletion + ) + self.backtraces_[i, j] = coordinate_to_offset(i - 1, j, num_cols) + continue + + # Below here both i and j are greater than 0 + ref = refs[i - 1] + hyp = hyps[j - 1] + best_score = self.scores_[i - 1, j - 1] + ( + self.cost(ref, hyp, Code.match) + if (ref.label == hyp.label) + else self.cost(ref, hyp, Code.substitution) + ) + + prev_row = i - 1 + prev_col = j - 1 + ins = self.scores_[i, j - 1] + self.cost(None, hyp, Code.insertion) + if ins < best_score: + best_score = ins + prev_row = i + prev_col = j - 1 + + delt = self.scores_[i - 1, j] + self.cost(ref, None, Code.deletion) + if delt < best_score: + best_score = delt + prev_row = i - 1 + prev_col = j + + self.scores_[i, j] = best_score + self.backtraces_[i, j] = coordinate_to_offset( + prev_row, prev_col, num_cols + ) + + return self.get_result(refs, hyps) + + +class WERTransformer(object): + def __init__(self, hyp_str, ref_str, verbose=True): + self.ed_ = EditDistance(False) + self.id2oracle_errs_ = {} + self.utts_ = 0 + self.words_ = 0 + self.insertions_ = 0 + self.deletions_ = 0 + self.substitutions_ = 0 + + self.process(["dummy_str", hyp_str, ref_str]) + + if verbose: + print("'%s' vs '%s'" % (hyp_str, ref_str)) + self.report_result() + + def process(self, input): # std::vector<std::string>&& input + if len(input) < 3: + print( + "Input must be of the form <id> ... 
<hypo> <ref> , got ", + len(input), + " inputs:", + ) + return None + + # Align + # std::vector<Token> hyps; + # std::vector<Token> refs; + + hyps = str2toks(input[-2]) + refs = str2toks(input[-1]) + + alignment = self.ed_.align(refs, hyps) + if alignment is None: + print("Alignment is null") + return np.nan + + # Tally errors + ins = 0 + dels = 0 + subs = 0 + for code in alignment.codes: + if code == Code.substitution: + subs += 1 + elif code == Code.insertion: + ins += 1 + elif code == Code.deletion: + dels += 1 + + # Output + row = input + row.append(str(len(refs))) + row.append(str(ins)) + row.append(str(dels)) + row.append(str(subs)) + # print(row) + + # Accumulate + kIdIndex = 0 + kNBestSep = "/" + + pieces = input[kIdIndex].split(kNBestSep) + + if len(pieces) == 0: + print( + "Error splitting ", + input[kIdIndex], + " on '", + kNBestSep, + "', got empty list", + ) + return np.nan + + id = pieces[0] + if id not in self.id2oracle_errs_: + self.utts_ += 1 + self.words_ += len(refs) + self.insertions_ += ins + self.deletions_ += dels + self.substitutions_ += subs + self.id2oracle_errs_[id] = [ins, dels, subs] + else: + curr_err = ins + dels + subs + prev_err = np.sum(self.id2oracle_errs_[id]) + if curr_err < prev_err: + self.id2oracle_errs_[id] = [ins, dels, subs] + + return 0 + + def report_result(self): + # print("---------- Summary ---------------") + if self.words_ == 0: + print("No words counted") + return + + # 1-best + best_wer = ( + 100.0 + * (self.insertions_ + self.deletions_ + self.substitutions_) + / self.words_ + ) + + print( + "\tWER = %0.2f%% (%i utts, %i words, %0.2f%% ins, " + "%0.2f%% dels, %0.2f%% subs)" + % ( + best_wer, + self.utts_, + self.words_, + 100.0 * self.insertions_ / self.words_, + 100.0 * self.deletions_ / self.words_, + 100.0 * self.substitutions_ / self.words_, + ) + ) + + def wer(self): + if self.words_ == 0: + wer = np.nan + else: + wer = ( + 100.0 + * (self.insertions_ + self.deletions_ + self.substitutions_) + / self.words_ 
def merge_counts(x, y):
    """Add the counts stored in ``y`` onto ``x`` and return ``x``.

    Both arguments are mappings whose values are counts. Keys missing from
    ``x`` start from zero. ``x`` is updated in place; ``y`` is only read.

    Typical use is merging confusion-pair counts:
    ``conf_pairs = merge_counts(conf_pairs, stats['confusion_pairs'])``.
    """
    for key, count in y.items():
        x[key] = x.get(key, 0) + count
    return x
class W2lDecoder:
    """Shared base for the decoders in this module.

    It resolves the blank and silence indices from the target dictionary,
    turns model output into emissions, and collapses frame-level index
    sequences into token sequences. ``generate`` calls ``self.decode``,
    which this class does not define itself.
    """

    def __init__(self, args, tgt_dict):
        """Record dictionary-derived settings.

        Args:
            args: object providing ``nbest``.
            tgt_dict: target dictionary; must support ``len``, ``index``,
                ``indices``, ``bos`` and ``eos``.
        """
        self.tgt_dict = tgt_dict
        self.vocab_size = len(tgt_dict)
        self.nbest = args.nbest

        # criterion-specific init
        self.criterion_type = CriterionType.CTC

        known_symbols = tgt_dict.indices

        # Blank: the dedicated CTC symbol when present, otherwise BOS.
        if "<ctc_blank>" in known_symbols:
            self.blank = tgt_dict.index("<ctc_blank>")
        else:
            self.blank = tgt_dict.bos()

        # Silence: first of "<sep>", "|" found in the dictionary, else EOS.
        for candidate in ("<sep>", "|"):
            if candidate in known_symbols:
                self.silence = tgt_dict.index(candidate)
                break
        else:
            self.silence = tgt_dict.eos()

        self.asg_transitions = None

    def generate(self, models, sample, **unused):
        """Generate a batch of inferences."""
        # model.forward normally channels prev_output_tokens into the decoder
        # separately, but SequenceGenerator directly calls model.encoder
        encoder_input = dict(sample["net_input"].items())
        encoder_input.pop("prev_output_tokens", None)
        return self.decode(self.get_emissions(models, encoder_input))

    def get_emissions(self, models, encoder_input):
        """Run the first model and return its emissions on the CPU.

        Only ``models[0]`` is used. The result has its first two dimensions
        swapped, is cast to float32, moved to the CPU and made contiguous.
        """
        model = models[0]
        encoder_out = model(**encoder_input)
        if not hasattr(model, "get_logits"):
            emissions = model.get_normalized_probs(encoder_out, log_probs=True)
        else:
            # no need to normalize emissions
            emissions = model.get_logits(encoder_out)
        swapped = emissions.transpose(0, 1)
        return swapped.float().cpu().contiguous()

    def get_tokens(self, idxs):
        """Collapse repeated indices and drop blanks.

        Consecutive duplicates in ``idxs`` are merged into one entry, then
        every entry equal to ``self.blank`` is removed.

        Returns:
            torch.LongTensor holding the remaining indices in order.
        """
        kept = [tok for tok, _run in it.groupby(idxs) if tok != self.blank]
        return torch.LongTensor(kept)
def get_timesteps(self, token_idxs: List[int]) -> List[int]:
    """Returns frame numbers corresponding to every non-blank token.

    A frame is reported when its token is not ``self.blank`` and it is
    either the first frame or differs from the token of the previous frame,
    i.e. the first frame of each run of a non-blank token.

    Parameters
    ----------
    token_idxs : List[int]
        IDs of decoded tokens.

    Returns
    -------
    List[int]
        Frame numbers corresponding to every non-blank token.
    """
    frames = []
    previous = None
    for frame, token in enumerate(token_idxs):
        opens_run = frame == 0 or token != previous
        previous = token
        if opens_run and token != self.blank:
            frames.append(frame)
    return frames
def finish(self, state: LMState):
    """
    Evaluate eos for language model based on the current lm state

    Looks up the end-of-sentence index in ``self.dictionary`` and delegates
    to ``self.score`` with it.

    Returns:
    --------
    (LMState, float): pair of (new state, score for the current word)
    """
    eos_index = self.dictionary.eos()
    return self.score(state, eos_index)
def decode(self, emissions):
    """Decode a batch of emissions into n-best hypotheses.

    Each batch item is handed to ``self.decoder.decode`` as a raw memory
    address plus its ``(T, N)`` extent, and the language-model cache is
    emptied after every item.

    Args:
        emissions: tensor whose ``size()`` unpacks to ``(B, T, N)``.
            NOTE(review): the address arithmetic assumes each batch item is
            laid out contiguously, as ``get_emissions`` returns it. The
            decoder presumably reads float32 values; confirm before
            passing any other dtype.

    Returns:
        A list with one entry per batch item. Each entry is a list of at
        most ``self.nbest`` dicts with keys ``"tokens"`` and ``"score"``,
        plus ``"words"`` when a lexicon is loaded.
    """
    B, T, N = emissions.size()

    # Byte distance between consecutive batch items. Use the tensor's real
    # element size rather than a hard-coded 4, so the offset stays correct
    # for any dtype.
    item_nbytes = emissions.element_size() * emissions.stride(0)
    base_ptr = emissions.data_ptr()

    def idx_to_word(idx):
        # With a unit LM the word indices are lexicon positions recorded in
        # idx_to_wrd; otherwise they index the word dictionary.
        if self.unit_lm:
            return self.idx_to_wrd[idx]
        return self.word_dict[idx]

    def make_hypo(result):
        hypo = {"tokens": self.get_tokens(result.tokens), "score": result.score}
        if self.lexicon:
            # Negative word ids are skipped.
            hypo["words"] = [idx_to_word(x) for x in result.words if x >= 0]
        return hypo

    hypos = []
    for b in range(B):
        emissions_ptr = base_ptr + b * item_nbytes
        results = self.decoder.decode(emissions_ptr, T, N)

        nbest_results = results[: self.nbest]
        hypos.append([make_hypo(result) for result in nbest_results])
        self.lm.empty_cache()

    return hypos
fairseq. + +## Features + +- Autoregressive and non-autoregressive models +- Multi-speaker synthesis +- Audio preprocessing (denoising, VAD, etc.) for less curated data +- Automatic metrics for model development +- Similar data configuration as [S2T](../speech_to_text/README.md) + + +## Examples +- [Single-speaker synthesis on LJSpeech](docs/ljspeech_example.md) +- [Multi-speaker synthesis on VCTK](docs/vctk_example.md) +- [Multi-speaker synthesis on Common Voice](docs/common_voice_example.md) + + +## Citation +Please cite as: +``` +@article{wang2021fairseqs2, + title={fairseq S\^{} 2: A Scalable and Integrable Speech Synthesis Toolkit}, + author={Wang, Changhan and Hsu, Wei-Ning and Adi, Yossi and Polyak, Adam and Lee, Ann and Chen, Peng-Jen and Gu, Jiatao and Pino, Juan}, + journal={arXiv preprint arXiv:2109.06912}, + year={2021} +} + +@inproceedings{ott2019fairseq, + title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling}, + author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli}, + booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations}, + year = {2019}, +} +``` diff --git a/fairseq/examples/speech_synthesis/__init__.py b/fairseq/examples/speech_synthesis/__init__.py new file mode 100644 index 0000000..6264236 --- /dev/null +++ b/fairseq/examples/speech_synthesis/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/fairseq/examples/speech_synthesis/data_utils.py b/fairseq/examples/speech_synthesis/data_utils.py new file mode 100644 index 0000000..3b2d079 --- /dev/null +++ b/fairseq/examples/speech_synthesis/data_utils.py @@ -0,0 +1,344 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
def trim_or_pad_to_target_length(
    data_1d_or_2d: np.ndarray, target_length: int
) -> np.ndarray:
    """Force the first axis of an array to ``target_length``.

    Longer input is cut to its first ``target_length`` rows. Shorter input
    is extended at the end with zeros from ``np.zeros`` (float64 by
    default), so a padded result follows NumPy's concatenation type
    promotion.

    Args:
        data_1d_or_2d: array with one or two dimensions.
        target_length: wanted size of the first axis.

    Returns:
        The trimmed slice, or a new zero-padded array.
    """
    n_dims = len(data_1d_or_2d.shape)
    assert n_dims in {1, 2}

    missing = target_length - data_1d_or_2d.shape[0]
    if missing <= 0:  # already long enough: trim
        return data_1d_or_2d[: target_length]

    # too short: append a block of zeros with matching trailing shape
    if n_dims == 1:
        filler = np.zeros(missing)
    else:
        filler = np.zeros((missing, data_1d_or_2d.shape[1]))
    return np.concatenate([data_1d_or_2d, filler], axis=0)
logmel_spec = torch.clamp(mel_spec, min=eps).log() + assert len(logmel_spec.shape) == 3 and logmel_spec.shape[0] == 1 + logmel_spec = logmel_spec.squeeze().t() # D x T -> T x D + if target_length is not None: + logmel_spec = trim_or_pad_to_target_length(logmel_spec, target_length) + + if output_path is not None: + np.save(output_path.as_posix(), logmel_spec) + else: + return logmel_spec + + +def extract_pitch( + waveform: torch.Tensor, sample_rate: int, + output_path: Optional[Path] = None, hop_length: int = 256, + log_scale: bool = True, phoneme_durations: Optional[List[int]] = None +): + if output_path is not None and output_path.is_file(): + return + + try: + import pyworld + except ImportError: + raise ImportError("Please install PyWORLD: pip install pyworld") + + _waveform = waveform.squeeze(0).double().numpy() + pitch, t = pyworld.dio( + _waveform, sample_rate, frame_period=hop_length / sample_rate * 1000 + ) + pitch = pyworld.stonemask(_waveform, pitch, t, sample_rate) + + if phoneme_durations is not None: + pitch = trim_or_pad_to_target_length(pitch, sum(phoneme_durations)) + try: + from scipy.interpolate import interp1d + except ImportError: + raise ImportError("Please install SciPy: pip install scipy") + nonzero_ids = np.where(pitch != 0)[0] + if len(nonzero_ids) == 0: + print((f"{output_path} has all empty values in the pitch contour")) + return + elif len(nonzero_ids) == 1: + print((f"{output_path} has only one non-zero values in the pitch contour")) + return + else: + interp_fn = interp1d( + nonzero_ids, + pitch[nonzero_ids], + fill_value=(pitch[nonzero_ids[0]], pitch[nonzero_ids[-1]]), + bounds_error=False, + ) + pitch = interp_fn(np.arange(0, len(pitch))) + d_cumsum = np.cumsum(np.concatenate([np.array([0]), phoneme_durations])) + pitch = np.array( + [ + np.mean(pitch[d_cumsum[i-1]: d_cumsum[i]]) + for i in range(1, len(d_cumsum)) + ] + ) + assert len(pitch) == len(phoneme_durations) + + if log_scale: + pitch = np.log(pitch + 1) + + if output_path is 
def extract_energy(
    waveform: torch.Tensor, output_path: Optional[Path] = None,
    hop_length: int = 256, n_fft: int = 1024, log_scale: bool = True,
    phoneme_durations: Optional[List[int]] = None
):
    """Compute an energy contour for a single-channel waveform.

    The waveform is reflect-padded by half a window on each side and
    projected onto a DFT basis with a strided 1-D convolution. The energy
    of a frame is the L2 norm of its spectral magnitudes.

    Args:
        waveform: tensor of shape ``(1, T)``.
        output_path: when given, the result is saved there with ``np.save``
            instead of being returned. If the file already exists the
            function returns immediately without computing anything.
        hop_length: convolution stride, in samples.
        n_fft: window length, in samples.
        log_scale: apply ``log(energy + 1)`` to the result.
        phoneme_durations: when given, the frame energies are trimmed or
            padded to ``sum(phoneme_durations)`` frames and averaged over
            each duration segment, giving one value per entry.

    Returns:
        The energy array when ``output_path`` is ``None``; otherwise
        ``None``.
    """
    if output_path is not None and output_path.is_file():
        return

    assert len(waveform.shape) == 2 and waveform.shape[0] == 1

    # (1, T) -> (1, 1, T), then mirror half a window onto both ends.
    half_window = n_fft // 2
    padded = F.pad(
        waveform.unsqueeze(0), [half_window, half_window], mode="reflect"
    )

    # Real and imaginary rows of the DFT matrix for the first n_bins
    # frequencies, stacked so one convolution yields both parts.
    n_bins = int((n_fft / 2 + 1))
    dft_rows = np.fft.fft(np.eye(n_fft))[:n_bins, :]
    stacked = np.vstack([np.real(dft_rows), np.imag(dft_rows)])
    kernels = torch.FloatTensor(stacked[:, None, :])

    projected = F.conv1d(padded, kernels, stride=hop_length, padding=0)
    real_part = projected[:, :n_bins, :]
    imag_part = projected[:, n_bins:, :]
    magnitude = torch.sqrt(real_part ** 2 + imag_part ** 2)
    energy = torch.norm(magnitude, dim=1).squeeze(0).numpy()

    if phoneme_durations is not None:
        energy = trim_or_pad_to_target_length(energy, sum(phoneme_durations))
        # Segment boundaries: 0, d0, d0 + d1, ...
        bounds = np.cumsum(np.concatenate([np.array([0]), phoneme_durations]))
        energy = np.array(
            [
                np.mean(energy[start:stop])
                for start, stop in zip(bounds[:-1], bounds[1:])
            ]
        )
        assert len(energy) == len(phoneme_durations)

    if log_scale:
        energy = np.log(energy + 1)

    if output_path is None:
        return energy
    np.save(output_path.as_posix(), energy)
def ipa_phonemize(text, lang="en-us", use_g2p=False):
    """Convert text into a space-separated phoneme string.

    Args:
        text: input text.
        lang: language code. Must be ``"en-us"`` when ``use_g2p`` is set.
        use_g2p: use the ``g2p_en`` package instead of ``phonemizer`` with
            the espeak backend.

    Returns:
        With ``use_g2p``, the g2p output joined by single spaces, with each
        ``" "`` element replaced by ``"|"``. Otherwise the result of
        ``phonemizer.phonemize`` with phones separated by ``" "`` and words
        by ``"| "``.

    Raises:
        AssertionError: ``use_g2p`` is set and ``lang`` is not ``"en-us"``.
        ImportError: the selected backend package is not installed. The
            message names the package to install, and the original import
            failure is attached as the cause.
    """
    if use_g2p:
        assert lang == "en-us", "g2pE phonemizer only works for en-us"
        # Guard only the import, so an ImportError raised while phonemizing
        # is not misreported as a missing package.
        try:
            from g2p_en import G2p
        except ImportError as err:
            raise ImportError(
                "Please install g2p_en: pip install g2p_en"
            ) from err
        g2p = G2p()
        return " ".join("|" if p == " " else p for p in g2p(text))

    try:
        from phonemizer import phonemize
        from phonemizer.separator import Separator
    except ImportError as err:
        raise ImportError(
            "Please install phonemizer: pip install phonemizer"
        ) from err
    return phonemize(
        text, backend='espeak', language=lang,
        separator=Separator(word="| ", phone=" ")
    )
def get_feature_value_min_max(feature_paths: List[str]):
    """Return the global minimum and maximum over a set of stored features.

    Each path is split by ``parse_path`` into a ZIP file path and a
    two-element slice, the bytes are read with ``read_from_stored_zip``,
    and the payload is loaded as a NumPy array.

    Args:
        feature_paths: paths to features stored inside a ZIP archive.

    Returns:
        ``(v_min, v_max)`` as Python scalars: the smallest and largest
        value found across all arrays.

    Raises:
        ValueError: ``feature_paths`` is empty, so no range exists.
    """
    feature_paths = list(feature_paths)
    if not feature_paths:
        raise ValueError("feature_paths is empty: cannot compute min/max")

    # Start from +/- infinity so the first array always sets both bounds.
    # Seeds of 1e-8 / -1e-8 would clamp the reported range around zero.
    v_min, v_max = float("inf"), float("-inf")
    for p in tqdm(feature_paths):
        _path, slice_ptr = parse_path(p)
        assert len(slice_ptr) == 2
        byte_data = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1])
        assert is_npy_data(byte_data)
        path_or_fp = io.BytesIO(byte_data)
        features = np.load(path_or_fp).squeeze()
        v_min = min(v_min, features.min().item())
        v_max = max(v_max, features.max().item())
    return v_min, v_max
a/fairseq/examples/speech_synthesis/docs/common_voice_example.md b/fairseq/examples/speech_synthesis/docs/common_voice_example.md new file mode 100644 index 0000000..1c0eef6 --- /dev/null +++ b/fairseq/examples/speech_synthesis/docs/common_voice_example.md @@ -0,0 +1,67 @@ +[[Back]](..) + +# Common Voice + +[Common Voice](https://commonvoice.mozilla.org/en/datasets) is a public domain speech corpus with 11.2K hours of read +speech in 76 languages (the latest version 7.0). We provide examples for building +[Transformer](https://arxiv.org/abs/1809.08895) models on this dataset. + + +## Data preparation +[Download](https://commonvoice.mozilla.org/en/datasets) and unpack Common Voice v4 to a path `${DATA_ROOT}/${LANG_ID}`. +Create splits and generate audio manifests with +```bash +python -m examples.speech_synthesis.preprocessing.get_common_voice_audio_manifest \ + --data-root ${DATA_ROOT} \ + --lang ${LANG_ID} \ + --output-manifest-root ${AUDIO_MANIFEST_ROOT} --convert-to-wav +``` + +To denoise audio and trim leading/trailing silence using signal processing based VAD, run +```bash +for SPLIT in dev test train; do + python -m examples.speech_synthesis.preprocessing.denoise_and_vad_audio \ + --audio-manifest ${AUDIO_MANIFEST_ROOT}/${SPLIT}.audio.tsv \ + --output-dir ${PROCESSED_DATA_ROOT} \ + --denoise --vad --vad-agg-level 2 +done +``` + +which generates a new audio TSV manifest under `${PROCESSED_DATA_ROOT}` with updated path to the processed audio and +a new column for SNR. + +To do filtering by CER, follow the [Automatic Evaluation](../docs/ljspeech_example.md#automatic-evaluation) section to +run ASR model (add `--eval-target` to `get_eval_manifest` for evaluation on the reference audio; add `--err-unit char` +to `eval_asr` to compute CER instead of WER). The example-level CER is saved to +`${EVAL_OUTPUT_ROOT}/uer_cer.${SPLIT}.tsv`. 
+ +Then, extract log-Mel spectrograms, generate feature manifest and create data configuration YAML with +```bash +python -m examples.speech_synthesis.preprocessing.get_feature_manifest \ + --audio-manifest-root ${AUDIO_MANIFEST_ROOT} \ + --output-root ${FEATURE_MANIFEST_ROOT} \ + --ipa-vocab --lang ${LANG_ID} \ + --snr-threshold 15 \ + --cer-threshold 0.1 --cer-tsv-path ${EVAL_OUTPUT_ROOT}/uer_cer.${SPLIT}.tsv +``` +where we use phoneme inputs (`--ipa-vocab`) as example. For sample filtering, we set the SNR and CER threshold +to 15 and 10%, respectively. + + +## Training +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#transformer).) + + +## Inference +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#inference).) + +## Automatic Evaluation +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#automatic-evaluation).) + +## Results + +| Language | Speakers | --arch | Params | Test MCD | Model | +|---|---|---|---|---|---| +| English | 200 | tts_transformer | 54M | 3.8 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2/cv4_en200_transformer_phn.tar) | + +[[Back]](..) diff --git a/fairseq/examples/speech_synthesis/docs/ljspeech_example.md b/fairseq/examples/speech_synthesis/docs/ljspeech_example.md new file mode 100644 index 0000000..836c30d --- /dev/null +++ b/fairseq/examples/speech_synthesis/docs/ljspeech_example.md @@ -0,0 +1,137 @@ +[[Back]](..) + +# LJSpeech + +[LJSpeech](https://keithito.com/LJ-Speech-Dataset) is a public domain TTS +corpus with around 24 hours of English speech sampled at 22.05kHz. We provide examples for building +[Transformer](https://arxiv.org/abs/1809.08895) and [FastSpeech 2](https://arxiv.org/abs/2006.04558) +models on this dataset. 
+ + +## Data preparation + +Download data, create splits and generate audio manifests with +```bash +python -m examples.speech_synthesis.preprocessing.get_ljspeech_audio_manifest \ + --output-data-root ${AUDIO_DATA_ROOT} \ + --output-manifest-root ${AUDIO_MANIFEST_ROOT} +``` + +Then, extract log-Mel spectrograms, generate feature manifest and create data configuration YAML with +```bash +python -m examples.speech_synthesis.preprocessing.get_feature_manifest \ + --audio-manifest-root ${AUDIO_MANIFEST_ROOT} \ + --output-root ${FEATURE_MANIFEST_ROOT} \ + --ipa-vocab --use-g2p +``` +where we use phoneme inputs (`--ipa-vocab --use-g2p`) as an example. + +FastSpeech 2 additionally requires frame durations, pitch and energy as auxiliary training targets. +Add `--add-fastspeech-targets` to include these fields in the feature manifests. We get frame durations either from +phoneme-level force-alignment or frame-level pseudo-text unit sequence. They should be pre-computed and specified via: +- `--textgrid-zip ${TEXT_GRID_ZIP_PATH}` for a ZIP file, inside which there is one + [TextGrid](https://www.fon.hum.uva.nl/praat/manual/TextGrid.html) file per sample to provide force-alignment info. +- `--id-to-units-tsv ${ID_TO_UNIT_TSV}` for a TSV file, where there are 2 columns for sample ID and + space-delimited pseudo-text unit sequence, respectively. + +For your convenience, we provide pre-computed +[force-alignment](https://dl.fbaipublicfiles.com/fairseq/s2/ljspeech_mfa.zip) from +[Montreal Forced Aligner](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) and +[pseudo-text units](https://dl.fbaipublicfiles.com/fairseq/s2/ljspeech_hubert.tsv) from +[HuBERT](https://github.com/pytorch/fairseq/tree/main/examples/hubert). You can also generate them yourself using +different software or a different model. 
+ + +## Training +#### Transformer +```bash +fairseq-train ${FEATURE_MANIFEST_ROOT} --save-dir ${SAVE_DIR} \ + --config-yaml config.yaml --train-subset train --valid-subset dev \ + --num-workers 4 --max-tokens 30000 --max-update 200000 \ + --task text_to_speech --criterion tacotron2 --arch tts_transformer \ + --clip-norm 5.0 --n-frames-per-step 4 --bce-pos-weight 5.0 \ + --dropout 0.1 --attention-dropout 0.1 --activation-dropout 0.1 \ + --encoder-normalize-before --decoder-normalize-before \ + --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --seed 1 --update-freq 8 --eval-inference --best-checkpoint-metric mcd_loss +``` +where `SAVE_DIR` is the checkpoint root path. We set `--update-freq 8` to simulate 8 GPUs with 1 GPU. You may want to +update it accordingly when using more than 1 GPU. + +#### FastSpeech2 +```bash +fairseq-train ${FEATURE_MANIFEST_ROOT} --save-dir ${SAVE_DIR} \ + --config-yaml config.yaml --train-subset train --valid-subset dev \ + --num-workers 4 --max-sentences 6 --max-update 200000 \ + --task text_to_speech --criterion fastspeech2 --arch fastspeech2 \ + --clip-norm 5.0 --n-frames-per-step 1 \ + --dropout 0.1 --attention-dropout 0.1 \ + --optimizer adam --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --seed 1 --update-freq 8 --eval-inference --best-checkpoint-metric mcd_loss +``` + + +## Inference +Average the last 5 checkpoints, generate the test split spectrogram and waveform using the default Griffin-Lim vocoder: +```bash +SPLIT=test +CHECKPOINT_NAME=avg_last_5 +CHECKPOINT_PATH=${SAVE_DIR}/checkpoint_${CHECKPOINT_NAME}.pt +python scripts/average_checkpoints.py --inputs ${SAVE_DIR} \ + --num-epoch-checkpoints 5 \ + --output ${CHECKPOINT_PATH} + +python -m examples.speech_synthesis.generate_waveform ${FEATURE_MANIFEST_ROOT} \ + --config-yaml config.yaml --gen-subset ${SPLIT} --task text_to_speech \ + --path ${CHECKPOINT_PATH} --max-tokens 50000 --spec-bwd-max-iter 32 \ + --dump-waveforms +``` 
+which dumps files (waveform, feature, attention plot, etc.) to `${SAVE_DIR}/generate-${CHECKPOINT_NAME}-${SPLIT}`. To +re-synthesize target waveforms for automatic evaluation, add `--dump-target`. + +## Automatic Evaluation +To start with, generate the manifest for synthetic speech, which will be taken as input by the evaluation scripts. +```bash +python -m examples.speech_synthesis.evaluation.get_eval_manifest \ + --generation-root ${SAVE_DIR}/generate-${CHECKPOINT_NAME}-${SPLIT} \ + --audio-manifest ${AUDIO_MANIFEST_ROOT}/${SPLIT}.audio.tsv \ + --output-path ${EVAL_OUTPUT_ROOT}/eval.tsv \ + --vocoder griffin_lim --sample-rate 22050 --audio-format flac \ + --use-resynthesized-target +``` +Here `--vocoder`, `--sample-rate` and `--audio-format` must match the values used for `generate_waveform.py` +(whose `--audio-format` defaults to `wav`), because they determine the sub-directory and file extension the manifest +points to; `--use-resynthesized-target` additionally requires that generation was run with `--dump-target`. + +Speech recognition (ASR) models usually operate at lower sample rates (e.g. 16kHz). For the WER/CER metric, +you may need to resample the audio accordingly --- add `--output-sample-rate 16000` for `generate_waveform.py` and +use `--sample-rate 16000` for `get_eval_manifest.py`. + + +#### WER/CER metric +We use a wav2vec 2.0 ASR model as an example. 
[Download](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec) +the model checkpoint and dictionary, then compute WER/CER with +```bash +python -m examples.speech_synthesis.evaluation.eval_asr \ + --audio-header syn --text-header text --err-unit char --split ${SPLIT} \ + --w2v-ckpt ${WAV2VEC2_CHECKPOINT_PATH} --w2v-dict-dir ${WAV2VEC2_DICT_DIR} \ + --raw-manifest ${EVAL_OUTPUT_ROOT}/eval_16khz.tsv --asr-dir ${EVAL_OUTPUT_ROOT}/asr +``` + +#### MCD/MSD metric +```bash +python -m examples.speech_synthesis.evaluation.eval_sp \ + ${EVAL_OUTPUT_ROOT}/eval.tsv --mcd --msd +``` + +#### F0 metrics +```bash +python -m examples.speech_synthesis.evaluation.eval_f0 \ + ${EVAL_OUTPUT_ROOT}/eval.tsv --gpe --vde --ffe +``` + + +## Results + +| --arch | Params | Test MCD | Model | +|---|---|---|---| +| tts_transformer | 54M | 3.8 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2/ljspeech_transformer_phn.tar) | +| fastspeech2 | 41M | 3.8 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2/ljspeech_fastspeech2_phn.tar) | + +[[Back]](..) diff --git a/fairseq/examples/speech_synthesis/docs/vctk_example.md b/fairseq/examples/speech_synthesis/docs/vctk_example.md new file mode 100644 index 0000000..6808256 --- /dev/null +++ b/fairseq/examples/speech_synthesis/docs/vctk_example.md @@ -0,0 +1,61 @@ +[[Back]](..) + +# VCTK + +[VCTK](https://datashare.ed.ac.uk/handle/10283/3443) is an open English speech corpus. We provide examples +for building [Transformer](https://arxiv.org/abs/1809.08895) models on this dataset. 
+ + +## Data preparation +Download data, create splits and generate audio manifests with +```bash +python -m examples.speech_synthesis.preprocessing.get_vctk_audio_manifest \ + --output-data-root ${AUDIO_DATA_ROOT} \ + --output-manifest-root ${AUDIO_MANIFEST_ROOT} +``` + +To denoise audio and trim leading/trailing silence using signal processing based VAD, run +```bash +for SPLIT in dev test train; do + python -m examples.speech_synthesis.preprocessing.denoise_and_vad_audio \ + --audio-manifest ${AUDIO_MANIFEST_ROOT}/${SPLIT}.audio.tsv \ + --output-dir ${PROCESSED_DATA_ROOT} \ + --denoise --vad --vad-agg-level 3 +done +``` +which generates a new audio TSV manifest under `${PROCESSED_DATA_ROOT}` with updated path to the processed audio and +a new column for SNR. + +To do filtering by CER, follow the [Automatic Evaluation](../docs/ljspeech_example.md#automatic-evaluation) section to +run ASR model (add `--eval-target` to `get_eval_manifest` for evaluation on the reference audio; add `--err-unit char` +to `eval_asr` to compute CER instead of WER). The example-level CER is saved to +`${EVAL_OUTPUT_ROOT}/uer_cer.${SPLIT}.tsv`. + +Then, extract log-Mel spectrograms, generate feature manifest and create data configuration YAML with +```bash +python -m examples.speech_synthesis.preprocessing.get_feature_manifest \ + --audio-manifest-root ${PROCESSED_DATA_ROOT} \ + --output-root ${FEATURE_MANIFEST_ROOT} \ + --ipa-vocab --use-g2p \ + --snr-threshold 15 \ + --cer-threshold 0.1 --cer-tsv-path ${EVAL_OUTPUT_ROOT}/uer_cer.${SPLIT}.tsv +``` +where we use phoneme inputs (`--ipa-vocab --use-g2p`) as example. For sample filtering, we set the SNR and CER threshold +to 15 and 10%, respectively. + +## Training +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#transformer).) + +## Inference +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#inference).) 
+ +## Automatic Evaluation +(Please refer to [the LJSpeech example](../docs/ljspeech_example.md#automatic-evaluation).) + +## Results + +| --arch | Params | Test MCD | Model | +|---|---|---|---| +| tts_transformer | 54M | 3.4 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2/vctk_transformer_phn.tar) | + +[[Back]](..) diff --git a/fairseq/examples/speech_synthesis/evaluation/__init__.py b/fairseq/examples/speech_synthesis/evaluation/__init__.py new file mode 100644 index 0000000..6264236 --- /dev/null +++ b/fairseq/examples/speech_synthesis/evaluation/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/fairseq/examples/speech_synthesis/evaluation/eval_asr.py b/fairseq/examples/speech_synthesis/evaluation/eval_asr.py new file mode 100644 index 0000000..005a11b --- /dev/null +++ b/fairseq/examples/speech_synthesis/evaluation/eval_asr.py @@ -0,0 +1,128 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import argparse
import editdistance
import re
import shutil
import soundfile as sf
import subprocess
from pathlib import Path

from examples.speech_to_text.data_utils import load_tsv_to_dicts


def preprocess_text(text):
    """Normalize a transcript into space-separated letter targets.

    The text is upper-cased, every character other than ``A-Z``, ``'`` and
    space is replaced by a space, words are joined with ``|`` and every
    character of the result is separated by one space, e.g. ``"Hi, you"``
    becomes ``"H I | Y O U"``.
    """
    words = re.sub(r"[^A-Z' ]", " ", text.upper()).split()
    return " ".join("|".join(words))


def prepare_w2v_data(
        dict_dir, sample_rate, label, audio_paths, texts, split, data_dir
):
    """Write the manifest/label files consumed by the ASR inference script.

    Creates ``data_dir`` and writes into it:

    * a copy of ``dict.{label}.txt`` from ``dict_dir``,
    * ``{split}.tsv``: a ``/`` root line followed by one
      ``<audio path>\\t<number of samples>`` line per audio file,
    * ``{split}.{label}``: one pre-processed transcript per line.

    Every audio file must have sample rate ``sample_rate``; otherwise an
    ``AssertionError`` is raised.
    """
    data_dir.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(
        dict_dir / f"dict.{label}.txt",
        data_dir / f"dict.{label}.txt"
    )
    with open(data_dir / f"{split}.tsv", "w") as f:
        f.write("/\n")
        for audio_path in audio_paths:
            wav, sr = sf.read(audio_path)
            # The expected rate is interpolated so the message shows both
            # values (it used to print the literal text "sample_rate").
            assert sr == sample_rate, f"{sr} != {sample_rate}"
            nsample = len(wav)
            f.write(f"{audio_path}\t{nsample}\n")
    with open(data_dir / f"{split}.{label}", "w") as f:
        for text in texts:
            text = preprocess_text(text)
            f.write(f"{text}\n")


def run_asr(asr_dir, split, w2v_ckpt, w2v_label, res_dir):
    """
    Run ``examples.speech_recognition.infer`` in a subprocess.

    results will be saved at
    {res_dir}/{ref,hypo}.word-{w2v_ckpt.filename}-{split}.txt

    Raises ``subprocess.CalledProcessError`` if the command fails.
    """
    cmd = ["python", "-m", "examples.speech_recognition.infer"]
    cmd += [str(asr_dir.resolve())]
    cmd += ["--task", "audio_finetuning", "--nbest", "1", "--quiet"]
    cmd += ["--w2l-decoder", "viterbi", "--criterion", "ctc"]
    cmd += ["--post-process", "letter", "--max-tokens", "4000000"]
    cmd += ["--path", str(w2v_ckpt.resolve()), "--labels", w2v_label]
    cmd += ["--gen-subset", split, "--results-path", str(res_dir.resolve())]

    print(f"running cmd:\n{' '.join(cmd)}")
    subprocess.run(cmd, check=True)


def compute_error_rate(hyp_wrd_path, ref_wrd_path, unit="word"):
    """Compute per-utterance edit-distance error rates.

    each line is "<text> (None-<index>)"

    :param hyp_wrd_path: path to the hypothesis file
    :param ref_wrd_path: path to the reference file; line ``k`` is compared
        with line ``k`` of the hypothesis file
    :param unit: ``"word"`` (WER) or ``"char"`` (CER)
    :return: dict mapping the index parsed from each hypothesis line to
        ``edit_distance(hyp, ref) / len(ref)``. For an empty reference the
        denominator is taken as 1 (i.e. the value is the number of inserted
        tokens) instead of raising ``ZeroDivisionError``.
    :raises ValueError: if ``unit`` is not supported
    """
    tokenize_line = {
        "word": lambda x: re.sub(r" \(.*\)$", "", x.rstrip()).split(),
        "char": lambda x: list(re.sub(r" \(.*\)$", "", x.rstrip()))
    }.get(unit)
    if tokenize_line is None:
        raise ValueError(f"{unit} not supported")

    # Read each file exactly once and close it deterministically.
    with open(hyp_wrd_path) as f:
        hyp_lines = f.readlines()
    with open(ref_wrd_path) as f:
        ref_lines = f.readlines()

    # NOTE(review): the index is recovered by dropping all non-digits, which
    # presumably relies on the text part containing no digits -- confirm.
    inds = [int(re.sub(r"\D*(\d*)\D*", r"\1", line)) for line in hyp_lines]
    hyps = [tokenize_line(line) for line in hyp_lines]
    refs = [tokenize_line(line) for line in ref_lines]
    assert len(hyps) == len(refs)
    err_rates = [
        editdistance.eval(hyp, ref) / max(len(ref), 1)
        for hyp, ref in zip(hyps, refs)
    ]
    return dict(zip(inds, err_rates))


def main(args):
    """Run ASR on the manifest's audio and write ``uer_{unit}.{split}.tsv``.

    The output TSV (written to ``args.asr_dir``) has the columns
    ``id``, ``audio`` and ``uer``.
    """
    samples = load_tsv_to_dicts(args.raw_manifest)
    ids = [
        sample[args.id_header] if args.id_header else "" for sample in samples
    ]
    audio_paths = [sample[args.audio_header] for sample in samples]
    texts = [sample[args.text_header] for sample in samples]

    prepare_w2v_data(
        args.w2v_dict_dir,
        args.w2v_sample_rate,
        args.w2v_label,
        audio_paths,
        texts,
        args.split,
        args.asr_dir
    )
    run_asr(args.asr_dir, args.split, args.w2v_ckpt, args.w2v_label, args.asr_dir)
    ind_to_err_rates = compute_error_rate(
        args.asr_dir / f"hypo.word-{args.w2v_ckpt.name}-{args.split}.txt",
        args.asr_dir / f"ref.word-{args.w2v_ckpt.name}-{args.split}.txt",
        args.err_unit,
    )

    uer_path = args.asr_dir / f"uer_{args.err_unit}.{args.split}.tsv"
    with open(uer_path, "w") as f:
        f.write("id\taudio\tuer\n")
        for ind, (id_, audio_path) in enumerate(zip(ids, audio_paths)):
            f.write(f"{id_}\t{audio_path}\t{ind_to_err_rates[ind]:.4f}\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--raw-manifest", required=True, type=Path)
    parser.add_argument("--asr-dir", required=True, type=Path)
    parser.add_argument("--id-header", default="id", type=str)
    parser.add_argument("--audio-header", default="audio", type=str)
    parser.add_argument("--text-header", default="src_text", type=str)
    parser.add_argument("--split", default="raw", type=str)
    parser.add_argument("--w2v-ckpt", required=True, type=Path)
    parser.add_argument("--w2v-dict-dir", required=True, type=Path)
    parser.add_argument("--w2v-sample-rate", default=16000, type=int)
    parser.add_argument("--w2v-label", default="ltr", type=str)
    # Reject unsupported units up front rather than after the ASR run.
    parser.add_argument("--err-unit", default="word", type=str,
                        choices=["word", "char"])
    args = parser.parse_args()

    main(args)

# diff --git a/fairseq/examples/speech_synthesis/evaluation/eval_f0.py b/fairseq/examples/speech_synthesis/evaluation/eval_f0.py
# new file mode 100644
# index 0000000..df721d6
# --- /dev/null
# +++ b/fairseq/examples/speech_synthesis/evaluation/eval_f0.py
# @@ -0,0 +1,266 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
Signal processing-based evaluation using waveforms
"""
import numpy as np
import os.path as op

import torchaudio
import tqdm
from tabulate import tabulate

from examples.speech_synthesis.utils import (
    gross_pitch_error, voicing_decision_error, f0_frame_error
)
from examples.speech_synthesis.evaluation.eval_sp import load_eval_spec


def difference_function(x, n, tau_max):
    """
    Compute difference function of data x. This solution is implemented directly
    with Numpy fft.


    :param x: audio data
    :param n: length of data (not used by this implementation; the length
        is taken from ``x`` itself)
    :param tau_max: integration window size (clipped to the length of ``x``)
    :return: difference function, one value per lag in ``[0, tau_max)``
    :rtype: numpy.ndarray
    """

    x = np.array(x, np.float64)
    w = x.size
    tau_max = min(tau_max, w)
    x_cumsum = np.concatenate((np.array([0.]), (x * x).cumsum()))
    size = w + tau_max
    # Pad the FFT to the smallest "nice" length (small factor times a power
    # of two) that is at least `size`.
    p2 = (size // 32).bit_length()
    nice_numbers = (16, 18, 20, 24, 25, 27, 30, 32)
    size_pad = min(k * 2 ** p2 for k in nice_numbers if k * 2 ** p2 >= size)
    fc = np.fft.rfft(x, size_pad)
    conv = np.fft.irfft(fc * fc.conjugate())[:tau_max]
    return x_cumsum[w:w - tau_max:-1] + x_cumsum[w] - x_cumsum[:tau_max] - \
        2 * conv


def cumulative_mean_normalized_difference_function(df, n):
    """
    Compute cumulative mean normalized difference function (CMND).

    :param df: Difference function
    :param n: length of data (must equal ``len(df)``)
    :return: cumulative mean normalized difference function; the value at
        lag 0 is defined as 1
    :rtype: numpy.ndarray
    """

    # scipy method
    cmn_df = df[1:] * range(1, n) / np.cumsum(df[1:]).astype(float)
    return np.insert(cmn_df, 0, 1)


def get_pitch(cmdf, tau_min, tau_max, harmo_th=0.1):
    """
    Return fundamental period of a frame based on CMND function.

    :param cmdf: Cumulative Mean Normalized Difference function
    :param tau_min: minimum period for speech
    :param tau_max: maximum period for speech
    :param harmo_th: harmonicity threshold to determine if it is necessary to
        compute pitch frequency
    :return: fundamental period if there are values under the threshold,
        0 otherwise
    :rtype: int
    """
    tau = tau_min
    while tau < tau_max:
        if cmdf[tau] < harmo_th:
            # Walk down to the local minimum that follows the first value
            # below the threshold.
            while tau + 1 < tau_max and cmdf[tau + 1] < cmdf[tau]:
                tau += 1
            return tau
        tau += 1

    return 0  # if unvoiced


def compute_yin(sig, sr, w_len=512, w_step=256, f0_min=100, f0_max=500,
                harmo_thresh=0.1):
    """

    Compute the Yin Algorithm. Return fundamental frequency and harmonic rate.

    https://github.com/NVIDIA/mellotron adaption of
    https://github.com/patriceguyot/Yin

    :param sig: Audio signal (list of float)
    :param sr: sampling rate (int)
    :param w_len: size of the analysis window (samples)
    :param w_step: size of the lag between two consecutive windows (samples)
    :param f0_min: Minimum fundamental frequency that can be detected (hertz)
    :param f0_max: Maximum fundamental frequency that can be detected (hertz)
    :param harmo_thresh: Threshold of detection. The algorithm returns the
    first minimum of the CMND function below this threshold.

    :returns:

        * pitches: list of fundamental frequencies,
        * harmonic_rates: list of harmonic rate values for each fundamental
        frequency value (= confidence value)
        * argmins: minimums of the Cumulative Mean Normalized DifferenceFunction
        * times: list of time of each estimation
    :rtype: tuple
    """

    tau_min = int(sr / f0_max)
    tau_max = int(sr / f0_min)

    # time values for each analysis window
    time_scale = range(0, len(sig) - w_len, w_step)
    times = [t/float(sr) for t in time_scale]
    frames = [sig[t:t + w_len] for t in time_scale]

    pitches = [0.0] * len(time_scale)
    harmonic_rates = [0.0] * len(time_scale)
    argmins = [0.0] * len(time_scale)

    for i, frame in enumerate(frames):
        # Compute YIN
        df = difference_function(frame, w_len, tau_max)
        cm_df = cumulative_mean_normalized_difference_function(df, tau_max)
        p = get_pitch(cm_df, tau_min, tau_max, harmo_thresh)

        # Get results
        if np.argmin(cm_df) > tau_min:
            argmins[i] = float(sr / np.argmin(cm_df))
        if p != 0:  # A pitch was found
            pitches[i] = float(sr / p)
            harmonic_rates[i] = cm_df[p]
        else:  # No pitch, but we compute a value of the harmonic rate
            harmonic_rates[i] = min(cm_df)

    return pitches, harmonic_rates, argmins, times


def extract_f0(samples):
    """Run YIN on the ``ref`` and ``syn`` audio of every sample.

    :param samples: list of dicts with ``"ref"`` and ``"syn"`` audio paths
    :return: list aligned with ``samples``; each entry is a dict holding the
        ``compute_yin`` output for ``"ref"`` and ``"syn"``, or ``None`` when
        either file is missing
    """
    f0_samples = []
    for sample in tqdm.tqdm(samples):
        if not op.isfile(sample["ref"]) or not op.isfile(sample["syn"]):
            f0_samples.append(None)
            continue

        # assume single channel
        yref, sr = torchaudio.load(sample["ref"])
        ysyn, _sr = torchaudio.load(sample["syn"])
        yref, ysyn = yref[0], ysyn[0]
        assert sr == _sr, f"{sr} != {_sr}"

        yref_f0 = compute_yin(yref, sr)
        ysyn_f0 = compute_yin(ysyn, sr)

        f0_samples += [
            {
                "ref": yref_f0,
                "syn": ysyn_f0
            }
        ]

    return f0_samples


def eval_f0_error(samples, distortion_fn):
    """Apply ``distortion_fn`` to every F0 sample pair.

    :param samples: output of ``extract_f0``
    :param distortion_fn: called as
        ``distortion_fn(ref_times, ref_f0, syn_times, syn_f0)``
    :return: list aligned with ``samples`` of
        ``(distortion, number of ref frames, number of syn frames)`` tuples,
        or ``None`` for skipped samples
    """
    results = []
    for sample in tqdm.tqdm(samples):
        if sample is None:
            results.append(None)
            continue
        # assume single channel
        yref_f, _, _, yref_t = sample["ref"]
        ysyn_f, _, _, ysyn_t = sample["syn"]

        yref_f = np.array(yref_f)
        yref_t = np.array(yref_t)
        ysyn_f = np.array(ysyn_f)
        ysyn_t = np.array(ysyn_t)

        distortion = distortion_fn(yref_t, yref_f, ysyn_t, ysyn_f)
        results.append((distortion.item(),
                        len(yref_f),
                        len(ysyn_f)
                        ))
    return results


def eval_gross_pitch_error(samples):
    """Evaluate ``gross_pitch_error`` on every sample."""
    return eval_f0_error(samples, gross_pitch_error)


def eval_voicing_decision_error(samples):
    """Evaluate ``voicing_decision_error`` on every sample."""
    return eval_f0_error(samples, voicing_decision_error)


def eval_f0_frame_error(samples):
    """Evaluate ``f0_frame_error`` on every sample."""
    return eval_f0_error(samples, f0_frame_error)


def print_results(results, show_bin):
    """Print mean/std of the error over all samples (skipping ``None``).

    With ``show_bin`` the same summary is also printed per bucket of the
    reference length (second column of ``results``).
    """
    results = np.array(list(filter(lambda x: x is not None, results)))
    if results.size == 0:
        # Every sample was skipped; column indexing below would raise.
        print("no valid samples to evaluate")
        return

    np.set_printoptions(precision=3)

    def _print_result(results):
        res = {
            "nutt": len(results),
            "error": results[:, 0].mean(),
            "std": results[:, 0].std(),
            "dur_ref": int(results[:, 1].sum()),
            "dur_syn": int(results[:, 2].sum()),
        }
        print(tabulate([res.values()], res.keys(), floatfmt=".4f"))

    print(">>>> ALL")
    _print_result(results)

    if show_bin:
        edges = [0, 200, 400, 600, 800, 1000, 2000, 4000]
        for i in range(1, len(edges)):
            mask = np.logical_and(results[:, 1] >= edges[i-1],
                                  results[:, 1] < edges[i])
            if not mask.any():
                continue
            bin_results = results[mask]
            print(f">>>> ({edges[i-1]}, {edges[i]})")
            _print_result(bin_results)


def main(eval_f0, gpe, vde, ffe, show_bin):
    """Load the evaluation manifest and print the requested F0 metrics."""
    samples = load_eval_spec(eval_f0)
    if gpe or vde or ffe:
        f0_samples = extract_f0(samples)

    if gpe:
        print("===== Evaluate Gross Pitch Error =====")
        results = eval_gross_pitch_error(f0_samples)
        print_results(results, show_bin)
    if vde:
        print("===== Evaluate Voicing Decision Error =====")
        results = eval_voicing_decision_error(f0_samples)
        print_results(results, show_bin)
    if ffe:
        print("===== Evaluate F0 Frame Error =====")
        results = eval_f0_frame_error(f0_samples)
        print_results(results, show_bin)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("eval_f0")
    parser.add_argument("--gpe", action="store_true")
    parser.add_argument("--vde", action="store_true")
    parser.add_argument("--ffe", action="store_true")
    parser.add_argument("--show-bin", action="store_true")
    args = parser.parse_args()

    main(args.eval_f0, args.gpe, args.vde, args.ffe, args.show_bin)

# diff --git a/fairseq/examples/speech_synthesis/evaluation/eval_sp.py b/fairseq/examples/speech_synthesis/evaluation/eval_sp.py
# new file mode 100644
# index 0000000..702c498
# --- /dev/null
# +++ b/fairseq/examples/speech_synthesis/evaluation/eval_sp.py
# @@ -0,0 +1,131 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


"""
Signal processing-based evaluation using waveforms
"""

import csv
import numpy as np
import os.path as op

import torch
import tqdm
from tabulate import tabulate
import torchaudio

from examples.speech_synthesis.utils import batch_mel_spectral_distortion
from fairseq.tasks.text_to_speech import batch_mel_cepstral_distortion


def load_eval_spec(path):
    """Read a tab-separated evaluation manifest into a list of row dicts."""
    with open(path) as f:
        reader = csv.DictReader(f, delimiter='\t')
        samples = list(reader)
    return samples


def eval_distortion(samples, distortion_fn, device="cuda"):
    """Compute an alignment-based distortion for every ref/syn audio pair.

    :param samples: list of dicts with ``"ref"`` and ``"syn"`` audio paths
    :param distortion_fn: called as
        ``distortion_fn([ref], [syn], sample_rate, None)``
    :param device: torch device the waveforms are moved to
    :return: list aligned with ``samples``; each entry is ``None`` when a
        file is missing, otherwise the tuple ``(distortion, ref frames,
        syn frames, path length, insertions, deletions)``
    """
    results = []
    for sample in tqdm.tqdm(samples):
        if not op.isfile(sample["ref"]) or not op.isfile(sample["syn"]):
            results.append(None)
            continue
        # assume single channel
        yref, sr = torchaudio.load(sample["ref"])
        ysyn, _sr = torchaudio.load(sample["syn"])
        yref, ysyn = yref[0].to(device), ysyn[0].to(device)
        assert sr == _sr, f"{sr} != {_sr}"

        distortion, extra = distortion_fn([yref], [ysyn], sr, None)[0]
        _, _, _, _, _, pathmap = extra
        nins = torch.sum(pathmap.sum(dim=1) - 1)  # extra frames in syn
        ndel = torch.sum(pathmap.sum(dim=0) - 1)  # missing frames from syn
        results.append(
            (distortion.item(),  # path distortion
             pathmap.size(0),  # yref num frames
             pathmap.size(1),  # ysyn num frames
             pathmap.sum().item(),  # path length
             nins.item(),  # insertion
             ndel.item(),  # deletion
             )
        )
    return results


def eval_mel_cepstral_distortion(samples, device="cuda"):
    """Evaluate ``batch_mel_cepstral_distortion`` on every sample."""
    return eval_distortion(samples, batch_mel_cepstral_distortion, device)


def eval_mel_spectral_distortion(samples, device="cuda"):
    """Evaluate ``batch_mel_spectral_distortion`` on every sample."""
    return eval_distortion(samples, batch_mel_spectral_distortion, device)


def print_results(results, show_bin):
    """Print aggregated distortion statistics (skipping ``None`` entries).

    With ``show_bin`` the same summary is also printed per bucket of the
    reference length (second column of ``results``).
    """
    results = np.array(list(filter(lambda x: x is not None, results)))
    if results.size == 0:
        # Every sample was skipped; unpacking the column sums would raise.
        print("no valid samples to evaluate")
        return

    np.set_printoptions(precision=3)

    def _print_result(results):
        dist, dur_ref, dur_syn, dur_ali, nins, ndel = results.sum(axis=0)
        res = {
            "nutt": len(results),
            "dist": dist,
            "dur_ref": int(dur_ref),
            "dur_syn": int(dur_syn),
            "dur_ali": int(dur_ali),
            "dist_per_ref_frm": dist/dur_ref,
            "dist_per_syn_frm": dist/dur_syn,
            "dist_per_ali_frm": dist/dur_ali,
            "ins": nins/dur_ref,
            "del": ndel/dur_ref,
        }
        print(tabulate(
            [res.values()],
            res.keys(),
            floatfmt=".4f"
        ))

    print(">>>> ALL")
    _print_result(results)

    if show_bin:
        edges = [0, 200, 400, 600, 800, 1000, 2000, 4000]
        for i in range(1, len(edges)):
            mask = np.logical_and(results[:, 1] >= edges[i-1],
                                  results[:, 1] < edges[i])
            if not mask.any():
                continue
            bin_results = results[mask]
            print(f">>>> ({edges[i-1]}, {edges[i]})")
            _print_result(bin_results)


def main(eval_spec, mcd, msd, show_bin):
    """Load the evaluation manifest and print the requested metrics (CPU)."""
    samples = load_eval_spec(eval_spec)
    device = "cpu"
    if mcd:
        # MCD/MSD are *mel* cepstral/spectral distortion (see function names).
        print("===== Evaluate Mel Cepstral Distortion =====")
        results = eval_mel_cepstral_distortion(samples, device)
        print_results(results, show_bin)
    if msd:
        print("===== Evaluate Mel Spectral Distortion =====")
        results = eval_mel_spectral_distortion(samples, device)
        print_results(results, show_bin)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("eval_spec")
    parser.add_argument("--mcd", action="store_true")
    parser.add_argument("--msd", action="store_true")
    parser.add_argument("--show-bin", action="store_true")
    args = parser.parse_args()

    main(args.eval_spec, args.mcd, args.msd, args.show_bin)

# diff --git a/fairseq/examples/speech_synthesis/evaluation/get_eval_manifest.py b/fairseq/examples/speech_synthesis/evaluation/get_eval_manifest.py
# new file mode 100644
# index 0000000..44b3685
# --- /dev/null
# +++ b/fairseq/examples/speech_synthesis/evaluation/get_eval_manifest.py
# @@ -0,0 +1,64 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import csv
from pathlib import Path


def main(args):
    """
    Write the evaluation manifest, a TSV with the columns
    `id syn ref text speaker`

    ``syn`` points to ``{generation_root}/{format}_{rate}hz_{vocoder}/{id}``;
    ``ref`` is the original audio, or the ``..._tgt`` directory when
    ``--use-resynthesized-target`` is set. With ``--eval-target`` the
    original audio is used as ``syn``.
    """
    in_root = Path(args.generation_root).resolve()
    ext = args.audio_format
    # Same for every row, so it is built once outside the loop.
    dir_name = f"{ext}_{args.sample_rate}hz_{args.vocoder}"
    with open(args.audio_manifest) as f, open(args.output_path, "w") as f_out:
        reader = csv.DictReader(
            f, delimiter="\t", quotechar=None, doublequote=False,
            lineterminator="\n", quoting=csv.QUOTE_NONE
        )
        header = ["id", "syn", "ref", "text", "speaker"]
        f_out.write("\t".join(header) + "\n")
        for row in reader:
            id_ = row["id"]
            syn = (in_root / dir_name / f"{id_}.{ext}").as_posix()
            ref = row["audio"]
            if args.use_resynthesized_target:
                ref = (in_root / f"{dir_name}_tgt" / f"{id_}.{ext}").as_posix()
            if args.eval_target:
                syn = row["audio"]
            sample = [id_, syn, ref, row["tgt_text"], row["speaker"]]
            f_out.write("\t".join(sample) + "\n")
    print(f"wrote evaluation file to {args.output_path}")


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    # The three paths are mandatory: main() cannot run without them, so
    # argparse reports a missing one instead of a later TypeError.
    parser.add_argument(
        "--generation-root", required=True,
        help="output directory for generate_waveform.py"
    )
    parser.add_argument(
        "--audio-manifest", required=True,
        help="used to determine the original utterance ID and text"
    )
    parser.add_argument(
        "--output-path", required=True,
        help="path to output evaluation spec file"
    )
    parser.add_argument(
        "--use-resynthesized-target", action="store_true",
        help="use resynthesized reference instead of the original audio"
    )
    parser.add_argument(
        "--eval-target", action="store_true",
        help="evaluate reference instead of model prediction"
    )
    parser.add_argument("--vocoder", type=str, default="griffin_lim")
    parser.add_argument("--sample-rate", type=int, default=22_050)
    parser.add_argument("--audio-format", type=str, default="wav")
    args = parser.parse_args()

    main(args)

# diff --git a/fairseq/examples/speech_synthesis/generate_waveform.py b/fairseq/examples/speech_synthesis/generate_waveform.py
# new file mode 100644
# index 0000000..3b56190
# --- /dev/null
# +++ b/fairseq/examples/speech_synthesis/generate_waveform.py
# @@ -0,0 +1,192 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +import ast +import logging +import matplotlib.pyplot as plt +import numpy as np +from pathlib import Path +import soundfile as sf +import sys +import torch +import torchaudio + +from fairseq import checkpoint_utils, options, tasks, utils +from fairseq.logging import progress_bar +from fairseq.tasks.text_to_speech import plot_tts_output +from fairseq.data.audio.text_to_speech_dataset import TextToSpeechDataset + + +logging.basicConfig() +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def make_parser(): + parser = options.get_speech_generation_parser() + parser.add_argument("--dump-features", action="store_true") + parser.add_argument("--dump-waveforms", action="store_true") + parser.add_argument("--dump-attentions", action="store_true") + parser.add_argument("--dump-eos-probs", action="store_true") + parser.add_argument("--dump-plots", action="store_true") + parser.add_argument("--dump-target", action="store_true") + parser.add_argument("--output-sample-rate", default=22050, type=int) + parser.add_argument("--teacher-forcing", action="store_true") + parser.add_argument( + "--audio-format", type=str, default="wav", choices=["wav", "flac"] + ) + return parser + + +def postprocess_results( + dataset: TextToSpeechDataset, sample, hypos, resample_fn, dump_target +): + def to_np(x): + return None if x is None else x.detach().cpu().numpy() + + sample_ids = [dataset.ids[i] for i in sample["id"].tolist()] + texts = sample["src_texts"] if "src_texts" in sample else [""] * len(hypos) + attns = [to_np(hypo["attn"]) for hypo in hypos] + eos_probs = [to_np(hypo.get("eos_prob", None)) for hypo in hypos] + feat_preds = [to_np(hypo["feature"]) for hypo in hypos] + wave_preds = [to_np(resample_fn(h["waveform"])) for h in hypos] + if dump_target: + feat_targs = [to_np(hypo["targ_feature"]) for hypo in hypos] + wave_targs = [to_np(resample_fn(h["targ_waveform"])) for h in hypos] + else: + feat_targs = [None for 
_ in hypos] + wave_targs = [None for _ in hypos] + + return zip(sample_ids, texts, attns, eos_probs, feat_preds, wave_preds, + feat_targs, wave_targs) + + +def dump_result( + is_na_model, + args, + vocoder, + sample_id, + text, + attn, + eos_prob, + feat_pred, + wave_pred, + feat_targ, + wave_targ, +): + sample_rate = args.output_sample_rate + out_root = Path(args.results_path) + if args.dump_features: + feat_dir = out_root / "feat" + feat_dir.mkdir(exist_ok=True, parents=True) + np.save(feat_dir / f"{sample_id}.npy", feat_pred) + if args.dump_target: + feat_tgt_dir = out_root / "feat_tgt" + feat_tgt_dir.mkdir(exist_ok=True, parents=True) + np.save(feat_tgt_dir / f"{sample_id}.npy", feat_targ) + if args.dump_attentions: + attn_dir = out_root / "attn" + attn_dir.mkdir(exist_ok=True, parents=True) + np.save(attn_dir / f"{sample_id}.npy", attn.numpy()) + if args.dump_eos_probs and not is_na_model: + eos_dir = out_root / "eos" + eos_dir.mkdir(exist_ok=True, parents=True) + np.save(eos_dir / f"{sample_id}.npy", eos_prob) + + if args.dump_plots: + images = [feat_pred.T] if is_na_model else [feat_pred.T, attn] + names = ["output"] if is_na_model else ["output", "alignment"] + if feat_targ is not None: + images = [feat_targ.T] + images + names = [f"target (idx={sample_id})"] + names + if is_na_model: + plot_tts_output(images, names, attn, "alignment", suptitle=text) + else: + plot_tts_output(images, names, eos_prob, "eos prob", suptitle=text) + plot_dir = out_root / "plot" + plot_dir.mkdir(exist_ok=True, parents=True) + plt.savefig(plot_dir / f"{sample_id}.png") + plt.close() + + if args.dump_waveforms: + ext = args.audio_format + if wave_pred is not None: + wav_dir = out_root / f"{ext}_{sample_rate}hz_{vocoder}" + wav_dir.mkdir(exist_ok=True, parents=True) + sf.write(wav_dir / f"{sample_id}.{ext}", wave_pred, sample_rate) + if args.dump_target and wave_targ is not None: + wav_tgt_dir = out_root / f"{ext}_{sample_rate}hz_{vocoder}_tgt" + wav_tgt_dir.mkdir(exist_ok=True, 
parents=True) + sf.write(wav_tgt_dir / f"{sample_id}.{ext}", wave_targ, sample_rate) + + +def main(args): + assert(args.dump_features or args.dump_waveforms or args.dump_attentions + or args.dump_eos_probs or args.dump_plots) + if args.max_tokens is None and args.batch_size is None: + args.max_tokens = 8000 + logger.info(args) + + use_cuda = torch.cuda.is_available() and not args.cpu + task = tasks.setup_task(args) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [args.path], + task=task, + arg_overrides=ast.literal_eval(args.model_overrides), + ) + model = models[0].cuda() if use_cuda else models[0] + # use the original n_frames_per_step + task.args.n_frames_per_step = saved_cfg.task.n_frames_per_step + task.load_dataset(args.gen_subset, task_cfg=saved_cfg.task) + + data_cfg = task.data_cfg + sample_rate = data_cfg.config.get("features", {}).get("sample_rate", 22050) + resample_fn = { + False: lambda x: x, + True: lambda x: torchaudio.sox_effects.apply_effects_tensor( + x.detach().cpu().unsqueeze(0), sample_rate, + [['rate', str(args.output_sample_rate)]] + )[0].squeeze(0) + }.get(args.output_sample_rate != sample_rate) + if args.output_sample_rate != sample_rate: + logger.info(f"resampling to {args.output_sample_rate}Hz") + + generator = task.build_generator([model], args) + itr = task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_sentences=args.batch_size, + max_positions=(sys.maxsize, sys.maxsize), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + data_buffer_size=args.data_buffer_size, + ).next_epoch_itr(shuffle=False) + + Path(args.results_path).mkdir(exist_ok=True, parents=True) + is_na_model = getattr(model, "NON_AUTOREGRESSIVE", False) + dataset = task.dataset(args.gen_subset) + vocoder = task.args.vocoder + 
def cli_main():
    """Command-line entry point: build the parser, parse args, run ``main``."""
    main(options.parse_args_and_arch(make_parser()))
def convert_sr(inpath, sr, output_path=None):
    """Resample an audio file to ``sr`` Hz with the ``sox`` command-line tool.

    Args:
        inpath: path of the audio file to convert.
        sr: target sample rate in Hz.
        output_path: destination path. When falsy, a fresh temporary ``.wav``
            path is generated.

    Returns:
        The path the converted audio was written to.

    Raises:
        FileNotFoundError: if the ``sox`` executable cannot be found.
        subprocess.CalledProcessError: if ``sox`` exits with a non-zero
            status.
    """
    # Local import so this function does not depend on a file-level change.
    import subprocess

    if not output_path:
        output_path = generate_tmp_filename("wav")
    # Pass an argument list and no shell: paths come from a user-supplied
    # manifest and may contain spaces or shell metacharacters.
    subprocess.run(
        ["sox", str(inpath), "-r", str(sr), str(output_path)],
        check=True,
    )
    return output_path
def process(args):
    """Denoise and/or VAD-trim every audio file listed in a TSV manifest.

    Reads ``args.audio_manifest`` (tab-separated; the columns ``id``,
    ``audio``, ``n_frames``, ``tgt_text``, ``speaker`` and ``src_text`` are
    read), writes processed audio under ``args.output_dir`` in the
    ``after_denoise`` / ``after_vad`` sub-directories, and saves a new
    manifest with the same file name in ``args.output_dir`` that points at
    the processed files and adds an ``snr`` column.

    Returns early (after logging an error) when neither ``args.denoise`` nor
    ``args.vad`` is set.
    """
    # making sure we are requested either denoise or vad
    if not args.denoise and not args.vad:
        log.error("No denoise or vad is requested.")
        return

    log.info("Creating out directories...")
    if args.denoise:
        out_denoise = Path(args.output_dir).absolute().joinpath(PATHS[0])
        out_denoise.mkdir(parents=True, exist_ok=True)
    if args.vad:
        out_vad = Path(args.output_dir).absolute().joinpath(PATHS[1])
        out_vad.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the enhancement model is loaded even when only --vad is
    # requested; confirm whether that is intended.
    log.info("Loading pre-trained speech enhancement model...")
    model = master64().to(args.device)

    log.info("Building the VAD model...")
    vad = webrtcvad.Vad(int(args.vad_agg_level))

    # preparing the output dict (column name -> list of per-sample values)
    output_dict = defaultdict(list)

    log.info(f"Parsing input manifest: {args.audio_manifest}")
    with open(args.audio_manifest, "r") as f:
        manifest_dict = csv.DictReader(f, delimiter="\t")
        for row in tqdm(manifest_dict):
            filename = str(row["audio"])

            # final_output tracks the most recently produced file for this
            # sample: the input, then the denoised file, then the VAD file.
            final_output = filename
            keep_sample = True
            # Taken verbatim from the manifest (a string); only replaced by
            # a recomputed integer when the VAD branch writes a new file.
            n_frames = row["n_frames"]
            # -1 marks "SNR not computed" (denoising disabled).
            snr = -1
            if args.denoise:
                output_path_denoise = out_denoise.joinpath(Path(filename).name)
                # convert to 16khz in case we use a different sr
                # NOTE(review): the temporary file created here is never
                # removed in this function.
                tmp_path = convert_sr(final_output, 16000)

                # loading audio file and generating the enhanced version
                out, sr = torchaudio.load(tmp_path)
                out = out.to(args.device)
                estimate = model(out)
                # Linear mix of the enhanced estimate and the noisy input,
                # weighted by args.dry_wet.
                estimate = (1 - args.dry_wet) * estimate + args.dry_wet * out
                write(estimate[0], str(output_path_denoise), sr)

                snr = utils.cal_snr(out, estimate)
                snr = snr.cpu().detach().numpy()[0][0]
                final_output = str(output_path_denoise)

            if args.vad:
                output_path_vad = out_vad.joinpath(Path(filename).name)
                sr = torchaudio.info(final_output).sample_rate
                # Files already at 16/32/48 kHz are used as-is; any other
                # rate is first resampled to one of those three rates.
                if sr in [16000, 32000, 48000]:
                    tmp_path = final_output
                elif sr < 16000:
                    tmp_path = convert_sr(final_output, 16000)
                elif sr < 32000:
                    tmp_path = convert_sr(final_output, 32000)
                else:
                    tmp_path = convert_sr(final_output, 48000)
                # apply VAD
                segment, sample_rate = apply_vad(vad, tmp_path)
                # NOTE(review): segment is a bytes object, so len(segment)
                # is a byte count rather than a sample count -- confirm the
                # threshold below is meant to be in bytes.
                if len(segment) < sample_rate * MIN_T:
                    keep_sample = False
                    print((
                        f"WARNING: skip {filename} because it is too short "
                        f"after VAD ({len(segment) / sample_rate} < {MIN_T})"
                    ))
                else:
                    if sample_rate != sr:
                        # VAD ran at a different rate than the source file:
                        # write to a temp file, then resample back to the
                        # source rate at the final location.
                        tmp_path = generate_tmp_filename("wav")
                        write_wave(tmp_path, segment, sample_rate)
                        convert_sr(tmp_path, sr,
                                   output_path=str(output_path_vad))
                    else:
                        write_wave(str(output_path_vad), segment, sample_rate)
                    final_output = str(output_path_vad)
                    # Reload the trimmed file to record its new length.
                    segment, _ = torchaudio.load(final_output)
                    n_frames = segment.size(1)

            if keep_sample:
                output_dict["id"].append(row["id"])
                output_dict["audio"].append(final_output)
                output_dict["n_frames"].append(n_frames)
                output_dict["tgt_text"].append(row["tgt_text"])
                output_dict["speaker"].append(row["speaker"])
                output_dict["src_text"].append(row["src_text"])
                output_dict["snr"].append(snr)

        out_tsv_path = Path(args.output_dir) / Path(args.audio_manifest).name
        log.info(f"Saving manifest to {out_tsv_path.as_posix()}")
        save_df_to_tsv(pd.DataFrame.from_dict(output_dict), out_tsv_path)
+ ) + parser.add_argument( + "--device", "-d", type=str, default="cpu", + help="the device to be used for the speech enhancement model: " + "cpu | cuda." + ) + parser.add_argument("--denoise", action="store_true", + help="apply a denoising") + parser.add_argument("--vad", action="store_true", help="apply a VAD") + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/denoiser/__init__.py b/fairseq/examples/speech_synthesis/preprocessing/denoiser/__init__.py new file mode 100644 index 0000000..6264236 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/denoiser/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/fairseq/examples/speech_synthesis/preprocessing/denoiser/demucs.py b/fairseq/examples/speech_synthesis/preprocessing/denoiser/demucs.py new file mode 100644 index 0000000..3f70e73 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/denoiser/demucs.py @@ -0,0 +1,473 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
class BLSTM(nn.Module):
    """LSTM stack that keeps the feature width at ``dim``.

    With ``bi=True`` the LSTM is bidirectional and a linear layer maps its
    ``2 * dim`` output features back to ``dim``; with ``bi=False`` the LSTM
    output is returned as-is.
    """

    def __init__(self, dim, layers=2, bi=True):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=dim,
            hidden_size=dim,
            num_layers=layers,
            bidirectional=bi,
        )
        self.linear = nn.Linear(2 * dim, dim) if bi else None

    def forward(self, x, hidden=None):
        """Run the LSTM (and projection, if any); return ``(output, state)``."""
        output, state = self.lstm(x, hidden)
        if self.linear is not None:
            output = self.linear(output)
        return output, state
    @capture_init
    def __init__(self,
                 chin=1,
                 chout=1,
                 hidden=48,
                 depth=5,
                 kernel_size=8,
                 stride=4,
                 causal=True,
                 resample=4,
                 growth=2,
                 max_hidden=10_000,
                 normalize=True,
                 glu=True,
                 rescale=0.1,
                 floor=1e-3):
        """Build the encoder/decoder stacks and the bottleneck LSTM.

        Raises:
            ValueError: if ``resample`` is not 1, 2 or 4.
        """

        super().__init__()
        if resample not in [1, 2, 4]:
            raise ValueError("Resample should be 1, 2 or 4.")

        self.chin = chin
        self.chout = chout
        self.hidden = hidden
        self.depth = depth
        self.kernel_size = kernel_size
        self.stride = stride
        self.causal = causal
        self.floor = floor
        self.resample = resample
        self.normalize = normalize

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()
        # GLU halves the channel dimension, so the 1x1 convolutions feeding
        # it emit ch_scale (= 2) times as many channels.
        activation = nn.GLU(1) if glu else nn.ReLU()
        ch_scale = 2 if glu else 1

        for index in range(depth):
            # Encoder layer: strided conv -> ReLU -> 1x1 conv -> activation.
            encode = []
            encode += [
                nn.Conv1d(chin, hidden, kernel_size, stride),
                nn.ReLU(),
                nn.Conv1d(hidden, hidden * ch_scale, 1), activation,
            ]
            self.encoder.append(nn.Sequential(*encode))

            # Decoder layer mirroring the encoder layer above:
            # 1x1 conv -> activation -> transposed conv back to `chout`.
            decode = []
            decode += [
                nn.Conv1d(hidden, ch_scale * hidden, 1), activation,
                nn.ConvTranspose1d(hidden, chout, kernel_size, stride),
            ]
            # The layer built at index 0 ends up last in self.decoder (see
            # insert(0, ...) below) and is the only one without a final ReLU.
            if index > 0:
                decode.append(nn.ReLU())
            # Prepending keeps the decoder in the reverse order of the
            # encoder.
            self.decoder.insert(0, nn.Sequential(*decode))
            # The next (deeper) layer consumes this layer's width, and its
            # decoder must produce it.
            chout = hidden
            chin = hidden
            hidden = min(int(growth * hidden), max_hidden)

        # chin now holds the width of the deepest encoder layer's output.
        # The LSTM is bidirectional only for the non-causal model.
        self.lstm = BLSTM(chin, bi=not causal)
        if rescale:
            rescale_module(self, reference=rescale)
def fast_conv(conv, x):
    """
    Faster convolution evaluation if either kernel size is 1
    or length of sequence is equal to the kernel size.

    The matrix-multiply shortcuts are only valid for a plain ``Conv1d``
    (no padding, dilation 1, a single group), and for the kernel-size-1
    case only when the stride is 1. Any other configuration falls back to
    calling ``conv`` itself, so the result always matches ``conv(x)``.

    Args:
        conv: a 1-D convolution module.
        x: input tensor of shape ``(1, channels, length)``.

    Returns:
        Tensor of shape ``(1, out_channels, out_length)``.

    Raises:
        AssertionError: if the batch dimension of ``x`` is not 1.
    """
    batch, chin, length = x.shape
    chout, _, kernel = conv.weight.shape
    assert batch == 1

    plain = (
        not getattr(conv, "transposed", False)
        and conv.padding == (0,)
        and conv.dilation == (1,)
        and conv.groups == 1
    )
    if plain and kernel == 1 and conv.stride == (1,):
        # A 1x1 convolution is a matrix product over the channel axis.
        weight = conv.weight.view(chout, chin)
        flat = x.reshape(chin, length)
    elif plain and length == kernel:
        # One output step: a single dot product per output channel.
        weight = conv.weight.view(chout, chin * kernel)
        flat = x.reshape(chin * kernel, 1)
    else:
        return conv(x)

    if conv.bias is None:
        out = th.mm(weight, flat)
    else:
        out = th.addmm(conv.bias.view(-1, 1), weight, flat)
    return out.view(batch, chout, -1)
+ + Args: + - demucs (Demucs): Demucs model. + - dry (float): amount of dry (e.g. input) signal to keep. 0 is maximum + noise removal, 1 just returns the input signal. Small values > 0 + allows to limit distortions. + - num_frames (int): number of frames to process at once. Higher values + will increase overall latency but improve the real time factor. + - resample_lookahead (int): extra lookahead used for the resampling. + - resample_buffer (int): size of the buffer of previous inputs/outputs + kept for resampling. + """ + def __init__(self, demucs, + dry=0, + num_frames=1, + resample_lookahead=64, + resample_buffer=256): + device = next(iter(demucs.parameters())).device + self.demucs = demucs + self.lstm_state = None + self.conv_state = None + self.dry = dry + self.resample_lookahead = resample_lookahead + resample_buffer = min(demucs.total_stride, resample_buffer) + self.resample_buffer = resample_buffer + self.frame_length = demucs.valid_length(1) + \ + demucs.total_stride * (num_frames - 1) + self.total_length = self.frame_length + self.resample_lookahead + self.stride = demucs.total_stride * num_frames + self.resample_in = th.zeros(demucs.chin, resample_buffer, device=device) + self.resample_out = th.zeros( + demucs.chin, resample_buffer, device=device + ) + + self.frames = 0 + self.total_time = 0 + self.variance = 0 + self.pending = th.zeros(demucs.chin, 0, device=device) + + bias = demucs.decoder[0][2].bias + weight = demucs.decoder[0][2].weight + chin, chout, kernel = weight.shape + self._bias = bias.view(-1, 1).repeat(1, kernel).view(-1, 1) + self._weight = weight.permute(1, 2, 0).contiguous() + + def reset_time_per_frame(self): + self.total_time = 0 + self.frames = 0 + + @property + def time_per_frame(self): + return self.total_time / self.frames + + def flush(self): + """ + Flush remaining audio by padding it with zero. Call this + when you have no more input and want to get back the last chunk of audio. 
    def feed(self, wav):
        """
        Apply the model to mix using true real time evaluation.
        Normalization is done online as is the resampling.

        New audio is appended to an internal pending buffer; as long as that
        buffer holds at least ``self.total_length`` samples, one frame is
        processed and ``self.stride`` samples are consumed and emitted.

        Args:
            wav: 2-D tensor whose first dimension must equal
                ``self.demucs.chin``.

        Returns:
            The processed chunks concatenated along dim 1, or an empty
            ``(chin, 0)`` tensor when not enough audio is pending.

        Raises:
            ValueError: if ``wav`` is not 2-D or has the wrong channel count.
        """
        begin = time.time()
        demucs = self.demucs
        resample_buffer = self.resample_buffer
        stride = self.stride
        resample = demucs.resample

        if wav.dim() != 2:
            raise ValueError("input wav should be two dimensional.")
        chin, _ = wav.shape
        if chin != demucs.chin:
            raise ValueError(f"Expected {demucs.chin} channels, got {chin}")

        self.pending = th.cat([self.pending, wav], dim=1)
        outs = []
        while self.pending.shape[1] >= self.total_length:
            self.frames += 1
            frame = self.pending[:, :self.total_length]
            # Un-normalized input kept for the dry/wet mix at the end.
            dry_signal = frame[:, :stride]
            if demucs.normalize:
                mono = frame.mean(0)
                variance = (mono**2).mean()
                # Cumulative average of the per-frame mean square over all
                # frames seen so far.
                self.variance = variance / self.frames + \
                    (1 - 1 / self.frames) * self.variance
                frame = frame / (demucs.floor + math.sqrt(self.variance))
            # Prepend the saved input context for the resampler, then store
            # the slice that will serve as context for the next frame.
            frame = th.cat([self.resample_in, frame], dim=-1)
            self.resample_in[:] = frame[:, stride - resample_buffer:stride]

            if resample == 4:
                frame = upsample2(upsample2(frame))
            elif resample == 2:
                frame = upsample2(frame)
            # remove pre sampling buffer
            frame = frame[:, resample * resample_buffer:]
            # remove extra samples after window
            frame = frame[:, :resample * self.frame_length]

            out, extra = self._separate_frame(frame)
            # Surround the new output with the saved previous output (left)
            # and the extra samples (right) before downsampling.
            padded_out = th.cat([self.resample_out, out, extra], 1)
            self.resample_out[:] = out[:, -resample_buffer:]
            if resample == 4:
                out = downsample2(downsample2(padded_out))
            elif resample == 2:
                out = downsample2(padded_out)
            else:
                out = padded_out

            # Drop the left context (now at the original rate) and keep
            # exactly one stride of output.
            out = out[:, resample_buffer // resample:]
            out = out[:, :stride]

            if demucs.normalize:
                # Undo the input normalization.
                out *= math.sqrt(self.variance)
            out = self.dry * dry_signal + (1 - self.dry) * out
            outs.append(out)
            self.pending = self.pending[:, stride:]

        # Accumulated for the time_per_frame statistic.
        self.total_time += time.time() - begin
        if outs:
            out = th.cat(outs, 1)
        else:
            out = th.zeros(chin, 0, device=wav.device)
        return out
def test():
    """Benchmark the streaming Demucs against the offline implementation.

    Parses options from the command line, runs 4 seconds of random audio
    through both the offline model and ``DemucsStreamer``, and prints the
    model size, the relative difference between the two outputs, and
    latency / real-time-factor figures.
    """
    import argparse
    parser = argparse.ArgumentParser(
        "denoiser.demucs",
        description="Benchmark the streaming Demucs implementation, as well as "
                    "checking the delta with the offline implementation.")
    parser.add_argument("--depth", default=5, type=int)
    parser.add_argument("--resample", default=4, type=int)
    parser.add_argument("--hidden", default=48, type=int)
    parser.add_argument("--sample_rate", default=16000, type=float)
    parser.add_argument("--device", default="cpu")
    parser.add_argument("-t", "--num_threads", type=int)
    parser.add_argument("-f", "--num_frames", type=int, default=1)
    args = parser.parse_args()
    if args.num_threads:
        th.set_num_threads(args.num_threads)
    sr = args.sample_rate
    sr_ms = sr / 1000
    demucs = Demucs(
        depth=args.depth, hidden=args.hidden, resample=args.resample
    ).to(args.device)
    x = th.randn(1, int(sr * 4)).to(args.device)
    streamer = DemucsStreamer(demucs, num_frames=args.num_frames)
    out_rt = []
    frame_size = streamer.total_length
    # Inference only: no gradients are needed for either the offline pass,
    # the streaming loop or the final flush.
    with th.no_grad():
        out = demucs(x[None])[0]
        while x.shape[1] > 0:
            out_rt.append(streamer.feed(x[:, :frame_size]))
            x = x[:, frame_size:]
            frame_size = streamer.demucs.total_stride
        out_rt.append(streamer.flush())
    out_rt = th.cat(out_rt, 1)
    model_size = sum(p.numel() for p in demucs.parameters()) * 4 / 2**20
    initial_lag = streamer.total_length / sr_ms
    tpf = 1000 * streamer.time_per_frame
    # streamer.stride already includes the num_frames factor, so it must not
    # be multiplied by args.num_frames again.
    stride_ms = streamer.stride / sr_ms
    print(f"model size: {model_size:.1f}MB, ", end='')
    print(f"delta batch/streaming: {th.norm(out - out_rt) / th.norm(out):.2%}")
    print(f"initial lag: {initial_lag:.1f}ms, ", end='')
    print(f"stride: {stride_ms:.1f}ms")
    print(f"time per frame: {tpf:.1f}ms, ", end='')
    rtf = tpf / stride_ms
    print(f"RTF: {rtf:.2f}")
    print(f"Total lag with computation: {initial_lag + tpf:.1f}ms")
def add_model_flags(parser):
    """Register the mutually exclusive model-selection options on ``parser``.

    Adds ``-m/--model_path`` plus one boolean switch per bundled pre-trained
    model (``--dns48``, ``--dns64``, ``--master64``); at most one of them may
    be given.
    """
    choices = parser.add_mutually_exclusive_group(required=False)
    choices.add_argument(
        "-m", "--model_path", help="Path to local trained model."
    )
    pretrained_switches = (
        ("--dns48",
         "Use pre-trained real time H=48 model trained on DNS."),
        ("--dns64",
         "Use pre-trained real time H=64 model trained on DNS."),
        ("--master64",
         "Use pre-trained real time H=64 model trained on DNS and Valentini."),
    )
    for flag, description in pretrained_switches:
        choices.add_argument(flag, action="store_true", help=description)
def sinc(t):
    """Element-wise ``sin(t) / t``, with entries where ``t == 0`` set to 1.

    :param t: the input tensor
    """
    one = t.new_tensor(1.)
    ratio = th.sin(t) / t
    return th.where(t == 0, one, ratio)
def deserialize_model(package, strict=False):
    """Rebuild a model from a package produced by ``serialize_model``.

    Args:
        package: mapping with the keys ``'class'`` (the model class),
            ``'args'`` and ``'kwargs'`` (constructor arguments) and
            ``'state'`` (passed to ``load_state_dict``).
        strict: when true, every saved keyword argument is passed to the
            constructor. Otherwise keyword arguments that are not in the
            class signature are dropped with a warning.

    Returns:
        The instantiated model with its state loaded.

    The ``package`` mapping is left untouched: dropped keyword arguments are
    filtered into a new dict rather than deleted from ``package['kwargs']``.
    """
    klass = package['class']
    kwargs = package['kwargs']
    if not strict:
        accepted = inspect.signature(klass).parameters
        kept = {}
        for key, value in kwargs.items():
            if key in accepted:
                kept[key] = value
            else:
                logger.warning("Dropping inexistant parameter %s", key)
        kwargs = kept
    model = klass(*package['args'], **kwargs)
    model.load_state_dict(package['state'])
    return model
def colorize(text, color):
    """
    Wrap text in an ANSI escape sequence selecting `color`, followed by the
    reset sequence, for display in the terminal.
    """
    return f"\033[{color}m" + text + "\033[0m"
def cal_snr(lbl, est):
    """Signal-to-noise ratio, in dB, of ``est`` with respect to ``lbl``.

    Energies are summed over the last dimension; ``EPS`` is added to the
    error energy and to the ratio to keep the division and the logarithm
    finite.
    """
    import torch

    signal_energy = torch.sum(lbl ** 2, dim=-1)
    error_energy = torch.sum((est - lbl) ** 2, dim=-1)
    ratio = signal_energy / (error_energy + EPS) + EPS
    return 10.0 * torch.log10(ratio)
np.random.seed(rand_seed) + dev_split_ratio = (1. - train_split_ratio) / 3 + grouped = list(df.groupby("client_id")) + id_to_split = {} + for _, cur_df in tqdm(grouped): + cur_n_examples = len(cur_df) + if speaker_in_all_splits and cur_n_examples < 3: + continue + cur_n_train = int(cur_n_examples * train_split_ratio) + cur_n_dev = int(cur_n_examples * dev_split_ratio) + cur_n_test = cur_n_examples - cur_n_dev - cur_n_train + if speaker_in_all_splits and cur_n_dev * cur_n_test == 0: + cur_n_dev, cur_n_test = 1, 1 + cur_n_train = cur_n_examples - cur_n_dev - cur_n_test + cur_indices = cur_df.index.tolist() + cur_shuffled_indices = np.random.permutation(cur_n_examples) + cur_shuffled_indices = [cur_indices[i] for i in cur_shuffled_indices] + cur_indices_by_split = { + "train": cur_shuffled_indices[:cur_n_train], + "dev": cur_shuffled_indices[cur_n_train: cur_n_train + cur_n_dev], + "test": cur_shuffled_indices[cur_n_train + cur_n_dev:] + } + for split in SPLITS: + for i in cur_indices_by_split[split]: + id_ = df["id"].loc[i] + id_to_split[id_] = split + return id_to_split, sorted(df["client_id"].unique()) + + +def convert_to_wav(root: Path, filenames: List[str], target_sr=16_000): + out_root = root / "wav" + out_root.mkdir(exist_ok=True, parents=True) + print("Converting to WAV...") + for n in tqdm(filenames): + in_path = (root / "clips" / n).as_posix() + waveform, sr = torchaudio.load(in_path) + converted, converted_sr = torchaudio.sox_effects.apply_effects_tensor( + waveform, sr, [["rate", str(target_sr)], ["channels", "1"]] + ) + out_path = (out_root / Path(n).with_suffix(".wav").name).as_posix() + torchaudio.save(out_path, converted, converted_sr, encoding="PCM_S", + bits_per_sample=16) + + +def process(args): + data_root = Path(args.data_root).absolute() / args.lang + + # Generate TSV manifest + print("Generating manifest...") + + df_top_n = get_top_n(data_root) + id_to_split, speakers = get_splits(df_top_n) + + if args.convert_to_wav: + convert_to_wav(data_root, 
df_top_n["path"].tolist()) + + manifest_by_split = {split: defaultdict(list) for split in SPLITS} + for sample in tqdm(df_top_n.to_dict(orient="index").values()): + sample_id = sample["id"] + split = id_to_split[sample_id] + manifest_by_split[split]["id"].append(sample_id) + if args.convert_to_wav: + audio_path = data_root / "wav" / f"{sample_id}.wav" + else: + audio_path = data_root / "clips" / f"{sample_id}.mp3" + manifest_by_split[split]["audio"].append(audio_path.as_posix()) + manifest_by_split[split]["n_frames"].append(sample["n_frames"]) + manifest_by_split[split]["tgt_text"].append(sample["sentence"]) + manifest_by_split[split]["speaker"].append(sample["client_id"]) + manifest_by_split[split]["src_text"].append(sample["sentence"]) + + output_root = Path(args.output_manifest_root).absolute() + output_root.mkdir(parents=True, exist_ok=True) + for split in SPLITS: + save_df_to_tsv( + pd.DataFrame.from_dict(manifest_by_split[split]), + output_root / f"{split}.audio.tsv" + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--data-root", "-d", required=True, type=str) + parser.add_argument("--output-manifest-root", "-m", required=True, type=str) + parser.add_argument("--lang", "-l", required=True, type=str) + parser.add_argument("--convert-to-wav", action="store_true") + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/get_feature_manifest.py b/fairseq/examples/speech_synthesis/preprocessing/get_feature_manifest.py new file mode 100644 index 0000000..4a1e119 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/get_feature_manifest.py @@ -0,0 +1,262 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import logging +from pathlib import Path +import shutil +from tempfile import NamedTemporaryFile +from collections import Counter, defaultdict + +import pandas as pd +import torchaudio +from tqdm import tqdm + +from fairseq.data.audio.audio_utils import convert_waveform +from examples.speech_to_text.data_utils import ( + create_zip, + gen_config_yaml, + gen_vocab, + get_zip_manifest, + load_tsv_to_dicts, + save_df_to_tsv +) +from examples.speech_synthesis.data_utils import ( + extract_logmel_spectrogram, extract_pitch, extract_energy, get_global_cmvn, + ipa_phonemize, get_mfa_alignment, get_unit_alignment, + get_feature_value_min_max +) + + +log = logging.getLogger(__name__) + + +def process(args): + assert "train" in args.splits + out_root = Path(args.output_root).absolute() + out_root.mkdir(exist_ok=True) + + print("Fetching data...") + audio_manifest_root = Path(args.audio_manifest_root).absolute() + samples = [] + for s in args.splits: + for e in load_tsv_to_dicts(audio_manifest_root / f"{s}.audio.tsv"): + e["split"] = s + samples.append(e) + sample_ids = [s["id"] for s in samples] + + # Get alignment info + id_to_alignment = None + if args.textgrid_zip is not None: + assert args.id_to_units_tsv is None + id_to_alignment = get_mfa_alignment( + args.textgrid_zip, sample_ids, args.sample_rate, args.hop_length + ) + elif args.id_to_units_tsv is not None: + # assume identical hop length on the unit sequence + id_to_alignment = get_unit_alignment(args.id_to_units_tsv, sample_ids) + + # Extract features and pack features into ZIP + feature_name = "logmelspec80" + zip_path = out_root / f"{feature_name}.zip" + pitch_zip_path = out_root / "pitch.zip" + energy_zip_path = out_root / "energy.zip" + gcmvn_npz_path = out_root / "gcmvn_stats.npz" + if zip_path.exists() and gcmvn_npz_path.exists(): + print(f"{zip_path} and {gcmvn_npz_path} exist.") + else: + feature_root = out_root / feature_name + feature_root.mkdir(exist_ok=True) + pitch_root = out_root / 
"pitch" + energy_root = out_root / "energy" + if args.add_fastspeech_targets: + pitch_root.mkdir(exist_ok=True) + energy_root.mkdir(exist_ok=True) + print("Extracting Mel spectrogram features...") + for sample in tqdm(samples): + waveform, sample_rate = torchaudio.load(sample["audio"]) + waveform, sample_rate = convert_waveform( + waveform, sample_rate, normalize_volume=args.normalize_volume, + to_sample_rate=args.sample_rate + ) + sample_id = sample["id"] + target_length = None + if id_to_alignment is not None: + a = id_to_alignment[sample_id] + target_length = sum(a.frame_durations) + if a.start_sec is not None and a.end_sec is not None: + start_frame = int(a.start_sec * sample_rate) + end_frame = int(a.end_sec * sample_rate) + waveform = waveform[:, start_frame: end_frame] + extract_logmel_spectrogram( + waveform, sample_rate, feature_root / f"{sample_id}.npy", + win_length=args.win_length, hop_length=args.hop_length, + n_fft=args.n_fft, n_mels=args.n_mels, f_min=args.f_min, + f_max=args.f_max, target_length=target_length + ) + if args.add_fastspeech_targets: + assert id_to_alignment is not None + extract_pitch( + waveform, sample_rate, pitch_root / f"{sample_id}.npy", + hop_length=args.hop_length, log_scale=True, + phoneme_durations=id_to_alignment[sample_id].frame_durations + ) + extract_energy( + waveform, energy_root / f"{sample_id}.npy", + hop_length=args.hop_length, n_fft=args.n_fft, + log_scale=True, + phoneme_durations=id_to_alignment[sample_id].frame_durations + ) + print("ZIPing features...") + create_zip(feature_root, zip_path) + get_global_cmvn(feature_root, gcmvn_npz_path) + shutil.rmtree(feature_root) + if args.add_fastspeech_targets: + create_zip(pitch_root, pitch_zip_path) + shutil.rmtree(pitch_root) + create_zip(energy_root, energy_zip_path) + shutil.rmtree(energy_root) + + print("Fetching ZIP manifest...") + audio_paths, audio_lengths = get_zip_manifest(zip_path) + pitch_paths, pitch_lengths, energy_paths, energy_lengths = [None] * 4 + if 
args.add_fastspeech_targets: + pitch_paths, pitch_lengths = get_zip_manifest(pitch_zip_path) + energy_paths, energy_lengths = get_zip_manifest(energy_zip_path) + # Generate TSV manifest + print("Generating manifest...") + id_to_cer = None + if args.cer_threshold is not None: + assert Path(args.cer_tsv_path).is_file() + id_to_cer = { + x["id"]: x["uer"] for x in load_tsv_to_dicts(args.cer_tsv_path) + } + manifest_by_split = {split: defaultdict(list) for split in args.splits} + for sample in tqdm(samples): + sample_id, split = sample["id"], sample["split"] + + if args.snr_threshold is not None and "snr" in sample \ + and sample["snr"] < args.snr_threshold: + continue + if args.cer_threshold is not None \ + and id_to_cer[sample_id] > args.cer_threhold: + continue + + normalized_utt = sample["tgt_text"] + if id_to_alignment is not None: + normalized_utt = " ".join(id_to_alignment[sample_id].tokens) + elif args.ipa_vocab: + normalized_utt = ipa_phonemize( + normalized_utt, lang=args.lang, use_g2p=args.use_g2p + ) + manifest_by_split[split]["id"].append(sample_id) + manifest_by_split[split]["audio"].append(audio_paths[sample_id]) + manifest_by_split[split]["n_frames"].append(audio_lengths[sample_id]) + manifest_by_split[split]["tgt_text"].append(normalized_utt) + manifest_by_split[split]["speaker"].append(sample["speaker"]) + manifest_by_split[split]["src_text"].append(sample["src_text"]) + if args.add_fastspeech_targets: + assert id_to_alignment is not None + duration = " ".join( + str(d) for d in id_to_alignment[sample_id].frame_durations + ) + manifest_by_split[split]["duration"].append(duration) + manifest_by_split[split]["pitch"].append(pitch_paths[sample_id]) + manifest_by_split[split]["energy"].append(energy_paths[sample_id]) + for split in args.splits: + save_df_to_tsv( + pd.DataFrame.from_dict(manifest_by_split[split]), + out_root / f"{split}.tsv" + ) + # Generate vocab + vocab_name, spm_filename = None, None + if id_to_alignment is not None or args.ipa_vocab: + 
vocab = Counter() + for t in manifest_by_split["train"]["tgt_text"]: + vocab.update(t.split(" ")) + vocab_name = "vocab.txt" + with open(out_root / vocab_name, "w") as f: + for s, c in vocab.most_common(): + f.write(f"{s} {c}\n") + else: + spm_filename_prefix = "spm_char" + spm_filename = f"{spm_filename_prefix}.model" + with NamedTemporaryFile(mode="w") as f: + for t in manifest_by_split["train"]["tgt_text"]: + f.write(t + "\n") + f.flush() # needed to ensure gen_vocab sees dumped text + gen_vocab(Path(f.name), out_root / spm_filename_prefix, "char") + # Generate speaker list + speakers = sorted({sample["speaker"] for sample in samples}) + speakers_path = out_root / "speakers.txt" + with open(speakers_path, "w") as f: + for speaker in speakers: + f.write(f"{speaker}\n") + # Generate config YAML + win_len_t = args.win_length / args.sample_rate + hop_len_t = args.hop_length / args.sample_rate + extra = { + "sample_rate": args.sample_rate, + "features": { + "type": "spectrogram+melscale+log", + "eps": 1e-5, "n_mels": args.n_mels, "n_fft": args.n_fft, + "window_fn": "hann", "win_length": args.win_length, + "hop_length": args.hop_length, "sample_rate": args.sample_rate, + "win_len_t": win_len_t, "hop_len_t": hop_len_t, + "f_min": args.f_min, "f_max": args.f_max, + "n_stft": args.n_fft // 2 + 1 + } + } + if len(speakers) > 1: + extra["speaker_set_filename"] = "speakers.txt" + if args.add_fastspeech_targets: + pitch_min, pitch_max = get_feature_value_min_max( + [(out_root / n).as_posix() for n in pitch_paths.values()] + ) + energy_min, energy_max = get_feature_value_min_max( + [(out_root / n).as_posix() for n in energy_paths.values()] + ) + extra["features"]["pitch_min"] = pitch_min + extra["features"]["pitch_max"] = pitch_max + extra["features"]["energy_min"] = energy_min + extra["features"]["energy_max"] = energy_max + gen_config_yaml( + out_root, spm_filename=spm_filename, vocab_name=vocab_name, + audio_root=out_root.as_posix(), input_channels=None, + 
input_feat_per_channel=None, specaugment_policy=None, + cmvn_type="global", gcmvn_path=gcmvn_npz_path, extra=extra + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--audio-manifest-root", "-m", required=True, type=str) + parser.add_argument("--output-root", "-o", required=True, type=str) + parser.add_argument("--splits", "-s", type=str, nargs="+", + default=["train", "dev", "test"]) + parser.add_argument("--ipa-vocab", action="store_true") + parser.add_argument("--use-g2p", action="store_true") + parser.add_argument("--lang", type=str, default="en-us") + parser.add_argument("--win-length", type=int, default=1024) + parser.add_argument("--hop-length", type=int, default=256) + parser.add_argument("--n-fft", type=int, default=1024) + parser.add_argument("--n-mels", type=int, default=80) + parser.add_argument("--f-min", type=int, default=20) + parser.add_argument("--f-max", type=int, default=8000) + parser.add_argument("--sample-rate", type=int, default=22050) + parser.add_argument("--normalize-volume", "-n", action="store_true") + parser.add_argument("--textgrid-zip", type=str, default=None) + parser.add_argument("--id-to-units-tsv", type=str, default=None) + parser.add_argument("--add-fastspeech-targets", action="store_true") + parser.add_argument("--snr-threshold", type=float, default=None) + parser.add_argument("--cer-threshold", type=float, default=None) + parser.add_argument("--cer-tsv-path", type=str, default="") + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/get_ljspeech_audio_manifest.py b/fairseq/examples/speech_synthesis/preprocessing/get_ljspeech_audio_manifest.py new file mode 100644 index 0000000..7ec1fb7 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/get_ljspeech_audio_manifest.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import logging +from pathlib import Path +from collections import defaultdict + +import pandas as pd +from torchaudio.datasets import LJSPEECH +from tqdm import tqdm + +from examples.speech_to_text.data_utils import save_df_to_tsv + + +log = logging.getLogger(__name__) + +SPLITS = ["train", "dev", "test"] + + +def process(args): + out_root = Path(args.output_data_root).absolute() + out_root.mkdir(parents=True, exist_ok=True) + + # Generate TSV manifest + print("Generating manifest...") + # following FastSpeech's splits + dataset = LJSPEECH(out_root.as_posix(), download=True) + id_to_split = {} + for x in dataset._flist: + id_ = x[0] + speaker = id_.split("-")[0] + id_to_split[id_] = { + "LJ001": "test", "LJ002": "test", "LJ003": "dev" + }.get(speaker, "train") + manifest_by_split = {split: defaultdict(list) for split in SPLITS} + progress = tqdm(enumerate(dataset), total=len(dataset)) + for i, (waveform, _, utt, normalized_utt) in progress: + sample_id = dataset._flist[i][0] + split = id_to_split[sample_id] + manifest_by_split[split]["id"].append(sample_id) + audio_path = f"{dataset._path}/{sample_id}.wav" + manifest_by_split[split]["audio"].append(audio_path) + manifest_by_split[split]["n_frames"].append(len(waveform[0])) + manifest_by_split[split]["tgt_text"].append(normalized_utt) + manifest_by_split[split]["speaker"].append("ljspeech") + manifest_by_split[split]["src_text"].append(utt) + + manifest_root = Path(args.output_manifest_root).absolute() + manifest_root.mkdir(parents=True, exist_ok=True) + for split in SPLITS: + save_df_to_tsv( + pd.DataFrame.from_dict(manifest_by_split[split]), + manifest_root / f"{split}.audio.tsv" + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--output-data-root", "-d", required=True, type=str) + parser.add_argument("--output-manifest-root", "-m", 
required=True, type=str) + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/get_speaker_embedding.py b/fairseq/examples/speech_synthesis/preprocessing/get_speaker_embedding.py new file mode 100644 index 0000000..0e3e4c5 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/get_speaker_embedding.py @@ -0,0 +1,89 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import argparse +from collections import defaultdict +from itertools import chain +from pathlib import Path + +import numpy as np +import torchaudio +import torchaudio.sox_effects as ta_sox +import yaml +from tqdm import tqdm + +from examples.speech_to_text.data_utils import load_tsv_to_dicts +from examples.speech_synthesis.preprocessing.speaker_embedder import SpkrEmbedder + + +def extract_embedding(audio_path, embedder): + wav, sr = torchaudio.load(audio_path) # 2D + if sr != embedder.RATE: + wav, sr = ta_sox.apply_effects_tensor( + wav, sr, [["rate", str(embedder.RATE)]] + ) + try: + emb = embedder([wav[0].cuda().float()]).cpu().numpy() + except RuntimeError: + emb = None + return emb + + +def process(args): + print("Fetching data...") + raw_manifest_root = Path(args.raw_manifest_root).absolute() + samples = [load_tsv_to_dicts(raw_manifest_root / (s + ".tsv")) + for s in args.splits] + samples = list(chain(*samples)) + with open(args.config, "r") as f: + config = yaml.load(f, Loader=yaml.FullLoader) + with open(f"{config['audio_root']}/{config['speaker_set_filename']}") as f: + speaker_to_id = {r.strip(): i for i, r in enumerate(f)} + + embedder = SpkrEmbedder(args.ckpt).cuda() + speaker_to_cnt = defaultdict(float) + speaker_to_emb = defaultdict(float) + for sample in tqdm(samples, desc="extract emb"): + emb = extract_embedding(sample["audio"], embedder) + if 
emb is not None: + speaker_to_cnt[sample["speaker"]] += 1 + speaker_to_emb[sample["speaker"]] += emb + if len(speaker_to_emb) != len(speaker_to_id): + missed = set(speaker_to_id) - set(speaker_to_emb.keys()) + print( + f"WARNING: missing embeddings for {len(missed)} speaker:\n{missed}" + ) + speaker_emb_mat = np.zeros((len(speaker_to_id), len(emb)), float) + for speaker in speaker_to_emb: + idx = speaker_to_id[speaker] + emb = speaker_to_emb[speaker] + cnt = speaker_to_cnt[speaker] + speaker_emb_mat[idx, :] = emb / cnt + speaker_emb_name = "speaker_emb.npy" + speaker_emb_path = f"{config['audio_root']}/{speaker_emb_name}" + np.save(speaker_emb_path, speaker_emb_mat) + config["speaker_emb_filename"] = speaker_emb_name + + with open(args.new_config, "w") as f: + yaml.dump(config, f) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--raw-manifest-root", "-m", required=True, type=str) + parser.add_argument("--splits", "-s", type=str, nargs="+", + default=["train"]) + parser.add_argument("--config", "-c", required=True, type=str) + parser.add_argument("--new-config", "-n", required=True, type=str) + parser.add_argument("--ckpt", required=True, type=str, + help="speaker embedder checkpoint") + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/get_vctk_audio_manifest.py b/fairseq/examples/speech_synthesis/preprocessing/get_vctk_audio_manifest.py new file mode 100644 index 0000000..7afa40f --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/get_vctk_audio_manifest.py @@ -0,0 +1,79 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import logging +import numpy as np +import re +from pathlib import Path +from collections import defaultdict + +import pandas as pd +from torchaudio.datasets import VCTK +from tqdm import tqdm + +from examples.speech_to_text.data_utils import save_df_to_tsv + + +log = logging.getLogger(__name__) + +SPLITS = ["train", "dev", "test"] + + +def normalize_text(text): + return re.sub(r"[^a-zA-Z.?!,'\- ]", '', text) + + +def process(args): + out_root = Path(args.output_data_root).absolute() + out_root.mkdir(parents=True, exist_ok=True) + + # Generate TSV manifest + print("Generating manifest...") + dataset = VCTK(out_root.as_posix(), download=False) + ids = list(dataset._walker) + np.random.seed(args.seed) + np.random.shuffle(ids) + n_train = len(ids) - args.n_dev - args.n_test + _split = ["train"] * n_train + ["dev"] * args.n_dev + ["test"] * args.n_test + id_to_split = dict(zip(ids, _split)) + manifest_by_split = {split: defaultdict(list) for split in SPLITS} + progress = tqdm(enumerate(dataset), total=len(dataset)) + for i, (waveform, _, text, speaker_id, _) in progress: + sample_id = dataset._walker[i] + _split = id_to_split[sample_id] + audio_dir = Path(dataset._path) / dataset._folder_audio / speaker_id + audio_path = audio_dir / f"{sample_id}.wav" + text = normalize_text(text) + manifest_by_split[_split]["id"].append(sample_id) + manifest_by_split[_split]["audio"].append(audio_path.as_posix()) + manifest_by_split[_split]["n_frames"].append(len(waveform[0])) + manifest_by_split[_split]["tgt_text"].append(text) + manifest_by_split[_split]["speaker"].append(speaker_id) + manifest_by_split[_split]["src_text"].append(text) + + manifest_root = Path(args.output_manifest_root).absolute() + manifest_root.mkdir(parents=True, exist_ok=True) + for _split in SPLITS: + save_df_to_tsv( + pd.DataFrame.from_dict(manifest_by_split[_split]), + manifest_root / f"{_split}.audio.tsv" + ) + + +def main(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--output-data-root", "-d", required=True, type=str) + parser.add_argument("--output-manifest-root", "-m", required=True, type=str) + parser.add_argument("--n-dev", default=50, type=int) + parser.add_argument("--n-test", default=100, type=int) + parser.add_argument("--seed", "-s", default=1234, type=int) + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_synthesis/preprocessing/speaker_embedder/__init__.py b/fairseq/examples/speech_synthesis/preprocessing/speaker_embedder/__init__.py new file mode 100644 index 0000000..3b17867 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/speaker_embedder/__init__.py @@ -0,0 +1,135 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import librosa +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.data +import torchaudio + + +EMBEDDER_PARAMS = { + 'num_mels': 40, + 'n_fft': 512, + 'emb_dim': 256, + 'lstm_hidden': 768, + 'lstm_layers': 3, + 'window': 80, + 'stride': 40, +} + + +def set_requires_grad(nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary + computations + Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad + + +class LinearNorm(nn.Module): + def __init__(self, hp): + super(LinearNorm, self).__init__() + self.linear_layer = nn.Linear(hp["lstm_hidden"], hp["emb_dim"]) + + def forward(self, x): + return self.linear_layer(x) + + +class SpeechEmbedder(nn.Module): + def __init__(self, hp): + super(SpeechEmbedder, self).__init__() + self.lstm = 
nn.LSTM(hp["num_mels"], + hp["lstm_hidden"], + num_layers=hp["lstm_layers"], + batch_first=True) + self.proj = LinearNorm(hp) + self.hp = hp + + def forward(self, mel): + # (num_mels, T) -> (num_mels, T', window) + mels = mel.unfold(1, self.hp["window"], self.hp["stride"]) + mels = mels.permute(1, 2, 0) # (T', window, num_mels) + x, _ = self.lstm(mels) # (T', window, lstm_hidden) + x = x[:, -1, :] # (T', lstm_hidden), use last frame only + x = self.proj(x) # (T', emb_dim) + x = x / torch.norm(x, p=2, dim=1, keepdim=True) # (T', emb_dim) + + x = x.mean(dim=0) + if x.norm(p=2) != 0: + x = x / x.norm(p=2) + return x + + +class SpkrEmbedder(nn.Module): + RATE = 16000 + + def __init__( + self, + embedder_path, + embedder_params=EMBEDDER_PARAMS, + rate=16000, + hop_length=160, + win_length=400, + pad=False, + ): + super(SpkrEmbedder, self).__init__() + embedder_pt = torch.load(embedder_path, map_location="cpu") + self.embedder = SpeechEmbedder(embedder_params) + self.embedder.load_state_dict(embedder_pt) + self.embedder.eval() + set_requires_grad(self.embedder, requires_grad=False) + self.embedder_params = embedder_params + + self.register_buffer('mel_basis', torch.from_numpy( + librosa.filters.mel( + sr=self.RATE, + n_fft=self.embedder_params["n_fft"], + n_mels=self.embedder_params["num_mels"]) + ) + ) + + self.resample = None + if rate != self.RATE: + self.resample = torchaudio.transforms.Resample(rate, self.RATE) + self.hop_length = hop_length + self.win_length = win_length + self.pad = pad + + def get_mel(self, y): + if self.pad and y.shape[-1] < 14000: + y = F.pad(y, (0, 14000 - y.shape[-1])) + + window = torch.hann_window(self.win_length).to(y) + y = torch.stft(y, n_fft=self.embedder_params["n_fft"], + hop_length=self.hop_length, + win_length=self.win_length, + window=window) + magnitudes = torch.norm(y, dim=-1, p=2) ** 2 + mel = torch.log10(self.mel_basis @ magnitudes + 1e-6) + return mel + + def forward(self, inputs): + dvecs = [] + for wav in inputs: + mel = 
self.get_mel(wav) + if mel.dim() == 3: + mel = mel.squeeze(0) + dvecs += [self.embedder(mel)] + dvecs = torch.stack(dvecs) + + dvec = torch.mean(dvecs, dim=0) + dvec = dvec / torch.norm(dvec) + + return dvec diff --git a/fairseq/examples/speech_synthesis/preprocessing/vad/__init__.py b/fairseq/examples/speech_synthesis/preprocessing/vad/__init__.py new file mode 100644 index 0000000..9cf1210 --- /dev/null +++ b/fairseq/examples/speech_synthesis/preprocessing/vad/__init__.py @@ -0,0 +1,192 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import collections +import contextlib +import wave + +try: + import webrtcvad +except ImportError: + raise ImportError("Please install py-webrtcvad: pip install webrtcvad") +import argparse +import os +import logging +from tqdm import tqdm + +AUDIO_SUFFIX = '.wav' +FS_MS = 30 +SCALE = 6e-5 +THRESHOLD = 0.3 + + +def read_wave(path): + """Reads a .wav file. + Takes the path, and returns (PCM audio data, sample rate). + """ + with contextlib.closing(wave.open(path, 'rb')) as wf: + num_channels = wf.getnchannels() + assert num_channels == 1 + sample_width = wf.getsampwidth() + assert sample_width == 2 + sample_rate = wf.getframerate() + assert sample_rate in (8000, 16000, 32000, 48000) + pcm_data = wf.readframes(wf.getnframes()) + return pcm_data, sample_rate + + +def write_wave(path, audio, sample_rate): + """Writes a .wav file. + Takes path, PCM audio data, and sample rate. 
+ """ + with contextlib.closing(wave.open(path, 'wb')) as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(sample_rate) + wf.writeframes(audio) + + +class Frame(object): + """Represents a "frame" of audio data.""" + def __init__(self, bytes, timestamp, duration): + self.bytes = bytes + self.timestamp = timestamp + self.duration = duration + + +def frame_generator(frame_duration_ms, audio, sample_rate): + """Generates audio frames from PCM audio data. + Takes the desired frame duration in milliseconds, the PCM data, and + the sample rate. + Yields Frames of the requested duration. + """ + n = int(sample_rate * (frame_duration_ms / 1000.0) * 2) + offset = 0 + timestamp = 0.0 + duration = (float(n) / sample_rate) / 2.0 + while offset + n < len(audio): + yield Frame(audio[offset:offset + n], timestamp, duration) + timestamp += duration + offset += n + + +def vad_collector(sample_rate, frame_duration_ms, + padding_duration_ms, vad, frames): + """Filters out non-voiced audio frames. + Given a webrtcvad.Vad and a source of audio frames, yields only + the voiced audio. + Uses a padded, sliding window algorithm over the audio frames. + When more than 90% of the frames in the window are voiced (as + reported by the VAD), the collector triggers and begins yielding + audio frames. Then the collector waits until 90% of the frames in + the window are unvoiced to detrigger. + The window is padded at the front and back to provide a small + amount of silence or the beginnings/endings of speech around the + voiced frames. + Arguments: + sample_rate - The audio sample rate, in Hz. + frame_duration_ms - The frame duration in milliseconds. + padding_duration_ms - The amount to pad the window, in milliseconds. + vad - An instance of webrtcvad.Vad. + frames - a source of audio frames (sequence or generator). + Returns: A generator that yields PCM audio data. 
+ """ + num_padding_frames = int(padding_duration_ms / frame_duration_ms) + # We use a deque for our sliding window/ring buffer. + ring_buffer = collections.deque(maxlen=num_padding_frames) + # We have two states: TRIGGERED and NOTTRIGGERED. We start in the + # NOTTRIGGERED state. + triggered = False + + voiced_frames = [] + for frame in frames: + is_speech = vad.is_speech(frame.bytes, sample_rate) + + # sys.stdout.write('1' if is_speech else '0') + if not triggered: + ring_buffer.append((frame, is_speech)) + num_voiced = len([f for f, speech in ring_buffer if speech]) + # If we're NOTTRIGGERED and more than 90% of the frames in + # the ring buffer are voiced frames, then enter the + # TRIGGERED state. + if num_voiced > 0.9 * ring_buffer.maxlen: + triggered = True + # We want to yield all the audio we see from now until + # we are NOTTRIGGERED, but we have to start with the + # audio that's already in the ring buffer. + for f, _ in ring_buffer: + voiced_frames.append(f) + ring_buffer.clear() + else: + # We're in the TRIGGERED state, so collect the audio data + # and add it to the ring buffer. + voiced_frames.append(frame) + ring_buffer.append((frame, is_speech)) + num_unvoiced = len([f for f, speech in ring_buffer if not speech]) + # If more than 90% of the frames in the ring buffer are + # unvoiced, then enter NOTTRIGGERED and yield whatever + # audio we've collected. + if num_unvoiced > 0.9 * ring_buffer.maxlen: + triggered = False + yield [b''.join([f.bytes for f in voiced_frames]), + voiced_frames[0].timestamp, voiced_frames[-1].timestamp] + ring_buffer.clear() + voiced_frames = [] + # If we have any leftover voiced audio when we run out of input, + # yield it. 
+ if voiced_frames: + yield [b''.join([f.bytes for f in voiced_frames]), + voiced_frames[0].timestamp, voiced_frames[-1].timestamp] + + +def main(args): + # create output folder + try: + cmd = f"mkdir -p {args.out_path}" + os.system(cmd) + except Exception: + logging.error("Can not create output folder") + exit(-1) + + # build vad object + vad = webrtcvad.Vad(int(args.agg)) + # iterating over wavs in dir + for file in tqdm(os.listdir(args.in_path)): + if file.endswith(AUDIO_SUFFIX): + audio_inpath = os.path.join(args.in_path, file) + audio_outpath = os.path.join(args.out_path, file) + audio, sample_rate = read_wave(audio_inpath) + frames = frame_generator(FS_MS, audio, sample_rate) + frames = list(frames) + segments = vad_collector(sample_rate, FS_MS, 300, vad, frames) + merge_segments = list() + timestamp_start = 0.0 + timestamp_end = 0.0 + # removing start, end, and long sequences of sils + for i, segment in enumerate(segments): + merge_segments.append(segment[0]) + if i and timestamp_start: + sil_duration = segment[1] - timestamp_end + if sil_duration > THRESHOLD: + merge_segments.append(int(THRESHOLD / SCALE)*(b'\x00')) + else: + merge_segments.append(int((sil_duration / SCALE))*(b'\x00')) + timestamp_start = segment[1] + timestamp_end = segment[2] + segment = b''.join(merge_segments) + write_wave(audio_outpath, segment, sample_rate) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Apply vad to a file of fils.') + parser.add_argument('in_path', type=str, help='Path to the input files') + parser.add_argument('out_path', type=str, + help='Path to save the processed files') + parser.add_argument('--agg', type=int, default=3, + help='The level of aggressiveness of the VAD: [0-3]') + args = parser.parse_args() + + main(args) diff --git a/fairseq/examples/speech_synthesis/utils.py b/fairseq/examples/speech_synthesis/utils.py new file mode 100644 index 0000000..2c7b037 --- /dev/null +++ b/fairseq/examples/speech_synthesis/utils.py @@ 
-0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import numpy as np +import torch +from scipy.interpolate import interp1d +import torchaudio + +from fairseq.tasks.text_to_speech import ( + batch_compute_distortion, compute_rms_dist +) + + +def batch_mel_spectral_distortion( + y1, y2, sr, normalize_type="path", mel_fn=None +): + """ + https://arxiv.org/pdf/2011.03568.pdf + + Same as Mel Cepstral Distortion, but computed on log-mel spectrograms. + """ + if mel_fn is None or mel_fn.sample_rate != sr: + mel_fn = torchaudio.transforms.MelSpectrogram( + sr, n_fft=int(0.05 * sr), win_length=int(0.05 * sr), + hop_length=int(0.0125 * sr), f_min=20, n_mels=80, + window_fn=torch.hann_window + ).to(y1[0].device) + offset = 1e-6 + return batch_compute_distortion( + y1, y2, sr, lambda y: torch.log(mel_fn(y) + offset).transpose(-1, -2), + compute_rms_dist, normalize_type + ) + + +# This code is based on +# "https://github.com/bastibe/MAPS-Scripts/blob/master/helper.py" +def _same_t_in_true_and_est(func): + def new_func(true_t, true_f, est_t, est_f): + assert type(true_t) is np.ndarray + assert type(true_f) is np.ndarray + assert type(est_t) is np.ndarray + assert type(est_f) is np.ndarray + + interpolated_f = interp1d( + est_t, est_f, bounds_error=False, kind='nearest', fill_value=0 + )(true_t) + return func(true_t, true_f, true_t, interpolated_f) + + return new_func + + +@_same_t_in_true_and_est +def gross_pitch_error(true_t, true_f, est_t, est_f): + """The relative frequency in percent of pitch estimates that are + outside a threshold around the true pitch. Only frames that are + considered pitched by both the ground truth and the estimator (if + applicable) are considered. 
+ """ + + correct_frames = _true_voiced_frames(true_t, true_f, est_t, est_f) + gross_pitch_error_frames = _gross_pitch_error_frames( + true_t, true_f, est_t, est_f + ) + return np.sum(gross_pitch_error_frames) / np.sum(correct_frames) + + +def _gross_pitch_error_frames(true_t, true_f, est_t, est_f, eps=1e-8): + voiced_frames = _true_voiced_frames(true_t, true_f, est_t, est_f) + true_f_p_eps = [x + eps for x in true_f] + pitch_error_frames = np.abs(est_f / true_f_p_eps - 1) > 0.2 + return voiced_frames & pitch_error_frames + + +def _true_voiced_frames(true_t, true_f, est_t, est_f): + return (est_f != 0) & (true_f != 0) + + +def _voicing_decision_error_frames(true_t, true_f, est_t, est_f): + return (est_f != 0) != (true_f != 0) + + +@_same_t_in_true_and_est +def f0_frame_error(true_t, true_f, est_t, est_f): + gross_pitch_error_frames = _gross_pitch_error_frames( + true_t, true_f, est_t, est_f + ) + voicing_decision_error_frames = _voicing_decision_error_frames( + true_t, true_f, est_t, est_f + ) + return (np.sum(gross_pitch_error_frames) + + np.sum(voicing_decision_error_frames)) / (len(true_t)) + + +@_same_t_in_true_and_est +def voicing_decision_error(true_t, true_f, est_t, est_f): + voicing_decision_error_frames = _voicing_decision_error_frames( + true_t, true_f, est_t, est_f + ) + return np.sum(voicing_decision_error_frames) / (len(true_t)) diff --git a/fairseq/examples/speech_text_joint_to_text/README.md b/fairseq/examples/speech_text_joint_to_text/README.md new file mode 100644 index 0000000..c1aa119 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/README.md @@ -0,0 +1,51 @@ +# Joint Speech Text training in Fairseq +An extension of Fairseq s2t project with the speech to text task enhanced by the co-trained text to text mapping task. 
More details about Fairseq s2t can be found [here](../speech_to_text/README.md) + +## Examples +Examples of speech text joint training in fairseq +- [English-to-German MuST-C model](docs/ende-mustc.md) +- [IWSLT 2021 Multilingual Speech Translation](docs/iwslt2021.md) +- [Speech Text Joint Pre-training ](docs/pre-training.md) +## Citation +Please cite as: +``` +@inproceedings{Tang2022UnifiedSP, + title={Unified Speech-Text Pre-training for Speech Translation and Recognition}, + author={Yun Tang and Hongyu Gong and Ning Dong and Changhan Wang and Wei-Ning Hsu and Jiatao Gu and Alexei Baevski and Xian Li and Abdelrahman Mohamed and Michael Auli and Juan Miguel Pino}, + booktitle={ACL}, + year={2022} +} +@inproceedings{Tang2021IST, + title = {Improving Speech Translation by Understanding and Learning from the Auxiliary Text Translation Task}, + author = {Yun Tang and Juan Pino and Xian Li and Changhan Wang and Dmitriy Genzel}, + booktitle = {ACL}, + year = {2021}, +} + +@inproceedings{Tang2021FST, + title = {FST: the FAIR Speech Translation System for the IWSLT21 Multilingual Shared Task}, + author = {Yun Tang and Hongyu Gong and Xian Li and Changhan Wang and Juan Pino and Holger Schwenk and Naman Goyal}, + booktitle = {IWSLT}, + year = {2021}, +} +@inproceedings{Tang2021AGM, + title={A General Multi-Task Learning Framework to Leverage Text Data for Speech to Text Tasks}, + author={Yun Tang and J. 
Pino and Changhan Wang and Xutai Ma and Dmitriy Genzel}, + booktitle={ICASSP}, + year={2021} +} + +@inproceedings{wang2020fairseqs2t, + title = {fairseq S2T: Fast Speech-to-Text Modeling with fairseq}, + author = {Changhan Wang and Yun Tang and Xutai Ma and Anne Wu and Dmytro Okhonko and Juan Pino}, + booktitle = {Proceedings of the 2020 Conference of the Asian Chapter of the Association for Computational Linguistics (AACL): System Demonstrations}, + year = {2020}, +} + +@inproceedings{ott2019fairseq, + title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling}, + author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli}, + booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations}, + year = {2019}, +} +``` diff --git a/fairseq/examples/speech_text_joint_to_text/__init__.py b/fairseq/examples/speech_text_joint_to_text/__init__.py new file mode 100644 index 0000000..239d2e6 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import tasks, criterions, models # noqa diff --git a/fairseq/examples/speech_text_joint_to_text/configs/mustc_noise.list b/fairseq/examples/speech_text_joint_to_text/configs/mustc_noise.list new file mode 100644 index 0000000..02eeac4 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/configs/mustc_noise.list @@ -0,0 +1,49 @@ +"(Applause) NOISE +"(Laughter) VOICE +"(Laughter)" VOICE +(Applause) NOISE +(Applause). 
NOISE +(Audience) VOICE +(Audio) NOISE +(Beat) NOISE +(Beatboxing) VOICE +(Beep) NOISE +(Beeps) NOISE +(Cheering) VOICE +(Cheers) VOICE +(Claps) NOISE +(Clicking) NOISE +(Clunk) NOISE +(Coughs) NOISE +(Drums) NOISE +(Explosion) NOISE +(Gasps) VOICE +(Guitar) NOISE +(Honk) NOISE +(Laugher) VOICE +(Laughing) VOICE +(Laughs) VOICE +(Laughter) VOICE +(Laughter). VOICE +(Laughter)... VOICE +(Mumbling) VOICE +(Music) NOISE +(Noise) NOISE +(Recording) VOICE +(Ringing) NOISE +(Shouts) VOICE +(Sigh) VOICE +(Sighs) VOICE +(Silence) NOISE +(Singing) VOICE +(Sings) VOICE +(Spanish) VOICE +(Static) NOISE +(Tones) NOISE +(Trumpet) NOISE +(Video) NOISE +(Video): NOISE +(Voice-over) NOISE +(Whistle) NOISE +(Whistling) NOISE +(video): NOISE diff --git a/fairseq/examples/speech_text_joint_to_text/criterions/__init__.py b/fairseq/examples/speech_text_joint_to_text/criterions/__init__.py new file mode 100644 index 0000000..7faae73 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/criterions/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "examples.speech_text_joint_to_text.criterions." + criterion_name + ) diff --git a/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_compound.py b/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_compound.py new file mode 100644 index 0000000..b3a5506 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_compound.py @@ -0,0 +1,181 @@ +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+import logging +import math +from dataclasses import dataclass, field + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.ctc import CtcCriterion, CtcCriterionConfig +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterionConfig, +) +from fairseq.logging.meters import safe_round + +from .multi_modality_cross_entropy import SpeechTextPreTrainCrossEntCriterion + +logger = logging.getLogger(__name__) + + +@dataclass +class SpeechTextPreTrainCompoundCriterionConfig( + LabelSmoothedCrossEntropyCriterionConfig +): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + post_process: str = field( + default="none", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. " + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + + +@register_criterion( + "speech_text_pretrain_compound", dataclass=SpeechTextPreTrainCompoundCriterionConfig +) +class SpeechTextPreTrainCompoundCriterion(FairseqCriterion): + def __init__( + self, + task, + sentence_avg, + label_smoothing, + report_accuracy=False, + zero_infinity=False, + post_process=None, + ): + super().__init__(task) + self.xent = SpeechTextPreTrainCrossEntCriterion( + task, sentence_avg, label_smoothing, report_accuracy + ) + cfg_dict = { + "zero_infinity": zero_infinity, + "sentence_avg": sentence_avg, + "post_process": post_process, + } + cfg_ctc = CtcCriterionConfig(**cfg_dict) + self.ctc = CtcCriterion(cfg_ctc, task) + + def forward(self, model, sample, reduce=True): + mode = sample["net_input"]["mode"] + if mode == "sup_speech_ctc": # CTC + sample["net_input"][ + "src_lengths" + ] = None # get downsampled src_lengths from padding_mask + loss, sample_size, logging_output = self.ctc(model, sample, reduce) + 
logging_output["mode"] = SpeechTextPreTrainCompoundCriterion.mode2value( + "CTC" + ) + else: + loss, sample_size, logging_output = self.xent(model, sample, reduce) + logging_output["mode"] = SpeechTextPreTrainCompoundCriterion.mode2value( + "xent" + ) + + return loss, sample_size, logging_output + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improve distributed training speed. + """ + return True + + @staticmethod + def mode2value(mode): # encode the mode as an int so logging_outputs_can_be_summed can stay True + if mode == "CTC": + return 907 # prime number + if mode == "xent": + return 887 # prime number + return 0 + + @staticmethod + def value2mode(value): + if value % 907 == 0: + return "CTC" + if value % 887 == 0: + return "xent" + raise ValueError("Unknow mode") + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + def _get_mode(logging_outputs): + mds = [ + SpeechTextPreTrainCompoundCriterion.value2mode(log["mode"]) + for log in logging_outputs + ] + if sum([1 if l != mds[0] else 0 for l in mds]) > 0: + raise ValueError("mode in one mini-batch is expected to be the same!") + return mds[0] + + log_mode = _get_mode(logging_outputs) + if log_mode == "xent": + return SpeechTextPreTrainCrossEntCriterion.reduce_metrics(logging_outputs) + + # ctc loss + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "ctc_loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ctc_ntokens", ntokens) +
metrics.log_scalar("ctc_nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "ctc_nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) diff --git a/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_cross_entropy.py b/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_cross_entropy.py new file mode 100644 index 0000000..6c9cb0f --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/criterions/multi_modality_cross_entropy.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+import torch + +from fairseq import utils +from fairseq.criterions import register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, + LabelSmoothedCrossEntropyCriterionConfig, + label_smoothed_nll_loss, +) + + +@register_criterion( + "speech_text_pretrain_cross_entropy", + dataclass=LabelSmoothedCrossEntropyCriterionConfig, +) +class SpeechTextPreTrainCrossEntCriterion(LabelSmoothedCrossEntropyCriterion): + def __init__(self, task, sentence_avg, label_smoothing, report_accuracy=False): + super().__init__( + task, sentence_avg, label_smoothing, report_accuracy=report_accuracy + ) + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + loss, nll_loss, nsentences, ntokens, n_correct = self.compute_loss( + model, net_output, sample, reduce=reduce + ) + sample_size = nsentences if self.sentence_avg else ntokens + logging_output = { + "loss": loss.data, + "nll_loss": nll_loss.data, + "ntokens": ntokens, + "nsentences": nsentences, + "sample_size": sample_size, + } + if self.report_accuracy: + logging_output["n_correct"] = utils.item(n_correct) + logging_output["total"] = utils.item(ntokens) + return loss, sample_size, logging_output + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = model.get_targets(sample, net_output) + assert self.ignore_prefix_size == 0 + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs, target + + def compute_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + n_correct = 0 + if 
isinstance(target, dict): + t_lprobs = target["target_logprobs"] + + if not lprobs.batch_first: + lprobs = lprobs.transpose(0, 1) + t_lprobs = t_lprobs.transpose(0, 1) + nsentences, seq_len = lprobs.size()[:2] + ntokens = nsentences * seq_len + t_probs = t_lprobs.exp() + mask_indices = ( + net_output[1]["mask_indices"][0] + if len(net_output[1]["mask_indices"]) > 0 + else None + ) + + # mask_indices is True for those masking frames + if mask_indices is not None: # B X T + t_probs = t_probs.masked_fill(mask_indices.eq(False).unsqueeze(-1), 0) + ntokens = mask_indices.int().sum() + t_probs = t_probs.detach() + t_lprobs = t_lprobs.detach() + loss = ( + -(t_probs * (lprobs - t_lprobs)).sum() + if reduce + else -(t_probs * (lprobs - t_lprobs)).sum(-1, keepdim=True) + ) + nll_loss = loss + else: + nsentences = target.size(0) + mask = target.ne(self.padding_idx) + loss, nll_loss = label_smoothed_nll_loss( + lprobs.view(-1, lprobs.size(-1)), + target.view(-1), + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + n_correct = torch.sum( + lprobs.argmax(-1).masked_select(mask).eq(target.masked_select(mask)) + ) + ntokens = torch.sum(mask) + return loss, nll_loss, nsentences, ntokens, n_correct diff --git a/fairseq/examples/speech_text_joint_to_text/criterions/text_guide_cross_entropy_acc.py b/fairseq/examples/speech_text_joint_to_text/criterions/text_guide_cross_entropy_acc.py new file mode 100644 index 0000000..fd6ff15 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/criterions/text_guide_cross_entropy_acc.py @@ -0,0 +1,224 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+import math + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.logging import metrics + + +@register_criterion("guided_label_smoothed_cross_entropy_with_accuracy") +class GuidedCrossEntAccCriterion(FairseqCriterion): + def __init__( + self, + task, + sentence_avg, + guide_alpha, + text_input_cost_ratio, + label_smoothing, + disable_text_guide_update_num=0, + attentive_cost_regularization=0, + ): + """ + guide_alpha: alpha to interpolate nll and kd loss + text_input_cost_ratio: loss ratio for text only input data + label_smoothing: label smoothing ratio + disable_text_guide_update_num: only use nll loss for the first N updates + attentive_cost_regularization: ratio of attentive cost + """ + super().__init__(task) + self.alpha = guide_alpha + self.attn_beta = attentive_cost_regularization + self.sentence_avg = sentence_avg + self.eps = label_smoothing + self.text_input_cost_ratio = text_input_cost_ratio + self.disable_update_num = disable_text_guide_update_num + assert self.alpha >= 0 and self.alpha <= 1.0 + + @staticmethod + def add_args(parser): + """Add criterion-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--label-smoothing', default=0., type=float, metavar='D', + help='epsilon for label smoothing, 0 means no label smoothing') + # fmt: off + parser.add_argument('--guide-alpha', default=0., type=float, metavar='D', + help='alpha to merge kd cost from text to speech input with ce loss') + # fmt: off + parser.add_argument('--disable-text-guide-update-num', default=0, type=int, metavar='D', + help='disable guided target from text for the first N updates.') + parser.add_argument("--attentive-cost-regularization", default=0.0, type=float, metavar='D', + help="use encoder attentive loss regularization with cost ratio D") +
parser.add_argument("--attentive-cost-without-normalize", action='store_true', + help="Don't do normalization during attentive cost computation") + + def forward(self, model, sample, reduce=True): + reduction = 'sum' if reduce else 'none' + net_input = sample["net_input"] + net_output = model(**net_input) + attn_cost = None + lprobs = model.get_normalized_probs(net_output, log_probs=True) + is_dual_input = True if net_input['src_tokens'] is not None and net_input.get('src_txt_tokens') is not None else False + target = model.get_targets(sample, net_output) + src_token_num = 0 + if is_dual_input: + # lprobs_spch from speech encoder and lprobs_text from text encoder + lprobs_spch, lprobs_text = torch.chunk(lprobs, 2) + lprobs_spch.batch_first = lprobs.batch_first + lprobs_text.batch_first = lprobs.batch_first + + speech_loss, speech_nll_loss, speech_correct, speech_total = \ + self.guide_loss_and_acc(model, lprobs_spch, lprobs_text, target, reduce=(reduction == 'sum')) + text_loss, text_nll_loss, text_correct, text_total = self.compute_loss_and_acc(model, lprobs_text, target, reduction=reduction) + loss = (speech_loss + text_loss) + nll_loss = (speech_nll_loss + text_nll_loss) + correct = speech_correct + text_correct + total = speech_total + text_total + + attn_cost = net_output[1].get('attn_cost') + if attn_cost is not None: + # attn_cost is batch_first and padding tokens have been masked already + src_token_num = attn_cost.ne(0).sum() + attn_cost = attn_cost.sum() + loss = loss + attn_cost * self.attn_beta + else: + attn_cost = 0 + else: + loss, nll_loss, correct, total = self.compute_loss_and_acc(model, lprobs, target, reduction=reduction) + if sample["net_input"]['src_tokens'] is None: # text input only + loss = loss * self.text_input_cost_ratio + speech_loss = None + speech_nll_loss = None + + sample_size, logging_output = self.get_logging_output( + sample, loss, nll_loss, correct, total, src_token_num, speech_loss, speech_nll_loss, attn_cost, is_dual_input + ) 
+ return loss, sample_size, logging_output + + def compute_loss_and_acc(self, model, lprobs, target, reduction='sum'): + if not lprobs.batch_first: + lprobs = lprobs.transpose(0, 1) + lprobs = lprobs.view(-1, lprobs.size(-1)) # -> (B x T) x C + target = target.view(-1) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, target, self.eps, ignore_index=self.padding_idx, reduce=(reduction == 'sum'), + ) + + mask = target.ne(self.padding_idx) + correct = torch.sum(lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask))) + total = torch.sum(mask) + return loss, nll_loss, correct, total + + def guide_loss_and_acc(self, model, lprobs, lprobs_teacher, target, reduce=True): + """ lprobs_teacher is used as guide for lprobs """ + if self.alpha == 0.0 or model.num_updates < self.disable_update_num: + return self.compute_loss_and_acc(model, lprobs, target, reduction=('sum' if reduce else 'none')) + if not lprobs.batch_first: + lprobs = lprobs.transpose(0, 1) + lprobs_teacher = lprobs_teacher.transpose(0, 1) + + lprobs = lprobs.view(-1, lprobs.size(-1)).float() # -> (B x T) x C + lprobs_teacher = lprobs_teacher.view(-1, lprobs_teacher.size(-1)).float() # -> (B x T) x C + target = target.view(-1) + loss = F.nll_loss(lprobs, target, ignore_index=self.padding_idx, reduction='sum' if reduce else 'none') + nll_loss = loss + probs_teacher = lprobs_teacher.exp().masked_fill_(target.unsqueeze(-1).eq(self.padding_idx), 0) + probs_teacher = probs_teacher.detach() + guide_loss = -(probs_teacher*lprobs).sum() if reduce else -(probs_teacher*lprobs).sum(-1, keepdim=True) + loss = self.alpha*guide_loss + (1.0 - self.alpha)*loss + + mask = target.ne(self.padding_idx) + correct = torch.sum(lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask))) + total = torch.sum(mask) + return loss, nll_loss, correct, total + + def get_logging_output( + self, + sample, + loss, + nll_loss, + correct, + total, + src_token_num=0, + speech_loss=None, + speech_nll_loss=None, + 
attn_cost=None, + is_dual_input=False, + ): + + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + mul_size = 2 if is_dual_input else 1 + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "nll_loss": utils.item(nll_loss.data), # * sample['ntokens'], + "ntokens": sample["ntokens"]*mul_size, + "nsentences": sample["target"].size(0)*mul_size, + "sample_size": sample_size*mul_size, + "correct": utils.item(correct.data), + "total": utils.item(total.data), + "src_token_num": utils.item(src_token_num.data) if src_token_num > 0 else 0, + "nframes": torch.sum(sample["net_input"]["src_lengths"]).item(), + } + + if speech_loss is not None: + logging_output["speech_loss"] = utils.item(speech_loss.data) + logging_output["speech_nll_loss"] = utils.item(speech_nll_loss.data) + logging_output["sample_size_speech_cost"] = sample_size + logging_output["speech_attn_loss"] = attn_cost + + return sample_size*mul_size, logging_output + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + correct_sum = sum(log.get("correct", 0) for log in logging_outputs) + total_sum = sum(log.get("total", 0) for log in logging_outputs) + src_token_sum = sum(log.get("src_token_num", 0) for log in logging_outputs) + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + nll_loss_sum = sum(log.get("nll_loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + nsentences = sum(log.get("nsentences", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + nframes = sum(log.get("nframes", 0) for log in logging_outputs) + speech_loss_sum = sum(log.get("speech_loss", 0) for log in logging_outputs) + speech_nll_loss_sum = sum(log.get("speech_nll_loss", 0) for log in logging_outputs) + speech_attn_loss_sum = sum(log.get("speech_attn_loss", 0) for log in 
logging_outputs) + sample_size_speech = sum(log.get("sample_size_speech_cost", 0) for log in logging_outputs) + + agg_output = { + "loss": loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0, + "nll_loss": nll_loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0, + # if args.sentence_avg, then sample_size is nsentences, and loss + # is per-sentence loss; else sample_size is ntokens, and the loss + # becomes per-output token loss + "speech_loss": speech_loss_sum / sample_size_speech / math.log(2) if sample_size_speech > 0 else 0.0, + "speech_nll_loss": speech_nll_loss_sum / sample_size_speech / math.log(2) if sample_size_speech > 0 else 0.0, + "speech_attn_loss": speech_attn_loss_sum / src_token_sum / math.log(2) if src_token_sum > 0 else 0.0, + "ntokens": ntokens, + "nsentences": nsentences, + "nframes": nframes, + "sample_size": sample_size, + "acc": correct_sum * 100.0 / total_sum if total_sum > 0 else 0.0, + "correct": correct_sum, + "total": total_sum, + "src_token_num": src_token_sum, + # total is the number of validate tokens + } + return agg_output + + @classmethod + def reduce_metrics(cls, logging_outputs): + """Aggregate logging outputs from data parallel training.""" + agg_logging_outputs = cls.aggregate_logging_outputs(logging_outputs) + for k, v in agg_logging_outputs.items(): + if k in {'nsentences', 'ntokens', 'sample_size'}: + continue + metrics.log_scalar(k, v, round=3) diff --git a/fairseq/examples/speech_text_joint_to_text/docs/ende-mustc.md b/fairseq/examples/speech_text_joint_to_text/docs/ende-mustc.md new file mode 100644 index 0000000..1acf6e0 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/docs/ende-mustc.md @@ -0,0 +1,118 @@ +[[Back]](..) 
+ +# Joint Speech Text Training for the MuST-C English to German Speech Translation task + +Joint Training Baseline: it is based on the paper ["A general multi-task learning framework to leverage text data for speech to text tasks"](https://arxiv.org/pdf/2010.11338.pdf) + +Enhanced Joint Training: the joint training is enhanced with pre-trained models, cross attentive regularization and online knowledge distillation based on the paper ["Improving Speech Translation by Understanding and Learning from the Auxiliary Text Translation Task"](https://research.fb.com/publications/improving-speech-translation-by-understanding-and-learning-from-the-auxiliary-text-translation-task) + +## Prepare Data +#### Download files +- Sentence piece model [spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/spm.model) +- Dictionary [dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/dict.txt) +- config [config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/config.yaml) +#### Prepare MuST-C data set +- Please follow the data preparation in the [S2T example](https://github.com/pytorch/fairseq/blob/main/examples/speech_to_text/docs/mustc_example.md) +- Convert source text under the "src_text" column in the tsv file into phoneme representation. +```bash + python examples/speech_text_joint_to_text/scripts/g2p_encode.py \ + --lower-case --do-filter --use-word-start --no-punc \ + --reserve-word examples/speech_text_joint_to_text/configs/mustc_noise.list \ + --data-path ${must_c_en_de_src_text} \ + --out-path ${must_c_en_de_src_text_pho} +``` +- Replace the source text under the "src_text" column in the tsv file with the corresponding phoneme representation generated in the step above. +Below is a snapshot of the MuST-C en-de dev tsv +``` +id audio n_frames tgt_text src_text speaker +ted_767_0 en-de/flac.zip:10071514743:48445 56160 Heute spreche ich zu Ihnen über Energie und Klima. 
▁AY1 M ▁G OW1 IH0 NG ▁T UW1 ▁T AO1 K ▁T AH0 D EY1 ▁AH0 B AW1 T ▁EH1 N ER0 JH IY0 ▁AH0 N D ▁K L AY1 M AH0 T spk.767_ +ted_767_1 en-de/flac.zip:1214217978:205678 226080 Und das überrascht vielleicht etwas, weil sich meine Vollzeitbeschäftigung bei der Stiftung hauptsächlich um Impfstoffe und Saatgut dreht, um die Dinge, die wir erfinden und liefern müssen um den ärmsten 2 Milliarden ein besseres Leben zu ermöglichen. ▁AH0 N D ▁DH AE1 T ▁M AY1 T ▁S IY1 M ▁AH0 ▁B IH1 T ▁S ER0 P R AY1 Z IH0 NG ▁B IH0 K AO1 Z ▁M AY1 ▁F UH1 L ▁T AY1 M ▁W ER1 K ▁AE1 T ▁DH AH0 ▁F AW0 N D EY1 SH AH0 N ▁IH1 Z ▁M OW1 S T L IY0 ▁AH0 B AW1 T ▁V AE2 K S IY1 N Z ▁AH0 N D ▁S IY1 D Z ▁AH0 B AW1 T ▁DH AH0 ▁TH IH1 NG Z ▁DH AE1 T ▁W IY1 ▁N IY1 D ▁T UW1 ▁IH0 N V EH1 N T ▁AH0 N D ▁D IH0 L IH1 V ER0 ▁T UW1 ▁HH EH1 L P ▁DH AH0 ▁P UH1 R IH0 S T ▁T UW1 ▁B IH1 L Y AH0 N ▁L AY1 V ▁B EH1 T ER0 ▁L IH1 V Z spk.767_ +``` +- Prepare phoneme dictionary and save to $MANIFEST_ROOT as [src_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/src_dict.txt) +#### Prepare WMT text data +- [Download wmt data](https://github.com/pytorch/fairseq/blob/main/examples/translation/prepare-wmt14en2de.sh) +- Convert source text (English) into phoneme representation as above +- Generate binary parallel files with "fairseq-preprocess" from fairseq for training and validation. The source input is English phoneme representation and the target input is German sentencepiece token . The output is saved under $parallel_text_data + +## Training +The model is trained with 8 v100 GPUs. 
+ +#### Download pretrained models +- [pretrain_encoder](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_joint_asr_transformer_m.pt) +- [pretrain_nmt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/checkpoint_mt.pt) + +#### Training scripts +- Jointly trained model from scratch +```bash +python train.py ${MANIFEST_ROOT} \ + --save-dir ${save_dir} \ + --num-workers 8 \ + --task speech_text_joint_to_text \ + --arch dualinputs2ttransformer_s \ + --user-dir examples/speech_text_joint_to_text \ + --max-epoch 100 --update-mix-data \ + --optimizer adam --lr-scheduler inverse_sqrt \ + --lr 0.001 --update-freq 4 --clip-norm 10.0 \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy \ + --label-smoothing 0.1 --max-tokens 10000 --max-tokens-text 10000 \ + --max-positions-text 400 --seed 2 --speech-encoder-layers 12 \ + --text-encoder-layers 6 --encoder-shared-layers 6 --decoder-layers 6 \ + --dropout 0.1 --warmup-updates 20000 \ + --text-sample-ratio 0.25 --parallel-text-data ${parallel_text_data} \ + --text-input-cost-ratio 0.5 --enc-grad-mult 2.0 --add-speech-eos \ + --log-format json --langpairs en-de --noise-token '"'"'▁NOISE'"'"' \ + --mask-text-ratio 0.0 --max-tokens-valid 20000 --ddp-backend no_c10d \ + --log-interval 100 --data-buffer-size 50 --config-yaml config.yaml \ + --keep-last-epochs 10 +``` +- Jointly trained model with good initialization, cross attentive loss and online knowledge distillation +```bash +python train.py ${MANIFEST_ROOT} \ + --save-dir ${save_dir} \ + --num-workers 8 \ + --task speech_text_joint_to_text \ + --arch dualinputs2ttransformer_m \ + --user-dir examples/speech_text_joint_to_text \ + --max-epoch 100 --update-mix-data \ + --optimizer adam --lr-scheduler inverse_sqrt \ + --lr 0.002 --update-freq 4 --clip-norm 10.0 \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy \ + --guide-alpha 0.8 --disable-text-guide-update-num 5000 \ + --label-smoothing 0.1 --max-tokens 10000 --max-tokens-text 10000 
\ + --max-positions-text 400 --seed 2 --speech-encoder-layers 12 \ + --text-encoder-layers 6 --encoder-shared-layers 6 --decoder-layers 6 \ + --dropout 0.1 --warmup-updates 20000 --attentive-cost-regularization 0.02 \ + --text-sample-ratio 0.25 --parallel-text-data ${parallel_text_data} \ + --text-input-cost-ratio 0.5 --enc-grad-mult 2.0 --add-speech-eos \ + --log-format json --langpairs en-de --noise-token '"'"'▁NOISE'"'"' \ + --mask-text-ratio 0.0 --max-tokens-valid 20000 --ddp-backend no_c10d \ + --log-interval 100 --data-buffer-size 50 --config-yaml config.yaml \ + --load-pretrain-speech-encoder ${pretrain_encoder} \ + --load-pretrain-decoder ${pretrain_nmt} \ + --load-pretrain-text-encoder-last ${pretrain_nmt} \ + --keep-last-epochs 10 +``` + +## Evaluation +```bash +python ./fairseq_cli/generate.py \ + ${MANIFEST_ROOT} \ + --task speech_text_joint_to_text \ + --max-tokens 25000 \ + --nbest 1 \ + --results-path ${infer_results} \ + --batch-size 512 \ + --path ${model} \ + --gen-subset tst-COMMON_st \ + --config-yaml config.yaml \ + --scoring sacrebleu \ + --beam 5 --lenpen 1.0 \ + --user-dir examples/speech_text_joint_to_text \ + --load-speech-only +``` + +## Results (Joint training with initialization + CAR + online KD) +|Direction|En-De | En-Es | En-Fr | +|---|---|---|---| +|BLEU|27.4| 31.2 | 37.6 | +|checkpoint | [link](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de/checkpoint_ave_10.pt) |[link](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_es/checkpoint_ave_10.pt)|[link](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_fr/checkpoint_ave_10.pt)| diff --git a/fairseq/examples/speech_text_joint_to_text/docs/iwslt2021.md b/fairseq/examples/speech_text_joint_to_text/docs/iwslt2021.md new file mode 100644 index 0000000..0af0fbf --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/docs/iwslt2021.md @@ -0,0 +1,76 @@ +[[Back]](..) 
+ +# Joint Speech Text Training for the 2021 IWSLT multilingual speech translation + +This directory contains the code from paper ["FST: the FAIR Speech Translation System for the IWSLT21 Multilingual Shared Task"](https://arxiv.org/pdf/2107.06959.pdf). + +## Prepare Data +#### Download files +- Sentence piece model [spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/spm.model) +- Dictionary [tgt_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/dict.txt) +- Config [config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/config.yaml) + +#### Prepare +- Please follow the data preparation in [speech-to-text](https://github.com/pytorch/fairseq/blob/main/examples/speech_to_text/docs/mtedx_example.md) with option "--use-audio-input" for raw audio tsv files. +- Prepare tsv files with phoneme based source text (under column 'src_text') as [MuST-C](ende-mustc.md) example. + + +## Training + +#### Download pretrained models +- [Pretrained mbart model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/mbart.pt) +- [Pretrained w2v model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/xlsr_53_56k.pt) + + +#### Training scripts + +```bash +python train.py ${MANIFEST_ROOT} \ + --save-dir ${save_dir} \ + --user-dir examples/speech_text_joint_to_text \ + --train-subset train_es_en_tedx,train_es_es_tedx,train_fr_en_tedx,train_fr_es_tedx,train_fr_fr_tedx,train_it_it_tedx,train_pt_en_tedx,train_pt_pt_tedx \ + --valid-subset valid_es_en_tedx,valid_es_es_tedx,valid_es_fr_tedx,valid_es_it_tedx,valid_es_pt_tedx,valid_fr_en_tedx,valid_fr_es_tedx,valid_fr_fr_tedx,valid_fr_pt_tedx,valid_it_en_tedx,valid_it_es_tedx,valid_it_it_tedx,valid_pt_en_tedx,valid_pt_es_tedx,valid_pt_pt_tedx \ + --config-yaml config.yaml --ddp-backend no_c10d \ + --num-workers 2 --task speech_text_joint_to_text \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy \ + 
--label-smoothing 0.3 --guide-alpha 0.8 \ + --disable-text-guide-update-num 5000 --arch dualinputxmtransformer_base \ + --max-tokens 500000 --max-sentences 3 --max-tokens-valid 800000 \ + --max-source-positions 800000 --enc-grad-mult 2.0 \ + --attentive-cost-regularization 0.02 --optimizer adam \ + --clip-norm 1.0 --log-format simple --log-interval 200 \ + --keep-last-epochs 5 --seed 1 \ + --w2v-path ${w2v_path} \ + --load-pretrained-mbart-from ${mbart_path} \ + --max-update 1000000 --update-freq 4 \ + --skip-invalid-size-inputs-valid-test \ + --skip-encoder-projection --save-interval 1 \ + --attention-dropout 0.3 --mbart-dropout 0.3 \ + --finetune-w2v-params all --finetune-mbart-decoder-params all \ + --finetune-mbart-encoder-params all --stack-w2v-mbart-encoder \ + --drop-w2v-layers 12 --normalize \ + --lr 5e-05 --lr-scheduler inverse_sqrt --warmup-updates 5000 +``` + +## Evaluation +```bash +python ./fairseq_cli/generate.py \ + ${MANIFEST_ROOT} \ + --task speech_text_joint_to_text \ + --user-dir ./examples/speech_text_joint_to_text \ + --load-speech-only --gen-subset test_es_en_tedx \ + --path ${model} \ + --max-source-positions 800000 \ + --skip-invalid-size-inputs-valid-test \ + --config-yaml config.yaml \ + --infer-target-lang en \ + --max-tokens 800000 \ + --beam 5 \ + --results-path ${RESULTS_DIR} \ + --scoring sacrebleu +``` +The trained model can be downloaded [here](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/iwslt/iwslt_data/checkpoint17.pt) + +|direction|es_en|fr_en|pt_en|it_en|fr_es|pt_es|it_es|es_es|fr_fr|pt_pt|it_it| +|---|---|---|---|---|---|---|---|---|---|---|---| +|BLEU|31.62|36.93|35.07|27.12|38.87|35.57|34.13|74.59|74.64|70.84|69.76| diff --git a/fairseq/examples/speech_text_joint_to_text/docs/pre-training.md b/fairseq/examples/speech_text_joint_to_text/docs/pre-training.md new file mode 100644 index 0000000..6d9e2cb --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/docs/pre-training.md @@ -0,0 +1,192 @@ +[[Back]](..) 
+ +# Unified Speech-Text Pre-training for Speech Translation and Recognition + +This directory contains the pre-training recipes from paper ["Unified Speech-Text Pre-training for Speech Translation and Recognition"](https://arxiv.org/abs/2204.05409). + +## Librispeech ASR Pre-training +### Prepare Data +#### Download files +#### Prepare pre-training data +- Text to text task (T2T): prepare the binary data following steps similar to those in [EN_DE Joint training](./ende-mustc.md). The source data is presented as a phoneme token sequence and the target data is coded as subword tokens via SentencePiece. The text data is downloaded from [openslr](https://www.openslr.org/12) +- Self-supervised speech learning task (SSL): The data is prepared as [wav2vec 2.0](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec/README.md) +- Speech to phoneme classification task (S2P): The tsv file contains 5 fields: "id", "audio", "n_frames", "tgt_text", and "align". The "tgt_text" field corresponds to the phoneme based representation of the speech data. The "align" field contains the alignment information. The phoneme level forced alignment for the labelled speech data (i.e. Librispeech) can be obtained via [kaldi](http://kaldi-asr.org) or [MFA](https://montrealcorpustools.github.io/Montreal-Forced-Aligner/). The segmentation information is normalized to 0$\sim$1 for the whole utterance. 
The snapshot of the tsv file is below: +``` +id audio n_frames tgt_text align +116-288045-0000 /librispeech/dev-other/116/288045/116-288045-0000.flac 170400 <sil> ▁AE1 Z AY1 ▁AH0 P R OW1 CH T ▁DH AH1 ▁S IH1 T IY0 <sil> AY1 ▁HH ER1 D ▁B EH1 L Z ▁R IH1 NG IH0 NG <sil> ▁AE1 N D AH0 ▁L IH1 T AH0 L ▁L EY1 T ER0 AY1 ▁F AW1 N D ▁DH AH0 ▁S T R IY1 T S ▁AH0 S T IH1 R ▁W IH0 TH ▁TH R AO1 NG Z ▁AH0 V ▁W EH1 L ▁D R EH1 S T ▁P IY1 P AH0 L ▁IH1 N ▁F AE1 M L IY0 ▁G R UW1 P S <sil> ▁W EH1 N D IH0 NG ▁DH EH1 R ▁W EY1 <sil> ▁HH IH1 DH ER0 ▁AH0 N D ▁TH IH1 DH ER0 <sil> 0.047977 0.056444 0.064911 0.075259 0.081844 0.089370 0.095014 0.104421 0.109125 0.111947 0.115710 0.120414 0.134525 0.141110 0.143932 0.174036 0.176858 0.190028 0.199436 0.207902 0.218250 0.224835 0.231421 0.242709 0.251176 0.257761 0.263405 0.268109 0.270931 0.290687 0.342427 0.349953 0.353716 0.356538 0.360301 0.363123 0.365945 0.368768 0.371590 0.376294 0.384760 0.394167 0.401693 0.409219 0.419567 0.430856 0.441204 0.444026 0.446849 0.449671 0.456256 0.463782 0.471308 0.477893 0.486359 0.491063 0.494826 0.501411 0.512700 0.517404 0.520226 0.534337 0.540922 0.545626 0.550329 0.559737 0.568203 0.583255 0.592662 0.600188 0.603951 0.611477 0.619003 0.624647 0.634055 0.639699 0.646284 0.653810 0.659454 0.664158 0.670743 0.682032 0.687676 0.692380 0.708373 0.713076 0.719661 0.729069 0.740357 0.744120 0.748824 0.752587 0.761994 0.770461 0.781750 0.790216 0.805268 0.808090 0.823142 0.832549 0.836312 0.840075 0.843838 0.851364 0.854186 0.857008 0.862653 0.878645 0.898401 0.901223 0.906867 0.913452 0.920038 0.926623 0.934149 0.939793 0.942615 0.945437 0.952023 0.957667 0.977422 1.000000 + +``` +- Speech to text task (S2T): The data preparation follow the steps in [EN_DE Joint training](./ende-mustc.md). + +#### Prepare fine-tuning data: +We re-use the data from T2T and S2T tasks in the fine-tuning stage. 
+ +### Model Build +#### Pre-training +``` +python train.py $T2T_DATA \ + --save-dir $SAVE_PRE_PATH --user-dir examples/speech_text_joint_to_text --task speech_text_joint_denoising \ + --criterion speech_text_pretrain_cross_entropy --optimizer adam --weight-decay 0.01 --config-yaml config_s2p.yaml --config-s2s-yaml config.yaml --ddp-backend no_c10d \ + --lang-pairs pho-wrd --num-workers 4 --log-interval 500 --save-interval-updates 5000 --keep-interval-updates 1 --no-emb-update-unsup --report-accuracy --lr 0.001 --end-learning-rate 1e-06 \ + --lr-scheduler polynomial_decay --warmup-updates 10000 --total-num-update 800000 --update-freq 6 --validate-interval-updates 10000 --train-subset train \ + --valid-subset valid,valid_sup_speech,valid_sup_speech_s2s,valid_unsup_speech --dataset-impl mmap \ + --sup-speech-data $S2P_DATA_PATH --sup-speech-train-subset train_960.ali --sup-speech-valid-subset dev-clean.ali --sup-speech-s2s-data $S2T_DATA_PATH \ + --sup-speech-s2s-train-subset train --sup-speech-s2s-valid-subset dev-clean --unsup-speech-train-data $SSL_DATA_PATH/train.tsv --unsup-speech-valid-data $SSL_DATA_PATH/valid.tsv \ + --batch-size 200 --batch-size-valid 150 --max-source-positions 1024 --max-target-positions 1024 --max-text-tokens 3072 --max-speech-positions 600000 \ + --max-sample-size 750000 --min-sample-size 64000 --max-speech-tokens 750000 --max-tokens-valid 750000 --skip-invalid-size-inputs-valid-test \ + --unsupervised-speech-sample-ratio 3.0 --supervised-speech-sample-ratio 5 --supervised-speech-s2s-sample-ratio 5 --text-sample-ratio 1.0 --mask 0.3 --mask-random 0.1 \ + --mask-length span-poisson --speech-sup-mask-prob 0.3 --speech-unsup-mask-prob 0.7 --use-mask-whole-words --arch speech_text_pretrain_bart_base_stack \ + --no-scale-feature --activation-fn gelu --speech-extractor-mode default --stacked-encoder all --encoder-normalize-before --decoder-normalize-before \ + --encoder-learned-pos --decoder-learned-pos --dropout 0.1 
--load-pretrained-mbart-encoder-from $BART --load-pretrained-mbart-decoder-from $BART +``` +The current implementation also supports model pre-training without the forced alignment supervised data. In this case, CTC is used to optimize the S2P task. The following changes to the settings are needed: +1. options to be added +``` +--use-sup-speech-ctc --criterion speech_text_pretrain_compound +``` +2. options to be deleted +``` +--same-data-update --criterion speech_text_pretrain_cross_entropy +``` +However, we find the CTC based pre-training is still worse than the forced alignment based setting. This may be partly because we re-use the forced alignment based pre-training settings for the CTC based pre-training, and these settings are likely sub-optimal for CTC. + +#### Fine-tuning +``` +python train.py $S2T_DATA_PATH \ + --save-dir $SAVE_FT_PATH --num-workers 8 --task speech_text_joint_to_text --arch dualinputs2twavtransformer_base_stack \ + --user-dir examples/speech_text_joint_to_text --max-update 100000 --optimizer adam --lr-scheduler inverse_sqrt --lr 0.0003 --update-freq 3 --clip-norm 10.0 \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy --guide-alpha 0.8 --label-smoothing 0.1 --warmup-updates 20000 --attentive-cost-regularization 0.02 \ + --enc-grad-mult 2.0 --max-tokens 800000 --max-source-positions 800000 --max-tokens-text 10000 --max-positions-text 1024 --max-target-positions 1024 --no-scale-feature \ + --activation-fn gelu --load-pretrained-speech-text-encoder $SAVE_PRE_PATH/checkpoint_last.pt --load-pretrained-speech-text-decoder $SAVE_PRE_PATH/checkpoint_last.pt \ + --encoder-normalize-before --decoder-normalize-before --speech-extractor-mode default --speech-mask-channel-length 64 --speech-mask-channel-prob 0.5 \ + --speech-mask-length 10 --speech-mask-prob 0.65 --text-sample-ratio 0.25 --mask-text-ratio 0.3 --mask-text-type random --parallel-text-data text_bin \ + --text-input-cost-ratio 0.5 --langpairs pho-wrd --update-mix-data --log-format json 
--max-tokens-valid 800000 --ddp-backend no_c10d --log-interval 500 \ + --config-yaml config.yaml --skip-invalid-size-inputs-valid-test --keep-last-epochs 50 --layernorm-embedding --encoder-learned-pos --decoder-learned-pos +``` + +### Evaluation +The checkpoints from the last 10 fine-tuning epochs are averaged to obtain $FINAL_MODEL +``` +python ./fairseq_cli/generate.py \ + $S2T_DATA_PATH \ + --task speech_text_joint_to_text \ + --max-tokens 800000 \ + --max-source-positions 800000 \ + --nbest 1 \ + --results-path $RESULTS_LOG \ + --batch-size 512 \ + --path $FINAL_MODEL \ + --gen-subset $SUBSET \ + --config-yaml config.yaml \ + --scoring wer \ + --beam 10 --lenpen 1.0 examples/speech_text_joint_to_text \ + --user-dir examples/speech_text_joint_to_text --load-speech-only \ + --model-overrides "{'load_pretrained_speech_text_decoder':'','load_pretrained_speech_text_encoder':''}" +``` + +### Results and models +| | dev-clean | dev-other | test-clean | test-other | +|---|---|---|---|---| +| WER| 2.0 | 4.4 | 2.1 |4.6 | + +**Model Links**: +- [config_s2p.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/pretrain/config_s2p.yaml): Config for S2P +- [spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned/spm.model): Sentence Piece model +- [src_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned/src_dict.txt): Source Phoneme Dictionary +- [tgt_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned/tgt_dict.txt): Target Sentence Piece Dictionary +- [config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned/config.yaml): Config for S2T +- [BART](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/pretrain/bart.pt): trained from Librispeech text data +- [Joint Pre-trained 
model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/pretrain/checkpoint6.pt): model pre-trained with 960 hours Librispeech data (S2P, S2T), Librispeech text training data (T2T) and Librilight data (SSL) +- [Fine-tuned model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned/checkpoint_ave_10.pt): the pre-trained model is fine-tuned on 960 hours of Librispeech speech and text data. (S2T + T2T) + +## MuST-C +### Prepare Data +Compared with the Librispeech ASR recipe, the differences are below: +- Replace the speech data with corresponding MuST-C data +- Replace the Librispeech text data with parallel text data from WMT + +### Model Build +#### Pre-training +EN-FR is used as an example +``` +python train.py $TXT_DATA \ + --save-dir $SAVE_PRE_PATH --user-dir examples/speech_text_joint_to_text --task speech_text_joint_denoising --criterion speech_text_pretrain_cross_entropy --optimizer adam --weight-decay 0.01 \ + --config-yaml config_s2p.yaml --config-s2s-yaml config.yaml --ddp-backend no_c10d --lang-pairs-bitext en-fr --num-workers 4 --log-interval 500 --save-interval-updates 5000 --keep-interval-updates 1 \ + --no-emb-update-unsup --use-decoder-output-proj --report-accuracy --lr 0.001 --end-learning-rate 1e-06 --lr-scheduler polynomial_decay --warmup-updates 10000 --total-num-update 800000 \ + --update-freq 8 --validate-interval-updates 10000 --train-subset train --valid-subset valid_sup_speech,valid_sup_speech_s2s,valid_unsup_speech --dataset-impl mmap \ + --sup-speech-data $S2P_DATA_PATH --sup-speech-train-subset train --sup-speech-valid-subset dev --sup-speech-s2s-data $S2T_DATA_PATH --sup-speech-s2s-train-subset train \ + --sup-speech-s2s-valid-subset dev --unsup-speech-train-data $SSL_DATA_PATH/train.tsv --unsup-speech-valid-data $SSL_DATA_PATH/valid.tsv --batch-size 200 --batch-size-valid 100 \ + --max-source-positions 1024 --max-target-positions 1024 --max-text-tokens 2048 --max-speech-positions 
600000 --max-sample-size 600000 --min-sample-size 64000 \ + --max-speech-tokens 600000 --max-tokens-valid 600000 --skip-invalid-size-inputs-valid-test --unsupervised-speech-sample-ratio 1.2 --supervised-speech-sample-ratio 10 \ + --supervised-speech-s2s-sample-ratio 10 --bitext-sample-ratio 0.5 --mask 0.3 --mask-random 0.1 --mask-length span-poisson --speech-sup-mask-prob 0.3 \ + --speech-unsup-mask-prob 0.7 --use-mask-whole-words --arch speech_text_pretrain_bart_base_stack --no-scale-feature --activation-fn gelu --speech-extractor-mode default \ + --stacked-encoder s2s --encoder-normalize-before --decoder-normalize-before --encoder-learned-pos --decoder-learned-pos --dropout 0.1 \ + --load-pretrained-mbart-encoder-from $EN_FR_NMT --load-pretrained-mbart-decoder-from $EN_FR_NMT +``` +#### Fine-tuning +``` +python train.py $S2T_DATA_PATH \ + --save-dir $SAVE_FT_PATH --num-workers 8 --task speech_text_joint_to_text --arch dualinputs2twavtransformer_base_stack --user-dir examples/speech_text_joint_to_text \ + --max-epoch 25 --update-mix-data --optimizer adam --lr-scheduler inverse_sqrt --lr 0.0003 --update-freq 4 --clip-norm 10.0 --warmup-updates 20000 \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy --guide-alpha 0.8 --attentive-cost-regularization 0.02 --enc-grad-mult 2.0 --label-smoothing 0.1 \ + --max-tokens 800000 --max-source-positions 800000 --max-tokens-text 10000 --max-positions-text 1024 --load-pretrained-speech-text-encoder $SAVE_PRE_PATH/checkpoint_last.pt \ + --load-pretrained-speech-text-decoder $SAVE_PRE_PATH/checkpoint_last.pt --speech-mask-channel-length 64 --speech-mask-channel-prob 0.5 --speech-mask-length 10 \ + --speech-mask-prob 0.65 --text-sample-ratio 0.05 --mask-text-ratio 0.3 --mask-text-type random --parallel-text-data data-bin-wt --text-input-cost-ratio 0.5 \ + --langpairs en-fr --log-format json --max-tokens-valid 800000 --ddp-backend no_c10d --log-interval 100 --config-yaml config.yaml 
--skip-invalid-size-inputs-valid-test \ + --noise-token '▁NOISE' --keep-last-epochs 40 --layernorm-embedding --encoder-learned-pos --decoder-learned-pos --activation-fn gelu \ + --speech-extractor-mode default --max-target-positions 1024 --encoder-normalize-before --decoder-normalize-before +``` + +### Evaluation +The checkpoints from the last 10 fine-tuning epochs are averaged to obtain $FINAL_MODEL +``` +python fairseq_cli/generate.py \ + $S2T_DATA_PATH \ + --task speech_text_joint_to_text \ + --nbest 1 \ + --max-tokens 800000 \ + --max-source-positions 800000 \ + --results-path $RESULTS_LOG \ + --batch-size 512 \ + --path $FINAL_MODEL \ + --gen-subset $SUBSET \ + --config-yaml config.yaml \ + --scoring sacrebleu \ + --beam 10 --lenpen 1.0 examples/speech_text_joint_to_text \ + --user-dir examples/speech_text_joint_to_text --load-speech-only \ + --model-overrides "{'load_pretrained_speech_text_decoder':'','load_pretrained_speech_text_encoder':''}" +``` + + +### Results and models +| | en-fr | en-es | en-de | +|---|---|---|---| +| BLEU| 39.7 | 33.2 |29.2 | + + +**Model Links**: +1. DE + - [de config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/config.yaml) + - [de src_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/src_dict.txt) + - [de tgt_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/tgt_dict.txt) + - [de spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/spm.model) + - [de pre-trained nmt model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/nmt.pt) + - [de pre-trained model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/checkpoint_pretraing.pt) + - [de fine-tuned model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/de/checkpoint_finetune_ave10.pt) +2. 
ES + - [es config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/config.yaml) + - [es src_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/src_dict.txt) + - [es tgt_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/tgt_dict.txt) + - [es spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/spm.model) + - [es pre-trained nmt model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/nmt.pt) + - [es pre-trained model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/checkpoint_pretraing.pt) + - [es fine-tuned model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/es/checkpoint_finetune_ave10.pt) +3. FR + - [fr config.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/config.yaml) + - [fr src_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/src_dict.txt) + - [fr tgt_dict.txt](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/tgt_dict.txt) + - [fr spm.model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/spm.model) + - [fr pre-trained nmt model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/nmt.pt) + - [fr pre-trained model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/checkpoint_pretraing.pt) + - [fr fine-tuned model](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/fr/checkpoint_finetune_ave10.pt) +4. 
[config_s2p.yaml](https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/must_c/config_s2p.yaml) diff --git a/fairseq/examples/speech_text_joint_to_text/models/__init__.py b/fairseq/examples/speech_text_joint_to_text/models/__init__.py new file mode 100644 index 0000000..5fc5d9e --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/models/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + diff --git a/fairseq/examples/speech_text_joint_to_text/models/joint_speech_text_pretrain_transformer.py b/fairseq/examples/speech_text_joint_to_text/models/joint_speech_text_pretrain_transformer.py new file mode 100644 index 0000000..6f91739 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/models/joint_speech_text_pretrain_transformer.py @@ -0,0 +1,698 @@ +#!/usr/bin/env python3 + +import logging +from collections import OrderedDict, namedtuple +from typing import Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from fairseq import checkpoint_utils, utils +from fairseq.file_io import PathManager +from fairseq.models import ( + FairseqDecoder, + FairseqEncoderDecoderModel, + register_model, + register_model_architecture, +) +from fairseq.models.speech_to_text import ( + MultiInputDecoder, + MultiModalityEncoder, + SpeechWavTransformerEncoder, + StackedSpeechWavTransformerEncoder, +) +from fairseq.models.transformer import ( + TransformerDecoder, + TransformerEncoder, + TransformerModel, +) + +logger = logging.getLogger(__name__) + + +class SpeechTextPreTrainEncoder(MultiModalityEncoder): + def __init__( + self, + dictionary, + sup_speech_encoder, + sup_s2s_speech_encoder, + unsup_speech_encoder, + text_encoder, + ): + super().__init__(dictionary) + self.sup_speech_encoder = sup_speech_encoder + 
self.sup_s2s_speech_encoder = sup_s2s_speech_encoder + self.unsup_speech_encoder = unsup_speech_encoder + self.text_encoder = text_encoder + + @classmethod + def update_transformer_encoder_cfg(cls, args, update_dict): + cfg = dict(args._get_kwargs()) + for fkey in update_dict.keys(): + cfg[fkey] = update_dict[fkey] + cfg.pop("_name", None) # remove keys start with _ + model_args = namedtuple("args", cfg.keys())(*cfg.values()) + return model_args + + @classmethod + def build_text_encoder(cls, args, src_dictionary): + enc_emb = nn.Embedding( + len(src_dictionary), args.encoder_embed_dim, src_dictionary.pad() + ) + model_args = cls.update_transformer_encoder_cfg( + args, {"encoder_layers": args.text_encoder_layers} + ) + text_encoder = TransformerEncoder(model_args, src_dictionary, enc_emb) + return text_encoder + + @classmethod + def build_speech_encoder(cls, args): + model_args = cls.update_transformer_encoder_cfg( + args, + { + "encoder_layers": args.speech_encoder_layers, + "speech_mask_prob": args.speech_sup_mask_prob, + }, + ) + speech_encoder = SpeechWavTransformerEncoder(model_args) + return speech_encoder + + @classmethod + def share_layers(cls, src_layers, tgt_layers): # share layer but not dropout + # share parameters in src_layers with tgt_layers + assert len(src_layers) == len(tgt_layers) + for i, ly in enumerate(src_layers): + tly = tgt_layers[i] + tly.self_attn = ly.self_attn + tly.self_attn_layer_norm = ly.self_attn_layer_norm + tly.activation_fn = ly.activation_fn + tly.normalize_before = ly.normalize_before + tly.fc1 = ly.fc1 + tly.fc2 = ly.fc2 + tly.final_layer_norm = ly.final_layer_norm + if hasattr(tly, "encoder_attn"): + tly.encoder_attn = ly.encoder_attn + tly.encoder_attn_layer_norm = ly.encoder_attn_layer_norm + return tgt_layers + + @classmethod + def build_unsup_speech_encoder(cls, args, sup_speech_encoder): + model_args = cls.update_transformer_encoder_cfg( + args, + { + "encoder_layers": args.speech_encoder_layers, + "speech_mask_prob": 
args.speech_unsup_mask_prob, + "encoder_layerdrop": 0.0, + "decoder_layerdrop": 0.0, + "dropout": args.speech_unsup_dropout, + "activation_dropout": args.speech_unsup_dropout, + "attention_dropout": 0.0, + "dropout_features": args.speech_unsup_feature_dropout, + "dropout_input": args.speech_unsup_feature_dropout, + }, + ) + + unsup_speech_encoder = SpeechWavTransformerEncoder(model_args, alway_mask=True) + unsup_speech_encoder.layer_norm = sup_speech_encoder.layer_norm + unsup_speech_encoder.layers = cls.share_layers( + sup_speech_encoder.layers, unsup_speech_encoder.layers + ) + unsup_speech_encoder.mask_emb = sup_speech_encoder.mask_emb + unsup_speech_encoder.embed_positions = sup_speech_encoder.embed_positions + unsup_speech_encoder.feat_layer_norm = sup_speech_encoder.feat_layer_norm + unsup_speech_encoder.feat_proj = sup_speech_encoder.feat_proj + unsup_speech_encoder.subsample = sup_speech_encoder.subsample + return unsup_speech_encoder + + @classmethod + def build_encoder(cls, args, dictionary): + text_encoder = cls.build_text_encoder(args, dictionary) + if getattr(args, "load_pretrained_mbart_encoder_from", None): + text_encoder = checkpoint_utils.load_pretrained_component_from_model( + component=text_encoder, + checkpoint=args.load_pretrained_mbart_encoder_from, + ) + speech_encoder = cls.build_speech_encoder(args) + if getattr(args, "load_pretrained_feature_extractor_from", None): + + def load_feature_extractor(component, checkpoint): + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint) + component_state_dict = OrderedDict() + + component_prefix = "feature_extractor" + for key in state["model"].keys(): + if key.startswith(component_prefix): + component_subkey = key[len(component_prefix) + 1 :] + component_state_dict[component_subkey] = state["model"][key] + component.load_state_dict(component_state_dict, strict=True) + return component + + 
speech_encoder.subsample = load_feature_extractor( + speech_encoder.subsample, args.load_pretrained_feature_extractor_from + ) + speech_s2s_encoder = speech_encoder + unsup_speech_encoder = cls.build_unsup_speech_encoder(args, speech_encoder) + if getattr(args, "stacked_encoder", "none") != "none": + if args.encoder_shared_text_layers_from_begin > 0: + raise ValueError( + "We can not stack encoders and share encoders at the same time!" + ) + speech_s2s_encoder = StackedSpeechWavTransformerEncoder( + speech_encoder, text_encoder.layers, text_encoder.layer_norm + ) + if args.stacked_encoder == "all": + speech_encoder = speech_s2s_encoder + unsup_speech_encoder = StackedSpeechWavTransformerEncoder( + unsup_speech_encoder, text_encoder.layers, text_encoder.layer_norm + ) + else: + cls.share_speech_text_encoder( + speech_encoder, text_encoder, args.encoder_shared_text_layers_from_begin + ) + return SpeechTextPreTrainEncoder( + dictionary, + speech_encoder, + speech_s2s_encoder, + unsup_speech_encoder, + text_encoder, + ) + + @classmethod + def share_speech_text_encoder( + cls, speech_encoder, text_encoder, shared_layers_from_begin + ): + if shared_layers_from_begin > 0: + num_text_encoder_layers = len(text_encoder.layers) + assert len(speech_encoder.layers) >= shared_layers_from_begin + assert num_text_encoder_layers >= shared_layers_from_begin + assert len(speech_encoder.layers) >= num_text_encoder_layers + for i, ly in enumerate( + speech_encoder.layers[ + -num_text_encoder_layers : -num_text_encoder_layers + + shared_layers_from_begin + ] + ): + assert isinstance(text_encoder.layers[i], type(ly)) + text_encoder.layers[i] = ly + + def select_encoder(self, mode, **kwargs): + if mode in ("speech", "sup_speech_ctc", "sup_speech_ali", "sup_speech_s2s"): + kwargs["features_only"] = True + if mode == "sup_speech_s2s": + return self.sup_s2s_speech_encoder, kwargs + return self.sup_speech_encoder, kwargs + elif mode == "unsup_speech": + kwargs["features_only"] = False + 
return self.unsup_speech_encoder, kwargs + elif mode in ("text", "bitext"): + return self.text_encoder, kwargs + else: + raise NotImplementedError(f"{mode} is not supported") + return None, kwargs + + def forward(self, src_tokens, src_lengths=None, mode="", alignment=None, **kwargs): + return super().forward(src_tokens, src_lengths, mode, **kwargs) + + +# SpeechDummyDecoder works as an extension of encoder, so we could fit encoder only training into seq2seq training +class SpeechDummyDecoder(FairseqDecoder): + def __init__( + self, + dictionary, + output_embedding, + no_emb_update_unsup=False, + use_output_proj=False, + ): + super().__init__(dictionary) + self.output_embedding = output_embedding + num_embedding, num_dim = self.output_embedding.weight.size() + self.out_proj = ( + None if use_output_proj is False else nn.Linear(num_dim, num_dim) + ) + self.no_emb_update_unsup = no_emb_update_unsup + + def extend_alignment(self, alignment, src_lengths, prev_output_tokens): + # alignment: B X N + # src_lengths: B X T + # prev_output_tokens: B X (N + 1) + tgt_tokens = prev_output_tokens[ + :, 1: + ] # remove the leading start of sentence token + ext_alignment = ( + torch.ones(len(src_lengths), src_lengths.max(), device=src_lengths.device) + .long() + .fill_(self.dictionary.pad()) + ) + for bs in range(src_lengths.size(0)): + tgt_length = tgt_tokens[bs].ne(self.dictionary.pad()).sum().item() + assert tgt_length == sum(alignment[bs].ne(1)) + 1 + src_st = 0 + for i in range(tgt_length): + tok = tgt_tokens[bs][i] + src_ed = (alignment[bs][i] * src_lengths[bs]).int().item() + ext_alignment[bs][src_st:src_ed].fill_(tok) + src_st = src_ed + return ext_alignment + + def forward( + self, + prev_output_tokens, + encoder_out, + incremental_state=None, + mode="speech", + alignment=None, + **kwargs, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used 
for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + sup_speech_ctc: + dictionary{"logits": logits, "padding_mask": padding_mask} + sup_speech_ali and unsup_speech: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + emb_weight = self.output_embedding.weight + if ( + mode == "unsup_speech" and self.no_emb_update_unsup + ): # no gradient for embedding here + emb_weight = emb_weight.detach() + enc_out = ( + encoder_out["encoder_out"][0] + if self.out_proj is None + else self.out_proj(encoder_out["encoder_out"][0]) + ) + logits = F.linear(enc_out, emb_weight, None).transpose(0, 1) # B X T X C + others = None + if mode in ( + "speech", + "sup_speech_ctc", + ): # speech data with label, do forcealignment + if len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + logits = logits.masked_fill(padding_mask, float("-inf")) + else: + seq_len, bsz = encoder_out["encoder_out"][0].size()[:2] + padding_mask = torch.zeros( + bsz, seq_len, device=encoder_out["encoder_out"][0].device + ).bool() + return {"x": logits, "padding_mask": padding_mask} + elif mode == "sup_speech_ali": + src_lengths = None + if len(encoder_out["encoder_padding_mask"]) > 0: + src_lengths = (1 - encoder_out["encoder_padding_mask"][0].long()).sum( + -1 + ) + else: + seq_len, bsz = encoder_out["encoder_out"][0].size()[:2] + src_lengths = ( + torch.ones(bsz, device=encoder_out["encoder_out"][0].device).long() + * seq_len + ) + assert alignment is not None + alignment = self.extend_alignment( + alignment, src_lengths, prev_output_tokens + ) + others = {"pseudo_target_tokens": alignment} 
def select_decoder(self, mode, **kwargs):
    """Pick the sub-decoder for ``mode`` and the kwargs to forward to it.

    Args:
        mode: one of ``"unsup_speech"``, ``"speech"``, ``"sup_speech_ctc"``,
            ``"sup_speech_ali"`` (speech decoder), ``"text"``, ``"bitext"``
            or ``"sup_speech_s2s"`` (text decoder).
        **kwargs: extra decoder arguments.

    Returns:
        Tuple ``(decoder, kwargs)``. For the speech decoder ``kwargs["mode"]``
        is set to ``mode``; for ``"sup_speech_s2s"`` any ``"alignment"`` entry
        is removed before the kwargs are handed to the text decoder.

    Raises:
        NotImplementedError: if ``mode`` is not one of the values above.
    """
    if mode in ("unsup_speech", "speech", "sup_speech_ctc", "sup_speech_ali"):
        kwargs["mode"] = mode
        return self.speech_decoder, kwargs
    if mode in ("text", "bitext"):
        return self.text_decoder, kwargs
    # "speech" is handled by the speech-decoder branch above, so only the
    # seq2seq mode can reach this point.
    if mode == "sup_speech_s2s":
        kwargs.pop("alignment", None)
        return self.text_decoder, kwargs
    raise NotImplementedError(f"{mode} is not supported")
@classmethod
def build_decoder(
    cls, args, text_dictionary, speech_dictionary, speech_output_embedding
):
    """Assemble the combined speech/text pre-training decoder.

    Builds the text decoder and the dummy speech decoder (the latter sharing
    ``speech_output_embedding``), optionally initialises the text decoder from
    ``args.load_pretrained_mbart_decoder_from``, and wraps both in a
    ``SpeechTextPreTrainDecoder`` keyed on ``text_dictionary``.
    """
    text_dec = cls.build_text_decoder(args, text_dictionary)
    speech_dec = cls.build_dummy_speech_decoder(
        args, speech_dictionary, speech_output_embedding
    )
    pretrained_path = getattr(args, "load_pretrained_mbart_decoder_from", None)
    if pretrained_path:
        # Overwrite the freshly built text decoder with pretrained weights.
        text_dec = checkpoint_utils.load_pretrained_component_from_model(
            component=text_dec,
            checkpoint=pretrained_path,
        )
    return SpeechTextPreTrainDecoder(text_dictionary, speech_dec, text_dec)
def get_targets(self, sample, net_output):
    """Return the training target that matches the sample's mode.

    * ``"unsup_speech"``: a dict wrapping the pseudo target log-probs that
      the decoder stored in ``net_output[1]``.
    * ``"sup_speech_ali"``: the frame-level pseudo target tokens from
      ``net_output[1]``.
    * any other mode: ``sample["target"]``.
    """
    mode = sample["net_input"]["mode"]
    if mode == "unsup_speech":
        targets = {"target_logprobs": net_output[1]["pseudo_target_logprobs"]}
    elif mode == "sup_speech_ali":
        targets = net_output[1]["pseudo_target_tokens"]
    else:
        targets = sample["target"]
    return targets
@register_model_architecture(
    "speech_text_pretrain_bart", "speech_text_pretrain_bart_base"
)
def speech_text_pretrain_bart_base(args):
    """Fill in the base-architecture defaults for ``speech_text_pretrain_bart``.

    Every hyper-parameter is set with ``getattr(args, name, default)``, so a
    value already present on ``args`` (from the command line or from a larger
    architecture function that calls this one last) is kept and only missing
    attributes receive the defaults below. ``args`` is modified in place.
    """
    # speech masking
    args.dropout_input = getattr(args, "dropout_input", 0)
    args.dropout_features = getattr(args, "dropout_features", 0)
    args.speech_mask_length = getattr(args, "speech_mask_length", 10)
    args.speech_mask_prob = getattr(args, "speech_mask_prob", 0.65)
    args.speech_sup_mask_prob = getattr(args, "speech_sup_mask_prob", 0.3)
    # The unsupervised mask probability falls back to the generic one.
    args.speech_unsup_mask_prob = getattr(
        args, "speech_unsup_mask_prob", args.speech_mask_prob
    )
    args.speech_mask_selection = getattr(args, "speech_mask_selection", "static")
    args.speech_mask_other = getattr(args, "speech_mask_other", 0)
    args.speech_mask_min_space = getattr(args, "speech_mask_min_space", 1)
    args.speech_no_mask_overlap = getattr(args, "speech_no_mask_overlap", False)

    args.speech_mask_channel_length = getattr(args, "speech_mask_channel_length", 10)
    args.speech_mask_channel_prob = getattr(args, "speech_mask_channel_prob", 0.0)
    args.speech_mask_channel_selection = getattr(
        args, "speech_mask_channel_selection", "static"
    )
    args.speech_mask_channel_other = getattr(args, "speech_mask_channel_other", 0)
    args.speech_mask_channel_min_space = getattr(
        args, "speech_mask_channel_min_space", 1
    )
    args.speech_no_mask_channel_overlap = getattr(
        args, "speech_no_mask_channel_overlap", False
    )
    # Look the option up under its own name; an empty attribute name never
    # matches, which would discard any value the caller had already set.
    args.no_scale_feature = getattr(args, "no_scale_feature", False)
    args.feature_grad_mult = getattr(args, "feature_grad_mult", 1.0)  # 0.1

    # Transformer
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768)
    args.encoder_ffn_embed_dim = getattr(
        args, "encoder_ffn_embed_dim", args.encoder_embed_dim * 4
    )
    args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 12)
    args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
    args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0)
    args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False)
    args.speech_conv_bias = getattr(args, "speech_conv_bias", False)

    # Decoder sizes mirror the encoder unless given explicitly.
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim)
    args.decoder_ffn_embed_dim = getattr(
        args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim
    )
    args.decoder_attention_heads = getattr(
        args, "decoder_attention_heads", args.encoder_attention_heads
    )
    args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False)
    args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False)
    args.dropout = getattr(args, "dropout", 0.1)
    args.attention_dropout = getattr(args, "attention_dropout", args.dropout)
    args.activation_dropout = getattr(args, "activation_dropout", 0.0)
    args.activation_fn = getattr(args, "activation_fn", "relu")  # gelu?

    args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None)
    args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0)

    args.speech_unsup_dropout = getattr(args, "speech_unsup_dropout", 0)
    args.speech_unsup_feature_dropout = getattr(args, "speech_unsup_feature_dropout", 0)

    args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False)
    args.share_decoder_input_output_embed = getattr(
        args, "share_decoder_input_output_embed", False
    )
    args.no_token_positional_embeddings = getattr(
        args, "no_token_positional_embeddings", False
    )
    args.adaptive_input = getattr(args, "adaptive_input", False)
    args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0)
    args.decoder_output_dim = getattr(
        args, "decoder_output_dim", args.decoder_embed_dim
    )
    args.layernorm_embedding = getattr(args, "layernorm_embedding", False)
    args.no_scale_embedding = getattr(args, "no_scale_embedding", False)
    args.quant_noise_pq = getattr(args, "quant_noise_pq", 0)

    args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 12)
    args.text_encoder_layers = getattr(args, "text_encoder_layers", 6)
    args.encoder_shared_text_layers_from_begin = getattr(
        args, "encoder_shared_text_layers_from_begin", 6
    )
    args.decoder_layers = getattr(args, "decoder_layers", 6)

    args.no_emb_update_unsup = getattr(args, "no_emb_update_unsup", False)
@register_model_architecture(
    "speech_text_pretrain_bart", "speech_text_pretrain_bart_large"
)
def speech_text_pretrain_bart_large(args):
    """Apply the large-architecture defaults, then defer to the base ones.

    Only attributes missing from ``args`` are filled in; the base function
    runs last so it supplies every remaining default.
    """
    large_defaults = (
        ("encoder_embed_dim", 1024),
        ("encoder_attention_heads", 16),
        ("speech_encoder_layers", 24),
        ("text_encoder_layers", 12),
        ("encoder_shared_text_layers_from_begin", 12),
        ("decoder_layers", 12),
        ("dropout", 0.3),
    )
    for name, default in large_defaults:
        setattr(args, name, getattr(args, name, default))
    speech_text_pretrain_bart_base(args)
def __init__(self, encoder, eos_num, feat_dim, adapter_type="None", adapter_dim=0):
    """Wrap a speech encoder with optional EOS frames and an output adapter.

    Args:
        encoder: the wrapped speech encoder; ``forward`` delegates to it.
        eos_num: number of EOS frames ``add_eos`` appends to each utterance;
            no EOS embedding is created when it is not positive.
        feat_dim: feature size of one input frame (size of the EOS embedding).
        adapter_type: ``"Linear"`` or ``"MLP"`` to add an adapter on top of
            the encoder output; any other value means no adapter.
        adapter_dim: feature size the adapter operates on.
    """
    # No dictionary is associated with this wrapper.
    super().__init__(None)
    self.encoder = encoder
    # Count of appended EOS frames. NOTE(review): the original comment calls
    # this the downsampling rate of the speech features; presumably the count
    # is chosen to match it, confirm against the caller.
    self.eos_num = eos_num  # downsampling rate for speech input feature
    # Single learnable EOS frame, zero-initialised; expanded to eos_num frames
    # when appended.
    self.eos_emb = (
        nn.Parameter(torch.zeros(1, feat_dim), requires_grad=True)
        if eos_num > 0
        else None
    )
    self.adapter = self.add_adapter(adapter_type, adapter_dim)
def add_eos(self, src_tokens, src_lengths):
    """Append ``self.eos_num`` copies of the EOS embedding after each utterance.

    Args:
        src_tokens: padded features of shape ``(batch, max_len, feat)``.
        src_lengths: per-utterance valid lengths, indexed ``[batch]``.

    Returns:
        ``(src_tokens, src_lengths)``. When ``eos_num`` is positive the
        features are extended by ``eos_num`` frames along time, the EOS frames
        are written right after each utterance's valid length, and every
        length grows by ``eos_num``; otherwise both inputs come back as given.
    """
    batch, frames, feat = src_tokens.size()
    num_eos = self.eos_num
    if num_eos <= 0:
        return src_tokens, src_lengths

    # Zero-padded copy with room for the extra frames at the end.
    extended = src_tokens.new_zeros((batch, frames + num_eos, feat))
    extended[:, :frames] = src_tokens
    eos_frames = self.eos_emb.expand(num_eos, feat)
    for b in range(batch):
        start = src_lengths[b]
        extended[b][start : start + num_eos] = eos_frames
    return extended, src_lengths + num_eos
@classmethod
def set_shared_layer(cls, share_level, src_layer, tgt_layer):
    """
    Share parameters from tgt_layer to src_layer and return the layer to use.

    share_level:
        0: share everything (tgt_layer itself is returned)
        1: share everything but different model (sub-modules are re-pointed)
        2: share weight but not bias, layernorm
    """
    if share_level == 0 or isinstance(src_layer, nn.Linear):
        return tgt_layer
    if not isinstance(src_layer, TransformerEncoderLayer):
        # Other layer types are only shared wholesale, and only at level 1.
        return tgt_layer if share_level == 1 else src_layer

    assert src_layer.embed_dim == tgt_layer.embed_dim
    assert src_layer.normalize_before == tgt_layer.normalize_before
    if share_level == 1:
        shared_modules = (
            "fc1",
            "fc2",
            "self_attn",
            "final_layer_norm",
            "self_attn_layer_norm",
            "layernorm_embedding",
        )
        for attr in shared_modules:
            setattr(src_layer, attr, getattr(tgt_layer, attr))
    else:
        # Tie only the weight matrices; biases and layer norms stay separate.
        src_layer.fc1.weight = tgt_layer.fc1.weight
        src_layer.fc2.weight = tgt_layer.fc2.weight
        for proj in ("k_proj", "v_proj", "q_proj", "out_proj"):
            getattr(src_layer.self_attn, proj).weight = getattr(
                tgt_layer.self_attn, proj
            ).weight
    return src_layer
args.encoder_attention_heads, + "max_source_positions": args.max_source_positions, + "dropout": args.dropout, + "encoder_normalize_before": args.encoder_normalize_before, + "activation_dropout": args.activation_dropout, + "attention_dropout": args.attention_dropout, + "activation_fn": args.activation_fn, + "layernorm_embedding": args.layernorm_embedding, + "no_token_positional_embeddings": args.no_token_positional_embeddings, + "no_scale_embedding": args.no_scale_embedding, + "quant_noise_pq": args.quant_noise_pq, + "encoder_freezing_updates": 0, + } + model_args = namedtuple("args", cfg.keys())(*cfg.values()) + spch_encoder = S2TTransformerEncoder(model_args) + if args.add_speech_eos: + spch_encoder = SpeechEoSEncoder( + spch_encoder, + 2 * len(args.conv_kernel_sizes.split(",")), + args.input_feat_per_channel, + adapter_type=getattr(args, "speech_encoder_adapter_type", "None"), + adapter_dim=args.encoder_embed_dim, + ) + return spch_encoder + + @classmethod + def build_text_encoder(cls, args, src_dictionary, spch_encoder): + if args.encoder_shared_layers > 0: + mx_shared_layers = ( + args.speech_encoder_layers + if args.speech_encoder_layers < args.text_encoder_layers + else args.text_encoder_layers + ) + args.encoder_shared_layers = ( + args.encoder_shared_layers + if args.encoder_shared_layers <= mx_shared_layers + else mx_shared_layers + ) + cfg = { + "encoder_embed_dim": args.encoder_text_embed_dim, + "encoder_ffn_embed_dim": args.encoder_ffn_embed_dim, + "encoder_layers": args.text_encoder_layers, + "encoder_layerdrop": args.encoder_layerdrop, + "encoder_attention_heads": args.encoder_attention_heads, + "encoder_learned_pos": args.encoder_learned_pos, + "max_source_positions": args.max_source_positions, + "dropout": args.dropout, + "encoder_normalize_before": args.encoder_normalize_before, + "activation_dropout": args.activation_dropout, + "attention_dropout": args.attention_dropout, + "activation_fn": args.activation_fn, + "adaptive_input": 
def forward(
    self,
    src_tokens,
    src_lengths=None,
    src_txt_tokens=None,
    src_txt_lengths=None,
    **kwargs
):
    """
    Encode speech and/or text input.

    Args:
        src_tokens: padded tensor (B, T, C * feat)
        src_lengths: tensor of original lengths of input utterances (speech) (B,)
        src_txt_tokens: padded tensor (B, T)
        src_txt_lengths: tensor of original lengths of input utterances (text) (B,)

    Returns:
        The speech encoder output when only speech is given, the text encoder
        output when only text is given, or the tuple ``(speech_out, text_out)``
        when both are given.

    Raises:
        ValueError: if both ``src_tokens`` and ``src_txt_tokens`` are None.
    """
    # src_tokens only: inference
    # src_tokens, src_lengths: speech only training
    # src_txt_tokens, src_txt_lengths: text only training
    # all valid: speech + text training

    if src_tokens is None and src_txt_tokens is None:
        raise ValueError(
            "src_tokens and src_txt_tokens cannot be None at the same time"
        )
    ret1 = None
    ret2 = None
    return_all_hiddens = False
    if src_tokens is not None:
        if (
            self.use_cross_attentive_loss and src_txt_tokens is not None
        ):  # remove self.training so we can get attn score during validation step
            return_all_hiddens = True
        ret1 = self.spch_encoder(
            src_tokens, src_lengths, return_all_hiddens=return_all_hiddens
        )

        if self.use_cross_attentive_loss and src_txt_tokens is not None:
            assert self.cross_attentive_loss_before_last_layer < len(
                ret1["encoder_states"]
            )
            # Keep only the intermediate state used by the cross-attentive loss.
            ret1 = self.process_attentive_loss_states(
                ret1,
                ret1["encoder_states"][
                    -self.cross_attentive_loss_before_last_layer - 1
                ],
            )

    if src_txt_tokens is not None:
        ret2 = self.text_encoder(
            src_txt_tokens, src_txt_lengths, return_all_hiddens=return_all_hiddens
        )
        if return_all_hiddens:
            if self.cross_attentive_loss_before_last_layer == len(
                self.text_encoder.layers
            ):
                # Counting back past every layer selects the embedding output.
                text_embedding, _ = self.text_encoder.forward_embedding(
                    src_txt_tokens
                )
                text_embedding = text_embedding.transpose(0, 1)
                ret2 = self.process_attentive_loss_states(ret2, text_embedding)
            else:
                assert self.cross_attentive_loss_before_last_layer < len(
                    self.text_encoder.layers
                )
                ret2 = self.process_attentive_loss_states(
                    ret2,
                    ret2["encoder_states"][
                        -self.cross_attentive_loss_before_last_layer - 1
                    ],
                )

    def merge_output(rst1, rst2):
        if rst1 is None:
            # Text-only batch: scale the text encoder gradient while training.
            # Gradient scaling is meaningless in eval mode, so the guard
            # mirrors the two-input case below.
            if self.enc2_along_grad_mult != 1.0 and self.training:
                rst2 = self.mult_rst_grad(rst2, self.enc2_along_grad_mult)
            return rst2
        if rst2 is None:
            return rst1
        if self.enc_grad_mult != 1.0 and self.training:
            rst1 = self.mult_rst_grad(rst1, self.enc_grad_mult)
            rst2 = self.mult_rst_grad(rst2, self.enc_grad_mult)
        rst = (rst1, rst2)
        return rst

    return merge_output(ret1, ret2)
def cross_attentive_loss(
    self, teacher_states, student_states, teacher_masking, student_masking, eps=1e-6
):
    """Compare the teacher's self-attentive reconstruction with the student's.

    Each teacher position is rebuilt twice as a softmax-weighted sum: once
    over the student states (weights from teacher-student similarity) and
    once over the teacher states themselves (detached, so no gradient reaches
    the teacher). The cost is the L2 distance between the two reconstructions.

    Args:
        teacher_states: teacher states, transposed on entry from T X B X D to
            B X T X D.
        student_states: student states in the same layout.
        teacher_masking: ``[]`` or a one-element list holding the teacher
            padding mask; masked positions get zero cost.
        student_masking: ``[]`` or a list whose first element is the student
            padding mask; masked positions receive no attention weight.
        eps: added to the norms when states are L2-normalised.

    Returns:
        Tensor with one cost per batch element and teacher position; divided
        by the feature size when normalisation is disabled.
    """
    use_norm = self.cross_attentive_loss_with_norm
    teacher = teacher_states.transpose(0, 1)  # from T X B X D to B X T X D
    student = student_states.transpose(0, 1)
    if use_norm:
        teacher = teacher / (teacher.norm(dim=2, keepdim=True) + eps)
        student = student / (student.norm(dim=2, keepdim=True) + eps)
    feat_dim = teacher.size(-1)

    # batch X len_teacher X len_student
    cross_scores = torch.bmm(teacher, student.transpose(1, 2))
    if student.dtype == torch.float16:
        # Run the softmax and reconstruction in full precision.
        cross_scores = cross_scores.float()
        student = student.float()
        teacher = teacher.float()

    has_teacher_mask = teacher_masking != []
    teacher_rows = None
    if has_teacher_mask:
        assert len(teacher_masking) == 1
        teacher_rows = teacher_masking[0].unsqueeze(-1)
        cross_scores = cross_scores.masked_fill(teacher_rows, float("-inf"))
    if student_masking != []:
        cross_scores = cross_scores.masked_fill(
            student_masking[0].unsqueeze(1), float("-inf")
        )

    cross_weights = utils.softmax(cross_scores, dim=-1)
    # batch X len_teacher X len_teacher
    self_weights = utils.softmax(torch.bmm(teacher, teacher.transpose(1, 2)), dim=-1)
    if has_teacher_mask:
        cross_weights = cross_weights.masked_fill(teacher_rows, 0)
        self_weights = self_weights.masked_fill(teacher_rows, 0)

    from_student = torch.bmm(cross_weights, student)
    # no gradient for teacher state
    from_teacher = torch.bmm(self_weights, teacher).detach()
    cost = (from_teacher - from_student).norm(dim=2)
    if has_teacher_mask:
        cost = cost.masked_fill(teacher_masking[0], 0)

    return cost if use_norm else cost / feat_dim
If there are N inputs, batch will be N bigger than a single input + - the last decoder layer's attention weights of shape `(batch, + tgt_len, src_len)` + """ + assert not isinstance(encoder_out, EncoderOut) + if isinstance(encoder_out, tuple): # training with mulitple input + rst = [] + assert len(encoder_out) == 2 + for i, eo in enumerate(encoder_out): + assert incremental_state is None + if i == 0: + rst.append( + self.spch_decoder(prev_output_tokens, eo, incremental_state) + ) + else: + rst.append( + self.text_decoder(prev_output_tokens, eo, incremental_state) + ) + dec_out = torch.cat([r[0] for r in rst], dim=0) + attn_cost = None + if self.compute_cross_attentive_loss: + assert isinstance(encoder_out[0], dict) + if self.cross_attentive_loss_reverse: + attn_cost = self.cross_attentive_loss( + teacher_states=encoder_out[1]["encoder_states"], # text_states + student_states=encoder_out[0]["encoder_states"], # spch_states + teacher_masking=encoder_out[1]["encoder_padding_mask"], + student_masking=encoder_out[0]["encoder_padding_mask"], + ) + else: + attn_cost = self.cross_attentive_loss( + teacher_states=encoder_out[0]["encoder_states"], # spch_states + student_states=encoder_out[1]["encoder_states"], # text_states + teacher_masking=encoder_out[0]["encoder_padding_mask"], + student_masking=encoder_out[1]["encoder_padding_mask"], + ) + + return (dec_out, {"attn_cost": attn_cost}) + else: # inference or training with one input + if has_txt_input: + return self.text_decoder( + prev_output_tokens, encoder_out, incremental_state + ) + return self.spch_decoder(prev_output_tokens, encoder_out, incremental_state) + + +# Note: +# dual input transformer: +# encoder: S2TTransformerEncoder for speech + TransformerEncoder for text +# decoder: TransformerDecoder for text +@register_model("dual_input_s2t_transformer") +class DualInputS2TTransformerModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + self.num_updates = 0 
+ + def max_positions(self): + return None # it is provided in task + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # encoder 1: S2TTransformerEncoder for speech + parser.add_argument( + "--conv-kernel-sizes", + type=str, + metavar="N", + help="kernel sizes of Conv1d subsampling layers", + ) + parser.add_argument( + "--conv-channels", + type=int, + metavar="N", + help="# of channels in Conv1d subsampling layers", + ) + parser.add_argument( + "--enc-output-dim", + type=int, + metavar="N", + help=""" + encoder output dimension, can be None. If specified, projecting the + transformer output to the specified dimension""", + ) + # standard Transformer + parser.add_argument( + "--activation-fn", + type=str, + default="relu", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-text-embed-dim", + type=int, + metavar="N", + help="encoder text embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + 
parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + help="if True, dont scale embeddings", + ) + # non-standard transformer parameters + parser.add_argument( + "--speech-encoder-layers", + type=int, + metavar="N", + help="num speech encoder layers", + ) + parser.add_argument( + "--text-encoder-layers", + type=int, + metavar="N", + help="num text encoder layers", + ) + parser.add_argument( + "--encoder-shared-layers", + type=int, + metavar="N", + help="num shared encoder layers", + ) + parser.add_argument( + "--encoder-shared-layer-level", + type=int, + metavar="N", + default=0, + choices=[0, 1, 2], + help="share layer level 0: all share 1: all share with separate model 2: share weight but not bias and layernorm", + ) + + parser.add_argument( + "--decoder-shared-layer-level", + default=0, + choices=[0, 1, 2], + type=int, + metavar="N", + help="0: share everything; 1: share everything with different model 2: no share layer_norm and bias", + ) + ### + parser.add_argument( + "--text-input-cost-ratio", + type=float, + default=1.0, + metavar="V", + help="text input cost ratio relative to speech input cost", + ) + parser.add_argument( + "--init-scale", + type=float, + default=1.0, + metavar="V", + help="scale the initial weight by given factor", + ) + parser.add_argument( + "--enc-grad-mult", + type=float, + metavar="V", + default=1.0, + help="multiply enc1 and enc2 gradient by V", + ) + parser.add_argument( + "--enc2-along-grad-mult", + type=float, + metavar="V", + default=1.0, + help="multiply enc2 gradient by V if only enc2 is used", + ) + parser.add_argument( + "--load-pretrain-encoder", + type=str, + default="", + 
metavar="EXPR", + help=""" path to the pretrained encoder """, + ) + parser.add_argument( + "--load-pretrain-speech-encoder", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained speech encoder """, + ) + parser.add_argument( + "--load-pretrain-text-encoder", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained text encoder """, + ) + parser.add_argument( + "--load-pretrain-text-encoder-last", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained text encoder """, + ) + parser.add_argument( + "--load-pretrain-decoder", + type=str, + metavar="EXPR", + default="", + help=""" path to the pretrained encoder """, + ) + parser.add_argument( + "--add-speech-eos", + action="store_true", + help="add eos token at the end of input feature", + ) + parser.add_argument( + "--speech-encoder-adapter-type", + type=str, + metavar="EXPR", + default="None", + choices=["None", "Linear", "MLP"], + help="add speech encoder adapter", + ) + + @classmethod + def build_encoder(cls, args, task): + spch_encoder = DualInputEncoder.build_spch_encoder(args) + text_encoder = DualInputEncoder.build_text_encoder( + args, task.src_dict, spch_encoder + ) + cross_attentive_loss_before_last_layer = ( + 0 if getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else -1 + ) + encoder = DualInputEncoder( + args, + spch_encoder, + text_encoder, + task.src_dict, + cross_attentive_loss_before_last_layer, + ) + if args.init_scale != 1.0: + with torch.no_grad(): + for param in encoder.parameters(): + param.data.mul_(args.init_scale) + if args.load_pretrain_text_encoder != "": + checkpoint_utils.load_pretrained_component_from_model( + text_encoder, args.load_pretrain_text_encoder + ) + if args.load_pretrain_speech_encoder != "": + if hasattr(spch_encoder, "encoder"): + checkpoint_utils.load_pretrained_component_from_model( + spch_encoder.encoder, args.load_pretrain_speech_encoder + ) + else: + 
checkpoint_utils.load_pretrained_component_from_model( + spch_encoder, args.load_pretrain_speech_encoder + ) + if ( + args.load_pretrain_text_encoder_last != "" + ): # if share encoder, speech encoder parameters will be used. + # It provides a chance to use pre-trained mt encoder instead + checkpoint_utils.load_pretrained_component_from_model( + text_encoder, args.load_pretrain_text_encoder_last + ) + + if args.load_pretrain_encoder != "": + checkpoint_utils.load_pretrained_component_from_model( + encoder, args.load_pretrain_encoder + ) + return encoder + + @classmethod + def build_decoder(cls, args, task): + dec_cfg = { + "decoder_layerdrop": args.decoder_layerdrop, + "share_decoder_input_output_embed": args.share_decoder_input_output_embed, + "decoder_embed_dim": args.decoder_embed_dim, + "max_target_positions": args.max_target_positions, + "dropout": args.dropout, + "encoder_learned_pos": args.encoder_learned_pos, + "decoder_learned_pos": args.decoder_learned_pos, + "layernorm_embedding": args.layernorm_embedding, + "decoder_normalize_before": args.decoder_normalize_before, + "activation_dropout": args.activation_dropout, + "attention_dropout": args.attention_dropout, + "decoder_ffn_embed_dim": args.decoder_ffn_embed_dim, + "decoder_layers": args.decoder_layers, + "decoder_attention_heads": args.decoder_attention_heads, + "decoder_output_dim": args.decoder_embed_dim, + "no_scale_embedding": args.no_scale_embedding, + "adaptive_input": args.adaptive_input, + "quant_noise_pq": args.quant_noise_pq, + "adaptive_softmax_cutoff": args.adaptive_softmax_cutoff, + "tie_adaptive_weights": args.tie_adaptive_weights, + "no_token_positional_embeddings": args.no_token_positional_embeddings, + "encoder": {"embed_dim":args.encoder_embed_dim} + } + dec_cfg = namedtuple("args", dec_cfg.keys())(*dec_cfg.values()) + dec_emb = nn.Embedding( + len(task.target_dictionary), + args.decoder_embed_dim, + task.target_dictionary.pad(), + ) + compute_cross_attentive_loss = ( + True if 
getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else False + ) + cross_attentive_loss_without_norm = getattr( + args, "attentive_cost_without_normalize", False + ) + cross_attentive_loss_reverse = ( + False # getattr(args, "attentive_cost_reverse", False) + ) + + text_decoder = TransformerDecoder(dec_cfg, task.target_dictionary, dec_emb) + spch_decoder = TransformerDecoder(dec_cfg, task.target_dictionary, dec_emb) + spch_decoder = TransformerMultiInputDecoder.share_spchdecoder( + args, text_decoder, spch_decoder + ) + decoder = TransformerMultiInputDecoder( + dictionary=task.target_dictionary, + spch_decoder=spch_decoder, + text_decoder=text_decoder, + compute_cross_attentive_loss=compute_cross_attentive_loss, + cross_attentive_loss_with_norm=True + if not cross_attentive_loss_without_norm + else False, + cross_attentive_loss_reverse=cross_attentive_loss_reverse, + ) + if args.init_scale != 1.0: + with torch.no_grad(): + for param in decoder.parameters(): + param.data.mul_(args.init_scale) + if args.load_pretrain_decoder != "": + try: + checkpoint_utils.load_pretrained_component_from_model( + decoder, args.load_pretrain_decoder + ) + except RuntimeError: + checkpoint_utils.load_pretrained_component_from_model( + decoder.text_decoder, args.load_pretrain_decoder + ) + if args.decoder_shared_layer_level > 0: + checkpoint_utils.load_pretrained_component_from_model( + decoder.spch_decoder, args.load_pretrain_decoder + ) + + return decoder + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + # make sure that all args are properly defaulted + # (in case there are any new ones) + dualinputs2ttransformer_base(args) + + encoder = cls.build_encoder(args, task) + decoder = cls.build_decoder(args, task) + return cls(encoder, decoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = super().get_normalized_probs(net_output, log_probs, sample) + 
lprobs.batch_first = True + return lprobs + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + use_encoder_outputs=False, + src_txt_tokens=None, + src_txt_lengths=None, + mode="sup_speech", + **kwargs + ): + """ + Run the forward pass for an encoder-decoder model. + + First feed a batch of source tokens through the encoder. Then, feed the + encoder output and previous decoder outputs (i.e., teacher forcing) to + the decoder to produce the next outputs:: + + encoder_out = self.encoder(src_tokens, src_lengths) + return self.decoder(prev_output_tokens, encoder_out) + + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (LongTensor): source sentence lengths of shape `(batch)` + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + mode = 'sup_speech' or 'text' + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + if mode == "text": + assert src_txt_tokens is None + src_txt_tokens = src_tokens + src_txt_lengths = src_lengths + src_tokens = None + src_lengths = None + encoder_out = self.encoder( + src_tokens, + src_lengths=src_lengths, + src_txt_tokens=src_txt_tokens, + src_txt_lengths=src_txt_lengths, + **kwargs + ) + has_txt_input = True if src_txt_tokens is not None else False + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + has_txt_input=has_txt_input, + **kwargs + ) + if use_encoder_outputs: + return decoder_out, encoder_out + return decoder_out + + +@register_model_architecture( + "dual_input_s2t_transformer", "dualinputs2ttransformer_base" +) +def dualinputs2ttransformer_base(args): + args.encoder_freezing_updates = getattr(args, 
"encoder_freezing_updates", 0) + # Convolutional subsampler + args.input_feat_per_channel = getattr(args, "input_feat_per_channel", 80) + args.conv_kernel_sizes = getattr(args, "conv_kernel_sizes", "5,5") + args.conv_channels = getattr(args, "conv_channels", 1024) + # Transformer + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_text_embed_dim = getattr( + args, "encoder_text_embed_dim", args.encoder_embed_dim + ) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0) + args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False) + + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", args.dropout) + args.activation_dropout = getattr(args, "activation_dropout", args.dropout) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, 
"adaptive_input", False) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0) + + args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 10) + args.text_encoder_layers = getattr(args, "text_encoder_layers", 6) + args.encoder_shared_layers = getattr(args, "encoder_shared_layers", 0) + args.decoder_layers = getattr(args, "decoder_layers", 6) + + args.add_speech_eos = getattr(args, "add_speech_eos", False) + + +@register_model_architecture("dual_input_s2t_transformer", "dualinputs2ttransformer_s") +def dualinputs2ttransformer_s(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 256 * 4) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) + args.dropout = getattr(args, "dropout", 0.1) + args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 7) + args.text_encoder_layers = getattr(args, "text_encoder_layers", 7) + args.decoder_layers = getattr(args, "decoder_layers", 7) + dualinputs2ttransformer_base(args) + + +@register_model_architecture("dual_input_s2t_transformer", "dualinputs2ttransformer_m") +def dualinputs2ttransformer_m(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 512 * 4) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.dropout = getattr(args, "dropout", 0.15) + args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 10) 
+ args.text_encoder_layers = getattr(args, "text_encoder_layers", 6) + args.decoder_layers = getattr(args, "decoder_layers", 6) + dualinputs2ttransformer_base(args) + + +@register_model_architecture("dual_input_s2t_transformer", "dualinputs2ttransformer_b") +def dualinputs2ttransformer_b(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 768 * 4) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 12) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 12) + args.dropout = getattr(args, "dropout", 0.15) + args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 12) + args.text_encoder_layers = getattr(args, "text_encoder_layers", 6) + args.decoder_layers = getattr(args, "decoder_layers", 6) + dualinputs2ttransformer_base(args) + + +@register_model_architecture("dual_input_s2t_transformer", "dualinputs2ttransformer_l") +def dualinputs2ttransformer_l(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 1024 * 4) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.2) + args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 12) + args.text_encoder_layers = getattr(args, "text_encoder_layers", 6) + args.decoder_layers = getattr(args, "decoder_layers", 6) + dualinputs2ttransformer_base(args) diff --git a/fairseq/examples/speech_text_joint_to_text/models/s2t_dualinputwavtransformer.py b/fairseq/examples/speech_text_joint_to_text/models/s2t_dualinputwavtransformer.py new file mode 100644 index 0000000..66e4b3f --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/models/s2t_dualinputwavtransformer.py @@ -0,0 +1,526 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from collections import OrderedDict, namedtuple + +import torch.nn as nn + +from fairseq import checkpoint_utils, utils +from fairseq.checkpoint_utils import load_checkpoint_to_cpu +from fairseq.file_io import PathManager +from fairseq.models import register_model, register_model_architecture +from fairseq.models.speech_to_text import ( + SpeechWavTransformerEncoder, + StackedSpeechWavTransformerEncoder, + TransformerDecoder, +) +from fairseq.models.transformer import TransformerEncoder + +from .s2t_dualinputtransformer import ( + DualInputEncoder, + DualInputS2TTransformerModel, + TransformerMultiInputDecoder, +) + +logger = logging.getLogger(__name__) + + +@register_model("dual_input_wav_transformer") +class DualInputWavTransformerModel(DualInputS2TTransformerModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + def add_transformer_args(parser): + # We can't use TransformerModel.add_args(parser), since it defines max-source-positions which is duplicated with tasks/speech_to_text.py + # Transformer + parser.add_argument( + "--activation-fn", + type=str, + default="relu", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder 
embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + help="if True, dont scale embeddings", + ) + + parser.add_argument( + "--encoder-learned-pos", + action="store_true", + help="use learned positional embeddings", + ) + parser.add_argument( + "--decoder-learned-pos", + action="store_true", + help="use learned positional embeddings", + ) + + add_transformer_args(parser) + SpeechWavTransformerEncoder.add_args(parser) + parser.add_argument( + "--load-pretrained-speech-text-encoder", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained speech text encoder from SpeechTextPreTrainModel """, + ) + parser.add_argument( + "--load-pretrained-wav2vec-encoder", + type=str, + default="", + metavar="EXPR", + 
help=""" path to the pretrained speech text encoder from wav2vec """, + ) + + parser.add_argument( + "--load-pretrained-speech-text-decoder", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained speech text decoder from SpeechTextPreTrainModel """, + ) + parser.add_argument( + "--load-pretrained-text-decoder", + type=str, + default="", + metavar="EXPR", + help=""" path to the pretrained text decoder """, + ) + parser.add_argument( + "--load-init-encoder", + type=str, + default="", + metavar="EXPR", + help=""" path to load seed encoder model """, + ) + parser.add_argument( + "--load-init-decoder", + type=str, + default="", + metavar="EXPR", + help=""" path to load seed decoder model """, + ) + + parser.add_argument( + "--text-input-cost-ratio", + type=float, + default=1.0, + metavar="V", + help="text input cost ratio relative to speech input cost", + ) + parser.add_argument( + "--enc-grad-mult", + type=float, + metavar="V", + default=1.0, + help="multiply enc1 and enc2 gradient by V", + ) + parser.add_argument( + "--enc2-along-grad-mult", + type=float, + metavar="V", + default=1.0, + help="multiply enc2 gradient by V if only enc2 is used", + ) + parser.add_argument( + "--no-strict-check-pretrain-model", + action="store_true", + help="Don't apply strict model check for the pretrained model", + ) + + parser.add_argument( + "--stacked-encoder", + action="store_true", + help="stack speech and text encoders", + ) + + @classmethod + def update_transformer_encoder_cfg(cls, args, update_dict): + cfg = dict(args._get_kwargs()) + for fkey in update_dict.keys(): + cfg[fkey] = update_dict[fkey] + cfg.pop("_name", None) # remove keys start with _ + model_args = namedtuple("args", cfg.keys())(*cfg.values()) + return model_args + + @classmethod + def build_text_encoder(cls, args, src_dictionary): + enc_emb = nn.Embedding( + len(src_dictionary), args.encoder_embed_dim, src_dictionary.pad() + ) + model_args = cls.update_transformer_encoder_cfg( + args, + { + 
"encoder_layers": args.text_encoder_layers, + "max_source_positions": args.max_positions_text, + }, + ) + text_encoder = TransformerEncoder(model_args, src_dictionary, enc_emb) + return text_encoder + + @classmethod + def build_speech_encoder(cls, args): + model_args = cls.update_transformer_encoder_cfg( + args, {"encoder_layers": args.speech_encoder_layers} + ) + speech_encoder = SpeechWavTransformerEncoder(model_args) + return speech_encoder + + @classmethod + def check_args(cls, condition, is_strict, msg): + if condition: + return + if is_strict: + raise ValueError(msg) + logger.warn(msg) + + @classmethod + def build_encoder(cls, args, task): + # text_encoder = cls.build_text_encoder(args, task.source_dictionary ) + text_encoder = cls.build_text_encoder(args, task.src_dict) + speech_encoder = cls.build_speech_encoder(args) + if args.load_pretrained_wav2vec_encoder: + component_pairs = ( + ("feature_extractor", speech_encoder.subsample), + ("post_extract_proj", speech_encoder.feat_proj), + ("layer_norm", speech_encoder.feat_layer_norm), + ("encoder.pos_conv", speech_encoder.embed_positions), + ("encoder.layers", speech_encoder.layers), + ("encoder.layer_norm", speech_encoder.layer_norm), + ("mask_emb", speech_encoder.mask_emb), + ) + state = cls.load_pretrained_speech_text_components( + args.load_pretrained_wav2vec_encoder, component_pairs + ) + cls.check_args( + args.encoder_normalize_before + == state["cfg"]["model"]["layer_norm_first"], + not args.no_strict_check_pretrain_model, + f"encoder_normalize_before {args.encoder_normalize_before} doesn't match with the pretrained model", + ) + cls.check_args( + args.activation_fn == state["cfg"]["model"]["activation_fn"], + not args.no_strict_check_pretrain_model, + f"activation_fn {args.activation_fn} doesn't match with the pretrained model", + ) + + if getattr(args, "stacked_encoder", False): + if args.encoder_shared_text_layers_from_begin > 0: + raise ValueError( + "We can not stack encoders and share encoders at 
the same time!" + ) + speech_encoder = StackedSpeechWavTransformerEncoder( + speech_encoder, text_encoder.layers, text_encoder.layer_norm + ) + else: + cls.share_speech_text_encoder( + speech_encoder, text_encoder, args.encoder_shared_text_layers_from_begin + ) + + cross_attentive_loss_before_last_layer = ( + 0 if getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else -1 + ) + encoder = DualInputEncoder( + args, + speech_encoder, + text_encoder, + task.src_dict, + cross_attentive_loss_before_last_layer, + ) + if args.load_pretrained_speech_text_encoder: + component_pairs = ( + ("encoder.sup_s2s_speech_encoder", encoder.spch_encoder), + ("encoder.text_encoder", encoder.text_encoder), + ) + cls.load_pretrained_speech_text_components( + args.load_pretrained_speech_text_encoder, component_pairs + ) + if getattr(args, "load_init_encoder", "") != "": + checkpoint_utils.load_pretrained_component_from_model( + encoder, args.load_init_encoder + ) + return encoder + + @classmethod + def build_text_decoder(cls, args, tgt_dictionary, dec_emb_share=None): + dec_emb = ( + nn.Embedding( + len(tgt_dictionary), args.decoder_embed_dim, tgt_dictionary.pad() + ) + if dec_emb_share is None + else dec_emb_share + ) + text_decoder = TransformerDecoder(args, tgt_dictionary, dec_emb) + return text_decoder + + @classmethod + def build_decoder(cls, args, task): + text_decoder = cls.build_text_decoder(args, task.target_dictionary) + compute_cross_attentive_loss = ( + True if getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else False + ) + cross_attentive_loss_without_norm = getattr( + args, "attentive_cost_without_normalize", False + ) + cross_attentive_loss_reverse = ( + False # getattr(args, "attentive_cost_reverse", False) + ) + if getattr(args, "load_pretrained_text_decoder", "") != "": + checkpoint_utils.load_pretrained_component_from_model( + text_decoder, args.load_pretrained_text_decoder + ) + + if args.load_pretrained_speech_text_decoder: + component_pairs = 
(("decoder.text_decoder", text_decoder),) + cls.load_pretrained_speech_text_components( + args.load_pretrained_speech_text_decoder, component_pairs + ) + + decoder = TransformerMultiInputDecoder( + dictionary=task.target_dictionary, + spch_decoder=text_decoder, + text_decoder=text_decoder, + compute_cross_attentive_loss=compute_cross_attentive_loss, + cross_attentive_loss_with_norm=True + if not cross_attentive_loss_without_norm + else False, + cross_attentive_loss_reverse=cross_attentive_loss_reverse, + ) + if getattr(args, "load_init_decoder", "") != "": + checkpoint_utils.load_pretrained_component_from_model( + decoder, args.load_init_decoder + ) + return decoder + + @classmethod + def load_pretrained_speech_text_components(cls, checkpoint, component_pairs): + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = load_checkpoint_to_cpu(checkpoint) + for component_type, component in component_pairs: + if isinstance(component, nn.parameter.Parameter): + component.data.copy_(state["model"][component_type]) + else: + component_state_dict = OrderedDict() + for key in state["model"].keys(): + if key.startswith(component_type): + component_subkey = key[len(component_type) + 1 :] + component_state_dict[component_subkey] = state["model"][key] + component.load_state_dict(component_state_dict, strict=True) + return state + + @classmethod + def share_speech_text_encoder( + cls, speech_encoder, text_encoder, shared_layers_from_begin + ): + if shared_layers_from_begin > 0: + num_text_encoder_layers = len(text_encoder.layers) + assert len(speech_encoder.layers) >= shared_layers_from_begin + assert num_text_encoder_layers >= shared_layers_from_begin + assert len(speech_encoder.layers) >= num_text_encoder_layers + for i, ly in enumerate( + speech_encoder.layers[ + -num_text_encoder_layers : -num_text_encoder_layers + + shared_layers_from_begin + ] + ): + assert isinstance(text_encoder.layers[i], type(ly)) + 
@register_model_architecture(
    "dual_input_wav_transformer", "dualinputs2twavtransformer_base"
)
def dualinputs2twavtransformer_base(args):
    """Fill in defaults for the base dual-input wav transformer architecture.

    Every attribute is only set when ``args`` does not already define it, so
    values supplied by the caller (or by a more specific architecture function
    that ran first) are preserved.
    """
    # speech masking
    args.dropout_input = getattr(args, "dropout_input", 0)
    args.dropout_features = getattr(args, "dropout_features", 0)
    args.speech_mask_length = getattr(args, "speech_mask_length", 10)
    args.speech_mask_prob = getattr(args, "speech_mask_prob", 0.65)
    args.speech_mask_selection = getattr(args, "speech_mask_selection", "static")
    args.speech_mask_other = getattr(args, "speech_mask_other", 0)
    args.speech_mask_min_space = getattr(args, "speech_mask_min_space", 1)
    args.speech_no_mask_overlap = getattr(args, "speech_no_mask_overlap", False)
    args.speech_conv_bias = getattr(args, "speech_conv_bias", False)
    args.speech_extractor_mode = getattr(args, "speech_extractor_mode", "default")
    args.no_strict_check_pretrain_model = getattr(
        args, "no_strict_check_pretrain_model", False
    )

    args.speech_mask_channel_length = getattr(args, "speech_mask_channel_length", 10)
    args.speech_mask_channel_prob = getattr(args, "speech_mask_channel_prob", 0.0)
    args.speech_mask_channel_selection = getattr(
        args, "speech_mask_channel_selection", "static"
    )
    args.speech_mask_channel_other = getattr(args, "speech_mask_channel_other", 0)
    args.speech_mask_channel_min_space = getattr(
        args, "speech_mask_channel_min_space", 1
    )
    args.speech_no_mask_channel_overlap = getattr(
        args, "speech_no_mask_channel_overlap", False
    )
    # The lookup key used to be the empty string, which never exists on the
    # namespace, so any user-provided value was silently reset to False.
    args.no_scale_feature = getattr(args, "no_scale_feature", False)
    args.feature_grad_mult = getattr(args, "feature_grad_mult", 0.0)  # 0.1

    # Transformer
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768)
    args.encoder_ffn_embed_dim = getattr(
        args, "encoder_ffn_embed_dim", args.encoder_embed_dim * 4
    )
    args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 12)
    args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
    args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0.1)
    args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False)

    # Decoder sizes default to the (already resolved) encoder sizes.
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim)
    args.decoder_ffn_embed_dim = getattr(
        args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim
    )
    args.decoder_attention_heads = getattr(
        args, "decoder_attention_heads", args.encoder_attention_heads
    )
    args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False)
    args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False)
    args.dropout = getattr(args, "dropout", 0.1)
    args.attention_dropout = getattr(args, "attention_dropout", 0)
    args.activation_dropout = getattr(args, "activation_dropout", args.dropout)
    args.activation_fn = getattr(args, "activation_fn", "relu")  # gelu?
    args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None)
    args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0)
    args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False)
    args.share_decoder_input_output_embed = getattr(
        args, "share_decoder_input_output_embed", False
    )
    args.no_token_positional_embeddings = getattr(
        args, "no_token_positional_embeddings", False
    )
    args.adaptive_input = getattr(args, "adaptive_input", False)
    args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0)
    args.decoder_output_dim = getattr(
        args, "decoder_output_dim", args.decoder_embed_dim
    )
    args.layernorm_embedding = getattr(args, "layernorm_embedding", False)
    args.no_scale_embedding = getattr(args, "no_scale_embedding", False)
    args.quant_noise_pq = getattr(args, "quant_noise_pq", 0)

    args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 12)
    args.text_encoder_layers = getattr(args, "text_encoder_layers", 6)
    args.encoder_shared_text_layers_from_begin = getattr(
        args, "encoder_shared_text_layers_from_begin", 6
    )
    args.decoder_layers = getattr(args, "decoder_layers", 6)


@register_model_architecture(
    "dual_input_wav_transformer", "dualinputs2twavtransformer_base_stack"
)
def dualinputs2twavtransformer_base_stack(args):
    """Stacked-encoder variant: set its own defaults, then apply the base ones."""
    args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 6)
    args.text_encoder_layers = getattr(args, "text_encoder_layers", 6)
    args.encoder_shared_text_layers_from_begin = getattr(
        args, "encoder_shared_text_layers_from_begin", 0
    )
    args.decoder_layers = getattr(args, "decoder_layers", 6)
    args.stacked_encoder = getattr(args, "stacked_encoder", True)
    args.layernorm_embedding = getattr(args, "layernorm_embedding", True)
    # Base defaults only fill attributes that are still missing.
    dualinputs2twavtransformer_base(args)


@register_model_architecture(
    "dual_input_wav_transformer", "dualinputs2twavtransformer_large"
)
def dualinputs2twavtransformer_large(args):
    """Large variant: set its own defaults, then apply the base ones."""
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
    args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
    args.speech_encoder_layers = getattr(args, "speech_encoder_layers", 24)
    args.text_encoder_layers = getattr(args, "text_encoder_layers", 12)
    args.encoder_shared_text_layers_from_begin = getattr(
        args, "encoder_shared_text_layers_from_begin", 12
    )
    args.decoder_layers = getattr(args, "decoder_layers", 12)
    # Base defaults only fill attributes that are still missing.
    dualinputs2twavtransformer_base(args)
class TransformerSentenceEncoderLayerStd(TransformerSentenceEncoderLayer):
    """Wrap an existing sentence-encoder layer and return only its output.

    The wrapper re-uses the sub-modules and settings of ``sent_enc_layer``
    (nothing is copied), and its ``forward`` drops the second element of the
    parent's ``(x, attn)`` result.
    """

    # Attributes taken over from the wrapped layer, in assignment order.
    _BORROWED_ATTRIBUTES = (
        "embedding_dim",
        "dropout",
        "activation_dropout",
        # Initialize blocks
        "activation_fn",
        "self_attn",
        "dropout1",
        "dropout2",
        "dropout3",
        "layer_norm_first",
        # layer norm associated with the self attention layer
        "self_attn_layer_norm",
        "fc1",
        "fc2",
        # layer norm associated with the position wise feed-forward NN
        "final_layer_norm",
    )

    def __init__(self, sent_enc_layer):
        # Start the MRO lookup *after* TransformerSentenceEncoderLayer so that
        # the parent's own __init__ (which would build new sub-modules) is
        # skipped; the existing ones are attached below instead.
        super(TransformerSentenceEncoderLayer, self).__init__()
        for attr_name in self._BORROWED_ATTRIBUTES:
            setattr(self, attr_name, getattr(sent_enc_layer, attr_name))

    def forward(
        self,
        x,
        self_attn_mask=None,
        self_attn_padding_mask=None,
        need_weights=None,
        att_args=None,
    ):
        """Run the wrapped layer and return the hidden states only."""
        hidden, _attn = super().forward(
            x, self_attn_mask, self_attn_padding_mask, need_weights, att_args
        )
        return hidden
class SharedEncoder(FairseqEncoder):
    """Speech encoder whose top wav2vec layers are shared with the text encoder.

    The last ``shared_layers`` transformer layers are removed from the wav2vec
    encoder, kept on this module, and also installed (wrapped in
    ``TransformerSentenceEncoderLayerStd``) into ``mbart_enc.layers``.

    Note: the original source carries a TODO to retire this class.

    Args:
        wav2vec_enc: encoder exposing ``w2v_model.encoder.layers``.
        mbart_enc: text encoder exposing ``layers`` and ``layer_norm``.
        adaptor: module called as ``adaptor(x, padding_mask)``.
        shared_layers: number of trailing wav2vec layers to share; must be
            at least 1.

    Raises:
        ValueError: if ``shared_layers`` is smaller than 1.
    """

    def __init__(self, wav2vec_enc, mbart_enc, adaptor, shared_layers):
        super().__init__(None)
        if shared_layers < 1:
            # With 0, ``layers[-0:]`` is the whole list and ``layers[:-0]`` is
            # empty, which used to end in an unrelated IndexError further down.
            raise ValueError(
                "shared_layers must be >= 1, got {}".format(shared_layers)
            )
        self.w2v_encoder = wav2vec_enc
        self.shared_layers = self.w2v_encoder.w2v_model.encoder.layers[-shared_layers:]
        self.w2v_encoder.w2v_model.encoder.layers = (
            self.w2v_encoder.w2v_model.encoder.layers[:-shared_layers]
        )
        self.adaptor = adaptor
        if self.shared_layers[-1].layer_norm_first:
            self.final_layer_norm = mbart_enc.layer_norm
        else:
            # No final norm is applied here, and the text encoder's is removed too.
            mbart_enc.layer_norm = None
            self.final_layer_norm = None
        # Install the shared layers at the end of the text encoder stack
        # (clamped to index 0 when more layers are shared than it contains).
        shared_layer_from = max(len(mbart_enc.layers) - shared_layers, 0)
        for layer_id, layer in enumerate(self.shared_layers):
            mbart_enc.layers[
                shared_layer_from + layer_id
            ] = TransformerSentenceEncoderLayerStd(layer)

    def forward(self, src_tokens, src_lengths=None, **kwargs):
        """Encode speech with wav2vec, the adaptor, then the shared layers."""
        padding_mask = lengths_to_padding_mask(src_lengths)
        if not padding_mask.any():
            padding_mask = None

        out = self.w2v_encoder.forward(src_tokens, padding_mask, tbc=True)
        x = out["encoder_out"]
        enc_padding_mask = None
        # NOTE(review): StackedWav2VecEncoderWithAdaptor reads
        # out["padding_mask"] instead -- confirm which key the wav2vec
        # encoder actually returns.
        if out["encoder_padding_mask"] is not None:
            enc_padding_mask = out["encoder_padding_mask"].transpose(
                0, 1
            )  # T X B --> B X T

        x, enc_padding_mask = self.adaptor(x, enc_padding_mask)
        for layer in self.shared_layers:
            # The raw (unwrapped) layers return a pair; only x is used.
            x, _ = layer(x, enc_padding_mask)
        if self.final_layer_norm is not None:
            x = self.final_layer_norm(x)

        return {
            "encoder_out": [x],  # T x B x C
            "encoder_padding_mask": [enc_padding_mask]
            if enc_padding_mask is not None
            else [],  # B x T
            "encoder_embedding": [],  # B x T x C
            "encoder_states": [],  # List[T x B x C]
            "src_tokens": [],
            "src_lengths": [],
        }
class StackedWav2VecEncoderWithAdaptor(FairseqEncoder):
    """Speech encoder that stacks mBART encoder layers on top of wav2vec.

    Args:
        wav2vec_enc: encoder exposing ``w2v_model.encoder.layers``.
        mbart_enc_layers: iterable of layers, each called as
            ``layer(x, padding_mask)``.
        mbart_layer_norm: optional final norm applied after the layers.
        adaptor: module called as ``adaptor(x, padding_mask)``.
        drop_w2v_layers: number of trailing wav2vec layers to drop (0 keeps all).
    """

    def __init__(
        self,
        wav2vec_enc,
        mbart_enc_layers,
        mbart_layer_norm,
        adaptor,
        drop_w2v_layers=0,
    ):
        super().__init__(None)
        self.w2v_encoder = wav2vec_enc
        self.adaptor = adaptor
        self.mbart_encoder_layers = mbart_enc_layers
        self.final_layer_norm = mbart_layer_norm
        # Guarded on > 0 because ``layers[:-0]`` would drop every layer.
        if drop_w2v_layers > 0:
            self.w2v_encoder.w2v_model.encoder.layers = (
                self.w2v_encoder.w2v_model.encoder.layers[:-drop_w2v_layers]
            )

    def forward(self, src_tokens, src_lengths=None, return_all_hiddens=False, **kwargs):
        """Encode speech with wav2vec, the adaptor, then the mBART layers."""
        padding_mask = lengths_to_padding_mask(src_lengths)
        if not padding_mask.any():
            padding_mask = None

        out = self.w2v_encoder.forward(src_tokens, padding_mask, tbc=True)
        x = out["encoder_out"]
        enc_padding_mask = out["padding_mask"]  # B X T (or None)

        x, enc_padding_mask = self.adaptor(x, enc_padding_mask)
        encoder_states = []
        for layer in self.mbart_encoder_layers:
            x = layer(x, enc_padding_mask)
            if return_all_hiddens:
                encoder_states.append(x)
        if self.final_layer_norm is not None:
            x = self.final_layer_norm(x)

        return {
            "encoder_out": [x],  # T x B x C
            "encoder_padding_mask": [enc_padding_mask]
            if enc_padding_mask is not None
            else [],  # B x T
            "encoder_embedding": [],  # B x T x C
            "encoder_states": encoder_states,  # List[T x B x C]
            "src_tokens": [],
            "src_lengths": [],
        }

    def reorder_encoder_out(self, encoder_out, new_order):
        """Return ``encoder_out`` re-indexed along the batch axis by ``new_order``.

        A new dictionary with new lists is returned; the lists stored in the
        ``encoder_out`` argument are left untouched.
        """
        new_encoder_out = [
            x.index_select(1, new_order) for x in encoder_out["encoder_out"]
        ]
        new_encoder_padding_mask = [
            x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]
        ]
        new_encoder_embedding = [
            x.index_select(0, new_order) for x in encoder_out["encoder_embedding"]
        ]
        # Build a fresh list instead of overwriting the caller's entries.
        new_encoder_states = [
            state.index_select(1, new_order)
            for state in encoder_out["encoder_states"]
        ]

        return {
            "encoder_out": new_encoder_out,  # T x B x C
            "encoder_padding_mask": new_encoder_padding_mask,  # B x T
            "encoder_embedding": new_encoder_embedding,  # B x T x C
            "encoder_states": new_encoder_states,  # List[T x B x C]
            "src_tokens": [],  # B x T
            "src_lengths": [],  # B x 1
        }
# Note:
#    dual input transformer:
#        encoder: wav2vec for speech + mbart encoder for text
#        decoder: mbart decoder for text
@register_model("dual_input_xm_transformer")
class DualInputXMTransformerModel(DualInputS2TTransformerModel):
    """Dual-input model built from a wav2vec speech encoder and mBART text parts.

    ``build_model`` applies the ``dualinputxmtransformer_base`` defaults and
    then assembles the encoder (``build_encoder``) and decoder
    (``build_decoder``).
    """

    def __init__(self, encoder, decoder):
        super().__init__(encoder, decoder)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # wav2vec encoder
        Wav2VecEncoderWithAdaptor.add_args(parser)
        # add_decoder_args(parser)
        # mbart Transformer
        parser.add_argument(
            "--activation-fn",
            type=str,
            default="relu",
            choices=utils.get_available_activation_fns(),
            help="activation function to use",
        )

        # The mbart-* dropouts are copied onto a private args copy in
        # build_encoder / build_decoder.
        parser.add_argument(
            "--mbart-dropout", type=float, metavar="D", help="dropout probability"
        )
        parser.add_argument(
            "--mbart-attention-dropout",
            type=float,
            metavar="D",
            help="dropout probability for attention weights",
        )
        parser.add_argument(
            "--mbart-activation-dropout",
            type=float,
            metavar="D",
            help="dropout probability after activation in FFN.",
        )

        parser.add_argument(
            "--encoder-embed-dim",
            type=int,
            metavar="N",
            help="encoder embedding dimension",
        )
        parser.add_argument(
            "--encoder-ffn-embed-dim",
            type=int,
            metavar="N",
            help="encoder embedding dimension for FFN",
        )
        parser.add_argument(
            "--encoder-layers", type=int, metavar="N", help="num encoder layers"
        )
        parser.add_argument(
            "--encoder-attention-heads",
            type=int,
            metavar="N",
            help="num encoder attention heads",
        )
        parser.add_argument(
            "--encoder-normalize-before",
            action="store_true",
            help="apply layernorm before each encoder block",
        )

        parser.add_argument(
            "--decoder-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension",
        )
        parser.add_argument(
            "--decoder-ffn-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension for FFN",
        )
        parser.add_argument(
            "--decoder-layers", type=int, metavar="N", help="num decoder layers"
        )
        parser.add_argument(
            "--decoder-attention-heads",
            type=int,
            metavar="N",
            help="num decoder attention heads",
        )
        parser.add_argument(
            "--decoder-normalize-before",
            action="store_true",
            help="apply layernorm before each decoder block",
        )
        parser.add_argument(
            "--layernorm-embedding",
            action="store_true",
            help="add layernorm to embedding",
        )
        parser.add_argument(
            "--no-scale-embedding",
            action="store_true",
            help="if True, dont scale embeddings",
        )
        parser.add_argument(
            "--load-pretrained-mbart-from",
            type=str,
            metavar="STR",
            help="model to take text encoder decoder weights from (for initialization)",
        )
        # parser.add_argument("--finetune-w2v-params", type=str, metavar="STR",
        #                     help="comma-separated param strings to finetune.")
        parser.add_argument(
            "--finetune-mbart-decoder-params",
            type=str,
            metavar="STR",
            help="comma-separated param strings to finetune.",
        )
        parser.add_argument(
            "--finetune-mbart-encoder-params",
            type=str,
            metavar="STR",
            help="comma-separated param strings to finetune.",
        )
        parser.add_argument(
            "--skip-encoder-projection",
            action="store_true",
            help="skip the projection layer in encoder",
        )

        parser.add_argument(
            "--enc-grad-mult",
            type=float,
            metavar="V",
            default=1.0,
            help="multiply enc1 and enc2 gradient by V",
        )
        parser.add_argument(
            "--enc2-along-grad-mult",
            type=float,
            metavar="V",
            default=1.0,
            help="multiply enc2 gradient by V if only enc2 is used",
        )
        parser.add_argument(
            "--text-input-cost-ratio",
            type=float,
            default=1.0,
            metavar="V",
            help="text input cost ratio relative to speech input cost",
        )
        # The three flags below select how build_encoder combines the wav2vec
        # and mBART encoders; they are checked there in this order.
        parser.add_argument(
            "--stack-w2v-mbart-encoder",
            action="store_true",
            help="stack w2v and mbart encoder",
        )
        parser.add_argument(
            "--stack-w2v-mbart-nonorm-encoder",
            action="store_true",
            help="stack w2v and mbart encoder",
        )
        parser.add_argument(
            "--no-final-norm-decoder", action="store_true", help="no layer norm"
        )
        parser.add_argument(
            "--drop-w2v-layers",
            type=int,
            default=0,
            metavar="N",
            help="drop w2v encoder layers",
        )

        parser.add_argument(
            "--share-w2v-text-encoder",
            action="store_true",
            help="share w2v encoder layers with text encoder",
        )
        parser.add_argument(
            "--shared-w2v-layers",
            type=int,
            default=0,
            metavar="N",
            help="shared encoder layers from w2v encoder",
        )

    @classmethod
    def build_encoder(cls, args, task):
        """Build the dual (speech + text) encoder.

        The text encoder is a ``TransformerEncoder`` configured from a copy of
        ``args`` carrying the mbart-* dropouts; the speech encoder starts as a
        ``Wav2VecEncoderWithAdaptor`` and may be replaced by a stacked or
        shared variant depending on the flags. All parameters are frozen
        unless selected by the corresponding finetune-* setting.
        """
        # Work on a copy so the mBART-specific overrides do not leak into the
        # args used for the wav2vec encoder.
        _args = copy.deepcopy(args)
        _args.dropout = args.mbart_dropout
        _args.attention_dropout = args.mbart_attention_dropout
        _args.activation_dropout = args.mbart_activation_dropout
        _args.max_source_positions = 1024
        enc_emb = nn.Embedding(
            len(task.src_dict), _args.encoder_embed_dim, task.src_dict.pad()
        )
        text_encoder = TransformerEncoder(_args, task.src_dict, enc_emb)
        spch_encoder = Wav2VecEncoderWithAdaptor(args)
        if getattr(args, "load_pretrained_mbart_from", None):
            text_encoder = checkpoint_utils.load_pretrained_component_from_model(
                component=text_encoder, checkpoint=args.load_pretrained_mbart_from
            )
        if getattr(args, "stack_w2v_mbart_encoder", False):
            # Stacking and layer sharing are mutually exclusive.
            assert getattr(args, "share_w2v_text_encoder", False) is False
            spch_encoder = StackedWav2VecEncoderWithAdaptor(
                spch_encoder.w2v_encoder,
                text_encoder.layers,
                text_encoder.layer_norm,
                spch_encoder.adaptor,
                args.drop_w2v_layers,
            )
        elif getattr(args, "stack_w2v_mbart_nonorm_encoder", False):
            # Must happen before the constructor call below so that the
            # stacked encoder receives ``None`` as its final norm as well.
            text_encoder.layer_norm = None
            spch_encoder = StackedWav2VecEncoderWithAdaptor(
                spch_encoder.w2v_encoder,
                text_encoder.layers,
                text_encoder.layer_norm,
                spch_encoder.adaptor,
                args.drop_w2v_layers,
            )
        elif getattr(args, "share_w2v_text_encoder", False):
            # SharedEncoder modifies text_encoder in place (see its __init__).
            spch_encoder = SharedEncoder(
                spch_encoder.w2v_encoder,
                text_encoder,
                spch_encoder.adaptor,
                args.shared_w2v_layers,
            )

        # NOTE(review): in the stacked/shared variants some layers are
        # presumably reachable from both encoders; for those the second loop
        # below decides the final requires_grad value -- confirm intended.
        for k, p in spch_encoder.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(
                args, "finetune_w2v_params"
            ) and need_finetuning(args.finetune_w2v_params, k):
                p.requires_grad = True
            else:
                p.requires_grad = False
        for k, p in text_encoder.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(
                args, "finetune_mbart_encoder_params"
            ) and need_finetuning(
                args.finetune_mbart_encoder_params, k
            ):
                p.requires_grad = True
            else:
                p.requires_grad = False
        # 0 when attentive cost regularization is enabled, -1 otherwise.
        cross_attentive_loss_before_last_layer = (
            0 if getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else -1
        )
        encoder = DualInputEncoder(
            args,
            spch_encoder,
            text_encoder,
            task.src_dict,
            cross_attentive_loss_before_last_layer,
        )
        return encoder

    @classmethod
    def build_decoder(cls, args, task):
        """Build the decoder: one ``TransformerDecoder`` used for both inputs.

        The same decoder instance is passed as both ``spch_decoder`` and
        ``text_decoder`` of ``TransformerMultiInputDecoder``. Its parameters
        are frozen unless selected by ``finetune_mbart_decoder_params``.
        """
        _args = copy.deepcopy(args)
        _args.dropout = args.mbart_dropout
        _args.attention_dropout = args.mbart_attention_dropout
        _args.activation_dropout = args.mbart_activation_dropout
        _args.max_target_positions = 1024
        # NOTE(review): the decoder embedding is sized with encoder_embed_dim,
        # not decoder_embed_dim -- both default to 1024; confirm intentional.
        dec_emb = nn.Embedding(
            len(task.tgt_dict), _args.encoder_embed_dim, task.tgt_dict.pad()
        )
        decoder = TransformerDecoder(_args, task.tgt_dict, dec_emb)
        if getattr(args, "load_pretrained_mbart_from", None):
            decoder = checkpoint_utils.load_pretrained_component_from_model(
                component=decoder, checkpoint=args.load_pretrained_mbart_from
            )
        if getattr(args, "no_final_norm_decoder", False):
            decoder.layer_norm = None
        for k, p in decoder.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(
                args, "finetune_mbart_decoder_params"
            ) and need_finetuning(
                args.finetune_mbart_decoder_params, k
            ):
                p.requires_grad = True
            else:
                p.requires_grad = False

        compute_cross_attentive_loss = (
            True if getattr(args, "attentive_cost_regularization", 0.0) > 0.0 else False
        )
        cross_attentive_loss_without_norm = getattr(
            args, "attentive_cost_without_normalize", False
        )
        # Hard-wired off; the args lookup is kept below as a comment.
        cross_attentive_loss_reverse = (
            False  # getattr(args, "attentive_cost_reverse", False)
        )
        decoder = TransformerMultiInputDecoder(
            dictionary=task.target_dictionary,
            spch_decoder=decoder,
            text_decoder=decoder,
            compute_cross_attentive_loss=compute_cross_attentive_loss,
            cross_attentive_loss_with_norm=True
            if not cross_attentive_loss_without_norm
            else False,
            cross_attentive_loss_reverse=cross_attentive_loss_reverse,
        )
        return decoder

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted
        # (in case there are any new ones)
        dualinputxmtransformer_base(args)

        encoder = cls.build_encoder(args, task)
        decoder = cls.build_decoder(args, task)
        return cls(encoder, decoder)
@register_model_architecture("dual_input_xm_transformer", "dualinputxmtransformer_base")
def dualinputxmtransformer_base(args):
    """Populate ``args`` with the base dual-input XM transformer defaults.

    Attributes already present on ``args`` are kept; only missing ones receive
    the default listed here. Defaults that depend on another attribute read
    that attribute after it has been resolved.
    """

    def fill(name, fallback):
        # Keep an existing value, otherwise install the fallback.
        setattr(args, name, getattr(args, name, fallback))

    # wav2vec encoder
    set_default_w2v_encoder_args(args)
    set_default_adaptor_args(args)

    # mbart model
    fill("encoder_embed_dim", 1024)
    fill("encoder_ffn_embed_dim", 4 * args.encoder_embed_dim)
    for name, fallback in (
        ("encoder_layers", 12),
        ("encoder_attention_heads", 16),
        ("encoder_normalize_before", True),
        ("encoder_layerdrop", 0),
        ("encoder_learned_pos", True),
        ("decoder_embed_path", None),
        ("decoder_embed_dim", 1024),
        ("decoder_ffn_embed_dim", 4 * 1024),
        ("decoder_layers", 12),
        ("decoder_attention_heads", 16),
        ("decoder_normalize_before", True),
        ("decoder_learned_pos", True),
        ("decoder_layerdrop", 0.0),
        ("adaptive_input", False),
        ("mbart_attention_dropout", 0.0),
        ("mbart_activation_dropout", 0.0),
        ("mbart_dropout", 0.1),
        ("adaptive_softmax_cutoff", None),
        ("adaptive_softmax_dropout", 0),
        ("share_decoder_input_output_embed", True),
        ("no_token_positional_embeddings", False),
    ):
        fill(name, fallback)

    # These two follow the decoder embedding size resolved above.
    fill("decoder_output_dim", args.decoder_embed_dim)
    fill("decoder_input_dim", args.decoder_embed_dim)

    for name, fallback in (
        ("no_scale_embedding", False),
        ("quant_noise_pq", 0),
        ("layernorm_embedding", True),
        ("activation_fn", "gelu"),
        ("pooler_activation_fn", "tanh"),
        ("pooler_dropout", 0.0),
    ):
        fill(name, fallback)
def is_update(param_name, module_name):
    """Return True when ``module_name`` occurs as a substring of ``param_name``."""
    return module_name in param_name


def load_checkpoint(src_cpt):
    """Load the checkpoint at ``src_cpt`` onto the CPU and return its state."""
    with PathManager.open(src_cpt, "rb") as f:
        state_src = torch.load(
            f,
            map_location=(
                lambda s, _: torch.serialization.default_restore_location(s, "cpu")
            ),
        )

    return state_src


def save_checkpoint(tgt_cpt, states):
    """Serialise ``states`` to the path ``tgt_cpt``."""
    with PathManager.open(tgt_cpt, "wb") as f:
        torch.save(
            states,
            f,
        )


def rename_text_components(model):
    """Return the text encoder/decoder entries of ``model`` under BART-style keys.

    Keys starting with ``encoder.text_encoder.`` are renamed to ``encoder.``
    and keys starting with ``decoder.text_decoder.`` to ``decoder.``; every
    other entry is dropped. Matching is on the exact dotted prefix, so a
    sibling such as ``encoder.text_encoder_extra`` is not picked up, and only
    the leading prefix is rewritten.
    """
    prefix_map = (
        ("encoder.text_encoder.", "encoder."),
        ("decoder.text_decoder.", "decoder."),
    )
    new_model = OrderedDict()
    for key, value in model.items():
        for old_prefix, new_prefix in prefix_map:
            if key.startswith(old_prefix):
                new_model[new_prefix + key[len(old_prefix):]] = value
                break
    return new_model


# convert the pre-trained model into bart model
def main():
    """Read ``--input-model``, keep and rename the text parts, write ``--output-model``."""
    parser = argparse.ArgumentParser()
    # fmt: off
    parser.add_argument('--input-model', required=True,
                        help='Input checkpoint file path.')
    parser.add_argument('--output-model', required=True,
                        help='output checkpoint file path.')
    # fmt: on
    args = parser.parse_args()
    print(args)

    states = load_checkpoint(args.input_model)
    states["model"] = rename_text_components(states["model"])
    save_checkpoint(args.output_model, states)


if __name__ == "__main__":
    main()
logger = logging.getLogger(__name__)

FAIL_SENT = "FAILED_SENTENCE"


def parse():
    """Parse and return the command-line arguments of the script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-path", type=str, required=True)
    parser.add_argument("--out-path", type=str, required=True)
    parser.add_argument("--lower-case", action="store_true")
    parser.add_argument("--do-filter", action="store_true")
    parser.add_argument("--use-word-start", action="store_true")
    parser.add_argument("--dup-vowel", default=1, type=int)
    parser.add_argument("--dup-consonant", default=1, type=int)
    parser.add_argument("--no-punc", action="store_true")
    parser.add_argument("--reserve-word", type=str, default="")
    parser.add_argument(
        "--reserve-first-column",
        action="store_true",
        help="first column is sentence id",
    )
    ###
    parser.add_argument("--parallel-process-num", default=1, type=int)
    parser.add_argument("--logdir", default="")
    args = parser.parse_args()
    return args


def process_sent(sent, g2p, res_wrds, args):
    """Convert one sentence into a space-joined phoneme string.

    Args:
        sent: the input sentence.
        g2p: callable mapping a text segment to a list of phoneme tokens.
        res_wrds: mapping of reserved words to their replacement token.
        args: namespace providing ``do_filter``, ``lower_case``, ``no_punc``,
            ``dup_vowel``, ``dup_consonant`` and ``use_word_start``.
    """
    sents = pre_process_sent(sent, args.do_filter, args.lower_case, res_wrds)
    pho_seqs = [do_g2p(g2p, s, res_wrds, i == 0) for i, s in enumerate(sents)]
    # A single failed segment marks the whole sentence as failed.
    pho_seq = (
        [FAIL_SENT]
        if [FAIL_SENT] in pho_seqs
        else list(itertools.chain.from_iterable(pho_seqs))
    )
    if args.no_punc:
        pho_seq = remove_punc(pho_seq)
    if args.dup_vowel > 1 or args.dup_consonant > 1:
        pho_seq = dup_pho(pho_seq, args.dup_vowel, args.dup_consonant)
    if args.use_word_start:
        pho_seq = add_word_start(pho_seq)
    return " ".join(pho_seq)


def remove_punc(sent):
    """Drop tokens containing non-alphanumeric characters and collapse spaces.

    ``FAIL_SENT`` is always kept. A ``" "`` token is dropped when it would be
    the first token or would directly follow another ``" "``.
    """
    ns = []
    regex = re.compile("[^a-zA-Z0-9 ]")
    for p in sent:
        if (not regex.search(p)) or p == FAIL_SENT:
            if p == " " and (len(ns) == 0 or ns[-1] == " "):
                continue
            ns.append(p)
    return ns


def do_g2p(g2p, sent, res_wrds, is_first_sent):
    """Return the phoneme tokens for one segment.

    A segment that is a reserved word maps to its single reserved token;
    otherwise ``g2p`` is applied. Segments after the first get a leading
    ``" "`` token as separator.
    """
    if sent in res_wrds:
        pho_seq = [res_wrds[sent]]
    else:
        pho_seq = g2p(sent)
    if not is_first_sent:
        pho_seq = [" "] + pho_seq  # add space to separate
    return pho_seq


def pre_process_sent(sent, do_filter, lower_case, res_wrds):
    """Split a sentence into segments so reserved words stand alone.

    With ``do_filter`` hyphens and em dashes become spaces. With
    ``lower_case`` every segment except reserved words is lower-cased.
    """
    if do_filter:
        sent = re.sub("-", " ", sent)
        sent = re.sub("—", " ", sent)
    if len(res_wrds) > 0:
        wrds = sent.split()
        wrds = ["SPLIT_ME " + w + " SPLIT_ME" if w in res_wrds else w for w in wrds]
        sents = [x.strip() for x in " ".join(wrds).split("SPLIT_ME") if x.strip() != ""]
    else:
        sents = [sent]
    if lower_case:
        sents = [s.lower() if s not in res_wrds else s for s in sents]
    return sents


def dup_pho(sent, dup_v_num, dup_c_num):
    """
    duplicate phoneme defined as cmudict
    http://www.speech.cs.cmu.edu/cgi-bin/cmudict

    Tokens ending in a digit receive ``dup_v_num - 1`` extra copies and other
    tokens containing a word character ``dup_c_num - 1``; copy ``i`` is
    written as ``"<token>-<i>P"``.
    """
    if dup_v_num == 1 and dup_c_num == 1:
        return sent
    ns = []
    for p in sent:
        ns.append(p)
        if re.search(r"\d$", p):
            for i in range(1, dup_v_num):
                ns.append(f"{p}-{i}P")
        elif re.search(r"\w", p):
            for i in range(1, dup_c_num):
                ns.append(f"{p}-{i}P")
    return ns


def add_word_start(sent):
    """Prefix the first token of each word with ``▁`` and drop ``" "`` tokens."""
    ns = []
    do_add = True
    ws = "▁"
    for p in sent:
        if do_add:
            p = ws + p
            do_add = False
        if p == " ":
            do_add = True
        else:
            ns.append(p)
    return ns


def load_reserve_word(reserve_word):
    """Load the reserved-word table from the file ``reserve_word``.

    Each non-blank line must hold exactly two whitespace-separated fields,
    the word and its replacement token. An empty path yields an empty
    mapping, so callers always receive a dict.
    """
    if reserve_word == "":
        return {}
    with open(reserve_word, "r", encoding="utf-8") as fp:
        res_wrds = [x.strip().split() for x in fp.readlines() if x.strip() != ""]
        assert sum([0 if len(x) == 2 else 1 for x in res_wrds]) == 0
        res_wrds = dict(res_wrds)
    return res_wrds


def process_sents(sents, args):
    """Convert every sentence in ``sents`` and return the list of results.

    With ``args.reserve_first_column`` the first whitespace-separated field
    is treated as a sentence id and copied unchanged in front of the result.
    """
    g2p = G2p()
    out_sents = []
    res_wrds = load_reserve_word(args.reserve_word)
    for sent in sents:
        col1 = ""
        if args.reserve_first_column:
            # Tolerate blank lines and lines that only contain the id
            # instead of failing on tuple unpacking.
            parts = sent.split(None, 1)
            col1 = parts[0] if parts else ""
            sent = parts[1] if len(parts) > 1 else ""
        sent = process_sent(sent, g2p, res_wrds, args)
        if args.reserve_first_column and col1 != "":
            sent = f"{col1} {sent}"
        out_sents.append(sent)
    return out_sents


def main():
    """Read ``--data-path``, convert every line, and write ``--out-path``.

    With ``--parallel-process-num`` greater than 1 and ``submitit`` available
    the work is split into that many jobs; otherwise a single in-process run
    is used.
    """
    args = parse()
    out_sents = []
    with open(args.data_path, "r", encoding="utf-8") as fp:
        sent_list = [x.strip() for x in fp.readlines()]
    # Bound up front so the check below is safe for any process count.
    submitit = None
    if args.parallel_process_num > 1:
        try:
            import submitit
        except ImportError:
            logger.warning(
                "submitit is not found and only one job is used to process the data"
            )
            submitit = None

    if args.parallel_process_num <= 1 or submitit is None:
        out_sents = process_sents(sent_list, args)
    else:
        # process sentences with parallel computation
        lsize = len(sent_list) // args.parallel_process_num + 1
        executor = submitit.AutoExecutor(folder=args.logdir)
        executor.update_parameters(timeout_min=1000, cpus_per_task=4)
        jobs = []
        for i in range(args.parallel_process_num):
            job = executor.submit(
                process_sents, sent_list[lsize * i : lsize * (i + 1)], args
            )
            jobs.append(job)
        is_running = True
        while is_running:
            time.sleep(5)
            is_running = sum([job.done() for job in jobs]) < len(jobs)
        out_sents = list(itertools.chain.from_iterable([job.result() for job in jobs]))
    with open(args.out_path, "w", encoding="utf-8") as fp:
        fp.write("\n".join(out_sents) + "\n")


if __name__ == "__main__":
    main()
def gen_whole_word_mask(args, dictionary):
    """Return the whole-word mask for ``dictionary``.

    When ``args.use_mask_whole_words`` is set, the result is a byte tensor
    with one entry per dictionary index that is 1 where the token counts as
    the beginning of a word. Otherwise the work is delegated to
    ``get_whole_word_mask``.
    """
    if not args.use_mask_whole_words:
        # it will mask every token as word leading token, since no bpe model is loaded for phoneme tokens
        return get_whole_word_mask(args, dictionary)

    always_word_start = ("<unk>", "<s>", "</s>", "<pad>")

    def starts_word(index):
        # special elements are always considered beginnings
        if index < dictionary.nspecial:
            return True
        token = dictionary[index]
        return (
            token.startswith("madeupword")
            or token in always_word_start
            or token.startswith("\u2581")
        )

    return torch.ByteTensor([starts_word(i) for i in range(len(dictionary))])
@staticmethod
def add_args(parser):
    """Add task-specific arguments to the parser.

    Registers the ``TranslationTask`` arguments first, then the denoising
    (BART-style) noise options and the multilingual options of this task.
    """
    TranslationTask.add_args(parser)
    # bart setting
    parser.add_argument(
        "--mask",
        default=0.0,
        type=float,
        help="fraction of words/subwords that will be masked",
    )
    parser.add_argument(
        "--mask-random",
        default=0.0,
        type=float,
        help="instead of using [MASK], use random token this often",
    )
    parser.add_argument(
        "--insert",
        default=0.0,
        type=float,
        help="insert this percentage of additional random tokens",
    )
    # The previous help text described a sentence-shuffling proportion, which
    # cannot apply to a value whose default is 3.0.
    parser.add_argument(
        "--poisson-lambda",
        default=3.0,
        type=float,
        help="lambda of the Poisson distribution used to sample span lengths "
        "when --mask-length is span-poisson",
    )
    parser.add_argument(
        "--mask-length",
        default="span-poisson",
        type=str,
        choices=["subword", "word", "span-poisson"],
        help="mask length to choose",
    )
    parser.add_argument(
        "--replace-length",
        default=1,
        type=int,
        help="when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)",
    )

    # multi-lingual
    parser.add_argument(
        "--multilang-sampling-alpha",
        type=float,
        default=1.0,
        help="smoothing alpha for sample ratios across multiple datasets",
    )
    parser.add_argument(
        "--lang-pairs",
        default="",
        metavar="PAIRS",
        help="comma-separated list of language pairs (in training order): phnen-en,phnfr-fr,phnit-it. Do masking",
    )
    parser.add_argument(
        "--lang-pairs-bitext",
        default="",
        metavar="PAIRS",
        help="comma-separated list of language pairs (in training order): en-de,en-fr,de-fr. No masking",
    )
    parser.add_argument("--add-src-lang-token", default=False, action="store_true")
    parser.add_argument("--add-tgt-lang-token", default=False, action="store_true")
    parser.add_argument(
        "--no-whole-word-mask-langs",
        type=str,
        default="",
        metavar="N",
        help="languages without spacing between words dont support whole word masking",
    )
    parser.add_argument(
        "--use-mask-whole-words", default=False, action="store_true"
    )
@classmethod
def setup_task(cls, args, **kwargs):
    """Setup the task.

    Loads ``src_dict.txt`` and ``tgt_dict.txt`` from the first path in
    ``args.data`` and, when language tokens are requested, checks that every
    language tag of the configured pairs exists in the matching dictionary.

    Raises:
        ValueError: if a configured language pair is not of the form
            ``src-tgt``.
    """
    paths = args.data.split(":")
    assert len(paths) > 0
    src_dict = Dictionary.load(
        os.path.join(paths[0], "src_dict.txt")
    )  # assume all languages share a source dictionary
    tgt_dict = Dictionary.load(
        os.path.join(paths[0], "tgt_dict.txt")
    )  # assume all languages share a target dictionary

    # Merge both pair lists and ignore empty entries, so that leaving one
    # (or both) of the options empty does not produce a bogus "" pair.
    lang_pairs = [
        lp
        for lp in (args.lang_pairs + "," + args.lang_pairs_bitext).split(",")
        if lp != ""
    ]
    src_langs = []
    tgt_langs = []
    for lp in lang_pairs:
        parts = lp.split("-")
        if len(parts) < 2:
            raise ValueError(
                "language pair must look like 'src-tgt', got '{}'".format(lp)
            )
        src_langs.append(parts[0])
        tgt_langs.append(parts[1])

    if args.add_src_lang_token:
        for lang in src_langs:
            assert (
                src_dict.index(PairedDenoisingTask.LANG_TAG_TEMPLATE.format(lang))
                != src_dict.unk()
            )
    if args.add_tgt_lang_token:
        for lang in tgt_langs:
            assert (
                tgt_dict.index(PairedDenoisingTask.LANG_TAG_TEMPLATE.format(lang))
                != tgt_dict.unk()
            )

    logger.info("source dictionary: {} types".format(len(src_dict)))
    logger.info("target dictionary: {} types".format(len(tgt_dict)))
    if not hasattr(args, "shuffle_instance"):
        args.shuffle_instance = False
    return cls(args, src_dict, tgt_dict)

def __init__(self, args, src_dict, tgt_dict):
    """Store the task configuration and locate the ``<mask>`` token.

    The source dictionary must contain ``<mask>``; this is asserted.
    """
    super().__init__(args, src_dict, tgt_dict)
    # check mask token
    self.mask_idx = self.src_dict.index("<mask>")
    assert self.mask_idx != self.src_dict.unk()
    self.lang_pairs = args.lang_pairs
    self.lang_pairs_bitext = args.lang_pairs_bitext
    self.args = args
def language_pair_denoising_dataset(
    cls,
    data_path,
    do_mask,
    split,
    src,
    src_dict,
    tgt,
    tgt_dict,
    mask_idx,
    mask_whole_words,
    seed,
    args,
    dataset_impl,
    combine=False,
    left_pad_source=True,
    left_pad_target=False,
    max_source_positions=1024,
    max_target_positions=1024,
    shuffle=True,
    src_lang_id=None,
    tgt_lang_id=None,
):
    """Load one language pair and wrap it as a (denoising) pair dataset.

    Shards are looked up under ``data_path`` as ``{split}.{src}-{tgt}.{lang}``
    and, failing that, with the pair reversed (``{split}.{tgt}-{src}.{lang}``).
    When ``combine`` is true, numbered shards (``{split}1``, ``{split}2``, ...)
    are loaded until one is missing and the shards are concatenated.

    Args:
        data_path: directory holding the indexed datasets.
        do_mask: if false a ``LanguagePairDataset`` is returned; otherwise a
            ``LanguagePairDenoisingDataset`` which additionally receives
            ``mask_idx``, ``mask_whole_words``, ``seed`` and ``args``.
        split: base split name.
        src, tgt: language codes used in the shard file names.
        src_dict, tgt_dict: dictionaries passed to the loader and datasets.
        dataset_impl: passed to ``indexed_dataset.dataset_exists`` and
            ``data_utils.load_indexed_dataset``.
        combine: load numbered shards in addition to the first one.
        max_source_positions, max_target_positions: accepted but not used by
            this method.
        left_pad_source, left_pad_target, shuffle, src_lang_id, tgt_lang_id:
            forwarded unchanged to the returned dataset.

    Raises:
        FileNotFoundError: if the first (un-numbered) shard does not exist.
    """

    def find_prefix(split_name):
        # Probe both pair orders; the probe file is always the *source* side.
        for first, second in ((src, tgt), (tgt, src)):
            prefix = os.path.join(
                data_path, "{}.{}-{}.".format(split_name, first, second)
            )
            if indexed_dataset.dataset_exists(prefix + src, impl=dataset_impl):
                return prefix
        return None

    src_datasets = []
    tgt_datasets = []

    for k in itertools.count():
        split_k = split + (str(k) if k > 0 else "")

        prefix = find_prefix(split_k)
        if prefix is None:
            if k > 0:
                # Ran out of numbered shards.
                break
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, data_path)
            )

        src_datasets.append(
            data_utils.load_indexed_dataset(prefix + src, src_dict, dataset_impl)
        )

        # The target side is optional: the loader may return None.
        tgt_shard = data_utils.load_indexed_dataset(
            prefix + tgt, tgt_dict, dataset_impl
        )
        if tgt_shard is not None:
            tgt_datasets.append(tgt_shard)

        logger.info(
            "{} {} {}-{} {} examples".format(
                data_path, split_k, src, tgt, len(src_datasets[-1])
            )
        )

        if not combine:
            break

    # Either every shard has a target side or none has.
    assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0

    if len(src_datasets) == 1:
        src_dataset = src_datasets[0]
        tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None
    else:
        sample_ratios = [1] * len(src_datasets)
        src_dataset = ConcatDataset(src_datasets, sample_ratios)
        tgt_dataset = (
            ConcatDataset(tgt_datasets, sample_ratios)
            if len(tgt_datasets) > 0
            else None
        )

    tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None

    # Positional and keyword arguments shared by both dataset classes.
    pair_args = (
        src_dataset,
        src_dataset.sizes,
        src_dict,
        tgt_dataset,
        tgt_dataset_sizes,
        tgt_dict,
    )
    pair_kwargs = dict(
        left_pad_source=left_pad_source,
        left_pad_target=left_pad_target,
        eos=None,
        shuffle=shuffle,
        src_lang_id=src_lang_id,
        tgt_lang_id=tgt_lang_id,
    )

    if not do_mask:
        return LanguagePairDataset(*pair_args, **pair_kwargs)

    return LanguagePairDenoisingDataset(
        *pair_args,
        mask_idx,
        mask_whole_words,
        seed,
        args,
        **pair_kwargs,
    )


def _get_sample_prob(self, dataset_lens):
    """
    Get smoothed sampling probability by languages. This helps low resource
    languages by upsampling them.

    ``dataset_lens`` is normalised to probabilities, raised element-wise to
    ``args.multilang_sampling_alpha`` and normalised again.
    """
    prob = dataset_lens / dataset_lens.sum()
    smoothed_prob = prob ** self.args.multilang_sampling_alpha
    return smoothed_prob / smoothed_prob.sum()


def resample_datasets(self, lang_datasets, lang_pairs_all, epoch):
    """Up- or down-sample each language-pair dataset for this epoch.

    Returns ``lang_datasets`` unchanged when ``multilang_sampling_alpha`` is
    exactly 1.0; otherwise returns a new list of ``ResamplingDataset``
    wrappers, sampled with replacement where the size ratio is >= 1.
    """
    # For train subset, additionally up or down sample languages.
    if self.args.multilang_sampling_alpha == 1.0:
        return lang_datasets

    dataset_lengths = np.array(
        [len(d) for d in lang_datasets],
        dtype=float,
    )
    sample_probs = self._get_sample_prob(dataset_lengths)
    logger.info(
        "Sample probability by language pair: {}".format(
            {
                lp: "{0:.4f}".format(sample_probs[idx])
                for idx, lp in enumerate(lang_pairs_all)
            }
        )
    )
    # Ratio between the smoothed target size and the actual size per dataset.
    size_ratio = (sample_probs * dataset_lengths.sum()) / dataset_lengths
    logger.info(
        "Up/Down Sampling ratio by language: {}".format(
            {
                lp: "{0:.2f}".format(size_ratio[idx])
                for idx, lp in enumerate(lang_pairs_all)
            }
        )
    )

    return [
        ResamplingDataset(
            dataset,
            size_ratio=ratio,
            seed=self.args.seed,
            epoch=epoch,
            replace=ratio >= 1.0,
        )
        for dataset, ratio in zip(lang_datasets, size_ratio)
    ]


def load_dataset_only(
    self, split, lang_pairs, do_mask=True, epoch=1, combine=False
):
    """Build the dataset for ``split`` over a comma-separated pair list.

    Args:
        split: split name passed to ``language_pair_denoising_dataset``.
        lang_pairs: comma-separated ``src-tgt`` pairs; must not be empty.
        do_mask: forwarded to ``language_pair_denoising_dataset``.
        epoch: 1-based epoch; selects the data path round-robin from
            ``args.data`` and is forwarded to ``resample_datasets``.
        combine: forwarded to ``language_pair_denoising_dataset``.

    Returns:
        The single pair dataset (wrapped in ``TransformEosLangPairDataset``
        when language tokens are enabled), or a
        ``TransformEosConcatLangPairDataset`` over the resampled datasets
        when there are several pairs.
    """
    paths = utils.split_paths(self.args.data)
    assert len(paths) > 0
    data_path = paths[(epoch - 1) % len(paths)]

    # TODO unk token will be considered as first word too, though it might be an unknown phoneme within a word
    # Per the original note, the whole-word mask is a tensor (size V by 1)
    # indicating whether a token is a word start token.
    mask_whole_src_words = gen_whole_word_mask(self.args, self.src_dict)
    language_without_segmentations = self.args.no_whole_word_mask_langs.split(",")
    use_lang_tokens = (
        self.args.add_src_lang_token or self.args.add_tgt_lang_token
    )

    lang_datasets = []
    eos_bos = []
    lang_pairs = lang_pairs.split(",") if lang_pairs != "" else []
    assert len(lang_pairs) > 0
    for lp in lang_pairs:
        src, tgt = lp.split("-")
        # Languages listed in --no-whole-word-mask-langs get no word mask.
        lang_mask_whole_src_words = (
            mask_whole_src_words
            if src not in language_without_segmentations
            else None
        )

        # Language tags that replace source EOS / target BOS, when enabled.
        end_token = (
            self.source_dictionary.index(
                PairedDenoisingTask.LANG_TAG_TEMPLATE.format(src)
            )
            if self.args.add_src_lang_token
            else None
        )
        bos_token = (
            self.target_dictionary.index(
                PairedDenoisingTask.LANG_TAG_TEMPLATE.format(tgt)
            )
            if self.args.add_tgt_lang_token
            else None
        )

        if use_lang_tokens:
            eos_bos.append((end_token, bos_token))

        dataset = PairedDenoisingTask.language_pair_denoising_dataset(
            data_path,
            do_mask,
            split,
            src,
            self.source_dictionary,
            tgt,
            self.target_dictionary,
            self.mask_idx,
            lang_mask_whole_src_words,
            self.args.seed,
            self.args,
            self.args.dataset_impl,
            combine=combine,
            left_pad_source=utils.eval_bool(self.args.left_pad_source),
            left_pad_target=utils.eval_bool(self.args.left_pad_target),
            max_source_positions=self.args.max_source_positions,
            max_target_positions=self.args.max_target_positions,
            src_lang_id=None,
        )

        lang_datasets.append(dataset)

    if len(lang_datasets) == 0:
        # Only reachable when the assertion above is disabled.
        return None

    if len(lang_datasets) == 1:
        dataset = lang_datasets[0]
        if use_lang_tokens:
            end_token, bos_token = eos_bos[0]
            dataset = TransformEosLangPairDataset(
                dataset,
                src_eos=self.source_dictionary.eos(),
                new_src_eos=end_token,
                tgt_bos=self.target_dictionary.eos(),
                new_tgt_bos=bos_token,
            )
        return dataset

    end_tokens = [item[0] for item in eos_bos if item[0] is not None]
    bos_tokens = [item[1] for item in eos_bos if item[1] is not None]
    lang_datasets = self.resample_datasets(lang_datasets, lang_pairs, epoch)
    return TransformEosConcatLangPairDataset(
        lang_datasets,
        self.source_dictionary.eos(),
        self.target_dictionary.eos(),
        new_src_eos=end_tokens,
        new_tgt_bos=bos_tokens,
    )


# split in (train, valid, test, ...)
+ def load_dataset(self, split, epoch=1, combine=False, **kwargs): + self.datasets[split] = self.load_dataset_only( + split, self.lang_pairs, epoch=epoch, combine=combine + ) diff --git a/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_denoise_pretrain.py b/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_denoise_pretrain.py new file mode 100644 index 0000000..3ad8e1c --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_denoise_pretrain.py @@ -0,0 +1,654 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import logging +import os +import re +from argparse import Namespace +from pathlib import Path + +from fairseq.data import ConcatDataset, Dictionary, encoders +from fairseq.data.audio.multi_modality_dataset import ( + FileAudioDatasetWrapper, + ModalityDatasetItem, + MultiModalityDataset, +) +from fairseq.data.audio.speech_to_text_joint_dataset import ( + S2TJointDataConfig, + SpeechToTextJointDatasetCreator, +) +from fairseq.data.iterators import GroupedEpochBatchIterator +from fairseq.tasks import register_task + +from .pair_denoising import PairedDenoisingTask + +logger = logging.getLogger(__name__) + + +@register_task("speech_text_joint_denoising") +class SpeechTextJointDenoisingPreTask(PairedDenoisingTask): + """ + Joint denoising training task for speech and text. 
+ """ + + SIL_TOKEN = "sil" + + @classmethod + def add_args(cls, parser): + PairedDenoisingTask.add_args(parser) + # set max tokens and position + parser.add_argument( + "--max-text-tokens", + type=int, + metavar="N", + default=1024, + help="maximum samples for encoder text input ", + ) + parser.add_argument( + "--max-speech-tokens", + type=int, + metavar="N", + default=50000, + help="maximum samples for encoder speech input ", + ) + parser.add_argument( + "--max-speech-positions", + type=int, + metavar="N", + default=400, + help="maximum tokens for per encoder text input ", + ) + + parser.add_argument( + "--max-sample-size", + type=int, + metavar="N", + default=32000, + help="max sample size to crop to for batching (unsupervised speech) ", + ) + parser.add_argument( + "--min-sample-size", + type=int, + metavar="N", + default=4000, + help="min sample size to crop to for batching (unsupervised speech) ", + ) + + # set mini-batch ratio for different modalities/subtasks + # s2p + parser.add_argument( + "--supervised-speech-sample-ratio", + default="1", + type=str, + metavar="N", + help="Multiple Ratio for speech dataset with transcripts ", + ) + # s2t + parser.add_argument( + "--supervised-speech-s2s-sample-ratio", + default="1", + type=str, + metavar="N", + help="Multiple Ratio for speech dataset with transcripts ", + ) + # ssl + parser.add_argument( + "--unsupervised-speech-sample-ratio", + default="1", + type=str, + metavar="N", + help="Multiple Ratio for speech dataset without transcripts ", + ) + # t2t with monolingual data (masking) + parser.add_argument( + "--text-sample-ratio", + default="1", + type=str, + metavar="N", + help="Multiple Ratio for text set ", + ) + # t2t with parallel data (no masking) + parser.add_argument( + "--bitext-sample-ratio", + default="1", + type=str, + metavar="N", + help="Multiple Ratio for text set (bitext) ", + ) + # train_subset = "train", 'valid' or so + # parallel data is loaded according to string lang_pairs and 
lang_pairs_no_mask from args.data + # (un)supervised speech is loaded from args.(un)sup_speech_{train,valid}_subset + parser.add_argument( + "--sup-speech-data", default="", help="path to supervised speech data" + ) + parser.add_argument( + "--sup-speech-train-subset", + default="", + help="supervised speech training subsets", + ) + parser.add_argument( + "--sup-speech-valid-subset", + default="", + help="supervised speech validation subsets", + ) + parser.add_argument( + "--config-yaml", + default="config.yaml", + help="supervised speech configuration yaml file", + ) + parser.add_argument( + "--sup-speech-s2s-data", default="", help="path to supervised speech data" + ) + parser.add_argument( + "--sup-speech-s2s-train-subset", + default="", + help="supervised speech training subsets", + ) + parser.add_argument( + "--sup-speech-s2s-valid-subset", + default="", + help="supervised speech validation subsets", + ) + parser.add_argument( + "--config-s2s-yaml", + default="config.yaml", + help="supervised speech configuration yaml file", + ) + parser.add_argument( + "--unsup-speech-train-data", + default="", + help="path to unsupervised speech training data (tsv)", + ) + parser.add_argument( + "--unsup-speech-valid-data", + default="", + help="path to unsupervised speech valid data (tsv)", + ) + parser.add_argument( + "--sample-rate", + type=int, + metavar="N", + default=16000, + help="input audio sampling rate", + ) + parser.add_argument( + "--no-emb-update-unsup", + default=False, + action="store_true", + help="no update for output embedding during unsupervised_speech mode", + ) + parser.add_argument("--same-data-update", default=False, action="store_true") + + # used for sup_speech_ali + parser.add_argument( + "--use-sup-speech-ctc", + default=False, + action="store_true", + help="use speech_sup_ctc instead of speech_sup_ali", + ) + + @classmethod + def setup_task(cls, args, **kwargs): + """Setup the task.""" + paths = args.data.split(":") + assert len(paths) > 0 + 
src_dict = Dictionary.load( + os.path.join(paths[0], "src_dict.txt") + ) # assume all languages share a source dictionary + tgt_dict = Dictionary.load( + os.path.join(paths[0], "tgt_dict.txt") + ) # assume all languages share a target dictionary + + lang_pairs = args.lang_pairs + "," + args.lang_pairs_bitext + lang_pairs = re.sub(",$", "", re.sub("^,", "", lang_pairs)) + if lang_pairs != "": + src_langs = [lp.split("-")[0] for lp in lang_pairs.split(",")] + tgt_langs = [lp.split("-")[1] for lp in lang_pairs.split(",")] + else: + src_langs = [] + tgt_langs = [] + + if args.add_src_lang_token: + for lang in src_langs: + assert ( + src_dict.index(PairedDenoisingTask.LANG_TAG_TEMPLATE.format(lang)) + != src_dict.unk() + ) + if args.add_tgt_lang_token: + for lang in tgt_langs: + assert ( + tgt_dict.index(PairedDenoisingTask.LANG_TAG_TEMPLATE.format(lang)) + != tgt_dict.unk() + ) + + logger.info("source dictionary: {} types".format(len(src_dict))) + logger.info("target dictionary: {} types".format(len(tgt_dict))) + if not hasattr(args, "shuffle_instance"): + args.shuffle_instance = False + return cls(args, src_dict, tgt_dict) + + def __init__(self, args, src_dict, tgt_dict): + super().__init__(args, src_dict, tgt_dict) + self.data_cfg = S2TJointDataConfig( + Path(args.sup_speech_data) / args.config_yaml + ) + logger.info( + f"load supervised speech data configure from {Path(args.sup_speech_data) / args.config_yaml}" + ) + self.data_s2s_cfg = ( + S2TJointDataConfig(Path(args.sup_speech_s2s_data) / args.config_s2s_yaml) + if args.sup_speech_s2s_train_subset != "" + else None + ) + if self.data_s2s_cfg is not None: + logger.info( + f"load supervised sequece to sequence speech data configure from {Path(args.sup_speech_s2s_data) / args.config_yaml}" + ) + + def parse_data_ratio(sample_ratio): + ratios = sample_ratio.split(",") + if len(ratios) == 1: + return [float(ratios[0])] + epoch_ratios = [] + for item in ratios: + ep, r = item.split(":") + ep = int(ep) + r = float(r) + 
assert ep > 0 # epoch is 1 based + assert ep >= len(epoch_ratios) + + if len(epoch_ratios) == 0: + epoch_ratios.append( + r + ) # epoch_ratios[0] is not used, but we still set it to the first value to make thing simple. + while len(epoch_ratios) < ep: + epoch_ratios.append(epoch_ratios[-1]) + epoch_ratios.append(r) + return epoch_ratios + + self.sup_ratio = parse_data_ratio(args.supervised_speech_sample_ratio) + self.sup_s2s_ratio = parse_data_ratio(args.supervised_speech_s2s_sample_ratio) + self.text_ratio = parse_data_ratio(args.text_sample_ratio) + self.bitext_ratio = parse_data_ratio(args.bitext_sample_ratio) + self.unsup_ratio = parse_data_ratio(args.unsupervised_speech_sample_ratio) + self.sample_mode = None + + def build_model(self, args): + args.input_feat_per_channel = self.data_cfg.input_feat_per_channel + args.input_channels = self.data_cfg.input_channels + return super().build_model(args) + + def build_tokenizer(self, data_cfg, msg=""): + logger.info(f"pre-tokenizer {msg}: {data_cfg.pre_tokenizer}") + return encoders.build_tokenizer(Namespace(**data_cfg.pre_tokenizer)) + + def build_bpe(self, data_cfg, msg=""): + logger.info(f"tokenizer {msg}: {data_cfg.bpe_tokenizer}") + return encoders.build_bpe(Namespace(**data_cfg.bpe_tokenizer)) + + @classmethod + def resolve_data_type(cls, split, use_sup_speech_ctc): + if len(split.split("_")) == 1: + # default case, train or valid + is_train = split + dtype = "text" + else: + is_train, dtype = split.split("_", 1) + is_train = True if is_train == "train" else False + if dtype == "sup_speech": + dtype = "sup_speech_ctc" if use_sup_speech_ctc else "sup_speech_ali" + assert dtype in ( + "text", + "bitext", + "sup_speech_ali", + "sup_speech_s2s", + "unsup_speech", + "sup_speech_ctc", + ), f"failed resolving {split} (it resulted into: {dtype} ; is_train={is_train})" + return is_train, dtype + + def create_modalitydatasetitem(self, dtype, dataset): + dsitem = None + if dtype in ("text", "bitext"): + dsitem = 
ModalityDatasetItem( + dtype, + dataset, + (self.args.max_source_positions, self.args.max_target_positions), + self.args.max_text_tokens, + self.args.batch_size, + ) + elif dtype in ("sup_speech_ctc", "sup_speech_ali", "sup_speech_s2s"): + dsitem = ModalityDatasetItem( + dtype, + dataset, + (self.args.max_speech_positions, self.args.max_target_positions), + self.args.max_speech_tokens, + self.args.batch_size, + ) + elif dtype == "unsup_speech": + dsitem = ModalityDatasetItem( + dtype, dataset, 1e8, self.args.max_speech_tokens, self.args.batch_size + ) + else: + raise ValueError(f"{dtype} is not supported") + return dsitem + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + def _get_sup_src_tgt_dict(src_dict, tgt_dict, use_s2s_sup_decoder): + if use_s2s_sup_decoder: + return None, tgt_dict + # use src_dict as tgt_dict here, since we use source dictionary as target for forcealignment + return None, src_dict + + is_train, dtype = self.resolve_data_type(split, self.args.use_sup_speech_ctc) + + # Note we use --add-tgt-lang-token instead of data_cfg.prepend_tgt_lang_tag_no_change to set target language tag in the text dataset + # Verify add_tgt_lang_token and prepend_tgt_lang_tag_no_change are same + + # Note we use --multilang-sampling-alpha instead of data_cfg.sampling_text_alpha to set text data sampling + if is_train: + msets = [] + # train split, load everything into one + if self.lang_pairs != "": + text_dataset = self.load_dataset_only( + "train", self.lang_pairs, epoch=epoch, combine=combine + ) + dsitem = self.create_modalitydatasetitem("text", text_dataset) + msets.append(dsitem) + if self.lang_pairs_bitext != "": # load bitext + bitext_dataset = self.load_dataset_only( + "train_bitext", + self.lang_pairs_bitext, + do_mask=False, + epoch=epoch, + combine=combine, + ) + dsitem = self.create_modalitydatasetitem("bitext", bitext_dataset) + msets.append(dsitem) + if self.args.sup_speech_train_subset != "": + pre_tokenizer = 
self.build_tokenizer(self.data_cfg) + bpe_tokenizer = self.build_bpe(self.data_cfg) + + append_eos = True + sup_speech_type = "sup_speech_ali" + if self.args.use_sup_speech_ctc: + # CTC mode + sup_speech_type = "sup_speech_ctc" + append_eos = False # CTC doesn't need eos in the target + + src_dict, tgt_dict = _get_sup_src_tgt_dict( + self.src_dict, self.tgt_dict, False + ) + sup_speech_dataset = SpeechToTextJointDatasetCreator.from_tsv( + self.args.sup_speech_data, + self.data_cfg, + self.args.sup_speech_train_subset, + tgt_dict=tgt_dict, + src_dict=src_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + src_pre_tokenizer=None, + src_bpe_tokenizer=None, + is_train_split=is_train, + epoch=epoch, + seed=self.args.seed, + append_eos=append_eos, + ) + dsitem = self.create_modalitydatasetitem( + sup_speech_type, sup_speech_dataset + ) + msets.append(dsitem) + + if self.args.sup_speech_s2s_train_subset != "": + pre_tokenizer = self.build_tokenizer(self.data_s2s_cfg, msg="(s2s)") + bpe_tokenizer = self.build_bpe(self.data_s2s_cfg, msg="(s2s)") + + # make sure self.data_cfg.prepend_tgt_lang_tag_no_change == self.args.add_tgt_lang_token + src_dict, tgt_dict = _get_sup_src_tgt_dict( + self.src_dict, self.tgt_dict, True + ) + sup_speech_s2s_dataset = SpeechToTextJointDatasetCreator.from_tsv( + self.args.sup_speech_s2s_data, + self.data_s2s_cfg, + self.args.sup_speech_s2s_train_subset, + tgt_dict=tgt_dict, + src_dict=src_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + src_pre_tokenizer=None, + src_bpe_tokenizer=None, + is_train_split=is_train, + epoch=epoch, + seed=self.args.seed, + ) + dsitem = self.create_modalitydatasetitem( + "sup_speech_s2s", sup_speech_s2s_dataset + ) + msets.append(dsitem) + if self.args.unsup_speech_train_data != "": + unsup_speech_dataset = FileAudioDatasetWrapper( + self.args.unsup_speech_train_data, + self.args.sample_rate, + max_sample_size=self.args.max_sample_size, + 
min_sample_size=self.args.min_sample_size, + normalize=False, + ) + dsitem = self.create_modalitydatasetitem( + "unsup_speech", unsup_speech_dataset + ) + msets.append(dsitem) + + pre_train_dataset = MultiModalityDataset(msets) + self.datasets[split] = pre_train_dataset + else: # validation split, load them for each type of data + if dtype == "text": + text_dataset = self.load_dataset_only( + split, self.lang_pairs, epoch=epoch, combine=combine + ) + dsitem = self.create_modalitydatasetitem("text", text_dataset) + self.datasets[split] = MultiModalityDataset([dsitem]) + elif dtype == "bitext": + bitext_dataset = self.load_dataset_only( + split, + self.lang_pairs_bitext, + do_mask=False, + epoch=epoch, + combine=combine, + ) + dsitem = self.create_modalitydatasetitem("bitext", bitext_dataset) + self.datasets[split] = MultiModalityDataset([dsitem]) + + elif dtype in ("sup_speech_ctc", "sup_speech_ali"): + assert self.args.sup_speech_valid_subset != "" + pre_tokenizer = self.build_tokenizer(self.data_cfg) + bpe_tokenizer = self.build_bpe(self.data_cfg) + append_eos = True + if dtype == "sup_speech_ctc": + # CTC mode + append_eos = False # CTC doesn't need eos + assert self.args.use_sup_speech_ctc + + datasets = [] + for split_name in self.args.sup_speech_valid_subset.split(","): + src_dict, tgt_dict = _get_sup_src_tgt_dict( + self.src_dict, self.tgt_dict, False + ) + datasets.append( + SpeechToTextJointDatasetCreator.from_tsv( + self.args.sup_speech_data, + self.data_cfg, + split_name, + tgt_dict=tgt_dict, + src_dict=src_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + src_pre_tokenizer=None, + src_bpe_tokenizer=None, + is_train_split=is_train, + epoch=epoch, + seed=self.args.seed, + append_eos=append_eos, + ) + ) + + dset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) + dsitem = self.create_modalitydatasetitem(dtype, dset) + self.datasets[split] = MultiModalityDataset([dsitem]) + + elif dtype == "sup_speech_s2s": + assert 
self.args.sup_speech_s2s_valid_subset != "" + pre_tokenizer = self.build_tokenizer(self.data_s2s_cfg) + bpe_tokenizer = self.build_bpe(self.data_s2s_cfg) + datasets = [] + for split_name in self.args.sup_speech_s2s_valid_subset.split(","): + src_dict, tgt_dict = _get_sup_src_tgt_dict( + self.src_dict, self.tgt_dict, True + ) + datasets.append( + SpeechToTextJointDatasetCreator.from_tsv( + self.args.sup_speech_s2s_data, + self.data_s2s_cfg, + split_name, + tgt_dict=tgt_dict, + src_dict=src_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + src_pre_tokenizer=None, + src_bpe_tokenizer=None, + is_train_split=is_train, + epoch=epoch, + seed=self.args.seed, + ) + ) + + dset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) + dsitem = self.create_modalitydatasetitem("sup_speech_s2s", dset) + self.datasets[split] = MultiModalityDataset([dsitem]) + elif dtype == "unsup_speech": + assert self.args.unsup_speech_valid_data != "" + unsup_speech_dataset = FileAudioDatasetWrapper( + self.args.unsup_speech_valid_data, + self.args.sample_rate, + max_sample_size=self.args.max_sample_size, + min_sample_size=self.args.min_sample_size, + normalize=False, + ) + dsitem = self.create_modalitydatasetitem( + "unsup_speech", unsup_speech_dataset + ) + self.datasets[split] = MultiModalityDataset([dsitem]) + else: + raise ValueError(f"Unsupported type {dtype}") + + def get_sample_ratio(self, epoch): + sup_ratio = ( + self.sup_ratio[epoch] if len(self.sup_ratio) > epoch else self.sup_ratio[-1] + ) + sup_s2s_ratio = ( + self.sup_s2s_ratio[epoch] + if len(self.sup_s2s_ratio) > epoch + else self.sup_s2s_ratio[-1] + ) + unsup_ratio = ( + self.unsup_ratio[epoch] + if len(self.unsup_ratio) > epoch + else self.unsup_ratio[-1] + ) + text_ratio = ( + self.text_ratio[epoch] + if len(self.text_ratio) > epoch + else self.text_ratio[-1] + ) + bitext_ratio = ( + self.bitext_ratio[epoch] + if len(self.bitext_ratio) > epoch + else self.bitext_ratio[-1] + ) + return text_ratio, 
bitext_ratio, sup_ratio, sup_s2s_ratio, unsup_ratio + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=0, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + + assert isinstance(dataset, MultiModalityDataset) + if len(dataset.id_to_mode) == 1: + max_positions = dataset.max_positions[0] + max_tokens = dataset.max_tokens[0] + max_sentences = dataset.max_sentences[0] + return super().get_batch_iterator( + dataset, + max_tokens, + max_sentences, + max_positions, + ignore_invalid_inputs, + required_batch_size_multiple, + seed, + num_shards, + shard_id, + num_workers, + epoch, + data_buffer_size, + disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + ) + + mult_ratio = [] + ( + text_ratio, + bitext_ratio, + sup_ratio, + sup_s2s_ratio, + unsup_ratio, + ) = self.get_sample_ratio(epoch) + for mode in dataset.id_to_mode: + if mode in ("sup_speech_ctc", "sup_speech_ali"): + mult_ratio.append(sup_ratio) + elif mode == "sup_speech_s2s": + mult_ratio.append(sup_s2s_ratio) + elif mode == "text": + mult_ratio.append(text_ratio) + elif mode == "bitext": + mult_ratio.append(bitext_ratio) + elif mode == "unsup_speech": + mult_ratio.append(unsup_ratio) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + batch_samplers = dataset.get_batch_samplers( + mult_ratio, required_batch_size_multiple, seed + ) + + # return a reusable, sharded iterator + epoch_iter = GroupedEpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_samplers=batch_samplers, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + mult_rate=max(self.args.update_freq) if self.args.same_data_update else 1, + 
buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + ) + self.dataset_to_epoch_iter[dataset] = {} # refresh it every epoch + return epoch_iter diff --git a/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_joint.py b/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_joint.py new file mode 100644 index 0000000..bb04f14 --- /dev/null +++ b/fairseq/examples/speech_text_joint_to_text/tasks/speech_text_joint.py @@ -0,0 +1,377 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import logging +import os +from argparse import Namespace +from pathlib import Path + +import torch +from fairseq.data import ( + encoders, + Dictionary, + ResamplingDataset, + TransformEosLangPairDataset, + ConcatDataset, +) +from fairseq.data.iterators import GroupedEpochBatchIterator +from fairseq.data.audio.multi_modality_dataset import ( + MultiModalityDataset, + LangPairMaskDataset, + ModalityDatasetItem, +) +from fairseq.data.audio.speech_to_text_dataset import ( + SpeechToTextDataset, + SpeechToTextDatasetCreator, +) +from fairseq.data.audio.speech_to_text_joint_dataset import ( + S2TJointDataConfig, + SpeechToTextJointDatasetCreator, +) +from fairseq.tasks import register_task +from fairseq.tasks.speech_to_text import SpeechToTextTask +from fairseq.tasks.translation import load_langpair_dataset + +logger = logging.getLogger(__name__) +LANG_TAG_TEMPLATE = "<lang:{}>" + + +@register_task("speech_text_joint_to_text") +class SpeechTextJointToTextTask(SpeechToTextTask): + """ + Task for joint training speech and text to text. 
+ """ + + @classmethod + def add_args(cls, parser): + """Add task-specific arguments to the parser.""" + super(SpeechTextJointToTextTask, cls).add_args(parser) + ### + parser.add_argument( + "--parallel-text-data", + default="", + help="path to parallel text data directory", + ) + parser.add_argument( + "--max-tokens-text", + type=int, + metavar="N", + help="maximum tokens for encoder text input ", + ) + parser.add_argument( + "--max-positions-text", + type=int, + metavar="N", + default=400, + help="maximum tokens for per encoder text input ", + ) + parser.add_argument( + "--langpairs", + default=None, + metavar="S", + help='language pairs for text training, separated with ","', + ) + parser.add_argument( + "--speech-sample-ratio", + default=1, + type=float, + metavar="N", + help="Multiple Ratio for speech dataset with transcripts ", + ) + parser.add_argument( + "--text-sample-ratio", + default=1, + type=float, + metavar="N", + help="Multiple Ratio for text set ", + ) + parser.add_argument( + "--update-mix-data", + action="store_true", + help="use mixed data in one update when update-freq > 1", + ) + parser.add_argument( + "--load-speech-only", action="store_true", help="load speech data only", + ) + parser.add_argument( + "--mask-text-ratio", + type=float, + metavar="V", + default=0.0, + help="mask V source tokens for text only mode", + ) + parser.add_argument( + "--mask-text-type", + default="random", + choices=["random", "tail"], + help="mask text typed", + ) + parser.add_argument( + "--noise-token", + default="", + help="noise token for masking src text tokens if mask-text-ratio > 0", + ) + parser.add_argument( + "--infer-target-lang", + default="", + metavar="S", + help="target language for inference", + ) + + def __init__(self, args, src_dict, tgt_dict, infer_tgt_lang_id=None): + super().__init__(args, tgt_dict) + self.src_dict = src_dict + self.data_cfg = S2TJointDataConfig(Path(args.data) / args.config_yaml) + assert self.tgt_dict.pad() == 
self.src_dict.pad() + assert self.tgt_dict.eos() == self.src_dict.eos() + self.speech_only = args.load_speech_only + self._infer_tgt_lang_id = infer_tgt_lang_id + + @classmethod + def setup_task(cls, args, **kwargs): + """Setup the task (e.g., load dictionaries).""" + data_cfg = S2TJointDataConfig(Path(args.data) / args.config_yaml) + tgt_dict_path = Path(args.data) / data_cfg.vocab_filename + src_dict_path = Path(args.data) / data_cfg.src_vocab_filename + if (not os.path.isfile(src_dict_path)) or (not os.path.isfile(tgt_dict_path)): + raise FileNotFoundError("Dict not found: {}".format(args.data)) + src_dict = Dictionary.load(src_dict_path.as_posix()) + tgt_dict = Dictionary.load(tgt_dict_path.as_posix()) + + print("| src dictionary: {} types".format(len(src_dict))) + print("| tgt dictionary: {} types".format(len(tgt_dict))) + + if args.parallel_text_data != "": + if not os.path.isabs(args.parallel_text_data): + args.parallel_text_data = os.path.join( + args.data, args.parallel_text_data + ) + + if args.langpairs is None: + raise Exception( + "Could not infer language pair, please provide it explicitly" + ) + infer_tgt_lang_id = None + if args.infer_target_lang != "" and data_cfg.prepend_tgt_lang_tag_no_change: + tgt_lang_tag = SpeechToTextDataset.LANG_TAG_TEMPLATE.format( + args.infer_target_lang + ) + infer_tgt_lang_id = tgt_dict.index(tgt_lang_tag) + assert infer_tgt_lang_id != tgt_dict.unk() + return cls(args, src_dict, tgt_dict, infer_tgt_lang_id=infer_tgt_lang_id) + + def load_langpair_dataset( + self, prepend_tgt_lang_tag=False, sampling_alpha=1.0, epoch=0 + ): + lang_pairs = [] + text_dataset = None + split = "train" + for lp in self.args.langpairs.split(","): + src, tgt = lp.split("-") + text_dataset = load_langpair_dataset( + self.args.parallel_text_data, + split, + src, + self.src_dict, + tgt, + self.tgt_dict, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + 
def load_dataset(self, split, epoch=1, combine=False, **kwargs):
    """Create the dataset for ``split`` and store it in ``self.datasets``.

    A speech dataset is always built from the TSV manifest. When the split
    name starts with "train" and ``args.parallel_text_data`` is non-empty,
    a parallel-text dataset is loaded as well (wrapped in
    ``LangPairMaskDataset`` when ``args.mask_text_ratio > 0``) and the two
    are combined into a ``MultiModalityDataset``.

    Args:
        split (str): name of the split (e.g., train, valid, test)
        epoch (int): forwarded to the dataset constructors.
        combine: not read by this method.
        **kwargs: not read by this method.
    """
    args = self.args
    training = split.startswith("train")

    # A dict literal evaluates its values in order, so the four builders
    # still run in the sequence target-pre, target-bpe, source-pre, source-bpe.
    tokenizers = {
        "pre_tokenizer": self.build_tokenizer(args),
        "bpe_tokenizer": self.build_bpe(args),
        "src_pre_tokenizer": self.build_src_tokenizer(args),
        "src_bpe_tokenizer": self.build_src_bpe(args),
    }
    dataset = SpeechToTextJointDatasetCreator.from_tsv(
        args.data,
        self.data_cfg,
        split,
        self.tgt_dict,
        src_dict=None if self.speech_only else self.src_dict,
        is_train_split=training,
        epoch=epoch,
        seed=args.seed,
        **tokenizers,
    )

    text_dataset = None
    if args.parallel_text_data != "" and training:
        text_dataset = self.load_langpair_dataset(
            self.data_cfg.prepend_tgt_lang_tag_no_change, 1.0, epoch=epoch
        )
        if args.mask_text_ratio > 0:
            # Masked positions use <unk> unless an explicit noise token
            # was configured.
            if args.noise_token == "":
                noise_id = self.src_dict.unk()
            else:
                noise_id = self.src_dict.index(args.noise_token)
            text_dataset = LangPairMaskDataset(
                text_dataset,
                src_bos=self.src_dict.bos(),
                src_eos=self.src_dict.eos(),
                noise_id=noise_id,
                mask_ratio=args.mask_text_ratio,
                mask_type=args.mask_text_type,
            )

    if text_dataset is not None:
        speech_item = ModalityDatasetItem(
            "sup_speech",
            dataset,
            (args.max_source_positions, args.max_target_positions),
            args.max_tokens,
            args.batch_size,
        )
        # The text modality falls back to the speech token budget when no
        # dedicated one is configured.
        text_max_tokens = args.max_tokens_text
        if text_max_tokens is None:
            text_max_tokens = args.max_tokens
        text_item = ModalityDatasetItem(
            "text",
            text_dataset,
            (args.max_positions_text, args.max_target_positions),
            text_max_tokens,
            args.batch_size,
        )
        dataset = MultiModalityDataset([speech_item, text_item])

    self.datasets[split] = dataset
def get_batch_iterator(
    self,
    dataset,
    max_tokens=None,
    max_sentences=None,
    max_positions=None,
    ignore_invalid_inputs=False,
    required_batch_size_multiple=1,
    seed=1,
    num_shards=1,
    shard_id=0,
    num_workers=0,
    epoch=0,
    data_buffer_size=0,
    disable_iterator_cache=False,
    skip_remainder_batch=False,
    grouped_shuffling=False,
    update_epoch_batch_itr=False,
):
    """Return a batch iterator over ``dataset``.

    A ``MultiModalityDataset`` is served by a ``GroupedEpochBatchIterator``
    built from the dataset's own batch samplers, using
    ``args.speech_sample_ratio`` and ``args.text_sample_ratio``. On that
    path only ``required_batch_size_multiple``, ``seed``, ``num_shards``,
    ``shard_id``, ``num_workers``, ``epoch``, ``data_buffer_size`` and
    ``skip_remainder_batch`` are used. Any other dataset is delegated to
    the parent implementation.

    NOTE(review): ``grouped_shuffling`` is accepted but not forwarded on
    either path -- confirm whether that is intentional.
    """
    if isinstance(dataset, MultiModalityDataset):
        sample_ratios = [
            self.args.speech_sample_ratio,
            self.args.text_sample_ratio,
        ]
        assert len(dataset.datasets) == 2

        # initialize the dataset with the correct starting epoch
        dataset.set_epoch(epoch)

        samplers = dataset.get_batch_samplers(
            sample_ratios, required_batch_size_multiple, seed
        )

        if self.args.update_mix_data:
            mult_rate = 1
        else:
            mult_rate = max(self.args.update_freq)

        # return a reusable, sharded iterator
        grouped_iter = GroupedEpochBatchIterator(
            dataset=dataset,
            collate_fn=dataset.collater,
            batch_samplers=samplers,
            seed=seed,
            num_shards=num_shards,
            shard_id=shard_id,
            num_workers=num_workers,
            epoch=epoch,
            mult_rate=mult_rate,
            buffer_size=data_buffer_size,
            skip_remainder_batch=skip_remainder_batch,
        )
        self.dataset_to_epoch_iter[dataset] = {}  # refresh it every epoch
        return grouped_iter

    return super(SpeechTextJointToTextTask, self).get_batch_iterator(
        dataset,
        max_tokens,
        max_sentences,
        max_positions,
        ignore_invalid_inputs,
        required_batch_size_multiple,
        seed,
        num_shards,
        shard_id,
        num_workers,
        epoch,
        data_buffer_size,
        disable_iterator_cache,
        skip_remainder_batch=skip_remainder_batch,
        update_epoch_batch_itr=update_epoch_batch_itr,
    )
100644 index 0000000..f03f6a3 --- /dev/null +++ b/fairseq/examples/speech_to_speech/README.md @@ -0,0 +1,7 @@ +# Speech to speech translation (S2ST) + +We provide the implementation and resources for the following work on speech-to-speech translation (S2ST): + +* [Direct speech-to-speech translation with discrete units (Lee et al. 2021)](docs/direct_s2st_discrete_units.md) +* [Textless Speech-to-Speech Translation on Real Data (Lee et al. 2021)](docs/textless_s2st_real_data.md) +* [Enhanced Direct Speech-to-Speech Translation Using Self-supervised Pre-training and Data Augmentation](docs/enhanced_direct_s2st_discrete_units.md) diff --git a/fairseq/examples/speech_to_speech/__init__.py b/fairseq/examples/speech_to_speech/__init__.py new file mode 100644 index 0000000..812b3c3 --- /dev/null +++ b/fairseq/examples/speech_to_speech/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import unity # noqa diff --git a/fairseq/examples/speech_to_speech/asr_bleu/README.md b/fairseq/examples/speech_to_speech/asr_bleu/README.md new file mode 100644 index 0000000..6a7ea7f --- /dev/null +++ b/fairseq/examples/speech_to_speech/asr_bleu/README.md @@ -0,0 +1,34 @@ +# ASR-BLEU evaluation toolkit + +This toolkit provides a set of public ASR models used for evaluation of different speech-to-speech translation systems at FAIR. It enables easier score comparisons between different system's outputs. + +The ASRGenerator wraps different CTC-based ASR models from HuggingFace and fairseq code bases. Torchaudio CTC decoder is built on top of it to decode given audio files. + +Please see `asr_model_cfgs.json` for a list of languages covered currently. 
+ +The high-level pipeline is simple by design: given a lang tag, the script loads the ASR model, transcribes the model's predicted audio, and computes the BLEU score against provided reference translations using sacrebleu. + +# Dependencies + +Please see `requirements.txt`. + +# Usage examples + +This toolkit has been used with: + +* Speechmatrix project: https://github.com/facebookresearch/fairseq/tree/ust/examples/speech_matrix. + +* Hokkien speech-to-speech translation project: https://github.com/facebookresearch/fairseq/tree/ust/examples/hokkien. + +# Standalone run example + +High-level example, please substitute arguments per your case: + +```bash +python compute_asr_bleu.py --lang <LANG> \ +--audio_dirpath <PATH_TO_AUDIO_DIR> \ +--reference_path <PATH_TO_REFERENCES_FILE> \ +--reference_format txt +``` + +For more details about arguments please see the script argparser help. diff --git a/fairseq/examples/speech_to_speech/asr_bleu/__init__.py b/fairseq/examples/speech_to_speech/asr_bleu/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/speech_to_speech/asr_bleu/asr_model_cfgs.json b/fairseq/examples/speech_to_speech/asr_bleu/asr_model_cfgs.json new file mode 100644 index 0000000..d0a5f3e --- /dev/null +++ b/fairseq/examples/speech_to_speech/asr_bleu/asr_model_cfgs.json @@ -0,0 +1,198 @@ +{ + "en": { + "oct22": { + "desc": "Wav2Vec 2.0 Large (LV-60) + Self Training from https://github.com/facebookresearch/fairseq/tree/main/examples/wav2vec#pre-trained-models", + "ckpt_path": "https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_960h_pl.pt", + "dict_path": "https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt", + "model_type": "fairseq", + "lang": "en", + "post_process": "collapse" + } + }, + "hok": { + "oct22": { + "desc": "Hokkien ASR model, for details check [TODO add paper link]", + "ckpt_path": "https://dl.fbaipublicfiles.com/ust_asr/hok/checkpoint_best.pt", + "dict_path":
"https://dl.fbaipublicfiles.com/ust_asr/hok/dict.ltr.txt", + "model_type": "fairseq", + "lang": "hok", + "post_process": "none" + } + }, + "es": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-xlsr-53-spanish", + "model_type": "hf", + "lang": "es", + "post_process": "collapse" + } + }, + "fr": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-fr-voxpopuli-french", + "model_type": "hf", + "lang": "fr", + "post_process": "collapse" + } + }, + "zh": { + "oct22": { + "model_path": "ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt", + "model_type": "hf", + "lang": "zh", + "post_process": "collapse" + } + }, + "tr": { + "oct22": { + "model_path": "cahya/wav2vec2-large-xlsr-turkish-artificial-cv", + "model_type": "hf", + "lang": "tr", + "post_process": "collapse" + } + }, + "ar": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic", + "model_type": "hf", + "lang": "ar", + "post_process": "collapse" + } + }, + "vi": { + "oct22": { + "model_path": "not-tanh/wav2vec2-large-xlsr-53-vietnamese", + "model_type": "hf", + "lang": "vi", + "post_process": "collapse" + } + }, + "de": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-xls-r-1b-german", + "model_type": "hf", + "lang": "de", + "post_process": "collapse" + } + }, + "pl": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-xls-r-1b-polish", + "model_type": "hf", + "lang": "pl", + "post_process": "collapse" + } + }, + "it": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-xlsr-53-italian", + "model_type": "hf", + "lang": "it", + "post_process": "collapse" + } + }, + "pt": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-xls-r-1b-portuguese", + "model_type": "hf", + "lang": "pt", + "post_process": "collapse" + } + }, + "ro": { + "oct22": { + "model_path": "gigant/romanian-wav2vec2", + "model_type": "hf", + "lang": "ro", + "post_process": "collapse" + } + }, + "cs": { + "oct22": { + "model_path": "comodoro/wav2vec2-xls-r-300m-cs-250", + 
"model_type": "hf", + "lang": "cs", + "post_process": "collapse" + } + }, + "sk": { + "oct22": { + "model_path": "anuragshas/wav2vec2-xls-r-300m-sk-cv8-with-lm", + "model_type": "hf", + "lang": "sk", + "post_process": "collapse" + } + }, + "sl": { + "oct22": { + "model_path": "anuragshas/wav2vec2-xls-r-300m-sl-cv8-with-lm", + "model_type": "hf", + "lang": "sl", + "post_process": "collapse" + } + }, + "fi": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-xlsr-53-finnish", + "model_type": "hf", + "lang": "fi", + "post_process": "collapse" + } + }, + "hu": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-large-xlsr-53-hungarian", + "model_type": "hf", + "lang": "hu", + "post_process": "collapse" + } + }, + "et": { + "oct22": { + "model_path": "RASMUS/wav2vec2-xlsr-1b-et", + "model_type": "hf", + "lang": "et", + "post_process": "collapse" + } + }, + "lt": { + "oct22": { + "model_path": "sammy786/wav2vec2-xlsr-lithuanian", + "model_type": "hf", + "lang": "lt", + "post_process": "collapse" + } + }, + "nl": { + "oct22": { + "model_path": "jonatasgrosman/wav2vec2-xls-r-1b-dutch", + "model_type": "hf", + "lang": "nl", + "post_process": "collapse" + } + }, + "lv": { + "oct22": { + "model_path": "reach-vb/wav2vec2-large-xls-r-1B-common_voice7-lv-ft", + "model_type": "hf", + "lang": "lv", + "post_process": "collapse" + } + }, + "sv": { + "oct22": { + "model_path": "marinone94/xls-r-300m-sv-robust", + "model_type": "hf", + "lang": "sv", + "post_process": "collapse" + } + }, + "hr": { + "oct22": { + "model_path": "classla/wav2vec2-xls-r-parlaspeech-hr", + "model_type": "hf", + "lang": "hr", + "post_process": "collapse" + } + } +} diff --git a/fairseq/examples/speech_to_speech/asr_bleu/compute_asr_bleu.py b/fairseq/examples/speech_to_speech/asr_bleu/compute_asr_bleu.py new file mode 100644 index 0000000..d592619 --- /dev/null +++ b/fairseq/examples/speech_to_speech/asr_bleu/compute_asr_bleu.py @@ -0,0 +1,244 @@ +import os +from typing import Dict, List 
def extract_audio_for_eval(audio_dirpath: str, audio_format: str):
    """Collect the predicted audio files in reference order.

    For the ``"n_pred.wav"`` layout, ``<i>_pred.wav`` is assumed to
    correspond to the reference at line position ``i`` of the reference
    manifest. When that file is absent, the single file matching
    ``<i>_spk*_pred.wav`` (a prediction with a speaker id) is used instead.

    Args:
        audio_dirpath: directory containing the predicted audio files.
        audio_format: file naming scheme; only ``"n_pred.wav"`` is supported.

    Returns:
        A list of path strings; element ``i`` is the audio for reference
        ``i``. Its length equals the number of ``*_pred.wav`` files found.

    Raises:
        NotImplementedError: if ``audio_format`` is not ``"n_pred.wav"``.
        AssertionError: if some index has neither ``<i>_pred.wav`` nor
            exactly one ``<i>_spk*_pred.wav``.
    """
    if audio_format != "n_pred.wav":
        raise NotImplementedError

    audio_dir = Path(audio_dirpath)
    # Files with and without a speaker id both match "*_pred.wav", so this
    # count is the number of indices to resolve. A set makes each membership
    # test O(1).
    available = set(glob((audio_dir / "*_pred.wav").as_posix()))

    audio_list = []
    for i in range(len(available)):
        audio_fp = (audio_dir / f"{i}_pred.wav").as_posix()
        if audio_fp not in available:
            # Fall back to the audio with a random speaker id.
            candidates = glob((audio_dir / f"{i}_spk*_pred.wav").as_posix())
            # Raised explicitly rather than via `assert` so the check
            # survives `python -O`; the exception type is unchanged.
            if len(candidates) != 1:
                raise AssertionError(
                    f"expected {i}_pred.wav or exactly one {i}_spk*_pred.wav "
                    f"in {audio_dirpath}, found {len(candidates)} speaker files"
                )
            audio_fp = candidates[0]
        audio_list.append(audio_fp)

    return audio_list
def load_eval_data_from_tsv(eval_data_filepath: str):
    """Load an evaluation manifest written by `compose_eval_data`.

    Args:
        eval_data_filepath: path of the tab-separated manifest.

    Returns:
        A ``pandas.DataFrame`` with the columns stored in the file.
    """
    # `pd.from_csv` does not exist; `pd.read_csv` is the reader.
    # quoting=3 (csv.QUOTE_NONE) mirrors how the manifest is written, so
    # quote characters in the text are kept literally.
    eval_df = pd.read_csv(eval_data_filepath, sep="\t", quoting=3)

    return eval_df
def main():
    """Parse the command line, run ASR-BLEU and optionally save the results.

    The BLEU score is written to
    ``<results_dirpath>/<reference_format>_<lang>_bleu.txt`` when
    ``--results_dirpath`` is given, and the predicted transcripts are
    written one per line to ``--transcripts_path`` when that is given.
    """
    parser = ArgumentParser(
        description="This script computes the ASR-BLEU metric between model's generated audio and the text reference sequences."
    )

    # --lang, --audio_dirpath, --reference_path and --reference_format have
    # no usable default: leaving any of them out used to fail later with an
    # unrelated error, so argparse now rejects the call up front.
    parser.add_argument(
        "--lang",
        required=True,
        help="The target language used to initialize ASR model, see asr_model_cfgs.json for available languages",
        type=str,
    )
    parser.add_argument(
        "--asr_version",
        type=str,
        default="oct22",
        help="For future support we add an extra layer of asr versions. The current most recent version is oct22 meaning October 2022",
    )
    parser.add_argument(
        "--audio_dirpath",
        required=True,
        type=str,
        help="Path to the directory containing the audio predictions from the translation model",
    )
    parser.add_argument(
        "--reference_path",
        required=True,
        type=str,
        help="Path to the file containing reference translations in the form of normalized text (to be compared to ASR predictions)",
    )
    parser.add_argument(
        "--reference_format",
        required=True,
        choices=["txt", "tsv"],
        help="Format of reference file. Txt means plain text format where each line represents single reference sequence",
    )
    parser.add_argument(
        "--reference_tsv_column",
        default=None,
        type=str,
        help="If format is tsv, then specify the column name which contains reference sequence",
    )
    parser.add_argument(
        "--audio_format",
        default="n_pred.wav",
        choices=["n_pred.wav"],
        help="Audio format n_pred.wav corresponds to names like 94_pred.wav or 94_spk7_pred.wav where spk7 is the speaker id",
    )
    parser.add_argument(
        "--results_dirpath",
        default=None,
        type=str,
        help="If specified, the resulting BLEU score will be written to a txt file inside this directory",
    )
    parser.add_argument(
        "--transcripts_path",
        default=None,
        type=str,
        help="If specified, the predicted transcripts will be written to this path as a txt file.",
    )

    args = parser.parse_args()

    prediction_transcripts, bleu_score = run_asr_bleu(args)

    if args.results_dirpath is not None:
        results_dir = Path(args.results_dirpath)
        # exist_ok avoids the check-then-create race of a separate exists().
        results_dir.mkdir(parents=True, exist_ok=True)
        result_filename = f"{args.reference_format}_{args.lang}_bleu.txt"
        with open(results_dir / result_filename, "w", encoding="utf-8") as f:
            f.write(bleu_score.format(width=2))

    if args.transcripts_path is not None:
        # Transcripts may be in any supported language; do not depend on
        # the locale's default encoding.
        with open(args.transcripts_path, "w", encoding="utf-8") as f:
            f.writelines(f"{transcript}\n" for transcript in prediction_transcripts)
def retrieve_asr_config(lang_key: str, asr_version: str, json_path: str) -> dict:
    """
    Retrieve the ASR model config for one language and version

    Args:
        lang_key: the language code used as the top-level key of the json file
        asr_version: the version tag nested under the language key (e.g. "oct22")
        json_path: the path of the config json file

    Returns:
        The config dict stored under [lang_key][asr_version]

    Raises:
        KeyError: if the language or the version is not in the json file
    """

    with open(json_path, "r", encoding="utf-8") as f:
        asr_model_cfgs = json.load(f)
    try:
        return asr_model_cfgs[lang_key][asr_version]
    except KeyError:
        # Same exception type as before, but saying what was looked up.
        raise KeyError(
            f"No ASR config for lang={lang_key!r}, version={asr_version!r} in {json_path}"
        ) from None
def prepare_hf_model(self, model_cfg: dict) -> None:
    """
    Load a HuggingFace CTC model and derive the decoding vocabulary from it

    Sets self.model, self.tokenizer, self.preprocessor, self.processor,
    self.sampling_rate, self.normalize_input, self.tokens, self.sil_token
    and self.blank_token.

    Args:
        model_cfg: dict with the relevant ASR config; "model_path" is read
    """

    def pick_silence_token(candidates: list):
        """
        HF checkpoints disagree on the silence token: some use | and some
        use " " (space). | takes precedence when both are present.
        Important: when adding a new HF asr model, check which one it uses
        """
        for marker in ("|", " "):
            if marker in candidates:
                return marker
        raise RuntimeError("Silence token is not found in the vocabulary")

    try:
        from transformers import (
            AutoFeatureExtractor,
            AutoTokenizer,
            Wav2Vec2ForCTC,
            Wav2Vec2Processor,
        )
    except ImportError:
        raise ImportError("Install transformers to load HF wav2vec model")

    checkpoint = model_cfg["model_path"]
    self.model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
    self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    self.preprocessor = AutoFeatureExtractor.from_pretrained(checkpoint)
    self.processor = Wav2Vec2Processor.from_pretrained(checkpoint)

    # Ids missing from the tokenizer's decoder table get "<unk token>1" as
    # a placeholder; this is what makes some models work, e.g. the Finnish
    # ASR has a vocab issue
    id_to_token = self.tokenizer.decoder
    vocabulary = []
    for token_id in range(self.tokenizer.vocab_size):
        vocabulary.append(
            id_to_token.get(token_id, f"{self.tokenizer.unk_token}1")
        )

    self.sampling_rate = self.preprocessor.sampling_rate
    self.normalize_input = self.preprocessor.do_normalize
    self.tokens = vocabulary
    self.sil_token = pick_silence_token(vocabulary)
    self.blank_token = self.tokenizer.pad_token
@torch.inference_mode()
def load_audiofile(self, audio_path: str) -> torch.Tensor:
    """
    Load an audio file, down-mix it to one channel, then resample and normalize it

    Args:
        audio_path: the audio file path

    Returns:
        audio_waveform: the audio waveform as a torch.Tensor object. For
            2-D input the leading (channel) dimension has size 1 on return,
            so downstream code can treat it as a batch of one.
    """

    audio_waveform, sampling_rate = torchaudio.load(audio_path)
    # `dim` is a method: the previous `audio_waveform.dim == 2` compared the
    # bound method with an int and was always False, so multi-channel audio
    # was never down-mixed. torchaudio.load returns (channels, time) by
    # default, so the channel axis is 0. keepdim=True preserves the 2-D
    # shape that mono files already have.
    if audio_waveform.dim() == 2 and audio_waveform.size(0) > 1:
        audio_waveform = audio_waveform.mean(dim=0, keepdim=True)
    if self.sampling_rate != sampling_rate:
        audio_waveform = torchaudio.functional.resample(
            audio_waveform, sampling_rate, self.sampling_rate
        )
    if self.normalize_input:
        # following fairseq raw audio dataset: normalize over the whole
        # waveform tensor
        audio_waveform = torch.nn.functional.layer_norm(
            audio_waveform, audio_waveform.shape
        )

    return audio_waveform
+ """ + + if self.use_cuda: + audio_input = audio_input.to("cuda") + if isinstance(self.model, fairseq.models.wav2vec.wav2vec2_asr.Wav2VecCtc): + padding_mask = lengths_to_padding_mask(torch.tensor([audio_input.numel()])) + emissions = self.model.w2v_encoder(audio_input, padding_mask)[ + "encoder_out" + ].transpose(0, 1) + else: + emissions = self.model(audio_input).logits + + return emissions + + def decode_emissions(self, emissions: torch.Tensor) -> str: + """ + Decode the emissions and apply post process functions + + Args: + emissions: the input Tensor object + + Returns: + hypo: the str as the decoded transcriptions + """ + + emissions = emissions.cpu() + results = self.decoder(emissions) + + # assuming the lexicon-free decoder and working with tokens + hypo = self.decoder.idxs_to_tokens(results[0][0].tokens) + hypo = self.post_process_fn(hypo) + + return hypo + + def transcribe_audiofile(self, audio_path: str, lower=True) -> str: + """ + Transcribe the audio into string + + Args: + audio_path: the input audio waveform + lower: the case of the transcriptions with lowercase as the default + + Returns: + hypo: the transcription result + """ + + asr_input = self.load_audiofile(audio_path) + emissions = self.compute_emissions(asr_input) + hypo = self.decode_emissions(emissions) + + return hypo.strip().lower() if lower else hypo.strip() diff --git a/fairseq/examples/speech_to_speech/benchmarking/README.md b/fairseq/examples/speech_to_speech/benchmarking/README.md new file mode 100644 index 0000000..c62fe12 --- /dev/null +++ b/fairseq/examples/speech_to_speech/benchmarking/README.md @@ -0,0 +1,31 @@ +# Benchmarking + +## Overview + +The goal of this framework is to support benchmarking various speech to speech translation(S2ST) models in terms of runtime, max-memory consumption and total number of floating point operations(FLOPS). It is a generic framework and can be easily extended to support any fairseq models. 
To accurately benchmark the performance, core inference modules are re-implemented based on fairseq_cli/generate.py (core.py/Processing) and examples/speech_to_text/generate_waveform.py (core.py/SpeechGeneration). To ensure that the end to end models and cascaded models are compared fairly, for cascaded models we only consider the performance metrics for model inference at all stages ignoring any intermediate data and io processing consumption. We run all the benchmarking runs on CPU as it is generally used in production environment and also due to lack of good benchmarking library support for GPUs. + +1. Runtime: Average time in seconds to run model inference on an example from a given dataset. We use [timeit](https://docs.python.org/3/library/timeit.html) library to measure the runtime. +2. Max memory: Maximum memory in MiB, averaged over running the model inference on all examples from the given dataset. We use [memory_profiler](https://pypi.org/project/memory-profiler/) library to gather memory footprints for a code snippet and find the maximum to get the max memory used by the code. For cascaded models, we find the max of all stages to get the overall max_memory footprint. +3. FLOPS: We compute the average number of floating point operations needed to run model inference for an example from the given dataset. We use [PAPI library](http://www.bnikolic.co.uk/blog/python/flops/2019/10/01/pytorch-count-flops.html) to benchmark the number of flops. + +## CLI Commands + +```{python} +CUBLAS_WORKSPACE_CONFIG=:4096:8 python examples/speech_to_speech/benchmarking/get_metrics.py '' --config $config +``` + + +## Note: + +1. The npy dataset is a list of samples saved as a .npy file. Each sample is a dictionary with id, net_input. +2.
# Benchmark config for the three-stage cascade: one mapping per stage,
# plus the settings shared by the whole run under `general`.
general:
  dataset_path: $npy_dataset
  cpu: True
  model_type: 3StageS2ST
  max_len_a: 2
  max_len_b: 500
  dataset_size: 1

stage1:
  data: $data_bin_stage1
  task: speech_to_text
  path: $checkpoint_stage1
  config_yaml: config.yaml
  max_len_a: 2
  max_len_b: 500

stage2:
  data: $data_bin_stage2
  task: translation
  path: $checkpoint_stage2
  config_yaml: config.yaml


# This mapping was previously a second `stage2:` key. A mapping cannot hold
# the same key twice, so the translation stage above and this stage could
# not both survive loading. Its values ($data_bin_stage3,
# $checkpoint_stage3) show it is the third stage.
# NOTE(review): confirm the config loader reads the key `stage3`.
stage3:
  data: $data_bin_stage3
  task: text_to_speech
  path: $checkpoint_stage3
  config_yaml: config.yaml
dataset_path: $npy_dataset_path + cpu: True + model_type: S2UT + dataset_size: 5 + dump_speech_waveforms_dir: $dump_waveforms_dir_path + +stage1: + data: $data_bin + task: speech_to_speech + path: $checkpoint + config_yaml: config.yaml + max_len_b: 100000 + beam: 10 + target_is_code: True + max_target_positions: 3000 + target_code_size: 100 + +stage2: + vocoder: $vocoder_path + vocoder_cfg: $vocoder_cfg_json + dur_prediction: True diff --git a/fairseq/examples/speech_to_speech/benchmarking/configs/S2T.yaml b/fairseq/examples/speech_to_speech/benchmarking/configs/S2T.yaml new file mode 100644 index 0000000..3a106a0 --- /dev/null +++ b/fairseq/examples/speech_to_speech/benchmarking/configs/S2T.yaml @@ -0,0 +1,13 @@ +general: + dataset_path: $npy_dataset + cpu: True + model_type: S2T + dataset_size: 1 + +stage1: + data: $data_bin + task: speech_to_text + path: $checkpoint + config_yaml: config.yaml + max_len_a: 2 + max_len_b: 500 diff --git a/fairseq/examples/speech_to_speech/benchmarking/core.py b/fairseq/examples/speech_to_speech/benchmarking/core.py new file mode 100644 index 0000000..da22a34 --- /dev/null +++ b/fairseq/examples/speech_to_speech/benchmarking/core.py @@ -0,0 +1,487 @@ +import timeit +import logging +import torch +from pypapi import events, papi_high as high +from memory_profiler import memory_usage +from torch import nn +from argparse import Namespace +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.data import data_utils as fairseq_data_utils +from fairseq import checkpoint_utils, tasks, utils +from fairseq.models.text_to_speech.vocoder import CodeHiFiGANVocoder +from examples.hubert.simple_kmeans.dump_hubert_feature import HubertFeatureReader +from examples.hubert.simple_kmeans.dump_km_label import ApplyKmeans +from fairseq_cli.generate import get_symbols_to_strip_from_output +import soundfile as sf +import ast +import json + +logging.basicConfig() +logging.root.setLevel(logging.INFO) 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +torch.manual_seed(1) +torch.set_deterministic(True) + + +class BenchmarkingBase(nn.Module): + def __init__(self): + nn.Module.__init__(self) + self.s2x_task = None + + def warm_up(self, sample, repeat): + """Warm up the model""" + for _i in range(repeat): + self.forward(sample) + logger.info(f"Model warmed up by running inference {repeat} times") + + def benchmark_run_time(self, dataset, repeat): + """Benchmark average runtime for the model by calling benchmark_run_time_single_sample function""" + logger.info("Starting run time benchmarking") + time_elapsed = 0 + for i, sample in enumerate(dataset): + time_elapsed += self.benchmark_run_time_single_sample(sample, repeat=repeat) + if i % 100 == 0: + logger.info(f"Benchmarked run time for {i}/{len(dataset)} samples") + total_time_elapsed = time_elapsed / len(dataset) + return total_time_elapsed + + def benchmark_run_time_single_sample(self, sample, repeat): + """Benchmark average runtime for a single sample using timeit library. Units are seconds""" + timer = timeit.Timer(lambda: self.forward(sample)) + time_elapsed = timer.timeit(repeat) + return time_elapsed / repeat + + def count_flops( + self, + dataset, + repeat, + ): + """Use PYPAPI library to count average flops for model inference. + Note: It only works if the model is being run on cpu""" + logger.info("Starting flop counter") + high.start_counters([events.PAPI_DP_OPS]) + for i, sample in enumerate(dataset): + for _r in range(repeat): + self.forward(sample) + if i % 100 == 0: + logger.info(f"Counted flops for {i}/{len(dataset)} samples") + flops = high.stop_counters() + flops = round(flops[0] / (repeat * len(dataset))) + return flops + + def max_memory(self, dataset, repeat): + """Compute average max memory consumed by model inference. 
Units are MiB""" + logger.info("Starting memory benchmarking") + total_memory = 0 + for i, sample in enumerate(dataset): + for _r in range(repeat): + total_memory += max(memory_usage((self.forward, (sample,), {}))) + if i % 100 == 0: + logger.info(f"Benchmarked memory for {i}/{len(dataset)} samples") + total_memory = total_memory / (repeat * len(dataset)) + return total_memory + + def gather_all_metrics(self, dataset, repeat): + run_time = self.benchmark_run_time(dataset, repeat) + max_memory = self.max_memory(dataset, repeat) + flops = self.count_flops(dataset, repeat) + + return run_time, max_memory, flops + + def dump_final_speech_output( + self, dataset, output_dir, resample_fn, sample_rate, prefix=None + ): + + for i, sample in enumerate(dataset): + hypo = self.forward(sample)[0] + + def to_np(x): + return x.detach().cpu().numpy() + + try: + wave_preds = to_np(resample_fn(hypo["waveform"])) + sf.write( + f"{output_dir}/{prefix}_{i}_pred.wav", + wave_preds, + sample_rate, + ) + except Exception as e: + raise Exception( + f" Encountered {e} - Invalid waveform. Make sure the model outputs a waveform" + ) + + +class Processing(BenchmarkingBase): + """Class similar to fairseq_cli/generate.py. 
Supports ASR, MT and ST model inference""" + + def __init__(self, args): + super().__init__() + self.use_cuda = not getattr(args, "cpu", False) + self.setUp(args) + self.training = False + self.s2x_task = self.task + + def setUp(self, cfg): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + self.task = tasks.setup_task(cfg.task) + self.tgt_dict = self.task.target_dictionary + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, _ = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides={}, + task=self.task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=False, + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + if len(models) > 1: + raise Exception("Currently loading multiple models is not supported") + self.model = models[0] + + # Optimize model for generation + if cfg.common.fp16: + self.model.half() + if self.use_cuda: + self.model.cuda() + self.model.prepare_for_inference_(cfg) + + self.generator = self.task.build_generator( + [self.model], + cfg.generation, + extra_gen_cls_kwargs={}, + ) + # Handle tokenization and BPE + self.tokenizer = self.task.build_tokenizer(cfg.tokenizer) + self.bpe = self.task.build_bpe(cfg.bpe) + self.remove_bpe = cfg.common_eval.post_process + + def encode_source(self, src): + """Method to generate source tokens from a string""" + if self.tokenizer is not None: + src = self.tokenizer.encode(src) + if self.bpe is not None: + src = self.bpe.encode(src) + src_tokens = self.task.source_dictionary.encode_line(src).long() + src_lens = src_tokens.size(0) + return { + "net_input": { + "src_tokens": src_tokens.view(1, src_lens), + "src_lengths": torch.tensor([src_lens]), + } + } + + def decode_target(self, hypos): + """Method to decode target string from tokens""" + hypo_str = self.tgt_dict.string( + hypos[0][0]["tokens"].int().cpu(), + self.remove_bpe, + get_symbols_to_strip_from_output(self.generator), + ) + if 
self.bpe is not None: + hypo_str = self.bpe.decode(hypo_str) + if self.tokenizer is not None: + hypo_str = self.tokenizer.decode(hypo_str) + return hypo_str + + def forward(self, sample): + hypos = self.task.inference_step( + self.generator, + [self.model], + sample, + prefix_tokens=None, + constraints=None, + ) + return hypos + + +class GenerateWaveformFromCode(BenchmarkingBase): + """Class to support waveform generation from code. Currently, vocoder only supports single speaker""" + + def __init__(self, args): + super().__init__() + with open(args.vocoder_cfg) as f: + vocoder_cfg = json.load(f) + self.dur_prediction = args.dur_prediction + self.vocoder = CodeHiFiGANVocoder(args.vocoder, vocoder_cfg) + + def format_units(self, input): + code = torch.LongTensor(list(map(int, input.strip().split()))).view(1, -1) + return {"code": code} + + def generate_vocoder_input(self, dataset): + return [self.format_units(sample) for sample in dataset] + + def forward(self, sample): + return [{"waveform": self.vocoder(sample, self.dur_prediction)}] + + +class HubertUnitExtractor(BenchmarkingBase): + def __init__(self, args): + self.feature_reader = HubertFeatureReader( + args.hubert_ckpt_path, args.hubert_layer + ) + self.kmeans = ApplyKmeans(args.hubert_km_path) + + def forward(self, sample): + with torch.no_grad(): + feat = [] + for start in range(0, sample.size(1), self.feature_reader.max_chunk): + x_chunk = sample[:, start : start + self.max_chunk] + feat_chunk, _ = self.feature_reader.model.extract_features( + source=x_chunk, + padding_mask=None, + mask=False, + output_layer=self.layer, + ) + feat.append(feat_chunk) + torch.cat(feat, 1).squeeze(0) + return self.kmeans(feat).tolist() + + +class SpeechGeneration(BenchmarkingBase): + """Class similar to examples/text_to_speech/generate_waveform.py. 
+ Supports models with speech generation as end goal (TTS, Direct S2ST models etc)""" + + def __init__(self, args): + super().__init__() + self.use_cuda = not getattr(args, "cpu", False) + self.setUp(args) + self.s2x_task = self.task + + def setUp(self, args): + if args.task == "speech_to_speech": + args.normalize_waveform = False + self.task = tasks.setup_task(args) + self.pre_tokenizer = self.task.build_tokenizer(args) + self.bpe_tokenizer = self.task.build_bpe(args) + try: + self.src_dict = self.task.src_dict + except Exception: + self.src_dict = None + ensemble, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [args.path], + arg_overrides=ast.literal_eval(args.model_overrides), + task=self.task, + strict=False, + ) + self.model = ensemble[0] + if self.use_cuda: + self.model.cuda() + # criterion.cuda() + self.model.eval() + self.generator = self.task.build_generator( + [self.model], + args, + ) + + def processTextInput(self, text): + """Generate source tokens from text input""" + if self.pre_tokenizer is not None: + text = self.pre_tokenizer.encode(text) + if self.bpe_tokenizer is not None: + text = self.bpe_tokenizer.encode(text) + target = self.src_dict.encode_line( + text, add_if_not_exist=False, append_eos=True + ).long() + target = fairseq_data_utils.collate_tokens( + [target], + self.src_dict.pad(), + self.src_dict.eos(), + left_pad=False, + move_eos_to_beginning=False, + ) + src_lengths = torch.tensor([target.size(1)], dtype=torch.long) + prev_output_tokens = None + sample = { + "net_input": { + "src_tokens": target, + "src_lengths": src_lengths, + "prev_output_tokens": prev_output_tokens, + } + } + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + return sample + + def forward(self, sample): + sample["speaker"] = None + output = self.generator.generate(self.model, sample) # , has_targ=False + return output + + +class S2UT(BenchmarkingBase): + """Class to support S2UT models. 
Also supports generating waveforms from the units predicted""" + + def __init__(self, s2u_args, vocoder_args=None): + super().__init__() + self.s2u = Processing(s2u_args) + self.vocoder = None + if vocoder_args: + self.vocoder = GenerateWaveformFromCode(vocoder_args) + self.vocoder_input = None + + def forward(self, sample): + s2u_hypos = self.s2u(sample) + s2u_output = self.s2u.decode_target(s2u_hypos) + if not self.vocoder: + return s2u_output + units = self.vocoder.format_units(s2u_output) + vocoder_output = self.vocoder(units) + return vocoder_output + + def generate_s2u_outputs(self, dataset): + return [self.s2u.decode_target(self.s2u(sample)) for sample in dataset] + + def compute_metrics(self, metric_type, dataset, repeat=None): + """Generic function to compute metrics ignoring the io processing time""" + if self.vocoder and not self.vocoder_input: + self.s2u_output = self.generate_s2u_outputs(dataset) + self.vocoder_input = self.vocoder.generate_vocoder_input(self.s2u_output) + + s2u_metrics = getattr(self.s2u, metric_type)( + dataset, + repeat, + ) + vocoder_metrics = 0 + if self.vocoder: + vocoder_metrics = getattr(self.vocoder, metric_type)( + self.vocoder_input, + repeat, + ) + print( + f"metric_type = {metric_type} s2u_metrics = {s2u_metrics} \t vocoder_metrics = {vocoder_metrics}" + ) + if metric_type == "max_memory": + return max(s2u_metrics, vocoder_metrics) + else: + return s2u_metrics + vocoder_metrics + + def benchmark_run_time(self, dataset, repeat): + return self.compute_metrics("benchmark_run_time", dataset, repeat) + + def count_flops(self, dataset, repeat): + return self.compute_metrics("count_flops", dataset, repeat) + + def max_memory(self, dataset, repeat): + return self.compute_metrics("max_memory", dataset, repeat) + + +class Cascaded2StageS2ST(BenchmarkingBase): + """ST + TTS""" + + def __init__(self, s2t_args, tts_args): + super().__init__() + self.s2t = Processing(s2t_args) + self.s2x_task = self.s2t.task + self.tts = 
SpeechGeneration(tts_args) if tts_args else None + self.training = False + self.tts_inputs = None + + def forward(self, sample): + if not self.tts: + raise Exception( + "Forward function is not callable without tts. Reinitialize the class with tts_args" + ) + s2t_hypos = self.s2t(sample) + s2t_output = self.s2t.decode_target(s2t_hypos) + tts_input = self.tts.processTextInput(s2t_output) + tts_output = self.tts(tts_input) + return tts_output + + def generate_s2t_outputs(self, dataset): + """Process dataset and generate s2t outputs""" + return [self.s2t.decode_target(self.s2t(sample)) for sample in dataset] + + def generate_tts_inputs(self, dataset): + """Process dataset and generate tts inputs""" + return [self.tts.processTextInput(sample) for sample in dataset] + + def compute_metrics(self, metric_type, dataset, repeat=None): + """Generic function to compute metrics ignoring the io processing time""" + if not self.tts_inputs: + s2t_outputs = self.generate_s2t_outputs(dataset) + self.tts_inputs = self.generate_tts_inputs(s2t_outputs) + + s2t_metrics = getattr(self.s2t, metric_type)( + dataset, + repeat, + ) + + tts_metrics = getattr(self.tts, metric_type)( + self.tts_inputs, + repeat, + ) + print( + f"metric_type = {metric_type} s2t_metrics = {s2t_metrics} \t tts_metrics = {tts_metrics}" + ) + if metric_type == "max_memory": + return max(s2t_metrics, tts_metrics) + else: + return s2t_metrics + tts_metrics + + def benchmark_run_time(self, dataset, repeat): + return self.compute_metrics("benchmark_run_time", dataset, repeat) + + def count_flops(self, dataset, repeat): + return self.compute_metrics("count_flops", dataset, repeat) + + def max_memory(self, dataset, repeat): + return self.compute_metrics("max_memory", dataset, repeat) + + +class Cascaded3StageS2ST(Cascaded2StageS2ST): + """ASR + MT + TTS""" + + def __init__(self, s2t_args, tts_args, mt_args): + super().__init__(s2t_args, tts_args) + self.mt = Processing(mt_args) + self.mt_inputs = [] + + def forward(self, 
sample): + s2t_hypos = self.s2t(sample) + s2t_output = self.s2t.decode_target(s2t_hypos) + mt_input = self.mt.encode_source(s2t_output) + mt_hypos = self.mt(mt_input) + mt_output = self.mt.decode_target(mt_hypos) + tts_input = self.tts.processTextInput(mt_output) + tts_output = self.tts(tts_input) + return tts_output + + def generate_mt_inputs(self, dataset): + """Process dataset to generate mt model inputs""" + return [self.mt.encode_source(sample) for sample in dataset] + + def generate_mt_outputs(self, dataset): + """Process dataset to generate mt model outputs""" + return [self.mt.decode_target(self.mt(sample)) for sample in dataset] + + def compute_metrics(self, metric_type, dataset, repeat=None): + """Generic function to compute metrics ignoring the io processing time""" + if not self.tts_inputs: + s2t_outputs = self.generate_s2t_outputs(dataset) + self.mt_inputs = self.generate_mt_inputs(s2t_outputs) + mt_outputs = self.generate_mt_outputs(self.mt_inputs) + self.tts_inputs = self.generate_tts_inputs(mt_outputs) + + s2t_metrics = getattr(self.s2t, metric_type)( + dataset, + repeat, + ) + mt_metrics = getattr(self.mt, metric_type)(self.mt_inputs, repeat) + tts_metrics = getattr(self.tts, metric_type)( + self.tts_inputs, + repeat, + ) + print( + f"metric_type = {metric_type} s2t_metrics = {s2t_metrics} \t mt_metrics = {mt_metrics} \t tts_metrics = {tts_metrics}" + ) + if metric_type == "max_memory": + return max(s2t_metrics, mt_metrics, tts_metrics) + else: + return s2t_metrics + mt_metrics + tts_metrics diff --git a/fairseq/examples/speech_to_speech/benchmarking/data_utils.py b/fairseq/examples/speech_to_speech/benchmarking/data_utils.py new file mode 100644 index 0000000..c73a599 --- /dev/null +++ b/fairseq/examples/speech_to_speech/benchmarking/data_utils.py @@ -0,0 +1,264 @@ +from fairseq import tasks +import numpy as np +import logging +import random +from fairseq import options +import torch +import os +import soundfile as sf + +from 
fairseq.data.audio.audio_utils import ( + get_waveform, + parse_path, +) + +logging.basicConfig() +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +random.seed(1) +np.random.seed(1) +random_number_generator = np.random.RandomState(30) + + +def generate_random_data_sample(T, B=1, D=80): + """Generate random data sample given the T, B, D values""" + net_input = { + "src_tokens": torch.tensor(random_number_generator.randn(B, T, D)).float(), + "src_lengths": torch.tensor([T]), + } + return {"net_input": net_input} + + +def generate_random_dataset(T_range_min, T_range_max, B=1, D=80, dataset_size=100): + """Generate random dataset with T values within a given range, B, D""" + T_values = [random.randint(T_range_min, T_range_max) for i in range(dataset_size)] + dataset = [] + for t in T_values: + dataset.append(generate_random_data_sample(t, B, D)) + return dataset, sum(T_values) / dataset_size + + +def load_dataset_npy(file_name, dataset_size=None): + """Load dataset from a .npy file.""" + data = np.load(file_name, allow_pickle=True) + if dataset_size: + data = data[:dataset_size] + return data + + +def load_dataset_raw_to_waveforms( + file_name, + dataset_size=None, + need_waveform=True, + sample_rate=16000, + read_using_soundfile=False, +): + """Load raw dataset from w2v tsv file. 
Optionally get waveforms""" + data = [] + with open(file_name, "r") as fp: + lines = fp.readlines() + data = [ + os.path.join(lines[0].strip(), line.strip().split("\t")[0]) + for line in lines[1:] + ] + + if dataset_size: + data = data[:dataset_size] + + if not need_waveform: + return data + + features = [] + if read_using_soundfile: + for _i, d in enumerate(data): + wav = sf.read(d)[0] + if wav.ndim == 2: + wav = wav.mean(-1) + features.append(torch.from_numpy(wav).float().view(1, -1)) + else: + for i, d in enumerate(data): + _path, slice_ptr = parse_path(d) + if len(slice_ptr) == 0: + feat = get_waveform( + _path, always_2d=True, output_sample_rate=sample_rate + )[0] + features.append( + { + "id": i, + "net_input": { + "src_tokens": torch.tensor(feat), + "src_lengths": torch.tensor([feat.shape[1]]), + }, + } + ) + else: + raise Exception("Currently unsupported data format") + return features + + +def load_dataset_task( + args, + batch_size=1, + limit_size=None, + ref_dataset=None, +): + """Loads dataset based on args by creating a task""" + if not args.data or not args.subset or not args.task: + raise Exception( + "Please provide necessary arguments to load the dataset - data, subset and task" + ) + task = tasks.setup_task(args) + + task.load_dataset(args.subset) + if not limit_size: + limit_size = len(task.dataset(args.subset)) + + iter = task.get_batch_iterator( + dataset=task.dataset(args.subset), max_sentences=batch_size + ).next_epoch_itr(shuffle=False) + dataset = [] + for i, sample in enumerate(iter): + sample = { + "id": task.datasets[args.subset].ids[sample["id"].item()], + "net_input": { + "src_tokens": sample["net_input"]["src_tokens"], + "src_lengths": sample["net_input"]["src_lengths"], + }, + } + dataset.append(sample) + if i == limit_size - 1: + break + + if ref_dataset: + try: + ids = get_ids_from_dataset(ref_dataset) + except Exception as e: + raise Exception(f"{e} - Cannot extract ids from reference dataset") + + filtered_dataset = [] + for 
sample in dataset: + if ( + sample["id"] in ids + or sample["id"][5:] in ids + or f"dev_{sample['id']}" in ids + ): + filtered_dataset.append(sample) + dataset = filtered_dataset + + max_len, min_len, avg_len = get_dataset_stats(dataset) + print( + f"{args.subset} dataset stats : num_samples={len(dataset)} max_len = {max_len} min_len = {min_len} avg_len = {avg_len}" + ) + + return dataset + + +def randomly_sample_subset(dataset, size=500): + """Randomly sample subset from a dataset""" + random_indices = [random.randint(0, len(dataset) - 1) for i in range(size)] + return [dataset[i] for i in random_indices] + + +def get_short_data_subset(dataset, size=500): + """Get a subset of desired size by sorting based on src_lengths""" + return sort_dataset(dataset)[:size] + + +def get_long_data_subset(dataset, size=500): + """Get a subset of desired size by sorting based on src_lengths descending""" + return sort_dataset(dataset, reverse=True)[:size] + + +def sort_dataset(dataset, reverse=False): + return sorted( + dataset, key=lambda x: x["net_input"]["src_lengths"].item(), reverse=reverse + ) + + +def save_dataset_npy(dataset, file_name): + """Save a dataset as .npy file""" + np.save(file_name, dataset) + + +def get_dataset_stats(dataset): + """Get stats about dataset based on src_lengths of samples""" + max_len = 0 + min_len = 100000 + avg_len = 0 + for d in dataset: + max_len = max(max_len, d["net_input"]["src_lengths"].item()) + min_len = min(min_len, d["net_input"]["src_lengths"].item()) + avg_len += d["net_input"]["src_lengths"].item() + + return max_len, min_len, avg_len / len(dataset) + + +def make_parser(): + """ + Additional args: + 1. Provide the dataset dir path using --data. + 2. 
Loading the dataset doesn't require config, provide --config-yaml to apply additional feature transforms + """ + parser = options.get_speech_generation_parser() + parser.add_argument( + "--subset", + default=None, + type=str, + required=True, + help="Subset to use for dataset generation", + ) + parser.add_argument( + "--dataset-save-dir", + default=None, + type=str, + required=False, + help="Dir path in which the datasets are to be saved", + ) + parser.add_argument( + "--ref-dataset", + default=None, + type=str, + required=False, + help="If provided, the ids in the reference dataset will be used to filter the new dataset generated.", + ) + parser.add_argument("--dataset-save-token", default="", type=str, required=False) + + options.add_generation_args(parser) + return parser + + +def get_ids_from_dataset(dataset): + return {sample["id"]: 1 for sample in dataset} + + +def cli_main(): + parser = make_parser() + args = options.parse_args_and_arch(parser) + dataset = load_dataset_task(args) + + random_dataset = randomly_sample_subset(dataset) + short_dataset = get_short_data_subset(dataset) + long_dataset = get_long_data_subset(dataset) + + if args.dataset_save_token: + args.dataset_save_token = f"_{args.dataset_save_token}_" + + if args.dataset_save_dir: + save_dataset_npy( + random_dataset, + f"{args.dataset_save_dir}/random_dataset{args.dataset_save_token}w_ids.npy", + ) + save_dataset_npy( + short_dataset, + f"{args.dataset_save_dir}/short_dataset{args.dataset_save_token}w_ids.npy", + ) + save_dataset_npy( + long_dataset, + f"{args.dataset_save_dir}/long_dataset{args.dataset_save_token}w_ids.npy", + ) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/speech_to_speech/benchmarking/get_metrics.py b/fairseq/examples/speech_to_speech/benchmarking/get_metrics.py new file mode 100644 index 0000000..773257f --- /dev/null +++ b/fairseq/examples/speech_to_speech/benchmarking/get_metrics.py @@ -0,0 +1,162 @@ +import copy +import torch +import logging 
+from argparse import Namespace +import yaml +from fairseq import options +from examples.speech_to_speech.benchmarking.core import ( + Processing, + SpeechGeneration, + Cascaded2StageS2ST, + Cascaded3StageS2ST, + S2UT, +) +from examples.speech_to_speech.benchmarking.data_utils import ( + load_dataset_npy, + load_dataset_raw_to_waveforms, +) + + +logging.basicConfig() +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +torch.manual_seed(1) +torch.set_deterministic(True) + + +def make_parser(): + """Note: As the names indicate use s2x_args(ex:ST, ASR etc) for models with speech input, + x2s_args for models with speech output(ex:TTS) and mt_args for translation models (ex: mt, T2U etc). + For direct S2ST models, use x2s_args to provide model details. + """ + parser = options.get_speech_generation_parser() + parser.add_argument("--target-is-code", action="store_true", default=False) + parser.add_argument("--config", type=str) + parser.add_argument( + "--model-type", + default="S2U", + choices=["S2S", "TTS", "S2UT", "MT", "S2T", "2StageS2ST", "3StageS2ST"], + help="Choose one of the models. For model inference implementation, refer to core.py", + ) + parser.add_argument( + "--dataset-path", + type=str, + help="""File to load dataset from. Assumes dataset is a list of samples. 
+ Each sample is a dict of format {'net_input':{'src_tokens':torch.tenor(),'src_lengths':torch.tensor()}}""", + ) + parser.add_argument( + "--dataset-type", + type=str, + default="npy", + choices=["npy", "raw"], + help="""Type of input dataset file""", + ) + parser.add_argument( + "--read-using-sf", + type=str, + default=False, + help="""If sound file should be used to read the raw dataset""", + ) + parser.add_argument( + "--dataset-size", + default=None, + type=int, + help="Dataset size to use for benchmarking", + ) + parser.add_argument( + "--dump-speech-waveforms-dir", + default=None, + type=str, + help="Directory to dump the speech waveforms computed on the dataset.", + ) + parser.add_argument( + "--dump-waveform-file-prefix", + default="", + type=str, + help="File name prefix for the saved speech waveforms", + ) + parser.add_argument( + "--feat-dim", default=80, type=int, help="Input feature dimension" + ) + parser.add_argument( + "--target-sr", + default=16000, + type=int, + help="Target sample rate for dumping waveforms", + ) + + options.add_generation_args(parser) + options.get_interactive_generation_parser(parser) + return parser + + +def cli_main(): + parser = make_parser() + args = options.parse_args_and_arch(parser) + + with open( + args.config, + "r", + ) as f: + config = yaml.load(f, Loader=yaml.FullLoader) + dict_args = vars(args) + dict_args.update(config["general"]) + args = Namespace(**dict_args) + + i = 1 + stage_args = [] + while i <= 3: + var = f"stage{i}" + tmp_args = copy.deepcopy(dict_args) + if var in config: + tmp_args.update(config[var]) + stage_args.append(Namespace(**tmp_args)) + i += 1 + else: + break + + if args.model_type == "S2S" or args.model_type == "TTS": + model = SpeechGeneration(stage_args[0]) + elif args.model_type == "S2UT": + model = S2UT(stage_args[0], stage_args[1] if len(stage_args) > 1 else None) + elif args.model_type == "MT" or args.model_type == "S2T": + model = Processing(stage_args[0]) + elif args.model_type == 
"2StageS2ST": + model = Cascaded2StageS2ST(stage_args[0], stage_args[1]) + elif args.model_type == "3StageS2ST": + model = Cascaded3StageS2ST(stage_args[0], stage_args[2], stage_args[1]) + else: + raise Exception(f"Currently unsupported model type {args.model_type}") + + print(f"Evaluating on dataset - {args.dataset_path}\n") + + if args.dataset_type == "npy": + dataset = load_dataset_npy(args.dataset_path, dataset_size=args.dataset_size) + elif args.dataset_type == "raw": + dataset = load_dataset_raw_to_waveforms( + args.dataset_path, + dataset_size=args.dataset_size, + read_using_soundfile=args.read_using_sf, + ) + else: + raise Exception(f"Invalid dataset type {args.dataset_type}") + + model.warm_up(sample=dataset[0], repeat=2) + + run_time, memory, flops = model.gather_all_metrics(dataset, repeat=1) + print(f"run_time = {run_time}sec \tmemory = {memory}MiB \tflops = {flops}") + + if args.dump_speech_waveforms_dir: + model.dump_final_speech_output( + dataset, + args.dump_speech_waveforms_dir, + lambda x: x, + args.target_sr, + prefix=args.dump_waveform_file_prefix, + ) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/speech_to_speech/docs/data_augmentation.md b/fairseq/examples/speech_to_speech/docs/data_augmentation.md new file mode 100644 index 0000000..c0c17ff --- /dev/null +++ b/fairseq/examples/speech_to_speech/docs/data_augmentation.md @@ -0,0 +1,435 @@ +# Noise and audio augmentation techniques + +The noise and data augmentation techniques were written in an effort to understand how augmenatation can affect model robustness and performance in both clean and noisy settings. + +All transforms discussed in this section are subclasses of `AudioFeatureTransform`, `AudioWaveformTransform`, or `AudioDatasetTransform`. Each `Audio*Transform` has unique interaction with the data. 
If interested in implementing one's own transforms, it is highly advisable to review the differences (see [Adding your own transforms](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#adding-your-own-transforms)). If only applying the in-built transforms, then one only needs to be mindful that the correct kind of transform is listed in the config (see [Using transforms](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#using-transforms)). These transforms can be applied to instances of `SpeechToTextDataset`. + +### Contents +[In-built transforms](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#in-built-transforms) + +[Benchmark studies](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#benchmark-studies) + +[Using transforms](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#using-transforms) + +[Adding your own transforms](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/data_augmentation.md#adding-your-own-transforms) + + +## In-built transforms +### 1. Utterance concatenation +Utterance concatenation is a data augmentation technique introduced as ConcatAug in [Translatotron 2: High-quality direct speech-to-speech translation +with voice preservation](https://arxiv.org/pdf/2107.08661.pdf). +With some parameterized probability, samples are concatenated with one other randomly chosen sample from the whole dataset. In the positive (concatenation) case, accessing `dataset[i]` will return a `SpeechToTextDatasetItem` where `source=source[i]+source[j]` and `target=target[i]+target[j]`. In the negative (skip concatenation) case, accessing `dataset[i]` will return a `SpeechToTextDatasetItem` where `source=source[i]` and `target=target[i]` as usual. 
+ +**Usage**: `concataugment` is an `AudioDatasetTransform` and has three configurable hyperparameters: +- `rate`: probability that any single access will result in the positive (concatenation) case. Defaults to 0.25. +- `max_tokens`: maximum number of tokens allowed for concatenated source sequences. This parameter is meant to limit the length of concatenated samples to avoid out-of-memory errors. Defaults to 300. +- `attempts`: maximum number of invalid concatenation attempts before defaulting to the negative (skip concatenation) case. This parameter aims to limit excessive time spent trying to find candidate samples that are short enough to concatenate with. Defaults to 5. + +Please be wary of OOMs while using this augmentation technique; we used smaller batch sizes as a workaround to avoid OOMs. Batch size is determined by update frequency, batch size hyperparameter, and the number of GPUs, so you may want to alter these to this end. + +### 2. Noise augmentation suite + +The four noise augmentation methods in this suite adhere to the following principle: with some parameterized probability, samples are overlaid with a noise track. The content of the noise track is specific to the method. Signal-to-noise ratio with which the noise track is overlaid is determined by choosing a value from a random uniform distribution with parameterized endpoints. The first three methods are based off data augmentation methods suggested in Section 3.3 of [X-Vectors: Robust DNN Embeddings for Speaker Recognition](https://danielpovey.com/files/2018_icassp_xvectors.pdf). + +#### 2.1. Music augmentation +For music augmentation, the noise track consists of one file uniformly randomly selected from a corpus of music files. The music file is cut to size, including being repeated to fill the original sample length if necessary. 
+ +**Usage**: `musicaugment` is an `AudioWaveformTransform` and has four configurable hyperparameters: +- `samples_path`: path where background music files are saved as audios (.wav files). No default. +- `rate`: probability that any single access will result in the positive (background music) case. Defaults to 0.25. +- `snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 5. +- `snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 15. + +#### 2.2. Babble augmentation +For babble augmentation, the noise track consists of multiple audios uniformly randomly selected from a corpus of speech files. The number of speech audios in the background track is chosen randomly with equal probability between 3 and 7 audios. + +**Usage**: `babbleaugment` is an `AudioWaveformTransform` and has four configurable hyperparameters: +- `samples_path`: path where background speech files are saved as audios (.wav files). No default. +- `rate`: probability that any single access will result in the positive (background speech) case. Defaults to 0.25. +- `snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 5. +- `snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 15. + +#### 2.3. Sporadic noise augmentation +For sporadic noise augmentation, the noise track is mostly silent except for intermittent short clips of noise which are added at roughly a parameterized frequency. These clips are randomly chosen and cut from a corpus of noise files to lengths according to a parameterized Gaussian distribution. 
+ +**Usage**: `sporadicnoiseaugment` is an `AudioWaveformTransform` and has seven configurable hyperparameters: +- `samples_path`: path where background noise files are saved as audios (.wav files). No default. +- `rate`: probability that any single access will result in the positive (add a sporadic noise track) case. Defaults to 0.25. +- `snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 5. +- `snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 15. +- `noise_rate`: rate in noises per second at which noise clip will be added to the original sample +- `noise_len_mean`: mean of Gaussian normal distribution from which length of noise clip is chosen +- `noise_len_std`: standard deviation of Gaussian normal distribution from which length of noise clip is chosen + +#### 2.4. Background noise augmentation +For background noise augmentation, the noise track is a single track uniformly randomly selected from a corpus of noise files. The noise file is cut to size, including being repeated to fill the original sample length if necessary. + +**Usage**: `backgroundnoiseaugment` is an `AudioWaveformTransform` and has four configurable hyperparameters: +- `samples_path`: path where background noise files are saved as audios (.wav files). No default. +- `rate`: probability that any single access will result in the positive (background noise) case. Defaults to 0.25. +- `snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 5. +- `snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 15. + +### 3. 
Mixed babble and background noise augmentation with recognizable source speaker + +This augmentation technique is based on Algorithm 1 in [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900) and is similar to the noise augmentation suite techniques in that it has a background noise track. The noise track consists of either (1) another audio sample from the batch or (2) a background noise track. A key difference is the length of the noise track is chosen from a uniform random distribution between 0 and half of the original sample length. + +**Usage**: `noisyoverlapaugment` is an `AudioDatasetTransform` and has seven configurable hyperparameters: +- `noises_path`: path where background noise files are saved as audios (.wav files). No default. +- `rate`: probability that any single access will result in the positive (background noise) case. Defaults to 0.25. +- `mixing_noise_rate`: probability that in a positive (background noise) case, the noise track will consist of background noise (rather than babble from the batch). Defaults to 0.1. +- `noise_snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to -5. +- `noise_snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add background noise to the original source. Defaults to 5. +- `utterance_snr_min`: lower endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add **another audio from the batch** to the original source. Defaults to -5. +- `utterance_snr_max`: higher endpoint of the range from which a signal-to-noise ratio is uniformly randomly chosen with which to add **another audio from the batch** to the original source. Defaults to 5. 
+ +## Benchmark studies +### Evaluation on clean data +Augmentation in training data|Hyperparameters|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|---|--- +None||3.954|24.984|23.962|24.448 +ConcatAugment|rate = 0.25, max_tokens = 3000, attempts = 5|3.940|25.322|26.124|26.19 +BabbleAugment|rate = 0.25, MUSAN speech, snr_min = (-5), snr_max = 5|3.957|24.226|23.186|22.368| +BackgroundNoiseAugment|rate = 0.1, MUSAN noises, snr_min = (-10), snr_max = 10|3.955|24.745|23.513|23.819 +MusicAugment|rate = 0.25, MUSAN music, snr_min = 0, snr_max = 20|3.954|25.096|24.301|23.341| +SporadicNoiseAugment|rate = 0.1, noise_rate = 0.25, MUSAN noises, snr_min = 10, snr_max = 35|3.954|24.924|23.951|23.484| +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|as above, except limited rates to sum to 0.25: music (0.074), background (0.029), babble (0.074), sporadic (0.029)|3.953|24.874|23.675|24.249| +NoisyOverlapAugment|rate = 0.25, mixing_noise_rate = 0.5, MUSAN noises, utterance_snr_min = (-10), utterance_snr_max = 0, noise_snr_min = (-5), noise_snr_max = 20|3.954|24.949|24.015|23.768| + +### Evaluation on data with music noise added at SNR = (-5) - 5 +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|15.785|21.105|16.944 +ConcatAugment|3.940|17.186|23.255|18.24 +BabbleAugment|3.957|19.158|22.064|17.116 +BackgroundNoiseAugment|3.955|17.777|22.0|17.535| +MusicAugment|3.954|20.345|23.126|19.433| +SporadicNoiseAugment|3.954|15.927|21.382|14.736| +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|3.953|19.724|22.659|17.852| +NoisyOverlapAugment|3.954|17.49|22.142|17.207| + +### Evaluation on data with babble noise added at SNR = (-5) - 5 +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|4.092|13.514|5.13 +ConcatAugment|3.940|5.493|15.835|6.893 
+BabbleAugment|3.957|16.12|21.097|13.996 +BackgroundNoiseAugment|3.955|4.691|15.784|5.982 +MusicAugment|3.954|8.06|17.764|9.008 +SporadicNoiseAugment|3.954|4.009|13.935|4.814 +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|3.953|14.692|20.882|14.45 +NoisyOverlapAugment|3.954|4.032|16.434|7.284 + +### Evaluation on data with sporadic noise added at SNR = (-5) - 5 +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|23.778|23.745|22.748 +ConcatAugment|3.940|24.239|25.907|25.723 +BabbleAugment|3.957|23.42|23.048|21.076 +BackgroundNoiseAugment|3.955|23.998|23.467|22.494 +MusicAugment|3.954|24.142|24.181|19.143 +SporadicNoiseAugment|3.954|23.97|23.894|22.61 +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|3.953|24.118|23.59|23.717 +NoisyOverlapAugment|3.954|24.265|24.103|23.167 + +### Evaluation on data with background noise added at SNR = (-5) - 5 +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|20.201|22.525|19.66 +ConcatAugment|3.940|20.904|24.706|21.353 +BabbleAugment|3.957|20.687|22.374|18.907 +BackgroundNoiseAugment|3.955|21.574|22.998|20.043 +MusicAugment|3.954|21.65|23.529|19.87 +SporadicNoiseAugment|3.954|20.578|22.577|19.096 +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|3.953|21.811|23.144|20.986 +NoisyOverlapAugment|3.954|21.312|23.153|20.302 + +### Evaluation on data with all four types of noises added at SNR = (-5) - 5, each applied with prob 0.5 +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|10.895|19.319|12.748 +ConcatAugment|3.940|13.517|21.658|15.428 +BabbleAugment|3.957|18.09|21.384|16.018 +BackgroundNoiseAugment|3.955|12.837|20.719|13.933 +MusicAugment|3.954|16.589|21.823|15.927 +SporadicNoiseAugment|3.954|11.238|19.91|13.31 +MusicAugment + BabbleAugment + 
BackgroundNoiseAugment + SporadicNoiseAugment|3.953|18.636|21.935|17.845 +NoisyOverlapAugment|3.954|12.829|20.856|15.048 + +### Evaluation on data with noisy overlap augment +Augmentation in training data|Training loss|BLEU (covost)|BLEU (epst)|BLEU (mtedx) +---|---|---|---|--- +None|3.954|21.245|22.24|20.994 +ConcatAugment|3.940|21.611|24.247|23.068 +BabbleAugment|3.957|21.867|21.987|20.099| +BackgroundNoiseAugment|3.955|21.533|21.806|19.717| +MusicAugment|3.954|21.823|22.643|20.847| +SporadicNoiseAugment|3.954|21.373|22.381|20.672| +MusicAugment + BabbleAugment + BackgroundNoiseAugment + SporadicNoiseAugment|3.953|22.206|22.414|21.375| +NoisyOverlapAugment|3.954|23.371|23.396|22.627| + +## Using transforms +Transforms are configurable. + +1. Please pay careful attention to the type of transform you are applying. + - `concataugment` and `noisyoverlapaugment` are instances of `AudioDatasetTransform` and should be listed in the config under `dataset_transforms`. + - `musicaugment`, `babbleaugment`, `sporadicnoiseaugment`, and `backgroundnoiseaugment` are instances of `AudioWaveformTransform` and should be listed under `waveform_transforms`. + - Instances of `AudioFeatureTransform` should be listed under `feature_transforms`. +2. Feel free to apply these augmentations in different contexts, e.g., you may use a `_train` or `_eval` flag to specify when the transform will be applied. If the dataset at hand contains `train` in its name, those transforms under the `_train` flag will be applied; else, the remaining transforms will be applied. 
+ +For example, you would add this to your config to apply the musicaugment transform to a training dataset: +```yaml +musicaugment: + samples_path: ${MUSIC_PATH} + snr_min: 10 + snr_max: 15 + rate: 0.25 +waveform_transforms: + _train: + - musicaugment +``` +or add this to apply the concataugment transform: +```yaml +concataugment: + rate: 0.25 + max_tokens: 3000 + attempts: 5 +dataset_transforms: + _train: + - concataugment + ``` +You may also want to add multiple of one type of transform; here, we add multiple `AudioWaveformTransform`s: +```yaml +musicaugment: + samples_path: ${MUSIC_PATH} + snr_min: 5 + snr_max: 20 + rate: 0.25 +backgroundnoiseaugment: + samples_path: ${NOISES_PATH} + snr_min: 10 + snr_max: 20 + rate: 0.1 +sporadicnoiseaugment: + samples_path: ${NOISES_PATH} + snr_min: 5 + snr_max: 15 + rate: 0.1 + noise_rate: 0.25 +waveform_transforms: + _train: + - musicaugment + - backgroundnoiseaugment + - sporadicnoiseaugment +``` + +## Adding your own transforms +Note: We store transform implementations in `fairseq/data/audio/*_transforms` directories. You may refer to these as examples while implementing your own transform. + +### Step 1. Picking the right class for your transform +The integration into SpeechToTextDataset is quite different for each kind of transform, so it is important to understand which one is best suited to your purposes. + +**Feature transforms** +`AudioFeatureTransform` is a base class which allows **some transform to be applied to audio spectrograms** in the data loading step. One thing to note is that the source data is either saved as `np.ndarrays` or as audio files, and is to be returned either as features (spectrogram) or waveform. If and only if the data is to be returned as a spectrogram, then `AudioFeatureTransform`s will be applied. + +**Waveform transforms** +`AudioWaveformTransform` is a base class which allows some **transform to be applied to waveforms** in the data loading step. 
As mentioned above, there are two source and return types to data loading for this dataset. If and only if the data is saved in audio file format, then `AudioWaveformTransform`s will be applied, whichever return type is used. + +**Dataset transforms** +`AudioDatasetTransform` is a base class for transforms **based on more than one item in a dataset**, ex. concatenation of two random samples in a dataset. Rather than being applied in a consistent way, i.e., to all features or to all waveforms, the integration of a dataset transform is entirely specific. Adding a dataset transform requires actually editing the `fairseq/data/audio/speech_to_text_dataset.py` file. + +### Step 2. Setting up your transform (generic to all types of transforms) +Now that you know which kind of transform you would like to use, we are ready to implement it. This step is generic for all transform types, i.e., `TRANSFORM_TYPE` may be any of `feature`, `waveform`, or `dataset`. We will show how to build utterance concatenation (an `AudioDatasetTransform`) as an example. + +Import the base class and registration function for your transform. +```python +from fairseq.data.audio.dataset_transforms import ( + AudioDatasetTransform, + register_audio_dataset_transform +) +``` + +Define the class and register the transform. The name passed into the registration function is how your transform should be named in the config. +```python +@register_audio_dataset_transform("concataugment") +class ConcatAugment(AudioDatasetTransform): +``` + +We are now ready to add the basic important functions to our new class. In this example, `_DEFAULTS` refers to a dictionary with the default hyperparameter values that we defined. `from_config_dict` is called to instantiate the transform given hyperparameters from the config. 
+```python + @classmethod + def from_config_dict(cls, config=None): + _config = {} if config is None else config + return ConcatAugment( + _config.get("rate", _DEFAULTS["rate"]), + _config.get("max_tokens", _DEFAULTS["max_tokens"]), + _config.get("attempts", _DEFAULTS["attempts"]), + ) +``` +We edit the instantiation function `__init__` to track hyperparameters and do any setup work. +```python + def __init__( + self, + rate=_DEFAULTS["rate"], + max_tokens=_DEFAULTS["max_tokens"], + attempts=_DEFAULTS["attempts"], + ): + self.rate, self.max_tokens, self.attempts = rate, max_tokens, attempts +``` +Lastly `__repr__` gives how the transform will be reported in an output log. +```python + def __repr__(self): + return ( + self.__class__.__name__ + + "(" + + ", ".join( + [ + f"rate={self.rate}", + f"max_tokens={self.max_tokens}", + f"attempts={self.attempts}", + ] + ) + + ")" + ) +``` + +### Step 3. Adding the transform logic +At this point, we are ready to implement the actual transform logic. The flow from here is different for each of the three transforms, so follow the path that is relevant to you. +### ...for feature transforms +The final step is implementing the `__call__` function, which applies the transform logic and **returns** the spectrogram with transform applied. This supports and should take exactly **two arguments**: +- `self` +- `x` (np.ndarray): the spectrogram for one source sample. (This is a positional argument, so you can use another parameter name like `spectrogram` instead of `x`.) + +For example, this is the `__call__` function for GlobalCMVN (cepstral mean and variance normalization). +```python + def __call__(self, x): + x = np.subtract(x, self.mean) + x = np.divide(x, self.std) + return x + +``` +### ...for waveform transforms +The final step is implementing the `__call__` function, which applies the transform logic. 
This supports and should take exactly **three arguments**: +- `self` +- `source` (numpy.ndarray or torch.Tensor): source audio 2d waveform (channels x length) +- `sample_rate` (optional, defaults to None): sample rate of `source` + +`__call__` **returns**: +- transformed audio waveform +- sample rate of transformed audio waveform + +For example, this is the `__call__` function for augmentations in the Noise Augmentation Suite. +```python + def __call__(self, source, sample_rate=None): + if np.random.random() > self.rate: + return source + + noise = self._get_noise( + source.shape, always_2d=True, use_sample_rate=sample_rate + ) + return self._mix(source, noise, rand_uniform(self.snr_min, self.snr_max)), sample_rate +``` + +### ...for dataset transforms +Dataset transforms are extremely flexible, and implementation involves directly integrating them into `fairseq/data/audio/speech_to_text_dataset.py` in transform-specific ways. +There are two basic components: (1) check whether or not this transform is part of this dataset instance using `self.dataset_transforms.has_transform(TRANSFORM_CLS)`, and (2) if so, get the transform using `self.dataset_transforms.get_transform(TRANSFORM_CLS)` & apply it. +Due to the case-by-case specificity, it is easier to demonstrate this by examples. + +#### Example: NoisyOverlapAugment +This transform requires access to multiple items within the same batch at once. + +**Logic**: We still use the transform classes to keep away the transform logic. For example, `__call__` of `NoisyOverlapAugment` class takes a list of source tokens for items in a mini-batch, applies noise/utterance as dictated by the transform, and returns the list of transformed source tokens for items in the mini-batch. + +```python + def __call__(self, sources): + for i, source in enumerate(sources): + if np.random.random() > self.rate: + continue + + pri = source.numpy() + + # ... 
some transform code omitted + + pri[s_source : s_source + l] = np.add( + pri[s_source : s_source + l], np.multiply(scl, sec[s_sec : s_sec + l]) + ) + sources[i] = torch.from_numpy(pri).float() + + return sources +``` + +**Integration**: The `collater` function for `SpeechToTextDataset` is responsible for preparing a mini-batch for training, so we integrate NOAug through adding a few lines to the top of this function: +```python +def collater( + self, samples: List[SpeechToTextDatasetItem], return_order: bool = False +) -> Dict: + if len(samples) == 0: + return {} + indices = torch.tensor([x.index for x in samples], dtype=torch.long) + + sources = [x.source for x in samples] + + # NOAUG INTEGRATION BLOCK + # (1) Check whether or not this transform is part of this dataset instance + has_NOAug = self.dataset_transforms.has_transform(NoisyOverlapAugment) + # (2) If so, get & apply the transform + if has_NOAug and self.cfg.use_audio_input: + NOAug = self.dataset_transforms.get_transform(NoisyOverlapAugment) + sources = NOAug(sources) + + frames = _collate_frames(sources, self.cfg.use_audio_input) + # sort samples by descending number of frames + n_frames = torch.tensor([x.size(0) for x in sources], dtype=torch.long) + n_frames, order = n_frames.sort(descending=True) + indices = indices.index_select(0, order) + frames = frames.index_select(0, order) + + # ... rest of function +``` + +#### Example: ConcatAugment +This transform requires access to another item within the dataset at once. + +**Logic**: We abstract the logic for picking indices to concatenate by adding a `find_indices` function to the `ConcatAugment` class, which takes one index in the dataset and finds a compatible second index to concatenate source and target tokens. 
+```python + def find_indices(self, index: int, n_frames: List[int], n_samples: int): + # skip conditions: application rate, max_tokens limit exceeded + if np.random.random() > self.rate: + return [index] + if self.max_tokens and n_frames[index] > self.max_tokens: + return [index] + + # pick second sample to concatenate + for _ in range(self.attempts): + index2 = np.random.randint(0, n_samples) + if index2 != index and ( + not self.max_tokens + or n_frames[index] + n_frames[index2] < self.max_tokens + ): + return [index, index2] + + return [index] +``` + +**Integration**: `SpeechToTextDataset` uses a custom `__getitem__(self, index)` function (called in the background when you write `dataset[i]`). We edited this function (as well as `_get_source_audio` and `get_tokenized_tgt_text`) to achieve the desired transform effect where accessing `dataset[i]` will return a `SpeechToTextDatasetItem` where `source=source[i]+source[j]` and `target=target[i]+target[j]`. +```python +def __getitem__(self, index: int) -> SpeechToTextDatasetItem: + + # CONCATAUGMENT INTEGRATION BLOCK + # (1) Check whether or not this transform is part of this dataset instance + has_concat = self.dataset_transforms.has_transform(ConcatAugment) + # (2) If so, get & apply the transform + if has_concat: + concat = self.dataset_transforms.get_transform(ConcatAugment) + indices = concat.find_indices(index, self.n_frames, self.n_samples) + + source = self._get_source_audio(indices if has_concat else index) + source = self.pack_frames(source) + + target = None + if self.tgt_texts is not None: + tokenized = self.get_tokenized_tgt_text(indices if has_concat else index) + target = self.tgt_dict.encode_line( + + # ... 
rest of function +``` diff --git a/fairseq/examples/speech_to_speech/docs/direct_s2st_discrete_units.md b/fairseq/examples/speech_to_speech/docs/direct_s2st_discrete_units.md new file mode 100644 index 0000000..0c63ffe --- /dev/null +++ b/fairseq/examples/speech_to_speech/docs/direct_s2st_discrete_units.md @@ -0,0 +1,181 @@ +# Direct speech-to-speech translation with discrete units + +We provide the implementation for speech-to-unit translation (S2UT) proposed in "[Direct speech-to-speech translation with discrete units (Lee et al. 2021)](https://arxiv.org/abs/2107.05604)" and also the transformer-based implementation of the speech-to-spectrogram translation (S2SPECT, or transformer-based [Translatotron](https://arxiv.org/abs/1904.06037)) baseline in the paper. + +## Pretrained Models + +### Unit-based HiFi-GAN Vocoder +Unit config | Unit size | Vocoder dataset | Model +|---|---|---|--- +[HuBERT Base, Librispeech](https://github.com/fairinternal/fairseq-py/tree/main/examples/hubert), layer 6 | 100 | [LJSpeech](https://keithito.com/LJ-Speech-Dataset/) | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/hubert_base_100_lj/g_00500000), [config](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/hubert_base_100_lj/config.json) + + +## Data preparation +### Target speech +0. (optional) To prepare S2S data from a speech-to-text translation (ST) dataset, see [fairseq-S^2](https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis) for pre-trained TTS models and instructions on how to train and decode TTS models. +1. Prepare two folders, `$SRC_AUDIO` and `$TGT_AUDIO`, with `${SPLIT}/${SAMPLE_ID}.wav` for source and target speech under each folder, separately. Note that for S2UT experiments, target audio sampling rate should be in 16,000 Hz, and for S2SPECT experiments, target audio sampling rate is recommended to be in 22,050 Hz. +2. 
To prepare target discrete units for S2UT model training, see [Generative Spoken Language Modeling (speech2unit)](https://github.com/pytorch/fairseq/tree/main/examples/textless_nlp/gslm/speech2unit) for pre-trained k-means models, checkpoints, and instructions on how to decode units from speech. Set the output target unit files (`--out_quantized_file_path`) as `${TGT_AUDIO}/${SPLIT}.txt`. In [Lee et al. 2021](https://arxiv.org/abs/2107.05604), we use 100 units from the sixth layer (`--layer 6`) of the HuBERT Base model. + +### Formatting data +**Speech-to-speech data** + +_S2UT_ + * Set `--reduce-unit` for training S2UT _reduced_ model + * Pre-trained vocoder and config (`$VOCODER_CKPT`, `$VOCODER_CFG`) can be downloaded from the **Pretrained Models** section. They are not required if `--eval-inference` is not going to be set during model training. +``` +# $SPLIT1, $SPLIT2, etc. are split names such as train, dev, test, etc. + +python examples/speech_to_speech/preprocessing/prep_s2ut_data.py \ + --source-dir $SRC_AUDIO --target-dir $TGT_AUDIO --data-split $SPLIT1 $SPLIT2 \ + --output-root $DATA_ROOT --reduce-unit \ + --vocoder-checkpoint $VOCODER_CKPT --vocoder-cfg $VOCODER_CFG +``` + +_S2SPECT_ +``` +# $SPLIT1, $SPLIT2, etc. are split names such as train, dev, test, etc. + +python examples/speech_to_speech/preprocessing/prep_s2spect_data.py \ + --source-dir $SRC_AUDIO --target-dir $TGT_AUDIO --data-split $SPLIT1 $SPLIT2 \ + --output-root $DATA_ROOT +``` + +**Multitask data** + * For each multitask `$TASK_NAME`, prepare `${DATA_ROOT}/${TASK_NAME}/${SPLIT}.tsv` files for each split following the format below: (Two tab separated columns. The sample_ids should match with the sample_ids for the speech-to-speech data in `${DATA_ROOT}/${SPLIT}.tsv`.) +``` +id tgt_text +sample_id_0 token1 token2 token3 ... +sample_id_1 token1 token2 token3 ... +... 
+``` + * For each multitask `$TASK_NAME`, prepare `${DATA_ROOT}/${TASK_NAME}/dict.txt`, a dictionary in fairseq format with all tokens for the targets for `$TASK_NAME`. + * Create `config_multitask.yaml`. Below is an example of the config used for S2UT _reduced_ with Fisher experiments including two encoder multitasks (`source_letter`, `target_letter`) and one decoder CTC task (`decoder_target_ctc`). +``` +source_letter: # $TASK_NAME + decoder_type: transformer + dict: ${DATA_ROOT}/source_letter/dict.txt + data: ${DATA_ROOT}/source_letter + encoder_layer: 6 + loss_weight: 8.0 +target_letter: + decoder_type: transformer + dict: ${DATA_ROOT}/target_letter/dict.txt + data: ${DATA_ROOT}/target_letter + encoder_layer: 8 + loss_weight: 8.0 +decoder_target_ctc: + decoder_type: ctc + dict: ${DATA_ROOT}/decoder_target_ctc/dict.txt + data: ${DATA_ROOT}/decoder_target_ctc + decoder_layer: 3 + loss_weight: 1.6 +``` + + +## Training + +**Speech-to-unit translation (S2UT)** + +Here's an example for training Fisher S2UT models with 100 discrete units as target: +``` +fairseq-train $DATA_ROOT \ + --config-yaml config.yaml --multitask-config-yaml config_multitask.yaml \ + --task speech_to_speech --target-is-code --target-code-size 100 --vocoder code_hifigan \ + --criterion speech_to_unit --label-smoothing 0.2 \ + --arch s2ut_transformer_fisher --share-decoder-input-output-embed \ + --dropout 0.1 --attention-dropout 0.1 --relu-dropout 0.1 \ + --train-subset train --valid-subset dev \ + --save-dir ${MODEL_DIR} \ + --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 10000 \ + --optimizer adam --adam-betas "(0.9,0.98)" --clip-norm 10.0 \ + --max-update 400000 --max-tokens 20000 --max-target-positions 3000 --update-freq 4 \ + --seed 1 --fp16 --num-workers 8 +``` +* Adjust `--update-freq` accordingly for different #GPUs. In the above we set `--update-freq 4` to simulate training with 4 GPUs. 
+* Set `--n-frames-per-step 5` to train an S2UT _stacked_ system with reduction ratio r=5. (Use `$DATA_ROOT` prepared without `--reduce-unit`.) +* (optional) one can turn on tracking MCD loss during training for checkpoint selection by setting `--eval-inference --eval-args '{"beam": 1, "max_len_a": 1}' --best-checkpoint-metric mcd_loss`. It is recommended to sample a smaller subset as the validation set as MCD loss computation is time-consuming. + +**Speech-to-spectrogram translation (S2SPECT)** + +Here's an example for training Fisher S2SPECT models with reduction ratio r=5: +``` +fairseq-train $DATA_ROOT \ + --config-yaml config.yaml --multitask-config-yaml config_multitask.yaml \ + --task speech_to_speech --n-frames-per-step 5 \ + --criterion speech_to_spectrogram \ + --arch s2spect_transformer_fisher --decoder-normalize-before \ + --dropout 0.1 --attention-dropout 0.1 --relu-dropout 0.1 \ + --train-subset train --valid-subset dev \ + --save-dir ${MODEL_DIR} \ + --eval-inference --best-checkpoint-metric mcd_loss \ + --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 10000 \ + --optimizer adam --adam-betas "(0.9,0.98)" --clip-norm 10.0 --weight-decay 1e-6 \ + --max-update 400000 --max-tokens 80000 --max-tokens-valid 30000 --required-batch-size-multiple 1 \ + --max-target-positions 3000 --update-freq 16 \ + --seed 1 --fp16 --num-workers 8 +``` +* Adjust `--update-freq` accordingly for different #GPUs. In the above we set `--update-freq 16` to simulate training with 16 GPUs. +* We recommend turning on MCD loss during training for the best checkpoint selection. + +**Unit-based HiFi-GAN vocoder** + +The vocoder is trained with the [speech-resynthesis repo](https://github.com/facebookresearch/speech-resynthesis). See [here](https://github.com/facebookresearch/speech-resynthesis/tree/main/examples/speech_to_speech_translation) for instructions on how to train the unit-based HiFi-GAN vocoder with duration prediction. 
The same vocoder can support waveform generation for both _reduced_ unit sequences (with `--dur-prediction` set during inference) and original unit sequences. + +## Inference + +**Speech-to-unit translation (S2UT)** + +1. Follow the same inference process as in [fairseq-S2T](https://github.com/pytorch/fairseq/tree/main/examples/speech_to_text) to generate unit sequences (`${RESULTS_PATH}/generate-${GEN_SUBSET}.txt`). +``` +fairseq-generate $DATA_ROOT \ + --config-yaml config.yaml --multitask-config-yaml config_multitask.yaml \ + --task speech_to_speech --target-is-code --target-code-size 100 --vocoder code_hifigan \ + --path $MODEL_DIR/checkpoint_best.pt --gen-subset $GEN_SUBSET \ + --max-tokens 50000 \ + --beam 10 --max-len-a 1 \ + --results-path ${RESULTS_PATH} +``` + * Set `--beam 1 --n-frames-per-step $r` for decoding with S2UT _stacked_ models. + +2. Convert unit sequences to waveform. +``` +grep "^D\-" ${RESULTS_PATH}/generate-${GEN_SUBSET}.txt | \ + sed 's/^D-//ig' | sort -nk1 | cut -f3 \ + > ${RESULTS_PATH}/generate-${GEN_SUBSET}.unit + +python examples/speech_to_speech/generate_waveform_from_code.py \ + --in-code-file ${RESULTS_PATH}/generate-${GEN_SUBSET}.unit \ + --vocoder $VOCODER_CKPT --vocoder-cfg $VOCODER_CFG \ + --results-path ${RESULTS_PATH} --dur-prediction +``` + * Set `--dur-prediction` for generating audio for S2UT _reduced_ models. + + +**Speech-to-spectrogram translation (S2SPECT)** + +Follow the same inference process as in [fairseq-S^2](https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis) to generate waveform. 
+ +``` +# assume using a default Griffin-Lim vocoder + +python examples/speech_synthesis/generate_waveform.py $DATA_ROOT \ + --config-yaml config.yaml --multitask-config-yaml config_multitask.yaml \ + --task speech_to_speech --n-frames-per-step 5 \ + --path $MODEL_DIR/checkpoint_best.pt --gen-subset $GEN_SUBSET \ + --max-tokens 50000 \ + --results-path ${RESULTS_PATH} --dump-waveforms --output-sample-rate 16000 +``` + +In addition to using the default Griffin-Lim vocoder, one can also finetune a HiFi-GAN vocoder for the S2SPECT model by following the instructions in the [HiFi-GAN repo](https://github.com/jik876/hifi-gan). + +**Multitask decoding** + +Coming soon. + +## Evaluation + +To evaluate speech translation output, we first apply ASR on the speech output and then compute the BLEU score between the ASR-decoded text and the references using sacreBLEU. + +**En** +* ASR: We use the "[Wav2Vec 2.0 Large (LV-60) + Self Training / 960 hours / Libri-Light + Librispeech](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_960h_pl.pt)" En ASR model open-sourced by the [wav2vec](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec) project. See [instructions](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#evaluating-a-ctc-model) on how to run inference with a wav2vec-based ASR model. The model is also available on [Hugging Face](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self). +* Text normalization: We use the text cleaner at [https://github.com/keithito/tacotron](https://github.com/keithito/tacotron) for pre-processing reference English text for ASR BLEU evaluation. 
diff --git a/fairseq/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md b/fairseq/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md new file mode 100644 index 0000000..fbfa5dd --- /dev/null +++ b/fairseq/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md @@ -0,0 +1,125 @@ +# Speech to speech translation (S2ST) + +We provide the implementation for speech-to-unit translation (S2UT) proposed in [Enhanced Direct Speech-to-Speech Translation Using Self-supervised Pre-training and Data Augmentation (Popuri et al. 2022)](https://arxiv.org/abs/2204.02967) and the various pretrained models used. + +## Pretrained Models + +### Unit extraction + +We used the multilingual HuBERT model open sourced in [Textless S2ST with Real Data](textless_s2st_real_data.md) + +### Wav2vec 2.0 + +Language | Block type | Model size | Dataset | Model | +--- | --- | --- | --- | --- | +Es | Transformer | BASE | Voxpopuli | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/w2v2/es/transformer_B.pt) | +Es | Transformer | LARGE | Voxpopuli | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/w2v2/es/transformer_L.pt) | +Es | Conformer | LARGE | Voxpopuli | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/w2v2/es/conformer_L.pt) | +En | Transformer | BASE | Librilight| [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/w2v2/en/transformer_B.pt) | +En | Conformer | LARGE | Librilight | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/w2v2/en/conformer_L.pt) | + +### Unit mBART + +Unit size | Dataset | Unit config | Model | +--- | --- | --- | --- | +1000 | [Voxpopuli](https://aclanthology.org/2021.acl-long.80) En, Es unlabelled speech | [mbart_large](https://github.com/pytorch/fairseq/blob/f591cc94caa85098ccf125a4782f91125b6a086d/fairseq/models/bart/model.py#L368) 
|[ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/unit_mBART/checkpoint.pt) | + +## Data preparation + +1. To prepare data for S2UT finetuning, follow the steps from [Direct S2ST with Discrete Units](./direct_s2st_discrete_units.md) and format the data in the _S2UT_ format. Note that here we instead use 1000 units from the eleventh layer (`--layer 11`) of the multilingual HuBERT model linked above. +2. Run + +``` +var="id\taudio\tn_frames\ttgt_text\ttgt_n_frames" +sed -i "1s/.*/$var/" ${SPLIT}.tsv +``` + +## Training + +**Speech-to-unit translation (S2UT)** + +Here's an example for finetuning S2UT models with 1000 discrete units as target. You can download the sample [config](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/config.yaml) file and [vocabulary](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/dict.txt) for Es-En from here: + +``` +fairseq-train $DATA_ROOT \ + --config-yaml config.yaml \ + --task speech_to_text --arch xm_transformer\ + --criterion label_smoothed_cross_entropy --label-smoothing 0.2 \ + --share-decoder-input-output-embed --adaptor-n-layers 1 --normalize\ + --dropout 0.1 --attention-dropout 0.1 --relu-dropout 0.1 \ + --train-subset train --valid-subset dev \ + --load-pretrained-decoder-from ${unit_mBART} --w2v-path ${wav2vec2.0} \ + --mask-prob 0.3 --mask-channel-length 32 --mask-channel-prob 0.25\ + --save-dir ${MODEL_DIR} --checkpoint-activations --encoder-proj \ + --lr 0.0005 --dropout 0.1 --attention-dropout 0.1 --lr-scheduler inverse_sqrt\ + --warmup-init-lr 1e-7 --warmup-updates 10000 \ + --optimizer adam --adam-betas "(0.9,0.98)" --clip-norm 10.0 \ + --max-update 20000 --max-tokens 4000 --max-tokens-valid 4000 --max-source-positions 4000 \ + --max-target-positions 4000 --update-freq 120 \ + --seed 1 --fp16 --num-workers 1 +``` + +* Adjust `--update-freq` accordingly for different #GPUs: the effective batch size scales with #GPUs x `--update-freq`. The command above sets `--update-freq 120`, which simulates training with 120 GPUs on a single GPU (e.g. use `--update-freq 15` when training on 8 GPUs). 
+* In the above setting we finetune the model end to end, corresponding to the full setup in the paper. +* To apply LNA-E partial finetuning, add `--finetune-w2v-params layer_norm,self_attn` +* For LNA-D partial finetuning add `--finetune-decoder-params encoder_attn,layer_norm,self_attn`. To optionally freeze the encoder for the first K updates, use `--freeze-finetune-updates ${K}` +* For LNA-E,D partial finetuning add both the above options. + +**Unit-based HiFi-GAN vocoder** + +We apply the open-sourced unit-based HiFi-GAN vocoders to convert the predicted unit sequences to waveform. They are open sourced in [Textless S2ST with Real Data](textless_s2st_real_data.md). + +## Inference + +**Speech-to-unit translation (S2UT)** + +1. Follow the same inference process as in [fairseq-S2T](https://github.com/pytorch/fairseq/tree/main/examples/speech_to_text) to generate unit sequences (`${RESULTS_PATH}/generate-${GEN_SUBSET}.txt`). + +``` +fairseq-generate $DATA_ROOT \ + --config-yaml config.yaml \ + --task speech_to_text \ + --path $MODEL_DIR/checkpoint_best.pt --gen-subset $GEN_SUBSET \ + --max-tokens 10000 --max-source-positions 10000 --max-target-positions 10000\ + --beam 10 --max-len-a 1 --max-len-b 200 \ + --results-path ${RESULTS_PATH} +``` + +2. Convert unit sequences to waveform. + +``` +grep "^D\-" ${RESULTS_PATH}/generate-${GEN_SUBSET}.txt | \ + sed 's/^D-//ig' | sort -nk1 | cut -f3 \ + > ${RESULTS_PATH}/generate-${GEN_SUBSET}.unit + +python examples/speech_to_speech/generate_waveform_from_code.py \ + --in-code-file ${RESULTS_PATH}/generate-${GEN_SUBSET}.unit \ + --vocoder $VOCODER_CKPT --vocoder-cfg $VOCODER_CFG \ + --results-path ${RESULTS_PATH} --dur-prediction +``` + +## Evaluation + +To evaluate speech translation output, we first apply ASR on the speech output and then compute the BLEU score between the ASR decoded text and the references using sacreBLEU. 
+ +* Text normalization: We use the text cleaner at [https://github.com/keithito/tacotron](https://github.com/keithito/tacotron) for pre-processing reference English text for ASR BLEU evaluation. The text cleaner used for Spanish text normalization will be updated here shortly. +* En ASR: We use the "[Wav2Vec 2.0 Large (LV-60) + Self Training / 960 hours / Libri-Light + Librispeech](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_960h_pl.pt)" En ASR model open-sourced by the [wav2vec](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec) project. The model is also available on [Hugging Face](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self). +* Es ASR: We use the [Wav2Vec2-Large-XLSR-53-Spanish](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) finetuned on spanish Common Voice Es ASR model open-sourced by Jonatasgrosman(<https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-spanish>) on [Hugging Face](https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-spanish). +* See [instructions](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#evaluating-a-ctc-model) on how to run inference with a wav2vec-based ASR model. 
+ + +## Finetuned Model Checkpoints + +ID | En - Es | Es - En | +| --- | --- | --- | +**S2UT systems without pre-training** +S2UT with multitask | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//S2UT_w_multitask.pt) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//S2UT_w_multitask.pt) | +**S2UT systems with model pre-training** +w2v2-L | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//w2v2_only.pt ) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//w2v2_only.pt) | +w2v2-L + mBART (LNA-E) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//w2v2_mbart_LNE.pt) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//w2v2_mbart_LNE.pt) | +w2v2-L + mBART (LNA-D) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//w2v2_mbart_LND.pt) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//w2v2_mbart_LND.pt) | +w2v2-L + mBART (LNA-E,D) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//w2v2_mbart_LNED.pt) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//w2v2_mbart_LNED.pt) | +**S2UT systems with model pre-training and data augmentation** +w2v2-L + mBART (LNA-D) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/en_es//w2v2_mbart_LND_w_ASR.pt) | [checkpoint](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/s2st_finetuning/es_en//w2v2_mbart_LND_w_ASR.pt) | + +Note: Some of the tasks use speech_to_text_sharded task which is yet to be open sourced. So make sure to override the task to speech_to_text to use those models. 
diff --git a/fairseq/examples/speech_to_speech/docs/textless_s2st_real_data.md b/fairseq/examples/speech_to_speech/docs/textless_s2st_real_data.md new file mode 100644 index 0000000..ca6044b --- /dev/null +++ b/fairseq/examples/speech_to_speech/docs/textless_s2st_real_data.md @@ -0,0 +1,89 @@ +# Textless Speech-to-Speech Translation (S2ST) on Real Data + +We provide instructions and pre-trained models for the work "[Textless Speech-to-Speech Translation on Real Data (Lee et al. 2021)](https://arxiv.org/abs/2112.08352)". + +## Pre-trained Models + +### HuBERT +Model | Pretraining Data | Model | Quantizer +|---|---|---|--- +mHuBERT Base | [VoxPopuli](https://github.com/facebookresearch/voxpopuli) En, Es, Fr speech from the 100k subset | [download](https://dl.fbaipublicfiles.com/hubert/mhubert_base_vp_en_es_fr_it3.pt) | [L11 km1000](https://dl.fbaipublicfiles.com/hubert/mhubert_base_vp_en_es_fr_it3_L11_km1000.bin) + + +### Unit-based HiFi-GAN vocoder +Unit config | Unit size | Vocoder language | Dataset | Model +|---|---|---|---|--- +mHuBERT, layer 11 | 1000 | En | [LJSpeech](https://keithito.com/LJ-Speech-Dataset/) | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000), [config](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json) +mHuBERT, layer 11 | 1000 | Es | [CSS10](https://github.com/Kyubyong/css10) | [ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10/g_00500000), [config](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10/config.json) +mHuBERT, layer 11 | 1000 | Fr | [CSS10](https://github.com/Kyubyong/css10) | 
[ckpt](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_fr_css10/g_00500000), [config](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_fr_css10/config.json) + + +### Speech normalizer +Language | Training data | Target unit config | Model +|---|---|---|--- +En | 10 mins | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/en/en_10min.tar.gz) +En | 1 hr | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/en/en_1h.tar.gz) +En | 10 hrs | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/en/en_10h.tar.gz) +Es | 10 mins | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/es/es_10min.tar.gz) +Es | 1 hr | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/es/es_1h.tar.gz) +Es | 10 hrs | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/es/es_10h.tar.gz) +Fr | 10 mins | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/fr/fr_10min.tar.gz) +Fr | 1 hr | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/fr/fr_1h.tar.gz) +Fr | 10 hrs | mHuBERT, layer 11, km1000 | [download](https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/speech_normalizer/fr/fr_10h.tar.gz) + +* Refer to the paper for the details of the training data. + +## Inference with Pre-trained Models + +### Speech normalizer +1. Download the pre-trained models, including the dictionary, to `DATA_DIR`. +2. Format the audio data. +```bash +# AUDIO_EXT: audio extension, e.g. 
wav, flac, etc. +# Assume all audio files are at ${AUDIO_DIR}/*.${AUDIO_EXT} + +python examples/speech_to_speech/preprocessing/prep_sn_data.py \ + --audio-dir ${AUDIO_DIR} --ext ${AUDIO_EXT} \ + --data-name ${GEN_SUBSET} --output-dir ${DATA_DIR} \ + --for-inference +``` + +3. Run the speech normalizer and post-process the output. +```bash +mkdir -p ${RESULTS_PATH} + +python examples/speech_recognition/new/infer.py \ + --config-dir examples/hubert/config/decode/ \ + --config-name infer_viterbi \ + task.data=${DATA_DIR} \ + task.normalize=false \ + common_eval.results_path=${RESULTS_PATH}/log \ + common_eval.path=${DATA_DIR}/checkpoint_best.pt \ + dataset.gen_subset=${GEN_SUBSET} \ + '+task.labels=["unit"]' \ + +decoding.results_path=${RESULTS_PATH} \ + common_eval.post_process=none \ + +dataset.batch_size=1 \ + common_eval.quiet=True + +# Post-process and generate output at ${RESULTS_PATH}/${GEN_SUBSET}.txt +python examples/speech_to_speech/preprocessing/prep_sn_output_data.py \ + --in-unit ${RESULTS_PATH}/hypo.units \ + --in-audio ${DATA_DIR}/${GEN_SUBSET}.tsv \ + --output-root ${RESULTS_PATH} +``` + + +### Unit-to-waveform conversion with unit vocoder +The pre-trained vocoders can support generating audio for both full unit sequences and reduced unit sequences (i.e. with consecutive duplicate units removed). Set `--dur-prediction` for generating audio with reduced unit sequences. +```bash +# IN_CODE_FILE contains one unit sequence per line. Units are separated by space. + +python examples/speech_to_speech/generate_waveform_from_code.py \ + --in-code-file ${IN_CODE_FILE} \ + --vocoder ${VOCODER_CKPT} --vocoder-cfg ${VOCODER_CFG} \ + --results-path ${RESULTS_PATH} --dur-prediction +``` + +## Training new models +To be updated. 
# Configure the root logger once; the original second basicConfig() call was
# a no-op because the root logger already had a handler by then.
logging.basicConfig()
logging.root.setLevel(logging.INFO)
logger = logging.getLogger(__name__)


def dump_result(args, sample_id, pred_wav, suffix="", sample_rate=16000):
    """Write one predicted waveform to disk as a WAV file.

    The file is written to ``{args.results_path}/{sample_id}{suffix}_pred.wav``.

    Args:
        args: parsed CLI namespace; only ``args.results_path`` is read.
        sample_id: identifier used as the file-name stem.
        pred_wav: predicted waveform; it is detached, moved to CPU and
            converted with ``.numpy()`` before being written.
        suffix: optional text inserted between the id and ``_pred.wav``.
        sample_rate: sample rate passed to ``sf.write``. Defaults to 16000,
            the value that was previously hard-coded.
    """
    sf.write(
        f"{args.results_path}/{sample_id}{suffix}_pred.wav",
        pred_wav.detach().cpu().numpy(),
        sample_rate,
    )


def load_code(in_file):
    """Read a code file with one whitespace-separated unit sequence per line.

    Returns:
        A list with one ``list[int]`` per input line, in file order. Blank
        lines are kept as empty lists so that list indices keep matching
        the line numbers of the input file.
    """
    with open(in_file) as f:
        return [[int(tok) for tok in line.split()] for line in f]


def main(args):
    """Synthesize a waveform for every unit sequence in ``args.in_code_file``.

    Loads the CodeHiFiGAN vocoder from ``args.vocoder`` / ``args.vocoder_cfg``,
    runs it on each sequence and writes the result via :func:`dump_result`,
    using the zero-based line index as the sample id.
    """
    logger.info(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    with open(args.vocoder_cfg) as f:
        vocoder_cfg = json.load(f)
    vocoder = CodeHiFiGANVocoder(args.vocoder, vocoder_cfg)
    if use_cuda:
        vocoder = vocoder.cuda()

    multispkr = vocoder.model.multispkr
    if multispkr:
        logger.info("multi-speaker vocoder")
        num_speakers = vocoder_cfg.get(
            "num_speakers", 200
        )  # following the default in codehifigan to set to 200
        # -1 means "sample a random speaker"; anything below that is not a
        # valid speaker index and used to slip through the upper-bound check.
        assert (
            -1 <= args.speaker_id < num_speakers
        ), f"invalid --speaker-id ({args.speaker_id}) with total #speakers = {num_speakers}"

    data = load_code(args.in_code_file)
    Path(args.results_path).mkdir(exist_ok=True, parents=True)
    for i, d in tqdm(enumerate(data), total=len(data)):
        if not d:
            # An empty sequence cannot be reshaped with view(1, -1); skip it
            # but keep `i` so the remaining outputs stay aligned with lines.
            logger.warning("skipping empty unit sequence at line %d", i + 1)
            continue

        x = {
            "code": torch.LongTensor(d).view(1, -1),
        }
        suffix = ""
        if multispkr:
            spk = (
                random.randint(0, num_speakers - 1)
                if args.speaker_id == -1
                else args.speaker_id
            )
            suffix = f"_spk{spk}"
            x["spkr"] = torch.LongTensor([spk]).view(1, 1)

        x = utils.move_to_cuda(x) if use_cuda else x
        wav = vocoder(x, args.dur_prediction)
        dump_result(args, i, wav, suffix=suffix)


def cli_main():
    """Parse command-line arguments and run :func:`main`."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in-code-file", type=str, required=True, help="one unit sequence per line"
    )
    parser.add_argument(
        "--vocoder", type=str, required=True, help="path to the CodeHiFiGAN vocoder"
    )
    parser.add_argument(
        "--vocoder-cfg",
        type=str,
        required=True,
        help="path to the CodeHiFiGAN vocoder config",
    )
    parser.add_argument("--results-path", type=str, required=True)
    parser.add_argument(
        "--dur-prediction",
        action="store_true",
        help="enable duration prediction (for reduced/unique code sequences)",
    )
    parser.add_argument(
        "--speaker-id",
        type=int,
        default=-1,
        help="Speaker id (for vocoder that supports multispeaker). Set to -1 to randomly sample speakers.",
    )
    parser.add_argument("--cpu", action="store_true", help="run on CPU")

    args = parser.parse_args()

    main(args)


if __name__ == "__main__":
    cli_main()
def gen_config_yaml(
    manifest_root: Path,
    yaml_filename: str = "config.yaml",
    specaugment_policy: Optional[str] = "lb",
    feature_transform: Optional[List[str]] = None,
    input_channels: Optional[int] = 1,
    input_feat_per_channel: Optional[int] = 80,
    audio_root: str = "",
    vocoder_type: Optional[str] = None,
    vocoder_checkpoint: Optional[str] = None,
    vocoder_cfg: Optional[str] = None,
    extra=None,
):
    """Write a data config YAML into ``manifest_root`` via ``S2TDataConfigWriter``.

    Args:
        manifest_root: directory that receives the YAML file; it is made
            absolute before use.
        yaml_filename: file name of the config inside ``manifest_root``.
        specaugment_policy: one of ``"lb"``, ``"ld"``, ``"sm"``, ``"ss"`` to
            select the matching writer policy. Any other non-``None`` value
            selects no policy but still appends ``"specaugment"`` to the
            ``_train`` feature transforms.
        feature_transform: transforms registered for all splits (``"*"``);
            nothing is registered for ``"*"`` when ``None``.
        input_channels: written to the config unless ``None``.
        input_feat_per_channel: written to the config unless ``None``.
        audio_root: written to the config when non-empty.
        vocoder_type, vocoder_checkpoint, vocoder_cfg: a ``vocoder`` entry is
            written only when all three are given.
        extra: additional data passed to ``writer.set_extra``.
    """
    manifest_root = manifest_root.absolute()
    writer = S2TDataConfigWriter(manifest_root / yaml_filename)

    if input_channels is not None:
        writer.set_input_channels(input_channels)
    if input_feat_per_channel is not None:
        writer.set_input_feat_per_channel(input_feat_per_channel)
    specaugment_setters = {
        "lb": writer.set_specaugment_lb_policy,
        "ld": writer.set_specaugment_ld_policy,
        "sm": writer.set_specaugment_sm_policy,
        "ss": writer.set_specaugment_ss_policy,
    }
    specaugment_setter = specaugment_setters.get(specaugment_policy, None)
    if specaugment_setter is not None:
        specaugment_setter()

    if feature_transform is None:
        feature_transform = []
    else:
        writer.set_feature_transforms("*", feature_transform)

    if specaugment_policy is not None:
        writer.set_feature_transforms("_train", feature_transform + ["specaugment"])

    if len(audio_root) > 0:
        writer.set_audio_root(audio_root)

    if (
        vocoder_type is not None
        and vocoder_checkpoint is not None
        and vocoder_cfg is not None
    ):
        writer.set_extra(
            {
                "vocoder": {
                    "type": vocoder_type,
                    "config": vocoder_cfg,
                    "checkpoint": vocoder_checkpoint,
                }
            }
        )

    if extra is not None:
        writer.set_extra(extra)
    writer.flush()


def load_units(in_file):
    """Load unit sequences from a ``sample_id|u1 u2 ...`` text file.

    Blank lines (e.g. a trailing empty line) are skipped.

    Returns:
        Dict mapping each sample id to its list of unit tokens (strings).

    Raises:
        ValueError: if a non-blank line has no ``|`` separator.
    """
    out = {}
    with open(in_file) as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            # Split on the first "|" only, so the units part is kept whole.
            sample_id, sep, units = line.partition("|")
            if not sep:
                raise ValueError(
                    f"{in_file}:{line_no}: expected 'sample_id|u1 u2 ...', got {line!r}"
                )
            out[sample_id] = units.split()

    return out


def process_units(units, reduce=False):
    """Optionally collapse runs of identical consecutive units.

    With ``reduce=False`` the input sequence is returned unchanged (same
    object). With ``reduce=True`` a new list is returned in which every run
    of equal neighbours is replaced by a single unit, e.g.
    ``1 1 1 2 2 1 -> 1 2 1``.
    """
    if not reduce:
        return units

    out = [u for i, u in enumerate(units) if i == 0 or u != units[i - 1]]
    return out
logger = logging.getLogger(__name__)

MANIFEST_COLUMNS = ["id", "src_audio", "src_n_frames", "tgt_audio", "tgt_n_frames"]


def prepare_target_data(args, tgt_audios):
    """Extract log-mel features for the target audios and pack them in a ZIP.

    If ``<output_root>/logmelspec80.zip`` already exists it is returned as
    is. Otherwise each target audio is loaded, converted with
    ``convert_waveform`` and written as ``<sample_id>.npy`` into a temporary
    feature directory, which is zipped and then removed.

    Returns:
        Path of the ZIP file.
    """
    feature_name = "logmelspec80"
    zip_path = args.output_root / f"{feature_name}.zip"

    if not zip_path.exists():
        feature_root = args.output_root / feature_name
        feature_root.mkdir(exist_ok=True)

        print("Extracting Mel spectrogram features...")
        for wav_path in tqdm(tgt_audios):
            waveform, sample_rate = torchaudio.load(wav_path.as_posix())
            waveform, sample_rate = convert_waveform(
                waveform,
                sample_rate,
                normalize_volume=args.normalize_volume,
                to_sample_rate=args.sample_rate,
            )
            npy_path = feature_root / f"{wav_path.stem}.npy"
            extract_logmel_spectrogram(
                waveform,
                sample_rate,
                npy_path,
                win_length=args.win_length,
                hop_length=args.hop_length,
                n_fft=args.n_fft,
                n_mels=args.n_mels,
                f_min=args.f_min,
                f_max=args.f_max,
            )
        print("ZIPing features...")
        create_zip(feature_root, zip_path)
        shutil.rmtree(feature_root)
        return zip_path

    print(f"{zip_path} exists.")
    return zip_path


def process(args):
    """Build per-split TSV manifests and the config YAML for S2SPECT data.

    For every split, source ``*.wav`` files are paired with the target audio
    of the same stem; sources without a target are reported and skipped.
    Target features are extracted by :func:`prepare_target_data`, and each
    manifest is written to ``<output_root>/<split>.tsv``.
    """
    os.makedirs(args.output_root, exist_ok=True)

    manifest = {}
    tgt_audios = []
    for split in args.data_split:
        print(f"Processing {split}...")

        rows = {column: [] for column in MANIFEST_COLUMNS}
        manifest[split] = rows
        missing_tgt_audios = []
        for src_audio in tqdm(list(args.source_dir.glob(f"{split}/*.wav"))):
            sample_id = src_audio.stem
            tgt_audio = args.target_dir / split / f"{sample_id}.wav"

            if tgt_audio.is_file():
                tgt_audios.append(tgt_audio)

                src_path = src_audio.as_posix()
                n_samples = sf.info(src_path).frames
                rows["id"].append(sample_id)
                rows["src_audio"].append(src_audio.as_posix())
                # estimation of 10-ms frame for 16kHz audio
                rows["src_n_frames"].append(n_samples // 160)
            else:
                missing_tgt_audios.append(sample_id)

        print(f"Processed {len(manifest[split]['id'])} samples")
        if missing_tgt_audios:
            print(
                f"{len(missing_tgt_audios)} with missing target data (first 3 examples: {', '.join(missing_tgt_audios[:3])})"
            )

    # Extract features and pack features into ZIP
    zip_path = prepare_target_data(args, tgt_audios)

    print("Fetching ZIP manifest...")
    tgt_audio_paths, tgt_audio_lengths = get_zip_manifest(zip_path)

    print("Generating manifest...")
    for split in args.data_split:
        print(f"Processing {split}...")

        rows = manifest[split]
        for sample_id in tqdm(rows["id"]):
            rows["tgt_audio"].append(tgt_audio_paths[sample_id])
            rows["tgt_n_frames"].append(tgt_audio_lengths[sample_id])

        out_manifest = args.output_root / f"{split}.tsv"
        print(f"Writing manifest to {out_manifest}...")
        save_df_to_tsv(pd.DataFrame.from_dict(rows), out_manifest)

    # Generate config YAML
    features = {
        "type": "spectrogram+melscale+log",
        "sample_rate": args.sample_rate,
        "eps": 1e-5,
        "n_mels": args.n_mels,
        "n_fft": args.n_fft,
        "window_fn": "hann",
        "win_length": args.win_length,
        "hop_length": args.hop_length,
        "win_len_t": args.win_length / args.sample_rate,
        "hop_len_t": args.hop_length / args.sample_rate,
        "f_min": args.f_min,
        "f_max": args.f_max,
        "n_stft": args.n_fft // 2 + 1,
    }
    gen_config_yaml(
        args.output_root,
        audio_root=args.output_root.as_posix(),
        specaugment_policy="lb",
        feature_transform=["utterance_cmvn", "delta_deltas"],
        extra={"features": features},
    )


def main():
    """Parse the command line and run :func:`process`."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--source-dir", required=True, type=Path, help="source audio directory"
    )
    parser.add_argument(
        "--target-dir", required=True, type=Path, help="target audio directory"
    )
    parser.add_argument(
        "--data-split",
        default=["train", "valid", "test"],
        nargs="+",
        help="data split names",
    )
    parser.add_argument(
        "--output-root", required=True, type=Path, help="output directory"
    )
    # target feature related
    int_options = (
        ("--win-length", 1024),
        ("--hop-length", 256),
        ("--n-fft", 1024),
        ("--n-mels", 80),
        ("--f-min", 20),
        ("--f-max", 8000),
        ("--sample-rate", 22050),
    )
    for flag, default in int_options:
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument("--normalize-volume", "-n", action="store_true")

    process(parser.parse_args())


if __name__ == "__main__":
    main()
logger = logging.getLogger(__name__)

MANIFEST_COLUMNS = ["id", "src_audio", "src_n_frames", "tgt_audio", "tgt_n_frames"]


def process(args):
    """Build per-split S2UT manifests (TSV) and the data config YAML.

    For each split, target units are loaded from
    ``<target_dir>/<split>.txt`` and matched by sample id to the source
    ``<source_dir>/<split>/*.wav`` files. Sources without target units are
    reported and skipped. One manifest per split is written to
    ``<output_root>/<split>.tsv``; the config YAML is generated once at the
    end.
    """
    # parents=True so a nested, not-yet-existing output path works too.
    args.output_root.mkdir(parents=True, exist_ok=True)

    print("Generating manifest...")
    for split in args.data_split:
        print(f"Processing {split}")

        # load target units
        target_unit_data = load_units(args.target_dir / f"{split}.txt")

        manifest = {c: [] for c in MANIFEST_COLUMNS}
        missing_tgt_audios = []
        src_audios = list(args.source_dir.glob(f"{split}/*.wav"))
        for src_audio in tqdm(src_audios):
            sample_id = src_audio.stem

            if sample_id not in target_unit_data:
                missing_tgt_audios.append(sample_id)
                continue

            src_n_frames = sf.info(src_audio.as_posix()).frames
            manifest["id"].append(sample_id)
            manifest["src_audio"].append(src_audio.as_posix())
            manifest["src_n_frames"].append(
                src_n_frames // 160
            )  # estimation of 10-ms frame for 16kHz audio

            # The "tgt_audio" column holds the space-joined unit sequence.
            target_units = process_units(target_unit_data[sample_id], args.reduce_unit)
            manifest["tgt_audio"].append(" ".join(target_units))
            manifest["tgt_n_frames"].append(len(target_units))

        print(f"Processed {len(manifest['id'])} samples")
        if missing_tgt_audios:
            print(
                f"{len(missing_tgt_audios)} with missing target data (first 3 examples: {', '.join(missing_tgt_audios[:3])})"
            )

        out_manifest = args.output_root / f"{split}.tsv"
        print(f"Writing manifest to {out_manifest}...")
        save_df_to_tsv(pd.DataFrame.from_dict(manifest), out_manifest)

    # Generate config YAML
    gen_config_yaml(
        args.output_root,
        specaugment_policy="lb",
        feature_transform=["utterance_cmvn"],
        vocoder_type="code_hifigan",
        vocoder_checkpoint=args.vocoder_checkpoint,
        vocoder_cfg=args.vocoder_cfg,
    )


def main():
    """Parse the command line and run :func:`process`."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--source-dir", required=True, type=Path, help="source audio directory"
    )
    parser.add_argument(
        "--target-dir",
        required=True,
        type=Path,
        # The code reads <target-dir>/<split>.txt, not audio files.
        help="directory with target unit files (<split>.txt)",
    )
    parser.add_argument(
        "--data-split",
        default=["train", "valid", "test"],
        nargs="+",
        help="data split names",
    )
    parser.add_argument(
        "--output-root", required=True, type=Path, help="output directory"
    )
    parser.add_argument(
        "--reduce-unit",
        action="store_true",
        help="reduce a target unit sequence to a unique unit sequence, i.e. '1 1 1 2 2' -> '1 2'",
    )
    parser.add_argument(
        "--vocoder-checkpoint", default=None, type=str, help="vocoder checkpoint"
    )
    parser.add_argument(
        "--vocoder-cfg", default=None, type=str, help="vocoder config file"
    )

    args = parser.parse_args()

    process(args)


if __name__ == "__main__":
    main()
def process(args):
    """Write the ``.tsv`` / ``.unit`` file pair for the speech normalizer.

    Audio files matching ``**/*.<ext>`` under ``args.audio_dir`` are listed
    in ``<output_dir>/<data_name>.tsv`` (first line: the resolved audio
    directory; then ``relative_path<TAB>n_frames`` per file). A line-aligned
    ``<data_name>.unit`` file receives either a ``0`` placeholder per file
    (``--for-inference``) or the reduced target unit sequence looked up by
    file stem in ``args.target_unit``.
    """
    assert (
        args.for_inference or args.target_unit is not None
    ), "missing --target-unit or --for-inference"

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.output_dir, exist_ok=True)

    dir_path = os.path.realpath(args.audio_dir)
    search_path = os.path.join(dir_path, "**/*." + args.ext)

    # Units are only needed when real targets are written. Binding the name
    # unconditionally avoids a NameError when --target-unit is an empty string.
    unit_data = None if args.for_inference else load_units(args.target_unit)

    tsv_path = os.path.join(args.output_dir, f"{args.data_name}.tsv")
    unit_path = os.path.join(args.output_dir, f"{args.data_name}.unit")
    with open(tsv_path, "w") as o_t, open(unit_path, "w") as o_u:
        print(dir_path, file=o_t)
        for fname in glob.iglob(search_path, recursive=True):
            file_path = os.path.realpath(fname)
            frames = soundfile.info(fname).frames
            print(
                "{}\t{}".format(os.path.relpath(file_path, dir_path), frames), file=o_t
            )

            if args.for_inference:
                print("0", file=o_u)
            else:
                # Strip ".<ext>" from the file name to get the sample id.
                sample_id = os.path.basename(file_path)[: -len(args.ext) - 1]
                assert (
                    sample_id in unit_data
                ), f'{fname} does not have unit data in {args.target_unit}. Expecting sample_id "{sample_id}".'
                target_units = process_units(unit_data[sample_id], reduce=True)
                print(" ".join(target_units), file=o_u)


def main():
    """Parse the command line and run :func:`process`."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--audio-dir", required=True, type=str, help="audio directory")
    parser.add_argument("--ext", default="flac", type=str, help="audio extension")
    parser.add_argument(
        "--data-name",
        required=True,
        type=str,
        help="dataset name",
    )
    parser.add_argument(
        "--output-dir", required=True, type=str, help="output directory"
    )
    parser.add_argument(
        "--for-inference",
        action="store_true",
        help="set this if preparing data for running inference with a speech normalizer",
    )
    parser.add_argument(
        "--target-unit",
        default=None,
        type=str,
        help="a file containing unit sequences in the format: sample_id|u1 u2 ...",
    )

    args = parser.parse_args()

    process(args)


if __name__ == "__main__":
    main()
def process(args):
    """Pair speech-normalizer unit output with the sample ids of its input.

    Reads ``args.in_unit``, whose lines have the form
    ``<unit sequence> (None-<index>)``, and ``args.in_audio``, a TSV manifest
    whose first line is a header and whose remaining lines start with an
    audio path. Writes ``<output_root>/<in_audio stem>.txt`` with one
    ``sample_id|<unit sequence>`` line per manifest row, where ``sample_id``
    is the audio file name without its extension and the unit sequence is the
    one whose index equals the row's position in the manifest.

    Args:
        args: parsed command-line namespace with ``in_unit``, ``in_audio``
            and ``output_root`` as ``pathlib.Path`` objects (see :func:`main`).

    Raises:
        KeyError: if a manifest row has no unit sequence with its index.
    """
    # parents=True so a nested output directory can be created in one go.
    args.output_root.mkdir(parents=True, exist_ok=True)

    # load units
    units = {}
    with open(args.in_unit) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # rpartition (unlike rsplit + tuple unpacking) also accepts an
            # empty hypothesis, i.e. a line that is only "(None-<index>)".
            unit_seq, _, utt_id = line.rpartition(" ")
            utt_id = int(utt_id[6:-1])  # strip the "(None-" prefix and ")" suffix
            units[utt_id] = unit_seq

    with open(args.in_audio) as f, open(
        args.output_root / f"{args.in_audio.stem}.txt", "w"
    ) as o:
        f.readline()  # skip the manifest header line
        # Materialised so the progress bar knows the total; blank rows dropped.
        rows = [line for line in f if line.strip()]
        for i, line in enumerate(tqdm(rows)):
            audio, _ = line.strip().split("\t", 1)
            sample_id = Path(audio).stem
            o.write(f"{sample_id}|{units[i]}\n")


def main():
    """Parse the command-line arguments and run :func:`process`."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in-unit",
        required=True,
        type=Path,
        help="unit file (output from the speech normalizer)",
    )
    parser.add_argument(
        "--in-audio",
        required=True,
        type=Path,
        help="tsv file (input to the normalizer)",
    )
    parser.add_argument(
        "--output-root", required=True, type=Path, help="output directory"
    )

    args = parser.parse_args()

    process(args)


if __name__ == "__main__":
    main()
class SequenceGenerator(SequenceGeneratorBase):
    """Beam-search generator that can decode with a named (auxiliary) decoder.

    Extends the fairseq base generator by splitting generation into an
    encoder step (:meth:`_generate`) and a reusable decoder step
    (:meth:`generate_decoder`) that accepts pre-computed encoder outputs, an
    auxiliary task name selecting ``<aux_task_name>_decoder`` on the model,
    and an optional second ("augmented") set of encoder outputs.
    """

    def __init__(
        self,
        models,
        tgt_dict,
        beam_size=1,
        max_len_a=0,
        max_len_b=200,
        max_len=0,
        min_len=1,
        normalize_scores=True,
        len_penalty=1.0,
        unk_penalty=0.0,
        temperature=1.0,
        match_source_len=False,
        no_repeat_ngram_size=0,
        search_strategy=None,
        eos=None,
        symbols_to_strip_from_output=None,
        lm_model=None,
        lm_weight=1.0,
        tokens_to_suppress=(),
    ):
        """Generates translations of a given source sentence.

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models,
                currently support fairseq.models.TransformerModel for scripting
            beam_size (int, optional): beam width (default: 1)
            max_len_a/b (int, optional): generate sequences of maximum length
                ax + b, where x is the source length
            max_len (int, optional): the maximum length of the generated output
                (not including end-of-sentence)
            min_len (int, optional): the minimum length of the generated output
                (not including end-of-sentence)
            normalize_scores (bool, optional): normalize scores by the length
                of the output (default: True)
            len_penalty (float, optional): length penalty, where <1.0 favors
                shorter, >1.0 favors longer sentences (default: 1.0)
            unk_penalty (float, optional): unknown word penalty, where <0
                produces more unks, >0 produces fewer (default: 0.0)
            temperature (float, optional): temperature, where values
                >1.0 produce more uniform samples and values <1.0 produce
                sharper samples (default: 1.0)
            match_source_len (bool, optional): outputs should match the source
                length (default: False)

        All remaining keyword arguments (``no_repeat_ngram_size``,
        ``search_strategy``, ``eos``, ``symbols_to_strip_from_output``,
        ``lm_model``, ``lm_weight``, ``tokens_to_suppress``) are forwarded
        unchanged to the base class.
        """
        super().__init__(
            models=models,
            tgt_dict=tgt_dict,
            beam_size=beam_size,
            max_len_a=max_len_a,
            max_len_b=max_len_b,
            max_len=max_len,
            min_len=min_len,
            normalize_scores=normalize_scores,
            len_penalty=len_penalty,
            unk_penalty=unk_penalty,
            temperature=temperature,
            match_source_len=match_source_len,
            no_repeat_ngram_size=no_repeat_ngram_size,
            search_strategy=search_strategy,
            eos=eos,
            symbols_to_strip_from_output=symbols_to_strip_from_output,
            lm_model=lm_model,
            lm_weight=lm_weight,
            tokens_to_suppress=tokens_to_suppress,
        )

        # Replace the wrapper installed by the base class with the
        # EnsembleModel defined in this module, which understands
        # `decoder_name` and `encoder_outs_aug`.
        if isinstance(models, EnsembleModel):
            self.model = models
        else:
            self.model = EnsembleModel(models)

        self.model.set_decoder_beam_size(self.beam_size)
        self.model.eval()

    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        """Run the encoder on ``sample`` and decode with :meth:`generate_decoder`.

        The source is read from ``sample["net_input"]`` under the first key
        present among ``"src_tokens"``, ``"source"`` and ``"features"``.

        Raises:
            Exception: if none of the supported source keys is present.
            NotImplementedError: if ``constraints`` are given but the search
                strategy does not support them.
        """
        net_input = sample["net_input"]

        if "src_tokens" in net_input:
            src_tokens = net_input["src_tokens"]
            # length of the source text being the character length except EndOfSentence and pad
            # if src_lengths exists in net_input (speech_to_text dataset case), then use it
            if "src_lengths" in net_input:
                src_lengths = net_input["src_lengths"]
            else:
                src_lengths = (
                    (src_tokens.ne(self.eos) & src_tokens.ne(self.pad))
                    .long()
                    .sum(dim=1)
                )
        elif "source" in net_input or "features" in net_input:
            # "source" and "features" inputs derive their lengths the same
            # way: from the padding mask when there is one, otherwise from
            # the size of the last dimension. "source" takes precedence.
            src_tokens = (
                net_input["source"] if "source" in net_input else net_input["features"]
            )
            padding_mask = net_input["padding_mask"]
            src_lengths = (
                padding_mask.size(-1) - padding_mask.sum(-1)
                if padding_mask is not None
                else torch.tensor(src_tokens.size(-1)).to(src_tokens)
            )
        else:
            raise Exception(
                "expected src_tokens, source or features in net input. input keys: "
                + str(net_input.keys())
            )

        if constraints is not None and not self.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.search.init_constraints(constraints, self.beam_size)

        # compute the encoder output for each beam
        with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"):
            encoder_outs = self.model.forward_encoder(net_input)

        finalized = self.generate_decoder(
            encoder_outs,
            src_tokens,
            src_lengths,
            sample,
            prefix_tokens,
            constraints,
            bos_token,
        )
        return finalized

    def generate_decoder(
        self,
        encoder_outs,
        src_tokens,
        src_lengths,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
        aux_task_name="",
        encoder_outs_aug: Optional[
            List[Dict[str, List[Tensor]]]
        ] = None,  # an additional/augmented encoder_outs
    ):
        """Beam-search decode from pre-computed encoder outputs.

        Args:
            encoder_outs: per-model encoder outputs; reordered in place of the
                argument to match the ``bsz * beam_size`` layout.
            src_tokens: source batch; only its first two sizes (batch, length)
                and its device/dtype are used here.
            src_lengths: per-sentence source lengths.
            sample: the batch dict; ``sample["id"]`` is used as the original
                batch indices when it is a Tensor.
            prefix_tokens: optional tokens every hypothesis must start with.
            constraints: accepted for signature symmetry with
                :meth:`_generate`; not read in this method.
            bos_token: first target token (defaults to ``self.eos``).
            aux_task_name: when non-empty, decode with the model attribute
                ``<aux_task_name>_decoder`` instead of ``decoder`` and skip
                the external language model.
            encoder_outs_aug: optional second list of encoder outputs that is
                reordered alongside ``encoder_outs`` and passed to the decoder.

        Returns:
            For every sentence of the batch, its finalized hypotheses sorted
            by score in descending order.
        """
        incremental_states = torch.jit.annotate(
            List[Dict[str, Dict[str, Optional[Tensor]]]],
            [
                torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
                for i in range(self.model.models_size)
            ],
        )

        # bsz: total number of sentences in beam
        # Note that src_tokens may have more than 2 dimensions (i.e. audio features)
        bsz, src_len = src_tokens.size()[:2]
        beam_size = self.beam_size

        decoder_name = f"{aux_task_name}_decoder" if aux_task_name else "decoder"

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                self.max_len - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None
        if encoder_outs_aug is not None:
            encoder_outs_aug = self.model.reorder_encoder_out(
                encoder_outs_aug, new_order
            )

        # initialize buffers
        scores = (
            torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float()
        )  # +1 for eos; pad is never chosen for scoring
        tokens = (
            torch.zeros(bsz * beam_size, max_len + 2)
            .to(src_tokens)
            .long()
            .fill_(self.pad)
        )  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # A list that indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then cands_to_ignore would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        cands_to_ignore = (
            torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
        )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        # a boolean array indicating if the sentence at the index is finished or not
        finished = [False for i in range(bsz)]
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (
            (torch.arange(0, bsz) * beam_size)
            .unsqueeze(1)
            .type_as(tokens)
            .to(src_tokens.device)
        )
        cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None

        original_batch_idxs: Optional[Tensor] = None
        if "id" in sample and isinstance(sample["id"], Tensor):
            original_batch_idxs = sample["id"]
        else:
            original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(
                        batch_idxs
                    )
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size
                    )
                    original_batch_idxs = original_batch_idxs[batch_idxs]
                self.model.reorder_incremental_state(
                    incremental_states, reorder_state, decoder_name
                )
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state
                )
                if encoder_outs_aug is not None:
                    encoder_outs_aug = self.model.reorder_encoder_out(
                        encoder_outs_aug, reorder_state
                    )
            with torch.autograd.profiler.record_function(
                "EnsembleModel: forward_decoder"
            ):
                lprobs, avg_attn_scores = self.model.forward_decoder(
                    tokens[:, : step + 1],
                    encoder_outs,
                    incremental_states,
                    self.temperature,
                    decoder_name=decoder_name,
                    encoder_outs_aug=encoder_outs_aug,
                )

            # The external LM is only fused in when decoding the main task.
            if self.lm_model is not None and not aux_task_name:
                lm_out = self.lm_model(tokens[:, : step + 1])
                probs = self.lm_model.get_normalized_probs(
                    lm_out, log_probs=True, sample=None
                )
                probs = probs[:, -1, :] * self.lm_weight
                lprobs += probs

            # NaN != NaN, so this replaces NaN log-probs with -inf.
            lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, : self.eos] = -math.inf
                lprobs[:, self.eos + 1 :] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if (
                prefix_tokens is not None
                and step < prefix_tokens.size(1)
                and step < max_len
            ):
                lprobs, tokens, scores = self._prefix_tokens(
                    step, lprobs, scores, tokens, prefix_tokens, beam_size
                )
            else:
                if step < self.min_len:
                    # minimum length constraint (does not apply if using prefix_tokens)
                    lprobs[:, self.eos] = -math.inf

            if self.token_indices_to_suppress is not None:
                lprobs[:, self.token_indices_to_suppress] = -math.inf

            # Record attention scores, only support avg_attn_scores is a Tensor
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(
                        bsz * beam_size, avg_attn_scores.size(1), max_len + 2
                    ).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            if self.should_set_src_lengths:
                self.search.set_src_lengths(src_lengths)

            if self.repeat_ngram_blocker is not None:
                lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step)

            # Shape: (batch, cand_size)
            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
                tokens[:, : step + 1],
                original_batch_idxs,
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            # Shape of eos_mask: (batch size, beam size)
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)

            # only consider eos when it's among the top beam_size indices
            # Now we know what beam item(s) to finish
            # Shape: 1d list of absolute-numbered
            eos_bbsz_idx = torch.masked_select(
                cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size]
            )

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.masked_select(
                    cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size]
                )

                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            if self.search.stop_on_max_len and step >= max_len:
                break
            assert step < max_len, f"{step} < {max_len}"

            # Remove finalized sentences (ones for which {beam_size}
            # finished hypotheses have been generated) from the batch.
            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(
                    bsz, dtype=torch.bool, device=cand_indices.device
                )
                batch_mask[finalized_sents] = False
                # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
                batch_idxs = torch.arange(
                    bsz, device=cand_indices.device
                ).masked_select(batch_mask)

                # Choose the subset of the hypothesized constraints that will continue
                self.search.prune_sentences(batch_idxs)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                cands_to_ignore = cands_to_ignore[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1
                    )
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since the element wise or is not supported in torchscript.

            eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[: eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just
            # the hypos with the smallest values in active_mask.
            # {active_hypos} indicates which {beam_size} hypotheses
            # from the list of {2 * beam_size} candidates were
            # selected. Shapes: (batch size, beam size)
            new_cands_to_ignore, active_hypos = torch.topk(
                active_mask, k=beam_size, dim=1, largest=False
            )

            # update cands_to_ignore to ignore any finalized hypos.
            cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
            # Make sure there is at least one active item for each sentence in the batch.
            assert (~cands_to_ignore).any(dim=1).all()

            # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
            # can be selected more than once).
            active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos)
            active_scores = torch.gather(cand_scores, dim=1, index=active_hypos)

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses

            # Set the tokens for each beam (can select the same row more than once)
            tokens[:, : step + 1] = torch.index_select(
                tokens[:, : step + 1], dim=0, index=active_bbsz_idx
            )
            # Select the next token for each of them
            tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather(
                cand_indices, dim=1, index=active_hypos
            )
            if step > 0:
                scores[:, :step] = torch.index_select(
                    scores[:, :step], dim=0, index=active_bbsz_idx
                )
            scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather(
                cand_scores, dim=1, index=active_hypos
            )

            # Update constraints based on which candidates were selected for the next beam
            self.search.update_constraints(active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, : step + 2] = torch.index_select(
                    attn[:, :, : step + 2], dim=0, index=active_bbsz_idx
                )

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            scores = torch.tensor(
                [float(elem["score"].item()) for elem in finalized[sent]]
            )
            _, sorted_scores_indices = torch.sort(scores, descending=True)
            finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
            finalized[sent] = torch.jit.annotate(
                List[Dict[str, Tensor]], finalized[sent]
            )
        return finalized


class EnsembleModel(EnsembleModelBase):
    """A wrapper around an ensemble of models.

    Differs from the base wrapper in that the decoder is looked up by name on
    each model (``decoder_name``) and an optional second set of encoder
    outputs (``encoder_outs_aug``) can be handed to the decoder.
    """

    def __init__(self, models):
        super().__init__(models)

    @torch.jit.export
    def forward_decoder(
        self,
        tokens,
        encoder_outs: List[Dict[str, List[Tensor]]],
        incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]],
        temperature: float = 1.0,
        decoder_name="decoder",
        encoder_outs_aug: Optional[List[Dict[str, List[Tensor]]]] = None,
    ):
        """Return log-probs (and attention) for the last position of ``tokens``.

        Args:
            tokens: target prefix decoded so far.
            encoder_outs: one encoder output per model.
            incremental_states: one incremental-state dict per model.
            temperature: the decoder output is divided by this (in place)
                before normalisation.
            decoder_name: attribute name of the decoder on each model.
            encoder_outs_aug: optional extra encoder output per model, passed
                to the decoder as ``encoder_out_aug`` on the incremental path.

        Returns:
            ``(log_probs, attn)``: for a single model its own values; for
            several models the log of the averaged probabilities and the
            averaged attention (``None`` when no model returned attention).
        """
        log_probs = []
        avg_attn: Optional[Tensor] = None
        encoder_out: Optional[Dict[str, List[Tensor]]] = None
        encoder_out_aug: Optional[Dict[str, List[Tensor]]] = None
        for i, model in enumerate(self.models):
            if self.has_encoder():
                encoder_out = encoder_outs[i]
                if encoder_outs_aug is not None:
                    encoder_out_aug = encoder_outs_aug[i]
            # decode each model
            if self.has_incremental_states():
                if encoder_out_aug is not None:
                    decoder_out = getattr(model, decoder_name).forward(
                        tokens,
                        encoder_out=encoder_out,
                        encoder_out_aug=encoder_out_aug,
                        incremental_state=incremental_states[i],
                    )
                else:
                    decoder_out = getattr(model, decoder_name).forward(
                        tokens,
                        encoder_out=encoder_out,
                        incremental_state=incremental_states[i],
                    )
            else:
                if hasattr(model, decoder_name):
                    decoder_out = getattr(model, decoder_name).forward(
                        tokens, encoder_out=encoder_out
                    )
                else:
                    decoder_out = model.forward(tokens)

            attn: Optional[Tensor] = None
            decoder_len = len(decoder_out)
            if decoder_len > 1 and decoder_out[1] is not None:
                if isinstance(decoder_out[1], Tensor):
                    attn = decoder_out[1]
                else:
                    attn_holder = decoder_out[1]["attn"]
                    if isinstance(attn_holder, Tensor):
                        attn = attn_holder
                    elif attn_holder is not None:
                        attn = attn_holder[0]
                if attn is not None:
                    attn = attn[:, -1, :]

            decoder_out_tuple = (
                decoder_out[0][:, -1:, :].div_(temperature),
                None if decoder_len <= 1 else decoder_out[1],
            )
            # Fall back to the model itself when it has no attribute named
            # `decoder_name`; otherwise the `model.forward(tokens)` fallback
            # above would always be followed by an AttributeError here.
            probs = getattr(model, decoder_name, model).get_normalized_probs(
                decoder_out_tuple, log_probs=True, sample=None
            )
            probs = probs[:, -1, :]
            if self.models_size == 1:
                return probs, attn

            log_probs.append(probs)
            if attn is not None:
                if avg_attn is None:
                    avg_attn = attn
                else:
                    avg_attn.add_(attn)

        # log of the mean probability: logsumexp over models minus log(#models)
        avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log(
            self.models_size
        )

        if avg_attn is not None:
            avg_attn.div_(self.models_size)
        return avg_probs, avg_attn

    @torch.jit.export
    def reorder_incremental_state(
        self,
        incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]],
        new_order,
        decoder_name="decoder",
    ):
        """Reorder every model's incremental state of ``decoder_name`` by ``new_order``.

        Does nothing when the ensemble has no incremental states.
        """
        if not self.has_incremental_states():
            return
        for i, model in enumerate(self.models):
            getattr(model, decoder_name).reorder_incremental_state_scripting(
                incremental_states[i], new_order
            )
class MultiDecoderSequenceGenerator(nn.Module):
    """Two-pass generator: an MT decoder pass followed by a T2U decoder pass.

    Holds two ``SequenceGenerator`` instances over the same models:
    ``generator_mt`` for the first pass (dictionary ``tgt_dict_mt``) and
    ``generator`` for the second pass (dictionary ``tgt_dict``).
    """

    def __init__(
        self,
        models,
        tgt_dict,
        tgt_dict_mt,
        beam_size=1,
        beam_size_mt=1,
        max_len_a=0,
        max_len_b=200,
        max_len_a_mt=0,
        max_len_b_mt=200,
        max_len=0,
        min_len=1,
        normalize_scores=True,
        len_penalty=1.0,
        len_penalty_mt=1.0,
        unk_penalty=0.0,
        temperature=1.0,
        match_source_len=False,
        no_repeat_ngram_size=0,
        eos=None,
        eos_mt=None,
        symbols_to_strip_from_output=None,
        lm_model=None,
        lm_weight=1.0,
    ):
        """Generates translations of a given source sentence.

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models,
                currently support fairseq.models.TransformerModel for scripting
            tgt_dict: target dictionary of the second pass
            tgt_dict_mt: target dictionary of the first pass
            beam_size (int, optional): beam width of the second pass (default: 1)
            beam_size_mt (int, optional): beam width of the first pass (default: 1)
            max_len_a/b (int, optional): generate sequences of maximum length
                ax + b, where x is the source length for the second pass
            max_len_a_mt/b_mt (int, optional): generate sequences of maximum length
                ax + b, where x is the source length for the first pass
            max_len (int, optional): the maximum length of the generated output
                (not including end-of-sentence)
            min_len (int, optional): the minimum length of the generated output
                (not including end-of-sentence)
            normalize_scores (bool, optional): normalize scores by the length
                of the output (default: True)
            len_penalty (float, optional): length penalty in the second pass, where <1.0 favors
                shorter, >1.0 favors longer sentences (default: 1.0)
            len_penalty_mt (float, optional): length penalty in the first pass, where <1.0 favors
                shorter, >1.0 favors longer sentences (default: 1.0)
            unk_penalty (float, optional): unknown word penalty, where <0
                produces more unks, >0 produces fewer (default: 0.0)
            temperature (float, optional): temperature, where values
                >1.0 produce more uniform samples and values <1.0 produce
                sharper samples (default: 1.0)
            match_source_len (bool, optional): outputs should match the source
                length (default: False)
            eos / eos_mt: end-of-sentence override forwarded to the second /
                first pass generator
            lm_model / lm_weight: forwarded to the second-pass generator only
        """
        super().__init__()

        from examples.speech_to_speech.unity.sequence_generator import SequenceGenerator

        # Second pass generator.
        self.generator = SequenceGenerator(
            models,
            tgt_dict,
            beam_size=beam_size,
            max_len_a=max_len_a,
            max_len_b=max_len_b,
            max_len=max_len,
            min_len=min_len,
            normalize_scores=normalize_scores,
            len_penalty=len_penalty,
            unk_penalty=unk_penalty,
            temperature=temperature,
            match_source_len=match_source_len,
            no_repeat_ngram_size=no_repeat_ngram_size,
            search_strategy=search.BeamSearch(tgt_dict),
            eos=eos,
            symbols_to_strip_from_output=symbols_to_strip_from_output,
            lm_model=lm_model,
            lm_weight=lm_weight,
        )
        self.eos = self.generator.eos

        # First pass generator.
        self.generator_mt = SequenceGenerator(
            models,
            tgt_dict_mt,
            beam_size=beam_size_mt,
            max_len_a=max_len_a_mt,
            max_len_b=max_len_b_mt,
            max_len=max_len,
            min_len=min_len,
            normalize_scores=normalize_scores,
            len_penalty=len_penalty_mt,
            unk_penalty=unk_penalty,
            temperature=temperature,
            match_source_len=match_source_len,
            no_repeat_ngram_size=no_repeat_ngram_size,
            search_strategy=search.BeamSearch(tgt_dict_mt),
            eos=eos_mt,
            symbols_to_strip_from_output=symbols_to_strip_from_output,
        )

    @torch.no_grad()
    def generate(
        self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs
    ) -> List[List[Dict[str, Tensor]]]:
        """Generate translations. Match the api of other fairseq generators.

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models
                (accepted for API compatibility; the models given to the
                constructor are the ones used)
            sample (dict): batch
            prefix_tokens (torch.LongTensor, optional): force decoder to begin
                with these tokens
            constraints (torch.LongTensor, optional): force decoder to include
                the list of constraints
            bos_token (int, optional): beginning of sentence token
                (default: self.eos)
        """
        return self._generate(sample, **kwargs)

    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        """Encode ``sample``, run the MT pass, then the T2U pass.

        As a side effect, prints the best first-pass hypothesis of every
        sentence to stdout as ``<text> (None-<sample id>)``.

        Returns:
            The finalized hypotheses of the second (T2U) pass.

        Raises:
            Exception: if ``sample["net_input"]`` has no ``"src_tokens"``.
            NotImplementedError: if ``constraints`` are given but the search
                strategy does not support them.
        """
        net_input = sample["net_input"]

        if "src_tokens" in net_input:
            src_tokens = net_input["src_tokens"]
            # length of the source text being the character length except EndOfSentence and pad
            # if src_lengths exists in net_input (speech_to_text dataset case), then use it
            if "src_lengths" in net_input:
                src_lengths = net_input["src_lengths"]
            else:
                src_lengths = (
                    (
                        src_tokens.ne(self.generator.eos)
                        & src_tokens.ne(self.generator.pad)
                    )
                    .long()
                    .sum(dim=1)
                )
        else:
            # Only "src_tokens" is supported by this generator.
            raise Exception(
                "expected src_tokens in net input. input keys: "
                + str(net_input.keys())
            )

        if constraints is not None and not self.generator.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.generator.search.init_constraints(constraints, self.generator.beam_size)
        self.generator_mt.search.init_constraints(
            constraints, self.generator_mt.beam_size
        )

        # compute the encoder output for each beam
        with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"):
            encoder_outs = self.generator.model.forward_encoder(net_input)

        single_model = self.generator.model.single_model
        mt_decoder = getattr(single_model, f"{single_model.mt_task_name}_decoder")

        # 1. MT decoder
        finalized_mt = self.generator_mt.generate_decoder(
            encoder_outs,
            src_tokens,
            src_lengths,
            sample,
            prefix_tokens,
            constraints,
            bos_token,
            aux_task_name=single_model.mt_task_name,
        )

        # extract decoder output corresponding to the best hypothesis
        max_tgt_len = max(len(hypo[0]["tokens"]) for hypo in finalized_mt)
        prev_output_tokens_mt = (
            src_tokens.new_zeros(src_tokens.shape[0], max_tgt_len)
            .fill_(mt_decoder.padding_idx)
            .int()
        )  # B x T
        # Converted once instead of once per sentence.
        sample_ids = sample["id"].tolist()
        for i, hypo in enumerate(finalized_mt):
            i_beam = 0
            tmp = hypo[i_beam]["tokens"].int()  # hyp + eos
            # Build the decoder input: eos first, then the hypothesis without
            # its trailing eos.
            prev_output_tokens_mt[i, 0] = self.generator_mt.eos
            if tmp[-1] == self.generator_mt.eos:
                tmp = tmp[:-1]
            prev_output_tokens_mt[i, 1 : len(tmp) + 1] = tmp

            text = "".join([self.generator_mt.tgt_dict[c] for c in tmp])
            text = text.replace("_", " ")
            text = text.replace("▁", " ")
            text = text.replace("<unk>", " ")
            text = text.replace("<s>", "")
            text = text.replace("</s>", "")
            if len(text) > 0 and text[0] == " ":
                text = text[1:]
            sample_id = sample_ids[i]
            print("{} (None-{})".format(text, sample_id))

        x = mt_decoder(
            prev_output_tokens_mt,
            encoder_out=encoder_outs[0],
            features_only=True,
        )[0].transpose(0, 1)

        if getattr(single_model, "proj", None) is not None:
            x = single_model.proj(x)

        mt_decoder_padding_mask = None
        if prev_output_tokens_mt.eq(mt_decoder.padding_idx).any():
            mt_decoder_padding_mask = prev_output_tokens_mt.eq(mt_decoder.padding_idx)

        # 2. T2U encoder
        if getattr(single_model, "synthesizer_encoder", None) is not None:
            t2u_encoder_out = single_model.synthesizer_encoder(
                x,
                mt_decoder_padding_mask,
            )
        else:
            t2u_encoder_out = {
                "encoder_out": [x],  # T x B x C
                "encoder_padding_mask": [mt_decoder_padding_mask]
                if mt_decoder_padding_mask is not None
                else [],  # B x T
                "encoder_embedding": [],
                "encoder_states": [],
                "src_tokens": [],
                "src_lengths": [],
            }

        # With augmented cross-attention the original encoder outputs are
        # kept and the T2U encoder output is passed in addition; otherwise
        # the T2U encoder output replaces them.
        if getattr(single_model, "t2u_augmented_cross_attn", False):
            encoder_outs_aug = [t2u_encoder_out]
        else:
            encoder_outs = [t2u_encoder_out]
            encoder_outs_aug = None

        # 3. T2U decoder
        finalized = self.generator.generate_decoder(
            encoder_outs,
            src_tokens,
            src_lengths,
            sample,
            prefix_tokens,
            constraints,
            bos_token,
            encoder_outs_aug=encoder_outs_aug,
        )
        return finalized
Optionally, feature/audio files can be packed +into uncompressed ZIP files (then accessed via byte offset and length) to improve I/O performance. + +Fairseq S2T also employs a YAML file for data related configurations: tokenizer type and dictionary path +for the target text, feature transforms such as CMVN (cepstral mean and variance normalization) and SpecAugment, +temperature-based resampling, etc. + +## Model Training +Fairseq S2T uses the unified `fairseq-train` interface for model training. It requires arguments `--task speech_to_text`, + `--arch <model architecture in fairseq.models.speech_to_text.*>` and `--config-yaml <config YAML filename>`. + +## Inference & Evaluation +Fairseq S2T uses the unified `fairseq-generate`/`fairseq-interactive` interface for inference and evaluation. It +requires arguments `--task speech_to_text` and `--config-yaml <config YAML filename>`. The interactive console takes +audio paths (one per line) as inputs. + + +## Examples +- [Speech Recognition (ASR) on LibriSpeech](docs/librispeech_example.md) + +- [Speech-to-Text Translation (ST) on MuST-C](docs/mustc_example.md) + +- [Speech-to-Text Translation (ST) on CoVoST 2](docs/covost_example.md) + +- [Speech-to-Text Translation (ST) on Multilingual TEDx](docs/mtedx_example.md) +- [Simultaneous Speech-to-Text Translation (SimulST) on MuST-C](docs/simulst_mustc_example.md) + +## Updates +- 02/04/2021: Added interactive decoding (`fairseq-interactive`) support. Examples: + [ASR (LibriSpeech)](docs/librispeech_example.md#interactive-decoding) + and [ST (CoVoST 2)](docs/covost_example.md#interactive-decoding). +- 01/08/2021: Several fixes for S2T Transformer model, inference-time de-tokenization, scorer configuration and data + preparation scripts. We also add pre-trained models to the examples and revise the instructions. + Breaking changes: the data preparation scripts now extract filterbank features without CMVN. CMVN is instead applied + on-the-fly (defined in the config YAML). 
+ +## What's Next +- We are migrating the old fairseq [ASR example](../speech_recognition) into this S2T framework and + merging the features from both sides. +- The following papers also base their experiments on fairseq S2T. We are adding more examples for replication. + - [Improving Cross-Lingual Transfer Learning for End-to-End Speech Recognition with Speech Translation (Wang et al., 2020)](https://arxiv.org/abs/2006.05474) + - [Self-Supervised Representations Improve End-to-End Speech Translation (Wu et al., 2020)](https://arxiv.org/abs/2006.12124) + - [Self-Training for End-to-End Speech Translation (Pino et al., 2020)](https://arxiv.org/abs/2006.02490) + - [CoVoST: A Diverse Multilingual Speech-To-Text Translation Corpus (Wang et al., 2020)](https://arxiv.org/abs/2002.01320) + - [Harnessing Indirect Training Data for End-to-End Automatic Speech Translation: Tricks of the Trade (Pino et al., 2019)](https://arxiv.org/abs/1909.06515) + +## Citation +Please cite as: +``` +@inproceedings{wang2020fairseqs2t, + title = {fairseq S2T: Fast Speech-to-Text Modeling with fairseq}, + author = {Changhan Wang and Yun Tang and Xutai Ma and Anne Wu and Dmytro Okhonko and Juan Pino}, + booktitle = {Proceedings of the 2020 Conference of the Asian Chapter of the Association for Computational Linguistics (AACL): System Demonstrations}, + year = {2020}, +} + +@inproceedings{ott2019fairseq, + title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling}, + author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli}, + booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations}, + year = {2019}, +} +``` diff --git a/fairseq/examples/speech_to_text/data_utils.py b/fairseq/examples/speech_to_text/data_utils.py new file mode 100644 index 0000000..b8648cb --- /dev/null +++ b/fairseq/examples/speech_to_text/data_utils.py @@ -0,0 +1,383 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
UNK_TOKEN, UNK_TOKEN_ID = "<unk>", 3
BOS_TOKEN, BOS_TOKEN_ID = "<s>", 0
EOS_TOKEN, EOS_TOKEN_ID = "</s>", 2
PAD_TOKEN, PAD_TOKEN_ID = "<pad>", 1


def gen_vocab(
    input_path: Path, output_path_prefix: Path, model_type="bpe",
    vocab_size=1000, special_symbols: Optional[List[str]] = None
):
    """Train a SentencePiece model and export a fairseq dictionary file.

    Args:
        input_path: text file passed to the trainer as ``--input``.
        output_path_prefix: passed as ``--model_prefix``; the model is loaded
            back from ``<prefix>.model`` and the dictionary is written to
            ``<prefix>.txt``.
        model_type: value for ``--model_type``.
        vocab_size: value for ``--vocab_size``.
        special_symbols: optional symbols forwarded as
            ``--user_defined_symbols``.

    The dictionary contains one ``"<piece> 1"`` line per piece, ordered by
    piece id, with the four special tokens (unk/bos/eos/pad) left out.
    """
    # Train SentencePiece Model
    arguments = [
        f"--input={input_path.as_posix()}",
        f"--model_prefix={output_path_prefix.as_posix()}",
        f"--model_type={model_type}",
        f"--vocab_size={vocab_size}",
        "--character_coverage=1.0",
        f"--num_threads={cpu_count()}",
        f"--unk_id={UNK_TOKEN_ID}",
        f"--bos_id={BOS_TOKEN_ID}",
        f"--eos_id={EOS_TOKEN_ID}",
        f"--pad_id={PAD_TOKEN_ID}",
    ]
    if special_symbols is not None:
        _special_symbols = ",".join(special_symbols)
        arguments.append(f"--user_defined_symbols={_special_symbols}")
    sp.SentencePieceTrainer.Train(" ".join(arguments))
    # Export fairseq dictionary
    spm = sp.SentencePieceProcessor()
    spm.Load(output_path_prefix.as_posix() + ".model")
    vocab = {i: spm.IdToPiece(i) for i in range(spm.GetPieceSize())}
    # Sanity check: the trained model must place the special tokens at the
    # ids requested above.
    assert (
        vocab.get(UNK_TOKEN_ID) == UNK_TOKEN
        and vocab.get(PAD_TOKEN_ID) == PAD_TOKEN
        and vocab.get(BOS_TOKEN_ID) == BOS_TOKEN
        and vocab.get(EOS_TOKEN_ID) == EOS_TOKEN
    )
    vocab = {
        i: s
        for i, s in vocab.items()
        if s not in {UNK_TOKEN, BOS_TOKEN, EOS_TOKEN, PAD_TOKEN}
    }
    # Pieces are arbitrary Unicode text, so write UTF-8 explicitly instead of
    # relying on the platform default encoding.
    with open(
        output_path_prefix.as_posix() + ".txt", "w", encoding="utf-8"
    ) as f_out:
        for _, s in sorted(vocab.items(), key=lambda x: x[0]):
            f_out.write(f"{s} 1\n")


def extract_fbank_features(
    waveform: torch.FloatTensor,
    sample_rate: int,
    output_path: Optional[Path] = None,
    n_mel_bins: int = 80,
    overwrite: bool = False,
):
    """Compute filterbank features for a waveform, optionally saving them.

    Args:
        waveform: input audio; it is passed through ``convert_waveform`` with
            ``to_mono=True`` before feature extraction.
        sample_rate: sample rate handed to the converters/extractors.
        output_path: if given, the features are saved there with ``np.save``.
        n_mel_bins: number of mel bins requested from the extractor.
        overwrite: recompute even when ``output_path`` already exists.

    Returns:
        The feature array, or ``None`` when ``output_path`` already exists
        and ``overwrite`` is false (nothing is computed in that case).

    Raises:
        ImportError: if neither fbank backend returned features.
    """
    if output_path is not None and output_path.is_file() and not overwrite:
        return

    _waveform, _ = convert_waveform(waveform, sample_rate, to_mono=True)
    # Kaldi compliance: 16-bit signed integers
    _waveform = _waveform * (2 ** 15)
    _waveform = _waveform.numpy()

    # Try the Kaldi backend first and fall back to torchaudio; each helper
    # returns None when it cannot produce features.
    features = _get_kaldi_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        features = _get_torchaudio_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        raise ImportError(
            "Please install pyKaldi or torchaudio to enable fbank feature extraction"
        )

    if output_path is not None:
        np.save(output_path.as_posix(), features)
    return features


def create_zip(data_root: Path, zip_path: Path):
    """Pack every ``*.npy`` and ``*.flac`` file directly under ``data_root``
    into an uncompressed (``ZIP_STORED``) archive at ``zip_path``.

    Entries are stored under their bare file name (no directory part).
    """
    paths = list(data_root.glob("*.npy"))
    paths.extend(data_root.glob("*.flac"))
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_STORED) as f:
        for path in tqdm(paths):
            f.write(path, arcname=path.name)


def get_zip_manifest(
    zip_path: Path, zip_root: Optional[Path] = None, is_audio=False
):
    """Index the entries of an uncompressed ZIP archive by byte offset.

    Args:
        zip_path: archive path; also the path written into the manifest.
        zip_root: optional directory that ``zip_path`` is resolved against
            when opening the archive (it is not included in the manifest).
        is_audio: validate/measure entries as audio (via ``soundfile``)
            instead of as ``.npy`` arrays.

    Returns:
        ``(paths, lengths)``: two dicts keyed by the entry's file-name stem.
        ``paths`` holds ``"<zip_path>:<offset>:<size>"`` strings pointing at
        the raw payload; ``lengths`` holds the number of audio frames or the
        first dimension of the stored array.
    """
    _zip_path = Path.joinpath(zip_root or Path(""), zip_path)
    with zipfile.ZipFile(_zip_path, mode="r") as f:
        info = f.infolist()
    paths, lengths = {}, {}
    # Open the archive once for raw reads instead of once per entry.
    with open(_zip_path, "rb") as raw:
        for i in tqdm(info):
            utt_id = Path(i.filename).stem
            # The payload starts after the 30-byte fixed local header, the
            # encoded file name and the extra field. Read both lengths from
            # the local header itself (little-endian uint16 at bytes 26 and
            # 28) rather than assuming len(filename) bytes and no extra
            # field, which breaks for non-ASCII names.
            raw.seek(i.header_offset)
            local_header = raw.read(30)
            name_len = int.from_bytes(local_header[26:28], "little")
            extra_len = int.from_bytes(local_header[28:30], "little")
            offset = i.header_offset + 30 + name_len + extra_len
            file_size = i.file_size
            paths[utt_id] = f"{zip_path.as_posix()}:{offset}:{file_size}"
            raw.seek(offset)
            byte_data = raw.read(file_size)
            assert len(byte_data) > 1
            if is_audio:
                assert is_sf_audio_data(byte_data), i
            else:
                assert is_npy_data(byte_data), i
            byte_data_fp = io.BytesIO(byte_data)
            if is_audio:
                lengths[utt_id] = sf.info(byte_data_fp).frames
            else:
                lengths[utt_id] = np.load(byte_data_fp).shape[0]
    return paths, lengths


def gen_config_yaml(
    manifest_root: Path,
    spm_filename: Optional[str] = None,
    vocab_name: Optional[str] = None,
    yaml_filename: str = "config.yaml",
    specaugment_policy: Optional[str] = "lb",
    prepend_tgt_lang_tag: bool = False,
    sampling_alpha: Optional[float] = None,
    input_channels: Optional[int] = 1,
    input_feat_per_channel: Optional[int] = 80,
    audio_root: str = "",
    cmvn_type: str = "utterance",
    gcmvn_path: Optional[Path] = None,
    extra=None
):
    """Write the S2T data configuration YAML under ``manifest_root``.

    At least one of ``spm_filename`` / ``vocab_name`` must be given; when
    ``vocab_name`` is omitted it is derived from ``spm_filename`` by
    replacing ``".model"`` with ``".txt"``.

    Args:
        manifest_root: directory for the YAML; made absolute before use.
        spm_filename: SentencePiece model file name (relative to
            ``manifest_root``); enables the ``bpe_tokenizer`` section.
        vocab_name: dictionary file name.
        yaml_filename: name of the YAML file to write.
        specaugment_policy: one of ``"lb"``, ``"ld"``, ``"sm"``, ``"ss"``;
            other values write no ``specaugment`` section. Any non-``None``
            value still adds ``"specaugment"`` to the ``_train`` transforms.
        prepend_tgt_lang_tag: set the corresponding flag when true.
        sampling_alpha: written when not ``None``.
        input_channels: written when not ``None``.
        input_feat_per_channel: written when not ``None``.
        audio_root: written when non-empty.
        cmvn_type: ``"global"`` or ``"utterance"``.
        gcmvn_path: stats file path, required when ``cmvn_type == "global"``.
        extra: optional mapping merged into the config last.

    Raises:
        NotImplementedError: for an unsupported ``cmvn_type``.
        ValueError: if ``cmvn_type == "global"`` without ``gcmvn_path``.
    """
    manifest_root = manifest_root.absolute()
    writer = S2TDataConfigWriter(manifest_root / yaml_filename)
    assert spm_filename is not None or vocab_name is not None
    vocab_name = spm_filename.replace(".model", ".txt") if vocab_name is None \
        else vocab_name
    writer.set_vocab_filename(vocab_name)
    if input_channels is not None:
        writer.set_input_channels(input_channels)
    if input_feat_per_channel is not None:
        writer.set_input_feat_per_channel(input_feat_per_channel)
    specaugment_setters = {
        "lb": writer.set_specaugment_lb_policy,
        "ld": writer.set_specaugment_ld_policy,
        "sm": writer.set_specaugment_sm_policy,
        "ss": writer.set_specaugment_ss_policy,
    }
    specaugment_setter = specaugment_setters.get(specaugment_policy, None)
    if specaugment_setter is not None:
        specaugment_setter()
    if spm_filename is not None:
        writer.set_bpe_tokenizer(
            {
                "bpe": "sentencepiece",
                "sentencepiece_model": (manifest_root / spm_filename).as_posix(),
            }
        )
    if prepend_tgt_lang_tag:
        writer.set_prepend_tgt_lang_tag(True)
    if sampling_alpha is not None:
        writer.set_sampling_alpha(sampling_alpha)

    if cmvn_type not in ["global", "utterance"]:
        raise NotImplementedError

    if specaugment_policy is not None:
        writer.set_feature_transforms(
            "_train", [f"{cmvn_type}_cmvn", "specaugment"]
        )
    writer.set_feature_transforms("*", [f"{cmvn_type}_cmvn"])

    if cmvn_type == "global":
        if gcmvn_path is None:
            raise ValueError("Please provide path of global cmvn file.")
        else:
            writer.set_global_cmvn(gcmvn_path.as_posix())

    if len(audio_root) > 0:
        writer.set_audio_root(audio_root)

    if extra is not None:
        writer.set_extra(extra)
    writer.flush()


def load_df_from_tsv(path: Union[str, Path]) -> pd.DataFrame:
    """Read a UTF-8, tab-separated manifest into a DataFrame.

    Quoting is disabled, ``\\`` is the escape character and empty fields are
    kept as empty strings (``na_filter=False``).
    """
    _path = path if isinstance(path, str) else path.as_posix()
    return pd.read_csv(
        _path,
        sep="\t",
        header=0,
        encoding="utf-8",
        escapechar="\\",
        quoting=csv.QUOTE_NONE,
        na_filter=False,
    )


def save_df_to_tsv(dataframe, path: Union[str, Path]):
    """Write ``dataframe`` as a UTF-8, tab-separated manifest with a header
    row, no index column, quoting disabled and ``\\`` as escape character."""
    _path = path if isinstance(path, str) else path.as_posix()
    dataframe.to_csv(
        _path,
        sep="\t",
        header=True,
        index=False,
        encoding="utf-8",
        escapechar="\\",
        quoting=csv.QUOTE_NONE,
    )


def load_tsv_to_dicts(path: Union[str, Path]) -> List[dict]:
    """Read a tab-separated manifest into a list of ``{column: value}`` dicts.

    The file is decoded as UTF-8, matching what ``save_df_to_tsv`` writes.
    """
    with open(path, "r", encoding="utf-8") as f:
        reader = csv.DictReader(
            f,
            delimiter="\t",
            quotechar=None,
            doublequote=False,
            lineterminator="\n",
            quoting=csv.QUOTE_NONE,
        )
        rows = [dict(e) for e in reader]
    return rows


def filter_manifest_df(
    df, is_train_split=False, extra_filters=None, min_n_frames=5, max_n_frames=3000
):
    """Drop invalid rows from a manifest DataFrame and report the counts.

    A row is removed when its ``audio`` or ``tgt_text`` is empty, when
    ``n_frames < min_n_frames``, when ``is_train_split`` is true and
    ``n_frames > max_n_frames``, or when any mask in ``extra_filters``
    (a ``{description: boolean mask}`` mapping) is true for it.

    Returns:
        The subset of ``df`` that passed every filter. A one-line summary of
        per-filter counts is printed as a side effect.
    """
    filters = {
        "no speech": df["audio"] == "",
        f"short speech (<{min_n_frames} frames)": df["n_frames"] < min_n_frames,
        "empty sentence": df["tgt_text"] == "",
    }
    if is_train_split:
        filters[f"long speech (>{max_n_frames} frames)"] = df["n_frames"] > max_n_frames
    if extra_filters is not None:
        filters.update(extra_filters)
    invalid = reduce(lambda x, y: x | y, filters.values())
    valid = ~invalid
    print(
        "| "
        + ", ".join(f"{n}: {f.sum()}" for n, f in filters.items())
        + f", total {invalid.sum()} filtered, {valid.sum()} remained."
    )
    return df[valid]


def cal_gcmvn_stats(features_list):
    """Compute per-dimension mean and standard deviation over all frames.

    Args:
        features_list: arrays that are concatenated along axis 0.

    Returns:
        ``{"mean": ..., "std": ...}`` as float32 arrays. The variance is
        computed as ``E[x^2] - E[x]^2`` and floored at ``1e-8`` before the
        square root.
    """
    features = np.concatenate(features_list)
    square_sums = (features ** 2).sum(axis=0)
    mean = features.mean(axis=0)
    # The former mean-subtracted copy of `features` was never used (only its
    # unchanged row count was read), so it is no longer materialised.
    var = square_sums / features.shape[0] - mean ** 2
    std = np.sqrt(np.maximum(var, 1e-8))
    return {"mean": mean.astype("float32"), "std": std.astype("float32")}


class S2TDataConfigWriter(object):
    """Accumulates S2T data configuration entries and dumps them to YAML.

    Each ``set_*`` method stores a value in ``self.config``; ``flush`` writes
    the whole dict to ``yaml_path``.
    """

    DEFAULT_VOCAB_FILENAME = "dict.txt"
    DEFAULT_INPUT_FEAT_PER_CHANNEL = 80
    DEFAULT_INPUT_CHANNELS = 1

    def __init__(self, yaml_path: Path):
        """Remember the output path and start with an empty config.

        Raises:
            ImportError: if PyYAML is not installed.
        """
        try:
            import yaml
        except ImportError as err:
            # Fail with the actionable message instead of printing it and
            # then crashing on the unbound name below.
            raise ImportError(
                "Please install PyYAML for S2T data config YAML files"
            ) from err
        self.yaml = yaml
        self.yaml_path = yaml_path
        self.config = {}

    def flush(self):
        """Write the accumulated config to ``self.yaml_path``."""
        with open(self.yaml_path, "w") as f:
            self.yaml.dump(self.config, f)

    def set_audio_root(self, audio_root=""):
        self.config["audio_root"] = audio_root

    def set_vocab_filename(self, vocab_filename: str = "dict.txt"):
        self.config["vocab_filename"] = vocab_filename

    def set_specaugment(
        self,
        time_wrap_w: int,
        freq_mask_n: int,
        freq_mask_f: int,
        time_mask_n: int,
        time_mask_t: int,
        time_mask_p: float,
    ):
        """Store the ``specaugment`` section from the given parameters."""
        # NOTE(review): the key is spelled "time_wrap_W" (not "time_warp_W");
        # confirm it matches the key the config consumer reads. Every policy
        # below passes 0 for it.
        self.config["specaugment"] = {
            "time_wrap_W": time_wrap_w,
            "freq_mask_N": freq_mask_n,
            "freq_mask_F": freq_mask_f,
            "time_mask_N": time_mask_n,
            "time_mask_T": time_mask_t,
            "time_mask_p": time_mask_p,
        }

    def set_specaugment_lb_policy(self):
        self.set_specaugment(
            time_wrap_w=0,
            freq_mask_n=1,
            freq_mask_f=27,
            time_mask_n=1,
            time_mask_t=100,
            time_mask_p=1.0,
        )

    def set_specaugment_ld_policy(self):
        self.set_specaugment(
            time_wrap_w=0,
            freq_mask_n=2,
            freq_mask_f=27,
            time_mask_n=2,
            time_mask_t=100,
            time_mask_p=1.0,
        )

    def set_specaugment_sm_policy(self):
        self.set_specaugment(
            time_wrap_w=0,
            freq_mask_n=2,
            freq_mask_f=15,
            time_mask_n=2,
            time_mask_t=70,
            time_mask_p=0.2,
        )

    def set_specaugment_ss_policy(self):
        self.set_specaugment(
            time_wrap_w=0,
            freq_mask_n=2,
            freq_mask_f=27,
            time_mask_n=2,
            time_mask_t=70,
            time_mask_p=0.2,
        )

    def set_input_channels(self, input_channels: int = 1):
        self.config["input_channels"] = input_channels

    def set_input_feat_per_channel(self, input_feat_per_channel: int = 80):
        self.config["input_feat_per_channel"] = input_feat_per_channel

    def set_bpe_tokenizer(self, bpe_tokenizer: Dict[str, Any]):
        self.config["bpe_tokenizer"] = bpe_tokenizer

    def set_global_cmvn(self, stats_npz_path: str):
        self.config["global_cmvn"] = {"stats_npz_path": stats_npz_path}

    def set_feature_transforms(self, split: str, transforms: List[str]):
        """Set the transform list for ``split``, creating the ``transforms``
        section on first use."""
        if "transforms" not in self.config:
            self.config["transforms"] = {}
        self.config["transforms"][split] = transforms

    def set_prepend_tgt_lang_tag(self, flag: bool = True):
        self.config["prepend_tgt_lang_tag"] = flag

    def set_sampling_alpha(self, sampling_alpha: float = 1.0):
        self.config["sampling_alpha"] = sampling_alpha

    def set_extra(self, data):
        """Merge ``data`` into the config, overriding existing keys."""
        self.config.update(data)
+ +## Data Preparation + +[Download](https://commonvoice.mozilla.org/en/datasets) and unpack Common Voice v4 to a path +`${COVOST_ROOT}/${SOURCE_LANG_ID}`, then preprocess it with + +```bash +# additional Python packages for S2T data processing/model training +pip install pandas torchaudio sentencepiece + +# En ASR +python examples/speech_to_text/prep_covost_data.py \ + --data-root ${COVOST_ROOT} --vocab-type char --src-lang en +# ST +python examples/speech_to_text/prep_covost_data.py \ + --data-root ${COVOST_ROOT} --vocab-type char \ + --src-lang fr --tgt-lang en +``` + +The generated files (manifest, features, vocabulary and data configuration) will be added to +`${COVOST_ROOT}/${SOURCE_LANG_ID}`. + +Download our vocabulary files if you want to use our pre-trained models: + +- ASR: [En](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_asr_vocab_char.zip) +- ST: [Fr-En](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_fr_en_st_vocab_char.zip), [De-En](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_de_en_st_vocab_char.zip), [Es-En](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_es_en_st_vocab_char.zip), [Ca-En](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_ca_en_st_vocab_char.zip), [En-De](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_de_st_vocab_char.zip), [En-Ca](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_ca_st_vocab_char.zip), [En-Fa](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_fa_st_vocab_char.zip), [En-Et](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_et_st_vocab_char.zip) + +## ASR + +#### Training + +We train an En ASR model for encoder pre-training some of the ST models. 
+ +```bash +fairseq-train ${COVOST_ROOT}/en \ + --config-yaml config_asr_en.yaml --train-subset train_asr_en --valid-subset dev_asr_en \ + --save-dir ${ASR_SAVE_DIR} --num-workers 4 --max-tokens 50000 --max-update 60000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --report-accuracy --arch s2t_transformer_s --dropout 0.15 --optimizer adam --lr 2e-3 \ + --lr-scheduler inverse_sqrt --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 \ + --attn-type None --pos-enc-type ${POS_ENC_TYPE} +``` + +where `ASR_SAVE_DIR` is the checkpoint root path and `POS_ENC_TYPE` refers to the positional encoding to be used in the conformer encoder. +Set it to `abs`, `rope` or `rel_pos` to use the absolute positional encoding, rotary positional encoding or relative positional encoding in the conformer layer respectively. +The transformer encoder only supports absolute positional encoding and is used by default. +To switch to conformer, set `--attn-type espnet` and `--pos-enc-type ${POS_ENC_TYPE}`. We set `--update-freq 8` to simulate 8 GPUs with 1 GPU. You may want to update it accordingly when using more than 1 GPU.
+ +#### Inference & Evaluation + +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ASR_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME}" +fairseq-generate ${COVOST_ROOT}/en \ + --config-yaml config_asr_en.yaml --gen-subset test_asr_en --task speech_to_text \ + --path ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} --max-tokens 50000 --beam 5 \ + --scoring wer --wer-tokenizer 13a --wer-lowercase --wer-remove-punct +``` + +#### Results + +| --arch | --pos-enc-type | Params | En | Model | +|---|---|---|---|---| +| s2t_transformer_s | - | 31M | 25.6 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_asr_transformer_s.pt) | +| s2t_conformer | rel_pos | 42.9M | 23.18| [Download](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_asr/rel_pos_asr_checkpoint_best.pt) | +| s2t_conformer | rope | 42.1M | 23.8| [Download](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_asr/rope_pos_asr_checkpoint_best.pt) | +| s2t_conformer | abs | 42.1M | 23.8| [Download](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_asr/abs_asr_checkpoint_best.pt) | + +## ST + +#### Training + +Fr-En as an example (use `--max-tokens 50000` for en-* directions): + +```bash +fairseq-train ${COVOST_ROOT}/fr \ + --config-yaml config_st_fr_en.yaml --train-subset train_st_fr_en --valid-subset dev_st_fr_en \ + --save-dir ${ST_SAVE_DIR} --num-workers 4 --max-update 30000 --max-tokens 40000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --encoder-freezing-updates 1000 --optimizer adam --lr 2e-3 \ + --lr-scheduler inverse_sqrt --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 \ + --attn-type None --pos-enc-type ${POS_ENC_TYPE} \ + --load-pretrained-encoder-from ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} +``` + +where `ST_SAVE_DIR` is the checkpoint root path and `POS_ENC_TYPE` refers to positional
encoding to be used in the conformer encoder. +Set it to `abs`, `rope` or `rel_pos` to use the absolute positional encoding, rotary positional encoding or relative positional encoding in the conformer layer respectively. +The transformer encoder only supports absolute positional encoding and is used by default. +To switch to conformer, set `--attn-type espnet` and `--pos-enc-type ${POS_ENC_TYPE}`. Optionally load the pre-trained En ASR encoder for faster training and better +performance: `--load-pretrained-encoder-from <ASR checkpoint path>`. We set `--update-freq 8` to simulate 8 GPUs with 1 GPU. +You may want to update it accordingly when using more than 1 GPU. + +#### Inference & Evaluation + +Average the last 10 checkpoints and evaluate on the test split: + +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ST_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ST_SAVE_DIR}/${CHECKPOINT_FILENAME}" +fairseq-generate ${COVOST_ROOT}/fr \ + --config-yaml config_st_fr_en.yaml --gen-subset test_st_fr_en --task speech_to_text \ + --path ${ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 --scoring sacrebleu +``` + +## Interactive Decoding + +Launch the interactive console via + +```bash +fairseq-interactive ${COVOST_ROOT}/fr --config-yaml config_st_fr_en.yaml \ + --task speech_to_text --path ${ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 +``` + +Type in WAV/FLAC/OGG audio paths (one per line) after the prompt.
+ +#### Results + +| --arch | --pos-enc-type | Params | ASR PT | Fr-En | De-En | Es-En | Ca-En | En-De | En-Ca | En-Fa | En-Et | Model | +|---|---|---|---|---|---|---|---|---|---|---|---|---| +| s2t_transformer | - | 31M | Yes | [27.2](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_fr_en_st_transformer_s.pt) | [17.7](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_de_en_st_transformer_s.pt) | [23.1](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_es_en_st_transformer_s.pt) | [19.3](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_ca_en_st_transformer_s.pt) | [16.1](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_de_st_transformer_s.pt) | [21.6](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_ca_st_transformer_s.pt) | [12.9](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_fa_st_transformer_s.pt) | [12.8](https://dl.fbaipublicfiles.com/fairseq/s2t/covost2_en_et_st_transformer_s.pt) | (<-Download) | +| s2t_conformer | rel_pos | 42.9M | No | [28.32](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [18.21](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [25.98](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [21.13](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [20.37](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [25.89](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [15.59](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | [14.49](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/rel_pos_from_scratch_avg_last_10_checkpoint.pt) | (<-Download) | +| s2t_conformer | rel_pos | 42.9M 
| Yes| [27.15](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [18.22](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [25.14](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [21.68](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [20.35](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [25.92](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [15.76](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | [16.52](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/rel_pos_asr_pt_avg_last_10_checkpoint.pt) | (<-Download) | +| s2t_conformer | rope | 42.1M | No | [27.61](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/rope_from_scratch_avg_last_10_checkpoint.pt) | [17.6](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/rope_from_scratch_avg_last_10_checkpoint.pt) | [24.91](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/rope_from_scratch_avg_last_10_checkpoint.pt) | [20.78](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/rope_from_scratch_avg_last_10_checkpoint.pt) | [19.7](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/rope_from_scratch_avg_last_10_checkpoint.pt) | [25.13](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/rope_from_scratch_avg_last_10_checkpoint.pt) | [15.22](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/rope_from_scratch_avg_last_10_checkpoint.pt) | [15.87](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/rope_from_scratch_avg_last_10_checkpoint.pt) | (<-Download) | +| s2t_conformer | rope | 42.1M | Yes | 
[26.99](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/rope_asr_pt_avg_last_10_checkpoint.pt) | [17.71](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/rope_asr_pt_avg_last_10_checkpoint.pt) | [24.24](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/rope_asr_pt_avg_last_10_checkpoint.pt) | [21.24](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/rope_asr_pt_avg_last_10_checkpoint.pt) | [19.9](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/rope_asr_pt_avg_last_10_checkpoint.pt) | [25.25](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/rope_asr_pt_avg_last_10_checkpoint.pt) | [15.58](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/rope_asr_pt_avg_last_10_checkpoint.pt) | [15.97](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/rope_asr_pt_avg_last_10_checkpoint.pt) | (<-Download) | +| s2t_conformer | abs | 42.1M | No | [27.45](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/abs_from_scratch_avg_last_10_checkpoint.pt) | [17.25](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/abs_from_scratch_avg_last_10_checkpoint.pt) | [25.01](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/abs_from_scratch_avg_last_10_checkpoint.pt) | [20.26](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/abs_from_scratch_avg_last_10_checkpoint.pt) | [19.86](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/abs_from_scratch_avg_last_10_checkpoint.pt) | [25.25](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/abs_from_scratch_avg_last_10_checkpoint.pt) | [15.46](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/abs_from_scratch_avg_last_10_checkpoint.pt) | [15.81](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/abs_from_scratch_avg_last_10_checkpoint.pt) | (<-Download) | +| s2t_conformer | abs | 42.1M | Yes|
[26.52](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/fr_en/abs_asr_pt_avg_last_10_checkpoint.pt) | [17.37](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/de_en/abs_asr_pt_avg_last_10_checkpoint.pt) | [25.40](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/es_en/abs_asr_pt_avg_last_10_checkpoint.pt) | [20.45](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/ca_en/abs_asr_pt_avg_last_10_checkpoint.pt) | [19.57](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_de/abs_asr_pt_avg_last_10_checkpoint.pt) | [25.40](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_ca/abs_asr_pt_avg_last_10_checkpoint.pt) | [15.17](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_fa/abs_asr_pt_avg_last_10_checkpoint.pt) | [15.83](https://dl.fbaipublicfiles.com/fairseq/conformer/covost2/en_et/abs_asr_pt_avg_last_10_checkpoint.pt) | (<-Download) | + +[[Back]](..) diff --git a/fairseq/examples/speech_to_text/docs/librispeech_example.md b/fairseq/examples/speech_to_text/docs/librispeech_example.md new file mode 100644 index 0000000..4040fda --- /dev/null +++ b/fairseq/examples/speech_to_text/docs/librispeech_example.md @@ -0,0 +1,69 @@ +[[Back]](..) + +# S2T Example: Speech Recognition (ASR) on LibriSpeech +[LibriSpeech](https://www.danielpovey.com/files/2015_icassp_librispeech.pdf) is a de-facto standard English ASR +benchmark. We provide competitive +vanilla [Transformer](https://papers.nips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) baselines. + +## Data preparation +Download and preprocess LibriSpeech data with +```bash +# additional Python packages for S2T data processing/model training +pip install pandas torchaudio sentencepiece + +python examples/speech_to_text/prep_librispeech_data.py \ + --output-root ${LS_ROOT} --vocab-type unigram --vocab-size 10000 +``` +where `LS_ROOT` is the root path for downloaded data as well as generated files (manifest, features, vocabulary and +data configuration). 
+ +[Download](https://dl.fbaipublicfiles.com/fairseq/s2t/librispeech_vocab_unigram10000.zip) our vocabulary files +if you want to use our pre-trained models. + +## Training +```bash +fairseq-train ${LS_ROOT} --save-dir ${SAVE_DIR} \ + --config-yaml config.yaml --train-subset train-clean-100,train-clean-360,train-other-500 --valid-subset dev-clean,dev-other \ + --num-workers 4 --max-tokens 40000 --max-update 300000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --share-decoder-input-output-embed \ + --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt --warmup-updates 10000 \ + --clip-norm 10.0 --seed 1 --update-freq 8 +``` +where `SAVE_DIR` is the checkpoint root path. Here we use `--arch s2t_transformer_s` (31M parameters) as example. +For better performance, you may switch to `s2t_transformer_m` (71M, with `--lr 1e-3`) or `s2t_transformer_l` +(268M, with `--lr 5e-4`). We set `--update-freq 8` to simulate 8 GPUs with 1 GPU. You may want to update it accordingly +when using more than 1 GPU. 
+ +## Inference & Evaluation +Average the last 10 checkpoints and evaluate on the 4 splits +(`dev-clean`, `dev-other`, `test-clean` and `test-other`): +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py --inputs ${SAVE_DIR} \ + --num-epoch-checkpoints 10 \ + --output "${SAVE_DIR}/${CHECKPOINT_FILENAME}" +for SUBSET in dev-clean dev-other test-clean test-other; do + fairseq-generate ${LS_ROOT} --config-yaml config.yaml --gen-subset ${SUBSET} \ + --task speech_to_text --path ${SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 --scoring wer +done +``` + +## Interactive Decoding +Launch the interactive console via +```bash +fairseq-interactive ${LS_ROOT} --config-yaml config.yaml --task speech_to_text \ + --path ${SAVE_DIR}/${CHECKPOINT_FILENAME} --max-tokens 50000 --beam 5 +``` +Type in WAV/FLAC/OGG audio paths (one per line) after the prompt. + +## Results + +| --arch | Params | dev-clean | dev-other | test-clean | test-other | Model | +|---|---|---|---|---|---|---| +| s2t_transformer_s | 30M | 3.8 | 8.9 | 4.4 | 9.0 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/librispeech_transformer_s.pt) | +| s2t_transformer_m | 71M | 3.2 | 8.0 | 3.4 | 7.9 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/librispeech_transformer_m.pt) | +| s2t_transformer_l | 268M | 3.0 | 7.5 | 3.2 | 7.5 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/librispeech_transformer_l.pt) | + +[[Back]](..) diff --git a/fairseq/examples/speech_to_text/docs/mtedx_example.md b/fairseq/examples/speech_to_text/docs/mtedx_example.md new file mode 100644 index 0000000..7e3d759 --- /dev/null +++ b/fairseq/examples/speech_to_text/docs/mtedx_example.md @@ -0,0 +1,201 @@ +[[Back]](..) + +# S2T Example: Speech Translation (ST) on Multilingual TEDx + +[Multilingual TEDx](https://arxiv.org/abs/2102.01757) is a multilingual corpus for speech recognition and +speech translation.
The data is derived from TEDx talks in 8 source languages +with translations to a subset of 5 target languages. + +## Data Preparation +[Download](http://openslr.org/100/) and unpack Multilingual TEDx data to a path +`${MTEDX_ROOT}/${LANG_PAIR}`, then preprocess it with +```bash +# additional Python packages for S2T data processing/model training +pip install pandas torchaudio soundfile sentencepiece + +# Generate TSV manifests, features, vocabulary +# and configuration for each language +python examples/speech_to_text/prep_mtedx_data.py \ + --data-root ${MTEDX_ROOT} --task asr \ + --vocab-type unigram --vocab-size 1000 +python examples/speech_to_text/prep_mtedx_data.py \ + --data-root ${MTEDX_ROOT} --task st \ + --vocab-type unigram --vocab-size 1000 + +# Add vocabulary and configuration for joint data +# (based on the manifests and features generated above) +python examples/speech_to_text/prep_mtedx_data.py \ + --data-root ${MTEDX_ROOT} --task asr --joint \ + --vocab-type unigram --vocab-size 8000 +python examples/speech_to_text/prep_mtedx_data.py \ + --data-root ${MTEDX_ROOT} --task st --joint \ + --vocab-type unigram --vocab-size 8000 +``` +The generated files (manifest, features, vocabulary and data configuration) will be added to +`${MTEDX_ROOT}/${LANG_PAIR}` (per-language data) and `MTEDX_ROOT` (joint data). 
+ + +## ASR +#### Training +Spanish as example: +```bash +fairseq-train ${MTEDX_ROOT}/es-es \ + --config-yaml config_asr.yaml --train-subset train_asr --valid-subset valid_asr \ + --save-dir ${ASR_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-epoch 200 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --report-accuracy \ + --arch s2t_transformer_xs --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --dropout 0.3 --label-smoothing 0.1 \ + --load-pretrained-encoder-from ${PRETRAINED_ENCODER} \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 --update-freq 8 --patience 10 +``` +For joint model (using ASR data from all 8 languages): +```bash +fairseq-train ${MTEDX_ROOT} \ + --config-yaml config_asr.yaml \ + --train-subset train_es-es_asr,train_fr-fr_asr,train_pt-pt_asr,train_it-it_asr,train_ru-ru_asr,train_el-el_asr,train_ar-ar_asr,train_de-de_asr \ + --valid-subset valid_es-es_asr,valid_fr-fr_asr,valid_pt-pt_asr,valid_it-it_asr,valid_ru-ru_asr,valid_el-el_asr,valid_ar-ar_asr,valid_de-de_asr \ + --save-dir ${MULTILINGUAL_ASR_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-epoch 200 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --report-accuracy \ + --arch s2t_transformer_s --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --dropout 0.3 --label-smoothing 0.1 \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 --update-freq 8 --patience 10 \ + --ignore-prefix-size 1 +``` +where `MULTILINGUAL_ASR_SAVE_DIR` is the checkpoint root path. We set `--update-freq 8` to simulate 8 GPUs +with 1 GPU. You may want to update it accordingly when using more than 1 GPU. +For multilingual models, we prepend target language ID token as target BOS, which should be excluded from +the training loss via `--ignore-prefix-size 1`. 
+ +#### Inference & Evaluation +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ASR_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME}" + +fairseq-generate ${MTEDX_ROOT}/es-es \ + --config-yaml config_asr.yaml --gen-subset test --task speech_to_text \ + --path ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} --max-tokens 50000 --beam 5 \ + --skip-invalid-size-inputs-valid-test \ + --scoring wer --wer-tokenizer 13a --wer-lowercase --wer-remove-punct --remove-bpe + +# For models trained on joint data +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${MULTILINGUAL_ASR_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${MULTILINGUAL_ASR_SAVE_DIR}/${CHECKPOINT_FILENAME}" + +for LANG in es fr pt it ru el ar de; do + fairseq-generate ${MTEDX_ROOT} \ + --config-yaml config_asr.yaml --gen-subset test_${LANG}-${LANG}_asr --task speech_to_text \ + --prefix-size 1 --path ${MULTILINGUAL_ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 40000 --beam 5 \ + --skip-invalid-size-inputs-valid-test \ + --scoring wer --wer-tokenizer 13a --wer-lowercase --wer-remove-punct --remove-bpe +done +``` +#### Results +| Data | --arch | Params | Es | Fr | Pt | It | Ru | El | Ar | De | +|--------------|--------------------|--------|------|------|------|------|------|-------|-------|-------| +| Monolingual | s2t_transformer_xs | 10M | 46.4 | 45.6 | 54.8 | 48.0 | 74.7 | 109.5 | 104.4 | 111.1 | + + +## ST +#### Training +Es-En as example: +```bash +fairseq-train ${MTEDX_ROOT}/es-en \ + --config-yaml config_st.yaml --train-subset train_st --valid-subset valid_st \ + --save-dir ${ST_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-epoch 200 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --report-accuracy \ + --arch s2t_transformer_xs --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 
1 --dropout 0.3 --label-smoothing 0.1 \ + --load-pretrained-encoder-from ${PRETRAINED_ENCODER} \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 --update-freq 8 --patience 10 +``` +For multilingual model (all 12 directions): +```bash +fairseq-train ${MTEDX_ROOT} \ + --config-yaml config_st.yaml \ + --train-subset train_el-en_st,train_es-en_st,train_es-fr_st,train_es-it_st,train_es-pt_st,train_fr-en_st,train_fr-es_st,train_fr-pt_st,train_it-en_st,train_it-es_st,train_pt-en_st,train_pt-es_st,train_ru-en_st \ + --valid-subset valid_el-en_st,valid_es-en_st,valid_es-fr_st,valid_es-it_st,valid_es-pt_st,valid_fr-en_st,valid_fr-es_st,valid_fr-pt_st,valid_it-en_st,valid_it-es_st,valid_pt-en_st,valid_pt-es_st,valid_ru-en_st \ + --save-dir ${MULTILINGUAL_ST_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-epoch 200 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --report-accuracy \ + --arch s2t_transformer_s --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --dropout 0.3 --label-smoothing 0.1 \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 --update-freq 8 --patience 10 \ + --ignore-prefix-size 1 \ + --load-pretrained-encoder-from ${PRETRAINED_ENCODER} +``` +where `ST_SAVE_DIR` (`MULTILINGUAL_ST_SAVE_DIR`) is the checkpoint root path. The ST encoder is pre-trained by ASR +for faster training and better performance: `--load-pretrained-encoder-from <(JOINT_)ASR checkpoint path>`. We set +`--update-freq 8` to simulate 8 GPUs with 1 GPU. You may want to update it accordingly when using more than 1 GPU. +For multilingual models, we prepend target language ID token as target BOS, which should be excluded from +the training loss via `--ignore-prefix-size 1`. 
+ +#### Inference & Evaluation +Average the last 10 checkpoints and evaluate on the `test` split: +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ST_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ST_SAVE_DIR}/${CHECKPOINT_FILENAME}" + +fairseq-generate ${MTEDX_ROOT}/es-en \ + --config-yaml config_st.yaml --gen-subset test --task speech_to_text \ + --path ${ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 --scoring sacrebleu --remove-bpe + +# For multilingual models +python scripts/average_checkpoints.py \ + --inputs ${MULTILINGUAL_ST_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${MULTILINGUAL_ST_SAVE_DIR}/${CHECKPOINT_FILENAME}" + +for LANGPAIR in es-en es-fr es-pt fr-en fr-es fr-pt pt-en pt-es it-en it-es ru-en el-en; do + fairseq-generate ${MTEDX_ROOT} \ + --config-yaml config_st.yaml --gen-subset test_${LANGPAIR}_st --task speech_to_text \ + --prefix-size 1 --path ${MULTILINGUAL_ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 40000 --beam 5 \ + --skip-invalid-size-inputs-valid-test \ + --scoring sacrebleu --remove-bpe +done +``` +For multilingual models, we force decoding from the target language ID token (as BOS) via `--prefix-size 1`. 
+ +#### Results +| Data | --arch | Params | Es-En | Es-Pt | Es-Fr | Fr-En | Fr-Es | Fr-Pt | Pt-En | Pt-Es | It-En | It-Es | Ru-En | El-En | +|--------------|--------------------|-----|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------|-------| +| Bilingual | s2t_transformer_xs | 10M | 7.0 | 12.2 | 1.7 | 8.9 | 10.6 | 7.9 | 8.1 | 8.7 | 6.4 | 1.0 | 0.7 | 0.6 | +| Multilingual | s2t_transformer_s | 31M | 12.3 | 17.4 | 6.1 | 12.0 | 13.6 | 13.2 | 12.0 | 13.7 | 10.7 | 13.1 | 0.6 | 0.8 | + + +## Citation +Please cite as: +``` +@inproceedings{salesky2021mtedx, + title={Multilingual TEDx Corpus for Speech Recognition and Translation}, + author={Elizabeth Salesky and Matthew Wiesner and Jacob Bremerman and Roldano Cattoni and Matteo Negri and Marco Turchi and Douglas W. Oard and Matt Post}, + booktitle={Proceedings of Interspeech}, + year={2021}, +} + +@inproceedings{wang2020fairseqs2t, + title = {fairseq S2T: Fast Speech-to-Text Modeling with fairseq}, + author = {Changhan Wang and Yun Tang and Xutai Ma and Anne Wu and Dmytro Okhonko and Juan Pino}, + booktitle = {Proceedings of the 2020 Conference of the Asian Chapter of the Association for Computational Linguistics (AACL): System Demonstrations}, + year = {2020}, +} + +@inproceedings{ott2019fairseq, + title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling}, + author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli}, + booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations}, + year = {2019}, +} +``` + +[[Back]](..) diff --git a/fairseq/examples/speech_to_text/docs/mustc_example.md b/fairseq/examples/speech_to_text/docs/mustc_example.md new file mode 100644 index 0000000..c95ef3e --- /dev/null +++ b/fairseq/examples/speech_to_text/docs/mustc_example.md @@ -0,0 +1,155 @@ +[[Back]](..) 
+ +# S2T Example: Speech Translation (ST) on MuST-C + +[MuST-C](https://www.aclweb.org/anthology/N19-1202) is multilingual speech-to-text translation corpus with +8-language translations on English TED talks. We match the state-of-the-art performance in +[ESPNet-ST](https://arxiv.org/pdf/2004.10234.pdf) with a simpler model training pipeline. + +## Data Preparation +[Download](https://ict.fbk.eu/must-c) and unpack MuST-C data to a path +`${MUSTC_ROOT}/en-${TARGET_LANG_ID}`, then preprocess it with +```bash +# additional Python packages for S2T data processing/model training +pip install pandas torchaudio soundfile sentencepiece + +# Generate TSV manifests, features, vocabulary +# and configuration for each language +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task asr \ + --vocab-type unigram --vocab-size 5000 +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task st \ + --vocab-type unigram --vocab-size 8000 + +# Add vocabulary and configuration for joint data +# (based on the manifests and features generated above) +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task asr --joint \ + --vocab-type unigram --vocab-size 10000 +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task st --joint \ + --vocab-type unigram --vocab-size 10000 +``` +The generated files (manifest, features, vocabulary and data configuration) will be added to +`${MUSTC_ROOT}/en-${TARGET_LANG_ID}` (per-language data) and `MUSTC_ROOT` (joint data). 
+ +Download our vocabulary files if you want to use our pre-trained models: +- ASR: [En-De](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_de_asr_vocab_unigram5000.zip), [En-Nl](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_nl_asr_vocab_unigram5000.zip), [En-Es](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_es_asr_vocab_unigram5000.zip), [En-Fr](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_fr_asr_vocab_unigram5000.zip), [En-It](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_it_asr_vocab_unigram5000.zip), [En-Pt](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_pt_asr_vocab_unigram5000.zip), [En-Ro](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ro_asr_vocab_unigram5000.zip), [En-Ru](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ru_asr_vocab_unigram5000.zip), [Joint](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_joint_asr_vocab_unigram10000.zip) +- ST: [En-De](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_de_st_vocab_unigram8000.zip), [En-Nl](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_nl_st_vocab_unigram8000.zip), [En-Es](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_es_st_vocab_unigram8000.zip), [En-Fr](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_fr_st_vocab_unigram8000.zip), [En-It](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_it_st_vocab_unigram8000.zip), [En-Pt](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_pt_st_vocab_unigram8000.zip), [En-Ro](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ro_st_vocab_unigram8000.zip), [En-Ru](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ru_st_vocab_unigram8000.zip), [Multilingual](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_multilingual_st_vocab_unigram10000.zip) + +## ASR +#### Training +En-De as example: +```bash +fairseq-train ${MUSTC_ROOT}/en-de \ + --config-yaml config_asr.yaml --train-subset train_asr --valid-subset dev_asr \ + --save-dir ${ASR_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-update 100000 \ + --task speech_to_text --criterion 
label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --optimizer adam --lr 1e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 +``` +For joint model (using ASR data from all 8 directions): +```bash +fairseq-train ${MUSTC_ROOT} \ + --config-yaml config_asr.yaml \ + --train-subset train_de_asr,train_nl_asr,train_es_asr,train_fr_asr,train_it_asr,train_pt_asr,train_ro_asr,train_ru_asr \ + --valid-subset dev_de_asr,dev_nl_asr,dev_es_asr,dev_fr_asr,dev_it_asr,dev_pt_asr,dev_ro_asr,dev_ru_asr \ + --save-dir ${JOINT_ASR_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-update 100000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --optimizer adam --lr 1e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 +``` +where `ASR_SAVE_DIR` (`JOINT_ASR_SAVE_DIR`) is the checkpoint root path. We set `--update-freq 8` to simulate 8 GPUs +with 1 GPU. You may want to update it accordingly when using more than 1 GPU. 
+ +#### Inference & Evaluation +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ASR_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME}" +fairseq-generate ${MUSTC_ROOT}/en-de \ + --config-yaml config_asr.yaml --gen-subset tst-COMMON_asr --task speech_to_text \ + --path ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} --max-tokens 50000 --beam 5 \ + --scoring wer --wer-tokenizer 13a --wer-lowercase --wer-remove-punct + +# For models trained on joint data +python scripts/average_checkpoints.py \ + --inputs ${JOINT_ASR_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${JOINT_ASR_SAVE_DIR}/${CHECKPOINT_FILENAME}" +for LANG in de nl es fr it pt ro ru; do + fairseq-generate ${MUSTC_ROOT} \ + --config-yaml config_asr.yaml --gen-subset tst-COMMON_${LANG}_asr --task speech_to_text \ + --path ${JOINT_ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} --max-tokens 50000 --beam 5 \ + --scoring wer --wer-tokenizer 13a --wer-lowercase --wer-remove-punct +done +``` +#### Results +| Data | --arch | Params | En-De | En-Nl | En-Es | En-Fr | En-It | En-Pt | En-Ro | En-Ru | Model | +|---|---|---|---|---|---|---|---|---|---|---|---| +| Single | s2t_transformer_s | 31M | [18.2](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_de_asr_transformer_s.pt) | [17.6](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_nl_asr_transformer_s.pt) | [17.7](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_es_asr_transformer_s.pt) | [17.2](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_fr_asr_transformer_s.pt) | [17.9](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_it_asr_transformer_s.pt) | [19.1](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_pt_asr_transformer_s.pt) | [18.1](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ro_asr_transformer_s.pt) | [17.7](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ru_asr_transformer_s.pt) | (<-Download) | +| Joint | s2t_transformer_m | 76M | 16.8 | 16.7 | 16.9 | 16.9 | 17.0 | 
17.4 | 17.0 | 16.9 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_joint_asr_transformer_m.pt) | + +## ST +#### Training +En-De as example: +```bash +fairseq-train ${MUSTC_ROOT}/en-de \ + --config-yaml config_st.yaml --train-subset train_st --valid-subset dev_st \ + --save-dir ${ST_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-update 100000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 \ + --load-pretrained-encoder-from ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} +``` +For multilingual model (all 8 directions): +```bash +fairseq-train ${MUSTC_ROOT} \ + --config-yaml config_st.yaml \ + --train-subset train_de_st,train_nl_st,train_es_st,train_fr_st,train_it_st,train_pt_st,train_ro_st,train_ru_st \ + --valid-subset dev_de_st,dev_nl_st,dev_es_st,dev_fr_st,dev_it_st,dev_pt_st,dev_ro_st,dev_ru_st \ + --save-dir ${MULTILINGUAL_ST_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-update 100000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --label-smoothing 0.1 --report-accuracy \ + --arch s2t_transformer_s --ignore-prefix-size 1 --optimizer adam --lr 2e-3 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 \ + --load-pretrained-encoder-from ${JOINT_ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} +``` +where `ST_SAVE_DIR` (`MULTILINGUAL_ST_SAVE_DIR`) is the checkpoint root path. The ST encoder is pre-trained by ASR +for faster training and better performance: `--load-pretrained-encoder-from <(JOINT_)ASR checkpoint path>`. We set +`--update-freq 8` to simulate 8 GPUs with 1 GPU. You may want to update it accordingly when using more than 1 GPU. +For multilingual models, we prepend target language ID token as target BOS, which should be excluded from +the training loss via `--ignore-prefix-size 1`. 
+ +#### Inference & Evaluation +Average the last 10 checkpoints and evaluate on the `tst-COMMON` split: +```bash +CHECKPOINT_FILENAME=avg_last_10_checkpoint.pt +python scripts/average_checkpoints.py \ + --inputs ${ST_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${ST_SAVE_DIR}/${CHECKPOINT_FILENAME}" +fairseq-generate ${MUSTC_ROOT}/en-de \ + --config-yaml config_st.yaml --gen-subset tst-COMMON_st --task speech_to_text \ + --path ${ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 --scoring sacrebleu + +# For multilingual models +python scripts/average_checkpoints.py \ + --inputs ${MULTILINGUAL_ST_SAVE_DIR} --num-epoch-checkpoints 10 \ + --output "${MULTILINGUAL_ST_SAVE_DIR}/${CHECKPOINT_FILENAME}" +for LANG in de nl es fr it pt ro ru; do + fairseq-generate ${MUSTC_ROOT} \ + --config-yaml config_st.yaml --gen-subset tst-COMMON_${LANG}_st --task speech_to_text \ + --prefix-size 1 --path ${MULTILINGUAL_ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --max-tokens 50000 --beam 5 --scoring sacrebleu +done +``` +For multilingual models, we force decoding from the target language ID token (as BOS) via `--prefix-size 1`. 
+ +#### Results +| Data | --arch | Params | En-De | En-Nl | En-Es | En-Fr | En-It | En-Pt | En-Ro | En-Ru | Model | +|---|---|---|---|---|---|---|---|---|---|---|---| +| Bilingual | s2t_transformer_s | 31M | [22.7](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_de_st_transformer_s.pt) | [27.3](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_nl_st_transformer_s.pt) | [27.2](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_es_st_transformer_s.pt) | [32.9](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_fr_st_transformer_s.pt) | [22.7](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_it_st_transformer_s.pt) | [28.1](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_pt_st_transformer_s.pt) | [21.9](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ro_st_transformer_s.pt) | [15.3](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_ru_st_transformer_s.pt) | (<-Download) | +| Multilingual | s2t_transformer_m | 76M | 24.5 | 28.6 | 28.2 | 34.9 | 24.6 | 31.1 | 23.8 | 16.0 | [Download](https://dl.fbaipublicfiles.com/fairseq/s2t/mustc_multilingual_st_transformer_m.pt) | + +[[Back]](..) diff --git a/fairseq/examples/speech_to_text/docs/simulst_mustc_example.md b/fairseq/examples/speech_to_text/docs/simulst_mustc_example.md new file mode 100644 index 0000000..f3b5a41 --- /dev/null +++ b/fairseq/examples/speech_to_text/docs/simulst_mustc_example.md @@ -0,0 +1,190 @@ +# Simultaneous Speech Translation (SimulST) on MuST-C + +This is a tutorial on training and evaluating a transformer *wait-k* simultaneous model on the MuST-C English-German dataset, from [SimulMT to SimulST: Adapting Simultaneous Text Translation to End-to-End Simultaneous Speech Translation](https://www.aclweb.org/anthology/2020.aacl-main.58.pdf). + +[MuST-C](https://www.aclweb.org/anthology/N19-1202) is a multilingual speech-to-text translation corpus with 8-language translations on English TED talks. + +## Data Preparation +This section introduces the data preparation for training and evaluation. 
+If you only want to evaluate the model, please jump to [Inference & Evaluation](#inference--evaluation) + +[Download](https://ict.fbk.eu/must-c) and unpack MuST-C data to a path +`${MUSTC_ROOT}/en-${TARGET_LANG_ID}`, then preprocess it with +```bash +# Additional Python packages for S2T data processing/model training +pip install pandas torchaudio sentencepiece + +# Generate TSV manifests, features, vocabulary, +# global cepstral mean and variance estimation, +# and configuration for each language +cd fairseq + +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task asr \ + --vocab-type unigram --vocab-size 10000 \ + --cmvn-type global + +python examples/speech_to_text/prep_mustc_data.py \ + --data-root ${MUSTC_ROOT} --task st \ + --vocab-type unigram --vocab-size 10000 \ + --cmvn-type global +``` + +## ASR Pretraining +We need a pretrained offline ASR model. Assume the save directory of the ASR model is `${ASR_SAVE_DIR}`. +The following command (and the subsequent training commands in this tutorial) assume training on 1 GPU (you can also train on 8 GPUs and remove the `--update-freq 8` option). +``` +fairseq-train ${MUSTC_ROOT}/en-de \ + --config-yaml config_asr.yaml --train-subset train_asr --valid-subset dev_asr \ + --save-dir ${ASR_SAVE_DIR} --num-workers 4 --max-tokens 40000 --max-update 100000 \ + --task speech_to_text --criterion label_smoothed_cross_entropy --report-accuracy \ + --arch convtransformer_espnet --optimizer adam --lr 0.0005 --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 --clip-norm 10.0 --seed 1 --update-freq 8 +``` +A pretrained ASR checkpoint can be downloaded [here](https://dl.fbaipublicfiles.com/simultaneous_translation/must_c_v1_en_de_pretrained_asr) + +## Simultaneous Speech Translation Training + +### Wait-K with fixed pre-decision module +Fixed pre-decision indicates that the model operates its simultaneous policy on the boundaries of fixed chunks. 
+Here is an example of fixed pre-decision ratio 7 (the simultaneous decision is made every 7 encoder states) and +a wait-3 policy model. Assuming the save directory is `${ST_SAVE_DIR}` +```bash + fairseq-train ${MUSTC_ROOT}/en-de \ + --config-yaml config_st.yaml --train-subset train_st --valid-subset dev_st \ + --save-dir ${ST_SAVE_DIR} --num-workers 8 \ + --optimizer adam --lr 0.0001 --lr-scheduler inverse_sqrt --clip-norm 10.0 \ + --criterion label_smoothed_cross_entropy \ + --warmup-updates 4000 --max-update 100000 --max-tokens 40000 --seed 2 \ + --load-pretrained-encoder-from ${ASR_SAVE_DIR}/checkpoint_best.pt \ + --task speech_to_text \ + --arch convtransformer_simul_trans_espnet \ + --simul-type waitk_fixed_pre_decision \ + --waitk-lagging 3 \ + --fixed-pre-decision-ratio 7 \ + --update-freq 8 + +``` +### Monotonic multihead attention with fixed pre-decision module +``` + fairseq-train ${MUSTC_ROOT}/en-de \ + --config-yaml config_st.yaml --train-subset train_st --valid-subset dev_st \ + --save-dir ${ST_SAVE_DIR} --num-workers 8 \ + --optimizer adam --lr 0.0001 --lr-scheduler inverse_sqrt --clip-norm 10.0 \ + --warmup-updates 4000 --max-update 100000 --max-tokens 40000 --seed 2 \ + --load-pretrained-encoder-from ${ASR_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --task speech_to_text \ + --criterion latency_augmented_label_smoothed_cross_entropy \ + --latency-weight-avg 0.1 \ + --arch convtransformer_simul_trans_espnet \ + --simul-type infinite_lookback_fixed_pre_decision \ + --fixed-pre-decision-ratio 7 \ + --update-freq 8 +``` +## Inference & Evaluation +[SimulEval](https://github.com/facebookresearch/SimulEval) is used for evaluation. +The following command is for evaluation. + +``` +git clone https://github.com/facebookresearch/SimulEval.git +cd SimulEval +pip install -e . 
+ +simuleval \ + --agent ${FAIRSEQ}/examples/speech_to_text/simultaneous_translation/agents/fairseq_simul_st_agent.py \ + --source ${SRC_LIST_OF_AUDIO} \ + --target ${TGT_FILE} \ + --data-bin ${MUSTC_ROOT}/en-de \ + --config config_st.yaml \ + --model-path ${ST_SAVE_DIR}/${CHECKPOINT_FILENAME} \ + --output ${OUTPUT} \ + --scores +``` + +The source file `${SRC_LIST_OF_AUDIO}` is a list of paths of audio files. Assuming your audio files are stored at `/home/user/data`, +it should look like this + +```bash +/home/user/data/audio-1.wav +/home/user/data/audio-2.wav +``` + +Each line of the target file `${TGT_FILE}` is the translation for the corresponding audio file input. +```bash +Translation_1 +Translation_2 +``` +The evaluation runs on the original MUSTC segmentation. +The following command will generate the wav list and text file for an evaluation set `${SPLIT}` (chosen from `dev`, `tst-COMMON` and `tst-HE`) in MUSTC to `${EVAL_DATA}`. +```bash +python ${FAIRSEQ}/examples/speech_to_text/seg_mustc_data.py \ + --data-root ${MUSTC_ROOT} --lang de \ + --split ${SPLIT} --task st \ + --output ${EVAL_DATA} +``` + +The `--data-bin` and `--config` should be the same as in the previous section if you prepare the data from scratch. +If only for evaluation, a prepared data directory can be found [here](https://dl.fbaipublicfiles.com/simultaneous_translation/must_c_v1.0_en_de_databin.tgz). It contains +- `spm_unigram10000_st.model`: a sentencepiece model binary. +- `spm_unigram10000_st.txt`: the dictionary file generated by the sentencepiece model. +- `gcmvn.npz`: the binary for global cepstral mean and variance. +- `config_st.yaml`: the config yaml file. It looks like this. +You will need to set the absolute paths for `sentencepiece_model` and `stats_npz_path` if the data directory is downloaded. 
+```yaml +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: ABS_PATH_TO_SENTENCEPIECE_MODEL +global_cmvn: + stats_npz_path: ABS_PATH_TO_GCMVN_FILE +input_channels: 1 +input_feat_per_channel: 80 +sampling_alpha: 1.0 +specaugment: + freq_mask_F: 27 + freq_mask_N: 1 + time_mask_N: 1 + time_mask_T: 100 + time_mask_p: 1.0 + time_wrap_W: 0 +transforms: + '*': + - global_cmvn + _train: + - global_cmvn + - specaugment +vocab_filename: spm_unigram10000_st.txt +``` + +Notice that once a `--data-bin` is set, the `--config` is the base name of the config yaml, not the full path. + +Set `--model-path` to the model checkpoint. +A pretrained checkpoint can be downloaded from [here](https://dl.fbaipublicfiles.com/simultaneous_translation/convtransformer_wait5_pre7), which is a wait-5 model with a pre-decision of 280 ms. + +The result of this model on `tst-COMMON` is: +```bash +{ + "Quality": { + "BLEU": 13.94974229366959 + }, + "Latency": { + "AL": 1751.8031870037803, + "AL_CA": 2338.5911762796536, + "AP": 0.7931395378788959, + "AP_CA": 0.9405103863210942, + "DAL": 1987.7811616943081, + "DAL_CA": 2425.2751560926167 + } +} +``` + +If `--output ${OUTPUT}` option is used, the detailed log and scores will be stored under the `${OUTPUT}` directory. + + +The quality is measured by detokenized BLEU. So make sure that the predicted words sent to the server are detokenized. + +The latency metrics are +* Average Proportion +* Average Lagging +* Differentiable Average Lagging + +Again they will also be evaluated on detokenized text. diff --git a/fairseq/examples/speech_to_text/prep_covost_data.py b/fairseq/examples/speech_to_text/prep_covost_data.py new file mode 100644 index 0000000..411e9b5 --- /dev/null +++ b/fairseq/examples/speech_to_text/prep_covost_data.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import argparse
import logging
from pathlib import Path
import shutil
from tempfile import NamedTemporaryFile
from typing import Optional, Tuple

import pandas as pd
import torchaudio
from examples.speech_to_text.data_utils import (
    create_zip,
    extract_fbank_features,
    filter_manifest_df,
    gen_config_yaml,
    gen_vocab,
    get_zip_manifest,
    load_df_from_tsv,
    save_df_to_tsv,
)
from torch import Tensor
from torch.utils.data import Dataset
from torchaudio.datasets.utils import download_url, extract_archive
from tqdm import tqdm


log = logging.getLogger(__name__)


MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"]


class CoVoST(Dataset):
    """Create a Dataset for CoVoST (https://github.com/facebookresearch/covost).

    The CoVoST split/translation TSV is downloaded into ``root`` when its
    archive is not already there, then joined with the Common Voice
    ``validated.tsv`` found under ``root``.

    Args:
        root (str): root path to the dataset and generated manifests/features;
            must contain ``validated.tsv`` and a ``clips`` directory
        split (str): one of ``SPLITS`` ("train", "dev", "test")
        source_language (str): source (audio) language
        target_language (str, optional): target (text) language,
            None for no translation (default: None)
        version (int, optional): CoVoST version; must be in ``VERSIONS``
            (default: 2)
    """

    COVOST_URL_TEMPLATE = (
        "https://dl.fbaipublicfiles.com/covost/"
        "covost_v2.{src_lang}_{tgt_lang}.tsv.tar.gz"
    )

    VERSIONS = {2}
    SPLITS = ["train", "dev", "test"]

    XX_EN_LANGUAGES = {
        1: ["fr", "de", "nl", "ru", "es", "it", "tr", "fa", "sv-SE", "mn", "zh-CN"],
        2: [
            "fr",
            "de",
            "es",
            "ca",
            "it",
            "ru",
            "zh-CN",
            "pt",
            "fa",
            "et",
            "mn",
            "nl",
            "tr",
            "ar",
            "sv-SE",
            "lv",
            "sl",
            "ta",
            "ja",
            "id",
            "cy",
        ],
    }
    EN_XX_LANGUAGES = {
        1: [],
        2: [
            "de",
            "tr",
            "fa",
            "sv-SE",
            "mn",
            "zh-CN",
            "cy",
            "ca",
            "sl",
            "et",
            "id",
            "ar",
            "ta",
            "lv",
            "ja",
        ],
    }

    def __init__(
        self,
        root: str,
        split: str,
        source_language: str,
        target_language: Optional[str] = None,
        version: int = 2,
    ) -> None:
        assert version in self.VERSIONS and split in self.SPLITS
        assert source_language is not None
        self.no_translation = target_language is None
        if not self.no_translation:
            assert "en" in {source_language, target_language}
            if source_language == "en":
                assert target_language in self.EN_XX_LANGUAGES[version]
            else:
                assert source_language in self.XX_EN_LANGUAGES[version]
        else:
            # Hack here so that we can get "split" column from CoVoST TSV.
            # Note that we use CoVoST train split for ASR which is an extension
            # to Common Voice train split.
            target_language = "de" if source_language == "en" else "en"

        self.root: Path = Path(root)

        cv_tsv_path = self.root / "validated.tsv"
        assert cv_tsv_path.is_file()

        covost_url = self.COVOST_URL_TEMPLATE.format(
            src_lang=source_language, tgt_lang=target_language
        )
        covost_archive = self.root / Path(covost_url).name
        if not covost_archive.is_file():
            download_url(covost_url, self.root.as_posix(), hash_value=None)
        extract_archive(covost_archive.as_posix())

        cv_tsv = load_df_from_tsv(cv_tsv_path)
        covost_tsv = load_df_from_tsv(
            self.root / Path(covost_url).name.replace(".tar.gz", "")
        )
        df = pd.merge(
            left=cv_tsv[["path", "sentence", "client_id"]],
            right=covost_tsv[["path", "translation", "split"]],
            how="inner",
            on="path",
        )
        if split == "train":
            # The train split also includes the "train_covost" rows.
            df = df[(df["split"] == split) | (df["split"] == f"{split}_covost")]
        else:
            df = df[df["split"] == split]
        data = df.to_dict(orient="index").items()
        data = [v for k, v in sorted(data, key=lambda x: x[0])]

        # Keep only the clips torchaudio can open; unreadable ones are skipped
        # (best effort) but reported once so the loss is not silent.
        self.data = []
        n_skipped = 0
        for e in data:
            path = self.root / "clips" / e["path"]
            try:
                torchaudio.info(path.as_posix())
            except RuntimeError:
                n_skipped += 1
                continue
            self.data.append(e)
        if n_skipped:
            log.warning(
                "Skipped %d unreadable clip(s) in split '%s' under %s",
                n_skipped,
                split,
                self.root,
            )

    def __getitem__(
        self, n: int
    ) -> Tuple[Tensor, int, str, Optional[str], str, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            tuple: ``(waveform, sample_rate, sentence, translation, speaker_id,
            sample_id)``; ``translation`` is None when the dataset was built
            without a target language.
        """
        data = self.data[n]
        path = self.root / "clips" / data["path"]
        waveform, sample_rate = torchaudio.load(path)
        sentence = data["sentence"]
        translation = None if self.no_translation else data["translation"]
        speaker_id = data["client_id"]
        _id = data["path"].replace(".mp3", "")
        return waveform, sample_rate, sentence, translation, speaker_id, _id

    def __len__(self) -> int:
        """Return the number of readable samples in this split."""
        return len(self.data)


def process(args):
    """Build features, manifests, vocabulary and config for one language.

    Works under ``<args.data_root>/<args.src_lang>``: extracts filter bank
    features for every split into ``fbank80/``, packs them into
    ``fbank80.zip``, writes one ``<split>_<task>.tsv`` manifest per split,
    generates the vocabulary from the train-split target text, writes
    ``config_<task>.yaml`` and finally removes the unpacked feature directory.

    Args:
        args: parsed CLI namespace with ``data_root``, ``src_lang``,
            ``tgt_lang``, ``vocab_type`` and ``vocab_size``.

    Raises:
        NotADirectoryError: if the per-language root directory does not exist.
    """
    root = Path(args.data_root).absolute() / args.src_lang
    if not root.is_dir():
        raise NotADirectoryError(f"{root} does not exist")
    # Extract features
    feature_root = root / "fbank80"
    feature_root.mkdir(exist_ok=True)
    for split in CoVoST.SPLITS:
        print(f"Fetching split {split}...")
        dataset = CoVoST(root, split, args.src_lang, args.tgt_lang)
        print("Extracting log mel filter bank features...")
        for waveform, sample_rate, _, _, _, utt_id in tqdm(dataset):
            extract_fbank_features(
                waveform, sample_rate, feature_root / f"{utt_id}.npy"
            )
    # Pack features into ZIP
    zip_path = root / "fbank80.zip"
    print("ZIPing features...")
    create_zip(feature_root, zip_path)
    print("Fetching ZIP manifest...")
    audio_paths, audio_lengths = get_zip_manifest(zip_path)
    # Generate TSV manifest
    print("Generating manifest...")
    train_text = []
    task = f"asr_{args.src_lang}"
    if args.tgt_lang is not None:
        task = f"st_{args.src_lang}_{args.tgt_lang}"
    for split in CoVoST.SPLITS:
        manifest = {c: [] for c in MANIFEST_COLUMNS}
        dataset = CoVoST(root, split, args.src_lang, args.tgt_lang)
        for _, _, src_utt, tgt_utt, speaker_id, utt_id in tqdm(dataset):
            manifest["id"].append(utt_id)
            manifest["audio"].append(audio_paths[utt_id])
            manifest["n_frames"].append(audio_lengths[utt_id])
            # ASR (no target language) trains on the transcript itself.
            manifest["tgt_text"].append(src_utt if args.tgt_lang is None else tgt_utt)
            manifest["speaker"].append(speaker_id)
        is_train_split = split.startswith("train")
        if is_train_split:
            train_text.extend(manifest["tgt_text"])
        df = pd.DataFrame.from_dict(manifest)
        df = filter_manifest_df(df, is_train_split=is_train_split)
        save_df_to_tsv(df, root / f"{split}_{task}.tsv")
    # Generate vocab
    vocab_size_str = "" if args.vocab_type == "char" else str(args.vocab_size)
    spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size_str}_{task}"
    with NamedTemporaryFile(mode="w") as f:
        for t in train_text:
            f.write(t + "\n")
        # gen_vocab is handed the file by name, so the buffered text must
        # reach disk first or the tail of the training text may be missing.
        f.flush()
        gen_vocab(
            Path(f.name),
            root / spm_filename_prefix,
            args.vocab_type,
            args.vocab_size
        )
    # Generate config YAML
    gen_config_yaml(
        root,
        spm_filename=spm_filename_prefix + ".model",
        yaml_filename=f"config_{task}.yaml",
        specaugment_policy="lb",
    )
    # Clean up
    shutil.rmtree(feature_root)


def main():
    """Parse command-line arguments and run :func:`process`."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-root", "-d", required=True, type=str,
        help="data root with sub-folders for each language <root>/<src_lang>"
    )
    # Optional with a default: the original combined ``default`` with
    # ``required=True``, which made the default unreachable.
    parser.add_argument(
        "--vocab-type",
        default="unigram",
        type=str,
        choices=["bpe", "unigram", "char"],
    )
    parser.add_argument("--vocab-size", default=1000, type=int)
    parser.add_argument("--src-lang", "-s", required=True, type=str)
    parser.add_argument("--tgt-lang", "-t", type=str)
    args = parser.parse_args()

    process(args)


if __name__ == "__main__":
    main()


# NOTE: the remainder of this source line is the patch header and licence
# banner of the next file in the diff, preserved here as comments.
# diff --git a/fairseq/examples/speech_to_text/prep_librispeech_data.py b/fairseq/examples/speech_to_text/prep_librispeech_data.py
# new file mode 100644
# index 0000000..f379fa7
# --- /dev/null
# +++ b/fairseq/examples/speech_to_text/prep_librispeech_data.py
# @@ -0,0 +1,119 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +import argparse +import logging +from pathlib import Path +import shutil +from tempfile import NamedTemporaryFile + +import pandas as pd +from examples.speech_to_text.data_utils import ( + create_zip, + extract_fbank_features, + gen_config_yaml, + gen_vocab, + get_zip_manifest, + save_df_to_tsv, +) +from torchaudio.datasets import LIBRISPEECH +from tqdm import tqdm + + +log = logging.getLogger(__name__) + +SPLITS = [ + "train-clean-100", + "train-clean-360", + "train-other-500", + "dev-clean", + "dev-other", + "test-clean", + "test-other", +] + +MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"] + + +def process(args): + out_root = Path(args.output_root).absolute() + out_root.mkdir(exist_ok=True) + # Extract features + feature_root = out_root / "fbank80" + feature_root.mkdir(exist_ok=True) + for split in SPLITS: + print(f"Fetching split {split}...") + dataset = LIBRISPEECH(out_root.as_posix(), url=split, download=True) + print("Extracting log mel filter bank features...") + for wav, sample_rate, _, spk_id, chapter_no, utt_no in tqdm(dataset): + sample_id = f"{spk_id}-{chapter_no}-{utt_no}" + extract_fbank_features( + wav, sample_rate, feature_root / f"{sample_id}.npy" + ) + # Pack features into ZIP + zip_path = out_root / "fbank80.zip" + print("ZIPing features...") + create_zip(feature_root, zip_path) + print("Fetching ZIP manifest...") + audio_paths, audio_lengths = get_zip_manifest(zip_path) + # Generate TSV manifest + print("Generating manifest...") + train_text = [] + for split in SPLITS: + manifest = {c: [] for c in MANIFEST_COLUMNS} + dataset = LIBRISPEECH(out_root.as_posix(), url=split) + for _, _, utt, spk_id, chapter_no, utt_no in tqdm(dataset): + sample_id = f"{spk_id}-{chapter_no}-{utt_no}" + manifest["id"].append(sample_id) + manifest["audio"].append(audio_paths[sample_id]) + manifest["n_frames"].append(audio_lengths[sample_id]) + manifest["tgt_text"].append(utt.lower()) + manifest["speaker"].append(spk_id) + save_df_to_tsv( + 
pd.DataFrame.from_dict(manifest), out_root / f"{split}.tsv" + ) + if split.startswith("train"): + train_text.extend(manifest["tgt_text"]) + # Generate vocab + vocab_size = "" if args.vocab_type == "char" else str(args.vocab_size) + spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size}" + with NamedTemporaryFile(mode="w") as f: + for t in train_text: + f.write(t + "\n") + gen_vocab( + Path(f.name), + out_root / spm_filename_prefix, + args.vocab_type, + args.vocab_size, + ) + # Generate config YAML + gen_config_yaml( + out_root, + spm_filename=spm_filename_prefix + ".model", + specaugment_policy="ld" + ) + # Clean up + shutil.rmtree(feature_root) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--output-root", "-o", required=True, type=str) + parser.add_argument( + "--vocab-type", + default="unigram", + required=True, + type=str, + choices=["bpe", "unigram", "char"], + ), + parser.add_argument("--vocab-size", default=10000, type=int) + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_to_text/prep_mtedx_data.py b/fairseq/examples/speech_to_text/prep_mtedx_data.py new file mode 100644 index 0000000..2dfd631 --- /dev/null +++ b/fairseq/examples/speech_to_text/prep_mtedx_data.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import logging +import os +from pathlib import Path +import shutil +from itertools import groupby +from tempfile import NamedTemporaryFile +from typing import Tuple + +import pandas as pd +import soundfile as sf +from examples.speech_to_text.data_utils import ( + create_zip, + extract_fbank_features, + filter_manifest_df, + gen_config_yaml, + gen_vocab, + get_zip_manifest, + load_df_from_tsv, + save_df_to_tsv, +) +import torch +from torch.utils.data import Dataset +from tqdm import tqdm + +from fairseq.data.audio.audio_utils import get_waveform, convert_waveform + + +log = logging.getLogger(__name__) + + +MANIFEST_COLUMNS = [ + "id", "audio", "n_frames", "tgt_text", "speaker", "tgt_lang" +] + + +class mTEDx(Dataset): + """ + Create a Dataset for Multilingual TEDx. + Each item is a tuple of the form: waveform, sample_rate, source utterance, + target utterance, speaker_id, utterance_id + """ + + SPLITS = ["train", "valid", "test"] + LANGPAIRS = ["es-es", "fr-fr", "pt-pt", "it-it", "ru-ru", "el-el", "ar-ar", + "de-de", "es-en", "es-fr", "es-pt", "es-it", "fr-en", "fr-es", + "fr-pt", "pt-en", "pt-es", "it-en", "it-es", "ru-en", "el-en"] + + def __init__(self, root: str, lang: str, split: str) -> None: + assert split in self.SPLITS and lang in self.LANGPAIRS + _root = Path(root) / f"{lang}" / "data" / split + wav_root, txt_root = _root / "wav", _root / "txt" + assert _root.is_dir() and wav_root.is_dir() and txt_root.is_dir() + # Load audio segments + try: + import yaml + except ImportError: + print( + "Please install PyYAML to load the Multilingual TEDx YAML files" + ) + with open(txt_root / f"{split}.yaml") as f: + segments = yaml.load(f, Loader=yaml.BaseLoader) + # Load source and target utterances + src, tgt = lang.split("-") + for _lang in [src, tgt]: + with open(txt_root / f"{split}.{_lang}") as f: + utterances = [r.strip() for r in f] + assert len(segments) == len(utterances) + for i, u in enumerate(utterances): + segments[i][_lang] = u + # 
Gather info + self.data = [] + for wav_filename, _seg_group in groupby(segments, lambda x: x["wav"]): + wav_filename = wav_filename.replace(".wav", ".flac") + wav_path = wav_root / wav_filename + sample_rate = sf.info(wav_path.as_posix()).samplerate + seg_group = sorted(_seg_group, key=lambda x: float(x["offset"])) + for i, segment in enumerate(seg_group): + offset = int(float(segment["offset"]) * sample_rate) + n_frames = int(float(segment["duration"]) * sample_rate) + _id = f"{wav_path.stem}_{i}" + self.data.append( + ( + wav_path.as_posix(), + offset, + n_frames, + sample_rate, + segment[src], + segment[tgt], + segment["speaker_id"], + tgt, + _id, + ) + ) + + def __getitem__( + self, n: int + ) -> Tuple[torch.Tensor, int, str, str, str, str, str]: + wav_path, offset, n_frames, sr, src_utt, tgt_utt, spk_id, tgt_lang, \ + utt_id = self.data[n] + waveform, _ = get_waveform(wav_path, frames=n_frames, start=offset) + waveform = torch.from_numpy(waveform) + return waveform, sr, src_utt, tgt_utt, spk_id, tgt_lang, utt_id + + def __len__(self) -> int: + return len(self.data) + + +def process(args): + root = Path(args.data_root).absolute() + for lang in mTEDx.LANGPAIRS: + cur_root = root / f"{lang}" + if not cur_root.is_dir(): + print(f"{cur_root.as_posix()} does not exist. 
Skipped.") + continue + # Extract features + audio_root = cur_root / ("flac" if args.use_audio_input else "fbank80") + audio_root.mkdir(exist_ok=True) + for split in mTEDx.SPLITS: + print(f"Fetching split {split}...") + dataset = mTEDx(root.as_posix(), lang, split) + if args.use_audio_input: + print("Converting audios...") + for waveform, sample_rate, _, _, _, utt_id in tqdm(dataset): + tgt_sample_rate = 16_000 + _wavform, _ = convert_waveform( + waveform, sample_rate, to_mono=True, + to_sample_rate=tgt_sample_rate + ) + sf.write( + (audio_root / f"{utt_id}.flac").as_posix(), + _wavform.numpy(), tgt_sample_rate + ) + else: + print("Extracting log mel filter bank features...") + for waveform, sample_rate, _, _, _, _, utt_id in tqdm(dataset): + extract_fbank_features( + waveform, sample_rate, audio_root / f"{utt_id}.npy" + ) + # Pack features into ZIP + zip_path = cur_root / f"{audio_root.name}.zip" + print("ZIPing audios/features...") + create_zip(audio_root, zip_path) + print("Fetching ZIP manifest...") + audio_paths, audio_lengths = get_zip_manifest(zip_path) + # Generate TSV manifest + print("Generating manifest...") + train_text = [] + for split in mTEDx.SPLITS: + is_train_split = split.startswith("train") + manifest = {c: [] for c in MANIFEST_COLUMNS} + ds = mTEDx(args.data_root, lang, split) + for _, _, src_utt, tgt_utt, spk_id, tgt_lang, utt_id in tqdm(ds): + manifest["id"].append(utt_id) + manifest["audio"].append(audio_paths[utt_id]) + manifest["n_frames"].append(audio_lengths[utt_id]) + manifest["tgt_text"].append( + src_utt if args.task == "asr" else tgt_utt + ) + manifest["speaker"].append(spk_id) + manifest["tgt_lang"].append(tgt_lang) + if is_train_split: + train_text.extend(manifest["tgt_text"]) + df = pd.DataFrame.from_dict(manifest) + df = filter_manifest_df(df, is_train_split=is_train_split) + save_df_to_tsv(df, cur_root / f"{split}_{args.task}.tsv") + # Generate vocab + v_size_str = "" if args.vocab_type == "char" else str(args.vocab_size) + 
spm_filename_prefix = f"spm_{args.vocab_type}{v_size_str}_{args.task}" + with NamedTemporaryFile(mode="w") as f: + for t in train_text: + f.write(t + "\n") + gen_vocab( + Path(f.name), + cur_root / spm_filename_prefix, + args.vocab_type, + args.vocab_size, + ) + # Generate config YAML + if args.use_audio_input: + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy=None, + extra={"use_audio_input": True} + ) + else: + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy="lb", + ) + # Clean up + shutil.rmtree(audio_root) + + +def process_joint(args): + cur_root = Path(args.data_root) + assert all((cur_root / f"{lang}").is_dir() for lang in mTEDx.LANGPAIRS), \ + "do not have downloaded data available for all languages" + # Generate vocab + vocab_size_str = "" if args.vocab_type == "char" else str(args.vocab_size) + spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size_str}_{args.task}" + with NamedTemporaryFile(mode="w") as f: + for lang in mTEDx.LANGPAIRS: + tsv_path = cur_root / f"{lang}" / f"train_{args.task}.tsv" + df = load_df_from_tsv(tsv_path) + for t in df["tgt_text"]: + f.write(t + "\n") + special_symbols = None + if args.joint: + # Add tgt_lang tags to dict + special_symbols = list( + {f'<lang:{lang.split("-")[1]}>' for lang in mTEDx.LANGPAIRS} + ) + gen_vocab( + Path(f.name), + cur_root / spm_filename_prefix, + args.vocab_type, + args.vocab_size, + special_symbols=special_symbols + ) + # Generate config YAML + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy="ld", + prepend_tgt_lang_tag=(args.joint), + ) + # Make symbolic links to manifests + for lang in mTEDx.LANGPAIRS: + for split in mTEDx.SPLITS: + src_path = cur_root / f"{lang}" / f"{split}_{args.task}.tsv" + desc_path = cur_root / 
f"{split}_{lang}_{args.task}.tsv" + if not desc_path.is_symlink(): + os.symlink(src_path, desc_path) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--data-root", "-d", required=True, type=str) + parser.add_argument( + "--vocab-type", + default="unigram", + required=True, + type=str, + choices=["bpe", "unigram", "char"], + ), + parser.add_argument("--vocab-size", default=8000, type=int) + parser.add_argument("--task", type=str, choices=["asr", "st"]) + parser.add_argument("--joint", action="store_true", help="") + parser.add_argument("--use-audio-input", action="store_true") + args = parser.parse_args() + + if args.joint: + process_joint(args) + else: + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_to_text/prep_mustc_data.py b/fairseq/examples/speech_to_text/prep_mustc_data.py new file mode 100644 index 0000000..c2362f7 --- /dev/null +++ b/fairseq/examples/speech_to_text/prep_mustc_data.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import logging +import os +from pathlib import Path +import shutil +from itertools import groupby +from tempfile import NamedTemporaryFile +from typing import Tuple + +import numpy as np +import pandas as pd +import soundfile as sf +from examples.speech_to_text.data_utils import ( + create_zip, + extract_fbank_features, + filter_manifest_df, + gen_config_yaml, + gen_vocab, + get_zip_manifest, + load_df_from_tsv, + save_df_to_tsv, + cal_gcmvn_stats, +) +import torch +from torch.utils.data import Dataset +from tqdm import tqdm + +from fairseq.data.audio.audio_utils import get_waveform, convert_waveform + + +log = logging.getLogger(__name__) + + +MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"] + + +class MUSTC(Dataset): + """ + Create a Dataset for MuST-C. Each item is a tuple of the form: + waveform, sample_rate, source utterance, target utterance, speaker_id, + utterance_id + """ + + SPLITS = ["train", "dev", "tst-COMMON", "tst-HE"] + LANGUAGES = ["de", "es", "fr", "it", "nl", "pt", "ro", "ru"] + + def __init__(self, root: str, lang: str, split: str) -> None: + assert split in self.SPLITS and lang in self.LANGUAGES + _root = Path(root) / f"en-{lang}" / "data" / split + wav_root, txt_root = _root / "wav", _root / "txt" + assert _root.is_dir() and wav_root.is_dir() and txt_root.is_dir() + # Load audio segments + try: + import yaml + except ImportError: + print("Please install PyYAML to load the MuST-C YAML files") + with open(txt_root / f"{split}.yaml") as f: + segments = yaml.load(f, Loader=yaml.BaseLoader) + # Load source and target utterances + for _lang in ["en", lang]: + with open(txt_root / f"{split}.{_lang}") as f: + utterances = [r.strip() for r in f] + assert len(segments) == len(utterances) + for i, u in enumerate(utterances): + segments[i][_lang] = u + # Gather info + self.data = [] + for wav_filename, _seg_group in groupby(segments, lambda x: x["wav"]): + wav_path = wav_root / wav_filename + sample_rate = 
sf.info(wav_path.as_posix()).samplerate + seg_group = sorted(_seg_group, key=lambda x: x["offset"]) + for i, segment in enumerate(seg_group): + offset = int(float(segment["offset"]) * sample_rate) + n_frames = int(float(segment["duration"]) * sample_rate) + _id = f"{wav_path.stem}_{i}" + self.data.append( + ( + wav_path.as_posix(), + offset, + n_frames, + sample_rate, + segment["en"], + segment[lang], + segment["speaker_id"], + _id, + ) + ) + + def __getitem__( + self, n: int + ) -> Tuple[torch.Tensor, int, str, str, str, str]: + wav_path, offset, n_frames, sr, src_utt, tgt_utt, spk_id, \ + utt_id = self.data[n] + waveform, _ = get_waveform(wav_path, frames=n_frames, start=offset) + waveform = torch.from_numpy(waveform) + return waveform, sr, src_utt, tgt_utt, spk_id, utt_id + + def __len__(self) -> int: + return len(self.data) + + +def process(args): + root = Path(args.data_root).absolute() + for lang in MUSTC.LANGUAGES: + cur_root = root / f"en-{lang}" + if not cur_root.is_dir(): + print(f"{cur_root.as_posix()} does not exist. 
Skipped.") + continue + # Extract features + audio_root = cur_root / ("flac" if args.use_audio_input else "fbank80") + audio_root.mkdir(exist_ok=True) + + for split in MUSTC.SPLITS: + print(f"Fetching split {split}...") + dataset = MUSTC(root.as_posix(), lang, split) + if args.use_audio_input: + print("Converting audios...") + for waveform, sample_rate, _, _, _, utt_id in tqdm(dataset): + tgt_sample_rate = 16_000 + _wavform, _ = convert_waveform( + waveform, sample_rate, to_mono=True, + to_sample_rate=tgt_sample_rate + ) + sf.write( + (audio_root / f"{utt_id}.flac").as_posix(), + _wavform.T.numpy(), tgt_sample_rate + ) + else: + print("Extracting log mel filter bank features...") + gcmvn_feature_list = [] + if split == 'train' and args.cmvn_type == "global": + print("And estimating cepstral mean and variance stats...") + + for waveform, sample_rate, _, _, _, utt_id in tqdm(dataset): + features = extract_fbank_features( + waveform, sample_rate, audio_root / f"{utt_id}.npy" + ) + if split == 'train' and args.cmvn_type == "global": + if len(gcmvn_feature_list) < args.gcmvn_max_num: + gcmvn_feature_list.append(features) + + if split == 'train' and args.cmvn_type == "global": + # Estimate and save cmv + stats = cal_gcmvn_stats(gcmvn_feature_list) + with open(cur_root / "gcmvn.npz", "wb") as f: + np.savez(f, mean=stats["mean"], std=stats["std"]) + + # Pack features into ZIP + zip_path = cur_root / f"{audio_root.name}.zip" + print("ZIPing audios/features...") + create_zip(audio_root, zip_path) + print("Fetching ZIP manifest...") + audio_paths, audio_lengths = get_zip_manifest( + zip_path, + is_audio=args.use_audio_input, + ) + # Generate TSV manifest + print("Generating manifest...") + train_text = [] + for split in MUSTC.SPLITS: + is_train_split = split.startswith("train") + manifest = {c: [] for c in MANIFEST_COLUMNS} + dataset = MUSTC(args.data_root, lang, split) + for _, _, src_utt, tgt_utt, speaker_id, utt_id in tqdm(dataset): + manifest["id"].append(utt_id) + 
manifest["audio"].append(audio_paths[utt_id]) + manifest["n_frames"].append(audio_lengths[utt_id]) + manifest["tgt_text"].append( + src_utt if args.task == "asr" else tgt_utt + ) + manifest["speaker"].append(speaker_id) + if is_train_split: + train_text.extend(manifest["tgt_text"]) + df = pd.DataFrame.from_dict(manifest) + df = filter_manifest_df(df, is_train_split=is_train_split) + save_df_to_tsv(df, cur_root / f"{split}_{args.task}.tsv") + # Generate vocab + v_size_str = "" if args.vocab_type == "char" else str(args.vocab_size) + spm_filename_prefix = f"spm_{args.vocab_type}{v_size_str}_{args.task}" + with NamedTemporaryFile(mode="w") as f: + for t in train_text: + f.write(t + "\n") + gen_vocab( + Path(f.name), + cur_root / spm_filename_prefix, + args.vocab_type, + args.vocab_size, + ) + # Generate config YAML + if args.use_audio_input: + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy=None, + extra={"use_audio_input": True} + ) + else: + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy="lb", + cmvn_type=args.cmvn_type, + gcmvn_path=( + cur_root / "gcmvn.npz" if args.cmvn_type == "global" + else None + ), + ) + # Clean up + shutil.rmtree(audio_root) + + +def process_joint(args): + cur_root = Path(args.data_root) + assert all( + (cur_root / f"en-{lang}").is_dir() for lang in MUSTC.LANGUAGES + ), "do not have downloaded data available for all 8 languages" + # Generate vocab + vocab_size_str = "" if args.vocab_type == "char" else str(args.vocab_size) + spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size_str}_{args.task}" + with NamedTemporaryFile(mode="w") as f: + for lang in MUSTC.LANGUAGES: + tsv_path = cur_root / f"en-{lang}" / f"train_{args.task}.tsv" + df = load_df_from_tsv(tsv_path) + for t in df["tgt_text"]: + f.write(t + "\n") + special_symbols = None + if args.task == 
'st': + special_symbols = [f'<lang:{lang}>' for lang in MUSTC.LANGUAGES] + gen_vocab( + Path(f.name), + cur_root / spm_filename_prefix, + args.vocab_type, + args.vocab_size, + special_symbols=special_symbols + ) + # Generate config YAML + gen_config_yaml( + cur_root, + spm_filename=spm_filename_prefix + ".model", + yaml_filename=f"config_{args.task}.yaml", + specaugment_policy="ld", + prepend_tgt_lang_tag=(args.task == "st"), + ) + # Make symbolic links to manifests + for lang in MUSTC.LANGUAGES: + for split in MUSTC.SPLITS: + src_path = cur_root / f"en-{lang}" / f"{split}_{args.task}.tsv" + desc_path = cur_root / f"{split}_{lang}_{args.task}.tsv" + if not desc_path.is_symlink(): + os.symlink(src_path, desc_path) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--data-root", "-d", required=True, type=str) + parser.add_argument( + "--vocab-type", + default="unigram", + required=True, + type=str, + choices=["bpe", "unigram", "char"], + ), + parser.add_argument("--vocab-size", default=8000, type=int) + parser.add_argument("--task", type=str, choices=["asr", "st"]) + parser.add_argument("--joint", action="store_true", help="") + parser.add_argument( + "--cmvn-type", default="utterance", + choices=["global", "utterance"], + help="The type of cepstral mean and variance normalization" + ) + parser.add_argument( + "--gcmvn-max-num", default=150000, type=int, + help="Maximum number of sentences to use to estimate global mean and " + "variance" + ) + parser.add_argument("--use-audio-input", action="store_true") + args = parser.parse_args() + + if args.joint: + process_joint(args) + else: + process(args) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/speech_to_text/seg_mustc_data.py b/fairseq/examples/speech_to_text/seg_mustc_data.py new file mode 100644 index 0000000..1ee665d --- /dev/null +++ b/fairseq/examples/speech_to_text/seg_mustc_data.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import logging +from pathlib import Path +import soundfile as sf +from examples.speech_to_text.prep_mustc_data import ( + MUSTC +) + +from tqdm import tqdm + +log = logging.getLogger(__name__) + + +def main(args): + root = Path(args.data_root).absolute() + lang = args.lang + split = args.split + + cur_root = root / f"en-{lang}" + assert cur_root.is_dir(), ( + f"{cur_root.as_posix()} does not exist. Skipped." + ) + + dataset = MUSTC(root.as_posix(), lang, split) + output = Path(args.output).absolute() + output.mkdir(exist_ok=True) + f_text = open(output / f"{split}.{lang}", "w") + f_wav_list = open(output / f"{split}.wav_list", "w") + for waveform, sample_rate, _, text, _, utt_id in tqdm(dataset): + sf.write( + output / f"{utt_id}.wav", + waveform.squeeze(0).numpy(), + samplerate=int(sample_rate) + ) + f_text.write(text + "\n") + f_wav_list.write(str(output / f"{utt_id}.wav") + "\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--data-root", "-d", required=True, type=str) + parser.add_argument("--task", required=True, type=str, choices=["asr", "st"]) + parser.add_argument("--lang", required=True, type=str) + parser.add_argument("--output", required=True, type=str) + parser.add_argument("--split", required=True, choices=MUSTC.SPLITS) + args = parser.parse_args() + + main(args) diff --git a/fairseq/examples/speech_to_text/simultaneous_translation/agents/fairseq_simul_st_agent.py b/fairseq/examples/speech_to_text/simultaneous_translation/agents/fairseq_simul_st_agent.py new file mode 100644 index 0000000..61617a1 --- /dev/null +++ b/fairseq/examples/speech_to_text/simultaneous_translation/agents/fairseq_simul_st_agent.py @@ -0,0 +1,363 @@ +import math +import os +import json +import numpy as np +import torch +import torchaudio.compliance.kaldi as kaldi 
+import yaml +from fairseq import checkpoint_utils, tasks +from fairseq.file_io import PathManager + +try: + from simuleval import READ_ACTION, WRITE_ACTION, DEFAULT_EOS + from simuleval.agents import SpeechAgent + from simuleval.states import ListEntry, SpeechStates +except ImportError: + print("Please install simuleval 'pip install simuleval'") + +SHIFT_SIZE = 10 +WINDOW_SIZE = 25 +SAMPLE_RATE = 16000 +FEATURE_DIM = 80 +BOW_PREFIX = "\u2581" + + +class OnlineFeatureExtractor: + """ + Extract speech feature on the fly. + """ + + def __init__(self, args): + self.shift_size = args.shift_size + self.window_size = args.window_size + assert self.window_size >= self.shift_size + + self.sample_rate = args.sample_rate + self.feature_dim = args.feature_dim + self.num_samples_per_shift = int(self.shift_size * self.sample_rate / 1000) + self.num_samples_per_window = int(self.window_size * self.sample_rate / 1000) + self.len_ms_to_samples = lambda x: x * self.sample_rate / 1000 + self.previous_residual_samples = [] + self.global_cmvn = args.global_cmvn + + def clear_cache(self): + self.previous_residual_samples = [] + + def __call__(self, new_samples): + samples = self.previous_residual_samples + new_samples + if len(samples) < self.num_samples_per_window: + self.previous_residual_samples = samples + return + + # num_frames is the number of frames from the new segment + num_frames = math.floor( + (len(samples) - self.len_ms_to_samples(self.window_size - self.shift_size)) + / self.num_samples_per_shift + ) + + # the number of frames used for feature extraction + # including some part of thte previous segment + effective_num_samples = int( + num_frames * self.len_ms_to_samples(self.shift_size) + + self.len_ms_to_samples(self.window_size - self.shift_size) + ) + + input_samples = samples[:effective_num_samples] + self.previous_residual_samples = samples[ + num_frames * self.num_samples_per_shift: + ] + + torch.manual_seed(1) + output = kaldi.fbank( + 
torch.FloatTensor(input_samples).unsqueeze(0), + num_mel_bins=self.feature_dim, + frame_length=self.window_size, + frame_shift=self.shift_size, + ).numpy() + + output = self.transform(output) + + return torch.from_numpy(output) + + def transform(self, input): + if self.global_cmvn is None: + return input + + mean = self.global_cmvn["mean"] + std = self.global_cmvn["std"] + + x = np.subtract(input, mean) + x = np.divide(x, std) + return x + + +class TensorListEntry(ListEntry): + """ + Data structure to store a list of tensor. + """ + + def append(self, value): + + if len(self.value) == 0: + self.value = value + return + + self.value = torch.cat([self.value] + [value], dim=0) + + def info(self): + return { + "type": str(self.new_value_type), + "length": self.__len__(), + "value": "" if type(self.value) is list else self.value.size(), + } + + +class FairseqSimulSTAgent(SpeechAgent): + + speech_segment_size = 40 # in ms, 4 pooling ratio * 10 ms step size + + def __init__(self, args): + super().__init__(args) + + self.eos = DEFAULT_EOS + + self.gpu = getattr(args, "gpu", False) + + self.args = args + + self.load_model_vocab(args) + + if getattr( + self.model.decoder.layers[0].encoder_attn, + 'pre_decision_ratio', + None + ) is not None: + self.speech_segment_size *= ( + self.model.decoder.layers[0].encoder_attn.pre_decision_ratio + ) + + args.global_cmvn = None + if args.config: + with open(os.path.join(args.data_bin, args.config), "r") as f: + config = yaml.load(f, Loader=yaml.BaseLoader) + + if "global_cmvn" in config: + args.global_cmvn = np.load(config["global_cmvn"]["stats_npz_path"]) + + if args.global_stats: + with PathManager.open(args.global_stats, "r") as f: + global_cmvn = json.loads(f.read()) + self.global_cmvn = {"mean": global_cmvn["mean"], "std": global_cmvn["stddev"]} + + self.feature_extractor = OnlineFeatureExtractor(args) + + self.max_len = args.max_len + + self.force_finish = args.force_finish + + torch.set_grad_enabled(False) + + def 
build_states(self, args, client, sentence_id): + # Initialize states here, for example add customized entry to states + # This function will be called at beginning of every new sentence + states = SpeechStates(args, client, sentence_id, self) + self.initialize_states(states) + return states + + def to_device(self, tensor): + if self.gpu: + return tensor.cuda() + else: + return tensor.cpu() + + @staticmethod + def add_args(parser): + # fmt: off + parser.add_argument('--model-path', type=str, required=True, + help='path to your pretrained model.') + parser.add_argument("--data-bin", type=str, required=True, + help="Path of data binary") + parser.add_argument("--config", type=str, default=None, + help="Path to config yaml file") + parser.add_argument("--global-stats", type=str, default=None, + help="Path to json file containing cmvn stats") + parser.add_argument("--tgt-splitter-type", type=str, default="SentencePiece", + help="Subword splitter type for target text") + parser.add_argument("--tgt-splitter-path", type=str, default=None, + help="Subword splitter model path for target text") + parser.add_argument("--user-dir", type=str, default="examples/simultaneous_translation", + help="User directory for simultaneous translation") + parser.add_argument("--max-len", type=int, default=200, + help="Max length of translation") + parser.add_argument("--force-finish", default=False, action="store_true", + help="Force the model to finish the hypothsis if the source is not finished") + parser.add_argument("--shift-size", type=int, default=SHIFT_SIZE, + help="Shift size of feature extraction window.") + parser.add_argument("--window-size", type=int, default=WINDOW_SIZE, + help="Window size of feature extraction window.") + parser.add_argument("--sample-rate", type=int, default=SAMPLE_RATE, + help="Sample rate") + parser.add_argument("--feature-dim", type=int, default=FEATURE_DIM, + help="Acoustic feature dimension.") + + # fmt: on + return parser + + def load_model_vocab(self, 
def units_to_segment(self, units, states):
    """Merge buffered target sub-word units into an output segment.

    Args:
        units: Buffer of predicted target indices. It is indexed, iterated,
            popped from, and its ``.value`` is inspected here.
            NOTE(review): the concrete type is not visible in this block;
            presumably the ``ListEntry`` assigned in ``initialize_states``.
        states: Agent states; only ``states.units.target`` is read.

    Returns:
        * ``DEFAULT_EOS`` (bare, not wrapped in a list) when the first unit
          equals the decoder dictionary's EOS index.
        * A list holding the joined pieces collected so far, followed by
          ``DEFAULT_EOS`` if the new first unit is EOS, once a second
          ``BOW_PREFIX``-prefixed token is encountered.
        * A list with all units joined (``BOW_PREFIX`` removed) plus
          ``DEFAULT_EOS`` when the last unit is EOS or
          ``len(states.units.target)`` exceeds ``self.max_len``.
        * ``None`` when no segment can be emitted yet.
    """
    # Merge sub word to full word.
    # NOTE(review): ``units[0]`` is read unconditionally; behaviour for an
    # empty buffer depends on the entry type -- confirm callers never pass one.
    if self.model.decoder.dictionary.eos() == units[0]:
        return DEFAULT_EOS

    segment = []
    # Drops a ``None`` placeholder from the underlying value (``predict`` can
    # return ``None``). Assumes ``units.value`` is a list, in which case only
    # the first occurrence is removed -- TODO confirm.
    if None in units.value:
        units.value.remove(None)

    for index in units:
        if index is None:
            # NOTE(review): this pops from ``units`` while iterating it and
            # then still falls through to ``string([index])`` with ``None``;
            # confirm this path is intended/reachable.
            units.pop()
        token = self.model.decoder.dictionary.string([index])
        if token.startswith(BOW_PREFIX):
            if len(segment) == 0:
                # First word-initial piece: start collecting the word.
                segment += [token.replace(BOW_PREFIX, "")]
            else:
                # A second word-initial piece means the collected word is
                # complete: pop one unit per collected piece.
                # NOTE(review): which end ``pop()`` removes from is defined
                # by the entry type, not visible here.
                for j in range(len(segment)):
                    units.pop()

                string_to_return = ["".join(segment)]

                # After popping, an EOS at the front ends the sentence too.
                if self.model.decoder.dictionary.eos() == units[0]:
                    string_to_return += [DEFAULT_EOS]

                return string_to_return
        else:
            # Continuation piece: append to the current word.
            segment += [token.replace(BOW_PREFIX, "")]

    # ``and`` binds tighter than ``or``: the max-length check applies even
    # when ``units`` is empty.
    if (
        len(units) > 0
        and self.model.decoder.dictionary.eos() == units[-1]
        or len(states.units.target) > self.max_len
    ):
        tokens = [self.model.decoder.dictionary.string([unit]) for unit in units]
        return ["".join(tokens).replace(BOW_PREFIX, "")] + [DEFAULT_EOS]

    # Word not finished yet; caller gets nothing to emit.
    return None
def predict(self, states):
    """Pick the next target index greedily from the latest decoder output.

    The last time step of ``states.decoder_out`` is normalised to
    log-probabilities by the model and the arg-max index at position
    ``[0, 0]`` is taken.

    Returns:
        The predicted index as a Python number, or ``None`` when
        ``self.force_finish`` is set, the prediction is the dictionary's EOS
        index, and ``states.finish_read()`` is still false (i.e. do not stop
        before the source has been fully read).
    """
    last_step = states.decoder_out[:, -1:]
    log_probs = self.model.get_normalized_probs([last_step], log_probs=True)
    best = log_probs.argmax(dim=-1)[0, 0].item()

    # Guard clauses keep the original short-circuit order:
    # force_finish -> EOS comparison -> finish_read().
    if not self.force_finish:
        return best
    if best != self.model.decoder.dictionary.eos():
        return best
    if states.finish_read():
        return best

    # Forced to continue: suppress the premature EOS.
    return None
+ +## Pre-trained models + +Description | Dataset | Model | Test set(s) +---|---|---|--- +Stories with Convolutional Model <br> ([Fan et al., 2018](https://arxiv.org/abs/1805.04833)) | [WritingPrompts](https://dl.fbaipublicfiles.com/fairseq/data/writingPrompts.tar.gz) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/stories_checkpoint.tar.bz2) | [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/stories_test.tar.bz2) + +We provide sample stories generated by the [convolutional seq2seq model](https://dl.fbaipublicfiles.com/fairseq/data/seq2seq_stories.txt) and [fusion model](https://dl.fbaipublicfiles.com/fairseq/data/fusion_stories.txt) from [Fan et al., 2018](https://arxiv.org/abs/1805.04833). The corresponding prompts for the fusion model can be found [here](https://dl.fbaipublicfiles.com/fairseq/data/fusion_prompts.txt). Note that there are unk in the file, as we modeled a small full vocabulary (no BPE or pre-training). We did not use these unk prompts for human evaluation. + +## Dataset + +The dataset can be downloaded like this: + +```bash +cd examples/stories +curl https://dl.fbaipublicfiles.com/fairseq/data/writingPrompts.tar.gz | tar xvzf - +``` + +and contains a train, test, and valid split. The dataset is described here: https://arxiv.org/abs/1805.04833. We model only the first 1000 words of each story, including one newLine token. + +## Example usage + +First we will preprocess the dataset. Note that the dataset release is the full data, but the paper models the first 1000 words of each story. 
Here is example code that trims the dataset to the first 1000 words of each story: +```python +data = ["train", "test", "valid"] +for name in data: + with open(name + ".wp_target") as f: + stories = f.readlines() + stories = [" ".join(i.split()[0:1000]) for i in stories] + with open(name + ".wp_target", "w") as o: + for line in stories: + o.write(line.strip() + "\n") +``` + +Once we've trimmed the data we can binarize it and train our model: +```bash +# Binarize the dataset: +export TEXT=examples/stories/writingPrompts +fairseq-preprocess --source-lang wp_source --target-lang wp_target \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/writingPrompts --padding-factor 1 --thresholdtgt 10 --thresholdsrc 10 + +# Train the model: +fairseq-train data-bin/writingPrompts -a fconv_self_att_wp --lr 0.25 --optimizer nag --clip-norm 0.1 --max-tokens 1500 --lr-scheduler reduce_lr_on_plateau --decoder-attention True --encoder-attention False --criterion label_smoothed_cross_entropy --weight-decay .0000001 --label-smoothing 0 --source-lang wp_source --target-lang wp_target --gated-attention True --self-attention True --project-input True --pretrained False + +# Train a fusion model: +# add the arguments: --pretrained True --pretrained-checkpoint path/to/checkpoint + +# Generate: +# Note: to load the pretrained model at generation time, you need to pass in a model-override argument to communicate to the fusion model at generation time where you have placed the pretrained checkpoint. By default, it will load the exact path of the fusion model's pretrained model from training time. You should use model-override if you have moved the pretrained model (or are using our provided models). If you are generating from a non-fusion model, the model-override argument is not necessary. 
+ +fairseq-generate data-bin/writingPrompts --path /path/to/trained/model/checkpoint_best.pt --batch-size 32 --beam 1 --sampling --sampling-topk 10 --temperature 0.8 --nbest 1 --model-overrides "{'pretrained_checkpoint':'/path/to/pretrained/model/checkpoint'}" +``` + +## Citation +```bibtex +@inproceedings{fan2018hierarchical, + title = {Hierarchical Neural Story Generation}, + author = {Fan, Angela and Lewis, Mike and Dauphin, Yann}, + booktitle = {Conference of the Association for Computational Linguistics (ACL)}, + year = 2018, +} +``` diff --git a/fairseq/examples/textless_nlp/dgslm/README.md b/fairseq/examples/textless_nlp/dgslm/README.md new file mode 100644 index 0000000..917dbb2 --- /dev/null +++ b/fairseq/examples/textless_nlp/dgslm/README.md @@ -0,0 +1,183 @@ +# Generative Spoken Dialogue Language Modeling +[[paper]](https://arxiv.org/abs/2203.16502) [[demo samples]](https://speechbot.github.io/dgslm/index.html) [[blog]](https://ai.facebook.com/blog/generating-chit-chat-including-laughs-yawns-ums-and-other-nonverbal-cues-from-raw-audio/) + +This repo contains the code and pre-trained models for the paper _Generative Spoken Dialogue Language Modeling_. +<details> + <summary>Paper abstract </summary> + +> We introduce dGSLM, the first "textless" model able to generate audio samples of naturalistic spoken dialogues. It uses recent work on unsupervised spoken unit discovery coupled with a dual-tower transformer architecture with cross-attention trained on 2000 hours of two-channel raw conversational audio (Fisher dataset) without any text or labels. We show that our model is able to generate speech, laughter and other paralinguistic signals in the two channels simultaneously and reproduces more naturalistic and fluid turn taking compared to a text-based cascaded model. 
+ +</details> + +## [Speech-to-Unit Encoder for dGSLM: The Fisher HuBERT model](hubert_fisher/) +The [hubert_fisher](hubert_fisher/) repository contains the pre-trained models and recipes to produce discrete units for the dGSLM model. + +## [Unit-to-Speech Decoder for dGSLM](vocoder_hifigan/) +The [vocoder_hifigan](vocoder_hifigan/) repo contains the vocoder and recipes to synthesize the waveform from the discrete units. + +## Spoken Dialogue Transformer Language Model (SpeechDLM) +### Pre-trained model +We share the pre-trained model checkpoint for the best configuration in the paper (DLM-5 model, with Edge Unit Prediction & Delayed Duration Prediction objectives), dubbed as `SpeechDLM`, trained on the 2000 hours of Fisher dataset : +| Pre-trained SpeechDLM model trained on Fisher dataset | +|-----------------------------------------------| +|[model checkpoint](https://dl.fbaipublicfiles.com/textless_nlp/dgslm/checkpoints/speech_dlm/speech_dlm_base.pt) - [dictionary 1](https://dl.fbaipublicfiles.com/textless_nlp/dgslm/checkpoints/speech_dlm/dict.unitA.txt) - [dictionary 2](https://dl.fbaipublicfiles.com/textless_nlp/dgslm/checkpoints/speech_dlm/dict.unitB.txt)| +the two dictionary files correspond to the two channels, and actually have the same content.
+ +### Sample from a trained model +You can sample from a trained SpeechDLM model interactively : +```python +from fairseq.models.speech_dlm import SpeechDLM + +# Load SpeechDLM model +speech_dlm = SpeechDLM.from_pretrained( + model_name_or_path='/path/to/model/dir', + checkpoint_file='speech_dlm_base.pt', + data_name_or_path='/path/to/data/dir' + ) +# Disable dropout +speech_dlm.eval() +# Move model to GPU +speech_dlm.cuda() + +# Define the input sequences +input_sequences = [{ + 'unitA': '7 376 376 133 178 486 486 486 486 486 486 486 486 2 486', + 'unitB': '7 499 415 177 7 7 7 7 7 7 136 136 289 289 408' + }] + +# Sample from the SpeechDLM model +generated_units = speech_dlm.sample( + input_sequences, + max_len_a = 0, + max_len_b = 500, + sampling=True, + beam=5, + ) +# >> {'unitA': '7 376 376 133 178 486 486 486 486 486 486 486 486 2 486 486 178 486 486 2 2 376 376 486 486 486 376 376 387 387 ...', +# >> 'unitB': '7 499 415 177 7 7 7 7 7 7 136 136 289 289 408 32 428 95 356 141 331 439 350 350 192 331 445 202 104 104 ...'} +``` + +Or using the `sample_speech_dlm.py` script : +```bash +python sample_speech_dlm.py \ + --in-file $INPUT_CODE_FILE --out-file $OUTPUT_FILE \ + --ckpt $CHECKPOINT_PATH --data $DATA_DIR +``` +where each line of INPUT_CODE_FILE is a dictionary with keys `'audio', 'unitA', 'unitB'` as follows : +``` +{'audio': 'file_1', 'unitA': '8 8 ... 352 352', 'unitB': '217 8 ... 8 8'} +{'audio': 'file_2', 'unitA': '5 5 ... 65 65', 'unitB': '6 35 ... 8 9'} +... +``` +This code file can be created with the script `create_code_file.py` (using the outputs of `quantize_with_kmeans.py` [here](hubert_fisher/#encode-audio-to-discrete-units)) : +```bash +python examples/textless_nlp/dgslm/create_code_file.py \ + $CHANNEL1_UNITS $CHANNEL2_UNITS $OUTPUT_CODE_FILE +``` + +### Training a SpeechDLM model +#### 1) Data preparation +First, you need to prepare the raw dataset.
For each `split` (train, valid), you need two files corresponding to two channels (namely `unitA` and `unitB` for example) containing the units from each channel separately. Make sure that 2 files have the same number of lines and each corresponding line has the same number of units. + +Here is an example of `.unitA` file : +``` +7 376 376 133 178 +486 486 486 +486 376 +``` +and the corresponding `.unitB` file : +``` +7 499 415 177 7 +7 7 136 +331 445 +``` +These two files can be obtained using the [example command](hubert_fisher/#encode-audio-to-discrete-units) of hubert fisher, with the `--hide-fname` option added. + +The raw dataset directory should contain the following files : +``` +train.unitA valid.unitA +train.unitB valid.unitB +``` + +Next preprocess/binarize the data with `fairseq-preprocess`, but make sure to preprocess each channel separately, and **rename** the preprocessed files under the following format `${split}.${channel}.{bin, idx}`. Each channel also needs a separate dictionary file under the name `dict.${channel}.txt` . 
+ +Here is an example pre-processing code : + +```bash +# Preprocess the first channel (unitA) +fairseq-preprocess --source-lang unitA \ + --only-source \ + --trainpref $RAW_DATA_DIR/train \ + --validpref $RAW_DATA_DIR/valid \ + --destdir $BIN_DATA_DIR \ + --workers 20 + +# Preprocess the second channel (unitB) and reuse the dictionary from the first channel +fairseq-preprocess --source-lang unitB \ + --srcdict $BIN_DATA_DIR/dict.unitA.txt \ + --only-source \ + --trainpref $RAW_DATA_DIR/train \ + --validpref $RAW_DATA_DIR/valid \ + --destdir $BIN_DATA_DIR \ + --workers 20 + +# Rename the bin & index files +for channel in unitA unitB; do + for split in train valid; do + mv $BIN_DATA_DIR/${split}.${channel}-None.${channel}.bin $BIN_DATA_DIR/${split}.${channel}.bin + mv $BIN_DATA_DIR/${split}.${channel}-None.${channel}.idx $BIN_DATA_DIR/${split}.${channel}.idx + done +done +``` +Finally, the preprocessed (bin) dataset directory should contain the following files : +``` +dict.unitA.txt train.unitA.idx train.unitA.bin valid.unitA.idx valid.unitA.bin +dict.unitB.txt train.unitB.idx train.unitB.bin valid.unitB.idx valid.unitB.bin +``` + +#### 2) Train the model +To train the SpeechDLM (with the configuration as the pre-trained model) on 2 GPUs : +```bash +fairseq-train $BIN_DATA_DIR \ + --save-dir $CHECKPOINT_DIR \ + --tensorboard-logdir $CHECKPOINT_DIR \ + --task speech_dlm_task --channels unitA,unitB \ + --next-unit-prediction "False" --edge-unit-prediction "True" \ + --duration-prediction "True" --delayed-duration-target "True" \ + --criterion speech_dlm_criterion \ + --arch speech_dlm --decoder-cross-layers 4 \ + --share-decoder-input-output-embed \ + --dropout 0.1 --attention-dropout 0.1 \ + --optimizer adam --adam-betas "(0.9, 0.98)" --clip-norm 1.0 \ + --lr 0.0005 --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 \ + --max-tokens 18432 --tokens-per-sample 6144 --sample-break-mode none \ + --update-freq 16 --num-workers 4 --skip-invalid-size-inputs-valid-test \ + 
def main():
    """
    Create code file with the following format:
        {'audio': 'file1', 'unitA': 'file1_chnl1_units', 'unitB': 'file1_chnl2_units'}
        {'audio': 'file2', 'unitA': 'file2_chnl1_units', 'unitB': 'file2_chnl2_units'}
        ...

    Given the input units files
        - channel1_units_file:
            file1|file1_chnl1_units
            file2|file2_chnl1_units
            ...
        - channel2_units_file:
            file1|file1_chnl2_units
            file2|file2_chnl2_units
            ...

    Command-line arguments are read from ``sys.argv``.

    Raises:
        ValueError: if the two unit files have a different number of lines,
            if a line is not of the form ``name|units``, if paired lines
            carry a different number of units, or if their file names
            (minus the trailing ``-channelN`` suffix) differ.
    """
    # Local import: the file's top-level import block is left untouched.
    from itertools import zip_longest

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "channel1_units_file",
        type=str,
        help="Units of the first channel.",
    )
    parser.add_argument(
        "channel2_units_file",
        type=str,
        help="Units of the second channel.",
    )
    parser.add_argument(
        "output_file",
        type=str,
        help="Output file.",
    )
    parser.add_argument(
        "--channels",
        type=str,
        default='unitA,unitB',
        help="Comma-separated list of the channel names to create in the code "
             "(Default: 'unitA,unitB').",
    )

    args = parser.parse_args()

    channel_names = args.channels.split(',')
    if len(channel_names) < 2:
        # Fail before the output file is opened (and truncated).
        parser.error("--channels must contain two comma-separated names")

    # File names are expected to end in "-channel1" / "-channel2"; both
    # suffixes have the same length, so one slice width serves both files.
    suffix_len = len("-channel1")

    with open(args.channel1_units_file) as funit1, \
            open(args.channel2_units_file) as funit2, \
            open(args.output_file, 'w') as fout:
        # zip_longest (not zip) so that a length mismatch between the two
        # files is reported instead of silently truncating the output.
        for lineno, (line1, line2) in enumerate(zip_longest(funit1, funit2), start=1):
            if line1 is None or line2 is None:
                raise ValueError(
                    f"Mismatch number of lines: only one units file has a line {lineno}"
                )
            fname1, units1 = line1.strip().split('|')
            fname2, units2 = line2.strip().split('|')
            n_units1, n_units2 = len(units1.split()), len(units2.split())
            # Explicit raises instead of assert: these are input checks and
            # must survive ``python -O``.
            if n_units1 != n_units2:
                raise ValueError(
                    f"Mismatch units length ({n_units1} vs {n_units2})"
                )
            base_fname1 = fname1[:-suffix_len]
            base_fname2 = fname2[:-suffix_len]
            if base_fname1 != base_fname2:
                raise ValueError(
                    f"Mismatch filenames ({base_fname1} vs {base_fname2}). "
                    f"Expected $filename-channel1 and $filename-channel2 in two files"
                )
            code = {
                "audio": base_fname1,
                channel_names[0]: units1,
                channel_names[1]: units2,
            }
            fout.write(str(code))
            fout.write("\n")
    print(f"Codes written to {args.output_file}")
# Vocoder
class HifiganVocoder:
    """Thin wrapper around ``CodeHiFiGANVocoder`` for unit-to-waveform decoding."""

    def __init__(
        self,
        vocoder_path,
        vocoder_cfg_path,
        use_cuda=True,
    ):
        """Load the vocoder in eval mode.

        Args:
            vocoder_path: Checkpoint path passed to ``CodeHiFiGANVocoder``.
            vocoder_cfg_path: Path to the JSON config passed alongside it.
            use_cuda: When true, the vocoder and its inputs are moved to CUDA.
        """
        with open(vocoder_cfg_path) as f:
            cfg = json.load(f)
        self.vocoder = CodeHiFiGANVocoder(vocoder_path, cfg).eval()
        self.use_cuda = use_cuda
        if self.use_cuda:
            self.vocoder.cuda()

    def code2wav(self, code, speaker_id=0, pred_dur=False):
        """Decode one unit sequence into a waveform.

        Args:
            code: Sequence of integer units, or a whitespace-separated string
                of them.
            speaker_id: Speaker index; only sent to the vocoder when
                ``self.vocoder.model.multispkr`` is true.
            pred_dur: Forwarded as the vocoder's second positional argument.

        Returns:
            The vocoder output as a NumPy array (detached, moved to CPU).
        """
        if isinstance(code, str):
            code = list(map(int, code.split()))
        inp = {"code": torch.LongTensor(code).view(1, -1)}
        if self.vocoder.model.multispkr:
            inp["spkr"] = torch.LongTensor([speaker_id]).view(1, 1)
        if self.use_cuda:
            inp = utils.move_to_cuda(inp)
        return self.vocoder(inp, pred_dur).detach().cpu().numpy()

    # Immutable tuple default instead of a shared mutable list.
    def codes2wav(self, codes, speaker_ids=(0, 4), pred_dur=False):
        """Decode a pair of unit sequences into one stacked array.

        Args:
            codes: Two unit sequences (see ``code2wav``), or a dict whose two
                values are taken in insertion order.
            speaker_ids: Indexable pair of speaker indices, one per channel.
            pred_dur: Forwarded to ``code2wav``.

        Returns:
            ``np.stack`` of the two decoded waveforms (channel axis first).

        Raises:
            ValueError: if ``codes`` does not contain exactly two sequences.
        """
        if isinstance(codes, dict):
            codes = list(codes.values())
        # Explicit check instead of assert so it survives ``python -O``.
        if len(codes) != 2:
            raise ValueError(
                f"Expected exactly 2 code sequences, got {len(codes)}"
            )
        wav1 = self.code2wav(codes[0], speaker_ids[0], pred_dur)
        wav2 = self.code2wav(codes[1], speaker_ids[1], pred_dur)
        # np.stack requires both waveforms to have the same shape.
        return np.stack([wav1, wav2])
following command : +``` +python examples/wav2vec/wav2vec_manifest.py --valid-percent=0.0 $AUDIO_DIR --dest=$OUTPUT_DIR --ext=$EXTENSION +``` + +Otherwise, you can encode an audio file in python interactively with the HubertTokenizer class : +```python +# Load the Hubert tokenizer +from examples.textless_nlp.dgslm.dgslm_utils import HubertTokenizer +encoder = HubertTokenizer( + hubert_path = "/path/to/hubert_ckpt.pt", + hubert_layer = 12, + km_path = "path/to/km.bin" +) + +# Encode the audio to units +path = "/path/to/stereo/audio.wav" +codes = encoder.wav2codes(path) +# > ['7 376 376 133 178 486 486 486 486 486 486 486 486 2 486', +# > '7 499 415 177 7 7 7 7 7 7 136 136 289 289 408'] +``` \ No newline at end of file diff --git a/fairseq/examples/textless_nlp/dgslm/sample_speech_dlm.py b/fairseq/examples/textless_nlp/dgslm/sample_speech_dlm.py new file mode 100644 index 0000000..484cbab --- /dev/null +++ b/fairseq/examples/textless_nlp/dgslm/sample_speech_dlm.py @@ -0,0 +1,202 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def main(args):
    """Sample continuations from a SpeechDLM checkpoint and write them out.

    Reads the prompt unit sequences from ``args.in_file``, optionally trims
    each channel to ``args.prefix_size`` units, loads the model given by
    ``args.ckpt`` / ``args.data``, samples with the generation options in
    ``args``, and writes one dict per input line to ``args.out_file`` with
    ``'-generated'`` appended to the ``'audio'`` name.

    Args:
        args: Parsed command-line namespace (see ``cli_main``).
    """
    logger.info(args)

    use_cuda = torch.cuda.is_available()

    # Load the data
    data = load_data(args.in_file)
    channels = args.channels.split(',')
    unit_sequences = [{
        channels[0]: d[channels[0]],
        channels[1]: d[channels[1]],
    } for d in data]
    fnames = [d['audio'] for d in data]
    print(f"Found {len(data)} sequences from {args.in_file}")

    # Limit the prefix size
    if args.prefix_size is not None:
        print(f"Limit the prefix size to {args.prefix_size}")
        unit_sequences = [limit(codes, args.prefix_size) for codes in unit_sequences]

    # Load model from ckpt
    print(f"Loading the SpeechDLM model from {args.ckpt}")
    model = SpeechDLM.from_pretrained(
        model_name_or_path=os.path.dirname(args.ckpt),
        checkpoint_file=os.path.basename(args.ckpt),
        data_name_or_path=args.data
    )
    model.eval()
    if use_cuda:
        model.cuda()

    # Set batch sizes
    model.cfg.dataset.max_tokens = args.batch_max_tokens
    model.max_positions = args.batch_max_positions
    if args.batch_max_sentences is not None:
        model.cfg.dataset.batch_size = args.batch_max_sentences

    # Set seed (if needed)
    if args.seed is not None:
        utils.set_torch_seed(args.seed)

    # Sample from the SpeechDLM model
    print(f"Generating {len(unit_sequences)} sequences with SpeechDLM model...\n"
          f"Generation args: sampling={(not args.beam_search)}, "
          f"sampling_topk={args.sampling_topk}, sampling_topp={args.sampling_topp}, "
          f"beam={args.beam_size}, min_len={args.min_len}, "
          f"max_len_a={args.max_len_a}, max_len_b={args.max_len_b}, "
          f"temperature={args.temperature}, dur_temperature={args.dur_temperature}, "
          f"seed={args.seed}")
    generated_units = model.sample(
        unit_sequences,
        sampling=(not args.beam_search),
        sampling_topk=args.sampling_topk,
        sampling_topp=args.sampling_topp,
        beam=args.beam_size,
        max_len_a=args.max_len_a,
        max_len_b=args.max_len_b,
        min_len=args.min_len,
        temperature=args.temperature,
        duration_temperature=args.dur_temperature,
        verbose=args.verbose,
        skip_invalid_size_inputs=args.skip_invalid_size_batch,
    )

    # Create the generated sequences
    generated_data = [
        {"audio": fname + '-generated', **gen_units}
        for fname, gen_units in zip(fnames, generated_units)
    ]

    # Write the generated sequences
    print(f"Write the generated units to {args.out_file}")
    if args.out_file:
        # A bare file name has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError -- which would discard the finished generation.
        out_dir = os.path.dirname(args.out_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        write_data(args.out_file, generated_data)
+ ) + parser.add_argument( + "--data", + type=str, + required=True, + help="path to the model data dir (containing dict files)", + ) + parser.add_argument( + "--out-file", + type=str, + required=True, + help="Path of the output file.", + ) + parser.add_argument( + "--channels", + type=str, + default='unitA,unitB', + help="Comma-separated list of the channel names" + "(Default: 'unitA,unitB').", + ) + parser.add_argument("--prefix-size", type=int, default=None, + help='Limit the prefix size') + + # Batch sizes + parser.add_argument("--batch-max-tokens", type=int, default=9216, + help='maximum number of tokens considered in a batch') + parser.add_argument("--batch-max-positions", type=int, default=6144, + help='maximum number of tokens allowed for a sentence in a batch') + parser.add_argument("--batch-max-sentences", type=int, default=None, + help='maximum number of sentences considered in a batch') + parser.add_argument("--skip-invalid-size-batch", action='store_true', + help='skip sentences with more tokens than --batch-max-positions') + + # Generation args + parser.add_argument("--beam-search", action='store_true', + help='perform beam search instead of sampling') + parser.add_argument("--beam-size", type=int, default=5, + help="beam width (used in both sampling and beam search mode) " + "(default: 5)") + parser.add_argument("--sampling-topk", type=int, default=-1, + help="only sample from top-k candidates (default: -1, non applied)") + parser.add_argument("--sampling-topp", type=float, default=-1.0, + help="only sample among the smallest set of elements whose cumulative " + "probability mass exceeds p (default: -1.0, non applied)") + parser.add_argument("--max-len-a", type=int, default=0, + help="generate sequences of maximum length ax + b, " + "where x is the source length (default: 0)") + parser.add_argument("--max-len-b", type=int, default=500, + help="generate sequences of maximum length ax + b, " + "where x is the source length (default: 500 ~ 10s)") + 
parser.add_argument("--min-len", type=int, default=1, + help="generate sequences of maximum length ax + b, " + "where x is the source length (default: 1)") + parser.add_argument("--temperature", type=float, default=1.0, + help="temperature when generating unit tokens (default: 1.0)") + parser.add_argument("--dur-temperature", type=float, default=1.0, + help="temperature when generating duration tokens (default: 1.0)") + parser.add_argument("--verbose", action='store_true', + help="print the scores given by the model to generated sequences") + parser.add_argument("--seed", type=int, default=123, + help="seed of the generation model") + + args = parser.parse_args() + + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/README.md b/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/README.md new file mode 100644 index 0000000..5d4a59a --- /dev/null +++ b/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/README.md @@ -0,0 +1,47 @@ +# Dialogue Unit-to-Speech Decoder for dGSLM +For the unit2speech decoder, we train a [discrete unit-based HiFi-GAN vocoder](https://arxiv.org/pdf/2104.00355.pdf) on the [Fisher dataset](http://www.lrec-conf.org/proceedings/lrec2004/pdf/767.pdf). 
+ +## Model checkpoint +The pre-trained model checkpoint can be found here : + +| HiFi-GAN vocoder based on HuBERT Fisher Units | +|-----------------------------------------------| +|[model checkpoint](https://dl.fbaipublicfiles.com/textless_nlp/dgslm/checkpoints/hifigan/hifigan_vocoder) - [config](https://dl.fbaipublicfiles.com/textless_nlp/dgslm/checkpoints/hifigan/config.json) | + +## Decode discrete units to audio +To create waveform from discrete units, use the script `generate_stereo_waveform.py` : +```bash +python examples/textless_nlp/dgslm/vocoder_hifigan/generate_stereo_waveform.py \ + --in-file $INPUT_CODE_FILE \ + --vocoder $VOCODER_PATH \ + --vocoder-cfg $VOCODER_CONFIG \ + --results-path $OUTPUT_DIR +``` +where INPUT_CODE_FILE is expected to have the following format : +``` +{'audio': 'file_1', 'unitA': '8 8 ... 352 352', 'unitB': '217 8 ... 8 8'} +{'audio': 'file_2', 'unitA': '5 5 ... 65 65', 'unitB': '6 35 ... 8 9'} +... +``` + +You can also use the HifiganVocoder class to generate waveform from the codes interactively : +```python +# Load the Hifigan vocoder +from examples.textless_nlp.dgslm.dgslm_utils import HifiganVocoder +decoder = HifiganVocoder( + vocoder_path = "/path/to/hifigan_vocoder", + vocoder_cfg_path = "/path/to/config.json", +) + +# Decode the units to waveform +codes = [ + '7 376 376 133 178 486 486 486 486 486 486 486 486 2 486', + '7 499 415 177 7 7 7 7 7 7 136 136 289 289 408', +] +wav = decoder.codes2wav(codes) +# > array of shape (2, 4800) + +# Play the waveform +import IPython.display as ipd +ipd.Audio(wav, rate=16_000) +``` diff --git a/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/generate_stereo_waveform.py b/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/generate_stereo_waveform.py new file mode 100644 index 0000000..1e15f43 --- /dev/null +++ b/fairseq/examples/textless_nlp/dgslm/vocoder_hifigan/generate_stereo_waveform.py @@ -0,0 +1,137 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import ast +import argparse +import json +import logging +from pathlib import Path +import soundfile as sf +import torch + +from tqdm import tqdm + +from fairseq import utils +from fairseq.models.text_to_speech.vocoder import CodeHiFiGANVocoder + + +logging.basicConfig() +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def dump_result(args, data, sample_id, pred_wav): + assert "audio" in data or args.results_path is not None + if args.results_path: + fname = Path(data["audio"]).stem + ".wav" if "audio" in data else f"{sample_id}_pred.wav" + out_file = Path(args.results_path) / fname + + sf.write( + out_file.as_posix(), + pred_wav.detach().cpu().numpy(), + args.sample_rate, + ) + + +def load_data(in_file): + with open(in_file) as f: + data = [ast.literal_eval(line.strip()) for line in f] + + return data + + +def load_vocoder(vocoder_path, vocoder_cfg_path, use_cuda=True): + with open(vocoder_cfg_path) as f: + cfg = json.load(f) + vocoder = CodeHiFiGANVocoder(vocoder_path, cfg).eval() + if use_cuda: + vocoder = vocoder.cuda() + return vocoder + + +def code2wav(vocoder, code, speaker_id, use_cuda=True): + if isinstance(code, str): + code = list(map(int, code.split())) + inp = dict() + inp["code"] = torch.LongTensor(code).view(1, -1) + if vocoder.model.multispkr: + inp["spkr"] = torch.LongTensor([speaker_id]).view(1, 1) + if use_cuda: + inp = utils.move_to_cuda(inp) + return vocoder(inp) + + +def main(args): + logger.info(args) + + use_cuda = torch.cuda.is_available() and not args.cpu + + vocoder = load_vocoder(args.vocoder, args.vocoder_cfg, use_cuda) + + data = load_data(args.in_file) + + if args.results_path: + Path(args.results_path).mkdir(exist_ok=True, parents=True) + + channels = args.channels.split(',') + speakers = [args.channel1_spk, args.channel2_spk] + + 
for i, d in tqdm(enumerate(data), total=len(data)): + wavs = [] + for key, speaker_id in zip(channels, speakers): + wav = code2wav(vocoder, d[key], speaker_id, use_cuda=use_cuda) + wavs.append(wav) + + wav = torch.stack(wavs, dim=-1) + if args.mix: + wav = torch.mean(wav, dim=-1) + + dump_result(args, d, i, wav) + + +def cli_main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--in-file", + type=str, + required=True, + help="Input file following the same format of the output from create_input.py", + ) + parser.add_argument( + "--vocoder", type=str, required=True, help="path to the vocoder" + ) + parser.add_argument( + "--vocoder-cfg", + type=str, + required=True, + help="path to the vocoder config", + ) + parser.add_argument( + "--channels", + type=str, + default='unitA,unitB', + help="Comma-separated list of the channel names" + "(Default: 'unitA,unitB').", + ) + parser.add_argument("--sample-rate", type=int, default=16_000) + parser.add_argument( + "--results-path", + type=str, + default=None, + help="Output directory. 
If not set, the audios will be stored following the 'audio' field specified in the input file", + ) + parser.add_argument("--channel1-spk", type=int, default=0, help="Speaker of the first channel",) + parser.add_argument("--channel2-spk", type=int, default=4, help="Speaker of the second channel",) + parser.add_argument("--mix", action="store_true", help="Mix the two channels to create output mono files") + parser.add_argument("--cpu", action="store_true", help="run on CPU") + + args = parser.parse_args() + + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/textless_nlp/gslm/README.md b/fairseq/examples/textless_nlp/gslm/README.md new file mode 100644 index 0000000..7a76ffd --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/README.md @@ -0,0 +1,21 @@ +# Generative Spoken Language Modeling + +* [Paper](https://arxiv.org/abs/2102.01192) +* [Demo](https://speechbot.github.io/gslm/index.html) + +We build and evaluate generative speech2speech systems using [Log Mel Filtebank](https://pytorch.org/audio/stable/compliance.kaldi.html#fbank), [Modified CPC](https://github.com/facebookresearch/CPC_audio), [HuBERT Base](https://github.com/pytorch/fairseq/tree/main/examples/hubert) and [Wav2Vec 2.0 Large](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec). Our system is composed of three components, namely, *speech2unit*, *ulm* and *unit2speech*. We explain about models and usage of these components in their respective sub-directories. See the links below. + +## Speech to Unit Model (speech2unit) +Speech to unit model is used for quantizing raw speech into learned discrete speech units. [More details](speech2unit) + +## Unit Language Model (ulm) +Unit Language Model is a generative language model trained on discrete speech units. [More details](ulm) + +## Unit to Speech Model (unit2speech) +Unit to speech model is used for synthesizing speech from discrete speech units. 
[More details](unit2speech) + +## Metrics +We show how to compute ASR based metrics as well as zero-shot metrics proposed in our paper [here](metrics). + +## Tools +We share two tools to resynthesize a given spoken utterance, and generate novel spoken language given a spoken prompt. [More detail](tools) diff --git a/fairseq/examples/textless_nlp/gslm/metrics/README.md b/fairseq/examples/textless_nlp/gslm/metrics/README.md new file mode 100644 index 0000000..0a63e2f --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/README.md @@ -0,0 +1,10 @@ +# GSLM Metrics + +## ASR Metrics +The suite of metrics here uses an ASR model to transcribe the synthesized speech into text, and then uses text-based metrics. We also use word error rate from ASR transcription itself as one of the metrics. [More details](asr_metrics) + +## ABX Metrics +We use [ABX](https://www.semanticscholar.org/paper/ABX-Discriminability-Measures-and-Applications-Schatz/13d3537228f728c1063cc83743cb118bba3367a0) to evaluate how well-separated phonetic categories are with quantized representations. [More details](abx_metrics) + +## sWUGGY and sBLIMP +We refer to [ZeroSpeech challenge](https://www.zerospeech.com/2021/track_s.html#scoring-based-metrics) for details on the sWUGGY and sBLIMP metrics. diff --git a/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/README.md b/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/README.md new file mode 100644 index 0000000..aa2560f --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/README.md @@ -0,0 +1,77 @@ +# ABX-based evaluation + +ABX is used to evaluate the quality of the obtained discrete units. + +The life cycle of the ABX-based evaluation for the Speech-to-Unit contains the following steps: +1. Training an acoustic model (or use an existing acoustic model) ([description](./../..)) +2. Perform quantization of speech by learning a K-means clustering model ([description](./../..)) +3. 
Compute discrete features for ABX computation using the learned clusters +4. Compute the ABX score over the discrete features taking advantage of [libri-light's ABX evaluation script][ll-abx] + +Here we assume that you have already gone through the first two steps and focus solely on extracting features and computing ABX scores. + +## Libri-light setup + +Follow [libri-light's instructions][ll-instructions] for installation and [ABX evaluation setup][ll-abx] (including the download of the data items required for ABX computation). + +## Computing ABX + +### Dumping quantized features + +The first step for the ABX computation is to dump the quantized representations corresponding to the test files. + +```shell +TYPE="hubert" +LAYER=6 +CKPT_PATH="<PATH_TO_HUBERT_MODEL_CHECKPOINT_FILE>" +KM_MODEL_PATH="<PATH_TO_PRETRAINED_KM_MODEL_FILE>" + +SUBSET="dev-clean" +MANIFEST="<PATH_TO_MANIFEST_FOR_LS_DEV-CLEAN>" +DATA_DIR="<PATH_TO_DIR_TO_STORE_FEATURES>/$SUBSET" + +PYTHONPATH=. python examples/textless_nlp/gslm/metrics/abx_metrics/dump_abx_feats.py \ + --feature_type $TYPE \ + --kmeans_model_path $KM_MODEL_PATH \ + --checkpoint_path $CKPT_PATH \ + --layer $LAYER \ + --manifest_path $MANIFEST \ + --out_dir_path $DATA_DIR \ + --extension ".flac" +``` + +Again, the manifest file follows the same structure as elsewhere in the codebase.
+ +### Compute ABX with Libri-light + +Use libri-light's `eval_ABX.py` script (with the appropriate environment set up) as follows: + +```shell +LIBRILIGHT_ROOT="<PATH_TO_LIBRILIGHT>" + +SUBSET="dev-clean" +DATA_DIR="<PATH_TO_DIR_TO_STORE_FEATURES>/$SUBSET" +ITEM_FILE_PATH="$LIBRILIGHT_ROOT/eval/ABX_data/$SUBSET.item" +OUT_DIR="<PATH_TO_DIR_TO_STORE_ABX_SCORES>/$SUBSET" + +FILE_EXTENSION=".npy" +FEATURE_SIZE=0.02 # depends on the model used + +PYTHONPATH=$LIBRILIGHT_ROOT \ + python $LIBRILIGHT_ROOT/eval/eval_ABX.py \ + $DATA_DIR \ + $ITEM_FILE_PATH \ + --file_extension $FILE_EXTENSION \ + --feature_size $FEATURE_SIZE \ + --out $OUT_DIR \ + --mode "all" +``` + +Note that `FEATURE_SIZE` will depend on the model type you are using to extract the acoustic features: +* For HuBERT and Wav2Vec2.0, use `FEATURE_SIZE=0.02` +* For CPC and Log Mel, use `FEATURE_SIZE=0.01` + +If you have a GPU available, make sure you add the `--cuda` flag for faster computation. + +[ll-instructions]: https://github.com/facebookresearch/libri-light +[ll-abx]: https://github.com/facebookresearch/libri-light/tree/master/eval#abx diff --git a/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/dump_abx_feats.py b/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/dump_abx_feats.py new file mode 100644 index 0000000..41cf558 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/abx_metrics/dump_abx_feats.py @@ -0,0 +1,107 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
+ +import argparse +import logging +import os + +import joblib +import numpy as np + +from examples.textless_nlp.gslm.speech2unit.clustering.utils import get_audio_files +from examples.textless_nlp.gslm.speech2unit.pretrained.utils import get_features + +def get_logger(): + log_format = "[%(asctime)s] [%(levelname)s]: %(message)s" + logging.basicConfig(format=log_format, level=logging.INFO) + logger = logging.getLogger(__name__) + return logger + +def get_parser(): + parser = argparse.ArgumentParser( + description="Quantize using K-means clustering over acoustic features." + ) + parser.add_argument( + "--feature_type", + type=str, + choices=["logmel", "hubert", "w2v2", "cpc"], + default=None, + required=True, + help="Acoustic feature type", + ) + parser.add_argument( + "--kmeans_model_path", + type=str, + required=True, + help="K-means model file path to use for inference", + ) + parser.add_argument( + "--manifest_path", + type=str, + default=None, + help="Manifest file containing the root dir and file names", + ) + parser.add_argument( + "--checkpoint_path", + type=str, + help="Pretrained model checkpoint", + ) + parser.add_argument( + "--layer", + type=int, + help="The layer of the pretrained model to extract features from", + default=-1, + ) + parser.add_argument( + "--out_dir_path", + required=True, + type=str, + help="File path of quantized output.", + ) + parser.add_argument( + "--extension", type=str, default=".flac", help="Features file path" + ) + return parser + + +def one_hot(feat, n_clusters): + return np.eye(n_clusters)[feat] + +def main(args, logger): + # Feature extraction + logger.info(f"Extracting {args.feature_type} acoustic features...") + features_batch = get_features( + feature_type=args.feature_type, + checkpoint_path=args.checkpoint_path, + layer=args.layer, + manifest_path=args.manifest_path, + sample_pct=1.0, + flatten=False, + ) + logger.info(f"Features extracted for {len(features_batch)} utterances.\n") + logger.info(f"Dimensionality of 
representation = {features_batch[0].shape[1]}") + + logger.info(f"Loading K-means model from {args.kmeans_model_path} ...") + kmeans_model = joblib.load(open(args.kmeans_model_path, "rb")) + kmeans_model.verbose = False + + _, fnames, _ = get_audio_files(args.manifest_path) + + os.makedirs(args.out_dir_path, exist_ok=True) + logger.info(f"Writing quantized features to {args.out_dir_path}") + for i, feats in enumerate(features_batch): + pred = kmeans_model.predict(feats) + emb = one_hot(pred, kmeans_model.n_clusters) + base_fname = os.path.basename(fnames[i]).rstrip(args.extension) + output_path = os.path.join(args.out_dir_path, f"{base_fname}.npy") + with open(output_path, "wb") as f: + np.save(f, emb) + +if __name__ == "__main__": + parser = get_parser() + args = parser.parse_args() + logger = get_logger() + logger.info(args) + main(args, logger) diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/README.md b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/README.md new file mode 100644 index 0000000..90741f4 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/README.md @@ -0,0 +1,87 @@ +# ASR-based evaluation + +Overall, the life cycle of the ASR-based evaluation for an ULM contains the following steps: + 1. Training an ULM and sampling from it [[description]](./../../ulm) + 2. Running UTS on the sampled unit sequences [[description]](./../../unit2speech) + 3. Pre-processing for the ASR (down-sampling to 16 KHz, aligning length of the generated audio with ground-truth utterances) + 4. Running ASR + 5. Calculation of the post-ASR evaluation metrics + +Here we assume that you have already went throught the first two steps and focus on the rest. 
+ +## Preprocessing +### Down-sampling to 16KHz +The bulk conversion can be done by running +```bash + python $FAIRSEQ_ROOT/examples/textless_nlp/gslm/unit2speech/convert_to_16k.py $UTS_OUTPUT $UTS_OUTPUT_DOWNSAMPLE + ``` + where `$UTS_OUTPUT` specifies the directory with the generated audio and `$UTS_OUTPUT_DOWNSAMPLE` is the directory where downsampled audio would be saved. + + ### Matching by length +This step is somewhat optional. However, if you want to compare the fluency and diversity of a generated speech utterance to that of the ground-truth speech with the same prefix, it is a good idea to force them to be of the same length. +```bash +python $FAIRSEQ_ROOT/examples/textless_nlp/gslm/metrics/asr_metrics/misc/cut_as.py \ + --samples_dir=$UTS_OUTPUT_DOWNSAMPLE --out_dir=$UTS_OUTPUT_DOWNSAMPLE_CUT \ + --prompts_description=data/ground_truth_continuation_dev.json +``` + +Here `ground_truth_continuation_dev.json` is a json file with ground-truth text from LibriSpeech dev-clean, associated with some meta-data (assuming the evaluation is done on dev-clean). This file can be downloaded [[here]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/eval_data/ground_truth_continuation_dev.json). A similar file for the test-clean is [[here]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/eval_data/ground_truth_continuation_test.json). These files are used for the evaluation and contain texts for audio sequences that are at least 6s long. + +## Running ASR +We use a pre-trained wav2vec model to run the ASR step. We first need to prepare manifest files which, roughly, tell the ASR system which files we want to transcribe. You can find more details and download the `960h_scratch.pt` checkpoint +[[here]](https://github.com/pytorch/fairseq/blob/main/examples/wav2vec/README.md). To run ASR, you would also need to +install KenLM, Flashlight decoder, and download the KenLM 4-gram English language model.
+ +```bash + python $FAIRSEQ_ROOT/examples/wav2vec/wav2vec_manifest.py \ + $UTS_OUTPUT_DOWNSAMPLE_CUT --valid-percent 0.0 --dest $MANIFEST_DIR --ext wav +``` +where `$UTS_OUTPUT_DOWNSAMPLE_CUT` specifies the directory with the preprocessed UTS outputs and `$MANIFEST_DIR` is the output directory. + +We will be running an out-of-the-box evaluation script which requires ground-truth transcripts to measure quality metrics. We are only +interested in the transcripts (and we don't have ground-truth outputs for what our ULM generated!), hence we will just generate +some dummy transcripts instead: +```bash +cp $FAIRSEQ_ROOT/examples/textless_nlp/gslm/metrics/asr_metrics/misc/dict.ltr.txt $MANIFEST_DIR +python $FAIRSEQ_ROOT/examples/textless_nlp/gslm/metrics/asr_metrics/misc/dummy_asr_data.py --tsv=$MANIFEST_DIR/train.tsv \ + --output-dir=$MANIFEST_DIR +``` + +Now we are ready to run ASR: +``` +mkdir -p asr +python $FAIRSEQ_ROOT/examples/speech_recognition/infer.py \ + $MANIFEST_DIR \ + --task audio_pretraining --nbest 1 --path 960h_scratch.pt \ + --gen-subset=train --results-path $PATH_TO_ASR_OUTPUT \ + --w2l-decoder kenlm --lm-model 4-gram.bin \ + --lexicon librispeech/lexicon_ltr.lst --word-score -1 \ + --sil-weight 0 --lm-weight 2 --criterion ctc --labels ltr --max-tokens 300000 --remove-bpe letter +``` +where `lexicon_ltr.lst` is the LibriSpeech lexicon (can be downloaded [[here]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/eval_data/lexicon_ltr.lst)) and `$PATH_TO_ASR_OUTPUT` is the output directory. + +## Evaluation metrics +We run evaluation on the 1_000 shortest sequences that are at least 6s long. To filter those from the ASR transcript, we additionally provide each metric script with the paths to the manifest and `ground_truth_continuation_*` files.
+ +### Perplexity (PPX) +To get a PPX metric estimate on an ASR transcript, you need to run the following command: +```bash +python ppx.py --asr-transcript $PATH_TO_ASR_OUTPUT/hypo.word-960h_scratch.pt-train.txt --cut-tail \ + --manifest=$MANIFEST_DIR/train.tsv --prompts-description=data/ground_truth_continuation_dev.json +``` +where `--cut-tail` tells the script to ignore the last token on each line (ASR puts the sequence ID there). + +### Self- and Auto-BLEU +```bash +python self_auto_bleu.py --asr-transcript $PATH_TO_ASR_OUTPUT/hypo.word-960h_scratch.pt-train.txt --cut-tail \ + --manifest=$MANIFEST_DIR/train.tsv --prompts-description=data/ground_truth_continuation_dev.json +``` + +### Continuation-BLEU +```bash +python continuation_eval.py --asr-transcript $PATH_TO_ASR_OUTPUT/hypo.word-960h_scratch.pt-train.txt \ + --manifest=$MANIFEST_DIR/train.tsv --prompts-description=data/ground_truth_continuation_dev.json +``` + +### AUC +Based on the metrics calculated above, we can estimate the AUC of the perplexity/diversity trade-off. We provide an illustration in a [Colab notebook](https://colab.research.google.com/drive/1pVPfOVax_PU3MkYdHRSsa-SI8GBUldNt?usp=sharing). diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/continuation_eval.py b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/continuation_eval.py new file mode 100644 index 0000000..72b92a3 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/continuation_eval.py @@ -0,0 +1,99 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
+ + +from collections import defaultdict +import numpy as np +from misc.bleu_utils import sentence_bleu +import json +import warnings + + +def get_args(): + import argparse + + parser = argparse.ArgumentParser("Tool to calculate Continuation-BLEU2") + parser.add_argument('--asr-transcript', type=str, + help='Path to the transcript file.') + parser.add_argument('--prompts-description', type=str, + help='Path to the ground-truth continuation') + parser.add_argument('--manifest', type=str, required=True) + parser.add_argument('--take-shortest', type=int, default=1000) + + args = parser.parse_args() + + return args + + +def main(): + # NLTK produces warnings + warnings.filterwarnings("ignore") + + args = get_args() + + with open(args.prompts_description, 'r') as fin: + original_continuations = json.loads(fin.read()) + + sequence2length = [(k, v[0]) for k, v in original_continuations.items()] + assert all(float(v) >= 6.0 for (_, v) in sequence2length) # 6 seconds + + sequence2length.sort(key=lambda x: x[1]) + to_take = set(v[0] for v in sequence2length[:args.take_shortest]) + + with open(args.manifest, 'r') as fin: + fin.readline() + + linenum2file = dict([ + (i, l.split("__")[0]) for (i, l) in enumerate(fin) + ]) + + max_files = max(linenum2file.keys()) + continuations = defaultdict(list) + + mean_length_after = 0 + n_examples = 0 + + with open(args.asr_transcript, 'r') as fin: + for line in fin: + n_examples += 1 + line = line.split() + sequence_id = int(line[-1].split('-')[1][:-1]) + + assert sequence_id <= max_files + + sequence_name = linenum2file[sequence_id] + + continuations[sequence_name].append(line[:-1]) + mean_length_after += len(line) + + mean_length_after /= n_examples + print(f'Mean length of continuations, in words: {mean_length_after}') + metric_values = [] + + mean_ground_truth_words = 0 + n_examples = 0 + n_candidates = 0 + + for k, candidates in continuations.items(): + if k not in to_take: + continue + + n_examples += 1 + + ground_truth = 
original_continuations[k][1].split() + n_candidates += len(candidates) + bleu = sentence_bleu(candidates, ground_truth, weights=( + 0.5, 0.5), no_length_penalty=True, averaging_mode="geometric") + mean_ground_truth_words += len(ground_truth) + + metric_values.append(bleu) + + n = len(metric_values) + print( + f'Median BLEU over {n} examples: {np.median(metric_values)} +- {np.std(metric_values) / np.sqrt(n)}') + + +if __name__ == '__main__': + main() diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/bleu_utils.py b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/bleu_utils.py new file mode 100644 index 0000000..75cc527 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/bleu_utils.py @@ -0,0 +1,166 @@ +""" + +TODO: the code is take from Apache-2 Licensed NLTK: make sure we do this properly! + + +Copied over from nltk.tranlate.bleu_score. This code has two major changes: + - allows to turn off length/brevity penalty --- it has no sense for self-bleu, + - allows to use arithmetic instead of geometric mean +""" + +import math +import sys +from fractions import Fraction +import warnings +from collections import Counter +from nltk.translate.bleu_score import modified_precision, closest_ref_length, brevity_penalty, SmoothingFunction + + +def corpus_bleu( + list_of_references, + hypotheses, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, + averaging_mode="geometric", + no_length_penalty=False +): + """ + Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all + the hypotheses and their respective references. + + Instead of averaging the sentence level BLEU scores (i.e. marco-average + precision), the original BLEU metric (Papineni et al. 2002) accounts for + the micro-average precision (i.e. summing the numerators and denominators + for each hypothesis-reference(s) pairs before the division). 
+ + >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + ... 'ensures', 'that', 'the', 'military', 'always', + ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] + >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + ... 'ensures', 'that', 'the', 'military', 'will', 'forever', + ... 'heed', 'Party', 'commands'] + >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + ... 'guarantees', 'the', 'military', 'forces', 'always', + ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + ... 'army', 'always', 'to', 'heed', 'the', 'directions', + ... 'of', 'the', 'party'] + + >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + ... 'interested', 'in', 'world', 'history'] + >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + ... 'because', 'he', 'read', 'the', 'book'] + + >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] + >>> hypotheses = [hyp1, hyp2] + >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS + 0.5920... + + The example below show that corpus_bleu() is different from averaging + sentence_bleu() for hypotheses + + >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1) + >>> score2 = sentence_bleu([ref2a], hyp2) + >>> (score1 + score2) / 2 # doctest: +ELLIPSIS + 0.6223... + + :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses + :type list_of_references: list(list(list(str))) + :param hypotheses: a list of hypothesis sentences + :type hypotheses: list(list(str)) + :param weights: weights for unigrams, bigrams, trigrams and so on + :type weights: list(float) + :param smoothing_function: + :type smoothing_function: SmoothingFunction + :param auto_reweigh: Option to re-normalize the weights uniformly. + :type auto_reweigh: bool + :return: The corpus-level BLEU score. + :rtype: float + """ + # Before proceeding to compute BLEU, perform sanity checks. 
+ + p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches. + p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref. + hyp_lengths, ref_lengths = 0, 0 + + assert len(list_of_references) == len(hypotheses), ( + "The number of hypotheses and their reference(s) should be the " "same " + ) + + # Iterate through each hypothesis and their corresponding references. + for references, hypothesis in zip(list_of_references, hypotheses): + # For each order of ngram, calculate the numerator and + # denominator for the corpus-level modified precision. + for i, _ in enumerate(weights, start=1): + p_i = modified_precision(references, hypothesis, i) + p_numerators[i] += p_i.numerator + p_denominators[i] += p_i.denominator + + # Calculate the hypothesis length and the closest reference length. + # Adds them to the corpus-level hypothesis and reference counts. + hyp_len = len(hypothesis) + hyp_lengths += hyp_len + ref_lengths += closest_ref_length(references, hyp_len) + + # Calculate corpus-level brevity penalty. + if no_length_penalty and averaging_mode == 'geometric': + bp = 1.0 + elif no_length_penalty and averaging_mode == 'arithmetic': + bp = 0.0 + else: + assert not no_length_penalty + assert averaging_mode != 'arithmetic', 'Not sure how to apply length penalty when aurithmetic mode' + bp = brevity_penalty(ref_lengths, hyp_lengths) + + # Uniformly re-weighting based on maximum hypothesis lengths if largest + # order of n-grams < 4 and weights is set at default. + if auto_reweigh: + if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25): + weights = (1 / hyp_lengths,) * hyp_lengths + + # Collects the various precision values for the different ngram orders. 
+ p_n = [ + Fraction(p_numerators[i], p_denominators[i], _normalize=False) + for i, _ in enumerate(weights, start=1) + ] + + # Returns 0 if there's no matching n-grams + # We only need to check for p_numerators[1] == 0, since if there's + # no unigrams, there won't be any higher order ngrams. + if p_numerators[1] == 0: + return 0 + + # If there's no smoothing, set use method0 from SmoothinFunction class. + if not smoothing_function: + smoothing_function = SmoothingFunction().method0 + # Smoothen the modified precision. + # Note: smoothing_function() may convert values into floats; + # it tries to retain the Fraction object as much as the + # smoothing method allows. + p_n = smoothing_function( + p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths + ) + + if averaging_mode == "geometric": + s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n)) + s = bp * math.exp(math.fsum(s)) + elif averaging_mode == "arithmetic": + s = (w_i * p_i for w_i, p_i in zip(weights, p_n)) + s = math.fsum(s) + + return s + + +def sentence_bleu( + references, + hypothesis, + weights=(0.25, 0.25, 0.25, 0.25), + smoothing_function=None, + auto_reweigh=False, + averaging_mode="geometric", + no_length_penalty=False +): + return corpus_bleu( + [references], [hypothesis], weights, smoothing_function, auto_reweigh, averaging_mode, no_length_penalty + ) \ No newline at end of file diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/cut_as.py b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/cut_as.py new file mode 100644 index 0000000..5b7e1e9 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/cut_as.py @@ -0,0 +1,69 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import torchaudio +import argparse +import json +import pathlib + + +def get_args(): + parser = argparse.ArgumentParser( + "Assuring generated audio have the same length as ground-truth audio") + parser.add_argument('--samples_dir', required=True, type=str) + parser.add_argument('--out_dir', required=True, type=str) + parser.add_argument('--prompts_description', required=True, type=str) + return parser.parse_args() + + +def cut(src, tgt, l): + x, sr = torchaudio.load(str(src)) + assert sr == 16_000 + + x = x.squeeze() + target_frames = int(l * sr) + + flag = 0 + if target_frames <= x.size(0): + x = x[:target_frames] + flag = 1 + else: + flag = 0 + torchaudio.save(str(tgt), x.unsqueeze(0), sr) + return flag + + +def main(): + args = get_args() + tgt_dir = pathlib.Path(args.out_dir) + tgt_dir.mkdir(exist_ok=True, parents=True) + + total_files, sufficiently_long = 0, 0 + + with open(args.prompts_description, 'r') as f: + description = json.loads(f.read()) + + for src_f in pathlib.Path(args.samples_dir).glob('*.wav'): + name_prompt = src_f.with_suffix('').name.split('__')[0] + + assert name_prompt in description, f'Cannot find {name_prompt}!' 
+ + target_length = description[name_prompt][0] + tgt_f = tgt_dir / (src_f.name) + + is_long_enough = cut(src_f, tgt_f, target_length) + sufficiently_long += is_long_enough + if not is_long_enough: + print(f'{src_f} is not long enough') + + total_files += 1 + + print( + f'Total files: {total_files}; sufficiently long: {sufficiently_long}') + + +if __name__ == '__main__': + main() diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/dict.ltr.txt b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/dict.ltr.txt new file mode 100644 index 0000000..69929e1 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/misc/dict.ltr.txt @@ -0,0 +1,28 @@ +| 94802 +E 51860 +T 38431 +A 33152 +O 31495 +N 28855 +I 28794 +H 27187 +S 26071 +R 23546 +D 18289 +L 16308 +U 12400 +M 10685 +W 10317 +C 9844 +F 9062 +G 8924 +Y 8226 +P 6890 +B 6339 +V 3936 +K 3456 +' 1023 +X 636 +J 598 +Q 437 +Z 213 diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/ppx.py b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/ppx.py new file mode 100644 index 0000000..d6a40e4 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/ppx.py @@ -0,0 +1,122 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import torch +import numpy as np +import warnings + + +def get_target_sequences(manifest, ground_truth, to_take=1000): + import json + import pathlib + + with open(ground_truth, 'r') as fin: + original_continuations = json.loads(fin.read()) + + sequence2length = [(k, v[0]) for k, v in original_continuations.items()] + assert all(float(v) >= 6.0 for (_, v) in sequence2length) # 6 seconds + + sequence2length.sort(key=lambda x: x[1]) + to_take_sequences = set(v[0] for v in sequence2length[:to_take]) + to_take_ids = [] + + with open(manifest, 'r') as f: + f.readline() + + for i, line in enumerate(f.readlines()): + seq_id = line.split()[0] + seq_id = pathlib.Path(seq_id).name.split('__')[0] + + if seq_id in to_take_sequences: + to_take_ids.append(i) + + print(f'Took {len(to_take_ids)} ids') + return set(to_take_ids) + + +def get_args(): + import argparse + + parser = argparse.ArgumentParser("Evaluate PPX metric of a transcript.") + parser.add_argument('--asr-transcript', type=str, + help='Path to the transcript file.') + parser.add_argument('--cut-id', action='store_true', + help='Whether cut the first token (typically a seq id)') + parser.add_argument('--cut-tail', action='store_true', + help='Whether cut the last token (typically a speaker id)') + + parser.add_argument('--manifest', type=str, default=None) + parser.add_argument('--prompts-description', type=str, default=None) + + args = parser.parse_args() + + return args + + +def main(): + args = get_args() + + lm = torch.hub.load( + 'pytorch/fairseq', 'transformer_lm.wmt19.en', tokenizer='moses', bpe='fastbpe') + + lm.eval().cuda() # disable dropout + + if args.manifest is None and args.prompts_description is None: + target_ids = None + else: + target_ids = get_target_sequences( + args.manifest, args.prompts_description) + + with open(args.asr_transcript, 'r') as fin: + lines = fin.readlines() + + if target_ids is not None: + filtered = [] + for line in lines: + line_id = line.split()[-1] + line_id = 
int(line_id.split('-')[1][:-1]) + if line_id in target_ids: + filtered.append(line) + lines = filtered + else: + pass + + if args.cut_id: + lines = [' '.join(x.split()[1:]) for x in lines] + if args.cut_tail: + lines = [' '.join(x.split()[:-1]) for x in lines] + lines = [x.strip().lower() for x in lines] + + def get_logprob(sent): return \ + lm.score(sent)['positional_scores'].mean().neg().item() + + logprobs = [get_logprob(l) for l in lines] + + filtered = [x for x in logprobs if not np.isnan(x)] + if len(filtered) != len(logprobs): + warnings.warn("NaNs detected!") + logprobs = filtered + + perplexities = [np.exp(l) for l in logprobs] + + for name, stats in [('logprob', logprobs), ('perplexity', perplexities)]: + mean = np.mean(stats) + sem = np.std(stats) / np.sqrt(len(stats)) + + median = np.median(stats) + interval = list(np.percentile(stats, [10, 90])) + + mean, sem, median, percentile10, percentile90 = [ + round(x, 2) for x in [mean, sem, median] + interval] + + print(name) + print(f"\tMean {mean} +- {sem}") + print( + f"\tMedian {median}, 90% confidence interval {percentile10}...{percentile90}") + + +if __name__ == '__main__': + main() diff --git a/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/self_auto_bleu.py b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/self_auto_bleu.py new file mode 100644 index 0000000..062bb82 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/metrics/asr_metrics/self_auto_bleu.py @@ -0,0 +1,201 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import numpy as np +import nltk +from misc.bleu_utils import sentence_bleu +import warnings + + +def get_target_sequences(manifest, ground_truth, to_take=1000): + import json + import pathlib + + with open(ground_truth, 'r') as fin: + original_continuations = json.loads(fin.read()) + + sequence2length = [(k, v[0]) for k, v in original_continuations.items()] + assert all(float(v) >= 6.0 for (_, v) in sequence2length) # 6 seconds + + sequence2length.sort(key=lambda x: x[1]) + to_take_sequences = set(v[0] for v in sequence2length[:to_take]) + to_take_ids = [] + + with open(manifest, 'r') as f: + f.readline() + + for i, line in enumerate(f.readlines()): + seq_id = line.split()[0] + seq_id = pathlib.Path(seq_id).name.split('__')[0] + + if seq_id in to_take_sequences: + to_take_ids.append(i) + + print(f'Took {len(to_take_ids)} ids') + return set(to_take_ids) + + +def get_args(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--asr-transcript', type=str, + help='Path to the transcript file.') + + parser.add_argument('--manifest', required=True) + parser.add_argument('--prompts-description', required=True) + + parser.add_argument('--cut-id', action='store_true', + help='Whether cut the first token (typically a seq id)') + parser.add_argument('--cut-tail', action='store_true', + help='Whether cut the last token (typically a speaker id)') + parser.add_argument('--debug', action='store_true') + + args = parser.parse_args() + + return args + + +def get_self_bleu(utterances, averaging_mode, weights): + self_bleu = [] + + for i in range(len(utterances)): + hypo = utterances[i] + rest = utterances[:i] + utterances[i+1:] + + self_bleu.append(sentence_bleu(rest, hypo, weights, + no_length_penalty=True, averaging_mode=averaging_mode)) + + return self_bleu + + +def get_self_bleu2_arithmetic(utterances): + weights = (0.5, 0.5) # equal weight for unigrams and bigrams + return get_self_bleu(utterances, averaging_mode='arithmetic', weights=weights) + + 
+def get_self_bleu2_geometric(utterances): + weights = (0.5, 0.5) + return get_self_bleu(utterances, averaging_mode='geometric', weights=weights) + + +def get_auto_bleu2_arithmetic(utterances): + weights = (0.5, 0.5) + return [auto_bleu(u, mean_mode='arithmetic', weights=weights) for u in utterances] + + +def get_auto_bleu2_geometric(utterances): + weights = (0.5, 0.5) + return [auto_bleu(u, mean_mode='geometric', weights=weights) for u in utterances] + + +def get_auto_bleu3_geometric(utterances): + weights = (1./3, 1./3, 1./3) + return [auto_bleu(u, mean_mode='geometric', weights=weights) for u in utterances] + + +def get_auto_bleu3_arithmetic(utterances): + weights = (1./3, 1./3, 1./3) + return [auto_bleu(u, mean_mode='arithmetic', weights=weights) for u in utterances] + + +def get_self_bleu3_arithmetic(utterances): + weights = (1./3, 1./3, 1./3) + return get_self_bleu(utterances, averaging_mode='arithmetic', weights=weights) + + +def get_self_bleu3_geometric(utterances): + weights = (1./3, 1./3, 1./3) + return get_self_bleu(utterances, averaging_mode='geometric', weights=weights) + + +def auto_bleu(sentence, weights, mean_mode='arithmetic'): + """Return the Auto-BLEU of one token sequence. + + For each n-gram order 1..len(weights), compute the fraction of the + sequence's n-grams that also occur in the non-overlapping remainder of + the same sequence, then combine the orders with ``weights`` using an + 'arithmetic' or 'geometric' mean. Sequences of at most one token score 0. + Raises ValueError for any other ``mean_mode``. + """ + if len(sentence) <= 1: + return 0 + + N = len(weights) + + bleu_n = np.zeros([N]) + for n in range(N): + targ_ngrams = list(nltk.ngrams(sentence, n+1)) + if not targ_ngrams: + # Sequence shorter than n+1 tokens: no n-grams of this order exist, + # so keep the proportion at 0 instead of computing 0/0 (NaN). + continue + for p in range(len(targ_ngrams)): + left = sentence[:p] + right = sentence[(p+n+1):] + rest_ngrams = list(nltk.ngrams(left, n+1)) + \ + list(nltk.ngrams(right, n+1)) + # compute the nb of matching ngrams + bleu_n[n] += targ_ngrams[p] in rest_ngrams + bleu_n[n] /= len(targ_ngrams) # average them to get a proportion + + weights = np.array(weights) + if mean_mode == 'arithmetic': + return (bleu_n * weights).sum() + elif mean_mode == 'geometric': + return (bleu_n ** weights).prod() + else: + raise ValueError(f'Unknown agggregation mode {mean_mode}') + + +def main(): + from multiprocessing import Pool + + args = get_args() + target_ids = get_target_sequences(args.manifest,
args.prompts_description) + + with open(args.asr_transcript, 'r') as fin: + lines = fin.readlines() + + terms = [x.strip().split() for x in lines] + filtered = [] + for term in terms: + line_id = int(term[-1].split('-')[1][:-1]) + if line_id in target_ids: + filtered.append(term) + terms = filtered + + if args.cut_id: + terms = [x[1:] for x in terms] + if args.cut_tail: + terms = [x[:-1] for x in terms] + + if args.debug: + terms = terms[:10] + + tasks = [ + ('Self-BLEU2-arithmetic', get_self_bleu2_arithmetic), + ('Self-BLEU2-geometric', get_self_bleu2_geometric), + ('Auto-BLEU2-arithmetic', get_auto_bleu2_arithmetic), + ('Auto-BLEU2-geometric', get_auto_bleu2_geometric), + + ('Self-BLEU3-arithmetic', get_self_bleu3_arithmetic), + ('Self-BLEU3-geometric', get_self_bleu3_geometric), + ('Auto-BLEU3-arithmetic', get_auto_bleu3_arithmetic), + ('Auto-BLEU3-geometric', get_auto_bleu3_geometric), + ] + + n_processes = min(16, len(tasks)) + with Pool(n_processes) as pool: + metrics = pool.map(run_f, [(t[1], terms) for t in tasks]) + + for (metric_name, _), metric in zip(tasks, metrics): + metric, sem = np.mean(metric), np.std(metric) / np.sqrt(len(metric)) + + metric, sem = [ + round(100 * x, 2) for x in [metric, sem] + ] + + print(f'{metric_name} {metric} +- {sem}') + + +def run_f(task_params): + f, terms = task_params + return f(terms) + + +if __name__ == '__main__': + # NLTK produces warnings + warnings.filterwarnings("ignore") + + main() diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/README.md b/fairseq/examples/textless_nlp/gslm/speech2unit/README.md new file mode 100644 index 0000000..9dff9d3 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/README.md @@ -0,0 +1,68 @@ +# Speech to Unit Model (speech2unit) + +## Acoustic Model +For quantizing speech we learn a K-means clustering over acoustic representations for which we either use Log-Mel Filterbank or pretrained acoustic representation models. 
For using pretrained models, please download from their respective locations linked below. +* [Modified CPC](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/cpc_big_ll6kh_top_ctc.pt) +* [HuBERT-Base](https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt) +* [Wav2Vec 2.0-Base](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_new.pt) + +## Quantization Model +You can download pretrained quantized model from the list below. + +K-Means Model | Download Link +|-|- +Log Mel Filterbank + KM50 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/km50/km.bin) +Log Mel Filterbank + KM100 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/km100/km.bin) +Log Mel Filterbank + KM200 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/km200/km.bin) +Modified CPC + KM50 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/km50/km.bin) +Modified CPC + KM100 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/km100/km.bin) +Modified CPC + KM200 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/km200/km.bin) +HuBERT Base + KM50 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/km50/km.bin) +HuBERT Base + KM100 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/km100/km.bin) +HuBERT Base + KM200 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/km200/km.bin) +wav2vec 2.0 Large + KM50 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/km50/km.bin) +wav2vec 2.0 Large + KM100 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/km100/km.bin) +wav2vec 2.0 Large + KM200 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/km200/km.bin) + +### Quantization +For quantizing speech with a given acoustic representation, please follow the steps below. +1. 
Learn K-means clustering model +``` +N_CLUSTERS=<number_of_clusters_used_for_kmeans> +TYPE=<one_of_logmel/cpc/hubert/w2v2> +CKPT_PATH=<path_of_pretrained_acoustic_model> +LAYER=<layer_of_acoustic_model_to_extract_features_from> +MANIFEST=<tab_separated_manifest_of_audio_files_for_training_kmeans> +KM_MODEL_PATH=<output_path_of_the_kmeans_model> + +PYTHONPATH=. python examples/textless_nlp/gslm/speech2unit/clustering/cluster_kmeans.py \ + --num_clusters $N_CLUSTERS \ + --feature_type $TYPE \ + --checkpoint_path $CKPT_PATH \ + --layer $LAYER \ + --manifest_path $MANIFEST \ + --out_kmeans_model_path $KM_MODEL_PATH +``` +2. Quantize using the learned clusters +``` +MANIFEST=<tab_separated_manifest_of_audio_files_to_quantize> +OUT_QUANTIZED_FILE=<output_quantized_audio_file_path> + +PYTHONPATH=. python examples/textless_nlp/gslm/speech2unit/clustering/quantize_with_kmeans.py \ + --feature_type $TYPE \ + --kmeans_model_path $KM_MODEL_PATH \ + --acoustic_model_path $CKPT_PATH \ + --layer $LAYER \ + --manifest_path $MANIFEST \ + --out_quantized_file_path $OUT_QUANTIZED_FILE \ + --extension ".flac" +``` + +Note: the manifest file lists the paths and lengths (in frames) of the input audio files. The format of the file is as follows: +``` +<path_of_root_directory_containing_audio_files> +<relative_path_of_audio_file_1>\t<number_of_frames_1> +<relative_path_of_audio_file_2>\t<number_of_frames_2> +...
+``` + diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/__init__.py b/fairseq/examples/textless_nlp/gslm/speech2unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/__init__.py b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/cluster_kmeans.py b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/cluster_kmeans.py new file mode 100644 index 0000000..7cf844a --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/cluster_kmeans.py @@ -0,0 +1,212 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import logging +import os +import time + +import numpy as np +from sklearn.cluster import MiniBatchKMeans + +import joblib +from examples.textless_nlp.gslm.speech2unit.pretrained.utils import ( + get_and_dump_features, + get_features, +) + + +def get_logger(): + log_format = "[%(asctime)s] [%(levelname)s]: %(message)s" + logging.basicConfig(format=log_format, level=logging.INFO) + logger = logging.getLogger(__name__) + return logger + + +def get_parser(): + parser = argparse.ArgumentParser( + description="Learn K-means clustering over acoustic features." 
+ ) + + # Features arguments + parser.add_argument( + "--in_features_path", type=str, default=None, help="Features file path" + ) + parser.add_argument( + "--feature_type", + type=str, + choices=["logmel", "hubert", "w2v2", "cpc"], + default=None, + help="Acoustic feature type", + ) + parser.add_argument( + "--manifest_path", + type=str, + default=None, + help="Manifest file containing the root dir and file names", + ) + parser.add_argument( + "--out_features_path", + type=str, + default=None, + help="Features file path to write to", + ) + parser.add_argument( + "--checkpoint_path", + type=str, + help="Pretrained acoustic model checkpoint", + ) + parser.add_argument( + "--layer", + type=int, + help="The layer of the pretrained model to extract features from", + default=-1, + ) + parser.add_argument( + "--sample_pct", + type=float, + help="Percent data to use for K-means training", + default=0.1, + ) + + # K-means arguments + parser.add_argument( + "--num_clusters", type=int, help="Nubmer of clusters", default=50 + ) + parser.add_argument("--init", default="k-means++") + parser.add_argument( + "--max_iter", + type=int, + help="Maximum number of iterations for K-means training", + default=150, + ) + parser.add_argument( + "--batch_size", + type=int, + help="Batch size for K-means training", + default=10000, + ) + parser.add_argument("--tol", default=0.0, type=float) + parser.add_argument("--max_no_improvement", default=100, type=int) + parser.add_argument("--n_init", default=20, type=int) + parser.add_argument("--reassignment_ratio", default=0.5, type=float) + parser.add_argument( + "--out_kmeans_model_path", + type=str, + required=True, + help="Path to save K-means model", + ) + + # Leftovers + parser.add_argument( + "--seed", + type=int, + help="Random seed to use for K-means training", + default=1369, + ) + + return parser + + +def get_kmeans_model( + n_clusters, + init, + max_iter, + batch_size, + tol, + max_no_improvement, + n_init, + reassignment_ratio, + 
random_state, +): + return MiniBatchKMeans( + n_clusters=n_clusters, + init=init, + max_iter=max_iter, + batch_size=batch_size, + tol=tol, + max_no_improvement=max_no_improvement, + n_init=n_init, + reassignment_ratio=reassignment_ratio, + random_state=random_state, + verbose=1, + compute_labels=True, + init_size=None, + ) + + +def train_kmeans(kmeans_model, features_batch): + """Fit ``kmeans_model`` on ``features_batch``; return the fitted model and the elapsed time in minutes.""" + start_time = time.time() + kmeans_model.fit(features_batch) + # True division: floor division (//) truncated to whole minutes, so any + # run shorter than a minute was reported as 0.0 despite the round(..., 2). + time_taken = round((time.time() - start_time) / 60, 2) + return kmeans_model, time_taken + + +def main(args, logger): + # Features loading/extraction for K-means + if args.in_features_path: + # Feature loading + logger.info(f"Loading features from {args.in_features_path}...") + # NOTE(security): allow_pickle=True unpickles arbitrary objects; only load trusted feature files. + features_batch = np.load(args.in_features_path, allow_pickle=True) + else: + # Feature extraction + logger.info(f"Extracting {args.feature_type} acoustic features...") + features_batch = ( + get_features( + feature_type=args.feature_type, + checkpoint_path=args.checkpoint_path, + layer=args.layer, + manifest_path=args.manifest_path, + sample_pct=args.sample_pct, + flatten=True, + # get_features() declares channel_id without a default; omitting it raises TypeError. + channel_id=None, + ) + if not args.out_features_path + else get_and_dump_features( + feature_type=args.feature_type, + checkpoint_path=args.checkpoint_path, + layer=args.layer, + manifest_path=args.manifest_path, + sample_pct=args.sample_pct, + flatten=True, + out_features_path=args.out_features_path, + ) + ) + if args.out_features_path: + logger.info( + f"Saved extracted features at {args.out_features_path}" + ) + logger.info(f"Features shape = {features_batch.shape}\n") + + # Learn and save K-means model + kmeans_model = get_kmeans_model( + n_clusters=args.num_clusters, + init=args.init, + max_iter=args.max_iter, + batch_size=args.batch_size, + tol=args.tol, + max_no_improvement=args.max_no_improvement, + n_init=args.n_init, + reassignment_ratio=args.reassignment_ratio, + random_state=args.seed, + ) + logger.info("Starting k-means training...") + kmeans_model, time_taken = train_kmeans( +
kmeans_model=kmeans_model, features_batch=features_batch + ) + logger.info(f"...done k-means training in {time_taken} minutes") + inertia = -kmeans_model.score(features_batch) / len(features_batch) + logger.info(f"Total intertia: {round(inertia, 2)}\n") + + logger.info(f"Saving k-means model to {args.out_kmeans_model_path}") + # dirname is "" for a bare file name, and os.makedirs("") raises FileNotFoundError. + out_dir = os.path.dirname(args.out_kmeans_model_path) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + # Context manager so the model file is flushed and closed deterministically. + with open(args.out_kmeans_model_path, "wb") as fout: + joblib.dump(kmeans_model, fout) + + +if __name__ == "__main__": + parser = get_parser() + args = parser.parse_args() + logger = get_logger() + logger.info(args) + main(args, logger) diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/dump_feats.py b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/dump_feats.py new file mode 100644 index 0000000..031567c --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/dump_feats.py @@ -0,0 +1,91 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import logging + +from examples.textless_nlp.gslm.speech2unit.pretrained.utils import ( + get_and_dump_features, +) + + +def get_parser(): + parser = argparse.ArgumentParser( + description="Compute and dump log mel fbank features."
+ ) + parser.add_argument( + "--feature_type", + type=str, + choices=["logmel", "hubert", "w2v2", "cpc"], + default=None, + help="Acoustic feature type", + ) + parser.add_argument( + "--manifest_path", + type=str, + default=None, + help="Manifest file containing the root dir and file names", + ) + # --out_features_path must be registered exactly once: a second add_argument() + # for the same option string makes argparse raise a conflict error at start-up. + parser.add_argument( + "--out_features_path", + type=str, + default=None, + help="Features file path to write to", + ) + parser.add_argument( + "--checkpoint_path", + type=str, + help="Pretrained acoustic model checkpoint", + ) + parser.add_argument( + "--layer", + type=int, + help="The layer of the pretrained model to extract features from", + default=-1, + ) + parser.add_argument( + "--sample_pct", + type=float, + help="Percent data to use for K-means training", + default=0.1, + ) + return parser + + +def get_logger(): + log_format = "[%(asctime)s] [%(levelname)s]: %(message)s" + logging.basicConfig(format=log_format, level=logging.INFO) + logger = logging.getLogger(__name__) + return logger + + +if __name__ == "__main__": + """ + Example command: + python ~/speechbot/clustering/dump_logmelfank_feats.py \ + --manifest_path /checkpoint/kushall/data/LJSpeech-1.1/asr_input_wavs_16k/train.tsv + --out_features_path /checkpoint/kushall/experiments/speechbot/logmelfbank/features/ljspeech/train.npy + """ + parser = get_parser() + args = parser.parse_args() + logger = get_logger() + logger.info(args) + + logger.info(f"Extracting {args.feature_type} acoustic features...") + get_and_dump_features( + feature_type=args.feature_type, + checkpoint_path=args.checkpoint_path, + layer=args.layer, + manifest_path=args.manifest_path, + sample_pct=args.sample_pct, + flatten=True, + out_features_path=args.out_features_path, + ) + logger.info(f"Saved extracted features at {args.out_features_path}") diff --git
a/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/quantize_with_kmeans.py b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/quantize_with_kmeans.py new file mode 100644 index 0000000..dd95105 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/quantize_with_kmeans.py @@ -0,0 +1,141 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import logging +import os + +import numpy as np + +import joblib +from examples.textless_nlp.gslm.speech2unit.clustering.utils import ( + get_audio_files, +) +from examples.textless_nlp.gslm.speech2unit.pretrained.utils import ( + get_features, +) + + +def get_logger(): + log_format = "[%(asctime)s] [%(levelname)s]: %(message)s" + logging.basicConfig(format=log_format, level=logging.INFO) + logger = logging.getLogger(__name__) + return logger + + +def get_parser(): + parser = argparse.ArgumentParser( + description="Quantize using K-means clustering over acoustic features." + ) + parser.add_argument( + "--feature_type", + type=str, + choices=["logmel", "hubert", "w2v2", "cpc"], + default=None, + required=True, + help="Acoustic feature type", + ) + parser.add_argument( + "--acoustic_model_path", + type=str, + help="Pretrained acoustic model checkpoint" + ) + parser.add_argument( + "--layer", + type=int, + help="The layer of the pretrained model to extract features from", + default=-1, + ) + parser.add_argument( + "--kmeans_model_path", + type=str, + required=True, + help="K-means model file path to use for inference", + ) + parser.add_argument( + "--features_path", + type=str, + default=None, + help="Features file path. 
You don't need to enter acoustic model details if you have dumped features", + ) + parser.add_argument( + "--manifest_path", + type=str, + default=None, + help="Manifest file containing the root dir and file names", + ) + parser.add_argument( + "--out_quantized_file_path", + required=True, + type=str, + help="File path of quantized output.", + ) + parser.add_argument( + "--extension", type=str, default=".flac", help="Audio file extension to strip from file names in the output" + ) + parser.add_argument( + "--channel_id", + choices=['1', '2'], + help="The audio channel to extract the units in case of stereo file.", + default=None, + ) + parser.add_argument( + "--hide-fname", action='store_true', + help="Hide file names in the output file." + ) + return parser + + +def main(args, logger): + """Quantize every utterance with a trained K-means model and write one line of unit ids per utterance.""" + # Feature extraction + if args.features_path is not None: + logger.info(f"Loading acoustic features from {args.features_path}...") + features_batch = np.load(args.features_path) + else: + logger.info(f"Extracting {args.feature_type} acoustic features...") + features_batch = get_features( + feature_type=args.feature_type, + checkpoint_path=args.acoustic_model_path, + layer=args.layer, + manifest_path=args.manifest_path, + sample_pct=1.0, + flatten=False, + channel_id=int(args.channel_id) if args.channel_id else None, + ) + logger.info( + f"Features extracted for {len(features_batch)} utterances.\n" + ) + logger.info( + f"Dimensionality of representation = {features_batch[0].shape[1]}" + ) + + # K-means model + logger.info(f"Loading K-means model from {args.kmeans_model_path} ...") + # Context manager so the model file handle is closed right after loading. + with open(args.kmeans_model_path, "rb") as fin: + kmeans_model = joblib.load(fin) + kmeans_model.verbose = False + + _, fnames, _ = get_audio_files(args.manifest_path) + + # dirname is "" for a bare file name, and os.makedirs("") raises FileNotFoundError. + out_dir = os.path.dirname(args.out_quantized_file_path) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + print(f"Writing quantized predictions to {args.out_quantized_file_path}") + with open(args.out_quantized_file_path, "w") as fout: + for i, feats in enumerate(features_batch): + pred = kmeans_model.predict(feats) +
pred_str = " ".join(str(p) for p in pred) + # str.rstrip() strips a *set of characters*, not a suffix (e.g. + # "calf.flac".rstrip(".flac") == ""), so remove the extension explicitly. + base_fname = os.path.basename(fnames[i]) + ext = '.'+args.extension.lstrip('.') + if base_fname.endswith(ext): + base_fname = base_fname[:-len(ext)] + if args.channel_id is not None: + base_fname = base_fname+f'-channel{args.channel_id}' + if not args.hide_fname: + fout.write(f"{base_fname}|{pred_str}\n") + else: + fout.write(f"{pred_str}\n") + + +if __name__ == "__main__": + parser = get_parser() + args = parser.parse_args() + logger = get_logger() + logger.info(args) + main(args, logger) diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/utils.py b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/utils.py new file mode 100644 index 0000000..cf08d1f --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/clustering/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import List, Tuple + + +def get_audio_files(manifest_path: str) -> Tuple[str, List[str], List[int]]: + """Parse a manifest: first line is the root dir, then one "<relative_path>\t<size>" row per file. + + Returns ``(root_dir, file_names, sizes)``; raises AssertionError on a row without exactly two tab-separated columns. + """ + fnames, sizes = [], [] + with open(manifest_path, "r") as f: + root_dir = f.readline().strip() + for line in f: + if not line.rstrip("\n"): + # Skip empty lines (e.g. a trailing one), as get_feature_iterator does. + continue + items = line.strip().split("\t") + assert ( + len(items) == 2 + ), f"File must have two columns separated by tab. Got {line}" + fnames.append(items[0]) + sizes.append(int(items[1])) + return root_dir, fnames, sizes diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/cpc_feature_reader.py b/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/cpc_feature_reader.py new file mode 100644 index 0000000..2ea3890 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/cpc_feature_reader.py @@ -0,0 +1,204 @@ +import soundfile as sf +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class CpcFeatureReader: + """ + Wrapper class to run inference on CPC model. + Helps extract features for a given audio file.
+ """ + + def __init__( + self, + checkpoint_path, + layer, + use_encoder_layer=False, + norm_features=False, + sample_rate=16000, + max_chunk=64000, + use_cuda=True, + ): + self.model = load_cpc_model(checkpoint_path, layer).eval() + self.sample_rate = sample_rate + self.max_chunk = max_chunk + self.norm_features = norm_features + self.use_encoder_layer = use_encoder_layer + self.use_cuda = use_cuda + if self.use_cuda: + self.model.cuda() + + def read_audio(self, path, ref_len=None, channel_id=None): + wav, sr = sf.read(path) + if channel_id is not None: + assert wav.ndim == 2, \ + f"Expected stereo input when channel_id is given ({path})" + assert channel_id in [1, 2], \ + "channel_id is expected to be in [1, 2]" + wav = wav[:, channel_id-1] + if wav.ndim == 2: + wav = wav.mean(-1) + assert wav.ndim == 1, wav.ndim + assert sr == self.sample_rate, sr + if ref_len is not None and abs(ref_len - len(wav)) > 160: + print(f"ref {ref_len} != read {len(wav)} ({path})") + return wav + + def get_feats(self, file_path, ref_len=None, channel_id=None): + """Return the CPC features of one audio file, encoding it in chunks of ``max_chunk`` samples.""" + x = self.read_audio(file_path, ref_len, channel_id) + # Inspired from CPC_audio feature_loader.py + with torch.no_grad(): + x = torch.from_numpy(x).float() + if self.use_cuda: + x = x.cuda() + x = x.view(1, 1, -1) + size = x.size(2) + feat = [] + start = 0 + while start < size: + if start + self.max_chunk > size: + break + x_chunk = x[..., start : start + self.max_chunk] + feat_chunk = self.model.extract_features( + source=x_chunk, + get_encoded=self.use_encoder_layer, + norm_output=self.norm_features, + ) + feat.append(feat_chunk) + start += self.max_chunk + + if start < size: + # x is (1, 1, T): slice the time axis. x[:, -max_chunk:] sliced the + # singleton channel axis and re-encoded the whole signal instead. + x_chunk = x[..., -self.max_chunk :] + feat_chunk = self.model.extract_features( + source=x_chunk, + get_encoded=self.use_encoder_layer, + norm_output=self.norm_features, + ) + df = x_chunk.size(2) // feat_chunk.size(1) + delta = (size - start) // df + # feat_chunk[:, -0:] is the *whole* chunk, so a remainder shorter + # than one frame must contribute nothing rather than every frame again. + if delta > 0: + feat.append(feat_chunk[:, -delta:]) + return torch.cat(feat, 1).squeeze(0) + + +def
load_cpc_model(checkpoint_path, layer=None): + state_dict = torch.load(checkpoint_path) + weights = state_dict["weights"] + config = state_dict["config"] + if layer is not None: + config["nLevelsGRU"] = layer + + encoder = CPCEncoder(config["hiddenEncoder"]) + ar_net = CPCAR( + config["hiddenEncoder"], config["hiddenGar"], False, config["nLevelsGRU"] + ) + + model = CPCModel(encoder, ar_net) + model.load_state_dict(weights, strict=False) + model.config = config + + return model + + +class ChannelNorm(nn.Module): + def __init__(self, num_features, epsilon=1e-05, affine=True): + super(ChannelNorm, self).__init__() + if affine: + self.weight = nn.parameter.Parameter(torch.Tensor(1, num_features, 1)) + self.bias = nn.parameter.Parameter(torch.Tensor(1, num_features, 1)) + else: + self.weight = None + self.bias = None + self.epsilon = epsilon + self.p = 0 + self.affine = affine + self.reset_parameters() + + def reset_parameters(self): + if self.affine: + torch.nn.init.ones_(self.weight) + torch.nn.init.zeros_(self.bias) + + def forward(self, x): + cum_mean = x.mean(dim=1, keepdim=True) + cum_var = x.var(dim=1, keepdim=True) + x = (x - cum_mean) * torch.rsqrt(cum_var + self.epsilon) + if self.weight is not None: + x = x * self.weight + self.bias + return x + + +class CPCEncoder(nn.Module): + def __init__(self, hidden_dim=512): + super(CPCEncoder, self).__init__() + self.conv0 = nn.Conv1d(1, hidden_dim, 10, stride=5, padding=3) + self.batchNorm0 = ChannelNorm(hidden_dim) + self.conv1 = nn.Conv1d(hidden_dim, hidden_dim, 8, stride=4, padding=2) + self.batchNorm1 = ChannelNorm(hidden_dim) + self.conv2 = nn.Conv1d(hidden_dim, hidden_dim, 4, stride=2, padding=1) + self.batchNorm2 = ChannelNorm(hidden_dim) + self.conv3 = nn.Conv1d(hidden_dim, hidden_dim, 4, stride=2, padding=1) + self.batchNorm3 = ChannelNorm(hidden_dim) + self.conv4 = nn.Conv1d(hidden_dim, hidden_dim, 4, stride=2, padding=1) + self.batchNorm4 = ChannelNorm(hidden_dim) + self.DOWNSAMPLING = 160 + + def 
get_output_dim(self): + return self.conv4.out_channels + + def forward(self, x): + x = F.relu(self.batchNorm0(self.conv0(x))) + x = F.relu(self.batchNorm1(self.conv1(x))) + x = F.relu(self.batchNorm2(self.conv2(x))) + x = F.relu(self.batchNorm3(self.conv3(x))) + x = F.relu(self.batchNorm4(self.conv4(x))) + return x + + +class CPCAR(nn.Module): + def __init__(self, dim_encoded, dim_output, keep_hidden, num_layers): + super(CPCAR, self).__init__() + self.baseNet = nn.LSTM( + dim_encoded, dim_output, num_layers=num_layers, batch_first=True + ) + self.hidden = None + self.keep_hidden = keep_hidden + + def get_output_dim(self): + return self.baseNet.hidden_size + + def forward(self, x): + try: + self.baseNet.flatten_parameters() + except RuntimeError: + pass + x, h = self.baseNet(x, self.hidden) + if self.keep_hidden: + if isinstance(h, tuple): + self.hidden = tuple(x.detach() for x in h) + else: + self.hidden = h.detach() + return x + + +class CPCModel(nn.Module): + def __init__(self, encoder, ar_net): + super(CPCModel, self).__init__() + self.gEncoder = encoder + self.gAR = ar_net + self.config = None + + def forward(self, x, label): + encoded = self.gEncoder(x).permute(0, 2, 1) + cpc_feature = self.gAR(encoded) + return cpc_feature, encoded, label + + def extract_features(self, source, get_encoded=False, norm_output=False): + cpc_feature, encoded, _ = self.forward(source, None) + if get_encoded: + cpc_feature = encoded + if norm_output: + mean = cpc_feature.mean(dim=1, keepdim=True) + var = cpc_feature.var(dim=1, keepdim=True) + cpc_feature = (cpc_feature - mean) / torch.sqrt(var + 1e-08) + return cpc_feature diff --git a/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/hubert_feature_reader.py b/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/hubert_feature_reader.py new file mode 100644 index 0000000..4fef859 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/speech2unit/pretrained/hubert_feature_reader.py @@ -0,0 +1,70 @@ +# Copyright (c) 
Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +import fairseq +import soundfile as sf +import torch.nn.functional as F + + +class HubertFeatureReader: + """ + Wrapper class to run inference on HuBERT model. + Helps extract features for a given audio file. + """ + + def __init__(self, checkpoint_path, layer, max_chunk=1600000, use_cuda=True): + ( + model, + cfg, + task, + ) = fairseq.checkpoint_utils.load_model_ensemble_and_task( + [checkpoint_path] + ) + self.model = model[0].eval() + self.task = task + self.layer = layer + self.max_chunk = max_chunk + self.use_cuda = use_cuda + if self.use_cuda: + self.model.cuda() + + def read_audio(self, path, ref_len=None, channel_id=None): + wav, sr = sf.read(path) + if channel_id is not None: + assert wav.ndim == 2, \ + f"Expected stereo input when channel_id is given ({path})" + assert channel_id in [1, 2], \ + "channel_id is expected to be in [1, 2]" + wav = wav[:, channel_id-1] + if wav.ndim == 2: + wav = wav.mean(-1) + assert wav.ndim == 1, wav.ndim + assert sr == self.task.cfg.sample_rate, sr + if ref_len is not None and abs(ref_len - len(wav)) > 160: + print(f"ref {ref_len} != read {len(wav)} ({path})") + return wav + + def get_feats(self, file_path, ref_len=None, channel_id=None): + x = self.read_audio(file_path, ref_len, channel_id) + with torch.no_grad(): + x = torch.from_numpy(x).float() + if self.use_cuda: + x = x.cuda() + if self.task.cfg.normalize: + x = F.layer_norm(x, x.shape) + x = x.view(1, -1) + + feat = [] + for start in range(0, x.size(1), self.max_chunk): + x_chunk = x[:, start: start + self.max_chunk] + feat_chunk, _ = self.model.extract_features( + source=x_chunk, + padding_mask=None, + mask=False, + output_layer=self.layer, + ) + feat.append(feat_chunk) + return torch.cat(feat, 1).squeeze(0) diff --git 
class LogMelFeatureReader:
    """
    Extracts Kaldi-style log-Mel filterbank features from an audio file.

    No acoustic model is loaded. The constructor accepts arbitrary positional
    and keyword arguments so it can be built with the same call as the other
    feature readers; only ``num_mel_bins`` (default 80) and ``frame_length``
    (default 25.0) are read from the keyword arguments.
    """

    def __init__(self, *args, **kwargs):
        self.num_mel_bins = kwargs.get("num_mel_bins", 80)
        self.frame_length = kwargs.get("frame_length", 25.0)

    @staticmethod
    def _select_mono(wav, channel_id, file_path):
        """Reduce ``wav`` to one dimension.

        With ``channel_id`` (1-based) the given channel of a two-dimensional
        array is returned; without it a two-dimensional array is averaged over
        its last axis, matching the HuBERT and wav2vec readers.

        Raises:
            AssertionError: if ``channel_id`` is given for 1-D input or is
                outside ``1..number_of_channels``.
        """
        if channel_id is not None:
            assert wav.ndim == 2, \
                f"Expected stereo input when channel_id is given ({file_path})"
            # Without this check channel_id=0 would index -1 and silently
            # return the last channel.
            assert 1 <= channel_id <= wav.shape[1], \
                f"channel_id is expected to be in [1, {wav.shape[1]}]"
            wav = wav[:, channel_id - 1]
        if wav.ndim == 2:
            # The original passed a 3-D tensor to kaldi.fbank in this case.
            wav = wav.mean(-1)
        return wav

    def get_feats(self, file_path, channel_id=None):
        """Return the filterbank features of ``file_path`` as a tensor."""
        wav, sr = sf.read(file_path)
        wav = self._select_mono(wav, channel_id, file_path)
        feats = torch.from_numpy(wav).float()
        feats = kaldi.fbank(
            feats.unsqueeze(0),
            num_mel_bins=self.num_mel_bins,
            frame_length=self.frame_length,
            sample_frequency=sr,
        )
        return feats
def get_feature_reader(feature_type):
    """Return the feature-reader class for ``feature_type``.

    Supported values are ``"logmel"``, ``"hubert"``, ``"w2v2"`` and ``"cpc"``.

    Raises:
        NotImplementedError: for any other ``feature_type``.
    """
    if feature_type == "logmel":
        return LogMelFeatureReader
    elif feature_type == "hubert":
        return HubertFeatureReader
    elif feature_type == "w2v2":
        return Wav2VecFeatureReader
    elif feature_type == "cpc":
        return CpcFeatureReader
    else:
        raise NotImplementedError(f"{feature_type} is not supported.")


def get_feature_iterator(
    feature_type, checkpoint_path, layer, manifest_path, sample_pct, channel_id=None
):
    """Build a lazy per-file feature generator for a manifest.

    The first manifest line is the root directory. Every following non-empty
    line is tab-separated and its first field is a path relative to that root.
    When ``sample_pct < 1.0`` a random subset of
    ``int(sample_pct * number_of_files)`` files is used.

    Returns:
        ``(iterate, num_files)`` where ``iterate`` is a zero-argument
        generator function yielding one NumPy array per selected file.
    """
    feature_reader_cls = get_feature_reader(feature_type)
    with open(manifest_path, "r") as fp:
        lines = fp.read().split("\n")
        root = lines.pop(0).strip()
        file_path_list = [
            os.path.join(root, line.split("\t")[0])
            for line in lines
            if len(line) > 0
        ]
        if sample_pct < 1.0:
            file_path_list = random.sample(
                file_path_list, int(sample_pct * len(file_path_list))
            )
        num_files = len(file_path_list)
        reader = feature_reader_cls(
            checkpoint_path=checkpoint_path, layer=layer
        )

    def iterate():
        for file_path in file_path_list:
            feats = reader.get_feats(file_path, channel_id=channel_id)
            yield feats.cpu().numpy()

    return iterate, num_files


def get_features(
    feature_type,
    checkpoint_path,
    layer,
    manifest_path,
    sample_pct,
    flatten,
    channel_id=None,
):
    """Extract features for every (sampled) file listed in ``manifest_path``.

    Returns:
        One concatenated NumPy array when ``flatten`` is true, otherwise a
        list with one array per file.
    """
    generator, num_files = get_feature_iterator(
        feature_type=feature_type,
        checkpoint_path=checkpoint_path,
        layer=layer,
        manifest_path=manifest_path,
        sample_pct=sample_pct,
        channel_id=channel_id,
    )
    iterator = generator()

    features_list = []
    for features in tqdm.tqdm(iterator, total=num_files):
        features_list.append(features)

    # Explicit clean up: drop the references that keep the reader (and its
    # model) alive, then release cached GPU memory.
    del iterator
    del generator
    gc.collect()
    torch.cuda.empty_cache()

    if flatten:
        return np.concatenate(features_list)

    return features_list


def get_and_dump_features(
    feature_type,
    checkpoint_path,
    layer,
    manifest_path,
    sample_pct,
    flatten,
    out_features_path,
    channel_id=None,
):
    """Extract features, save them with ``np.save`` and copy the manifest.

    The manifest is copied next to ``out_features_path``. ``channel_id`` is
    forwarded to ``get_features``; the original call omitted it, so every
    invocation failed with ``TypeError``.

    Returns:
        The extracted features, as returned by ``get_features``.
    """
    # Feature extraction
    features_batch = get_features(
        feature_type=feature_type,
        checkpoint_path=checkpoint_path,
        layer=layer,
        manifest_path=manifest_path,
        sample_pct=sample_pct,
        flatten=flatten,
        channel_id=channel_id,
    )

    # Save features
    out_dir_path = os.path.dirname(out_features_path)
    if out_dir_path:
        # os.makedirs("") raises, so skip it when saving into the CWD.
        os.makedirs(out_dir_path, exist_ok=True)
    manifest_copy = os.path.join(out_dir_path, os.path.basename(manifest_path))
    try:
        shutil.copyfile(manifest_path, manifest_copy)
    except shutil.SameFileError:
        # The manifest already lives in the output directory.
        pass
    np.save(out_features_path, features_batch)

    return features_batch
class Wav2VecFeatureReader:
    """
    Inference-only wrapper around a Wav2Vec 2.0 checkpoint.
    Reads an audio file and returns the features of one model layer.
    """

    def __init__(self, checkpoint_path, layer, use_cuda=True):
        checkpoint = fairseq.checkpoint_utils.load_checkpoint_to_cpu(
            checkpoint_path
        )

        saved_args = checkpoint["args"]
        self.task = fairseq.tasks.setup_task(saved_args)
        network = self.task.build_model(saved_args)
        network.load_state_dict(checkpoint["model"], strict=True)
        network.eval()
        self.model = network
        self.layer = layer
        self.use_cuda = use_cuda
        if self.use_cuda:
            self.model.cuda()

    def read_audio(self, fname, channel_id=None):
        """Load ``fname`` as a 1-D waveform.

        ``channel_id`` (1 or 2) selects one channel of a two-dimensional
        input; otherwise two-dimensional input is averaged over its last axis.
        """
        wav, sr = sf.read(fname)
        if channel_id is not None:
            assert wav.ndim == 2, \
                f"Expected stereo input when channel_id is given ({fname})"
            assert channel_id in [1, 2], \
                "channel_id is expected to be in [1, 2]"
            wav = wav[:, channel_id-1]
        if wav.ndim == 2:
            wav = wav.mean(-1)
        assert wav.ndim == 1, wav.ndim
        assert sr == self.task.cfg.sample_rate, sr
        return wav

    def get_feats(self, file_path, channel_id=None):
        """Return the output of layer ``self.layer`` with dim 1 squeezed out."""
        samples = self.read_audio(file_path, channel_id)
        with torch.no_grad():
            source = torch.from_numpy(samples).view(1, -1).float()
            if self.use_cuda:
                source = source.cuda()
            outputs = self.model(
                source=source, mask=False, features_only=True, layer=self.layer
            )
            layer_output = outputs["layer_results"][self.layer][0]
            return layer_output.squeeze(1)
+``` +FAIRSEQ_ROOT=<path_to_your_fairseq_repo_root> +TYPE=<one_of_logmel/cpc/hubert/w2v2> +ACOUSTIC_MODEL_PATH=<path_of_pretrained_acoustic_model> +LAYER=<layer_of_acoustic_model_to_extract_features_from> +KM_MODEL_PATH=<output_path_of_the_kmeans_model> +TTS_MODEL_PATH=<unit2speech_model_file_path> +# A text file containing the codes, one per line +CODE_DICT_PATH=<unit2speech_code_dict_path> +WAVEGLOW_PATH=<path_where_you_have_downloaded_waveglow_checkpoint> + +PYTHONPATH=${FAIRSEQ_ROOT}:${FAIRSEQ_ROOT}/examples/textless_nlp/gslm/unit2speech python ${FAIRSEQ_ROOT}/examples/textless_nlp/gslm/tools/resynthesize_speech.py \ + --feature_type $TYPE \ + --acoustic_model_path $ACOUSTIC_MODEL_PATH \ + --layer $LAYER \ + --kmeans_model_path $KM_MODEL_PATH \ + --tts_model_path $TTS_MODEL_PATH \ + --code_dict_path $CODE_DICT_PATH \ + --waveglow_path $WAVEGLOW_PATH \ + --max_decoder_steps 2000 +``` \ No newline at end of file diff --git a/fairseq/examples/textless_nlp/gslm/tools/resynthesize_speech.py b/fairseq/examples/textless_nlp/gslm/tools/resynthesize_speech.py new file mode 100644 index 0000000..3098772 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/tools/resynthesize_speech.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def get_logger():
    """Configure root logging at INFO level and return this module's logger."""
    log_format = "[%(asctime)s] [%(levelname)s]: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
    logger = logging.getLogger(__name__)
    return logger


def get_parser():
    """Build the command-line parser of the resynthesis tool.

    ``--feature_type`` and ``--kmeans_model_path`` are required; every other
    option defaults to ``None`` except ``--max_decoder_steps`` (2000) and
    ``--denoiser_strength`` (0.1).
    """
    parser = argparse.ArgumentParser(description="GSLM U2S tool")
    parser.add_argument(
        "--feature_type",
        type=str,
        choices=["logmel", "hubert", "w2v2", "cpc"],
        default=None,
        required=True,
        help="Acoustic feature type",
    )
    parser.add_argument(
        "--acoustic_model_path",
        type=str,
        help="Pretrained acoustic model checkpoint",
    )
    parser.add_argument("--layer", type=int, help="Layer of acoustic model")
    parser.add_argument(
        "--kmeans_model_path",
        type=str,
        required=True,
        help="K-means model file path to use for inference",
    )
    parser.add_argument(
        "--tts_model_path",
        type=str,
        help="TTS model file path to use for inference",
    )
    parser.add_argument(
        "--code_dict_path",
        type=str,
        help="Code dict file path to use for inference",
    )
    parser.add_argument(
        "--waveglow_path",
        type=str,
        help="Waveglow (vocoder) model file path to use for inference",
    )
    parser.add_argument("--max_decoder_steps", type=int, default=2000)
    parser.add_argument("--denoiser_strength", type=float, default=0.1)
    return parser


################################################
def main(args, logger):
    """Interactively resynthesize audio files.

    Loads the acoustic feature reader, the k-means quantizer, the Tacotron
    unit-to-speech model and the WaveGlow vocoder once, then repeatedly
    prompts on stdin for an input and an output path. The loop ends when
    stdin reaches end-of-file.
    """
    # Acoustic Model
    # The original logged args.tts_model_path here, which is the wrong path.
    logger.info(f"Loading acoustic model from {args.acoustic_model_path}...")
    feature_reader_cls = get_feature_reader(args.feature_type)
    reader = feature_reader_cls(
        checkpoint_path=args.acoustic_model_path, layer=args.layer
    )

    # K-means Model
    logger.info(f"Loading K-means model from {args.kmeans_model_path} ...")
    # Context manager so the file handle is closed after loading.
    with open(args.kmeans_model_path, "rb") as kmeans_file:
        kmeans_model = joblib.load(kmeans_file)
    kmeans_model.verbose = False

    # TTS Model
    logger.info(f"Loading TTS model from {args.tts_model_path}...")
    tacotron_model, sample_rate, hparams = load_tacotron(
        tacotron_model_path=args.tts_model_path,
        max_decoder_steps=args.max_decoder_steps,
    )

    # Waveglow Model
    logger.info(f"Loading Waveglow model from {args.waveglow_path}...")
    waveglow, denoiser = load_waveglow(waveglow_path=args.waveglow_path)

    # Dataset
    # Fall back to the user-supplied code dict when the path stored in the
    # checkpoint's hparams does not exist on this machine.
    if not os.path.exists(hparams.code_dict):
        hparams.code_dict = args.code_dict_path
    tts_dataset = TacotronInputDataset(hparams)

    iters = 0
    while True:
        try:
            in_file_path = input("Input: Enter the full file path of audio file...\n")
            out_file_path = input("Output: Enter the full file path of audio file...\n")
        except EOFError:
            # stdin closed (piped input exhausted or Ctrl-D): stop cleanly.
            logger.info("End of input reached, exiting.")
            break
        feats = reader.get_feats(in_file_path).cpu().numpy()
        iters += 1
        # Free memory periodically. The original compared `iters == 1000`,
        # which fires only once in an unbounded loop.
        if iters % 1000 == 0:
            gc.collect()
            torch.cuda.empty_cache()

        quantized_units = kmeans_model.predict(feats)
        quantized_units_str = " ".join(map(str, quantized_units))

        tts_input = tts_dataset.get_tensor(quantized_units_str)
        mel, aud, aud_dn, has_eos = synthesize_audio(
            tacotron_model,
            waveglow,
            denoiser,
            tts_input.unsqueeze(0),
            strength=args.denoiser_strength,
        )
        sf.write(f"{out_file_path}", aud_dn[0].cpu().float().numpy(), sample_rate)
        logger.info("Resynthesis done!\n")


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    logger = get_logger()
    logger.info(args)
    main(args, logger)
training new models using fairseq. At the end of the page, we also share how to run sampling for those models and provide pointers to the transcribed prompts we used. + +## Pre-trained models + +Using the links below, you can download pre-trained models for various unit types and vocabulary sizes: + +| | 50 | 100 | 200 +|-|-|-|- +| LogMel Filterbank | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/lm_km50/logmel50_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/lm_km100/logmel100_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/lm_km200/logmel200_lm.tgz) +| Modified CPC | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/lm_km50/cpc50_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/lm_km100/cpc100_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/lm_km200/cpc200_lm.tgz) +| HuBERT | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/lm_km50/hubert50_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/lm_km100/hubert100_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/lm_km200/hubert200_lm.tgz) +| Wav2Vec 2.0 | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/lm_km50/w2v2_50_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/lm_km100/w2v2_100_lm.tgz) | [download](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/lm_km200/w2v2_200_lm.tgz) + + +## Preprocessing data +Assuming that unit-transcribed train, valid, and test sets are located in `data/train.txt`, `data/valid.txt`, and `data/test.txt`, respectively, +we run the following command to get a preprocessed version of the dataset in `data-bin`: + +```bash +fairseq-preprocess --only-source \ + --trainpref data/train.txt --validpref data/valid.txt --testpref data/test.txt \ + --destdir data-bin/ --workers 40 +``` +As a result, the `data-bin` directory should appear. 
+ +## Fitting a Unit Language Model (ULM) +As an ULM, we train a standard fairseq Transformer LM. Assuming 8 GPUs are used for training, a good starting point for ULM training would be: +```bash + fairseq-train data-bin/ \ + --task=language_modeling \ + --arch=transformer_lm_big \ + --share-decoder-input-output-embed \ + --dropout=0.1 \ + --attention-dropout=0.1 \ + --optimizer=adam \ + --adam-betas='(0.9, 0.98)' \ + --clip-norm=1.0 \ + --lr=0.0005 \ + --lr-scheduler=inverse_sqrt \ + --warmup-updates=4000 \ + --warmup-init-lr=1e-07 \ + --tokens-per-sample=3072 \ + --update-freq=16 \ + --max-tokens=4096 \ + --num-workers=4 \ + --skip-invalid-size-inputs-valid-test \ + --max-update=500000 \ + --log-interval=10 \ + --seed=100501 \ + --fp16 \ + --sample-break-mode=eos +``` +This command will train a Transformer-large model (12 layers). You can train other standard LM models provided by fairseq, e.g. specify `--arch=transformer_lm` to train a smaller (6-layer) Transformer model. When training with a different number of GPUs, it might be a good idea to adjust the `update-freq` parameter. To save GPU memory at the expense of additional computation, it can be useful to enable activation checkpointing with `--checkpoint-activations`. + +## Sampling from an ULM +Once an ULM has been trained, we can use it for generating new utterances. Suppose that the prompts are given in a file named `prompts.txt`. Then we can sample continuations by running the following command: + +```bash + python sample.py data-bin/ \ + --path=checkpoints/checkpoint_best.pt --task=language_modeling --sampling --temperature=0.7 \ + --seed=1 --prompts=prompts.txt --output=samples.txt --max-len-a=0 --max-len-b=500 \ + --prefix-size=-1 --batch-size=16 --fp16 --samples-per-prompt=10 +``` +Here, `--prefix-size` controls the number of tokens that are used to prime the ULM. 
When set to a positive value, the sampling script will take the first `prefix-size` tokens to prompt the ULM; with `0` it runs unconditional sampling and with `-1` the entire prompt is used. +`--samples-per-prompt` specifies how many utterances are generated for every prompt, which can be useful when generating multiple prompt continuations. In this command, `--max-len-a` and `--max-len-b` control the number of generated tokens. + +When using a pretrained model from above, `data-bin` should point to the unpacked directory (the one containing the `dict.txt` file). + +At evaluation time, to generate prompts, we used utterances from LibriSpeech dev-clean and test-clean that are longer than 6s. We took the first 3s from an utterance as a prompt. Unit transcripts of those prompts can be downloaded here: [[dev]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/eval_data/dev_prompts.tgz) [[test]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/eval_data/test_prompts.tgz) + diff --git a/fairseq/examples/textless_nlp/gslm/ulm/sample.py b/fairseq/examples/textless_nlp/gslm/ulm/sample.py new file mode 100644 index 0000000..77302a6 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/ulm/sample.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
Batch = namedtuple('Batch', 'ids src_tokens src_lengths')
Translation = namedtuple('Translation', 'src_str hypos pos_scores alignments')


def make_batches(lines, args, task, max_positions):
    """Encode ``lines`` with the task's source dictionary and yield ``Batch`` items.

    Unknown symbols are not added to the dictionary. Batching limits are taken
    from ``args.dataset`` and the iterator is created with ``shuffle=False``.
    """
    tokens = [
        task.source_dictionary.encode_line(
            src_str, add_if_not_exist=False
        ).long()
        for src_str in lines
    ]
    lengths = [t.numel() for t in tokens]
    itr = task.get_batch_iterator(
        dataset=task.build_dataset_for_inference(tokens, lengths),
        max_tokens=args.dataset.max_tokens,
        max_sentences=args.dataset.batch_size,
        max_positions=max_positions,
        ignore_invalid_inputs=args.dataset.skip_invalid_size_inputs_valid_test
    ).next_epoch_itr(shuffle=False)
    for batch in itr:
        yield Batch(
            ids=batch['id'],
            src_tokens=batch['net_input']['src_tokens'], src_lengths=batch['net_input']['src_lengths'],
        )


def main(args):
    """Sample continuations for every prompt and write them to ``args.output``.

    Each non-blank line of the prompts file has the form ``<seq_id>|<units>``.
    Each output line has the form ``<seq_id>__<hypothesis index>|<units>``.
    """
    arg_prompts = args.prompts
    arg_output = args.output
    arg_debug = args.debug
    arg_sample_size = args.samples_per_prompt

    # Only a missing helper (older fairseq) is tolerated; the original used a
    # bare ``except`` that also hid genuine conversion errors.
    try:
        from fairseq.dataclass.utils import convert_namespace_to_omegaconf
    except ImportError:
        pass
    else:
        args = convert_namespace_to_omegaconf(args)

    # if args.max_tokens is None and args.max_sentences is None:
    if args.common.seed is not None:
        np.random.seed(args.common.seed)
        utils.set_torch_seed(args.common.seed)

    if args.generation.sampling:
        args.generation.nbest = args.generation.beam = arg_sample_size

    task = tasks.setup_task(args.task)

    overrides = ast.literal_eval(args.common_eval.model_overrides)

    models, _model_args = checkpoint_utils.load_model_ensemble(
        args.common_eval.path.split(os.pathsep),
        arg_overrides=overrides,
        task=task,
        suffix=getattr(args, "checkpoint_suffix", ""),
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.prepare_for_inference_(args)
        model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.generation.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(),
        *[model.max_positions() for model in models]
    )

    with open(arg_prompts, 'r') as fin:
        # Skip blank lines (e.g. a trailing newline); they have no '|' and
        # would raise IndexError below.
        lines = [line for line in fin if line.strip()]

    split = [x.split('|', 1) for x in lines]
    seq_id = [x[0] for x in split]
    prompts = [x[1] for x in split]

    if args.generation.prefix_size >= 0:
        prompts = [' '.join(l.split()[:args.generation.prefix_size])
                   for l in prompts]

    if arg_debug:
        prompts = prompts[:10]

    generator = task.build_generator(models, args.generation)

    start_id = 0
    # Context managers so the output file is flushed and closed, and the
    # progress bar released, even if generation fails part-way.
    with open(arg_output, 'w') as output_file, \
            tqdm.tqdm(total=len(prompts)) as pbar:
        for batch in make_batches(prompts, args, task, max_positions):
            src_tokens = batch.src_tokens
            src_lengths = batch.src_lengths
            src_tokens = src_tokens.cuda()
            src_lengths = src_lengths.cuda()

            sample = {
                'net_input': {
                    'src_tokens': src_tokens,
                    'src_lengths': src_lengths,
                },
            }

            results = []
            translations = task.inference_step(generator, models, sample)
            # NOTE(review): results are keyed by running position, not by
            # batch.ids. That matches the prompt order only if the batch
            # iterator does not reorder samples - confirm.
            for i, hypos in enumerate(translations):
                src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad())
                results.append((i + start_id, src_tokens_i, hypos))

            # sort output to match input order
            for prompt_idx, prompt_tokens, hypos in sorted(results, key=lambda x: x[0]):
                # Defined up-front so it is never unbound when src_dict is None.
                src_str = ""
                if src_dict is not None:
                    src_str = src_dict.string(
                        prompt_tokens, args.common_eval.post_process)

                # Process top predictions
                for hypo_id, hypo in enumerate(hypos):
                    _hypo_tokens, hypo_str, _alignment = utils.post_process_prediction(
                        hypo_tokens=hypo['tokens'].int().cpu(),
                        src_str=src_str,
                        alignment=hypo['alignment'],
                        align_dict=align_dict,
                        tgt_dict=tgt_dict,
                        remove_bpe=args.common_eval.post_process,
                    )

                    utterance = hypo_str
                    print(f'{seq_id[prompt_idx]}__{hypo_id}|{utterance}', file=output_file)
                pbar.update(1)
            start_id += len(results)


def cli_main():
    """Parse the command line (interactive-generation options plus the
    sampling-specific ones), seed NumPy and torch, and run ``main``."""
    parser = options.get_interactive_generation_parser()
    parser.add_argument('--prompts', type=str, default=None, required=True)
    parser.add_argument('--output', type=str, default=None, required=True)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--samples-per-prompt', type=int, default=1)

    args = options.parse_args_and_arch(parser)

    np.random.seed(args.seed)
    utils.set_torch_seed(args.seed)

    main(args)


if __name__ == '__main__':
    cli_main()
+ +Upstream Units | Download Links | model md5 +|-|-|- +Log Mel Filterbank + KM50 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km50/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km50/code_dict) | 932b3b8527c0125f5f964b57762eba49 +Log Mel Filterbank + KM100 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km100/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km100/code_dict) | cde0b0d278a39011d0acbd5df27abdf4 +Log Mel Filterbank + KM200 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km200/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/logmel/tts_km200/code_dict) | dba0f1d4de64bc7976718834010b23e7 +Modified CPC + KM50 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km50/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km50/code_dict) | a585e8dd8890ea56164f17635dd8e613 +Modified CPC + KM100 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km100/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km100/code_dict) | 5c0ee2869b4f483d17f37f1a41a548e0 +Modified CPC + KM200 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km200/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/cpc/tts_km200/code_dict) | 2f0c9951cf37020d9464514bff48bc5d +HuBERT Base + KM50 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km50/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km50/code_dict) | 85ffce8baec5aa90035ab696fe676fce +HuBERT Base + KM100 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km100/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km100/code_dict) | 
df4a9c6ffd1bb00c91405432c234aba3 +HuBERT Base + KM200 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km200/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/tts_km200/code_dict) | ac72f2c0c563589819bec116c7f8d274 +wav2vec 2.0 Large + KM50 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km50/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km50/code_dict) | e3503d0ad822b2c24b89f68b857fedff +wav2vec 2.0 Large + KM100 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km100/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km100/code_dict) | eb3666e456ae4c96bf2a1eec825c13ed +wav2vec 2.0 Large + KM200 | [model](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km200/tts_checkpoint_best.pt) - [code_dict](https://dl.fbaipublicfiles.com/textless_nlp/gslm/w2v2/tts_km200/code_dict) | 777d343e963c4d64f04d78eef032f4e8 + +## Run inference using a unit2speech model +* Install librosa, unidecode and inflect using `pip install librosa unidecode inflect` +* Download [Waveglow checkpoint](https://dl.fbaipublicfiles.com/textless_nlp/gslm/waveglow_256channels_new.pt). This is the vocoder. + +Sample command to run inference using trained unit2speech models. Please note that the quantized audio to be synthesized must use the same units that the unit2speech model was trained with. 
def find_all_files(path_dir, extension):
    """Recursively collect files under ``path_dir`` whose name ends with ``extension``.

    Returns:
        A list of ``(stem, full_path)`` tuples, in ``os.walk`` order.
    """
    return [
        (str(Path(filename).stem), os.path.join(root, filename))
        for root, _dirs, filenames in os.walk(path_dir)
        for filename in filenames
        if filename.endswith(extension)
    ]


def build_sox_command(inputfile, outputfile16k):
    """Return the sox argument list that writes ``inputfile`` as 16 kHz wav."""
    return [
        'sox', '-c', '1', '-b', '16', str(inputfile),
        '-t', 'wav', str(outputfile16k), 'rate', '16k',
    ]


def convert16k(inputfile, outputfile16k):
    """Convert ``inputfile`` to a 16 kHz wav file with sox.

    The command is passed as an argument list rather than a formatted string
    split with ``shlex``, so paths containing spaces or quotes reach sox
    intact.

    Returns:
        The exit status of the sox process.
    """
    return subprocess.call(build_sox_command(inputfile, outputfile16k))


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Convert to wav 16k audio using sox.')
    parser.add_argument('input_dir', type=str,
                        help='Path to the input dir.')
    parser.add_argument('output_dir', type=str,
                        help='Path to the output dir.')
    parser.add_argument('--extension', type=str, default='wav',
                        help='Audio file extension in the input. Default: wav')
    args = parser.parse_args()

    # Find all sequences
    print(f"Finding all audio files with extension '{args.extension}' from {args.input_dir}...")
    audio_files = find_all_files(args.input_dir, args.extension)
    print(f"Done! Found {len(audio_files)} files.")

    # Convert to relative path
    audio_files = [os.path.relpath(file[-1], start=args.input_dir) for file in audio_files]

    # Create all the directories needed
    rel_dirs_set = set([os.path.dirname(file) for file in audio_files])
    for rel_dir in rel_dirs_set:
        Path(os.path.join(args.output_dir, rel_dir)).mkdir(parents=True, exist_ok=True)

    # Converting wavs files
    print("Converting the audio to wav files...")
    bar = progressbar.ProgressBar(maxval=len(audio_files))
    bar.start()
    start_time = time()
    failed = []
    for index, file in enumerate(audio_files):
        bar.update(index)
        input_file = os.path.join(args.input_dir, file)
        output_file = os.path.join(args.output_dir, os.path.splitext(file)[0]+".wav")
        # Remember failures instead of silently dropping sox's exit status.
        if convert16k(input_file, output_file) != 0:
            failed.append(input_file)
    bar.finish()
    print(f"...done {len(audio_files)} files in {time()-start_time} seconds.")
    if failed:
        print(f"sox failed on {len(failed)} file(s):")
        for path in failed:
            print(f"  {path}")
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class Invertible1x1Conv(torch.nn.Module):
    """
    Invertible 1x1 convolution.

    The layer outputs both the convolution, and the log determinant
    of its weight matrix.  If reverse=True it does convolution with
    inverse
    """
    def __init__(self, c):
        """Build a ``c`` x ``c`` 1x1 convolution initialised to a rotation.

        Args:
            c: number of input (and output) channels.
        """
        super(Invertible1x1Conv, self).__init__()
        self.conv = torch.nn.Conv1d(c, c, kernel_size=1, stride=1, padding=0,
                                    bias=False)

        # Sample a random orthonormal matrix to initialize weights.
        # Pick the QR routine by feature detection: comparing
        # torch.__version__ as a string is lexicographic, so for example
        # "1.13.1" >= "1.8" is False and the deprecated torch.qr was chosen.
        linalg = getattr(torch, "linalg", None)
        _qr = getattr(linalg, "qr", None) or torch.qr
        W = _qr(torch.FloatTensor(c, c).normal_())[0]

        # Ensure determinant is 1.0 not -1.0
        if torch.det(W) < 0:
            W[:, 0] = -1 * W[:, 0]
        W = W.contiguous().view(c, c, 1)
        self.conv.weight.data = W

    def forward(self, z, reverse=False):
        """Apply the convolution, or its inverse when ``reverse`` is true.

        Args:
            z: tensor whose ``size()`` unpacks into
                (batch_size, group_size, n_of_groups).
            reverse: when true, convolve with the inverse weight matrix.

        Returns:
            ``(z, log_det_W)`` in the forward direction; only ``z`` when
            ``reverse`` is true.
        """
        # shape
        batch_size, group_size, n_of_groups = z.size()

        # Drop only the kernel dimension so a single-channel layer still
        # yields a 2-D matrix.
        W = self.conv.weight.squeeze(-1)

        if reverse:
            if not hasattr(self, 'W_inverse'):
                # Reverse computation; the inverse is computed once and
                # cached on the module for subsequent calls.
                W_inverse = W.float().inverse()
                W_inverse = Variable(W_inverse[..., None])
                if z.type() == 'torch.cuda.HalfTensor':
                    W_inverse = W_inverse.half()
                self.W_inverse = W_inverse
            z = F.conv1d(z, self.W_inverse, bias=None, stride=1, padding=0)
            return z
        else:
            # Forward computation
            log_det_W = batch_size * n_of_groups * torch.logdet(W)
            z = self.conv(z)
            return z, log_det_W
class WaveGlow(torch.nn.Module):
    """Flow-based vocoder built from invertible 1x1 convolutions and WN
    affine-coupling layers, conditioned on an upsampled mel spectrogram."""

    def __init__(self, n_mel_channels, n_flows, n_group, n_early_every,
                 n_early_size, WN_config):
        """
        Args:
            n_mel_channels: channels of the conditioning spectrogram.
            n_flows: number of (1x1 conv, coupling) flow steps.
            n_group: number of audio samples grouped into one channel
                vector; must be even.
            n_early_every: emit ``n_early_size`` channels early every this
                many flows.
            n_early_size: number of channels emitted at each early output.
            WN_config: keyword arguments forwarded to every ``WN`` layer.
        """
        super(WaveGlow, self).__init__()

        self.upsample = torch.nn.ConvTranspose1d(n_mel_channels,
                                                 n_mel_channels,
                                                 1024, stride=256)
        assert(n_group % 2 == 0)
        self.n_flows = n_flows
        self.n_group = n_group
        self.n_early_every = n_early_every
        self.n_early_size = n_early_size
        self.WN = torch.nn.ModuleList()
        self.convinv = torch.nn.ModuleList()

        n_half = int(n_group/2)

        # Set up layers with the right sizes based on how many dimensions
        # have been output already
        n_remaining_channels = n_group
        for k in range(n_flows):
            if k % self.n_early_every == 0 and k > 0:
                n_half = n_half - int(self.n_early_size/2)
                n_remaining_channels = n_remaining_channels - self.n_early_size
            self.convinv.append(Invertible1x1Conv(n_remaining_channels))
            self.WN.append(WN(n_half, n_mel_channels*n_group, **WN_config))
        self.n_remaining_channels = n_remaining_channels  # Useful during inference

    def forward(self, forward_input):
        """
        forward_input[0] = mel_spectrogram:  batch x n_mel_channels x frames
        forward_input[1] = audio: batch x time

        Returns the concatenated latent output together with the lists of
        per-flow ``log_s`` tensors and 1x1-convolution log determinants.
        """
        spect, audio = forward_input

        #  Upsample spectrogram to size of audio
        spect = self.upsample(spect)
        assert(spect.size(2) >= audio.size(1))
        if spect.size(2) > audio.size(1):
            spect = spect[:, :, :audio.size(1)]

        spect = spect.unfold(2, self.n_group, self.n_group).permute(0, 2, 1, 3)
        spect = spect.contiguous().view(spect.size(0), spect.size(1), -1).permute(0, 2, 1)

        audio = audio.unfold(1, self.n_group, self.n_group).permute(0, 2, 1)
        output_audio = []
        log_s_list = []
        log_det_W_list = []

        for k in range(self.n_flows):
            if k % self.n_early_every == 0 and k > 0:
                # Peel off the channels that are output early.
                output_audio.append(audio[:, :self.n_early_size, :])
                audio = audio[:, self.n_early_size:, :]

            audio, log_det_W = self.convinv[k](audio)
            log_det_W_list.append(log_det_W)

            n_half = int(audio.size(1)/2)
            audio_0 = audio[:, :n_half, :]
            audio_1 = audio[:, n_half:, :]

            output = self.WN[k]((audio_0, spect))
            log_s = output[:, n_half:, :]
            b = output[:, :n_half, :]
            audio_1 = torch.exp(log_s)*audio_1 + b
            log_s_list.append(log_s)

            audio = torch.cat([audio_0, audio_1], 1)

        output_audio.append(audio)
        return torch.cat(output_audio, 1), log_s_list, log_det_W_list

    def infer(self, spect, sigma=1.0):
        """Generate audio from a spectrogram by running the flows in reverse.

        The latent noise is sampled on the device and with the dtype of the
        upsampled spectrogram, so inference works on CPU as well as on CUDA
        (the previous code always allocated ``torch.cuda`` tensors).

        Args:
            spect: conditioning spectrogram, fed to ``self.upsample``.
            sigma: scale applied to the sampled Gaussian noise.

        Returns:
            Tensor with one flattened waveform per batch entry.
        """
        spect = self.upsample(spect)
        # trim conv artifacts. maybe pad spec to kernel multiple
        time_cutoff = self.upsample.kernel_size[0] - self.upsample.stride[0]
        spect = spect[:, :, :-time_cutoff]

        spect = spect.unfold(2, self.n_group, self.n_group).permute(0, 2, 1, 3)
        spect = spect.contiguous().view(spect.size(0), spect.size(1), -1).permute(0, 2, 1)

        audio = sigma * torch.randn(spect.size(0),
                                    self.n_remaining_channels,
                                    spect.size(2),
                                    dtype=spect.dtype,
                                    device=spect.device)

        for k in reversed(range(self.n_flows)):
            n_half = int(audio.size(1)/2)
            audio_0 = audio[:, :n_half, :]
            audio_1 = audio[:, n_half:, :]

            output = self.WN[k]((audio_0, spect))

            s = output[:, n_half:, :]
            b = output[:, :n_half, :]
            audio_1 = (audio_1 - b)/torch.exp(s)
            audio = torch.cat([audio_0, audio_1], 1)

            audio = self.convinv[k](audio, reverse=True)

            if k % self.n_early_every == 0 and k > 0:
                # Re-insert fresh noise for the channels emitted early in
                # the forward direction.
                z = torch.randn(spect.size(0), self.n_early_size,
                                spect.size(2), dtype=spect.dtype,
                                device=spect.device)
                audio = torch.cat((sigma*z, audio), 1)

        audio = audio.permute(0, 2, 1).contiguous().view(audio.size(0), -1).data
        return audio

    @staticmethod
    def remove_weightnorm(model):
        """Strip weight normalisation from every WN layer of ``model`` and
        return the same model."""
        waveglow = model
        for WN in waveglow.WN:
            WN.start = torch.nn.utils.remove_weight_norm(WN.start)
            WN.in_layers = remove(WN.in_layers)
            WN.cond_layer = torch.nn.utils.remove_weight_norm(WN.cond_layer)
            WN.res_skip_layers = remove(WN.res_skip_layers)
        return waveglow
# Launch one copy of the given command per visible GPU. The last command
# line argument is used as the directory for the per-GPU log files.
argslist = list(sys.argv)[1:]
log_dir = argslist[-1]
num_gpus = torch.cuda.device_count()
argslist.append('--n_gpus={}'.format(num_gpus))
workers = []
log_files = []
job_id = time.strftime("%Y_%m_%d-%H%M%S")
argslist.append("--group_name=group_{}".format(job_id))

print("GPU log directory is {}".format(log_dir))
os.makedirs(log_dir, exist_ok=True)
try:
    for i in range(num_gpus):
        argslist.append('--rank={}'.format(i))
        # Rank 0 keeps the parent's stdout; every other rank logs to a file.
        if i == 0:
            stdout = None
        else:
            stdout = open("{}/{}_GPU_{}.log".format(log_dir, job_id, i),
                          "w")
            log_files.append(stdout)
        print(argslist)
        p = subprocess.Popen([str(sys.executable)]+argslist, stdout=stdout)
        workers.append(p)
        # Drop the rank argument again before the next iteration.
        argslist = argslist[:-1]

    for p in workers:
        p.wait()
finally:
    # Close the log handles opened above; they were previously leaked.
    for log_file in log_files:
        log_file.close()
def get_parser():
    """Build the command line parser for unit-to-speech synthesis.

    Returns:
        argparse.ArgumentParser with the paths of the quantized units, the
        TTS model, the WaveGlow vocoder and the code dictionary, the
        decoder/denoiser settings and the output directory.
    """
    parser = argparse.ArgumentParser(
        description="Wav2Vec 2.0 speech generator."
    )
    parser.add_argument(
        "--quantized_unit_path",
        type=str,
        # This path is handed to load_quantized_audio_from_file; it is not a
        # K-means model, as the previous help text claimed.
        help="Quantized unit file path to use for inference",
    )
    parser.add_argument(
        "--tts_model_path",
        type=str,
        help="TTS model file path to use for inference",
    )
    parser.add_argument(
        "--waveglow_path",
        type=str,
        help="Path to the waveglow checkpoint (vocoder).",
    )
    parser.add_argument(
        "--code_dict_path",
        type=str,
        help="Code dict file path to use for inference",
    )
    parser.add_argument("--max_decoder_steps", type=int, default=2000)
    parser.add_argument("--denoiser_strength", type=float, default=0.1)
    parser.add_argument(
        "--out_audio_dir",
        type=str,
        help="Output directory to dump audio files",
    )

    return parser
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : [optional]
        The length of the window function.  When None, this matches `n_fft`.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Normalization passed through to `librosa.util.normalize`

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm)**2
    # `size` is keyword-only in librosa >= 0.10; the keyword form is also
    # accepted by older releases.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope
    for i in range(n_frames):
        sample = i * hop_length
        x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
    return x
--git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cleaners.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cleaners.py new file mode 100644 index 0000000..e2e35c1 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cleaners.py @@ -0,0 +1,90 @@ +""" from https://github.com/keithito/tacotron """ + +''' +Cleaners are transformations that run over the input text at both training and eval time. + +Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" +hyperparameter. Some cleaners are English-specific. You'll typically want to use: + 1. "english_cleaners" for English text + 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using + the Unidecode library (https://pypi.python.org/pypi/Unidecode) + 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update + the symbols in symbols.py to match your data). +''' + +import re +from unidecode import unidecode +from .numbers import normalize_numbers + + +# Regular expression matching whitespace: +_whitespace_re = re.compile(r'\s+') + +# List of (regular expression, replacement) pairs for abbreviations: +_abbreviations = [(re.compile('\\b%s\\.' 
% x[0], re.IGNORECASE), x[1]) for x in [ + ('mrs', 'misess'), + ('mr', 'mister'), + ('dr', 'doctor'), + ('st', 'saint'), + ('co', 'company'), + ('jr', 'junior'), + ('maj', 'major'), + ('gen', 'general'), + ('drs', 'doctors'), + ('rev', 'reverend'), + ('lt', 'lieutenant'), + ('hon', 'honorable'), + ('sgt', 'sergeant'), + ('capt', 'captain'), + ('esq', 'esquire'), + ('ltd', 'limited'), + ('col', 'colonel'), + ('ft', 'fort'), +]] + + +def expand_abbreviations(text): + for regex, replacement in _abbreviations: + text = re.sub(regex, replacement, text) + return text + + +def expand_numbers(text): + return normalize_numbers(text) + + +def lowercase(text): + return text.lower() + + +def collapse_whitespace(text): + return re.sub(_whitespace_re, ' ', text) + + +def convert_to_ascii(text): + return unidecode(text) + + +def basic_cleaners(text): + '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' + text = lowercase(text) + text = collapse_whitespace(text) + return text + + +def transliteration_cleaners(text): + '''Pipeline for non-English text that transliterates to ASCII.''' + text = convert_to_ascii(text) + text = lowercase(text) + text = collapse_whitespace(text) + return text + + +def english_cleaners(text): + '''Pipeline for English text, including number and abbreviation expansion.''' + text = convert_to_ascii(text) + text = lowercase(text) + text = expand_numbers(text) + text = expand_abbreviations(text) + text = collapse_whitespace(text) + return text diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cmudict.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cmudict.py new file mode 100644 index 0000000..62bfef7 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/cmudict.py @@ -0,0 +1,65 @@ +""" from https://github.com/keithito/tacotron """ + +import re + + +valid_symbols = [ + 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2', + 'AO', 'AO0', 
'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2', + 'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY', + 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1', + 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0', + 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW', + 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH' +] + +_valid_symbol_set = set(valid_symbols) + + +class CMUDict: + '''Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict''' + def __init__(self, file_or_path, keep_ambiguous=True): + if isinstance(file_or_path, str): + with open(file_or_path, encoding='latin-1') as f: + entries = _parse_cmudict(f) + else: + entries = _parse_cmudict(file_or_path) + if not keep_ambiguous: + entries = {word: pron for word, pron in entries.items() if len(pron) == 1} + self._entries = entries + + + def __len__(self): + return len(self._entries) + + + def lookup(self, word): + '''Returns list of ARPAbet pronunciations of the given word.''' + return self._entries.get(word.upper()) + + + +_alt_re = re.compile(r'\([0-9]+\)') + + +def _parse_cmudict(file): + cmudict = {} + for line in file: + if len(line) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"): + parts = line.split(' ') + word = re.sub(_alt_re, '', parts[0]) + pronunciation = _get_pronunciation(parts[1]) + if pronunciation: + if word in cmudict: + cmudict[word].append(pronunciation) + else: + cmudict[word] = [pronunciation] + return cmudict + + +def _get_pronunciation(s): + parts = s.strip().split(' ') + for part in parts: + if part not in _valid_symbol_set: + return None + return ' '.join(parts) diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/layers.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/layers.py new file mode 100644 index 0000000..f10d557 --- /dev/null +++ 
class ConvNorm(torch.nn.Module):
    """1-D convolution whose weight is Xavier-uniform initialised."""

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
        """
        Args:
            in_channels: channels of the input signal.
            out_channels: channels produced by the convolution.
            kernel_size: convolution kernel width.
            stride: convolution stride.
            padding: explicit padding; when None it is derived from the
                kernel size and dilation, which requires an odd kernel.
            dilation: spacing between kernel taps.
            bias: whether the convolution has a bias term.
            w_init_gain: nonlinearity name passed to ``calculate_gain``.
        """
        super().__init__()

        if padding is None:
            # Deriving the padding is only defined for odd kernel sizes.
            assert(kernel_size % 2 == 1)
            padding = int(dilation * (kernel_size - 1) / 2)

        conv_kwargs = dict(kernel_size=kernel_size, stride=stride,
                           padding=padding, dilation=dilation, bias=bias)
        self.conv = torch.nn.Conv1d(in_channels, out_channels, **conv_kwargs)

        init_gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.conv.weight, gain=init_gain)

    def forward(self, signal):
        """Return the convolution of ``signal``."""
        return self.conv(signal)
class TacotronSTFT(torch.nn.Module):
    """STFT front end that turns waveforms into compressed mel spectrograms."""

    def __init__(self, filter_length=1024, hop_length=256, win_length=1024,
                 n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0,
                 mel_fmax=8000.0):
        """
        Args:
            filter_length: FFT size, also used as ``n_fft`` of the mel bank.
            hop_length: hop passed to the STFT.
            win_length: window length passed to the STFT.
            n_mel_channels: number of mel bands.
            sampling_rate: sampling rate used to build the mel filter bank.
            mel_fmin: lowest frequency of the mel filter bank.
            mel_fmax: highest frequency of the mel filter bank.
        """
        super(TacotronSTFT, self).__init__()
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.stft_fn = STFT(filter_length, hop_length, win_length)
        # librosa >= 0.10 only accepts these as keyword arguments; the
        # keyword form is also valid for older releases.
        mel_basis = librosa_mel_fn(
            sr=sampling_rate, n_fft=filter_length, n_mels=n_mel_channels,
            fmin=mel_fmin, fmax=mel_fmax)
        mel_basis = torch.from_numpy(mel_basis).float()
        self.register_buffer('mel_basis', mel_basis)

    def spectral_normalize(self, magnitudes):
        """Apply dynamic range compression to ``magnitudes``."""
        output = dynamic_range_compression(magnitudes)
        return output

    def spectral_de_normalize(self, magnitudes):
        """Undo ``spectral_normalize``."""
        output = dynamic_range_decompression(magnitudes)
        return output

    def mel_spectrogram(self, y):
        """Computes mel-spectrograms from a batch of waves
        PARAMS
        ------
        y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1]

        RETURNS
        -------
        mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
        """
        assert(torch.min(y.data) >= -1)
        assert(torch.max(y.data) <= 1)

        magnitudes, phases = self.stft_fn.transform(y)
        magnitudes = magnitudes.data
        mel_output = torch.matmul(self.mel_basis, magnitudes)
        mel_output = self.spectral_normalize(mel_output)
        return mel_output
class Attention(nn.Module):
    """Attention that scores encoder memory against the attention-RNN state
    and location features derived from earlier attention weights."""

    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
                 attention_location_n_filters, attention_location_kernel_size):
        super().__init__()
        self.query_layer = LinearNorm(
            attention_rnn_dim, attention_dim, bias=False, w_init_gain='tanh')
        self.memory_layer = LinearNorm(
            embedding_dim, attention_dim, bias=False, w_init_gain='tanh')
        self.v = LinearNorm(attention_dim, 1, bias=False)
        self.location_layer = LocationLayer(
            attention_location_n_filters,
            attention_location_kernel_size,
            attention_dim)
        # Energies at masked (padded) positions are set to this value so
        # they receive zero weight after the softmax.
        self.score_mask_value = float("-inf")

    def get_alignment_energies(self, query, processed_memory,
                               attention_weights_cat):
        """
        PARAMS
        ------
        query: decoder output (batch, n_mel_channels * n_frames_per_step)
        processed_memory: processed encoder outputs (B, T_in, attention_dim)
        attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)

        RETURNS
        -------
        alignment (batch, max_time)
        """
        query_proj = self.query_layer(query.unsqueeze(1))
        location_proj = self.location_layer(attention_weights_cat)
        combined = torch.tanh(query_proj + location_proj + processed_memory)
        return self.v(combined).squeeze(-1)

    def forward(self, attention_hidden_state, memory, processed_memory,
                attention_weights_cat, mask):
        """
        PARAMS
        ------
        attention_hidden_state: attention rnn last output
        memory: encoder outputs
        processed_memory: processed encoder outputs
        attention_weights_cat: previous and cumulative attention weights
        mask: binary mask for padded data

        RETURNS
        -------
        attention_context, attention_weights
        """
        energies = self.get_alignment_energies(
            attention_hidden_state, processed_memory, attention_weights_cat)

        if mask is not None:
            energies.data.masked_fill_(mask, self.score_mask_value)

        weights = F.softmax(energies, dim=1)
        context = torch.bmm(weights.unsqueeze(1), memory).squeeze(1)
        return context, weights
w_init_gain='tanh'), + nn.BatchNorm1d(hparams.postnet_embedding_dim)) + ) + + for i in range(1, hparams.postnet_n_convolutions - 1): + self.convolutions.append( + nn.Sequential( + ConvNorm(hparams.postnet_embedding_dim, + hparams.postnet_embedding_dim, + kernel_size=hparams.postnet_kernel_size, stride=1, + padding=int((hparams.postnet_kernel_size - 1) / 2), + dilation=1, w_init_gain='tanh'), + nn.BatchNorm1d(hparams.postnet_embedding_dim)) + ) + + self.convolutions.append( + nn.Sequential( + ConvNorm(hparams.postnet_embedding_dim, hparams.n_mel_channels, + kernel_size=hparams.postnet_kernel_size, stride=1, + padding=int((hparams.postnet_kernel_size - 1) / 2), + dilation=1, w_init_gain='linear'), + nn.BatchNorm1d(hparams.n_mel_channels)) + ) + + def forward(self, x): + for i in range(len(self.convolutions) - 1): + x = F.dropout(torch.tanh(self.convolutions[i](x)), 0.5, self.training) + x = F.dropout(self.convolutions[-1](x), 0.5, self.training) + + return x + + +class Encoder(nn.Module): + """Encoder module: + - Three 1-d convolution banks + - Bidirectional LSTM + """ + def __init__(self, hparams): + super(Encoder, self).__init__() + + convolutions = [] + for _ in range(hparams.encoder_n_convolutions): + conv_layer = nn.Sequential( + ConvNorm(hparams.encoder_embedding_dim, + hparams.encoder_embedding_dim, + kernel_size=hparams.encoder_kernel_size, stride=1, + padding=int((hparams.encoder_kernel_size - 1) / 2), + dilation=1, w_init_gain='relu'), + nn.BatchNorm1d(hparams.encoder_embedding_dim)) + convolutions.append(conv_layer) + self.convolutions = nn.ModuleList(convolutions) + + self.lstm = nn.LSTM(hparams.encoder_embedding_dim, + int(hparams.encoder_embedding_dim / 2), 1, + batch_first=True, bidirectional=True) + + def forward(self, x, input_lengths): + for conv in self.convolutions: + x = F.dropout(F.relu(conv(x)), 0.5, self.training) + + x = x.transpose(1, 2) + + # pytorch tensor are not reversible, hence the conversion + input_lengths = 
class AudioEncoder(nn.Module):
    """Encodes a spectrogram into a latent vector: convolutions, a
    bidirectional LSTM, length-aware average pooling and two linear heads
    producing the mean and log-variance of a Gaussian."""

    def __init__(self, hparams):
        """
        Args:
            hparams: hyper-parameter object; reads ``lat_dim`` (must be
                positive), ``n_mel_channels``, ``lat_n_convolutions``,
                ``lat_n_filters``, ``lat_kernel_size`` and ``lat_n_blstms``.
        """
        super(AudioEncoder, self).__init__()

        assert hparams.lat_dim > 0

        convolutions = []
        inp_dim = hparams.n_mel_channels
        for _ in range(hparams.lat_n_convolutions):
            conv_layer = nn.Sequential(
                ConvNorm(inp_dim, hparams.lat_n_filters,
                         kernel_size=hparams.lat_kernel_size, stride=1,
                         padding=int((hparams.lat_kernel_size - 1) / 2),
                         dilation=1, w_init_gain='tanh'),
                nn.BatchNorm1d(hparams.lat_n_filters))
            inp_dim = hparams.lat_n_filters
            convolutions.append(conv_layer)
        self.convolutions = nn.ModuleList(convolutions)

        self.lstm = nn.LSTM(hparams.lat_n_filters,
                            int(hparams.lat_n_filters / 2),
                            hparams.lat_n_blstms, batch_first=True,
                            bidirectional=True)
        self.pool = GlobalAvgPool()

        self.mu_proj = LinearNorm(hparams.lat_n_filters, hparams.lat_dim)
        self.logvar_proj = LinearNorm(hparams.lat_n_filters, hparams.lat_dim)
        self.lat_dim = hparams.lat_dim

    def forward(self, x, lengths):
        """
        Args:
            x (torch.Tensor): (B, F, T)
            lengths (torch.Tensor): (B,) valid length of each sequence; its
                maximum must equal T.

        Returns:
            Tuple ``(z, mu, logvar)`` where ``z`` is a reparameterised
            sample from the Gaussian with mean ``mu`` and log-variance
            ``logvar``.
        """

        for conv in self.convolutions:
            # torch.tanh replaces the deprecated F.tanh.
            x = F.dropout(torch.tanh(conv(x)), 0.5, self.training)

        x = x.transpose(1, 2)  # (B, T, D)

        # x may not be sorted by length. Sort->process->unsort
        max_len = x.size(1)
        assert max_len == torch.max(lengths).item()

        sorted_lengths, perm_idx = lengths.sort(0, descending=True)
        x = x[perm_idx]
        # pack_padded_sequence requires the lengths to live on the CPU.
        x = nn.utils.rnn.pack_padded_sequence(
            x, sorted_lengths.cpu(), batch_first=True)

        self.lstm.flatten_parameters()
        outputs, _ = self.lstm(x)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)

        _, unperm_idx = perm_idx.sort(0)
        outputs = outputs[unperm_idx]  # (B, T, D)

        # `lengths` is still in the caller's order, matching the unsorted
        # outputs.
        outputs = self.pool(outputs, lengths)  # (B, D)

        mu = self.mu_proj(outputs)
        logvar = self.logvar_proj(outputs)
        # Normal expects a positive standard deviation, not a
        # log-variance, which may be negative.
        std = torch.exp(0.5 * logvar)
        z = distr.Normal(mu, std).rsample()
        return z, mu, logvar
hparams.decoder_rnn_dim, 1) + + self.linear_projection = LinearNorm( + hparams.decoder_rnn_dim + encoder_tot_dim, + hparams.n_mel_channels * hparams.n_frames_per_step) + + self.gate_layer = LinearNorm( + hparams.decoder_rnn_dim + encoder_tot_dim, 1, + bias=True, w_init_gain='sigmoid') + + def get_go_frame(self, memory): + """ Gets all zeros frames to use as first decoder input + PARAMS + ------ + memory: decoder outputs + + RETURNS + ------- + decoder_input: all zeros frames + """ + B = memory.size(0) + decoder_input = Variable(memory.data.new( + B, self.n_mel_channels * self.n_frames_per_step).zero_()) + return decoder_input + + def initialize_decoder_states(self, memory, obs_and_lat, mask): + """ Initializes attention rnn states, decoder rnn states, attention + weights, attention cumulative weights, attention context, stores memory + and stores processed memory + PARAMS + ------ + memory: Encoder outputs + obs_and_lat: Observed and latent attribute embeddings + mask: Mask for padded data if training, expects None for inference + """ + B = memory.size(0) + MAX_TIME = memory.size(1) + + self.attention_hidden = Variable(memory.data.new( + B, self.attention_rnn_dim).zero_()) + self.attention_cell = Variable(memory.data.new( + B, self.attention_rnn_dim).zero_()) + + self.decoder_hidden = Variable(memory.data.new( + B, self.decoder_rnn_dim).zero_()) + self.decoder_cell = Variable(memory.data.new( + B, self.decoder_rnn_dim).zero_()) + + self.attention_weights = Variable(memory.data.new( + B, MAX_TIME).zero_()) + self.attention_weights_cum = Variable(memory.data.new( + B, MAX_TIME).zero_()) + self.attention_context = Variable(memory.data.new( + B, self.encoder_embedding_dim).zero_()) + + self.memory = memory + self.processed_memory = self.attention_layer.memory_layer(memory) + self.obs_and_lat = obs_and_lat + self.mask = mask + + def parse_decoder_inputs(self, decoder_inputs): + """ Prepares decoder inputs, i.e. 
mel outputs + PARAMS + ------ + decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs + + RETURNS + ------- + inputs: processed decoder inputs + + """ + # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels) + decoder_inputs = decoder_inputs.transpose(1, 2) + decoder_inputs = decoder_inputs.view( + decoder_inputs.size(0), + int(decoder_inputs.size(1)/self.n_frames_per_step), -1) + # (B, T_out, n_mel_channels) -> (T_out, B, n_mel_channels) + decoder_inputs = decoder_inputs.transpose(0, 1) + return decoder_inputs + + def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments): + """ Prepares decoder outputs for output + PARAMS + ------ + mel_outputs: + gate_outputs: gate output energies + alignments: + + RETURNS + ------- + mel_outputs: + gate_outpust: gate output energies + alignments: + """ + # (T_out, B) -> (B, T_out) + alignments = torch.stack(alignments).transpose(0, 1) + # (T_out, B) -> (B, T_out) + gate_outputs = torch.stack(gate_outputs).transpose(0, 1) + gate_outputs = gate_outputs.contiguous() + # (T_out, B, n_mel_channels) -> (B, T_out, n_mel_channels) + mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous() + # decouple frames per step + mel_outputs = mel_outputs.view( + mel_outputs.size(0), -1, self.n_mel_channels) + # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out) + mel_outputs = mel_outputs.transpose(1, 2) + + return mel_outputs, gate_outputs, alignments + + def decode(self, decoder_input): + """ Decoder step using stored states, attention and memory + PARAMS + ------ + decoder_input: previous mel output + + RETURNS + ------- + mel_output: + gate_output: gate output energies + attention_weights: + """ + cell_input = torch.cat((decoder_input, self.attention_context), -1) + self.attention_hidden, self.attention_cell = self.attention_rnn( + cell_input, (self.attention_hidden, self.attention_cell)) + self.attention_hidden = F.dropout( + self.attention_hidden, self.p_attention_dropout, self.training) + 
+ attention_weights_cat = torch.cat( + (self.attention_weights.unsqueeze(1), + self.attention_weights_cum.unsqueeze(1)), dim=1) + self.attention_context, self.attention_weights = self.attention_layer( + self.attention_hidden, self.memory, self.processed_memory, + attention_weights_cat, self.mask) + + self.attention_weights_cum += self.attention_weights + decoder_input = torch.cat( + (self.attention_hidden, self.attention_context), -1) + if self.obs_and_lat is not None: + decoder_input = torch.cat((decoder_input, self.obs_and_lat), -1) + self.decoder_hidden, self.decoder_cell = self.decoder_rnn( + decoder_input, (self.decoder_hidden, self.decoder_cell)) + self.decoder_hidden = F.dropout( + self.decoder_hidden, self.p_decoder_dropout, self.training) + + decoder_hidden_attention_context = torch.cat( + (self.decoder_hidden, self.attention_context), dim=1) + if self.obs_and_lat is not None: + decoder_hidden_attention_context = torch.cat( + (decoder_hidden_attention_context, self.obs_and_lat), dim=1) + decoder_output = self.linear_projection( + decoder_hidden_attention_context) + + gate_prediction = self.gate_layer(decoder_hidden_attention_context) + return decoder_output, gate_prediction, self.attention_weights + + def forward(self, memory, obs_and_lat, decoder_inputs, memory_lengths): + """ Decoder forward pass for training + PARAMS + ------ + memory: Encoder outputs + obs_and_lat: Observed and latent attribute embeddings + decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs + memory_lengths: Encoder output lengths for attention masking. 
+ + RETURNS + ------- + mel_outputs: mel outputs from the decoder + gate_outputs: gate outputs from the decoder + alignments: sequence of attention weights from the decoder + """ + + decoder_input = self.get_go_frame(memory).unsqueeze(0) + decoder_inputs = self.parse_decoder_inputs(decoder_inputs) + decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0) + decoder_inputs = self.prenet(decoder_inputs) + + self.initialize_decoder_states( + memory, obs_and_lat, mask=~get_mask_from_lengths(memory_lengths)) + + mel_outputs, gate_outputs, alignments = [], [], [] + while len(mel_outputs) < decoder_inputs.size(0) - 1: + decoder_input = decoder_inputs[len(mel_outputs)] + mel_output, gate_output, attention_weights = self.decode( + decoder_input) + mel_outputs += [mel_output.squeeze(1)] + gate_outputs += [gate_output.squeeze()] + alignments += [attention_weights] + + mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs( + mel_outputs, gate_outputs, alignments) + + return mel_outputs, gate_outputs, alignments + + def inference(self, memory, obs_and_lat, ret_has_eos=False): + """ Decoder inference + PARAMS + ------ + memory: Encoder outputs + obs_and_lat: Observed and latent attribute embeddings + + RETURNS + ------- + mel_outputs: mel outputs from the decoder + gate_outputs: gate outputs from the decoder + alignments: sequence of attention weights from the decoder + """ + decoder_input = self.get_go_frame(memory) + + self.initialize_decoder_states(memory, obs_and_lat, mask=None) + + mel_outputs, gate_outputs, alignments = [], [], [] + has_eos = False + while True: + decoder_input = self.prenet(decoder_input) + mel_output, gate_output, alignment = self.decode(decoder_input) + + mel_outputs += [mel_output.squeeze(1)] + gate_outputs += [gate_output] + alignments += [alignment] + + if torch.sigmoid(gate_output.data) > self.gate_threshold: + has_eos = True + break + elif len(mel_outputs) == self.max_decoder_steps: + # print("Warning! 
Reached max decoder steps") + break + + decoder_input = mel_output + + mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs( + mel_outputs, gate_outputs, alignments) + + if ret_has_eos: + return mel_outputs, gate_outputs, alignments, has_eos + else: + return mel_outputs, gate_outputs, alignments + + +class Tacotron2(nn.Module): + def __init__(self, hparams): + super(Tacotron2, self).__init__() + self.mask_padding = hparams.mask_padding + self.fp16_run = hparams.fp16_run + self.n_mel_channels = hparams.n_mel_channels + self.n_frames_per_step = hparams.n_frames_per_step + + # initialize text encoder embedding + self.embedding = nn.Embedding( + hparams.n_symbols, hparams.symbols_embedding_dim) + std = sqrt(2.0 / (hparams.n_symbols + hparams.symbols_embedding_dim)) + val = sqrt(3.0) * std # uniform bounds for std + self.embedding.weight.data.uniform_(-val, val) + + # initialize observed attribute embedding + self.obs_embedding = None + if hparams.obs_dim > 0: + self.obs_embedding = nn.Embedding( + hparams.obs_n_class, hparams.obs_dim) + std = sqrt(2.0 / (hparams.obs_n_class + hparams.obs_dim)) + val = sqrt(3.0) * std # uniform bounds for std + self.obs_embedding.weight.data.uniform_(-val, val) + + self.encoder = Encoder(hparams) + self.decoder = Decoder(hparams) + self.postnet = Postnet(hparams) + + self.lat_encoder = None + if hparams.lat_dim > 0: + self.lat_encoder = AudioEncoder(hparams) + + def parse_batch(self, batch): + (text_padded, input_lengths, obs_labels, + mel_padded, gate_padded, output_lengths) = batch + text_padded = to_gpu(text_padded).long() + input_lengths = to_gpu(input_lengths).long() + obs_labels = to_gpu(obs_labels).long() + max_len = torch.max(input_lengths.data).item() + mel_padded = to_gpu(mel_padded).float() + gate_padded = to_gpu(gate_padded).float() + output_lengths = to_gpu(output_lengths).long() + + return ( + (text_padded, input_lengths, obs_labels, + mel_padded, max_len, output_lengths), + (mel_padded, gate_padded)) + + def 
parse_output(self, outputs, output_lengths=None): + if self.mask_padding and output_lengths is not None: + mask = ~get_mask_from_lengths(output_lengths) + mask = mask.expand(self.n_mel_channels, mask.size(0), mask.size(1)) + mask = mask.permute(1, 0, 2) + + outputs[0].data.masked_fill_(mask, 0.0) + outputs[1].data.masked_fill_(mask, 0.0) + outputs[2].data.masked_fill_(mask[:, 0, :], 1e3) # gate energies + + return outputs + + def forward(self, inputs): + (text_inputs, text_lengths, obs_labels, + mels, max_len, output_lengths) = inputs + text_lengths, output_lengths = text_lengths.data, output_lengths.data + + embedded_inputs = self.embedding(text_inputs).transpose(1, 2) + + encoder_outputs = self.encoder(embedded_inputs, text_lengths) + + obs = None + if self.obs_embedding is not None: + obs = self.obs_embedding(obs_labels) + + lat, lat_mu, lat_logvar = None, None, None + if self.lat_encoder is not None: + (lat, lat_mu, lat_logvar) = self.lat_encoder(mels, output_lengths) + + obs_and_lat = [x for x in [obs, lat] if x is not None] + if bool(obs_and_lat): + obs_and_lat = torch.cat(obs_and_lat, dim=-1) + else: + obs_and_lat = None + + mel_outputs, gate_outputs, alignments = self.decoder( + encoder_outputs, obs_and_lat, mels, memory_lengths=text_lengths) + + mel_outputs_postnet = self.postnet(mel_outputs) + mel_outputs_postnet = mel_outputs + mel_outputs_postnet + + return self.parse_output( + [mel_outputs, mel_outputs_postnet, gate_outputs, alignments, + lat_mu, lat_logvar], + output_lengths) + + def inference(self, inputs, obs_labels=None, lat=None, ret_has_eos=False): + embedded_inputs = self.embedding(inputs).transpose(1, 2) + encoder_outputs = self.encoder.inference(embedded_inputs) + + if obs_labels is None: + obs_labels = torch.LongTensor(len(inputs)) + obs_labels = obs_labels.to(inputs.device).zero_() + + obs = None + if self.obs_embedding is not None: + obs = self.obs_embedding(obs_labels) + + if self.lat_encoder is not None: + if lat is None: + lat = 
torch.FloatTensor(len(inputs), self.lat_encoder.lat_dim) + lat = lat.to(inputs.device).zero_().type(encoder_outputs.type()) + + obs_and_lat = [x for x in [obs, lat] if x is not None] + if bool(obs_and_lat): + obs_and_lat = torch.cat(obs_and_lat, dim=-1) + else: + obs_and_lat = None + + mel_outputs, gate_outputs, alignments, has_eos = self.decoder.inference( + encoder_outputs, obs_and_lat, ret_has_eos=True) + + mel_outputs_postnet = self.postnet(mel_outputs) + mel_outputs_postnet = mel_outputs + mel_outputs_postnet + + outputs = self.parse_output( + [mel_outputs, mel_outputs_postnet, gate_outputs, alignments]) + + if ret_has_eos: + return outputs + [has_eos] + else: + return outputs diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/numbers.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/numbers.py new file mode 100644 index 0000000..0d5f7fa --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/numbers.py @@ -0,0 +1,71 @@ +""" from https://github.com/keithito/tacotron """ + +import inflect +import re + + +_inflect = inflect.engine() +_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') +_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') +_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') +_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') +_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') +_number_re = re.compile(r'[0-9]+') + + +def _remove_commas(m): + return m.group(1).replace(',', '') + + +def _expand_decimal_point(m): + return m.group(1).replace('.', ' point ') + + +def _expand_dollars(m): + match = m.group(1) + parts = match.split('.') + if len(parts) > 2: + return match + ' dollars' # Unexpected format + dollars = int(parts[0]) if parts[0] else 0 + cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 + if dollars and cents: + dollar_unit = 'dollar' if dollars == 1 else 'dollars' + cent_unit = 'cent' if cents == 1 else 'cents' + return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) + elif 
dollars: + dollar_unit = 'dollar' if dollars == 1 else 'dollars' + return '%s %s' % (dollars, dollar_unit) + elif cents: + cent_unit = 'cent' if cents == 1 else 'cents' + return '%s %s' % (cents, cent_unit) + else: + return 'zero dollars' + + +def _expand_ordinal(m): + return _inflect.number_to_words(m.group(0)) + + +def _expand_number(m): + num = int(m.group(0)) + if num > 1000 and num < 3000: + if num == 2000: + return 'two thousand' + elif num > 2000 and num < 2010: + return 'two thousand ' + _inflect.number_to_words(num % 100) + elif num % 100 == 0: + return _inflect.number_to_words(num // 100) + ' hundred' + else: + return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ') + else: + return _inflect.number_to_words(num, andword='') + + +def normalize_numbers(text): + text = re.sub(_comma_number_re, _remove_commas, text) + text = re.sub(_pounds_re, r'\1 pounds', text) + text = re.sub(_dollars_re, _expand_dollars, text) + text = re.sub(_decimal_number_re, _expand_decimal_point, text) + text = re.sub(_ordinal_re, _expand_ordinal, text) + text = re.sub(_number_re, _expand_number, text) + return text diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/stft.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/stft.py new file mode 100644 index 0000000..63fcd43 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/stft.py @@ -0,0 +1,141 @@ +""" +BSD 3-Clause License + +Copyright (c) 2017, Prem Seetharaman +All rights reserved. + +* Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class STFT(torch.nn.Module):
    """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft

    Short-time Fourier transform implemented with 1-D convolutions against a
    fixed (optionally windowed) Fourier basis, plus the matching inverse.
    """

    def __init__(self, filter_length=800, hop_length=200, win_length=800,
                 window='hann'):
        """Precompute the forward and inverse bases.

        Args:
            filter_length: FFT size.
            hop_length: stride between successive frames.
            win_length: window length; must not exceed ``filter_length``.
            window: window specification handed to ``get_window``, or
                ``None`` to leave the bases unwindowed.
        """
        super(STFT, self).__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.forward_transform = None
        scale = self.filter_length / self.hop_length
        fourier_basis = np.fft.fft(np.eye(self.filter_length))

        # Keep the non-redundant half of the spectrum and stack the real and
        # imaginary parts as separate output channels.
        cutoff = int((self.filter_length / 2 + 1))
        fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]),
                                   np.imag(fourier_basis[:cutoff, :])])

        forward_basis = torch.FloatTensor(fourier_basis[:, None, :])
        inverse_basis = torch.FloatTensor(
            np.linalg.pinv(scale * fourier_basis).T[:, None, :])

        if window is not None:
            assert(filter_length >= win_length)
            # get window and zero center pad it to filter_length
            fft_window = get_window(window, win_length, fftbins=True)
            # `size` is keyword-only in librosa >= 0.10; the keyword form is
            # accepted by older releases as well.
            fft_window = pad_center(fft_window, size=filter_length)
            fft_window = torch.from_numpy(fft_window).float()

            # window the bases
            forward_basis *= fft_window
            inverse_basis *= fft_window

        self.register_buffer('forward_basis', forward_basis.float())
        self.register_buffer('inverse_basis', inverse_basis.float())

    def transform(self, input_data):
        """Return ``(magnitude, phase)`` of ``input_data``.

        Args:
            input_data: 2-D tensor; dim 0 is read as the batch size and
                dim 1 as the number of samples.
        """
        num_batches = input_data.size(0)
        num_samples = input_data.size(1)

        self.num_samples = num_samples

        # similar to librosa, reflect-pad the input
        half = int(self.filter_length / 2)
        input_data = input_data.view(num_batches, 1, num_samples)
        # Reflect padding is applied on a 4-D view, then the extra dim is
        # dropped again.
        input_data = F.pad(
            input_data.unsqueeze(1), (half, half, 0, 0), mode='reflect')
        input_data = input_data.squeeze(1)

        # Registered buffers never require grad, so no Variable wrapper is
        # needed around the basis.
        forward_transform = F.conv1d(
            input_data,
            self.forward_basis,
            stride=self.hop_length,
            padding=0)

        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]

        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        # The phase is computed on detached values: no gradient flows
        # through it.
        phase = torch.atan2(imag_part.detach(), real_part.detach())

        return magnitude, phase

    def inverse(self, magnitude, phase):
        """Rebuild a waveform from ``magnitude`` and ``phase``."""
        recombine_magnitude_phase = torch.cat(
            [magnitude*torch.cos(phase), magnitude*torch.sin(phase)], dim=1)

        inverse_transform = F.conv_transpose1d(
            recombine_magnitude_phase,
            self.inverse_basis,
            stride=self.hop_length,
            padding=0)

        if self.window is not None:
            window_sum = window_sumsquare(
                self.window, magnitude.size(-1), hop_length=self.hop_length,
                win_length=self.win_length, n_fft=self.filter_length,
                dtype=np.float32)
            # remove modulation effects
            approx_nonzero_indices = torch.from_numpy(
                np.where(window_sum > tiny(window_sum))[0])
            # Follow the input's device rather than the current CUDA device.
            device = magnitude.device
            window_sum = torch.from_numpy(window_sum).to(device)
            approx_nonzero_indices = approx_nonzero_indices.to(device)
            inverse_transform[:, :, approx_nonzero_indices] /= window_sum[approx_nonzero_indices]

            # scale by hop ratio
            inverse_transform *= float(self.filter_length) / self.hop_length

        # Drop the reflect padding added by transform().
        inverse_transform = inverse_transform[:, :, int(self.filter_length/2):]
        inverse_transform = inverse_transform[:, :, :-int(self.filter_length/2):]

        return inverse_transform

    def forward(self, input_data):
        """Run transform then inverse; caches magnitude/phase on ``self``."""
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/symbols.py @@ -0,0 +1,18 @@ +""" from https://github.com/keithito/tacotron """ + +''' +Defines the set of symbols used in text input to the model. + +The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. ''' +from . import cmudict + +_pad = '_' +_punctuation = '!\'(),.:;? ' +_special = '-' +_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' + +# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): +_arpabet = ['@' + s for s in cmudict.valid_symbols] + +# Export all symbols: +symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/text.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/text.py new file mode 100644 index 0000000..49e2ca4 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tacotron2/text.py @@ -0,0 +1,107 @@ +""" from https://github.com/keithito/tacotron """ +import numpy as np +import re +from . import cleaners +from .symbols import symbols + + +# Mappings from symbol to numeric ID and vice versa: +_symbol_to_id = {s: i for i, s in enumerate(symbols)} +_id_to_symbol = {i: s for i, s in enumerate(symbols)} + +# Regular expression matching text enclosed in curly braces: +_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)') + +# Special symbols +SOS_TOK = '<s>' +EOS_TOK = '</s>' + +def text_to_sequence(text, cleaner_names): + '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. + + The text can optionally have ARPAbet sequences enclosed in curly braces embedded + in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street." 
def sample_code_chunk(code, size):
    '''Returns a random contiguous chunk of `code` with `size` items.

    Returns:
      Tuple (chunk, start, end) such that chunk == code[start:end]
    '''
    assert(size > 0 and size <= len(code))
    start = np.random.randint(len(code) - size + 1)
    end = start + size
    return code[start:end], start, end


def code_to_sequence(code, code_dict, collapse_code):
    '''Converts a list of code tokens to a list of IDs using `code_dict`.

    Tokens missing from `code_dict` are dropped.

    Args:
      code: iterable of code tokens
      code_dict: mapping from token to integer ID
      collapse_code: if true, a known token equal to the previously kept
        token is skipped

    Returns:
      List of integer IDs
    '''
    if collapse_code:
        prev_c = None
        sequence = []
        for c in code:
            if c in code_dict and c != prev_c:
                sequence.append(code_dict[c])
                prev_c = c
    else:
        sequence = [code_dict[c] for c in code if c in code_dict]
        if len(sequence) < 0.95 * len(code):
            # No %-formatting is applied to this string, so the percent sign
            # must not be doubled.
            print('WARNING : over 5% codes are OOV')

    return sequence


def sequence_to_text(sequence):
    '''Converts a sequence of IDs back to a string'''
    pieces = []
    for symbol_id in sequence:
        if symbol_id in _id_to_symbol:
            s = _id_to_symbol[symbol_id]
            # Enclose ARPAbet back in curly braces:
            if len(s) > 1 and s[0] == '@':
                s = '{%s}' % s[1:]
            pieces.append(s)
    # Adjacent ARPAbet groups are merged into a single pair of braces.
    return ''.join(pieces).replace('}{', ' ')


def sequence_to_code(sequence, code_dict):
    '''Analogous to sequence_to_text'''
    id_to_code = {i: c for c, i in code_dict.items()}
    return ' '.join([id_to_code[i] for i in sequence])


def _clean_text(text, cleaner_names):
    '''Runs `text` through each named function of the `cleaners` module.

    Raises:
      AttributeError: if a name is not defined in `cleaners`
    '''
    for name in cleaner_names:
        # Without a default, getattr raised a bare AttributeError before the
        # check below could report the offending name.
        cleaner = getattr(cleaners, name, None)
        if cleaner is None:
            raise AttributeError('Unknown cleaner: %s' % name)
        text = cleaner(text)
    return text
def get_mask_from_lengths(lengths):
    """Build a boolean padding mask from a 1-D tensor of lengths.

    RETURNS:
        mask (torch.Tensor) : shape (len(lengths), max(lengths)); entry
            [b, t] is True when t < lengths[b]
    """
    max_len = torch.max(lengths).item()
    # Create the positions on the same device as `lengths` so the helper
    # also works on CPU-only machines.
    ids = torch.arange(0, max_len, dtype=torch.long, device=lengths.device)
    mask = (ids < lengths.unsqueeze(1))
    return mask


def load_wav_to_torch(full_path, sr=None):
    """Load an audio file with librosa and scale it to the int16 range.

    RETURNS:
        data (torch.FloatTensor) : samples multiplied by 32768
        sr (int) : sample rate reported by librosa
    """
    data, sr = librosa.load(full_path, sr=sr)
    data = np.clip(data, -1, 1)  # potentially out of [-1, 1] due to resampling
    data = data * 32768.0  # match values loaded by scipy
    return torch.FloatTensor(data.astype(np.float32)), sr


def read_binary_audio(bin_data, tar_sr=None):
    """
    read binary audio (`bytes` or `uint8` `numpy.ndarray`) into a float32
    tensor scaled to the int16 range

    RETURNS:
        data (torch.FloatTensor) : audio of shape (n,) or (2, n)
        tar_sr (int) : sample rate
    """
    data, ori_sr = sf.read(io.BytesIO(bin_data), dtype='float32')
    data = data.T
    if (tar_sr is not None) and (ori_sr != tar_sr):
        # librosa >= 0.10 only accepts the sample rates as keywords; older
        # releases accept the same keyword names.
        data = librosa.resample(data, orig_sr=ori_sr, target_sr=tar_sr)
    else:
        tar_sr = ori_sr
    data = np.clip(data, -1, 1)
    data = data * 32768.0
    return torch.FloatTensor(data.astype(np.float32)), tar_sr
def to_gpu(x):
    """Return `x` made contiguous and moved to CUDA when CUDA is available."""
    x = x.contiguous()

    if torch.cuda.is_available():
        x = x.cuda(non_blocking=True)
    return torch.autograd.Variable(x)


def load_code_dict(path, add_sos=False, add_eos=False):
    """Read one code per line from `path` and map each code to an index.

    Index 0 is reserved for the pad symbol '_'. SOS/EOS tokens are appended
    after the file's codes when requested. A falsy `path` gives an empty
    dict.
    """
    if not path:
        return {}

    with open(path, 'r') as f:
        codes = ['_'] + [line.rstrip() for line in f]  # '_' for pad
    code_dict = {c: i for i, c in enumerate(codes)}

    if add_sos:
        code_dict[SOS_TOK] = len(code_dict)
    if add_eos:
        code_dict[EOS_TOK] = len(code_dict)
    # Fails when the file contains duplicate codes (indices would have gaps).
    assert(set(code_dict.values()) == set(range(len(code_dict))))

    return code_dict


def load_obs_label_dict(path):
    """Read one label per line from `path` and map each label to its index."""
    if not path:
        return {}
    with open(path, 'r') as f:
        obs_labels = [line.rstrip() for line in f]
    return {c: i for i, c in enumerate(obs_labels)}


# A simple timer class inspired from `tnt.TimeMeter`
class CudaTimer:
    """Average duration per key, measured with CUDA events."""

    def __init__(self, keys):
        self.keys = keys
        self.reset()

    def start(self, key):
        """Record a start event for `key`."""
        s = torch.cuda.Event(enable_timing=True)
        s.record()
        self.start_events[key].append(s)
        return self

    def stop(self, key):
        """Record an end event for `key`."""
        e = torch.cuda.Event(enable_timing=True)
        e.record()
        self.end_events[key].append(e)
        return self

    def reset(self):
        """Drop all pending events and accumulated statistics."""
        self.start_events = collections.defaultdict(list)
        self.end_events = collections.defaultdict(list)
        self.running_times = collections.defaultdict(float)
        self.n = collections.defaultdict(int)
        return self

    def value(self):
        """Return {key: accumulated time / number of intervals}."""
        self._synchronize()
        return {k: self.running_times[k] / self.n[k] for k in self.keys}

    def _synchronize(self):
        """Fold the pending events into the running totals.

        Raises ValueError when a key has no pending start event or when its
        start and stop counts differ.
        """
        torch.cuda.synchronize()
        for k in self.keys:
            starts = self.start_events[k]
            ends = self.end_events[k]
            if len(starts) == 0:
                raise ValueError("Trying to divide by zero in TimeMeter")
            if len(ends) != len(starts):
                raise ValueError("Call stop before checking value!")
            # Local name chosen so the `time` module is not shadowed.
            elapsed = 0
            for start, end in zip(starts, ends):
                elapsed += start.elapsed_time(end)
            # elapsed_time reports milliseconds; store seconds.
            self.running_times[k] += elapsed * 1e-3
            self.n[k] += len(starts)
        self.start_events = collections.defaultdict(list)
        self.end_events = collections.defaultdict(list)


# Used to measure the time taken for multiple events
class Timer:
    """Average wall-clock duration per key, measured with `time.time()`."""

    def __init__(self, keys):
        self.keys = keys
        self.n = {}
        self.running_time = {}
        self.total_time = {}
        self.reset()

    def start(self, key):
        """Remember the current time as the start of an interval for `key`."""
        self.running_time[key] = time.time()
        return self

    def stop(self, key):
        """Close the interval opened by `start(key)` and add it to the total."""
        # Accumulate: value() divides by the number of intervals, so
        # overwriting the total here would report last / n, not the mean.
        self.total_time[key] += time.time() - self.running_time[key]
        self.n[key] += 1
        self.running_time[key] = None
        return self

    def reset(self):
        """Zero the totals and counters of every key."""
        for k in self.keys:
            self.total_time[k] = 0
            self.running_time[k] = None
            self.n[k] = 0
        return self

    def value(self):
        """Return {key: mean seconds per interval}.

        Raises ValueError when a key has no completed interval.
        """
        vals = {}
        for k in self.keys:
            if self.n[k] == 0:
                raise ValueError("Trying to divide by zero in TimeMeter")
            else:
                vals[k] = self.total_time[k] / self.n[k]
        return vals
{} if not supported".format(mode)) + + with torch.no_grad(): + bias_audio = waveglow.infer(mel_input, sigma=0.0).float() + bias_spec, _ = self.stft.transform(bias_audio) + + self.register_buffer('bias_spec', bias_spec[:, :, 0][:, :, None]) + + def forward(self, audio, strength=0.1): + audio_spec, audio_angles = self.stft.transform(audio.cuda().float()) + audio_spec_denoised = audio_spec - self.bias_spec * strength + audio_spec_denoised = torch.clamp(audio_spec_denoised, 0.0) + audio_denoised = self.stft.inverse(audio_spec_denoised, audio_angles) + return audio_denoised diff --git a/fairseq/examples/textless_nlp/gslm/unit2speech/tts_data.py b/fairseq/examples/textless_nlp/gslm/unit2speech/tts_data.py new file mode 100644 index 0000000..d2b04c0 --- /dev/null +++ b/fairseq/examples/textless_nlp/gslm/unit2speech/tts_data.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class TacotronInputDataset:
    """Turn input strings (text or space-separated codes) into ID tensors.

    Strings are passed to `get_tensor` one at a time; the object does not
    store any samples itself.
    """

    def __init__(self, hparams, append_str=""):
        """Configure the converter from `hparams`.

        `hparams.text_or_code` selects the mode (defaults to "text"). Code
        mode additionally reads `hparams.code_dict` and `hparams.code_key`.
        `append_str` is appended to every input string before conversion.
        """
        self.is_text = getattr(hparams, "text_or_code", "text") == "text"
        # The code-related attributes are defined in both modes so that they
        # can always be inspected; text-mode hparams need not provide them.
        self.add_sos = getattr(hparams, "add_sos", False)
        self.add_eos = getattr(hparams, "add_eos", False)
        self.collapse_code = getattr(hparams, "collapse_code", False)
        self.code_dict = {}
        self.code_key = None
        if not self.is_text:
            self.code_dict = load_code_dict(
                hparams.code_dict, self.add_sos, self.add_eos
            )
            self.code_key = hparams.code_key
        self.append_str = append_str

    def process_code(self, inp_str):
        """Split `inp_str` on whitespace, add SOS/EOS if enabled, map to IDs."""
        inp_toks = inp_str.split()
        if self.add_sos:
            inp_toks = [SOS_TOK] + inp_toks
        if self.add_eos:
            inp_toks = inp_toks + [EOS_TOK]
        return code_to_sequence(inp_toks, self.code_dict, self.collapse_code)

    def process_text(self, inp_str):
        """Map raw text to symbol IDs using the "english_cleaners" pipeline."""
        return text_to_sequence(inp_str, ["english_cleaners"])

    def get_tensor(self, inp_str):
        """Return the ID sequence of `inp_str + append_str` as a LongTensor."""
        inp_str = inp_str + self.append_str
        if self.is_text:
            inp_toks = self.process_text(inp_str)
        else:
            inp_toks = self.process_code(inp_str)
        return torch.from_numpy(np.array(inp_toks)).long()

    def __len__(self):
        # Nothing in this class assigns `self.data`, so reading it directly
        # always raised AttributeError. Report the size of `data` when some
        # other code has attached it, and 0 otherwise.
        return len(getattr(self, "data", ()))
def load_quantized_audio_from_file(file_path):
    """Read a ``name|u1 u2 ...`` unit file into parallel lists.

    Each non-empty line holds a base file name and a whitespace-separated
    list of integer units, separated by the last ``|`` on the line.

    Args:
        file_path: path of the text file to read.

    Returns:
        ``(base_fname_batch, quantized_units_batch)``: the names and, for each
        name, its list of ``int`` units, in file order.

    Raises:
        ValueError: if a non-empty line has no ``|`` or a unit is not an
            integer.
    """
    base_fname_batch, quantized_units_batch = [], []
    with open(file_path) as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            # Split on the last "|" so a name may itself contain "|".
            base_fname, quantized_units_str = line.rsplit("|", 1)
            # split() tolerates repeated spaces/tabs; split(" ") would yield
            # empty strings and crash in int().
            quantized_units = [int(q) for q in quantized_units_str.split()]
            base_fname_batch.append(base_fname)
            quantized_units_batch.append(quantized_units)
    return base_fname_batch, quantized_units_batch


def synthesize_audio(model, waveglow, denoiser, inp, lab=None, strength=0.0):
    """Run Tacotron2 -> WaveGlow -> denoiser for a single input sequence.

    Args:
        model: acoustic model exposing ``inference(inp, lab, ret_has_eos=True)``.
        waveglow: vocoder exposing ``infer(mel, sigma=...)``.
        denoiser: callable applied to the vocoder output.
        inp: input tensor whose first dimension must be 1; moved to CUDA.
        lab: optional label value; when given it is placed in a one-element
            CUDA ``LongTensor`` and passed to the model.
        strength: denoiser strength.

    Returns:
        ``(mel, aud, aud_dn, has_eos)``: the predicted mel, the raw vocoder
        audio, the denoised audio (dimension 1 squeezed) and the model's
        end-of-sequence flag.
    """
    assert inp.size(0) == 1
    inp = inp.cuda()
    if lab is not None:
        lab = torch.LongTensor(1).cuda().fill_(lab)

    with torch.no_grad():
        _, mel, _, ali, has_eos = model.inference(inp, lab, ret_has_eos=True)
        aud = waveglow.infer(mel, sigma=0.666)
        aud_dn = denoiser(aud, strength=strength).squeeze(1)
    return mel, aud, aud_dn, has_eos


def load_tacotron(tacotron_model_path, max_decoder_steps):
    """Load a Tacotron2 checkpoint onto CUDA in half precision, in eval mode.

    The checkpoint must contain ``"hparams"`` and ``"model_dict"`` entries;
    ``hparams.max_decoder_steps`` is overridden with ``max_decoder_steps``.

    Returns:
        ``(model, sr, hparams)`` where ``sr`` is ``hparams.sampling_rate``.
    """
    ckpt_dict = torch.load(tacotron_model_path)
    hparams = ckpt_dict["hparams"]
    hparams.max_decoder_steps = max_decoder_steps
    sr = hparams.sampling_rate
    model = Tacotron2(hparams)
    model.load_state_dict(ckpt_dict["model_dict"])
    model = model.cuda().eval().half()
    return model, sr, hparams


def load_waveglow(waveglow_path):
    """Load a WaveGlow checkpoint and build its denoiser.

    The model stored under the checkpoint's ``"model"`` key is moved to CUDA,
    put in eval mode and cast to half precision; every module in
    ``waveglow.convinv`` is then cast back to float.

    Returns:
        ``(waveglow, denoiser)``.
    """
    waveglow = torch.load(waveglow_path)["model"]
    waveglow = waveglow.cuda().eval().half()
    for k in waveglow.convinv:
        k.float()
    denoiser = Denoiser(waveglow)
    return waveglow, denoiser
paper _Text-Free Prosody-Aware Generative Spoken Language Modeling_, +Eugene Kharitonov*, Ann Lee*, Adam Polyak, Yossi Adi, Jade Copet, Kushal Lakhotia, Tu-Anh Nguyen, Morgane Rivière, Abdelrahman Mohamed, Emmanuel Dupoux, Wei-Ning Hsu, 2021. arxiv/2109.03264 [[arxiv]](https://arxiv.org/abs/2109.03264). + +`*` denotes equal contribution. + +You can find demo samples [[here]](https://speechbot.github.io/pgslm/index.html). + +<details> + <summary>If you find this code useful, please consider citing our work using this bibtex </summary> + +``` + @misc{Kharitonov2021, + title={Text-Free Prosody-Aware Generative Spoken Language Modeling}, + author={Eugene Kharitonov and Ann Lee and Adam Polyak and Yossi Adi and Jade Copet and Kushal Lakhotia and Tu-Anh Nguyen and Morgane Rivière and Abdelrahman Mohamed and Emmanuel Dupoux and Wei-Ning Hsu}, + year={2021}, + eprint={2109.03264}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` +</details> + + +## Additional requirements +Several packages are required in addition to fairseq; they are installable with pip: +```bash +pip install AMFM-decompy SoundFile scipy sklearn torchaudio npy-append-array +``` + +## Data preprocessing + +### Prepare unit pseudo-text transcriptions of the audio +To get unit transcripts of the speech data we rely on the preprocessing steps of the [GSLM](https://github.com/pytorch/fairseq/tree/main/examples/textless_nlp/gslm/speech2unit/) work. + +Firstly, we will need to prepare manifest files for the dataset we want to preprocess +``` +mkdir manifests/ +python examples/wav2vec/wav2vec_manifest.py --valid-percent=0.0 $DATA_PATH --dest=manifests/train/ +``` +Next, we need a pre-trained HuBERT-base-ls960 model [[download]](https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt) and a corresponding kmeans-100 quantizer [[download]](https://dl.fbaipublicfiles.com/textless_nlp/gslm/hubert/km100/km.bin).
Having those we can quantize the dataset: +``` +python examples/textless_nlp/gslm/speech2unit/clustering/quantize_with_kmeans.py \ + --feature_type hubert \ + --kmeans_model_path km.bin \ + --acoustic_model_path hubert_base_ls960.pt \ + --layer 6 \ + --manifest_path manifests/train/train.tsv \ + --out_quantized_file_path manifests/train/units +``` + +Finally, by running +``` +python examples/textless_nlp/pgslm/scripts/join_units_manifest.py --manifest=manifests/train/train.tsv --units=manifests/train/units --output=train.txt +``` +We will get the training data description `train.txt` in the format that pGSLM expects. The above steps have to be repeated for +dev/test sets. Importantly, we rely on an assumption that the directories are structured as in LibriSpeech, i.e. the file paths follow the +`<spk_id>/<session_id>/<sample_id>.wav` format. + +### Preprocess data for pGSLM +The very first step is to obtain the F0 quantization bins. +Assume the vocoder training manifest is `vocoder_train.txt` (in pGSLM data format prepared with the same process above). +We prepare the quantized F0 from the vocoder training data by running +```sh +bash examples/textless_nlp/pgslm/scripts/prepare_f0_quantization.sh \ + vocoder_train.txt <sample_rate> 32 <preprocessed_dir> <output_prefix> # we use 32 bins in the paper +``` +- `<sample_rate>`: sampling rate of the audio files in the manifest +- `<preprocessed_dir>`: where to output the output files +- `<output_prefix>`: prefix of the output files + +The script will generate +- `<output_prefix>.f0_stat.pt`: the speaker-level F0 statistics, which can be used in vocoder training +- `<output_prefix>_mean_norm_log_f0_bin.th`: the quantized F0, which should be used in `prepare_data.sh` below + +**Note:** See "Pre-trained models" for the pre-computed speaker-level F0 statistics and quantized F0 bins. 
We suggest using the pre-computed statistics for the data preparation below in order to take advantage of the pre-trained vocoder for waveform generation. + +Next prepare the pGSLM data. +Assume train/valid/test manifests are `{train,valid,test}.txt`. +Here is an example of how to preprocess data: + +```sh +bash examples/textless_nlp/pgslm/scripts/prepare_data.sh \ + train.txt valid.txt test.txt <n_unit> <hop_size> <sample_rate> \ + <preprocessed_dir>/<output_prefix>_mean_norm_log_f0_bin.th <preprocessed_dir> +``` +- `<n_unit>`: discrete unit vocabulary size (we used a kmeans quantizer with the number of units equal to 100 in the example above) +- `<hop_size>`: downsampling rate relative to the waveform (e.g., 320 for HuBERT units) +- `<sample_rate>`: sampling rate of the audio files in the manifest +- `<preprocessed_dir>`: where to output the preprocessed files + +This will create the dataset json config used for the next section at +`<preprocessed_dir>/data_config.json`. + +Note that the example script uses only one thread to compute F0, which can take +_very long_ for preprocessing large datasets. It is suggested to distribute +jobs over multiple nodes/processes with `--nshards=x` and `--rank=z` (where z is +in [1, x]) in `preprocess_f0.py`, and set `--nshards_list=x` in +`prepare_data.py` correspondingly to collect sharded F0 data. + +Now, everything is ready for training a model. 
+ +## Training Multi-Stream Transformer Unit Language Model (MS-TLM) + +Below is an example command that trains Multi-Stream Transformer Language Model (MS-TLM) on a prepared dataset: +```bash +DATASET=data_config.json + +fairseq-train $DATASET \ + --task=speech_unit_modeling \ + --arch="transformer_ulm_tiny" \ + --criterion=speech_unit_lm_criterion \ + --share-decoder-input-output-embed \ + --dropout=0.1 \ + --attention-dropout=0.1 \ + --optimizer="adam" \ + --adam-betas="(0.9, 0.98)" \ + --clip-norm=1.0 \ + --lr=0.0005 \ + --lr-scheduler="inverse_sqrt" \ + --warmup-updates=4000 \ + --warmup-init-lr=1e-07 \ + --tokens-per-sample=3072 \ + --max-tokens=3072 \ + --update-freq=4 \ + --max-epoch=70 \ + --num-workers=0 \ + --skip-invalid-size-inputs-valid-test \ + --loss-weights="1.0;0.5;0.0" \ + --ignore-f0-input \ + --checkpoint-activations \ + --fp16 \ + --max-target-positions=4096 \ + --stream-shifts="1,1" \ + --log-f0 --normalize-f0-mean --interpolate-f0 \ + --ignore-unused-valid-subsets \ + --discrete-duration --discrete-f0 +``` + +Some of the important parameters that are specific to MS-TLM: + * `arch`: specifies the Transformer architecture used. Supported options are: + * `transformer_ulm_tiny` - a tiny model that can be used for debugging; it has 2 layers, 1 attention head, FFN and embedding dimensions of 64, + * `transformer_ulm` - a base model with 6 layers, 8 heads, embedding dimension 512, and FFN dimensionality of 2048, + * `transformer_ulm_big` - the largest model we experiment with in the paper: 12-layer/16 heads, 1024/4096 embedding and FFN dimensions; + * `loss-weights`: this parameter sets importance weights (must be non-negative) for the components of the loss that correspond to unit, duration, and F0 streams. To turn off a component of the loss, its weight has to be set to 0. For instance, to predict only unit stream the parameter should be set to "1;0;0"; + * `stream-shifts`: specifies relative shifts of the two prosodic streams w.r.t. 
the unit stream (duration and F0, respectively). No shift corresponds to "0,0"; + * `ignore-duration-input`/`ignore-f0-input`: setting these flags would zero-out the corresponding input streams; + * `max-token-duration`: duration values would be max-capped by the specified value; + * `discrete-duration`/`discrete-f0`: whether duration and F0 streams should be quantized; + * `log-f0`, `normalize-f0-mean`, `normalize-f0-std`, `interpolate-f0`: configure how F0 stream is treated. `log-f0` sets up modelling in the log-space, `normalize-f0-mean`/`normalize-f0-std` control per-speaker normalization, and `interpolate-f0` enables F0 interpolation for unvoiced regions where F0 was set to 0; + * `mask-dur-prob`, `mask-f0-prob`, `mask-dur-seg-prob`, `mask-f0-seg-prob`, `mask-unit-seg-prob`, `mask-unit-seg-leng`: this family of parameters sets the probabilities of masking individual steps and spans on each stream as well as lengths of the masked spans. + + +## Pre-trained models +### MS-TLM +Below you can find checkpoints for four best-performing models from the paper (IDs 9..12 in Table 1). These models are trained on Hubert-100 transcripts of the LibriLight-6K dataset. They have the prosody streams shifted by 1 w.r.t. the unit stream. All models predict all three streams (units, duration, and F0), but two +of them only have unit stream in their input.
+ +| | Continuous prosody | Quantized prosody | +|-------------------|--------------------|-------------------| +| No prosody input | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/ulm_checkpoints/continuous_no_prosody_shift_1_1.pt) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/ulm_checkpoints/discrete_no_prosody_shift_1_1.pt) | +| Has prosody input | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/ulm_checkpoints/continuous_prosody_shift_1_1.pt) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/ulm_checkpoints/discrete_prosody_shift_1_1.pt)| + +The optimal per-stream sampling temperatures/scaling parameters that we have identified for these models, in the (`T-token, T-duration, T-f0`) format: + +| | Continuous prosody | Quantized prosody | +|-------------------|--------------------|-------------------| +| No prosody input | 0.7, 0.125, 0.0003125| 0.7, 0.25, 0.5 | +| Has prosody input | 0.7, 0.125, 0.00125 | 0.7, 0.25, 0.7 | + +## Vocoder +| Units | Prosody | F0 stats | Checkpoint | Config | +|-------------------|---------|--------------|------------|--------| +| HuBERT-base-ls960, kmeans-100 | [[Quantized 32 bins]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/mean_norm_log_f0_seg_bin.th) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/f0_stats.pt) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/naive_quant_32_norm_log_seg_hubert/checkpoint.pt) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/naive_quant_32_norm_log_seg_hubert/config.json) | +| HuBERT-base-ls960, kmeans-100 | Continuous | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/f0_stats.pt) | [[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/mean_norm_log_f0_hubert/checkpoint.pt) | 
[[download]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/mean_norm_log_f0_hubert/config.json) | + + +## Evaluating a trained model +Evaluation is done with the `eval/cont_metrics.py` script. As described in the paper, there are several metrics used. + +**Teacher-forced metrics** +```bash +SET=valid +CHECKPOINT_PATH=discrete_prosody_shift_1_1.pt +DATA=data_config.json + +python examples/textless_nlp/pgslm/eval/cont_metrics.py $DATA \ + --metric=teacher_force_everything \ + --path=$CHECKPOINT_PATH \ + --batch-size=16 \ + --fp16 \ + --seed=111 \ + --eval-subset=$SET \ + --f0-discretization-bounds=mean_norm_log_f0_seg_bin.th --dequantize-prosody +``` +(Using this command, our provided `discrete_prosody_shift_1_1.pt` checkpoint should produce `{'token_loss': 1.408..., 'duration_loss': 0.5424..., 'f0_loss': 0.0474...}` on LibriSpeech dev-clean). + +The parameters `--f0-discretization-bounds=mean_norm_log_f0_seg_bin.th --dequantize-prosody` are specific to quantized-prosody models. They signal that the prosody streams must be decoded into the continuous domain before calculating correlation. It is the same `*_mean_norm_log_f0_bin.th` file as we prepared before. +The `mean_norm_log_f0_seg_bin.th` file we used with the pre-trained models can be downloaded [[here]](https://dl.fbaipublicfiles.com/textless_nlp/pgslm/vocoder/blizzard2013/mean_norm_log_f0_seg_bin.th). + + +**Consistency (aka Correlation) metrics** + +The following command estimates correlation between mean values of the F0 stream in the prompt and in the generated continuation (unit and duration streams are fixed).
+ +```bash +T_F0=0.7 +EXPLOSION=20 +SET=test +CHECKPOINT_PATH=discrete_prosody_shift_1_1.pt +DATA=data_config.json + +python examples/textless_nlp/pgslm/eval/cont_metrics.py $DATA \ + --prefix-length=150 \ + --metric=correlation \ + --path=$CHECKPOINT_PATH \ + --batch-size=16 \ + --fp16 \ + --seed=111 \ + --teacher-force-tokens \ + --teacher-force-duration \ + --min-length=300 \ + --batch-explosion-rate=$EXPLOSION \ + --T-f0=$T_F0 \ + --eval-subset=$SET \ + --f0-discretization-bounds=mean_norm_log_f0_seg_bin.th \ + --dequantize-prosody --n-workers=8 +``` +(Using this command, our provided `discrete_prosody_shift_1_1.pt` checkpoint should produce `{...'F0 corr': 0.315 ..}` on LibriSpeech test-clean). + + * By using flags `--teacher-force-tokens, --teacher-force-duration, --teacher-force-f0` one can calculate correlations along each stream while having the other two streams fixed to ground-truth values (or freeze all three streams to get ground-truth correlation values); + * The parameters `T-f0`, `T-duration`, and `T-token` specify per-stream temperatures and, in the case of continuous-valued prosody, the scaling parameter of the corresponding Laplace distribution (setting a temperature to 0 will enforce greedy sampling); + * `min-length` filters out sequences that are shorter than 300 duration units (i.e. 6s in the case of Hubert units); + * `prefix-length` specifies that we want to use the first 150 duration units as the prompt (i.e. 3s in the case of Hubert units) + + +**Correctness (aka Continuation) and Expressiveness (aka Std) metrics** + +By running the following command, we can get minMAE and Std for the log-F0 stream for the model with quantized prosody.
+```bash +DATA=data_config.json +EXPLOSION=20 +SET=test +CHECKPOINT_PATH=discrete_prosody_shift_1_1.pt +T_F0=0.7 + +python examples/textless_nlp/pgslm/eval/cont_metrics.py $DATA \ + --prefix-length=150 \ + --metric=continuation \ + --path=$CHECKPOINT_PATH \ + --batch-size=16 \ + --fp16 \ + --seed=111 \ + --batch-explosion-rate=$EXPLOSION \ + --teacher-force-tokens \ + --teacher-force-duration \ + --T-f0=$T_F0 \ + --eval-subset=$SET \ + --f0-discretization-bounds=mean_norm_log_f0_seg_bin.th --dequantize-prosody +``` +(Using this command, our provided `discrete_prosody_shift_1_1.pt` checkpoint should produce `{...'F0 MAE': 0.0772, 'F0 Std': 0.1489...}` on LibriSpeech test-clean). + +Again, by setting `--teacher-force-tokens, --teacher-force-duration, --teacher-force-f0` we can calculate Token BLEU for the token stream (when `--teacher-force-duration` & `--teacher-force-f0` are on) and per-stream min MAE for each prosody stream individually. + +Finally, `cont_metrics.py` allows to specify the number of workers (e.g., `n-workers=8`) which allows to speed up the computation by spreading multiple worker processes +over the available GPUs. + +**Cont Word BLEU** + +We used the code and the evaluation protocol of [(Lakhotia et al., 2021)](https://arxiv.org/abs/2102.01192). + +## Sampling from a trained model + +To get (prompted or not) samples from a trained model it is enough to run `sample.py`: +```bash +CHECKPOINT_PATH=checkpoints/checkpoint_best.pt +DATASET=examples/textless_nlp/pgslm/repro/dataset/data_config.json +python examples/textless_nlp/pgslm/sample/sample.py $DATASET \ + --output=$SAMPLES \ + --path=$CHECKPOINT_PATH \ + --sampling \ + --T-token=0.7 \ + --T-duration=0.25 \ + --T-f0=0.7 \ + --max-length=500 \ + --prefix-length=150 \ + --subset=valid \ + --seed=1 \ + --match-duration \ + --code-type=hubert \ + --batch-explosion-rate=2 +``` + +Some useful parameters: + * `T-token`, `T-duration`, `T-f0` specify sampling temperature for the three streams. 
Setting a temperature to `0` switches sampling to greedy (argmax) decoding; + * `prefix-length`: length of the prompt, measured in timesteps (e.g. for Hubert (CPC) each timestep is 20 (10) ms); + * `subset`: which subset of the dataset to use as prompts (can be `train`, `valid`, `test`); + * `teacher-force-tokens`, `teacher-force-duration`, `teacher-force-f0`: if set, at each autoregressive step, ground-truth values replace the produced ones; + * `short-curcuit`: replace sampling by ground-truth inputs; + * `match-duration`: forces the produced sample to have the same duration (in time), as the entire sequence (beyond the prompt if there is any); + * `batch-explosion-rate`: number of samples per prompt; + * `f0-discretization-bounds`: path to a file with quantization boundaries. If it is set, F0 values are de-quantized back to the continuous domain + (the model must be a quantized one); + * `max-length` sets the maximal number of segment steps to be produced. + +Note that `sample.py` automatically uses all available GPUs; to avoid that, set the environment variable `CUDA_VISIBLE_DEVICES`. + +## Vocoding samples +To generate audios for output from `sample.py` (`$IN_FILE`): +```bash +python examples/textless_nlp/pgslm/generate_waveform.py \ + --in-file=$IN_FILE \ + --vocoder=$VOCODER \ + --vocoder-cfg=$VOCODER_CFG \ + --results-path=$RESULTS_PATH +``` +See "Pre-trained models" for `$VOCODER` and `$VOCODER_CFG`. diff --git a/fairseq/examples/textless_nlp/pgslm/data_utils.py b/fairseq/examples/textless_nlp/pgslm/data_utils.py new file mode 100644 index 0000000..2033697 --- /dev/null +++ b/fairseq/examples/textless_nlp/pgslm/data_utils.py @@ -0,0 +1,107 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
class Stat:
    """Running first/second moments of a value stream and of its logarithm.

    Args:
        keep_raw: when true, every tensor passed to ``update`` is also kept
            so that ``raw_data`` can return their concatenation.
    """

    def __init__(self, keep_raw=False):
        self.x = 0.0  # sum of values
        self.x2 = 0.0  # sum of squared values
        self.z = 0.0  # sum of log-values (z = log x)
        self.z2 = 0.0  # sum of squared log-values
        self.n = 0.0  # accumulated len() of the updates
        self.u = 0.0  # number of update() calls
        self.keep_raw = keep_raw
        self.raw = []

    @staticmethod
    def _std_from_moments(second_moment, mean):
        """Return sqrt(E[v^2] - E[v]^2), clamping the variance at zero.

        Floating-point rounding can make the difference slightly negative
        for (near-)constant data, which would otherwise yield NaN.
        """
        variance = second_moment - mean**2
        if torch.is_tensor(variance):
            variance = variance.clamp(min=0.0)
        else:
            variance = max(variance, 0.0)
        return variance**0.5

    def update(self, new_x):
        """Accumulate one tensor of values (``new_x.log()`` must be defined)."""
        new_z = new_x.log()

        self.x += new_x.sum()
        self.x2 += (new_x**2).sum()
        self.z += new_z.sum()
        self.z2 += (new_z**2).sum()
        self.n += len(new_x)
        self.u += 1

        if self.keep_raw:
            self.raw.append(new_x)

    @property
    def mean(self):
        return self.x / self.n

    @property
    def std(self):
        return self._std_from_moments(self.x2 / self.n, self.mean)

    @property
    def mean_log(self):
        return self.z / self.n

    @property
    def std_log(self):
        return self._std_from_moments(self.z2 / self.n, self.mean_log)

    @property
    def n_frms(self):
        return self.n

    @property
    def n_utts(self):
        return self.u

    @property
    def raw_data(self):
        assert self.keep_raw, "does not support storing raw data!"
        return torch.cat(self.raw)


class F0Stat(Stat):
    """``Stat`` variant that ignores ``None`` inputs and zero-valued frames."""

    def update(self, new_x):
        # assume unvoiced frames are 0 and consider only voiced frames
        if new_x is not None:
            super().update(new_x[new_x != 0])


def dump_speaker_f0_stat(speaker_to_f0_stat, out_prefix):
    """Save per-speaker F0 statistics to ``<out_prefix>.f0_stat.pt``.

    Args:
        speaker_to_f0_stat: mapping from speaker to a ``Stat``-like object.
        out_prefix: path prefix of the output file; the file must not exist.

    Returns:
        The saved dict: speaker -> ``f0_mean``, ``f0_std``, ``logf0_mean``,
        ``logf0_std``.
    """
    path = f"{out_prefix}.f0_stat.pt"
    assert not os.path.exists(path)

    d = {
        speaker: {
            "f0_mean": stat.mean,
            "f0_std": stat.std,
            "logf0_mean": stat.mean_log,
            "logf0_std": stat.std_log,
        }
        for speaker, stat in speaker_to_f0_stat.items()
    }
    torch.save(d, path)

    return d


def load_audio_path(path):
    """Return the ``"audio"`` entry of every line of a manifest file.

    Each line is evaluated as a Python expression that must produce a
    mapping with an ``"audio"`` key.
    """
    audio_paths = []
    with open(path) as f:
        for line in f:
            # SECURITY NOTE(review): eval() executes arbitrary code from the
            # manifest; only use manifests from a trusted source. If lines
            # are always plain literals, ast.literal_eval would be a safer
            # drop-in -- confirm against the manifest writer.
            sample = eval(line.strip())
            audio_paths.append(sample["audio"])

    return audio_paths


def load_f0(f0_dir, nshards):
    """Merge the sharded F0 files ``f0_<rank>_<nshards>.pt`` of ``f0_dir``.

    Shards are read for ranks ``1..nshards``; entries of later shards
    overwrite earlier ones when keys collide.
    """
    path_to_f0 = {}
    for rank in tqdm(range(1, nshards + 1), desc="load f0"):
        f0_shard_path = f"{f0_dir}/f0_{rank}_{nshards}.pt"
        shard_path_to_f0 = torch.load(f0_shard_path)
        path_to_f0.update(shard_path_to_f0)
    return path_to_f0
teacher_force_everything( + args, dataset, model, criterion, tgt_dict, rank, world_size +): + prefix = args.prefix_length + + f0_decoder = None + if args.dequantize_prosody: + assert dataset.discrete_f0 + print("Reporting MAE for a discrete model") + f0_decoder = Naive_F0_Decoder( + args.f0_discretization_bounds, dataset.config.f0_vq_n_units + ).cuda() + + dataset = InferenceDataset( + dataset, + prefix=args.prefix_length, + only_prefix=False, + filter_short=True, + presort_by_length=True, + ) + sampler = ( + None + if world_size == 1 + else DistributedSampler( + dataset, num_replicas=world_size, rank=rank, shuffle=False + ) + ) + dataloader = DataLoader( + dataset, + args.batch_size, + shuffle=False, + collate_fn=dataset.collater, + sampler=sampler, + ) + + total_token_loss, total_duration_loss, total_f0_loss, total_tokens = ( + 0.0, + 0.0, + 0.0, + 0.0, + ) + + i = 0 + for batch in dataloader: + i += 1 + batch = move_to_cuda(batch) + output = model(**batch["net_input"]) + + tokens, durations, f0 = output["token"], output["duration"], output["f0"] + durations, f0 = durations.squeeze(), f0.squeeze() + + token_loss = nll_loss( + tokens[:, prefix - 1 :], + batch["target"][:, prefix - 1 :].contiguous(), + batch["mask"][:, prefix - 1 :].contiguous(), + reduce=True, + ) + + if args.dequantize_prosody: + durations = durations.argmax(dim=-1) + duration_loss = mae_loss( + durations[:, prefix - 1 :].contiguous().float(), + batch["dur_target"][:, prefix - 1 :].contiguous().float(), + batch["dur_mask"][:, prefix - 1 :].contiguous(), + reduce=True, + ) + else: + duration_loss = criterion.dur_loss_fn( + durations[:, prefix - 1 :].contiguous(), + batch["dur_target"][:, prefix - 1 :].contiguous(), + batch["dur_mask"][:, prefix - 1 :].contiguous(), + reduce=True, + ) + + if f0_decoder: + f0 = f0.argmax(dim=-1) + f0 = f0_decoder(f0).squeeze(-1) + + f0_target = batch["raw_f0"] + f0_loss = mae_loss( + f0[:, prefix - 1 :].contiguous(), + f0_target[:, prefix - 1 :].contiguous(), + 
def get_bleu(produced_tokens, target_tokens, tgt_dict):
    """Score sampled token sequences against one target with 3-gram BLEU.

    Trailing pad/eos positions of the target are dropped from both the
    target and every produced sequence before scoring.

    Args:
        produced_tokens: 2-D tensor with one sampled sequence per row; its
            second dimension must equal the target length.
        target_tokens: 1-D tensor with the ground-truth tokens.
        tgt_dict: dictionary exposing ``pad()``, ``eos()`` and ``string()``.

    Returns:
        The value returned by ``sentence_bleu`` with uniform weights over
        1- to 3-grams.
    """
    assert target_tokens.ndim == 1
    assert produced_tokens.size(1) == target_tokens.size(0)

    # we can have padding due to shifted channels
    special = (tgt_dict.pad(), tgt_dict.eos())
    shift = 0
    for token in reversed(target_tokens.cpu().tolist()):
        if token not in special:
            break
        shift += 1

    # Only trim when there is something to trim: x[:-0] is x[:0], which
    # would silently empty both tensors.
    if shift > 0:
        target_tokens = target_tokens[:-shift]
        produced_tokens = produced_tokens[:, :-shift]

    string_target = tgt_dict.string(target_tokens).split()
    string_candidates = [
        tgt_dict.string(produced_tokens[i, :]).split()
        for i in range(produced_tokens.size(0))
    ]

    # NOTE(review): the sampled sequences are passed as `references` and the
    # ground truth as `hypothesis`, exactly as before -- confirm this
    # argument order is intended.
    bleu3 = sentence_bleu(
        references=string_candidates,
        hypothesis=string_target,
        weights=(1.0 / 3, 1.0 / 3, 1.0 / 3),
    )
    return bleu3
None + if world_size == 1 + else DistributedSampler( + dataset, num_replicas=world_size, rank=rank, shuffle=False + ) + ) + dataloader = DataLoader( + dataset, + batch_size=1, + shuffle=False, + collate_fn=dataset.collater, + sampler=sampler, + ) + + Ts = args.T_token, args.T_duration, args.T_f0 + decoder = TemperatureDecoder( + Ts, discrete_dur=is_discrete_duration, discrete_f0=is_discrete_f0 + ) + + running_stats = SimpleNamespace( + token_bleu=0.0, + duration_nll=0.0, + duration_mae=0.0, + f0_nll=0.0, + f0_mae=0.0, + n_tokens=0.0, + n_sentences=0.0, + f0_sum=0.0, + f0_sum_sq=0.0, + dur_sum=0.0, + dur_sum_sq=0.0, + ) + + for i, batch in enumerate(dataloader): + batch = explode_batch(batch, args.batch_explosion_rate) + bsz = batch["target"].size(0) + + batch = move_to_cuda(batch) + prefix = batch["prefix"][0] + + max_length_to_unroll = batch["target"].size(1) + prefix_length = batch["net_input"]["src_tokens"].size(1) + steps = max_length_to_unroll - prefix_length + 1 + + assert steps > 0 + produced_tokens, produced_durations, produced_f0, outputs = do_sampling( + model, + batch, + tgt_dict.eos(), + decoder, + autoregressive_steps=steps, + teacher_force_tokens=args.teacher_force_tokens, + teacher_force_duration=args.teacher_force_duration, + teacher_force_f0=args.teacher_force_f0, + ) + + if args.teacher_force_tokens: + assert (produced_tokens[:, 1:] == batch["target"]).all() + if args.teacher_force_duration: + assert (produced_durations[:, 1:] == batch["dur_target"]).all() + if args.teacher_force_f0: + assert (produced_f0[:, 1:] == batch["f0_target"]).all() + + dur_target = batch["dur_target"][:, prefix - 1 :].contiguous() + f0_target = batch["f0_target"][:, prefix - 1 :].contiguous() + + f0_mask = batch["f0_mask"][:, prefix - 1 :].contiguous() + dur_mask = batch["dur_mask"][:, prefix - 1 :].contiguous() + + duration_mae = mae_loss( + produced_durations[:, prefix:].float(), + dur_target.float(), + dur_mask, + reduce=False, + ) + min_duration_mae = 
duration_mae.view(bsz, -1).sum(dim=-1).min(dim=0)[0] + running_stats.duration_mae += min_duration_mae + + running_stats.dur_sum += ( + produced_durations[:, prefix:].float() * (~dur_mask) + ).sum() / args.batch_explosion_rate + running_stats.dur_sum_sq += ( + produced_durations[:, prefix:].float() * (~dur_mask) + ).pow(2.0).sum() / args.batch_explosion_rate + + if is_discrete_duration: + duration_loss = criterion.dur_loss_fn( + torch.stack([x[1] for x in outputs], dim=1), + dur_target, + dur_mask, + reduce=False, + ) + min_duration_loss = duration_loss.view(bsz, -1).sum(dim=-1).min(dim=0)[0] + running_stats.duration_nll += min_duration_loss + + if f0_decoder: # can only exist for discrete F0 models + decoded_produced_f0 = f0_decoder(produced_f0[:, prefix:]) + decoded_f0_target = batch["raw_f0"][:, prefix - 1 :].contiguous() + + if produced_f0.ndim == 3: + decoded_produced_f0 = decoded_produced_f0.squeeze(2) + decoded_f0_target = decoded_f0_target.squeeze(2) + + f0_mae = mae_loss( + decoded_produced_f0, decoded_f0_target, f0_mask, reduce=False + ) + f0_mae = f0_mae.view(bsz, -1).sum(dim=-1).min(dim=0)[0] + running_stats.f0_mae += f0_mae + + f0_loss = criterion.f0_loss_fn( + torch.stack([x[2] for x in outputs], dim=1), + f0_target.long(), + f0_mask, + reduce=False, + ) + f0_loss = f0_loss.view(bsz, -1).sum(dim=-1).min(dim=0)[0] + running_stats.f0_nll += f0_loss + + running_stats.f0_sum += ( + decoded_produced_f0 * (~f0_mask) + ).sum() / args.batch_explosion_rate + running_stats.f0_sum_sq += (decoded_produced_f0 * (~f0_mask)).pow( + 2.0 + ).sum() / args.batch_explosion_rate + + else: + assert not is_discrete_duration + + f0_loss = mae_loss( + produced_f0[:, prefix:], f0_target, f0_mask, reduce=False + ) + f0_loss = f0_loss.view(bsz, -1).sum(dim=-1).min(dim=0)[0] + running_stats.f0_mae += f0_loss + + running_stats.f0_sum += ( + produced_f0[:, prefix:].sum() / args.batch_explosion_rate + ) + running_stats.f0_sum_sq += ( + produced_f0[:, prefix:].pow(2.0).sum() / 
@torch.no_grad()
def correlation(args, dataset, model, criterion, tgt_dict, rank, world_size):
    """Collect prompt-vs-continuation mean duration and mean F0 per sample.

    For every batch the prosody streams are either sampled from ``model``
    (``do_sampling``) or copied from the targets (``args.copy_target``); the
    mean duration and mean F0 over the prompt part and over the continuation
    part are then recorded.

    Args:
        args: parsed command-line namespace (reads ``prefix_length``,
            ``min_length``, ``T_token``/``T_duration``/``T_f0``,
            ``batch_explosion_rate``, ``teacher_force_*``, ``max_length``,
            ``copy_target``, ``debug`` and ``f0_discretization_bounds``).
        dataset: dataset exposing ``discrete_dur``, ``discrete_f0`` and, for
            discrete F0, ``config.f0_vq_n_units``.
        model: model handed to ``do_sampling``.
        criterion: not referenced by this metric.
        tgt_dict: dictionary providing ``eos()``.
        rank: rank of this worker, used by the distributed sampler.
        world_size: number of workers; 1 disables the distributed sampler.

    Returns:
        Tuple ``(mean_dur_prefix, mean_dur_cont, mean_f0_prefix,
        mean_f0_cont)`` of CPU tensors concatenated over all batches.
    """
    is_discrete_duration = dataset.discrete_dur
    is_discrete_f0 = dataset.discrete_f0

    f0_decoder = None
    if is_discrete_f0:
        assert dataset.discrete_f0
        # Maps discrete F0 tokens back to continuous values.
        f0_decoder = Naive_F0_Decoder(
            args.f0_discretization_bounds, dataset.config.f0_vq_n_units
        ).cuda()

    if is_discrete_f0:
        assert f0_decoder  # correlation on tokens is meaningless

    # Wrap the dataset so each item is cut down to its prompt; short items are
    # filtered out and the remainder is sorted by length.
    dataset = InferenceDataset(
        dataset,
        args.prefix_length,
        filter_short=True,
        presort_by_length=True,
        min_length=args.min_length,
    )
    sampler = (
        None
        if world_size == 1
        else DistributedSampler(
            dataset, num_replicas=world_size, rank=rank, shuffle=False
        )
    )
    dataloader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        collate_fn=dataset.collater,
        sampler=sampler,
    )

    Ts = args.T_token, args.T_duration, args.T_f0
    decoder = TemperatureDecoder(
        Ts, discrete_dur=is_discrete_duration, discrete_f0=is_discrete_f0
    )

    mean_dur_prefix, mean_dur_cont = [], []
    mean_f0_prefix, mean_f0_cont = [], []

    for batch in dataloader:
        # Replicate the single example ``batch_explosion_rate`` times.
        batch = explode_batch(batch, args.batch_explosion_rate)
        batch = move_to_cuda(batch)

        assert len(batch["prefix"]) == 1

        if args.teacher_force_tokens:
            autoregressive_steps = batch["target"].size(1) - args.prefix_length - 1
        else:
            autoregressive_steps = args.max_length - args.prefix_length  # + max_shift?

        if args.copy_target:
            produced_durations, produced_f0 = batch["dur_target"], batch["f0_target"]
        else:
            _, produced_durations, produced_f0, outputs = do_sampling(
                model,
                batch,
                tgt_dict.eos(),
                decoder,
                autoregressive_steps=autoregressive_steps,
                teacher_force_tokens=args.teacher_force_tokens,
                teacher_force_duration=args.teacher_force_duration,
                teacher_force_f0=args.teacher_force_f0,
            )

            # first tokens actually correspond to BOS
            produced_durations = produced_durations[:, 1:]
            produced_f0 = produced_f0[:, 1:]

        dur_target = batch["dur_target"]
        if is_discrete_duration:
            # Cast token ids to float so the means below are floating point.
            produced_durations = produced_durations.float()
            dur_target = dur_target.float()

        if is_discrete_f0:
            # Compare decoded (continuous) F0 against the raw F0 track.
            produced_f0 = f0_decoder(produced_f0).squeeze(-1)
            f0_target = batch["raw_f0"]
        else:
            f0_target = batch["f0_target"]

        # prefix values
        prefix = batch["prefix"][0]
        dur_prefix_mean = dur_target[:, :prefix].sum(dim=-1) / (
            (~batch["dur_mask"][:, :prefix]).sum(dim=-1)
        )

        # Frames with F0 == 0.0 are excluded from the denominator.
        non_voiced = f0_target[:, :prefix] == 0.0
        f0_mask = batch["f0_mask"][:, :prefix].logical_or(non_voiced)
        f0_prefix_mean = f0_target[:, :prefix].sum(dim=-1) / ((~f0_mask).sum(dim=-1))

        # continuation values
        dur_cont_mean = produced_durations[:, prefix:].sum(dim=-1) / (
            (~batch["dur_mask"][:, prefix:]).sum(dim=-1)
        )

        non_voiced = produced_f0[:, prefix:] == 0.0
        f0_mask = non_voiced
        f0_cont_mean = produced_f0[:, prefix:].sum(dim=-1) / ((~f0_mask).sum(dim=-1))

        # A fully zero-F0 continuation gives 0 / 0 and would poison the result.
        assert not f0_cont_mean.isnan().any()

        mean_dur_prefix.append(dur_prefix_mean.cpu())
        mean_dur_cont.append(dur_cont_mean.cpu())

        mean_f0_prefix.append(f0_prefix_mean.cpu())
        mean_f0_cont.append(f0_cont_mean.cpu())

        # Debug mode stops once more than ten batches have been collected.
        if args.debug and len(mean_dur_prefix) > 10:
            break

    mean_dur_prefix, mean_dur_cont = torch.cat(mean_dur_prefix), torch.cat(
        mean_dur_cont
    )
    mean_f0_prefix, mean_f0_cont = torch.cat(mean_f0_prefix), torch.cat(mean_f0_cont)

    return mean_dur_prefix, mean_dur_cont, mean_f0_prefix, mean_f0_cont
def wandb_results(summary, raw_args):
    """Send the final metric summary to Weights & Biases.

    Args:
        summary: mapping of metric names to values, forwarded to ``wandb.log``.
        raw_args: parsed command-line namespace; reads ``wandb_project_name``,
            ``wandb_tags`` (comma-separated), ``wandb_run_name``, ``metric``,
            ``path`` and ``data``.
    """
    # Imported lazily so that importing this module does not require wandb.
    import wandb

    # "".split(",") yields [""]; drop empty entries instead of forwarding an
    # empty tag when --wandb-tags is left at its default.
    tags = [tag for tag in raw_args.wandb_tags.split(",") if tag]

    run = wandb.init(project=raw_args.wandb_project_name, tags=tags)
    run.config.metric = raw_args.metric
    run.config.model = raw_args.path
    run.config.data = raw_args.data

    if raw_args.wandb_run_name:
        run.name = raw_args.wandb_run_name
        run.save()

    wandb.log(summary)
    wandb.finish()


def maybe_aggregate_normalize(values, normalizers, world_size):
    """Sum ``values`` and ``normalizers`` over workers and return their ratio.

    Args:
        values: tensor of accumulated metric sums (reduced in place when
            ``world_size > 1``).
        normalizers: tensor of matching denominators (reduced in place when
            ``world_size > 1``).
        world_size: number of distributed workers; with a single worker no
            communication happens.

    Returns:
        ``values / normalizers`` computed element-wise.
    """
    if world_size > 1:
        torch.distributed.barrier()

        # all_reduce on a single tensor is the supported replacement for the
        # deprecated all_reduce_multigpu([tensor]) call.
        torch.distributed.all_reduce(values)
        torch.distributed.all_reduce(normalizers)

    return values / normalizers


def maybe_aggregate_correlations(results, world_size):
    """Gather per-worker means and compute prompt/continuation correlations.

    Args:
        results: tuple ``(mean_dur_prefix, mean_dur_cont, mean_f0_prefix,
            mean_f0_cont)`` of CPU tensors produced by one worker.
        world_size: number of distributed workers; when greater than one the
            tuples of all workers are gathered and concatenated first.

    Returns:
        Tensor ``[duration_correlation, f0_correlation]`` holding the Pearson
        correlation coefficients.
    """
    if world_size > 1:
        output = [None for _ in range(world_size)]
        torch.distributed.all_gather_object(output, results)
        # Concatenate the i-th series of every worker into one long series.
        mean_dur_prefix, mean_dur_cont, mean_f0_prefix, mean_f0_cont = [
            torch.cat([x[i] for x in output]) for i in range(4)
        ]
    else:
        mean_dur_prefix, mean_dur_cont, mean_f0_prefix, mean_f0_cont = results

    # pearsonr returns (statistic, p-value); only the statistic is kept.
    corr_dur = scipy.stats.pearsonr(mean_dur_prefix.numpy(), mean_dur_cont.numpy())[0]
    corr_f0 = scipy.stats.pearsonr(mean_f0_prefix.numpy(), mean_f0_cont.numpy())[0]
    values = torch.tensor([corr_dur, corr_f0])

    return values
parser.add_argument("--wandb-tags", type=str, default="") + parser.add_argument("--wandb-run-name", type=str, default="") + + parser.add_argument("--T-token", type=float, default=1.0) + parser.add_argument("--T-duration", type=float, default=1.0) + parser.add_argument("--T-f0", type=float, default=1.0) + + parser.add_argument("--n-workers", type=int, default=1) + + parser.add_argument( + "--eval-subset", type=str, default="valid", choices=["valid", "test"] + ) + + args = options.parse_args_and_arch(parser) + + assert ( + args.prefix_length >= 1 + ), "Prefix length includes bos token <s>, hence the minimum is 1." + assert args.temperature >= 0.0, "T must be non-negative!" + + if args.dequantize_prosody: + assert args.f0_discretization_bounds + + world_size = args.n_workers or torch.cuda.device_count() + if world_size > 1: + import random + + mp.set_start_method("spawn", force=True) + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = str(random.randint(10_000, 50_000)) + + mp.spawn( + main, + nprocs=world_size, + args=( + world_size, + args, + ), + join=True, + ) + else: + main(rank=0, world_size=world_size, args=args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/textless_nlp/pgslm/generate_waveform.py b/fairseq/examples/textless_nlp/pgslm/generate_waveform.py new file mode 100644 index 0000000..a6f348b --- /dev/null +++ b/fairseq/examples/textless_nlp/pgslm/generate_waveform.py @@ -0,0 +1,120 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
logging.basicConfig()
logging.root.setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def dump_result(args, data, sample_id, pred_wav):
    """Write one predicted waveform into ``args.results_path``.

    The file is named after the basename of ``data["audio"]`` when that field
    exists, otherwise ``"{sample_id}_pred.wav"``.

    NOTE(review): when ``args.results_path`` is not set nothing is written,
    although the ``--results-path`` help text promises that the 'audio' field
    is used as destination -- confirm the intended behaviour before relying
    on it.

    Args:
        args: namespace providing ``results_path`` and ``sample_rate``.
        data: one record of the input file.
        sample_id: index of the record, used for the fallback file name.
        pred_wav: predicted waveform tensor.
    """
    assert "audio" in data or args.results_path is not None
    if args.results_path:
        fname = Path(data["audio"]).name if "audio" in data else f"{sample_id}_pred.wav"
        out_file = Path(args.results_path) / fname

        sf.write(
            out_file.as_posix(),
            pred_wav.detach().cpu().numpy(),
            args.sample_rate,
        )


def load_data(in_file):
    """Parse a file holding one Python-literal record per line.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of aborting the whole run with a ``SyntaxError``.

    Args:
        in_file: path of the text file to read.

    Returns:
        List with one parsed object per non-empty line.
    """
    with open(in_file) as f:
        stripped_lines = (line.strip() for line in f)
        return [ast.literal_eval(line) for line in stripped_lines if line]


def get_f0_upsample_ratio(code_hop_size, f_hop_size):
    """Return how many times each F0 value is repeated for the vocoder.

    Args:
        code_hop_size: hop size of the unit codes (``code_hop_size`` entry of
            the vocoder config).
        f_hop_size: hop size of the vocoder (``hop_size`` entry of the config).
    """
    ratio = (code_hop_size // 160) // (f_hop_size // 256) * 2
    return ratio


def main(args):
    """Synthesize a waveform for every record of ``args.in_file``."""
    logger.info(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    with open(args.vocoder_cfg) as f:
        vocoder_cfg = json.load(f)
    vocoder = CodeHiFiGANVocoder(args.vocoder, vocoder_cfg)
    if use_cuda:
        vocoder = vocoder.cuda()

    data = load_data(args.in_file)

    if args.results_path:
        Path(args.results_path).mkdir(exist_ok=True, parents=True)

    # The ratio depends only on the vocoder config, so compute it once.
    f0_up_ratio = get_f0_upsample_ratio(
        vocoder_cfg["code_hop_size"], vocoder_cfg["hop_size"]
    )

    # Pure inference: no autograd graph is needed for the vocoder calls.
    with torch.no_grad():
        for i, d in tqdm(enumerate(data), total=len(data)):
            code_key = "cpc_km100" if "cpc_km100" in d else "hubert"
            code = list(map(int, d[code_key].split()))

            x = {
                "code": torch.LongTensor(code).view(1, -1),
                "f0": torch.Tensor(d["f0"]).view(1, -1),
            }

            if f0_up_ratio > 1:
                # Repeat every F0 value f0_up_ratio times along the time axis.
                bsz = x["f0"].size(0)
                x["f0"] = x["f0"].unsqueeze(2).repeat(1, 1, f0_up_ratio).view(bsz, -1)

            x = utils.move_to_cuda(x) if use_cuda else x
            wav = vocoder(x)
            dump_result(args, d, i, wav)
def cli_main():
    """Parse the command line of the waveform generator and run ``main``."""
    parser = argparse.ArgumentParser()

    # The three mandatory string options differ only in name and help text.
    required_string_options = (
        (
            "--in-file",
            "Input file following the same format of the output from sample.py ('f0' and 'cpc_km100/hubert' are required fields)",
        ),
        ("--vocoder", "path to the vocoder"),
        ("--vocoder-cfg", "path to the vocoder config"),
    )
    for flag, help_text in required_string_options:
        parser.add_argument(flag, type=str, required=True, help=help_text)

    parser.add_argument("--sample-rate", type=int, default=16_000)
    parser.add_argument(
        "--results-path",
        type=str,
        default=None,
        help="Output directory. If not set, the audios will be stored following the 'audio' field specified in the input file.",
    )
    parser.add_argument("--cpu", action="store_true", help="run on CPU")

    main(parser.parse_args())


if __name__ == "__main__":
    cli_main()
class InferenceDataset:
    """View over a dataset that optionally filters, sorts and truncates items.

    Args:
        dataset: wrapped dataset; must provide ``collater``, ``size(index)``,
            ``__len__`` and items that are dicts with a ``"dur_source"``
            tensor.
        prefix: prompt length expressed in duration units.
        only_prefix: when true (and ``prefix`` is non-zero) the tensors whose
            key contains ``"source"`` are cut down to the prompt.
        presort_by_length: sort the kept items by ``dataset.size``.
        filter_short: drop items whose total duration does not exceed the
            length threshold.
        min_length: optional length threshold; defaults to ``prefix + 1``.
    """

    def __init__(
        self,
        dataset,
        prefix,
        only_prefix=True,
        presort_by_length=True,
        filter_short=False,
        min_length=None,
    ):
        self.dataset = dataset
        self.collater = self.dataset.collater
        self.prefix = prefix
        self.only_prefix = only_prefix
        self.filter_short = filter_short

        # remapping[k] is the index in the wrapped dataset of the k-th item.
        self.remapping = list(range(len(self.dataset)))
        if min_length:
            assert min_length >= prefix + 1

        length_thr = prefix + 1 if not min_length else min_length

        if filter_short:
            self.remapping = list(
                filter(
                    lambda i: self.dataset[i]["dur_source"].sum() > length_thr,
                    self.remapping,
                )
            )
            print(
                f"# the initial dataset of {len(self.dataset)} examples became {len(self.remapping)} after filtering"
                f" examples shorter than {length_thr} (in duration units)"
            )

        if presort_by_length:
            lengths = {index: dataset.size(index) for index in self.remapping}
            self.remapping.sort(key=lambda i: lengths[i])

    @property
    def pads(self):
        """Padding values of the wrapped dataset."""
        return self.dataset.pads

    def __len__(self):
        return len(self.remapping)

    def original_size(self, k):
        """Return ``dataset.size`` of the k-th kept item."""
        k = self.remapping[k]
        return self.dataset.size(k)

    def __getitem__(self, k):
        k = self.remapping[k]
        channels = self.dataset[k]

        if self.prefix and self.only_prefix:
            dur_channel = channels["dur_source"]
            assert dur_channel.sum() >= self.prefix

            # Index of the first token whose cumulative duration reaches the
            # requested prompt length.
            token_times = dur_channel.cumsum(dim=-1)
            cut_after = torch.searchsorted(token_times, torch.tensor(self.prefix))

            r = {}
            for channel_name, value in channels.items():
                if isinstance(value, torch.Tensor) and "source" in channel_name:
                    r[channel_name] = value[: cut_after + 1]
                else:
                    r[channel_name] = value

            # Number of tokens kept in the prompt.
            r["prefix"] = cut_after + 1
        else:
            r = channels

        return r


def explode_batch(batch, times):
    """Replicate a batch of size one ``times`` times along the batch axis.

    Tensors are concatenated with themselves, ``ntokens``/``nsentences`` are
    multiplied, ``prefix``/``filename`` are passed through unchanged and
    ``net_input`` is processed recursively.

    Args:
        batch: collated batch dictionary whose tensors have batch size 1.
        times: replication factor; ``1`` returns ``batch`` itself.

    Raises:
        AssertionError: if a tensor has a batch size other than 1 or the batch
            contains a key this function does not know how to replicate.
    """
    if times == 1:
        return batch

    new_batch = {}

    for key, value in batch.items():
        if isinstance(value, torch.Tensor):
            # Raised explicitly (not via ``assert``) so the check survives -O.
            if value.size(0) != 1:
                raise AssertionError(
                    f"{key}: expected batch size 1, got {value.size(0)}"
                )
            new_batch[key] = torch.cat([value] * times)
        elif key in ("ntokens", "nsentences"):
            new_batch[key] = value * times
        elif key in ("prefix", "filename"):
            new_batch[key] = value
        elif key == "net_input":
            new_batch[key] = explode_batch(value, times)
        else:
            # Under -O a bare ``assert False`` would silently drop the key.
            raise AssertionError(key)
    return new_batch
+ ) + + mask = discrete_f0 >= self.max_n + discrete_f0 = discrete_f0.masked_fill(mask, self.max_n - 1) + + return self.embedding(discrete_f0).squeeze(-1) diff --git a/fairseq/examples/textless_nlp/pgslm/prepare_dataset.py b/fairseq/examples/textless_nlp/pgslm/prepare_dataset.py new file mode 100644 index 0000000..3d5edaa --- /dev/null +++ b/fairseq/examples/textless_nlp/pgslm/prepare_dataset.py @@ -0,0 +1,143 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from multiprocessing import Pool + +import os +from collections import defaultdict +from itertools import starmap + +import torch +from npy_append_array import NpyAppendArray +from tqdm import tqdm + +from data_utils import dump_speaker_f0_stat, F0Stat, load_f0 +from fairseq.data.codedataset import ( + ExpressiveCodeDataConfig, + parse_manifest, + F0_FRAME_SPACE, + align_f0_to_durations, +) +from fairseq.tasks.speech_ulm_task import UnitDictionary + + +def load_meta(meta_path, split): + config = ExpressiveCodeDataConfig(meta_path) + manifest_path = config.manifests[split] + dictionary = UnitDictionary(n_units=config.n_units) + audio_paths, codes, durs, speakers = parse_manifest(manifest_path, dictionary) + return config, audio_paths, codes, durs, speakers + + +def _align_f0(f0, dur, ratio, frm_tol=5): + if f0 is None: + seg_f0 = torch.zeros_like(dur, dtype=torch.float) + else: + seg_f0 = align_f0_to_durations(f0, dur, ratio, tol=frm_tol * ratio) + return seg_f0.numpy() # try a hacky stuff + + +def align_f0(path_to_f0, audio_paths, durs, ratio, mp=False): + chunk_size = 2000 + num_procs = 40 + iterable = ((path_to_f0[p], d, ratio) for p, d in zip(audio_paths, durs)) + + seg_f0s = [] + if mp: + with Pool(num_procs) as pool: + iterator = tqdm( + pool.istarmap(_align_f0, iterable, chunk_size), + desc="align f0", + total=len(durs), + ) + for seg_f0 in iterator: + 
seg_f0s.append(torch.from_numpy(seg_f0).float()) + else: + iterator = tqdm(starmap(_align_f0, iterable), desc="align f0", total=len(durs)) + for seg_f0 in iterator: + seg_f0s.append(torch.from_numpy(seg_f0).float()) + + return seg_f0s + + +def prepare_seg_data(config, audio_paths, codes, durs, speakers, path_to_f0): + ratio = config.code_hop_size / (config.sampling_rate * F0_FRAME_SPACE) + seg_f0s = align_f0(path_to_f0, audio_paths, durs, ratio) + data = { + "codes": codes, + "duration": durs, + "f0": seg_f0s, + "speaker": speakers, + "path": audio_paths, + } + return data + + +def dump_seg_data(data, out_prefix): + key_targs = { + "codes": f"{out_prefix}.code.npy", + "duration": f"{out_prefix}.dur.npy", + "f0": f"{out_prefix}.f0.npy", + } + for key, targ in key_targs.items(): + assert not os.path.exists(targ) + npaa = NpyAppendArray(targ) + for utt_data in tqdm(data[key], desc=f"dumping {key}"): + npaa.append(utt_data.numpy()) + + assert not os.path.exists(f"{out_prefix}.path.txt") + with open(f"{out_prefix}.path.txt", "w") as f: + for x in data["path"]: + f.write(f"{str(x)}\n") + + assert not os.path.exists(f"{out_prefix}.leng.txt") + with open(f"{out_prefix}.leng.txt", "w") as f: + for x in data["codes"]: + f.write(f"{len(x)}\n") + + assert not os.path.exists(f"{out_prefix}.speaker.txt") + with open(f"{out_prefix}.speaker.txt", "w") as f: + for x in data["speaker"]: + f.write(f"{str(x)}\n") + + print(f"wrote to files with prefix {out_prefix}") + + +def main(meta_path, f0_dir, splits, nshards_list): + speaker_to_stat = defaultdict(F0Stat) + if len(nshards_list) == 1: + nshards_list = nshards_list * len(splits) + else: + assert len(nshards_list) == len(splits) + + for split, nshards in zip(splits, nshards_list): + config, audio_paths, codes, durs, speakers = load_meta(meta_path, split) + path_to_f0 = load_f0(f"{f0_dir}/{split}", nshards) + + # segment-level data + data = prepare_seg_data(config, audio_paths, codes, durs, speakers, path_to_f0) + dump_seg_data(data, 
def process_one(path, sr):
    """Extract the F0 track of one audio file.

    Args:
        path: audio file path
        sr: sampling rate

    Returns:
        The F0 track, or ``None`` when extraction failed.
    """
    try:
        # YAAPT throws errors in some rare cases
        f0 = get_f0_by_filename(path, sr)
    except Exception as e:
        # Deliberate best effort: one broken file must not abort the shard.
        print(
            f"WARNING: error when processing {path}. set f0 to zero. original error message:\n{e}"
        )
        f0 = None
    return f0


def main(file_path, out_dir, nshards, rank, sampling_rate):
    """Extract F0 for one shard of the audio files and save it.

    Args:
        file_path: file listing the audio paths (read by ``load_audio_path``).
        out_dir: output directory; the shard is saved as
            ``f0_{rank}_{nshards}.pt`` inside it.
        nshards: total number of shards.
        rank: 1-based index of the shard handled by this call.
        sampling_rate: sampling rate handed to the F0 extractor.

    Raises:
        ValueError: if ``rank`` is outside ``1..nshards``.
    """
    # rank is 1-based: rank 0 or rank > nshards would silently produce an
    # empty or wrong slice, so reject it before any work is done.
    if nshards > 0 and not 1 <= rank <= nshards:
        raise ValueError(f"rank must be in [1, {nshards}], got {rank}")

    # load data
    audio_paths = load_audio_path(file_path)

    # shard
    assert nshards <= len(audio_paths) and nshards > 0
    shard_size = len(audio_paths) / nshards
    s = int(round((rank - 1) * shard_size))
    e = int(round(rank * shard_size))
    audio_paths = audio_paths[s:e]

    # process
    path_to_f0 = {}
    for audio_path in tqdm(audio_paths):
        path_to_f0[audio_path] = process_one(audio_path, sampling_rate)
    print(f"finished processing {len(path_to_f0)} utterances ({s}-{e})")

    f0_path = f"{out_dir}/f0_{rank}_{nshards}.pt"
    os.makedirs(out_dir, exist_ok=True)
    torch.save(path_to_f0, f0_path)
    print(f"saved to {f0_path}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("file_path")
    parser.add_argument("out_dir")
    parser.add_argument("--nshards", type=int, default=20)
    parser.add_argument("--rank", type=int, default=1)
    parser.add_argument("--sampling_rate", type=int, default=16000)
    args = parser.parse_args()

    main(**vars(args))
def load_speaker(path):
    """Read the ``speaker`` field of every manifest line.

    Each line must be a Python literal dict containing a ``"speaker"`` key.
    Lines are parsed with ``ast.literal_eval`` so that a manifest can never
    execute code.

    Args:
        path: manifest path.

    Returns:
        List of speaker entries, one per line, in file order.
    """
    import ast

    speakers = []
    with open(path) as f:
        for line in f:
            sample = ast.literal_eval(line.strip())
            assert "speaker" in sample
            speakers.append(sample["speaker"])
    return speakers


def quantize_f0(speaker_to_f0, f0_stats, nbins, normalize, log):
    """Compute quantile bin boundaries of the (normalized) F0 values.

    Args:
        speaker_to_f0: mapping speaker -> object whose ``raw_data`` holds the
            speaker's F0 values.
        f0_stats: mapping speaker -> dict with ``f0_mean``/``f0_std`` (or
            ``logf0_mean``/``logf0_std`` when ``log`` is set).
        nbins: iterable of bin counts to compute boundaries for.
        normalize: ``"mean"`` subtracts the speaker mean, ``"meanstd"``
            additionally divides by the speaker std, anything else leaves the
            values untouched.
        log: quantize log-F0 instead of F0.

    Returns:
        Dict mapping each bin count to an array of ``num_bin - 1`` boundaries.
    """
    f0_all = []
    for speaker, f0 in speaker_to_f0.items():
        f0 = f0.raw_data
        if log:
            f0 = f0.log()
        mean = f0_stats[speaker]["logf0_mean"] if log else f0_stats[speaker]["f0_mean"]
        std = f0_stats[speaker]["logf0_std"] if log else f0_stats[speaker]["f0_std"]
        if normalize == "mean":
            f0 = f0 - mean
        elif normalize == "meanstd":
            f0 = (f0 - mean) / std
        f0_all.extend(f0.tolist())

    # Fine-grained cumulative histogram, expressed in percent.
    hist, bin_x = np.histogram(f0_all, 100000)
    cum_hist = np.cumsum(hist) / len(f0_all) * 100

    f0_bin = {}
    for num_bin in nbins:
        bin_offset = []
        bin_size = 100 / num_bin
        threshold = bin_size
        for _ in range(num_bin - 1):
            # Histogram edge whose cumulative mass is closest to the quantile.
            index = (np.abs(cum_hist - threshold)).argmin()
            bin_offset.append(bin_x[index])
            threshold += bin_size
        f0_bin[num_bin] = np.array(bin_offset)

    return f0_bin


def main(file_path, f0_dir, out_dir, out_prefix, nbins, nshards, normalize, log):
    """Dump per-speaker F0 statistics and the F0 quantization boundaries."""
    audio_paths = load_audio_path(file_path)
    path_to_f0 = load_f0(f0_dir, nshards)

    speakers = load_speaker(file_path)
    speaker_to_f0 = defaultdict(partial(F0Stat, True))

    # speaker f0 stats
    for audio_path, speaker in tqdm(zip(audio_paths, speakers)):
        f0 = path_to_f0[audio_path]
        speaker_to_f0[speaker].update(f0)
    f0_stats = dump_speaker_f0_stat(speaker_to_f0, f"{out_dir}/{out_prefix}")

    # quantize
    f0_bin = quantize_f0(speaker_to_f0, f0_stats, nbins, normalize, log)
    log_suffix = "_log" if log else ""
    f0_bin_out_file = f"{out_dir}/{out_prefix}_{normalize}_norm{log_suffix}_f0_bin.th"
    torch.save(f0_bin, f0_bin_out_file)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("file_path")
    parser.add_argument("f0_dir", help="out_dir from preprocess_f0")
    parser.add_argument("out_dir")
    parser.add_argument("out_prefix")
    parser.add_argument("--nbins", nargs="+", type=int, default=[32])
    parser.add_argument("--nshards", type=int, default=20, help="number of f0 shards")
    parser.add_argument(
        "--normalize", type=str, choices=["meanstd", "mean", "none"], default="mean"
    )
    parser.add_argument("--log", action="store_true")
    args = parser.parse_args()
    print(args)

    main(**vars(args))
CODETYPE_TO_FRAMETIME = {"cpc_km100": 0.01, "hubert": 0.02}  # 10ms  # 20ms


class TemperatureDecoder:
    """Pick the next token, duration and F0 from the last model output step.

    Args:
        Ts: tuple ``(T_token, T_duration, T_f0)`` of sampling temperatures; a
            temperature of 0 selects the deterministic choice.
        discrete_dur: durations are predicted as logits over classes.
        discrete_f0: F0 is predicted as logits over classes.
    """

    def __init__(self, Ts, discrete_dur=False, discrete_f0=False):
        self.T_token, self.T_dur, self.T_f0 = Ts
        self.discrete_dur = discrete_dur
        self.discrete_f0 = discrete_f0

    def __call__(self, output):
        """Return ``(new_tokens, new_durations, new_f0)`` for ``output``.

        ``output`` maps ``"token"``, ``"duration"`` and ``"f0"`` to model
        outputs; only the last position along dim 1 is used.
        """

        def sample_multinomial(key, T):
            logits = output[key][:, -1, :].float()
            return Categorical(logits=logits / T).sample().unsqueeze(-1)

        def sample_laplace(key, T, truncate_at_zero):
            mean = output[key][:, -1, :].float()
            return truncated_laplace(mean=mean, T=T, truncate_by_zero=truncate_at_zero)

        if self.T_token > 0:
            new_tokens = sample_multinomial("token", self.T_token)
        else:
            new_tokens = output["token"][:, -1, :].argmax(dim=-1, keepdim=True)

        if not self.discrete_dur and self.T_dur == 0:
            new_durations = output["duration"][:, -1].round().int()
        elif not self.discrete_dur and self.T_dur > 0:
            new_durations = (
                sample_laplace("duration", self.T_dur, truncate_at_zero=True)
                .round()
                .int()
            )
        elif self.discrete_dur and self.T_dur > 0:
            new_durations = sample_multinomial("duration", self.T_dur)
        elif self.discrete_dur and self.T_dur == 0:
            new_durations = output["duration"][:, -1, :].argmax(dim=-1, keepdim=True)
        else:
            # Only reachable with a negative temperature; raised explicitly so
            # the failure is not lost when assertions are disabled.
            raise AssertionError(f"invalid duration temperature: {self.T_dur}")

        if not self.discrete_f0 and self.T_f0 == 0:
            new_f0 = output["f0"][:, -1]
        elif not self.discrete_f0 and self.T_f0 > 0:
            new_f0 = sample_laplace("f0", self.T_f0, truncate_at_zero=False)
        elif self.discrete_f0 and self.T_f0 > 0:
            new_f0 = sample_multinomial("f0", self.T_f0)
        elif self.discrete_f0 and self.T_f0 == 0:
            new_f0 = output["f0"][:, -1, :].argmax(dim=-1, keepdim=True)
        else:
            raise AssertionError(f"invalid F0 temperature: {self.T_f0}")

        return new_tokens, new_durations, new_f0


class FilterNamesDataset:
    """Restrict a dataset to the items whose filename is listed in a file.

    Args:
        dataset: wrapped dataset; items are dicts with a ``"filename"`` entry
            and the dataset exposes ``collater``, ``discrete_dur``,
            ``discrete_f0`` and ``size(index)``.
        fnames_path: text file with one Python-literal dict per line, each
            holding the wanted filename under ``"audio"``.
    """

    def __init__(self, dataset, fnames_path):
        import ast

        self.dataset = dataset

        with open(fnames_path, "r") as fin:
            # literal_eval parses the dict literal without executing code.
            fnames = {ast.literal_eval(line.strip())["audio"] for line in fin}
        print(f"# will retrict the dataset for {len(fnames)} files")

        self.indexes = [
            i for i, datapoint in enumerate(dataset) if datapoint["filename"] in fnames
        ]
        # Every requested file must be found exactly once in the dataset.
        assert len(self.indexes) == len(fnames), f"{len(self.indexes)} {len(fnames)}"

        self.collater = self.dataset.collater
        self.discrete_dur = self.dataset.discrete_dur
        self.discrete_f0 = self.dataset.discrete_f0

    def __len__(self):
        return len(self.indexes)

    def __getitem__(self, k):
        k = self.indexes[k]
        return self.dataset[k]

    def size(self, k):
        """Return ``dataset.size`` of the k-th kept item."""
        k = self.indexes[k]
        return self.dataset.size(k)
batch["net_input"]["src_tokens"] = torch.cat( + [batch["net_input"]["src_tokens"], new_tokens], dim=1 + ) + batch["net_input"]["dur_src"] = torch.cat( + [batch["net_input"]["dur_src"], new_durations], dim=1 + ) + batch["net_input"]["f0_src"] = torch.cat( + [batch["net_input"]["f0_src"], new_f0], dim=1 + ) + + outputs = [] + + if teacher_force_tokens or teacher_force_duration or teacher_force_f0: + max_time = batch["target"].size(1) + prefix_time = batch["net_input"]["src_tokens"].size(1) + + autoregressive_steps = max_time - prefix_time + 1 # should be 0 + + for _ in range(autoregressive_steps): + output = model(**batch["net_input"]) + + last_steps = ( + output["token"][:, -1, ...], + output["duration"][:, -1, ...], + output["f0"][:, -1, ...], + ) + outputs.append(last_steps) + + autoregressive_step_(output, autoregressive_steps) + tokens, duration, f0 = ( + batch["net_input"]["src_tokens"], + batch["net_input"]["dur_src"], + batch["net_input"]["f0_src"], + ) + + if ( + match_duration + and (batch["dur_target"].sum(dim=-1) < duration.sum(dim=-1)).all() + ): + break + + return tokens, duration, f0, outputs + + +def unroll_duration(token_stream, duration_stream): + assert len(token_stream) == len( + duration_stream + ), f"{len(token_stream)} != {len(duration_stream)}" + non_positive_durations = sum(d <= 0 for d in duration_stream) + if non_positive_durations > 0: + print( + f"# {non_positive_durations} durations are non-positive, they will be capped to 1" + ) + + result = [] + + duration_stream_rounded_capped = [max(1, int(round(x))) for x in duration_stream] + for t, d in zip(token_stream, duration_stream_rounded_capped): + result.extend([t] * d) + + return result + + +def realign_shifted_streams(tokens, durations, F0s, shifts): + """ + Durations are shifted by 1, F0 by 2 + >>> tokens = ["<s>", "t1", "t2", "t3", "</s>", "x", "x"] + >>> durations = ["<0>", "<0>", "d1", "d2", "d3", "<0>", "x"] + >>> F0s = ["<0>", "<0>", "<0>", "f1", "f2", "f3", "<0>"] + >>> shifts = 
def maybe_cut_eos(produced_tokens, produced_duration, produced_f0, eos_idx):
    """Truncate the three streams right before the first ``eos_idx`` token.

    When ``eos_idx`` does not occur in ``produced_tokens`` the inputs are
    returned untouched.

    Returns:
        Tuple ``(tokens, durations, f0)``.
    """
    try:
        cut = produced_tokens.index(eos_idx)
    except ValueError:
        # No end-of-sequence marker: nothing to trim.
        return produced_tokens, produced_duration, produced_f0
    return produced_tokens[:cut], produced_duration[:cut], produced_f0[:cut]


def maybe_filter_pad(produced_tokens, produced_duration, produced_f0, pad_idx):
    """Drop every position whose token equals ``pad_idx`` from all streams.

    When no padding token is present the inputs are returned untouched.
    Otherwise a notice is printed and new, filtered lists are returned.

    Raises:
        AssertionError: if padding is present and the streams differ in length.
    """
    if pad_idx not in produced_tokens:
        return produced_tokens, produced_duration, produced_f0

    assert len(produced_tokens) == len(produced_duration) == len(produced_f0)

    print("<pad> is detected in the output!")

    kept = [
        triple
        for triple in zip(produced_tokens, produced_duration, produced_f0)
        if triple[0] != pad_idx
    ]
    filtered_tokens = [tok for tok, _, _ in kept]
    filtered_duration = [dur for _, dur, _ in kept]
    filtered_f0 = [f0 for _, _, f0 in kept]
    return filtered_tokens, filtered_duration, filtered_f0
def main(rank, world_size, args):
    """Sample from a loaded model and write one JSON line per produced sample.

    For every prompt of the selected subset the function either samples
    continuations with ``do_sampling`` or, with ``--short-curcuit``, copies the
    targets. It then re-aligns the shifted streams, trims at ``</s>``, drops
    padding, optionally matches the target duration and/or cuts the prompt,
    and prints the result as JSON to the output file.

    Args:
        rank: index of this process; also used as the CUDA device index when
            ``world_size > 1``.
        world_size: number of processes. With more than one, each rank writes
            to ``<output>_<rank>`` and rank 0 concatenates those files into
            ``<output>`` after a barrier.
        args: parsed command-line namespace (see ``cli_main``).
    """
    if world_size > 1:
        torch.distributed.init_process_group(
            backend="gloo", init_method="env://", world_size=world_size, rank=rank
        )
        torch.cuda.set_device(rank)

    raw_args = args
    args = convert_namespace_to_omegaconf(args)
    if args.common.seed is not None:
        random.seed(args.common.seed)
        np.random.seed(args.common.seed)
        utils.set_torch_seed(args.common.seed)

    models, model_args, task = checkpoint_utils.load_model_ensemble_and_task(
        [raw_args.path], arg_overrides={"data": args.task.data}
    )
    tgt_dict = task.target_dictionary

    for model in models:
        model.prepare_for_inference_(args)
        model.cuda().eval()
        if raw_args.fp16:
            model.half()

    config = ExpressiveCodeDataConfig(args.task.data)

    dataset = CodeDataset(
        manifest=config.manifests[raw_args.subset],
        dictionary=task.source_dictionary,
        dur_dictionary=task.source_duration_dictionary,
        f0_dictionary=task.source_f0_dictionary,
        config=config,
        discrete_dur=task.cfg.discrete_duration,
        discrete_f0=task.cfg.discrete_f0,
        log_f0=task.cfg.log_f0,
        normalize_f0_mean=task.cfg.normalize_f0_mean,
        normalize_f0_std=task.cfg.normalize_f0_std,
        interpolate_f0=task.cfg.interpolate_f0,
        shifts=task.cfg.stream_shifts,
        return_filename=True,
        strip_filename=False,
    )
    shifts = dataset.shifts.dur, dataset.shifts.f0
    max_shift = max(shifts)

    # Every rank writes to its own file; rank 0 merges them at the end.
    fname = raw_args.output
    if world_size > 1:
        fname += f"_{rank}"

    # The context manager guarantees the file is flushed and closed even when
    # generation raises part-way through.
    with open(fname, "w") as output_file:
        if raw_args.filter_names:
            dataset = FilterNamesDataset(dataset, raw_args.filter_names)

        dataset = InferenceDataset(dataset, raw_args.prefix_length, filter_short=True)
        print(f"Dataset size {len(dataset)}")
        sampler = (
            None
            if world_size == 1
            else DistributedSampler(
                dataset, num_replicas=world_size, rank=rank, shuffle=False
            )
        )
        dataloader = DataLoader(
            dataset,
            batch_size=1,
            shuffle=False,
            collate_fn=dataset.collater,
            sampler=sampler,
        )

        Ts = raw_args.T_token, raw_args.T_duration, raw_args.T_f0
        decoder = TemperatureDecoder(
            Ts,
            discrete_dur=task.cfg.discrete_duration,
            discrete_f0=task.cfg.discrete_f0,
        )

        dataset_size = len(dataset)

        f0_decoder = None
        if raw_args.f0_discretization_bounds:
            assert task.cfg.discrete_f0
            f0_decoder = Naive_F0_Decoder(raw_args.f0_discretization_bounds).cuda()

        # The progress bar is only shown in the single-process case.
        pbar = (
            tqdm.tqdm(
                total=dataset_size
                if raw_args.max_samples is None
                else min(raw_args.max_samples, dataset_size)
            )
            if world_size == 1
            else None
        )

        samples_produced = 0

        for batch in dataloader:
            if (
                raw_args.max_samples is not None
                and samples_produced >= raw_args.max_samples
            ):
                break

            prefix = batch["prefix"][0]

            # The single prompt is replicated batch_explosion_rate times.
            batch = explode_batch(batch, raw_args.batch_explosion_rate)
            batch = move_to_cuda(batch)

            if not raw_args.short_curcuit:
                produced_tokens, produced_durations, produced_f0, _ = do_sampling(
                    models[0],
                    batch,
                    tgt_dict.eos(),
                    decoder,
                    autoregressive_steps=raw_args.max_length - prefix + max_shift,
                    teacher_force_tokens=raw_args.teacher_force_tokens,
                    match_duration=raw_args.match_duration,
                    teacher_force_duration=raw_args.teacher_force_duration,
                    teacher_force_f0=raw_args.teacher_force_f0,
                )

                # strip entries corresponding to <s>
                produced_tokens = produced_tokens[:, 1:]
                produced_durations = produced_durations[:, 1:]
                produced_f0 = produced_f0[:, 1:]

            else:
                max_length = raw_args.max_length + max_shift
                produced_tokens, produced_durations, produced_f0 = (
                    batch["target"][:, :max_length],
                    batch["dur_target"][:, :max_length],
                    batch["f0_target"][:, :max_length],
                )

            if f0_decoder is not None:
                produced_f0 = f0_decoder(produced_f0)

            produced_tokens, produced_durations, produced_f0 = (
                produced_tokens.cpu().tolist(),
                produced_durations.cpu().tolist(),
                produced_f0.cpu().tolist(),
            )

            bsz = batch["target"].size(0)
            assert bsz == raw_args.batch_explosion_rate

            for i in range(bsz):
                if (
                    raw_args.max_samples is not None
                    and samples_produced >= raw_args.max_samples
                ):
                    break

                produced_tokens_i = produced_tokens[i]
                produced_durations_i = produced_durations[i]
                produced_f0_i = produced_f0[i]

                (
                    produced_tokens_i,
                    produced_durations_i,
                    produced_f0_i,
                ) = realign_shifted_streams(
                    produced_tokens_i, produced_durations_i, produced_f0_i, shifts
                )

                (
                    produced_tokens_i,
                    produced_durations_i,
                    produced_f0_i,
                ) = maybe_cut_eos(
                    produced_tokens_i,
                    produced_durations_i,
                    produced_f0_i,
                    tgt_dict.eos(),
                )

                (
                    produced_tokens_i,
                    produced_durations_i,
                    produced_f0_i,
                ) = maybe_filter_pad(
                    produced_tokens_i,
                    produced_durations_i,
                    produced_f0_i,
                    tgt_dict.pad(),
                )

                if raw_args.match_duration:
                    # NB: here we cheat a bit and use that padding has duration 0
                    # so no need to re-align and remove padding
                    dur_target_i = batch["dur_target"][i, :].sum().item()
                    (
                        produced_tokens_i,
                        produced_durations_i,
                        produced_f0_i,
                    ) = match_duration(
                        produced_tokens_i,
                        produced_durations_i,
                        produced_f0_i,
                        dur_target_i,
                    )

                if raw_args.cut_prompt:
                    produced_tokens_i, produced_durations_i, produced_f0_i = (
                        produced_tokens_i[prefix:],
                        produced_durations_i[prefix:],
                        produced_f0_i[prefix:],
                    )

                prompt_fname = batch["filename"][0]
                # Named separately from the output path `fname` so the two
                # are never confused.
                sample_fname = (
                    str(pathlib.Path(prompt_fname).with_suffix("")) + f"__{i}.wav"
                )

                token_stream = unroll_duration(produced_tokens_i, produced_durations_i)
                f0_stream = unroll_duration(produced_f0_i, produced_durations_i)
                output_line = json.dumps(
                    {
                        "audio": sample_fname,
                        "prompt": prompt_fname,
                        raw_args.code_type: " ".join(map(str, token_stream)),
                        "duration": round(
                            sum(produced_durations_i)
                            * CODETYPE_TO_FRAMETIME[raw_args.code_type],
                            3,
                        ),
                        "raw_duration": produced_durations_i,
                        "raw_f0": produced_f0_i,
                        "f0": [round(f0, 3) for f0 in f0_stream],
                    }
                )
                print(output_line, file=output_file)

                if pbar:
                    pbar.update(1)
                samples_produced += 1

            if raw_args.debug:
                break

    if world_size > 1:
        # important that everything is flushed before aggregating
        torch.distributed.barrier()

    if world_size > 1 and rank == 0:
        # Concatenate the per-rank files in rank order and delete them.
        with open(raw_args.output, "w") as fout:
            for i in range(world_size):
                f = raw_args.output + f"_{i}"
                with open(f, "r") as fin:
                    fout.write(fin.read())
                os.remove(f)
parser.add_argument("--prompt-duration-scaler", type=float, default=1.0) + parser.add_argument("--teacher-force-tokens", action="store_true", default=False) + parser.add_argument("--teacher-force-duration", action="store_true", default=False) + parser.add_argument("--teacher-force-f0", action="store_true", default=False) + parser.add_argument("--filter-names", type=str, default=None) + parser.add_argument( + "--match-duration", + action="store_true", + help="Do not produce sequences longer that ground-truth", + ) + parser.add_argument( + "--cut-prompt", + action="store_true", + help="Remove prompt from the produced audio", + ) + parser.add_argument( + "--short-curcuit", action="store_true", help="Use 'target' as a sample" + ) + parser.add_argument("--f0-discretization-bounds", type=str, default=None) + + parser.add_argument("--batch-explosion-rate", type=int, default=1) + + parser.add_argument("--T-token", type=float, default=1.0) + parser.add_argument("--T-duration", type=float, default=1.0) + parser.add_argument("--T-f0", type=float, default=1.0) + + parser.add_argument( + "--subset", type=str, default="valid", choices=["test", "valid"] + ) + + args = options.parse_args_and_arch(parser) + + assert ( + args.prefix_length >= 1 + ), "Prefix length includes bos token <s>, hence the minimum is 1." + assert all( + t >= 0 for t in [args.T_token, args.T_f0, args.T_duration] + ), "T must be non-negative!" 
def main():
    """Join a manifest and a units file into a JSON-lines manifest.

    The first manifest line is the root directory; each following line is
    ``<relative path> <frame count>``. Each units line has the form
    ``<anything>|<units>``. Lines are paired positionally, and for every pair
    one JSON object with ``audio``, ``duration``, ``hubert_km100`` and
    ``speaker`` is written to ``--output``. The speaker is the name of the
    audio file's grand-parent directory.
    """
    parser = argparse.ArgumentParser()
    for required_flag in ("--manifest", "--units", "--output"):
        parser.add_argument(required_flag, required=True)
    parser.add_argument("--sample_rate", type=int, default=16_000)
    args = parser.parse_args()

    sample_rate = float(args.sample_rate)

    with open(args.manifest, "r") as manifest_file, open(
        args.units, "r"
    ) as units_file, open(args.output, "w") as out_file:
        # Header line of the manifest carries the audio root directory.
        root = pathlib.Path(manifest_file.readline().strip())

        for manifest_line, unit_line in zip(manifest_file, units_file):
            rel_path, n_frames = manifest_line.split()
            audio_path = root / rel_path
            record = {
                "audio": str(audio_path),
                "duration": int(n_frames) / sample_rate,
                "hubert_km100": unit_line.split("|")[1].strip(),
                "speaker": audio_path.parent.parent.name,
            }
            print(json.dumps(record), file=out_file)
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates.

# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Set up a data directory: symlink the three split manifests into OUT_DIR,
# write OUT_DIR/data_config.json, run preprocess_f0.py on every split and
# finally run prepare_dataset.py on the result.
#
# Usage:
#   prepare_data.sh TRAIN_JSON VALID_JSON TEST_JSON N_UNITS HOP_SIZE \
#       SAMPLING_RATE F0_QUANTIZER OUT_DIR
#
# NOTE: the manifests are linked with `ln -s`, which stores the path as
# given. A relative manifest path is therefore resolved relative to OUT_DIR,
# so pass absolute paths unless that is what you want.

set -eu

if [ "$#" -lt 8 ]; then
    echo "Usage: $0 TRAIN_JSON VALID_JSON TEST_JSON N_UNITS HOP_SIZE SAMPLING_RATE F0_QUANTIZER OUT_DIR" >&2
    exit 1
fi

train_json=$1
valid_json=$2
test_json=$3
n_units=$4
hop_size=$5
sr=$6
f0_quantizer=$7
out_dir=$8

meta_path="$out_dir/data_config.json"
f0_dir="$out_dir/f0"

# All expansions are quoted so paths containing spaces or glob characters
# are passed through as single arguments.
mkdir -p "$out_dir"
ln -sf "$train_json" "$out_dir/train.txt"
ln -sf "$valid_json" "$out_dir/valid.txt"
ln -sf "$test_json" "$out_dir/test.txt"

cat <<EOF >"$meta_path"
{
    "manifests": {
        "train": "$out_dir/train.txt",
        "valid": "$out_dir/valid.txt",
        "test": "$out_dir/test.txt"
    },
    "n_units": $n_units,
    "code_hop_size": $hop_size,
    "sampling_rate": $sr,
    "multispkr": "parent_parent_name",

    "f0_vq_type": "naive",
    "f0_vq_naive_quantizer": {
        "log_mean_norm": "$f0_quantizer"
    },
    "f0_vq_n_units": 32
}
EOF

for split in train valid test; do
    python examples/textless_nlp/pgslm/preprocess_f0.py \
        "$out_dir/$split.txt" "$f0_dir/$split" --nshards=1 --rank=1 --sampling_rate="$sr"

    #NSHARDS=16
    #seq 1 $NSHARDS | parallel -j $NSHARDS python examples/textless_nlp/pgslm/preprocess_f0.py \
    #    $out_dir/$split.txt $f0_dir/$split --nshards=$NSHARDS --sampling_rate=$sr --rank
done

# Please make sure that the number of shards (--nshards_list) is consistent across commands
python examples/textless_nlp/pgslm/prepare_dataset.py \
    "$meta_path" "$f0_dir" --splits test valid train --nshards_list 1
def truncated_laplace(mean, T, truncate_by_zero=False):
    """Draw one Laplace sample per element of ``mean``, optionally kept >= 0.

    Sampling is done by inverse transform: a uniform draw ``p`` is mapped
    through the Laplace quantile function with location ``mean`` and scale
    ``T``. With ``truncate_by_zero`` the uniform draw is confined to the part
    of the CDF lying above zero (background:
    https://stats.stackexchange.com/a/357598).

    Args:
        mean: tensor of location parameters.
        T: scale (temperature) of the distribution.
        truncate_by_zero: if set, left-truncate at zero. Negative means then
            trigger a warning and are clamped to zero first.

    Returns:
        A tensor of samples.
    """
    assert isinstance(mean, torch.Tensor)

    # Lowest CDF value the uniform draw may take; 0 means "no truncation".
    percentile = 0.0

    if truncate_by_zero:
        all_non_negative = (mean >= 0.0).all()
        if not all_non_negative:
            warnings.warn(f"means are supposed to be non-negative, but got {mean}")
            mean = torch.clamp_min(mean, 0.0)

        lower_bound = mean.new_tensor([0.0])
        distance = torch.abs(mean - lower_bound)
        half_sign = 0.5 * torch.sign(lower_bound - mean)
        # Laplace CDF evaluated at the lower bound.
        percentile = 0.5 + half_sign * (1.0 - torch.exp(-1.0 / T * distance))

    uniform = torch.empty_like(mean).uniform_()
    p = uniform * (1.0 - percentile) + percentile

    centered = p - 0.5
    return mean - T * torch.sign(centered) * torch.log(1 - 2 * torch.abs(centered))
Using the obtained representations, we can get to a rate of 365 bits per second while providing better speech quality than the baseline methods. + + +## Quick Links +- [Paper](https://arxiv.org/pdf/2104.00355.pdf) +- [Samples](https://speechbot.github.io/resynthesis/index.html) +- [Code](https://github.com/facebookresearch/speech-resynthesis) + +The codebase for the [Speech Resynthesis from Discrete Disentangled Self-Supervised Representations](https://arxiv.org/abs/2104.00355) paper can be found under the following [repository](https://github.com/facebookresearch/speech-resynthesis). + + +## Citation +``` +@inproceedings{polyak21_interspeech, + author={Adam Polyak and Yossi Adi and Jade Copet and + Eugene Kharitonov and Kushal Lakhotia and + Wei-Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux}, + title={{Speech Resynthesis from Discrete Disentangled Self-Supervised Representations}}, + year=2021, + booktitle={Proc. Interspeech 2021}, +} +``` diff --git a/fairseq/examples/translation/README.md b/fairseq/examples/translation/README.md new file mode 100644 index 0000000..2941f5e --- /dev/null +++ b/fairseq/examples/translation/README.md @@ -0,0 +1,301 @@ +# Neural Machine Translation + +This README contains instructions for [using pretrained translation models](#example-usage-torchhub) +as well as [training new models](#training-a-new-model). 
+ +## Pre-trained models + +Model | Description | Dataset | Download +---|---|---|--- +`conv.wmt14.en-fr` | Convolutional <br> ([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.v2.en-fr.fconv-py.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.v2.en-fr.newstest2014.tar.bz2) <br> newstest2012/2013: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.v2.en-fr.ntst1213.tar.bz2) +`conv.wmt14.en-de` | Convolutional <br> ([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT14 English-German](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-de.fconv-py.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-de.newstest2014.tar.bz2) +`conv.wmt17.en-de` | Convolutional <br> ([Gehring et al., 2017](https://arxiv.org/abs/1705.03122)) | [WMT17 English-German](http://statmt.org/wmt17/translation-task.html#Download) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt17.v2.en-de.fconv-py.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt17.v2.en-de.newstest2014.tar.bz2) +`transformer.wmt14.en-fr` | Transformer <br> ([Ott et al., 2018](https://arxiv.org/abs/1806.00187)) | [WMT14 English-French](http://statmt.org/wmt14/translation-task.html#Download) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-fr.joined-dict.transformer.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt14.en-fr.joined-dict.newstest2014.tar.bz2) +`transformer.wmt16.en-de` | Transformer <br> ([Ott et al., 
2018](https://arxiv.org/abs/1806.00187)) | [WMT16 English-German](https://drive.google.com/uc?export=download&id=0B_bZck-ksdkpM25jRUN2X2UxMm8) | model: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/models/wmt16.en-de.joined-dict.transformer.tar.bz2) <br> newstest2014: <br> [download (.tar.bz2)](https://dl.fbaipublicfiles.com/fairseq/data/wmt16.en-de.joined-dict.newstest2014.tar.bz2) +`transformer.wmt18.en-de` | Transformer <br> ([Edunov et al., 2018](https://arxiv.org/abs/1808.09381)) <br> WMT'18 winner | [WMT'18 English-German](http://www.statmt.org/wmt18/translation-task.html) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt18.en-de.ensemble.tar.gz) <br> See NOTE in the archive +`transformer.wmt19.en-de` | Transformer <br> ([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) <br> WMT'19 winner | [WMT'19 English-German](http://www.statmt.org/wmt19/translation-task.html) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.joined-dict.ensemble.tar.gz) +`transformer.wmt19.de-en` | Transformer <br> ([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) <br> WMT'19 winner | [WMT'19 German-English](http://www.statmt.org/wmt19/translation-task.html) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.joined-dict.ensemble.tar.gz) +`transformer.wmt19.en-ru` | Transformer <br> ([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) <br> WMT'19 winner | [WMT'19 English-Russian](http://www.statmt.org/wmt19/translation-task.html) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.ensemble.tar.gz) +`transformer.wmt19.ru-en` | Transformer <br> ([Ng et al., 2019](https://arxiv.org/abs/1907.06616)) <br> WMT'19 winner | [WMT'19 Russian-English](http://www.statmt.org/wmt19/translation-task.html) | model: <br> [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.ensemble.tar.gz) + 
+## Example usage (torch.hub) + +We require a few additional Python dependencies for preprocessing: +```bash +pip install fastBPE sacremoses subword_nmt +``` + +Interactive translation via PyTorch Hub: +```python +import torch + +# List available models +torch.hub.list('pytorch/fairseq') # [..., 'transformer.wmt16.en-de', ... ] + +# Load a transformer trained on WMT'16 En-De +# Note: WMT'19 models use fastBPE instead of subword_nmt, see instructions below +en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt16.en-de', + tokenizer='moses', bpe='subword_nmt') +en2de.eval() # disable dropout + +# The underlying model is available under the *models* attribute +assert isinstance(en2de.models[0], fairseq.models.transformer.TransformerModel) + +# Move model to GPU for faster translation +en2de.cuda() + +# Translate a sentence +en2de.translate('Hello world!') +# 'Hallo Welt!' + +# Batched translation +en2de.translate(['Hello world!', 'The cat sat on the mat.']) +# ['Hallo Welt!', 'Die Katze saß auf der Matte.'] +``` + +Loading custom models: +```python +from fairseq.models.transformer import TransformerModel +zh2en = TransformerModel.from_pretrained( + '/path/to/checkpoints', + checkpoint_file='checkpoint_best.pt', + data_name_or_path='data-bin/wmt17_zh_en_full', + bpe='subword_nmt', + bpe_codes='data-bin/wmt17_zh_en_full/zh.code' +) +zh2en.translate('你好 世界') +# 'Hello World' +``` + +If you are using one of the `transformer.wmt19` models, you will need to set the `bpe` +argument to `'fastbpe'` and (optionally) load the 4-model ensemble: +```python +en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de', + checkpoint_file='model1.pt:model2.pt:model3.pt:model4.pt', + tokenizer='moses', bpe='fastbpe') +en2de.eval() # disable dropout +``` + +## Example usage (CLI tools) + +Generation with the binarized test sets can be run in batch mode as follows, e.g.
for WMT 2014 English-French on a GTX-1080ti: +```bash +mkdir -p data-bin +curl https://dl.fbaipublicfiles.com/fairseq/models/wmt14.v2.en-fr.fconv-py.tar.bz2 | tar xvjf - -C data-bin +curl https://dl.fbaipublicfiles.com/fairseq/data/wmt14.v2.en-fr.newstest2014.tar.bz2 | tar xvjf - -C data-bin +fairseq-generate data-bin/wmt14.en-fr.newstest2014 \ + --path data-bin/wmt14.en-fr.fconv-py/model.pt \ + --beam 5 --batch-size 128 --remove-bpe | tee /tmp/gen.out +# ... +# | Translated 3003 sentences (96311 tokens) in 166.0s (580.04 tokens/s) +# | Generate test with beam=5: BLEU4 = 40.83, 67.5/46.9/34.4/25.5 (BP=1.000, ratio=1.006, syslen=83262, reflen=82787) + +# Compute BLEU score +grep ^H /tmp/gen.out | cut -f3- > /tmp/gen.out.sys +grep ^T /tmp/gen.out | cut -f2- > /tmp/gen.out.ref +fairseq-score --sys /tmp/gen.out.sys --ref /tmp/gen.out.ref +# BLEU4 = 40.83, 67.5/46.9/34.4/25.5 (BP=1.000, ratio=1.006, syslen=83262, reflen=82787) +``` + +## Training a new model + +### IWSLT'14 German to English (Transformer) + +The following instructions can be used to train a Transformer model on the [IWSLT'14 German to English dataset](http://workshop2014.iwslt.org/downloads/proceeding.pdf). + +First download and preprocess the data: +```bash +# Download and prepare the data +cd examples/translation/ +bash prepare-iwslt14.sh +cd ../.. 
+ +# Preprocess/binarize the data +TEXT=examples/translation/iwslt14.tokenized.de-en +fairseq-preprocess --source-lang de --target-lang en \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/iwslt14.tokenized.de-en \ + --workers 20 +``` + +Next we'll train a Transformer translation model over this data: +```bash +CUDA_VISIBLE_DEVICES=0 fairseq-train \ + data-bin/iwslt14.tokenized.de-en \ + --arch transformer_iwslt_de_en --share-decoder-input-output-embed \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \ + --dropout 0.3 --weight-decay 0.0001 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --max-tokens 4096 \ + --eval-bleu \ + --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}' \ + --eval-bleu-detok moses \ + --eval-bleu-remove-bpe \ + --eval-bleu-print-samples \ + --best-checkpoint-metric bleu --maximize-best-checkpoint-metric +``` + +Finally we can evaluate our trained model: +```bash +fairseq-generate data-bin/iwslt14.tokenized.de-en \ + --path checkpoints/checkpoint_best.pt \ + --batch-size 128 --beam 5 --remove-bpe +``` + +### WMT'14 English to German (Convolutional) + +The following instructions can be used to train a Convolutional translation model on the WMT English to German dataset. +See the [Scaling NMT README](../scaling_nmt/README.md) for instructions to train a Transformer translation model on this data. + +The WMT English to German dataset can be preprocessed using the `prepare-wmt14en2de.sh` script. +By default it will produce a dataset that was modeled after [Attention Is All You Need (Vaswani et al., 2017)](https://arxiv.org/abs/1706.03762), but with additional news-commentary-v12 data from WMT'17. 
+ +To use only data available in WMT'14 or to replicate results obtained in the original [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](https://arxiv.org/abs/1705.03122) paper, please use the `--icml17` option. + +```bash +# Download and prepare the data +cd examples/translation/ +# WMT'17 data: +bash prepare-wmt14en2de.sh +# or to use WMT'14 data: +# bash prepare-wmt14en2de.sh --icml17 +cd ../.. + +# Binarize the dataset +TEXT=examples/translation/wmt17_en_de +fairseq-preprocess \ + --source-lang en --target-lang de \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/wmt17_en_de --thresholdtgt 0 --thresholdsrc 0 \ + --workers 20 + +# Train the model +mkdir -p checkpoints/fconv_wmt_en_de +fairseq-train \ + data-bin/wmt17_en_de \ + --arch fconv_wmt_en_de \ + --dropout 0.2 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --optimizer nag --clip-norm 0.1 \ + --lr 0.5 --lr-scheduler fixed --force-anneal 50 \ + --max-tokens 4000 \ + --save-dir checkpoints/fconv_wmt_en_de + +# Evaluate +fairseq-generate data-bin/wmt17_en_de \ + --path checkpoints/fconv_wmt_en_de/checkpoint_best.pt \ + --beam 5 --remove-bpe +``` + +### WMT'14 English to French +```bash +# Download and prepare the data +cd examples/translation/ +bash prepare-wmt14en2fr.sh +cd ../.. 
+ +# Binarize the dataset +TEXT=examples/translation/wmt14_en_fr +fairseq-preprocess \ + --source-lang en --target-lang fr \ + --trainpref $TEXT/train --validpref $TEXT/valid --testpref $TEXT/test \ + --destdir data-bin/wmt14_en_fr --thresholdtgt 0 --thresholdsrc 0 \ + --workers 60 + +# Train the model +mkdir -p checkpoints/fconv_wmt_en_fr +fairseq-train \ + data-bin/wmt14_en_fr \ + --arch fconv_wmt_en_fr \ + --dropout 0.1 \ + --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \ + --optimizer nag --clip-norm 0.1 \ + --lr 0.5 --lr-scheduler fixed --force-anneal 50 \ + --max-tokens 3000 \ + --save-dir checkpoints/fconv_wmt_en_fr + +# Evaluate +fairseq-generate \ + data-bin/wmt14_en_fr \ + --path checkpoints/fconv_wmt_en_fr/checkpoint_best.pt \ + --beam 5 --remove-bpe +``` + +## Multilingual Translation + +We also support training multilingual translation models. In this example we'll +train a multilingual `{de,fr}-en` translation model using the IWSLT'17 datasets. + +Note that we use slightly different preprocessing here than for the IWSLT'14 +En-De data above. In particular we learn a joint BPE code for all three +languages and use fairseq-interactive and sacrebleu for scoring the test set. + +```bash +# First install sacrebleu and sentencepiece +pip install sacrebleu sentencepiece + +# Then download and preprocess the data +cd examples/translation/ +bash prepare-iwslt17-multilingual.sh +cd ../..
+ +# Binarize the de-en dataset +TEXT=examples/translation/iwslt17.de_fr.en.bpe16k +fairseq-preprocess --source-lang de --target-lang en \ + --trainpref $TEXT/train.bpe.de-en \ + --validpref $TEXT/valid0.bpe.de-en,$TEXT/valid1.bpe.de-en,$TEXT/valid2.bpe.de-en,$TEXT/valid3.bpe.de-en,$TEXT/valid4.bpe.de-en,$TEXT/valid5.bpe.de-en \ + --destdir data-bin/iwslt17.de_fr.en.bpe16k \ + --workers 10 + +# Binarize the fr-en dataset +# NOTE: it's important to reuse the en dictionary from the previous step +fairseq-preprocess --source-lang fr --target-lang en \ + --trainpref $TEXT/train.bpe.fr-en \ + --validpref $TEXT/valid0.bpe.fr-en,$TEXT/valid1.bpe.fr-en,$TEXT/valid2.bpe.fr-en,$TEXT/valid3.bpe.fr-en,$TEXT/valid4.bpe.fr-en,$TEXT/valid5.bpe.fr-en \ + --tgtdict data-bin/iwslt17.de_fr.en.bpe16k/dict.en.txt \ + --destdir data-bin/iwslt17.de_fr.en.bpe16k \ + --workers 10 + +# Train a multilingual transformer model +# NOTE: the command below assumes 1 GPU, but accumulates gradients from +# 8 fwd/bwd passes to simulate training on 8 GPUs +mkdir -p checkpoints/multilingual_transformer +CUDA_VISIBLE_DEVICES=0 fairseq-train data-bin/iwslt17.de_fr.en.bpe16k/ \ + --max-epoch 50 \ + --ddp-backend=legacy_ddp \ + --task multilingual_translation --lang-pairs de-en,fr-en \ + --arch multilingual_transformer_iwslt_de_en \ + --share-decoders --share-decoder-input-output-embed \ + --optimizer adam --adam-betas '(0.9, 0.98)' \ + --lr 0.0005 --lr-scheduler inverse_sqrt \ + --warmup-updates 4000 --warmup-init-lr '1e-07' \ + --label-smoothing 0.1 --criterion label_smoothed_cross_entropy \ + --dropout 0.3 --weight-decay 0.0001 \ + --save-dir checkpoints/multilingual_transformer \ + --max-tokens 4000 \ + --update-freq 8 + +# Generate and score the test set with sacrebleu +SRC=de +sacrebleu --test-set iwslt17 --language-pair ${SRC}-en --echo src \ + | python scripts/spm_encode.py --model examples/translation/iwslt17.de_fr.en.bpe16k/sentencepiece.bpe.model \ + > iwslt17.test.${SRC}-en.${SRC}.bpe +cat 
iwslt17.test.${SRC}-en.${SRC}.bpe \ + | fairseq-interactive data-bin/iwslt17.de_fr.en.bpe16k/ \ + --task multilingual_translation --lang-pairs de-en,fr-en \ + --source-lang ${SRC} --target-lang en \ + --path checkpoints/multilingual_transformer/checkpoint_best.pt \ + --buffer-size 2000 --batch-size 128 \ + --beam 5 --remove-bpe=sentencepiece \ + > iwslt17.test.${SRC}-en.en.sys +grep ^H iwslt17.test.${SRC}-en.en.sys | cut -f3 \ + | sacrebleu --test-set iwslt17 --language-pair ${SRC}-en +``` + +##### Argument format during inference + +During inference it is required to specify a single `--source-lang` and +`--target-lang`, which indicate the inference language direction. +`--lang-pairs`, `--encoder-langtok`, `--decoder-langtok` have to be set to +the same value as training. diff --git a/fairseq/examples/translation/prepare-iwslt14.sh b/fairseq/examples/translation/prepare-iwslt14.sh new file mode 100644 index 0000000..2fb6643 --- /dev/null +++ b/fairseq/examples/translation/prepare-iwslt14.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# +# Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh + +echo 'Cloning Moses github repository (for tokenization scripts)...' +git clone https://github.com/moses-smt/mosesdecoder.git + +echo 'Cloning Subword NMT repository (for BPE pre-processing)...' +git clone https://github.com/rsennrich/subword-nmt.git + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +LC=$SCRIPTS/tokenizer/lowercase.perl +CLEAN=$SCRIPTS/training/clean-corpus-n.perl +BPEROOT=subword-nmt/subword_nmt +BPE_TOKENS=10000 + +URL="http://dl.fbaipublicfiles.com/fairseq/data/iwslt14/de-en.tgz" +GZ=de-en.tgz + +if [ ! -d "$SCRIPTS" ]; then + echo "Please set SCRIPTS variable correctly to point to Moses scripts." + exit +fi + +src=de +tgt=en +lang=de-en +prep=iwslt14.tokenized.de-en +tmp=$prep/tmp +orig=orig + +mkdir -p $orig $tmp $prep + +echo "Downloading data from ${URL}..." 
+cd $orig +wget "$URL" + +if [ -f $GZ ]; then + echo "Data successfully downloaded." +else + echo "Data not successfully downloaded." + exit +fi + +tar zxvf $GZ +cd .. + +echo "pre-processing train data..." +for l in $src $tgt; do + f=train.tags.$lang.$l + tok=train.tags.$lang.tok.$l + + cat $orig/$lang/$f | \ + grep -v '<url>' | \ + grep -v '<talkid>' | \ + grep -v '<keywords>' | \ + sed -e 's/<title>//g' | \ + sed -e 's/<\/title>//g' | \ + sed -e 's/<description>//g' | \ + sed -e 's/<\/description>//g' | \ + perl $TOKENIZER -threads 8 -l $l > $tmp/$tok + echo "" +done +perl $CLEAN -ratio 1.5 $tmp/train.tags.$lang.tok $src $tgt $tmp/train.tags.$lang.clean 1 175 +for l in $src $tgt; do + perl $LC < $tmp/train.tags.$lang.clean.$l > $tmp/train.tags.$lang.$l +done + +echo "pre-processing valid/test data..." +for l in $src $tgt; do + for o in `ls $orig/$lang/IWSLT14.TED*.$l.xml`; do + fname=${o##*/} + f=$tmp/${fname%.*} + echo $o $f + grep '<seg id' $o | \ + sed -e 's/<seg id="[0-9]*">\s*//g' | \ + sed -e 's/\s*<\/seg>\s*//g' | \ + sed -e "s/\’/\'/g" | \ + perl $TOKENIZER -threads 8 -l $l | \ + perl $LC > $f + echo "" + done +done + + +echo "creating train, valid, test..." +for l in $src $tgt; do + awk '{if (NR%23 == 0) print $0; }' $tmp/train.tags.de-en.$l > $tmp/valid.$l + awk '{if (NR%23 != 0) print $0; }' $tmp/train.tags.de-en.$l > $tmp/train.$l + + cat $tmp/IWSLT14.TED.dev2010.de-en.$l \ + $tmp/IWSLT14.TEDX.dev2012.de-en.$l \ + $tmp/IWSLT14.TED.tst2010.de-en.$l \ + $tmp/IWSLT14.TED.tst2011.de-en.$l \ + $tmp/IWSLT14.TED.tst2012.de-en.$l \ + > $tmp/test.$l +done + +TRAIN=$tmp/train.en-de +BPE_CODE=$prep/code +rm -f $TRAIN +for l in $src $tgt; do + cat $tmp/train.$l >> $TRAIN +done + +echo "learn_bpe.py on ${TRAIN}..." +python $BPEROOT/learn_bpe.py -s $BPE_TOKENS < $TRAIN > $BPE_CODE + +for L in $src $tgt; do + for f in train.$L valid.$L test.$L; do + echo "apply_bpe.py to ${f}..." 
+ python $BPEROOT/apply_bpe.py -c $BPE_CODE < $tmp/$f > $prep/$f + done +done diff --git a/fairseq/examples/translation/prepare-iwslt17-multilingual.sh b/fairseq/examples/translation/prepare-iwslt17-multilingual.sh new file mode 100644 index 0000000..23be875 --- /dev/null +++ b/fairseq/examples/translation/prepare-iwslt17-multilingual.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +SRCS=( + "de" + "fr" +) +TGT=en + +ROOT=$(dirname "$0") +SCRIPTS=$ROOT/../../scripts +SPM_TRAIN=$SCRIPTS/spm_train.py +SPM_ENCODE=$SCRIPTS/spm_encode.py + +BPESIZE=16384 +ORIG=$ROOT/iwslt17_orig +DATA=$ROOT/iwslt17.de_fr.en.bpe16k +mkdir -p "$ORIG" "$DATA" + +TRAIN_MINLEN=1 # remove sentences with <1 BPE token +TRAIN_MAXLEN=250 # remove sentences with >250 BPE tokens + +URLS=( + "https://wit3.fbk.eu/archive/2017-01-trnted/texts/de/en/de-en.tgz" + "https://wit3.fbk.eu/archive/2017-01-trnted/texts/fr/en/fr-en.tgz" +) +ARCHIVES=( + "de-en.tgz" + "fr-en.tgz" +) +VALID_SETS=( + "IWSLT17.TED.dev2010.de-en IWSLT17.TED.tst2010.de-en IWSLT17.TED.tst2011.de-en IWSLT17.TED.tst2012.de-en IWSLT17.TED.tst2013.de-en IWSLT17.TED.tst2014.de-en IWSLT17.TED.tst2015.de-en" + "IWSLT17.TED.dev2010.fr-en IWSLT17.TED.tst2010.fr-en IWSLT17.TED.tst2011.fr-en IWSLT17.TED.tst2012.fr-en IWSLT17.TED.tst2013.fr-en IWSLT17.TED.tst2014.fr-en IWSLT17.TED.tst2015.fr-en" +) + +# download and extract data +for ((i=0;i<${#URLS[@]};++i)); do + ARCHIVE=$ORIG/${ARCHIVES[i]} + if [ -f "$ARCHIVE" ]; then + echo "$ARCHIVE already exists, skipping download" + else + URL=${URLS[i]} + wget -P "$ORIG" "$URL" + if [ -f "$ARCHIVE" ]; then + echo "$URL successfully downloaded." + else + echo "$URL not successfully downloaded." 
+ exit 1 + fi + fi + FILE=${ARCHIVE: -4} + if [ -e "$FILE" ]; then + echo "$FILE already exists, skipping extraction" + else + tar -C "$ORIG" -xzvf "$ARCHIVE" + fi +done + +echo "pre-processing train data..." +for SRC in "${SRCS[@]}"; do + for LANG in "${SRC}" "${TGT}"; do + cat "$ORIG/${SRC}-${TGT}/train.tags.${SRC}-${TGT}.${LANG}" \ + | grep -v '<url>' \ + | grep -v '<talkid>' \ + | grep -v '<keywords>' \ + | grep -v '<speaker>' \ + | grep -v '<reviewer' \ + | grep -v '<translator' \ + | grep -v '<doc' \ + | grep -v '</doc>' \ + | sed -e 's/<title>//g' \ + | sed -e 's/<\/title>//g' \ + | sed -e 's/<description>//g' \ + | sed -e 's/<\/description>//g' \ + | sed 's/^\s*//g' \ + | sed 's/\s*$//g' \ + > "$DATA/train.${SRC}-${TGT}.${LANG}" + done +done + +echo "pre-processing valid data..." +for ((i=0;i<${#SRCS[@]};++i)); do + SRC=${SRCS[i]} + VALID_SET=(${VALID_SETS[i]}) + for ((j=0;j<${#VALID_SET[@]};++j)); do + FILE=${VALID_SET[j]} + for LANG in "$SRC" "$TGT"; do + grep '<seg id' "$ORIG/${SRC}-${TGT}/${FILE}.${LANG}.xml" \ + | sed -e 's/<seg id="[0-9]*">\s*//g' \ + | sed -e 's/\s*<\/seg>\s*//g' \ + | sed -e "s/\’/\'/g" \ + > "$DATA/valid${j}.${SRC}-${TGT}.${LANG}" + done + done +done + +# learn BPE with sentencepiece +TRAIN_FILES=$(for SRC in "${SRCS[@]}"; do echo $DATA/train.${SRC}-${TGT}.${SRC}; echo $DATA/train.${SRC}-${TGT}.${TGT}; done | tr "\n" ",") +echo "learning joint BPE over ${TRAIN_FILES}..." +python "$SPM_TRAIN" \ + --input=$TRAIN_FILES \ + --model_prefix=$DATA/sentencepiece.bpe \ + --vocab_size=$BPESIZE \ + --character_coverage=1.0 \ + --model_type=bpe + +# encode train/valid +echo "encoding train with learned BPE..." 
+for SRC in "${SRCS[@]}"; do + python "$SPM_ENCODE" \ + --model "$DATA/sentencepiece.bpe.model" \ + --output_format=piece \ + --inputs $DATA/train.${SRC}-${TGT}.${SRC} $DATA/train.${SRC}-${TGT}.${TGT} \ + --outputs $DATA/train.bpe.${SRC}-${TGT}.${SRC} $DATA/train.bpe.${SRC}-${TGT}.${TGT} \ + --min-len $TRAIN_MINLEN --max-len $TRAIN_MAXLEN +done + +echo "encoding valid with learned BPE..." +for ((i=0;i<${#SRCS[@]};++i)); do + SRC=${SRCS[i]} + VALID_SET=(${VALID_SETS[i]}) + for ((j=0;j<${#VALID_SET[@]};++j)); do + python "$SPM_ENCODE" \ + --model "$DATA/sentencepiece.bpe.model" \ + --output_format=piece \ + --inputs $DATA/valid${j}.${SRC}-${TGT}.${SRC} $DATA/valid${j}.${SRC}-${TGT}.${TGT} \ + --outputs $DATA/valid${j}.bpe.${SRC}-${TGT}.${SRC} $DATA/valid${j}.bpe.${SRC}-${TGT}.${TGT} + done +done diff --git a/fairseq/examples/translation/prepare-wmt14en2de.sh b/fairseq/examples/translation/prepare-wmt14en2de.sh new file mode 100644 index 0000000..6702c88 --- /dev/null +++ b/fairseq/examples/translation/prepare-wmt14en2de.sh @@ -0,0 +1,142 @@ +#!/bin/bash +# Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh + +echo 'Cloning Moses github repository (for tokenization scripts)...' +git clone https://github.com/moses-smt/mosesdecoder.git + +echo 'Cloning Subword NMT repository (for BPE pre-processing)...' 
+git clone https://github.com/rsennrich/subword-nmt.git + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +CLEAN=$SCRIPTS/training/clean-corpus-n.perl +NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl +BPEROOT=subword-nmt/subword_nmt +BPE_TOKENS=40000 + +URLS=( + "http://statmt.org/wmt13/training-parallel-europarl-v7.tgz" + "http://statmt.org/wmt13/training-parallel-commoncrawl.tgz" + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz" + "http://data.statmt.org/wmt17/translation-task/dev.tgz" + "http://statmt.org/wmt14/test-full.tgz" +) +FILES=( + "training-parallel-europarl-v7.tgz" + "training-parallel-commoncrawl.tgz" + "training-parallel-nc-v12.tgz" + "dev.tgz" + "test-full.tgz" +) +CORPORA=( + "training/europarl-v7.de-en" + "commoncrawl.de-en" + "training/news-commentary-v12.de-en" +) + +# This will make the dataset compatible to the one used in "Convolutional Sequence to Sequence Learning" +# https://arxiv.org/abs/1705.03122 +if [ "$1" == "--icml17" ]; then + URLS[2]="http://statmt.org/wmt14/training-parallel-nc-v9.tgz" + FILES[2]="training-parallel-nc-v9.tgz" + CORPORA[2]="training/news-commentary-v9.de-en" + OUTDIR=wmt14_en_de +else + OUTDIR=wmt17_en_de +fi + +if [ ! -d "$SCRIPTS" ]; then + echo "Please set SCRIPTS variable correctly to point to Moses scripts." + exit +fi + +src=en +tgt=de +lang=en-de +prep=$OUTDIR +tmp=$prep/tmp +orig=orig +dev=dev/newstest2013 + +mkdir -p $orig $tmp $prep + +cd $orig + +for ((i=0;i<${#URLS[@]};++i)); do + file=${FILES[i]} + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + url=${URLS[i]} + wget "$url" + if [ -f $file ]; then + echo "$url successfully downloaded." + else + echo "$url not successfully downloaded." + exit -1 + fi + if [ ${file: -4} == ".tgz" ]; then + tar zxvf $file + elif [ ${file: -4} == ".tar" ]; then + tar xvf $file + fi + fi +done +cd .. 
+ +echo "pre-processing train data..." +for l in $src $tgt; do + rm $tmp/train.tags.$lang.tok.$l + for f in "${CORPORA[@]}"; do + cat $orig/$f.$l | \ + perl $NORM_PUNC $l | \ + perl $REM_NON_PRINT_CHAR | \ + perl $TOKENIZER -threads 8 -a -l $l >> $tmp/train.tags.$lang.tok.$l + done +done + +echo "pre-processing test data..." +for l in $src $tgt; do + if [ "$l" == "$src" ]; then + t="src" + else + t="ref" + fi + grep '<seg id' $orig/test-full/newstest2014-deen-$t.$l.sgm | \ + sed -e 's/<seg id="[0-9]*">\s*//g' | \ + sed -e 's/\s*<\/seg>\s*//g' | \ + sed -e "s/\’/\'/g" | \ + perl $TOKENIZER -threads 8 -a -l $l > $tmp/test.$l + echo "" +done + +echo "splitting train and valid..." +for l in $src $tgt; do + awk '{if (NR%100 == 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/valid.$l + awk '{if (NR%100 != 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/train.$l +done + +TRAIN=$tmp/train.de-en +BPE_CODE=$prep/code +rm -f $TRAIN +for l in $src $tgt; do + cat $tmp/train.$l >> $TRAIN +done + +echo "learn_bpe.py on ${TRAIN}..." +python $BPEROOT/learn_bpe.py -s $BPE_TOKENS < $TRAIN > $BPE_CODE + +for L in $src $tgt; do + for f in train.$L valid.$L test.$L; do + echo "apply_bpe.py to ${f}..." + python $BPEROOT/apply_bpe.py -c $BPE_CODE < $tmp/$f > $tmp/bpe.$f + done +done + +perl $CLEAN -ratio 1.5 $tmp/bpe.train $src $tgt $prep/train 1 250 +perl $CLEAN -ratio 1.5 $tmp/bpe.valid $src $tgt $prep/valid 1 250 + +for L in $src $tgt; do + cp $tmp/bpe.test.$L $prep/test.$L +done diff --git a/fairseq/examples/translation/prepare-wmt14en2fr.sh b/fairseq/examples/translation/prepare-wmt14en2fr.sh new file mode 100644 index 0000000..2ac97a5 --- /dev/null +++ b/fairseq/examples/translation/prepare-wmt14en2fr.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# Adapted from https://github.com/facebookresearch/MIXER/blob/master/prepareData.sh + +echo 'Cloning Moses github repository (for tokenization scripts)...' 
+git clone https://github.com/moses-smt/mosesdecoder.git + +echo 'Cloning Subword NMT repository (for BPE pre-processing)...' +git clone https://github.com/rsennrich/subword-nmt.git + +SCRIPTS=mosesdecoder/scripts +TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl +CLEAN=$SCRIPTS/training/clean-corpus-n.perl +NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl +REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl +BPEROOT=subword-nmt/subword_nmt +BPE_TOKENS=40000 + +URLS=( + "http://statmt.org/wmt13/training-parallel-europarl-v7.tgz" + "http://statmt.org/wmt13/training-parallel-commoncrawl.tgz" + "http://statmt.org/wmt13/training-parallel-un.tgz" + "http://statmt.org/wmt14/training-parallel-nc-v9.tgz" + "http://statmt.org/wmt10/training-giga-fren.tar" + "http://statmt.org/wmt14/test-full.tgz" +) +FILES=( + "training-parallel-europarl-v7.tgz" + "training-parallel-commoncrawl.tgz" + "training-parallel-un.tgz" + "training-parallel-nc-v9.tgz" + "training-giga-fren.tar" + "test-full.tgz" +) +CORPORA=( + "training/europarl-v7.fr-en" + "commoncrawl.fr-en" + "un/undoc.2000.fr-en" + "training/news-commentary-v9.fr-en" + "giga-fren.release2.fixed" +) + +if [ ! -d "$SCRIPTS" ]; then + echo "Please set SCRIPTS variable correctly to point to Moses scripts." + exit +fi + +src=en +tgt=fr +lang=en-fr +prep=wmt14_en_fr +tmp=$prep/tmp +orig=orig + +mkdir -p $orig $tmp $prep + +cd $orig + +for ((i=0;i<${#URLS[@]};++i)); do + file=${FILES[i]} + if [ -f $file ]; then + echo "$file already exists, skipping download" + else + url=${URLS[i]} + wget "$url" + if [ -f $file ]; then + echo "$url successfully downloaded." + else + echo "$url not successfully downloaded." + exit -1 + fi + if [ ${file: -4} == ".tgz" ]; then + tar zxvf $file + elif [ ${file: -4} == ".tar" ]; then + tar xvf $file + fi + fi +done + +gunzip giga-fren.release2.fixed.*.gz +cd .. + +echo "pre-processing train data..." 
+for l in $src $tgt; do + rm $tmp/train.tags.$lang.tok.$l + for f in "${CORPORA[@]}"; do + cat $orig/$f.$l | \ + perl $NORM_PUNC $l | \ + perl $REM_NON_PRINT_CHAR | \ + perl $TOKENIZER -threads 8 -a -l $l >> $tmp/train.tags.$lang.tok.$l + done +done + +echo "pre-processing test data..." +for l in $src $tgt; do + if [ "$l" == "$src" ]; then + t="src" + else + t="ref" + fi + grep '<seg id' $orig/test-full/newstest2014-fren-$t.$l.sgm | \ + sed -e 's/<seg id="[0-9]*">\s*//g' | \ + sed -e 's/\s*<\/seg>\s*//g' | \ + sed -e "s/\’/\'/g" | \ + perl $TOKENIZER -threads 8 -a -l $l > $tmp/test.$l + echo "" +done + +echo "splitting train and valid..." +for l in $src $tgt; do + awk '{if (NR%1333 == 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/valid.$l + awk '{if (NR%1333 != 0) print $0; }' $tmp/train.tags.$lang.tok.$l > $tmp/train.$l +done + +TRAIN=$tmp/train.fr-en +BPE_CODE=$prep/code +rm -f $TRAIN +for l in $src $tgt; do + cat $tmp/train.$l >> $TRAIN +done + +echo "learn_bpe.py on ${TRAIN}..." +python $BPEROOT/learn_bpe.py -s $BPE_TOKENS < $TRAIN > $BPE_CODE + +for L in $src $tgt; do + for f in train.$L valid.$L test.$L; do + echo "apply_bpe.py to ${f}..." + python $BPEROOT/apply_bpe.py -c $BPE_CODE < $tmp/$f > $tmp/bpe.$f + done +done + +perl $CLEAN -ratio 1.5 $tmp/bpe.train $src $tgt $prep/train 1 250 +perl $CLEAN -ratio 1.5 $tmp/bpe.valid $src $tgt $prep/valid 1 250 + +for L in $src $tgt; do + cp $tmp/bpe.test.$L $prep/test.$L +done diff --git a/fairseq/examples/translation_moe/README.md b/fairseq/examples/translation_moe/README.md new file mode 100644 index 0000000..2e5c8af --- /dev/null +++ b/fairseq/examples/translation_moe/README.md @@ -0,0 +1,89 @@ +# Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019) + +This page includes instructions for reproducing results from the paper [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](https://arxiv.org/abs/1902.07816). 
+ +## Download data + +First, follow the [instructions to download and preprocess the WMT'17 En-De dataset](../translation#prepare-wmt14en2desh). +Make sure to learn a joint vocabulary by passing the `--joined-dictionary` option to `fairseq-preprocess`. + +## Train a model + +Then we can train a mixture of experts model using the `translation_moe` task. +Use the `--method` flag to choose the MoE variant; we support hard mixtures with a learned or uniform prior (`--method hMoElp` and `hMoEup`, respectively) and soft mixtures (`--method sMoElp` and `sMoEup`). +The model is trained with online responsibility assignment and shared parameterization. + +The following command will train a `hMoElp` model with `3` experts: +```bash +fairseq-train --ddp-backend='legacy_ddp' \ + data-bin/wmt17_en_de \ + --max-update 100000 \ + --task translation_moe --user-dir examples/translation_moe/translation_moe_src \ + --method hMoElp --mean-pool-gating-network \ + --num-experts 3 \ + --arch transformer_wmt_en_de --share-all-embeddings \ + --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 --warmup-updates 4000 \ + --lr 0.0007 \ + --dropout 0.1 --weight-decay 0.0 --criterion cross_entropy \ + --max-tokens 3584 +``` + +## Translate + +Once a model is trained, we can generate translations from different experts using the `--gen-expert` option. 
+For example, to generate from expert 0: +```bash +fairseq-generate data-bin/wmt17_en_de \ + --path checkpoints/checkpoint_best.pt \ + --beam 1 --remove-bpe \ + --task translation_moe --user-dir examples/translation_moe/translation_moe_src \ + --method hMoElp --mean-pool-gating-network \ + --num-experts 3 \ + --gen-expert 0 +``` + +## Evaluate + +First download a tokenized version of the WMT'14 En-De test set with multiple references: +```bash +wget dl.fbaipublicfiles.com/fairseq/data/wmt14-en-de.extra_refs.tok +``` + +Next apply BPE on the fly and run generation for each expert: +```bash +BPE_CODE=examples/translation/wmt17_en_de/code +for EXPERT in $(seq 0 2); do \ + cat wmt14-en-de.extra_refs.tok \ + | grep ^S | cut -f 2 \ + | fairseq-interactive data-bin/wmt17_en_de \ + --path checkpoints/checkpoint_best.pt \ + --beam 1 \ + --bpe subword_nmt --bpe-codes $BPE_CODE \ + --buffer-size 500 --max-tokens 6000 \ + --task translation_moe --user-dir examples/translation_moe/translation_moe_src \ + --method hMoElp --mean-pool-gating-network \ + --num-experts 3 \ + --gen-expert $EXPERT ; \ +done > wmt14-en-de.extra_refs.tok.gen.3experts +``` + +Finally, use `score.py` to compute pairwise BLEU and average oracle BLEU: +```bash +python examples/translation_moe/score.py --sys wmt14-en-de.extra_refs.tok.gen.3experts --ref wmt14-en-de.extra_refs.tok +# pairwise BLEU: 48.26 +# #refs covered: 2.11 +# average multi-reference BLEU (leave-one-out): 59.46 +``` +This matches row 3 from Table 7 in the paper. 
def main():
    """Parse the command line and print the requested BLEU statistics.

    With ``--sys``, prints pairwise BLEU over the system hypotheses and, when
    ``--output`` is also given, writes a merged listing to that path.
    With ``--ref``, prints multi-reference statistics against the hypotheses
    when ``--sys`` was given, or intra-reference statistics otherwise.
    """
    parser = argparse.ArgumentParser(sys.argv[0])
    parser.add_argument(
        "--sys", nargs="*", default="", metavar="FILE", help="path to system output"
    )
    parser.add_argument("--ref", default="", metavar="FILE", help="path to references")
    parser.add_argument(
        "--output",
        default="",
        metavar="FILE",
        help="print outputs into a pretty format",
    )
    args = parser.parse_args()

    if args.sys:
        src, tgt, hypos, log_probs = load_sys(args.sys)
        print("pairwise BLEU: %.2f" % pairwise(hypos))
        if args.output:
            merge(src, tgt, hypos, log_probs, args.output)

    if args.ref:
        _, _, refs = load_ref(args.ref)
        if args.sys:
            multi_ref(refs, hypos)
        else:
            intra_ref(refs)


def dictolist(d):
    """Return the values of *d* as a list ordered by ascending key."""
    return [value for _, value in sorted(d.items(), key=lambda item: item[0])]


def load_sys(paths):
    """Load sources, targets, hypotheses and scores from system output files.

    Only lines starting with ``S-``, ``T-`` or ``D-`` are read; the integer
    between the first ``-`` and the first tab is the sentence id. Hypotheses
    and their scores from all *paths* are accumulated per sentence id.

    Args:
        paths: iterable of file paths to read.

    Returns:
        Tuple ``(src, tgt, hypos, log_probs)`` of lists ordered by sentence
        id; ``hypos`` and ``log_probs`` hold one list per sentence.
    """
    src, tgt, hypos, log_probs = {}, {}, {}, {}
    for path in paths:
        with open(path) as f:
            for line in f:
                line = line.rstrip()
                # S: source
                # T: target
                # D: detokenized system output
                if not line.startswith(("S-", "T-", "D-")):
                    continue
                i = int(line[line.find("-") + 1 : line.find("\t")])
                if line.startswith("S-"):
                    src[i] = line.split("\t")[1]
                elif line.startswith("T-"):
                    tgt[i] = line.split("\t")[1]
                else:
                    # D-<id> \t <score> \t <hypothesis>
                    hypos.setdefault(i, []).append(line.split("\t")[2])
                    log_probs.setdefault(i, []).append(float(line.split("\t")[1]))
    return dictolist(src), dictolist(tgt), dictolist(hypos), dictolist(log_probs)


def load_ref(path):
    """Load sources, targets and grouped references from a reference file.

    ``S-`` and ``T-`` lines contribute one source / target each. Every run of
    consecutive lines starting with ``R`` becomes one list of references.
    Lines matching none of these prefixes (for example blank separator lines)
    are skipped.

    Args:
        path: path of the reference file.

    Returns:
        Tuple ``(src, tgt, refs)`` in file order, where ``refs`` is a list of
        reference lists.
    """
    with open(path) as f:
        lines = f.readlines()
    src, tgt, refs = [], [], []
    i = 0
    while i < len(lines):
        if lines[i].startswith("S-"):
            src.append(lines[i].split("\t")[1].rstrip())
            i += 1
        elif lines[i].startswith("T-"):
            tgt.append(lines[i].split("\t")[1].rstrip())
            i += 1
        elif lines[i].startswith("R"):
            a = []
            while i < len(lines) and lines[i].startswith("R"):
                a.append(lines[i].split("\t")[1].rstrip())
                i += 1
            refs.append(a)
        else:
            # Unrecognised line: advance past it. Without this step the loop
            # would never make progress and would append empty groups forever.
            i += 1
    return src, tgt, refs


def merge(src, tgt, hypos, log_probs, path):
    """Write sources, targets and scored hypotheses to *path* for reading.

    Each entry is the source line, the target line, a blank line, one
    tab-indented ``score<TAB>hypothesis`` line per hypothesis, and a dashed
    separator line.
    """
    with open(path, "w") as f:
        for s, t, hs, lps in zip(src, tgt, hypos, log_probs):
            f.write(s + "\n")
            f.write(t + "\n")
            f.write("\n")
            for h, lp in zip(hs, lps):
                f.write("\t%f\t%s\n" % (lp, h.strip()))
            f.write("------------------------------------------------------\n")


def corpus_bleu(sys_stream, ref_streams):
    """Return the sacrebleu corpus BLEU score computed with ``tokenize="none"``."""
    bleu = _corpus_bleu(sys_stream, ref_streams, tokenize="none")
    return bleu.score


def sentence_bleu(hypothesis, reference):
    """Return a smoothed BLEU score for one hypothesis/reference pair.

    The n-gram counts and totals at indices 1..3 are each incremented by one
    before BLEU is recomputed with ``smooth_method="exp"``.
    """
    bleu = _corpus_bleu(hypothesis, reference)
    for i in range(1, 4):
        bleu.counts[i] += 1
        bleu.totals[i] += 1
    bleu = sacrebleu.BLEU.compute_bleu(
        bleu.counts,
        bleu.totals,
        bleu.sys_len,
        bleu.ref_len,
        smooth_method="exp",
    )
    return bleu.score


def pairwise(sents):
    """Return corpus BLEU over every ordered pair of distinct items per group.

    For each group in *sents*, every item is scored as a hypothesis against
    every other item of the same group as its single reference.
    """
    _ref, _hypo = [], []
    for s in sents:
        for i in range(len(s)):
            for j in range(len(s)):
                if i != j:
                    _ref.append(s[i])
                    _hypo.append(s[j])
    return corpus_bleu(_hypo, [_ref])


def multi_ref(refs, hypos):
    """Print reference coverage and average leave-one-out multi-reference BLEU.

    Args:
        refs: one list of references per sentence.
        hypos: one list of hypotheses per sentence; must have the same length
            as *refs*.
    """
    ref_cnt = 0
    assert len(refs) == len(hypos)

    # count number of refs covered
    for rs, hs in zip(refs, hypos):
        a = set()
        for h in hs:
            s = [sentence_bleu(h, r) for r in rs]
            j = np.argmax(s)
            # ties for the best score are broken at random
            best = [k for k in range(len(rs)) if s[k] == s[j]]
            a.add(random.choice(best))
        ref_cnt += len(a)
    print("#refs covered: %.2f" % (ref_cnt / len(refs)))

    # transpose refs and hypos
    refs = list(zip(*refs))
    hypos = list(zip(*hypos))

    # compute multi-ref corpus BLEU (leave-one-out to be comparable to intra_ref)
    k = len(hypos)
    m = len(refs)
    flat_hypos = [hypos[j][i] for i in range(len(hypos[0])) for j in range(k)]
    duplicated_refs = [[ref for ref in refs_i for _ in range(k)] for refs_i in refs]
    loo_bleus = []
    for held_out_ref in range(m):
        remaining_refs = (
            duplicated_refs[:held_out_ref] + duplicated_refs[held_out_ref + 1 :]
        )
        assert len(remaining_refs) == m - 1
        loo_bleus.append(corpus_bleu(flat_hypos, remaining_refs))
    print("average multi-reference BLEU (leave-one-out): %.2f" % np.mean(loo_bleus))


def intra_ref(refs):
    """Print pairwise BLEU among references and leave-one-out reference BLEU.

    Each reference stream in turn is treated as the hypothesis and scored
    against the remaining streams; all such comparisons are concatenated into
    a single corpus BLEU computation.
    """
    print("ref pairwise BLEU: %.2f" % pairwise(refs))
    refs = list(zip(*refs))
    m = len(refs)
    concat_h = []
    concat_rest = [[] for j in range(m - 1)]
    for i, h in enumerate(refs):
        rest = refs[:i] + refs[i + 1 :]
        concat_h.append(h)
        for j in range(m - 1):
            concat_rest[j].extend(rest[j])
    concat_h = list(chain.from_iterable(concat_h))
    bleu = corpus_bleu(concat_h, concat_rest)
    print("multi-reference BLEU (leave-one-out): %.2f" % bleu)


if __name__ == "__main__":
    main()
class LogSumExpMoE(torch.autograd.Function):
    """LogSumExp whose backward pass is driven by a supplied *posterior*.

    The forward pass is a plain ``torch.logsumexp`` over ``logp``. The
    backward pass does not differentiate that reduction; it scales the
    incoming gradient by the saved ``posterior`` instead.

    See `"Mixture Models for Diverse Machine Translation: Tricks of the Trade"
    (Shen et al., 2019) <https://arxiv.org/abs/1902.07816>`_.
    """

    @staticmethod
    def forward(ctx, logp, posterior, dim=-1):
        # Remember the reduced dimension and the posterior for backward().
        ctx.dim = dim
        ctx.save_for_backward(posterior)
        reduced = torch.logsumexp(logp, dim=dim)
        return reduced

    @staticmethod
    def backward(ctx, grad_output):
        posterior = ctx.saved_tensors[0]
        # Re-insert the reduced dimension so the gradient broadcasts against
        # the posterior.
        expanded_grad = grad_output.unsqueeze(ctx.dim)
        # Only ``logp`` receives a gradient; ``posterior`` and ``dim`` get None.
        return expanded_grad * posterior, None, None
class MeanPoolGatingNetwork(torch.nn.Module):
    """A simple mean-pooling gating network for selecting experts.

    This module applies mean pooling over an encoder's output and returns
    responsibilities (log-probabilities) for each expert. The encoder format
    is expected to match
    :class:`fairseq.models.transformer.TransformerEncoder`.

    Args:
        embed_dim (int): size of the last dimension of the encoder output
        num_experts (int): number of experts to score
        dropout (float, optional): dropout probability applied between the
            two linear layers; no dropout layer is created when ``None``
    """

    def __init__(self, embed_dim, num_experts, dropout=None):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_experts = num_experts

        self.fc1 = torch.nn.Linear(embed_dim, embed_dim)
        self.dropout = torch.nn.Dropout(dropout) if dropout is not None else None
        self.fc2 = torch.nn.Linear(embed_dim, num_experts)

    def forward(self, encoder_out):
        """Score every expert from the mean-pooled encoder output.

        Args:
            encoder_out (dict): must contain ``"encoder_out"``, a list whose
                first item is a ``T x B x C`` tensor with ``C == embed_dim``,
                and ``"encoder_padding_mask"``, a list that is either empty
                or whose first item is ``None`` or a ``B x T`` boolean mask
                that is ``True`` at padded positions.

        Returns:
            Tensor of shape ``B x num_experts`` holding log-probabilities.

        Raises:
            ValueError: if *encoder_out* does not have the expected format.
        """
        if not (
            "encoder_out" in encoder_out
            and "encoder_padding_mask" in encoder_out
            and encoder_out["encoder_out"][0].size(2) == self.embed_dim
        ):
            raise ValueError("Unexpected format for encoder_out")

        # An empty mask list is treated like a ``None`` mask (no padding)
        # instead of failing on the ``[0]`` lookup.
        padding_masks = encoder_out["encoder_padding_mask"]
        encoder_padding_mask = padding_masks[0] if len(padding_masks) > 0 else None  # B x T

        # mean pooling over time
        encoder_out = encoder_out["encoder_out"][0].transpose(0, 1)  # B x T x C
        if encoder_padding_mask is not None:
            # out-of-place, so the caller's encoder output is never modified
            encoder_out = encoder_out.masked_fill(
                encoder_padding_mask.unsqueeze(-1), 0
            )
            ntokens = torch.sum(~encoder_padding_mask, dim=1, keepdim=True)
            # a fully padded row would otherwise divide 0 by 0 and yield NaN
            ntokens = ntokens.clamp(min=1)
            x = torch.sum(encoder_out, dim=1) / ntokens.type_as(encoder_out)
        else:
            x = torch.mean(encoder_out, dim=1)

        x = torch.tanh(self.fc1(x))
        if self.dropout is not None:
            x = self.dropout(x)
        x = self.fc2(x)
        # softmax is computed in float32 and cast back to the input dtype
        return F.log_softmax(x, dim=-1, dtype=torch.float32).type_as(x)
# Supported MoE variants: s/h = soft/hard expert selection,
# lp/up = learned/uniform prior over experts.
METHOD_CHOICES = ChoiceEnum(["sMoElp", "sMoEup", "hMoElp", "hMoEup"])


# Options for the mixture-of-experts translation task; extends the standard
# translation options with the MoE method, expert count and gating settings.
@dataclass
class TranslationMoEConfig(TranslationConfig):
    method: METHOD_CHOICES = field(
        default="hMoEup",
        metadata={"help": "MoE method"},
    )
    num_experts: int = field(
        default=3,
        metadata={"help": "number of experts"},
    )
    mean_pool_gating_network: bool = field(
        default=False,
        metadata={"help": "use a simple mean-pooling gating network"},
    )
    mean_pool_gating_network_dropout: float = field(
        default=0,
        metadata={"help": "dropout for mean-pooling gating network"},
    )
    mean_pool_gating_network_encoder_dim: int = field(
        default=0,
        metadata={"help": "encoder output dim for mean-pooling gating network"},
    )
    gen_expert: int = field(
        default=0,
        metadata={"help": "which expert to use for generation"},
    )
    sentence_avg: bool = II("optimization.sentence_avg")


@register_task("translation_moe", dataclass=TranslationMoEConfig)
class TranslationMoETask(TranslationTask):
    """
    Translation task for Mixture of Experts (MoE) models.

    See `"Mixture Models for Diverse Machine Translation: Tricks of the Trade"
    (Shen et al., 2019) <https://arxiv.org/abs/1902.07816>`_.

    Args:
        src_dict (~fairseq.data.Dictionary): dictionary for the source language
        tgt_dict (~fairseq.data.Dictionary): dictionary for the target language

    .. note::

        The translation task is compatible with :mod:`fairseq-train`,
        :mod:`fairseq-generate` and :mod:`fairseq-interactive`.

    The translation task provides the following additional command-line
    arguments:

    .. argparse::
        :ref: fairseq.tasks.translation_parser
        :prog:
    """

    cfg: TranslationMoEConfig

    def __init__(self, cfg: TranslationMoEConfig, src_dict, tgt_dict):
        if cfg.method == "sMoElp":
            # soft MoE with learned prior
            self.uniform_prior = False
            self.hard_selection = False
        elif cfg.method == "sMoEup":
            # soft MoE with uniform prior
            self.uniform_prior = True
            self.hard_selection = False
        elif cfg.method == "hMoElp":
            # hard MoE with learned prior
            self.uniform_prior = False
            self.hard_selection = True
        elif cfg.method == "hMoEup":
            # hard MoE with uniform prior
            self.uniform_prior = True
            self.hard_selection = True

        # add indicator tokens for each expert
        for i in range(cfg.num_experts):
            # add to both dictionaries in case we're sharing embeddings
            src_dict.add_symbol("<expert_{}>".format(i))
            tgt_dict.add_symbol("<expert_{}>".format(i))

        super().__init__(cfg, src_dict, tgt_dict)

    def build_model(self, cfg, from_checkpoint=False):
        """Build the model and, for learned priors, attach a gating network.

        Raises:
            ValueError: if a learned prior is requested but neither the model
                nor the task configuration provides a gating network, or if
                the gating network's dimension/dropout cannot be determined.
        """
        from fairseq import models

        model = models.build_model(cfg, self)
        if not self.uniform_prior and not hasattr(model, "gating_network"):
            if self.cfg.mean_pool_gating_network:
                if self.cfg.mean_pool_gating_network_encoder_dim > 0:
                    encoder_dim = self.cfg.mean_pool_gating_network_encoder_dim
                elif getattr(cfg, "encoder_embed_dim", None):
                    # assume that encoder_embed_dim is the encoder's output dimension
                    encoder_dim = cfg.encoder_embed_dim
                else:
                    raise ValueError(
                        "Must specify --mean-pool-gating-network-encoder-dim"
                    )

                if self.cfg.mean_pool_gating_network_dropout > 0:
                    dropout = self.cfg.mean_pool_gating_network_dropout
                elif getattr(cfg, "dropout", None):
                    dropout = cfg.dropout
                else:
                    raise ValueError("Must specify task.mean_pool_gating_network_dropout")

                model.gating_network = MeanPoolGatingNetwork(
                    encoder_dim,
                    self.cfg.num_experts,
                    dropout,
                )
            else:
                raise ValueError(
                    "translation_moe task with learned prior requires the model to "
                    "have a gating network; try using --mean-pool-gating-network"
                )
        return model

    def expert_index(self, i):
        """Map expert number *i* to the index of its indicator token."""
        return i + self.tgt_dict.index("<expert_0>")

    def _get_loss(self, sample, model, criterion):
        """Compute the MoE loss for one batch.

        Responsibilities are first estimated without dropout or autograd,
        then the loss is computed with dropout using either the winning
        expert (hard selection) or all experts (soft selection).

        Returns:
            tuple: ``(loss, sample_size, logging_output)``.
        """
        assert hasattr(
            criterion, "compute_loss"
        ), "translation_moe task requires the criterion to implement the compute_loss() method"

        k = self.cfg.num_experts
        bsz = sample["target"].size(0)

        def get_lprob_y(encoder_out, prev_output_tokens_k):
            net_output = model.decoder(
                prev_output_tokens=prev_output_tokens_k,
                encoder_out=encoder_out,
            )
            loss, _ = criterion.compute_loss(model, net_output, sample, reduce=False)
            loss = loss.view(bsz, -1)
            return -loss.sum(dim=1, keepdim=True)  # -> B x 1

        def get_lprob_yz(winners=None):
            encoder_out = model.encoder(
                src_tokens=sample["net_input"]["src_tokens"],
                src_lengths=sample["net_input"]["src_lengths"],
            )

            if winners is None:
                # score the target under every expert
                lprob_y = []
                for i in range(k):
                    prev_output_tokens_k = sample["net_input"][
                        "prev_output_tokens"
                    ].clone()
                    assert not prev_output_tokens_k.requires_grad
                    # the first decoder input token selects the expert
                    prev_output_tokens_k[:, 0] = self.expert_index(i)
                    lprob_y.append(get_lprob_y(encoder_out, prev_output_tokens_k))
                lprob_y = torch.cat(lprob_y, dim=1)  # -> B x K
            else:
                # score the target only under each sentence's winning expert
                prev_output_tokens_k = sample["net_input"]["prev_output_tokens"].clone()
                prev_output_tokens_k[:, 0] = self.expert_index(winners)
                lprob_y = get_lprob_y(encoder_out, prev_output_tokens_k)  # -> B x 1

            if self.uniform_prior:
                lprob_yz = lprob_y
            else:
                lprob_z = model.gating_network(encoder_out)  # B x K
                if winners is not None:
                    lprob_z = lprob_z.gather(dim=1, index=winners.unsqueeze(-1))
                lprob_yz = lprob_y + lprob_z.type_as(lprob_y)  # B x K

            return lprob_yz

        # compute responsibilities without dropout
        with utils.model_eval(model):  # disable dropout
            with torch.no_grad():  # disable autograd
                lprob_yz = get_lprob_yz()  # B x K
                prob_z_xy = torch.nn.functional.softmax(lprob_yz, dim=1)
        assert not prob_z_xy.requires_grad

        # compute loss with dropout
        if self.hard_selection:
            winners = prob_z_xy.max(dim=1)[1]
            loss = -get_lprob_yz(winners)
        else:
            lprob_yz = get_lprob_yz()  # B x K
            loss = -LogSumExpMoE.apply(lprob_yz, prob_z_xy, 1)

        loss = loss.sum()
        sample_size = (
            sample["target"].size(0) if self.cfg.sentence_avg else sample["ntokens"]
        )
        logging_output = {
            "loss": utils.item(loss.data),
            "ntokens": sample["ntokens"],
            "nsentences": bsz,
            "sample_size": sample_size,
            "posterior": prob_z_xy.float().sum(dim=0).cpu(),
        }
        return loss, sample_size, logging_output

    def train_step(
        self, sample, model, criterion, optimizer, update_num, ignore_grad=False
    ):
        """Run one forward/backward pass; zero the loss when *ignore_grad*."""
        model.train()
        loss, sample_size, logging_output = self._get_loss(sample, model, criterion)
        if ignore_grad:
            loss *= 0
        optimizer.backward(loss)
        return loss, sample_size, logging_output

    def valid_step(self, sample, model, criterion):
        """Compute the MoE loss in eval mode without tracking gradients."""
        model.eval()
        with torch.no_grad():
            loss, sample_size, logging_output = self._get_loss(sample, model, criterion)
        return loss, sample_size, logging_output

    def inference_step(
        self,
        generator,
        models,
        sample,
        prefix_tokens=None,
        expert=None,
        constraints=None,
    ):
        """Generate with a specific expert's indicator token as BOS.

        Args:
            expert (int, optional): expert to generate with; falls back to
                ``cfg.gen_expert`` only when omitted.
        """
        # Compare against None explicitly: expert 0 is a valid choice and
        # must not be replaced by the configured default.
        if expert is None:
            expert = self.cfg.gen_expert
        with torch.no_grad():
            return generator.generate(
                models,
                sample,
                prefix_tokens=prefix_tokens,
                constraints=constraints,
                bos_token=self.expert_index(expert),
            )

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate the base metrics plus the summed expert posterior."""
        super().reduce_metrics(logging_outputs, criterion)
        metrics.log_scalar(
            "posterior",
            sum(log["posterior"] for log in logging_outputs if "posterior" in log),
        )
0000000..86518c9 --- /dev/null +++ b/fairseq/examples/truncated_bptt/README.md @@ -0,0 +1,70 @@ +# Truncated Backpropagation Through Time (BPTT) + +Truncated BPTT is a useful technique for training language models on very long +sequences. Typically a long sequence is split into chunks and a language model +is trained over the chunks sequentially. The LM may condition on previous +chunks, but gradients only flow through the current chunk. This technique was +the basis for the paper: [Transformer-XL: Attentive Language Models Beyond a +Fixed-Length Context](https://arxiv.org/abs/1901.02860), which achieved +state-of-the-art language modeling results at the time of publication. + +It is slightly tricky to implement Truncated BPTT efficiently in fairseq, since +we need to iterate over the data sequentially and disable any batch shuffling +logic. The code provided in this example illustrates how to implement Truncated +BPTT in fairseq by overriding ``FairseqTask::get_batch_iterator`` to iterate +over the data sequentially. Crucially, this example supports batching and +multi-GPU (data parallel) training. + +##### 0. Setup + +First, see the general [language modeling README](README.md) for instructions on +preprocessing the WikiText-103 data. + +##### 1. Train a Transformer-XL model on WikiText-103 + +We will train a 16-layer Transformer-XL model following the [hyperparameters +used in the original +paper](https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/run_wt103_base.sh). + +The following command assumes 4 GPUs, so that the total batch size is 60 +sequences (15 x 4). 
Training should take ~24 hours on 4 V100 GPUs: +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train \ + --user-dir examples/truncated_bptt \ + data-bin/wikitext-103/ \ + --task truncated_bptt_lm --tokens-per-sample 150 \ + --batch-size 15 --max-update 200000 \ + --arch transformer_xl --n-layer 16 --d-model 410 --n-head 10 \ + --d-head 41 --d-inner 2100 --dropout 0.1 --dropatt 0.0 --mem-len 150 \ + --optimizer adam --clip-norm 0.25 \ + --lr-scheduler cosine --warmup-updates 0 --min-lr 0.0 --lr 0.00025 \ + --log-format json --log-interval 25 \ + --fp16 +``` + +If training on a single GPU, set `--update-freq=4` to accumulate 4x gradients +and simulate training on 4 GPUs. + +##### 2. Evaluate + +```bash +fairseq-eval-lm data-bin/wikitext-103/ \ + --path checkpoints/checkpoint_best.pt \ + --user-dir examples/truncated_bptt/ \ + --task truncated_bptt_lm \ + --batch-size 1 --required-batch-size-multiple 1 \ + --model-overrides '{"mem_len":640,"clamp_len":400,"same_length":True}' \ + --tokens-per-sample 64 +# ... | INFO | fairseq_cli.eval_lm | num. model params: 151123537 +# ... | INFO | fairseq_cli.eval_lm | Evaluated 245569 tokens in 83.1s (2956.82 tokens/s) +# ... | INFO | fairseq_cli.eval_lm | Loss (base 2): 4.5668, Perplexity: 23.70 +# Compare to 24.0 test perplexity from the paper +``` + +*Note:* During training the model saw 150 tokens of context +(``--tokens-per-sample=150``) and 150 extra memory tokens (``--mem-len=150``). +During evaluation we measure perplexity on sequences of 64 tokens +(``--tokens-per-sample=64``) and increase the memory length +(``--model-overrides='{"mem_len":640}'``). These settings match the evaluation +settings from [the original +paper](https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/run_wt103_base.sh). 
logger = logging.getLogger(__name__)


@dataclass
class TransformerXLConfig(FairseqDataclass):
    # Default values mirror the original Transformer-XL implementation.
    cutoffs: List[int] = field(default_factory=lambda: [20000, 40000, 200000])
    d_model: int = 500
    n_head: int = 10
    d_head: int = 50
    d_inner: int = 1000
    div_val: int = 1
    n_layer: int = 12
    mem_len: int = 0
    clamp_len: int = -1
    same_length: bool = False
    dropout: float = 0.0
    dropatt: float = 0.0
    checkpoint_activations: bool = False
    offload_activations: bool = False
    max_target_positions: int = II("task.max_target_positions")


@register_model("transformer_xl", dataclass=TransformerXLConfig)
class TransformerXLLanguageModel(FairseqLanguageModel):
    """Language model that wraps a single :class:`TransformerXLDecoder`."""

    @classmethod
    def build_model(cls, cfg: TransformerXLConfig, task):
        decoder = TransformerXLDecoder(cfg, task)
        return cls(decoder)


class TransformerXLDecoder(FairseqIncrementalDecoder):
    """Decoder backed by the ``transformers`` Transformer-XL LM head model."""

    def __init__(self, cfg, task):
        # Newer ``transformers`` releases expose the classes under
        # ``models.transfo_xl``; fall back to the legacy module layout.
        try:
            from transformers.models.transfo_xl import (
                TransfoXLConfig,
                TransfoXLLMHeadModel,
            )
        except ImportError:
            from transformers.configuration_transfo_xl import TransfoXLConfig
            from transformers.modeling_transfo_xl import TransfoXLLMHeadModel

        super().__init__(task.target_dictionary)
        self.cfg = cfg

        # Drop adaptive-softmax cutoffs that do not fit inside the vocabulary.
        usable_cutoffs = []
        for cutoff in cfg.cutoffs:
            if cutoff < len(task.target_dictionary):
                usable_cutoffs.append(cutoff)

        hf_config = TransfoXLConfig(
            vocab_size=len(task.target_dictionary),
            cutoffs=usable_cutoffs,
            d_model=cfg.d_model,
            d_embed=cfg.d_model,
            n_head=cfg.n_head,
            d_head=cfg.d_head,
            d_inner=cfg.d_inner,
            div_val=cfg.div_val,
            n_layer=cfg.n_layer,
            mem_len=cfg.mem_len,
            clamp_len=cfg.clamp_len,
            same_length=cfg.same_length,
            dropout=cfg.dropout,
            dropatt=cfg.dropatt,
        )
        logger.info(hf_config)
        self.model = TransfoXLLMHeadModel(hf_config)

        if cfg.checkpoint_activations or cfg.offload_activations:
            layers = self.model.transformer.layers
            for idx in range(len(layers)):
                layers[idx] = checkpoint_wrapper(
                    layers[idx],
                    offload_to_cpu=cfg.offload_activations,
                )
                # TODO: may save mem to wrap(layer.pos_ff.CoreNet[3])

        # Memory carried between forward calls when no incremental state is used.
        self._mems = None

    def forward(
        self,
        src_tokens,
        src_lengths=None,  # unused
        incremental_state: Optional[Dict[str, List[torch.Tensor]]] = None,
        encoder_out=None,
    ):
        """Run the wrapped model and return a 1-tuple with its first output."""
        incremental = incremental_state is not None  # True during inference
        if incremental:
            mems = self.get_incremental_state(incremental_state, "mems")
            # only the most recent token is fed when decoding step by step
            src_tokens = src_tokens[:, -1:]
        else:
            mems = self._mems

        output = self.model(
            input_ids=src_tokens,
            mems=mems,
            return_dict=False,
        )

        # When the model also returns updated memories, keep them for the
        # next call in whichever store supplied the current ones.
        if len(output) >= 2:
            new_mems = output[1]
            if incremental:
                self.set_incremental_state(incremental_state, "mems", new_mems)
            else:
                self._mems = new_mems

        return (output[0],)

    def max_positions(self):
        return self.cfg.max_target_positions

    def reorder_incremental_state(
        self,
        incremental_state: Dict[str, Dict[str, Optional[torch.Tensor]]],
        new_order: torch.Tensor,
    ):
        """Reorder the cached memories to follow a new batch order.

        Called when the input order changes between time steps, e.g. during
        beam search, where beams are re-selected at every step.
        """
        mems = self.get_incremental_state(incremental_state, "mems")
        if mems is None:
            return
        reordered = []
        for layer_mems in mems:
            reordered.append(layer_mems.index_select(1, new_order))
        self.set_incremental_state(incremental_state, "mems", reordered)
logger = logging.getLogger(__name__)


@dataclass
class TruncatedBPTTLMConfig(FairseqDataclass):
    data: str = field(default="???", metadata={"help": "path to data directory"})
    tokens_per_sample: int = field(
        default=1024, metadata={"help": "max number of tokens per sequence"},
    )
    batch_size: int = II("dataset.batch_size")
    # Some models use *max_target_positions* to know how many positional
    # embeddings to learn. We use II(...) to make it default to
    # *tokens_per_sample*, but in principle there could be more positional
    # embeddings than tokens in a single batch. This may also be irrelevant for
    # custom model implementations.
    max_target_positions: int = II("task.tokens_per_sample")
    # these will be populated automatically if not provided
    data_parallel_rank: Optional[int] = None
    data_parallel_size: Optional[int] = None


@register_task("truncated_bptt_lm", dataclass=TruncatedBPTTLMConfig)
class TruncatedBPTTLMTask(FairseqTask):
    """Language modeling task that iterates over the data sequentially."""

    def __init__(self, cfg: TruncatedBPTTLMConfig):
        super().__init__(cfg)

        # Fill in the data-parallel layout when the config leaves it open.
        if cfg.data_parallel_rank is None or cfg.data_parallel_size is None:
            if torch.distributed.is_initialized():
                cfg.data_parallel_rank = dist_utils.get_data_parallel_rank()
                cfg.data_parallel_size = dist_utils.get_data_parallel_world_size()
            else:
                cfg.data_parallel_rank = 0
                cfg.data_parallel_size = 1

        # load the dictionary
        paths = utils.split_paths(cfg.data)
        assert len(paths) > 0
        self.dictionary = Dictionary.load(os.path.join(paths[0], "dict.txt"))
        logger.info("dictionary: {} types".format(len(self.dictionary)))

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Load a given dataset split (e.g., train, valid, test)"""

        # support sharded datasets
        paths = utils.split_paths(self.cfg.data)
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]
        split_path = os.path.join(data_path, split)

        # each element of *data* will be a tensorized line from the original
        # text dataset, similar to ``open(split_path).readlines()``
        data = data_utils.load_indexed_dataset(
            split_path, self.dictionary, combine=combine
        )
        if data is None:
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, split_path)
            )

        # this is similar to ``data.view(-1).split(tokens_per_sample)``
        data = TokenBlockDataset(
            data,
            data.sizes,
            block_size=self.cfg.tokens_per_sample,
            pad=None,  # unused
            eos=None,  # unused
            break_mode="none",
        )

        self.datasets[split] = TruncatedBPTTDataset(
            data=data,
            bsz_per_shard=self.cfg.batch_size,
            shard_id=self.cfg.data_parallel_rank,
            num_shards=self.cfg.data_parallel_size,
        )

    def dataset(self, split):
        return self.datasets[split]

    def get_batch_iterator(
        self,
        dataset,
        num_workers=0,
        epoch=1,
        data_buffer_size=0,
        skip_remainder_batch=False,
        **kwargs
    ):
        """Return an iterator that yields the dataset's batches in order."""
        return iterators.EpochBatchIterator(
            dataset=dataset,
            collate_fn=self._collate_fn,
            num_workers=num_workers,
            epoch=epoch,
            buffer_size=data_buffer_size,
            # we don't use the batching functionality from EpochBatchIterator;
            # instead every item in *dataset* is a whole batch
            batch_sampler=[[i] for i in range(len(dataset))],
            disable_shuffling=True,
            skip_remainder_batch=skip_remainder_batch,
        )

    def _collate_fn(self, items: List[List[torch.Tensor]]):
        """Turn the single pre-batched item into a fairseq batch dict."""
        # we don't use fairseq's batching functionality, so we expect a single
        # Tensor of type List[torch.Tensor]
        assert len(items) == 1

        # item will have shape B x T (the last batch may have length < T)
        batch_id, item = items[0]
        item = data_utils.collate_tokens(item, pad_idx=self.source_dictionary.pad())

        # shift item one position over and append a padding token for the target
        target = torch.nn.functional.pad(
            item[:, 1:], (0, 1, 0, 0), value=self.target_dictionary.pad()
        )

        # fairseq expects batches to have the following structure
        return {
            "id": torch.tensor([batch_id] * item.size(0)),
            "net_input": {"src_tokens": item,},
            "target": target,
            "nsentences": item.size(0),
            "ntokens": item.numel(),
        }

    def build_dataset_for_inference(
        self, src_tokens: List[torch.Tensor], src_lengths: List[int], **kwargs
    ) -> torch.utils.data.Dataset:
        """Wrap prompts so each item is ``(index, [tokens without final eos])``."""
        eos = self.source_dictionary.eos()
        dataset = TokenBlockDataset(
            src_tokens,
            src_lengths,
            block_size=None,  # ignored for "eos" break mode
            pad=self.source_dictionary.pad(),
            eos=eos,
            break_mode="eos",
        )

        class Dataset(torch.utils.data.Dataset):
            def __getitem__(self, i):
                item = dataset[i]
                if item[-1] == eos:
                    # remove eos to support generating with a prefix
                    item = item[:-1]
                return (i, [item])

            def __len__(self):
                return len(dataset)

        return Dataset()

    def inference_step(
        self, generator, models, sample, prefix_tokens=None, constraints=None
    ):
        with torch.no_grad():
            if constraints is not None:
                raise NotImplementedError

            # SequenceGenerator doesn't use *src_tokens* directly, we need to
            # pass the *prefix_tokens* argument instead.
            if prefix_tokens is None and sample["net_input"]["src_tokens"].nelement():
                prefix_tokens = sample["net_input"]["src_tokens"]

            # begin generation with the end-of-sentence token
            bos_token = self.source_dictionary.eos()

            return generator.generate(
                models, sample, prefix_tokens=prefix_tokens, bos_token=bos_token
            )

    def eval_lm_dataloader(
        self,
        dataset,
        max_tokens: Optional[int] = 36000,
        batch_size: Optional[int] = None,
        max_positions: Optional[int] = None,
        num_shards: int = 1,
        shard_id: int = 0,
        num_workers: int = 1,
        data_buffer_size: int = 10,
        context_window: int = 0,
    ):
        if context_window > 0:
            raise NotImplementedError(
                "Transformer-XL doesn't need --context-window, try "
                "--model-overrides '{\"mem_len\":42}' instead "
            )
        return self.get_batch_iterator(
            dataset=dataset,
            max_tokens=max_tokens,
            max_sentences=batch_size,
            max_positions=max_positions,
            ignore_invalid_inputs=True,
            num_shards=num_shards,
            shard_id=shard_id,
            num_workers=num_workers,
            data_buffer_size=data_buffer_size,
        ).next_epoch_itr(shuffle=False)

    @property
    def source_dictionary(self):
        return self.dictionary

    @property
    def target_dictionary(self):
        return self.dictionary


class TruncatedBPTTDataset(torch.utils.data.Dataset):
    """Dataset whose *i*-th item is the whole *i*-th batch for one shard.

    The ordered items are cut into ``bsz_per_shard * num_shards`` contiguous
    streams (trailing items that do not divide evenly are dropped); each shard
    owns ``bsz_per_shard`` consecutive streams and item *i* gathers position
    *i* of each of them.

    Raises:
        ValueError: if *data* has fewer items than the global batch size.
    """

    def __init__(
        self,
        data: List[torch.Tensor],  # ordered list of items
        bsz_per_shard,  # number of items processed per GPUs per forward
        shard_id,  # current GPU ID
        num_shards,  # number of GPUs
    ):
        super().__init__()
        self.data = data

        def batchify(data, bsz):
            # Work out how cleanly we can divide the dataset into bsz parts.
            nbatch = data.size(0) // bsz
            # Trim off any extra elements that wouldn't cleanly fit (remainders).
            data = data.narrow(0, 0, nbatch * bsz)
            # Evenly divide the data across the bsz batches.
            data = data.view(bsz, -1).contiguous()
            return data

        # total number of sequences processed by all GPUs in each forward pass
        global_batch_size = bsz_per_shard * num_shards

        # Fail with a clear message instead of an opaque view() error when
        # there is not even one item per stream.
        if len(data) < global_batch_size:
            raise ValueError(
                "Dataset has {} items but at least {} are required "
                "(bsz_per_shard={} x num_shards={})".format(
                    len(data), global_batch_size, bsz_per_shard, num_shards
                )
            )

        # With a 16 item dataset, bsz_per_shard=2 and num_shards=3,
        # *indices* might look like:
        #
        #     indices = [[0, 1],
        #                [2, 3],
        #                [4, 5],
        #                [6, 7],
        #                [8, 9],
        #                [10, 11]]
        #
        # The size of the TruncatedBPTTDataset instance will be 2,
        # and shard 1 will see items:
        #
        #     [(0, [data[4], data[6]]),
        #      (1, [data[5], data[7]])]
        indices = batchify(torch.arange(len(data)), global_batch_size)
        assert indices.size(0) == global_batch_size

        self.my_indices = indices[
            shard_id * bsz_per_shard : (shard_id + 1) * bsz_per_shard
        ]
        assert self.my_indices.size(0) == bsz_per_shard

    def __len__(self):
        return self.my_indices.size(1)

    def __getitem__(self, i) -> Tuple[int, List[torch.Tensor]]:
        return (i, [self.data[idx] for idx in self.my_indices[:, i]])
repository](https://github.com/facebookresearch/mlqe). + +## Set up: + +Given a testset consisting of source sentences and reference translations: + +* `SRC_LANG`: source language +* `TGT_LANG`: target language +* `INPUT`: input prefix, such that the file `$INPUT.$SRC_LANG` contains source sentences and `$INPUT.$TGT_LANG` +contains the reference sentences +* `OUTPUT_DIR`: output path to store results +* `MOSES_DECODER`: path to mosesdecoder installation +* `BPE_ROOT`: path to subword-nmt installation +* `BPE`: path to BPE model +* `MODEL_DIR`: directory containing the NMT model `.pt` file as well as the source and target vocabularies. +* `TMP`: directory for intermediate temporary files +* `GPU`: if translating with GPU, id of the GPU to use for inference +* `DROPOUT_N`: number of stochastic forward passes + +`$DROPOUT_N` is set to 30 in the experiments reported in the paper. However, we observed that increasing it beyond 10 +does not bring substantial improvements. + +## Translate the data using standard decoding + +Preprocess the input data: +``` +for LANG in $SRC_LANG $TGT_LANG; do + perl $MOSES_DECODER/scripts/tokenizer/tokenizer.perl -threads 80 -a -l $LANG < $INPUT.$LANG > $TMP/preprocessed.tok.$LANG + python $BPE_ROOT/apply_bpe.py -c ${BPE} < $TMP/preprocessed.tok.$LANG > $TMP/preprocessed.tok.bpe.$LANG +done +``` + +Binarize the data for faster translation: + +``` +fairseq-preprocess --srcdict $MODEL_DIR/dict.$SRC_LANG.txt --tgtdict $MODEL_DIR/dict.$TGT_LANG.txt +--source-lang ${SRC_LANG} --target-lang ${TGT_LANG} --testpref $TMP/preprocessed.tok.bpe --destdir $TMP/bin --workers 4 +``` + +Translate + +``` +CUDA_VISIBLE_DEVICES=$GPU fairseq-generate $TMP/bin --path ${MODEL_DIR}/${SRC_LANG}-${TGT_LANG}.pt --beam 5 +--source-lang $SRC_LANG --target-lang $TGT_LANG --no-progress-bar --unkpen 5 > $TMP/fairseq.out +grep ^H $TMP/fairseq.out | cut -d- -f2- | sort -n | cut -f3- > $TMP/mt.out +``` + +Post-process + +``` +sed -r 's/(@@ )| (@@ ?$)//g' < $TMP/mt.out | 
perl $MOSES_DECODER/scripts/tokenizer/detokenizer.perl +-l $TGT_LANG > $OUTPUT_DIR/mt.out +``` + +## Produce uncertainty estimates + +### Scoring + +Make temporary files to store the translations repeated N times. + +``` +python ${SCRIPTS}/scripts/uncertainty/repeat_lines.py -i $TMP/preprocessed.tok.bpe.$SRC_LANG -n $DROPOUT_N +-o $TMP/repeated.$SRC_LANG +python ${SCRIPTS}/scripts/uncertainty/repeat_lines.py -i $TMP/mt.out -n $DROPOUT_N -o $TMP/repeated.$TGT_LANG + +fairseq-preprocess --srcdict ${MODEL_DIR}/dict.${SRC_LANG}.txt $TGT_DIC --source-lang ${SRC_LANG} +--target-lang ${TGT_LANG} --testpref ${TMP}/repeated --destdir ${TMP}/bin-repeated +``` + +Produce model scores for the generated translations using `--retain-dropout` option to apply dropout at inference time: + +``` +CUDA_VISIBLE_DEVICES=${GPU} fairseq-generate ${TMP}/bin-repeated --path ${MODEL_DIR}/${LP}.pt --beam 5 + --source-lang $SRC_LANG --target-lang $TGT_LANG --no-progress-bar --unkpen 5 --score-reference --retain-dropout + --retain-dropout-modules '["TransformerModel","TransformerEncoder","TransformerDecoder","TransformerEncoderLayer"]' + TransformerDecoderLayer --seed 46 > $TMP/dropout.scoring.out + +grep ^H $TMP/dropout.scoring.out | cut -d- -f2- | sort -n | cut -f2 > $TMP/dropout.scores + +``` + +Use `--retain-dropout-modules` to specify the modules. By default, dropout is applied in the same places +as for training. 
+ +Compute the mean of the resulting output distribution: + +``` +python $SCRIPTS/scripts/uncertainty/aggregate_scores.py -i $TMP/dropout.scores -o $OUTPUT_DIR/dropout.scores.mean +-n $DROPOUT_N +``` + +### Generation + +Produce multiple translation hypotheses for the same source using `--retain-dropout` option: + +``` +CUDA_VISIBLE_DEVICES=${GPU} fairseq-generate ${TMP}/bin-repeated --path ${MODEL_DIR}/${LP}.pt + --beam 5 --source-lang $SRC_LANG --target-lang $TGT_LANG --no-progress-bar --retain-dropout + --unkpen 5 --retain-dropout-modules TransformerModel TransformerEncoder TransformerDecoder +TransformerEncoderLayer TransformerDecoderLayer --seed 46 > $TMP/dropout.generation.out + +grep ^H $TMP/dropout.generation.out | cut -d- -f2- | sort -n | cut -f3- > $TMP/dropout.hypotheses_ + +sed -r 's/(@@ )| (@@ ?$)//g' < $TMP/dropout.hypotheses_ | perl $MOSES_DECODER/scripts/tokenizer/detokenizer.perl +-l $TGT_LANG > $TMP/dropout.hypotheses +``` + +Compute similarity between multiple hypotheses corresponding to the same source sentence using Meteor +evaluation metric: +``` +python meteor.py -i $TMP/dropout.hypotheses -m <path_to_meteor_installation> -n $DROPOUT_N -o +$OUTPUT_DIR/dropout.gen.sim.meteor +``` diff --git a/fairseq/examples/unsupervised_quality_estimation/aggregate_scores.py b/fairseq/examples/unsupervised_quality_estimation/aggregate_scores.py new file mode 100644 index 0000000..66d50d0 --- /dev/null +++ b/fairseq/examples/unsupervised_quality_estimation/aggregate_scores.py @@ -0,0 +1,41 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
# Aggregation functions selectable with -f/--func.
aggregate_funcs = {
    "std": np.std,
    "var": np.var,
    "median": np.median,
    "mean": np.mean,
    "min": np.min,
    "max": np.max,
}


def main():
    """Aggregate every N consecutive scores of a file into one value.

    Reads one float per line from ``--input_file``, groups the values into
    consecutive runs of ``--repeat_times`` and writes one aggregated value
    per run to ``--output_file`` (stdout when omitted). A trailing run with
    fewer than ``--repeat_times`` values is dropped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_file", required=True, type=str)
    parser.add_argument("-n", "--repeat_times", required=True, type=int)
    parser.add_argument("-o", "--output_file", required=False)
    parser.add_argument(
        "-f",
        "--func",
        required=False,
        default="mean",
        # reject unknown names up front instead of failing with a KeyError
        choices=sorted(aggregate_funcs),
    )
    args = parser.parse_args()

    aggregate = aggregate_funcs[args.func]
    stream = open(args.output_file, "w") if args.output_file else sys.stdout
    try:
        with open(args.input_file) as infile:
            segment_scores = []
            for line in infile:
                segment_scores.append(float(line.strip()))
                if len(segment_scores) == args.repeat_times:
                    stream.write("{}\n".format(aggregate(segment_scores)))
                    segment_scores = []
    finally:
        # never close stdout; only files this function opened
        if stream is not sys.stdout:
            stream.close()


if __name__ == "__main__":
    main()
def read_translations(path, n_repeats):
    """Group the lines of *path* into consecutive runs of *n_repeats*.

    Whitespace inside each line is normalized to single spaces. A trailing
    run with fewer than *n_repeats* lines is dropped.

    Returns:
        defaultdict: segment index -> list of *n_repeats* translations.
    """
    segment_counter = 0
    segment_translations = []
    translations = defaultdict(list)
    with open(path) as infile:
        for line in infile:
            segment_translations.append(" ".join(line.split()))
            if len(segment_translations) == n_repeats:
                translations[segment_counter] = segment_translations
                segment_translations = []
                segment_counter += 1
    return translations


def generate_input(translations, n_repeats):
    """Write every pair of hypotheses per segment to two temporary files.

    For each unordered pair ``(idx1, idx2)`` of a segment's hypotheses,
    hypothesis *idx1* goes to the MT file and *idx2* to the reference file.

    Returns:
        tuple: ``(ref_path, mt_path)``; the caller must delete both files.
    """
    ref_fd, ref_path = tempfile.mkstemp()
    mt_fd, mt_path = tempfile.mkstemp()
    # Reuse the descriptors returned by mkstemp so they are not leaked, and
    # close both files so the data is on disk before Meteor reads it.
    with os.fdopen(ref_fd, "w") as ref_fh, os.fdopen(mt_fd, "w") as mt_fh:
        for segid in sorted(translations.keys()):
            assert len(translations[segid]) == n_repeats
            for idx1, idx2 in combinations(range(n_repeats), 2):
                mt_fh.write(translations[segid][idx1].strip() + "\n")
                ref_fh.write(translations[segid][idx2].strip() + "\n")
    sys.stderr.write("\nSaved translations to %s and %s" % (ref_path, mt_path))
    return ref_path, mt_path


def run_meteor(ref_path, mt_path, metric_path, lang="en"):
    """Run the Meteor jar on the two files and return the output path.

    The input files *ref_path* and *mt_path* are deleted afterwards.
    """
    out_fd, out_path = tempfile.mkstemp()
    with os.fdopen(out_fd, "w") as out_fh:
        subprocess.call(
            [
                "java",
                "-Xmx2G",
                "-jar",
                metric_path,
                mt_path,
                ref_path,
                "-p",
                "0.5 0.2 0.6 0.75",  # default parameters, only changed alpha to give equal weight to P and R
                "-norm",
                "-l",
                lang,
            ],
            stdout=out_fh,
        )
    os.remove(ref_path)
    os.remove(mt_path)
    sys.stderr.write("\nSaved Meteor output to %s" % out_path)
    return out_path


def read_output(meteor_output_path, n_repeats):
    """Average the per-pair Meteor scores of each segment.

    Only lines starting with ``"Segment "`` are read; the score is their
    second tab-separated field. The output file is deleted after reading.

    Returns:
        list: one mean score per segment.

    Raises:
        ValueError: if *n_repeats* is smaller than 2 (no pairs exist).
    """
    if n_repeats < 2:
        raise ValueError("n_repeats must be at least 2")
    # number of unordered pairs, kept as an exact integer
    n_combinations = n_repeats * (n_repeats - 1) // 2
    raw_scores = []
    average_scores = []
    with open(meteor_output_path) as infile:
        for line in infile:
            if not line.startswith("Segment "):
                continue
            score = float(line.strip().split("\t")[1])
            raw_scores.append(score)
            if len(raw_scores) == n_combinations:
                average_scores.append(sum(raw_scores) / n_combinations)
                raw_scores = []
    os.remove(meteor_output_path)
    return average_scores


def main():
    """Compute the mean pairwise Meteor similarity for each segment."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile")
    parser.add_argument("-n", "--repeat_times", type=int)
    parser.add_argument("-m", "--meteor")
    parser.add_argument("-o", "--output")
    args = parser.parse_args()

    translations = read_translations(args.infile, args.repeat_times)
    sys.stderr.write("\nGenerating input for Meteor...")
    ref_path, mt_path = generate_input(translations, args.repeat_times)
    sys.stderr.write("\nRunning Meteor...")
    out_path = run_meteor(ref_path, mt_path, args.meteor)
    sys.stderr.write("\nReading output...")
    scores = read_output(out_path, args.repeat_times)
    sys.stderr.write("\nWriting results...")
    with open(args.output, "w") as o:
        for scr in scores:
            o.write("{}\n".format(scr))


if __name__ == "__main__":
    main()
+ +import argparse +import sys + + +def _normalize_spaces(line): + return " ".join(line.split()) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input_file", required=True, type=str) + parser.add_argument("-n", "--repeat_times", required=True, type=int) + parser.add_argument("-o", "--output_file", required=False, type=str) + args = parser.parse_args() + stream = open(args.output_file, "w") if args.output_file else sys.stdout + + for line in open(args.input_file): + for _ in range(args.repeat_times): + stream.write(_normalize_spaces(line) + "\n") + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/wav2vec/README.md b/fairseq/examples/wav2vec/README.md new file mode 100644 index 0000000..e979733 --- /dev/null +++ b/fairseq/examples/wav2vec/README.md @@ -0,0 +1,426 @@ +# wav2vec 2.0 + +wav2vec 2.0 learns speech representations on unlabeled data as described in [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](https://arxiv.org/abs/2006.11477). + +We learned speech representations in multiple languages as well in [Unsupervised Cross-lingual Representation Learning for Speech Recognition (Conneau et al., 2020)](https://arxiv.org/abs/2006.13979). + +We also combined wav2vec 2.0 with self-training in [Self-training and Pre-training are Complementary for Speech Recognition (Xu et al., 2020)](https://arxiv.org/abs/2010.11430). + +We combined speech data from multiple domains in [Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training (Hsu, et al., 2021)](https://arxiv.org/abs/2104.01027). + +We finetuned XLSR-53 on multiple languages to transcribe unseen languages in [Simple and Effective Zero-shot Cross-lingual Phoneme Recognition (Xu et al., 2021)](https://arxiv.org/abs/2109.11680). 
+ +## Pre-trained models + +Model | Finetuning split | Dataset | Model +|---|---|---|--- +Wav2Vec 2.0 Base | No finetuning | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small.pt) +Wav2Vec 2.0 Base | 10 minutes | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small_10m.pt) +Wav2Vec 2.0 Base | 100 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small_100h.pt) +Wav2Vec 2.0 Base | 960 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small_960h.pt) +Wav2Vec 2.0 Large | No finetuning | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/libri960_big.pt) +Wav2Vec 2.0 Large | 10 minutes | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_big_10m.pt) +Wav2Vec 2.0 Large | 100 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_big_100h.pt) +Wav2Vec 2.0 Large | 960 hours | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_big_960h.pt) +Wav2Vec 2.0 Large (LV-60)* | No finetuning | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_new.pt) +Wav2Vec 2.0 Large conformer - rel_pos (LV-60)* | No finetuning | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_relpos_PT_no_FT) +Wav2Vec 2.0 Large conformer - rope (LV-60)* | No finetuning | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_rope_PT_no_FT) +Wav2Vec 2.0 Large (LV-60)* 
| 10 minutes | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_10m_new.pt) +Wav2Vec 2.0 Large (LV-60)* | 100 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_100h_new.pt) +Wav2Vec 2.0 Large conformer - rel_pos (LV-60)* | 100 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_relpos_PT_100h_FT.pt) +Wav2Vec 2.0 Large conformer - rope (LV-60)* | 100 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_rope_PT_100h_FT.pt) +Wav2Vec 2.0 Large (LV-60)* | 960 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec2_vox_960h_new.pt) +Wav2Vec 2.0 Large conformer - rel_pos (LV-60)* | 960 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_relpos_PT_960h_FT.pt) +Wav2Vec 2.0 Large conformer - rope (LV-60)* | 960 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) | [download](s3://dl.fbaipublicfiles.com/fairseq/conformer/wav2vec2/librilight/LL_rope_PT_960h_FT.pt) +Wav2Vec 2.0 Large (LV-60) + Self Training * | 10 minutes | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_10m_pl.pt) +Wav2Vec 2.0 Large (LV-60) + Self Training * | 100 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | 
[download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_100h_pl.pt) +Wav2Vec 2.0 Large (LV-60) + Self Training * | 960 hours | [Libri-Light](https://github.com/facebookresearch/libri-light) + [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_vox_960h_pl.pt) +Wav2Vec 2.0 Large (LV-60 + CV + SWBD + FSH) ** | No finetuning | [Libri-Light](https://github.com/facebookresearch/libri-light) + [CommonVoice](https://commonvoice.mozilla.org/en/languages) + [Switchboard](https://catalog.ldc.upenn.edu/LDC97S62) + [Fisher](https://catalog.ldc.upenn.edu/LDC2004T19) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/w2v_large_lv_fsh_swbd_cv.pt) +Wav2Vec 2.0 Large (LV-60 + CV + SWBD + FSH) ** | 960 hours Librispeech | [Libri-Light](https://github.com/facebookresearch/libri-light) + [CommonVoice](https://commonvoice.mozilla.org/en/languages) + [Switchboard](https://catalog.ldc.upenn.edu/LDC97S62) + [Fisher](https://catalog.ldc.upenn.edu/LDC2004T19) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/w2v_large_lv_fsh_swbd_cv_ftls960_updated.pt) +Wav2Vec 2.0 Large (LV-60 + CV + SWBD + FSH) ** | 300 hours Switchboard | [Libri-Light](https://github.com/facebookresearch/libri-light) + [CommonVoice](https://commonvoice.mozilla.org/en/languages) + [Switchboard](https://catalog.ldc.upenn.edu/LDC97S62) + [Fisher](https://catalog.ldc.upenn.edu/LDC2004T19) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/w2v_large_lv_fsh_swbd_cv_ftsb300_updated.pt) + +\* updated (Oct. 24, 2020)\ +** updated (Nov. 
13, 2021) + +We also release multilingual pre-trained wav2vec 2.0 (XLSR) models: + +Model | Architecture | Hours | Languages | Datasets | Model +|---|---|---|---|---|--- +XLSR-53 | Large | 56k | 53 | MLS, CommonVoice, BABEL | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_53_56k.pt) + +The XLSR model uses the following datasets for multilingual pretraining: + +* **[MLS: Multilingual LibriSpeech](https://indico2.conference4me.psnc.pl/event/35/contributions/3585/attachments/1060/1101/Wed-2-6-10.pdf)** (8 languages, 50.7k hours): *Dutch, English, French, German, Italian, Polish, Portuguese, Spanish* + +* **[CommonVoice](https://commonvoice.mozilla.org/en/languages)** (36 languages, 3.6k hours): *Arabic, Basque, Breton, Chinese (CN), Chinese (HK), Chinese (TW), Chuvash, Dhivehi, Dutch, English, Esperanto, Estonian, French, German, Hakh-Chin, Indonesian, Interlingua, Irish, Italian, Japanese, Kabyle, Kinyarwanda, Kyrgyz, Latvian, Mongolian, Persian, Portuguese, Russian, Sakha, Slovenian, Spanish, Swedish, Tamil, Tatar, Turkish, Welsh* (see also [finetuning splits](https://dl.fbaipublicfiles.com/cpc_audio/common_voices_splits.tar.gz) from [this paper](https://arxiv.org/abs/2002.02848)). + +* **[Babel](https://catalog.ldc.upenn.edu/byyear)** (17 languages, 1.7k hours): *Assamese, Bengali, Cantonese, Cebuano, Georgian, Haitian, Kazakh, Kurmanji, Lao, Pashto, Swahili, Tagalog, Tamil, Tok, Turkish, Vietnamese, Zulu* + +We also finetuned several models on languages from [CommonVoice](https://commonvoice.mozilla.org/en/languages) (version 6.1) and [Babel](https://catalog.ldc.upenn.edu/byyear). Please refer to [our paper](https://arxiv.org/abs/2109.11680) for details about which languages are used.
+ +Pretrained Model | Finetune Dataset | # Languages | Phonemizer | Model | Dictionary +|---|---|---|---|---|--- +LV-60 | CommonVoice | 26 | [Espeak](https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/espeak_en_26lang_m10.pt) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/espeak_dict.txt) +XLSR-53 | CommonVoice | 26 | [Espeak](https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/espeak_26lang_m10.pt) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/espeak_dict.txt) +XLSR-53 | CommonVoice | 21 | [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/phonetisaurus_21lang_m10.pt) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/phonetisaurus_dict.txt) +XLSR-53 | CommonVoice, BABEL | 21, 19 | [Phonetisaurus](https://github.com/AdolfVonKleist/Phonetisaurus) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/phonetisaurus_40lang_m10.pt) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/zero_shot/phonetisaurus_40lang.dict.txt) + +We release 2 models that are finetuned on data from 2 different phonemizers. Although the phonemes are all [IPA](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet) symbols, there are still subtle differences between the phonemized transcriptions from the 2 phonemizers. Thus, it's better to use the corresponding model, if your data is phonemized by either phonemizer above.
+ +## Training a new model with the CLI tools + +Given a directory containing wav files to be used for pretraining (we recommend splitting each file into separate files 10 to 30 seconds in length) + +### Prepare training data manifest + +First, install the `soundfile` library: + +```shell script +pip install soundfile +``` + +Next, run: + +```shell script +python examples/wav2vec/wav2vec_manifest.py /path/to/waves --dest /manifest/path --ext $ext --valid-percent $valid +``` + +$ext should be set to flac, wav, or whatever format your dataset happens to use that soundfile can read. + +$valid should be set to some reasonable percentage (like 0.01) of training data to use for validation. +To use a pre-defined validation set (like dev-other from librispeech), set it to 0 and then overwrite valid.tsv with a +separately pre-processed manifest file. + +### Train a wav2vec 2.0 base model + +This configuration was used for the base model trained on the Librispeech dataset in the wav2vec 2.0 paper + +Note that the input is expected to be single channel, sampled at 16 kHz + +```shell script +$ fairseq-hydra-train \ + task.data=/path/to/data \ + --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_base_librispeech +``` + +Note: you can simulate 64 GPUs by using k GPUs and adding command line parameters (before `--config-dir`) +`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 64/k + +### Train a wav2vec 2.0 large model + +This configuration was used for the large model trained on the Libri-light dataset in the wav2vec 2.0 paper + +```shell script +$ fairseq-hydra-train \ + task.data=/path/to/data \ + --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_large_librivox +``` + +Note: you can simulate 128 GPUs by using k GPUs and adding command line parameters (before `--config-dir`) +`distributed_training.distributed_world_size=k`
`+optimization.update_freq='[x]'` where x = 128/k + +### Train a wav2vec 2.0 model with conformer backbone + +To replace the transformer layers in the encoder with the conformer layers, set `--layer-type conformer --attn-type espnet --pos-enc-type ${POS_ENC_TYPE}`. `POS_ENC_TYPE` refers to the positional encoding to be used in the conformer encoder. +Set it to `abs`, `rope` or `rel_pos` to use the absolute positional encoding, rotary positional encoding or relative positional encoding in the conformer layer respectively. + +To train a base model with conformer: + +```shell script +$ fairseq-hydra-train \ + task.data=/path/to/data \ + --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_conformer_base_librispeech \ + --attn-type espnet --pos-enc-type ${POS_ENC_TYPE} +``` + +To train a large model with conformer: + +```shell script +$ fairseq-hydra-train \ + task.data=/path/to/data \ + --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_conformer_large_librivox \ + --attn-type espnet --pos-enc-type ${POS_ENC_TYPE} + +``` + +### Fine-tune a pre-trained model with CTC + +Fine-tuning a model requires parallel audio and label files, as well as a vocabulary file in fairseq format. +A letter vocabulary can be downloaded [here](https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt).
+An example [script](libri_labels.py) that generates labels for the Librispeech dataset from the tsv file produced by wav2vec_manifest.py can be used as follows: + +```shell script +split=train +$ python libri_labels.py /path/to/tsv --output-dir /output/dir --output-name $split +``` + +Fine-tuning on 100h of Librispeech with letter targets: + +```shell script +$ fairseq-hydra-train \ + distributed_training.distributed_port=$PORT \ + task.data=/path/to/data \ + model.w2v_path=/path/to/model.pt \ + --config-dir /path/to/fairseq-py/examples/wav2vec/config/finetuning \ + --config-name base_100h +``` + +There are other config files in the config/finetuning directory that can be used to fine-tune on other splits. +You can specify the right config via the `--config-name` parameter. + +Note: you can simulate 24 GPUs by using k GPUs and adding command line parameters (before `--config-dir`) +`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 24/k + +Decoding with a language model during training requires flashlight [python bindings](https://github.com/facebookresearch/flashlight/tree/master/bindings/python) (previously called [wav2letter](https://github.com/facebookresearch/wav2letter)). +If you want to use a language model, add `+criterion.wer_args='[/path/to/kenlm, /path/to/lexicon, 2, -1]'` to the command line. + +### Evaluating a CTC model + +Evaluating a CTC model with a language model requires [flashlight python bindings](https://github.com/facebookresearch/flashlight/tree/master/bindings/python) (previously called [wav2letter](https://github.com/facebookresearch/wav2letter)) to be installed. + +The Fairseq transformer language model used in the wav2vec 2.0 paper can be obtained from the [wav2letter model repository](https://github.com/facebookresearch/wav2letter/tree/master/recipes/sota/2019). +Be sure to upper-case the language model vocab after downloading it.
+ +The letter dictionary for pre-trained models can be found [here](https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt). + +Next, run the evaluation command: + +```shell script +subset=dev_other +python examples/speech_recognition/infer.py /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw --task audio_finetuning \ +--nbest 1 --path /path/to/model --gen-subset $subset --results-path /path/to/save/results/for/sclite --w2l-decoder kenlm \ +--lm-model /path/to/kenlm.bin --lm-weight 2 --word-score -1 --sil-weight 0 --criterion ctc --labels ltr --max-tokens 4000000 \ +--post-process letter +``` + +To get raw numbers, use --w2l-decoder viterbi and omit the lexicon. To use the transformer language model, use --w2l-decoder fairseqlm. + +## Use wav2vec 2.0 with 🤗Transformers + +Wav2Vec2 is also available in the [🤗Transformers library](https://github.com/huggingface/transformers) since version 4.4. + +Pretrained Models can be found on the [hub](https://huggingface.co/models?filter=wav2vec2) +and documentation can be found [here](https://huggingface.co/transformers/master/model_doc/wav2vec2.html).
+ +Usage example: + +```python +# !pip install transformers +# !pip install datasets +import soundfile as sf +import torch +from datasets import load_dataset +from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor + +# load pretrained model +processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h") +model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h") + + +librispeech_samples_ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation") + +# load audio +audio_input, sample_rate = sf.read(librispeech_samples_ds[0]["file"]) + +# pad input values and return pt tensor +input_values = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_values + +# INFERENCE + +# retrieve logits & take argmax +logits = model(input_values).logits +predicted_ids = torch.argmax(logits, dim=-1) + +# transcribe +transcription = processor.decode(predicted_ids[0]) + +# FINE-TUNE + +target_transcription = "A MAN SAID TO THE UNIVERSE I EXIST" + +# encode labels +with processor.as_target_processor(): + labels = processor(target_transcription, return_tensors="pt").input_ids + +# compute loss by passing labels +loss = model(input_values, labels=labels).loss +loss.backward() +``` + +# wav2vec + +Example to train a wav2vec model as described in [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](https://arxiv.org/abs/1904.05862). 
+ +## Pre-trained models + +Description | Dataset | Model +---|---|--- +Wav2Vec large | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_large.pt) + +#### Example usage + +```python +import torch +import fairseq + +cp_path = '/path/to/wav2vec.pt' +model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([cp_path]) +model = model[0] +model.eval() + +wav_input_16khz = torch.randn(1,10000) +z = model.feature_extractor(wav_input_16khz) +c = model.feature_aggregator(z) +``` + +## Training a new model with the CLI tools + +Given a directory containing wav files to be used for pretraining (we recommend splitting each file into separate files 10 to 30 seconds in length) + +### Prepare training data manifest + +``` +python examples/wav2vec/wav2vec_manifest.py /path/to/waves --dest /manifest/path --ext wav +``` + +### Train a wav2vec model + +``` +$ python train.py /manifest/path --save-dir /model/path --num-workers 6 --fp16 --max-update 400000 --save-interval 1 --no-epoch-checkpoints \ +--arch wav2vec --task audio_pretraining --min-lr 1e-06 --stop-min-lr 1e-09 --optimizer adam --lr 0.005 --lr-scheduler cosine \ +--conv-feature-layers [(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1)] \ +--conv-aggregator-layers [(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)] \ +--skip-connections-agg --residual-scale 0.5 --log-compression --warmup-updates 500 --warmup-init-lr 1e-07 --criterion wav2vec --num-negatives 10 \ +--max-sample-size 150000 --max-tokens 1500000 --skip-invalid-size-inputs-valid-test +``` + +### Run wav2vec2 pre-training on Google Cloud TPUs + +Wav2Vec2 is now supported on TPUs! It's currently pre-training only. 
+ +#### Using hydra on a v3-8 + +``` +$ OMP_NUM_THREADS=1 fairseq-hydra-train \ + task.data=/manifest/path \ + --config-dir /PATH/TO/FAIRSEQ/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_large_librivox_tpu.yaml +``` + +#### Using command line arguments on a v3-8 + +Note: Commandline arguments way of execution has a [known-problem](https://github.com/pytorch/fairseq/issues/3741) currently. + +``` +$ OMP_NUM_THREADS=1 python train.py /manifest/path --save-dir /model/path --num-workers 6 --fp16 --max-update 400000 --save-interval 1 --no-epoch-checkpoints \ +--arch wav2vec2 --task audio_pretraining --min-lr 1e-06 --stop-min-lr 1e-09 --optimizer adam --lr 0.005 --lr-scheduler cosine \ +--conv-feature-layers [(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1)] \ +--conv-aggregator-layers [(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)] \ +--skip-connections-agg --residual-scale 0.5 --log-compression --warmup-updates 500 --warmup-init-lr 1e-07 --criterion wav2vec --num-negatives 10 \ +--max-sample-size 150000 --max-tokens 1500000 --skip-invalid-size-inputs-valid-test \ +--tpu --distributed-world-size 8 --num-batch-buckets 3 --enable-padding \ +--encoder-layerdrop 0 --mask-channel-prob 0.1 +``` + +#### Using hydra on a pod slice (v3-N with N > 8) + +``` +$ OMP_NUM_THREADS=1 fairseq-hydra-train \ + task.data=/manifest/path \ + --config-dir /PATH/TO/FAIRSEQ/examples/wav2vec/config/pretraining \ + --config-name wav2vec2_large_librivox_tpu-pod.yaml # edit distributed-world-size accordingly +``` + +#### Using command line arguments on a pod slice (v3-N with N > 8) + +Note: Commandline arguments way of execution has a [known-problem](https://github.com/pytorch/fairseq/issues/3741) currently. 
+ +``` +$ python -m torch_xla.distributed.xla_dist \ + --tpu ${TPUNAME} --conda-env=torch-xla-${TORCH_XLA_VERSION} --env OMP_NUM_THREADS=1 \ + -- \ +python train.py /manifest/path --save-dir /model/path --num-workers 6 --fp16 --max-update 400000 --save-interval 1 --no-epoch-checkpoints \ +--arch wav2vec2 --task audio_pretraining --min-lr 1e-06 --stop-min-lr 1e-09 --optimizer adam --lr 0.005 --lr-scheduler cosine \ +--conv-feature-layers [(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1)] \ +--conv-aggregator-layers [(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)] \ +--skip-connections-agg --residual-scale 0.5 --log-compression --warmup-updates 500 --warmup-init-lr 1e-07 --criterion wav2vec --num-negatives 10 \ +--max-sample-size 150000 --max-tokens 1500000 --skip-invalid-size-inputs-valid-test \ +--tpu --distributed-world-size ${WORLD_SIZE} --num-batch-buckets 3 --enable-padding \ +--encoder-layerdrop 0 --mask-channel-prob 0.1 +``` + +### Extract embeddings from the downstream task data + +``` +$ PYTHONPATH=/path/to/fairseq python examples/wav2vec/wav2vec_featurize.py --input /path/to/task/waves --output /path/to/output \ +--model /model/path/checkpoint_best.pt --split train valid test +``` + +# vq-wav2vec + +Example to train a vq-wav2vec model as described in [vq-wav2vec: Self-Supervised Learning of Discrete Speech Representations (Baevski et al., 2019)](https://arxiv.org/abs/1910.05453). + +These models are also used in [Effectiveness of self-supervised pre-training for speech recognition (Baevski et al., 2019)](https://arxiv.org/abs/1911.03912). 
+ +## Pre-trained models + +Description | Dataset | Model +---|---|--- +vq-wav2vec Gumbel | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/vq-wav2vec.pt) +vq-wav2vec K-means | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/vq-wav2vec_kmeans.pt) +Roberta on K-means codes | [Librispeech](http://www.openslr.org/12) | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/bert_kmeans.tar) + +#### Example usage + +```python +import torch +import fairseq + +cp_path = '/path/to/vq-wav2vec.pt' +model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([cp_path]) +model = model[0] +model.eval() + +wav_input_16khz = torch.randn(1,10000) +z = model.feature_extractor(wav_input_16khz) +_, idxs = model.vector_quantizer.forward_idx(z) +print(idxs.shape) # output: torch.Size([1, 60, 2]), 60 timesteps with 2 indexes corresponding to 2 groups in the model +``` + +## Training a new model with the CLI tools + +Given a directory containing wav files to be used for pretraining (we recommend splitting each file into separate files 10 to 30 seconds in length) + +### Prepare training data manifest + +``` +python examples/wav2vec/wav2vec_manifest.py /path/to/waves --dest /manifest/path --ext wav +``` + +### Train a gumbel vq-wav2vec model + +``` +$ python train.py /manifest/path --save-dir /model/path --num-workers 6 --fp16 --max-update 400000 \ +--save-interval 1 --no-epoch-checkpoints --arch wav2vec --task audio_pretraining --min-lr 1e-06 --stop-min-lr 1e-09 \ +--optimizer adam --lr 1e-05 --lr-scheduler cosine \ +--conv-feature-layers [(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1), (512, 1, 1)] \ +--conv-aggregator-layers [(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)] \ +--activation gelu --offset auto
--skip-connections-agg --residual-scale 0.5 \ +--log-keys ["prob_perplexity","code_perplexity","temp"] --vq-type gumbel --vq-groups 2 --vq-depth 2 \ +--combine-groups --vq-vars 320 --vq-temp (2,0.5,0.999995) --prediction-steps 12 --warmup-updates 1000 \ +--warmup-init-lr 1e-07 --criterion wav2vec --num-negatives 10 --max-sample-size 150000 \ +--max-tokens 300000 --cross-sample-negatives 0 --update-freq 1 --seed 2 --skip-invalid-size-inputs-valid-test +``` + +for k-means training, set vq-type with "kmeans" and add --loss-weights [1] argument. Pre-trained models were trained on 16 GPUs. + +### Tokenize audio data (e.g. for BERT training) + +``` +$ PYTHONPATH=/path/to/fairseq python examples/wav2vec/vq-wav2vec_featurize.py --data-dir /manifest/path --output-dir /path/to/output \ +--checkpoint /model/path/checkpoint_best.pt --split train valid test --extension tsv +``` diff --git a/fairseq/examples/wav2vec/__init__.py b/fairseq/examples/wav2vec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/wav2vec/config/finetuning/base_100h.yaml b/fairseq/examples/wav2vec/config/finetuning/base_100h.yaml new file mode 100644 index 0000000..153b5df --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/base_100h.yaml @@ -0,0 +1,58 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? 
+ normalize: false + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 2 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 80000 + lr: [0.00003] + sentence_avg: true + update_freq: [4] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 diff --git a/fairseq/examples/wav2vec/config/finetuning/base_10h.yaml b/fairseq/examples/wav2vec/config/finetuning/base_10h.yaml new file mode 100644 index 0000000..5044518 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/base_10h.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval: 50 + save_interval_updates: 10000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: false + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 50 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 2 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 20000 + lr: [0.00005] + sentence_avg: true + update_freq: [4] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.05 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/base_10m.yaml b/fairseq/examples/wav2vec/config/finetuning/base_10m.yaml new file mode 100644 index 0000000..14abc01 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/base_10m.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval: 1000 + save_interval_updates: 50 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: false + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 1000 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 2 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 13000 + lr: [0.00005] + sentence_avg: true + update_freq: [4] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/base_1h.yaml b/fairseq/examples/wav2vec/config/finetuning/base_1h.yaml new file mode 100644 index 0000000..a0af1cf --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/base_1h.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval: 50 + save_interval_updates: 1000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: false + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 1000 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 2 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 13000 + lr: [0.00005] + sentence_avg: true + update_freq: [4] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/base_960h.yaml b/fairseq/examples/wav2vec/config/finetuning/base_960h.yaml new file mode 100644 index 0000000..3eadc36 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/base_960h.yaml @@ -0,0 +1,57 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: false + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 320000 + lr: [0.0001] + sentence_avg: true + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.1 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1.yaml new file mode 100644 index 0000000..4a84843 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 \ No newline at end of file diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_16.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_16.yaml new file mode 100644 index 0000000..041843a --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_16.yaml @@ -0,0 +1,27 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 16 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: 
learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 + exclude: learnfair1381,learnfair5192,learnfair2304 \ No newline at end of file diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_aws.yaml new file mode 100644 index 0000000..b9335df --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_old.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_old.yaml new file mode 100644 index 0000000..a8d2363 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_1_old.yaml @@ -0,0 +1,27 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: 
/checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 450 + nodes: 1 + name: ${env:PREFIX}_wav2vec3_small_librispeech + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 + exclude: learnfair1381 \ No newline at end of file diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2.yaml new file mode 100644 index 0000000..65ec489 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2.yaml @@ -0,0 +1,27 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 + exclude: learnfair7491,learnfair7477,learnfair7487 \ No newline at end of file diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2_aws.yaml new file mode 100644 index 0000000..e7590ef --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - 
distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 8 + tasks_per_node: 1 + mem_gb: 0 + nodes: 2 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2g.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2g.yaml new file mode 100644 index 0000000..aaa20eb --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_2g.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 2 + tasks_per_node: 2 + mem_gb: 200 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_3.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_3.yaml new file mode 100644 index 0000000..9614ece --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_3.yaml @@ -0,0 +1,27 @@ +# @package 
_global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 450 + nodes: 3 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 + exclude: learnfair7491,learnfair7477,learnfair7487 \ No newline at end of file diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g.yaml new file mode 100644 index 0000000..c0c9f60 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 200 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g_aws.yaml new file mode 100644 index 0000000..6bbbf3b --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_4g_aws.yaml @@ -0,0 +1,37 @@ +# @package _global_ + +hydra: + job: + config: 
+ override_dirname: + kv_sep: ':' + item_sep: '/' + exclude_keys: + - run_config + - distributed_training.distributed_port + - distributed_training.distributed_world_size + - model.pretrained_model_path + - model.target_network_path + - next_script + - task.cache_in_scratch + - task.local_cache_path + - task.data + - checkpoint.save_interval_updates + - checkpoint.keep_interval_updates + - checkpoint.save_on_overflow + - common.log_interval + - common.user_dir + sweep: + dir: /fsx-wav2vec/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: '' + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 80 + gpus_per_node: 4 + tasks_per_node: 1 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab,learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_8.yaml b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_8.yaml new file mode 100644 index 0000000..984f218 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/run_config/slurm_8.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 4320 + cpus_per_task: 10 + gpus_per_node: 8 + tasks_per_node: 8 + mem_gb: 400 + nodes: 8 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_100h.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_100h.yaml new file mode 100644 index 0000000..b8f81e5 --- /dev/null 
+++ b/fairseq/examples/wav2vec/config/finetuning/vox_100h.yaml @@ -0,0 +1,58 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 80000 + lr: [0.00003] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_100h_2.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_100h_2.yaml new file mode 100644 index 0000000..9bf588f --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_100h_2.yaml @@ -0,0 +1,106 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + 
distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: 0 + wer_sil_weight: -2 + +optimization: + max_update: 100000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.4 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_100h_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_100h_2_aws.yaml new file mode 100644 index 0000000..3a0d517 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_100h_2_aws.yaml @@ -0,0 +1,82 @@ +# @package _group_ + +common: + fp16: true + 
log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/libri/100h/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: 0 + wer_sil_weight: -2 + +optimization: + max_update: 100000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 82000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 7 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_100h_3.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_100h_3.yaml new file mode 100644 index 0000000..4677866 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_100h_3.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 + +optimization: + max_update: 100000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 8000 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10h.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10h.yaml new file mode 100644 index 0000000..8f1ca71 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10h.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval: 50 + save_interval_updates: 10000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? 
+ normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 50 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 20000 + lr: [0.0001] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.75 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10h_2.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10h_2.yaml new file mode 100644 index 0000000..05ee76f --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10h_2.yaml @@ -0,0 +1,102 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 10 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + keep_interval_updates: 1 + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 10 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: 
/checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 + +optimization: + max_update: 60000 + lr: [2e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 8000 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10h_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10h_2_aws.yaml new file mode 100644 index 0000000..a0afc9c --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10h_2_aws.yaml @@ -0,0 +1,81 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 10 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: 
/fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 10 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: 4 + wer_sil_weight: -5 + +optimization: + max_update: 60000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.75 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws.yaml new file mode 100644 index 0000000..c754373 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws.yaml @@ -0,0 +1,104 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 10 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 10 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter +# wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin +# wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst +# wer_lm_weight: 2.0 +# wer_word_score: -1.0 + +optimization: + max_update: 60000 + lr: [2e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: wav2vec,learnlab + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws_v100.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws_v100.yaml new file mode 100644 index 0000000..58ad2ac --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10h_aws_v100.yaml @@ -0,0 +1,102 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 +# tensorboard_logdir: tb + +checkpoint: + save_interval: 10 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx/abaevski/data/libri/10h/wav2vec/raw + labels: ltr + cache_in_scratch: true + + +dataset: + num_workers: 10 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 10 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_lexicon: /fsx/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: 
-1.0 + +optimization: + max_update: 60000 + lr: [2e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 72000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.6 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /fsx/${env:USER}/w2v_ft/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 0 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: learnfair + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10m.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10m.yaml new file mode 100644 index 0000000..07e327f --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10m.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval: 1000 + save_interval_updates: 50 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? 
+ normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 1000 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 13000 + lr: [0.0001] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10m_2.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10m_2.yaml new file mode 100644 index 0000000..1ac7c12 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10m_2.yaml @@ -0,0 +1,114 @@ +# @package _group_ + +common: + fp16: true + fp16_no_flatten_grads: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 500 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/10m/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 500 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: 
/checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 5 + wer_word_score: 2 + wer_sil_weight: -2 + +optimization: + max_update: 10000 + lr: [2e-6] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [4] # base 10h we -> 2/4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 2e-6 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + lr_scheduler: + _name: cosine + warmup_updates: 1000 + +lr_scheduler: pass_through + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.4 + mask_length: 3 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + freeze_finetune_updates: 100 + + zero_mask: true + feature_grad_mult: 0.0 + activation_dropout: 0.1 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + update_alibi: false + +#hydra: +# job: +# config: +# override_dirname: +# kv_sep: ':' +# item_sep: '__' +# exclude_keys: +# - run_config +# - distributed_training.distributed_port +# sweep: +# dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} +# subdir: ${hydra.job.num} +# launcher: +# submitit_folder: ${hydra.sweep.dir} +# timeout_min: 3000 +# cpus_per_task: 10 +# gpus_per_node: 4 +# tasks_per_node: 4 +# mem_gb: 250 +# nodes: 1 +# name: ${env:PREFIX}_${hydra.job.config_name} +# partition: devlab,learnlab,learnfair,scavenge +# constraint: volta32gb +# max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10m_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10m_2_aws.yaml new file mode 100644 index 0000000..a9c2708 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10m_2_aws.yaml @@ -0,0 +1,114 @@ +# @package _group_ + +common: + fp16: true + fp16_no_flatten_grads: true + 
log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 500 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/libri/10m/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 500 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 5 + wer_word_score: 2 + wer_sil_weight: -2 + +optimization: + max_update: 10000 + lr: [2e-6] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [4] # base 10h we -> 2/4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 2e-6 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + lr_scheduler: + _name: cosine + warmup_updates: 1000 + +lr_scheduler: pass_through + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 3 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + freeze_finetune_updates: 100 + + zero_mask: true + feature_grad_mult: 0.0 + activation_dropout: 0.1 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + update_alibi: false + +#hydra: +# job: +# config: +# override_dirname: +# kv_sep: ':' +# item_sep: '__' +# exclude_keys: +# - run_config +# - distributed_training.distributed_port +# sweep: +# dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} +# subdir: ${hydra.job.num} +# launcher: +# submitit_folder: ${hydra.sweep.dir} +# timeout_min: 3000 +# cpus_per_task: 10 +# gpus_per_node: 4 +# tasks_per_node: 4 +# mem_gb: 250 +# nodes: 1 +# name: ${env:PREFIX}_${hydra.job.config_name} +# partition: devlab,learnlab,learnfair,scavenge +# constraint: volta32gb +# max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_10m_3.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_10m_3.yaml new file mode 100644 index 0000000..b680412 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_10m_3.yaml @@ -0,0 +1,105 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1000 + save_interval_updates: 100 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/10m/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 500 + valid_subset: dev_other + required_batch_size_multiple: 8 + +distributed_training: + ddp_backend: 
legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 8 + wer_word_score: 5.8 + wer_sil_weight: -8 + +optimization: + max_update: 13000 + lr: [2e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [5] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_length: 10 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h.yaml new file mode 100644 index 0000000..fac1bbb --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h.yaml @@ -0,0 +1,63 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + 
save_interval: 1000 + save_interval_updates: 50 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 1000 + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 13000 + lr: [0.0003] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.75 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h_2.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h_2.yaml new file mode 100644 index 0000000..75f4aaf --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h_2.yaml @@ -0,0 +1,104 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 100 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + 
distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 6 + wer_word_score: -0.1 + wer_sil_weight: -4.7 + +optimization: + max_update: 60000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 4000 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h_2_aws.yaml new file mode 100644 index 0000000..cc4d511 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h_2_aws.yaml @@ -0,0 +1,114 @@ +# @package _group_ + +common: + fp16: true + fp16_no_flatten_grads: true + log_format: json + log_interval: 200 + user_dir: 
/data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 100 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 500 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 5 + wer_word_score: 0 + wer_sil_weight: -4 + +optimization: + max_update: 10000 + lr: [2e-6] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [4] # base 10h we -> 2/4 + +optimizer: + _name: composite + dynamic_groups: true + groups: + default: + lr_float: 2e-6 + optimizer: + _name: adam + adam_betas: [0.9,0.95] + lr_scheduler: + _name: cosine + warmup_updates: 1000 + +lr_scheduler: pass_through + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 3 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + freeze_finetune_updates: 100 + + zero_mask: true + feature_grad_mult: 0.0 + activation_dropout: 0.1 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + update_alibi: false + +#hydra: +# job: +# config: +# override_dirname: +# kv_sep: ':' +# item_sep: '__' +# exclude_keys: +# - run_config +# - distributed_training.distributed_port +# sweep: +# dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} +# subdir: ${hydra.job.num} +# launcher: +# submitit_folder: ${hydra.sweep.dir} +# timeout_min: 3000 +# cpus_per_task: 10 +# gpus_per_node: 4 +# tasks_per_node: 4 +# mem_gb: 250 +# nodes: 1 +# name: ${env:PREFIX}_${hydra.job.config_name} +# partition: devlab,learnlab,learnfair,scavenge +# constraint: volta32gb +# max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h_3.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h_3.yaml new file mode 100644 index 0000000..842c897 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h_3.yaml @@ -0,0 +1,104 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 100 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 640000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 100 + valid_subset: dev_other + required_batch_size_multiple: 8 + +distributed_training: + ddp_backend: legacy_ddp 
+ distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 6 + wer_word_score: -0.1 + wer_sil_weight: -4.7 + +optimization: + max_update: 13000 + lr: [6e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [5] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 4000 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.3 + mask_length: 3 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h_4.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h_4.yaml new file mode 100644 index 0000000..698ed8c --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h_4.yaml @@ -0,0 +1,104 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: 
/private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 100 + save_interval_updates: 1000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 640000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 100 + valid_subset: dev_other + required_batch_size_multiple: 8 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 + +optimization: + max_update: 13000 + lr: [6e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [5] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.65 + mask_length: 10 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_1h_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_1h_aws.yaml new file mode 100644 index 0000000..aa67004 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_1h_aws.yaml @@ -0,0 +1,80 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 100 + save_interval_updates: 500 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/libri/10m/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval: 100 + valid_subset: dev_other + required_batch_size_multiple: 8 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + +criterion: + _name: ctc + 
zero_infinity: true + post_process: letter + wer_kenlm_model: /fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 5 + wer_word_score: -0.1 + wer_sil_weight: -4.7 + +optimization: + max_update: 13000 + lr: [6e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [5] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 4000 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.3 + mask_length: 3 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.25 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + update_alibi: false diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_960h.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_960h.yaml new file mode 100644 index 0000000..9d72404 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_960h.yaml @@ -0,0 +1,57 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? + normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + valid_subset: dev_other + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 24 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 320000 + lr: [0.00003] + sentence_avg: true + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_960h_2.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_960h_2.yaml new file mode 100644 index 0000000..d96e232 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_960h_2.yaml @@ -0,0 +1,105 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/960h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 16 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: /checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 + +optimization: + max_update: 200000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 200000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? 
+ apply_mask: true + mask_prob: 0.4 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_960h_2_aws.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_960h_2_aws.yaml new file mode 100644 index 0000000..41d2b38 --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_960h_2_aws.yaml @@ -0,0 +1,82 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /data/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /fsx-wav2vec/abaevski/data/librispeech + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 16 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: 
/fsx-wav2vec/abaevski/data/libri/4-gram.bin + wer_lexicon: /fsx-wav2vec/abaevski/data/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 1.5 + wer_word_score: 0 + wer_sil_weight: -1 + +optimization: + max_update: 200000 + lr: [2e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: null + warmup_steps: 8000 + hold_steps: 0 + decay_steps: 192000 + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.3 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + diff --git a/fairseq/examples/wav2vec/config/finetuning/vox_960h_3.yaml b/fairseq/examples/wav2vec/config/finetuning/vox_960h_3.yaml new file mode 100644 index 0000000..ef6597a --- /dev/null +++ b/fairseq/examples/wav2vec/config/finetuning/vox_960h_3.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + user_dir: /private/home/abaevski/fairseq-py/examples/data2vec +# tensorboard_logdir: tb + +checkpoint: + save_interval: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: /checkpoint/abaevski/data/speech/libri/1h/wav2vec/raw + labels: ltr + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1000000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 100 + validate_interval: 1 + valid_subset: dev_other + required_batch_size_multiple: 1 + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 16 + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + wer_kenlm_model: 
/checkpoint/abaevski/data/speech/libri/4-gram.bin + wer_lexicon: /checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst + wer_lm_weight: 2.0 + wer_word_score: -1.0 + +optimization: + max_update: 200000 + lr: [1e-5] +# lr: [1e-5] # base 10h wer + sentence_avg: true + update_freq: [1] # base 10h we -> 2/4 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: cosine + warmup_updates: 8000 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.4 + mask_length: 5 +# mask_prob: 0.65 # base 10h wer + mask_channel_prob: 0.1 +# mask_channel_prob: 0.6 # base 10h wer + mask_channel_length: 64 + layerdrop: 0.1 +# layerdrop: 0.05 # base 10h wer + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 100 + dropout: 0 + final_dropout: 0 + attention_dropout: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 4 + tasks_per_node: 4 + mem_gb: 250 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_base_librispeech.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_base_librispeech.yaml new file mode 100644 index 0000000..b686e21 --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_base_librispeech.yaml @@ -0,0 +1,57 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 250000 + min_sample_size: 32000 + normalize: false + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 64 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 10] + +optimization: + max_update: 400000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + feature_grad_mult: 0.1 + encoder_embed_dim: 768 diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_base_librispeech.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_base_librispeech.yaml new file mode 100644 index 0000000..912ac15 --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_base_librispeech.yaml @@ -0,0 +1,60 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 250000 + min_sample_size: 32000 + normalize: false + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 64 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 10] + +optimization: + max_update: 400000 + lr: [0.0005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + feature_grad_mult: 0.1 + encoder_embed_dim: 768 + layer_type: conformer + attn_type: espnet + pos_enc_type: rel_pos diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_large_librivox.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_large_librivox.yaml new file mode 100644 index 0000000..676166b --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_conformer_large_librivox.yaml @@ -0,0 +1,72 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + +dataset: + num_workers: 6 + max_tokens: 1200000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 128 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 0] + +optimization: + max_update: 1000000 + lr: [0.005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + extractor_mode: layer_norm + layer_norm_first: true + final_dim: 768 + latent_temp: [2.0,0.1,0.999995] + encoder_layerdrop: 0.00 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + conv_bias: true + + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + + feature_grad_mult: 1.0 + + layer_type: conformer + attn_type: espnet + pos_enc_type: rel_pos diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox.yaml new file mode 100644 index 0000000..3192ce4 --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox.yaml @@ -0,0 +1,70 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 320000 + min_sample_size: 32000 + normalize: true + +dataset: + batch_size: 4 + num_workers: 6 + max_tokens: 1200000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 128 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 0] + +optimization: + max_update: 1000000 + lr: [0.005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + extractor_mode: layer_norm + layer_norm_first: true + final_dim: 768 + latent_temp: [2.0,0.1,0.999995] + encoder_layerdrop: 0.00 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + conv_bias: true + + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + + feature_grad_mult: 1.0 + diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu-pod.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu-pod.yaml new file mode 100644 index 0000000..ff35a95 --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu-pod.yaml @@ -0,0 +1,72 @@ +# @package _group_ + +common: + tpu: true + fp16: false + log_format: json + log_interval: 10 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 250000 + min_sample_size: 32000 + normalize: true + num_batch_buckets: 3 + precompute_mask_indices: true + enable_padding: true + +dataset: + num_workers: 6 + max_tokens: 1200000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 128 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 0] + +optimization: + max_update: 1000000 + lr: [0.005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + extractor_mode: layer_norm + layer_norm_first: true + final_dim: 768 + latent_temp: [2.0,0.1,0.999995] + encoder_layerdrop: 0.00 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + conv_bias: true + + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + + feature_grad_mult: 1.0 diff --git a/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu.yaml b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu.yaml new file mode 100644 index 0000000..ee55bda --- /dev/null +++ b/fairseq/examples/wav2vec/config/pretraining/wav2vec2_large_librivox_tpu.yaml @@ -0,0 +1,77 @@ +# @package _group_ + +common: + tpu: true + fp16: false + log_format: json + log_interval: 10 + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + +task: + _name: audio_pretraining + data: ??? 
+ max_sample_size: 250000 + min_sample_size: 32000 + normalize: true + num_batch_buckets: 3 + precompute_mask_indices: true + enable_padding: true + inferred_w2v_config: + mask_prob: 0.65 + mask_selection: 'static' + mask_other: 0 + mask_channel_prob: 0.1 + +dataset: + num_workers: 6 + max_tokens: 1200000 + skip_invalid_size_inputs_valid_test: true + +distributed_training: + distributed_world_size: 8 + ddp_backend: legacy_ddp + +criterion: + _name: wav2vec + infonce: true + log_keys: ["prob_perplexity","code_perplexity","temp"] + loss_weights: [0.1, 0] + +optimization: + max_update: 1000000 + lr: [0.005] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: wav2vec2 + quantize_targets: true + extractor_mode: layer_norm + layer_norm_first: true + final_dim: 768 + latent_temp: [2.0,0.1,0.999995] + encoder_layerdrop: 0.00 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + conv_bias: true + + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + + feature_grad_mult: 1.0 diff --git a/fairseq/examples/wav2vec/libri_labels.py b/fairseq/examples/wav2vec/libri_labels.py new file mode 100644 index 0000000..694a202 --- /dev/null +++ b/fairseq/examples/wav2vec/libri_labels.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +""" +Helper script to generate word (.wrd) and letter (.ltr) transcription label files for a tsv manifest, using the LibriSpeech-style *.trans.txt files located next to the listed audio files +""" + +import argparse +import os + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("tsv") + parser.add_argument("--output-dir", required=True) + parser.add_argument("--output-name", required=True) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + + transcriptions = {} + + with open(args.tsv, "r") as tsv, open( + os.path.join(args.output_dir, args.output_name + ".ltr"), "w" + ) as ltr_out, open( + os.path.join(args.output_dir, args.output_name + ".wrd"), "w" + ) as wrd_out: + root = next(tsv).strip() + for line in tsv: + line = line.strip() + dir = os.path.dirname(line) + if dir not in transcriptions: + parts = dir.split(os.path.sep) + trans_path = f"{parts[-2]}-{parts[-1]}.trans.txt" + path = os.path.join(root, dir, trans_path) + assert os.path.exists(path) + texts = {} + with open(path, "r") as trans_f: + for tline in trans_f: + items = tline.strip().split() + texts[items[0]] = " ".join(items[1:]) + transcriptions[dir] = texts + part = os.path.basename(line).split(".")[0] + assert part in transcriptions[dir] + print(transcriptions[dir][part], file=wrd_out) + print( + " ".join(list(transcriptions[dir][part].replace(" ", "|"))) + " |", + file=ltr_out, + ) + + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/wav2vec/scripts/binarize_manifest.sh b/fairseq/examples/wav2vec/scripts/binarize_manifest.sh new file mode 100644 index 0000000..6f201bd --- /dev/null +++ b/fairseq/examples/wav2vec/scripts/binarize_manifest.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# usage: bash binarize_manifest.sh <dest_dir> <train_split> <valid_split> <fairseq_root> + +DEST_DIR=$1 +TRAIN_SPLIT=$2 +VALID_SPLIT=$3 +FAIRSEQ_ROOT=$4 + +mkdir -p $DEST_DIR + +# split file path and lengths into separate files +cut -f1 $TRAIN_SPLIT.tsv > $DEST_DIR/train_fnames.txt +cut -f1 $VALID_SPLIT.tsv > $DEST_DIR/valid_fnames.txt +cut -f2
$TRAIN_SPLIT.tsv > $DEST_DIR/train.lengths +cut -f2 $VALID_SPLIT.tsv > $DEST_DIR/valid.lengths + +# copy root directory +head -1 $TRAIN_SPLIT.tsv > $DEST_DIR/train.root +head -1 $VALID_SPLIT.tsv > $DEST_DIR/valid.root + +# remove root directory +sed -i '1d' $DEST_DIR/train_fnames.txt +sed -i '1d' $DEST_DIR/valid_fnames.txt +sed -i '1d' $DEST_DIR/train.lengths +sed -i '1d' $DEST_DIR/valid.lengths + +# insert spaces between characters +sed -i -e 's/\(.\)/\1 /g' $DEST_DIR/train_fnames.txt +sed -i -e 's/\(.\)/\1 /g' $DEST_DIR/valid_fnames.txt + +# run preprocessor +PYTHONPATH=$FAIRSEQ_ROOT python $FAIRSEQ_ROOT/fairseq_cli/preprocess.py --dataset-impl mmap --trainpref $DEST_DIR/train_fnames.txt --validpref $DEST_DIR/valid_fnames.txt --workers 60 --only-source --destdir $DEST_DIR diff --git a/fairseq/examples/wav2vec/unsupervised/README.md b/fairseq/examples/wav2vec/unsupervised/README.md new file mode 100644 index 0000000..b9d6f67 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/README.md @@ -0,0 +1,119 @@ +# wav2vec Unsupervised (wav2vec-U) + +Wav2vec Unsupervised (wav2vec-U) and the 2.0 version are frameworks for building speech recognition systems without any labeled training data as described in [Unsupervised Speech Recognition (Baevski et al., 2021)](https://ai.facebook.com/research/publications/unsupervised-speech-recognition) and [Towards End-to-end Unsupervised Speech Recognition (Liu, et al., 2022)](https://arxiv.org/abs/2204.02492). The model takes as input wav2vec 2.0 or XLSR representations (see [pretrained models](https://github.com/pytorch/fairseq/blob/main/examples/wav2vec)) as well as unlabeled speech and text data. 
+ + The training procedure consists of three consecutive main steps: +* Preparation of speech representations and text data +* Generative adversarial training (GAN) +* Iterative self-training + Kaldi LM-decoding + +## Preparation of speech and text data +Similar to [wav2vec 2.0](https://github.com/pytorch/fairseq/blob/main/examples/wav2vec/README.md), data folders contain {train,valid,test}.{tsv,wrd,phn} files, where audio paths are stored in tsv files, and word, letter or phoneme transcriptions are stored in .{wrd,ltr,phn}. + +In **/path/to/data/with_silence** you need a *train.tsv* file as well as (optionally) *{valid,test}.{tsv,wrd,phn}*. It is nice to have *10h.{tsv,phn}* files there too for reproducing the ablation study on layer selection. In **/path/to/data/without_silence** you have the same files, except *.tsv* files contain audios with silences removed using rVAD. + +Pre-requisites: +* set FAIRSEQ_ROOT environmental variable to your fairseq installation +* set RVAD_ROOT environmental variable to a checkout of [rVADfast](https://github.com/zhenghuatan/rVADfast) +* set KENLM_ROOT environmental variable to the location of [KenLM](https://github.com/kpu/kenlm) binaries +* install [PyKaldi](https://github.com/pykaldi/pykaldi) and set KALDI_ROOT environmental variable to the location of your kaldi installation. 
To use the version bundled with PyKaldi, you can use /path/to/pykaldi/tools/kaldi + +Create new audio files without silences: +```shell +# create a manifest file for the original set of audio files +python $FAIRSEQ_ROOT/examples/wav2vec/wav2vec_manifest.py /dir/to/save/audio/files --ext wav --dest /path/to/new/train.tsv --valid-percent 0 + +python scripts/vads.py -r $RVAD_ROOT < /path/to/train.tsv > train.vads + +python scripts/remove_silence.py --tsv /path/to/train.tsv --vads train.vads --out /dir/to/save/audio/files + +python $FAIRSEQ_ROOT/examples/wav2vec/wav2vec_manifest.py /dir/to/save/audio/files --ext wav --dest /path/to/new/train.tsv --valid-percent 0.01 +``` + +Next, we need to preprocess the audio data to better match phonemized text data: + +```shell +# wav2vec-U +zsh scripts/prepare_audio.sh /dir/with/{train,test,valid}.tsv /output/dir /path/to/wav2vec2/model.pt 512 14 +# wav2vec-U 2.0 +zsh scripts/prepare_audio_v2.sh /dir/with/{train,test,valid}.tsv /output/dir /path/to/wav2vec2/model.pt 64 14 +``` +Note that if you have splits different than train/valid/test, you will need to modify this script. The fourth argument is the PCA dimensionality for wav2vec-U and the number of MFCC clusters for wav2vec-U 2.0. The last argument is the 0-based index of the layer from which to extract representations. + +Now we need to prepare text data: +```shell +zsh scripts/prepare_text.sh language /path/to/text/file /output/dir 1000 espeak /path/to/fasttext/lid/model sil_prob +``` + +The fourth argument is the minimum number of observations of phones to keep. If your text corpus is small, you might want to reduce this number. + +The fifth argument is which phonemizer to use. Supported values are [espeak](http://espeak.sourceforge.net/), [espeak-ng](https://github.com/espeak-ng/espeak-ng), and [G2P](https://github.com/Kyubyong/g2p) (English only). + +Pre-trained fasttext LID models can be downloaded [here](https://fasttext.cc/docs/en/language-identification.html).
+ +The last argument is the probability of introducing silence (`<SIL>`) between the word boundaries. We found the value `0.25`/`0.5` works in general for wav2vec-U and the 2.0 version respectively, but you might want to vary it for languages that have not been tested. + +### Prepare TIMIT data +TIMIT transcripts include silence. Therefore VAD is not used for audio preprocessing, and we do not wrap transcripts with silences or insert random silence in between words. + +To prepare TIMIT data for both the matched and unmatched setups: +```shell +bash scripts/prepare_timit.sh /dir/to/timit/raw/data /output/dir /path/to/wav2vec2/model.pt +``` + +Note that we assume the TIMIT distribution with capitalized directories and filenames is used (e.g., `TRAIN/DR1/FCJF0/SA1.PHN`). + +## Generative adversarial training (GAN) + +We then use a GAN model to build a first unsupervised ASR model. The data preparation above of both speech features and text data is a necessary procedure that enables the generator to match speech to text in an unsupervised way.
+ +Launching GAN training on top of preprocessed features, with default hyperparameters, can be done with: + +``` +PREFIX=w2v_unsup_gan_xp + +# For wav2vec-U, audio features are pre-segmented +CONFIG_NAME=w2vu +TASK_DATA=/path/to/features/precompute_unfiltered_pca512_cls128_mean_pooled + +# For wav2vec-U 2.0, use raw audio features +CONFIG_NAME=w2vu2 +TASK_DATA=/path/to/features/ + +# Unpaired text input +TEXT_DATA=/path/to/data/phones # path to fairseq-preprocessed GAN data (phones dir) +KENLM_PATH=/path/to/data/phones/kenlm.phn.o4.bin # KenLM 4-gram phoneme language model (LM data = GAN data here) + +PYTHONPATH=$FAIRSEQ_ROOT PREFIX=$PREFIX fairseq-hydra-train \ + -m --config-dir config/gan \ + --config-name $CONFIG_NAME \ + task.data=${TASK_DATA} \ + task.text_data=${TEXT_DATA} \ + task.kenlm_path=${KENLM_PATH} \ + common.user_dir=${FAIRSEQ_ROOT}/examples/wav2vec/unsupervised \ + model.code_penalty=2,4 model.gradient_penalty=1.5,2.0 \ + model.smoothness_weight=0.5,0.75,1.0 'common.seed=range(0,5)' +``` + + +Once we find the best checkpoint (chosen using an unsupervised metric that combines language model perplexity and vocabulary usage), we can use it to generate phone labels (or word labels with an appropriate kaldi WFST): + +```shell +python w2vu_generate.py --config-dir config/generate --config-name viterbi \ +fairseq.common.user_dir=${FAIRSEQ_ROOT}/examples/wav2vec/unsupervised \ +fairseq.task.data=/path/to/dir/with/features \ +fairseq.common_eval.path=/path/to/gan/checkpoint \ +fairseq.dataset.gen_subset=valid results_path=/where/to/save/transcriptions +``` + +The decoding without LM works best on the same adjacent-mean-pooled features that the GAN was trained on, while decoding with LM works better on features before the adjacent timestep mean-pooling step (without the "_pooled" suffix). + +While the generator of wav2vec-U 2.0 is trained with an output frequency of 16Hz, we found decoding at a higher frequency produces better results.
This can be done by adding `decode_stride=1` or `2` to the arguments. + +## Iterative self-training + Kaldi LM-decoding +After the GAN training provides a first unsupervised model, we can then progressively refine the quality of transcriptions using several iterations of semi-supervised learning. We perform two iterations: first, we pseudo-label the training data with the unsupervised GAN model and train an HMM on the pseudo-labels. Second, we relabel the training data with the HMM and then fine-tune the original wav2vec 2.0 model using the HMM pseudo-labels with a CTC loss. Note that HMM models use phonemes as output, while wav2vec 2.0 uses letters. Both are decoded using WFST decoders into words. + + +Please see [this README](kaldi_self_train/README.md) for more instructions on how to do iterative self-training + Kaldi LM-decoding. + +*** Note: these instructions are a work in progress and will be updated over the next few days diff --git a/fairseq/examples/wav2vec/unsupervised/__init__.py b/fairseq/examples/wav2vec/unsupervised/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/examples/wav2vec/unsupervised/config/finetuning/w2v_finetune.yaml b/fairseq/examples/wav2vec/unsupervised/config/finetuning/w2v_finetune.yaml new file mode 100644 index 0000000..19a3ef3 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/finetuning/w2v_finetune.yaml @@ -0,0 +1,62 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + no_epoch_checkpoints: true + save_interval_updates: 20000 + +task: + _name: audio_finetuning + data: ???
+ normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 800000 + skip_invalid_size_inputs_valid_test: true + train_subset: train + valid_subset: valid + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 8 + find_unused_parameters: True + +criterion: + _name: ctc + zero_infinity: true + post_process: letter + +optimization: + max_update: 80000 + lr: [0.00003] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.25 + mask_channel_prob: 0.1 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 diff --git a/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu.yaml b/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu.yaml new file mode 100644 index 0000000..74f1829 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu.yaml @@ -0,0 +1,115 @@ +# @package _group_ + +common: + fp16: false + fp16_no_flatten_grads: true + log_format: json + log_interval: 100 + tensorboard_logdir: tb + reset_logging: false + suppress_crashes: false + +checkpoint: + save_interval: 1000 + save_interval_updates: 1000 + no_epoch_checkpoints: true + best_checkpoint_metric: weighted_lm_ppl + save_dir: . + +distributed_training: + distributed_world_size: 1 + +task: + _name: unpaired_audio_text + data: ??? + text_data: ??? + labels: phn + sort_by_length: false + unfiltered: false + max_length: null + append_eos: false + kenlm_path: ??? 
+ +dataset: + num_workers: 6 + batch_size: 160 + skip_invalid_size_inputs_valid_test: true + valid_subset: valid + validate_interval: 1000 + validate_interval_updates: 1000 + +criterion: + _name: model + log_keys: + - accuracy_dense + - accuracy_token + - temp + - code_ppl + +optimization: + max_update: 150000 + clip_norm: 5.0 + lr: [0] + +optimizer: + _name: composite + groups: + generator: + lr: [0.0004] + lr_float: null + optimizer: + _name: adam + adam_betas: [0.5,0.98] + adam_eps: 1e-06 + weight_decay: 0 + amsgrad: false + lr_scheduler: + _name: fixed + warmup_updates: 0 + discriminator: + lr: [ 0.0005 ] + lr_float: null + optimizer: + _name: adam + adam_betas: [0.5,0.98] + adam_eps: 1e-06 + weight_decay: 0.0001 + amsgrad: false + lr_scheduler: + _name: fixed + warmup_updates: 0 + +lr_scheduler: pass_through + +model: + _name: wav2vec_u + + discriminator_dim: 384 + discriminator_depth: 2 + discriminator_kernel: 6 + discriminator_linear_emb: false + discriminator_causal: true + discriminator_max_pool: false + discriminator_act_after_linear: false + discriminator_dropout: 0.0 + discriminator_weight_norm: false + + generator_stride: 1 + generator_kernel: 4 + generator_bias: false + generator_dropout: 0.1 + + smoothness_weight: 0.5 + smoothing: 0 + smoothing_one_sided: false + gumbel: false + hard_gumbel: false + gradient_penalty: 1.5 + code_penalty: 4.0 + temp: [ 2,0.1,0.99995 ] + input_dim: 512 + + segmentation: + type: JOIN + mean_pool_join: false + remove_zeros: false diff --git a/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu2.yaml b/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu2.yaml new file mode 100644 index 0000000..5201422 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/gan/w2vu2.yaml @@ -0,0 +1,154 @@ +# @package _group_ + +common: + fp16: false + fp16_no_flatten_grads: true + log_format: json + log_interval: 100 + tensorboard_logdir: tb + reset_logging: false + suppress_crashes: false + +checkpoint: + save_interval: 1000 
+ save_interval_updates: 1000 + no_epoch_checkpoints: true + best_checkpoint_metric: weighted_lm_ppl + save_dir: . + +distributed_training: + distributed_world_size: 1 + +task: + _name: unpaired_audio_text + data: ??? + text_data: ??? + labels: phn + sort_by_length: false + unfiltered: false + max_length: null + append_eos: false + kenlm_path: ??? + aux_target_postfix: km + +dataset: + num_workers: 6 + batch_size: 160 + skip_invalid_size_inputs_valid_test: true + valid_subset: valid + validate_interval: 1000 + validate_interval_updates: 1000 + +criterion: + _name: model + log_keys: + - accuracy_dense + - accuracy_token + - temp + - code_ppl + +optimization: + max_update: 150000 + clip_norm: 5.0 + lr: [0] + +optimizer: + _name: composite + groups: + generator: + lr: [0.00005] + lr_float: null + optimizer: + _name: adam + adam_betas: [0.5,0.98] + adam_eps: 1e-06 + weight_decay: 0 + amsgrad: false + lr_scheduler: + _name: fixed + warmup_updates: 0 + discriminator: + lr: [ 0.0003 ] + lr_float: null + optimizer: + _name: adam + adam_betas: [0.5,0.98] + adam_eps: 1e-06 + weight_decay: 0.0001 + amsgrad: false + lr_scheduler: + _name: fixed + warmup_updates: 0 + +lr_scheduler: pass_through + +model: + _name: wav2vec_u + + discriminator_dim: 384 + discriminator_depth: 2 + discriminator_kernel: 8 + discriminator_linear_emb: false + discriminator_causal: true + discriminator_max_pool: false + discriminator_act_after_linear: false + discriminator_dropout: 0.0 + discriminator_weight_norm: false + + generator_stride: 3 + generator_kernel: 9 + generator_bias: false + generator_dropout: 0.1 + generator_batch_norm: 30 + generator_residual: true + + smoothness_weight: 1.5 + smoothing: 0 + smoothing_one_sided: false + gumbel: false + hard_gumbel: false + gradient_penalty: 1.0 + code_penalty: 3.0 + temp: [ 2,0.1,0.99995 ] + input_dim: 1024 + mmi_weight: 0.5 + target_dim: 64 + + segmentation: + type: JOIN + mean_pool_join: false + remove_zeros: false + + +hydra: + job: + config: + 
override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - run_config + - distributed_training.distributed_port + - common.user_dir + - task.data + - task.kenlm_path + - task.text_data + - model.generator_layers + - task.labels + - task.force_model_seed + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}/${hydra.job.override_dirname} + subdir: ${hydra.job.num} + launcher: + submitit_folder: ${hydra.sweep.dir} + timeout_min: 3000 + cpus_per_task: 10 + gpus_per_node: 1 + tasks_per_node: 1 + mem_gb: 120 + nodes: 1 + name: ${env:PREFIX}_${hydra.job.config_name} + partition: devlab,learnlab,learnfair,scavenge + comment: intern_endding_soon + constraint: volta32gb + max_num_timeout: 30 diff --git a/fairseq/examples/wav2vec/unsupervised/config/generate/viterbi.yaml b/fairseq/examples/wav2vec/unsupervised/config/generate/viterbi.yaml new file mode 100644 index 0000000..9c88bee --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/generate/viterbi.yaml @@ -0,0 +1,21 @@ +# @package _group_ + +fairseq: + task: + _name: unpaired_audio_text + labels: phn + data: ??? + sort_by_length: false + shuffle: false + text_data: '' + + common_eval: + path: ??? 
+ quiet: true + + dataset: + gen_subset: valid + batch_size: 1 + +w2l_decoder: VITERBI +post_process: silence diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_matched/test.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/test.uid new file mode 100644 index 0000000..4010082 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/test.uid @@ -0,0 +1,192 @@ +FDHC0_SI1559 +FDHC0_SI2189 +FDHC0_SI929 +FDHC0_SX119 +FDHC0_SX209 +FDHC0_SX29 +FDHC0_SX299 +FDHC0_SX389 +FELC0_SI1386 +FELC0_SI2016 +FELC0_SI756 +FELC0_SX126 +FELC0_SX216 +FELC0_SX306 +FELC0_SX36 +FELC0_SX396 +FJLM0_SI1043 +FJLM0_SI1673 +FJLM0_SI2303 +FJLM0_SX143 +FJLM0_SX233 +FJLM0_SX323 +FJLM0_SX413 +FJLM0_SX53 +FMGD0_SI1564 +FMGD0_SI2194 +FMGD0_SI934 +FMGD0_SX124 +FMGD0_SX214 +FMGD0_SX304 +FMGD0_SX34 +FMGD0_SX394 +FMLD0_SI2185 +FMLD0_SI822 +FMLD0_SI925 +FMLD0_SX115 +FMLD0_SX205 +FMLD0_SX25 +FMLD0_SX295 +FMLD0_SX385 +FNLP0_SI1308 +FNLP0_SI1938 +FNLP0_SI678 +FNLP0_SX138 +FNLP0_SX228 +FNLP0_SX318 +FNLP0_SX408 +FNLP0_SX48 +FPAS0_SI1272 +FPAS0_SI2204 +FPAS0_SI944 +FPAS0_SX134 +FPAS0_SX224 +FPAS0_SX314 +FPAS0_SX404 +FPAS0_SX44 +FPKT0_SI1538 +FPKT0_SI2168 +FPKT0_SI908 +FPKT0_SX188 +FPKT0_SX278 +FPKT0_SX368 +FPKT0_SX8 +FPKT0_SX98 +MBPM0_SI1577 +MBPM0_SI1584 +MBPM0_SI947 +MBPM0_SX137 +MBPM0_SX227 +MBPM0_SX317 +MBPM0_SX407 +MBPM0_SX47 +MCMJ0_SI1094 +MCMJ0_SI464 +MCMJ0_SI602 +MCMJ0_SX104 +MCMJ0_SX14 +MCMJ0_SX194 +MCMJ0_SX284 +MCMJ0_SX374 +MDAB0_SI1039 +MDAB0_SI1669 +MDAB0_SI2299 +MDAB0_SX139 +MDAB0_SX229 +MDAB0_SX319 +MDAB0_SX409 +MDAB0_SX49 +MGRT0_SI1450 +MGRT0_SI2080 +MGRT0_SI820 +MGRT0_SX10 +MGRT0_SX100 +MGRT0_SX190 +MGRT0_SX280 +MGRT0_SX370 +MJDH0_SI1354 +MJDH0_SI1984 +MJDH0_SI724 +MJDH0_SX184 +MJDH0_SX274 +MJDH0_SX364 +MJDH0_SX4 +MJDH0_SX94 +MJLN0_SI1449 +MJLN0_SI2079 +MJLN0_SI819 +MJLN0_SX189 +MJLN0_SX279 +MJLN0_SX369 +MJLN0_SX9 +MJLN0_SX99 +MJMP0_SI1535 +MJMP0_SI1791 +MJMP0_SI905 +MJMP0_SX185 +MJMP0_SX275 +MJMP0_SX365 +MJMP0_SX5 +MJMP0_SX95 +MKLT0_SI1213 
+MKLT0_SI1843 +MKLT0_SI583 +MKLT0_SX133 +MKLT0_SX223 +MKLT0_SX313 +MKLT0_SX403 +MKLT0_SX43 +MLLL0_SI1363 +MLLL0_SI1993 +MLLL0_SI733 +MLLL0_SX103 +MLLL0_SX13 +MLLL0_SX193 +MLLL0_SX283 +MLLL0_SX373 +MLNT0_SI1574 +MLNT0_SI1902 +MLNT0_SI642 +MLNT0_SX102 +MLNT0_SX12 +MLNT0_SX192 +MLNT0_SX282 +MLNT0_SX372 +MNJM0_SI1580 +MNJM0_SI2210 +MNJM0_SI950 +MNJM0_SX140 +MNJM0_SX230 +MNJM0_SX320 +MNJM0_SX410 +MNJM0_SX50 +MPAM0_SI1189 +MPAM0_SI1819 +MPAM0_SI1961 +MPAM0_SX109 +MPAM0_SX19 +MPAM0_SX199 +MPAM0_SX289 +MPAM0_SX379 +MTAS1_SI1473 +MTAS1_SI2098 +MTAS1_SI838 +MTAS1_SX118 +MTAS1_SX208 +MTAS1_SX28 +MTAS1_SX298 +MTAS1_SX388 +MTLS0_SI1370 +MTLS0_SI2000 +MTLS0_SI740 +MTLS0_SX110 +MTLS0_SX20 +MTLS0_SX200 +MTLS0_SX290 +MTLS0_SX380 +MWBT0_SI1553 +MWBT0_SI2183 +MWBT0_SI923 +MWBT0_SX113 +MWBT0_SX203 +MWBT0_SX23 +MWBT0_SX293 +MWBT0_SX383 +MWEW0_SI1361 +MWEW0_SI1991 +MWEW0_SI731 +MWEW0_SX101 +MWEW0_SX11 +MWEW0_SX191 +MWEW0_SX281 +MWEW0_SX371 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train.uid new file mode 100644 index 0000000..c39fd0b --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train.uid @@ -0,0 +1,3696 @@ +FAEM0_SI1392 +FAEM0_SI2022 +FAEM0_SI762 +FAEM0_SX132 +FAEM0_SX222 +FAEM0_SX312 +FAEM0_SX402 +FAEM0_SX42 +FAJW0_SI1263 +FAJW0_SI1893 +FAJW0_SI633 +FAJW0_SX183 +FAJW0_SX273 +FAJW0_SX3 +FAJW0_SX363 +FAJW0_SX93 +FALK0_SI1086 +FALK0_SI456 +FALK0_SI658 +FALK0_SX186 +FALK0_SX276 +FALK0_SX366 +FALK0_SX6 +FALK0_SX96 +FALR0_SI1325 +FALR0_SI1955 +FALR0_SI695 +FALR0_SX155 +FALR0_SX245 +FALR0_SX335 +FALR0_SX425 +FALR0_SX65 +FAPB0_SI1063 +FAPB0_SI1693 +FAPB0_SI2323 +FAPB0_SX163 +FAPB0_SX253 +FAPB0_SX343 +FAPB0_SX433 +FAPB0_SX73 +FBAS0_SI1387 +FBAS0_SI1472 +FBAS0_SI2066 +FBAS0_SX127 +FBAS0_SX217 +FBAS0_SX307 +FBAS0_SX37 +FBAS0_SX397 +FBCG1_SI1612 +FBCG1_SI2242 +FBCG1_SI982 +FBCG1_SX172 +FBCG1_SX262 +FBCG1_SX352 +FBCG1_SX442 +FBCG1_SX82 +FBCH0_SI1586 +FBCH0_SI956 
+FBCH0_SI959 +FBCH0_SX146 +FBCH0_SX236 +FBCH0_SX326 +FBCH0_SX416 +FBCH0_SX56 +FBJL0_SI1552 +FBJL0_SI2182 +FBJL0_SI922 +FBJL0_SX112 +FBJL0_SX202 +FBJL0_SX22 +FBJL0_SX292 +FBJL0_SX382 +FBLV0_SI1058 +FBLV0_SI1688 +FBLV0_SI2318 +FBLV0_SX158 +FBLV0_SX248 +FBLV0_SX338 +FBLV0_SX428 +FBLV0_SX68 +FBMH0_SI1136 +FBMH0_SI1766 +FBMH0_SI970 +FBMH0_SX146 +FBMH0_SX236 +FBMH0_SX326 +FBMH0_SX416 +FBMH0_SX56 +FBMJ0_SI1776 +FBMJ0_SI516 +FBMJ0_SI815 +FBMJ0_SX156 +FBMJ0_SX246 +FBMJ0_SX336 +FBMJ0_SX426 +FBMJ0_SX66 +FCAG0_SI1503 +FCAG0_SI1641 +FCAG0_SI2133 +FCAG0_SX153 +FCAG0_SX243 +FCAG0_SX333 +FCAG0_SX423 +FCAG0_SX63 +FCAJ0_SI1479 +FCAJ0_SI1804 +FCAJ0_SI849 +FCAJ0_SX129 +FCAJ0_SX219 +FCAJ0_SX309 +FCAJ0_SX39 +FCAJ0_SX399 +FCDR1_SI1186 +FCDR1_SI1816 +FCDR1_SI556 +FCDR1_SX106 +FCDR1_SX16 +FCDR1_SX196 +FCDR1_SX286 +FCDR1_SX376 +FCEG0_SI1248 +FCEG0_SI1878 +FCEG0_SI618 +FCEG0_SX168 +FCEG0_SX258 +FCEG0_SX348 +FCEG0_SX438 +FCEG0_SX78 +FCJF0_SI1027 +FCJF0_SI1657 +FCJF0_SI648 +FCJF0_SX127 +FCJF0_SX217 +FCJF0_SX307 +FCJF0_SX37 +FCJF0_SX397 +FCJS0_SI1607 +FCJS0_SI2237 +FCJS0_SI977 +FCJS0_SX167 +FCJS0_SX257 +FCJS0_SX347 +FCJS0_SX437 +FCJS0_SX77 +FCKE0_SI1111 +FCKE0_SI1741 +FCKE0_SI481 +FCKE0_SX121 +FCKE0_SX211 +FCKE0_SX301 +FCKE0_SX31 +FCKE0_SX391 +FCLT0_SI1438 +FCLT0_SI2068 +FCLT0_SI808 +FCLT0_SX178 +FCLT0_SX268 +FCLT0_SX358 +FCLT0_SX448 +FCLT0_SX88 +FCMG0_SI1142 +FCMG0_SI1242 +FCMG0_SI1872 +FCMG0_SX162 +FCMG0_SX252 +FCMG0_SX342 +FCMG0_SX432 +FCMG0_SX72 +FCMM0_SI1083 +FCMM0_SI1957 +FCMM0_SI453 +FCMM0_SX183 +FCMM0_SX273 +FCMM0_SX363 +FCMM0_SX420 +FCMM0_SX93 +FCRZ0_SI1913 +FCRZ0_SI2053 +FCRZ0_SI793 +FCRZ0_SX163 +FCRZ0_SX253 +FCRZ0_SX343 +FCRZ0_SX433 +FCRZ0_SX73 +FCYL0_SI1297 +FCYL0_SI1927 +FCYL0_SI667 +FCYL0_SX127 +FCYL0_SX217 +FCYL0_SX349 +FCYL0_SX37 +FCYL0_SX397 +FDAS1_SI1461 +FDAS1_SI2091 +FDAS1_SI831 +FDAS1_SX111 +FDAS1_SX201 +FDAS1_SX21 +FDAS1_SX291 +FDAS1_SX381 +FDAW0_SI1271 +FDAW0_SI1406 +FDAW0_SI2036 +FDAW0_SX146 +FDAW0_SX236 +FDAW0_SX326 +FDAW0_SX416 +FDAW0_SX56 +FDFB0_SI1318 +FDFB0_SI1948 
+FDFB0_SI2010 +FDFB0_SX148 +FDFB0_SX238 +FDFB0_SX328 +FDFB0_SX418 +FDFB0_SX58 +FDJH0_SI1565 +FDJH0_SI2195 +FDJH0_SI935 +FDJH0_SX125 +FDJH0_SX215 +FDJH0_SX305 +FDJH0_SX35 +FDJH0_SX395 +FDKN0_SI1081 +FDKN0_SI1202 +FDKN0_SI1711 +FDKN0_SX181 +FDKN0_SX271 +FDKN0_SX361 +FDKN0_SX451 +FDKN0_SX91 +FDML0_SI1149 +FDML0_SI1779 +FDML0_SI2075 +FDML0_SX159 +FDML0_SX249 +FDML0_SX339 +FDML0_SX429 +FDML0_SX69 +FDMY0_SI1197 +FDMY0_SI567 +FDMY0_SI714 +FDMY0_SX117 +FDMY0_SX207 +FDMY0_SX27 +FDMY0_SX297 +FDMY0_SX387 +FDNC0_SI1278 +FDNC0_SI1908 +FDNC0_SI2287 +FDNC0_SX108 +FDNC0_SX18 +FDNC0_SX198 +FDNC0_SX288 +FDNC0_SX378 +FDTD0_SI1561 +FDTD0_SI2191 +FDTD0_SI931 +FDTD0_SX121 +FDTD0_SX211 +FDTD0_SX301 +FDTD0_SX321 +FDTD0_SX391 +FDXW0_SI1511 +FDXW0_SI2141 +FDXW0_SI881 +FDXW0_SX161 +FDXW0_SX251 +FDXW0_SX341 +FDXW0_SX431 +FDXW0_SX71 +FEAC0_SI1245 +FEAC0_SI1875 +FEAC0_SI615 +FEAC0_SX165 +FEAC0_SX255 +FEAC0_SX345 +FEAC0_SX435 +FEAC0_SX75 +FEAR0_SI1252 +FEAR0_SI1882 +FEAR0_SI622 +FEAR0_SX172 +FEAR0_SX262 +FEAR0_SX352 +FEAR0_SX442 +FEAR0_SX82 +FECD0_SI1418 +FECD0_SI2048 +FECD0_SI788 +FECD0_SX158 +FECD0_SX248 +FECD0_SX338 +FECD0_SX428 +FECD0_SX68 +FEEH0_SI1112 +FEEH0_SI1742 +FEEH0_SI471 +FEEH0_SX122 +FEEH0_SX212 +FEEH0_SX302 +FEEH0_SX32 +FEEH0_SX392 +FEME0_SI1505 +FEME0_SI2135 +FEME0_SI875 +FEME0_SX155 +FEME0_SX245 +FEME0_SX335 +FEME0_SX425 +FEME0_SX65 +FETB0_SI1148 +FETB0_SI1778 +FETB0_SI518 +FETB0_SX158 +FETB0_SX248 +FETB0_SX338 +FETB0_SX428 +FETB0_SX68 +FEXM0_SI1101 +FEXM0_SI1731 +FEXM0_SI482 +FEXM0_SX111 +FEXM0_SX201 +FEXM0_SX291 +FEXM0_SX366 +FEXM0_SX381 +FGCS0_SI1486 +FGCS0_SI2116 +FGCS0_SI856 +FGCS0_SX136 +FGCS0_SX226 +FGCS0_SX316 +FGCS0_SX406 +FGCS0_SX46 +FGDP0_SI1618 +FGDP0_SI2248 +FGDP0_SI988 +FGDP0_SX178 +FGDP0_SX268 +FGDP0_SX358 +FGDP0_SX448 +FGDP0_SX88 +FGMB0_SI1145 +FGMB0_SI1775 +FGMB0_SI515 +FGMB0_SX155 +FGMB0_SX245 +FGMB0_SX335 +FGMB0_SX425 +FGMB0_SX65 +FGRW0_SI1152 +FGRW0_SI1782 +FGRW0_SI1990 +FGRW0_SX162 +FGRW0_SX252 +FGRW0_SX342 +FGRW0_SX432 +FGRW0_SX72 +FHLM0_SI1560 
+FHLM0_SI2190 +FHLM0_SI930 +FHLM0_SX120 +FHLM0_SX210 +FHLM0_SX300 +FHLM0_SX349 +FHLM0_SX390 +FHXS0_SI1075 +FHXS0_SI2302 +FHXS0_SI2335 +FHXS0_SX175 +FHXS0_SX265 +FHXS0_SX355 +FHXS0_SX445 +FHXS0_SX85 +FJDM2_SI1582 +FJDM2_SI1964 +FJDM2_SI2212 +FJDM2_SX142 +FJDM2_SX232 +FJDM2_SX322 +FJDM2_SX412 +FJDM2_SX52 +FJEN0_SI1047 +FJEN0_SI1677 +FJEN0_SI2307 +FJEN0_SX147 +FJEN0_SX237 +FJEN0_SX327 +FJEN0_SX417 +FJEN0_SX57 +FJHK0_SI1022 +FJHK0_SI1652 +FJHK0_SI2282 +FJHK0_SX122 +FJHK0_SX212 +FJHK0_SX302 +FJHK0_SX32 +FJHK0_SX392 +FJKL0_SI1562 +FJKL0_SI2192 +FJKL0_SI932 +FJKL0_SX122 +FJKL0_SX212 +FJKL0_SX302 +FJKL0_SX32 +FJKL0_SX392 +FJLG0_SI1506 +FJLG0_SI1889 +FJLG0_SI2306 +FJLG0_SX179 +FJLG0_SX269 +FJLG0_SX359 +FJLG0_SX449 +FJLG0_SX89 +FJLR0_SI1231 +FJLR0_SI1861 +FJLR0_SI601 +FJLR0_SX151 +FJLR0_SX241 +FJLR0_SX331 +FJLR0_SX421 +FJLR0_SX61 +FJRB0_SI1302 +FJRB0_SI1932 +FJRB0_SI672 +FJRB0_SX132 +FJRB0_SX222 +FJRB0_SX312 +FJRB0_SX402 +FJRB0_SX42 +FJRP1_SI1432 +FJRP1_SI2062 +FJRP1_SI802 +FJRP1_SX172 +FJRP1_SX262 +FJRP1_SX352 +FJRP1_SX442 +FJRP1_SX82 +FJSK0_SI1052 +FJSK0_SI1682 +FJSK0_SI2312 +FJSK0_SX152 +FJSK0_SX242 +FJSK0_SX332 +FJSK0_SX422 +FJSK0_SX62 +FJSP0_SI1434 +FJSP0_SI1763 +FJSP0_SI804 +FJSP0_SX174 +FJSP0_SX264 +FJSP0_SX354 +FJSP0_SX444 +FJSP0_SX84 +FJWB1_SI2055 +FJWB1_SI748 +FJWB1_SI795 +FJWB1_SX165 +FJWB1_SX255 +FJWB1_SX345 +FJWB1_SX435 +FJWB1_SX75 +FJXM0_SI1211 +FJXM0_SI1971 +FJXM0_SI581 +FJXM0_SX131 +FJXM0_SX221 +FJXM0_SX311 +FJXM0_SX401 +FJXM0_SX41 +FJXP0_SI1122 +FJXP0_SI1752 +FJXP0_SI492 +FJXP0_SX132 +FJXP0_SX222 +FJXP0_SX312 +FJXP0_SX402 +FJXP0_SX42 +FKAA0_SI1208 +FKAA0_SI1838 +FKAA0_SI578 +FKAA0_SX128 +FKAA0_SX218 +FKAA0_SX308 +FKAA0_SX38 +FKAA0_SX398 +FKDE0_SI1141 +FKDE0_SI1771 +FKDE0_SI2221 +FKDE0_SX151 +FKDE0_SX241 +FKDE0_SX331 +FKDE0_SX421 +FKDE0_SX61 +FKDW0_SI1207 +FKDW0_SI1891 +FKDW0_SI577 +FKDW0_SX127 +FKDW0_SX217 +FKDW0_SX307 +FKDW0_SX37 +FKDW0_SX397 +FKFB0_SI1608 +FKFB0_SI2238 +FKFB0_SI978 +FKFB0_SX168 +FKFB0_SX258 +FKFB0_SX348 +FKFB0_SX438 +FKFB0_SX78 
+FKKH0_SI1290 +FKKH0_SI1920 +FKKH0_SI660 +FKKH0_SX120 +FKKH0_SX210 +FKKH0_SX30 +FKKH0_SX300 +FKKH0_SX390 +FKLC0_SI1615 +FKLC0_SI2245 +FKLC0_SI985 +FKLC0_SX175 +FKLC0_SX265 +FKLC0_SX355 +FKLC0_SX445 +FKLC0_SX85 +FKLC1_SI1048 +FKLC1_SI1678 +FKLC1_SI2308 +FKLC1_SX148 +FKLC1_SX238 +FKLC1_SX328 +FKLC1_SX418 +FKLC1_SX58 +FKLH0_SI1257 +FKLH0_SI1887 +FKLH0_SI627 +FKLH0_SX177 +FKLH0_SX267 +FKLH0_SX357 +FKLH0_SX447 +FKLH0_SX87 +FKSR0_SI1117 +FKSR0_SI1747 +FKSR0_SI487 +FKSR0_SX161 +FKSR0_SX217 +FKSR0_SX366 +FKSR0_SX37 +FKSR0_SX397 +FLAC0_SI1339 +FLAC0_SI2161 +FLAC0_SI901 +FLAC0_SX181 +FLAC0_SX271 +FLAC0_SX361 +FLAC0_SX451 +FLAC0_SX91 +FLAG0_SI1464 +FLAG0_SI2094 +FLAG0_SI834 +FLAG0_SX114 +FLAG0_SX204 +FLAG0_SX24 +FLAG0_SX294 +FLAG0_SX384 +FLEH0_SI1051 +FLEH0_SI1681 +FLEH0_SI2311 +FLEH0_SX151 +FLEH0_SX241 +FLEH0_SX331 +FLEH0_SX421 +FLEH0_SX61 +FLET0_SI1137 +FLET0_SI1767 +FLET0_SI507 +FLET0_SX147 +FLET0_SX237 +FLET0_SX277 +FLET0_SX417 +FLET0_SX57 +FLHD0_SI1344 +FLHD0_SI1827 +FLHD0_SI1974 +FLHD0_SX174 +FLHD0_SX264 +FLHD0_SX354 +FLHD0_SX444 +FLHD0_SX84 +FLJA0_SI1078 +FLJA0_SI1708 +FLJA0_SI2338 +FLJA0_SX178 +FLJA0_SX268 +FLJA0_SX358 +FLJA0_SX448 +FLJA0_SX88 +FLJD0_SI1516 +FLJD0_SI2146 +FLJD0_SI886 +FLJD0_SX166 +FLJD0_SX256 +FLJD0_SX346 +FLJD0_SX436 +FLJD0_SX76 +FLJG0_SI1611 +FLJG0_SI2241 +FLJG0_SI981 +FLJG0_SX171 +FLJG0_SX261 +FLJG0_SX351 +FLJG0_SX441 +FLJG0_SX81 +FLKM0_SI1880 +FLKM0_SI620 +FLKM0_SI686 +FLKM0_SX116 +FLKM0_SX260 +FLKM0_SX350 +FLKM0_SX440 +FLKM0_SX80 +FLMA0_SI1243 +FLMA0_SI1873 +FLMA0_SI613 +FLMA0_SX163 +FLMA0_SX253 +FLMA0_SX343 +FLMA0_SX433 +FLMA0_SX73 +FLMC0_SI1372 +FLMC0_SI2002 +FLMC0_SI742 +FLMC0_SX112 +FLMC0_SX22 +FLMC0_SX292 +FLMC0_SX336 +FLMC0_SX382 +FLMK0_SI1035 +FLMK0_SI1229 +FLMK0_SI2295 +FLMK0_SX135 +FLMK0_SX225 +FLMK0_SX315 +FLMK0_SX405 +FLMK0_SX45 +FLOD0_SI1287 +FLOD0_SI1917 +FLOD0_SI657 +FLOD0_SX117 +FLOD0_SX171 +FLOD0_SX207 +FLOD0_SX297 +FLOD0_SX387 +FLTM0_SI1070 +FLTM0_SI1700 +FLTM0_SI2330 +FLTM0_SX170 +FLTM0_SX260 +FLTM0_SX350 +FLTM0_SX440 
+FLTM0_SX80 +FMAH1_SI1509 +FMAH1_SI2139 +FMAH1_SI879 +FMAH1_SX159 +FMAH1_SX249 +FMAH1_SX339 +FMAH1_SX429 +FMAH1_SX69 +FMBG0_SI1160 +FMBG0_SI1790 +FMBG0_SI2264 +FMBG0_SX260 +FMBG0_SX3 +FMBG0_SX350 +FMBG0_SX440 +FMBG0_SX80 +FMEM0_SI1377 +FMEM0_SI2007 +FMEM0_SI747 +FMEM0_SX117 +FMEM0_SX207 +FMEM0_SX297 +FMEM0_SX333 +FMEM0_SX387 +FMJB0_SI1177 +FMJB0_SI1807 +FMJB0_SI547 +FMJB0_SX187 +FMJB0_SX277 +FMJB0_SX367 +FMJB0_SX7 +FMJB0_SX97 +FMJF0_SI1254 +FMJF0_SI1884 +FMJF0_SI624 +FMJF0_SX174 +FMJF0_SX264 +FMJF0_SX354 +FMJF0_SX444 +FMJF0_SX84 +FMJU0_SI1389 +FMJU0_SI2019 +FMJU0_SI759 +FMJU0_SX129 +FMJU0_SX219 +FMJU0_SX309 +FMJU0_SX39 +FMJU0_SX399 +FMKC0_SI1041 +FMKC0_SI1072 +FMKC0_SI1702 +FMKC0_SX172 +FMKC0_SX262 +FMKC0_SX352 +FMKC0_SX442 +FMKC0_SX82 +FMKF0_SI1018 +FMKF0_SI1536 +FMKF0_SI906 +FMKF0_SX186 +FMKF0_SX276 +FMKF0_SX366 +FMKF0_SX6 +FMKF0_SX96 +FMMH0_SI1537 +FMMH0_SI2167 +FMMH0_SI907 +FMMH0_SX187 +FMMH0_SX367 +FMMH0_SX420 +FMMH0_SX7 +FMMH0_SX97 +FMPG0_SI1602 +FMPG0_SI2232 +FMPG0_SI972 +FMPG0_SX162 +FMPG0_SX252 +FMPG0_SX342 +FMPG0_SX432 +FMPG0_SX72 +FNKL0_SI1522 +FNKL0_SI2152 +FNKL0_SI892 +FNKL0_SX172 +FNKL0_SX196 +FNKL0_SX262 +FNKL0_SX442 +FNKL0_SX82 +FNTB0_SI1203 +FNTB0_SI573 +FNTB0_SI679 +FNTB0_SX123 +FNTB0_SX213 +FNTB0_SX303 +FNTB0_SX33 +FNTB0_SX393 +FPAB1_SI1471 +FPAB1_SI2101 +FPAB1_SI841 +FPAB1_SX121 +FPAB1_SX211 +FPAB1_SX301 +FPAB1_SX31 +FPAB1_SX391 +FPAC0_SI1921 +FPAC0_SI2011 +FPAC0_SI661 +FPAC0_SX121 +FPAC0_SX211 +FPAC0_SX301 +FPAC0_SX31 +FPAC0_SX391 +FPAD0_SI1346 +FPAD0_SI1976 +FPAD0_SI716 +FPAD0_SX176 +FPAD0_SX266 +FPAD0_SX356 +FPAD0_SX446 +FPAD0_SX86 +FPAF0_SI1054 +FPAF0_SI1684 +FPAF0_SI2314 +FPAF0_SX154 +FPAF0_SX244 +FPAF0_SX334 +FPAF0_SX424 +FPAF0_SX64 +FPAZ0_SI1593 +FPAZ0_SI2223 +FPAZ0_SI963 +FPAZ0_SX153 +FPAZ0_SX243 +FPAZ0_SX27 +FPAZ0_SX423 +FPAZ0_SX63 +FPJF0_SI1046 +FPJF0_SI1259 +FPJF0_SI1676 +FPJF0_SX146 +FPJF0_SX236 +FPJF0_SX326 +FPJF0_SX352 +FPJF0_SX56 +FPLS0_SI1590 +FPLS0_SI2220 +FPLS0_SI960 +FPLS0_SX150 +FPLS0_SX240 +FPLS0_SX3 +FPLS0_SX330 +FPLS0_SX60 
+FPMY0_SI1153 +FPMY0_SI1783 +FPMY0_SI523 +FPMY0_SX163 +FPMY0_SX196 +FPMY0_SX253 +FPMY0_SX343 +FPMY0_SX73 +FREH0_SI1315 +FREH0_SI1945 +FREH0_SI685 +FREH0_SX145 +FREH0_SX235 +FREH0_SX325 +FREH0_SX415 +FREH0_SX55 +FRJB0_SI1427 +FRJB0_SI1470 +FRJB0_SI1794 +FRJB0_SX167 +FRJB0_SX257 +FRJB0_SX347 +FRJB0_SX437 +FRJB0_SX77 +FRLL0_SI1514 +FRLL0_SI805 +FRLL0_SI884 +FRLL0_SX164 +FRLL0_SX254 +FRLL0_SX344 +FRLL0_SX434 +FRLL0_SX74 +FSAG0_SI1323 +FSAG0_SI1953 +FSAG0_SI693 +FSAG0_SX153 +FSAG0_SX243 +FSAG0_SX333 +FSAG0_SX423 +FSAG0_SX63 +FSAH0_SI1244 +FSAH0_SI1874 +FSAH0_SI614 +FSAH0_SX164 +FSAH0_SX327 +FSAH0_SX344 +FSAH0_SX434 +FSAH0_SX74 +FSAK0_SI1300 +FSAK0_SI1930 +FSAK0_SI670 +FSAK0_SX130 +FSAK0_SX220 +FSAK0_SX310 +FSAK0_SX40 +FSAK0_SX400 +FSBK0_SI1069 +FSBK0_SI1699 +FSBK0_SI2329 +FSBK0_SX169 +FSBK0_SX259 +FSBK0_SX349 +FSBK0_SX439 +FSBK0_SX79 +FSCN0_SI1886 +FSCN0_SI626 +FSCN0_SI705 +FSCN0_SX176 +FSCN0_SX266 +FSCN0_SX356 +FSCN0_SX446 +FSCN0_SX86 +FSDC0_SI1312 +FSDC0_SI1942 +FSDC0_SI2234 +FSDC0_SX142 +FSDC0_SX232 +FSDC0_SX322 +FSDC0_SX412 +FSDC0_SX52 +FSDJ0_SI1115 +FSDJ0_SI1745 +FSDJ0_SI485 +FSDJ0_SX125 +FSDJ0_SX215 +FSDJ0_SX305 +FSDJ0_SX35 +FSDJ0_SX395 +FSGF0_SI1557 +FSGF0_SI2187 +FSGF0_SI927 +FSGF0_SX117 +FSGF0_SX207 +FSGF0_SX27 +FSGF0_SX297 +FSGF0_SX387 +FSJG0_SI1570 +FSJG0_SI2200 +FSJG0_SI940 +FSJG0_SX130 +FSJG0_SX220 +FSJG0_SX310 +FSJG0_SX40 +FSJG0_SX400 +FSJK1_SI1025 +FSJK1_SI2285 +FSJK1_SI696 +FSJK1_SX125 +FSJK1_SX215 +FSJK1_SX305 +FSJK1_SX35 +FSJK1_SX395 +FSJS0_SI1171 +FSJS0_SI1801 +FSJS0_SI541 +FSJS0_SX181 +FSJS0_SX271 +FSJS0_SX361 +FSJS0_SX451 +FSJS0_SX91 +FSJW0_SI1333 +FSJW0_SI1963 +FSJW0_SI703 +FSJW0_SX163 +FSJW0_SX253 +FSJW0_SX343 +FSJW0_SX433 +FSJW0_SX73 +FSKC0_SI1416 +FSKC0_SI2046 +FSKC0_SI786 +FSKC0_SX156 +FSKC0_SX246 +FSKC0_SX336 +FSKC0_SX426 +FSKC0_SX66 +FSKL0_SI1529 +FSKL0_SI2159 +FSKL0_SI899 +FSKL0_SX179 +FSKL0_SX269 +FSKL0_SX359 +FSKL0_SX449 +FSKL0_SX89 +FSKP0_SI1098 +FSKP0_SI1728 +FSKP0_SI468 +FSKP0_SX108 +FSKP0_SX18 +FSKP0_SX198 +FSKP0_SX288 +FSKP0_SX378 
+FSLS0_SI1056 +FSLS0_SI1686 +FSLS0_SI2316 +FSLS0_SX156 +FSLS0_SX202 +FSLS0_SX246 +FSLS0_SX426 +FSLS0_SX66 +FSMA0_SI1621 +FSMA0_SI2251 +FSMA0_SI991 +FSMA0_SX181 +FSMA0_SX271 +FSMA0_SX361 +FSMA0_SX451 +FSMA0_SX91 +FSMM0_SI1314 +FSMM0_SI1944 +FSMM0_SI684 +FSMM0_SX144 +FSMM0_SX234 +FSMM0_SX324 +FSMM0_SX414 +FSMM0_SX54 +FSMS1_SI1504 +FSMS1_SI2134 +FSMS1_SI874 +FSMS1_SX154 +FSMS1_SX244 +FSMS1_SX334 +FSMS1_SX347 +FSMS1_SX64 +FSPM0_SI1241 +FSPM0_SI1871 +FSPM0_SI611 +FSPM0_SX161 +FSPM0_SX251 +FSPM0_SX341 +FSPM0_SX431 +FSPM0_SX71 +FSRH0_SI1719 +FSRH0_SI1931 +FSRH0_SI671 +FSRH0_SX131 +FSRH0_SX221 +FSRH0_SX311 +FSRH0_SX401 +FSRH0_SX41 +FSSB0_SI1082 +FSSB0_SI1712 +FSSB0_SI2342 +FSSB0_SX182 +FSSB0_SX272 +FSSB0_SX362 +FSSB0_SX452 +FSSB0_SX92 +FTAJ0_SI1329 +FTAJ0_SI474 +FTAJ0_SI699 +FTAJ0_SX159 +FTAJ0_SX249 +FTAJ0_SX339 +FTAJ0_SX429 +FTAJ0_SX69 +FTBR0_SI1402 +FTBR0_SI2181 +FTBR0_SI921 +FTBR0_SX111 +FTBR0_SX201 +FTBR0_SX21 +FTBR0_SX291 +FTBR0_SX381 +FTBW0_SI1345 +FTBW0_SI1975 +FTBW0_SI715 +FTBW0_SX175 +FTBW0_SX265 +FTBW0_SX355 +FTBW0_SX445 +FTBW0_SX85 +FTLG0_SI1743 +FTLG0_SI483 +FTLG0_SI840 +FTLG0_SX123 +FTLG0_SX213 +FTLG0_SX303 +FTLG0_SX33 +FTLG0_SX393 +FTMG0_SI1532 +FTMG0_SI2162 +FTMG0_SI902 +FTMG0_SX182 +FTMG0_SX272 +FTMG0_SX362 +FTMG0_SX452 +FTMG0_SX92 +FVFB0_SI1032 +FVFB0_SI1510 +FVFB0_SI2292 +FVFB0_SX132 +FVFB0_SX222 +FVFB0_SX312 +FVFB0_SX402 +FVFB0_SX42 +FVKB0_SI1159 +FVKB0_SI1789 +FVKB0_SI529 +FVKB0_SX169 +FVKB0_SX259 +FVKB0_SX349 +FVKB0_SX439 +FVKB0_SX79 +FVMH0_SI1466 +FVMH0_SI2096 +FVMH0_SI836 +FVMH0_SX116 +FVMH0_SX206 +FVMH0_SX26 +FVMH0_SX296 +FVMH0_SX386 +MABC0_SI1620 +MABC0_SI2041 +MABC0_SI781 +MABC0_SX151 +MABC0_SX241 +MABC0_SX331 +MABC0_SX421 +MABC0_SX61 +MADC0_SI1367 +MADC0_SI1997 +MADC0_SI737 +MADC0_SX107 +MADC0_SX17 +MADC0_SX197 +MADC0_SX287 +MADC0_SX377 +MADD0_SI1295 +MADD0_SI1798 +MADD0_SI538 +MADD0_SX178 +MADD0_SX268 +MADD0_SX358 +MADD0_SX448 +MADD0_SX88 +MAEB0_SI1411 +MAEB0_SI2250 +MAEB0_SI990 +MAEB0_SX180 +MAEB0_SX270 +MAEB0_SX360 +MAEB0_SX450 +MAEB0_SX90 
+MAEO0_SI1326 +MAEO0_SI1655 +MAEO0_SI1956 +MAEO0_SX156 +MAEO0_SX246 +MAEO0_SX336 +MAEO0_SX426 +MAEO0_SX66 +MAFM0_SI1569 +MAFM0_SI2199 +MAFM0_SI939 +MAFM0_SX129 +MAFM0_SX219 +MAFM0_SX309 +MAFM0_SX39 +MAFM0_SX399 +MAJP0_SI1074 +MAJP0_SI1704 +MAJP0_SI2334 +MAJP0_SX174 +MAJP0_SX264 +MAJP0_SX354 +MAJP0_SX444 +MAJP0_SX84 +MAKB0_SI1016 +MAKB0_SI1646 +MAKB0_SI2276 +MAKB0_SX116 +MAKB0_SX206 +MAKB0_SX26 +MAKB0_SX296 +MAKB0_SX386 +MAKR0_SI1352 +MAKR0_SI1982 +MAKR0_SI722 +MAKR0_SX182 +MAKR0_SX272 +MAKR0_SX362 +MAKR0_SX452 +MAKR0_SX92 +MAPV0_SI1293 +MAPV0_SI1923 +MAPV0_SI663 +MAPV0_SX123 +MAPV0_SX213 +MAPV0_SX303 +MAPV0_SX33 +MAPV0_SX393 +MARC0_SI1188 +MARC0_SI1818 +MARC0_SI558 +MARC0_SX108 +MARC0_SX18 +MARC0_SX198 +MARC0_SX288 +MARC0_SX378 +MARW0_SI1276 +MARW0_SI1906 +MARW0_SI646 +MARW0_SX106 +MARW0_SX16 +MARW0_SX286 +MARW0_SX349 +MARW0_SX376 +MBAR0_SI1319 +MBAR0_SI1949 +MBAR0_SI689 +MBAR0_SX149 +MBAR0_SX239 +MBAR0_SX329 +MBAR0_SX419 +MBAR0_SX59 +MBBR0_SI1055 +MBBR0_SI1685 +MBBR0_SI2315 +MBBR0_SX155 +MBBR0_SX245 +MBBR0_SX335 +MBBR0_SX425 +MBBR0_SX65 +MBCG0_SI2217 +MBCG0_SI486 +MBCG0_SI957 +MBCG0_SX147 +MBCG0_SX237 +MBCG0_SX327 +MBCG0_SX417 +MBCG0_SX57 +MBEF0_SI1281 +MBEF0_SI1911 +MBEF0_SI651 +MBEF0_SX111 +MBEF0_SX201 +MBEF0_SX21 +MBEF0_SX291 +MBEF0_SX381 +MBGT0_SI1341 +MBGT0_SI1841 +MBGT0_SI711 +MBGT0_SX171 +MBGT0_SX261 +MBGT0_SX351 +MBGT0_SX441 +MBGT0_SX81 +MBJV0_SI1247 +MBJV0_SI1877 +MBJV0_SI617 +MBJV0_SX167 +MBJV0_SX257 +MBJV0_SX347 +MBJV0_SX437 +MBJV0_SX77 +MBMA0_SI1222 +MBMA0_SI1852 +MBMA0_SI592 +MBMA0_SX142 +MBMA0_SX232 +MBMA0_SX322 +MBMA0_SX412 +MBMA0_SX52 +MBMA1_SI2207 +MBMA1_SI2214 +MBMA1_SI954 +MBMA1_SX144 +MBMA1_SX234 +MBMA1_SX324 +MBMA1_SX414 +MBMA1_SX54 +MBML0_SI1169 +MBML0_SI1799 +MBML0_SI539 +MBML0_SX179 +MBML0_SX269 +MBML0_SX359 +MBML0_SX449 +MBML0_SX89 +MBOM0_SI1014 +MBOM0_SI1644 +MBOM0_SI2274 +MBOM0_SX114 +MBOM0_SX204 +MBOM0_SX294 +MBOM0_SX311 +MBOM0_SX384 +MBSB0_SI1353 +MBSB0_SI1983 +MBSB0_SI723 +MBSB0_SX183 +MBSB0_SX273 +MBSB0_SX3 +MBSB0_SX363 +MBSB0_SX93 
+MBTH0_SI2102 +MBTH0_SI505 +MBTH0_SI757 +MBTH0_SX122 +MBTH0_SX212 +MBTH0_SX302 +MBTH0_SX32 +MBTH0_SX392 +MBWP0_SI1531 +MBWP0_SI1969 +MBWP0_SI709 +MBWP0_SX169 +MBWP0_SX259 +MBWP0_SX349 +MBWP0_SX439 +MBWP0_SX79 +MCAE0_SI1447 +MCAE0_SI2077 +MCAE0_SI817 +MCAE0_SX187 +MCAE0_SX277 +MCAE0_SX367 +MCAE0_SX7 +MCAE0_SX97 +MCAL0_SI1138 +MCAL0_SI1768 +MCAL0_SI508 +MCAL0_SX148 +MCAL0_SX238 +MCAL0_SX328 +MCAL0_SX418 +MCAL0_SX58 +MCDC0_SI1292 +MCDC0_SI1922 +MCDC0_SI662 +MCDC0_SX122 +MCDC0_SX212 +MCDC0_SX302 +MCDC0_SX32 +MCDC0_SX392 +MCDD0_SI1513 +MCDD0_SI2143 +MCDD0_SI883 +MCDD0_SX163 +MCDD0_SX253 +MCDD0_SX343 +MCDD0_SX433 +MCDD0_SX73 +MCDR0_SI1154 +MCDR0_SI1784 +MCDR0_SI524 +MCDR0_SX164 +MCDR0_SX254 +MCDR0_SX344 +MCDR0_SX434 +MCDR0_SX74 +MCEF0_SI1135 +MCEF0_SI1765 +MCEF0_SI842 +MCEF0_SX145 +MCEF0_SX235 +MCEF0_SX325 +MCEF0_SX415 +MCEF0_SX55 +MCEW0_SI1442 +MCEW0_SI2072 +MCEW0_SI812 +MCEW0_SX182 +MCEW0_SX272 +MCEW0_SX362 +MCEW0_SX452 +MCEW0_SX92 +MCHL0_SI1347 +MCHL0_SI1404 +MCHL0_SI1977 +MCHL0_SX177 +MCHL0_SX267 +MCHL0_SX357 +MCHL0_SX447 +MCHL0_SX87 +MCLK0_SI1660 +MCLK0_SI2290 +MCLK0_SI650 +MCLK0_SX130 +MCLK0_SX220 +MCLK0_SX310 +MCLK0_SX40 +MCLK0_SX400 +MCLM0_SI1456 +MCLM0_SI2086 +MCLM0_SI826 +MCLM0_SX106 +MCLM0_SX16 +MCLM0_SX196 +MCLM0_SX286 +MCLM0_SX376 +MCPM0_SI1194 +MCPM0_SI1824 +MCPM0_SI564 +MCPM0_SX114 +MCPM0_SX204 +MCPM0_SX24 +MCPM0_SX294 +MCPM0_SX384 +MCRE0_SI1121 +MCRE0_SI1725 +MCRE0_SI1751 +MCRE0_SX131 +MCRE0_SX221 +MCRE0_SX24 +MCRE0_SX401 +MCRE0_SX41 +MCSS0_SI1380 +MCSS0_SI688 +MCSS0_SI750 +MCSS0_SX120 +MCSS0_SX210 +MCSS0_SX30 +MCSS0_SX300 +MCSS0_SX390 +MCTH0_SI1209 +MCTH0_SI1839 +MCTH0_SI579 +MCTH0_SX129 +MCTH0_SX219 +MCTH0_SX309 +MCTH0_SX39 +MCTH0_SX399 +MCTM0_SI1350 +MCTM0_SI1980 +MCTM0_SI720 +MCTM0_SX180 +MCTM0_SX270 +MCTM0_SX360 +MCTM0_SX450 +MCTM0_SX90 +MCXM0_SI1351 +MCXM0_SI1981 +MCXM0_SI721 +MCXM0_SX181 +MCXM0_SX271 +MCXM0_SX361 +MCXM0_SX451 +MCXM0_SX91 +MDAC0_SI1261 +MDAC0_SI1837 +MDAC0_SI631 +MDAC0_SX181 +MDAC0_SX271 +MDAC0_SX361 +MDAC0_SX451 +MDAC0_SX91 
+MDAS0_SI1266 +MDAS0_SI1896 +MDAS0_SI636 +MDAS0_SX186 +MDAS0_SX21 +MDAS0_SX276 +MDAS0_SX6 +MDAS0_SX96 +MDBB1_SI1006 +MDBB1_SI1636 +MDBB1_SI2056 +MDBB1_SX106 +MDBB1_SX16 +MDBB1_SX196 +MDBB1_SX286 +MDBB1_SX376 +MDBP0_SI1158 +MDBP0_SI1788 +MDBP0_SI528 +MDBP0_SX168 +MDBP0_SX258 +MDBP0_SX348 +MDBP0_SX438 +MDBP0_SX78 +MDCD0_SI1415 +MDCD0_SI2045 +MDCD0_SI785 +MDCD0_SX155 +MDCD0_SX245 +MDCD0_SX335 +MDCD0_SX425 +MDCD0_SX65 +MDCM0_SI1480 +MDCM0_SI2110 +MDCM0_SI850 +MDCM0_SX130 +MDCM0_SX220 +MDCM0_SX310 +MDCM0_SX40 +MDCM0_SX400 +MDDC0_SI1419 +MDDC0_SI2049 +MDDC0_SI789 +MDDC0_SX159 +MDDC0_SX249 +MDDC0_SX339 +MDDC0_SX429 +MDDC0_SX69 +MDED0_SI1170 +MDED0_SI1800 +MDED0_SI540 +MDED0_SX180 +MDED0_SX270 +MDED0_SX360 +MDED0_SX450 +MDED0_SX90 +MDEF0_SI1123 +MDEF0_SI1563 +MDEF0_SI2193 +MDEF0_SX123 +MDEF0_SX213 +MDEF0_SX303 +MDEF0_SX33 +MDEF0_SX393 +MDEM0_SI1868 +MDEM0_SI608 +MDEM0_SI800 +MDEM0_SX158 +MDEM0_SX248 +MDEM0_SX338 +MDEM0_SX428 +MDEM0_SX68 +MDHL0_SI1439 +MDHL0_SI2069 +MDHL0_SI809 +MDHL0_SX179 +MDHL0_SX269 +MDHL0_SX359 +MDHL0_SX449 +MDHL0_SX89 +MDHS0_SI1530 +MDHS0_SI2160 +MDHS0_SI900 +MDHS0_SX180 +MDHS0_SX270 +MDHS0_SX360 +MDHS0_SX450 +MDHS0_SX90 +MDJM0_SI1455 +MDJM0_SI2085 +MDJM0_SI825 +MDJM0_SX105 +MDJM0_SX15 +MDJM0_SX195 +MDJM0_SX285 +MDJM0_SX375 +MDKS0_SI1066 +MDKS0_SI1696 +MDKS0_SI2326 +MDKS0_SX166 +MDKS0_SX256 +MDKS0_SX346 +MDKS0_SX436 +MDKS0_SX76 +MDLB0_SI1306 +MDLB0_SI1936 +MDLB0_SI676 +MDLB0_SX136 +MDLB0_SX226 +MDLB0_SX316 +MDLB0_SX406 +MDLB0_SX46 +MDLC0_SI1395 +MDLC0_SI2025 +MDLC0_SI765 +MDLC0_SX135 +MDLC0_SX225 +MDLC0_SX315 +MDLC0_SX405 +MDLC0_SX45 +MDLC1_SI1435 +MDLC1_SI2065 +MDLC1_SI2144 +MDLC1_SX175 +MDLC1_SX265 +MDLC1_SX355 +MDLC1_SX445 +MDLC1_SX85 +MDLC2_SI1614 +MDLC2_SI2244 +MDLC2_SI984 +MDLC2_SX174 +MDLC2_SX264 +MDLC2_SX354 +MDLC2_SX444 +MDLC2_SX84 +MDLH0_SI1960 +MDLH0_SI574 +MDLH0_SI700 +MDLH0_SX160 +MDLH0_SX250 +MDLH0_SX340 +MDLH0_SX430 +MDLH0_SX70 +MDLM0_SI1234 +MDLM0_SI1864 +MDLM0_SI604 +MDLM0_SX154 +MDLM0_SX244 +MDLM0_SX334 +MDLM0_SX424 +MDLM0_SX64 
+MDLR0_SI1233 +MDLR0_SI1863 +MDLR0_SI603 +MDLR0_SX153 +MDLR0_SX243 +MDLR0_SX333 +MDLR0_SX423 +MDLR0_SX63 +MDLR1_SI1299 +MDLR1_SI1929 +MDLR1_SI669 +MDLR1_SX129 +MDLR1_SX219 +MDLR1_SX309 +MDLR1_SX39 +MDLR1_SX399 +MDMA0_SI1238 +MDMA0_SI1430 +MDMA0_SI2060 +MDMA0_SX170 +MDMA0_SX260 +MDMA0_SX350 +MDMA0_SX440 +MDMA0_SX80 +MDMT0_SI1832 +MDMT0_SI2341 +MDMT0_SI572 +MDMT0_SX122 +MDMT0_SX212 +MDMT0_SX302 +MDMT0_SX32 +MDMT0_SX392 +MDNS0_SI1011 +MDNS0_SI2271 +MDNS0_SI873 +MDNS0_SX111 +MDNS0_SX201 +MDNS0_SX21 +MDNS0_SX291 +MDNS0_SX381 +MDPB0_SI1760 +MDPB0_SI2126 +MDPB0_SI866 +MDPB0_SX146 +MDPB0_SX236 +MDPB0_SX326 +MDPB0_SX416 +MDPB0_SX56 +MDPK0_SI1053 +MDPK0_SI1683 +MDPK0_SI552 +MDPK0_SX153 +MDPK0_SX243 +MDPK0_SX333 +MDPK0_SX423 +MDPK0_SX63 +MDPS0_SI1651 +MDPS0_SI1979 +MDPS0_SI719 +MDPS0_SX179 +MDPS0_SX269 +MDPS0_SX359 +MDPS0_SX449 +MDPS0_SX89 +MDRD0_SI1382 +MDRD0_SI2012 +MDRD0_SI752 +MDRD0_SX122 +MDRD0_SX212 +MDRD0_SX302 +MDRD0_SX32 +MDRD0_SX392 +MDSJ0_SI1462 +MDSJ0_SI2092 +MDSJ0_SI832 +MDSJ0_SX112 +MDSJ0_SX22 +MDSJ0_SX292 +MDSJ0_SX382 +MDSJ0_SX438 +MDSS0_SI1881 +MDSS0_SI2087 +MDSS0_SI621 +MDSS0_SX171 +MDSS0_SX261 +MDSS0_SX351 +MDSS0_SX441 +MDSS0_SX81 +MDSS1_SI1327 +MDSS1_SI1713 +MDSS1_SI697 +MDSS1_SX157 +MDSS1_SX247 +MDSS1_SX337 +MDSS1_SX427 +MDSS1_SX67 +MDTB0_SI1200 +MDTB0_SI1830 +MDTB0_SI570 +MDTB0_SX120 +MDTB0_SX210 +MDTB0_SX300 +MDTB0_SX321 +MDTB0_SX390 +MDWD0_SI1260 +MDWD0_SI1890 +MDWD0_SI557 +MDWD0_SX180 +MDWD0_SX270 +MDWD0_SX360 +MDWD0_SX450 +MDWD0_SX90 +MDWH0_SI1168 +MDWH0_SI1925 +MDWH0_SI665 +MDWH0_SX125 +MDWH0_SX215 +MDWH0_SX305 +MDWH0_SX35 +MDWH0_SX395 +MDWM0_SI1546 +MDWM0_SI2176 +MDWM0_SI916 +MDWM0_SX106 +MDWM0_SX16 +MDWM0_SX286 +MDWM0_SX376 +MDWM0_SX433 +MEAL0_SI1547 +MEAL0_SI2177 +MEAL0_SI917 +MEAL0_SX107 +MEAL0_SX197 +MEAL0_SX287 +MEAL0_SX347 +MEAL0_SX377 +MEDR0_SI1374 +MEDR0_SI2004 +MEDR0_SI744 +MEDR0_SX114 +MEDR0_SX204 +MEDR0_SX24 +MEDR0_SX294 +MEDR0_SX384 +MEFG0_SI465 +MEFG0_SI491 +MEFG0_SI598 +MEFG0_SX105 +MEFG0_SX15 +MEFG0_SX195 +MEFG0_SX285 +MEFG0_SX375 
+MEGJ0_SI1337 +MEGJ0_SI1967 +MEGJ0_SI707 +MEGJ0_SX167 +MEGJ0_SX257 +MEGJ0_SX3 +MEGJ0_SX437 +MEGJ0_SX77 +MEJL0_SI1592 +MEJL0_SI1654 +MEJL0_SI962 +MEJL0_SX152 +MEJL0_SX242 +MEJL0_SX332 +MEJL0_SX422 +MEJL0_SX62 +MEJS0_SI1240 +MEJS0_SI1870 +MEJS0_SI610 +MEJS0_SX160 +MEJS0_SX250 +MEJS0_SX340 +MEJS0_SX430 +MEJS0_SX70 +MESG0_SI1332 +MESG0_SI1962 +MESG0_SI702 +MESG0_SX162 +MESG0_SX252 +MESG0_SX342 +MESG0_SX432 +MESG0_SX72 +MESJ0_SI2039 +MESJ0_SI2257 +MESJ0_SI997 +MESJ0_SX187 +MESJ0_SX277 +MESJ0_SX367 +MESJ0_SX7 +MESJ0_SX97 +MEWM0_SI1348 +MEWM0_SI1978 +MEWM0_SI718 +MEWM0_SX178 +MEWM0_SX268 +MEWM0_SX358 +MEWM0_SX448 +MEWM0_SX88 +MFER0_SI1492 +MFER0_SI2122 +MFER0_SI862 +MFER0_SX142 +MFER0_SX232 +MFER0_SX322 +MFER0_SX412 +MFER0_SX52 +MFMC0_SI1132 +MFMC0_SI1762 +MFMC0_SI502 +MFMC0_SX142 +MFMC0_SX232 +MFMC0_SX322 +MFMC0_SX412 +MFMC0_SX52 +MFRM0_SI1155 +MFRM0_SI1717 +MFRM0_SI1785 +MFRM0_SX165 +MFRM0_SX255 +MFRM0_SX345 +MFRM0_SX435 +MFRM0_SX75 +MFWK0_SI1249 +MFWK0_SI1879 +MFWK0_SI619 +MFWK0_SX169 +MFWK0_SX259 +MFWK0_SX349 +MFWK0_SX439 +MFWK0_SX79 +MFXS0_SI1674 +MFXS0_SI2225 +MFXS0_SI2304 +MFXS0_SX144 +MFXS0_SX234 +MFXS0_SX324 +MFXS0_SX414 +MFXS0_SX54 +MFXV0_SI1005 +MFXV0_SI1342 +MFXV0_SI1635 +MFXV0_SX105 +MFXV0_SX15 +MFXV0_SX195 +MFXV0_SX285 +MFXV0_SX375 +MGAF0_SI1282 +MGAF0_SI1912 +MGAF0_SI652 +MGAF0_SX112 +MGAF0_SX202 +MGAF0_SX22 +MGAF0_SX292 +MGAF0_SX382 +MGAG0_SI1321 +MGAG0_SI645 +MGAG0_SI691 +MGAG0_SX151 +MGAG0_SX241 +MGAG0_SX331 +MGAG0_SX421 +MGAG0_SX61 +MGAK0_SI1036 +MGAK0_SI1666 +MGAK0_SI2296 +MGAK0_SX136 +MGAK0_SX226 +MGAK0_SX316 +MGAK0_SX406 +MGAK0_SX46 +MGAR0_SI1212 +MGAR0_SI1694 +MGAR0_SI1842 +MGAR0_SX132 +MGAR0_SX222 +MGAR0_SX312 +MGAR0_SX402 +MGAR0_SX42 +MGAW0_SI1165 +MGAW0_SI1802 +MGAW0_SI535 +MGAW0_SX175 +MGAW0_SX265 +MGAW0_SX355 +MGAW0_SX445 +MGAW0_SX85 +MGES0_SI1481 +MGES0_SI2111 +MGES0_SI851 +MGES0_SX131 +MGES0_SX221 +MGES0_SX311 +MGES0_SX401 +MGES0_SX41 +MGJC0_SI1256 +MGJC0_SI1335 +MGJC0_SI1965 +MGJC0_SX165 +MGJC0_SX255 +MGJC0_SX345 +MGJC0_SX435 +MGJC0_SX75 
+MGRL0_SI1497 +MGRL0_SI2127 +MGRL0_SI867 +MGRL0_SX147 +MGRL0_SX237 +MGRL0_SX327 +MGRL0_SX417 +MGRL0_SX57 +MGRP0_SI1317 +MGRP0_SI1947 +MGRP0_SI687 +MGRP0_SX147 +MGRP0_SX237 +MGRP0_SX327 +MGRP0_SX417 +MGRP0_SX57 +MGSH0_SI1176 +MGSH0_SI1806 +MGSH0_SI546 +MGSH0_SX127 +MGSH0_SX186 +MGSH0_SX276 +MGSH0_SX6 +MGSH0_SX96 +MGSL0_SI1164 +MGSL0_SI534 +MGSL0_SI797 +MGSL0_SX174 +MGSL0_SX264 +MGSL0_SX354 +MGSL0_SX444 +MGSL0_SX84 +MGXP0_SI1087 +MGXP0_SI457 +MGXP0_SI525 +MGXP0_SX187 +MGXP0_SX277 +MGXP0_SX367 +MGXP0_SX7 +MGXP0_SX97 +MHBS0_SI1575 +MHBS0_SI2205 +MHBS0_SI945 +MHBS0_SX135 +MHBS0_SX225 +MHBS0_SX315 +MHBS0_SX405 +MHBS0_SX45 +MHIT0_SI1613 +MHIT0_SI2243 +MHIT0_SI983 +MHIT0_SX173 +MHIT0_SX263 +MHIT0_SX353 +MHIT0_SX443 +MHIT0_SX83 +MHJB0_SI1017 +MHJB0_SI1647 +MHJB0_SI2277 +MHJB0_SX117 +MHJB0_SX207 +MHJB0_SX27 +MHJB0_SX297 +MHJB0_SX387 +MHMG0_SI1365 +MHMG0_SI1995 +MHMG0_SI735 +MHMG0_SX105 +MHMG0_SX15 +MHMG0_SX195 +MHMG0_SX285 +MHMG0_SX375 +MHMR0_SI1119 +MHMR0_SI1692 +MHMR0_SI489 +MHMR0_SX129 +MHMR0_SX219 +MHMR0_SX309 +MHMR0_SX39 +MHMR0_SX399 +MHRM0_SI1475 +MHRM0_SI2218 +MHRM0_SI958 +MHRM0_SX148 +MHRM0_SX238 +MHRM0_SX328 +MHRM0_SX418 +MHRM0_SX58 +MHXL0_SI1772 +MHXL0_SI512 +MHXL0_SI612 +MHXL0_SX152 +MHXL0_SX242 +MHXL0_SX332 +MHXL0_SX422 +MHXL0_SX62 +MILB0_SI2163 +MILB0_SI807 +MILB0_SI903 +MILB0_SX183 +MILB0_SX273 +MILB0_SX3 +MILB0_SX363 +MILB0_SX93 +MJAC0_SI1331 +MJAC0_SI2148 +MJAC0_SI701 +MJAC0_SX251 +MJAC0_SX307 +MJAC0_SX341 +MJAC0_SX431 +MJAC0_SX71 +MJAE0_SI1524 +MJAE0_SI1999 +MJAE0_SI2154 +MJAE0_SX174 +MJAE0_SX264 +MJAE0_SX354 +MJAE0_SX444 +MJAE0_SX84 +MJAI0_SI1604 +MJAI0_SI682 +MJAI0_SI710 +MJAI0_SX164 +MJAI0_SX254 +MJAI0_SX344 +MJAI0_SX434 +MJAI0_SX74 +MJBG0_SI1232 +MJBG0_SI1724 +MJBG0_SI1862 +MJBG0_SX152 +MJBG0_SX242 +MJBG0_SX332 +MJBG0_SX422 +MJBG0_SX62 +MJDA0_SI1031 +MJDA0_SI1661 +MJDA0_SI2291 +MJDA0_SX131 +MJDA0_SX221 +MJDA0_SX311 +MJDA0_SX401 +MJDA0_SX41 +MJDC0_SI1161 +MJDC0_SI2165 +MJDC0_SI531 +MJDC0_SX171 +MJDC0_SX261 +MJDC0_SX351 +MJDC0_SX441 +MJDC0_SX81 
+MJDE0_SI1120 +MJDE0_SI463 +MJDE0_SI490 +MJDE0_SX130 +MJDE0_SX220 +MJDE0_SX310 +MJDE0_SX40 +MJDE0_SX400 +MJDG0_SI1042 +MJDG0_SI1672 +MJDG0_SI1705 +MJDG0_SX142 +MJDG0_SX232 +MJDG0_SX322 +MJDG0_SX412 +MJDG0_SX52 +MJDM0_SI1340 +MJDM0_SI1937 +MJDM0_SI974 +MJDM0_SX170 +MJDM0_SX260 +MJDM0_SX350 +MJDM0_SX440 +MJDM0_SX80 +MJEB0_SI1286 +MJEB0_SI1916 +MJEB0_SI656 +MJEB0_SX170 +MJEB0_SX206 +MJEB0_SX26 +MJEB0_SX296 +MJEB0_SX386 +MJEB1_SI1467 +MJEB1_SI2097 +MJEB1_SI837 +MJEB1_SX117 +MJEB1_SX207 +MJEB1_SX27 +MJEB1_SX297 +MJEB1_SX387 +MJEE0_SI1237 +MJEE0_SI1867 +MJEE0_SI607 +MJEE0_SX157 +MJEE0_SX247 +MJEE0_SX337 +MJEE0_SX427 +MJEE0_SX67 +MJFH0_SI1107 +MJFH0_SI1737 +MJFH0_SI477 +MJFH0_SX117 +MJFH0_SX207 +MJFH0_SX27 +MJFH0_SX297 +MJFH0_SX387 +MJFR0_SI1605 +MJFR0_SI2235 +MJFR0_SI975 +MJFR0_SX165 +MJFR0_SX255 +MJFR0_SX345 +MJFR0_SX435 +MJFR0_SX75 +MJHI0_SI1328 +MJHI0_SI555 +MJHI0_SI698 +MJHI0_SX158 +MJHI0_SX248 +MJHI0_SX338 +MJHI0_SX428 +MJHI0_SX68 +MJJB0_SI1139 +MJJB0_SI1277 +MJJB0_SI1769 +MJJB0_SX149 +MJJB0_SX239 +MJJB0_SX329 +MJJB0_SX419 +MJJB0_SX59 +MJJJ0_SI1163 +MJJJ0_SI1793 +MJJJ0_SI533 +MJJJ0_SX173 +MJJJ0_SX263 +MJJJ0_SX353 +MJJJ0_SX443 +MJJJ0_SX83 +MJJM0_SI1251 +MJJM0_SI1457 +MJJM0_SI827 +MJJM0_SX107 +MJJM0_SX17 +MJJM0_SX197 +MJJM0_SX287 +MJJM0_SX377 +MJKR0_SI1201 +MJKR0_SI1831 +MJKR0_SI571 +MJKR0_SX121 +MJKR0_SX211 +MJKR0_SX301 +MJKR0_SX31 +MJKR0_SX391 +MJLB0_SI1616 +MJLB0_SI2246 +MJLB0_SI986 +MJLB0_SX176 +MJLB0_SX266 +MJLB0_SX356 +MJLB0_SX446 +MJLB0_SX86 +MJLG1_SI1012 +MJLG1_SI1642 +MJLG1_SI2272 +MJLG1_SX112 +MJLG1_SX202 +MJLG1_SX22 +MJLG1_SX292 +MJLG1_SX382 +MJLS0_SI1096 +MJLS0_SI1726 +MJLS0_SI466 +MJLS0_SX106 +MJLS0_SX16 +MJLS0_SX196 +MJLS0_SX286 +MJLS0_SX376 +MJMA0_SI1495 +MJMA0_SI2125 +MJMA0_SI865 +MJMA0_SX145 +MJMA0_SX235 +MJMA0_SX325 +MJMA0_SX415 +MJMA0_SX55 +MJMD0_SI1028 +MJMD0_SI1658 +MJMD0_SI2288 +MJMD0_SX128 +MJMD0_SX218 +MJMD0_SX308 +MJMD0_SX38 +MJMD0_SX398 +MJMM0_SI1255 +MJMM0_SI1885 +MJMM0_SI625 +MJMM0_SX175 +MJMM0_SX265 +MJMM0_SX355 +MJMM0_SX445 +MJMM0_SX85 
+MJPG0_SI1191 +MJPG0_SI1821 +MJPG0_SI561 +MJPG0_SX111 +MJPG0_SX201 +MJPG0_SX21 +MJPG0_SX291 +MJPG0_SX381 +MJPM0_SI1368 +MJPM0_SI1998 +MJPM0_SI738 +MJPM0_SX108 +MJPM0_SX18 +MJPM0_SX198 +MJPM0_SX288 +MJPM0_SX378 +MJPM1_SI1897 +MJPM1_SI2280 +MJPM1_SI761 +MJPM1_SX131 +MJPM1_SX221 +MJPM1_SX311 +MJPM1_SX401 +MJPM1_SX41 +MJRA0_SI1236 +MJRA0_SI1866 +MJRA0_SI606 +MJRA0_SX156 +MJRA0_SX246 +MJRA0_SX336 +MJRA0_SX426 +MJRA0_SX66 +MJRG0_SI1366 +MJRG0_SI1996 +MJRG0_SI736 +MJRG0_SX106 +MJRG0_SX16 +MJRG0_SX286 +MJRG0_SX352 +MJRG0_SX376 +MJRH0_SI1125 +MJRH0_SI1755 +MJRH0_SI1840 +MJRH0_SX135 +MJRH0_SX225 +MJRH0_SX315 +MJRH0_SX405 +MJRH0_SX45 +MJRH1_SI1558 +MJRH1_SI1774 +MJRH1_SI514 +MJRH1_SX154 +MJRH1_SX244 +MJRH1_SX334 +MJRH1_SX424 +MJRH1_SX64 +MJRK0_SI1662 +MJRK0_SI2103 +MJRK0_SI880 +MJRK0_SX160 +MJRK0_SX250 +MJRK0_SX340 +MJRK0_SX430 +MJRK0_SX70 +MJRP0_SI1835 +MJRP0_SI1845 +MJRP0_SI585 +MJRP0_SX135 +MJRP0_SX225 +MJRP0_SX315 +MJRP0_SX405 +MJRP0_SX45 +MJSR0_SI1424 +MJSR0_SI2054 +MJSR0_SI794 +MJSR0_SX164 +MJSR0_SX254 +MJSR0_SX344 +MJSR0_SX434 +MJSR0_SX74 +MJWG0_SI2155 +MJWG0_SI813 +MJWG0_SI895 +MJWG0_SX175 +MJWG0_SX265 +MJWG0_SX355 +MJWG0_SX445 +MJWG0_SX85 +MJWS0_SI1143 +MJWS0_SI1773 +MJWS0_SI513 +MJWS0_SX153 +MJWS0_SX243 +MJWS0_SX333 +MJWS0_SX423 +MJWS0_SX63 +MJWT0_SI1291 +MJWT0_SI1381 +MJWT0_SI751 +MJWT0_SX121 +MJWT0_SX211 +MJWT0_SX301 +MJWT0_SX31 +MJWT0_SX391 +MJXA0_SI1507 +MJXA0_SI2137 +MJXA0_SI877 +MJXA0_SX157 +MJXA0_SX247 +MJXA0_SX337 +MJXA0_SX427 +MJXA0_SX67 +MJXL0_SI1172 +MJXL0_SI1795 +MJXL0_SI542 +MJXL0_SX182 +MJXL0_SX272 +MJXL0_SX362 +MJXL0_SX452 +MJXL0_SX92 +MKAG0_SI1609 +MKAG0_SI2239 +MKAG0_SI979 +MKAG0_SX169 +MKAG0_SX259 +MKAG0_SX30 +MKAG0_SX439 +MKAG0_SX79 +MKAH0_SI1528 +MKAH0_SI2158 +MKAH0_SI898 +MKAH0_SX178 +MKAH0_SX268 +MKAH0_SX358 +MKAH0_SX448 +MKAH0_SX88 +MKAJ0_SI1414 +MKAJ0_SI2044 +MKAJ0_SI784 +MKAJ0_SX154 +MKAJ0_SX244 +MKAJ0_SX334 +MKAJ0_SX424 +MKAJ0_SX64 +MKAM0_SI1250 +MKAM0_SI1316 +MKAM0_SI1465 +MKAM0_SX146 +MKAM0_SX236 +MKAM0_SX326 +MKAM0_SX416 +MKAM0_SX56 
+MKDB0_SI2132 +MKDB0_SI588 +MKDB0_SI872 +MKDB0_SX152 +MKDB0_SX242 +MKDB0_SX332 +MKDB0_SX422 +MKDB0_SX62 +MKDD0_SI1567 +MKDD0_SI2197 +MKDD0_SI937 +MKDD0_SX127 +MKDD0_SX217 +MKDD0_SX307 +MKDD0_SX37 +MKDD0_SX397 +MKDT0_SI2153 +MKDT0_SI814 +MKDT0_SI893 +MKDT0_SX173 +MKDT0_SX263 +MKDT0_SX353 +MKDT0_SX443 +MKDT0_SX83 +MKES0_SI1253 +MKES0_SI1883 +MKES0_SI623 +MKES0_SX173 +MKES0_SX263 +MKES0_SX353 +MKES0_SX443 +MKES0_SX83 +MKJO0_SI1517 +MKJO0_SI2147 +MKJO0_SI887 +MKJO0_SX167 +MKJO0_SX257 +MKJO0_SX424 +MKJO0_SX437 +MKJO0_SX77 +MKLN0_SI1598 +MKLN0_SI2228 +MKLN0_SI968 +MKLN0_SX158 +MKLN0_SX248 +MKLN0_SX338 +MKLN0_SX428 +MKLN0_SX68 +MKLR0_SI1059 +MKLR0_SI1689 +MKLR0_SI2319 +MKLR0_SX159 +MKLR0_SX249 +MKLR0_SX339 +MKLR0_SX429 +MKLR0_SX69 +MKLS0_SI1437 +MKLS0_SI1533 +MKLS0_SI2067 +MKLS0_SX177 +MKLS0_SX267 +MKLS0_SX357 +MKLS0_SX447 +MKLS0_SX87 +MKLS1_SI1545 +MKLS1_SI2175 +MKLS1_SI915 +MKLS1_SX105 +MKLS1_SX15 +MKLS1_SX195 +MKLS1_SX285 +MKLS1_SX375 +MKLW0_SI1571 +MKLW0_SI1844 +MKLW0_SI2201 +MKLW0_SX131 +MKLW0_SX221 +MKLW0_SX311 +MKLW0_SX401 +MKLW0_SX41 +MKRG0_SI1491 +MKRG0_SI2121 +MKRG0_SI861 +MKRG0_SX141 +MKRG0_SX231 +MKRG0_SX31 +MKRG0_SX411 +MKRG0_SX51 +MKXL0_SI1185 +MKXL0_SI1815 +MKXL0_SI1958 +MKXL0_SX105 +MKXL0_SX15 +MKXL0_SX195 +MKXL0_SX285 +MKXL0_SX375 +MLBC0_SI1239 +MLBC0_SI1869 +MLBC0_SI609 +MLBC0_SX159 +MLBC0_SX249 +MLBC0_SX339 +MLBC0_SX429 +MLBC0_SX69 +MLEL0_SI1246 +MLEL0_SI1876 +MLEL0_SI616 +MLEL0_SX166 +MLEL0_SX256 +MLEL0_SX346 +MLEL0_SX436 +MLEL0_SX76 +MLJC0_SI1225 +MLJC0_SI1855 +MLJC0_SI595 +MLJC0_SX145 +MLJC0_SX235 +MLJC0_SX325 +MLJC0_SX415 +MLJC0_SX55 +MLJH0_SI1324 +MLJH0_SI1422 +MLJH0_SI694 +MLJH0_SX154 +MLJH0_SX244 +MLJH0_SX334 +MLJH0_SX424 +MLJH0_SX64 +MLNS0_SI1407 +MLNS0_SI2037 +MLNS0_SI777 +MLNS0_SX147 +MLNS0_SX237 +MLNS0_SX327 +MLNS0_SX417 +MLNS0_SX57 +MLSH0_SI1417 +MLSH0_SI2047 +MLSH0_SI787 +MLSH0_SX157 +MLSH0_SX247 +MLSH0_SX337 +MLSH0_SX427 +MLSH0_SX67 +MMAA0_SI1588 +MMAA0_SI2105 +MMAA0_SI845 +MMAA0_SX125 +MMAA0_SX215 +MMAA0_SX305 +MMAA0_SX35 +MMAA0_SX395 
+MMAB1_SI1494 +MMAB1_SI2124 +MMAB1_SI864 +MMAB1_SX144 +MMAB1_SX234 +MMAB1_SX324 +MMAB1_SX414 +MMAB1_SX54 +MMAG0_SI1126 +MMAG0_SI1756 +MMAG0_SI496 +MMAG0_SX136 +MMAG0_SX226 +MMAG0_SX316 +MMAG0_SX406 +MMAG0_SX46 +MMAM0_SI1597 +MMAM0_SI1668 +MMAM0_SI2227 +MMAM0_SX157 +MMAM0_SX247 +MMAM0_SX337 +MMAM0_SX427 +MMAM0_SX67 +MMAR0_SI1336 +MMAR0_SI1966 +MMAR0_SI706 +MMAR0_SX166 +MMAR0_SX256 +MMAR0_SX346 +MMAR0_SX436 +MMAR0_SX76 +MMBS0_SI1151 +MMBS0_SI1781 +MMBS0_SI521 +MMBS0_SX161 +MMBS0_SX251 +MMBS0_SX341 +MMBS0_SX431 +MMBS0_SX71 +MMCC0_SI1338 +MMCC0_SI1968 +MMCC0_SI708 +MMCC0_SX168 +MMCC0_SX258 +MMCC0_SX348 +MMCC0_SX438 +MMCC0_SX78 +MMDB0_SI1358 +MMDB0_SI1617 +MMDB0_SI987 +MMDB0_SX177 +MMDB0_SX267 +MMDB0_SX357 +MMDB0_SX447 +MMDB0_SX87 +MMDG0_SI1780 +MMDG0_SI2035 +MMDG0_SI520 +MMDG0_SX160 +MMDG0_SX250 +MMDG0_SX340 +MMDG0_SX430 +MMDG0_SX70 +MMDM0_SI1311 +MMDM0_SI1941 +MMDM0_SI681 +MMDM0_SX141 +MMDM0_SX231 +MMDM0_SX321 +MMDM0_SX411 +MMDM0_SX51 +MMDM1_SI1650 +MMDM1_SI2043 +MMDM1_SI783 +MMDM1_SX153 +MMDM1_SX243 +MMDM1_SX333 +MMDM1_SX423 +MMDM1_SX63 +MMDS0_SI1343 +MMDS0_SI1973 +MMDS0_SI713 +MMDS0_SX173 +MMDS0_SX263 +MMDS0_SX353 +MMDS0_SX443 +MMDS0_SX83 +MMEA0_SI1388 +MMEA0_SI2018 +MMEA0_SI758 +MMEA0_SX128 +MMEA0_SX218 +MMEA0_SX308 +MMEA0_SX38 +MMEA0_SX398 +MMEB0_SI1357 +MMEB0_SI1987 +MMEB0_SI727 +MMEB0_SX187 +MMEB0_SX327 +MMEB0_SX367 +MMEB0_SX7 +MMEB0_SX97 +MMGC0_SI1305 +MMGC0_SI1935 +MMGC0_SI2184 +MMGC0_SX135 +MMGC0_SX225 +MMGC0_SX315 +MMGC0_SX405 +MMGC0_SX45 +MMGG0_SI1079 +MMGG0_SI1709 +MMGG0_SI2339 +MMGG0_SX179 +MMGG0_SX269 +MMGG0_SX359 +MMGG0_SX449 +MMGG0_SX89 +MMGK0_SI1322 +MMGK0_SI1952 +MMGK0_SI692 +MMGK0_SX152 +MMGK0_SX242 +MMGK0_SX332 +MMGK0_SX422 +MMGK0_SX62 +MMJB1_SI1408 +MMJB1_SI2038 +MMJB1_SI778 +MMJB1_SX148 +MMJB1_SX238 +MMJB1_SX328 +MMJB1_SX418 +MMJB1_SX58 +MMLM0_SI1527 +MMLM0_SI2150 +MMLM0_SI897 +MMLM0_SX177 +MMLM0_SX267 +MMLM0_SX357 +MMLM0_SX447 +MMLM0_SX87 +MMPM0_SI1061 +MMPM0_SI1691 +MMPM0_SI2321 +MMPM0_SX161 +MMPM0_SX251 +MMPM0_SX341 +MMPM0_SX431 +MMPM0_SX71 
+MMRP0_SI2034 +MMRP0_SI717 +MMRP0_SI774 +MMRP0_SX144 +MMRP0_SX234 +MMRP0_SX324 +MMRP0_SX414 +MMRP0_SX54 +MMSM0_SI1106 +MMSM0_SI1736 +MMSM0_SI476 +MMSM0_SX116 +MMSM0_SX206 +MMSM0_SX26 +MMSM0_SX296 +MMSM0_SX386 +MMVP0_SI1284 +MMVP0_SI1914 +MMVP0_SI654 +MMVP0_SX114 +MMVP0_SX204 +MMVP0_SX294 +MMVP0_SX347 +MMVP0_SX384 +MMWB0_SI1619 +MMWB0_SI2249 +MMWB0_SI989 +MMWB0_SX179 +MMWB0_SX269 +MMWB0_SX359 +MMWB0_SX449 +MMWB0_SX89 +MMWS0_SI1518 +MMWS0_SI559 +MMWS0_SI888 +MMWS0_SX168 +MMWS0_SX258 +MMWS0_SX348 +MMWS0_SX438 +MMWS0_SX78 +MMWS1_SI1071 +MMWS1_SI1701 +MMWS1_SI2331 +MMWS1_SX261 +MMWS1_SX27 +MMWS1_SX351 +MMWS1_SX441 +MMWS1_SX81 +MMXS0_SI2136 +MMXS0_SI629 +MMXS0_SI876 +MMXS0_SX156 +MMXS0_SX246 +MMXS0_SX336 +MMXS0_SX426 +MMXS0_SX66 +MNET0_SI1446 +MNET0_SI2076 +MNET0_SI816 +MNET0_SX186 +MNET0_SX276 +MNET0_SX366 +MNET0_SX6 +MNET0_SX96 +MNTW0_SI1068 +MNTW0_SI1698 +MNTW0_SI2328 +MNTW0_SX168 +MNTW0_SX202 +MNTW0_SX258 +MNTW0_SX348 +MNTW0_SX78 +MPAR0_SI1576 +MPAR0_SI2206 +MPAR0_SI946 +MPAR0_SX136 +MPAR0_SX226 +MPAR0_SX316 +MPAR0_SX406 +MPAR0_SX46 +MPEB0_SI1034 +MPEB0_SI1860 +MPEB0_SI600 +MPEB0_SX150 +MPEB0_SX240 +MPEB0_SX330 +MPEB0_SX420 +MPEB0_SX60 +MPFU0_SI1258 +MPFU0_SI1888 +MPFU0_SI628 +MPFU0_SX178 +MPFU0_SX268 +MPFU0_SX358 +MPFU0_SX448 +MPFU0_SX88 +MPGH0_SI1554 +MPGH0_SI675 +MPGH0_SI924 +MPGH0_SX114 +MPGH0_SX204 +MPGH0_SX24 +MPGH0_SX294 +MPGH0_SX384 +MPGR0_SI1410 +MPGR0_SI2040 +MPGR0_SI780 +MPGR0_SX150 +MPGR0_SX240 +MPGR0_SX330 +MPGR0_SX420 +MPGR0_SX60 +MPGR1_SI1269 +MPGR1_SI1499 +MPGR1_SI2129 +MPGR1_SX149 +MPGR1_SX239 +MPGR1_SX329 +MPGR1_SX419 +MPGR1_SX59 +MPMB0_SI1501 +MPMB0_SI2131 +MPMB0_SI871 +MPMB0_SX151 +MPMB0_SX241 +MPMB0_SX331 +MPMB0_SX421 +MPMB0_SX61 +MPPC0_SI1412 +MPPC0_SI2042 +MPPC0_SI782 +MPPC0_SX152 +MPPC0_SX242 +MPPC0_SX332 +MPPC0_SX422 +MPPC0_SX62 +MPRB0_SI1205 +MPRB0_SI1215 +MPRB0_SI575 +MPRB0_SX125 +MPRB0_SX215 +MPRB0_SX305 +MPRB0_SX35 +MPRB0_SX395 +MPRD0_SI1431 +MPRD0_SI2061 +MPRD0_SI801 +MPRD0_SX171 +MPRD0_SX261 +MPRD0_SX351 +MPRD0_SX441 +MPRD0_SX81 
+MPRK0_SI1097 +MPRK0_SI1727 +MPRK0_SI467 +MPRK0_SX107 +MPRK0_SX17 +MPRK0_SX197 +MPRK0_SX287 +MPRK0_SX377 +MPRT0_SI1210 +MPRT0_SI495 +MPRT0_SI580 +MPRT0_SX130 +MPRT0_SX220 +MPRT0_SX310 +MPRT0_SX40 +MPRT0_SX400 +MPSW0_SI1067 +MPSW0_SI1697 +MPSW0_SI2327 +MPSW0_SX167 +MPSW0_SX24 +MPSW0_SX257 +MPSW0_SX437 +MPSW0_SX77 +MRAB0_SI1224 +MRAB0_SI1854 +MRAB0_SI594 +MRAB0_SX144 +MRAB0_SX234 +MRAB0_SX324 +MRAB0_SX414 +MRAB0_SX54 +MRAB1_SI1478 +MRAB1_SI2108 +MRAB1_SI848 +MRAB1_SX128 +MRAB1_SX218 +MRAB1_SX308 +MRAB1_SX38 +MRAB1_SX398 +MRAI0_SI1954 +MRAI0_SI2052 +MRAI0_SI792 +MRAI0_SX162 +MRAI0_SX252 +MRAI0_SX342 +MRAI0_SX432 +MRAI0_SX72 +MRAM0_SI1275 +MRAM0_SI1905 +MRAM0_SI1951 +MRAM0_SX105 +MRAM0_SX15 +MRAM0_SX195 +MRAM0_SX285 +MRAM0_SX375 +MRAV0_SI1008 +MRAV0_SI1638 +MRAV0_SI2268 +MRAV0_SX108 +MRAV0_SX18 +MRAV0_SX198 +MRAV0_SX288 +MRAV0_SX378 +MRBC0_SI1665 +MRBC0_SI1859 +MRBC0_SI599 +MRBC0_SX149 +MRBC0_SX239 +MRBC0_SX329 +MRBC0_SX419 +MRBC0_SX59 +MRCG0_SI1428 +MRCG0_SI2058 +MRCG0_SI798 +MRCG0_SX168 +MRCG0_SX258 +MRCG0_SX348 +MRCG0_SX438 +MRCG0_SX78 +MRCW0_SI1371 +MRCW0_SI2001 +MRCW0_SI741 +MRCW0_SX111 +MRCW0_SX201 +MRCW0_SX21 +MRCW0_SX291 +MRCW0_SX381 +MRDD0_SI1050 +MRDD0_SI1680 +MRDD0_SI2310 +MRDD0_SX150 +MRDD0_SX240 +MRDD0_SX277 +MRDD0_SX330 +MRDD0_SX60 +MRDM0_SI1044 +MRDM0_SI1595 +MRDM0_SI965 +MRDM0_SX155 +MRDM0_SX245 +MRDM0_SX335 +MRDM0_SX425 +MRDM0_SX65 +MRDS0_SI1167 +MRDS0_SI1797 +MRDS0_SI537 +MRDS0_SX177 +MRDS0_SX267 +MRDS0_SX357 +MRDS0_SX447 +MRDS0_SX87 +MREE0_SI1104 +MREE0_SI1734 +MREE0_SI1959 +MREE0_SX114 +MREE0_SX204 +MREE0_SX24 +MREE0_SX294 +MREE0_SX384 +MREH1_SI1599 +MREH1_SI2229 +MREH1_SI969 +MREH1_SX159 +MREH1_SX249 +MREH1_SX339 +MREH1_SX429 +MREH1_SX69 +MREM0_SI1591 +MREM0_SI511 +MREM0_SI961 +MREM0_SX151 +MREM0_SX241 +MREM0_SX331 +MREM0_SX421 +MREM0_SX61 +MREW1_SI1500 +MREW1_SI2130 +MREW1_SI870 +MREW1_SX150 +MREW1_SX240 +MREW1_SX330 +MREW1_SX420 +MREW1_SX60 +MRFK0_SI1076 +MRFK0_SI1706 +MRFK0_SI2336 +MRFK0_SX176 +MRFK0_SX266 +MRFK0_SX356 +MRFK0_SX446 +MRFK0_SX86 
+MRFL0_SI1156 +MRFL0_SI1786 +MRFL0_SI526 +MRFL0_SX166 +MRFL0_SX256 +MRFL0_SX346 +MRFL0_SX436 +MRFL0_SX76 +MRGM0_SI1162 +MRGM0_SI1792 +MRGM0_SI532 +MRGM0_SX172 +MRGM0_SX262 +MRGM0_SX416 +MRGM0_SX442 +MRGM0_SX82 +MRGS0_SI1356 +MRGS0_SI1986 +MRGS0_SI726 +MRGS0_SX186 +MRGS0_SX276 +MRGS0_SX366 +MRGS0_SX6 +MRGS0_SX96 +MRHL0_SI1515 +MRHL0_SI2145 +MRHL0_SI885 +MRHL0_SX165 +MRHL0_SX255 +MRHL0_SX345 +MRHL0_SX435 +MRHL0_SX75 +MRJB1_SI1020 +MRJB1_SI1413 +MRJB1_SI2021 +MRJB1_SX120 +MRJB1_SX210 +MRJB1_SX30 +MRJB1_SX300 +MRJB1_SX390 +MRJH0_SI1519 +MRJH0_SI889 +MRJH0_SI914 +MRJH0_SX169 +MRJH0_SX259 +MRJH0_SX307 +MRJH0_SX439 +MRJH0_SX79 +MRJM0_SI1095 +MRJM0_SI1228 +MRJM0_SI1858 +MRJM0_SX148 +MRJM0_SX238 +MRJM0_SX328 +MRJM0_SX418 +MRJM0_SX58 +MRJM1_SI1298 +MRJM1_SI1928 +MRJM1_SI668 +MRJM1_SX128 +MRJM1_SX218 +MRJM1_SX308 +MRJM1_SX38 +MRJM1_SX398 +MRJT0_SI1498 +MRJT0_SI1805 +MRJT0_SI868 +MRJT0_SX148 +MRJT0_SX238 +MRJT0_SX328 +MRJT0_SX418 +MRJT0_SX58 +MRKM0_SI1267 +MRKM0_SI1391 +MRKM0_SI637 +MRKM0_SX187 +MRKM0_SX277 +MRKM0_SX367 +MRKM0_SX7 +MRKM0_SX97 +MRLD0_SI1594 +MRLD0_SI2224 +MRLD0_SI964 +MRLD0_SX154 +MRLD0_SX244 +MRLD0_SX334 +MRLD0_SX424 +MRLD0_SX64 +MRLJ0_SI1420 +MRLJ0_SI2050 +MRLJ0_SI790 +MRLJ0_SX160 +MRLJ0_SX250 +MRLJ0_SX340 +MRLJ0_SX430 +MRLJ0_SX70 +MRLJ1_SI1671 +MRLJ1_SI2301 +MRLJ1_SI2332 +MRLJ1_SX141 +MRLJ1_SX231 +MRLJ1_SX321 +MRLJ1_SX411 +MRLJ1_SX51 +MRLK0_SI1468 +MRLK0_SI2140 +MRLK0_SI843 +MRLK0_SX123 +MRLK0_SX213 +MRLK0_SX303 +MRLK0_SX33 +MRLK0_SX393 +MRLR0_SI1196 +MRLR0_SI1826 +MRLR0_SI566 +MRLR0_SX116 +MRLR0_SX206 +MRLR0_SX26 +MRLR0_SX296 +MRLR0_SX386 +MRMB0_SI1581 +MRMB0_SI2211 +MRMB0_SI951 +MRMB0_SX141 +MRMB0_SX231 +MRMB0_SX321 +MRMB0_SX411 +MRMB0_SX51 +MRMG0_SI1080 +MRMG0_SI1710 +MRMG0_SI2340 +MRMG0_SX180 +MRMG0_SX270 +MRMG0_SX360 +MRMG0_SX450 +MRMG0_SX90 +MRMH0_SI1021 +MRMH0_SI1349 +MRMH0_SI2281 +MRMH0_SX121 +MRMH0_SX211 +MRMH0_SX301 +MRMH0_SX31 +MRMH0_SX391 +MRML0_SI1421 +MRML0_SI2051 +MRML0_SI791 +MRML0_SX161 +MRML0_SX251 +MRML0_SX341 +MRML0_SX431 +MRML0_SX71 
+MRMS0_SI1113 +MRMS0_SI2057 +MRMS0_SI2100 +MRMS0_SX120 +MRMS0_SX210 +MRMS0_SX30 +MRMS0_SX300 +MRMS0_SX390 +MRPC1_SI1482 +MRPC1_SI2026 +MRPC1_SI2112 +MRPC1_SX132 +MRPC1_SX222 +MRPC1_SX312 +MRPC1_SX402 +MRPC1_SX42 +MRRE0_SI1334 +MRRE0_SI704 +MRRE0_SI952 +MRRE0_SX164 +MRRE0_SX254 +MRRE0_SX344 +MRRE0_SX434 +MRRE0_SX74 +MRSO0_SI1206 +MRSO0_SI1659 +MRSO0_SI2289 +MRSO0_SX129 +MRSO0_SX219 +MRSO0_SX309 +MRSO0_SX39 +MRSO0_SX399 +MRSP0_SI1429 +MRSP0_SI2059 +MRSP0_SI799 +MRSP0_SX169 +MRSP0_SX196 +MRSP0_SX259 +MRSP0_SX439 +MRSP0_SX79 +MRTC0_SI1458 +MRTC0_SI2088 +MRTC0_SI828 +MRTC0_SX108 +MRTC0_SX18 +MRTC0_SX198 +MRTC0_SX288 +MRTC0_SX378 +MRTJ0_SI1551 +MRTJ0_SI2032 +MRTJ0_SI772 +MRTJ0_SX142 +MRTJ0_SX232 +MRTJ0_SX322 +MRTJ0_SX412 +MRTJ0_SX52 +MRVG0_SI1140 +MRVG0_SI1770 +MRVG0_SI510 +MRVG0_SX150 +MRVG0_SX240 +MRVG0_SX330 +MRVG0_SX420 +MRVG0_SX60 +MRWA0_SI1603 +MRWA0_SI2233 +MRWA0_SI973 +MRWA0_SX163 +MRWA0_SX253 +MRWA0_SX343 +MRWA0_SX433 +MRWA0_SX73 +MRWS0_SI1102 +MRWS0_SI1732 +MRWS0_SI472 +MRWS0_SX112 +MRWS0_SX202 +MRWS0_SX22 +MRWS0_SX292 +MRWS0_SX382 +MRXB0_SI1585 +MRXB0_SI2215 +MRXB0_SI955 +MRXB0_SX145 +MRXB0_SX235 +MRXB0_SX325 +MRXB0_SX415 +MRXB0_SX55 +MSAH1_SI1049 +MSAH1_SI1679 +MSAH1_SI2309 +MSAH1_SX149 +MSAH1_SX239 +MSAH1_SX329 +MSAH1_SX419 +MSAH1_SX59 +MSAS0_SI1376 +MSAS0_SI2006 +MSAS0_SI746 +MSAS0_SX116 +MSAS0_SX206 +MSAS0_SX26 +MSAS0_SX296 +MSAS0_SX386 +MSAT0_SI1526 +MSAT0_SI2156 +MSAT0_SI896 +MSAT0_SX176 +MSAT0_SX266 +MSAT0_SX356 +MSAT0_SX446 +MSAT0_SX86 +MSAT1_SI1073 +MSAT1_SI1703 +MSAT1_SI2333 +MSAT1_SX173 +MSAT1_SX263 +MSAT1_SX353 +MSAT1_SX443 +MSAT1_SX83 +MSDB0_SI1007 +MSDB0_SI1637 +MSDB0_SI2267 +MSDB0_SX107 +MSDB0_SX17 +MSDB0_SX197 +MSDB0_SX287 +MSDB0_SX377 +MSDH0_SI2113 +MSDH0_SI2240 +MSDH0_SI980 +MSDH0_SX170 +MSDH0_SX260 +MSDH0_SX350 +MSDH0_SX440 +MSDH0_SX80 +MSDS0_SI1077 +MSDS0_SI1707 +MSDS0_SI2337 +MSDS0_SX177 +MSDS0_SX267 +MSDS0_SX357 +MSDS0_SX447 +MSDS0_SX87 +MSEM1_SI1440 +MSEM1_SI2070 +MSEM1_SI810 +MSEM1_SX180 +MSEM1_SX270 +MSEM1_SX360 +MSEM1_SX450 
+MSEM1_SX90 +MSES0_SI1589 +MSES0_SI2216 +MSES0_SI2219 +MSES0_SX149 +MSES0_SX239 +MSES0_SX329 +MSES0_SX419 +MSES0_SX59 +MSFH0_SI1216 +MSFH0_SI1738 +MSFH0_SI586 +MSFH0_SX136 +MSFH0_SX226 +MSFH0_SX316 +MSFH0_SX406 +MSFH0_SX46 +MSFV0_SI1262 +MSFV0_SI1892 +MSFV0_SI632 +MSFV0_SX182 +MSFV0_SX272 +MSFV0_SX362 +MSFV0_SX452 +MSFV0_SX92 +MSJK0_SI1596 +MSJK0_SI2226 +MSJK0_SI966 +MSJK0_SX156 +MSJK0_SX246 +MSJK0_SX336 +MSJK0_SX426 +MSJK0_SX66 +MSMC0_SI1907 +MSMC0_SI509 +MSMC0_SI647 +MSMC0_SX107 +MSMC0_SX17 +MSMC0_SX197 +MSMC0_SX287 +MSMC0_SX377 +MSMR0_SI1150 +MSMR0_SI1405 +MSMR0_SI775 +MSMR0_SX145 +MSMR0_SX235 +MSMR0_SX325 +MSMR0_SX415 +MSMR0_SX55 +MSMS0_SI1433 +MSMS0_SI2063 +MSMS0_SI803 +MSMS0_SX173 +MSMS0_SX263 +MSMS0_SX353 +MSMS0_SX443 +MSMS0_SX83 +MSRG0_SI1221 +MSRG0_SI1851 +MSRG0_SI591 +MSRG0_SX141 +MSRG0_SX231 +MSRG0_SX321 +MSRG0_SX411 +MSRG0_SX51 +MSRR0_SI1131 +MSRR0_SI1761 +MSRR0_SI501 +MSRR0_SX141 +MSRR0_SX231 +MSRR0_SX30 +MSRR0_SX411 +MSRR0_SX51 +MSTF0_SI1396 +MSTF0_SI766 +MSTF0_SI852 +MSTF0_SX136 +MSTF0_SX226 +MSTF0_SX316 +MSTF0_SX406 +MSTF0_SX46 +MSVS0_SI1568 +MSVS0_SI2198 +MSVS0_SI938 +MSVS0_SX128 +MSVS0_SX218 +MSVS0_SX308 +MSVS0_SX38 +MSVS0_SX398 +MTAB0_SI1572 +MTAB0_SI2202 +MTAB0_SI942 +MTAB0_SX132 +MTAB0_SX222 +MTAB0_SX312 +MTAB0_SX402 +MTAB0_SX42 +MTAS0_SI1385 +MTAS0_SI2015 +MTAS0_SI755 +MTAS0_SX125 +MTAS0_SX215 +MTAS0_SX305 +MTAS0_SX35 +MTAS0_SX395 +MTAT0_SI1110 +MTAT0_SI1740 +MTAT0_SI811 +MTAT0_SX120 +MTAT0_SX210 +MTAT0_SX30 +MTAT0_SX300 +MTAT0_SX390 +MTAT1_SI1409 +MTAT1_SI1627 +MTAT1_SI779 +MTAT1_SX149 +MTAT1_SX239 +MTAT1_SX329 +MTAT1_SX419 +MTAT1_SX59 +MTBC0_SI1173 +MTBC0_SI1803 +MTBC0_SI543 +MTBC0_SX183 +MTBC0_SX273 +MTBC0_SX347 +MTBC0_SX363 +MTBC0_SX93 +MTCS0_SI1972 +MTCS0_SI2265 +MTCS0_SI712 +MTCS0_SX172 +MTCS0_SX262 +MTCS0_SX352 +MTCS0_SX442 +MTCS0_SX82 +MTDB0_SI1401 +MTDB0_SI2031 +MTDB0_SI771 +MTDB0_SX141 +MTDB0_SX231 +MTDB0_SX321 +MTDB0_SX411 +MTDB0_SX51 +MTDP0_SI1274 +MTDP0_SI1521 +MTDP0_SI2151 +MTDP0_SX171 +MTDP0_SX261 +MTDP0_SX351 +MTDP0_SX441 
+MTDP0_SX81 +MTER0_SI1157 +MTER0_SI1787 +MTER0_SI527 +MTER0_SX167 +MTER0_SX17 +MTER0_SX257 +MTER0_SX437 +MTER0_SX77 +MTJG0_SI1520 +MTJG0_SI2157 +MTJG0_SI890 +MTJG0_SX170 +MTJG0_SX260 +MTJG0_SX350 +MTJG0_SX440 +MTJG0_SX80 +MTJM0_SI1226 +MTJM0_SI1856 +MTJM0_SI655 +MTJM0_SX146 +MTJM0_SX236 +MTJM0_SX326 +MTJM0_SX416 +MTJM0_SX56 +MTJS0_SI1192 +MTJS0_SI1822 +MTJS0_SI562 +MTJS0_SX112 +MTJS0_SX202 +MTJS0_SX22 +MTJS0_SX292 +MTJS0_SX382 +MTJU0_SI2020 +MTJU0_SI2269 +MTJU0_SI760 +MTJU0_SX130 +MTJU0_SX220 +MTJU0_SX310 +MTJU0_SX40 +MTJU0_SX400 +MTKD0_SI1187 +MTKD0_SI1817 +MTKD0_SI630 +MTKD0_SX107 +MTKD0_SX17 +MTKD0_SX197 +MTKD0_SX287 +MTKD0_SX377 +MTKP0_SI1023 +MTKP0_SI2283 +MTKP0_SI454 +MTKP0_SX123 +MTKP0_SX213 +MTKP0_SX303 +MTKP0_SX33 +MTKP0_SX393 +MTLB0_SI1134 +MTLB0_SI1764 +MTLB0_SI504 +MTLB0_SX144 +MTLB0_SX234 +MTLB0_SX324 +MTLB0_SX414 +MTLB0_SX54 +MTLC0_SI1313 +MTLC0_SI1477 +MTLC0_SI847 +MTLC0_SX127 +MTLC0_SX217 +MTLC0_SX307 +MTLC0_SX37 +MTLC0_SX397 +MTML0_SI1065 +MTML0_SI1695 +MTML0_SI2325 +MTML0_SX165 +MTML0_SX255 +MTML0_SX345 +MTML0_SX435 +MTML0_SX75 +MTMN0_SI1064 +MTMN0_SI2324 +MTMN0_SI582 +MTMN0_SX164 +MTMN0_SX254 +MTMN0_SX344 +MTMN0_SX434 +MTMN0_SX74 +MTMT0_SI1118 +MTMT0_SI1748 +MTMT0_SI488 +MTMT0_SX128 +MTMT0_SX218 +MTMT0_SX308 +MTMT0_SX38 +MTMT0_SX398 +MTPF0_SI1235 +MTPF0_SI1865 +MTPF0_SI605 +MTPF0_SX155 +MTPF0_SX245 +MTPF0_SX335 +MTPF0_SX425 +MTPF0_SX65 +MTPG0_SI1383 +MTPG0_SI2013 +MTPG0_SI753 +MTPG0_SX123 +MTPG0_SX213 +MTPG0_SX303 +MTPG0_SX33 +MTPG0_SX393 +MTPP0_SI1508 +MTPP0_SI2138 +MTPP0_SI878 +MTPP0_SX158 +MTPP0_SX248 +MTPP0_SX338 +MTPP0_SX428 +MTPP0_SX68 +MTPR0_SI1600 +MTPR0_SI2230 +MTPR0_SI506 +MTPR0_SX160 +MTPR0_SX250 +MTPR0_SX340 +MTPR0_SX430 +MTPR0_SX70 +MTQC0_SI1441 +MTQC0_SI2071 +MTQC0_SI480 +MTQC0_SX181 +MTQC0_SX271 +MTQC0_SX361 +MTQC0_SX451 +MTQC0_SX91 +MTRC0_SI1623 +MTRC0_SI589 +MTRC0_SI993 +MTRC0_SX170 +MTRC0_SX183 +MTRC0_SX273 +MTRC0_SX363 +MTRC0_SX93 +MTRR0_SI1548 +MTRR0_SI2178 +MTRR0_SI918 +MTRR0_SX108 +MTRR0_SX18 +MTRR0_SX198 +MTRR0_SX288 
+MTRR0_SX378 +MTRT0_SI1227 +MTRT0_SI1857 +MTRT0_SI597 +MTRT0_SX147 +MTRT0_SX237 +MTRT0_SX254 +MTRT0_SX417 +MTRT0_SX57 +MTWH1_SI1512 +MTWH1_SI2142 +MTWH1_SI882 +MTWH1_SX162 +MTWH1_SX252 +MTWH1_SX342 +MTWH1_SX432 +MTWH1_SX72 +MTXS0_SI1060 +MTXS0_SI1690 +MTXS0_SI2320 +MTXS0_SX160 +MTXS0_SX250 +MTXS0_SX340 +MTXS0_SX430 +MTXS0_SX70 +MVJH0_SI1556 +MVJH0_SI2186 +MVJH0_SI926 +MVJH0_SX116 +MVJH0_SX206 +MVJH0_SX26 +MVJH0_SX296 +MVJH0_SX386 +MVLO0_SI1147 +MVLO0_SI1777 +MVLO0_SI517 +MVLO0_SX157 +MVLO0_SX247 +MVLO0_SX337 +MVLO0_SX427 +MVLO0_SX67 +MVRW0_SI1485 +MVRW0_SI2115 +MVRW0_SI855 +MVRW0_SX135 +MVRW0_SX225 +MVRW0_SX315 +MVRW0_SX405 +MVRW0_SX45 +MWAC0_SI1601 +MWAC0_SI2231 +MWAC0_SI971 +MWAC0_SX161 +MWAC0_SX251 +MWAC0_SX341 +MWAC0_SX431 +MWAC0_SX71 +MWAD0_SI1062 +MWAD0_SI1749 +MWAD0_SI2322 +MWAD0_SX162 +MWAD0_SX252 +MWAD0_SX342 +MWAD0_SX432 +MWAD0_SX72 +MWAR0_SI1045 +MWAR0_SI1675 +MWAR0_SI2305 +MWAR0_SX145 +MWAR0_SX235 +MWAR0_SX325 +MWAR0_SX415 +MWAR0_SX55 +MWCH0_SI1622 +MWCH0_SI1895 +MWCH0_SI2252 +MWCH0_SX182 +MWCH0_SX272 +MWCH0_SX362 +MWCH0_SX452 +MWCH0_SX92 +MWDK0_SI1436 +MWDK0_SI2017 +MWDK0_SI806 +MWDK0_SX176 +MWDK0_SX266 +MWDK0_SX356 +MWDK0_SX446 +MWDK0_SX86 +MWEM0_SI1320 +MWEM0_SI1393 +MWEM0_SI1950 +MWEM0_SX150 +MWEM0_SX240 +MWEM0_SX330 +MWEM0_SX420 +MWEM0_SX60 +MWGR0_SI1606 +MWGR0_SI2236 +MWGR0_SI976 +MWGR0_SX166 +MWGR0_SX256 +MWGR0_SX346 +MWGR0_SX436 +MWGR0_SX76 +MWRE0_SI1057 +MWRE0_SI1687 +MWRE0_SI2317 +MWRE0_SX157 +MWRE0_SX247 +MWRE0_SX337 +MWRE0_SX427 +MWRE0_SX67 +MWRP0_SI1443 +MWRP0_SI1525 +MWRP0_SI2073 +MWRP0_SX183 +MWRP0_SX273 +MWRP0_SX3 +MWRP0_SX363 +MWRP0_SX93 +MWSB0_SI1626 +MWSB0_SI2256 +MWSB0_SI996 +MWSB0_SX186 +MWSB0_SX276 +MWSB0_SX366 +MWSB0_SX6 +MWSB0_SX96 +MWSH0_SI1426 +MWSH0_SI2266 +MWSH0_SI796 +MWSH0_SX166 +MWSH0_SX256 +MWSH0_SX346 +MWSH0_SX436 +MWSH0_SX76 +MZMB0_SI1166 +MZMB0_SI1796 +MZMB0_SI536 +MZMB0_SX176 +MZMB0_SX266 +MZMB0_SX356 +MZMB0_SX446 +MZMB0_SX86 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train_text.uid 
b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train_text.uid new file mode 100644 index 0000000..c39fd0b --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/train_text.uid @@ -0,0 +1,3696 @@ +FAEM0_SI1392 +FAEM0_SI2022 +FAEM0_SI762 +FAEM0_SX132 +FAEM0_SX222 +FAEM0_SX312 +FAEM0_SX402 +FAEM0_SX42 +FAJW0_SI1263 +FAJW0_SI1893 +FAJW0_SI633 +FAJW0_SX183 +FAJW0_SX273 +FAJW0_SX3 +FAJW0_SX363 +FAJW0_SX93 +FALK0_SI1086 +FALK0_SI456 +FALK0_SI658 +FALK0_SX186 +FALK0_SX276 +FALK0_SX366 +FALK0_SX6 +FALK0_SX96 +FALR0_SI1325 +FALR0_SI1955 +FALR0_SI695 +FALR0_SX155 +FALR0_SX245 +FALR0_SX335 +FALR0_SX425 +FALR0_SX65 +FAPB0_SI1063 +FAPB0_SI1693 +FAPB0_SI2323 +FAPB0_SX163 +FAPB0_SX253 +FAPB0_SX343 +FAPB0_SX433 +FAPB0_SX73 +FBAS0_SI1387 +FBAS0_SI1472 +FBAS0_SI2066 +FBAS0_SX127 +FBAS0_SX217 +FBAS0_SX307 +FBAS0_SX37 +FBAS0_SX397 +FBCG1_SI1612 +FBCG1_SI2242 +FBCG1_SI982 +FBCG1_SX172 +FBCG1_SX262 +FBCG1_SX352 +FBCG1_SX442 +FBCG1_SX82 +FBCH0_SI1586 +FBCH0_SI956 +FBCH0_SI959 +FBCH0_SX146 +FBCH0_SX236 +FBCH0_SX326 +FBCH0_SX416 +FBCH0_SX56 +FBJL0_SI1552 +FBJL0_SI2182 +FBJL0_SI922 +FBJL0_SX112 +FBJL0_SX202 +FBJL0_SX22 +FBJL0_SX292 +FBJL0_SX382 +FBLV0_SI1058 +FBLV0_SI1688 +FBLV0_SI2318 +FBLV0_SX158 +FBLV0_SX248 +FBLV0_SX338 +FBLV0_SX428 +FBLV0_SX68 +FBMH0_SI1136 +FBMH0_SI1766 +FBMH0_SI970 +FBMH0_SX146 +FBMH0_SX236 +FBMH0_SX326 +FBMH0_SX416 +FBMH0_SX56 +FBMJ0_SI1776 +FBMJ0_SI516 +FBMJ0_SI815 +FBMJ0_SX156 +FBMJ0_SX246 +FBMJ0_SX336 +FBMJ0_SX426 +FBMJ0_SX66 +FCAG0_SI1503 +FCAG0_SI1641 +FCAG0_SI2133 +FCAG0_SX153 +FCAG0_SX243 +FCAG0_SX333 +FCAG0_SX423 +FCAG0_SX63 +FCAJ0_SI1479 +FCAJ0_SI1804 +FCAJ0_SI849 +FCAJ0_SX129 +FCAJ0_SX219 +FCAJ0_SX309 +FCAJ0_SX39 +FCAJ0_SX399 +FCDR1_SI1186 +FCDR1_SI1816 +FCDR1_SI556 +FCDR1_SX106 +FCDR1_SX16 +FCDR1_SX196 +FCDR1_SX286 +FCDR1_SX376 +FCEG0_SI1248 +FCEG0_SI1878 +FCEG0_SI618 +FCEG0_SX168 +FCEG0_SX258 +FCEG0_SX348 +FCEG0_SX438 +FCEG0_SX78 +FCJF0_SI1027 +FCJF0_SI1657 +FCJF0_SI648 +FCJF0_SX127 +FCJF0_SX217 +FCJF0_SX307 
+FCJF0_SX37 +FCJF0_SX397 +FCJS0_SI1607 +FCJS0_SI2237 +FCJS0_SI977 +FCJS0_SX167 +FCJS0_SX257 +FCJS0_SX347 +FCJS0_SX437 +FCJS0_SX77 +FCKE0_SI1111 +FCKE0_SI1741 +FCKE0_SI481 +FCKE0_SX121 +FCKE0_SX211 +FCKE0_SX301 +FCKE0_SX31 +FCKE0_SX391 +FCLT0_SI1438 +FCLT0_SI2068 +FCLT0_SI808 +FCLT0_SX178 +FCLT0_SX268 +FCLT0_SX358 +FCLT0_SX448 +FCLT0_SX88 +FCMG0_SI1142 +FCMG0_SI1242 +FCMG0_SI1872 +FCMG0_SX162 +FCMG0_SX252 +FCMG0_SX342 +FCMG0_SX432 +FCMG0_SX72 +FCMM0_SI1083 +FCMM0_SI1957 +FCMM0_SI453 +FCMM0_SX183 +FCMM0_SX273 +FCMM0_SX363 +FCMM0_SX420 +FCMM0_SX93 +FCRZ0_SI1913 +FCRZ0_SI2053 +FCRZ0_SI793 +FCRZ0_SX163 +FCRZ0_SX253 +FCRZ0_SX343 +FCRZ0_SX433 +FCRZ0_SX73 +FCYL0_SI1297 +FCYL0_SI1927 +FCYL0_SI667 +FCYL0_SX127 +FCYL0_SX217 +FCYL0_SX349 +FCYL0_SX37 +FCYL0_SX397 +FDAS1_SI1461 +FDAS1_SI2091 +FDAS1_SI831 +FDAS1_SX111 +FDAS1_SX201 +FDAS1_SX21 +FDAS1_SX291 +FDAS1_SX381 +FDAW0_SI1271 +FDAW0_SI1406 +FDAW0_SI2036 +FDAW0_SX146 +FDAW0_SX236 +FDAW0_SX326 +FDAW0_SX416 +FDAW0_SX56 +FDFB0_SI1318 +FDFB0_SI1948 +FDFB0_SI2010 +FDFB0_SX148 +FDFB0_SX238 +FDFB0_SX328 +FDFB0_SX418 +FDFB0_SX58 +FDJH0_SI1565 +FDJH0_SI2195 +FDJH0_SI935 +FDJH0_SX125 +FDJH0_SX215 +FDJH0_SX305 +FDJH0_SX35 +FDJH0_SX395 +FDKN0_SI1081 +FDKN0_SI1202 +FDKN0_SI1711 +FDKN0_SX181 +FDKN0_SX271 +FDKN0_SX361 +FDKN0_SX451 +FDKN0_SX91 +FDML0_SI1149 +FDML0_SI1779 +FDML0_SI2075 +FDML0_SX159 +FDML0_SX249 +FDML0_SX339 +FDML0_SX429 +FDML0_SX69 +FDMY0_SI1197 +FDMY0_SI567 +FDMY0_SI714 +FDMY0_SX117 +FDMY0_SX207 +FDMY0_SX27 +FDMY0_SX297 +FDMY0_SX387 +FDNC0_SI1278 +FDNC0_SI1908 +FDNC0_SI2287 +FDNC0_SX108 +FDNC0_SX18 +FDNC0_SX198 +FDNC0_SX288 +FDNC0_SX378 +FDTD0_SI1561 +FDTD0_SI2191 +FDTD0_SI931 +FDTD0_SX121 +FDTD0_SX211 +FDTD0_SX301 +FDTD0_SX321 +FDTD0_SX391 +FDXW0_SI1511 +FDXW0_SI2141 +FDXW0_SI881 +FDXW0_SX161 +FDXW0_SX251 +FDXW0_SX341 +FDXW0_SX431 +FDXW0_SX71 +FEAC0_SI1245 +FEAC0_SI1875 +FEAC0_SI615 +FEAC0_SX165 +FEAC0_SX255 +FEAC0_SX345 +FEAC0_SX435 +FEAC0_SX75 +FEAR0_SI1252 +FEAR0_SI1882 +FEAR0_SI622 +FEAR0_SX172 +FEAR0_SX262 
+FEAR0_SX352 +FEAR0_SX442 +FEAR0_SX82 +FECD0_SI1418 +FECD0_SI2048 +FECD0_SI788 +FECD0_SX158 +FECD0_SX248 +FECD0_SX338 +FECD0_SX428 +FECD0_SX68 +FEEH0_SI1112 +FEEH0_SI1742 +FEEH0_SI471 +FEEH0_SX122 +FEEH0_SX212 +FEEH0_SX302 +FEEH0_SX32 +FEEH0_SX392 +FEME0_SI1505 +FEME0_SI2135 +FEME0_SI875 +FEME0_SX155 +FEME0_SX245 +FEME0_SX335 +FEME0_SX425 +FEME0_SX65 +FETB0_SI1148 +FETB0_SI1778 +FETB0_SI518 +FETB0_SX158 +FETB0_SX248 +FETB0_SX338 +FETB0_SX428 +FETB0_SX68 +FEXM0_SI1101 +FEXM0_SI1731 +FEXM0_SI482 +FEXM0_SX111 +FEXM0_SX201 +FEXM0_SX291 +FEXM0_SX366 +FEXM0_SX381 +FGCS0_SI1486 +FGCS0_SI2116 +FGCS0_SI856 +FGCS0_SX136 +FGCS0_SX226 +FGCS0_SX316 +FGCS0_SX406 +FGCS0_SX46 +FGDP0_SI1618 +FGDP0_SI2248 +FGDP0_SI988 +FGDP0_SX178 +FGDP0_SX268 +FGDP0_SX358 +FGDP0_SX448 +FGDP0_SX88 +FGMB0_SI1145 +FGMB0_SI1775 +FGMB0_SI515 +FGMB0_SX155 +FGMB0_SX245 +FGMB0_SX335 +FGMB0_SX425 +FGMB0_SX65 +FGRW0_SI1152 +FGRW0_SI1782 +FGRW0_SI1990 +FGRW0_SX162 +FGRW0_SX252 +FGRW0_SX342 +FGRW0_SX432 +FGRW0_SX72 +FHLM0_SI1560 +FHLM0_SI2190 +FHLM0_SI930 +FHLM0_SX120 +FHLM0_SX210 +FHLM0_SX300 +FHLM0_SX349 +FHLM0_SX390 +FHXS0_SI1075 +FHXS0_SI2302 +FHXS0_SI2335 +FHXS0_SX175 +FHXS0_SX265 +FHXS0_SX355 +FHXS0_SX445 +FHXS0_SX85 +FJDM2_SI1582 +FJDM2_SI1964 +FJDM2_SI2212 +FJDM2_SX142 +FJDM2_SX232 +FJDM2_SX322 +FJDM2_SX412 +FJDM2_SX52 +FJEN0_SI1047 +FJEN0_SI1677 +FJEN0_SI2307 +FJEN0_SX147 +FJEN0_SX237 +FJEN0_SX327 +FJEN0_SX417 +FJEN0_SX57 +FJHK0_SI1022 +FJHK0_SI1652 +FJHK0_SI2282 +FJHK0_SX122 +FJHK0_SX212 +FJHK0_SX302 +FJHK0_SX32 +FJHK0_SX392 +FJKL0_SI1562 +FJKL0_SI2192 +FJKL0_SI932 +FJKL0_SX122 +FJKL0_SX212 +FJKL0_SX302 +FJKL0_SX32 +FJKL0_SX392 +FJLG0_SI1506 +FJLG0_SI1889 +FJLG0_SI2306 +FJLG0_SX179 +FJLG0_SX269 +FJLG0_SX359 +FJLG0_SX449 +FJLG0_SX89 +FJLR0_SI1231 +FJLR0_SI1861 +FJLR0_SI601 +FJLR0_SX151 +FJLR0_SX241 +FJLR0_SX331 +FJLR0_SX421 +FJLR0_SX61 +FJRB0_SI1302 +FJRB0_SI1932 +FJRB0_SI672 +FJRB0_SX132 +FJRB0_SX222 +FJRB0_SX312 +FJRB0_SX402 +FJRB0_SX42 +FJRP1_SI1432 +FJRP1_SI2062 +FJRP1_SI802 +FJRP1_SX172 
+FJRP1_SX262 +FJRP1_SX352 +FJRP1_SX442 +FJRP1_SX82 +FJSK0_SI1052 +FJSK0_SI1682 +FJSK0_SI2312 +FJSK0_SX152 +FJSK0_SX242 +FJSK0_SX332 +FJSK0_SX422 +FJSK0_SX62 +FJSP0_SI1434 +FJSP0_SI1763 +FJSP0_SI804 +FJSP0_SX174 +FJSP0_SX264 +FJSP0_SX354 +FJSP0_SX444 +FJSP0_SX84 +FJWB1_SI2055 +FJWB1_SI748 +FJWB1_SI795 +FJWB1_SX165 +FJWB1_SX255 +FJWB1_SX345 +FJWB1_SX435 +FJWB1_SX75 +FJXM0_SI1211 +FJXM0_SI1971 +FJXM0_SI581 +FJXM0_SX131 +FJXM0_SX221 +FJXM0_SX311 +FJXM0_SX401 +FJXM0_SX41 +FJXP0_SI1122 +FJXP0_SI1752 +FJXP0_SI492 +FJXP0_SX132 +FJXP0_SX222 +FJXP0_SX312 +FJXP0_SX402 +FJXP0_SX42 +FKAA0_SI1208 +FKAA0_SI1838 +FKAA0_SI578 +FKAA0_SX128 +FKAA0_SX218 +FKAA0_SX308 +FKAA0_SX38 +FKAA0_SX398 +FKDE0_SI1141 +FKDE0_SI1771 +FKDE0_SI2221 +FKDE0_SX151 +FKDE0_SX241 +FKDE0_SX331 +FKDE0_SX421 +FKDE0_SX61 +FKDW0_SI1207 +FKDW0_SI1891 +FKDW0_SI577 +FKDW0_SX127 +FKDW0_SX217 +FKDW0_SX307 +FKDW0_SX37 +FKDW0_SX397 +FKFB0_SI1608 +FKFB0_SI2238 +FKFB0_SI978 +FKFB0_SX168 +FKFB0_SX258 +FKFB0_SX348 +FKFB0_SX438 +FKFB0_SX78 +FKKH0_SI1290 +FKKH0_SI1920 +FKKH0_SI660 +FKKH0_SX120 +FKKH0_SX210 +FKKH0_SX30 +FKKH0_SX300 +FKKH0_SX390 +FKLC0_SI1615 +FKLC0_SI2245 +FKLC0_SI985 +FKLC0_SX175 +FKLC0_SX265 +FKLC0_SX355 +FKLC0_SX445 +FKLC0_SX85 +FKLC1_SI1048 +FKLC1_SI1678 +FKLC1_SI2308 +FKLC1_SX148 +FKLC1_SX238 +FKLC1_SX328 +FKLC1_SX418 +FKLC1_SX58 +FKLH0_SI1257 +FKLH0_SI1887 +FKLH0_SI627 +FKLH0_SX177 +FKLH0_SX267 +FKLH0_SX357 +FKLH0_SX447 +FKLH0_SX87 +FKSR0_SI1117 +FKSR0_SI1747 +FKSR0_SI487 +FKSR0_SX161 +FKSR0_SX217 +FKSR0_SX366 +FKSR0_SX37 +FKSR0_SX397 +FLAC0_SI1339 +FLAC0_SI2161 +FLAC0_SI901 +FLAC0_SX181 +FLAC0_SX271 +FLAC0_SX361 +FLAC0_SX451 +FLAC0_SX91 +FLAG0_SI1464 +FLAG0_SI2094 +FLAG0_SI834 +FLAG0_SX114 +FLAG0_SX204 +FLAG0_SX24 +FLAG0_SX294 +FLAG0_SX384 +FLEH0_SI1051 +FLEH0_SI1681 +FLEH0_SI2311 +FLEH0_SX151 +FLEH0_SX241 +FLEH0_SX331 +FLEH0_SX421 +FLEH0_SX61 +FLET0_SI1137 +FLET0_SI1767 +FLET0_SI507 +FLET0_SX147 +FLET0_SX237 +FLET0_SX277 +FLET0_SX417 +FLET0_SX57 +FLHD0_SI1344 +FLHD0_SI1827 +FLHD0_SI1974 +FLHD0_SX174 
+FLHD0_SX264 +FLHD0_SX354 +FLHD0_SX444 +FLHD0_SX84 +FLJA0_SI1078 +FLJA0_SI1708 +FLJA0_SI2338 +FLJA0_SX178 +FLJA0_SX268 +FLJA0_SX358 +FLJA0_SX448 +FLJA0_SX88 +FLJD0_SI1516 +FLJD0_SI2146 +FLJD0_SI886 +FLJD0_SX166 +FLJD0_SX256 +FLJD0_SX346 +FLJD0_SX436 +FLJD0_SX76 +FLJG0_SI1611 +FLJG0_SI2241 +FLJG0_SI981 +FLJG0_SX171 +FLJG0_SX261 +FLJG0_SX351 +FLJG0_SX441 +FLJG0_SX81 +FLKM0_SI1880 +FLKM0_SI620 +FLKM0_SI686 +FLKM0_SX116 +FLKM0_SX260 +FLKM0_SX350 +FLKM0_SX440 +FLKM0_SX80 +FLMA0_SI1243 +FLMA0_SI1873 +FLMA0_SI613 +FLMA0_SX163 +FLMA0_SX253 +FLMA0_SX343 +FLMA0_SX433 +FLMA0_SX73 +FLMC0_SI1372 +FLMC0_SI2002 +FLMC0_SI742 +FLMC0_SX112 +FLMC0_SX22 +FLMC0_SX292 +FLMC0_SX336 +FLMC0_SX382 +FLMK0_SI1035 +FLMK0_SI1229 +FLMK0_SI2295 +FLMK0_SX135 +FLMK0_SX225 +FLMK0_SX315 +FLMK0_SX405 +FLMK0_SX45 +FLOD0_SI1287 +FLOD0_SI1917 +FLOD0_SI657 +FLOD0_SX117 +FLOD0_SX171 +FLOD0_SX207 +FLOD0_SX297 +FLOD0_SX387 +FLTM0_SI1070 +FLTM0_SI1700 +FLTM0_SI2330 +FLTM0_SX170 +FLTM0_SX260 +FLTM0_SX350 +FLTM0_SX440 +FLTM0_SX80 +FMAH1_SI1509 +FMAH1_SI2139 +FMAH1_SI879 +FMAH1_SX159 +FMAH1_SX249 +FMAH1_SX339 +FMAH1_SX429 +FMAH1_SX69 +FMBG0_SI1160 +FMBG0_SI1790 +FMBG0_SI2264 +FMBG0_SX260 +FMBG0_SX3 +FMBG0_SX350 +FMBG0_SX440 +FMBG0_SX80 +FMEM0_SI1377 +FMEM0_SI2007 +FMEM0_SI747 +FMEM0_SX117 +FMEM0_SX207 +FMEM0_SX297 +FMEM0_SX333 +FMEM0_SX387 +FMJB0_SI1177 +FMJB0_SI1807 +FMJB0_SI547 +FMJB0_SX187 +FMJB0_SX277 +FMJB0_SX367 +FMJB0_SX7 +FMJB0_SX97 +FMJF0_SI1254 +FMJF0_SI1884 +FMJF0_SI624 +FMJF0_SX174 +FMJF0_SX264 +FMJF0_SX354 +FMJF0_SX444 +FMJF0_SX84 +FMJU0_SI1389 +FMJU0_SI2019 +FMJU0_SI759 +FMJU0_SX129 +FMJU0_SX219 +FMJU0_SX309 +FMJU0_SX39 +FMJU0_SX399 +FMKC0_SI1041 +FMKC0_SI1072 +FMKC0_SI1702 +FMKC0_SX172 +FMKC0_SX262 +FMKC0_SX352 +FMKC0_SX442 +FMKC0_SX82 +FMKF0_SI1018 +FMKF0_SI1536 +FMKF0_SI906 +FMKF0_SX186 +FMKF0_SX276 +FMKF0_SX366 +FMKF0_SX6 +FMKF0_SX96 +FMMH0_SI1537 +FMMH0_SI2167 +FMMH0_SI907 +FMMH0_SX187 +FMMH0_SX367 +FMMH0_SX420 +FMMH0_SX7 +FMMH0_SX97 +FMPG0_SI1602 +FMPG0_SI2232 +FMPG0_SI972 +FMPG0_SX162 
+FMPG0_SX252 +FMPG0_SX342 +FMPG0_SX432 +FMPG0_SX72 +FNKL0_SI1522 +FNKL0_SI2152 +FNKL0_SI892 +FNKL0_SX172 +FNKL0_SX196 +FNKL0_SX262 +FNKL0_SX442 +FNKL0_SX82 +FNTB0_SI1203 +FNTB0_SI573 +FNTB0_SI679 +FNTB0_SX123 +FNTB0_SX213 +FNTB0_SX303 +FNTB0_SX33 +FNTB0_SX393 +FPAB1_SI1471 +FPAB1_SI2101 +FPAB1_SI841 +FPAB1_SX121 +FPAB1_SX211 +FPAB1_SX301 +FPAB1_SX31 +FPAB1_SX391 +FPAC0_SI1921 +FPAC0_SI2011 +FPAC0_SI661 +FPAC0_SX121 +FPAC0_SX211 +FPAC0_SX301 +FPAC0_SX31 +FPAC0_SX391 +FPAD0_SI1346 +FPAD0_SI1976 +FPAD0_SI716 +FPAD0_SX176 +FPAD0_SX266 +FPAD0_SX356 +FPAD0_SX446 +FPAD0_SX86 +FPAF0_SI1054 +FPAF0_SI1684 +FPAF0_SI2314 +FPAF0_SX154 +FPAF0_SX244 +FPAF0_SX334 +FPAF0_SX424 +FPAF0_SX64 +FPAZ0_SI1593 +FPAZ0_SI2223 +FPAZ0_SI963 +FPAZ0_SX153 +FPAZ0_SX243 +FPAZ0_SX27 +FPAZ0_SX423 +FPAZ0_SX63 +FPJF0_SI1046 +FPJF0_SI1259 +FPJF0_SI1676 +FPJF0_SX146 +FPJF0_SX236 +FPJF0_SX326 +FPJF0_SX352 +FPJF0_SX56 +FPLS0_SI1590 +FPLS0_SI2220 +FPLS0_SI960 +FPLS0_SX150 +FPLS0_SX240 +FPLS0_SX3 +FPLS0_SX330 +FPLS0_SX60 +FPMY0_SI1153 +FPMY0_SI1783 +FPMY0_SI523 +FPMY0_SX163 +FPMY0_SX196 +FPMY0_SX253 +FPMY0_SX343 +FPMY0_SX73 +FREH0_SI1315 +FREH0_SI1945 +FREH0_SI685 +FREH0_SX145 +FREH0_SX235 +FREH0_SX325 +FREH0_SX415 +FREH0_SX55 +FRJB0_SI1427 +FRJB0_SI1470 +FRJB0_SI1794 +FRJB0_SX167 +FRJB0_SX257 +FRJB0_SX347 +FRJB0_SX437 +FRJB0_SX77 +FRLL0_SI1514 +FRLL0_SI805 +FRLL0_SI884 +FRLL0_SX164 +FRLL0_SX254 +FRLL0_SX344 +FRLL0_SX434 +FRLL0_SX74 +FSAG0_SI1323 +FSAG0_SI1953 +FSAG0_SI693 +FSAG0_SX153 +FSAG0_SX243 +FSAG0_SX333 +FSAG0_SX423 +FSAG0_SX63 +FSAH0_SI1244 +FSAH0_SI1874 +FSAH0_SI614 +FSAH0_SX164 +FSAH0_SX327 +FSAH0_SX344 +FSAH0_SX434 +FSAH0_SX74 +FSAK0_SI1300 +FSAK0_SI1930 +FSAK0_SI670 +FSAK0_SX130 +FSAK0_SX220 +FSAK0_SX310 +FSAK0_SX40 +FSAK0_SX400 +FSBK0_SI1069 +FSBK0_SI1699 +FSBK0_SI2329 +FSBK0_SX169 +FSBK0_SX259 +FSBK0_SX349 +FSBK0_SX439 +FSBK0_SX79 +FSCN0_SI1886 +FSCN0_SI626 +FSCN0_SI705 +FSCN0_SX176 +FSCN0_SX266 +FSCN0_SX356 +FSCN0_SX446 +FSCN0_SX86 +FSDC0_SI1312 +FSDC0_SI1942 +FSDC0_SI2234 +FSDC0_SX142 
+FSDC0_SX232 +FSDC0_SX322 +FSDC0_SX412 +FSDC0_SX52 +FSDJ0_SI1115 +FSDJ0_SI1745 +FSDJ0_SI485 +FSDJ0_SX125 +FSDJ0_SX215 +FSDJ0_SX305 +FSDJ0_SX35 +FSDJ0_SX395 +FSGF0_SI1557 +FSGF0_SI2187 +FSGF0_SI927 +FSGF0_SX117 +FSGF0_SX207 +FSGF0_SX27 +FSGF0_SX297 +FSGF0_SX387 +FSJG0_SI1570 +FSJG0_SI2200 +FSJG0_SI940 +FSJG0_SX130 +FSJG0_SX220 +FSJG0_SX310 +FSJG0_SX40 +FSJG0_SX400 +FSJK1_SI1025 +FSJK1_SI2285 +FSJK1_SI696 +FSJK1_SX125 +FSJK1_SX215 +FSJK1_SX305 +FSJK1_SX35 +FSJK1_SX395 +FSJS0_SI1171 +FSJS0_SI1801 +FSJS0_SI541 +FSJS0_SX181 +FSJS0_SX271 +FSJS0_SX361 +FSJS0_SX451 +FSJS0_SX91 +FSJW0_SI1333 +FSJW0_SI1963 +FSJW0_SI703 +FSJW0_SX163 +FSJW0_SX253 +FSJW0_SX343 +FSJW0_SX433 +FSJW0_SX73 +FSKC0_SI1416 +FSKC0_SI2046 +FSKC0_SI786 +FSKC0_SX156 +FSKC0_SX246 +FSKC0_SX336 +FSKC0_SX426 +FSKC0_SX66 +FSKL0_SI1529 +FSKL0_SI2159 +FSKL0_SI899 +FSKL0_SX179 +FSKL0_SX269 +FSKL0_SX359 +FSKL0_SX449 +FSKL0_SX89 +FSKP0_SI1098 +FSKP0_SI1728 +FSKP0_SI468 +FSKP0_SX108 +FSKP0_SX18 +FSKP0_SX198 +FSKP0_SX288 +FSKP0_SX378 +FSLS0_SI1056 +FSLS0_SI1686 +FSLS0_SI2316 +FSLS0_SX156 +FSLS0_SX202 +FSLS0_SX246 +FSLS0_SX426 +FSLS0_SX66 +FSMA0_SI1621 +FSMA0_SI2251 +FSMA0_SI991 +FSMA0_SX181 +FSMA0_SX271 +FSMA0_SX361 +FSMA0_SX451 +FSMA0_SX91 +FSMM0_SI1314 +FSMM0_SI1944 +FSMM0_SI684 +FSMM0_SX144 +FSMM0_SX234 +FSMM0_SX324 +FSMM0_SX414 +FSMM0_SX54 +FSMS1_SI1504 +FSMS1_SI2134 +FSMS1_SI874 +FSMS1_SX154 +FSMS1_SX244 +FSMS1_SX334 +FSMS1_SX347 +FSMS1_SX64 +FSPM0_SI1241 +FSPM0_SI1871 +FSPM0_SI611 +FSPM0_SX161 +FSPM0_SX251 +FSPM0_SX341 +FSPM0_SX431 +FSPM0_SX71 +FSRH0_SI1719 +FSRH0_SI1931 +FSRH0_SI671 +FSRH0_SX131 +FSRH0_SX221 +FSRH0_SX311 +FSRH0_SX401 +FSRH0_SX41 +FSSB0_SI1082 +FSSB0_SI1712 +FSSB0_SI2342 +FSSB0_SX182 +FSSB0_SX272 +FSSB0_SX362 +FSSB0_SX452 +FSSB0_SX92 +FTAJ0_SI1329 +FTAJ0_SI474 +FTAJ0_SI699 +FTAJ0_SX159 +FTAJ0_SX249 +FTAJ0_SX339 +FTAJ0_SX429 +FTAJ0_SX69 +FTBR0_SI1402 +FTBR0_SI2181 +FTBR0_SI921 +FTBR0_SX111 +FTBR0_SX201 +FTBR0_SX21 +FTBR0_SX291 +FTBR0_SX381 +FTBW0_SI1345 +FTBW0_SI1975 +FTBW0_SI715 +FTBW0_SX175 
+FTBW0_SX265 +FTBW0_SX355 +FTBW0_SX445 +FTBW0_SX85 +FTLG0_SI1743 +FTLG0_SI483 +FTLG0_SI840 +FTLG0_SX123 +FTLG0_SX213 +FTLG0_SX303 +FTLG0_SX33 +FTLG0_SX393 +FTMG0_SI1532 +FTMG0_SI2162 +FTMG0_SI902 +FTMG0_SX182 +FTMG0_SX272 +FTMG0_SX362 +FTMG0_SX452 +FTMG0_SX92 +FVFB0_SI1032 +FVFB0_SI1510 +FVFB0_SI2292 +FVFB0_SX132 +FVFB0_SX222 +FVFB0_SX312 +FVFB0_SX402 +FVFB0_SX42 +FVKB0_SI1159 +FVKB0_SI1789 +FVKB0_SI529 +FVKB0_SX169 +FVKB0_SX259 +FVKB0_SX349 +FVKB0_SX439 +FVKB0_SX79 +FVMH0_SI1466 +FVMH0_SI2096 +FVMH0_SI836 +FVMH0_SX116 +FVMH0_SX206 +FVMH0_SX26 +FVMH0_SX296 +FVMH0_SX386 +MABC0_SI1620 +MABC0_SI2041 +MABC0_SI781 +MABC0_SX151 +MABC0_SX241 +MABC0_SX331 +MABC0_SX421 +MABC0_SX61 +MADC0_SI1367 +MADC0_SI1997 +MADC0_SI737 +MADC0_SX107 +MADC0_SX17 +MADC0_SX197 +MADC0_SX287 +MADC0_SX377 +MADD0_SI1295 +MADD0_SI1798 +MADD0_SI538 +MADD0_SX178 +MADD0_SX268 +MADD0_SX358 +MADD0_SX448 +MADD0_SX88 +MAEB0_SI1411 +MAEB0_SI2250 +MAEB0_SI990 +MAEB0_SX180 +MAEB0_SX270 +MAEB0_SX360 +MAEB0_SX450 +MAEB0_SX90 +MAEO0_SI1326 +MAEO0_SI1655 +MAEO0_SI1956 +MAEO0_SX156 +MAEO0_SX246 +MAEO0_SX336 +MAEO0_SX426 +MAEO0_SX66 +MAFM0_SI1569 +MAFM0_SI2199 +MAFM0_SI939 +MAFM0_SX129 +MAFM0_SX219 +MAFM0_SX309 +MAFM0_SX39 +MAFM0_SX399 +MAJP0_SI1074 +MAJP0_SI1704 +MAJP0_SI2334 +MAJP0_SX174 +MAJP0_SX264 +MAJP0_SX354 +MAJP0_SX444 +MAJP0_SX84 +MAKB0_SI1016 +MAKB0_SI1646 +MAKB0_SI2276 +MAKB0_SX116 +MAKB0_SX206 +MAKB0_SX26 +MAKB0_SX296 +MAKB0_SX386 +MAKR0_SI1352 +MAKR0_SI1982 +MAKR0_SI722 +MAKR0_SX182 +MAKR0_SX272 +MAKR0_SX362 +MAKR0_SX452 +MAKR0_SX92 +MAPV0_SI1293 +MAPV0_SI1923 +MAPV0_SI663 +MAPV0_SX123 +MAPV0_SX213 +MAPV0_SX303 +MAPV0_SX33 +MAPV0_SX393 +MARC0_SI1188 +MARC0_SI1818 +MARC0_SI558 +MARC0_SX108 +MARC0_SX18 +MARC0_SX198 +MARC0_SX288 +MARC0_SX378 +MARW0_SI1276 +MARW0_SI1906 +MARW0_SI646 +MARW0_SX106 +MARW0_SX16 +MARW0_SX286 +MARW0_SX349 +MARW0_SX376 +MBAR0_SI1319 +MBAR0_SI1949 +MBAR0_SI689 +MBAR0_SX149 +MBAR0_SX239 +MBAR0_SX329 +MBAR0_SX419 +MBAR0_SX59 +MBBR0_SI1055 +MBBR0_SI1685 +MBBR0_SI2315 +MBBR0_SX155 
+MBBR0_SX245 +MBBR0_SX335 +MBBR0_SX425 +MBBR0_SX65 +MBCG0_SI2217 +MBCG0_SI486 +MBCG0_SI957 +MBCG0_SX147 +MBCG0_SX237 +MBCG0_SX327 +MBCG0_SX417 +MBCG0_SX57 +MBEF0_SI1281 +MBEF0_SI1911 +MBEF0_SI651 +MBEF0_SX111 +MBEF0_SX201 +MBEF0_SX21 +MBEF0_SX291 +MBEF0_SX381 +MBGT0_SI1341 +MBGT0_SI1841 +MBGT0_SI711 +MBGT0_SX171 +MBGT0_SX261 +MBGT0_SX351 +MBGT0_SX441 +MBGT0_SX81 +MBJV0_SI1247 +MBJV0_SI1877 +MBJV0_SI617 +MBJV0_SX167 +MBJV0_SX257 +MBJV0_SX347 +MBJV0_SX437 +MBJV0_SX77 +MBMA0_SI1222 +MBMA0_SI1852 +MBMA0_SI592 +MBMA0_SX142 +MBMA0_SX232 +MBMA0_SX322 +MBMA0_SX412 +MBMA0_SX52 +MBMA1_SI2207 +MBMA1_SI2214 +MBMA1_SI954 +MBMA1_SX144 +MBMA1_SX234 +MBMA1_SX324 +MBMA1_SX414 +MBMA1_SX54 +MBML0_SI1169 +MBML0_SI1799 +MBML0_SI539 +MBML0_SX179 +MBML0_SX269 +MBML0_SX359 +MBML0_SX449 +MBML0_SX89 +MBOM0_SI1014 +MBOM0_SI1644 +MBOM0_SI2274 +MBOM0_SX114 +MBOM0_SX204 +MBOM0_SX294 +MBOM0_SX311 +MBOM0_SX384 +MBSB0_SI1353 +MBSB0_SI1983 +MBSB0_SI723 +MBSB0_SX183 +MBSB0_SX273 +MBSB0_SX3 +MBSB0_SX363 +MBSB0_SX93 +MBTH0_SI2102 +MBTH0_SI505 +MBTH0_SI757 +MBTH0_SX122 +MBTH0_SX212 +MBTH0_SX302 +MBTH0_SX32 +MBTH0_SX392 +MBWP0_SI1531 +MBWP0_SI1969 +MBWP0_SI709 +MBWP0_SX169 +MBWP0_SX259 +MBWP0_SX349 +MBWP0_SX439 +MBWP0_SX79 +MCAE0_SI1447 +MCAE0_SI2077 +MCAE0_SI817 +MCAE0_SX187 +MCAE0_SX277 +MCAE0_SX367 +MCAE0_SX7 +MCAE0_SX97 +MCAL0_SI1138 +MCAL0_SI1768 +MCAL0_SI508 +MCAL0_SX148 +MCAL0_SX238 +MCAL0_SX328 +MCAL0_SX418 +MCAL0_SX58 +MCDC0_SI1292 +MCDC0_SI1922 +MCDC0_SI662 +MCDC0_SX122 +MCDC0_SX212 +MCDC0_SX302 +MCDC0_SX32 +MCDC0_SX392 +MCDD0_SI1513 +MCDD0_SI2143 +MCDD0_SI883 +MCDD0_SX163 +MCDD0_SX253 +MCDD0_SX343 +MCDD0_SX433 +MCDD0_SX73 +MCDR0_SI1154 +MCDR0_SI1784 +MCDR0_SI524 +MCDR0_SX164 +MCDR0_SX254 +MCDR0_SX344 +MCDR0_SX434 +MCDR0_SX74 +MCEF0_SI1135 +MCEF0_SI1765 +MCEF0_SI842 +MCEF0_SX145 +MCEF0_SX235 +MCEF0_SX325 +MCEF0_SX415 +MCEF0_SX55 +MCEW0_SI1442 +MCEW0_SI2072 +MCEW0_SI812 +MCEW0_SX182 +MCEW0_SX272 +MCEW0_SX362 +MCEW0_SX452 +MCEW0_SX92 +MCHL0_SI1347 +MCHL0_SI1404 +MCHL0_SI1977 +MCHL0_SX177 
+MCHL0_SX267 +MCHL0_SX357 +MCHL0_SX447 +MCHL0_SX87 +MCLK0_SI1660 +MCLK0_SI2290 +MCLK0_SI650 +MCLK0_SX130 +MCLK0_SX220 +MCLK0_SX310 +MCLK0_SX40 +MCLK0_SX400 +MCLM0_SI1456 +MCLM0_SI2086 +MCLM0_SI826 +MCLM0_SX106 +MCLM0_SX16 +MCLM0_SX196 +MCLM0_SX286 +MCLM0_SX376 +MCPM0_SI1194 +MCPM0_SI1824 +MCPM0_SI564 +MCPM0_SX114 +MCPM0_SX204 +MCPM0_SX24 +MCPM0_SX294 +MCPM0_SX384 +MCRE0_SI1121 +MCRE0_SI1725 +MCRE0_SI1751 +MCRE0_SX131 +MCRE0_SX221 +MCRE0_SX24 +MCRE0_SX401 +MCRE0_SX41 +MCSS0_SI1380 +MCSS0_SI688 +MCSS0_SI750 +MCSS0_SX120 +MCSS0_SX210 +MCSS0_SX30 +MCSS0_SX300 +MCSS0_SX390 +MCTH0_SI1209 +MCTH0_SI1839 +MCTH0_SI579 +MCTH0_SX129 +MCTH0_SX219 +MCTH0_SX309 +MCTH0_SX39 +MCTH0_SX399 +MCTM0_SI1350 +MCTM0_SI1980 +MCTM0_SI720 +MCTM0_SX180 +MCTM0_SX270 +MCTM0_SX360 +MCTM0_SX450 +MCTM0_SX90 +MCXM0_SI1351 +MCXM0_SI1981 +MCXM0_SI721 +MCXM0_SX181 +MCXM0_SX271 +MCXM0_SX361 +MCXM0_SX451 +MCXM0_SX91 +MDAC0_SI1261 +MDAC0_SI1837 +MDAC0_SI631 +MDAC0_SX181 +MDAC0_SX271 +MDAC0_SX361 +MDAC0_SX451 +MDAC0_SX91 +MDAS0_SI1266 +MDAS0_SI1896 +MDAS0_SI636 +MDAS0_SX186 +MDAS0_SX21 +MDAS0_SX276 +MDAS0_SX6 +MDAS0_SX96 +MDBB1_SI1006 +MDBB1_SI1636 +MDBB1_SI2056 +MDBB1_SX106 +MDBB1_SX16 +MDBB1_SX196 +MDBB1_SX286 +MDBB1_SX376 +MDBP0_SI1158 +MDBP0_SI1788 +MDBP0_SI528 +MDBP0_SX168 +MDBP0_SX258 +MDBP0_SX348 +MDBP0_SX438 +MDBP0_SX78 +MDCD0_SI1415 +MDCD0_SI2045 +MDCD0_SI785 +MDCD0_SX155 +MDCD0_SX245 +MDCD0_SX335 +MDCD0_SX425 +MDCD0_SX65 +MDCM0_SI1480 +MDCM0_SI2110 +MDCM0_SI850 +MDCM0_SX130 +MDCM0_SX220 +MDCM0_SX310 +MDCM0_SX40 +MDCM0_SX400 +MDDC0_SI1419 +MDDC0_SI2049 +MDDC0_SI789 +MDDC0_SX159 +MDDC0_SX249 +MDDC0_SX339 +MDDC0_SX429 +MDDC0_SX69 +MDED0_SI1170 +MDED0_SI1800 +MDED0_SI540 +MDED0_SX180 +MDED0_SX270 +MDED0_SX360 +MDED0_SX450 +MDED0_SX90 +MDEF0_SI1123 +MDEF0_SI1563 +MDEF0_SI2193 +MDEF0_SX123 +MDEF0_SX213 +MDEF0_SX303 +MDEF0_SX33 +MDEF0_SX393 +MDEM0_SI1868 +MDEM0_SI608 +MDEM0_SI800 +MDEM0_SX158 +MDEM0_SX248 +MDEM0_SX338 +MDEM0_SX428 +MDEM0_SX68 +MDHL0_SI1439 +MDHL0_SI2069 +MDHL0_SI809 +MDHL0_SX179 
+MDHL0_SX269 +MDHL0_SX359 +MDHL0_SX449 +MDHL0_SX89 +MDHS0_SI1530 +MDHS0_SI2160 +MDHS0_SI900 +MDHS0_SX180 +MDHS0_SX270 +MDHS0_SX360 +MDHS0_SX450 +MDHS0_SX90 +MDJM0_SI1455 +MDJM0_SI2085 +MDJM0_SI825 +MDJM0_SX105 +MDJM0_SX15 +MDJM0_SX195 +MDJM0_SX285 +MDJM0_SX375 +MDKS0_SI1066 +MDKS0_SI1696 +MDKS0_SI2326 +MDKS0_SX166 +MDKS0_SX256 +MDKS0_SX346 +MDKS0_SX436 +MDKS0_SX76 +MDLB0_SI1306 +MDLB0_SI1936 +MDLB0_SI676 +MDLB0_SX136 +MDLB0_SX226 +MDLB0_SX316 +MDLB0_SX406 +MDLB0_SX46 +MDLC0_SI1395 +MDLC0_SI2025 +MDLC0_SI765 +MDLC0_SX135 +MDLC0_SX225 +MDLC0_SX315 +MDLC0_SX405 +MDLC0_SX45 +MDLC1_SI1435 +MDLC1_SI2065 +MDLC1_SI2144 +MDLC1_SX175 +MDLC1_SX265 +MDLC1_SX355 +MDLC1_SX445 +MDLC1_SX85 +MDLC2_SI1614 +MDLC2_SI2244 +MDLC2_SI984 +MDLC2_SX174 +MDLC2_SX264 +MDLC2_SX354 +MDLC2_SX444 +MDLC2_SX84 +MDLH0_SI1960 +MDLH0_SI574 +MDLH0_SI700 +MDLH0_SX160 +MDLH0_SX250 +MDLH0_SX340 +MDLH0_SX430 +MDLH0_SX70 +MDLM0_SI1234 +MDLM0_SI1864 +MDLM0_SI604 +MDLM0_SX154 +MDLM0_SX244 +MDLM0_SX334 +MDLM0_SX424 +MDLM0_SX64 +MDLR0_SI1233 +MDLR0_SI1863 +MDLR0_SI603 +MDLR0_SX153 +MDLR0_SX243 +MDLR0_SX333 +MDLR0_SX423 +MDLR0_SX63 +MDLR1_SI1299 +MDLR1_SI1929 +MDLR1_SI669 +MDLR1_SX129 +MDLR1_SX219 +MDLR1_SX309 +MDLR1_SX39 +MDLR1_SX399 +MDMA0_SI1238 +MDMA0_SI1430 +MDMA0_SI2060 +MDMA0_SX170 +MDMA0_SX260 +MDMA0_SX350 +MDMA0_SX440 +MDMA0_SX80 +MDMT0_SI1832 +MDMT0_SI2341 +MDMT0_SI572 +MDMT0_SX122 +MDMT0_SX212 +MDMT0_SX302 +MDMT0_SX32 +MDMT0_SX392 +MDNS0_SI1011 +MDNS0_SI2271 +MDNS0_SI873 +MDNS0_SX111 +MDNS0_SX201 +MDNS0_SX21 +MDNS0_SX291 +MDNS0_SX381 +MDPB0_SI1760 +MDPB0_SI2126 +MDPB0_SI866 +MDPB0_SX146 +MDPB0_SX236 +MDPB0_SX326 +MDPB0_SX416 +MDPB0_SX56 +MDPK0_SI1053 +MDPK0_SI1683 +MDPK0_SI552 +MDPK0_SX153 +MDPK0_SX243 +MDPK0_SX333 +MDPK0_SX423 +MDPK0_SX63 +MDPS0_SI1651 +MDPS0_SI1979 +MDPS0_SI719 +MDPS0_SX179 +MDPS0_SX269 +MDPS0_SX359 +MDPS0_SX449 +MDPS0_SX89 +MDRD0_SI1382 +MDRD0_SI2012 +MDRD0_SI752 +MDRD0_SX122 +MDRD0_SX212 +MDRD0_SX302 +MDRD0_SX32 +MDRD0_SX392 +MDSJ0_SI1462 +MDSJ0_SI2092 +MDSJ0_SI832 +MDSJ0_SX112 
+MDSJ0_SX22 +MDSJ0_SX292 +MDSJ0_SX382 +MDSJ0_SX438 +MDSS0_SI1881 +MDSS0_SI2087 +MDSS0_SI621 +MDSS0_SX171 +MDSS0_SX261 +MDSS0_SX351 +MDSS0_SX441 +MDSS0_SX81 +MDSS1_SI1327 +MDSS1_SI1713 +MDSS1_SI697 +MDSS1_SX157 +MDSS1_SX247 +MDSS1_SX337 +MDSS1_SX427 +MDSS1_SX67 +MDTB0_SI1200 +MDTB0_SI1830 +MDTB0_SI570 +MDTB0_SX120 +MDTB0_SX210 +MDTB0_SX300 +MDTB0_SX321 +MDTB0_SX390 +MDWD0_SI1260 +MDWD0_SI1890 +MDWD0_SI557 +MDWD0_SX180 +MDWD0_SX270 +MDWD0_SX360 +MDWD0_SX450 +MDWD0_SX90 +MDWH0_SI1168 +MDWH0_SI1925 +MDWH0_SI665 +MDWH0_SX125 +MDWH0_SX215 +MDWH0_SX305 +MDWH0_SX35 +MDWH0_SX395 +MDWM0_SI1546 +MDWM0_SI2176 +MDWM0_SI916 +MDWM0_SX106 +MDWM0_SX16 +MDWM0_SX286 +MDWM0_SX376 +MDWM0_SX433 +MEAL0_SI1547 +MEAL0_SI2177 +MEAL0_SI917 +MEAL0_SX107 +MEAL0_SX197 +MEAL0_SX287 +MEAL0_SX347 +MEAL0_SX377 +MEDR0_SI1374 +MEDR0_SI2004 +MEDR0_SI744 +MEDR0_SX114 +MEDR0_SX204 +MEDR0_SX24 +MEDR0_SX294 +MEDR0_SX384 +MEFG0_SI465 +MEFG0_SI491 +MEFG0_SI598 +MEFG0_SX105 +MEFG0_SX15 +MEFG0_SX195 +MEFG0_SX285 +MEFG0_SX375 +MEGJ0_SI1337 +MEGJ0_SI1967 +MEGJ0_SI707 +MEGJ0_SX167 +MEGJ0_SX257 +MEGJ0_SX3 +MEGJ0_SX437 +MEGJ0_SX77 +MEJL0_SI1592 +MEJL0_SI1654 +MEJL0_SI962 +MEJL0_SX152 +MEJL0_SX242 +MEJL0_SX332 +MEJL0_SX422 +MEJL0_SX62 +MEJS0_SI1240 +MEJS0_SI1870 +MEJS0_SI610 +MEJS0_SX160 +MEJS0_SX250 +MEJS0_SX340 +MEJS0_SX430 +MEJS0_SX70 +MESG0_SI1332 +MESG0_SI1962 +MESG0_SI702 +MESG0_SX162 +MESG0_SX252 +MESG0_SX342 +MESG0_SX432 +MESG0_SX72 +MESJ0_SI2039 +MESJ0_SI2257 +MESJ0_SI997 +MESJ0_SX187 +MESJ0_SX277 +MESJ0_SX367 +MESJ0_SX7 +MESJ0_SX97 +MEWM0_SI1348 +MEWM0_SI1978 +MEWM0_SI718 +MEWM0_SX178 +MEWM0_SX268 +MEWM0_SX358 +MEWM0_SX448 +MEWM0_SX88 +MFER0_SI1492 +MFER0_SI2122 +MFER0_SI862 +MFER0_SX142 +MFER0_SX232 +MFER0_SX322 +MFER0_SX412 +MFER0_SX52 +MFMC0_SI1132 +MFMC0_SI1762 +MFMC0_SI502 +MFMC0_SX142 +MFMC0_SX232 +MFMC0_SX322 +MFMC0_SX412 +MFMC0_SX52 +MFRM0_SI1155 +MFRM0_SI1717 +MFRM0_SI1785 +MFRM0_SX165 +MFRM0_SX255 +MFRM0_SX345 +MFRM0_SX435 +MFRM0_SX75 +MFWK0_SI1249 +MFWK0_SI1879 +MFWK0_SI619 +MFWK0_SX169 
+MFWK0_SX259 +MFWK0_SX349 +MFWK0_SX439 +MFWK0_SX79 +MFXS0_SI1674 +MFXS0_SI2225 +MFXS0_SI2304 +MFXS0_SX144 +MFXS0_SX234 +MFXS0_SX324 +MFXS0_SX414 +MFXS0_SX54 +MFXV0_SI1005 +MFXV0_SI1342 +MFXV0_SI1635 +MFXV0_SX105 +MFXV0_SX15 +MFXV0_SX195 +MFXV0_SX285 +MFXV0_SX375 +MGAF0_SI1282 +MGAF0_SI1912 +MGAF0_SI652 +MGAF0_SX112 +MGAF0_SX202 +MGAF0_SX22 +MGAF0_SX292 +MGAF0_SX382 +MGAG0_SI1321 +MGAG0_SI645 +MGAG0_SI691 +MGAG0_SX151 +MGAG0_SX241 +MGAG0_SX331 +MGAG0_SX421 +MGAG0_SX61 +MGAK0_SI1036 +MGAK0_SI1666 +MGAK0_SI2296 +MGAK0_SX136 +MGAK0_SX226 +MGAK0_SX316 +MGAK0_SX406 +MGAK0_SX46 +MGAR0_SI1212 +MGAR0_SI1694 +MGAR0_SI1842 +MGAR0_SX132 +MGAR0_SX222 +MGAR0_SX312 +MGAR0_SX402 +MGAR0_SX42 +MGAW0_SI1165 +MGAW0_SI1802 +MGAW0_SI535 +MGAW0_SX175 +MGAW0_SX265 +MGAW0_SX355 +MGAW0_SX445 +MGAW0_SX85 +MGES0_SI1481 +MGES0_SI2111 +MGES0_SI851 +MGES0_SX131 +MGES0_SX221 +MGES0_SX311 +MGES0_SX401 +MGES0_SX41 +MGJC0_SI1256 +MGJC0_SI1335 +MGJC0_SI1965 +MGJC0_SX165 +MGJC0_SX255 +MGJC0_SX345 +MGJC0_SX435 +MGJC0_SX75 +MGRL0_SI1497 +MGRL0_SI2127 +MGRL0_SI867 +MGRL0_SX147 +MGRL0_SX237 +MGRL0_SX327 +MGRL0_SX417 +MGRL0_SX57 +MGRP0_SI1317 +MGRP0_SI1947 +MGRP0_SI687 +MGRP0_SX147 +MGRP0_SX237 +MGRP0_SX327 +MGRP0_SX417 +MGRP0_SX57 +MGSH0_SI1176 +MGSH0_SI1806 +MGSH0_SI546 +MGSH0_SX127 +MGSH0_SX186 +MGSH0_SX276 +MGSH0_SX6 +MGSH0_SX96 +MGSL0_SI1164 +MGSL0_SI534 +MGSL0_SI797 +MGSL0_SX174 +MGSL0_SX264 +MGSL0_SX354 +MGSL0_SX444 +MGSL0_SX84 +MGXP0_SI1087 +MGXP0_SI457 +MGXP0_SI525 +MGXP0_SX187 +MGXP0_SX277 +MGXP0_SX367 +MGXP0_SX7 +MGXP0_SX97 +MHBS0_SI1575 +MHBS0_SI2205 +MHBS0_SI945 +MHBS0_SX135 +MHBS0_SX225 +MHBS0_SX315 +MHBS0_SX405 +MHBS0_SX45 +MHIT0_SI1613 +MHIT0_SI2243 +MHIT0_SI983 +MHIT0_SX173 +MHIT0_SX263 +MHIT0_SX353 +MHIT0_SX443 +MHIT0_SX83 +MHJB0_SI1017 +MHJB0_SI1647 +MHJB0_SI2277 +MHJB0_SX117 +MHJB0_SX207 +MHJB0_SX27 +MHJB0_SX297 +MHJB0_SX387 +MHMG0_SI1365 +MHMG0_SI1995 +MHMG0_SI735 +MHMG0_SX105 +MHMG0_SX15 +MHMG0_SX195 +MHMG0_SX285 +MHMG0_SX375 +MHMR0_SI1119 +MHMR0_SI1692 +MHMR0_SI489 +MHMR0_SX129 
+MHMR0_SX219 +MHMR0_SX309 +MHMR0_SX39 +MHMR0_SX399 +MHRM0_SI1475 +MHRM0_SI2218 +MHRM0_SI958 +MHRM0_SX148 +MHRM0_SX238 +MHRM0_SX328 +MHRM0_SX418 +MHRM0_SX58 +MHXL0_SI1772 +MHXL0_SI512 +MHXL0_SI612 +MHXL0_SX152 +MHXL0_SX242 +MHXL0_SX332 +MHXL0_SX422 +MHXL0_SX62 +MILB0_SI2163 +MILB0_SI807 +MILB0_SI903 +MILB0_SX183 +MILB0_SX273 +MILB0_SX3 +MILB0_SX363 +MILB0_SX93 +MJAC0_SI1331 +MJAC0_SI2148 +MJAC0_SI701 +MJAC0_SX251 +MJAC0_SX307 +MJAC0_SX341 +MJAC0_SX431 +MJAC0_SX71 +MJAE0_SI1524 +MJAE0_SI1999 +MJAE0_SI2154 +MJAE0_SX174 +MJAE0_SX264 +MJAE0_SX354 +MJAE0_SX444 +MJAE0_SX84 +MJAI0_SI1604 +MJAI0_SI682 +MJAI0_SI710 +MJAI0_SX164 +MJAI0_SX254 +MJAI0_SX344 +MJAI0_SX434 +MJAI0_SX74 +MJBG0_SI1232 +MJBG0_SI1724 +MJBG0_SI1862 +MJBG0_SX152 +MJBG0_SX242 +MJBG0_SX332 +MJBG0_SX422 +MJBG0_SX62 +MJDA0_SI1031 +MJDA0_SI1661 +MJDA0_SI2291 +MJDA0_SX131 +MJDA0_SX221 +MJDA0_SX311 +MJDA0_SX401 +MJDA0_SX41 +MJDC0_SI1161 +MJDC0_SI2165 +MJDC0_SI531 +MJDC0_SX171 +MJDC0_SX261 +MJDC0_SX351 +MJDC0_SX441 +MJDC0_SX81 +MJDE0_SI1120 +MJDE0_SI463 +MJDE0_SI490 +MJDE0_SX130 +MJDE0_SX220 +MJDE0_SX310 +MJDE0_SX40 +MJDE0_SX400 +MJDG0_SI1042 +MJDG0_SI1672 +MJDG0_SI1705 +MJDG0_SX142 +MJDG0_SX232 +MJDG0_SX322 +MJDG0_SX412 +MJDG0_SX52 +MJDM0_SI1340 +MJDM0_SI1937 +MJDM0_SI974 +MJDM0_SX170 +MJDM0_SX260 +MJDM0_SX350 +MJDM0_SX440 +MJDM0_SX80 +MJEB0_SI1286 +MJEB0_SI1916 +MJEB0_SI656 +MJEB0_SX170 +MJEB0_SX206 +MJEB0_SX26 +MJEB0_SX296 +MJEB0_SX386 +MJEB1_SI1467 +MJEB1_SI2097 +MJEB1_SI837 +MJEB1_SX117 +MJEB1_SX207 +MJEB1_SX27 +MJEB1_SX297 +MJEB1_SX387 +MJEE0_SI1237 +MJEE0_SI1867 +MJEE0_SI607 +MJEE0_SX157 +MJEE0_SX247 +MJEE0_SX337 +MJEE0_SX427 +MJEE0_SX67 +MJFH0_SI1107 +MJFH0_SI1737 +MJFH0_SI477 +MJFH0_SX117 +MJFH0_SX207 +MJFH0_SX27 +MJFH0_SX297 +MJFH0_SX387 +MJFR0_SI1605 +MJFR0_SI2235 +MJFR0_SI975 +MJFR0_SX165 +MJFR0_SX255 +MJFR0_SX345 +MJFR0_SX435 +MJFR0_SX75 +MJHI0_SI1328 +MJHI0_SI555 +MJHI0_SI698 +MJHI0_SX158 +MJHI0_SX248 +MJHI0_SX338 +MJHI0_SX428 +MJHI0_SX68 +MJJB0_SI1139 +MJJB0_SI1277 +MJJB0_SI1769 +MJJB0_SX149 
+MJJB0_SX239 +MJJB0_SX329 +MJJB0_SX419 +MJJB0_SX59 +MJJJ0_SI1163 +MJJJ0_SI1793 +MJJJ0_SI533 +MJJJ0_SX173 +MJJJ0_SX263 +MJJJ0_SX353 +MJJJ0_SX443 +MJJJ0_SX83 +MJJM0_SI1251 +MJJM0_SI1457 +MJJM0_SI827 +MJJM0_SX107 +MJJM0_SX17 +MJJM0_SX197 +MJJM0_SX287 +MJJM0_SX377 +MJKR0_SI1201 +MJKR0_SI1831 +MJKR0_SI571 +MJKR0_SX121 +MJKR0_SX211 +MJKR0_SX301 +MJKR0_SX31 +MJKR0_SX391 +MJLB0_SI1616 +MJLB0_SI2246 +MJLB0_SI986 +MJLB0_SX176 +MJLB0_SX266 +MJLB0_SX356 +MJLB0_SX446 +MJLB0_SX86 +MJLG1_SI1012 +MJLG1_SI1642 +MJLG1_SI2272 +MJLG1_SX112 +MJLG1_SX202 +MJLG1_SX22 +MJLG1_SX292 +MJLG1_SX382 +MJLS0_SI1096 +MJLS0_SI1726 +MJLS0_SI466 +MJLS0_SX106 +MJLS0_SX16 +MJLS0_SX196 +MJLS0_SX286 +MJLS0_SX376 +MJMA0_SI1495 +MJMA0_SI2125 +MJMA0_SI865 +MJMA0_SX145 +MJMA0_SX235 +MJMA0_SX325 +MJMA0_SX415 +MJMA0_SX55 +MJMD0_SI1028 +MJMD0_SI1658 +MJMD0_SI2288 +MJMD0_SX128 +MJMD0_SX218 +MJMD0_SX308 +MJMD0_SX38 +MJMD0_SX398 +MJMM0_SI1255 +MJMM0_SI1885 +MJMM0_SI625 +MJMM0_SX175 +MJMM0_SX265 +MJMM0_SX355 +MJMM0_SX445 +MJMM0_SX85 +MJPG0_SI1191 +MJPG0_SI1821 +MJPG0_SI561 +MJPG0_SX111 +MJPG0_SX201 +MJPG0_SX21 +MJPG0_SX291 +MJPG0_SX381 +MJPM0_SI1368 +MJPM0_SI1998 +MJPM0_SI738 +MJPM0_SX108 +MJPM0_SX18 +MJPM0_SX198 +MJPM0_SX288 +MJPM0_SX378 +MJPM1_SI1897 +MJPM1_SI2280 +MJPM1_SI761 +MJPM1_SX131 +MJPM1_SX221 +MJPM1_SX311 +MJPM1_SX401 +MJPM1_SX41 +MJRA0_SI1236 +MJRA0_SI1866 +MJRA0_SI606 +MJRA0_SX156 +MJRA0_SX246 +MJRA0_SX336 +MJRA0_SX426 +MJRA0_SX66 +MJRG0_SI1366 +MJRG0_SI1996 +MJRG0_SI736 +MJRG0_SX106 +MJRG0_SX16 +MJRG0_SX286 +MJRG0_SX352 +MJRG0_SX376 +MJRH0_SI1125 +MJRH0_SI1755 +MJRH0_SI1840 +MJRH0_SX135 +MJRH0_SX225 +MJRH0_SX315 +MJRH0_SX405 +MJRH0_SX45 +MJRH1_SI1558 +MJRH1_SI1774 +MJRH1_SI514 +MJRH1_SX154 +MJRH1_SX244 +MJRH1_SX334 +MJRH1_SX424 +MJRH1_SX64 +MJRK0_SI1662 +MJRK0_SI2103 +MJRK0_SI880 +MJRK0_SX160 +MJRK0_SX250 +MJRK0_SX340 +MJRK0_SX430 +MJRK0_SX70 +MJRP0_SI1835 +MJRP0_SI1845 +MJRP0_SI585 +MJRP0_SX135 +MJRP0_SX225 +MJRP0_SX315 +MJRP0_SX405 +MJRP0_SX45 +MJSR0_SI1424 +MJSR0_SI2054 +MJSR0_SI794 +MJSR0_SX164 
+MJSR0_SX254 +MJSR0_SX344 +MJSR0_SX434 +MJSR0_SX74 +MJWG0_SI2155 +MJWG0_SI813 +MJWG0_SI895 +MJWG0_SX175 +MJWG0_SX265 +MJWG0_SX355 +MJWG0_SX445 +MJWG0_SX85 +MJWS0_SI1143 +MJWS0_SI1773 +MJWS0_SI513 +MJWS0_SX153 +MJWS0_SX243 +MJWS0_SX333 +MJWS0_SX423 +MJWS0_SX63 +MJWT0_SI1291 +MJWT0_SI1381 +MJWT0_SI751 +MJWT0_SX121 +MJWT0_SX211 +MJWT0_SX301 +MJWT0_SX31 +MJWT0_SX391 +MJXA0_SI1507 +MJXA0_SI2137 +MJXA0_SI877 +MJXA0_SX157 +MJXA0_SX247 +MJXA0_SX337 +MJXA0_SX427 +MJXA0_SX67 +MJXL0_SI1172 +MJXL0_SI1795 +MJXL0_SI542 +MJXL0_SX182 +MJXL0_SX272 +MJXL0_SX362 +MJXL0_SX452 +MJXL0_SX92 +MKAG0_SI1609 +MKAG0_SI2239 +MKAG0_SI979 +MKAG0_SX169 +MKAG0_SX259 +MKAG0_SX30 +MKAG0_SX439 +MKAG0_SX79 +MKAH0_SI1528 +MKAH0_SI2158 +MKAH0_SI898 +MKAH0_SX178 +MKAH0_SX268 +MKAH0_SX358 +MKAH0_SX448 +MKAH0_SX88 +MKAJ0_SI1414 +MKAJ0_SI2044 +MKAJ0_SI784 +MKAJ0_SX154 +MKAJ0_SX244 +MKAJ0_SX334 +MKAJ0_SX424 +MKAJ0_SX64 +MKAM0_SI1250 +MKAM0_SI1316 +MKAM0_SI1465 +MKAM0_SX146 +MKAM0_SX236 +MKAM0_SX326 +MKAM0_SX416 +MKAM0_SX56 +MKDB0_SI2132 +MKDB0_SI588 +MKDB0_SI872 +MKDB0_SX152 +MKDB0_SX242 +MKDB0_SX332 +MKDB0_SX422 +MKDB0_SX62 +MKDD0_SI1567 +MKDD0_SI2197 +MKDD0_SI937 +MKDD0_SX127 +MKDD0_SX217 +MKDD0_SX307 +MKDD0_SX37 +MKDD0_SX397 +MKDT0_SI2153 +MKDT0_SI814 +MKDT0_SI893 +MKDT0_SX173 +MKDT0_SX263 +MKDT0_SX353 +MKDT0_SX443 +MKDT0_SX83 +MKES0_SI1253 +MKES0_SI1883 +MKES0_SI623 +MKES0_SX173 +MKES0_SX263 +MKES0_SX353 +MKES0_SX443 +MKES0_SX83 +MKJO0_SI1517 +MKJO0_SI2147 +MKJO0_SI887 +MKJO0_SX167 +MKJO0_SX257 +MKJO0_SX424 +MKJO0_SX437 +MKJO0_SX77 +MKLN0_SI1598 +MKLN0_SI2228 +MKLN0_SI968 +MKLN0_SX158 +MKLN0_SX248 +MKLN0_SX338 +MKLN0_SX428 +MKLN0_SX68 +MKLR0_SI1059 +MKLR0_SI1689 +MKLR0_SI2319 +MKLR0_SX159 +MKLR0_SX249 +MKLR0_SX339 +MKLR0_SX429 +MKLR0_SX69 +MKLS0_SI1437 +MKLS0_SI1533 +MKLS0_SI2067 +MKLS0_SX177 +MKLS0_SX267 +MKLS0_SX357 +MKLS0_SX447 +MKLS0_SX87 +MKLS1_SI1545 +MKLS1_SI2175 +MKLS1_SI915 +MKLS1_SX105 +MKLS1_SX15 +MKLS1_SX195 +MKLS1_SX285 +MKLS1_SX375 +MKLW0_SI1571 +MKLW0_SI1844 +MKLW0_SI2201 +MKLW0_SX131 
+MKLW0_SX221 +MKLW0_SX311 +MKLW0_SX401 +MKLW0_SX41 +MKRG0_SI1491 +MKRG0_SI2121 +MKRG0_SI861 +MKRG0_SX141 +MKRG0_SX231 +MKRG0_SX31 +MKRG0_SX411 +MKRG0_SX51 +MKXL0_SI1185 +MKXL0_SI1815 +MKXL0_SI1958 +MKXL0_SX105 +MKXL0_SX15 +MKXL0_SX195 +MKXL0_SX285 +MKXL0_SX375 +MLBC0_SI1239 +MLBC0_SI1869 +MLBC0_SI609 +MLBC0_SX159 +MLBC0_SX249 +MLBC0_SX339 +MLBC0_SX429 +MLBC0_SX69 +MLEL0_SI1246 +MLEL0_SI1876 +MLEL0_SI616 +MLEL0_SX166 +MLEL0_SX256 +MLEL0_SX346 +MLEL0_SX436 +MLEL0_SX76 +MLJC0_SI1225 +MLJC0_SI1855 +MLJC0_SI595 +MLJC0_SX145 +MLJC0_SX235 +MLJC0_SX325 +MLJC0_SX415 +MLJC0_SX55 +MLJH0_SI1324 +MLJH0_SI1422 +MLJH0_SI694 +MLJH0_SX154 +MLJH0_SX244 +MLJH0_SX334 +MLJH0_SX424 +MLJH0_SX64 +MLNS0_SI1407 +MLNS0_SI2037 +MLNS0_SI777 +MLNS0_SX147 +MLNS0_SX237 +MLNS0_SX327 +MLNS0_SX417 +MLNS0_SX57 +MLSH0_SI1417 +MLSH0_SI2047 +MLSH0_SI787 +MLSH0_SX157 +MLSH0_SX247 +MLSH0_SX337 +MLSH0_SX427 +MLSH0_SX67 +MMAA0_SI1588 +MMAA0_SI2105 +MMAA0_SI845 +MMAA0_SX125 +MMAA0_SX215 +MMAA0_SX305 +MMAA0_SX35 +MMAA0_SX395 +MMAB1_SI1494 +MMAB1_SI2124 +MMAB1_SI864 +MMAB1_SX144 +MMAB1_SX234 +MMAB1_SX324 +MMAB1_SX414 +MMAB1_SX54 +MMAG0_SI1126 +MMAG0_SI1756 +MMAG0_SI496 +MMAG0_SX136 +MMAG0_SX226 +MMAG0_SX316 +MMAG0_SX406 +MMAG0_SX46 +MMAM0_SI1597 +MMAM0_SI1668 +MMAM0_SI2227 +MMAM0_SX157 +MMAM0_SX247 +MMAM0_SX337 +MMAM0_SX427 +MMAM0_SX67 +MMAR0_SI1336 +MMAR0_SI1966 +MMAR0_SI706 +MMAR0_SX166 +MMAR0_SX256 +MMAR0_SX346 +MMAR0_SX436 +MMAR0_SX76 +MMBS0_SI1151 +MMBS0_SI1781 +MMBS0_SI521 +MMBS0_SX161 +MMBS0_SX251 +MMBS0_SX341 +MMBS0_SX431 +MMBS0_SX71 +MMCC0_SI1338 +MMCC0_SI1968 +MMCC0_SI708 +MMCC0_SX168 +MMCC0_SX258 +MMCC0_SX348 +MMCC0_SX438 +MMCC0_SX78 +MMDB0_SI1358 +MMDB0_SI1617 +MMDB0_SI987 +MMDB0_SX177 +MMDB0_SX267 +MMDB0_SX357 +MMDB0_SX447 +MMDB0_SX87 +MMDG0_SI1780 +MMDG0_SI2035 +MMDG0_SI520 +MMDG0_SX160 +MMDG0_SX250 +MMDG0_SX340 +MMDG0_SX430 +MMDG0_SX70 +MMDM0_SI1311 +MMDM0_SI1941 +MMDM0_SI681 +MMDM0_SX141 +MMDM0_SX231 +MMDM0_SX321 +MMDM0_SX411 +MMDM0_SX51 +MMDM1_SI1650 +MMDM1_SI2043 +MMDM1_SI783 +MMDM1_SX153 
+MMDM1_SX243 +MMDM1_SX333 +MMDM1_SX423 +MMDM1_SX63 +MMDS0_SI1343 +MMDS0_SI1973 +MMDS0_SI713 +MMDS0_SX173 +MMDS0_SX263 +MMDS0_SX353 +MMDS0_SX443 +MMDS0_SX83 +MMEA0_SI1388 +MMEA0_SI2018 +MMEA0_SI758 +MMEA0_SX128 +MMEA0_SX218 +MMEA0_SX308 +MMEA0_SX38 +MMEA0_SX398 +MMEB0_SI1357 +MMEB0_SI1987 +MMEB0_SI727 +MMEB0_SX187 +MMEB0_SX327 +MMEB0_SX367 +MMEB0_SX7 +MMEB0_SX97 +MMGC0_SI1305 +MMGC0_SI1935 +MMGC0_SI2184 +MMGC0_SX135 +MMGC0_SX225 +MMGC0_SX315 +MMGC0_SX405 +MMGC0_SX45 +MMGG0_SI1079 +MMGG0_SI1709 +MMGG0_SI2339 +MMGG0_SX179 +MMGG0_SX269 +MMGG0_SX359 +MMGG0_SX449 +MMGG0_SX89 +MMGK0_SI1322 +MMGK0_SI1952 +MMGK0_SI692 +MMGK0_SX152 +MMGK0_SX242 +MMGK0_SX332 +MMGK0_SX422 +MMGK0_SX62 +MMJB1_SI1408 +MMJB1_SI2038 +MMJB1_SI778 +MMJB1_SX148 +MMJB1_SX238 +MMJB1_SX328 +MMJB1_SX418 +MMJB1_SX58 +MMLM0_SI1527 +MMLM0_SI2150 +MMLM0_SI897 +MMLM0_SX177 +MMLM0_SX267 +MMLM0_SX357 +MMLM0_SX447 +MMLM0_SX87 +MMPM0_SI1061 +MMPM0_SI1691 +MMPM0_SI2321 +MMPM0_SX161 +MMPM0_SX251 +MMPM0_SX341 +MMPM0_SX431 +MMPM0_SX71 +MMRP0_SI2034 +MMRP0_SI717 +MMRP0_SI774 +MMRP0_SX144 +MMRP0_SX234 +MMRP0_SX324 +MMRP0_SX414 +MMRP0_SX54 +MMSM0_SI1106 +MMSM0_SI1736 +MMSM0_SI476 +MMSM0_SX116 +MMSM0_SX206 +MMSM0_SX26 +MMSM0_SX296 +MMSM0_SX386 +MMVP0_SI1284 +MMVP0_SI1914 +MMVP0_SI654 +MMVP0_SX114 +MMVP0_SX204 +MMVP0_SX294 +MMVP0_SX347 +MMVP0_SX384 +MMWB0_SI1619 +MMWB0_SI2249 +MMWB0_SI989 +MMWB0_SX179 +MMWB0_SX269 +MMWB0_SX359 +MMWB0_SX449 +MMWB0_SX89 +MMWS0_SI1518 +MMWS0_SI559 +MMWS0_SI888 +MMWS0_SX168 +MMWS0_SX258 +MMWS0_SX348 +MMWS0_SX438 +MMWS0_SX78 +MMWS1_SI1071 +MMWS1_SI1701 +MMWS1_SI2331 +MMWS1_SX261 +MMWS1_SX27 +MMWS1_SX351 +MMWS1_SX441 +MMWS1_SX81 +MMXS0_SI2136 +MMXS0_SI629 +MMXS0_SI876 +MMXS0_SX156 +MMXS0_SX246 +MMXS0_SX336 +MMXS0_SX426 +MMXS0_SX66 +MNET0_SI1446 +MNET0_SI2076 +MNET0_SI816 +MNET0_SX186 +MNET0_SX276 +MNET0_SX366 +MNET0_SX6 +MNET0_SX96 +MNTW0_SI1068 +MNTW0_SI1698 +MNTW0_SI2328 +MNTW0_SX168 +MNTW0_SX202 +MNTW0_SX258 +MNTW0_SX348 +MNTW0_SX78 +MPAR0_SI1576 +MPAR0_SI2206 +MPAR0_SI946 +MPAR0_SX136 
+MPAR0_SX226 +MPAR0_SX316 +MPAR0_SX406 +MPAR0_SX46 +MPEB0_SI1034 +MPEB0_SI1860 +MPEB0_SI600 +MPEB0_SX150 +MPEB0_SX240 +MPEB0_SX330 +MPEB0_SX420 +MPEB0_SX60 +MPFU0_SI1258 +MPFU0_SI1888 +MPFU0_SI628 +MPFU0_SX178 +MPFU0_SX268 +MPFU0_SX358 +MPFU0_SX448 +MPFU0_SX88 +MPGH0_SI1554 +MPGH0_SI675 +MPGH0_SI924 +MPGH0_SX114 +MPGH0_SX204 +MPGH0_SX24 +MPGH0_SX294 +MPGH0_SX384 +MPGR0_SI1410 +MPGR0_SI2040 +MPGR0_SI780 +MPGR0_SX150 +MPGR0_SX240 +MPGR0_SX330 +MPGR0_SX420 +MPGR0_SX60 +MPGR1_SI1269 +MPGR1_SI1499 +MPGR1_SI2129 +MPGR1_SX149 +MPGR1_SX239 +MPGR1_SX329 +MPGR1_SX419 +MPGR1_SX59 +MPMB0_SI1501 +MPMB0_SI2131 +MPMB0_SI871 +MPMB0_SX151 +MPMB0_SX241 +MPMB0_SX331 +MPMB0_SX421 +MPMB0_SX61 +MPPC0_SI1412 +MPPC0_SI2042 +MPPC0_SI782 +MPPC0_SX152 +MPPC0_SX242 +MPPC0_SX332 +MPPC0_SX422 +MPPC0_SX62 +MPRB0_SI1205 +MPRB0_SI1215 +MPRB0_SI575 +MPRB0_SX125 +MPRB0_SX215 +MPRB0_SX305 +MPRB0_SX35 +MPRB0_SX395 +MPRD0_SI1431 +MPRD0_SI2061 +MPRD0_SI801 +MPRD0_SX171 +MPRD0_SX261 +MPRD0_SX351 +MPRD0_SX441 +MPRD0_SX81 +MPRK0_SI1097 +MPRK0_SI1727 +MPRK0_SI467 +MPRK0_SX107 +MPRK0_SX17 +MPRK0_SX197 +MPRK0_SX287 +MPRK0_SX377 +MPRT0_SI1210 +MPRT0_SI495 +MPRT0_SI580 +MPRT0_SX130 +MPRT0_SX220 +MPRT0_SX310 +MPRT0_SX40 +MPRT0_SX400 +MPSW0_SI1067 +MPSW0_SI1697 +MPSW0_SI2327 +MPSW0_SX167 +MPSW0_SX24 +MPSW0_SX257 +MPSW0_SX437 +MPSW0_SX77 +MRAB0_SI1224 +MRAB0_SI1854 +MRAB0_SI594 +MRAB0_SX144 +MRAB0_SX234 +MRAB0_SX324 +MRAB0_SX414 +MRAB0_SX54 +MRAB1_SI1478 +MRAB1_SI2108 +MRAB1_SI848 +MRAB1_SX128 +MRAB1_SX218 +MRAB1_SX308 +MRAB1_SX38 +MRAB1_SX398 +MRAI0_SI1954 +MRAI0_SI2052 +MRAI0_SI792 +MRAI0_SX162 +MRAI0_SX252 +MRAI0_SX342 +MRAI0_SX432 +MRAI0_SX72 +MRAM0_SI1275 +MRAM0_SI1905 +MRAM0_SI1951 +MRAM0_SX105 +MRAM0_SX15 +MRAM0_SX195 +MRAM0_SX285 +MRAM0_SX375 +MRAV0_SI1008 +MRAV0_SI1638 +MRAV0_SI2268 +MRAV0_SX108 +MRAV0_SX18 +MRAV0_SX198 +MRAV0_SX288 +MRAV0_SX378 +MRBC0_SI1665 +MRBC0_SI1859 +MRBC0_SI599 +MRBC0_SX149 +MRBC0_SX239 +MRBC0_SX329 +MRBC0_SX419 +MRBC0_SX59 +MRCG0_SI1428 +MRCG0_SI2058 +MRCG0_SI798 +MRCG0_SX168 
+MRCG0_SX258 +MRCG0_SX348 +MRCG0_SX438 +MRCG0_SX78 +MRCW0_SI1371 +MRCW0_SI2001 +MRCW0_SI741 +MRCW0_SX111 +MRCW0_SX201 +MRCW0_SX21 +MRCW0_SX291 +MRCW0_SX381 +MRDD0_SI1050 +MRDD0_SI1680 +MRDD0_SI2310 +MRDD0_SX150 +MRDD0_SX240 +MRDD0_SX277 +MRDD0_SX330 +MRDD0_SX60 +MRDM0_SI1044 +MRDM0_SI1595 +MRDM0_SI965 +MRDM0_SX155 +MRDM0_SX245 +MRDM0_SX335 +MRDM0_SX425 +MRDM0_SX65 +MRDS0_SI1167 +MRDS0_SI1797 +MRDS0_SI537 +MRDS0_SX177 +MRDS0_SX267 +MRDS0_SX357 +MRDS0_SX447 +MRDS0_SX87 +MREE0_SI1104 +MREE0_SI1734 +MREE0_SI1959 +MREE0_SX114 +MREE0_SX204 +MREE0_SX24 +MREE0_SX294 +MREE0_SX384 +MREH1_SI1599 +MREH1_SI2229 +MREH1_SI969 +MREH1_SX159 +MREH1_SX249 +MREH1_SX339 +MREH1_SX429 +MREH1_SX69 +MREM0_SI1591 +MREM0_SI511 +MREM0_SI961 +MREM0_SX151 +MREM0_SX241 +MREM0_SX331 +MREM0_SX421 +MREM0_SX61 +MREW1_SI1500 +MREW1_SI2130 +MREW1_SI870 +MREW1_SX150 +MREW1_SX240 +MREW1_SX330 +MREW1_SX420 +MREW1_SX60 +MRFK0_SI1076 +MRFK0_SI1706 +MRFK0_SI2336 +MRFK0_SX176 +MRFK0_SX266 +MRFK0_SX356 +MRFK0_SX446 +MRFK0_SX86 +MRFL0_SI1156 +MRFL0_SI1786 +MRFL0_SI526 +MRFL0_SX166 +MRFL0_SX256 +MRFL0_SX346 +MRFL0_SX436 +MRFL0_SX76 +MRGM0_SI1162 +MRGM0_SI1792 +MRGM0_SI532 +MRGM0_SX172 +MRGM0_SX262 +MRGM0_SX416 +MRGM0_SX442 +MRGM0_SX82 +MRGS0_SI1356 +MRGS0_SI1986 +MRGS0_SI726 +MRGS0_SX186 +MRGS0_SX276 +MRGS0_SX366 +MRGS0_SX6 +MRGS0_SX96 +MRHL0_SI1515 +MRHL0_SI2145 +MRHL0_SI885 +MRHL0_SX165 +MRHL0_SX255 +MRHL0_SX345 +MRHL0_SX435 +MRHL0_SX75 +MRJB1_SI1020 +MRJB1_SI1413 +MRJB1_SI2021 +MRJB1_SX120 +MRJB1_SX210 +MRJB1_SX30 +MRJB1_SX300 +MRJB1_SX390 +MRJH0_SI1519 +MRJH0_SI889 +MRJH0_SI914 +MRJH0_SX169 +MRJH0_SX259 +MRJH0_SX307 +MRJH0_SX439 +MRJH0_SX79 +MRJM0_SI1095 +MRJM0_SI1228 +MRJM0_SI1858 +MRJM0_SX148 +MRJM0_SX238 +MRJM0_SX328 +MRJM0_SX418 +MRJM0_SX58 +MRJM1_SI1298 +MRJM1_SI1928 +MRJM1_SI668 +MRJM1_SX128 +MRJM1_SX218 +MRJM1_SX308 +MRJM1_SX38 +MRJM1_SX398 +MRJT0_SI1498 +MRJT0_SI1805 +MRJT0_SI868 +MRJT0_SX148 +MRJT0_SX238 +MRJT0_SX328 +MRJT0_SX418 +MRJT0_SX58 +MRKM0_SI1267 +MRKM0_SI1391 +MRKM0_SI637 +MRKM0_SX187 
+MRKM0_SX277 +MRKM0_SX367 +MRKM0_SX7 +MRKM0_SX97 +MRLD0_SI1594 +MRLD0_SI2224 +MRLD0_SI964 +MRLD0_SX154 +MRLD0_SX244 +MRLD0_SX334 +MRLD0_SX424 +MRLD0_SX64 +MRLJ0_SI1420 +MRLJ0_SI2050 +MRLJ0_SI790 +MRLJ0_SX160 +MRLJ0_SX250 +MRLJ0_SX340 +MRLJ0_SX430 +MRLJ0_SX70 +MRLJ1_SI1671 +MRLJ1_SI2301 +MRLJ1_SI2332 +MRLJ1_SX141 +MRLJ1_SX231 +MRLJ1_SX321 +MRLJ1_SX411 +MRLJ1_SX51 +MRLK0_SI1468 +MRLK0_SI2140 +MRLK0_SI843 +MRLK0_SX123 +MRLK0_SX213 +MRLK0_SX303 +MRLK0_SX33 +MRLK0_SX393 +MRLR0_SI1196 +MRLR0_SI1826 +MRLR0_SI566 +MRLR0_SX116 +MRLR0_SX206 +MRLR0_SX26 +MRLR0_SX296 +MRLR0_SX386 +MRMB0_SI1581 +MRMB0_SI2211 +MRMB0_SI951 +MRMB0_SX141 +MRMB0_SX231 +MRMB0_SX321 +MRMB0_SX411 +MRMB0_SX51 +MRMG0_SI1080 +MRMG0_SI1710 +MRMG0_SI2340 +MRMG0_SX180 +MRMG0_SX270 +MRMG0_SX360 +MRMG0_SX450 +MRMG0_SX90 +MRMH0_SI1021 +MRMH0_SI1349 +MRMH0_SI2281 +MRMH0_SX121 +MRMH0_SX211 +MRMH0_SX301 +MRMH0_SX31 +MRMH0_SX391 +MRML0_SI1421 +MRML0_SI2051 +MRML0_SI791 +MRML0_SX161 +MRML0_SX251 +MRML0_SX341 +MRML0_SX431 +MRML0_SX71 +MRMS0_SI1113 +MRMS0_SI2057 +MRMS0_SI2100 +MRMS0_SX120 +MRMS0_SX210 +MRMS0_SX30 +MRMS0_SX300 +MRMS0_SX390 +MRPC1_SI1482 +MRPC1_SI2026 +MRPC1_SI2112 +MRPC1_SX132 +MRPC1_SX222 +MRPC1_SX312 +MRPC1_SX402 +MRPC1_SX42 +MRRE0_SI1334 +MRRE0_SI704 +MRRE0_SI952 +MRRE0_SX164 +MRRE0_SX254 +MRRE0_SX344 +MRRE0_SX434 +MRRE0_SX74 +MRSO0_SI1206 +MRSO0_SI1659 +MRSO0_SI2289 +MRSO0_SX129 +MRSO0_SX219 +MRSO0_SX309 +MRSO0_SX39 +MRSO0_SX399 +MRSP0_SI1429 +MRSP0_SI2059 +MRSP0_SI799 +MRSP0_SX169 +MRSP0_SX196 +MRSP0_SX259 +MRSP0_SX439 +MRSP0_SX79 +MRTC0_SI1458 +MRTC0_SI2088 +MRTC0_SI828 +MRTC0_SX108 +MRTC0_SX18 +MRTC0_SX198 +MRTC0_SX288 +MRTC0_SX378 +MRTJ0_SI1551 +MRTJ0_SI2032 +MRTJ0_SI772 +MRTJ0_SX142 +MRTJ0_SX232 +MRTJ0_SX322 +MRTJ0_SX412 +MRTJ0_SX52 +MRVG0_SI1140 +MRVG0_SI1770 +MRVG0_SI510 +MRVG0_SX150 +MRVG0_SX240 +MRVG0_SX330 +MRVG0_SX420 +MRVG0_SX60 +MRWA0_SI1603 +MRWA0_SI2233 +MRWA0_SI973 +MRWA0_SX163 +MRWA0_SX253 +MRWA0_SX343 +MRWA0_SX433 +MRWA0_SX73 +MRWS0_SI1102 +MRWS0_SI1732 +MRWS0_SI472 +MRWS0_SX112 
+MRWS0_SX202 +MRWS0_SX22 +MRWS0_SX292 +MRWS0_SX382 +MRXB0_SI1585 +MRXB0_SI2215 +MRXB0_SI955 +MRXB0_SX145 +MRXB0_SX235 +MRXB0_SX325 +MRXB0_SX415 +MRXB0_SX55 +MSAH1_SI1049 +MSAH1_SI1679 +MSAH1_SI2309 +MSAH1_SX149 +MSAH1_SX239 +MSAH1_SX329 +MSAH1_SX419 +MSAH1_SX59 +MSAS0_SI1376 +MSAS0_SI2006 +MSAS0_SI746 +MSAS0_SX116 +MSAS0_SX206 +MSAS0_SX26 +MSAS0_SX296 +MSAS0_SX386 +MSAT0_SI1526 +MSAT0_SI2156 +MSAT0_SI896 +MSAT0_SX176 +MSAT0_SX266 +MSAT0_SX356 +MSAT0_SX446 +MSAT0_SX86 +MSAT1_SI1073 +MSAT1_SI1703 +MSAT1_SI2333 +MSAT1_SX173 +MSAT1_SX263 +MSAT1_SX353 +MSAT1_SX443 +MSAT1_SX83 +MSDB0_SI1007 +MSDB0_SI1637 +MSDB0_SI2267 +MSDB0_SX107 +MSDB0_SX17 +MSDB0_SX197 +MSDB0_SX287 +MSDB0_SX377 +MSDH0_SI2113 +MSDH0_SI2240 +MSDH0_SI980 +MSDH0_SX170 +MSDH0_SX260 +MSDH0_SX350 +MSDH0_SX440 +MSDH0_SX80 +MSDS0_SI1077 +MSDS0_SI1707 +MSDS0_SI2337 +MSDS0_SX177 +MSDS0_SX267 +MSDS0_SX357 +MSDS0_SX447 +MSDS0_SX87 +MSEM1_SI1440 +MSEM1_SI2070 +MSEM1_SI810 +MSEM1_SX180 +MSEM1_SX270 +MSEM1_SX360 +MSEM1_SX450 +MSEM1_SX90 +MSES0_SI1589 +MSES0_SI2216 +MSES0_SI2219 +MSES0_SX149 +MSES0_SX239 +MSES0_SX329 +MSES0_SX419 +MSES0_SX59 +MSFH0_SI1216 +MSFH0_SI1738 +MSFH0_SI586 +MSFH0_SX136 +MSFH0_SX226 +MSFH0_SX316 +MSFH0_SX406 +MSFH0_SX46 +MSFV0_SI1262 +MSFV0_SI1892 +MSFV0_SI632 +MSFV0_SX182 +MSFV0_SX272 +MSFV0_SX362 +MSFV0_SX452 +MSFV0_SX92 +MSJK0_SI1596 +MSJK0_SI2226 +MSJK0_SI966 +MSJK0_SX156 +MSJK0_SX246 +MSJK0_SX336 +MSJK0_SX426 +MSJK0_SX66 +MSMC0_SI1907 +MSMC0_SI509 +MSMC0_SI647 +MSMC0_SX107 +MSMC0_SX17 +MSMC0_SX197 +MSMC0_SX287 +MSMC0_SX377 +MSMR0_SI1150 +MSMR0_SI1405 +MSMR0_SI775 +MSMR0_SX145 +MSMR0_SX235 +MSMR0_SX325 +MSMR0_SX415 +MSMR0_SX55 +MSMS0_SI1433 +MSMS0_SI2063 +MSMS0_SI803 +MSMS0_SX173 +MSMS0_SX263 +MSMS0_SX353 +MSMS0_SX443 +MSMS0_SX83 +MSRG0_SI1221 +MSRG0_SI1851 +MSRG0_SI591 +MSRG0_SX141 +MSRG0_SX231 +MSRG0_SX321 +MSRG0_SX411 +MSRG0_SX51 +MSRR0_SI1131 +MSRR0_SI1761 +MSRR0_SI501 +MSRR0_SX141 +MSRR0_SX231 +MSRR0_SX30 +MSRR0_SX411 +MSRR0_SX51 +MSTF0_SI1396 +MSTF0_SI766 +MSTF0_SI852 +MSTF0_SX136 
+MSTF0_SX226 +MSTF0_SX316 +MSTF0_SX406 +MSTF0_SX46 +MSVS0_SI1568 +MSVS0_SI2198 +MSVS0_SI938 +MSVS0_SX128 +MSVS0_SX218 +MSVS0_SX308 +MSVS0_SX38 +MSVS0_SX398 +MTAB0_SI1572 +MTAB0_SI2202 +MTAB0_SI942 +MTAB0_SX132 +MTAB0_SX222 +MTAB0_SX312 +MTAB0_SX402 +MTAB0_SX42 +MTAS0_SI1385 +MTAS0_SI2015 +MTAS0_SI755 +MTAS0_SX125 +MTAS0_SX215 +MTAS0_SX305 +MTAS0_SX35 +MTAS0_SX395 +MTAT0_SI1110 +MTAT0_SI1740 +MTAT0_SI811 +MTAT0_SX120 +MTAT0_SX210 +MTAT0_SX30 +MTAT0_SX300 +MTAT0_SX390 +MTAT1_SI1409 +MTAT1_SI1627 +MTAT1_SI779 +MTAT1_SX149 +MTAT1_SX239 +MTAT1_SX329 +MTAT1_SX419 +MTAT1_SX59 +MTBC0_SI1173 +MTBC0_SI1803 +MTBC0_SI543 +MTBC0_SX183 +MTBC0_SX273 +MTBC0_SX347 +MTBC0_SX363 +MTBC0_SX93 +MTCS0_SI1972 +MTCS0_SI2265 +MTCS0_SI712 +MTCS0_SX172 +MTCS0_SX262 +MTCS0_SX352 +MTCS0_SX442 +MTCS0_SX82 +MTDB0_SI1401 +MTDB0_SI2031 +MTDB0_SI771 +MTDB0_SX141 +MTDB0_SX231 +MTDB0_SX321 +MTDB0_SX411 +MTDB0_SX51 +MTDP0_SI1274 +MTDP0_SI1521 +MTDP0_SI2151 +MTDP0_SX171 +MTDP0_SX261 +MTDP0_SX351 +MTDP0_SX441 +MTDP0_SX81 +MTER0_SI1157 +MTER0_SI1787 +MTER0_SI527 +MTER0_SX167 +MTER0_SX17 +MTER0_SX257 +MTER0_SX437 +MTER0_SX77 +MTJG0_SI1520 +MTJG0_SI2157 +MTJG0_SI890 +MTJG0_SX170 +MTJG0_SX260 +MTJG0_SX350 +MTJG0_SX440 +MTJG0_SX80 +MTJM0_SI1226 +MTJM0_SI1856 +MTJM0_SI655 +MTJM0_SX146 +MTJM0_SX236 +MTJM0_SX326 +MTJM0_SX416 +MTJM0_SX56 +MTJS0_SI1192 +MTJS0_SI1822 +MTJS0_SI562 +MTJS0_SX112 +MTJS0_SX202 +MTJS0_SX22 +MTJS0_SX292 +MTJS0_SX382 +MTJU0_SI2020 +MTJU0_SI2269 +MTJU0_SI760 +MTJU0_SX130 +MTJU0_SX220 +MTJU0_SX310 +MTJU0_SX40 +MTJU0_SX400 +MTKD0_SI1187 +MTKD0_SI1817 +MTKD0_SI630 +MTKD0_SX107 +MTKD0_SX17 +MTKD0_SX197 +MTKD0_SX287 +MTKD0_SX377 +MTKP0_SI1023 +MTKP0_SI2283 +MTKP0_SI454 +MTKP0_SX123 +MTKP0_SX213 +MTKP0_SX303 +MTKP0_SX33 +MTKP0_SX393 +MTLB0_SI1134 +MTLB0_SI1764 +MTLB0_SI504 +MTLB0_SX144 +MTLB0_SX234 +MTLB0_SX324 +MTLB0_SX414 +MTLB0_SX54 +MTLC0_SI1313 +MTLC0_SI1477 +MTLC0_SI847 +MTLC0_SX127 +MTLC0_SX217 +MTLC0_SX307 +MTLC0_SX37 +MTLC0_SX397 +MTML0_SI1065 +MTML0_SI1695 +MTML0_SI2325 +MTML0_SX165 
+MTML0_SX255 +MTML0_SX345 +MTML0_SX435 +MTML0_SX75 +MTMN0_SI1064 +MTMN0_SI2324 +MTMN0_SI582 +MTMN0_SX164 +MTMN0_SX254 +MTMN0_SX344 +MTMN0_SX434 +MTMN0_SX74 +MTMT0_SI1118 +MTMT0_SI1748 +MTMT0_SI488 +MTMT0_SX128 +MTMT0_SX218 +MTMT0_SX308 +MTMT0_SX38 +MTMT0_SX398 +MTPF0_SI1235 +MTPF0_SI1865 +MTPF0_SI605 +MTPF0_SX155 +MTPF0_SX245 +MTPF0_SX335 +MTPF0_SX425 +MTPF0_SX65 +MTPG0_SI1383 +MTPG0_SI2013 +MTPG0_SI753 +MTPG0_SX123 +MTPG0_SX213 +MTPG0_SX303 +MTPG0_SX33 +MTPG0_SX393 +MTPP0_SI1508 +MTPP0_SI2138 +MTPP0_SI878 +MTPP0_SX158 +MTPP0_SX248 +MTPP0_SX338 +MTPP0_SX428 +MTPP0_SX68 +MTPR0_SI1600 +MTPR0_SI2230 +MTPR0_SI506 +MTPR0_SX160 +MTPR0_SX250 +MTPR0_SX340 +MTPR0_SX430 +MTPR0_SX70 +MTQC0_SI1441 +MTQC0_SI2071 +MTQC0_SI480 +MTQC0_SX181 +MTQC0_SX271 +MTQC0_SX361 +MTQC0_SX451 +MTQC0_SX91 +MTRC0_SI1623 +MTRC0_SI589 +MTRC0_SI993 +MTRC0_SX170 +MTRC0_SX183 +MTRC0_SX273 +MTRC0_SX363 +MTRC0_SX93 +MTRR0_SI1548 +MTRR0_SI2178 +MTRR0_SI918 +MTRR0_SX108 +MTRR0_SX18 +MTRR0_SX198 +MTRR0_SX288 +MTRR0_SX378 +MTRT0_SI1227 +MTRT0_SI1857 +MTRT0_SI597 +MTRT0_SX147 +MTRT0_SX237 +MTRT0_SX254 +MTRT0_SX417 +MTRT0_SX57 +MTWH1_SI1512 +MTWH1_SI2142 +MTWH1_SI882 +MTWH1_SX162 +MTWH1_SX252 +MTWH1_SX342 +MTWH1_SX432 +MTWH1_SX72 +MTXS0_SI1060 +MTXS0_SI1690 +MTXS0_SI2320 +MTXS0_SX160 +MTXS0_SX250 +MTXS0_SX340 +MTXS0_SX430 +MTXS0_SX70 +MVJH0_SI1556 +MVJH0_SI2186 +MVJH0_SI926 +MVJH0_SX116 +MVJH0_SX206 +MVJH0_SX26 +MVJH0_SX296 +MVJH0_SX386 +MVLO0_SI1147 +MVLO0_SI1777 +MVLO0_SI517 +MVLO0_SX157 +MVLO0_SX247 +MVLO0_SX337 +MVLO0_SX427 +MVLO0_SX67 +MVRW0_SI1485 +MVRW0_SI2115 +MVRW0_SI855 +MVRW0_SX135 +MVRW0_SX225 +MVRW0_SX315 +MVRW0_SX405 +MVRW0_SX45 +MWAC0_SI1601 +MWAC0_SI2231 +MWAC0_SI971 +MWAC0_SX161 +MWAC0_SX251 +MWAC0_SX341 +MWAC0_SX431 +MWAC0_SX71 +MWAD0_SI1062 +MWAD0_SI1749 +MWAD0_SI2322 +MWAD0_SX162 +MWAD0_SX252 +MWAD0_SX342 +MWAD0_SX432 +MWAD0_SX72 +MWAR0_SI1045 +MWAR0_SI1675 +MWAR0_SI2305 +MWAR0_SX145 +MWAR0_SX235 +MWAR0_SX325 +MWAR0_SX415 +MWAR0_SX55 +MWCH0_SI1622 +MWCH0_SI1895 +MWCH0_SI2252 +MWCH0_SX182 
+MWCH0_SX272 +MWCH0_SX362 +MWCH0_SX452 +MWCH0_SX92 +MWDK0_SI1436 +MWDK0_SI2017 +MWDK0_SI806 +MWDK0_SX176 +MWDK0_SX266 +MWDK0_SX356 +MWDK0_SX446 +MWDK0_SX86 +MWEM0_SI1320 +MWEM0_SI1393 +MWEM0_SI1950 +MWEM0_SX150 +MWEM0_SX240 +MWEM0_SX330 +MWEM0_SX420 +MWEM0_SX60 +MWGR0_SI1606 +MWGR0_SI2236 +MWGR0_SI976 +MWGR0_SX166 +MWGR0_SX256 +MWGR0_SX346 +MWGR0_SX436 +MWGR0_SX76 +MWRE0_SI1057 +MWRE0_SI1687 +MWRE0_SI2317 +MWRE0_SX157 +MWRE0_SX247 +MWRE0_SX337 +MWRE0_SX427 +MWRE0_SX67 +MWRP0_SI1443 +MWRP0_SI1525 +MWRP0_SI2073 +MWRP0_SX183 +MWRP0_SX273 +MWRP0_SX3 +MWRP0_SX363 +MWRP0_SX93 +MWSB0_SI1626 +MWSB0_SI2256 +MWSB0_SI996 +MWSB0_SX186 +MWSB0_SX276 +MWSB0_SX366 +MWSB0_SX6 +MWSB0_SX96 +MWSH0_SI1426 +MWSH0_SI2266 +MWSH0_SI796 +MWSH0_SX166 +MWSH0_SX256 +MWSH0_SX346 +MWSH0_SX436 +MWSH0_SX76 +MZMB0_SI1166 +MZMB0_SI1796 +MZMB0_SI536 +MZMB0_SX176 +MZMB0_SX266 +MZMB0_SX356 +MZMB0_SX446 +MZMB0_SX86 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_matched/valid.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/valid.uid new file mode 100644 index 0000000..ab5ef38 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_matched/valid.uid @@ -0,0 +1,400 @@ +FADG0_SI1279 +FADG0_SI1909 +FADG0_SI649 +FADG0_SX109 +FADG0_SX19 +FADG0_SX199 +FADG0_SX289 +FADG0_SX379 +FAKS0_SI1573 +FAKS0_SI2203 +FAKS0_SI943 +FAKS0_SX133 +FAKS0_SX223 +FAKS0_SX313 +FAKS0_SX403 +FAKS0_SX43 +FCAL1_SI1403 +FCAL1_SI2033 +FCAL1_SI773 +FCAL1_SX143 +FCAL1_SX233 +FCAL1_SX323 +FCAL1_SX413 +FCAL1_SX53 +FCMH0_SI1454 +FCMH0_SI2084 +FCMH0_SI824 +FCMH0_SX104 +FCMH0_SX14 +FCMH0_SX194 +FCMH0_SX284 +FCMH0_SX374 +FDAC1_SI1474 +FDAC1_SI2104 +FDAC1_SI844 +FDAC1_SX124 +FDAC1_SX214 +FDAC1_SX304 +FDAC1_SX34 +FDAC1_SX394 +FDMS0_SI1218 +FDMS0_SI1502 +FDMS0_SI1848 +FDMS0_SX138 +FDMS0_SX228 +FDMS0_SX318 +FDMS0_SX408 +FDMS0_SX48 +FDRW0_SI1283 +FDRW0_SI1423 +FDRW0_SI653 +FDRW0_SX113 +FDRW0_SX203 +FDRW0_SX23 +FDRW0_SX293 +FDRW0_SX383 +FEDW0_SI1084 +FEDW0_SI1653 +FEDW0_SI1714 +FEDW0_SX184 +FEDW0_SX274 
+FEDW0_SX364 +FEDW0_SX4 +FEDW0_SX94 +FGJD0_SI1179 +FGJD0_SI549 +FGJD0_SI818 +FGJD0_SX189 +FGJD0_SX279 +FGJD0_SX369 +FGJD0_SX9 +FGJD0_SX99 +FJEM0_SI1264 +FJEM0_SI1894 +FJEM0_SI634 +FJEM0_SX184 +FJEM0_SX274 +FJEM0_SX364 +FJEM0_SX4 +FJEM0_SX94 +FJMG0_SI1181 +FJMG0_SI1811 +FJMG0_SI551 +FJMG0_SX101 +FJMG0_SX11 +FJMG0_SX191 +FJMG0_SX281 +FJMG0_SX371 +FJSJ0_SI1484 +FJSJ0_SI2114 +FJSJ0_SI854 +FJSJ0_SX134 +FJSJ0_SX224 +FJSJ0_SX314 +FJSJ0_SX404 +FJSJ0_SX44 +FKMS0_SI1490 +FKMS0_SI2120 +FKMS0_SI860 +FKMS0_SX140 +FKMS0_SX230 +FKMS0_SX320 +FKMS0_SX410 +FKMS0_SX50 +FMAH0_SI1289 +FMAH0_SI1919 +FMAH0_SI659 +FMAH0_SX119 +FMAH0_SX209 +FMAH0_SX29 +FMAH0_SX299 +FMAH0_SX389 +FMML0_SI1040 +FMML0_SI1670 +FMML0_SI2300 +FMML0_SX140 +FMML0_SX230 +FMML0_SX320 +FMML0_SX410 +FMML0_SX50 +FNMR0_SI1399 +FNMR0_SI2029 +FNMR0_SI769 +FNMR0_SX139 +FNMR0_SX229 +FNMR0_SX319 +FNMR0_SX409 +FNMR0_SX49 +FREW0_SI1030 +FREW0_SI1280 +FREW0_SI1910 +FREW0_SX110 +FREW0_SX20 +FREW0_SX200 +FREW0_SX290 +FREW0_SX380 +FSEM0_SI1198 +FSEM0_SI1828 +FSEM0_SI568 +FSEM0_SX118 +FSEM0_SX208 +FSEM0_SX28 +FSEM0_SX298 +FSEM0_SX388 +MAJC0_SI1946 +MAJC0_SI2095 +MAJC0_SI835 +MAJC0_SX115 +MAJC0_SX205 +MAJC0_SX25 +MAJC0_SX295 +MAJC0_SX385 +MBDG0_SI1463 +MBDG0_SI2093 +MBDG0_SI833 +MBDG0_SX113 +MBDG0_SX203 +MBDG0_SX23 +MBDG0_SX293 +MBDG0_SX383 +MBNS0_SI1220 +MBNS0_SI1850 +MBNS0_SI590 +MBNS0_SX140 +MBNS0_SX230 +MBNS0_SX320 +MBNS0_SX410 +MBNS0_SX50 +MBWM0_SI1304 +MBWM0_SI1934 +MBWM0_SI674 +MBWM0_SX134 +MBWM0_SX224 +MBWM0_SX314 +MBWM0_SX404 +MBWM0_SX44 +MCSH0_SI1549 +MCSH0_SI2179 +MCSH0_SI919 +MCSH0_SX109 +MCSH0_SX19 +MCSH0_SX199 +MCSH0_SX289 +MCSH0_SX379 +MDLF0_SI1583 +MDLF0_SI2213 +MDLF0_SI953 +MDLF0_SX143 +MDLF0_SX233 +MDLF0_SX323 +MDLF0_SX413 +MDLF0_SX53 +MDLS0_SI1628 +MDLS0_SI2258 +MDLS0_SI998 +MDLS0_SX188 +MDLS0_SX278 +MDLS0_SX368 +MDLS0_SX8 +MDLS0_SX98 +MDVC0_SI2174 +MDVC0_SI2196 +MDVC0_SI936 +MDVC0_SX126 +MDVC0_SX216 +MDVC0_SX306 +MDVC0_SX36 +MDVC0_SX396 +MERS0_SI1019 +MERS0_SI1649 +MERS0_SI497 +MERS0_SX119 +MERS0_SX209 +MERS0_SX29 
+MERS0_SX299 +MERS0_SX389 +MGJF0_SI1901 +MGJF0_SI641 +MGJF0_SI776 +MGJF0_SX101 +MGJF0_SX11 +MGJF0_SX191 +MGJF0_SX281 +MGJF0_SX371 +MGLB0_SI1534 +MGLB0_SI2164 +MGLB0_SI904 +MGLB0_SX184 +MGLB0_SX274 +MGLB0_SX364 +MGLB0_SX4 +MGLB0_SX94 +MGWT0_SI1539 +MGWT0_SI2169 +MGWT0_SI909 +MGWT0_SX189 +MGWT0_SX279 +MGWT0_SX369 +MGWT0_SX9 +MGWT0_SX99 +MJAR0_SI1988 +MJAR0_SI2247 +MJAR0_SI728 +MJAR0_SX188 +MJAR0_SX278 +MJAR0_SX368 +MJAR0_SX8 +MJAR0_SX98 +MJFC0_SI1033 +MJFC0_SI1663 +MJFC0_SI2293 +MJFC0_SX133 +MJFC0_SX223 +MJFC0_SX313 +MJFC0_SX403 +MJFC0_SX43 +MJSW0_SI1010 +MJSW0_SI1640 +MJSW0_SI2270 +MJSW0_SX110 +MJSW0_SX20 +MJSW0_SX200 +MJSW0_SX290 +MJSW0_SX380 +MMDB1_SI1625 +MMDB1_SI2255 +MMDB1_SI995 +MMDB1_SX185 +MMDB1_SX275 +MMDB1_SX365 +MMDB1_SX5 +MMDB1_SX95 +MMDM2_SI1452 +MMDM2_SI1555 +MMDM2_SI2082 +MMDM2_SX102 +MMDM2_SX12 +MMDM2_SX192 +MMDM2_SX282 +MMDM2_SX372 +MMJR0_SI1648 +MMJR0_SI2166 +MMJR0_SI2278 +MMJR0_SX118 +MMJR0_SX208 +MMJR0_SX28 +MMJR0_SX298 +MMJR0_SX388 +MMWH0_SI1089 +MMWH0_SI1301 +MMWH0_SI459 +MMWH0_SX189 +MMWH0_SX279 +MMWH0_SX369 +MMWH0_SX9 +MMWH0_SX99 +MPDF0_SI1542 +MPDF0_SI2172 +MPDF0_SI912 +MPDF0_SX102 +MPDF0_SX12 +MPDF0_SX192 +MPDF0_SX282 +MPDF0_SX372 +MRCS0_SI1223 +MRCS0_SI1853 +MRCS0_SI593 +MRCS0_SX143 +MRCS0_SX233 +MRCS0_SX323 +MRCS0_SX413 +MRCS0_SX53 +MREB0_SI1375 +MREB0_SI2005 +MREB0_SI745 +MREB0_SX115 +MREB0_SX205 +MREB0_SX25 +MREB0_SX295 +MREB0_SX385 +MRJM4_SI1489 +MRJM4_SI2119 +MRJM4_SI859 +MRJM4_SX139 +MRJM4_SX229 +MRJM4_SX319 +MRJM4_SX409 +MRJM4_SX49 +MRJR0_SI1182 +MRJR0_SI1812 +MRJR0_SI2313 +MRJR0_SX102 +MRJR0_SX12 +MRJR0_SX192 +MRJR0_SX282 +MRJR0_SX372 +MROA0_SI1307 +MROA0_SI1970 +MROA0_SI677 +MROA0_SX137 +MROA0_SX227 +MROA0_SX317 +MROA0_SX407 +MROA0_SX47 +MRTK0_SI1093 +MRTK0_SI1723 +MRTK0_SI1750 +MRTK0_SX103 +MRTK0_SX13 +MRTK0_SX193 +MRTK0_SX283 +MRTK0_SX373 +MRWS1_SI1130 +MRWS1_SI1496 +MRWS1_SI500 +MRWS1_SX140 +MRWS1_SX230 +MRWS1_SX320 +MRWS1_SX410 +MRWS1_SX50 +MTAA0_SI1285 +MTAA0_SI1915 +MTAA0_SI596 +MTAA0_SX115 +MTAA0_SX205 +MTAA0_SX25 
+MTAA0_SX295 +MTAA0_SX385 +MTDT0_SI1994 +MTDT0_SI2254 +MTDT0_SI994 +MTDT0_SX184 +MTDT0_SX274 +MTDT0_SX364 +MTDT0_SX4 +MTDT0_SX94 +MTEB0_SI1133 +MTEB0_SI2064 +MTEB0_SI503 +MTEB0_SX143 +MTEB0_SX233 +MTEB0_SX323 +MTEB0_SX413 +MTEB0_SX53 +MTHC0_SI1015 +MTHC0_SI1645 +MTHC0_SI2275 +MTHC0_SX115 +MTHC0_SX205 +MTHC0_SX25 +MTHC0_SX295 +MTHC0_SX385 +MWJG0_SI1124 +MWJG0_SI1754 +MWJG0_SI494 +MWJG0_SX134 +MWJG0_SX224 +MWJG0_SX314 +MWJG0_SX404 +MWJG0_SX44 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/test.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/test.uid new file mode 100644 index 0000000..e3967e4 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/test.uid @@ -0,0 +1,1680 @@ +FADG0_SA1 +FADG0_SA2 +FADG0_SI1279 +FADG0_SI1909 +FADG0_SI649 +FADG0_SX109 +FADG0_SX19 +FADG0_SX199 +FADG0_SX289 +FADG0_SX379 +FAKS0_SA1 +FAKS0_SA2 +FAKS0_SI1573 +FAKS0_SI2203 +FAKS0_SI943 +FAKS0_SX133 +FAKS0_SX223 +FAKS0_SX313 +FAKS0_SX403 +FAKS0_SX43 +FASW0_SA1 +FASW0_SA2 +FASW0_SI1550 +FASW0_SI2180 +FASW0_SI920 +FASW0_SX110 +FASW0_SX20 +FASW0_SX200 +FASW0_SX290 +FASW0_SX380 +FAWF0_SA1 +FAWF0_SA2 +FAWF0_SI1000 +FAWF0_SI1630 +FAWF0_SI2260 +FAWF0_SX10 +FAWF0_SX100 +FAWF0_SX190 +FAWF0_SX280 +FAWF0_SX370 +FCAL1_SA1 +FCAL1_SA2 +FCAL1_SI1403 +FCAL1_SI2033 +FCAL1_SI773 +FCAL1_SX143 +FCAL1_SX233 +FCAL1_SX323 +FCAL1_SX413 +FCAL1_SX53 +FCAU0_SA1 +FCAU0_SA2 +FCAU0_SI1037 +FCAU0_SI1667 +FCAU0_SI2297 +FCAU0_SX137 +FCAU0_SX227 +FCAU0_SX317 +FCAU0_SX407 +FCAU0_SX47 +FCFT0_SA1 +FCFT0_SA2 +FCFT0_SI1178 +FCFT0_SI1808 +FCFT0_SI548 +FCFT0_SX188 +FCFT0_SX278 +FCFT0_SX368 +FCFT0_SX8 +FCFT0_SX98 +FCMH0_SA1 +FCMH0_SA2 +FCMH0_SI1454 +FCMH0_SI2084 +FCMH0_SI824 +FCMH0_SX104 +FCMH0_SX14 +FCMH0_SX194 +FCMH0_SX284 +FCMH0_SX374 +FCMH1_SA1 +FCMH1_SA2 +FCMH1_SI1493 +FCMH1_SI2123 +FCMH1_SI863 +FCMH1_SX143 +FCMH1_SX233 +FCMH1_SX323 +FCMH1_SX413 +FCMH1_SX53 +FCMR0_SA1 +FCMR0_SA2 +FCMR0_SI1105 +FCMR0_SI1735 +FCMR0_SI475 +FCMR0_SX115 +FCMR0_SX205 +FCMR0_SX25 
+FCMR0_SX295 +FCMR0_SX385 +FCRH0_SA1 +FCRH0_SA2 +FCRH0_SI1088 +FCRH0_SI1718 +FCRH0_SI458 +FCRH0_SX188 +FCRH0_SX278 +FCRH0_SX368 +FCRH0_SX8 +FCRH0_SX98 +FDAC1_SA1 +FDAC1_SA2 +FDAC1_SI1474 +FDAC1_SI2104 +FDAC1_SI844 +FDAC1_SX124 +FDAC1_SX214 +FDAC1_SX304 +FDAC1_SX34 +FDAC1_SX394 +FDHC0_SA1 +FDHC0_SA2 +FDHC0_SI1559 +FDHC0_SI2189 +FDHC0_SI929 +FDHC0_SX119 +FDHC0_SX209 +FDHC0_SX29 +FDHC0_SX299 +FDHC0_SX389 +FDMS0_SA1 +FDMS0_SA2 +FDMS0_SI1218 +FDMS0_SI1502 +FDMS0_SI1848 +FDMS0_SX138 +FDMS0_SX228 +FDMS0_SX318 +FDMS0_SX408 +FDMS0_SX48 +FDRD1_SA1 +FDRD1_SA2 +FDRD1_SI1544 +FDRD1_SI1566 +FDRD1_SI2149 +FDRD1_SX104 +FDRD1_SX14 +FDRD1_SX194 +FDRD1_SX284 +FDRD1_SX374 +FDRW0_SA1 +FDRW0_SA2 +FDRW0_SI1283 +FDRW0_SI1423 +FDRW0_SI653 +FDRW0_SX113 +FDRW0_SX203 +FDRW0_SX23 +FDRW0_SX293 +FDRW0_SX383 +FEDW0_SA1 +FEDW0_SA2 +FEDW0_SI1084 +FEDW0_SI1653 +FEDW0_SI1714 +FEDW0_SX184 +FEDW0_SX274 +FEDW0_SX364 +FEDW0_SX4 +FEDW0_SX94 +FELC0_SA1 +FELC0_SA2 +FELC0_SI1386 +FELC0_SI2016 +FELC0_SI756 +FELC0_SX126 +FELC0_SX216 +FELC0_SX306 +FELC0_SX36 +FELC0_SX396 +FGJD0_SA1 +FGJD0_SA2 +FGJD0_SI1179 +FGJD0_SI549 +FGJD0_SI818 +FGJD0_SX189 +FGJD0_SX279 +FGJD0_SX369 +FGJD0_SX9 +FGJD0_SX99 +FGMD0_SA1 +FGMD0_SA2 +FGMD0_SI1943 +FGMD0_SI2107 +FGMD0_SI683 +FGMD0_SX143 +FGMD0_SX233 +FGMD0_SX323 +FGMD0_SX413 +FGMD0_SX53 +FGWR0_SA1 +FGWR0_SA2 +FGWR0_SI1578 +FGWR0_SI2208 +FGWR0_SI948 +FGWR0_SX138 +FGWR0_SX228 +FGWR0_SX318 +FGWR0_SX408 +FGWR0_SX48 +FHES0_SA1 +FHES0_SA2 +FHES0_SI1109 +FHES0_SI1739 +FHES0_SI479 +FHES0_SX119 +FHES0_SX209 +FHES0_SX29 +FHES0_SX299 +FHES0_SX389 +FHEW0_SA1 +FHEW0_SA2 +FHEW0_SI2023 +FHEW0_SI690 +FHEW0_SI763 +FHEW0_SX133 +FHEW0_SX223 +FHEW0_SX313 +FHEW0_SX403 +FHEW0_SX43 +FISB0_SA1 +FISB0_SA2 +FISB0_SI1579 +FISB0_SI2209 +FISB0_SI949 +FISB0_SX139 +FISB0_SX229 +FISB0_SX319 +FISB0_SX409 +FISB0_SX49 +FJAS0_SA1 +FJAS0_SA2 +FJAS0_SI1400 +FJAS0_SI2030 +FJAS0_SI770 +FJAS0_SX140 +FJAS0_SX230 +FJAS0_SX320 +FJAS0_SX410 +FJAS0_SX50 +FJCS0_SA1 +FJCS0_SA2 +FJCS0_SI1309 +FJCS0_SI1833 +FJCS0_SI1939 
+FJCS0_SX139 +FJCS0_SX229 +FJCS0_SX319 +FJCS0_SX409 +FJCS0_SX49 +FJEM0_SA1 +FJEM0_SA2 +FJEM0_SI1264 +FJEM0_SI1894 +FJEM0_SI634 +FJEM0_SX184 +FJEM0_SX274 +FJEM0_SX364 +FJEM0_SX4 +FJEM0_SX94 +FJLM0_SA1 +FJLM0_SA2 +FJLM0_SI1043 +FJLM0_SI1673 +FJLM0_SI2303 +FJLM0_SX143 +FJLM0_SX233 +FJLM0_SX323 +FJLM0_SX413 +FJLM0_SX53 +FJMG0_SA1 +FJMG0_SA2 +FJMG0_SI1181 +FJMG0_SI1811 +FJMG0_SI551 +FJMG0_SX101 +FJMG0_SX11 +FJMG0_SX191 +FJMG0_SX281 +FJMG0_SX371 +FJRE0_SA1 +FJRE0_SA2 +FJRE0_SI1116 +FJRE0_SI1587 +FJRE0_SI1746 +FJRE0_SX126 +FJRE0_SX216 +FJRE0_SX306 +FJRE0_SX36 +FJRE0_SX396 +FJSA0_SA1 +FJSA0_SA2 +FJSA0_SI1379 +FJSA0_SI2009 +FJSA0_SI749 +FJSA0_SX119 +FJSA0_SX209 +FJSA0_SX29 +FJSA0_SX299 +FJSA0_SX389 +FJSJ0_SA1 +FJSJ0_SA2 +FJSJ0_SI1484 +FJSJ0_SI2114 +FJSJ0_SI854 +FJSJ0_SX134 +FJSJ0_SX224 +FJSJ0_SX314 +FJSJ0_SX404 +FJSJ0_SX44 +FJWB0_SA1 +FJWB0_SA2 +FJWB0_SI1265 +FJWB0_SI635 +FJWB0_SI992 +FJWB0_SX185 +FJWB0_SX275 +FJWB0_SX365 +FJWB0_SX5 +FJWB0_SX95 +FKMS0_SA1 +FKMS0_SA2 +FKMS0_SI1490 +FKMS0_SI2120 +FKMS0_SI860 +FKMS0_SX140 +FKMS0_SX230 +FKMS0_SX320 +FKMS0_SX410 +FKMS0_SX50 +FLAS0_SA1 +FLAS0_SA2 +FLAS0_SI1026 +FLAS0_SI1488 +FLAS0_SI858 +FLAS0_SX138 +FLAS0_SX228 +FLAS0_SX318 +FLAS0_SX408 +FLAS0_SX48 +FLBW0_SA1 +FLBW0_SA2 +FLBW0_SI1219 +FLBW0_SI1849 +FLBW0_SI2253 +FLBW0_SX139 +FLBW0_SX229 +FLBW0_SX319 +FLBW0_SX409 +FLBW0_SX49 +FLKD0_SA1 +FLKD0_SA2 +FLKD0_SI1369 +FLKD0_SI739 +FLKD0_SI894 +FLKD0_SX109 +FLKD0_SX19 +FLKD0_SX199 +FLKD0_SX289 +FLKD0_SX379 +FLNH0_SA1 +FLNH0_SA2 +FLNH0_SI1214 +FLNH0_SI584 +FLNH0_SI941 +FLNH0_SX134 +FLNH0_SX224 +FLNH0_SX314 +FLNH0_SX404 +FLNH0_SX44 +FMAF0_SA1 +FMAF0_SA2 +FMAF0_SI1459 +FMAF0_SI2089 +FMAF0_SI829 +FMAF0_SX109 +FMAF0_SX19 +FMAF0_SX199 +FMAF0_SX289 +FMAF0_SX379 +FMAH0_SA1 +FMAH0_SA2 +FMAH0_SI1289 +FMAH0_SI1919 +FMAH0_SI659 +FMAH0_SX119 +FMAH0_SX209 +FMAH0_SX29 +FMAH0_SX299 +FMAH0_SX389 +FMCM0_SA1 +FMCM0_SA2 +FMCM0_SI1180 +FMCM0_SI1810 +FMCM0_SI550 +FMCM0_SX10 +FMCM0_SX100 +FMCM0_SX190 +FMCM0_SX280 +FMCM0_SX370 +FMGD0_SA1 +FMGD0_SA2 
+FMGD0_SI1564 +FMGD0_SI2194 +FMGD0_SI934 +FMGD0_SX124 +FMGD0_SX214 +FMGD0_SX304 +FMGD0_SX34 +FMGD0_SX394 +FMLD0_SA1 +FMLD0_SA2 +FMLD0_SI2185 +FMLD0_SI822 +FMLD0_SI925 +FMLD0_SX115 +FMLD0_SX205 +FMLD0_SX25 +FMLD0_SX295 +FMLD0_SX385 +FMML0_SA1 +FMML0_SA2 +FMML0_SI1040 +FMML0_SI1670 +FMML0_SI2300 +FMML0_SX140 +FMML0_SX230 +FMML0_SX320 +FMML0_SX410 +FMML0_SX50 +FNLP0_SA1 +FNLP0_SA2 +FNLP0_SI1308 +FNLP0_SI1938 +FNLP0_SI678 +FNLP0_SX138 +FNLP0_SX228 +FNLP0_SX318 +FNLP0_SX408 +FNLP0_SX48 +FNMR0_SA1 +FNMR0_SA2 +FNMR0_SI1399 +FNMR0_SI2029 +FNMR0_SI769 +FNMR0_SX139 +FNMR0_SX229 +FNMR0_SX319 +FNMR0_SX409 +FNMR0_SX49 +FPAS0_SA1 +FPAS0_SA2 +FPAS0_SI1272 +FPAS0_SI2204 +FPAS0_SI944 +FPAS0_SX134 +FPAS0_SX224 +FPAS0_SX314 +FPAS0_SX404 +FPAS0_SX44 +FPKT0_SA1 +FPKT0_SA2 +FPKT0_SI1538 +FPKT0_SI2168 +FPKT0_SI908 +FPKT0_SX188 +FPKT0_SX278 +FPKT0_SX368 +FPKT0_SX8 +FPKT0_SX98 +FRAM1_SA1 +FRAM1_SA2 +FRAM1_SI1360 +FRAM1_SI522 +FRAM1_SI730 +FRAM1_SX10 +FRAM1_SX100 +FRAM1_SX190 +FRAM1_SX280 +FRAM1_SX370 +FREW0_SA1 +FREW0_SA2 +FREW0_SI1030 +FREW0_SI1280 +FREW0_SI1910 +FREW0_SX110 +FREW0_SX20 +FREW0_SX200 +FREW0_SX290 +FREW0_SX380 +FRNG0_SA1 +FRNG0_SA2 +FRNG0_SI1355 +FRNG0_SI1985 +FRNG0_SI725 +FRNG0_SX185 +FRNG0_SX275 +FRNG0_SX365 +FRNG0_SX5 +FRNG0_SX95 +FSEM0_SA1 +FSEM0_SA2 +FSEM0_SI1198 +FSEM0_SI1828 +FSEM0_SI568 +FSEM0_SX118 +FSEM0_SX208 +FSEM0_SX28 +FSEM0_SX298 +FSEM0_SX388 +FSLB1_SA1 +FSLB1_SA2 +FSLB1_SI1904 +FSLB1_SI644 +FSLB1_SI891 +FSLB1_SX104 +FSLB1_SX14 +FSLB1_SX194 +FSLB1_SX284 +FSLB1_SX374 +FSXA0_SA1 +FSXA0_SA2 +FSXA0_SI1108 +FSXA0_SI1846 +FSXA0_SI478 +FSXA0_SX118 +FSXA0_SX208 +FSXA0_SX28 +FSXA0_SX298 +FSXA0_SX388 +FTLH0_SA1 +FTLH0_SA2 +FTLH0_SI1009 +FTLH0_SI1390 +FTLH0_SI1639 +FTLH0_SX109 +FTLH0_SX19 +FTLH0_SX199 +FTLH0_SX289 +FTLH0_SX379 +FUTB0_SA1 +FUTB0_SA2 +FUTB0_SI1204 +FUTB0_SI1330 +FUTB0_SI1834 +FUTB0_SX124 +FUTB0_SX214 +FUTB0_SX304 +FUTB0_SX34 +FUTB0_SX394 +MABW0_SA1 +MABW0_SA2 +MABW0_SI1230 +MABW0_SI1664 +MABW0_SI2294 +MABW0_SX134 +MABW0_SX224 +MABW0_SX314 +MABW0_SX404 
+MABW0_SX44 +MAHH0_SA1 +MAHH0_SA2 +MAHH0_SI1294 +MAHH0_SI1924 +MAHH0_SI664 +MAHH0_SX124 +MAHH0_SX214 +MAHH0_SX304 +MAHH0_SX34 +MAHH0_SX394 +MAJC0_SA1 +MAJC0_SA2 +MAJC0_SI1946 +MAJC0_SI2095 +MAJC0_SI835 +MAJC0_SX115 +MAJC0_SX205 +MAJC0_SX25 +MAJC0_SX295 +MAJC0_SX385 +MBDG0_SA1 +MBDG0_SA2 +MBDG0_SI1463 +MBDG0_SI2093 +MBDG0_SI833 +MBDG0_SX113 +MBDG0_SX203 +MBDG0_SX23 +MBDG0_SX293 +MBDG0_SX383 +MBJK0_SA1 +MBJK0_SA2 +MBJK0_SI1175 +MBJK0_SI2128 +MBJK0_SI545 +MBJK0_SX185 +MBJK0_SX275 +MBJK0_SX365 +MBJK0_SX5 +MBJK0_SX95 +MBNS0_SA1 +MBNS0_SA2 +MBNS0_SI1220 +MBNS0_SI1850 +MBNS0_SI590 +MBNS0_SX140 +MBNS0_SX230 +MBNS0_SX320 +MBNS0_SX410 +MBNS0_SX50 +MBPM0_SA1 +MBPM0_SA2 +MBPM0_SI1577 +MBPM0_SI1584 +MBPM0_SI947 +MBPM0_SX137 +MBPM0_SX227 +MBPM0_SX317 +MBPM0_SX407 +MBPM0_SX47 +MBWM0_SA1 +MBWM0_SA2 +MBWM0_SI1304 +MBWM0_SI1934 +MBWM0_SI674 +MBWM0_SX134 +MBWM0_SX224 +MBWM0_SX314 +MBWM0_SX404 +MBWM0_SX44 +MCCS0_SA1 +MCCS0_SA2 +MCCS0_SI1469 +MCCS0_SI2099 +MCCS0_SI839 +MCCS0_SX119 +MCCS0_SX209 +MCCS0_SX29 +MCCS0_SX299 +MCCS0_SX389 +MCEM0_SA1 +MCEM0_SA2 +MCEM0_SI1398 +MCEM0_SI2028 +MCEM0_SI768 +MCEM0_SX138 +MCEM0_SX228 +MCEM0_SX318 +MCEM0_SX408 +MCEM0_SX48 +MCHH0_SA1 +MCHH0_SA2 +MCHH0_SI1004 +MCHH0_SI1634 +MCHH0_SI530 +MCHH0_SX104 +MCHH0_SX14 +MCHH0_SX194 +MCHH0_SX284 +MCHH0_SX374 +MCMB0_SA1 +MCMB0_SA2 +MCMB0_SI1268 +MCMB0_SI1898 +MCMB0_SI638 +MCMB0_SX188 +MCMB0_SX278 +MCMB0_SX368 +MCMB0_SX8 +MCMB0_SX98 +MCMJ0_SA1 +MCMJ0_SA2 +MCMJ0_SI1094 +MCMJ0_SI464 +MCMJ0_SI602 +MCMJ0_SX104 +MCMJ0_SX14 +MCMJ0_SX194 +MCMJ0_SX284 +MCMJ0_SX374 +MCRC0_SA1 +MCRC0_SA2 +MCRC0_SI1092 +MCRC0_SI1722 +MCRC0_SI462 +MCRC0_SX102 +MCRC0_SX12 +MCRC0_SX192 +MCRC0_SX282 +MCRC0_SX372 +MCSH0_SA1 +MCSH0_SA2 +MCSH0_SI1549 +MCSH0_SI2179 +MCSH0_SI919 +MCSH0_SX109 +MCSH0_SX19 +MCSH0_SX199 +MCSH0_SX289 +MCSH0_SX379 +MCTT0_SA1 +MCTT0_SA2 +MCTT0_SI1144 +MCTT0_SI2188 +MCTT0_SI928 +MCTT0_SX118 +MCTT0_SX208 +MCTT0_SX28 +MCTT0_SX298 +MCTT0_SX388 +MCTW0_SA1 +MCTW0_SA2 +MCTW0_SI1373 +MCTW0_SI2003 +MCTW0_SI743 +MCTW0_SX113 
+MCTW0_SX203 +MCTW0_SX23 +MCTW0_SX293 +MCTW0_SX383 +MDAB0_SA1 +MDAB0_SA2 +MDAB0_SI1039 +MDAB0_SI1669 +MDAB0_SI2299 +MDAB0_SX139 +MDAB0_SX229 +MDAB0_SX319 +MDAB0_SX409 +MDAB0_SX49 +MDAC2_SA1 +MDAC2_SA2 +MDAC2_SI2259 +MDAC2_SI560 +MDAC2_SI999 +MDAC2_SX189 +MDAC2_SX279 +MDAC2_SX369 +MDAC2_SX9 +MDAC2_SX99 +MDAW1_SA1 +MDAW1_SA2 +MDAW1_SI1453 +MDAW1_SI2083 +MDAW1_SI823 +MDAW1_SX103 +MDAW1_SX13 +MDAW1_SX193 +MDAW1_SX283 +MDAW1_SX373 +MDBB0_SA1 +MDBB0_SA2 +MDBB0_SI1195 +MDBB0_SI1825 +MDBB0_SI565 +MDBB0_SX115 +MDBB0_SX205 +MDBB0_SX25 +MDBB0_SX295 +MDBB0_SX385 +MDLD0_SA1 +MDLD0_SA2 +MDLD0_SI1543 +MDLD0_SI2173 +MDLD0_SI913 +MDLD0_SX103 +MDLD0_SX13 +MDLD0_SX193 +MDLD0_SX283 +MDLD0_SX373 +MDLF0_SA1 +MDLF0_SA2 +MDLF0_SI1583 +MDLF0_SI2213 +MDLF0_SI953 +MDLF0_SX143 +MDLF0_SX233 +MDLF0_SX323 +MDLF0_SX413 +MDLF0_SX53 +MDLS0_SA1 +MDLS0_SA2 +MDLS0_SI1628 +MDLS0_SI2258 +MDLS0_SI998 +MDLS0_SX188 +MDLS0_SX278 +MDLS0_SX368 +MDLS0_SX8 +MDLS0_SX98 +MDRB0_SA1 +MDRB0_SA2 +MDRB0_SI1174 +MDRB0_SI2109 +MDRB0_SI544 +MDRB0_SX184 +MDRB0_SX274 +MDRB0_SX364 +MDRB0_SX4 +MDRB0_SX94 +MDRM0_SA1 +MDRM0_SA2 +MDRM0_SI1013 +MDRM0_SI1643 +MDRM0_SI2273 +MDRM0_SX113 +MDRM0_SX203 +MDRM0_SX23 +MDRM0_SX293 +MDRM0_SX383 +MDSC0_SA1 +MDSC0_SA2 +MDSC0_SI1038 +MDSC0_SI2298 +MDSC0_SI967 +MDSC0_SX138 +MDSC0_SX228 +MDSC0_SX318 +MDSC0_SX408 +MDSC0_SX48 +MDVC0_SA1 +MDVC0_SA2 +MDVC0_SI2174 +MDVC0_SI2196 +MDVC0_SI936 +MDVC0_SX126 +MDVC0_SX216 +MDVC0_SX306 +MDVC0_SX36 +MDVC0_SX396 +MDWA0_SA1 +MDWA0_SA2 +MDWA0_SI1146 +MDWA0_SI1445 +MDWA0_SI519 +MDWA0_SX185 +MDWA0_SX275 +MDWA0_SX365 +MDWA0_SX5 +MDWA0_SX95 +MDWK0_SA1 +MDWK0_SA2 +MDWK0_SI1540 +MDWK0_SI2170 +MDWK0_SI910 +MDWK0_SX10 +MDWK0_SX100 +MDWK0_SX190 +MDWK0_SX280 +MDWK0_SX370 +MERS0_SA1 +MERS0_SA2 +MERS0_SI1019 +MERS0_SI1649 +MERS0_SI497 +MERS0_SX119 +MERS0_SX209 +MERS0_SX29 +MERS0_SX299 +MERS0_SX389 +MESD0_SA1 +MESD0_SA2 +MESD0_SI1002 +MESD0_SI1632 +MESD0_SI2262 +MESD0_SX102 +MESD0_SX12 +MESD0_SX192 +MESD0_SX282 +MESD0_SX372 +MFGK0_SA1 +MFGK0_SA2 +MFGK0_SI1451 +MFGK0_SI1744 
+MFGK0_SI484 +MFGK0_SX124 +MFGK0_SX214 +MFGK0_SX304 +MFGK0_SX34 +MFGK0_SX394 +MGJF0_SA1 +MGJF0_SA2 +MGJF0_SI1901 +MGJF0_SI641 +MGJF0_SI776 +MGJF0_SX101 +MGJF0_SX11 +MGJF0_SX191 +MGJF0_SX281 +MGJF0_SX371 +MGLB0_SA1 +MGLB0_SA2 +MGLB0_SI1534 +MGLB0_SI2164 +MGLB0_SI904 +MGLB0_SX184 +MGLB0_SX274 +MGLB0_SX364 +MGLB0_SX4 +MGLB0_SX94 +MGMM0_SA1 +MGMM0_SA2 +MGMM0_SI1129 +MGMM0_SI1759 +MGMM0_SI499 +MGMM0_SX139 +MGMM0_SX229 +MGMM0_SX319 +MGMM0_SX409 +MGMM0_SX49 +MGRT0_SA1 +MGRT0_SA2 +MGRT0_SI1450 +MGRT0_SI2080 +MGRT0_SI820 +MGRT0_SX10 +MGRT0_SX100 +MGRT0_SX190 +MGRT0_SX280 +MGRT0_SX370 +MGWT0_SA1 +MGWT0_SA2 +MGWT0_SI1539 +MGWT0_SI2169 +MGWT0_SI909 +MGWT0_SX189 +MGWT0_SX279 +MGWT0_SX369 +MGWT0_SX9 +MGWT0_SX99 +MHPG0_SA1 +MHPG0_SA2 +MHPG0_SI1090 +MHPG0_SI1720 +MHPG0_SI460 +MHPG0_SX10 +MHPG0_SX100 +MHPG0_SX190 +MHPG0_SX280 +MHPG0_SX370 +MJAR0_SA1 +MJAR0_SA2 +MJAR0_SI1988 +MJAR0_SI2247 +MJAR0_SI728 +MJAR0_SX188 +MJAR0_SX278 +MJAR0_SX368 +MJAR0_SX8 +MJAR0_SX98 +MJBR0_SA1 +MJBR0_SA2 +MJBR0_SI1001 +MJBR0_SI1631 +MJBR0_SI2261 +MJBR0_SX101 +MJBR0_SX11 +MJBR0_SX191 +MJBR0_SX281 +MJBR0_SX371 +MJDH0_SA1 +MJDH0_SA2 +MJDH0_SI1354 +MJDH0_SI1984 +MJDH0_SI724 +MJDH0_SX184 +MJDH0_SX274 +MJDH0_SX364 +MJDH0_SX4 +MJDH0_SX94 +MJDM1_SA1 +MJDM1_SA2 +MJDM1_SI1085 +MJDM1_SI1715 +MJDM1_SI455 +MJDM1_SX185 +MJDM1_SX275 +MJDM1_SX365 +MJDM1_SX5 +MJDM1_SX95 +MJES0_SA1 +MJES0_SA2 +MJES0_SI1384 +MJES0_SI2014 +MJES0_SI754 +MJES0_SX124 +MJES0_SX214 +MJES0_SX304 +MJES0_SX34 +MJES0_SX394 +MJFC0_SA1 +MJFC0_SA2 +MJFC0_SI1033 +MJFC0_SI1663 +MJFC0_SI2293 +MJFC0_SX133 +MJFC0_SX223 +MJFC0_SX313 +MJFC0_SX403 +MJFC0_SX43 +MJJG0_SA1 +MJJG0_SA2 +MJJG0_SI1003 +MJJG0_SI1633 +MJJG0_SI2263 +MJJG0_SX103 +MJJG0_SX13 +MJJG0_SX193 +MJJG0_SX283 +MJJG0_SX373 +MJLN0_SA1 +MJLN0_SA2 +MJLN0_SI1449 +MJLN0_SI2079 +MJLN0_SI819 +MJLN0_SX189 +MJLN0_SX279 +MJLN0_SX369 +MJLN0_SX9 +MJLN0_SX99 +MJMP0_SA1 +MJMP0_SA2 +MJMP0_SI1535 +MJMP0_SI1791 +MJMP0_SI905 +MJMP0_SX185 +MJMP0_SX275 +MJMP0_SX365 +MJMP0_SX5 +MJMP0_SX95 +MJRF0_SA1 +MJRF0_SA2 
+MJRF0_SI1114 +MJRF0_SI2081 +MJRF0_SI821 +MJRF0_SX101 +MJRF0_SX11 +MJRF0_SX191 +MJRF0_SX281 +MJRF0_SX371 +MJSW0_SA1 +MJSW0_SA2 +MJSW0_SI1010 +MJSW0_SI1640 +MJSW0_SI2270 +MJSW0_SX110 +MJSW0_SX20 +MJSW0_SX200 +MJSW0_SX290 +MJSW0_SX380 +MJTC0_SA1 +MJTC0_SA2 +MJTC0_SI1460 +MJTC0_SI2090 +MJTC0_SI830 +MJTC0_SX110 +MJTC0_SX20 +MJTC0_SX200 +MJTC0_SX290 +MJTC0_SX380 +MJTH0_SA1 +MJTH0_SA2 +MJTH0_SI1296 +MJTH0_SI1926 +MJTH0_SI666 +MJTH0_SX126 +MJTH0_SX216 +MJTH0_SX306 +MJTH0_SX36 +MJTH0_SX396 +MJVW0_SA1 +MJVW0_SA2 +MJVW0_SI1733 +MJVW0_SI1758 +MJVW0_SI473 +MJVW0_SX113 +MJVW0_SX203 +MJVW0_SX23 +MJVW0_SX293 +MJVW0_SX383 +MKCH0_SA1 +MKCH0_SA2 +MKCH0_SI1378 +MKCH0_SI1425 +MKCH0_SI2008 +MKCH0_SX118 +MKCH0_SX208 +MKCH0_SX28 +MKCH0_SX298 +MKCH0_SX388 +MKCL0_SA1 +MKCL0_SA2 +MKCL0_SI1091 +MKCL0_SI1721 +MKCL0_SI461 +MKCL0_SX101 +MKCL0_SX11 +MKCL0_SX191 +MKCL0_SX281 +MKCL0_SX371 +MKDR0_SA1 +MKDR0_SA2 +MKDR0_SI1273 +MKDR0_SI1903 +MKDR0_SI643 +MKDR0_SX103 +MKDR0_SX13 +MKDR0_SX193 +MKDR0_SX283 +MKDR0_SX373 +MKJL0_SA1 +MKJL0_SA2 +MKJL0_SI1100 +MKJL0_SI1730 +MKJL0_SI470 +MKJL0_SX110 +MKJL0_SX20 +MKJL0_SX200 +MKJL0_SX290 +MKJL0_SX380 +MKLT0_SA1 +MKLT0_SA2 +MKLT0_SI1213 +MKLT0_SI1843 +MKLT0_SI583 +MKLT0_SX133 +MKLT0_SX223 +MKLT0_SX313 +MKLT0_SX403 +MKLT0_SX43 +MLIH0_SA1 +MLIH0_SA2 +MLIH0_SI1183 +MLIH0_SI1813 +MLIH0_SI553 +MLIH0_SX103 +MLIH0_SX13 +MLIH0_SX193 +MLIH0_SX283 +MLIH0_SX373 +MLJB0_SA1 +MLJB0_SA2 +MLJB0_SI1310 +MLJB0_SI1940 +MLJB0_SI680 +MLJB0_SX140 +MLJB0_SX230 +MLJB0_SX320 +MLJB0_SX410 +MLJB0_SX50 +MLLL0_SA1 +MLLL0_SA2 +MLLL0_SI1363 +MLLL0_SI1993 +MLLL0_SI733 +MLLL0_SX103 +MLLL0_SX13 +MLLL0_SX193 +MLLL0_SX283 +MLLL0_SX373 +MLNT0_SA1 +MLNT0_SA2 +MLNT0_SI1574 +MLNT0_SI1902 +MLNT0_SI642 +MLNT0_SX102 +MLNT0_SX12 +MLNT0_SX192 +MLNT0_SX282 +MLNT0_SX372 +MMAB0_SA1 +MMAB0_SA2 +MMAB0_SI1362 +MMAB0_SI1992 +MMAB0_SI732 +MMAB0_SX102 +MMAB0_SX12 +MMAB0_SX192 +MMAB0_SX282 +MMAB0_SX372 +MMDB1_SA1 +MMDB1_SA2 +MMDB1_SI1625 +MMDB1_SI2255 +MMDB1_SI995 +MMDB1_SX185 +MMDB1_SX275 +MMDB1_SX365 +MMDB1_SX5 
+MMDB1_SX95 +MMDH0_SA1 +MMDH0_SA2 +MMDH0_SI1656 +MMDH0_SI2118 +MMDH0_SI2286 +MMDH0_SX126 +MMDH0_SX216 +MMDH0_SX306 +MMDH0_SX36 +MMDH0_SX396 +MMDM2_SA1 +MMDM2_SA2 +MMDM2_SI1452 +MMDM2_SI1555 +MMDM2_SI2082 +MMDM2_SX102 +MMDM2_SX12 +MMDM2_SX192 +MMDM2_SX282 +MMDM2_SX372 +MMJR0_SA1 +MMJR0_SA2 +MMJR0_SI1648 +MMJR0_SI2166 +MMJR0_SI2278 +MMJR0_SX118 +MMJR0_SX208 +MMJR0_SX28 +MMJR0_SX298 +MMJR0_SX388 +MMWH0_SA1 +MMWH0_SA2 +MMWH0_SI1089 +MMWH0_SI1301 +MMWH0_SI459 +MMWH0_SX189 +MMWH0_SX279 +MMWH0_SX369 +MMWH0_SX9 +MMWH0_SX99 +MNJM0_SA1 +MNJM0_SA2 +MNJM0_SI1580 +MNJM0_SI2210 +MNJM0_SI950 +MNJM0_SX140 +MNJM0_SX230 +MNJM0_SX320 +MNJM0_SX410 +MNJM0_SX50 +MNLS0_SA1 +MNLS0_SA2 +MNLS0_SI1483 +MNLS0_SI1610 +MNLS0_SI853 +MNLS0_SX133 +MNLS0_SX223 +MNLS0_SX313 +MNLS0_SX403 +MNLS0_SX43 +MPAB0_SA1 +MPAB0_SA2 +MPAB0_SI1103 +MPAB0_SI1128 +MPAB0_SI498 +MPAB0_SX138 +MPAB0_SX228 +MPAB0_SX318 +MPAB0_SX408 +MPAB0_SX48 +MPAM0_SA1 +MPAM0_SA2 +MPAM0_SI1189 +MPAM0_SI1819 +MPAM0_SI1961 +MPAM0_SX109 +MPAM0_SX19 +MPAM0_SX199 +MPAM0_SX289 +MPAM0_SX379 +MPAM1_SA1 +MPAM1_SA2 +MPAM1_SI1029 +MPAM1_SI1836 +MPAM1_SI576 +MPAM1_SX126 +MPAM1_SX216 +MPAM1_SX306 +MPAM1_SX36 +MPAM1_SX396 +MPCS0_SA1 +MPCS0_SA2 +MPCS0_SI1359 +MPCS0_SI1989 +MPCS0_SI729 +MPCS0_SX189 +MPCS0_SX279 +MPCS0_SX369 +MPCS0_SX9 +MPCS0_SX99 +MPDF0_SA1 +MPDF0_SA2 +MPDF0_SI1542 +MPDF0_SI2172 +MPDF0_SI912 +MPDF0_SX102 +MPDF0_SX12 +MPDF0_SX192 +MPDF0_SX282 +MPDF0_SX372 +MPGL0_SA1 +MPGL0_SA2 +MPGL0_SI1099 +MPGL0_SI1729 +MPGL0_SI469 +MPGL0_SX109 +MPGL0_SX19 +MPGL0_SX199 +MPGL0_SX289 +MPGL0_SX379 +MPLB0_SA1 +MPLB0_SA2 +MPLB0_SI1394 +MPLB0_SI2024 +MPLB0_SI764 +MPLB0_SX134 +MPLB0_SX224 +MPLB0_SX314 +MPLB0_SX404 +MPLB0_SX44 +MPWM0_SA1 +MPWM0_SA2 +MPWM0_SI1127 +MPWM0_SI1757 +MPWM0_SI2279 +MPWM0_SX137 +MPWM0_SX227 +MPWM0_SX317 +MPWM0_SX407 +MPWM0_SX47 +MRCS0_SA1 +MRCS0_SA2 +MRCS0_SI1223 +MRCS0_SI1853 +MRCS0_SI593 +MRCS0_SX143 +MRCS0_SX233 +MRCS0_SX323 +MRCS0_SX413 +MRCS0_SX53 +MRCZ0_SA1 +MRCZ0_SA2 +MRCZ0_SI1541 +MRCZ0_SI2171 +MRCZ0_SI911 +MRCZ0_SX101 
+MRCZ0_SX11 +MRCZ0_SX191 +MRCZ0_SX281 +MRCZ0_SX371 +MREB0_SA1 +MREB0_SA2 +MREB0_SI1375 +MREB0_SI2005 +MREB0_SI745 +MREB0_SX115 +MREB0_SX205 +MREB0_SX25 +MREB0_SX295 +MREB0_SX385 +MRES0_SA1 +MRES0_SA2 +MRES0_SI1217 +MRES0_SI1847 +MRES0_SI587 +MRES0_SX137 +MRES0_SX227 +MRES0_SX317 +MRES0_SX407 +MRES0_SX47 +MRGG0_SA1 +MRGG0_SA2 +MRGG0_SI1199 +MRGG0_SI1829 +MRGG0_SI569 +MRGG0_SX119 +MRGG0_SX209 +MRGG0_SX29 +MRGG0_SX299 +MRGG0_SX389 +MRJM3_SA1 +MRJM3_SA2 +MRJM3_SI1448 +MRJM3_SI1809 +MRJM3_SI2078 +MRJM3_SX188 +MRJM3_SX278 +MRJM3_SX368 +MRJM3_SX8 +MRJM3_SX98 +MRJM4_SA1 +MRJM4_SA2 +MRJM4_SI1489 +MRJM4_SI2119 +MRJM4_SI859 +MRJM4_SX139 +MRJM4_SX229 +MRJM4_SX319 +MRJM4_SX409 +MRJM4_SX49 +MRJO0_SA1 +MRJO0_SA2 +MRJO0_SI1364 +MRJO0_SI1624 +MRJO0_SI734 +MRJO0_SX104 +MRJO0_SX14 +MRJO0_SX194 +MRJO0_SX284 +MRJO0_SX374 +MRJR0_SA1 +MRJR0_SA2 +MRJR0_SI1182 +MRJR0_SI1812 +MRJR0_SI2313 +MRJR0_SX102 +MRJR0_SX12 +MRJR0_SX192 +MRJR0_SX282 +MRJR0_SX372 +MRJS0_SA1 +MRJS0_SA2 +MRJS0_SI1444 +MRJS0_SI1523 +MRJS0_SI2074 +MRJS0_SX184 +MRJS0_SX274 +MRJS0_SX364 +MRJS0_SX4 +MRJS0_SX94 +MRKO0_SA1 +MRKO0_SA2 +MRKO0_SI1397 +MRKO0_SI2027 +MRKO0_SI767 +MRKO0_SX137 +MRKO0_SX227 +MRKO0_SX317 +MRKO0_SX407 +MRKO0_SX47 +MRMS1_SA1 +MRMS1_SA2 +MRMS1_SI1487 +MRMS1_SI2117 +MRMS1_SI857 +MRMS1_SX137 +MRMS1_SX227 +MRMS1_SX317 +MRMS1_SX407 +MRMS1_SX47 +MROA0_SA1 +MROA0_SA2 +MROA0_SI1307 +MROA0_SI1970 +MROA0_SI677 +MROA0_SX137 +MROA0_SX227 +MROA0_SX317 +MROA0_SX407 +MROA0_SX47 +MRPC0_SA1 +MRPC0_SA2 +MRPC0_SI1753 +MRPC0_SI493 +MRPC0_SI933 +MRPC0_SX133 +MRPC0_SX223 +MRPC0_SX313 +MRPC0_SX403 +MRPC0_SX43 +MRPP0_SA1 +MRPP0_SA2 +MRPP0_SI1184 +MRPP0_SI1814 +MRPP0_SI554 +MRPP0_SX104 +MRPP0_SX14 +MRPP0_SX194 +MRPP0_SX284 +MRPP0_SX374 +MRRK0_SA1 +MRRK0_SA2 +MRRK0_SI1288 +MRRK0_SI1716 +MRRK0_SI1918 +MRRK0_SX118 +MRRK0_SX208 +MRRK0_SX28 +MRRK0_SX298 +MRRK0_SX388 +MRTK0_SA1 +MRTK0_SA2 +MRTK0_SI1093 +MRTK0_SI1723 +MRTK0_SI1750 +MRTK0_SX103 +MRTK0_SX13 +MRTK0_SX193 +MRTK0_SX283 +MRTK0_SX373 +MRWS1_SA1 +MRWS1_SA2 +MRWS1_SI1130 
+MRWS1_SI1496 +MRWS1_SI500 +MRWS1_SX140 +MRWS1_SX230 +MRWS1_SX320 +MRWS1_SX410 +MRWS1_SX50 +MSFH1_SA1 +MSFH1_SA2 +MSFH1_SI1270 +MSFH1_SI1900 +MSFH1_SI640 +MSFH1_SX10 +MSFH1_SX100 +MSFH1_SX190 +MSFH1_SX280 +MSFH1_SX370 +MSJS1_SA1 +MSJS1_SA2 +MSJS1_SI1899 +MSJS1_SI639 +MSJS1_SI869 +MSJS1_SX189 +MSJS1_SX279 +MSJS1_SX369 +MSJS1_SX9 +MSJS1_SX99 +MSLB0_SA1 +MSLB0_SA2 +MSLB0_SI1193 +MSLB0_SI1823 +MSLB0_SI563 +MSLB0_SX113 +MSLB0_SX203 +MSLB0_SX23 +MSLB0_SX293 +MSLB0_SX383 +MSTK0_SA1 +MSTK0_SA2 +MSTK0_SI1024 +MSTK0_SI2222 +MSTK0_SI2284 +MSTK0_SX124 +MSTK0_SX214 +MSTK0_SX304 +MSTK0_SX34 +MSTK0_SX394 +MTAA0_SA1 +MTAA0_SA2 +MTAA0_SI1285 +MTAA0_SI1915 +MTAA0_SI596 +MTAA0_SX115 +MTAA0_SX205 +MTAA0_SX25 +MTAA0_SX295 +MTAA0_SX385 +MTAS1_SA1 +MTAS1_SA2 +MTAS1_SI1473 +MTAS1_SI2098 +MTAS1_SI838 +MTAS1_SX118 +MTAS1_SX208 +MTAS1_SX28 +MTAS1_SX298 +MTAS1_SX388 +MTDT0_SA1 +MTDT0_SA2 +MTDT0_SI1994 +MTDT0_SI2254 +MTDT0_SI994 +MTDT0_SX184 +MTDT0_SX274 +MTDT0_SX364 +MTDT0_SX4 +MTDT0_SX94 +MTEB0_SA1 +MTEB0_SA2 +MTEB0_SI1133 +MTEB0_SI2064 +MTEB0_SI503 +MTEB0_SX143 +MTEB0_SX233 +MTEB0_SX323 +MTEB0_SX413 +MTEB0_SX53 +MTHC0_SA1 +MTHC0_SA2 +MTHC0_SI1015 +MTHC0_SI1645 +MTHC0_SI2275 +MTHC0_SX115 +MTHC0_SX205 +MTHC0_SX25 +MTHC0_SX295 +MTHC0_SX385 +MTLS0_SA1 +MTLS0_SA2 +MTLS0_SI1370 +MTLS0_SI2000 +MTLS0_SI740 +MTLS0_SX110 +MTLS0_SX20 +MTLS0_SX200 +MTLS0_SX290 +MTLS0_SX380 +MTMR0_SA1 +MTMR0_SA2 +MTMR0_SI1303 +MTMR0_SI1933 +MTMR0_SI673 +MTMR0_SX133 +MTMR0_SX223 +MTMR0_SX313 +MTMR0_SX403 +MTMR0_SX43 +MTWH0_SA1 +MTWH0_SA2 +MTWH0_SI1190 +MTWH0_SI1629 +MTWH0_SI1820 +MTWH0_SX110 +MTWH0_SX20 +MTWH0_SX200 +MTWH0_SX290 +MTWH0_SX380 +MWBT0_SA1 +MWBT0_SA2 +MWBT0_SI1553 +MWBT0_SI2183 +MWBT0_SI923 +MWBT0_SX113 +MWBT0_SX203 +MWBT0_SX23 +MWBT0_SX293 +MWBT0_SX383 +MWEW0_SA1 +MWEW0_SA2 +MWEW0_SI1361 +MWEW0_SI1991 +MWEW0_SI731 +MWEW0_SX101 +MWEW0_SX11 +MWEW0_SX191 +MWEW0_SX281 +MWEW0_SX371 +MWJG0_SA1 +MWJG0_SA2 +MWJG0_SI1124 +MWJG0_SI1754 +MWJG0_SI494 +MWJG0_SX134 +MWJG0_SX224 +MWJG0_SX314 +MWJG0_SX404 +MWJG0_SX44 
+MWVW0_SA1 +MWVW0_SA2 +MWVW0_SI1476 +MWVW0_SI2106 +MWVW0_SI846 +MWVW0_SX126 +MWVW0_SX216 +MWVW0_SX306 +MWVW0_SX36 +MWVW0_SX396 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train.uid new file mode 100644 index 0000000..35b02e7 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train.uid @@ -0,0 +1,3000 @@ +FAEM0_SA1 +FAEM0_SA2 +FAEM0_SI2022 +FAEM0_SX132 +FAEM0_SX222 +FAEM0_SX312 +FAEM0_SX402 +FAJW0_SA2 +FAJW0_SI1893 +FAJW0_SX183 +FAJW0_SX273 +FAJW0_SX363 +FALK0_SA1 +FALK0_SA2 +FALK0_SI1086 +FALK0_SI456 +FALK0_SX276 +FALK0_SX366 +FALK0_SX96 +FALR0_SA1 +FALR0_SA2 +FALR0_SI1955 +FALR0_SI695 +FALR0_SX155 +FALR0_SX245 +FALR0_SX425 +FALR0_SX65 +FAPB0_SA1 +FAPB0_SA2 +FAPB0_SI1693 +FAPB0_SX163 +FAPB0_SX253 +FAPB0_SX343 +FAPB0_SX73 +FBAS0_SA2 +FBAS0_SI1387 +FBAS0_SX127 +FBAS0_SX307 +FBAS0_SX37 +FBAS0_SX397 +FBCG1_SA2 +FBCG1_SI1612 +FBCG1_SI2242 +FBCG1_SI982 +FBCG1_SX262 +FBCG1_SX82 +FBCH0_SA1 +FBCH0_SA2 +FBCH0_SI1586 +FBCH0_SI956 +FBCH0_SX146 +FBCH0_SX326 +FBCH0_SX56 +FBJL0_SA1 +FBJL0_SA2 +FBJL0_SI1552 +FBJL0_SI2182 +FBJL0_SX112 +FBJL0_SX202 +FBJL0_SX22 +FBJL0_SX292 +FBJL0_SX382 +FBLV0_SA2 +FBLV0_SI2318 +FBLV0_SX158 +FBLV0_SX248 +FBLV0_SX428 +FBMH0_SA2 +FBMH0_SI1766 +FBMH0_SX146 +FBMH0_SX236 +FBMH0_SX326 +FBMH0_SX416 +FBMH0_SX56 +FBMJ0_SA2 +FBMJ0_SX156 +FBMJ0_SX246 +FBMJ0_SX426 +FBMJ0_SX66 +FCAG0_SA2 +FCAG0_SI1503 +FCAG0_SI1641 +FCAG0_SI2133 +FCAG0_SX333 +FCAG0_SX423 +FCAG0_SX63 +FCAJ0_SA1 +FCAJ0_SA2 +FCAJ0_SI1804 +FCAJ0_SI849 +FCAJ0_SX129 +FCAJ0_SX219 +FCAJ0_SX39 +FCAJ0_SX399 +FCDR1_SA1 +FCDR1_SA2 +FCDR1_SX16 +FCDR1_SX376 +FCEG0_SA1 +FCEG0_SI1248 +FCEG0_SI1878 +FCEG0_SI618 +FCEG0_SX168 +FCEG0_SX258 +FCEG0_SX348 +FCEG0_SX438 +FCEG0_SX78 +FCJF0_SA2 +FCJF0_SI1027 +FCJF0_SI1657 +FCJF0_SI648 +FCJF0_SX217 +FCJF0_SX307 +FCJF0_SX37 +FCJF0_SX397 +FCJS0_SA1 +FCJS0_SA2 +FCJS0_SI977 +FCJS0_SX167 +FCJS0_SX347 +FCJS0_SX437 +FCJS0_SX77 +FCKE0_SA1 
+FCKE0_SI1111 +FCKE0_SX211 +FCKE0_SX301 +FCKE0_SX31 +FCKE0_SX391 +FCLT0_SA1 +FCLT0_SA2 +FCLT0_SI1438 +FCLT0_SX178 +FCLT0_SX268 +FCLT0_SX358 +FCMG0_SA1 +FCMG0_SI1242 +FCMG0_SX162 +FCMG0_SX252 +FCMG0_SX342 +FCMM0_SI1083 +FCMM0_SI453 +FCMM0_SX273 +FCMM0_SX363 +FCMM0_SX93 +FCRZ0_SA1 +FCRZ0_SA2 +FCRZ0_SI1913 +FCRZ0_SI793 +FCRZ0_SX163 +FCRZ0_SX253 +FCRZ0_SX343 +FCRZ0_SX73 +FCYL0_SA2 +FCYL0_SI1297 +FCYL0_SI1927 +FCYL0_SX127 +FCYL0_SX217 +FCYL0_SX397 +FDAS1_SA1 +FDAS1_SA2 +FDAS1_SX111 +FDAS1_SX21 +FDAS1_SX291 +FDAW0_SA1 +FDAW0_SA2 +FDAW0_SX146 +FDAW0_SX236 +FDAW0_SX326 +FDAW0_SX416 +FDAW0_SX56 +FDFB0_SI1318 +FDFB0_SI1948 +FDFB0_SX148 +FDFB0_SX238 +FDFB0_SX328 +FDFB0_SX418 +FDJH0_SA1 +FDJH0_SA2 +FDJH0_SI1565 +FDJH0_SI2195 +FDJH0_SX125 +FDJH0_SX215 +FDJH0_SX35 +FDJH0_SX395 +FDKN0_SA1 +FDKN0_SA2 +FDKN0_SI1081 +FDKN0_SI1711 +FDKN0_SX271 +FDKN0_SX361 +FDKN0_SX91 +FDML0_SA1 +FDML0_SI1149 +FDML0_SI1779 +FDML0_SI2075 +FDML0_SX339 +FDML0_SX69 +FDMY0_SI1197 +FDMY0_SX117 +FDMY0_SX207 +FDMY0_SX297 +FDNC0_SA1 +FDNC0_SA2 +FDNC0_SI2287 +FDNC0_SX108 +FDNC0_SX18 +FDNC0_SX378 +FDTD0_SA2 +FDTD0_SI1561 +FDTD0_SI2191 +FDTD0_SI931 +FDTD0_SX121 +FDTD0_SX301 +FDTD0_SX391 +FDXW0_SA2 +FDXW0_SI1511 +FDXW0_SI2141 +FDXW0_SI881 +FDXW0_SX161 +FDXW0_SX431 +FEAC0_SA1 +FEAC0_SA2 +FEAC0_SI1245 +FEAC0_SI1875 +FEAC0_SX255 +FEAC0_SX345 +FEAC0_SX435 +FEAR0_SA1 +FEAR0_SA2 +FEAR0_SI1252 +FEAR0_SI1882 +FEAR0_SX172 +FEAR0_SX262 +FEAR0_SX442 +FEAR0_SX82 +FECD0_SA2 +FECD0_SI2048 +FECD0_SX158 +FECD0_SX248 +FECD0_SX338 +FECD0_SX428 +FEEH0_SA2 +FEEH0_SI1112 +FEEH0_SX212 +FEEH0_SX302 +FEEH0_SX32 +FEEH0_SX392 +FEME0_SA2 +FEME0_SI1505 +FEME0_SI2135 +FEME0_SX245 +FEME0_SX425 +FETB0_SA2 +FETB0_SI1778 +FETB0_SI518 +FETB0_SX248 +FETB0_SX338 +FETB0_SX428 +FETB0_SX68 +FEXM0_SA2 +FEXM0_SI1731 +FEXM0_SX111 +FEXM0_SX201 +FEXM0_SX291 +FEXM0_SX381 +FGCS0_SA1 +FGCS0_SA2 +FGCS0_SI1486 +FGCS0_SI2116 +FGCS0_SI856 +FGCS0_SX46 +FGDP0_SA2 +FGDP0_SI1618 +FGDP0_SI2248 +FGDP0_SX178 +FGDP0_SX268 +FGDP0_SX358 +FGDP0_SX448 +FGMB0_SA1 +FGMB0_SA2 
+FGMB0_SI515 +FGMB0_SX155 +FGMB0_SX425 +FGMB0_SX65 +FGRW0_SA2 +FGRW0_SI1782 +FGRW0_SI1990 +FGRW0_SX252 +FGRW0_SX342 +FGRW0_SX72 +FHLM0_SA1 +FHLM0_SA2 +FHLM0_SI1560 +FHLM0_SI2190 +FHLM0_SI930 +FHLM0_SX210 +FHLM0_SX300 +FHXS0_SI2335 +FHXS0_SX265 +FHXS0_SX355 +FHXS0_SX85 +FJDM2_SI1582 +FJDM2_SI1964 +FJDM2_SI2212 +FJDM2_SX322 +FJDM2_SX412 +FJEN0_SA2 +FJEN0_SI1047 +FJEN0_SI1677 +FJEN0_SI2307 +FJEN0_SX147 +FJEN0_SX237 +FJEN0_SX57 +FJHK0_SA1 +FJHK0_SA2 +FJHK0_SI1022 +FJHK0_SI1652 +FJHK0_SX122 +FJHK0_SX212 +FJHK0_SX32 +FJHK0_SX392 +FJKL0_SA1 +FJKL0_SA2 +FJKL0_SI1562 +FJKL0_SI2192 +FJKL0_SX122 +FJKL0_SX302 +FJKL0_SX32 +FJLG0_SA1 +FJLG0_SA2 +FJLG0_SI1506 +FJLG0_SX179 +FJLG0_SX269 +FJLG0_SX359 +FJLG0_SX449 +FJLG0_SX89 +FJLR0_SA2 +FJLR0_SI1861 +FJLR0_SI601 +FJLR0_SX151 +FJLR0_SX241 +FJLR0_SX331 +FJLR0_SX421 +FJLR0_SX61 +FJRB0_SA1 +FJRB0_SA2 +FJRB0_SI1302 +FJRB0_SI1932 +FJRB0_SI672 +FJRB0_SX132 +FJRB0_SX222 +FJRB0_SX312 +FJRB0_SX42 +FJRP1_SA2 +FJRP1_SI802 +FJRP1_SX172 +FJRP1_SX442 +FJSK0_SA2 +FJSK0_SI1682 +FJSK0_SI2312 +FJSK0_SX152 +FJSK0_SX242 +FJSK0_SX332 +FJSK0_SX422 +FJSK0_SX62 +FJSP0_SA1 +FJSP0_SA2 +FJSP0_SI1763 +FJSP0_SI804 +FJSP0_SX174 +FJSP0_SX84 +FJWB1_SA2 +FJWB1_SI2055 +FJWB1_SI795 +FJWB1_SX165 +FJWB1_SX255 +FJWB1_SX75 +FJXM0_SA2 +FJXM0_SI1211 +FJXM0_SI1971 +FJXM0_SX131 +FJXM0_SX221 +FJXP0_SA2 +FJXP0_SI492 +FJXP0_SX222 +FJXP0_SX312 +FJXP0_SX402 +FJXP0_SX42 +FKAA0_SA2 +FKAA0_SI1208 +FKAA0_SI1838 +FKAA0_SI578 +FKAA0_SX218 +FKAA0_SX308 +FKAA0_SX38 +FKDE0_SA2 +FKDE0_SI2221 +FKDE0_SX331 +FKDW0_SA1 +FKDW0_SA2 +FKDW0_SI577 +FKDW0_SX127 +FKDW0_SX217 +FKDW0_SX307 +FKDW0_SX37 +FKFB0_SA1 +FKFB0_SI2238 +FKFB0_SI978 +FKFB0_SX168 +FKFB0_SX258 +FKKH0_SI660 +FKKH0_SX210 +FKKH0_SX30 +FKKH0_SX300 +FKLC0_SA1 +FKLC0_SA2 +FKLC0_SI1615 +FKLC0_SI2245 +FKLC0_SX265 +FKLC0_SX445 +FKLC0_SX85 +FKLC1_SA1 +FKLC1_SA2 +FKLC1_SI1678 +FKLC1_SX148 +FKLC1_SX58 +FKLH0_SA1 +FKLH0_SI1887 +FKLH0_SI627 +FKLH0_SX267 +FKLH0_SX357 +FKLH0_SX447 +FKLH0_SX87 +FKSR0_SI1117 +FKSR0_SX161 +FKSR0_SX37 +FKSR0_SX397 
+FLAC0_SA1 +FLAC0_SA2 +FLAC0_SI2161 +FLAC0_SI901 +FLAC0_SX181 +FLAC0_SX271 +FLAC0_SX361 +FLAC0_SX91 +FLAG0_SA1 +FLAG0_SI2094 +FLAG0_SX294 +FLEH0_SA1 +FLEH0_SA2 +FLEH0_SX151 +FLEH0_SX241 +FLEH0_SX421 +FLEH0_SX61 +FLET0_SA2 +FLET0_SI1137 +FLET0_SI1767 +FLET0_SX147 +FLET0_SX237 +FLET0_SX277 +FLET0_SX417 +FLET0_SX57 +FLHD0_SA1 +FLHD0_SA2 +FLHD0_SI1344 +FLHD0_SI1974 +FLHD0_SX174 +FLHD0_SX264 +FLHD0_SX444 +FLHD0_SX84 +FLJA0_SA2 +FLJA0_SI1708 +FLJA0_SX268 +FLJA0_SX358 +FLJA0_SX448 +FLJA0_SX88 +FLJD0_SA1 +FLJD0_SA2 +FLJD0_SI2146 +FLJD0_SX166 +FLJD0_SX256 +FLJD0_SX346 +FLJD0_SX436 +FLJG0_SA1 +FLJG0_SI1611 +FLJG0_SI2241 +FLJG0_SX261 +FLJG0_SX441 +FLJG0_SX81 +FLKM0_SI1880 +FLKM0_SX116 +FLMA0_SA2 +FLMA0_SI1243 +FLMA0_SI1873 +FLMA0_SX163 +FLMA0_SX253 +FLMA0_SX343 +FLMC0_SA1 +FLMC0_SA2 +FLMC0_SI2002 +FLMC0_SI742 +FLMC0_SX112 +FLMC0_SX292 +FLMC0_SX336 +FLMC0_SX382 +FLMK0_SA2 +FLMK0_SI2295 +FLMK0_SX135 +FLMK0_SX225 +FLMK0_SX45 +FLOD0_SA1 +FLOD0_SA2 +FLOD0_SI1287 +FLOD0_SI657 +FLOD0_SX207 +FLOD0_SX387 +FLTM0_SA2 +FLTM0_SI1700 +FLTM0_SX260 +FLTM0_SX80 +FMAH1_SA1 +FMAH1_SI1509 +FMAH1_SI2139 +FMAH1_SX249 +FMAH1_SX339 +FMAH1_SX429 +FMAH1_SX69 +FMBG0_SA1 +FMBG0_SI1790 +FMBG0_SX260 +FMBG0_SX3 +FMBG0_SX350 +FMBG0_SX440 +FMBG0_SX80 +FMEM0_SA2 +FMEM0_SI1377 +FMEM0_SI2007 +FMEM0_SX117 +FMEM0_SX207 +FMEM0_SX297 +FMJB0_SA1 +FMJB0_SA2 +FMJB0_SI1807 +FMJB0_SX187 +FMJB0_SX277 +FMJB0_SX367 +FMJB0_SX7 +FMJF0_SA1 +FMJF0_SI1254 +FMJF0_SI1884 +FMJF0_SX264 +FMJF0_SX354 +FMJF0_SX444 +FMJU0_SA1 +FMJU0_SA2 +FMJU0_SI2019 +FMJU0_SI759 +FMJU0_SX129 +FMJU0_SX219 +FMJU0_SX39 +FMKC0_SA1 +FMKC0_SA2 +FMKC0_SI1072 +FMKC0_SX172 +FMKC0_SX262 +FMKC0_SX352 +FMKF0_SA1 +FMKF0_SA2 +FMKF0_SI1536 +FMKF0_SI906 +FMKF0_SX276 +FMKF0_SX366 +FMKF0_SX6 +FMKF0_SX96 +FMMH0_SA1 +FMMH0_SA2 +FMMH0_SI1537 +FMMH0_SI2167 +FMMH0_SI907 +FMMH0_SX187 +FMMH0_SX367 +FMMH0_SX420 +FMMH0_SX7 +FMMH0_SX97 +FMPG0_SI1602 +FMPG0_SI2232 +FMPG0_SX252 +FMPG0_SX72 +FNKL0_SA1 +FNKL0_SA2 +FNKL0_SI2152 +FNKL0_SX172 +FNKL0_SX196 +FNKL0_SX262 +FNKL0_SX442 
+FNKL0_SX82 +FNTB0_SA1 +FNTB0_SA2 +FNTB0_SX123 +FNTB0_SX213 +FNTB0_SX33 +FNTB0_SX393 +FPAB1_SA2 +FPAB1_SX121 +FPAB1_SX301 +FPAB1_SX31 +FPAB1_SX391 +FPAC0_SA1 +FPAC0_SI2011 +FPAC0_SX121 +FPAC0_SX211 +FPAC0_SX301 +FPAC0_SX31 +FPAC0_SX391 +FPAD0_SA1 +FPAD0_SI1346 +FPAD0_SI1976 +FPAD0_SX266 +FPAD0_SX446 +FPAF0_SI1684 +FPAF0_SI2314 +FPAF0_SX244 +FPAF0_SX334 +FPAF0_SX424 +FPAF0_SX64 +FPAZ0_SI1593 +FPAZ0_SX153 +FPAZ0_SX27 +FPAZ0_SX423 +FPAZ0_SX63 +FPJF0_SA2 +FPJF0_SI1046 +FPJF0_SI1676 +FPJF0_SX236 +FPJF0_SX326 +FPLS0_SA1 +FPLS0_SA2 +FPLS0_SI2220 +FPLS0_SX150 +FPLS0_SX240 +FPLS0_SX3 +FPLS0_SX60 +FPMY0_SA2 +FPMY0_SI1783 +FPMY0_SX163 +FPMY0_SX196 +FPMY0_SX253 +FPMY0_SX73 +FREH0_SI1315 +FREH0_SI685 +FREH0_SX145 +FREH0_SX235 +FREH0_SX325 +FREH0_SX55 +FRJB0_SA1 +FRJB0_SA2 +FRJB0_SI1427 +FRJB0_SI1470 +FRJB0_SI1794 +FRJB0_SX167 +FRJB0_SX257 +FRJB0_SX437 +FRJB0_SX77 +FRLL0_SA1 +FRLL0_SA2 +FRLL0_SI1514 +FRLL0_SI884 +FRLL0_SX164 +FRLL0_SX254 +FRLL0_SX344 +FRLL0_SX74 +FSAG0_SA2 +FSAG0_SI1953 +FSAG0_SI693 +FSAG0_SX63 +FSAH0_SI1244 +FSAH0_SI1874 +FSAH0_SX344 +FSAH0_SX74 +FSAK0_SA1 +FSAK0_SA2 +FSAK0_SI1930 +FSAK0_SI670 +FSAK0_SX130 +FSAK0_SX220 +FSAK0_SX310 +FSAK0_SX40 +FSAK0_SX400 +FSBK0_SA1 +FSBK0_SI1699 +FSBK0_SI2329 +FSBK0_SX259 +FSBK0_SX439 +FSBK0_SX79 +FSCN0_SI1886 +FSCN0_SX356 +FSDC0_SA1 +FSDC0_SI1942 +FSDC0_SI2234 +FSDC0_SX232 +FSDC0_SX412 +FSDJ0_SA1 +FSDJ0_SA2 +FSDJ0_SI1745 +FSDJ0_SX125 +FSDJ0_SX35 +FSGF0_SA1 +FSGF0_SA2 +FSGF0_SI1557 +FSGF0_SX207 +FSGF0_SX27 +FSGF0_SX297 +FSGF0_SX387 +FSJG0_SI1570 +FSJG0_SI2200 +FSJG0_SX310 +FSJK1_SA1 +FSJK1_SI1025 +FSJK1_SI2285 +FSJK1_SI696 +FSJK1_SX215 +FSJK1_SX305 +FSJK1_SX395 +FSJS0_SA2 +FSJS0_SI1171 +FSJS0_SI1801 +FSJS0_SI541 +FSJS0_SX271 +FSJS0_SX361 +FSJS0_SX91 +FSJW0_SA1 +FSJW0_SA2 +FSJW0_SI703 +FSJW0_SX163 +FSJW0_SX253 +FSJW0_SX343 +FSJW0_SX73 +FSKC0_SA1 +FSKC0_SA2 +FSKC0_SI2046 +FSKC0_SX156 +FSKC0_SX336 +FSKC0_SX426 +FSKC0_SX66 +FSKL0_SA1 +FSKL0_SA2 +FSKL0_SI2159 +FSKL0_SI899 +FSKL0_SX179 +FSKL0_SX269 +FSKL0_SX359 +FSKL0_SX89 
+FSKP0_SA1 +FSKP0_SI1728 +FSKP0_SI468 +FSKP0_SX108 +FSKP0_SX18 +FSKP0_SX198 +FSKP0_SX288 +FSKP0_SX378 +FSLS0_SA1 +FSLS0_SA2 +FSLS0_SI1056 +FSLS0_SI1686 +FSLS0_SI2316 +FSLS0_SX202 +FSLS0_SX246 +FSLS0_SX66 +FSMA0_SA1 +FSMA0_SI1621 +FSMA0_SI2251 +FSMA0_SX271 +FSMA0_SX361 +FSMA0_SX91 +FSMM0_SA1 +FSMM0_SA2 +FSMM0_SI1314 +FSMM0_SI1944 +FSMM0_SI684 +FSMM0_SX414 +FSMM0_SX54 +FSMS1_SA1 +FSMS1_SA2 +FSMS1_SI1504 +FSMS1_SI2134 +FSMS1_SI874 +FSMS1_SX154 +FSMS1_SX334 +FSMS1_SX64 +FSPM0_SA1 +FSPM0_SI1871 +FSPM0_SI611 +FSPM0_SX341 +FSPM0_SX431 +FSRH0_SA1 +FSRH0_SA2 +FSRH0_SI1719 +FSRH0_SX131 +FSRH0_SX41 +FSSB0_SA1 +FSSB0_SA2 +FSSB0_SI1082 +FSSB0_SI2342 +FSSB0_SX182 +FSSB0_SX272 +FSSB0_SX452 +FSSB0_SX92 +FTAJ0_SA1 +FTAJ0_SA2 +FTAJ0_SI1329 +FTAJ0_SI474 +FTAJ0_SX339 +FTAJ0_SX69 +FTBR0_SA1 +FTBR0_SA2 +FTBR0_SI2181 +FTBR0_SX111 +FTBR0_SX201 +FTBR0_SX291 +FTBR0_SX381 +FTBW0_SA2 +FTBW0_SI1345 +FTBW0_SI1975 +FTBW0_SX265 +FTBW0_SX355 +FTBW0_SX445 +FTBW0_SX85 +FTLG0_SA1 +FTLG0_SA2 +FTLG0_SI840 +FTLG0_SX123 +FTLG0_SX213 +FTLG0_SX303 +FTLG0_SX33 +FTLG0_SX393 +FTMG0_SA1 +FTMG0_SA2 +FTMG0_SX182 +FTMG0_SX272 +FTMG0_SX362 +FTMG0_SX92 +FVFB0_SA1 +FVFB0_SI1032 +FVFB0_SI2292 +FVFB0_SX222 +FVFB0_SX312 +FVFB0_SX402 +FVKB0_SA2 +FVKB0_SI1159 +FVKB0_SI1789 +FVKB0_SI529 +FVKB0_SX169 +FVKB0_SX259 +FVKB0_SX439 +FVKB0_SX79 +FVMH0_SA1 +FVMH0_SI2096 +FVMH0_SX206 +FVMH0_SX296 +FVMH0_SX386 +MABC0_SA1 +MABC0_SA2 +MABC0_SX151 +MABC0_SX241 +MABC0_SX331 +MABC0_SX421 +MABC0_SX61 +MADC0_SA1 +MADC0_SA2 +MADC0_SI1997 +MADC0_SX17 +MADC0_SX197 +MADC0_SX287 +MADD0_SA1 +MADD0_SI1798 +MADD0_SI538 +MADD0_SX358 +MADD0_SX448 +MAEB0_SA1 +MAEB0_SA2 +MAEB0_SI2250 +MAEB0_SI990 +MAEB0_SX180 +MAEB0_SX270 +MAEB0_SX360 +MAEB0_SX90 +MAEO0_SA2 +MAEO0_SI1655 +MAEO0_SI1956 +MAEO0_SX156 +MAEO0_SX246 +MAEO0_SX336 +MAEO0_SX426 +MAEO0_SX66 +MAFM0_SA1 +MAFM0_SA2 +MAFM0_SI1569 +MAFM0_SI2199 +MAFM0_SX219 +MAFM0_SX39 +MAFM0_SX399 +MAJP0_SA1 +MAJP0_SI1074 +MAJP0_SI2334 +MAJP0_SX264 +MAJP0_SX354 +MAJP0_SX444 +MAJP0_SX84 +MAKB0_SA1 +MAKB0_SX206 
+MAKB0_SX296 +MAKR0_SA1 +MAKR0_SA2 +MAKR0_SI1352 +MAKR0_SI1982 +MAKR0_SI722 +MAKR0_SX182 +MAKR0_SX272 +MAKR0_SX452 +MAPV0_SA1 +MAPV0_SA2 +MAPV0_SI1923 +MAPV0_SX123 +MAPV0_SX303 +MAPV0_SX33 +MAPV0_SX393 +MARC0_SA1 +MARC0_SI1188 +MARC0_SI1818 +MARC0_SI558 +MARC0_SX288 +MARC0_SX378 +MARW0_SA1 +MARW0_SA2 +MARW0_SI1276 +MARW0_SI646 +MARW0_SX106 +MARW0_SX16 +MARW0_SX376 +MBAR0_SA2 +MBAR0_SI1319 +MBAR0_SI1949 +MBAR0_SI689 +MBAR0_SX149 +MBAR0_SX239 +MBAR0_SX329 +MBBR0_SA1 +MBBR0_SA2 +MBBR0_SI1685 +MBBR0_SX155 +MBBR0_SX245 +MBBR0_SX425 +MBCG0_SA2 +MBCG0_SI2217 +MBCG0_SX147 +MBCG0_SX237 +MBCG0_SX417 +MBCG0_SX57 +MBEF0_SA1 +MBEF0_SA2 +MBEF0_SX111 +MBEF0_SX201 +MBEF0_SX291 +MBGT0_SA1 +MBGT0_SI1341 +MBGT0_SI711 +MBGT0_SX81 +MBJV0_SA2 +MBJV0_SI1247 +MBJV0_SI1877 +MBJV0_SX167 +MBJV0_SX257 +MBJV0_SX437 +MBJV0_SX77 +MBMA0_SA1 +MBMA0_SA2 +MBMA0_SI1852 +MBMA0_SX142 +MBMA0_SX322 +MBMA0_SX412 +MBMA1_SA1 +MBMA1_SA2 +MBMA1_SI2207 +MBMA1_SX144 +MBMA1_SX234 +MBMA1_SX414 +MBML0_SA1 +MBML0_SI1799 +MBML0_SI539 +MBML0_SX179 +MBML0_SX269 +MBML0_SX359 +MBML0_SX449 +MBOM0_SA1 +MBOM0_SI1014 +MBOM0_SI1644 +MBOM0_SX114 +MBOM0_SX204 +MBOM0_SX311 +MBOM0_SX384 +MBSB0_SA2 +MBSB0_SI1353 +MBSB0_SI1983 +MBSB0_SI723 +MBSB0_SX183 +MBSB0_SX273 +MBSB0_SX363 +MBSB0_SX93 +MBTH0_SA1 +MBTH0_SI505 +MBTH0_SI757 +MBTH0_SX212 +MBTH0_SX302 +MBTH0_SX392 +MBWP0_SA1 +MBWP0_SA2 +MBWP0_SI1531 +MBWP0_SI1969 +MBWP0_SI709 +MBWP0_SX169 +MBWP0_SX259 +MBWP0_SX439 +MBWP0_SX79 +MCAE0_SA1 +MCAE0_SA2 +MCAE0_SX187 +MCAE0_SX367 +MCAE0_SX7 +MCAE0_SX97 +MCAL0_SA1 +MCAL0_SI508 +MCAL0_SX148 +MCAL0_SX238 +MCAL0_SX328 +MCAL0_SX418 +MCAL0_SX58 +MCDC0_SA2 +MCDC0_SI1292 +MCDC0_SI1922 +MCDC0_SI662 +MCDC0_SX122 +MCDC0_SX302 +MCDC0_SX32 +MCDC0_SX392 +MCDD0_SA1 +MCDD0_SI1513 +MCDD0_SI2143 +MCDD0_SX163 +MCDD0_SX343 +MCDD0_SX73 +MCDR0_SA1 +MCDR0_SA2 +MCDR0_SX164 +MCDR0_SX254 +MCDR0_SX344 +MCDR0_SX434 +MCDR0_SX74 +MCEF0_SA1 +MCEF0_SA2 +MCEF0_SI1135 +MCEF0_SI1765 +MCEF0_SX145 +MCEF0_SX325 +MCEF0_SX55 +MCEW0_SI1442 +MCEW0_SX182 +MCEW0_SX272 +MCEW0_SX92 
+MCHL0_SA1 +MCHL0_SA2 +MCHL0_SI1977 +MCHL0_SX177 +MCHL0_SX267 +MCHL0_SX357 +MCHL0_SX447 +MCLK0_SA1 +MCLK0_SA2 +MCLK0_SI1660 +MCLK0_SX130 +MCLK0_SX220 +MCLK0_SX40 +MCLK0_SX400 +MCLM0_SA2 +MCLM0_SI1456 +MCLM0_SX106 +MCLM0_SX16 +MCLM0_SX196 +MCLM0_SX286 +MCLM0_SX376 +MCPM0_SA2 +MCPM0_SI1194 +MCPM0_SI564 +MCPM0_SX204 +MCPM0_SX24 +MCRE0_SA1 +MCRE0_SA2 +MCRE0_SI1121 +MCRE0_SI1725 +MCRE0_SI1751 +MCRE0_SX131 +MCRE0_SX221 +MCRE0_SX24 +MCRE0_SX401 +MCRE0_SX41 +MCSS0_SA1 +MCSS0_SA2 +MCSS0_SX120 +MCSS0_SX210 +MCSS0_SX30 +MCSS0_SX300 +MCSS0_SX390 +MCTH0_SA2 +MCTH0_SI1209 +MCTH0_SI1839 +MCTH0_SI579 +MCTH0_SX129 +MCTH0_SX219 +MCTH0_SX309 +MCTH0_SX399 +MCTM0_SA1 +MCTM0_SA2 +MCTM0_SI720 +MCTM0_SX180 +MCTM0_SX270 +MCTM0_SX360 +MCTM0_SX450 +MCTM0_SX90 +MCXM0_SA1 +MCXM0_SA2 +MCXM0_SI1351 +MCXM0_SI1981 +MCXM0_SI721 +MCXM0_SX181 +MCXM0_SX271 +MCXM0_SX361 +MCXM0_SX451 +MDAC0_SA2 +MDAC0_SI1261 +MDAC0_SI1837 +MDAC0_SX271 +MDAC0_SX451 +MDAC0_SX91 +MDAS0_SA1 +MDAS0_SA2 +MDAS0_SI1266 +MDAS0_SX186 +MDAS0_SX21 +MDAS0_SX276 +MDAS0_SX96 +MDBB1_SA1 +MDBB1_SA2 +MDBB1_SI1006 +MDBB1_SI1636 +MDBB1_SI2056 +MDBB1_SX196 +MDBB1_SX286 +MDBP0_SA1 +MDBP0_SA2 +MDBP0_SI1158 +MDBP0_SI1788 +MDBP0_SX258 +MDBP0_SX348 +MDBP0_SX78 +MDCD0_SA1 +MDCD0_SA2 +MDCD0_SI2045 +MDCD0_SX155 +MDCD0_SX65 +MDCM0_SA1 +MDCM0_SA2 +MDCM0_SI2110 +MDCM0_SI850 +MDCM0_SX130 +MDCM0_SX220 +MDCM0_SX310 +MDDC0_SA1 +MDDC0_SA2 +MDDC0_SX249 +MDDC0_SX339 +MDDC0_SX429 +MDED0_SI1170 +MDED0_SI1800 +MDED0_SX180 +MDED0_SX270 +MDED0_SX360 +MDED0_SX450 +MDED0_SX90 +MDEF0_SA1 +MDEF0_SA2 +MDEF0_SI1563 +MDEF0_SI2193 +MDEF0_SX213 +MDEF0_SX33 +MDEF0_SX393 +MDEM0_SA2 +MDEM0_SI1868 +MDEM0_SX158 +MDEM0_SX248 +MDEM0_SX338 +MDEM0_SX68 +MDHL0_SA1 +MDHL0_SA2 +MDHL0_SI2069 +MDHL0_SI809 +MDHL0_SX179 +MDHL0_SX359 +MDHL0_SX89 +MDHS0_SX180 +MDHS0_SX270 +MDHS0_SX360 +MDHS0_SX450 +MDHS0_SX90 +MDJM0_SA1 +MDJM0_SA2 +MDJM0_SI2085 +MDJM0_SI825 +MDJM0_SX195 +MDJM0_SX285 +MDJM0_SX375 +MDKS0_SA1 +MDKS0_SA2 +MDKS0_SI1066 +MDKS0_SI1696 +MDKS0_SI2326 +MDKS0_SX256 +MDKS0_SX76 
+MDLB0_SA1 +MDLB0_SI1936 +MDLB0_SI676 +MDLB0_SX226 +MDLB0_SX316 +MDLB0_SX46 +MDLC0_SA1 +MDLC0_SA2 +MDLC0_SI765 +MDLC0_SX135 +MDLC0_SX225 +MDLC0_SX315 +MDLC0_SX45 +MDLC1_SA1 +MDLC1_SX175 +MDLC1_SX265 +MDLC1_SX355 +MDLC1_SX85 +MDLC2_SA1 +MDLC2_SA2 +MDLC2_SI1614 +MDLC2_SI984 +MDLC2_SX174 +MDLC2_SX264 +MDLC2_SX444 +MDLC2_SX84 +MDLH0_SA1 +MDLH0_SI1960 +MDLH0_SI574 +MDLH0_SI700 +MDLH0_SX250 +MDLH0_SX340 +MDLH0_SX70 +MDLM0_SA1 +MDLM0_SA2 +MDLM0_SX244 +MDLM0_SX334 +MDLM0_SX64 +MDLR0_SI1233 +MDLR0_SX243 +MDLR0_SX423 +MDLR0_SX63 +MDLR1_SI1299 +MDLR1_SI1929 +MDLR1_SX129 +MDLR1_SX219 +MDLR1_SX309 +MDLR1_SX39 +MDLR1_SX399 +MDMA0_SA1 +MDMA0_SA2 +MDMA0_SI1238 +MDMA0_SI2060 +MDMT0_SI2341 +MDMT0_SI572 +MDMT0_SX212 +MDMT0_SX302 +MDMT0_SX392 +MDNS0_SA1 +MDNS0_SX111 +MDNS0_SX291 +MDNS0_SX381 +MDPB0_SA1 +MDPB0_SA2 +MDPB0_SI2126 +MDPB0_SX146 +MDPB0_SX236 +MDPB0_SX326 +MDPB0_SX56 +MDPK0_SA1 +MDPK0_SA2 +MDPK0_SI1683 +MDPK0_SI552 +MDPK0_SX153 +MDPK0_SX243 +MDPK0_SX63 +MDPS0_SA1 +MDPS0_SA2 +MDPS0_SI1651 +MDPS0_SI1979 +MDPS0_SX179 +MDPS0_SX269 +MDPS0_SX449 +MDPS0_SX89 +MDRD0_SA2 +MDRD0_SI1382 +MDRD0_SI2012 +MDRD0_SX122 +MDRD0_SX212 +MDRD0_SX302 +MDRD0_SX392 +MDSJ0_SA1 +MDSJ0_SA2 +MDSJ0_SI832 +MDSJ0_SX112 +MDSJ0_SX22 +MDSJ0_SX292 +MDSJ0_SX382 +MDSS0_SA1 +MDSS0_SI1881 +MDSS0_SI2087 +MDSS0_SI621 +MDSS0_SX171 +MDSS0_SX261 +MDSS0_SX351 +MDSS0_SX81 +MDSS1_SA2 +MDSS1_SI1713 +MDSS1_SX247 +MDSS1_SX337 +MDSS1_SX427 +MDTB0_SA1 +MDTB0_SA2 +MDTB0_SI570 +MDTB0_SX210 +MDTB0_SX300 +MDTB0_SX321 +MDTB0_SX390 +MDWD0_SA1 +MDWD0_SI1890 +MDWD0_SI557 +MDWD0_SX180 +MDWD0_SX360 +MDWD0_SX450 +MDWH0_SA2 +MDWH0_SI1925 +MDWH0_SX125 +MDWH0_SX35 +MDWH0_SX395 +MDWM0_SI1546 +MDWM0_SI2176 +MDWM0_SX106 +MDWM0_SX376 +MDWM0_SX433 +MEAL0_SA1 +MEAL0_SI1547 +MEAL0_SI917 +MEAL0_SX197 +MEAL0_SX287 +MEAL0_SX377 +MEDR0_SI744 +MEDR0_SX114 +MEDR0_SX204 +MEDR0_SX24 +MEDR0_SX294 +MEDR0_SX384 +MEFG0_SA2 +MEFG0_SI465 +MEFG0_SX105 +MEFG0_SX15 +MEFG0_SX195 +MEFG0_SX285 +MEFG0_SX375 +MEGJ0_SI1967 +MEGJ0_SX437 +MEGJ0_SX77 +MEJL0_SA2 
+MEJL0_SI1592 +MEJL0_SI1654 +MEJL0_SI962 +MEJL0_SX332 +MEJL0_SX422 +MEJL0_SX62 +MEJS0_SA1 +MEJS0_SA2 +MEJS0_SI1870 +MEJS0_SX250 +MEJS0_SX430 +MEJS0_SX70 +MESG0_SA1 +MESG0_SA2 +MESG0_SI1332 +MESG0_SI1962 +MESG0_SX162 +MESG0_SX252 +MESG0_SX342 +MESG0_SX72 +MESJ0_SA1 +MESJ0_SA2 +MESJ0_SI2257 +MESJ0_SI997 +MESJ0_SX277 +MESJ0_SX367 +MESJ0_SX7 +MEWM0_SA1 +MEWM0_SA2 +MEWM0_SI1348 +MEWM0_SI1978 +MEWM0_SX268 +MEWM0_SX358 +MEWM0_SX448 +MFER0_SA1 +MFER0_SA2 +MFER0_SI1492 +MFER0_SI2122 +MFER0_SX232 +MFER0_SX322 +MFER0_SX412 +MFER0_SX52 +MFMC0_SA1 +MFMC0_SA2 +MFMC0_SI1132 +MFMC0_SI1762 +MFMC0_SI502 +MFMC0_SX142 +MFMC0_SX232 +MFMC0_SX322 +MFMC0_SX412 +MFMC0_SX52 +MFRM0_SA1 +MFRM0_SA2 +MFRM0_SI1155 +MFRM0_SI1717 +MFRM0_SI1785 +MFRM0_SX165 +MFRM0_SX255 +MFRM0_SX75 +MFWK0_SA1 +MFWK0_SA2 +MFWK0_SI1249 +MFWK0_SI619 +MFWK0_SX259 +MFWK0_SX439 +MFWK0_SX79 +MFXS0_SA1 +MFXS0_SA2 +MFXS0_SI1674 +MFXS0_SI2225 +MFXS0_SI2304 +MFXS0_SX144 +MFXS0_SX234 +MFXS0_SX414 +MFXV0_SA1 +MFXV0_SI1635 +MFXV0_SX15 +MFXV0_SX195 +MFXV0_SX285 +MFXV0_SX375 +MGAF0_SA2 +MGAF0_SI1912 +MGAF0_SI652 +MGAF0_SX112 +MGAF0_SX202 +MGAF0_SX292 +MGAG0_SA1 +MGAG0_SI1321 +MGAG0_SI645 +MGAG0_SX151 +MGAG0_SX241 +MGAG0_SX331 +MGAG0_SX421 +MGAG0_SX61 +MGAK0_SA1 +MGAK0_SA2 +MGAK0_SI1666 +MGAK0_SI2296 +MGAK0_SX316 +MGAK0_SX406 +MGAR0_SA1 +MGAR0_SA2 +MGAR0_SI1212 +MGAR0_SI1694 +MGAR0_SI1842 +MGAR0_SX222 +MGAR0_SX402 +MGAR0_SX42 +MGAW0_SA1 +MGAW0_SA2 +MGAW0_SI1802 +MGAW0_SX265 +MGAW0_SX355 +MGAW0_SX445 +MGAW0_SX85 +MGES0_SA2 +MGES0_SI1481 +MGES0_SX131 +MGES0_SX221 +MGES0_SX401 +MGES0_SX41 +MGJC0_SA1 +MGJC0_SI1256 +MGJC0_SI1335 +MGJC0_SI1965 +MGJC0_SX165 +MGJC0_SX255 +MGJC0_SX345 +MGRL0_SA1 +MGRL0_SA2 +MGRL0_SI1497 +MGRL0_SX237 +MGRL0_SX417 +MGRL0_SX57 +MGRP0_SA1 +MGRP0_SI1947 +MGRP0_SI687 +MGRP0_SX147 +MGRP0_SX237 +MGRP0_SX417 +MGRP0_SX57 +MGSH0_SA1 +MGSH0_SX186 +MGSH0_SX96 +MGSL0_SA2 +MGSL0_SI1164 +MGSL0_SX174 +MGSL0_SX354 +MGSL0_SX444 +MGSL0_SX84 +MGXP0_SA1 +MGXP0_SA2 +MGXP0_SI457 +MGXP0_SX277 +MGXP0_SX367 +MGXP0_SX97 +MHBS0_SA1 
+MHBS0_SA2 +MHBS0_SI1575 +MHBS0_SI2205 +MHBS0_SX135 +MHBS0_SX225 +MHBS0_SX405 +MHIT0_SA2 +MHIT0_SI1613 +MHIT0_SI2243 +MHIT0_SX173 +MHIT0_SX263 +MHIT0_SX353 +MHIT0_SX443 +MHIT0_SX83 +MHJB0_SA2 +MHJB0_SI1647 +MHJB0_SI2277 +MHJB0_SX117 +MHJB0_SX207 +MHJB0_SX27 +MHJB0_SX297 +MHJB0_SX387 +MHMG0_SA1 +MHMG0_SA2 +MHMG0_SI1365 +MHMG0_SI1995 +MHMG0_SX105 +MHMG0_SX15 +MHMG0_SX285 +MHMG0_SX375 +MHMR0_SA2 +MHMR0_SI1119 +MHMR0_SX129 +MHMR0_SX219 +MHMR0_SX309 +MHMR0_SX39 +MHMR0_SX399 +MHRM0_SA2 +MHRM0_SI1475 +MHRM0_SI2218 +MHRM0_SX238 +MHRM0_SX328 +MHRM0_SX418 +MHXL0_SA1 +MHXL0_SA2 +MHXL0_SI512 +MHXL0_SI612 +MHXL0_SX152 +MHXL0_SX332 +MHXL0_SX422 +MHXL0_SX62 +MILB0_SA1 +MILB0_SI2163 +MILB0_SI807 +MILB0_SX183 +MILB0_SX273 +MILB0_SX3 +MILB0_SX363 +MILB0_SX93 +MJAC0_SA1 +MJAC0_SA2 +MJAC0_SI1331 +MJAC0_SI2148 +MJAC0_SX341 +MJAC0_SX431 +MJAE0_SA1 +MJAE0_SA2 +MJAE0_SI1524 +MJAE0_SI1999 +MJAE0_SI2154 +MJAE0_SX264 +MJAE0_SX354 +MJAE0_SX444 +MJAI0_SI1604 +MJAI0_SX164 +MJAI0_SX254 +MJAI0_SX344 +MJAI0_SX434 +MJAI0_SX74 +MJBG0_SA1 +MJBG0_SA2 +MJBG0_SI1232 +MJBG0_SI1724 +MJBG0_SI1862 +MJBG0_SX152 +MJBG0_SX242 +MJBG0_SX332 +MJBG0_SX422 +MJDA0_SA1 +MJDA0_SA2 +MJDA0_SI1661 +MJDA0_SI2291 +MJDA0_SX131 +MJDA0_SX221 +MJDA0_SX401 +MJDA0_SX41 +MJDC0_SA1 +MJDC0_SA2 +MJDC0_SI1161 +MJDC0_SI2165 +MJDC0_SX171 +MJDC0_SX261 +MJDC0_SX351 +MJDC0_SX441 +MJDC0_SX81 +MJDE0_SA2 +MJDE0_SX130 +MJDE0_SX310 +MJDE0_SX40 +MJDE0_SX400 +MJDG0_SA1 +MJDG0_SI1672 +MJDG0_SX142 +MJDG0_SX232 +MJDG0_SX322 +MJDG0_SX412 +MJDG0_SX52 +MJDM0_SA2 +MJDM0_SI1937 +MJDM0_SX260 +MJDM0_SX440 +MJDM0_SX80 +MJEB0_SA1 +MJEB0_SA2 +MJEB0_SI1286 +MJEB0_SI1916 +MJEB0_SX206 +MJEB0_SX26 +MJEB0_SX386 +MJEB1_SA1 +MJEB1_SI2097 +MJEB1_SX117 +MJEB1_SX27 +MJEB1_SX297 +MJEE0_SA2 +MJEE0_SI1237 +MJEE0_SI1867 +MJEE0_SI607 +MJEE0_SX157 +MJEE0_SX427 +MJEE0_SX67 +MJFH0_SA1 +MJFH0_SI1737 +MJFH0_SI477 +MJFH0_SX117 +MJFH0_SX207 +MJFH0_SX27 +MJFH0_SX297 +MJFH0_SX387 +MJFR0_SA2 +MJFR0_SI1605 +MJFR0_SI2235 +MJFR0_SI975 +MJFR0_SX165 +MJFR0_SX255 +MJFR0_SX345 +MJHI0_SA2 
+MJHI0_SI555 +MJHI0_SI698 +MJHI0_SX248 +MJHI0_SX338 +MJHI0_SX428 +MJHI0_SX68 +MJJB0_SA2 +MJJB0_SI1139 +MJJB0_SI1277 +MJJB0_SI1769 +MJJB0_SX149 +MJJB0_SX329 +MJJB0_SX419 +MJJB0_SX59 +MJJJ0_SA1 +MJJJ0_SA2 +MJJJ0_SI1793 +MJJJ0_SI533 +MJJJ0_SX173 +MJJJ0_SX263 +MJJJ0_SX353 +MJJJ0_SX83 +MJJM0_SA1 +MJJM0_SI1457 +MJJM0_SX17 +MJJM0_SX197 +MJJM0_SX287 +MJJM0_SX377 +MJKR0_SA2 +MJKR0_SI1201 +MJKR0_SI1831 +MJKR0_SX121 +MJKR0_SX211 +MJKR0_SX301 +MJKR0_SX31 +MJKR0_SX391 +MJLB0_SA1 +MJLB0_SA2 +MJLB0_SI2246 +MJLB0_SI986 +MJLB0_SX266 +MJLB0_SX356 +MJLB0_SX446 +MJLB0_SX86 +MJLG1_SA1 +MJLG1_SA2 +MJLG1_SI1012 +MJLG1_SI1642 +MJLG1_SI2272 +MJLG1_SX112 +MJLG1_SX202 +MJLG1_SX22 +MJLG1_SX382 +MJLS0_SA1 +MJLS0_SA2 +MJLS0_SI1096 +MJLS0_SI466 +MJLS0_SX16 +MJLS0_SX196 +MJLS0_SX286 +MJLS0_SX376 +MJMA0_SI1495 +MJMA0_SI865 +MJMA0_SX145 +MJMA0_SX235 +MJMA0_SX325 +MJMA0_SX415 +MJMA0_SX55 +MJMD0_SA1 +MJMD0_SI1028 +MJMD0_SI1658 +MJMD0_SX128 +MJMD0_SX218 +MJMD0_SX398 +MJMM0_SA1 +MJMM0_SA2 +MJMM0_SI1885 +MJMM0_SI625 +MJMM0_SX265 +MJMM0_SX355 +MJMM0_SX445 +MJPG0_SA1 +MJPG0_SA2 +MJPG0_SI561 +MJPG0_SX291 +MJPG0_SX381 +MJPM0_SA1 +MJPM0_SI1998 +MJPM0_SI738 +MJPM0_SX108 +MJPM0_SX18 +MJPM0_SX198 +MJPM0_SX288 +MJPM1_SA1 +MJPM1_SA2 +MJPM1_SI1897 +MJPM1_SI761 +MJPM1_SX131 +MJPM1_SX221 +MJPM1_SX41 +MJRA0_SI606 +MJRA0_SX156 +MJRA0_SX246 +MJRA0_SX66 +MJRG0_SA1 +MJRG0_SA2 +MJRG0_SX106 +MJRG0_SX16 +MJRG0_SX286 +MJRH0_SA1 +MJRH0_SA2 +MJRH0_SI1125 +MJRH0_SI1755 +MJRH0_SX135 +MJRH0_SX315 +MJRH0_SX405 +MJRH0_SX45 +MJRH1_SA2 +MJRH1_SI1774 +MJRH1_SX334 +MJRH1_SX64 +MJRK0_SI2103 +MJRK0_SX340 +MJRK0_SX70 +MJRP0_SI1835 +MJRP0_SI585 +MJRP0_SX135 +MJRP0_SX315 +MJRP0_SX405 +MJRP0_SX45 +MJSR0_SA2 +MJSR0_SX164 +MJSR0_SX254 +MJSR0_SX434 +MJSR0_SX74 +MJWG0_SA2 +MJWG0_SI2155 +MJWG0_SX355 +MJWG0_SX445 +MJWG0_SX85 +MJWS0_SA1 +MJWS0_SA2 +MJWS0_SI1143 +MJWS0_SI1773 +MJWS0_SX243 +MJWS0_SX423 +MJWT0_SA2 +MJWT0_SI751 +MJXA0_SA1 +MJXA0_SA2 +MJXA0_SI1507 +MJXA0_SI2137 +MJXA0_SI877 +MJXA0_SX157 +MJXA0_SX247 +MJXA0_SX337 +MJXA0_SX67 +MJXL0_SA1 
+MJXL0_SA2 +MJXL0_SI1795 +MJXL0_SX182 +MJXL0_SX272 +MJXL0_SX362 +MJXL0_SX452 +MJXL0_SX92 +MKAG0_SA2 +MKAG0_SI1609 +MKAG0_SI2239 +MKAG0_SX169 +MKAG0_SX30 +MKAG0_SX439 +MKAG0_SX79 +MKAH0_SA1 +MKAH0_SA2 +MKAH0_SI1528 +MKAH0_SI2158 +MKAH0_SI898 +MKAH0_SX268 +MKAH0_SX358 +MKAH0_SX448 +MKAH0_SX88 +MKAJ0_SA1 +MKAJ0_SI1414 +MKAJ0_SI2044 +MKAJ0_SI784 +MKAJ0_SX244 +MKAJ0_SX334 +MKAJ0_SX424 +MKAJ0_SX64 +MKAM0_SA2 +MKAM0_SI1316 +MKAM0_SX236 +MKAM0_SX416 +MKDB0_SI2132 +MKDB0_SI588 +MKDB0_SI872 +MKDB0_SX242 +MKDB0_SX332 +MKDB0_SX422 +MKDB0_SX62 +MKDD0_SA1 +MKDD0_SX127 +MKDD0_SX217 +MKDD0_SX307 +MKDD0_SX37 +MKDD0_SX397 +MKDT0_SA1 +MKDT0_SA2 +MKDT0_SI2153 +MKDT0_SI893 +MKDT0_SX173 +MKDT0_SX263 +MKDT0_SX353 +MKDT0_SX443 +MKDT0_SX83 +MKES0_SA2 +MKES0_SX263 +MKES0_SX353 +MKES0_SX443 +MKES0_SX83 +MKJO0_SA1 +MKJO0_SA2 +MKJO0_SI2147 +MKJO0_SX167 +MKJO0_SX257 +MKJO0_SX424 +MKJO0_SX77 +MKLN0_SA1 +MKLN0_SA2 +MKLN0_SI1598 +MKLN0_SI2228 +MKLN0_SX158 +MKLN0_SX338 +MKLN0_SX428 +MKLN0_SX68 +MKLR0_SA1 +MKLR0_SI1059 +MKLR0_SI2319 +MKLR0_SX159 +MKLR0_SX249 +MKLR0_SX339 +MKLR0_SX429 +MKLR0_SX69 +MKLS0_SA2 +MKLS0_SI1533 +MKLS0_SX177 +MKLS0_SX267 +MKLS0_SX447 +MKLS1_SI1545 +MKLS1_SI2175 +MKLS1_SX105 +MKLS1_SX15 +MKLS1_SX195 +MKLS1_SX285 +MKLW0_SA2 +MKLW0_SI1844 +MKLW0_SI2201 +MKLW0_SX131 +MKLW0_SX221 +MKLW0_SX401 +MKLW0_SX41 +MKRG0_SA1 +MKRG0_SA2 +MKRG0_SI1491 +MKRG0_SI2121 +MKRG0_SX141 +MKRG0_SX231 +MKRG0_SX31 +MKRG0_SX51 +MKXL0_SA1 +MKXL0_SI1185 +MKXL0_SX105 +MKXL0_SX195 +MKXL0_SX285 +MLBC0_SA2 +MLBC0_SI609 +MLBC0_SX159 +MLBC0_SX339 +MLBC0_SX429 +MLBC0_SX69 +MLEL0_SI1876 +MLEL0_SX346 +MLEL0_SX76 +MLJC0_SA1 +MLJC0_SA2 +MLJC0_SI1855 +MLJC0_SI595 +MLJC0_SX235 +MLJC0_SX325 +MLJC0_SX55 +MLJH0_SI1324 +MLJH0_SX154 +MLJH0_SX334 +MLJH0_SX424 +MLNS0_SA1 +MLNS0_SA2 +MLNS0_SI1407 +MLNS0_SI777 +MLNS0_SX147 +MLNS0_SX237 +MLNS0_SX327 +MLNS0_SX417 +MLNS0_SX57 +MLSH0_SA1 +MLSH0_SA2 +MLSH0_SI2047 +MLSH0_SI787 +MLSH0_SX157 +MLSH0_SX337 +MLSH0_SX427 +MLSH0_SX67 +MMAA0_SI2105 +MMAA0_SX125 +MMAA0_SX215 +MMAA0_SX305 
+MMAA0_SX395 +MMAB1_SA1 +MMAB1_SA2 +MMAB1_SI2124 +MMAB1_SX144 +MMAB1_SX414 +MMAB1_SX54 +MMAG0_SI496 +MMAG0_SX226 +MMAG0_SX406 +MMAG0_SX46 +MMAM0_SA1 +MMAM0_SA2 +MMAM0_SI1597 +MMAM0_SI1668 +MMAM0_SX247 +MMAM0_SX337 +MMAM0_SX67 +MMAR0_SA1 +MMAR0_SA2 +MMAR0_SI1336 +MMAR0_SI706 +MMAR0_SX436 +MMAR0_SX76 +MMBS0_SA1 +MMBS0_SA2 +MMBS0_SI1151 +MMBS0_SX251 +MMBS0_SX341 +MMBS0_SX431 +MMBS0_SX71 +MMCC0_SA1 +MMCC0_SI1968 +MMCC0_SI708 +MMCC0_SX168 +MMCC0_SX258 +MMCC0_SX348 +MMCC0_SX438 +MMCC0_SX78 +MMDB0_SA1 +MMDB0_SA2 +MMDB0_SI1358 +MMDB0_SI1617 +MMDB0_SX267 +MMDB0_SX357 +MMDB0_SX447 +MMDB0_SX87 +MMDG0_SI2035 +MMDG0_SX340 +MMDG0_SX430 +MMDG0_SX70 +MMDM0_SA1 +MMDM0_SA2 +MMDM0_SX231 +MMDM0_SX321 +MMDM0_SX411 +MMDM0_SX51 +MMDM1_SA1 +MMDM1_SI1650 +MMDM1_SI783 +MMDM1_SX243 +MMDS0_SA2 +MMDS0_SI1343 +MMDS0_SI1973 +MMDS0_SI713 +MMDS0_SX173 +MMDS0_SX263 +MMDS0_SX353 +MMDS0_SX443 +MMDS0_SX83 +MMEA0_SA2 +MMEA0_SI1388 +MMEA0_SI2018 +MMEA0_SI758 +MMEA0_SX218 +MMEA0_SX308 +MMEA0_SX38 +MMEB0_SA1 +MMEB0_SI1357 +MMEB0_SI1987 +MMEB0_SI727 +MMEB0_SX7 +MMEB0_SX97 +MMGC0_SA1 +MMGC0_SI1935 +MMGC0_SI2184 +MMGC0_SX315 +MMGC0_SX405 +MMGC0_SX45 +MMGG0_SA1 +MMGG0_SA2 +MMGG0_SI1709 +MMGG0_SI2339 +MMGG0_SX179 +MMGG0_SX359 +MMGG0_SX89 +MMGK0_SA1 +MMGK0_SA2 +MMGK0_SI1322 +MMGK0_SI1952 +MMGK0_SI692 +MMGK0_SX152 +MMGK0_SX242 +MMGK0_SX422 +MMJB1_SA1 +MMJB1_SI1408 +MMJB1_SI2038 +MMJB1_SI778 +MMJB1_SX148 +MMJB1_SX238 +MMJB1_SX328 +MMJB1_SX418 +MMJB1_SX58 +MMLM0_SA1 +MMLM0_SA2 +MMLM0_SI1527 +MMLM0_SI897 +MMLM0_SX177 +MMLM0_SX267 +MMLM0_SX357 +MMLM0_SX447 +MMLM0_SX87 +MMPM0_SA1 +MMPM0_SA2 +MMPM0_SI1061 +MMPM0_SI1691 +MMPM0_SI2321 +MMPM0_SX251 +MMPM0_SX341 +MMPM0_SX431 +MMPM0_SX71 +MMRP0_SA1 +MMRP0_SI2034 +MMRP0_SI717 +MMRP0_SI774 +MMRP0_SX234 +MMRP0_SX414 +MMRP0_SX54 +MMSM0_SA1 +MMSM0_SA2 +MMSM0_SI1736 +MMSM0_SX26 +MMSM0_SX296 +MMSM0_SX386 +MMVP0_SI1284 +MMVP0_SI1914 +MMVP0_SX114 +MMVP0_SX204 +MMVP0_SX294 +MMVP0_SX384 +MMWB0_SA2 +MMWB0_SI1619 +MMWB0_SX179 +MMWB0_SX269 +MMWS0_SA1 +MMWS0_SI1518 +MMWS0_SI559 
+MMWS0_SI888 +MMWS0_SX258 +MMWS0_SX78 +MMWS1_SA1 +MMWS1_SA2 +MMWS1_SI1071 +MMWS1_SI2331 +MMWS1_SX261 +MMWS1_SX27 +MMWS1_SX351 +MMWS1_SX441 +MMWS1_SX81 +MMXS0_SA1 +MMXS0_SA2 +MMXS0_SI629 +MMXS0_SI876 +MMXS0_SX156 +MMXS0_SX336 +MMXS0_SX66 +MNET0_SA1 +MNET0_SA2 +MNET0_SI1446 +MNET0_SI2076 +MNET0_SX186 +MNET0_SX276 +MNET0_SX366 +MNET0_SX96 +MNTW0_SA1 +MNTW0_SI2328 +MNTW0_SX202 +MNTW0_SX258 +MNTW0_SX348 +MPAR0_SA1 +MPAR0_SA2 +MPAR0_SI1576 +MPAR0_SX226 +MPAR0_SX406 +MPAR0_SX46 +MPEB0_SA1 +MPEB0_SA2 +MPEB0_SX150 +MPEB0_SX420 +MPEB0_SX60 +MPFU0_SA1 +MPFU0_SA2 +MPFU0_SI1888 +MPFU0_SX178 +MPFU0_SX268 +MPFU0_SX358 +MPFU0_SX88 +MPGH0_SA1 +MPGH0_SA2 +MPGH0_SI1554 +MPGH0_SI924 +MPGH0_SX204 +MPGH0_SX294 +MPGH0_SX384 +MPGR0_SA1 +MPGR0_SA2 +MPGR0_SI2040 +MPGR0_SI780 +MPGR0_SX150 +MPGR0_SX420 +MPGR0_SX60 +MPGR1_SA1 +MPGR1_SA2 +MPGR1_SI1269 +MPGR1_SI2129 +MPGR1_SX239 +MPGR1_SX329 +MPGR1_SX419 +MPGR1_SX59 +MPMB0_SX241 +MPPC0_SA2 +MPPC0_SI2042 +MPPC0_SI782 +MPPC0_SX152 +MPPC0_SX242 +MPPC0_SX332 +MPPC0_SX422 +MPPC0_SX62 +MPRB0_SA1 +MPRB0_SA2 +MPRB0_SI1205 +MPRB0_SX125 +MPRB0_SX215 +MPRB0_SX305 +MPRB0_SX35 +MPRB0_SX395 +MPRD0_SA2 +MPRD0_SI1431 +MPRD0_SI2061 +MPRK0_SA2 +MPRK0_SX17 +MPRK0_SX197 +MPRT0_SA2 +MPRT0_SI1210 +MPRT0_SI495 +MPRT0_SI580 +MPRT0_SX130 +MPRT0_SX220 +MPRT0_SX40 +MPRT0_SX400 +MPSW0_SA1 +MPSW0_SA2 +MPSW0_SI1697 +MPSW0_SI2327 +MPSW0_SX24 +MPSW0_SX257 +MPSW0_SX77 +MRAB0_SA1 +MRAB0_SA2 +MRAB0_SI1224 +MRAB0_SI594 +MRAB0_SX144 +MRAB0_SX234 +MRAB0_SX324 +MRAB0_SX414 +MRAB0_SX54 +MRAB1_SA1 +MRAB1_SA2 +MRAB1_SI1478 +MRAB1_SI2108 +MRAB1_SX218 +MRAB1_SX38 +MRAB1_SX398 +MRAI0_SI1954 +MRAI0_SX162 +MRAI0_SX252 +MRAI0_SX342 +MRAM0_SI1275 +MRAM0_SI1905 +MRAM0_SX105 +MRAM0_SX195 +MRAM0_SX285 +MRAM0_SX375 +MRAV0_SA1 +MRAV0_SA2 +MRAV0_SI1008 +MRAV0_SI1638 +MRAV0_SI2268 +MRAV0_SX108 +MRAV0_SX18 +MRAV0_SX198 +MRAV0_SX288 +MRAV0_SX378 +MRBC0_SA1 +MRBC0_SA2 +MRBC0_SI1665 +MRBC0_SI599 +MRBC0_SX149 +MRBC0_SX239 +MRBC0_SX59 +MRCG0_SA1 +MRCG0_SI2058 +MRCG0_SX258 +MRCG0_SX78 +MRCW0_SA2 
+MRCW0_SI1371 +MRCW0_SI2001 +MRCW0_SX111 +MRCW0_SX201 +MRCW0_SX21 +MRCW0_SX381 +MRDD0_SA1 +MRDD0_SA2 +MRDD0_SI1050 +MRDD0_SI2310 +MRDD0_SX240 +MRDD0_SX330 +MRDM0_SA1 +MRDM0_SA2 +MRDM0_SI965 +MRDM0_SX155 +MRDM0_SX245 +MRDM0_SX425 +MRDS0_SA2 +MRDS0_SI1167 +MRDS0_SI1797 +MRDS0_SI537 +MRDS0_SX177 +MRDS0_SX267 +MRDS0_SX357 +MRDS0_SX447 +MRDS0_SX87 +MREE0_SA1 +MREE0_SA2 +MREE0_SI1734 +MREE0_SX114 +MREE0_SX204 +MREE0_SX294 +MREE0_SX384 +MREH1_SA2 +MREH1_SI2229 +MREH1_SX159 +MREH1_SX339 +MREH1_SX429 +MREM0_SA1 +MREM0_SI1591 +MREM0_SI961 +MREM0_SX151 +MREM0_SX241 +MREM0_SX331 +MREM0_SX421 +MREM0_SX61 +MREW1_SA1 +MREW1_SA2 +MREW1_SI1500 +MREW1_SI2130 +MREW1_SX150 +MREW1_SX240 +MREW1_SX330 +MREW1_SX420 +MREW1_SX60 +MRFK0_SA1 +MRFK0_SA2 +MRFK0_SI1706 +MRFK0_SI2336 +MRFK0_SX176 +MRFK0_SX266 +MRFK0_SX356 +MRFK0_SX86 +MRFL0_SA2 +MRFL0_SI1786 +MRFL0_SX346 +MRGM0_SA1 +MRGM0_SI1162 +MRGM0_SI1792 +MRGM0_SX416 +MRGM0_SX82 +MRGS0_SA1 +MRGS0_SI1986 +MRGS0_SX276 +MRGS0_SX366 +MRGS0_SX96 +MRHL0_SA1 +MRHL0_SA2 +MRHL0_SI1515 +MRHL0_SI2145 +MRHL0_SX165 +MRHL0_SX255 +MRHL0_SX75 +MRJB1_SI1020 +MRJB1_SX300 +MRJH0_SA1 +MRJH0_SI914 +MRJH0_SX259 +MRJH0_SX439 +MRJM0_SA1 +MRJM0_SA2 +MRJM0_SI1095 +MRJM0_SI1228 +MRJM0_SI1858 +MRJM0_SX238 +MRJM0_SX328 +MRJM0_SX418 +MRJM0_SX58 +MRJM1_SA1 +MRJM1_SI668 +MRJM1_SX218 +MRJM1_SX308 +MRJM1_SX38 +MRJM1_SX398 +MRJT0_SA1 +MRJT0_SI1805 +MRJT0_SX148 +MRJT0_SX238 +MRKM0_SA1 +MRKM0_SX187 +MRKM0_SX277 +MRKM0_SX7 +MRKM0_SX97 +MRLD0_SA1 +MRLD0_SI1594 +MRLD0_SI964 +MRLD0_SX244 +MRLD0_SX334 +MRLD0_SX64 +MRLJ0_SA2 +MRLJ0_SI1420 +MRLJ0_SI2050 +MRLJ0_SX160 +MRLJ0_SX430 +MRLJ0_SX70 +MRLJ1_SI1671 +MRLJ1_SI2332 +MRLJ1_SX141 +MRLJ1_SX231 +MRLJ1_SX411 +MRLJ1_SX51 +MRLK0_SA1 +MRLK0_SA2 +MRLK0_SI2140 +MRLK0_SX303 +MRLK0_SX33 +MRLK0_SX393 +MRLR0_SA1 +MRLR0_SA2 +MRLR0_SI1826 +MRLR0_SI566 +MRLR0_SX116 +MRLR0_SX206 +MRLR0_SX26 +MRLR0_SX296 +MRLR0_SX386 +MRMB0_SA1 +MRMB0_SI2211 +MRMB0_SI951 +MRMB0_SX141 +MRMB0_SX231 +MRMB0_SX321 +MRMB0_SX51 +MRMG0_SA2 +MRMG0_SI1710 +MRMG0_SI2340 
+MRMG0_SX180 +MRMG0_SX270 +MRMG0_SX360 +MRMG0_SX90 +MRMH0_SA1 +MRMH0_SA2 +MRMH0_SI1021 +MRMH0_SX211 +MRMH0_SX301 +MRMH0_SX31 +MRMH0_SX391 +MRML0_SI2051 +MRML0_SI791 +MRML0_SX431 +MRML0_SX71 +MRMS0_SA1 +MRMS0_SA2 +MRMS0_SI1113 +MRMS0_SI2100 +MRMS0_SX120 +MRMS0_SX210 +MRMS0_SX30 +MRMS0_SX300 +MRMS0_SX390 +MRPC1_SA1 +MRPC1_SA2 +MRPC1_SI1482 +MRPC1_SI2026 +MRPC1_SX132 +MRPC1_SX222 +MRPC1_SX312 +MRPC1_SX402 +MRPC1_SX42 +MRRE0_SI704 +MRRE0_SX254 +MRRE0_SX434 +MRSO0_SA1 +MRSO0_SA2 +MRSO0_SI1659 +MRSO0_SI2289 +MRSO0_SX219 +MRSO0_SX309 +MRSO0_SX399 +MRSP0_SA1 +MRSP0_SA2 +MRSP0_SI2059 +MRSP0_SI799 +MRSP0_SX169 +MRSP0_SX196 +MRSP0_SX439 +MRSP0_SX79 +MRTC0_SA1 +MRTC0_SA2 +MRTC0_SI2088 +MRTC0_SI828 +MRTC0_SX108 +MRTC0_SX18 +MRTC0_SX198 +MRTC0_SX288 +MRTJ0_SA2 +MRTJ0_SI1551 +MRTJ0_SI2032 +MRTJ0_SX322 +MRTJ0_SX412 +MRVG0_SA1 +MRVG0_SA2 +MRVG0_SI1770 +MRVG0_SI510 +MRVG0_SX150 +MRVG0_SX330 +MRVG0_SX420 +MRVG0_SX60 +MRWA0_SA1 +MRWA0_SA2 +MRWA0_SI1603 +MRWA0_SI2233 +MRWA0_SX253 +MRWA0_SX343 +MRWA0_SX433 +MRWS0_SA1 +MRWS0_SA2 +MRWS0_SX112 +MRWS0_SX202 +MRWS0_SX292 +MRXB0_SA1 +MRXB0_SI1585 +MRXB0_SX145 +MRXB0_SX235 +MRXB0_SX325 +MRXB0_SX55 +MSAH1_SA1 +MSAH1_SA2 +MSAH1_SI1049 +MSAH1_SI2309 +MSAH1_SX149 +MSAH1_SX239 +MSAH1_SX329 +MSAH1_SX419 +MSAH1_SX59 +MSAS0_SA1 +MSAS0_SA2 +MSAS0_SI2006 +MSAS0_SX26 +MSAS0_SX296 +MSAT0_SA2 +MSAT0_SI1526 +MSAT0_SI2156 +MSAT0_SI896 +MSAT0_SX176 +MSAT0_SX266 +MSAT0_SX356 +MSAT0_SX446 +MSAT0_SX86 +MSAT1_SA1 +MSAT1_SA2 +MSAT1_SI1073 +MSAT1_SI1703 +MSAT1_SI2333 +MSAT1_SX173 +MSAT1_SX353 +MSDB0_SA1 +MSDB0_SA2 +MSDB0_SI1007 +MSDB0_SI1637 +MSDB0_SI2267 +MSDB0_SX107 +MSDB0_SX17 +MSDH0_SA1 +MSDH0_SA2 +MSDH0_SI2113 +MSDH0_SX260 +MSDH0_SX350 +MSDS0_SA2 +MSDS0_SI1707 +MSDS0_SI2337 +MSDS0_SX177 +MSDS0_SX447 +MSDS0_SX87 +MSEM1_SA1 +MSEM1_SA2 +MSEM1_SX360 +MSEM1_SX450 +MSEM1_SX90 +MSES0_SA1 +MSES0_SA2 +MSES0_SI2216 +MSES0_SI2219 +MSES0_SX149 +MSES0_SX329 +MSES0_SX59 +MSFH0_SA2 +MSFH0_SI1216 +MSFH0_SI586 +MSFH0_SX226 +MSFH0_SX46 +MSFV0_SA1 +MSFV0_SA2 +MSFV0_SI1262 
+MSFV0_SX182 +MSFV0_SX272 +MSFV0_SX452 +MSJK0_SA1 +MSJK0_SA2 +MSJK0_SI2226 +MSJK0_SI966 +MSJK0_SX156 +MSJK0_SX246 +MSJK0_SX426 +MSJK0_SX66 +MSMC0_SA1 +MSMC0_SA2 +MSMC0_SI1907 +MSMC0_SI647 +MSMC0_SX107 +MSMC0_SX17 +MSMC0_SX197 +MSMC0_SX287 +MSMC0_SX377 +MSMR0_SA1 +MSMR0_SA2 +MSMR0_SI1405 +MSMR0_SI775 +MSMR0_SX145 +MSMR0_SX235 +MSMR0_SX325 +MSMR0_SX55 +MSMS0_SA2 +MSMS0_SI2063 +MSMS0_SI803 +MSMS0_SX263 +MSMS0_SX353 +MSMS0_SX443 +MSRG0_SA2 +MSRG0_SI1851 +MSRG0_SI591 +MSRG0_SX141 +MSRG0_SX231 +MSRG0_SX321 +MSRG0_SX411 +MSRG0_SX51 +MSRR0_SA1 +MSRR0_SA2 +MSRR0_SI1131 +MSRR0_SX141 +MSRR0_SX231 +MSRR0_SX30 +MSRR0_SX411 +MSRR0_SX51 +MSTF0_SA1 +MSTF0_SA2 +MSTF0_SI1396 +MSTF0_SX136 +MSTF0_SX226 +MSTF0_SX406 +MSVS0_SA1 +MSVS0_SI1568 +MSVS0_SX128 +MSVS0_SX218 +MSVS0_SX38 +MTAB0_SA1 +MTAB0_SA2 +MTAB0_SI2202 +MTAB0_SI942 +MTAB0_SX132 +MTAB0_SX222 +MTAB0_SX402 +MTAB0_SX42 +MTAS0_SA1 +MTAS0_SA2 +MTAS0_SI1385 +MTAS0_SI2015 +MTAS0_SI755 +MTAS0_SX125 +MTAS0_SX305 +MTAT0_SA2 +MTAT0_SI1740 +MTAT0_SX120 +MTAT0_SX210 +MTAT0_SX30 +MTAT0_SX300 +MTAT1_SA1 +MTAT1_SA2 +MTAT1_SI1409 +MTAT1_SI1627 +MTAT1_SX239 +MTAT1_SX419 +MTBC0_SA1 +MTBC0_SA2 +MTBC0_SI1173 +MTBC0_SX183 +MTBC0_SX273 +MTBC0_SX347 +MTBC0_SX363 +MTBC0_SX93 +MTCS0_SA1 +MTCS0_SI1972 +MTCS0_SX172 +MTCS0_SX262 +MTCS0_SX352 +MTCS0_SX442 +MTDB0_SA1 +MTDB0_SA2 +MTDB0_SI2031 +MTDB0_SX141 +MTDB0_SX231 +MTDB0_SX321 +MTDB0_SX411 +MTDB0_SX51 +MTDP0_SI1274 +MTDP0_SI2151 +MTDP0_SX261 +MTDP0_SX441 +MTDP0_SX81 +MTER0_SI527 +MTER0_SX167 +MTER0_SX17 +MTER0_SX257 +MTER0_SX77 +MTJG0_SA2 +MTJG0_SI1520 +MTJG0_SI890 +MTJG0_SX350 +MTJG0_SX440 +MTJG0_SX80 +MTJM0_SA1 +MTJM0_SA2 +MTJM0_SI1226 +MTJM0_SI655 +MTJM0_SX236 +MTJM0_SX326 +MTJM0_SX416 +MTJM0_SX56 +MTJS0_SA1 +MTJS0_SI1192 +MTJS0_SX112 +MTJS0_SX202 +MTJS0_SX22 +MTJS0_SX292 +MTJU0_SA1 +MTJU0_SA2 +MTJU0_SI2269 +MTJU0_SI760 +MTJU0_SX220 +MTJU0_SX310 +MTJU0_SX40 +MTKD0_SA1 +MTKD0_SA2 +MTKD0_SI1187 +MTKD0_SI1817 +MTKD0_SX17 +MTKD0_SX197 +MTKD0_SX377 +MTKP0_SA1 +MTKP0_SA2 +MTKP0_SX123 +MTKP0_SX213 
+MTKP0_SX303 +MTKP0_SX33 +MTKP0_SX393 +MTLB0_SA2 +MTLB0_SI1764 +MTLB0_SI504 +MTLB0_SX144 +MTLB0_SX414 +MTLB0_SX54 +MTLC0_SA2 +MTLC0_SI847 +MTLC0_SX127 +MTLC0_SX217 +MTLC0_SX307 +MTLC0_SX37 +MTLC0_SX397 +MTML0_SA1 +MTML0_SA2 +MTML0_SI1065 +MTML0_SI1695 +MTML0_SX255 +MTML0_SX345 +MTML0_SX75 +MTMN0_SA1 +MTMN0_SX164 +MTMN0_SX254 +MTMN0_SX344 +MTMN0_SX74 +MTMT0_SA1 +MTMT0_SI1118 +MTMT0_SX128 +MTMT0_SX218 +MTMT0_SX308 +MTMT0_SX38 +MTMT0_SX398 +MTPF0_SA1 +MTPF0_SA2 +MTPF0_SI1235 +MTPF0_SI1865 +MTPF0_SI605 +MTPF0_SX155 +MTPF0_SX245 +MTPF0_SX335 +MTPF0_SX425 +MTPG0_SA1 +MTPG0_SA2 +MTPG0_SI2013 +MTPG0_SX123 +MTPG0_SX213 +MTPG0_SX33 +MTPG0_SX393 +MTPP0_SA1 +MTPP0_SA2 +MTPP0_SI2138 +MTPP0_SI878 +MTPP0_SX158 +MTPP0_SX248 +MTPP0_SX428 +MTPP0_SX68 +MTPR0_SA1 +MTPR0_SA2 +MTPR0_SI1600 +MTPR0_SI506 +MTPR0_SX250 +MTPR0_SX70 +MTQC0_SA2 +MTQC0_SI2071 +MTQC0_SX271 +MTQC0_SX361 +MTRC0_SA1 +MTRC0_SA2 +MTRC0_SI1623 +MTRC0_SI993 +MTRC0_SX170 +MTRC0_SX183 +MTRC0_SX273 +MTRC0_SX363 +MTRC0_SX93 +MTRR0_SA1 +MTRR0_SA2 +MTRR0_SI1548 +MTRR0_SI2178 +MTRR0_SX108 +MTRR0_SX18 +MTRR0_SX378 +MTRT0_SA1 +MTRT0_SI1857 +MTRT0_SI597 +MTRT0_SX147 +MTRT0_SX237 +MTRT0_SX417 +MTWH1_SA1 +MTWH1_SA2 +MTWH1_SI1512 +MTWH1_SI2142 +MTWH1_SI882 +MTWH1_SX162 +MTWH1_SX252 +MTWH1_SX342 +MTWH1_SX432 +MTXS0_SI1690 +MTXS0_SX250 +MTXS0_SX340 +MTXS0_SX70 +MVJH0_SA1 +MVJH0_SA2 +MVJH0_SI2186 +MVJH0_SX116 +MVJH0_SX26 +MVJH0_SX386 +MVLO0_SA2 +MVLO0_SI1147 +MVLO0_SI1777 +MVLO0_SX157 +MVLO0_SX247 +MVLO0_SX337 +MVLO0_SX427 +MVLO0_SX67 +MVRW0_SA1 +MVRW0_SI1485 +MVRW0_SI2115 +MVRW0_SI855 +MVRW0_SX315 +MVRW0_SX405 +MVRW0_SX45 +MWAC0_SA1 +MWAC0_SI2231 +MWAC0_SI971 +MWAC0_SX71 +MWAD0_SA1 +MWAD0_SA2 +MWAD0_SI1062 +MWAD0_SI1749 +MWAD0_SI2322 +MWAD0_SX162 +MWAD0_SX252 +MWAD0_SX342 +MWAR0_SA2 +MWAR0_SI2305 +MWAR0_SX145 +MWAR0_SX235 +MWAR0_SX325 +MWAR0_SX415 +MWAR0_SX55 +MWCH0_SA1 +MWCH0_SA2 +MWCH0_SI1622 +MWCH0_SX272 +MWCH0_SX362 +MWCH0_SX92 +MWDK0_SX266 +MWDK0_SX356 +MWDK0_SX446 +MWEM0_SA1 +MWEM0_SI1950 +MWEM0_SX240 +MWEM0_SX330 +MWEM0_SX60 
+MWGR0_SA1 +MWGR0_SA2 +MWGR0_SI1606 +MWGR0_SI2236 +MWGR0_SI976 +MWGR0_SX166 +MWGR0_SX256 +MWGR0_SX436 +MWGR0_SX76 +MWRE0_SA1 +MWRE0_SI1687 +MWRE0_SI2317 +MWRE0_SX157 +MWRP0_SA2 +MWRP0_SI1525 +MWRP0_SI2073 +MWRP0_SX183 +MWRP0_SX3 +MWRP0_SX93 +MWSB0_SA1 +MWSB0_SA2 +MWSB0_SI1626 +MWSB0_SI2256 +MWSB0_SX186 +MWSB0_SX366 +MWSB0_SX6 +MWSB0_SX96 +MWSH0_SA1 +MWSH0_SA2 +MWSH0_SI2266 +MWSH0_SX346 +MWSH0_SX436 +MZMB0_SA2 +MZMB0_SI1166 +MZMB0_SI1796 +MZMB0_SI536 +MZMB0_SX176 +MZMB0_SX266 +MZMB0_SX356 +MZMB0_SX446 +MZMB0_SX86 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train_text.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train_text.uid new file mode 100644 index 0000000..0e0c251 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/train_text.uid @@ -0,0 +1,1000 @@ +FAEM0_SI762 +FAEM0_SX42 +FAJW0_SA1 +FAJW0_SX3 +FAJW0_SX93 +FALK0_SX186 +FALK0_SX6 +FALR0_SI1325 +FBAS0_SA1 +FBAS0_SX217 +FBCG1_SA1 +FBCG1_SX172 +FBCG1_SX442 +FBCH0_SX236 +FBCH0_SX416 +FBLV0_SA1 +FBLV0_SI1058 +FBLV0_SX338 +FBLV0_SX68 +FBMH0_SA1 +FBMJ0_SI815 +FCAG0_SA1 +FCAG0_SX153 +FCAG0_SX243 +FCAJ0_SI1479 +FCAJ0_SX309 +FCDR1_SX106 +FCDR1_SX196 +FCEG0_SA2 +FCJF0_SA1 +FCJF0_SX127 +FCJS0_SI1607 +FCJS0_SI2237 +FCJS0_SX257 +FCKE0_SA2 +FCKE0_SX121 +FCLT0_SI2068 +FCLT0_SX448 +FCLT0_SX88 +FCMG0_SA2 +FCMG0_SI1872 +FCMG0_SX72 +FCMM0_SA1 +FCMM0_SA2 +FCMM0_SX183 +FCRZ0_SI2053 +FCRZ0_SX433 +FCYL0_SA1 +FCYL0_SX37 +FDAS1_SI2091 +FDAS1_SX201 +FDAS1_SX381 +FDAW0_SI1406 +FDFB0_SA1 +FDFB0_SA2 +FDFB0_SI2010 +FDFB0_SX58 +FDJH0_SX305 +FDML0_SA2 +FDML0_SX159 +FDML0_SX249 +FDML0_SX429 +FDMY0_SA2 +FDMY0_SX27 +FDNC0_SX198 +FDNC0_SX288 +FDTD0_SX211 +FDXW0_SA1 +FDXW0_SX251 +FDXW0_SX341 +FDXW0_SX71 +FEAC0_SX165 +FEAC0_SX75 +FEAR0_SI622 +FECD0_SX68 +FEEH0_SA1 +FEEH0_SI1742 +FEEH0_SI471 +FEEH0_SX122 +FEME0_SA1 +FEME0_SX155 +FEME0_SX65 +FETB0_SA1 +FETB0_SI1148 +FETB0_SX158 +FEXM0_SI1101 +FGCS0_SX136 +FGCS0_SX226 +FGCS0_SX316 +FGCS0_SX406 +FGDP0_SA1 +FGMB0_SI1775 
+FGMB0_SX245 +FHLM0_SX390 +FHXS0_SA2 +FHXS0_SX445 +FJDM2_SA1 +FJDM2_SX232 +FJDM2_SX52 +FJHK0_SX302 +FJKL0_SX212 +FJKL0_SX392 +FJLG0_SI2306 +FJLR0_SA1 +FJRP1_SI2062 +FJRP1_SX82 +FJSK0_SA1 +FJSP0_SX264 +FJSP0_SX354 +FJSP0_SX444 +FJWB1_SA1 +FJWB1_SX345 +FJWB1_SX435 +FJXM0_SA1 +FJXM0_SI581 +FJXM0_SX401 +FJXP0_SA1 +FJXP0_SI1122 +FJXP0_SX132 +FKAA0_SX128 +FKAA0_SX398 +FKDE0_SA1 +FKDE0_SX151 +FKDE0_SX241 +FKDE0_SX421 +FKDE0_SX61 +FKDW0_SX397 +FKFB0_SA2 +FKFB0_SX348 +FKFB0_SX78 +FKKH0_SA1 +FKKH0_SA2 +FKKH0_SX120 +FKKH0_SX390 +FKLC0_SX355 +FKLC1_SI2308 +FKLC1_SX238 +FKLC1_SX328 +FKLC1_SX418 +FKLH0_SA2 +FKLH0_SX177 +FKSR0_SA1 +FKSR0_SA2 +FKSR0_SI1747 +FKSR0_SI487 +FKSR0_SX217 +FLAC0_SX451 +FLAG0_SA2 +FLAG0_SX114 +FLAG0_SX204 +FLAG0_SX24 +FLAG0_SX384 +FLEH0_SI1681 +FLEH0_SI2311 +FLEH0_SX331 +FLET0_SA1 +FLHD0_SI1827 +FLHD0_SX354 +FLJA0_SA1 +FLJA0_SI2338 +FLJD0_SI886 +FLJD0_SX76 +FLJG0_SA2 +FLKM0_SA2 +FLKM0_SI686 +FLKM0_SX260 +FLKM0_SX80 +FLMA0_SA1 +FLMA0_SI613 +FLMA0_SX433 +FLMA0_SX73 +FLMC0_SX22 +FLMK0_SI1035 +FLMK0_SX315 +FLMK0_SX405 +FLOD0_SI1917 +FLOD0_SX117 +FLOD0_SX171 +FLOD0_SX297 +FLTM0_SA1 +FLTM0_SI1070 +FLTM0_SI2330 +FMAH1_SA2 +FMAH1_SX159 +FMBG0_SA2 +FMBG0_SI2264 +FMEM0_SI747 +FMEM0_SX387 +FMJB0_SI547 +FMJB0_SX97 +FMJF0_SA2 +FMJU0_SX309 +FMJU0_SX399 +FMKC0_SI1702 +FMKC0_SX442 +FMKC0_SX82 +FMKF0_SX186 +FMPG0_SA2 +FNKL0_SI1522 +FNTB0_SI1203 +FNTB0_SI573 +FNTB0_SX303 +FPAB1_SI1471 +FPAB1_SX211 +FPAC0_SA2 +FPAD0_SA2 +FPAD0_SX356 +FPAD0_SX86 +FPAF0_SA2 +FPAF0_SX154 +FPAZ0_SA1 +FPAZ0_SA2 +FPAZ0_SX243 +FPJF0_SA1 +FPJF0_SX146 +FPJF0_SX56 +FPLS0_SI1590 +FPLS0_SX330 +FPMY0_SA1 +FPMY0_SX343 +FREH0_SA1 +FREH0_SA2 +FREH0_SX415 +FRJB0_SX347 +FRLL0_SX434 +FSAG0_SA1 +FSAG0_SX243 +FSAH0_SA1 +FSAH0_SA2 +FSAH0_SX164 +FSAH0_SX434 +FSBK0_SA2 +FSBK0_SI1069 +FSBK0_SX169 +FSCN0_SA2 +FSCN0_SI626 +FSCN0_SX266 +FSCN0_SX446 +FSCN0_SX86 +FSDC0_SA2 +FSDC0_SX142 +FSDC0_SX322 +FSDC0_SX52 +FSDJ0_SI485 +FSDJ0_SX215 +FSDJ0_SX305 +FSDJ0_SX395 +FSGF0_SX117 +FSJG0_SX130 +FSJK1_SA2 +FSJK1_SX125 
+FSJK1_SX35 +FSJS0_SX181 +FSJW0_SI1963 +FSJW0_SX433 +FSKC0_SI1416 +FSKC0_SI786 +FSKC0_SX246 +FSKL0_SI1529 +FSKL0_SX449 +FSKP0_SA2 +FSLS0_SX156 +FSLS0_SX426 +FSMA0_SA2 +FSMA0_SX181 +FSMM0_SX144 +FSMM0_SX234 +FSMS1_SX244 +FSMS1_SX347 +FSPM0_SA2 +FSPM0_SX161 +FSPM0_SX71 +FSRH0_SI1931 +FSRH0_SI671 +FSRH0_SX221 +FSRH0_SX401 +FTAJ0_SI699 +FTAJ0_SX159 +FTAJ0_SX249 +FTAJ0_SX429 +FTBR0_SX21 +FTBW0_SA1 +FTMG0_SI1532 +FTMG0_SI2162 +FTMG0_SX452 +FVFB0_SA2 +FVFB0_SX132 +FVFB0_SX42 +FVKB0_SA1 +FVMH0_SA2 +FVMH0_SX116 +FVMH0_SX26 +MABC0_SI1620 +MABC0_SI2041 +MABC0_SI781 +MADC0_SX107 +MADC0_SX377 +MADD0_SA2 +MADD0_SI1295 +MADD0_SX178 +MADD0_SX268 +MADD0_SX88 +MAEB0_SX450 +MAEO0_SA1 +MAFM0_SI939 +MAFM0_SX129 +MAFM0_SX309 +MAJP0_SA2 +MAKB0_SI1646 +MAKB0_SX26 +MAKB0_SX386 +MAKR0_SX362 +MAKR0_SX92 +MAPV0_SX213 +MARC0_SA2 +MARC0_SX108 +MARC0_SX18 +MARC0_SX198 +MARW0_SI1906 +MBAR0_SA1 +MBAR0_SX419 +MBAR0_SX59 +MBBR0_SI2315 +MBBR0_SX65 +MBCG0_SA1 +MBCG0_SI486 +MBEF0_SI1281 +MBEF0_SI1911 +MBEF0_SI651 +MBEF0_SX21 +MBEF0_SX381 +MBGT0_SA2 +MBGT0_SX261 +MBGT0_SX351 +MBGT0_SX441 +MBJV0_SA1 +MBJV0_SI617 +MBJV0_SX347 +MBMA0_SI592 +MBMA0_SX232 +MBMA0_SX52 +MBMA1_SI2214 +MBMA1_SX54 +MBML0_SA2 +MBML0_SI1169 +MBML0_SX89 +MBOM0_SA2 +MBOM0_SI2274 +MBOM0_SX294 +MBSB0_SA1 +MBSB0_SX3 +MBTH0_SA2 +MBTH0_SX122 +MBTH0_SX32 +MCAE0_SX277 +MCAL0_SA2 +MCAL0_SI1768 +MCDC0_SA1 +MCDC0_SX212 +MCDD0_SA2 +MCDD0_SI883 +MCDD0_SX253 +MCDD0_SX433 +MCDR0_SI1154 +MCEF0_SX235 +MCEF0_SX415 +MCEW0_SA2 +MCHL0_SX87 +MCLK0_SX310 +MCLM0_SA1 +MCLM0_SI2086 +MCLM0_SI826 +MCPM0_SA1 +MCPM0_SX114 +MCPM0_SX294 +MCPM0_SX384 +MCSS0_SI750 +MCTH0_SA1 +MCTH0_SX39 +MCXM0_SX91 +MDAC0_SA1 +MDAC0_SX181 +MDAC0_SX361 +MDAS0_SX6 +MDBB1_SX106 +MDBB1_SX16 +MDBB1_SX376 +MDBP0_SX168 +MDCD0_SI1415 +MDCD0_SX245 +MDCD0_SX425 +MDCM0_SX40 +MDCM0_SX400 +MDDC0_SI2049 +MDDC0_SI789 +MDDC0_SX159 +MDDC0_SX69 +MDED0_SA1 +MDED0_SA2 +MDEF0_SX123 +MDEF0_SX303 +MDHL0_SI1439 +MDHL0_SX269 +MDHL0_SX449 +MDHS0_SA1 +MDHS0_SA2 +MDHS0_SI1530 +MDHS0_SI2160 +MDJM0_SX105 
+MDJM0_SX15 +MDKS0_SX436 +MDLB0_SA2 +MDLC0_SX405 +MDLC1_SA2 +MDLC1_SI2065 +MDLC1_SI2144 +MDLC1_SX445 +MDLC2_SI2244 +MDLC2_SX354 +MDLH0_SA2 +MDLM0_SI1234 +MDLM0_SI1864 +MDLM0_SX154 +MDLM0_SX424 +MDLR0_SA1 +MDLR0_SA2 +MDLR0_SI1863 +MDLR0_SI603 +MDLR0_SX153 +MDLR1_SA1 +MDLR1_SA2 +MDMA0_SI1430 +MDMA0_SX260 +MDMA0_SX80 +MDMT0_SA1 +MDMT0_SA2 +MDMT0_SI1832 +MDMT0_SX122 +MDMT0_SX32 +MDNS0_SA2 +MDNS0_SI2271 +MDNS0_SX201 +MDNS0_SX21 +MDPB0_SX416 +MDPK0_SI1053 +MDPK0_SX333 +MDPK0_SX423 +MDPS0_SI719 +MDPS0_SX359 +MDRD0_SA1 +MDRD0_SX32 +MDSJ0_SI2092 +MDSS0_SA2 +MDSS0_SX441 +MDSS1_SA1 +MDSS1_SI1327 +MDSS1_SI697 +MDSS1_SX157 +MDSS1_SX67 +MDTB0_SI1200 +MDTB0_SI1830 +MDTB0_SX120 +MDWD0_SA2 +MDWD0_SX270 +MDWD0_SX90 +MDWH0_SX215 +MDWH0_SX305 +MDWM0_SA1 +MDWM0_SA2 +MDWM0_SX16 +MDWM0_SX286 +MEAL0_SA2 +MEAL0_SI2177 +MEAL0_SX107 +MEAL0_SX347 +MEDR0_SA1 +MEDR0_SA2 +MEDR0_SI1374 +MEFG0_SA1 +MEGJ0_SA2 +MEGJ0_SX257 +MEGJ0_SX3 +MEJL0_SA1 +MEJL0_SX152 +MEJL0_SX242 +MEJS0_SI610 +MEJS0_SX160 +MEJS0_SX340 +MESG0_SX432 +MESJ0_SX187 +MESJ0_SX97 +MEWM0_SI718 +MEWM0_SX178 +MEWM0_SX88 +MFER0_SI862 +MFER0_SX142 +MFRM0_SX345 +MFRM0_SX435 +MFWK0_SI1879 +MFWK0_SX169 +MFXS0_SX54 +MFXV0_SA2 +MFXV0_SX105 +MGAF0_SA1 +MGAF0_SX22 +MGAF0_SX382 +MGAG0_SA2 +MGAK0_SX226 +MGAK0_SX46 +MGAR0_SX132 +MGAW0_SI535 +MGAW0_SX175 +MGES0_SA1 +MGES0_SI2111 +MGES0_SI851 +MGJC0_SA2 +MGJC0_SX75 +MGRL0_SI2127 +MGRL0_SI867 +MGRL0_SX147 +MGRP0_SA2 +MGSH0_SA2 +MGSH0_SI1806 +MGSH0_SX127 +MGSH0_SX276 +MGSH0_SX6 +MGSL0_SA1 +MGSL0_SI534 +MGSL0_SX264 +MGXP0_SX187 +MGXP0_SX7 +MHBS0_SX315 +MHBS0_SX45 +MHIT0_SA1 +MHJB0_SA1 +MHJB0_SI1017 +MHMG0_SX195 +MHMR0_SA1 +MHMR0_SI489 +MHRM0_SA1 +MHRM0_SI958 +MHRM0_SX148 +MHRM0_SX58 +MHXL0_SI1772 +MHXL0_SX242 +MILB0_SA2 +MJAC0_SX307 +MJAC0_SX71 +MJAE0_SX174 +MJAI0_SA1 +MJAI0_SA2 +MJBG0_SX62 +MJDA0_SI1031 +MJDA0_SX311 +MJDE0_SI463 +MJDG0_SA2 +MJDG0_SI1042 +MJDG0_SI1705 +MJDM0_SA1 +MJDM0_SI974 +MJEB0_SI656 +MJEB0_SX296 +MJEB1_SA2 +MJEB1_SX207 +MJEB1_SX387 +MJEE0_SA1 +MJEE0_SX247 +MJEE0_SX337 +MJFH0_SA2 
+MJFH0_SI1107 +MJFR0_SX75 +MJHI0_SA1 +MJHI0_SX158 +MJJB0_SA1 +MJJB0_SX239 +MJJJ0_SX443 +MJJM0_SA2 +MJJM0_SI827 +MJJM0_SX107 +MJKR0_SA1 +MJKR0_SI571 +MJLB0_SX176 +MJLG1_SX292 +MJLS0_SX106 +MJMA0_SA1 +MJMA0_SA2 +MJMD0_SA2 +MJMD0_SX308 +MJMD0_SX38 +MJMM0_SX85 +MJPG0_SI1191 +MJPG0_SX111 +MJPG0_SX201 +MJPG0_SX21 +MJPM0_SA2 +MJPM0_SX378 +MJPM1_SI2280 +MJPM1_SX401 +MJRA0_SA1 +MJRA0_SA2 +MJRA0_SI1236 +MJRA0_SI1866 +MJRA0_SX426 +MJRG0_SI1366 +MJRG0_SI1996 +MJRG0_SX376 +MJRH0_SX225 +MJRH1_SA1 +MJRH1_SI514 +MJRH1_SX154 +MJRH1_SX244 +MJRH1_SX424 +MJRK0_SA1 +MJRK0_SA2 +MJRK0_SI1662 +MJRK0_SX160 +MJRK0_SX250 +MJRK0_SX430 +MJRP0_SA1 +MJRP0_SA2 +MJRP0_SX225 +MJSR0_SA1 +MJSR0_SI1424 +MJSR0_SX344 +MJWG0_SA1 +MJWG0_SX265 +MJWS0_SI513 +MJWS0_SX153 +MJWS0_SX63 +MJWT0_SA1 +MJWT0_SX121 +MJWT0_SX211 +MJWT0_SX301 +MJWT0_SX31 +MJWT0_SX391 +MJXA0_SX427 +MJXL0_SI542 +MKAG0_SA1 +MKAG0_SX259 +MKAJ0_SA2 +MKAJ0_SX154 +MKAM0_SA1 +MKAM0_SX146 +MKAM0_SX326 +MKAM0_SX56 +MKDB0_SA1 +MKDB0_SA2 +MKDB0_SX152 +MKDD0_SA2 +MKES0_SA1 +MKES0_SI1253 +MKES0_SI1883 +MKES0_SX173 +MKJO0_SI1517 +MKJO0_SI887 +MKJO0_SX437 +MKLN0_SI968 +MKLN0_SX248 +MKLR0_SA2 +MKLR0_SI1689 +MKLS0_SA1 +MKLS0_SX357 +MKLS0_SX87 +MKLS1_SA1 +MKLS1_SA2 +MKLS1_SX375 +MKLW0_SA1 +MKRG0_SX411 +MKXL0_SA2 +MKXL0_SX15 +MKXL0_SX375 +MLBC0_SA1 +MLBC0_SI1869 +MLBC0_SX249 +MLEL0_SA1 +MLEL0_SA2 +MLEL0_SI1246 +MLEL0_SX256 +MLEL0_SX436 +MLJC0_SX145 +MLJC0_SX415 +MLJH0_SX64 +MLNS0_SI2037 +MMAA0_SA1 +MMAA0_SA2 +MMAA0_SX35 +MMAB1_SI1494 +MMAB1_SX234 +MMAG0_SA2 +MMAG0_SI1126 +MMAG0_SX316 +MMAM0_SI2227 +MMAM0_SX157 +MMAM0_SX427 +MMAR0_SX256 +MMBS0_SI1781 +MMCC0_SA2 +MMDB0_SX177 +MMDG0_SA1 +MMDG0_SA2 +MMDG0_SI520 +MMDG0_SX160 +MMDG0_SX250 +MMDM0_SI1941 +MMDM0_SI681 +MMDM0_SX141 +MMDM1_SA2 +MMDM1_SI2043 +MMDM1_SX423 +MMDM1_SX63 +MMDS0_SA1 +MMEA0_SA1 +MMEA0_SX128 +MMEA0_SX398 +MMEB0_SA2 +MMEB0_SX187 +MMEB0_SX367 +MMGC0_SA2 +MMGC0_SX135 +MMGC0_SX225 +MMGG0_SX269 +MMGK0_SX332 +MMGK0_SX62 +MMJB1_SA2 +MMRP0_SA2 +MMRP0_SX144 +MMSM0_SX116 +MMSM0_SX206 +MMVP0_SA1 
+MMVP0_SA2 +MMWB0_SI989 +MMWB0_SX89 +MMWS0_SA2 +MMWS0_SX168 +MMWS0_SX348 +MMWS0_SX438 +MMWS1_SI1701 +MMXS0_SI2136 +MMXS0_SX246 +MMXS0_SX426 +MNET0_SI816 +MNET0_SX6 +MNTW0_SA2 +MNTW0_SX168 +MNTW0_SX78 +MPAR0_SI2206 +MPAR0_SI946 +MPAR0_SX136 +MPAR0_SX316 +MPEB0_SI1034 +MPEB0_SI1860 +MPEB0_SX240 +MPEB0_SX330 +MPFU0_SI628 +MPFU0_SX448 +MPGH0_SX114 +MPGH0_SX24 +MPGR0_SX240 +MPGR0_SX330 +MPGR1_SX149 +MPPC0_SA1 +MPRD0_SA1 +MPRD0_SX261 +MPRD0_SX351 +MPRD0_SX441 +MPRD0_SX81 +MPRK0_SI1727 +MPRK0_SX107 +MPRK0_SX377 +MPRT0_SA1 +MPRT0_SX310 +MPSW0_SI1067 +MPSW0_SX167 +MPSW0_SX437 +MRAB1_SX128 +MRAB1_SX308 +MRAI0_SA1 +MRAI0_SA2 +MRAI0_SX72 +MRAM0_SA1 +MRAM0_SA2 +MRAM0_SX15 +MRBC0_SI1859 +MRBC0_SX329 +MRBC0_SX419 +MRCG0_SI798 +MRCG0_SX168 +MRCW0_SA1 +MRCW0_SX291 +MRDD0_SI1680 +MRDD0_SX150 +MRDD0_SX277 +MRDD0_SX60 +MRDM0_SI1595 +MRDM0_SX65 +MRDS0_SA1 +MREE0_SX24 +MREH1_SX249 +MREH1_SX69 +MREM0_SA2 +MREW1_SI870 +MRFK0_SX446 +MRFL0_SA1 +MRFL0_SX256 +MRFL0_SX436 +MRFL0_SX76 +MRGM0_SA2 +MRGM0_SX262 +MRGS0_SA2 +MRGS0_SX186 +MRHL0_SI885 +MRHL0_SX345 +MRHL0_SX435 +MRJB1_SA1 +MRJB1_SA2 +MRJB1_SX210 +MRJB1_SX30 +MRJB1_SX390 +MRJH0_SA2 +MRJH0_SX307 +MRJH0_SX79 +MRJM0_SX148 +MRJM1_SA2 +MRJM1_SI1298 +MRJM1_SI1928 +MRJM1_SX128 +MRJT0_SA2 +MRJT0_SI1498 +MRJT0_SX328 +MRJT0_SX418 +MRKM0_SA2 +MRKM0_SX367 +MRLD0_SA2 +MRLD0_SI2224 +MRLD0_SX154 +MRLD0_SX424 +MRLJ0_SA1 +MRLJ0_SX250 +MRLJ0_SX340 +MRLJ1_SA1 +MRLJ1_SA2 +MRLJ1_SX321 +MRLK0_SI843 +MRLK0_SX123 +MRLK0_SX213 +MRMB0_SA2 +MRMB0_SI1581 +MRMB0_SX411 +MRMG0_SA1 +MRMG0_SI1080 +MRMG0_SX450 +MRMH0_SI1349 +MRMH0_SI2281 +MRMH0_SX121 +MRML0_SA2 +MRML0_SX341 +MRPC1_SI2112 +MRRE0_SA2 +MRRE0_SX164 +MRRE0_SX344 +MRRE0_SX74 +MRSO0_SX129 +MRSO0_SX39 +MRSP0_SX259 +MRTC0_SX378 +MRVG0_SI1140 +MRVG0_SX240 +MRWA0_SI973 +MRWA0_SX163 +MRWA0_SX73 +MRWS0_SI1732 +MRWS0_SI472 +MRWS0_SX22 +MRWS0_SX382 +MRXB0_SA2 +MRXB0_SX415 +MSAH1_SI1679 +MSAS0_SX116 +MSAS0_SX206 +MSAS0_SX386 +MSAT0_SA1 +MSAT1_SX263 +MSAT1_SX443 +MSAT1_SX83 +MSDB0_SX197 +MSDB0_SX287 +MSDB0_SX377 
+MSDH0_SI2240 +MSDH0_SX440 +MSDH0_SX80 +MSDS0_SA1 +MSEM1_SI1440 +MSEM1_SX180 +MSEM1_SX270 +MSES0_SI1589 +MSES0_SX239 +MSES0_SX419 +MSFH0_SX316 +MSFV0_SI1892 +MSFV0_SX362 +MSFV0_SX92 +MSMR0_SX415 +MSMS0_SA1 +MSMS0_SX173 +MSMS0_SX83 +MSRG0_SA1 +MSRG0_SI1221 +MSTF0_SI766 +MSTF0_SX316 +MSTF0_SX46 +MSVS0_SA2 +MSVS0_SX308 +MTAS0_SX215 +MTAS0_SX35 +MTAS0_SX395 +MTAT0_SX390 +MTAT1_SX59 +MTBC0_SI1803 +MTCS0_SA2 +MTCS0_SI2265 +MTCS0_SX82 +MTDP0_SA2 +MTER0_SA2 +MTER0_SI1787 +MTJG0_SA1 +MTJG0_SI2157 +MTJG0_SX260 +MTJM0_SI1856 +MTJM0_SX146 +MTJU0_SX130 +MTJU0_SX400 +MTKD0_SX107 +MTKD0_SX287 +MTKP0_SI1023 +MTLB0_SA1 +MTLB0_SX234 +MTLC0_SA1 +MTML0_SI2325 +MTML0_SX165 +MTMN0_SA2 +MTMN0_SI1064 +MTMN0_SI2324 +MTMN0_SX434 +MTMT0_SA2 +MTMT0_SI1748 +MTPF0_SX65 +MTPG0_SI1383 +MTPG0_SI753 +MTPG0_SX303 +MTPP0_SX338 +MTPR0_SX340 +MTQC0_SI480 +MTQC0_SX91 +MTRR0_SX198 +MTRR0_SX288 +MTRT0_SA2 +MTRT0_SX254 +MTRT0_SX57 +MTWH1_SX72 +MTXS0_SA1 +MTXS0_SA2 +MVJH0_SI926 +MVJH0_SX206 +MVJH0_SX296 +MVLO0_SA1 +MVRW0_SA2 +MVRW0_SX135 +MVRW0_SX225 +MWAC0_SA2 +MWAC0_SX341 +MWAC0_SX431 +MWAD0_SX432 +MWAD0_SX72 +MWAR0_SA1 +MWAR0_SI1675 +MWCH0_SI1895 +MWCH0_SI2252 +MWCH0_SX182 +MWCH0_SX452 +MWDK0_SA1 +MWDK0_SA2 +MWDK0_SI2017 +MWDK0_SI806 +MWDK0_SX176 +MWDK0_SX86 +MWEM0_SA2 +MWEM0_SI1320 +MWEM0_SI1393 +MWEM0_SX150 +MWGR0_SX346 +MWRE0_SX247 +MWRE0_SX337 +MWRE0_SX427 +MWRP0_SA1 +MWRP0_SX273 +MWRP0_SX363 +MWSB0_SX276 +MWSH0_SX256 +MWSH0_SX76 +MZMB0_SA1 diff --git a/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/valid.uid b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/valid.uid new file mode 100644 index 0000000..e99edfe --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/config/timit_unmatched/valid.uid @@ -0,0 +1,620 @@ +FAEM0_SI1392 +FAJW0_SI1263 +FAJW0_SI633 +FALK0_SI658 +FALR0_SX335 +FAPB0_SI1063 +FAPB0_SI2323 +FAPB0_SX433 +FBAS0_SI1472 +FBAS0_SI2066 +FBCG1_SX352 +FBCH0_SI959 +FBJL0_SI922 +FBLV0_SI1688 +FBMH0_SI1136 +FBMH0_SI970 +FBMJ0_SA1 +FBMJ0_SI1776 +FBMJ0_SI516 
+FBMJ0_SX336 +FCDR1_SI1186 +FCDR1_SI1816 +FCDR1_SI556 +FCDR1_SX286 +FCKE0_SI1741 +FCKE0_SI481 +FCLT0_SI808 +FCMG0_SI1142 +FCMG0_SX432 +FCMM0_SI1957 +FCMM0_SX420 +FCYL0_SI667 +FCYL0_SX349 +FDAS1_SI1461 +FDAS1_SI831 +FDAW0_SI1271 +FDAW0_SI2036 +FDJH0_SI935 +FDKN0_SI1202 +FDKN0_SX181 +FDKN0_SX451 +FDMY0_SA1 +FDMY0_SI567 +FDMY0_SI714 +FDMY0_SX387 +FDNC0_SI1278 +FDNC0_SI1908 +FDTD0_SA1 +FDTD0_SX321 +FEAC0_SI615 +FEAR0_SX352 +FECD0_SA1 +FECD0_SI1418 +FECD0_SI788 +FEME0_SI875 +FEME0_SX335 +FEXM0_SA1 +FEXM0_SI482 +FEXM0_SX366 +FGDP0_SI988 +FGDP0_SX88 +FGMB0_SI1145 +FGMB0_SX335 +FGRW0_SA1 +FGRW0_SI1152 +FGRW0_SX162 +FGRW0_SX432 +FHLM0_SX120 +FHLM0_SX349 +FHXS0_SA1 +FHXS0_SI1075 +FHXS0_SI2302 +FHXS0_SX175 +FJDM2_SA2 +FJDM2_SX142 +FJEN0_SA1 +FJEN0_SX327 +FJEN0_SX417 +FJHK0_SI2282 +FJKL0_SI932 +FJLG0_SI1889 +FJLR0_SI1231 +FJRB0_SX402 +FJRP1_SA1 +FJRP1_SI1432 +FJRP1_SX262 +FJRP1_SX352 +FJSK0_SI1052 +FJSP0_SI1434 +FJWB1_SI748 +FJXM0_SX311 +FJXM0_SX41 +FJXP0_SI1752 +FKAA0_SA1 +FKDE0_SI1141 +FKDE0_SI1771 +FKDW0_SI1207 +FKDW0_SI1891 +FKFB0_SI1608 +FKFB0_SX438 +FKKH0_SI1290 +FKKH0_SI1920 +FKLC0_SI985 +FKLC0_SX175 +FKLC1_SI1048 +FKLH0_SI1257 +FKSR0_SX366 +FLAC0_SI1339 +FLAG0_SI1464 +FLAG0_SI834 +FLEH0_SI1051 +FLET0_SI507 +FLJA0_SI1078 +FLJA0_SX178 +FLJD0_SI1516 +FLJG0_SI981 +FLJG0_SX171 +FLJG0_SX351 +FLKM0_SA1 +FLKM0_SI620 +FLKM0_SX350 +FLKM0_SX440 +FLMC0_SI1372 +FLMK0_SA1 +FLMK0_SI1229 +FLTM0_SX170 +FLTM0_SX350 +FLTM0_SX440 +FMAH1_SI879 +FMBG0_SI1160 +FMEM0_SA1 +FMEM0_SX333 +FMJB0_SI1177 +FMJF0_SI624 +FMJF0_SX174 +FMJF0_SX84 +FMJU0_SI1389 +FMKC0_SI1041 +FMKF0_SI1018 +FMPG0_SA1 +FMPG0_SI972 +FMPG0_SX162 +FMPG0_SX342 +FMPG0_SX432 +FNKL0_SI892 +FNTB0_SI679 +FPAB1_SA1 +FPAB1_SI2101 +FPAB1_SI841 +FPAC0_SI1921 +FPAC0_SI661 +FPAD0_SI716 +FPAD0_SX176 +FPAF0_SA1 +FPAF0_SI1054 +FPAZ0_SI2223 +FPAZ0_SI963 +FPJF0_SI1259 +FPJF0_SX352 +FPLS0_SI960 +FPMY0_SI1153 +FPMY0_SI523 +FREH0_SI1945 +FRLL0_SI805 +FSAG0_SI1323 +FSAG0_SX153 +FSAG0_SX333 +FSAG0_SX423 +FSAH0_SI614 +FSAH0_SX327 +FSAK0_SI1300 
+FSBK0_SX349 +FSCN0_SA1 +FSCN0_SI705 +FSCN0_SX176 +FSDC0_SI1312 +FSDJ0_SI1115 +FSGF0_SI2187 +FSGF0_SI927 +FSJG0_SA1 +FSJG0_SA2 +FSJG0_SI940 +FSJG0_SX220 +FSJG0_SX40 +FSJG0_SX400 +FSJS0_SA1 +FSJS0_SX451 +FSJW0_SI1333 +FSKP0_SI1098 +FSMA0_SI991 +FSMA0_SX451 +FSMM0_SX324 +FSPM0_SI1241 +FSPM0_SX251 +FSRH0_SX311 +FSSB0_SI1712 +FSSB0_SX362 +FTBR0_SI1402 +FTBR0_SI921 +FTBW0_SI715 +FTBW0_SX175 +FTLG0_SI1743 +FTLG0_SI483 +FTMG0_SI902 +FVFB0_SI1510 +FVKB0_SX349 +FVMH0_SI1466 +FVMH0_SI836 +MADC0_SI1367 +MADC0_SI737 +MAEB0_SI1411 +MAEO0_SI1326 +MAJP0_SI1704 +MAJP0_SX174 +MAKB0_SA2 +MAKB0_SI1016 +MAKB0_SI2276 +MAKB0_SX116 +MAPV0_SI1293 +MAPV0_SI663 +MARW0_SX286 +MARW0_SX349 +MBBR0_SI1055 +MBBR0_SX335 +MBCG0_SI957 +MBCG0_SX327 +MBGT0_SI1841 +MBGT0_SX171 +MBMA0_SI1222 +MBMA1_SI954 +MBMA1_SX324 +MBTH0_SI2102 +MBWP0_SX349 +MCAE0_SI1447 +MCAE0_SI2077 +MCAE0_SI817 +MCAL0_SI1138 +MCDR0_SI1784 +MCDR0_SI524 +MCEF0_SI842 +MCEW0_SA1 +MCEW0_SI2072 +MCEW0_SI812 +MCEW0_SX362 +MCEW0_SX452 +MCHL0_SI1347 +MCHL0_SI1404 +MCLK0_SI2290 +MCLK0_SI650 +MCPM0_SI1824 +MCSS0_SI1380 +MCSS0_SI688 +MCTM0_SI1350 +MCTM0_SI1980 +MDAC0_SI631 +MDAS0_SI1896 +MDAS0_SI636 +MDBP0_SI528 +MDBP0_SX438 +MDCD0_SI785 +MDCD0_SX335 +MDCM0_SI1480 +MDDC0_SI1419 +MDED0_SI540 +MDEF0_SI1123 +MDEM0_SA1 +MDEM0_SI608 +MDEM0_SI800 +MDEM0_SX428 +MDHS0_SI900 +MDJM0_SI1455 +MDKS0_SX166 +MDKS0_SX346 +MDLB0_SI1306 +MDLB0_SX136 +MDLB0_SX406 +MDLC0_SI1395 +MDLC0_SI2025 +MDLC1_SI1435 +MDLH0_SX160 +MDLH0_SX430 +MDLM0_SI604 +MDLR0_SX333 +MDLR1_SI669 +MDMA0_SX170 +MDMA0_SX350 +MDMA0_SX440 +MDNS0_SI1011 +MDNS0_SI873 +MDPB0_SI1760 +MDPB0_SI866 +MDRD0_SI752 +MDSJ0_SI1462 +MDSJ0_SX438 +MDWD0_SI1260 +MDWH0_SA1 +MDWH0_SI1168 +MDWH0_SI665 +MDWM0_SI916 +MEDR0_SI2004 +MEFG0_SI491 +MEFG0_SI598 +MEGJ0_SA1 +MEGJ0_SI1337 +MEGJ0_SI707 +MEGJ0_SX167 +MEJS0_SI1240 +MESG0_SI702 +MESJ0_SI2039 +MFWK0_SX349 +MFXS0_SX324 +MFXV0_SI1005 +MFXV0_SI1342 +MGAF0_SI1282 +MGAG0_SI691 +MGAK0_SI1036 +MGAK0_SX136 +MGAR0_SX312 +MGAW0_SI1165 +MGES0_SX311 +MGJC0_SX435 
+MGRL0_SX327 +MGRP0_SI1317 +MGRP0_SX327 +MGSH0_SI1176 +MGSH0_SI546 +MGSL0_SI797 +MGXP0_SI1087 +MGXP0_SI525 +MHBS0_SI945 +MHIT0_SI983 +MHMG0_SI735 +MHMR0_SI1692 +MILB0_SI903 +MJAC0_SI701 +MJAC0_SX251 +MJAE0_SX84 +MJAI0_SI682 +MJAI0_SI710 +MJDC0_SI531 +MJDE0_SA1 +MJDE0_SI1120 +MJDE0_SI490 +MJDE0_SX220 +MJDM0_SI1340 +MJDM0_SX170 +MJDM0_SX350 +MJEB0_SX170 +MJEB1_SI1467 +MJEB1_SI837 +MJFR0_SA1 +MJFR0_SX435 +MJHI0_SI1328 +MJJJ0_SI1163 +MJJM0_SI1251 +MJLB0_SI1616 +MJLS0_SI1726 +MJMA0_SI2125 +MJMD0_SI2288 +MJMM0_SI1255 +MJMM0_SX175 +MJPG0_SI1821 +MJPM0_SI1368 +MJPM1_SX311 +MJRA0_SX336 +MJRG0_SI736 +MJRG0_SX352 +MJRH0_SI1840 +MJRH1_SI1558 +MJRK0_SI880 +MJRP0_SI1845 +MJSR0_SI2054 +MJSR0_SI794 +MJWG0_SI813 +MJWG0_SI895 +MJWG0_SX175 +MJWS0_SX333 +MJWT0_SI1291 +MJWT0_SI1381 +MJXL0_SI1172 +MKAG0_SI979 +MKAH0_SX178 +MKAM0_SI1250 +MKAM0_SI1465 +MKDD0_SI1567 +MKDD0_SI2197 +MKDD0_SI937 +MKDT0_SI814 +MKES0_SI623 +MKLS0_SI1437 +MKLS0_SI2067 +MKLS1_SI915 +MKLW0_SI1571 +MKLW0_SX311 +MKRG0_SI861 +MKXL0_SI1815 +MKXL0_SI1958 +MLBC0_SI1239 +MLEL0_SI616 +MLEL0_SX166 +MLJC0_SI1225 +MLJH0_SA1 +MLJH0_SA2 +MLJH0_SI1422 +MLJH0_SI694 +MLJH0_SX244 +MLSH0_SI1417 +MLSH0_SX247 +MMAA0_SI1588 +MMAA0_SI845 +MMAB1_SI864 +MMAB1_SX324 +MMAG0_SA1 +MMAG0_SI1756 +MMAG0_SX136 +MMAR0_SI1966 +MMAR0_SX166 +MMAR0_SX346 +MMBS0_SI521 +MMBS0_SX161 +MMCC0_SI1338 +MMDB0_SI987 +MMDG0_SI1780 +MMDM0_SI1311 +MMDM1_SX153 +MMDM1_SX333 +MMEB0_SX327 +MMGC0_SI1305 +MMGG0_SI1079 +MMGG0_SX449 +MMLM0_SI2150 +MMPM0_SX161 +MMRP0_SX324 +MMSM0_SI1106 +MMSM0_SI476 +MMVP0_SI654 +MMVP0_SX347 +MMWB0_SA1 +MMWB0_SI2249 +MMWB0_SX359 +MMWB0_SX449 +MNTW0_SI1068 +MNTW0_SI1698 +MPEB0_SI600 +MPFU0_SI1258 +MPGH0_SI675 +MPGR0_SI1410 +MPGR1_SI1499 +MPMB0_SA1 +MPMB0_SA2 +MPMB0_SI1501 +MPMB0_SI2131 +MPMB0_SI871 +MPMB0_SX151 +MPMB0_SX331 +MPMB0_SX421 +MPMB0_SX61 +MPPC0_SI1412 +MPRB0_SI1215 +MPRB0_SI575 +MPRD0_SI801 +MPRD0_SX171 +MPRK0_SA1 +MPRK0_SI1097 +MPRK0_SI467 +MPRK0_SX287 +MRAB0_SI1854 +MRAB1_SI848 +MRAI0_SI2052 +MRAI0_SI792 +MRAI0_SX432 
+MRAM0_SI1951 +MRCG0_SA2 +MRCG0_SI1428 +MRCG0_SX348 +MRCG0_SX438 +MRCW0_SI741 +MRDM0_SI1044 +MRDM0_SX335 +MREE0_SI1104 +MREE0_SI1959 +MREH1_SA1 +MREH1_SI1599 +MREH1_SI969 +MREM0_SI511 +MRFK0_SI1076 +MRFL0_SI1156 +MRFL0_SI526 +MRFL0_SX166 +MRGM0_SI532 +MRGM0_SX172 +MRGM0_SX442 +MRGS0_SI1356 +MRGS0_SI726 +MRGS0_SX6 +MRJB1_SI1413 +MRJB1_SI2021 +MRJB1_SX120 +MRJH0_SI1519 +MRJH0_SI889 +MRJH0_SX169 +MRJT0_SI868 +MRJT0_SX58 +MRKM0_SI1267 +MRKM0_SI1391 +MRKM0_SI637 +MRLJ0_SI790 +MRLJ1_SI2301 +MRLK0_SI1468 +MRLR0_SI1196 +MRML0_SA1 +MRML0_SI1421 +MRML0_SX161 +MRML0_SX251 +MRMS0_SI2057 +MRRE0_SA1 +MRRE0_SI1334 +MRRE0_SI952 +MRSO0_SI1206 +MRSP0_SI1429 +MRTC0_SI1458 +MRTJ0_SA1 +MRTJ0_SI772 +MRTJ0_SX142 +MRTJ0_SX232 +MRTJ0_SX52 +MRWS0_SI1102 +MRXB0_SI2215 +MRXB0_SI955 +MSAS0_SI1376 +MSAS0_SI746 +MSDH0_SI980 +MSDH0_SX170 +MSDS0_SI1077 +MSDS0_SX267 +MSDS0_SX357 +MSEM1_SI2070 +MSEM1_SI810 +MSFH0_SA1 +MSFH0_SI1738 +MSFH0_SX136 +MSFH0_SX406 +MSFV0_SI632 +MSJK0_SI1596 +MSJK0_SX336 +MSMC0_SI509 +MSMR0_SI1150 +MSMS0_SI1433 +MSRR0_SI1761 +MSRR0_SI501 +MSTF0_SI852 +MSVS0_SI2198 +MSVS0_SI938 +MSVS0_SX398 +MTAB0_SI1572 +MTAB0_SX312 +MTAT0_SA1 +MTAT0_SI1110 +MTAT0_SI811 +MTAT1_SI779 +MTAT1_SX149 +MTAT1_SX329 +MTBC0_SI543 +MTCS0_SI712 +MTDB0_SI1401 +MTDB0_SI771 +MTDP0_SA1 +MTDP0_SI1521 +MTDP0_SX171 +MTDP0_SX351 +MTER0_SA1 +MTER0_SI1157 +MTER0_SX437 +MTJG0_SX170 +MTJS0_SA2 +MTJS0_SI1822 +MTJS0_SI562 +MTJS0_SX382 +MTJU0_SI2020 +MTKD0_SI630 +MTKP0_SI2283 +MTKP0_SI454 +MTLB0_SI1134 +MTLB0_SX324 +MTLC0_SI1313 +MTLC0_SI1477 +MTML0_SX435 +MTMN0_SI582 +MTMT0_SI488 +MTPP0_SI1508 +MTPR0_SI2230 +MTPR0_SX160 +MTPR0_SX430 +MTQC0_SA1 +MTQC0_SI1441 +MTQC0_SX181 +MTQC0_SX451 +MTRC0_SI589 +MTRR0_SI918 +MTRT0_SI1227 +MTXS0_SI1060 +MTXS0_SI2320 +MTXS0_SX160 +MTXS0_SX430 +MVJH0_SI1556 +MVLO0_SI517 +MWAC0_SI1601 +MWAC0_SX161 +MWAC0_SX251 +MWAR0_SI1045 +MWDK0_SI1436 +MWEM0_SX420 +MWRE0_SA2 +MWRE0_SI1057 +MWRE0_SX67 +MWRP0_SI1443 +MWSB0_SI996 +MWSH0_SI1426 +MWSH0_SI796 +MWSH0_SX166 diff --git 
a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/README.md b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/README.md new file mode 100644 index 0000000..314984f --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/README.md @@ -0,0 +1,56 @@ +# Self-Training with Kaldi HMM Models +This folder contains recipes for self-training on pseudo phone transcripts and +decoding into phones or words with [kaldi](https://github.com/kaldi-asr/kaldi). + +To start, download and install kaldi following its instructions, and place this +folder in `path/to/kaldi/egs`. + +## Training +Assuming the following has been prepared: +- `w2v_dir`: contains features `{train,valid}.{npy,lengths}`, real transcripts `{train,valid}.${label}`, and dict `dict.${label}.txt` +- `lab_dir`: contains pseudo labels `{train,valid}.txt` +- `arpa_lm`: Arpa-format n-gram phone LM for decoding +- `arpa_lm_bin`: Arpa-format n-gram phone LM for unsupervised model selection to be used with KenLM + +Set these variables in `train.sh`, as well as `out_dir`, the output directory, +and then run it. + +The output will be: +``` +==== WER w.r.t. real transcript (select based on unsupervised metric) +INFO:root:./out/exp/mono/decode_valid/scoring/14.0.0.tra.txt: score 0.9178 wer 28.71% lm_ppl 24.4500 gt_wer 25.57% +INFO:root:./out/exp/tri1/decode_valid/scoring/17.1.0.tra.txt: score 0.9257 wer 26.99% lm_ppl 30.8494 gt_wer 21.90% +INFO:root:./out/exp/tri2b/decode_valid/scoring/8.0.0.tra.txt: score 0.7506 wer 23.15% lm_ppl 25.5944 gt_wer 15.78% +``` +where `wer` is the word error rate with respect to the pseudo label, `gt_wer` to +the ground truth label, `lm_ppl` the language model perplexity of HMM predicted +transcripts, and `score` is the unsupervised metric for model selection. We +choose the model and the LM parameter of the one with the lowest score. In the +example above, it is `tri2b`, `8.0.0`.
+ + +## Decoding into Phones +In `decode_phone.sh`, set `out_dir` the same as used in `train.sh`, set +`dec_exp` and `dec_lmparam` to the selected model and LM parameter (e.g. +`tri2b` and `8.0.0` in the above example). `dec_script` needs to be set +according to `dec_exp`: for mono/tri1/tri2b, use `decode.sh`; for tri3b, use +`decode_fmllr.sh`. + +The output will be saved at `out_dir/dec_data` + + +## Decoding into Words +`decode_word_step1.sh` prepares WFSTs for word decoding. Besides the variables +mentioned above, set +- `wrd_arpa_lm`: Arpa-format n-gram word LM for decoding +- `wrd_arpa_lm_bin`: Arpa-format n-gram word LM for unsupervised model selection + +`decode_word_step1.sh` decodes the `train` and `valid` splits into words and runs +unsupervised model selection using the `valid` split. The output looks like: +``` +INFO:root:./out/exp/tri2b/decodeword_valid/scoring/17.0.0.tra.txt: score 1.8693 wer 24.97% lm_ppl 1785.5333 gt_wer 31.45% +``` + +After determining the LM parameter (`17.0.0` in the example above), set it in +`decode_word_step2.sh` and run it. The output will be saved at +`out_dir/dec_data_word`. diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/cmd.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/cmd.sh new file mode 100644 index 0000000..e749531 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances of 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm.
Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="run.pl --mem 2G" +export decode_cmd="run.pl --mem 4G" +export mkgraph_cmd="run.pl --mem 8G" diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_phone.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_phone.sh new file mode 100644 index 0000000..947342a --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_phone.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# decode into phones (and prepare a new data directory for HMM outputs) + +. ./path.sh + +set -eu + +out_dir= # same as in train.sh +dec_lmparam= # LM hyperparameters (e.g., 7.0.0) +dec_exp= +dec_script= +dec_splits="train valid" +dec_data_dir=$out_dir/dec_data # where to write HMM output + +data_dir=${out_dir}/data + +local/decode.sh --nj 40 --graph_name graph \ + --val_sets "$dec_splits" --decode_script $dec_script \ + $out_dir/exp/$dec_exp $data_dir $data_dir/lang_test + +if [ ! 
-z $dec_lmparam ]; then + for x in $dec_splits; do + mkdir -p $dec_data_dir/$x + cp $data_dir/$x/{feats.scp,cmvn.scp,utt2spk,spk2utt} $dec_data_dir/$x/ + + tra=$out_dir/exp/$dec_exp/decode_${x}/scoring/${dec_lmparam}.tra + cat $tra | utils/int2sym.pl -f 2- $data_dir/lang/words.txt | \ + sed 's:<UNK>::g' | sed 's:<SIL>::g' > $dec_data_dir/${x}/text + utils/fix_data_dir.sh $dec_data_dir/${x} + echo "WER on ${x} is" $(compute-wer ark:$data_dir/${x}_gt/text ark:$dec_data_dir/$x/text | cut -d" " -f2-) + done +fi diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step1.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step1.sh new file mode 100644 index 0000000..c1276bb --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step1.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# prepare word WFSTs, reference data, and decode + +set -eu + +w2v_dir= # same as in train.sh +out_dir= # same as in train.sh +lexicon= # word to phone mapping +wrd_arpa_lm= # word LM +wrd_arpa_lm_bin= # word LM for KenLM, used in unsupervised selection + +dec_exp= # what HMM stage to decode (e.g., tri3b) +dec_script= # what decoding script to use (e.g., steps/decode_fmllr.sh) +phn_label=phnc +wrd_label=wrd +dec_suffix=word +dec_splits="train valid" +valid_split="valid" + +data_dir=$out_dir/data +wrd_data_dir=$out_dir/data_word + +lexicon_clean=$(mktemp) +cat $lexicon | sort | uniq > $lexicon_clean +local/prepare_lang_word.sh $w2v_dir/dict.${phn_label}.txt $data_dir $lexicon_clean && rm $lexicon_clean +local/prepare_lm.sh --langdir $data_dir/lang_word --lmdir $data_dir/lang_test_word $wrd_arpa_lm $data_dir + +for x in $dec_splits; do + x_gt=${x}_gt + mkdir -p $wrd_data_dir/$x_gt + cp $data_dir/$x_gt/{feats.scp,cmvn.scp,utt2spk,spk2utt} $wrd_data_dir/$x_gt/ + python local/copy_aligned_text.py < $w2v_dir/$x.$wrd_label > $wrd_data_dir/$x_gt/text +done + +local/decode.sh --nj 40 --graph_name graph${dec_suffix} 
--decode_suffix $dec_suffix \ + --val_sets "$dec_splits" --decode_script $dec_script \ + $out_dir/exp/$dec_exp $data_dir $data_dir/lang_test_word + +local/unsup_select_decode_word.sh \ + --split $valid_split --kenlm_path $wrd_arpa_lm_bin \ + --ref_txt $wrd_data_dir/${valid_split}_gt/text \ + --psd_txt $data_dir/${valid_split}/text \ + --dec_name decode${dec_suffix} --graph_name graph${dec_suffix} \ + --phonemize_lexicon $data_dir/local/dict_word/lexicon.txt \ + $out_dir/exp diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step2.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step2.sh new file mode 100644 index 0000000..59a6cbb --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/decode_word_step2.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# prepare a new data directory of HMM word output + +. ./path.sh + +set -eu + +out_dir= # same as in train.sh +dec_lmparam= # LM hyperparameters (e.g., 7.0.0) + +dec_exp=tri3b # what HMM stage to decode (e.g., tri3b) +dec_suffix=word +dec_splits="train valid" +dec_data_dir=$out_dir/dec_data_word # where to write HMM output + +data_dir=$out_dir/data +wrd_data_dir=$out_dir/data_word + +for x in $dec_splits; do + mkdir -p $dec_data_dir/$x + cp $data_dir/$x/{feats.scp,cmvn.scp,utt2spk,spk2utt} $dec_data_dir/$x/ + + tra=$out_dir/exp/$dec_exp/decode${dec_suffix}_${x}/scoring/${dec_lmparam}.tra + cat $tra | utils/int2sym.pl -f 2- $data_dir/lang_word/words.txt | \ + sed 's:<UNK>::g' | sed 's:<SIL>::g' > $dec_data_dir/$x/text + utils/fix_data_dir.sh $dec_data_dir/$x + echo "WER on $x is" $(compute-wer ark:$wrd_data_dir/${x}_gt/text ark:$dec_data_dir/$x/text | cut -d" " -f2-) +done + diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/copy_aligned_text.py b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/copy_aligned_text.py new file mode 100644 index 0000000..5f4faa9 --- /dev/null +++ 
b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/copy_aligned_text.py @@ -0,0 +1,4 @@ +import sys + +for idx, line in enumerate(sys.stdin): + print(f"utt{idx:010d} {line}", end='') \ No newline at end of file diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/decode.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/decode.sh new file mode 100644 index 0000000..811cb63 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/decode.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -u + +val_sets="dev_other" +graph_name=graph +decode_suffix="" +decode_script="steps/decode_fmllr.sh" +decode_args="" +nj=60 + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +set -x +exp_dir=$1 +data_root=$2 +lang_test=$3 + +graph=$exp_dir/$graph_name + +if [ ! -d $graph ]; then + utils/mkgraph.sh $lang_test $exp_dir $graph +fi + +for part in $val_sets; do + dec_dir=$exp_dir/decode${decode_suffix}_${part} + if [ ! -d $dec_dir ]; then + echo "decoding $part for $exp_dir" + $decode_script --nj $nj --cmd "$decode_cmd" $decode_args \ + $graph $data_root/$part $dec_dir & + else + echo "$dec_dir exists. 
skip" + fi +done + +wait diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_data_from_w2v.py b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_data_from_w2v.py new file mode 100644 index 0000000..66954ea --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_data_from_w2v.py @@ -0,0 +1,56 @@ +import kaldi_io +import numpy as np +import os + + +def get_parser(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("w2v_dir", help="wav2vec feature and text directory") + parser.add_argument("tar_root", help="output data directory in kaldi's format") + parser.add_argument("split", help="name of the subset") + parser.add_argument("--label", default="", help="if specified, copy labels too") + return parser + +def main(): + parser = get_parser() + args = parser.parse_args() + + tar_dir = os.path.join(args.tar_root, args.split) + os.makedirs(tar_dir, exist_ok=True) + + lengths_path = os.path.join(args.w2v_dir, f"{args.split}.lengths") + with open(lengths_path) as f: + lengths = [int(line.rstrip()) for line in f] + offsets = [0] + np.cumsum(lengths[:-1]).tolist() + feats = np.load( + os.path.join(args.w2v_dir, f"{args.split}.npy"), + mmap_mode="r" + ) + assert feats.shape[0] == sum(lengths), \ + f"lengths mismatch {feats.shape[0]} != {sum(lengths)}" + + ark_path = os.path.join(tar_dir, "feats.ark") + scp_path = os.path.join(tar_dir, "feats.scp") + wspec = f"ark:| copy-feats --compress=true ark:- ark,scp:{ark_path},{scp_path}" + with kaldi_io.open_or_fd(wspec, "wb") as f: + for idx, (offset, length) in enumerate(zip(offsets, lengths)): + feat = feats[offset:offset+length] + kaldi_io.write_mat(f, feat, key=f"utt{idx:010d}") + + u2s_path = os.path.join(tar_dir, "utt2spk") + s2u_path = os.path.join(tar_dir, "spk2utt") + with open(u2s_path, "w") as f_u2s, open(s2u_path, "w") as f_s2u: + for idx in range(len(lengths)): + f_u2s.write(f"utt{idx:010d} 
utt{idx:010d}\n") + f_s2u.write(f"utt{idx:010d} utt{idx:010d}\n") + + if bool(args.label): + lab_path = os.path.join(args.w2v_dir, f"{args.split}.{args.label}") + txt_path = os.path.join(tar_dir, "text") + with open(lab_path) as f_lab, open(txt_path, "w") as f_txt: + for idx, line in enumerate(f_lab): + f_txt.write(f"utt{idx:010d} {line}") + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang.sh new file mode 100644 index 0000000..e9a8000 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +sil_prob=0.5 +num_sil_states=3 +num_nonsil_states=1 + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +set -eux + +dict=$1 +data_dir=$2 + +dict_dir=$data_dir/local/dict +tmplm_dir=$data_dir/local/lang_tmp +lm_dir=$data_dir/lang + +mkdir -p $dict_dir $tmplm_dir $lm_dir + +# prepare dict +echo "SIL" > $dict_dir/silence_phones.txt +echo "SIL" > $dict_dir/optional_silence.txt +awk '{print $1}' $dict > $dict_dir/nonsilence_phones.txt + +echo "SIL SIL" > $dict_dir/lexicon.txt +echo "<UNK> SIL" >> $dict_dir/lexicon.txt +awk '{print $1" "$1}' $dict >> $dict_dir/lexicon.txt + +echo "SIL" > $dict_dir/extra_questions.txt +awk '{printf $1" "} END {printf "\n"}' $dict >> $dict_dir/extra_questions.txt + +# prepare lang +utils/prepare_lang.sh --sil-prob $sil_prob --position-dependent-phones false \ + --num_sil_states $num_sil_states --num_nonsil_states $num_nonsil_states \ + $dict_dir "<UNK>" $tmplm_dir $lm_dir diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang_word.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang_word.sh new file mode 100644 index 0000000..a7ea387 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lang_word.sh @@ -0,0 +1,35 @@ 
+#!/bin/bash + +num_sil_states=3 +num_nonsil_states=1 + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +set -eux + +dict=$1 +data_dir=$2 +lexicon=$3 + +dict_dir=$data_dir/local/dict_word +tmplm_dir=$data_dir/local/lang_tmp_word +lm_dir=$data_dir/lang_word + +mkdir -p $dict_dir $tmplm_dir $lm_dir + +# prepare dict +echo "SIL" > $dict_dir/silence_phones.txt +echo "SIL" > $dict_dir/optional_silence.txt +awk '{print $1}' $dict > $dict_dir/nonsilence_phones.txt + +(echo "!SIL SIL"; echo "<UNK> SIL";) | cat - $lexicon > $dict_dir/lexicon.txt + +echo "SIL" > $dict_dir/extra_questions.txt +awk '{printf $1" "} END {printf "\n"}' $dict >> $dict_dir/extra_questions.txt + +# prepare lang +utils/prepare_lang.sh --position-dependent-phones false \ + --num_sil_states $num_sil_states --num_nonsil_states $num_nonsil_states \ + $dict_dir "<UNK>" $tmplm_dir $lm_dir diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lm.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lm.sh new file mode 100644 index 0000000..c2edcef --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/prepare_lm.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +langdir="" +lmdir="" + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +arpa_lm=$1 +data=$2 + +if [ -z $langdir ]; then + langdir=$data/lang +fi +if [ -z $lmdir ]; then + lmdir=$data/lang_test +fi + +if [ ! -d $langdir ]; then + echo "$langdir not found. 
run local/prepare_lang.sh first" && exit 1 +fi + +mkdir -p $lmdir +cp -r $langdir/* $lmdir + +if [[ "$arpa_lm" == *.gz ]]; then + gunzip -c $arpa_lm | arpa2fst --disambig-symbol=#0 --read-symbol-table=$lmdir/words.txt - $lmdir/G.fst +else + arpa2fst --disambig-symbol=#0 --read-symbol-table=$lmdir/words.txt $arpa_lm $lmdir/G.fst +fi +fstisstochastic $lmdir/G.fst +utils/validate_lang.pl $lmdir || exit 1 + +echo "done preparing lm ($lmdir)" diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/score.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/score.sh new file mode 100644 index 0000000..cb5bbb7 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/score.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# 2014 Guoguo Chen +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +word_ins_penalty=0.0,0.5,1.0 +min_lmwt=7 +max_lmwt=17 +iter=final +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>" + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt <int> # minumum LM-weight for lattice rescoring " + echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/show_wer.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/show_wer.sh new file mode 100644 index 0000000..9ecf169 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/show_wer.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +split="dev_other" +ref_data="" +get_best_wer=true +dec_name="decode" +graph_name="graph" + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +exp_root=$1 + +set -eu + +echo "==== WER w.r.t. pseudo transcript" +for x in $exp_root/*/${dec_name}_${split}*; do grep WER $x/wer_* 2>/dev/null | utils/best_wer.sh; done + + +if [ ! -z $ref_data ]; then + echo "==== WER w.r.t. 
real transcript (select based on pseudo WER)" + ref_txt=$ref_data/$split/text + for x in $exp_root/*/${dec_name}_${split}*; do + lang=$(dirname $x)/$graph_name + + lmwt=$( + grep WER $x/wer_* 2>/dev/null | utils/best_wer.sh | + sed 's/.*wer_\(.*\)$/\1/g' | sed 's/_/./g' + ) + tra=$x/scoring/$lmwt.tra + cat $tra | utils/int2sym.pl -f 2- $lang/words.txt | sed 's:<UNK>::g' | sed 's:<SIL>::g' | \ + compute-wer --text --mode=present \ + ark:$ref_txt ark,p:- 2> /dev/null | grep WER | xargs -I{} echo {} $tra + done +fi + +if [ ! -z $ref_data ] && $get_best_wer; then + echo "==== WER w.r.t. real transcript (select based on true WER)" + ref_txt=$ref_data/$split/text + for x in $exp_root/*/${dec_name}_${split}*; do + lang=$(dirname $x)/$graph_name + + for tra in $x/scoring/*.tra; do + cat $tra | utils/int2sym.pl -f 2- $lang/words.txt | sed 's:<UNK>::g' | sed 's:<SIL>::g' | \ + compute-wer --text --mode=present \ + ark:$ref_txt ark,p:- 2> /dev/null | grep WER | xargs -I{} echo {} $tra + done | sort -k2n | head -n1 + done +fi + +exit 0; diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/train_subset_lgbeam.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/train_subset_lgbeam.sh new file mode 100644 index 0000000..913c1d8 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/train_subset_lgbeam.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash + +out_root=/tmp +out_name=train_${RANDOM} +num_nonsil_states=1 + +valid="dev_other" +train="train" +mono_size="-1" # 2000 +tri1_size="-1" # 5000 +tri2b_size="-1" # 10000 +tri3b_size="-1" # 10000 + +# Acoustic model parameters +numLeavesTri1=2000 +numGaussTri1=10000 +numLeavesMLLT=2500 +numGaussMLLT=15000 +numLeavesSAT=2500 +numGaussSAT=15000 + +stage=1 +max_stage=1 + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +data=$1 +lang=$2 +lang_test=$3 + +exp_root=$out_root/$out_name + +# you might not want to do this for interactive shells. 
+set -e + + +if [ $stage -le 1 ] && [ $max_stage -ge 1 ]; then + # train a monophone system + if [ ! $mono_size -eq -1 ]; then + utils/subset_data_dir.sh $data/$train $mono_size $data/${train}_${mono_size} + mono_train=${train}_${mono_size} + else + mono_train=${train} + fi + + steps/train_mono.sh --boost-silence 1.25 --nj 20 --cmd "$train_cmd" \ + --initial-beam 40 --regular-beam 60 --retry-beam 120 \ + $data/$mono_train $lang $exp_root/mono + + utils/mkgraph.sh $lang_test $exp_root/mono $exp_root/mono/graph + steps/decode.sh --nj 20 --cmd "$decode_cmd" \ + $exp_root/mono/graph $data/$valid $exp_root/mono/decode_$valid & +fi + + +if [ $stage -le 2 ] && [ $max_stage -ge 2 ]; then + # train a first delta + delta-delta triphone system on a subset of 5000 utterances + if [ ! $tri1_size -eq -1 ]; then + utils/subset_data_dir.sh $data/$train $tri1_size $data/${train}_${tri1_size} + tri1_train=${train}_${tri1_size} + else + tri1_train=${train} + fi + + steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ + $data/$tri1_train $lang \ + $exp_root/mono $exp_root/mono_ali_${tri1_train} + + steps_gan/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ + --num_nonsil_states $num_nonsil_states $numLeavesTri1 $numGaussTri1 \ + $data/$tri1_train $lang \ + $exp_root/mono_ali_${tri1_train} $exp_root/tri1 + + utils/mkgraph.sh $lang_test $exp_root/tri1 $exp_root/tri1/graph + steps/decode.sh --nj 20 --cmd "$decode_cmd" \ + $exp_root/tri1/graph $data/$valid $exp_root/tri1/decode_$valid & +fi + +if [ $stage -le 3 ] && [ $max_stage -ge 3 ]; then + # train an LDA+MLLT system. + if [ ! 
$tri2b_size -eq -1 ]; then + utils/subset_data_dir.sh $data/$train $tri2b_size $data/${train}_${tri2b_size} + tri2b_train=${train}_${tri2b_size} + else + tri2b_train=${train} + fi + + steps/align_si.sh --nj 10 --cmd "$train_cmd" \ + $data/$tri2b_train $lang \ + $exp_root/tri1 $exp_root/tri1_ali_${tri2b_train} + + steps_gan/train_lda_mllt.sh --cmd "$train_cmd" \ + --num_nonsil_states $num_nonsil_states \ + --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT \ + $data/$tri2b_train $lang \ + $exp_root/tri1_ali_${tri2b_train} $exp_root/tri2b + + utils/mkgraph.sh $lang_test $exp_root/tri2b $exp_root/tri2b/graph + steps/decode.sh --nj 20 --cmd "$decode_cmd" \ + $exp_root/tri2b/graph $data/$valid $exp_root/tri2b/decode_$valid & +fi + + +if [ $stage -le 4 ] && [ $max_stage -ge 4 ]; then + # Train tri3b (LDA+MLLT+SAT) on $tri3b_train; the tri2b alignments are named after the set being aligned, as in stages 2-3 + if [ ! $tri3b_size -eq -1 ]; then + utils/subset_data_dir.sh $data/$train $tri3b_size $data/${train}_${tri3b_size} + tri3b_train=${train}_${tri3b_size} + else + tri3b_train=${train} + fi + + steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \ + $data/$tri3b_train $lang \ + $exp_root/tri2b $exp_root/tri2b_ali_${tri3b_train} + + steps_gan/train_sat.sh --cmd "$train_cmd" \ + --num_nonsil_states $num_nonsil_states $numLeavesSAT $numGaussSAT \ + $data/$tri3b_train $lang \ + $exp_root/tri2b_ali_${tri3b_train} $exp_root/tri3b + + utils/mkgraph.sh $lang_test $exp_root/tri3b $exp_root/tri3b/graph + steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \ + $exp_root/tri3b/graph $data/$valid $exp_root/tri3b/decode_$valid & +fi + +wait diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select.py b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select.py new file mode 100644 index 0000000..1122c88 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select.py @@ -0,0 +1,135 @@ +""" +Implement unsupervised metric for decoding 
hyperparameter selection: + $$ log(LM_PPL) * max(ViterbiUER, min_vt_uer) $$ +""" +import argparse +import logging +import math +import sys + +import kenlm +import editdistance +from g2p_en import G2p + +logging.root.setLevel(logging.INFO) +logging.basicConfig(stream=sys.stdout, level=logging.INFO) +logger = logging.getLogger(__name__) + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("ref_tra", help="reference pseudo labels") + parser.add_argument("hyp_tra", help="decoded pseudo labels to be assess") + parser.add_argument("--kenlm_path", default="/checkpoint/abaevski/data/speech/libri/librispeech_lm_novox.phnc_o5.bin", help="") + parser.add_argument("--uppercase", action="store_true", help="") + parser.add_argument("--skipwords", default="", help="") + parser.add_argument("--gt_tra", default="", help="ground truth pseudo labels for computing oracle WER") + parser.add_argument("--min_vt_uer", default=0.0, type=float) + parser.add_argument("--phonemize", action="store_true", help="phonemize word hypotheses, used when reference is phone transcript") + parser.add_argument("--phonemize_lexicon", default="", type=str, help="use a lexicon for phonemizing") + return parser + +def load_tra(tra_path): + with open(tra_path, "r") as f: + uid_to_tra = {} + for line in f: + toks = line.rstrip().split() + uid, tra = toks[0], " ".join(toks[1:]) + uid_to_tra[uid] = tra + logger.debug(f"loaded {len(uid_to_tra)} utterances from {tra_path}") + return uid_to_tra + +def load_lex(lex_path): + with open(lex_path, "r") as f: + w2p = {} + for line in f: + w, p = line.rstrip().split(None, 1) + w2p[w] = p.split() + return w2p + +def compute_wer(ref_uid_to_tra, hyp_uid_to_tra, g2p, g2p_dict): + d_cnt = 0 + w_cnt = 0 + w_cnt_h = 0 + for uid in hyp_uid_to_tra: + ref = ref_uid_to_tra[uid].split() + if g2p_dict is not None: + hyp = [] + for word in hyp_uid_to_tra[uid].split(): + if word in g2p_dict: + hyp = hyp + g2p_dict[word] + else: + logger.warning(f"{word} not in 
g2p_dict") + elif g2p is not None: + hyp = g2p(hyp_uid_to_tra[uid]) + hyp = [p for p in hyp if p != "'" and p != " "] + hyp = [p[:-1] if p[-1].isnumeric() else p for p in hyp] + else: + hyp = hyp_uid_to_tra[uid].split() + logger.debug(( + f"======================\n" + f"HYP: {' '.join(hyp)}\n" + f"REF: {' '.join(ref)}" + )) + d_cnt += editdistance.eval(ref, hyp) + w_cnt += len(ref) + w_cnt_h += len(hyp) + wer = float(d_cnt) / w_cnt + logger.debug(( + f"wer = {wer*100:.2f}%; num. of ref words = {w_cnt}; " + f"num. of hyp words = {w_cnt_h}; num. of sentences = {len(ref_uid_to_tra)}" + )) + return wer + +def compute_lm_ppl(hyp_uid_to_tra, score_fn): + lm_score = 0. + w_cnt = 0 + for hyp in hyp_uid_to_tra.values(): + cur_score = score_fn(hyp) + cur_cnt = len(hyp.split()) + 1 # plus one for </s> + lm_score += cur_score + w_cnt += cur_cnt + logger.debug(( + f"======================\n" + f"score sum/avg = {cur_score:.2f}/{cur_score/cur_cnt:.2f}\n" + f"hyp = {hyp}" + )) + lm_ppl = math.pow(10, -lm_score / w_cnt) + logger.debug(f"lm ppl = {lm_ppl:.2f}; num. 
of words = {w_cnt}") + return lm_ppl + +def main(): + args = get_parser().parse_args() + logger.debug(f"Args: {args}") + + ref_uid_to_tra = load_tra(args.ref_tra) + hyp_uid_to_tra = load_tra(args.hyp_tra) + assert not bool(set(hyp_uid_to_tra.keys()) - set(ref_uid_to_tra.keys())) + + lm = kenlm.Model(args.kenlm_path) + skipwords = set(args.skipwords.split(",")) + def compute_lm_score(s): + s = " ".join(w for w in s.split() if w not in skipwords) + s = s.upper() if args.uppercase else s + return lm.score(s) + + g2p, g2p_dict = None, None + if args.phonemize: + if args.phonemize_lexicon: + g2p_dict = load_lex(args.phonemize_lexicon) + else: + g2p = G2p() + + wer = compute_wer(ref_uid_to_tra, hyp_uid_to_tra, g2p, g2p_dict) + lm_ppl = compute_lm_ppl(hyp_uid_to_tra, compute_lm_score) + + gt_wer = -math.inf + if args.gt_tra: + gt_uid_to_tra = load_tra(args.gt_tra) + gt_wer = compute_wer(gt_uid_to_tra, hyp_uid_to_tra, None, None) + + score = math.log(lm_ppl) * max(wer, args.min_vt_uer) + logging.info(f"{args.hyp_tra}: score={score:.4f}; wer={wer*100:.2f}%; lm_ppl={lm_ppl:.4f}; gt_wer={gt_wer*100:.2f}%") + +if __name__ == "__main__": + main() diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode.sh new file mode 100644 index 0000000..b34c5b6 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +split="dev_other" +ref_txt="" # ground truth transcript path +psd_txt="" # pseudo transcript path +get_best_wer=true +dec_name="decode" +graph_name="graph" +kenlm_path=/checkpoint/abaevski/data/speech/libri/librispeech_lm_novox.phnc_o6.bin + +. ./cmd.sh +. ./path.sh +. parse_options.sh + +exp_root=$1 +unsup_args="" +if [ $# -ge 2 ]; then + unsup_args=$2 +fi + +set -eu + +if [ ! -z $ref_txt ] && $get_best_wer; then + echo "==== WER w.r.t. 
real transcript (select based on unsupervised metric)" + for x in $exp_root/*/${dec_name}_${split}*; do + lang=$(dirname $x)/$graph_name + + ( + for tra in $x/scoring/*.tra; do + cat $tra | utils/int2sym.pl -f 2- $lang/words.txt | sed 's:<UNK>::g' | sed 's:<SIL>::g' > $tra.txt + python local/unsup_select.py $psd_txt $tra.txt --kenlm_path $kenlm_path --gt_tra $ref_txt $unsup_args + done 2>/dev/null | grep "score=" | sed 's/=/ /g' | sed 's/;//g' | sort -k3n | head -n1 + ) & + done +fi +wait + diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode_word.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode_word.sh new file mode 100644 index 0000000..c10a6b8 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/local/unsup_select_decode_word.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +split="dev_other" +ref_txt="" # ground truth transcript path +psd_txt="" # pseudo transcript path +get_best_wer=true +dec_name="decode" +graph_name="graph" +kenlm_path=/checkpoint/abaevski/data/speech/libri/librispeech_lm_novox.phnc_o6.bin +phonemize_lexicon="" + +. ./cmd.sh +. ./path.sh +. parse_options.sh +. /private/home/wnhsu/unsup_asr/fairseq-py-unsup/env.sh + +exp_root=$1 + +set -eu + +if [ ! -z $ref_txt ] && $get_best_wer; then + echo "==== WER w.r.t. 
real transcript (select based on unsupervised metric)" + for x in $exp_root/*/${dec_name}_${split}*; do + lang=$(dirname $x)/$graph_name + + for tra in $x/scoring/*.tra; do + cat $tra | utils/int2sym.pl -f 2- $lang/words.txt | sed 's:<UNK>::g' > $tra.txt + python local/unsup_select.py $psd_txt $tra.txt \ + --kenlm_path $kenlm_path --gt_tra $ref_txt --phonemize \ + --phonemize_lexicon "$phonemize_lexicon" + done | grep "score=" | sed 's/=/ /g' | sed 's/;//g' | sort -k3n | head -n1 + done +fi + + diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/path.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/path.sh new file mode 100644 index 0000000..1a6fb5f --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/path.sh @@ -0,0 +1,5 @@ +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps new file mode 100644 index 0000000..6e99bf5 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_deltas.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_deltas.sh new file mode 100644 index 0000000..af68715 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_deltas.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration. +stage=-4 # This allows restarting partway through, when something went wrong. 
+config= +cmd=run.pl +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +realign_iters="10 20 30"; +num_iters=35 # Number of iterations of training +max_iter_inc=25 # Last iter to increase #Gauss on. +beam=10 +careful=false +retry_beam=40 +boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment +power=0.25 # Exponent for number of gaussians according to occurrence counts +cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves +norm_vars=false # deprecated. Prefer --cmvn-opts "--norm-vars=true" + # use the option --cmvn-opts "--norm-means=false" +cmvn_opts= +delta_opts= +context_opts= # use"--context-width=5 --central-position=2" for quinphone +num_nonsil_states=3 +# End configuration. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh; +. parse_options.sh || exit 1; + +if [ $# != 6 ]; then + echo "Usage: steps/train_deltas.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <alignment-dir> <exp-dir>" + echo "e.g.: steps/train_deltas.sh 2000 10000 data/train_si84_half data/lang exp/mono_ali exp/tri1" + echo "main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." + echo " --config <config-file> # config containing options" + echo " --stage <stage> # stage to do partial re-run from." + exit 1; +fi + +numleaves=$1 +totgauss=$2 +data=$3 +lang=$4 +alidir=$5 +dir=$6 + +for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do + [ ! 
-f $f ] && echo "train_deltas.sh: no such file $f" && exit 1; +done + +numgauss=$numleaves +incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss +oov=`cat $lang/oov.int` || exit 1; +ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; +nj=`cat $alidir/num_jobs` || exit 1; +mkdir -p $dir/log +echo $nj > $dir/num_jobs + +utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + +sdata=$data/split$nj; +split_data.sh $data $nj || exit 1; + + +[ $(cat $alidir/cmvn_opts 2>/dev/null | wc -c) -gt 1 ] && [ -z "$cmvn_opts" ] && \ + echo "$0: warning: ignoring CMVN options from source directory $alidir" +$norm_vars && cmvn_opts="--norm-vars=true $cmvn_opts" +echo $cmvn_opts > $dir/cmvn_opts # keep track of options to CMVN. +[ ! -z $delta_opts ] && echo $delta_opts > $dir/delta_opts + +feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |" + +rm $dir/.error 2>/dev/null + +if [ $stage -le -3 ]; then + echo "$0: accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + acc-tree-stats $context_opts \ + --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ + "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; + sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1; + rm $dir/*.treeacc +fi + +if [ $stage -le -2 ]; then + echo "$0: getting questions for tree-building, via clustering" + # preparing questions, roots file... 
+ cluster-phones --pdf-class-list=$(($num_nonsil_states / 2)) $context_opts \ + $dir/treeacc $lang/phones/sets.int \ + $dir/questions.int 2> $dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $context_opts $lang/topo $dir/questions.int \ + $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "$0: building the tree" + $cmd $dir/log/build_tree.log \ + build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; + + $cmd $dir/log/init_model.log \ + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl || exit 1; + if grep 'no stats' $dir/log/init_model.log; then + echo "** The warnings above about 'no stats' generally mean you have phones **" + echo "** (or groups of phones) in your phone set that had no corresponding data. **" + echo "** You should probably figure out whether something went wrong, **" + echo "** or whether your data just doesn't happen to have examples of those **" + echo "** phones. **" + fi + + gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl 2>$dir/log/mixup.log || exit 1; + rm $dir/treeacc +fi + +if [ $stage -le -1 ]; then + # Convert the alignments. 
+ echo "$0: converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 0 ]; then + echo "$0: compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + +x=1 +while [ $x -lt $num_iters ]; do + echo "$0: training pass $x" + if [ $stage -le $x ]; then + if echo $realign_iters | grep -w $x >/dev/null; then + echo "$0: aligning data" + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam --careful=$careful "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --mix-up=$numgauss --power=$power \ + --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc + rm $dir/$x.occs + fi + [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss]; + x=$[$x+1]; +done + +rm $dir/final.mdl $dir/final.occs 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs + +steps/diagnostic/analyze_alignments.sh --cmd "$cmd" $lang $dir + +# Summarize warning messages... 
+utils/summarize_warnings.pl $dir/log + +steps/info/gmm_dir_info.pl $dir + +echo "$0: Done training system with delta+delta-delta features in $dir" + +exit 0 diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_lda_mllt.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_lda_mllt.sh new file mode 100644 index 0000000..9d8c319 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_lda_mllt.sh @@ -0,0 +1,239 @@ +#!/usr/bin/env bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# +# LDA+MLLT refers to the way we transform the features after computing +# the MFCCs: we splice across several frames, reduce the dimension (to 40 +# by default) using Linear Discriminant Analysis), and then later estimate, +# over multiple iterations, a diagonalizing transform known as MLLT or STC. +# See http://kaldi-asr.org/doc/transform.html for more explanation. +# +# Apache 2.0. + +# Begin configuration. +cmd=run.pl +config= +stage=-5 +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +realign_iters="10 20 30"; +mllt_iters="2 4 6 12"; +num_iters=35 # Number of iterations of training +max_iter_inc=25 # Last iter to increase #Gauss on. +dim=40 +beam=10 +retry_beam=40 +careful=false +boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment +power=0.25 # Exponent for number of gaussians according to occurrence counts +randprune=4.0 # This is approximately the ratio by which we will speed up the + # LDA and MLLT calculations via randomized pruning. +splice_opts= +cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves +norm_vars=false # deprecated. Prefer --cmvn-opts "--norm-vars=false" +cmvn_opts= +context_opts= # use "--context-width=5 --central-position=2" for quinphone. +# End configuration. +train_tree=true # if false, don't actually train the tree. 
+use_lda_mat= # If supplied, use this LDA[+MLLT] matrix. +num_nonsil_states=3 + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# != 6 ]; then + echo "Usage: steps/train_lda_mllt.sh [options] <#leaves> <#gauss> <data> <lang> <alignments> <dir>" + echo " e.g.: steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." + echo " --config <config-file> # config containing options" + echo " --stage <stage> # stage to do partial re-run from." + exit 1; +fi + +numleaves=$1 +totgauss=$2 +data=$3 +lang=$4 +alidir=$5 +dir=$6 + +for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do + [ ! -f $f ] && echo "train_lda_mllt.sh: no such file $f" && exit 1; +done + +numgauss=$numleaves +incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter #gauss increment +oov=`cat $lang/oov.int` || exit 1; +nj=`cat $alidir/num_jobs` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; + +mkdir -p $dir/log + +utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + +echo $nj >$dir/num_jobs +echo "$splice_opts" >$dir/splice_opts # keep track of frame-splicing options + # so that later stages of system building can know what they were. + + +[ $(cat $alidir/cmvn_opts 2>/dev/null | wc -c) -gt 1 ] && [ -z "$cmvn_opts" ] && \ + echo "$0: warning: ignoring CMVN options from source directory $alidir" +$norm_vars && cmvn_opts="--norm-vars=true $cmvn_opts" +echo $cmvn_opts > $dir/cmvn_opts # keep track of options to CMVN. 
+ +sdata=$data/split$nj; +split_data.sh $data $nj || exit 1; + +splicedfeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" +# Note: $feats gets overwritten later in the script. +feats="$splicedfeats transform-feats $dir/0.mat ark:- ark:- |" + + + +if [ $stage -le -5 ]; then + if [ -z "$use_lda_mat" ]; then + echo "$0: Accumulating LDA statistics." + rm $dir/lda.*.acc 2>/dev/null + $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \ + ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \ + acc-lda --rand-prune=$randprune $alidir/final.mdl "$splicedfeats" ark,s,cs:- \ + $dir/lda.JOB.acc || exit 1; + est-lda --write-full-matrix=$dir/full.mat --dim=$dim $dir/0.mat $dir/lda.*.acc \ + 2>$dir/log/lda_est.log || exit 1; + rm $dir/lda.*.acc + else + echo "$0: Using supplied LDA matrix $use_lda_mat" + cp $use_lda_mat $dir/0.mat || exit 1; + [ ! -z "$mllt_iters" ] && \ + echo "$0: Warning: using supplied LDA matrix $use_lda_mat but we will do MLLT," && \ + echo " which you might not want; to disable MLLT, specify --mllt-iters ''" && \ + sleep 5 + fi +fi + +cur_lda_iter=0 + +if [ $stage -le -4 ] && $train_tree; then + echo "$0: Accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + acc-tree-stats $context_opts \ + --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ + "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; + [ `ls $dir/*.treeacc | wc -w` -ne "$nj" ] && echo "$0: Wrong #tree-accs" && exit 1; + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + + +if [ $stage -le -3 ] && $train_tree; then + echo "$0: Getting questions for tree clustering." + # preparing questions, roots file... 
+ cluster-phones --pdf-class-list=$(($num_nonsil_states / 2)) $context_opts $dir/treeacc $lang/phones/sets.int \ + $dir/questions.int 2> $dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $context_opts $lang/topo $dir/questions.int \ + $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "$0: Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; +fi + +if [ $stage -le -2 ]; then + echo "$0: Initializing the model" + if $train_tree; then + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + rm $dir/treeacc + else + cp $alidir/tree $dir/ || exit 1; + $cmd JOB=1 $dir/log/init_model.log \ + gmm-init-model-flat $dir/tree $lang/topo $dir/1.mdl \ + "$feats subset-feats ark:- ark:-|" || exit 1; + fi +fi + + +if [ $stage -le -1 ]; then + # Convert the alignments. 
+ echo "$0: Converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 0 ] && [ "$realign_iters" != "" ]; then + echo "$0: Compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + + +x=1 +while [ $x -lt $num_iters ]; do + echo Training pass $x + if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then + echo Aligning data + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam --careful=$careful "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + if echo $mllt_iters | grep -w $x >/dev/null; then + if [ $stage -le $x ]; then + echo "$0: Estimating MLLT" + $cmd JOB=1:$nj $dir/log/macc.$x.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- \| \ + gmm-acc-mllt --rand-prune=$randprune $dir/$x.mdl "$feats" ark:- $dir/$x.JOB.macc \ + || exit 1; + est-mllt $dir/$x.mat.new $dir/$x.*.macc 2> $dir/log/mupdate.$x.log || exit 1; + gmm-transform-means $dir/$x.mat.new $dir/$x.mdl $dir/$x.mdl \ + 2> $dir/log/transform_means.$x.log || exit 1; + compose-transforms --print-args=false $dir/$x.mat.new $dir/$cur_lda_iter.mat $dir/$x.mat || exit 1; + rm $dir/$x.*.macc + fi + feats="$splicedfeats transform-feats $dir/$x.mat ark:- ark:- |" + cur_lda_iter=$x + fi + + if [ $stage -le $x ]; then + $cmd JOB=1:$nj 
$dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss --power=$power \ + $dir/$x.mdl "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs + fi + [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss]; + x=$[$x+1]; +done + +rm $dir/final.{mdl,mat,occs} 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs +ln -s $cur_lda_iter.mat $dir/final.mat + +steps/diagnostic/analyze_alignments.sh --cmd "$cmd" $lang $dir + +# Summarize warning messages... +utils/summarize_warnings.pl $dir/log + +steps/info/gmm_dir_info.pl $dir + +echo "$0: Done training system with LDA+MLLT features in $dir" + +exit 0 diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_sat.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_sat.sh new file mode 100644 index 0000000..f75afaf --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/steps_gan/train_sat.sh @@ -0,0 +1,281 @@ +#!/usr/bin/env bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + + +# This does Speaker Adapted Training (SAT), i.e. train on +# fMLLR-adapted features. It can be done on top of either LDA+MLLT, or +# delta and delta-delta features. If there are no transforms supplied +# in the alignment directory, it will estimate transforms itself before +# building the tree (and in any case, it estimates transforms a number +# of times during training). + + +# Begin configuration section. +stage=-5 +exit_stage=-100 # you can use this to require it to exit at the + # beginning of a specific stage. Not all values are + # supported. 
+fmllr_update_type=full +cmd=run.pl +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +careful=false +boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment +context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone. +realign_iters="10 20 30"; +fmllr_iters="2 4 6 12"; +silence_weight=0.0 # Weight on silence in fMLLR estimation. +num_iters=35 # Number of iterations of training +max_iter_inc=25 # Last iter to increase #Gauss on. +power=0.2 # Exponent for number of gaussians according to occurrence counts +cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves +phone_map= +train_tree=true +tree_stats_opts= +cluster_phones_opts= +compile_questions_opts= +# End configuration section. +num_nonsil_states=3 + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# != 6 ]; then + echo "Usage: steps/train_sat.sh <#leaves> <#gauss> <data> <lang> <ali-dir> <exp-dir>" + echo " e.g.: steps/train_sat.sh 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." + echo " --config <config-file> # config containing options" + echo " --stage <stage> # stage to do partial re-run from." + exit 1; +fi + +numleaves=$1 +totgauss=$2 +data=$3 +lang=$4 +alidir=$5 +dir=$6 + +for f in $data/feats.scp $lang/phones.txt $alidir/final.mdl $alidir/ali.1.gz; do + [ ! 
-f $f ] && echo "train_sat.sh: no such file $f" && exit 1; +done + +numgauss=$numleaves +incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter #gauss increment +oov=`cat $lang/oov.int` +nj=`cat $alidir/num_jobs` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` +ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; +sdata=$data/split$nj; +splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. +cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` +delta_opts=`cat $alidir/delta_opts 2>/dev/null` +phone_map_opt= +[ ! -z "$phone_map" ] && phone_map_opt="--phone-map='$phone_map'" + +mkdir -p $dir/log +cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options. +cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. +cp $alidir/delta_opts $dir 2>/dev/null # delta option. + +utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + +echo $nj >$dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +# Set up features. + +if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +## Set up speaker-independent features. 
+case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + cp $alidir/full.mat $dir 2>/dev/null + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +## Get initial fMLLR transforms (possibly from alignment dir) +if [ -f $alidir/trans.1 ]; then + echo "$0: Using transforms from $alidir" + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |" + cur_trans_dir=$alidir +else + if [ $stage -le -5 ]; then + echo "$0: obtaining initial fMLLR transforms since not present in $alidir" + # The next line is necessary because of $silphonelist otherwise being incorrect; would require + # old $lang dir which would require another option. Not needed anyway. + [ ! -z "$phone_map" ] && \ + echo "$0: error: you must provide transforms if you use the --phone-map option." && exit 1; + $cmd JOB=1:$nj $dir/log/fmllr.0.JOB.log \ + ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alidir/final.mdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $alidir/final.mdl "$sifeats" \ + ark:- ark:$dir/trans.JOB || exit 1; + fi + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + cur_trans_dir=$dir +fi + +if [ $stage -le -4 ] && $train_tree; then + # Get tree stats. 
+ echo "$0: Accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + acc-tree-stats $context_opts $tree_stats_opts $phone_map_opt --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ + "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; + [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-accs" && exit 1; + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + +if [ $stage -le -3 ] && $train_tree; then + echo "$0: Getting questions for tree clustering." + # preparing questions, roots file... + cluster-phones --pdf-class-list=$(($num_nonsil_states / 2)) \ + $cluster_phones_opts $context_opts \ + $dir/treeacc $lang/phones/sets.int $dir/questions.int 2>$dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $context_opts $compile_questions_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "$0: Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree $context_opts --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; +fi + +if [ $stage -le -2 ]; then + echo "$0: Initializing the model" + if $train_tree; then + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + rm $dir/treeacc + else + cp $alidir/tree $dir/ || exit 1; + $cmd JOB=1 $dir/log/init_model.log \ + gmm-init-model-flat $dir/tree $lang/topo $dir/1.mdl \ + "$feats subset-feats ark:- ark:-|" || exit 1; + fi +fi + +if [ $stage -le -1 ]; then + # Convert the alignments. 
+ echo "$0: Converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $phone_map_opt $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +[ "$exit_stage" -eq 0 ] && echo "$0: Exiting early: --exit-stage $exit_stage" && exit 0; + +if [ $stage -le 0 ] && [ "$realign_iters" != "" ]; then + echo "$0: Compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + +x=1 +while [ $x -lt $num_iters ]; do + echo Pass $x + if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then + echo Aligning data + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam --careful=$careful "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + + if echo $fmllr_iters | grep -w $x >/dev/null; then + if [ $stage -le $x ]; then + echo Estimating fMLLR transforms + # We estimate a transform that's additional to the previous transform; + # we'll compose them. + $cmd JOB=1:$nj $dir/log/fmllr.$x.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post $silence_weight $silphonelist $dir/$x.mdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $dir/$x.mdl \ + "$feats" ark:- ark:$dir/tmp_trans.JOB || exit 1; + for n in `seq $nj`; do + ! 
( compose-transforms --b-is-affine=true \ + ark:$dir/tmp_trans.$n ark:$cur_trans_dir/trans.$n ark:$dir/composed_trans.$n \ + && mv $dir/composed_trans.$n $dir/trans.$n && \ + rm $dir/tmp_trans.$n ) 2>$dir/log/compose_transforms.$x.log \ + && echo "$0: Error composing transforms" && exit 1; + done + fi + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + cur_trans_dir=$dir + fi + + if [ $stage -le $x ]; then + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + [ `ls $dir/$x.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --power=$power --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc + rm $dir/$x.occs + fi + [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss]; + x=$[$x+1]; +done + + +if [ $stage -le $x ]; then + # Accumulate stats for "alignment model"-- this model is + # computed with the speaker-independent features, but matches Gaussian-for-Gaussian + # with the final speaker-adapted model. + $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \ + ark,s,cs:- $dir/$x.JOB.acc || exit 1; + [ `ls $dir/$x.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1; + # Update model. 
+ $cmd $dir/log/est_alimdl.log \ + gmm-est --power=$power --remove-low-count-gaussians=false $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1; + rm $dir/$x.*.acc +fi + +rm $dir/final.{mdl,alimdl,occs} 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs +ln -s $x.alimdl $dir/final.alimdl + + +steps/diagnostic/analyze_alignments.sh --cmd "$cmd" $lang $dir + +utils/summarize_warnings.pl $dir/log +( + echo "$0: Likelihood evolution:" + for x in `seq $[$num_iters-1]`; do + tail -n 30 $dir/log/acc.$x.*.log | awk '/Overall avg like/{l += $(NF-3)*$(NF-1); t += $(NF-1); } + /Overall average logdet/{d += $(NF-3)*$(NF-1); t2 += $(NF-1);} + END{ d /= t2; l /= t; printf("%s ", d+l); } ' + done + echo +) | tee $dir/log/summary.log + + +steps/info/gmm_dir_info.pl $dir + +echo "$0: done training SAT system in $dir" + +exit 0 diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/train.sh b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/train.sh new file mode 100644 index 0000000..f3a3d3f --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/train.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -eu + +w2v_dir= # contains features `{train,valid}.{npy,lengths}`, real transcripts `{train,valid}.${label}`, and dict `dict.${label}.txt` +lab_dir= # contains pseudo labels `{train,valid}.txt` +out_dir= # output root +arpa_lm= # phone LM +arpa_lm_bin= # (binary) phone LM for KenLM, used in unsupervised selection + +label=phnc +train_name="train" +valid_name="valid" +data_dir=${out_dir}/data + +mkdir -p ${out_dir}/exp +local/prepare_lang.sh $w2v_dir/dict.${label}.txt $data_dir +local/prepare_lm.sh $arpa_lm $data_dir + +for x in $train_name $valid_name; do + x_gt=${x}_gt + + # prepare pseudo data + python local/prepare_data_from_w2v.py $w2v_dir $data_dir $x + steps/compute_cmvn_stats.sh $data_dir/$x $out_dir/exp/make_feat/$x $out_dir/feats/$x + python local/copy_aligned_text.py < $lab_dir/$x.txt > 
$data_dir/$x/text + + # prepare ground truth data + mkdir $data_dir/$x_gt + cp $data_dir/$x/{feats.scp,cmvn.scp,utt2spk,spk2utt} $data_dir/$x_gt/ + python local/copy_aligned_text.py < $w2v_dir/$x.$label > $data_dir/$x_gt/text +done + +local/train_subset_lgbeam.sh \ + --out_root ${out_dir} --out_name exp --train $train_name --valid $valid_name \ + --mono_size 2000 --tri1_size 5000 --tri2b_size -1 --tri3b_size -1 \ + --stage 1 --max_stage 3 $data_dir $data_dir/lang $data_dir/lang_test + +local/unsup_select_decode.sh \ + --split $valid_name --kenlm_path $arpa_lm_bin \ + --ref_txt $data_dir/${valid_name}_gt/text \ + --psd_txt $data_dir/${valid_name}/text \ + $out_dir/exp diff --git a/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/utils b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/utils new file mode 100644 index 0000000..b240885 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/kaldi_self_train/st/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/fairseq/examples/wav2vec/unsupervised/models/__init__.py b/fairseq/examples/wav2vec/unsupervised/models/__init__.py new file mode 100644 index 0000000..3e3039b --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/models/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .wav2vec_u import Wav2vec_U + + +__all__ = [ + "Wav2vec_U", +] diff --git a/fairseq/examples/wav2vec/unsupervised/models/wav2vec_u.py b/fairseq/examples/wav2vec/unsupervised/models/wav2vec_u.py new file mode 100644 index 0000000..8a1e905 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/models/wav2vec_u.py @@ -0,0 +1,687 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
from dataclasses import dataclass
from enum import Enum, auto
import math
import numpy as np
from typing import Tuple, List, Optional, Dict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import autograd

from fairseq import checkpoint_utils, utils
from fairseq.dataclass import FairseqDataclass
from fairseq.models import BaseFairseqModel, register_model
from fairseq.modules import (
    SamePad,
    TransposeLast,
)


class SegmentationType(Enum):
    """Selects which ``Segmenter`` subclass is built (see ``SEGMENT_FACTORY``)."""

    NONE = auto()
    RANDOM = auto()
    UNIFORM_RANDOM = auto()
    UNIFORM_RANDOM_JOIN = auto()
    JOIN = auto()


@dataclass
class SegmentationConfig(FairseqDataclass):
    """Options read by the ``Segmenter`` classes below."""

    # key into SEGMENT_FACTORY
    type: SegmentationType = SegmentationType.NONE
    # fraction of time steps kept by the random / uniform-random segmenters
    subsample_rate: float = 0.25
    # UniformRandomSegmenter: average each group instead of sampling one frame
    mean_pool: bool = True
    # JoinSegmenter: average each run of equal predictions even in training
    mean_pool_join: bool = False
    # JoinSegmenter: also drop merged segments whose predicted index is 0
    remove_zeros: bool = False


@dataclass
class Wav2vec_UConfig(FairseqDataclass):
    """Hyper-parameters for the discriminator, generator and losses of ``Wav2vec_U``."""

    # --- Discriminator ---
    discriminator_kernel: int = 3
    discriminator_dilation: int = 1
    discriminator_dim: int = 256
    # causal: pad by kernel - 1, otherwise by kernel // 2
    discriminator_causal: bool = True
    # use a kernel-size-1 convolution as the input embedding
    discriminator_linear_emb: bool = False
    # number of inner conv blocks is depth - 1
    discriminator_depth: int = 1
    # pool over time with max instead of the length-normalised sum
    discriminator_max_pool: bool = False
    discriminator_act_after_linear: bool = False
    discriminator_dropout: float = 0.0
    # spectral norm takes precedence over weight norm when both are set
    discriminator_spectral_norm: bool = False
    discriminator_weight_norm: bool = False

    # --- Generator ---
    generator_kernel: int = 4
    generator_dilation: int = 1
    generator_stride: int = 1
    # negative value means "use generator_kernel // 2"
    generator_pad: int = -1
    generator_bias: bool = False
    generator_dropout: float = 0.0
    # non-zero enables BatchNorm1d; the value is also its initial weight
    generator_batch_norm: int = 0
    generator_residual: bool = False

    # --- Blank handling in get_logits ---
    blank_weight: float = 0
    # "add" or "set"; anything else raises in get_logits
    blank_mode: str = "add"
    # use the "<SIL>" dictionary index as blank instead of index 0
    blank_is_sil: bool = False
    no_softmax: bool = False

    # --- Loss weights and sampling ---
    smoothness_weight: float = 0.0
    smoothing: float = 0.0
    # when set, label smoothing is not applied to the generated ("fake") side
    smoothing_one_sided: bool = False
    gradient_penalty: float = 0.0
    # pick a random window instead of the leading slice when aligning sizes
    probabilistic_grad_penalty_slicing: bool = False
    code_penalty: float = 0.0
    mmi_weight: float = 0.0
    target_dim: int = 64
    target_downsample_rate: int = 2
    gumbel: bool = False
    hard_gumbel: bool = True
    # unpacked in Wav2vec_U.__init__ as (max_temp, min_temp, temp_decay)
    temp: Tuple[float, float, float] = (2, 0.1, 0.99995)
    input_dim: int = 128

    # NOTE(review): a dataclass instance used as a default value is rejected by
    # the dataclasses module on newer Python versions (3.11+) -- confirm the
    # interpreter version this is expected to run on.
    segmentation: SegmentationConfig = SegmentationConfig()


class Segmenter(nn.Module):
    """Identity segmenter: both hooks return their inputs unchanged."""

    cfg: SegmentationConfig

    def __init__(self, cfg: SegmentationConfig):
        super().__init__()
        self.cfg = cfg
        self.subsample_rate = cfg.subsample_rate

    def pre_segment(self, dense_x, dense_padding_mask):
        """Hook applied to the input features before the generator."""
        return dense_x, dense_padding_mask

    def logit_segment(self, logits, padding_mask):
        """Hook applied to the generator output."""
        return logits, padding_mask


class RandomSegmenter(Segmenter):
    """Keeps a random, order-preserving subset of time steps."""

    def pre_segment(self, dense_x, dense_padding_mask):
        """Sample ``ceil(T * subsample_rate)`` time steps per row and gather them.

        Indices are drawn with ``multinomial`` over all-ones weights and then
        sorted, so the kept frames stay in temporal order.
        """
        target_num = math.ceil(dense_x.size(1) * self.subsample_rate)
        ones = torch.ones(dense_x.shape[:-1], device=dense_x.device)
        indices, _ = ones.multinomial(target_num).sort(dim=-1)
        # broadcast the time indices over the feature dimension for gather()
        indices_ld = indices.unsqueeze(-1).expand(-1, -1, dense_x.size(-1))
        dense_x = dense_x.gather(1, indices_ld)
        dense_padding_mask = dense_padding_mask.gather(1, index=indices)
        return dense_x, dense_padding_mask


class UniformRandomSegmenter(Segmenter):
    """Splits the time axis into equal groups and reduces each group to one frame."""

    def pre_segment(self, dense_x, dense_padding_mask):
        """Reduce ``dense_x`` (bsz, tsz, fsz) to ``ceil(tsz * subsample_rate)`` frames.

        With ``cfg.mean_pool`` each group is averaged; otherwise one random
        offset is drawn per batch row and that frame is taken from every group.
        """
        bsz, tsz, fsz = dense_x.shape

        target_num = math.ceil(tsz * self.subsample_rate)

        rem = tsz % target_num

        if rem > 0:
            # pad the time axis so it reshapes into target_num groups;
            # the added positions are marked as padding
            dense_x = F.pad(dense_x, [0, 0, 0, target_num - rem])
            dense_padding_mask = F.pad(
                dense_padding_mask, [0, target_num - rem], value=True
            )

        dense_x = dense_x.view(bsz, target_num, -1, fsz)
        dense_padding_mask = dense_padding_mask.view(bsz, target_num, -1)

        if self.cfg.mean_pool:
            dense_x = dense_x.mean(dim=-2)
            # a pooled frame counts as padding only if its whole group is padding
            dense_padding_mask = dense_padding_mask.all(dim=-1)
        else:
            ones = torch.ones((bsz, dense_x.size(2)), device=dense_x.device)
            # one in-group offset per batch row, reused for every group
            indices = ones.multinomial(1)
            indices = indices.unsqueeze(-1).expand(-1, target_num, -1)
            indices_ld = indices.unsqueeze(-1).expand(-1, -1, -1, fsz)
            dense_x = dense_x.gather(2, indices_ld).reshape(bsz, -1, fsz)
            dense_padding_mask = dense_padding_mask.gather(2, index=indices).reshape(
                bsz, -1
            )
        return dense_x, dense_padding_mask


class JoinSegmenter(Segmenter):
    """Merges consecutive frames that share the same argmax prediction."""

    def logit_segment(self, logits, padding_mask):
        """Collapse runs of identical predictions in ``logits`` (bsz, tsz, csz).

        Returns the merged logits, right-padded to the longest merged sequence
        in the batch, together with the matching padding mask.
        """
        preds = logits.argmax(dim=-1)

        if padding_mask.any():
            preds[padding_mask] = -1  # mark pad
        uniques = []

        bsz, tsz, csz = logits.shape

        for p in preds:
            # per row: (run values, frame -> run index, run lengths)
            uniques.append(
                p.cpu().unique_consecutive(return_inverse=True, return_counts=True)
            )

        new_tsz = max(u[0].numel() for u in uniques)
        new_logits = logits.new_zeros(bsz, new_tsz, csz)
        new_pad = padding_mask.new_zeros(bsz, new_tsz)

        for b in range(bsz):
            u, idx, c = uniques[b]
            # computed before `u` is overwritten below
            keep = u != -1

            if self.cfg.remove_zeros:
                keep.logical_and_(u != 0)

            if self.training and not self.cfg.mean_pool_join:
                # reuse `u` to hold one frame index per run: the run start ...
                u[0] = 0
                u[1:] = c.cumsum(0)[:-1]
                m = c > 1
                r = torch.rand(m.sum())
                # ... plus a random offset inside runs longer than one frame
                o = (c[m] * r).long()
                u[m] += o
                new_logits[b, : u.numel()] = logits[b, u]
            else:
                # average the logits of every run: sum by run index, divide by length
                new_logits[b].index_add_(
                    dim=0, index=idx.to(new_logits.device), source=logits[b]
                )
                new_logits[b, : c.numel()] /= c.unsqueeze(-1).to(new_logits.device)

            new_sz = keep.sum()
            if not keep.all():
                # compact the kept runs to the front of the row
                kept_logits = new_logits[b, : c.numel()][keep]
                new_logits[b, :new_sz] = kept_logits

            if new_sz < new_tsz:
                pad = new_tsz - new_sz
                new_logits[b, -pad:] = 0
                new_pad[b, -pad:] = True

        return new_logits, new_pad


class UniformRandomJoinSegmenter(UniformRandomSegmenter, JoinSegmenter):
    """``pre_segment`` from UniformRandomSegmenter, ``logit_segment`` from JoinSegmenter."""

    pass


# Maps the configured segmentation type to the class that implements it.
SEGMENT_FACTORY = {
    SegmentationType.NONE: Segmenter,
    SegmentationType.RANDOM: RandomSegmenter,
    SegmentationType.UNIFORM_RANDOM: UniformRandomSegmenter,
    SegmentationType.UNIFORM_RANDOM_JOIN: UniformRandomJoinSegmenter,
    SegmentationType.JOIN: JoinSegmenter,
}


class Discriminator(nn.Module):
    """1-D convolutional discriminator producing one score per sequence."""

    def __init__(self, dim, cfg: Wav2vec_UConfig):
        """Build the conv stack.

        Args:
            dim: number of input channels.
            cfg: model config; the ``discriminator_*`` fields are read here.
        """
        super().__init__()

        inner_dim = cfg.discriminator_dim
        kernel = cfg.discriminator_kernel
        dilation = cfg.discriminator_dilation
        self.max_pool = cfg.discriminator_max_pool

        if cfg.discriminator_causal:
            padding = kernel - 1
        else:
            padding = kernel // 2

        def make_conv(in_d, out_d, k, p=0, has_dilation=True):
            """Create a Conv1d, optionally wrapped in spectral or weight norm."""
            conv = nn.Conv1d(
                in_d,
                out_d,
                kernel_size=k,
                padding=p,
                dilation=dilation if has_dilation else 1,
            )
            if cfg.discriminator_spectral_norm:
                conv = nn.utils.spectral_norm(conv)
            elif cfg.discriminator_weight_norm:
                conv = nn.utils.weight_norm(conv)
            return conv

        # (depth - 1) hidden blocks followed by the 1-channel output conv
        inner_net = [
            nn.Sequential(
                make_conv(inner_dim, inner_dim, kernel, padding),
                SamePad(kernel_size=kernel, causal=cfg.discriminator_causal),
                nn.Dropout(cfg.discriminator_dropout),
                nn.GELU(),
            )
            for _ in range(cfg.discriminator_depth - 1)
        ] + [
            make_conv(inner_dim, 1, kernel, padding, has_dilation=False),
            SamePad(kernel_size=kernel, causal=cfg.discriminator_causal),
        ]

        if cfg.discriminator_linear_emb:
            emb_net = [make_conv(dim, inner_dim, 1)]
        else:
            emb_net = [
                make_conv(dim, inner_dim, kernel, padding),
                SamePad(kernel_size=kernel, causal=cfg.discriminator_causal),
            ]

        if cfg.discriminator_act_after_linear:
            emb_net.append(nn.GELU())

        self.net = nn.Sequential(
            *emb_net,
            nn.Dropout(cfg.discriminator_dropout),
            *inner_net,
        )

    def forward(self, x, padding_mask):
        """Score each sequence in ``x`` (B, T, C); returns a tensor of shape (B,).

        Padded positions are excluded: set to ``-inf`` before max pooling, or
        zeroed and left out of the length used to normalise the sum.
        """
        x = x.transpose(1, 2)  # BTC -> BCT
        x = self.net(x)
        x = x.transpose(1, 2)
        x_sz = x.size(1)
        if padding_mask is not None and padding_mask.any() and padding_mask.dim() > 1:
            padding_mask = padding_mask[:, : x.size(1)]
            x[padding_mask] = float("-inf") if self.max_pool else 0
            # per-row count of non-padded frames
            x_sz = x_sz - padding_mask.sum(dim=-1)
        x = x.squeeze(-1)
        if self.max_pool:
            x, _ = x.max(dim=-1)
        else:
            x = x.sum(dim=-1)
            x = x / x_sz
        return x


class Generator(nn.Module):
    """Single Conv1d projection from input features to per-frame output logits."""

    def __init__(self, input_dim, output_dim, cfg: Wav2vec_UConfig):
        """Build the projection and the optional batch-norm / residual layers.

        Args:
            input_dim: feature size of the input frames.
            output_dim: number of output classes per frame.
            cfg: model config; the ``generator_*`` fields are read here.
        """
        super().__init__()

        self.cfg = cfg
        self.output_dim = output_dim
        self.stride = cfg.generator_stride
        self.dropout = nn.Dropout(cfg.generator_dropout)
        self.batch_norm = cfg.generator_batch_norm != 0
        self.residual = cfg.generator_residual

        padding = (
            cfg.generator_kernel // 2 if cfg.generator_pad < 0 else cfg.generator_pad
        )
        # Conv1d works on (B, C, T), so transpose in and back out
        self.proj = nn.Sequential(
            TransposeLast(),
            nn.Conv1d(
                input_dim,
                output_dim,
                kernel_size=cfg.generator_kernel,
                stride=cfg.generator_stride,
                dilation=cfg.generator_dilation,
                padding=padding,
                bias=cfg.generator_bias,
            ),
            TransposeLast(),
        )

        if self.batch_norm:
            self.bn = nn.BatchNorm1d(input_dim)
            # the config value doubles as the initial batch-norm weight
            self.bn.weight.data.fill_(cfg.generator_batch_norm)
        if self.residual:
            self.in_proj = nn.Linear(input_dim, input_dim)

    def forward(self, dense_x, tokens, dense_padding_mask):
        """Project features to logits and one-hot encode the reference tokens.

        Returns a dict with ``dense_x`` (projected features), ``token_x``
        (one-hot ``tokens`` or ``None``), ``dense_padding_mask`` (resized to
        the projected length) and, when the residual branch is enabled,
        ``inter_x``.
        """
        result = {}

        if self.batch_norm:
            dense_x = self.bn_padded_data(dense_x, dense_padding_mask)
        if self.residual:
            inter_x = self.in_proj(self.dropout(dense_x))
            dense_x = dense_x + inter_x
            result["inter_x"] = inter_x

        dense_x = self.dropout(dense_x)

        dense_x = self.proj(dense_x)
        if self.stride > 1:
            dense_padding_mask = dense_padding_mask[:, :: self.stride]

        if dense_padding_mask.size(1) != dense_x.size(1):
            # the convolution changed the length: realign the mask with the output
            new_padding = dense_padding_mask.new_zeros(dense_x.shape[:-1])
            diff = new_padding.size(1) - dense_padding_mask.size(1)

            if diff > 0:
                new_padding[:, diff:] = dense_padding_mask
            else:
                assert diff < 0
                new_padding = dense_padding_mask[:, :diff]

            dense_padding_mask = new_padding

        token_x = None
        if tokens is not None:
            # one-hot encode the tokens over output_dim classes
            token_x = dense_x.new_zeros(tokens.numel(), self.output_dim)
            token_x.scatter_(1, tokens.view(-1, 1).long(), 1)
            token_x = token_x.view(tokens.shape + (self.output_dim,))

        result["dense_x"] = dense_x
        result["token_x"] = token_x
        result["dense_padding_mask"] = dense_padding_mask

        return result

    def bn_padded_data(self, feature, padding_mask):
        """Apply batch norm to the non-padded frames only; padded frames are copied as is."""
        normed_feature = feature.clone()
        normed_feature[~padding_mask] = self.bn(
            feature[~padding_mask].unsqueeze(-1)
        ).squeeze(-1)
        return normed_feature


@register_model("wav2vec_u", dataclass=Wav2vec_UConfig)
class Wav2vec_U(BaseFairseqModel):
    """Generator/discriminator model registered with fairseq as ``wav2vec_u``."""

    def calc_gradient_penalty(self, real_data, fake_data):
        """Return ``(||grad|| - 1) ** 2`` of the discriminator on random interpolates.

        Both inputs are first cut to a common batch and time size, either with
        the leading slice or, with ``probabilistic_grad_penalty_slicing``, with
        a randomly positioned window.
        """

        b_size = min(real_data.size(0), fake_data.size(0))
        t_size = min(real_data.size(1), fake_data.size(1))

        if self.cfg.probabilistic_grad_penalty_slicing:

            def get_slice(data, dim, target_size):
                """Take a random window of ``target_size`` along ``dim`` if ``data`` is longer."""

                size = data.size(dim)
                diff = size - target_size
                if diff <= 0:
                    return data

                start = np.random.randint(0, diff + 1)
                return data.narrow(dim=dim, start=start, length=target_size)

            real_data = get_slice(real_data, 0, b_size)
            real_data = get_slice(real_data, 1, t_size)
            fake_data = get_slice(fake_data, 0, b_size)
            fake_data = get_slice(fake_data, 1, t_size)

        else:
            real_data = real_data[:b_size, :t_size]
            fake_data = fake_data[:b_size, :t_size]

        # one mixing coefficient per batch element, broadcast over time and classes
        alpha = torch.rand(real_data.size(0), 1, 1)
        alpha = alpha.expand(real_data.size())
        alpha = alpha.to(real_data.device)

        interpolates = alpha * real_data + ((1 - alpha) * fake_data)

        disc_interpolates = self.discriminator(interpolates, None)

        gradients = autograd.grad(
            outputs=disc_interpolates,
            inputs=interpolates,
            grad_outputs=torch.ones(disc_interpolates.size(), device=real_data.device),
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
        )[0]

        gradient_penalty = (gradients.norm(2, dim=1) - 1) ** 2
        return gradient_penalty

    def set_num_updates(self, num_updates):
        """Record the update count and decay the gumbel temperature, floored at ``min_temp``."""
        super().set_num_updates(num_updates)
        self.update_num = num_updates
        self.curr_temp = max(
            self.max_temp * self.temp_decay ** num_updates, self.min_temp
        )

    def discrim_step(self, num_updates):
        """Return True on odd update numbers, which are the discriminator steps."""
        return num_updates % 2 == 1

    def get_groups_for_update(self, num_updates):
        """Return the ``param_group`` name to optimise at this update."""
        return "discriminator" if self.discrim_step(num_updates) else "generator"

    def __init__(self, cfg: Wav2vec_UConfig, target_dict):
        """Build segmenter, generator and discriminator and tag their parameters.

        Args:
            cfg: model configuration.
            target_dict: target dictionary; its length is the number of output
                classes, and its pad/eos/unk/"<SIL>" indices are read here.
        """
        super().__init__()

        self.cfg = cfg
        self.zero_index = target_dict.index("<SIL>") if "<SIL>" in target_dict else 0
        self.smoothness_weight = cfg.smoothness_weight

        output_size = len(target_dict)
        self.pad = target_dict.pad()
        self.eos = target_dict.eos()
        self.smoothing = cfg.smoothing
        self.smoothing_one_sided = cfg.smoothing_one_sided
        self.no_softmax = cfg.no_softmax
        self.gumbel = cfg.gumbel
        self.hard_gumbel = cfg.hard_gumbel
        self.last_acc = None

        self.gradient_penalty = cfg.gradient_penalty
        self.code_penalty = cfg.code_penalty
        self.mmi_weight = cfg.mmi_weight
        self.blank_weight = cfg.blank_weight
        self.blank_mode = cfg.blank_mode
        self.blank_index = target_dict.index("<SIL>") if cfg.blank_is_sil else 0
        assert self.blank_index != target_dict.unk()

        self.discriminator = Discriminator(output_size, cfg)
        # param_group is the tag matched against get_groups_for_update()
        for p in self.discriminator.parameters():
            p.param_group = "discriminator"

        self.pca_A = self.pca_b = None
        d = cfg.input_dim

        self.segmenter = SEGMENT_FACTORY[cfg.segmentation.type](cfg.segmentation)

        self.generator = Generator(d, output_size, cfg)

        for p in self.generator.parameters():
            p.param_group = "generator"

        for p in self.segmenter.parameters():
            p.param_group = "generator"

        self.max_temp, self.min_temp, self.temp_decay = cfg.temp
        self.curr_temp = self.max_temp
        self.update_num = 0

        if self.mmi_weight > 0:
            # auxiliary decoder applied to the generator's "inter_x" output
            self.target_downsample_rate = cfg.target_downsample_rate
            self.decoder = nn.Linear(d, cfg.target_dim)
            for p in self.decoder.parameters():
                p.param_group = "generator"

    @classmethod
    def build_model(cls, cfg, task):
        """fairseq factory hook: build the model from the task's target dictionary."""
        return cls(cfg, task.target_dictionary)

    def get_logits(
        self,
        net_output: Optional[Dict[str, List[Optional[torch.Tensor]]]],
        normalize: bool = False,
    ):
        """Return ``net_output["logits"]`` with blank and padding handling, dims 0 and 1 swapped.

        The logits tensor is modified in place. With ``normalize`` a
        log-softmax over the last dimension is applied before the transpose.

        Raises:
            Exception: if ``blank_mode`` is neither "add" nor "set".
        """
        logits = net_output["logits"]

        if self.blank_weight != 0:
            if self.blank_mode == "add":
                logits[..., self.blank_index] += self.blank_weight
            elif self.blank_mode == "set":
                logits[..., self.blank_index] = self.blank_weight
            else:
                raise Exception(f"invalid blank mode {self.blank_mode}")

        padding = net_output["padding_mask"]
        if padding.any():
            logits[padding] = float("-inf")
            # NOTE(review): `logits[padding]` with a boolean mask returns a copy
            # in PyTorch, so this chained assignment most likely leaves `logits`
            # unchanged -- confirm whether the blank index was meant to be +inf.
            logits[padding][..., self.blank_index] = float("inf")

        if normalize:
            logits = utils.log_softmax(logits.float(), dim=-1)

        return logits.transpose(0, 1)

    def get_normalized_probs(
        self,
        net_output: Tuple[
            torch.Tensor, Optional[Dict[str, List[Optional[torch.Tensor]]]]
        ],
        log_probs: bool,
        sample: Optional[Dict[str, torch.Tensor]] = None,
    ):
        """Normalise the output of ``get_logits`` with the base class and swap dims 0 and 1."""
        logits = self.get_logits(net_output)

        probs = super().get_normalized_probs(logits, log_probs, sample)
        # BTC -> TBC for ctc
        probs = probs.transpose(0, 1)
        return probs

    def normalize(self, dense_x):
        """Turn generator logits into probabilities and compute two perplexities.

        Returns:
            ``(dense_x, code_perplexity, prob_perplexity)``. ``dense_x`` is
            softmaxed (gumbel-softmax in training when ``gumbel`` is set)
            unless ``no_softmax`` is set. ``code_perplexity`` is computed from
            the argmax one-hots, ``prob_perplexity`` from the averaged softmax.

        Raises:
            Exception: if ``dense_x`` has no elements.
        """

        bsz, tsz, csz = dense_x.shape

        if dense_x.numel() == 0:
            raise Exception(dense_x.shape)
        _, k = dense_x.max(-1)
        # one-hot of the argmax class for every frame
        hard_x = (
            dense_x.new_zeros(bsz * tsz, csz)
            .scatter_(-1, k.view(-1, 1), 1.0)
            .view(-1, csz)
        )
        hard_probs = torch.mean(hard_x.float(), dim=0)
        # exp(entropy) of the argmax class distribution
        code_perplexity = torch.exp(
            -torch.sum(hard_probs * torch.log(hard_probs + 1e-7), dim=-1)
        )

        avg_probs = torch.softmax(dense_x.reshape(-1, csz).float(), dim=-1).mean(dim=0)
        # exp(entropy) of the frame-averaged softmax distribution
        prob_perplexity = torch.exp(
            -torch.sum(avg_probs * torch.log(avg_probs + 1e-7), dim=-1)
        )

        if not self.no_softmax:
            if self.training and self.gumbel:
                dense_x = F.gumbel_softmax(
                    dense_x.float(), tau=self.curr_temp, hard=self.hard_gumbel
                ).type_as(dense_x)
            else:
                dense_x = dense_x.softmax(-1)

        return dense_x, code_perplexity, prob_perplexity

    def forward(
        self,
        features,
        padding_mask,
        random_label=None,
        dense_x_only=False,
        segment=True,
        aux_target=None,
    ):
        """Run the generator and, unless ``dense_x_only``, compute the adversarial losses.

        Args:
            features: input features passed to the segmenter and generator.
            padding_mask: mask of padded frames matching ``features``.
            random_label: reference token ids, one-hot encoded as the
                discriminator's other input.
            dense_x_only: return only generator output and padding mask.
            segment: whether to apply the segmenter hooks.
            aux_target: optional targets for the auxiliary "mmi" loss.

        Returns:
            With ``dense_x_only`` (or no discriminator): a dict with "logits"
            and "padding_mask". Otherwise a dict with "losses" plus logging
            values; loss keys are suffixed "_d" on discriminator steps and
            "_g" on generator steps, and unused losses are ``None``.
        """
        if segment:
            features, padding_mask = self.segmenter.pre_segment(features, padding_mask)

        orig_size = features.size(0) * features.size(1) - padding_mask.sum()

        gen_result = self.generator(features, random_label, padding_mask)

        orig_dense_x, token_x = gen_result["dense_x"], gen_result["token_x"]
        orig_dense_padding_mask = gen_result["dense_padding_mask"]

        if segment:
            dense_x, dense_padding_mask = self.segmenter.logit_segment(
                orig_dense_x, orig_dense_padding_mask
            )
        else:
            dense_x = orig_dense_x
            dense_padding_mask = orig_dense_padding_mask

        # keep the un-normalised logits for the smoothness loss
        dense_logits = dense_x
        prob_perplexity = None
        code_perplexity = None

        if not (self.no_softmax and dense_x_only):
            dense_x, code_perplexity, prob_perplexity = self.normalize(dense_logits)

        if dense_x_only or self.discriminator is None:
            return {
                "logits": dense_x,
                "padding_mask": dense_padding_mask,
            }

        # NOTE(review): from here on `random_label` appears to be required
        # (token_x is None without it) -- confirm against the calling task.
        token_padding_mask = random_label == self.pad

        dense_y = self.discriminator(dense_x, dense_padding_mask)
        token_y = self.discriminator(token_x, token_padding_mask)

        sample_size = features.size(0)

        d_step = self.discrim_step(self.update_num)

        fake_smooth = self.smoothing
        real_smooth = self.smoothing
        if self.smoothing_one_sided:
            fake_smooth = 0

        zero_loss = None
        smoothness_loss = None
        code_pen = None
        mmi_loss = None

        if d_step:
            # discriminator step: generated output is labelled 1, reference tokens 0
            loss_dense = F.binary_cross_entropy_with_logits(
                dense_y,
                dense_y.new_ones(dense_y.shape) - fake_smooth,
                reduction="sum",
            )
            loss_token = F.binary_cross_entropy_with_logits(
                token_y,
                token_y.new_zeros(token_y.shape) + real_smooth,
                reduction="sum",
            )
            if self.training and self.gradient_penalty > 0:
                grad_pen = self.calc_gradient_penalty(token_x, dense_x)
                grad_pen = grad_pen.sum() * self.gradient_penalty
            else:
                grad_pen = None
        else:
            # generator step: push generated output towards the reference label (0)
            grad_pen = None
            loss_token = None
            loss_dense = F.binary_cross_entropy_with_logits(
                dense_y,
                dense_y.new_zeros(dense_y.shape) + fake_smooth,
                reduction="sum",
            )
            num_vars = dense_x.size(-1)
            if prob_perplexity is not None:
                # penalise a low perplexity of the averaged output distribution
                code_pen = (num_vars - prob_perplexity) / num_vars
                code_pen = code_pen * sample_size * self.code_penalty

            if self.smoothness_weight > 0:
                # squared difference between logits of adjacent frames
                smoothness_loss = F.mse_loss(
                    dense_logits[:, :-1], dense_logits[:, 1:], reduction="none"
                )
                smoothness_loss[dense_padding_mask[:, 1:]] = 0
                smoothness_loss = (
                    smoothness_loss.mean() * sample_size * self.smoothness_weight
                )

            if (self.mmi_weight > 0) and (aux_target is not None):
                # "inter_x" exists only when the generator's residual branch is enabled
                inter_x = self.decoder(gen_result["inter_x"])
                if self.target_downsample_rate > 1:
                    aux_target = aux_target[:, :: self.target_downsample_rate]
                max_t_len = min(aux_target.shape[1], inter_x.shape[1])
                mmi_loss = F.cross_entropy(
                    inter_x[:, :max_t_len].transpose(1, 2),
                    aux_target[:, :max_t_len],
                    ignore_index=-1,
                    reduction="none",
                )
                mmi_loss = mmi_loss.mean() * mmi_loss.shape[0] * self.mmi_weight

        result = {
            "losses": {
                "grad_pen": grad_pen,
                "code_pen": code_pen,
                "smoothness": smoothness_loss,
                "mmi": mmi_loss,
            },
            "temp": self.curr_temp,
            "code_ppl": code_perplexity,
            "prob_ppl": prob_perplexity,
            "d_steps": int(d_step),
            "sample_size": sample_size,
        }

        suff = "_d" if d_step else "_g"
        result["losses"]["dense" + suff] = loss_dense
        result["losses"]["token" + suff] = loss_token

        return result


# diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/apply_pca.py b/fairseq/examples/wav2vec/unsupervised/scripts/apply_pca.py
# new file mode 100644
# index 0000000..10ad6ce
# --- /dev/null
# +++ b/fairseq/examples/wav2vec/unsupervised/scripts/apply_pca.py
# @@ -0,0 +1,76 @@
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import os
import os.path as osp
import math
import numpy as np
import tqdm
import torch
from shutil import copyfile

from npy_append_array import NpyAppendArray


def get_parser():
    """Build the command-line parser for the PCA projection script."""
    parser = argparse.ArgumentParser(
        description="transforms features via a given pca and stored them in target dir"
    )
    # fmt: off
    parser.add_argument('source', help='directory with features')
    parser.add_argument('--split', help='which split to read', required=True)
    parser.add_argument('--save-dir', help='where to save the output', required=True)
    parser.add_argument('--pca-path', type=str, help='pca location. will append _A.npy and _b.npy', required=True)
    parser.add_argument('--batch-size', type=int, default=2048000, help='batch size')
    parser.add_argument('--unfiltered', action='store_true', help='process the unfiltered version')
    # fmt: on

    return parser


def main():
    """Project the features of one split with a stored PCA and write them to ``--save-dir``.

    Copies the split's ``.tsv`` and ``.lengths`` (and ``.phn`` / ``.wrd`` when
    present) next to the output, then appends ``x @ A + b`` batch by batch to
    ``<save-dir>/<split>.npy``. The matrix work runs on CUDA.
    """
    parser = get_parser()
    args = parser.parse_args()

    source_path = osp.join(args.source, args.split)
    data_poth = source_path + "_unfiltered" if args.unfiltered else source_path

    print(f"data path: {data_poth}")

    # memory-map the features so only the current batch is loaded
    features = np.load(data_poth + ".npy", mmap_mode="r")
    pca_A = torch.from_numpy(np.load(args.pca_path + "_A.npy")).cuda()
    pca_b = torch.from_numpy(np.load(args.pca_path + "_b.npy")).cuda()

    os.makedirs(args.save_dir, exist_ok=True)
    save_path = osp.join(args.save_dir, args.split)

    # the .tsv always comes from the filtered path; .lengths follows --unfiltered
    copyfile(source_path + ".tsv", save_path + ".tsv")
    copyfile(data_poth + ".lengths", save_path + ".lengths")

    if osp.exists(source_path + ".phn"):
        copyfile(source_path + ".phn", save_path + ".phn")

    if osp.exists(source_path + ".wrd"):
        copyfile(source_path + ".wrd", save_path + ".wrd")

    # NpyAppendArray appends, so remove any output left from a previous run
    if osp.exists(save_path + ".npy"):
        os.remove(save_path + ".npy")
    npaa = NpyAppendArray(save_path + ".npy")

    batches = math.ceil(features.shape[0] / args.batch_size)

    with torch.no_grad():
        for b in tqdm.trange(batches):
            start = b * args.batch_size
            end = start + args.batch_size
            x = torch.from_numpy(features[start:end]).cuda()
            x = torch.matmul(x, pca_A) + pca_b
            npaa.append(x.cpu().numpy())


if __name__ == "__main__":
    main()


# diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/copy_labels.py b/fairseq/examples/wav2vec/unsupervised/scripts/copy_labels.py
# new file mode 100644
# index 0000000..9898683
# --- /dev/null
# +++ b/fairseq/examples/wav2vec/unsupervised/scripts/copy_labels.py
# @@ -0,0 +1,10 @@
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import sys

# Prefix every stdin line with a zero-padded utterance id ("utt0000000000 ...").
for idx, line in enumerate(sys.stdin):
    sys.stdout.write(f"utt{idx:010d} {line}")


# diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/filter_lexicon.py b/fairseq/examples/wav2vec/unsupervised/scripts/filter_lexicon.py
# new file mode 100644
# index 0000000..5bf3e51
# --- /dev/null
# +++ b/fairseq/examples/wav2vec/unsupervised/scripts/filter_lexicon.py
# @@ -0,0 +1,40 @@
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import sys

from fairseq.data import Dictionary


def get_parser():
    """Return the argument parser for the lexicon filter (``-d/--unit-dict``)."""
    lexicon_parser = argparse.ArgumentParser(
        description="filters a lexicon given a unit dictionary"
    )
    lexicon_parser.add_argument(
        "-d", "--unit-dict", help="unit dictionary", required=True
    )
    return lexicon_parser


def main():
    """Echo the stdin lexicon entries whose units all appear in the unit dictionary.

    An entry is kept only if it has a word followed by at least one unit and
    every unit is a symbol of the dictionary loaded from ``--unit-dict``.
    """
    args = get_parser().parse_args()
    known_units = set(Dictionary.load(args.unit_dict).symbols)

    for entry in sys.stdin:
        fields = entry.rstrip().split()
        # fields[0] is the word; everything after it must be a known unit
        if len(fields) >= 2 and all(unit in known_units for unit in fields[1:]):
            print(entry, end="")


if __name__ == "__main__":
    main()


# diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/filter_tsv.py b/fairseq/examples/wav2vec/unsupervised/scripts/filter_tsv.py
# new file mode 100644
# index 0000000..a09d79a
# --- /dev/null
# +++ b/fairseq/examples/wav2vec/unsupervised/scripts/filter_tsv.py
# @@ -0,0 +1,37 @@
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
def get_fname(line):
    """Return the extension-less base name of the first tab-separated field.

    Only the text before the first tab is considered; its directory part and
    its last extension are dropped.
    """
    first_field, _, _ = line.partition("\t")
    stem, _ext = os.path.splitext(os.path.basename(first_field))
    return stem
def main():
    """Convert whitespace-separated words on stdin to phones on stdout.

    Each distinct word is run through ``G2p`` once and cached.  With
    ``--compact`` a trailing numeric character is stripped from every phone
    before it is cached.  One output line is printed per input line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--compact",
        action="store_true",
        help="if set, compacts phones",
    )
    args = parser.parse_args()

    compact = args.compact

    wrd_to_phn = {}
    g2p = G2p()
    for line in sys.stdin:
        words = line.strip().split()
        phones = []
        for w in words:
            if w not in wrd_to_phn:
                wrd_to_phn[w] = g2p(w)
                if compact:
                    # p[-1:] (not p[-1]) so an empty token is passed through
                    # instead of raising IndexError.
                    wrd_to_phn[w] = [
                        p[:-1] if p[-1:].isnumeric() else p for p in wrd_to_phn[w]
                    ]
            phones.extend(wrd_to_phn[w])
        try:
            print(" ".join(phones))
        except Exception:
            # Dump the state that produced the failure, then propagate it.
            # Catching Exception rather than everything lets Ctrl-C and
            # SystemExit pass through without the debug dump.
            print(wrd_to_phn, words, phones, file=sys.stderr)
            raise
def main():
    """Mean-pool every utterance of a split down to a fraction of its frames.

    For each utterance length listed in ``<source>/<split>.lengths`` the
    matching rows of ``<source>/<split>.npy`` are grouped into
    ``ceil(length * subsample_rate)`` equal groups and averaged.  When the
    length does not divide evenly, ``--remove-extra`` trims trailing frames
    (and drops one group); otherwise the utterance is padded with copies of
    its last frame.  Pooled features and the new lengths are written under
    ``--save-dir`` together with copies of the ``.tsv`` and, when present,
    the ``.phn``, ``.wrd`` and ``dict.phn.txt`` files.
    """
    args = get_parser().parse_args()

    src_prefix = osp.join(args.source, args.split)
    print(f"data path: {src_prefix}")

    features = np.load(src_prefix + ".npy", mmap_mode="r")

    os.makedirs(args.save_dir, exist_ok=True)
    dst_prefix = osp.join(args.save_dir, args.split)

    copyfile(src_prefix + ".tsv", dst_prefix + ".tsv")
    for suffix in (".phn", ".wrd"):
        if os.path.exists(src_prefix + suffix):
            copyfile(src_prefix + suffix, dst_prefix + suffix)

    src_dict = osp.join(args.source, "dict.phn.txt")
    if os.path.exists(src_dict):
        copyfile(src_dict, osp.join(args.save_dir, "dict.phn.txt"))

    # The writer appends, so a stale output file must be removed first.
    out_npy = dst_prefix + ".npy"
    if osp.exists(out_npy):
        os.remove(out_npy)
    writer = NpyAppendArray(out_npy)

    with open(src_prefix + ".lengths", "r") as lengths_in:
        length_lines = lengths_in.readlines()

    feat_dim = features.shape[-1]
    cursor = 0  # row offset of the current utterance inside `features`
    with torch.no_grad(), open(dst_prefix + ".lengths", "w") as lengths_out:
        for raw_length in tqdm.tqdm(length_lines):
            n_frames = int(raw_length)
            x = torch.from_numpy(features[cursor : cursor + n_frames]).cuda()
            cursor += n_frames

            n_pooled = math.ceil(n_frames * args.subsample_rate)
            leftover = n_frames % n_pooled

            if leftover > 0:
                shortfall = n_pooled - leftover
                if args.remove_extra:
                    # Trim trailing frames and give up one output group.
                    n_pooled -= 1
                    x = x[:-shortfall]
                else:
                    # Pad, then overwrite the padding with the last real
                    # frame so the final group is not averaged with zeros.
                    x = F.pad(x, [0, 0, 0, shortfall])
                    x[-shortfall:] = x[-shortfall - 1]

            pooled = x.view(n_pooled, -1, feat_dim).mean(dim=-2)
            print(n_pooled, file=lengths_out)
            writer.append(pooled.cpu().numpy())
def get_parser():
    """Build the command-line parser for the cluster-merging script.

    ``--cluster-dir`` is required because ``main`` always reads
    ``<cluster-dir>/<split>.src``; leaving it out used to fail later with a
    ``TypeError`` from ``osp.join``.
    """
    parser = argparse.ArgumentParser(
        description="merges consecutive frames that share a cluster id and stores them in target dir"
    )
    # fmt: off
    parser.add_argument('source', help='directory with features')
    parser.add_argument('--split', help='which split to read', required=True)
    parser.add_argument('--save-dir', help='where to save the output', required=True)
    parser.add_argument('--cluster-dir', help='where the clusters are', required=True)
    parser.add_argument('--pooling', type=str, default='mean', choices=['mean', 'sample'], help='how to pool')
    # fmt: on

    return parser


def main():
    """Collapse runs of frames with the same cluster id into one frame each.

    Reads ``<source>/<split>.npy`` and ``.lengths`` plus the per-utterance
    cluster ids in ``<cluster-dir>/<split>.src``.  Each run of identical
    consecutive ids is reduced either to its mean (``--pooling mean``) or to
    one randomly chosen frame (``--pooling sample``).  The merged features
    and their new lengths are written under ``--save-dir``, alongside copies
    of the ``.tsv`` and, when present, ``.phn``, ``dict.phn.txt`` and
    ``.wrd``.
    """
    args = get_parser().parse_args()

    source_path = osp.join(args.source, args.split)
    cluster_path = osp.join(args.cluster_dir, args.split + ".src")
    print(f"data path: {source_path}")

    features = np.load(source_path + ".npy", mmap_mode="r")

    # Utterance sizes and the row offset at which each utterance starts.
    sizes = []
    offsets = []
    offset = 0
    with open(source_path + ".lengths", "r") as len_f:
        for line in len_f:
            length = int(line.rstrip())
            sizes.append(length)
            offsets.append(offset)
            offset += length

    # One list of integer cluster ids per utterance.
    with open(cluster_path, "r") as cf:
        clusters = [[int(tok) for tok in line.split()] for line in cf]

    os.makedirs(args.save_dir, exist_ok=True)
    save_path = osp.join(args.save_dir, args.split)

    copyfile(source_path + ".tsv", save_path + ".tsv")

    if os.path.exists(source_path + ".phn"):
        copyfile(source_path + ".phn", save_path + ".phn")
    if os.path.exists(osp.join(args.source, "dict.phn.txt")):
        copyfile(
            osp.join(args.source, "dict.phn.txt"),
            osp.join(args.save_dir, "dict.phn.txt"),
        )
    if os.path.exists(source_path + ".wrd"):
        copyfile(source_path + ".wrd", save_path + ".wrd")

    # The writer appends, so remove any stale output first.
    if osp.exists(save_path + ".npy"):
        os.remove(save_path + ".npy")
    npaa = NpyAppendArray(save_path + ".npy")

    def merge(feats, clust):
        """Pool each run of equal consecutive cluster ids into one frame."""
        # Copy: the memory-mapped slice is read-only.
        feats = torch.from_numpy(feats.copy())
        _, counts = torch.LongTensor(clust).unique_consecutive(return_counts=True)

        merged = []
        curr = 0
        for c in counts.tolist():
            if args.pooling == "mean":
                new_x = feats[curr : curr + c].mean(dim=0)
            elif args.pooling == "sample":
                new_x = feats[curr + int(random.random() * c)]
            else:
                raise NotImplementedError()
            merged.append(new_x)
            curr += c

        return torch.stack(merged, dim=0).numpy()

    with open(save_path + ".lengths", "w") as l_f:
        for size, offset, clust in tqdm.tqdm(
            zip(sizes, offsets, clusters), total=len(sizes)
        ):
            feats = merge(features[offset : offset + size], clust)
            print(len(feats), file=l_f)
            npaa.append(feats)
def main():
    """Normalize text from stdin and keep only lines in the target language.

    Every line is stripped, characters outside letters, numbers, marks,
    apostrophe, space and hyphen are replaced by a space, and whitespace is
    collapsed.  If the fastText model file exists, a line is printed only
    when the ``--lang`` label is among the top 100 predictions and is either
    the top prediction or has probability of at least ``--lid-threshold``.
    Without a model file every normalized line is printed.
    """
    args = get_parser().parse_args()
    disallowed = regex.compile(r"[^\p{L}\p{N}\p{M}\' \-]")

    wanted_label = f"__label__{args.lang.lower()}"
    min_prob = args.lid_threshold

    lid_model = None
    if os.path.exists(args.fasttext_model):
        lid_model = ft.load_model(args.fasttext_model)
    else:
        print(
            f"fasttext language id model {args.fasttext_model} not found. Proceeding without language filtering. "
            f"To enable language filtering, please download the latest language id model "
            f"from https://fasttext.cc/docs/en/language-identification.html",
            file=sys.stderr,
        )

    for raw in sys.stdin:
        text = " ".join(disallowed.sub(" ", raw.strip()).split())

        if lid_model is None:
            print(text)
            continue

        labels, probs = lid_model.predict(text, k=100)
        if wanted_label not in labels:
            continue
        rank = labels.index(wanted_label)
        if rank == 0 or probs[rank] >= min_prob:
            print(text)
def main():
    """Normalize each stdin line and print it.

    Characters other than letters, numbers, marks, apostrophe, space and
    hyphen become a space; runs of whitespace are then collapsed to one
    space and leading/trailing whitespace is removed.
    """
    disallowed = regex.compile(r"[^\p{L}\p{N}\p{M}\' \-]")

    for raw in sys.stdin:
        cleaned = disallowed.sub(" ", raw.strip())
        print(" ".join(cleaned.split()))
def main():
    """Turn word sequences from stdin into phone sequences with silences.

    The lexicon file maps a word (first column) to its phones (remaining
    columns).  Input lines containing any word missing from the lexicon are
    skipped.  With ``--sil-prob`` > 0 a ``<SIL>`` token is inserted after a
    word (except the last) with that probability; with ``--surround`` every
    output line also starts and ends with ``<SIL>``.
    """
    args = get_parser().parse_args()

    sil_prob = args.sil_prob
    surround = args.surround
    sil = "<SIL>"

    lexicon = {}
    with open(args.lexicon, "r") as lexicon_file:
        for entry in lexicon_file:
            fields = entry.rstrip().split()
            assert len(fields) > 1, entry
            assert fields[0] not in lexicon, fields
            lexicon[fields[0]] = fields[1:]

    for sentence in sys.stdin:
        words = sentence.strip().split()

        if any(word not in lexicon for word in words):
            continue

        phones = [sil] if surround else []

        # One uniform draw per gap between consecutive words.
        gap_draws = None
        if sil_prob > 0 and len(words) > 1:
            gap_draws = np.random.random(len(words) - 1)

        for idx, word in enumerate(words):
            phones.extend(lexicon[word])
            if gap_draws is None or idx >= len(gap_draws):
                continue
            if gap_draws[idx] < sil_prob:
                phones.append(sil)

        if surround:
            phones.append(sil)
        print(" ".join(phones))
$tgt_dir/${train_split}.npy --output $tgt_dir/pca --dim $dim + +for split in $all_splits; do + python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/apply_pca.py $tgt_dir --split $split --save-dir $tgt_dir/precompute_pca$dim --pca-path $tgt_dir/pca/${dim}_pca --batch-size 1048000 + + python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/merge_clusters.py $tgt_dir/precompute_pca$dim --cluster-dir $tgt_dir/CLUS128 \ + --split $split --save-dir $tgt_dir/precompute_pca${dim}_cls128_mean --pooling mean + + python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/mean_pool.py $tgt_dir/precompute_pca${dim}_cls128_mean \ + --save-dir $tgt_dir/precompute_pca${dim}_cls128_mean_pooled --split $split +done diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/prepare_audio_v2.sh b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_audio_v2.sh new file mode 100644 index 0000000..96a52c5 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_audio_v2.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env zsh +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +source_dir=$1 +tgt_dir=$2 +model=$3 + +if [ -z "$4" ] + then + dim=64 + else + dim=$4 +fi + +echo "using $dim clusters for auxilary target" + +if [ -z "$5" ] + then + layer=14 + else + layer=$5 +fi + +echo "extracting from layer $layer" + +train_split=train +valid_split=valid +test_split=test + +all_splits=($train_split) + +if [[ -f "$source_dir/valid.tsv" ]]; then + all_splits+=('valid') +fi + +if [[ -f "$source_dir/test.tsv" ]]; then + all_splits+=('test') +fi + +echo "processing splits: $all_splits" + +mkdir -p $tgt_dir + +cp $source_dir/*.tsv $tgt_dir +cp $source_dir/*.wrd $tgt_dir +cp $source_dir/*.ltr $tgt_dir +cp $source_dir/*.phn $tgt_dir +cp $source_dir/dict* $tgt_dir + +setopt shwordsplit + +for split in $all_splits; do + python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/wav2vec_extract_features.py $source_dir --split $split \ + --save-dir $tgt_dir --checkpoint $model --layer $layer +done + + +mkdir -p $tgt_dir/mfcc + +# Consider spliting corpus into chuncks for large corpus, see HuBERT preprocessing for more details +python $FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_mfcc_feature.py \ + $tgt_dir $train_split 1 0 $tgt_dir/mfcc +python $FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_km_label.py \ + $tgt_dir/mfcc $train_split $tgt_dir/mfcc/cls$dim 1 0 $tgt_dir/mfcc/cls${dim}_idx +cp $tgt_dir/mfcc/cls${dim}_idx/${train_split}_0_1.km $tgt_dir/$train_split.km diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/prepare_text.sh b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_text.sh new file mode 100644 index 0000000..dbd17a2 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_text.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env zsh +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +lg=$1 +text_path=$2 +target_dir=$3 +min_phones=$4 +phonemizer=$5 +lid_path=$6 +sil_prob=$7 + +if [ -z "$lid_path" ]; then + lid_path="lid.187.bin" +fi + +ph_lg=${lg:l} +if test "$lg" = 'fr'; then + ph_lg='fr-fr' +elif test "$lg" = 'en'; then + ph_lg='en-us' +elif test "$lg" = 'pt'; then + ph_lg='pt-br' +fi + +ESPEAK_PATH='' +if test "$phonemizer" = 'espeak'; then + ESPEAK_PATH=$(which espeak) +elif test "$phonemizer" = 'espeak-ng'; then + ESPEAK_PATH=$(which espeak-ng) +elif test "$phonemizer" = 'G2P'; then + ESPEAK_PATH='' +else + echo "Unknown phonemizer $phonemizer. Valid options are espeak, espean-ng and G2P" + exit 1 +fi + +echo $lg +echo $ph_lg +echo $text_path +echo $target_dir +echo "min phone seen threshold is $min_phones" + +mkdir -p $target_dir +python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/normalize_and_filter_text.py --lang $lg --fasttext-model $lid_path < $text_path | grep -v '\-\-\-' >! $target_dir/lm.upper.lid.txt +python $FAIRSEQ_ROOT/fairseq_cli/preprocess.py --dataset-impl mmap --trainpref $target_dir/lm.upper.lid.txt --only-source --destdir $target_dir --thresholdsrc 2 --padding-factor 1 --dict-only +cut -f1 -d' ' $target_dir/dict.txt | grep -v -x '[[:punct:]]*' | grep -Pv '\d\d\d\d\d+' >! $target_dir/words.txt + + +if [ -z "$ESPEAK_PATH" ]; then + python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/g2p_wrd_to_phn.py --compact < $target_dir/words.txt > $target_dir/phones.txt +else + # echoing 1 into corpus will prevent the mismatch lines between lexicon and phones in case the phonemizer fails + one=$(echo "1" | PHONEMIZER_ESPEAK_PATH=$ESPEAK_PATH phonemize -p ' ' -w '' -l $ph_lg --language-switch remove-flags) + sed 's/$/ 1/' $target_dir/words.txt | PHONEMIZER_ESPEAK_PATH=$ESPEAK_PATH phonemize -o $target_dir/phones.txt -p ' ' -w '' -l $ph_lg -j 70 --language-switch remove-flags + echo "one is ${one}" + sed -i "s/${one}$//" $target_dir/phones.txt +fi + +paste $target_dir/words.txt $target_dir/phones.txt >! 
$target_dir/lexicon.lst + +python $FAIRSEQ_ROOT/fairseq_cli/preprocess.py --dataset-impl mmap --trainpref $target_dir/phones.txt --only-source --destdir $target_dir/phones --thresholdsrc $min_phones --padding-factor 1 --dict-only + +python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/filter_lexicon.py -d $target_dir/phones/dict.txt < $target_dir/lexicon.lst >! $target_dir/lexicon_filtered.lst +python $FAIRSEQ_ROOT/examples/wav2vec/unsupervised/scripts/phonemize_with_sil.py -s $sil_prob --surround --lexicon $target_dir/lexicon_filtered.lst < $target_dir/lm.upper.lid.txt >! $target_dir/phones/lm.phones.filtered.txt +cp $target_dir/phones/dict.txt $target_dir/phones/dict.phn.txt +echo "<SIL> 0" >> $target_dir/phones/dict.phn.txt +python $FAIRSEQ_ROOT/fairseq_cli/preprocess.py --dataset-impl mmap --trainpref $target_dir/phones/lm.phones.filtered.txt --workers 70 --only-source --destdir $target_dir/phones --srcdict $target_dir/phones/dict.phn.txt + +$KENLM_ROOT/lmplz -o 4 < $target_dir/lm.upper.lid.txt --discount_fallback --prune 0 0 0 3 >! $target_dir/kenlm.wrd.o40003.arpa +$KENLM_ROOT/build_binary $target_dir/kenlm.wrd.o40003.arpa $target_dir/kenlm.wrd.o40003.bin + +lg=$lg python $FAIRSEQ_ROOT/examples/speech_recognition/kaldi/kaldi_initializer.py kaldi_root=$KALDI_ROOT fst_dir=$target_dir/fst/phn_to_words_sil lm_arpa=$target_dir/kenlm.wrd.o40003.arpa wav2letter_lexicon=$target_dir/lexicon_filtered.lst data_dir=$target_dir/phones in_labels=phn "blank_symbol='<SIL>'" +lg=$lg python $FAIRSEQ_ROOT/examples/speech_recognition/kaldi/kaldi_initializer.py kaldi_root=$KALDI_ROOT fst_dir=$target_dir/fst/phn_to_words lm_arpa=$target_dir/kenlm.wrd.o40003.arpa wav2letter_lexicon=$target_dir/lexicon_filtered.lst data_dir=$target_dir/phones in_labels=phn + +$KENLM_ROOT/lmplz -o 4 < $target_dir/phones/lm.phones.filtered.txt --discount_fallback >! 
$target_dir/phones/lm.phones.filtered.04.arpa +$KENLM_ROOT/build_binary $target_dir/phones/lm.phones.filtered.04.arpa $target_dir/phones/lm.phones.filtered.04.bin +$KENLM_ROOT/lmplz -o 6 < $target_dir/phones/lm.phones.filtered.txt --discount_fallback >! $target_dir/phones/lm.phones.filtered.06.arpa +$KENLM_ROOT/build_binary $target_dir/phones/lm.phones.filtered.06.arpa $target_dir/phones/lm.phones.filtered.06.bin + +lg=$lg python $FAIRSEQ_ROOT/examples/speech_recognition/kaldi/kaldi_initializer.py kaldi_root=$KALDI_ROOT fst_dir=$target_dir/fst/phn_to_phn_sil lm_arpa=$target_dir/phones/lm.phones.filtered.06.arpa data_dir=$target_dir/phones in_labels=phn "blank_symbol='<SIL>'" diff --git a/fairseq/examples/wav2vec/unsupervised/scripts/prepare_timit.sh b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_timit.sh new file mode 100644 index 0000000..d8f5d59 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/scripts/prepare_timit.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +timit_root=$1 # assume it is the upper-cased version +tgt_dir=$2 +model=$3 + +set -eu + +setups="matched unmatched" +splits="test valid train train_text" + +tgt_dir=$(realpath $tgt_dir) +sph2wav=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +wav_dir=$tgt_dir/wav + + +mkdir -p $tgt_dir $wav_dir +find $timit_root/{TRAIN,TEST} -iname "*.WAV" > $tgt_dir/all_sph.flist +cat $tgt_dir/all_sph.flist | sed -e 's#//*#/#g' -e 's#.*/\([^/]*\)/\([^/]*\).WAV#\1_\2#g' > $tgt_dir/all.uid +paste -d' ' $tgt_dir/{all_sph.flist,all.uid} | \ + awk -v sph2wav=$sph2wav -v wav_dir=$wav_dir '{print sph2wav " -f wav " $1 " > " wav_dir "/" $2 ".wav"}' \ + > $tgt_dir/sph2wav.sh +bash $tgt_dir/sph2wav.sh +cat $tgt_dir/all.uid | awk -v wav_dir=$(pwd)/$wav_dir '{print $1" "wav_dir"/"$1".wav"}' | sort > $tgt_dir/all_wav.scp +cut -d' ' -f2 $tgt_dir/all_wav.scp | xargs -I{} soxi -s {} > $tgt_dir/all.dur +paste -d' ' $tgt_dir/{all_wav.scp,all.dur} > $tgt_dir/all_wav_dur.scp +rm $tgt_dir/{all.uid,all_sph.flist,sph2wav.sh} + +find $timit_root/{TRAIN,TEST} -iname "*.PHN" > $tgt_dir/all_phn60.flist +while read line; do + if [ ! 
-f $line ]; then + >&2 echo "Cannot find transcription file '$line'" && exit 1; + fi + cut -f3 -d' ' "$line" | tr '\n' ' ' | perl -ape 's: *$:\n:;' +done < $tgt_dir/all_phn60.flist > $tgt_dir/all.phn60 +cat $tgt_dir/all_phn60.flist | sed -e 's#//*#/#g' -e 's#.*/\([^/]*\)/\([^/]*\).PHN#\1_\2#g' | \ + paste -d' ' - $tgt_dir/all.phn60 | \ + $KALDI_ROOT/egs/timit/s5/local/timit_norm_trans.pl -i - -m $KALDI_ROOT/egs/timit/s5/conf/phones.60-48-39.map -to 39 | \ + sort > $tgt_dir/all.phn +echo "done preparing wav and 39-phone transcripts" + + +for s in $setups; do + mkdir -p $tgt_dir/$s + for x in $splits; do + uid_path=config/timit_${s}/${x}.uid + grep -w -f $uid_path $tgt_dir/all.phn | cut -d' ' -f2- > $tgt_dir/$s/$x.phn + ln -sf $(realpath $tgt_dir/$s/$x.phn) $tgt_dir/$s/$x.wrd + + echo "/" > $tgt_dir/$s/$x.tsv && grep -w -f $uid_path $tgt_dir/all_wav_dur.scp | cut -d' ' -f2- | sed 's# #\t#' >> $tgt_dir/$s/$x.tsv + done + + for x in $splits; do + cat $tgt_dir/$s/$x.phn + done | tr ' ' '\n' | sort -u | awk '{print $1" "1}' > $tgt_dir/$s/dict.phn.txt + ln -sf $(realpath $tgt_dir/$s/dict.phn.txt) $tgt_dir/$s/dict.wrd.txt +done +echo "done preparing unmatched and matched setups for TIMIT" + + +for s in $setups; do + zsh scripts/prepare_audio.sh $tgt_dir/$s $tgt_dir/$s/feat $model + + lm_dir=$tgt_dir/$s/phones + fst_dir=$tgt_dir/$s/fst/phn_to_phn + + python $FAIRSEQ_ROOT/fairseq_cli/preprocess.py --dataset-impl mmap --trainpref $tgt_dir/$s/train_text.phn --workers 10 --only-source --destdir $lm_dir --srcdict $tgt_dir/$s/dict.phn.txt + $KENLM_ROOT/lmplz -o 3 < $tgt_dir/$s/train_text.phn --discount_fallback >$lm_dir/train_text_phn.03.arpa + $KENLM_ROOT/build_binary $lm_dir/train_text_phn.03.arpa $lm_dir/train_text_phn.03.bin + $KENLM_ROOT/lmplz -o 4 < $tgt_dir/$s/train_text.phn --discount_fallback >$lm_dir/train_text_phn.04.arpa + $KENLM_ROOT/build_binary $lm_dir/train_text_phn.04.arpa $lm_dir/train_text_phn.04.bin + + python 
"""
Remove silences from audio files.

Reads the file list from a .tsv manifest (first line is the root directory)
and the voiced intervals from a .vads file (one line per audio file, each
interval written as start:end in samples), keeps only the voiced samples of
the first channel and saves the result under the --out folder.

paths=shards/train.tsv
vads=shards/train.vads
python remove_silence.py --tsv $paths --vads $vads --out $out
"""

import os
import argparse
import torch
import torchaudio
import tqdm


parser = argparse.ArgumentParser()
parser.add_argument("--tsv", default="", type=str)
parser.add_argument("--vads", default="", type=str)
# Required: the output path is built by string concatenation below, which
# previously failed with a TypeError when --out was left unset.
parser.add_argument("--out", type=str, required=True)
params = parser.parse_args()

# load paths
paths = []
with open(params.tsv) as f:
    root = next(f).rstrip()
    for line in f:
        paths.append(os.path.join(root, line.rstrip().split("\t")[0]))

# load vads: one list of [start, end] pairs per audio file
list_intervals = []
with open(params.vads) as f:
    for line in f:
        interval = [
            [int(w.split(":")[0]), int(w.split(":")[1])] for w in line.rstrip().split()
        ]
        list_intervals.append(interval)


# load audio and keep only intervals (i.e. remove silences)
for i in tqdm.trange(len(paths)):
    data, _ = torchaudio.load(paths[i])
    if len(list_intervals[i]) > 0:
        data_filtered = torch.cat(
            [data[0][int(it[0]) : int(it[1])] for it in list_intervals[i]]
        ).unsqueeze(0)
    else:
        # No intervals listed for this file: keep the audio unchanged.
        data_filtered = data

    # YOU MAY NEED TO MODIFY THIS TO GET THE RIGHT SUBPATH
    # Keeps the last two path components (parent directory and file name).
    outpath = params.out + "/" + "/".join(paths[i].split("/")[-2:])

    # exist_ok avoids the check-then-create race of a separate isdir test.
    os.makedirs(os.path.dirname(outpath), exist_ok=True)
    if not os.path.exists(outpath):
        # NOTE(review): the sample rate returned by torchaudio.load is
        # discarded and 16000 is written regardless - confirm inputs are 16 kHz.
        torchaudio.save(outpath, data_filtered, sample_rate=16000)
    else:
        print(outpath, "exists!")
def rvad(speechproc, path):
    """Run the rVAD pipeline on one audio file.

    Args:
        speechproc: module providing ``sflux``, ``pitchblockdetect``,
            ``snre_highenergy`` and ``snre_vad``.
        path: audio file to read; its sample rate must be 16 kHz.

    Returns:
        ``(vad_seg, data)``: the result of ``speechproc.snre_vad`` and the
        samples as read from ``path``.
    """
    winlen = 0.025
    ovrlen = 0.01
    nftt = 512
    flux_threshold = 0.5
    vad_threshold = 0.4
    opts = 1

    data, fs = sf.read(path)
    assert fs == 16_000, "sample rate must be 16khz"
    ft, flen, fsh10, nfr10 = speechproc.sflux(data, fs, winlen, ovrlen, nftt)

    # --spectral flatness --
    # 1 where the value is at or below the threshold, 0 elsewhere.
    pv01 = np.zeros(ft.shape[0])
    pv01[np.less_equal(ft, flux_threshold)] = 1
    pitch = deepcopy(ft)

    pvblk = speechproc.pitchblockdetect(pv01, pitch, nfr10, opts)

    # --filtering--
    energy_floor = np.exp(-50)
    numerator = np.array([0.9770, -0.9770])
    denominator = np.array([1.0000, -0.9540])
    fdata = lfilter(numerator, denominator, data, axis=0)

    # --pass 1--
    noise_samp, _noise_seg, n_noise_samp = speechproc.snre_highenergy(
        fdata, nfr10, flen, fsh10, energy_floor, pv01, pvblk
    )

    # sets noisy segments to zero (both bounds inclusive)
    for row in range(n_noise_samp):
        first = int(noise_samp[row, 0])
        last = int(noise_samp[row, 1])
        fdata[range(first, last + 1)] = 0

    vad_seg = speechproc.snre_vad(
        fdata, nfr10, flen, fsh10, energy_floor, pv01, pvblk, vad_threshold
    )
    return vad_seg, data
def get_iterator(args):
    """Prepare a lazy feature iterator over the files listed in a split manifest.

    Reads ``<args.data>/<args.split>.tsv`` (first line: root directory; every
    following non-empty line: a tab-separated entry whose first column is a
    path relative to that root) and, when present,
    ``<args.data>/<args.split>.<args.labels>`` with one label per line.

    Args:
        args: parsed CLI namespace; ``data``, ``split``, ``labels``,
            ``checkpoint`` and ``layer`` are read.

    Returns:
        Tuple ``(iterate, num, root)`` where ``iterate`` is a zero-argument
        generator function yielding ``(feats.data, manifest_line, label)``,
        ``num`` is the number of manifest entries and ``root`` is the root
        directory taken from the manifest. ``label`` is ``None`` when no
        label file exists.
    """
    label_path = osp.join(args.data, f"{args.split}.{args.labels}")

    with open(osp.join(args.data, f"{args.split}.tsv"), "r") as fp:
        lines = fp.read().split("\n")
    root = lines.pop(0).strip()
    files = [line.rstrip() for line in lines if len(line) > 0]

    if osp.exists(label_path):
        # Read the labels eagerly inside a context manager so the handle is
        # closed; it was previously opened and never released.
        with open(label_path, "r") as lp:
            lbls = [line.rstrip() for line in lp]
    else:
        lbls = [None] * len(files)

    num = len(files)
    reader = Wav2VecFeatureReader(args.checkpoint, args.layer)

    def iterate():
        # zip() stops at the shorter sequence, so a label file with fewer
        # lines than the manifest silently truncates the iteration.
        for fname, lbl in zip(files, lbls):
            audio_path = osp.join(root, fname.split("\t")[0])
            feats = reader.get_feats(audio_path)
            yield feats.data, fname, lbl

    return iterate, num, root
def get_parser():
    """Build the command-line parser for the faiss k-means clustering script.

    Returns:
        argparse.ArgumentParser: parser with the positional ``data`` argument
        and the ``--save-dir``, ``--checkpoint``, ``--sample-pct``,
        ``--layer`` and ``--faiss-specs`` options.
    """
    parser = argparse.ArgumentParser(
        description="compute kmeans codebook from kaldi-computed feats"
    )
    # fmt: off
    parser.add_argument('data', help='location of tsv files')
    parser.add_argument('--save-dir', help='where to save the output', required=True)
    parser.add_argument('--checkpoint', type=str, help='checkpoint for wav2vec model (if using wav2vec features)', required=True)
    parser.add_argument('--sample-pct', '-r', type=float, help='percentage of timesteps to sample', default=0)
    parser.add_argument('--layer', '-l', type=int, help='which layer to read', default=14)
    # The spec parser only recognises the exact token "SPHERICAL"; the help
    # text used to spell it "SPEHRICAL", which a user copying it verbatim
    # would have had silently ignored.
    # NOTE(review): the default 'l2' contains no CLUSx component, so
    # parse_faiss_specs() rejects it (it asserts n_clus > 0). In practice
    # --faiss-specs must be passed explicitly; confirm the intended default.
    parser.add_argument('--faiss-specs', '-f', type=str,
                        help='faiss index specs; separated by space '
                             'format is: PCAx_NORM_CLUSx_SPHERICAL -> '
                                'PCAx if exists first apply PCA '
                                'NORM if exists, normalize the vector by L2 norm '
                                'CLUSx must exist, cluster to x clusters '
                                'SPHERICAL if exists, apply spherical kmeans',
                        default='l2')
    # fmt: on

    return parser
def main():
    """Extract (or load cached) features and train one k-means codebook per spec.

    Features come from ``<save_dir>/features.npy`` when that file exists;
    otherwise they are computed through ``get_iterator`` and concatenated in
    memory. For every parsed faiss spec an optional PCA is trained and its
    ``pca_A``/``pca_b`` matrices are saved, the vectors are optionally
    L2-normalised, and the k-means centroids are written to
    ``<save_dir>/<spec_str>/centroids.npy``.
    """
    parser = get_parser()
    args = parser.parse_args()

    faiss_specs = parse_faiss_specs(args.faiss_specs)
    print("Faiss Specs:", faiss_specs)

    feat_path = osp.join(args.save_dir, "features")
    if osp.exists(feat_path + ".npy"):
        feats = np.load(feat_path + ".npy")
    else:
        generator, num = get_iterator(args)
        iterator = generator()

        feats = []
        for f in tqdm.tqdm(iterator, total=num):
            feats.append(f)

        del iterator
        del generator

        feats = np.concatenate(feats)

        print(feats.shape)

        os.makedirs(args.save_dir, exist_ok=True)
        # Features are not cached unconditionally (the array can be large);
        # they are written lazily below, only when a later spec needs a
        # pristine copy.

        gc.collect()
        torch.cuda.empty_cache()

    reload = False
    last_index = len(faiss_specs) - 1
    for index, spec in enumerate(faiss_specs):
        print("Processing spec", spec)

        if reload:
            print("Reloading...")
            del feats
            gc.collect()
            feats = np.load(feat_path + ".npy")
            # The fresh copy stays valid until another spec normalises it in
            # place, so clear the flag instead of reloading on every spec.
            reload = False

        save_path = osp.join(args.save_dir, spec.spec_str)
        os.makedirs(save_path, exist_ok=True)
        d = feats.shape[-1]
        x = feats
        if spec.pca > 0:
            print("Computing PCA")
            pca = faiss.PCAMatrix(d, spec.pca)
            pca.train(x)
            d = spec.pca
            b = faiss.vector_to_array(pca.b)
            A = faiss.vector_to_array(pca.A).reshape(pca.d_out, pca.d_in)
            np.save(osp.join(save_path, "pca_A"), A.T)
            np.save(osp.join(save_path, "pca_b"), b)
            print("Applying PCA")
            x = pca.apply_py(x)

        if spec.norm:
            # Without PCA, x aliases feats, and normalize_L2 works in place,
            # so the raw features are destroyed for subsequent specs.
            reload = spec.pca <= 0
            if reload and index < last_index and not osp.exists(feat_path + ".npy"):
                # A later spec will reload from disk; make sure the file it
                # reads exists. Previously nothing ever wrote it, so the
                # reload raised FileNotFoundError. Note that later runs with
                # the same save dir will pick this cache up.
                np.save(feat_path, feats)
            print("Normalizing")
            faiss.normalize_L2(x)

        print("Computing kmeans")
        kmeans = faiss.Kmeans(
            d,
            spec.n_clus,
            niter=50,
            verbose=True,
            spherical=spec.sphere,
            max_points_per_centroid=feats.shape[0],
            gpu=True,
            nredo=3,
        )
        kmeans.train(x)
        np.save(osp.join(save_path, "centroids"), kmeans.centroids)
        del kmeans
        del x
        gc.collect()
class Wav2VecFeatureReader:
    """Load a fairseq checkpoint once and extract features from audio files."""

    def __init__(self, cp_file, layer):
        """Load the checkpoint at ``cp_file`` and move the model to the GPU.

        Args:
            cp_file: checkpoint path handed to
                ``load_model_ensemble_and_task``.
            layer: value forwarded as ``layer=`` on every model call.
        """
        models, _cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [cp_file]
        )
        first_model = models[0]
        first_model.eval()
        first_model.cuda()
        self.model = first_model
        self.task = task
        self.layer = layer

    def read_audio(self, fname):
        """Read ``fname`` and return its samples; the file must be 16 kHz."""
        samples, sample_rate = sf.read(fname)
        assert sample_rate == 16e3

        return samples

    def get_feats(self, loc):
        """Return the model's ``"x"`` output for the audio at ``loc`` on the CPU.

        When the task config has ``normalize`` set, the waveform is
        layer-normalised over its whole length first, which requires a
        one-dimensional signal.
        """
        waveform = self.read_audio(loc)
        with torch.no_grad():
            source = torch.from_numpy(waveform).float().cuda()
            if self.task.cfg.normalize:
                assert source.dim() == 1, source.dim()
                source = F.layer_norm(source, source.shape)
            batch = source.view(1, -1)

            outputs = self.model(
                source=batch, mask=False, features_only=True, layer=self.layer
            )
            return outputs["x"].squeeze(0).cpu()
def main():
    """Extract features for one split and write them next to its manifest.

    Copies ``<split>.tsv`` (and ``.wrd`` / ``.phn`` when they exist) from the
    data directory into the save directory, appends every utterance's
    features to ``<split>.npy`` and writes one length per utterance to
    ``<split>.lengths``.
    """
    args = get_parser().parse_args()

    os.makedirs(args.save_dir, exist_ok=True)

    def create_files(dest):
        source_prefix = osp.join(args.data, args.split)
        copyfile(source_prefix + ".tsv", dest + ".tsv")
        # Transcriptions are optional; copy whichever ones are present.
        for extension in (".wrd", ".phn"):
            if osp.exists(source_prefix + extension):
                copyfile(source_prefix + extension, dest + extension)

        # Start from a clean array file rather than appending to an old one.
        array_path = dest + ".npy"
        if osp.exists(array_path):
            os.remove(array_path)
        return NpyAppendArray(array_path)

    save_path = osp.join(args.save_dir, args.split)
    npaa = create_files(save_path)

    generator, num = get_iterator(args)

    with open(save_path + ".lengths", "w") as lengths_file:
        for w2v_feats in tqdm.tqdm(generator(), total=num):
            n_frames = len(w2v_feats)
            print(n_frames, file=lengths_file)

            # Empty utterances still get a length line but contribute no rows.
            if n_frames > 0:
                npaa.append(w2v_feats.numpy())
+ +""" +Implement unsupervised metric for decoding hyperparameter selection: + $$ alpha * LM_PPL + ViterbitUER(%) * 100 $$ +""" +import argparse +import logging +import sys + +import editdistance + +logging.root.setLevel(logging.INFO) +logging.basicConfig(stream=sys.stdout, level=logging.INFO) +logger = logging.getLogger(__name__) + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("-s", "--hypo", help="hypo transcription", required=True) + parser.add_argument( + "-r", "--reference", help="reference transcription", required=True + ) + return parser + + +def compute_wer(ref_uid_to_tra, hyp_uid_to_tra, g2p): + d_cnt = 0 + w_cnt = 0 + w_cnt_h = 0 + for uid in hyp_uid_to_tra: + ref = ref_uid_to_tra[uid].split() + if g2p is not None: + hyp = g2p(hyp_uid_to_tra[uid]) + hyp = [p for p in hyp if p != "'" and p != " "] + hyp = [p[:-1] if p[-1].isnumeric() else p for p in hyp] + else: + hyp = hyp_uid_to_tra[uid].split() + d_cnt += editdistance.eval(ref, hyp) + w_cnt += len(ref) + w_cnt_h += len(hyp) + wer = float(d_cnt) / w_cnt + logger.debug( + ( + f"wer = {wer * 100:.2f}%; num. of ref words = {w_cnt}; " + f"num. of hyp words = {w_cnt_h}; num. 
def load_tra(tra_path):
    """Load a transcription file into a ``{uid: transcription}`` dict.

    Each line is ``<uid> <transcription>``, split on the first run of
    whitespace. The transcription keeps its trailing newline, as before.

    Blank lines are skipped, and a line holding only a uid is stored with an
    empty transcription. Both cases previously raised ``ValueError`` from
    tuple unpacking.

    Args:
        tra_path: path of the transcription file.

    Returns:
        dict mapping each utterance id to its transcription string; a uid
        that appears more than once keeps its last transcription.
    """
    uid_to_tra = {}
    with open(tra_path, "r") as f:
        for line in f:
            fields = line.split(None, 1)
            if not fields:
                continue  # blank or whitespace-only line
            uid = fields[0]
            uid_to_tra[uid] = fields[1] if len(fields) > 1 else ""
    # Same logger object as the module-level ``logger``; looked up here so
    # the function does not depend on definition order in the file.
    logging.getLogger(__name__).debug(
        f"loaded {len(uid_to_tra)} utterances from {tra_path}"
    )
    return uid_to_tra
@dataclass
class UnpairedAudioTextConfig(FairseqDataclass):
    """Configuration for the ``unpaired_audio_text`` task."""

    data: str = field(
        default=MISSING, metadata={"help": "path to data directory containing audio"}
    )
    text_data: str = field(
        default=MISSING, metadata={"help": "path to data directory containing text"}
    )
    # Forwarded to the feature dataset as its max_length.
    max_length: Optional[int] = None
    labels: Optional[str] = field(
        default=None,
        metadata={"help": "extension of the label file to load, used for fine-tuning"},
    )
    aux_target_postfix: Optional[str] = field(
        default=None,
        metadata={"help": "auxaliry target filename extension"},
    )
    unfiltered: bool = field(
        default=False, metadata={"help": "load data with _unfiltered suffix"}
    )
    ctc_eval: bool = field(
        default=False, metadata={"help": "eval UER as if computed by CTC"}
    )
    sort_by_length: bool = field(
        default=True, metadata={"help": "sort examples by length of audio timesteps"}
    )
    shuffle: bool = field(default=True, metadata={"help": "shuffle examples"})
    append_eos: bool = field(default=False, metadata={"help": "append eos"})
    uppercase: Optional[bool] = field(
        default=False, metadata={"help": "uppercase for LM score computation"}
    )
    skipwords: Optional[str] = field(
        default="",
        metadata={
            "help": "comma-separated words to be removed for LM score computation"
        },
    )
    # Path of the kenlm model used to score unit hypotheses during validation.
    kenlm_path: Optional[str] = None
    # Exponent applied to the vocabulary-usage ratio in the weighted LM perplexity.
    vocab_usage_power: float = 2

    # When set, a Kaldi decoder is built to produce word-level hypotheses.
    word_decoder_config: Optional[KaldiDecoderConfig] = None
    # Path of the kenlm model used to score word-level hypotheses.
    word_kenlm_path: Optional[str] = None

    # A dataclass instance is an unhashable (mutable) default: every config
    # would share the one object, and dataclasses on Python 3.11+ rejects it
    # with ValueError. Build a fresh DecodingConfig per instance instead.
    decoding_config: DecodingConfig = field(default_factory=DecodingConfig)
+ s = " ".join(w for w in s.split() if w not in self.skipwords) + s = s.upper() if self.uppercase else s + return s + + self.str_postprocess = str_postprocess + self.compute_lm_score = lambda s: self.kenlm.score(self.str_postprocess(s)) + + self.compute_word_score = None + if cfg.word_decoder_config is not None: + self.kaldi_decoder = KaldiDecoder(cfg.word_decoder_config, beam=10) + + def compute_word_score(logits, padding): + res = self.kaldi_decoder.decode(logits, padding) + for r in res: + r = r.result() + assert len(r) == 1 + r = r[0] + yield r["score"], r["words"] + + self.compute_word_score = compute_word_score + + @classmethod + def setup_task(cls, cfg: UnpairedAudioTextConfig, **kwargs): + """Setup the task (e.g., load dictionaries). + + Args: + cfg (AudioPretrainingConfig): configuration of this task + """ + + dict_path = os.path.join(cfg.text_data, "dict.txt") + if os.path.exists(dict_path): + target_dictionary = Dictionary.load(dict_path) + else: + dict_path = os.path.join(cfg.data, f"dict.{cfg.labels}.txt") + target_dictionary = Dictionary.load(dict_path) + + return cls(cfg, target_dictionary=target_dictionary) + + def optimizer_step(self, optimizer, model, update_num): + if hasattr(model, "get_groups_for_update"): + groups = model.get_groups_for_update(update_num) + optimizer.step(groups={groups}) + else: + optimizer.step() + + def valid_step(self, sample, model, criterion): + res = model( + **sample["net_input"], + dense_x_only=True, + ) + + dense_x = res["logits"] + padding_mask = res["padding_mask"] + + word_scores = None + if self.compute_word_score is not None: + word_scores = self.compute_word_score(dense_x.cpu(), padding_mask.cpu()) + + z = dense_x.argmax(-1) + z[padding_mask] = self.target_dictionary.pad() + + vocab_seen = torch.zeros(self.num_symbols, dtype=torch.bool) + + import editdistance + + c_err = 0 + c_len = 0 + pred_c_len = 0 + lm_score_sum = 0 + for i, (x, t, id) in enumerate( + zip( + z, + sample["target"] if "target" in sample else 
[None] * len(z), + sample["id"], + ) + ): + + if t is not None: + t = t[(t >= self.target_dictionary.nspecial)] + x = x[ + (x >= self.target_dictionary.nspecial) + & (x < (self.num_symbols + self.target_dictionary.nspecial)) + ] + if self.sil_id >= 0: + x = x[x != self.sil_id] + + vocab_seen[x - self.target_dictionary.nspecial] = True + + pred_units_arr = x + if self.cfg.ctc_eval: + pred_units_arr = pred_units_arr.unique_consecutive() + pred_units_arr = pred_units_arr[pred_units_arr != 0] + + if id == 0: + if t is not None: + logger.info(f"REF: {self.target_dictionary.string(t)}") + logger.info(f"HYP: {self.target_dictionary.string(pred_units_arr)}") + + if self.kenlm is not None: + if t is not None: + ref_lm_s = self.compute_lm_score( + self.target_dictionary.string(t) + ) + logger.info( + f"LM [REF]: {ref_lm_s}, {math.pow(10, -ref_lm_s / (len(t) + 1))}" + ) + + hyp_lm_s = self.compute_lm_score( + self.target_dictionary.string(pred_units_arr) + ) + logger.info( + f"LM [HYP]: {hyp_lm_s}, {math.pow(10, -hyp_lm_s / (len(pred_units_arr) + 1))}" + ) + + pred_units_arr = pred_units_arr.tolist() + + pred_c_len += len(pred_units_arr) + + if t is not None: + t = t.tolist() + c_err += editdistance.eval(pred_units_arr, t) + c_len += len(t) + else: + c_len = pred_c_len + + if self.kenlm is not None: + pred_str = self.target_dictionary.string(pred_units_arr) + lm_score = self.compute_lm_score(pred_str) + lm_score_sum += lm_score + + kaldi_score_sum = 0 + word_lm_sum = 0 + num_words = 0 + if word_scores is not None: + for score, words in word_scores: + kaldi_score_sum += score + num_words += len(words) + if self.word_kenlm is not None: + word_lm_sum += self.kenlm.score(" ".join(words)) + + try: + world_size = get_data_parallel_world_size() + except: + world_size = 1 + + logging_output = { + "loss": c_err, + "_num_char_errors": c_err, + "_num_chars": c_len, + "_num_pred_chars": pred_c_len, + "ntokens": c_len, + "nsentences": z.size(0), + "sample_size": c_len, + "_world_size": 
world_size, + "_lm_score_sum": lm_score_sum, + "_kaldi_score_sum": kaldi_score_sum, + "_word_lm_sum": word_lm_sum, + "_num_words": num_words, + "_vocab_seen": vocab_seen, + } + + return c_err, c_len, logging_output + + def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs): + data_path = self.cfg.data + task_cfg = task_cfg or self.cfg + + has_unpaired_text = os.path.exists( + os.path.join(self.cfg.text_data, f"{split}.idx") + ) + + self.datasets[split] = ExtractedFeaturesDataset( + path=data_path, + split=split, + min_length=3, + max_length=task_cfg.max_length, + labels=None if has_unpaired_text else task_cfg.labels, + label_dict=self.target_dictionary, + shuffle=getattr(task_cfg, "shuffle", True), + sort_by_length=task_cfg.sort_by_length, + aux_target_postfix=task_cfg.aux_target_postfix, + ) + + logger.info(f"split {split} has unpaired text? {has_unpaired_text}") + if has_unpaired_text: + text_dataset = data_utils.load_indexed_dataset( + os.path.join(self.cfg.text_data, split), self.target_dictionary + ) + text_dataset = StripTokenDataset(text_dataset, self.target_dictionary.eos()) + self.datasets[split] = RandomInputDataset( + self.datasets[split], + text_dataset, + ["random_label"], + add_to_input=True, + pad_idx=self.target_dictionary.pad(), + ) + + @property + def source_dictionary(self): + return self._source_dictionary + + @property + def target_dictionary(self): + """Return the :class:`~fairseq.data.Dictionary` for the language + model.""" + return self._target_dictionary + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def reduce_metrics(self, logging_outputs, criterion): + super().reduce_metrics(logging_outputs, criterion) + + zero = torch.scalar_tensor(0.0) + num_char_errors = sum( + log.get("_num_char_errors", zero) for log in logging_outputs + ) + num_chars = sum(log.get("_num_chars", zero) for log in logging_outputs) + num_word_errors = sum( + log.get("_num_word_errors", 
zero) for log in logging_outputs + ) + num_words = sum(log.get("_num_words", zero) for log in logging_outputs) + num_pred_chars = sum( + log.get("_num_pred_chars", zero) for log in logging_outputs + ) + + lm_score_sum = sum(log.get("_lm_score_sum", zero) for log in logging_outputs) + vocab_seen = ( + sum(log.get("_vocab_seen", zero) for log in logging_outputs) + .bool() + .sum() + .item() + ) + kaldi_score_sum = sum( + log.get("_kaldi_score_sum", zero) for log in logging_outputs + ) + word_lm_sum = sum(log.get("_word_lm_sum", zero) for log in logging_outputs) + + metrics.log_scalar_sum("_num_char_errors", num_char_errors) + metrics.log_scalar_sum("_num_chars", num_chars) + metrics.log_scalar_sum("_num_word_errors", num_word_errors) + metrics.log_scalar_sum("_num_words", num_words) + + metrics.log_scalar_sum("lm_score_sum", lm_score_sum) + metrics.log_scalar_sum("num_pred_chars", num_pred_chars) + + if self.cfg.word_kenlm_path is not None: + metrics.log_scalar_sum("kaldi_score_sum", kaldi_score_sum) + metrics.log_scalar_sum("word_lm_sum", word_lm_sum) + + if num_chars > 0: + metrics.log_derived( + "uer", + lambda meters: meters["_num_char_errors"].sum + * 100.0 + / meters["_num_chars"].sum + if meters["_num_chars"].sum > 0 + else float("nan"), + ) + + if lm_score_sum < 0 and vocab_seen > 0: + metrics.log_scalar("vocab_seen_pct", vocab_seen / self.num_symbols) + + metrics.log_derived( + "weighted_lm_ppl", + lambda meters: math.pow( + 10, + -meters["lm_score_sum"].sum + / ( + meters["num_pred_chars"].sum + meters["nsentences"].sum + ), # account for </s> + ) + / meters["vocab_seen_pct"].avg ** self.cfg.vocab_usage_power, + ) + + metrics.log_derived( + "lm_ppl", + lambda meters: math.pow( + 10, + -meters["lm_score_sum"].sum + / ( + meters["num_pred_chars"].sum + meters["nsentences"].sum + ), # account for </s> + ), + ) + else: + metrics.log_derived("weighted_lm_ppl", lambda meters: float("inf")) + + if num_words > 0: + if word_lm_sum != 0: + metrics.log_derived( + 
"word_lm_ppl", + lambda meters: math.pow( + 10, + -meters["word_lm_sum"].sum + / ( + meters["_num_words"].sum + meters["nsentences"].sum + ), # account for </s> + ), + ) + metrics.log_derived( + "weighted_word_lm_ppl", + lambda meters: math.pow( + 10, + -meters["word_lm_sum"].sum + / ( + meters["_num_words"].sum + meters["nsentences"].sum + ), # account for </s> + ) + / meters["vocab_seen_pct"].avg ** self.cfg.vocab_usage_power, + ) + + if self.cfg.word_kenlm_path is not None: + metrics.log_derived( + "kaldi_score", + lambda meters: meters["kaldi_score_sum"].sum + / meters["nsentences"].sum, + ) + + def build_model(self, cfg: FairseqDataclass, from_checkpoint=False): + model = super().build_model(cfg) + + return model diff --git a/fairseq/examples/wav2vec/unsupervised/w2vu_generate.py b/fairseq/examples/wav2vec/unsupervised/w2vu_generate.py new file mode 100644 index 0000000..0611297 --- /dev/null +++ b/fairseq/examples/wav2vec/unsupervised/w2vu_generate.py @@ -0,0 +1,714 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Run inference for pre-processed data with a trained model. 
+""" + +import ast +from collections import namedtuple +from dataclasses import dataclass, field +from enum import Enum, auto +import hydra +from hydra.core.config_store import ConfigStore +import logging +import math +import os +from omegaconf import OmegaConf +from typing import Optional +import sys + +import editdistance +import torch + +from hydra.core.hydra_config import HydraConfig + +from fairseq import checkpoint_utils, progress_bar, tasks, utils +from fairseq.data.data_utils import post_process +from fairseq.dataclass.configs import FairseqDataclass, FairseqConfig +from fairseq.logging.meters import StopwatchMeter +from omegaconf import open_dict + +from examples.speech_recognition.kaldi.kaldi_decoder import KaldiDecoderConfig + +logging.root.setLevel(logging.INFO) +logging.basicConfig(stream=sys.stdout, level=logging.INFO) +logger = logging.getLogger(__name__) + + +class DecoderType(Enum): + VITERBI = auto() + KENLM = auto() + FAIRSEQ = auto() + KALDI = auto() + + +@dataclass +class UnsupGenerateConfig(FairseqDataclass): + fairseq: FairseqConfig = FairseqConfig() + lm_weight: float = field( + default=2.0, + metadata={"help": "language model weight"}, + ) + w2l_decoder: DecoderType = field( + default=DecoderType.VITERBI, + metadata={"help": "type of decoder to use"}, + ) + kaldi_decoder_config: Optional[KaldiDecoderConfig] = None + lexicon: Optional[str] = field( + default=None, + metadata={ + "help": "path to lexicon. 
This is also used to 'phonemize' for unsupvised param tuning" + }, + ) + lm_model: Optional[str] = field( + default=None, + metadata={"help": "path to language model (kenlm or fairseq)"}, + ) + decode_stride: Optional[float] = field( + default=None, + metadata={"help": "changing the decoding frequency of the generator"}, + ) + unit_lm: bool = field( + default=False, + metadata={"help": "whether to use unit lm"}, + ) + beam_threshold: float = field( + default=50.0, + metadata={"help": "beam score threshold"}, + ) + beam_size_token: float = field( + default=100.0, + metadata={"help": "max tokens per beam"}, + ) + beam: int = field( + default=5, + metadata={"help": "decoder beam size"}, + ) + nbest: int = field( + default=1, + metadata={"help": "number of results to return"}, + ) + word_score: float = field( + default=1.0, + metadata={"help": "word score to add at end of word"}, + ) + unk_weight: float = field( + default=-math.inf, + metadata={"help": "unknown token weight"}, + ) + sil_weight: float = field( + default=0.0, + metadata={"help": "silence token weight"}, + ) + targets: Optional[str] = field( + default=None, + metadata={"help": "extension of ground truth labels to compute UER"}, + ) + results_path: Optional[str] = field( + default=None, + metadata={"help": "where to store results"}, + ) + post_process: Optional[str] = field( + default=None, + metadata={"help": "how to post process results"}, + ) + vocab_usage_power: float = field( + default=2, + metadata={"help": "for unsupervised param tuning"}, + ) + + viterbi_transcript: Optional[str] = field( + default=None, + metadata={"help": "for unsupervised param tuning"}, + ) + min_lm_ppl: float = field( + default=0, + metadata={"help": "for unsupervised param tuning"}, + ) + min_vt_uer: float = field( + default=0, + metadata={"help": "for unsupervised param tuning"}, + ) + + blank_weight: float = field( + default=0, + metadata={"help": "value to add or set for blank emission"}, + ) + blank_mode: str = field( + 
default="set", + metadata={ + "help": "can be add or set, how to modify blank emission with blank weight" + }, + ) + sil_is_blank: bool = field( + default=False, + metadata={"help": "if true, <SIL> token is same as blank token"}, + ) + + unsupervised_tuning: bool = field( + default=False, + metadata={ + "help": "if true, returns a score based on unsupervised param selection metric instead of UER" + }, + ) + is_ax: bool = field( + default=False, + metadata={ + "help": "if true, assumes we are using ax for tuning and returns a tuple for ax to consume" + }, + ) + + +def get_dataset_itr(cfg, task): + return task.get_batch_iterator( + dataset=task.dataset(cfg.fairseq.dataset.gen_subset), + max_tokens=cfg.fairseq.dataset.max_tokens, + max_sentences=cfg.fairseq.dataset.batch_size, + max_positions=(sys.maxsize, sys.maxsize), + ignore_invalid_inputs=cfg.fairseq.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.fairseq.dataset.required_batch_size_multiple, + num_shards=cfg.fairseq.dataset.num_shards, + shard_id=cfg.fairseq.dataset.shard_id, + num_workers=cfg.fairseq.dataset.num_workers, + data_buffer_size=cfg.fairseq.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + + +def process_predictions( + cfg: UnsupGenerateConfig, + hypos, + tgt_dict, + target_tokens, + res_files, +): + retval = [] + word_preds = [] + transcriptions = [] + dec_scores = [] + + for i, hypo in enumerate(hypos[: min(len(hypos), cfg.nbest)]): + if torch.is_tensor(hypo["tokens"]): + tokens = hypo["tokens"].int().cpu() + tokens = tokens[tokens >= tgt_dict.nspecial] + hyp_pieces = tgt_dict.string(tokens) + else: + hyp_pieces = " ".join(hypo["tokens"]) + + if "words" in hypo and len(hypo["words"]) > 0: + hyp_words = " ".join(hypo["words"]) + else: + hyp_words = post_process(hyp_pieces, cfg.post_process) + + to_write = {} + if res_files is not None: + to_write[res_files["hypo.units"]] = hyp_pieces + to_write[res_files["hypo.words"]] = hyp_words + + tgt_words = "" + if 
target_tokens is not None: + if isinstance(target_tokens, str): + tgt_pieces = tgt_words = target_tokens + else: + tgt_pieces = tgt_dict.string(target_tokens) + tgt_words = post_process(tgt_pieces, cfg.post_process) + + if res_files is not None: + to_write[res_files["ref.units"]] = tgt_pieces + to_write[res_files["ref.words"]] = tgt_words + + if not cfg.fairseq.common_eval.quiet: + logger.info(f"HYPO {i}:" + hyp_words) + if tgt_words: + logger.info("TARGET:" + tgt_words) + + if "am_score" in hypo and "lm_score" in hypo: + logger.info( + f"DECODER AM SCORE: {hypo['am_score']}, DECODER LM SCORE: {hypo['lm_score']}, DECODER SCORE: {hypo['score']}" + ) + elif "score" in hypo: + logger.info(f"DECODER SCORE: {hypo['score']}") + + logger.info("___________________") + + hyp_words_arr = hyp_words.split() + tgt_words_arr = tgt_words.split() + + retval.append( + ( + editdistance.eval(hyp_words_arr, tgt_words_arr), + len(hyp_words_arr), + len(tgt_words_arr), + hyp_pieces, + hyp_words, + ) + ) + word_preds.append(hyp_words_arr) + transcriptions.append(to_write) + dec_scores.append(-hypo.get("score", 0)) # negate cuz kaldi returns NLL + + if len(retval) > 1: + best = None + for r, t in zip(retval, transcriptions): + if best is None or r[0] < best[0][0]: + best = r, t + for dest, tran in best[1].items(): + print(tran, file=dest) + dest.flush() + return best[0] + + assert len(transcriptions) == 1 + for dest, tran in transcriptions[0].items(): + print(tran, file=dest) + + return retval[0] + + +def prepare_result_files(cfg: UnsupGenerateConfig): + def get_res_file(file_prefix): + if cfg.fairseq.dataset.num_shards > 1: + file_prefix = f"{cfg.fairseq.dataset.shard_id}_{file_prefix}" + path = os.path.join( + cfg.results_path, + "{}{}.txt".format( + cfg.fairseq.dataset.gen_subset, + file_prefix, + ), + ) + return open(path, "w", buffering=1) + + if not cfg.results_path: + return None + + return { + "hypo.words": get_res_file(""), + "hypo.units": get_res_file("_units"), + "ref.words": 
get_res_file("_ref"), + "ref.units": get_res_file("_ref_units"), + "hypo.nbest.words": get_res_file("_nbest_words"), + } + + +def optimize_models(cfg: UnsupGenerateConfig, use_cuda, models): + """Optimize ensemble for generation""" + for model in models: + model.eval() + if cfg.fairseq.common.fp16: + model.half() + if use_cuda: + model.cuda() + + +GenResult = namedtuple( + "GenResult", + [ + "count", + "errs_t", + "gen_timer", + "lengths_hyp_unit_t", + "lengths_hyp_t", + "lengths_t", + "lm_score_t", + "num_feats", + "num_sentences", + "num_symbols", + "vt_err_t", + "vt_length_t", + ], +) + + +def generate(cfg: UnsupGenerateConfig, models, saved_cfg, use_cuda): + task = tasks.setup_task(cfg.fairseq.task) + saved_cfg.task.labels = cfg.fairseq.task.labels + task.load_dataset(cfg.fairseq.dataset.gen_subset, task_cfg=saved_cfg.task) + # Set dictionary + tgt_dict = task.target_dictionary + logger.info( + "| {} {} {} examples".format( + cfg.fairseq.task.data, + cfg.fairseq.dataset.gen_subset, + len(task.dataset(cfg.fairseq.dataset.gen_subset)), + ) + ) + # Load dataset (possibly sharded) + itr = get_dataset_itr(cfg, task) + # Initialize generator + gen_timer = StopwatchMeter() + + def build_generator(cfg: UnsupGenerateConfig): + w2l_decoder = cfg.w2l_decoder + if w2l_decoder == DecoderType.VITERBI: + from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder + + return W2lViterbiDecoder(cfg, task.target_dictionary) + elif w2l_decoder == DecoderType.KENLM: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + return W2lKenLMDecoder(cfg, task.target_dictionary) + elif w2l_decoder == DecoderType.FAIRSEQ: + from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder + + return W2lFairseqLMDecoder(cfg, task.target_dictionary) + elif w2l_decoder == DecoderType.KALDI: + from examples.speech_recognition.kaldi.kaldi_decoder import KaldiDecoder + + assert cfg.kaldi_decoder_config is not None + + return KaldiDecoder( + 
cfg.kaldi_decoder_config, + cfg.beam, + ) + else: + raise NotImplementedError( + "only wav2letter decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment but found " + + str(w2l_decoder) + ) + + generator = build_generator(cfg) + + kenlm = None + fairseq_lm = None + if cfg.lm_model is not None: + import kenlm + + kenlm = kenlm.Model(cfg.lm_model) + + num_sentences = 0 + if cfg.results_path is not None and not os.path.exists(cfg.results_path): + os.makedirs(cfg.results_path) + + res_files = prepare_result_files(cfg) + errs_t = 0 + lengths_hyp_t = 0 + lengths_hyp_unit_t = 0 + lengths_t = 0 + count = 0 + num_feats = 0 + all_hyp_pieces = [] + all_hyp_words = [] + + num_symbols = ( + len([s for s in tgt_dict.symbols if not s.startswith("madeup")]) + - tgt_dict.nspecial + ) + targets = None + if cfg.targets is not None: + tgt_path = os.path.join( + cfg.fairseq.task.data, cfg.fairseq.dataset.gen_subset + "." + cfg.targets + ) + if os.path.exists(tgt_path): + with open(tgt_path, "r") as f: + targets = f.read().splitlines() + viterbi_transcript = None + if cfg.viterbi_transcript is not None and len(cfg.viterbi_transcript) > 0: + logger.info(f"loading viterbi transcript from {cfg.viterbi_transcript}") + with open(cfg.viterbi_transcript, "r") as vf: + viterbi_transcript = vf.readlines() + viterbi_transcript = [v.rstrip().split() for v in viterbi_transcript] + + gen_timer.start() + + start = 0 + end = len(itr) + + hypo_futures = None + if cfg.w2l_decoder == DecoderType.KALDI: + logger.info("Extracting features") + hypo_futures = [] + samples = [] + with progress_bar.build_progress_bar(cfg.fairseq.common, itr) as t: + for i, sample in enumerate(t): + if "net_input" not in sample or i < start or i >= end: + continue + if "padding_mask" not in sample["net_input"]: + sample["net_input"]["padding_mask"] = None + + hypos, num_feats = gen_hypos( + generator, models, num_feats, sample, task, use_cuda + ) + hypo_futures.append(hypos) + samples.append(sample) + itr 
= list(zip(hypo_futures, samples)) + start = 0 + end = len(itr) + logger.info("Finished extracting features") + + with progress_bar.build_progress_bar(cfg.fairseq.common, itr) as t: + for i, sample in enumerate(t): + if i < start or i >= end: + continue + + if hypo_futures is not None: + hypos, sample = sample + hypos = [h.result() for h in hypos] + else: + if "net_input" not in sample: + continue + + hypos, num_feats = gen_hypos( + generator, models, num_feats, sample, task, use_cuda + ) + + for i, sample_id in enumerate(sample["id"].tolist()): + if targets is not None: + target_tokens = targets[sample_id] + elif "target" in sample or "target_label" in sample: + toks = ( + sample["target"][i, :] + if "target_label" not in sample + else sample["target_label"][i, :] + ) + + target_tokens = utils.strip_pad(toks, tgt_dict.pad()).int().cpu() + else: + target_tokens = None + + # Process top predictions + ( + errs, + length_hyp, + length, + hyp_pieces, + hyp_words, + ) = process_predictions( + cfg, + hypos[i], + tgt_dict, + target_tokens, + res_files, + ) + errs_t += errs + lengths_hyp_t += length_hyp + lengths_hyp_unit_t += ( + len(hyp_pieces) if len(hyp_pieces) > 0 else len(hyp_words) + ) + lengths_t += length + count += 1 + all_hyp_pieces.append(hyp_pieces) + all_hyp_words.append(hyp_words) + + num_sentences += ( + sample["nsentences"] if "nsentences" in sample else sample["id"].numel() + ) + + lm_score_sum = 0 + if kenlm is not None: + + if cfg.unit_lm: + lm_score_sum = sum(kenlm.score(w) for w in all_hyp_pieces) + else: + lm_score_sum = sum(kenlm.score(w) for w in all_hyp_words) + elif fairseq_lm is not None: + lm_score_sum = sum(fairseq_lm.score([h.split() for h in all_hyp_words])[0]) + + vt_err_t = 0 + vt_length_t = 0 + if viterbi_transcript is not None: + unit_hyps = [] + if cfg.targets is not None and cfg.lexicon is not None: + lex = {} + with open(cfg.lexicon, "r") as lf: + for line in lf: + items = line.rstrip().split() + lex[items[0]] = items[1:] + for h in 
all_hyp_pieces: + hyp_ws = [] + for w in h.split(): + assert w in lex, w + hyp_ws.extend(lex[w]) + unit_hyps.append(hyp_ws) + + else: + unit_hyps.extend([h.split() for h in all_hyp_words]) + + vt_err_t = sum( + editdistance.eval(vt, h) for vt, h in zip(viterbi_transcript, unit_hyps) + ) + + vt_length_t = sum(len(h) for h in viterbi_transcript) + + if res_files is not None: + for r in res_files.values(): + r.close() + + gen_timer.stop(lengths_hyp_t) + + return GenResult( + count, + errs_t, + gen_timer, + lengths_hyp_unit_t, + lengths_hyp_t, + lengths_t, + lm_score_sum, + num_feats, + num_sentences, + num_symbols, + vt_err_t, + vt_length_t, + ) + + +def gen_hypos(generator, models, num_feats, sample, task, use_cuda): + sample = utils.move_to_cuda(sample) if use_cuda else sample + + if "features" in sample["net_input"]: + sample["net_input"]["dense_x_only"] = True + num_feats += ( + sample["net_input"]["features"].shape[0] + * sample["net_input"]["features"].shape[1] + ) + hypos = task.inference_step(generator, models, sample, None) + return hypos, num_feats + + +def main(cfg: UnsupGenerateConfig, model=None): + if ( + cfg.fairseq.dataset.max_tokens is None + and cfg.fairseq.dataset.batch_size is None + ): + cfg.fairseq.dataset.max_tokens = 1024000 + + use_cuda = torch.cuda.is_available() and not cfg.fairseq.common.cpu + + task = tasks.setup_task(cfg.fairseq.task) + + overrides = ast.literal_eval(cfg.fairseq.common_eval.model_overrides) + + if cfg.fairseq.task._name == "unpaired_audio_text": + overrides["model"] = { + "blank_weight": cfg.blank_weight, + "blank_mode": cfg.blank_mode, + "blank_is_sil": cfg.sil_is_blank, + "no_softmax": True, + "segmentation": { + "type": "NONE", + }, + } + else: + overrides["model"] = { + "blank_weight": cfg.blank_weight, + "blank_mode": cfg.blank_mode, + } + + if cfg.decode_stride: + overrides["model"]["generator_stride"] = cfg.decode_stride + + if model is None: + # Load ensemble + logger.info("| loading model(s) from 
{}".format(cfg.fairseq.common_eval.path)) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + cfg.fairseq.common_eval.path.split("\\"), + arg_overrides=overrides, + task=task, + suffix=cfg.fairseq.checkpoint.checkpoint_suffix, + strict=(cfg.fairseq.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.fairseq.checkpoint.checkpoint_shard_count, + ) + optimize_models(cfg, use_cuda, models) + else: + models = [model] + saved_cfg = cfg.fairseq + + with open_dict(saved_cfg.task): + saved_cfg.task.shuffle = False + saved_cfg.task.sort_by_length = False + + gen_result = generate(cfg, models, saved_cfg, use_cuda) + + wer = None + if gen_result.lengths_t > 0: + wer = gen_result.errs_t * 100.0 / gen_result.lengths_t + logger.info(f"WER: {wer}") + + lm_ppl = float("inf") + + if gen_result.lm_score_t != 0 and gen_result.lengths_hyp_t > 0: + hyp_len = gen_result.lengths_hyp_t + lm_ppl = math.pow( + 10, -gen_result.lm_score_t / (hyp_len + gen_result.num_sentences) + ) + logger.info(f"LM PPL: {lm_ppl}") + + logger.info( + "| Processed {} sentences ({} tokens) in {:.1f}s ({:.2f}" + " sentences/s, {:.2f} tokens/s)".format( + gen_result.num_sentences, + gen_result.gen_timer.n, + gen_result.gen_timer.sum, + gen_result.num_sentences / gen_result.gen_timer.sum, + 1.0 / gen_result.gen_timer.avg, + ) + ) + + vt_diff = None + if gen_result.vt_length_t > 0: + vt_diff = gen_result.vt_err_t / gen_result.vt_length_t + vt_diff = max(cfg.min_vt_uer, vt_diff) + + lm_ppl = max(cfg.min_lm_ppl, lm_ppl) + + if not cfg.unsupervised_tuning: + weighted_score = wer + else: + weighted_score = math.log(lm_ppl) * (vt_diff or 1.0) + + res = ( + f"| Generate {cfg.fairseq.dataset.gen_subset} with beam={cfg.beam}, " + f"lm_weight={cfg.kaldi_decoder_config.acoustic_scale if cfg.kaldi_decoder_config else cfg.lm_weight}, " + f"word_score={cfg.word_score}, sil_weight={cfg.sil_weight}, blank_weight={cfg.blank_weight}, " + f"WER: {wer}, LM_PPL: {lm_ppl}, num feats: {gen_result.num_feats}, " + f"length: 
{gen_result.lengths_hyp_t}, UER to viterbi: {(vt_diff or 0) * 100}, score: {weighted_score}" + ) + + logger.info(res) + # print(res) + + return task, weighted_score + + +@hydra.main( + config_path=os.path.join("../../..", "fairseq", "config"), config_name="config" +) +def hydra_main(cfg): + with open_dict(cfg): + # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126) + cfg.job_logging_cfg = OmegaConf.to_container( + HydraConfig.get().job_logging, resolve=True + ) + + cfg = OmegaConf.create( + OmegaConf.to_container(cfg, resolve=False, enum_to_str=False) + ) + OmegaConf.set_struct(cfg, True) + logger.info(cfg) + + utils.import_user_module(cfg.fairseq.common) + + _, score = main(cfg) + + if cfg.is_ax: + return score, None + return score + + +def cli_main(): + try: + from hydra._internal.utils import get_args + + cfg_name = get_args().config_name or "config" + except: + logger.warning("Failed to get config name from hydra args") + cfg_name = "config" + + cs = ConfigStore.instance() + cs.store(name=cfg_name, node=UnsupGenerateConfig) + hydra_main() + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/examples/wav2vec/vq-wav2vec_featurize.py b/fairseq/examples/wav2vec/vq-wav2vec_featurize.py new file mode 100644 index 0000000..627072e --- /dev/null +++ b/fairseq/examples/wav2vec/vq-wav2vec_featurize.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +""" +Helper script to pre-compute embeddings for a flashlight (previously called wav2letter++) dataset +""" + +import argparse +import glob +import os +import os.path as osp +import pprint + +import soundfile as sf +import torch +import fairseq +from torch import nn +from torch.utils.data import DataLoader + + +try: + import tqdm +except: + print("Install tqdm to use --log-format=tqdm") + + +class FilesDataset: + def __init__(self, files, labels): + self.files = files + if labels and osp.exists(labels): + with open(labels, "r") as lbl_f: + self.labels = [line.rstrip() for line in lbl_f] + else: + self.labels = labels + + def __len__(self): + return len(self.files) + + def __getitem__(self, index): + fname = self.files[index] + + wav, sr = sf.read(fname) + assert sr == 16000 + + wav = torch.from_numpy(wav).float() + lbls = None + if self.labels: + if isinstance(self.labels, str): + lbl_file = osp.splitext(fname)[0] + "." + self.labels + with open(lbl_file, "r") as lblf: + lbls = lblf.readline() + assert lbls is not None + else: + lbls = self.labels[index] + return wav, lbls + + def collate(self, batch): + return batch + + +class ArgTypes: + @staticmethod + def existing_path(arg): + arg = str(arg) + assert osp.exists(arg), f"File {arg} does not exist" + return arg + + @staticmethod + def mkdir(arg): + arg = str(arg) + os.makedirs(arg, exist_ok=True) + return arg + + +class DatasetWriter: + def __init__(self): + + self.args = self.load_config() + pprint.pprint(self.args.__dict__) + + self.model = self.load_model() + + def __getattr__(self, attr): + return getattr(self.args, attr) + + def read_manifest(self, fname): + + with open(fname, "r") as fp: + lines = fp.read().split("\n") + root = lines.pop(0).strip() + fnames = [ + osp.join(root, line.split("\t")[0]) for line in lines if len(line) > 0 + ] + + return fnames + + def process_splits(self): + + if self.args.shard is not None or self.args.num_shards is not None: + assert self.args.shard is not None and 
self.args.num_shards is not None + + for split in self.splits: + print(split) + + if self.extension == "tsv": + datadir = osp.join(self.data_dir, f"{split}.{self.extension}") + print("Reading manifest file: ", datadir) + files = self.read_manifest(datadir) + else: + datadir = osp.join(self.data_dir, split, f"**/*.{self.extension}") + files = glob.glob(datadir, recursive=True) + + assert len(files) > 0 + + if self.args.shard is not None: + files = files[self.args.shard :: self.args.num_shards] + + lbls = [] + with open(self.data_file(split), "w") as srcf: + for line, lbl in self.iterate(files): + print(line, file=srcf) + if self.args.labels: + lbls.append(lbl + "\n") + + if self.args.labels: + assert all(a is not None for a in lbls) + with open(self.lbl_file(split), "w") as lblf: + lblf.writelines(lbls) + + def iterate(self, files): + + data = self.load_data(files) + for samples in tqdm.tqdm(data, total=len(files) // 32): + + for wav, lbl in samples: + x = wav.unsqueeze(0).float().cuda() + + div = 1 + while x.size(-1) // div > self.args.max_size: + div += 1 + + xs = x.chunk(div, dim=-1) + + result = [] + for x in xs: + torch.cuda.empty_cache() + x = self.model.feature_extractor(x) + if self.quantize_location == "encoder": + with torch.no_grad(): + _, idx = self.model.vector_quantizer.forward_idx(x) + idx = idx.squeeze(0).cpu() + else: + with torch.no_grad(): + z = self.model.feature_aggregator(x) + _, idx = self.model.vector_quantizer.forward_idx(z) + idx = idx.squeeze(0).cpu() + result.append(idx) + + idx = torch.cat(result, dim=0) + yield " ".join("-".join(map(str, a.tolist())) for a in idx), lbl + + def lbl_file(self, name): + shard_part = "" if self.args.shard is None else f".{self.args.shard}" + return osp.join(self.output_dir, f"{name}.lbl{shard_part}") + + def data_file(self, name): + shard_part = "" if self.args.shard is None else f".{self.args.shard}" + return osp.join(self.output_dir, f"{name}.src{shard_part}") + + def var_file(self): + return 
osp.join(self.output_dir, f"vars.pt") + + def load_config(self): + + parser = argparse.ArgumentParser("Vector Quantized wav2vec features") + + # Model Arguments + parser.add_argument("--checkpoint", type=ArgTypes.existing_path, required=True) + parser.add_argument("--data-parallel", action="store_true") + + # Output Arguments + parser.add_argument("--output-dir", type=ArgTypes.mkdir, required=True) + + # Data Arguments + parser.add_argument("--data-dir", type=ArgTypes.existing_path, required=True) + parser.add_argument("--splits", type=str, nargs="+", required=True) + parser.add_argument("--extension", type=str, required=True) + parser.add_argument("--labels", type=str, required=False) + + parser.add_argument("--shard", type=int, default=None) + parser.add_argument("--num-shards", type=int, default=None) + parser.add_argument("--max-size", type=int, default=1300000) + + # Logger Arguments + parser.add_argument( + "--log-format", type=str, choices=["none", "simple", "tqdm"] + ) + + return parser.parse_args() + + def load_data(self, fnames): + + dataset = FilesDataset(fnames, self.args.labels) + loader = DataLoader( + dataset, batch_size=32, collate_fn=dataset.collate, num_workers=8 + ) + return loader + + def load_model(self): + model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([self.checkpoint]) + model = model[0] + + self.quantize_location = getattr(cfg.model, "vq", "encoder") + + model.eval().float() + model.cuda() + + if self.data_parallel: + model = nn.DataParallel(model) + + return model + + def __call__(self): + + self.process_splits() + + if hasattr(self.model.feature_extractor, "vars") and ( + self.args.shard is None or self.args.shard == 0 + ): + vars = ( + self.model.feature_extractor.vars.view( + self.model.feature_extractor.banks, + self.model.feature_extractor.num_vars, + -1, + ) + .cpu() + .detach() + ) + print("writing learned latent variable embeddings: ", vars.shape) + torch.save(vars, self.var_file()) + + +if __name__ == 
"__main__": + write_data = DatasetWriter() + + write_data() + print("Done.") diff --git a/fairseq/examples/wav2vec/wav2vec_featurize.py b/fairseq/examples/wav2vec/wav2vec_featurize.py new file mode 100644 index 0000000..588268b --- /dev/null +++ b/fairseq/examples/wav2vec/wav2vec_featurize.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Helper script to pre-compute embeddings for a flashlight (previously called wav2letter++) dataset +""" + +import argparse +import glob +import os +from shutil import copy + +import h5py +import numpy as np +import soundfile as sf +import torch +import tqdm +import fairseq +from torch import nn + + +def read_audio(fname): + """ Load an audio file and return PCM along with the sample rate """ + + wav, sr = sf.read(fname) + assert sr == 16e3 + + return wav, 16e3 + + +class PretrainedWav2VecModel(nn.Module): + def __init__(self, fname): + super().__init__() + + model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([fname]) + model = model[0] + model.eval() + + self.model = model + + def forward(self, x): + with torch.no_grad(): + z = self.model.feature_extractor(x) + if isinstance(z, tuple): + z = z[0] + c = self.model.feature_aggregator(z) + return z, c + + +class EmbeddingWriterConfig(argparse.ArgumentParser): + def __init__(self): + super().__init__("Pre-compute embeddings for flashlight datasets") + + kwargs = {"action": "store", "type": str, "required": True} + + self.add_argument("--input", "-i", help="Input Directory", **kwargs) + self.add_argument("--output", "-o", help="Output Directory", **kwargs) + self.add_argument("--model", help="Path to model checkpoint", **kwargs) + self.add_argument("--split", help="Dataset Splits", nargs="+", **kwargs) + self.add_argument( + "--ext", default="wav", required=False, help="Audio file 
extension" + ) + + self.add_argument( + "--no-copy-labels", + action="store_true", + help="Do not copy label files. Useful for large datasets, use --targetdir in flashlight then.", + ) + self.add_argument( + "--use-feat", + action="store_true", + help="Use the feature vector ('z') instead of context vector ('c') for features", + ) + self.add_argument("--gpu", help="GPU to use", default=0, type=int) + + +class Prediction: + """ Lightweight wrapper around a fairspeech embedding model """ + + def __init__(self, fname, gpu=0): + self.gpu = gpu + self.model = PretrainedWav2VecModel(fname).cuda(gpu) + + def __call__(self, x): + x = torch.from_numpy(x).float().cuda(self.gpu) + with torch.no_grad(): + z, c = self.model(x.unsqueeze(0)) + + return z.squeeze(0).cpu().numpy(), c.squeeze(0).cpu().numpy() + + +class H5Writer: + """ Write features as hdf5 file in flashlight compatible format """ + + def __init__(self, fname): + self.fname = fname + os.makedirs(os.path.dirname(self.fname), exist_ok=True) + + def write(self, data): + channel, T = data.shape + + with h5py.File(self.fname, "w") as out_ds: + data = data.T.flatten() + out_ds["features"] = data + out_ds["info"] = np.array([16e3 // 160, T, channel]) + + +class EmbeddingDatasetWriter(object): + """Given a model and a flashlight dataset, pre-compute and store embeddings + + Args: + input_root, str : + Path to the flashlight dataset + output_root, str : + Desired output directory. 
Will be created if non-existent + split, str : + Dataset split + """ + + def __init__( + self, + input_root, + output_root, + split, + model_fname, + extension="wav", + gpu=0, + verbose=False, + use_feat=False, + ): + + assert os.path.exists(model_fname) + + self.model_fname = model_fname + self.model = Prediction(self.model_fname, gpu) + + self.input_root = input_root + self.output_root = output_root + self.split = split + self.verbose = verbose + self.extension = extension + self.use_feat = use_feat + + assert os.path.exists(self.input_path), "Input path '{}' does not exist".format( + self.input_path + ) + + def _progress(self, iterable, **kwargs): + if self.verbose: + return tqdm.tqdm(iterable, **kwargs) + return iterable + + def require_output_path(self, fname=None): + path = self.get_output_path(fname) + os.makedirs(path, exist_ok=True) + + @property + def input_path(self): + return self.get_input_path() + + @property + def output_path(self): + return self.get_output_path() + + def get_input_path(self, fname=None): + if fname is None: + return os.path.join(self.input_root, self.split) + return os.path.join(self.get_input_path(), fname) + + def get_output_path(self, fname=None): + if fname is None: + return os.path.join(self.output_root, self.split) + return os.path.join(self.get_output_path(), fname) + + def copy_labels(self): + self.require_output_path() + + labels = list( + filter( + lambda x: self.extension not in x, glob.glob(self.get_input_path("*")) + ) + ) + for fname in tqdm.tqdm(labels): + copy(fname, self.output_path) + + @property + def input_fnames(self): + return sorted(glob.glob(self.get_input_path("*.{}".format(self.extension)))) + + def __len__(self): + return len(self.input_fnames) + + def write_features(self): + + paths = self.input_fnames + + fnames_context = map( + lambda x: os.path.join( + self.output_path, x.replace("." 
+ self.extension, ".h5context") + ), + map(os.path.basename, paths), + ) + + for name, target_fname in self._progress( + zip(paths, fnames_context), total=len(self) + ): + wav, sr = read_audio(name) + z, c = self.model(wav) + feat = z if self.use_feat else c + writer = H5Writer(target_fname) + writer.write(feat) + + def __repr__(self): + + return "EmbeddingDatasetWriter ({n_files} files)\n\tinput:\t{input_root}\n\toutput:\t{output_root}\n\tsplit:\t{split})".format( + n_files=len(self), **self.__dict__ + ) + + +if __name__ == "__main__": + + args = EmbeddingWriterConfig().parse_args() + + for split in args.split: + + writer = EmbeddingDatasetWriter( + input_root=args.input, + output_root=args.output, + split=split, + model_fname=args.model, + gpu=args.gpu, + extension=args.ext, + use_feat=args.use_feat, + ) + + print(writer) + writer.require_output_path() + + print("Writing Features...") + writer.write_features() + print("Done.") + + if not args.no_copy_labels: + print("Copying label data...") + writer.copy_labels() + print("Done.") diff --git a/fairseq/examples/wav2vec/wav2vec_manifest.py b/fairseq/examples/wav2vec/wav2vec_manifest.py new file mode 100644 index 0000000..9b8aa18 --- /dev/null +++ b/fairseq/examples/wav2vec/wav2vec_manifest.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Data pre-processing: build vocabularies and binarize training data. 
+""" + +import argparse +import glob +import os +import random + +import soundfile + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "root", metavar="DIR", help="root directory containing flac files to index" + ) + parser.add_argument( + "--valid-percent", + default=0.01, + type=float, + metavar="D", + help="percentage of data to use as validation set (between 0 and 1)", + ) + parser.add_argument( + "--dest", default=".", type=str, metavar="DIR", help="output directory" + ) + parser.add_argument( + "--ext", default="flac", type=str, metavar="EXT", help="extension to look for" + ) + parser.add_argument("--seed", default=42, type=int, metavar="N", help="random seed") + parser.add_argument( + "--path-must-contain", + default=None, + type=str, + metavar="FRAG", + help="if set, path must contain this substring for a file to be included in the manifest", + ) + return parser + + +def main(args): + assert args.valid_percent >= 0 and args.valid_percent <= 1.0 + + if not os.path.exists(args.dest): + os.makedirs(args.dest) + + dir_path = os.path.realpath(args.root) + search_path = os.path.join(dir_path, "**/*." 
+ args.ext) + rand = random.Random(args.seed) + + valid_f = ( + open(os.path.join(args.dest, "valid.tsv"), "w") + if args.valid_percent > 0 + else None + ) + + with open(os.path.join(args.dest, "train.tsv"), "w") as train_f: + print(dir_path, file=train_f) + + if valid_f is not None: + print(dir_path, file=valid_f) + + for fname in glob.iglob(search_path, recursive=True): + file_path = os.path.realpath(fname) + + if args.path_must_contain and args.path_must_contain not in file_path: + continue + + frames = soundfile.info(fname).frames + dest = train_f if rand.random() > args.valid_percent else valid_f + print( + "{}\t{}".format(os.path.relpath(file_path, dir_path), frames), file=dest + ) + if valid_f is not None: + valid_f.close() + + +if __name__ == "__main__": + parser = get_parser() + args = parser.parse_args() + main(args) diff --git a/fairseq/examples/wav2vec/xlsr/README.md b/fairseq/examples/wav2vec/xlsr/README.md new file mode 100644 index 0000000..e0a7c4e --- /dev/null +++ b/fairseq/examples/wav2vec/xlsr/README.md @@ -0,0 +1,95 @@ +# XLS-R + +XLS-R is a set of large-scale models for self-supervised cross-lingual speech representation learning based on wav2vec 2.0. It was pretrained on 128 languages and approximately 436K hours of unlabeled speech data. With finetuning, these models achieve state of the art performance in speech translation, speech recognition and language identification. We evaluate the model across multiple benchmarks such as CoVoST-2 for speech translation, BABEL / MLS / CommonVoice / VoxPopuli for automatic speech recognition, and VoxLingua107 for language identification as well as VoxCeleb1 for speaker identification. More details about this work can be found in our [paper](https://arxiv.org/pdf/2111.09296.pdf) and download links can be found below. 
+ +Model | Link +|------|------ +XLS-R 300M | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr2_300m.pt) +XLS-R 1B | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr2_960m_1000k.pt) +XLS-R 2B | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr2_2B_1000k.pt) + +You can also download these models [here](https://huggingface.co/models?other=xls_r) and read more about it in the [blogpost](https://huggingface.co/blog/fine-tune-xlsr-wav2vec2) from Hugging Face. + +## Speech Translation Finetuned Models + +We multilingually finetune XLS-R models on [CoVoST 2](https://github.com/facebookresearch/covost), which has 21 +into-English and 15 out-of-English directions. + +Model | Directions | Link +|------|------|------ +XLS-R 300M | 21 langs → En | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_300m_21_en.pt) +XLS-R 300M | En → 15 langs | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_300m_en_15.pt) +XLS-R 1B | 21 langs → En | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_1b_21_en.pt) +XLS-R 1B | En → 15 langs | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_1b_en_15.pt) +XLS-R 2B | 21 langs → En | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_2b_21_en.pt) +XLS-R 2B | En → 15 langs | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_2b_en_15.pt) +XLS-R 2B | 21 langs → En + En → 15 langs | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xls_r_2b_22_16.pt) + +## ASR Finetuning + +You can refer to the original wav2vec documentation for detailed instructions on how to finetune a pretrained model with CTC [here](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#fine-tune-a-pre-trained-model-with-ctc). Below is an example command and you can find the values for different hyperparameters to reproduce the results in our paper. 
+ +```shell script +$ fairseq-hydra-train \ + distributed_training.distributed_port=$PORT \ + task.data=/path/to/data \ + model.w2v_path=/path/to/model.pt \ + --config-dir /path/to/fairseq-py/examples/wav2vec/xlsr/config \ + --config-name finetune +``` + +For finetuning the 300M as well as 1B model, we use the same hyperparameter setting defined in `finetune.yaml`. We vary `optimization.max_update` as described in the below table and the `optimization.lr` is picked from the interval [2e-5, 3e-4] based on dev word error rate. + +Benchmark | Total Number of Updates +|------|------ +Babel | 26000 +Common Voice | 13000 +VoxPopuli | 50000 +MLS 10h | 20000 + +For finetuning the 2B model, we make some additional changes to `finetune.yaml`. We use the fully_sharded `distributed_training.ddp_backend` provided by the [fairscale](https://github.com/facebookresearch/fairscale) library and set `model.activation_checkpoint` to true. We also increase `dataset.max_tokens` to 2560000 and use a total effective batch size of 2560000*24. We sweep for the best `optimization.lr` within the interval [3e−6,3e−5] using dev error rate. For the Common Voice dataset, we pick the `model.mask_prob` for different languages among {0.30, 0.40} based on best dev error rate. + +## LID Inference + +Model | Link +|------|------ +XLS-R 300M + ft Voxlingua107 | [download](https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_300m_voxlingua107_ft.pt) + +How to run inference & calculate accuracy (step-by-step): +1. Download the Voxlingua107 checkpoint from the table above. +1. Use this python script to extract logit/embedding from the XLSR model: https://github.com/fairinternal/fairseq-py/blob/xlsr2/examples/wav2vec/gen_audio_embedding.py +```shell command +CUDA_VISIBLE_DEVICES=0 PYTHONPATH=.
python3 examples/wav2vec/gen_audio_embedding.py \ + /fsx/data/VoxLingua107/manifest --path "/path/to/checkpoint.pt" \ + --task audio_classification --batch-size 90 --gen-subset test \ + --infer-manifest /fsx/data/VoxLingua107/manifest/test.tsv \ + --infer-xtimes 10 --infer-max-sample-size 160000 --output-path /tmp/tmp_voxling_infer.npz +``` + +2. Calculate the overall accuracy, 0-5 seconds and 5-20 seconds: +```shell command +PYTHONPATH='.' python examples/wav2vec/eval_speaker_clf_task.py \ + --task cls --merge mean_logit --data /tmp/tmp_voxling_infer.npz + +Output: +| run classification evaluation +| acc = 94.34% -- err = 5.66% -- correct=1518 total=1609 +| acc 0to5 = 90.91% -- err = 9.09% -- c_5=230.0 t_5=253 +| acc 5to20 = 94.99% -- err = 5.01% -- c_20=1288.0 t_20=1356 +``` + +## Citation + +Please cite as: + +``` bibtex +@article{babu2021xlsr, + title={XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale}, + author={Arun Babu and Changhan Wang and Andros Tjandra and Kushal Lakhotia and Qiantong Xu and Naman Goyal and Kritika Singh and Patrick von Platen and Yatharth Saraf and Juan Pino and Alexei Baevski and Alexis Conneau and Michael Auli}, + year={2021}, + volume={abs/2111.09296}, + journal={arXiv}, +} +``` + + diff --git a/fairseq/examples/wav2vec/xlsr/config/finetune.yaml b/fairseq/examples/wav2vec/xlsr/config/finetune.yaml new file mode 100644 index 0000000..8736e10 --- /dev/null +++ b/fairseq/examples/wav2vec/xlsr/config/finetune.yaml @@ -0,0 +1,66 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tb + +checkpoint: + save_interval: 1000 + save_interval_updates: 1000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: wer + +task: + _name: audio_finetuning + data: ??? 
+ normalize: true + labels: ltr + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: 10000 + validate_interval_updates: 1000 + valid_subset: valid + +distributed_training: + ddp_backend: legacy_ddp + distributed_world_size: 4 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: ??? + lr: [0.0003] + sentence_avg: true + update_freq: [5] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: wav2vec_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.75 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + + checkpoint_activations: false diff --git a/fairseq/examples/wav2vec/xlsr/scripts/eval_speaker_clf_task.py b/fairseq/examples/wav2vec/xlsr/scripts/eval_speaker_clf_task.py new file mode 100644 index 0000000..16d0751 --- /dev/null +++ b/fairseq/examples/wav2vec/xlsr/scripts/eval_speaker_clf_task.py @@ -0,0 +1,173 @@ +""" +Usage: + This script is used to evaluate the classification accuracy/error rate from the embedding extracted + by gen_audio_embedding.py + Example (LID classification) + + PYTHONPATH='.' python examples/wav2vec/eval_speaker_clf_task.py \ + --data /fsx/androstj/exps/lid_voxlingua/infer/atj_xlsr2_100pct_300M_mean_fast_upd_100k_new.npz \ + --task cls --merge mean_logit +""" +import numpy as np +import sklearn +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.preprocessing import StandardScaler +from tqdm import tqdm +import ipdb +import logging +import argparse +from scipy.special import softmax + +log=logging.getLogger(__name__) +log.setLevel(logging.INFO) + +def calculate_eer(y_label, y_score): + # y_label denotes the ground-truth labels, + # y_score denotes the prediction scores.
+ from scipy.optimize import brentq + from sklearn.metrics import roc_curve + from scipy.interpolate import interp1d + + fpr, tpr, thresholds = roc_curve(y_label, y_score, pos_label=1) + eer = brentq(lambda x : 1. - x - interp1d(fpr, tpr)(x), 0., 1.) + optimal_threshold = interp1d(fpr, thresholds)(eer) + return eer, optimal_threshold + +def calculate_minDCF(y_label, y_score, p_target=0.01, c_miss=1, c_fa=1): + # https://github.com/kaldi-asr/kaldi/blob/master/egs/sre08/v1/sid/compute_min_dcf.py + from sklearn.metrics import det_curve + fpr, fnr, thresholds = det_curve(y_label, y_score, pos_label=1) + min_c_det = float("inf") + min_c_det_threshold = thresholds[0] + for i in range(0, len(fpr)): + # See Equation (2). it is a weighted sum of false negative + # and false positive errors. + c_det = c_miss * fnr[i] * p_target + c_fa * fpr[i] * (1 - p_target) + if c_det < min_c_det: + min_c_det = c_det + min_c_det_threshold = thresholds[i] + # See Equations (3) and (4). Now we normalize the cost. 
+ c_def = min(c_miss * p_target, c_fa * (1 - p_target)) + min_dcf = min_c_det / c_def + return min_dcf, min_c_det_threshold + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data', help='npz contains name & latent file') + parser.add_argument('--task', choices=['cls', 'veri', 'cls_voxlingua']) + parser.add_argument('--merge', choices=['mean_logit', 'first_logit', 'mean_latent_sim', 'first_latent_sim', 'mean_logit_sim', 'first_logit_sim']) + parser.add_argument('--veri-pair', help='verification file contains 1/0 utt_x utt_y') + parser.add_argument('--scaler', type=str, choices=['mean_var']) + parser.add_argument('--compress-method', choices=['pca']) + parser.add_argument('--compress-dim', type=int) + args = parser.parse_args() + + if args.task in ['cls', 'cls_voxlingua']: + print('| run classification evaluation') + data = np.load(args.data) + data_logit = data['logit'] + data_target = data['target'] + data_src_len = data['src_len'] + assert data_logit.shape[0] == data_target.shape[0] + B = data_logit.shape[0] + correct = 0 + total = 0 + data_prob = softmax(data_logit, axis=2) + correct_vs_len = np.empty((B, 2)) + for ii in range(B): + _target = data_target[ii] + if args.merge == 'mean_logit': + _prob = np.mean(data_prob[ii], axis=0) + top_1 = np.argmax(_prob) + elif args.merge == 'first_logit': + _prob = data_prob[ii][0] + top_1 = np.argmax(_prob) + else : + raise ValueError() + is_top_1 = (1 if top_1 == _target else 0) + correct += is_top_1 + total += 1 + _src_len = data_src_len[ii] / 16000 + correct_vs_len[ii] = [is_top_1, _src_len] + + acc = correct / total * 100 + t_5 = correct_vs_len[:, 1] <= 5 + t_20 = correct_vs_len[:, 1] > 5 + c_5 = correct_vs_len[t_5, 0].sum() + c_20 = correct_vs_len[t_20, 0].sum() + t_5 = t_5.sum() + t_20 = t_20.sum() + acc_5 = c_5 / t_5 * 100 + acc_20 = c_20 / t_20 * 100 + print(f'| acc = {acc:.2f}% -- err = {100-acc:.2f}% -- {correct=} {total=}') + print(f'| acc 0to5 = {acc_5:.2f}% -- err = 
{100-acc_5:.2f}% -- {c_5=} {t_5=}') + print(f'| acc 5to20 = {acc_20:.2f}% -- err = {100-acc_20:.2f}% -- {c_20=} {t_20=}') + + + + if args.task == 'veri': + print('| run verification evaluation') + veri_pairs = [] + with open(args.veri_pair) as ff: + for fi in ff: + a,b,c = fi.split() + a = int(a) + veri_pairs.append([a,b,c]) + + data = np.load(args.data) + if 'logit' in args.merge: + data_latent = data['logit'] + elif 'latent' in args.merge: + data_latent = data['latent'] + else : + raise ValueError() + + data_name = data['name'] + assert len(data_name) == len(data_latent) + map_name_latent = {} + + from sklearn.pipeline import make_pipeline + pipe = [] + if args.scaler == 'mean_var': + print(f'| apply StandardScaler') + pipe.append(StandardScaler()) + + if args.compress_method == 'pca': + n_comp = args.compress_dim + print(f'| apply PCA with {n_comp=}') + from sklearn.decomposition import PCA + pipe.append(PCA(n_components=n_comp)) + if len(pipe) > 0 : + pipe = make_pipeline(*pipe) + data_latent_2d = data_latent.reshape(-1, data_latent.shape[-1]) + pipe.fit(data_latent_2d) + data_latent_2d = pipe.transform(data_latent_2d) + data_latent = data_latent_2d.reshape(data_latent.shape[0], data_latent.shape[1], -1) + + for ii in range(len(data_name)): + map_name_latent[data_name[ii]] = data_latent[ii] + labels = [] + scores = [] + for lbl, pair_a, pair_b in tqdm(veri_pairs): + labels.append(lbl) + pair_a = map_name_latent[pair_a] + pair_b = map_name_latent[pair_b] + assert pair_a.ndim == pair_b.ndim == 2 + score = cosine_similarity(pair_a, pair_b) + if args.merge.startswith('mean'): + score = np.mean(score) + elif args.merge.startswith('first'): + score = score[0, 0] + else : + raise ValueError() + scores.append(score) + labels = np.array(labels) + scores = np.array(scores) + eer, eer_threshold = calculate_eer(labels, scores) + minDCF, minDCF_threshold = calculate_minDCF(labels, scores) + print('='*40) + print(f'| EER = {eer*100:.2f}%\tthreshold = {eer_threshold:.2f}') + 
print(f'| minDCF = {minDCF:.2f}\tthreshold = {minDCF_threshold:.2f}') + + diff --git a/fairseq/examples/wav2vec/xlsr/scripts/gen_audio_embedding.py b/fairseq/examples/wav2vec/xlsr/scripts/gen_audio_embedding.py new file mode 100644 index 0000000..e5de1d5 --- /dev/null +++ b/fairseq/examples/wav2vec/xlsr/scripts/gen_audio_embedding.py @@ -0,0 +1,222 @@ +""" +Usage: + This script is used to extract the embedding / logit for speech classification task. + 1. Set fdir into your model checkpoint directory + 2. Run the following command (preferrably on GPU machine to speed up the inference process) + + CUDA_VISIBLE_DEVICES=0 python3 examples/wav2vec/gen_audio_embedding.py /fsx/data/VoxLingua107/manifest --path ${fdir} \ + --task audio_classification --batch-size 90 --gen-subset test \ + --infer-manifest /fsx/data/VoxLingua107/manifest/test.tsv \ + --infer-xtimes 10 --infer-max-sample-size 160000 --output-path $odir + + Example: + Case: LID logit extraction + fdir='/fsx/androstj/exps/voxlingua_lid_train_all/ckpt_100pct_300m_voxling-act_linear-pool_mean_fast-lr_1e-4-phase_0.1_0.4_0.5-maxupd_100000-ufreq_1-mprob_0.5-fz_0-cr_softmax/0/checkpoints/checkpoint_best.pt' + python3 examples/wav2vec/gen_audio_embedding.py /fsx/data/VoxLingua107/manifest --path ${fdir} \ + --task audio_classification --batch-size 90 --gen-subset test \ + --infer-manifest /fsx/data/VoxLingua107/manifest/test.tsv \ + --infer-xtimes 10 --infer-max-sample-size 160000 --output-path $odir + +""" +import torch +from fairseq import checkpoint_utils, distributed_utils, options, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import metrics, progress_bar +from fairseq import checkpoint_utils, data, options, tasks +from fairseq.data import FileAudioDataset, AddTargetDataset, Dictionary +from fairseq.tasks.audio_classification import LabelEncoder +import ipdb +import copy +import sys +from tqdm import tqdm +import tempfile +import numpy as np +import sklearn + +def 
subset_manifest(infer_manifest, veri_pair): + with open(infer_manifest) as ff, open(veri_pair) as gg, \ + tempfile.NamedTemporaryFile('w', delete=False) as ww: + fnames = ff.read().strip().split("\n") + basedir = fnames[0] + needed_fname = [] + for gi in gg.read().strip().split('\n'): + _, x1, x2 = gi.split() + needed_fname.append(x1) + needed_fname.append(x2) + needed_fname = set(needed_fname) + + ww.write(basedir+'\n') + for ii in range(1, len(fnames)): + x1,x2 = fnames[ii].split() + if x1 in needed_fname: + ww.write(fnames[ii]+'\n') + print(f'| subset manifest for verification: {ww.name}') + return ww.name + +def wrap_target_dataset(infer_manifest, dataset, task): + label_path = infer_manifest.replace(".tsv", ".label") + with open(label_path, "r") as f: + labels = f.read().strip().split("\n") + assert len(labels) == len(dataset) + process_label = LabelEncoder(task.target_dictionary) + dataset = AddTargetDataset(dataset, labels, + pad=task.target_dictionary.pad(), + eos=task.target_dictionary.eos(), + batch_targets=True, + process_label=process_label, + add_to_input=False) + return dataset + +def resample_data(source, padding_mask, n_sample, max_sample_len): + # source: BxT + # padding_mask: BxT + B = source.shape[0] + T = source.shape[1] + sources = [] + padding_masks = [] + seq_len = (~padding_mask).sum(1) + for jj in range(n_sample): + new_source = source.new_zeros(B, max_sample_len) + new_padding_mask = padding_mask.new_zeros(B, max_sample_len) + for ii in range(B): + if seq_len[ii] > max_sample_len: + start = np.random.randint(0, seq_len[ii]-max_sample_len+1) + end = start + max_sample_len + else : + start = 0 + end = seq_len[ii] + new_source[ii, 0:end-start] = source[ii, start:end] + new_padding_mask[ii, end-start:] = True # samples fill [0, end-start); every later slot is zero padding + sources.append(new_source) + padding_masks.append(new_padding_mask) + return sources, padding_masks + +def resample_sample(sample, n_sample, max_sample_len): + new_sources, new_padding_masks =
resample_data(sample['net_input']['source'], sample['net_input']['padding_mask'], n_sample, max_sample_len) + new_samples = [] + for ii in range(n_sample): + new_sample = copy.deepcopy(sample) + new_sample['net_input']['source'] = new_sources[ii] + new_sample['net_input']['padding_mask'] = new_padding_masks[ii] + new_samples.append(new_sample) + return new_samples + +if __name__ == '__main__': + np.random.seed(123) + # Parse command-line arguments for generation + parser = options.get_generation_parser(default_task='audio_classification') + # parser.add_argument('--infer-merge', type=str, default='mean') + parser.add_argument('--infer-xtimes', type=int, default=1) + parser.add_argument('--infer-max-sample-size', type=int, default=5*16000) # 5 secs + parser.add_argument('--infer-manifest', type=str) + parser.add_argument('--verification-pair', type=str, required=False, + help=''' + a file that contains pairs of utts to evaluated if they are from same speaker or not + format: (following voxceleb) + 1/0 <wav_pair_a> <wav_pair_b> + ''') + parser.add_argument('--output-path', type=str) + # parser.add_argument('--infer-xtimes', type=int, default=1) + + args = options.parse_args_and_arch(parser) + # Setup task + # task = tasks.setup_task(args) + use_cuda = not args.cpu + + # Load model & task + print('| loading model from {}'.format(args.path)) + arg_overrides = { + 'data': args.data, + # 'mask_prob': 0 + #'max_sample_size': sys.maxsize, + #'min_sample_size': 0, + } + state = checkpoint_utils.load_checkpoint_to_cpu(args.path) + # move to AWS + state['cfg']['model']['w2v_path'] = state['cfg']['model']['w2v_path'].replace('/checkpoint/arbabu/XLSR2/model_versions/', '/fsx/data/model_versions/').replace('/checkpoint/kushall/final_model_checkpoints/wav2vec2/', '/fsx/data/wav2vec_ckpt/') + state['cfg']['task']['data'] = state['cfg']['task']['data'].replace('/checkpoint/kushall/data/', '/fsx/data/') + + models, _model_args, task = 
checkpoint_utils.load_model_ensemble_and_task([args.path], + arg_overrides=arg_overrides, + task=None, + state=state) + model = models[0] + model.eval() + if use_cuda: + model.cuda() + + + # Load dataset + task.load_dataset(args.gen_subset) + dataset = task.dataset(args.gen_subset) + infer_manifest = args.infer_manifest + # only decode needed utts + # infer_manifest = subset_manifest(infer_manifest, + # args.verification_pair) + infer_dataset = FileAudioDataset(infer_manifest, + sample_rate=task.cfg.sample_rate, + max_sample_size=10**10, #task.cfg.max_sample_size, + min_sample_size=1, #task.cfg.min_sample_size, + pad=True, + normalize=task.cfg.normalize) + # add target (if needed) + infer_dataset = wrap_target_dataset(infer_manifest, infer_dataset, task) + itr = task.get_batch_iterator( + dataset=infer_dataset, + max_sentences=args.batch_size, + ).next_epoch_itr(shuffle=False) + + + # correct = 0 + # total = 0 + list_uttname = [] + list_latent = [] + list_logit = [] + list_target = [] + list_src_len = [] + with torch.no_grad(): + for _, sample in tqdm(enumerate(itr)): + # resample if needed + samples = resample_sample(sample, args.infer_xtimes, args.infer_max_sample_size) + list_uttname.extend(sample['name']) + list_target.extend(sample['target'][:, 0].cpu().numpy()) + list_src_len.extend((~sample['net_input']['padding_mask']).sum(1).cpu().numpy()) + latents = [] + logits = [] + for sample in samples: + sample = utils.move_to_cuda(sample) if use_cuda else sample + try: + latent = model.forward_latent(**sample['net_input']) + latents.append(latent.detach().cpu().numpy()) + except: + latent = None + logit = model.forward(**sample['net_input']) + logits.append(logit.detach().cpu().numpy()) + + if len(latents) > 0: + latents = np.stack(latents, 1) # B,X,D + logits = np.stack(logits, 1) # B,X,Cls + list_latent.extend(latents) + list_logit.extend(logits) + + # create big npz + list_uttname = np.array(list_uttname) + list_latent = np.array(list_latent) + list_target = 
np.array(list_target) + list_logit = np.array(list_logit) + list_src_len = np.array(list_src_len) + # save to npz + output_path = args.output_path + if (output_path is None): + output_path = tempfile.NamedTemporaryFile('wb', delete=False).name + + with open(output_path, 'wb') as ww: + np.savez(ww, name=list_uttname, + latent=list_latent, + target=list_target, + logit=list_logit, + src_len=list_src_len) + + print("="*10 + " REPORT " + "="*10) + print(f'| latent saved in {output_path}') + print(f'| {list_uttname.shape=}, {list_latent.shape=}, {list_target.shape=}, {list_logit.shape=}, {list_src_len.shape=}') diff --git a/fairseq/examples/wmt19/README.md b/fairseq/examples/wmt19/README.md new file mode 100644 index 0000000..5c90d0e --- /dev/null +++ b/fairseq/examples/wmt19/README.md @@ -0,0 +1,85 @@ +# WMT 19 + +This page provides pointers to the models of Facebook-FAIR's WMT'19 news translation task submission [(Ng et al., 2019)](https://arxiv.org/abs/1907.06616). + +## Pre-trained models + +Model | Description | Download +---|---|--- +`transformer.wmt19.en-de` | En->De Ensemble | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.joined-dict.ensemble.tar.gz) +`transformer.wmt19.de-en` | De->En Ensemble | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.joined-dict.ensemble.tar.gz) +`transformer.wmt19.en-ru` | En->Ru Ensemble | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.ensemble.tar.gz) +`transformer.wmt19.ru-en` | Ru->En Ensemble | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.ensemble.tar.gz) +`transformer_lm.wmt19.en` | En Language Model | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.en.tar.gz) +`transformer_lm.wmt19.de` | De Language Model | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.de.tar.gz) +`transformer_lm.wmt19.ru` | Ru Language Model | [download 
(.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.ru.tar.gz) + +## Pre-trained single models before finetuning + +Model | Description | Download +---|---|--- +`transformer.wmt19.en-de` | En->De Single, no finetuning | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.ffn8192.tar.gz) +`transformer.wmt19.de-en` | De->En Single, no finetuning | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.ffn8192.tar.gz) +`transformer.wmt19.en-ru` | En->Ru Single, no finetuning | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.ffn8192.tar.gz) +`transformer.wmt19.ru-en` | Ru->En Single, no finetuning | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.ffn8192.tar.gz) + +## Example usage (torch.hub) + +#### Requirements + +We require a few additional Python dependencies for preprocessing: +```bash +pip install fastBPE sacremoses +``` + +#### Translation + +```python +import torch + +# English to German translation +en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de', checkpoint_file='model1.pt:model2.pt:model3.pt:model4.pt', + tokenizer='moses', bpe='fastbpe') +en2de.translate("Machine learning is great!") # 'Maschinelles Lernen ist großartig!' + +# German to English translation +de2en = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.de-en', checkpoint_file='model1.pt:model2.pt:model3.pt:model4.pt', + tokenizer='moses', bpe='fastbpe') +de2en.translate("Maschinelles Lernen ist großartig!") # 'Machine learning is great!' + +# English to Russian translation +en2ru = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-ru', checkpoint_file='model1.pt:model2.pt:model3.pt:model4.pt', + tokenizer='moses', bpe='fastbpe') +en2ru.translate("Machine learning is great!") # 'Машинное обучение - это здорово!' 
+ +# Russian to English translation +ru2en = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.ru-en', checkpoint_file='model1.pt:model2.pt:model3.pt:model4.pt', + tokenizer='moses', bpe='fastbpe') +ru2en.translate("Машинное обучение - это здорово!") # 'Machine learning is great!' +``` + +#### Language Modeling + +```python +# Sample from the English LM +en_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.en', tokenizer='moses', bpe='fastbpe') +en_lm.sample("Machine learning is") # 'Machine learning is the future of computing, says Microsoft boss Satya Nadella ...' + +# Sample from the German LM +de_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.de', tokenizer='moses', bpe='fastbpe') +de_lm.sample("Maschinelles lernen ist") # 'Maschinelles lernen ist das A und O (neues-deutschland.de) Die Arbeitsbedingungen für Lehrerinnen und Lehrer sind seit Jahren verbesserungswürdig ...' + +# Sample from the Russian LM +ru_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.ru', tokenizer='moses', bpe='fastbpe') +ru_lm.sample("машинное обучение это") # 'машинное обучение это то, что мы называем "искусственным интеллектом".' +``` + +## Citation +```bibtex +@inproceedings{ng2019facebook, + title = {Facebook FAIR's WMT19 News Translation Task Submission}, + author = {Ng, Nathan and Yee, Kyra and Baevski, Alexei and Ott, Myle and Auli, Michael and Edunov, Sergey}, + booktitle = {Proc. of WMT}, + year = 2019, +} +``` diff --git a/fairseq/examples/wmt20/README.md b/fairseq/examples/wmt20/README.md new file mode 100644 index 0000000..b4f2874 --- /dev/null +++ b/fairseq/examples/wmt20/README.md @@ -0,0 +1,72 @@ +# WMT 20 + +This page provides pointers to the models of Facebook-FAIR's WMT'20 news translation task submission [(Chen et al., 2020)](https://arxiv.org/abs/2011.08298).
+ +## Single best MT models (after finetuning on part of WMT20 news dev set) + +Model | Description | Download +---|---|--- +`transformer.wmt20.ta-en` | Ta->En | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.ta-en.single.tar.gz) +`transformer.wmt20.en-ta` | En->Ta | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-ta.single.tar.gz) +`transformer.wmt20.iu-en.news` | Iu->En (News domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu-en.news.single.tar.gz) +`transformer.wmt20.en-iu.news` | En->Iu (News domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-iu.news.single.tar.gz) +`transformer.wmt20.iu-en.nh` | Iu->En (Nunavut Hansard domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu-en.nh.single.tar.gz) +`transformer.wmt20.en-iu.nh` | En->Iu (Nunavut Hansard domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-iu.nh.single.tar.gz) + +## Language models +Model | Description | Download +---|---|--- +`transformer_lm.wmt20.en` | En Language Model | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en.tar.gz) +`transformer_lm.wmt20.ta` | Ta Language Model | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.ta.tar.gz) +`transformer_lm.wmt20.iu.news` | Iu Language Model (News domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu.news.tar.gz) +`transformer_lm.wmt20.iu.nh` | Iu Language Model (Nunavut Hansard domain) | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu.nh.tar.gz) + +## Example usage (torch.hub) + +#### Translation + +```python +import torch + +# English to Tamil translation +en2ta = torch.hub.load('pytorch/fairseq', 'transformer.wmt20.en-ta') +en2ta.translate("Machine learning is great!") # 'இயந்திரக் கற்றல் அருமை!' 
+ +# Tamil to English translation +ta2en = torch.hub.load('pytorch/fairseq', 'transformer.wmt20.ta-en') +ta2en.translate("இயந்திரக் கற்றல் அருமை!") # 'Machine learning is great!' + +# English to Inuktitut translation +en2iu = torch.hub.load('pytorch/fairseq', 'transformer.wmt20.en-iu.news') +en2iu.translate("machine learning is great!") # 'ᖃᒧᑕᐅᔭᓄᑦ ᐃᓕᓐᓂᐊᕐᓂᖅ ᐱᐅᔪᒻᒪᕆᒃ!' + +# Inuktitut to English translation +iu2en = torch.hub.load('pytorch/fairseq', 'transformer.wmt20.iu-en.news') +iu2en.translate("ᖃᒧᑕᐅᔭᓄᑦ ᐃᓕᓐᓂᐊᕐᓂᖅ ᐱᐅᔪᒻᒪᕆᒃ!") # 'Machine learning excellence!' +``` + +#### Language Modeling + +```python +# Sample from the English LM +en_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt20.en') +en_lm.sample("Machine learning is") # 'Machine learning is a type of artificial intelligence that uses machine learning to learn from data and make predictions.' + +# Sample from the Tamil LM +ta_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt20.ta') +ta_lm.sample("இயந்திரக் கற்றல் என்பது செயற்கை நுண்ணறிவின்") # 'இயந்திரக் கற்றல் என்பது செயற்கை நுண்ணறிவின் ஒரு பகுதியாகும்.' + +# Sample from the Inuktitut LM +iu_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt20.iu.news') +iu_lm.sample("ᖃᒧᑕᐅᔭᓄᑦ ᐃᓕᓐᓂᐊᕐᓂᖅ") # 'ᖃᒧᑕᐅᔭᓄᑦ ᐃᓕᓐᓂᐊᕐᓂᖅ, ᐊᒻᒪᓗ ᓯᓚᐅᑉ ᐊᓯᙳᖅᐸᓪᓕᐊᓂᖓᓄᑦ ᖃᓄᐃᓕᐅᕈᑎᒃᓴᑦ, ᐃᓚᖃᖅᖢᑎᒃ ᐅᑯᓂᖓ:' +``` + +## Citation +```bibtex +@inproceedings{chen2020facebook, + title={Facebook AI's WMT20 News Translation Task Submission}, + author={Peng-Jen Chen and Ann Lee and Changhan Wang and Naman Goyal and Angela Fan and Mary Williamson and Jiatao Gu}, + booktitle={Proc. of WMT}, + year={2020}, +} +``` diff --git a/fairseq/examples/wmt21/README.md b/fairseq/examples/wmt21/README.md new file mode 100644 index 0000000..524fffb --- /dev/null +++ b/fairseq/examples/wmt21/README.md @@ -0,0 +1,25 @@ +# WMT 21 + +This page provides pointers to the models of Facebook AI's WMT'21 news translation task submission [(Tran et al., 2021)](https://arxiv.org/abs/2108.03265).
+ +## Single best dense models + +Model | Description | Download +---|---|--- +`wmt21.dense-24-wide.X-En` | X-En | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt21.dense-24-wide.X-En.tar.gz) +`wmt21.dense-24-wide.En-X` | En-X | [download (.tar.gz)](https://dl.fbaipublicfiles.com/fairseq/models/wmt21.dense-24-wide.En-X.tar.gz) + +## Example usage + +See eval.sh + + +## Citation +```bibtex +@inproceedings{tran2021facebook, + title={Facebook AI’s WMT21 News Translation Task Submission}, + author={Chau Tran and Shruti Bhosale and James Cross and Philipp Koehn and Sergey Edunov and Angela Fan}, + booktitle={Proc. of WMT}, + year={2021}, +} +``` diff --git a/fairseq/examples/wmt21/eval.sh b/fairseq/examples/wmt21/eval.sh new file mode 100644 index 0000000..b36d934 --- /dev/null +++ b/fairseq/examples/wmt21/eval.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +SRC=en +TGT=is +MODEL_NAME=wmt21.dense-24-wide.En-X + +PATH_TO_FAIRSEQ_PY=. +TMP_DIR=generation_tmp +mkdir -p $TMP_DIR + +REPLACE_UNICODE_PUNCT=$PATH_TO_FAIRSEQ_PY/examples/wmt21/scripts/replace-unicode-punctuation.perl +NORM_PUNCT=$PATH_TO_FAIRSEQ_PY/examples/wmt21/scripts/normalize-punctuation.perl +if [ ! -d "${TMP_DIR}/${MODEL_NAME}" ]; then + wget https://dl.fbaipublicfiles.com/fairseq/models/${MODEL_NAME}.tar.gz -P $TMP_DIR/ + tar -xvf $TMP_DIR/${MODEL_NAME}.tar.gz -C $TMP_DIR +fi +MODEL_DIR=$TMP_DIR/${MODEL_NAME} +if [ !
-d "${TMP_DIR}/wmt21-news-systems" ]; then + git clone https://github.com/wmt-conference/wmt21-news-systems $TMP_DIR/wmt21-news-systems +fi + +DOMAIN_TAG="wmtdata newsdomain" +INPUT_FILE=$TMP_DIR/wmt21-news-systems/txt/sources/newstest2021.${SRC}-${TGT}.src.${SRC} +REF_FILE=$TMP_DIR/wmt21-news-systems/txt/references/newstest2021.${SRC}-${TGT}.ref.A.${TGT} + +# Translate +cat ${INPUT_FILE} | sed "s/^/${DOMAIN_TAG} /" | $REPLACE_UNICODE_PUNCT | $NORM_PUNCT -l ${SRC} | python $PATH_TO_FAIRSEQ_PY/fairseq_cli/interactive.py $MODEL_DIR \ + --path ${MODEL_DIR}/checkpoint.pt \ + --task translation_multi_simple_epoch \ + --langs "en,ha,is,ja,cs,ru,zh,de" \ + --lang-pairs $SRC-$TGT \ + --bpe "sentencepiece" \ + --sentencepiece-model ${MODEL_DIR}/sentencepiece.model \ + --buffer-size 1024 \ + --batch-size 10 -s $SRC -t $TGT \ + --decoder-langtok \ + --encoder-langtok src \ + --beam 5 \ + --lenpen 1.0 \ + --fp16 > $TMP_DIR/${SRC}-${TGT}.gen_log + +cat $TMP_DIR/$SRC-$TGT.gen_log | grep -P "^D-" | cut -f3 > $TMP_DIR/$SRC-$TGT.hyp + +# Calculate BLEU score +sacrebleu -l $SRC-$TGT $REF_FILE < $TMP_DIR/$SRC-$TGT.hyp diff --git a/fairseq/examples/wmt21/scripts/normalize-punctuation.perl b/fairseq/examples/wmt21/scripts/normalize-punctuation.perl new file mode 100644 index 0000000..a7c0750 --- /dev/null +++ b/fairseq/examples/wmt21/scripts/normalize-punctuation.perl @@ -0,0 +1,90 @@ +#!/usr/bin/env perl +# +# This file is part of moses. Its use is licensed under the GNU Lesser General +# Public License version 2.1 or, at your option, any later version. 
+ +use warnings; +use strict; + +my $language = "en"; +my $PENN = 0; + +while (@ARGV) { + $_ = shift; + /^-b$/ && ($| = 1, next); # not buffered (flush each line) + /^-l$/ && ($language = shift, next); + /^[^\-]/ && ($language = $_, next); + /^-penn$/ && ($PENN = 1, next); +} + +while(<STDIN>) { + s/\r//g; + # remove extra spaces + s/\(/ \(/g; + s/\)/\) /g; s/ +/ /g; + s/\) ([\.\!\:\?\;\,])/\)$1/g; + s/\( /\(/g; + s/ \)/\)/g; + s/(\d) \%/$1\%/g; + s/ :/:/g; + s/ ;/;/g; + # normalize unicode punctuation + if ($PENN == 0) { + s/\`/\'/g; + s/\'\'/ \" /g; + } + + s/„/\"/g; + s/“/\"/g; + s/”/\"/g; + s/–/-/g; + s/—/ - /g; s/ +/ /g; + s/´/\'/g; + s/([a-z])‘([a-z])/$1\'$2/gi; + s/([a-z])’([a-z])/$1\'$2/gi; + s/‘/\'/g; + s/‚/\'/g; + s/’/\"/g; + s/''/\"/g; + s/´´/\"/g; + s/…/.../g; + # French quotes + s/ « / \"/g; + s/« /\"/g; + s/«/\"/g; + s/ » /\" /g; + s/ »/\"/g; + s/»/\"/g; + # handle pseudo-spaces + s/ \%/\%/g; + s/nº /nº /g; + s/ :/:/g; + s/ ºC/ ºC/g; + s/ cm/ cm/g; + s/ \?/\?/g; + s/ \!/\!/g; + s/ ;/;/g; + s/, /, /g; s/ +/ /g; + + # English "quotation," followed by comma, style + if ($language eq "en") { + s/\"([,\.]+)/$1\"/g; + } + # Czech is confused + elsif ($language eq "cs" || $language eq "cz") { + } + # German/Spanish/French "quotation", followed by comma, style + else { + s/,\"/\",/g; + s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence + } + + + if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") { + s/(\d) (\d)/$1,$2/g; + } + else { + s/(\d) (\d)/$1.$2/g; + } + print $_; +} diff --git a/fairseq/examples/wmt21/scripts/replace-unicode-punctuation.perl b/fairseq/examples/wmt21/scripts/replace-unicode-punctuation.perl new file mode 100644 index 0000000..faed2cd --- /dev/null +++ b/fairseq/examples/wmt21/scripts/replace-unicode-punctuation.perl @@ -0,0 +1,55 @@ +#!/usr/bin/env perl +# +# This file is part of moses. 
Its use is licensed under the GNU Lesser General +# Public License version 2.1 or, at your option, any later version. + +use warnings; +use strict; + +while (@ARGV) { + $_ = shift; + /^-b$/ && ($| = 1, next); # not buffered (flush each line) +} + +#binmode(STDIN, ":utf8"); +#binmode(STDOUT, ":utf8"); + +while(<STDIN>) { + s/,/,/g; + s/。 */. /g; + s/、/,/g; + s/”/"/g; + s/“/"/g; + s/∶/:/g; + s/:/:/g; + s/?/\?/g; + s/《/"/g; + s/》/"/g; + s/)/\)/g; + s/!/\!/g; + s/(/\(/g; + s/;/;/g; + s/1/1/g; + s/」/"/g; + s/「/"/g; + s/0/0/g; + s/3/3/g; + s/2/2/g; + s/5/5/g; + s/6/6/g; + s/9/9/g; + s/7/7/g; + s/8/8/g; + s/4/4/g; + s/. */. /g; + s/~/\~/g; + s/’/\'/g; + s/…/\.\.\./g; + s/━/\-/g; + s/〈/\</g; + s/〉/\>/g; + s/【/\[/g; + s/】/\]/g; + s/%/\%/g; + print $_; +} diff --git a/fairseq/examples/womens_bios/README.md b/fairseq/examples/womens_bios/README.md new file mode 100644 index 0000000..07d0646 --- /dev/null +++ b/fairseq/examples/womens_bios/README.md @@ -0,0 +1,81 @@ +# Wikipedia Biographies of Women + + +## Training: + +The training dataset is created based on WikiSum, a dataset created from the paper [Generating Wikipedia by Summarizing Long Sequences](https://arxiv.org/pdf/1801.10198.pdf). The dataset needs to be generated following the instructions in this [Github Repository](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wikisum). + +### How is the WikiSum dataset structured? + +Overall, the task in WikiSum was to generate the entire Wikipedia article based on the contents of the top 10 Google Search Results. The authors provide a way for people to recreate their work. In the WikiSum Github, there are two options for the dataset recreation --- the first is to use CommonCrawl (a static, open source crawl of the web) and the second to do Live Web Fetches. The second has higher coverage, but the content is subject to change and difficult to fetch. We used the static, Commoncrawl version. 
This can be downloaded following the Github repo instructions, though note it will require usage of Google Cloud. + +Note: in our experience, it also requires requesting that the resource limit of the Google Cloud instance be raised, which requires emailing. + +Note: Having higher coverage in the training dataset would be expected to improve the model quality. There are many instances in the dataset where the training input (web evidence) does not contain sufficient content for producing the desired Wikipedia article. This may harm the model's ability to learn to retrieve, look at the input evidence, and overall could contribute to increased challenges in generating verifiable Wikipedia biographies. + +### How do you go from WikiSum dataset to Biography dataset? + +The WikiSum dataset is for Wikipedia in general, not just biographies. We do this by querying WikiData to see if the Wikipedia article has an occupation, with the thought that all articles with occupations are probably biographies. + + +## Evaluation: + +You can download the dataset and baseline model with the following command: + +``` +wget -N 'https://dl.fbaipublicfiles.com/fairseq/womenbios_dataset.zip' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe' +wget -N 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt' +``` + +We provide the full text Wikipedia articles split into four categories: +- Women in Africa +- Women in Asia +- Women in Science +- Women +We note that these are not exhaustive intersectional categories and mainly stem from personal interest. + +We also provide the URL of the Wikipedia article. Note that Wikipedia articles are constantly being improved, edited, and changed. Thus, it's completely possible that the Wikipedia article on Wikipedia has been lovingly improved by other Wikipedia editors. + +To get the occupations of each biographical subject, we use WikiData. 
We provide a sample script to do this. We also provide the raw output of this query. + +The final part of the evaluation dataset is to query web evidence for each of the biographical subjects. This is the part of the evaluation dataset that requires the most improvement. As we discuss in our paper, one of the major reasons why it is difficult to write biographies for sometimes very well qualified women is that there is not information online about them. Further, the search engine may not find it. We encourage others to improve upon this part of the data, as even re-querying again on the internet may find new, updated sources of information as the web is constantly evolving. + +We use the search engine from [Internet-Augmented Dialogue Generation](https://arxiv.org/abs/2107.07566), see [project URL](https://parl.ai/projects/sea/) to do the search queries. Note: we remove wikipedia site sources from our query (or we'd query the data itself). However, it's possible Wikipedia information can be copied around in multiple forms on the web, linked with edits, etc. + + +## Section by Section Generation: + +Wikipedia articles are split into sections, which are usually separated by headings. These headings can be separated in the article text by looking for these equal signs (==), where the number of equal signs usually signals if you are looking at a toplevel heading or a subheading, etc. An example regex that you can use is: + +` +section_header_re = re.compile(r"(?<!=)==([^=]+)==(?!=)") +` + + +## List of Notes: +- People can have multiple occupations, and we keep all occupations that we query from WikiData + + +## List of Possible Improvement Areas: +Using a larger generative pre-trained model, larger-scale retrieval, a retrieval encoder specialized to Wikipedia (or biographies), tuning all of the training & generation parameters exhaustively --- and the like --- would most likely be very useful. 
Overall, we hope that this is a starting point for others who might be interested in focusing on how we can help address the gender gap on Wikipedia. + + +## Interested in Wikipedia and Gender Gap? +You might want to check out: +- https://humaniki.wmcloud.org/ +- https://en.wikipedia.org/wiki/Wikipedia:WikiProject_Women_in_Red and https://wikimediafoundation.org/news/2018/10/18/women-in-red-wikiproject/ +- https://meta.wikimedia.org/wiki/Whose_Knowledge%3F/VisibleWikiWomen +- https://www.ted.com/talks/jess_wade_a_voice_for_diversity_in_science + +and thanks again to all of the Wikipedia editors and the entire community that is already working so hard to write amazing articles for diverse groups of people. + + +# LICENSE +This is licensed under CC-BY-NC, however portions of the dataset are available under separate license terms: text sourced from Wikipedia is licensed under CC-BY-SA. + + + + + diff --git a/fairseq/examples/womens_bios/query_occupations_from_wikidata.py b/fairseq/examples/womens_bios/query_occupations_from_wikidata.py new file mode 100644 index 0000000..8028c6e --- /dev/null +++ b/fairseq/examples/womens_bios/query_occupations_from_wikidata.py @@ -0,0 +1,34 @@ +import sys +from SPARQLWrapper import SPARQLWrapper, JSON + +endpoint_url = "https://query.wikidata.org/sparql" + +with open("/your/urls/here") as f: + data = f.readlines() +urls = [i.strip() for i in data] + +def get_results(endpoint_url, URL): + query = f"""SELECT ?uriLabel ?occupation ?occupationLabel ?dob ?dobLabel WHERE {{ + <{URL}> schema:about ?uri . + ?uri wdt:P106 ?occupation . 
+ SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }} + }}""" + user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1]) + sparql = SPARQLWrapper(endpoint_url, agent=user_agent) + sparql.setQuery(query) + sparql.setReturnFormat(JSON) + return sparql.query().convert() + +all_occupations = [] +for URL in urls: + results = get_results(endpoint_url, URL) + occupations = [] + for result in results["results"]["bindings"]: + occupations.append(result['occupationLabel']['value']) + all_occupations.append(result['uriLabel']['value'] + ", " + ", ".join(occupations)) + +assert(len(all_occupations) == len(urls)) + +with open("/your/file/output/here", "w") as o: + for line in all_occupations: + o.write(line.strip() + "\n") \ No newline at end of file diff --git a/fairseq/examples/xformers/README.md b/fairseq/examples/xformers/README.md new file mode 100644 index 0000000..400a74d --- /dev/null +++ b/fairseq/examples/xformers/README.md @@ -0,0 +1,43 @@ +# Using xFormers with FairSeq + +[xFormers](https://github.com/facebookresearch/xformers) is a modular library for flexibly generating transformer architectures with interoperable and optimized building blocks. +The current integration allows for FairSeq users to use an attention variant available in the xFormers repository. + +In order to enable xFormers, all that needs to be passed in is a string representing an [xFormers attention config](https://github.com/facebookresearch/xformers/blob/5f754129bfb1ea53747b1ab2077261ea762faa47/xformers/components/attention/base.py#L18). + +The various attention variants can be found [here](https://github.com/facebookresearch/xformers/tree/main/xformers/components/attention). +These include sparse attention and blocksparse attention.
+ +For example, you could pass in the following args: + ```python +decoder_xformers_att_config = '{"name": "scaled_dot_product"}' + +encoder_xformers_att_config = '{"name": "linformer", "seq_len": "256"}' + ``` + +In order to use blocksparse attention you would have to additionally pass in a blocksparse layout and blocksize. For example: + + ```python + + xformers_att_config = '{"name": "scaled_dot_product"}' + xformers_blocksparse_blocksize = 16 + xformers_blocksparse_layout = torch.ones( + seq_len // xformers_blocksparse_blocksize, + seq_len // xformers_blocksparse_blocksize, + ) + + xf_blocksparse_mha = ( + MultiheadAttention( + embedding, + num_heads, + dropout=0.0, + add_zero_attn=add_zero_attn, + xformers_att_config=xformers_att_config, + xformers_blocksparse_layout=xformers_blocksparse_layout, + xformers_blocksparse_blocksize=xformers_blocksparse_blocksize, + ) + + ``` + +The xFormers repository currently has benchmarks on the [runtime](https://github.com/facebookresearch/xformers/blob/main/docs/plots/runtime_vs_attention.png) +and [memory usage](https://github.com/facebookresearch/xformers/blob/main/docs/plots/memory_vs_attention.png) of the various attentions. diff --git a/fairseq/examples/xglm/README.md b/fairseq/examples/xglm/README.md new file mode 100644 index 0000000..914e297 --- /dev/null +++ b/fairseq/examples/xglm/README.md @@ -0,0 +1,195 @@ +# Few-shot Learning with Multilingual Language Models + +## Introduction + +In this work, we train a family of multilingual generative language models, dubbed XGLM, on a balanced corpus covering a diverse set of languages, and study their few- and zero-shot learning capabilities in a wide range of tasks.
Our largest model with 7.5 billion parameters sets a new state of the art in few-shot learning on more than 20 representative languages, outperforming GPT-3 of comparable size in multilingual commonsense reasoning (+7.4 accuracy points for 0-shot, +9.4 for 4-shot) and natural language inference (+5.4 for 0-shot, +5.4 for 4-shot). We have included a [model card](model_card.md) of XGLM for transparency and accountability. + +## Data and Languages +XGLM models are trained on a new multilingual corpus extracted from CommonCrawl (CC100-XL), a significantly larger multilingual dataset covering 68 Common Crawl (CC) snapshots (from [Summer 2013](http://commoncrawl.org/2013/11/new-crawl-data-available/) to [March/April 2020](https://commoncrawl.org/2020/04/march-april-2020-crawl-archive-now-available/)) consisting of 134 languages. The detailed languages and data statistics are reported in the paper (Table A.1). + +## Pre-trained models + +Model | Layers | Model Dim | FFN Dim | Languages | Download +---|---|---|---|---|--- +`XGLM 564M` | 24 | 1024 | 4096 | trained on 30 languages| [xglm.564M.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xglm/xglm.564M.tar.gz) +`XGLM 1.7B` | 24 | 2048 | 8192 | trained on 30 languages| [xglm.1.7B.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xglm/xglm.1.7B.tar.gz) +`XGLM 2.9B` | 48 | 2048 | 8192 | trained on 30 languages| [xglm.2.9B.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xglm/xglm.2.9B.tar.gz) +`XGLM 7.5B` | 32 | 4096 | 16384 | trained on 30 languages| [xglm.7.5B.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xglm/xglm.7.5B.tar.gz) +`XGLM 4.5B` | 48 | 2048 | 16384 | trained on 134 languages| [xglm.4.5B.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xglm/xglm.4.5B.tar.gz) + +## Pre-training Data Format +Our models were pre-trained with data in the following format (i.e. paragraphs are separated with new lines and documents were separated with double new lines). +``` +<doc0,para0,tok0> ...
<doc0,para0,tokX0> # X0: number of tokens in para0 of doc0 +<doc0,para1,tok0> ... <doc0,para1,tokY0> # Y0: number of tokens in para1 of doc0 + +<doc1,para0,tok0> ... <doc1,para0,tokX1> # X1: number of tokens in para0 of doc1 +<doc1,para1,tok0> ... <doc1,para1,tokY1> # Y1: number of tokens in para1 of doc1 + +... +``` +Fairseq's preprocessing replaces newlines with the end-of-sentence symbol (`</s>`). As a result, the models never saw newline characters during pretraining and the same preprocessing should be run prior to few-shot inference to maximize performance. For example, our language model scoring function has `replace_newlines_with_eos` argument to trigger this preprocessing: +```python +from fairseq.models.transformer_lm import TransformerLanguageModel + +model_dir = 'path_to_decompressed_tar_gz_dir' +lm = TransformerLanguageModel.from_pretrained(model_dir, bpe='sentencepiece') + +text = """First paragraph of the first document. +Second paragraph of the first document. + +First paragraph of the second document. +""" +tokens = lm.score(text, replace_newlines_with_eos=True)['tokens'] +assert '\n' not in lm.decode(tokens) # no newlines were encoded +``` + +## Evaluation + +### Example (COPA) + +The following snippet show how to evaluate our models on the Choice of Plausible Alternatives (COPA) task, using examples in English, Chinese and Hindi. 
+ +```python +data_samples = { + 'en': [ + { + "premise": "I wanted to conserve energy.", + "choice1": "I swept the floor in the unoccupied room.", + "choice2": "I shut off the light in the unoccupied room.", + "question": "effect", + "label": "1" + }, + { + "premise": "The flame on the candle went out.", + "choice1": "I blew on the wick.", + "choice2": "I put a match to the wick.", + "question": "cause", + "label": "0" + } + ], + 'zh': [ + { + "premise": "我想节约能源。", + "choice1": "我在空着的房间里扫了地板。", + "choice2": "我把空房间里的灯关了。", + "question": "effect", + "label": "1" + }, + { + "premise": "蜡烛上的火焰熄灭了。", + "choice1": "我吹灭了灯芯。", + "choice2": "我把一根火柴放在灯芯上。", + "question": "cause", + "label": "0" + } + ], + 'hi': [ + { + "premise": "M te vle konsève enèji.", + "choice1": "Mwen te fin baleye chanm lib la.", + "choice2": "Mwen te femen limyè nan chanm lib la.", + "question": "effect", + "label": "1" + }, + { + "premise": "Flam bouji a te etenn.", + "choice1": "Mwen te soufle bouji a.", + "choice2": "Mwen te limen mèch bouji a.", + "question": "cause", + "label": "0" + } + ] +} +``` +In this example, we format the examples use the non-verbal prompts `{premise}\n{choice1}` and `{premise}\n{choice2}`, which are shared by all three languages. +```python +from fairseq.models.transformer_lm import TransformerLanguageModel + +model_dir = 'path_to_decompressed_tar_gz_dir' +lm = TransformerLanguageModel.from_pretrained(model_dir, bpe='sentencepiece') +lm = lm.eval() +lm = lm.half() +lm = lm.cuda() + +def get_logprobs(prompt): + import re + prompt = re.sub('\n+' , '\n', prompt) # collapse repeated newlines, which indicate separate documents + return lm.score(prompt, replace_newlines_with_eos=True)['positional_scores'] + +# Zero-shot evaluation for the Choice of Plausible Alternatives (COPA) task. +# A return value of 0 indicates that the first alternative is more plausible, +# while 1 indicates that the second alternative is more plausible. 
+def COPA_eval(prompt, alternative1, alternative2): + lprob1 = get_logprobs(prompt + "\n" + alternative1).sum() + lprob2 = get_logprobs(prompt + "\n" + alternative2).sum() + return 0 if lprob1 > lprob2 else 1 + +for lang in ['en', 'zh', 'hi']: + for idx, example in enumerate(data_samples[lang]): + predict = COPA_eval(example["premise"], example["choice1"], example["choice2"]) + print(f'{lang}-{idx}', predict, example['label']) + +# en-0 1 1 +# en-1 0 0 +# zh-0 1 1 +# zh-1 0 0 +# hi-0 1 1 +# hi-1 0 0 +``` + +## XStoryCloze + +We release XStoryCloze, a new multilingual dataset intended for few-shot evaluation, alongside this paper. XStoryCloze consists of professional translation of the validation split of the [English StoryCloze dataset](https://cs.rochester.edu/nlp/rocstories/) (Spring 2016 version) to 10 other languages. It is opensourced under [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/legalcode), the same license as the English StoryCloze. + +You can download the dataset via [this link](https://dl.fbaipublicfiles.com/xstorycloze.zip). + +Language | ar | es | eu | hi | id | my | ru | sw | te | zh +---|---|---|---|---|---|---|---|---|---|--- +Train size | 360 | 360 | 360 | 360 | 360 | 360 | 360 | 360 | 360 | 360 +Eval size | 1511 | 1511 | 1511 | 1511 | 1511 | 1511 | 1511 | 1511 | 1511 | 1511 + +Please refer to [the dataset doc](XStoryCloze.md) for more information. + + +## Publication +[Few-shot Learning with Multilingual Generative Language Models](https://arxiv.org/abs/2112.10668). +Xi Victoria Lin*, Todor Mihaylov, Mikel Artetxe, Tianlu Wang, Shuohui Chen, Daniel Simig, Myle Ott, Naman Goyal, Shruti Bhosale, Jingfei Du, Ramakanth Pasunuru, Sam Shleifer, Punit Singh Koura, Vishrav Chaudhary, Brian O'Horo, Jeff Wang, Luke Zettlemoyer, Zornitsa Kozareva, Mona Diab, Veselin Stoyanov, Xian Li* (* Equal Contribution). +EMNLP 2022. 
+ +## Citation +``` +@article{DBLP:journals/corr/abs-2112-10668, + author = {Xi Victoria Lin and + Todor Mihaylov and + Mikel Artetxe and + Tianlu Wang and + Shuohui Chen and + Daniel Simig and + Myle Ott and + Naman Goyal and + Shruti Bhosale and + Jingfei Du and + Ramakanth Pasunuru and + Sam Shleifer and + Punit Singh Koura and + Vishrav Chaudhary and + Brian O'Horo and + Jeff Wang and + Luke Zettlemoyer and + Zornitsa Kozareva and + Mona T. Diab and + Veselin Stoyanov and + Xian Li}, + title = {Few-shot Learning with Multilingual Language Models}, + journal = {CoRR}, + volume = {abs/2112.10668}, + year = {2021}, + url = {https://arxiv.org/abs/2112.10668}, + eprinttype = {arXiv}, + eprint = {2112.10668}, + timestamp = {Tue, 04 Jan 2022 15:59:27 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2112-10668.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` diff --git a/fairseq/examples/xglm/XStoryCloze.md b/fairseq/examples/xglm/XStoryCloze.md new file mode 100644 index 0000000..9b0fce0 --- /dev/null +++ b/fairseq/examples/xglm/XStoryCloze.md @@ -0,0 +1,57 @@ +XStoryCloze consists of professional translation of the validation split of the [English StoryCloze dataset](https://cs.rochester.edu/nlp/rocstories/) (Spring 2016 version) to 10 other languages. This dataset is released by FAIR (Fundamental Artificial Intelligence Research) alongside the paper [Few-shot Learning with Multilingual Generative Language Models. EMNLP 2022](https://arxiv.org/abs/2112.10668). + +# Languages +ru, zh (Simplified), es (Latin America), ar, hi, id, te, sw, eu, my. + +# Data Splits +This dataset is intended to be used for evaluating the zero- and few-shot learning capabilities of multilingual language models. We split the data for each language into train and test (360 vs. 1511 examples, respectively). The released data files for different languages maintain a line-by-line alignment.
+ +# Access English StoryCloze +Please request the original English StoryCloze dataset through the [official website](https://cs.rochester.edu/nlp/rocstories/). You can create a split of the en data following our data split scheme using the following commands: +``` +head -361 spring2016.val.tsv > spring2016.val.en.tsv.split_20_80_train.tsv + +head -1 spring2016.val.tsv > spring2016.val.en.tsv.split_20_80_eval.tsv # TSV header +tail -1511 spring2016.val.tsv >> spring2016.val.en.tsv.split_20_80_eval.tsv +``` + +# Licence +XStoryCloze is opensourced under [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/legalcode), the same license as the original English StoryCloze. + +# Citation +We hope this dataset is helpful for the research and wider NLP community. If you use XStoryCloze in your work, please cite +``` +@article{DBLP:journals/corr/abs-2112-10668, + author = {Xi Victoria Lin and + Todor Mihaylov and + Mikel Artetxe and + Tianlu Wang and + Shuohui Chen and + Daniel Simig and + Myle Ott and + Naman Goyal and + Shruti Bhosale and + Jingfei Du and + Ramakanth Pasunuru and + Sam Shleifer and + Punit Singh Koura and + Vishrav Chaudhary and + Brian O'Horo and + Jeff Wang and + Luke Zettlemoyer and + Zornitsa Kozareva and + Mona T. 
Diab and + Veselin Stoyanov and + Xian Li}, + title = {Few-shot Learning with Multilingual Language Models}, + journal = {CoRR}, + volume = {abs/2112.10668}, + year = {2021}, + url = {https://arxiv.org/abs/2112.10668}, + eprinttype = {arXiv}, + eprint = {2112.10668}, + timestamp = {Tue, 04 Jan 2022 15:59:27 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2112-10668.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` diff --git a/fairseq/examples/xglm/model_card.md b/fairseq/examples/xglm/model_card.md new file mode 100644 index 0000000..2656ec5 --- /dev/null +++ b/fairseq/examples/xglm/model_card.md @@ -0,0 +1,152 @@ +# XGLM multilingual model +## Version 1.0.0 + +### Model developer +FAIR (Fundamental Artificial Intelligence Research) + +### Model type +A family of multilingual autoregressive language models (ranging from 564 million to 7.5 billion parameters) trained on a balanced corpus of a diverse set of languages. The language model can learn tasks from natural language descriptions and a few examples. + +### Model Feedback Channel +https://github.com/pytorch/fairseq + +## Intended use +### Primary intended use +For research purposes only, e.g. reproducing model evaluation results. Generation is only used in a limited capacity for explanation/justification or for prompting/probing/priming for class labels. + +### Out of scope uses +The primary purpose of the model is not to generate language, although the model is capable of doing that. + +## Potential risks +This section lists the potential risks associated with using the model. + +### Relevant factors +Based on known problems with NLP technology, potential relevant factors include output correctness, robustness, bias (gender, profession, race and religion), etc. + +### Evaluation factors +The model was evaluated on hate speech detection and occupation identification. +* Hate speech detection (Huang et al. 
(2020)) - A safety task to test language models’ ability to identify hateful and offensive text. +* Occupation identification (De-Arteaga et al., 2019), (Zhao et al., 2020) - A bias task to study language models’ performance divergence between different gender groups on the task of occupation identification. + +## Metrics +### Model performance measures +The XGLM model was primarily evaluated on +1. Zero shot and few shot learning by looking at per-language performance on tasks spanning commonsense reasoning (XCOPA, XWinograd), natural language inference (XNLI) and paraphrasing (PAWS-X). The model is also evaluated on XStoryCloze, a new dataset created by FAIR (Fundamental Artificial Intelligence Research). +2. Cross lingual transfer through templates and few-shot examples. +3. Knowledge probing - Evaluate to what extent the XGLM model can effectively store factual knowledge in different languages using the mLAMA benchmark. +4. Translation - We report machine translation results on WMT benchmarks and a subset of FLORES-101 in the main paper. + +The model was also evaluated on hate speech datasets introduced by Huang et al. (2020) and an occupation identification dataset by De-Arteaga et al. 2019 to identify bias in the model. + +### Approaches to handle uncertainty +Report confidence intervals, variance metrics for the model performance metrics. Few-shot evaluation was conducted with different sampling with 5 seeds. We reported statistical significance. + +## Evaluation data +## Zero Shot and Few Shot evaluation + +### XNLI (Conneau et al., 2018) +#### Description +The Cross-lingual Natural Language Inference (XNLI) corpus is the extension of the Multi-Genre NLI (MultiNLI) corpus to 15 languages. The dataset was created by manually translating the validation and test sets of MultiNLI into each of those 15 languages. 
+ +### XStoryCloze +#### Description +A new dataset created by FAIR alongside this work by translating the validation split of the English StoryCloze dataset (Mostafazadeh et al., 2016) (Spring 2016 version) to 10 other typologically diverse languages (ru, zh Simplified, es Latin America, ar, hi, id, te, sw, eu, my). + +### XCOPA (Ponti et al., 2020) +#### Description +The Cross-lingual Choice of Plausible Alternatives (XCOPA) dataset is a benchmark to evaluate the ability of machine learning models to transfer commonsense reasoning across languages. The dataset is the translation and reannotation of the English COPA (Roemmele et al. 2011) and covers 11 languages from 11 families and several areas around the globe. + +### XWinograd (Tikhonov and Ryabinin, 2021) +#### Description +XWinograd is a multilingual collection of Winograd Schemas in six languages that can be used for evaluation of cross-lingual commonsense reasoning capabilities. + +### PAWS-X (Yang et al., 2019) +#### Description +PAWS-X contains 23,659 human translated PAWS evaluation pairs and 296,406 machine translated training pairs in six typologically distinct languages: French, Spanish, German, Chinese, Japanese, and Korean. All translated pairs are sourced from examples in PAWS-Wiki. + +## Responsible AI (RAI) evaluation +### Hate speech (Huang et al. 2020) +This is a multilingual Twitter corpus for the task of hate speech detection with inferred four author demographic factors: age, country, gender and race/ethnicity. The corpus covers five languages: English, Italian, Polish, Portuguese and Spanish. + +### Bias dataset (De-Arteaga et al. 2019) +The aim of this dataset is to study the gender bias of models that identify a person’s occupation from their bios.
+ +---- + +## Training data +### CC100-XL +#### Description +Following the recent success of multilingual self-supervised pre-training (Devlin et al., 2019; Lample and Conneau, 2019; Con; Xue et al., 2020; Goyal et al., 2021a; Liu et al., 2020), we train our language models on a mixture of monolingual text of different languages. We extended the pipeline used for mining the CC100 corpus to generate CC100-XL, a significantly larger multilingual dataset covering 68 Common Crawl snapshots (from Summer 2013 to March/April 2020) and 134 languages. + +More details on the CC100-XL dataset can be found in the Appendix section of the paper. + +## RAI Dimensions +### Fairness (Bias and inclusion) +The XGLM model was evaluated on Hate speech and bias identification datasets. For hate speech, we observe that across the 5 languages in the dataset, in context learning results are only slightly better than random (50%). Another interesting observation is that most few shot results are worse than zero-shot, which indicates that the model is not able to utilize examples using the templates described in the paper. For bias identification, the XGLM (6.7B) English only model achieves the best performance on English and Spanish, while the GPT-3 model of comparable size (6.7B) model achieves the best in French. On certain occupations (e.g. model and teacher), XGLM 6.7B En only model and GPT-3 (6.7B) have very significant bias while XGLM 7.5B is much less biased. + +### Privacy and security +The XGLM model did not have any special Privacy and Security considerations. The training data and evaluation data were both public and went through standard Meta privacy and licensing procedures. + +### Transparency and control +In the spirit of transparency and accountability we have created this model card and a data card for the CC100-XL which can be found in the Appendix section of the paper. 
+ +### Efficiency (Green AI) +From an engineering perspective, XGLM pertains to a family of models that represent single unified models catering to many languages which have wide application across many applications. Such a unified single model saves on carbon footprint as well as energy consumption (comparing to the alternative: separate models for different languages) leading to more energy efficiency. A single model, despite having the risk of being a single point of failure, has the powerful incentive of being easier to maintain, access, distribute, and track. + +## References +Edoardo Maria Ponti, Goran Glavas, Olga Majewska, Qianchu Liu, Ivan Vulic, and Anna Korhonen. 2020. XCOPA: A multilingual dataset for causal commonsense reasoning. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP 2020, Online, November 16-20, 2020, pages 2362–2376. Association for Computational Linguistics. +XCOPA Dataset | Papers With Code + +Alexey Tikhonov and Max Ryabinin. 2021. It’s all in the heads: Using attention heads as a baseline for cross-lingual transfer in commonsense reasoning. In Findings of the Association for Computational Linguistics: ACL/IJCNLP 2021, Online Event, August 1-6, 2021, volume ACL/IJCNLP 2021 of Findings of ACL, pages 3534–3546. Association for Computational Linguistics. +XWINO Dataset | Papers With Code (XWinograd) + +Yinfei Yang, Yuan Zhang, Chris Tar, and Jason Baldridge. 2019. PAWS-X: A cross-lingual adversarial dataset for paraphrase identification. CoRR, abs/1908.11828. +PAWS-X Dataset | Papers With Code + +Alexis Conneau, Guillaume Lample, Ruty Rinott, Adina Williams, Samuel R. Bowman, Holger Schwenk, and Veselin Stoyanov. 2018. XNLI: evaluating cross-lingual sentence representations. CoRR, abs/1809.05053. +XNLI Dataset | Papers With Code + +Xiaolei Huang, Linzi Xing, Franck Dernoncourt, and Michael Paul. 2020. 
Multilingual twitter corpus and baselines for evaluating demographic bias in hate speech recognition. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 1440–1448. + +Maria De-Arteaga, Alexey Romanov, Hanna Wallach, Jennifer Chayes, Christian Borgs, Alexandra Chouldechova, Sahin Geyik, Krishnaram Kenthapadi, and Adam Tauman Kalai. 2019. Bias in bios: A case study of semantic representation bias in a high-stakes setting. In proceedings of the Conference on Fairness, Accountability, and Transparency, pages 120–128. + +Nasrin Mostafazadeh, Nathanael Chambers, Xiaodong He, Devi Parikh, Dhruv Batra, Lucy Vanderwende, Pushmeet Kohli, James F. Allen. A Corpus and Evaluation Framework for Deeper Understanding of Commonsense Stories. CoRR abs/1604.01696. + +Jieyu Zhao, Subhabrata Mukherjee, Saghar Hosseini, Kai-Wei Chang, and Ahmed Hassan Awadallah. 2020. Gender bias in multilingual embeddings and crosslingual transfer. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 2896–2907. + +## Citation details +``` +@article{DBLP:journals/corr/abs-2112-10668, + author = {Xi Victoria Lin and + Todor Mihaylov and + Mikel Artetxe and + Tianlu Wang and + Shuohui Chen and + Daniel Simig and + Myle Ott and + Naman Goyal and + Shruti Bhosale and + Jingfei Du and + Ramakanth Pasunuru and + Sam Shleifer and + Punit Singh Koura and + Vishrav Chaudhary and + Brian O'Horo and + Jeff Wang and + Luke Zettlemoyer and + Zornitsa Kozareva and + Mona T. 
Diab and + Veselin Stoyanov and + Xian Li}, + title = {Few-shot Learning with Multilingual Language Models}, + journal = {CoRR}, + volume = {abs/2112.10668}, + year = {2021}, + url = {https://arxiv.org/abs/2112.10668}, + eprinttype = {arXiv}, + eprint = {2112.10668}, + timestamp = {Tue, 04 Jan 2022 15:59:27 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2112-10668.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` diff --git a/fairseq/examples/xlmr/README.md b/fairseq/examples/xlmr/README.md new file mode 100644 index 0000000..bba7910 --- /dev/null +++ b/fairseq/examples/xlmr/README.md @@ -0,0 +1,144 @@ +# Unsupervised Cross-lingual Representation Learning at Scale (XLM-RoBERTa) +https://arxiv.org/pdf/1911.02116.pdf + +# Larger-Scale Transformers for Multilingual Masked Language Modeling +https://arxiv.org/pdf/2105.00572.pdf + + +## What's New: +- June 2021: `XLMR-XL` AND `XLMR-XXL` models released. + +## Introduction + +`XLM-R` (`XLM-RoBERTa`) is a generic cross lingual sentence encoder that obtains state-of-the-art results on many cross-lingual understanding (XLU) benchmarks. It is trained on `2.5T` of filtered CommonCrawl data in 100 languages (list below). 
+ + Language | Language|Language |Language | Language +---|---|---|---|--- +Afrikaans | Albanian | Amharic | Arabic | Armenian +Assamese | Azerbaijani | Basque | Belarusian | Bengali +Bengali Romanize | Bosnian | Breton | Bulgarian | Burmese +Burmese zawgyi font | Catalan | Chinese (Simplified) | Chinese (Traditional) | Croatian +Czech | Danish | Dutch | English | Esperanto +Estonian | Filipino | Finnish | French | Galician +Georgian | German | Greek | Gujarati | Hausa +Hebrew | Hindi | Hindi Romanize | Hungarian | Icelandic +Indonesian | Irish | Italian | Japanese | Javanese +Kannada | Kazakh | Khmer | Korean | Kurdish (Kurmanji) +Kyrgyz | Lao | Latin | Latvian | Lithuanian +Macedonian | Malagasy | Malay | Malayalam | Marathi +Mongolian | Nepali | Norwegian | Oriya | Oromo +Pashto | Persian | Polish | Portuguese | Punjabi +Romanian | Russian | Sanskrit | Scottish Gaelic | Serbian +Sindhi | Sinhala | Slovak | Slovenian | Somali +Spanish | Sundanese | Swahili | Swedish | Tamil +Tamil Romanize | Telugu | Telugu Romanize | Thai | Turkish +Ukrainian | Urdu | Urdu Romanize | Uyghur | Uzbek +Vietnamese | Welsh | Western Frisian | Xhosa | Yiddish + +## Pre-trained models + +Model | Description | #params | vocab size | Download +---|---|---|---|--- +`xlmr.base` | XLM-R using the BERT-base architecture | 250M | 250k | [xlmr.base.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz) +`xlmr.large` | XLM-R using the BERT-large architecture | 560M | 250k | [xlmr.large.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz) +`xlmr.xl` | XLM-R (`layers=36, model_dim=2560`) | 3.5B | 250k | [xlmr.xl.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xlmr/xlmr.xl.tar.gz) +`xlmr.xxl` | XLM-R (`layers=48, model_dim=4096`) | 10.7B | 250k | [xlmr.xxl.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xlmr/xlmr.xxl.tar.gz) + +## Results + +**[XNLI (Conneau et al., 2018)](https://arxiv.org/abs/1809.05053)** + +Model | average | en | fr | es | de | el | 
bg | ru | tr | ar | vi | th | zh | hi | sw | ur +---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|--- +`roberta.large.mnli` _(TRANSLATE-TEST)_ | 77.8 | 91.3 | 82.9 | 84.3 | 81.2 | 81.7 | 83.1 | 78.3 | 76.8 | 76.6 | 74.2 | 74.1 | 77.5 | 70.9 | 66.7 | 66.8 +`xlmr.large` _(TRANSLATE-TRAIN-ALL)_ | 83.6 | 89.1 | 85.1 | 86.6 | 85.7 | 85.3 | 85.9 | 83.5 | 83.2 | 83.1 | 83.7 | 81.5 | 83.7 | 81.6 | 78.0 | 78.1 +`xlmr.xl` _(TRANSLATE-TRAIN-ALL)_ | 85.4 | 91.1 | 87.2 | 88.1 | 87.0 | 87.4 | 87.8 | 85.3 | 85.2 | 85.3 | 86.2 | 83.8 | 85.3 | 83.1 | 79.8 | 78.2 +`xlmr.xxl` _(TRANSLATE-TRAIN-ALL)_ | 86.0 | 91.5 | 87.6 | 88.7 | 87.8 | 87.4 | 88.2 | 85.6 | 85.1 | 85.8 | 86.3 | 83.9 | 85.6 | 84.6 | 81.7 | 80.6 + +**[MLQA (Lewis et al., 2019)](https://arxiv.org/abs/1910.07475)** + +Model | average | en | es | de | ar | hi | vi | zh +---|---|---|---|---|---|---|---|--- +`BERT-large` | - | 80.2/67.4 | - | - | - | - | - | - +`mBERT` | 57.7 / 41.6 | 77.7 / 65.2 | 64.3 / 46.6 | 57.9 / 44.3 | 45.7 / 29.8| 43.8 / 29.7 | 57.1 / 38.6 | 57.5 / 37.3 +`xlmr.large` | 70.7 / 52.7 | 80.6 / 67.8 | 74.1 / 56.0 | 68.5 / 53.6 | 63.1 / 43.5 | 69.2 / 51.6 | 71.3 / 50.9 | 68.0 / 45.4 +`xlmr.xl` | 73.4 / 55.3 | 85.1 / 72.6 | 66.7 / 46.2 | 70.5 / 55.5 | 74.3 / 56.9 | 72.2 / 54.7 | 74.4 / 52.9 | 70.9 / 48.5 +`xlmr.xxl` | 74.8 / 56.6 | 85.5 / 72.4 | 68.6 / 48.4 | 72.7 / 57.8 | 75.4 / 57.6 | 73.7 / 55.8 | 76.0 / 55.0 | 71.7 / 48.9 + + +## Example usage + +##### Load XLM-R from torch.hub (PyTorch >= 1.1): +```python +import torch +xlmr = torch.hub.load('pytorch/fairseq:main', 'xlmr.large') +xlmr.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Load XLM-R (for PyTorch 1.0 or custom models): +```python +# Download xlmr.large model +wget https://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz +tar -xzvf xlmr.large.tar.gz + +# Load the model in fairseq +from fairseq.models.roberta import XLMRModel +xlmr = XLMRModel.from_pretrained('/path/to/xlmr.large', 
checkpoint_file='model.pt') +xlmr.eval() # disable dropout (or leave in train mode to finetune) +``` + +##### Apply sentence-piece-model (SPM) encoding to input text: +```python +en_tokens = xlmr.encode('Hello world!') +assert en_tokens.tolist() == [0, 35378, 8999, 38, 2] +xlmr.decode(en_tokens) # 'Hello world!' + +zh_tokens = xlmr.encode('你好,世界') +assert zh_tokens.tolist() == [0, 6, 124084, 4, 3221, 2] +xlmr.decode(zh_tokens) # '你好,世界' + +hi_tokens = xlmr.encode('नमस्ते दुनिया') +assert hi_tokens.tolist() == [0, 68700, 97883, 29405, 2] +xlmr.decode(hi_tokens) # 'नमस्ते दुनिया' + +ar_tokens = xlmr.encode('مرحبا بالعالم') +assert ar_tokens.tolist() == [0, 665, 193478, 258, 1705, 77796, 2] +xlmr.decode(ar_tokens) # 'مرحبا بالعالم' + +fr_tokens = xlmr.encode('Bonjour le monde') +assert fr_tokens.tolist() == [0, 84602, 95, 11146, 2] +xlmr.decode(fr_tokens) # 'Bonjour le monde' +``` + +##### Extract features from XLM-R: +```python +# Extract the last layer's features +last_layer_features = xlmr.extract_features(zh_tokens) +assert last_layer_features.size() == torch.Size([1, 6, 1024]) + +# Extract all layer's features (layer 0 is the embedding layer) +all_layers = xlmr.extract_features(zh_tokens, return_all_hiddens=True) +assert len(all_layers) == 25 +assert torch.all(all_layers[-1] == last_layer_features) +``` + +## Citation + +```bibtex +@article{conneau2019unsupervised, + title={Unsupervised Cross-lingual Representation Learning at Scale}, + author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, + journal={arXiv preprint arXiv:1911.02116}, + year={2019} +} +``` + + +```bibtex +@article{goyal2021larger, + title={Larger-Scale Transformers for Multilingual Masked Language Modeling}, + author={Goyal, Naman and Du, Jingfei and Ott, Myle and Anantharaman, Giri and Conneau, Alexis}, + journal={arXiv preprint 
arXiv:2105.00572}, + year={2021} +} +``` diff --git a/fairseq/examples/xmod/README.md b/fairseq/examples/xmod/README.md new file mode 100644 index 0000000..46958b8 --- /dev/null +++ b/fairseq/examples/xmod/README.md @@ -0,0 +1,151 @@ +# X-MOD: Lifting the Curse of Multilinguality by Pre-training Modular Transformers + +https://arxiv.org/abs/2205.06266 + + +## Introduction + +X-MOD extends multilingual masked language models like XLM-R to include language-specific modular components, introduced at each transformer layer. Each module is only used by one language. For fine-tuning, the modular components are frozen, and replaced with the target language in cross-lingual transfer settings. + + +## Pre-trained models + +Model | Size | # train steps | # langs | Download +---|---|---|---|--- +`xmod.base.13.125k` | BERT-base | 125k | 13 | [xmod.base.13.125k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.13.125k.tar.gz) +`xmod.base.30.125k` | BERT-base | 125k | 30 | [xmod.base.30.125k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.30.125k.tar.gz) +`xmod.base.30.195k` | BERT-base | 195k | 30 | [xmod.base.30.195k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.30.195k.tar.gz) +`xmod.base.60.125k` | BERT-base | 125k | 60 | [xmod.base.60.125k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.60.125k.tar.gz) +`xmod.base.60.265k` | BERT-base | 265k | 60 | [xmod.base.60.265k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.60.265k.tar.gz) +`xmod.base.75.125k` | BERT-base | 125k | 75 | [xmod.base.75.125k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.75.125k.tar.gz) +`xmod.base.75.269k` | BERT-base | 269k | 75 | [xmod.base.75.269k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.75.269k.tar.gz) +`xmod.base` | BERT-base | 1M | 81 | [xmod.base.81.1M.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.81.1M.tar.gz) 
+`xmod.large.prenorm` | BERT-large | 500k | 81 | [xmod.large.prenorm.81.500k.tar.gz](https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.large.prenorm.81.500k.tar.gz) + + +## Fine-tuning on NLI + +We next provide an example of how to fine-tune the pre-trained models above on Natural Language Inference (NLI). We use MNLI for training in English, and show how to run inference in other languages. + +### 1) Download a pre-trained model + +```bash +MODEL=xmod.base.81.1M +wget https://dl.fbaipublicfiles.com/fairseq/models/xmod/$MODEL.tar.gz +tar -xzf $MODEL.tar.gz +``` + +### 2) Download and preprocess [MNLI](https://cims.nyu.edu/~sbowman/multinli/) +```bash +wget https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zip +unzip multinli_1.0.zip +python ./examples/xmod/preprocess_nli.py \ + --sentencepiece-model $MODEL/sentencepiece.bpe.model \ + --train multinli_1.0/multinli_1.0_train.jsonl \ + --valid multinli_1.0/multinli_1.0_dev_matched.jsonl \ + --destdir multinli_1.0/fairseq +``` + +### 3) Fine-tune on MNLI: + +```bash +MAX_EPOCH=5 +LR=1e-05 +BATCH_SIZE=32 +DATA_DIR=multinli_1.0/fairseq/bin + +CUDA_VISIBLE_DEVICES=0 fairseq-train $DATA_DIR \ + --restore-file $MODEL/model.pt \ + --save-dir $MODEL/nli \ + --reset-optimizer \ + --reset-dataloader \ + --reset-meters \ + --best-checkpoint-metric accuracy \ + --maximize-best-checkpoint-metric \ + --task sentence_prediction_adapters \ + --num-classes 3 \ + --init-token 0 \ + --separator-token 2 \ + --max-positions 512 \ + --shorten-method "truncate" \ + --arch xmod_base \ + --dropout 0.1 \ + --attention-dropout 0.1 \ + --weight-decay 0.01 \ + --criterion sentence_prediction_adapters \ + --optimizer adam \ + --adam-betas '(0.9, 0.98)' \ + --adam-eps 1e-06 \ + --clip-norm 0.0 \ + --lr-scheduler fixed \ + --lr $LR \ + --fp16 \ + --fp16-init-scale 4 \ + --threshold-loss-scale 1 \ + --fp16-scale-window 128 \ + --batch-size $BATCH_SIZE \ + --required-batch-size-multiple 1 \ + --update-freq 1 \ + --max-epoch $MAX_EPOCH +``` + 
+### 4) Run inference + +After training the model, we can load it and run inference in our target language. The default language is set to English, which is why we were not required to pass a language ID to the model during fine-tuning. To run inference in a non-English language, we need to tell the model that the module of the target language should be used instead: + +```python +from fairseq.models.xmod import XMODModel + +MODEL='xmod.base.81.1M/nli' +DATA='multinli_1.0/fairseq/bin' + +# Load model +model = XMODModel.from_pretrained( + model_name_or_path=MODEL, + checkpoint_file='checkpoint_best.pt', + data_name_or_path=DATA, + suffix='', + criterion='cross_entropy', + bpe='sentencepiece', + sentencepiece_model=DATA+'/input0/sentencepiece.bpe.model') +model = model.eval(); # disable dropout +model = model.half(); # use FP16 +model = model.cuda(); # move to GPU + +def predict(premise, hypothesis, lang): + tokens = model.encode(premise, hypothesis) + idx = model.predict('sentence_classification_head', tokens, lang_id=[lang]).argmax().item() + dictionary = model.task.label_dictionary + return dictionary[idx + dictionary.nspecial] + +predict( + premise='X-Mod hat spezifische Module die für jede Sprache existieren.', + hypothesis='X-Mod hat Module.', + lang='de_DE' +) # entailment + +predict( + premise='Londres es la capital del Reino Unido.', + hypothesis='Londres está en Francia.', + lang='es_XX', +) # contradiction + +predict( + premise='Patxik gogoko ditu babarrunak.', + hypothesis='Patxik babarrunak bazkaldu zituen.', + lang='eu_ES', +) # neutral +``` + + +## Citation + +```bibtex +@misc{pfeiffer2022xmod, + doi = {10.48550/ARXIV.2205.06266}, + url = {https://arxiv.org/abs/2205.06266}, + title = {Lifting the Curse of Multilinguality by Pre-training Modular Transformers}, + publisher = {arXiv}, + year = {2022}, +} +``` diff --git a/fairseq/examples/xmod/preprocess_nli.py b/fairseq/examples/xmod/preprocess_nli.py new file mode 100644 index 0000000..e1fb91c --- 
/dev/null +++ b/fairseq/examples/xmod/preprocess_nli.py @@ -0,0 +1,168 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import json +import collections +import argparse +import shutil +import subprocess +import sys +import tempfile +from multiprocessing import Pool +import sentencepiece as spm + + +def preprocess(spm_model_path, train_path, valid_path, test_path, dest_dir, remove_empty=False, output_format='piece', workers=20): + with tempfile.TemporaryDirectory() as tmp: + # Tokenize with SentencePiece + for split, path in ('train', train_path), ('valid', valid_path), ('test', test_path): + if path is None: + continue + if path == '-': + path = sys.stdin.fileno() + with open(path, encoding='utf-8', errors='surrogateescape') as fin: + with open(f'{tmp}/{split}', mode='w', encoding='utf-8', errors='surrogateescape') as fout: + encoder = MultiprocessingEncoder(model=spm_model_path, remove_empty=remove_empty, output_format=output_format) + pool = Pool(workers, initializer=encoder.initializer) + encoded_lines = pool.imap(encoder.encode, fin, 10000) + for i, line in enumerate(encoded_lines, start=1): + if line is not None: + print(line, file=fout) + if i % 10000 == 0: + print("tokenized {} lines".format(i), file=sys.stderr) + + # Generate dictionary + sp = spm.SentencePieceProcessor(model_file=spm_model_path) + if output_format == 'piece': + vocab = [sp.id_to_piece(i) for i in range(3, sp.vocab_size())] + else: + vocab = map(str, range(sp.vocab_size())) + with open(f'{tmp}/dict.txt', mode='w', encoding='utf-8', errors='surrogateescape') as f: + for word in vocab: + print(word, 1, file=f) + + # Binarize + command = [ + 'python3', '-m', 'fairseq_cli.preprocess', + '--only-source', + '--thresholdsrc', '0', + '--destdir', dest_dir, + '--srcdict', f'{tmp}/dict.txt', + '--workers', '20', + ] + for split, path in ('train', 
train_path), ('valid', valid_path), ('test', test_path): + if path is not None: + command += [f'--{split}pref', f'{tmp}/{split}'] + subprocess.run(command) + + # Copy SentencePiece model + shutil.copyfile(spm_model_path, f'{dest_dir}/sentencepiece.bpe.model') + + +class MultiprocessingEncoder(object): + def __init__(self, model, remove_empty, output_format): + self.model = model + self.remove_empty = remove_empty + self.output_format = output_format + + def initializer(self): + global sp + sp = spm.SentencePieceProcessor(model_file=self.model) + + def encode(self, line): + global sp + line = line.strip() + if len(line) == 0 and self.remove_empty: + return None + + if self.output_format == 'piece': + return ' '.join(sp.encode_as_pieces(line)) + else: + return ' '.join(map(str, sp.encode(line))) + + +def write_lines(lines, path): + with open(path, mode='x', encoding='utf-8') as f: + for line in lines: + print(line, file=f) + + +def read_jsonl(path): + with open(path, encoding='utf-8') as f: + return [json.loads(line) for line in f.read().splitlines()] + + +def read_nli(path, langs=None): + data = read_jsonl(path) + + if langs is not None: + data = [sample for sample in data if sample.get('language') in langs] + + lang2count = collections.defaultdict(int) + for sample in data: + lang2count[sample.get('language')] += 1 + + if langs: + assert set(lang2count.keys()) == set(langs) + + nlangs = len(lang2count) + assert nlangs > 0 + lens = list(lang2count.values()) + assert all([lens[0] == length for length in lens]) + + print(f'Loaded {lens[0]} samples in {nlangs} languages from {path}', file=sys.stderr) + return data + + +def main(): + parser = argparse.ArgumentParser(description='Tokenize and binarize NLI data') + parser.add_argument('--sentencepiece-model', required=True) + parser.add_argument('--train', required=True, help='Training data in jsonl format') + parser.add_argument('--valid', required=True, help='Validation data in jsonl format') + 
parser.add_argument('--destdir', required=True) + + args = parser.parse_args() + + os.makedirs(args.destdir + '/raw',) + os.makedirs(args.destdir + '/bin', ) + + # Extract input/labels + for split, path in ('train', args.train), ('valid', args.valid): + data = read_nli(path, langs=None) + original_size = len(data) + data = [sample for sample in data if sample['gold_label'] != '-'] + assert all(sample['gold_label'] in ('contradiction', 'entailment', 'neutral') for sample in data) + filtered_size = len(data) + if filtered_size != original_size: + print(f'Filtered {filtered_size}/{original_size} samples from {path}', file=sys.stderr) + for name, field in ('input0', 'sentence1'), ('input1', 'sentence2'), ('label', 'gold_label'): + write_lines([sample[field] for sample in data], f'{args.destdir}/raw/{split}.{name}.txt') + + # Tokenize and binarize input + for field in 'input0', 'input1': + preprocess( + spm_model_path=args.sentencepiece_model, + train_path=f'{args.destdir}/raw/train.{field}.txt', + valid_path=f'{args.destdir}/raw/valid.{field}.txt', + test_path=None, + dest_dir=f'{args.destdir}/bin/{field}', + workers=20, + ) + + # Binarize labels + subprocess.run([ + 'python3', '-m', 'fairseq_cli.preprocess', + '--trainpref', f'{args.destdir}/raw/train.label.txt', + '--validpref', f'{args.destdir}/raw/valid.label.txt', + '--only-source', + '--thresholdsrc', '0', + '--destdir', f'{args.destdir}/bin/label', + '--workers', '20', + ]) + + +if __name__ == '__main__': + main() diff --git a/fairseq/fairseq/__init__.py b/fairseq/fairseq/__init__.py new file mode 100644 index 0000000..080c988 --- /dev/null +++ b/fairseq/fairseq/__init__.py @@ -0,0 +1,45 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+"""isort:skip_file""" + +import os +import sys + +try: + from .version import __version__ # noqa +except ImportError: + version_txt = os.path.join(os.path.dirname(__file__), "version.txt") + with open(version_txt) as f: + __version__ = f.read().strip() + +__all__ = ["pdb"] + +# backwards compatibility to support `from fairseq.X import Y` +from fairseq.distributed import utils as distributed_utils +from fairseq.logging import meters, metrics, progress_bar # noqa + +sys.modules["fairseq.distributed_utils"] = distributed_utils +sys.modules["fairseq.meters"] = meters +sys.modules["fairseq.metrics"] = metrics +sys.modules["fairseq.progress_bar"] = progress_bar + +# initialize hydra +from fairseq.dataclass.initialize import hydra_init + +hydra_init() + +import fairseq.criterions # noqa +import fairseq.distributed # noqa +import fairseq.models # noqa +import fairseq.modules # noqa +import fairseq.optim # noqa +import fairseq.optim.lr_scheduler # noqa +import fairseq.pdb # noqa +import fairseq.scoring # noqa +import fairseq.tasks # noqa +import fairseq.token_generation_constraints # noqa + +import fairseq.benchmark # noqa +import fairseq.model_parallel # noqa diff --git a/fairseq/fairseq/benchmark/__init__.py b/fairseq/fairseq/benchmark/__init__.py new file mode 100644 index 0000000..0317d5c --- /dev/null +++ b/fairseq/fairseq/benchmark/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# import models/tasks to register them +from . import dummy_dataset, dummy_lm, dummy_masked_lm, dummy_model, dummy_mt # noqa diff --git a/fairseq/fairseq/benchmark/benchmark_multihead_attention.py b/fairseq/fairseq/benchmark/benchmark_multihead_attention.py new file mode 100644 index 0000000..a44847f --- /dev/null +++ b/fairseq/fairseq/benchmark/benchmark_multihead_attention.py @@ -0,0 +1,172 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import itertools +import random + +import torch +from torch.utils import benchmark + +from fairseq.modules.multihead_attention import MultiheadAttention + +BATCH = [20, 41, 97] +SEQ = 64 +EMB = 48 +HEADS = 4 +DROP = 0.1 +DEVICE = torch.device("cuda") +ATTN_MASK_DTYPE = [torch.uint8, torch.bool, torch.float] +KEY_PADDING_MASK_DTYPE = [torch.uint8, torch.bool] + + +def _reset_seeds(): + torch.manual_seed(0) + random.seed(0) + + +def _get_mask(to_dtype: torch.dtype, dim0: int, dim1: int): + if to_dtype == torch.float: + mask = torch.randint(0, 2, (dim0, dim1)).to(dtype=torch.bool) + return mask.to(dtype=to_dtype).masked_fill(mask, -float("inf")) + return torch.randint(0, 2, (dim0, dim1)).to(dtype=to_dtype) + + +def benchmark_multihead_attention( + label="", + attn_dtype=torch.uint8, + key_padding_dtype=torch.uint8, + add_bias_kv=False, + add_zero_attn=False, + static_kv=False, + batch_size=20, + embedding=EMB, + seq_len=SEQ, + num_heads=HEADS, +): + + results = [] + # device = torch.device("cuda") + + xformers_att_config = '{"name": "scaled_dot_product"}' + + attn_mask = _get_mask(to_dtype=attn_dtype, dim0=seq_len, dim1=seq_len) + key_padding_mask = _get_mask( + to_dtype=key_padding_dtype, dim0=batch_size, dim1=seq_len + ) + + q = torch.rand(seq_len, batch_size, embedding, requires_grad=True) + k = torch.rand(seq_len, batch_size, embedding, requires_grad=True) + v = torch.rand(seq_len, batch_size, embedding, requires_grad=True) + + _reset_seeds() + + original_mha = MultiheadAttention( + embedding, + num_heads, + dropout=0.0, + xformers_att_config=None, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + ) + + xformers_mha = MultiheadAttention( + embedding, + num_heads, + dropout=0.0, + xformers_att_config=xformers_att_config, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + ) + + def 
original_bench_fw(q, k, v, key_padding_mask, attn_mask, static_kv): + original_mha( + query=q, + key=k, + value=v, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + + def xformers_bench_fw(q, k, v, key_padding_mask, attn_mask, static_kv): + xformers_mha( + query=q, + key=k, + value=v, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + + def original_bench_fw_bw(q, k, v, key_padding_mask, attn_mask, static_kv): + output, _ = original_mha( + query=q, + key=k, + value=v, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + loss = torch.norm(output) + loss.backward() + + def xformers_bench_fw_bw(q, k, v, key_padding_mask, attn_mask, static_kv): + output, _ = xformers_mha( + query=q, + key=k, + value=v, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + loss = torch.norm(output) + loss.backward() + + fns = [ + original_bench_fw, + xformers_bench_fw, + original_bench_fw_bw, + xformers_bench_fw_bw, + ] + + for fn in fns: + results.append( + benchmark.Timer( + stmt="fn(q, k, v, key_padding_mask, attn_mask, static_kv)", + globals={ + "q": q, + "k": k, + "v": v, + "key_padding_mask": key_padding_mask, + "attn_mask": attn_mask, + "static_kv": static_kv, + "fn": fn, + }, + label="multihead fw + bw", + sub_label=f"{fn.__name__}", + description=label, + ).blocked_autorange(min_run_time=1) + ) + + compare = benchmark.Compare(results) + compare.print() + + +def run_benchmarks(): + for attn_dtype, key_padding_dtype, add_bias_kv, add_zero_attn in itertools.product( + ATTN_MASK_DTYPE, KEY_PADDING_MASK_DTYPE, [True, False], [True, False] + ): + label = f"attn_dtype {attn_dtype}, key_padding_dtype {key_padding_dtype}, \ + add_bias_kv {add_bias_kv}, add_zero_attn {add_zero_attn}" + benchmark_multihead_attention( + label=label, + attn_dtype=attn_dtype, + key_padding_dtype=key_padding_dtype, + add_bias_kv=add_bias_kv, + 
add_zero_attn=add_zero_attn, + ) + + +run_benchmarks() diff --git a/fairseq/fairseq/benchmark/dummy_dataset.py b/fairseq/fairseq/benchmark/dummy_dataset.py new file mode 100644 index 0000000..2f05175 --- /dev/null +++ b/fairseq/fairseq/benchmark/dummy_dataset.py @@ -0,0 +1,36 @@ +import numpy as np +from fairseq.data import FairseqDataset + + +class DummyDataset(FairseqDataset): + def __init__(self, batch, num_items, item_size): + super().__init__() + self.batch = batch + self.num_items = num_items + self.item_size = item_size + + def __getitem__(self, index): + return index + + def __len__(self): + return self.num_items + + def collater(self, samples): + return self.batch + + @property + def sizes(self): + return np.array([self.item_size] * self.num_items) + + def num_tokens(self, index): + return self.item_size + + def size(self, index): + return self.item_size + + def ordered_indices(self): + return np.arange(self.num_items) + + @property + def supports_prefetch(self): + return False diff --git a/fairseq/fairseq/benchmark/dummy_lm.py b/fairseq/fairseq/benchmark/dummy_lm.py new file mode 100644 index 0000000..c6246a0 --- /dev/null +++ b/fairseq/fairseq/benchmark/dummy_lm.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +from dataclasses import dataclass, field +from typing import Optional + +import torch +from .dummy_dataset import DummyDataset +from fairseq.data import Dictionary +from fairseq.dataclass import FairseqDataclass +from fairseq.tasks import FairseqTask, register_task +from omegaconf import II + + +logger = logging.getLogger(__name__) + + +@dataclass +class DummyLMConfig(FairseqDataclass): + dict_size: int = 49996 + dataset_size: int = 100000 + tokens_per_sample: int = field( + default=512, metadata={"help": "max sequence length"} + ) + add_bos_token: bool = False + batch_size: Optional[int] = II("dataset.batch_size") + max_tokens: Optional[int] = II("dataset.max_tokens") + max_target_positions: int = II("task.tokens_per_sample") + + +@register_task("dummy_lm", dataclass=DummyLMConfig) +class DummyLMTask(FairseqTask): + def __init__(self, cfg: DummyLMConfig): + super().__init__(cfg) + + # load dictionary + self.dictionary = Dictionary() + for i in range(cfg.dict_size): + self.dictionary.add_symbol("word{}".format(i)) + self.dictionary.pad_to_multiple_(8) # often faster if divisible by 8 + logger.info("dictionary: {} types".format(len(self.dictionary))) + + seq = torch.arange(cfg.tokens_per_sample + 1) + self.dictionary.pad() + 1 + + self.dummy_src = seq[:-1] + self.dummy_tgt = seq[1:] + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. 
+ Args: + split (str): name of the split (e.g., train, valid, test) + """ + if self.cfg.batch_size is not None: + bsz = self.cfg.batch_size + else: + bsz = max(1, self.cfg.max_tokens // self.cfg.tokens_per_sample) + self.datasets[split] = DummyDataset( + { + "id": 1, + "net_input": { + "src_tokens": torch.stack([self.dummy_src for _ in range(bsz)]), + "src_lengths": torch.full( + (bsz,), self.cfg.tokens_per_sample, dtype=torch.long + ), + }, + "target": torch.stack([self.dummy_tgt for _ in range(bsz)]), + "nsentences": bsz, + "ntokens": bsz * self.cfg.tokens_per_sample, + }, + num_items=self.cfg.dataset_size, + item_size=self.cfg.tokens_per_sample, + ) + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary diff --git a/fairseq/fairseq/benchmark/dummy_masked_lm.py b/fairseq/fairseq/benchmark/dummy_masked_lm.py new file mode 100644 index 0000000..12b9c5d --- /dev/null +++ b/fairseq/fairseq/benchmark/dummy_masked_lm.py @@ -0,0 +1,94 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +from dataclasses import dataclass, field +from typing import Optional + +import torch +from omegaconf import II + +from .dummy_dataset import DummyDataset +from fairseq.data import Dictionary +from fairseq.dataclass import FairseqDataclass +from fairseq.tasks import FairseqTask, register_task + +logger = logging.getLogger(__name__) + + +@dataclass +class DummyMaskedLMConfig(FairseqDataclass): + dict_size: int = 49996 + dataset_size: int = 100000 + tokens_per_sample: int = field( + default=512, + metadata={ + "help": "max number of total tokens over all" + " segments per sample for BERT dataset" + }, + ) + batch_size: Optional[int] = II("dataset.batch_size") + max_tokens: Optional[int] = II("dataset.max_tokens") + max_target_positions: int = II("task.tokens_per_sample") + + +@register_task("dummy_masked_lm", dataclass=DummyMaskedLMConfig) +class DummyMaskedLMTask(FairseqTask): + def __init__(self, cfg: DummyMaskedLMConfig): + super().__init__(cfg) + + self.dictionary = Dictionary() + for i in range(cfg.dict_size): + self.dictionary.add_symbol("word{}".format(i)) + logger.info("dictionary: {} types".format(len(self.dictionary))) + # add mask token + self.mask_idx = self.dictionary.add_symbol("<mask>") + self.dictionary.pad_to_multiple_(8) # often faster if divisible by 8 + + mask_idx = 0 + pad_idx = 1 + seq = torch.arange(cfg.tokens_per_sample) + pad_idx + 1 + mask = torch.arange(2, cfg.tokens_per_sample, 7) # ~15% + src = seq.clone() + src[mask] = mask_idx + tgt = torch.full_like(seq, pad_idx) + tgt[mask] = seq[mask] + + self.dummy_src = src + self.dummy_tgt = tgt + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. 
+ Args: + split (str): name of the split (e.g., train, valid, test) + """ + if self.cfg.batch_size is not None: + bsz = self.cfg.batch_size + else: + bsz = max(1, self.cfg.max_tokens // self.cfg.tokens_per_sample) + self.datasets[split] = DummyDataset( + { + "id": 1, + "net_input": { + "src_tokens": torch.stack([self.dummy_src for _ in range(bsz)]), + "src_lengths": torch.full( + (bsz,), self.cfg.tokens_per_sample, dtype=torch.long + ), + }, + "target": torch.stack([self.dummy_tgt for _ in range(bsz)]), + "nsentences": bsz, + "ntokens": bsz * self.cfg.tokens_per_sample, + }, + num_items=self.cfg.dataset_size, + item_size=self.cfg.tokens_per_sample, + ) + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary diff --git a/fairseq/fairseq/benchmark/dummy_model.py b/fairseq/fairseq/benchmark/dummy_model.py new file mode 100644 index 0000000..ff26e4f --- /dev/null +++ b/fairseq/fairseq/benchmark/dummy_model.py @@ -0,0 +1,96 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch.nn as nn +import torch.nn.functional as F +from fairseq.data import Dictionary +from fairseq.models import ( + FairseqDecoder, + FairseqLanguageModel, + register_model, + register_model_architecture, +) + + +@register_model("dummy_model") +class DummyModel(FairseqLanguageModel): + def __init__(self, args, encoder): + super().__init__(encoder) + self.args = args + + @staticmethod + def add_args(parser): + parser.add_argument("--num-layers", type=int, default=24) + parser.add_argument("--embed-dim", type=int, default=1024) + + @classmethod + def build_model(cls, args, task): + encoder = DummyEncoder( + num_embed=len(task.target_dictionary), + embed_dim=args.embed_dim, + num_layers=args.num_layers, + ) + return cls(args, encoder) + + def forward(self, src_tokens, masked_tokens=None, **kwargs): + return self.decoder(src_tokens, masked_tokens=masked_tokens) + + +class DummyEncoder(FairseqDecoder): + def __init__(self, num_embed=50000, embed_dim=1024, num_layers=24): + super().__init__(Dictionary()) + self.embed = nn.Embedding( + num_embeddings=num_embed, embedding_dim=embed_dim, padding_idx=0 + ) + self.layers_a = nn.ModuleList( + [ + nn.Sequential( + nn.LayerNorm(embed_dim), + nn.Linear(embed_dim, 3 * embed_dim), # q, k, v input projection + nn.Linear(3 * embed_dim, embed_dim), # skip self-attention + nn.Linear(embed_dim, embed_dim), # output projection + nn.Dropout(), + ) + for i in range(num_layers) + ] + ) + self.layers_b = nn.ModuleList( + [ + nn.Sequential( + nn.LayerNorm(embed_dim), + nn.Linear(embed_dim, 4 * embed_dim), # FFN + nn.ReLU(), + nn.Linear(4 * embed_dim, embed_dim), # FFN + nn.Dropout(0.1), + ) + for i in range(num_layers) + ] + ) + self.out_proj = nn.Linear(embed_dim, num_embed) + + def forward(self, tokens, masked_tokens=None): + x = self.embed(tokens) + for layer_a, layer_b in zip(self.layers_a, self.layers_b): + x = x + layer_a(x) + x = x + layer_b(x) + x = self.out_proj(x) + if masked_tokens is not None: + x = x[masked_tokens] + 
return (x,) + + def max_positions(self): + return 1024 + + def get_normalized_probs(self, net_output, log_probs, sample=None): + logits = net_output[0].float() + if log_probs: + return F.log_softmax(logits, dim=-1) + else: + return F.softmax(logits, dim=-1) + + +@register_model_architecture("dummy_model", "dummy_model") +def base_architecture(args): + pass diff --git a/fairseq/fairseq/benchmark/dummy_mt.py b/fairseq/fairseq/benchmark/dummy_mt.py new file mode 100644 index 0000000..28d78cf --- /dev/null +++ b/fairseq/fairseq/benchmark/dummy_mt.py @@ -0,0 +1,119 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging + +import numpy as np +import torch + +from fairseq.data import Dictionary, FairseqDataset +from fairseq.tasks import LegacyFairseqTask, register_task + +logger = logging.getLogger(__name__) + + +@register_task("dummy_mt") +class DummyMTTask(LegacyFairseqTask): + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + parser.add_argument("--dict-size", default=49996, type=int) + parser.add_argument("--dataset-size", default=100000, type=int) + parser.add_argument("--src-len", default=30, type=int) + parser.add_argument("--tgt-len", default=30, type=int) + + def __init__(self, args, dictionary): + super().__init__(args) + self.dictionary = dictionary + self.seed = args.seed + + dictionary.pad_to_multiple_(8) # often faster if divisible by 8 + + self.dummy_src = torch.arange(args.src_len + 1) + dictionary.pad() + 1 + self.dummy_tgt = torch.arange(args.tgt_len + 1) + dictionary.pad() + 1 + + @classmethod + def setup_task(cls, args, **kwargs): + """Setup the task.""" + dictionary = Dictionary() + for i in range(args.dict_size): + dictionary.add_symbol("word{}".format(i)) + logger.info("dictionary: {} types".format(len(dictionary))) + + args.max_source_positions = args.src_len + 
dictionary.pad() + 2 + args.max_target_positions = args.tgt_len + dictionary.pad() + 2 + + return cls(args, dictionary) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. + Args: + split (str): name of the split (e.g., train, valid, test) + """ + item_size = max(self.args.src_len, self.args.tgt_len) + if self.args.batch_size is not None: + bsz = self.args.batch_size + else: + bsz = max(1, self.args.max_tokens // item_size) + tgt = torch.stack([self.dummy_tgt for _ in range(bsz)]) + self.datasets[split] = DummyDataset( + { + "id": 1, + "net_input": { + "src_tokens": torch.stack([self.dummy_src for _ in range(bsz)]), + "src_lengths": torch.full( + (bsz,), self.args.src_len, dtype=torch.long + ), + "prev_output_tokens": tgt.clone(), + }, + "target": tgt, + "nsentences": bsz, + "ntokens": bsz * self.args.tgt_len, + }, + num_items=self.args.dataset_size, + item_size=item_size, + ) + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary + + +class DummyDataset(FairseqDataset): + def __init__(self, batch, num_items, item_size): + super().__init__() + self.batch = batch + self.num_items = num_items + self.item_size = item_size + + def __getitem__(self, index): + return index + + def __len__(self): + return self.num_items + + def collater(self, samples): + return self.batch + + @property + def sizes(self): + return np.array([self.item_size] * self.num_items) + + def num_tokens(self, index): + return self.item_size + + def size(self, index): + return self.item_size + + def ordered_indices(self): + return np.arange(self.num_items) + + @property + def supports_prefetch(self): + return False diff --git a/fairseq/fairseq/binarizer.py b/fairseq/fairseq/binarizer.py new file mode 100644 index 0000000..6f03d7a --- /dev/null +++ b/fairseq/fairseq/binarizer.py @@ -0,0 +1,381 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import typing as tp
from abc import ABC, abstractmethod
from collections import Counter
from dataclasses import dataclass
from multiprocessing import Pool

import torch

from fairseq.data import Dictionary, indexed_dataset
from fairseq.file_chunker_utils import Chunker, find_offsets
from fairseq.file_io import PathManager
from fairseq.tokenizer import tokenize_line

logger = logging.getLogger("binarizer")


@dataclass
class BinarizeSummary:
    """
    Keep track of what's going on in the binarizer
    """

    # number of sequences (lines) binarized so far
    num_seq: int = 0
    # per-word count of replaced tokens; None until a binarizer starts tracking
    replaced: tp.Optional[Counter] = None
    # total number of tokens produced so far
    num_tok: int = 0

    @property
    def num_replaced(self) -> int:
        """Total number of replaced tokens (0 when nothing is tracked)."""
        if self.replaced is None:
            return 0
        return sum(self.replaced.values())

    @property
    def replaced_percent(self) -> float:
        """Replaced tokens as a percentage of all tokens.

        Returns 0.0 when no token has been counted yet, instead of raising
        ZeroDivisionError on an empty summary.
        """
        if self.num_tok == 0:
            return 0.0
        return 100 * self.num_replaced / self.num_tok

    def __str__(self) -> str:
        base = f"{self.num_seq} sents, {self.num_tok} tokens"
        if self.replaced is None:
            return base

        return f"{base}, {self.replaced_percent:.3}% replaced"

    def merge(self, other: "BinarizeSummary"):
        """Accumulate the counts of *other* into this summary, in place.

        *other* is left untouched.
        """
        if other.replaced is not None:
            if self.replaced is None:
                # Copy instead of aliasing: the in-place ``+=`` of a later
                # merge must not mutate the Counter owned by ``other``.
                self.replaced = Counter(other.replaced)
            else:
                self.replaced += other.replaced
        self.num_seq += other.num_seq
        self.num_tok += other.num_tok


class Binarizer(ABC):
    """
    a binarizer describes how to take a string and build a tensor out of it
    """

    @abstractmethod
    def binarize_line(
        self,
        line: str,
        summary: BinarizeSummary,
    ) -> torch.IntTensor:
        """Convert one input line to a tensor.

        Args:
            line: the raw text line to binarize.
            summary: running statistics object passed in by the caller.
        """
        ...
+ + +def _worker_prefix(output_prefix: str, worker_id: int): + return f"{output_prefix}.pt{worker_id}" + + +class FileBinarizer: + """ + An file binarizer can take a file, tokenize it, and binarize each line to a tensor + """ + + @classmethod + def multiprocess_dataset( + cls, + input_file: str, + dataset_impl: str, + binarizer: Binarizer, + output_prefix: str, + vocab_size=None, + num_workers=1, + ) -> BinarizeSummary: + final_summary = BinarizeSummary() + + offsets = find_offsets(input_file, num_workers) + # find_offsets returns a list of position [pos1, pos2, pos3, pos4] but we would want pairs: + # [(pos1, pos2), (pos2, pos3), (pos3, pos4)] to process the chunks with start/end info + # we zip the list with itself shifted by one to get all the pairs. + (first_chunk, *more_chunks) = zip(offsets, offsets[1:]) + pool = None + if num_workers > 1: + pool = Pool(processes=num_workers - 1) + worker_results = [ + pool.apply_async( + cls._binarize_chunk_and_finalize, + args=( + binarizer, + input_file, + start_offset, + end_offset, + _worker_prefix( + output_prefix, + worker_id, + ), + dataset_impl, + ), + kwds={ + "vocab_size": vocab_size, + } + if vocab_size is not None + else {}, + ) + for worker_id, (start_offset, end_offset) in enumerate( + more_chunks, start=1 + ) + ] + + pool.close() + pool.join() + for r in worker_results: + summ = r.get() + final_summary.merge(summ) + + # do not close the bin file as we need to merge the worker results in + final_ds, summ = cls._binarize_file_chunk( + binarizer, + input_file, + offset_start=first_chunk[0], + offset_end=first_chunk[1], + output_prefix=output_prefix, + dataset_impl=dataset_impl, + vocab_size=vocab_size if vocab_size is not None else None, + ) + final_summary.merge(summ) + + if num_workers > 1: + for worker_id in range(1, num_workers): + # merge the worker outputs + worker_output_prefix = _worker_prefix( + output_prefix, + worker_id, + ) + final_ds.merge_file_(worker_output_prefix) + try: + 
os.remove(indexed_dataset.data_file_path(worker_output_prefix)) + os.remove(indexed_dataset.index_file_path(worker_output_prefix)) + except Exception as e: + logger.error( + f"couldn't remove {worker_output_prefix}.*", exc_info=e + ) + + # now we can close the file + idx_file = indexed_dataset.index_file_path(output_prefix) + final_ds.finalize(idx_file) + return final_summary + + @staticmethod + def _binarize_file_chunk( + binarizer: Binarizer, + filename: str, + offset_start: int, + offset_end: int, + output_prefix: str, + dataset_impl: str, + vocab_size=None, + ) -> tp.Tuple[tp.Any, BinarizeSummary]: # (dataset builder, BinarizeSummary) + """ + creates a dataset builder and append binarized items to it. This function does not + finalize the builder, this is useful if you want to do other things with your bin file + like appending/merging other files + """ + bin_file = indexed_dataset.data_file_path(output_prefix) + ds = indexed_dataset.make_builder( + bin_file, + impl=dataset_impl, + vocab_size=vocab_size, + ) + summary = BinarizeSummary() + + with Chunker( + PathManager.get_local_path(filename), offset_start, offset_end + ) as line_iterator: + for line in line_iterator: + ds.add_item(binarizer.binarize_line(line, summary)) + + return ds, summary + + @classmethod + def _binarize_chunk_and_finalize( + cls, + binarizer: Binarizer, + filename: str, + offset_start: int, + offset_end: int, + output_prefix: str, + dataset_impl: str, + vocab_size=None, + ): + """ + same as above, but also finalizes the builder + """ + ds, summ = cls._binarize_file_chunk( + binarizer, + filename, + offset_start, + offset_end, + output_prefix, + dataset_impl, + vocab_size=vocab_size, + ) + + idx_file = indexed_dataset.index_file_path(output_prefix) + ds.finalize(idx_file) + + return summ + + +class VocabularyDatasetBinarizer(Binarizer): + """ + Takes a Dictionary/Vocabulary, assign ids to each + token using the dictionary encode_line function. 
+ """ + + def __init__( + self, + dict: Dictionary, + tokenize: tp.Callable[[str], tp.List[str]] = tokenize_line, + append_eos: bool = True, + reverse_order: bool = False, + already_numberized: bool = False, + ) -> None: + self.dict = dict + self.tokenize = tokenize + self.append_eos = append_eos + self.reverse_order = reverse_order + self.already_numberized = already_numberized + super().__init__() + + def binarize_line( + self, + line: str, + summary: BinarizeSummary, + ): + if summary.replaced is None: + summary.replaced = Counter() + + def replaced_consumer(word, idx): + if idx == self.dict.unk_index and word != self.dict.unk_word: + summary.replaced.update([word]) + + if self.already_numberized: + id_strings = line.strip().split() + id_list = [int(id_string) for id_string in id_strings] + if self.reverse_order: + id_list.reverse() + if self.append_eos: + id_list.append(self.dict.eos()) + ids = torch.IntTensor(id_list) + else: + ids = self.dict.encode_line( + line=line, + line_tokenizer=self.tokenize, + add_if_not_exist=False, + consumer=replaced_consumer, + append_eos=self.append_eos, + reverse_order=self.reverse_order, + ) + + summary.num_seq += 1 + summary.num_tok += len(ids) + return ids + + +class AlignmentDatasetBinarizer(Binarizer): + """ + binarize by parsing a set of alignments and packing + them in a tensor (see utils.parse_alignment) + """ + + def __init__( + self, + alignment_parser: tp.Callable[[str], torch.IntTensor], + ) -> None: + super().__init__() + self.alignment_parser = alignment_parser + + def binarize_line( + self, + line: str, + summary: BinarizeSummary, + ): + ids = self.alignment_parser(line) + summary.num_seq += 1 + summary.num_tok += len(ids) + return ids + + +class LegacyBinarizer: + @classmethod + def binarize( + cls, + filename: str, + dico: Dictionary, + consumer: tp.Callable[[torch.IntTensor], None], + tokenize: tp.Callable[[str], tp.List[str]] = tokenize_line, + append_eos: bool = True, + reverse_order: bool = False, + offset: 
int = 0, + end: int = -1, + already_numberized: bool = False, + ) -> tp.Dict[str, int]: + binarizer = VocabularyDatasetBinarizer( + dict=dico, + tokenize=tokenize, + append_eos=append_eos, + reverse_order=reverse_order, + already_numberized=already_numberized, + ) + return cls._consume_file( + filename, + binarizer, + consumer, + offset_start=offset, + offset_end=end, + ) + + @classmethod + def binarize_alignments( + cls, + filename: str, + alignment_parser: tp.Callable[[str], torch.IntTensor], + consumer: tp.Callable[[torch.IntTensor], None], + offset: int = 0, + end: int = -1, + ) -> tp.Dict[str, int]: + binarizer = AlignmentDatasetBinarizer(alignment_parser) + return cls._consume_file( + filename, + binarizer, + consumer, + offset_start=offset, + offset_end=end, + ) + + @staticmethod + def _consume_file( + filename: str, + binarizer: Binarizer, + consumer: tp.Callable[[torch.IntTensor], None], + offset_start: int, + offset_end: int, + ) -> tp.Dict[str, int]: + summary = BinarizeSummary() + + with Chunker( + PathManager.get_local_path(filename), offset_start, offset_end + ) as line_iterator: + for line in line_iterator: + consumer(binarizer.binarize_line(line, summary)) + + return { + "nseq": summary.num_seq, + "nunk": summary.num_replaced, + "ntok": summary.num_tok, + "replaced": summary.replaced, + } diff --git a/fairseq/fairseq/checkpoint_utils.py b/fairseq/fairseq/checkpoint_utils.py new file mode 100644 index 0000000..8dd2c54 --- /dev/null +++ b/fairseq/fairseq/checkpoint_utils.py @@ -0,0 +1,937 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import ast +import collections +import contextlib +import inspect +import logging +import os +import re +import time +import traceback +from collections import OrderedDict +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import numpy as np +import torch +from fairseq.data import data_utils +from fairseq.dataclass.configs import CheckpointConfig +from fairseq.dataclass.utils import ( + convert_namespace_to_omegaconf, + overwrite_args_by_name, +) +from fairseq.distributed.fully_sharded_data_parallel import FSDP, has_FSDP +from fairseq.file_io import PathManager +from fairseq.models import FairseqDecoder, FairseqEncoder +from omegaconf import DictConfig, OmegaConf, open_dict + +logger = logging.getLogger(__name__) + + +def save_checkpoint(cfg: CheckpointConfig, trainer, epoch_itr, val_loss): + from fairseq import meters + + # only one worker should attempt to create the required dir + if trainer.data_parallel_rank == 0: + os.makedirs(cfg.save_dir, exist_ok=True) + + prev_best = getattr(save_checkpoint, "best", val_loss) + if val_loss is not None: + best_function = max if cfg.maximize_best_checkpoint_metric else min + save_checkpoint.best = best_function(val_loss, prev_best) + + if cfg.no_save: + return None + + trainer.consolidate_optimizer() # TODO(SS): do we need this if no_save_optimizer_state + + if not trainer.should_save_checkpoint_on_current_rank: + if trainer.always_call_state_dict_during_save_checkpoint: + trainer.state_dict() + return None + + write_timer = meters.StopwatchMeter() + write_timer.start() + + epoch = epoch_itr.epoch + end_of_epoch = epoch_itr.end_of_epoch() + updates = trainer.get_num_updates() + + logger.info(f"Preparing to save checkpoint for epoch {epoch} @ {updates} updates") + + def is_better(a, b): + return a >= b if cfg.maximize_best_checkpoint_metric else a <= b + + suffix = trainer.checkpoint_suffix + checkpoint_conds = collections.OrderedDict() + checkpoint_conds["checkpoint{}{}.pt".format(epoch, suffix)] 
= ( + end_of_epoch and not cfg.no_epoch_checkpoints and epoch % cfg.save_interval == 0 + ) + checkpoint_conds["checkpoint_{}_{}{}.pt".format(epoch, updates, suffix)] = ( + not end_of_epoch + and cfg.save_interval_updates > 0 + and updates % cfg.save_interval_updates == 0 + ) + checkpoint_conds["checkpoint_best{}.pt".format(suffix)] = val_loss is not None and ( + not hasattr(save_checkpoint, "best") + or is_better(val_loss, save_checkpoint.best) + ) + if val_loss is not None and cfg.keep_best_checkpoints > 0: + worst_best = getattr(save_checkpoint, "best", None) + chkpts = checkpoint_paths( + cfg.save_dir, + pattern=r"checkpoint\.best_{}_(\d+\.?\d*){}\.pt".format( + cfg.best_checkpoint_metric, suffix + ), + ) + if len(chkpts) > 0: + p = chkpts[-1] if cfg.maximize_best_checkpoint_metric else chkpts[0] + worst_best = float(p.rsplit("_")[-1].replace("{}.pt".format(suffix), "")) + # add random digits to resolve ties + with data_utils.numpy_seed(epoch, updates, val_loss): + rand_sfx = np.random.randint(0, cfg.keep_best_checkpoints) + + checkpoint_conds[ + "checkpoint.best_{}_{:.3f}{}{}.pt".format( + cfg.best_checkpoint_metric, val_loss, rand_sfx, suffix + ) + ] = worst_best is None or is_better(val_loss, worst_best) + checkpoint_conds[ + "checkpoint_last{}.pt".format(suffix) + ] = not cfg.no_last_checkpoints + + extra_state = { + "train_iterator": epoch_itr.state_dict(), + "val_loss": val_loss, + } + + # Going forward, different tasks could expose an API like this to dump all + # the checkpoint worthy attributes in a dictionary which then will be + # merged with the parent dictionary to create the "extra_state". 
This + # allows for an extensible yet simple design to checkpoint task level + # attributes + if hasattr(trainer.task, "get_checkpoint_dict"): + extra_state = {**extra_state, **trainer.task.get_checkpoint_dict()} + logger.info(f"State of {trainer.task.__class__.__name__} is ready to be persisted with the checkpoint") + + if hasattr(save_checkpoint, "best"): + extra_state.update({"best": save_checkpoint.best}) + + checkpoints = [ + os.path.join(cfg.save_dir, fn) for fn, cond in checkpoint_conds.items() if cond + ] + saved_cp = None + if len(checkpoints) > 0 and trainer.should_save_checkpoint_on_current_rank: + saved_cp = trainer.save_checkpoint(checkpoints[0], extra_state) + for cp in checkpoints[1:]: + if cfg.write_checkpoints_asynchronously: + # TODO[ioPath]: Need to implement a delayed asynchronous + # file copying/moving feature. + logger.warning( + f"ioPath is not copying {checkpoints[0]} to {cp} " + "since async write mode is on." + ) + else: + assert PathManager.copy( + checkpoints[0], cp, overwrite=True + ), f"Failed to copy {checkpoints[0]} to {cp}" + + write_timer.stop() + logger.info( + "Saved checkpoint {} (epoch {} @ {} updates, score {}) (writing took {} seconds)".format( + checkpoints[0], epoch, updates, val_loss, write_timer.sum + ) + ) + + if ( + not end_of_epoch + and cfg.keep_interval_updates > 0 + and trainer.should_save_checkpoint_on_current_rank + ): + # remove old checkpoints; checkpoints are sorted in descending order + if cfg.keep_interval_updates_pattern == -1: + checkpoints = checkpoint_paths( + cfg.save_dir, pattern=r"checkpoint_\d+_(\d+){}\.pt".format(suffix) + ) + else: + checkpoints = checkpoint_paths( + cfg.save_dir, + pattern=r"checkpoint_\d+_(\d+){}\.pt".format(suffix), + keep_match=True, + ) + checkpoints = [ + x[0] + for x in checkpoints + if x[1] % cfg.keep_interval_updates_pattern != 0 + ] + + for old_chk in checkpoints[cfg.keep_interval_updates :]: + if os.path.lexists(old_chk): + os.remove(old_chk) + elif 
PathManager.exists(old_chk): + PathManager.rm(old_chk) + + if cfg.keep_last_epochs > 0 and trainer.should_save_checkpoint_on_current_rank: + # remove old epoch checkpoints; checkpoints are sorted in descending order + checkpoints = checkpoint_paths( + cfg.save_dir, pattern=r"checkpoint(\d+){}\.pt".format(suffix) + ) + for old_chk in checkpoints[cfg.keep_last_epochs :]: + if os.path.lexists(old_chk): + os.remove(old_chk) + elif PathManager.exists(old_chk): + PathManager.rm(old_chk) + + if cfg.keep_best_checkpoints > 0 and trainer.should_save_checkpoint_on_current_rank: + # only keep the best N checkpoints according to validation metric + checkpoints = checkpoint_paths( + cfg.save_dir, + pattern=r"checkpoint\.best_{}_(\d+\.?\d*){}\.pt".format( + cfg.best_checkpoint_metric, suffix + ), + ) + if not cfg.maximize_best_checkpoint_metric: + checkpoints = checkpoints[::-1] + for old_chk in checkpoints[cfg.keep_best_checkpoints :]: + if os.path.lexists(old_chk): + os.remove(old_chk) + elif PathManager.exists(old_chk): + PathManager.rm(old_chk) + + return saved_cp + + +def load_checkpoint(cfg: CheckpointConfig, trainer, **passthrough_args): + """ + Load a checkpoint and restore the training iterator. + + *passthrough_args* will be passed through to + ``trainer.get_train_iterator``. 
+ """ + + reset_optimizer = cfg.reset_optimizer + reset_lr_scheduler = cfg.reset_lr_scheduler + optimizer_overrides = ast.literal_eval(cfg.optimizer_overrides) + reset_meters = cfg.reset_meters + reset_dataloader = cfg.reset_dataloader + + if cfg.finetune_from_model is not None and ( + reset_optimizer or reset_lr_scheduler or reset_meters or reset_dataloader + ): + raise ValueError( + "--finetune-from-model can not be set together with either --reset-optimizer" + " or reset_lr_scheduler or reset_meters or reset_dataloader" + ) + + suffix = trainer.checkpoint_suffix + if ( + cfg.restore_file == "checkpoint_last.pt" + ): # default value of restore_file is 'checkpoint_last.pt' + checkpoint_path = os.path.join( + cfg.save_dir, "checkpoint_last{}.pt".format(suffix) + ) + first_launch = not PathManager.exists(checkpoint_path) + if first_launch and getattr(cfg, "continue_once", None) is not None: + checkpoint_path = cfg.continue_once + elif cfg.finetune_from_model is not None and first_launch: + # if there is no last checkpoint to restore, start the finetune from pretrained model + # else just use usual logic to load checkpoint, e.g. restart from last checkpoint and etc. 
+ if PathManager.exists(cfg.finetune_from_model): + checkpoint_path = cfg.finetune_from_model + reset_optimizer = True + reset_lr_scheduler = True + reset_meters = True + reset_dataloader = True + logger.info( + f"loading pretrained model from {checkpoint_path}: " + "optimizer, lr scheduler, meters, dataloader will be reset" + ) + else: + raise ValueError( + f"--finetune-from-model {cfg.finetune_from_model} does not exist" + ) + elif suffix is not None: + checkpoint_path = cfg.restore_file.replace(".pt", suffix + ".pt") + else: + checkpoint_path = cfg.restore_file + + if cfg.restore_file != "checkpoint_last.pt" and cfg.finetune_from_model: + raise ValueError( + "--finetune-from-model and --restore-file (non-default value) " + "can not be specified together: " + str(cfg) + ) + + extra_state = trainer.load_checkpoint( + checkpoint_path, + reset_optimizer, + reset_lr_scheduler, + optimizer_overrides, + reset_meters=reset_meters, + ) + + if ( + extra_state is not None + and "best" in extra_state + and not reset_optimizer + and not reset_meters + ): + save_checkpoint.best = extra_state["best"] + + if extra_state is not None and not reset_dataloader: + # restore iterator from checkpoint + itr_state = extra_state["train_iterator"] + epoch_itr = trainer.get_train_iterator( + epoch=itr_state["epoch"], load_dataset=True, **passthrough_args + ) + epoch_itr.load_state_dict(itr_state) + + # Preload the checkpoint for the task + task_cp_dict = extra_state.get(trainer.task.__class__.__name__, {}) + if task_cp_dict and hasattr(trainer.task, "set_checkpoint_dict"): + trainer.task.set_checkpoint_dict(task_cp_dict) + else: + epoch_itr = trainer.get_train_iterator( + epoch=1, load_dataset=True, **passthrough_args + ) + + trainer.lr_step(epoch_itr.epoch) + + return extra_state, epoch_itr + + +def load_checkpoint_to_cpu(path, arg_overrides=None, load_on_all_ranks=False): + """Loads a checkpoint to CPU (with upgrading for backward compatibility). 
+ + If doing single-GPU training or if the checkpoint is only being loaded by at + most one process on each node (current default behavior is for only rank 0 + to read the checkpoint from disk), load_on_all_ranks should be False to + avoid errors from torch.distributed not having been initialized or + torch.distributed.barrier() hanging. + + If all processes on each node may be loading the checkpoint + simultaneously, load_on_all_ranks should be set to True to avoid I/O + conflicts. + + There's currently no support for > 1 but < all processes loading the + checkpoint on each node. + """ + local_path = PathManager.get_local_path(path) + # The locally cached file returned by get_local_path() may be stale for + # remote files that are periodically updated/overwritten (ex: + # checkpoint_last.pt) - so we remove the local copy, sync across processes + # (if needed), and then download a fresh copy. + if local_path != path and PathManager.path_requires_pathmanager(path): + try: + os.remove(local_path) + except FileNotFoundError: + # With potentially multiple processes removing the same file, the + # file being missing is benign (missing_ok isn't available until + # Python 3.8). + pass + if load_on_all_ranks: + torch.distributed.barrier() + local_path = PathManager.get_local_path(path) + + with open(local_path, "rb") as f: + state = torch.load(f, map_location=torch.device("cpu"), weights_only=False) + + if "args" in state and state["args"] is not None and arg_overrides is not None: + args = state["args"] + for arg_name, arg_val in arg_overrides.items(): + setattr(args, arg_name, arg_val) + + if "cfg" in state and state["cfg"] is not None: + + # hack to be able to set Namespace in dict config. 
this should be removed when we update to newer + # omegaconf version that supports object flags, or when we migrate all existing models + from omegaconf import __version__ as oc_version + from omegaconf import _utils + + if oc_version < "2.2": + old_primitive = _utils.is_primitive_type + _utils.is_primitive_type = lambda _: True + + state["cfg"] = OmegaConf.create(state["cfg"]) + + _utils.is_primitive_type = old_primitive + OmegaConf.set_struct(state["cfg"], True) + else: + state["cfg"] = OmegaConf.create(state["cfg"], flags={"allow_objects": True}) + + if arg_overrides is not None: + overwrite_args_by_name(state["cfg"], arg_overrides) + + state = _upgrade_state_dict(state) + return state + + +def load_model_ensemble( + filenames, + arg_overrides: Optional[Dict[str, Any]] = None, + task=None, + strict=True, + suffix="", + num_shards=1, + state=None, +): + """Loads an ensemble of models. + + Args: + filenames (List[str]): checkpoint files to load + arg_overrides (Dict[str,Any], optional): override model args that + were used during model training + task (fairseq.tasks.FairseqTask, optional): task to use for loading + """ + assert not ( + strict and num_shards > 1 + ), "Cannot load state dict with strict=True and checkpoint shards > 1" + ensemble, args, _task = load_model_ensemble_and_task( + filenames, + arg_overrides, + task, + strict, + suffix, + num_shards, + state, + ) + return ensemble, args + + +def get_maybe_sharded_checkpoint_filename( + filename: str, suffix: str, shard_idx: int, num_shards: int +) -> str: + orig_filename = filename + filename = filename.replace(".pt", suffix + ".pt") + fsdp_filename = filename[:-3] + f"-shard{shard_idx}.pt" + model_parallel_filename = orig_filename[:-3] + f"_part{shard_idx}.pt" + if PathManager.exists(fsdp_filename): + return fsdp_filename + elif num_shards > 1: + return model_parallel_filename + else: + return filename + + +def load_model_ensemble_and_task( + filenames, + arg_overrides: Optional[Dict[str, Any]] = None, + 
task=None, + strict=True, + suffix="", + num_shards=1, + state=None, +): + assert state is None or len(filenames) == 1 + + from fairseq import tasks + + assert not ( + strict and num_shards > 1 + ), "Cannot load state dict with strict=True and checkpoint shards > 1" + ensemble = [] + cfg = None + for filename in filenames: + orig_filename = filename + model_shard_state = {"shard_weights": [], "shard_metadata": []} + assert num_shards > 0 + st = time.time() + for shard_idx in range(num_shards): + filename = get_maybe_sharded_checkpoint_filename( + orig_filename, suffix, shard_idx, num_shards + ) + + if not PathManager.exists(filename): + raise IOError("Model file not found: {}".format(filename)) + if state is None: + state = load_checkpoint_to_cpu(filename, arg_overrides) + if "args" in state and state["args"] is not None: + cfg = convert_namespace_to_omegaconf(state["args"]) + elif "cfg" in state and state["cfg"] is not None: + cfg = state["cfg"] + else: + raise RuntimeError( + f"Neither args nor cfg exist in state keys = {state.keys()}" + ) + + if task is None: + task = tasks.setup_task(cfg.task, from_checkpoint=True) + + if "task_state" in state: + task.load_state_dict(state["task_state"]) + + argspec = inspect.getfullargspec(task.build_model) + + if "fsdp_metadata" in state and num_shards > 1: + model_shard_state["shard_weights"].append(state["model"]) + model_shard_state["shard_metadata"].append(state["fsdp_metadata"]) + # check FSDP import before the code goes too far + if not has_FSDP: + raise ImportError( + "Cannot find FullyShardedDataParallel. 
" + "Please install fairscale with: pip install fairscale" + ) + if shard_idx == num_shards - 1: + consolidated_model_state = FSDP.consolidate_shard_weights( + shard_weights=model_shard_state["shard_weights"], + shard_metadata=model_shard_state["shard_metadata"], + ) + if "from_checkpoint" in argspec.args: + model = task.build_model(cfg.model, from_checkpoint=True) + else: + model = task.build_model(cfg.model) + if ( + "optimizer_history" in state + and len(state["optimizer_history"]) > 0 + and "num_updates" in state["optimizer_history"][-1] + ): + model.set_num_updates( + state["optimizer_history"][-1]["num_updates"] + ) + model.load_state_dict( + consolidated_model_state, strict=strict, model_cfg=cfg.model + ) + else: + # model parallel checkpoint or unsharded checkpoint + # support old external tasks + + if "from_checkpoint" in argspec.args: + model = task.build_model(cfg.model, from_checkpoint=True) + else: + model = task.build_model(cfg.model) + if ( + "optimizer_history" in state + and len(state["optimizer_history"]) > 0 + and "num_updates" in state["optimizer_history"][-1] + ): + model.set_num_updates(state["optimizer_history"][-1]["num_updates"]) + model.load_state_dict( + state["model"], strict=strict, model_cfg=cfg.model + ) + + # reset state so it gets loaded for the next model in ensemble + state = None + if shard_idx % 10 == 0 and shard_idx > 0: + elapsed = time.time() - st + logger.info( + f"Loaded {shard_idx} shards in {elapsed:.2f}s, {elapsed / (shard_idx+1):.2f}s/shard" + ) + + # build model for ensemble + ensemble.append(model) + return ensemble, cfg, task + + +def load_model_ensemble_and_task_from_hf_hub( + model_id, + cache_dir: Optional[str] = None, + arg_overrides: Optional[Dict[str, Any]] = None, + **kwargs: Any, +): + try: + from huggingface_hub import snapshot_download + except ImportError: + raise ImportError( + "You need to install huggingface_hub to use `load_from_hf_hub`. 
" + "See https://pypi.org/project/huggingface-hub/ for installation." + ) + + library_name = "fairseq" + cache_dir = cache_dir or (Path.home() / ".cache" / library_name).as_posix() + cache_dir = snapshot_download( + model_id, cache_dir=cache_dir, library_name=library_name, **kwargs + ) + + _arg_overrides = arg_overrides or {} + _arg_overrides["data"] = cache_dir + return load_model_ensemble_and_task( + [p.as_posix() for p in Path(cache_dir).glob("*.pt")], + arg_overrides=_arg_overrides, + ) + + +def checkpoint_paths(path, pattern=r"checkpoint(\d+)\.pt", keep_match=False): + """Retrieves all checkpoints found in `path` directory. + + Checkpoints are identified by matching filename to the specified pattern. If + the pattern contains groups, the result will be sorted by the first group in + descending order. + """ + pt_regexp = re.compile(pattern) + files = PathManager.ls(path) + + entries = [] + for i, f in enumerate(files): + m = pt_regexp.fullmatch(f) + if m is not None: + idx = float(m.group(1)) if len(m.groups()) > 0 else i + entries.append((idx, m.group(0))) + if keep_match: + return [(os.path.join(path, x[1]), x[0]) for x in sorted(entries, reverse=True)] + else: + return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)] + + +def torch_persistent_save(obj, filename, async_write: bool = False): + if async_write: + with PathManager.opena(filename, "wb") as f: + _torch_persistent_save(obj, f) + else: + if PathManager.supports_rename(filename): + # do atomic save + with PathManager.open(filename + ".tmp", "wb") as f: + _torch_persistent_save(obj, f) + PathManager.rename(filename + ".tmp", filename) + else: + # fallback to non-atomic save + with PathManager.open(filename, "wb") as f: + _torch_persistent_save(obj, f) + + +def _torch_persistent_save(obj, f): + if isinstance(f, str): + with PathManager.open(f, "wb") as h: + torch_persistent_save(obj, h) + return + for i in range(3): + try: + return torch.save(obj, f) + except Exception: + if i == 2: + 
logger.error(traceback.format_exc()) + raise + else: + time.sleep(2.5) + + +def _upgrade_state_dict(state): + """Helper for upgrading old model checkpoints.""" + + # add optimizer_history + if "optimizer_history" not in state: + state["optimizer_history"] = [ + {"criterion_name": "CrossEntropyCriterion", "best_loss": state["best_loss"]} + ] + state["last_optimizer_state"] = state["optimizer"] + del state["optimizer"] + del state["best_loss"] + # move extra_state into sub-dictionary + if "epoch" in state and "extra_state" not in state: + state["extra_state"] = { + "epoch": state["epoch"], + "batch_offset": state["batch_offset"], + "val_loss": state["val_loss"], + } + del state["epoch"] + del state["batch_offset"] + del state["val_loss"] + # reduce optimizer history's memory usage (only keep the last state) + if "optimizer" in state["optimizer_history"][-1]: + state["last_optimizer_state"] = state["optimizer_history"][-1]["optimizer"] + for optim_hist in state["optimizer_history"]: + del optim_hist["optimizer"] + # record the optimizer class name + if "optimizer_name" not in state["optimizer_history"][-1]: + state["optimizer_history"][-1]["optimizer_name"] = "FairseqNAG" + # move best_loss into lr_scheduler_state + if "lr_scheduler_state" not in state["optimizer_history"][-1]: + state["optimizer_history"][-1]["lr_scheduler_state"] = { + "best": state["optimizer_history"][-1]["best_loss"] + } + del state["optimizer_history"][-1]["best_loss"] + # keep track of number of updates + if "num_updates" not in state["optimizer_history"][-1]: + state["optimizer_history"][-1]["num_updates"] = 0 + # use stateful training data iterator + if "train_iterator" not in state["extra_state"]: + state["extra_state"]["train_iterator"] = { + "epoch": state["extra_state"].get("epoch", 0), + "iterations_in_epoch": state["extra_state"].get("batch_offset", 0), + } + + # backward compatibility, cfg updates + if "args" in state and state["args"] is not None: + # old model checkpoints may not 
have separate source/target positions + if hasattr(state["args"], "max_positions") and not hasattr( + state["args"], "max_source_positions" + ): + state["args"].max_source_positions = state["args"].max_positions + state["args"].max_target_positions = state["args"].max_positions + # default to translation task + if not hasattr(state["args"], "task"): + state["args"].task = "translation" + # --raw-text and --lazy-load are deprecated + if getattr(state["args"], "raw_text", False): + state["args"].dataset_impl = "raw" + elif getattr(state["args"], "lazy_load", False): + state["args"].dataset_impl = "lazy" + # epochs start at 1 + if state["extra_state"]["train_iterator"] is not None: + state["extra_state"]["train_iterator"]["epoch"] = max( + state["extra_state"]["train_iterator"].get("epoch", 1), 1 + ) + # --remove-bpe ==> --postprocess + if hasattr(state["args"], "remove_bpe"): + state["args"].post_process = state["args"].remove_bpe + # --min-lr ==> --stop-min-lr + if hasattr(state["args"], "min_lr"): + state["args"].stop_min_lr = state["args"].min_lr + del state["args"].min_lr + # binary_cross_entropy / kd_binary_cross_entropy => wav2vec criterion + if hasattr(state["args"], "criterion") and state["args"].criterion in [ + "binary_cross_entropy", + "kd_binary_cross_entropy", + ]: + state["args"].criterion = "wav2vec" + # remove log_keys if it's None (criteria will supply a default value of []) + if hasattr(state["args"], "log_keys") and state["args"].log_keys is None: + delattr(state["args"], "log_keys") + # speech_pretraining => audio pretraining + if ( + hasattr(state["args"], "task") + and state["args"].task == "speech_pretraining" + ): + state["args"].task = "audio_pretraining" + # audio_cpc => wav2vec + if hasattr(state["args"], "arch") and state["args"].arch == "audio_cpc": + state["args"].arch = "wav2vec" + # convert legacy float learning rate to List[float] + if hasattr(state["args"], "lr") and isinstance(state["args"].lr, float): + state["args"].lr = 
[state["args"].lr] + # convert task data arg to a string instead of List[string] + if ( + hasattr(state["args"], "data") + and isinstance(state["args"].data, list) + and len(state["args"].data) > 0 + ): + state["args"].data = state["args"].data[0] + + state["cfg"] = convert_namespace_to_omegaconf(state["args"]) + + if "cfg" in state and state["cfg"] is not None: + cfg = state["cfg"] + with open_dict(cfg): + # any upgrades for Hydra-based configs + if ( + "task" in cfg + and "eval_wer_config" in cfg.task + and isinstance(cfg.task.eval_wer_config.print_alignment, bool) + ): + cfg.task.eval_wer_config.print_alignment = "hard" + if "generation" in cfg and isinstance(cfg.generation.print_alignment, bool): + cfg.generation.print_alignment = ( + "hard" if cfg.generation.print_alignment else None + ) + if ( + "model" in cfg + and "w2v_args" in cfg.model + and cfg.model.w2v_args is not None + and ( + hasattr(cfg.model.w2v_args, "task") or "task" in cfg.model.w2v_args + ) + and hasattr(cfg.model.w2v_args.task, "eval_wer_config") + and cfg.model.w2v_args.task.eval_wer_config is not None + and isinstance( + cfg.model.w2v_args.task.eval_wer_config.print_alignment, bool + ) + ): + cfg.model.w2v_args.task.eval_wer_config.print_alignment = "hard" + + return state + + +def prune_state_dict(state_dict, model_cfg: Optional[DictConfig]): + """Prune the given state_dict if desired for LayerDrop + (https://arxiv.org/abs/1909.11556). + + Training with LayerDrop allows models to be robust to pruning at inference + time. This function prunes state_dict to allow smaller models to be loaded + from a larger model and re-maps the existing state_dict for this to occur. + + It's called by functions that load models from checkpoints and does not + need to be called directly. 
+ """ + arch = None + if model_cfg is not None: + arch = ( + model_cfg._name + if isinstance(model_cfg, DictConfig) + else getattr(model_cfg, "arch", None) + ) + + if not model_cfg or arch is None or arch == "ptt_transformer": + # args should not be none, but don't crash if it is. + return state_dict + + encoder_layers_to_keep = getattr(model_cfg, "encoder_layers_to_keep", None) + decoder_layers_to_keep = getattr(model_cfg, "decoder_layers_to_keep", None) + + if not encoder_layers_to_keep and not decoder_layers_to_keep: + return state_dict + + # apply pruning + logger.info( + "Pruning model to specified layer configuration - this works best if the model was trained with LayerDrop" + ) + + def create_pruning_pass(layers_to_keep, layer_name): + keep_layers = sorted( + int(layer_string) for layer_string in layers_to_keep.split(",") + ) + mapping_dict = {} + for i in range(len(keep_layers)): + mapping_dict[str(keep_layers[i])] = str(i) + + regex = re.compile(r"^{layer}.*\.layers\.(\d+)".format(layer=layer_name)) + return {"substitution_regex": regex, "mapping_dict": mapping_dict} + + pruning_passes = [] + if encoder_layers_to_keep: + pruning_passes.append(create_pruning_pass(encoder_layers_to_keep, "encoder")) + if decoder_layers_to_keep: + pruning_passes.append(create_pruning_pass(decoder_layers_to_keep, "decoder")) + + new_state_dict = {} + for layer_name in state_dict.keys(): + match = re.search(r"\.layers\.(\d+)\.", layer_name) + # if layer has no number in it, it is a supporting layer, such as an + # embedding + if not match: + new_state_dict[layer_name] = state_dict[layer_name] + continue + + # otherwise, layer should be pruned. 
+ original_layer_number = match.group(1) + # figure out which mapping dict to replace from + for pruning_pass in pruning_passes: + if original_layer_number in pruning_pass["mapping_dict"] and pruning_pass[ + "substitution_regex" + ].search(layer_name): + new_layer_number = pruning_pass["mapping_dict"][original_layer_number] + substitution_match = pruning_pass["substitution_regex"].search( + layer_name + ) + new_state_key = ( + layer_name[: substitution_match.start(1)] + + new_layer_number + + layer_name[substitution_match.end(1) :] + ) + new_state_dict[new_state_key] = state_dict[layer_name] + + # Since layers are now pruned, *_layers_to_keep are no longer needed. + # This is more of "It would make it work fix" rather than a proper fix. + if isinstance(model_cfg, DictConfig): + context = open_dict(model_cfg) + else: + context = contextlib.ExitStack() + with context: + if hasattr(model_cfg, "encoder_layers_to_keep"): + model_cfg.encoder_layers_to_keep = None + if hasattr(model_cfg, "decoder_layers_to_keep"): + model_cfg.decoder_layers_to_keep = None + + return new_state_dict + + +def load_pretrained_component_from_model( + component: Union[FairseqEncoder, FairseqDecoder], + checkpoint: str, + strict: bool = True, +): + """ + Load a pretrained FairseqEncoder or FairseqDecoder from checkpoint into the + provided `component` object. If state_dict fails to load, there may be a + mismatch in the architecture of the corresponding `component` found in the + `checkpoint` file. + """ + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = load_checkpoint_to_cpu(checkpoint) + if isinstance(component, FairseqEncoder): + component_type = "encoder" + elif isinstance(component, FairseqDecoder): + component_type = "decoder" + else: + raise ValueError( + "component to load must be either a FairseqEncoder or " + "FairseqDecoder. Loading other component types are not supported." 
+ ) + component_state_dict = OrderedDict() + for key in state["model"].keys(): + if key.startswith(component_type): + # encoder.input_layers.0.0.weight --> input_layers.0.0.weight + component_subkey = key[len(component_type) + 1 :] + component_state_dict[component_subkey] = state["model"][key] + component.load_state_dict(component_state_dict, strict=strict) + return component + + +def verify_checkpoint_directory(save_dir: str) -> None: + if not os.path.exists(save_dir): + os.makedirs(save_dir, exist_ok=True) + temp_file_path = os.path.join(save_dir, "dummy") + try: + with open(temp_file_path, "w"): + pass + except OSError as e: + logger.warning( + "Unable to access checkpoint save directory: {}".format(save_dir) + ) + raise e + else: + os.remove(temp_file_path) + + +def save_ema_as_checkpoint(src_path, dst_path): + state = load_ema_from_checkpoint(src_path) + torch_persistent_save(state, dst_path) + + +def load_ema_from_checkpoint(fpath): + """Loads exponential moving averaged (EMA) checkpoint from input and + returns a model with ema weights. + + Args: + fpath: A string path of checkpoint to load from. + + Returns: + A dict of string keys mapping to various values. The 'model' key + from the returned dict should correspond to an OrderedDict mapping + string parameter names to torch Tensors. 
+ """ + params_dict = collections.OrderedDict() + new_state = None + + with PathManager.open(fpath, "rb") as f: + new_state = torch.load( + f, + map_location=( + lambda s, _: torch.serialization.default_restore_location(s, "cpu") + ), + weights_only=False, + ) + + # EMA model is stored in a separate "extra state" + model_params = new_state["extra_state"]["ema"] + + for key in list(model_params.keys()): + p = model_params[key] + if isinstance(p, torch.HalfTensor): + p = p.float() + if key not in params_dict: + params_dict[key] = p.clone() + # NOTE: clone() is needed in case of p is a shared parameter + else: + raise ValueError("Key {} is repeated in EMA model params.".format(key)) + + if len(params_dict) == 0: + raise ValueError( + f"Input checkpoint path '{fpath}' does not contain " + "ema model weights, is this model trained with EMA?" + ) + + new_state["model"] = params_dict + return new_state diff --git a/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda.cpp b/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda.cpp new file mode 100644 index 0000000..7072191 --- /dev/null +++ b/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda.cpp @@ -0,0 +1,55 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT License. 
+*/ + +#include <torch/extension.h> +#include <vector> + +/* +CPP Binding for CUDA OP +*/ + +// CUDA forward declarations +torch::Tensor ngram_repeat_block_cuda_forward( + torch::Tensor tokens, + torch::Tensor lprobs, + int bsz, + int step, + int beam_size, + int no_repeat_ngram_size); + +#define CHECK_CUDA(x) \ + TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +// Input check and call to CUDA OP +// Backward method not required +torch::Tensor ngram_repeat_block_forward( + torch::Tensor tokens, + torch::Tensor lprobs, + int bsz, + int step, + int beam_size, + int no_repeat_ngram_size) { + CHECK_INPUT(tokens); + CHECK_INPUT(lprobs); + assert(bsz > 0); + assert(step >= 0); + assert(beam_size > 0); + assert(no_repeat_ngram_size > 0); + + return ngram_repeat_block_cuda_forward( + tokens, lprobs, bsz, step, beam_size, no_repeat_ngram_size); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def( + "forward", + &ngram_repeat_block_forward, + "No Repeat Ngram Block forward (CUDA)"); +} diff --git a/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu b/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu new file mode 100644 index 0000000..bd6106c --- /dev/null +++ b/fairseq/fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu @@ -0,0 +1,82 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT License. +*/ + +/* +Kernel implementation for blocking repeated n-grams. 
+*/ + +#include <cuda.h> +#include <cuda_runtime.h> +#include <math.h> +#include <torch/extension.h> +#include <vector> + +// Ban repeated ngrams of length = 'no_repeat_ngram_size' +__global__ void banRepeatedTokens( + long* __restrict__ tokens, + float* __restrict__ lprobs, + int max_predict_len, + int vocab_size, + int no_repeat_ngram_size) { + auto row = blockIdx.x; + auto col = threadIdx.x; + auto start = row * (max_predict_len) + col; + // Each thread compares ngram starting from + // thread index with final ngram starting from + // step - no_repeat_ngram_size +2 + auto check_start_pos = blockDim.x; + auto lprob_start = row * vocab_size; + bool is_banned = true; + extern __shared__ long tokens_shm[]; + tokens_shm[col] = tokens[start]; + if (col == blockDim.x - 1) { + for (int i = 1; i < no_repeat_ngram_size; i++) { + if (col + i < max_predict_len) { + tokens_shm[col + i] = tokens[start + i]; + } + } + } + __syncthreads(); + + for (int k = 0; k < no_repeat_ngram_size - 1; k++) { + if (tokens_shm[col + k] != tokens_shm[check_start_pos + k]) { + is_banned = false; + } + } + if (is_banned == true) { + auto token_to_be_banned = tokens_shm[col + no_repeat_ngram_size - 1]; + lprobs[lprob_start + token_to_be_banned] = -INFINITY; + } +} + +// Allocate blocks and threads based on +// batch size and sequence length and launch +// kernel +torch::Tensor ngram_repeat_block_cuda_forward( + const torch::Tensor tokens, + torch::Tensor lprobs, + int bsz, + int step, + int beam_size, + int no_repeat_ngram_size) { + int threads = step - no_repeat_ngram_size + 2; + if (threads <= 0) + return lprobs; + int max_predict_len = tokens.size(1); + int vocab_size = lprobs.size(1); + auto token_ptr = tokens.data_ptr<long>(); + auto lprob_ptr = lprobs.data_ptr<float>(); + int blocks = bsz * beam_size; + int shared_mem_size = (step + 1) * sizeof(long); + + // Launching N blocks where N is number of samples in a batch (beams*bsz) + // Launching T threads where T is number of previous ngrams 
in a sample + // Allocating shared mem per block for fastser access of input tokens since + // each token will be accessed N times to compare with current Ngram where + // N is Ngram size. + banRepeatedTokens<<<blocks, threads, shared_mem_size>>>( + token_ptr, lprob_ptr, max_predict_len, vocab_size, no_repeat_ngram_size); + return lprobs; +} diff --git a/fairseq/fairseq/clib/libbase/balanced_assignment.cpp b/fairseq/fairseq/clib/libbase/balanced_assignment.cpp new file mode 100644 index 0000000..1a5a106 --- /dev/null +++ b/fairseq/fairseq/clib/libbase/balanced_assignment.cpp @@ -0,0 +1,109 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +/* +C++ code for solving the linear assignment problem. +Based on the Auction Algorithm from +https://dspace.mit.edu/bitstream/handle/1721.1/3265/P-2108-26912652.pdf and the +implementation from: https://github.com/bkj/auction-lap Adapted to be more +efficient when each worker is looking for k jobs instead of 1. 
+*/ +#include <torch/extension.h> +#include <iostream> +using namespace torch::indexing; +torch::Tensor balanced_assignment(torch::Tensor job_and_worker_to_score) { + int max_iterations = 100; + torch::Tensor epsilon = + (job_and_worker_to_score.max() - job_and_worker_to_score.min()) / 50; + epsilon.clamp_min_(1e-04); + torch::Tensor worker_and_job_to_score = + job_and_worker_to_score.detach().transpose(0, 1).contiguous(); + int num_workers = worker_and_job_to_score.size(0); + int num_jobs = worker_and_job_to_score.size(1); + auto device = worker_and_job_to_score.device(); + int jobs_per_worker = num_jobs / num_workers; + torch::Tensor value = worker_and_job_to_score.clone(); + int counter = 0; + torch::Tensor max_value = worker_and_job_to_score.max(); + + torch::Tensor bid_indices; + torch::Tensor cost = worker_and_job_to_score.new_zeros({1, num_jobs}); + torch::Tensor bids = + worker_and_job_to_score.new_empty({num_workers, num_jobs}); + torch::Tensor bid_increments = + worker_and_job_to_score.new_empty({num_workers, jobs_per_worker}); + torch::Tensor top_values = + worker_and_job_to_score.new_empty({num_workers, jobs_per_worker + 1}); + torch::Tensor high_bids = worker_and_job_to_score.new_empty({num_jobs}); + + torch::Tensor top_index = top_values.to(torch::kLong); + torch::Tensor high_bidders = top_index.new_empty({num_jobs}); + torch::Tensor have_bids = high_bidders.to(torch::kBool); + torch::Tensor jobs_indices = + torch::arange({num_jobs}, torch::dtype(torch::kLong).device(device)); + torch::Tensor true_tensor = + torch::ones({1}, torch::dtype(torch::kBool).device(device)); + + while (true) { + bids.zero_(); + torch::topk_out(top_values, top_index, value, jobs_per_worker + 1, 1); + + // Each worker bids the difference in value between that job and the k+1th + // job + torch::sub_out( + bid_increments, + top_values.index({Slice(None, None), Slice(0, jobs_per_worker)}), + top_values.index({Slice(None, None), jobs_per_worker}).unsqueeze(1)); + + 
bid_increments.add_(epsilon); + bids.scatter_( + 1, + top_index.index({Slice(None, None), Slice(0, jobs_per_worker)}), + bid_increments); + + if (counter < max_iterations && counter > 0) { + // Put in a minimal bid to retain items from the last round if no-one else + // bids for them this round + bids.view(-1).index_put_({bid_indices}, epsilon); + } + + // Find the highest bidding worker per job + torch::max_out(high_bids, high_bidders, bids, 0); + torch::gt_out(have_bids, high_bids, 0); + + if (have_bids.all().item<bool>()) { + // All jobs were bid for + break; + } + + // Make popular items more expensive + cost.add_(high_bids); + torch::sub_out(value, worker_and_job_to_score, cost); + + bid_indices = ((high_bidders * num_jobs) + jobs_indices).index({have_bids}); + + if (counter < max_iterations) { + // Make sure that this item will be in the winning worker's top-k next + // time. + value.view(-1).index_put_({bid_indices}, max_value); + } else { + // Suboptimal approximation that converges quickly from current solution + value.view(-1).index_put_( + {bid_indices}, worker_and_job_to_score.view(-1).index({bid_indices})); + } + + counter += 1; + } + + return top_index.index({Slice(None, None), Slice(0, jobs_per_worker)}) + .reshape(-1); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("balanced_assignment", &balanced_assignment, "Balanced Assignment"); +} diff --git a/fairseq/fairseq/clib/libbleu/libbleu.cpp b/fairseq/fairseq/clib/libbleu/libbleu.cpp new file mode 100644 index 0000000..939d9e1 --- /dev/null +++ b/fairseq/fairseq/clib/libbleu/libbleu.cpp @@ -0,0 +1,157 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include <array> +#include <cstdio> +#include <cstring> +#include <map> + +// NOLINTNEXTLINE +typedef struct { + size_t reflen; + size_t predlen; + size_t match1; + size_t count1; + size_t match2; + size_t count2; + size_t match3; + size_t count3; + size_t match4; + size_t count4; +} bleu_stat; + +// left trim (remove pad) +void bleu_ltrim(size_t* len, int** sent, int pad) { + size_t start = 0; + while (start < *len) { + if (*(*sent + start) != pad) { + break; + } + start++; + } + *sent += start; + *len -= start; +} + +// right trim remove (eos) +void bleu_rtrim(size_t* len, int** sent, int pad, int eos) { + size_t end = *len - 1; + while (end > 0) { + if (*(*sent + end) != eos && *(*sent + end) != pad) { + break; + } + end--; + } + *len = end + 1; +} + +// left and right trim +void bleu_trim(size_t* len, int** sent, int pad, int eos) { + bleu_ltrim(len, sent, pad); + bleu_rtrim(len, sent, pad, eos); +} + +size_t bleu_hash(int len, int* data) { + size_t h = 14695981039346656037ul; + size_t prime = 0x100000001b3; + char* b = (char*)data; + size_t blen = sizeof(int) * len; + + while (blen-- > 0) { + h ^= *b++; + h *= prime; + } + + return h; +} + +void bleu_addngram( + size_t* ntotal, + size_t* nmatch, + size_t n, + size_t reflen, + int* ref, + size_t predlen, + int* pred) { + if (predlen < n) { + return; + } + + predlen = predlen - n + 1; + (*ntotal) += predlen; + + if (reflen < n) { + return; + } + + reflen = reflen - n + 1; + + std::map<size_t, size_t> count; + while (predlen > 0) { + size_t w = bleu_hash(n, pred++); + count[w]++; + predlen--; + } + + while (reflen > 0) { + size_t w = bleu_hash(n, ref++); + if (count[w] > 0) { + (*nmatch)++; + count[w] -= 1; + } + reflen--; + } +} + +extern "C" { + +#ifdef _WIN64 +__declspec(dllexport) +#endif + void bleu_zero_init(bleu_stat* stat) { + std::memset(stat, 0, sizeof(bleu_stat)); +} + +#ifdef _WIN64 +__declspec(dllexport) +#endif + void bleu_one_init(bleu_stat* stat) { + bleu_zero_init(stat); + stat->count1 = 
0; + stat->count2 = 1; + stat->count3 = 1; + stat->count4 = 1; + stat->match1 = 0; + stat->match2 = 1; + stat->match3 = 1; + stat->match4 = 1; +} + +#ifdef _WIN64 +__declspec(dllexport) +#endif + void bleu_add( + bleu_stat* stat, + size_t reflen, + int* ref, + size_t predlen, + int* pred, + int pad, + int eos) { + + bleu_trim(&reflen, &ref, pad, eos); + bleu_trim(&predlen, &pred, pad, eos); + stat->reflen += reflen; + stat->predlen += predlen; + + bleu_addngram(&stat->count1, &stat->match1, 1, reflen, ref, predlen, pred); + bleu_addngram(&stat->count2, &stat->match2, 2, reflen, ref, predlen, pred); + bleu_addngram(&stat->count3, &stat->match3, 3, reflen, ref, predlen, pred); + bleu_addngram(&stat->count4, &stat->match4, 4, reflen, ref, predlen, pred); +} +} diff --git a/fairseq/fairseq/clib/libbleu/module.cpp b/fairseq/fairseq/clib/libbleu/module.cpp new file mode 100644 index 0000000..35288b3 --- /dev/null +++ b/fairseq/fairseq/clib/libbleu/module.cpp @@ -0,0 +1,33 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <Python.h> + +static PyMethodDef method_def[] = {{NULL, NULL, 0, NULL}}; // NOLINT + +static struct PyModuleDef module_def = { + PyModuleDef_HEAD_INIT, + "libbleu", /* name of module */ + // NOLINTNEXTLINE + NULL, /* module documentation, may be NULL */ + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. 
*/ + method_def}; // NOLINT + +#if PY_MAJOR_VERSION == 2 +PyMODINIT_FUNC init_libbleu() +#else +PyMODINIT_FUNC PyInit_libbleu() +#endif +{ + PyObject* m = PyModule_Create(&module_def); + if (!m) { + return NULL; + } + return m; +} diff --git a/fairseq/fairseq/clib/libnat/edit_dist.cpp b/fairseq/fairseq/clib/libnat/edit_dist.cpp new file mode 100644 index 0000000..9ffb605 --- /dev/null +++ b/fairseq/fairseq/clib/libnat/edit_dist.cpp @@ -0,0 +1,231 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <pybind11/detail/common.h> +#include <pybind11/pybind11.h> +#include <torch/torch.h> // @manual=//caffe2:torch_extension +#include <algorithm> +#include <cstdint> +#include <iosfwd> +#include <memory> +#include <new> +#include <string> +#include <utility> +#include <vector> + +using namespace ::std; + +vector<vector<uint32_t>> edit_distance2_with_dp( + vector<uint32_t>& x, + vector<uint32_t>& y) { + uint32_t lx = x.size(); + uint32_t ly = y.size(); + vector<vector<uint32_t>> d(lx + 1, vector<uint32_t>(ly + 1)); + for (uint32_t i = 0; i < lx + 1; i++) { + d[i][0] = i; + } + for (uint32_t j = 0; j < ly + 1; j++) { + d[0][j] = j; + } + for (uint32_t i = 1; i < lx + 1; i++) { + for (uint32_t j = 1; j < ly + 1; j++) { + d[i][j] = + min(min(d[i - 1][j], d[i][j - 1]) + 1, + d[i - 1][j - 1] + 2 * (x.at(i - 1) == y.at(j - 1) ? 
0 : 1)); + } + } + return d; +} + +vector<vector<uint32_t>> edit_distance2_backtracking( + vector<vector<uint32_t>>& d, + vector<uint32_t>& x, + vector<uint32_t>& y, + uint32_t terminal_symbol) { + vector<uint32_t> seq; + vector<vector<uint32_t>> edit_seqs(x.size() + 2, vector<uint32_t>()); + /* + edit_seqs: + 0~x.size() cell is the insertion sequences + last cell is the delete sequence + */ + + if (x.size() == 0) { + edit_seqs.at(0) = y; + return edit_seqs; + } + + uint32_t i = d.size() - 1; + uint32_t j = d.at(0).size() - 1; + + while ((i >= 0) && (j >= 0)) { + if ((i == 0) && (j == 0)) { + break; + } + + if ((j > 0) && (d.at(i).at(j - 1) < d.at(i).at(j))) { + seq.push_back(1); // insert + seq.push_back(y.at(j - 1)); + j--; + } else if ((i > 0) && (d.at(i - 1).at(j) < d.at(i).at(j))) { + seq.push_back(2); // delete + seq.push_back(x.at(i - 1)); + i--; + } else { + seq.push_back(3); // keep + seq.push_back(x.at(i - 1)); + i--; + j--; + } + } + + uint32_t prev_op, op, s, word; + prev_op = 0, s = 0; + for (uint32_t k = 0; k < seq.size() / 2; k++) { + op = seq.at(seq.size() - 2 * k - 2); + word = seq.at(seq.size() - 2 * k - 1); + if (prev_op != 1) { + s++; + } + if (op == 1) // insert + { + edit_seqs.at(s - 1).push_back(word); + } else if (op == 2) // delete + { + edit_seqs.at(x.size() + 1).push_back(1); + } else { + edit_seqs.at(x.size() + 1).push_back(0); + } + + prev_op = op; + } + + for (uint32_t k = 0; k < edit_seqs.size(); k++) { + if (edit_seqs[k].size() == 0) { + edit_seqs[k].push_back(terminal_symbol); + } + } + return edit_seqs; +} + +vector<vector<uint32_t>> edit_distance2_backtracking_with_delete( + vector<vector<uint32_t>>& d, + vector<uint32_t>& x, + vector<uint32_t>& y, + uint32_t terminal_symbol, + uint32_t deletion_symbol) { + vector<uint32_t> seq; + vector<vector<uint32_t>> edit_seqs(x.size() + 1, vector<uint32_t>()); + /* + edit_seqs: + 0~x.size() cell is the insertion sequences + last cell is the delete sequence + */ + + if (x.size() == 0) { + 
edit_seqs.at(0) = y; + return edit_seqs; + } + + uint32_t i = d.size() - 1; + uint32_t j = d.at(0).size() - 1; + + while ((i >= 0) && (j >= 0)) { + if ((i == 0) && (j == 0)) { + break; + } + + if ((j > 0) && (d.at(i).at(j - 1) < d.at(i).at(j))) { + seq.push_back(1); // insert + seq.push_back(y.at(j - 1)); + j--; + } else if ((i > 0) && (d.at(i - 1).at(j) < d.at(i).at(j))) { + seq.push_back(2); // delete + seq.push_back(x.at(i - 1)); + i--; + } else { + seq.push_back(3); // keep + seq.push_back(x.at(i - 1)); + i--; + j--; + } + } + + uint32_t prev_op, op, s, word; + prev_op = 0, s = 0; + for (uint32_t k = 0; k < seq.size() / 2; k++) { + op = seq.at(seq.size() - 2 * k - 2); + word = seq.at(seq.size() - 2 * k - 1); + if (prev_op != 1) { + s++; + } + if (op == 1) // insert + { + edit_seqs.at(s - 1).push_back(word); + } else if (op == 2) // delete + { + edit_seqs.at(s - 1).push_back(deletion_symbol); + } + + prev_op = op; + } + + for (uint32_t k = 0; k < edit_seqs.size(); k++) { + if (edit_seqs.at(k).size() == 0) { + edit_seqs.at(k).push_back(terminal_symbol); + } + } + return edit_seqs; +} + +vector<uint32_t> compute_ed2( + vector<vector<uint32_t>>& xs, + vector<vector<uint32_t>>& ys) { + vector<uint32_t> distances(xs.size()); + for (uint32_t i = 0; i < xs.size(); i++) { + vector<vector<uint32_t>> d = edit_distance2_with_dp(xs.at(i), ys.at(i)); + distances.at(i) = d.at(xs.at(i).size()).at(ys.at(i).size()); + } + return distances; +} + +vector<vector<vector<uint32_t>>> suggested_ed2_path( + vector<vector<uint32_t>>& xs, + vector<vector<uint32_t>>& ys, + uint32_t terminal_symbol) { + vector<vector<vector<uint32_t>>> seq(xs.size()); + for (uint32_t i = 0; i < xs.size(); i++) { + vector<vector<uint32_t>> d = edit_distance2_with_dp(xs.at(i), ys.at(i)); + seq.at(i) = + edit_distance2_backtracking(d, xs.at(i), ys.at(i), terminal_symbol); + } + return seq; +} + +vector<vector<vector<uint32_t>>> suggested_ed2_path_with_delete( + vector<vector<uint32_t>>& xs, + 
vector<vector<uint32_t>>& ys, + uint32_t terminal_symbol, + uint32_t deletion_symbol) { + vector<vector<vector<uint32_t>>> seq(xs.size()); + for (uint32_t i = 0; i < xs.size(); i++) { + vector<vector<uint32_t>> d = edit_distance2_with_dp(xs.at(i), ys.at(i)); + seq.at(i) = edit_distance2_backtracking_with_delete( + d, xs.at(i), ys.at(i), terminal_symbol, deletion_symbol); + } + return seq; +} + +PYBIND11_MODULE(libnat, m) { + m.def("compute_ed2", &compute_ed2, "compute_ed2"); + m.def("suggested_ed2_path", &suggested_ed2_path, "suggested_ed2_path"); + m.def( + "suggested_ed2_path_with_delete", + &suggested_ed2_path_with_delete, + "suggested_ed2_path_with_delete"); +} diff --git a/fairseq/fairseq/clib/libnat_cuda/binding.cpp b/fairseq/fairseq/clib/libnat_cuda/binding.cpp new file mode 100644 index 0000000..ced91c0 --- /dev/null +++ b/fairseq/fairseq/clib/libnat_cuda/binding.cpp @@ -0,0 +1,67 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +/* + This code is partially adpoted from + https://github.com/1ytic/pytorch-edit-distance + */ + +#include <torch/types.h> +#include "edit_dist.h" + +#ifndef TORCH_CHECK +#define TORCH_CHECK AT_CHECK +#endif + +#define CHECK_CUDA(x) \ + TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +torch::Tensor LevenshteinDistance( + torch::Tensor source, + torch::Tensor target, + torch::Tensor source_length, + torch::Tensor target_length) { + CHECK_INPUT(source); + CHECK_INPUT(target); + CHECK_INPUT(source_length); + CHECK_INPUT(target_length); + return LevenshteinDistanceCuda(source, target, source_length, target_length); +} + +torch::Tensor GenerateDeletionLabel( + torch::Tensor source, + torch::Tensor operations) { + CHECK_INPUT(source); + CHECK_INPUT(operations); + return GenerateDeletionLabelCuda(source, operations); +} + +std::pair<torch::Tensor, torch::Tensor> GenerateInsertionLabel( + torch::Tensor target, + torch::Tensor operations) { + CHECK_INPUT(target); + CHECK_INPUT(operations); + return GenerateInsertionLabelCuda(target, operations); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("levenshtein_distance", &LevenshteinDistance, "Levenshtein distance"); + m.def( + "generate_deletion_labels", + &GenerateDeletionLabel, + "Generate Deletion Label"); + m.def( + "generate_insertion_labels", + &GenerateInsertionLabel, + "Generate Insertion Label"); +} diff --git a/fairseq/fairseq/clib/libnat_cuda/edit_dist.cu b/fairseq/fairseq/clib/libnat_cuda/edit_dist.cu new file mode 100644 index 0000000..1ea5ec7 --- /dev/null +++ b/fairseq/fairseq/clib/libnat_cuda/edit_dist.cu @@ -0,0 +1,344 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "edit_dist.h" + +#include <c10/cuda/CUDAStream.h> +#include <cuda.h> +#include <cuda_runtime.h> +#include <device_launch_parameters.h> +#include <utility> // std::pair + +template <typename scalar_t> +__global__ void generate_deletion_label_kernel( + const scalar_t* __restrict__ source, + const size_t source_size, + const size_t operation_size, + int* __restrict__ operations, + int* __restrict__ labels) { + const int index = blockIdx.x; + const int offset = index * operation_size; + const int offset_label = index * source_size; + + for (int i = 0; i < source_size; i++) { + labels[offset_label + i] = 0; + } + + int k = 0; + for (int i = 0; i < operation_size; i++) { + if (operations[offset + i] == 0) { + break; + } else if (operations[offset + i] == 1) { + continue; + } else { + labels[offset_label + k] = 3 - operations[offset + i]; + k++; + } + } +} + +template <typename scalar_t> +__global__ void generate_insertion_label_kernel( + const scalar_t* __restrict__ target, + const size_t target_size, + const size_t operation_size, + int* __restrict__ operations, + int* __restrict__ labels, + int* __restrict__ masks) { + const int index = blockIdx.x; + const int offset = index * operation_size; + const int offset_label = index * target_size; + + int k = 0; + int u = 0; + int m = 0; + + for (int i = 0; i < target_size; i++) { + labels[offset_label + i] = 0; + masks[offset_label + i] = 0; + } + + for (int i = 0; i < operation_size - 1; i++) { + if (operations[offset + i] == 0) { + break; + } else if (operations[offset + i] == 2) { + continue; + } else if (operations[offset + i] == 1) { + masks[offset_label + m] = 1; + u++; + m++; + } else { + labels[offset_label + k] = u; + masks[offset_label + m] = 0; + k++; + m++; + u = 0; + } + } +} + +template <typename scalar_t> +__global__ void levenshtein_distance_kernel( + const scalar_t* __restrict__ source, + const scalar_t* __restrict__ target, + const int* __restrict__ source_length, + const int* __restrict__ 
target_length, + const size_t source_size, + const size_t target_size, + int* __restrict__ operations, + int* __restrict__ errors_curr) { + const int index = blockIdx.x; + const int offset = index * (source_size + target_size); + const int d = index * (source_size + 1) * (target_size + 1); + const int t = target_size + 1; + + auto err_idx = [d, t](int i, int j) { return d + i * t + j; }; + auto opt_idx = [offset](int k) { return offset + k; }; + + const int hyp_len = source_length[index]; + const int ref_len = target_length[index]; + const scalar_t* hyp_begin = source + index * source_size; + const scalar_t* ref_begin = target + index * target_size; + + // dynamic programming + for (int i = 0; i <= hyp_len; i++) { + errors_curr[err_idx(i, 0)] = i; + } + for (int j = 0; j <= ref_len; j++) { + errors_curr[err_idx(0, j)] = j; + } + for (int i = 1; i <= hyp_len; i++) { + for (int j = 1; j <= ref_len; j++) { + errors_curr[err_idx(i, j)] = min( + min(errors_curr[err_idx(i - 1, j)], errors_curr[err_idx(i, j - 1)]) + + 1, + errors_curr[err_idx(i - 1, j - 1)] + + 2 * (*(hyp_begin + i - 1) == *(ref_begin + j - 1) ? 
0 : 1)); + } + } + + // back-tracing + int i = hyp_len; + int j = ref_len; + int o = hyp_len + ref_len; + + for (int k = 0; k < source_size + target_size; k++) { + operations[opt_idx(k)] = 0; + } + + while ((i >= 0) && (j >= 0)) { + if ((i == 0) && (j == 0)) { + break; + } + + if ((j > 0) && + (errors_curr[err_idx(i, j - 1)] < errors_curr[err_idx(i, j)])) { + o--; + operations[opt_idx(o)] = 1; + j--; // insertion + } else if ( + (i > 0) && + (errors_curr[err_idx(i - 1, j)] < errors_curr[err_idx(i, j)])) { + o--; + operations[opt_idx(o)] = 2; + i--; // deletion + } else { + o--; + operations[opt_idx(o)] = 3; + i--; + j--; // do nothing + } + } + + // moving to the left + for (int k = 0; k < hyp_len + ref_len; k++) { + if (k + o < hyp_len + ref_len) { + operations[opt_idx(k)] = operations[opt_idx(k + o)]; + } else { + operations[opt_idx(k)] = 0; // padding + } + } +} + +template <typename scalar_t> +__global__ void faster_levenshtein_distance_kernel( + const scalar_t* __restrict__ source, + const scalar_t* __restrict__ target, + const int* __restrict__ source_length, + const int* __restrict__ target_length, + const size_t source_size, + const size_t target_size, + int* __restrict__ operations) { + extern __shared__ short errors[]; + auto errors_curr = errors; + + const int index = blockIdx.x; + const int offset = index * (source_size + target_size); + const int t = target_size + 1; + + auto err_idx = [t](int i, int j) { return i * t + j; }; + auto opt_idx = [offset](int k) { return offset + k; }; + + const int hyp_len = source_length[index]; + const int ref_len = target_length[index]; + const scalar_t* hyp_begin = source + index * source_size; + const scalar_t* ref_begin = target + index * target_size; + + // dynamic programming + for (int i = 0; i <= hyp_len; i++) { + errors_curr[err_idx(i, 0)] = i; + } + for (int j = 0; j <= ref_len; j++) { + errors_curr[err_idx(0, j)] = j; + } + for (int i = 1; i <= hyp_len; i++) { + for (int j = 1; j <= ref_len; j++) { + 
errors_curr[err_idx(i, j)] = min( + min(errors_curr[err_idx(i - 1, j)], errors_curr[err_idx(i, j - 1)]) + + 1, + errors_curr[err_idx(i - 1, j - 1)] + + 2 * (*(hyp_begin + i - 1) == *(ref_begin + j - 1) ? 0 : 1)); + } + } + + // back-tracing + int i = hyp_len; + int j = ref_len; + int o = hyp_len + ref_len; + + for (int k = 0; k < source_size + target_size; k++) { + operations[opt_idx(k)] = 0; + } + + while ((i >= 0) && (j >= 0)) { + if ((i == 0) && (j == 0)) { + break; + } + + if ((j > 0) && + (errors_curr[err_idx(i, j - 1)] < errors_curr[err_idx(i, j)])) { + o--; + operations[opt_idx(o)] = 1; + j--; // insertion + } else if ( + (i > 0) && + (errors_curr[err_idx(i - 1, j)] < errors_curr[err_idx(i, j)])) { + o--; + operations[opt_idx(o)] = 2; + i--; // deletion + } else { + o--; + operations[opt_idx(o)] = 3; + i--; + j--; // do nothing + } + } + + // moving to the left + for (int k = 0; k < hyp_len + ref_len; k++) { + if (k + o < hyp_len + ref_len) { + operations[opt_idx(k)] = operations[opt_idx(k + o)]; + } else { + operations[opt_idx(k)] = 0; // padding + } + } +} + +torch::Tensor GenerateDeletionLabelCuda( + torch::Tensor source, + torch::Tensor operations) { + const auto batch_size = source.size(0); + at::TensorOptions options(source.device()); + options = options.dtype(at::ScalarType::Int); + auto labels = torch::empty({batch_size, source.size(1)}, options); + auto stream = at::cuda::getCurrentCUDAStream(source.device().index()); + + AT_DISPATCH_ALL_TYPES(source.scalar_type(), "generate_deletion_labels", ([&] { + generate_deletion_label_kernel<scalar_t> + <<<batch_size, 1, 0, stream>>>( + source.data_ptr<scalar_t>(), + source.size(1), + operations.size(1), + operations.data_ptr<int>(), + labels.data_ptr<int>()); + })); + + return labels; +} + +std::pair<torch::Tensor, torch::Tensor> GenerateInsertionLabelCuda( + torch::Tensor target, + torch::Tensor operations) { + const auto batch_size = target.size(0); + at::TensorOptions options(target.device()); + options = 
options.dtype(at::ScalarType::Int); + auto labels = torch::empty({batch_size, target.size(1)}, options); + auto masks = torch::empty({batch_size, target.size(1)}, options); + auto stream = at::cuda::getCurrentCUDAStream(target.device().index()); + + AT_DISPATCH_ALL_TYPES( + target.scalar_type(), "generate_insertion_labels", ([&] { + generate_insertion_label_kernel<scalar_t><<<batch_size, 1, 0, stream>>>( + target.data_ptr<scalar_t>(), + target.size(1), + operations.size(1), + operations.data_ptr<int>(), + labels.data_ptr<int>(), + masks.data_ptr<int>()); + })); + + return std::make_pair(labels, masks); +} + +torch::Tensor LevenshteinDistanceCuda( + torch::Tensor source, + torch::Tensor target, + torch::Tensor source_length, + torch::Tensor target_length) { + const auto batch_size = source.size(0); + const auto shared_size = + (source.size(1) + 1) * (target.size(1) + 1) * sizeof(short); + + at::TensorOptions options(source.device()); + options = options.dtype(at::ScalarType::Int); + auto operations = + torch::empty({batch_size, source.size(1) + target.size(1)}, options); + auto stream = at::cuda::getCurrentCUDAStream(source.device().index()); + + if (shared_size > 40000) { + auto distances = torch::empty( + {batch_size, (source.size(1) + 1) * (target.size(1) + 1)}, options); + AT_DISPATCH_ALL_TYPES(source.scalar_type(), "levenshtein_distance", ([&] { + levenshtein_distance_kernel<scalar_t> + <<<batch_size, 1, 0, stream>>>( + source.data_ptr<scalar_t>(), + target.data_ptr<scalar_t>(), + source_length.data_ptr<int>(), + target_length.data_ptr<int>(), + source.size(1), + target.size(1), + operations.data_ptr<int>(), + distances.data_ptr<int>()); + })); + } else { + AT_DISPATCH_ALL_TYPES( + source.scalar_type(), "faster_levenshtein_distance", ([&] { + faster_levenshtein_distance_kernel<scalar_t> + <<<batch_size, 1, shared_size, stream>>>( + source.data_ptr<scalar_t>(), + target.data_ptr<scalar_t>(), + source_length.data_ptr<int>(), + target_length.data_ptr<int>(), + 
source.size(1), + target.size(1), + operations.data_ptr<int>()); + })); + } + + return operations; +} diff --git a/fairseq/fairseq/clib/libnat_cuda/edit_dist.h b/fairseq/fairseq/clib/libnat_cuda/edit_dist.h new file mode 100644 index 0000000..5220c52 --- /dev/null +++ b/fairseq/fairseq/clib/libnat_cuda/edit_dist.h @@ -0,0 +1,25 @@ +/** + * Copyright 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include <torch/extension.h> + +torch::Tensor LevenshteinDistanceCuda( + torch::Tensor source, + torch::Tensor target, + torch::Tensor source_length, + torch::Tensor target_length); + +torch::Tensor GenerateDeletionLabelCuda( + torch::Tensor source, + torch::Tensor operations); + +std::pair<torch::Tensor, torch::Tensor> GenerateInsertionLabelCuda( + torch::Tensor source, + torch::Tensor operations); diff --git a/fairseq/fairseq/config/__init__.py b/fairseq/fairseq/config/__init__.py new file mode 100644 index 0000000..6264236 --- /dev/null +++ b/fairseq/fairseq/config/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/fairseq/fairseq/config/config.yaml b/fairseq/fairseq/config/config.yaml new file mode 100644 index 0000000..2ed7168 --- /dev/null +++ b/fairseq/fairseq/config/config.yaml @@ -0,0 +1,19 @@ +# @package _group_ + +hydra: + run: + dir: . 
+ +defaults: + - _self_ + - task: null + - model: null + - criterion: cross_entropy + - optimizer: null + - lr_scheduler: fixed + - bpe: null + - tokenizer: null + - scoring: null + - generation: null + - common_eval: null + - eval_lm: null diff --git a/fairseq/fairseq/config/fb_run_config/slurm.yaml b/fairseq/fairseq/config/fb_run_config/slurm.yaml new file mode 100644 index 0000000..20cf8f5 --- /dev/null +++ b/fairseq/fairseq/config/fb_run_config/slurm.yaml @@ -0,0 +1,29 @@ +# @package _global_ + +hydra: + job: + config: + override_dirname: + kv_sep: ':' + item_sep: '__' + exclude_keys: + - fb_run_config + - distributed_training.distributed_port + sweep: + dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname} + launcher: + cpus_per_task: 60 + gpus_per_node: ??? + tasks_per_node: 1 + nodes: 1 + partition: learnfair + mem_gb: 400 + timeout_min: 4320 + max_num_timeout: 10 + name: ${env:PREFIX}_${hydra.job.config_name} + submitit_folder: ${hydra.sweep.dir} + +distributed_training: + ddp_backend: c10d + distributed_world_size: ??? + distributed_port: ??? 
diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_gbw.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_gbw.yaml new file mode 100644 index 0000000..30b1a4f --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_gbw.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "relu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 512 +decoder_output_dim: 512 +decoder_input_dim: 512 +decoder_ffn_embed_dim: 4096 +decoder_layers: 12 +decoder_attention_heads: 16 +decoder_normalize_before: true +no_decoder_final_norm: true +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_wiki103.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_wiki103.yaml new file mode 100644 index 0000000..1154cfa --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_baevski_wiki103.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "relu" +dropout: 0.3 +attention_dropout: 0.1 +activation_dropout: 0.1 +relu_dropout: 0.1 +decoder_embed_dim: 1024 +decoder_output_dim: 1024 +decoder_input_dim: 1024 +decoder_ffn_embed_dim: 4096 +decoder_layers: 16 +decoder_attention_heads: 8 +decoder_normalize_before: true 
+no_decoder_final_norm: true +adaptive_softmax_cutoff: "20000,60000" +adaptive_softmax_dropout: 0.2 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: true +adaptive_input_factor: 4 +adaptive_input_cutoff: "20000,60000" +tie_adaptive_weights: true +tie_adaptive_proj: true +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_big.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_big.yaml new file mode 100644 index 0000000..3095753 --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_big.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "relu" +dropout: 0.1 +attention_dropout: 0.0 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 1024 +decoder_output_dim: 1024 +decoder_input_dim: 1024 +decoder_ffn_embed_dim: 4096 +decoder_layers: 12 +decoder_attention_heads: 16 +decoder_normalize_before: true +no_decoder_final_norm: false +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false 
+quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gbw.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gbw.yaml new file mode 100644 index 0000000..30b1a4f --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gbw.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "relu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 512 +decoder_output_dim: 512 +decoder_input_dim: 512 +decoder_ffn_embed_dim: 4096 +decoder_layers: 12 +decoder_attention_heads: 16 +decoder_normalize_before: true +no_decoder_final_norm: true +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt.yaml new file mode 100644 index 0000000..2c6cb7b --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "gelu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 768 +decoder_output_dim: 768 +decoder_input_dim: 768 +decoder_ffn_embed_dim: 3072 +decoder_layers: 12 +decoder_attention_heads: 12 
+decoder_normalize_before: true +no_decoder_final_norm: false +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_big.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_big.yaml new file mode 100644 index 0000000..a08769a --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_big.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "gelu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 1600 +decoder_output_dim: 1600 +decoder_input_dim: 1600 +decoder_ffn_embed_dim: 6400 +decoder_layers: 48 +decoder_attention_heads: 25 +decoder_normalize_before: true +no_decoder_final_norm: false +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false 
+no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_medium.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_medium.yaml new file mode 100644 index 0000000..64261d7 --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_medium.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "gelu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 1280 +decoder_output_dim: 1280 +decoder_input_dim: 1280 +decoder_ffn_embed_dim: 5120 +decoder_layers: 36 +decoder_attention_heads: 20 +decoder_normalize_before: true +no_decoder_final_norm: false +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_small.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_small.yaml new file mode 100644 index 0000000..702e81f --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_gpt2_small.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "gelu" +dropout: 0.1 +attention_dropout: 0.1 +activation_dropout: 0.0 +relu_dropout: 0.0 +decoder_embed_dim: 1024 +decoder_output_dim: 1024 +decoder_input_dim: 1024 +decoder_ffn_embed_dim: 
4096 +decoder_layers: 24 +decoder_attention_heads: 16 +decoder_normalize_before: true +no_decoder_final_norm: false +adaptive_softmax_cutoff: null +adaptive_softmax_dropout: 0 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: false +adaptive_input_factor: 4 +adaptive_input_cutoff: null +tie_adaptive_weights: false +tie_adaptive_proj: false +decoder_learned_pos: false +decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/transformer_lm/transformer_lm_wiki103.yaml b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_wiki103.yaml new file mode 100644 index 0000000..1154cfa --- /dev/null +++ b/fairseq/fairseq/config/model/transformer_lm/transformer_lm_wiki103.yaml @@ -0,0 +1,36 @@ +# @package _group_ +activation_fn: "relu" +dropout: 0.3 +attention_dropout: 0.1 +activation_dropout: 0.1 +relu_dropout: 0.1 +decoder_embed_dim: 1024 +decoder_output_dim: 1024 +decoder_input_dim: 1024 +decoder_ffn_embed_dim: 4096 +decoder_layers: 16 +decoder_attention_heads: 8 +decoder_normalize_before: true +no_decoder_final_norm: true +adaptive_softmax_cutoff: "20000,60000" +adaptive_softmax_dropout: 0.2 +adaptive_softmax_factor: 4 +no_token_positional_embeddings: false +share_decoder_input_output_embed: false +character_embeddings: false +character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]" +character_embedding_dim: 4 +char_embedder_highway_layers: 2 +adaptive_input: true +adaptive_input_factor: 4 +adaptive_input_cutoff: "20000,60000" +tie_adaptive_weights: true +tie_adaptive_proj: true +decoder_learned_pos: false 
+decoder_layerdrop: 0 +decoder_layers_to_keep: null +layernorm_embedding: false +no_scale_embedding: false +quant_noise_pq: 0 +quant_noise_pq_block_size: 8 +quant_noise_scalar: 0 diff --git a/fairseq/fairseq/config/model/wav2vec/vq_wav2vec_gumbel.yaml b/fairseq/fairseq/config/model/wav2vec/vq_wav2vec_gumbel.yaml new file mode 100644 index 0000000..ee1329b --- /dev/null +++ b/fairseq/fairseq/config/model/wav2vec/vq_wav2vec_gumbel.yaml @@ -0,0 +1,5 @@ +# @package _group_ +activation: gelu +vq_type: gumbel +vq_depth: 2 +combine_groups: true diff --git a/fairseq/fairseq/config/model/wav2vec2/wav2vec2_base.yaml b/fairseq/fairseq/config/model/wav2vec2/wav2vec2_base.yaml new file mode 100644 index 0000000..ce65499 --- /dev/null +++ b/fairseq/fairseq/config/model/wav2vec2/wav2vec2_base.yaml @@ -0,0 +1,8 @@ +# @package _group_ + +quantize_targets: true +final_dim: 256 +encoder_layerdrop: 0.05 +dropout_input: 0.1 +dropout_features: 0.1 +feature_grad_mult: 0.1 diff --git a/fairseq/fairseq/config/model/wav2vec2/wav2vec2_large.yaml b/fairseq/fairseq/config/model/wav2vec2/wav2vec2_large.yaml new file mode 100644 index 0000000..5846f75 --- /dev/null +++ b/fairseq/fairseq/config/model/wav2vec2/wav2vec2_large.yaml @@ -0,0 +1,20 @@ +# @package _group_ + +quantize_targets: true +extractor_mode: layer_norm +layer_norm_first: true +final_dim: 768 +latent_temp: [2.0,0.1,0.999995] +encoder_layerdrop: 0.0 +dropout_input: 0.0 +dropout_features: 0.0 +dropout: 0.0 +attention_dropout: 0.0 +conv_bias: true + +encoder_layers: 24 +encoder_embed_dim: 1024 +encoder_ffn_embed_dim: 4096 +encoder_attention_heads: 16 + +feature_grad_mult: 1.0 diff --git a/fairseq/fairseq/criterions/__init__.py b/fairseq/fairseq/criterions/__init__.py new file mode 100644 index 0000000..ecd65d3 --- /dev/null +++ b/fairseq/fairseq/criterions/__init__.py @@ -0,0 +1,36 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""isort:skip_file""" + +import importlib +import os + +from fairseq import registry +from fairseq.criterions.fairseq_criterion import ( # noqa + FairseqCriterion, + LegacyFairseqCriterion, +) +from omegaconf import DictConfig + + +( + build_criterion_, + register_criterion, + CRITERION_REGISTRY, + CRITERION_DATACLASS_REGISTRY, +) = registry.setup_registry( + "--criterion", base_class=FairseqCriterion, default="cross_entropy" +) + + +def build_criterion(cfg: DictConfig, task, from_checkpoint=False): + return build_criterion_(cfg, task, from_checkpoint=from_checkpoint) + + +# automatically import any Python files in the criterions/ directory +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + file_name = file[: file.find(".py")] + importlib.import_module("fairseq.criterions." + file_name) diff --git a/fairseq/fairseq/criterions/adaptive_loss.py b/fairseq/fairseq/criterions/adaptive_loss.py new file mode 100644 index 0000000..fc1ac85 --- /dev/null +++ b/fairseq/fairseq/criterions/adaptive_loss.py @@ -0,0 +1,124 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +from dataclasses import dataclass + +import torch.nn.functional as F +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.constants import DDP_BACKEND_CHOICES +from omegaconf import II + + +@dataclass +class AdaptiveLossConfig(FairseqDataclass): + sentence_avg: bool = II("optimization.sentence_avg") + ddp_backend: DDP_BACKEND_CHOICES = II("distributed_training.ddp_backend") + + +@register_criterion("adaptive_loss", dataclass=AdaptiveLossConfig) +class AdaptiveLoss(FairseqCriterion): + """This is an implementation of the loss function accompanying the adaptive softmax approximation for + graphical processing units (GPU), described in the paper "Efficient softmax approximation for GPUs" + (http://arxiv.org/abs/1609.04309).""" + + def __init__(self, task, sentence_avg): + super().__init__(task) + self.sentence_avg = sentence_avg + + @classmethod + def build_criterion(cls, cfg: AdaptiveLossConfig, task): + if cfg.ddp_backend in {"c10d", "pytorch_ddp"}: + raise Exception( + "AdaptiveLoss is not compatible with the PyTorch " + "version of DistributedDataParallel. Please use " + "`--ddp-backend=legacy_ddp` instead." + ) + return cls(task, cfg.sentence_avg) + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + assert ( + hasattr(model.decoder, "adaptive_softmax") + and model.decoder.adaptive_softmax is not None + ) + adaptive_softmax = model.decoder.adaptive_softmax + + net_output = model(**sample["net_input"]) + orig_target = model.get_targets(sample, net_output) + + nsentences = orig_target.size(0) + orig_target = orig_target.view(-1) + + bsz = orig_target.size(0) + + logits, target = adaptive_softmax(net_output[0], orig_target) + assert len(target) == len(logits) + + loss = net_output[0].new(1 if reduce else bsz).zero_() + + for i in range(len(target)): + if target[i] is not None: + assert target[i].min() >= 0 and target[i].max() <= logits[i].size(1) + loss += F.cross_entropy( + logits[i], + target[i], + ignore_index=self.padding_idx, + reduction="sum" if reduce else "none", + ) + + orig = utils.strip_pad(orig_target, self.padding_idx) + ntokens = orig.numel() + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + logging_output = { + "loss": loss.data, + "ntokens": ntokens, + "nsentences": nsentences, + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + 
@register_criterion("composite_loss")
class CompositeLoss(LegacyFairseqCriterion):
    """Average an underlying criterion over several (output, target) pairs.

    The model is expected to return a list of outputs as ``net_outputs[0]``
    and the sample a matching list of targets in ``sample["target"]``; the
    underlying criterion is applied to each pair and the losses are averaged.
    """

    def __init__(self, args, task):
        super().__init__(args, task)
        self.underlying_criterion = args.underlying_criterion

    @staticmethod
    def add_args(parser):
        """Add criterion-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--underlying-criterion', type=str, metavar='VAL', required=True,
                            help='underlying criterion to use for the composite loss')
        # fmt: on

    @staticmethod
    def build_underlying_criterion(args, task):
        """Build the criterion named by ``args.underlying_criterion``.

        ``args.criterion`` is temporarily swapped so that
        ``task.build_criterion`` constructs the underlying criterion, and is
        always restored afterwards, even when construction fails.
        """
        saved_criterion = args.criterion
        # Validate before mutating ``args`` so a failed check leaves it intact.
        assert saved_criterion != args.underlying_criterion
        args.criterion = args.underlying_criterion
        try:
            return task.build_criterion(args)
        finally:
            args.criterion = saved_criterion

    @classmethod
    def build_criterion(cls, args, task):
        """Return a criterion instance wrapping the underlying criterion."""
        underlying_criterion = CompositeLoss.build_underlying_criterion(args, task)

        class FakeModel(nn.Module):
            """Stand-in model that replays one precomputed output/target pair."""

            def __init__(self, model, net_out, target):
                super().__init__()
                self.model = model
                self.net_out = net_out
                self.target = target

            def forward(self, **unused):
                return self.net_out

            def get_normalized_probs(self, net_output, log_probs, sample=None):
                return self.model.get_normalized_probs(
                    net_output, log_probs, sample=sample
                )

            def get_targets(self, *unused):
                return self.target

            @property
            def decoder(self):
                return self.model.decoder

        class _CompositeLoss(LegacyFairseqCriterion):
            def __init__(self, args, task, underlying_criterion):
                super().__init__(args, task)
                self.underlying_criterion = underlying_criterion

            def forward(self, model, sample, reduce=True):
                net_outputs = model(**sample["net_input"])
                targets = sample["target"]

                bsz = targets[0].size(0)
                loss = net_outputs[0][0].new(1 if reduce else bsz).float().zero_()

                sample_size = 0
                logging_output = {}
                for o, t in zip(net_outputs[0], targets):
                    m = FakeModel(model, (o, net_outputs[1]), t)
                    # NOTE(review): ``sample["target"]`` is overwritten with the
                    # per-pair target and is left pointing at the last one.
                    sample["target"] = t
                    l, ss, logging_output = self.underlying_criterion(m, sample, reduce)
                    loss += l
                    sample_size += ss

                loss.div_(len(targets))
                sample_size /= len(targets)

                # Only the last pair's logging output is kept; its "loss" entry
                # is replaced with the averaged loss.
                logging_output["loss"] = utils.item(loss.data) if reduce else loss.data
                return loss, sample_size, logging_output

            @staticmethod
            def aggregate_logging_outputs(logging_outputs):
                return underlying_criterion.__class__.aggregate_logging_outputs(
                    logging_outputs
                )

            @staticmethod
            def reduce_metrics(logging_outputs) -> None:
                underlying_criterion.__class__.reduce_metrics(logging_outputs)

        return _CompositeLoss(args, task, underlying_criterion)
@dataclass
class CrossEntropyCriterionConfig(FairseqDataclass):
    # Interpolated from the "optimization.sentence_avg" config entry.
    sentence_avg: bool = II("optimization.sentence_avg")


@register_criterion("cross_entropy", dataclass=CrossEntropyCriterionConfig)
class CrossEntropyCriterion(FairseqCriterion):
    """Negative log-likelihood criterion over the model's normalized log-probs."""

    def __init__(self, task, sentence_avg):
        super().__init__(task)
        self.sentence_avg = sentence_avg

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(**sample["net_input"])
        loss, _ = self.compute_loss(model, net_output, sample, reduce=reduce)

        nsentences = sample["target"].size(0)
        ntokens = sample["ntokens"]
        sample_size = nsentences if self.sentence_avg else ntokens

        logging_output = {
            "loss": loss.data,
            "ntokens": ntokens,
            "nsentences": nsentences,
            "sample_size": sample_size,
        }
        return loss, sample_size, logging_output

    def compute_loss(self, model, net_output, sample, reduce=True):
        """Return ``(loss, loss)``: the NLL loss, twice, ignoring padding."""
        log_probs = model.get_normalized_probs(net_output, log_probs=True)
        flat_log_probs = log_probs.view(-1, log_probs.size(-1))
        flat_target = model.get_targets(sample, net_output).view(-1)
        reduction = "sum" if reduce else "none"
        loss = F.nll_loss(
            flat_log_probs,
            flat_target,
            ignore_index=self.padding_idx,
            reduction=reduction,
        )
        return loss, loss

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""

        def total(key):
            return sum(log.get(key, 0) for log in logging_outputs)

        loss_sum = total("loss")
        ntokens = total("ntokens")
        sample_size = total("sample_size")

        # we divide by log(2) to convert the loss from base e to base 2
        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )

        if sample_size != ntokens:
            # Per-token loss is logged separately and drives perplexity.
            metrics.log_scalar(
                "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
            )
            ppl_source = "nll_loss"
        else:
            ppl_source = "loss"

        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters[ppl_source].avg)
        )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return True
@dataclass
class CtcCriterionConfig(FairseqDataclass):
    zero_infinity: bool = field(
        default=False,
        metadata={"help": "zero inf loss when source length <= target length"},
    )
    sentence_avg: bool = II("optimization.sentence_avg")
    post_process: str = field(
        default="letter",
        metadata={
            "help": "how to post process predictions into words. can be letter, "
            "wordpiece, BPE symbols, etc. "
            "See fairseq.data.data_utils.post_process() for full list of options"
        },
    )
    wer_kenlm_model: Optional[str] = field(
        default=None,
        metadata={
            "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)"
        },
    )
    wer_lexicon: Optional[str] = field(
        default=None,
        metadata={"help": "lexicon to use with wer_kenlm_model"},
    )
    wer_lm_weight: float = field(
        default=2.0,
        metadata={"help": "lm weight to use with wer_kenlm_model"},
    )
    wer_word_score: float = field(
        default=-1.0,
        metadata={"help": "lm word score to use with wer_kenlm_model"},
    )
    wer_sil_weight: float = field(
        default=0,
        # Help text previously duplicated the wer_word_score description.
        metadata={"help": "silence weight to use with wer_kenlm_model"},
    )

    wer_args: Optional[str] = field(
        default=None,
        metadata={
            "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)"
        },
    )


@register_criterion("ctc", dataclass=CtcCriterionConfig)
class CtcCriterion(FairseqCriterion):
    """CTC loss criterion that also reports unit/word error rates at eval time."""

    def __init__(
        self, cfg: CtcCriterionConfig, task: FairseqTask, rdrop_alpha: float = 0.0
    ):
        """Set up dictionary indices and, optionally, a KenLM decoder for WER.

        Args:
            cfg: criterion configuration.
            task: task providing ``target_dictionary`` (and optionally
                ``blank_symbol``).
            rdrop_alpha: when > 0, ``forward`` duplicates targets (and possibly
                source lengths) to match R-Drop-duplicated inputs.
        """
        super().__init__(task)
        self.blank_idx = (
            task.target_dictionary.index(task.blank_symbol)
            if hasattr(task, "blank_symbol")
            else 0
        )
        self.pad_idx = task.target_dictionary.pad()
        self.eos_idx = task.target_dictionary.eos()
        self.post_process = cfg.post_process

        self.rdrop_alpha = rdrop_alpha

        if cfg.wer_args is not None:
            # SECURITY NOTE(review): ``eval`` executes arbitrary code from the
            # deprecated ``wer_args`` string; only pass trusted configuration.
            (
                cfg.wer_kenlm_model,
                cfg.wer_lexicon,
                cfg.wer_lm_weight,
                cfg.wer_word_score,
            ) = eval(cfg.wer_args)

        if cfg.wer_kenlm_model is not None and cfg.wer_kenlm_model != "":
            from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

            dec_args = Namespace()
            dec_args.nbest = 1
            dec_args.criterion = "ctc"
            dec_args.kenlm_model = cfg.wer_kenlm_model
            dec_args.lexicon = cfg.wer_lexicon
            dec_args.beam = 50
            dec_args.beam_size_token = min(50, len(task.target_dictionary))
            dec_args.beam_threshold = min(50, len(task.target_dictionary))
            dec_args.lm_weight = cfg.wer_lm_weight
            dec_args.word_score = cfg.wer_word_score
            # Honour the configured silence weight; it used to be overwritten
            # with a hard-coded 0 right after being set (the default is 0, so
            # default behaviour is unchanged).
            dec_args.sil_weight = cfg.wer_sil_weight
            dec_args.unk_weight = -math.inf

            self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary)
        else:
            self.w2l_decoder = None

        self.zero_infinity = cfg.zero_infinity
        self.sentence_avg = cfg.sentence_avg

    def forward(self, model, sample, reduce=True, **kwargs):
        """Compute the summed CTC loss and logging outputs for ``sample``.

        Returns ``(loss, sample_size, logging_output)``. When the model is not
        in training mode, character/word error counts are added to
        ``logging_output``.
        """
        net_output = model(**sample["net_input"])
        lprobs = model.get_normalized_probs(
            net_output, log_probs=True
        ).contiguous()  # (T, B, C) from the encoder

        # CTC loss is calculated over duplicated inputs
        # sample is already duplicated for R-Drop
        if self.rdrop_alpha > 0:
            for k, v in sample.items():
                if k in ["target", "target_lengths"]:
                    sample[k] = torch.cat([v, v.clone()], dim=0)
                elif k == "net_input":
                    if sample[k]["src_tokens"].size(1) != sample[k]["src_lengths"].size(
                        0
                    ):
                        # for decoder CTC loss
                        sample[k]["src_lengths"] = torch.cat(
                            [
                                sample[k]["src_lengths"],
                                sample[k]["src_lengths"].clone(),
                            ],
                            dim=0,
                        )

        if "src_lengths" in sample["net_input"]:
            input_lengths = sample["net_input"]["src_lengths"]
        else:
            if net_output["padding_mask"] is not None:
                non_padding_mask = ~net_output["padding_mask"]
                input_lengths = non_padding_mask.long().sum(-1)
            else:
                # No padding information: every sequence spans all T frames.
                input_lengths = lprobs.new_full(
                    (lprobs.size(1),), lprobs.size(0), dtype=torch.long
                )

        # Targets are flattened with pad and eos tokens removed.
        pad_mask = (sample["target"] != self.pad_idx) & (
            sample["target"] != self.eos_idx
        )
        targets_flat = sample["target"].masked_select(pad_mask)
        if "target_lengths" in sample:
            target_lengths = sample["target_lengths"]
        else:
            target_lengths = pad_mask.sum(-1)

        with torch.backends.cudnn.flags(enabled=False):
            loss = F.ctc_loss(
                lprobs,
                targets_flat,
                input_lengths,
                target_lengths,
                blank=self.blank_idx,
                reduction="sum",
                zero_infinity=self.zero_infinity,
            )

        ntokens = (
            sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item()
        )

        sample_size = sample["target"].size(0) if self.sentence_avg else ntokens
        logging_output = {
            "loss": utils.item(loss.data),  # * sample['ntokens'],
            "ntokens": ntokens,
            "nsentences": sample["id"].numel(),
            "sample_size": sample_size,
        }

        if not model.training:
            import editdistance

            with torch.no_grad():
                lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu()

                c_err = 0
                c_len = 0
                w_errs = 0
                w_len = 0
                wv_errs = 0
                for lp, t, inp_l in zip(
                    lprobs_t,
                    sample["target_label"]
                    if "target_label" in sample
                    else sample["target"],
                    input_lengths,
                ):
                    lp = lp[:inp_l].unsqueeze(0)

                    # Take the first hypothesis of the first item, if any.
                    decoded = None
                    if self.w2l_decoder is not None:
                        decoded = self.w2l_decoder.decode(lp)
                        if len(decoded) < 1:
                            decoded = None
                        else:
                            decoded = decoded[0]
                            if len(decoded) < 1:
                                decoded = None
                            else:
                                decoded = decoded[0]

                    p = (t != self.task.target_dictionary.pad()) & (
                        t != self.task.target_dictionary.eos()
                    )
                    targ = t[p]
                    targ_units = self.task.target_dictionary.string(targ)
                    targ_units_arr = targ.tolist()

                    # Greedy CTC decoding: collapse repeats, then drop blanks.
                    toks = lp.argmax(dim=-1).unique_consecutive()
                    pred_units_arr = toks[toks != self.blank_idx].tolist()

                    c_err += editdistance.eval(pred_units_arr, targ_units_arr)
                    c_len += len(targ_units_arr)

                    targ_words = post_process(targ_units, self.post_process).split()

                    pred_units = self.task.target_dictionary.string(pred_units_arr)
                    pred_words_raw = post_process(pred_units, self.post_process).split()

                    if decoded is not None and "words" in decoded:
                        pred_words = decoded["words"]
                        w_errs += editdistance.eval(pred_words, targ_words)
                        wv_errs += editdistance.eval(pred_words_raw, targ_words)
                    else:
                        dist = editdistance.eval(pred_words_raw, targ_words)
                        w_errs += dist
                        wv_errs += dist

                    w_len += len(targ_words)

                logging_output["wv_errors"] = wv_errs
                logging_output["w_errors"] = w_errs
                logging_output["w_total"] = w_len
                logging_output["c_errors"] = c_err
                logging_output["c_total"] = c_len

        return loss, sample_size, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""

        loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs))
        ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs))
        nsentences = utils.item(
            sum(log.get("nsentences", 0) for log in logging_outputs)
        )
        sample_size = utils.item(
            sum(log.get("sample_size", 0) for log in logging_outputs)
        )

        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_scalar("ntokens", ntokens)
        metrics.log_scalar("nsentences", nsentences)
        if sample_size != ntokens:
            metrics.log_scalar(
                "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
            )

        # Raw error/length counters are logged under "_"-prefixed names and
        # combined into rates by the derived metrics below.
        c_errors = sum(log.get("c_errors", 0) for log in logging_outputs)
        metrics.log_scalar("_c_errors", c_errors)
        c_total = sum(log.get("c_total", 0) for log in logging_outputs)
        metrics.log_scalar("_c_total", c_total)
        w_errors = sum(log.get("w_errors", 0) for log in logging_outputs)
        metrics.log_scalar("_w_errors", w_errors)
        wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs)
        metrics.log_scalar("_wv_errors", wv_errors)
        w_total = sum(log.get("w_total", 0) for log in logging_outputs)
        metrics.log_scalar("_w_total", w_total)

        if c_total > 0:
            metrics.log_derived(
                "uer",
                lambda meters: safe_round(
                    meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3
                )
                if meters["_c_total"].sum > 0
                else float("nan"),
            )
        if w_total > 0:
            metrics.log_derived(
                "wer",
                lambda meters: safe_round(
                    meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3
                )
                if meters["_w_total"].sum > 0
                else float("nan"),
            )
            metrics.log_derived(
                "raw_wer",
                lambda meters: safe_round(
                    meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3
                )
                if meters["_w_total"].sum > 0
                else float("nan"),
            )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return True
class FairseqCriterion(_Loss):
    """Base class for criterions; stores the task and the target padding index."""

    def __init__(self, task):
        super().__init__()
        self.task = task
        # ``padding_idx`` is only defined when the task exposes a target
        # dictionary attribute; -100 is used when that dictionary is None.
        if hasattr(task, "target_dictionary"):
            target_dict = task.target_dictionary
            if target_dict is not None:
                self.padding_idx = target_dict.pad()
            else:
                self.padding_idx = -100

    @classmethod
    def add_args(cls, parser):
        """Add criterion-specific arguments to the parser."""
        config_cls = getattr(cls, "__dataclass", None)
        if config_cls is None:
            return
        gen_parser_from_dataclass(parser, config_cls())

    @classmethod
    def build_criterion(cls, cfg: FairseqDataclass, task):
        """Construct a criterion from command-line args."""
        # Collect keyword arguments for ``__init__`` by inspecting its signature.
        init_args = {}
        for param in inspect.signature(cls).parameters.values():
            if param.kind in (
                param.POSITIONAL_ONLY,
                param.VAR_POSITIONAL,
                param.VAR_KEYWORD,
            ):
                # we haven't implemented inference for these argument types,
                # but PRs welcome :)
                raise NotImplementedError("{} not supported".format(param.kind))

            assert param.kind in {param.POSITIONAL_OR_KEYWORD, param.KEYWORD_ONLY}

            name = param.name
            if name == "task":
                init_args["task"] = task
            elif name == "cfg":
                init_args["cfg"] = cfg
            elif hasattr(cfg, name):
                init_args[name] = getattr(cfg, name)
            elif param.default != param.empty:
                continue  # we'll use the default value
            else:
                raise NotImplementedError(
                    "Unable to infer Criterion arguments, please implement "
                    "{}.build_criterion".format(cls.__name__)
                )
        return cls(**init_args)

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        raise NotImplementedError

    @staticmethod
    def aggregate_logging_outputs(
        logging_outputs: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Aggregate logging outputs from data parallel training."""
        utils.deprecation_warning(
            "The aggregate_logging_outputs API is deprecated. "
            "Please use the reduce_metrics API instead."
        )
        raise NotImplementedError

    @classmethod
    def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None:
        """Aggregate logging outputs from data parallel training."""
        utils.deprecation_warning(
            "Criterions should implement the reduce_metrics API. "
            "Falling back to deprecated aggregate_logging_outputs API."
        )
        aggregated = cls.aggregate_logging_outputs(logging_outputs)
        # These bookkeeping keys are not logged as scalars here.
        skipped = {"nsentences", "ntokens", "sample_size"}
        for key, value in aggregated.items():
            if key not in skipped:
                metrics.log_scalar(key, value)

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return False


class LegacyFairseqCriterion(FairseqCriterion):
    """Criterion base that keeps the raw ``args`` namespace (deprecated style)."""

    def __init__(self, args, task):
        super().__init__(task=task)
        self.args = args

        utils.deprecation_warning(
            "Criterions should take explicit arguments instead of an "
            "argparse.Namespace object, please update your criterion by "
            "extending FairseqCriterion instead of LegacyFairseqCriterion."
        )

    @classmethod
    def build_criterion(cls, args, task):
        """Construct a criterion from command-line args."""
        return cls(args, task)
@dataclass
class FastSpeech2CriterionConfig(FairseqDataclass):
    ctc_weight: float = field(default=0.0, metadata={"help": "weight for CTC loss"})


@register_criterion("fastspeech2", dataclass=FastSpeech2CriterionConfig)
class FastSpeech2Loss(FairseqCriterion):
    """Sum of feature L1, duration, pitch, energy and optional CTC losses."""

    def __init__(self, task, ctc_weight):
        super().__init__(task)
        self.ctc_weight = ctc_weight

    def forward(self, model: FairseqEncoderModel, sample, reduction="mean"):
        """Run the model on ``sample`` and return ``(loss, sample_size, logging_output)``."""
        net_input = sample["net_input"]
        src_tokens = net_input["src_tokens"]
        src_lens = net_input["src_lengths"]
        tgt_lens = sample["target_lengths"]

        model_out = model(
            src_tokens=src_tokens,
            src_lengths=src_lens,
            prev_output_tokens=net_input["prev_output_tokens"],
            incremental_state=None,
            target_lengths=tgt_lens,
            speaker=sample["speaker"],
            durations=sample["durations"],
            pitches=sample["pitches"],
            energies=sample["energies"],
        )
        _feat_out, _feat_out_post, _, log_dur_out, pitch_out, energy_out = model_out

        src_mask = lengths_to_mask(sample["net_input"]["src_lengths"])
        tgt_mask = lengths_to_mask(sample["target_lengths"])

        # Keep only the positions selected by the source mask.
        pitches = sample["pitches"][src_mask]
        energies = sample["energies"][src_mask]
        pitch_out = pitch_out[src_mask]
        energy_out = energy_out[src_mask]

        feat = sample["target"][tgt_mask]
        feat_out = _feat_out[tgt_mask]
        l1_loss = F.l1_loss(feat_out, feat, reduction=reduction)
        if _feat_out_post is not None:
            l1_loss += F.l1_loss(_feat_out_post[tgt_mask], feat, reduction=reduction)

        pitch_loss = F.mse_loss(pitch_out, pitches, reduction=reduction)
        energy_loss = F.mse_loss(energy_out, energies, reduction=reduction)

        log_dur_out = log_dur_out[src_mask]
        dur = sample["durations"].float()
        # Match half precision when the prediction is a half tensor.
        if log_dur_out.type().endswith(".HalfTensor"):
            dur = dur.half()
        log_dur = torch.log(dur + 1)[src_mask]
        dur_loss = F.mse_loss(log_dur_out, log_dur, reduction=reduction)

        ctc_loss = torch.tensor(0.0).type_as(l1_loss)
        if self.ctc_weight > 0.0:
            lprobs = model.get_normalized_probs((_feat_out,), log_probs=True)
            lprobs = lprobs.transpose(0, 1)  # T x B x C
            src_mask = lengths_to_mask(src_lens)
            src_tokens_flat = src_tokens.masked_select(src_mask)
            unweighted_ctc = F.ctc_loss(
                lprobs,
                src_tokens_flat,
                tgt_lens,
                src_lens,
                reduction=reduction,
                zero_infinity=True,
            )
            ctc_loss = unweighted_ctc * self.ctc_weight

        loss = l1_loss + dur_loss + pitch_loss + energy_loss + ctc_loss

        sample_size = sample["nsentences"]
        logging_output = {
            "loss": utils.item(loss.data),
            "ntokens": sample["ntokens"],
            "nsentences": sample["nsentences"],
            "sample_size": sample_size,
            "l1_loss": utils.item(l1_loss.data),
            "dur_loss": utils.item(dur_loss.data),
            "pitch_loss": utils.item(pitch_loss.data),
            "energy_loss": utils.item(energy_loss.data),
            "ctc_loss": utils.item(ctc_loss.data),
        }
        return loss, sample_size, logging_output

    @classmethod
    def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None:
        """Log sample-size-weighted averages of each loss, plus inference metrics."""
        sizes = [log.get("sample_size", 0) for log in logging_outputs]
        ntot = sum(sizes)
        # Epsilon keeps the weights finite when the total sample size is 0.
        weights = [size / (ntot + 1e-8) for size in sizes]

        loss_keys = [
            "loss",
            "l1_loss",
            "dur_loss",
            "pitch_loss",
            "energy_loss",
            "ctc_loss",
        ]
        for key in loss_keys:
            weighted = sum(
                log.get(key, 0) * weight
                for log, weight in zip(logging_outputs, weights)
            )
            metrics.log_scalar(key, weighted, ntot, round=3)
        metrics.log_scalar("sample_size", ntot, len(logging_outputs))

        # inference metrics
        if "targ_frames" not in logging_outputs[0]:
            return
        n = sum(log.get("targ_frames", 0) for log in logging_outputs)
        renames = [
            ("mcd_loss", "mcd_loss"),
            ("pred_frames", "pred_ratio"),
            ("nins", "ins_rate"),
            ("ndel", "del_rate"),
        ]
        for key, new_key in renames:
            summed = sum(log.get(key, 0) for log in logging_outputs)
            metrics.log_scalar(new_key, summed / n, n, round=3)

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        return False
@dataclass
class HubertCriterionConfig(FairseqDataclass):
    pred_masked_weight: float = field(
        default=1.0,
        metadata={"help": "weight for predictive loss for masked frames"},
    )
    pred_nomask_weight: float = field(
        default=0.0,
        metadata={"help": "weight for predictive loss for unmasked frames"},
    )
    loss_weights: Optional[List[float]] = field(
        default=None,
        metadata={"help": "weights for additional loss terms (not first one)"},
    )
    log_keys: List[str] = field(
        default_factory=lambda: [],
        metadata={"help": "output keys to log"},
    )


@register_criterion("hubert", dataclass=HubertCriterionConfig)
class HubertCriterion(FairseqCriterion):
    """Cross-entropy over masked/unmasked frame logits plus weighted extra losses."""

    def __init__(
        self,
        task,
        pred_masked_weight,
        pred_nomask_weight,
        loss_weights=None,
        log_keys=None,
    ):
        super().__init__(task)
        self.pred_masked_weight = pred_masked_weight
        self.pred_nomask_weight = pred_nomask_weight
        self.loss_weights = loss_weights
        self.log_keys = [] if log_keys is None else log_keys

    def forward(self, model, sample, reduce=True, log_pred=False):
        """Compute the loss for the given sample.
        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(target_list=sample["target_list"], **sample["net_input"])
        loss = 0.0
        sample_size = 0
        logging_output = {}
        reduction = "sum" if reduce else "none"

        # Masked-frame prediction loss.
        loss_m_list = []
        logp_m_list = model.get_logits(net_output, True)
        targ_m_list = model.get_targets(net_output, True)
        assert self.pred_masked_weight == 0 or len(logp_m_list) > 0
        for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)):
            loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction)
            loss_m_list.append(loss_m)
            logging_output[f"loss_m_{i}"] = loss_m.detach().item()
        if self.pred_masked_weight > 0:
            loss += self.pred_masked_weight * sum(loss_m_list)
            sample_size += targ_m_list[0].numel()

        # Unmasked-frame prediction loss.
        loss_u_list = []
        logp_u_list = model.get_logits(net_output, False)
        targ_u_list = model.get_targets(net_output, False)
        assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0
        for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)):
            loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction)
            loss_u_list.append(loss_u)
            logging_output[f"loss_u_{i}"] = loss_u.detach().item()
        if self.pred_nomask_weight > 0:
            loss += self.pred_nomask_weight * sum(loss_u_list)
            sample_size += targ_u_list[0].numel()

        if self.loss_weights is not None:
            assert hasattr(model, "get_extra_losses")
            extra_losses, names = model.get_extra_losses(net_output)
            if torch.is_tensor(extra_losses):
                extra_losses = [extra_losses]
                names = [names]
            # A single configured weight is broadcast to all extra losses.
            if len(self.loss_weights) == 1 and len(extra_losses) != 1:
                self.loss_weights = [self.loss_weights[0]] * len(extra_losses)
            assert len(extra_losses) == len(
                self.loss_weights
            ), f"{len(extra_losses)}, {len(self.loss_weights)}"
            for p, n, coef in zip(extra_losses, names, self.loss_weights):
                if coef != 0 and p is not None:
                    # Scaled by sample_size so it is comparable to summed losses.
                    p = coef * p.float() * sample_size
                    loss += p
                    logging_output[f"loss_{n}"] = p.item()

        logging_output = {
            "loss": loss.item() if reduce else loss,
            "ntokens": sample_size,
            "nsentences": sample["id"].numel(),
            "sample_size": sample_size,
            **logging_output,
        }

        for lk in self.log_keys:
            if lk in net_output:
                logging_output[lk] = float((net_output[lk]))

        def compute_correct(logits):
            """Return (#rows whose argmax is index 0, #rows) for ``logits``."""
            if logits.numel() == 0:
                return 0, 0
            assert logits.dim() > 1, logits.shape
            is_max = logits.argmax(-1) == 0
            is_min = logits.argmin(-1) == 0
            # Rows where index 0 is both argmax and argmin are not counted.
            both = is_max & is_min
            corr = is_max.long().sum().item() - both.long().sum().item()
            count = is_max.numel()
            return corr, count

        with torch.no_grad():
            for i, logp_m in enumerate(logp_m_list):
                corr_m, count_m = compute_correct(logp_m)
                logging_output[f"correct_m_{i}"] = corr_m
                logging_output[f"count_m_{i}"] = count_m

            for i, logp_u in enumerate(logp_u_list):
                corr_u, count_u = compute_correct(logp_u)
                logging_output[f"correct_u_{i}"] = corr_u
                logging_output[f"count_u_{i}"] = count_u

        return loss, sample_size, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training (copied from normal cross entropy)."""
        loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
        ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        if sample_size != ntokens:
            metrics.log_scalar(
                "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
            )
            metrics.log_derived(
                "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
            )
        else:
            metrics.log_derived(
                "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)
            )

        counts = {}
        for lk in logging_outputs[0].keys():
            if lk.startswith("count_"):
                val = sum(log[lk] for log in logging_outputs)
                metrics.log_scalar(lk, val)
                counts[lk] = val

        for lk in logging_outputs[0].keys():
            if lk.startswith("loss_"):
                val = sum(log[lk] for log in logging_outputs)
                metrics.log_scalar(lk, val / sample_size / math.log(2), round=3)
            elif lk.startswith("correct_"):
                val = sum(log[lk] for log in logging_outputs)
                count = counts[re.sub("correct", "count", lk)]
                # ``forward`` reports a count of 0 for empty logits; skip the
                # ratio in that case so it does not raise ZeroDivisionError.
                if count > 0:
                    metrics.log_scalar(lk, val / count)

    @staticmethod
    def aggregate_logging_outputs(logging_outputs):
        """Aggregate logging outputs from data parallel training."""
        raise NotImplementedError()

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return False
@dataclass
class LabelSmoothedCrossEntropyCriterionConfig(FairseqDataclass):
    label_smoothing: float = field(
        default=0.0,
        metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"},
    )
    report_accuracy: bool = field(
        default=False,
        metadata={"help": "report accuracy metric"},
    )
    ignore_prefix_size: int = field(
        default=0,
        metadata={"help": "Ignore first N tokens"},
    )
    sentence_avg: bool = II("optimization.sentence_avg")


def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=None, reduce=True):
    """Return ``(loss, nll_loss)`` for label-smoothed negative log-likelihood.

    ``lprobs`` holds log-probabilities over the last dimension and ``target``
    the class indices; positions equal to ``ignore_index`` contribute zero.
    """
    if target.dim() == lprobs.dim() - 1:
        target = target.unsqueeze(-1)

    nll_loss = -lprobs.gather(dim=-1, index=target)
    smooth_loss = -lprobs.sum(dim=-1, keepdim=True)

    if ignore_index is None:
        nll_loss = nll_loss.squeeze(-1)
        smooth_loss = smooth_loss.squeeze(-1)
    else:
        # Zero out ignored positions in place (trailing dim is kept here).
        ignored = target.eq(ignore_index)
        nll_loss.masked_fill_(ignored, 0.0)
        smooth_loss.masked_fill_(ignored, 0.0)

    if reduce:
        nll_loss = nll_loss.sum()
        smooth_loss = smooth_loss.sum()

    num_classes = lprobs.size(-1)
    eps_i = epsilon / (num_classes - 1)
    loss = (1.0 - epsilon - eps_i) * nll_loss + eps_i * smooth_loss
    return loss, nll_loss


@register_criterion(
    "label_smoothed_cross_entropy", dataclass=LabelSmoothedCrossEntropyCriterionConfig
)
class LabelSmoothedCrossEntropyCriterion(FairseqCriterion):
    """Label-smoothed cross entropy with optional accuracy reporting."""

    def __init__(
        self,
        task,
        sentence_avg,
        label_smoothing,
        ignore_prefix_size=0,
        report_accuracy=False,
    ):
        super().__init__(task)
        self.sentence_avg = sentence_avg
        self.eps = label_smoothing
        self.ignore_prefix_size = ignore_prefix_size
        self.report_accuracy = report_accuracy

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        net_output = model(**sample["net_input"])
        loss, nll_loss = self.compute_loss(model, net_output, sample, reduce=reduce)

        nsentences = sample["target"].size(0)
        ntokens = sample["ntokens"]
        sample_size = nsentences if self.sentence_avg else ntokens

        logging_output = {
            "loss": loss.data,
            "nll_loss": nll_loss.data,
            "ntokens": ntokens,
            "nsentences": nsentences,
            "sample_size": sample_size,
        }
        if self.report_accuracy:
            n_correct, total = self.compute_accuracy(model, net_output, sample)
            logging_output["n_correct"] = utils.item(n_correct.data)
            logging_output["total"] = utils.item(total.data)
        return loss, sample_size, logging_output

    def get_lprobs_and_target(self, model, net_output, sample):
        """Return flattened log-probs and targets, dropping the ignored prefix."""
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        target = model.get_targets(sample, net_output)
        prefix = self.ignore_prefix_size
        if prefix > 0:
            # lprobs: B x T x C
            lprobs = lprobs[:, prefix:, :].contiguous()
            target = target[:, prefix:].contiguous()
        return lprobs.view(-1, lprobs.size(-1)), target.view(-1)

    def compute_loss(self, model, net_output, sample, reduce=True):
        """Return ``(loss, nll_loss)`` with padding positions ignored."""
        lprobs, target = self.get_lprobs_and_target(model, net_output, sample)
        return label_smoothed_nll_loss(
            lprobs,
            target,
            self.eps,
            ignore_index=self.padding_idx,
            reduce=reduce,
        )

    def compute_accuracy(self, model, net_output, sample):
        """Return ``(n_correct, total)`` counted over non-padding targets."""
        lprobs, target = self.get_lprobs_and_target(model, net_output, sample)
        non_pad = target.ne(self.padding_idx)
        predictions = lprobs.argmax(1).masked_select(non_pad)
        references = target.masked_select(non_pad)
        n_correct = torch.sum(predictions.eq(references))
        total = torch.sum(non_pad)
        return n_correct, total

    @classmethod
    def reduce_metrics(cls, logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""

        def total_of(key):
            return sum(log.get(key, 0) for log in logging_outputs)

        loss_sum = total_of("loss")
        nll_loss_sum = total_of("nll_loss")
        ntokens = total_of("ntokens")
        sample_size = total_of("sample_size")

        # Losses are divided by log(2) before logging.
        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_scalar(
            "nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, round=3
        )
        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)
        )

        total = utils.item(total_of("total"))
        if total > 0:
            metrics.log_scalar("total", total)
            n_correct = utils.item(total_of("n_correct"))
            metrics.log_scalar("n_correct", n_correct)
            metrics.log_derived(
                "accuracy",
                lambda meters: round(
                    meters["n_correct"].sum * 100.0 / meters["total"].sum, 3
                )
                if meters["total"].sum > 0
                else float("nan"),
            )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True will improves distributed training speed.
        """
        return True
+ +from dataclasses import dataclass, field +import torch +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, + LabelSmoothedCrossEntropyCriterionConfig, +) + +try: + from simuleval.metrics.latency import ( + AverageLagging, + AverageProportion, + DifferentiableAverageLagging, + ) + + LATENCY_METRICS = { + "average_lagging": AverageLagging, + "average_proportion": AverageProportion, + "differentiable_average_lagging": DifferentiableAverageLagging, + } +except ImportError: + LATENCY_METRICS = None + + +@dataclass +class LabelSmoothedCrossEntropyCriterionLatencyAugmentConfig( + LabelSmoothedCrossEntropyCriterionConfig +): + latency_avg_weight: float = field( + default=0.0, + metadata={"help": "weight fot average latency loss."}, + ) + latency_var_weight: float = field( + default=0.0, + metadata={"help": "weight fot variance latency loss."}, + ) + latency_avg_type: str = field( + default="differentiable_average_lagging", + metadata={"help": "latency type for average loss"}, + ) + latency_var_type: str = field( + default="variance_delay", + metadata={"help": "latency typ for variance loss"}, + ) + latency_gather_method: str = field( + default="weighted_average", + metadata={"help": "method to gather latency loss for all heads"}, + ) + latency_update_after: int = field( + default=0, + metadata={"help": "Add latency loss after certain steps"}, + ) + + +@register_criterion( + "latency_augmented_label_smoothed_cross_entropy", + dataclass=LabelSmoothedCrossEntropyCriterionLatencyAugmentConfig, +) +class LatencyAugmentedLabelSmoothedCrossEntropyCriterion( + LabelSmoothedCrossEntropyCriterion +): + def __init__( + self, + task, + sentence_avg, + label_smoothing, + ignore_prefix_size, + report_accuracy, + latency_avg_weight, + latency_var_weight, + latency_avg_type, + latency_var_type, + latency_gather_method, + 
latency_update_after, + ): + super().__init__( + task, sentence_avg, label_smoothing, ignore_prefix_size, report_accuracy + ) + assert LATENCY_METRICS is not None, "Please make sure SimulEval is installed." + + self.latency_avg_weight = latency_avg_weight + self.latency_var_weight = latency_var_weight + self.latency_avg_type = latency_avg_type + self.latency_var_type = latency_var_type + self.latency_gather_method = latency_gather_method + self.latency_update_after = latency_update_after + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + # 1. Compute cross entropy loss + loss, nll_loss = self.compute_loss(model, net_output, sample, reduce=reduce) + + # 2. Compute cross latency loss + latency_loss, expected_latency, expected_delays_var = self.compute_latency_loss( + model, sample, net_output + ) + + if self.latency_update_after > 0: + num_updates = getattr(model.decoder, "num_updates", None) + assert ( + num_updates is not None + ), "model.decoder doesn't have attribute 'num_updates'" + if num_updates <= self.latency_update_after: + latency_loss = 0 + + loss += latency_loss + + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + + logging_output = { + "loss": loss.data, + "nll_loss": nll_loss.data, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + "latency": expected_latency, + "delays_var": expected_delays_var, + "latency_loss": latency_loss, + } + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output, sample) + logging_output["n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + return loss, sample_size, logging_output + + def compute_latency_loss(self, model, sample, net_output): + assert ( + net_output[-1].encoder_padding_mask is None + or not net_output[-1].encoder_padding_mask[:, 0].any() + ), "Only right padding on source is supported." + # 1. 
Obtain the expected alignment + alpha_list = [item["alpha"] for item in net_output[1].attn_list] + num_layers = len(alpha_list) + bsz, num_heads, tgt_len, src_len = alpha_list[0].size() + + # bsz * num_layers * num_heads, tgt_len, src_len + alpha_all = torch.cat(alpha_list, dim=1).view(-1, tgt_len, src_len) + + # 2 compute expected delays + # bsz * num_heads * num_layers, tgt_len, src_len for MMA + steps = ( + torch.arange(1, 1 + src_len) + .unsqueeze(0) + .unsqueeze(1) + .expand_as(alpha_all) + .type_as(alpha_all) + ) + + expected_delays = torch.sum(steps * alpha_all, dim=-1) + + target_padding_mask = ( + model.get_targets(sample, net_output) + .eq(self.padding_idx) + .unsqueeze(1) + .expand(bsz, num_layers * num_heads, tgt_len) + .contiguous() + .view(-1, tgt_len) + ) + + src_lengths = ( + sample["net_input"]["src_lengths"] + .unsqueeze(1) + .expand(bsz, num_layers * num_heads) + .contiguous() + .view(-1) + ) + expected_latency = LATENCY_METRICS[self.latency_avg_type]( + expected_delays, src_lengths, None, target_padding_mask=target_padding_mask + ) + + # 2.1 average expected latency of heads + # bsz, num_layers * num_heads + expected_latency = expected_latency.view(bsz, -1) + if self.latency_gather_method == "average": + # bsz + expected_latency = expected_latency.mean(dim=1) + elif self.latency_gather_method == "weighted_average": + weights = torch.nn.functional.softmax(expected_latency, dim=1) + expected_latency = torch.sum(expected_latency * weights, dim=1) + elif self.latency_gather_method == "max": + expected_latency = expected_latency.max(dim=1)[0] + else: + raise NotImplementedError + + expected_latency = expected_latency.sum() + avg_loss = self.latency_avg_weight * expected_latency + + # 2.2 variance of expected delays + expected_delays_var = ( + expected_delays.view(bsz, -1, tgt_len).var(dim=1).mean(dim=1) + ) + expected_delays_var = expected_delays_var.sum() + var_loss = self.latency_var_weight * expected_delays_var + + # 3.
Final loss + latency_loss = avg_loss + var_loss + + return latency_loss, expected_latency, expected_delays_var + + @classmethod + def reduce_metrics(cls, logging_outputs) -> None: + super().reduce_metrics(logging_outputs) + latency = sum(log.get("latency", 0) for log in logging_outputs) + delays_var = sum(log.get("delays_var", 0) for log in logging_outputs) + latency_loss = sum(log.get("latency_loss", 0) for log in logging_outputs) + nsentences = sum(log.get("nsentences", 0) for log in logging_outputs) + metrics.log_scalar("latency", latency.float() / nsentences, nsentences, round=3) + metrics.log_scalar("delays_var", delays_var / nsentences, nsentences, round=3) + metrics.log_scalar( + "latency_loss", latency_loss / nsentences, nsentences, round=3 + ) diff --git a/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_alignment.py b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_alignment.py new file mode 100644 index 0000000..b55f65e --- /dev/null +++ b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_alignment.py @@ -0,0 +1,131 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import register_criterion + +from .label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, + LabelSmoothedCrossEntropyCriterionConfig, +) + +from dataclasses import dataclass, field + + +@dataclass +class LabelSmoothedCrossEntropyCriterionWithAlignmentConfig( + LabelSmoothedCrossEntropyCriterionConfig +): + alignment_lambda: float = field( + default=0.05, metadata={"help": "weight for the alignment loss"} + ) + + +@register_criterion( + "label_smoothed_cross_entropy_with_alignment", + dataclass=LabelSmoothedCrossEntropyCriterionWithAlignmentConfig, +) +class LabelSmoothedCrossEntropyCriterionWithAlignment( + LabelSmoothedCrossEntropyCriterion +): + def __init__(self, task, sentence_avg, label_smoothing, alignment_lambda): + super().__init__(task, sentence_avg, label_smoothing) + self.alignment_lambda = alignment_lambda + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(**sample["net_input"]) + loss, nll_loss = self.compute_loss(model, net_output, sample, reduce=reduce) + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": utils.item(loss.data) if reduce else loss.data, + "nll_loss": utils.item(nll_loss.data) if reduce else nll_loss.data, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + + alignment_loss = None + + # Compute alignment loss only for training set and non dummy batches. 
+ if "alignments" in sample and sample["alignments"] is not None: + alignment_loss = self.compute_alignment_loss(sample, net_output) + + if alignment_loss is not None: + logging_output["alignment_loss"] = utils.item(alignment_loss.data) + loss += self.alignment_lambda * alignment_loss + + return loss, sample_size, logging_output + + def compute_alignment_loss(self, sample, net_output): + attn_prob = net_output[1]["attn"][0] + bsz, tgt_sz, src_sz = attn_prob.shape + attn = attn_prob.view(bsz * tgt_sz, src_sz) + + align = sample["alignments"] + align_weights = sample["align_weights"].float() + + if len(align) > 0: + # Alignment loss computation. align (shape [:, 2]) contains the src-tgt index pairs corresponding to + # the alignments. align_weights (shape [:]) contains the 1 / frequency of a tgt index for normalizing. + loss = -( + (attn[align[:, 1][:, None], align[:, 0][:, None]]).log() + * align_weights[:, None] + ).sum() + else: + return None + + return loss + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + nll_loss_sum = utils.item( + sum(log.get("nll_loss", 0) for log in logging_outputs) + ) + alignment_loss_sum = utils.item( + sum(log.get("alignment_loss", 0) for log in logging_outputs) + ) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_scalar( + "alignment_loss", + alignment_loss_sum / sample_size / math.log(2), + sample_size, + round=3, + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + + @staticmethod + def 
logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_ctc.py b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_ctc.py new file mode 100644 index 0000000..f2e8cdf --- /dev/null +++ b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_ctc.py @@ -0,0 +1,97 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from dataclasses import dataclass, field + +import torch +import torch.nn.functional as F + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, + LabelSmoothedCrossEntropyCriterionConfig, +) +from fairseq.data.data_utils import lengths_to_mask + + +@dataclass +class LabelSmoothedCrossEntropyWithCtcCriterionConfig( + LabelSmoothedCrossEntropyCriterionConfig +): + ctc_weight: float = field(default=1.0, metadata={"help": "weight for CTC loss"}) + + +@register_criterion( + "label_smoothed_cross_entropy_with_ctc", + dataclass=LabelSmoothedCrossEntropyWithCtcCriterionConfig, +) +class LabelSmoothedCrossEntropyWithCtcCriterion(LabelSmoothedCrossEntropyCriterion): + def __init__( + self, + task, + sentence_avg, + label_smoothing, + ignore_prefix_size, + report_accuracy, + ctc_weight, + ): + super().__init__( + task, sentence_avg, label_smoothing, ignore_prefix_size, report_accuracy + ) + self.ctc_weight = ctc_weight + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + loss, nll_loss = self.compute_loss(model, net_output, sample, 
reduce=reduce) + + ctc_loss = torch.tensor(0.0).type_as(loss) + if self.ctc_weight > 0.0: + ctc_lprobs, ctc_lens = model.get_ctc_output(net_output, sample) + ctc_tgt, ctc_tgt_lens = model.get_ctc_target(sample) + ctc_tgt_mask = lengths_to_mask(ctc_tgt_lens) + ctc_tgt_flat = ctc_tgt.masked_select(ctc_tgt_mask) + reduction = "sum" if reduce else "none" + ctc_loss = ( + F.ctc_loss( + ctc_lprobs, + ctc_tgt_flat, + ctc_lens, + ctc_tgt_lens, + reduction=reduction, + zero_infinity=True, + ) + * self.ctc_weight + ) + loss += ctc_loss + + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": utils.item(loss.data), + "nll_loss": utils.item(nll_loss.data), + "ctc_loss": utils.item(ctc_loss.data), + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output, sample) + logging_output["n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + return loss, sample_size, logging_output + + @classmethod + def reduce_metrics(cls, logging_outputs) -> None: + super().reduce_metrics(logging_outputs) + loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "ctc_loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) diff --git a/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_rdrop.py b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_rdrop.py new file mode 100644 index 0000000..47ee263 --- /dev/null +++ b/fairseq/fairseq/criterions/label_smoothed_cross_entropy_with_rdrop.py @@ -0,0 +1,177 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +from dataclasses import dataclass, field + +import torch + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, + LabelSmoothedCrossEntropyCriterionConfig, + label_smoothed_nll_loss, +) + + +@dataclass +class RdropLabelSmoothedCrossEntropyCriterionConfig( + LabelSmoothedCrossEntropyCriterionConfig +): + rdrop_alpha: float = field( + default=0.0, + metadata={"help": "alpha for r-drop, 0 means no r-drop"}, + ) + + +@register_criterion( + "label_smoothed_cross_entropy_with_rdrop", + dataclass=RdropLabelSmoothedCrossEntropyCriterionConfig, +) +class RdropLabelSmoothedCrossEntropyCriterion(LabelSmoothedCrossEntropyCriterion): + def __init__( + self, + task, + sentence_avg, + label_smoothing, + ignore_prefix_size=0, + report_accuracy=False, + rdrop_alpha=0.0, + ): + super().__init__( + task, + sentence_avg, + label_smoothing, + ignore_prefix_size=ignore_prefix_size, + report_accuracy=report_accuracy, + ) + self.sentence_avg = sentence_avg + self.eps = label_smoothing + self.ignore_prefix_size = ignore_prefix_size + self.report_accuracy = report_accuracy + self.rdrop_alpha = rdrop_alpha + + def forward(self, model, sample, reduce=True, net_output=None): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + if net_output is None: + if self.rdrop_alpha > 0 and sample["net_input"]["src_tokens"].size( + 0 + ) == sample["target"].size(0): + sample = duplicate_input(sample) + net_output = model(**sample["net_input"]) + loss, nll_loss, rdrop_kl_loss = self.compute_loss( + model, net_output, sample, reduce=reduce + ) + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": loss.data, + "nll_loss": nll_loss.data, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output, sample) + logging_output["n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + if self.rdrop_alpha > 0: + logging_output["rdrop_kl_loss"] = utils.item(rdrop_kl_loss.data) + return loss, sample_size, logging_output + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = model.get_targets(sample, net_output) + if self.rdrop_alpha > 0 or target.size(0) != lprobs.size(0): + target = torch.cat([target, target.clone()], dim=0) + + if self.ignore_prefix_size > 0: + # lprobs: B x T x C + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + def compute_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + + if self.rdrop_alpha > 0: + pad_mask = target[: target.size(0) // 
2].unsqueeze(-1).eq(self.padding_idx) + rdrop_kl_loss = compute_kl_loss(model, net_output, pad_mask) + loss += self.rdrop_alpha * rdrop_kl_loss + else: + rdrop_kl_loss = loss.new_zeros(1) + return loss, nll_loss, rdrop_kl_loss + + @classmethod + def reduce_metrics(cls, logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + super().reduce_metrics(logging_outputs) + + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + rdrop_kl_loss = utils.item( + sum(log.get("rdrop_kl_loss", 0) for log in logging_outputs) + / sample_size + / math.log(2) + ) + if rdrop_kl_loss > 0: + metrics.log_scalar("rdrop_kl_loss", rdrop_kl_loss) + + +def duplicate_input(sample): + if "net_input" in sample.keys(): + sample_input = sample["net_input"] + else: + sample_input = sample + + for k, v in sample_input.items(): + if isinstance(v, torch.Tensor): + sample_input[k] = torch.cat([v, v.clone()], dim=0) + if "net_input" in sample.keys(): + sample["net_input"] = sample_input + else: + sample = sample_input + return sample + + +def compute_kl_loss(model, net_output, pad_mask=None, reduce=True): + net_prob = model.get_normalized_probs(net_output, log_probs=True) + net_prob_tec = model.get_normalized_probs(net_output, log_probs=False) + + net_prob = net_prob.view(-1, net_prob.size(-1)) + net_prob_tec = net_prob_tec.view(-1, net_prob_tec.size(-1)) + + p, q = torch.split(net_prob, net_prob.size(0) // 2, dim=0) + p_tec, q_tec = torch.split(net_prob_tec, net_prob_tec.size(0) // 2, dim=0) + + p_loss = torch.nn.functional.kl_div(p, q_tec, reduction="none") + q_loss = torch.nn.functional.kl_div(q, p_tec, reduction="none") + + if pad_mask is not None: + p_loss.masked_fill_(pad_mask, 0.0) + q_loss.masked_fill_(pad_mask, 0.0) + + if reduce: + p_loss = p_loss.sum() + q_loss = q_loss.sum() + + loss = (p_loss + q_loss) / 2 + return loss diff --git a/fairseq/fairseq/criterions/legacy_masked_lm.py b/fairseq/fairseq/criterions/legacy_masked_lm.py new file 
mode 100644 index 0000000..5cf70df --- /dev/null +++ b/fairseq/fairseq/criterions/legacy_masked_lm.py @@ -0,0 +1,178 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion + + +def compute_cross_entropy_loss(logits, targets, ignore_index=-100): + """ + Function to compute the cross entropy loss. The default value of + ignore_index is the same as the default value for F.cross_entropy in + pytorch. + """ + assert logits.size(0) == targets.size( + -1 + ), "Logits and Targets tensor shapes don't match up" + + loss = F.nll_loss( + F.log_softmax(logits, -1, dtype=torch.float32), + targets, + reduction="sum", + ignore_index=ignore_index, + ) + return loss + + +@register_criterion("legacy_masked_lm_loss") +class LegacyMaskedLmLoss(FairseqCriterion): + """ + Implementation for the loss used in masked language model (MLM) training. + This optionally also computes the next sentence prediction (NSP) loss and + adds it to the overall loss based on the specified args. There are three + cases to consider: + 1) Generic MLM training without NSP loss. In this case sentence_targets + and sentence_logits are both None. + 2) BERT training without NSP loss. In this case sentence_targets is + not None but sentence_logits is None and we should not be computing + a sentence level loss. + 3) BERT training with NSP loss. In this case both sentence_targets and + sentence_logits are not None and we should be computing a sentence + level loss. The weight of the sentence level loss is specified as + an argument. 
+ """ + + def __init__(self, task, masked_lm_only, nsp_loss_weight): + super().__init__(task) + self.masked_lm_only = masked_lm_only + self.nsp_loss_weight = nsp_loss_weight + + @staticmethod + def add_args(parser): + """Args for MaskedLM Loss""" + # Default for masked_lm_only is False so as to not break BERT training + parser.add_argument( + "--masked-lm-only", + default=False, + action="store_true", + help="compute MLM loss only", + ) + parser.add_argument( + "--nsp-loss-weight", + default=1.0, + type=float, + help="weight for next sentence prediction" " loss (default 1)", + ) + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + lm_logits, output_metadata = model(**sample["net_input"]) + + # reshape lm_logits from (N,T,C) to (N*T,C) + lm_logits = lm_logits.view(-1, lm_logits.size(-1)) + lm_targets = sample["lm_target"].view(-1) + lm_loss = compute_cross_entropy_loss(lm_logits, lm_targets, self.padding_idx) + + # compute the number of tokens for which loss is computed. This is used + # to normalize the loss + ntokens = utils.strip_pad(lm_targets, self.padding_idx).numel() + loss = lm_loss / ntokens + nsentences = sample["nsentences"] + # nsentences = 0 + + # Compute sentence loss if masked_lm_only is False + sentence_loss = None + if not self.masked_lm_only: + sentence_logits = output_metadata["sentence_logits"] + sentence_targets = sample["sentence_target"].view(-1) + # This needs to be recomputed due to some differences between + # TokenBlock and BlockPair dataset. This can be resolved with a + # refactor of BERTModel which we will do in the future. + # TODO: Remove this after refactor of BERTModel + nsentences = sentence_targets.size(0) + + # Check for logits being none which can happen when remove_heads + # is set to true in the BERT model. 
Ideally we should set + # masked_lm_only to true in this case, but that requires some + # refactor in the BERT model. + if sentence_logits is not None: + sentence_loss = compute_cross_entropy_loss( + sentence_logits, sentence_targets + ) + + loss += self.nsp_loss_weight * (sentence_loss / nsentences) + + # NOTE: as we are summing up per token mlm loss and per sentence nsp loss + # we don't need to use sample_size as denominator for the gradient + # here sample_size is just used for logging + sample_size = 1 + logging_output = { + "loss": utils.item(loss.data) if reduce else loss.data, + "lm_loss": utils.item(lm_loss.data) if reduce else lm_loss.data, + # sentence loss is not always computed + "sentence_loss": ( + (utils.item(sentence_loss.data) if reduce else sentence_loss.data) + if sentence_loss is not None + else 0.0 + ), + "ntokens": ntokens, + "nsentences": nsentences, + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + lm_loss_sum = sum(log.get("lm_loss", 0) for log in logging_outputs) + sentence_loss_sum = sum(log.get("sentence_loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + nsentences = sum(log.get("nsentences", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + agg_loss = sum(log.get("loss", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", + agg_loss / sample_size / math.log(2) if sample_size > 0 else 0.0, + sample_size, + round=3, + ) + metrics.log_scalar( + "lm_loss", + lm_loss_sum / ntokens / math.log(2) if ntokens > 0 else 0.0, + ntokens, + round=3, + ) + metrics.log_scalar( + "sentence_loss", + sentence_loss_sum / nsentences / math.log(2) if nsentences > 0 else 0.0, + nsentences, + round=3, + ) + metrics.log_scalar( + "nll_loss", + lm_loss_sum / ntokens / math.log(2) if 
ntokens > 0 else 0.0, + ntokens, + round=3, + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/fairseq/criterions/masked_lm.py b/fairseq/fairseq/criterions/masked_lm.py new file mode 100644 index 0000000..09ddd9f --- /dev/null +++ b/fairseq/fairseq/criterions/masked_lm.py @@ -0,0 +1,99 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass +import math +from omegaconf import II + +import torch +from fairseq import modules, utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass + + +@dataclass +class MaskedLmConfig(FairseqDataclass): + tpu: bool = II("common.tpu") + + +@register_criterion("masked_lm", dataclass=MaskedLmConfig) +class MaskedLmLoss(FairseqCriterion): + """ + Implementation for the loss used in masked language model (MLM) training. + """ + + def __init__(self, cfg: MaskedLmConfig, task): + super().__init__(task) + self.tpu = cfg.tpu + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + masked_tokens = sample["target"].ne(self.padding_idx) + sample_size = masked_tokens.int().sum() + + # Rare: when all tokens are masked, project all tokens. + # We use torch.where to avoid device-to-host transfers, + # except on CPU where torch.where is not well supported + # (see github.com/pytorch/pytorch/issues/26247). 
+ if self.tpu: + masked_tokens = None # always project all tokens on TPU + elif masked_tokens.device == torch.device("cpu"): + if not masked_tokens.any(): + masked_tokens = None + else: + masked_tokens = torch.where( + masked_tokens.any(), + masked_tokens, + masked_tokens.new([True]), + ) + + logits = model(**sample["net_input"], masked_tokens=masked_tokens)[0] + targets = model.get_targets(sample, [logits]) + if masked_tokens is not None: + targets = targets[masked_tokens] + + loss = modules.cross_entropy( + logits.view(-1, logits.size(-1)), + targets.view(-1), + reduction="sum", + ignore_index=self.padding_idx, + ) + + logging_output = { + "loss": loss if self.tpu else loss.data, + "ntokens": sample["ntokens"], + "nsentences": sample["nsentences"], + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/fairseq/criterions/model_criterion.py b/fairseq/fairseq/criterions/model_criterion.py new file mode 100644 index 0000000..4c020dd --- /dev/null +++ b/fairseq/fairseq/criterions/model_criterion.py @@ -0,0 +1,177 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@register_criterion("model", dataclass=ModelCriterionConfig)
class ModelCriterion(FairseqCriterion):
    """
    This criterion relies on the model to supply losses.
    The losses should be a dictionary of name -> scalar returned by
    the model either by including it in the net_output dict or by
    implementing a get_losses(net_output, sample) method. The final loss is
    a scaled sum of all losses according to weights in loss_weights.
    If no weights are provided, then all losses are scaled by 1.0.

    The losses will be automatically logged. Additional keys from
    net_output dict can be logged via the log_keys parameter.
    """

    def __init__(self, task, loss_weights=None, log_keys=None, can_sum=True):
        """Store the loss weights, extra log keys and the summability flag.

        ``loss_weights`` and ``log_keys`` may be ``None`` (the signature
        default); they are then treated as "no weights" / "no extra keys".
        """
        super().__init__(task)
        # forward() calls len() on loss_weights and iterates log_keys, so the
        # ``None`` defaults must be replaced by empty containers here.
        self.loss_weights = {} if loss_weights is None else loss_weights
        self.log_keys = [] if log_keys is None else log_keys
        self.can_sum = can_sum

    def forward(self, model, sample, reduce=True):
        """Run the model and combine the losses it reports.

        Returns ``(loss, sample_size, logging_output)``.

        Raises:
            Exception: if the model neither implements ``get_losses`` nor
                returns a dict containing ``"losses"``.
            KeyError: if weights were configured but one is missing for a
                loss name reported by the model.
        """
        net_output = model(**sample["net_input"])

        scaled_losses = {}

        if hasattr(model, "get_losses"):
            losses = model.get_losses(net_output, sample)
        elif isinstance(net_output, dict) and "losses" in net_output:
            losses = net_output["losses"]
        else:
            raise Exception("Could not retrieve losses")

        for lk, p in losses.items():
            try:
                # With no configured weights every loss is scaled by 1.0.
                coef = 1.0 if len(self.loss_weights) == 0 else self.loss_weights[lk]
            except KeyError:
                logger.error(
                    f"weight for loss {lk} is not in loss_weights ({self.loss_weights})"
                )
                raise
            # Zero-weighted and absent losses are left out of the total.
            if coef != 0 and p is not None:
                scaled_losses[lk] = coef * p.float().sum()

        loss = sum(scaled_losses.values())

        if "sample_size" in net_output:
            sample_size = net_output["sample_size"]
        else:
            sample_size = loss.numel()

        if reduce and loss.numel() > 1:
            loss = loss.sum()

        logging_output = {
            "loss": loss.data,
            "ntokens": sample_size,
            "nsentences": sample["id"].numel(),
            "sample_size": sample_size,
            "_world_size": 1,
        }

        for lk in self.log_keys:
            if lk in net_output and net_output[lk] is not None:
                value = net_output[lk]
                if not torch.is_tensor(value) or value.numel() == 1:
                    logging_output[lk] = float(value)
                elif lk.startswith("_"):
                    # Underscore-prefixed keys are passed through unchanged.
                    logging_output[lk] = value
                else:
                    # Multi-element tensors are logged one entry per element.
                    for i, v in enumerate(value):
                        logging_output[f"{lk}_{i}"] = float(v)

        # Individual terms are only logged when there is more than one.
        if len(scaled_losses) > 1:
            for lk, scaled in scaled_losses.items():
                if scaled.numel() > 1:
                    scaled = scaled.sum()
                logging_output[f"loss_{lk}"] = scaled.item()

        if "logs" in net_output:
            for lgw in net_output["logs"]:
                logging_output[lgw] = net_output["logs"][lgw]

        return loss, sample_size, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""
        loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs))
        ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs))
        nsentences = utils.item(
            sum(log.get("nsentences", 0) for log in logging_outputs)
        )
        sample_size = utils.item(
            sum(log.get("sample_size", 0) for log in logging_outputs)
        )

        metrics.log_scalar("loss", loss_sum / sample_size, sample_size, round=3)
        metrics.log_scalar("ntokens", ntokens)
        metrics.log_scalar("nsentences", nsentences)
        metrics.log_scalar("sample_size", sample_size)

        builtin_keys = {
            "loss",
            "ntokens",
            "nsentences",
            "sample_size",
            "_world_size",
        }

        # Each forward() call logs "_world_size": 1, so this sum counts the
        # logging outputs that were combined.
        world_size = utils.item(
            sum(log.get("_world_size", 0) for log in logging_outputs)
        )

        for k in logging_outputs[0]:
            if k not in builtin_keys and not k.startswith("_"):
                val = sum(log.get(k, 0) for log in logging_outputs)
                if k.startswith("loss_"):
                    # Per-term losses are normalised like the main loss.
                    metrics.log_scalar(k, val / sample_size, sample_size, round=3)
                else:
                    # Every other key is averaged over the combined outputs.
                    metrics.log_scalar(k, val / world_size, round=3)

        correct = sum(log.get("correct", 0) for log in logging_outputs)
        total = sum(log.get("count", 0) for log in logging_outputs)

        if total > 0:
            metrics.log_scalar("_correct", correct)
            metrics.log_scalar("_total", total)

            metrics.log_derived(
                "accuracy",
                lambda meters: safe_round(
                    meters["_correct"].sum / meters["_total"].sum, 5
                )
                if meters["_total"].sum > 0
                else float("nan"),
            )

    def logging_outputs_can_be_summed(self) -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True improves distributed training speed.
        """
        return self.can_sum
@register_criterion("nat_loss", dataclass=LabelSmoothedDualImitationCriterionConfig)
class LabelSmoothedDualImitationCriterion(FairseqCriterion):
    """Criterion that sums the per-objective losses reported by the model."""

    def __init__(self, task, label_smoothing):
        super().__init__(task)
        self.label_smoothing = label_smoothing

    def _compute_loss(
        self, outputs, targets, masks=None, label_smoothing=0.0, name="loss", factor=1.0
    ):
        """Compute a (optionally label-smoothed) loss for one objective.

        outputs: batch x len x d_model
        targets: batch x len
        masks:   batch x len

        policy_logprob: if there is some policy
            depends on the likelihood score as rewards.

        Returns a dict with keys ``name``, ``loss`` (already multiplied by
        ``factor``), ``nll_loss`` and ``factor``.
        """

        def mean_ds(x: Tensor, dim=None) -> Tensor:
            # Average in float32, then cast back to the input dtype.
            return (
                x.float().mean().type_as(x)
                if dim is None
                else x.float().mean(dim).type_as(x)
            )

        if masks is not None:
            outputs, targets = outputs[masks], targets[masks]

        if masks is not None and not masks.any():
            # Nothing selected by the mask: contribute a zero loss.
            nll_loss = torch.tensor(0)
            loss = nll_loss
        else:
            logits = F.log_softmax(outputs, dim=-1)
            if targets.dim() == 1:
                losses = F.nll_loss(logits, targets.to(logits.device), reduction="none")

            else:  # soft-labels
                losses = F.kl_div(logits, targets.to(logits.device), reduction="none")
                losses = losses.sum(-1)

            nll_loss = mean_ds(losses)
            if label_smoothing > 0:
                loss = (
                    nll_loss * (1 - label_smoothing) - mean_ds(logits) * label_smoothing
                )
            else:
                loss = nll_loss

        loss = loss * factor
        return {"name": name, "loss": loss, "nll_loss": nll_loss, "factor": factor}

    def _custom_loss(self, loss, name="loss", factor=1.0):
        """Wrap a loss the model computed itself in the common result dict."""
        return {"name": name, "loss": loss, "factor": factor}

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        nsentences, ntokens = sample["nsentences"], sample["ntokens"]

        # B x T
        src_tokens, src_lengths = (
            sample["net_input"]["src_tokens"],
            sample["net_input"]["src_lengths"],
        )
        tgt_tokens, prev_output_tokens = sample["target"], sample["prev_target"]

        outputs = model(src_tokens, src_lengths, prev_output_tokens, tgt_tokens)
        losses, nll_loss = [], []

        for obj in outputs:
            if outputs[obj].get("loss", None) is None:
                _losses = self._compute_loss(
                    outputs[obj].get("out"),
                    outputs[obj].get("tgt"),
                    outputs[obj].get("mask", None),
                    outputs[obj].get("ls", 0.0),
                    name=obj + "-loss",
                    factor=outputs[obj].get("factor", 1.0),
                )
            else:
                _losses = self._custom_loss(
                    outputs[obj].get("loss"),
                    name=obj + "-loss",
                    factor=outputs[obj].get("factor", 1.0),
                )

            losses += [_losses]
            # Only objectives flagged with "nll_loss" feed the nll_loss metric.
            if outputs[obj].get("nll_loss", False):
                nll_loss += [_losses.get("nll_loss", 0.0)]

        loss = sum(entry["loss"] for entry in losses)
        nll_loss = sum(nll_loss) if len(nll_loss) > 0 else loss.new_tensor(0)

        # NOTE:
        # we don't need to use sample_size as denominator for the gradient
        # here sample_size is just used for logging
        sample_size = 1
        logging_output = {
            "loss": loss.data,
            "nll_loss": nll_loss.data,
            "ntokens": ntokens,
            "nsentences": nsentences,
            "sample_size": sample_size,
        }

        for entry in losses:
            # Log each objective without its weighting factor. The key must be
            # the string "loss"; indexing with the list ["loss"] raises
            # TypeError (unhashable type) whenever reduce is False.
            unscaled = entry["loss"].data / entry["factor"]
            logging_output[entry["name"]] = utils.item(unscaled) if reduce else unscaled

        return loss, sample_size, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""
        sample_size = utils.item(
            sum(log.get("sample_size", 0) for log in logging_outputs)
        )
        loss = utils.item(sum(log.get("loss", 0) for log in logging_outputs))
        nll_loss = utils.item(sum(log.get("nll_loss", 0) for log in logging_outputs))

        metrics.log_scalar(
            "loss", loss / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_scalar(
            "nll_loss", nll_loss / sample_size / math.log(2), sample_size, round=3
        )
        metrics.log_derived(
            "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)
        )

        # Per-objective entries are the keys ending in "-loss"; they are
        # logged under the name without that suffix.
        for key in logging_outputs[0]:
            if key[-5:] == "-loss":
                val = sum(log.get(key, 0) for log in logging_outputs)
                metrics.log_scalar(
                    key[:-5],
                    val / sample_size / math.log(2) if sample_size > 0 else 0.0,
                    sample_size,
                    round=3,
                )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True improves distributed training speed.
        """
        return True
def simple_accuracy(preds, labels):
    """Return the fraction of positions at which ``preds`` equals ``labels``."""
    # Coerce to arrays so plain sequences are compared element-wise; comparing
    # two lists with == yields a single bool, which has no .mean().
    return (np.asarray(preds) == np.asarray(labels)).mean()


def acc_and_f1(preds, labels):
    """Return accuracy, F1 and their arithmetic mean as a dict."""
    acc = simple_accuracy(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    return {
        "acc": acc,
        "f1": f1,
        "acc_and_f1": (acc + f1) / 2,
    }


def pearson_and_spearman(preds, labels):
    """Return the Pearson and Spearman correlations and their mean as a dict."""
    # Both scipy calls return (statistic, p-value); only the statistic is kept.
    pearson_corr = pearsonr(preds, labels)[0]
    spearman_corr = spearmanr(preds, labels)[0]
    return {
        "pearson": pearson_corr,
        "spearmanr": spearman_corr,
        "corr": (pearson_corr + spearman_corr) / 2,
    }


def matthews_corrcoef(preds, labels):
    """Return the Matthews correlation coefficient for ``(preds, labels)``."""
    # make it consistent with other metrics taking (preds, labels) as input
    return _matthews_corrcoef(labels, preds)
    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training

        ``reduce`` is accepted but not read in this method.
        """
        assert (
            hasattr(model, "classification_heads")
            and self.classification_head_name in model.classification_heads
        ), "model must provide sentence classification head for --criterion=sentence_prediction"

        logits, _ = model(
            **sample["net_input"],
            features_only=True,
            classification_head_name=self.classification_head_name,
        )
        targets = model.get_targets(sample, [logits]).view(-1)
        # One flattened target per element; this count is also logged as
        # "nsentences" below.
        sample_size = targets.numel()

        if not self.regression_target:
            # Classification: summed negative log-likelihood over float32
            # log-probabilities.
            lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
            task_loss = F.nll_loss(lprobs, targets, reduction="sum")
        else:
            # Regression: summed squared error on flattened float values.
            logits = logits.view(-1).float()
            targets = targets.float()
            task_loss = F.mse_loss(logits, targets, reduction="sum")

        logging_output = {}
        # NOTE: `loss` and `task_loss` name the same object, so the in-place
        # `loss +=` additions below update both.
        loss = task_loss
        # mha & ffn regularization update
        if (
            hasattr(model, "args")
            and hasattr(model.args, "mha_reg_scale_factor")
            and model.args.mha_reg_scale_factor != 0.0
        ):
            mha_reg_loss = model._get_adaptive_head_loss()
            loss += mha_reg_loss
            logging_output.update({"mha_reg_loss": mha_reg_loss})
        if (
            hasattr(model, "args")
            and hasattr(model.args, "ffn_reg_scale_factor")
            and model.args.ffn_reg_scale_factor != 0.0
        ):
            ffn_reg_loss = model._get_adaptive_ffn_loss()
            loss += ffn_reg_loss
            logging_output.update({"ffn_reg_loss": ffn_reg_loss})

        logging_output.update(
            {
                "loss": loss.data,
                "ntokens": sample["ntokens"],
                "nsentences": sample_size,
                "sample_size": sample_size,
            }
        )
        if not self.regression_target:
            preds = logits.argmax(dim=1)
            logging_output["ncorrect"] = (preds == targets).sum()
        # Raw predictions/targets are only collected outside training, and
        # only when at least one of the report_* options is enabled.
        if self.keep_pred_and_targ and not model.training:
            if self.regression_target:
                logging_output["pred"] = logits.detach().cpu().tolist()
                logging_output["targ"] = targets.detach().cpu().tolist()
            else:
                # remove offset `self.label_dict.nspecial` from OffsetTokensDataset
                # (`preds` and `targets` are rebound from tensors to lists of
                # label strings here, then parsed as ints)
                preds = self.label_dict.string(preds + self.label_dict.nspecial).split()
                targets = self.label_dict.string(
                    targets + self.label_dict.nspecial
                ).split()
                logging_output["pred"] = list(map(int, preds))
                logging_output["targ"] = list(map(int, targets))

            # Marker flags; reduce_metrics checks for their presence to decide
            # which derived metrics to register.
            if self.report_mcc:
                logging_output["report_mcc"] = True
            if self.report_acc_and_f1:
                logging_output["report_acc_and_f1"] = True
            if self.report_pearson_and_spearman:
                logging_output["report_pearson_and_spearman"] = True

        return loss, sample_size, logging_output
len(logging_outputs) > 0 and "ncorrect" in logging_outputs[0]: + ncorrect = sum(log.get("ncorrect", 0) for log in logging_outputs) + metrics.log_scalar( + "accuracy", 100.0 * ncorrect / nsentences, nsentences, round=1 + ) + + # Metrics used by GLUE + pred = np.array( + list(chain.from_iterable(log.get("pred", []) for log in logging_outputs)) + ) + targ = np.array( + list(chain.from_iterable(log.get("targ", []) for log in logging_outputs)) + ) + if len(pred): + metrics.log_concat_tensor("pred", torch.from_numpy(pred), dim=0) + metrics.log_concat_tensor("targ", torch.from_numpy(targ), dim=0) + if any("report_mcc" in log for log in logging_outputs): + metrics.log_derived( + "mcc", + lambda meters: safe_round( + matthews_corrcoef( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + ) + * 100, + 1, + ), + ) + if any("report_acc_and_f1" in log for log in logging_outputs): + metrics.log_derived( + "acc_and_f1", + lambda meters: safe_round( + acc_and_f1( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + )["acc_and_f1"] + * 100, + 1, + ), + ) + metrics.log_derived( + "f1", + lambda meters: safe_round( + acc_and_f1( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + )["f1"] + * 100, + 1, + ), + ) + if any("report_pearson_and_spearman" in log for log in logging_outputs): + metrics.log_derived( + "pearson_and_spearman", + lambda meters: safe_round( + pearson_and_spearman( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + )["corr"] + * 100, + 1, + ), + ) + metrics.log_derived( + "pearson", + lambda meters: safe_round( + pearson_and_spearman( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + )["pearson"] + * 100, + 1, + ), + ) + metrics.log_derived( + "spearman", + lambda meters: safe_round( + pearson_and_spearman( + meters["pred"].tensor.numpy(), + meters["targ"].tensor.numpy(), + )["spearmanr"] + * 100, + 1, + ), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + 
@register_criterion("sentence_prediction_adapters", dataclass=SentencePredictionConfig)
class SentencePredictionCriterionAdapters(SentencePredictionCriterion):
    """Sentence-prediction criterion that also passes a ``lang_id`` to the model."""

    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        assert (
            hasattr(model, "classification_heads")
            and self.classification_head_name in model.classification_heads
        ), "model must provide sentence classification head for --criterion=sentence_prediction"

        # `sample` is indexed like a mapping throughout this method, so the
        # language IDs must be looked up as a key. hasattr() only inspects
        # attributes and would never find a "lang_id" key, silently forcing
        # the English fallback.
        if "lang_id" in sample:
            lang_id = sample["lang_id"]
        else:
            # If no language ID is given, we fall back to English
            lang_id = ["en_XX"] * sample["nsentences"]

        logits, _ = model(
            **sample["net_input"],
            features_only=True,
            classification_head_name=self.classification_head_name,
            lang_id=lang_id,
        )
        targets = model.get_targets(sample, [logits]).view(-1)
        sample_size = targets.numel()

        if not self.regression_target:
            # Classification: summed NLL over float32 log-probabilities.
            lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
            loss = F.nll_loss(lprobs, targets, reduction="sum")
        else:
            # Regression: summed squared error on flattened float values.
            logits = logits.view(-1).float()
            targets = targets.float()
            loss = F.mse_loss(logits, targets, reduction="sum")

        logging_output = {
            "loss": loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample_size,
            "sample_size": sample_size,
        }
        if not self.regression_target:
            preds = logits.argmax(dim=1)
            logging_output["ncorrect"] = (preds == targets).sum()

        return loss, sample_size, logging_output
@register_criterion("sentence_ranking")
class SentenceRankingCriterion(FairseqCriterion):
    """Ranking criterion: scores each candidate input and applies NLL over them."""

    def __init__(self, task, ranking_head_name, save_predictions, num_classes):
        """Store the head name / class count and optionally open a predictions file.

        ``save_predictions`` is a file path or ``None``; when given, the file
        is opened for writing and kept open until the criterion is destroyed.
        """
        super().__init__(task)
        self.ranking_head_name = ranking_head_name
        self.num_classes = num_classes
        # Bind the attribute before open() can raise so that __del__ always
        # finds it.
        self.prediction_h = None
        if save_predictions is not None:
            self.prediction_h = open(save_predictions, "w")

    def __del__(self):
        # getattr guards against a partially constructed instance (for example
        # when the base-class initialiser raised before the attribute was set).
        handle = getattr(self, "prediction_h", None)
        if handle is not None:
            handle.close()

    @staticmethod
    def add_args(parser):
        """Register the criterion's command-line options on ``parser``."""
        # fmt: off
        parser.add_argument('--save-predictions', metavar='FILE',
                            help='file to save predictions to')
        parser.add_argument('--ranking-head-name',
                            default='sentence_classification_head',
                            help='name of the ranking head to use')
        # fmt: on

    def forward(self, model, sample, reduce=True):
        """Compute ranking loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """
        assert (
            hasattr(model, "classification_heads")
            and self.ranking_head_name in model.classification_heads
        ), "model must provide sentence ranking head for --criterion=sentence_ranking"

        # Candidate inputs are stored under "net_input1" .. "net_input<N>".
        scores = []
        for idx in range(self.num_classes):
            score, _ = model(
                **sample["net_input{idx}".format(idx=idx + 1)],
                classification_head_name=self.ranking_head_name,
            )
            scores.append(score)

        logits = torch.cat(scores, dim=1)
        sample_size = logits.size(0)

        if "target" in sample:
            targets = model.get_targets(sample, [logits]).view(-1)
            lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
            loss = F.nll_loss(lprobs, targets, reduction="sum")
        else:
            # No labels available: report a zero loss.
            targets = None
            loss = torch.tensor(0.0, requires_grad=True)

        preds = logits.argmax(dim=1)

        if self.prediction_h is not None:
            # One tab-separated line per sample: id, prediction[, label].
            for i, (sample_id, pred) in enumerate(
                zip(sample["id"].tolist(), preds.tolist())
            ):
                if targets is not None:
                    label = targets[i].item()
                    print(
                        "{}\t{}\t{}".format(sample_id, pred, label),
                        file=self.prediction_h,
                    )
                else:
                    print("{}\t{}".format(sample_id, pred), file=self.prediction_h)

        logging_output = {
            "loss": loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample_size,
            "sample_size": sample_size,
        }
        if targets is not None:
            logging_output["ncorrect"] = (preds == targets).sum()

        return loss, sample_size, logging_output

    @staticmethod
    def reduce_metrics(logging_outputs) -> None:
        """Aggregate logging outputs from data parallel training."""
        loss_sum = sum(log.get("loss", 0) for log in logging_outputs)
        ntokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        nsentences = sum(log.get("nsentences", 0) for log in logging_outputs)
        sample_size = sum(log.get("sample_size", 0) for log in logging_outputs)

        # Dividing by log(2) reports the loss in bits.
        metrics.log_scalar(
            "loss", loss_sum / sample_size / math.log(2), sample_size, round=3
        )
        if sample_size != ntokens:
            metrics.log_scalar(
                "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3
            )

        if len(logging_outputs) > 0 and "ncorrect" in logging_outputs[0]:
            ncorrect = sum(log.get("ncorrect", 0) for log in logging_outputs)
            metrics.log_scalar(
                "accuracy", 100.0 * ncorrect / nsentences, nsentences, round=1
            )

    @staticmethod
    def logging_outputs_can_be_summed() -> bool:
        """
        Whether the logging outputs returned by `forward` can be summed
        across workers prior to calling `reduce_metrics`. Setting this
        to True improves distributed training speed.
        """
        return True
@dataclass
class SpeechDLMCriterionConfig(FairseqDataclass):
    # Resolved from the shared optimization settings.
    sentence_avg: bool = II("optimization.sentence_avg")
    # Two comma-separated weights: main-channel first, cross-channel second.
    main_and_cross_weights: Optional[str] = field(
        default="1,0",
        metadata=dict(
            help="Comma-separated list of weights of Main-channel vs Cross-channel Prediction Losses(default: 1,0)",
        ),
    )
    # Weight applied to the 'next' target loss.
    general_unit_loss_weight: float = field(
        default=0,
        metadata=dict(
            help="The weight of the General Prediction Loss (Next-step Unit Prediction Loss)(default: 0)",
        ),
    )
    # Weight applied to the 'edge' target loss.
    edge_unit_loss_weight: float = field(
        default=1,
        metadata=dict(
            help="The weight of the Edge Unit Prediction Loss(default: 1)",
        ),
    )
    # Weight applied to the 'duration' target loss.
    duration_loss_weight: float = field(
        default=1,
        metadata=dict(
            help="The weight of the Edge Unit Duration Prediction Loss(default: 1)",
        ),
    )
task.delayed_duration_target + + self.main_channel_weight = float(main_and_cross_weights.split(",")[0]) + self.cross_channel_weight = float(main_and_cross_weights.split(",")[1]) + assert self.main_channel_weight >= 0 and self.cross_channel_weight >= 0 + + self.channel_weights = { + channel: weight + for channel, weight in zip(self.channels, task.channel_weights) + } + + self.target_weights = {} + for t in self.targets: + if t == "next": + self.target_weights[t] = general_unit_loss_weight + assert ( + general_unit_loss_weight > 0 + ), "Expect a positive --general-unit-loss-weight for next unit prediction" + elif t == "edge": + self.target_weights[t] = edge_unit_loss_weight + assert ( + edge_unit_loss_weight > 0 + ), "Expect a positive --edge-unit-loss-weight for edge unit prediction" + elif t == "duration": + self.target_weights[t] = duration_loss_weight + assert ( + duration_loss_weight > 0 + ), "Expect a positive --duration-loss-weight for duration prediction" + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(**sample["net_input"]) + loss_dict, stats_dict = self.compute_loss( + model, net_output, sample, reduce=reduce + ) + nsentences = sample["net_input"]["src_tokens"][self.channels[0]].size(0) + + logging_output = { + "nsentences": nsentences, + } + logging_output["nsentences"] = nsentences + + loss_all = {t: 0 for t in self.targets} + correct_all = {t: 0 for t in self.targets} + count_all = {t: 0 for t in self.targets} + ntokens_all = 0 + sample_size_all = 0 + for channel in loss_dict: + for pred_channel in loss_dict[channel]: + # Get ntokens & sample_size + ntokens = sample["net_input"]["src_tokens"][channel].numel() + sample_size = nsentences if self.sentence_avg else ntokens + prefix = "[{}-{}]".format(channel, pred_channel) + log_keys = { + "next": "general_token", + "edge": "edge_token", + "duration": "edge_duration", + } + + # Log & Update the sizes + logging_output["{}ntokens".format(prefix)] = ntokens + logging_output["{}sample_size".format(prefix)] = sample_size + ntokens_all += ntokens + sample_size_all += sample_size + + for t in self.targets: + log_key = log_keys[t] + loss = loss_dict[channel][pred_channel][t] + correct, count = stats_dict[channel][pred_channel][t] + + # Log the statistics + logging_output["{}{}_loss".format(prefix, log_key)] = loss.data + logging_output["{}{}_correct".format(prefix, log_key)] = correct + logging_output["{}{}_count".format(prefix, log_key)] = count + + # Scale the training loss by weights + target_loss = loss * self.channel_weights[channel] + if pred_channel == channel: + target_loss = target_loss * self.main_channel_weight + else: + target_loss = target_loss * self.cross_channel_weight + # Normalize the losses in the training by the number of edges + if t in ["edge", "duration"]: + target_loss = target_loss / count * 
sample_size + + # Update the statistics + loss_all[t] += target_loss + correct_all[t] += correct + count_all[t] += count + + # Logging the average statistics + logging_output["ntokens"] = ntokens_all + logging_output["sample_size"] = sample_size_all + for t in self.targets: + log_key = { + "next": "general_token", + "edge": "edge_token", + "duration": "edge_duration", + }[t] + logging_output["{}_loss".format(log_key)] = loss_all[t].data + logging_output["{}_correct".format(log_key)] = correct_all[t] + logging_output["{}_count".format(log_key)] = count_all[t] + + # Define the training loss + training_loss = 0 + for t in self.targets: + training_loss += loss_all[t] * self.target_weights[t] + logging_output["loss"] = training_loss.data + + return training_loss, sample_size_all, logging_output + + def compute_loss(self, model, net_output, sample, reduce=True): + # Get the model outputs and target + lprobs_dict = model.get_normalized_probs(net_output, log_probs=True) + target_dict = model.get_targets(sample, net_output) + + # Init the dictionaries + loss_dict, stats_dict = {}, {} + + for channel in lprobs_dict: + # Init the dictionaries + loss_dict[channel], stats_dict[channel] = {}, {} + + for pred_channel in lprobs_dict[channel]: + # Init the dictionaries + loss_dict[channel][pred_channel] = {} + stats_dict[channel][pred_channel] = {} + + # Get token & duration predictions + outputs = lprobs_dict[channel][pred_channel] + if not isinstance(outputs, dict): + token_lprobs = outputs + else: + token_lprobs = outputs["pred_token"] + dur_preds = outputs["pred_duration"] + dur_preds = dur_preds.view(-1) + token_lprobs = token_lprobs.view(-1, token_lprobs.size(-1)) + token_preds = token_lprobs.argmax(dim=-1) + + # Get edge indices + if "edge" in self.targets or "duration" in self.targets: + edge_indices = target_dict["edge_indices"][pred_channel] + + # Compute loss and statistics + for t in self.targets: + if t in ["next", "edge"]: + if t == "next": + target = 
target_dict["next"][pred_channel].view(-1) + lprobs = token_lprobs + preds = token_preds + elif t == "edge": + target = target_dict["edge"][pred_channel] + lprobs = token_lprobs[edge_indices] + preds = token_preds[edge_indices] + + loss = F.nll_loss( + lprobs, + target, + ignore_index=self.padding_idx, + reduction="sum" if reduce else "none", + ) + elif t == "duration": + target = target_dict["duration"][pred_channel] + if self.delayed_duration_target: + duration_indices = edge_indices + 1 + if duration_indices[-1] == len(dur_preds): + duration_indices = duration_indices[:-1] + target = target[:-1] + else: + duration_indices = edge_indices + preds = dur_preds[duration_indices] + + loss = F.l1_loss( + preds, + target, + reduction="sum" if reduce else "none", + ) + preds = preds.round() + + correct = (preds == target).sum().float().cpu().item() + count = float(target.size(0)) + + loss_dict[channel][pred_channel][t] = loss + stats_dict[channel][pred_channel][t] = (correct, count) + + return loss_dict, stats_dict + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + logging_keys = next(iter(logging_outputs)).keys() + channels = [item[:-7] for item in logging_keys if item.endswith("ntokens")] + target_prefixes = set( + [ + item[:-5].split("]")[-1] + for item in logging_keys + if item.endswith("_loss") + ] + ) + for channel_prefix in channels: + for target_prefix in target_prefixes: + prefix = "{}{}".format(channel_prefix, target_prefix) + count_sum = sum( + log.get("{}_count".format(prefix), 0) for log in logging_outputs + ) + correct_sum = sum( + log.get("{}_correct".format(prefix), 0) for log in logging_outputs + ) + loss_sum = sum( + log.get("{}_loss".format(prefix), 0) for log in logging_outputs + ) + + if "duration" not in target_prefix: + # we divide by log(2) to convert the loss from base e to base 2 + metrics.log_scalar( + "{}_loss".format(prefix), + loss_sum / count_sum / math.log(2), + 
count_sum, + round=3, + ) + metrics.log_derived( + "{}_ppl".format(prefix), + lambda meters, prefix=prefix: utils.get_perplexity( + meters["{}_loss".format(prefix)].avg + ), + ) + else: + # for duration we don't need to divide by log(2) + metrics.log_scalar( + "{}_loss".format(prefix), + loss_sum / count_sum, + count_sum, + round=3, + ) + + accuracy = 100 * correct_sum / count_sum + metrics.log_scalar("{}_pred_acc".format(prefix), accuracy, round=3) + + # Logging training loss + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + + # we divide by log(2) to convert the loss from base e to base 2 + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/fairseq/criterions/speech_to_speech_criterion.py b/fairseq/fairseq/criterions/speech_to_speech_criterion.py new file mode 100644 index 0000000..06a8252 --- /dev/null +++ b/fairseq/fairseq/criterions/speech_to_speech_criterion.py @@ -0,0 +1,517 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class MultitaskCriterion:
    """Mixin that owns the auxiliary (multitask) criteria of a main criterion.

    For every entry of ``multitask_tasks`` whose initial loss weight is not
    zero, one criterion is built: ``CtcCriterion`` when the task's
    ``decoder_type`` is ``"ctc"``, otherwise
    ``RdropLabelSmoothedCrossEntropyCriterion``.
    """

    def __init__(self, multitask_tasks, rdrop_alpha=0.0):
        """
        Args:
            multitask_tasks: mapping ``task_name -> task object``; each task
                object must expose ``args`` with ``get_loss_weight``,
                ``rdrop_alpha``, ``decoder_type`` and ``criterion_cfg``.
            rdrop_alpha: default R-Drop weight, used for tasks whose own
                ``rdrop_alpha`` is ``None``.
        """
        self.rdrop_alpha = rdrop_alpha
        # Ends up holding the R-Drop weight of the LAST non-skipped task.
        self.rdrop_alpha_mtl = rdrop_alpha

        self.multitask_criterion = OrderedDict()
        self.multitask_loss_weight = OrderedDict()
        for task_name, task_obj in multitask_tasks.items():
            if task_obj.args.get_loss_weight(0) == 0:
                logger.info(f"Skip {task_name} loss criterion")
                continue

            rdrop_alpha_task = task_obj.args.rdrop_alpha
            if rdrop_alpha_task is None:
                rdrop_alpha_task = rdrop_alpha
            self.rdrop_alpha_mtl = rdrop_alpha_task
            logger.info(f"rdrop_alpha is set to {rdrop_alpha_task} for {task_name}")

            if task_obj.args.decoder_type == "ctc":
                self.multitask_criterion[task_name] = CtcCriterion(
                    task_obj.args.criterion_cfg,
                    task_obj,
                    rdrop_alpha=rdrop_alpha_task,
                )
            else:
                self.multitask_criterion[
                    task_name
                ] = RdropLabelSmoothedCrossEntropyCriterion(
                    task_obj,
                    task_obj.args.criterion_cfg.sentence_avg,
                    label_smoothing=task_obj.args.criterion_cfg.label_smoothing,
                    rdrop_alpha=rdrop_alpha_task,
                )

    def set_multitask_loss_weight(self, task_name, weight=0.0):
        """Record the weight applied to ``task_name``'s loss."""
        self.multitask_loss_weight[task_name] = weight

    def get_multitask_loss(self, model, sample, model_out):
        """Run every auxiliary criterion and return the weighted loss sum.

        Args:
            model: main model; must expose ``multitask_decoders`` and may
                expose ``mt_task_name``.
            sample: the batch; per-task targets are read from
                ``sample["multitask"][task_name]``.
            model_out: the ``extra`` dict of the main model, providing
                ``encoder_states``, ``encoder_padding_mask`` and, for CTC
                tasks fed from the decoder, ``inner_states``.

        Returns:
            ``(loss, logging_output)`` where ``logging_output`` maps each
            task name to that criterion's logging dict plus ``loss_weight``.
            With no auxiliary criterion the result is ``(0.0, {})``.
        """
        logging_output = {}
        loss = 0.0
        for task_name, task_criterion in self.multitask_criterion.items():
            layer_id = task_criterion.task.args.input_layer
            if isinstance(task_criterion, CtcCriterion):
                if task_criterion.task.args.input_from == "encoder":
                    if len(model_out["encoder_padding_mask"]) > 0:
                        non_padding_mask = ~model_out["encoder_padding_mask"][0]
                        input_lengths = non_padding_mask.long().sum(-1)
                    else:
                        # No padding mask: every sequence spans the full
                        # first dimension of the encoder state.
                        out = model_out["encoder_states"][layer_id]
                        input_lengths = out.new_full(
                            (out.shape[1],), out.shape[0]
                        ).long()

                    task_sample = {
                        "net_input": {
                            "src_tokens": model_out["encoder_states"][
                                layer_id
                            ],  # check batch idx
                            "src_lengths": input_lengths,
                        },
                        "id": sample["id"],
                    }
                else:
                    task_sample = {
                        "net_input": {
                            "src_tokens": model_out["inner_states"][layer_id],
                            "src_lengths": sample["target_lengths"],
                        },
                        "id": sample["id"],
                    }
            else:
                task_sample = {
                    "net_input": {
                        "src_tokens": sample["multitask"][task_name]["net_input"][
                            "prev_output_tokens"
                        ],
                        "encoder_out": {
                            "encoder_out": [model_out["encoder_states"][layer_id]],
                            "encoder_padding_mask": model_out["encoder_padding_mask"],
                        },
                    }
                }

            for key in ["target", "target_lengths", "ntokens"]:
                task_sample[key] = sample["multitask"][task_name][key]

            # The MT task's decoder output was already produced by the main
            # forward pass; reuse it rather than decoding again.
            if task_name == getattr(model, "mt_task_name", None):
                decoder_out = model_out["mt_decoder_out"]
            else:
                decoder_out = None
            task_loss, task_sample_size, task_logging_output = task_criterion(
                model.multitask_decoders[task_name], task_sample, net_output=decoder_out
            )

            loss = loss + self.multitask_loss_weight[task_name] * task_loss
            task_logging_output["loss_weight"] = self.multitask_loss_weight[task_name]
            logging_output[task_name] = task_logging_output
        return loss, logging_output

    @classmethod
    def reduce_metrics(cls, logging_outputs) -> None:
        """Log the aggregated loss and the loss weight of every auxiliary task."""
        if not logging_outputs:
            return

        for task_name in logging_outputs[0]["multitask"].keys():
            # different criterion may return different logging
            # currently only reduce on loss, the most common one
            # ideally the way that losses are reduced should also depend on the task type
            loss_sum = sum(
                log["multitask"][task_name].get("loss", 0) for log in logging_outputs
            )
            sample_size = sum(
                log["multitask"][task_name].get("sample_size", 0)
                for log in logging_outputs
            )

            # ``or 1`` avoids a ZeroDivisionError when no sample was counted;
            # the metric is then logged with weight 0.
            metrics.log_scalar(
                f"multitask_{task_name}_loss",
                loss_sum / (sample_size or 1) / math.log(2),
                sample_size,
                round=3,
            )

            loss_weight = logging_outputs[0]["multitask"][task_name].get(
                "loss_weight", 0
            )
            metrics.log_scalar(
                f"multitask_{task_name}_loss_weight",
                loss_weight,
                weight=0,
                priority=250,
            )
self.compute_accuracy(model, [net_output], sample) + logging_output["n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + if self.rdrop_alpha > 0: + logging_output["rdrop_kl_loss"] = utils.item(rdrop_kl_loss.data) + + if len(self.multitask_criterion) == 0: + return loss, sample_size, logging_output + + # multitask + multitask_loss, multitask_log = self.get_multitask_loss(model, sample, extra) + loss += multitask_loss + logging_output["multitask"] = multitask_log + + return loss, sample_size, logging_output + + @classmethod + def reduce_metrics(cls, logging_outputs) -> None: + super().reduce_metrics(logging_outputs) + + # inference metrics + if "targ_frames" in logging_outputs[0]: + n = sum(log.get("norm_frames", 0) for log in logging_outputs) + for key, new_key in [ + ("mcd_loss", "mcd_loss"), + ("pred_frames", "pred_ratio"), + ("nins", "ins_rate"), + ("ndel", "del_rate"), + ]: + val = sum(log.get(key, 0) for log in logging_outputs) + metrics.log_scalar(new_key, val / n, n, round=3) + + if "multitask" not in logging_outputs[0]: + return + + MultitaskCriterion.reduce_metrics(logging_outputs) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
@register_criterion(
    "speech_to_unit_2pass", dataclass=RdropLabelSmoothedCrossEntropyCriterionConfig
)
class SpeechToUnit2passMultitaskTaskCriterion(SpeechToUnitMultitaskTaskCriterion):
    """Speech-to-unit criterion for models that also take the MT (and
    optionally ASR) task's ``prev_output_tokens`` as forward inputs."""

    def __init__(
        self,
        task,
        sentence_avg,
        label_smoothing,
        ignore_prefix_size=0,
        report_accuracy=False,
        rdrop_alpha=0.0,
    ):
        super().__init__(
            task,
            sentence_avg,
            label_smoothing,
            ignore_prefix_size,
            report_accuracy,
            rdrop_alpha,
        )

    def forward(self, model, sample, reduce=True):
        """Compute the main loss plus any multitask losses.

        Returns a tuple ``(loss, sample_size, logging_output)``.
        """
        net_input = sample["net_input"]
        multitask = sample["multitask"]

        model_kwargs = {
            "src_tokens": net_input["src_tokens"],
            "src_lengths": net_input["src_lengths"],
            "prev_output_tokens": net_input["prev_output_tokens"],
            "prev_output_tokens_mt": multitask[model.mt_task_name]["net_input"][
                "prev_output_tokens"
            ],
            "tgt_speaker": net_input.get("tgt_speaker", None),
            "return_all_hiddens": True,
        }
        # The ASR decoder input is forwarded only when the model names an ASR task.
        asr_task_name = getattr(model, "asr_task_name", None)
        if asr_task_name is not None:
            model_kwargs["prev_output_tokens_asr"] = multitask[asr_task_name][
                "net_input"
            ]["prev_output_tokens"]

        # R-Drop (main or multitask) needs the inputs duplicated.
        uses_rdrop = self.rdrop_alpha > 0 or self.rdrop_alpha_mtl > 0
        if uses_rdrop:
            model_kwargs = duplicate_input(model_kwargs)

        net_output, extra = model(**model_kwargs)
        loss, nll_loss, rdrop_kl_loss = self.compute_loss(
            model, [net_output], sample, reduce=reduce
        )

        if self.sentence_avg:
            sample_size = sample["target"].size(0)
        else:
            sample_size = sample["ntokens"]

        logging_output = {
            "loss": loss.data,
            "nll_loss": nll_loss.data,
            "ntokens": sample["ntokens"],
            "nsentences": sample["target"].size(0),
            "sample_size": sample_size,
        }
        if self.report_accuracy:
            n_correct, total = self.compute_accuracy(model, [net_output], sample)
            logging_output["n_correct"] = utils.item(n_correct.data)
            logging_output["total"] = utils.item(total.data)
        if self.rdrop_alpha > 0:
            logging_output["rdrop_kl_loss"] = utils.item(rdrop_kl_loss.data)

        if len(self.multitask_criterion) != 0:
            # multitask: the in-place add is kept on purpose, it is the
            # exact update the logged ``loss.data`` view observes.
            multitask_loss, multitask_log = self.get_multitask_loss(
                model, sample, extra
            )
            loss += multitask_loss
            logging_output["multitask"] = multitask_log

        return loss, sample_size, logging_output
@register_criterion("speech_to_spectrogram_2pass", dataclass=Tacotron2CriterionConfig)
class SpeechToSpectrogram2passMultitaskTaskCriterion(
    SpeechToSpectrogramMultitaskTaskCriterion
):
    """Speech-to-spectrogram criterion for models that also take the MT
    task's ``prev_output_tokens`` as a forward input."""

    def __init__(
        self,
        task,
        sentence_avg,
        use_guided_attention_loss,
        guided_attention_loss_sigma,
        bce_pos_weight,
        ctc_weight,
    ):
        super().__init__(
            task,
            sentence_avg,
            use_guided_attention_loss,
            guided_attention_loss_sigma,
            bce_pos_weight,
            ctc_weight,
        )

    def forward(self, model, sample, reduction="mean"):
        """Compute feature, EOS and (optional) guided-attention losses,
        then add any multitask losses.

        Returns a tuple ``(loss, sample_size, logging_output)``.
        """
        feat_tgt = sample["target"]
        tgt_lens = sample["target_lengths"]
        net_input = sample["net_input"]
        bsz, max_len, _ = feat_tgt.size()

        # EOS target: 1.0 at the last valid frame of every sequence, else 0.0.
        feat_len = tgt_lens.view(bsz, 1).expand(-1, max_len)
        positions = torch.arange(max_len).to(feat_tgt.device)
        positions = positions.view(1, max_len).expand(bsz, -1)
        eos_tgt = (positions == (feat_len - 1)).float()

        feat_out, eos_out, extra = model(
            src_tokens=net_input["src_tokens"],
            src_lengths=net_input["src_lengths"],
            prev_output_tokens=net_input["prev_output_tokens"],
            prev_output_tokens_mt=sample["multitask"][model.mt_task_name]["net_input"][
                "prev_output_tokens"
            ],
            tgt_speaker=net_input["tgt_speaker"],
            target_lengths=tgt_lens,
            return_all_hiddens=True,
        )

        l1_loss, mse_loss, eos_loss = self.compute_loss(
            extra["feature_out"],
            feat_out,
            eos_out,
            feat_tgt,
            eos_tgt,
            tgt_lens,
            reduction,
        )
        if self.guided_attn is None:
            attn_loss = torch.tensor(0.0).type_as(l1_loss)
        else:
            attn_loss = self.guided_attn(
                extra["attn"],
                net_input["src_lengths"],
                tgt_lens,
                reduction,
            )
        # do not include ctc loss as there's no text target
        loss = l1_loss + mse_loss + eos_loss + attn_loss

        if self.sentence_avg:
            sample_size = sample["nsentences"]
        else:
            sample_size = sample["ntokens"]

        logging_output = {
            "loss": utils.item(loss.data),
            "ntokens": sample["ntokens"],
            "nsentences": sample["nsentences"],
            "sample_size": sample_size,
            "l1_loss": utils.item(l1_loss.data),
            "mse_loss": utils.item(mse_loss.data),
            "eos_loss": utils.item(eos_loss.data),
            "attn_loss": utils.item(attn_loss.data),
        }

        if len(self.multitask_criterion) != 0:
            # multitask
            multitask_loss, multitask_log = self.get_multitask_loss(
                model, sample, extra
            )
            loss += multitask_loss
            logging_output["multitask"] = multitask_log

        return loss, sample_size, logging_output
def mae_loss(pred, targ, mask, reduce=True):
    """Masked absolute error between ``pred`` and ``targ``.

    ``pred`` is either 2-D or 3-D; a 3-D input has its dimension 2 squeezed
    first. Positions where ``mask`` is True contribute zero. Returns the sum
    when ``reduce`` is True, otherwise the flattened per-position losses.
    """
    if pred.ndim != 3:
        assert pred.ndim == 2
    else:
        pred = pred.squeeze(2)

    keep = (~mask).float()
    masked_err = torch.abs(pred.float() - targ.float()) * keep
    if reduce:
        return masked_err.sum()
    return masked_err.view(-1)


def nll_loss(pred, targ, mask, reduce=True):
    """Masked negative log-likelihood of ``targ`` under ``softmax(pred)``.

    ``pred`` holds unnormalised scores along its last dimension. Positions
    where ``mask`` is True contribute zero. Returns the sum when ``reduce``
    is True, otherwise the flattened per-position losses.
    """
    log_probs = F.log_softmax(pred, dim=-1)
    flat_log_probs = log_probs.view(-1, log_probs.size(-1))
    per_token = F.nll_loss(flat_log_probs, targ.view(-1), reduction="none")

    keep = (~mask).float().view(-1)
    masked = per_token * keep
    if reduce:
        return masked.sum()
    return masked.view(-1)
reduce=True): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(**sample["net_input"]) + + token_loss = nll_loss( + net_output["token"], sample["target"], sample["mask"], reduce + ) + dur_loss = self.dur_loss_fn( + net_output["duration"], + sample["dur_target"], + sample["dur_mask"], + reduce, + ) + f0_loss = self.f0_loss_fn( + net_output["f0"], + sample["f0_target"], + sample["f0_mask"], + reduce, + ) + loss = self.weights.to(token_loss.device) * torch.stack( + [token_loss, dur_loss, f0_loss], dim=-1 + ) + loss = loss.sum() if reduce else loss.sum(-1) + + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": loss.detach().sum().item(), + "token_loss": token_loss.detach().sum().item(), + "dur_loss": dur_loss.detach().sum().item(), + "f0_loss": f0_loss.detach().sum().item(), + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + token_loss_sum = sum(log.get("token_loss", 0) for log in logging_outputs) + dur_loss_sum = sum(log.get("dur_loss", 0) for log in logging_outputs) + f0_loss_sum = sum(log.get("f0_loss", 0) for log in logging_outputs) + + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar("loss", loss_sum / sample_size, sample_size, round=3) + + metrics.log_scalar( + "token_loss", token_loss_sum / sample_size, sample_size, round=3 + ) + + metrics.log_scalar("dur_loss", dur_loss_sum / sample_size, sample_size, round=3) + + metrics.log_scalar("f0_loss", f0_loss_sum / 
class GuidedAttentionLoss(torch.nn.Module):
    """
    Efficiently Trainable Text-to-Speech System Based on Deep Convolutional
    Networks with Guided Attention (https://arxiv.org/abs/1710.08969)

    The loss multiplies the attention matrix by a penalty that is 0 on the
    diagonal (relative source position == relative target position) and
    grows towards 1 away from it; ``sigma`` controls how fast it grows.
    """

    def __init__(self, sigma):
        super().__init__()
        self.sigma = sigma

    @staticmethod
    @lru_cache(maxsize=8)
    def _cached_weight(s_len, t_len, sigma):
        """Return the ``(t_len, s_len)`` penalty matrix as a CPU tensor.

        Arguments must be plain Python numbers: tensors hash by identity, so
        caching on them would never hit and would only keep them alive.
        The returned tensor is shared by the cache and must not be mutated.
        """
        # Broadcasting a column of target positions against a row of source
        # positions yields the same grid as ``torch.meshgrid(..., "ij")``.
        grid_x = torch.arange(t_len).view(-1, 1).float()
        grid_y = torch.arange(s_len).view(1, -1).float()
        w = (grid_y / s_len - grid_x / t_len) ** 2
        return 1.0 - torch.exp(-w / (2 * (sigma**2)))

    @staticmethod
    def _get_weight(s_len, t_len, sigma):
        """Return a fresh penalty matrix for one ``(s_len, t_len)`` pair.

        Accepts ints or 0-dim tensors. When ``s_len`` is a tensor the result
        is placed on its device, as before.
        """
        weight = GuidedAttentionLoss._cached_weight(
            int(s_len), int(t_len), float(sigma)
        ).clone()  # never hand out the cached tensor itself
        if torch.is_tensor(s_len):
            weight = weight.to(s_len.device)
        return weight

    def _get_weights(self, src_lens, tgt_lens):
        """Build the zero-padded ``(bsz, max_t_len, max_s_len)`` penalty batch."""
        s_lens = [int(n) for n in src_lens]
        t_lens = [int(n) for n in tgt_lens]
        weights = torch.zeros(
            (len(s_lens), max(t_lens, default=0), max(s_lens, default=0))
        )
        sigma = float(self.sigma)
        for i, (s_len, t_len) in enumerate(zip(s_lens, t_lens)):
            # Slice assignment copies, so the cached tensor stays untouched.
            weights[i, :t_len, :s_len] = self._cached_weight(s_len, t_len, sigma)
        return weights

    @staticmethod
    def _get_masks(src_lens, tgt_lens):
        """Return the boolean mask of valid (target, source) position pairs."""
        in_masks = lengths_to_mask(src_lens)
        out_masks = lengths_to_mask(tgt_lens)
        return out_masks.unsqueeze(2) & in_masks.unsqueeze(1)

    def forward(self, attn, src_lens, tgt_lens, reduction="mean"):
        """Return the guided-attention loss over the valid positions.

        ``attn`` is transposed on dims 1 and 2 before being weighted, so it
        is presumably laid out as (batch, source, target) - confirm against
        the model. ``reduction="sum"`` sums; any other value averages.
        """
        weights = self._get_weights(src_lens, tgt_lens).to(attn.device)
        masks = self._get_masks(src_lens, tgt_lens).to(attn.device)
        loss = (weights * attn.transpose(1, 2)).masked_select(masks)
        loss = torch.sum(loss) if reduction == "sum" else torch.mean(loss)
        return loss
def compute_loss(
    self,
    feat_out,
    feat_out_post,
    eos_out,
    feat_tgt,
    eos_tgt,
    tgt_lens,
    reduction="mean",
):
    """Compute the feature (L1 + MSE) and end-of-sequence (BCE) losses.

    Only positions selected by ``lengths_to_mask(tgt_lens)`` take part.
    Both ``feat_out`` and ``feat_out_post`` are compared against
    ``feat_tgt`` and their losses are added.

    Returns:
        ``(l1_loss, mse_loss, eos_loss)``.
    """
    mask = lengths_to_mask(tgt_lens)
    # Flatten instead of ``squeeze()``: with exactly one selected frame,
    # ``squeeze()`` would produce a 0-dim tensor that no longer matches the
    # 1-element target, and the BCE call rejects mismatched sizes.
    _eos_out = eos_out[mask].reshape(-1)
    _eos_tgt = eos_tgt[mask]
    _feat_tgt = feat_tgt[mask]
    _feat_out = feat_out[mask]
    _feat_out_post = feat_out_post[mask]

    l1_loss = F.l1_loss(_feat_out, _feat_tgt, reduction=reduction) + F.l1_loss(
        _feat_out_post, _feat_tgt, reduction=reduction
    )
    mse_loss = F.mse_loss(_feat_out, _feat_tgt, reduction=reduction) + F.mse_loss(
        _feat_out_post, _feat_tgt, reduction=reduction
    )
    eos_loss = F.binary_cross_entropy_with_logits(
        _eos_out,
        _eos_tgt,
        pos_weight=torch.tensor(self.bce_pos_weight),
        reduction=reduction,
    )
    return l1_loss, mse_loss, eos_loss


@classmethod
def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None:
    """Log sample-size-weighted averages of the training losses and, when
    present, the frame-normalised inference metrics."""
    ns = [log.get("sample_size", 0) for log in logging_outputs]
    ntot = sum(ns)
    # The epsilon keeps the weights finite when no sample was counted.
    ws = [n / (ntot + 1e-8) for n in ns]
    for key in ["loss", "l1_loss", "mse_loss", "eos_loss", "attn_loss", "ctc_loss"]:
        vals = [log.get(key, 0) for log in logging_outputs]
        val = sum(val * w for val, w in zip(vals, ws))
        metrics.log_scalar(key, val, ntot, round=3)
    metrics.log_scalar("sample_size", ntot, len(logging_outputs))

    # inference metrics
    if not logging_outputs or "targ_frames" not in logging_outputs[0]:
        return
    n = sum(log.get("targ_frames", 0) for log in logging_outputs)
    if n == 0:
        # No target frame counted: the ratios below are undefined.
        return
    for key, new_key in [
        ("mcd_loss", "mcd_loss"),
        ("pred_frames", "pred_ratio"),
        ("nins", "ins_rate"),
        ("ndel", "del_rate"),
    ]:
        val = sum(log.get(key, 0) for log in logging_outputs)
        metrics.log_scalar(new_key, val / n, n, round=3)


@staticmethod
def logging_outputs_can_be_summed() -> bool:
    """Logging outputs hold per-worker averages, so they must not be summed."""
    return False
def forward(self, model, sample, reduce=True):
    """Compute the loss for the given sample.

    Returns a tuple with three elements:
    1) the loss
    2) the sample size, which is used as the denominator for the gradient
    3) logging outputs to display while training
    """
    net_output = model(**sample["net_input"])
    logits = model.get_logits(net_output).float()
    target = model.get_targets(sample, net_output)
    # Remembered on the instance; every branch below keys off this flag.
    self.xla = is_xla_tensor(logits)

    # XXX: handle weights on xla.
    # Target weights are only used by the binary cross entropy branch.
    weights = None
    if hasattr(model, "get_target_weights") and not self.infonce:
        weights = model.get_target_weights(target, net_output)
        if torch.is_tensor(weights):
            weights = weights.float()

    # losses[0] is the main loss; weighted extra losses are appended below.
    losses = []

    # On XLA the loss is kept unreduced so it can be masked afterwards.
    reduction = "none" if ((not reduce) or self.xla) else "sum"
    if self.infonce:
        loss = F.cross_entropy(logits, target, reduction=reduction)
    else:
        loss = F.binary_cross_entropy_with_logits(
            logits, target.float(), weights, reduction=reduction
        )

    if self.xla:
        # tpu-comment: since dynamic shapes lead to recompilations on xla,
        # we don't shrink tensors using mask_indices.
        # Instead, we use mask indices to adjust loss.
        mi = (
            sample["net_input"]["mask_indices"]
            .transpose(0, 1)  # logits are transposed in `model.get_logits`
            .reshape(logits.size(0))
        )
        loss = (loss * mi).sum() if reduce else (loss * mi)

    # Sample size priority: explicit value in the sample, then the number
    # of masked positions, then a count derived from the targets.
    if "sample_size" in sample:
        sample_size = sample["sample_size"]
    elif "mask_indices" in sample["net_input"]:
        sample_size = sample["net_input"]["mask_indices"].sum()
    else:
        sample_size = target.numel() if self.infonce else target.long().sum().item()
    # Snapshot the main loss before the in-place additions below.
    losses.append(loss.detach().clone())

    if self.loss_weights is not None:
        assert hasattr(model, "get_extra_losses")
        extra_losses = model.get_extra_losses(net_output)
        if torch.is_tensor(extra_losses):
            extra_losses = [extra_losses]
        # A single configured weight is broadcast to every extra loss.
        # NOTE: this rewrites self.loss_weights, so it persists across calls.
        if len(self.loss_weights) == 1 and len(extra_losses) != 1:
            self.loss_weights = [self.loss_weights[0]] * len(extra_losses)
        assert len(extra_losses) == len(
            self.loss_weights
        ), f"{len(extra_losses)}, {len(self.loss_weights)}"
        for p, coef in zip(extra_losses, self.loss_weights):
            if coef != 0 and p is not None:
                # Scaled by sample_size, the denominator returned to the caller.
                p = coef * p.float() * sample_size
                loss += p
                losses.append(p)

    logging_output = {
        "loss": loss.item() if (reduce and not self.xla) else loss.detach(),
        "ntokens": sample_size,
        "nsentences": sample["id"].numel(),
        "sample_size": sample_size,
    }

    for lk in self.log_keys:
        # Only store "logits" and "target" for computing MAP and MAUC
        # during validation
        if lk == "logits":
            if not self.training:
                logging_output["logits"] = logits.cpu().numpy()
        elif lk == "target":
            if not self.training:
                # If the targets have been mixed with the predictions of
                # teacher models, find the original targets
                if hasattr(model, "get_original_targets"):
                    original_target = model.get_original_targets(sample, net_output)
                else:
                    original_target = target
                logging_output["target"] = original_target.cpu().numpy()
        elif lk in net_output:
            # Any other key is copied from the model output; non-XLA
            # values are converted to Python floats.
            value = net_output[lk]
            if not is_xla_tensor(value):
                value = float(value)
            logging_output[lk] = value

    # Individual loss terms are only logged when there is more than one.
    if len(losses) > 1:
        for i, l in enumerate(losses):
            logging_output[f"loss_{i}"] = l.item() if not self.xla else l.detach()

    if self.infonce:
        with torch.no_grad():
            if logits.numel() == 0:
                corr = 0
                count = 0
            else:
                assert logits.dim() > 1, logits.shape
                # NOTE(review): `max` and `min` shadow the builtins for the
                # rest of this function. A row counts as correct when
                # index 0 is the argmax but not also the argmin.
                max = logits.argmax(-1) == 0
                min = logits.argmin(-1) == 0
                if is_xla_tensor(logits):
                    # `mi` was defined in the XLA branch above.
                    max, min = max * mi, min * mi
                    both = max & min
                    corr = max.long().sum() - both.long().sum()
                    count = mi.sum()
                else:
                    both = max & min
                    corr = max.long().sum().item() - both.long().sum().item()
                    count = float(max.numel())

            logging_output["correct"] = corr
            logging_output["count"] = count

    return loss, sample_size, logging_output
nsentences) + + correct = sum(log.get("correct", 0) for log in logging_outputs) + metrics.log_scalar("_correct", correct) + + total = sum(log.get("count", 0) for log in logging_outputs) + metrics.log_scalar("_total", total) + + if total > 0: + metrics.log_derived( + "accuracy", + lambda meters: safe_round( + meters["_correct"].sum / meters["_total"].sum, 5 + ) + if meters["_total"].sum > 0 + else float("nan"), + ) + + builtin_keys = { + "loss", + "ntokens", + "nsentences", + "sample_size", + "correct", + "count", + } + + for k in logging_outputs[0]: + if k not in builtin_keys: + val = sum(log.get(k, 0) for log in logging_outputs) + if k.startswith("loss"): + metrics.log_scalar( + k, val / (sample_size or 1) / math.log(2), sample_size, round=3 + ) + else: + metrics.log_scalar(k, val / len(logging_outputs), round=3) + + # FIXME: revert when gather based xla reduction is implemented + # @staticmethod + # def logging_outputs_can_be_summed() -> bool: + def logging_outputs_can_be_summed(self) -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + # XXX: Gather based reduction not implemented for xla yet. + # So we fall to sum based reduction for xla. + return self.xla diff --git a/fairseq/fairseq/dataclass/__init__.py b/fairseq/fairseq/dataclass/__init__.py new file mode 100644 index 0000000..25408d2 --- /dev/null +++ b/fairseq/fairseq/dataclass/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from .configs import FairseqDataclass +from .constants import ChoiceEnum + + +__all__ = [ + "FairseqDataclass", + "ChoiceEnum", +] diff --git a/fairseq/fairseq/dataclass/configs.py b/fairseq/fairseq/dataclass/configs.py new file mode 100644 index 0000000..af957fe --- /dev/null +++ b/fairseq/fairseq/dataclass/configs.py @@ -0,0 +1,1147 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import sys +from dataclasses import _MISSING_TYPE, dataclass, field +from typing import Any, List, Optional + +import torch +from omegaconf import II, MISSING + +from fairseq.dataclass.constants import ( + DATASET_IMPL_CHOICES, + DDP_BACKEND_CHOICES, + DDP_COMM_HOOK_CHOICES, + GENERATION_CONSTRAINTS_CHOICES, + GENERATION_DECODING_FORMAT_CHOICES, + LOG_FORMAT_CHOICES, + PIPELINE_CHECKPOINT_CHOICES, + PRINT_ALIGNMENT_CHOICES, + ZERO_SHARDING_CHOICES, +) + + +@dataclass +class FairseqDataclass: + """fairseq base dataclass that supported fetching attributes and metas""" + + _name: Optional[str] = None + + @staticmethod + def name(): + return None + + def _get_all_attributes(self) -> List[str]: + return [k for k in self.__dataclass_fields__.keys()] + + def _get_meta( + self, attribute_name: str, meta: str, default: Optional[Any] = None + ) -> Any: + return self.__dataclass_fields__[attribute_name].metadata.get(meta, default) + + def _get_name(self, attribute_name: str) -> str: + return self.__dataclass_fields__[attribute_name].name + + def _get_default(self, attribute_name: str) -> Any: + if hasattr(self, attribute_name): + if str(getattr(self, attribute_name)).startswith("${"): + return str(getattr(self, attribute_name)) + elif str(self.__dataclass_fields__[attribute_name].default).startswith( + "${" + ): + return str(self.__dataclass_fields__[attribute_name].default) + elif ( + getattr(self, attribute_name) + != 
self.__dataclass_fields__[attribute_name].default + ): + return getattr(self, attribute_name) + + f = self.__dataclass_fields__[attribute_name] + if not isinstance(f.default_factory, _MISSING_TYPE): + return f.default_factory() + return f.default + + def _get_type(self, attribute_name: str) -> Any: + return self.__dataclass_fields__[attribute_name].type + + def _get_help(self, attribute_name: str) -> Any: + return self._get_meta(attribute_name, "help") + + def _get_argparse_const(self, attribute_name: str) -> Any: + return self._get_meta(attribute_name, "argparse_const") + + def _get_argparse_alias(self, attribute_name: str) -> Any: + return self._get_meta(attribute_name, "argparse_alias") + + def _get_choices(self, attribute_name: str) -> Any: + return self._get_meta(attribute_name, "choices") + + @classmethod + def from_namespace(cls, args): + if isinstance(args, cls): + return args + else: + config = cls() + for k in config.__dataclass_fields__.keys(): + if k.startswith("_"): + # private member, skip + continue + if hasattr(args, k): + setattr(config, k, getattr(args, k)) + + return config + + +@dataclass +class CommonConfig(FairseqDataclass): + # This is the core dataclass including common parameters shared by all different jobs. Please append your params to other dataclasses if they were + # used for a particular purpose or task, such as those dedicated for `distributed training`, `optimization`, etc. 
+ no_progress_bar: bool = field( + default=False, metadata={"help": "disable progress bar"} + ) + log_interval: int = field( + default=100, + metadata={ + "help": "log progress every N batches (when progress bar is disabled)" + }, + ) + log_format: Optional[LOG_FORMAT_CHOICES] = field( + default=None, metadata={"help": "log format to use"} + ) + log_file: Optional[str] = field( + default=None, metadata={"help": "log file to copy metrics to."} + ) + aim_repo: Optional[str] = field( + default=None, + metadata={"help": "path to Aim repository"}, + ) + aim_run_hash: Optional[str] = field( + default=None, + metadata={ + "help": "Aim run hash. If skipped, creates or continues run " + "based on save_dir" + }, + ) + tensorboard_logdir: Optional[str] = field( + default=None, + metadata={ + "help": "path to save logs for tensorboard, should match --logdir " + "of running tensorboard (default: no tensorboard logging)" + }, + ) + wandb_project: Optional[str] = field( + default=None, + metadata={"help": "Weights and Biases project name to use for logging"}, + ) + azureml_logging: Optional[bool] = field( + default=False, + metadata={"help": "Log scalars to AzureML context"}, + ) + seed: int = field( + default=1, metadata={"help": "pseudo random number generator seed"} + ) + cpu: bool = field(default=False, metadata={"help": "use CPU instead of CUDA"}) + tpu: bool = field(default=False, metadata={"help": "use TPU instead of CUDA"}) + bf16: bool = field(default=False, metadata={"help": "use bfloat16; implies --tpu"}) + memory_efficient_bf16: bool = field( + default=False, + metadata={ + "help": "use a memory-efficient version of BF16 training; implies --bf16" + }, + ) + fp16: bool = field(default=False, metadata={"help": "use FP16"}) + memory_efficient_fp16: bool = field( + default=False, + metadata={ + "help": "use a memory-efficient version of FP16 training; implies --fp16" + }, + ) + fp16_no_flatten_grads: bool = field( + default=False, metadata={"help": "don't flatten FP16 
grads tensor"} + ) + fp16_init_scale: int = field( + default=2**7, metadata={"help": "default FP16 loss scale"} + ) + fp16_scale_window: Optional[int] = field( + default=None, + metadata={"help": "number of updates before increasing loss scale"}, + ) + fp16_scale_tolerance: float = field( + default=0.0, + metadata={ + "help": "pct of updates that can overflow before decreasing the loss scale" + }, + ) + on_cpu_convert_precision: bool = field( + default=False, + metadata={ + "help": "if set, the floating point conversion to fp16/bf16 runs on CPU. " + "This reduces bus transfer time and GPU memory usage." + }, + ) + min_loss_scale: float = field( + default=1e-4, + metadata={ + "help": "minimum FP16/AMP loss scale, after which training is stopped" + }, + ) + threshold_loss_scale: Optional[float] = field( + default=None, metadata={"help": "threshold FP16 loss scale from below"} + ) + amp: bool = field(default=False, metadata={"help": "use automatic mixed precision"}) + amp_batch_retries: int = field( + default=2, + metadata={ + "help": "number of retries of same batch after reducing loss scale with AMP" + }, + ) + amp_init_scale: int = field( + default=2**7, metadata={"help": "default AMP loss scale"} + ) + amp_scale_window: Optional[int] = field( + default=None, + metadata={"help": "number of updates before increasing AMP loss scale"}, + ) + user_dir: Optional[str] = field( + default=None, + metadata={ + "help": "path to a python module containing custom extensions (tasks and/or architectures)" + }, + ) + empty_cache_freq: int = field( + default=0, + metadata={"help": "how often to clear the PyTorch CUDA cache (0 to disable)"}, + ) + all_gather_list_size: int = field( + default=16384, + metadata={"help": "number of bytes reserved for gathering stats from workers"}, + ) + model_parallel_size: int = field( + default=1, metadata={"help": "total number of GPUs to parallelize model over"} + ) + quantization_config_path: Optional[str] = field( + default=None, 
metadata={"help": "path to quantization config file"} + ) + profile: bool = field( + default=False, metadata={"help": "enable autograd profiler emit_nvtx"} + ) + reset_logging: bool = field( + default=False, + metadata={ + "help": "when using Hydra, reset the logging at the beginning of training" + }, + ) + suppress_crashes: bool = field( + default=False, + metadata={ + "help": "suppress crashes when training with the hydra_train entry point so that the " + "main method can return a value (useful for sweeps)" + }, + ) + use_plasma_view: bool = field( + default=False, metadata={"help": "Store indices and sizes in shared memory"} + ) + plasma_path: Optional[str] = field( + default="/tmp/plasma", + metadata={ + "help": "path to run plasma_store, defaults to /tmp/plasma. Paths outside /tmp tend to fail." + }, + ) + + +@dataclass +class DistributedTrainingConfig(FairseqDataclass): + distributed_world_size: int = field( + default=max(1, torch.cuda.device_count()), + metadata={ + "help": "total number of GPUs across all nodes (default: all visible GPUs)" + }, + ) + distributed_num_procs: Optional[int] = field( + default=max(1, torch.cuda.device_count()), + metadata={ + "help": "total number of processes to fork (default: all visible GPUs)" + }, + ) + distributed_rank: Optional[int] = field( + default=0, metadata={"help": "rank of the current worker"} + ) + distributed_backend: str = field( + default="nccl", metadata={"help": "distributed backend"} + ) + distributed_init_method: Optional[str] = field( + default=None, + metadata={ + "help": "typically tcp://hostname:port that will be used to " + "establish initial connetion" + }, + ) + distributed_port: int = field( + default=-1, + metadata={ + "help": "port number (not required if using --distributed-init-method)" + }, + ) + device_id: int = field( + default=os.getenv("LOCAL_RANK", 0), + metadata={ + "help": "which GPU to use (by default looks for $LOCAL_RANK, usually configured automatically)", + "argparse_alias": 
"--local_rank", + }, + ) + distributed_no_spawn: bool = field( + default=False, + metadata={ + "help": "do not spawn multiple processes even if multiple GPUs are visible" + }, + ) + ddp_backend: DDP_BACKEND_CHOICES = field( + default="pytorch_ddp", metadata={"help": "DistributedDataParallel backend"} + ) + ddp_comm_hook: DDP_COMM_HOOK_CHOICES = field( + default="none", metadata={"help": "communication hook"} + ) + bucket_cap_mb: int = field( + default=25, metadata={"help": "bucket size for reduction"} + ) + fix_batches_to_gpus: bool = field( + default=False, + metadata={ + "help": "don't shuffle batches between GPUs; this reduces overall " + "randomness and may affect precision but avoids the cost of re-reading the data" + }, + ) + find_unused_parameters: bool = field( + default=False, + metadata={ + "help": "disable unused parameter detection (not applicable to " + "--ddp-backend=legacy_ddp)" + }, + ) + gradient_as_bucket_view: bool = field( + default=False, + metadata={ + "help": "when set to True, gradients will be views pointing to different offsets of allreduce communication buckets. This can reduce peak memory usage, where the saved memory size will be equal to the total gradients size. 
" + "--gradient-as-bucket-view=gradient_as_bucket_view)" + }, + ) + fast_stat_sync: bool = field( + default=False, + metadata={"help": "[deprecated] this is now defined per Criterion"}, + ) + heartbeat_timeout: int = field( + default=-1, + metadata={ + "help": "kill the job if no progress is made in N seconds; " + "set to -1 to disable" + }, + ) + broadcast_buffers: bool = field( + default=False, + metadata={ + "help": "Copy non-trainable parameters between GPUs, such as " + "batchnorm population statistics" + }, + ) + slowmo_momentum: Optional[float] = field( + default=None, + metadata={ + "help": "SlowMo momentum term; by default use 0.0 for 16 GPUs, " + "0.2 for 32 GPUs; 0.5 for 64 GPUs, 0.6 for > 64 GPUs" + }, + ) + slowmo_base_algorithm: str = field( + default="localsgd", + metadata={ + "help": "Base algorithm. Either 'localsgd' or 'sgp'. Please refer " + "to the documentation of 'slowmo_base_algorithm' parameter in " + "https://fairscale.readthedocs.io/en/latest/api/experimental/nn/slowmo_ddp.html " + "for more details" + }, + ) + localsgd_frequency: int = field( + default=3, metadata={"help": "Local SGD allreduce frequency"} + ) + nprocs_per_node: int = field( + default=max(1, torch.cuda.device_count()), + metadata={ + "help": "number of GPUs in each node. An allreduce operation across GPUs in " + "a node is very fast. Hence, we do allreduce across GPUs in a node, " + "and gossip across different nodes" + }, + ) + pipeline_model_parallel: bool = field( + default=False, + metadata={"help": "if set, use pipeline model parallelism across GPUs"}, + ) + pipeline_balance: Optional[str] = field( + default=None, + metadata={ + "help": "partition the model into N_K pieces, where each piece " + "contains N_i layers. 
The sum(args.pipeline_balance) " + "should equal the total number of layers in the model" + }, + ) + pipeline_devices: Optional[str] = field( + default=None, + metadata={ + "help": "a list of device indices indicating which device to place " + "each of the N_K partitions. The length of this list should " + "equal the length of the --pipeline-balance argument" + }, + ) + pipeline_chunks: Optional[int] = field( + default=0, metadata={"help": "microbatch count for pipeline model parallelism"} + ) + pipeline_encoder_balance: Optional[str] = field( + default=None, + metadata={ + "help": "partition the pipeline parallel encoder into N_K pieces, where each piece " + "contains N_i layers. The sum(args.pipeline_encoder_balance) " + "should equal the total number of encoder layers in the model" + }, + ) + pipeline_encoder_devices: Optional[str] = field( + default=None, + metadata={ + "help": "a list of device indices indicating which device to place " + "each of the N_K partitions. The length of this list should " + "equal the length of the --pipeline-encoder-balance argument" + }, + ) + pipeline_decoder_balance: Optional[str] = field( + default=None, + metadata={ + "help": "partition the pipeline parallel decoder into N_K pieces, where each piece " + "contains N_i layers. The sum(args.pipeline_decoder_balance) " + "should equal the total number of decoder layers in the model" + }, + ) + pipeline_decoder_devices: Optional[str] = field( + default=None, + metadata={ + "help": "a list of device indices indicating which device to place " + "each of the N_K partitions. 
The length of this list should " + "equal the length of the --pipeline-decoder-balance argument" + }, + ) + pipeline_checkpoint: PIPELINE_CHECKPOINT_CHOICES = field( + default="never", + metadata={"help": "checkpointing mode for pipeline model parallelism"}, + ) + zero_sharding: ZERO_SHARDING_CHOICES = field( + default="none", metadata={"help": "ZeRO sharding"} + ) + fp16: bool = II("common.fp16") + memory_efficient_fp16: bool = II("common.memory_efficient_fp16") + tpu: bool = II("common.tpu") + # configuration for --ddp-backend=fully_sharded + no_reshard_after_forward: bool = field( + default=False, + metadata={"help": "don't reshard parameters after forward pass"}, + ) + fp32_reduce_scatter: bool = field( + default=False, + metadata={"help": "reduce-scatter grads in FP32"}, + ) + cpu_offload: bool = field( + default=False, metadata={"help": "offload FP32 params to CPU"} + ) + use_sharded_state: bool = field( + default=False, + metadata={"help": "use sharded checkpoint files"}, + ) + not_fsdp_flatten_parameters: bool = field( + default=False, + metadata={"help": "not flatten parameter param for fsdp"}, + ) + + +@dataclass +class DatasetConfig(FairseqDataclass): + num_workers: int = field( + default=1, metadata={"help": "how many subprocesses to use for data loading"} + ) + skip_invalid_size_inputs_valid_test: bool = field( + default=False, + metadata={"help": "ignore too long or too short lines in valid and test set"}, + ) + max_tokens: Optional[int] = field( + default=None, metadata={"help": "maximum number of tokens in a batch"} + ) + batch_size: Optional[int] = field( + default=None, + metadata={ + "help": "number of examples in a batch", + "argparse_alias": "--max-sentences", + }, + ) + required_batch_size_multiple: int = field( + default=8, metadata={"help": "batch size will be a multiplier of this value"} + ) + required_seq_len_multiple: int = field( + default=1, + metadata={ + "help": "maximum sequence length in batch will be a multiplier of this value" + 
}, + ) + dataset_impl: Optional[DATASET_IMPL_CHOICES] = field( + default=None, metadata={"help": "output dataset implementation"} + ) + data_buffer_size: int = field( + default=10, metadata={"help": "Number of batches to preload"} + ) + train_subset: str = field( + default="train", + metadata={"help": "data subset to use for training (e.g. train, valid, test)"}, + ) + valid_subset: str = field( + default="valid", + metadata={ + "help": "comma separated list of data subsets to use for validation" + " (e.g. train, valid, test)" + }, + ) + combine_valid_subsets: Optional[bool] = field( + default=None, + metadata={ + "help": "comma separated list of data subsets to use for validation" + " (e.g. train, valid, test)", + "argparse_alias": "--combine-val", + }, + ) + ignore_unused_valid_subsets: Optional[bool] = field( + default=False, + metadata={"help": "do not raise error if valid subsets are ignored"}, + ) + + validate_interval: int = field( + default=1, metadata={"help": "validate every N epochs"} + ) + validate_interval_updates: int = field( + default=0, metadata={"help": "validate every N updates"} + ) + validate_after_updates: int = field( + default=0, metadata={"help": "dont validate until reaching this many updates"} + ) + fixed_validation_seed: Optional[int] = field( + default=None, metadata={"help": "specified random seed for validation"} + ) + disable_validation: bool = field( + default=False, metadata={"help": "disable validation"} + ) + max_tokens_valid: Optional[int] = field( + default=II("dataset.max_tokens"), + metadata={ + "help": "maximum number of tokens in a validation batch" + " (defaults to --max-tokens)" + }, + ) + batch_size_valid: Optional[int] = field( + default=II("dataset.batch_size"), + metadata={ + "help": "batch size of the validation batch (defaults to --batch-size)", + "argparse_alias": "--max-sentences-valid", + }, + ) + max_valid_steps: Optional[int] = field( + default=None, + metadata={"help": "How many batches to evaluate", 
"argparse_alias": "--nval"}, + ) + curriculum: int = field( + default=0, metadata={"help": "don't shuffle batches for first N epochs"} + ) + gen_subset: str = field( + default="test", + metadata={"help": "data subset to generate (train, valid, test)"}, + ) + num_shards: int = field( + default=1, metadata={"help": "shard generation over N shards"} + ) + shard_id: int = field( + default=0, metadata={"help": "id of the shard to generate (id < num_shards)"} + ) + grouped_shuffling: bool = field( + default=False, + metadata={ + "help": "shuffle batches in groups of num_shards to enable similar sequence lengths on each GPU worker when batches are sorted by length", + }, + ) + update_epoch_batch_itr: bool = field( + default=II("dataset.grouped_shuffling"), + metadata={ + "help": "if true then prevents the reuse the epoch batch iterator by setting can_reuse_epoch_itr to false, defaults to --grouped-shuffling )", + }, + ) + update_ordered_indices_seed: bool = field( + default=False, + metadata={ + "help": "if true then increment seed with epoch for getting batch iterators, defautls to False.", + }, + ) + + +@dataclass +class OptimizationConfig(FairseqDataclass): + max_epoch: int = field( + default=0, metadata={"help": "force stop training at specified epoch"} + ) + max_update: int = field( + default=0, metadata={"help": "force stop training at specified update"} + ) + stop_time_hours: float = field( + default=0, + metadata={ + "help": "force stop training after specified cumulative time (if >0)" + }, + ) + clip_norm: float = field( + default=0.0, metadata={"help": "clip threshold of gradients"} + ) + sentence_avg: bool = field( + default=False, + metadata={ + "help": "normalize gradients by the number of sentences in a batch" + " (default is to normalize by number of tokens)" + }, + ) + update_freq: List[int] = field( + default_factory=lambda: [1], + metadata={"help": "update parameters every N_i batches, when in epoch i"}, + ) + lr: List[float] = field( + 
default_factory=lambda: [0.25], + metadata={ + "help": "learning rate for the first N epochs; all epochs >N using LR_N" + " (note: this may be interpreted differently depending on --lr-scheduler)" + }, + ) + stop_min_lr: float = field( + default=-1.0, + metadata={"help": "stop training when the learning rate reaches this minimum"}, + ) + use_bmuf: bool = field( + default=False, + metadata={ + "help": "specify global optimizer for syncing models on different GPUs/shards" + }, + ) + skip_remainder_batch: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, include the last (partial) batch of each epoch in training" + " (default is to skip it)." + }, + ) + debug_param_names: bool = False + + +@dataclass +class CheckpointConfig(FairseqDataclass): + save_dir: str = field( + default="checkpoints", metadata={"help": "path to save checkpoints"} + ) + restore_file: str = field( + default="checkpoint_last.pt", + metadata={ + "help": "filename from which to load checkpoint " + "(default: <save-dir>/checkpoint_last.pt" + }, + ) + continue_once: Optional[str] = field( + default=None, + metadata={ + "help": "continues from this checkpoint, unless a checkpoint indicated in 'restore_file' option is present" + }, + ) + finetune_from_model: Optional[str] = field( + default=None, + metadata={ + "help": "finetune from a pretrained model; note that meters and lr scheduler will be reset" + }, + ) + reset_dataloader: bool = field( + default=False, + metadata={ + "help": "if set, does not reload dataloader state from the checkpoint" + }, + ) + reset_lr_scheduler: bool = field( + default=False, + metadata={ + "help": "if set, does not load lr scheduler state from the checkpoint" + }, + ) + reset_meters: bool = field( + default=False, + metadata={"help": "if set, does not load meters from the checkpoint"}, + ) + reset_optimizer: bool = field( + default=False, + metadata={"help": "if set, does not load optimizer state from the checkpoint"}, + ) + optimizer_overrides: str 
= field( + default="{}", + metadata={ + "help": "a dictionary used to override optimizer args when loading a checkpoint" + }, + ) + save_interval: int = field( + default=1, metadata={"help": "save a checkpoint every N epochs"} + ) + save_interval_updates: int = field( + default=0, metadata={"help": "save a checkpoint (and validate) every N updates"} + ) + keep_interval_updates: int = field( + default=-1, + metadata={ + "help": "keep the last N checkpoints saved with --save-interval-updates" + }, + ) + keep_interval_updates_pattern: int = field( + default=-1, + metadata={ + "help": "when used with --keep-interval-updates, skips deleting " + "any checkpoints with update X where " + "X %% keep_interval_updates_pattern == 0" + }, + ) + keep_last_epochs: int = field( + default=-1, metadata={"help": "keep last N epoch checkpoints"} + ) + keep_best_checkpoints: int = field( + default=-1, metadata={"help": "keep best N checkpoints based on scores"} + ) + no_save: bool = field( + default=False, metadata={"help": "don't save models or checkpoints"} + ) + no_epoch_checkpoints: bool = field( + default=False, metadata={"help": "only store last and best checkpoints"} + ) + no_last_checkpoints: bool = field( + default=False, metadata={"help": "don't store last checkpoints"} + ) + no_save_optimizer_state: bool = field( + default=False, + metadata={"help": "don't save optimizer-state as part of checkpoint"}, + ) + best_checkpoint_metric: str = field( + default="loss", metadata={"help": 'metric to use for saving "best" checkpoints'} + ) + maximize_best_checkpoint_metric: bool = field( + default=False, + metadata={ + "help": 'select the largest metric value for saving "best" checkpoints' + }, + ) + patience: int = field( + default=-1, + metadata={ + "help": ( + "early stop training if valid performance doesn't " + "improve for N consecutive validation runs; note " + "that this is influenced by --validate-interval" + ) + }, + ) + checkpoint_suffix: str = field( + default="", 
metadata={"help": "suffix to add to the checkpoint file name"} + ) + checkpoint_shard_count: int = field( + default=1, + metadata={ + "help": "Number of shards containing the checkpoint - " + "if the checkpoint is over 300GB, it is preferable " + "to split it into shards to prevent OOM on CPU while loading " + "the checkpoint" + }, + ) + load_checkpoint_on_all_dp_ranks: bool = field( + default=False, + metadata={ + "help": "load checkpoints on all data parallel devices " + "(default: only load on rank 0 and broadcast to other devices)" + }, + ) + write_checkpoints_asynchronously: bool = field( + default=False, + metadata={ + "help": ( + "Write checkpoints asynchronously in a separate " + "thread. NOTE: This feature is currently being tested." + ), + "argparse_alias": "--save-async", + }, + ) + model_parallel_size: int = II("common.model_parallel_size") + + +@dataclass +class FairseqBMUFConfig(FairseqDataclass): + block_lr: float = field( + default=1, metadata={"help": "block learning rate for bmuf"} + ) + block_momentum: float = field( + default=0.875, metadata={"help": "block momentum for bmuf"} + ) + global_sync_iter: int = field( + default=50, metadata={"help": "Iteration for syncing global model"} + ) + warmup_iterations: int = field( + default=500, metadata={"help": "warmup iterations for model to broadcast"} + ) + use_nbm: bool = field( + default=False, + metadata={"help": "Specify whether you want to use classical BM / Nesterov BM"}, + ) + average_sync: bool = field( + default=False, + metadata={ + "help": "Specify whether you want to average the local momentum after each sync" + }, + ) + distributed_world_size: int = II("distributed_training.distributed_world_size") + + +@dataclass +class GenerationConfig(FairseqDataclass): + beam: int = field( + default=5, + metadata={"help": "beam size"}, + ) + beam_mt: int = field( + default=0, + metadata={"help": "beam size for the first-pass decoder"}, + ) + nbest: int = field( + default=1, + metadata={"help": "number 
of hypotheses to output"}, + ) + max_len_a: float = field( + default=0, + metadata={ + "help": "generate sequences of maximum length ax + b, where x is the source length" + }, + ) + max_len_b: int = field( + default=200, + metadata={ + "help": "generate sequences of maximum length ax + b, where x is the source length" + }, + ) + max_len_a_mt: float = field( + default=0, + metadata={ + "help": "generate sequences of maximum length ax + b, where x is the source length for the first-pass decoder" + }, + ) + max_len_b_mt: int = field( + default=200, + metadata={ + "help": "generate sequences of maximum length ax + b, where x is the source length for the first-pass decoder" + }, + ) + min_len: int = field( + default=1, + metadata={"help": "minimum generation length"}, + ) + match_source_len: bool = field( + default=False, + metadata={"help": "generations should match the source length"}, + ) + unnormalized: bool = field( + default=False, + metadata={"help": "compare unnormalized hypothesis scores"}, + ) + no_early_stop: bool = field( + default=False, + metadata={"help": "deprecated"}, + ) + no_beamable_mm: bool = field( + default=False, + metadata={"help": "don't use BeamableMM in attention layers"}, + ) + lenpen: float = field( + default=1, + metadata={ + "help": "length penalty: <1.0 favors shorter, >1.0 favors longer sentences" + }, + ) + lenpen_mt: float = field( + default=1, + metadata={ + "help": "length penalty for the first-pass decoder: <1.0 favors shorter, >1.0 favors longer sentences" + }, + ) + unkpen: float = field( + default=0, + metadata={ + "help": "unknown word penalty: <0 produces more unks, >0 produces fewer" + }, + ) + replace_unk: Optional[str] = field( + default=None, + metadata={ + "help": "perform unknown replacement (optionally with alignment dictionary)", + "argparse_const": "@@ ", + }, + ) + sacrebleu: bool = field( + default=False, + metadata={"help": "score with sacrebleu"}, + ) + score_reference: bool = field( + default=False, + 
metadata={"help": "just score the reference translation"}, + ) + prefix_size: int = field( + default=0, + metadata={"help": "initialize generation by target prefix of given length"}, + ) + no_repeat_ngram_size: int = field( + default=0, + metadata={ + "help": "ngram blocking such that this size ngram cannot be repeated in the generation" + }, + ) + sampling: bool = field( + default=False, + metadata={"help": "sample hypotheses instead of using beam search"}, + ) + sampling_topk: int = field( + default=-1, + metadata={"help": "sample from top K likely next words instead of all words"}, + ) + sampling_topp: float = field( + default=-1.0, + metadata={ + "help": "sample from the smallest set whose cumulative probability mass exceeds p for next words" + }, + ) + constraints: Optional[GENERATION_CONSTRAINTS_CHOICES] = field( + default=None, + metadata={ + "help": "enables lexically constrained decoding", + "argparse_const": "ordered", + }, + ) + temperature: float = field( + default=1.0, + metadata={"help": "temperature for generation"}, + ) + diverse_beam_groups: int = field( + default=-1, + metadata={"help": "number of groups for Diverse Beam Search"}, + ) + diverse_beam_strength: float = field( + default=0.5, + metadata={"help": "strength of diversity penalty for Diverse Beam Search"}, + ) + diversity_rate: float = field( + default=-1.0, + metadata={"help": "strength of diversity penalty for Diverse Siblings Search"}, + ) + print_alignment: Optional[PRINT_ALIGNMENT_CHOICES] = field( + default=None, + metadata={ + "help": "if set, uses attention feedback to compute and print alignment to source tokens " + "(valid options are: hard, soft, otherwise treated as hard alignment)", + "argparse_const": "hard", + }, + ) + print_step: bool = field( + default=False, + metadata={"help": "print steps"}, + ) + lm_path: Optional[str] = field( + default=None, + metadata={"help": "path to lm checkpoint for lm fusion"}, + ) + lm_weight: float = field( + default=0.0, + metadata={"help": 
"weight for lm probs for lm fusion"}, + ) + + # arguments for iterative refinement generator + iter_decode_eos_penalty: float = field( + default=0.0, + metadata={"help": "if > 0.0, it penalized early-stopping in decoding."}, + ) + iter_decode_max_iter: int = field( + default=10, + metadata={"help": "maximum iterations for iterative refinement."}, + ) + iter_decode_force_max_iter: bool = field( + default=False, + metadata={ + "help": "if set, run exact the maximum number of iterations without early stop" + }, + ) + iter_decode_with_beam: int = field( + default=1, + metadata={ + "help": "if > 1, model will generate translations varying by the lengths." + }, + ) + iter_decode_with_external_reranker: bool = field( + default=False, + metadata={ + "help": "if set, the last checkpoint are assumed to be a reranker to rescore the translations" + }, + ) + retain_iter_history: bool = field( + default=False, + metadata={ + "help": "if set, decoding returns the whole history of iterative refinement" + }, + ) + retain_dropout: bool = field( + default=False, + metadata={"help": "Use dropout at inference time"}, + ) + # temporarily set to Any until https://github.com/facebookresearch/hydra/issues/1117 is fixed + # retain_dropout_modules: Optional[List[str]] = field( + retain_dropout_modules: Any = field( + default=None, + metadata={ + "help": "if set, only retain dropout for the specified modules; " + "if not set, then dropout will be retained for all modules" + }, + ) + # special decoding format for advanced decoding. 
@dataclass
class CommonEvalConfig(FairseqDataclass):
    """Options shared by the evaluation / generation entry points."""

    # Checkpoint location(s); several paths are separated with colons.
    path: Optional[str] = field(
        default=None,
        metadata={"help": "path(s) to model file(s), colon separated"},
    )
    # Text post-processing scheme. ``--remove-bpe`` is accepted as an alias,
    # and passing the flag without a value selects "subword_nmt".
    post_process: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "post-process text by removing BPE, letter segmentation, etc. "
                "Valid options can be found in fairseq.data.utils.post_process."
            ),
            "argparse_const": "subword_nmt",
            "argparse_alias": "--remove-bpe",
        },
    )
    quiet: bool = field(
        default=False,
        metadata={"help": "only print final scores"},
    )
    # Stored as a string; the default is the text of an empty dict literal.
    model_overrides: str = field(
        default="{}",
        metadata={
            "help": "a dictionary used to override model args at generation that were used during model training"
        },
    )
    results_path: Optional[str] = field(
        default=None,
        metadata={"help": "path to save eval results (optional)"},
    )


@dataclass
class EvalLMConfig(FairseqDataclass):
    """Options for language-model evaluation."""

    output_word_probs: bool = field(
        default=False,
        metadata={
            "help": "if set, outputs words and their predicted log probabilities to standard output"
        },
    )
    output_word_stats: bool = field(
        default=False,
        metadata={
            "help": "if set, outputs word statistics such as word count, average probability, etc"
        },
    )
    context_window: int = field(
        default=0,
        metadata={
            "help": "ensures that every evaluated token has access to a context of at least this size, if possible"
        },
    )
    # Defaults to ``sys.maxsize``, the largest possible threshold.
    softmax_batch: int = field(
        default=sys.maxsize,
        metadata={
            "help": "if BxT is more than this, will batch the softmax over vocab to this amount of tokens, in order to fit into GPU memory"
        },
    )
@dataclass
class InteractiveConfig(FairseqDataclass):
    """Options for interactive (stdin / file driven) use."""

    buffer_size: int = field(
        default=0,
        metadata={
            "help": "read this many sentences into a buffer before processing them"
        },
    )
    # "-" selects standard input.
    input: str = field(
        default="-",
        metadata={"help": "file to read from; use - for stdin"},
    )


@dataclass
class EMAConfig(FairseqDataclass):
    """Options for keeping an exponential-moving-average copy of the model."""

    # The metadata key must be the *string* "help". The previous code used the
    # bare name ``help`` (the interactive-help builtin) as the key, so the text
    # was not stored under "help" like the other fields' help strings.
    store_ema: bool = field(
        default=False,
        metadata={"help": "store exponential moving average shadow model"},
    )
    ema_decay: float = field(
        default=0.9999, metadata={"help": "decay for exponential moving average model"}
    )
    ema_start_update: int = field(
        default=0, metadata={"help": "start EMA update after this many model updates"}
    )
    ema_seed_model: Optional[str] = field(
        default=None,
        metadata={
            "help": "Seed to load EMA model from. "
            "Used to load EMA model separately from the actual model."
        },
    )
    ema_update_freq: int = field(
        default=1, metadata={"help": "Do EMA update every this many model updates"}
    )
    ema_fp32: bool = field(
        default=False,
        metadata={"help": "If true, store EMA model in fp32 even if model is in fp16"},
    )


@dataclass
class FairseqConfig(FairseqDataclass):
    """Top-level configuration that groups every config section."""

    # Sections with a fixed schema: each defaults to an instance of its own
    # config dataclass.
    common: CommonConfig = CommonConfig()
    common_eval: CommonEvalConfig = CommonEvalConfig()
    distributed_training: DistributedTrainingConfig = DistributedTrainingConfig()
    dataset: DatasetConfig = DatasetConfig()
    optimization: OptimizationConfig = OptimizationConfig()
    checkpoint: CheckpointConfig = CheckpointConfig()
    bmuf: FairseqBMUFConfig = FairseqBMUFConfig()
    generation: GenerationConfig = GenerationConfig()
    eval_lm: EvalLMConfig = EvalLMConfig()
    interactive: InteractiveConfig = InteractiveConfig()
    # Sections typed ``Any``: no fixed schema is declared here.
    # ``model`` has no default (MISSING); the others default to None.
    model: Any = MISSING
    task: Any = None
    criterion: Any = None
    optimizer: Any = None
    lr_scheduler: Any = None
    scoring: Any = None
    bpe: Any = None
    tokenizer: Any = None
    ema: EMAConfig = EMAConfig()
class StrEnumMeta(EnumMeta):
    """Enum metaclass whose ``isinstance`` check is based on the type's text."""

    # this is workaround for submitit pickling leading to instance checks failing in hydra for StrEnum, see
    # https://github.com/facebookresearch/hydra/issues/1156
    @classmethod
    def __instancecheck__(cls, other):
        type_text = str(type(other))
        return "enum" in type_text


class StrEnum(Enum, metaclass=StrEnumMeta):
    """Enum whose members print, compare and hash like their values."""

    def __repr__(self):
        return self.value

    def __str__(self):
        return self.value

    def __hash__(self):
        # Hash of the string form, so a member hashes like its text.
        return hash(str(self))

    def __eq__(self, other: str):
        # A member is equal to anything its value is equal to.
        return self.value == other


def ChoiceEnum(choices: List[str]):
    """return the Enum class used to enforce list of choices"""
    members = {}
    for choice in choices:
        # Each member's name and value are both the choice itself.
        members[choice] = choice
    return StrEnum("Choices", members)


LOG_FORMAT_CHOICES = ChoiceEnum(["json", "none", "simple", "tqdm"])
DDP_BACKEND_CHOICES = ChoiceEnum(
    [
        "c10d",  # alias for pytorch_ddp
        "fully_sharded",  # FullyShardedDataParallel from fairscale
        "legacy_ddp",
        "no_c10d",  # alias for legacy_ddp
        "pytorch_ddp",
        "slowmo",
    ]
)
DDP_COMM_HOOK_CHOICES = ChoiceEnum(["none", "fp16"])
DATASET_IMPL_CHOICES = ChoiceEnum(
    ["raw", "lazy", "cached", "mmap", "fasta", "huffman"]
)
GENERATION_CONSTRAINTS_CHOICES = ChoiceEnum(["ordered", "unordered"])
GENERATION_DECODING_FORMAT_CHOICES = ChoiceEnum(
    ["unigram", "ensemble", "vote", "dp", "bs"]
)
ZERO_SHARDING_CHOICES = ChoiceEnum(["none", "os"])
PIPELINE_CHECKPOINT_CHOICES = ChoiceEnum(["always", "never", "except_last"])
PRINT_ALIGNMENT_CHOICES = ChoiceEnum(["hard", "soft"])
def hydra_init(cfg_name="config") -> None:
    """Register ``FairseqConfig`` and each of its field defaults with Hydra.

    The full schema is stored under ``cfg_name``; afterwards every field's
    default value is stored under the field's own name. If a store call
    fails, the offending name and value are logged and the error re-raised.
    """
    store = ConfigStore.instance()
    store.store(name=f"{cfg_name}", node=FairseqConfig)

    for group_name, dc_field in FairseqConfig.__dataclass_fields__.items():
        node = dc_field.default
        try:
            store.store(name=group_name, node=node)
        except BaseException:
            logger.error(f"{group_name} - {node}")
            raise


def add_defaults(cfg: DictConfig) -> None:
    """This function adds default values that are stored in dataclasses that hydra doesn't know about"""

    # Imported here rather than at module level, as in the original code.
    from fairseq.registry import REGISTRIES
    from fairseq.tasks import TASK_DATACLASS_REGISTRY
    from fairseq.models import ARCH_MODEL_NAME_REGISTRY, MODEL_DATACLASS_REGISTRY
    from fairseq.dataclass.utils import merge_with_parent
    from typing import Any

    # Struct mode is switched off so sections can be replaced below.
    OmegaConf.set_struct(cfg, False)

    for key, dc_field in FairseqConfig.__dataclass_fields__.items():
        section = cfg.get(key)
        # Only sections that are present and declared as ``Any`` are handled.
        if section is None or not (dc_field.type == Any):
            continue

        if isinstance(section, str):
            # A bare name is expanded into a config holding just ``_name``.
            section = DictConfig({"_name": section})
            # NOTE(review): this assigns the entry to itself; kept as in the
            # original code.
            section.__dict__["_parent"] = section.__dict__["_parent"]

        name = getattr(section, "_name", None)

        # Look up the dataclass registered for this section / name.
        dc = None
        if key == "task":
            dc = TASK_DATACLASS_REGISTRY.get(name)
        elif key == "model":
            # Architecture names are first mapped to their model name.
            name = ARCH_MODEL_NAME_REGISTRY.get(name, name)
            dc = MODEL_DATACLASS_REGISTRY.get(name)
        elif key in REGISTRIES:
            dc = REGISTRIES[key]["dataclass_registry"].get(name)

        if dc is not None:
            cfg[key] = merge_with_parent(dc, section)
def eval_str_list(x, x_type=float):
    """Turn ``x`` into a list whose items are converted with ``x_type``.

    ``None`` is returned unchanged and an empty string gives ``[]``. Any other
    string is first parsed with ``ast.literal_eval``. If the value cannot be
    converted item by item (a ``TypeError`` is raised), it is converted as a
    single value and wrapped in a one-element list.
    """
    if x is None:
        return None

    if isinstance(x, str):
        if not x:
            return []
        x = ast.literal_eval(x)

    try:
        converted = [x_type(item) for item in x]
    except TypeError:
        # Not iterable (e.g. a single number): convert the value itself.
        converted = [x_type(x)]
    return converted


def interpret_dc_type(field_type):
    """Map a dataclass field annotation to the type used for parsing it.

    ``Any`` maps to ``str``; ``Optional[T]`` / ``Union[T, None]`` map to ``T``;
    everything else is returned as is.

    Raises:
        RuntimeError: if ``field_type`` is a string instead of a type.
    """
    if isinstance(field_type, str):
        raise RuntimeError("field should be a type")

    if field_type == Any:
        return str

    # The optional forms are recognised from the annotation's text.
    type_text = str(field_type)
    looks_optional = re.match(
        r"(typing.|^)Union\[(.*), NoneType\]$", type_text
    ) or type_text.startswith("typing.Optional")
    if looks_optional:
        return field_type.__args__[0]
    return field_type
+ """ + + def argparse_name(name: str): + if name == "data" and (with_prefix is None or with_prefix == ""): + # normally data is positional args, so we don't add the -- nor the prefix + return name + if name == "_name": + # private member, skip + return None + full_name = "--" + name.replace("_", "-") + if with_prefix is not None and with_prefix != "": + # if a prefix is specified, construct the prefixed arg name + full_name = with_prefix + "-" + full_name[2:] # strip -- when composing + return full_name + + def get_kwargs_from_dc( + dataclass_instance: FairseqDataclass, k: str + ) -> Dict[str, Any]: + """k: dataclass attributes""" + + kwargs = {} + + field_type = dataclass_instance._get_type(k) + inter_type = interpret_dc_type(field_type) + + field_default = dataclass_instance._get_default(k) + + if isinstance(inter_type, type) and issubclass(inter_type, Enum): + field_choices = [t.value for t in list(inter_type)] + else: + field_choices = None + + field_help = dataclass_instance._get_help(k) + field_const = dataclass_instance._get_argparse_const(k) + + if isinstance(field_default, str) and field_default.startswith("${"): + kwargs["default"] = field_default + else: + if field_default is MISSING: + kwargs["required"] = True + if field_choices is not None: + kwargs["choices"] = field_choices + if ( + isinstance(inter_type, type) + and (issubclass(inter_type, List) or issubclass(inter_type, Tuple)) + ) or ("List" in str(inter_type) or "Tuple" in str(inter_type)): + if "int" in str(inter_type): + kwargs["type"] = lambda x: eval_str_list(x, int) + elif "float" in str(inter_type): + kwargs["type"] = lambda x: eval_str_list(x, float) + elif "str" in str(inter_type): + kwargs["type"] = lambda x: eval_str_list(x, str) + else: + raise NotImplementedError( + "parsing of type " + str(inter_type) + " is not implemented" + ) + if field_default is not MISSING: + kwargs["default"] = ( + ",".join(map(str, field_default)) + if field_default is not None + else None + ) + elif ( + 
isinstance(inter_type, type) and issubclass(inter_type, Enum) + ) or "Enum" in str(inter_type): + kwargs["type"] = str + if field_default is not MISSING: + if isinstance(field_default, Enum): + kwargs["default"] = field_default.value + else: + kwargs["default"] = field_default + elif inter_type is bool: + kwargs["action"] = ( + "store_false" if field_default is True else "store_true" + ) + kwargs["default"] = field_default + else: + kwargs["type"] = inter_type + if field_default is not MISSING: + kwargs["default"] = field_default + + # build the help with the hierarchical prefix + if with_prefix is not None and with_prefix != "" and field_help is not None: + field_help = with_prefix[2:] + ": " + field_help + + kwargs["help"] = field_help + if field_const is not None: + kwargs["const"] = field_const + kwargs["nargs"] = "?" + + return kwargs + + for k in dataclass_instance._get_all_attributes(): + field_name = argparse_name(dataclass_instance._get_name(k)) + field_type = dataclass_instance._get_type(k) + if field_name is None: + continue + elif inspect.isclass(field_type) and issubclass(field_type, FairseqDataclass): + # for fields that are of type FairseqDataclass, we can recursively + # add their fields to the namespace (so we add the args from model, task, etc. to the root namespace) + prefix = None + if with_prefix is not None: + # if a prefix is specified, then we don't want to copy the subfields directly to the root namespace + # but we prefix them with the name of the current field. 
def _set_legacy_defaults(args, cls):
    """Helper to set default arguments based on *add_args*.

    ``cls.add_args`` is run against a scratch parser, and every default it
    declares is copied onto ``args`` unless ``args`` already has that
    attribute. Nothing happens when ``cls`` has no ``add_args``.
    """
    if not hasattr(cls, "add_args"):
        return

    import argparse

    # SUPPRESS as the parser-wide default means only explicitly declared
    # defaults are picked up below.
    scratch = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS, allow_abbrev=False
    )
    cls.add_args(scratch)

    # copied from argparse.py:
    declared = argparse.Namespace()
    for action in scratch._actions:
        if action.dest is argparse.SUPPRESS:
            continue
        if hasattr(declared, action.dest):
            # The first action seen for a destination wins.
            continue
        if action.default is argparse.SUPPRESS:
            continue
        setattr(declared, action.dest, action.default)

    for name, value in vars(declared).items():
        if not hasattr(args, name):
            setattr(args, name, value)
def migrate_registry(
    name, value, registry, args, overrides, deletes, use_name_as_val=False
):
    """Record how the config section ``name`` should be set or cleared.

    Appends to ``overrides`` / ``deletes`` in place and returns nothing:

    * ``value`` found in ``registry``: add ``name=value``, ``name._name=value``
      and the overrides ``_override_attr`` produces for the registered class;
    * otherwise, when ``use_name_as_val`` is set and ``value`` is not None:
      add only ``name=value``;
    * in every other case: add ``name`` to ``deletes``.
    """
    if value in registry:
        overrides.append("{}={}".format(name, value))
        overrides.append("{}._name={}".format(name, value))
        overrides.extend(_override_attr(name, registry[value], args))
        return

    if use_name_as_val and value is not None:
        overrides.append("{}={}".format(name, value))
        return

    deletes.append(name)
def convert_namespace_to_omegaconf(args: Namespace) -> DictConfig:
    """Convert a flat argparse.Namespace to a structured DictConfig.

    The namespace is first translated into override strings plus a list of
    sections to clear. The base "config" is composed with those overrides,
    the listed sections are set to None, and the result is rebuilt as a plain
    config. For task, model, optimizer, lr_scheduler and criterion, a section
    that is still None while ``args`` names a value for it receives a copy of
    the whole namespace, completed with the legacy ``add_args`` defaults of
    the registered class.

    Args:
        args: flat namespace of parsed command-line options.

    Returns:
        The structured config, with struct mode enabled.
    """

    # Here we are using field values provided in args to override counterparts inside config object
    overrides, deletes = override_module_args(args)

    # configs will be in fairseq/config after installation
    config_path = os.path.join("..", "config")

    # Clear the global Hydra instance before initializing it again below.
    GlobalHydra.instance().clear()

    with initialize(config_path=config_path):
        try:
            composed_cfg = compose("config", overrides=overrides, strict=False)
        except:
            # Log the overrides for diagnosis, then let the error propagate.
            logger.error("Error when composing. Overrides: " + str(overrides))
            raise

        # Sections reported in ``deletes`` are cleared.
        for k in deletes:
            composed_cfg[k] = None

    # Rebuild from a plain container with values resolved and enums
    # converted to strings.
    cfg = OmegaConf.create(
        OmegaConf.to_container(composed_cfg, resolve=True, enum_to_str=True)
    )

    # hack to be able to set Namespace in dict config. this should be removed when we update to newer
    # omegaconf version that supports object flags, or when we migrate all existing models
    # NOTE(review): this local import does not appear to be used in this
    # function — confirm before removing.
    from omegaconf import _utils

    with omegaconf_no_object_check():
        # Each branch below follows the same pattern: copy the namespace into
        # the section, fill in legacy defaults, and record the chosen name.
        if cfg.task is None and getattr(args, "task", None):
            cfg.task = Namespace(**vars(args))
            from fairseq.tasks import TASK_REGISTRY

            _set_legacy_defaults(cfg.task, TASK_REGISTRY[args.task])
            cfg.task._name = args.task
        if cfg.model is None and getattr(args, "arch", None):
            cfg.model = Namespace(**vars(args))
            from fairseq.models import ARCH_MODEL_REGISTRY

            _set_legacy_defaults(cfg.model, ARCH_MODEL_REGISTRY[args.arch])
            cfg.model._name = args.arch
        if cfg.optimizer is None and getattr(args, "optimizer", None):
            cfg.optimizer = Namespace(**vars(args))
            from fairseq.optim import OPTIMIZER_REGISTRY

            _set_legacy_defaults(cfg.optimizer, OPTIMIZER_REGISTRY[args.optimizer])
            cfg.optimizer._name = args.optimizer
        if cfg.lr_scheduler is None and getattr(args, "lr_scheduler", None):
            cfg.lr_scheduler = Namespace(**vars(args))
            from fairseq.optim.lr_scheduler import LR_SCHEDULER_REGISTRY

            _set_legacy_defaults(
                cfg.lr_scheduler, LR_SCHEDULER_REGISTRY[args.lr_scheduler]
            )
            cfg.lr_scheduler._name = args.lr_scheduler
        if cfg.criterion is None and getattr(args, "criterion", None):
            cfg.criterion = Namespace(**vars(args))
            from fairseq.criterions import CRITERION_REGISTRY

            _set_legacy_defaults(cfg.criterion, CRITERION_REGISTRY[args.criterion])
            cfg.criterion._name = args.criterion

    # Turn struct mode on for the returned config.
    OmegaConf.set_struct(cfg, True)
    return cfg
def merge_with_parent(dc: FairseqDataclass, cfg: DictConfig, remove_missing=False):
    """Merge ``cfg`` onto the dataclass ``dc`` and keep ``cfg``'s parent link.

    Args:
        dc: dataclass providing the schema and default values.
        cfg: config whose values are merged on top of ``dc``.
        remove_missing: when true, keys of ``cfg`` that ``dc`` does not have
            are deleted (recursively, in place) before merging.

    Returns:
        The merged config, with struct mode enabled.
    """
    if remove_missing:

        def _prune(source, target):
            # Names the target accepts: dataclass fields, else mapping keys.
            allowed = (
                set(target.__dataclass_fields__.keys())
                if is_dataclass(target)
                else set(target.keys())
            )

            # Iterate over a snapshot, since entries are deleted on the way.
            for key in list(source.keys()):
                if key not in allowed:
                    del source[key]
                    continue
                if not OmegaConf.is_config(source[key]):
                    continue
                nested = getattr(target, key)
                if nested is not None and (
                    is_dataclass(nested) or hasattr(nested, "keys")
                ):
                    _prune(source[key], nested)

        with open_dict(cfg):
            _prune(cfg, dc)

    merged = OmegaConf.merge(dc, cfg)
    # Carry over the parent reference of the original config.
    merged.__dict__["_parent"] = cfg.__dict__["_parent"]
    OmegaConf.set_struct(merged, True)
    return merged
class DistributedTimeoutWrapper(nn.Module):
    """
    A wrapper that kills the process if no progress is made within a given
    *timeout*. The timer is reset every time :func:`forward` is called.

    Usage::

        module = DistributedTimeoutWrapper(module, timeout=30)
        x = module(input)
        time.sleep(20)  # safe
        x = module(input)
        time.sleep(45)  # job will be killed before this returns

    Args:
        module (nn.Module): module to wrap
        timeout (int): number of seconds before killing the process
            (set to a value <= 0 to disable the timeout)
        signal (Optional): signal to send once timeout is triggered
    """

    def __init__(self, module: nn.Module, timeout: int, signal=signal.SIGINT):
        super().__init__()
        self.module = module
        self.timeout = timeout
        self.signal = signal

        if timeout > 0:
            self._heartbeat = threading.Event()
            # Set the stop flag *before* starting the thread so the watchdog
            # can never observe the attribute as missing.
            self._terminated = False
            self._heartbeat_thread = threading.Thread(
                target=self._check_heartbeat,
                args=(os.getpid(),),
                daemon=True,
            )
            self._heartbeat_thread.start()
        else:
            # Timeout disabled: no event and no watchdog thread.
            self._heartbeat = None
            self._heartbeat_thread = None

    def __del__(self):
        self.stop_timeout()

    def __getattr__(self, name):
        """Forward missing attributes to wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            return getattr(self.module, name)

    def stop_timeout(self):
        """Stop the watchdog thread (no-op when the timeout is disabled).

        The thread is woken explicitly. Without that, ``join`` had to wait
        until the current ``Event.wait`` expired — up to ``timeout`` seconds,
        or indefinitely when ``forward`` had never been called.
        """
        if self._heartbeat_thread is not None:
            self._terminated = True
            # Wake the watchdog so it notices the stop request right away.
            self._heartbeat.set()
            self._heartbeat_thread.join()

    def state_dict(self, *args, **kwargs):
        """Return the wrapped module's state dict."""
        return self.module.state_dict(*args, **kwargs)

    def load_state_dict(self, *args, **kwargs):
        """Load a state dict into the wrapped module."""
        return self.module.load_state_dict(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """Signal a heartbeat, then run the wrapped module."""
        if self._heartbeat is not None:
            self._heartbeat.set()
        return self.module(*args, **kwargs)

    def _check_heartbeat(self, parent_pid):
        """Watchdog loop: signal ``parent_pid`` when no heartbeat arrives in time."""
        # wait for the first forward pass (or for stop_timeout to wake us)
        self._heartbeat.wait()
        while True:
            self._heartbeat.clear()
            # stop_timeout sets the flag before setting the event, so checking
            # the flag *after* clear() cannot miss a stop request whose wake-up
            # was just cleared.
            if self._terminated:
                break
            success = self._heartbeat.wait(timeout=self.timeout)
            if self._terminated:
                break
            elif not success:
                logger.error(
                    (
                        "Killing job for not making progress in {} seconds. "
                        "Set --heartbeat-timeout=-1 to disable this timeout."
                    ).format(int(self.timeout))
                )
                os.kill(parent_pid, self.signal)
                return
" + "Please install fairscale with: pip install fairscale" + ) + super().__init__(*args, **kwargs) + self.use_sharded_state = use_sharded_state + + @property + def unwrapped_module(self) -> torch.nn.Module: + if self.flatten_parameters: + return self.module.module + else: + return self.module + + def state_dict(self, destination=None, prefix="", keep_vars=False): + if self.use_sharded_state: + return super().local_state_dict( + destination=destination, prefix=prefix, keep_vars=keep_vars + ) + else: + if self.rank == 0: + return super().state_dict( + destination=destination, prefix=prefix, keep_vars=keep_vars + ) + else: + # We must call state_dict() due to use of communication + # primitives. But we don't use the result. + super().state_dict() + return destination or {} + + def load_state_dict(self, state_dict, strict=True, model_cfg=None): + if self.use_sharded_state: + return super().load_local_state_dict(state_dict, strict=strict) + else: + state_dict = dist_utils.broadcast_object( + state_dict, src_rank=0, group=self.process_group + ) + return super().load_state_dict(state_dict, strict=strict) + + +class DummyProcessGroup: + def __init__(self, rank: int, size: int): + self._rank = rank + self._size = size + + def rank(self) -> int: + return self._rank + + def size(self) -> int: + return self._size + + +@contextlib.contextmanager +def fsdp_enable_wrap(cfg: DistributedTrainingConfig): + try: + from fairscale.nn import enable_wrap + except ImportError: + raise ImportError( + "Cannot find FullyShardedDataParallel. 
" + "Please install fairscale with: pip install fairscale" + ) + if cfg.memory_efficient_fp16: + assert cfg.fp16 # memory_efficient_fp16 should imply fp16 + group = dist_utils.get_data_parallel_group() + if group is None and cfg.distributed_world_size == 1: + group = DummyProcessGroup(rank=0, size=1) + fsdp_config = { + "process_group": group, + "reshard_after_forward": not cfg.no_reshard_after_forward, + "mixed_precision": cfg.fp16 and not cfg.memory_efficient_fp16, + "fp32_reduce_scatter": cfg.fp32_reduce_scatter, + "flatten_parameters": not cfg.not_fsdp_flatten_parameters, + "cpu_offload": cfg.cpu_offload, + "compute_dtype": torch.float16 if cfg.fp16 else torch.float32, + "bucket_cap_mb": cfg.bucket_cap_mb, + "state_dict_device": torch.device("cpu"), # reduce GPU mem usage + } + with enable_wrap( + wrapper_cls=FullyShardedDataParallel, + use_sharded_state=cfg.use_sharded_state, + **fsdp_config, + ): + yield + + +def fsdp_wrap(module, min_num_params: Optional[int] = None, **kwargs): + """ + Helper to wrap layers/modules in FSDP. This falls back to a no-op if + fairscale is not available. + + Args: + module (nn.Module): module to (maybe) wrap + min_num_params (int, Optional): minimum number of layer params to wrap + """ + try: + from fairscale.nn import wrap + + if min_num_params is not None: + num_params = sum(p.numel() for p in module.parameters()) + if num_params >= min_num_params: + return wrap(module, **kwargs) + else: + return module + else: + return wrap(module, **kwargs) + except ImportError: + return module diff --git a/fairseq/fairseq/distributed/legacy_distributed_data_parallel.py b/fairseq/fairseq/distributed/legacy_distributed_data_parallel.py new file mode 100644 index 0000000..cd434c7 --- /dev/null +++ b/fairseq/fairseq/distributed/legacy_distributed_data_parallel.py @@ -0,0 +1,165 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
class LegacyDistributedDataParallel(nn.Module):
    """Implements distributed data parallelism at the module level.

    A simplified version of :class:`torch.nn.parallel.DistributedDataParallel`.
    This version uses a c10d process group for communication and does not
    broadcast buffers.

    Gradients are not reduced automatically: callers must invoke
    :meth:`all_reduce_grads` after the backward pass.

    Args:
        module (~torch.nn.Module): module to be parallelized
        process_group: the c10d process group to be used for distributed data
            parallel all-reduction.
        buffer_size (int, optional): number of elements to buffer before
            performing all-reduce (default: 256M).
    """

    def __init__(self, module, process_group, buffer_size=2**28):
        super().__init__()

        self.module = module
        self.process_group = process_group
        self.world_size = utils.get_world_size(self.process_group)

        # Never use a bigger buffer than the number of model params
        self.buffer_size = min(buffer_size, sum(p.numel() for p in module.parameters()))
        # Allocated lazily on the first call to all_reduce_grads()
        self.buffer = None

        # We can also forcibly accumulate grads locally and only do the
        # all-reduce at some later time
        self.accumulate_grads = False

        # make per-device lists of parameters
        paramlists = OrderedDict()
        for param in self.module.parameters():
            device = param.device
            if paramlists.get(device) is None:
                paramlists[device] = []
            paramlists[device] += [param]
        self.per_device_params = list(paramlists.values())

    @contextmanager
    def no_sync(self):
        """A context manager to disable gradient synchronization.

        While active, :meth:`all_reduce_grads` returns without reducing. The
        previous value of ``accumulate_grads`` is restored on exit, including
        when the body raises.
        """
        old_accumulate_grads = self.accumulate_grads
        self.accumulate_grads = True
        try:
            yield
        finally:
            # Restore even on error; otherwise one failed step would leave
            # gradient synchronization switched off for the rest of training.
            self.accumulate_grads = old_accumulate_grads

    def forward(self, *inputs, **kwargs):
        """Run the wrapped module on the given inputs."""
        return self.module(*inputs, **kwargs)

    def all_reduce_grads(self):
        """
        This function must be called explicitly after backward to reduce
        gradients. There is no automatic hook like c10d.
        """

        def all_reduce_params(params):
            """All-reduce the grads of *params* via the shared flat buffer."""
            buffer = self.buffer
            nonzero_buffer = False
            if len(params) > 1:
                # pack every grad (or zeros for a missing grad) into the buffer
                offset = 0
                for p in params:
                    sz = p.numel()
                    if p.grad is not None:
                        buffer[offset : offset + sz].copy_(p.grad.data.view(-1))
                        nonzero_buffer = True
                    else:
                        buffer[offset : offset + sz].zero_()
                    offset += sz
            else:
                # we only have a single grad to all-reduce
                p = params[0]
                if p.grad is not None:
                    buffer = p.grad.data
                    nonzero_buffer = True
                elif p.numel() <= self.buffer.numel():
                    buffer = buffer[: p.numel()]
                    buffer.zero_()
                else:
                    buffer = torch.zeros_like(p)

            if nonzero_buffer:
                # pre-divide so the reduced sum is the mean over workers
                buffer.div_(self.world_size)

            utils.all_reduce(buffer, self.process_group)

            # copy all-reduced grads back into their original place
            offset = 0
            for p in params:
                sz = p.numel()
                if p.grad is not None:
                    p.grad.data.copy_(buffer[offset : offset + sz].view_as(p))
                else:
                    p.grad = buffer[offset : offset + sz].view_as(p).clone()
                offset += sz

        def reduction_fn():
            # This function only needs to be called once
            if self.accumulate_grads:
                return

            if self.buffer is None:
                self.buffer = next(self.module.parameters()).new(self.buffer_size)

            for params in self.per_device_params:
                # All-reduce the gradients in buckets
                offset = 0
                buffered_params = []
                for param in params:
                    if not param.requires_grad:
                        continue
                    if param.grad is None:
                        param.grad = torch.zeros_like(param)

                    if hasattr(param, "expert"):
                        # Skip gradient sync for unshared parameters
                        continue

                    if param.grad.requires_grad:
                        raise RuntimeError(
                            "DistributedDataParallel only works "
                            "with gradients that don't require "
                            "grad"
                        )
                    sz = param.numel()
                    if sz > self.buffer.numel():
                        # all-reduce big params directly
                        all_reduce_params([param])
                    else:
                        if offset + sz > self.buffer.numel():
                            # bucket is full: flush it before adding this param
                            all_reduce_params(buffered_params)
                            offset = 0
                            buffered_params.clear()
                        buffered_params.append(param)
                        offset += sz

                if len(buffered_params) > 0:
                    all_reduce_params(buffered_params)

        reduction_fn()
class TPUDistributedDataParallel(nn.Module):
    """Data-parallel wrapper that reduces gradients with ``xm.all_reduce``.

    Args:
        module: module to wrap; ``forward`` is delegated to it unchanged
        process_group: group handle; ``process_group[1]`` is passed to
            ``xm.all_reduce`` as ``groups``
    """

    def __init__(self, module, process_group):
        super().__init__()
        self.module = module
        self.process_group = process_group
        self.world_size = utils.get_world_size(self.process_group)

    def forward(self, *inputs, **kwargs):
        """Call the wrapped module with the given arguments."""
        return self.module(*inputs, **kwargs)

    def all_reduce_grads(self):
        """Sum the gradients of all trainable parameters across the group,
        scaled by ``1 / world_size``.

        Trainable parameters without a gradient get a zero gradient first.

        Raises:
            RuntimeError: if any gradient itself requires grad
        """

        def _grad_for(param):
            # materialize a zero grad so every worker contributes a tensor
            if param.grad is None:
                param.grad = torch.zeros_like(param)
            if param.grad.requires_grad:
                raise RuntimeError(
                    "TPUDistributedDataParallel only works with gradients that don't "
                    "require grad"
                )
            return param.grad

        grads = [_grad_for(param) for param in self.parameters() if param.requires_grad]

        # imported here so the class can be defined without torch_xla installed
        import torch_xla.core.xla_model as xm

        xm.all_reduce(
            "sum",
            grads,
            scale=1.0 / self.world_size,
            groups=self.process_group[1],
        )
+ +import io +import logging +import os +import pickle +import random +import socket +import struct +import subprocess +import warnings +from argparse import Namespace +from collections import OrderedDict +from dataclasses import dataclass +from typing import Any, Dict, List, Mapping, Optional + +import torch +import torch.distributed as dist +from fairseq.dataclass.configs import DistributedTrainingConfig, FairseqConfig +from omegaconf import open_dict + +try: + import torch_xla.core.xla_model as xm +except ImportError: + xm = None + + +# Flag to indicate if we're using Megatron +# NOTE: this is a temporary hack until we move away from Megatron's model parallel init +_USE_MEGATRON = False + +# Whether to use XLA ops (e.g., on TPUs) instead of CUDA ops. +_USE_XLA = False + + +logger = logging.getLogger(__name__) + + +def is_master(cfg: DistributedTrainingConfig): + return cfg.distributed_rank == 0 + + +def infer_init_method(cfg: DistributedTrainingConfig, force_distributed=False): + if cfg.distributed_init_method is not None or cfg.tpu: + return + + num_pipelines_per_node = None + if cfg.pipeline_model_parallel: + num_pipeline_devices, num_pipelines_per_node = _pipeline_parallel_pre_init(cfg) + + if cfg.distributed_world_size == 1: + return + if all( + key in os.environ + for key in ["MASTER_ADDR", "MASTER_PORT", "WORLD_SIZE", "RANK"] + ): + # support torch.distributed.launch + _infer_torch_distributed_launch_init(cfg) + else: + # we can determine the init method automatically for Slurm + if not _infer_slurm_init(cfg, num_pipelines_per_node): + if cfg.distributed_port <= 0 or force_distributed: + _infer_single_node_init(cfg) + elif cfg.distributed_port <= 0: + _infer_single_node_init(cfg) + + if cfg.pipeline_model_parallel: + _pipeline_parallel_post_init(cfg, num_pipeline_devices, num_pipelines_per_node) + elif not cfg.distributed_no_spawn: + with open_dict(cfg): + cfg.distributed_num_procs = min( + torch.cuda.device_count(), cfg.distributed_world_size + ) + else: 
def _infer_torch_distributed_launch_init(cfg: DistributedTrainingConfig):
    """Fill in *cfg* from the environment set by ``torch.distributed.launch``.

    Reads ``WORLD_SIZE`` and ``RANK`` from ``os.environ`` and sets
    ``distributed_init_method``, ``distributed_world_size``,
    ``distributed_rank``, ``device_id`` and ``distributed_no_spawn`` on *cfg*.

    Raises:
        KeyError: if ``WORLD_SIZE`` or ``RANK`` is missing from the environment
        ValueError: if either variable is not an integer
    """
    cfg.distributed_init_method = "env://"
    cfg.distributed_world_size = int(os.environ["WORLD_SIZE"])
    cfg.distributed_rank = int(os.environ["RANK"])
    # device_count() is 0 when no CUDA device is visible; the bare modulo
    # would then raise ZeroDivisionError, so fall back to device 0.
    num_devices = torch.cuda.device_count()
    cfg.device_id = cfg.distributed_rank % num_devices if num_devices > 0 else 0
    # processes are created by torch.distributed.launch
    cfg.distributed_no_spawn = True
def _infer_single_node_init(cfg: DistributedTrainingConfig):
    """Point *cfg* at a ``tcp://localhost`` rendezvous for a single node.

    If ``cfg.distributed_port`` is not positive, a port in [10000, 60000] is
    chosen: seeded from ``SLURM_JOB_ID`` (with ``SLURM_ARRAY_TASK_ID``
    appended when present) if that variable is set, otherwise from the global
    ``random`` state. The chosen port is stored on *cfg*.
    """
    assert (
        cfg.distributed_world_size <= torch.cuda.device_count()
    ), f"world size is {cfg.distributed_world_size} but have {torch.cuda.device_count()} available devices"

    if cfg.distributed_port <= 0:
        slurm_job = os.environ.get("SLURM_JOB_ID")
        array_task = os.environ.get("SLURM_ARRAY_TASK_ID")

        if slurm_job is None:
            chosen_port = random.randint(10000, 60000)
        else:
            # concatenate the digits (not add) so array tasks get distinct seeds
            seed_text = slurm_job if array_task is None else slurm_job + str(array_task)
            chosen_port = random.Random(int(seed_text)).randint(10000, 60000)

        cfg.distributed_port = chosen_port

    cfg.distributed_init_method = "tcp://localhost:{port}".format(
        port=cfg.distributed_port
    )
) + devices_exist = ( + cfg.pipeline_devices is not None + or cfg.pipeline_encoder_devices is not None + or cfg.pipeline_decoder_devices is not None + ) + if not balance_exists: + raise ValueError( + "--pipeline-balance is currently required for pipeline model parallelism" + ) + if not devices_exist: + raise ValueError( + "--pipeline-devices is currently required for pipeline model parallelism" + ) + + cfg.pipeline_balance = utils.eval_str_list(cfg.pipeline_balance, type=int) + if cfg.pipeline_devices is not None: + cfg.pipeline_devices = utils.eval_str_list(cfg.pipeline_devices, type=int) + num_pipeline_devices = len(set(cfg.pipeline_devices)) + else: + cfg.pipeline_encoder_devices = utils.eval_str_list( + cfg.pipeline_encoder_devices, type=int + ) + cfg.pipeline_decoder_devices = utils.eval_str_list( + cfg.pipeline_decoder_devices, type=int + ) + num_pipeline_devices = len( + set(cfg.pipeline_encoder_devices + cfg.pipeline_decoder_devices) + ) + gpus_per_node = torch.cuda.device_count() + assert ( + gpus_per_node >= num_pipeline_devices + and gpus_per_node % num_pipeline_devices == 0 + ), ( + "the number of unique device IDs in --pipeline-devices must evenly divide " + "the number of GPUs per node (multi-node pipelining is not yet supported)" + ) + num_pipelines_per_node = gpus_per_node // num_pipeline_devices + return num_pipeline_devices, num_pipelines_per_node + + +def _pipeline_parallel_post_init( + cfg: DistributedTrainingConfig, num_pipeline_devices, num_pipelines_per_node +): + if not cfg.distributed_no_spawn: + # When distributed_no_spawn is False, we expect distributed_rank and + # distributed_world_size to be based on the total number of GPUs, so + # we need to correct them to be based on the number of pipelines. 
+ assert cfg.distributed_world_size % num_pipeline_devices == 0 + cfg.distributed_world_size = cfg.distributed_world_size // num_pipeline_devices + # In the case of 4-way MP on nodes with 8 GPUs, we want + # distributed_rank to be the starting GPU index for each pipeline + # i.e., 0, 2, ... + gpus_per_node = torch.cuda.device_count() + assert cfg.distributed_rank % gpus_per_node == 0 + assert cfg.distributed_rank % num_pipeline_devices == 0 + + with open_dict(cfg): + cfg.distributed_rank = cfg.distributed_rank // num_pipeline_devices + # launch one process per pipeline + cfg.distributed_num_procs = num_pipelines_per_node + + # if we have 4-way MP on a node with 8 GPUs, we want device_ids to be 0 + # and 4, indicating the starting device IDs for each pipeline + cfg.device_id *= num_pipeline_devices + + if cfg.device_id > 0: + # if there's multiple pipelines on a node (e.g., 4-way MP on an 8 + # GPU node), we need to adjust pipeline_devices accordingly + logger.debug( + "setting CUDA device={} on rank {}".format( + cfg.device_id, cfg.distributed_rank + ) + ) + torch.cuda.set_device(cfg.device_id) + with open_dict(cfg): + cfg.pipeline_devices = [cfg.device_id + d for d in cfg.pipeline_devices] + logger.info( + "setting pipeline_devices={} on rank {}".format( + cfg.pipeline_devices, cfg.distributed_rank + ) + ) + + +def distributed_init(cfg: FairseqConfig): + if isinstance(cfg, Namespace): + from fairseq.dataclass.utils import convert_namespace_to_omegaconf + + cfg = convert_namespace_to_omegaconf(cfg) + + if not cfg.common.tpu: + if torch.distributed.is_available() and torch.distributed.is_initialized(): + warnings.warn( + "Distributed is already initialized, cannot initialize twice!" 
+ ) + else: + logger.info( + "distributed init (rank {}): {}".format( + cfg.distributed_training.distributed_rank, + cfg.distributed_training.distributed_init_method, + ) + ) + dist.init_process_group( + backend=cfg.distributed_training.distributed_backend, + init_method=cfg.distributed_training.distributed_init_method, + world_size=cfg.distributed_training.distributed_world_size, + rank=cfg.distributed_training.distributed_rank, + ) + logger.info( + "initialized host {} as rank {}".format( + socket.gethostname(), + cfg.distributed_training.distributed_rank, + ) + ) + + # perform a dummy all-reduce to initialize the NCCL communicator + if torch.cuda.is_available(): + dist.all_reduce(torch.zeros(1).cuda()) + + cfg.distributed_training.distributed_rank = torch.distributed.get_rank() + else: + assert xm.xrt_world_size() == cfg.distributed_training.distributed_world_size + global _USE_XLA + _USE_XLA = True + cfg.distributed_training.device_id = xm.get_local_ordinal() + cfg.distributed_training.distributed_rank = xm.get_ordinal() + xm.rendezvous("distributed_init") # wait for all workers + + if is_master(cfg.distributed_training): + logging.getLogger().setLevel(logging.INFO) + else: + logging.getLogger().setLevel(logging.WARNING) + + if cfg.common.model_parallel_size > 1: + try: + from fairseq.model_parallel.megatron.mpu import ( + initialize_model_parallel, + model_parallel_cuda_manual_seed, + ) + except ImportError: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + global _USE_MEGATRON + _USE_MEGATRON = True + initialize_model_parallel(cfg.common.model_parallel_size) + model_parallel_cuda_manual_seed(cfg.common.seed) + model_part_number = get_model_parallel_rank() + cfg.checkpoint.checkpoint_suffix += "-model_part-{0}".format(model_part_number) + + if hasattr(cfg, "model") and getattr(cfg.model, "base_layers", 0) > 0: + cfg.checkpoint.checkpoint_suffix = ( + 
f"-rank-{cfg.distributed_training.distributed_rank}" + ) + + return cfg.distributed_training.distributed_rank + + +def distributed_main(i, main, cfg: FairseqConfig, kwargs): + cfg.distributed_training.device_id = i + if torch.cuda.is_available() and not cfg.common.cpu and not cfg.common.tpu: + torch.cuda.set_device(cfg.distributed_training.device_id) + if cfg.distributed_training.distributed_rank is None: # torch.multiprocessing.spawn + cfg.distributed_training.distributed_rank = kwargs.pop("start_rank", 0) + i + + cfg.distributed_training.distributed_rank = distributed_init(cfg) + + after_distributed_init_fn = kwargs.pop("after_distributed_init_fn", None) + if after_distributed_init_fn: + cfg = after_distributed_init_fn(cfg) + + main(cfg, **kwargs) + + if torch.distributed.is_initialized(): + torch.distributed.barrier(get_global_group()) + + +def call_main(cfg: FairseqConfig, main, **kwargs): + if cfg.distributed_training.distributed_init_method is None: + infer_init_method(cfg.distributed_training) + + if cfg.distributed_training.distributed_init_method is not None: + # distributed training + if not cfg.distributed_training.distributed_no_spawn: + start_rank = cfg.distributed_training.distributed_rank + cfg.distributed_training.distributed_rank = None # assign automatically + kwargs["start_rank"] = start_rank + + torch.multiprocessing.spawn( + fn=distributed_main, + args=(main, cfg, kwargs), + nprocs=min( + torch.cuda.device_count(), + cfg.distributed_training.distributed_world_size, + ), + join=True, + ) + else: + distributed_main(cfg.distributed_training.device_id, main, cfg, kwargs) + elif cfg.common.tpu and cfg.distributed_training.distributed_world_size > 1: + import torch_xla.distributed.xla_multiprocessing as xmp + + torch.multiprocessing.set_sharing_strategy("file_system") + xmp.spawn( + fn=distributed_main, + args=(main, cfg, kwargs), + # tpu-comment: + # 8 devices in one TPU VM, is the max processes to be spawned. 
def use_xla():
    """Return the module-level flag that selects XLA ops over CUDA ops."""
    global _USE_XLA
    return _USE_XLA


def new_groups(grouped_ranks: List[List[int]]):
    """Create one group per rank list and return the caller's group.

    Under XLA the result is the tuple ``("tpu", grouped_ranks)``; otherwise a
    ``dist.new_group`` is created for every list and the one containing the
    caller's global rank is returned.
    """
    if use_xla():
        return ("tpu", grouped_ranks)
    else:
        groups = [dist.new_group(g) for g in grouped_ranks]
        my_group_idx = _find_my_group_index(grouped_ranks)
        return groups[my_group_idx]


def _find_my_group_index(grouped_ranks):
    """Return the index of the rank list that contains the caller's global rank.

    Raises:
        RuntimeError: if the caller's rank appears in none of the lists
    """
    my_rank = get_global_rank()
    for i, group in enumerate(grouped_ranks):
        if my_rank in group:
            return i
    raise RuntimeError(
        f"rank {my_rank} is not a member of any group in {grouped_ranks}"
    )


def _find_my_group(grouped_ranks):
    """Return the rank list that contains the caller's global rank."""
    index = _find_my_group_index(grouped_ranks)
    return grouped_ranks[index]


def get_rank(group):
    """Return the caller's rank within *group*.

    Returns 0 when torch.distributed is not initialized (and XLA is not in
    use), mirroring the single-process fallbacks of :func:`get_world_size`
    and :func:`get_global_rank`.
    """
    if use_xla():
        assert group[0] == "tpu"
        my_group = _find_my_group(group[1])
        return my_group.index(get_global_rank())
    elif torch.distributed.is_initialized():
        return dist.get_rank(group=group)
    else:
        # single-process run: there is no process group to query
        return 0


def get_world_size(group):
    """Return the number of workers in *group* (1 if not distributed)."""
    if use_xla():
        assert group[0] == "tpu"
        my_group = _find_my_group(group[1])
        return len(my_group)
    elif torch.distributed.is_initialized():
        return dist.get_world_size(group=group)
    else:
        return 1


def get_global_group():
    """Return a group spanning all workers, or None if not distributed.

    In the torch.distributed case the group is created once and cached as an
    attribute on this function.
    """
    if use_xla():
        return new_groups([list(range(get_global_world_size()))])
    elif torch.distributed.is_initialized():
        if not hasattr(get_global_group, "_global_group"):
            # ideally we could use torch.distributed.group.WORLD, but it seems
            # to cause random NCCL hangs in some cases
            get_global_group._global_group = dist.new_group()
        return get_global_group._global_group
    else:
        return None


def get_global_rank():
    """Return the caller's rank among all workers (0 if not distributed)."""
    if use_xla():
        return xm.get_ordinal()
    elif torch.distributed.is_initialized():
        return torch.distributed.get_rank()
    else:
        return 0


def get_global_world_size():
    """Return the total number of workers (1 if not distributed)."""
    if use_xla():
        return xm.xrt_world_size()
    elif torch.distributed.is_initialized():
        return torch.distributed.get_world_size()
    else:
        return 1
def all_reduce(tensor, group, op="sum"):
    """All-reduce *tensor* across *group* and return the reduced tensor.

    Args:
        tensor: tensor to reduce
        group: a ``("tpu", ranks)`` tuple under XLA, otherwise the group
            passed to ``dist.all_reduce``
        op (str): ``"sum"`` or ``"max"`` on the torch.distributed path; under
            XLA the value is handed to ``xm.all_reduce`` as-is

    Raises:
        NotImplementedError: on the torch.distributed path, for any other *op*
    """
    if not use_xla():
        if op not in ("sum", "max"):
            raise NotImplementedError
        reduce_op = dist.ReduceOp.SUM if op == "sum" else dist.ReduceOp.MAX
        dist.all_reduce(tensor, op=reduce_op, group=group)
        return tensor

    assert isinstance(group, tuple) and group[0] == "tpu"
    wrapped = [tensor]  # wrap in a list to make xm.all_reduce in-place
    return xm.all_reduce(op, wrapped, groups=group[1])[0]
+ tensor, + split_dimension=0, + concat_dimension=0, + split_count=split_count, + groups=group[1], + ) + else: + output = torch.zeros_like(tensor) + dist.all_to_all_single(output, tensor, group=group) + return output + + +def all_gather(tensor, group, return_tensor=False): + """Perform an all-gather operation.""" + if use_xla(): + result = xm.all_gather(tensor, groups=group[1]) + world_size = get_world_size(group=group) + result = result.view(world_size, *tensor.size()) + if return_tensor: + return result + else: + return [result[i] for i in range(world_size)] + else: + world_size = get_world_size(group=group) + rank = get_rank(group=group) + tensor_list = [ + tensor if i == rank else torch.empty_like(tensor) for i in range(world_size) + ] + dist.all_gather(tensor_list, tensor, group=group) + if return_tensor: + return torch.stack(tensor_list, dim=0) + else: + return tensor_list + + +def all_gather_list(data, group=None, max_size=16384): + """Gathers arbitrary data from all nodes into a list. + + Similar to :func:`~torch.distributed.all_gather` but for arbitrary Python + data. Note that *data* must be picklable and any CUDA tensors will be moved + to CPU and returned on CPU as well. 
+ + Args: + data (Any): data from the local worker to be gathered on other workers + group: group of the collective + max_size (int, optional): maximum size of the data to be gathered + across workers + """ + from fairseq import utils + + if group is None: + group = get_global_group() + rank = get_rank(group=group) + world_size = get_world_size(group=group) + + buffer_size = max_size * world_size + if ( + not hasattr(all_gather_list, "_buffer") + or all_gather_list._buffer.numel() < buffer_size + ): + all_gather_list._buffer = torch.cuda.ByteTensor(buffer_size) + all_gather_list._cpu_buffer = torch.ByteTensor(max_size).pin_memory() + buffer = all_gather_list._buffer + buffer.zero_() + cpu_buffer = all_gather_list._cpu_buffer + + data = utils.move_to_cpu(data) + enc = pickle.dumps(data) + enc_size = len(enc) + header_size = 4 # size of header that contains the length of the encoded data + size = header_size + enc_size + if size > max_size: + raise ValueError( + "encoded data size ({}) exceeds max_size ({})".format(size, max_size) + ) + + header = struct.pack(">I", enc_size) + cpu_buffer[:size] = torch.ByteTensor(list(header + enc)) + start = rank * max_size + buffer[start : start + size].copy_(cpu_buffer[:size]) + + all_reduce(buffer, group=group) + + buffer = buffer.cpu() + try: + result = [] + for i in range(world_size): + out_buffer = buffer[i * max_size : (i + 1) * max_size] + (enc_size,) = struct.unpack(">I", bytes(out_buffer[:header_size].tolist())) + if enc_size > 0: + result.append( + pickle.loads( + bytes(out_buffer[header_size : header_size + enc_size].tolist()) + ) + ) + return result + except pickle.UnpicklingError: + raise Exception( + "Unable to unpickle data from other workers. all_gather_list requires all " + "workers to enter the function together, so this error usually indicates " + "that the workers have fallen out of sync somehow. 
Workers can fall out of " + "sync if one of them runs out of memory, or if there are other conditions " + "in your training script that can cause one worker to finish an epoch " + "while other workers are still iterating over their portions of the data. " + "Try rerunning with --ddp-backend=legacy_ddp and see if that helps." + ) + + +def all_reduce_dict(data: Mapping[str, Any], device, group) -> Dict[str, Any]: + """ + AllReduce a dictionary of values across workers. We separately + reduce items that are already on the device and items on CPU for + better performance. + + Args: + data (Mapping[str, Any]): dictionary of data to all-reduce, but + cannot be a nested dictionary + device (torch.device): device for the reduction + group: group of the collective + """ + data_keys = list(data.keys()) + + # We want to separately reduce items that are already on the + # device and items on CPU for performance reasons. + cpu_data = OrderedDict() + device_data = OrderedDict() + for k in data_keys: + t = data[k] + if not torch.is_tensor(t): + cpu_data[k] = torch.tensor(t, dtype=torch.double) + elif t.device.type != device.type: + cpu_data[k] = t.to(dtype=torch.double) + else: + device_data[k] = t.to(dtype=torch.double) + + def _all_reduce_dict(data: OrderedDict): + if len(data) == 0: + return data + buf = torch.cat([t.view(-1) for t in data.values()]).to(device=device) + all_reduce(buf, group=group) + split_buf = torch.split(buf.clone(), [t.numel() for t in data.values()]) + reduced_data = [t.view_as(orig) for t, orig in zip(split_buf, data.values())] + return OrderedDict(zip(data.keys(), reduced_data)) + + cpu_data = _all_reduce_dict(cpu_data) + device_data = _all_reduce_dict(device_data) + + def get_from_stack(key): + if key in cpu_data: + return cpu_data[key] + elif key in device_data: + return device_data[key] + raise KeyError + + return OrderedDict([(key, get_from_stack(key)) for key in data_keys]) + + +def broadcast_tensors( + tensors: Optional[List[torch.Tensor]], + 
src_rank: int, + group: object, + dist_device: Optional[torch.device] = None, +) -> List[torch.Tensor]: + """ + Broadcasts a list of tensors without other (non-src) ranks needing to know + the dtypes/shapes of the tensors. + """ + if dist_device is None: + if torch.distributed.get_backend(group) == "nccl": + dist_device = torch.device("cuda") + else: + dist_device = torch.device("cpu") + + # share metadata first to simplify transfer + is_src_rank = get_rank(group) == src_rank + if is_src_rank: + metadata = [ + {"size": t.size(), "dtype": t.dtype, "device": t.device} for t in tensors + ] + metadata = _broadcast_object_slow(metadata, src_rank, group, dist_device) + else: + metadata = _broadcast_object_slow(None, src_rank, group, dist_device) + + out_tensors = [] + for i, meta in enumerate(metadata): + if is_src_rank: + tensor = tensors[i] + broadcast(tensors[i].to(dist_device), src=src_rank, group=group) + else: + tensor = torch.zeros( + [meta["size"].numel()], dtype=meta["dtype"], device=dist_device + ) + broadcast(tensor, src=src_rank, group=group) + tensor = tensor.view(meta["size"]).to(meta["device"]) + out_tensors.append(tensor) + return out_tensors + + +def broadcast_object( + obj: Any, + src_rank: int, + group: object, + dist_device: Optional[torch.device] = None, +) -> Any: + """Broadcast an arbitrary Python object to other workers.""" + if dist_device is None: + if torch.distributed.get_backend(group) == "nccl": + dist_device = torch.device("cuda") + else: + dist_device = torch.device("cpu") + + if get_rank(group) == src_rank: + # split the tensors from the non-tensors so we can broadcast them + # directly, avoiding unnecessary serialization/deserialization + tensors = [] + obj = _split_tensors_from_obj(obj, tensors) + obj = _broadcast_object_slow(obj, src_rank, group, dist_device) + tensors = broadcast_tensors(tensors, src_rank, group, dist_device) + else: + obj = _broadcast_object_slow(None, src_rank, group, dist_device) + tensors = broadcast_tensors(None, 
src_rank, group, dist_device) + return _put_tensors_in_obj(obj, tensors) + + +def _broadcast_object_slow( + obj: Any, + src_rank: int, + group: object, + dist_device: torch.device, +) -> Any: + if get_rank(group) == src_rank: + # Emit data + buffer = io.BytesIO() + torch.save(obj, buffer) + buffer = torch.ByteTensor(buffer.getbuffer()).to(dist_device) + length = torch.LongTensor([len(buffer)]).to(dist_device) + broadcast(length, src=src_rank, group=group) + broadcast(buffer, src=src_rank, group=group) + else: + # Fetch from the source + length = torch.LongTensor([0]).to(dist_device) + broadcast(length, src=src_rank, group=group) + buffer = torch.ByteTensor(int(length.item())).to(dist_device) + broadcast(buffer, src=src_rank, group=group) + buffer = io.BytesIO(buffer.cpu().numpy()) + obj = torch.load(buffer, map_location="cpu") + return obj + + +@dataclass(frozen=True) +class _TensorPlaceholder: + index: int + + +def _split_tensors_from_obj(obj: Any, tensors: List[torch.Tensor]) -> Any: + if torch.is_tensor(obj): + placeholder = _TensorPlaceholder(index=len(tensors)) + tensors.append(obj) + return placeholder + elif isinstance(obj, dict): + return {k: _split_tensors_from_obj(v, tensors) for k, v in obj.items()} + elif isinstance(obj, list): + return [_split_tensors_from_obj(v, tensors) for v in obj] + elif isinstance(obj, tuple): + return tuple(_split_tensors_from_obj(v, tensors) for v in obj) + elif isinstance(obj, set): + return {_split_tensors_from_obj(v, tensors) for v in obj} + else: + return obj + + +def _put_tensors_in_obj(obj: Any, tensors: List[torch.Tensor]) -> Any: + if isinstance(obj, _TensorPlaceholder): + return tensors[obj.index] + elif isinstance(obj, dict): + return {k: _put_tensors_in_obj(v, tensors) for k, v in obj.items()} + elif isinstance(obj, list): + return [_put_tensors_in_obj(v, tensors) for v in obj] + elif isinstance(obj, tuple): + return tuple(_put_tensors_in_obj(v, tensors) for v in obj) + elif isinstance(obj, set): + return 
{_put_tensors_in_obj(v, tensors) for v in obj} + else: + return obj diff --git a/fairseq/fairseq/file_chunker_utils.py b/fairseq/fairseq/file_chunker_utils.py new file mode 100644 index 0000000..3f27549 --- /dev/null +++ b/fairseq/fairseq/file_chunker_utils.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import typing as tp + + +def _safe_readline(fd) -> str: + pos = fd.tell() + while True: + try: + return fd.readline() + except UnicodeDecodeError: + pos -= 1 + fd.seek(pos) # search where this character begins + + +def find_offsets(filename: str, num_chunks: int) -> tp.List[int]: + """ + given a file and a number of chuncks, find the offsets in the file + to be able to chunk around full lines. + """ + with open(filename, "r", encoding="utf-8") as f: + size = os.fstat(f.fileno()).st_size + chunk_size = size // num_chunks + offsets = [0 for _ in range(num_chunks + 1)] + for i in range(1, num_chunks): + f.seek(chunk_size * i) + _safe_readline(f) + offsets[i] = f.tell() + offsets[-1] = size + return offsets + + +class ChunkLineIterator: + """ + Iterator to properly iterate over lines of a file chunck. 
+ """ + + def __init__(self, fd, start_offset: int, end_offset: int): + self._fd = fd + self._start_offset = start_offset + self._end_offset = end_offset + + def __iter__(self) -> tp.Iterable[str]: + self._fd.seek(self._start_offset) + # next(f) breaks f.tell(), hence readline() must be used + line = _safe_readline(self._fd) + while line: + pos = self._fd.tell() + # f.tell() does not always give the byte position in the file + # sometimes it skips to a very large number + # it is unlikely that through a normal read we go from + # end bytes to end + 2**32 bytes (4 GB) and this makes it unlikely + # that the procedure breaks by the undeterministic behavior of + # f.tell() + if ( + self._end_offset > 0 + and pos > self._end_offset + and pos < self._end_offset + 2**32 + ): + break + yield line + line = self._fd.readline() + + +class Chunker: + """ + contextmanager to read a chunck of a file line by line. + """ + + def __init__(self, path: str, start_offset: int, end_offset: int): + self.path = path + self.start_offset = start_offset + self.end_offset = end_offset + + def __enter__(self) -> ChunkLineIterator: + self.fd = open(self.path, "r", encoding="utf-8") + return ChunkLineIterator(self.fd, self.start_offset, self.end_offset) + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + self.fd.close() diff --git a/fairseq/fairseq/file_io.py b/fairseq/fairseq/file_io.py new file mode 100644 index 0000000..8eca70a --- /dev/null +++ b/fairseq/fairseq/file_io.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +import shutil +from typing import List, Optional + + +logger = logging.getLogger(__file__) + + +try: + from iopath.common.file_io import g_pathmgr as IOPathManager + + try: + # [FB only - for now] AWS PathHandler for PathManager + from .fb_pathhandlers import S3PathHandler + + IOPathManager.register_handler(S3PathHandler()) + except KeyError: + logging.warning("S3PathHandler already registered.") + except ImportError: + logging.debug( + "S3PathHandler couldn't be imported. Either missing fb-only files, or boto3 module." + ) + +except ImportError: + IOPathManager = None + + +class PathManager: + """ + Wrapper for insulating OSS I/O (using Python builtin operations) from + iopath's PathManager abstraction (for transparently handling various + internal backends). + """ + + @staticmethod + def open( + path: str, + mode: str = "r", + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + ): + if IOPathManager: + return IOPathManager.open( + path=path, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + ) + return open( + path, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + ) + + @staticmethod + def copy(src_path: str, dst_path: str, overwrite: bool = False) -> bool: + if IOPathManager: + return IOPathManager.copy( + src_path=src_path, dst_path=dst_path, overwrite=overwrite + ) + return shutil.copyfile(src_path, dst_path) + + @staticmethod + def get_local_path(path: str, **kwargs) -> str: + if IOPathManager: + return IOPathManager.get_local_path(path, **kwargs) + return path + + @staticmethod + def exists(path: str) -> bool: + if IOPathManager: + return IOPathManager.exists(path) + return os.path.exists(path) + + @staticmethod + def isfile(path: str) -> bool: + if IOPathManager: + return IOPathManager.isfile(path) + return os.path.isfile(path) + + @staticmethod + def ls(path: str) -> 
List[str]: + if IOPathManager: + return IOPathManager.ls(path) + return os.listdir(path) + + @staticmethod + def mkdirs(path: str) -> None: + if IOPathManager: + return IOPathManager.mkdirs(path) + os.makedirs(path, exist_ok=True) + + @staticmethod + def rm(path: str) -> None: + if IOPathManager: + return IOPathManager.rm(path) + os.remove(path) + + @staticmethod + def chmod(path: str, mode: int) -> None: + if not PathManager.path_requires_pathmanager(path): + os.chmod(path, mode) + + @staticmethod + def register_handler(handler) -> None: + if IOPathManager: + return IOPathManager.register_handler(handler=handler) + + @staticmethod + def copy_from_local( + local_path: str, dst_path: str, overwrite: bool = False, **kwargs + ) -> None: + if IOPathManager: + return IOPathManager.copy_from_local( + local_path=local_path, dst_path=dst_path, overwrite=overwrite, **kwargs + ) + return shutil.copyfile(local_path, dst_path) + + @staticmethod + def path_requires_pathmanager(path: str) -> bool: + """Do we require PathManager to access given path?""" + if IOPathManager: + for p in IOPathManager._path_handlers.keys(): + if path.startswith(p): + return True + return False + + @staticmethod + def supports_rename(path: str) -> bool: + # PathManager doesn't yet support renames + return not PathManager.path_requires_pathmanager(path) + + @staticmethod + def rename(src: str, dst: str): + os.rename(src, dst) + + """ + ioPath async PathManager methods: + """ + + @staticmethod + def opena( + path: str, + mode: str = "r", + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + ): + """ + Return file descriptor with asynchronous write operations. 
+ """ + global IOPathManager + if not IOPathManager: + logging.info("ioPath is initializing PathManager.") + try: + from iopath.common.file_io import PathManager + + IOPathManager = PathManager() + except Exception: + logging.exception("Failed to initialize ioPath PathManager object.") + return IOPathManager.opena( + path=path, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + ) + + @staticmethod + def async_close() -> bool: + """ + Wait for files to be written and clean up asynchronous PathManager. + NOTE: `PathManager.async_close()` must be called at the end of any + script that uses `PathManager.opena(...)`. + """ + global IOPathManager + if IOPathManager: + return IOPathManager.async_close() + return False diff --git a/fairseq/fairseq/file_utils.py b/fairseq/fairseq/file_utils.py new file mode 100644 index 0000000..b99da2e --- /dev/null +++ b/fairseq/fairseq/file_utils.py @@ -0,0 +1,370 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Utilities for working with the local dataset cache. +This file is adapted from `AllenNLP <https://github.com/allenai/allennlp>`_. +and `huggingface <https://github.com/huggingface>`_. 
+""" + +import fnmatch +import json +import logging +import os +import shutil +import tarfile +import tempfile +from functools import partial, wraps +from hashlib import sha256 +from io import open + + +try: + from torch.hub import _get_torch_home + + torch_cache_home = _get_torch_home() +except ImportError: + torch_cache_home = os.path.expanduser( + os.getenv( + "TORCH_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "torch") + ) + ) +default_cache_path = os.path.join(torch_cache_home, "pytorch_fairseq") + +try: + from urllib.parse import urlparse +except ImportError: + from urlparse import urlparse + +try: + from pathlib import Path + + PYTORCH_FAIRSEQ_CACHE = Path(os.getenv("PYTORCH_FAIRSEQ_CACHE", default_cache_path)) +except (AttributeError, ImportError): + PYTORCH_FAIRSEQ_CACHE = os.getenv("PYTORCH_FAIRSEQ_CACHE", default_cache_path) + +CONFIG_NAME = "config.json" +WEIGHTS_NAME = "pytorch_model.bin" + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def load_archive_file(archive_file): + # redirect to the cache, if necessary + try: + resolved_archive_file = cached_path(archive_file, cache_dir=None) + except EnvironmentError: + logger.info( + "Archive name '{}' was not found in archive name list. 
" + "We assumed '{}' was a path or URL but couldn't find any file " + "associated to this path or URL.".format( + archive_file, + archive_file, + ) + ) + return None + + if resolved_archive_file == archive_file: + logger.info("loading archive file {}".format(archive_file)) + else: + logger.info( + "loading archive file {} from cache at {}".format( + archive_file, resolved_archive_file + ) + ) + + # Extract archive to temp dir and replace .tar.bz2 if necessary + tempdir = None + if not os.path.isdir(resolved_archive_file): + tempdir = tempfile.mkdtemp() + logger.info( + "extracting archive file {} to temp dir {}".format( + resolved_archive_file, tempdir + ) + ) + ext = os.path.splitext(archive_file)[1][1:] + with tarfile.open(resolved_archive_file, "r:" + ext) as archive: + top_dir = os.path.commonprefix(archive.getnames()) + archive.extractall(tempdir) + os.remove(resolved_archive_file) + shutil.move(os.path.join(tempdir, top_dir), resolved_archive_file) + shutil.rmtree(tempdir) + + return resolved_archive_file + + +def url_to_filename(url, etag=None): + """ + Convert `url` into a hashed filename in a repeatable way. + If `etag` is specified, append its hash to the URL's, delimited + by a period. + """ + url_bytes = url.encode("utf-8") + url_hash = sha256(url_bytes) + filename = url_hash.hexdigest() + + if etag: + etag_bytes = etag.encode("utf-8") + etag_hash = sha256(etag_bytes) + filename += "." + etag_hash.hexdigest() + + return filename + + +def filename_to_url(filename, cache_dir=None): + """ + Return the url and etag (which may be ``None``) stored for `filename`. + Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist. 
+ """ + if cache_dir is None: + cache_dir = PYTORCH_FAIRSEQ_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise EnvironmentError("file {} not found".format(cache_path)) + + meta_path = cache_path + ".json" + if not os.path.exists(meta_path): + raise EnvironmentError("file {} not found".format(meta_path)) + + with open(meta_path, encoding="utf-8") as meta_file: + metadata = json.load(meta_file) + url = metadata["url"] + etag = metadata["etag"] + + return url, etag + + +def cached_path_from_pm(url_or_filename): + """ + Tries to cache the specified URL using PathManager class. + Returns the cached path if success otherwise failure. + """ + try: + from fairseq.file_io import PathManager + + local_path = PathManager.get_local_path(url_or_filename) + return local_path + except Exception: + return None + + +def cached_path(url_or_filename, cache_dir=None): + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_FAIRSEQ_CACHE + if isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ("http", "https", "s3"): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == "": + # File, but it doesn't exist. 
+ raise EnvironmentError("file {} not found".format(url_or_filename)) + else: + cached_path = cached_path_from_pm(url_or_filename) + if cached_path: + return cached_path + # Something unknown + raise ValueError( + "unable to parse {} as a URL or as a local path".format(url_or_filename) + ) + + +def split_s3_path(url): + """Split a full s3 path into the bucket name and path.""" + parsed = urlparse(url) + if not parsed.netloc or not parsed.path: + raise ValueError("bad s3 path {}".format(url)) + bucket_name = parsed.netloc + s3_path = parsed.path + # Remove '/' at beginning of path. + if s3_path.startswith("/"): + s3_path = s3_path[1:] + return bucket_name, s3_path + + +def s3_request(func): + """ + Wrapper function for s3 requests in order to create more helpful error + messages. + """ + + @wraps(func) + def wrapper(url, *args, **kwargs): + from botocore.exceptions import ClientError + + try: + return func(url, *args, **kwargs) + except ClientError as exc: + if int(exc.response["Error"]["Code"]) == 404: + raise EnvironmentError("file {} not found".format(url)) + else: + raise + + return wrapper + + +@s3_request +def s3_etag(url): + """Check ETag on S3 object.""" + import boto3 + + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_object = s3_resource.Object(bucket_name, s3_path) + return s3_object.e_tag + + +@s3_request +def s3_get(url, temp_file): + """Pull a file directly from S3.""" + import boto3 + + s3_resource = boto3.resource("s3") + bucket_name, s3_path = split_s3_path(url) + s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file) + + +def request_wrap_timeout(func, url): + import requests + + for attempt, timeout in enumerate([10, 20, 40, 60, 60]): + try: + return func(timeout=timeout) + except requests.exceptions.Timeout as e: + logger.warning( + "Request for %s timed-out (attempt %d). 
Retrying with a timeout of %d secs", + url, + attempt, + timeout, + exc_info=e, + ) + continue + raise RuntimeError(f"Unable to fetch file {url}") + + +def http_get(url, temp_file): + import requests + from tqdm import tqdm + + req = request_wrap_timeout(partial(requests.get, url, stream=True), url) + content_length = req.headers.get("Content-Length") + total = int(content_length) if content_length is not None else None + progress = tqdm(unit="B", total=total) + for chunk in req.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + progress.close() + + +def get_from_cache(url, cache_dir=None): + """ + Given a URL, look for the corresponding dataset in the local cache. + If it's not there, download it. Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_FAIRSEQ_CACHE + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + + # Get eTag to add to filename, if it exists. 
+ if url.startswith("s3://"): + etag = s3_etag(url) + else: + try: + import requests + + response = request_wrap_timeout( + partial(requests.head, url, allow_redirects=True), url + ) + if response.status_code != 200: + etag = None + else: + etag = response.headers.get("ETag") + except RuntimeError: + etag = None + + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + # If we don't have a connection (etag is None) and can't identify the file + # try to get the last downloaded one + if not os.path.exists(cache_path) and etag is None: + matching_files = fnmatch.filter(os.listdir(cache_dir), filename + ".*") + matching_files = list(filter(lambda s: not s.endswith(".json"), matching_files)) + if matching_files: + cache_path = os.path.join(cache_dir, matching_files[-1]) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
+ with tempfile.NamedTemporaryFile() as temp_file: + logger.info("%s not found in cache, downloading to %s", url, temp_file.name) + + # GET file object + if url.startswith("s3://"): + s3_get(url, temp_file) + else: + http_get(url, temp_file) + + # we are copying the file before closing it, so flush to avoid truncation + temp_file.flush() + # shutil.copyfileobj() starts at the current position, so go to the start + temp_file.seek(0) + + logger.info("copying %s to cache at %s", temp_file.name, cache_path) + with open(cache_path, "wb") as cache_file: + shutil.copyfileobj(temp_file, cache_file) + + logger.info("creating metadata file for %s", cache_path) + meta = {"url": url, "etag": etag} + meta_path = cache_path + ".json" + with open(meta_path, "w") as meta_file: + output_string = json.dumps(meta) + meta_file.write(output_string) + + logger.info("removing temp file %s", temp_file.name) + + return cache_path + + +def read_set_from_file(filename): + """ + Extract a de-duped collection (set) of text from a file. + Expected file format is one item per line. + """ + collection = set() + with open(filename, "r", encoding="utf-8") as file_: + for line in file_: + collection.add(line.rstrip()) + return collection + + +def get_file_extension(path, dot=True, lower=True): + ext = os.path.splitext(path)[1] + ext = ext if dot else ext[1:] + return ext.lower() if lower else ext diff --git a/fairseq/fairseq/hub_utils.py b/fairseq/fairseq/hub_utils.py new file mode 100644 index 0000000..b0c2da1 --- /dev/null +++ b/fairseq/fairseq/hub_utils.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import copy +import logging +import os +from typing import Any, Dict, Iterator, List + +import torch +from omegaconf import open_dict +from torch import nn + +from fairseq import utils +from fairseq.data import encoders + +logger = logging.getLogger(__name__) + + +def from_pretrained( + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + archive_map=None, + **kwargs +): + from fairseq import checkpoint_utils, file_utils + + if archive_map is not None: + if model_name_or_path in archive_map: + model_name_or_path = archive_map[model_name_or_path] + if data_name_or_path is not None and data_name_or_path in archive_map: + data_name_or_path = archive_map[data_name_or_path] + + # allow archive_map to set default arg_overrides (e.g., tokenizer, bpe) + # for each model + if isinstance(model_name_or_path, dict): + for k, v in model_name_or_path.items(): + if k == "checkpoint_file": + checkpoint_file = v + elif ( + k != "path" + # only set kwargs that don't already have overrides + and k not in kwargs + ): + kwargs[k] = v + model_name_or_path = model_name_or_path["path"] + + model_path = file_utils.load_archive_file(model_name_or_path) + + # convenience hack for loading data and BPE codes from model archive + if data_name_or_path.startswith("."): + kwargs["data"] = os.path.abspath(os.path.join(model_path, data_name_or_path)) + else: + kwargs["data"] = file_utils.load_archive_file(data_name_or_path) + for file, arg in { + "code": "bpe_codes", + "bpecodes": "bpe_codes", + "sentencepiece.bpe.model": "sentencepiece_model", + "merges.txt": "bpe_merges", + "vocab.json": "bpe_vocab", + }.items(): + path = os.path.join(model_path, file) + if os.path.exists(path): + kwargs[arg] = path + + if "user_dir" in kwargs: + utils.import_user_module(argparse.Namespace(user_dir=kwargs["user_dir"])) + + model_path = [ + os.path.join(model_path, cpt) for cpt in checkpoint_file.split(os.pathsep) + ] + + if "is_vocoder" in kwargs: + args = {"data": 
kwargs["data"], "model_path": model_path} + task = None + models = None + else: + models, args, task = checkpoint_utils.load_model_ensemble_and_task( + model_path, + arg_overrides=kwargs, + ) + if "generation_args" in kwargs and kwargs["generation_args"]: + for key in kwargs["generation_args"]: + setattr(args["generation"], key, kwargs["generation_args"][key]) + + return { + "args": args, + "task": task, + "models": models, + } + + +class GeneratorHubInterface(nn.Module): + """ + PyTorch Hub interface for generating sequences from a pre-trained + translation or language model. + """ + + def __init__(self, cfg, task, models): + super().__init__() + self.cfg = cfg + self.task = task + self.models = nn.ModuleList(models) + self.src_dict = task.source_dictionary + self.tgt_dict = task.target_dictionary + + # optimize model for generation + for model in self.models: + model.prepare_for_inference_(cfg) + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + self.align_dict = utils.load_align_dict(cfg.generation.replace_unk) + + self.tokenizer = encoders.build_tokenizer(cfg.tokenizer) + self.bpe = encoders.build_bpe(cfg.bpe) + + self.max_positions = utils.resolve_max_positions( + self.task.max_positions(), *[model.max_positions() for model in models] + ) + + # this is useful for determining the device + self.register_buffer("_float_tensor", torch.tensor([0], dtype=torch.float)) + + @property + def device(self): + return self._float_tensor.device + + def translate( + self, sentences: List[str], beam: int = 5, verbose: bool = False, **kwargs + ) -> List[str]: + return self.sample(sentences, beam, verbose, **kwargs) + + def sample( + self, sentences: List[str], beam: int = 1, verbose: bool = False, **kwargs + ) -> List[str]: + if isinstance(sentences, str): + return self.sample([sentences], beam=beam, verbose=verbose, **kwargs)[0] + tokenized_sentences = [self.encode(sentence) for sentence 
in sentences] + batched_hypos = self.generate(tokenized_sentences, beam, verbose, **kwargs) + return [self.decode(hypos[0]["tokens"]) for hypos in batched_hypos] + + def score( + self, sentences: List[str], replace_newline_with_eos: bool = False, **kwargs + ): + if isinstance(sentences, str): + return self.score( + [sentences], replace_newline_with_eos=replace_newline_with_eos, **kwargs + )[0] + + def encode(sentence): + if replace_newline_with_eos: + return torch.cat([self.encode(line) for line in sentence.splitlines()]) + else: + return self.encode(sentence) + + # NOTE: this doesn't support translation tasks currently + tokenized_sentences = [encode(sentence) for sentence in sentences] + return [ + hypos[0] + for hypos in self.generate( + tokenized_sentences, score_reference=True, **kwargs + ) + ] + + def generate( + self, + tokenized_sentences: List[torch.LongTensor], + beam: int = 5, + verbose: bool = False, + skip_invalid_size_inputs=False, + inference_step_args=None, + prefix_allowed_tokens_fn=None, + **kwargs + ) -> List[List[Dict[str, torch.Tensor]]]: + if torch.is_tensor(tokenized_sentences) and tokenized_sentences.dim() == 1: + return self.generate( + tokenized_sentences.unsqueeze(0), beam=beam, verbose=verbose, **kwargs + )[0] + + # build generator using current args as well as any kwargs + gen_args = copy.deepcopy(self.cfg.generation) + with open_dict(gen_args): + gen_args.beam = beam + for k, v in kwargs.items(): + setattr(gen_args, k, v) + generator = self.task.build_generator( + self.models, + gen_args, + prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, + ) + + inference_step_args = inference_step_args or {} + results = [] + for batch in self._build_batches(tokenized_sentences, skip_invalid_size_inputs): + batch = utils.apply_to_sample(lambda t: t.to(self.device), batch) + translations = self.task.inference_step( + generator, self.models, batch, **inference_step_args + ) + for id, hypos in zip(batch["id"].tolist(), translations): + 
results.append((id, hypos)) + + # sort output to match input order + outputs = [hypos for _, hypos in sorted(results, key=lambda x: x[0])] + + if verbose: + + def getarg(name, default): + return getattr(gen_args, name, getattr(self.cfg, name, default)) + + for source_tokens, target_hypotheses in zip(tokenized_sentences, outputs): + src_str_with_unk = self.string(source_tokens) + logger.info("S\t{}".format(src_str_with_unk)) + for hypo in target_hypotheses: + hypo_str = self.decode(hypo["tokens"]) + logger.info("H\t{}\t{}".format(hypo["score"], hypo_str)) + logger.info( + "P\t{}".format( + " ".join( + map( + lambda x: "{:.4f}".format(x), + hypo["positional_scores"].tolist(), + ) + ) + ) + ) + if hypo["alignment"] is not None and getarg( + "print_alignment", False + ): + logger.info( + "A\t{}".format( + " ".join( + [ + "{}-{}".format(src_idx, tgt_idx) + for src_idx, tgt_idx in hypo["alignment"] + ] + ) + ) + ) + return outputs + + def encode(self, sentence: str) -> torch.LongTensor: + sentence = self.tokenize(sentence) + sentence = self.apply_bpe(sentence) + return self.binarize(sentence) + + def decode(self, tokens: torch.LongTensor) -> str: + sentence = self.string(tokens) + sentence = self.remove_bpe(sentence) + return self.detokenize(sentence) + + def tokenize(self, sentence: str) -> str: + if self.tokenizer is not None: + sentence = self.tokenizer.encode(sentence) + return sentence + + def detokenize(self, sentence: str) -> str: + if self.tokenizer is not None: + sentence = self.tokenizer.decode(sentence) + return sentence + + def apply_bpe(self, sentence: str) -> str: + if self.bpe is not None: + sentence = self.bpe.encode(sentence) + return sentence + + def remove_bpe(self, sentence: str) -> str: + if self.bpe is not None: + sentence = self.bpe.decode(sentence) + return sentence + + def binarize(self, sentence: str) -> torch.LongTensor: + return self.src_dict.encode_line(sentence, add_if_not_exist=False).long() + + def string(self, tokens: torch.LongTensor) -> 
str: + return self.tgt_dict.string(tokens) + + def _build_batches( + self, tokens: List[List[int]], skip_invalid_size_inputs: bool + ) -> Iterator[Dict[str, Any]]: + lengths = torch.LongTensor([t.numel() for t in tokens]) + batch_iterator = self.task.get_batch_iterator( + dataset=self.task.build_dataset_for_inference(tokens, lengths), + max_tokens=self.cfg.dataset.max_tokens, + max_sentences=self.cfg.dataset.batch_size, + max_positions=self.max_positions, + ignore_invalid_inputs=skip_invalid_size_inputs, + disable_iterator_cache=True, + ).next_epoch_itr(shuffle=False) + return batch_iterator + + +class BPEHubInterface(object): + """PyTorch Hub interface for Byte-Pair Encoding (BPE).""" + + def __init__(self, bpe, **kwargs): + super().__init__() + args = argparse.Namespace(bpe=bpe, **kwargs) + self.bpe = encoders.build_bpe(args) + assert self.bpe is not None + + def encode(self, sentence: str) -> str: + return self.bpe.encode(sentence) + + def decode(self, sentence: str) -> str: + return self.bpe.decode(sentence) + + +class TokenizerHubInterface(object): + """PyTorch Hub interface for tokenization.""" + + def __init__(self, tokenizer, **kwargs): + super().__init__() + args = argparse.Namespace(tokenizer=tokenizer, **kwargs) + self.tokenizer = encoders.build_tokenizer(args) + assert self.tokenizer is not None + + def encode(self, sentence: str) -> str: + return self.tokenizer.encode(sentence) + + def decode(self, sentence: str) -> str: + return self.tokenizer.decode(sentence) diff --git a/fairseq/fairseq/incremental_decoding_utils.py b/fairseq/fairseq/incremental_decoding_utils.py new file mode 100644 index 0000000..b26e6cd --- /dev/null +++ b/fairseq/fairseq/incremental_decoding_utils.py @@ -0,0 +1,51 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 

import uuid
from typing import Dict, Optional

from torch import Tensor


class FairseqIncrementalState(object):
    """Mixin that namespaces a module's entries in a shared incremental-state dict.

    Every instance receives a random identifier; keys stored through
    :meth:`set_incremental_state` are prefixed with it, so several instances
    can share one ``incremental_state`` dictionary without colliding.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.init_incremental_state()

    def init_incremental_state(self):
        """Assign this instance a fresh random identifier (a UUID4 string)."""
        self._incremental_state_id = str(uuid.uuid4())

    def _get_full_incremental_state_key(self, key: str) -> str:
        """Return *key* prefixed with this instance's identifier."""
        return "{}.{}".format(self._incremental_state_id, key)

    def get_incremental_state(
        self,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
        key: str,
    ) -> Optional[Dict[str, Optional[Tensor]]]:
        """Helper for getting incremental state for an nn.Module.

        Returns ``None`` when *incremental_state* is ``None`` or holds no entry
        for this instance under *key*.
        """
        full_key = self._get_full_incremental_state_key(key)
        if incremental_state is None or full_key not in incremental_state:
            return None
        return incremental_state[full_key]

    def set_incremental_state(
        self,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
        key: str,
        value: Dict[str, Optional[Tensor]],
    ) -> Optional[Dict[str, Dict[str, Optional[Tensor]]]]:
        """Helper for setting incremental state for an nn.Module.

        Stores *value* in place under this instance's namespaced key and
        returns the same dictionary; a ``None`` dictionary is returned as-is.
        """
        if incremental_state is not None:
            full_key = self._get_full_incremental_state_key(key)
            incremental_state[full_key] = value
        return incremental_state


def with_incremental_state(cls):
    """Class decorator that makes :class:`FairseqIncrementalState` the first base of *cls*.

    If *cls* already lists the mixin among its direct bases it is moved to the
    front.  If *cls* only inherits the mixin indirectly (for example from an
    already-decorated parent), *cls* is returned unchanged: prepending the
    mixin there cannot produce a consistent method resolution order and would
    raise ``TypeError``.
    """
    if FairseqIncrementalState not in cls.__bases__ and issubclass(
        cls, FairseqIncrementalState
    ):
        return cls
    cls.__bases__ = (FairseqIncrementalState,) + tuple(
        b for b in cls.__bases__ if b is not FairseqIncrementalState
    )
    return cls


# diff --git a/fairseq/fairseq/iterative_refinement_generator.py b/fairseq/fairseq/iterative_refinement_generator.py
# new file mode 100644
# index 0000000..3d32c6b
# --- /dev/null
# +++ b/fairseq/fairseq/iterative_refinement_generator.py
# @@ -0,0 +1,359 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from collections import namedtuple

import numpy as np
import torch
from fairseq import utils


# Record passed between decoder refinement steps; `generate` below reads and
# replaces its fields via `_replace`.
DecoderOut = namedtuple(
    "IterativeRefinementDecoderOut",
    ["output_tokens", "output_scores", "attn", "step", "max_step", "history"],
)


class IterativeRefinementGenerator(object):
    """Generate hypotheses by repeatedly refining a full output with a model's decoder."""

    def __init__(
        self,
        tgt_dict,
        models=None,
        eos_penalty=0.0,
        max_iter=10,
        max_ratio=2,
        beam_size=1,
        decoding_format=None,
        retain_dropout=False,
        adaptive=True,
        retain_history=False,
        reranking=False,
    ):
        """
        Generates translations based on iterative refinement.

        Args:
            tgt_dict: target dictionary
            models: models used by :meth:`generate_batched_itr`
            eos_penalty: if > 0.0, it penalized early-stopping in decoding
            max_iter: maximum number of refinement iterations
            max_ratio: generate sequences of maximum length ax, where x is the source length
            beam_size: number of length-beam candidates per sentence
            decoding_format: decoding mode in {'unigram', 'ensemble', 'vote', 'dp', 'bs'}
            retain_dropout: retaining dropout in the inference
            adaptive: decoding with early stop
            retain_history: keep the output tokens of every refinement step
            reranking: treat the last model as a reranker (requires beam_size > 1)
        """
        self.bos = tgt_dict.bos()
        self.pad = tgt_dict.pad()
        self.unk = tgt_dict.unk()
        self.eos = tgt_dict.eos()
        self.vocab_size = len(tgt_dict)
        self.eos_penalty = eos_penalty
        self.max_iter = max_iter
        self.max_ratio = max_ratio
        self.beam_size = beam_size
        self.reranking = reranking
        self.decoding_format = decoding_format
        self.retain_dropout = retain_dropout
        self.retain_history = retain_history
        self.adaptive = adaptive
        self.models = models

    def generate_batched_itr(
        self,
        data_itr,
        maxlen_a=None,
        maxlen_b=None,
        cuda=False,
        timer=None,
        prefix_size=0,
    ):
        """Iterate over a batched dataset and yield individual translations.

        Args:
            maxlen_a/b: generate sequences of maximum length ax + b,
                where x is the source sentence length.
            cuda: use GPU for generation
            timer: StopwatchMeter for timing generations.

        Yields:
            ``(id, src, ref, hypos)`` per sentence, with padding stripped
            from ``src`` and ``ref``.

        NOTE(review): ``maxlen_a``, ``maxlen_b`` and ``cuda`` are not
        referenced in this method body.
        """

        for sample in data_itr:
            # batches without model input are skipped
            if "net_input" not in sample:
                continue
            if timer is not None:
                timer.start()
            with torch.no_grad():
                hypos = self.generate(
                    self.models,
                    sample,
                    prefix_tokens=sample["target"][:, :prefix_size]
                    if prefix_size > 0
                    else None,
                )
            if timer is not None:
                timer.stop(sample["ntokens"])
            for i, id in enumerate(sample["id"]):
                # remove padding
                src = utils.strip_pad(sample["net_input"]["src_tokens"][i, :], self.pad)
                ref = utils.strip_pad(sample["target"][i, :], self.pad)
                yield id, src, ref, hypos[i]

    @torch.no_grad()
    def generate(self, models, sample, prefix_tokens=None, constraints=None):
        """Run iterative refinement on one batch and return finalized hypotheses.

        Args:
            models: list of models; the first one decodes and, when
                ``self.reranking`` is set, the last one is the reranker.
            sample: batch dict; ``sample["net_input"]["src_tokens"]`` and
                ``["src_lengths"]`` are read.
            prefix_tokens: accepted but not referenced in this method body.
            constraints: must be ``None``.

        Returns:
            A list with one entry per source sentence; each entry is a
            one-element list holding a hypothesis dict with keys ``steps``,
            ``tokens``, ``positional_scores``, ``score``, ``hypo_attn`` and
            ``alignment`` (plus ``history`` when ``self.retain_history``).

        Raises:
            NotImplementedError: if *constraints* is not ``None``.
        """
        if constraints is not None:
            raise NotImplementedError(
                "Constrained decoding with the IterativeRefinementGenerator is not supported"
            )

        # TODO: iterative refinement generator does not support ensemble for now.
        if not self.retain_dropout:
            for model in models:
                model.eval()

        model, reranker = models[0], None
        if self.reranking:
            assert len(models) > 1, "Assuming the last checkpoint is the reranker"
            assert (
                self.beam_size > 1
            ), "Reranking requires multiple translation for each example"

            reranker = models[-1]
            models = models[:-1]

        if len(models) > 1 and hasattr(model, "enable_ensemble"):
            assert model.allow_ensemble, "{} does not support ensembling".format(
                model.__class__.__name__
            )
            model.enable_ensemble(models)

        # TODO: better encoder inputs?
        src_tokens = sample["net_input"]["src_tokens"]
        src_lengths = sample["net_input"]["src_lengths"]
        bsz, src_len = src_tokens.size()

        # initialize
        encoder_out = model.forward_encoder([src_tokens, src_lengths])
        prev_decoder_out = model.initialize_output_tokens(encoder_out, src_tokens)

        if self.beam_size > 1:
            assert (
                model.allow_length_beam
            ), "{} does not support decoding with length beam.".format(
                model.__class__.__name__
            )

            # regenerate data based on length-beam
            length_beam_order = (
                utils.new_arange(src_tokens, self.beam_size, bsz).t().reshape(-1)
            )
            encoder_out = model.encoder.reorder_encoder_out(
                encoder_out, length_beam_order
            )
            prev_decoder_out = model.regenerate_length_beam(
                prev_decoder_out, self.beam_size
            )
            bsz = bsz * self.beam_size

        # sent_idxs maps rows of the shrinking working batch back to slots in `finalized`
        sent_idxs = torch.arange(bsz)
        prev_output_tokens = prev_decoder_out.output_tokens.clone()

        if self.retain_history:
            prev_decoder_out = prev_decoder_out._replace(history=[prev_output_tokens])

        finalized = [[] for _ in range(bsz)]

        def is_a_loop(x, y, s, a):
            """Pad x/y (and s, a alongside y) to a common length and flag rows where x == y."""
            b, l_x, l_y = x.size(0), x.size(1), y.size(1)
            if l_x > l_y:
                y = torch.cat([y, x.new_zeros(b, l_x - l_y).fill_(self.pad)], 1)
                s = torch.cat([s, s.new_zeros(b, l_x - l_y)], 1)
                if a is not None:
                    a = torch.cat([a, a.new_zeros(b, l_x - l_y, a.size(2))], 1)
            elif l_x < l_y:
                x = torch.cat([x, y.new_zeros(b, l_y - l_x).fill_(self.pad)], 1)
            return (x == y).all(1), y, s, a

        def finalized_hypos(step, prev_out_token, prev_out_score, prev_out_attn):
            """Build the hypothesis dict for one sentence, dropping pad positions."""
            cutoff = prev_out_token.ne(self.pad)
            tokens = prev_out_token[cutoff]
            if prev_out_score is None:
                scores, score = None, None
            else:
                scores = prev_out_score[cutoff]
                score = scores.mean()

            if prev_out_attn is None:
                hypo_attn, alignment = None, None
            else:
                hypo_attn = prev_out_attn[cutoff]
                alignment = hypo_attn.max(dim=1)[1]
            return {
                "steps": step,
                "tokens": tokens,
                "positional_scores": scores,
                "score": score,
                "hypo_attn": hypo_attn,
                "alignment": alignment,
            }

        for step in range(self.max_iter + 1):

            decoder_options = {
                "eos_penalty": self.eos_penalty,
                "max_ratio": self.max_ratio,
                "decoding_format": self.decoding_format,
            }
            prev_decoder_out = prev_decoder_out._replace(
                step=step,
                max_step=self.max_iter + 1,
            )

            decoder_out = model.forward_decoder(
                prev_decoder_out, encoder_out, **decoder_options
            )

            if self.adaptive:
                # terminate if there is a loop
                terminated, out_tokens, out_scores, out_attn = is_a_loop(
                    prev_output_tokens,
                    decoder_out.output_tokens,
                    decoder_out.output_scores,
                    decoder_out.attn,
                )
                decoder_out = decoder_out._replace(
                    output_tokens=out_tokens,
                    output_scores=out_scores,
                    attn=out_attn,
                )

            else:
                # without early stopping nothing terminates before the last step
                terminated = decoder_out.output_tokens.new_zeros(
                    decoder_out.output_tokens.size(0)
                ).bool()

            if step == self.max_iter:  # reach last iteration, terminate
                terminated.fill_(1)

            # collect finalized sentences
            finalized_idxs = sent_idxs[terminated.to(sent_idxs.device)]
            finalized_tokens = decoder_out.output_tokens[terminated]
            finalized_scores = decoder_out.output_scores[terminated]
            finalized_attn = (
                None
                if (decoder_out.attn is None or decoder_out.attn.size(0) == 0)
                else decoder_out.attn[terminated]
            )

            if self.retain_history:
                finalized_history_tokens = [h[terminated] for h in decoder_out.history]

            for i in range(finalized_idxs.size(0)):
                finalized[finalized_idxs[i]] = [
                    finalized_hypos(
                        step,
                        finalized_tokens[i],
                        finalized_scores[i],
                        None if finalized_attn is None else finalized_attn[i],
                    )
                ]

                if self.retain_history:
                    finalized[finalized_idxs[i]][0]["history"] = []
                    for j in range(len(finalized_history_tokens)):
                        finalized[finalized_idxs[i]][0]["history"].append(
                            finalized_hypos(
                                step, finalized_history_tokens[j][i], None, None
                            )
                        )

            # check if all terminated
            if terminated.sum() == terminated.size(0):
                break

            # for next step
            not_terminated = ~terminated
            prev_decoder_out = decoder_out._replace(
                output_tokens=decoder_out.output_tokens[not_terminated],
                output_scores=decoder_out.output_scores[not_terminated],
                attn=decoder_out.attn[not_terminated]
                if (decoder_out.attn is not None and decoder_out.attn.size(0) > 0)
                else None,
                history=[h[not_terminated] for h in decoder_out.history]
                if decoder_out.history is not None
                else None,
            )
            # keep the encoder output aligned with the rows that are still being refined
            encoder_out = model.encoder.reorder_encoder_out(
                encoder_out, not_terminated.nonzero(as_tuple=False).squeeze()
            )
            sent_idxs = sent_idxs[not_terminated.to(sent_idxs.device)]
            prev_output_tokens = prev_decoder_out.output_tokens.clone()

        if self.beam_size > 1:
            if reranker is not None:
                finalized = self.rerank(
                    reranker, finalized, [src_tokens, src_lengths], self.beam_size
                )

            # aggregate information from length beam
            # (for each group of beam_size consecutive entries keep the one with the highest "score")
            finalized = [
                finalized[
                    np.argmax(
                        [
                            finalized[self.beam_size * i + j][0]["score"]
                            for j in range(self.beam_size)
                        ]
                    )
                    + self.beam_size * i
                ]
                for i in range(len(finalized) // self.beam_size)
            ]

        return finalized

    def rerank(self, reranker, finalized, encoder_input, beam_size):
        """Overwrite each hypothesis' ``score`` with a score computed by *reranker*.

        The new score is the reranker's normalized probability gathered at the
        hypothesis tokens, summed over non-pad positions and divided by their
        count.  *finalized* is modified in place and returned.
        """

        def rebuild_batch(finalized):
            """Stack the hypotheses' token tensors into one pad-filled 2-D tensor."""
            finalized_tokens = [f[0]["tokens"] for f in finalized]
            finalized_maxlen = max(f.size(0) for f in finalized_tokens)
            final_output_tokens = (
                finalized_tokens[0]
                .new_zeros(len(finalized_tokens), finalized_maxlen)
                .fill_(self.pad)
            )
            for i, f in enumerate(finalized_tokens):
                final_output_tokens[i, : f.size(0)] = f
            return final_output_tokens

        final_output_tokens = rebuild_batch(finalized)
        final_output_tokens[
            :, 0
        ] = self.eos  # autoregressive model assumes starting with EOS

        reranker_encoder_out = reranker.encoder(*encoder_input)
        length_beam_order = (
            utils.new_arange(
                final_output_tokens, beam_size, reranker_encoder_out.encoder_out.size(1)
            )
            .t()
            .reshape(-1)
        )
        reranker_encoder_out = reranker.encoder.reorder_encoder_out(
            reranker_encoder_out, length_beam_order
        )
        # decoder input is tokens[:, :-1]; the scored targets are tokens[:, 1:]
        reranking_scores = reranker.get_normalized_probs(
            reranker.decoder(final_output_tokens[:, :-1], reranker_encoder_out),
            True,
            None,
        )
        reranking_scores = reranking_scores.gather(2, final_output_tokens[:, 1:, None])
        reranking_masks = final_output_tokens[:, 1:].ne(self.pad)
        reranking_scores = (
            reranking_scores[:, :, 0].masked_fill_(~reranking_masks, 0).sum(1)
        )
        reranking_scores = reranking_scores / reranking_masks.sum(1).type_as(
            reranking_scores
        )

        for i in range(len(finalized)):
            finalized[i][0]["score"] = reranking_scores[i]

        return finalized


# diff --git a/fairseq/fairseq/logging/__init__.py b/fairseq/fairseq/logging/__init__.py
# new file mode 100644
# index 0000000..e69de29
# diff --git a/fairseq/fairseq/logging/meters.py b/fairseq/fairseq/logging/meters.py
# new file mode 100644
# index 0000000..495bd08
# --- /dev/null
# +++ b/fairseq/fairseq/logging/meters.py
# @@ -0,0 +1,351 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import bisect
import time
from collections import OrderedDict
from typing import Dict, Optional

try:
    import torch

    def type_as(a, b):
        """Return *a* converted with ``a.to(b)`` when both are tensors, else *a* unchanged."""
        if torch.is_tensor(a) and torch.is_tensor(b):
            return a.to(b)
        else:
            return a

except ImportError:
    torch = None

    def type_as(a, b):
        """Fallback used when torch is unavailable: return *a* unchanged."""
        return a


try:
    import numpy as np
except ImportError:
    np = None


class Meter(object):
    """Base class for Meters."""

    def __init__(self):
        pass

    def state_dict(self):
        return {}

    def load_state_dict(self, state_dict):
        pass

    def reset(self):
        raise NotImplementedError

    @property
    def smoothed_value(self) -> float:
        """Smoothed value used for logging."""
        raise NotImplementedError


def safe_round(number, ndigits):
    """Round *number* to *ndigits* when it is roundable, otherwise return it unchanged.

    Single-element tensors and zero-dimensional numpy values are unwrapped
    with ``.item()`` before rounding.
    """
    if hasattr(number, "__round__"):
        return round(number, ndigits)
    elif torch is not None and torch.is_tensor(number) and number.numel() == 1:
        return safe_round(number.item(), ndigits)
    elif np is not None and np.ndim(number) == 0 and hasattr(number, "item"):
        return safe_round(number.item(), ndigits)
    else:
        return number


class AverageMeter(Meter):
    """Computes and stores the average and current value"""

    def __init__(self, round: Optional[int] = None):
        self.round = round
        self.reset()

    def reset(self):
        self.val = None  # most recent update
        self.sum = 0  # sum from all updates
        self.count = 0  # total n from all updates

    def update(self, val, n=1):
        """Record *val* with weight *n*; a ``None`` value is ignored.

        With ``n <= 0`` only the most recent value is replaced.
        """
        if val is not None:
            self.val = val
            if n > 0:
                self.sum = type_as(self.sum, val) + (val * n)
                self.count = type_as(self.count, n) + n

    def state_dict(self):
        return {
            "val": self.val,
            "sum": self.sum,
            "count": self.count,
            "round": self.round,
        }

    def load_state_dict(self, state_dict):
        self.val = state_dict["val"]
        self.sum = state_dict["sum"]
        self.count = state_dict["count"]
        self.round = state_dict.get("round", None)

    @property
    def avg(self):
        """Weighted mean of all updates, or the latest value when no weight was recorded."""
        return self.sum / self.count if self.count > 0 else self.val

    @property
    def smoothed_value(self) -> float:
        val = self.avg
        if self.round is not None and val is not None:
            val = safe_round(val, self.round)
        return val


class SumMeter(Meter):
    """Computes and stores the sum"""

    def __init__(self, round: Optional[int] = None):
        self.round = round
        self.reset()

    def reset(self):
        self.sum = 0  # sum from all updates

    def update(self, val):
        """Add *val* to the running sum; a ``None`` value is ignored."""
        if val is not None:
            self.sum = type_as(self.sum, val) + val

    def state_dict(self):
        return {
            "sum": self.sum,
            "round": self.round,
        }

    def load_state_dict(self, state_dict):
        self.sum = state_dict["sum"]
        self.round = state_dict.get("round", None)

    @property
    def smoothed_value(self) -> float:
        val = self.sum
        if self.round is not None and val is not None:
            val = safe_round(val, self.round)
        return val


class ConcatTensorMeter(Meter):
    """Concatenates tensors"""

    def __init__(self, dim=0):
        super().__init__()
        self.reset()
        self.dim = dim

    def reset(self):
        self.tensor = None

    def update(self, val):
        """Append *val* to the stored tensor along ``self.dim``."""
        if self.tensor is None:
            self.tensor = val
        else:
            self.tensor = torch.cat([self.tensor, val], dim=self.dim)

    def state_dict(self):
        return {
            "tensor": self.tensor,
        }

    def load_state_dict(self, state_dict):
        self.tensor = state_dict["tensor"]

    @property
    def smoothed_value(self) -> list:
        return []  # return a dummy value


class TimeMeter(Meter):
    """Computes the average occurrence of some event per second"""

    def __init__(
        self,
        init: int = 0,
        n: int = 0,
        round: Optional[int] = None,
    ):
        self.round = round
        self.reset(init, n)

    def reset(self, init=0, n=0):
        """Restart timing with *init* seconds already elapsed and *n* events counted."""
        self.init = init
        self.start = time.perf_counter()
        self.n = n
        self.i = 0

    def update(self, val=1):
        self.n = type_as(self.n, val) + val
        self.i += 1

    def state_dict(self):
        return {
            "init": self.elapsed_time,
            "n": self.n,
            "round": self.round,
        }

    def load_state_dict(self, state_dict):
        if "start" in state_dict:
            # backwards compatibility for old state_dicts
            self.reset(init=state_dict["init"])
        else:
            self.reset(init=state_dict["init"], n=state_dict["n"])
            self.round = state_dict.get("round", None)

    @property
    def avg(self):
        return self.n / self.elapsed_time

    @property
    def elapsed_time(self):
        return self.init + (time.perf_counter() - self.start)

    @property
    def smoothed_value(self) -> float:
        val = self.avg
        if self.round is not None and val is not None:
            val = safe_round(val, self.round)
        return val


class StopwatchMeter(Meter):
    """Computes the sum/avg duration of some event in seconds"""

    def __init__(self, round: Optional[int] = None):
        self.round = round
        self.sum = 0
        self.n = 0
        self.start_time = None

    def start(self):
        self.start_time = time.perf_counter()

    def stop(self, n=1, prehook=None):
        """Add the time since :meth:`start` to the sum; no-op if never started.

        *prehook*, when given, is called before the clock is read.
        """
        if self.start_time is not None:
            if prehook is not None:
                prehook()
            delta = time.perf_counter() - self.start_time
            self.sum = self.sum + delta
            self.n = type_as(self.n, n) + n

    def reset(self):
        self.sum = 0  # cumulative time during which stopwatch was active
        self.n = 0  # total n across all start/stop
        self.start()

    def state_dict(self):
        return {
            "sum": self.sum,
            "n": self.n,
            "round": self.round,
        }

    def load_state_dict(self, state_dict):
        self.sum = state_dict["sum"]
        self.n = state_dict["n"]
        self.start_time = None
        self.round = state_dict.get("round", None)

    @property
    def avg(self):
        return self.sum / self.n if self.n > 0 else self.sum

    @property
    def elapsed_time(self):
        if self.start_time is None:
            return 0.0
        return time.perf_counter() - self.start_time

    @property
    def smoothed_value(self) -> float:
        val = self.avg if self.sum > 0 else self.elapsed_time
        if self.round is not None and val is not None:
            val = safe_round(val, self.round)
        return val


class MetersDict(OrderedDict):
    """A sorted dictionary of :class:`Meters`.

    Meters are sorted according to a priority that is given when the
    meter is first added to the dictionary.
    """

    def __init__(self, *args, **kwargs):
        # Create the priority list first: OrderedDict.__init__ inserts any
        # initial items through __setitem__, which needs it to exist.
        self.priorities = []
        super().__init__(*args, **kwargs)

    def __setitem__(self, key, value):
        """Insert ``value = (priority, meter)`` under *key* and re-sort by priority."""
        assert key not in self, "MetersDict doesn't support reassignment"
        priority, value = value
        # the insertion index breaks ties so that keys are never compared
        bisect.insort(self.priorities, (priority, len(self.priorities), key))
        super().__setitem__(key, value)
        for _, _, ordered_key in self.priorities:  # reorder dict to match priorities
            self.move_to_end(ordered_key)

    def add_meter(self, key, meter, priority):
        self.__setitem__(key, (priority, meter))

    def state_dict(self):
        """Return ``(priority, key, meter class name, meter state)`` for each stored meter."""
        return [
            (pri, key, self[key].__class__.__name__, self[key].state_dict())
            for pri, _, key in self.priorities
            # can't serialize DerivedMeter instances
            if not isinstance(self[key], MetersDict._DerivedMeter)
        ]

    def load_state_dict(self, state_dict):
        """Replace the contents with meters rebuilt from :meth:`state_dict` output.

        Meter classes are looked up by name among this module's globals.
        """
        self.clear()
        self.priorities.clear()
        for pri, key, meter_cls, meter_state in state_dict:
            meter = globals()[meter_cls]()
            meter.load_state_dict(meter_state)
            self.add_meter(key, meter, pri)

    def get_smoothed_value(self, key: str) -> float:
        """Get a single smoothed value."""
        meter = self[key]
        if isinstance(meter, MetersDict._DerivedMeter):
            return meter.fn(self)
        else:
            return meter.smoothed_value

    def get_smoothed_values(self) -> Dict[str, float]:
        """Get all smoothed values."""
        return OrderedDict(
            [
                (key, self.get_smoothed_value(key))
                for key in self.keys()
                if not key.startswith("_")
            ]
        )

    def reset(self):
        """Reset Meter instances."""
        for meter in self.values():
            if isinstance(meter, MetersDict._DerivedMeter):
                continue
            meter.reset()

    class _DerivedMeter(Meter):
        """A Meter whose values are derived from other Meters."""

        def __init__(self, fn):
            self.fn = fn

        def reset(self):
            pass


# diff --git a/fairseq/fairseq/logging/metrics.py b/fairseq/fairseq/logging/metrics.py
# new file mode 100644
# index
# 0000000..49301f2
# --- /dev/null
# +++ b/fairseq/fairseq/logging/metrics.py
# @@ -0,0 +1,336 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
A standalone module for aggregating metrics.

Metrics can be logged from anywhere using the `log_*` functions defined
in this module. The logged values will be aggregated dynamically based
on the aggregation context in which the logging occurs. See the
:func:`aggregate` context manager for more details.
"""

import contextlib
import uuid
from collections import defaultdict
from typing import Callable, List, Optional

from .meters import *


# Aggregation contexts are considered "active" when inside the scope
# created by the :func:`aggregate` context manager.
_aggregators = OrderedDict()
_active_aggregators = OrderedDict()
_active_aggregators_cnt = defaultdict(lambda: 0)


def reset() -> None:
    """Reset all metrics aggregators."""
    _aggregators.clear()
    _active_aggregators.clear()
    _active_aggregators_cnt.clear()

    # The "default" aggregator observes all logged values.
    _aggregators["default"] = MetersDict()
    _active_aggregators["default"] = _aggregators["default"]
    _active_aggregators_cnt["default"] = 1


reset()


@contextlib.contextmanager
def aggregate(name: Optional[str] = None, new_root: bool = False):
    """Context manager to aggregate metrics under a given name.

    Aggregations can be nested. If *new_root* is ``False``, then logged
    metrics will be recorded along the entire stack of nested
    aggregators, including a global "default" aggregator. If *new_root*
    is ``True``, then this aggregator will be the root of a new
    aggregation stack, thus bypassing any parent aggregators.

    Note that aggregation contexts are uniquely identified by their
    *name* (e.g., train, valid). Creating a context with an existing
    name will reuse the corresponding :class:`MetersDict` instance.
    If no name is given, then a temporary aggregator will be created.

    The aggregator is deactivated (and, with *new_root*, the previous
    stack restored) when the block exits, including when it exits with
    an exception.

    Usage::

        with metrics.aggregate("train"):
            for step, batch in enumerate(epoch):
                with metrics.aggregate("train_inner") as agg:
                    metrics.log_scalar("loss", get_loss(batch))
                    if step % log_interval == 0:
                        print(agg.get_smoothed_value("loss"))
                        agg.reset()
        print(metrics.get_smoothed_values("train")["loss"])

    Args:
        name (str): name of the aggregation. Defaults to a
            random/temporary name if not given explicitly.
        new_root (bool): make this aggregation the root of a new
            aggregation stack.
    """
    if name is None:
        # generate a temporary name
        name = str(uuid.uuid4())
        assert name not in _aggregators
        agg = MetersDict()
    else:
        assert name != "default"
        agg = _aggregators.setdefault(name, MetersDict())

    if new_root:
        backup_aggregators = _active_aggregators.copy()
        _active_aggregators.clear()
        backup_aggregators_cnt = _active_aggregators_cnt.copy()
        _active_aggregators_cnt.clear()

    _active_aggregators[name] = agg
    _active_aggregators_cnt[name] += 1

    try:
        yield agg
    finally:
        # Always undo the activation; otherwise an exception raised inside the
        # ``with`` body would leave this aggregator active for the rest of the
        # process and, with new_root, lose the parent stack.
        _active_aggregators_cnt[name] -= 1
        if _active_aggregators_cnt[name] == 0 and name in _active_aggregators:
            del _active_aggregators[name]

        if new_root:
            _active_aggregators.clear()
            _active_aggregators.update(backup_aggregators)
            _active_aggregators_cnt.clear()
            _active_aggregators_cnt.update(backup_aggregators_cnt)


def get_active_aggregators() -> List[MetersDict]:
    """Return the currently active aggregators as a list."""
    return list(_active_aggregators.values())


def log_scalar(
    key: str,
    value: float,
    weight: float = 1,
    priority: int = 10,
    round: Optional[int] = None,
):
    """Log a scalar value.

    Args:
        key (str): name of the field to log
        value (float): value to log
        weight (float): weight that this value contributes to the average.
            A weight of 0 will always log the latest value.
        priority (int): smaller values are logged earlier in the output
        round (Optional[int]): number of digits to round to when displaying
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, AverageMeter(round=round), priority)
        agg[key].update(value, weight)


def log_scalar_sum(
    key: str,
    value: float,
    priority: int = 10,
    round: Optional[int] = None,
):
    """Log a scalar value that is summed for reporting.

    Args:
        key (str): name of the field to log
        value (float): value to log
        priority (int): smaller values are logged earlier in the output
        round (Optional[int]): number of digits to round to when displaying
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, SumMeter(round=round), priority)
        agg[key].update(value)


def log_concat_tensor(
    key: str,
    value: "torch.Tensor",
    priority: int = 10,
    dim: int = 0,
):
    """Log a tensor that is concatenated with previously logged tensors.

    Args:
        key (str): name of the field to log
        value (torch.Tensor): tensor to append
        priority (int): smaller values are logged earlier in the output
        dim (int): dimension along which tensors are concatenated
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, ConcatTensorMeter(dim=dim), priority)
        agg[key].update(value)


def log_derived(key: str, fn: Callable[[MetersDict], float], priority: int = 20):
    """Log a scalar value derived from other meters.

    Args:
        key (str): name of the field to log
        fn (Callable[[MetersDict], float]): function that takes a single
            argument *meters* and returns the derived value
        priority (int): smaller values are logged earlier in the output
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, MetersDict._DerivedMeter(fn), priority)


def log_speed(
    key: str,
    value: float,
    priority: int = 30,
    round: Optional[int] = None,
):
    """Log the rate of some quantity per second.

    The first call for a given *key* only creates and resets the meter;
    *value* is counted from the second call on.

    Args:
        key (str): name of the field to log
        value (float): value to log
        priority (int): smaller values are logged earlier in the output
        round (Optional[int]): number of digits to round to when displaying
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, TimeMeter(round=round), priority)
            agg[key].reset()  # reset meter on the first call
        else:
            agg[key].update(value)


def log_start_time(key: str, priority: int = 40, round: Optional[int] = None):
    """Log the duration of some event in seconds.

    The duration will be computed once :func:`log_stop_time` is called.

    Args:
        key (str): name of the field to log
        priority (int): smaller values are logged earlier in the output
        round (Optional[int]): number of digits to round to when displaying
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, StopwatchMeter(round=round), priority)
        agg[key].start()


def log_stop_time(key: str, weight: float = 0.0, prehook=None):
    """Log the duration of some event in seconds.

    The duration will be computed since :func:`log_start_time` was called.
    Set weight > 0 to report the average time instead of the sum.

    Args:
        key (str): name of the field to log
        weight (float): weight that this time contributes to the average
        prehook (function, no arguments): will be called before the timer
        is stopped. For example, use prehook=torch.cuda.synchronize to
        make sure all gpu operations are done before timer is stopped.
    """
    for agg in get_active_aggregators():
        if key in agg:
            agg[key].stop(weight, prehook)


def log_custom(
    new_meter_fn: Callable[[], Meter],
    key: str,
    *args,
    priority: int = 50,
    **kwargs,
):
    """Log using a custom Meter.

    Any extra *args* or *kwargs* will be passed through to the Meter's
    *update* method.

    Args:
        new_meter_fn (Callable[[], Meter]): function that returns a new
            Meter instance
        key (str): name of the field to log
        priority (int): smaller values are logged earlier in the output
    """
    for agg in get_active_aggregators():
        if key not in agg:
            agg.add_meter(key, new_meter_fn(), priority)
        agg[key].update(*args, **kwargs)


def reset_meter(name: str, key: str) -> None:
    """Reset Meter instance aggregated under a given *name* and *key*."""
    meter = get_meter(name, key)
    if meter is not None:
        meter.reset()


def reset_meters(name: str) -> None:
    """Reset Meter instances aggregated under a given *name*."""
    meters = get_meters(name)
    if meters is not None:
        meters.reset()


def get_meter(name: str, key: str) -> Optional[Meter]:
    """Get a single Meter instance aggregated under *name* and *key*.

    Returns:
        Meter or None if no metrics have been logged under *name* and *key*.
    """
    if name not in _aggregators:
        return None
    return _aggregators[name].get(key, None)


def get_meters(name: str) -> Optional[MetersDict]:
    """Get Meter instances aggregated under a given *name*.

    Returns:
        MetersDict or None if no metrics have been logged under *name*.
    """
    return _aggregators.get(name, None)


def get_smoothed_value(name: str, key: str) -> float:
    """Get a single smoothed value.

    Raises:
        KeyError: if no metrics have been logged under *name* and *key*.
    """
    return _aggregators[name].get_smoothed_value(key)


def get_smoothed_values(name: str) -> Dict[str, float]:
    """Get smoothed values aggregated under a given *name*.

    Raises:
        KeyError: if no metrics have been logged under *name*.
    """
    return _aggregators[name].get_smoothed_values()


def state_dict():
    """Return an ordered mapping from aggregator name to its serialized meters."""
    return OrderedDict([(name, agg.state_dict()) for name, agg in _aggregators.items()])


def load_state_dict(state_dict):
    """Replace each named aggregator with one rebuilt from *state_dict*."""
    for name, agg_state in state_dict.items():
        _aggregators[name] = MetersDict()
        _aggregators[name].load_state_dict(agg_state)


def xla_metrics_report():
    """Print the torch_xla metrics report; do nothing if torch_xla is not installed."""
    try:
        import torch_xla.debug.metrics as met

        print(met.metrics_report())
    except ImportError:
        return


# NOTE: the comment line below is the unmodified remainder of this physical line
# of the collapsed patch (the start of logging/progress_bar.py). It is carried
# over verbatim because the definition it opens continues on the following line.
# diff --git a/fairseq/fairseq/logging/progress_bar.py b/fairseq/fairseq/logging/progress_bar.py new file mode 100644 index 0000000..4c64b61 --- /dev/null +++ b/fairseq/fairseq/logging/progress_bar.py @@ -0,0 +1,582 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Wrapper around various loggers and progress bars (e.g., tqdm). +""" + +import atexit +import json +import logging +import os +import sys +from collections import OrderedDict +from contextlib import contextmanager +from numbers import Number +from typing import Optional + +import torch + +from .meters import AverageMeter, StopwatchMeter, TimeMeter + +logger = logging.getLogger(__name__) + + +def progress_bar( + iterator, + log_format: Optional[str] = None, + log_interval: int = 100, + log_file: Optional[str] = None, + epoch: Optional[int] = None, + prefix: Optional[str] = None, + aim_repo: Optional[str] = None, + aim_run_hash: Optional[str] = None, + aim_param_checkpoint_dir: Optional[str] = None, + tensorboard_logdir: Optional[str] = None, + default_log_format: str = "tqdm", + wandb_project: Optional[str] = None, + wandb_run_name: Optional[str] = None, + azureml_logging: Optional[bool] = False, +): + if log_format is None: + log_format = default_log_format + if log_file is not None: + handler = logging.FileHandler(filename=log_file) + logger.addHandler(handler) + + if log_format == "tqdm" and not
sys.stderr.isatty(): + log_format = "simple" + + if log_format == "json": + bar = JsonProgressBar(iterator, epoch, prefix, log_interval) + elif log_format == "none": + bar = NoopProgressBar(iterator, epoch, prefix) + elif log_format == "simple": + bar = SimpleProgressBar(iterator, epoch, prefix, log_interval) + elif log_format == "tqdm": + bar = TqdmProgressBar(iterator, epoch, prefix) + else: + raise ValueError("Unknown log format: {}".format(log_format)) + + if aim_repo: + bar = AimProgressBarWrapper( + bar, + aim_repo=aim_repo, + aim_run_hash=aim_run_hash, + aim_param_checkpoint_dir=aim_param_checkpoint_dir, + ) + + if tensorboard_logdir: + try: + # [FB only] custom wrapper for TensorBoard + import palaas # noqa + + from .fb_tbmf_wrapper import FbTbmfWrapper + + bar = FbTbmfWrapper(bar, log_interval) + except ImportError: + bar = TensorboardProgressBarWrapper(bar, tensorboard_logdir) + + if wandb_project: + bar = WandBProgressBarWrapper(bar, wandb_project, run_name=wandb_run_name) + + if azureml_logging: + bar = AzureMLProgressBarWrapper(bar) + + return bar + + +def build_progress_bar( + args, + iterator, + epoch: Optional[int] = None, + prefix: Optional[str] = None, + default: str = "tqdm", + no_progress_bar: str = "none", +): + """Legacy wrapper that takes an argparse.Namespace.""" + if getattr(args, "no_progress_bar", False): + default = no_progress_bar + if getattr(args, "distributed_rank", 0) == 0: + tensorboard_logdir = getattr(args, "tensorboard_logdir", None) + else: + tensorboard_logdir = None + return progress_bar( + iterator, + log_format=args.log_format, + log_interval=args.log_interval, + epoch=epoch, + prefix=prefix, + tensorboard_logdir=tensorboard_logdir, + default_log_format=default, + ) + + +def format_stat(stat): + if isinstance(stat, Number): + stat = "{:g}".format(stat) + elif isinstance(stat, AverageMeter): + stat = "{:.3f}".format(stat.avg) + elif isinstance(stat, TimeMeter): + stat = "{:g}".format(round(stat.avg)) + elif isinstance(stat, 
StopwatchMeter): + stat = "{:g}".format(round(stat.sum)) + elif torch.is_tensor(stat): + stat = stat.tolist() + return stat + + +class BaseProgressBar(object): + """Abstract class for progress bars.""" + + def __init__(self, iterable, epoch=None, prefix=None): + self.iterable = iterable + self.n = getattr(iterable, "n", 0) + self.epoch = epoch + self.prefix = "" + if epoch is not None: + self.prefix += "epoch {:03d}".format(epoch) + if prefix is not None: + self.prefix += (" | " if self.prefix != "" else "") + prefix + + def __len__(self): + return len(self.iterable) + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + def __iter__(self): + raise NotImplementedError + + def log(self, stats, tag=None, step=None): + """Log intermediate stats according to log_interval.""" + raise NotImplementedError + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + raise NotImplementedError + + def update_config(self, config): + """Log latest configuration.""" + pass + + def _str_commas(self, stats): + return ", ".join(key + "=" + stats[key].strip() for key in stats.keys()) + + def _str_pipes(self, stats): + return " | ".join(key + " " + stats[key].strip() for key in stats.keys()) + + def _format_stats(self, stats): + postfix = OrderedDict(stats) + # Preprocess stats according to datatype + for key in postfix.keys(): + postfix[key] = str(format_stat(postfix[key])) + return postfix + + +@contextmanager +def rename_logger(logger, new_name): + old_name = logger.name + if new_name is not None: + logger.name = new_name + yield logger + logger.name = old_name + + +class JsonProgressBar(BaseProgressBar): + """Log output in JSON format.""" + + def __init__(self, iterable, epoch=None, prefix=None, log_interval=1000): + super().__init__(iterable, epoch, prefix) + self.log_interval = log_interval + self.i = None + self.size = None + + def __iter__(self): + self.size = len(self.iterable) + for i, obj in enumerate(self.iterable, 
start=self.n): + self.i = i + yield obj + + def log(self, stats, tag=None, step=None): + """Log intermediate stats according to log_interval.""" + step = step or self.i or 0 + if step > 0 and self.log_interval is not None and step % self.log_interval == 0: + update = ( + self.epoch - 1 + (self.i + 1) / float(self.size) + if self.epoch is not None + else None + ) + stats = self._format_stats(stats, epoch=self.epoch, update=update) + with rename_logger(logger, tag): + logger.info(json.dumps(stats)) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + self.stats = stats + if tag is not None: + self.stats = OrderedDict( + [(tag + "_" + k, v) for k, v in self.stats.items()] + ) + stats = self._format_stats(self.stats, epoch=self.epoch) + with rename_logger(logger, tag): + logger.info(json.dumps(stats)) + + def _format_stats(self, stats, epoch=None, update=None): + postfix = OrderedDict() + if epoch is not None: + postfix["epoch"] = epoch + if update is not None: + postfix["update"] = round(update, 3) + # Preprocess stats according to datatype + for key in stats.keys(): + postfix[key] = format_stat(stats[key]) + return postfix + + +class NoopProgressBar(BaseProgressBar): + """No logging.""" + + def __init__(self, iterable, epoch=None, prefix=None): + super().__init__(iterable, epoch, prefix) + + def __iter__(self): + for obj in self.iterable: + yield obj + + def log(self, stats, tag=None, step=None): + """Log intermediate stats according to log_interval.""" + pass + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + pass + + +class SimpleProgressBar(BaseProgressBar): + """A minimal logger for non-TTY environments.""" + + def __init__(self, iterable, epoch=None, prefix=None, log_interval=1000): + super().__init__(iterable, epoch, prefix) + self.log_interval = log_interval + self.i = None + self.size = None + + def __iter__(self): + self.size = len(self.iterable) + for i, obj in enumerate(self.iterable, 
start=self.n): + self.i = i + yield obj + + def log(self, stats, tag=None, step=None): + """Log intermediate stats according to log_interval.""" + step = step or self.i or 0 + if step > 0 and self.log_interval is not None and step % self.log_interval == 0: + stats = self._format_stats(stats) + postfix = self._str_commas(stats) + with rename_logger(logger, tag): + logger.info( + "{}: {:5d} / {:d} {}".format( + self.prefix, self.i + 1, self.size, postfix + ) + ) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + postfix = self._str_pipes(self._format_stats(stats)) + with rename_logger(logger, tag): + logger.info("{} | {}".format(self.prefix, postfix)) + + +class TqdmProgressBar(BaseProgressBar): + """Log to tqdm.""" + + def __init__(self, iterable, epoch=None, prefix=None): + super().__init__(iterable, epoch, prefix) + from tqdm import tqdm + + self.tqdm = tqdm( + iterable, + self.prefix, + leave=False, + disable=(logger.getEffectiveLevel() > logging.INFO), + ) + + def __iter__(self): + return iter(self.tqdm) + + def log(self, stats, tag=None, step=None): + """Log intermediate stats according to log_interval.""" + self.tqdm.set_postfix(self._format_stats(stats), refresh=False) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + postfix = self._str_pipes(self._format_stats(stats)) + with rename_logger(logger, tag): + logger.info("{} | {}".format(self.prefix, postfix)) + + +try: + import functools + + from aim import Repo as AimRepo + + @functools.lru_cache() + def get_aim_run(repo, run_hash): + from aim import Run + + return Run(run_hash=run_hash, repo=repo) + +except ImportError: + get_aim_run = None + AimRepo = None + + +class AimProgressBarWrapper(BaseProgressBar): + """Log to Aim.""" + + def __init__(self, wrapped_bar, aim_repo, aim_run_hash, aim_param_checkpoint_dir): + self.wrapped_bar = wrapped_bar + + if get_aim_run is None: + self.run = None + logger.warning("Aim not found, please install 
with: pip install aim") + else: + logger.info(f"Storing logs at Aim repo: {aim_repo}") + + if not aim_run_hash: + # Find run based on save_dir parameter + query = f"run.checkpoint.save_dir == '{aim_param_checkpoint_dir}'" + try: + runs_generator = AimRepo(aim_repo).query_runs(query) + run = next(runs_generator.iter_runs()) + aim_run_hash = run.run.hash + except Exception: + pass + + if aim_run_hash: + logger.info(f"Appending to run: {aim_run_hash}") + + self.run = get_aim_run(aim_repo, aim_run_hash) + + def __iter__(self): + return iter(self.wrapped_bar) + + def log(self, stats, tag=None, step=None): + """Log intermediate stats to Aim.""" + self._log_to_aim(stats, tag, step) + self.wrapped_bar.log(stats, tag=tag, step=step) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + self._log_to_aim(stats, tag, step) + self.wrapped_bar.print(stats, tag=tag, step=step) + + def update_config(self, config): + """Log latest configuration.""" + if self.run is not None: + for key in config: + self.run.set(key, config[key], strict=False) + self.wrapped_bar.update_config(config) + + def _log_to_aim(self, stats, tag=None, step=None): + if self.run is None: + return + + if step is None: + step = stats["num_updates"] + + if "train" in tag: + context = {"tag": tag, "subset": "train"} + elif "val" in tag: + context = {"tag": tag, "subset": "val"} + else: + context = {"tag": tag} + + for key in stats.keys() - {"num_updates"}: + self.run.track(stats[key], name=key, step=step, context=context) + + +try: + _tensorboard_writers = {} + from torch.utils.tensorboard import SummaryWriter +except ImportError: + try: + from tensorboardX import SummaryWriter + except ImportError: + SummaryWriter = None + + +def _close_writers(): + for w in _tensorboard_writers.values(): + w.close() + + +atexit.register(_close_writers) + + +class TensorboardProgressBarWrapper(BaseProgressBar): + """Log to tensorboard.""" + + def __init__(self, wrapped_bar, tensorboard_logdir): + 
self.wrapped_bar = wrapped_bar + self.tensorboard_logdir = tensorboard_logdir + + if SummaryWriter is None: + logger.warning( + "tensorboard not found, please install with: pip install tensorboard" + ) + + def _writer(self, key): + if SummaryWriter is None: + return None + _writers = _tensorboard_writers + if key not in _writers: + _writers[key] = SummaryWriter(os.path.join(self.tensorboard_logdir, key)) + _writers[key].add_text("sys.argv", " ".join(sys.argv)) + return _writers[key] + + def __iter__(self): + return iter(self.wrapped_bar) + + def log(self, stats, tag=None, step=None): + """Log intermediate stats to tensorboard.""" + self._log_to_tensorboard(stats, tag, step) + self.wrapped_bar.log(stats, tag=tag, step=step) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + self._log_to_tensorboard(stats, tag, step) + self.wrapped_bar.print(stats, tag=tag, step=step) + + def update_config(self, config): + """Log latest configuration.""" + # TODO add hparams to Tensorboard + self.wrapped_bar.update_config(config) + + def _log_to_tensorboard(self, stats, tag=None, step=None): + writer = self._writer(tag or "") + if writer is None: + return + if step is None: + step = stats["num_updates"] + for key in stats.keys() - {"num_updates"}: + if isinstance(stats[key], AverageMeter): + writer.add_scalar(key, stats[key].val, step) + elif isinstance(stats[key], Number): + writer.add_scalar(key, stats[key], step) + elif torch.is_tensor(stats[key]) and stats[key].numel() == 1: + writer.add_scalar(key, stats[key].item(), step) + writer.flush() + + +try: + import wandb +except ImportError: + wandb = None + + +class WandBProgressBarWrapper(BaseProgressBar): + """Log to Weights & Biases.""" + + def __init__(self, wrapped_bar, wandb_project, run_name=None): + self.wrapped_bar = wrapped_bar + if wandb is None: + logger.warning("wandb not found, pip install wandb") + return + + # reinit=False to ensure if wandb.init() is called multiple times + # within 
one process it still references the same run + wandb.init(project=wandb_project, reinit=False, name=run_name) + + def __iter__(self): + return iter(self.wrapped_bar) + + def log(self, stats, tag=None, step=None): + """Log intermediate stats to tensorboard.""" + self._log_to_wandb(stats, tag, step) + self.wrapped_bar.log(stats, tag=tag, step=step) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats.""" + self._log_to_wandb(stats, tag, step) + self.wrapped_bar.print(stats, tag=tag, step=step) + + def update_config(self, config): + """Log latest configuration.""" + if wandb is not None: + wandb.config.update(config) + self.wrapped_bar.update_config(config) + + def _log_to_wandb(self, stats, tag=None, step=None): + if wandb is None: + return + if step is None: + step = stats["num_updates"] + + prefix = "" if tag is None else tag + "/" + + for key in stats.keys() - {"num_updates"}: + if isinstance(stats[key], AverageMeter): + wandb.log({prefix + key: stats[key].val}, step=step) + elif isinstance(stats[key], Number): + wandb.log({prefix + key: stats[key]}, step=step) + + +try: + from azureml.core import Run +except ImportError: + Run = None + + +class AzureMLProgressBarWrapper(BaseProgressBar): + """Log to Azure ML""" + + def __init__(self, wrapped_bar): + self.wrapped_bar = wrapped_bar + if Run is None: + logger.warning("azureml.core not found, pip install azureml-core") + return + self.run = Run.get_context() + + def __exit__(self, *exc): + if Run is not None: + self.run.complete() + return False + + def __iter__(self): + return iter(self.wrapped_bar) + + def log(self, stats, tag=None, step=None): + """Log intermediate stats to AzureML""" + self._log_to_azureml(stats, tag, step) + self.wrapped_bar.log(stats, tag=tag, step=step) + + def print(self, stats, tag=None, step=None): + """Print end-of-epoch stats""" + self._log_to_azureml(stats, tag, step) + self.wrapped_bar.print(stats, tag=tag, step=step) + + def update_config(self, config): + 
"""Log latest configuration.""" + self.wrapped_bar.update_config(config) + + def _log_to_azureml(self, stats, tag=None, step=None): + if Run is None: + return + if step is None: + step = stats["num_updates"] + + prefix = "" if tag is None else tag + "/" + + for key in stats.keys() - {"num_updates"}: + name = prefix + key + if isinstance(stats[key], AverageMeter): + self.run.log_row(name=name, **{"step": step, key: stats[key].val}) + elif isinstance(stats[key], Number): + self.run.log_row(name=name, **{"step": step, key: stats[key]}) diff --git a/fairseq/fairseq/model_parallel/__init__.py b/fairseq/fairseq/model_parallel/__init__.py new file mode 100644 index 0000000..69f2168 --- /dev/null +++ b/fairseq/fairseq/model_parallel/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from . import criterions, models, modules # noqa diff --git a/fairseq/fairseq/model_parallel/criterions/__init__.py b/fairseq/fairseq/model_parallel/criterions/__init__.py new file mode 100644 index 0000000..5fae7bd --- /dev/null +++ b/fairseq/fairseq/model_parallel/criterions/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + + +# automatically import any Python files in the criterions/ directory +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + module = file[: file.find(".py")] + importlib.import_module("fairseq.model_parallel.criterions." 
+ module) diff --git a/fairseq/fairseq/model_parallel/criterions/vocab_parallel_cross_entropy.py b/fairseq/fairseq/model_parallel/criterions/vocab_parallel_cross_entropy.py new file mode 100644 index 0000000..5ffbaa8 --- /dev/null +++ b/fairseq/fairseq/model_parallel/criterions/vocab_parallel_cross_entropy.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math + +from fairseq import utils +from fairseq.logging import metrics +from fairseq.criterions import FairseqCriterion, register_criterion + + +try: + from fairseq.model_parallel.megatron.mpu.cross_entropy import ( + vocab_parallel_cross_entropy, + ) + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + + +@register_criterion("vocab_parallel_cross_entropy") +class VocabParallelCrossEntropyCriterion(FairseqCriterion): + def __init__(self, task, sentence_avg): + super().__init__(task) + self.sentence_avg = sentence_avg + if not has_megatron_submodule: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(**sample["net_input"]) + target = sample["target"] + + loss = vocab_parallel_cross_entropy(net_output[0].float(), target) + loss = (loss * (target != self.padding_idx)).sum() + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + logging_output = { + "loss": utils.item(loss.data) if reduce else loss.data, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/fairseq/fairseq/model_parallel/megatron_trainer.py b/fairseq/fairseq/model_parallel/megatron_trainer.py new file mode 100644 index 0000000..aedf608 --- /dev/null +++ b/fairseq/fairseq/model_parallel/megatron_trainer.py @@ -0,0 +1,75 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Train a network across multiple GPUs. +""" + +from fairseq.dataclass.configs import FairseqConfig +from fairseq.distributed import utils as distributed_utils +from fairseq.trainer import Trainer + +try: + from fairseq.model_parallel.megatron.mpu import ( + get_data_parallel_rank, + get_data_parallel_world_size, + get_model_parallel_src_rank, + get_cuda_rng_tracker, + ) + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + + +class MegatronTrainer(Trainer): + """Main class for model parallel with data parallel training.""" + + def __init__(self, cfg: FairseqConfig, task, model, criterion, **kwargs): + if not has_megatron_submodule: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + super().__init__(cfg, task, model, criterion, **kwargs) + + def clip_grad_norm(self, clip_norm): + def _aggregate_model_parallel_grad_norm(total_norm): + total_norm = total_norm**2 + distributed_utils.all_reduce( + total_norm, group=distributed_utils.get_model_parallel_group() + ) + total_norm = total_norm**0.5 + return total_norm + + return self.optimizer.clip_grad_norm( + clip_norm, + aggregate_norm_fn=_aggregate_model_parallel_grad_norm, + ) + + def save_checkpoint(self, filename, extra_state): + """Save all training state in a checkpoint file.""" + extra_state["rng_tracker_states"] = get_cuda_rng_tracker().get_states() + super().save_checkpoint(filename, extra_state) + + def load_checkpoint( + self, + filename, + reset_optimizer=False, + reset_lr_scheduler=False, + optimizer_overrides=None, + reset_meters=False, + ): + extra_state = super().load_checkpoint( + filename, + reset_optimizer=reset_optimizer, + reset_lr_scheduler=reset_lr_scheduler, + 
optimizer_overrides=optimizer_overrides, + reset_meters=reset_meters, + ) + if extra_state is not None and "rng_tracker_states" in extra_state: + get_cuda_rng_tracker().set_states(extra_state["rng_tracker_states"]) + return extra_state diff --git a/fairseq/fairseq/model_parallel/models/__init__.py b/fairseq/fairseq/model_parallel/models/__init__.py new file mode 100644 index 0000000..3532479 --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import importlib +import os + + +# automatically import any Python files in the models/ directory +models_dir = os.path.dirname(__file__) +for file in os.listdir(models_dir): + path = os.path.join(models_dir, file) + if ( + not file.startswith("_") + and not file.startswith(".") + and (file.endswith(".py") or os.path.isdir(path)) + ): + model_name = file[: file.find(".py")] if file.endswith(".py") else file + module = importlib.import_module("fairseq.model_parallel.models." + model_name) diff --git a/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/__init__.py b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/__init__.py new file mode 100644 index 0000000..117827c --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from .model import * # noqa diff --git a/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/layers.py b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/layers.py new file mode 100644 index 0000000..85dbd44 --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/layers.py @@ -0,0 +1,600 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from collections import namedtuple + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import options, utils +from fairseq.modules import ( + AdaptiveSoftmax, + LayerNorm, + MultiheadAttention, + PositionalEmbedding, +) + +EncoderOut = namedtuple( + "TransformerEncoderOut", + [ + "encoder_out", # T x B x C + "encoder_padding_mask", # B x T + "encoder_embedding", # B x T x C + "encoder_states", # List[T x B x C] + ], +) + + +class TransformerEncoderEmbedding(nn.Module): + """Encoder Embedding + Positional Embedding""" + + def __init__(self, args, embed_tokens): + super().__init__() + self.dropout = args.dropout + self.max_source_positions = args.max_source_positions + self.embed_tokens = embed_tokens + if isinstance(embed_tokens, nn.ModuleList): + self.padding_idx = embed_tokens[0].padding_idx + embed_dim = sum(e.embedding_dim for e in embed_tokens) + else: + self.padding_idx = embed_tokens.padding_idx + embed_dim = embed_tokens.embedding_dim + self.embed_scale = math.sqrt(embed_dim) + self.embed_positions = ( + PositionalEmbedding( + args.max_source_positions, + embed_dim, + self.padding_idx, + learned=args.encoder_learned_pos, + ) + if not args.no_token_positional_embeddings + else None + ) + if getattr(args, "layernorm_embedding", False): + self.layernorm_embedding = LayerNorm(embed_dim) + else: + self.layernorm_embedding = None + + def forward(self, input): + # embed tokens and 
positions + src_tokens = input[0] + prev_output_tokens = input[2] + if isinstance(self.embed_tokens, nn.ModuleList): + x_embed_list = [] + for embed_tokens_part in self.embed_tokens: + x_embed_list.append(embed_tokens_part(src_tokens)) + + embedded = torch.cat(x_embed_list, dim=-1) + else: + embedded = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * embedded + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding: + x = self.layernorm_embedding(x) + x = F.dropout(x, p=self.dropout, training=self.training) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + return (x, encoder_padding_mask, prev_output_tokens) + + +class TransformerEncoderLayerNorm(nn.Module): + """ + Layer norm at the the end of all encoder layers if + args.encoder_enormalize_before = True + """ + + def __init__(self, args, embed_dim): + super().__init__() + if args.encoder_normalize_before: + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + def forward(self, input): + x = input[0] + encoder_padding_mask = input[1] + prev_output_tokens = input[2] + if self.layer_norm: + x = self.layer_norm(x) + # keeping track of the incremental_state is not supported yet + return (x, encoder_padding_mask, prev_output_tokens) + + +class TransformerDecoderEmbedding(nn.Module): + """Decoder Embedding + Positional Embedding""" + + def __init__(self, args, embed_tokens): + super().__init__() + self.dropout = args.dropout + self.share_input_output_embed = args.share_decoder_input_output_embed + input_embed_dim = ( + sum(e.embedding_dim for e in embed_tokens) + if isinstance(embed_tokens, nn.ModuleList) + else embed_tokens.embedding_dim + ) + embed_dim = args.decoder_embed_dim + self.output_embed_dim = args.decoder_output_dim + + padding_idx = ( + embed_tokens[0].padding_idx + if isinstance(embed_tokens, nn.ModuleList) + else 
embed_tokens.padding_idx + ) + self.max_target_positions = args.max_target_positions + + self.embed_tokens = embed_tokens + self.embed_scale = math.sqrt(embed_dim) # todo: try with input_embed_dim + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + + self.embed_positions = ( + PositionalEmbedding( + args.max_target_positions, + embed_dim, + padding_idx, + learned=args.decoder_learned_pos, + ) + if not args.no_token_positional_embeddings + else None + ) + + def forward(self, input): + mt_task = False + if isinstance(input, tuple): + if len(input) == 3: + encoder_out = input[0] + encoder_padding_mask = input[1] + prev_output_tokens = input[2] + incremental_state = None # Hardcoding to avoid passing of None objects + mt_task = True + else: + # HACK for now, need to fix (TODO sidgoyal) + prev_output_tokens = input[0] + # discard "src_lengths" + encoder_out = None + encoder_padding_mask = None + incremental_state = None + + else: + prev_output_tokens = input + encoder_out = None + encoder_padding_mask = None + incremental_state = None + + positions = ( + self.embed_positions( + prev_output_tokens, + incremental_state=incremental_state, + ) + if self.embed_positions is not None + else None + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + + if isinstance(self.embed_tokens, nn.ModuleList): + x_embed_list = [] + for embed_tokens_part in self.embed_tokens: + x_embed_list.append(embed_tokens_part(prev_output_tokens)) + + x = self.embed_scale * torch.cat(x_embed_list, dim=-1) + else: + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 
1) + if mt_task: + return (x, encoder_out, encoder_padding_mask) + return x + + +class TransformerDecoderOutputLayer(nn.Module): + def __init__(self, args, embed_tokens, dictionary): + super().__init__() + self.share_input_output_embed = args.share_decoder_input_output_embed + self.embed_tokens = embed_tokens + self.output_embed_dim = args.decoder_output_dim + embed_dim = args.decoder_embed_dim + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not args.tie_adaptive_weights + else None + ) + self.adaptive_softmax = None + if args.adaptive_softmax_cutoff is not None: + assert not isinstance(embed_tokens, nn.ModuleList) + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + options.eval_str_list(args.adaptive_softmax_cutoff, type=int), + dropout=args.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, + factor=args.adaptive_softmax_factor, + tie_proj=args.tie_adaptive_proj, + ) + elif not self.share_input_output_embed: + self.embed_tokens = nn.Parameter( + torch.Tensor(len(dictionary), self.output_embed_dim) + ) + nn.init.normal_( + self.embed_tokens, mean=0, std=self.output_embed_dim**-0.5 + ) + + if args.decoder_normalize_before and not getattr( + args, "no_decoder_final_norm", False + ): + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + def forward(self, input, apply_final_proj=True): + if isinstance(input, tuple): + x = input[0] + else: + x = input + + if self.layer_norm: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + if apply_final_proj: + x = self.output_layer(x) + return x + + def output_layer(self, features, **kwargs): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + if self.share_input_output_embed: 
+ if isinstance(self.embed_tokens, nn.ModuleList): + output = None + for i, emb in enumerate(self.embed_tokens): + sidx = i * emb.embedding_dim + eidx = (i + 1) * emb.embedding_dim + if output is None: + output = F.linear(features[:, :, sidx:eidx], emb.weight) + else: + output += F.linear(features[:, :, sidx:eidx], emb.weight) + + return output + else: + return F.linear(features, self.embed_tokens.weight) + else: + return F.linear(features, self.embed_tokens) + else: + return features + + +class TransformerEncoderLayer(nn.Module): + """Encoder layer block. + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.encoder_normalize_before* to ``True``. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, args): + super().__init__() + self.embed_dim = args.encoder_embed_dim + self.self_attn = MultiheadAttention( + self.embed_dim, + args.encoder_attention_heads, + dropout=args.attention_dropout, + self_attention=True, + ) + self.self_attn_layer_norm = LayerNorm(self.embed_dim) + self.dropout = args.dropout + self.activation_fn = utils.get_activation_fn( + activation=getattr(args, "activation_fn", "relu") + ) + self.activation_dropout = getattr(args, "activation_dropout", 0) + if self.activation_dropout == 0: + # for backwards compatibility with models that use args.relu_dropout + self.activation_dropout = getattr(args, "relu_dropout", 0) + self.normalize_before = args.encoder_normalize_before + self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim) + self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim) + self.final_layer_norm = LayerNorm(self.embed_dim) + + def upgrade_state_dict_named(self, state_dict, name): + """ + Rename layer norm states from `...layer_norms.0.weight` to + `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to + `...final_layer_norm.weight` + """ + layer_norm_map = {"0": "self_attn_layer_norm", "1": "final_layer_norm"} + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layer_norms.{}.{}".format(name, old, m) + if k in state_dict: + state_dict["{}.{}.{}".format(name, new, m)] = state_dict[k] + del state_dict[k] + + def forward(self, input): + """ + Args: + input (Tuple): + input[0] (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + input[1] (ByteTensor/FloatTensor): encoder padding mask - + binary ByteTensor of shape `(batch, src_len)` where padding elements + are indicated by ``1``. 
+ input[2] (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing) + Returns: + output (Tuple): + output[0] (Tensor): encoded output of shape `(batch, src_len, embed_dim)` + output[1] (ByteTensor/FloatTensor): encoder padding mask + output[2] (LongTensor): previous decoder outputs + """ + x = input[0] + encoder_padding_mask = input[1] + prev_output_tokens = input[2] + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + x, _ = self.self_attn( + query=x, key=x, value=x, key_padding_mask=encoder_padding_mask + ) + x = F.dropout(x, p=self.dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + return (x, encoder_padding_mask, prev_output_tokens) + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + +class TransformerDecoderLayer(nn.Module): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.decoder_normalize_before* to ``True``. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, args, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False + ): + super().__init__() + self.embed_dim = args.decoder_embed_dim + self.self_attn = MultiheadAttention( + embed_dim=self.embed_dim, + num_heads=args.decoder_attention_heads, + dropout=args.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=True, + ) + self.dropout = args.dropout + self.activation_fn = utils.get_activation_fn( + activation=getattr(args, "activation_fn", "relu") + ) + self.activation_dropout = getattr(args, "activation_dropout", 0) + if self.activation_dropout == 0: + # for backwards compatibility with models that use args.relu_dropout + self.activation_dropout = getattr(args, "relu_dropout", 0) + self.normalize_before = args.decoder_normalize_before + + # use layerNorm rather than FusedLayerNorm for exporting. + # char_inputs can be used to determint this. 
+ # TODO remove this once we update apex with the fix + export = getattr(args, "char_inputs", False) + self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=export) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = MultiheadAttention( + self.embed_dim, + args.decoder_attention_heads, + kdim=getattr(args, "encoder_embed_dim", None), + vdim=getattr(args, "encoder_embed_dim", None), + dropout=args.attention_dropout, + encoder_decoder_attention=True, + ) + self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export) + + self.fc1 = Linear(self.embed_dim, args.decoder_ffn_embed_dim) + self.fc2 = Linear(args.decoder_ffn_embed_dim, self.embed_dim) + + self.final_layer_norm = LayerNorm(self.embed_dim, export=export) + self.need_attn = True + + self.onnx_trace = False + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def forward(self, input): + """ + Args: + input (Tuple): + input[0] (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + input[1] (Tensor): encoder output of shape `(batch, src_len, embed_dim)` + input[2] (ByteTensor/FloatTensor): encoder padding mask - + binary ByteTensor of shape `(batch, src_len)` where padding elements + are indicated by ``1``. 
+ Returns: + output (Tuple): + output[0] (Tensor): encoded output of shape `(batch, src_len, embed_dim)` + output[1] (ByteTensor/FloatTensor): encoder padding mask + output[2] (LongTensor): previous decoder outputs + """ + # Note: incremental state is not yet supported + mt_task = False + if isinstance(input, tuple): + x = input[0] + encoder_out = input[1] + encoder_padding_mask = input[2] + incremental_state = None + mt_task = True + else: + x = input + encoder_out = None + encoder_padding_mask = None + incremental_state = None + + if incremental_state is None: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + # TODO: add back prev_self_attn_state, prev_attn_state, + # self_attn_padding_mask + prev_self_attn_state = None + prev_attn_state = None + self_attn_padding_mask = None + + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if prev_self_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_self_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.self_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + ) + x = F.dropout(x, p=self.dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + if self.encoder_attn is not None: + residual = x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, before=True) + if prev_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + 
value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=(not self.training and self.need_attn), + ) + x = F.dropout(x, p=self.dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + + if mt_task: + return (x, encoder_out, encoder_padding_mask) + return x + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if ( + not hasattr(self, "_future_mask") + or self._future_mask is None + or self._future_mask.device != tensor.device + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1 + ) + if self._future_mask.size(0) < dim: + self._future_mask = torch.triu( + utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1 + ) + return self._future_mask[:dim, :dim] + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + def make_generation_fast_(self, need_attn=False, **kwargs): + self.need_attn = need_attn + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git 
a/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/model.py b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/model.py new file mode 100644 index 0000000..7873ac6 --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/pipeline_parallel_transformer/model.py @@ -0,0 +1,779 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.model_parallel.models.pipeline_parallel_transformer.layers import ( + Embedding, + TransformerDecoderEmbedding, + TransformerDecoderLayer, + TransformerDecoderOutputLayer, + TransformerEncoderEmbedding, + TransformerEncoderLayer, + TransformerEncoderLayerNorm, +) +from fairseq.models import ( + BaseFairseqModel, + FairseqDecoder, + FairseqEncoder, + register_model, + register_model_architecture, +) +from fairseq.models.fairseq_encoder import EncoderOut +from fairseq.models.transformer import ( + base_architecture, + transformer_iwslt_de_en, + transformer_wmt_en_de_big, +) +from fairseq.modules import SinusoidalPositionalEmbedding + + +logger = logging.getLogger(__name__) + + +DEFAULT_MAX_SOURCE_POSITIONS = 1024 +DEFAULT_MAX_TARGET_POSITIONS = 1024 +TORCH_PIPE = False +RPC_INIT = False + + +def import_pipe(): + global TORCH_PIPE + global RPC_INIT + try: + from torch.distributed.pipeline.sync import Pipe # noqa + + global Pipe + from torch.distributed.pipeline.sync.utils import partition_model + + global partition_model + from torch.distributed import rpc + import tempfile + + TORCH_PIPE = True + # Initialize single process RPC agent since TORCH_PIPE requires + # RRef. RRef depends on RPC being initialized and as a result we initialize + # RPC with a single node. 
+ tmpfile = tempfile.NamedTemporaryFile() + if not RPC_INIT: + rpc.init_rpc( + name="worker", + rank=0, + world_size=1, + rpc_backend_options=rpc.TensorPipeRpcBackendOptions( + init_method="file://{}".format(tmpfile.name), + ), + ) + RPC_INIT = True + logger.info("Using torch pipe") + except ImportError: + try: + from fairscale.nn import Pipe # noqa + + logger.info("Using fairscale pipe") + except ImportError: + raise ImportError("Please install fairscale with: pip install fairscale") + + +@register_model("pipeline_parallel_transformer") +class PipelineParallelTransformerModel(BaseFairseqModel): + def __init__(self, encoder, decoder, balance, devices, chunks, checkpoint): + import_pipe() + super().__init__() + assert isinstance(encoder, FairseqEncoder) + assert isinstance(decoder, FairseqDecoder) + encoder_module_list = ( + [encoder.embedding_layer] + + list(encoder.encoder_layers) + + [encoder.final_layer_norm] + ) + self.num_encoder_modules = len(encoder_module_list) + decoder_module_list = ( + [decoder.embedding_layer] + + list(decoder.decoder_layers) + + [decoder.decoder_output_layer] + ) + self.num_decoder_modules = len(decoder_module_list) + module_list = encoder_module_list + decoder_module_list + self.devices = devices + if TORCH_PIPE: + self.model = Pipe( + partition_model(nn.Sequential(*module_list), balance, devices), + chunks=chunks, + checkpoint=checkpoint, + ) + else: + self.model = Pipe( + nn.Sequential(*module_list), + balance=balance, + devices=devices, + chunks=chunks, + checkpoint=checkpoint, + ) + self.encoder_max_positions = self.max_positions_helper( + encoder.embedding_layer, "max_source_positions" + ) + self.decoder_max_positions = self.max_positions_helper( + decoder.embedding_layer, "max_target_positions" + ) + self.adaptive_softmax = getattr(decoder, "adaptive_softmax", None) + # Note: To be populated during inference + self.encoder = None + self.decoder = None + + def forward(self, src_tokens, src_lengths, prev_output_tokens): + if 
self.training: + input_lst = [src_tokens, src_lengths, prev_output_tokens] + input = tuple(i.to(self.devices[0], non_blocking=True) for i in input_lst) + if TORCH_PIPE: + return self.model(input).local_value() + else: + return self.model(input) + else: + assert self.encoder is not None and self.decoder is not None, ( + "encoder and decoder need to be initialized by " + + "calling the `prepare_for_inference_()` method" + ) + encoder_output_tuple = self.encoder(input) + return self.decoder(encoder_output_tuple) + + def prepare_for_inference_(self, cfg): + if self.encoder is not None and self.decoder is not None: + logger.info("Encoder and Decoder already initialized") + return + encoder_module_list = [] + decoder_module_list = [] + module_count = 0 + for partition in self.model.partitions: + for module in partition: + if module_count < self.num_encoder_modules: + encoder_module_list.append(module) + else: + decoder_module_list.append(module) + module_count += 1 + self.model = None + self.encoder = TransformerEncoder( + cfg.distributed_training, None, None, encoder_module_list + ) + self.decoder = TransformerDecoder( + cfg.distributed_training, + None, + None, + decoder_module_list=decoder_module_list, + ) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--activation-fn', + choices=utils.get_available_activation_fns(), + help='activation function to use') + parser.add_argument('--dropout', type=float, metavar='D', + help='dropout probability') + parser.add_argument('--attention-dropout', type=float, metavar='D', + help='dropout probability for attention weights') + parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D', + help='dropout probability after activation in FFN.') + parser.add_argument('--encoder-embed-path', type=str, metavar='STR', + help='path to pre-trained encoder embedding') + parser.add_argument('--encoder-embed-dim', type=int, metavar='N', 
+ help='encoder embedding dimension') + parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N', + help='encoder embedding dimension for FFN') + parser.add_argument('--encoder-layers', type=int, metavar='N', + help='num encoder layers') + parser.add_argument('--encoder-attention-heads', type=int, metavar='N', + help='num encoder attention heads') + parser.add_argument('--encoder-normalize-before', action='store_true', + help='apply layernorm before each encoder block') + parser.add_argument('--encoder-learned-pos', action='store_true', + help='use learned positional embeddings in the encoder') + parser.add_argument('--decoder-embed-path', type=str, metavar='STR', + help='path to pre-trained decoder embedding') + parser.add_argument('--decoder-embed-dim', type=int, metavar='N', + help='decoder embedding dimension') + parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N', + help='decoder embedding dimension for FFN') + parser.add_argument('--decoder-layers', type=int, metavar='N', + help='num decoder layers') + parser.add_argument('--decoder-attention-heads', type=int, metavar='N', + help='num decoder attention heads') + parser.add_argument('--decoder-learned-pos', action='store_true', + help='use learned positional embeddings in the decoder') + parser.add_argument('--decoder-normalize-before', action='store_true', + help='apply layernorm before each decoder block') + parser.add_argument('--share-decoder-input-output-embed', action='store_true', + help='share decoder input and output embeddings') + parser.add_argument('--share-all-embeddings', action='store_true', + help='share encoder, decoder and output embeddings' + ' (requires shared dictionary and embed dim)') + parser.add_argument('--no-token-positional-embeddings', default=False, action='store_true', + help='if set, disables positional embeddings (outside self attention)') + parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR', + help='comma separated list of adaptive 
softmax cutoff points. ' + 'Must be used with adaptive_loss criterion'), + parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D', + help='sets adaptive softmax dropout for the tail projections') + parser.add_argument('--num-embedding-chunks', type=int, metavar='N', default=1, + help='Number of embedding layer chunks (enables more even distribution' + 'of optimizer states across data parallel nodes' + 'when using optimizer state sharding and' + 'a big embedding vocabulary)') + # fmt: on + + @classmethod + def build_model_base(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + if not hasattr(args, "max_source_positions"): + args.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + if not hasattr(args, "max_target_positions"): + args.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + + def build_embedding(dictionary, embed_dim, path=None, num_embed_chunks=1): + assert embed_dim % num_embed_chunks == 0, ( + f"Number of embedding chunks = {num_embed_chunks} should be " + + f"divisible by the embedding dimension = {embed_dim}" + ) + assert path is None or num_embed_chunks == 1, ( + "Loading embedding from a path with number of embedding chunks > 1" + + " is not yet supported" + ) + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + # if provided, load from preloaded dictionaries + if path: + emb = Embedding(num_embeddings, embed_dim, padding_idx) + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + else: + embed_chunk_dim = embed_dim // num_embed_chunks + emb = nn.ModuleList() + for i in range(num_embed_chunks): + emb.append(Embedding(num_embeddings, embed_chunk_dim, padding_idx)) + return emb + + num_embed_chunks = args.num_embedding_chunks + if args.share_all_embeddings: + if src_dict != tgt_dict: + raise 
ValueError("--share-all-embeddings requires a joined dictionary") + if args.encoder_embed_dim != args.decoder_embed_dim: + raise ValueError( + "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim" + ) + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise ValueError( + "--share-all-embeddings not compatible with --decoder-embed-path" + ) + encoder_embed_tokens = build_embedding( + src_dict, + args.encoder_embed_dim, + args.encoder_embed_path, + num_embed_chunks, + ) + decoder_embed_tokens = encoder_embed_tokens + args.share_decoder_input_output_embed = True + else: + assert args.share_decoder_input_output_embed or num_embed_chunks == 1, ( + "Not sharing decoder I/O embeddings is not yet supported with number of " + + "embedding chunks > 1" + ) + encoder_embed_tokens = build_embedding( + src_dict, + args.encoder_embed_dim, + args.encoder_embed_path, + num_embed_chunks, + ) + decoder_embed_tokens = build_embedding( + tgt_dict, + args.decoder_embed_dim, + args.decoder_embed_path, + num_embed_chunks, + ) + + encoder = cls.build_encoder(args, src_dict, encoder_embed_tokens) + decoder = cls.build_decoder(args, tgt_dict, decoder_embed_tokens) + return (encoder, decoder) + + @classmethod + def build_encoder(cls, args, src_dict, embed_tokens): + return TransformerEncoder(args, src_dict, embed_tokens) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + return TransformerDecoder(args, tgt_dict, embed_tokens) + + @classmethod + def build_model(cls, args, task): + encoder, decoder = cls.build_model_base(args, task) + return PipelineParallelTransformerModel( + encoder=encoder, + decoder=decoder, + balance=utils.eval_str_list(args.pipeline_balance, type=int), + devices=utils.eval_str_list(args.pipeline_devices, type=int), + chunks=args.pipeline_chunks, + checkpoint=args.pipeline_checkpoint, + ) + + def output_layer(self, features, **kwargs): + """Project features to the default 
output size (typically vocabulary size).""" + return self.decoder.output_layer(features, **kwargs) + + def max_positions(self): + """Maximum length supported by the model.""" + return (self.encoder_max_positions, self.decoder_max_positions) + + def max_positions_helper( + self, embedding_layer, max_positions_field="max_source_positions" + ): + """Maximum input length supported by the encoder or decoder.""" + if embedding_layer.embed_positions is None: + return getattr(embedding_layer, max_positions_field) + return min( + getattr(embedding_layer, max_positions_field), + embedding_layer.embed_positions.max_positions, + ) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + + if hasattr(self, "adaptive_softmax") and self.adaptive_softmax is not None: + if sample is not None: + assert "target" in sample + target = sample["target"] + else: + target = None + out = self.adaptive_softmax.get_log_prob(net_output, target=target) + return out.exp_() if not log_probs else out + + # A Pipe() module returns a tuple of tensors as the output. + # In this case, the tuple has one element - the output tensor of logits + logits = net_output if isinstance(net_output, torch.Tensor) else net_output[0] + if log_probs: + return utils.log_softmax(logits, dim=-1, onnx_trace=False) + else: + return utils.softmax(logits, dim=-1, onnx_trace=False) + + def max_decoder_positions(self): + """Maximum length supported by the decoder.""" + return self.decoder_max_positions + + def load_state_dict(self, state_dict, strict=True, model_cfg=None): + """Copies parameters and buffers from *state_dict* into this module and + its descendants. + + Overrides the method in :class:`nn.Module`. Compared with that method + this additionally "upgrades" *state_dicts* from old checkpoints. 
+ """ + self.upgrade_state_dict(state_dict) + is_regular_transformer = not any("model.partitions" in k for k in state_dict) + if is_regular_transformer: + state_dict = self.convert_to_pipeline_parallel_state_dict(state_dict) + return super().load_state_dict(state_dict, strict) + + def convert_to_pipeline_parallel_state_dict(self, state_dict): + new_state_dict = self.state_dict() + encoder_layer_idx = 0 + decoder_layer_idx = 0 + encoder_key_suffixes = [ + "self_attn.k_proj.weight", + "self_attn.k_proj.bias", + "self_attn.v_proj.weight", + "self_attn.v_proj.bias", + "self_attn.q_proj.weight", + "self_attn.q_proj.bias", + "self_attn.out_proj.weight", + "self_attn.out_proj.bias", + "self_attn_layer_norm.weight", + "self_attn_layer_norm.bias", + "fc1.weight", + "fc1.bias", + "fc2.weight", + "fc2.bias", + "final_layer_norm.weight", + "final_layer_norm.bias", + ] + decoder_key_suffixes = [ + "self_attn.k_proj.weight", + "self_attn.k_proj.bias", + "self_attn.v_proj.weight", + "self_attn.v_proj.bias", + "self_attn.q_proj.weight", + "self_attn.q_proj.bias", + "self_attn.out_proj.weight", + "self_attn.out_proj.bias", + "self_attn_layer_norm.weight", + "self_attn_layer_norm.bias", + "encoder_attn.k_proj.weight", + "encoder_attn.k_proj.bias", + "encoder_attn.v_proj.weight", + "encoder_attn.v_proj.bias", + "encoder_attn.q_proj.weight", + "encoder_attn.q_proj.bias", + "encoder_attn.out_proj.weight", + "encoder_attn.out_proj.bias", + "encoder_attn_layer_norm.weight", + "encoder_attn_layer_norm.bias", + "fc1.weight", + "fc1.bias", + "fc2.weight", + "fc2.bias", + "final_layer_norm.weight", + "final_layer_norm.bias", + ] + for pid, partition in enumerate(self.model.partitions): + logger.info(f"Begin Partition {pid}") + for mid, module in enumerate(partition): + # fmt: off + if isinstance(module, TransformerEncoderEmbedding): + new_state_dict[f'model.partitions.{pid}.{mid}.embed_tokens.weight'] = state_dict['encoder.embed_tokens.weight'] + if isinstance(module, 
TransformerEncoderLayer): + for suffix in encoder_key_suffixes: + new_state_dict[f'model.partitions.{pid}.{mid}.{suffix}'] = state_dict[f'encoder.layers.{encoder_layer_idx}.{suffix}'] + encoder_layer_idx += 1 + if isinstance(module, TransformerDecoderLayer): + for suffix in decoder_key_suffixes: + new_state_dict[f'model.partitions.{pid}.{mid}.{suffix}'] = state_dict[f'decoder.layers.{decoder_layer_idx}.{suffix}'] + decoder_layer_idx += 1 + if isinstance(module, TransformerEncoderLayerNorm): + if 'encoder.layer_norm.weight' in state_dict: + new_state_dict[f'model.partitions.{pid}.{mid}.layer_norm.weight'] = state_dict['encoder.layer_norm.weight'] + new_state_dict[f'model.partitions.{pid}.{mid}.layer_norm.bias'] = state_dict['encoder.layer_norm.bias'] + if isinstance(module, TransformerDecoderEmbedding): + new_state_dict[f'model.partitions.{pid}.{mid}.embed_tokens.weight'] = state_dict['decoder.embed_tokens.weight'] + if isinstance(module, TransformerDecoderOutputLayer): + new_state_dict[f'model.partitions.{pid}.{mid}.output_projection.weight'] = state_dict['decoder.output_projection.weight'] + # fmt: on + return new_state_dict + + +class TransformerEncoder(FairseqEncoder): + """ + Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, args, dictionary, embed_tokens, encoder_module_list=None): + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + import_pipe() + self.use_pipeline = encoder_module_list is not None + if not self.use_pipeline: + self.embedding_layer = TransformerEncoderEmbedding(args, embed_tokens) + self.encoder_layers = nn.Sequential( + *[TransformerEncoderLayer(args) for i in range(args.encoder_layers)] + ) + if isinstance(embed_tokens, nn.ModuleList): + emb_dim = sum(e.embedding_dim for e in embed_tokens) + else: + emb_dim = embed_tokens.embedding_dim + self.final_layer_norm = TransformerEncoderLayerNorm(args, emb_dim) + else: + encoder_balance = utils.eval_str_list( + args.pipeline_encoder_balance, type=int + ) + encoder_devices = utils.eval_str_list( + args.pipeline_encoder_devices, type=int + ) + assert sum(encoder_balance) == len(encoder_module_list), ( + f"Sum of encoder_balance={encoder_balance} is not equal " + + f"to num_encoder_modules={len(encoder_module_list)}" + ) + if TORCH_PIPE: + self.model = Pipe( + module=partition_model( + nn.Sequential(*encoder_module_list), + encoder_balance, + encoder_devices, + ), + chunks=args.pipeline_chunks, + checkpoint=args.pipeline_checkpoint, + ) + else: + self.model = Pipe( + module=nn.Sequential(*encoder_module_list), + balance=encoder_balance, + devices=encoder_devices, + chunks=args.pipeline_chunks, + checkpoint=args.pipeline_checkpoint, + ) + + def forward(self, src_tokens, src_lengths): + """ + Args: + input_tuple( + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + ) + + Returns: + output_tuple( + - **encoder_out** (Tensor): the last encoder layer's output of + 
shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - prev_output_tokens + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + ) + """ + dummy_prev_output_tokens = torch.zeros( + 1, dtype=src_tokens.dtype, device=src_tokens.device + ) + input_tuple = (src_tokens, src_lengths, dummy_prev_output_tokens) + if self.use_pipeline: + input_tuple = tuple(i.to(self.model.devices[0]) for i in input_tuple) + if TORCH_PIPE: + encoder_out = self.model(input_tuple).local_value() + else: + encoder_out = self.model(input_tuple) + else: + encoder_embed_output_tuple = self.embedding_layer(input_tuple) + encoder_layers_output = self.encoder_layers(encoder_embed_output_tuple) + encoder_out = self.final_layer_norm(encoder_layers_output) + # first element is the encoder output + # second element is the encoder padding mask + # the remaining elements of EncoderOut are not computed by + # the PipelineParallelTransformer + return EncoderOut(encoder_out[0], encoder_out[1], None, None, None, None) + + def reorder_encoder_out(self, encoder_out, new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if encoder_out.encoder_out is not None: + encoder_out = encoder_out._replace( + encoder_out=encoder_out.encoder_out.index_select(1, new_order) + ) + if encoder_out.encoder_padding_mask is not None: + encoder_out = encoder_out._replace( + encoder_padding_mask=encoder_out.encoder_padding_mask.index_select( + 0, new_order + ) + ) + if encoder_out.encoder_embedding is not None: + encoder_out = encoder_out._replace( + encoder_embedding=encoder_out.encoder_embedding.index_select( + 0, new_order + ) + ) + if encoder_out.encoder_states is not None: + for idx, state in enumerate(encoder_out.encoder_states): + encoder_out.encoder_states[idx] = state.index_select(1, new_order) + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embedding_layer.embed_positions is None: + return self.embedding_layer.max_source_positions + return min( + self.embedding_layer.max_source_positions, + self.embedding_layer.embed_positions.max_positions, + ) + + +class TransformerDecoder(FairseqDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
+ """ + + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + decoder_module_list=None, + ): + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + import_pipe() + self.use_pipeline = decoder_module_list is not None + if not self.use_pipeline: + self.embedding_layer = TransformerDecoderEmbedding(args, embed_tokens) + self.decoder_layers = nn.Sequential( + *[ + TransformerDecoderLayer(args, no_encoder_attn) + for _ in range(args.decoder_layers) + ] + ) + self.decoder_output_layer = TransformerDecoderOutputLayer( + args, embed_tokens, dictionary + ) + else: + decoder_balance = utils.eval_str_list( + args.pipeline_decoder_balance, type=int + ) + decoder_devices = utils.eval_str_list( + args.pipeline_decoder_devices, type=int + ) + assert sum(decoder_balance) == len(decoder_module_list), ( + f"Sum of decoder_balance={decoder_balance} is not equal " + + f"to num_decoder_modules={len(decoder_module_list)}" + ) + if TORCH_PIPE: + self.model = Pipe( + module=partition_model( + nn.Sequential(*decoder_module_list), + decoder_balance, + decoder_devices, + ), + chunks=args.pipeline_chunks, + checkpoint=args.pipeline_checkpoint, + ) + else: + self.model = Pipe( + module=nn.Sequential(*decoder_module_list), + balance=decoder_balance, + devices=decoder_devices, + chunks=args.pipeline_chunks, + checkpoint=args.pipeline_checkpoint, + ) + + def forward( + self, + prev_output_tokens, + encoder_out=None, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). 
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + input_tuple = ( + encoder_out.encoder_out, + encoder_out.encoder_padding_mask, + prev_output_tokens, + ) + if self.use_pipeline: + input_tuple = tuple(i.to(self.model.devices[0]) for i in input_tuple) + if TORCH_PIPE: + return (self.model(input_tuple).local_value(),) + else: + return (self.model(input_tuple),) + else: + embed_layer_output = self.embedding_layer(input_tuple) + state = self.decoder_layers(embed_layer_output) + return (self.decoder_output_layer(state),) + + def output_layer(self, features, **kwargs): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + if self.share_input_output_embed: + return F.linear(features, self.embed_tokens.weight) + else: + return F.linear(features, self.embed_out) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embedding_layer.embed_positions is None: + return self.embedding_layer.max_target_positions + return min( + self.embedding_layer.max_target_positions, + self.embedding_layer.embed_positions.max_positions, + ) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if ( + not hasattr(self, "_future_mask") + or self._future_mask is None + or self._future_mask.device != tensor.device + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1 + ) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + for i in range(len(self.layers)): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = 
"{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +@register_model_architecture( + "pipeline_parallel_transformer", "transformer_iwslt_de_en_pipeline_parallel" +) +def transformer_iwslt_de_en_dist(args): + transformer_iwslt_de_en(args) + + +@register_model_architecture( + "pipeline_parallel_transformer", "transformer_wmt_en_de_big_pipeline_parallel" +) +def transformer_wmt_en_de_big_dist(args): + transformer_wmt_en_de_big(args) diff --git a/fairseq/fairseq/model_parallel/models/roberta/__init__.py b/fairseq/fairseq/model_parallel/models/roberta/__init__.py new file mode 100644 index 0000000..117827c --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/roberta/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .model import * # noqa diff --git a/fairseq/fairseq/model_parallel/models/roberta/model.py b/fairseq/fairseq/model_parallel/models/roberta/model.py new file mode 100644 index 0000000..77a80ef --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/roberta/model.py @@ -0,0 +1,225 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +RoBERTa: A Robustly Optimized BERT Pretraining Approach. 
+""" + +import logging + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.model_parallel.models.transformer import ModelParallelTransformerEncoder +from fairseq.models import register_model, register_model_architecture +from fairseq.models.roberta import ( + roberta_base_architecture, + roberta_prenorm_architecture, + RobertaEncoder, + RobertaModel, +) +from fairseq.modules import LayerNorm + + +try: + from fairseq.model_parallel.megatron.mpu import ( + copy_to_model_parallel_region, + gather_from_model_parallel_region, + ColumnParallelLinear, + VocabParallelEmbedding, + ) + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + +logger = logging.getLogger(__name__) + + +@register_model("model_parallel_roberta") +class ModelParallelRobertaModel(RobertaModel): + def __init__(self, args, encoder): + super().__init__(args, encoder) + + self.classification_heads = nn.ModuleDict() + + @staticmethod + def add_args(parser): + RobertaModel.add_args(parser) + parser.add_argument( + "--no-final-layer-norm", + action="store_true", + help=( + "don't add final layernorm (only applicable when " + "--encoder-normalize-before=True" + ), + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present + base_architecture(args) + + task.source_dictionary.pad_to_multiple_(args.model_parallel_size * 8) + task.target_dictionary.pad_to_multiple_(args.model_parallel_size * 8) + + if not hasattr(args, "max_positions"): + args.max_positions = args.tokens_per_sample + + if getattr(args, "untie_weights_roberta", False): + raise NotImplementedError( + "--untie-weights-roberta is not supported in model parallel mode" + ) + + encoder = ModelParallelRobertaEncoder(args, task.source_dictionary) + return cls(args, encoder) + + def forward( + self, + src_tokens, + features_only=False, + return_all_hiddens=False, + 
classification_head_name=None, + **kwargs + ): + if classification_head_name is not None: + features_only = True + + x, extra = self.encoder(src_tokens, features_only, return_all_hiddens, **kwargs) + + if classification_head_name is not None: + x = self.classification_heads[classification_head_name](x) + return x, extra + + def register_classification_head( + self, name, num_classes=None, inner_dim=None, **kwargs + ): + """Register a classification head.""" + if name in self.classification_heads: + prev_num_classes = self.classification_heads[name].out_proj.out_features + prev_inner_dim = self.classification_heads[name].dense.out_features + if num_classes != prev_num_classes or inner_dim != prev_inner_dim: + logger.warning( + 're-registering head "{}" with num_classes {} (prev: {}) ' + "and inner_dim {} (prev: {})".format( + name, num_classes, prev_num_classes, inner_dim, prev_inner_dim + ) + ) + self.classification_heads[name] = ModelParallelRobertaClassificationHead( + self.args.encoder_embed_dim, + inner_dim or self.args.encoder_embed_dim, + num_classes, + self.args.pooler_activation_fn, + self.args.pooler_dropout, + ) + + +class ModelParallelRobertaLMHead(nn.Module): + """Head for masked language modeling.""" + + def __init__(self, embed_dim, output_dim, activation_fn, weight=None): + super().__init__() + self.dense = ColumnParallelLinear(embed_dim, embed_dim, gather_output=True) + self.activation_fn = utils.get_activation_fn(activation_fn) + self.layer_norm = LayerNorm(embed_dim) + + if weight is None: + weight = nn.Linear(embed_dim, output_dim, bias=False).weight + self.weight = weight + self.bias = nn.Parameter(torch.zeros(output_dim)) + + def forward(self, features, masked_tokens=None, **kwargs): + # Only project the unmasked tokens while training, + # saves both memory and computation + if masked_tokens is not None: + features = features[masked_tokens, :] + + x = self.dense(features) + x = self.activation_fn(x) + x = self.layer_norm(x) + + x = 
copy_to_model_parallel_region(x) + # project back to size of vocabulary with bias + x = F.linear(x, self.weight) + x = gather_from_model_parallel_region(x).contiguous() + x = x + self.bias + return x + + +class ModelParallelRobertaClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__( + self, input_dim, inner_dim, num_classes, activation_fn, pooler_dropout + ): + super().__init__() + self.dense = ColumnParallelLinear(input_dim, inner_dim, gather_output=True) + self.activation_fn = utils.get_activation_fn(activation_fn) + self.dropout = nn.Dropout(p=pooler_dropout) + self.out_proj = nn.Linear(inner_dim, num_classes) + + def forward(self, features, **kwargs): + x = features[:, 0, :] # take <s> token (equiv. to [CLS]) + x = self.dropout(x) + x = self.dense(x) + x = self.activation_fn(x) + x = self.dropout(x) + x = self.out_proj(x) + return x + + +class ModelParallelRobertaEncoder(RobertaEncoder): + """RoBERTa encoder.""" + + def __init__(self, args, dictionary): + super().__init__(args, dictionary) + assert not self.args.untie_weights_roberta + + def build_embedding(self, vocab_size, embedding_dim, padding_idx): + return VocabParallelEmbedding(vocab_size, embedding_dim, padding_idx) + + def build_encoder(self, args, dictionary, embed_tokens): + return ModelParallelTransformerEncoder(args, dictionary, embed_tokens) + + def build_lm_head(self, embed_dim, output_dim, activation_fn, weight): + return ModelParallelRobertaLMHead(embed_dim, output_dim, activation_fn, weight) + + +@register_model_architecture("model_parallel_roberta", "model_parallel_roberta") +def base_architecture(args): + args.no_final_layer_norm = getattr(args, "no_final_layer_norm", False) + # model parallel RoBERTa defaults to "Pre-LN" formulation + roberta_prenorm_architecture(args) + + +# earlier versions of model parallel RoBERTa removed the final layer norm +@register_model_architecture("model_parallel_roberta", "model_parallel_roberta_v1") +def 
model_parallel_roberta_v1_architecture(args): + args.no_final_layer_norm = getattr(args, "no_final_layer_norm", True) + base_architecture(args) + + +@register_model_architecture( + "model_parallel_roberta", "model_parallel_roberta_postnorm" +) +def model_parallel_roberta_postnorm_architecture(args): + # the original BERT/RoBERTa uses the "Post-LN" formulation + roberta_base_architecture(args) + + +@register_model_architecture("model_parallel_roberta", "model_parallel_roberta_base") +def model_parallel_roberta_base_architecture(args): + base_architecture(args) + + +@register_model_architecture("model_parallel_roberta", "model_parallel_roberta_large") +def model_parallel_roberta_large_architecture(args): + args.encoder_layers = getattr(args, "encoder_layers", 24) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + base_architecture(args) diff --git a/fairseq/fairseq/model_parallel/models/transformer.py b/fairseq/fairseq/model_parallel/models/transformer.py new file mode 100644 index 0000000..cf3b2e8 --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/transformer.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging + +import torch.nn as nn + +from fairseq.model_parallel.modules import ( + ModelParallelTransformerDecoderLayer, + ModelParallelTransformerEncoderLayer, +) +from fairseq.models import register_model +from fairseq.models.transformer import ( + TransformerDecoder, + TransformerEncoder, + TransformerModel, +) + +try: + from fairseq.model_parallel.megatron.mpu import ( + VocabParallelEmbedding, + copy_to_model_parallel_region, + gather_from_model_parallel_region, + ) + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + + +logger = logging.getLogger(__name__) + + +@register_model("model_parallel_transformer") +class ModelParallelTransformerModel(TransformerModel): + """ + Model parallel Transformer model. + """ + + @classmethod + def build_embedding(cls, args, dictionary, embed_dim, path=None): + if not has_megatron_submodule: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + dictionary.pad_to_multiple_(args.model_parallel_size * 8) + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + + def _vocab_init(tensor, **kwargs): + nn.init.normal_(tensor, mean=0, std=num_embeddings**-0.5) + nn.init.constant_(tensor[1], 0) + + emb = VocabParallelEmbedding( + num_embeddings, embed_dim, padding_idx, init_method=_vocab_init + ) + # if provided, load from preloaded dictionaries + if path: + raise NotImplementedError( + "Loading of embedding from path is not supported for model parallel" + ) + return emb + + @classmethod + def build_encoder(cls, args, src_dict, embed_tokens): + return ModelParallelTransformerEncoder(args, src_dict, embed_tokens) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + return ModelParallelTransformerDecoder( + args, + tgt_dict, + embed_tokens, + no_encoder_attn=getattr(args, "no_cross_attention", False), + ) + + +class 
ModelParallelTransformerEncoder(TransformerEncoder): + """ + Model parallel Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`ModelParallelTransformerEncoderLayer`. + """ + + def __init__(self, args, dictionary, embed_tokens): + super().__init__(args, dictionary, embed_tokens) + + if args.no_final_layer_norm: + self.layer_norm = None + + def build_encoder_layer(self, args): + return ModelParallelTransformerEncoderLayer(args) + + +class ModelParallelTransformerDecoder(TransformerDecoder): + """ + Model Parallel Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`ModelParallelTransformerDecoderLayer`. + """ + + def build_decoder_layer(self, args, no_encoder_attn=False): + return ModelParallelTransformerDecoderLayer(args, no_encoder_attn) + + def output_layer(self, features, **kwargs): + """Project features to the vocabulary size.""" + if not self.share_input_output_embed: + raise NotImplementedError( + "Model parallel training currently requires --share-decoder-input-output-embed" + ) + + features = copy_to_model_parallel_region(features) + + # project back to size of vocabulary + x = self.output_projection(features) + + if getattr(self.args, "criterion") != "vocab_parallel_cross_entropy": + x = gather_from_model_parallel_region(x).contiguous() + return x diff --git a/fairseq/fairseq/model_parallel/models/transformer_lm.py b/fairseq/fairseq/model_parallel/models/transformer_lm.py new file mode 100644 index 0000000..03e4dbe --- /dev/null +++ b/fairseq/fairseq/model_parallel/models/transformer_lm.py @@ -0,0 +1,169 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch.nn as nn + +from fairseq.model_parallel.models.transformer import ModelParallelTransformerDecoder +from fairseq.models import register_model, register_model_architecture +from fairseq.models.transformer_lm import TransformerLanguageModel + +try: + from fairseq.model_parallel.megatron.mpu import VocabParallelEmbedding + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + + +DEFAULT_MAX_TARGET_POSITIONS = 1024 + + +@register_model("model_parallel_transformer_lm") +class ModelParallelTransformerLanguageModel(TransformerLanguageModel): + @staticmethod + def add_args(parser): + TransformerLanguageModel.add_args(parser) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + if not has_megatron_submodule: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + + # make sure all arguments are present in older models + base_lm_architecture(args) + + task.source_dictionary.pad_to_multiple_(args.model_parallel_size * 8) + task.target_dictionary.pad_to_multiple_(args.model_parallel_size * 8) + + if args.decoder_layers_to_keep: + args.decoder_layers = len(args.decoder_layers_to_keep.split(",")) + + if getattr(args, "max_target_positions", None) is None: + args.max_target_positions = getattr( + args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS + ) + + if args.character_embeddings: + raise NotImplementedError( + "Character embeddings is not supported for model parallel" + ) + elif args.adaptive_input: + raise NotImplementedError( + "Adaptive input is not supported for model parallel" + ) + else: + embed_tokens = cls.build_embedding( + args, task.source_dictionary, args.decoder_input_dim + ) + + decoder = ModelParallelTransformerDecoder( + args, + task.target_dictionary, + embed_tokens, + no_encoder_attn=True, + ) + return cls(decoder) + + @classmethod + def 
build_embedding(cls, args, dictionary, embed_dim, path=None): + def _vocab_init(tensor, **kwargs): + nn.init.normal_(tensor, mean=0, std=embed_dim**-0.5) + nn.init.constant_(tensor[1], 0) + + embed_tokens = VocabParallelEmbedding( + len(dictionary), embed_dim, dictionary.pad(), init_method=_vocab_init + ) + return embed_tokens + + +def base_lm_architecture(args): + # backward compatibility for older model checkpoints + if hasattr(args, "no_tie_adaptive_proj"): + # previous models defined --no-tie-adaptive-proj, so use the existence of + # that option to determine if this is an "old" model checkpoint + args.no_decoder_final_norm = True # old models always set this to True + if args.no_tie_adaptive_proj is False: + args.tie_adaptive_proj = True + if hasattr(args, "decoder_final_norm"): + args.no_decoder_final_norm = not args.decoder_final_norm + + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.relu_dropout = getattr(args, "relu_dropout", 0.0) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 2048) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + # Model training is not stable without this + args.decoder_normalize_before = True + args.no_decoder_final_norm = getattr(args, "no_decoder_final_norm", False) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.adaptive_softmax_factor = getattr(args, 
"adaptive_softmax_factor", 4) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.character_embeddings = getattr(args, "character_embeddings", False) + args.character_filters = getattr( + args, + "character_filters", + "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]", + ) + args.character_embedding_dim = getattr(args, "character_embedding_dim", 4) + args.char_embedder_highway_layers = getattr(args, "char_embedder_highway_layers", 2) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.adaptive_input_factor = getattr(args, "adaptive_input_factor", 4) + args.adaptive_input_cutoff = getattr(args, "adaptive_input_cutoff", None) + args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False) + args.tie_adaptive_proj = getattr(args, "tie_adaptive_proj", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.decoder_layers_to_keep = getattr(args, "decoder_layers_to_keep", None) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0.0) + args.quant_noise_pq_block_size = getattr(args, "quant_noise_pq_block_size", 8) + args.quant_noise_scalar = getattr(args, "quant_noise_scalar", 0.0) + args.add_bos_token = getattr(args, "add_bos_token", False) + + +@register_model_architecture("model_parallel_transformer_lm", "transformer_lm_megatron") +def transformer_lm_megatron(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 3072) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 3072 * 4) + args.decoder_layers = getattr(args, "decoder_layers", 72) + args.decoder_attention_heads = getattr(args, 
"decoder_attention_heads", 32) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture( + "model_parallel_transformer_lm", "transformer_lm_megatron_11b" +) +def transformer_lm_megatron_11b(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 3072) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 3072 * 6) + args.decoder_layers = getattr(args, "decoder_layers", 72) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 32) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) diff --git a/fairseq/fairseq/model_parallel/modules/__init__.py b/fairseq/fairseq/model_parallel/modules/__init__.py new file mode 100644 index 0000000..1160321 --- /dev/null +++ b/fairseq/fairseq/model_parallel/modules/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""isort:skip_file""" + +from .multihead_attention import ModelParallelMultiheadAttention +from .transformer_layer import ( + ModelParallelTransformerEncoderLayer, + ModelParallelTransformerDecoderLayer, +) + +__all__ = [ + "ModelParallelMultiheadAttention", + "ModelParallelTransformerEncoderLayer", + "ModelParallelTransformerDecoderLayer", +] diff --git a/fairseq/fairseq/model_parallel/modules/multihead_attention.py b/fairseq/fairseq/model_parallel/modules/multihead_attention.py new file mode 100644 index 0000000..bbea450 --- /dev/null +++ b/fairseq/fairseq/model_parallel/modules/multihead_attention.py @@ -0,0 +1,349 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from torch import Tensor, nn + +from fairseq import utils +from fairseq.incremental_decoding_utils import with_incremental_state +from fairseq.modules.fairseq_dropout import FairseqDropout + +try: + from fairseq.model_parallel.megatron.mpu import ( + ColumnParallelLinear, + RowParallelLinear, + get_cuda_rng_tracker, + get_model_parallel_world_size, + ) + + has_megatron_submodule = True +except (ImportError, ModuleNotFoundError): + has_megatron_submodule = False + + +@with_incremental_state +class ModelParallelMultiheadAttention(nn.Module): + """Model parallel Multi-headed attention. + This performs the Multi-headed attention over multiple gpus. + + See "Megatron-LM: https://arxiv.org/pdf/1909.08053.pdf" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + self_attention=False, + encoder_decoder_attention=False, + ): + super().__init__() + if not has_megatron_submodule: + raise ImportError( + "\n\nPlease install the megatron submodule:" + "\n\n git submodule update --init " + "fairseq/model_parallel/megatron" + ) + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.model_parallel_size = get_model_parallel_world_size() + + self.num_heads_partition = num_heads // self.model_parallel_size + assert ( + self.num_heads_partition * self.model_parallel_size == num_heads + ), "Number of heads must be divisible by model parallel size" + + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == 
self.embed_dim + ), "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim**-0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert ( + not self.self_attention or self.qkv_same_dim + ), "Self-attention requires query, key and value to be of the same size" + + self.k_proj = ColumnParallelLinear( + self.kdim, embed_dim, bias=bias, gather_output=False + ) + self.v_proj = ColumnParallelLinear( + self.vdim, embed_dim, bias=bias, gather_output=False + ) + self.q_proj = ColumnParallelLinear( + embed_dim, embed_dim, bias=bias, gather_output=False + ) + self.out_proj = RowParallelLinear( + embed_dim, embed_dim, bias=bias, input_is_parallel=True + ) + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + **unused_kwargs, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). 
+ """ + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + + is_tpu = query.device.type == "xla" + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads_partition, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads_partition, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads_partition, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads_partition, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view( + bsz * self.num_heads_partition, -1, self.head_dim + ) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view( + bsz * self.num_heads_partition, -1, self.head_dim + ) + if 
static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = ( + ModelParallelMultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + ) + + saved_state["prev_key"] = k.view( + bsz, self.num_heads_partition, -1, self.head_dim + ) + saved_state["prev_value"] = v.view( + bsz, self.num_heads_partition, -1, self.head_dim + ) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + src_len = k.size(1) + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. 
+ if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + + assert list(attn_weights.size()) == [ + bsz * self.num_heads_partition, + tgt_len, + src_len, + ] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view( + bsz, self.num_heads_partition, tgt_len, src_len + ) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view( + bsz * self.num_heads_partition, tgt_len, src_len + ) + + attn_weights_float = utils.softmax(attn_weights, dim=-1) + attn_weights = attn_weights_float.type_as(attn_weights) + + with get_cuda_rng_tracker().fork(): + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [ + bsz * self.num_heads_partition, + tgt_len, + self.head_dim, + ] + embed_dim_partition = embed_dim // self.model_parallel_size + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim_partition) + attn = self.out_proj(attn) + # return attn_weights None to keep the return type same as single gpu multihead attention + # This will be deprecated. 
+ attn_weights: Optional[Tensor] = None + + return attn, attn_weights + + @staticmethod + def _append_prev_key_padding_mask( + key_padding_mask: Optional[Tensor], + prev_key_padding_mask: Optional[Tensor], + batch_size: int, + src_len: int, + static_kv: bool, + ) -> Optional[Tensor]: + # saved key padding masks have shape (bsz, seq_len) + if prev_key_padding_mask is not None and static_kv: + new_key_padding_mask = prev_key_padding_mask + elif prev_key_padding_mask is not None and key_padding_mask is not None: + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1 + ) + # During incremental decoding, as the padding token enters and + # leaves the frame, there will be a time when prev or current + # is None + elif prev_key_padding_mask is not None: + + filler = torch.zeros(batch_size, src_len - prev_key_padding_mask.size(1)) + if prev_key_padding_mask.is_cuda: + filler = filler.cuda() + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), filler.float()], dim=1 + ) + elif key_padding_mask is not None: + filler = torch.zeros(batch_size, src_len - key_padding_mask.size(1)) + if key_padding_mask.is_cuda: + filler = filler.cuda() + new_key_padding_mask = torch.cat( + [filler.float(), key_padding_mask.float()], dim=1 + ) + else: + new_key_padding_mask = prev_key_padding_mask + return new_key_padding_mask + + def reorder_incremental_state( + self, incremental_state: Dict[str, Dict[str, Optional[Tensor]]], new_order + ): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + if input_buffer[k] is not None: + input_buffer[k] = input_buffer[k].index_select(0, new_order) + incremental_state = self._set_input_buffer(incremental_state, input_buffer) + return incremental_state + + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ) 
class ModelParallelTransformerEncoderLayer(TransformerEncoderLayer):
    """Transformer encoder layer built from model-parallel sub-modules.

    The feed-forward projections use the Megatron column/row parallel linear
    layers and self-attention uses :class:`ModelParallelMultiheadAttention`.

    See "Megatron-LM: https://arxiv.org/pdf/1909.08053.pdf" for more details.
    """

    def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size):
        """Return the first feed-forward projection (column parallel)."""
        wants_quant_noise = q_noise > 0
        if wants_quant_noise:
            # quantization noise is not supported for the parallel layers
            raise NotImplementedError
        fc1 = ColumnParallelLinear(input_dim, output_dim, gather_output=False)
        return fc1

    def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size):
        """Return the second feed-forward projection (row parallel)."""
        wants_quant_noise = q_noise > 0
        if wants_quant_noise:
            # quantization noise is not supported for the parallel layers
            raise NotImplementedError
        fc2 = RowParallelLinear(input_dim, output_dim, input_is_parallel=True)
        return fc2

    def build_self_attention(self, embed_dim, args, **unused_kwargs):
        """Return the model-parallel self-attention module for this layer."""
        head_count = args.encoder_attention_heads
        attn_dropout = args.attention_dropout
        return ModelParallelMultiheadAttention(
            embed_dim=embed_dim,
            num_heads=head_count,
            dropout=attn_dropout,
            self_attention=True,
        )
def build_model(cfg: FairseqDataclass, task, from_checkpoint=False):
    """Instantiate the model described by *cfg* for *task*.

    The model type is read from ``cfg._name`` or, failing that, ``cfg.arch``.
    If neither is set and *cfg* has exactly one entry, that entry's key is
    taken as the model type and its value as the actual config.

    Args:
        cfg: model configuration (dataclass/omegaconf config or an
            ``argparse.Namespace``).
        task: task object forwarded to the model's ``build_model``.
        from_checkpoint: forwarded to ``merge_with_parent`` when the config
            is merged with the registered dataclass defaults.

    Returns:
        Whatever the resolved model class' ``build_model(cfg, task)`` returns.

    Raises:
        Exception: if the model type has to be inferred from a single nested
            key and that key is not a registered dataclass model.
        AssertionError: if no registered model matches the model type.
    """

    model = None
    model_type = getattr(cfg, "_name", None) or getattr(cfg, "arch", None)

    if not model_type and len(cfg) == 1:
        # this is hit if config object is nested in directory that is named after model type

        model_type = next(iter(cfg))
        if model_type in MODEL_DATACLASS_REGISTRY:
            cfg = cfg[model_type]
        else:
            raise Exception(
                "Could not infer model type from directory. Please add _name field to indicate model type. "
                "Available models: "
                + str(MODEL_DATACLASS_REGISTRY.keys())
                + " Requested model type: "
                # str() so a non-string key cannot turn this into a TypeError
                + str(model_type)
            )

    if model_type in ARCH_MODEL_REGISTRY:
        # case 1: legacy models
        model = ARCH_MODEL_REGISTRY[model_type]
    elif model_type in MODEL_DATACLASS_REGISTRY:
        # case 2: config-driven models
        model = MODEL_REGISTRY[model_type]

    if model_type in MODEL_DATACLASS_REGISTRY:
        # set defaults from dataclass. note that arch name and model name can be the same
        dc = MODEL_DATACLASS_REGISTRY[model_type]

        if isinstance(cfg, argparse.Namespace):
            cfg = dc.from_namespace(cfg)
        else:
            cfg = merge_with_parent(dc(), cfg, from_checkpoint)
    else:
        if model_type in ARCH_CONFIG_REGISTRY:
            with open_dict(cfg) if OmegaConf.is_config(cfg) else ExitStack():
                # this calls the different "arch" functions (like base_architecture()) that you indicate
                # if you specify --arch on the command line. this is only applicable to the old argparse based models
                # hydra models should expose different architectures via different config files
                # it will modify the cfg object and default parameters according to the arch
                ARCH_CONFIG_REGISTRY[model_type](cfg)

    # Build the message purely with f-strings: the previous mix of an
    # f-string and ``.format`` re-parsed the already interpolated ``cfg``
    # text, so any "{" / "}" in the config repr raised KeyError/IndexError
    # instead of producing this AssertionError.
    assert model is not None, (
        f"Could not infer model type from {cfg}. "
        f"Available models: {MODEL_DATACLASS_REGISTRY.keys()}"
        f" Requested model type: {model_type}"
    )

    return model.build_model(cfg, task)
def register_model_architecture(model_name, arch_name):
    """
    New model architectures can be added to fairseq with the
    :func:`register_model_architecture` function decorator. After registration,
    model architectures can be selected with the ``--arch`` command-line
    argument.

    For example::

        @register_model_architecture('lstm', 'lstm_luong_wmt_en_de')
        def lstm_luong_wmt_en_de(cfg):
            cfg.encoder_embed_dim = getattr(cfg, 'encoder_embed_dim', 1000)
            (...)

    The decorated function should take a single argument *cfg*, the model
    config that :func:`build_model` passes in (an
    :class:`omegaconf.DictConfig` or a namespace-like object). The decorated
    function should modify these arguments in-place to match the desired
    architecture.

    Args:
        model_name (str): the name of the Model (Model must already be
            registered)
        arch_name (str): the name of the model architecture (``--arch``)

    Returns:
        a decorator that records the function in the architecture
        registries and returns it unchanged

    Raises:
        ValueError: when the decorator is applied and *model_name* is not
            registered, *arch_name* is already registered, or the decorated
            object is not callable
    """

    def register_model_arch_fn(fn):
        # the model itself must have been registered first
        if model_name not in MODEL_REGISTRY:
            raise ValueError(
                "Cannot register model architecture for unknown model type ({})".format(
                    model_name
                )
            )
        # architecture names are unique across all models
        if arch_name in ARCH_MODEL_REGISTRY:
            raise ValueError(
                "Cannot register duplicate model architecture ({})".format(arch_name)
            )
        if not callable(fn):
            raise ValueError(
                "Model architecture must be callable ({})".format(arch_name)
            )
        # arch name -> model class / model name / config function, plus the
        # inverse mapping model name -> list of its arch names
        ARCH_MODEL_REGISTRY[arch_name] = MODEL_REGISTRY[model_name]
        ARCH_MODEL_NAME_REGISTRY[arch_name] = model_name
        ARCH_MODEL_INV_REGISTRY.setdefault(model_name, []).append(arch_name)
        ARCH_CONFIG_REGISTRY[arch_name] = fn
        return fn

    return register_model_arch_fn
class BARTHubInterface(GeneratorHubInterface):
    """A simple PyTorch Hub interface to BART.

    Usage: https://github.com/pytorch/fairseq/tree/main/examples/bart
    """

    def __init__(self, cfg, task, model):
        super().__init__(cfg, task, [model])
        self.model = self.models[0]

    def encode(
        self, sentence: str, *addl_sentences, no_separator=True
    ) -> torch.LongTensor:
        """
        BPE-encode a sentence (or multiple sentences).

        Every sequence begins with a beginning-of-sentence (`<s>`) symbol.
        Every sentence ends with an end-of-sentence (`</s>`).

        Example (single sentence): `<s> a b c </s>`
        Example (sentence pair): `<s> d e f </s> 1 2 3 </s>`

        The BPE encoding follows GPT-2. One subtle detail is that the GPT-2 BPE
        requires leading spaces. For example::

            >>> bart.encode('Hello world').tolist()
            [0, 31414, 232, 2]
            >>> bart.encode(' world').tolist()
            [0, 232, 2]
            >>> bart.encode('world').tolist()
            [0, 8331, 2]
        """
        tokens = self.bpe.encode(sentence)
        # leave room for the <s> and </s> symbols added below
        max_bpe_tokens = min(self.max_positions) - 2
        bpe_pieces = tokens.split(" ")
        if len(bpe_pieces) > max_bpe_tokens:
            tokens = " ".join(bpe_pieces[:max_bpe_tokens])
        bpe_sentence = "<s> " + tokens + " </s>"
        for s in addl_sentences:
            bpe_sentence += " </s>" if not no_separator else ""
            bpe_sentence += " " + self.bpe.encode(s) + " </s>"
        tokens = self.task.source_dictionary.encode_line(bpe_sentence, append_eos=False)
        return tokens.long()

    def decode(self, tokens: torch.LongTensor):
        """Turn a 1-D tensor of token ids back into text.

        A leading ``<s>`` is dropped and the sequence is split wherever two
        ``</s>`` symbols follow each other.

        Returns:
            a single string when there is one sentence, otherwise a list of
            strings.
        """
        assert tokens.dim() == 1
        tokens = tokens.cpu().numpy()
        # guard the index so an empty tensor does not raise IndexError
        if tokens.size > 0 and tokens[0] == self.task.source_dictionary.bos():
            tokens = tokens[1:]  # remove <s>
        eos_mask = tokens == self.task.source_dictionary.eos()
        doc_mask = eos_mask[1:] & eos_mask[:-1]
        sentences = np.split(tokens, doc_mask.nonzero()[0] + 1)
        sentences = [
            self.bpe.decode(self.task.source_dictionary.string(s)) for s in sentences
        ]
        if len(sentences) == 1:
            return sentences[0]
        return sentences

    def _build_sample(self, src_tokens: List[torch.LongTensor]):
        """Collate *src_tokens* into one batch and move it to ``self.device``."""
        # assert torch.is_tensor(src_tokens)
        dataset = self.task.build_dataset_for_inference(
            src_tokens,
            [x.numel() for x in src_tokens],
        )
        sample = dataset.collater(dataset)
        sample = utils.apply_to_sample(lambda tensor: tensor.to(self.device), sample)
        return sample

    def generate(
        self,
        tokenized_sentences: List[torch.LongTensor],
        *args,
        inference_step_args=None,
        skip_invalid_size_inputs=False,
        **kwargs
    ) -> List[List[Dict[str, torch.Tensor]]]:
        """Generate hypotheses for every tokenized sentence.

        Each batch is decoded with a one-token ``<s>`` prefix.

        Args:
            tokenized_sentences: encoded source sentences.
            inference_step_args: optional extra arguments for the inference
                step; must not contain ``"prefix_tokens"``. The dict passed
                by the caller is not modified.
            skip_invalid_size_inputs: forwarded to batching and generation.

        Returns:
            one list of hypotheses per input sentence, ordered by sample id.

        Raises:
            NotImplementedError: if ``"prefix_tokens"`` is supplied.
        """
        # Work on a copy: "prefix_tokens" is written below, and writing into
        # the caller's dict made a second call with the same dict hit the
        # NotImplementedError guard.
        inference_step_args = dict(inference_step_args or {})
        if "prefix_tokens" in inference_step_args:
            raise NotImplementedError("prefix generation not implemented for BART")
        res = []
        for batch in self._build_batches(tokenized_sentences, skip_invalid_size_inputs):
            src_tokens = batch["net_input"]["src_tokens"]
            inference_step_args["prefix_tokens"] = src_tokens.new_full(
                (src_tokens.size(0), 1), fill_value=self.task.source_dictionary.bos()
            ).to(device=self.device)
            results = super().generate(
                src_tokens,
                *args,
                inference_step_args=inference_step_args,
                skip_invalid_size_inputs=skip_invalid_size_inputs,
                **kwargs
            )
            for sample_id, hypos in zip(batch["id"].tolist(), results):
                res.append((sample_id, hypos))
        res = [hypos for _, hypos in sorted(res, key=lambda x: x[0])]
        return res

    def extract_features(
        self, tokens: torch.LongTensor, return_all_hiddens: bool = False
    ) -> torch.Tensor:
        """Run the model in ``features_only`` mode on *tokens*.

        Args:
            tokens: token ids; a 1-D tensor is treated as a batch of one.
            return_all_hiddens: when ``True`` return the list of inner
                states (each transposed to B x T x C) instead of only the
                final features.

        Raises:
            ValueError: if the sequence is longer than the model allows.
        """
        if tokens.dim() == 1:
            tokens = tokens.unsqueeze(0)
        if tokens.size(-1) > min(self.model.max_positions()):
            raise ValueError(
                "tokens exceeds maximum length: {} > {}".format(
                    tokens.size(-1), self.model.max_positions()
                )
            )
        # Tensor.to() is not in-place; the result has to be kept.
        tokens = tokens.to(device=self.device)
        prev_output_tokens = tokens.clone()

        # first decoder input = last non-pad token of each row; the remaining
        # positions are the source shifted right by one
        prev_output_tokens[:, 0] = tokens.gather(
            1,
            (tokens.ne(self.task.source_dictionary.pad()).sum(dim=1) - 1).unsqueeze(-1),
        ).squeeze(-1)

        prev_output_tokens[:, 1:] = tokens[:, :-1]
        features, extra = self.model(
            src_tokens=tokens,
            src_lengths=None,
            prev_output_tokens=prev_output_tokens,
            features_only=True,
            return_all_hiddens=return_all_hiddens,
        )
        if return_all_hiddens:
            # convert from T x B x C -> B x T x C
            inner_states = extra["inner_states"]
            return [inner_state.transpose(0, 1) for inner_state in inner_states]
        else:
            return features  # just the last layer's features

    def register_classification_head(
        self, name: str, num_classes: int = None, embedding_size: int = None, **kwargs
    ):
        """Register a classification head called *name* on the wrapped model.

        ``embedding_size`` is additionally forwarded as ``inner_dim`` (unless
        the caller passes ``inner_dim`` explicitly), because the BART model's
        ``register_classification_head`` sizes the head from ``inner_dim`` and
        would otherwise silently ignore ``embedding_size``.
        """
        if embedding_size is not None:
            kwargs.setdefault("inner_dim", embedding_size)
        self.model.register_classification_head(
            name, num_classes=num_classes, embedding_size=embedding_size, **kwargs
        )

    def predict(self, head: str, tokens: torch.LongTensor, return_logits: bool = False):
        """Classify *tokens* with the classification head named *head*.

        The representation at the last ``</s>`` of every row is fed to the
        head.

        Returns:
            raw logits when ``return_logits`` is set, otherwise
            log-probabilities.
        """
        if tokens.dim() == 1:
            tokens = tokens.unsqueeze(0)
        # keep the eos mask on the same device as the features
        tokens = tokens.to(device=self.device)
        features = self.extract_features(tokens)
        sentence_representation = features[
            tokens.eq(self.task.source_dictionary.eos()), :
        ].view(features.size(0), -1, features.size(-1))[:, -1, :]

        logits = self.model.classification_heads[head](sentence_representation)
        if return_logits:
            return logits
        return F.log_softmax(logits, dim=-1)

    def fill_mask(
        self,
        masked_inputs: List[str],
        topk: int = 5,
        match_source_len: bool = True,
        **generate_kwargs
    ):
        """Fill the ``<mask>`` placeholders in every input string.

        Args:
            masked_inputs: strings that each contain at least one ``<mask>``.
            topk: number of hypotheses returned per input.
            match_source_len: forwarded to generation.
            **generate_kwargs: further generation options; ``beam`` is raised
                to at least *topk*.

        Returns:
            for every input a list of ``(decoded text, score)`` pairs.
        """
        masked_token = "<mask>"
        batch_tokens = []
        for masked_input in masked_inputs:
            assert (
                masked_token in masked_input
            ), "please add one {} token for the input".format(masked_token)

            text_spans = masked_input.split(masked_token)
            text_spans_bpe = (
                (" {0} ".format(masked_token))
                .join([self.bpe.encode(text_span.rstrip()) for text_span in text_spans])
                .strip()
            )
            tokens = self.task.source_dictionary.encode_line(
                "<s> " + text_spans_bpe + " </s>",
                append_eos=False,
                add_if_not_exist=False,
            ).long()
            batch_tokens.append(tokens)

        # ensure beam size is at least as big as topk
        generate_kwargs["beam"] = max(
            topk,
            generate_kwargs.get("beam", -1),
        )
        generate_kwargs["match_source_len"] = match_source_len
        batch_hypos = self.generate(batch_tokens, **generate_kwargs)

        return [
            [(self.decode(hypo["tokens"]), hypo["score"]) for hypo in hypos[:topk]]
            for hypos in batch_hypos
        ]
metavar="D", + help="dropout probability in the masked_lm pooler layers", + ) + parser.add_argument( + "--pooler-activation-fn", + choices=utils.get_available_activation_fns(), + help="activation function to use for pooler layer", + ) + parser.add_argument( + "--spectral-norm-classification-head", + action="store_true", + help="Apply spectral normalization on the classification head", + ) + + @property + def supported_targets(self): + return {"self"} + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + features_only: bool = False, + classification_head_name: Optional[str] = None, + token_embeddings: Optional[torch.Tensor] = None, + return_all_hiddens: bool = True, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + if classification_head_name is not None: + features_only = True + + encoder_out = self.encoder( + src_tokens, + src_lengths=src_lengths, + token_embeddings=token_embeddings, + return_all_hiddens=return_all_hiddens, + ) + x, extra = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + features_only=features_only, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + src_lengths=src_lengths, + return_all_hiddens=return_all_hiddens, + ) + eos: int = self.eos + if classification_head_name is not None: + sentence_representation = x[src_tokens.eq(eos), :].view( + x.size(0), -1, x.size(-1) + )[:, -1, :] + for k, head in self.classification_heads.items(): + # for torch script only supports iteration + if k == classification_head_name: + x = head(sentence_representation) + break + return x, extra + + @classmethod + def from_pretrained( + cls, + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + bpe="gpt2", + sample_break_mode="eos", + **kwargs, + ): + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + bpe=bpe, + load_checkpoint_heads=True, 
+ sample_break_mode=sample_break_mode, + **kwargs, + ) + return BARTHubInterface(x["args"], x["task"], x["models"][0]) + + def register_classification_head( + self, name, num_classes=None, inner_dim=None, **kwargs + ): + """Register a classification head.""" + logger.info("Registering classification head: {0}".format(name)) + if name in self.classification_heads: + prev_num_classes = self.classification_heads[name].out_proj.out_features + prev_inner_dim = self.classification_heads[name].dense.out_features + if num_classes != prev_num_classes or inner_dim != prev_inner_dim: + logger.warning( + 're-registering head "{}" with num_classes {} (prev: {}) ' + "and inner_dim {} (prev: {})".format( + name, num_classes, prev_num_classes, inner_dim, prev_inner_dim + ) + ) + self.classification_heads[name] = BARTClassificationHead( + input_dim=self.args.encoder_embed_dim, + inner_dim=inner_dim or self.args.encoder_embed_dim, + num_classes=num_classes, + activation_fn=self.args.pooler_activation_fn, + pooler_dropout=self.args.pooler_dropout, + do_spectral_norm=getattr( + self.args, "spectral_norm_classification_head", False + ), + ) + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + + prefix = name + "." if name != "" else "" + current_head_names = ( + [] + if not hasattr(self, "classification_heads") + else self.classification_heads.keys() + ) + + # Handle new classification heads present in the state dict. + keys_to_delete = [] + for k in state_dict.keys(): + if not k.startswith(prefix + "classification_heads."): + continue + + head_name = k[len(prefix + "classification_heads.") :].split(".")[0] + num_classes = state_dict[ + prefix + "classification_heads." + head_name + ".out_proj.weight" + ].size(0) + inner_dim = state_dict[ + prefix + "classification_heads." 
+ head_name + ".dense.weight" + ].size(0) + + if getattr(self.args, "load_checkpoint_heads", False): + if head_name not in current_head_names: + self.register_classification_head(head_name, num_classes, inner_dim) + else: + if head_name not in current_head_names: + logger.warning( + "deleting classification head ({}) from checkpoint " + "not present in current model: {}".format(head_name, k) + ) + keys_to_delete.append(k) + elif ( + num_classes + != self.classification_heads[head_name].out_proj.out_features + or inner_dim + != self.classification_heads[head_name].dense.out_features + ): + logger.warning( + "deleting classification head ({}) from checkpoint " + "with different dimensions than current model: {}".format( + head_name, k + ) + ) + keys_to_delete.append(k) + for k in keys_to_delete: + del state_dict[k] + + def truncate_emb(key): + if key in state_dict: + state_dict[key] = state_dict[key][:-1, :] + + # When finetuning on translation task, remove last row of + # embedding matrix that corresponds to mask_idx token. + loaded_dict_size = state_dict["encoder.embed_tokens.weight"].size(0) + if ( + loaded_dict_size == len(self.encoder.dictionary) + 1 + and "<mask>" not in self.encoder.dictionary + ): + truncate_emb("encoder.embed_tokens.weight") + truncate_emb("decoder.embed_tokens.weight") + truncate_emb("encoder.output_projection.weight") + truncate_emb("decoder.output_projection.weight") + + # When continued pretraining on new set of languages for mbart, + # add extra lang embeddings at the end of embed_tokens. + # Note: newly added languages are assumed to have been added at the end. + if self.args.task == "multilingual_denoising" and loaded_dict_size < len( + self.encoder.dictionary + ): + logger.info( + "Adding extra language embeddings not found in pretrained model for " + "continued pretraining of MBART on new set of languages." 
+ ) + loaded_mask_token_embedding = state_dict["encoder.embed_tokens.weight"][ + -1, : + ] + + num_langids_to_add = len(self.encoder.dictionary) - loaded_dict_size + embed_dim = state_dict["encoder.embed_tokens.weight"].size(1) + + new_lang_embed_to_add = torch.zeros(num_langids_to_add, embed_dim) + nn.init.normal_(new_lang_embed_to_add, mean=0, std=embed_dim**-0.5) + new_lang_embed_to_add = new_lang_embed_to_add.to( + dtype=state_dict["encoder.embed_tokens.weight"].dtype, + ) + + state_dict["encoder.embed_tokens.weight"] = torch.cat( + [ + state_dict["encoder.embed_tokens.weight"][ + : loaded_dict_size - 1, : + ], + new_lang_embed_to_add, + loaded_mask_token_embedding.unsqueeze(0), + ] + ) + state_dict["decoder.embed_tokens.weight"] = torch.cat( + [ + state_dict["decoder.embed_tokens.weight"][ + : loaded_dict_size - 1, : + ], + new_lang_embed_to_add, + loaded_mask_token_embedding.unsqueeze(0), + ] + ) + + # Copy any newly-added classification heads into the state dict + # with their current weights. + if hasattr(self, "classification_heads"): + cur_state = self.classification_heads.state_dict() + for k, v in cur_state.items(): + if prefix + "classification_heads." + k not in state_dict: + logger.info("Overwriting " + prefix + "classification_heads." + k) + state_dict[prefix + "classification_heads." 
class BARTClassificationHead(nn.Module):
    """Two-layer projection head used for sentence-level classification.

    Dropout is applied before each linear layer and the activation sits
    between them.
    """

    def __init__(
        self,
        input_dim,
        inner_dim,
        num_classes,
        activation_fn,
        pooler_dropout,
        do_spectral_norm=False,
    ):
        super().__init__()
        self.dense = nn.Linear(input_dim, inner_dim)
        self.activation_fn = utils.get_activation_fn(activation_fn)
        self.dropout = nn.Dropout(p=pooler_dropout)

        # build the output projection first, wrap it if requested, then attach
        projection = nn.Linear(inner_dim, num_classes)
        if do_spectral_norm:
            projection = torch.nn.utils.spectral_norm(projection)
        self.out_proj = projection

    def forward(self, features, **kwargs):
        """Map *features* to class scores; extra keyword arguments are ignored."""
        hidden = self.dense(self.dropout(features))
        hidden = self.dropout(self.activation_fn(hidden))
        return self.out_proj(hidden)
args.decoder_layers = getattr(args, "decoder_layers", 12) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", True) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.relu_dropout = getattr(args, "relu_dropout", 0.0) + args.dropout = getattr(args, "dropout", 0.1) + args.max_target_positions = getattr(args, "max_target_positions", 1024) + args.max_source_positions = getattr(args, "max_source_positions", 1024) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", True + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", True) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + args.no_scale_embedding = getattr(args, "no_scale_embedding", True) + args.layernorm_embedding = getattr(args, "layernorm_embedding", True) + + args.activation_fn = getattr(args, "activation_fn", "gelu") + args.pooler_activation_fn = getattr(args, "pooler_activation_fn", "tanh") + args.pooler_dropout = getattr(args, "pooler_dropout", 0.0) + + +@register_model_architecture("bart", "bart_base") +def bart_base_architecture(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4 * 768) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 12) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 12) + 
class CompositeEncoder(FairseqEncoder):
    """
    Bundle several named :class:`FairseqEncoder` objects behind one encoder.

    Forward runs every wrapped encoder on the same input and collects the
    results in a dictionary keyed by encoder name. The dictionary of the
    first encoder is the one handed to the base class.

    Args:
        encoders (dict): a dictionary of :class:`FairseqEncoder` objects.
    """

    def __init__(self, encoders):
        first_encoder = next(iter(encoders.values()))
        super().__init__(first_encoder.dictionary)
        self.encoders = encoders
        # register every encoder as a sub-module under its key
        for name, encoder in self.encoders.items():
            self.add_module(name, encoder)

    def forward(self, src_tokens, src_lengths):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (LongTensor): lengths of each source sentence of shape
                `(batch)`

        Returns:
            dict:
                the outputs from each Encoder
        """
        return {
            name: encoder(src_tokens, src_lengths)
            for name, encoder in self.encoders.items()
        }

    def reorder_encoder_out(self, encoder_out, new_order):
        """Reorder each encoder's output in *encoder_out* (updated in place)."""
        for name, encoder in self.encoders.items():
            reordered = encoder.reorder_encoder_out(encoder_out[name], new_order)
            encoder_out[name] = reordered
        return encoder_out

    def max_positions(self):
        """Return the smallest ``max_positions()`` among the wrapped encoders."""
        return min(encoder.max_positions() for encoder in self.encoders.values())

    def upgrade_state_dict(self, state_dict):
        """Let every wrapped encoder upgrade *state_dict*, then return it."""
        for encoder in self.encoders.values():
            encoder.upgrade_state_dict(state_dict)
        return state_dict
logger = logging.getLogger(__name__)


# SlowMo support is optional: remember whether fairscale could be imported so
# that the "slowmo" backend can fail with a helpful message later on.
_SLOWMO_DDP_DISABLED = False
try:
    from fairscale.experimental.nn.data_parallel import (
        SlowMoBaseAlgorithm,
        SlowMoDistributedDataParallel,
    )
except ImportError:
    _SLOWMO_DDP_DISABLED = True


def DistributedFairseqModel(args, model, process_group, device):
    """
    Wrap a *model* to support distributed data parallel training.

    This is similar to the built-in DistributedDataParallel, but allows
    additional configuration of the DistributedDataParallel class to
    use, and also provides easier access to the wrapped model by
    forwarding requests for missing attributes to the wrapped model.

    The wrapper is selected from ``args.tpu`` and ``args.ddp_backend``
    (``c10d``/``pytorch_ddp``, ``no_c10d``/``legacy_ddp``, ``slowmo`` or
    ``fully_sharded``).

    Args:
        args (argparse.Namespace): fairseq args
        model (BaseFairseqModel): model to wrap
        process_group: the c10d process group to be used for distributed data
            parallel all-reduction.
        device: device to move model to

    Returns:
        the wrapped model, additionally wrapped in a
        ``DistributedTimeoutWrapper`` when ``args.heartbeat_timeout`` > 0.

    Raises:
        ImportError: if the selected backend needs fairscale (or a newer
            PyTorch for the fp16 comm hook) and it is not importable.
        ValueError: if ``args.ddp_backend`` is not a known backend.
    """
    assert isinstance(model, nn.Module)
    if args.tpu:
        wrapped_model = TPUDistributedDataParallel(
            module=model.to(device),
            process_group=process_group,
        )
        # forward missing getattr and state_dict/load_state_dict to orig model
        wrapped_model = ModuleProxyWrapper(wrapped_model)
    elif args.ddp_backend in {"c10d", "pytorch_ddp"}:
        wrapped_model = DistributedDataParallel(
            module=model.to(device),
            device_ids=[args.device_id],
            output_device=args.device_id,
            broadcast_buffers=args.broadcast_buffers,
            bucket_cap_mb=args.bucket_cap_mb,
            process_group=process_group,
            find_unused_parameters=args.find_unused_parameters,
            gradient_as_bucket_view=args.gradient_as_bucket_view,
        )
        if args.ddp_comm_hook == "fp16":
            logger.info("enable fp16 communication hook in DDP")
            try:
                from torch.distributed.algorithms.ddp_comm_hooks import (
                    DDPCommHookType,
                    register_ddp_comm_hook,
                )
            except ImportError:
                # Only a failed import is expected here; log the hint and
                # let the original error propagate.
                logger.error(
                    "Could not import from torch.distributed.algorithms.ddp_comm_hooks; you may need to update your pytorch version"
                )
                raise

            register_ddp_comm_hook(DDPCommHookType.FP16_COMPRESS, wrapped_model)
        # forward missing getattr and state_dict/load_state_dict to orig model
        wrapped_model = ModuleProxyWrapper(wrapped_model)
    elif args.ddp_backend in {"no_c10d", "legacy_ddp"}:
        wrapped_model = LegacyDistributedDataParallel(
            module=model.to(device),
            buffer_size=2**28,
            process_group=process_group,
        )
        # forward missing getattr and state_dict/load_state_dict to orig model
        wrapped_model = ModuleProxyWrapper(wrapped_model)
    elif args.ddp_backend == "slowmo":
        if _SLOWMO_DDP_DISABLED:
            raise ImportError(
                "Cannot find SlowMoDistributedDataParallel. "
                "Please install fairscale with: pip install fairscale"
            )

        # The values of slowmo_momentum below were obtained by tuning on the
        # En-De 16 dataset by training the transformer_wmt_en_de_large model
        if args.slowmo_momentum is None:
            if args.distributed_world_size <= 16:
                args.slowmo_momentum = 0.0
            elif args.distributed_world_size <= 32:
                args.slowmo_momentum = 0.2
            elif args.distributed_world_size <= 64:
                args.slowmo_momentum = 0.5
            else:
                args.slowmo_momentum = 0.6
        slowmo_base_algorithm = SlowMoBaseAlgorithm[args.slowmo_base_algorithm.upper()]

        wrapped_model = SlowMoDistributedDataParallel(
            module=model.to(device),
            broadcast_buffers=args.broadcast_buffers,
            nprocs_per_node=args.nprocs_per_node,
            slowmo_momentum=args.slowmo_momentum,
            slowmo_base_algorithm=slowmo_base_algorithm,
            localsgd_frequency=args.localsgd_frequency,
        )
        # forward missing getattr and state_dict/load_state_dict to orig model
        wrapped_model = ModuleProxyWrapper(wrapped_model)
    elif args.ddp_backend == "fully_sharded":
        try:
            from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
        except ImportError:
            raise ImportError(
                "Cannot find FullyShardedDataParallel. "
                "Please install fairscale with: pip install fairscale"
            )
        assert isinstance(model, FSDP), "expected model to already be wrapped in FSDP"
        wrapped_model = model
        if args.memory_efficient_fp16:
            wrapped_model = wrapped_model.half()
        if not args.cpu_offload:
            wrapped_model = wrapped_model.to(device=device)
    else:
        raise ValueError("Unknown --ddp-backend: " + args.ddp_backend)

    # kill hung distributed jobs after a timeout
    heartbeat_timeout = getattr(args, "heartbeat_timeout", -1)
    if heartbeat_timeout > 0:
        wrapped_model = DistributedTimeoutWrapper(
            wrapped_model, timeout=heartbeat_timeout
        )

    return wrapped_model
class EMA(object):
    """Exponential Moving Average of Fairseq Models
    EMA keeps a copy of the exponentially decayed model params.
    The set of params should include both gradient-descent and
    non-gradient descent params, such as batch mean/var and buffers.
    This is a modified implementation of
    the open source code in https://github.com/zhawe01/fairseq-gec.git,
    and internal source code in
    fbcode/mobile-vision/projects/classification_pytorch/lib/utils/model_ema.py.

    Similar to TF EMA.
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage.
    EMA provides an averaged and smoothed set of model weights, and has been
    shown to improve vision models. EMA class does all necessary functions to
    update, reload, or init EMA methods.

    EMA object is initialized from an arbitrary model. By default, it is stored in
    the same device (unless device specified at initialization) and with the
    same precision as the model (unless ema_fp32 is True). ema_fp32 is recommended.
    This stores the EMA parameters in fp32 only for the EMA update step, and
    is used at the default precision otherwise.
    EMA is usually enabled using EMAConfig with store_ema=True. Some important
    parameters to configure EMA are
    1) ema_decay - The decay of EMA
    2) ema_update_freq - EMA is updated every this many model updates.
    3) ema_start_update - Start EMA update after this many model updates [default 0]

    Key methods:
    1) step - One update of EMA using new model
    2) restore - Update EMA from a state dict
    3) reverse - Load EMA into a model
    4) get_decay, _set_decay - Used to get or set the decay. Note _set_decay is
    called from step.
    5) build_fp32_params - Used to initialize or update the fp32 copy of EMA params.
    Note this is enabled only when ema_fp32=True
    """

    def __init__(self, model, config, device=None, skip_keys=None):
        """
        @param model model to initialize the EMA with
        @param config EMAConfig object with configuration like
        ema_decay, ema_update_freq, ema_fp32
        @param device If provided, copy EMA to this device (e.g. gpu).
        Otherwise EMA is in the same device as the model.
        @param skip_keys Optional collection of state-dict keys that are
        copied from the new model on every step instead of being decayed.
        """

        self.decay = config.ema_decay
        self.model = copy.deepcopy(model)
        self.model.requires_grad_(False)
        self.config = config
        self.skip_keys = skip_keys or set()
        self.fp32_params = {}

        if self.config.ema_seed_model is not None:
            state = checkpoint_utils.load_ema_from_checkpoint(
                self.config.ema_seed_model
            )
            self.model.load_state_dict(state["model"], strict=True)

        if device is not None:
            logging.info(f"Copying EMA model to device {device}")
            self.model = self.model.to(device=device)

        if self.config.ema_fp32:
            self.build_fp32_params()

        self.update_freq_counter = 0

    def get_model(self):
        """Return the module that holds the EMA weights."""
        return self.model

    def build_fp32_params(self, state_dict=None):
        """
        Store a copy of the EMA params in fp32.
        If state dict is passed, the EMA params is copied from
        the provided state dict. Otherwise, it is copied from the
        current EMA model parameters.

        Raises RuntimeError when called with ema_fp32=False.
        """
        if not self.config.ema_fp32:
            raise RuntimeError(
                "build_fp32_params should not be called if ema_fp32=False. "
                "Use ema_fp32=True if this is really intended."
            )

        if state_dict is None:
            state_dict = self.model.state_dict()

        def _to_float(t):
            # Non-floating tensors (e.g. integer counters) are kept as they are.
            return t.float() if torch.is_floating_point(t) else t

        for param_key in state_dict:
            if param_key in self.fp32_params:
                self.fp32_params[param_key].copy_(state_dict[param_key])
            else:
                self.fp32_params[param_key] = _to_float(state_dict[param_key])

    def restore(self, state_dict, build_fp32_params=False):
        """Load data from a model spec into EMA model"""
        self.model.load_state_dict(state_dict, strict=False)
        if build_fp32_params:
            self.build_fp32_params(state_dict)

    def _set_decay(self, decay):
        self.decay = decay

    def get_decay(self):
        """Return the decay used by the next update."""
        return self.decay

    def _step_internal(self, new_model, updates=None):
        """One update of the EMA model based on new model weights"""
        decay = self.decay

        ema_state_dict = {}
        ema_params = (
            self.fp32_params if self.config.ema_fp32 else self.model.state_dict()
        )
        for key, param in new_model.state_dict().items():
            if isinstance(param, dict):
                continue
            try:
                ema_param = ema_params[key]
            except KeyError:
                # Key unknown to the EMA copy: start from the new value.
                ema_param = (
                    param.float().clone() if param.ndim == 1 else copy.deepcopy(param)
                )

            if param.shape != ema_param.shape:
                raise ValueError(
                    "incompatible tensor shapes between model param and ema param: "
                    "{} vs. {}".format(param.shape, ema_param.shape)
                )

            if "version" in key:
                # Do not decay a model.version pytorch param
                continue

            # Integer/bool entries (e.g. BatchNorm's num_batches_tracked)
            # cannot be scaled in place by a float decay, so they track the
            # latest value just like explicitly skipped keys.
            can_decay = torch.is_floating_point(ema_param) or torch.is_complex(
                ema_param
            )
            if key in self.skip_keys or not can_decay:
                ema_param = param.to(dtype=ema_param.dtype).clone()
            else:
                ema_param.mul_(decay)
                ema_param.add_(param.to(dtype=ema_param.dtype), alpha=1 - decay)
            ema_state_dict[key] = ema_param
        self.restore(ema_state_dict, build_fp32_params=False)

    def step(self, new_model, updates=None):
        """
        One update of EMA which is done every self.config.ema_update_freq
        updates of the model.

        @param updates The current number of model updates done.
        Decay is set to 0 if model updates < ema_start_update, which means
        the model will be simply copied over to the EMA.
        When model updates >= ema_start_update, then EMA is updated with
        a decay of self.config.ema_decay.
        """
        if updates is not None:
            self._set_decay(
                0 if updates < self.config.ema_start_update else self.config.ema_decay
            )
        if self.config.ema_update_freq > 1:
            self.update_freq_counter += 1
            if self.update_freq_counter >= self.config.ema_update_freq:
                self._step_internal(new_model, updates)
                self.update_freq_counter = 0
        else:
            self._step_internal(new_model, updates)

    def reverse(self, model):
        """
        Load the model parameters from EMA model.
        Useful for inference or fine-tuning from the EMA model.
        """
        d = self.model.state_dict()
        if "_ema" in d:
            del d["_ema"]

        model.load_state_dict(d, strict=False)
        return model
class FairseqDecoder(nn.Module):
    """Common base class for fairseq decoders.

    Subclasses implement :meth:`extract_features` and :meth:`output_layer`;
    :meth:`forward` simply chains the two.
    """

    def __init__(self, dictionary):
        super().__init__()
        self.dictionary = dictionary
        self.onnx_trace = False
        self.adaptive_softmax = None

    def forward(self, prev_output_tokens, encoder_out=None, **kwargs):
        """
        Args:
            prev_output_tokens (LongTensor): shifted output tokens of shape
                `(batch, tgt_len)`, for teacher forcing
            encoder_out (dict, optional): output from the encoder, used for
                encoder-side attention

        Returns:
            tuple:
                - the decoder's output of shape `(batch, tgt_len, vocab)`
                - a dictionary with any model-specific outputs
        """
        features, extra = self.extract_features(
            prev_output_tokens, encoder_out=encoder_out, **kwargs
        )
        return self.output_layer(features), extra

    def extract_features(self, prev_output_tokens, encoder_out=None, **kwargs):
        """
        Returns:
            tuple:
                - the decoder's features of shape `(batch, tgt_len, embed_dim)`
                - a dictionary with any model-specific outputs
        """
        raise NotImplementedError

    def output_layer(self, features, **kwargs):
        """
        Project features to the default output size, e.g., vocabulary size.

        Args:
            features (Tensor): features returned by *extract_features*.
        """
        raise NotImplementedError

    def get_normalized_probs(
        self,
        net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]],
        log_probs: bool,
        sample: Optional[Dict[str, Tensor]] = None,
    ):
        """Get normalized probabilities (or log probs) from a net's output."""
        return self.get_normalized_probs_scriptable(net_output, log_probs, sample)

    # TorchScript cannot resolve super(), so a scriptable subclass has no way
    # to reach this base-class logic through the method above. The logic
    # therefore lives in a helper with a different name that scriptable
    # subclasses call directly.
    def get_normalized_probs_scriptable(
        self,
        net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]],
        log_probs: bool,
        sample: Optional[Dict[str, Tensor]] = None,
    ):
        """Get normalized probabilities (or log probs) from a net's output."""

        if hasattr(self, "adaptive_softmax") and self.adaptive_softmax is not None:
            if sample is None:
                target = None
            else:
                assert "target" in sample
                target = sample["target"]
            log_prob = self.adaptive_softmax.get_log_prob(net_output[0], target=target)
            if log_probs:
                return log_prob
            return log_prob.exp_()

        logits = net_output[0]
        if log_probs:
            return utils.log_softmax(logits, dim=-1, onnx_trace=self.onnx_trace)
        return utils.softmax(logits, dim=-1, onnx_trace=self.onnx_trace)

    def max_positions(self):
        """Maximum input length supported by the decoder."""
        return 1e6  # an arbitrary large number

    def upgrade_state_dict_named(self, state_dict, name):
        """Upgrade old state dicts to work with newer code."""
        return state_dict

    def prepare_for_onnx_export_(self):
        """Switch the decoder into ONNX-trace mode."""
        self.onnx_trace = True
# Legacy container for encoder outputs; the trailing comments give the layout
# each field is expected to have.
EncoderOut = NamedTuple(
    "EncoderOut",
    [
        ("encoder_out", Tensor),  # T x B x C
        ("encoder_padding_mask", Optional[Tensor]),  # B x T
        ("encoder_embedding", Optional[Tensor]),  # B x T x C
        ("encoder_states", Optional[List[Tensor]]),  # List[T x B x C]
        ("src_tokens", Optional[Tensor]),  # B x T
        ("src_lengths", Optional[Tensor]),  # B x 1
    ],
)


class FairseqEncoder(nn.Module):
    """Common base class for fairseq encoders."""

    def __init__(self, dictionary):
        super().__init__()
        self.dictionary = dictionary

    def forward(self, src_tokens, src_lengths=None, **kwargs):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (LongTensor): lengths of each source sentence of shape
                `(batch)`
        """
        raise NotImplementedError

    def forward_torchscript(self, net_input: Dict[str, Tensor]):
        """A TorchScript-compatible version of forward.

        Encoders which use additional arguments may want to override
        this method for TorchScript compatibility.
        """
        if not torch.jit.is_scripting():
            return self.forward_non_torchscript(net_input)
        else:
            return self.forward(
                src_tokens=net_input["src_tokens"],
                src_lengths=net_input["src_lengths"],
            )

    @torch.jit.unused
    def forward_non_torchscript(self, net_input: Dict[str, Tensor]):
        """Call ``forward`` with every entry of *net_input* except
        ``prev_output_tokens``, which belongs to the decoder."""
        encoder_input = {}
        for name, value in net_input.items():
            if name == "prev_output_tokens":
                continue
            encoder_input[name] = value
        return self.forward(**encoder_input)

    def reorder_encoder_out(self, encoder_out, new_order):
        """
        Reorder encoder output according to `new_order`.

        Args:
            encoder_out: output from the ``forward()`` method
            new_order (LongTensor): desired order

        Returns:
            `encoder_out` rearranged according to `new_order`
        """
        raise NotImplementedError

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        return 1e6  # an arbitrary large number

    def upgrade_state_dict_named(self, state_dict, name):
        """Upgrade old state dicts to work with newer code."""
        return state_dict

    def set_num_updates(self, num_updates):
        """State from trainer to pass along to model at every update."""

        def _forward_num_updates(module):
            # Skip this encoder itself, otherwise the call would recurse.
            if module != self and hasattr(module, "set_num_updates"):
                module.set_num_updates(num_updates)

        self.apply(_forward_num_updates)
+ + Compared to the standard :class:`FairseqDecoder` interface, the incremental + decoder interface allows :func:`forward` functions to take an extra keyword + argument (*incremental_state*) that can be used to cache state across + time-steps. + + The :class:`FairseqIncrementalDecoder` interface also defines the + :func:`reorder_incremental_state` method, which is used during beam search + to select and reorder the incremental state based on the selection of beams. + + To learn more about how incremental decoding works, refer to `this blog + <http://www.telesens.co/2019/04/21/understanding-incremental-decoding-in-fairseq/>`_. + """ + + def __init__(self, dictionary): + super().__init__(dictionary) + + def forward( + self, prev_output_tokens, encoder_out=None, incremental_state=None, **kwargs + ): + """ + Args: + prev_output_tokens (LongTensor): shifted output tokens of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (dict, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict, optional): dictionary used for storing + state during :ref:`Incremental decoding` + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + raise NotImplementedError + + def extract_features( + self, prev_output_tokens, encoder_out=None, incremental_state=None, **kwargs + ): + """ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + raise NotImplementedError + + def reorder_incremental_state( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + new_order: Tensor, + ): + """Reorder incremental state. + + This will be called when the order of the input has changed from the + previous time step. A typical use case is beam search, where the input + order changes between time steps based on the selection of beams. 
+ """ + pass + + def reorder_incremental_state_scripting( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + new_order: Tensor, + ): + """Main entry point for reordering the incremental state. + + Due to limitations in TorchScript, we call this function in + :class:`fairseq.sequence_generator.SequenceGenerator` instead of + calling :func:`reorder_incremental_state` directly. + """ + for module in self.modules(): + if hasattr(module, "reorder_incremental_state"): + result = module.reorder_incremental_state(incremental_state, new_order) + if result is not None: + incremental_state = result + + def set_beam_size(self, beam_size): + """Sets the beam size in the decoder and all children.""" + if getattr(self, "_beam_size", -1) != beam_size: + seen = set() + + def apply_set_beam_size(module): + if ( + module != self + and hasattr(module, "set_beam_size") + and module not in seen + ): + seen.add(module) + module.set_beam_size(beam_size) + + self.apply(apply_set_beam_size) + self._beam_size = beam_size diff --git a/fairseq/fairseq/models/fairseq_model.py b/fairseq/fairseq/models/fairseq_model.py new file mode 100644 index 0000000..65ead9d --- /dev/null +++ b/fairseq/fairseq/models/fairseq_model.py @@ -0,0 +1,579 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Base classes for various fairseq models. 
logger = logging.getLogger(__name__)


def check_type(module, expected_type):
    """Assert that *module* is an instance of *expected_type*.

    When the module exposes ``unwrapped_module`` the check is applied to
    that attribute instead of the wrapper itself.
    """
    if hasattr(module, "unwrapped_module"):
        assert isinstance(
            module.unwrapped_module, expected_type
        ), f"{type(module.unwrapped_module)} != {expected_type}"
    else:
        assert isinstance(module, expected_type), f"{type(module)} != {expected_type}"


class BaseFairseqModel(nn.Module):
    """Base class for fairseq models."""

    def __init__(self):
        super().__init__()
        self._is_generation_fast = False

    @classmethod
    def add_args(cls, parser):
        """Add model-specific arguments to the parser."""
        dc = getattr(cls, "__dataclass", None)
        if dc is not None:
            # do not set defaults so that settings defaults from various architectures still works
            gen_parser_from_dataclass(parser, dc(), delete_default=True)

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        raise NotImplementedError("Model must implement the build_model method")

    def get_targets(self, sample, net_output):
        """Get targets from either the sample or the net's output."""
        return sample["target"]

    def get_normalized_probs(
        self,
        net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]],
        log_probs: bool,
        sample: Optional[Dict[str, Tensor]] = None,
    ):
        """Get normalized probabilities (or log probs) from a net's output."""
        return self.get_normalized_probs_scriptable(net_output, log_probs, sample)

    # TorchScript doesn't support super() method so that the scriptable Subclass
    # can't access the base class model in Torchscript.
    # Current workaround is to add a helper function with different name and
    # call the helper function from scriptable Subclass.
    def get_normalized_probs_scriptable(
        self,
        net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]],
        log_probs: bool,
        sample: Optional[Dict[str, Tensor]] = None,
    ):
        """Scriptable helper function for get_normalized_probs in ~BaseFairseqModel"""
        if hasattr(self, "decoder"):
            return self.decoder.get_normalized_probs(net_output, log_probs, sample)
        elif torch.is_tensor(net_output):
            # syntactic sugar for simple models which don't have a decoder
            # (e.g., the classification tutorial)
            logits = net_output.float()
            if log_probs:
                return F.log_softmax(logits, dim=-1)
            else:
                return F.softmax(logits, dim=-1)
        raise NotImplementedError

    def extract_features(self, *args, **kwargs):
        """Similar to *forward* but only return features."""
        return self(*args, **kwargs)

    def max_positions(self):
        """Maximum length supported by the model."""
        return None

    def load_state_dict(
        self,
        state_dict,
        strict=True,
        model_cfg: Optional[DictConfig] = None,
        args: Optional[Namespace] = None,
    ):
        """Copies parameters and buffers from *state_dict* into this module and
        its descendants.

        Overrides the method in :class:`nn.Module`. Compared with that method
        this additionally "upgrades" *state_dicts* from old checkpoints.

        Args:
            state_dict (dict): state dictionary to load; upgraded in place
            strict (bool): forwarded to :meth:`nn.Module.load_state_dict`
            model_cfg (DictConfig, optional): model config passed to
                ``prune_state_dict``
            args (Namespace, optional): deprecated; converted to a model
                config when *model_cfg* is not given
        """

        if model_cfg is None and args is not None:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning(
                "using 'args' is deprecated, please update your code to use dataclass config"
            )
            model_cfg = convert_namespace_to_omegaconf(args).model

        self.upgrade_state_dict(state_dict)

        from fairseq.checkpoint_utils import prune_state_dict

        new_state_dict = prune_state_dict(state_dict, model_cfg)
        return super().load_state_dict(new_state_dict, strict)

    def upgrade_state_dict(self, state_dict):
        """Upgrade old state dicts to work with newer code."""
        self.upgrade_state_dict_named(state_dict, "")

    def upgrade_state_dict_named(self, state_dict, name):
        """Upgrade old state dicts to work with newer code.

        Args:
            state_dict (dict): state dictionary to upgrade, in place
            name (str): the state dict key corresponding to the current module
        """
        assert state_dict is not None

        def do_upgrade(m, prefix):
            if len(prefix) > 0:
                prefix += "."

            for n, c in m.named_children():
                name = prefix + n
                if hasattr(c, "upgrade_state_dict_named"):
                    c.upgrade_state_dict_named(state_dict, name)
                elif hasattr(c, "upgrade_state_dict"):
                    c.upgrade_state_dict(state_dict)
                do_upgrade(c, name)

        do_upgrade(self, name)

    def set_num_updates(self, num_updates):
        """State from trainer to pass along to model at every update."""
        for m in self.modules():
            if hasattr(m, "set_num_updates") and m != self:
                m.set_num_updates(num_updates)

    def set_epoch(self, epoch):
        """Pass the current epoch to every submodule that defines ``set_epoch``."""
        for m in self.modules():
            if hasattr(m, "set_epoch") and m != self:
                m.set_epoch(epoch)

    def prepare_for_inference_(self, cfg: DictConfig):
        """Prepare model for inference."""
        kwargs = {}
        kwargs["beamable_mm_beam_size"] = (
            None
            if getattr(cfg.generation, "no_beamable_mm", False)
            else getattr(cfg.generation, "beam", 5)
        )
        kwargs["need_attn"] = getattr(cfg.generation, "print_alignment", False)
        if getattr(cfg.generation, "retain_dropout", False):
            kwargs["retain_dropout"] = cfg.generation.retain_dropout
            kwargs["retain_dropout_modules"] = cfg.generation.retain_dropout_modules
        self.make_generation_fast_(**kwargs)

    def make_generation_fast_(self, **kwargs):
        """
        Legacy entry point to optimize model for faster generation.
        Prefer prepare_for_inference_.
        """
        if self._is_generation_fast:
            return  # only apply once
        self._is_generation_fast = True

        # remove weight norm from all modules in the network
        def apply_remove_weight_norm(module):
            try:
                nn.utils.remove_weight_norm(module)
            except (AttributeError, ValueError):  # this module didn't have weight norm
                return

        self.apply(apply_remove_weight_norm)

        def apply_make_generation_fast_(module, prefix):
            if len(prefix) > 0:
                prefix += "."

            base_func = BaseFairseqModel.make_generation_fast_
            for n, m in module.named_modules():
                if (
                    m != self
                    and hasattr(m, "make_generation_fast_")
                    # don't call this implementation again, e.g., if
                    # children modules also inherit from BaseFairseqModel
                    and m.make_generation_fast_.__func__ is not base_func
                ):
                    name = prefix + n
                    m.make_generation_fast_(name=name, **kwargs)

        apply_make_generation_fast_(self, "")

        def train(mode=True):
            if mode:
                raise RuntimeError("cannot train after make_generation_fast")

        # this model should no longer be used for training
        self.eval()
        self.train = train

    def prepare_for_onnx_export_(self, **kwargs):
        """Make model exportable via ONNX trace."""
        seen = set()

        def apply_prepare_for_onnx_export_(module):
            if (
                module != self
                and hasattr(module, "prepare_for_onnx_export_")
                and module not in seen
            ):
                seen.add(module)
                module.prepare_for_onnx_export_(**kwargs)

        self.apply(apply_prepare_for_onnx_export_)

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file="model.pt",
        data_name_or_path=".",
        **kwargs,
    ):
        """
        Load a :class:`~fairseq.models.FairseqModel` from a pre-trained model
        file. Downloads and caches the pre-trained model file if needed.

        The base implementation returns a
        :class:`~fairseq.hub_utils.GeneratorHubInterface`, which can be used to
        generate translations or sample from language models. The underlying
        :class:`~fairseq.models.FairseqModel` can be accessed via the
        *generator.models* attribute.

        Other models may override this to implement custom hub interfaces.

        Args:
            model_name_or_path (str): either the name of a pre-trained model to
                load or a path/URL to a pre-trained model state dict
            checkpoint_file (str, optional): colon-separated list of checkpoint
                files in the model archive to ensemble (default: 'model.pt')
            data_name_or_path (str, optional): point args.data to the archive
                at the given path/URL. Can start with '.' or './' to reuse the
                model archive path.
        """
        from fairseq import hub_utils

        x = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            **kwargs,
        )
        logger.info(x["args"])
        return hub_utils.GeneratorHubInterface(x["args"], x["task"], x["models"])

    @classmethod
    def hub_models(cls):
        """Return the archive map of downloadable models (empty by default)."""
        return {}
Then, feed the + encoder output and previous decoder outputs (i.e., teacher forcing) to + the decoder to produce the next outputs:: + + encoder_out = self.encoder(src_tokens, src_lengths) + return self.decoder(prev_output_tokens, encoder_out) + + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (LongTensor): source sentence lengths of shape `(batch)` + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + decoder_out = self.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) + return decoder_out + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.decoder(prev_output_tokens, **kwargs) + + def extract_features(self, src_tokens, src_lengths, prev_output_tokens, **kwargs): + """ + Similar to *forward* but only return features. 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + features = self.decoder.extract_features( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) + return features + + def output_layer(self, features, **kwargs): + """Project features to the default output size (typically vocabulary size).""" + return self.decoder.output_layer(features, **kwargs) + + def max_positions(self): + """Maximum length supported by the model.""" + return (self.encoder.max_positions(), self.decoder.max_positions()) + + def max_decoder_positions(self): + """Maximum length supported by the decoder.""" + return self.decoder.max_positions() + + +class FairseqModel(FairseqEncoderDecoderModel): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + utils.deprecation_warning( + "FairseqModel is deprecated, please use FairseqEncoderDecoderModel " + "or BaseFairseqModel instead", + stacklevel=4, + ) + + +class FairseqMultiModel(BaseFairseqModel): + """Base class for combining multiple encoder-decoder models.""" + + def __init__(self, encoders, decoders): + super().__init__() + assert encoders.keys() == decoders.keys() + self.keys = list(encoders.keys()) + for key in self.keys: + check_type(encoders[key], FairseqEncoder) + check_type(decoders[key], FairseqDecoder) + + self.models = nn.ModuleDict( + { + key: FairseqEncoderDecoderModel(encoders[key], decoders[key]) + for key in self.keys + } + ) + + @staticmethod + def build_shared_embeddings( + dicts: Dict[str, Dictionary], + langs: List[str], + embed_dim: int, + build_embedding: callable, + pretrained_embed_path: Optional[str] = None, + ): + """ + Helper function to build shared embeddings for a set of languages after + checking that all dicts corresponding to those languages are equivalent. 
+ + Args: + dicts: Dict of lang_id to its corresponding Dictionary + langs: languages that we want to share embeddings for + embed_dim: embedding dimension + build_embedding: callable function to actually build the embedding + pretrained_embed_path: Optional path to load pretrained embeddings + """ + shared_dict = dicts[langs[0]] + if any(dicts[lang] != shared_dict for lang in langs): + raise ValueError( + "--share-*-embeddings requires a joined dictionary: " + "--share-encoder-embeddings requires a joined source " + "dictionary, --share-decoder-embeddings requires a joined " + "target dictionary, and --share-all-embeddings requires a " + "joint source + target dictionary." + ) + return build_embedding(shared_dict, embed_dim, pretrained_embed_path) + + def forward(self, src_tokens, src_lengths, prev_output_tokens, **kwargs): + raise NotImplementedError + + def max_positions(self): + """Maximum length supported by the model.""" + return { + key: ( + self.models[key].encoder.max_positions(), + self.models[key].decoder.max_positions(), + ) + for key in self.keys + } + + def max_decoder_positions(self): + """Maximum length supported by the decoder.""" + return min(model.decoder.max_positions() for model in self.models.values()) + + @property + def encoder(self): + return self.models[self.keys[0]].encoder + + @property + def decoder(self): + return self.models[self.keys[0]].decoder + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.decoder(prev_output_tokens, **kwargs) + + def load_state_dict( + self, + state_dict, + strict=True, + model_cfg=None, + args: Optional[Namespace] = None, + ): + """Copies parameters and buffers from *state_dict* into this module and + its descendants. + + Overrides the method in :class:`nn.Module`. Compared with that method + this additionally "upgrades" *state_dicts* from old checkpoints. 
+ """ + + if model_cfg is None and args is not None: + logger.warn( + "using 'args' is deprecated, please update your code to use dataclass config" + ) + model_cfg = convert_namespace_to_omegaconf(args).model + + self.upgrade_state_dict(state_dict) + + from fairseq.checkpoint_utils import prune_state_dict + + new_state_dict = prune_state_dict(state_dict, model_cfg) + return super().load_state_dict(new_state_dict, strict) + + +class FairseqLanguageModel(BaseFairseqModel): + """Base class for decoder-only models. + + Args: + decoder (FairseqDecoder): the decoder + """ + + def __init__(self, decoder): + super().__init__() + self.decoder = decoder + check_type(self.decoder, FairseqDecoder) + + def forward(self, src_tokens, **kwargs): + """ + Run the forward pass for a decoder-only model. + + Feeds a batch of tokens through the decoder to predict the next tokens. + + Args: + src_tokens (LongTensor): tokens on which to condition the decoder, + of shape `(batch, tgt_len)` + src_lengths (LongTensor): source sentence lengths of shape `(batch)` + + Returns: + tuple: + - the decoder's output of shape `(batch, seq_len, vocab)` + - a dictionary with any model-specific outputs + """ + return self.decoder(src_tokens, **kwargs) + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.decoder(prev_output_tokens, **kwargs) + + def extract_features(self, src_tokens, **kwargs): + """ + Similar to *forward* but only return features. 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, seq_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + return self.decoder.extract_features(src_tokens, **kwargs) + + def output_layer(self, features, **kwargs): + """Project features to the default output size (typically vocabulary size).""" + return self.decoder.output_layer(features, **kwargs) + + def max_positions(self): + """Maximum length supported by the model.""" + return self.decoder.max_positions() + + def max_decoder_positions(self): + """Maximum length supported by the decoder.""" + return self.decoder.max_positions() + + @property + def supported_targets(self): + return {"future"} + + +class FairseqEncoderModel(BaseFairseqModel): + """Base class for encoder-only models. + + Args: + encoder (FairseqEncoder): the encoder + """ + + def __init__(self, encoder): + super().__init__() + self.encoder = encoder + check_type(self.encoder, FairseqEncoder) + + def forward(self, src_tokens, src_lengths, **kwargs): + """ + Run the forward pass for a encoder-only model. + + Feeds a batch of tokens through the encoder to generate features. 
+ + Args: + src_tokens (LongTensor): input tokens of shape `(batch, src_len)` + src_lengths (LongTensor): source sentence lengths of shape `(batch)` + + Returns: + the encoder's output, typically of shape `(batch, src_len, features)` + """ + return self.encoder(src_tokens, src_lengths, **kwargs) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + encoder_out = net_output["encoder_out"] + if torch.is_tensor(encoder_out): + logits = encoder_out.float() + if log_probs: + return F.log_softmax(logits, dim=-1) + else: + return F.softmax(logits, dim=-1) + raise NotImplementedError + + def max_positions(self): + """Maximum length supported by the model.""" + return self.encoder.max_positions() diff --git a/fairseq/fairseq/models/fconv.py b/fairseq/fairseq/models/fconv.py new file mode 100644 index 0000000..c99a215 --- /dev/null +++ b/fairseq/fairseq/models/fconv.py @@ -0,0 +1,756 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, + register_model_architecture, +) +from fairseq.modules import ( + AdaptiveSoftmax, + BeamableMM, + FairseqDropout, + GradMultiply, + LearnedPositionalEmbedding, + LinearizedConvolution, +) + + +@register_model("fconv") +class FConvModel(FairseqEncoderDecoderModel): + """ + A fully convolutional model, i.e. a convolutional encoder and a + convolutional decoder, as described in `"Convolutional Sequence to Sequence + Learning" (Gehring et al., 2017) <https://arxiv.org/abs/1705.03122>`_. 
+ + Args: + encoder (FConvEncoder): the encoder + decoder (FConvDecoder): the decoder + + The Convolutional model provides the following named architectures and + command-line arguments: + + .. argparse:: + :ref: fairseq.models.fconv_parser + :prog: + """ + + @classmethod + def hub_models(cls): + def moses_subword(path): + return { + "path": path, + "tokenizer": "moses", + "bpe": "subword_nmt", + } + + return { + "conv.wmt14.en-fr": moses_subword( + "https://dl.fbaipublicfiles.com/fairseq/models/wmt14.v2.en-fr.fconv-py.tar.bz2" + ), + "conv.wmt14.en-de": moses_subword( + "https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-de.fconv-py.tar.bz2" + ), + "conv.wmt17.en-de": moses_subword( + "https://dl.fbaipublicfiles.com/fairseq/models/wmt17.v2.en-de.fconv-py.tar.bz2" + ), + } + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + self.encoder.num_attention_layers = sum( + layer is not None for layer in decoder.attention + ) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--dropout', type=float, metavar='D', + help='dropout probability') + parser.add_argument('--encoder-embed-dim', type=int, metavar='N', + help='encoder embedding dimension') + parser.add_argument('--encoder-embed-path', type=str, metavar='STR', + help='path to pre-trained encoder embedding') + parser.add_argument('--encoder-layers', type=str, metavar='EXPR', + help='encoder layers [(dim, kernel_size), ...]') + parser.add_argument('--decoder-embed-dim', type=int, metavar='N', + help='decoder embedding dimension') + parser.add_argument('--decoder-embed-path', type=str, metavar='STR', + help='path to pre-trained decoder embedding') + parser.add_argument('--decoder-layers', type=str, metavar='EXPR', + help='decoder layers [(dim, kernel_size), ...]') + parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N', + help='decoder output embedding dimension') + 
parser.add_argument('--decoder-attention', type=str, metavar='EXPR', + help='decoder attention [True, ...]') + parser.add_argument('--share-input-output-embed', action='store_true', + help='share input and output embeddings (requires' + ' --decoder-out-embed-dim and --decoder-embed-dim' + ' to be equal)') + # fmt: on + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + # make sure that all args are properly defaulted (in case there are any new ones) + base_architecture(args) + + encoder_embed_dict = None + if args.encoder_embed_path: + encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path) + utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary) + + decoder_embed_dict = None + if args.decoder_embed_path: + decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path) + utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary) + + encoder = FConvEncoder( + dictionary=task.source_dictionary, + embed_dim=args.encoder_embed_dim, + embed_dict=encoder_embed_dict, + convolutions=eval(args.encoder_layers), + dropout=args.dropout, + max_positions=args.max_source_positions, + ) + decoder = FConvDecoder( + dictionary=task.target_dictionary, + embed_dim=args.decoder_embed_dim, + embed_dict=decoder_embed_dict, + convolutions=eval(args.decoder_layers), + out_embed_dim=args.decoder_out_embed_dim, + attention=eval(args.decoder_attention), + dropout=args.dropout, + max_positions=args.max_target_positions, + share_embed=args.share_input_output_embed, + ) + return FConvModel(encoder, decoder) + + +class FConvEncoder(FairseqEncoder): + """ + Convolutional encoder consisting of `len(convolutions)` layers. 
+ + Args: + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_dim (int, optional): embedding dimension + embed_dict (str, optional): filename from which to load pre-trained + embeddings + max_positions (int, optional): maximum supported input sequence length + convolutions (list, optional): the convolutional layer structure. Each + list item `i` corresponds to convolutional layer `i`. Layers are + given as ``(out_channels, kernel_width, [residual])``. Residual + connections are added between layers when ``residual=1`` (which is + the default behavior). + dropout (float, optional): dropout to be applied before each conv layer + """ + + def __init__( + self, + dictionary, + embed_dim=512, + embed_dict=None, + max_positions=1024, + convolutions=((512, 3),) * 20, + dropout=0.1, + ): + super().__init__(dictionary) + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + self.num_attention_layers = None + + num_embeddings = len(dictionary) + self.padding_idx = dictionary.pad() + self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx) + if embed_dict: + self.embed_tokens = utils.load_embedding( + embed_dict, self.dictionary, self.embed_tokens + ) + + self.embed_positions = PositionalEmbedding( + max_positions, + embed_dim, + self.padding_idx, + ) + + convolutions = extend_conv_spec(convolutions) + in_channels = convolutions[0][0] + self.fc1 = Linear(embed_dim, in_channels, dropout=dropout) + self.projections = nn.ModuleList() + self.convolutions = nn.ModuleList() + self.residuals = [] + + layer_in_channels = [in_channels] + for _, (out_channels, kernel_size, residual) in enumerate(convolutions): + if residual == 0: + residual_dim = out_channels + else: + residual_dim = layer_in_channels[-residual] + self.projections.append( + Linear(residual_dim, out_channels) + if residual_dim != out_channels + else None + ) + if kernel_size % 2 == 1: + padding = kernel_size // 2 + else: + padding = 0 + 
self.convolutions.append( + ConvTBC( + in_channels, + out_channels * 2, + kernel_size, + dropout=dropout, + padding=padding, + ) + ) + self.residuals.append(residual) + in_channels = out_channels + layer_in_channels.append(out_channels) + self.fc2 = Linear(in_channels, embed_dim) + + def forward(self, src_tokens, src_lengths): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (LongTensor): lengths of each source sentence of shape + `(batch)` + + Returns: + dict: + - **encoder_out** (tuple): a tuple with two elements, where the + first element is the last encoder layer's output and the + second element is the same quantity summed with the input + embedding (used for attention). The shape of both tensors is + `(batch, src_len, embed_dim)`. + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + """ + # embed tokens and positions + x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens) + x = self.dropout_module(x) + input_embedding = x + + # project to size of convolution + x = self.fc1(x) + + # used to mask padding in input + encoder_padding_mask = src_tokens.eq(self.padding_idx).t() # -> T x B + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + residuals = [x] + # temporal convolutions + for proj, conv, res_layer in zip( + self.projections, self.convolutions, self.residuals + ): + if res_layer > 0: + residual = residuals[-res_layer] + residual = residual if proj is None else proj(residual) + else: + residual = None + + if encoder_padding_mask is not None: + x = x.masked_fill(encoder_padding_mask.unsqueeze(-1), 0) + + x = self.dropout_module(x) + if conv.kernel_size[0] % 2 == 1: + # padding is implicit in the conv + x = conv(x) + else: + padding_l = (conv.kernel_size[0] - 1) // 2 + padding_r = conv.kernel_size[0] // 2 + x = F.pad(x, (0, 0, 0, 0, padding_l, 
padding_r)) + x = conv(x) + x = F.glu(x, dim=2) + + if residual is not None: + x = (x + residual) * math.sqrt(0.5) + residuals.append(x) + + # T x B x C -> B x T x C + x = x.transpose(1, 0) + + # project back to size of embedding + x = self.fc2(x) + + if encoder_padding_mask is not None: + encoder_padding_mask = encoder_padding_mask.t() # -> B x T + x = x.masked_fill(encoder_padding_mask.unsqueeze(-1), 0) + + # scale gradients (this only affects backward, not forward) + x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers)) + + # add output to input embedding for attention + y = (x + input_embedding) * math.sqrt(0.5) + + return { + "encoder_out": (x, y), + "encoder_padding_mask": encoder_padding_mask, # B x T + } + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = ( + encoder_out["encoder_out"][0].index_select(0, new_order), + encoder_out["encoder_out"][1].index_select(0, new_order), + ) + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return self.embed_positions.max_positions + + +class AttentionLayer(nn.Module): + def __init__(self, conv_channels, embed_dim, bmm=None): + super().__init__() + # projects from output of convolution to embedding dimension + self.in_projection = Linear(conv_channels, embed_dim) + # projects from embedding dimension to convolution size + self.out_projection = Linear(embed_dim, conv_channels) + + self.bmm = bmm if bmm is not None else torch.bmm + + def forward(self, x, target_embedding, encoder_out, encoder_padding_mask): + residual = x + + # attention + x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5) + x = self.bmm(x, encoder_out[0]) + + # don't attend over padding + if encoder_padding_mask is not None: + x = 
( + x.float() + .masked_fill(encoder_padding_mask.unsqueeze(1), float("-inf")) + .type_as(x) + ) # FP16 support: cast to float and back + + # softmax over last dim + sz = x.size() + x = F.softmax(x.view(sz[0] * sz[1], sz[2]), dim=1) + x = x.view(sz) + attn_scores = x + + x = self.bmm(x, encoder_out[1]) + + # scale attention output (respecting potentially different lengths) + s = encoder_out[1].size(1) + if encoder_padding_mask is None: + x = x * (s * math.sqrt(1.0 / s)) + else: + s = s - encoder_padding_mask.type_as(x).sum( + dim=1, keepdim=True + ) # exclude padding + s = s.unsqueeze(-1) + x = x * (s * s.rsqrt()) + + # project back + x = (self.out_projection(x) + residual) * math.sqrt(0.5) + return x, attn_scores + + def make_generation_fast_(self, beamable_mm_beam_size=None, **kwargs): + """Replace torch.bmm with BeamableMM.""" + if beamable_mm_beam_size is not None: + del self.bmm + self.add_module("bmm", BeamableMM(beamable_mm_beam_size)) + + +class FConvDecoder(FairseqIncrementalDecoder): + """Convolutional decoder""" + + def __init__( + self, + dictionary, + embed_dim=512, + embed_dict=None, + out_embed_dim=256, + max_positions=1024, + convolutions=((512, 3),) * 20, + attention=True, + dropout=0.1, + share_embed=False, + positional_embeddings=True, + adaptive_softmax_cutoff=None, + adaptive_softmax_dropout=0.0, + ): + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([2])) + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + self.need_attn = True + + convolutions = extend_conv_spec(convolutions) + in_channels = convolutions[0][0] + if isinstance(attention, bool): + # expand True into [True, True, ...] and do the same with False + attention = [attention] * len(convolutions) + if not isinstance(attention, list) or len(attention) != len(convolutions): + raise ValueError( + "Attention is expected to be a list of booleans of " + "length equal to the number of layers." 
+ ) + + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) + if embed_dict: + self.embed_tokens = utils.load_embedding( + embed_dict, self.dictionary, self.embed_tokens + ) + + self.embed_positions = ( + PositionalEmbedding( + max_positions, + embed_dim, + padding_idx, + ) + if positional_embeddings + else None + ) + + self.fc1 = Linear(embed_dim, in_channels, dropout=dropout) + self.projections = nn.ModuleList() + self.convolutions = nn.ModuleList() + self.attention = nn.ModuleList() + self.residuals = [] + + layer_in_channels = [in_channels] + for i, (out_channels, kernel_size, residual) in enumerate(convolutions): + if residual == 0: + residual_dim = out_channels + else: + residual_dim = layer_in_channels[-residual] + self.projections.append( + Linear(residual_dim, out_channels) + if residual_dim != out_channels + else None + ) + self.convolutions.append( + LinearizedConv1d( + in_channels, + out_channels * 2, + kernel_size, + padding=(kernel_size - 1), + dropout=dropout, + ) + ) + self.attention.append( + AttentionLayer(out_channels, embed_dim) if attention[i] else None + ) + self.residuals.append(residual) + in_channels = out_channels + layer_in_channels.append(out_channels) + + self.adaptive_softmax = None + self.fc2 = self.fc3 = None + + if adaptive_softmax_cutoff is not None: + assert not share_embed + self.adaptive_softmax = AdaptiveSoftmax( + num_embeddings, + in_channels, + adaptive_softmax_cutoff, + dropout=adaptive_softmax_dropout, + ) + else: + self.fc2 = Linear(in_channels, out_embed_dim) + if share_embed: + assert out_embed_dim == embed_dim, ( + "Shared embed weights implies same dimensions " + " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim) + ) + self.fc3 = nn.Linear(out_embed_dim, num_embeddings) + self.fc3.weight = self.embed_tokens.weight + else: + self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout) + + def forward( + self, 
prev_output_tokens, encoder_out=None, incremental_state=None, **unused + ): + if encoder_out is not None: + encoder_padding_mask = encoder_out["encoder_padding_mask"] + encoder_out = encoder_out["encoder_out"] + + # split and transpose encoder outputs + encoder_a, encoder_b = self._split_encoder_out( + encoder_out, incremental_state + ) + + if self.embed_positions is not None: + pos_embed = self.embed_positions(prev_output_tokens, incremental_state) + else: + pos_embed = 0 + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + x = self._embed_tokens(prev_output_tokens, incremental_state) + + # embed tokens and combine with positional embeddings + x += pos_embed + x = self.dropout_module(x) + target_embedding = x + + # project to size of convolution + x = self.fc1(x) + + # B x T x C -> T x B x C + x = self._transpose_if_training(x, incremental_state) + + # temporal convolutions + avg_attn_scores = None + num_attn_layers = len(self.attention) + residuals = [x] + for proj, conv, attention, res_layer in zip( + self.projections, self.convolutions, self.attention, self.residuals + ): + if res_layer > 0: + residual = residuals[-res_layer] + residual = residual if proj is None else proj(residual) + else: + residual = None + + x = self.dropout_module(x) + x = conv(x, incremental_state) + x = F.glu(x, dim=2) + + # attention + if attention is not None: + x = self._transpose_if_training(x, incremental_state) + + x, attn_scores = attention( + x, target_embedding, (encoder_a, encoder_b), encoder_padding_mask + ) + + if not self.training and self.need_attn: + attn_scores = attn_scores / num_attn_layers + if avg_attn_scores is None: + avg_attn_scores = attn_scores + else: + avg_attn_scores.add_(attn_scores) + + x = self._transpose_if_training(x, incremental_state) + + # residual + if residual is not None: + x = (x + residual) * math.sqrt(0.5) + residuals.append(x) + + # T x B x C -> B x T x C + x = self._transpose_if_training(x, 
incremental_state) + + # project back to size of vocabulary if not using adaptive softmax + if self.fc2 is not None and self.fc3 is not None: + x = self.fc2(x) + x = self.dropout_module(x) + x = self.fc3(x) + + return x, avg_attn_scores + + def reorder_incremental_state(self, incremental_state, new_order): + super().reorder_incremental_state(incremental_state, new_order) + encoder_out = utils.get_incremental_state( + self, incremental_state, "encoder_out" + ) + if encoder_out is not None: + encoder_out = tuple(eo.index_select(0, new_order) for eo in encoder_out) + utils.set_incremental_state( + self, incremental_state, "encoder_out", encoder_out + ) + + def max_positions(self): + """Maximum output length supported by the decoder.""" + return ( + self.embed_positions.max_positions + if self.embed_positions is not None + else float("inf") + ) + + def upgrade_state_dict(self, state_dict): + if utils.item(state_dict.get("decoder.version", torch.Tensor([1]))[0]) < 2: + # old models use incorrect weight norm dimension + for i, conv in enumerate(self.convolutions): + # reconfigure weight norm + nn.utils.remove_weight_norm(conv) + self.convolutions[i] = nn.utils.weight_norm(conv, dim=0) + state_dict["decoder.version"] = torch.Tensor([1]) + return state_dict + + def make_generation_fast_(self, need_attn=False, **kwargs): + self.need_attn = need_attn + + def _embed_tokens(self, tokens, incremental_state): + if incremental_state is not None: + # keep only the last token for incremental forward pass + tokens = tokens[:, -1:] + return self.embed_tokens(tokens) + + def _split_encoder_out(self, encoder_out, incremental_state): + """Split and transpose encoder outputs. + + This is cached when doing incremental inference. 
+ """ + cached_result = utils.get_incremental_state( + self, incremental_state, "encoder_out" + ) + if cached_result is not None: + return cached_result + + # transpose only once to speed up attention layers + encoder_a, encoder_b = encoder_out + encoder_a = encoder_a.transpose(1, 2).contiguous() + result = (encoder_a, encoder_b) + + if incremental_state is not None: + utils.set_incremental_state(self, incremental_state, "encoder_out", result) + return result + + def _transpose_if_training(self, x, incremental_state): + if incremental_state is None: + x = x.transpose(0, 1) + return x + + +def extend_conv_spec(convolutions): + """ + Extends convolutional spec that is a list of tuples of 2 or 3 parameters + (kernel size, dim size and optionally how many layers behind to look for residual) + to default the residual propagation param if it is not specified + """ + extended = [] + for spec in convolutions: + if len(spec) == 3: + extended.append(spec) + elif len(spec) == 2: + extended.append(spec + (1,)) + else: + raise Exception( + "invalid number of parameters in convolution spec " + + str(spec) + + ". 
expected 2 or 3" + ) + return tuple(extended) + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, 0, 0.1) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def PositionalEmbedding(num_embeddings, embedding_dim, padding_idx): + m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) + nn.init.normal_(m.weight, 0, 0.1) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, dropout=0.0): + """Weight-normalized Linear layer (input: N x T x C)""" + m = nn.Linear(in_features, out_features) + nn.init.normal_(m.weight, mean=0, std=math.sqrt((1 - dropout) / in_features)) + nn.init.constant_(m.bias, 0) + return nn.utils.weight_norm(m) + + +def LinearizedConv1d(in_channels, out_channels, kernel_size, dropout=0.0, **kwargs): + """Weight-normalized Conv1d layer optimized for decoding""" + m = LinearizedConvolution(in_channels, out_channels, kernel_size, **kwargs) + std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels)) + nn.init.normal_(m.weight, mean=0, std=std) + nn.init.constant_(m.bias, 0) + return nn.utils.weight_norm(m, dim=2) + + +def ConvTBC(in_channels, out_channels, kernel_size, dropout=0.0, **kwargs): + """Weight-normalized Conv1d layer""" + from fairseq.modules import ConvTBC + + m = ConvTBC(in_channels, out_channels, kernel_size, **kwargs) + std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels)) + nn.init.normal_(m.weight, mean=0, std=std) + nn.init.constant_(m.bias, 0) + return nn.utils.weight_norm(m, dim=2) + + +@register_model_architecture("fconv", "fconv") +def base_architecture(args): + args.dropout = getattr(args, "dropout", 0.1) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_layers = getattr(args, "encoder_layers", "[(512, 3)] * 
20") + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_layers = getattr(args, "decoder_layers", "[(512, 3)] * 20") + args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 256) + args.decoder_attention = getattr(args, "decoder_attention", "True") + args.share_input_output_embed = getattr(args, "share_input_output_embed", False) + + +@register_model_architecture("fconv", "fconv_iwslt_de_en") +def fconv_iwslt_de_en(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256) + args.encoder_layers = getattr(args, "encoder_layers", "[(256, 3)] * 4") + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 256) + args.decoder_layers = getattr(args, "decoder_layers", "[(256, 3)] * 3") + args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 256) + base_architecture(args) + + +@register_model_architecture("fconv", "fconv_wmt_en_ro") +def fconv_wmt_en_ro(args): + args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 512) + base_architecture(args) + + +@register_model_architecture("fconv", "fconv_wmt_en_de") +def fconv_wmt_en_de(args): + convs = "[(512, 3)] * 9" # first 9 layers have 512 units + convs += " + [(1024, 3)] * 4" # next 4 layers have 1024 units + convs += " + [(2048, 1)] * 2" # final 2 layers use 1x1 convolutions + + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768) + args.encoder_layers = getattr(args, "encoder_layers", convs) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 768) + args.decoder_layers = getattr(args, "decoder_layers", convs) + args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 512) + base_architecture(args) + + +@register_model_architecture("fconv", "fconv_wmt_en_fr") +def fconv_wmt_en_fr(args): + convs = "[(512, 3)] * 6" # first 6 layers have 512 units + convs += " + [(768, 3)] * 4" # next 4 layers have 768 units + convs += " + [(1024, 3)] * 
3" # next 3 layers have 1024 units + convs += " + [(2048, 1)] * 1" # next 1 layer uses 1x1 convolutions + convs += " + [(4096, 1)] * 1" # final 1 layer uses 1x1 convolutions + + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768) + args.encoder_layers = getattr(args, "encoder_layers", convs) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 768) + args.decoder_layers = getattr(args, "decoder_layers", convs) + args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 512) + base_architecture(args) diff --git a/fairseq/fairseq/models/fconv_lm.py b/fairseq/fairseq/models/fconv_lm.py new file mode 100644 index 0000000..4b243d6 --- /dev/null +++ b/fairseq/fairseq/models/fconv_lm.py @@ -0,0 +1,136 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq import utils +from fairseq.models import ( + FairseqLanguageModel, + register_model, + register_model_architecture, +) +from fairseq.models.fconv import FConvDecoder +from fairseq.utils import safe_hasattr + + +@register_model("fconv_lm") +class FConvLanguageModel(FairseqLanguageModel): + def __init__(self, decoder): + super().__init__(decoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-layers", + type=str, + metavar="EXPR", + help="decoder layers [(dim, kernel_size), ...]", + ) + parser.add_argument( + "--decoder-out-embed-dim", + type=int, + metavar="N", + help="decoder output embedding dimension", + ) + parser.add_argument( + "--adaptive-softmax-cutoff", + metavar="EXPR", + help="comma separated list of adaptive softmax cutoff points. 
" + "Must be used with adaptive_loss criterion", + ) + parser.add_argument( + "--adaptive-softmax-dropout", + type=float, + metavar="D", + help="sets adaptive softmax dropout for the tail projections", + ) + parser.add_argument( + "--decoder-attention", + type=str, + metavar="EXPR", + help="decoder attention [True, ...]", + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + # make sure all arguments are present in older models + base_lm_architecture(args) + + if safe_hasattr(args, "max_target_positions") and not safe_hasattr( + args, "tokens_per_sample" + ): + args.tokens_per_sample = args.max_target_positions + + decoder = FConvDecoder( + dictionary=task.target_dictionary, + embed_dim=args.decoder_embed_dim, + convolutions=eval(args.decoder_layers), + out_embed_dim=args.decoder_embed_dim, + attention=eval(args.decoder_attention), + dropout=args.dropout, + max_positions=args.tokens_per_sample, + share_embed=False, + positional_embeddings=False, + adaptive_softmax_cutoff=( + utils.eval_str_list(args.adaptive_softmax_cutoff, type=int) + if args.criterion == "adaptive_loss" + else None + ), + adaptive_softmax_dropout=args.adaptive_softmax_dropout, + ) + return FConvLanguageModel(decoder) + + +@register_model_architecture("fconv_lm", "fconv_lm") +def base_lm_architecture(args): + args.dropout = getattr(args, "dropout", 0.1) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 128) + args.decoder_layers = getattr(args, "decoder_layers", "[(1268, 4)] * 13") + args.decoder_attention = getattr(args, "decoder_attention", "False") + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + + +@register_model_architecture("fconv_lm", "fconv_lm_dauphin_wikitext103") +def fconv_lm_dauphin_wikitext103(args): + layers = "[(850, 6)] * 3" + layers += " + [(850, 1)] * 1" + layers += " + [(850, 5)] * 4" + layers += " + [(850, 1)] * 1" 
+ layers += " + [(850, 4)] * 3" + layers += " + [(1024, 4)] * 1" + layers += " + [(2048, 4)] * 1" + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 280) + args.decoder_layers = getattr(args, "decoder_layers", layers) + args.decoder_attention = getattr(args, "decoder_attention", "False") + args.adaptive_softmax_cutoff = getattr( + args, "adaptive_softmax_cutoff", "10000,20000,200000" + ) + base_lm_architecture(args) + + +@register_model_architecture("fconv_lm", "fconv_lm_dauphin_gbw") +def fconv_lm_dauphin_gbw(args): + layers = "[(512, 5)]" + layers += " + [(128, 1, 0), (128, 5, 0), (512, 1, 3)] * 3" + layers += " + [(512, 1, 0), (512, 5, 0), (1024, 1, 3)] * 3" + layers += " + [(1024, 1, 0), (1024, 5, 0), (2048, 1, 3)] * 6" + layers += " + [(1024, 1, 0), (1024, 5, 0), (4096, 1, 3)]" + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 128) + args.decoder_layers = getattr(args, "decoder_layers", layers) + args.decoder_attention = getattr(args, "decoder_attention", "False") + args.adaptive_softmax_cutoff = getattr( + args, "adaptive_softmax_cutoff", "10000,50000,200000" + ) + base_lm_architecture(args) diff --git a/fairseq/fairseq/models/fconv_self_att.py b/fairseq/fairseq/models/fconv_self_att.py new file mode 100644 index 0000000..8357ef7 --- /dev/null +++ b/fairseq/fairseq/models/fconv_self_att.py @@ -0,0 +1,674 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import logging
import math
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from fairseq import checkpoint_utils
from fairseq.incremental_decoding_utils import with_incremental_state
from fairseq.models import (
    CompositeEncoder,
    FairseqDecoder,
    FairseqEncoder,
    FairseqEncoderDecoderModel,
    register_model,
    register_model_architecture,
)
from fairseq.modules import (
    DownsampledMultiHeadAttention,
    FairseqDropout,
    GradMultiply,
    LayerNorm,
    LearnedPositionalEmbedding,
    LinearizedConvolution,
)


logger = logging.getLogger(__name__)


@register_model("fconv_self_att")
class FConvModelSelfAtt(FairseqEncoderDecoderModel):
    """Convolutional encoder-decoder with optional self-attention and fusion.

    When a pretrained encoder is supplied, the encoder attribute becomes a
    ``CompositeEncoder`` holding both the trainable encoder (key
    ``"encoder"``) and the frozen one (key ``"pretrained"``).
    """

    @classmethod
    def hub_models(cls):
        """Return the mapping of hub model names to their download specs."""
        # Both story checkpoints live in the same archive.
        stories_archive = (
            "https://dl.fbaipublicfiles.com/fairseq/models/stories_checkpoint.tar.gz"
        )
        pretrained_spec = {
            "path": stories_archive,
            "checkpoint_file": "pretrained_checkpoint.pt",
            "tokenizer": "nltk",
        }
        fusion_spec = {
            "path": stories_archive,
            "checkpoint_file": "fusion_checkpoint.pt",
            "tokenizer": "nltk",
            "pretrained": "True",
            "pretrained_checkpoint": "./pretrained_checkpoint.pt",
        }
        return {
            "conv.stories.pretrained": pretrained_spec,
            "conv.stories": fusion_spec,
            # Test set containing dictionaries
            "data.stories": "https://dl.fbaipublicfiles.com/fairseq/data/stories_test.tar.bz2",
        }

    def __init__(self, encoder, decoder, pretrained_encoder=None):
        super().__init__(encoder, decoder)

        # The encoder scales its gradients by the number of decoder layers
        # that actually attend to it, so count the non-None entries.
        attention_layer_count = 0
        for layer in decoder.attention:
            if layer is not None:
                attention_layer_count += 1
        self.encoder.num_attention_layers = attention_layer_count

        self.pretrained_encoder = pretrained_encoder

        # For the fusion model, CompositeEncoder contains both the pretrained
        # and the training encoder; both are run and then combined in the
        # decoder.
        encoders = {"encoder": encoder}
        if self.pretrained_encoder is not None:
            encoders["pretrained"] = self.pretrained_encoder
        self.encoder = CompositeEncoder(encoders)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # (flag, add_argument keyword arguments), registered in this order.
        option_specs = (
            ("--dropout",
             dict(type=float, metavar="D", help="dropout probability")),
            ("--encoder-embed-dim",
             dict(type=int, metavar="N", help="encoder embedding dimension")),
            ("--encoder-layers",
             dict(type=str, metavar="EXPR",
                  help="encoder layers [(dim, kernel_size), ...]")),
            ("--decoder-embed-dim",
             dict(type=int, metavar="N", help="decoder embedding dimension")),
            ("--decoder-layers",
             dict(type=str, metavar="EXPR",
                  help="decoder layers [(dim, kernel_size), ...]")),
            ("--decoder-out-embed-dim",
             dict(type=int, metavar="N",
                  help="decoder output embedding dimension")),
            ("--decoder-attention",
             dict(type=str, metavar="EXPR",
                  help="decoder attention [True, ...]")),
            ("--self-attention",
             dict(type=str, metavar="EXPR",
                  help="decoder self-attention layers, ex: [True] + [False]*5")),
            ("--multihead-attention-nheads",
             dict(type=int, help="Number of heads to use in attention")),
            ("--multihead-self-attention-nheads",
             dict(type=int, help="Number of heads to use in self-attention")),
            ("--encoder-attention",
             dict(type=str, metavar="EXPR",
                  help="encoder attention [True, ...]")),
            ("--encoder-attention-nheads",
             dict(type=int,
                  help="Number of heads to use in encoder attention")),
            ("--project-input",
             dict(type=str, metavar="EXPR",
                  help="Use projections in self-attention [True, ...]")),
            ("--gated-attention",
             dict(type=str, metavar="EXPR",
                  help="Use GLU layers in self-attention projections [True, ...]")),
            ("--downsample",
             dict(type=str, metavar="EXPR",
                  help="Use downsampling in self-attention [True, ...]")),
            ("--pretrained-checkpoint",
             dict(metavar="DIR",
                  help="path to load checkpoint from pretrained model")),
            ("--pretrained",
             dict(type=str, metavar="EXPR",
                  help="use pretrained model when training [True, ...]")),
        )
        for flag, options in option_specs:
            parser.add_argument(flag, **options)

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        trained_encoder = trained_decoder = None

        # NOTE(review): the EXPR-style options below are evaluated with
        # eval(); they must only ever come from a trusted command line/config.
        pretrained = eval(args.pretrained)
        if pretrained:
            logger.info("loading pretrained model")

            # Fall back to a path relative to the data directory when the
            # checkpoint is not found as given.
            if not os.path.exists(args.pretrained_checkpoint):
                candidate = os.path.join(args.data, args.pretrained_checkpoint)
                if os.path.exists(candidate):
                    args.pretrained_checkpoint = candidate

            ensemble = checkpoint_utils.load_model_ensemble(
                filenames=[args.pretrained_checkpoint],
                task=task,
            )
            trained_model = ensemble[0][0]

            # First child is taken as the encoder, second as the decoder.
            submodules = list(trained_model.children())
            trained_encoder, trained_decoder = submodules[0], submodules[1]

            # freeze pretrained model
            for frozen_module in (trained_decoder, trained_encoder):
                for param in frozen_module.parameters():
                    param.requires_grad = False

        encoder = FConvEncoder(
            task.source_dictionary,
            embed_dim=args.encoder_embed_dim,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
            max_positions=args.max_source_positions,
            attention=eval(args.encoder_attention),
            attention_nheads=args.encoder_attention_nheads,
        )

        decoder = FConvDecoder(
            task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_target_positions,
            selfattention=eval(args.self_attention),
            attention_nheads=args.multihead_attention_nheads,
            selfattention_nheads=args.multihead_self_attention_nheads,
            project_input=eval(args.project_input),
            gated_attention=eval(args.gated_attention),
            downsample=eval(args.downsample),
            pretrained=pretrained,
            trained_decoder=trained_decoder,
        )

        return FConvModelSelfAtt(encoder, decoder, trained_encoder)

    @property
    def pretrained(self):
        """True when the model was built with a pretrained (fusion) encoder."""
        return self.pretrained_encoder is not None


class FConvEncoder(FairseqEncoder):
    """Convolutional encoder.

    Embeds tokens and positions, projects to the first convolution width,
    then runs a stack of gated (GLU) ``ConvTBC`` layers with residual
    connections and optional per-layer self-attention, and finally projects
    back to the embedding width.
    """

    def __init__(
        self,
        dictionary,
        embed_dim=512,
        max_positions=1024,
        convolutions=((512, 3),) * 20,
        dropout=0.1,
        attention=False,
        attention_nheads=1,
    ):
        """Build the encoder layers.

        Args:
            dictionary: vocabulary; only ``len()`` and ``pad()`` are used here.
            embed_dim: width of the token/position embeddings.
            max_positions: number of learned positions.
            convolutions: sequence of ``(out_channels, kernel_size)`` pairs,
                one per layer.
            dropout: dropout probability, also used to scale weight init.
            attention: a single bool (applied to every layer) or a per-layer
                sequence selecting which layers get self-attention.
            attention_nheads: number of heads for the self-attention layers.
        """
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__
        )
        # Left as None here; FConvModelSelfAtt.__init__ overwrites it with the
        # number of decoder attention layers. forward() divides by this value,
        # so it must be set before the encoder is run.
        self.num_attention_layers = None

        num_embeddings = len(dictionary)
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            self.padding_idx,
        )

        def expand_bool_array(val):
            if isinstance(val, bool):
                # expand True into [True, True, ...] and do the same with False
                return [val] * len(convolutions)
            return val

        attention = expand_bool_array(attention)

        in_channels = convolutions[0][0]
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        # NOTE: attproj is created but nothing is appended to it in this class.
        self.attproj = nn.ModuleList()
        for i, (out_channels, kernel_size) in enumerate(convolutions):
            # A projection is only needed when the residual changes width.
            self.projections.append(
                Linear(in_channels, out_channels)
                if in_channels != out_channels
                else None
            )
            # Twice the output channels because GLU halves the channel dim.
            self.convolutions.append(
                ConvTBC(in_channels, out_channels * 2, kernel_size, dropout=dropout)
            )

            self.attention.append(
                SelfAttention(out_channels, embed_dim, attention_nheads)
                if attention[i]
                else None
            )
            in_channels = out_channels

        self.fc2 = Linear(in_channels, embed_dim)

    def forward(self, src_tokens, src_lengths):
        """Encode ``src_tokens``.

        ``src_lengths`` is accepted but not used in this method.

        Returns:
            dict with ``"encoder_out"`` — a tuple ``(x, y)`` where ``x`` is the
            projected convolution output and ``y`` is ``x`` combined with the
            input embedding — and ``"encoder_padding_mask"`` (``None`` when
            the batch contains no padding).
        """
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = self.dropout_module(x)
        input_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()  # -> T x B
        if not encoder_padding_mask.any():
            encoder_padding_mask = None

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv, attention in zip(
            self.projections, self.convolutions, self.attention
        ):
            residual = x if proj is None else proj(x)

            # zero out padded positions before convolving
            if encoder_padding_mask is not None:
                x = x.masked_fill(encoder_padding_mask.unsqueeze(-1), 0)

            x = self.dropout_module(x)
            # Pad the leading (time) axis by kernel_size - 1 in total;
            # presumably this keeps the output length equal to the input
            # length, since the conv is built without its own padding.
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            if attention is not None:
                x = attention(x)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        if encoder_padding_mask is not None:
            encoder_padding_mask = encoder_padding_mask.t()  # -> B x T
            x = x.masked_fill(encoder_padding_mask.unsqueeze(-1), 0)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

        return {
            "encoder_out": (x, y),
            "encoder_padding_mask": encoder_padding_mask,  # B x T
        }

    def reorder_encoder_out(self, encoder_out, new_order):
        """Reorder the batch dimension of ``encoder_out`` by ``new_order``.

        The dict is modified in place and also returned. A nested
        ``"pretrained"`` entry, when present, has its ``"encoder_out"``
        reordered as well.
        """
        encoder_out["encoder_out"] = tuple(
            eo.index_select(0, new_order) for eo in encoder_out["encoder_out"]
        )

        if encoder_out["encoder_padding_mask"] is not None:
            encoder_out["encoder_padding_mask"] = encoder_out[
                "encoder_padding_mask"
            ].index_select(0, new_order)

        if "pretrained" in encoder_out:
            encoder_out["pretrained"]["encoder_out"] = tuple(
                eo.index_select(0, new_order)
                for eo in encoder_out["pretrained"]["encoder_out"]
            )

        return encoder_out

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        return self.embed_positions.max_positions


@with_incremental_state
class FConvDecoder(FairseqDecoder):
    """Convolutional decoder.

    A stack of gated ``LinearizedConv1d`` layers with optional encoder
    attention and self-attention per layer. When ``pretrained`` is true the
    decoder additionally fuses its hidden state with that of a frozen
    pretrained decoder through learned gates.
    """

    def __init__(
        self,
        dictionary,
        embed_dim=512,
        out_embed_dim=256,
        max_positions=1024,
        convolutions=((512, 3),) * 8,
        attention=True,
        dropout=0.1,
        selfattention=False,
        attention_nheads=1,
        selfattention_nheads=1,
        project_input=False,
        gated_attention=False,
        downsample=False,
        pretrained=False,
        trained_decoder=None,
    ):
        """Build the decoder layers.

        Args:
            dictionary: vocabulary; only ``len()`` and ``pad()`` are used here.
            embed_dim: width of the token/position embeddings.
            out_embed_dim: width of the hidden state fed to the output layer.
            max_positions: number of learned positions.
            convolutions: sequence of ``(out_channels, kernel_size)`` pairs.
            attention: bool or per-layer list enabling encoder attention.
            dropout: dropout probability, also used to scale weight init.
            selfattention: bool or per-layer sequence enabling self-attention.
            attention_nheads: heads for encoder attention.
            selfattention_nheads: heads for self-attention.
            project_input: forwarded to the attention modules.
            gated_attention: forwarded as ``gated`` to self-attention only.
            downsample: forwarded to self-attention only.
            pretrained: enable fusion with ``trained_decoder``.
            trained_decoder: pretrained decoder; must expose ``fc2`` when
                ``pretrained`` is true.

        Raises:
            ValueError: if ``attention`` does not expand to a list with one
                entry per convolution layer.
        """
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([2]))
        self.pretrained = pretrained
        self.pretrained_decoder = trained_decoder
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__
        )
        self.need_attn = True
        in_channels = convolutions[0][0]

        def expand_bool_array(val):
            if isinstance(val, bool):
                # expand True into [True, True, ...] and do the same with False
                return [val] * len(convolutions)
            return val

        attention = expand_bool_array(attention)
        selfattention = expand_bool_array(selfattention)

        # NOTE: only `attention` is validated; `selfattention` is not checked.
        if not isinstance(attention, list) or len(attention) != len(convolutions):
            raise ValueError(
                "Attention is expected to be a list of booleans of "
                "length equal to the number of layers."
            )

        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)

        self.embed_positions = PositionalEmbedding(
            max_positions,
            embed_dim,
            padding_idx,
        )

        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
        self.projections = nn.ModuleList()
        self.convolutions = nn.ModuleList()
        self.attention = nn.ModuleList()
        self.selfattention = nn.ModuleList()
        self.attproj = nn.ModuleList()
        for i, (out_channels, kernel_size) in enumerate(convolutions):
            # A projection is only needed when the residual changes width.
            self.projections.append(
                Linear(in_channels, out_channels)
                if in_channels != out_channels
                else None
            )
            # Twice the output channels because GLU halves the channel dim.
            self.convolutions.append(
                LinearizedConv1d(
                    in_channels,
                    out_channels * 2,
                    kernel_size,
                    padding=(kernel_size - 1),
                    dropout=dropout,
                )
            )

            # Encoder attention is always built ungated and without
            # downsampling, regardless of gated_attention/downsample.
            self.attention.append(
                DownsampledMultiHeadAttention(
                    out_channels,
                    embed_dim,
                    attention_nheads,
                    project_input=project_input,
                    gated=False,
                    downsample=False,
                )
                if attention[i]
                else None
            )

            # Projects the conv output to embed_dim so it can be added to the
            # target embedding before attention.
            self.attproj.append(
                Linear(out_channels, embed_dim, dropout=dropout)
                if attention[i]
                else None
            )
            self.selfattention.append(
                SelfAttention(
                    out_channels,
                    embed_dim,
                    selfattention_nheads,
                    project_input=project_input,
                    gated=gated_attention,
                    downsample=downsample,
                )
                if selfattention[i]
                else None
            )
            in_channels = out_channels

        self.fc2 = Linear(in_channels, out_embed_dim)
        self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout)

        # model fusion
        if self.pretrained:
            # independent gates are learned from the concatenated input
            self.gate1 = nn.Sequential(
                Linear(out_embed_dim * 2, out_embed_dim), nn.Sigmoid()
            )
            self.gate2 = nn.Sequential(
                Linear(out_embed_dim * 2, out_embed_dim), nn.Sigmoid()
            )
            # pretrained and trained models are joined
            self.joining = nn.Sequential(
                Linear(out_embed_dim * 2, out_embed_dim * 2),
                LayerNorm(out_embed_dim * 2),
                nn.GLU(),
                Linear(out_embed_dim, out_embed_dim * 2),
                LayerNorm(out_embed_dim * 2),
                nn.GLU(),
                Linear(out_embed_dim, out_embed_dim),
                LayerNorm(out_embed_dim),
            )
            # pretrained model contains an output layer that is nhid -> vocab size
            # but the models are combined in their hidden state
            # the hook stores the output of the pretrained model forward
            self.pretrained_outputs = {}

            def save_output():
                # forward hooks are called as hook(module, inputs, output);
                # only the output of the pretrained fc2 is kept.
                def hook(a, b, output):
                    self.pretrained_outputs["out"] = output

                return hook

            self.pretrained_decoder.fc2.register_forward_hook(save_output())

    def forward(self, prev_output_tokens, encoder_out):
        """Decode ``prev_output_tokens`` against ``encoder_out``.

        Args:
            prev_output_tokens: target-side input token ids.
            encoder_out: dict with an ``"encoder"`` entry (whose
                ``"encoder_out"`` is a pair of tensors) and, in fusion mode,
                a ``"pretrained"`` entry for the pretrained decoder.

        Returns:
            ``(output, attn)``: the vocabulary-sized output and the
            accumulated encoder-attention scores. ``attn`` is ``None`` unless
            the module is in eval mode with ``need_attn`` set and at least
            one attention layer exists.
        """
        trained_encoder_out = encoder_out["pretrained"] if self.pretrained else None
        encoder_out = encoder_out["encoder"]["encoder_out"]

        encoder_a, encoder_b = self._split_encoder_out(encoder_out)

        # embed positions
        positions = self.embed_positions(prev_output_tokens)

        # embed tokens and positions
        x = self.embed_tokens(prev_output_tokens) + positions
        x = self.dropout_module(x)
        target_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        avg_attn_scores = None
        for proj, conv, attention, selfattention, attproj in zip(
            self.projections,
            self.convolutions,
            self.attention,
            self.selfattention,
            self.attproj,
        ):
            residual = x if proj is None else proj(x)

            x = self.dropout_module(x)
            x = conv(x)
            x = F.glu(x, dim=2)

            # attention
            if attention is not None:
                r = x
                x, attn_scores = attention(
                    attproj(x) + target_embedding, encoder_a, encoder_b
                )
                x = x + r
                if not self.training and self.need_attn:
                    # NOTE: despite the name, scores are summed across layers
                    # and never divided by the layer count. The first layer's
                    # tensor is aliased and then updated in place by add_.
                    if avg_attn_scores is None:
                        avg_attn_scores = attn_scores
                    else:
                        avg_attn_scores.add_(attn_scores)

            if selfattention is not None:
                x = selfattention(x)

            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        # project back to size of vocabulary
        x = self.fc2(x)
        x = self.dropout_module(x)
        if not self.pretrained:
            x = self.fc3(x)

        # fusion gating
        if self.pretrained:
            # The return value is unused; this call is made so the forward
            # hook on the pretrained fc2 fills self.pretrained_outputs["out"].
            trained_x, _ = self.pretrained_decoder.forward(
                prev_output_tokens, trained_encoder_out
            )
            y = torch.cat([x, self.pretrained_outputs["out"]], dim=-1)
            gate1 = self.gate1(y)
            gate2 = self.gate2(y)
            gated_x1 = gate1 * x
            gated_x2 = gate2 * self.pretrained_outputs["out"]
            fusion = torch.cat([gated_x1, gated_x2], dim=-1)
            fusion = self.joining(fusion)
            fusion_output = self.fc3(fusion)
            return fusion_output, avg_attn_scores
        else:
            return x, avg_attn_scores

    def max_positions(self):
        """Maximum output length supported by the decoder."""
        return self.embed_positions.max_positions

    def make_generation_fast_(self, need_attn=False, **kwargs):
        """Set whether forward() collects attention scores; kwargs are ignored."""
        self.need_attn = need_attn

    def _split_encoder_out(self, encoder_out):
        """Split and transpose encoder outputs."""
        # transpose only once to speed up attention layers
        encoder_a, encoder_b = encoder_out
        encoder_a = encoder_a.transpose(0, 1).contiguous()
        encoder_b = encoder_b.transpose(0, 1).contiguous()
        result = (encoder_a, encoder_b)
        return result


class SelfAttention(nn.Module):
    """Self-attention block: q/k/v projections, attention, residual, LayerNorm."""

    def __init__(
        self,
        out_channels,
        embed_dim,
        num_heads,
        project_input=False,
        gated=False,
        downsample=False,
    ):
        """Create the attention module and its input projections.

        ``project_input``, ``gated`` and ``downsample`` are forwarded to
        ``DownsampledMultiHeadAttention``; attention dropout is fixed at 0.
        """
        super().__init__()
        self.attention = DownsampledMultiHeadAttention(
            out_channels,
            embed_dim,
            num_heads,
            dropout=0,
            bias=True,
            project_input=project_input,
            gated=gated,
            downsample=downsample,
        )
        self.in_proj_q = Linear(out_channels, embed_dim)
        self.in_proj_k = Linear(out_channels, embed_dim)
        self.in_proj_v = Linear(out_channels, embed_dim)
        self.ln = LayerNorm(out_channels)

    def forward(self, x):
        """Apply future-masked self-attention to ``x`` and return LN(x + attn)."""
        residual = x
        query = self.in_proj_q(x)
        key = self.in_proj_k(x)
        value = self.in_proj_v(x)
        # attention weights (second return value) are discarded
        x, _ = self.attention(
            query, key, value, mask_future_timesteps=True, use_scalar_bias=True
        )
        return self.ln(x + residual)


def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Create an ``nn.Embedding`` with weights drawn from N(0, 0.1).

    NOTE: the normal_ init runs after construction and covers the whole
    weight matrix, including the ``padding_idx`` row.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    m.weight.data.normal_(0, 0.1)
    return m


def PositionalEmbedding(num_embeddings, embedding_dim, padding_idx):
    """Create a ``LearnedPositionalEmbedding`` with weights drawn from N(0, 0.1)."""
    m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx)
    m.weight.data.normal_(0, 0.1)
    return m


def Linear(in_features, out_features, dropout=0.0):
    """Linear layer with dropout-scaled normal init (input: N x T x C).

    Weights are drawn with std ``sqrt((1 - dropout) / in_features)`` and the
    bias is zeroed. No weight normalization is applied in this function.
    """
    m = nn.Linear(in_features, out_features)
    m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))
    m.bias.data.zero_()
    return m


def LinearizedConv1d(in_channels, out_channels, kernel_size, dropout=0.0, **kwargs):
    """``LinearizedConvolution`` layer (optimized for decoding) with scaled init.

    Weights are drawn with std ``sqrt(4 * (1 - dropout) / (k * in_channels))``
    and the bias is zeroed; extra kwargs go to the layer constructor. No
    weight normalization is applied in this function.
    """
    m = LinearizedConvolution(in_channels, out_channels, kernel_size, **kwargs)
    std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels))
    m.weight.data.normal_(mean=0, std=std)
    m.bias.data.zero_()
    return m


def ConvTBC(in_channels, out_channels, kernel_size, dropout=0.0, **kwargs):
    """``fairseq.modules.ConvTBC`` layer with the same scaled init as above.

    No weight normalization is applied in this function.
    """
    # Imported locally: inside this function the name ConvTBC refers to the
    # fairseq module class rather than to this factory function.
    from fairseq.modules import ConvTBC

    m = ConvTBC(in_channels, out_channels, kernel_size, **kwargs)
    std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels))
    m.weight.data.normal_(mean=0, std=std)
    m.bias.data.zero_()
    return m


@register_model_architecture("fconv_self_att", "fconv_self_att")
def base_architecture(args):
    """Fill in defaults for every ``fconv_self_att`` option missing from args.

    Layer specs and boolean switches are stored as strings because the model
    builder evaluates them as Python expressions.
    """
    args.dropout = getattr(args, "dropout", 0.1)
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512)
    args.encoder_layers = getattr(args, "encoder_layers", "[(512, 3)] * 3")
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512)
    args.decoder_layers = getattr(args, "decoder_layers", "[(512, 3)] * 8")
    args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 256)
    args.decoder_attention = getattr(args, "decoder_attention", "True")
    args.self_attention = getattr(args, "self_attention", "False")
    args.encoder_attention = getattr(args, "encoder_attention", "False")
    args.multihead_attention_nheads = getattr(args, "multihead_attention_nheads", 1)
    args.multihead_self_attention_nheads = getattr(
        args, "multihead_self_attention_nheads", 1
    )
    args.encoder_attention_nheads = getattr(args, "encoder_attention_nheads", 1)
    args.project_input = getattr(args, "project_input", "False")
    args.gated_attention = getattr(args, "gated_attention", "False")
    args.downsample = getattr(args, "downsample", "False")
    args.pretrained_checkpoint = getattr(args, "pretrained_checkpoint", "")
    args.pretrained = getattr(args, "pretrained", "False")


@register_model_architecture("fconv_self_att", "fconv_self_att_wp")
def fconv_self_att_wp(args):
    """Defaults for the ``fconv_self_att_wp`` variant.

    Sets its own defaults first, then defers to ``base_architecture`` for
    anything still unset.
    """
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256)
    args.encoder_layers = getattr(
        args, "encoder_layers", "[(128, 3)] * 2 + [(512,3)] * 1"
    )
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 256)
    args.decoder_layers = getattr(
        args, "decoder_layers", "[(512, 4)] * 4 + [(768, 4)] * 2 + [(1024, 4)] * 1"
    )
    args.decoder_out_embed_dim = getattr(args, "decoder_out_embed_dim", 256)
    args.self_attention = getattr(args, "self_attention", "True")
    args.multihead_self_attention_nheads = getattr(
        args, "multihead_self_attention_nheads", 4
    )
    args.project_input = getattr(args, "project_input", "True")
    args.gated_attention = getattr(args, "gated_attention", "True")
    args.downsample = getattr(args, "downsample", "True")
    base_architecture(args)
diff --git a/fairseq/fairseq/models/hubert/__init__.py b/fairseq/fairseq/models/hubert/__init__.py new file mode 100644 index 0000000..a1b0eab --- /dev/null +++ b/fairseq/fairseq/models/hubert/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from .hubert import * # noqa +from .hubert_asr import * # noqa diff --git a/fairseq/fairseq/models/hubert/hubert.py b/fairseq/fairseq/models/hubert/hubert.py new file mode 100644 index 0000000..cc3b777 --- /dev/null +++ b/fairseq/fairseq/models/hubert/hubert.py @@ -0,0 +1,576 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +from omegaconf import II + +from fairseq import utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.wav2vec.wav2vec2 import ( + EXTRACTOR_MODE_CHOICES, + MASKING_DISTRIBUTION_CHOICES, + LAYER_TYPE_CHOICES, + ConvFeatureExtractionModel, + TransformerEncoder, +) +from fairseq.modules import GradMultiply, LayerNorm +from fairseq.tasks.hubert_pretraining import ( + HubertPretrainingConfig, + HubertPretrainingTask, +) + +logger = logging.getLogger(__name__) + + +@dataclass +class HubertConfig(FairseqDataclass): + label_rate: float = II("task.label_rate") + + extractor_mode: EXTRACTOR_MODE_CHOICES = field( + default="default", + metadata={ + "help": "mode for feature extractor. 
default has a single group " + "norm with d groups in the first conv block, whereas layer_norm " + "has layer norms in every block (meant to use with normalize=True)" + }, + ) + encoder_layers: int = field( + default=12, metadata={"help": "num encoder layers in the transformer"} + ) + encoder_embed_dim: int = field( + default=768, metadata={"help": "encoder embedding dimension"} + ) + encoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "encoder embedding dimension for FFN"} + ) + encoder_attention_heads: int = field( + default=12, metadata={"help": "num encoder attention heads"} + ) + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="gelu", metadata={"help": "activation function to use"} + ) + layer_type: LAYER_TYPE_CHOICES = field( + default="transformer", metadata={"help": "layer type in encoder"} + ) + + # dropouts + dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for the transformer"}, + ) + attention_dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for attention weights"}, + ) + activation_dropout: float = field( + default=0.0, + metadata={"help": "dropout probability after activation in FFN"}, + ) + encoder_layerdrop: float = field( + default=0.0, + metadata={"help": "probability of dropping a tarnsformer layer"}, + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + dropout_features: float = field( + default=0.0, + metadata={"help": "dropout to apply to the features (after feat extr)"}, + ) + + final_dim: int = field( + default=0, + metadata={ + "help": "project final representations and targets to this many " + "dimensions. 
set to encoder_embed_dim is <= 0" + }, + ) + untie_final_proj: bool = field( + default=False, + metadata={"help": "use separate projection for each target"}, + ) + layer_norm_first: bool = field( + default=False, + metadata={"help": "apply layernorm first in the transformer"}, + ) + conv_feature_layers: str = field( + default="[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2", + metadata={ + "help": "string describing convolutional feature extraction " + "layers in form of a python list that contains " + "[(dim, kernel_size, stride), ...]" + }, + ) + conv_bias: bool = field( + default=False, metadata={"help": "include bias in conv encoder"} + ) + logit_temp: float = field( + default=0.1, metadata={"help": "temperature to divide logits by"} + ) + target_glu: bool = field( + default=False, metadata={"help": "adds projection + glu to targets"} + ) + feature_grad_mult: float = field( + default=1.0, + metadata={"help": "multiply feature extractor var grads by this"}, + ) + + # masking + mask_length: int = field(default=10, metadata={"help": "mask length"}) + mask_prob: float = field( + default=0.65, + metadata={"help": "probability of replacing a token with mask"}, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose mask length"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + mask_min_space: int = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + + # channel masking + mask_channel_length: int = field( + default=10, + metadata={"help": "length of the mask for features (channels)"}, + ) + mask_channel_prob: float = field( + default=0.0, + metadata={"help": "probability of replacing a feature with 0"}, + ) + 
mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, + metadata={"help": "whether to allow channel masks to overlap"}, + ) + mask_channel_min_space: int = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + + # positional embeddings + conv_pos: int = field( + default=128, + metadata={"help": "number of filters for convolutional positional embeddings"}, + ) + conv_pos_groups: int = field( + default=16, + metadata={"help": "number of groups for convolutional positional embedding"}, + ) + conv_pos_batch_norm: bool = field( + default=False, + metadata={ + "help": "use batch norm instead of weight norm in conv_pos (for bf16 models)" + }, + ) + + latent_temp: Tuple[float, float, float] = field( + default=(2, 0.5, 0.999995), + metadata={"help": "legacy (to be removed)"}, + ) + + # loss computation + skip_masked: bool = field( + default=False, + metadata={"help": "skip computing losses over masked frames"}, + ) + skip_nomask: bool = field( + default=False, + metadata={"help": "skip computing losses over unmasked frames"}, + ) + + checkpoint_activations: bool = field( + default=False, + metadata={"help": "recompute activations and save memory for extra compute"}, + ) + + # FP16 optimization + required_seq_len_multiple: int = field( + default=2, + metadata={ + "help": "pad the input to encoder such that the sequence length is divisible by multiple" + }, + ) + + # Conformer + depthwise_conv_kernel_size: int = field( + default=31, + metadata={ + "help": "depthwise-conv-kernel-size for convolution in conformer layer" + }, + ) + attn_type: str = field( + default="", + metadata={"help": 
"if espnet use ESPNET MHA"}, + ) + pos_enc_type: str = field( + default="abs", + metadata={"help": "Positional encoding type to use in conformer"}, + ) + fp16: bool = field(default=False, metadata={"help": "If fp16 is being used"}) + + +@register_model("hubert", dataclass=HubertConfig) +class HubertModel(BaseFairseqModel): + def __init__( + self, + cfg: HubertConfig, + task_cfg: HubertPretrainingConfig, + dictionaries: List[Dictionary], + ) -> None: + super().__init__() + logger.info(f"HubertModel Config: {cfg}") + + feature_enc_layers = eval(cfg.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + 
@classmethod
def build_model(cls, cfg: HubertConfig, task: HubertPretrainingTask):
    """Create a ``HubertModel`` from a model config and its pre-training task.

    The task supplies both its own config (``task.cfg``) and the label
    dictionaries (``task.dictionaries``) that the constructor expects.
    """
    # The class is named explicitly (not ``cls``), exactly as before.
    return HubertModel(cfg, task.cfg, task.dictionaries)
def compute_nce(self, x, pos, negs):
    """Score ``x`` against one positive and a stack of negative targets.

    Cosine similarity is computed between ``x`` and every candidate
    (positive first, then the negatives), divided by ``self.logit_temp``,
    and returned with the candidate axis last, so column 0 always holds the
    positive. Negatives that are element-wise identical to the positive get
    ``-inf`` so they cannot compete with it.
    """
    # Per-(negative, sample) flag: is this negative the same vector as pos?
    duplicate_of_pos = (pos == negs).all(-1)

    # Candidate stack: index 0 is the positive, 1.. are the negatives.
    candidates = torch.cat([pos.unsqueeze(0), negs], dim=0)

    scores = torch.cosine_similarity(x.float(), candidates.float(), dim=-1)
    scores = scores.type_as(x)
    scores /= self.logit_temp

    if duplicate_of_pos.any():
        # `scores[1:]` is a view, so the masked write lands in `scores`.
        scores[1:][duplicate_of_pos] = float("-inf")

    # (num_cls+1, num_x) -> (num_x, num_cls+1)
    return scores.transpose(0, 1)
torch.Tensor, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + def compute_pred(proj_x, target, label_embs): + # compute logits for the i-th label set + y = torch.index_select(label_embs, 0, target.long()) + negs = label_embs.unsqueeze(1).expand(-1, proj_x.size(0), -1) + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + # proj_x: (S, D) + # y: (S, D) + # negs: (Neg, S, D) + return self.compute_nce(proj_x, y, negs) + + label_embs_list = self.label_embs_concat.split(self.num_classes, 0) + + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = self.final_proj(x[masked_indices]) + if self.untie_final_proj: + 
def get_logits(self, net_output, is_masked=True):
    """Return the float32 logits for masked or unmasked frames.

    Reads ``net_output["logit_m_list"]`` when ``is_masked`` is true and
    ``net_output["logit_u_list"]`` otherwise. ``None`` entries are dropped,
    so the result may be shorter than the stored list.
    """
    key = "logit_m_list" if is_masked else "logit_u_list"
    return [logits.float() for logits in net_output[key] if logits is not None]
def remove_pretraining_modules(self):
    """Detach the modules that only the pre-training loss path uses.

    Sets ``target_glu`` and ``final_proj`` to ``None``. After this call
    only the ``features_only`` path of ``forward`` remains usable, because
    the logit computation reads ``final_proj``.
    """
    self.target_glu = None
    self.final_proj = None
transformer and before final projection"}, + ) + dropout: float = field( + default=0.0, + metadata={"help": "dropout probability inside hubert model"}, + ) + attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights " "inside hubert model" + }, + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " "inside hubert model" + }, + ) + encoder_embed_dim: Optional[int] = field( + default=768, metadata={"help": "encoder embedding dimension"} + ) + + # masking + apply_mask: bool = field( + default=False, metadata={"help": "apply masking during fine-tuning"} + ) + mask_length: int = field( + default=10, metadata={"help": "repeat the mask indices multiple times"} + ) + mask_prob: float = field( + default=0.5, + metadata={ + "help": "probability of replacing a token with mask " + "(normalized by length)" + }, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose masks"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + + # channel masking + mask_channel_length: int = field( + default=10, + metadata={"help": "length of the mask for features (channels)"}, + ) + mask_channel_prob: float = field( + default=0.0, + metadata={"help": "probability of replacing a feature with 0"}, + ) + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_channel_overlap: bool = 
def get_logits(self, net_output):
    """Return encoder logits with padded frames forced onto class 0.

    Args:
        net_output: dict with ``"encoder_out"`` (logits whose leading axes
            are time then batch) and ``"encoder_padding_mask"`` (batch x
            time boolean mask, or ``None``).

    Returns:
        The logits tensor from ``net_output``. When any frame is padded
        the tensor is modified in place: padded frames get ``0`` for
        class 0 and ``-inf`` for every other class.
    """
    logits = net_output["encoder_out"]
    padding = net_output["encoder_padding_mask"]
    if padding is not None and padding.any():
        # B x T -> T x B so the mask lines up with the logits' leading axes.
        padding = padding.T
        # Boolean-mask indexing returns a copy, so the former chained form
        # `logits[padding][..., 0] = 0` wrote into a temporary and left
        # `logits` untouched. Build the replacement row once and write it
        # with a single masked assignment, which does update `logits`.
        masked_row = logits.new_full((logits.size(-1),), float("-inf"))
        masked_row[0] = 0
        logits[padding] = masked_row

    return logits
"decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field(default=6, metadata={"help": "num of decoder layers"}) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "decoder layerdrop chance"} + ) + decoder_attention_heads: int = field( + default=4, metadata={"help": "num decoder attention heads"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + decoder_normalize_before: bool = field( + default=False, metadata={"help": "apply layernorm before each decoder block"} + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings (outside self attention)" + }, + ) + decoder_dropout: float = field( + default=0.0, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN inside the decoder" + }, + ) + max_target_positions: int = field( + default=2048, metadata={"help": "max target positions"} + ) + share_decoder_input_output_embed: bool = field( + default=False, metadata={"help": "share decoder input and output embeddings"} + ) + autoregressive: bool = II("task.autoregressive") + seq2seq_path: str = field( + default="", + metadata={"help": "reset_dict"}, + ) + reset_dict: bool = field( + default=False, + metadata={"help": "reset_dict"}, + ) + + +@register_model("hubert_seq2seq", dataclass=HubertSeq2SeqConfig) +class HubertSeq2SeqModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def build_model(cls, cfg: HubertSeq2SeqConfig, 
def load_state_dict(
    self,
    state_dict,
    strict=True,
    model_cfg=None,
    args: Optional[Namespace] = None,
):
    """Load weights, optionally discarding the decoder vocabulary tensors.

    When ``model_cfg.reset_dict`` is truthy, the decoder's input and output
    embedding tensors are removed from ``state_dict`` (the dict is modified
    in place) and loading is forced to be non-strict, so a checkpoint
    trained with a different dictionary can still initialise the rest of
    the model.

    Args:
        state_dict: mapping of parameter names to tensors.
        strict: forwarded to the parent unless ``reset_dict`` overrides it.
        model_cfg: optional config; may be ``None`` (``build_model`` calls
            this method without it).
        args: legacy namespace forwarded unchanged to the parent.

    Returns:
        Whatever the parent ``load_state_dict`` returns.
    """
    # `model_cfg` defaults to None; dereferencing it unconditionally raised
    # AttributeError on every call that did not pass a config.
    if model_cfg is not None and getattr(model_cfg, "reset_dict", False):
        logger.warning("Overriding loading strict state dict!")
        # pop(..., None) tolerates checkpoints lacking one of the tensors
        # (e.g. no separate `embed_out` when embeddings are shared).
        for key in ("decoder.embed_out", "decoder.embed_tokens.weight"):
            state_dict.pop(key, None)
        strict = False
    return super().load_state_dict(state_dict, strict, model_cfg, args)
"dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + assert cfg.normalize == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + pretrain_task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + pretrain_task.load_state_dict(state["task_state"]) + else: + pretrain_task.load_state_dict(task.state_dict()) + + model = pretrain_task.build_model(w2v_args.model, from_checkpoint=True) + if state is not None and not cfg.no_pretrained_weights: + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(pretrain_task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + if task.target_dictionary is not None and not cfg.autoregressive: + self.proj = Linear(d, len(task.target_dictionary)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source, padding_mask, tbc=True, **kwargs): + + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if self.proj: + x = self.proj(x) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask, + } + + def reorder_encoder_out(self, 
def max_positions(self):
    """Maximum input length supported by the encoder."""
    # Always None: this encoder reports no fixed limit.
    # NOTE(review): presumably callers treat None as "unbounded" — confirm.
    return None
+ """ + + def __init__( + self, + cfg: HubertSeq2SeqConfig, + dictionary, + embed_tokens, + no_encoder_attn=False, + ): + super().__init__(dictionary) + + self.dropout = cfg.decoder_dropout + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder_embed_dim + self.output_embed_dim = cfg.decoder_embed_dim + + self.layerdrop = cfg.decoder_layerdrop + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + self.embed_scale = math.sqrt(embed_dim) # todo: try with input_embed_dim + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder_learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + + # TODO: update this when transformer gets converted to dataclass configs + transformer_cfg = copy.deepcopy(cfg) + with open_dict(transformer_cfg): + transformer_cfg.dropout = transformer_cfg.decoder_dropout + transformer_cfg.attention_dropout = ( + transformer_cfg.decoder_attention_dropout + ) + transformer_cfg.activation_dropout = ( + transformer_cfg.decoder_activation_dropout + ) + + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + TransformerDecoderLayer(transformer_cfg, no_encoder_attn) + for _ in range(transformer_cfg.decoder_layers) + ] + ) + + if not self.share_input_output_embed: + self.embed_out = nn.Parameter( + torch.Tensor(len(dictionary), self.output_embed_dim) + ) + nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim**-0.5) + + if transformer_cfg.decoder_normalize_before: + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + def forward( + self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused + ): + """ + 
def extract_features(
    self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused
):
    """
    Similar to *forward* but only return features.

    Args:
        prev_output_tokens: previous decoder outputs; when
            *incremental_state* is given only the last position is used
        encoder_out (dict, optional): read for the keys ``"encoder_out"``
            and ``"padding_mask"``, which are passed to every layer
        incremental_state (dict, optional): forwarded to the positional
            embedding and to every layer

    Returns:
        tuple:
            - the decoder's features of shape `(batch, tgt_len, embed_dim)`
            - a dictionary with any model-specific outputs
    """

    # embed positions
    # Computed on the full token history *before* it is truncated below.
    positions = (
        self.embed_positions(
            prev_output_tokens, incremental_state=incremental_state
        )
        if self.embed_positions is not None
        else None
    )

    # Incremental decoding: keep only the newest step of tokens/positions.
    if incremental_state is not None:
        prev_output_tokens = prev_output_tokens[:, -1:]
        if positions is not None:
            positions = positions[:, -1:]

    # embed tokens and positions
    x = self.embed_scale * self.embed_tokens(prev_output_tokens)

    if self.project_in_dim is not None:
        x = self.project_in_dim(x)

    if positions is not None:
        x += positions
    x = F.dropout(x, p=self.dropout, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)
    attn = None

    inner_states = [x]

    # decoder layers
    # A padding mask is only built when at least one pad token is present.
    self_attn_padding_mask = None
    if prev_output_tokens.eq(self.padding_idx).any():
        self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx)
    for layer in self.layers:
        # LayerDrop: in training a layer runs only when the draw exceeds
        # `self.layerdrop`; outside training every layer runs.
        dropout_probability = np.random.random()
        if not self.training or (dropout_probability > self.layerdrop):
            x, attn, _ = layer(
                x,
                encoder_out["encoder_out"] if encoder_out is not None else None,
                encoder_out["padding_mask"] if encoder_out is not None else None,
                incremental_state,
                # The future mask is skipped in incremental mode, where x
                # holds a single time step.
                self_attn_mask=self.buffered_future_mask(x)
                if incremental_state is None
                else None,
                self_attn_padding_mask=self_attn_padding_mask,
            )
            inner_states.append(x)

    if self.layer_norm:
        x = self.layer_norm(x)

    # T x B x C -> B x T x C
    x = x.transpose(0, 1)

    # `attn` comes from the last layer that ran (None if every layer was
    # dropped); `inner_states` holds the T x B x C input plus each executed
    # layer's output.
    return x, {"attn": attn, "inner_states": inner_states}
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Build an ``nn.Embedding`` with scaled-normal initialisation.

    Weights are drawn from ``N(0, embedding_dim ** -0.5)``. When
    ``padding_idx`` is given, that row is reset to zeros afterwards.

    Args:
        num_embeddings: vocabulary size.
        embedding_dim: size of each embedding vector.
        padding_idx: index of the padding symbol, or ``None`` for none.

    Returns:
        The initialised ``nn.Embedding`` module.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5)
    # Indexing with None yields a view of the *whole* table
    # (`weight[None]` == `weight.unsqueeze(0)`), so zeroing it
    # unconditionally would wipe every embedding when there is no padding
    # symbol. Only reset the row when one actually exists.
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m
class HuggingFaceGPT2Decoder(FairseqIncrementalDecoder):
    """fairseq decoder wrapper around Hugging Face's ``GPT2LMHeadModel``.

    The GPT-2 model is sized from ``args`` and from the task's target
    dictionary. Position id 0 is reserved for padding: both the padding
    token embedding and position embedding 0 are zeroed at construction.
    """

    def __init__(self, args, task):
        """Build the GPT-2 model.

        Raises:
            ImportError: if the ``transformers`` package is not installed.
        """
        try:
            from transformers import GPT2Config, GPT2LMHeadModel
        except ImportError:
            raise ImportError(
                "\n\nPlease install huggingface/transformers with:"
                "\n\n pip install transformers"
            )

        super().__init__(task.target_dictionary)

        config = GPT2Config(
            vocab_size=len(task.target_dictionary),
            # One extra position because id 0 is used for padding.
            n_positions=args.max_target_positions + 1,
            n_ctx=args.max_target_positions,
            n_embd=args.embed_dim,
            n_layer=args.num_layers,
            n_head=args.num_attention_heads,
            resid_pdrop=args.dropout,
            embd_pdrop=args.dropout,
            attn_pdrop=args.attention_dropout,
            layer_norm_epsilon=1e-6,
        )
        self.model = GPT2LMHeadModel(config)

        # set zero embedding for padding symbol
        self.pad_idx = task.target_dictionary.pad()
        self.model.transformer.wte.weight.data[self.pad_idx].zero_()
        self.model.transformer.wpe.weight.data[0].zero_()

    def forward(
        self,
        prev_output_tokens,
        src_lengths=None,
        incremental_state: Optional[Dict[str, List[torch.Tensor]]] = None,
        encoder_out=None,
    ):
        """Return a 1-tuple holding the LM-head logits for the tokens.

        ``src_lengths`` and ``encoder_out`` are accepted but not used.
        """
        features = self.extract_features(prev_output_tokens, incremental_state)
        lm_logits = self.model.lm_head(features)
        return (lm_logits,)

    def extract_features(
        self,
        prev_output_tokens,
        incremental_state: Optional[Dict[str, List[torch.Tensor]]] = None,
    ):
        """Run the GPT-2 transformer and return its last hidden states.

        Padding tokens are excluded from attention and mapped to position
        id 0; real tokens get position ids ``1..T`` by column.
        """
        # NOTE(review): an empty dict is falsy, so caching is skipped until
        # some other component has put an entry into `incremental_state` —
        # confirm this is the intended behaviour.
        if incremental_state:
            # The state dict must be passed here, matching the
            # `set_incremental_state(incremental_state, "past", ...)` call
            # below; it was previously omitted from this lookup.
            past = self.get_incremental_state(incremental_state, "past")
        else:
            past = None

        # don't attend to padding symbols
        attention_mask = prev_output_tokens.ne(self.pad_idx).int()

        # set position ids to exclude padding symbols
        position_ids = attention_mask * (
            torch.arange(1, 1 + prev_output_tokens.size(1))
            .to(prev_output_tokens)
            .repeat(prev_output_tokens.size(0), 1)
        )

        # NOTE(review): newer `transformers` releases may name this keyword
        # `past_key_values` — confirm against the pinned version.
        outputs = self.model.transformer(
            input_ids=prev_output_tokens,
            past=past,
            attention_mask=attention_mask,
            position_ids=position_ids,
        )
        last_hidden_states = outputs[0]

        if incremental_state:
            self.set_incremental_state(incremental_state, "past", outputs[1])

        return last_hidden_states

    def max_positions(self):
        """Longest usable sequence: ``n_positions`` minus the padding slot."""
        return self.model.config.n_positions - 1
@register_model_architecture("hf_gpt2", "hf_gpt2_medium")
def hf_gpt2_medium(args):
    """Fill in the ``hf_gpt2_medium`` defaults, then the shared base defaults.

    Only attributes that are missing from ``args`` receive the fallback
    value; anything already set on ``args`` is left as it is.
    """
    medium_defaults = (
        ("embed_dim", 1024),
        ("num_attention_heads", 16),
        ("num_layers", 24),
    )
    for option, fallback in medium_defaults:
        setattr(args, option, getattr(args, option, fallback))
    default_architecture(args)
+ +import math +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import utils +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, + register_model_architecture, +) +from fairseq.modules import ( + AdaptiveSoftmax, + DynamicConv_scripatable as DynamicConv, + FairseqDropout, + LayerNorm, + LightweightConv, + MultiheadAttention, + PositionalEmbedding, +) +from fairseq.utils import safe_hasattr +from torch import Tensor + + +@register_model("lightconv") +class LightConvModel(FairseqEncoderDecoderModel): + """ + LightConv and DynamicConv model from `"Pay Less Attention with Lightweight and Dynamic Convolutions" (Wu, et al, 2019) + <https://openreview.net/pdf?id=SkVhlh09tX>`_. + To use LightConv please set ``--encoder-conv-type lightweight --decoder-conv-type lightweight`` + To use DynamicConv please set ``--encoder-conv-type dynamic --decoder-conv-type dynamic`` + + Args: + encoder (LightConvEncoder): the encoder + decoder (LightConvDecoder): the decoder + + The LightConv model provides the following named architectures and + command-line arguments: + + .. 
argparse:: + :ref: fairseq.models.lightconv_parser + :prog: + """ + + @classmethod + def hub_models(cls): + # fmt: off + + def moses_subword(path): + return { + 'path': path, + 'tokenizer': 'moses', + 'bpe': 'subword_nmt', + } + + return { + 'lightconv.no_glu.iwslt14.de-en': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/iwslt14.de-en.lightconv.tar.gz'), + 'dynamicconv.no_glu.iwslt14.de-en': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/iwslt14.de-en.dynamicconv.tar.gz'), + 'lightconv.no_glu.wmt16.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.lightconv.tar.gz'), + 'dynamicconv.no_glu.wmt16.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.dynamicconv.tar.gz'), + 'lightconv.glu.wmt16.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.lightconv-glu.tar.gz'), + 'dynamicconv.glu.wmt16.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.dynamicconv-glu.tar.gz'), + 'lightconv.glu.wmt17.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.lightconv-glu.tar.gz'), + 'dynamicconv.glu.wmt17.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt16.en-de.joined-dict.dynamicconv-glu.tar.gz'), + 'lightconv.glu.wmt14.en-fr': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt14.en-fr.joined-dict.lightconv-glu.tar.gz'), + 'dynamicconv.glu.wmt14.en-fr': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt14.en-fr.joined-dict.dynamicconv-glu.tar.gz'), + 'lightconv.glu.wmt17.zh-en': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt17.zh-en.lightconv-glu.tar.gz'), + 'dynamicconv.glu.wmt17.zh-en': 
moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/dynamicconv/wmt17.zh-en.dynamicconv-glu.tar.gz'), + } + # fmt: on + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after ReLU in FFN", + ) + parser.add_argument( + "--input-dropout", + type=float, + metavar="D", + help="dropout probability of the inputs", + ) + parser.add_argument( + "--encoder-embed-path", + type=str, + metavar="STR", + help="path to pre-trained encoder embedding", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-conv-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads or LightConv/DynamicConv heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--encoder-learned-pos", + action="store_true", + help="use learned positional embeddings in the encoder", + ) + parser.add_argument( + "--decoder-embed-path", + type=str, + metavar="STR", + help="path to pre-trained decoder embedding", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + 
help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-conv-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads or LightConv/DynamicConv heads", + ) + parser.add_argument( + "--decoder-learned-pos", + action="store_true", + help="use learned positional embeddings in the decoder", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--share-all-embeddings", + action="store_true", + help="share encoder, decoder and output embeddings" + " (requires shared dictionary and embed dim)", + ) + parser.add_argument( + "--adaptive-softmax-cutoff", + metavar="EXPR", + help="comma separated list of adaptive softmax cutoff points. 
" + "Must be used with adaptive_loss criterion", + ), + parser.add_argument( + "--adaptive-softmax-dropout", + type=float, + metavar="D", + help="sets adaptive softmax dropout for the tail projections", + ) + + """LightConv and DynamicConv arguments""" + parser.add_argument( + "--encoder-kernel-size-list", + type=lambda x: utils.eval_str_list(x, int), + help='list of kernel size (default: "[3,7,15,31,31,31,31]")', + ) + parser.add_argument( + "--decoder-kernel-size-list", + type=lambda x: utils.eval_str_list(x, int), + help='list of kernel size (default: "[3,7,15,31,31,31]")', + ) + parser.add_argument( + "--encoder-glu", type=utils.eval_bool, help="glu after in proj" + ) + parser.add_argument( + "--decoder-glu", type=utils.eval_bool, help="glu after in proj" + ) + parser.add_argument( + "--encoder-conv-type", + default="dynamic", + type=str, + choices=["dynamic", "lightweight"], + help="type of convolution", + ) + parser.add_argument( + "--decoder-conv-type", + default="dynamic", + type=str, + choices=["dynamic", "lightweight"], + help="type of convolution", + ) + parser.add_argument("--weight-softmax", default=True, type=utils.eval_bool) + parser.add_argument( + "--weight-dropout", + type=float, + metavar="D", + help="dropout probability for conv weights", + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + if not safe_hasattr(args, "max_source_positions"): + args.max_source_positions = 1024 + if not safe_hasattr(args, "max_target_positions"): + args.max_target_positions = 1024 + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + + def build_embedding(dictionary, embed_dim, path=None): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + 
utils.load_embedding(embed_dict, dictionary, emb) + return emb + + if args.share_all_embeddings: + if src_dict != tgt_dict: + raise RuntimeError( + "--share-all-embeddings requires a joined dictionary" + ) + if args.encoder_embed_dim != args.decoder_embed_dim: + raise RuntimeError( + "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim" + ) + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise RuntimeError( + "--share-all-embeddings not compatible with --decoder-embed-path" + ) + encoder_embed_tokens = build_embedding( + src_dict, args.encoder_embed_dim, args.encoder_embed_path + ) + decoder_embed_tokens = encoder_embed_tokens + args.share_decoder_input_output_embed = True + else: + encoder_embed_tokens = build_embedding( + src_dict, args.encoder_embed_dim, args.encoder_embed_path + ) + decoder_embed_tokens = build_embedding( + tgt_dict, args.decoder_embed_dim, args.decoder_embed_path + ) + + encoder = LightConvEncoder(args, src_dict, encoder_embed_tokens) + decoder = LightConvDecoder(args, tgt_dict, decoder_embed_tokens) + return LightConvModel(encoder, decoder) + + def forward( + self, + src_tokens: Tensor, + src_lengths: Tensor, + prev_output_tokens: Tensor, + ): + """ + (The forward method inherited from the base class has a **kwargs + argument in its input, which is not supported in torchscript. This + method overwrites the forward method definition without **kwargs.) + + Run the forward pass for an encoder-decoder model. + + First feed a batch of source tokens through the encoder. 
class LightConvEncoder(FairseqEncoder):
    """
    LightConv encoder consisting of *args.encoder_layers* layers. Each layer
    is a :class:`LightConvEncoderLayer`.

    Token embeddings are scaled by ``sqrt(embed_dim)``, optionally summed with
    positional embeddings, passed through dropout and then through the layer
    stack. A final LayerNorm is applied only when
    ``args.encoder_normalize_before`` is set.

    Args:
        args (argparse.Namespace): parsed command-line arguments
        dictionary (~fairseq.data.Dictionary): encoding dictionary
        embed_tokens (torch.nn.Embedding): input embedding
    """

    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(dictionary)
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )

        embed_dim = embed_tokens.embedding_dim
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = args.max_source_positions

        self.embed_tokens = embed_tokens
        self.embed_scale = math.sqrt(embed_dim)
        # Positional embeddings are skipped entirely when
        # --no-token-positional-embeddings is set.
        self.embed_positions = (
            PositionalEmbedding(
                args.max_source_positions,
                embed_dim,
                self.padding_idx,
                learned=args.encoder_learned_pos,
            )
            if not args.no_token_positional_embeddings
            else None
        )

        # One layer per entry of encoder_kernel_size_list (layer i uses
        # kernel size encoder_kernel_size_list[i]).
        self.layers = nn.ModuleList([])
        self.layers.extend(
            [
                LightConvEncoderLayer(
                    args, kernel_size=args.encoder_kernel_size_list[i]
                )
                for i in range(args.encoder_layers)
            ]
        )
        self.register_buffer("version", torch.Tensor([2]))
        self.normalize = args.encoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None

    def forward(
        self, src_tokens: Tensor, src_lengths: Optional[Tensor] = None
    ) -> Dict[str, List[Tensor]]:
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (LongTensor, optional): source lengths; not used in
                the computation, only echoed back in the output dict

        Returns:
            dict of single-element lists:
                - **encoder_out** (Tensor): the last encoder layer's output of
                  shape `(src_len, batch, embed_dim)`
                - **encoder_padding_mask** (Tensor): the positions of
                  padding elements of shape `(batch, src_len)`; the key is
                  present only when the batch contains padding
                - **src_lengths** (Tensor): present only when *src_lengths*
                  was passed in
        """
        # embed tokens and positions
        x = self.embed_scale * self.embed_tokens(src_tokens)
        if self.embed_positions is not None:
            x += self.embed_positions(src_tokens)
        x = self.dropout_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # compute padding mask; use None when nothing is padded so the layers
        # can skip the masking step
        encoder_padding_mask = src_tokens.eq(self.padding_idx)  # B x T
        if not encoder_padding_mask.any():
            encoder_mask = None
        else:
            encoder_mask = encoder_padding_mask

        # encoder layers
        for layer in self.layers:
            x = layer(x, encoder_mask)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        output_dict: Dict[str, List[Tensor]] = {}
        if src_lengths is not None:
            output_dict["src_lengths"] = [src_lengths]
        output_dict["encoder_out"] = [x]  # T x B x C
        if encoder_mask is not None:
            output_dict["encoder_padding_mask"] = [encoder_mask]  # B x T

        return output_dict

    @torch.jit.export
    def reorder_encoder_out(
        self, encoder_out: Dict[str, List[Tensor]], new_order: Tensor
    ):
        """
        Reorder encoder output according to *new_order*.

        Only ``encoder_out`` and ``encoder_padding_mask`` are carried over;
        any ``src_lengths`` entry is not copied into the result. The returned
        dict always contains both keys, using an empty list where the input
        had no value.

        Args:
            encoder_out: output from the ``forward()`` method
            new_order (LongTensor): desired order

        Returns:
            *encoder_out* rearranged according to *new_order*
        """
        # encoder_out is T x B x C, so the batch axis is dim 1
        if len(encoder_out["encoder_out"]) == 0:
            encoder = []
        else:
            encoder = [encoder_out["encoder_out"][0].index_select(1, new_order)]
        output_dict = {"encoder_out": encoder}

        # the padding mask is B x T, so the batch axis is dim 0
        if ("encoder_padding_mask" not in encoder_out) or (
            len(encoder_out["encoder_padding_mask"]) == 0
        ):
            encoder_padding_mask = []
        else:
            encoder_padding_mask = [
                encoder_out["encoder_padding_mask"][0].index_select(0, new_order)
            ]
        output_dict["encoder_padding_mask"] = encoder_padding_mask
        return output_dict

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        if self.embed_positions is None:
            return self.max_source_positions
        return min(self.max_source_positions, self.embed_positions.max_positions)
+ Default: ``False`` + """ + + def __init__( + self, args, dictionary, embed_tokens, no_encoder_attn=False, final_norm=True + ): + super().__init__(dictionary) + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.share_input_output_embed = args.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = args.decoder_embed_dim + output_embed_dim = args.decoder_output_dim + + padding_idx = embed_tokens.padding_idx + self.max_target_positions = args.max_target_positions + + self.embed_tokens = embed_tokens + self.embed_scale = math.sqrt(embed_dim) # todo: try with input_embed_dim + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + + self.embed_positions = ( + PositionalEmbedding( + args.max_target_positions, + embed_dim, + padding_idx, + learned=args.decoder_learned_pos, + ) + if not args.no_token_positional_embeddings + else None + ) + + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + LightConvDecoderLayer( + args, + no_encoder_attn, + kernel_size=args.decoder_kernel_size_list[i], + dictionary=dictionary, + ) + for i in range(args.decoder_layers) + ] + ) + + self.adaptive_softmax = None + self.output_projection = None + + self.project_out_dim = ( + Linear(embed_dim, output_embed_dim, bias=False) + if embed_dim != output_embed_dim and not args.tie_adaptive_weights + else None + ) + + if args.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + output_embed_dim, + utils.eval_str_list(args.adaptive_softmax_cutoff, type=int), + dropout=args.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, + factor=args.adaptive_softmax_factor, + tie_proj=args.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + 
self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + + else: + self.output_projection = nn.Linear( + output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=output_embed_dim**-0.5 + ) + self.register_buffer("version", torch.Tensor([2])) + self.normalize = args.decoder_normalize_before and final_norm + if self.normalize: + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + def forward( + self, + prev_output_tokens: Tensor, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + src_lengths: Optional[Any] = None, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + + Returns: + tuple: + - the last decoder layer's output of shape `(batch, tgt_len, + vocab)` + - the last decoder layer's attention weights of shape `(batch, + tgt_len, src_len)` + """ + # embed positions + positions = ( + self.embed_positions( + prev_output_tokens, + incremental_state=incremental_state, + ) + if self.embed_positions is not None + else None + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens.contiguous()) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + attn = None + + inner_states: List[Optional[Tensor]] = [x] + + # decoder layers + attn: Optional[Tensor] = None 
class LightConvEncoderLayer(nn.Module):
    """One encoder block: a convolution sub-block followed by a feed-forward one.

    Each sub-block is wrapped in a residual connection, with LayerNorm applied
    either before or after it depending on ``args.encoder_normalize_before``.

    Args:
        args (argparse.Namespace): parsed command-line arguments
        kernel_size: kernel size of the convolution
    """

    def __init__(self, args, kernel_size=0):
        super().__init__()
        owner_name = self.__class__.__name__

        def make_dropout(rate):
            return FairseqDropout(rate, module_name=owner_name)

        self.embed_dim = args.encoder_embed_dim
        self.conv_dim = args.encoder_conv_dim

        # Odd kernels get a single symmetric padding value; even kernels get
        # a (left, right) pair.
        if kernel_size % 2 == 1:
            padding_l = kernel_size // 2
        else:
            padding_l = ((kernel_size - 1) // 2, kernel_size // 2)

        # With GLU the input projection is twice as wide, since the gate
        # halves the last dimension again.
        gated = args.encoder_glu
        self.linear1 = Linear(
            self.embed_dim, 2 * self.conv_dim if gated else self.conv_dim
        )
        self.act = nn.GLU() if gated else None

        if args.encoder_conv_type == "lightweight":
            conv_cls = LightweightConv
        elif args.encoder_conv_type == "dynamic":
            conv_cls = DynamicConv
        else:
            raise NotImplementedError
        self.conv = conv_cls(
            self.conv_dim,
            kernel_size,
            padding_l=padding_l,
            weight_softmax=args.weight_softmax,
            num_heads=args.encoder_attention_heads,
            weight_dropout=args.weight_dropout,
        )
        self.linear2 = Linear(self.conv_dim, self.embed_dim)

        self.dropout_module = make_dropout(args.dropout)
        self.relu_dropout_module = make_dropout(args.relu_dropout)
        self.input_dropout_module = make_dropout(args.input_dropout)
        self.normalize_before = args.encoder_normalize_before
        self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
        self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)
        self.layer_norm1 = LayerNorm(self.embed_dim)
        self.layer_norm2 = LayerNorm(self.embed_dim)

    def forward(self, x, encoder_padding_mask: Optional[Tensor] = None) -> Tensor:
        """
        Args:
            x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)`
            encoder_padding_mask (ByteTensor): binary ByteTensor of shape
                `(batch, src_len)` where padding elements are indicated by ``1``.

        Returns:
            encoded output with the same layout as *x*,
            `(seq_len, batch, embed_dim)`
        """
        # ---- convolution sub-block ----
        shortcut = x
        if self.maybe_layer_norm(before=True):
            x = self.layer_norm1(x)
        x = self.linear1(self.input_dropout_module(x))
        if self.act is not None:
            x = self.act(x)
        if encoder_padding_mask is not None:
            # B x T -> T x B x 1 so the mask broadcasts over channels
            pad_positions = encoder_padding_mask.transpose(0, 1).unsqueeze(2)
            x = x.masked_fill(pad_positions, 0)
        x = self.dropout_module(self.linear2(self.conv(x)))
        x = shortcut + x
        if self.maybe_layer_norm(after=True):
            x = self.layer_norm1(x)

        # ---- feed-forward sub-block ----
        shortcut = x
        if self.maybe_layer_norm(before=True):
            x = self.layer_norm2(x)
        x = self.relu_dropout_module(F.relu(self.fc1(x)))
        x = self.dropout_module(self.fc2(x))
        x = shortcut + x
        if self.maybe_layer_norm(after=True):
            x = self.layer_norm2(x)
        return x

    def maybe_layer_norm(self, before: bool = False, after: bool = False):
        """Tell whether LayerNorm applies at this point (exactly one flag set)."""
        assert before ^ after, "Incorrect arguments"
        return after ^ self.normalize_before

    def extra_repr(self):
        template = "dropout={}, relu_dropout={}, input_dropout={}, normalize_before={}"
        return template.format(
            self.dropout_module.p,
            self.relu_dropout_module.p,
            self.input_dropout_module.p,
            self.normalize_before,
        )
+ Default: ``False`` + kernel_size: kernel size of the convolution + """ + + def __init__(self, args, no_encoder_attn=False, kernel_size=0, dictionary=None): + super().__init__() + self.embed_dim = args.decoder_embed_dim + self.conv_dim = args.decoder_conv_dim + if args.decoder_glu: + self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim) + self.act = nn.GLU() + else: + self.linear1 = Linear(self.embed_dim, self.conv_dim) + self.act = None + if args.decoder_conv_type == "lightweight": + self.conv = LightweightConv( + self.conv_dim, + kernel_size, + padding_l=kernel_size - 1, + weight_softmax=args.weight_softmax, + num_heads=args.decoder_attention_heads, + weight_dropout=args.weight_dropout, + ) + elif args.decoder_conv_type == "dynamic": + self.conv = DynamicConv( + self.conv_dim, + kernel_size, + padding_l=kernel_size - 1, + weight_softmax=args.weight_softmax, + num_heads=args.decoder_attention_heads, + weight_dropout=args.weight_dropout, + ) + else: + raise NotImplementedError + self.linear2 = Linear(self.conv_dim, self.embed_dim) + + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.relu_dropout_module = FairseqDropout( + args.relu_dropout, module_name=self.__class__.__name__ + ) + self.input_dropout_module = FairseqDropout( + args.input_dropout, module_name=self.__class__.__name__ + ) + self.normalize_before = args.decoder_normalize_before + + self.conv_layer_norm = LayerNorm(self.embed_dim) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = MultiheadAttention( + self.embed_dim, + args.decoder_attention_heads, + dropout=args.attention_dropout, + encoder_decoder_attention=True, + dictionary=dictionary, + ) + self.encoder_attn_layer_norm = LayerNorm(self.embed_dim) + + self.fc1 = Linear(self.embed_dim, args.decoder_ffn_embed_dim) + self.fc2 = Linear(args.decoder_ffn_embed_dim, self.embed_dim) + + self.final_layer_norm = 
LayerNorm(self.embed_dim) + self.need_attn = True + + def forward( + self, + x: Tensor, + encoder_out: Optional[Tensor], + encoder_padding_mask: Optional[Tensor], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + prev_conv_state: Optional[Tensor] = None, + prev_attn_state: Optional[Tuple[Tensor, Tensor]] = None, + conv_mask: Optional[Tensor] = None, + conv_padding_mask: Optional[Tensor] = None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are indicated by ``1``. + + Returns: + encoded output of shape `(batch, src_len, embed_dim)` + """ + residual = x + normalize = self.maybe_layer_norm(before=True) + if normalize: + x = self.conv_layer_norm(x) + if prev_conv_state is not None: + self.conv._set_input_buffer(incremental_state, prev_conv_state) + x = self.input_dropout_module(x) + x = self.linear1(x) + if self.act is not None: + x = self.act(x) + x = self.conv(x, incremental_state=incremental_state) + x = self.linear2(x) + x = self.dropout_module(x) + x = residual + x + normalize = self.maybe_layer_norm(after=True) + if normalize: + x = self.conv_layer_norm(x) + + attn: Optional[Tensor] = None + if self.encoder_attn is not None: + residual = x + normalize = self.maybe_layer_norm(before=True) + if normalize: + x = self.encoder_attn_layer_norm(x) + + if prev_attn_state is not None: + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_attn_state[0], + "prev_value": prev_attn_state[1], + } + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=(not self.training and self.need_attn), + ) + x = self.dropout_module(x) + x = residual + x + normalize = 
def Linear(in_features, out_features, bias=True):
    """Build an ``nn.Linear`` with Xavier-uniform weights and a zero bias.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        bias: whether the layer has a bias term; when it does, the bias is
            initialised to zero.

    Returns:
        The initialised ``nn.Linear`` module.
    """
    layer = nn.Linear(in_features, out_features, bias)
    nn.init.xavier_uniform_(layer.weight)
    if not bias:
        return layer
    nn.init.constant_(layer.bias, 0.0)
    return layer
args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.relu_dropout = getattr(args, "relu_dropout", 0.0) + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + args.encoder_conv_dim = getattr(args, "encoder_conv_dim", args.encoder_embed_dim) + args.decoder_conv_dim = getattr(args, "decoder_conv_dim", args.decoder_embed_dim) + + args.encoder_kernel_size_list = getattr( + args, "encoder_kernel_size_list", [3, 7, 15, 31, 31, 31, 31] + ) + args.decoder_kernel_size_list = getattr( + args, "decoder_kernel_size_list", [3, 7, 15, 31, 31, 31] + ) + if len(args.encoder_kernel_size_list) == 1: + args.encoder_kernel_size_list = ( + args.encoder_kernel_size_list * args.encoder_layers + ) + if len(args.decoder_kernel_size_list) == 1: + args.decoder_kernel_size_list = ( + args.decoder_kernel_size_list * 
args.decoder_layers + ) + assert ( + len(args.encoder_kernel_size_list) == args.encoder_layers + ), "encoder_kernel_size_list doesn't match encoder_layers" + assert ( + len(args.decoder_kernel_size_list) == args.decoder_layers + ), "decoder_kernel_size_list doesn't match decoder_layers" + args.encoder_glu = getattr(args, "encoder_glu", True) + args.decoder_glu = getattr(args, "decoder_glu", True) + args.input_dropout = getattr(args, "input_dropout", 0.1) + args.weight_dropout = getattr(args, "weight_dropout", args.attention_dropout) + + +@register_model_architecture("lightconv", "lightconv_iwslt_de_en") +def lightconv_iwslt_de_en(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 1024) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.encoder_layers = getattr(args, "encoder_layers", 7) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 1024) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.weight_dropout = getattr(args, "weight_dropout", 0.1) + args.encoder_glu = getattr(args, "encoder_glu", False) + args.decoder_glu = getattr(args, "decoder_glu", False) + args.input_dropout = getattr(args, "input_dropout", 0.0) + base_architecture(args) + + +@register_model_architecture("lightconv", "lightconv_wmt_en_de") +def lightconv_wmt_en_de(args): + base_architecture(args) + + +@register_model_architecture("lightconv", "lightconv_wmt_en_de_big") +def lightconv_wmt_en_de_big(args): + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + 
args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.3) + base_architecture(args) + + +@register_model_architecture("lightconv", "lightconv_wmt_en_fr_big") +def lightconv_wmt_en_fr_big(args): + args.dropout = getattr(args, "dropout", 0.1) + lightconv_wmt_en_de_big(args) + + +@register_model_architecture("lightconv", "lightconv_wmt_zh_en_big") +def lightconv_wmt_zh_en_big(args): + args.dropout = getattr(args, "dropout", 0.2) + args.attention_dropout = getattr(args, "attention_dropout", 0.2) + args.weight_dropout = getattr(args, "weight_dropout", 0.2) + lightconv_wmt_en_de_big(args) diff --git a/fairseq/fairseq/models/lightconv_lm.py b/fairseq/fairseq/models/lightconv_lm.py new file mode 100644 index 0000000..1d9efc4 --- /dev/null +++ b/fairseq/fairseq/models/lightconv_lm.py @@ -0,0 +1,306 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import ast

from fairseq import utils
from fairseq.models import (
    FairseqLanguageModel,
    register_model,
    register_model_architecture,
)
from fairseq.models.lightconv import Embedding, LightConvDecoder
from fairseq.modules import AdaptiveInput, CharacterTokenEmbedder


@register_model("lightconv_lm")
class LightConvLanguageModel(FairseqLanguageModel):
    """Decoder-only language model built around a ``LightConvDecoder``.

    The model is registered under the name ``lightconv_lm``; its
    architecture presets are ``lightconv_lm`` and ``lightconv_lm_gbw``
    (defined at the bottom of this module).
    """

    def __init__(self, decoder):
        super().__init__(decoder)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        parser.add_argument(
            "--dropout",
            default=0.1,
            type=float,
            metavar="D",
            help="dropout probability",
        )
        parser.add_argument(
            "--attention-dropout",
            default=0.0,
            type=float,
            metavar="D",
            help="dropout probability for attention weights",
        )
        parser.add_argument(
            "--relu-dropout",
            default=0.0,
            type=float,
            metavar="D",
            help="dropout probability after ReLU in FFN",
        )
        parser.add_argument(
            "--input-dropout",
            type=float,
            metavar="D",
            help="dropout probability of the inputs",
        )
        parser.add_argument(
            "--decoder-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension",
        )
        parser.add_argument(
            "--decoder-output-dim",
            type=int,
            metavar="N",
            help="decoder output dimension",
        )
        parser.add_argument(
            "--decoder-input-dim", type=int, metavar="N", help="decoder input dimension"
        )
        parser.add_argument(
            "--decoder-ffn-embed-dim",
            type=int,
            metavar="N",
            help="decoder embedding dimension for FFN",
        )
        parser.add_argument(
            "--decoder-layers", type=int, metavar="N", help="num decoder layers"
        )
        parser.add_argument(
            "--decoder-attention-heads",
            type=int,
            metavar="N",
            help="num decoder attention heads or LightConv/DynamicConv heads",
        )
        # NOTE: base_lm_architecture() forces this option to True regardless
        # of what is passed on the command line.
        parser.add_argument(
            "--decoder-normalize-before",
            default=False,
            action="store_true",
            help="apply layernorm before each decoder block",
        )
        parser.add_argument(
            "--adaptive-softmax-cutoff",
            metavar="EXPR",
            help="comma separated list of adaptive softmax cutoff points. "
            "Must be used with adaptive_loss criterion",
        )
        parser.add_argument(
            "--adaptive-softmax-dropout",
            type=float,
            metavar="D",
            help="sets adaptive softmax dropout for the tail projections",
        )
        parser.add_argument(
            "--adaptive-softmax-factor",
            type=float,
            metavar="N",
            help="adaptive softmax factor",
        )
        parser.add_argument(
            "--no-token-positional-embeddings",
            default=False,
            action="store_true",
            help="if set, disables positional embeddings (outside self attention)",
        )
        parser.add_argument(
            "--share-decoder-input-output-embed",
            default=False,
            action="store_true",
            help="share decoder input and output embeddings",
        )
        parser.add_argument(
            "--character-embeddings",
            default=False,
            action="store_true",
            help="if set, uses character embedding convolutions to produce token embeddings",
        )
        parser.add_argument(
            "--character-filters",
            type=str,
            metavar="LIST",
            default="[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]",
            help="character embedding filters as a Python literal list of tuples",
        )
        parser.add_argument(
            "--character-embedding-dim",
            type=int,
            metavar="N",
            default=4,
            help="size of character embeddings",
        )
        parser.add_argument(
            "--char-embedder-highway-layers",
            type=int,
            metavar="N",
            default=2,
            help="number of highway layers for character token embedder",
        )
        parser.add_argument(
            "--adaptive-input",
            default=False,
            action="store_true",
            help="if set, uses adaptive input",
        )
        parser.add_argument(
            "--adaptive-input-factor",
            type=float,
            metavar="N",
            help="adaptive input factor",
        )
        parser.add_argument(
            "--adaptive-input-cutoff",
            metavar="EXPR",
            help="comma separated list of adaptive input cutoff points.",
        )
        parser.add_argument(
            "--tie-adaptive-weights",
            action="store_true",
            help="if set, ties the weights of adaptive softmax and adaptive input",
        )
        parser.add_argument(
            "--tie-adaptive-proj",
            action="store_true",
            help="if set, ties the projection weights of adaptive softmax and adaptive input",
        )
        parser.add_argument(
            "--decoder-learned-pos",
            action="store_true",
            help="use learned positional embeddings in the decoder",
        )

        # LightConv and DynamicConv arguments
        parser.add_argument(
            "--decoder-kernel-size-list",
            type=lambda x: utils.eval_str_list(x, int),
            help='list of kernel size (default: "[3,7,15,31,31,31]")',
        )
        parser.add_argument(
            "--decoder-glu", type=utils.eval_bool, help="glu after in proj"
        )
        parser.add_argument(
            "--decoder-conv-type",
            default="dynamic",
            type=str,
            choices=["dynamic", "lightweight"],
            help="type of convolution",
        )
        parser.add_argument("--weight-softmax", default=True, type=utils.eval_bool)
        parser.add_argument(
            "--weight-dropout",
            type=float,
            metavar="D",
            help="dropout probability for conv weights",
        )

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance.

        Args:
            args: parsed options; missing values are filled in place by
                ``base_lm_architecture``.
            task: task object providing ``dictionary`` (input vocabulary)
                and ``output_dictionary`` (output vocabulary).

        Returns:
            LightConvLanguageModel: a model wrapping a ``LightConvDecoder``
            built without encoder attention and without a final norm.

        Raises:
            AssertionError: if ``--tie-adaptive-weights`` is set but the
                adaptive input/softmax settings are inconsistent.
            ValueError, SyntaxError: if ``--character-filters`` is not a
                valid Python literal (only with ``--character-embeddings``).
        """

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        # Fall back to the LM sample length when no explicit limits are given.
        if getattr(args, "max_source_positions", None) is None:
            args.max_source_positions = args.tokens_per_sample
        if getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = args.tokens_per_sample

        if args.character_embeddings:
            # The option arrives as a string from the command line; parse it
            # with ast.literal_eval instead of eval() so that only Python
            # literals are accepted and no arbitrary code can run.
            embed_tokens = CharacterTokenEmbedder(
                task.dictionary,
                ast.literal_eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.dictionary),
                task.dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                utils.eval_str_list(args.adaptive_input_cutoff, type=int),
            )
        else:
            embed_tokens = Embedding(
                len(task.dictionary), args.decoder_input_dim, task.dictionary.pad()
            )

        if args.tie_adaptive_weights:
            # Tying requires adaptive input and adaptive softmax to be
            # configured identically.
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert (
                args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
            ), "{} != {}".format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff
            )
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = LightConvDecoder(
            args,
            task.output_dictionary,
            embed_tokens,
            no_encoder_attn=True,
            final_norm=False,
        )
        return LightConvLanguageModel(decoder)


@register_model_architecture("lightconv_lm", "lightconv_lm")
def base_lm_architecture(args):
    """Fill in default hyper-parameters for the ``lightconv_lm`` architecture.

    Every option is only set when it is absent from ``args``, except
    ``decoder_normalize_before``, which is always forced to ``True``.
    ``args`` is modified in place.

    Raises:
        AssertionError: if ``decoder_kernel_size_list`` has neither one entry
            nor exactly ``decoder_layers`` entries.
    """
    # Options that add_args() defaults on the command line; repeated here
    # (same values) so namespaces that never went through that parser, e.g.
    # older checkpoints, do not fail with AttributeError further down.
    args.dropout = getattr(args, "dropout", 0.1)
    args.attention_dropout = getattr(args, "attention_dropout", 0.0)
    args.relu_dropout = getattr(args, "relu_dropout", 0.0)
    args.no_token_positional_embeddings = getattr(
        args, "no_token_positional_embeddings", False
    )
    args.share_decoder_input_output_embed = getattr(
        args, "share_decoder_input_output_embed", False
    )
    args.decoder_conv_type = getattr(args, "decoder_conv_type", "dynamic")
    args.weight_softmax = getattr(args, "weight_softmax", True)

    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512)
    args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 2048)
    args.decoder_layers = getattr(args, "decoder_layers", 6)
    args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8)
    args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None)
    args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0)
    args.adaptive_softmax_factor = getattr(args, "adaptive_softmax_factor", 4)
    args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False)

    args.character_embeddings = getattr(args, "character_embeddings", False)

    # Input, output and convolution widths default to the embedding width.
    args.decoder_output_dim = getattr(
        args, "decoder_output_dim", args.decoder_embed_dim
    )
    args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim)
    args.decoder_conv_dim = getattr(args, "decoder_conv_dim", args.decoder_embed_dim)

    # The model training is not stable without this
    args.decoder_normalize_before = True

    args.adaptive_input = getattr(args, "adaptive_input", False)
    args.adaptive_input_factor = getattr(args, "adaptive_input_factor", 4)
    args.adaptive_input_cutoff = getattr(args, "adaptive_input_cutoff", None)

    args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False)
    args.tie_adaptive_proj = getattr(args, "tie_adaptive_proj", False)

    args.decoder_kernel_size_list = getattr(
        args, "decoder_kernel_size_list", [3, 7, 15, 31, 31, 31]
    )
    # A single kernel size is broadcast to every decoder layer.
    if len(args.decoder_kernel_size_list) == 1:
        args.decoder_kernel_size_list = (
            args.decoder_kernel_size_list * args.decoder_layers
        )
    assert (
        len(args.decoder_kernel_size_list) == args.decoder_layers
    ), "decoder_kernel_size_list doesn't match decoder_layers"
    args.decoder_glu = getattr(args, "decoder_glu", True)
    args.input_dropout = getattr(args, "input_dropout", 0.1)
    args.weight_dropout = getattr(args, "weight_dropout", args.attention_dropout)


@register_model_architecture("lightconv_lm", "lightconv_lm_gbw")
def lightconv_lm_gbw(args):
    """Apply the ``lightconv_lm_gbw`` preset, then the base LM defaults.

    Only options missing from ``args`` are set; ``args`` is modified in place.
    """
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512)
    args.dropout = getattr(args, "dropout", 0.1)
    args.attention_dropout = getattr(args, "attention_dropout", 0.1)
    args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096)
    args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16)
    base_lm_architecture(args)


# --- Patch metadata and license banner of the next file in this dump,
# --- kept verbatim (as comments) so the file boundary is not lost:
# diff --git a/fairseq/fairseq/models/lstm.py b/fairseq/fairseq/models/lstm.py
# new file mode 100644
# index 0000000..8a29156
# --- /dev/null
# +++ b/fairseq/fairseq/models/lstm.py
# @@ -0,0 +1,755 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, + register_model_architecture, +) +from fairseq.modules import AdaptiveSoftmax, FairseqDropout +from torch import Tensor + + +DEFAULT_MAX_SOURCE_POSITIONS = 1e5 +DEFAULT_MAX_TARGET_POSITIONS = 1e5 + + +@register_model("lstm") +class LSTMModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--dropout', type=float, metavar='D', + help='dropout probability') + parser.add_argument('--encoder-embed-dim', type=int, metavar='N', + help='encoder embedding dimension') + parser.add_argument('--encoder-embed-path', type=str, metavar='STR', + help='path to pre-trained encoder embedding') + parser.add_argument('--encoder-freeze-embed', action='store_true', + help='freeze encoder embeddings') + parser.add_argument('--encoder-hidden-size', type=int, metavar='N', + help='encoder hidden size') + parser.add_argument('--encoder-layers', type=int, metavar='N', + help='number of encoder layers') + parser.add_argument('--encoder-bidirectional', action='store_true', + help='make all layers of encoder bidirectional') + parser.add_argument('--decoder-embed-dim', type=int, metavar='N', + help='decoder embedding dimension') + parser.add_argument('--decoder-embed-path', type=str, metavar='STR', + help='path to pre-trained decoder embedding') + parser.add_argument('--decoder-freeze-embed', action='store_true', + help='freeze decoder embeddings') + parser.add_argument('--decoder-hidden-size', type=int, metavar='N', + help='decoder hidden size') + parser.add_argument('--decoder-layers', type=int, metavar='N', + help='number of decoder 
layers') + parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N', + help='decoder output embedding dimension') + parser.add_argument('--decoder-attention', type=str, metavar='BOOL', + help='decoder attention') + parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR', + help='comma separated list of adaptive softmax cutoff points. ' + 'Must be used with adaptive_loss criterion') + parser.add_argument('--share-decoder-input-output-embed', default=False, + action='store_true', + help='share decoder input and output embeddings') + parser.add_argument('--share-all-embeddings', default=False, action='store_true', + help='share encoder, decoder and output embeddings' + ' (requires shared dictionary and embed dim)') + + # Granular dropout settings (if not specified these default to --dropout) + parser.add_argument('--encoder-dropout-in', type=float, metavar='D', + help='dropout probability for encoder input embedding') + parser.add_argument('--encoder-dropout-out', type=float, metavar='D', + help='dropout probability for encoder output') + parser.add_argument('--decoder-dropout-in', type=float, metavar='D', + help='dropout probability for decoder input embedding') + parser.add_argument('--decoder-dropout-out', type=float, metavar='D', + help='dropout probability for decoder output') + # fmt: on + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + # make sure that all args are properly defaulted (in case there are any new ones) + base_architecture(args) + + if args.encoder_layers != args.decoder_layers: + raise ValueError("--encoder-layers must match --decoder-layers") + + max_source_positions = getattr( + args, "max_source_positions", DEFAULT_MAX_SOURCE_POSITIONS + ) + max_target_positions = getattr( + args, "max_target_positions", DEFAULT_MAX_TARGET_POSITIONS + ) + + def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + 
embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) + embed_dict = utils.parse_embedding(embed_path) + utils.print_embed_overlap(embed_dict, dictionary) + return utils.load_embedding(embed_dict, dictionary, embed_tokens) + + if args.encoder_embed_path: + pretrained_encoder_embed = load_pretrained_embedding_from_file( + args.encoder_embed_path, task.source_dictionary, args.encoder_embed_dim + ) + else: + num_embeddings = len(task.source_dictionary) + pretrained_encoder_embed = Embedding( + num_embeddings, args.encoder_embed_dim, task.source_dictionary.pad() + ) + + if args.share_all_embeddings: + # double check all parameters combinations are valid + if task.source_dictionary != task.target_dictionary: + raise ValueError("--share-all-embeddings requires a joint dictionary") + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise ValueError( + "--share-all-embed not compatible with --decoder-embed-path" + ) + if args.encoder_embed_dim != args.decoder_embed_dim: + raise ValueError( + "--share-all-embeddings requires --encoder-embed-dim to " + "match --decoder-embed-dim" + ) + pretrained_decoder_embed = pretrained_encoder_embed + args.share_decoder_input_output_embed = True + else: + # separate decoder input embeddings + pretrained_decoder_embed = None + if args.decoder_embed_path: + pretrained_decoder_embed = load_pretrained_embedding_from_file( + args.decoder_embed_path, + task.target_dictionary, + args.decoder_embed_dim, + ) + # one last double check of parameter combinations + if args.share_decoder_input_output_embed and ( + args.decoder_embed_dim != args.decoder_out_embed_dim + ): + raise ValueError( + "--share-decoder-input-output-embeddings requires " + "--decoder-embed-dim to match --decoder-out-embed-dim" + ) + + if args.encoder_freeze_embed: + pretrained_encoder_embed.weight.requires_grad = False + if args.decoder_freeze_embed: + pretrained_decoder_embed.weight.requires_grad = False + + encoder = 
LSTMEncoder( + dictionary=task.source_dictionary, + embed_dim=args.encoder_embed_dim, + hidden_size=args.encoder_hidden_size, + num_layers=args.encoder_layers, + dropout_in=args.encoder_dropout_in, + dropout_out=args.encoder_dropout_out, + bidirectional=args.encoder_bidirectional, + pretrained_embed=pretrained_encoder_embed, + max_source_positions=max_source_positions, + ) + decoder = LSTMDecoder( + dictionary=task.target_dictionary, + embed_dim=args.decoder_embed_dim, + hidden_size=args.decoder_hidden_size, + out_embed_dim=args.decoder_out_embed_dim, + num_layers=args.decoder_layers, + dropout_in=args.decoder_dropout_in, + dropout_out=args.decoder_dropout_out, + attention=utils.eval_bool(args.decoder_attention), + encoder_output_units=encoder.output_units, + pretrained_embed=pretrained_decoder_embed, + share_input_output_embed=args.share_decoder_input_output_embed, + adaptive_softmax_cutoff=( + utils.eval_str_list(args.adaptive_softmax_cutoff, type=int) + if args.criterion == "adaptive_loss" + else None + ), + max_target_positions=max_target_positions, + residuals=False, + ) + return cls(encoder, decoder) + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + ): + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths) + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + ) + return decoder_out + + +class LSTMEncoder(FairseqEncoder): + """LSTM encoder.""" + + def __init__( + self, + dictionary, + embed_dim=512, + hidden_size=512, + num_layers=1, + dropout_in=0.1, + dropout_out=0.1, + bidirectional=False, + left_pad=True, + pretrained_embed=None, + padding_idx=None, + max_source_positions=DEFAULT_MAX_SOURCE_POSITIONS, + ): + super().__init__(dictionary) + self.num_layers = num_layers + self.dropout_in_module = FairseqDropout( + dropout_in * 1.0, module_name=self.__class__.__name__ + ) + 
self.dropout_out_module = FairseqDropout( + dropout_out * 1.0, module_name=self.__class__.__name__ + ) + self.bidirectional = bidirectional + self.hidden_size = hidden_size + self.max_source_positions = max_source_positions + + num_embeddings = len(dictionary) + self.padding_idx = padding_idx if padding_idx is not None else dictionary.pad() + if pretrained_embed is None: + self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx) + else: + self.embed_tokens = pretrained_embed + + self.lstm = LSTM( + input_size=embed_dim, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=self.dropout_out_module.p if num_layers > 1 else 0.0, + bidirectional=bidirectional, + ) + self.left_pad = left_pad + + self.output_units = hidden_size + if bidirectional: + self.output_units *= 2 + + def forward( + self, + src_tokens: Tensor, + src_lengths: Tensor, + enforce_sorted: bool = True, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of + shape `(batch, src_len)` + src_lengths (LongTensor): lengths of each source sentence of + shape `(batch)` + enforce_sorted (bool, optional): if True, `src_tokens` is + expected to contain sequences sorted by length in a + decreasing order. If False, this condition is not + required. Default: True. 
+ """ + if self.left_pad: + # nn.utils.rnn.pack_padded_sequence requires right-padding; + # convert left-padding to right-padding + src_tokens = utils.convert_padding_direction( + src_tokens, + torch.zeros_like(src_tokens).fill_(self.padding_idx), + left_to_right=True, + ) + + bsz, seqlen = src_tokens.size() + + # embed tokens + x = self.embed_tokens(src_tokens) + x = self.dropout_in_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # pack embedded source tokens into a PackedSequence + packed_x = nn.utils.rnn.pack_padded_sequence( + x, src_lengths.cpu(), enforce_sorted=enforce_sorted + ) + + # apply LSTM + if self.bidirectional: + state_size = 2 * self.num_layers, bsz, self.hidden_size + else: + state_size = self.num_layers, bsz, self.hidden_size + h0 = x.new_zeros(*state_size) + c0 = x.new_zeros(*state_size) + packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0)) + + # unpack outputs and apply dropout + x, _ = nn.utils.rnn.pad_packed_sequence( + packed_outs, padding_value=self.padding_idx * 1.0 + ) + x = self.dropout_out_module(x) + assert list(x.size()) == [seqlen, bsz, self.output_units] + + if self.bidirectional: + final_hiddens = self.combine_bidir(final_hiddens, bsz) + final_cells = self.combine_bidir(final_cells, bsz) + + encoder_padding_mask = src_tokens.eq(self.padding_idx).t() + + return tuple( + ( + x, # seq_len x batch x hidden + final_hiddens, # num_layers x batch x num_directions*hidden + final_cells, # num_layers x batch x num_directions*hidden + encoder_padding_mask, # seq_len x batch + ) + ) + + def combine_bidir(self, outs, bsz: int): + out = outs.view(self.num_layers, 2, bsz, -1).transpose(1, 2).contiguous() + return out.view(self.num_layers, bsz, -1) + + def reorder_encoder_out( + self, encoder_out: Tuple[Tensor, Tensor, Tensor, Tensor], new_order + ): + return tuple( + ( + encoder_out[0].index_select(1, new_order), + encoder_out[1].index_select(1, new_order), + encoder_out[2].index_select(1, new_order), + 
encoder_out[3].index_select(1, new_order), + ) + ) + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return self.max_source_positions + + +class AttentionLayer(nn.Module): + def __init__(self, input_embed_dim, source_embed_dim, output_embed_dim, bias=False): + super().__init__() + + self.input_proj = Linear(input_embed_dim, source_embed_dim, bias=bias) + self.output_proj = Linear( + input_embed_dim + source_embed_dim, output_embed_dim, bias=bias + ) + + def forward(self, input, source_hids, encoder_padding_mask): + # input: bsz x input_embed_dim + # source_hids: srclen x bsz x source_embed_dim + + # x: bsz x source_embed_dim + x = self.input_proj(input) + + # compute attention + attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2) + + # don't attend over padding + if encoder_padding_mask is not None: + attn_scores = ( + attn_scores.float() + .masked_fill_(encoder_padding_mask, float("-inf")) + .type_as(attn_scores) + ) # FP16 support: cast to float and back + + attn_scores = F.softmax(attn_scores, dim=0) # srclen x bsz + + # sum weighted sources + x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0) + + x = torch.tanh(self.output_proj(torch.cat((x, input), dim=1))) + return x, attn_scores + + +class LSTMDecoder(FairseqIncrementalDecoder): + """LSTM decoder.""" + + def __init__( + self, + dictionary, + embed_dim=512, + hidden_size=512, + out_embed_dim=512, + num_layers=1, + dropout_in=0.1, + dropout_out=0.1, + attention=True, + encoder_output_units=512, + pretrained_embed=None, + share_input_output_embed=False, + adaptive_softmax_cutoff=None, + max_target_positions=DEFAULT_MAX_TARGET_POSITIONS, + residuals=False, + ): + super().__init__(dictionary) + self.dropout_in_module = FairseqDropout( + dropout_in * 1.0, module_name=self.__class__.__name__ + ) + self.dropout_out_module = FairseqDropout( + dropout_out * 1.0, module_name=self.__class__.__name__ + ) + self.hidden_size = hidden_size + self.share_input_output_embed = 
share_input_output_embed + self.need_attn = True + self.max_target_positions = max_target_positions + self.residuals = residuals + self.num_layers = num_layers + + self.adaptive_softmax = None + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + if pretrained_embed is None: + self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) + else: + self.embed_tokens = pretrained_embed + + self.encoder_output_units = encoder_output_units + if encoder_output_units != hidden_size and encoder_output_units != 0: + self.encoder_hidden_proj = Linear(encoder_output_units, hidden_size) + self.encoder_cell_proj = Linear(encoder_output_units, hidden_size) + else: + self.encoder_hidden_proj = self.encoder_cell_proj = None + + # disable input feeding if there is no encoder + # input feeding is described in arxiv.org/abs/1508.04025 + input_feed_size = 0 if encoder_output_units == 0 else hidden_size + self.layers = nn.ModuleList( + [ + LSTMCell( + input_size=input_feed_size + embed_dim + if layer == 0 + else hidden_size, + hidden_size=hidden_size, + ) + for layer in range(num_layers) + ] + ) + + if attention: + # TODO make bias configurable + self.attention = AttentionLayer( + hidden_size, encoder_output_units, hidden_size, bias=False + ) + else: + self.attention = None + + if hidden_size != out_embed_dim: + self.additional_fc = Linear(hidden_size, out_embed_dim) + + if adaptive_softmax_cutoff is not None: + # setting adaptive_softmax dropout to dropout_out for now but can be redefined + self.adaptive_softmax = AdaptiveSoftmax( + num_embeddings, + hidden_size, + adaptive_softmax_cutoff, + dropout=dropout_out, + ) + elif not self.share_input_output_embed: + self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out) + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Tuple[Tensor, Tensor, Tensor, Tensor]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + src_lengths: Optional[Tensor] = 
None, + ): + x, attn_scores = self.extract_features( + prev_output_tokens, encoder_out, incremental_state + ) + return self.output_layer(x), attn_scores + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Tuple[Tensor, Tensor, Tensor, Tensor]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + ): + """ + Similar to *forward* but only return features. + """ + # get outputs from encoder + if encoder_out is not None: + encoder_outs = encoder_out[0] + encoder_hiddens = encoder_out[1] + encoder_cells = encoder_out[2] + encoder_padding_mask = encoder_out[3] + else: + encoder_outs = torch.empty(0) + encoder_hiddens = torch.empty(0) + encoder_cells = torch.empty(0) + encoder_padding_mask = torch.empty(0) + srclen = encoder_outs.size(0) + + if incremental_state is not None and len(incremental_state) > 0: + prev_output_tokens = prev_output_tokens[:, -1:] + + bsz, seqlen = prev_output_tokens.size() + + # embed tokens + x = self.embed_tokens(prev_output_tokens) + x = self.dropout_in_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # initialize previous states (or get from cache during incremental generation) + if incremental_state is not None and len(incremental_state) > 0: + prev_hiddens, prev_cells, input_feed = self.get_cached_state( + incremental_state + ) + elif encoder_out is not None: + # setup recurrent cells + prev_hiddens = [encoder_hiddens[i] for i in range(self.num_layers)] + prev_cells = [encoder_cells[i] for i in range(self.num_layers)] + if self.encoder_hidden_proj is not None: + prev_hiddens = [self.encoder_hidden_proj(y) for y in prev_hiddens] + prev_cells = [self.encoder_cell_proj(y) for y in prev_cells] + input_feed = x.new_zeros(bsz, self.hidden_size) + else: + # setup zero cells, since there is no encoder + zero_state = x.new_zeros(bsz, self.hidden_size) + prev_hiddens = [zero_state for i in range(self.num_layers)] + prev_cells = [zero_state for i in range(self.num_layers)] 
+ input_feed = None + + assert ( + srclen > 0 or self.attention is None + ), "attention is not supported if there are no encoder outputs" + attn_scores: Optional[Tensor] = ( + x.new_zeros(srclen, seqlen, bsz) if self.attention is not None else None + ) + outs = [] + for j in range(seqlen): + # input feeding: concatenate context vector from previous time step + if input_feed is not None: + input = torch.cat((x[j, :, :], input_feed), dim=1) + else: + input = x[j] + + for i, rnn in enumerate(self.layers): + # recurrent cell + hidden, cell = rnn(input, (prev_hiddens[i], prev_cells[i])) + + # hidden state becomes the input to the next layer + input = self.dropout_out_module(hidden) + if self.residuals: + input = input + prev_hiddens[i] + + # save state for next time step + prev_hiddens[i] = hidden + prev_cells[i] = cell + + # apply attention using the last layer's hidden state + if self.attention is not None: + assert attn_scores is not None + out, attn_scores[:, j, :] = self.attention( + hidden, encoder_outs, encoder_padding_mask + ) + else: + out = hidden + out = self.dropout_out_module(out) + + # input feeding + if input_feed is not None: + input_feed = out + + # save final output + outs.append(out) + + # Stack all the necessary tensors together and store + prev_hiddens_tensor = torch.stack(prev_hiddens) + prev_cells_tensor = torch.stack(prev_cells) + cache_state = torch.jit.annotate( + Dict[str, Optional[Tensor]], + { + "prev_hiddens": prev_hiddens_tensor, + "prev_cells": prev_cells_tensor, + "input_feed": input_feed, + }, + ) + self.set_incremental_state(incremental_state, "cached_state", cache_state) + + # collect outputs across time steps + x = torch.cat(outs, dim=0).view(seqlen, bsz, self.hidden_size) + + # T x B x C -> B x T x C + x = x.transpose(1, 0) + + if hasattr(self, "additional_fc") and self.adaptive_softmax is None: + x = self.additional_fc(x) + x = self.dropout_out_module(x) + # srclen x tgtlen x bsz -> bsz x tgtlen x srclen + if not self.training and 
def reorder_incremental_state(
    self,
    incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
    new_order: Tensor,
):
    """Reorder the cached decoder state along the batch dimension.

    Every cached per-layer hidden state, per-layer cell state and the
    optional input-feed tensor is re-indexed with
    ``index_select(0, new_order)``; the result is written back under the
    ``"cached_state"`` key.

    Args:
        incremental_state: incremental decoding cache; nothing is done when
            it is ``None`` or empty.
        new_order: 1-D index tensor passed to ``index_select``.

    Returns:
        None. ``incremental_state`` is updated in place.
    """
    # ``len(...) == 0`` (rather than truthiness) keeps the check explicit for
    # the annotated Dict type.
    if incremental_state is None or len(incremental_state) == 0:
        return
    prev_hiddens, prev_cells, input_feed = self.get_cached_state(incremental_state)
    prev_hiddens = [p.index_select(0, new_order) for p in prev_hiddens]
    prev_cells = [p.index_select(0, new_order) for p in prev_cells]
    if input_feed is not None:
        # input_feed is absent (None) for decoder-only language models
        input_feed = input_feed.index_select(0, new_order)
    cached_state_new = torch.jit.annotate(
        Dict[str, Optional[Tensor]],
        {
            "prev_hiddens": torch.stack(prev_hiddens),
            "prev_cells": torch.stack(prev_cells),
            "input_feed": input_feed,
        },
    )
    # No trailing comma here: the call is a plain statement, not a tuple.
    self.set_incremental_state(incremental_state, "cached_state", cached_state_new)
    return
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Build an ``nn.Embedding`` with weights drawn from U(-0.1, 0.1).

    Args:
        num_embeddings: vocabulary size.
        embedding_dim: size of each embedding vector.
        padding_idx: index of the padding symbol, or ``None`` for no padding
            row.

    Returns:
        The initialised ``nn.Embedding``; the padding row (if any) is zero.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    # ``m.weight[None]`` is a view of the *whole* matrix, so zeroing it would
    # wipe every embedding; only zero a row when a padding index is given.
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m


def _uniform_init_weights_and_biases(module):
    """Re-draw every parameter named ``*weight*`` / ``*bias*`` from U(-0.1, 0.1)."""
    for name, param in module.named_parameters():
        if "weight" in name or "bias" in name:
            param.data.uniform_(-0.1, 0.1)
    return module


def LSTM(input_size, hidden_size, **kwargs):
    """Build an ``nn.LSTM`` (extra ``kwargs`` forwarded) with U(-0.1, 0.1) init."""
    return _uniform_init_weights_and_biases(nn.LSTM(input_size, hidden_size, **kwargs))


def LSTMCell(input_size, hidden_size, **kwargs):
    """Build an ``nn.LSTMCell`` (extra ``kwargs`` forwarded) with U(-0.1, 0.1) init."""
    return _uniform_init_weights_and_biases(
        nn.LSTMCell(input_size, hidden_size, **kwargs)
    )


def Linear(in_features, out_features, bias=True, dropout=0.0):
    """Linear layer (input: N x T x C) with U(-0.1, 0.1) weights and bias.

    ``dropout`` is accepted for signature compatibility but is not used by
    this function.
    """
    m = nn.Linear(in_features, out_features, bias=bias)
    m.weight.data.uniform_(-0.1, 0.1)
    if bias:
        m.bias.data.uniform_(-0.1, 0.1)
    return m
@register_model_architecture("lstm", "lstm_wiseman_iwslt_de_en")
def lstm_wiseman_iwslt_de_en(args):
    """Fill in the ``lstm_wiseman_iwslt_de_en`` defaults on ``args``.

    Options already present on ``args`` are left untouched; everything not
    covered here is then defaulted by ``base_architecture``.
    """
    constant_defaults = (
        ("dropout", 0.1),
        ("encoder_embed_dim", 256),
        ("encoder_dropout_in", 0),
        ("encoder_dropout_out", 0),
        ("decoder_embed_dim", 256),
        ("decoder_out_embed_dim", 256),
        ("decoder_dropout_in", 0),
    )
    for option, fallback in constant_defaults:
        setattr(args, option, getattr(args, option, fallback))

    # This fallback follows the (now resolved) global dropout value.
    args.decoder_dropout_out = getattr(args, "decoder_dropout_out", args.dropout)
    base_architecture(args)
@register_model("lstm_lm")
class LSTMLanguageModel(FairseqLanguageModel):
    """Decoder-only LSTM language model registered as ``lstm_lm``.

    The model is a thin wrapper around an ``LSTMDecoder`` built without
    attention and without encoder outputs.
    """

    def __init__(self, decoder):
        super().__init__(decoder)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--dropout', type=float, metavar='D',
                            help='dropout probability')
        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                            help='decoder embedding dimension')
        parser.add_argument('--decoder-embed-path', type=str, metavar='STR',
                            help='path to pre-trained decoder embedding')
        parser.add_argument('--decoder-hidden-size', type=int, metavar='N',
                            help='decoder hidden size')
        parser.add_argument('--decoder-layers', type=int, metavar='N',
                            help='number of decoder layers')
        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                            help='decoder output embedding dimension')
        parser.add_argument('--decoder-attention', type=str, metavar='BOOL',
                            help='decoder attention')
        parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
                            help='comma separated list of adaptive softmax cutoff points. '
                                 'Must be used with adaptive_loss criterion')
        parser.add_argument('--residuals', default=False,
                            action='store_true',
                            help='applying residuals between LSTM layers')

        # Granular dropout settings (if not specified these default to --dropout)
        parser.add_argument('--decoder-dropout-in', type=float, metavar='D',
                            help='dropout probability for decoder input embedding')
        parser.add_argument('--decoder-dropout-out', type=float, metavar='D',
                            help='dropout probability for decoder output')
        parser.add_argument('--share-decoder-input-output-embed', default=False,
                            action='store_true',
                            help='share decoder input and output embeddings')
        # fmt: on

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance.

        Args:
            args: parsed options; missing ones are filled in by
                ``base_architecture``.
            task: task object providing ``dictionary``, ``source_dictionary``
                and ``target_dictionary``.

        Raises:
            ValueError: if ``--share-decoder-input-output-embed`` is set but
                the source/target dictionaries differ or the decoder input
                and output embedding dimensions do not match.
        """

        # make sure all arguments are present in older models
        base_architecture(args)

        # An explicit max_target_positions wins; otherwise fall back to
        # tokens_per_sample, then to the module-level default.
        if getattr(args, "max_target_positions", None) is not None:
            max_target_positions = args.max_target_positions
        else:
            max_target_positions = getattr(
                args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
            )

        def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        pretrained_decoder_embed = None
        if args.decoder_embed_path:
            pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args.decoder_embed_path, task.target_dictionary, args.decoder_embed_dim
            )

        if args.share_decoder_input_output_embed:
            # double check all parameters combinations are valid
            # (messages name the flag exactly as registered in add_args)
            if task.source_dictionary != task.target_dictionary:
                raise ValueError(
                    "--share-decoder-input-output-embed requires a joint dictionary"
                )

            if args.decoder_embed_dim != args.decoder_out_embed_dim:
                raise ValueError(
                    "--share-decoder-input-output-embed requires "
                    "--decoder-embed-dim to match --decoder-out-embed-dim"
                )

        decoder = LSTMDecoder(
            dictionary=task.dictionary,
            embed_dim=args.decoder_embed_dim,
            hidden_size=args.decoder_hidden_size,
            out_embed_dim=args.decoder_out_embed_dim,
            num_layers=args.decoder_layers,
            dropout_in=args.decoder_dropout_in,
            dropout_out=args.decoder_dropout_out,
            attention=False,  # decoder-only language model doesn't support attention
            encoder_output_units=0,
            pretrained_embed=pretrained_decoder_embed,
            share_input_output_embed=args.share_decoder_input_output_embed,
            # cutoffs are only parsed when training with the adaptive_loss criterion
            adaptive_softmax_cutoff=(
                utils.eval_str_list(args.adaptive_softmax_cutoff, type=int)
                if args.criterion == "adaptive_loss"
                else None
            ),
            max_target_positions=max_target_positions,
            residuals=args.residuals,
        )

        return cls(decoder)
@register_model("masked_lm")
class MaskedLMModel(FairseqEncoderModel):
    """
    Class for training a Masked Language Model. It also supports an
    additional sentence level prediction if the sent-loss argument is set.
    """

    def __init__(self, args, encoder):
        """Wrap ``encoder`` and keep ``args`` on the model as ``self.args``."""
        super().__init__(encoder)
        self.args = args

        # if specified then apply bert initialization on the model. We need
        # to explicitly call this to make sure that the output embeddings
        # and projection layers are also correctly initialized
        if getattr(args, "apply_bert_init", False):
            self.apply(init_bert_params)

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # Arguments related to dropout
        parser.add_argument(
            "--dropout", type=float, metavar="D", help="dropout probability"
        )
        parser.add_argument(
            "--attention-dropout",
            type=float,
            metavar="D",
            help="dropout probability for" " attention weights",
        )
        parser.add_argument(
            "--act-dropout",
            type=float,
            metavar="D",
            help="dropout probability after" " activation in FFN",
        )

        # Arguments related to hidden states and self-attention
        parser.add_argument(
            "--encoder-ffn-embed-dim",
            type=int,
            metavar="N",
            help="encoder embedding dimension for FFN",
        )
        parser.add_argument(
            "--encoder-layers", type=int, metavar="N", help="num encoder layers"
        )
        parser.add_argument(
            "--encoder-attention-heads",
            type=int,
            metavar="N",
            help="num encoder attention heads",
        )

        # Arguments related to input and output embeddings
        parser.add_argument(
            "--encoder-embed-dim",
            type=int,
            metavar="N",
            help="encoder embedding dimension",
        )
        parser.add_argument(
            "--share-encoder-input-output-embed",
            action="store_true",
            help="share encoder input" " and output embeddings",
        )
        parser.add_argument(
            "--encoder-learned-pos",
            action="store_true",
            help="use learned positional embeddings in the encoder",
        )
        parser.add_argument(
            "--no-token-positional-embeddings",
            action="store_true",
            help="if set, disables positional embeddings" " (outside self attention)",
        )
        parser.add_argument(
            "--num-segment", type=int, metavar="N", help="num segment in the input"
        )
        parser.add_argument(
            "--max-positions", type=int, help="number of positional embeddings to learn"
        )

        # Arguments related to sentence level prediction
        parser.add_argument(
            "--sentence-class-num",
            type=int,
            metavar="N",
            help="number of classes for sentence task",
        )
        parser.add_argument(
            "--sent-loss",
            action="store_true",
            help="if set," " calculate sentence level predictions",
        )

        # Arguments related to parameter initialization
        parser.add_argument(
            "--apply-bert-init",
            action="store_true",
            help="use custom param initialization for BERT",
        )

        # misc params
        parser.add_argument(
            "--activation-fn",
            choices=utils.get_available_activation_fns(),
            help="activation function to use",
        )
        parser.add_argument(
            "--pooler-activation-fn",
            choices=utils.get_available_activation_fns(),
            help="Which activation function to use for pooler layer.",
        )
        parser.add_argument(
            "--encoder-normalize-before",
            action="store_true",
            help="apply layernorm before each encoder block",
        )

    def forward(self, src_tokens, segment_labels=None, **kwargs):
        """Delegate to the wrapped encoder, forwarding all extra keyword arguments."""
        return self.encoder(src_tokens, segment_labels=segment_labels, **kwargs)

    def max_positions(self):
        """Return the encoder's ``max_positions`` attribute.

        This is an attribute read, not a call: ``MaskedLMEncoder.__init__``
        assigns ``self.max_positions = args.max_positions`` on the instance.
        """
        return self.encoder.max_positions

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure all arguments are present in older models
        base_architecture(args)

        # fall back to the task's sample length when no explicit limit was given
        if not safe_hasattr(args, "max_positions"):
            args.max_positions = args.tokens_per_sample

        logger.info(args)

        encoder = MaskedLMEncoder(args, task.dictionary)
        return cls(args, encoder)
def __init__(self, args, dictionary):
    """Build the sentence encoder and the optional masked-LM / sentence heads.

    Args:
        args: model options (the ``encoder_*``, dropout, ``num_segment``,
            ``sent_loss`` ... attributes read below).
        dictionary: vocabulary; supplies the padding index and vocab size.
    """
    super().__init__(dictionary)

    self.padding_idx = dictionary.pad()
    self.vocab_size = dictionary.__len__()
    # NOTE: this instance attribute has the same name as the class's
    # ``max_positions`` method, so attribute lookup on an instance yields
    # this value rather than the bound method.
    self.max_positions = args.max_positions

    self.sentence_encoder = TransformerSentenceEncoder(
        padding_idx=self.padding_idx,
        vocab_size=self.vocab_size,
        num_encoder_layers=args.encoder_layers,
        embedding_dim=args.encoder_embed_dim,
        ffn_embedding_dim=args.encoder_ffn_embed_dim,
        num_attention_heads=args.encoder_attention_heads,
        dropout=args.dropout,
        attention_dropout=args.attention_dropout,
        activation_dropout=args.act_dropout,
        max_seq_len=self.max_positions,
        num_segments=args.num_segment,
        use_position_embeddings=not args.no_token_positional_embeddings,
        encoder_normalize_before=args.encoder_normalize_before,
        apply_bert_init=args.apply_bert_init,
        activation_fn=args.activation_fn,
        learned_pos_embedding=args.encoder_learned_pos,
    )

    # Optional heads default to None and are only created further down.
    self.share_input_output_embed = args.share_encoder_input_output_embed
    self.embed_out = None
    self.sentence_projection_layer = None
    self.sentence_out_dim = args.sentence_class_num
    self.lm_output_learned_bias = None

    # Remove head is set to true during fine-tuning
    self.load_softmax = not getattr(args, "remove_head", False)

    # Pooler applied to the sentence representation in forward().
    self.masked_lm_pooler = nn.Linear(
        args.encoder_embed_dim, args.encoder_embed_dim
    )
    self.pooler_activation = utils.get_activation_fn(args.pooler_activation_fn)

    # Transform applied to token states before projecting to the vocabulary.
    self.lm_head_transform_weight = nn.Linear(
        args.encoder_embed_dim, args.encoder_embed_dim
    )
    self.activation_fn = utils.get_activation_fn(args.activation_fn)
    self.layer_norm = LayerNorm(args.encoder_embed_dim)

    # (re-assigned to None here; same value as set above)
    self.lm_output_learned_bias = None
    if self.load_softmax:
        self.lm_output_learned_bias = nn.Parameter(torch.zeros(self.vocab_size))

        # a separate output projection is only needed when the input
        # embedding matrix is not reused for the output
        if not self.share_input_output_embed:
            self.embed_out = nn.Linear(
                args.encoder_embed_dim, self.vocab_size, bias=False
            )

        if args.sent_loss:
            self.sentence_projection_layer = nn.Linear(
                args.encoder_embed_dim, self.sentence_out_dim, bias=False
            )
def upgrade_state_dict_named(self, state_dict, name):
    """Drop output-head entries from ``state_dict`` when the head is not loaded.

    When ``self.load_softmax`` is false, every key containing
    ``embed_out.weight``, ``sentence_projection_layer.weight`` or
    ``lm_output_learned_bias`` is deleted in place. The (possibly modified)
    ``state_dict`` is returned; ``name`` is not used.
    """
    if self.load_softmax:
        return state_dict

    head_markers = (
        "embed_out.weight",
        "sentence_projection_layer.weight",
        "lm_output_learned_bias",
    )
    # Collect first so the dict is not mutated while it is being iterated.
    stale_keys = [
        key for key in state_dict if any(marker in key for marker in head_markers)
    ]
    for key in stale_keys:
        del state_dict[key]
    return state_dict
@register_model_architecture("masked_lm", "bert_base")
def bert_base_architecture(args):
    """Fill in the ``bert_base`` defaults on ``args``.

    Options already set on ``args`` are kept; remaining options are then
    defaulted by ``base_architecture``.
    """
    bert_base_defaults = (
        ("encoder_embed_dim", 768),
        ("share_encoder_input_output_embed", True),
        ("no_token_positional_embeddings", False),
        ("encoder_learned_pos", True),
        ("num_segment", 2),
        ("encoder_layers", 12),
        ("encoder_attention_heads", 12),
        ("encoder_ffn_embed_dim", 3072),
        ("sentence_class_num", 2),
        ("sent_loss", True),
        ("apply_bert_init", True),
        ("activation_fn", "gelu"),
        ("pooler_activation_fn", "tanh"),
        ("encoder_normalize_before", True),
    )
    for option, fallback in bert_base_defaults:
        setattr(args, option, getattr(args, option, fallback))
    base_architecture(args)
@register_model_architecture("masked_lm", "xlm_base")
def xlm_architecture(args):
    """Fill in the ``xlm_base`` defaults on ``args``.

    Options already set on ``args`` are kept; remaining options are then
    defaulted by ``base_architecture``.
    """
    xlm_defaults = (
        ("encoder_embed_dim", 1024),
        ("share_encoder_input_output_embed", True),
        ("no_token_positional_embeddings", False),
        ("encoder_learned_pos", True),
        ("num_segment", 1),
        ("encoder_layers", 6),
        ("encoder_attention_heads", 8),
        ("encoder_ffn_embed_dim", 4096),
        ("sent_loss", False),
        ("activation_fn", "gelu"),
        ("encoder_normalize_before", False),
        ("pooler_activation_fn", "tanh"),
        ("apply_bert_init", True),
    )
    for option, fallback in xlm_defaults:
        setattr(args, option, getattr(args, option, fallback))
    base_architecture(args)
from typing import List, Optional

import torch
from torch import Tensor


@torch.jit.script
def script_skip_tensor_list(x: List[Tensor], mask):
    """Apply ``mask`` to every tensor in ``x``.

    Each tensor is indexed on dim 0 when its first dimension matches the
    mask's, otherwise on dim 1. If the selection is empty the original
    tensor is kept instead.
    """
    res = [xi[mask] if xi.size(0) == mask.size(0) else xi[:, mask] for xi in x]
    outputs = []
    for i, t in enumerate(res):
        if t.numel() != 0:
            outputs.append(t)
        else:
            # empty selection: fall back to the unmasked tensor
            outputs.append(x[i])
    return outputs


@torch.jit.script
def script_skip_tensor(x: Tensor, mask):
    """Apply ``mask`` to ``x`` on dim 0 (or dim 1 if dim 0 does not match).

    ``x`` is returned unchanged when it is empty along dim 0 or when the
    masked selection has no elements.
    """
    # None case
    if x.size(0) == 0:
        return x
    res = x[mask] if x.size(0) == mask.size(0) else x[:, mask]
    if res.numel() == 0:
        return x
    else:
        return res


@torch.jit.script
def expand_2d_or_3d_tensor(x, trg_dim: int, padding_idx: int):
    """
    Expand 2D/3D tensor on dim=1

    The tensor is right-padded along dim 1 up to ``trg_dim`` with
    ``padding_idx``; it is returned as-is when it already has that size.
    ``trg_dim`` must not be smaller than the current size of dim 1.
    """
    if x is None:
        return None

    assert x.dim() == 2 or x.dim() == 3
    assert trg_dim >= x.size(1), (trg_dim, x.size())
    if trg_dim == x.size(1):
        return x

    # shape of the padding block: same batch size, missing columns on dim 1
    dims = [x.size(0), trg_dim - x.size(1)]
    if x.dim() == 3:
        dims.append(x.size(2))
    # ``.to(x)`` matches the padding block's dtype/device to ``x``
    x = torch.cat([x, torch.zeros(dims).to(x).fill_(padding_idx)], 1)

    return x


@torch.jit.script
def coalesce(x: Optional[Tensor], y: Tensor) -> Tensor:
    """Return ``x`` unless it is ``None``, in which case return ``y``."""
    return x if x is not None else y


@torch.jit.script
def fill_tensors(
    x: Optional[Tensor], mask, y: Optional[Tensor], padding_idx: int
) -> Optional[Tensor]:
    """
    Filling tensor x with y at masked positions (dim=0).

    ``x`` is returned untouched when it is ``None``, empty along dim 0, when
    ``y`` is ``None`` or when nothing is selected by ``mask``. When every row
    is selected, ``y`` itself is returned. Otherwise the selected rows of
    ``x`` are overwritten with ``y``; a size mismatch on dim 1 is bridged by
    padding with ``padding_idx``.
    """
    if x is None or x.size()[0] == 0 or y is None:
        return x
    assert x.dim() == y.dim() and mask.size(0) == x.size(0)
    assert x.dim() == 2 or (x.dim() == 3 and x.size(2) == y.size(2))

    n_selected = mask.sum()
    if n_selected == 0:
        return x
    assert n_selected == y.size(0)
    if n_selected == x.size(0):
        return y

    if x.size(1) < y.size(1):
        # x is shorter than y: grow x first, then copy the rows in
        x = expand_2d_or_3d_tensor(x, y.size(1), padding_idx)
        x[mask] = y
    elif x.size(1) > y.size(1):
        # x is longer than y: reset the selected rows to padding, then
        # write y into their leading columns
        x[mask] = torch.tensor(padding_idx).type_as(x)
        if x.dim() == 2:
            x[mask, : y.size(1)] = y
        else:
            x[mask, : y.size(1), :] = y
    else:
        x[mask] = y
    return x
@classmethod
def build_model(cls, args, task):
    """Build a new model instance.

    One encoder and one decoder is assigned to every language pair in
    ``task.model_lang_pairs``. Embeddings and whole encoders/decoders are
    shared across languages according to the ``share_*`` options.

    Raises:
        ValueError: if ``--share-all-embeddings`` is combined with
            mismatching encoder/decoder embedding dimensions or with a
            decoder embedding path that differs from the encoder's.
    """
    # imported here rather than at module level
    from fairseq.tasks.multilingual_translation import MultilingualTranslationTask

    assert isinstance(task, MultilingualTranslationTask)

    # make sure all arguments are present in older models
    base_multilingual_architecture(args)

    if not safe_hasattr(args, "max_source_positions"):
        args.max_source_positions = 1024
    if not safe_hasattr(args, "max_target_positions"):
        args.max_target_positions = 1024

    # language pairs are "src-tgt" strings
    src_langs = [lang_pair.split("-")[0] for lang_pair in task.model_lang_pairs]
    tgt_langs = [lang_pair.split("-")[1] for lang_pair in task.model_lang_pairs]

    # sharing a whole encoder/decoder implies sharing its embeddings
    if args.share_encoders:
        args.share_encoder_embeddings = True
    if args.share_decoders:
        args.share_decoder_embeddings = True

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    # build shared embeddings (if applicable)
    shared_encoder_embed_tokens, shared_decoder_embed_tokens = None, None
    if args.share_all_embeddings:
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError(
                "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim"
            )
        if args.decoder_embed_path and (
            args.decoder_embed_path != args.encoder_embed_path
        ):
            raise ValueError(
                "--share-all-embeddings not compatible with --decoder-embed-path"
            )
        shared_encoder_embed_tokens = FairseqMultiModel.build_shared_embeddings(
            dicts=task.dicts,
            langs=task.langs,
            embed_dim=args.encoder_embed_dim,
            build_embedding=build_embedding,
            pretrained_embed_path=args.encoder_embed_path,
        )
        # one embedding object serves encoders and decoders alike
        shared_decoder_embed_tokens = shared_encoder_embed_tokens
        args.share_decoder_input_output_embed = True
    else:
        if args.share_encoder_embeddings:
            shared_encoder_embed_tokens = FairseqMultiModel.build_shared_embeddings(
                dicts=task.dicts,
                langs=src_langs,
                embed_dim=args.encoder_embed_dim,
                build_embedding=build_embedding,
                pretrained_embed_path=args.encoder_embed_path,
            )
        if args.share_decoder_embeddings:
            shared_decoder_embed_tokens = FairseqMultiModel.build_shared_embeddings(
                dicts=task.dicts,
                langs=tgt_langs,
                embed_dim=args.decoder_embed_dim,
                build_embedding=build_embedding,
                pretrained_embed_path=args.decoder_embed_path,
            )

    # encoders/decoders for each language
    lang_encoders, lang_decoders = {}, {}

    def get_encoder(lang):
        # built on first request, then reused for the same language
        if lang not in lang_encoders:
            if shared_encoder_embed_tokens is not None:
                encoder_embed_tokens = shared_encoder_embed_tokens
            else:
                encoder_embed_tokens = build_embedding(
                    task.dicts[lang],
                    args.encoder_embed_dim,
                    args.encoder_embed_path,
                )
            lang_encoders[lang] = cls._get_module_class(
                True, args, task.dicts[lang], encoder_embed_tokens, src_langs
            )
        return lang_encoders[lang]

    def get_decoder(lang):
        # built on first request, then reused for the same language
        if lang not in lang_decoders:
            if shared_decoder_embed_tokens is not None:
                decoder_embed_tokens = shared_decoder_embed_tokens
            else:
                decoder_embed_tokens = build_embedding(
                    task.dicts[lang],
                    args.decoder_embed_dim,
                    args.decoder_embed_path,
                )
            lang_decoders[lang] = cls._get_module_class(
                False, args, task.dicts[lang], decoder_embed_tokens, tgt_langs
            )
        return lang_decoders[lang]

    # shared encoders/decoders (if applicable)
    shared_encoder, shared_decoder = None, None
    if args.share_encoders:
        shared_encoder = get_encoder(src_langs[0])
    if args.share_decoders:
        shared_decoder = get_decoder(tgt_langs[0])

    encoders, decoders = OrderedDict(), OrderedDict()
    for lang_pair, src, tgt in zip(task.model_lang_pairs, src_langs, tgt_langs):
        encoders[lang_pair] = (
            shared_encoder if shared_encoder is not None else get_encoder(src)
        )
        decoders[lang_pair] = (
            shared_decoder if shared_decoder is not None else get_decoder(tgt)
        )

    # NOTE(review): instantiates MultilingualTransformerModel directly rather
    # than ``cls``, so a subclass inheriting this method gets the base class.
    return MultilingualTransformerModel(encoders, decoders)
@register_model_architecture(
    "multilingual_transformer", "multilingual_transformer_iwslt_de_en"
)
def multilingual_transformer_iwslt_de_en(args):
    """Apply the ``multilingual_transformer_iwslt_de_en`` architecture defaults.

    Every option listed below keeps the value already present on ``args``;
    only options that are missing receive the fallback. The remaining
    defaults are then filled in by ``base_multilingual_architecture``.

    Args:
        args: option namespace, updated in place.
    """
    fallbacks = (
        ("encoder_embed_dim", 512),
        ("encoder_ffn_embed_dim", 1024),
        ("encoder_attention_heads", 4),
        ("encoder_layers", 6),
        ("decoder_embed_dim", 512),
        ("decoder_ffn_embed_dim", 1024),
        ("decoder_attention_heads", 4),
        ("decoder_layers", 6),
    )
    for option, fallback in fallbacks:
        setattr(args, option, getattr(args, option, fallback))
    base_multilingual_architecture(args)
@dataclass
class MultiresHubertConfig(FairseqDataclass):
    """Options for the model registered as ``multires_hubert``.

    Fields are grouped as: label rates, encoder blocks, dropouts, output
    projection / feature extractor, masking, channel masking, positional
    embeddings, loss computation, and conformer-specific settings. Field
    names (including historical spellings such as ``conv_adapator_kernal``)
    are configuration keys and are kept as they are.
    """

    label_rate: float = II("task.label_rate")
    #     label_rate: 1,2,2,5
    #                 (imply (1,2), (2,5))
    #     if base label_rate = 50
    #     (1,2), (2,5) --> label rates 50, 25, 10
    # Stored as a flat list; the model constructor reads it two entries at a
    # time.
    label_rate_ratios: List[int] = field(
        default=MISSING, metadata={"help": "tuple for label rates e.g., [(1,2), (2,5)]"}
    )

    extractor_mode: EXTRACTOR_MODE_CHOICES = field(
        default="default",
        metadata={
            "help": "mode for feature extractor. default has a single group "
            "norm with d groups in the first conv block, whereas layer_norm "
            "has layer norms in every block (meant to use with normalize=True)"
        },
    )
    # the blocks for each label rate
    # The default is the integer 2 (it used to be the string "2", which does
    # not match the declared ``int`` type when the dataclass is instantiated
    # directly).
    encoder_layers: int = field(
        default=2,
        metadata={
            "help": "num encoder layers in the each block (one sub module of the U-net)"
        },
    )
    override_encoder_layers: str = field(
        default="",
        metadata={
            "help": "specific layer numbers for each block (one sub module of the U-net) for the training"
        },
    )
    encoder_embed_dim: int = field(
        default=768, metadata={"help": "encoder embedding dimension"}
    )
    encoder_ffn_embed_dim: int = field(
        default=3072, metadata={"help": "encoder embedding dimension for FFN"}
    )
    encoder_attention_heads: int = field(
        default=12, metadata={"help": "num encoder attention heads"}
    )
    activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field(
        default="gelu", metadata={"help": "activation function to use"}
    )
    layer_type: LAYER_TYPE_CHOICES = field(
        default="transformer", metadata={"help": "layer type in encoder"}
    )
    conv_adapator_kernal: int = field(
        default=7, metadata={"help": "kernal size for conv adaptor"}
    )
    use_plain_updownsample: bool = field(
        default=False, metadata={"help": "whether to use plain up downsample"}
    )

    # dropouts
    dropout: float = field(
        default=0.1,
        metadata={"help": "dropout probability for the transformer"},
    )
    attention_dropout: float = field(
        default=0.1,
        metadata={"help": "dropout probability for attention weights"},
    )
    activation_dropout: float = field(
        default=0.0,
        metadata={"help": "dropout probability after activation in FFN"},
    )
    encoder_layerdrop: float = field(
        default=0.0,
        metadata={"help": "probability of dropping a tarnsformer layer"},
    )
    dropout_input: float = field(
        default=0.0,
        metadata={"help": "dropout to apply to the input (after feat extr)"},
    )
    dropout_features: float = field(
        default=0.0,
        metadata={"help": "dropout to apply to the features (after feat extr)"},
    )

    final_dim: int = field(
        default=0,
        metadata={
            "help": "project final representations and targets to this many "
            "dimensions. set to encoder_embed_dim is <= 0"
        },
    )
    untie_final_proj: bool = field(
        default=True,
        metadata={"help": "use separate projection for each target"},
    )
    layer_norm_first: bool = field(
        default=False,
        metadata={"help": "apply layernorm first in the transformer"},
    )
    conv_feature_layers: str = field(
        default="[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
        metadata={
            "help": "string describing convolutional feature extraction "
            "layers in form of a python list that contains "
            "[(dim, kernel_size, stride), ...]"
        },
    )
    conv_bias: bool = field(
        default=False, metadata={"help": "include bias in conv encoder"}
    )
    logit_temp: float = field(
        default=0.1, metadata={"help": "temperature to divide logits by"}
    )
    target_glu: bool = field(
        default=False, metadata={"help": "adds projection + glu to targets"}
    )
    feature_grad_mult: float = field(
        default=1.0,
        metadata={"help": "multiply feature extractor var grads by this"},
    )
    use_single_target: bool = field(
        default=False,
        metadata={
            "help": "whether to use single data (in that case, we will compute with the fixed label rate)"
        },
    )
    use_single_prediction: bool = field(
        default=False,
        metadata={
            "help": "if true, we will not conduct mlm prediction in low resolution in the middle"
        },
    )
    use_multi_stream: bool = field(
        default=False,
        metadata={
            "help": "whether to use multi-stream setting (in this setting, we have multiple streams with the same resolution)"
        },
    )

    # masking
    mask_length: int = field(default=10, metadata={"help": "mask length"})
    mask_prob: float = field(
        default=0.65,
        metadata={"help": "probability of replacing a token with mask"},
    )
    mask_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose mask length"}
    )
    mask_other: float = field(
        default=0,
        metadata={
            "help": "secondary mask argument "
            "(used for more complex distributions), "
            "see help in compute_mask_indicesh"
        },
    )
    no_mask_overlap: bool = field(
        default=False, metadata={"help": "whether to allow masks to overlap"}
    )
    mask_min_space: int = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )

    # channel masking
    mask_channel_length: int = field(
        default=10,
        metadata={"help": "length of the mask for features (channels)"},
    )
    mask_channel_prob: float = field(
        default=0.0,
        metadata={"help": "probability of replacing a feature with 0"},
    )
    mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static",
        metadata={"help": "how to choose mask length for channel masking"},
    )
    mask_channel_other: float = field(
        default=0,
        metadata={
            "help": "secondary mask argument "
            "(used for more complex distributions), "
            "see help in compute_mask_indicesh"
        },
    )
    no_mask_channel_overlap: bool = field(
        default=False,
        metadata={"help": "whether to allow channel masks to overlap"},
    )
    mask_channel_min_space: int = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )

    # positional embeddings
    conv_pos: int = field(
        default=128,
        metadata={"help": "number of filters for convolutional positional embeddings"},
    )
    conv_pos_groups: int = field(
        default=16,
        metadata={"help": "number of groups for convolutional positional embedding"},
    )

    latent_temp: Tuple[float, float, float] = field(
        default=(2, 0.5, 0.999995),
        metadata={"help": "legacy (to be removed)"},
    )

    # loss computation
    skip_masked: bool = field(
        default=False,
        metadata={"help": "skip computing losses over masked frames"},
    )
    skip_nomask: bool = field(
        default=False,
        metadata={"help": "skip computing losses over unmasked frames"},
    )

    checkpoint_activations: bool = field(
        default=False,
        metadata={"help": "recompute activations and save memory for extra compute"},
    )

    # FP16 optimization
    required_seq_len_multiple: int = field(
        default=2,
        metadata={
            "help": "pad the input to encoder such that the sequence length is divisible by multiple"
        },
    )

    # Conformer
    depthwise_conv_kernel_size: int = field(
        default=31,
        metadata={
            "help": "depthwise-conv-kernel-size for convolution in conformer layer"
        },
    )
    attn_type: str = field(
        default="",
        metadata={"help": "if espnet use ESPNET MHA"},
    )
    pos_enc_type: str = field(
        default="abs",
        metadata={"help": "Positional encoding type to use in conformer"},
    )
    fp16: bool = field(default=False, metadata={"help": "If fp16 is being used"})
self.use_single_prediction = cfg.use_single_prediction + self.use_plain_updownsample = cfg.use_plain_updownsample + + # For decide the override encoder layers, so that the layer number is not equally distributed + if cfg.override_encoder_layers != "": + self.override_encoder_layers = eval(cfg.override_encoder_layers) + assert ( + len(self.override_encoder_layers) % 2 == 1 + ), "must be odd number of layers if specify detailed layers" + assert ( + len(self.override_encoder_layers) // 2 + == len(cfg.label_rate_ratios) // 2 + ), "number of override encoder layers must match the label rate ratios information" + self.len_encoder_modules = len(self.override_encoder_layers) + else: + self.override_encoder_layers = None + self.len_encoder_modules = None + + # use different layers instead of equally distributed ones + middle_override_encoder_layer = ( + self.override_encoder_layers[self.len_encoder_modules // 2] + if self.override_encoder_layers is not None + else None + ) + skip_middle_pos_conv = False if len(cfg.label_rate_ratios) < 2 else True + + self.middle_encoder = TransformerEncoder( + cfg, + skip_pos_conv=skip_middle_pos_conv, + override_encoder_layer=middle_override_encoder_layer, + ) + + first_pos_conv = False # only enable pos_conv for the first encoder + raw_label_rate_ratios = cfg.label_rate_ratios + for i in range(len(raw_label_rate_ratios) // 2): + # check if have override encoder layers + if self.override_encoder_layers is not None: + override_encoder_layer = self.override_encoder_layers[i] + override_decoder_layer = self.override_encoder_layers[ + self.len_encoder_modules - 1 - i + ] + else: + override_encoder_layer, override_decoder_layer = None, None + + self.label_rate_ratios.append( + (raw_label_rate_ratios[i * 2], raw_label_rate_ratios[i * 2 + 1]) + ) + if self.use_plain_updownsample: + self.downsample_modules.append( + ConvDownsampler( + k=cfg.conv_adapator_kernal, + label_rate=( + ( + raw_label_rate_ratios[i * 2], + raw_label_rate_ratios[i * 2 + 1], 
+ ) + ), + dropout=0.0, + channels=cfg.encoder_embed_dim, + activation=nn.GELU(), + log_compression=False, + skip_connections=True, + highway=True, + residual_scale=0.4, + ) + ) + else: + self.downsample_modules.append( + ConvAdapter( + k=cfg.conv_adapator_kernal, + label_rate=( + ( + raw_label_rate_ratios[i * 2], + raw_label_rate_ratios[i * 2 + 1], + ) + ), + dropout=0.0, + channels=cfg.encoder_embed_dim, + activation=nn.GELU(), + log_compression=False, + skip_connections=True, + highway=True, + residual_scale=0.4, + ) + ) + if not first_pos_conv: + self.encoders.append( + TransformerEncoder( + cfg, override_encoder_layer=override_encoder_layer + ) + ) # TODO(jiatong): add conformer options + first_pos_conv = True + else: + self.encoders.append( + TransformerEncoder( + cfg, + skip_pos_conv=True, + override_encoder_layer=override_encoder_layer, + ) + ) + if self.use_plain_updownsample: + self.upsample_modules.append( + ConvUpsampler( + k=cfg.conv_adapator_kernal, + label_rate=( + ( + raw_label_rate_ratios[i * 2 + 1], + raw_label_rate_ratios[i * 2], + ) + ), + dropout=0.0, + channels=cfg.encoder_embed_dim, + activation=nn.GELU(), + log_compression=False, + skip_connections=True, + highway=True, + residual_scale=0.4, + ) + ) + else: + self.upsample_modules.append( + ConvAdapter( + k=cfg.conv_adapator_kernal, + label_rate=( + ( + raw_label_rate_ratios[i * 2 + 1], + raw_label_rate_ratios[i * 2], + ) + ), + dropout=0.0, + channels=cfg.encoder_embed_dim, + activation=nn.GELU(), + log_compression=False, + skip_connections=True, + highway=True, + residual_scale=0.4, + ) + ) + self.decoders.append( + TransformerEncoder( + cfg, + skip_pos_conv=True, + override_encoder_layer=override_decoder_layer, + ) + ) + + base_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feature_ds_rates = [base_ds_rate] + running_rate = self.base_rate + + if cfg.use_single_target or cfg.use_multi_stream: + self.label_rates = self.base_rate + else: + 
self.label_rates.append(self.base_rate) + + for label_rate_ratio in self.label_rate_ratios: + upsample_rate, downsample_rate = label_rate_ratio + if (base_ds_rate * upsample_rate) % downsample_rate != 0: + logger.warning( + "base rate: {} cannot be ideally processed with downsample rate {}".format( + base_ds_rate, downsample_rate + ) + ) + + base_ds_rate = base_ds_rate * downsample_rate // upsample_rate + self.feature_ds_rates.append(base_ds_rate) + + if not cfg.use_single_target and not cfg.use_multi_stream: + running_rate = running_rate * upsample_rate // downsample_rate + self.label_rates.append(running_rate) + self.label_nums = len( + self.feature_ds_rates + ) # the number of labels for prediction (activate at iter 2) + + if type(self.label_rates) == float: + self.feat2tar_ratios = [ + self.feature_ds_rates[i] * self.label_rates / task_cfg.sample_rate + for i in range(len(self.feature_ds_rates)) + ] + else: + self.feat2tar_ratios = [ + self.feature_ds_rates[i] * self.label_rates[i] / task_cfg.sample_rate + for i in range(len(self.feature_ds_rates)) + ] + + # self.feat2tar_ratios = self.feat2tar_ratios[::-1] + + # An running example of the label rate: + # base_ds_rate = 320 + # self.label_rate_ratios = [(1, 2)] + # self.feature_ds_rates = [320, 640] + # self.label_rates = [50, 25] + # self.feat2tar_ratios = [1, 1] + + # Another running example of the label rate: + # base_ds_rate = 320 + # self.label_rate_ratios = [(1, 2)] + # self.feature_ds_rates = [320, 640] + # self.label_rates = 100 + # self.feat2tar_ratios = [4, 2] + # self.use_sinlge_target = True + + logging.info( + "ds_rates: {}, label_rates: {}, feat2tar_ratios: {}".format( + self.feature_ds_rates, self.label_rates, self.feat2tar_ratios + ) + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + 
self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + # Note(jiatong): different from hubert, we just set the final dim as encoder_embed_dim + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.layer_norm = LayerNorm(self.embed) + + self.predictor_head_num = 1 if self.use_single_prediction else self.label_nums + + self.target_glu = None + if cfg.target_glu: + self.target_glus = nn.ModuleList() + for i in range(self.predictor_head_num): + self.target_glus.append( + nn.Sequential(nn.Linear(final_dim, final_dim * 2), nn.GLU()) + ) + + self.untie_final_proj = cfg.untie_final_proj + self.final_projs = nn.ModuleList() + + # Note(jiatong): we do not have untie cases for multires hubert + for i in range(self.predictor_head_num): + self.final_projs.append(nn.Linear(cfg.encoder_embed_dim, final_dim)) + + # modules below are not needed during fine-tuning + self.multires_classes = [] + self.label_embs_concat = nn.ParameterList() + + for i in range(self.predictor_head_num): + if self.use_single_target: + num_classes = len(dictionaries[0]) + else: + num_classes = len(dictionaries[i]) + self.multires_classes.append(num_classes) + self.label_embs_concat.append( + nn.Parameter(torch.FloatTensor(num_classes, final_dim)) + ) + nn.init.uniform_(self.label_embs_concat[i]) + + def upgrade_state_dict_named(self, 
def compute_nce(self, x, pos, negs):
    """Score ``x`` against its positive and negative targets.

    The positive is stacked in front of the negatives, cosine similarity
    with ``x`` is taken over the last dimension, and the result is divided
    by ``self.logit_temp``. Any negative that is element-wise identical to
    the positive is scored ``-inf``.

    Args:
        x: query vectors (the call site's shape comment gives ``(S, D)``).
        pos: positive targets, same shape as ``x``.
        negs: negative targets with one extra leading dimension
            (``(Neg, S, D)`` per the call site's comment).

    Returns:
        The logits with the first two dimensions swapped, so the positive
        is at index 0 of the second dimension.
    """
    collisions = (pos == negs).all(-1)
    candidates = torch.cat([pos.unsqueeze(0), negs], dim=0)

    scores = torch.cosine_similarity(
        x.float(), candidates.float(), dim=-1
    ).type_as(x)
    scores /= self.logit_temp
    if collisions.any():
        # Slicing yields a view, so this writes through to ``scores``.
        negative_scores = scores[1:]
        negative_scores[collisions] = float("-inf")
    return scores.transpose(0, 1)  # (num_x, num_cls+1)
def forward_targets(
    self,
    features: torch.Tensor,
    target: Optional[torch.Tensor],
    feat2tar_ratio: float,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    """Trim features so labels exist for every frame, then pick aligned labels.

    Args:
        features: tensor whose dimension 1 is the frame axis.
        target: label tensor whose dimension 1 is the label axis, or
            ``None`` when no labels are available.
        feat2tar_ratio: number of label positions per feature frame.

    Returns:
        ``(features, target)``. ``features`` is cut along dimension 1 when
        ``feat2tar_ratio * frames`` exceeds the number of labels, and
        ``target`` holds the label at position ``floor(i * feat2tar_ratio)``
        for each kept frame ``i``. When ``target`` is ``None`` the features
        are returned untouched together with ``None``.
    """
    feat_tsz = features.size(1)

    # skip if no target is provided
    if target is None:
        # Always a pair: callers unpack exactly two values.
        return features, None

    targ_tsz = target.size(1)
    if feat2tar_ratio * feat_tsz > targ_tsz:
        # Not enough labels for all frames: keep only the covered frames.
        feat_tsz = int(targ_tsz / feat2tar_ratio)
        features = features[:, :feat_tsz]
    target_inds = torch.arange(feat_tsz).float() * feat2tar_ratio
    target = target[:, target_inds.long()]
    return features, target
None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + + def align_size_sum(feat1, pad1, feat2): + assert ( + abs(feat1.size(1) - feat2.size(1)) < 10 + ), "misaligned results for feat1 and feat2 of size {} - {}".format( + feat1.size(1), feat2.size(1) + ) + common_size = min(feat1.size(1), feat2.size(1)) + + return ( + feat1[:, :common_size] + feat2[:, :common_size], + pad1[:, :common_size], + ) + + # process encoders + res_outputs = [] # final output for different resolution + multi_mask_indices = [] # mask indices for different resolution + residuals = [] # record the x in encoders + padding_masks = [] # final padding masks + # The encoder has (self.label_nums - 1) blocks + for i in range(self.label_nums - 1): + x, _ = self.encoders[i](x, padding_mask=padding_mask, layer=None) + residuals.append(x) + x, padding_mask, mask_indices = self.downsample_modules[i]( + x, padding=padding_mask, mask_indices=mask_indices + ) + + residual = self.middle_encoder(x, padding_mask=padding_mask, layer=None)[0] + x = x + residual + res_outputs.append(x) + + # process decoders + # The encoder has (self.label_nums - 1) blocks + padding_masks.append(padding_mask) + multi_mask_indices.append(mask_indices) + residuals.reverse() # NOTE(jiatong): reverse res_output to match corresponding input + for i in range(self.label_nums - 1): + x, padding_mask, mask_indices = self.upsample_modules[ + self.label_nums - 2 - i + ](x, padding=padding_mask, mask_indices=mask_indices) + x, _ = self.decoders[i](x, padding_mask=padding_mask, layer=None) + x, padding_mask = align_size_sum(x, padding_mask, residuals[i]) + res_outputs.append(x) + padding_masks.append(padding_mask) + multi_mask_indices.append(mask_indices) + + # NOTE(jiatong): need reverse of target list to allow matched target-representation + res_outputs.reverse() + padding_masks.reverse() + multi_mask_indices.reverse() + if target_list is not None: 
+ new_target_list = [] + for i in range(self.label_nums): + if self.use_single_target: + res_outputs[i], reformat_target_list = self.forward_targets( + res_outputs[i], target_list[0], self.feat2tar_ratios[i] + ) + new_target_list.append(reformat_target_list) + else: + if target_list[i] is not None: + res_outputs[i], reformat_target_list = self.forward_targets( + res_outputs[i], target_list[i], self.feat2tar_ratios[i] + ) + new_target_list.append(reformat_target_list) + else: + # Append a None target list then it won't be used to calculate loss + new_target_list.append(None) + if padding_masks[i] is not None: + padding_masks[i] = self.forward_padding_mask( + res_outputs[i], padding_masks[i] + ) + if multi_mask_indices[i] is not None: + multi_mask_indices[i] = self.forward_padding_mask( + res_outputs[i], multi_mask_indices[i] + ) + + + if features_only: + # NOTE(jiatong): need to reverse back + res_outputs.reverse() + return { + "x": res_outputs, + "padding_mask": padding_masks[0], + "features": features, + } + + def compute_pred(proj_x, target, label_embs): + # compute logits for the i-th label set + y = torch.index_select(label_embs, 0, target.long()) + negs = label_embs.unsqueeze(1).expand(-1, proj_x.size(0), -1) + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + # proj_x: (S, D) + # y: (S, D) + # negs: (Neg, S, D) + return self.compute_nce(proj_x, y, negs) + + logit_m_list, logit_u_list = [], [] + for j in range(self.label_nums): + if new_target_list[j] is None: + continue # skip empty targets + label_embs_list = self.label_embs_concat[j].split( + [self.multires_classes[j]], 0 + ) + # set the variables (after the set, the procedure is the same as hubert) + # all the elements are list with only one element (to simulate the normal hubert process) + x = res_outputs[j] + target = new_target_list[j] + padding_mask = padding_masks[j] + mask_indices = multi_mask_indices[j] + final_proj = self.final_projs[j] + + if not self.skip_masked: + 
def get_logits(self, net_output, is_masked=True):
    """Return the non-empty logits from a forward result as float tensors.

    Args:
        net_output: mapping holding ``"logit_m_list"`` and
            ``"logit_u_list"``.
        is_masked: read ``"logit_m_list"`` when true, otherwise
            ``"logit_u_list"``.

    Returns:
        A list with ``.float()`` applied to each entry that is not
        ``None``; ``None`` entries are dropped.
    """
    key = "logit_m_list" if is_masked else "logit_u_list"
    return [entry.float() for entry in net_output[key] if entry is not None]
class ConvAdapter(nn.Module):
    """Conv adapter that bridges two modules running at different label rates.

    To allow ratios other than integers, two convolutions are chained: a
    transposed convolution upsamples by ``label_rate[0]`` (numerator) and a
    strided convolution then downsamples by ``label_rate[1]`` (denominator).

    Args:
        k: kernel size shared by both convolutions.
        label_rate: pair ``(upsample_rate, downsample_rate)``.
        dropout: dropout probability applied after each convolution.
        channels: number of input (and output) channels.
        activation: activation module appended to each conv block; the same
            instance is shared by both blocks.
        log_compression: if true, the output is replaced by ``log(|x| + 1)``.
        skip_connections: add a (repeated / strided) copy of the block input
            to the output of each convolution.
        highway: additionally add the input, resampled directly to the output
            rate, to the final result.
        residual_scale: every residual sum is multiplied by
            ``sqrt(residual_scale)``.
        non_affine_group_norm: if true, the normalisation layers are built
            with ``affine=False``.
    """

    def __init__(
        self,
        k,
        label_rate,
        dropout,
        channels,
        activation,
        log_compression=False,
        skip_connections=True,
        highway=True,
        residual_scale=0.4,
        non_affine_group_norm=False,
    ):
        super().__init__()

        def downsample_block(channel, k, stride):
            return nn.Sequential(
                # with padding (k - 1) // 2 to keep the same size
                nn.Conv1d(
                    channel,
                    channel,
                    k,
                    stride=stride,
                    bias=False,
                    padding=(k - 1) // 2,
                ),
                nn.Dropout(p=dropout),
                norm_block(
                    is_layer_norm=False, dim=channel, affine=not non_affine_group_norm
                ),
                activation,
            )

        def upsample_block(channel, k, stride):
            return nn.Sequential(
                # no padding here (the (k - 1) // 2 variant is disabled);
                # forward() trims the result when skip connections are on
                nn.ConvTranspose1d(
                    channel,
                    channel,
                    k,
                    stride=stride,
                    bias=False,
                    padding=0,  # padding=(k - 1) // 2,
                    output_padding=(stride - 1),
                ),
                nn.Dropout(p=dropout),
                norm_block(
                    is_layer_norm=False, dim=channel, affine=not non_affine_group_norm
                ),
                activation,
            )

        assert len(label_rate) == 2, "label_rate should be sized two to apply fusion"
        # ConvTranspose1d output length:
        # L_out = (L_in - 1) * stride - 2 * padding
        #         + dilation * (kernel_size - 1) + output_padding + 1
        self.upsample_conv = upsample_block(channels, k, label_rate[0])
        self.downsample_conv = downsample_block(channels, k, label_rate[1])

        self.upsample_rate, self.downsample_rate = label_rate
        self.log_compression = log_compression
        self.skip_connections = skip_connections
        self.highway = highway
        self.residual_scale = math.sqrt(residual_scale)

    def forward(self, x, padding=None, mask_indices=None):
        """Resample ``x`` and bring ``padding`` / ``mask_indices`` to the new rate.

        Args:
            x: input features, assumed to be laid out as (B, T, C).
            padding: optional per-frame mask indexed as (B, T); repeated and
                strided along dim 1 to follow ``x``.
            mask_indices: optional per-frame mask handled like ``padding``.

        Returns:
            Tuple ``(x, padding, mask_indices)``; ``x`` is permuted back to
            the input layout and the two masks are cut to ``x.size(1)``.
        """
        # Assume x1 = (B, T, C) as input
        x = x.permute(0, 2, 1)
        residual_before_upsample = x
        x = self.upsample_conv(x)
        upsample_size = x.size(2)

        # The repeated input feeds both the skip connection and the highway
        # path, so build it when either is enabled. It used to be created
        # only under skip_connections, which made highway=True with
        # skip_connections=False fail with a NameError.
        residual_upsample = None
        if self.skip_connections or self.highway:
            residual_upsample = torch.repeat_interleave(
                residual_before_upsample, self.upsample_rate, dim=2
            )

        # conduct upsample
        if self.skip_connections:
            upsample_size = min(upsample_size, residual_upsample.size(2))
            x = (
                x[..., :upsample_size] + residual_upsample[..., :upsample_size]
            ) * self.residual_scale

        residual_before_downsample = x
        x = self.downsample_conv(x)

        if self.skip_connections:
            residual_downsample = residual_before_downsample[
                ..., :: self.downsample_rate
            ]
            downsample_size = min(x.size(2), residual_downsample.size(2))
            x = (
                x[..., :downsample_size] + residual_downsample[..., :downsample_size]
            ) * self.residual_scale

        if self.highway:
            residual_after_sample = residual_upsample[..., :: self.downsample_rate]
            final_size = min(x.size(2), residual_after_sample.size(2))
            x = (
                x[..., :final_size] + residual_after_sample[..., :final_size]
            ) * self.residual_scale

        if self.log_compression:
            x = x.abs()
            x = x + 1
            x = x.log()

        x = x.permute(0, 2, 1)

        # process padding
        if padding is not None:
            padding = torch.repeat_interleave(padding, self.upsample_rate, dim=1)
            padding = padding[..., :: self.downsample_rate]
            padding = padding[..., : x.size(1)]

        # process mask indices
        if mask_indices is not None:
            mask_indices = torch.repeat_interleave(
                mask_indices, self.upsample_rate, dim=1
            )
            mask_indices = mask_indices[..., :: self.downsample_rate]
            mask_indices = mask_indices[..., : x.size(1)]
        return x, padding, mask_indices
+ To allow different ratios than integer, two convs are utilized with first to upsample (numerator) + and the second to downsample (denominator)""" + + def __init__( + self, + k, + label_rate, + dropout, + channels, + activation, + log_compression=False, + skip_connections=True, + highway=True, + residual_scale=0.4, + non_affine_group_norm=False, + ): + super().__init__() + + def downsample_block(channel, k, stride): + return nn.Sequential( + # with padding (k - 1) // 2 to keep the same size + nn.Conv1d( + channel, + channel, + k, + stride=stride, + bias=False, + padding=(k - 1) // 2, + ), + nn.Dropout(p=dropout), + norm_block( + is_layer_norm=False, dim=channel, affine=not non_affine_group_norm + ), + activation, + ) + + assert len(label_rate) == 2, "label_rate should be sized two to apply fusion" + self.downsample_conv = downsample_block(channels, k, label_rate[1]) + + upsample_rate, self.downsample_rate = label_rate + assert upsample_rate == 1, "must be 1 to perform downsample only" + self.log_compression = log_compression + self.skip_connections = skip_connections + self.highway = highway # Useless as placeholder + self.residual_scale = math.sqrt(residual_scale) + + def forward(self, x, padding=None, mask_indices=None): + # Assume x1 = (B, T, C) as input + x = x.permute(0, 2, 1) + + residual_before_downsample = x + x = self.downsample_conv(x) + downsample_size = x.size(2) + + if self.skip_connections: + residual_downsample = residual_before_downsample[ + ..., :: self.downsample_rate + ] + downsample_size = min(x.size(2), residual_downsample.size(2)) + x = ( + x[..., :downsample_size] + residual_downsample[..., :downsample_size] + ) * self.residual_scale + + if self.log_compression: + x = x.abs() + x = x + 1 + x = x.log() + + x = x.permute(0, 2, 1) + + # process padding + if padding is not None: + padding = padding[..., :: self.downsample_rate] + padding = padding[..., : x.size(1)] + + # process mask indices + if mask_indices is not None: + mask_indices = 
class ConvUpsampler(nn.Module):
    """Conv upsampler: the upsample-only counterpart of the conv adapter.

    ``label_rate`` is a pair ``(upsample_rate, downsample_rate)``; the second
    entry must be 1 because no downsampling convolution is built here.

    Args:
        k: kernel size of the transposed convolution.
        label_rate: pair ``(upsample_rate, 1)``.
        dropout: dropout probability applied after the convolution.
        channels: number of input (and output) channels.
        activation: activation module appended to the conv block.
        log_compression: if true, the output is replaced by ``log(|x| + 1)``.
        skip_connections: add the input, repeated ``upsample_rate`` times
            along time, to the convolution output.
        highway: stored but not used by ``forward``; accepted so the
            signature matches the other resampling modules.
        residual_scale: the residual sum is multiplied by
            ``sqrt(residual_scale)``.
        non_affine_group_norm: if true, the normalisation layer is built
            with ``affine=False``.
    """

    def __init__(
        self,
        k,
        label_rate,
        dropout,
        channels,
        activation,
        log_compression=False,
        skip_connections=True,
        highway=True,
        residual_scale=0.4,
        non_affine_group_norm=False,
    ):
        super().__init__()

        def upsample_block(channel, k, stride):
            return nn.Sequential(
                # no padding here (the (k - 1) // 2 variant is disabled);
                # forward() trims the result when skip connections are on
                nn.ConvTranspose1d(
                    channel,
                    channel,
                    k,
                    stride=stride,
                    bias=False,
                    padding=0,  # padding=(k - 1) // 2,
                    output_padding=(stride - 1),
                ),
                nn.Dropout(p=dropout),
                norm_block(
                    is_layer_norm=False, dim=channel, affine=not non_affine_group_norm
                ),
                activation,
            )

        assert len(label_rate) == 2, "label_rate should be sized two to apply fusion"
        # ConvTranspose1d output length:
        # L_out = (L_in - 1) * stride - 2 * padding
        #         + dilation * (kernel_size - 1) + output_padding + 1
        self.upsample_conv = upsample_block(channels, k, label_rate[0])

        self.upsample_rate, downsample_rate = label_rate
        # The message used to say "downsample only", which pointed at the
        # wrong module when this check fired.
        assert downsample_rate == 1, "must be 1 to perform upsample only"
        self.log_compression = log_compression
        self.skip_connections = skip_connections
        self.highway = highway  # Useless
        self.residual_scale = math.sqrt(residual_scale)

    def forward(self, x, padding=None, mask_indices=None):
        """Upsample ``x`` and repeat ``padding`` / ``mask_indices`` to match.

        Args:
            x: input features, assumed to be laid out as (B, T, C).
            padding: optional per-frame mask indexed as (B, T).
            mask_indices: optional per-frame mask handled like ``padding``.

        Returns:
            Tuple ``(x, padding, mask_indices)``; ``x`` is permuted back to
            the input layout and the two masks are cut to ``x.size(1)``.
        """
        # Assume x1 = (B, T, C) as input
        x = x.permute(0, 2, 1)
        residual_before_upsample = x
        x = self.upsample_conv(x)
        upsample_size = x.size(2)

        # conduct upsample
        if self.skip_connections:
            residual_upsample = torch.repeat_interleave(
                residual_before_upsample, self.upsample_rate, dim=2
            )
            # the two branches can differ in length; keep the common prefix
            upsample_size = min(upsample_size, residual_upsample.size(2))
            x = (
                x[..., :upsample_size] + residual_upsample[..., :upsample_size]
            ) * self.residual_scale

        if self.log_compression:
            x = x.abs()
            x = x + 1
            x = x.log()

        x = x.permute(0, 2, 1)

        # process padding
        if padding is not None:
            padding = torch.repeat_interleave(padding, self.upsample_rate, dim=1)
            padding = padding[..., : x.size(1)]

        # process mask indices
        if mask_indices is not None:
            mask_indices = torch.repeat_interleave(
                mask_indices, self.upsample_rate, dim=1
            )
            mask_indices = mask_indices[..., : x.size(1)]
        return x, padding, mask_indices
+ +import contextlib +from argparse import Namespace +from dataclasses import dataclass, field +from typing import Any + +import torch +import torch.nn as nn +from omegaconf import II, MISSING + +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert import MASKING_DISTRIBUTION_CHOICES +from fairseq.tasks import FairseqTask + + +@dataclass +class MultiresHubertAsrConfig(FairseqDataclass): + multires_hubert_path: str = field( + default=MISSING, metadata={"help": "path to multires_hubert model"} + ) + no_pretrained_weights: bool = field( + default=False, + metadata={"help": "if true, does not load pretrained weights"}, + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "dropout after transformer and before final projection"}, + ) + dropout: float = field( + default=0.0, + metadata={"help": "dropout probability inside hubert model"}, + ) + attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights " "inside hubert model" + }, + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " "inside hubert model" + }, + ) + + # masking + apply_mask: bool = field( + default=False, metadata={"help": "apply masking during fine-tuning"} + ) + mask_length: int = field( + default=10, metadata={"help": "repeat the mask indices multiple times"} + ) + mask_prob: float = field( + default=0.5, + metadata={ + "help": "probability of replacing a token with mask " + "(normalized by length)" + }, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose 
@register_model("multires_hubert_ctc", dataclass=MultiresHubertAsrConfig)
class MultiresHubertCtc(BaseFairseqModel):
    """Model registered as ``multires_hubert_ctc``: a thin wrapper that
    forwards its inputs to a ``MultiresHubertEncoder`` and exposes the
    encoder output as logits / normalized probabilities."""

    def __init__(
        self, cfg: MultiresHubertAsrConfig, multireshubert_encoder: BaseFairseqModel
    ):
        super().__init__()
        self.cfg = cfg
        self.multireshubert_encoder = multireshubert_encoder

    def upgrade_state_dict_named(self, state_dict, name):
        super().upgrade_state_dict_named(state_dict, name)
        return state_dict

    @classmethod
    def build_model(cls, cfg: MultiresHubertAsrConfig, task: FairseqTask):
        """Build a new model instance."""
        multireshubert_encoder = MultiresHubertEncoder(cfg, task)
        return cls(cfg, multireshubert_encoder)

    def get_normalized_probs(self, net_output, log_probs, sample=None):
        """Get normalized probabilities (or log probs) from a net's output."""

        logits = net_output["encoder_out"]
        if log_probs:
            return utils.log_softmax(logits.float(), dim=-1)
        else:
            return utils.softmax(logits.float(), dim=-1)

    def get_logits(self, net_output):
        """Return the encoder logits with padded frames masked in place.

        Every frame flagged in ``net_output["encoder_padding_mask"]`` gets 0
        at class index 0 and ``-inf`` at every other class (presumably index 0
        is the CTC blank -- confirm against the target dictionary).
        """
        logits = net_output["encoder_out"]
        padding = net_output["encoder_padding_mask"]
        if padding is not None and padding.any():
            # ``logits[padding][..., 0] = 0`` indexed with a boolean mask
            # first, which yields a copy, so the write never reached
            # ``logits``. Assign a whole class row through one indexed
            # assignment instead.
            masking_row = logits.new_full((logits.size(-1),), float("-inf"))
            masking_row[0] = 0
            # the mask is transposed to line up with the leading dims of logits
            logits[padding.T] = masking_row

        return logits

    def forward(self, **kwargs):
        x = self.multireshubert_encoder(**kwargs)
        return x
decoder_dropout: float = field( + default=0.0, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights " "inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " "inside the decoder" + }, + ) + max_target_positions: int = field( + default=2048, metadata={"help": "max target positions"} + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + + +class MultiresHubertEncoder(FairseqEncoder): + def __init__(self, cfg: MultiresHubertAsrConfig, task): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.multires_hubert_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.multires_hubert_path, arg_overrides + ) + multires_hubert_args = state.get("cfg", None) + if multires_hubert_args is None: + multires_hubert_args = convert_namespace_to_omegaconf(state["args"]) + cfg.multires_hubert_args = multires_hubert_args + else: + state = None + multires_hubert_args = cfg.multires_hubert_args + if isinstance(multires_hubert_args, Namespace): + cfg.multires_hubert_args 
= ( + multires_hubert_args + ) = convert_namespace_to_omegaconf(multires_hubert_args) + + assert cfg.normalize == multires_hubert_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + multires_hubert_args.task.data = cfg.data + pretrain_task = tasks.setup_task(multires_hubert_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + pretrain_task.load_state_dict(state["task_state"]) + else: + pretrain_task.load_state_dict(task.state_dict()) + + model = pretrain_task.build_model( + multires_hubert_args.model, from_checkpoint=True + ) + if state is not None and not cfg.no_pretrained_weights: + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(pretrain_task.source_dictionary) + + d = multires_hubert_args.model.encoder_embed_dim + + self.multires_hubert_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + if task.target_dictionary is not None: + self.proj = Linear(d, len(task.target_dictionary)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source, padding_mask, tbc=True, **kwargs): + multires_hubert_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + "last_layer": True, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.multires_hubert_model.extract_features( + 
def Linear(in_features, out_features, bias=True):
    """Build an ``nn.Linear`` with Xavier-uniform weights and, when a bias
    is requested, a bias initialised to zero."""
    layer = nn.Linear(in_features, out_features, bias)
    nn.init.xavier_uniform_(layer.weight)
    if not bias:
        return layer
    nn.init.constant_(layer.bias, 0.0)
    return layer
def _skeptical_unmasking(output_scores, output_masks, p):
    """Select, per row, the lowest-scoring positions that should be masked again.

    The number selected in each row is ``(output_masks.sum(1) - 2) * p``
    truncated to an integer (the ``- 2`` presumably discounts the two
    boundary tokens -- confirm with the caller). Returns a boolean tensor
    shaped like ``output_masks``.
    """
    # ascending sort: the first columns of ``order`` hold the positions of
    # the smallest scores
    _, order = output_scores.sort(-1)
    token_counts = output_masks.sum(1, keepdim=True).type_as(output_scores)
    cutoff = ((token_counts - 2) * p).long()
    # True for the first ``cutoff`` ranks of each row
    rank_selected = new_arange(output_masks) < cutoff
    # move the per-rank flags back onto the original token positions
    return rank_selected.scatter(1, order, rank_selected)
+ + # encoding + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + # length prediction + length_out = self.decoder.forward_length( + normalize=False, encoder_out=encoder_out + ) + length_tgt = self.decoder.forward_length_prediction( + length_out, encoder_out, tgt_tokens + ) + + # decoding + word_ins_out = self.decoder( + normalize=False, + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + ) + word_ins_mask = prev_output_tokens.eq(self.unk) + + return { + "word_ins": { + "out": word_ins_out, + "tgt": tgt_tokens, + "mask": word_ins_mask, + "ls": self.args.label_smoothing, + "nll_loss": True, + }, + "length": { + "out": length_out, + "tgt": length_tgt, + "factor": self.decoder.length_loss_factor, + }, + } + + def forward_decoder(self, decoder_out, encoder_out, decoding_format=None, **kwargs): + + step = decoder_out.step + max_step = decoder_out.max_step + + output_tokens = decoder_out.output_tokens + output_scores = decoder_out.output_scores + history = decoder_out.history + + # execute the decoder + output_masks = output_tokens.eq(self.unk) + _scores, _tokens = self.decoder( + normalize=True, + prev_output_tokens=output_tokens, + encoder_out=encoder_out, + ).max(-1) + output_tokens.masked_scatter_(output_masks, _tokens[output_masks]) + output_scores.masked_scatter_(output_masks, _scores[output_masks]) + + if history is not None: + history.append(output_tokens.clone()) + + # skeptical decoding (depend on the maximum decoding steps.) 
@register_model_architecture("cmlm_transformer", "cmlm_transformer")
def cmlm_base_architecture(args):
    """Fill ``args`` in place with the base ``cmlm_transformer`` defaults.

    Attributes already present on ``args`` keep their value; missing ones get
    the default listed here. The order is significant: some decoder defaults
    are read from encoder/decoder values assigned earlier in this function.
    """

    def _default(name, fallback):
        setattr(args, name, getattr(args, name, fallback))

    for name, fallback in (
        ("encoder_embed_path", None),
        ("encoder_embed_dim", 512),
        ("encoder_ffn_embed_dim", 2048),
        ("encoder_layers", 6),
        ("encoder_attention_heads", 8),
        ("encoder_normalize_before", False),
        ("encoder_learned_pos", False),
        ("decoder_embed_path", None),
    ):
        _default(name, fallback)

    # decoder sizes fall back to the encoder sizes resolved above
    _default("decoder_embed_dim", args.encoder_embed_dim)
    _default("decoder_ffn_embed_dim", args.encoder_ffn_embed_dim)

    for name, fallback in (
        ("decoder_layers", 6),
        ("decoder_attention_heads", 8),
        ("decoder_normalize_before", False),
        ("decoder_learned_pos", False),
        ("attention_dropout", 0.0),
        ("activation_dropout", 0.0),
        ("activation_fn", "relu"),
        ("dropout", 0.1),
        ("adaptive_softmax_cutoff", None),
        ("adaptive_softmax_dropout", 0),
        ("share_decoder_input_output_embed", False),
        ("share_all_embeddings", True),
        ("no_token_positional_embeddings", False),
        ("adaptive_input", False),
        ("apply_bert_init", False),
    ):
        _default(name, fallback)

    # input/output widths fall back to the decoder width resolved above
    _default("decoder_output_dim", args.decoder_embed_dim)
    _default("decoder_input_dim", args.decoder_embed_dim)

    # --- special arguments ---
    for name, fallback in (
        ("sg_length_pred", False),
        ("pred_length_offset", False),
        ("length_loss_factor", 0.1),
        ("ngram_predictor", 1),
        ("src_embedding_copy", False),
    ):
        _default(name, fallback)
def ensemble_encoder(func):
    """Decorator that runs an encoder ``forward`` over an ensemble.

    When ``self.ensemble_models`` is ``None`` or holds exactly one model, the
    wrapped function is called unchanged. Otherwise it is called once per
    ensemble member with ``return_all_hiddens=True`` and the per-member
    results are merged into a copy of the first member's output dict:
    ``"encoder_out"``, ``"encoder_embedding"`` and every entry of
    ``"encoder_states"`` are stacked along a new trailing dimension.
    """

    def wrapper(self, *args, **kwargs):
        members = self.ensemble_models
        if members is None or len(members) == 1:
            return func(self, *args, **kwargs)

        per_model = [
            func(member, *args, **kwargs, return_all_hiddens=True)
            for member in members
        ]
        merged = per_model[0].copy()

        # each of these keys holds a one-element list; a None entry stays None
        for key in ("encoder_out", "encoder_embedding"):
            heads = [out[key][0] for out in per_model]
            merged[key] = [None] if heads[0] is None else [torch.stack(heads, -1)]

        depth = len(merged["encoder_states"])
        if depth > 0:
            merged["encoder_states"] = [
                torch.stack([out["encoder_states"][layer] for out in per_model], -1)
                for layer in range(depth)
            ]
        return merged

    return wrapper
class FairseqNATModel(TransformerModel):
    """
    Abstract class for all nonautoregressive-based models.

    Caches the decoder dictionary's special-token indices and provides the
    ensemble hooks. ``forward``, ``forward_decoder`` and
    ``initialize_output_tokens`` must be supplied by subclasses.
    """

    def __init__(self, args, encoder, decoder):
        super().__init__(args, encoder, decoder)
        self.tgt_dict = decoder.dictionary
        self.bos = decoder.dictionary.bos()
        self.eos = decoder.dictionary.eos()
        self.pad = decoder.dictionary.pad()
        self.unk = decoder.dictionary.unk()

        self.ensemble_models = None

    @property
    def allow_length_beam(self):
        return False

    @property
    def allow_ensemble(self):
        return True

    def enable_ensemble(self, models):
        """Register the encoders/decoders of ``models`` as ensemble members."""
        self.encoder.ensemble_models = [m.encoder for m in models]
        self.decoder.ensemble_models = [m.decoder for m in models]

    @staticmethod
    def add_args(parser):
        TransformerModel.add_args(parser)
        parser.add_argument(
            "--apply-bert-init",
            action="store_true",
            help="use custom param initialization for BERT",
        )

    @classmethod
    def build_decoder(cls, args, tgt_dict, embed_tokens):
        decoder = FairseqNATDecoder(args, tgt_dict, embed_tokens)
        if getattr(args, "apply_bert_init", False):
            decoder.apply(init_bert_params)
        return decoder

    @classmethod
    def build_encoder(cls, args, src_dict, embed_tokens):
        encoder = FairseqNATEncoder(args, src_dict, embed_tokens)
        if getattr(args, "apply_bert_init", False):
            encoder.apply(init_bert_params)
        return encoder

    def forward_encoder(self, encoder_inputs):
        return self.encoder(*encoder_inputs)

    # The three methods below used to ``return NotImplementedError``, handing
    # the exception class back to the caller as if it were a result. Raising
    # makes a missing override fail at the call site.
    def forward_decoder(self, *args, **kwargs):
        """Abstract; subclasses must override."""
        raise NotImplementedError

    def initialize_output_tokens(self, *args, **kwargs):
        """Abstract; subclasses must override."""
        raise NotImplementedError

    def forward(self, *args, **kwargs):
        """Abstract; subclasses must override."""
        raise NotImplementedError
tau for i in range(L)]) + s = np.exp(s - s.max()) + return s / s.sum() + + def compute_score_full(self, L, tau): + s = -abs(np.arange(0, L - 1)[:, None] / 2 - np.arange(L)[None, :]) / tau + s = np.tril(s, 0) + np.triu(s - float("inf"), 1) + s = np.exp(s - s.max(1, keepdims=True)) + return s / s.sum(1, keepdims=True) + + +neg_scorer = NegativeDistanceScore() + + +def _get_ins_targets(in_tokens, out_tokens, padding_idx, unk_idx, vocab_size, tau=None): + try: + from fairseq import libnat + except ImportError as e: + import sys + + sys.stderr.write("ERROR: missing libnat. run `pip install --editable .`\n") + raise e + + B = in_tokens.size(0) + T = in_tokens.size(1) + V = vocab_size + + with torch.cuda.device_of(in_tokens): + in_tokens_list = [ + [t for t in s if t != padding_idx] for i, s in enumerate(in_tokens.tolist()) + ] + out_tokens_list = [ + [t for t in s if t != padding_idx] + for i, s in enumerate(out_tokens.tolist()) + ] + + full_labels = libnat.suggested_ed2_path( + in_tokens_list, out_tokens_list, padding_idx + ) + insert_labels = [a[:-1] for a in full_labels] + + # numericalize1 + insert_label_tensors = in_tokens.new_zeros(B * (T - 1) * V).float() + insert_index, insert_labels = zip( + *[ + (w + (j + i * (T - 1)) * V, neg_scorer(k, len(label), tau)) + for i, labels in enumerate(insert_labels) + for j, label in enumerate(labels[1:-1]) + for k, w in enumerate(label) + ] + ) # HACK 1:-1 + insert_index, insert_labels = [ + torch.tensor(list(a), device=in_tokens.device) + for a in [insert_index, insert_labels] + ] + insert_label_tensors.scatter_(0, insert_index.long(), insert_labels) + insert_label_tensors = insert_label_tensors.view(B, T - 1, V) + + return insert_label_tensors + + +def _apply_ins_words(in_tokens, in_scores, word_ins_pred, word_ins_scores, padding_idx): + + padding_masks = in_tokens[:, 1:].eq(padding_idx) + word_ins_scores.masked_fill_(padding_masks, 0.0) + word_ins_pred.masked_fill_(padding_masks, padding_idx) + + in_coords = 
new_arange(in_tokens).type_as(in_scores) + + # shift all padding predictions to infinite + out_coords = (in_coords[:, 1:] - 0.5).masked_fill( + word_ins_pred.eq(padding_idx), float("inf") + ) + out_coords = torch.cat([in_coords, out_coords], 1).sort(-1)[1] + out_tokens = torch.cat([in_tokens, word_ins_pred], 1).gather(1, out_coords) + out_scores = torch.cat([in_scores, word_ins_scores], 1).gather(1, out_coords) + return out_tokens, out_scores + + +@register_model("insertion_transformer") +class InsertionTransformerModel(LevenshteinTransformerModel): + def __init__(self, args, encoder, decoder): + super().__init__(args, encoder, decoder) + + @staticmethod + def add_args(parser): + FairseqNATModel.add_args(parser) + parser.add_argument("--label-tau", default=None, type=float) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + decoder = InsertionTransformerDecoder(args, tgt_dict, embed_tokens) + if getattr(args, "apply_bert_init", False): + decoder.apply(init_bert_params) + return decoder + + def forward( + self, src_tokens, src_lengths, prev_output_tokens, tgt_tokens, **kwargs + ): + + assert tgt_tokens is not None, "forward function only supports training." 
+ + # encoding + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + + # generate training labels for insertion + word_ins_out = self.decoder.forward_word_ins( + normalize=False, + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + ) + + word_ins_tgt = _get_ins_targets( + prev_output_tokens, + tgt_tokens, + self.pad, + self.unk, + len(self.tgt_dict), + tau=self.decoder.label_tau, + ).type_as(word_ins_out) + word_ins_masks = prev_output_tokens[:, 1:].ne(self.pad) + + return { + "word_ins": { + "out": word_ins_out, + "tgt": word_ins_tgt, + "mask": word_ins_masks, + "ls": self.args.label_smoothing, + "nll_loss": True, + } + } + + def forward_decoder( + self, decoder_out, encoder_out, eos_penalty=0.0, max_ratio=None, **kwargs + ): + + output_tokens = decoder_out.output_tokens + output_scores = decoder_out.output_scores + history = decoder_out.history + + # TODO: decoding for InsertionTransformer + word_ins_score = self.decoder.forward_word_ins( + normalize=True, prev_output_tokens=output_tokens, encoder_out=encoder_out + ) + + if eos_penalty > 0.0: + word_ins_score[:, :, self.pad] -= eos_penalty + word_ins_score, word_ins_pred = word_ins_score.max(-1) + output_tokens, output_scores = _apply_ins_words( + output_tokens, output_scores, word_ins_pred, word_ins_score, self.pad + ) + + # delete some unnecessary paddings + cut_off = output_tokens.ne(self.pad).sum(1).max() + output_tokens = output_tokens[:, :cut_off] + output_scores = output_scores[:, :cut_off] + + if history is not None: + history.append(output_tokens.clone()) + + return decoder_out._replace( + output_tokens=output_tokens, + output_scores=output_scores, + attn=None, + history=history, + ) + + +class InsertionTransformerDecoder(LevenshteinTransformerDecoder): + def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False): + # use the TransformerDecoder's __init__ + super(LevenshteinTransformerDecoder, self).__init__( + args, dictionary, embed_tokens, 
no_encoder_attn=no_encoder_attn + ) + + self.dictionary = dictionary + self.bos = dictionary.bos() + self.unk = dictionary.unk() + self.eos = dictionary.eos() + self.pool_out = Linear(self.output_embed_dim * 2, self.output_embed_dim) + + self.label_tau = getattr(args, "label_tau", None) + + @ensemble_decoder + def forward_word_ins(self, normalize, encoder_out, prev_output_tokens): + features = self.extract_features(prev_output_tokens, encoder_out=encoder_out)[0] + features = self.pool_out( + torch.cat([features[:, :-1, :], features[:, 1:, :]], 2) + ) + decoder_out = self.output_layer(features) + return F.log_softmax(decoder_out, -1) if normalize else decoder_out + + def forward_mask_ins(self, *args, **kwargs): + raise NotImplementedError + + def forward_word_del(self, *args, **kwargs): + raise NotImplementedError + + +@register_model_architecture("insertion_transformer", "insertion_transformer") +def insertion_base_architecture(args): + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False) + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", 
False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.apply_bert_init = getattr(args, "apply_bert_init", False) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + # special for insertion transformer + args.label_tau = getattr(args, "label_tau", None) diff --git a/fairseq/fairseq/models/nat/iterative_nonautoregressive_transformer.py b/fairseq/fairseq/models/nat/iterative_nonautoregressive_transformer.py new file mode 100644 index 0000000..bc39509 --- /dev/null +++ b/fairseq/fairseq/models/nat/iterative_nonautoregressive_transformer.py @@ -0,0 +1,228 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch +from fairseq.models import register_model, register_model_architecture +from fairseq.models.nat import NATransformerModel + + +def _sequential_poisoning(s, V, beta=0.33, bos=2, eos=3, pad=1): + # s: input batch + # V: vocabulary size + rand_words = torch.randint(low=4, high=V, size=s.size(), device=s.device) + choices = torch.rand(size=s.size(), device=s.device) + choices.masked_fill_((s == pad) | (s == bos) | (s == eos), 1) + + replace = choices < beta / 3 + repeat = (choices >= beta / 3) & (choices < beta * 2 / 3) + swap = (choices >= beta * 2 / 3) & (choices < beta) + safe = choices >= beta + + for i in range(s.size(1) - 1): + rand_word = rand_words[:, i] + next_word = s[:, i + 1] + self_word = s[:, i] + + replace_i = replace[:, i] + swap_i = swap[:, i] & (next_word != 3) + repeat_i = repeat[:, i] & (next_word != 3) + safe_i = safe[:, i] | ((next_word == 3) & (~replace_i)) + + s[:, i] = ( + self_word * (safe_i | repeat_i).long() + + next_word * swap_i.long() + + rand_word * replace_i.long() + ) + s[:, i + 1] = ( + next_word * (safe_i | replace_i).long() + + self_word * (swap_i | repeat_i).long() + ) + return s + + +def gumbel_noise(input, TINY=1e-8): + return ( + input.new_zeros(*input.size()) + .uniform_() + .add_(TINY) + .log_() + .neg_() + .add_(TINY) + .log_() + .neg_() + ) + + +@register_model("iterative_nonautoregressive_transformer") +class IterNATransformerModel(NATransformerModel): + @staticmethod + def add_args(parser): + NATransformerModel.add_args(parser) + parser.add_argument( + "--train-step", + type=int, + help="number of refinement iterations during training", + ) + parser.add_argument( + "--dae-ratio", + type=float, + help="the probability of switching to the denoising auto-encoder loss", + ) + parser.add_argument( + "--stochastic-approx", + action="store_true", + help="sampling from the decoder as the inputs for next iteration", + ) + + @classmethod + def build_model(cls, args, task): + model = super().build_model(args, task) + 
model.train_step = getattr(args, "train_step", 4) + model.dae_ratio = getattr(args, "dae_ratio", 0.5) + model.stochastic_approx = getattr(args, "stochastic_approx", False) + return model + + def forward( + self, src_tokens, src_lengths, prev_output_tokens, tgt_tokens, **kwargs + ): + + B, T = prev_output_tokens.size() + + # encoding + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + + # length prediction + length_out = self.decoder.forward_length( + normalize=False, encoder_out=encoder_out + ) + length_tgt = self.decoder.forward_length_prediction( + length_out, encoder_out, tgt_tokens + ) + + # decoding + word_ins_outs, word_ins_tgts, word_ins_masks = [], [], [] + for t in range(self.train_step): + word_ins_out = self.decoder( + normalize=False, + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + step=t, + ) + word_ins_tgt = tgt_tokens + word_ins_mask = word_ins_tgt.ne(self.pad) + + word_ins_outs.append(word_ins_out) + word_ins_tgts.append(word_ins_tgt) + word_ins_masks.append(word_ins_mask) + + if t < (self.train_step - 1): + # prediction for next iteration + if self.stochastic_approx: + word_ins_prediction = ( + word_ins_out + gumbel_noise(word_ins_out) + ).max(-1)[1] + else: + word_ins_prediction = word_ins_out.max(-1)[1] + + prev_output_tokens = prev_output_tokens.masked_scatter( + word_ins_mask, word_ins_prediction[word_ins_mask] + ) + + if self.dae_ratio > 0: + # we do not perform denoising for the first iteration + corrputed = ( + torch.rand(size=(B,), device=prev_output_tokens.device) + < self.dae_ratio + ) + corrputed_tokens = _sequential_poisoning( + tgt_tokens[corrputed], + len(self.tgt_dict), + 0.33, + self.bos, + self.eos, + self.pad, + ) + prev_output_tokens[corrputed] = corrputed_tokens + + # concat everything + word_ins_out = torch.cat(word_ins_outs, 0) + word_ins_tgt = torch.cat(word_ins_tgts, 0) + word_ins_mask = torch.cat(word_ins_masks, 0) + + return { + "word_ins": { + "out": word_ins_out, + "tgt": 
word_ins_tgt, + "mask": word_ins_mask, + "ls": self.args.label_smoothing, + "nll_loss": True, + }, + "length": { + "out": length_out, + "tgt": length_tgt, + "factor": self.decoder.length_loss_factor, + }, + } + + +@register_model_architecture( + "iterative_nonautoregressive_transformer", "iterative_nonautoregressive_transformer" +) +def inat_base_architecture(args): + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False) + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, 
"share_all_embeddings", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.apply_bert_init = getattr(args, "apply_bert_init", False) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + # --- special arguments --- + args.sg_length_pred = getattr(args, "sg_length_pred", False) + args.pred_length_offset = getattr(args, "pred_length_offset", False) + args.length_loss_factor = getattr(args, "length_loss_factor", 0.1) + args.ngram_predictor = getattr(args, "ngram_predictor", 1) + args.src_embedding_copy = getattr(args, "src_embedding_copy", False) + + args.train_step = getattr(args, "train_step", 4) + args.dae_ratio = getattr(args, "dae_ratio", 0.5) + args.stochastic_approx = getattr(args, "stochastic_approx", False) + + +@register_model_architecture( + "iterative_nonautoregressive_transformer", + "iterative_nonautoregressive_transformer_wmt_en_de", +) +def iter_nat_wmt_en_de(args): + inat_base_architecture(args) diff --git a/fairseq/fairseq/models/nat/levenshtein_transformer.py b/fairseq/fairseq/models/nat/levenshtein_transformer.py new file mode 100644 index 0000000..d60d3c5 --- /dev/null +++ b/fairseq/fairseq/models/nat/levenshtein_transformer.py @@ -0,0 +1,510 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq.iterative_refinement_generator import DecoderOut +from fairseq.models import register_model, register_model_architecture +from fairseq.models.nat import FairseqNATDecoder, FairseqNATModel, ensemble_decoder +from fairseq.models.transformer import Embedding +from fairseq.modules import TransformerDecoderLayer +from fairseq.modules.transformer_sentence_encoder import init_bert_params + +from .levenshtein_utils import ( + _apply_del_words, + _apply_ins_masks, + _apply_ins_words, + _fill, + _get_del_targets, + _get_ins_targets, + _skip, + _skip_encoder_out, +) + + +@register_model("levenshtein_transformer") +class LevenshteinTransformerModel(FairseqNATModel): + @property + def allow_length_beam(self): + return False + + @staticmethod + def add_args(parser): + FairseqNATModel.add_args(parser) + parser.add_argument( + "--early-exit", + default="6,6,6", + type=str, + help="number of decoder layers before word_del, mask_ins, word_ins", + ) + parser.add_argument( + "--no-share-discriminator", + action="store_true", + help="separate parameters for discriminator", + ) + parser.add_argument( + "--no-share-maskpredictor", + action="store_true", + help="separate parameters for mask-predictor", + ) + parser.add_argument( + "--share-discriminator-maskpredictor", + action="store_true", + help="share the parameters for both mask-predictor and discriminator", + ) + parser.add_argument( + "--sampling-for-deletion", + action="store_true", + help="instead of argmax, use sampling to predict the tokens", + ) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + decoder = LevenshteinTransformerDecoder(args, tgt_dict, embed_tokens) + if getattr(args, "apply_bert_init", False): + decoder.apply(init_bert_params) + return decoder + + def forward( + self, src_tokens, src_lengths, prev_output_tokens, tgt_tokens, **kwargs + ): + + assert tgt_tokens is not None, "forward function only supports 
training." + + # encoding + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + + # generate training labels for insertion + masked_tgt_masks, masked_tgt_tokens, mask_ins_targets = _get_ins_targets( + prev_output_tokens, tgt_tokens, self.pad, self.unk + ) + mask_ins_targets = mask_ins_targets.clamp(min=0, max=255) # for safe prediction + mask_ins_masks = prev_output_tokens[:, 1:].ne(self.pad) + + mask_ins_out, _ = self.decoder.forward_mask_ins( + normalize=False, + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + ) + word_ins_out, _ = self.decoder.forward_word_ins( + normalize=False, + prev_output_tokens=masked_tgt_tokens, + encoder_out=encoder_out, + ) + + # make online prediction + if self.decoder.sampling_for_deletion: + word_predictions = torch.multinomial( + F.softmax(word_ins_out, -1).view(-1, word_ins_out.size(-1)), 1 + ).view(word_ins_out.size(0), -1) + else: + word_predictions = F.log_softmax(word_ins_out, dim=-1).max(2)[1] + + word_predictions.masked_scatter_( + ~masked_tgt_masks, tgt_tokens[~masked_tgt_masks] + ) + + # generate training labels for deletion + word_del_targets = _get_del_targets(word_predictions, tgt_tokens, self.pad) + word_del_out, _ = self.decoder.forward_word_del( + normalize=False, + prev_output_tokens=word_predictions, + encoder_out=encoder_out, + ) + word_del_masks = word_predictions.ne(self.pad) + + return { + "mask_ins": { + "out": mask_ins_out, + "tgt": mask_ins_targets, + "mask": mask_ins_masks, + "ls": 0.01, + }, + "word_ins": { + "out": word_ins_out, + "tgt": tgt_tokens, + "mask": masked_tgt_masks, + "ls": self.args.label_smoothing, + "nll_loss": True, + }, + "word_del": { + "out": word_del_out, + "tgt": word_del_targets, + "mask": word_del_masks, + }, + } + + def forward_decoder( + self, decoder_out, encoder_out, eos_penalty=0.0, max_ratio=None, **kwargs + ): + + output_tokens = decoder_out.output_tokens + output_scores = decoder_out.output_scores + attn = decoder_out.attn + history 
= decoder_out.history + + bsz = output_tokens.size(0) + if max_ratio is None: + max_lens = torch.zeros_like(output_tokens).fill_(255) + else: + if not encoder_out["encoder_padding_mask"]: + max_src_len = encoder_out["encoder_out"].size(0) + src_lens = encoder_out["encoder_out"].new(bsz).fill_(max_src_len) + else: + src_lens = (~encoder_out["encoder_padding_mask"][0]).sum(1) + max_lens = (src_lens * max_ratio).clamp(min=10).long() + + # delete words + # do not delete tokens if it is <s> </s> + can_del_word = output_tokens.ne(self.pad).sum(1) > 2 + if can_del_word.sum() != 0: # we cannot delete, skip + word_del_score, word_del_attn = self.decoder.forward_word_del( + normalize=True, + prev_output_tokens=_skip(output_tokens, can_del_word), + encoder_out=_skip_encoder_out(self.encoder, encoder_out, can_del_word), + ) + word_del_pred = word_del_score.max(-1)[1].bool() + + _tokens, _scores, _attn = _apply_del_words( + output_tokens[can_del_word], + output_scores[can_del_word], + word_del_attn, + word_del_pred, + self.pad, + self.bos, + self.eos, + ) + output_tokens = _fill(output_tokens, can_del_word, _tokens, self.pad) + output_scores = _fill(output_scores, can_del_word, _scores, 0) + attn = _fill(attn, can_del_word, _attn, 0.0) + + if history is not None: + history.append(output_tokens.clone()) + + # insert placeholders + can_ins_mask = output_tokens.ne(self.pad).sum(1) < max_lens + if can_ins_mask.sum() != 0: + mask_ins_score, _ = self.decoder.forward_mask_ins( + normalize=True, + prev_output_tokens=_skip(output_tokens, can_ins_mask), + encoder_out=_skip_encoder_out(self.encoder, encoder_out, can_ins_mask), + ) + if eos_penalty > 0.0: + mask_ins_score[:, :, 0] = mask_ins_score[:, :, 0] - eos_penalty + mask_ins_pred = mask_ins_score.max(-1)[1] + mask_ins_pred = torch.min( + mask_ins_pred, max_lens[can_ins_mask, None].expand_as(mask_ins_pred) + ) + + _tokens, _scores = _apply_ins_masks( + output_tokens[can_ins_mask], + output_scores[can_ins_mask], + mask_ins_pred, + 
self.pad, + self.unk, + self.eos, + ) + output_tokens = _fill(output_tokens, can_ins_mask, _tokens, self.pad) + output_scores = _fill(output_scores, can_ins_mask, _scores, 0) + + if history is not None: + history.append(output_tokens.clone()) + + # insert words + can_ins_word = output_tokens.eq(self.unk).sum(1) > 0 + if can_ins_word.sum() != 0: + word_ins_score, word_ins_attn = self.decoder.forward_word_ins( + normalize=True, + prev_output_tokens=_skip(output_tokens, can_ins_word), + encoder_out=_skip_encoder_out(self.encoder, encoder_out, can_ins_word), + ) + word_ins_score, word_ins_pred = word_ins_score.max(-1) + _tokens, _scores = _apply_ins_words( + output_tokens[can_ins_word], + output_scores[can_ins_word], + word_ins_pred, + word_ins_score, + self.unk, + ) + + output_tokens = _fill(output_tokens, can_ins_word, _tokens, self.pad) + output_scores = _fill(output_scores, can_ins_word, _scores, 0) + attn = _fill(attn, can_ins_word, word_ins_attn, 0.0) + + if history is not None: + history.append(output_tokens.clone()) + + # delete some unnecessary paddings + cut_off = output_tokens.ne(self.pad).sum(1).max() + output_tokens = output_tokens[:, :cut_off] + output_scores = output_scores[:, :cut_off] + attn = None if attn is None else attn[:, :cut_off, :] + + return decoder_out._replace( + output_tokens=output_tokens, + output_scores=output_scores, + attn=attn, + history=history, + ) + + def initialize_output_tokens(self, encoder_out, src_tokens): + initial_output_tokens = src_tokens.new_zeros(src_tokens.size(0), 2) + initial_output_tokens[:, 0] = self.bos + initial_output_tokens[:, 1] = self.eos + + initial_output_scores = initial_output_tokens.new_zeros( + *initial_output_tokens.size() + ).type_as(encoder_out["encoder_out"][0]) + + return DecoderOut( + output_tokens=initial_output_tokens, + output_scores=initial_output_scores, + attn=None, + step=0, + max_step=0, + history=None, + ) + + +class LevenshteinTransformerDecoder(FairseqNATDecoder): + def __init__(self, 
args, dictionary, embed_tokens, no_encoder_attn=False): + super().__init__( + args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn + ) + self.dictionary = dictionary + self.bos = dictionary.bos() + self.unk = dictionary.unk() + self.eos = dictionary.eos() + self.sampling_for_deletion = getattr(args, "sampling_for_deletion", False) + self.embed_mask_ins = Embedding(256, self.output_embed_dim * 2, None) + self.embed_word_del = Embedding(2, self.output_embed_dim, None) + + # del_word, ins_mask, ins_word + self.early_exit = [int(i) for i in args.early_exit.split(",")] + assert len(self.early_exit) == 3 + + # copy layers for mask-predict/deletion + self.layers_msk = None + if getattr(args, "no_share_maskpredictor", False): + self.layers_msk = nn.ModuleList( + [ + TransformerDecoderLayer(args, no_encoder_attn) + for _ in range(self.early_exit[1]) + ] + ) + self.layers_del = None + if getattr(args, "no_share_discriminator", False): + self.layers_del = nn.ModuleList( + [ + TransformerDecoderLayer(args, no_encoder_attn) + for _ in range(self.early_exit[0]) + ] + ) + + if getattr(args, "share_discriminator_maskpredictor", False): + assert getattr( + args, "no_share_discriminator", False + ), "must set saperate discriminator" + self.layers_msk = self.layers_del + + def extract_features( + self, + prev_output_tokens, + encoder_out=None, + early_exit=None, + layers=None, + **unused + ): + """ + Similar to *forward* but only return features. 
+ Inputs: + prev_output_tokens: Tensor(B, T) + encoder_out: a dictionary of hidden states and masks + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + the LevenshteinTransformer decoder has full-attention to all generated tokens + """ + # embed positions + positions = ( + self.embed_positions(prev_output_tokens) + if self.embed_positions is not None + else None + ) + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + attn = None + inner_states = [x] + + # decoder layers + decoder_padding_mask = prev_output_tokens.eq(self.padding_idx) + layers = self.layers if layers is None else layers + early_exit = len(layers) if early_exit is None else early_exit + for _, layer in enumerate(layers[:early_exit]): + x, attn, _ = layer( + x, + encoder_out["encoder_out"][0] + if (encoder_out is not None and len(encoder_out["encoder_out"]) > 0) + else None, + encoder_out["encoder_padding_mask"][0] + if ( + encoder_out is not None + and len(encoder_out["encoder_padding_mask"]) > 0 + ) + else None, + self_attn_mask=None, + self_attn_padding_mask=decoder_padding_mask, + ) + inner_states.append(x) + + if self.layer_norm: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": attn, "inner_states": inner_states} + + @ensemble_decoder + def forward_mask_ins(self, normalize, encoder_out, prev_output_tokens, **unused): + features, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + early_exit=self.early_exit[1], + layers=self.layers_msk, + **unused + ) + features_cat = torch.cat([features[:, :-1, :], features[:, 1:, :]], 2) + 
decoder_out = F.linear(features_cat, self.embed_mask_ins.weight) + if normalize: + return F.log_softmax(decoder_out, -1), extra["attn"] + return decoder_out, extra["attn"] + + @ensemble_decoder + def forward_word_ins(self, normalize, encoder_out, prev_output_tokens, **unused): + features, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + early_exit=self.early_exit[2], + layers=self.layers, + **unused + ) + decoder_out = self.output_layer(features) + if normalize: + return F.log_softmax(decoder_out, -1), extra["attn"] + return decoder_out, extra["attn"] + + @ensemble_decoder + def forward_word_del(self, normalize, encoder_out, prev_output_tokens, **unused): + features, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + early_exit=self.early_exit[0], + layers=self.layers_del, + **unused + ) + decoder_out = F.linear(features, self.embed_word_del.weight) + if normalize: + return F.log_softmax(decoder_out, -1), extra["attn"] + return decoder_out, extra["attn"] + + +@register_model_architecture("levenshtein_transformer", "levenshtein_transformer") +def levenshtein_base_architecture(args): + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False) + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + 
args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.apply_bert_init = getattr(args, "apply_bert_init", False) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.sampling_for_deletion = getattr(args, "sampling_for_deletion", False) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + args.early_exit = getattr(args, "early_exit", "6,6,6") + args.no_share_discriminator = getattr(args, "no_share_discriminator", False) + args.no_share_maskpredictor = getattr(args, "no_share_maskpredictor", False) + args.share_discriminator_maskpredictor = getattr( + args, "share_discriminator_maskpredictor", False + ) + args.no_share_last_layer = getattr(args, "no_share_last_layer", False) + + +@register_model_architecture( + "levenshtein_transformer", "levenshtein_transformer_wmt_en_de" +) +def levenshtein_transformer_wmt_en_de(args): + levenshtein_base_architecture(args) + + +# similar parameters used in the "Attention Is All You Need" paper (Vaswani et al., 2017) 
+@register_model_architecture( + "levenshtein_transformer", "levenshtein_transformer_vaswani_wmt_en_de_big" +) +def levenshtein_transformer_vaswani_wmt_en_de_big(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.3) + levenshtein_base_architecture(args) + + +# default parameters used in tensor2tensor implementation +@register_model_architecture( + "levenshtein_transformer", "levenshtein_transformer_wmt_en_de_big" +) +def levenshtein_transformer_wmt_en_de_big_t2t(args): + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_dropout = getattr(args, "activation_dropout", 0.1) + levenshtein_transformer_vaswani_wmt_en_de_big(args) diff --git a/fairseq/fairseq/models/nat/levenshtein_utils.py b/fairseq/fairseq/models/nat/levenshtein_utils.py new file mode 100644 index 0000000..375a98c --- /dev/null +++ b/fairseq/fairseq/models/nat/levenshtein_utils.py @@ -0,0 +1,293 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 

import torch
from fairseq.utils import new_arange


# -------------- Helper Functions --------------------------------------------------- #


def load_libnat():
    """Import the native edit-distance extension.

    Returns:
        tuple: ``(module, use_cuda)`` where ``module`` is ``fairseq.libnat_cuda``
        (``use_cuda=True``) when it can be imported, otherwise ``fairseq.libnat``
        (``use_cuda=False``).

    Raises:
        ImportError: if neither extension can be imported.
    """
    try:
        from fairseq import libnat_cuda

        return libnat_cuda, True

    except ImportError as e:
        print(str(e) + "... fall back to CPU version")

        try:
            from fairseq import libnat

            return libnat, False

        except ImportError:
            import sys

            sys.stderr.write(
                "ERROR: missing libnat_cuda. run `python setup.py build_ext --inplace`\n"
            )
            raise


def _get_ins_targets(in_tokens, out_tokens, padding_idx, unk_idx):
    """Build insertion targets for turning ``in_tokens`` into ``out_tokens``.

    Dispatches to the CUDA or CPU variant depending on which native extension
    ``load_libnat`` found.

    Returns:
        tuple: ``(masked_tgt_masks, masked_tgt_tokens, mask_ins_targets)`` where
        ``masked_tgt_tokens`` is ``out_tokens`` with the masked positions
        replaced by ``unk_idx``.
    """
    libnat, use_cuda = load_libnat()

    def _get_ins_targets_cuda(in_tokens, out_tokens, padding_idx, unk_idx):
        in_masks = in_tokens.ne(padding_idx)
        out_masks = out_tokens.ne(padding_idx)
        mask_ins_targets, masked_tgt_masks = libnat.generate_insertion_labels(
            out_tokens.int(),
            libnat.levenshtein_distance(
                in_tokens.int(),
                out_tokens.int(),
                in_masks.sum(1).int(),
                out_masks.sum(1).int(),
            ),
        )
        # never mark padding positions of the target as "to be inserted"
        masked_tgt_masks = masked_tgt_masks.bool() & out_masks
        mask_ins_targets = mask_ins_targets.type_as(in_tokens)[
            :, 1 : in_masks.size(1)
        ].masked_fill_(~in_masks[:, 1:], 0)
        masked_tgt_tokens = out_tokens.masked_fill(masked_tgt_masks, unk_idx)
        return masked_tgt_masks, masked_tgt_tokens, mask_ins_targets

    def _get_ins_targets_cpu(in_tokens, out_tokens, padding_idx, unk_idx):
        in_seq_len, out_seq_len = in_tokens.size(1), out_tokens.size(1)

        # strip padding so the native routine only sees real tokens
        in_tokens_list = [
            [t for t in s if t != padding_idx] for s in in_tokens.tolist()
        ]
        out_tokens_list = [
            [t for t in s if t != padding_idx] for s in out_tokens.tolist()
        ]

        full_labels = libnat.suggested_ed2_path(
            in_tokens_list, out_tokens_list, padding_idx
        )
        # the last entry of each label set is skipped here; it is the one
        # consumed as deletion labels in _get_del_targets
        mask_inputs = [
            [len(c) if c[0] != padding_idx else 0 for c in a[:-1]] for a in full_labels
        ]

        # generate labels
        masked_tgt_masks = []
        for mask_input in mask_inputs:
            mask_label = []
            for beam_size in mask_input[1:-1]:  # HACK 1:-1
                mask_label += [0] + [1 for _ in range(beam_size)]
            # right-pad with zeros up to the target width
            masked_tgt_masks.append(
                mask_label + [0 for _ in range(out_seq_len - len(mask_label))]
            )
        mask_ins_targets = [
            mask_input[1:-1]
            + [0 for _ in range(in_seq_len - 1 - len(mask_input[1:-1]))]
            for mask_input in mask_inputs
        ]

        # transform to tensor
        masked_tgt_masks = torch.tensor(
            masked_tgt_masks, device=out_tokens.device
        ).bool()
        mask_ins_targets = torch.tensor(mask_ins_targets, device=in_tokens.device)
        masked_tgt_tokens = out_tokens.masked_fill(masked_tgt_masks, unk_idx)
        return masked_tgt_masks, masked_tgt_tokens, mask_ins_targets

    if use_cuda:
        return _get_ins_targets_cuda(in_tokens, out_tokens, padding_idx, unk_idx)
    return _get_ins_targets_cpu(in_tokens, out_tokens, padding_idx, unk_idx)


def _get_del_targets(in_tokens, out_tokens, padding_idx):
    """Build per-token deletion targets for turning ``in_tokens`` into ``out_tokens``.

    Dispatches to the CUDA or CPU variant depending on which native extension
    ``load_libnat`` found.

    Returns:
        Tensor: ``word_del_targets``; in the CUDA variant padding positions of
        ``in_tokens`` are forced to 0.
    """
    libnat, use_cuda = load_libnat()

    def _get_del_targets_cuda(in_tokens, out_tokens, padding_idx):
        in_masks = in_tokens.ne(padding_idx)
        out_masks = out_tokens.ne(padding_idx)

        word_del_targets = libnat.generate_deletion_labels(
            in_tokens.int(),
            libnat.levenshtein_distance(
                in_tokens.int(),
                out_tokens.int(),
                in_masks.sum(1).int(),
                out_masks.sum(1).int(),
            ),
        )
        word_del_targets = word_del_targets.type_as(in_tokens).masked_fill_(
            ~in_masks, 0
        )
        return word_del_targets

    def _get_del_targets_cpu(in_tokens, out_tokens, padding_idx):
        out_seq_len = out_tokens.size(1)
        with torch.cuda.device_of(in_tokens):
            in_tokens_list = [
                [t for t in s if t != padding_idx] for s in in_tokens.tolist()
            ]
            out_tokens_list = [
                [t for t in s if t != padding_idx] for s in out_tokens.tolist()
            ]

        full_labels = libnat.suggested_ed2_path(
            in_tokens_list, out_tokens_list, padding_idx
        )
        # the last entry of each label set holds the deletion labels
        word_del_targets = [b[-1] for b in full_labels]
        word_del_targets = [
            labels + [0 for _ in range(out_seq_len - len(labels))]
            for labels in word_del_targets
        ]

        # transform to tensor
        word_del_targets = torch.tensor(word_del_targets, device=out_tokens.device)
        return word_del_targets

    if use_cuda:
        return _get_del_targets_cuda(in_tokens, out_tokens, padding_idx)
    return _get_del_targets_cpu(in_tokens, out_tokens, padding_idx)


def _apply_ins_masks(
    in_tokens, in_scores, mask_ins_pred, padding_idx, unk_idx, eos_idx
):
    """Insert ``unk_idx`` placeholders between tokens as predicted.

    ``mask_ins_pred[:, i]`` is the number of placeholders to insert between
    token ``i`` and token ``i + 1``.

    NOTE: ``in_tokens``, ``mask_ins_pred`` and (when given) ``in_scores`` are
    modified in place.

    Returns:
        tuple: ``(out_tokens, out_scores)``; ``out_scores`` is ``None`` when
        ``in_scores`` is ``None``.
    """

    in_masks = in_tokens.ne(padding_idx)
    in_lengths = in_masks.sum(1)

    # HACK: hacky way to shift all the paddings to eos first.
    in_tokens.masked_fill_(~in_masks, eos_idx)
    mask_ins_pred.masked_fill_(~in_masks[:, 1:], 0)

    out_lengths = in_lengths + mask_ins_pred.sum(1)
    out_max_len = out_lengths.max()
    out_masks = new_arange(out_lengths, out_max_len)[None, :] < out_lengths[:, None]

    # destination column of every original token (except the first one)
    reordering = (mask_ins_pred + in_masks[:, 1:].long()).cumsum(1)
    out_tokens = (
        in_tokens.new_zeros(in_tokens.size(0), out_max_len)
        .fill_(padding_idx)
        .masked_fill_(out_masks, unk_idx)
    )
    out_tokens[:, 0] = in_tokens[:, 0]
    out_tokens.scatter_(1, reordering, in_tokens[:, 1:])

    out_scores = None
    if in_scores is not None:
        in_scores.masked_fill_(~in_masks, 0)
        out_scores = in_scores.new_zeros(*out_tokens.size())
        out_scores[:, 0] = in_scores[:, 0]
        out_scores.scatter_(1, reordering, in_scores[:, 1:])

    return out_tokens, out_scores


def _apply_ins_words(in_tokens, in_scores, word_ins_pred, word_ins_scores, unk_idx):
    """Replace every ``unk_idx`` placeholder with the predicted token.

    Returns:
        tuple: ``(out_tokens, out_scores)``; ``out_scores`` is ``None`` when
        ``in_scores`` is ``None``.
    """
    word_ins_masks = in_tokens.eq(unk_idx)
    out_tokens = in_tokens.masked_scatter(word_ins_masks, word_ins_pred[word_ins_masks])

    if in_scores is not None:
        out_scores = in_scores.masked_scatter(
            word_ins_masks, word_ins_scores[word_ins_masks]
        )
    else:
        out_scores = None

    return out_tokens, out_scores


def _apply_del_words(
    in_tokens, in_scores, in_attn, word_del_pred, padding_idx, bos_idx, eos_idx
):
    """Delete the tokens flagged by ``word_del_pred`` and left-compact the rows.

    Padding positions are always treated as deleted and ``bos``/``eos`` tokens
    are never deleted.

    NOTE: ``word_del_pred`` is modified in place.

    Returns:
        tuple: ``(out_tokens, out_scores, out_attn)``; the last two are ``None``
        when the corresponding input is ``None``.
    """
    # apply deletion to a tensor
    in_masks = in_tokens.ne(padding_idx)
    bos_eos_masks = in_tokens.eq(bos_idx) | in_tokens.eq(eos_idx)

    max_len = in_tokens.size(1)
    word_del_pred.masked_fill_(~in_masks, 1)
    word_del_pred.masked_fill_(bos_eos_masks, 0)

    # deleted positions get key ``max_len`` so that sorting pushes them to the end
    reordering = new_arange(in_tokens).masked_fill_(word_del_pred, max_len).sort(1)[1]

    out_tokens = in_tokens.masked_fill(word_del_pred, padding_idx).gather(1, reordering)

    out_scores = None
    if in_scores is not None:
        out_scores = in_scores.masked_fill(word_del_pred, 0).gather(1, reordering)

    out_attn = None
    if in_attn is not None:
        _mask = word_del_pred[:, :, None].expand_as(in_attn)
        _reordering = reordering[:, :, None].expand_as(in_attn)
        out_attn = in_attn.masked_fill(_mask, 0.0).gather(1, _reordering)

    return out_tokens, out_scores, out_attn


def _skip(x, mask):
    """
    Getting sliced (dim=0) tensor by mask. Supporting tensor and list/dict of tensors.

    Ints and ``None`` are returned unchanged. A tensor is sliced along dim 0
    when that dimension matches ``mask``, otherwise along dim 1 when that one
    matches.

    Raises:
        NotImplementedError: for unsupported types and for tensors with no
            dimension (among the first two) matching ``mask``.
    """
    if isinstance(x, int):
        return x

    if x is None:
        return None

    if isinstance(x, torch.Tensor):
        if x.dim() > 0 and x.size(0) == mask.size(0):
            return x[mask]
        # guard on dim() so 1-D tensors reach NotImplementedError below
        # instead of failing with IndexError on size(1)
        elif x.dim() > 1 and x.size(1) == mask.size(0):
            return x[:, mask]

    if isinstance(x, list):
        return [_skip(x_i, mask) for x_i in x]

    if isinstance(x, dict):
        return {k: _skip(v, mask) for k, v in x.items()}

    raise NotImplementedError


def _skip_encoder_out(encoder, encoder_out, mask):
    """Restrict ``encoder_out`` to the batch rows selected by ``mask``.

    When no row is selected, ``encoder_out`` is returned unchanged.
    """
    if not mask.any():
        return encoder_out
    else:
        # squeeze only the trailing dim so the index stays 1-D even when a
        # single row is selected
        return encoder.reorder_encoder_out(
            encoder_out, mask.nonzero(as_tuple=False).squeeze(-1)
        )


def _fill(x, mask, y, padding_idx):
    """
    Filling tensor x with y at masked positions (dim=0).

    ``x`` is widened with ``padding_idx`` when ``y`` is wider, and the selected
    rows are padded with ``padding_idx`` when ``y`` is narrower.

    NOTE: unless ``x`` has to be widened, it is updated in place. ``y`` itself
    is returned when ``x`` is ``None`` or when every row is selected.
    """
    if x is None:
        return y
    assert x.dim() == y.dim() and mask.size(0) == x.size(0)
    assert x.dim() == 2 or (x.dim() == 3 and x.size(2) == y.size(2))
    n_selected = mask.sum()
    assert n_selected == y.size(0)

    if n_selected == x.size(0):
        return y

    if x.size(1) < y.size(1):
        dims = [x.size(0), y.size(1) - x.size(1)]
        if x.dim() == 3:
            dims.append(x.size(2))
        x = torch.cat([x, x.new_zeros(*dims).fill_(padding_idx)], 1)
        x[mask] = y
    elif x.size(1) > y.size(1):
        x[mask] = padding_idx
        if x.dim() == 2:
            x[mask, : y.size(1)] = y
        else:
            x[mask, : y.size(1), :] = y
    else:
        x[mask] = y
    return x


# diff --git a/fairseq/fairseq/models/nat/nat_crf_transformer.py b/fairseq/fairseq/models/nat/nat_crf_transformer.py
# new file mode 100644
# index 0000000..d4b3cd9
# --- /dev/null
# +++ b/fairseq/fairseq/models/nat/nat_crf_transformer.py
# @@ -0,0 +1,121 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


from fairseq.models import register_model, register_model_architecture
from fairseq.models.nat import NATransformerModel, base_architecture
from fairseq.modules import DynamicCRF


@register_model("nacrf_transformer")
class NACRFTransformerModel(NATransformerModel):
    """Non-autoregressive Transformer with a ``DynamicCRF`` layer on its outputs."""

    def __init__(self, args, encoder, decoder):
        super().__init__(args, encoder, decoder)
        vocab_size = len(self.tgt_dict)
        self.crf_layer = DynamicCRF(
            num_embedding=vocab_size,
            low_rank=args.crf_lowrank_approx,
            beam_size=args.crf_beam_approx,
        )

    @property
    def allow_ensemble(self):
        return False

    @staticmethod
    def add_args(parser):
        """Register the parent model's options plus the CRF-specific ones."""
        NATransformerModel.add_args(parser)
        crf_options = (
            (
                "--crf-lowrank-approx",
                int,
                "the dimension of low-rank approximation of transition",
            ),
            (
                "--crf-beam-approx",
                int,
                "the beam size for apporixmating the normalizing factor",
            ),
            (
                "--word-ins-loss-factor",
                float,
                "weights on NAT loss used to co-training with CRF loss.",
            ),
        )
        for flag, value_type, help_text in crf_options:
            parser.add_argument(flag, type=value_type, help=help_text)

    def forward(
        self, src_tokens, src_lengths, prev_output_tokens, tgt_tokens, **kwargs
    ):
        """Run encoder, length predictor, decoder and CRF for training.

        Returns:
            dict: entries ``"word_ins"``, ``"word_crf"`` and ``"length"``
            holding the outputs/targets (or the ready-made CRF loss) for each
            training objective.
        """
        decoder = self.decoder

        # encoding
        encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs)

        # length prediction
        length_out = decoder.forward_length(normalize=False, encoder_out=encoder_out)
        length_tgt = decoder.forward_length_prediction(
            length_out, encoder_out, tgt_tokens
        )

        # decoding
        word_ins_out = decoder(
            normalize=False,
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
        )
        word_ins_tgt = tgt_tokens
        word_ins_mask = tgt_tokens.ne(self.pad)

        # compute the log-likelihood of CRF, normalised by the number of
        # non-pad target tokens of each sentence and averaged over the batch
        crf_nll = -self.crf_layer(word_ins_out, word_ins_tgt, word_ins_mask)
        tokens_per_sentence = word_ins_mask.type_as(crf_nll).sum(-1)
        crf_nll = (crf_nll / tokens_per_sentence).mean()

        word_ins_entry = {
            "out": word_ins_out,
            "tgt": word_ins_tgt,
            "mask": word_ins_mask,
            "ls": self.args.label_smoothing,
            "nll_loss": True,
            "factor": self.args.word_ins_loss_factor,
        }
        length_entry = {
            "out": length_out,
            "tgt": length_tgt,
            "factor": decoder.length_loss_factor,
        }
        return {
            "word_ins": word_ins_entry,
            "word_crf": {"loss": crf_nll},
            "length": length_entry,
        }

    def forward_decoder(self, decoder_out, encoder_out, decoding_format=None, **kwargs):
        """Decode one step: emission scores from the decoder, then CRF decoding.

        NOTE: ``decoder_out.output_tokens`` and ``decoder_out.output_scores``
        are updated in place at non-pad positions.
        """
        tokens = decoder_out.output_tokens
        scores = decoder_out.output_scores
        history = decoder_out.history

        # execute the decoder and get emission scores
        non_pad = tokens.ne(self.pad)
        emissions = self.decoder(
            normalize=False, prev_output_tokens=tokens, encoder_out=encoder_out
        )

        # run viterbi decoding through CRF
        crf_scores, crf_tokens = self.crf_layer.forward_decoder(emissions, non_pad)
        tokens.masked_scatter_(non_pad, crf_tokens[non_pad])
        scores.masked_scatter_(non_pad, crf_scores[non_pad])
        if history is not None:
            history.append(tokens.clone())

        return decoder_out._replace(
            output_tokens=tokens,
            output_scores=scores,
            attn=None,
            history=history,
        )


@register_model_architecture("nacrf_transformer", "nacrf_transformer")
def nacrf_base_architecture(args):
    """Fill in CRF and pre-norm defaults, then the NAT base defaults.

    Only attributes that are missing from ``args`` are set.
    """
    crf_defaults = (
        ("crf_lowrank_approx", 32),
        ("crf_beam_approx", 64),
        ("word_ins_loss_factor", 0.5),
        ("encoder_normalize_before", True),
        ("decoder_normalize_before", True),
    )
    for attr_name, fallback in crf_defaults:
        setattr(args, attr_name, getattr(args, attr_name, fallback))
    base_architecture(args)


# diff --git a/fairseq/fairseq/models/nat/nonautoregressive_ensembles.py b/fairseq/fairseq/models/nat/nonautoregressive_ensembles.py
# new file mode 100644
# index 0000000..0a0221f
# --- /dev/null
# +++ b/fairseq/fairseq/models/nat/nonautoregressive_ensembles.py
# @@ -0,0 +1,254 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import math

import torch
import torch.nn.functional as F
from fairseq.models.nat import (
    _apply_del_words,
    _apply_ins_masks,
    _apply_ins_words,
    _fill,
    _skip,
    _skip_encoder_out,
)


class _EnsembleModelEncoder(object):
    """Encoder facade that reorders the encoder outputs of every ensemble member."""

    def __init__(self, models):
        self.models = models

    def reorder_encoder_out(self, encoder_outs, new_order):
        """Apply ``new_order`` to each model's encoder output, model by model."""
        encoder_outs = [
            model.encoder.reorder_encoder_out(encoder_out, new_order)
            for model, encoder_out in zip(self.models, encoder_outs)
        ]
        return encoder_outs


class BasicEnsembleModel(torch.nn.Module):
    """A wrapper around an ensemble of models."""

    def __init__(self, models):
        super().__init__()
        self.models = torch.nn.ModuleList(models)
        # special symbols are taken from the first model's decoder dictionary
        self.bos = self.models[0].decoder.dictionary.bos()
        self.eos = self.models[0].decoder.dictionary.eos()
        self.pad = self.models[0].decoder.dictionary.pad()
        self.unk = self.models[0].decoder.dictionary.unk()
        self.encoder = _EnsembleModelEncoder(self.models)

    def has_encoder(self):
        return hasattr(self.models[0], "encoder")

    def max_decoder_positions(self):
        return min(m.max_decoder_positions() for m in self.models)

    @torch.no_grad()
    def forward_encoder(self, encoder_input):
        """Return one encoder output per model, or ``None`` without an encoder."""
        if not self.has_encoder():
            return None
        return [model.forward_encoder(encoder_input) for model in self.models]

    @torch.no_grad()
    def forward_decoder(self, *inputs):
        raise NotImplementedError

    def initialize_output_tokens(self, *inputs):
        raise NotImplementedError


class EnsembleLevT(BasicEnsembleModel):
    """A wrapper around an ensemble of models."""

    def __init__(self, models):
        super().__init__(models)

    @torch.no_grad()
    def forward_decoder(
        self, decoder_out, encoder_outs, eos_penalty=0.0, max_ratio=None, **kwargs
    ):
        """Run one refinement step (delete, insert placeholders, insert words).

        Args:
            decoder_out: current decoding state (tokens, scores, attention).
            encoder_outs: one encoder output per ensemble member.
            eos_penalty: value subtracted from the "insert nothing" score in
                the placeholder-insertion step.
            max_ratio: when given, the per-sentence length cap is
                ``source length * max_ratio`` (at least 10); otherwise 255.

        Returns:
            ``decoder_out`` with updated tokens, scores and attention, and
            ``history`` reset to ``None``.
        """
        # LevT ensembling
        # A pipeline of three steps: deletion, placeholder, and word insertion.
        # We need to average scores in each step in a pipeline way because of dependence.
        # deletion
        output_tokens = decoder_out.output_tokens
        output_scores = decoder_out.output_scores
        attn = decoder_out.attn

        bsz = output_tokens.size(0)
        if max_ratio is None:
            # one cap per sentence; the previous `new().fill_(255)` produced an
            # empty tensor that could not be compared with the lengths below
            max_lens = output_tokens.new_full((bsz,), 255)
        else:
            if not encoder_outs[0]["encoder_padding_mask"]:
                # no padding: every source has the full length T, which is
                # dim 0 of the T x B x C encoder output (dim 1 is the batch)
                enc_feats = encoder_outs[0]["encoder_out"][0]
                src_lens = enc_feats.new_full((bsz,), enc_feats.size(0))
            else:
                src_lens = (~encoder_outs[0]["encoder_padding_mask"][0]).sum(1)
            max_lens = (src_lens * max_ratio).clamp(min=10).long()

        # delete words
        # do not delete tokens if it is <s> </s>
        can_del_word = output_tokens.ne(self.pad).sum(1) > 2
        if can_del_word.sum() != 0:  # we cannot delete, skip
            output_tokens, output_scores, attn = self.forward_word_del(
                encoder_outs,
                output_tokens,
                output_scores,
                attn,
                can_del_word,
            )

        # insert placeholders
        can_ins_mask = output_tokens.ne(self.pad).sum(1) < max_lens
        if can_ins_mask.sum() != 0:
            output_tokens, output_scores = self.forward_mask_ins(
                encoder_outs,
                output_tokens,
                output_scores,
                can_ins_mask,
                eos_penalty,
                max_lens,
            )

        # insert words
        can_ins_word = output_tokens.eq(self.unk).sum(1) > 0
        if can_ins_word.sum() != 0:
            output_tokens, output_scores, attn = self.forward_word_ins(
                encoder_outs,
                output_tokens,
                output_scores,
                attn,
                can_ins_word,
            )

        # delete some unnecessary paddings
        cut_off = output_tokens.ne(self.pad).sum(1).max()
        output_tokens = output_tokens[:, :cut_off]
        output_scores = output_scores[:, :cut_off]
        attn = None if attn is None else attn[:, :cut_off, :]
        return decoder_out._replace(
            output_tokens=output_tokens,
            output_scores=output_scores,
            attn=attn,
            history=None,
        )

    def forward_word_del(
        self, encoder_outs, output_tokens, output_scores, attn, can_del_word
    ):
        """Delete tokens in the rows selected by ``can_del_word``.

        The per-model deletion probabilities are averaged (in probability
        space) before taking the arg-max decision.
        """
        word_del_score_avg = []
        word_del_attn_avg = []
        for model, encoder_out in zip(self.models, encoder_outs):
            word_del_out, word_del_attn = model.decoder.forward_word_del(
                _skip(output_tokens, can_del_word),
                _skip_encoder_out(model.encoder, encoder_out, can_del_word),
            )
            word_del_score = F.log_softmax(word_del_out, 2)
            word_del_score_avg.append(word_del_score)
            word_del_attn_avg.append(word_del_attn)
        # log of the mean probability across models
        word_del_score_avg = torch.logsumexp(
            torch.stack(word_del_score_avg, dim=0), dim=0
        ) - math.log(len(self.models))
        word_del_pred = word_del_score_avg.max(-1)[1].bool()
        if word_del_attn_avg[0] is not None:
            # reduce over the model dimension; dividing the stacked tensor
            # alone would keep one attention map per model
            word_del_attn_avg = torch.stack(word_del_attn_avg, dim=0).mean(dim=0)
        else:
            word_del_attn_avg = None

        _tokens, _scores, _attn = _apply_del_words(
            output_tokens[can_del_word],
            output_scores[can_del_word],
            word_del_attn_avg,
            word_del_pred,
            self.pad,
            self.bos,
            self.eos,
        )
        output_tokens = _fill(output_tokens, can_del_word, _tokens, self.pad)
        output_scores = _fill(output_scores, can_del_word, _scores, 0)
        attn = _fill(attn, can_del_word, _attn, 0.0)
        return output_tokens, output_scores, attn

    def forward_mask_ins(
        self,
        encoder_outs,
        output_tokens,
        output_scores,
        can_ins_mask,
        eos_penalty,
        max_lens,
    ):
        """Insert placeholders in the rows selected by ``can_ins_mask``.

        The number of placeholders predicted for each gap is capped by the
        row's entry in ``max_lens``.
        """
        mask_ins_score_avg = []
        for model, encoder_out in zip(self.models, encoder_outs):
            mask_ins_out, _ = model.decoder.forward_mask_ins(
                _skip(output_tokens, can_ins_mask),
                _skip_encoder_out(model.encoder, encoder_out, can_ins_mask),
            )
            mask_ins_score = F.log_softmax(mask_ins_out, 2)
            if eos_penalty > 0.0:
                # class 0 means "insert nothing"
                mask_ins_score[:, :, 0] -= eos_penalty
            mask_ins_score_avg.append(mask_ins_score)
        # log of the mean probability across models
        mask_ins_score_avg = torch.logsumexp(
            torch.stack(mask_ins_score_avg, dim=0), dim=0
        ) - math.log(len(self.models))
        mask_ins_pred = mask_ins_score_avg.max(-1)[1]
        mask_ins_pred = torch.min(
            mask_ins_pred, max_lens[can_ins_mask, None].expand_as(mask_ins_pred)
        )
        _tokens, _scores = _apply_ins_masks(
            output_tokens[can_ins_mask],
            output_scores[can_ins_mask],
            mask_ins_pred,
            self.pad,
            self.unk,
            self.eos,
        )
        output_tokens = _fill(output_tokens, can_ins_mask, _tokens, self.pad)
        output_scores = _fill(output_scores, can_ins_mask, _scores, 0)
        return output_tokens, output_scores

    def forward_word_ins(
        self, encoder_outs, output_tokens, output_scores, attn, can_ins_word
    ):
        """Fill placeholders with words in the rows selected by ``can_ins_word``.

        Token probabilities and attention maps are averaged across models.
        """
        word_ins_score_avg = []
        word_ins_attn_avg = []
        for model, encoder_out in zip(self.models, encoder_outs):
            word_ins_out, word_ins_attn = model.decoder.forward_word_ins(
                _skip(output_tokens, can_ins_word),
                _skip_encoder_out(model.encoder, encoder_out, can_ins_word),
            )
            word_ins_score = F.log_softmax(word_ins_out, 2)
            word_ins_score_avg.append(word_ins_score)
            word_ins_attn_avg.append(word_ins_attn)
        # log of the mean probability across models
        word_ins_score_avg = torch.logsumexp(
            torch.stack(word_ins_score_avg, dim=0), dim=0
        ) - math.log(len(self.models))
        if word_ins_attn_avg[0] is not None:
            # reduce over the model dimension to get a real average
            word_ins_attn_avg = torch.stack(word_ins_attn_avg, dim=0).mean(dim=0)
        else:
            word_ins_attn_avg = None
        word_ins_score_max, word_ins_pred = word_ins_score_avg.max(-1)

        _tokens, _scores = _apply_ins_words(
            output_tokens[can_ins_word],
            output_scores[can_ins_word],
            word_ins_pred,
            word_ins_score_max,
            self.unk,
        )

        output_tokens = _fill(output_tokens, can_ins_word, _tokens, self.pad)
        output_scores = _fill(output_scores, can_ins_word, _scores, 0)
        # write back the ensemble average, not the last model's attention
        attn = _fill(attn, can_ins_word, word_ins_attn_avg, 0.0)
        return output_tokens, output_scores, attn

    def initialize_output_tokens(self, encoder_outs, src_tokens):
        # LevT doesn't do length prediction.
        return self.models[0].initialize_output_tokens(encoder_outs[0], src_tokens)


# diff --git a/fairseq/fairseq/models/nat/nonautoregressive_transformer.py b/fairseq/fairseq/models/nat/nonautoregressive_transformer.py
# new file mode 100644
# index 0000000..d114202
# --- /dev/null
# +++ b/fairseq/fairseq/models/nat/nonautoregressive_transformer.py
# @@ -0,0 +1,456 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torch.nn.functional as F
from fairseq import utils
from fairseq.iterative_refinement_generator import DecoderOut
from fairseq.models import register_model, register_model_architecture
from fairseq.models.nat import FairseqNATDecoder, FairseqNATModel, ensemble_decoder
from fairseq.models.transformer import Embedding
from fairseq.modules.transformer_sentence_encoder import init_bert_params


def _mean_pooling(enc_feats, src_masks):
    """Average encoder features over time, ignoring padded positions.

    ``src_masks`` is the padding mask (True at padded positions) or ``None``
    when there is no padding.
    """
    # enc_feats: T x B x C
    # src_masks: B x T or None
    if src_masks is None:
        enc_feats = enc_feats.mean(0)
    else:
        # flip to a "keep" mask laid out as T x B
        src_masks = (~src_masks).transpose(0, 1).type_as(enc_feats)
        enc_feats = (
            (enc_feats / src_masks.sum(0)[None, :, None]) * src_masks[:, :, None]
        ).sum(0)
    return enc_feats


def _argmax(x, dim):
    """Return a 0/1 tensor (same type as ``x``) marking the maxima along ``dim``."""
    return (x == x.max(dim, keepdim=True)[0]).type_as(x)


def _uniform_assignment(src_lens, trg_lens):
    """Map every target position to a source position, spread uniformly.

    Returns:
        LongTensor of shape ``(batch_size, max_trg_len)`` holding, for each
        target position, the index of the source position assigned to it.
    """
    max_trg_len = trg_lens.max()
    # step-size; the denominator is clamped so that a target of length 1 maps
    # to source position 0 instead of producing inf/NaN steps
    steps = (src_lens.float() - 1) / (trg_lens.float() - 1).clamp(min=1)
    # max_trg_len
    index_t = utils.new_arange(trg_lens, max_trg_len).float()
    index_t = steps[:, None] * index_t[None, :]  # batch_size X max_trg_len
    index_t = torch.round(index_t).long().detach()
    return index_t


@register_model("nonautoregressive_transformer")
class NATransformerModel(FairseqNATModel):
    """Non-autoregressive Transformer with an explicit length predictor."""

    @property
    def allow_length_beam(self):
        return True

    @staticmethod
    def add_args(parser):
        """Register the parent model's options plus the length-prediction ones."""
        FairseqNATModel.add_args(parser)

        # length prediction
        parser.add_argument(
            "--src-embedding-copy",
            action="store_true",
            help="copy encoder word embeddings as the initial input of the decoder",
        )
        parser.add_argument(
            "--pred-length-offset",
            action="store_true",
            help="predicting the length difference between the target and source sentences",
        )
        parser.add_argument(
            "--sg-length-pred",
            action="store_true",
            help="stop the gradients back-propagated from the length predictor",
        )
        parser.add_argument(
            "--length-loss-factor",
            type=float,
            help="weights on the length prediction loss",
        )

    @classmethod
    def build_decoder(cls, args, tgt_dict, embed_tokens):
        """Create the decoder, optionally applying BERT-style initialisation."""
        decoder = NATransformerDecoder(args, tgt_dict, embed_tokens)
        if getattr(args, "apply_bert_init", False):
            decoder.apply(init_bert_params)
        return decoder

    def forward(
        self, src_tokens, src_lengths, prev_output_tokens, tgt_tokens, **kwargs
    ):
        """Run encoder, length predictor and decoder for training.

        Returns:
            dict: entries ``"word_ins"`` and ``"length"`` holding the outputs
            and targets of the two training objectives.
        """
        # encoding
        encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs)

        # length prediction
        length_out = self.decoder.forward_length(
            normalize=False, encoder_out=encoder_out
        )
        length_tgt = self.decoder.forward_length_prediction(
            length_out, encoder_out, tgt_tokens
        )

        # decoding
        word_ins_out = self.decoder(
            normalize=False,
            prev_output_tokens=prev_output_tokens,
            encoder_out=encoder_out,
        )

        return {
            "word_ins": {
                "out": word_ins_out,
                "tgt": tgt_tokens,
                "mask": tgt_tokens.ne(self.pad),
                "ls": self.args.label_smoothing,
                "nll_loss": True,
            },
            "length": {
                "out": length_out,
                "tgt": length_tgt,
                "factor": self.decoder.length_loss_factor,
            },
        }

    def forward_decoder(self, decoder_out, encoder_out, decoding_format=None, **kwargs):
        """Decode one step by taking the arg-max token at every non-pad position.

        NOTE: ``decoder_out.output_tokens`` and ``decoder_out.output_scores``
        are updated in place.
        """
        step = decoder_out.step
        output_tokens = decoder_out.output_tokens
        output_scores = decoder_out.output_scores
        history = decoder_out.history

        # execute the decoder
        output_masks = output_tokens.ne(self.pad)
        _scores, _tokens = self.decoder(
            normalize=True,
            prev_output_tokens=output_tokens,
            encoder_out=encoder_out,
            step=step,
        ).max(-1)

        output_tokens.masked_scatter_(output_masks, _tokens[output_masks])
        output_scores.masked_scatter_(output_masks, _scores[output_masks])
        if history is not None:
            history.append(output_tokens.clone())

        return decoder_out._replace(
            output_tokens=output_tokens,
            output_scores=output_scores,
            attn=None,
            history=history,
        )

    def initialize_output_tokens(self, encoder_out, src_tokens):
        """Build the initial canvas: ``bos``, ``unk`` placeholders, ``eos``, padding."""
        # length prediction
        length_tgt = self.decoder.forward_length_prediction(
            self.decoder.forward_length(normalize=True, encoder_out=encoder_out),
            encoder_out=encoder_out,
        )

        # every sequence needs room for at least bos and eos
        max_length = length_tgt.clamp_(min=2).max()
        idx_length = utils.new_arange(src_tokens, max_length)

        initial_output_tokens = src_tokens.new_zeros(
            src_tokens.size(0), max_length
        ).fill_(self.pad)
        initial_output_tokens.masked_fill_(
            idx_length[None, :] < length_tgt[:, None], self.unk
        )
        initial_output_tokens[:, 0] = self.bos
        initial_output_tokens.scatter_(1, length_tgt[:, None] - 1, self.eos)

        initial_output_scores = initial_output_tokens.new_zeros(
            *initial_output_tokens.size()
        ).type_as(encoder_out["encoder_out"][0])

        return DecoderOut(
            output_tokens=initial_output_tokens,
            output_scores=initial_output_scores,
            attn=None,
            step=0,
            max_step=0,
            history=None,
        )

    def regenerate_length_beam(self, decoder_out, beam_size):
        """Expand every hypothesis into ``beam_size`` canvases of nearby lengths."""
        output_tokens = decoder_out.output_tokens
        length_tgt = output_tokens.ne(self.pad).sum(1)
        # candidate lengths centred on the current length
        length_tgt = (
            length_tgt[:, None]
            + utils.new_arange(length_tgt, 1, beam_size)
            - beam_size // 2
        )
        length_tgt = length_tgt.view(-1).clamp_(min=2)
        max_length = length_tgt.max()
        idx_length = utils.new_arange(length_tgt, max_length)

        initial_output_tokens = output_tokens.new_zeros(
            length_tgt.size(0), max_length
        ).fill_(self.pad)
        initial_output_tokens.masked_fill_(
            idx_length[None, :] < length_tgt[:, None], self.unk
        )
        initial_output_tokens[:, 0] = self.bos
        initial_output_tokens.scatter_(1, length_tgt[:, None] - 1, self.eos)

        initial_output_scores = initial_output_tokens.new_zeros(
            *initial_output_tokens.size()
        ).type_as(decoder_out.output_scores)

        return decoder_out._replace(
            output_tokens=initial_output_tokens, output_scores=initial_output_scores
        )


class NATransformerDecoder(FairseqNATDecoder):
    """Decoder without a causal self-attention mask, plus a length predictor."""

    def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
        super().__init__(
            args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn
        )
        self.dictionary = dictionary
        self.bos = dictionary.bos()
        self.unk = dictionary.unk()
        self.eos = dictionary.eos()

        self.encoder_embed_dim = args.encoder_embed_dim
        self.sg_length_pred = getattr(args, "sg_length_pred", False)
        self.pred_length_offset = getattr(args, "pred_length_offset", False)
        self.length_loss_factor = getattr(args, "length_loss_factor", 0.1)
        self.src_embedding_copy = getattr(args, "src_embedding_copy", False)
        # 256 length classes; its weight matrix is the length classifier
        self.embed_length = Embedding(256, self.encoder_embed_dim, None)

    @ensemble_decoder
    def forward(self, normalize, encoder_out, prev_output_tokens, step=0, **unused):
        """Return per-position vocabulary scores (log-probs when ``normalize``)."""
        features, _ = self.extract_features(
            prev_output_tokens,
            encoder_out=encoder_out,
            # source embeddings are only copied on the very first step
            embedding_copy=(step == 0) & self.src_embedding_copy,
        )
        decoder_out = self.output_layer(features)
        return F.log_softmax(decoder_out, -1) if normalize else decoder_out

    @ensemble_decoder
    def forward_length(self, normalize, encoder_out):
        """Score the length classes from the mean-pooled encoder output."""
        enc_feats = encoder_out["encoder_out"][0]  # T x B x C
        if len(encoder_out["encoder_padding_mask"]) > 0:
            src_masks = encoder_out["encoder_padding_mask"][0]  # B x T
        else:
            src_masks = None
        enc_feats = _mean_pooling(enc_feats, src_masks)
        if self.sg_length_pred:
            # keep the length loss from back-propagating into the encoder
            enc_feats = enc_feats.detach()
        length_out = F.linear(enc_feats, self.embed_length.weight)
        return F.log_softmax(length_out, -1) if normalize else length_out

    def extract_features(
        self,
        prev_output_tokens,
        encoder_out=None,
        early_exit=None,
        embedding_copy=False,
        **unused
    ):
        """
        Similar to *forward* but only return features.

        Inputs:
            prev_output_tokens: Tensor(B, T)
            encoder_out: a dictionary of hidden states and masks
            early_exit: when given, only the first ``early_exit`` layers run
            embedding_copy: when True, the decoder input is built from the
                encoder embeddings instead of the token embeddings

        Returns:
            tuple:
                - the decoder's features of shape `(batch, tgt_len, embed_dim)`
                - a dictionary with any model-specific outputs
            the LevenshteinTransformer decoder has full-attention to all generated tokens
        """
        # embedding
        if embedding_copy:
            src_embd = encoder_out["encoder_embedding"][0]
            if len(encoder_out["encoder_padding_mask"]) > 0:
                src_mask = encoder_out["encoder_padding_mask"][0]
            else:
                src_mask = None
            # turn the padding mask into a "keep" mask (all ones if unpadded)
            src_mask = (
                ~src_mask
                if src_mask is not None
                else prev_output_tokens.new_ones(*src_embd.size()[:2]).bool()
            )

            x, decoder_padding_mask = self.forward_embedding(
                prev_output_tokens,
                self.forward_copying_source(
                    src_embd, src_mask, prev_output_tokens.ne(self.padding_idx)
                ),
            )

        else:

            x, decoder_padding_mask = self.forward_embedding(prev_output_tokens)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)
        attn = None
        inner_states = [x]

        # decoder layers
        for i, layer in enumerate(self.layers):

            # early exit from the decoder.
            if (early_exit is not None) and (i >= early_exit):
                break

            x, attn, _ = layer(
                x,
                encoder_out["encoder_out"][0]
                if (encoder_out is not None and len(encoder_out["encoder_out"]) > 0)
                else None,
                encoder_out["encoder_padding_mask"][0]
                if (
                    encoder_out is not None
                    and len(encoder_out["encoder_padding_mask"]) > 0
                )
                else None,
                self_attn_mask=None,
                self_attn_padding_mask=decoder_padding_mask,
            )
            inner_states.append(x)

        if self.layer_norm:
            x = self.layer_norm(x)

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        if self.project_out_dim is not None:
            x = self.project_out_dim(x)

        return x, {"attn": attn, "inner_states": inner_states}

    def forward_embedding(self, prev_output_tokens, states=None):
        """Embed the decoder input and compute its padding mask.

        When ``states`` is given it is used in place of the token embeddings;
        NOTE that the positional embeddings are then added to it in place.

        Returns:
            tuple: ``(x, decoder_padding_mask)``.
        """
        # embed positions
        positions = (
            self.embed_positions(prev_output_tokens)
            if self.embed_positions is not None
            else None
        )

        # embed tokens and positions
        if states is None:
            x = self.embed_scale * self.embed_tokens(prev_output_tokens)
            if self.project_in_dim is not None:
                x = self.project_in_dim(x)
        else:
            x = states

        if positions is not None:
            x += positions
        x = self.dropout_module(x)
        decoder_padding_mask = prev_output_tokens.eq(self.padding_idx)
        return x, decoder_padding_mask

    def forward_copying_source(self, src_embeds, src_masks, tgt_masks):
        """Gather source embeddings for every target position (uniform mapping).

        Padded target positions read source position 0.
        """
        length_sources = src_masks.sum(1)
        length_targets = tgt_masks.sum(1)
        mapped_inputs = _uniform_assignment(length_sources, length_targets).masked_fill(
            ~tgt_masks, 0
        )
        copied_embedding = torch.gather(
            src_embeds,
            1,
            mapped_inputs.unsqueeze(-1).expand(
                *mapped_inputs.size(), src_embeds.size(-1)
            ),
        )
        return copied_embedding

    def forward_length_prediction(self, length_out, encoder_out, tgt_tokens=None):
        """Return length targets (training) or predicted lengths (inference).

        With ``tgt_tokens`` the result is the class index to train on, clamped
        to ``[0, 255]``; without it, the arg-max of ``length_out`` is decoded
        back into a length. When ``pred_length_offset`` is set, classes encode
        ``target length - source length + 128``.
        """
        enc_feats = encoder_out["encoder_out"][0]  # T x B x C
        if len(encoder_out["encoder_padding_mask"]) > 0:
            src_masks = encoder_out["encoder_padding_mask"][0]  # B x T
        else:
            src_masks = None
        if self.pred_length_offset:
            if src_masks is None:
                src_lengs = enc_feats.new_ones(enc_feats.size(1)).fill_(
                    enc_feats.size(0)
                )
            else:
                src_lengs = (~src_masks).transpose(0, 1).type_as(enc_feats).sum(0)
            src_lengs = src_lengs.long()

        if tgt_tokens is not None:
            # obtain the length target
            tgt_lengs = tgt_tokens.ne(self.padding_idx).sum(1).long()
            if self.pred_length_offset:
                length_tgt = tgt_lengs - src_lengs + 128
            else:
                length_tgt = tgt_lengs
            length_tgt = length_tgt.clamp(min=0, max=255)

        else:
            # predict the length target (greedy for now)
            # TODO: implementing length-beam
            pred_lengs = length_out.max(-1)[1]
            if self.pred_length_offset:
                length_tgt = pred_lengs - 128 + src_lengs
            else:
                length_tgt = pred_lengs

        return length_tgt


@register_model_architecture(
    "nonautoregressive_transformer", "nonautoregressive_transformer"
)
def base_architecture(args):
    """Fill in the default hyper-parameters of the base NAT architecture.

    Only attributes that are missing from ``args`` are set.
    """
    args.encoder_embed_path = getattr(args, "encoder_embed_path", None)
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512)
    args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048)
    args.encoder_layers = getattr(args, "encoder_layers", 6)
    args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8)
    args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False)
    args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False)
    args.decoder_embed_path = getattr(args, "decoder_embed_path", None)
    # decoder sizes default to the (already resolved) encoder sizes
    args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim)
    args.decoder_ffn_embed_dim = getattr(
        args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim
    )
    args.decoder_layers = getattr(args, "decoder_layers", 6)
    args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8)
    args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False)
    args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False)
    args.attention_dropout = getattr(args, "attention_dropout", 0.0)
    args.activation_dropout = getattr(args, "activation_dropout", 0.0)
    args.activation_fn = getattr(args, "activation_fn", "relu")
    args.dropout = getattr(args, "dropout", 0.1)
    args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None)
    args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0)
    args.share_decoder_input_output_embed = getattr(
        args, "share_decoder_input_output_embed", False
    )
    args.share_all_embeddings = getattr(args, "share_all_embeddings", False)
    args.no_token_positional_embeddings = getattr(
        args, "no_token_positional_embeddings", False
    )
    args.adaptive_input = getattr(args, "adaptive_input", False)
    args.apply_bert_init = getattr(args, "apply_bert_init", False)

    args.decoder_output_dim = getattr(
        args, "decoder_output_dim", args.decoder_embed_dim
    )
    args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim)

    # --- special arguments ---
    args.sg_length_pred = getattr(args, "sg_length_pred", False)
    args.pred_length_offset = getattr(args, "pred_length_offset", False)
    args.length_loss_factor = getattr(args, "length_loss_factor", 0.1)
    args.src_embedding_copy = getattr(args, "src_embedding_copy", False)


@register_model_architecture(
    "nonautoregressive_transformer", "nonautoregressive_transformer_wmt_en_de"
)
def nonautoregressive_transformer_wmt_en_de(args):
    """WMT En-De preset; identical to the base architecture."""
    base_architecture(args)


# diff --git a/fairseq/fairseq/models/roberta/__init__.py b/fairseq/fairseq/models/roberta/__init__.py
# new file mode 100644
# index 0000000..4cd723a
# --- /dev/null
# +++ b/fairseq/fairseq/models/roberta/__init__.py
# @@ -0,0 +1,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
def align_bpe_to_words(roberta, bpe_tokens: torch.LongTensor, other_tokens: List[str]):
    """
    Helper to align GPT-2 BPE to other tokenization formats (e.g., spaCy).

    Args:
        roberta (RobertaHubInterface): RoBERTa instance
        bpe_tokens (torch.LongTensor): GPT-2 BPE tokens of shape `(T_bpe)`;
            the first entry must be token id 0
        other_tokens (List[str]): other tokens of shape `(T_words)`

    Returns:
        List[List[int]]: one list per entry of *other_tokens*, holding the
        positions in *bpe_tokens* that cover that entry. Position 0 is the
        leading token and is never part of an alignment. The result is empty
        when *other_tokens* is empty.

    Raises:
        AssertionError: if the inputs violate the preconditions above or the
            two tokenizations do not spell the same text once stripped.
        Exception: if a word cannot be matched against the remaining BPE
            tokens (including the case where no BPE token is left).
    """
    assert bpe_tokens.dim() == 1
    assert bpe_tokens[0] == 0

    def clean(text):
        return text.strip()

    # remove whitespaces to simplify alignment
    bpe_tokens = [roberta.task.source_dictionary.string([x]) for x in bpe_tokens]
    bpe_tokens = [
        clean(roberta.bpe.decode(x) if x not in {"<s>", ""} else x) for x in bpe_tokens
    ]
    other_tokens = [clean(str(o)) for o in other_tokens]

    # strip leading <s>
    bpe_tokens = bpe_tokens[1:]
    assert "".join(bpe_tokens) == "".join(other_tokens)

    # create alignment from every word to a list of BPE tokens
    alignment = []
    bpe_toks = filter(lambda item: item[1] != "", enumerate(bpe_tokens, start=1))
    # (None, None) marks "no BPE token left"; using a default here keeps an
    # input without any non-empty BPE token from leaking StopIteration.
    j, bpe_tok = next(bpe_toks, (None, None))
    for other_tok in other_tokens:
        bpe_indices = []
        while True:
            if bpe_tok is None:
                # Nothing left to align this word with; report it the same
                # way as any other mismatch instead of failing inside
                # str.startswith(None).
                raise Exception('Cannot align "{}" and "{}"'.format(other_tok, bpe_tok))
            if other_tok.startswith(bpe_tok):
                bpe_indices.append(j)
                other_tok = other_tok[len(bpe_tok) :]
                j, bpe_tok = next(bpe_toks, (None, None))
            elif bpe_tok.startswith(other_tok):
                # the current BPE token spans multiple words: consume the
                # part that belongs to this word and keep the remainder
                bpe_indices.append(j)
                bpe_tok = bpe_tok[len(other_tok) :]
                other_tok = ""
            else:
                raise Exception('Cannot align "{}" and "{}"'.format(other_tok, bpe_tok))
            if other_tok == "":
                break
        assert len(bpe_indices) > 0
        alignment.append(bpe_indices)
    assert len(alignment) == len(other_tokens)

    return alignment
def spacy_tokenizer():
    """Return the cached spaCy tokenizer, creating it on first use.

    The tokenizer is taken from the pipeline returned by ``spacy_nlp()`` and
    stored on the function object, so later calls return the same instance.

    Raises:
        ImportError: if spaCy is not installed.
    """
    if getattr(spacy_tokenizer, "_tokenizer", None) is None:
        try:
            nlp = spacy_nlp()
            # `nlp.Defaults.create_tokenizer(nlp)` was removed in spaCy v3 and
            # raises AttributeError there; the pipeline's own `tokenizer`
            # attribute is available in both v2 and v3.
            spacy_tokenizer._tokenizer = nlp.tokenizer
        except ImportError as err:
            raise ImportError("Please install spacy with: pip install spacy") from err
    return spacy_tokenizer._tokenizer
"--pretrained-mlm-checkpoint", + default=None, + type=str, + metavar="PRETRAINED", + help="path to pretrained mlm checkpoint", + ) + parser.add_argument( + "--pretrained-decoder", action="store_true", help="reload decoder" + ) + parser.add_argument( + "--hack-layernorm-embedding", + action="store_true", + help="hack to reload old models trained with encoder-normalize-before=False (no equivalent to encoder-normalize-before=False and layernorm_embedding=False", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--share-all-embeddings", + action="store_true", + help="share encoder, decoder and output embeddings" + " (requires shared dictionary and embed dim)", + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present + base_enc_dec_architecture(args) + if args.pretrained_mlm_checkpoint: + arg_overrides = None + if args.hack_layernorm_embedding: + arg_overrides = {"layernorm_embedding": False} + loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task( + [args.pretrained_mlm_checkpoint], arg_overrides=arg_overrides + ) + ([roberta_enc], _cfg, _task) = loaded + else: + # Do we need to edit untie_weights here ? 
+ share_in_out = ( + args.share_decoder_input_output_embed or args.share_all_embeddings + ) + args.untie_weights_roberta = not share_in_out + if args.hack_layernorm_embedding: + args.layernorm_embedding = False + args.encoder_normalize_before = False + roberta_enc = roberta.RobertaModel.build_model(args, task) + + return cls.from_roberta(roberta_enc, args, task.source_dictionary) + + @staticmethod + def from_roberta(roberta_enc: roberta.RobertaModel, args, dictionary): + encoder = roberta_enc.encoder.sentence_encoder + vocab_size, embed_dim = encoder.embed_tokens.weight.shape + + if args.share_all_embeddings: + lm_head = roberta_enc.encoder.lm_head + assert encoder.embed_tokens.weight is lm_head.weight, ( + "Can't use --share-all-embeddings with a model " + "that was pretraiend with --untie-weights-roberta_enc" + ) + else: + lm_head = roberta.RobertaLMHead( + embed_dim, vocab_size, roberta_enc.args.activation_fn + ) + + dec_embs = nn.Embedding(vocab_size, embed_dim, dictionary.pad()) + if args.share_all_embeddings or args.share_decoder_input_output_embed: + # Note: I wasn't able to use Embedding _weight parameter to achive this sharing. + dec_embs.weight = lm_head.weight + + decoder = TransformerDecoder( + RobertaEncDecModel.read_args_from_roberta(roberta_enc.args), + dictionary, + dec_embs, + no_encoder_attn=False, + output_projection=lm_head, + ) + if getattr(args, "pretrained_decoder", False): + decoder_dict = encoder.state_dict() + + # TODO: hide setting "encoder_attn" layers behind a flag. + for k, w in list(decoder_dict.items()): + if ".self_attn" in k: + k_enc_attn = k.replace(".self_attn", ".encoder_attn") + decoder_dict[k_enc_attn] = w.detach().clone() + + for k, w in lm_head.state_dict().items(): + decoder_dict["output_projection." 
@staticmethod
def read_args_from_roberta(roberta_args: argparse.Namespace):
    """Derive decoder arguments from the arguments of a RoBERTa encoder.

    Returns a new namespace holding a copy of every attribute of
    ``roberta_args`` plus the decoder-side attributes mirrored from their
    encoder counterparts. ``roberta_args`` itself is not modified.
    """
    # TODO: this would become easier if encoder/decoder where using a similar
    # TransformerConfig object
    encoder_to_decoder = (
        ("encoder_attention_heads", "decoder_attention_heads"),
        ("encoder_embed_dim", "decoder_embed_dim"),
        ("encoder_embed_dim", "decoder_output_dim"),
        ("encoder_normalize_before", "decoder_normalize_before"),
        ("encoder_layers_to_keep", "decoder_layers_to_keep"),
        ("encoder_ffn_embed_dim", "decoder_ffn_embed_dim"),
        ("encoder_layerdrop", "decoder_layerdrop"),
        ("encoder_layers", "decoder_layers"),
        ("encoder_learned_pos", "decoder_learned_pos"),
        # should this be set from here ?
        ("max_positions", "max_target_positions"),
    )
    mirrored = {
        target: getattr(roberta_args, source) for source, target in encoder_to_decoder
    }
    args = argparse.Namespace(**{**vars(roberta_args), **mirrored})

    # Values carried over from the encoder win; otherwise use the defaults.
    fields = vars(args)
    fields["adaptive_softmax_cutoff"] = fields.get("adaptive_softmax_cutoff", None)
    fields["adaptive_softmax_dropout"] = fields.get("adaptive_softmax_dropout", 0)
    fields["share_decoder_input_output_embed"] = not roberta_args.untie_weights_roberta
    return args
+ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.data import encoders + + +class RobertaHubInterface(nn.Module): + """A simple PyTorch Hub interface to RoBERTa. + + Usage: https://github.com/pytorch/fairseq/tree/main/examples/roberta + """ + + def __init__(self, cfg, task, model): + super().__init__() + self.cfg = cfg + self.task = task + self.model = model + + self.bpe = encoders.build_bpe(cfg.bpe) + + # this is useful for determining the device + self.register_buffer("_float_tensor", torch.tensor([0], dtype=torch.float)) + + @property + def device(self): + return self._float_tensor.device + + def encode( + self, sentence: str, *addl_sentences, no_separator=False + ) -> torch.LongTensor: + """ + BPE-encode a sentence (or multiple sentences). + + Every sequence begins with a beginning-of-sentence (`<s>`) symbol. + Every sentence ends with an end-of-sentence (`</s>`) and we use an + extra end-of-sentence (`</s>`) as a separator. + + Example (single sentence): `<s> a b c </s>` + Example (sentence pair): `<s> d e f </s> </s> 1 2 3 </s>` + + The BPE encoding follows GPT-2. One subtle detail is that the GPT-2 BPE + requires leading spaces. 
def decode(self, tokens: torch.LongTensor):
    """Turn a 1-D tensor of token ids back into text.

    A leading beginning-of-sentence id is dropped. The sequence is split
    after every end-of-sentence id that is immediately followed by another
    end-of-sentence id, and each piece is converted to a string with the
    source dictionary and then BPE-decoded.

    Args:
        tokens: 1-D tensor of token ids; it may live on any device and may
            be empty.

    Returns:
        A single string when the input holds one piece, otherwise a list of
        strings (one per piece).
    """
    assert tokens.dim() == 1
    # Tensor.numpy() only works on CPU tensors, so move the data first.
    tokens = tokens.detach().cpu().numpy()
    # Guard the index so an empty tensor decodes instead of raising IndexError.
    if tokens.size > 0 and tokens[0] == self.task.source_dictionary.bos():
        tokens = tokens[1:]  # remove <s>
    eos_mask = tokens == self.task.source_dictionary.eos()
    # True at position i when tokens[i] and tokens[i + 1] are both </s>,
    # i.e. at the separator between two sentences.
    doc_mask = eos_mask[1:] & eos_mask[:-1]
    sentences = np.split(tokens, doc_mask.nonzero()[0] + 1)
    sentences = [
        self.bpe.decode(self.task.source_dictionary.string(s)) for s in sentences
    ]
    if len(sentences) == 1:
        return sentences[0]
    return sentences
def fill_mask(self, masked_input: str, topk: int = 5):
    """Predict the ``topk`` most probable fillers for the single ``<mask>``.

    Args:
        masked_input: text containing exactly one ``<mask>`` marker.
        topk: number of candidates to return.

    Returns:
        A list of ``(filled_text, probability, predicted_token)`` tuples in
        the order produced by ``topk``.
    """
    masked_token = "<mask>"
    assert (
        masked_input.count(masked_token) == 1
    ), "Please add one {0} token for the input, eg: 'He is a {0} guy'".format(
        masked_token
    )

    # BPE-encode the text on both sides of the mask and re-insert the marker.
    encoded_spans = [
        self.bpe.encode(span.rstrip()) for span in masked_input.split(masked_token)
    ]
    bpe_text = " {0} ".format(masked_token).join(encoded_spans).strip()
    tokens = self.task.source_dictionary.encode_line(
        "<s> " + bpe_text + " </s>",
        append_eos=False,
        add_if_not_exist=False,
    )

    # Locate the mask before a batch dimension is added.
    masked_index = (tokens == self.task.mask_idx).nonzero(as_tuple=False)
    if tokens.dim() == 1:
        tokens = tokens.unsqueeze(0)

    with utils.model_eval(self.model):
        features, extra = self.model(
            tokens.long().to(device=self.device),
            features_only=False,
            return_all_hiddens=False,
        )
    prob = features[0, masked_index, :].squeeze().softmax(dim=0)
    values, top_ids = prob.topk(k=topk, dim=0)
    predicted_bpe = self.task.source_dictionary.string(top_ids)

    # When the mask is preceded by a space, that space is replaced as well.
    spaced_mask = " {0}".format(masked_token)
    placeholder = spaced_mask if spaced_mask in masked_input else masked_token

    topk_filled_outputs = []
    for rank, piece in enumerate(predicted_bpe.split(" ")):
        predicted_token = self.bpe.decode(piece)
        # Quick hack to fix https://github.com/pytorch/fairseq/issues/1306
        if piece.startswith("\u2581"):
            predicted_token = " " + predicted_token
        topk_filled_outputs.append(
            (
                masked_input.replace(placeholder, predicted_token),
                values[rank].item(),
                predicted_token,
            )
        )
    return topk_filled_outputs
@classmethod
def hub_models(cls):
    """Return the mapping from released model names to their archive URLs."""
    archives = (
        (
            "roberta.base",
            "http://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz",
        ),
        (
            "roberta.large",
            "http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz",
        ),
        (
            "roberta.large.mnli",
            "http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.mnli.tar.gz",
        ),
        (
            "roberta.large.wsc",
            "http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.wsc.tar.gz",
        ),
    )
    return dict(archives)
+ def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--encoder-layers", type=int, metavar="L", help="num encoder layers" + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="H", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="F", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="A", + help="num encoder attention heads", + ) + parser.add_argument( + "--activation-fn", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--pooler-activation-fn", + choices=utils.get_available_activation_fns(), + help="activation function to use for pooler layer", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN", + ) + parser.add_argument( + "--pooler-dropout", + type=float, + metavar="D", + help="dropout probability in the masked_lm pooler layers", + ) + parser.add_argument( + "--max-positions", type=int, help="number of positional embeddings to learn" + ) + parser.add_argument( + "--load-checkpoint-heads", + action="store_true", + help="(re-)register and load heads when loading checkpoints", + ) + parser.add_argument( + "--untie-weights-roberta", + action="store_true", + help="Untie weights between embeddings and classifiers in RoBERTa", + ) + # args for 
"Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019) + parser.add_argument( + "--encoder-layerdrop", + type=float, + metavar="D", + default=0, + help="LayerDrop probability for encoder", + ) + parser.add_argument( + "--encoder-layers-to-keep", + default=None, + help="which layers to *keep* when pruning as a comma-separated list", + ) + # args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020) + parser.add_argument( + "--quant-noise-pq", + type=float, + metavar="D", + default=0, + help="iterative PQ quantization noise at training time", + ) + parser.add_argument( + "--quant-noise-pq-block-size", + type=int, + metavar="D", + default=8, + help="block size of quantization noise at training time", + ) + parser.add_argument( + "--quant-noise-scalar", + type=float, + metavar="D", + default=0, + help="scalar quantization noise and scalar quantization at training time", + ) + # args for "Better Fine-Tuning by Reducing Representational Collapse" (Aghajanyan et al. 2020) + parser.add_argument( + "--spectral-norm-classification-head", + action="store_true", + default=False, + help="Apply spectral normalization on the classification head", + ) + # args for Fully Sharded Data Parallel (FSDP) training + parser.add_argument( + "--min-params-to-wrap", + type=int, + metavar="D", + default=DEFAULT_MIN_PARAMS_TO_WRAP, + help=( + "minimum number of params for a layer to be wrapped with FSDP() when " + "training with --ddp-backend=fully_sharded. Smaller values will " + "improve memory efficiency, but may make torch.distributed " + "communication less efficient due to smaller input sizes. This option " + "is set to 0 (i.e., always wrap) when --checkpoint-activations or " + "--offload-activations are passed." 
@classmethod
def build_model(cls, args, task):
    """Build a new model instance."""

    from omegaconf import OmegaConf

    # An OmegaConf config is unlocked while defaults are written into it and
    # locked again once the encoder has been built.
    args_is_config = OmegaConf.is_config(args)
    if args_is_config:
        OmegaConf.set_struct(args, False)

    # make sure all arguments are present
    base_architecture(args)

    needs_max_positions = not safe_hasattr(args, "max_positions")
    if needs_max_positions and not safe_hasattr(args, "tokens_per_sample"):
        args.tokens_per_sample = task.max_positions()
    if needs_max_positions:
        args.max_positions = args.tokens_per_sample

    encoder = RobertaEncoder(args, task.source_dictionary)

    if args_is_config:
        OmegaConf.set_struct(args, True)

    return cls(args, encoder)
self.classification_heads[classification_head_name](x) + return x, extra + + def _get_adaptive_head_loss(self): + norm_loss = 0 + scaling = float(self.args.mha_reg_scale_factor) + for layer in self.encoder.sentence_encoder.layers: + norm_loss_layer = 0 + for i in range(layer.self_attn.num_heads): + start_idx = i * layer.self_attn.head_dim + end_idx = (i + 1) * layer.self_attn.head_dim + norm_loss_layer += scaling * ( + torch.sum( + torch.abs( + layer.self_attn.q_proj.weight[ + start_idx:end_idx, + ] + ) + ) + + torch.sum( + torch.abs(layer.self_attn.q_proj.bias[start_idx:end_idx]) + ) + ) + norm_loss_layer += scaling * ( + torch.sum( + torch.abs( + layer.self_attn.k_proj.weight[ + start_idx:end_idx, + ] + ) + ) + + torch.sum( + torch.abs(layer.self_attn.k_proj.bias[start_idx:end_idx]) + ) + ) + norm_loss_layer += scaling * ( + torch.sum( + torch.abs( + layer.self_attn.v_proj.weight[ + start_idx:end_idx, + ] + ) + ) + + torch.sum( + torch.abs(layer.self_attn.v_proj.bias[start_idx:end_idx]) + ) + ) + + norm_loss += norm_loss_layer + return norm_loss + + def _get_adaptive_ffn_loss(self): + ffn_scale_factor = float(self.args.ffn_reg_scale_factor) + filter_loss = 0 + for layer in self.encoder.sentence_encoder.layers: + filter_loss += torch.sum( + torch.abs(layer.fc1.weight * ffn_scale_factor) + ) + torch.sum(torch.abs(layer.fc2.weight * ffn_scale_factor)) + filter_loss += torch.sum( + torch.abs(layer.fc1.bias * ffn_scale_factor) + ) + torch.sum(torch.abs(layer.fc2.bias * ffn_scale_factor)) + return filter_loss + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + logits = net_output[0].float() + if log_probs: + return F.log_softmax(logits, dim=-1) + else: + return F.softmax(logits, dim=-1) + + def register_classification_head( + self, name, num_classes=None, inner_dim=None, **kwargs + ): + """Register a classification head.""" + if name in self.classification_heads: + 
@classmethod
def from_pretrained(
    cls,
    model_name_or_path,
    checkpoint_file="model.pt",
    data_name_or_path=".",
    bpe="gpt2",
    **kwargs,
):
    """Load a pretrained model and wrap it in a ``RobertaHubInterface``.

    The arguments are forwarded to ``hub_utils.from_pretrained`` together
    with this class's ``hub_models()`` archive map and
    ``load_checkpoint_heads=True``.
    """
    from fairseq import hub_utils

    loaded = hub_utils.from_pretrained(
        model_name_or_path,
        checkpoint_file,
        data_name_or_path,
        archive_map=cls.hub_models(),
        bpe=bpe,
        load_checkpoint_heads=True,
        **kwargs,
    )
    cfg = loaded["args"]

    logger.info(cfg)
    # Only the first model of the loaded list is exposed through the interface.
    first_model = loaded["models"][0]
    return RobertaHubInterface(cfg, loaded["task"], first_model)
in k: + new_k = k.replace(".emb_layer_norm.", ".layernorm_embedding.") + state_dict[new_k] = state_dict[k] + del state_dict[k] + + # upgrade children modules + super().upgrade_state_dict_named(state_dict, name) + + # Handle new classification heads present in the state dict. + current_head_names = ( + [] + if not hasattr(self, "classification_heads") + else self.classification_heads.keys() + ) + keys_to_delete = [] + for k in state_dict.keys(): + if not k.startswith(prefix + "classification_heads."): + continue + + head_name = k[len(prefix + "classification_heads.") :].split(".")[0] + num_classes = state_dict[ + prefix + "classification_heads." + head_name + ".out_proj.weight" + ].size(0) + inner_dim = state_dict[ + prefix + "classification_heads." + head_name + ".dense.weight" + ].size(0) + + if getattr(self.args, "load_checkpoint_heads", False): + if head_name not in current_head_names: + self.register_classification_head(head_name, num_classes, inner_dim) + else: + if head_name not in current_head_names: + logger.warning( + "deleting classification head ({}) from checkpoint " + "not present in current model: {}".format(head_name, k) + ) + keys_to_delete.append(k) + elif ( + num_classes + != self.classification_heads[head_name].out_proj.out_features + or inner_dim + != self.classification_heads[head_name].dense.out_features + ): + logger.warning( + "deleting classification head ({}) from checkpoint " + "with different dimensions than current model: {}".format( + head_name, k + ) + ) + keys_to_delete.append(k) + for k in keys_to_delete: + del state_dict[k] + + # Copy any newly-added classification heads into the state dict + # with their current weights. + if hasattr(self, "classification_heads"): + cur_state = self.classification_heads.state_dict() + for k, v in cur_state.items(): + if prefix + "classification_heads." + k not in state_dict: + logger.info("Overwriting " + prefix + "classification_heads." + k) + state_dict[prefix + "classification_heads." 
+ k] = v + + # adapt data2vec models + if ( + "encoder._ema" in state_dict + and "encoder.lm_head.weight" not in state_dict + ): + lm_state = self.encoder.lm_head.state_dict() + for k, v in lm_state.items(): + state_dict["encoder.lm_head." + k] = v + + for k in list(state_dict.keys()): + if k.startswith("encoder.regression_head") or k == "encoder._ema": + del state_dict[k] + + +class RobertaLMHead(nn.Module): + """Head for masked language modeling.""" + + def __init__(self, embed_dim, output_dim, activation_fn, weight=None): + super().__init__() + self.dense = nn.Linear(embed_dim, embed_dim) + self.activation_fn = utils.get_activation_fn(activation_fn) + self.layer_norm = LayerNorm(embed_dim) + + if weight is None: + weight = nn.Linear(embed_dim, output_dim, bias=False).weight + self.weight = weight + self.bias = nn.Parameter(torch.zeros(output_dim)) + + def forward(self, features, masked_tokens=None, **kwargs): + # Only project the masked tokens while training, + # saves both memory and computation + if masked_tokens is not None: + features = features[masked_tokens, :] + + x = self.dense(features) + x = self.activation_fn(x) + x = self.layer_norm(x) + # project back to size of vocabulary with bias + x = F.linear(x, self.weight) + self.bias + return x + + +class RobertaClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__( + self, + input_dim, + inner_dim, + num_classes, + activation_fn, + pooler_dropout, + q_noise=0, + qn_block_size=8, + do_spectral_norm=False, + ): + super().__init__() + self.dense = nn.Linear(input_dim, inner_dim) + self.activation_fn = utils.get_activation_fn(activation_fn) + self.dropout = nn.Dropout(p=pooler_dropout) + self.out_proj = apply_quant_noise_( + nn.Linear(inner_dim, num_classes), q_noise, qn_block_size + ) + if do_spectral_norm: + if q_noise != 0: + raise NotImplementedError( + "Attempting to use Spectral Normalization with Quant Noise. 
This is not officially supported" + ) + self.out_proj = torch.nn.utils.spectral_norm(self.out_proj) + + def forward(self, features, **kwargs): + x = features[:, 0, :] # take <s> token (equiv. to [CLS]) + x = self.dropout(x) + x = self.dense(x) + x = self.activation_fn(x) + x = self.dropout(x) + x = self.out_proj(x) + return x + + +class RobertaEncoder(FairseqEncoder): + """RoBERTa encoder.""" + + def __init__(self, args, dictionary): + super().__init__(dictionary) + + # set any missing default values + base_architecture(args) + self.args = args + + if args.encoder_layers_to_keep: + args.encoder_layers = len(args.encoder_layers_to_keep.split(",")) + + embed_tokens = self.build_embedding( + len(dictionary), args.encoder_embed_dim, dictionary.pad() + ) + + self.sentence_encoder = self.build_encoder(args, dictionary, embed_tokens) + + self.lm_head = self.build_lm_head( + embed_dim=args.encoder_embed_dim, + output_dim=len(dictionary), + activation_fn=args.activation_fn, + weight=( + self.sentence_encoder.embed_tokens.weight + if not args.untie_weights_roberta + else None + ), + ) + + def build_embedding(self, vocab_size, embedding_dim, padding_idx): + return nn.Embedding(vocab_size, embedding_dim, padding_idx) + + def build_encoder(self, args, dictionary, embed_tokens): + encoder = TransformerEncoder(args, dictionary, embed_tokens) + encoder.apply(init_bert_params) + return encoder + + def build_lm_head(self, embed_dim, output_dim, activation_fn, weight): + return RobertaLMHead(embed_dim, output_dim, activation_fn, weight) + + def forward( + self, + src_tokens, + features_only=False, + return_all_hiddens=False, + masked_tokens=None, + **unused, + ): + """ + Args: + src_tokens (LongTensor): input tokens of shape `(batch, src_len)` + features_only (bool, optional): skip LM head and just return + features. If True, the output will be of shape + `(batch, src_len, embed_dim)`. 
+ return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + + Returns: + tuple: + - the LM output of shape `(batch, src_len, vocab)` + - a dictionary of additional data, where 'inner_states' + is a list of hidden states. Note that the hidden + states have shape `(src_len, batch, vocab)`. + """ + x, extra = self.extract_features( + src_tokens, return_all_hiddens=return_all_hiddens + ) + if not features_only: + x = self.output_layer(x, masked_tokens=masked_tokens) + return x, extra + + def extract_features(self, src_tokens, return_all_hiddens=False, **kwargs): + encoder_out = self.sentence_encoder( + src_tokens, + return_all_hiddens=return_all_hiddens, + token_embeddings=kwargs.get("token_embeddings", None), + ) + # T x B x C -> B x T x C + features = encoder_out["encoder_out"][0].transpose(0, 1) + inner_states = encoder_out["encoder_states"] if return_all_hiddens else None + return features, {"inner_states": inner_states} + + def output_layer(self, features, masked_tokens=None, **unused): + return self.lm_head(features, masked_tokens) + + def max_positions(self): + """Maximum output length supported by the encoder.""" + return self.args.max_positions + + +@register_model_architecture("roberta", "roberta") +def base_architecture(args): + args.encoder_layers = safe_getattr(args, "encoder_layers", 12) + args.encoder_embed_dim = safe_getattr(args, "encoder_embed_dim", 768) + args.encoder_ffn_embed_dim = safe_getattr(args, "encoder_ffn_embed_dim", 3072) + args.encoder_attention_heads = safe_getattr(args, "encoder_attention_heads", 12) + + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_dropout = safe_getattr(args, "activation_dropout", 0.0) + args.pooler_dropout = safe_getattr(args, "pooler_dropout", 0.0) + + args.max_source_positions = safe_getattr(args, "max_positions", 512) + args.no_token_positional_embeddings = safe_getattr( 
+ args, "no_token_positional_embeddings", False + ) + + # BERT has a few structural differences compared to the original Transformer + args.encoder_learned_pos = safe_getattr(args, "encoder_learned_pos", True) + args.layernorm_embedding = safe_getattr(args, "layernorm_embedding", True) + args.no_scale_embedding = safe_getattr(args, "no_scale_embedding", True) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + args.encoder_normalize_before = safe_getattr( + args, "encoder_normalize_before", False + ) + args.pooler_activation_fn = safe_getattr(args, "pooler_activation_fn", "tanh") + args.untie_weights_roberta = safe_getattr(args, "untie_weights_roberta", False) + + # Adaptive input config + args.adaptive_input = safe_getattr(args, "adaptive_input", False) + + # LayerDrop config + args.encoder_layerdrop = safe_getattr(args, "encoder_layerdrop", 0.0) + args.encoder_layers_to_keep = safe_getattr(args, "encoder_layers_to_keep", None) + + # Quantization noise config + args.quant_noise_pq = safe_getattr(args, "quant_noise_pq", 0) + args.quant_noise_pq_block_size = safe_getattr(args, "quant_noise_pq_block_size", 8) + args.quant_noise_scalar = safe_getattr(args, "quant_noise_scalar", 0) + + # R4F config + args.spectral_norm_classification_head = safe_getattr( + args, "spectral_norm_classification_head", False + ) + + +@register_model_architecture("roberta", "roberta_prenorm") +def roberta_prenorm_architecture(args): + args.layernorm_embedding = safe_getattr(args, "layernorm_embedding", False) + args.encoder_normalize_before = safe_getattr(args, "encoder_normalize_before", True) + base_architecture(args) + + +@register_model_architecture("roberta", "roberta_base") +def roberta_base_architecture(args): + base_architecture(args) + + +@register_model_architecture("roberta", "roberta_large") +def roberta_large_architecture(args): + args.encoder_layers = safe_getattr(args, "encoder_layers", 24) + args.encoder_embed_dim = safe_getattr(args, "encoder_embed_dim", 
1024) + args.encoder_ffn_embed_dim = safe_getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = safe_getattr(args, "encoder_attention_heads", 16) + base_architecture(args) + + +@register_model_architecture("roberta", "xlm") +def xlm_architecture(args): + args.encoder_layers = safe_getattr(args, "encoder_layers", 16) + args.encoder_embed_dim = safe_getattr(args, "encoder_embed_dim", 1280) + args.encoder_ffn_embed_dim = safe_getattr(args, "encoder_ffn_embed_dim", 1280 * 4) + args.encoder_attention_heads = safe_getattr(args, "encoder_attention_heads", 16) + base_architecture(args) diff --git a/fairseq/fairseq/models/roberta/model_camembert.py b/fairseq/fairseq/models/roberta/model_camembert.py new file mode 100644 index 0000000..4644754 --- /dev/null +++ b/fairseq/fairseq/models/roberta/model_camembert.py @@ -0,0 +1,50 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+""" +CamemBERT: a Tasty French Language Model +""" + +from fairseq.models import register_model + +from .hub_interface import RobertaHubInterface +from .model import RobertaModel + + +@register_model("camembert") +class CamembertModel(RobertaModel): + @classmethod + def hub_models(cls): + return { + "camembert": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base.tar.gz", + "camembert.v0": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base.tar.gz", + "camembert-base": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base.tar.gz", + "camembert-large": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-large.tar.gz", + "camembert-base-ccnet": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base-ccnet.tar.gz", + "camembert-base-ccnet-4gb": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base-ccnet-4gb.tar.gz", + "camembert-base-wikipedia-4gb": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base-wikipedia-4gb.tar.gz", + "camembert-base-oscar-4gb": "http://dl.fbaipublicfiles.com/fairseq/models/camembert-base-oscar-4gb.tar.gz", + } + + @classmethod + def from_pretrained( + cls, + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + bpe="sentencepiece", + **kwargs + ): + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + bpe=bpe, + load_checkpoint_heads=True, + **kwargs, + ) + return RobertaHubInterface(x["args"], x["task"], x["models"][0]) diff --git a/fairseq/fairseq/models/roberta/model_gottbert.py b/fairseq/fairseq/models/roberta/model_gottbert.py new file mode 100644 index 0000000..dc7a019 --- /dev/null +++ b/fairseq/fairseq/models/roberta/model_gottbert.py @@ -0,0 +1,49 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+""" +GottBERT: a pure German Language Model +""" + +from fairseq.models import register_model + +from .hub_interface import RobertaHubInterface +from .model import RobertaModel + + +@register_model("gottbert") +class GottbertModel(RobertaModel): + @classmethod + def hub_models(cls): + return { + "gottbert-base": "https://dl.gottbert.de/fairseq/models/gottbert-base.tar.gz", + } + + @classmethod + def from_pretrained( + cls, + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + bpe="hf_byte_bpe", + bpe_vocab="vocab.json", + bpe_merges="merges.txt", + bpe_add_prefix_space=False, + **kwargs + ): + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + bpe=bpe, + load_checkpoint_heads=True, + bpe_vocab=bpe_vocab, + bpe_merges=bpe_merges, + bpe_add_prefix_space=bpe_add_prefix_space, + **kwargs, + ) + return RobertaHubInterface(x["args"], x["task"], x["models"][0]) diff --git a/fairseq/fairseq/models/roberta/model_xlmr.py b/fairseq/fairseq/models/roberta/model_xlmr.py new file mode 100644 index 0000000..cf6e354 --- /dev/null +++ b/fairseq/fairseq/models/roberta/model_xlmr.py @@ -0,0 +1,46 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+""" +Unsupervised Cross-lingual Representation Learning at Scale +""" + +from fairseq.models import register_model + +from .hub_interface import RobertaHubInterface +from .model import RobertaModel + + +@register_model("xlmr") +class XLMRModel(RobertaModel): + @classmethod + def hub_models(cls): + return { + "xlmr.base": "http://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz", + "xlmr.large": "http://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz", + "xlmr.xl": "http://dl.fbaipublicfiles.com/fairseq/models/xlmr/xlmr.xl.tar.gz", + "xlmr.xxl": "http://dl.fbaipublicfiles.com/fairseq/models/xlmr/xlmr.xxl.tar.gz", + } + + @classmethod + def from_pretrained( + cls, + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + bpe="sentencepiece", + **kwargs + ): + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + bpe=bpe, + load_checkpoint_heads=True, + **kwargs, + ) + return RobertaHubInterface(x["args"], x["task"], x["models"][0]) diff --git a/fairseq/fairseq/models/speech_dlm/__init__.py b/fairseq/fairseq/models/speech_dlm/__init__.py new file mode 100644 index 0000000..6ea914d --- /dev/null +++ b/fairseq/fairseq/models/speech_dlm/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .speech_dlm import * # noqa +from .hub_interface import * # noqa diff --git a/fairseq/fairseq/models/speech_dlm/hub_interface.py b/fairseq/fairseq/models/speech_dlm/hub_interface.py new file mode 100644 index 0000000..11bc0f5 --- /dev/null +++ b/fairseq/fairseq/models/speech_dlm/hub_interface.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import logging +from typing import Any, Dict, Iterator, List + +import torch +from fairseq import utils +from omegaconf import open_dict +from torch import nn + +from tqdm import tqdm + +from fairseq.hub_utils import GeneratorHubInterface + + +logger = logging.getLogger(__name__) + + +class MultichannelGeneratorHubInterface(GeneratorHubInterface): + """Pytorch Hub interface for generating sequences from a pre-trained + multichannel language model. + """ + + def __init__(self, cfg, task, models): + super().__init__(cfg, task, models) + self.cfg = cfg + self.task = task + self.models = nn.ModuleList(models) + self.src_dicts = task.source_dictionaries + self.tgt_dicts = task.target_dictionaries + self.channels = task.channels + + # optimize model for generation + for model in self.models: + model.prepare_for_inference_(cfg) + + def sample( + self, + sentences: List[Dict[str, str]], + beam: int = 1, + verbose: bool = False, + **kwargs + ) -> List[str]: + if isinstance(sentences, dict): + return self.sample([sentences], beam=beam, verbose=verbose, **kwargs)[0] + tokenized_sentences = [self.encode(sentence) for sentence in sentences] + batched_hypos = self.generate(tokenized_sentences, beam, verbose, **kwargs) + return [self.decode(hypos[0]["tokens"]) for hypos in batched_hypos] + + def score(self, sentences: List[Dict[str, str]], **kwargs): + raise NotImplementedError( + "MultichannelGeneratorHubInterface doesn't support score() method" + ) + + def generate( + self, + tokenized_sentences: List[Dict[str, torch.LongTensor]], + beam: int = 5, + verbose: bool = False, + skip_invalid_size_inputs=False, + inference_step_args=None, + **kwargs + ) -> List[List[Dict[str, torch.Tensor]]]: + if isinstance(tokenized_sentences, dict): + return self.generate( + [tokenized_sentences], beam=beam, verbose=verbose, **kwargs + )[0] + + # build 
generator using current args as well as any kwargs + gen_args = copy.deepcopy(self.cfg.generation) + with open_dict(gen_args): + gen_args.beam = beam + for k, v in kwargs.items(): + setattr(gen_args, k, v) + generator = self.task.build_generator(self.models, gen_args) + + inference_step_args = inference_step_args or {} + results = [] + for batch in tqdm( + self._build_batches(tokenized_sentences, skip_invalid_size_inputs) + ): + batch = utils.apply_to_sample(lambda t: t.to(self.device), batch) + translations = self.task.inference_step( + generator, self.models, batch, **inference_step_args + ) + for id, hypos in zip(batch["id"].tolist(), translations): + # The output of the generator is supposed to be a tensor of size (bsz x max_len x n_channels) + # So we need to convert it to dictionary form + for i in range(len(hypos)): + hypos[i]["tokens"] = { + channel: hypos[i]["tokens"][..., j] + for j, channel in enumerate(self.channels) + } + results.append((id, hypos)) + + # sort output to match input order + outputs = [hypos for _, hypos in sorted(results, key=lambda x: x[0])] + + if verbose: + + def getarg(name, default): + return getattr(gen_args, name, getattr(self.cfg, name, default)) + + for source_tokens, target_hypotheses in zip(tokenized_sentences, outputs): + src_str_with_unk = { + channel: self.string(source_tokens[channel], channel) + for channel in source_tokens + } + logger.info("S\t{}".format(src_str_with_unk)) + for hypo in target_hypotheses: + hypo_str = self.decode(hypo["tokens"]) + logger.info("H\t{}\t{}".format(hypo["score"], hypo_str)) + # hypo["positional_scores"]: T x n_channels + pos_scores = {} + for c, channel in enumerate(source_tokens): + pos_scores[channel] = " ".join( + map( + lambda x: "{:.4f}".format(x), + hypo["positional_scores"][:, c].tolist(), + ) + ) + logger.info("P\t{}".format(pos_scores)) + + return outputs + + def encode(self, sentence: Dict[str, str]) -> Dict[str, torch.LongTensor]: + assert isinstance( + sentence, dict + ), 
"Input sentence is expected to be a dictionary over channels" + assert set(sentence.keys()) == set( + self.channels + ), "Mismatch between input sentence keys and model channels ({} vs {})".format( + set(sentence.keys()), set(self.channels) + ) + encoded_sentence = {} + for channel in sentence: + sentence_channel = sentence[channel] + sentence_channel = self.tokenize(sentence_channel) + sentence_channel = self.apply_bpe(sentence_channel) + sentence_channel = self.binarize(sentence_channel, channel) + encoded_sentence[channel] = sentence_channel + sentence_size = encoded_sentence[self.channels[0]].size() + assert all( + encoded_sentence[channel].size() == sentence_size + for channel in encoded_sentence + ), "Input tensors are expected to have the same size in all channels" + return encoded_sentence + + def decode(self, tokens: Dict[str, torch.LongTensor]) -> Dict[str, str]: + assert isinstance( + tokens, dict + ), "Input tokens are expected to be a dictionary over channels" + assert set(tokens.keys()) == set( + self.channels + ), "Mismatch between input tokens keys and model channels ({} vs {})".format( + set(tokens.keys()), set(self.channels) + ) + decoded_sentence = {} + for channel in tokens: + tokens_channel = tokens[channel] + sentence_channel = self.string(tokens_channel, channel) + sentence_channel = self.remove_bpe(sentence_channel) + sentence_channel = self.detokenize(sentence_channel) + decoded_sentence[channel] = sentence_channel + return decoded_sentence + + def binarize(self, sentence: str, channel: str) -> torch.LongTensor: + return ( + self.src_dicts[channel].encode_line(sentence, add_if_not_exist=False).long() + ) + + def string(self, tokens: torch.LongTensor, channel: str) -> str: + return self.tgt_dicts[channel].string(tokens) + + def _build_batches( + self, tokens: List[Dict[str, List[int]]], skip_invalid_size_inputs: bool + ) -> Iterator[Dict[str, Any]]: + lengths = torch.LongTensor([next(iter(d.values())).numel() for d in tokens]) + 
batch_iterator = self.task.get_batch_iterator( + dataset=self.task.build_dataset_for_inference(tokens, lengths), + max_tokens=self.cfg.dataset.max_tokens, + max_sentences=self.cfg.dataset.batch_size, + max_positions=self.max_positions, + ignore_invalid_inputs=skip_invalid_size_inputs, + disable_iterator_cache=True, + ).next_epoch_itr(shuffle=False) + return batch_iterator diff --git a/fairseq/fairseq/models/speech_dlm/modules/__init__.py b/fairseq/fairseq/models/speech_dlm/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/fairseq/models/speech_dlm/modules/speech_dlm_decoder.py b/fairseq/fairseq/models/speech_dlm/modules/speech_dlm_decoder.py new file mode 100644 index 0000000..a14a1d6 --- /dev/null +++ b/fairseq/fairseq/models/speech_dlm/modules/speech_dlm_decoder.py @@ -0,0 +1,572 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.models import FairseqIncrementalDecoder +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, +) +from .speech_dlm_decoder_layer import ( + CrossChannelTransformerDecoderLayer, + StandardTransformerDecoderLayer, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + + +class CrossChannelTransformerDecoder(FairseqIncrementalDecoder): + """ + Cross-channel Transformer Decoder Block for parallel spoken dialogue units + as described in the paper: https://arxiv.org/pdf/2203.16502.pdf; + consisting of *args.decoder_layers* layers. Each layer is a + :class:`StandardTransformerDecoderLayer` or + :class:`CrossChannelTransformerDecoderLayer`. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + channels (list): list of channel names (string) + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__(self, args, dictionary, embed_tokens, channels, no_encoder_attn=False): + self.args = args + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.decoder_layerdrop = args.decoder_layerdrop + self.share_input_output_embed = args.share_decoder_input_output_embed + self.channels = channels + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = args.decoder_embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = args.decoder_output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = args.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim) + + if args.quant_noise_pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + args.quant_noise_pq, + args.quant_noise_pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + nn.Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=args.decoder_learned_pos, + ) + if not args.no_token_positional_embeddings + else None + ) + + if getattr(args, "layernorm_embedding", False): + self.layernorm_embedding = LayerNorm(embed_dim) + else: + self.layernorm_embedding = None + + self.cross_self_attention = getattr(args, "cross_self_attention", False) + + assert 0 <= 
args.decoder_cross_layers <= args.decoder_layers, ( + "The number of cross-channel attention decoder layers must be non-negative" + f"and not exceeds the number of decoder layers (found {args.decoder_cross_layers})" + ) + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + self.build_decoder_layer(args, no_encoder_attn) + if i < args.decoder_layers - args.decoder_cross_layers + else self.build_cross_decoder_layer(args, no_encoder_attn) + for i in range(args.decoder_layers) + ] + ) + self.num_layers = len(self.layers) + self.non_cross_layers = args.decoder_layers - args.decoder_cross_layers + + if args.decoder_normalize_before and not getattr( + args, "no_decoder_final_norm", False + ): + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + self.project_out_dim = ( + nn.Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim + else None + ) + + self.output_projection = None + self.is_cross_prediction = bool( + float(args.main_and_cross_weights.split(",")[1]) != 0 + ) + self.n_output_projections = ( + 1 if not self.is_cross_prediction else len(self.channels) + ) + + if self.share_input_output_embed: + # Output projection is a list of projections + # where the first proj is for the main-channel, + # then roll in a cicular way. + # For example: if the main channel has index i + # the second proj is for channel i+1 (mod N_channels), etc. 
+ self.output_projection = nn.ModuleList( + [ + nn.Linear( + embed_tokens.weight.shape[1], # embed_dim + embed_tokens.weight.shape[0], # n_dictionaries + bias=False, + ) + for _ in range(self.n_output_projections) + ] + ) + # Only share the main-channel projection + self.output_projection[0].weight = embed_tokens.weight + for i in range(1, self.n_output_projections): + nn.init.normal_( + self.output_projection[i].weight, + mean=0, + std=embed_tokens.weight.shape[1] ** -0.5, + ) + else: + self.output_projection = nn.ModuleList( + [ + nn.Linear(self.output_embed_dim, len(dictionary), bias=False) + for _ in range(self.n_output_projections) + ] + ) + for i in range(self.n_output_projections): + nn.init.normal_( + self.output_projection[i].weight, + mean=0, + std=self.output_embed_dim**-0.5, + ) + self.output_duration_prediction = ( + None + if str(args.duration_prediction).lower() == "false" + else nn.ModuleList( + [ + nn.Linear(self.output_embed_dim, 1) + for _ in range(self.n_output_projections) + ] + ) + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + layer = StandardTransformerDecoderLayer(args, no_encoder_attn) + if getattr(args, "checkpoint_activations", False): + offload_to_cpu = getattr(args, "offload_activations", False) + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + return layer + + def build_cross_decoder_layer(self, args, no_encoder_attn=False): + layer = CrossChannelTransformerDecoderLayer(args, no_encoder_attn) + if getattr(args, "checkpoint_activations", False): + offload_to_cpu = getattr(args, "offload_activations", False) + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + return layer + + def forward( + self, + prev_output_tokens: Dict[str, Tensor], + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[ + List[Dict[str, Dict[str, Optional[Tensor]]]] + ] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: 
Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + # return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (dict[str, LongTensor]): previous decoder outputs, + dictionary over all channels with the values being the tensors + of shape `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): list of dictionaries used for storing state + during :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + tuple: + - the decoder's output, dict over channels of tensors + of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens: Dict[str, Tensor], + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[ + List[Dict[str, Dict[str, Optional[Tensor]]]] + ] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. 
def extract_features_scriptable(
    self,
    prev_output_tokens: Dict[str, Tensor],
    encoder_out: Optional[Dict[str, List[Tensor]]],
    incremental_state: Optional[
        List[Dict[str, Dict[str, Optional[Tensor]]]]
    ] = None,
    full_context_alignment: bool = False,
    alignment_layer: Optional[int] = None,
    alignment_heads: Optional[int] = None,
):
    """
    The core function of *forward* but only return features.

    The input (prev_output_tokens) is a dictionary over all channels,
    expected to have the following form:
        {
            'channel1' : Tensor((batch x tgt_len)),
            'channel2' : Tensor((batch x tgt_len)),
        }

    NOTE: when *incremental_state* is given, the entries of
    *prev_output_tokens* are replaced in place by their last time step,
    so the caller's dictionary is modified.

    Args:
        prev_output_tokens (dict): previous decoder outputs, one tensor
            per channel name in ``self.channels``.
        encoder_out (dict, optional): encoder output; only the first
            element of ``"encoder_out"`` and ``"encoder_padding_mask"``
            is forwarded to the layers (``None`` when missing or empty).
        incremental_state (list, optional): one incremental-state
            dictionary per channel, indexed in ``self.channels`` order.
        full_context_alignment (bool, optional): don't apply
            auto-regressive mask to self-attention (default: False).
        alignment_layer (int, optional): return mean alignment over
            heads at this layer (default: last layer).
        alignment_heads (int, optional): only average alignment over
            this many heads (default: all heads).

    Returns:
        tuple:
            - the decoder's features, dict over channels of tensors
              of shape `(batch, tgt_len, embed_dim)`
            - a dictionary with any model-specific outputs:
              ``"attn"`` (single-element list holding a per-channel dict
              or ``None``) and ``"inner_states"`` (list of per-channel
              dicts, one for the embeddings plus one per layer)
    """
    if alignment_layer is None:
        alignment_layer = self.num_layers - 1

    x_list = []
    for i, channel in enumerate(self.channels):
        # embed positions
        positions = None
        if self.embed_positions is not None:
            positions = self.embed_positions(
                prev_output_tokens[channel],
                incremental_state=incremental_state[i]
                if incremental_state is not None
                else None,
            )

        if incremental_state is not None:
            # Incremental decoding: only the newest step is fed through the
            # network. This overwrites the entry in the caller's dictionary.
            prev_output_tokens[channel] = prev_output_tokens[channel][:, -1:]
            if positions is not None:
                positions = positions[:, -1:]

        # embed tokens and positions
        x = self.embed_tokens(prev_output_tokens[channel])

        if self.project_in_dim is not None:
            x = self.project_in_dim(x)

        x = self.embed_scale * x

        if self.quant_noise is not None:
            x = self.quant_noise(x)

        if positions is not None:
            x += positions

        if self.layernorm_embedding is not None:
            x = self.layernorm_embedding(x)

        x = self.dropout_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        x_list.append(x)

    # The padding mask is derived from the FIRST channel only and shared by
    # all channels. In incremental mode it is computed on the already
    # truncated (last-step) tokens.
    self_attn_padding_mask: Optional[Tensor] = None
    if (
        self.cross_self_attention
        or prev_output_tokens[self.channels[0]].eq(self.padding_idx).any()
    ):
        self_attn_padding_mask = prev_output_tokens[self.channels[0]].eq(
            self.padding_idx
        )

    # decoder layers
    attn: Optional[Dict[str, Tensor]] = None
    inner_states: List[Optional[Dict[str, Tensor]]] = [
        {channel: x_list[i] for i, channel in enumerate(self.channels)}
    ]
    for idx, layer in enumerate(self.layers):
        if incremental_state is None and not full_context_alignment:
            self_attn_mask = self.buffered_future_mask(x_list[0])
        else:
            self_attn_mask = None

        # need to change to tensor for the checkpoint activation to work
        if isinstance(x_list, list):
            x_list = torch.stack(x_list)
        x_list, layer_attn_list, _ = layer(
            x_list,
            encoder_out["encoder_out"][0]
            if (encoder_out is not None and len(encoder_out["encoder_out"]) > 0)
            else None,
            encoder_out["encoder_padding_mask"][0]
            if (
                encoder_out is not None
                and len(encoder_out["encoder_padding_mask"]) > 0
            )
            else None,
            incremental_state,
            self_attn_mask=self_attn_mask,
            self_attn_padding_mask=self_attn_padding_mask,
            need_attn=bool((idx == alignment_layer)),
            need_head_weights=bool((idx == alignment_layer)),
        )

        inner_states.append(
            {channel: x_list[i] for i, channel in enumerate(self.channels)}
        )
        # Attention is kept only for the alignment layer, and only if every
        # channel actually produced attention weights.
        if idx == alignment_layer and all(
            layer_attn is not None for layer_attn in layer_attn_list
        ):
            attn = {
                channel: layer_attn_list[i].float().to(x_list[0])
                for i, channel in enumerate(self.channels)
            }
    # change back from tensor to list
    if not isinstance(x_list, list):
        x_list = list(torch.unbind(x_list))

    if attn is not None:
        for channel in attn:
            if alignment_heads is not None:
                attn[channel] = attn[channel][:alignment_heads]

            # average probabilities over heads
            attn[channel] = attn[channel].mean(dim=0)

    for i, x in enumerate(x_list):
        if self.layer_norm is not None:
            x = self.layer_norm(x)

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        if self.project_out_dim is not None:
            x = self.project_out_dim(x)

        x_list[i] = x

    x = {channel: x_list[i] for i, channel in enumerate(self.channels)}

    return x, {"attn": [attn], "inner_states": inner_states}


def output_layer(self, features):
    """Project features to the vocabulary size.

    Args:
        features (dict): per-channel decoder features, keyed by the
            channel names in ``self.channels``.

    Return a dictionary of the form:
        {
            'input-channel': {
                'predicted-channel': token prediction tensor of shape `(batch, tgt_len, vocab)`,
            }
        }

    if duration_prediction is enabled
        {
            'input-channel': {
                'predicted-channel': {
                    'pred_token': token prediction tensor of shape `(batch, tgt_len, vocab)`,
                    'pred_duration': duration prediction tensor
                }
            }
        }

    Without cross prediction every channel only predicts itself, using
    the projection at index 0. With cross prediction the projection is
    selected by the offset ``j - i`` between predicted and input channel.
    """
    # project back to size of vocabulary
    if self.output_duration_prediction is None:
        if self.is_cross_prediction:
            # NOTE(review): j - i is negative whenever j < i; this relies on
            # the projection container supporting negative indices
            # (presumably an nn.ModuleList -- confirm at construction site).
            return {
                channel: {
                    pred_channel: self.output_projection[j - i](features[channel])
                    for j, pred_channel in enumerate(self.channels)
                }
                for i, channel in enumerate(self.channels)
            }
        else:
            return {
                channel: {channel: self.output_projection[0](features[channel])}
                for i, channel in enumerate(self.channels)
            }
    else:
        if self.is_cross_prediction:
            return {
                channel: {
                    pred_channel: {
                        "pred_token": self.output_projection[j - i](
                            features[channel]
                        ),
                        "pred_duration": self.output_duration_prediction[j - i](
                            features[channel]
                        ),
                    }
                    for j, pred_channel in enumerate(self.channels)
                }
                for i, channel in enumerate(self.channels)
            }
        else:
            return {
                channel: {
                    channel: {
                        "pred_token": self.output_projection[0](features[channel]),
                        "pred_duration": self.output_duration_prediction[0](
                            features[channel]
                        ),
                    }
                }
                for i, channel in enumerate(self.channels)
            }
def max_positions(self):
    """Maximum output length supported by the decoder."""
    limit = self.max_target_positions
    position_embedder = self.embed_positions
    if position_embedder is not None:
        # Learned/sinusoidal positions may cap the length further.
        limit = min(limit, position_embedder.max_positions)
    return limit


def buffered_future_mask(self, tensor):
    """Return a cached `(dim, dim)` causal mask for *tensor*.

    ``dim`` is ``tensor.size(0)``. The cached mask is rebuilt when it is
    empty, lives on another device, or is smaller than ``dim``; it is then
    moved to the dtype/device of *tensor* and sliced to size.
    """
    dim = tensor.size(0)
    mask = self._future_mask
    # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround.
    needs_rebuild = (
        mask.size(0) == 0
        or (not mask.device == tensor.device)
        or mask.size(0) < dim
    )
    if needs_rebuild:
        blank = torch.zeros([dim, dim])
        mask = torch.triu(utils.fill_with_neg_inf(blank), 1)
    self._future_mask = mask.to(tensor)
    return self._future_mask[:dim, :dim]


def get_normalized_probs_scriptable(
    self,
    net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]],
    log_probs: bool,
    sample: Optional[Dict[str, Tensor]] = None,
):
    """Get normalized probabilities (or log probs) from a net's output.

    ``net_output[0]`` is a nested dict ``{channel: {pred_channel: logits}}``
    where each leaf is either a logits tensor or a dict holding
    ``"pred_token"`` and ``"pred_duration"``. The same nesting is returned
    with token logits normalized over the last dimension; durations are
    passed through as float32.
    """
    out_dict = {}
    for channel, channel_logits in net_output[0].items():
        channel_out = {}
        out_dict[channel] = channel_out
        for pred_channel, prediction in channel_logits.items():
            with_duration = isinstance(prediction, dict)
            if with_duration:
                token_logits = prediction["pred_token"]
            else:
                token_logits = prediction

            if log_probs:
                normalized = utils.log_softmax(
                    token_logits, dim=-1, onnx_trace=self.onnx_trace
                )
            else:
                normalized = utils.softmax(
                    token_logits, dim=-1, onnx_trace=self.onnx_trace
                )

            if not with_duration:
                channel_out[pred_channel] = normalized
                continue
            # move to float32 to avoid inf loss
            duration = prediction["pred_duration"].float()
            channel_out[pred_channel] = {
                "pred_token": normalized,
                "pred_duration": duration,
            }
    return out_dict


def reorder_incremental_state_scripting(
    self,
    incremental_state: List[Dict[str, Dict[str, Optional[Tensor]]]],
    new_order: Tensor,
):
    """Main entry point for reordering the incremental state.

    Due to limitations in TorchScript, we call this function in
    :class:`fairseq.sequence_generator.SequenceGenerator` instead of
    calling :func:`reorder_incremental_state` directly.

    Every sub-module exposing ``reorder_incremental_state`` is applied to
    each channel's state; a non-``None`` result replaces that channel's
    entry of *incremental_state* in place.
    """
    for module in self.modules():
        if not hasattr(module, "reorder_incremental_state"):
            continue
        for channel_idx, channel_state in enumerate(incremental_state):
            reordered = module.reorder_incremental_state(channel_state, new_order)
            if reordered is None:
                continue
            incremental_state[channel_idx] = reordered
class CrossChannelTransformerDecoderLayer(nn.Module):
    """Cross-Attention Transformer Decoder Layer block as described
    in the paper: https://arxiv.org/pdf/2203.16502.pdf

    Composed of a Multi-head Self Attention block followed by a
    Multi-head Cross-Attention block which attends to the self-attention
    outputs of the other channels. The weights of the attention blocks
    in all channels are shared.

    Args:
        args (argparse.Namespace): parsed command-line arguments
        no_encoder_attn (bool, optional): if True, no encoder-attention
            sub-block is built and encoder outputs are ignored
            (default: False).
        add_bias_kv (bool, optional): forwarded to the self- and
            cross-channel attention modules (default: False).
        add_zero_attn (bool, optional): forwarded to the self- and
            cross-channel attention modules (default: False).
    """

    def __init__(
        self, args, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False
    ):
        super().__init__()
        self.embed_dim = args.decoder_embed_dim
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.quant_noise = getattr(args, "quant_noise_pq", 0)
        self.quant_noise_block_size = getattr(args, "quant_noise_pq_block_size", 8)

        # This cross_self_attention is used for encoder-decoder systems,
        # It's not the cross-channel attention (defined below as cross_channel_attn)
        self.cross_self_attention = getattr(args, "cross_self_attention", False)

        self.self_attn = self.build_self_attention(
            self.embed_dim,
            args,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
        )
        self.cross_channel_attn = self.build_cross_channel_attention(
            self.embed_dim,
            args,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
        )

        # Fall back to "relu" when args carries no activation_fn.
        self.activation_fn = utils.get_activation_fn(
            activation=str(args.activation_fn)
            if getattr(args, "activation_fn", None) is not None
            else "relu"
        )
        activation_dropout_p = getattr(args, "activation_dropout", 0) or 0
        if activation_dropout_p == 0:
            # for backwards compatibility with models that use args.relu_dropout
            activation_dropout_p = getattr(args, "relu_dropout", 0) or 0
        self.activation_dropout_module = FairseqDropout(
            float(activation_dropout_p), module_name=self.__class__.__name__
        )
        self.normalize_before = args.decoder_normalize_before

        # use layerNorm rather than FusedLayerNorm for exporting.
        # char_inputs can be used to determine this.
        # TODO remove this once we update apex with the fix
        export = getattr(args, "char_inputs", False)
        self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
        self.cross_channel_attn_layer_norm = LayerNorm(self.embed_dim, export=export)

        if no_encoder_attn:
            self.encoder_attn = None
            self.encoder_attn_layer_norm = None
        else:
            self.encoder_attn = self.build_encoder_attention(self.embed_dim, args)
            self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export)

        self.fc1 = self.build_fc1(
            self.embed_dim,
            args.decoder_ffn_embed_dim,
            self.quant_noise,
            self.quant_noise_block_size,
        )
        self.fc2 = self.build_fc2(
            args.decoder_ffn_embed_dim,
            self.embed_dim,
            self.quant_noise,
            self.quant_noise_block_size,
        )

        self.final_layer_norm = LayerNorm(self.embed_dim, export=export)
        # Outside training, encoder attention weights are returned unless
        # make_generation_fast_ switches this off (see forward).
        self.need_attn = True

        self.onnx_trace = False

    def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size):
        """Build the first feed-forward linear layer, wrapped in quant noise."""
        return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size)

    def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size):
        """Build the second feed-forward linear layer, wrapped in quant noise."""
        return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size)

    def build_self_attention(
        self, embed_dim, args, add_bias_kv=False, add_zero_attn=False
    ):
        """Build the per-channel self-attention module.

        ``self_attention`` is disabled when ``args.cross_self_attention`` is
        set, because keys/values then also contain the encoder output.
        """
        return MultiheadAttention(
            embed_dim,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            self_attention=not getattr(args, "cross_self_attention", False),
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
        )

    def build_cross_channel_attention(
        self, embed_dim, args, add_bias_kv=False, add_zero_attn=False
    ):
        """Build the attention module that attends to the other channels."""
        return MultiheadAttention(
            embed_dim,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            self_attention=False,
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
        )

    def build_encoder_attention(self, embed_dim, args):
        """Build the encoder-decoder attention module.

        Key/value dimensions come from ``args.encoder_embed_dim`` when present.
        """
        return MultiheadAttention(
            embed_dim,
            args.decoder_attention_heads,
            kdim=getattr(args, "encoder_embed_dim", None),
            vdim=getattr(args, "encoder_embed_dim", None),
            dropout=args.attention_dropout,
            encoder_decoder_attention=True,
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
        )

    def prepare_for_onnx_export_(self):
        """Switch the layer to ONNX-trace mode (forward also returns states)."""
        self.onnx_trace = True

    def residual_connection(self, x, residual):
        """Add the residual branch to *x*."""
        return residual + x

    def forward(
        self,
        x_list_tensor: List[torch.Tensor],
        encoder_out: Optional[torch.Tensor] = None,
        encoder_padding_mask: Optional[torch.Tensor] = None,
        incremental_state: Optional[
            List[Dict[str, Dict[str, Optional[Tensor]]]]
        ] = None,
        prev_self_attn_state: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
        prev_attn_state: Optional[List[torch.Tensor]] = None,
        self_attn_mask: Optional[torch.Tensor] = None,
        self_attn_padding_mask: Optional[torch.Tensor] = None,
        need_attn: bool = False,
        need_head_weights: bool = False,
    ):
        """
        Args:
            x_list_tensor (List[Tensor]): list of input tensors in different channels,
                each tensor is of shape `(seq_len, batch, embed_dim)`. Only
                iterated and measured with ``len``, so a tensor stacked over
                channels on its first dimension is handled the same way.
            encoder_out (Tensor, optional): encoder output used as key/value
                of the encoder attention (and prepended to the keys when
                cross_self_attention is enabled)
            encoder_padding_mask (ByteTensor, optional): binary
                ByteTensor of shape `(batch, src_len)` where padding
                elements are indicated by ``1``.
            incremental_state (optional): list of incremental_state dictionaries over
                different channels (sequence generation mode)
            prev_self_attn_state (List[Tuple[Tensor, Tensor]], optional): list of tuples
                (self_attn_state, cross_channel_attn_state) over different channels;
                each state is indexed as [prev_key, prev_value] with an
                optional third prev_key_padding_mask entry
            prev_attn_state (optional): [prev_key, prev_value] (plus optional
                padding mask) for the encoder attention, shared by all channels
            self_attn_mask (Tensor, optional): attention mask for the self-
                and cross-channel attention
            self_attn_padding_mask (Tensor, optional): key padding mask for
                the self- and cross-channel attention
            need_attn (bool, optional): return attention weights
            need_head_weights (bool, optional): return attention weights
                for each head (default: return average over heads).

        Returns:
            tuple of:
                - tensor stacking the per-channel outputs, each of shape
                  `(seq_len, batch, embed_dim)`
                - list with one attention entry per channel (taken from the
                  encoder attention when it runs, otherwise the self-attention
                  result, which is computed with ``need_weights=False``)
                - per-channel (self_attn_state, cross_channel_attn_state)
                  tuples when ONNX tracing with incremental state, else None
        """
        n_channels = len(x_list_tensor)
        if need_head_weights:
            need_attn = True

        # incremental_state is a list of dictionaries over different channels
        if incremental_state is not None:
            assert isinstance(incremental_state, list)
            assert len(incremental_state) == n_channels

        # prev_self_attn_state is a list of tuples (self_attn_state, cross_channel_attn_state) over different channels
        if prev_self_attn_state is not None:
            assert isinstance(prev_self_attn_state, list)
            assert len(prev_self_attn_state) == n_channels
            for prev_self_attn_state_channel in prev_self_attn_state:
                assert isinstance(prev_self_attn_state_channel, tuple)
                assert len(prev_self_attn_state_channel) == 2

        # Backup for other channels & cross channel attention
        self_attn_mask_orin = self_attn_mask
        self_attn_padding_mask_orin = self_attn_padding_mask

        # Stage 1: self-attention (+ optional encoder attention) per channel.
        x_list = []
        attn_list = []
        for i, x in enumerate(x_list_tensor):
            residual = x

            if self.normalize_before:
                x = self.self_attn_layer_norm(x)

            if prev_self_attn_state is not None:
                prev_key, prev_value = prev_self_attn_state[i][0][:2]
                saved_state: Dict[str, Optional[Tensor]] = {
                    "prev_key": prev_key,
                    "prev_value": prev_value,
                }
                if len(prev_self_attn_state[i][0]) >= 3:
                    saved_state["prev_key_padding_mask"] = prev_self_attn_state[i][0][2]
                assert incremental_state is not None
                self.self_attn._set_input_buffer(incremental_state[i], saved_state)
            _self_attn_input_buffer = self.self_attn._get_input_buffer(
                incremental_state[i] if incremental_state is not None else None
            )
            # With cross_self_attention the encoder output is prepended to the
            # keys, unless a cached prev_key already exists for this channel.
            if self.cross_self_attention and not (
                incremental_state is not None
                and _self_attn_input_buffer is not None
                and "prev_key" in _self_attn_input_buffer
            ):
                if self_attn_mask_orin is not None:
                    assert encoder_out is not None
                    # Masks are rebuilt from the backups so that every channel
                    # gets the encoder columns prepended exactly once.
                    self_attn_mask = torch.cat(
                        (
                            x.new_zeros(x.size(0), encoder_out.size(0)),
                            self_attn_mask_orin,
                        ),
                        dim=1,
                    )
                if self_attn_padding_mask_orin is not None:
                    if encoder_padding_mask is None:
                        assert encoder_out is not None
                        encoder_padding_mask = self_attn_padding_mask_orin.new_zeros(
                            encoder_out.size(1), encoder_out.size(0)
                        )
                    self_attn_padding_mask = torch.cat(
                        (encoder_padding_mask, self_attn_padding_mask_orin), dim=1
                    )
                assert encoder_out is not None
                y = torch.cat((encoder_out, x), dim=0)
            else:
                y = x

            x, attn = self.self_attn(
                query=x,
                key=y,
                value=y,
                key_padding_mask=self_attn_padding_mask,
                incremental_state=incremental_state[i]
                if incremental_state is not None
                else None,
                need_weights=False,
                attn_mask=self_attn_mask,
            )

            x = self.dropout_module(x)
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.self_attn_layer_norm(x)

            if self.encoder_attn is not None and encoder_out is not None:
                residual = x
                if self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)
                if prev_attn_state is not None:
                    prev_key, prev_value = prev_attn_state[:2]
                    saved_state: Dict[str, Optional[Tensor]] = {
                        "prev_key": prev_key,
                        "prev_value": prev_value,
                    }
                    if len(prev_attn_state) >= 3:
                        saved_state["prev_key_padding_mask"] = prev_attn_state[2]
                    assert incremental_state is not None
                    self.encoder_attn._set_input_buffer(
                        incremental_state[i], saved_state
                    )

                x, attn = self.encoder_attn(
                    query=x,
                    key=encoder_out,
                    value=encoder_out,
                    key_padding_mask=encoder_padding_mask,
                    incremental_state=incremental_state[i]
                    if incremental_state is not None
                    else None,
                    static_kv=True,
                    need_weights=need_attn or (not self.training and self.need_attn),
                    need_head_weights=need_head_weights,
                )
                x = self.dropout_module(x)
                x = self.residual_connection(x, residual)
                if not self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)

            x_list.append(x)
            attn_list.append(attn)

        # Store attentions & new x(s) (bc the old x(s) are used in other channels)
        x_list_new = []
        # Stage 2: here comes the cross channel attention
        for i, x in enumerate(x_list):
            residual = x
            if self.normalize_before:
                x = self.cross_channel_attn_layer_norm(x)

            if prev_self_attn_state is not None:
                prev_key, prev_value = prev_self_attn_state[i][1][:2]
                saved_state: Dict[str, Optional[Tensor]] = {
                    "prev_key": prev_key,
                    "prev_value": prev_value,
                }
                if len(prev_self_attn_state[i][1]) >= 3:
                    saved_state["prev_key_padding_mask"] = prev_self_attn_state[i][1][2]
                assert incremental_state is not None
                self.cross_channel_attn._set_input_buffer(
                    incremental_state[i], saved_state
                )

            # The cross attention is computed with the concatenation of attentions from other channels
            if len(x_list) > 1:
                # Other channels, starting with the one after i and wrapping
                # around, concatenated along dim 0.
                x_other = torch.cat(
                    [x_list[(i + j) % len(x_list)] for j in range(1, len(x_list))],
                    dim=0,
                )
            else:
                # Self-attention when having only one channel
                x_other = x_list[i]

            # The unmodified (backup) masks are used here, not the ones
            # extended with encoder columns in stage 1.
            x, attn = self.cross_channel_attn(
                query=x,
                key=x_other,
                value=x_other,
                key_padding_mask=self_attn_padding_mask_orin,
                incremental_state=incremental_state[i]
                if incremental_state is not None
                else None,
                need_weights=False,
                attn_mask=self_attn_mask_orin,
            )

            x = self.dropout_module(x)
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.cross_channel_attn_layer_norm(x)

            x_list_new.append(x)
        x_list = x_list_new

        # Stage 3: position-wise feed-forward block per channel.
        for i, x in enumerate(x_list):
            residual = x
            if self.normalize_before:
                x = self.final_layer_norm(x)

            x = self.activation_fn(self.fc1(x))
            x = self.activation_dropout_module(x)
            x = self.fc2(x)
            x = self.dropout_module(x)
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.final_layer_norm(x)

            x_list[i] = x
        # Trick for the checkpoint activation
        x_list_tensor = torch.stack(x_list)
        if self.onnx_trace and incremental_state is not None:
            self_and_cross_attn_state_list = []
            for i in range(n_channels):
                self_and_cross_attn_state = []
                for self_attn_module in [self.self_attn, self.cross_channel_attn]:
                    saved_state = self_attn_module._get_input_buffer(
                        incremental_state[i]
                    )
                    assert saved_state is not None
                    if self_attn_padding_mask is not None:
                        self_attn_module_state = [
                            saved_state["prev_key"],
                            saved_state["prev_value"],
                            saved_state["prev_key_padding_mask"],
                        ]
                    else:
                        self_attn_module_state = [
                            saved_state["prev_key"],
                            saved_state["prev_value"],
                        ]
                    self_and_cross_attn_state.append(self_attn_module_state)
                self_and_cross_attn_state_list.append(tuple(self_and_cross_attn_state))
            return x_list_tensor, attn_list, self_and_cross_attn_state_list
        return x_list_tensor, attn_list, None

    def make_generation_fast_(self, need_attn: bool = False, **kwargs):
        """Set whether encoder attention weights are computed outside training."""
        self.need_attn = need_attn
class StandardTransformerDecoderLayer(nn.Module):
    """Rewrite fairseq.modules.TransformerDecoderLayer to avoid forwarding
    model multiple times and be compatible with checkpoint_activations.

    The input is expected to be a list of tensors from different channels,
    each is forwarded to the same model (shared attention weights).

    In the original paper each operation (multi-head attention, encoder
    attention or FFN) is postprocessed with: `dropout -> add residual ->
    layernorm`. In the tensor2tensor code they suggest that learning is more
    robust when preprocessing each layer with layernorm and postprocessing with:
    `dropout -> add residual`. We default to the approach in the paper, but the
    tensor2tensor approach can be enabled by setting
    *args.decoder_normalize_before* to ``True``.

    Args:
        args (argparse.Namespace): parsed command-line arguments
        no_encoder_attn (bool, optional): if True, no encoder-attention
            sub-block is built and encoder outputs are ignored
            (default: False).
        add_bias_kv (bool, optional): forwarded to the self-attention
            module (default: False).
        add_zero_attn (bool, optional): forwarded to the self-attention
            module (default: False).
    """

    def __init__(
        self, args, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False
    ):
        super().__init__()
        self.embed_dim = args.decoder_embed_dim
        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.quant_noise = getattr(args, "quant_noise_pq", 0)
        self.quant_noise_block_size = getattr(args, "quant_noise_pq_block_size", 8)

        self.cross_self_attention = getattr(args, "cross_self_attention", False)

        self.self_attn = self.build_self_attention(
            self.embed_dim,
            args,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
        )

        # Fall back to "relu" when args carries no activation_fn.
        self.activation_fn = utils.get_activation_fn(
            activation=str(args.activation_fn)
            if getattr(args, "activation_fn", None) is not None
            else "relu"
        )
        activation_dropout_p = getattr(args, "activation_dropout", 0) or 0
        if activation_dropout_p == 0:
            # for backwards compatibility with models that use args.relu_dropout
            activation_dropout_p = getattr(args, "relu_dropout", 0) or 0
        self.activation_dropout_module = FairseqDropout(
            float(activation_dropout_p), module_name=self.__class__.__name__
        )
        self.normalize_before = args.decoder_normalize_before

        # use layerNorm rather than FusedLayerNorm for exporting.
        # char_inputs can be used to determine this.
        # TODO remove this once we update apex with the fix
        export = getattr(args, "char_inputs", False)
        self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=export)

        if no_encoder_attn:
            self.encoder_attn = None
            self.encoder_attn_layer_norm = None
        else:
            self.encoder_attn = self.build_encoder_attention(self.embed_dim, args)
            self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export)

        self.fc1 = self.build_fc1(
            self.embed_dim,
            args.decoder_ffn_embed_dim,
            self.quant_noise,
            self.quant_noise_block_size,
        )
        self.fc2 = self.build_fc2(
            args.decoder_ffn_embed_dim,
            self.embed_dim,
            self.quant_noise,
            self.quant_noise_block_size,
        )

        self.final_layer_norm = LayerNorm(self.embed_dim, export=export)
        # Outside training, encoder attention weights are returned unless
        # make_generation_fast_ switches this off (see forward).
        self.need_attn = True

        self.onnx_trace = False

    def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size):
        """Build the first feed-forward linear layer, wrapped in quant noise."""
        return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size)

    def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size):
        """Build the second feed-forward linear layer, wrapped in quant noise."""
        return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size)

    def build_self_attention(
        self, embed_dim, args, add_bias_kv=False, add_zero_attn=False
    ):
        """Build the per-channel self-attention module.

        ``self_attention`` is disabled when ``args.cross_self_attention`` is
        set, because keys/values then also contain the encoder output.
        """
        return MultiheadAttention(
            embed_dim,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            add_bias_kv=add_bias_kv,
            add_zero_attn=add_zero_attn,
            self_attention=not getattr(args, "cross_self_attention", False),
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
        )

    def build_encoder_attention(self, embed_dim, args):
        """Build the encoder-decoder attention module.

        Key/value dimensions come from ``args.encoder_embed_dim`` when present.
        """
        return MultiheadAttention(
            embed_dim,
            args.decoder_attention_heads,
            kdim=getattr(args, "encoder_embed_dim", None),
            vdim=getattr(args, "encoder_embed_dim", None),
            dropout=args.attention_dropout,
            encoder_decoder_attention=True,
            q_noise=self.quant_noise,
            qn_block_size=self.quant_noise_block_size,
        )

    def prepare_for_onnx_export_(self):
        """Switch the layer to ONNX-trace mode (forward also returns states)."""
        self.onnx_trace = True

    def residual_connection(self, x, residual):
        """Add the residual branch to *x*."""
        return residual + x

    def forward(
        self,
        x_list_tensor: List[torch.Tensor],
        encoder_out: Optional[torch.Tensor] = None,
        encoder_padding_mask: Optional[torch.Tensor] = None,
        incremental_state: Optional[
            List[Dict[str, Dict[str, Optional[Tensor]]]]
        ] = None,
        prev_self_attn_state: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
        prev_attn_state: Optional[List[torch.Tensor]] = None,
        self_attn_mask: Optional[torch.Tensor] = None,
        self_attn_padding_mask: Optional[torch.Tensor] = None,
        need_attn: bool = False,
        need_head_weights: bool = False,
    ):
        """
        Args:
            x_list_tensor (List[Tensor]): list of input tensors in different channels,
                each tensor is of shape `(seq_len, batch, embed_dim)`. Only
                iterated and measured with ``len``, so a tensor stacked over
                channels on its first dimension is handled the same way.
            encoder_out (Tensor, optional): encoder output used as key/value
                of the encoder attention (and prepended to the keys when
                cross_self_attention is enabled)
            encoder_padding_mask (ByteTensor, optional): binary
                ByteTensor of shape `(batch, src_len)` where padding
                elements are indicated by ``1``.
            incremental_state (optional): list of incremental_state dictionaries over
                different channels (sequence generation mode)
            prev_self_attn_state (optional): list of self-attention states over
                different channels; each state is indexed as
                [prev_key, prev_value] with an optional third
                prev_key_padding_mask entry
            prev_attn_state (optional): [prev_key, prev_value] (plus optional
                padding mask) for the encoder attention, shared by all channels
            self_attn_mask (Tensor, optional): self-attention mask
            self_attn_padding_mask (Tensor, optional): self-attention key
                padding mask
            need_attn (bool, optional): return attention weights
            need_head_weights (bool, optional): return attention weights
                for each head (default: return average over heads).

        Returns:
            tuple of:
                - tensor stacking the per-channel outputs, each of shape
                  `(seq_len, batch, embed_dim)`
                - list with one attention entry per channel (taken from the
                  encoder attention when it runs, otherwise the self-attention
                  result, which is computed with ``need_weights=False``)
                - list of per-channel self-attention states when ONNX tracing
                  with incremental state, else None
        """
        n_channels = len(x_list_tensor)
        if need_head_weights:
            need_attn = True

        # incremental_state is a list of dictionaries over different channels
        if incremental_state is not None:
            assert isinstance(incremental_state, list)
            assert len(incremental_state) == n_channels

        # prev_self_attn_state is a list of self_attn_state over different channels
        if prev_self_attn_state is not None:
            assert isinstance(prev_self_attn_state, list)
            assert len(prev_self_attn_state) == n_channels

        x_list = []
        attn_list = []
        for i, x in enumerate(x_list_tensor):
            # Every channel owns its incremental state; the attention modules
            # expect that per-channel dictionary, never the enclosing list.
            channel_state = incremental_state[i] if incremental_state is not None else None
            # Each channel starts from the caller's masks. Extending the
            # shared variables in place would prepend the encoder columns
            # once per channel and break the shapes from channel 1 onwards.
            channel_attn_mask = self_attn_mask
            channel_padding_mask = self_attn_padding_mask

            residual = x

            if self.normalize_before:
                x = self.self_attn_layer_norm(x)

            if prev_self_attn_state is not None:
                prev_key, prev_value = prev_self_attn_state[i][:2]
                saved_state: Dict[str, Optional[Tensor]] = {
                    "prev_key": prev_key,
                    "prev_value": prev_value,
                }
                if len(prev_self_attn_state[i]) >= 3:
                    # The padding mask belongs to this channel's state.
                    saved_state["prev_key_padding_mask"] = prev_self_attn_state[i][2]
                assert channel_state is not None
                self.self_attn._set_input_buffer(channel_state, saved_state)
            _self_attn_input_buffer = self.self_attn._get_input_buffer(channel_state)
            # With cross_self_attention the encoder output is prepended to the
            # keys, unless a cached prev_key already exists for this channel.
            if self.cross_self_attention and not (
                incremental_state is not None
                and _self_attn_input_buffer is not None
                and "prev_key" in _self_attn_input_buffer
            ):
                if channel_attn_mask is not None:
                    assert encoder_out is not None
                    channel_attn_mask = torch.cat(
                        (
                            x.new_zeros(x.size(0), encoder_out.size(0)),
                            channel_attn_mask,
                        ),
                        dim=1,
                    )
                if channel_padding_mask is not None:
                    if encoder_padding_mask is None:
                        assert encoder_out is not None
                        encoder_padding_mask = channel_padding_mask.new_zeros(
                            encoder_out.size(1), encoder_out.size(0)
                        )
                    channel_padding_mask = torch.cat(
                        (encoder_padding_mask, channel_padding_mask), dim=1
                    )
                assert encoder_out is not None
                y = torch.cat((encoder_out, x), dim=0)
            else:
                y = x

            x, attn = self.self_attn(
                query=x,
                key=y,
                value=y,
                key_padding_mask=channel_padding_mask,
                incremental_state=channel_state,
                need_weights=False,
                attn_mask=channel_attn_mask,
            )
            x = self.dropout_module(x)
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.self_attn_layer_norm(x)

            if self.encoder_attn is not None and encoder_out is not None:
                residual = x
                if self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)
                if prev_attn_state is not None:
                    prev_key, prev_value = prev_attn_state[:2]
                    saved_state: Dict[str, Optional[Tensor]] = {
                        "prev_key": prev_key,
                        "prev_value": prev_value,
                    }
                    if len(prev_attn_state) >= 3:
                        saved_state["prev_key_padding_mask"] = prev_attn_state[2]
                    assert channel_state is not None
                    self.encoder_attn._set_input_buffer(channel_state, saved_state)

                x, attn = self.encoder_attn(
                    query=x,
                    key=encoder_out,
                    value=encoder_out,
                    key_padding_mask=encoder_padding_mask,
                    incremental_state=channel_state,
                    static_kv=True,
                    need_weights=need_attn or (not self.training and self.need_attn),
                    need_head_weights=need_head_weights,
                )
                x = self.dropout_module(x)
                x = self.residual_connection(x, residual)
                if not self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)

            # Position-wise feed-forward block.
            residual = x
            if self.normalize_before:
                x = self.final_layer_norm(x)

            x = self.activation_fn(self.fc1(x))
            x = self.activation_dropout_module(x)
            x = self.fc2(x)
            x = self.dropout_module(x)
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.final_layer_norm(x)

            x_list.append(x)
            attn_list.append(attn)

        # Trick for the checkpoint activation
        x_list_tensor = torch.stack(x_list)
        if self.onnx_trace and incremental_state is not None:
            self_attn_state_list = []
            for i in range(n_channels):
                saved_state = self.self_attn._get_input_buffer(incremental_state[i])
                assert saved_state is not None
                if self_attn_padding_mask is not None:
                    self_attn_state = [
                        saved_state["prev_key"],
                        saved_state["prev_value"],
                        saved_state["prev_key_padding_mask"],
                    ]
                else:
                    self_attn_state = [
                        saved_state["prev_key"],
                        saved_state["prev_value"],
                    ]
                self_attn_state_list.append(self_attn_state)
            return x_list_tensor, attn_list, self_attn_state_list
        return x_list_tensor, attn_list, None

    def make_generation_fast_(self, need_attn: bool = False, **kwargs):
        """Set whether encoder attention weights are computed outside training."""
        self.need_attn = need_attn
class MultichannelSearch(nn.Module):
    """Base class for search strategies decoding several channels jointly.

    All target dictionaries must agree on their pad/unk/eos indices; the
    per-channel vocabulary sizes are recorded in ``vocab_sizes``.
    """

    def __init__(self, tgt_dicts):
        super().__init__()
        reference_dict = list(tgt_dicts.values())[0]
        self.pad = reference_dict.pad()
        self.unk = reference_dict.unk()
        self.eos = reference_dict.eos()
        # Special symbols must be shared by every channel's dictionary.
        for channel_dict in tgt_dicts.values():
            assert self.pad == channel_dict.pad()
            assert self.unk == channel_dict.unk()
            assert self.eos == channel_dict.eos()
        self.vocab_sizes = {
            channel: len(channel_dict) for channel, channel_dict in tgt_dicts.items()
        }
        self.src_lengths = torch.tensor(-1)
        self.supports_constraints = False
        self.stop_on_max_len = False

    def step(
        self, step, lprobs, scores, prev_output_tokens=None, original_batch_idxs=None
    ):
        """Take a single search step (to be implemented by subclasses).

        Args:
            step: the current search step, starting at 0
            lprobs: dictionary of channels {channel : (bsz x input_beam_size x vocab_size_channel)}
                the model's log-probabilities over the vocabulary at the current step
            scores: {channel : (bsz x input_beam_size x step)}
                the historical model scores of each hypothesis up to this point
            prev_output_tokens: {channel : (bsz x step)}
                the previously generated output tokens
            original_batch_idxs: (bsz)
                the tensor with the batch indices, in the range [0, bsz)
                this is useful in case there has been applied a re-ordering
                and we need to know the original indices

        Return: A tuple of (scores, indices, beams) where:
            scores: {channel : (bsz x output_beam_size)}
                the scores of the chosen elements; output_beam_size can be
                larger than input_beam_size, e.g., we may return
                2*input_beam_size to account for EOS
            indices: {channel : (bsz x output_beam_size)}
                the indices of the chosen elements
            beams: (bsz x output_beam_size)
                the hypothesis ids of the chosen elements, in the range [0, input_beam_size)

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    @torch.jit.export
    def set_src_lengths(self, src_lengths):
        """Store the source lengths of the current batch."""
        self.src_lengths = src_lengths

    @torch.jit.export
    def init_constraints(self, batch_constraints: Optional[Tensor], beam_size: int):
        """Initialize constraint states for constrained decoding (if supported).

        This base implementation does nothing and returns ``None``.

        Args:
            batch_constraints: (torch.Tensor, optional)
                the list of constraints, in packed form
            beam_size: (int)
                the beam size
        """
        return None

    def prune_sentences(self, batch_idxs: Tensor):
        """
        Removes constraint states for completed sentences (if supported).
        This is called from sequence_generator._generate() when sentences are
        deleted from the batch. No-op in this base class.

        Args:
            batch_idxs: Indices of *sentences* whose constraint state should be *kept*.
        """
        return None

    def update_constraints(self, active_hypos: Tensor):
        """
        Updates the constraint states by selecting the beam items that are retained.
        This is called at each time step of sequence_generator._generate() when
        the set of 2 * {beam_size} candidate hypotheses are reduced to the beam size.
        No-op in this base class.

        Args:
            active_hypos: (batch size, beam size)
                list of integers denoting, for each sentence, which beam candidate items
                should be kept.
        """
        return None


def unravel_index(index, shape):
    """Convert flat indices into coordinates for a grid of the given *shape*.

    The last dimension of *shape* varies fastest. The result has one extra
    trailing dimension of size ``len(shape)`` holding the coordinates.
    """
    coords = []
    remaining = index
    for dim in reversed(shape):
        # Peel off the fastest-varying coordinate and keep it in front so the
        # final order matches *shape*.
        coords.insert(0, remaining % dim)
        remaining = remaining // dim
    return torch.stack(tuple(coords), dim=-1)
+ lprobs_n[bsz, beam_idx, vocab_n_idx] + - topk_idxs : (batch_size x k x n+1) + each (n+1)-tensor being [beam_idx, vocab_1_idx, ..., vocab_n_idx] + """ + # Reduce all lprobs to k candidates first to reduce later complexity + # We may assume that k << vocab + lprobs_topk_list = [] + lprobs_topk_indices_list = [] + for lprobs in lprobs_list: + k_i = min(k, lprobs.size(-1)) + topk_values, topk_indices = torch.topk(lprobs, k=k_i) + # topk_values : (batch_size x beam_size x k_i) + # topk_indices : (batch_size x beam_size x k_i) + lprobs_topk_list.append(topk_values) + lprobs_topk_indices_list.append(topk_indices) + + # Compute all possible sums + sum_lprobs_topk = lprobs_topk_list[0] + for i in range(1, len(lprobs_topk_list)): + unsqueezed_lprobs = lprobs_topk_list[i] + for _ in range(i): + unsqueezed_lprobs = unsqueezed_lprobs.unsqueeze(-2) + sum_lprobs_topk = sum_lprobs_topk.unsqueeze(-1) + unsqueezed_lprobs + # sum_lprobs : (batch_size x beam_size x k_1 x ... x k_n) + + # Get the top k sums and the (transformed indices) + topk_sum_values, topk_sum_indices = torch.topk( + sum_lprobs_topk.view(sum_lprobs_topk.size(0), -1), k=k + ) + # topk_sum_values : (batch_size x k) + # topk_sum_indices : (batch_size x k) + topk_sum_indices = unravel_index(topk_sum_indices, tuple(sum_lprobs_topk.shape[1:])) + # topk_sum_indices : (batch_size x k x n+1) + + # Convert the transformed indices to the true indices + for i_batch in range(topk_sum_indices.size(0)): + for i_cand in range(topk_sum_indices.size(1)): + i_beam, *transformed_vocab_indices = topk_sum_indices[i_batch, i_cand] + true_vocab_indices = [i_beam] + for j, transformed_vocab_j_idx in enumerate(transformed_vocab_indices): + true_vocab_j_idx = lprobs_topk_indices_list[j][ + i_batch, i_beam, transformed_vocab_j_idx + ] + true_vocab_indices.append(true_vocab_j_idx) + topk_sum_indices[i_batch, i_cand] = torch.tensor(true_vocab_indices) + + topk_sum_beams = topk_sum_indices[:, :, 0] + topk_sum_indices = topk_sum_indices[:, :, 
1:] + + return topk_sum_values, topk_sum_indices, topk_sum_beams + + +class MultichannelBeamSearch(MultichannelSearch): + def __init__(self, tgt_dicts): + super().__init__(tgt_dicts) + self.constraint_states = None + + @torch.jit.export + def step( + self, + step: int, + lprobs, + scores: Optional[Dict[str, Tensor]], + prev_output_tokens: Optional[Dict[str, Tensor]] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + channels = list(lprobs.keys()) + bsz, beam_size, _ = lprobs[channels[0]].size() + + lprobs_list = [] + if step == 0: + # at the first step all hypotheses are equally likely, so use + # only the first beam + for channel in channels: + lprobs_list.append(lprobs[channel][:, ::beam_size, :].contiguous()) + else: + # make probs contain cumulative scores for each hypothesis + assert scores is not None + for channel in channels: + lprobs_list.append( + lprobs[channel] + scores[channel][:, :, step - 1].unsqueeze(-1) + ) + + topk_sum_values, topk_sum_indices, topk_sum_beams = topk_sum( + lprobs_list, k=beam_size * 2 + ) + + beams_buf = topk_sum_beams + scores_buf = {} + indices_buf = {} + for i, channel in enumerate(channels): + indices_buf[channel] = topk_sum_indices[:, :, i] + scores_buf[channel] = ( + torch.tensor( + [ + lprobs_list[i][i_batch, i_beam, i_index] + for i_batch in range(bsz) + for i_beam, i_index in zip( + beams_buf[i_batch], indices_buf[channel][i_batch] + ) + ] + ) + .view(bsz, -1) + .to(lprobs_list[i].device) + ) + + # At this point, beams_buf and indices_buf are single-dim and contain relative indices + return scores_buf, indices_buf, beams_buf + + +class ContiguousMultichannelBeamSearch(MultichannelSearch): + def __init__(self, tgt_dicts): + super().__init__(tgt_dicts) + self.constraint_states = None + + @torch.jit.export + def step( + self, + step: int, + lprobs, + scores: Optional[Tensor], + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + n_channels = len(lprobs) + bsz, 
beam_size, _ = lprobs[0].size() + + lprobs_list = [] + if step == 0: + # at the first step all hypotheses are equally likely, so use + # only the first beam + for i in range(n_channels): + lprobs_list.append(lprobs[i][:, ::beam_size, :].contiguous()) + else: + # make probs contain cumulative scores for each hypothesis + assert scores is not None + for i in range(n_channels): + lprobs_list.append(lprobs[i] + scores[:, :, step - 1, i].unsqueeze(-1)) + + topk_sum_values, topk_sum_indices, topk_sum_beams = topk_sum( + lprobs_list, k=beam_size * 2 + ) + + beams_buf = topk_sum_beams + indices_buf = topk_sum_indices + scores_buf = ( + torch.tensor( + [ + lprobs_list[i][i_batch, i_beam, i_index] + for i in range(len(lprobs_list)) + for i_batch in range(bsz) + for i_beam, i_index in zip( + beams_buf[i_batch], indices_buf[i_batch, :, i] + ) + ] + ) + .view(len(lprobs_list), bsz, -1) + .permute(1, 2, 0) + .to(lprobs_list[0].device) + ) + + # At this point, beams_buf and indices_buf are single-dim and contain relative indices + return scores_buf, indices_buf, beams_buf + + +class ContiguousMultichannelSampling(MultichannelSearch): + sampling_topk: int + sampling_topp: float + + def __init__(self, tgt_dicts, sampling_topk=-1, sampling_topp=-1.0): + super().__init__(tgt_dicts) + self.sampling_topk = sampling_topk + self.sampling_topp = sampling_topp + + def _sample_topp(self, lprobs): + """Sample among the smallest set of elements whose cumulative probability mass exceeds p. + + See `"The Curious Case of Neural Text Degeneration" + (Holtzman et al., 2019) <https://arxiv.org/abs/1904.09751>`_. + + Args: + lprobs: (bsz x input_beam_size x vocab_size) + the model's log-probabilities over the vocabulary at the current step + + Return: A tuple of (trimed_probs, truncated_indices) where: + trimed_probs: (bsz x input_beam_size x ?) + the model's probabilities over the elements selected to sample from. The + width of the third dimension is determined by top-P. 
+ truncated_indices: (bsz x input_beam_size x ?) + the indices of the chosen elements. + """ + probs = lprobs.exp_() + + # sort the last dimension (vocab dimension) in descending order + sorted_probs, sorted_indices = probs.sort(descending=True) + + # compute a mask to indicate the words to be included in the top-P set. + cumsum_probs = sorted_probs.cumsum(dim=2) + mask = cumsum_probs.lt(self.sampling_topp) + + # note that mask was computed by 'lt'. One more word needs to be included + # so that the cumulative probability mass can exceed p. + cumsum_mask = mask.cumsum(dim=2) + last_included = cumsum_mask[:, :, -1:] + last_included.clamp_(0, mask.size()[2] - 1) + mask = mask.scatter_(2, last_included, 1) + + # truncate unnecessary dims. + max_dim = last_included.max() + truncated_mask = mask[:, :, : max_dim + 1] + truncated_probs = sorted_probs[:, :, : max_dim + 1] + truncated_indices = sorted_indices[:, :, : max_dim + 1] + + # trim the words that are not in top-P by setting their probabilities + # to 0, so that they would not be sampled later. 
+ trim_mask = ~truncated_mask + trimed_probs = truncated_probs.masked_fill_(trim_mask, 0) + return trimed_probs, truncated_indices + + @torch.jit.export + def step( + self, + step: int, + lprobs, + scores, + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + n_channels = len(lprobs) + bsz, beam_size, vocab_size = lprobs[0].size() + + if step == 0: + # at the first step all hypotheses are equally likely, so use + # only the first beam + for i in range(n_channels): + lprobs[i] = lprobs[i][:, ::beam_size, :].contiguous() + + probs = [] + top_indices = [] + for i in range(n_channels): + if self.sampling_topp > 0: + # only sample from the smallest set of words whose cumulative probability mass exceeds p + probs_i, top_indices_i = self._sample_topp(lprobs[i]) + elif self.sampling_topk > 0: + # only sample from top-k candidates + lprobs[i], top_indices_i = lprobs[i].topk( + min(self.sampling_topk, lprobs[i].size(-1)) + ) + probs_i = lprobs[i].exp_() + else: + probs_i = lprobs[i].exp_() + + # dummy data to be consistent with true branch for type check + top_indices_i = torch.empty(0).to(probs_i) + probs.append(probs_i) + top_indices.append(top_indices_i) + # sample + indices_buf = [] + for i in range(n_channels): + if step == 0: + indices_buf.append( + torch.multinomial( + probs[i].view(bsz, -1), + beam_size, + replacement=True, + ).view(bsz, beam_size) + ) + else: + indices_buf.append( + torch.multinomial( + probs[i].view(bsz * beam_size, -1), + 1, + replacement=True, + ).view(bsz, beam_size) + ) + + if step == 0: + for i in range(n_channels): + # expand to beam size + probs[i] = probs[i].expand(bsz, beam_size, -1) + + # gather scores + scores_buf = [] + for i in range(n_channels): + scores_buf.append( + torch.gather(probs[i], dim=2, index=indices_buf[i].unsqueeze(-1)) + ) + scores_buf[i] = scores_buf[i].log_().view(bsz, -1) + + # remap indices if using top-k or top-P sampling + if self.sampling_topk > 0 or 
self.sampling_topp > 0: + for i in range(n_channels): + indices_buf[i] = torch.gather( + top_indices[i].expand(bsz, beam_size, -1), + dim=2, + index=indices_buf[i].unsqueeze(-1), + ).squeeze(2) + + if step == 0: + beams_buf = indices_buf[0].new_zeros(bsz, beam_size) + else: + beams_buf = torch.arange(0, beam_size).to(indices_buf[0]).repeat(bsz, 1) + # make scores cumulative + for i in range(n_channels): + scores_buf[i].add_( + torch.gather(scores[:, :, step - 1, i], dim=1, index=beams_buf) + ) + scores_buf = torch.stack(scores_buf, dim=-1) + indices_buf = torch.stack(indices_buf, dim=-1) + + return scores_buf, indices_buf, beams_buf diff --git a/fairseq/fairseq/models/speech_dlm/sequence_generator/multichannel_sequence_generator.py b/fairseq/fairseq/models/speech_dlm/sequence_generator/multichannel_sequence_generator.py new file mode 100644 index 0000000..24807b8 --- /dev/null +++ b/fairseq/fairseq/models/speech_dlm/sequence_generator/multichannel_sequence_generator.py @@ -0,0 +1,1110 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import math
from typing import Dict, List, Optional

from omegaconf.listconfig import ListConfig
from omegaconf.dictconfig import DictConfig

import torch
import torch.nn as nn
from fairseq.models import FairseqIncrementalDecoder
from torch import Tensor
from fairseq.ngram_repeat_block import NGramRepeatBlock
from .multichannel_search import ContiguousMultichannelBeamSearch
from fairseq.models.speech_dlm import SpeechDLM


class MultichannelSequenceGenerator(nn.Module):
    def __init__(
        self,
        models,
        tgt_dicts,
        beam_size=1,
        max_len_a=0,
        max_len_b=200,
        min_len=1,
        normalize_scores=True,
        len_penalty=1.0,
        unk_penalty=0.0,
        temperature=1.0,
        match_source_len=False,
        no_repeat_ngram_size=0,
        search_strategy=None,
        eos=None,
        symbols_to_strip_from_output=None,
        lm_model=None,
        lm_weight=1.0,
        duration_temperature=1.0,
    ):
        """Generate multi-channel parallel units with the SpeechDLM model
        as described in the paper: https://arxiv.org/pdf/2203.16502.pdf;

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models,
                currently support fairseq.models.TransformerModel for scripting
            tgt_dicts (dict): mapping {channel: dictionary}; the channel order
                of this mapping fixes the channel axis used everywhere else
            beam_size (int, optional): beam width (default: 1)
            max_len_a/b (int, optional): generate sequences of maximum length
                ax + b, where x is the source length
            min_len (int, optional): the minimum length of the generated output
                (not including end-of-sentence)
            normalize_scores (bool, optional): normalize scores by the length
                of the output (default: True)
            len_penalty (float, optional): length penalty, where <1.0 favors
                shorter, >1.0 favors longer sentences (default: 1.0)
            unk_penalty (float, optional): unknown word penalty, where <0
                produces more unks, >0 produces fewer (default: 0.0)
            temperature (float, list or dict, optional): temperature, where values
                >1.0 produce more uniform samples and values <1.0 produce
                sharper samples (default: 1.0). A scalar is applied to every
                channel, a list is matched to the channels by position
            match_source_len (bool, optional): outputs should match the source
                length (default: False)
            duration_temperature (float, optional): rate of the duration prediction,
                higher rate induces a faster generated wav (default: 1.0)
        """
        super().__init__()
        if isinstance(models, MultichannelEnsembleModel):
            self.model = models
        else:
            self.model = MultichannelEnsembleModel(models)
        self.tgt_dicts = tgt_dicts
        self.pad = list(tgt_dicts.values())[0].pad()
        self.unk = list(tgt_dicts.values())[0].unk()
        self.eos = list(tgt_dicts.values())[0].eos() if eos is None else eos
        self.symbols_to_strip_from_output = (
            symbols_to_strip_from_output.union({self.eos})
            if symbols_to_strip_from_output is not None
            else {self.eos}
        )
        self.channels = list(tgt_dicts.keys())
        self.n_channels = len(self.channels)
        self.vocab_sizes = [len(tgt_dicts[channel]) for channel in self.channels]
        # the max beam size is the dictionary size - 1, since we never select pad
        max_possible_beam_size = 1
        for i in self.vocab_sizes:
            max_possible_beam_size *= i - 1
        self.beam_size = min(beam_size, max_possible_beam_size)
        self.max_len_a = max_len_a
        self.max_len_b = max_len_b
        self.min_len = min_len

        self.normalize_scores = normalize_scores
        self.len_penalty = len_penalty
        self.unk_penalty = unk_penalty
        # Normalise the temperature to a per-channel mapping.
        if isinstance(temperature, (int, float)):
            temperature = {channel: temperature for channel in self.channels}
        elif isinstance(temperature, ListConfig) or isinstance(temperature, list):
            temperature = {
                channel: temperature[i] for i, channel in enumerate(self.channels)
            }
        assert isinstance(temperature, DictConfig) or isinstance(
            temperature, dict
        ), f"temperature: expected dict, but found {type(temperature)}"
        self.temperature = temperature
        self.match_source_len = match_source_len

        if no_repeat_ngram_size > 0:
            self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size)
        else:
            self.repeat_ngram_blocker = None

        for channel in temperature:
            assert temperature[channel] > 0, "--temperature must be greater than 0"

        if search_strategy is None:
            self.search = ContiguousMultichannelBeamSearch(tgt_dicts)
        else:
            self.search = search_strategy
        # We only need to set src_lengths in LengthConstrainedBeamSearch.
        # As a module attribute, setting it would break in multithread
        # settings when the model is shared.
        self.should_set_src_lengths = (
            hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths
        )

        self.model.eval()

        self.lm_model = lm_model
        self.lm_weight = lm_weight
        if self.lm_model is not None:
            self.lm_model.eval()

        # The decoder args may hold these flags as bools or strings, hence the
        # round trip through str().lower().
        # NOTE(review): `models[0]` presumes `models` is a sequence; when an
        # already-built MultichannelEnsembleModel is passed this relies on that
        # class supporting indexing -- confirm against its definition.
        self.duration_prediction = bool(
            str(getattr(models[0].decoder.args, "duration_prediction", "false")).lower()
            == "true"
        )
        self.delayed_duration = bool(
            str(
                getattr(models[0].decoder.args, "delayed_duration_target", "false")
            ).lower()
            == "true"
        )
        self.duration_temperature = duration_temperature

    def cuda(self):
        """Move the wrapped model to CUDA and return ``self``."""
        self.model.cuda()
        return self

    @torch.no_grad()
    def forward(
        self,
        sample: Dict[str, Dict[str, Tensor]],  # TODO: Modify this
        prefix_tokens: Optional[Dict[str, Tensor]] = None,
        bos_token: Optional[int] = None,
    ):
        """Generate a batch of translations.

        Args:
            sample (dict): batch
            prefix_tokens (dict of torch.LongTensor, optional): force decoder to begin
                with these tokens
            bos_token (int, optional): beginning of sentence token
                (default: self.eos)
        """
        return self._generate(sample, prefix_tokens, bos_token=bos_token)

    @torch.no_grad()
    def generate(self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs):
        """Generate translations. Match the api of other fairseq generators.

        Args:
            models (List[~fairseq.models.FairseqModel]): ensemble of models
                (ignored; the models given at construction time are used)
            sample (dict): batch
            prefix_tokens (dict of torch.LongTensor, optional): force decoder to begin
                with these tokens
            constraints (torch.LongTensor, optional): force decoder to include
                the list of constraints
            bos_token (int, optional): beginning of sentence token
                (default: self.eos)
        """
        return self._generate(sample, **kwargs)

    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Dict[str, Tensor]] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        """
        Here sample is expected to have the following form
            {
                'id': index,
                'net_input': {
                    'src_tokens': {
                        'channel1' : tensor((batch x src_length)),
                        'channel2' : tensor((batch x src_length)),
                    },
                    ...
                },
            }
        and prefix_tokens (optional)
            {
                'channel1' : tensor((batch x prefix_length)),
                'channel2' : tensor((batch x prefix_length)),
            }

        Returns:
            A list with one entry per input sentence; each entry is the list
            of finalized hypotheses (dicts) sorted by descending ``"score"``.

        Raises:
            NotImplementedError: if ``constraints`` are given but the search
                strategy does not support them.
        """
        if self.model.is_speech_dlm:
            incremental_states = torch.jit.annotate(
                List[Dict[str, Dict[str, Optional[Tensor]]]],
                [
                    torch.jit.annotate(
                        List[Dict[str, Dict[str, Optional[Tensor]]]],
                        [{} for _ in range(self.n_channels)],
                    )
                    for i in range(self.model.models_size)
                ],
            )
        else:
            incremental_states = torch.jit.annotate(
                List[Dict[str, Dict[str, Optional[Tensor]]]],
                [
                    torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
                    for i in range(self.model.models_size)
                ],
            )
        net_input = sample["net_input"]
        # Convert from dict to tensor form
        # shape of src_tokens : (bsz x src_len x n_channels)
        src_tokens = torch.stack(
            [net_input["src_tokens"][channel] for channel in self.channels], dim=-1
        )
        # `prefix_tokens` is optional (it defaults to None and every later use
        # is guarded by `is not None`), so only convert it when it was given;
        # indexing None here used to raise a TypeError.
        if prefix_tokens is not None:
            prefix_tokens = torch.stack(
                [prefix_tokens[channel] for channel in self.channels], dim=-1
            )
        # length of the source text being the character length except EndOfSentence and pad
        src_lengths = (
            (src_tokens[..., 0].ne(self.eos) & src_tokens[..., 0].ne(self.pad))
            .long()
            .sum(dim=1)
        )

        # bsz: total number of sentences in beam
        # Note that src_tokens may have more than 2 dimensions (i.e. audio features)
        bsz, src_len = src_tokens.size()[:2]
        beam_size = self.beam_size

        if constraints is not None and not self.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.search.init_constraints(constraints, beam_size)

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                self.model.max_decoder_positions() - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"
        # compute the encoder output for each beam
        encoder_outs = self.model.forward_encoder(net_input)

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None

        # initialize buffers
        # cumulative scores of hypotheses
        scores = (
            torch.zeros(bsz * beam_size, max_len + 1, self.n_channels)
            .to(src_tokens)
            .float()
        )  # +1 for eos; pad is never chosen for scoring
        tokens = (
            torch.zeros(bsz * beam_size, max_len + 2, self.n_channels)
            .to(src_tokens)
            .long()
            .fill_(self.pad)
        )  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # A list that indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then cands_to_ignore would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        cands_to_ignore = (
            torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
        )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        finished = [
            False for i in range(bsz)
        ]  # a boolean array indicating if the sentence at the index is finished or not
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (
            (torch.arange(0, bsz) * beam_size)
            .unsqueeze(1)
            .type_as(tokens)
            .to(src_tokens.device)
        )
        cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None

        original_batch_idxs: Optional[Tensor] = None
        if "id" in sample and isinstance(sample["id"], Tensor):
            original_batch_idxs = sample["id"]
        else:
            original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

        if self.duration_prediction:
            dur_counter = torch.ones(bsz * beam_size, self.n_channels).to(src_tokens)
            # save the indices where the dur_counter just copied from dur_pred
            dur_counter_jump_indices = None

        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(
                        batch_idxs
                    )
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size
                    )
                    original_batch_idxs = original_batch_idxs[batch_idxs]
                self.model.reorder_incremental_state(incremental_states, reorder_state)
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state
                )

            input_tokens = {
                channel: tokens[:, : step + 1, i]
                for i, channel in enumerate(self.channels)
            }

            lprobs_dict, avg_attn_scores = self.model.forward_decoder(
                input_tokens,
                encoder_outs,
                incremental_states,
                self.temperature,
            )

            # Because the sizes of vocab is different, we cannot concat the lprobs to form a single tensor
            if not self.duration_prediction:
                lprobs_list = list(lprobs_dict.values())
            else:
                lprobs_list = [
                    net_output["pred_token"] for net_output in lprobs_dict.values()
                ]

                # non-positive predicted durations
                dur_preds = (
                    torch.stack(
                        [
                            net_output["pred_duration"]
                            for net_output in lprobs_dict.values()
                        ]
                    )
                    .squeeze(-1)
                    .T
                )
                dur_preds = dur_preds / self.duration_temperature
                dur_preds = dur_preds.round().long()
                dur_preds[dur_preds < 1] = 1

                # dur_preds & dur_counter needs to be modified when there isn't an edge
                if step > 0:
                    non_edge_indices = tokens[:, step, :] == tokens[:, step - 1, :]
                    if self.delayed_duration:
                        dur_preds[non_edge_indices] = 1
                    else:
                        # NOTE(review): dur_counter_jump_indices was computed at
                        # the previous step and is not reordered / pruned
                        # together with dur_counter further below -- looks
                        # misaligned when beam_size > 1; confirm.
                        if dur_counter_jump_indices is not None:
                            dur_counter[dur_counter_jump_indices & non_edge_indices] = 2

                # update dur_counter
                if step > 0:
                    if self.delayed_duration:
                        dur_counter -= (
                            (dur_counter == 1)
                            | (tokens[:, step, :] == tokens[:, step - 1, :])
                        ).int()
                        dur_counter[dur_counter < 0] = 0
                    else:
                        dur_counter -= (
                            tokens[:, step, :] == tokens[:, step - 1, :]
                        ).int()
                        dur_counter[dur_counter < 1] = 1

                # whether to copy previous token (ie. if the counter is still on)
                # and get the new duration
                if self.delayed_duration:
                    dur_counter_jump_indices = dur_counter == 0
                    dur_counter[dur_counter_jump_indices] = dur_preds[
                        dur_counter_jump_indices
                    ]

                # whether to copy previous token in this step
                copy_prev_token = dur_counter != 1
                if self.delayed_duration is False:
                    dur_counter_jump_indices = dur_counter == 1
                    dur_counter[dur_counter_jump_indices] = dur_preds[
                        dur_counter_jump_indices
                    ]
                # else:
                #     dur_counter[dur_counter==0] = dur_preds[dur_counter==0] - 1
                #     copy_prev_token = (dur_counter > 0)

            if self.lm_model is not None:
                assert False, "Currently not supported in multichannelLM case"

            for i in range(self.n_channels):
                # NaN != NaN, so this replaces NaNs by -inf
                lprobs_list[i][lprobs_list[i] != lprobs_list[i]] = torch.tensor(
                    -math.inf
                ).to(lprobs_list[i])

                lprobs_list[i][:, self.pad] = -math.inf  # never select pad
                lprobs_list[i][:, self.unk] -= self.unk_penalty  # apply unk penalty

                # handle max length constraint
                if step >= max_len:
                    lprobs_list[i][:, : self.eos] = -math.inf
                    lprobs_list[i][:, self.eos + 1 :] = -math.inf
                else:
                    lprobs_list[i][
                        :, self.eos
                    ] = -math.inf  # quick fix for short generation

                # handle prefix tokens (possibly with different lengths)
                if (
                    prefix_tokens is not None
                    and step < prefix_tokens.size(1)
                    and step < max_len
                ):
                    (
                        lprobs_list[i],
                        tokens[..., i],
                        scores[..., i],
                    ) = self._prefix_tokens(
                        step,
                        lprobs_list[i],
                        scores[..., i],
                        tokens[..., i],
                        prefix_tokens[..., i],
                        beam_size,
                    )
                    if self.duration_prediction:
                        # Can copy previous token if the prefix token is padding or unk (1-channel conditioned case)
                        can_copy_mask = (
                            prefix_tokens[:, step, i].eq(self.pad)
                            | prefix_tokens[:, step, i].eq(self.unk)
                        ).repeat_interleave(beam_size)
                        copy_prev_token[:, i] &= can_copy_mask
                elif step < self.min_len:
                    # minimum length constraint (does not apply if using prefix_tokens)
                    lprobs_list[i][:, self.eos] = -math.inf

                if self.duration_prediction:
                    if step < max_len:
                        # Force rows whose duration counter is still running
                        # to repeat their previous token.
                        for j in range(copy_prev_token.size(0)):
                            if copy_prev_token[j, i]:
                                prev_token = tokens[j, step, i]
                                lprobs_list[i][j, :prev_token] = -math.inf
                                lprobs_list[i][j, prev_token + 1 :] = -math.inf
                                # lprobs_list[i][j, prev_token] = 0.
                                # dur_counter[j,i] -= 1
                            # else:
                            #     prev_token = tokens[j, step, i]
                            #     if not (lprobs_list[i][j,:].ne(-math.inf).nonzero() == prev_token).all():
                            #         lprobs_list[i][j, prev_token] = -math.inf
                            #         dur_counter[j,i] = 0.

            # Record attention scores, only support avg_attn_scores is a Tensor
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(
                        bsz * beam_size, avg_attn_scores.size(1), max_len + 2
                    ).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs_list[0])
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            if self.should_set_src_lengths:
                self.search.set_src_lengths(src_lengths)

            if self.repeat_ngram_blocker is not None:
                for i in range(self.n_channels):
                    lprobs_list[i] = self.repeat_ngram_blocker(
                        tokens, lprobs_list[i], bsz, beam_size, step
                    )

            # Shape: (batch, cand_size)
            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                [
                    lprobs_list[i].view(bsz, -1, self.vocab_sizes[i])
                    for i in range(self.n_channels)
                ],
                scores.view(bsz, beam_size, -1, self.n_channels)[:, :, :step, :],
                tokens[:, : step + 1],
                original_batch_idxs,
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            # Shape of eos_mask: (batch size, beam size)
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask = torch.any(eos_mask, dim=-1, keepdim=False)
            eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)

            # only consider eos when it's among the top beam_size indices
            # Now we know what beam item(s) to finish
            # Shape: 1d list of absolute-numbered
            eos_bbsz_idx = torch.masked_select(
                cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size]
            )

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.stack(
                    [
                        torch.masked_select(
                            cand_scores[:, :beam_size, i], mask=eos_mask[:, :beam_size]
                        )
                        for i in range(self.n_channels)
                    ],
                    dim=-1,
                )
                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            if self.search.stop_on_max_len and step >= max_len:
                break
            assert step < max_len, f"{step} < {max_len}"

            # Remove finalized sentences (ones for which {beam_size}
            # finished hypotheses have been generated) from the batch.
            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(
                    bsz, dtype=torch.bool, device=cand_indices.device
                )
                batch_mask[finalized_sents] = False
                # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
                batch_idxs = torch.arange(
                    bsz, device=cand_indices.device
                ).masked_select(batch_mask)

                # Choose the subset of the hypothesized constraints that will continue
                self.search.prune_sentences(batch_idxs)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                cands_to_ignore = cands_to_ignore[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1, self.n_channels
                )
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1, self.n_channels
                )
                if self.duration_prediction:
                    dur_counter = dur_counter.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, self.n_channels
                    )
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1
                    )
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since the element wise or is not supported in torchscript.

            eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[: eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just
            # the hypos with the smallest values in active_mask.
            # {active_hypos} indicates which {beam_size} hypotheses
            # from the list of {2 * beam_size} candidates were
            # selected. Shapes: (batch size, beam size)
            new_cands_to_ignore, active_hypos = torch.topk(
                active_mask, k=beam_size, dim=1, largest=False
            )

            # update cands_to_ignore to ignore any finalized hypos.
            cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
            # Make sure there is at least one active item for each sentence in the batch.
            assert (~cands_to_ignore).any(dim=1).all()

            # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
            # can be selected more than once).
            active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos)
            active_bbsz_idx = active_bbsz_idx.view(-1)

            # active_scores = torch.stack([
            #     torch.gather(cand_scores[...,0], dim=1, index=active_hypos)
            #     for i in range(self.n_channels)
            # ], dim = -1)
            # active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses

            # Set the tokens for each beam (can select the same row more than once)
            tokens[:, : step + 1] = torch.index_select(
                tokens[:, : step + 1], dim=0, index=active_bbsz_idx
            )
            # Select the next token for each of them
            for i in range(self.n_channels):
                tokens.view(bsz, beam_size, -1, self.n_channels)[
                    :, :, step + 1, i
                ] = torch.gather(cand_indices[..., i], dim=1, index=active_hypos)
            if step > 0:
                scores[:, :step] = torch.index_select(
                    scores[:, :step], dim=0, index=active_bbsz_idx
                )
            for i in range(self.n_channels):
                scores.view(bsz, beam_size, -1, self.n_channels)[
                    :, :, step, i
                ] = torch.gather(cand_scores[..., i], dim=1, index=active_hypos)

            if self.duration_prediction:
                dur_counter = torch.index_select(
                    dur_counter, dim=0, index=active_bbsz_idx
                )

            # Update constraints based on which candidates were selected for the next beam
            self.search.update_constraints(active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, : step + 2] = torch.index_select(
                    attn[:, :, : step + 2], dim=0, index=active_bbsz_idx
                )

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            scores = torch.tensor(
                [float(elem["score"].item()) for elem in finalized[sent]]
            )
            _, sorted_scores_indices = torch.sort(scores, descending=True)
            finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
            finalized[sent] = torch.jit.annotate(
                List[Dict[str, Tensor]], finalized[sent]
            )
        return finalized

    def _prefix_tokens(
        self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int
    ):
        """Force the prefix token of one channel at position ``step``.

        Rows whose prefix token is neither pad nor unk get every other
        vocabulary entry set to -inf. Rows whose prefix token is unk are left
        free except that EOS is forbidden. ``lprobs`` is modified in place.

        Returns:
            The tuple ``(lprobs, tokens, scores)``; the three tensors are
            replaced by the output of ``replicate_first_beam`` when the prefix
            contains EOS at this step.
        """
        prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1)
        prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1))
        prefix_mask = prefix_toks.ne(self.pad)
        # used for 1-channel generation, do not force the unk token (i.e. unk tokens are changed)
        prefix_mask &= prefix_toks.ne(self.unk)
        # zeroing the copying tokens
        # if step > 0:
        #     copy_mask = (prefix_tokens[:, step] == prefix_tokens[:, step-1]).unsqueeze(-1).repeat(1, beam_size).view(-1)
        #     prefix_lprobs[copy_mask & prefix_mask] = 0.
        lprobs[prefix_mask] = torch.tensor(-math.inf).to(lprobs)
        lprobs[prefix_mask] = lprobs[prefix_mask].scatter(
            -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask]
        )
        # shouldn't stop at unk token
        unk_mask = prefix_toks.eq(self.unk)
        if len(lprobs[unk_mask]) > 0:
            # otherwise it won't assign to lprobs,
            # see: https://discuss.pytorch.org/t/how-to-mask-and-assign-a-value-to-tensor/18437
            copy_lprobs = lprobs[unk_mask][:, :]
            copy_lprobs[:, self.eos] = -math.inf
            lprobs[unk_mask] = copy_lprobs
        # if prefix includes eos, then we should make sure tokens and
        # scores are the same across all beams
        eos_mask = prefix_toks.eq(self.eos)
        if eos_mask.any():
            # validate that the first beam matches the prefix
            first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[
                :, 0, 1 : step + 1
            ]
            eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0]
            target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step]
            assert (first_beam == target_prefix).all()

            # copy tokens, scores and lprobs from the first beam to all beams
            tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size)
            scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size)
            lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size)
        return lprobs, tokens, scores

    # NOTE: `replicate_first_beam` begins here in the original file; its body
    # lies beyond the visible chunk, so only the opening of its signature is
    # kept (as a comment):
    # def replicate_first_beam(self, tensor, mask,
beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. + A sentence is finalized when {beam_size} finished items have been collected for it. + + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.size(0) + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step, :] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. 
+ cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + + # The keys here are of the form "{sent}_{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # set() is not supported in script export + sents_seen: Dict[str, Optional[Tensor]] = {} + + # For every finished beam item + for i in range(bbsz_idx.size()[0]): + idx = bbsz_idx[i] + score = eos_scores[i].sum() + # sentence index in the current (possibly reduced) batch + unfin_idx = idx // beam_size + # sentence index in the original (unreduced) batch + sent = unfin_idx + cum_unfin[unfin_idx] + # Cannot create dict for key type '(int, int)' in torchscript. + # The workaround is to cast int to string + seen = str(sent.item()) + "_" + str(unfin_idx.item()) + if seen not in sents_seen: + sents_seen[seen] = None + + if self.match_source_len and step > src_lengths[unfin_idx]: + score = torch.tensor(-math.inf).to(score) + + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent].append( + { + "tokens": tokens_clone[i], + "score": score, + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + + for seen in sents_seen.keys(): + # check termination conditions for this sentence + sent: int = int(float(seen.split("_")[0])) + unfin_idx: int = int(float(seen.split("_")[1])) + + if not finished[sent] and self.is_finished( + step, unfin_idx, max_len, len(finalized[sent]), beam_size + ): + finished[sent] = True + newly_finished.append(unfin_idx) + + return newly_finished + + def is_finished( + self, + step: 
int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of finalized sentences has reached the + beam size, or when we reach the maximum length. + """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class MultichannelEnsembleModel(nn.Module): + """A wrapper around an ensemble of SpeechDLM models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + if isinstance(models[0], SpeechDLM): + self.is_speech_dlm = True + # Otherwise it's a multi-channel language model (without cross-prediction outputs) + else: + self.is_speech_dlm = False + + if getattr(models[0].decoder.args, "duration_prediction", False): + self.is_duration_prediction = True + else: + self.is_duration_prediction = False + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min([m.max_decoder_positions() for m in self.models]) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: Dict[str, float] = 1.0, + ): + if isinstance(temperature, (float, int)): + 
temperature = {channel: temperature for channel in tokens} + log_probs = {channel: [] for channel in tokens} + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + if self.is_speech_dlm: + if self.is_duration_prediction: + decoder_out_divided_by_temperature = { + channel_src: { + channel_pred: { + "pred_token": decoder_out[0][channel_src][channel_pred][ + "pred_token" + ][:, -1:, :].div_(temperature[channel_pred]), + "pred_duration": decoder_out[0][channel_src][ + channel_pred + ]["pred_duration"][:, -1:, :], + } + for channel_pred in decoder_out[0][channel_src] + } + for channel_src in decoder_out[0] + } + else: + decoder_out_divided_by_temperature = { + channel_src: { + channel_pred: decoder_out[0][channel_src][channel_pred][ + :, -1:, : + ].div_(temperature[channel_pred]) + for channel_pred in decoder_out[0][channel_src] + } + for channel_src in decoder_out[0] + } + else: + decoder_out_divided_by_temperature = { + channel: decoder_out[0][channel][:, -1:, :].div_( + temperature[channel] + ) + for channel in decoder_out[0] + } + decoder_out_tuple = ( + decoder_out_divided_by_temperature, + None if decoder_len <= 1 else decoder_out[1], + ) + + probs = model.get_normalized_probs( + 
decoder_out_tuple, log_probs=True, sample=None + ) + + if self.is_speech_dlm: + if self.is_duration_prediction: + probs = { + channel: { + "pred_token": probs[channel][channel]["pred_token"][ + :, -1, : + ], + "pred_duration": probs[channel][channel]["pred_duration"][ + :, -1, : + ], + } + for channel in probs + } + else: + probs = { + channel: probs[channel][channel][:, -1, :] for channel in probs + } + else: + probs = {channel: probs[channel][:, -1, :] for channel in probs} + if self.models_size == 1: + return probs, attn + + for channel in probs: + log_probs[channel].append(probs[channel]) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = {} + for channel in log_probs: + avg_probs[channel] = torch.logsumexp( + torch.stack(log_probs[channel], dim=0), dim=0 + ) - math.log(self.models_size) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) diff --git a/fairseq/fairseq/models/speech_dlm/speech_dlm.py b/fairseq/fairseq/models/speech_dlm/speech_dlm.py new file mode 100644 index 0000000..dc13f56 --- /dev/null +++ b/fairseq/fairseq/models/speech_dlm/speech_dlm.py @@ -0,0 +1,280 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
# Fallback for ``max_target_positions`` when neither it nor
# ``tokens_per_sample`` is present on the args (see SpeechDLM.build_model).
DEFAULT_MAX_TARGET_POSITIONS = 1024

logger = logging.getLogger(__name__)


@dataclass
class SpeechDLMConfig(FairseqDataclass):
    """Hyper-parameters of the ``speech_dlm`` model.

    Fields declared with ``II(...)`` are interpolated from the task, common
    and criterion sections of the configuration instead of being set here.
    """

    activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field(
        default="relu", metadata={"help": "activation function to use"}
    )
    dropout: float = field(default=0.1, metadata={"help": "dropout probability"})
    attention_dropout: float = field(
        default=0.0, metadata={"help": "dropout probability for attention weights"}
    )
    activation_dropout: float = field(
        default=0.0, metadata={"help": "dropout probability after activation in FFN."}
    )
    relu_dropout: float = field(
        default=0.0, metadata={"help": "dropout probability after activation in FFN."}
    )
    decoder_embed_dim: int = field(
        default=512, metadata={"help": "decoder embedding dimension"}
    )
    decoder_output_dim: int = field(
        default=512, metadata={"help": "decoder output dimension"}
    )
    decoder_input_dim: int = field(
        default=512, metadata={"help": "decoder input dimension"}
    )
    decoder_ffn_embed_dim: int = field(
        default=2048, metadata={"help": "decoder embedding dimension for FFN"}
    )
    decoder_layers: int = field(default=6, metadata={"help": "num decoder layers"})
    # A negative value is replaced by ``decoder_layers`` in SpeechDLM.build_model.
    decoder_cross_layers: int = field(
        default=-1, metadata={"help": "num self cross attention decoder layers"}
    )
    decoder_attention_heads: int = field(
        default=8, metadata={"help": "num decoder attention heads"}
    )
    # NOTE: base_lm_architecture() unconditionally overrides this to True.
    decoder_normalize_before: bool = field(
        default=False, metadata={"help": "apply layernorm before each decoder block"}
    )
    no_decoder_final_norm: bool = field(
        default=False,
        metadata={"help": "don't add an extra layernorm after the last decoder block"},
    )
    no_token_positional_embeddings: bool = field(
        default=False,
        metadata={
            "help": "if set, disables positional embeddings (outside self attention)"
        },
    )
    share_decoder_input_output_embed: bool = field(
        default=False, metadata={"help": "share decoder input and output embeddings"}
    )
    decoder_learned_pos: bool = field(
        default=False,
        metadata={"help": "use learned positional embeddings in the decoder"},
    )
    decoder_layerdrop: float = field(
        default=0.0, metadata={"help": "LayerDrop probability for decoder"}
    )
    decoder_layers_to_keep: Optional[str] = field(
        default=None,
        metadata={
            "help": "which layers to *keep* when pruning as a comma-separated list"
        },
    )
    layernorm_embedding: bool = field(
        default=False, metadata={"help": "add layernorm to embedding"}
    )
    no_scale_embedding: bool = field(
        default=False, metadata={"help": "if True, dont scale embeddings"}
    )
    checkpoint_activations: bool = field(
        default=False, metadata={"help": "checkpoint activations at each layer"}
    )
    offload_activations: bool = field(
        default=False,
        metadata={"help": "move checkpointed activations to CPU after they are used."},
    )
    quant_noise_pq: float = field(
        default=0.0,
        metadata={"help": "iterative PQ quantization noise at training time"},
    )
    quant_noise_pq_block_size: int = field(
        default=8,
        metadata={"help": "block size of quantization noise at training time"},
    )
    # TODO common var add to parent
    quant_noise_scalar: float = field(
        default=0.0,
        metadata={
            "help": "scalar quantization noise and scalar quantization at training time"
        },
    )
    # Values below are resolved by interpolation from other config sections.
    add_bos_token: bool = II("task.add_bos_token")
    tokens_per_sample: int = II("task.tokens_per_sample")
    max_target_positions: Optional[int] = II("task.max_target_positions")
    tpu: bool = II("common.tpu")
    duration_prediction: str = II("task.duration_prediction")
    delayed_duration_target: str = II("task.delayed_duration_target")
    main_and_cross_weights: str = II("criterion.main_and_cross_weights")


@register_model("speech_dlm", dataclass=SpeechDLMConfig)
class SpeechDLM(FairseqLanguageModel):
    """Spoken Unit-based Dialogue Language Model model (SpeechDLM) as described
    in the paper: https://arxiv.org/pdf/2203.16502.pdf

    The model is a thin wrapper around a ``CrossChannelTransformerDecoder``
    built without encoder attention.
    """

    def __init__(self, decoder):
        super().__init__(decoder)

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance from *args* and *task*.

        *args* is completed in place: missing options get their defaults,
        the layer counts are reconciled, and ``max_target_positions`` is
        filled in when absent. Every channel of *task* must share the task's
        source and target dictionaries.
        """
        # make sure all arguments are present in older models
        base_lm_architecture(args)

        kept_layers = args.decoder_layers_to_keep
        if kept_layers:
            # Pruning: the layer count is the number of comma-separated entries.
            args.decoder_layers = len(kept_layers.split(","))

        if args.decoder_cross_layers < 0:
            # Negative means "as many cross layers as decoder layers".
            args.decoder_cross_layers = args.decoder_layers

        if getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = getattr(
                args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
            )

        channels = task.channels
        # A single embedding table is built below, so every channel has to
        # use the very same dictionaries.
        assert all(
            task.source_dictionaries[channel] == task.source_dictionary
            for channel in channels
        ), "Source dictionaries of all channels are expected to be the same!!!"
        assert all(
            task.target_dictionaries[channel] == task.target_dictionary
            for channel in channels
        ), "Target dictionaries of all channels are expected to be the same!!!"

        # Build the unit embeddings
        unit_embeddings = cls.build_embedding(
            args, task.source_dictionary, args.decoder_input_dim
        )

        return cls(
            CrossChannelTransformerDecoder(
                args,
                task.target_dictionary,
                unit_embeddings,
                channels=channels,
                no_encoder_attn=True,
            )
        )

    @classmethod
    def build_embedding(cls, args, dictionary, embed_dim, path=None):
        """Return an ``Embedding`` with one row per *dictionary* entry.

        ``dictionary.pad()`` is used as the padding index. *args* and *path*
        are accepted for interface compatibility and are not used here.
        """
        return Embedding(len(dictionary), embed_dim, dictionary.pad())

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file="model.pt",
        data_name_or_path=".",
        **kwargs,
    ):
        """Load pre-trained model(s) and wrap them for multichannel generation.

        The arguments are forwarded to ``hub_utils.from_pretrained`` together
        with ``cls.hub_models()`` as the archive map; the loaded args are
        logged at INFO level.

        Args:
            model_name_or_path (str): name of a pre-trained model or a
                path/URL to a pre-trained model state dict
            checkpoint_file (str, optional): colon-separated list of
                checkpoint files in the model archive to ensemble
                (default: 'model.pt')
            data_name_or_path (str, optional): point args.data to the archive
                at the given path/URL. Can start with '.' or './' to reuse
                the model archive path.

        Returns:
            a ``MultichannelGeneratorHubInterface`` built from the loaded
            args, task and models.
        """
        from fairseq import hub_utils
        from .hub_interface import MultichannelGeneratorHubInterface

        loaded = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            **kwargs,
        )
        hub_args = loaded["args"]
        logger.info(hub_args)
        return MultichannelGeneratorHubInterface(
            hub_args, loaded["task"], loaded["models"]
        )

    @property
    def supported_targets(self):
        """Set of target names this model reports as supported."""
        return {"next", "edge", "duration"}


def base_lm_architecture(args):
    """Fill in, in place, every architecture option missing from *args*.

    Options already present keep their value, with two exceptions visible
    below: ``decoder_normalize_before`` is always forced to ``True`` and
    ``checkpoint_activations`` is forced on when ``offload_activations`` is
    set.
    """

    def fill(option, fallback):
        # Keep an existing value, otherwise install the fallback.
        setattr(args, option, getattr(args, option, fallback))

    # backward compatibility for older model checkpoints
    if hasattr(args, "decoder_final_norm"):
        args.no_decoder_final_norm = not args.decoder_final_norm

    for option, fallback in (
        ("dropout", 0.1),
        ("attention_dropout", 0.0),
        ("decoder_embed_dim", 512),
        ("decoder_ffn_embed_dim", 2048),
        ("decoder_layers", 6),
        ("decoder_cross_layers", 6),
        ("decoder_attention_heads", 8),
        ("decoder_learned_pos", False),
        ("activation_fn", "relu"),
        ("decoder_layerdrop", 0),
        ("decoder_layers_to_keep", None),
        ("quant_noise_pq", 0),
        ("quant_noise_pq_block_size", 8),
        ("quant_noise_scalar", 0),
        ("add_bos_token", False),
        ("no_token_positional_embeddings", False),
        ("share_decoder_input_output_embed", False),
    ):
        fill(option, fallback)

    # Input/output widths fall back to the (now resolved) embedding width.
    fill("decoder_output_dim", args.decoder_embed_dim)
    fill("decoder_input_dim", args.decoder_embed_dim)

    # Model training is not stable without this
    args.decoder_normalize_before = True

    for option in (
        "no_decoder_final_norm",
        "no_scale_embedding",
        "layernorm_embedding",
        "checkpoint_activations",
        "offload_activations",
    ):
        fill(option, False)

    if args.offload_activations:
        args.checkpoint_activations = True


@register_model_architecture("speech_dlm", "speech_dlm_big")
def speech_dlm_big(args):
    """Preset with larger layer counts and widths; explicit values on *args* win."""
    for option, fallback in (
        ("decoder_layers", 12),
        ("decoder_cross_layers", 12),
        ("decoder_embed_dim", 1024),
        ("decoder_ffn_embed_dim", 4096),
        ("decoder_attention_heads", 16),
    ):
        setattr(args, option, getattr(args, option, fallback))
    base_lm_architecture(args)
class CTCDecoder(FairseqEncoder):
    """Single linear layer mapping features to one score per dictionary entry.

    Although named a decoder, the class derives from ``FairseqEncoder`` and
    returns its result under the ``"encoder_out"`` key.
    """

    def __init__(self, dictionary, in_dim):
        """
        Args:
            dictionary: vocabulary; ``len(dictionary)`` is the output width
            in_dim (int): size of the last dimension of the input features
        """
        super().__init__(dictionary)
        vocab_size = len(dictionary)
        self.proj = nn.Linear(in_dim, vocab_size)

    def forward(self, src_tokens, src_lengths=None, **kwargs):
        """Project *src_tokens* along its last dimension.

        ``src_lengths`` and any extra keyword arguments are accepted but not
        used.

        Returns:
            dict: ``{"encoder_out": projected tensor}``
        """
        return {"encoder_out": self.proj(src_tokens)}
class StackedEmbedding(nn.Embedding):
    """Embedding module that supports stacked units -> single embedding

    With ``num_stacked == 1`` this behaves like a plain ``nn.Embedding``.
    With ``num_stacked > 1`` each index at or above ``offset`` is read as a
    ``num_stacked``-digit number in base ``vocab_size``; every digit is
    embedded separately, the embeddings are concatenated (most significant
    digit first) and projected back to ``embed_dim``.
    """

    def __init__(self, num_embeddings, embed_dim, padding_idx, num_stacked=1):
        """
        Args:
            num_embeddings (int): table size, including the 4 special symbols
            embed_dim (int): embedding width
            padding_idx (int or None): row that is zero-initialised; ``None``
                leaves every row randomly initialised
            num_stacked (int): number of units packed into one input index
        """
        super().__init__(num_embeddings, embed_dim, padding_idx)
        # follow transformer.Embedding
        nn.init.normal_(self.weight, mean=0, std=embed_dim**-0.5)
        # Indexing with ``None`` would select the whole table and wipe the
        # initialisation above, so only reset a real padding row.
        if padding_idx is not None:
            nn.init.constant_(self.weight[padding_idx], 0)

        # skip <bos>, <pad>, <eos>, <unk>, specific to fairseq dictionary
        self.offset = 4
        self.vocab_size = num_embeddings - self.offset
        self.num_stacked = num_stacked

        if self.num_stacked > 1:
            self.project_in_dim = Linear(embed_dim * num_stacked, embed_dim, bias=False)

    def forward(self, input):
        """Embed *input*.

        Args:
            input (LongTensor): token indices; the stacked path indexes
                ``input.size(0)`` and ``input.size(1)`` and stacks on dim 2,
                so it needs a 2-D tensor.

        Returns:
            Tensor: ``input.shape + (embed_dim,)`` for the unstacked case;
            otherwise whatever ``project_in_dim`` returns for the
            ``(size(0), size(1), num_stacked * embed_dim)`` concatenation.
        """
        if self.num_stacked == 1:
            return super().forward(input)

        # expand input indices
        is_unit = input >= self.offset  # special symbols are passed through as-is
        consumed = input.new_zeros(input.shape)
        digits = []
        for i in range(1, self.num_stacked + 1):
            div = self.vocab_size**i
            # Part of the code below vocab_size**i that earlier digits did
            # not already account for ...
            residue = torch.remainder(input - self.offset - consumed, div)
            consumed += residue
            # ... scaled down to a single digit in [0, vocab_size).
            digit = torch.floor_divide(residue, div // self.vocab_size)
            digits.append((digit + self.offset) * is_unit + input * ~is_unit)

        # Digits were produced least significant first; reverse before stacking.
        stacked_input = torch.stack(digits[::-1], dim=2)
        embed = super().forward(stacked_input).view(input.size(0), input.size(1), -1)
        return self.project_in_dim(embed)
class AugTransformerUnitDecoder(AugTransformerDecoder):
    """Based on Transformer decoder, with support to decoding stacked units

    When ``args.n_frames_per_step`` is greater than one, every decoder
    position is expanded into that many output frames by an extra bias-free
    linear layer before the output layer is applied.
    """

    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        no_encoder_attn=False,
        output_projection=None,
    ):
        super().__init__(
            args, dictionary, embed_tokens, no_encoder_attn, output_projection
        )
        self.n_frames_per_step = args.n_frames_per_step

        # The frame-expansion projection only exists for stacked decoding.
        if self.n_frames_per_step > 1:
            self.out_proj_n_frames = Linear(
                self.output_embed_dim,
                self.output_embed_dim * self.n_frames_per_step,
                bias=False,
            )
        else:
            self.out_proj_n_frames = None

    def forward(
        self,
        prev_output_tokens,
        encoder_out: Optional[Dict[str, List[Tensor]]] = None,
        encoder_out_aug: Optional[Dict[str, List[Tensor]]] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        features_only: bool = False,
        full_context_alignment: bool = False,
        alignment_layer: Optional[int] = None,
        alignment_heads: Optional[int] = None,
        src_lengths: Optional[Any] = None,
        return_all_hiddens: bool = False,
    ):
        """
        Args:
            prev_output_tokens (LongTensor): previous decoder outputs of shape
                `(batch, tgt_len)`, for teacher forcing
            encoder_out (optional): output from the encoder, used for
                encoder-side attention, should be of size T x B x C
            encoder_out_aug (optional): second encoder output, passed through
                to ``extract_features`` unchanged
            incremental_state (dict): dictionary used for storing state during
                :ref:`Incremental decoding`
            features_only (bool, optional): only return features without
                applying output layer (default: False).
            full_context_alignment (bool, optional): don't apply
                auto-regressive mask to self-attention (default: False).
            alignment_layer, alignment_heads (optional): passed through to
                ``extract_features`` unchanged
            src_lengths, return_all_hiddens: accepted but not used here

        Returns:
            tuple:
                - the decoder's output; with the output layer applied its
                  second dimension is ``tgt_len * n_frames_per_step``, minus
                  ``n_frames_per_step - 1`` when no incremental state is given
                - a dictionary with any model-specific outputs
        """
        x, extra = self.extract_features(
            prev_output_tokens,
            encoder_out=encoder_out,
            encoder_out_aug=encoder_out_aug,
            incremental_state=incremental_state,
            full_context_alignment=full_context_alignment,
            alignment_layer=alignment_layer,
            alignment_heads=alignment_heads,
        )
        if features_only:
            return x, extra

        n_frames = self.n_frames_per_step
        bsz, seq_len, d = x.size()
        if self.out_proj_n_frames is not None:
            x = self.out_proj_n_frames(x)
        # One feature vector of width ``d`` per output frame.
        per_frame = x.view(bsz, seq_len, n_frames, d)
        x = self.output_layer(per_frame).view(bsz, seq_len * n_frames, -1)

        # teacher-forcing mode in training
        if incremental_state is None and n_frames > 1:
            # remove extra frames after <eos>
            x = x[:, : -(n_frames - 1), :]

        return x, extra

    def upgrade_state_dict_named(self, state_dict, name):
        """Rename the input-projection weight to its ``embed_tokens`` location.

        Does nothing unless ``n_frames_per_step > 1``. The rename is skipped
        when the old key is absent or the new key already exists; *state_dict*
        is modified in place.
        """
        if self.n_frames_per_step <= 1:
            return

        old_key = f"{name}.project_in_dim.weight"
        new_key = f"{name}.embed_tokens.project_in_dim.weight"
        if old_key in state_dict and new_key not in state_dict:
            state_dict[new_key] = state_dict.pop(old_key)
class TransformerEncoderNoEmb(FairseqEncoder):
    """Stack of Transformer encoder layers applied to pre-computed features.

    No token embedding is built here; ``forward`` receives the feature
    tensor directly.
    """

    def __init__(self, args):
        # The base class is constructed with ``None`` (no dictionary object).
        super().__init__(None)

        layer_stack = [
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ]
        self.layers = nn.ModuleList(layer_stack)
        # Optional final LayerNorm, enabled by ``args.encoder_normalize_before``.
        self.layer_norm = (
            LayerNorm(args.encoder_embed_dim)
            if args.encoder_normalize_before
            else None
        )

    def forward(self, x, encoder_padding_mask, return_all_hiddens=False):
        """Run ``x`` through every layer and package the result.

        Args:
            x: input features handed to the first layer.
            encoder_padding_mask: mask forwarded to every layer; may be ``None``.
            return_all_hiddens: when true, the output of each layer is
                collected under ``"encoder_states"``.

        Returns:
            dict with the keys ``encoder_out``, ``encoder_padding_mask``,
            ``encoder_embedding``, ``encoder_states``, ``src_tokens`` and
            ``src_lengths``. The padding mask is only included when it is
            not ``None`` and has at least one true entry.
        """
        per_layer_outputs = []
        for block in self.layers:
            x = block(x, encoder_padding_mask)
            if return_all_hiddens:
                per_layer_outputs.append(x)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        has_padding = encoder_padding_mask is not None and encoder_padding_mask.any()
        return {
            "encoder_out": [x],  # T x B x C
            "encoder_padding_mask": [encoder_padding_mask]
            if has_padding
            else [],  # B x T
            "encoder_embedding": [],  # B x T x C
            "encoder_states": per_layer_outputs,  # List[T x B x C]
            "src_tokens": [],
            "src_lengths": [],
        }

    def reorder_encoder_out(self, encoder_out, new_order):
        """Reorder the batch dimension of an encoder output dict.

        ``encoder_out`` and ``encoder_states`` entries are selected along
        dim 1, ``encoder_padding_mask`` and ``encoder_embedding`` entries
        along dim 0, all with ``index_select(…, new_order)``.

        Note:
            The ``encoder_states`` list of the *input* dict is updated in
            place and the very same list object is returned.
        """

        def reordered(key, dim):
            # An empty list simply stays empty.
            return [t.index_select(dim, new_order) for t in encoder_out[key]]

        new_encoder_out = reordered("encoder_out", 1)
        new_encoder_padding_mask = reordered("encoder_padding_mask", 0)
        new_encoder_embedding = reordered("encoder_embedding", 0)

        layer_states = encoder_out["encoder_states"]
        # Slice assignment keeps the in-place update of the caller's list.
        layer_states[:] = [
            state.index_select(1, new_order) for state in layer_states
        ]

        return {
            "encoder_out": new_encoder_out,  # T x B x C
            "encoder_padding_mask": new_encoder_padding_mask,  # B x T
            "encoder_embedding": new_encoder_embedding,  # B x T x C
            "encoder_states": layer_states,  # List[T x B x C]
            "src_tokens": [],  # B x T
            "src_lengths": [],  # B x 1
        }
@register_model("s2ut_conformer")
class S2UTConformerModel(S2UTTransformerModel):
    """
    Speech-to-unit translation model registered as ``s2ut_conformer``.

    Extends ``S2UTTransformerModel``; the encoder is created by
    ``build_s2s_conformer_encoder``.
    """

    @staticmethod
    def add_args(parser):
        """Add the parent model's options plus three Conformer options."""
        S2UTTransformerModel.add_args(parser)

        # (flag, type, metavar, help) for each Conformer-specific option
        conformer_options = (
            (
                "--depthwise-conv-kernel-size",
                int,
                "N",
                "kernel size of depthwise convolution layers",
            ),
            (
                "--attn-type",
                str,
                "STR",
                "If not specified uses fairseq MHA. Other valid option is espnet for using conformer",
            ),
            (
                "--pos-enc-type",
                str,
                "STR",
                "Must be specified in addition to attn-type=espnet for rel_pos and rope",
            ),
        )
        for flag, value_type, metavar, help_text in conformer_options:
            parser.add_argument(
                flag, type=value_type, metavar=metavar, help=help_text
            )

    @classmethod
    def build_encoder(cls, args):
        """Return the encoder built by ``build_s2s_conformer_encoder``."""
        encoder = build_s2s_conformer_encoder(args)
        return encoder
@register_model_architecture("s2ut_conformer", "s2ut_conformer")
def s2ut_conformer_architecture_base(args):
    """Fill in defaults for the ``s2ut_conformer`` architecture.

    Every attribute listed below keeps its current value when ``args``
    already has it and receives the listed default otherwise. Afterwards
    ``s2ut_architecture_base`` is applied to the same ``args``.
    """
    fallbacks = (
        ("attn_type", None),
        ("pos_enc_type", "abs"),
        ("input_feat_per_channel", 80),
        ("input_channels", 1),
        ("max_source_positions", 6000),
        ("encoder_embed_dim", 256),
        ("encoder_ffn_embed_dim", 2048),
        ("encoder_attention_heads", 4),
        ("dropout", 0.1),
        ("encoder_layers", 16),
        ("depthwise_conv_kernel_size", 31),
    )
    for attr, default in fallbacks:
        setattr(args, attr, getattr(args, attr, default))

    s2ut_architecture_base(args)
@register_model_architecture("s2spect_conformer", "s2spect_conformer_fisher")
def s2spect_architecture_fisher(args):
    """Fill in defaults for the ``s2spect_conformer_fisher`` architecture.

    Attributes already present on ``args`` are left alone; missing ones get
    the defaults below. The remaining defaults come from
    ``s2spect_conformer_architecture_base``, which is called last.
    """
    encoder_fallbacks = (
        ("encoder_embed_dim", 256),
        ("encoder_ffn_embed_dim", 256 * 8),
        ("encoder_attention_heads", 4),
        ("dropout", 0.1),
    )
    decoder_fallbacks = (("prenet_dim", 32),)

    for attr, default in encoder_fallbacks + decoder_fallbacks:
        setattr(args, attr, getattr(args, attr, default))

    s2spect_conformer_architecture_base(args)
@staticmethod
def add_args(parser):
    """Add the parent model's options plus the two-pass options.

    The parent ``S2SpecTConformerModel`` options are added first, followed
    by ``--translation-decoder-layers``, ``--synthesizer`` and
    ``--synthesizer-encoder-layers``.
    """
    S2SpecTConformerModel.add_args(parser)

    two_pass_options = {
        "--translation-decoder-layers": dict(
            type=int,
            default=4,
            metavar="N",
            help="num decoder layers in the first-pass translation module",
        ),
        "--synthesizer": dict(
            default="transformer",
            choices=["transformer"],
            help="",
        ),
        "--synthesizer-encoder-layers": dict(
            type=int,
            default=0,
            metavar="N",
            help="num encoder layers in the second-pass synthesizer module",
        ),
    }
    # dicts preserve insertion order, so options are added in the order above
    for flag, spec in two_pass_options.items():
        parser.add_argument(flag, **spec)
@classmethod
def build_model(cls, args, task):
    """Build the model together with its multitask decoders.

    Steps:
      1. Build the encoder and decoder and wrap them in ``cls``.
      2. For every entry of ``task.multitask_tasks`` build a decoder,
         attach it as ``<task_name>_decoder`` and register a wrapped
         version in ``multitask_decoders``. The name of a task flagged
         ``is_first_pass_decoder`` is stored in ``mt_task_name``.
      3. Optionally build ``synthesizer_encoder`` when
         ``args.synthesizer_encoder_layers`` is positive.

    Raises:
        AssertionError: if no task is flagged as first-pass decoder.
    """
    base_model = cls(cls.build_encoder(args), cls.build_decoder(args))

    # set up multitask decoders
    base_model.mt_task_name = None
    base_model.multitask_decoders = {}

    # Passed unchanged to every build_multitask_decoder call below.
    mt_layers = getattr(args, "translation_decoder_layers", 4)
    mt_embed_dim = getattr(args, "decoder_embed_dim", 256)
    mt_heads = getattr(args, "decoder_attention_heads", 4)

    found_first_pass = False
    for task_name, task_obj in task.multitask_tasks.items():
        is_first_pass = task_obj.is_first_pass_decoder
        if is_first_pass:
            found_first_pass = True
            base_model.mt_task_name = task_name

        # Input width depends on where the auxiliary decoder is attached.
        if task_obj.args.input_from == "encoder":
            in_dim = args.encoder_embed_dim
        else:
            in_dim = args.decoder_embed_dim

        attr_name = f"{task_name}_decoder"
        setattr(
            base_model,
            attr_name,
            cls.build_multitask_decoder(
                task_obj.args,
                task_obj.target_dictionary,
                in_dim,
                is_first_pass,
                mt_layers,
                mt_embed_dim,
                mt_heads,
            ),
        )

        if task_obj.args.decoder_type == "ctc":
            wrapper_cls = FairseqEncoderModel
        else:
            wrapper_cls = FairseqLanguageModel
        base_model.multitask_decoders[task_name] = wrapper_cls(
            getattr(base_model, attr_name)
        )

    assert found_first_pass, "set at least one intermediate non-CTC decoder"

    # set up encoder on top of the auxiliary MT decoder
    synthesizer_layers = getattr(args, "synthesizer_encoder_layers", 0)
    base_model.synthesizer_encoder = (
        cls.build_text_encoder(args) if synthesizer_layers > 0 else None
    )

    return base_model
@register_model_architecture(
    model_name="s2spect2_conformer", arch_name="s2spect2_conformer"
)
def s2spect2_conformer_architecture_base(args):
    """Fill in defaults for the ``s2spect2_conformer`` architecture.

    Attributes already present on ``args`` are kept; missing ones get the
    defaults below. ``s2spect_architecture_base`` is then applied to the
    same ``args``.
    """
    fallbacks = (
        ("conv_version", "convtransformer"),
        ("attn_type", None),
        ("pos_enc_type", "abs"),
        ("max_source_positions", 6000),
        ("encoder_embed_dim", 256),
        ("encoder_ffn_embed_dim", 2048),
        ("encoder_attention_heads", 4),
        ("dropout", 0.1),
        ("encoder_layers", 16),
        ("depthwise_conv_kernel_size", 31),
    )
    for attr, default in fallbacks:
        setattr(args, attr, getattr(args, attr, default))

    s2spect_architecture_base(args)
def multitask_text_transformer_decoder_arch(
    args, decoder_layers, decoder_embed_dim=256, decoder_attention_heads=4
):
    """Set the decoder size on ``args``, then apply the base multitask arch.

    Unlike the ``getattr``-with-default pattern used by the architecture
    functions, the three attributes are overwritten unconditionally before
    ``base_multitask_text_transformer_decoder_arch`` is called.

    Args:
        args: namespace to configure; mutated in place.
        decoder_layers: value stored in ``args.decoder_layers``.
        decoder_embed_dim: value stored in ``args.decoder_embed_dim``.
        decoder_attention_heads: value stored in
            ``args.decoder_attention_heads``.
    """
    overrides = (
        ("decoder_layers", decoder_layers),
        ("decoder_embed_dim", decoder_embed_dim),
        ("decoder_attention_heads", decoder_attention_heads),
    )
    for attr, value in overrides:
        setattr(args, attr, value)

    base_multitask_text_transformer_decoder_arch(args)
@classmethod
def build_multitask_decoder(
    cls,
    args,
    tgt_dict,
    in_dim,
    is_first_pass_decoder,
    decoder_layers,
    decoder_embed_dim,
    decoder_attention_heads,
):
    """Build one auxiliary decoder for a multitask objective.

    ``args.decoder_args.encoder_embed_dim`` is always set to ``in_dim``
    first. Then, depending on ``args.decoder_type``:

    * ``"transformer"``: ``decoder_args`` is configured with the sized arch
      for a first-pass decoder or the base arch otherwise, and a
      ``TransformerDecoder`` is returned.
    * ``"ctc"``: a ``CTCDecoder`` is returned.

    Raises:
        NotImplementedError: for any other ``args.decoder_type``.
    """
    decoder_args = args.decoder_args
    decoder_args.encoder_embed_dim = in_dim

    decoder_type = args.decoder_type
    if decoder_type == "ctc":
        return CTCDecoder(
            dictionary=tgt_dict,
            in_dim=in_dim,
        )
    if decoder_type != "transformer":
        raise NotImplementedError(
            "currently only support multitask decoder_type 'transformer', 'ctc'"
        )

    if is_first_pass_decoder:
        multitask_text_transformer_decoder_arch(
            decoder_args,
            decoder_layers,
            decoder_embed_dim,
            decoder_attention_heads,
        )  # 4L
    else:
        base_multitask_text_transformer_decoder_arch(decoder_args)  # 2L

    token_embedding = TransformerModelBase.build_embedding(
        decoder_args,
        tgt_dict,
        decoder_args.decoder_embed_dim,
    )
    return TransformerDecoder(
        decoder_args,
        tgt_dict,
        embed_tokens=token_embedding,
    )
def forward(
    self,
    src_tokens,
    src_lengths,
    prev_output_tokens,
    prev_output_tokens_mt,
    tgt_speaker=None,
    return_all_hiddens=False,
):
    """Run encoder, first-pass MT decoder and second-pass unit decoder.

    Pipeline:
      1. ``self.encoder`` encodes the source.
      2. The decoder stored as ``<mt_task_name>_decoder`` consumes
         ``prev_output_tokens_mt``; its last inner state (after its
         ``layer_norm`` when present) becomes the second-pass input.
      3. That state goes through ``self.synthesizer_encoder`` when one is
         set; otherwise it is wrapped in an encoder-output-like dict.
      4. ``self.decoder`` attends either to the second-pass input only, or
         (``self.t2u_augmented_cross_attn``) to the speech encoder output
         with the second-pass input passed as ``encoder_out_aug``.

    Returns:
        The output of ``self.decoder``. Its last element additionally gets
        ``"mt_decoder_out"`` and, when ``return_all_hiddens`` is true,
        ``"encoder_states"`` and ``"encoder_padding_mask"`` copied from
        the speech encoder output.
    """
    mt_decoder = getattr(self, f"{self.mt_task_name}_decoder")

    encoder_out = self.encoder(
        src_tokens,
        src_lengths=src_lengths,
        tgt_speaker=tgt_speaker,
        return_all_hiddens=return_all_hiddens,
    )

    # 1. MT decoder
    mt_decoder_out = mt_decoder(prev_output_tokens_mt, encoder_out=encoder_out)
    mt_hidden = mt_decoder_out[1]["inner_states"][-1]
    final_norm = mt_decoder.layer_norm
    if final_norm is not None:
        mt_hidden = final_norm(mt_hidden)

    # The mask is only kept when at least one position is padding.
    pad_positions = prev_output_tokens_mt.eq(mt_decoder.padding_idx)
    mt_decoder_padding_mask = pad_positions if pad_positions.any() else None

    # 2. T2U encoder
    if self.synthesizer_encoder is None:
        t2u_encoder_out = {
            "encoder_out": [mt_hidden],  # T x B x C
            "encoder_padding_mask": [mt_decoder_padding_mask],  # B x T
        }
    else:
        t2u_encoder_out = self.synthesizer_encoder(
            mt_hidden,
            mt_decoder_padding_mask,
            return_all_hiddens=return_all_hiddens,
        )

    # 3. T2U decoder
    if self.t2u_augmented_cross_attn:
        decoder_inputs = {
            "encoder_out": encoder_out,
            "encoder_out_aug": t2u_encoder_out,
        }
    else:
        decoder_inputs = {"encoder_out": t2u_encoder_out}
    decoder_out = self.decoder(prev_output_tokens, **decoder_inputs)

    extras = decoder_out[-1]
    if return_all_hiddens:
        for key in ("encoder_states", "encoder_padding_mask"):
            extras[key] = encoder_out[key]
    extras["mt_decoder_out"] = mt_decoder_out
    return decoder_out
class S2STransformerEncoder(S2TTransformerEncoder):
    """S2T Transformer encoder that can mix in a target-speaker embedding.

    When ``args.target_speaker_embed`` is set, the encoder output is
    concatenated with the speaker embedding and projected back to
    ``args.encoder_embed_dim``.
    """

    def __init__(self, args):
        super().__init__(args)

        # Projection from (encoder dim + speaker dim) back to encoder dim;
        # stays None when speaker conditioning is disabled.
        self.spk_emb_proj = (
            Linear(
                args.encoder_embed_dim + args.speaker_embed_dim,
                args.encoder_embed_dim,
            )
            if args.target_speaker_embed
            else None
        )

    def forward(
        self, src_tokens, src_lengths, tgt_speaker=None, return_all_hiddens=False
    ):
        """Encode the source and optionally fuse the target-speaker embedding.

        Args:
            src_tokens: forwarded to the parent encoder.
            src_lengths: forwarded to the parent encoder.
            tgt_speaker: speaker embedding, reshaped to ``(1, bsz, -1)`` and
                expanded over time; used only when the speaker projection
                exists.
            return_all_hiddens: forwarded to the parent encoder.

        Returns:
            The parent encoder's output dict; ``encoder_out[0]`` is replaced
            in place by the speaker-conditioned features when applicable.
        """
        out = super().forward(src_tokens, src_lengths, return_all_hiddens)
        if self.spk_emb_proj is None:
            return out

        features = out["encoder_out"][0]
        seq_len, bsz, _ = features.shape
        # Repeat the per-utterance speaker vector at every time step.
        speaker_per_step = tgt_speaker.view(1, bsz, -1).expand(seq_len, bsz, -1)
        fused = torch.cat([features, speaker_per_step], dim=2)
        out["encoder_out"][0] = self.spk_emb_proj(fused)
        return out
def forward(
    self,
    prev_output_tokens,
    encoder_out: Optional[Dict[str, List[Tensor]]] = None,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
    features_only: bool = False,
    full_context_alignment: bool = False,
    alignment_layer: Optional[int] = None,
    alignment_heads: Optional[int] = None,
    src_lengths: Optional[Any] = None,
    return_all_hiddens: bool = False,
):
    """Decode stacked units, expanding each step into several frames.

    Args:
        prev_output_tokens (LongTensor): previous decoder outputs of shape
            `(batch, tgt_len)`, for teacher forcing
        encoder_out (optional): output from the encoder, used for
            encoder-side attention, should be of size T x B x C
        incremental_state (dict): dictionary used for storing state during
            incremental decoding
        features_only (bool, optional): return the extracted features
            without the frame expansion and output layer (default: False).
        full_context_alignment (bool, optional): forwarded to
            ``extract_features`` (default: False).
        alignment_layer, alignment_heads: forwarded to ``extract_features``.
        src_lengths, return_all_hiddens: accepted but not used here.

    Returns:
        tuple:
            - the decoder's output; with the output layer applied it has
              ``seq_len * n_frames_per_step`` steps, minus the trailing
              ``n_frames_per_step - 1`` steps when no incremental state
              is given
            - the extra dictionary returned by ``extract_features``
    """
    features, extra = self.extract_features(
        prev_output_tokens,
        encoder_out=encoder_out,
        incremental_state=incremental_state,
        full_context_alignment=full_context_alignment,
        alignment_layer=alignment_layer,
        alignment_heads=alignment_heads,
    )
    if features_only:
        return features, extra

    frames = self.n_frames_per_step
    bsz, seq_len, dim = features.size()

    projected = features
    if self.out_proj_n_frames is not None:
        # widen each step so that it can be split into `frames` vectors
        projected = self.out_proj_n_frames(projected)

    logits = self.output_layer(projected.view(bsz, seq_len, frames, dim))
    logits = logits.view(bsz, seq_len * frames, -1)

    teacher_forcing = incremental_state is None  # teacher-forcing mode in training
    if teacher_forcing and frames > 1:
        # remove extra frames after <eos>
        logits = logits[:, : -(frames - 1), :]

    return logits, extra
def forward_encoder(self, src_tokens, src_lengths, speaker=None, **kwargs):
    """Run only the encoder.

    ``speaker`` is handed to the encoder under the keyword ``tgt_speaker``;
    any additional keyword arguments are passed through unchanged.

    Returns:
        Whatever ``self.encoder`` returns.
    """
    encoder = self.encoder
    encoder_out = encoder(
        src_tokens,
        src_lengths=src_lengths,
        tgt_speaker=speaker,
        **kwargs,
    )
    return encoder_out
help="kernel sizes of Conv1d (s2t_transformer) subsampling layers", + ) + parser.add_argument( + "--conv-channels", + type=int, + metavar="N", + help="# of channels in Conv1d (s2t_transformer) subsampling layers", + ) + parser.add_argument( + "--conv-out-channels", + type=int, + metavar="N", + help="# of channels in Conv2d (convtransformer) subsampling layers", + ) + parser.add_argument( + "--conv-version", + type=str, + default="s2t_transformer", + choices=["s2t_transformer", "convtransformer"], + help="version of frontend convolutional layers", + ) + # Transformer + parser.add_argument( + "--activation-fn", + type=str, + default="relu", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + 
"--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + help="if True, dont scale embeddings", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model to take encoder weights from (for initialization)", + ) + parser.add_argument( + "--encoder-freezing-updates", + type=int, + metavar="N", + help="freeze encoder for first N updates", + ) + # speaker + parser.add_argument( + "--speaker-embed-dim", + type=int, + metavar="N", + help="speaker embedding dimension", + ) + + @classmethod + def build_decoder(cls, args, tgt_dict): + num_embeddings = len(tgt_dict) + padding_idx = tgt_dict.pad() + embed_tokens = StackedEmbedding( + num_embeddings, + args.decoder_embed_dim, + padding_idx, + num_stacked=args.n_frames_per_step, + ) + + return TransformerUnitDecoder( + args, + tgt_dict, + embed_tokens, + ) + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + tgt_speaker=None, + return_all_hiddens=False, + ): + encoder_out = self.encoder( + src_tokens, + src_lengths=src_lengths, + tgt_speaker=tgt_speaker, + return_all_hiddens=return_all_hiddens, + ) + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + ) + if return_all_hiddens: + decoder_out[-1]["encoder_states"] = encoder_out["encoder_states"] + decoder_out[-1]["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ] + return 
decoder_out + + +@register_model("s2spect_transformer") +class S2SpecTTransformerModel(S2STransformerMultitaskModelBase): + """ + Speech-to-spectrogram model with S2T Transformer encoder + TTS Transformer decoder + """ + + @staticmethod + def add_args(parser): + # input + parser.add_argument( + "--conv-kernel-sizes", + type=str, + metavar="STR", + help="kernel sizes of Conv1d (s2t_transformer) subsampling layers", + ) + parser.add_argument( + "--conv-channels", + type=int, + metavar="N", + help="# of channels in Conv1d (s2t_transformer) subsampling layers", + ) + parser.add_argument( + "--conv-version", + type=str, + default="s2t_transformer", + choices=["s2t_transformer", "convtransformer"], + help="version of frontend convolutional layers", + ) + # Transformer + parser.add_argument( + "--activation-fn", + type=str, + default="relu", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + 
help="if True, dont scale embeddings", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model to take encoder weights from (for initialization)", + ) + parser.add_argument( + "--encoder-freezing-updates", + type=int, + metavar="N", + help="freeze encoder for first N updates", + ) + # speaker + parser.add_argument( + "--speaker-embed-dim", + type=int, + metavar="N", + help="speaker embedding dimension", + ) + # decoder + parser.add_argument("--output-frame-dim", type=int) + # decoder prenet + parser.add_argument("--prenet-dropout", type=float) + parser.add_argument("--prenet-layers", type=int) + parser.add_argument("--prenet-dim", type=int) + # decoder postnet + parser.add_argument("--postnet-dropout", type=float) + parser.add_argument("--postnet-layers", type=int) + parser.add_argument("--postnet-conv-dim", type=int) + parser.add_argument("--postnet-conv-kernel-size", type=int) + # decoder transformer layers + parser.add_argument("--decoder-transformer-layers", type=int) + parser.add_argument("--decoder-embed-dim", type=int) + parser.add_argument("--decoder-ffn-embed-dim", type=int) + parser.add_argument("--decoder-normalize-before", action="store_true") + parser.add_argument("--decoder-attention-heads", type=int) + + @classmethod + def build_decoder(cls, args): + return TTSTransformerDecoder(args, None, padding_idx=1) + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + tgt_speaker=None, + incremental_state=None, + target_lengths=None, + speaker=None, + return_all_hiddens=False, + ): + encoder_out = self.encoder( + src_tokens, + src_lengths=src_lengths, + tgt_speaker=tgt_speaker, + return_all_hiddens=return_all_hiddens, + ) + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + target_lengths=target_lengths, + speaker=speaker, + ) + if return_all_hiddens: + decoder_out[-1]["encoder_states"] = encoder_out["encoder_states"] + 
def base_multitask_text_transformer_decoder_arch(args):
    """Populate ``args`` with defaults for a multitask text Transformer decoder.

    Every option is written back onto ``args``: a value that is already
    present is kept, otherwise the default listed below is installed.
    Options are processed in a fixed order because some defaults are derived
    from options resolved earlier (output/input dims follow
    ``decoder_embed_dim``; activation/attention dropout follow ``dropout``).

    Args:
        args: namespace-like object holding the decoder hyper-parameters.
            It is modified in place.
    """

    def resolve(option, fallback):
        # Keep a user-provided value, else fall back; always (re)assign.
        value = getattr(args, option, fallback)
        setattr(args, option, value)
        return value

    dropout = resolve("dropout", 0.3)
    resolve("decoder_layerdrop", 0.0)
    resolve("share_decoder_input_output_embed", True)

    embed_dim = resolve("decoder_embed_dim", 256)
    resolve("decoder_output_dim", embed_dim)
    resolve("decoder_input_dim", embed_dim)

    resolve("max_target_positions", 1024)
    resolve("no_scale_embedding", False)

    resolve("adaptive_input", False)
    resolve("quant_noise_pq", 0)

    resolve("decoder_learned_pos", False)
    resolve("no_token_positional_embeddings", False)

    resolve("decoder_layers", 2)

    resolve("adaptive_softmax_cutoff", None)

    # decoder layer
    resolve("activation_dropout", dropout)
    resolve("activation_fn", "relu")
    resolve("decoder_normalize_before", True)
    resolve("decoder_ffn_embed_dim", 2048)

    resolve("attention_dropout", dropout)
    resolve("decoder_attention_heads", 4)
@register_model_architecture(
    model_name="s2ut_transformer", arch_name="s2ut_transformer"
)
def s2ut_architecture_base(args):
    """Fill in the default hyper-parameters of the base S2UT architecture.

    ``base_s2st_transformer_encoder_architecture`` is applied first, then
    each decoder option is written back onto ``args``, keeping any value
    that is already set.

    Args:
        args: namespace-like object with the model options; modified in place.
    """
    base_s2st_transformer_encoder_architecture(args)

    def keep_or_set(option, fallback):
        # Preserve an existing value, otherwise install the fallback.
        value = getattr(args, option, fallback)
        setattr(args, option, value)
        return value

    # decoder: sizes follow the encoder unless explicitly overridden
    embed_dim = keep_or_set("decoder_embed_dim", args.encoder_embed_dim)
    keep_or_set("decoder_ffn_embed_dim", args.encoder_ffn_embed_dim)

    fixed_defaults = (
        ("decoder_layers", 6),
        ("decoder_attention_heads", 8),
        ("decoder_normalize_before", True),
        ("decoder_learned_pos", False),
        ("adaptive_softmax_cutoff", None),
        ("adaptive_softmax_dropout", 0),
        ("share_decoder_input_output_embed", False),
        ("no_token_positional_embeddings", False),
        ("adaptive_input", False),
        ("decoder_layerdrop", 0.0),
    )
    for option, fallback in fixed_defaults:
        keep_or_set(option, fallback)

    # input/output projections default to the embedding width
    keep_or_set("decoder_output_dim", embed_dim)
    keep_or_set("decoder_input_dim", embed_dim)
    keep_or_set("quant_noise_pq", 0)
@register_model_architecture("s2spect_transformer", "s2spect_transformer_fisher")
def s2spect_architecture_fisher(args):
    """Apply the ``s2spect_transformer_fisher`` defaults to ``args``.

    The overrides below are installed only where ``args`` has no value yet;
    the remaining options are then filled in by ``s2spect_architecture_base``.

    Args:
        args: namespace-like object with the model options; modified in place.
    """
    fisher_defaults = {
        "encoder_embed_dim": 256,
        "encoder_ffn_embed_dim": 256 * 8,
        "encoder_attention_heads": 4,
        "dropout": 0.1,
        # decoder
        "prenet_dim": 32,
    }
    for option, fallback in fisher_defaults.items():
        setattr(args, option, getattr(args, option, fallback))

    s2spect_architecture_base(args)
"""Implementation of a model similar to https://arxiv.org/abs/1802.04200 + + Paper title: End-to-End Automatic Speech Translation of Audiobooks + An implementation is available in tensorflow at + https://github.com/eske/seq2seq + Relevant files in this implementation are the config + (https://github.com/eske/seq2seq/blob/master/config/LibriSpeech/AST.yaml) + and the model code + (https://github.com/eske/seq2seq/blob/master/translate/models.py). + The encoder and decoder try to be close to the original implementation. + The attention is an MLP as in Bahdanau et al. + (https://arxiv.org/abs/1409.0473). + There is no state initialization by averaging the encoder outputs. + """ + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + parser.add_argument( + "--input-layers", + type=str, + metavar="EXPR", + help="List of linear layer dimensions. These " + "layers are applied to the input features and " + "are followed by tanh and possibly dropout.", + ) + parser.add_argument( + "--dropout", + type=float, + metavar="D", + help="Dropout probability to use in the encoder/decoder. " + "Note that this parameters control dropout in various places, " + "there is no fine-grained control for dropout for embeddings " + "vs LSTM layers for example.", + ) + parser.add_argument( + "--in-channels", + type=int, + metavar="N", + help="Number of encoder input channels. " "Typically value is 1.", + ) + parser.add_argument( + "--conv-layers", + type=str, + metavar="EXPR", + help="List of conv layers " "(format: (channels, kernel, stride)).", + ) + parser.add_argument( + "--num-blstm-layers", + type=int, + metavar="N", + help="Number of encoder bi-LSTM layers.", + ) + parser.add_argument( + "--lstm-size", type=int, metavar="N", help="LSTM hidden size." 
+ ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="Embedding dimension of the decoder target tokens.", + ) + parser.add_argument( + "--decoder-hidden-dim", + type=int, + metavar="N", + help="Decoder LSTM hidden dimension.", + ) + parser.add_argument( + "--decoder-num-layers", + type=int, + metavar="N", + help="Number of decoder LSTM layers.", + ) + parser.add_argument( + "--attention-dim", + type=int, + metavar="N", + help="Hidden layer dimension in MLP attention.", + ) + parser.add_argument( + "--output-layer-dim", + type=int, + metavar="N", + help="Hidden layer dim for linear layer prior to output projection.", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model to take encoder weights from (for initialization)", + ) + parser.add_argument( + "--load-pretrained-decoder-from", + type=str, + metavar="STR", + help="model to take decoder weights from (for initialization)", + ) + + @classmethod + def build_encoder(cls, args, task): + encoder = BerardEncoder( + input_layers=literal_eval(args.input_layers), + conv_layers=literal_eval(args.conv_layers), + in_channels=args.input_channels, + input_feat_per_channel=args.input_feat_per_channel, + num_blstm_layers=args.num_blstm_layers, + lstm_size=args.lstm_size, + dropout=args.dropout, + ) + if getattr(args, "load_pretrained_encoder_from", None) is not None: + encoder = checkpoint_utils.load_pretrained_component_from_model( + component=encoder, checkpoint=args.load_pretrained_encoder_from + ) + return encoder + + @classmethod + def build_decoder(cls, args, task): + decoder = LSTMDecoder( + dictionary=task.target_dictionary, + embed_dim=args.decoder_embed_dim, + num_layers=args.decoder_num_layers, + hidden_size=args.decoder_hidden_dim, + dropout=args.dropout, + encoder_output_dim=2 * args.lstm_size, # bidirectional + attention_dim=args.attention_dim, + output_layer_dim=args.output_layer_dim, + ) + if getattr(args, 
"load_pretrained_decoder_from", None) is not None: + decoder = checkpoint_utils.load_pretrained_component_from_model( + component=decoder, checkpoint=args.load_pretrained_decoder_from + ) + return decoder + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + encoder = cls.build_encoder(args, task) + decoder = cls.build_decoder(args, task) + + return cls(encoder, decoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = super().get_normalized_probs(net_output, log_probs, sample) + # lprobs is a (B, T, D) tensor + lprobs.batch_first = True + return lprobs + + +class BerardEncoder(FairseqEncoder): + def __init__( + self, + input_layers: List[int], + conv_layers: List[Tuple[int]], + in_channels: int, + input_feat_per_channel: int, + num_blstm_layers: int, + lstm_size: int, + dropout: float, + ): + """ + Args: + input_layers: list of linear layer dimensions. These layers are + applied to the input features and are followed by tanh and + possibly dropout. + conv_layers: list of conv2d layer configurations. A configuration is + a tuple (out_channels, conv_kernel_size, stride). + in_channels: number of input channels. + input_feat_per_channel: number of input features per channel. These + are speech features, typically 40 or 80. + num_blstm_layers: number of bidirectional LSTM layers. + lstm_size: size of the LSTM hidden (and cell) size. + dropout: dropout probability. Dropout can be applied after the + linear layers and LSTM layers but not to the convolutional + layers. 
+ """ + super().__init__(None) + + self.input_layers = nn.ModuleList() + in_features = input_feat_per_channel + for out_features in input_layers: + if dropout > 0: + self.input_layers.append( + nn.Sequential( + nn.Linear(in_features, out_features), nn.Dropout(p=dropout) + ) + ) + else: + self.input_layers.append(nn.Linear(in_features, out_features)) + in_features = out_features + + self.in_channels = in_channels + self.input_dim = input_feat_per_channel + self.conv_kernel_sizes_and_strides = [] + self.conv_layers = nn.ModuleList() + lstm_input_dim = input_layers[-1] + for conv_layer in conv_layers: + out_channels, conv_kernel_size, conv_stride = conv_layer + self.conv_layers.append( + nn.Conv2d( + in_channels, + out_channels, + conv_kernel_size, + stride=conv_stride, + padding=conv_kernel_size // 2, + ) + ) + self.conv_kernel_sizes_and_strides.append((conv_kernel_size, conv_stride)) + in_channels = out_channels + lstm_input_dim //= conv_stride + + lstm_input_dim *= conv_layers[-1][0] + self.lstm_size = lstm_size + self.num_blstm_layers = num_blstm_layers + self.lstm = nn.LSTM( + input_size=lstm_input_dim, + hidden_size=lstm_size, + num_layers=num_blstm_layers, + dropout=dropout, + bidirectional=True, + ) + self.output_dim = 2 * lstm_size # bidirectional + if dropout > 0: + self.dropout = nn.Dropout(p=dropout) + else: + self.dropout = None + + def forward(self, src_tokens, src_lengths=None, **kwargs): + """ + Args + src_tokens: padded tensor (B, T, C * feat) + src_lengths: tensor of original lengths of input utterances (B,) + """ + bsz, max_seq_len, _ = src_tokens.size() + # (B, C, T, feat) + x = ( + src_tokens.view(bsz, max_seq_len, self.in_channels, self.input_dim) + .transpose(1, 2) + .contiguous() + ) + + for input_layer in self.input_layers: + x = input_layer(x) + x = torch.tanh(x) + + for conv_layer in self.conv_layers: + x = conv_layer(x) + + bsz, _, output_seq_len, _ = x.size() + + # (B, C, T, feat) -> (B, T, C, feat) -> (T, B, C, feat) -> + # (T, B, C * 
feat) + x = x.transpose(1, 2).transpose(0, 1).contiguous().view(output_seq_len, bsz, -1) + + input_lengths = src_lengths.clone() + for k, s in self.conv_kernel_sizes_and_strides: + p = k // 2 + input_lengths = (input_lengths.float() + 2 * p - k) / s + 1 + input_lengths = input_lengths.floor().long() + + packed_x = nn.utils.rnn.pack_padded_sequence(x, input_lengths) + + h0 = x.new(2 * self.num_blstm_layers, bsz, self.lstm_size).zero_() + c0 = x.new(2 * self.num_blstm_layers, bsz, self.lstm_size).zero_() + packed_outs, _ = self.lstm(packed_x, (h0, c0)) + + # unpack outputs and apply dropout + x, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_outs) + if self.dropout is not None: + x = self.dropout(x) + + encoder_padding_mask = ( + lengths_to_padding_mask(output_lengths).to(src_tokens.device).t() + ) + + return { + "encoder_out": x, # (T, B, C) + "encoder_padding_mask": encoder_padding_mask, # (T, B) + } + + def reorder_encoder_out(self, encoder_out, new_order): + encoder_out["encoder_out"] = encoder_out["encoder_out"].index_select( + 1, new_order + ) + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(1, new_order) + return encoder_out + + +class MLPAttention(nn.Module): + """The original attention from Badhanau et al. (2014) + + https://arxiv.org/abs/1409.0473, based on a Multi-Layer Perceptron. 
+ The attention score between position i in the encoder and position j in the + decoder is: alpha_ij = V_a * tanh(W_ae * enc_i + W_ad * dec_j + b_a) + """ + + def __init__(self, decoder_hidden_state_dim, context_dim, attention_dim): + super().__init__() + + self.context_dim = context_dim + self.attention_dim = attention_dim + # W_ae and b_a + self.encoder_proj = nn.Linear(context_dim, self.attention_dim, bias=True) + # W_ad + self.decoder_proj = nn.Linear( + decoder_hidden_state_dim, self.attention_dim, bias=False + ) + # V_a + self.to_scores = nn.Linear(self.attention_dim, 1, bias=False) + + def forward(self, decoder_state, source_hids, encoder_padding_mask): + """The expected input dimensions are: + decoder_state: bsz x decoder_hidden_state_dim + source_hids: src_len x bsz x context_dim + encoder_padding_mask: src_len x bsz + """ + src_len, bsz, _ = source_hids.size() + # (src_len*bsz) x context_dim (to feed through linear) + flat_source_hids = source_hids.view(-1, self.context_dim) + # (src_len*bsz) x attention_dim + encoder_component = self.encoder_proj(flat_source_hids) + # src_len x bsz x attention_dim + encoder_component = encoder_component.view(src_len, bsz, self.attention_dim) + # 1 x bsz x attention_dim + decoder_component = self.decoder_proj(decoder_state).unsqueeze(0) + # Sum with broadcasting and apply the non linearity + # src_len x bsz x attention_dim + hidden_att = torch.tanh( + (decoder_component + encoder_component).view(-1, self.attention_dim) + ) + # Project onto the reals to get attentions scores (src_len x bsz) + attn_scores = self.to_scores(hidden_att).view(src_len, bsz) + + # Mask + softmax (src_len x bsz) + if encoder_padding_mask is not None: + attn_scores = ( + attn_scores.float() + .masked_fill_(encoder_padding_mask, float("-inf")) + .type_as(attn_scores) + ) # FP16 support: cast to float and back + # srclen x bsz + normalized_masked_attn_scores = F.softmax(attn_scores, dim=0) + + # Sum weighted sources (bsz x context_dim) + 
attn_weighted_context = ( + source_hids * normalized_masked_attn_scores.unsqueeze(2) + ).sum(dim=0) + + return attn_weighted_context, normalized_masked_attn_scores + + +class LSTMDecoder(FairseqIncrementalDecoder): + def __init__( + self, + dictionary, + embed_dim, + num_layers, + hidden_size, + dropout, + encoder_output_dim, + attention_dim, + output_layer_dim, + ): + """ + Args: + dictionary: target text dictionary. + embed_dim: embedding dimension for target tokens. + num_layers: number of LSTM layers. + hidden_size: hidden size for LSTM layers. + dropout: dropout probability. Dropout can be applied to the + embeddings, the LSTM layers, and the context vector. + encoder_output_dim: encoder output dimension (hidden size of + encoder LSTM). + attention_dim: attention dimension for MLP attention. + output_layer_dim: size of the linear layer prior to output + projection. + """ + super().__init__(dictionary) + self.num_layers = num_layers + self.hidden_size = hidden_size + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + self.embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx) + if dropout > 0: + self.dropout = nn.Dropout(p=dropout) + else: + self.dropout = None + + self.layers = nn.ModuleList() + for layer_id in range(num_layers): + input_size = embed_dim if layer_id == 0 else encoder_output_dim + self.layers.append( + nn.LSTMCell(input_size=input_size, hidden_size=hidden_size) + ) + + self.context_dim = encoder_output_dim + self.attention = MLPAttention( + decoder_hidden_state_dim=hidden_size, + context_dim=encoder_output_dim, + attention_dim=attention_dim, + ) + + self.deep_output_layer = nn.Linear( + hidden_size + encoder_output_dim + embed_dim, output_layer_dim + ) + self.output_projection = nn.Linear(output_layer_dim, num_embeddings) + + def forward( + self, prev_output_tokens, encoder_out=None, incremental_state=None, **kwargs + ): + encoder_padding_mask = encoder_out["encoder_padding_mask"] + encoder_outs = 
encoder_out["encoder_out"] + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + bsz, seqlen = prev_output_tokens.size() + + srclen = encoder_outs.size(0) + + # embed tokens + embeddings = self.embed_tokens(prev_output_tokens) + x = embeddings + if self.dropout is not None: + x = self.dropout(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # initialize previous states (or get from cache during incremental + # generation) + cached_state = utils.get_incremental_state( + self, incremental_state, "cached_state" + ) + if cached_state is not None: + prev_hiddens, prev_cells = cached_state + else: + prev_hiddens = [encoder_out["encoder_out"].mean(dim=0)] * self.num_layers + prev_cells = [x.new_zeros(bsz, self.hidden_size)] * self.num_layers + + attn_scores = x.new_zeros(bsz, srclen) + attention_outs = [] + outs = [] + for j in range(seqlen): + input = x[j, :, :] + attention_out = None + for i, layer in enumerate(self.layers): + # the previous state is one layer below except for the bottom + # layer where the previous state is the state emitted by the + # top layer + hidden, cell = layer( + input, + ( + prev_hiddens[(i - 1) % self.num_layers], + prev_cells[(i - 1) % self.num_layers], + ), + ) + if self.dropout is not None: + hidden = self.dropout(hidden) + prev_hiddens[i] = hidden + prev_cells[i] = cell + if attention_out is None: + attention_out, attn_scores = self.attention( + hidden, encoder_outs, encoder_padding_mask + ) + if self.dropout is not None: + attention_out = self.dropout(attention_out) + attention_outs.append(attention_out) + input = attention_out + + # collect the output of the top layer + outs.append(hidden) + + # cache previous states (no-op except during incremental generation) + utils.set_incremental_state( + self, incremental_state, "cached_state", (prev_hiddens, prev_cells) + ) + + # collect outputs across time steps + x = torch.cat(outs, dim=0).view(seqlen, bsz, self.hidden_size) + attention_outs_concat 
= torch.cat(attention_outs, dim=0).view( + seqlen, bsz, self.context_dim + ) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + attention_outs_concat = attention_outs_concat.transpose(0, 1) + + # concat LSTM output, attention output and embedding + # before output projection + x = torch.cat((x, attention_outs_concat, embeddings), dim=2) + x = self.deep_output_layer(x) + x = torch.tanh(x) + if self.dropout is not None: + x = self.dropout(x) + # project back to size of vocabulary + x = self.output_projection(x) + + # to return the full attn_scores tensor, we need to fix the decoder + # to account for subsampling input frames + # return x, attn_scores + return x, None + + def reorder_incremental_state(self, incremental_state, new_order): + super().reorder_incremental_state(incremental_state, new_order) + cached_state = utils.get_incremental_state( + self, incremental_state, "cached_state" + ) + if cached_state is None: + return + + def reorder_state(state): + if isinstance(state, list): + return [reorder_state(state_i) for state_i in state] + return state.index_select(0, new_order) + + new_state = tuple(map(reorder_state, cached_state)) + utils.set_incremental_state(self, incremental_state, "cached_state", new_state) + + +@register_model_architecture(model_name="s2t_berard", arch_name="s2t_berard") +def berard(args): + """The original version: "End-to-End Automatic Speech Translation of + Audiobooks" (https://arxiv.org/abs/1802.04200) + """ + args.input_layers = getattr(args, "input_layers", "[256, 128]") + args.conv_layers = getattr(args, "conv_layers", "[(16, 3, 2), (16, 3, 2)]") + args.num_blstm_layers = getattr(args, "num_blstm_layers", 3) + args.lstm_size = getattr(args, "lstm_size", 256) + args.dropout = getattr(args, "dropout", 0.2) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 128) + args.decoder_num_layers = getattr(args, "decoder_num_layers", 2) + args.decoder_hidden_dim = getattr(args, "decoder_hidden_dim", 512) + args.attention_dim = 
@register_model_architecture(model_name="s2t_berard", arch_name="s2t_berard_512_3_2")
def berard_512_3_2(args):
    """Apply the ``s2t_berard_512_3_2`` defaults to ``args``.

    Each option below is set only when ``args`` does not define it yet;
    ``berard`` then fills in everything that is still missing.

    Args:
        args: namespace-like object with the model options; modified in place.
    """
    variant_defaults = (
        ("num_blstm_layers", 3),
        ("lstm_size", 512),
        ("dropout", 0.3),
        ("decoder_embed_dim", 256),
        ("decoder_num_layers", 2),
        ("decoder_hidden_dim", 1024),
        ("attention_dim", 512),
        ("output_layer_dim", 256),
    )
    for option, fallback in variant_defaults:
        setattr(args, option, getattr(args, option, fallback))
    berard(args)
args.attention_dim = getattr(args, "attention_dim", 512) + args.output_layer_dim = getattr(args, "output_layer_dim", 256) + berard(args) diff --git a/fairseq/fairseq/models/speech_to_text/convtransformer.py b/fairseq/fairseq/models/speech_to_text/convtransformer.py new file mode 100644 index 0000000..4d0fc02 --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/convtransformer.py @@ -0,0 +1,443 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import math +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from fairseq import checkpoint_utils, utils +from fairseq.data.data_utils import lengths_to_padding_mask +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + register_model, + register_model_architecture, +) +from fairseq.models.speech_to_text.modules.convolution import infer_conv_output_dim +from fairseq.models.transformer import Embedding, TransformerDecoder +from fairseq.modules import LayerNorm, PositionalEmbedding, TransformerEncoderLayer + +logger = logging.getLogger(__name__) + + +@register_model("convtransformer") +class ConvTransformerModel(FairseqEncoderDecoderModel): + """ + Transformer-based Speech translation model from ESPNet-ST + https://arxiv.org/abs/2004.10234 + """ + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + parser.add_argument( + "--input-feat-per-channel", + type=int, + metavar="N", + help="encoder input dimension per input channel", + ) + parser.add_argument( + "--activation-fn", + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", 
help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--decoder-output-dim", + type=int, + metavar="N", + help="decoder output dimension (extra linear layer if different from decoder embed dim)", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--no-scale-embedding", 
+ action="store_true", + help="if True, dont scale embeddings", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model to take encoder weights from (for initialization)", + ) + parser.add_argument( + "--load-pretrained-decoder-from", + type=str, + metavar="STR", + help="model to take decoder weights from (for initialization)", + ) + parser.add_argument( + "--conv-out-channels", + type=int, + metavar="INT", + help="the number of output channels of conv layer", + ) + + @classmethod + def build_encoder(cls, args): + encoder = ConvTransformerEncoder(args) + if getattr(args, "load_pretrained_encoder_from", None) is not None: + encoder = checkpoint_utils.load_pretrained_component_from_model( + component=encoder, checkpoint=args.load_pretrained_encoder_from + ) + return encoder + + @classmethod + def build_decoder(cls, args, task, embed_tokens): + decoder = TransformerDecoderNoExtra(args, task.target_dictionary, embed_tokens) + if getattr(args, "load_pretrained_decoder_from", None) is not None: + decoder = checkpoint_utils.load_pretrained_component_from_model( + component=decoder, checkpoint=args.load_pretrained_decoder_from + ) + return decoder + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + def build_embedding(dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + decoder_embed_tokens = build_embedding( + task.target_dictionary, args.decoder_embed_dim + ) + encoder = cls.build_encoder(args) + decoder = cls.build_decoder(args, task, decoder_embed_tokens) + return cls(encoder, decoder) + + @staticmethod + @torch.jit.unused + def set_batch_first(lprobs): + lprobs.batch_first = True + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + 
log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + if self.training: + self.set_batch_first(lprobs) + return lprobs + + def output_layout(self): + return "BTD" + + """ + The forward method inherited from the base class has a **kwargs argument in + its input, which is not supported in torchscript. This method overrites the forward + method definition without **kwargs. + """ + + def forward(self, src_tokens, src_lengths, prev_output_tokens): + encoder_out = self.encoder(src_tokens=src_tokens, src_lengths=src_lengths) + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out + ) + return decoder_out + + +class ConvTransformerEncoder(FairseqEncoder): + """Conv + Transformer encoder""" + + def __init__(self, args): + """Construct an Encoder object.""" + super().__init__(None) + + self.dropout = args.dropout + self.embed_scale = ( + 1.0 if args.no_scale_embedding else math.sqrt(args.encoder_embed_dim) + ) + self.padding_idx = 1 + self.in_channels = 1 + self.input_dim = args.input_feat_per_channel + self.conv = torch.nn.Sequential( + torch.nn.Conv2d(1, args.conv_out_channels, 3, stride=2, padding=3 // 2), + torch.nn.ReLU(), + torch.nn.Conv2d( + args.conv_out_channels, + args.conv_out_channels, + 3, + stride=2, + padding=3 // 2, + ), + torch.nn.ReLU(), + ) + transformer_input_dim = infer_conv_output_dim( + self.in_channels, self.input_dim, args.conv_out_channels + ) + self.out = torch.nn.Linear(transformer_input_dim, args.encoder_embed_dim) + self.embed_positions = PositionalEmbedding( + args.max_source_positions, + args.encoder_embed_dim, + self.padding_idx, + learned=False, + ) + + self.transformer_layers = nn.ModuleList([]) + self.transformer_layers.extend( + [TransformerEncoderLayer(args) for i in range(args.encoder_layers)] + ) + if args.encoder_normalize_before: + self.layer_norm = 
LayerNorm(args.encoder_embed_dim) + else: + self.layer_norm = None + + def pooling_ratio(self): + return 4 + + def forward(self, src_tokens, src_lengths): + """Encode input sequence. + :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :return: position embedded tensor and mask + :rtype Tuple[torch.Tensor, torch.Tensor]: + """ + bsz, max_seq_len, _ = src_tokens.size() + x = ( + src_tokens.view(bsz, max_seq_len, self.in_channels, self.input_dim) + .transpose(1, 2) + .contiguous() + ) + x = self.conv(x) + bsz, _, output_seq_len, _ = x.size() + x = x.transpose(1, 2).transpose(0, 1).contiguous().view(output_seq_len, bsz, -1) + x = self.out(x) + x = self.embed_scale * x + + subsampling_factor = int(max_seq_len * 1.0 / output_seq_len + 0.5) + input_len_0 = (src_lengths.float() / subsampling_factor).ceil().long() + input_len_1 = x.size(0) * torch.ones([src_lengths.size(0)]).long().to( + input_len_0.device + ) + input_lengths = torch.min(input_len_0, input_len_1) + + encoder_padding_mask = lengths_to_padding_mask(input_lengths) + + positions = self.embed_positions(encoder_padding_mask).transpose(0, 1) + x += positions + x = F.dropout(x, p=self.dropout, training=self.training) + + for layer in self.transformer_layers: + x = layer(x, encoder_padding_mask) + + if not encoder_padding_mask.any(): + maybe_encoder_padding_mask = None + else: + maybe_encoder_padding_mask = encoder_padding_mask + + return { + "encoder_out": [x], + "encoder_padding_mask": [maybe_encoder_padding_mask] + if maybe_encoder_padding_mask is not None + else [], + "encoder_embedding": [], + "encoder_states": [], + "src_tokens": [], + "src_lengths": [], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + (encoder_out["encoder_padding_mask"][0]).index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + (encoder_out["encoder_embedding"][0]).index_select(0, new_order) + ] + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, + "encoder_padding_mask": new_encoder_padding_mask, + "encoder_embedding": new_encoder_embedding, + "encoder_states": encoder_states, + "src_tokens": [], + "src_lengths": [], + } + + +class TransformerDecoderNoExtra(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None + + +@register_model_architecture(model_name="convtransformer", arch_name="convtransformer") +def base_architecture(args): + args.input_feat_per_channel = getattr(args, "input_feat_per_channel", 80) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + 
args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0) + args.max_source_positions = getattr(args, "max_source_positions", 3000) + args.max_target_positions = getattr(args, "max_target_positions", 1024) + args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False) + args.conv_out_channels 
= getattr(args, "conv_out_channels", args.encoder_embed_dim) + + +@register_model_architecture("convtransformer", "convtransformer_espnet") +def convtransformer_espnet(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256) + args.encoder_layers = getattr(args, "encoder_layers", 12) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) diff --git a/fairseq/fairseq/models/speech_to_text/hub_interface.py b/fairseq/fairseq/models/speech_to_text/hub_interface.py new file mode 100644 index 0000000..d78427f --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/hub_interface.py @@ -0,0 +1,128 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from argparse import Namespace +from typing import Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import fairseq.data.audio.feature_transforms.utterance_cmvn as utt_cmvn +from fairseq.data import encoders +from fairseq.data.audio.audio_utils import convert_waveform as convert_wav +from fairseq.data.audio.audio_utils import get_fbank +from fairseq.data.audio.audio_utils import get_waveform as get_wav +from fairseq.data.audio.speech_to_text_dataset import SpeechToTextDataset + +logger = logging.getLogger(__name__) + + +class S2THubInterface(nn.Module): + def __init__(self, cfg, task, model): + super().__init__() + self.cfg = cfg + self.task = task + self.model = model + self.model.eval() + self.generator = self.task.build_generator([self.model], self.cfg.generation) + + @classmethod + def get_model_input(cls, task, audio: Union[str, torch.Tensor]): + input_type = task.data_cfg.hub.get("input_type", "fbank80") + if input_type == "fbank80_w_utt_cmvn": + if isinstance(audio, str): + feat = 
utt_cmvn.UtteranceCMVN()(get_fbank(audio)) + feat = feat.unsqueeze(0) # T x D -> 1 x T x D + else: + import torchaudio.compliance.kaldi as kaldi + + feat = kaldi.fbank(audio, num_mel_bins=80).numpy() # 1 x T x D + elif input_type in {"waveform", "standardized_waveform"}: + if isinstance(audio, str): + feat, sr = get_wav(audio) # C x T + feat, _ = convert_wav( + feat, sr, to_sample_rate=16_000, to_mono=True + ) # C x T -> 1 x T + else: + feat = audio.numpy() + else: + raise ValueError(f"Unknown value: input_type = {input_type}") + + src_lengths = torch.Tensor([feat.shape[1]]).long() + src_tokens = torch.from_numpy(feat) # 1 x T (x D) + if input_type == "standardized_waveform": + with torch.no_grad(): + src_tokens = F.layer_norm(src_tokens, src_tokens.shape) + + return { + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + "prev_output_tokens": None, + }, + "target_lengths": None, + "speaker": None, + } + + @classmethod + def detokenize(cls, task, tokens): + text = task.tgt_dict.string(tokens) + tkn_cfg = task.data_cfg.bpe_tokenizer + tokenizer = encoders.build_bpe(Namespace(**tkn_cfg)) + return text if tokenizer is None else tokenizer.decode(text) + + @classmethod + def get_prefix_token(cls, task, lang): + prefix_size = int(task.data_cfg.prepend_tgt_lang_tag) + prefix_tokens = None + if prefix_size > 0: + assert lang is not None + lang_tag = SpeechToTextDataset.get_lang_tag_idx(lang, task.tgt_dict) + prefix_tokens = torch.Tensor([lang_tag]).long().unsqueeze(0) + return prefix_tokens + + @classmethod + def get_prediction( + cls, task, model, generator, sample, tgt_lang=None, synthesize_speech=False + ) -> Union[str, Tuple[str, Tuple[torch.Tensor, int]]]: + _tgt_lang = tgt_lang or task.data_cfg.hub.get("tgt_lang", None) + prefix = cls.get_prefix_token(task, _tgt_lang) + pred_tokens = generator.generate([model], sample, prefix_tokens=prefix) + pred = cls.detokenize(task, pred_tokens[0][0]["tokens"]) + eos_token = 
task.data_cfg.config.get("eos_token", None) + if eos_token: + pred = " ".join(pred.split(" ")[:-1]) + + if synthesize_speech: + pfx = f"{_tgt_lang}_" if task.data_cfg.prepend_tgt_lang_tag else "" + tts_model_id = task.data_cfg.hub.get(f"{pfx}tts_model_id", None) + speaker = task.data_cfg.hub.get(f"{pfx}speaker", None) + if tts_model_id is None: + logger.warning("TTS model configuration not found") + else: + _repo, _id = tts_model_id.split(":") + tts_model = torch.hub.load(_repo, _id, verbose=False) + pred = (pred, tts_model.predict(pred, speaker=speaker)) + return pred + + def predict( + self, + audio: Union[str, torch.Tensor], + tgt_lang: Optional[str] = None, + synthesize_speech: bool = False, + ) -> Union[str, Tuple[str, Tuple[torch.Tensor, int]]]: + # `audio` is either a file path or a 1xT Tensor + # return either text or (text, synthetic speech) + sample = self.get_model_input(self.task, audio) + return self.get_prediction( + self.task, + self.model, + self.generator, + sample, + tgt_lang=tgt_lang, + synthesize_speech=synthesize_speech, + ) diff --git a/fairseq/fairseq/models/speech_to_text/modules/__init__.py b/fairseq/fairseq/models/speech_to_text/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/fairseq/models/speech_to_text/modules/augmented_memory_attention.py b/fairseq/fairseq/models/speech_to_text/modules/augmented_memory_attention.py new file mode 100644 index 0000000..2d330f9 --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/modules/augmented_memory_attention.py @@ -0,0 +1,487 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import List, Tuple + +import torch +import torch.nn.functional as F +from torch import Tensor, nn + +from fairseq.models import FairseqEncoder +from fairseq.models.speech_to_text import ConvTransformerEncoder +from fairseq.models.speech_to_text.utils import ( + attention_suppression, + lengths_to_encoder_padding_mask, + segments_to_sequence, + sequence_to_segments, +) +from fairseq.modules import MultiheadAttention, TransformerEncoderLayer + +# ------------------------------------------------------------------------------ +# AugmentedMemoryConvTransformerEncoder +# ------------------------------------------------------------------------------ + + +class AugmentedMemoryConvTransformerEncoder(ConvTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + args.encoder_stride = self.stride() + + self.left_context = args.left_context // args.encoder_stride + + self.right_context = args.right_context // args.encoder_stride + + self.left_context_after_stride = args.left_context // args.encoder_stride + self.right_context_after_stride = args.right_context // args.encoder_stride + + self.transformer_layers = nn.ModuleList([]) + self.transformer_layers.extend( + [ + AugmentedMemoryTransformerEncoderLayer(args) + for i in range(args.encoder_layers) + ] + ) + + def stride(self): + # Hard coded here. Should infer from convs in future + stride = 4 + return stride + + def forward(self, src_tokens, src_lengths, states=None): + """Encode input sequence. 
+ :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :return: position embedded tensor and mask + :rtype Tuple[torch.Tensor, torch.Tensor]: + """ + bsz, max_seq_len, _ = src_tokens.size() + x = ( + src_tokens.view(bsz, max_seq_len, self.in_channels, self.input_dim) + .transpose(1, 2) + .contiguous() + ) + x = self.conv(x) + bsz, _, output_seq_len, _ = x.size() + x = x.transpose(1, 2).transpose(0, 1).contiguous().view(output_seq_len, bsz, -1) + x = self.out(x) + x = self.embed_scale * x + + subsampling_factor = 1.0 * max_seq_len / output_seq_len + input_lengths = torch.max( + (src_lengths.float() / subsampling_factor).ceil().long(), + x.size(0) * src_lengths.new_ones([src_lengths.size(0)]).long(), + ) + + encoder_padding_mask, _ = lengths_to_encoder_padding_mask( + input_lengths, batch_first=True + ) + + # TODO: fix positional embedding + positions = self.embed_positions(encoder_padding_mask).transpose(0, 1) + + x += positions + x = F.dropout(x, p=self.dropout, training=self.training) + + # State to store memory banks etc. 
+ if states is None: + states = [ + {"memory_banks": None, "encoder_states": None} + for i in range(len(self.transformer_layers)) + ] + + for i, layer in enumerate(self.transformer_layers): + # x size: + # (self.left_size + self.segment_size + self.right_size) + # / self.stride, num_heads, dim + # TODO: Consider mask here + x = layer(x, states[i]) + states[i]["encoder_states"] = x[ + self.left_context_after_stride : -self.right_context_after_stride + ] + + lengths = ( + ( + ~encoder_padding_mask[ + :, self.left_context_after_stride : -self.right_context_after_stride + ] + ) + .sum(dim=1, keepdim=True) + .long() + ) + + return states[-1]["encoder_states"], lengths, states + + +# ------------------------------------------------------------------------------ +# AugmentedMemoryTransformerEncoderLayer +# ------------------------------------------------------------------------------ +class AugmentedMemoryTransformerEncoderLayer(TransformerEncoderLayer): + def __init__(self, args): + super().__init__(args) + + self.left_context = args.left_context // args.encoder_stride + self.right_context = args.right_context // args.encoder_stride + + def forward(self, x, state): + + length, batch_size, x_dim = x.size() + + residual = x + + if self.normalize_before: + x = self.self_attn_layer_norm(x) + + # init_state + if state.get("memory_banks", None) is None: + state["memory_banks"] = [] + + # TODO reseach new sum_query method + seg_start = self.left_context + seg_end = length - self.right_context + if seg_start < seg_end: + summarization_query = torch.mean(x[seg_start:seg_end], keepdim=True, dim=0) + else: + summarization_query = x.new_zeros(1, batch_size, x_dim) + + x = torch.cat([x, summarization_query], dim=0) + + x = self.self_attn(input_and_summary=x, state=state) + + x = self.dropout_module(x) + x = residual + x + + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = 
self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = residual + x + if not self.normalize_before: + x = self.final_layer_norm(x) + + return x + + def build_self_attention(self, embed_dim, args): + return AugmentedMemoryMultiheadAttention( + embed_dim=embed_dim, + num_heads=args.encoder_attention_heads, + dropout=args.attention_dropout, + self_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + tanh_on_mem=True, + max_memory_size=args.max_memory_size, + ) + + +# ------------------------------------------------------------------------------ +# AugmentedMemoryMultiheadAttention +# ------------------------------------------------------------------------------ +class AugmentedMemoryMultiheadAttention(MultiheadAttention): + """ + Augmented Memory Attention from + Streaming Transformer-based Acoustic Models + Using Self-attention with Augmented Memory + https://arxiv.org/abs/2005.08042 + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + tanh_on_mem=False, + memory_dim=None, + std_scale=0.5, # 0.5 based on https://arxiv.org/abs/2005.09137 + max_memory_size=-1, + disable_mem_on_mem_attn=True, + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + + self.memory_dim = memory_dim if memory_dim is not None else embed_dim + self.std_scale = std_scale + self.disable_mem_on_mem_attn = disable_mem_on_mem_attn + + # This Operator was used for factorization in PySpeech + self.v2e = lambda x: x + + if tanh_on_mem: + self.squash_mem = torch.tanh + self.nonlinear_squash_mem = True + else: + self.squash_mem = lambda x: x + 
self.nonlinear_squash_mem = False + + self.max_memory_size = max_memory_size + + def forward(self, input_and_summary, state): + """ + input: Encoder states of current segment with left or right context, + plus one summarization query + + """ + + length, batch_size, _ = input_and_summary.shape + length = length - 1 # not include sum_query, last index + + memory = state["memory_banks"] + # TODO: positional embedding on memory + + if self.max_memory_size > -1 and len(memory) > self.max_memory_size: + # TODO: need to fix here + if self.max_memory_size == 0: + memory = memory.new_zeros(1, memory.size(1), self.memory_dim) + else: + memory = memory[-self.max_memory_size :] + + memory_and_input = torch.cat(memory + [input_and_summary[:-1]], dim=0) + input_and_sum_query = input_and_summary + + q = self.q_proj(self.v2e(input_and_sum_query)) + k = self.k_proj(self.v2e(memory_and_input)) + v = self.v_proj(self.v2e(memory_and_input)) + + q = ( + q.contiguous() + .view(-1, batch_size * self.num_heads, self.head_dim) + .transpose(0, 1) + * self.scaling + ) + k = ( + k.contiguous() + .view(-1, batch_size * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + v = ( + v.contiguous() + .view(-1, batch_size * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + attention_weights = torch.bmm(q, k.transpose(1, 2)) + + if self.disable_mem_on_mem_attn: + attention_weights = self.suppress_mem_on_mem_attention( + batch_size, self.num_heads, len(memory), attention_weights + ) + + if self.std_scale is not None: + attention_weights = attention_suppression(attention_weights, self.std_scale) + + assert list(attention_weights.shape) == [ + batch_size * self.num_heads, + length + 1, + length + len(memory), + ] + + attention_weights = torch.nn.functional.softmax( + attention_weights.float(), dim=-1 + ).type_as(attention_weights) + + attention_probs = self.dropout_module(attention_weights) + + # [T, T, B, n_head] + [T, B, n_head, d_head] -> [T, B, n_head, d_head] + attention = 
torch.bmm(attention_probs, v) + + assert list(attention.shape) == [ + batch_size * self.num_heads, + length + 1, + self.head_dim, + ] + + attention = ( + attention.transpose(0, 1) + .contiguous() + .view(length + 1, batch_size, self.embed_dim) + ) + + output_and_memory = self.out_proj(attention) + + next_m = output_and_memory[-1:] + next_m = self.squash_mem(next_m) + output = output_and_memory[:-1] + + state["memory_banks"].append(next_m) + + return output + + def suppress_mem_on_mem_attention( + self, B: int, num_heads: int, mem_size: int, attention_weight: Tensor + ): + """ + Arguments: + - B: batch size + - num_heads: number of attention heads + - mem_size: size of memory bank + - attention_weight: a [B*num_heads, T + 1, T + mem_size] vector + + Return: + modified attention_weight with [B*num_heads, -1, :mem_size] = -inf + """ + attention_weight[:, -1, :mem_size] = float("-inf") + return attention_weight + + +# ------------------------------------------------------------------------------ +# SequenceEncoder +# ------------------------------------------------------------------------------ +class SequenceEncoder(FairseqEncoder): + """ + SequenceEncoder encodes sequences. + + More specifically, `src_tokens` and `src_lengths` in `forward()` should + describe a batch of "complete" sequences rather than segments. + + Segment-by-segment inference can be triggered by `segment_size`: + 1) `segment_size` is None: + SequenceEncoder treats the input sequence as one single segment. + 2) `segment_size` is not None (some int instead): + SequenceEncoder does the following: + 1. breaks the input sequence into several segments + 2. inference on each segment and collect the outputs + 3. concatanete segment outputs into the output sequence. + Note that `segment_size` here shouldn't include additional left/right + contexts needed, for example if we wish to infer with LC-BLSTM where the + middle chunk size is 100 and right context is 20, `segment_size` should be + 100. 
+ """ + + def __init__(self, args, module): + super().__init__(None) + + self.module = module + self.input_time_axis = 1 + self.output_time_axis = 0 + self.segment_size = args.segment_size + self.left_context = args.left_context + self.right_context = args.right_context + + def forward( + self, + src_tokens: Tensor, + src_lengths: Tensor, + states=None, + ): + + seg_src_tokens_lengths = sequence_to_segments( + sequence=src_tokens, + time_axis=self.input_time_axis, + lengths=src_lengths, + segment_size=self.segment_size, + extra_left_context=self.left_context, + extra_right_context=self.right_context, + ) + + seg_encoder_states_lengths: List[Tuple[Tensor, Tensor]] = [] + + for seg_src_tokens, seg_src_lengths in seg_src_tokens_lengths: + (seg_encoder_states, seg_enc_lengths, states) = self.module( + seg_src_tokens, + seg_src_lengths, + states=states, + ) + + seg_encoder_states_lengths.append((seg_encoder_states, seg_enc_lengths)) + + encoder_out, enc_lengths = segments_to_sequence( + segments=seg_encoder_states_lengths, time_axis=self.output_time_axis + ) + + encoder_padding_mask, _ = lengths_to_encoder_padding_mask( + enc_lengths, batch_first=True + ) + + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + return { + "encoder_out": [encoder_out], + "encoder_padding_mask": [encoder_padding_mask], + "encoder_embedding": [], + "encoder_states": [states], + "src_tokens": [], + "src_lengths": [], + } + + def incremental_encode( + self, + seg_src_tokens: Tensor, + seg_src_lengths: Tensor, + states=None, + ): + """ + Different from forward function, this function takes segmented speech + as input, and append encoder states to previous states + """ + (seg_encoder_states, seg_enc_lengths, states) = self.module( + seg_src_tokens, + seg_src_lengths, + states=states, + ) + return seg_encoder_states, seg_enc_lengths, states + + +# ------------------------------------------------------------------------------ +# Augmented memory model decorator +# 
------------------------------------------------------------------------------ +def augmented_memory(klass): + class StreamSeq2SeqModel(klass): + @staticmethod + def add_args(parser): + super(StreamSeq2SeqModel, StreamSeq2SeqModel).add_args(parser) + parser.add_argument( + "--segment-size", type=int, required=True, help="Length of the segment." + ) + parser.add_argument( + "--left-context", + type=int, + default=0, + help="Left context for the segment.", + ) + parser.add_argument( + "--right-context", + type=int, + default=0, + help="Right context for the segment.", + ) + parser.add_argument( + "--max-memory-size", + type=int, + default=-1, + help="Right context for the segment.", + ) + + StreamSeq2SeqModel.__name__ = klass.__name__ + return StreamSeq2SeqModel diff --git a/fairseq/fairseq/models/speech_to_text/modules/convolution.py b/fairseq/fairseq/models/speech_to_text/modules/convolution.py new file mode 100644 index 0000000..526d754 --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/modules/convolution.py @@ -0,0 +1,126 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class Conv1dSubsampler(nn.Module):
    """Convolutional subsampler: a stack of 1D convolution (along temporal
    dimension) followed by non-linear activation via gated linear units
    (https://arxiv.org/abs/1911.08460)

    Every layer uses stride 2 and padding ``k // 2``; the GLU halves the
    channel count, which is why intermediate layers read ``mid_channels // 2``
    channels and the last layer emits ``out_channels * 2``.

    Args:
        in_channels (int): the number of input channels
        mid_channels (int): the number of intermediate channels
        out_channels (int): the number of output channels
        kernel_sizes (List[int]): the kernel size for each convolutional layer
    """

    def __init__(
        self,
        in_channels: int,
        mid_channels: int,
        out_channels: int,
        kernel_sizes: List[int] = (3, 3),
    ):
        super(Conv1dSubsampler, self).__init__()
        self.n_layers = len(kernel_sizes)
        self.conv_layers = nn.ModuleList(
            nn.Conv1d(
                in_channels if i == 0 else mid_channels // 2,
                mid_channels if i < self.n_layers - 1 else out_channels * 2,
                k,
                stride=2,
                padding=k // 2,
            )
            for i, k in enumerate(kernel_sizes)
        )

    def get_out_seq_lens_tensor(self, in_seq_lens_tensor):
        """Return the per-sequence lengths after all convolution layers.

        The length is propagated through each layer with that layer's own
        kernel size, padding and stride, so the result matches the real
        output length for even kernel sizes as well as odd ones.  For odd
        kernels this is the original ``floor((L - 1) / 2 + 1)``.
        """
        out = in_seq_lens_tensor.clone()
        for conv in self.conv_layers:
            kernel = conv.kernel_size[0]
            stride = conv.stride[0]
            pad = conv.padding[0]
            out = ((out.float() + 2 * pad - kernel) / stride + 1).floor().long()
        return out

    def forward(self, src_tokens, src_lengths):
        """Subsample ``src_tokens`` (B x T x C) along time.

        Returns:
            tuple of the features laid out as T' x B x C' and the subsampled
            lengths computed by :meth:`get_out_seq_lens_tensor`.
        """
        x = src_tokens.transpose(1, 2).contiguous()  # B x T x C -> B x C x T
        for conv in self.conv_layers:
            x = conv(x)
            x = nn.functional.glu(x, dim=1)
        x = x.transpose(1, 2).transpose(0, 1).contiguous()  # -> T x B x C
        return x, self.get_out_seq_lens_tensor(src_lengths)


def infer_conv_output_dim(in_channels, input_dim, out_channels):
    """Return the flattened feature size produced by the two-layer 2D conv stack.

    A random sample is pushed through two freshly built ``Conv2d`` layers
    (kernel 3, stride 2, padding 1) and the size of the flattened
    channel x feature axis is read off the result.
    """
    sample_seq_len = 200
    sample_bsz = 10
    # Only the output shape matters, so skip autograd bookkeeping.
    with torch.no_grad():
        x = torch.randn(sample_bsz, in_channels, sample_seq_len, input_dim)
        x = torch.nn.Conv2d(in_channels, out_channels, 3, stride=2, padding=3 // 2)(x)
        x = torch.nn.Conv2d(out_channels, out_channels, 3, stride=2, padding=3 // 2)(x)
    x = x.transpose(1, 2)
    mb, seq = x.size()[:2]
    return x.contiguous().view(mb, seq, -1).size(-1)


class Conv2dSubsampler(nn.Module):
    """Convolutional subsampler: a stack of 2D convolution based on ESPnet implementation
    (https://github.com/espnet/espnet)

    Args:
        input_channels (int): the number of input channels
        input_feat_per_channel (int): encoder input dimension per input channel
        conv_out_channels (int): the number of output channels of conv layer
        encoder_embed_dim (int): encoder dimension
    """

    def __init__(
        self,
        input_channels: int,
        input_feat_per_channel: int,
        conv_out_channels: int,
        encoder_embed_dim: int,
    ):
        super().__init__()
        assert input_channels == 1, input_channels
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(
                input_channels, conv_out_channels, 3, stride=2, padding=3 // 2
            ),
            torch.nn.ReLU(),
            torch.nn.Conv2d(
                conv_out_channels,
                conv_out_channels,
                3,
                stride=2,
                padding=3 // 2,
            ),
            torch.nn.ReLU(),
        )
        transformer_input_dim = infer_conv_output_dim(
            input_channels, input_feat_per_channel, conv_out_channels
        )
        self.out = torch.nn.Linear(transformer_input_dim, encoder_embed_dim)

    def forward(self, src_tokens, src_lengths):
        """Subsample ``src_tokens`` (B x T x C) and project to the encoder size.

        Returns:
            tuple of the features laid out as T' x B x encoder_embed_dim and
            the subsampled lengths, capped at T'.
        """
        B, T_i, C = src_tokens.size()
        x = src_tokens.view(B, T_i, 1, C).transpose(1, 2).contiguous()
        x = self.conv(x)
        B, _, T_o, _ = x.size()
        x = x.transpose(1, 2).transpose(0, 1).contiguous().view(T_o, B, -1)
        x = self.out(x)

        # Effective time reduction, rounded to the nearest integer.
        subsampling_factor = int(T_i * 1.0 / T_o + 0.5)
        input_lengths = (src_lengths.float() / subsampling_factor).ceil().long()
        # A length can never exceed the number of output frames.
        input_lengths = input_lengths.clamp(max=T_o)
        return x, input_lengths
class RelativePositionEmbedding(nn.Module):
    """
    Learned relative position table, following https://arxiv.org/abs/1803.02155

    The table has ``2 * max_position + 1`` rows of size ``head_dim``; ``forward``
    looks rows up by integer index.
    """

    def __init__(self, head_dim, max_position, norm_init=True):
        super().__init__()
        self.head_dim = head_dim
        self.max_position = max_position
        self.embeddings = nn.Parameter(torch.Tensor(max_position * 2 + 1, head_dim))
        # Xavier init, normal or uniform flavour depending on ``norm_init``.
        initializer = nn.init.xavier_normal_ if norm_init else nn.init.xavier_uniform_
        initializer(self.embeddings)

    def forward(self, input: Tensor):
        return nn.functional.embedding(input.long(), self.embeddings)


class Fp32LayerNorm(nn.Module):
    """Layer norm evaluated in float32 and cast back to the input's dtype.

    The parameters live in ``self.torch_module`` (a ``torch.nn.LayerNorm``).
    With ``clamp_grad`` a backward hook built from ``layer_norm_backward_hook``
    with ``clamp_value=max_grad_value`` is registered on that module.
    """

    def __init__(
        self,
        input_dim,
        clamp_grad=True,
        max_grad_value=256,
        eps=1e-5,
        elementwise_affine=True,
    ):
        super().__init__()
        self.torch_module = torch.nn.LayerNorm(
            input_dim, eps=eps, elementwise_affine=elementwise_affine
        )
        if clamp_grad:
            # NOTE(review): forward() only reads this module's parameters and
            # never calls it -- confirm the hook still fires as intended.
            self.torch_module.register_backward_hook(
                partial(layer_norm_backward_hook, clamp_value=max_grad_value)
            )

    def forward(self, input):
        norm = self.torch_module
        # Affine parameters are absent when elementwise_affine is False.
        weight = None if norm.weight is None else norm.weight.float()
        bias = None if norm.bias is None else norm.bias.float()
        normalized = torch.nn.functional.layer_norm(
            input.float(), norm.normalized_shape, weight, bias, norm.eps
        )
        return normalized.type_as(input)


# ------------------------------------------------------------------------------
#   PositionwiseFF
# ------------------------------------------------------------------------------


class PositionwiseFF(nn.Module):
    """
    Pre-norm feed-forward block with a residual connection.

    Args:
        input_dim: input embedding dimension
        ffn_dim: FFN layer inner dimension
        dropout_on_fc1: dropout applied after the activation of the first linear layer
        dropout_on_fc2: dropout applied after the second linear layer
        activation_fn: activation function used after first linear layer. \
            Only relu or gelu is supported.

    """

    def __init__(
        self, input_dim, ffn_dim, dropout_on_fc1, dropout_on_fc2, activation_fn
    ):
        super(PositionwiseFF, self).__init__()

        self.input_dim = input_dim
        self.ffn_dim = ffn_dim
        if activation_fn not in ("relu", "gelu"):
            raise ValueError("Unsupported activation_fn = ({})".format(activation_fn))
        activation = nn.ReLU() if activation_fn == "relu" else nn.GELU()

        # fc1 -> ac -> dropout -> fc2 -> dropout
        stages = [
            nn.Linear(input_dim, ffn_dim),
            activation,
            nn.Dropout(dropout_on_fc1),
            nn.Linear(ffn_dim, input_dim),
            nn.Dropout(dropout_on_fc2),
        ]
        self.module = nn.Sequential(*stages)

        self.layer_norm = Fp32LayerNorm(input_dim)

    def forward(self, input):
        # Normalise, transform, then add the untouched input back.
        return self.module(self.layer_norm(input)) + input

    def quantize_(self, params=None):
        """Dynamically quantize the linear layers in place and return ``self``."""
        per_channel = params and "per_channel" in params and params["per_channel"]
        qconfig = (
            per_channel_dynamic_qconfig if per_channel else default_dynamic_qconfig
        )
        quantization.quantize_dynamic(
            self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
        )
        return self


# ------------------------------------------------------------------------------
#   SummarizationLayer
# ------------------------------------------------------------------------------


class SummarizationLayer(nn.Module):
    """Reduce every ``segment_size`` frames of a (T, B, D) input to one vector.

    ``method`` is one of ``"mean"``, ``"max"``, ``"linear"`` or
    ``"nonlinear(<relu|gelu>,<hidden_dim>)"``.  For the two pooling methods a
    trailing partial segment is pooled (``ceil_mode=True``); for the learned
    methods it is represented by a zero vector.
    """

    def __init__(self, method, segment_size, embedding_dim):
        super(SummarizationLayer, self).__init__()
        self.segment_size = segment_size
        self.embedding_dim = embedding_dim
        nonlinear_spec = re.match(
            r"nonlinear\((?P<act>[a-z]+),(?P<dim>[0-9]+)\)", method
        )
        self.method = method
        if method in ("mean", "max"):
            pool_cls = nn.AvgPool1d if method == "mean" else nn.MaxPool1d
            self.module = pool_cls(
                kernel_size=segment_size,
                stride=segment_size,
                ceil_mode=True,
            )
        elif method == "linear":
            self.module = nn.Linear(segment_size, 1)
        elif nonlinear_spec:
            act_type = nonlinear_spec.group("act")
            hid_dim = int(nonlinear_spec.group("dim"))
            if act_type not in ("relu", "gelu"):
                raise ValueError("Unsupported activation_fn = ({})".format(act_type))
            act = nn.ReLU() if act_type == "relu" else nn.GELU()
            self.module = nn.Sequential(
                nn.Linear(segment_size, hid_dim),
                act,
                nn.Linear(hid_dim, 1),
            )
        else:
            raise ValueError("Unsupported summarization method = ({})".format(method))

    def forward(self, input):
        # T, B, D -> B, D, T
        features = input.permute(1, 2, 0)

        if self.method in ("mean", "max"):
            return self.module(features).permute(2, 0, 1)

        bsz, dim, num_frames = features.size(0), features.size(1), features.size(2)
        full_seg_length = num_frames // self.segment_size * self.segment_size
        if full_seg_length > 0:
            # at least one seg is full: one row per (feature, segment) pair
            segments = (
                features[:, :, :full_seg_length]
                .contiguous()
                .view(bsz, -1, self.segment_size)
            )
            summary = self.module(segments).view(bsz, dim, -1)
        else:
            summary = features.new_zeros(bsz, dim, 0)

        if num_frames - full_seg_length > 0:
            # when last seg is not full, use zeros as last memory placeholder
            placeholder = features.new_zeros(bsz, dim, 1)
            summary = torch.cat([summary, placeholder], dim=2)
        return summary.permute(2, 0, 1)
+ + Memory is a concatenation of memory from each segments in previous attention + layer. For example, current layer is i, then memory is [m_0, m_1, ..., m_n]. + Each m_k is the output from seg_k in layer i-1. + + args: + input_dim: input embedding dimension + num_heads: number of heads in multihead self-attention + dropout: attention dropout + std_scale: if std_scale is not None. The weak attention suppression is + turned on. For std_scale = 0.5, all the attention smaller than + mean + 0.5 * std will be suppressed. + scaled_init: whether to use scaled init for linear weight + tanh_on_mem: whether to use tanh on memory output + use_mem: whether to use memory or not. When max_memory_size is 0, then + we don't have memory anymore. + layer_index: current self-attention layer index that is used in depth + initialization + max_relative_position: max relative position used in relative position + embedding + rpe_old_option: To be compatible with previous model. The previous model + was trained with attention += attention + rpe. 
The correct equation + should be attention = attention + rpe + + """ + + def __init__( + self, + input_dim, + num_heads, + dropout=0.0, + std_scale=None, + scaled_init=False, + tanh_on_mem=False, + use_mem=True, + mini_batches=False, + negative_inf="-inf", + layer_index=-1, + max_relative_position=0, + rpe_old_option=True, + ): + if input_dim % num_heads: + raise ValueError( + "input_dim ({}) must be divisible by num_heads ({})".format( + input_dim, num_heads + ) + ) + + super().__init__() + + embed_dim = input_dim + self.e2h_kv = torch.nn.Linear(input_dim, 2 * input_dim, bias=True) + self.e2h_q = torch.nn.Linear(input_dim, input_dim, bias=True) + self.rpe_old_option = rpe_old_option + if max_relative_position > 0: + self.use_rpe = True + self.rpe_k = RelativePositionEmbedding( + head_dim=input_dim // num_heads, + max_position=max_relative_position, + ) + self.rpe_v = RelativePositionEmbedding( + head_dim=input_dim // num_heads, + max_position=max_relative_position, + ) + else: + self.use_rpe = False + self.rpe_k = None + self.rpe_v = None + if scaled_init: + if layer_index == -1: + gain = 1.0 / math.sqrt(2) + else: + # https://arxiv.org/abs/2005.09684 depthwise initialization + # stablize the training greatly. Use depthwise initialization to + # replace incremental loss. 
+ gain = 1.0 / math.sqrt(layer_index + 1) + torch.nn.init.xavier_uniform_(self.e2h_kv.weight, gain=gain) + torch.nn.init.xavier_uniform_(self.e2h_q.weight, gain=gain) + + self.out_proj = torch.nn.Linear(embed_dim, embed_dim, bias=True) + + self.embed_dim = embed_dim + self.num_heads = num_heads + self.dropout = dropout + + self.head_dim = embed_dim // num_heads + self.scaling = self.head_dim**-0.5 + + self.std_scale = std_scale + self.use_mem = use_mem + self.mini_batches = mini_batches + self.negative_inf = negative_inf + + if tanh_on_mem: + self.squash_mem = torch.tanh + self.nonlinear_squash_mem = True + else: + self.squash_mem = NoOp() + self.nonlinear_squash_mem = False + + def prepare_qkv( + self, + input: Tensor, + mems: Tensor, + lengths: Tensor, + summary_length: int, + lc_length: int, + ): + # T: right_context length + utterance_length + summary_length + T, B, D = input.shape + mem_length = mems.size(0) + utterance_length = torch.max(lengths) + + right_context_blocks_length = T - utterance_length - summary_length + rc_block = input[:right_context_blocks_length, :, :] + utterance_block = input[right_context_blocks_length : T - summary_length, :, :] + + if B == 1: + padding_mask = None + else: + klengths = lengths + mem_length + right_context_blocks_length + lc_length + padding_mask = lengths_to_padding_mask(lengths=klengths) + + mem_rc_input = torch.cat([mems, rc_block, utterance_block], dim=0) + + # In training lc_length = 0 + key_length = mem_rc_input.size(0) + lc_length + rc_input_sum = input + q = self.e2h_q(rc_input_sum) + kv = self.e2h_kv(mem_rc_input) + k, v = kv.chunk(chunks=2, dim=2) + result_qkv = (q, k, v) + input_shape = (T, B, D) + result_lengths_info = ( + mem_length, + utterance_length, + right_context_blocks_length, + key_length, + ) + if padding_mask is not None: + assert padding_mask.size(0) == B + assert padding_mask.size(1) == key_length + + return result_qkv, input_shape, result_lengths_info, padding_mask + + def 
prepare_attention_weights( + self, + q: Tensor, + new_k: Tensor, + new_v: Tensor, + input_shape: Tuple[int, int, int], + rpe: Optional[Tensor], + ) -> Tuple[Tensor, Tensor, Tensor]: + T, B, D = input_shape + q = ( + q.contiguous().view(-1, B * self.num_heads, self.head_dim).transpose(0, 1) + * self.scaling + ) + + k = ( + new_k.contiguous() + .view(-1, B * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + v = ( + new_v.contiguous() + .view(-1, B * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + attention_weights = torch.bmm(q, k.transpose(1, 2)) + if self.use_rpe and rpe is not None and self.rpe_v is not None: + r_k = self.rpe_k(rpe) + # [q, B*h, d] * [q, k, d] -> [B*h, q, k] + attention_weights_rpe = torch.matmul( + q.transpose(0, 1), r_k.transpose(1, 2) + ).transpose(0, 1) + attention_weights = attention_weights + attention_weights_rpe + attention_weights_float = attention_weights.float() + + return attention_weights, attention_weights_float, v + + def prepare_attention_output( + self, + attention_weights: Tensor, + attention_weights_float: Tensor, + v: Tensor, + input_shape: Tuple[int, int, int], + key_length: int, + padding_mask: Optional[Tensor], + rpe: Optional[Tensor], + ) -> Tensor: + T, B, D = input_shape + if padding_mask is not None: + attention_weights_float = attention_weights_float.view( + B, self.num_heads, T, key_length + ) + attention_weights_float = attention_weights_float.masked_fill( + padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), float("-inf") + ) + attention_weights_float = attention_weights_float.view( + B * self.num_heads, T, key_length + ) + + if self.std_scale is not None: + attention_weights_float = attention_suppression( + attention_weights_float, self.std_scale + ) + + attention_weights_float = torch.nn.functional.softmax( + attention_weights_float, dim=-1 + ) + attention_weights = attention_weights_float.type_as(attention_weights) + + attention_probs = torch.nn.functional.dropout( + attention_weights, 
p=self.dropout, training=self.training + ) + + # [T, key_length, B, n_head]+ [key_length, B, n_head, d_head] + # -> [T, B, n_head, d_head] + attention = torch.bmm(attention_probs, v) + if self.use_rpe and rpe is not None and self.rpe_v is not None: + r_v = self.rpe_v(rpe) + attention_rpe = torch.matmul( + attention_probs.transpose(0, 1), r_v + ).transpose(0, 1) + + if self.rpe_old_option: + attention += attention + attention_rpe + else: + attention = attention + attention_rpe + + assert list(attention.shape) == [B * self.num_heads, T, self.head_dim] + + attention = attention.transpose(0, 1).contiguous().view(T, B, self.embed_dim) + + rc_output_memory = self.out_proj(attention) + return rc_output_memory + + @torch.jit.unused + def forward( + self, + input: Tensor, + lengths: Tensor, + mems: Tensor, + attention_mask: Tensor, + pre_mems: Optional[Tensor] = None, + left_context_key: Optional[Tensor] = None, + left_context_val: Optional[Tensor] = None, + rpe: Optional[Tensor] = None, + ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """ + forward function for NoSegAugmentedMemoryMultiheadAttentionBmm in training. + + args: + input: formed in the following way + [right_context_0, right_contex_1, ..., seg_0, seg_1, + ..., summary_0, summary_1,..] + lengths: the length of query which is [seg_0, seg_1, ....] + mems: [mem_0, mem_1, ...]. + attention_mask: attention mask for query = [right_context, query, summary] + key = [mem, right_context, query]. This is only used for traing. 
+ + """ + if self.use_mem: + mem_length = mems.size(0) + summary_length = mem_length + 1 + if pre_mems is not None: + mems = torch.cat([pre_mems, mems], dim=0) + else: + mem_length = 0 + summary_length = 0 + + # In training, lc_length = 0 + if left_context_key is not None: + lc_length = left_context_key.size(0) + else: + lc_length = 0 + results = self.prepare_qkv( + input=input, + mems=mems, + lengths=lengths, + summary_length=summary_length, + lc_length=lc_length, + ) + result_qkv, input_shape, result_lengths_info, padding_mask = results + q, k, v = result_qkv + ( + mem_length, + utterance_length, + right_context_blocks_length, + key_length, + ) = result_lengths_info + + if left_context_key is not None: + # add the cache key and value + new_k = torch.cat( + [ + k[: mem_length + right_context_blocks_length, :, :], + left_context_key, + k[-utterance_length:, :, :], + ], + dim=0, + ) + new_v = torch.cat( + [ + v[: mem_length + right_context_blocks_length, :, :], + left_context_val, + v[-utterance_length:, :, :], + ], + dim=0, + ) + next_k = new_k[mem_length + right_context_blocks_length :, :, :] + next_v = new_v[mem_length + right_context_blocks_length :, :, :] + else: + new_k = k + new_v = v + next_k = None + next_v = None + + attention_weights, attention_weights_float, v = self.prepare_attention_weights( + q=q, + new_k=new_k, + new_v=new_v, + input_shape=input_shape, + rpe=rpe, + ) + + # mask attention + attention_mask = attention_mask.unsqueeze(0) + attention_weights_float = attention_weights_float.masked_fill( + attention_mask, float(self.negative_inf) + ) + + rc_output_memory = self.prepare_attention_output( + attention_weights=attention_weights, + attention_weights_float=attention_weights_float, + v=v, + input_shape=input_shape, + key_length=key_length, + padding_mask=padding_mask, + rpe=rpe, + ) + + if self.use_mem: + # next_m length equals to summary length - 1 + # last memory is ignored + if self.mini_batches: + next_m = rc_output_memory[-summary_length:] + 
else: + next_m = rc_output_memory[-summary_length:-1] + + next_m = self.squash_mem(next_m) + # rc and output + rc_output = rc_output_memory[:-summary_length] + if not self.nonlinear_squash_mem: + next_m = torch.clamp(next_m, min=-10, max=10) + else: + next_m = mems + rc_output = rc_output_memory + + return rc_output, next_m, next_k, next_v + + @torch.jit.export + def forward_jit( + self, + input: Tensor, + lengths: Tensor, + mems: Tensor, + left_context_key: Tensor, + left_context_val: Tensor, + rpe: Optional[Tensor], + ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """ + forward function for NoSegAugmentedMemoryMultiheadAttentionBmm in decoding. + + args: + input: formed in the following way + [right_context_0, right_contex_1, ..., seg_0, seg_1, + ..., summary_0, summary_1,..] + lengths: the length of query which is [seg_0, seg_1, ....] + mems: [mem_0, mem_1, ...]. + left_context_key: left_context for key part. This is only used for online + decoding. In training, this is empty tensor + left_context_val: left_context for value part. This is only used for online + decoding. 
In training, this is empty tensor + + """ + lc_length = left_context_key.size(0) + + # In decoding, summary_length = 1 or 0 + if self.use_mem: + summary_length = 1 + else: + summary_length = 0 + + results = self.prepare_qkv( + input=input, + mems=mems, + lengths=lengths, + summary_length=summary_length, + lc_length=lc_length, + ) + result_qkv, input_shape, result_lengths_info, padding_mask = results + q, k, v = result_qkv + ( + mem_length, + utterance_length, + right_context_blocks_length, + key_length, + ) = result_lengths_info + + # add the cache key and value + new_k = torch.cat( + [ + k[: mem_length + right_context_blocks_length, :, :], + left_context_key, + k[-utterance_length:, :, :], + ], + dim=0, + ) + new_v = torch.cat( + [ + v[: mem_length + right_context_blocks_length, :, :], + left_context_val, + v[-utterance_length:, :, :], + ], + dim=0, + ) + next_k = new_k[mem_length + right_context_blocks_length :, :, :] + next_v = new_v[mem_length + right_context_blocks_length :, :, :] + + attention_weights, attention_weights_float, v = self.prepare_attention_weights( + q=q, + new_k=new_k, + new_v=new_v, + input_shape=input_shape, + rpe=rpe, + ) + # In online decoding, we don't have attention mask. 
class NoSegAugmentedMemoryTransformer(nn.Module):
    """
    Whole utterance augmented memory transformer.

    This is not pyspeech nn layer. It is used as a module in a master layer where
    multiple transformers is used.

    One layer consists of: pre layer norm over [right context, input],
    augmented-memory attention, dropout + residual, position-wise FFN and a
    final layer norm.

    Args:
        dropout_on_attn, dropout_on_fc1, dropout_on_fc2: dropout
            probabilities; ``None`` (the default) is treated as 0.0.
        rpe_old_option: forwarded to the attention module, where it selects
            how the relative-position term is added to the attention output.
        All remaining arguments are passed on unchanged to the attention,
        FFN and summarization sub-modules.
    """

    def __init__(
        self,
        input_dim,
        num_heads,
        ffn_dim,
        dropout_in_attn=0.0,
        dropout_on_attn=None,
        dropout_on_fc1=None,
        dropout_on_fc2=None,
        activation_fn="relu",
        tanh_on_mem=False,
        std_scale=None,
        scaled_init=False,
        segment_size=128,
        use_mem=True,
        mini_batches=False,
        negative_inf="-inf",
        layer_index=-1,
        summarization_method="mean",
        max_relative_position=0,
        rpe_old_option=True,
    ):
        super(NoSegAugmentedMemoryTransformer, self).__init__()

        # nn.Dropout cannot be built from None, so the declared defaults
        # would fail at construction time; read None as "no dropout".
        dropout_on_attn = 0.0 if dropout_on_attn is None else dropout_on_attn
        dropout_on_fc1 = 0.0 if dropout_on_fc1 is None else dropout_on_fc1
        dropout_on_fc2 = 0.0 if dropout_on_fc2 is None else dropout_on_fc2

        self.attention = NoSegAugmentedMemoryMultiheadAttentionBmm(
            input_dim=input_dim,
            num_heads=num_heads,
            dropout=dropout_in_attn,
            scaled_init=scaled_init,
            tanh_on_mem=tanh_on_mem,
            std_scale=std_scale,
            use_mem=use_mem,
            mini_batches=mini_batches,
            negative_inf=negative_inf,
            layer_index=layer_index,
            max_relative_position=max_relative_position,
            # Previously dropped, so the attention always used its default.
            rpe_old_option=rpe_old_option,
        )
        self.dropout = nn.Dropout(dropout_on_attn)
        self.pos_ff = PositionwiseFF(
            input_dim=input_dim,
            ffn_dim=ffn_dim,
            dropout_on_fc1=dropout_on_fc1,
            dropout_on_fc2=dropout_on_fc2,
            activation_fn=activation_fn,
        )
        self.layer_norm_pre = Fp32LayerNorm(input_dim)
        self.layer_norm = Fp32LayerNorm(input_dim)
        self.segment_size = segment_size
        self.use_mem = use_mem

        self.memory_op = SummarizationLayer(
            summarization_method, segment_size, input_dim
        )

    def set_mini_batches(self, mini_batches):
        """Set the ``mini_batches`` flag on the attention module."""
        self.attention.mini_batches = mini_batches

    def gen_summary_queries(self, input):
        """Summarize ``input`` with the layer's summarization module."""
        sum_input = self.memory_op(input)
        return sum_input

    def pre_attention_ops(self, input, right_context_blocks):
        """Concatenate right context and input, then apply the pre layer norm.

        Returns:
            ``(rc_length, input_length, residual_input, query_input,
            rc_and_input)`` where ``residual_input`` is the un-normalized
            concatenation and ``query_input`` is the normalized input part.
        """
        rc_length = right_context_blocks.size(0)
        input_length = input.size(0)

        rc_and_input = torch.cat([right_context_blocks, input], dim=0)
        # Keep the un-normalized tensor for the residual connection.
        residual_input = rc_and_input
        rc_and_input = self.layer_norm_pre(rc_and_input)

        query_input = rc_and_input[-input_length:, :, :]
        return rc_length, input_length, residual_input, query_input, rc_and_input

    def after_attention_ops(self, attention_output, residual_input):
        """Apply dropout, residual add, FFN and the final layer norm."""
        output = self.dropout(attention_output)
        output = output + residual_input
        output = self.pos_ff(output)
        output = self.layer_norm(output)
        return output

    @torch.jit.export
    def forward_jit(
        self,
        input: Tensor,
        lengths: Tensor,
        mems: Tensor,
        left_context_key: Tensor,
        left_context_val: Tensor,
        right_context_blocks: Tensor,
        rpe: Optional[Tensor],
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
        """Decoding-time forward pass using cached left-context key/value.

        Returns:
            ``(output, next_m, right_context_output, next_k, next_v)``.
        """

        results = self.pre_attention_ops(input, right_context_blocks)
        rc_length, input_length, residual_input, query_input, rc_and_input = results

        # In online decoding, the summary query size is always 1 or 0
        if self.use_mem:
            summary_query = self.gen_summary_queries(query_input)
            summary_query = summary_query[0:1, :, :]
            rc_qu_su = torch.cat([rc_and_input, summary_query], dim=0)
        else:
            rc_qu_su = rc_and_input

        rc_output, next_m, next_k, next_v = self.attention.forward_jit(
            input=rc_qu_su,
            lengths=lengths,
            mems=mems,
            left_context_key=left_context_key,
            left_context_val=left_context_val,
            rpe=rpe,
        )
        rc_output = self.after_attention_ops(rc_output, residual_input)
        results = (
            rc_output[-input_length:, :, :],
            next_m,
            rc_output[0:rc_length, :, :],
            next_k,
            next_v,
        )
        return results

    @torch.jit.unused
    def forward(
        self,
        input,
        lengths,
        mems,
        right_context_blocks,
        attention_mask,
        pre_mems,
        left_context_key,
        left_context_val,
        rpe,
    ):
        """Forward pass driven by an explicit ``attention_mask``.

        Returns:
            ``(output, next_m, right_context_output, next_k, next_v)``.
        """

        results = self.pre_attention_ops(input, right_context_blocks)
        rc_length, input_length, residual_input, query_input, rc_and_input = results
        if self.use_mem:
            summary_query = self.gen_summary_queries(query_input)
            rc_qu_su = torch.cat([rc_and_input, summary_query], dim=0)
        else:
            rc_qu_su = rc_and_input

        rc_output, next_m, next_k, next_v = self.attention(
            input=rc_qu_su,
            lengths=lengths,
            mems=mems,
            attention_mask=attention_mask,
            pre_mems=pre_mems,
            left_context_key=left_context_key,
            left_context_val=left_context_val,
            rpe=rpe,
        )

        # [TODO] Note memory did not go through pos_ff. What happen if we pass
        # memory through the pos_ff as well?
        rc_output = self.after_attention_ops(rc_output, residual_input)
        results = (
            rc_output[-input_length:, :, :],
            next_m,
            rc_output[0:rc_length, :, :],
            next_k,
            next_v,
        )

        return results
[ReLU, GELU] supported + tanh_on_mem: whether use tanh on memory + mini_batches: use mini-btach training + negative_inf: the negative infinity value used in attention masking. default is "-inf". + For some situation, e.g. LM. it is better to use "-1e8" to avoid nan issue. + summarization_method: method to generate segment summrization embedding + max_relative_position: max relatie position for relative position embedding + rpe_old_option: To be compatible with previous model. The previous model + was trained with attention += attention + rpe. The correct equation + should be attention = attention + rpe + [TODO]: remove the rpe_old_option by the end of 2021 Q1. + + """ + + def __init__( + self, + input_dim, + num_heads, + ffn_dim, + num_layers=1, + dropout_in_attn=0.0, + dropout_on_attn=0.0, + dropout_on_fc1=0.0, + dropout_on_fc2=0.0, + segment_size=128, + context_config=(0, 0), + max_memory_size=0, + scaled_init=True, + std_scale=None, + activation_fn="relu", + tanh_on_mem=False, + mini_batches=False, + negative_inf="-inf", + deep_init=True, + summarization_method="mean", + max_relative_position=0, + rpe_old_option=True, + ): + super().__init__(None) + if input_dim % num_heads: + raise ValueError( + "input_dim ({}) must be divisible by num_heads ({})".format( + input_dim, num_heads + ) + ) + + # we used to support growing memory size. However, it will cause + # cross stream batching failure. Now we need to have exact max memory size + if max_memory_size < 0: + raise ValueError("max_memory_size must be >= 0") + + # Only assign right_context. In decoding, left context will be cached. 
+ # No need to let the online decoder to re-assign the left context + self.left_context, self.right_context = context_config + self.segment_size = segment_size + self.memory_dim = input_dim + self.max_memory_size = max_memory_size + self.mini_batches = mini_batches + if self.max_memory_size != 0: + self.use_mem = True + else: + self.use_mem = False + + self.memory_op = SummarizationLayer( + summarization_method, segment_size, input_dim + ) + + self.layers = torch.nn.ModuleList() + self.num_layers = num_layers + self.max_relative_position = max_relative_position + if self.max_relative_position > 0: + self.use_rpe = True + else: + self.use_rpe = False + for i in range(self.num_layers): + if deep_init: + layer_index = i + else: + layer_index = -1 + + self.layers.append( + NoSegAugmentedMemoryTransformer( + num_heads=num_heads, + input_dim=input_dim, + ffn_dim=ffn_dim, + dropout_in_attn=dropout_in_attn, + dropout_on_attn=dropout_on_attn, + dropout_on_fc1=dropout_on_fc1, + dropout_on_fc2=dropout_on_fc2, + segment_size=segment_size, + std_scale=std_scale, + activation_fn=activation_fn, + tanh_on_mem=tanh_on_mem, + scaled_init=scaled_init, + use_mem=self.use_mem, + mini_batches=mini_batches, + negative_inf=negative_inf, + layer_index=layer_index, + summarization_method=summarization_method, + max_relative_position=max_relative_position, + rpe_old_option=rpe_old_option, + ) + ) + + def set_mini_batches(self, mini_batches): + # handy function only used for unit test + self.mini_batches = mini_batches + for layer in self.layers: + layer.set_mini_batches(mini_batches) + + def _get_relative_position( + self, + input: Tensor, + max_relative_position: int, + left_context_length: int, + past_length: int, + is_decoding: bool, + ): + # For training, we copy the right context to the start of the utterance + # First dimension in distance is corresponding to query. + # [right context, utterance, summary vector] + # Second dimension in distance is corresponding to key. 
+ # [Memory bank, right context, utterance] + # For summary vector in query part, the distance with + # all other position is 2*max_position. For memory bank in key, + # the distance with all other positions is 0. + + T, B, D = input.shape + num_segs = math.ceil((T - self.right_context) / self.segment_size) + + # utterance + u_st = past_length * self.segment_size + u_ed = u_st + T + utterance_ranges = torch.arange(u_st, u_ed - self.right_context) + + # left context. Only in minibatch or decoding + left_context_ranges = torch.arange(u_st - left_context_length, u_st) + + # Right context block + # right context + utterance + right_context_blocks = [] + for i in range(0, num_segs - 1): + st = (i + 1) * self.segment_size + u_st + ed = st + self.right_context + assert ed < u_ed + temp = torch.arange(st, ed) + right_context_blocks.append(temp) + right_context_blocks.append(torch.arange(u_ed - self.right_context, u_ed)) + right_context_ranges = torch.cat(right_context_blocks) + + if self.use_mem: + # Memory bank + # The position for memory -n, .., -1 + if is_decoding: + memory_size = min(past_length, self.max_memory_size) + else: + memory_size = num_segs + past_length - 1 + memory_bank_ranges = torch.arange( + -max_relative_position - 1, -max_relative_position - 1 - memory_size, -1 + ) + + # summary vector + # The position for summary vector as the T+max_relative_position+1. 
+ # After the clamping, the relative position is max_relative_position + summary_pos_st = u_ed + max_relative_position + 1 + summary_vector_ranges = torch.arange( + summary_pos_st, summary_pos_st + num_segs + ) + + key_ranges = torch.cat( + [ + memory_bank_ranges, + right_context_ranges, + left_context_ranges, + utterance_ranges, + ] + ) + + query_ranges = torch.cat( + [right_context_ranges, utterance_ranges, summary_vector_ranges] + ) + else: + key_ranges = torch.cat( + [right_context_ranges, left_context_ranges, utterance_ranges] + ) + + query_ranges = torch.cat([right_context_ranges, utterance_ranges]) + + distance = key_ranges[None, :] - query_ranges[:, None] + distance_clamp = ( + torch.clamp(distance, -max_relative_position, max_relative_position) + + max_relative_position + ) + distance_clamp = distance_clamp.to(input.device).long().detach() + return distance_clamp + + def _get_attention_mask(self, input, past_length=0, left_context_cache=0): + # attention mask for each query contains three parts: + # 1. memory part + # 2. left_context + segment + # 3. right_context_block + # so for each segment and its correspoinding right context block, + # the attention matrix is formed by 9 parts: + # [0, m, 0, 0, right_context, 0, 0, seg, 0] + # [before memory, memory, after memory, before right context, right_context, + # after right context, before seg, seg, after seg] + # + # Query is formed in the way as [right_context_blocks, utterance, summary] + # + # Note: put m and right_context before segment is convenient + # for padding_mask operation. + # Key lengths = m_length + right_context_block_length + lengths + utterance_length, batch_size, _ = input.shape + summary_length = math.ceil(utterance_length / self.segment_size) + num_segs = summary_length + rc_length = self.right_context * num_segs + rc = self.right_context + lc = self.left_context + + # using mini-batches, there is left context cache available for current + # sequence. 
+ lcc = left_context_cache + + # max_memory_size is 0 then we don't have memory and summary + # past_length is the memory carry from previous sequence + if self.use_mem: + mem_length = num_segs - 1 + past_length + else: + mem_length = 0 + rc_mask = [] + query_mask = [] + summary_mask = [] + for j in range(0, num_segs): + ssize = min(self.segment_size, utterance_length - j * self.segment_size) + + rc_size = rc + rc_mat = [] + q_mat = [] + s_mat = [] + m_start = max(j + past_length - self.max_memory_size, 0) + + # max_memory_size is 0, then we don't use memory + if self.use_mem: + # part 0: before memory + rc_mat.append(input.new_zeros(rc_size, m_start)) + q_mat.append(input.new_zeros(ssize, m_start)) + s_mat.append(input.new_zeros(1, m_start)) + + # part 1: memory + col_1 = j + past_length - m_start + rc_mat.append(torch.ones(rc_size, col_1, device=input.device)) + q_mat.append(torch.ones(ssize, col_1, device=input.device)) + # based on D22875746, disable summary query attention + # on memeory is better for long form utterance + s_mat.append(input.new_zeros(1, col_1)) + + # part 2: after memory + col_2 = mem_length - (j + past_length) + rc_mat.append(input.new_zeros(rc_size, col_2)) + q_mat.append(input.new_zeros(ssize, col_2)) + s_mat.append(input.new_zeros(1, col_2)) + + # part 3: before right context + rc_start = j * rc + rc_mat.append(input.new_zeros(rc_size, rc_start)) + q_mat.append(input.new_zeros(ssize, rc_start)) + s_mat.append(input.new_zeros(1, rc_start)) + + # part 4: right context + rc_end = rc_start + rc + col_4 = rc + rc_mat.append(torch.ones(rc_size, col_4, device=input.device)) + q_mat.append(torch.ones(ssize, col_4, device=input.device)) + s_mat.append(torch.ones(1, col_4, device=input.device)) + + # part 5: after right context + col_5 = rc_length - rc_end + rc_mat.append(input.new_zeros(rc_size, col_5)) + q_mat.append(input.new_zeros(ssize, col_5)) + s_mat.append(input.new_zeros(1, col_5)) + + # part 6: before query segment + seg_start = max(j * 
self.segment_size + lcc - lc, 0) + rc_mat.append(input.new_zeros(rc_size, seg_start)) + q_mat.append(input.new_zeros(ssize, seg_start)) + s_mat.append(input.new_zeros(1, seg_start)) + + # part 7: query segment + # note: right context is put in right context block + # here we only need to consider about left context + seg_end = min((j + 1) * self.segment_size + lcc, utterance_length + lcc) + col_7 = seg_end - seg_start + rc_mat.append(torch.ones(rc_size, col_7, device=input.device)) + q_mat.append(torch.ones(ssize, col_7, device=input.device)) + s_mat.append(torch.ones(1, col_7, device=input.device)) + + # part 8: after query segment + col_8 = utterance_length + lcc - seg_end + rc_mat.append(input.new_zeros(rc_size, col_8)) + q_mat.append(input.new_zeros(ssize, col_8)) + s_mat.append(input.new_zeros(1, col_8)) + + rc_mask.append(torch.cat(rc_mat, dim=1)) + query_mask.append(torch.cat(q_mat, dim=1)) + summary_mask.append(torch.cat(s_mat, dim=1)) + + # no memory, then we don't need summary either + if self.use_mem: + attention_mask = ( + 1 + - torch.cat( + [ + torch.cat(rc_mask, dim=0), + torch.cat(query_mask, dim=0), + torch.cat(summary_mask, dim=0), + ], + dim=0, + ) + ).to(torch.bool) + else: + attention_mask = ( + 1 + - torch.cat( + [torch.cat(rc_mask, dim=0), torch.cat(query_mask, dim=0)], dim=0 + ) + ).to(torch.bool) + + return attention_mask + + @torch.jit.export + def init_state( + self, batch_size: int, device: Optional[Device] = None + ) -> List[Tensor]: + empty_memory = torch.zeros( + self.num_layers, + self.max_memory_size, + batch_size, + self.memory_dim, + device=device, + ) + left_context_key = torch.zeros( + self.num_layers, + self.left_context, + batch_size, + self.memory_dim, + device=device, + ) + left_context_val = torch.zeros( + self.num_layers, + self.left_context, + batch_size, + self.memory_dim, + device=device, + ) + past_length = torch.zeros(1, batch_size, dtype=torch.int32, device=device) + + return [empty_memory, left_context_key, 
left_context_val, past_length] + + @torch.jit.export + def batch_state(self, states: List[List[Tensor]]) -> List[Tensor]: + if len(states) == 0: + return [] + batched_m = [] + batched_lc_key = [] + batched_lc_val = [] + batched_past_length = [] + for state in states: + if len(state) == 0: + continue + m, lc_key, lc_val, past_length = state + batched_m.append(m) + batched_lc_key.append(lc_key) + batched_lc_val.append(lc_val) + batched_past_length.append(past_length) + + if ( + (len(batched_m) == 0) + or (len(batched_lc_key) == 0) + or (len(batched_lc_val) == 0) + or (len(batched_past_length) == 0) + ): + return [ + torch.tensor([]), + torch.tensor([]), + torch.tensor([]), + torch.tensor([]), + ] + + batched_m = torch.cat(batched_m, dim=2) + batched_lc_key = torch.cat(batched_lc_key, dim=2) + batched_lc_val = torch.cat(batched_lc_val, dim=2) + batched_past_length = torch.cat(batched_past_length, dim=1) + return [batched_m, batched_lc_key, batched_lc_val, batched_past_length] + + @torch.jit.export + def reorder_state(self, state: List[Tensor], indices: Tensor) -> List[Tensor]: + if len(state) == 0: + return [] + m, lc_key, lc_val, past_length = state + indices = indices.to(device=m.device) + reord_m = torch.index_select(m, 2, indices) + reord_lc_key = torch.index_select(lc_key, 2, indices) + reord_lc_val = torch.index_select(lc_val, 2, indices) + reord_past_length = torch.index_select(past_length, 1, indices) + return [reord_m, reord_lc_key, reord_lc_val, reord_past_length] + + @torch.jit.export + def reset_state(self, state: List[Tensor], indices: Tensor) -> List[Tensor]: + m, lc_key, lc_val, past_length = state + m = m.index_fill(dim=2, index=indices, value=0.0) + lc_key = lc_key.index_fill(dim=2, index=indices, value=0.0) + lc_val = lc_val.index_fill(dim=2, index=indices, value=0.0) + past_length = past_length.index_fill(dim=1, index=indices, value=0) + + return [m, lc_key, lc_val, past_length] + + @torch.jit.export + def state_size(self) -> int: + return 4 + + 
@torch.jit.export + def batch_size_in_state( + self, state: Optional[List[Tensor]], sloppy: bool = True + ) -> Optional[int]: + if state is None: + return None + return state[0].size(2) + + def gen_summary_queries(self, input): + sum_input = self.memory_op(input) + return sum_input + + def _gen_right_context_padded_input(self, input): + # This function deals with input that is already + # padded with right context (e.g. minibatch training) + right_context_blocks = [] + T, B, D = input.shape + num_segs = math.ceil((T - self.right_context) / self.segment_size) + for i in range(0, num_segs - 1): + st = (i + 1) * self.segment_size + ed = st + self.right_context + assert ed < T + temp = input[st:ed, :, :] + right_context_blocks.append(temp) + + # last segment right context is already available + right_context_blocks.append(input[T - self.right_context :, :, :]) + return torch.cat(right_context_blocks, dim=0) + + def _gen_segs_right_context(self, input, lengths): + segments = [] + T, B, D = input.size() + nT = T - self.right_context + + # assume input is right context padded + num_segs = math.ceil(nT / self.segment_size) + # pad zeros to the utterance to make sure each + # segment has the same right context. For the + for i in range(0, num_segs - 1): + st = i * self.segment_size + ed = min(T, st + self.segment_size + self.right_context) + temp = input[st:ed, :, :] + rest_lengths = torch.clamp( + lengths - self.segment_size, min=0, max=nT - (i + 1) * self.segment_size + ) + segments.append((temp, lengths - rest_lengths + self.right_context)) + lengths = rest_lengths + + last_seg = input[st + self.segment_size :, :, :] + segments.append((last_seg, rest_lengths + self.right_context)) + + return segments + + @torch.jit.unused + def forward( + self, input: Tensor, padding_masks: Tensor, state: Optional[List[Tensor]] = None + ) -> Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]: + # Xutai: originally the second argument is lengths. 
+ lengths = (~padding_masks).sum(dim=1).long() + # mini batch training. + if self.mini_batches: + return self.forward_mini_batches(input, lengths, state) + + # regular full sequence training. Note, assume the right context in provided + # in the input. + T, B, D = input.size() + right_context_blocks = self._gen_right_context_padded_input(input) + + # generate the relative positional embedding + if self.use_rpe: + rpe = self._get_relative_position( + input=input, + max_relative_position=self.max_relative_position, + left_context_length=0, + past_length=0, + is_decoding=False, + ) + else: + rpe = None + input = input[: T - self.right_context, :, :] + + attention_mask = self._get_attention_mask(input) + + # firt layer use each segment mean as memory + # ignore the last one seg average + if self.use_mem: + mems = self.gen_summary_queries(input)[:-1, :, :] + else: + mems = torch.zeros(0, input.size(1), input.size(2), device=input.device) + mems = mems.type_as(input) + + output = input + all_outputs = [] + + for layer in self.layers: + output, mems, right_context_blocks, _, _ = layer( + input=output, + lengths=lengths, + attention_mask=attention_mask, + mems=mems, + right_context_blocks=right_context_blocks, + pre_mems=None, + left_context_key=None, + left_context_val=None, + rpe=rpe, + ) + all_outputs.append(output) + return output, padding_masks, [], all_outputs + + def forward_jit_mini_batch_init( + self, + seg: Tensor, + state: Optional[List[Tensor]] = None, + is_decoding: bool = False, + ): + # Prepare state. In whole sequence training, state is ignored. + # For minibatch training, we need to prepare state + if state is None: + state = self.init_state(batch_size=seg.size(1), device=seg.device) + if seg.dtype == torch.half: + state = [state[0].half(), state[1].half(), state[2].half(), state[3]] + + if self.use_mem: + # note input average only on seg, not on right context + # first layer use each segmetn mean as memory. 
the last + # one segment average is used in state + full_mems = self.gen_summary_queries(seg) + if is_decoding: + mems = full_mems[0:1, :, :] + state_mems = torch.cat([state[0][0], mems], dim=0) + else: + mems = full_mems[:-1, :, :] + state_mems = torch.cat([state[0][0], full_mems], dim=0) + else: + mems = state[0][0] + state_mems = mems + + # track processed segment number or memory number + # the same batch as the same bumber of past length + past_length = state[3][0][0].item() + past_left_context = min(past_length * self.segment_size, self.left_context) + past_length = min(self.max_memory_size, past_length) + + return state, mems, state_mems, past_length, past_left_context + + def state_update_before( + self, layer: int, state: List[Tensor], past_length: int, past_left_context: int + ): + pre_mems = state[0][layer][self.max_memory_size - past_length :, :, :] + lc_key = state[1][layer][self.left_context - past_left_context :, :, :] + lc_val = state[2][layer][self.left_context - past_left_context :, :, :] + return pre_mems, lc_key, lc_val + + def state_update_after( + self, + layer: int, + state: List[Tensor], + mems: Tensor, + next_key: Tensor, + next_val: Tensor, + mems_list: List[Tensor], + lc_key_list: List[Tensor], + lc_val_list: List[Tensor], + ): + # mems is used for next layer + if layer < self.num_layers - 1: + state_mems = torch.cat([state[0][layer + 1], mems], dim=0) + mems_list.append(state_mems[-self.max_memory_size :, :, :]) + + # when mems pass to next sequence, we need the last memory. 
when mems + # use for the next layer, we can ignore the last memory + mems = mems[:-1, :, :] + + # note state[1][i] and state[2][i] original length equals to self.left_context + new_k = torch.cat([state[1][layer], next_key], dim=0) + new_v = torch.cat([state[2][layer], next_val], dim=0) + lc_key_list.append(new_k[-self.left_context :, :, :]) + lc_val_list.append(new_v[-self.left_context :, :, :]) + return mems_list, lc_key_list, lc_val_list, mems + + def state_update_after_loop( + self, + state: List[Tensor], + mems_list: List[Tensor], + lc_key_list: List[Tensor], + lc_val_list: List[Tensor], + update_length: int, + ): + state[0] = torch.stack(mems_list, dim=0) + state[1] = torch.stack(lc_key_list, dim=0) + state[2] = torch.stack(lc_val_list, dim=0) + state[3] = state[3] + update_length + return state + + @torch.jit.unused + def forward_mini_batches( + self, input: Tensor, lengths: Tensor, state: Optional[List[Tensor]] = None + ) -> Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]: + T, B, D = input.size() + + # input without right context + seg = input[: T - self.right_context, :, :] + + # get right context blocks + right_context_blocks = self._gen_right_context_padded_input(input) + + mems_list = [] + lc_key_list = [] + lc_val_list = [] + results = self.forward_jit_mini_batch_init(seg, state, False) + state, mems, state_mems, past_length, past_left_context = results + + # relative position embedding + if self.use_rpe: + rpe = self._get_relative_position( + input=input, + max_relative_position=self.max_relative_position, + left_context_length=past_left_context, + past_length=past_length, + is_decoding=False, + ) + else: + rpe = None + + # get attention mask based on seg (not include right context) and available + # left context + attention_mask = self._get_attention_mask(seg, past_length, past_left_context) + mems_list.append(state_mems[-self.max_memory_size :, :, :]) + output = seg + i = 0 + all_outputs = [] + for layer in self.layers: + # In order to make cross 
stream batching work, mem, left context key + # and left context value in the state should always be the same shape. + # We use the past length to track the processed segment number. In this + # way, we take out the essential memory, left context key and left + # context val from the state. After finish the forward for current segment + # we add the new memory, left context key and left context value into the + # staate and trim out the oldest part to keep the shape consistent. + pre_mems, lc_key, lc_val = self.state_update_before( + i, state, past_length, past_left_context + ) + + output, mems, right_context_blocks, next_key, next_val = layer.forward( + input=output, + lengths=lengths, + attention_mask=attention_mask, + mems=mems, + right_context_blocks=right_context_blocks, + pre_mems=pre_mems, + left_context_key=lc_key, + left_context_val=lc_val, + rpe=rpe, + ) + all_outputs.append(output) + mems_list, lc_key_list, lc_val_list, mems = self.state_update_after( + layer=i, + state=state, + mems=mems, + next_key=next_key, + next_val=next_val, + mems_list=mems_list, + lc_key_list=lc_key_list, + lc_val_list=lc_val_list, + ) + + i += 1 + + # update state + update_length = math.ceil((T - self.right_context) / self.segment_size) + state = self.state_update_after_loop( + state=state, + mems_list=mems_list, + lc_key_list=lc_key_list, + lc_val_list=lc_val_list, + update_length=update_length, + ) + + return output, lengths, state, all_outputs + + def forward_jit_test( + self, input: Tensor, lengths: Tensor, state: Optional[List[Tensor]] = None + ) -> Tuple[Tensor, Tensor, List[Tensor]]: + """ + This one simulate sequence encoder forward jit. This is for unit test purpose. + It is not used in training or decoding. Note, extra_right_context is set in + the model. In unit test, input = [utterance, right_context], lengths = + [utterance_length]. + args: + input: input utterance + lengths: utterance input length + state: None here. 
input is whole utterance + """ + # [TODO] sequence_to_segment has bug in lengths. + seg_src_tokens_lengths = self._gen_segs_right_context(input, lengths) + + seg_enc_tokens_lengths: List[Tuple[Tensor, Tensor]] = [] + state: Optional[List[Tensor]] = None + for seg_src_tokens, seg_src_lengths in seg_src_tokens_lengths: + seg_enc_tokens, seg_enc_lengths, state = self.forward_jit( + input=seg_src_tokens, lengths=seg_src_lengths, state=state + ) + seg_enc_tokens_lengths.append((seg_enc_tokens, seg_enc_lengths)) + + enc_tokens, enc_lengths = segments_to_sequence( + segments=seg_enc_tokens_lengths, time_axis=0 + ) + + state = [] # returns trivial state + + return enc_tokens, enc_lengths, state + + @torch.jit.export + def forward_jit( + self, input: Tensor, lengths: Tensor, state: Optional[List[Tensor]] = None + ) -> Tuple[Tensor, Tensor, List[Tensor]]: + """ + Forward helper for online decoding. + + args: + input: [seg, right_context]. We assume in online we + always padding the right context to the preset right context size. + For the last segment, we may have short segment size, but right + context size is the same as other segments + lengths: utterance input length is the utterance segment length and + right context size + state: [memory, left_context_key, left_context_val]. 
To improve throughput, + in addition to memory, we also cache key and value for left_context in + multihead self-attention + """ + # In online decoding, input = [segment, right_context] + # Lengths = [segment_length, right_context_length] + # so we need strip right context in output + T, B, D = input.size() + rc_str = T - self.right_context + rc_end = T + right_context_blocks = input[rc_str:rc_end, :, :] + seg = input[:rc_str, :, :] + lengths = torch.clamp(lengths - self.right_context, min=0) + mems_list = [] + lc_key_list = [] + lc_val_list = [] + + results = self.forward_jit_mini_batch_init(seg, state, True) + state, mems, state_mems, past_length, past_left_context = results + + # relative position embedding + if self.use_rpe: + rpe = self._get_relative_position( + input=input, + max_relative_position=self.max_relative_position, + left_context_length=past_left_context, + past_length=past_length, + is_decoding=True, + ) + else: + rpe = None + + # memory for first layer. + mems_list.append(state_mems[-self.max_memory_size :, :, :]) + output = seg + i = 0 + for layer in self.layers: + # In order to make cross stream batching work, mem, left context key + # and left context value in the state should always be the same shape. + # We use the past length to track the processed segment number. In this + # way, we take out the essential memory, left context key and left + # context val from the state. After finish the forward for current segment + # we add the new memory, left context key and left context value into the + # staate and trim out the oldest part to keep the shape consistent. 
+ true_mems, lc_key, lc_val = self.state_update_before( + layer=i, + state=state, + past_length=past_length, + past_left_context=past_left_context, + ) + + output, mems, right_context_blocks, next_key, next_val = layer.forward_jit( + input=output, + lengths=lengths, + mems=true_mems, + right_context_blocks=right_context_blocks, + left_context_key=lc_key, + left_context_val=lc_val, + rpe=rpe, + ) + # mems is used for next layer + mems_list, lc_key_list, lc_val_list, _ = self.state_update_after( + layer=i, + state=state, + mems_list=mems_list, + mems=mems, + next_key=next_key, + next_val=next_val, + lc_key_list=lc_key_list, + lc_val_list=lc_val_list, + ) + i += 1 + + # update state + state = self.state_update_after_loop( + state=state, + mems_list=mems_list, + lc_key_list=lc_key_list, + lc_val_list=lc_val_list, + update_length=1, + ) + + return output, lengths, state + + def quantize_(self, params=None): + if params and "per_channel" in params and params["per_channel"]: + qconfig = per_channel_dynamic_qconfig + else: + qconfig = default_dynamic_qconfig + quantization.quantize_dynamic( + self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True + ) + return self + + +# ------------------------------------------------------------------------------ +# Emformer encoder for seq2seq model +# This is a wrapper over the original emformer +# ------------------------------------------------------------------------------ +def emformer_encoder(klass): + class SpeechEncoder(klass): + def __init__(self, args): + super().__init__(args) + stride = SpeechEncoder.conv_layer_stride(args) + trf_left_context = args.segment_left_context // stride + trf_right_context = args.segment_right_context // stride + context_config = [trf_left_context, trf_right_context] + self.transformer_layers = nn.ModuleList( + [ + NoSegAugmentedMemoryTransformerEncoderLayer( + input_dim=args.encoder_embed_dim, + num_heads=args.encoder_attention_heads, + ffn_dim=args.encoder_ffn_embed_dim, + 
num_layers=args.encoder_layers, + dropout_in_attn=args.dropout, + dropout_on_attn=args.dropout, + dropout_on_fc1=args.dropout, + dropout_on_fc2=args.dropout, + activation_fn=args.activation_fn, + context_config=context_config, + segment_size=args.segment_length, + max_memory_size=args.max_memory_size, + scaled_init=True, # TODO: use constant for now. + tanh_on_mem=args.amtrf_tanh_on_mem, + ) + ] + ) + + def forward(self, src_tokens, src_lengths): + encoder_out = super().forward(src_tokens, src_lengths) + output = encoder_out["encoder_out"][0] + encoder_padding_masks = encoder_out["encoder_padding_mask"][0] + + # This is because that in the original implementation + # the output didn't consider the last segment as right context. + encoder_padding_masks = encoder_padding_masks[:, : output.size(0)] + + return { + "encoder_out": [output], + "encoder_padding_mask": [encoder_padding_masks], + "encoder_embedding": [], + "encoder_states": [], + "src_tokens": [], + "src_lengths": [], + } + + @staticmethod + def conv_layer_stride(args): + # TODO: make it configurable from the args + return 4 + + SpeechEncoder.__name__ = klass.__name__ + return SpeechEncoder diff --git a/fairseq/fairseq/models/speech_to_text/multi_modality_model.py b/fairseq/fairseq/models/speech_to_text/multi_modality_model.py new file mode 100644 index 0000000..0464216 --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/multi_modality_model.py @@ -0,0 +1,49 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from fairseq.models import FairseqDecoder, FairseqEncoder + + +# a container for different encoders with training samples from different modality +# each time, only one encoder is selected +class MultiModalityEncoder(FairseqEncoder): + def __init__(self, dictionary): + super().__init__(dictionary) + + def select_encoder(self, mode, **kwargs): + raise NotImplementedError("Model must implement the select_encoder method") + return None, kwargs + + # def post_encoder(self, encoder_out, src_tokens, src_lengths, mode, **kwargs): + # # Default do nothing + # return encoder_out + + # get sample data from JointSpeechTextDataset + def forward(self, src_tokens, src_lengths=None, mode="", **kwargs): + encoder, kwargs = self.select_encoder(mode, **kwargs) + # return self.post_encoder(encoder(src_tokens, src_lengths, **kwargs), src_tokens, src_lengths, mode, **kwargs) + return encoder(src_tokens, src_lengths, **kwargs) + + +# a container for different decoders with training samples from different modality +# each time, only one decoder is selected +class MultiInputDecoder(FairseqDecoder): + def __init__(self, dictionary): + super().__init__(dictionary) + + def select_decoder(self, mode, **kwargs): + raise NotImplementedError("Model must implement the select_decoder method") + return None, kwargs + + def forward( + self, prev_output_tokens, encoder_out, incremental_state=None, mode="", **kwargs + ): + decoder, kwargs = self.select_decoder(mode, **kwargs) + return decoder( + prev_output_tokens, + encoder_out, + incremental_state=incremental_state, + **kwargs + ) diff --git a/fairseq/fairseq/models/speech_to_text/s2t_conformer.py b/fairseq/fairseq/models/speech_to_text/s2t_conformer.py new file mode 100644 index 0000000..79dbbec --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/s2t_conformer.py @@ -0,0 +1,234 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import math +from pathlib import Path + +import torch + +from fairseq import checkpoint_utils +from fairseq.data.data_utils import lengths_to_padding_mask +from fairseq.models import FairseqEncoder, register_model, register_model_architecture +from fairseq.models.speech_to_text.modules.convolution import ( + Conv1dSubsampler, + Conv2dSubsampler, +) +from fairseq.models.speech_to_text.s2t_transformer import ( + S2TTransformerEncoder, + S2TTransformerModel, +) +from fairseq.models.speech_to_text.s2t_transformer import ( + base_architecture as transformer_base_architecture, +) +from fairseq.modules import PositionalEmbedding, RelPositionalEncoding +from fairseq.modules.conformer_layer import ConformerEncoderLayer + +logger = logging.getLogger(__name__) + + +class S2TConformerEncoder(FairseqEncoder): + """Conformer Encoder for speech translation based on https://arxiv.org/abs/2005.08100""" + + def __init__(self, args): + super().__init__(None) + + self.encoder_freezing_updates = args.encoder_freezing_updates + self.num_updates = 0 + + self.embed_scale = math.sqrt(args.encoder_embed_dim) + if args.no_scale_embedding: + self.embed_scale = 1.0 + self.padding_idx = 1 + self.conv_version = args.conv_version + if self.conv_version == "s2t_transformer": + self.subsample = Conv1dSubsampler( + args.input_feat_per_channel * args.input_channels, + args.conv_channels, + args.encoder_embed_dim, + [int(k) for k in args.conv_kernel_sizes.split(",")], + ) + elif self.conv_version == "convtransformer": + self.subsample = Conv2dSubsampler( + args.input_channels, + args.input_feat_per_channel, + args.conv_out_channels, + args.encoder_embed_dim, + ) + self.pos_enc_type = args.pos_enc_type + if self.pos_enc_type == "rel_pos": + self.embed_positions = RelPositionalEncoding( + args.max_source_positions, args.encoder_embed_dim + ) + elif 
self.pos_enc_type == "rope": + self.embed_positions = None + else: # Use absolute positional embedding + self.pos_enc_type = "abs" + self.embed_positions = PositionalEmbedding( + args.max_source_positions, args.encoder_embed_dim, self.padding_idx + ) + + self.linear = torch.nn.Linear(args.encoder_embed_dim, args.encoder_embed_dim) + self.dropout = torch.nn.Dropout(args.dropout) + self.conformer_layers = torch.nn.ModuleList( + [ + ConformerEncoderLayer( + embed_dim=args.encoder_embed_dim, + ffn_embed_dim=args.encoder_ffn_embed_dim, + attention_heads=args.encoder_attention_heads, + dropout=args.dropout, + depthwise_conv_kernel_size=args.depthwise_conv_kernel_size, + attn_type=args.attn_type, + pos_enc_type=self.pos_enc_type, + use_fp16=args.fp16, + ) + for _ in range(args.encoder_layers) + ] + ) + + def _forward(self, src_tokens, src_lengths, return_all_hiddens=False): + """ + Args: + src_tokens: Input source tokens Tensor of shape B X T X C + src_lengths: Lengths Tensor corresponding to input source tokens + return_all_hiddens: If true will append the self attention states to the encoder states + Returns: + encoder_out: Tensor of shape B X T X C + encoder_padding_mask: Optional Tensor with mask + encoder_embedding: Optional Tensor. Always empty here + encoder_states: List of Optional Tensors wih self attention states + src_tokens: Optional Tensor. Always empty here + src_lengths: Optional Tensor. 
def forward(self, src_tokens, src_lengths, return_all_hiddens=False):
    """Encode a batch, without gradient tracking while the encoder is frozen.

    The encoder counts as frozen as long as ``self.num_updates`` is smaller
    than ``self.encoder_freezing_updates``.

    Args:
        src_tokens: Forwarded unchanged to ``self._forward``.
        src_lengths: Forwarded unchanged to ``self._forward``.
        return_all_hiddens: Forwarded to ``self._forward`` by keyword.

    Returns:
        Whatever ``self._forward`` returns.
    """
    still_frozen = self.num_updates < self.encoder_freezing_updates
    if not still_frozen:
        return self._forward(
            src_tokens, src_lengths, return_all_hiddens=return_all_hiddens
        )

    # Frozen phase: run the same computation but keep autograd out of it.
    with torch.no_grad():
        return self._forward(
            src_tokens, src_lengths, return_all_hiddens=return_all_hiddens
        )
@register_model_architecture("s2t_conformer", "s2t_conformer")
def conformer_base_architecture(args):
    """Fill in the Conformer defaults on *args*, then the Transformer ones.

    Every attribute listed below is set to its default only when *args* does
    not already define it; existing values are kept. Afterwards the remaining
    defaults are supplied by ``transformer_base_architecture``.
    """
    conformer_defaults = (
        ("attn_type", None),
        ("pos_enc_type", "abs"),
        ("input_feat_per_channel", 80),
        ("input_channels", 1),
        ("max_source_positions", 6000),
        ("encoder_embed_dim", 256),
        ("encoder_ffn_embed_dim", 2048),
        ("encoder_attention_heads", 4),
        ("dropout", 0.1),
        ("encoder_layers", 16),
        ("depthwise_conv_kernel_size", 31),
    )
    for option, fallback in conformer_defaults:
        setattr(args, option, getattr(args, option, fallback))

    transformer_base_architecture(args)
+ +import logging +import math +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import checkpoint_utils, utils +from fairseq.data.data_utils import lengths_to_padding_mask +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderDecoderModel, + register_model, + register_model_architecture, +) +from fairseq.models.speech_to_text.hub_interface import S2THubInterface +from fairseq.models.speech_to_text.modules.convolution import ( + Conv1dSubsampler, + Conv2dSubsampler, +) +from fairseq.models.transformer import Embedding, TransformerDecoder +from fairseq.modules import ( + FairseqDropout, + LayerNorm, + PositionalEmbedding, + TransformerEncoderLayer, +) + +logger = logging.getLogger(__name__) + + +@register_model("s2t_transformer") +class S2TTransformerModel(FairseqEncoderDecoderModel): + """Adapted Transformer model (https://arxiv.org/abs/1706.03762) for + speech-to-text tasks. The Transformer encoder/decoder remains the same. 
@classmethod
def hub_models(cls):
    """Return the download URL of each published S2T Transformer archive.

    Returns:
        A dict mapping model id to the URL of its ``.tar.gz`` archive.
    """
    # Use HTTPS: the archives contain checkpoints that get deserialised
    # after download, so they should not travel over an unauthenticated
    # plain-HTTP connection.
    base_url = "https://dl.fbaipublicfiles.com/fairseq/s2t"
    model_ids = [
        "s2t_transformer_s-en-asr-librispeech",
        "s2t_transformer_m-en-asr-librispeech",
        "s2t_transformer_l-en-asr-librispeech",
    ]
    return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}
help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + help="if True, dont scale embeddings", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model 
@classmethod
def build_model(cls, args, task):
    """Build a new model instance.

    Args:
        args: Model configuration. ``base_architecture`` is applied to it
            first, and ``args.tgt_dict_size`` is set as a side effect.
        task: Task object; its ``target_dictionary`` sizes the decoder
            embedding table and is passed on to ``build_decoder`` via *task*.

    Returns:
        ``cls(encoder, decoder)`` built through ``cls.build_encoder`` and
        ``cls.build_decoder``.
    """
    # make sure all arguments are present in older models
    base_architecture(args)

    target_dict = task.target_dictionary
    vocab_size = len(target_dict)
    decoder_embed_tokens = Embedding(
        vocab_size, args.decoder_embed_dim, target_dict.pad()
    )

    # The encoder constructor may need the target vocabulary size, so
    # record it on args before building the encoder.
    args.tgt_dict_size = vocab_size

    encoder = cls.build_encoder(args)
    decoder = cls.build_decoder(args, task, decoder_embed_tokens)
    return cls(encoder, decoder)
def forward(self, src_tokens, src_lengths, prev_output_tokens):
    """Run the encoder, then the decoder on the encoder output.

    The forward method inherited from the base class takes ``**kwargs``,
    which torchscript does not support, so this override spells out its
    parameters explicitly.

    Returns:
        The value returned by ``self.decoder``.
    """
    return self.decoder(
        prev_output_tokens=prev_output_tokens,
        encoder_out=self.encoder(src_tokens=src_tokens, src_lengths=src_lengths),
    )
def _forward(self, src_tokens, src_lengths, return_all_hiddens=False):
    """Subsample the input, add positions and run the Transformer layers.

    Args:
        src_tokens: Input handed unchanged to ``self.subsample``.
        src_lengths: Lengths handed unchanged to ``self.subsample``.
        return_all_hiddens: If true, the output of every layer is collected
            in ``encoder_states``.

    Returns:
        A dict of lists with the keys ``encoder_out``,
        ``encoder_padding_mask``, ``encoder_embedding``, ``encoder_states``,
        ``src_tokens`` and ``src_lengths``; the last two and
        ``encoder_embedding`` are always empty.
    """
    hidden, subsampled_lengths = self.subsample(src_tokens, src_lengths)
    hidden = self.embed_scale * hidden

    pad_mask = lengths_to_padding_mask(subsampled_lengths)
    hidden += self.embed_positions(pad_mask).transpose(0, 1)
    hidden = self.dropout_module(hidden)

    per_layer_outputs = []
    for encoder_layer in self.transformer_layers:
        hidden = encoder_layer(hidden, pad_mask)
        if return_all_hiddens:
            per_layer_outputs.append(hidden)

    final_norm = self.layer_norm
    if final_norm is not None:
        hidden = final_norm(hidden)

    # The mask is only reported when at least one position is padded.
    reported_masks = [pad_mask] if pad_mask.any() else []

    return {
        "encoder_out": [hidden],  # T x B x C
        "encoder_padding_mask": reported_masks,  # B x T
        "encoder_embedding": [],  # B x T x C
        "encoder_states": per_layer_outputs,  # List[T x B x C]
        "src_tokens": [],
        "src_lengths": [],
    }
class TransformerDecoderScriptable(TransformerDecoder):
    """``TransformerDecoder`` whose ``extract_features`` delegates to
    ``extract_features_scriptable`` and returns the encoder output as the
    extra value."""

    def extract_features(
        self,
        prev_output_tokens,
        encoder_out: Optional[Dict[str, List[Tensor]]] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        full_context_alignment: bool = False,
        alignment_layer: Optional[int] = None,
        alignment_heads: Optional[int] = None,
    ):
        """Extract decoder features through the scriptable parent method.

        All arguments are forwarded positionally, in order, to
        ``self.extract_features_scriptable``.

        Returns:
            A pair ``(x, extra)``. ``x`` is the first element returned by
            ``extract_features_scriptable`` (its second element is
            discarded). ``extra`` is ``{"encoder_out": encoder_out}`` when
            ``incremental_state`` is ``None`` and ``None`` otherwise.
        """
        # call scriptable method from parent class
        x, _ = self.extract_features_scriptable(
            prev_output_tokens,
            encoder_out,
            incremental_state,
            full_context_alignment,
            alignment_layer,
            alignment_heads,
        )
        # The encoder output is only exposed when no incremental state is in
        # use.
        extra = {"encoder_out": encoder_out} if incremental_state is None else None
        return x, extra
getattr(args, "conv_channels", 1024) # for Conv1d + args.conv_out_channels = getattr(args, "conv_out_channels", 256) # for Conv2d + args.conv_version = getattr(args, "conv_version", "s2t_transformer") + # Transformer + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 12) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", args.dropout) + args.activation_dropout = getattr(args, "activation_dropout", args.dropout) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + 
@register_model_architecture("s2t_transformer", "s2t_transformer_xs")
def s2t_transformer_xs(args):
    """Apply the ``s2t_transformer_xs`` defaults, then the ``_s`` ones.

    Attributes already present on *args* are kept; only missing ones receive
    the values below before ``s2t_transformer_s`` fills in the rest.
    """
    xs_defaults = (
        ("encoder_layers", 6),
        ("decoder_layers", 3),
        ("encoder_ffn_embed_dim", 256 * 4),
        ("dropout", 0.3),
    )
    for option, fallback in xs_defaults:
        setattr(args, option, getattr(args, option, fallback))

    s2t_transformer_s(args)
@register_model_architecture("s2t_transformer", "s2t_transformer_lp")
def s2t_transformer_lp(args):
    """Apply the ``s2t_transformer_lp`` defaults.

    Defaults ``encoder_layers`` to 16 when *args* does not define it, then
    defers to ``s2t_transformer_l`` for every other setting.
    """
    if not hasattr(args, "encoder_layers"):
        args.encoder_layers = 16
    s2t_transformer_l(args)
@staticmethod
def add_args(parser):
    """Register the speech-encoder specific options on *parser*.

    These are the extra parameters of the speech encoder besides those
    defined by the transformer model. Options are added in the order of the
    table below.

    Args:
        parser: Object exposing ``add_argument`` with the ``argparse``
            signature.
    """
    mask_strategies = ["static", "uniform", "normal", "poisson"]

    option_table = (
        (
            "--dropout-input",
            {
                "type": float,
                "metavar": "D",
                "help": "dropout to apply to the input (after feat extr)",
            },
        ),
        (
            "--dropout-features",
            {
                "type": float,
                "metavar": "D",
                "help": "dropout to apply to the unmasked features (after feat extr)",
            },
        ),
        (
            "--speech-extractor-mode",
            {
                "type": str,
                "default": "layer_norm",
                "choices": ["default", "layer_norm"],
                "help": "feature extractor norm",
            },
        ),
        (
            "--speech-conv-bias",
            {
                "action": "store_true",
                "help": "include bias in speech conv encoder",
            },
        ),
        (
            "--conv-feature-layers",
            {
                "default": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
                "help": "string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]",
            },
        ),
        (
            "--speech-mask-length",
            {"type": int, "help": "repeat the mask indices multiple times"},
        ),
        (
            "--speech-mask-prob",
            {"type": float, "help": "probability of replacing a token with mask"},
        ),
        (
            "--speech-mask-selection",
            {
                "type": str,
                "choices": list(mask_strategies),
                "help": "how to choose masks",
            },
        ),
        (
            "--speech-mask-other",
            {
                "type": float,
                "help": "stdev of the mask length in case of 'normal' selection strategy",
            },
        ),
        (
            "--speech-no-mask-overlap",
            {"action": "store_true", "help": "whether to allow masks to overlap"},
        ),
        (
            "--speech-mask-min-space",
            {
                "type": int,
                "help": "min space between spans (if no overlap is enabled)",
            },
        ),
        (
            "--speech-mask-channel-length",
            {"type": int, "help": "repeat the mask indices multiple times"},
        ),
        (
            "--speech-mask-channel-prob",
            {"type": float, "help": "probability of replacing a token with mask"},
        ),
        (
            "--speech-mask-channel-selection",
            {
                "type": str,
                "choices": list(mask_strategies),
                "help": "how to choose masks",
            },
        ),
        (
            "--speech-mask-channel-other",
            {
                "type": float,
                "help": "stdev of the mask length in case of 'normal' selection strategy",
            },
        ),
        (
            "--speech-no-mask-channel-overlap",
            {"action": "store_true", "help": "whether to allow masks to overlap"},
        ),
        (
            "--no-scale-feature",
            {"action": "store_true", "help": "no scale for the calculated features"},
        ),
        (
            "--speech-mask-channel-min-space",
            {
                "type": int,
                "help": "min space between spans (if no overlap is enabled)",
            },
        ),
        (
            "--feature-grad-mult",
            {
                "type": float,
                "help": "reset feature grad mult in wav2vec 2.0 to this",
            },
        ),
        # positional embeddings
        (
            "--conv-pos",
            {
                "type": int,
                "default": 128,
                "help": "number of filters for convolutional positional embeddings",
            },
        ),
        (
            "--conv-pos-groups",
            {
                "type": int,
                "default": 16,
                "help": "number of groups for convolutional positional embedding",
            },
        ),
        # model configuration
        (
            "--speech-encoder-layers",
            {"type": int, "help": "number of speech encoder layers"},
        ),
        (
            "--text-encoder-layers",
            {"type": int, "help": "number of text encoder layers"},
        ),
    )

    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
conv_bias=args.speech_conv_bias, + ) + self.feature_enc_layers = eval(args.conv_feature_layers) + self.subsample = subsample + self.feat_proj = ( + nn.Linear(self.feature_enc_layers[-1][0], self.embedding_dim) + if self.feature_enc_layers[-1][0] != self.embedding_dim + else None + ) + + self.feat_layer_norm = LayerNorm(self.feature_enc_layers[-1][0]) + + self.embed_positions = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + std = math.sqrt(4 / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.embed_positions.weight, mean=0, std=std) + nn.init.constant_(self.embed_positions.bias, 0) + + self.embed_positions = nn.utils.weight_norm( + self.embed_positions, name="weight", dim=2 + ) + self.embed_positions = nn.Sequential( + self.embed_positions, SamePad(args.conv_pos), nn.GELU() + ) + + self.mask_prob = args.speech_mask_prob + self.mask_selection = args.speech_mask_selection + self.mask_other = args.speech_mask_other + self.mask_length = args.speech_mask_length + self.no_mask_overlap = args.speech_no_mask_overlap + self.mask_min_space = args.speech_mask_min_space + + self.mask_channel_prob = args.speech_mask_channel_prob + self.mask_channel_selection = args.speech_mask_channel_selection + self.mask_channel_other = args.speech_mask_channel_other + self.mask_channel_length = args.speech_mask_channel_length + self.no_mask_channel_overlap = args.speech_no_mask_channel_overlap + self.mask_channel_min_space = args.speech_mask_channel_min_space + + self.dropout_input = nn.Dropout(args.dropout_input) + self.dropout_features = nn.Dropout(args.dropout_features) + + self.feature_grad_mult = args.feature_grad_mult + + self.mask_emb = nn.Parameter( + torch.FloatTensor(args.encoder_embed_dim).uniform_() + ) + + self.layers = nn.ModuleList( + [TransformerEncoderLayer(args) for _ in range(args.encoder_layers)] + ) + self.layer_norm = LayerNorm(args.encoder_embed_dim) + 
self.normalize_before = args.encoder_normalize_before + self.alway_mask = alway_mask + + def _get_feat_extract_output_lengths(self, input_lengths: torch.LongTensor): + """ + Computes the output length of the convolutional layers + """ + + def _conv_out_length(input_length, kernel_size, stride): + return torch.floor((input_length - kernel_size) / stride + 1) + + for i in range(len(self.feature_enc_layers)): + input_lengths = _conv_out_length( + input_lengths, + self.feature_enc_layers[i][1], + self.feature_enc_layers[i][2], + ) + + return input_lengths.to(torch.long) + + def apply_mask(self, x, padding_mask): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward( + self, + src_tokens, + src_lengths, + return_all_hiddens=False, + padding_mask=None, + features_only=True, + ): + mask = self.training or self.alway_mask + if self.feature_grad_mult > 0 and self.training: + features = self.subsample(src_tokens) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.subsample(src_tokens) + features = features.transpose(1, 2) + features = 
self.feat_layer_norm(features) + if self.feat_proj is not None: + features = self.feat_proj(features) + + if padding_mask is not None: + input_lengths = (1 - padding_mask.long()).sum(-1) + else: + input_lengths = src_lengths + # apply conv formula to get real output_lengths + output_lengths = self._get_feat_extract_output_lengths(input_lengths) + + padding_mask = torch.zeros( + features.shape[:2], dtype=features.dtype, device=features.device + ) + + # these two operations makes sure that all values + # before the output lengths indices are attended to + padding_mask[ + ( + torch.arange(padding_mask.shape[0], device=padding_mask.device), + output_lengths - 1, + ) + ] = 1 + padding_mask = (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])).bool() + + features = self.feat_scale * features if self.feat_scale != 1.0 else features + unmasked_features = features.clone() + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + if mask: + x, mask_indices = self.apply_mask(features, padding_mask) + else: + x = features + mask_indices = None + + def cal_transformer_layers(x, encoder_padding_mask, return_all_hiddens=False): + # x: B x T x C + positions = self.embed_positions(x.transpose(1, 2)).transpose(1, 2) + x = x + positions + if not self.normalize_before: + x = self.layer_norm(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + encoder_states = [] + for layer in self.layers: + x = layer(x, encoder_padding_mask) + if return_all_hiddens: + encoder_states.append(x) + if self.normalize_before: + x = self.layer_norm(x) + return x, encoder_states + + x, encoder_states = cal_transformer_layers(x, padding_mask, return_all_hiddens) + if features_only: + return { + "encoder_out": [x], # [T x B x C] + "encoder_padding_mask": [padding_mask] + if padding_mask is not None + else [], # B x T + "encoder_embedding": [], # + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], + "src_lengths": [], + 
"mask_indices": [mask_indices], + } + + x_unmasked = x + if self.mask_prob > 0 or self.mask_channel_prob > 0: + x_unmasked, _ = cal_transformer_layers(unmasked_features, padding_mask) + return { + "encoder_out": [x], # [T x B x C] + "encoder_unmasked_out": [x_unmasked], # [T x B x C] + "encoder_padding_mask": [padding_mask] + if padding_mask is not None + else [], # B x T + "encoder_embedding": [], # + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], + "src_lengths": [], + "mask_indices": [mask_indices] if mask_indices is not None else [], # B X T + } + + def reorder_encoder_out(self, encoder_out, new_order): + new_encoder_out = ( + [] + if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + + new_encoder_padding_mask = ( + [] + if len(encoder_out["encoder_padding_mask"]) == 0 + else [ + x.index_select(0, new_order) + for x in encoder_out["encoder_padding_mask"] + ] + ) + + new_encoder_embedding = ( + [] + if len(encoder_out["encoder_embedding"]) == 0 + else [ + x.index_select(0, new_order) for x in encoder_out["encoder_embedding"] + ] + ) + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], # B x T + "src_lengths": [], # B x 1 + } + + +class StackedSpeechWavTransformerEncoder(FairseqEncoder): + def __init__(self, speech_enc, text_enc_layers, text_layer_norm): + super().__init__(None) + self.speech_encoder = speech_enc + self.text_encoder_layers = text_enc_layers + self.final_layer_norm = text_layer_norm + + def forward( + self, + src_tokens, + src_lengths=None, + return_all_hiddens=False, + 
padding_mask=None, + features_only=True, + ): + + out = self.speech_encoder.forward( + src_tokens, + src_lengths, + return_all_hiddens, + padding_mask=padding_mask, + features_only=features_only, + ) + x = out["encoder_out"][0] + encoder_padding_mask = None + if len(out["encoder_padding_mask"]) > 0: + encoder_padding_mask = out["encoder_padding_mask"][0] + + def cal_text_layers(x, padding_mask, return_all_hiddens=False): + encoder_states = [] + for layer in self.text_encoder_layers: + x = layer(x, padding_mask) + if return_all_hiddens: + encoder_states.append(x) + if self.final_layer_norm is not None: + x = self.final_layer_norm(x) + return x, encoder_states + + x, encoder_states = cal_text_layers(x, encoder_padding_mask, return_all_hiddens) + if features_only: + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask] + if encoder_padding_mask is not None + else [], # B x T + "encoder_embedding": [], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], + "src_lengths": [], + } + + x_u = out["encoder_unmasked_out"][0] + x_u, _ = cal_text_layers(x_u, encoder_padding_mask) + + return { + "encoder_out": [x], # [T x B x C] + "encoder_unmasked_out": [x_u], # [T x B x C] + "encoder_padding_mask": [encoder_padding_mask] + if encoder_padding_mask is not None + else [], # B x T + "encoder_embedding": [], # + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], + "src_lengths": [], + "mask_indices": out["mask_indices"], # B X T + } + + def reorder_encoder_out(self, encoder_out, new_order): + return self.speech_encoder.reorder_encoder_out(encoder_out, new_order) diff --git a/fairseq/fairseq/models/speech_to_text/utils.py b/fairseq/fairseq/models/speech_to_text/utils.py new file mode 100644 index 0000000..3311744 --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/utils.py @@ -0,0 +1,562 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. 
def assert_equal(value1, value2, name1=None, name2=None):
    """Raise ``ValueError`` unless ``value1`` and ``value2`` compare equal.

    Args:
        value1: left-hand value.
        value2: right-hand value.
        name1: optional label for ``value1``; when given, the value is shown
            in parentheses after the label.
        name2: optional label for ``value2``, formatted the same way.

    Raises:
        ValueError: if ``value1 != value2``.
    """

    def _describe(value, label):
        # Unlabelled values are printed bare, labelled ones as "label (value)".
        if label is None:
            return "{}".format(value)
        return "{} ({})".format(label, value)

    if value1 != value2:
        raise ValueError(
            "Expected {} == {}".format(
                _describe(value1, name1), _describe(value2, name2)
            )
        )


def fill_config(config, key, value):
    """Store ``value`` under ``key`` in ``config`` and verify they agree.

    Nothing happens when ``value`` is ``None``. Otherwise the entry is
    written only if it is missing or currently ``None``; an existing entry
    that differs from ``value`` triggers a ``ValueError`` via
    ``assert_equal``.
    """
    if value is None:
        return
    entry_unset = key not in config or config[key] is None
    if entry_unset:
        config[key] = value
    assert_equal(value, config[key], "value", f'config["{key}"]')
def get_time_axis(layout):
    """
    Return the index of the time axis for a layout string.

    "TB"/"TBD" map to 0, "BT"/"BTD" map to 1 and "BCTD" maps to 2; any other
    layout raises ``ValueError``.
    """
    # The position of each group in this table is the axis it maps to.
    layouts_by_time_axis = (("TB", "TBD"), ("BT", "BTD"), ("BCTD",))
    for axis, layouts in enumerate(layouts_by_time_axis):
        if layout in layouts:
            return axis
    raise ValueError(f"Unsupported layout = {layout}")


def get_batch_axis(layout):
    """
    Return the index of the batch axis for a layout string.

    "TB"/"TBD" map to 1, "BT"/"BTD"/"BCTD" map to 0; any other layout raises
    ``ValueError``.
    """
    if layout in ("BT", "BTD", "BCTD"):
        return 0
    if layout in ("TB", "TBD"):
        return 1
    raise ValueError(f"Unsupported layout = {layout}")
def to_pair(value, name):
    """Make a pair (of type tuple) of given value.

    A non-iterable ``value`` is duplicated into ``(value, value)``. An
    iterable ``value`` must contain exactly two elements and is returned as
    a tuple, so lists and one-shot iterators are normalised too (previously
    an iterable was handed back unchanged and inputs without ``len()``
    failed with ``TypeError``).

    Args:
        value: a scalar, or an iterable of exactly two elements.
        name: label used in the error message.

    Returns:
        A 2-tuple.

    Raises:
        ValueError: if ``value`` is iterable but does not have 2 elements.
    """
    if not isinstance(value, Iterable):
        return (value, value)

    # Materialise once so that generators can be both measured and returned.
    pair = tuple(value)
    if len(pair) != 2:
        # Report the caller's own object when it has a useful repr; a consumed
        # iterator does not, so show the materialised elements instead.
        shown = value if hasattr(value, "__len__") else pair
        raise ValueError(
            "Expected `{}` to have exactly 2 elements, got: ({})".format(name, shown)
        )
    return pair
def lengths_to_attention_mask(
    lengths: Tensor,
    left_context: Optional[int] = None,
    right_context: Optional[int] = None,
) -> Optional[Tensor]:
    """
    Build a square attention mask limited to a window around each position.

    ``None`` for ``left_context`` / ``right_context`` means that side is
    unlimited. When both are ``None`` no mask is needed and ``None`` is
    returned.

    Args:
        lengths: tensor of sequence lengths; only its maximum, device and
            dtype are used.
        left_context: how many positions before the query may be attended.
        right_context: how many positions after the query may be attended.

    Returns:
        ``None``, or a bool tensor of shape (max_length, max_length) where
        ``True`` marks a (query, key) pair that falls OUTSIDE the window.
    """
    if left_context is None and right_context is None:
        return None

    max_length = int(torch.max(lengths).item())
    device = lengths.device

    # offsets[q, k] = k - q, the signed distance from query q to key k.
    # For max_length == 3:
    #   [[ 0,  1,  2],
    #    [-1,  0,  1],
    #    [-2, -1,  0]]
    # Both ranges are created on `lengths.device` so they never end up on
    # different devices.
    key_positions = torch.arange(
        max_length, device=device, dtype=lengths.dtype
    ).unsqueeze(0)
    query_positions = torch.arange(max_length, device=device).unsqueeze(1)
    offsets = key_positions - query_positions

    # Start with every pair allowed, then narrow the window on each side.
    allowed = torch.ones(max_length, max_length, dtype=torch.bool, device=device)

    if left_context is not None:
        # Keys more than `left_context` steps behind the query are dropped.
        allowed = allowed & (offsets >= -left_context)

    if right_context is not None:
        # Keys more than `right_context` steps ahead of the query are dropped.
        allowed = allowed & (offsets <= right_context)

    return ~allowed
def infer_output_norm(module, output_norm=None):
    """
    Work out which extra normalization must follow ``module`` to obtain the
    requested ``output_norm``.

    Args:
        module: object exposing ``output_norm()``, which reports the
            normalization the module already applies (``None`` if none).
        output_norm: desired normalization: ``None``, ``"softmax"`` or
            ``"log_softmax"``.

    Returns:
        A ``(name, layer)`` tuple. ``(None, NoOp())`` when nothing has to be
        added, otherwise the name and a freshly built ``torch.nn`` layer
        operating on the last dimension.

    Raises:
        ValueError: if the module already normalizes its output in a way
            that differs from the request, or if ``output_norm`` is not one
            of the supported values.
    """
    current_norm = module.output_norm()

    if output_norm == current_norm:
        # output_norm already matches module.output_norm().
        return (None, NoOp())

    if output_norm is None and current_norm is not None:
        # Nothing was requested but the module normalizes anyway: keep the
        # module's behaviour and make that visible in the log.
        logger = logging.getLogger("infer_output_norm()")
        logger.warning(
            "trying to set output_norm ({}) ".format(output_norm)
            + "but got module.output_norm() ({}), ".format(current_norm)
            + "the combined output_norm() will be ({})".format(current_norm)
        )
        return (None, NoOp())

    supported = (
        ("log_softmax", torch.nn.LogSoftmax),
        ("softmax", torch.nn.Softmax),
    )
    for norm_name, norm_cls in supported:
        if output_norm != norm_name:
            continue
        if current_norm is not None:
            # Stacking a second, different normalization is refused.
            raise ValueError(
                "incompatible output_norm ({}) ".format(output_norm)
                + "and module.output_norm() ({})".format(current_norm)
            )
        return (norm_name, norm_cls(dim=-1))

    raise ValueError(
        "output_norm ({}) not in ".format(output_norm)
        + "supported list = [None, softmax, log_softmax]"
    )
@torch.jit.export
def pad_sequence(
    sequence: Tensor,
    time_axis: int,
    extra_left_context: int = 0,
    extra_right_context: int = 0,
) -> Tensor:
    """Extend a sequence along its time axis with left/right context frames.

    Left context is filled with copies of the first frame; right context is
    filled with zeros.

    Args:
        sequence: tensor to pad.
        time_axis: dimension of ``sequence`` that indexes time.
        extra_left_context: number of frames to prepend.
        extra_right_context: number of frames to append.

    Returns:
        ``sequence`` itself when both contexts are 0, otherwise a new tensor
        that is longer along ``time_axis``.
    """
    if extra_left_context == 0 and extra_right_context == 0:
        return sequence

    pieces = []

    if extra_left_context:
        # Selecting index 0 repeatedly replicates the first frame.
        first_frame_index = torch.zeros(
            extra_left_context, dtype=torch.long, device=sequence.device
        )
        pieces.append(torch.index_select(sequence, time_axis, first_frame_index))

    pieces.append(sequence)

    # NOTE(cfyeh): for efficiency reason we pad 0 instead of the last frame for
    # extra right contexts.
    if extra_right_context:
        pad_shape = list(sequence.shape)
        pad_shape[time_axis] = extra_right_context
        pieces.append(
            torch.zeros(pad_shape, dtype=sequence.dtype, device=sequence.device)
        )

    return torch.cat(pieces, dim=time_axis)
def lengths_to_encoder_padding_mask(lengths, batch_first: bool = False):
    """
    convert lengths (a 1-D Long/Int tensor) to 2-D binary tensor

    Args:
        lengths: a (B, )-shaped tensor
        batch_first: whether to return a (B, T) tensor

    Return:
        encoder_padding_mask: a (max_length, B) binary mask (or
            (B, max_length) when ``batch_first`` is True), where
            [t, b] = False for t < lengths[b] and True otherwise
        max_length: maximum length of B sequences

    Note:
        The comparison is ``>=``. The previous ``>`` left position
        ``t == lengths[b]`` (the first padded frame of every sequence shorter
        than the longest one) unmasked, contradicting the contract above.
    """
    max_lengths = torch.max(lengths).item()

    # (1, T) positions against (B, 1) lengths broadcast to a (B, T) mask.
    positions = torch.arange(max_lengths).to(lengths.device).view(1, max_lengths)
    encoder_padding_mask = positions >= lengths.view(-1, 1)

    if not batch_first:
        return encoder_padding_mask.t(), max_lengths
    return encoder_padding_mask, max_lengths
def layer_norm_backward_hook(module, grad_input, grad_output, clamp_value):
    """Clamp every input gradient to ``[-clamp_value, clamp_value]``.

    The signature is that of a module backward hook with one extra
    ``clamp_value`` argument, which the caller is expected to bind (e.g. with
    ``functools.partial``) before registering.

    Args:
        module: module the hook is attached to (unused).
        grad_input: iterable of gradients w.r.t. the module inputs; entries
            may be ``None``.
        grad_output: gradients w.r.t. the module outputs (unused).
        clamp_value: symmetric clamping bound.

    Returns:
        A tuple with one entry per element of ``grad_input``: the clamped
        gradient, or ``None`` where the incoming entry was ``None``
        (previously a ``None`` entry made ``torch.clamp`` raise).
    """
    return tuple(
        None if grad is None else torch.clamp(grad, min=-clamp_value, max=clamp_value)
        for grad in grad_input
    )
def forward(self, x, padding_mask: Optional[torch.Tensor]):
    """Run the adaptor: optional norm/projection, strided GLU convs, optional
    post-projection.

    Args:
        x: input features, T x B x C (per the layout comments below).
        padding_mask: optional bool mask, True at padded positions. It is
            transposed before being applied to ``x`` and summed over dim 1
            to obtain lengths, so it is presumably B x T (confirm against
            the caller).

    Returns:
        ``(x, out_padding_mask)``: the adapted features in T x B x C and the
        padding mask recomputed for the shortened sequence, or ``None`` when
        no mask was given.
    """
    has_mask = padding_mask is not None

    if self.layernorm is not None:
        x = self.layernorm(x)

    if self.proj is not None:
        # Half-weighted residual projection followed by its own layer norm.
        x = self.proj_ln(x + 0.5 * self.proj(x))

    if has_mask:
        # Zero padded frames so they do not leak into the convolutions.
        x = utils.index_put(x, padding_mask.T, 0)

    # T x B x C -> B x C x T
    x = x.permute(1, 2, 0)
    out_lens = (~padding_mask).sum(1).float() if has_mask else None

    for conv in self.layers:
        # One random draw per layer, taken whether or not it ends up being
        # used, so the RNG stream does not depend on the mode.
        draw = np.random.random()
        if self.training and draw <= self.layerdrop:
            continue  # layer dropped for this step
        x = nn.functional.glu(conv(x), dim=1)
        if has_mask:
            # Track how each applied strided conv shortens the sequences.
            out_lens = ((out_lens - 1) / self.stride + 1).floor()

    # B x C x T -> T x B x C
    x = x.permute(2, 0, 1)

    if self.post_proj is not None:
        x = self.post_proj_ln(x + 0.5 * self.post_proj(x))

    if not has_mask:
        return x, None

    out_padding_mask = lengths_to_padding_mask(out_lens.long())
    x = utils.index_put(x, out_padding_mask.T, 0)
    return x, out_padding_mask
"--mask-channel-selection", + type=str, + choices=["static", "uniform", "normal", "poisson"], + help="how to choose masks", + ) + parser.add_argument( + "--mask-channel-other", + type=float, + help="stdev of the mask length in case of 'normal' selection strategy", + ) + parser.add_argument( + "--no-mask-channel-overlap", + action="store_true", + help="whether to allow masks to overlap", + ) + parser.add_argument( + "--freeze-finetune-updates", + type=int, + metavar="N", + help="dont finetune wav2vec for this many updates", + ) + parser.add_argument( + "--feature-grad-mult", + type=float, + metavar="D", + help="reset feature grad mult in wav2vec 2.0 to this", + ) + parser.add_argument( + "--layerdrop", + type=float, + metavar="D", + help="probability of dropping a layer in wav2vec 2.0", + ) + parser.add_argument( + "--max-positions", + type=int, + metavar="N", + help="Max input positions to be used in the conformer encoder in wav2vec 2.0", + ) + parser.add_argument("--encoder-proj", action="store_true") + parser.add_argument("--w2v-args", default=None) + parser.add_argument( + "--remove-weight-norm", + action="store_true", + help="if set, then the weight-norm (in one pos_conv layer) is removed from the model", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension to be used when w2v_path is None and no encoder_proj is set", + ) + + +def need_finetuning(ft_params, param_name): + if ft_params == "all": + return True + ft_params_list = ft_params.split(",") + for ft_param in ft_params_list: + if ft_param in param_name: + return True + return False + + +class Wav2VecEncoderWithAdaptor(FairseqEncoder): + def build_adaptor(self, args): + adaptor = None + if args.adaptor_n_layers > 0: + adaptor = Conv1dAdaptor( + args.decoder_embed_dim, + args.decoder_embed_dim, + n_layers=args.adaptor_n_layers, + kernel_size=args.adaptor_kernel_size, + stride=args.adaptor_stride, + layerdrop=args.adaptor_layerdrop, + 
layernorm=args.adaptor_layernorm, + proj=args.adaptor_proj, + ) + return adaptor + + def __init__(self, args): + super().__init__(None) + self.w2v_encoder = Wav2VecEncoder(args) + self.is_v0_arch = not args.adaptor_proj + self.w2v_proj_ln = None + if not self.is_v0_arch and self.w2v_encoder.proj is not None: + self.w2v_proj_ln = LayerNorm(args.decoder_embed_dim) + self.adaptor = self.build_adaptor(args) + + self.num_updates = 0 + self.freezing_updates = args.w2v_freezing_updates + self.finetuning_params = args.finetune_w2v_params + for k, p in self.w2v_encoder.w2v_model.named_parameters(): + p.requires_grad = need_finetuning(self.finetuning_params, k) + + @classmethod + def add_args(cls, parser): + """Add model-specific arguments to the parser.""" + add_wav2vec_asr_args(parser) + parser.add_argument( + "--normalize", + action="store_true", + help="if set, normalizes input to have 0 mean and unit variance", + ) + parser.add_argument( + "--finetune-w2v-params", + type=str, + metavar="STR", + help="comma-separated param strings to finetune.", + ) + parser.add_argument("--w2v-freezing-updates", type=int) + parser.add_argument("--load-pretrained-encoder-from", type=str, metavar="STR") + Conv1dAdaptor.add_args(parser) + + def set_num_updates(self, num_updates): + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens, src_lengths=None, **kwargs): + if ( + self.freezing_updates is not None + and self.num_updates > self.freezing_updates + ): + for p in self.w2v_encoder.w2v_model.parameters(): + p.requires_grad = True + + padding_mask = lengths_to_padding_mask(src_lengths) + out = self.w2v_encoder.forward(src_tokens, padding_mask, tbc=True) + x, padding_mask = out["encoder_out"], out["padding_mask"] + if self.w2v_proj_ln is not None: + x = self.w2v_proj_ln(x) + + if self.adaptor is not None: + x, padding_mask = self.adaptor(x, padding_mask) + + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [] + if 
def reorder_encoder_out(self, encoder_out, new_order):
    """Reorder the batch dimension of an encoder output dictionary.

    Args:
        encoder_out: dict with the list-valued keys "encoder_out",
            "encoder_padding_mask", "encoder_embedding" and
            "encoder_states".
        new_order: index tensor giving the new batch order.

    Returns:
        A new dict with the same six keys. "encoder_out" and
        "encoder_states" are reindexed along dim 1, "encoder_padding_mask"
        and "encoder_embedding" along dim 0; "src_tokens" and "src_lengths"
        are always empty lists. The "encoder_states" list of the input is
        updated in place and reused in the result.
    """

    def _reindexed(key, batch_dim):
        # An empty list simply stays empty.
        return [t.index_select(batch_dim, new_order) for t in encoder_out[key]]

    reordered_out = _reindexed("encoder_out", 1)
    reordered_padding_mask = _reindexed("encoder_padding_mask", 0)
    reordered_embedding = _reindexed("encoder_embedding", 0)

    # Slice assignment keeps the caller's list object, matching the in-place
    # element replacement this method has always done.
    states = encoder_out["encoder_states"]
    states[:] = [state.index_select(1, new_order) for state in states]

    return {
        "encoder_out": reordered_out,  # T x B x C
        "encoder_padding_mask": reordered_padding_mask,  # B x T
        "encoder_embedding": reordered_embedding,  # B x T x C
        "encoder_states": states,  # List[T x B x C]
        "src_tokens": [],  # B x T
        "src_lengths": [],  # B x 1
    }
help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each decoder block", + ) + parser.add_argument( + "--layernorm-embedding", action="store_true", help="add layernorm to embedding" + ) + parser.add_argument( + "--decoder-layerdrop", + type=float, + metavar="D", + help="layerdrop probability for decoder", + ) + parser.add_argument( + "--decoder-learned-pos", + action="store_true", + help="learn positional embedding in decoder", + ) + parser.add_argument( + "--share-decoder-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--no-scale-embedding", + action="store_true", + help="if True, dont scale embeddings", + ) + parser.add_argument( + "--load-pretrained-decoder-from", + type=str, + metavar="STR", + help="model to take decoder weights from (for initialization)", + ) + parser.add_argument( + "--finetune-decoder-params", + type=str, + metavar="STR", + help="comma-separated param strings to finetune.", + ) + + +def remove_weight_norm_from_model(model): + from functools import reduce + + layers_with_wn = [] + for param_name, _ in model.named_parameters(): + if param_name.endswith("_g"): + # retrieve the module with this param_name + module_names = param_name.split(".")[ + :-1 + ] # exclude the actual parameter name + wn_module = reduce(getattr, module_names, model) + layers_with_wn.append(wn_module) + for wn_module in layers_with_wn: + torch.nn.utils.remove_weight_norm(wn_module) + logger.warning(f"Weight norm removed from module with {wn_module}\n") + + +@register_model("xm_transformer") +class XMTransformerModel(FairseqEncoderDecoderModel): + @classmethod + def 
hub_models(cls): + base_url = "http://dl.fbaipublicfiles.com/fairseq/s2t" + model_ids = [ + "xm_transformer_600m-es_en-multi_domain", + "xm_transformer_600m-ru_en-multi_domain", + "xm_transformer_600m-fr_en-multi_domain", + "xm_transformer_600m-en_es-multi_domain", + "xm_transformer_600m-en_ru-multi_domain", + "xm_transformer_600m-en_fr-multi_domain", + "xm_transformer_600m-en_zh-multi_domain", + "xm_transformer_600m-en_ar-multi_domain", + "xm_transformer_600m-en_tr-multi_domain", + "xm_transformer_600m-en_vi-multi_domain", + "xm_transformer-21_en-xls_r_300m", + "xm_transformer-en_15-xls_r_300m", + "xm_transformer-21_en-xls_r_1b", + "xm_transformer-en_15-xls_r_1b", + "xm_transformer-21_en-xls_r_2b", + "xm_transformer-en_15-xls_r_2b", + "xm_transformer-22_16-xls_r_2b", + "xm_transformer_s2ut_800m-es-en-st-asr-bt_h1_2022", + "xm_transformer_s2ut_800m-en-es-st_plus_asr", + "xm_transformer_s2ut_800m-hk-en-h1_2022", + "xm_transformer_s2ut_800m-en-hk-h1_2022", + ] + return {i: f"{base_url}/{i}.tar.gz" for i in model_ids} + + @classmethod + def from_pretrained( + cls, + model_name_or_path, + checkpoint_file="model.pt", + data_name_or_path=".", + config_yaml="config.yaml", + task="speech_to_text", + generation_args=None, + **kwargs, + ): + from fairseq import hub_utils + + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + config_yaml=config_yaml, + task=task, + generation_args=generation_args, + **kwargs, + ) + return S2THubInterface(x["args"], x["task"], x["models"][0]) + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def add_args(cls, parser): + """Add model-specific arguments to the parser.""" + Wav2VecEncoderWithAdaptor.add_args(parser) + add_decoder_args(parser) + parser.add_argument("--checkpoint-activations", action="store_true") + parser.add_argument("--offload-activations", action="store_true") + parser.add_argument("--min-params-to-wrap", 
type=int, metavar="N") + + @classmethod + def maybe_load_pretrained(cls, component, checkpoint: Optional[str] = None): + if checkpoint is None: + return component + + _load = checkpoint_utils.load_pretrained_component_from_model + try: + return _load(component, checkpoint) + except RuntimeError as e: + logger.warning(e) + return _load(component, checkpoint, strict=False) + + @classmethod + def build_encoder(cls, args): + _args = copy.deepcopy(args) + if not args.adaptor_proj and not args.encoder_proj: # V0 arch + if args.w2v_path: + state = checkpoint_utils.load_checkpoint_to_cpu(args.w2v_path) + if state.get("cfg") is not None: + encoder_embed_dim = state["cfg"]._content["model"][ + "encoder_embed_dim" + ] + elif state.get("args") is not None: + encoder_embed_dim = state["args"].encoder_embed_dim + else: + raise ValueError(f"Invalid config in {args.w2v_path}") + _args.decoder_embed_dim = encoder_embed_dim + del state + else: + _args.decoder_embed_dim = args.encoder_embed_dim + + encoder = Wav2VecEncoderWithAdaptor(_args) + encoder = cls.maybe_load_pretrained( + encoder, getattr(args, "load_pretrained_encoder_from", None) + ) + if args.remove_weight_norm: + # remove the wn for EMA usage + logger.warning("Removing weight norm from wav2vec encoder") + remove_weight_norm_from_model(encoder) + + return encoder + + @classmethod + def get_decoder_args_from_checkpoint(cls, ckpt_args): + assert "model" in ckpt_args, "Model args not found in checkpoint cfg!" 
+ decoder_args = {} + for k, v in ckpt_args["model"].__dict__.items(): + if "decoder" in k: + decoder_args[k] = v + + return decoder_args + + @classmethod + def override_decoder_args(cls, cli_args, decoder_args_dict): + for k, v in decoder_args_dict.items(): + if v != getattr(cli_args, k, None): + logger.warning( + f"Overriding decoder arg {k}: from {getattr(cli_args, k, None)} to {v}" + ) + setattr(cli_args, k, v) + + return cli_args + + @classmethod + def build_decoder(cls, args, task, embed_tokens): + _args = copy.deepcopy(args) + if args.adaptor_proj or args.encoder_proj: # not V0 arch + _args.encoder_embed_dim = _args.decoder_embed_dim + _args.dropout = args.decoder_dropout + _args.attention_dropout = args.decoder_attention_dropout + _args.activation_dropout = args.decoder_activation_dropout + _args.layerdrop = _args.decoder_layerdrop + + decoder = TransformerDecoder(_args, task.target_dictionary, embed_tokens) + decoder = cls.maybe_load_pretrained( + decoder, getattr(args, "load_pretrained_decoder_from", None) + ) + + for k, p in decoder.named_parameters(): + p.requires_grad = need_finetuning(args.finetune_decoder_params, k) + return decoder + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + if getattr(args, "load_pretrained_decoder_from", None) is not None: + ckpt = torch.load(getattr(args, "load_pretrained_decoder_from", None)) + decoder_args_dict = cls.get_decoder_args_from_checkpoint(ckpt["cfg"]) + args = cls.override_decoder_args(args, decoder_args_dict) + + decoder_embed_tokens = build_embedding( + task.target_dictionary, args.decoder_embed_dim + ) + + encoder = cls.build_encoder(args) + decoder = cls.build_decoder(args, task, decoder_embed_tokens) + base_model = cls(encoder, decoder) + + # set up multitask decoders + base_model.multitask_decoders = {} + for i, (task_name, task_obj) in enumerate(task.multitask_tasks.items()): + # 
dummy auxiliary decoder + if task_obj.args.get_loss_weight(0) == 0: + continue + + task_decoder = cls.build_multitask_decoder( + args, task_obj.args, task_obj.target_dictionary, args.decoder_embed_dim + ) + + setattr(base_model, f"{task_name}_decoder", task_decoder) + decoder_model_cls = ( + FairseqEncoderModel + if task_obj.args.decoder_type == "ctc" + else FairseqLanguageModel + ) + base_model.multitask_decoders[task_name] = decoder_model_cls( + getattr(base_model, f"{task_name}_decoder") + ) + return base_model + + @classmethod + def build_multitask_decoder( + cls, + args, + mtl_args, + tgt_dict, + in_dim, + is_first_pass_decoder=False, + ): + decoder_args = mtl_args.decoder_args + decoder_args.encoder_embed_dim = in_dim + if mtl_args.decoder_type == "transformer": + if is_first_pass_decoder: + task_decoder = cls.build_text_decoder(args, tgt_dict) + else: + from fairseq.models.speech_to_speech import ( + base_multitask_text_transformer_decoder_arch, + ) + + base_multitask_text_transformer_decoder_arch(decoder_args) # 2L + task_decoder = TransformerDecoder( + decoder_args, + tgt_dict, + embed_tokens=TransformerModelBase.build_embedding( + decoder_args, + tgt_dict, + decoder_args.decoder_embed_dim, + ), + ) + elif mtl_args.decoder_type == "ctc": + task_decoder = CTCDecoder( + dictionary=tgt_dict, + in_dim=in_dim, + ) + else: + raise NotImplementedError( + "currently only support multitask decoder_type 'transformer', 'ctc'" + ) + + return task_decoder + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + return self.get_normalized_probs_scriptable(net_output, log_probs, sample) + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + return_all_hiddens=False, + **kwargs, + ): + """ + The forward method inherited from the base class has a **kwargs + argument in its input, which is not supported in torchscript. 
This + method overwrites the forward method definition without **kwargs. + """ + encoder_out = self.encoder( + src_tokens=src_tokens, src_lengths=src_lengths, **kwargs + ) + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out + ) + if return_all_hiddens: + decoder_out[-1]["encoder_states"] = encoder_out["encoder_out"] + # NOTE: from the top layer + decoder_out[-1]["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ] + return decoder_out + + def upgrade_state_dict(self, state_dict): + for k, _ in state_dict.items(): + if "adaptor.layers" in state_dict: + new = k.replace("adaptor.layers", "adaptor_layers") + state_dict[new] = state_dict[k] + del state_dict[k] + + +def set_default_w2v_encoder_args(args): + args.no_pretrained_weights = getattr(args, "no_pretrained_weights", False) + args.dropout_input = getattr(args, "dropout_input", 0) + args.final_dropout = getattr(args, "final_dropout", 0) + args.apply_mask = getattr(args, "apply_mask", False) + args.dropout = getattr(args, "dropout", 0) + args.attention_dropout = getattr(args, "attention_dropout", 0) + args.activation_dropout = getattr(args, "activation_dropout", 0) + args.encoder_proj = getattr(args, "encoder_proj", False) + args.remove_weight_norm = getattr(args, "remove_weight_norm", False) + + args.mask_length = getattr(args, "mask_length", 10) + args.mask_prob = getattr(args, "mask_prob", 0.5) + args.mask_selection = getattr(args, "mask_selection", "static") + args.mask_other = getattr(args, "mask_other", 0) + args.no_mask_overlap = getattr(args, "no_mask_overlap", False) + args.mask_channel_length = getattr(args, "mask_channel_length", 10) + args.mask_channel_prob = getattr(args, "mask_channel_prob", 0.5) + args.mask_channel_before = getattr(args, "mask_channel_before", False) + args.mask_channel_selection = getattr(args, "mask_channel_selection", "static") + args.mask_channel_other = getattr(args, "mask_channel_other", 0) + args.no_mask_channel_overlap 
= getattr(args, "no_mask_channel_overlap", False) + + args.freeze_finetune_updates = getattr(args, "freeze_finetune_updates", 0) + args.feature_grad_mult = 0.1 + args.layerdrop = getattr(args, "layerdrop", 0.0) + + args.normalize = getattr(args, "normalize", False) + args.finetune_w2v_params = getattr(args, "finetune_w2v_params", "all") + args.w2v_freezing_updates = getattr(args, "w2v_freezing_updates", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + + +def set_default_adaptor_args(args): + args.adaptor_n_layers = getattr(args, "adaptor_n_layers", 3) + args.adaptor_kernel_size = getattr(args, "adaptor_kernel_size", 3) + args.adaptor_stride = getattr(args, "adaptor_stride", 2) + args.adaptor_layerdrop = getattr(args, "adaptor_layerdrop", 0.0) + args.adaptor_layernorm = getattr(args, "adaptor_layernorm", False) + args.adaptor_proj = getattr(args, "adaptor_proj", False) + + +def set_default_transformer_decoder_args(args): + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4 * 1024) + args.decoder_layers = getattr(args, "decoder_layers", 12) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.decoder_attention_dropout = getattr(args, "decoder_attention_dropout", 0.0) + args.decoder_activation_dropout = getattr(args, "decoder_activation_dropout", 0.0) + args.decoder_dropout = getattr(args, "decoder_dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + 
args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + + args.activation_fn = getattr(args, "activation_fn", "gelu") + args.pooler_activation_fn = getattr(args, "pooler_activation_fn", "tanh") + args.pooler_dropout = getattr(args, "pooler_dropout", 0.0) + + args.finetune_decoder_params = getattr(args, "finetune_decoder_params", "all") + + +def set_default_general_args(args): + args.checkpoint_activations = getattr(args, "checkpoint_activations", False) + args.offload_activations = getattr(args, "offload_activations", False) + args.min_params_to_wrap = getattr(args, "min_params_to_wrap", int(1e8)) + args.max_positions = getattr(args, "max_positions", 3000) + + +@register_model_architecture(model_name="xm_transformer", arch_name="xm_transformer") +def base_architecture(args): + set_default_general_args(args) + set_default_w2v_encoder_args(args) + set_default_adaptor_args(args) + set_default_transformer_decoder_args(args) diff --git a/fairseq/fairseq/models/speech_to_text/xm_transformer_unity.py b/fairseq/fairseq/models/speech_to_text/xm_transformer_unity.py new file mode 100644 index 0000000..f77ef4e --- /dev/null +++ b/fairseq/fairseq/models/speech_to_text/xm_transformer_unity.py @@ -0,0 +1,315 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import logging + +from fairseq.models import ( + FairseqEncoderModel, + FairseqLanguageModel, + register_model, + register_model_architecture, +) +from fairseq.models.speech_to_speech.modules.ctc_decoder import CTCDecoder +from fairseq.models.speech_to_speech.modules.transformer_encoder import ( + TransformerEncoderNoEmb, +) +from fairseq.models.speech_to_text.xm_transformer import XMTransformerModel +from fairseq.models.speech_to_text.xm_transformer import ( + base_architecture as xm_t_base_architecture, +) +from fairseq.models.speech_to_text.xm_transformer import ( + build_embedding, + need_finetuning, + set_default_adaptor_args, + set_default_general_args, + set_default_transformer_decoder_args, + set_default_w2v_encoder_args, +) +from fairseq.models.transformer import Linear, TransformerDecoder, TransformerModelBase +from fairseq.models.transformer.transformer_decoder_aug import AugTransformerDecoder + +logger = logging.getLogger(__name__) + + +def unit_transformer_decoder_arch_base( + args, decoder_layers=6, decoder_embed_dim=768, decoder_attention_heads=12 +): + args.encoder_layers = decoder_layers + args.decoder_layers = decoder_layers + args.decoder_embed_dim = decoder_embed_dim + args.decoder_ffn_embed_dim = decoder_embed_dim * 4 + args.decoder_attention_heads = decoder_attention_heads + args.encoder_embed_dim = args.decoder_embed_dim + args.decoder_output_dim = decoder_embed_dim + args.decoder_input_dim = decoder_embed_dim + + +def unit_transformer_decoder_arch_large( + args, decoder_layers=12, decoder_embed_dim=1024, decoder_attention_heads=16 +): + args.encoder_layers = decoder_layers + args.decoder_layers = decoder_layers + args.decoder_embed_dim = decoder_embed_dim + args.decoder_ffn_embed_dim = decoder_embed_dim * 4 + args.decoder_attention_heads = decoder_attention_heads + args.encoder_embed_dim = args.decoder_embed_dim + args.decoder_output_dim = decoder_embed_dim + args.decoder_input_dim = decoder_embed_dim + + 
+@register_model("unity_xm_transformer") +class XMTransformerModelUnitY(XMTransformerModel): + @classmethod + def hub_models(cls): + base_url = "http://dl.fbaipublicfiles.com/fairseq/s2t" + model_ids = [] + return {i: f"{base_url}/{i}.tar.gz" for i in model_ids} + + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def add_args(cls, parser): + """Add model-specific arguments to the parser.""" + XMTransformerModel.add_args(parser) + parser.add_argument( + "--translation-decoder-layers", + type=int, + default=4, + metavar="N", + help="num decoder layers in the first-pass translation module", + ) + parser.add_argument( + "--synthesizer-encoder-layers", + type=int, + default=0, + metavar="N", + help="num encoder layers in the second-pass synthesizer module", + ) + parser.add_argument( + "--synthesizer-augmented-cross-attention", + action="store_true", + default=False, + help="augmented cross-attention over speech encoder output", + ) + parser.add_argument( + "--load-pretrained-aux-decoder-from", + type=str, + metavar="STR", + help="model to take decoder weights from (for initialization)", + ) + + @classmethod + def build_text_decoder(cls, args, tgt_dict): + _args = copy.deepcopy(args) + + if args.adaptor_proj or args.encoder_proj: # not V0 arch + _args.encoder_embed_dim = _args.decoder_embed_dim + _args.dropout = args.decoder_dropout + _args.attention_dropout = args.decoder_attention_dropout + _args.activation_dropout = args.decoder_activation_dropout + _args.layerdrop = _args.decoder_layerdrop + _args.decoder_layers = _args.translation_decoder_layers + + embed_tokens = build_embedding(tgt_dict, _args.decoder_embed_dim) + decoder = TransformerDecoder(_args, tgt_dict, embed_tokens) + + if getattr(args, "load_pretrained_aux_decoder_from", None) is not None: + decoder = cls.maybe_load_pretrained( + decoder, getattr(args, "load_pretrained_aux_decoder_from", None) + ) + + for k, p in decoder.named_parameters(): + p.requires_grad = 
need_finetuning(args.finetune_decoder_params, k) + return decoder + + @classmethod + def build_decoder(cls, args, task, aug_attn=False): + _args = copy.deepcopy(args) + _args.layerdrop = 0.0 # turn off layerdrop for shallow layers + + _args.encoder_embed_dim = args.decoder_embed_dim + + proj = None + if args.decoder_embed_dim != _args.decoder_embed_dim: + proj = Linear(args.decoder_embed_dim, _args.decoder_embed_dim) + + embed_tokens = build_embedding(task.target_dictionary, _args.decoder_embed_dim) + decoder_cls = AugTransformerDecoder if aug_attn else TransformerDecoder + decoder = decoder_cls(_args, task.target_dictionary, embed_tokens) + + if getattr(args, "load_pretrained_decoder_from", None) is not None: + # load all layers first and then discard the bottom layers + embed_tokens = build_embedding( + task.target_dictionary, _args.decoder_embed_dim + ) + decoder_tmp = decoder_cls(_args, task.target_dictionary, embed_tokens) + decoder_tmp = cls.maybe_load_pretrained( + decoder_tmp, getattr(_args, "load_pretrained_decoder_from", None) + ) + state_dict = decoder_tmp.state_dict() + for k, p in decoder.named_parameters(): + p.data = state_dict[k].data + p.requires_grad = need_finetuning(_args.finetune_decoder_params, k) + decoder.layers = decoder.layers[-_args.decoder_layers :] + + return decoder, proj, _args + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + xm_t_base_architecture(args) + + encoder = cls.build_encoder(args) + decoder, proj, unit_args = cls.build_decoder( + args, + task, + aug_attn=getattr(args, "synthesizer_augmented_cross_attention", False), + ) + base_model = cls(encoder, decoder) + setattr(base_model, "proj", proj) + + base_model.t2u_augmented_cross_attn = getattr( + args, "synthesizer_augmented_cross_attention", False + ) + + # set up multitask decoders + base_model.mt_task_name = None + base_model.multitask_decoders = {} + has_first_pass_decoder = 
False + for task_name, task_obj in task.multitask_tasks.items(): + if task_obj.is_first_pass_decoder: + has_first_pass_decoder = True + base_model.mt_task_name = task_name + + task_decoder = cls.build_multitask_decoder( + args, + task_obj.args, + task_obj.target_dictionary, + args.decoder_embed_dim, + task_obj.is_first_pass_decoder, + ) + + setattr(base_model, f"{task_name}_decoder", task_decoder) + decoder_model_cls = ( + FairseqEncoderModel + if task_obj.args.decoder_type == "ctc" + else FairseqLanguageModel + ) + base_model.multitask_decoders[task_name] = decoder_model_cls( + getattr(base_model, f"{task_name}_decoder") + ) + + assert has_first_pass_decoder, "set at least one intermediate non-CTC decoder" + + # set up encoder on top of the auxiliary MT decoder + if getattr(args, "synthesizer_encoder_layers", 0) > 0: + base_model.synthesizer_encoder = cls.build_t2u_encoder(unit_args) + else: + base_model.synthesizer_encoder = None + + return base_model + + @classmethod + def build_t2u_encoder(cls, args): + _args = copy.deepcopy(args) + _args.encoder_layers = _args.synthesizer_encoder_layers + _args.encoder_embed_dim = args.decoder_embed_dim + _args.encoder_ffn_embed_dim = args.decoder_ffn_embed_dim + _args.encoder_attention_heads = args.decoder_attention_heads + _args.encoder_normalize_before = True + return TransformerEncoderNoEmb(_args) + + def forward( + self, + src_tokens, + src_lengths, + prev_output_tokens, + prev_output_tokens_mt, + return_all_hiddens=False, + tgt_speaker=None, + **kwargs, + ): + """ + The forward method inherited from the base class has a **kwargs + argument in its input, which is not supported in torchscript. This + method overwrites the forward method definition without **kwargs. + """ + encoder_out = self.encoder( + src_tokens=src_tokens, src_lengths=src_lengths, **kwargs + ) + + # 1. 
MT decoder + mt_decoder = getattr(self, f"{self.mt_task_name}_decoder") + mt_decoder_out = mt_decoder( + prev_output_tokens_mt, + encoder_out=encoder_out, + ) + x = mt_decoder_out[1]["inner_states"][-1] + if mt_decoder.layer_norm is not None: + x = mt_decoder.layer_norm(x) + if self.proj is not None: + x = self.proj(x) + + mt_decoder_padding_mask = None + if prev_output_tokens_mt.eq(mt_decoder.padding_idx).any(): + mt_decoder_padding_mask = prev_output_tokens_mt.eq(mt_decoder.padding_idx) + + # 2. T2U encoder + if self.synthesizer_encoder is not None: + t2u_encoder_out = self.synthesizer_encoder( + x, + mt_decoder_padding_mask, + ) + else: + t2u_encoder_out = { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [mt_decoder_padding_mask], # B x T + } + + # 3. T2U decoder + if self.t2u_augmented_cross_attn: + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=encoder_out, + encoder_out_aug=t2u_encoder_out, + ) + else: + decoder_out = self.decoder( + prev_output_tokens, + encoder_out=t2u_encoder_out, + ) + if return_all_hiddens: + decoder_out[-1]["encoder_states"] = encoder_out["encoder_out"] + # NOTE: from the top layer + decoder_out[-1]["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ] + decoder_out[-1]["mt_decoder_out"] = mt_decoder_out + return decoder_out + + +@register_model_architecture( + model_name="unity_xm_transformer", arch_name="unity_xm_transformer" +) +def base_architecture_unity(args): + set_default_general_args(args) + set_default_w2v_encoder_args(args) + set_default_adaptor_args(args) + set_default_transformer_decoder_args(args) + + args.layernorm_embedding = False + args.decoder_learned_pos = False + + +# for old models +@register_model_architecture( + model_name="unity_xm_transformer", arch_name="xm_transformer_t2" +) +def base_architecture_unity_legacy(args): + base_architecture_unity(args) diff --git a/fairseq/fairseq/models/text_to_speech/__init__.py b/fairseq/fairseq/models/text_to_speech/__init__.py new 
file mode 100644 index 0000000..c0dcd69 --- /dev/null +++ b/fairseq/fairseq/models/text_to_speech/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from .tacotron2 import * # noqa +from .tts_transformer import * # noqa +from .fastspeech2 import * # noqa +from .vocoder import * # noqa diff --git a/fairseq/fairseq/models/text_to_speech/codehifigan.py b/fairseq/fairseq/models/text_to_speech/codehifigan.py new file mode 100644 index 0000000..d1574dd --- /dev/null +++ b/fairseq/fairseq/models/text_to_speech/codehifigan.py @@ -0,0 +1,95 @@ +from argparse import Namespace +import torch +import torch.nn as nn + +from fairseq.models.text_to_speech.fastspeech2 import VariancePredictor +from fairseq.models.text_to_speech.hifigan import Generator + + +class CodeGenerator(Generator): + def __init__(self, cfg): + super().__init__(cfg) + self.dict = nn.Embedding(cfg["num_embeddings"], cfg["embedding_dim"]) + self.multispkr = cfg.get("multispkr", None) + self.embedder = cfg.get("embedder_params", None) + + if self.multispkr and not self.embedder: + self.spkr = nn.Embedding(cfg.get("num_speakers", 200), cfg["embedding_dim"]) + elif self.embedder: + self.spkr = nn.Linear(cfg.get("embedder_dim", 256), cfg["embedding_dim"]) + + self.dur_predictor = None + if cfg.get("dur_predictor_params", None): + self.dur_predictor = VariancePredictor( + Namespace(**cfg["dur_predictor_params"]) + ) + + self.f0 = cfg.get("f0", None) + n_f0_bin = cfg.get("f0_quant_num_bin", 0) + self.f0_quant_embed = ( + None if n_f0_bin <= 0 else nn.Embedding(n_f0_bin, cfg["embedding_dim"]) + ) + + @staticmethod + def _upsample(signal, max_frames): + if signal.dim() == 3: + bsz, channels, cond_length = signal.size() + elif signal.dim() == 2: + signal = signal.unsqueeze(2) + bsz, channels, cond_length = signal.size() + else: + signal = signal.view(-1, 1, 1) + 
bsz, channels, cond_length = signal.size() + + signal = signal.unsqueeze(3).repeat(1, 1, 1, max_frames // cond_length) + + # pad zeros as needed (if signal's shape does not divide completely with max_frames) + reminder = (max_frames - signal.shape[2] * signal.shape[3]) // signal.shape[3] + if reminder > 0: + raise NotImplementedError( + "Padding condition signal - misalignment between condition features." + ) + + signal = signal.view(bsz, channels, max_frames) + return signal + + def forward(self, **kwargs): + x = self.dict(kwargs["code"]).transpose(1, 2) + + if self.dur_predictor and kwargs.get("dur_prediction", False): + assert x.size(0) == 1, "only support single sample" + log_dur_pred = self.dur_predictor(x.transpose(1, 2)) + dur_out = torch.clamp( + torch.round((torch.exp(log_dur_pred) - 1)).long(), min=1 + ) + # B x C x T + x = torch.repeat_interleave(x, dur_out.view(-1), dim=2) + + if self.f0: + if self.f0_quant_embed: + kwargs["f0"] = self.f0_quant_embed(kwargs["f0"].long()).transpose(1, 2) + else: + kwargs["f0"] = kwargs["f0"].unsqueeze(1) + + if x.shape[-1] < kwargs["f0"].shape[-1]: + x = self._upsample(x, kwargs["f0"].shape[-1]) + elif x.shape[-1] > kwargs["f0"].shape[-1]: + kwargs["f0"] = self._upsample(kwargs["f0"], x.shape[-1]) + x = torch.cat([x, kwargs["f0"]], dim=1) + + if self.multispkr: + assert ( + "spkr" in kwargs + ), 'require "spkr" input for multispeaker CodeHiFiGAN vocoder' + spkr = self.spkr(kwargs["spkr"]).transpose(1, 2) + spkr = self._upsample(spkr, x.shape[-1]) + x = torch.cat([x, spkr], dim=1) + + for k, feat in kwargs.items(): + if k in ["spkr", "code", "f0", "dur_prediction"]: + continue + + feat = self._upsample(feat, x.shape[-1]) + x = torch.cat([x, feat], dim=1) + + return super().forward(x) diff --git a/fairseq/fairseq/models/text_to_speech/fastspeech2.py b/fairseq/fairseq/models/text_to_speech/fastspeech2.py new file mode 100644 index 0000000..fb2d0df --- /dev/null +++ b/fairseq/fairseq/models/text_to_speech/fastspeech2.py @@ 
-0,0 +1,448 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging + +import torch +from torch import nn + +from fairseq import utils +from fairseq.data.data_utils import lengths_to_padding_mask +from fairseq.models import ( + FairseqEncoder, + FairseqEncoderModel, + register_model, + register_model_architecture, +) +from fairseq.models.text_to_speech.hub_interface import TTSHubInterface +from fairseq.models.text_to_speech.tacotron2 import Postnet +from fairseq.modules import ( + FairseqDropout, + LayerNorm, + MultiheadAttention, + PositionalEmbedding, +) + +logger = logging.getLogger(__name__) + + +def model_init(m): + if isinstance(m, nn.Conv1d): + nn.init.xavier_uniform_(m.weight, torch.nn.init.calculate_gain("relu")) + + +def Embedding(num_embeddings, embedding_dim, padding_idx=None): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + return m + + +class PositionwiseFeedForward(nn.Module): + def __init__(self, in_dim, hidden_dim, kernel_size, dropout): + super().__init__() + self.ffn = nn.Sequential( + nn.Conv1d( + in_dim, + hidden_dim, + kernel_size=kernel_size, + padding=(kernel_size - 1) // 2, + ), + nn.ReLU(), + nn.Conv1d( + hidden_dim, + in_dim, + kernel_size=kernel_size, + padding=(kernel_size - 1) // 2, + ), + ) + self.layer_norm = LayerNorm(in_dim) + self.dropout = self.dropout_module = FairseqDropout( + p=dropout, module_name=self.__class__.__name__ + ) + + def forward(self, x): + # B x T x C + residual = x + x = self.ffn(x.transpose(1, 2)).transpose(1, 2) + x = self.dropout(x) + return self.layer_norm(x + residual) + + +class FFTLayer(torch.nn.Module): + def __init__( + self, embed_dim, n_heads, hidden_dim, kernel_size, dropout, attention_dropout + ): + super().__init__() + self.self_attn = MultiheadAttention( + 
embed_dim, n_heads, dropout=attention_dropout, self_attention=True + ) + self.layer_norm = LayerNorm(embed_dim) + self.ffn = PositionwiseFeedForward( + embed_dim, hidden_dim, kernel_size, dropout=dropout + ) + + def forward(self, x, padding_mask=None): + # B x T x C + residual = x + x = x.transpose(0, 1) + x, _ = self.self_attn( + query=x, key=x, value=x, key_padding_mask=padding_mask, need_weights=False + ) + x = x.transpose(0, 1) + x = self.layer_norm(x + residual) + return self.ffn(x) + + +class LengthRegulator(nn.Module): + def forward(self, x, durations): + # x: B x T x C + out_lens = durations.sum(dim=1) + max_len = out_lens.max() + bsz, seq_len, dim = x.size() + out = x.new_zeros((bsz, max_len, dim)) + + for b in range(bsz): + indices = [] + for t in range(seq_len): + indices.extend([t] * utils.item(durations[b, t])) + indices = torch.tensor(indices, dtype=torch.long).to(x.device) + out_len = utils.item(out_lens[b]) + out[b, :out_len] = x[b].index_select(0, indices) + + return out, out_lens + + +class VariancePredictor(nn.Module): + def __init__(self, args): + super().__init__() + self.conv1 = nn.Sequential( + nn.Conv1d( + args.encoder_embed_dim, + args.var_pred_hidden_dim, + kernel_size=args.var_pred_kernel_size, + padding=(args.var_pred_kernel_size - 1) // 2, + ), + nn.ReLU(), + ) + self.ln1 = nn.LayerNorm(args.var_pred_hidden_dim) + self.dropout_module = FairseqDropout( + p=args.var_pred_dropout, module_name=self.__class__.__name__ + ) + self.conv2 = nn.Sequential( + nn.Conv1d( + args.var_pred_hidden_dim, + args.var_pred_hidden_dim, + kernel_size=args.var_pred_kernel_size, + padding=1, + ), + nn.ReLU(), + ) + self.ln2 = nn.LayerNorm(args.var_pred_hidden_dim) + self.proj = nn.Linear(args.var_pred_hidden_dim, 1) + + def forward(self, x): + # Input: B x T x C; Output: B x T + x = self.conv1(x.transpose(1, 2)).transpose(1, 2) + x = self.dropout_module(self.ln1(x)) + x = self.conv2(x.transpose(1, 2)).transpose(1, 2) + x = self.dropout_module(self.ln2(x)) + 
class VarianceAdaptor(nn.Module):
    """Predict duration, pitch and energy, add the pitch/energy embeddings to
    the hidden sequence, and expand it in time by the durations."""

    def __init__(self, args):
        """
        Args:
            args: namespace providing ``var_pred_n_bins``, ``pitch_min``,
                ``pitch_max``, ``energy_min``, ``energy_max``,
                ``encoder_embed_dim`` and the ``VariancePredictor`` options.
        """
        super().__init__()
        self.args = args
        self.length_regulator = LengthRegulator()
        self.duration_predictor = VariancePredictor(args)
        self.pitch_predictor = VariancePredictor(args)
        self.energy_predictor = VariancePredictor(args)

        num_bins = self.args.var_pred_n_bins
        # n_bins buckets are delimited by n_bins - 1 boundaries.
        num_edges = self.args.var_pred_n_bins - 1
        self.pitch_bins = torch.linspace(args.pitch_min, args.pitch_max, num_edges)
        self.embed_pitch = Embedding(num_bins, args.encoder_embed_dim)
        self.energy_bins = torch.linspace(
            args.energy_min, args.energy_max, num_edges
        )
        self.embed_energy = Embedding(num_bins, args.encoder_embed_dim)

    @staticmethod
    def _bucketed_embedding(predictor, embedding, boundaries, x, tgt, factor):
        """Run ``predictor`` on ``x`` and embed the bucketised values.

        When ``tgt`` is given it is bucketised instead of the prediction and
        the prediction is returned unscaled; otherwise the prediction is
        scaled by ``factor`` before bucketising.
        """
        prediction = predictor(x)
        edges = boundaries.to(x.device)
        if tgt is not None:
            return prediction, embedding(torch.bucketize(tgt, edges))
        prediction = prediction * factor
        return prediction, embedding(torch.bucketize(prediction, edges))

    def get_pitch_emb(self, x, tgt=None, factor=1.0):
        """Return ``(pitch prediction, pitch embedding)`` for ``x``."""
        return self._bucketed_embedding(
            self.pitch_predictor, self.embed_pitch, self.pitch_bins, x, tgt, factor
        )

    def get_energy_emb(self, x, tgt=None, factor=1.0):
        """Return ``(energy prediction, energy embedding)`` for ``x``."""
        return self._bucketed_embedding(
            self.energy_predictor, self.embed_energy, self.energy_bins, x, tgt, factor
        )

    def forward(
        self,
        x,
        padding_mask,
        durations=None,
        pitches=None,
        energies=None,
        d_factor=1.0,
        p_factor=1.0,
        e_factor=1.0,
    ):
        """
        Args:
            x: B x T x C hidden sequence.
            padding_mask: B x T mask; masked positions get duration 0.
            durations, pitches, energies: optional targets that replace the
                corresponding predictions when given.
            d_factor, p_factor, e_factor: multipliers applied to the
                predicted duration / pitch / energy.

        Returns:
            ``(x, out_lens, log_dur_out, pitch_out, energy_out)`` where ``x``
            is the length-regulated sequence.
        """
        # x: B x T x C
        log_dur_out = self.duration_predictor(x)
        # The predictor output is treated as log(duration + 1).
        scaled = torch.round((torch.exp(log_dur_out) - 1) * d_factor).long()
        dur_out = torch.clamp(scaled, min=0)
        dur_out.masked_fill_(padding_mask, 0)

        pitch_out, pitch_emb = self.get_pitch_emb(x, pitches, p_factor)
        x = x + pitch_emb
        # Energy is predicted from the pitch-augmented sequence.
        energy_out, energy_emb = self.get_energy_emb(x, energies, e_factor)
        x = x + energy_emb

        chosen_durations = durations if durations is not None else dur_out
        x, out_lens = self.length_regulator(x, chosen_durations)

        return x, out_lens, log_dur_out, pitch_out, energy_out
class FastSpeech2Encoder(FairseqEncoder):
    """FastSpeech 2 network: encoder FFT stack, variance adaptor, decoder FFT
    stack, output projection and an optional post-net."""

    def __init__(self, args, src_dict, embed_speaker):
        """
        Args:
            args: model hyper-parameters (see ``FastSpeech2Model.add_args``).
            src_dict: source dictionary; supplies vocabulary size and pad index.
            embed_speaker: optional speaker-embedding module, or ``None``.
        """
        super().__init__(src_dict)
        self.args = args
        self.padding_idx = src_dict.pad()
        self.n_frames_per_step = args.n_frames_per_step
        self.out_dim = args.output_frame_dim * args.n_frames_per_step

        enc_dim = args.encoder_embed_dim

        self.embed_speaker = embed_speaker
        self.spk_emb_proj = (
            nn.Linear(enc_dim + args.speaker_embed_dim, enc_dim)
            if embed_speaker is not None
            else None
        )

        self.dropout_module = FairseqDropout(
            p=args.dropout, module_name=self.__class__.__name__
        )
        self.embed_tokens = Embedding(
            len(src_dict), enc_dim, padding_idx=self.padding_idx
        )

        self.embed_positions = PositionalEmbedding(
            args.max_source_positions, enc_dim, self.padding_idx
        )
        # Scalars weighting the positional signal on the encoder / decoder side.
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))
        self.dec_pos_emb_alpha = nn.Parameter(torch.ones(1))

        self.encoder_fft_layers = self._fft_stack(
            args, enc_dim, args.encoder_attention_heads, args.encoder_layers
        )

        self.var_adaptor = VarianceAdaptor(args)

        self.decoder_fft_layers = self._fft_stack(
            args,
            args.decoder_embed_dim,
            args.decoder_attention_heads,
            args.decoder_layers,
        )

        self.out_proj = nn.Linear(args.decoder_embed_dim, self.out_dim)

        self.postnet = None
        if args.add_postnet:
            self.postnet = Postnet(
                self.out_dim,
                args.postnet_conv_dim,
                args.postnet_conv_kernel_size,
                args.postnet_layers,
                args.postnet_dropout,
            )

        self.apply(model_init)

    @staticmethod
    def _fft_stack(args, embed_dim, n_heads, n_layers):
        """Build ``n_layers`` FFT layers sharing the FFN/dropout settings."""
        return nn.ModuleList(
            [
                FFTLayer(
                    embed_dim,
                    n_heads,
                    args.fft_hidden_dim,
                    args.fft_kernel_size,
                    dropout=args.dropout,
                    attention_dropout=args.attention_dropout,
                )
                for _ in range(n_layers)
            ]
        )

    def forward(
        self,
        src_tokens,
        src_lengths=None,
        speaker=None,
        durations=None,
        pitches=None,
        energies=None,
        **kwargs,
    ):
        """
        Returns:
            ``(x, x_post, out_lens, log_dur_out, pitch_out, energy_out)``;
            ``x_post`` is ``None`` when no post-net is configured.
        """
        x = self.embed_tokens(src_tokens)

        src_pad_mask = src_tokens.eq(self.padding_idx)
        # Positions are computed from the padding mask itself.
        x += self.pos_emb_alpha * self.embed_positions(src_pad_mask)
        x = self.dropout_module(x)

        for fft in self.encoder_fft_layers:
            x = fft(x, src_pad_mask)

        if self.embed_speaker is not None:
            n_batch, n_steps, _ = x.size()
            spk = self.embed_speaker(speaker).expand(n_batch, n_steps, -1)
            x = self.spk_emb_proj(torch.cat([x, spk], dim=2))

        x, out_lens, log_dur_out, pitch_out, energy_out = self.var_adaptor(
            x, src_pad_mask, durations, pitches, energies
        )

        tgt_pad_mask = lengths_to_padding_mask(out_lens)
        x += self.dec_pos_emb_alpha * self.embed_positions(tgt_pad_mask)
        for fft in self.decoder_fft_layers:
            x = fft(x, tgt_pad_mask)

        x = self.out_proj(x)
        x_post = x + self.postnet(x) if self.postnet is not None else None
        return x, x_post, out_lens, log_dur_out, pitch_out, energy_out
@register_model("fastspeech2")
class FastSpeech2Model(FairseqEncoderModel):
    """
    Implementation for https://arxiv.org/abs/2006.04558
    """

    NON_AUTOREGRESSIVE = True

    @classmethod
    def hub_models(cls):
        """Return a mapping from released model id to its archive URL."""
        base_url = "http://dl.fbaipublicfiles.com/fairseq/s2"
        model_ids = [
            "fastspeech2-en-ljspeech",
            "fastspeech2-en-200_speaker-cv4",
        ]
        return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file="model.pt",
        data_name_or_path=".",
        config_yaml="config.yaml",
        vocoder: str = "griffin_lim",
        fp16: bool = False,
        **kwargs,
    ):
        """Load a pretrained model and wrap it in a ``TTSHubInterface``."""
        from fairseq import hub_utils

        x = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            config_yaml=config_yaml,
            vocoder=vocoder,
            fp16=fp16,
            **kwargs,
        )
        return TTSHubInterface(x["args"], x["task"], x["models"][0])

    @staticmethod
    def add_args(parser):
        """Register the model-specific command-line options on ``parser``."""
        parser.add_argument("--dropout", type=float)
        parser.add_argument("--output-frame-dim", type=int)
        parser.add_argument("--speaker-embed-dim", type=int)
        # FFT blocks
        parser.add_argument("--fft-hidden-dim", type=int)
        parser.add_argument("--fft-kernel-size", type=int)
        parser.add_argument("--attention-dropout", type=float)
        parser.add_argument("--encoder-layers", type=int)
        parser.add_argument("--encoder-embed-dim", type=int)
        parser.add_argument("--encoder-attention-heads", type=int)
        parser.add_argument("--decoder-layers", type=int)
        parser.add_argument("--decoder-embed-dim", type=int)
        parser.add_argument("--decoder-attention-heads", type=int)
        # variance predictor
        parser.add_argument("--var-pred-n-bins", type=int)
        parser.add_argument("--var-pred-hidden-dim", type=int)
        parser.add_argument("--var-pred-kernel-size", type=int)
        parser.add_argument("--var-pred-dropout", type=float)
        # postnet
        parser.add_argument("--add-postnet", action="store_true")
        parser.add_argument("--postnet-dropout", type=float)
        parser.add_argument("--postnet-layers", type=int)
        parser.add_argument("--postnet-conv-dim", type=int)
        parser.add_argument("--postnet-conv-kernel-size", type=int)

    def __init__(self, encoder, args, src_dict):
        """
        Args:
            encoder: the ``FastSpeech2Encoder`` holding the whole network.
            args: model hyper-parameters.
            src_dict: source dictionary; sizes the optional CTC projection.
        """
        super().__init__(encoder)
        self._num_updates = 0

        out_dim = args.output_frame_dim * args.n_frames_per_step
        # The CTC head only exists when a positive CTC weight is configured.
        self.ctc_proj = None
        if getattr(args, "ctc_weight", 0.0) > 0.0:
            self.ctc_proj = nn.Linear(out_dim, len(src_dict))

    @classmethod
    def build_model(cls, args, task):
        """Build the model for ``task`` from ``args``."""
        embed_speaker = task.get_speaker_embeddings(args)
        encoder = FastSpeech2Encoder(args, task.src_dict, embed_speaker)
        return cls(encoder, args, task.src_dict)

    def set_num_updates(self, num_updates):
        """Record the number of parameter updates performed so far."""
        super().set_num_updates(num_updates)
        self._num_updates = num_updates

    def get_normalized_probs(self, net_output, log_probs, sample=None):
        """Return (log-)probabilities from the CTC head for ``net_output[0]``.

        Raises:
            RuntimeError: if the model was built without a CTC projection
                (``ctc_weight`` not greater than 0).
        """
        if self.ctc_proj is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object is not callable".
            raise RuntimeError(
                "get_normalized_probs requires a CTC projection; "
                "build the model with ctc_weight > 0"
            )
        logits = self.ctc_proj(net_output[0])
        if log_probs:
            return utils.log_softmax(logits.float(), dim=-1)
        else:
            return utils.softmax(logits.float(), dim=-1)
@register_model_architecture("fastspeech2", "fastspeech2")
def base_architecture(args):
    """Fill in the default FastSpeech 2 hyper-parameters on ``args``.

    Attributes already present on ``args`` are kept; missing ones are set to
    the defaults listed below.
    """
    defaults = (
        ("dropout", 0.2),
        ("output_frame_dim", 80),
        ("speaker_embed_dim", 64),
        # FFT blocks
        ("fft_hidden_dim", 1024),
        ("fft_kernel_size", 9),
        ("attention_dropout", 0.0),
        ("encoder_layers", 4),
        ("encoder_embed_dim", 256),
        ("encoder_attention_heads", 2),
        ("decoder_layers", 4),
        ("decoder_embed_dim", 256),
        ("decoder_attention_heads", 2),
        # variance predictor
        ("var_pred_n_bins", 256),
        ("var_pred_hidden_dim", 256),
        ("var_pred_kernel_size", 3),
        ("var_pred_dropout", 0.5),
        # postnet
        ("add_postnet", False),
        ("postnet_dropout", 0.5),
        ("postnet_layers", 5),
        ("postnet_conv_dim", 512),
        ("postnet_conv_kernel_size", 5),
    )
    for option, fallback in defaults:
        setattr(args, option, getattr(args, option, fallback))
LRELU_SLOPE = 0.1


def init_weights(m, mean=0.0, std=0.01):
    """Draw the weights of any module whose class name contains "Conv" from
    N(mean, std); other modules are left untouched."""
    if "Conv" in m.__class__.__name__:
        m.weight.data.normal_(mean, std)


def get_padding(kernel_size, dilation=1):
    """Return the padding used for a convolution with this kernel/dilation."""
    return (kernel_size * dilation - dilation) // 2


class ResBlock(torch.nn.Module):
    """Residual block of three (dilated conv, plain conv) pairs."""

    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
        """
        Args:
            channels: number of input and output channels.
            kernel_size: kernel width of every convolution.
            dilation: dilation rates; the first three entries are used for
                the first convolution of each pair.
        """
        super(ResBlock, self).__init__()

        def make_conv(rate):
            # Weight-normalised, stride-1 convolution with get_padding().
            return weight_norm(
                Conv1d(
                    channels,
                    channels,
                    kernel_size,
                    1,
                    dilation=rate,
                    padding=get_padding(kernel_size, rate),
                )
            )

        self.convs1 = nn.ModuleList([make_conv(dilation[i]) for i in range(3)])
        self.convs1.apply(init_weights)

        self.convs2 = nn.ModuleList([make_conv(1) for _ in range(3)])
        self.convs2.apply(init_weights)

    def forward(self, x):
        """Apply the three residual sub-blocks in sequence."""
        for dilated, plain in zip(self.convs1, self.convs2):
            branch = dilated(F.leaky_relu(x, LRELU_SLOPE))
            branch = plain(F.leaky_relu(branch, LRELU_SLOPE))
            x = branch + x
        return x

    def remove_weight_norm(self):
        """Strip weight normalisation from every convolution of the block."""
        for group in (self.convs1, self.convs2):
            for layer in group:
                remove_weight_norm(layer)


class Generator(torch.nn.Module):
    """Generator built from transposed-convolution upsampling stages, each
    followed by a bank of residual blocks whose outputs are averaged."""

    def __init__(self, cfg):
        """
        Args:
            cfg: mapping with ``resblock_kernel_sizes``,
                ``resblock_dilation_sizes``, ``upsample_rates``,
                ``upsample_kernel_sizes``, ``upsample_initial_channel`` and
                optionally ``model_in_dim`` (defaults to 80).
        """
        super(Generator, self).__init__()
        self.num_kernels = len(cfg["resblock_kernel_sizes"])
        self.num_upsamples = len(cfg["upsample_rates"])
        self.conv_pre = weight_norm(
            Conv1d(
                cfg.get("model_in_dim", 80),
                cfg["upsample_initial_channel"],
                7,
                1,
                padding=3,
            )
        )

        self.ups = nn.ModuleList()
        stages = zip(cfg["upsample_rates"], cfg["upsample_kernel_sizes"])
        for idx, (rate, width) in enumerate(stages):
            # Channel count halves at every upsampling stage.
            in_ch = cfg["upsample_initial_channel"] // (2**idx)
            out_ch = cfg["upsample_initial_channel"] // (2 ** (idx + 1))
            self.ups.append(
                weight_norm(
                    ConvTranspose1d(
                        in_ch, out_ch, width, rate, padding=(width - rate) // 2
                    )
                )
            )

        self.resblocks = nn.ModuleList()
        for idx in range(len(self.ups)):
            stage_ch = cfg["upsample_initial_channel"] // (2 ** (idx + 1))
            for width, rates in zip(
                cfg["resblock_kernel_sizes"], cfg["resblock_dilation_sizes"]
            ):
                self.resblocks.append(ResBlock(stage_ch, width, rates))

        # stage_ch holds the channel count of the last upsampling stage.
        self.conv_post = weight_norm(Conv1d(stage_ch, 1, 7, 1, padding=3))
        self.ups.apply(init_weights)
        self.conv_post.apply(init_weights)

    def forward(self, x):
        """Map features (B x C x T) to a tanh-bounded signal (B x 1 x T')."""
        x = self.conv_pre(x)
        for stage in range(self.num_upsamples):
            x = self.ups[stage](F.leaky_relu(x, LRELU_SLOPE))
            first = stage * self.num_kernels
            acc = None
            for offset in range(self.num_kernels):
                branch = self.resblocks[first + offset](x)
                if acc is None:
                    acc = branch
                else:
                    acc += branch
            x = acc / self.num_kernels
        # The final activation uses leaky_relu's default negative slope.
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        return torch.tanh(x)

    def remove_weight_norm(self):
        """Strip weight normalisation from all layers of the generator."""
        print("Removing weight norm...")
        for layer in self.ups:
            remove_weight_norm(layer)
        for block in self.resblocks:
            block.remove_weight_norm()
        remove_weight_norm(self.conv_pre)
        remove_weight_norm(self.conv_post)
class TTSHubInterface(nn.Module):
    """Hub wrapper that turns raw text into a waveform with a TTS model."""

    def __init__(self, cfg, task, model):
        """
        Args:
            cfg: loaded configuration; ``cfg["task"].vocoder`` is overwritten
                from the task's data config.
            task: task providing ``data_cfg``, ``build_generator`` and
                dictionaries.
            model: the TTS model; it is switched to eval mode.
        """
        super().__init__()
        self.cfg = cfg
        self.task = task
        self.model = model
        self.model.eval()

        self.update_cfg_with_data_cfg(self.cfg, self.task.data_cfg)
        self.generator = self.task.build_generator([self.model], self.cfg)

    @classmethod
    def phonemize(
        cls,
        text: str,
        lang: Optional[str],
        phonemizer: Optional[str] = None,
        preserve_punct: bool = False,
        to_simplified_zh: bool = False,
    ):
        """Convert ``text`` to a phoneme string with the chosen backend.

        ``phonemizer`` may be ``"g2p"``, ``"g2pc"`` or ``"ipa"``; any other
        value returns the text unchanged (after the optional simplified-Chinese
        conversion). Backend packages are imported lazily.
        """
        if to_simplified_zh:
            import hanziconv

            text = hanziconv.HanziConv.toSimplified(text)

        if phonemizer == "g2p":
            import g2p_en

            g2p = g2p_en.G2p()
            if preserve_punct:
                return " ".join("|" if p == " " else p for p in g2p(text))
            # "," and ";" are mapped to "sp"; tokens that are not
            # alphanumeric are then dropped.
            res = [{",": "sp", ";": "sp"}.get(p, p) for p in g2p(text)]
            return " ".join(p for p in res if p.isalnum())

        if phonemizer == "g2pc":
            import g2pc

            g2p = g2pc.G2pC()
            return " ".join([w[3] for w in g2p(text)])

        if phonemizer == "ipa":
            assert lang is not None
            import phonemizer
            from phonemizer.separator import Separator

            lang_map = {"en": "en-us", "fr": "fr-fr"}
            return phonemizer.phonemize(
                text,
                backend="espeak",
                language=lang_map.get(lang, lang),
                separator=Separator(word="| ", phone=" "),
            )

        return text

    @classmethod
    def tokenize(cls, text: str, tkn_cfg: Dict[str, str]):
        """Apply the SentencePiece model named in ``tkn_cfg``, if any."""
        sentencepiece_model = tkn_cfg.get("sentencepiece_model", None)
        if sentencepiece_model is None:
            return text

        assert Path(sentencepiece_model).exists()
        import sentencepiece as sp

        spm = sp.SentencePieceProcessor()
        spm.Load(sentencepiece_model)
        return " ".join(spm.Encode(text, out_type=str))

    @classmethod
    def update_cfg_with_data_cfg(cls, cfg, data_cfg):
        """Copy the vocoder type from the data config into the task config."""
        cfg["task"].vocoder = data_cfg.vocoder.get("type", "griffin_lim")

    @classmethod
    def get_model_input(
        cls, task, text: str, speaker: Optional[int] = None, verbose: bool = False
    ):
        """Phonemize, tokenize and encode ``text`` into a generator sample.

        Returns:
            A dict with ``net_input`` (``src_tokens``, ``src_lengths``,
            ``prev_output_tokens``), ``target_lengths`` and ``speaker``.
        """
        hub_cfg = task.data_cfg.hub
        phonemized = cls.phonemize(
            text,
            hub_cfg.get("lang", None),
            hub_cfg.get("phonemizer", None),
            hub_cfg.get("preserve_punct", False),
            hub_cfg.get("to_simplified_zh", False),
        )
        tokenized = cls.tokenize(phonemized, task.data_cfg.bpe_tokenizer)
        if verbose:
            logger.info(f"text: {text}")
            logger.info(f"phonemized: {phonemized}")
            logger.info(f"tokenized: {tokenized}")

        # A speaker configured in the hub section wins over the argument.
        spk = task.data_cfg.hub.get("speaker", speaker)
        n_speakers = len(task.speaker_to_id or {})
        if spk is None and n_speakers > 0:
            spk = random.randint(0, n_speakers - 1)
        if spk is not None:
            spk = max(0, min(spk, n_speakers - 1))
        if verbose:
            logger.info(f"speaker: {spk}")
        if spk is not None:
            spk = torch.Tensor([[spk]]).long()

        encoded = task.src_dict.encode_line(tokenized, add_if_not_exist=False)
        src_tokens = encoded.view(1, -1)
        src_lengths = torch.Tensor([len(tokenized.split())]).long()
        net_input = {
            "src_tokens": src_tokens,
            "src_lengths": src_lengths,
            "prev_output_tokens": None,
        }
        return {
            "net_input": net_input,
            "target_lengths": None,
            "speaker": spk,
        }

    @classmethod
    def get_prediction(cls, task, model, generator, sample) -> Tuple[torch.Tensor, int]:
        """Generate from ``sample``; return ``(waveform, task.sr)``."""
        outputs = generator.generate(model, sample)
        return outputs[0]["waveform"], task.sr

    def predict(
        self, text: str, speaker: Optional[int] = None, verbose: bool = False
    ) -> Tuple[torch.Tensor, int]:
        """Synthesize ``text`` and return ``(waveform, sample rate)``."""
        sample = self.get_model_input(self.task, text, speaker, verbose=verbose)
        return self.get_prediction(self.task, self.model, self.generator, sample)
class VocoderHubInterface(nn.Module):
    """Vocoder interface to run vocoder models through hub.
    Currently we only support unit vocoder"""

    def __init__(self, cfg, model):
        """
        Args:
            cfg: mapping that may provide ``num_speakers``.
            model: vocoder wrapper exposing ``model.multispkr``; it is
                switched to eval mode.
        """
        super().__init__()
        self.vocoder = model
        self.vocoder.eval()
        self.sr = 16000
        self.multispkr = self.vocoder.model.multispkr
        if self.multispkr:
            logger.info("multi-speaker vocoder")
            self.num_speakers = cfg.get(
                "num_speakers",
                200,
            )  # following the default in codehifigan to set to 200

    def get_model_input(
        self,
        text: str,
        speaker: Optional[int] = -1,
    ):
        """Parse a whitespace-separated unit string into a vocoder sample.

        Args:
            text: space-separated integer unit ids.
            speaker: speaker id; ``None`` or ``-1`` selects a random speaker
                for multi-speaker vocoders.

        Returns:
            A dict with ``code`` (1 x T LongTensor) and, for multi-speaker
            vocoders, ``spkr`` (1 x 1 LongTensor).
        """
        units = list(map(int, text.strip().split()))
        x = {
            "code": torch.LongTensor(units).view(1, -1),
        }
        # Only a missing speaker means "pick one at random". The previous
        # truthiness test also discarded the valid speaker id 0.
        if speaker is None:
            speaker = -1
        if self.multispkr:
            assert (
                speaker < self.num_speakers
            ), f"invalid --speaker-id ({speaker}) with total #speakers = {self.num_speakers}"
            spk = random.randint(0, self.num_speakers - 1) if speaker == -1 else speaker
            x["spkr"] = torch.LongTensor([spk]).view(1, 1)
        return x

    def get_prediction(self, sample, dur_prediction: Optional[bool] = True):
        """Run the vocoder on ``sample``; return ``(waveform, sample rate)``."""
        wav = self.vocoder(sample, dur_prediction)
        return wav, self.sr

    def predict(
        self,
        text: str,
        speaker: Optional[int] = None,
        dur_prediction: Optional[bool] = True,
    ):
        """Vocode the unit string ``text``; return ``(waveform, sample rate)``."""
        sample = self.get_model_input(text, speaker)
        return self.get_prediction(sample, dur_prediction)
def encoder_init(m):
    """Re-initialise ``nn.Conv1d`` weights with Xavier-uniform (ReLU gain)."""
    if isinstance(m, nn.Conv1d):
        nn.init.xavier_uniform_(m.weight, torch.nn.init.calculate_gain("relu"))


class Tacotron2Encoder(FairseqEncoder):
    """Tacotron 2 encoder: token embedding, conv stack and a bi-LSTM."""

    def __init__(self, args, src_dict, embed_speaker):
        """
        Args:
            args: namespace with the ``encoder_*`` options and, when a speaker
                embedding is used, ``speaker_embed_dim``.
            src_dict: source dictionary; supplies vocabulary size and pad index.
            embed_speaker: optional speaker-embedding module, or ``None``.
        """
        super().__init__(src_dict)
        self.padding_idx = src_dict.pad()
        self.embed_speaker = embed_speaker
        self.spk_emb_proj = None
        if embed_speaker is not None:
            self.spk_emb_proj = nn.Linear(
                args.encoder_embed_dim + args.speaker_embed_dim, args.encoder_embed_dim
            )

        self.embed_tokens = nn.Embedding(
            len(src_dict), args.encoder_embed_dim, padding_idx=self.padding_idx
        )

        # Odd kernels keep the sequence length with symmetric padding.
        assert args.encoder_conv_kernel_size % 2 == 1
        self.convolutions = nn.ModuleList(
            nn.Sequential(
                nn.Conv1d(
                    args.encoder_embed_dim,
                    args.encoder_embed_dim,
                    kernel_size=args.encoder_conv_kernel_size,
                    padding=((args.encoder_conv_kernel_size - 1) // 2),
                ),
                nn.BatchNorm1d(args.encoder_embed_dim),
                nn.ReLU(),
                nn.Dropout(args.encoder_dropout),
            )
            for _ in range(args.encoder_conv_layers)
        )

        self.lstm = nn.LSTM(
            args.encoder_embed_dim,
            args.encoder_embed_dim // 2,
            num_layers=args.encoder_lstm_layers,
            batch_first=True,
            bidirectional=True,
        )

        self.apply(encoder_init)

    def forward(self, src_tokens, src_lengths=None, speaker=None, **kwargs):
        """Encode ``src_tokens`` (B x T).

        Returns:
            A dict with ``encoder_out`` (a one-element list holding a
            B x T x C tensor) and ``encoder_padding_mask`` (B x T).
        """
        x = self.embed_tokens(src_tokens)
        x = x.transpose(1, 2).contiguous()  # B x T x C -> B x C x T
        for conv in self.convolutions:
            x = conv(x)
        x = x.transpose(1, 2).contiguous()  # B x C x T -> B x T x C

        src_lengths = src_lengths.cpu().long()
        # enforce_sorted=False accepts batches that are not ordered by
        # decreasing length; the original order is restored on unpacking.
        x = nn.utils.rnn.pack_padded_sequence(
            x, src_lengths, batch_first=True, enforce_sorted=False
        )
        x = self.lstm(x)[0]
        # total_length keeps the time axis aligned with the padding mask even
        # when every sequence is shorter than the padded width.
        x = nn.utils.rnn.pad_packed_sequence(
            x, batch_first=True, total_length=src_tokens.size(1)
        )[0]

        encoder_padding_mask = src_tokens.eq(self.padding_idx)

        if self.embed_speaker is not None:
            # x is batch-first here, so the leading dimension is the batch.
            bsz, seq_len, _ = x.size()
            emb = self.embed_speaker(speaker).expand(bsz, seq_len, -1)
            x = self.spk_emb_proj(torch.cat([x, emb], dim=2))

        return {
            "encoder_out": [x],  # B x T x C
            "encoder_padding_mask": encoder_padding_mask,  # B x T
        }
class Prenet(nn.Module):
    """Stack of Linear + ReLU layers, each followed by dropout that stays
    active regardless of train/eval mode."""

    def __init__(self, in_dim, n_layers, n_units, dropout):
        """
        Args:
            in_dim: input feature size of the first layer.
            n_layers: number of Linear + ReLU layers.
            n_units: output size of every layer.
            dropout: dropout probability applied after every layer.
        """
        super().__init__()
        self.layers = nn.ModuleList()
        width = in_dim
        for _ in range(n_layers):
            self.layers.append(nn.Sequential(nn.Linear(width, n_units), nn.ReLU()))
            width = n_units
        self.dropout = dropout

    def forward(self, x):
        """Apply every layer followed by dropout."""
        for layer in self.layers:
            hidden = layer(x)
            # F.dropout's ``training`` flag is left at its default, so
            # dropout is applied in eval mode too.
            x = F.dropout(hidden, p=self.dropout)
        return x


class Postnet(nn.Module):
    """Stack of Conv1d + BatchNorm (+ Tanh) + Dropout layers mapping
    ``in_dim`` channels back to ``in_dim`` channels."""

    def __init__(self, in_dim, n_channels, kernel_size, n_layers, dropout):
        """
        Args:
            in_dim: channel size of the input and of the final output.
            n_channels: channel size between layers.
            kernel_size: odd kernel width shared by all convolutions.
            n_layers: number of convolutional layers.
            dropout: dropout probability at the end of every layer.
        """
        super(Postnet, self).__init__()
        self.convolutions = nn.ModuleList()
        assert kernel_size % 2 == 1
        for idx in range(n_layers):
            is_last = idx >= n_layers - 1
            src_ch = in_dim if idx == 0 else n_channels
            dst_ch = in_dim if is_last else n_channels
            conv = nn.Conv1d(
                src_ch,
                dst_ch,
                kernel_size=kernel_size,
                padding=((kernel_size - 1) // 2),
            )
            stack = [conv, nn.BatchNorm1d(dst_ch)]
            if not is_last:
                # The last layer has no non-linearity.
                stack.append(nn.Tanh())
            stack.append(nn.Dropout(dropout))
            nn.init.xavier_uniform_(
                conv.weight,
                torch.nn.init.calculate_gain("linear" if is_last else "tanh"),
            )
            self.convolutions.append(nn.Sequential(*stack))

    def forward(self, x):
        """Apply the stack to ``x`` (B x T x C); returns B x T x C."""
        hidden = x.transpose(1, 2)  # B x T x C -> B x C x T
        for block in self.convolutions:
            hidden = block(hidden)
        return hidden.transpose(1, 2)


def decoder_init(m):
    """Re-initialise ``nn.Conv1d`` weights with Xavier-uniform (tanh gain)."""
    if not isinstance(m, torch.nn.Conv1d):
        return
    nn.init.xavier_uniform_(m.weight, torch.nn.init.calculate_gain("tanh"))
class Tacotron2Decoder(FairseqIncrementalDecoder):
    """Autoregressive Tacotron 2 decoder.

    Each step runs a pre-net on the previous frame, an attention LSTM, a
    location-sensitive attention over the encoder output and a stack of
    decoder LSTMs, then projects to an output frame and an end-of-sequence
    logit. A post-net refines the whole predicted sequence.
    """

    def __init__(self, args, src_dict):
        """
        Args:
            args: model hyper-parameters (see ``Tacotron2Model.add_args``),
                plus ``n_frames_per_step`` and optionally ``ctc_weight``.
            src_dict: source dictionary; only used to size the optional CTC
                projection.
        """
        super().__init__(None)
        self.args = args
        self.n_frames_per_step = args.n_frames_per_step
        self.out_dim = args.output_frame_dim * args.n_frames_per_step

        self.prenet = Prenet(
            self.out_dim, args.prenet_layers, args.prenet_dim, args.prenet_dropout
        )

        # take prev_context, prev_frame, (speaker embedding) as input
        self.attention_lstm = LSTMCellWithZoneOut(
            args.zoneout,
            args.prenet_dim + args.encoder_embed_dim,
            args.decoder_lstm_dim,
        )

        # take attention_lstm output, attention_state, encoder_out as input
        self.attention = LocationAttention(
            args.attention_dim,
            args.encoder_embed_dim,
            args.decoder_lstm_dim,
            # one attention-state channel, two when cumulative weights are used
            (1 + int(args.attention_use_cumprob)),
            args.attention_conv_dim,
            args.attention_conv_kernel_size,
        )

        # take attention_lstm output, context, (gated_latent) as input
        self.lstm = nn.ModuleList(
            LSTMCellWithZoneOut(
                args.zoneout,
                args.encoder_embed_dim + args.decoder_lstm_dim,
                args.decoder_lstm_dim,
            )
            for i in range(args.decoder_lstm_layers)
        )

        proj_in_dim = args.encoder_embed_dim + args.decoder_lstm_dim
        self.feat_proj = nn.Linear(proj_in_dim, self.out_dim)
        self.eos_proj = nn.Linear(proj_in_dim, 1)

        self.postnet = Postnet(
            self.out_dim,
            args.postnet_conv_dim,
            args.postnet_conv_kernel_size,
            args.postnet_layers,
            args.postnet_dropout,
        )

        # The CTC head only exists when a positive CTC weight is configured.
        self.ctc_proj = None
        if getattr(args, "ctc_weight", 0.0) > 0.0:
            self.ctc_proj = nn.Linear(self.out_dim, len(src_dict))

        self.apply(decoder_init)

    def _get_states(self, incremental_state, enc_out):
        """Fetch the recurrent state from ``incremental_state``, creating
        zero tensors for every entry that is not stored yet.

        Returns:
            ``(alstm_h, alstm_c, lstm_h, lstm_c, attn_w, attn_w_cum)``;
            ``lstm_h`` and ``lstm_c`` are lists with one tensor per decoder
            LSTM layer.
        """
        bsz, in_len, _ = enc_out.size()
        alstm_h = self.get_incremental_state(incremental_state, "alstm_h")
        if alstm_h is None:
            alstm_h = enc_out.new_zeros(bsz, self.args.decoder_lstm_dim)
        alstm_c = self.get_incremental_state(incremental_state, "alstm_c")
        if alstm_c is None:
            alstm_c = enc_out.new_zeros(bsz, self.args.decoder_lstm_dim)

        lstm_h = self.get_incremental_state(incremental_state, "lstm_h")
        if lstm_h is None:
            lstm_h = [
                enc_out.new_zeros(bsz, self.args.decoder_lstm_dim)
                for _ in range(self.args.decoder_lstm_layers)
            ]
        lstm_c = self.get_incremental_state(incremental_state, "lstm_c")
        if lstm_c is None:
            lstm_c = [
                enc_out.new_zeros(bsz, self.args.decoder_lstm_dim)
                for _ in range(self.args.decoder_lstm_layers)
            ]

        attn_w = self.get_incremental_state(incremental_state, "attn_w")
        if attn_w is None:
            attn_w = enc_out.new_zeros(bsz, in_len)
        attn_w_cum = self.get_incremental_state(incremental_state, "attn_w_cum")
        if attn_w_cum is None:
            attn_w_cum = enc_out.new_zeros(bsz, in_len)
        return alstm_h, alstm_c, lstm_h, lstm_c, attn_w, attn_w_cum

    def _get_init_attn_c(self, enc_out, enc_mask):
        """Return the initial attention context (B x encoder_embed_dim).

        ``init_attn_c == "zero"`` gives zeros; ``"avg"`` gives the mean of
        the non-padded encoder outputs.

        Raises:
            ValueError: for any other ``init_attn_c`` value.
        """
        bsz = enc_out.size(0)
        if self.args.init_attn_c == "zero":
            return enc_out.new_zeros(bsz, self.args.encoder_embed_dim)
        elif self.args.init_attn_c == "avg":
            # Uniform weights over the unmasked positions of each sample.
            enc_w = (~enc_mask).type(enc_out.type())
            enc_w = enc_w / enc_w.sum(dim=1, keepdim=True)
            return torch.sum(enc_out * enc_w.unsqueeze(2), dim=1)
        else:
            raise ValueError(f"{self.args.init_attn_c} not supported")

    def forward(
        self,
        prev_output_tokens,
        encoder_out=None,
        incremental_state=None,
        target_lengths=None,
        **kwargs,
    ):
        """Decode frames conditioned on ``prev_output_tokens``.

        Args:
            prev_output_tokens: previous output frames, indexed as
                B x T_out x out_dim; only the last step is used when
                ``incremental_state`` is given.
            encoder_out: dict with ``encoder_out`` (one-element list) and
                ``encoder_padding_mask``.
            incremental_state: optional state dict for step-wise decoding.
            target_lengths: unused here.

        Returns:
            ``(post_feat_out, eos_out, extra)`` where ``post_feat_out`` is
            B x T_out x out_dim, ``eos_out`` is B x T_out x 1 and ``extra``
            holds ``attn`` (B x T_in x T_out) and ``feature_out`` (the
            pre-post-net frames).
        """
        enc_mask = encoder_out["encoder_padding_mask"]
        enc_out = encoder_out["encoder_out"][0]
        in_len = enc_out.size(1)

        if incremental_state is not None:
            # Step-wise decoding: only the most recent frame is processed.
            prev_output_tokens = prev_output_tokens[:, -1:, :]
        bsz, out_len, _ = prev_output_tokens.size()

        prenet_out = self.prenet(prev_output_tokens)
        (alstm_h, alstm_c, lstm_h, lstm_c, attn_w, attn_w_cum) = self._get_states(
            incremental_state, enc_out
        )
        # NOTE(review): attn_ctx is not kept in incremental_state, so every
        # incremental call restarts from the initial context — confirm that
        # this is intended.
        attn_ctx = self._get_init_attn_c(enc_out, enc_mask)

        attn_out = enc_out.new_zeros(bsz, in_len, out_len)
        feat_out = enc_out.new_zeros(bsz, out_len, self.out_dim)
        eos_out = enc_out.new_zeros(bsz, out_len)
        for t in range(out_len):
            alstm_in = torch.cat((attn_ctx, prenet_out[:, t, :]), dim=1)
            alstm_h, alstm_c = self.attention_lstm(alstm_in, (alstm_h, alstm_c))

            # Attention state: previous weights, plus the cumulative weights
            # when attention_use_cumprob is set.
            attn_state = attn_w.unsqueeze(1)
            if self.args.attention_use_cumprob:
                attn_state = torch.stack((attn_w, attn_w_cum), dim=1)
            attn_ctx, attn_w = self.attention(enc_out, enc_mask, alstm_h, attn_state)
            attn_w_cum = attn_w_cum + attn_w
            attn_out[:, :, t] = attn_w

            # Every decoder LSTM layer sees the context concatenated with the
            # hidden state of the layer below (the attention LSTM for layer 0).
            for i, cur_lstm in enumerate(self.lstm):
                if i == 0:
                    lstm_in = torch.cat((attn_ctx, alstm_h), dim=1)
                else:
                    lstm_in = torch.cat((attn_ctx, lstm_h[i - 1]), dim=1)
                lstm_h[i], lstm_c[i] = cur_lstm(lstm_in, (lstm_h[i], lstm_c[i]))

            proj_in = torch.cat((attn_ctx, lstm_h[-1]), dim=1)
            feat_out[:, t, :] = self.feat_proj(proj_in)
            eos_out[:, t] = self.eos_proj(proj_in).squeeze(1)
        self.attention.clear_cache()

        # Persist the recurrent state for the next incremental call.
        self.set_incremental_state(incremental_state, "alstm_h", alstm_h)
        self.set_incremental_state(incremental_state, "alstm_c", alstm_c)
        self.set_incremental_state(incremental_state, "lstm_h", lstm_h)
        self.set_incremental_state(incremental_state, "lstm_c", lstm_c)
        self.set_incremental_state(incremental_state, "attn_w", attn_w)
        self.set_incremental_state(incremental_state, "attn_w_cum", attn_w_cum)

        # The post-net predicts a residual on top of the raw frames.
        post_feat_out = feat_out + self.postnet(feat_out)
        eos_out = eos_out.view(bsz, out_len, 1)
        return post_feat_out, eos_out, {"attn": attn_out, "feature_out": feat_out}
@register_model("tacotron_2")
class Tacotron2Model(FairseqEncoderDecoderModel):
    """
    Implementation for https://arxiv.org/pdf/1712.05884.pdf
    """

    @staticmethod
    def add_args(parser):
        """Register the model-specific command-line options on ``parser``."""
        options = (
            # encoder
            ("--encoder-dropout", {"type": float}),
            ("--encoder-embed-dim", {"type": int}),
            ("--encoder-conv-layers", {"type": int}),
            ("--encoder-conv-kernel-size", {"type": int}),
            ("--encoder-lstm-layers", {"type": int}),
            # decoder
            ("--attention-dim", {"type": int}),
            ("--attention-conv-dim", {"type": int}),
            ("--attention-conv-kernel-size", {"type": int}),
            ("--prenet-dropout", {"type": float}),
            ("--prenet-layers", {"type": int}),
            ("--prenet-dim", {"type": int}),
            ("--postnet-dropout", {"type": float}),
            ("--postnet-layers", {"type": int}),
            ("--postnet-conv-dim", {"type": int}),
            ("--postnet-conv-kernel-size", {"type": int}),
            ("--init-attn-c", {"type": str}),
            ("--attention-use-cumprob", {"action": "store_true"}),
            ("--zoneout", {"type": float}),
            ("--decoder-lstm-layers", {"type": int}),
            ("--decoder-lstm-dim", {"type": int}),
            ("--output-frame-dim", {"type": int}),
        )
        for flag, spec in options:
            parser.add_argument(flag, **spec)

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and reset the update counter."""
        super().__init__(*args, **kwargs)
        self._num_updates = 0

    @classmethod
    def build_model(cls, args, task):
        """Build the encoder/decoder pair for ``task`` from ``args``."""
        speaker_embedding = task.get_speaker_embeddings(args)
        return cls(
            Tacotron2Encoder(args, task.src_dict, speaker_embedding),
            Tacotron2Decoder(args, task.src_dict),
        )

    def forward_encoder(self, src_tokens, src_lengths, **kwargs):
        """Run only the encoder and return its output."""
        return self.encoder(src_tokens, src_lengths=src_lengths, **kwargs)

    def set_num_updates(self, num_updates):
        """Record the number of parameter updates performed so far."""
        super().set_num_updates(num_updates)
        self._num_updates = num_updates


@register_model_architecture("tacotron_2", "tacotron_2")
def base_architecture(args):
    """Fill in the default Tacotron 2 hyper-parameters on ``args``.

    Attributes already present on ``args`` are kept; missing ones are set to
    the defaults listed below.
    """
    defaults = (
        # encoder
        ("encoder_dropout", 0.5),
        ("encoder_embed_dim", 512),
        ("encoder_conv_layers", 3),
        ("encoder_conv_kernel_size", 5),
        ("encoder_lstm_layers", 1),
        # decoder
        ("attention_dim", 128),
        ("attention_conv_dim", 32),
        ("attention_conv_kernel_size", 15),
        ("prenet_dropout", 0.5),
        ("prenet_layers", 2),
        ("prenet_dim", 256),
        ("postnet_dropout", 0.5),
        ("postnet_layers", 5),
        ("postnet_conv_dim", 512),
        ("postnet_conv_kernel_size", 5),
        ("init_attn_c", "zero"),
        ("attention_use_cumprob", True),
        ("zoneout", 0.1),
        ("decoder_lstm_layers", 2),
        ("decoder_lstm_dim", 1024),
        ("output_frame_dim", 80),
    )
    for option, fallback in defaults:
        setattr(args, option, getattr(args, option, fallback))
def encoder_init(m):
    """Re-initialise a ``Conv1d`` weight with Xavier-uniform and the ReLU gain.

    Intended to be passed to ``Module.apply``; any module that is not an
    ``nn.Conv1d`` is left untouched.
    """
    if not isinstance(m, nn.Conv1d):
        return
    relu_gain = torch.nn.init.calculate_gain("relu")
    nn.init.xavier_uniform_(m.weight, relu_gain)


def Embedding(num_embeddings, embedding_dim):
    """Create an ``nn.Embedding`` whose weights are drawn from N(0, embedding_dim**-0.5)."""
    table = nn.Embedding(num_embeddings, embedding_dim)
    init_std = embedding_dim**-0.5
    nn.init.normal_(table.weight, mean=0, std=init_std)
    return table
def decoder_init(m):
    """Re-initialise ``Conv1d`` weights with Xavier-uniform and the tanh gain.

    Meant for ``Module.apply``; modules that are not ``Conv1d`` are skipped.
    """
    if isinstance(m, torch.nn.Conv1d):
        nn.init.xavier_uniform_(m.weight, torch.nn.init.calculate_gain("tanh"))


class TTSTransformerDecoder(FairseqIncrementalDecoder):
    """Transformer decoder that maps previous output frames to the next frames.

    Pipeline, as wired in ``forward``: prenet -> scaled positional embedding ->
    stack of ``TransformerDecoderLayer`` -> linear feature / end-of-sequence
    projections -> postnet residual refinement.  An optional linear CTC head
    is created only when ``args.ctc_weight > 0``.
    """

    def __init__(self, args, src_dict, padding_idx=1):
        """Build the decoder.

        Args:
            args: model hyper-parameters (see ``TTSTransformerModel.add_args``).
            src_dict: source dictionary; supplies the padding index and, when
                the CTC head is enabled, the CTC output size.  May be falsy,
                in which case ``padding_idx`` is used.
            padding_idx: fallback padding index when ``src_dict`` is falsy.

        Raises:
            ValueError: if ``args.ctc_weight > 0`` but ``src_dict`` is ``None``
                (the CTC head cannot be sized without a dictionary).
        """
        super().__init__(None)
        self._future_mask = torch.empty(0)

        self.args = args
        self.padding_idx = src_dict.pad() if src_dict else padding_idx
        self.n_frames_per_step = args.n_frames_per_step
        self.out_dim = args.output_frame_dim * args.n_frames_per_step

        self.dropout_module = FairseqDropout(
            args.dropout, module_name=self.__class__.__name__
        )
        self.embed_positions = PositionalEmbedding(
            args.max_target_positions, args.decoder_embed_dim, self.padding_idx
        )
        # learnable scalar that scales the positional embedding before it is added
        self.pos_emb_alpha = nn.Parameter(torch.ones(1))
        self.prenet = nn.Sequential(
            Prenet(
                self.out_dim, args.prenet_layers, args.prenet_dim, args.prenet_dropout
            ),
            nn.Linear(args.prenet_dim, args.decoder_embed_dim),
        )

        self.n_transformer_layers = args.decoder_transformer_layers
        self.transformer_layers = nn.ModuleList(
            TransformerDecoderLayer(args) for _ in range(self.n_transformer_layers)
        )
        if args.decoder_normalize_before:
            self.layer_norm = LayerNorm(args.decoder_embed_dim)
        else:
            self.layer_norm = None

        self.feat_proj = nn.Linear(args.decoder_embed_dim, self.out_dim)
        self.eos_proj = nn.Linear(args.decoder_embed_dim, 1)

        self.postnet = Postnet(
            self.out_dim,
            args.postnet_conv_dim,
            args.postnet_conv_kernel_size,
            args.postnet_layers,
            args.postnet_dropout,
        )

        self.ctc_proj = None
        if getattr(args, "ctc_weight", 0.0) > 0.0:
            if src_dict is None:
                # fail early with a readable message instead of len(None)
                raise ValueError(
                    "ctc_weight > 0 requires a source dictionary to size the "
                    "CTC projection"
                )
            self.ctc_proj = nn.Linear(self.out_dim, len(src_dict))

        self.apply(decoder_init)

    def extract_features(
        self,
        prev_outputs,
        encoder_out=None,
        incremental_state=None,
        target_lengths=None,
        speaker=None,
        **kwargs,
    ):
        """Run prenet, positional embedding and the transformer stack.

        Args:
            prev_outputs: previous output frames, batch-first (B x T x C).
            encoder_out: optional encoder output dict with list-valued
                ``"encoder_out"`` and ``"encoder_padding_mask"`` entries.
            incremental_state: when given, only the last time step of
                ``prev_outputs`` is processed and no future mask is applied.
            target_lengths: lengths used to build the self-attention padding
                mask via ``lengths_to_padding_mask``.
            speaker: accepted for interface compatibility; not used here.

        Returns:
            ``(x, extra)`` where ``x`` is B x T x C and ``extra`` holds
            ``"attn"`` (last layer's head-averaged attention, or ``None``) and
            ``"inner_states"`` (input plus every layer output, T x B x C).
        """
        alignment_layer = self.n_transformer_layers - 1
        self_attn_padding_mask = lengths_to_padding_mask(target_lengths)
        positions = self.embed_positions(
            self_attn_padding_mask, incremental_state=incremental_state
        )

        if incremental_state is not None:
            # incremental decoding: only the newest step is fed forward
            prev_outputs = prev_outputs[:, -1:, :]
            self_attn_padding_mask = self_attn_padding_mask[:, -1:]
            if positions is not None:
                positions = positions[:, -1:]

        x = self.prenet(prev_outputs)
        x += self.pos_emb_alpha * positions
        x = self.dropout_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        if not self_attn_padding_mask.any():
            self_attn_padding_mask = None

        # The encoder tensors do not change between layers, so resolve them once.
        enc_states = None
        enc_padding_mask = None
        if encoder_out is not None:
            if len(encoder_out["encoder_out"]) > 0:
                enc_states = encoder_out["encoder_out"][0]
            if len(encoder_out["encoder_padding_mask"]) > 0:
                enc_padding_mask = encoder_out["encoder_padding_mask"][0]

        attn: Optional[torch.Tensor] = None
        inner_states: List[Optional[torch.Tensor]] = [x]
        for idx, transformer_layer in enumerate(self.transformer_layers):
            if incremental_state is None:
                self_attn_mask = self.buffered_future_mask(x)
            else:
                self_attn_mask = None

            is_alignment_layer = bool(idx == alignment_layer)
            x, layer_attn, _ = transformer_layer(
                x,
                enc_states,
                enc_padding_mask,
                incremental_state,
                self_attn_mask=self_attn_mask,
                self_attn_padding_mask=self_attn_padding_mask,
                need_attn=is_alignment_layer,
                need_head_weights=is_alignment_layer,
            )
            inner_states.append(x)
            if layer_attn is not None and is_alignment_layer:
                attn = layer_attn.float().to(x)

        if attn is not None:
            # average probabilities over heads, transpose to
            # (B, src_len, tgt_len)
            attn = attn.mean(dim=0).transpose(2, 1)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        return x, {"attn": attn, "inner_states": inner_states}

    def forward(
        self,
        prev_output_tokens,
        encoder_out=None,
        incremental_state=None,
        target_lengths=None,
        speaker=None,
        **kwargs,
    ):
        """Predict refined frames and end-of-sequence logits.

        Returns:
            A 3-tuple ``(post_feat_out, eos_out, extra)``: the postnet-refined
            features (``feat_out + postnet(feat_out)``), the output of
            ``eos_proj``, and a dict with ``"attn"``, ``"feature_out"`` (the
            pre-postnet features) and ``"inner_states"``.
        """
        x, extra = self.extract_features(
            prev_output_tokens,
            encoder_out=encoder_out,
            incremental_state=incremental_state,
            target_lengths=target_lengths,
            speaker=speaker,
            **kwargs,
        )
        feat_out = self.feat_proj(x)
        eos_out = self.eos_proj(x)
        post_feat_out = feat_out + self.postnet(feat_out)
        return (
            post_feat_out,
            eos_out,
            {
                "attn": extra["attn"],
                "feature_out": feat_out,
                "inner_states": extra["inner_states"],
            },
        )

    def get_normalized_probs(self, net_output, log_probs, sample):
        """Return (log-)softmax of the CTC head applied to ``feature_out``.

        Raises:
            RuntimeError: if the decoder was built without a CTC head
                (``ctc_weight`` absent or not positive).
        """
        if self.ctc_proj is None:
            raise RuntimeError(
                "get_normalized_probs requires the CTC projection, which is only "
                "created when ctc_weight > 0"
            )
        logits = self.ctc_proj(net_output[2]["feature_out"])
        if log_probs:
            return utils.log_softmax(logits.float(), dim=-1)
        else:
            return utils.softmax(logits.float(), dim=-1)

    def buffered_future_mask(self, tensor):
        """Return a cached ``dim x dim`` upper-triangular -inf mask for *tensor*.

        The cached mask is rebuilt when it is empty, lives on a different
        device than *tensor*, or is smaller than ``tensor.size(0)``.
        """
        dim = tensor.size(0)
        # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround.
        if (
            self._future_mask.size(0) == 0
            or (not self._future_mask.device == tensor.device)
            or self._future_mask.size(0) < dim
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1
            )
        self._future_mask = self._future_mask.to(tensor)
        return self._future_mask[:dim, :dim]
parser.add_argument("--output-frame-dim", type=int) + parser.add_argument("--speaker-embed-dim", type=int) + # encoder prenet + parser.add_argument("--encoder-dropout", type=float) + parser.add_argument("--encoder-conv-layers", type=int) + parser.add_argument("--encoder-conv-kernel-size", type=int) + # encoder transformer layers + parser.add_argument("--encoder-transformer-layers", type=int) + parser.add_argument("--encoder-embed-dim", type=int) + parser.add_argument("--encoder-ffn-embed-dim", type=int) + parser.add_argument("--encoder-normalize-before", action="store_true") + parser.add_argument("--encoder-attention-heads", type=int) + parser.add_argument("--attention-dropout", type=float) + parser.add_argument("--activation-dropout", "--relu-dropout", type=float) + parser.add_argument("--activation-fn", type=str, default="relu") + # decoder prenet + parser.add_argument("--prenet-dropout", type=float) + parser.add_argument("--prenet-layers", type=int) + parser.add_argument("--prenet-dim", type=int) + # decoder postnet + parser.add_argument("--postnet-dropout", type=float) + parser.add_argument("--postnet-layers", type=int) + parser.add_argument("--postnet-conv-dim", type=int) + parser.add_argument("--postnet-conv-kernel-size", type=int) + # decoder transformer layers + parser.add_argument("--decoder-transformer-layers", type=int) + parser.add_argument("--decoder-embed-dim", type=int) + parser.add_argument("--decoder-ffn-embed-dim", type=int) + parser.add_argument("--decoder-normalize-before", action="store_true") + parser.add_argument("--decoder-attention-heads", type=int) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._num_updates = 0 + + @classmethod + def build_model(cls, args, task): + embed_speaker = task.get_speaker_embeddings(args) + encoder = TTSTransformerEncoder(args, task.src_dict, embed_speaker) + decoder = TTSTransformerDecoder(args, task.src_dict) + return cls(encoder, decoder) + + def forward_encoder(self, 
@register_model_architecture("tts_transformer", "tts_transformer")
def base_architecture(args):
    """Populate *args* with the default ``tts_transformer`` hyper-parameters.

    Attributes already present on ``args`` keep their value; missing ones get
    the defaults below.  The FFN widths default to four times the (already
    resolved) embedding width of the same side.
    """

    def _default(**options):
        # assign each option, preferring whatever args already carries
        for option, fallback in options.items():
            setattr(args, option, getattr(args, option, fallback))

    _default(dropout=0.1, output_frame_dim=80, speaker_embed_dim=64)
    # encoder prenet
    _default(encoder_dropout=0.5, encoder_conv_layers=3, encoder_conv_kernel_size=5)
    # encoder transformer layers
    _default(encoder_transformer_layers=6, encoder_embed_dim=512)
    _default(encoder_ffn_embed_dim=4 * args.encoder_embed_dim)
    _default(
        encoder_normalize_before=False,
        encoder_attention_heads=4,
        attention_dropout=0.0,
        activation_dropout=0.0,
        activation_fn="relu",
    )
    # decoder prenet
    _default(prenet_dropout=0.5, prenet_layers=2, prenet_dim=256)
    # decoder postnet
    _default(
        postnet_dropout=0.5,
        postnet_layers=5,
        postnet_conv_dim=512,
        postnet_conv_kernel_size=5,
    )
    # decoder transformer layers
    _default(decoder_transformer_layers=6, decoder_embed_dim=512)
    _default(decoder_ffn_embed_dim=4 * args.decoder_embed_dim)
    _default(decoder_normalize_before=False, decoder_attention_heads=4)
class GriffinLim(torch.nn.Module):
    """Iterative phase reconstruction from a magnitude spectrogram.

    ``forward`` starts from random phases and alternates ``inverse`` (synthesis
    by transposed convolution with a windowed inverse Fourier basis) with
    ``self.transform`` (re-analysis that returns new phases) for ``n_iter``
    rounds.
    """

    def __init__(
        self,
        n_fft: int,
        win_length: int,
        hop_length: int,
        n_iter: int,
        window_fn=torch.hann_window,
    ):
        """
        Args:
            n_fft: FFT size.
            win_length: analysis/synthesis window length.
            hop_length: hop between successive frames.
            n_iter: number of phase-refinement iterations run by ``forward``.
            window_fn: window factory applied to the synthesis basis and to
                the overlap-add normalisation in ``inverse``.
        """
        super(GriffinLim, self).__init__()
        # NOTE(review): the analysis transform is built without window_fn, so it
        # presumably falls back to its own default window - confirm against
        # TTSSpectrogram's signature.
        self.transform = TTSSpectrogram(
            n_fft, win_length, hop_length, return_phase=True
        )

        basis = get_fourier_basis(n_fft)
        basis = torch.pinverse(n_fft / hop_length * basis).T[:, None, :]
        basis *= get_window(window_fn, n_fft, win_length)
        self.register_buffer("basis", basis)

        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.n_iter = n_iter
        # Remember the window so inverse() normalises with the same window that
        # was baked into the synthesis basis above.
        self.window_fn = window_fn

        # threshold below which the window envelope is treated as zero
        self.tiny = 1.1754944e-38

    @classmethod
    def get_window_sum_square(
        cls, n_frames, hop_length, win_length, n_fft, window_fn=torch.hann_window
    ) -> torch.Tensor:
        """Return the overlap-added squared window over ``n_frames`` frames.

        The result is a float32 vector of length
        ``n_fft + hop_length * (n_frames - 1)``.
        """
        w_sq = get_window(window_fn, n_fft, win_length) ** 2
        n = n_fft + hop_length * (n_frames - 1)
        x = torch.zeros(n, dtype=torch.float32)
        for i in range(n_frames):
            ofst = i * hop_length
            x[ofst : min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
        return x

    def inverse(self, magnitude: torch.Tensor, phase) -> torch.Tensor:
        """Synthesise a waveform from magnitude and phase.

        The real and imaginary parts are stacked along dim 1, overlap-added
        with a transposed convolution, divided by the squared-window envelope
        where it exceeds ``self.tiny``, rescaled, and trimmed by ``n_fft // 2``
        samples at both ends.
        """
        x = torch.cat(
            [magnitude * torch.cos(phase), magnitude * torch.sin(phase)], dim=1
        )
        x = F.conv_transpose1d(x, self.basis, stride=self.hop_length)
        win_sum_sq = self.get_window_sum_square(
            magnitude.shape[-1],
            hop_length=self.hop_length,
            win_length=self.win_length,
            n_fft=self.n_fft,
            # previously omitted, which silently normalised with a Hann window
            # even when the basis was built from a different window_fn
            window_fn=self.window_fn,
        ).to(magnitude.device)
        # remove modulation effects
        approx_nonzero_indices = win_sum_sq > self.tiny
        x[:, :, approx_nonzero_indices] /= win_sum_sq[approx_nonzero_indices]
        x *= self.n_fft / self.hop_length
        x = x[:, :, self.n_fft // 2 :]
        x = x[:, :, : -self.n_fft // 2 :]
        return x

    def forward(self, specgram: torch.Tensor) -> torch.Tensor:
        """Reconstruct a waveform from a magnitude spectrogram.

        Leading dimensions of ``specgram`` are flattened into one batch
        dimension; the last two dimensions are passed to ``inverse`` as-is.
        Initial phases are drawn with ``np.random``.
        """
        angles = np.angle(np.exp(2j * np.pi * np.random.rand(*specgram.shape)))
        angles = torch.from_numpy(angles).to(specgram)
        _specgram = specgram.view(-1, specgram.shape[-2], specgram.shape[-1])
        waveform = self.inverse(_specgram, angles).squeeze(1)
        for _ in range(self.n_iter):
            _, angles = self.transform(waveform)
            waveform = self.inverse(_specgram, angles).squeeze(1)
        return waveform.squeeze(0)
class HiFiGANVocoder(nn.Module):
    """Wrapper that loads a HiFi-GAN generator checkpoint and runs it in eval mode."""

    def __init__(
        self, checkpoint_path: str, model_cfg: Dict[str, str], fp16: bool = False
    ) -> None:
        """
        Args:
            checkpoint_path: path to a checkpoint whose ``"generator"`` entry
                holds the generator state dict.
            model_cfg: generator configuration passed to ``HiFiGANModel``.
            fp16: convert the generator to half precision after loading.
        """
        super().__init__()
        self.model = HiFiGANModel(model_cfg)
        # A checkpoint saved from GPU cannot be deserialised on a CPU-only host
        # without map_location; mirror the guard CodeHiFiGANVocoder uses.
        if torch.cuda.is_available():
            state_dict = torch.load(checkpoint_path)
        else:
            state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
        self.model.load_state_dict(state_dict["generator"])
        if fp16:
            self.model.half()
        logger.info(f"loaded HiFiGAN checkpoint from {checkpoint_path}")

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Vocode features into a waveform.

        A 2-D input is treated as a single utterance: a batch dimension is
        added before the call and removed from the result.  The last two
        dimensions are swapped before the generator is invoked, and the
        output is detached.
        """
        # (B x) T x D -> (B x) 1 x T
        model = self.model.eval()
        if len(x.shape) == 2:
            return model(x.unsqueeze(0).transpose(1, 2)).detach().squeeze(0)
        else:
            return model(x.transpose(-1, -2)).detach()

    @classmethod
    def from_data_cfg(cls, args, data_cfg: S2TDataConfig):
        """Build the vocoder from the ``vocoder`` section of a data config.

        The section must declare ``type: hifigan`` and provide ``config`` (path
        to a JSON model config) and ``checkpoint`` entries.
        """
        vocoder_cfg = data_cfg.vocoder
        assert vocoder_cfg.get("type", "griffin_lim") == "hifigan"
        with open(vocoder_cfg["config"]) as f:
            model_cfg = json.load(f)
        return cls(vocoder_cfg["checkpoint"], model_cfg, fp16=args.fp16)
def forward(self, x: Dict[str, torch.Tensor], dur_prediction=False) -> torch.Tensor:
    """Vocode a unit sequence (and optional f0) into a waveform.

    Args:
        x: model inputs; must contain ``"code"``.  Negative codes are treated
            as invalid and removed.  If ``"f0"`` is present, the frames that
            belong to removed codes are dropped as well.  The dict passed in
            is not modified.
        dur_prediction: forwarded to the generator as ``dur_prediction``.

    Returns:
        The generator output, detached and squeezed.
    """
    assert "code" in x
    # Shallow copy: the caller's dict must not gain/lose entries as a side effect.
    inputs = dict(x)
    inputs["dur_prediction"] = dur_prediction

    # remove invalid code
    code = inputs["code"]
    mask = code >= 0
    inputs["code"] = code[mask].unsqueeze(dim=0)
    if "f0" in inputs:
        f0 = inputs["f0"]
        # The f0-per-code ratio must come from the UNFILTERED code length: the
        # mask still has that length, so using the filtered length made the
        # repeat/view below produce a mis-shaped mask once codes were dropped.
        f0_up_ratio = f0.size(1) // code.size(1)
        f0_mask = mask.unsqueeze(2).repeat(1, 1, f0_up_ratio).view(-1, f0.size(1))
        inputs["f0"] = f0[f0_mask].unsqueeze(dim=0)

    return self.model(**inputs).detach().squeeze()
def get_vocoder(args, data_cfg: S2TDataConfig):
    """Instantiate the vocoder selected by ``args.vocoder``.

    Args:
        args: namespace providing ``vocoder`` (one of ``"griffin_lim"``,
            ``"hifigan"``, ``"code_hifigan"``) plus whatever the chosen
            vocoder's ``from_data_cfg`` reads.
        data_cfg: data configuration handed to ``from_data_cfg``.

    Raises:
        ValueError: if ``args.vocoder`` is not one of the supported names.
    """
    vocoder_classes = {
        "griffin_lim": GriffinLimVocoder,
        "hifigan": HiFiGANVocoder,
        "code_hifigan": CodeHiFiGANVocoder,
    }
    vocoder_cls = vocoder_classes.get(args.vocoder)
    if vocoder_cls is None:
        # name the offending value and the valid ones so the failure is actionable
        raise ValueError(
            f"Unknown vocoder: {args.vocoder!r} "
            f"(expected one of {', '.join(vocoder_classes)})"
        )
    return vocoder_cls.from_data_cfg(args, data_cfg)
"transformer_wmt_en_de", + "transformer_vaswani_wmt_en_de_big", + "transformer_vaswani_wmt_en_fr_big", + "transformer_wmt_en_de_big", + "transformer_wmt_en_de_big_t2t", + "DEFAULT_MAX_SOURCE_POSITIONS", + "DEFAULT_MAX_TARGET_POSITIONS", + "DEFAULT_MIN_PARAMS_TO_WRAP", +] diff --git a/fairseq/fairseq/models/transformer/transformer_base.py b/fairseq/fairseq/models/transformer/transformer_base.py new file mode 100644 index 0000000..f9f097f --- /dev/null +++ b/fairseq/fairseq/models/transformer/transformer_base.py @@ -0,0 +1,193 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +from torch import Tensor + +import logging + +from fairseq import utils +from fairseq.dataclass.utils import gen_parser_from_dataclass +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqEncoderDecoderModel +from fairseq.models.transformer import ( + TransformerConfig, + TransformerDecoderBase, + TransformerEncoderBase, +) + + +logger = logging.getLogger(__name__) + + +class TransformerModelBase(FairseqEncoderDecoderModel): + """ + Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) + <https://arxiv.org/abs/1706.03762>`_. + + Args: + encoder (TransformerEncoder): the encoder + decoder (TransformerDecoder): the decoder + + The Transformer model provides the following named architectures and + command-line arguments: + + .. argparse:: + :ref: fairseq.models.transformer_parser + :prog: + """ + + def __init__(self, cfg, encoder, decoder): + super().__init__(encoder, decoder) + self.cfg = cfg + self.supports_align_args = True + + @classmethod + def add_args(cls, parser): + """Add model-specific arguments to the parser.""" + # we want to build the args recursively in this case. 
@classmethod
def build_model(cls, cfg, task):
    """Build a new model instance."""
    enc_cfg, dec_cfg = cfg.encoder, cfg.decoder

    # -- TODO T96535332
    # bug caused by interaction between OmegaConf II and argparsing
    dec_cfg.input_dim = int(dec_cfg.input_dim)
    dec_cfg.output_dim = int(dec_cfg.output_dim)
    # --

    # a comma-separated keep-list overrides the configured layer count
    for side_cfg in (enc_cfg, dec_cfg):
        if side_cfg.layers_to_keep:
            side_cfg.layers = len(side_cfg.layers_to_keep.split(","))

    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    tied_embeddings = True
    if cfg.share_all_embeddings:
        if src_dict != tgt_dict:
            raise ValueError("--share-all-embeddings requires a joined dictionary")
        if enc_cfg.embed_dim != dec_cfg.embed_dim:
            raise ValueError(
                "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim"
            )
        if dec_cfg.embed_path and (dec_cfg.embed_path != enc_cfg.embed_path):
            raise ValueError(
                "--share-all-embeddings not compatible with --decoder-embed-path"
            )
        encoder_embed_tokens = cls.build_embedding(
            cfg, src_dict, enc_cfg.embed_dim, enc_cfg.embed_path
        )
    elif cfg.merge_src_tgt_embed:
        logger.info(f"source dict size: {len(src_dict)}")
        logger.info(f"target dict size: {len(tgt_dict)}")
        # fold the target symbols into the source dictionary and expose the
        # merged dictionary on the task for both sides
        src_dict.update(tgt_dict)
        task.src_dict = src_dict
        task.tgt_dict = src_dict
        logger.info(f"merged dict size: {len(src_dict)}")
        encoder_embed_tokens = cls.build_embedding(cfg, src_dict, enc_cfg.embed_dim)
    else:
        tied_embeddings = False
        encoder_embed_tokens = cls.build_embedding(
            cfg, src_dict, enc_cfg.embed_dim, enc_cfg.embed_path
        )

    if tied_embeddings:
        # one table serves encoder input, decoder input and decoder output
        decoder_embed_tokens = encoder_embed_tokens
        cfg.share_decoder_input_output_embed = True
    else:
        decoder_embed_tokens = cls.build_embedding(
            cfg, tgt_dict, dec_cfg.embed_dim, dec_cfg.embed_path
        )

    if cfg.offload_activations:
        cfg.checkpoint_activations = True  # offloading implies checkpointing

    encoder = cls.build_encoder(cfg, src_dict, encoder_embed_tokens)
    decoder = cls.build_decoder(cfg, tgt_dict, decoder_embed_tokens)
    return cls(cfg, encoder, decoder)
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Create an ``nn.Embedding`` with N(0, embedding_dim**-0.5) weights.

    Args:
        num_embeddings: number of rows in the table.
        embedding_dim: width of each embedding vector.
        padding_idx: row to zero out and mark as padding, or ``None`` for a
            table without a padding row.

    Returns:
        The initialised ``nn.Embedding`` module.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5)
    # Indexing with None yields a view of the WHOLE weight matrix, so zeroing
    # unconditionally would wipe every embedding when no padding row exists.
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m
+ + +import re +from dataclasses import dataclass, field, fields +from typing import List, Optional + +from omegaconf import II + +from fairseq import utils +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.utils import safe_getattr, safe_hasattr + +DEFAULT_MAX_SOURCE_POSITIONS = 1024 +DEFAULT_MAX_TARGET_POSITIONS = 1024 + +DEFAULT_MIN_PARAMS_TO_WRAP = int(1e8) + +_NAME_PARSER = r"(decoder|encoder|quant_noise)_(.*)" + + +@dataclass +class EncDecBaseConfig(FairseqDataclass): + embed_path: Optional[str] = field( + default=None, metadata={"help": "path to pre-trained embedding"} + ) + embed_dim: Optional[int] = field( + default=512, metadata={"help": "embedding dimension"} + ) + ffn_embed_dim: int = field( + default=2048, metadata={"help": "embedding dimension for FFN"} + ) + layers: int = field(default=6, metadata={"help": "number of layers"}) + attention_heads: int = field( + default=8, metadata={"help": "number of attention heads"} + ) + normalize_before: bool = field( + default=False, metadata={"help": "apply layernorm before each block"} + ) + learned_pos: bool = field( + default=False, metadata={"help": "use learned positional embeddings"} + ) + # args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019) + layerdrop: float = field(default=0, metadata={"help": "LayerDrop probability"}) + layers_to_keep: Optional[List[int]] = field( + default=None, metadata={"help": "which layers to *keep* when pruning"} + ) + + xformers_att_config: Optional[str] = field( + default=None, + metadata={ + "help": "config for xFormers attention, defined in xformers.components.attention.AttentionConfig" + }, + ) + + +@dataclass +class DecoderConfig(EncDecBaseConfig): + input_dim: int = II("model.decoder.embed_dim") + output_dim: int = field( + default=II("model.decoder.embed_dim"), + metadata={ + "help": "decoder output dimension (extra linear layer if different from decoder embed dim)" + }, + ) + + def __post_init__(self): + # 
II doesn't work if we are just creating the object outside of hydra so fix that + if self.input_dim == II("model.decoder.embed_dim"): + self.input_dim = self.embed_dim + if self.output_dim == II("model.decoder.embed_dim"): + self.output_dim = self.embed_dim + + +@dataclass +class QuantNoiseConfig(FairseqDataclass): + pq: float = field( + default=0.0, + metadata={"help": "iterative PQ quantization noise at training time"}, + ) + pq_block_size: int = field( + default=8, + metadata={"help": "block size of quantization noise at training time"}, + ) + scalar: float = field( + default=0.0, + metadata={ + "help": "scalar quantization noise and scalar quantization at training time" + }, + ) + + +@dataclass +class TransformerConfig(FairseqDataclass): + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="relu", + metadata={"help": "activation function to use"}, + ) + dropout: float = field(default=0.1, metadata={"help": "dropout probability"}) + attention_dropout: float = field( + default=0.0, metadata={"help": "dropout probability for attention weights"} + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN.", + "alias": "--relu-dropout", + }, + ) + adaptive_input: bool = False + encoder: EncDecBaseConfig = EncDecBaseConfig() + # TODO should really be in the encoder config + max_source_positions: int = field( + default=DEFAULT_MAX_SOURCE_POSITIONS, + metadata={"help": "Maximum input length supported by the encoder"}, + ) + decoder: DecoderConfig = DecoderConfig() + # TODO should really be in the decoder config + max_target_positions: int = field( + default=DEFAULT_MAX_TARGET_POSITIONS, + metadata={"help": "Maximum output length supported by the decoder"}, + ) + share_decoder_input_output_embed: bool = field( + default=False, metadata={"help": "share decoder input and output embeddings"} + ) + share_all_embeddings: bool = field( + default=False, + metadata={ + "help": "share 
encoder, decoder and output embeddings (requires shared dictionary and embed dim)" + }, + ) + merge_src_tgt_embed: bool = field( + default=False, + metadata={ + "help": "if true then the source and target embedding table is " + "merged into one table. This is going to make the model smaller but " + "it might hurt performance." + }, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if True, disables positional embeddings (outside self attention)" + }, + ) + adaptive_softmax_cutoff: Optional[List[int]] = field( + default=None, + metadata={ + "help": "list of adaptive softmax cutoff points. Must be used with adaptive_loss criterion" + }, + ) + adaptive_softmax_dropout: float = field( + default=0.0, + metadata={"help": "sets adaptive softmax dropout for the tail projections"}, + ) + adaptive_softmax_factor: float = field( + default=4, metadata={"help": "adaptive input factor"} + ) + layernorm_embedding: bool = field( + default=False, metadata={"help": "add layernorm to embedding"} + ) + tie_adaptive_weights: bool = field( + default=False, + metadata={ + "help": "if set, ties the weights of adaptive softmax and adaptive input" + }, + ) + tie_adaptive_proj: bool = field( + default=False, + metadata={ + "help": "if set, ties the projection weights of adaptive softmax and adaptive input" + }, + ) + no_scale_embedding: bool = field( + default=False, metadata={"help": "if True, dont scale embeddings"} + ) + checkpoint_activations: bool = field( + default=False, + metadata={ + "help": "checkpoint activations at each layer, which saves GPU memory usage at the cost of some additional compute" + }, + ) + offload_activations: bool = field( + default=False, + metadata={ + "help": "checkpoint activations at each layer, then save to gpu. Sets --checkpoint-activations." 
+ }, + ) + # args for "Cross+Self-Attention for Transformer Models" (Peitz et al., 2019) + no_cross_attention: bool = field( + default=False, metadata={"help": "do not perform cross-attention"} + ) + cross_self_attention: bool = field( + default=False, metadata={"help": "perform cross+self-attention"} + ) + # args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020) + quant_noise: QuantNoiseConfig = field(default=QuantNoiseConfig()) + min_params_to_wrap: int = field( + default=DEFAULT_MIN_PARAMS_TO_WRAP, + metadata={ + "help": "minimum number of params for a layer to be wrapped with FSDP() when " + "training with --ddp-backend=fully_sharded. Smaller values will " + "improve memory efficiency, but may make torch.distributed " + "communication less efficient due to smaller input sizes. This option " + "is set to 0 (i.e., always wrap) when --checkpoint-activations or " + "--offload-activations are passed." + }, + ) + # DEPRECATED field, but some old checkpoints might have it + char_inputs: bool = field( + default=False, metadata={"help": "if set, model takes character ids as input"} + ) + relu_dropout: float = 0.0 + # config for "BASE Layers: Simplifying Training of Large, Sparse Models" + base_layers: Optional[int] = field( + default=0, metadata={"help": "number of BASE layers in total"} + ) + base_sublayers: Optional[int] = field( + default=1, metadata={"help": "number of sublayers in each BASE layer"} + ) + base_shuffle: Optional[int] = field( + default=1, + metadata={"help": "shuffle tokens between workers before computing assignment"}, + ) + + export: bool = field( + default=False, + metadata={"help": "make the layernorm exportable with torchscript."}, + ) + + # copied from transformer_lm but expected in transformer_decoder: + no_decoder_final_norm: bool = field( + default=False, + metadata={"help": "don't add an extra layernorm after the last decoder block"}, + ) + + # We need to make this hierarchical dataclass like 
the flat namespace + # __getattr__ and __setattr__ here allow backward compatibility + # for subclasses of Transformer(Legacy) that depend on read/write on + # the flat namespace. + + def __getattr__(self, name): + match = re.match(_NAME_PARSER, name) + if match: + sub = safe_getattr(self, match[1]) + return safe_getattr(sub, match[2]) + raise AttributeError(f"invalid argument {name}.") + + def __setattr__(self, name, value): + match = re.match(_NAME_PARSER, name) + if match: + sub = safe_getattr(self, match[1]) + setattr(sub, match[2], value) + else: + super().__setattr__(name, value) + + @staticmethod + def _copy_keys(args, cls, prefix, seen): + """ + copy the prefixed keys (decoder_embed_dim) to the DC fields: decoder.embed_dim + """ + cfg = cls() + for fld in fields(cls): + # for all the fields in the DC, find the fields (e.g. embed_dim) + # in the namespace with the prefix (e.g. decoder) + # and set it on the dc. + args_key = f"{prefix}_{fld.name}" + if safe_hasattr(args, args_key): + seen.add(args_key) + setattr(cfg, fld.name, safe_getattr(args, args_key)) + if safe_hasattr(args, fld.name): + seen.add(fld.name) + setattr(cfg, fld.name, safe_getattr(args, fld.name)) + return cfg + + @classmethod + def from_namespace(cls, args): + if args is None: + return None + if not isinstance(args, cls): + seen = set() + config = cls() + # currently, we can go generically from DC fields to args hierarchically + # but we can't easily deconstruct a flat namespace to a hierarchical + # DC. Mostly because we could have a sub-dc called `decoder-foo` that should not + # go to the sub struct called `decoder`. There are ways to go around this, but let's keep it simple + # for now. 
+ for fld in fields(cls): + # concretelly, the transformer_config know what sub-dc it has, so we go through all the dc fields + # and if it's one that has a sub-dc, we build that sub-dc with `copy_keys()` + if fld.name == "decoder": + if safe_hasattr(args, "decoder"): + # in some cases, the args we receive is already structured (as DictConfigs), so let's just build the correct DC + seen.add("decoder") + config.decoder = DecoderConfig(**args.decoder) + else: + config.decoder = cls._copy_keys( + args, DecoderConfig, "decoder", seen + ) + elif fld.name == "encoder": + # same but for encoder + if safe_hasattr(args, "encoder"): + seen.add("encoder") + config.encoder = EncDecBaseConfig(**args.encoder) + else: + config.encoder = cls._copy_keys( + args, EncDecBaseConfig, "encoder", seen + ) + elif fld.name == "quant_noise": + # same but for quant_noise + if safe_hasattr(args, "quant_noise"): + seen.add("quant_noise") + config.quant_noise = QuantNoiseConfig(**args.quant_noise) + else: + config.quant_noise = cls._copy_keys( + args, QuantNoiseConfig, "quant_noise", seen + ) + elif safe_hasattr(args, fld.name): + # if it's not a structure field, it's just a normal field, copy it over + seen.add(fld.name) + setattr(config, fld.name, safe_getattr(args, fld.name)) + # we got all the fields defined in the dataclass, but + # the argparse namespace might have extra args for two reasons: + # - we are in a legacy class so all the args are not declared in the dataclass. 
Ideally once everyone has defined a dataclass for their model, we won't need this + # - some places expect args to be there but never define them + args_dict = ( + args._asdict() + if safe_hasattr(args, "_asdict") + else vars(args) + if safe_hasattr(args, "__dict__") + else {} + ) # namedtupled doesn't have __dict__ :-/ + for key, value in args_dict.items(): + if key not in seen: + setattr(config, key, value) + return config + else: + return args diff --git a/fairseq/fairseq/models/transformer/transformer_decoder.py b/fairseq/fairseq/models/transformer/transformer_decoder.py new file mode 100644 index 0000000..744c73f --- /dev/null +++ b/fairseq/fairseq/models/transformer/transformer_decoder.py @@ -0,0 +1,474 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, + transformer_layer, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ + + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerDecoderBase": + return "TransformerDecoder" + else: + return module_name + + +class TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. 
+ + Args: + cfg (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = 
LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim**-0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn) + checkpoint = cfg.checkpoint_activations + if 
checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). 
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). 
+ alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # Prevent torchscript exporting issue for dynamic quant embedding + prev_output_tokens = prev_output_tokens.contiguous() + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = 
self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + 
TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) diff --git a/fairseq/fairseq/models/transformer/transformer_decoder_aug.py b/fairseq/fairseq/models/transformer/transformer_decoder_aug.py new file mode 100644 index 0000000..b73c06e --- /dev/null +++ b/fairseq/fairseq/models/transformer/transformer_decoder_aug.py @@ -0,0 +1,384 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models.transformer import TransformerConfig +from fairseq.models.transformer.transformer_decoder import TransformerDecoderBase +from fairseq.modules import ( + LayerDropModuleList, + SinusoidalPositionalEmbedding, + transformer_layer_aug, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper + + +class AugTransformerDecoderBase(TransformerDecoderBase): + """ + Transformer decoder augmented with an additional cross-attention. Each layer + is a :class:`AugTransformerDecoderLayerBase`. + + Args: + cfg (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + encoder_attn_merge_type (str, optional): the way to combine outputs from + two cross-attention modules. 
If "sequential" is set, two cross-attention + modules are stacked sequentially. If "parallel" is set, they are processed + in parallel and combined before feeding it to FFN (default: sequential). + dropnet_ratio (float, optional): a probability to drop each cross-attention + module during training (default: 0.0). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + output_projection=None, + encoder_attn_merge_type="sequential", + dropnet_ratio=0.0, + ): + super().__init__( + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=output_projection, + ) + # assert cfg.cross_self_attention + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + self.build_decoder_layer(cfg, encoder_attn_merge_type, dropnet_ratio) + for _ in range(cfg.decoder.layers) + ] + ) + + def build_decoder_layer( + self, + cfg, + encoder_attn_merge_type="sequential", + dropnet_ratio=0, + ): + layer = transformer_layer_aug.AugTransformerDecoderLayerBase( + cfg, + no_encoder_attn=False, + encoder_attn_merge_type=encoder_attn_merge_type, + dropnet_ratio=dropnet_ratio, + ) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + encoder_out_aug: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: 
bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + encoder_out_aug=encoder_out_aug, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + encoder_out_aug: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + encoder_out_aug, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. 
A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + encoder_out_aug: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + enc_aug: Optional[Tensor] = None + padding_mask_aug: Optional[Tensor] = None + if encoder_out_aug is not None and len(encoder_out_aug["encoder_out"]) > 0: + enc_aug = encoder_out_aug["encoder_out"][0] + if ( + encoder_out_aug is not None + and len(encoder_out_aug["encoder_padding_mask"]) > 0 + ): + padding_mask_aug = encoder_out_aug["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = 
self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # Prevent torchscript exporting issue for dynamic quant embedding + prev_output_tokens = prev_output_tokens.contiguous() + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + attn_aug: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, layer_attn_aug, _ = layer( + x, + enc, + padding_mask, + enc_aug, + padding_mask_aug, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + if layer_attn_aug is not None and idx == alignment_layer: + attn_aug = layer_attn_aug.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + 
if attn_aug is not None: + if alignment_heads is not None: + attn_aug = attn_aug[:alignment_heads] + + # average probabilities over heads + attn_aug = attn_aug.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "attn_aug": [attn_aug], "inner_states": inner_states} + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "encoder_attn_layer_norm2", + "3": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class AugTransformerDecoder(AugTransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + 
no_encoder_attn=False, + output_projection=output_projection, + encoder_attn_merge_type=getattr( + args, "synthesizer_augmented_cross_attention_merge_type", "sequential" + ), + dropnet_ratio=getattr(args, "dropnet_ratio", 0), + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer( + self, + args, + encoder_attn_merge_type="sequential", + dropnet_ratio=0, + ): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), + no_encoder_attn=False, + encoder_attn_merge_type=encoder_attn_merge_type, + dropnet_ratio=dropnet_ratio, + ) diff --git a/fairseq/fairseq/models/transformer/transformer_encoder.py b/fairseq/fairseq/models/transformer/transformer_encoder.py new file mode 100644 index 0000000..a684fcb --- /dev/null +++ b/fairseq/fairseq/models/transformer/transformer_encoder.py @@ -0,0 +1,362 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqEncoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, + transformer_layer, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ + + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerEncoderBase": + return "TransformerEncoder" + else: + return module_name + + +class TransformerEncoderBase(FairseqEncoder): + """ + Transformer encoder consisting of *cfg.encoder.layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, cfg, dictionary, embed_tokens, return_fc=False): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.encoder_layerdrop = cfg.encoder.layerdrop + self.return_fc = return_fc + + embed_dim = embed_tokens.embedding_dim + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = cfg.max_source_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_source_positions, + embed_dim, + self.padding_idx, + learned=cfg.encoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + if self.encoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.encoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)] + ) + self.num_layers = len(self.layers) + + if cfg.encoder.normalize_before: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + def build_encoder_layer(self, cfg): + layer = transformer_layer.TransformerEncoderLayerBase( + cfg, return_fc=self.return_fc + ) + checkpoint = cfg.checkpoint_activations + if 
checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward_embedding( + self, src_tokens, token_embedding: Optional[torch.Tensor] = None + ): + # embed tokens and positions + if token_embedding is None: + token_embedding = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * token_embedding + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + if self.quant_noise is not None: + x = self.quant_noise(x) + return x, embed + + def forward( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
+ """ + return self.forward_scriptable( + src_tokens, src_lengths, return_all_hiddens, token_embeddings + ) + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + """ + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + has_pads = ( + torch.tensor(src_tokens.device.type == "xla") or encoder_padding_mask.any() + ) + # Torchscript doesn't handle bool Tensor correctly, so we need to work around. 
+ if torch.jit.is_scripting(): + has_pads = torch.tensor(1) if has_pads else torch.tensor(0) + + x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings) + + # account for padding while computing the representation + x = x * ( + 1 - encoder_padding_mask.unsqueeze(-1).type_as(x) * has_pads.type_as(x) + ) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_states = [] + fc_results = [] + + if return_all_hiddens: + encoder_states.append(x) + + # encoder layers + for layer in self.layers: + lr = layer( + x, encoder_padding_mask=encoder_padding_mask if has_pads else None + ) + + if isinstance(lr, tuple) and len(lr) == 2: + x, fc_result = lr + else: + x = lr + fc_result = None + + if return_all_hiddens and not torch.jit.is_scripting(): + assert encoder_states is not None + encoder_states.append(x) + fc_results.append(fc_result) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + src_lengths = ( + src_tokens.ne(self.padding_idx) + .sum(dim=1, dtype=torch.int32) + .reshape(-1, 1) + .contiguous() + ) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_embedding": [encoder_embedding], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "fc_results": fc_results, # List[T x B x C] + "src_tokens": [], + "src_lengths": [src_lengths], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + encoder_out["encoder_embedding"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["src_lengths"]) == 0: + src_lengths = [] + else: + src_lengths = [(encoder_out["src_lengths"][0]).index_select(0, new_order)] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "src_lengths": src_lengths, # B x 1 + } + + @torch.jit.export + def _reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """Dummy re-order function for beamable enc-dec attention""" + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) 
state dict for new versions of fairseq.""" + for i in range(self.num_layers): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerEncoder(TransformerEncoderBase): + def __init__(self, args, dictionary, embed_tokens, return_fc=False): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + return_fc=return_fc, + ) + + def build_encoder_layer(self, args): + return super().build_encoder_layer( + TransformerConfig.from_namespace(args), + ) diff --git a/fairseq/fairseq/models/transformer/transformer_legacy.py b/fairseq/fairseq/models/transformer/transformer_legacy.py new file mode 100644 index 0000000..00d14a7 --- /dev/null +++ b/fairseq/fairseq/models/transformer/transformer_legacy.py @@ -0,0 +1,277 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq.dataclass.utils import gen_parser_from_dataclass +from fairseq.models import ( + register_model, + register_model_architecture, +) +from fairseq.models.transformer.transformer_config import ( + TransformerConfig, + DEFAULT_MAX_SOURCE_POSITIONS, + DEFAULT_MAX_TARGET_POSITIONS, + DEFAULT_MIN_PARAMS_TO_WRAP, +) +from fairseq.models.transformer.transformer_base import ( + TransformerModelBase, +) + + +@register_model("transformer") +class TransformerModel(TransformerModelBase): + """ + This is the legacy implementation of the transformer model that + uses argparse for configuration. 
+ """ + + @classmethod + def hub_models(cls): + # fmt: off + + def moses_subword(path): + return { + 'path': path, + 'tokenizer': 'moses', + 'bpe': 'subword_nmt', + } + + def moses_fastbpe(path): + return { + 'path': path, + 'tokenizer': 'moses', + 'bpe': 'fastbpe', + } + + def spm(path): + return { + 'path': path, + 'bpe': 'sentencepiece', + 'tokenizer': 'space', + } + + return { + 'transformer.wmt14.en-fr': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/wmt14.en-fr.joined-dict.transformer.tar.bz2'), + 'transformer.wmt16.en-de': 'https://dl.fbaipublicfiles.com/fairseq/models/wmt16.en-de.joined-dict.transformer.tar.bz2', + 'transformer.wmt18.en-de': moses_subword('https://dl.fbaipublicfiles.com/fairseq/models/wmt18.en-de.ensemble.tar.gz'), + 'transformer.wmt19.en-de': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.joined-dict.ensemble.tar.gz'), + 'transformer.wmt19.en-ru': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.ensemble.tar.gz'), + 'transformer.wmt19.de-en': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.joined-dict.ensemble.tar.gz'), + 'transformer.wmt19.ru-en': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.ensemble.tar.gz'), + 'transformer.wmt19.en-de.single_model': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-de.joined-dict.single_model.tar.gz'), + 'transformer.wmt19.en-ru.single_model': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.en-ru.single_model.tar.gz'), + 'transformer.wmt19.de-en.single_model': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.de-en.joined-dict.single_model.tar.gz'), + 'transformer.wmt19.ru-en.single_model': moses_fastbpe('https://dl.fbaipublicfiles.com/fairseq/models/wmt19.ru-en.single_model.tar.gz'), + 'transformer.wmt20.en-ta': spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-ta.single.tar.gz'), + 'transformer.wmt20.en-iu.news': 
spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-iu.news.single.tar.gz'), + 'transformer.wmt20.en-iu.nh': spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.en-iu.nh.single.tar.gz'), + 'transformer.wmt20.ta-en': spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.ta-en.single.tar.gz'), + 'transformer.wmt20.iu-en.news': spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu-en.news.single.tar.gz'), + 'transformer.wmt20.iu-en.nh': spm('https://dl.fbaipublicfiles.com/fairseq/models/wmt20.iu-en.nh.single.tar.gz'), + 'transformer.flores101.mm100.615M': spm('https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_615M.tar.gz'), + 'transformer.flores101.mm100.175M': spm('https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_175M.tar.gz'), + } + # fmt: on + + def __init__(self, args, encoder, decoder): + cfg = TransformerConfig.from_namespace(args) + super().__init__(cfg, encoder, decoder) + self.args = args + + @classmethod + def add_args(cls, parser): + """Add model-specific arguments to the parser.""" + # we want to build the args recursively in this case. 
+ # do not set defaults so that settings defaults from various architectures still works + gen_parser_from_dataclass( + parser, TransformerConfig(), delete_default=True, with_prefix="" + ) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + if args.encoder_layers_to_keep: + args.encoder_layers = len(args.encoder_layers_to_keep.split(",")) + if args.decoder_layers_to_keep: + args.decoder_layers = len(args.decoder_layers_to_keep.split(",")) + + if getattr(args, "max_source_positions", None) is None: + args.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + if getattr(args, "max_target_positions", None) is None: + args.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + + if args.share_all_embeddings: + if src_dict != tgt_dict: + raise ValueError("--share-all-embeddings requires a joined dictionary") + if args.encoder_embed_dim != args.decoder_embed_dim: + raise ValueError( + "--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim" + ) + if args.decoder_embed_path and ( + args.decoder_embed_path != args.encoder_embed_path + ): + raise ValueError( + "--share-all-embeddings not compatible with --decoder-embed-path" + ) + args.share_decoder_input_output_embed = True + + if getattr(args, "offload_activations", False): + args.checkpoint_activations = True # offloading implies checkpointing + + if not args.share_all_embeddings: + args.min_params_to_wrap = getattr( + args, "min_params_to_wrap", DEFAULT_MIN_PARAMS_TO_WRAP + ) + cfg = TransformerConfig.from_namespace(args) + return super().build_model(cfg, task) + + @classmethod + def build_embedding(cls, args, dictionary, embed_dim, path=None): + return super().build_embedding( + TransformerConfig.from_namespace(args), dictionary, embed_dim, path + ) + + @classmethod + def build_encoder(cls, args, 
src_dict, embed_tokens): + return super().build_encoder( + TransformerConfig.from_namespace(args), src_dict, embed_tokens + ) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + return super().build_decoder( + TransformerConfig.from_namespace(args), tgt_dict, embed_tokens + ) + + +# architectures + + +@register_model_architecture("transformer", "transformer_tiny") +def tiny_architecture(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 64) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 64) + args.encoder_layers = getattr(args, "encoder_layers", 2) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 2) + args.decoder_layers = getattr(args, "decoder_layers", 2) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 2) + return base_architecture(args) + + +@register_model_architecture("transformer", "transformer") +def base_architecture(args): + args.encoder_embed_path = getattr(args, "encoder_embed_path", None) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 2048) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 8) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.encoder_learned_pos = getattr(args, "encoder_learned_pos", False) + + args.decoder_embed_path = getattr(args, "decoder_embed_path", None) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 8) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.decoder_learned_pos = getattr(args, 
"decoder_learned_pos", False) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.activation_fn = getattr(args, "activation_fn", "relu") + args.dropout = getattr(args, "dropout", 0.1) + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.share_decoder_input_output_embed = getattr( + args, "share_decoder_input_output_embed", False + ) + args.share_all_embeddings = getattr(args, "share_all_embeddings", False) + args.merge_src_tgt_embed = getattr(args, "merge_src_tgt_embed", False) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.no_cross_attention = getattr(args, "no_cross_attention", False) + args.cross_self_attention = getattr(args, "cross_self_attention", False) + + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + + args.no_scale_embedding = getattr(args, "no_scale_embedding", False) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + args.tie_adaptive_weights = getattr(args, "tie_adaptive_weights", False) + args.checkpoint_activations = getattr(args, "checkpoint_activations", False) + args.offload_activations = getattr(args, "offload_activations", False) + if args.offload_activations: + args.checkpoint_activations = True + args.encoder_layers_to_keep = getattr(args, "encoder_layers_to_keep", None) + args.decoder_layers_to_keep = getattr(args, "decoder_layers_to_keep", None) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0) + args.quant_noise_pq_block_size = 
getattr(args, "quant_noise_pq_block_size", 8) + args.quant_noise_scalar = getattr(args, "quant_noise_scalar", 0) + + +@register_model_architecture("transformer", "transformer_iwslt_de_en") +def transformer_iwslt_de_en(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 512) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 1024) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 4) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 512) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 1024) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 4) + args.decoder_layers = getattr(args, "decoder_layers", 6) + base_architecture(args) + + +@register_model_architecture("transformer", "transformer_wmt_en_de") +def transformer_wmt_en_de(args): + base_architecture(args) + + +# parameters used in the "Attention Is All You Need" paper (Vaswani et al., 2017) +@register_model_architecture("transformer", "transformer_vaswani_wmt_en_de_big") +def transformer_vaswani_wmt_en_de_big(args): + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.3) + base_architecture(args) + + +@register_model_architecture("transformer", "transformer_vaswani_wmt_en_fr_big") +def transformer_vaswani_wmt_en_fr_big(args): + args.dropout = getattr(args, "dropout", 0.1) + transformer_vaswani_wmt_en_de_big(args) + + 
+@register_model_architecture("transformer", "transformer_wmt_en_de_big") +def transformer_wmt_en_de_big(args): + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + transformer_vaswani_wmt_en_de_big(args) + + +# default parameters used in tensor2tensor implementation +@register_model_architecture("transformer", "transformer_wmt_en_de_big_t2t") +def transformer_wmt_en_de_big_t2t(args): + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_dropout = getattr(args, "activation_dropout", 0.1) + transformer_vaswani_wmt_en_de_big(args) diff --git a/fairseq/fairseq/models/transformer_align.py b/fairseq/fairseq/models/transformer_align.py new file mode 100644 index 0000000..eaf585b --- /dev/null +++ b/fairseq/fairseq/models/transformer_align.py @@ -0,0 +1,93 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from fairseq.models import register_model, register_model_architecture +from fairseq.models.transformer import ( + TransformerModel, + base_architecture, + transformer_wmt_en_de_big, +) + + +@register_model("transformer_align") +class TransformerAlignModel(TransformerModel): + """ + See "Jointly Learning to Align and Translate with Transformer + Models" (Garg et al., EMNLP 2019). 
+ """ + + def __init__(self, encoder, decoder, args): + super().__init__(args, encoder, decoder) + self.alignment_heads = args.alignment_heads + self.alignment_layer = args.alignment_layer + self.full_context_alignment = args.full_context_alignment + + @staticmethod + def add_args(parser): + # fmt: off + super(TransformerAlignModel, TransformerAlignModel).add_args(parser) + parser.add_argument('--alignment-heads', type=int, metavar='D', + help='Number of cross attention heads per layer to supervised with alignments') + parser.add_argument('--alignment-layer', type=int, metavar='D', + help='Layer number which has to be supervised. 0 corresponding to the bottommost layer.') + parser.add_argument('--full-context-alignment', action='store_true', + help='Whether or not alignment is supervised conditioned on the full target context.') + # fmt: on + + @classmethod + def build_model(cls, args, task): + # set any default arguments + transformer_align(args) + + transformer_model = TransformerModel.build_model(args, task) + return TransformerAlignModel( + transformer_model.encoder, transformer_model.decoder, args + ) + + def forward(self, src_tokens, src_lengths, prev_output_tokens): + encoder_out = self.encoder(src_tokens, src_lengths) + return self.forward_decoder(prev_output_tokens, encoder_out) + + def forward_decoder( + self, + prev_output_tokens, + encoder_out=None, + incremental_state=None, + features_only=False, + **extra_args, + ): + attn_args = { + "alignment_layer": self.alignment_layer, + "alignment_heads": self.alignment_heads, + } + decoder_out = self.decoder(prev_output_tokens, encoder_out, **attn_args) + + if self.full_context_alignment: + attn_args["full_context_alignment"] = self.full_context_alignment + _, alignment_out = self.decoder( + prev_output_tokens, + encoder_out, + features_only=True, + **attn_args, + **extra_args, + ) + decoder_out[1]["attn"] = alignment_out["attn"] + + return decoder_out + + +@register_model_architecture("transformer_align", 
"transformer_align") +def transformer_align(args): + args.alignment_heads = getattr(args, "alignment_heads", 1) + args.alignment_layer = getattr(args, "alignment_layer", 4) + args.full_context_alignment = getattr(args, "full_context_alignment", False) + base_architecture(args) + + +@register_model_architecture("transformer_align", "transformer_wmt_en_de_big_align") +def transformer_wmt_en_de_big_align(args): + args.alignment_heads = getattr(args, "alignment_heads", 1) + args.alignment_layer = getattr(args, "alignment_layer", 4) + transformer_wmt_en_de_big(args) diff --git a/fairseq/fairseq/models/transformer_from_pretrained_xlm.py b/fairseq/fairseq/models/transformer_from_pretrained_xlm.py new file mode 100644 index 0000000..236d994 --- /dev/null +++ b/fairseq/fairseq/models/transformer_from_pretrained_xlm.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +from typing import Any, Dict + +from fairseq import checkpoint_utils +from fairseq.data.legacy.masked_lm_dictionary import MaskedLMDictionary +from fairseq.models import register_model, register_model_architecture +from fairseq.models.transformer import ( + TransformerDecoder, + TransformerEncoder, + TransformerModel, + base_architecture as transformer_base_architecture, +) + + +@register_model("transformer_from_pretrained_xlm") +class TransformerFromPretrainedXLMModel(TransformerModel): + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + TransformerModel.add_args(parser) + parser.add_argument( + "--pretrained-xlm-checkpoint", + type=str, + metavar="STR", + help="XLM model to use for initializing transformer encoder and/or decoder", + ) + parser.add_argument( + "--init-encoder-only", + action="store_true", + help="if set, don't load the XLM weights and embeddings into decoder", + ) + parser.add_argument( + "--init-decoder-only", + action="store_true", + help="if set, don't load the XLM weights and embeddings into encoder", + ) + + @classmethod + def build_model(self, args, task, cls_dictionary=MaskedLMDictionary): + assert hasattr(args, "pretrained_xlm_checkpoint"), ( + "You must specify a path for --pretrained-xlm-checkpoint to use " + "--arch transformer_from_pretrained_xlm" + ) + assert isinstance(task.source_dictionary, cls_dictionary) and isinstance( + task.target_dictionary, cls_dictionary + ), ( + "You should use a MaskedLMDictionary when using --arch " + "transformer_from_pretrained_xlm because the pretrained XLM model " + "was trained using data binarized with MaskedLMDictionary. " + "For translation, you may want to use --task " + "translation_from_pretrained_xlm" + ) + assert not ( + getattr(args, "init_encoder_only", False) + and getattr(args, "init_decoder_only", False) + ), "Only one of --init-encoder-only and --init-decoder-only can be set." 
+ return super().build_model(args, task) + + @classmethod + def build_encoder(cls, args, src_dict, embed_tokens): + return TransformerEncoderFromPretrainedXLM(args, src_dict, embed_tokens) + + @classmethod + def build_decoder(cls, args, tgt_dict, embed_tokens): + return TransformerDecoderFromPretrainedXLM(args, tgt_dict, embed_tokens) + + +def upgrade_state_dict_with_xlm_weights( + state_dict: Dict[str, Any], pretrained_xlm_checkpoint: str +) -> Dict[str, Any]: + """ + Load XLM weights into a Transformer encoder or decoder model. + + Args: + state_dict: state dict for either TransformerEncoder or + TransformerDecoder + pretrained_xlm_checkpoint: checkpoint to load XLM weights from + + Raises: + AssertionError: If architecture (num layers, attention heads, etc.) + does not match between the current Transformer encoder or + decoder and the pretrained_xlm_checkpoint + """ + if not os.path.exists(pretrained_xlm_checkpoint): + raise IOError("Model file not found: {}".format(pretrained_xlm_checkpoint)) + + state = checkpoint_utils.load_checkpoint_to_cpu(pretrained_xlm_checkpoint) + xlm_state_dict = state["model"] + for key in xlm_state_dict.keys(): + + for search_key in ["embed_tokens", "embed_positions", "layers"]: + if search_key in key: + subkey = key[key.find(search_key) :] + assert subkey in state_dict, ( + "{} Transformer encoder / decoder " + "state_dict does not contain {}. 
Cannot " + "load {} from pretrained XLM checkpoint " + "{} into Transformer.".format( + str(state_dict.keys()), subkey, key, pretrained_xlm_checkpoint + ) + ) + + state_dict[subkey] = xlm_state_dict[key] + return state_dict + + +class TransformerEncoderFromPretrainedXLM(TransformerEncoder): + def __init__(self, args, dictionary, embed_tokens): + super().__init__(args, dictionary, embed_tokens) + if getattr(args, "init_decoder_only", False): + # Don't load XLM weights for encoder if --init-decoder-only + return + + assert hasattr(args, "pretrained_xlm_checkpoint"), ( + "--pretrained-xlm-checkpoint must be specified to load Transformer " + "encoder from pretrained XLM" + ) + xlm_loaded_state_dict = upgrade_state_dict_with_xlm_weights( + state_dict=self.state_dict(), + pretrained_xlm_checkpoint=args.pretrained_xlm_checkpoint, + ) + self.load_state_dict(xlm_loaded_state_dict, strict=True) + + +class TransformerDecoderFromPretrainedXLM(TransformerDecoder): + def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False): + super().__init__(args, dictionary, embed_tokens, no_encoder_attn) + if getattr(args, "init_encoder_only", False): + # Don't load XLM weights for decoder if --init-encoder-only + return + assert hasattr(args, "pretrained_xlm_checkpoint"), ( + "--pretrained-xlm-checkpoint must be specified to load Transformer " + "decoder from pretrained XLM" + ) + + xlm_loaded_state_dict = upgrade_state_dict_with_xlm_weights( + state_dict=self.state_dict(), + pretrained_xlm_checkpoint=args.pretrained_xlm_checkpoint, + ) + self.load_state_dict(xlm_loaded_state_dict, strict=True) + + +@register_model_architecture( + "transformer_from_pretrained_xlm", "transformer_from_pretrained_xlm" +) +def base_architecture(args): + transformer_base_architecture(args) diff --git a/fairseq/fairseq/models/transformer_lm.py b/fairseq/fairseq/models/transformer_lm.py new file mode 100644 index 0000000..1e3aa72 --- /dev/null +++ b/fairseq/fairseq/models/transformer_lm.py @@ -0,0 
+1,607 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +from dataclasses import dataclass, field +from typing import Optional + +from omegaconf import II + +from fairseq import options, utils +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.models import ( + FairseqLanguageModel, + register_model, + register_model_architecture, +) +from fairseq.models.transformer import ( + DEFAULT_MIN_PARAMS_TO_WRAP, + Embedding, + TransformerDecoder, +) +from fairseq.modules import AdaptiveInput, CharacterTokenEmbedder +from fairseq.utils import safe_getattr, safe_hasattr + +DEFAULT_MAX_TARGET_POSITIONS = 1024 + + +@dataclass +class TransformerLanguageModelConfig(FairseqDataclass): + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="relu", metadata={"help": "activation function to use"} + ) + dropout: float = field(default=0.1, metadata={"help": "dropout probability"}) + attention_dropout: float = field( + default=0.0, metadata={"help": "dropout probability for attention weights"} + ) + activation_dropout: float = field( + default=0.0, metadata={"help": "dropout probability after activation in FFN."} + ) + relu_dropout: float = field( + default=0.0, metadata={"help": "dropout probability after activation in FFN."} + ) + decoder_embed_dim: int = field( + default=512, metadata={"help": "decoder embedding dimension"} + ) + decoder_output_dim: int = field( + default=512, metadata={"help": "decoder output dimension"} + ) + decoder_input_dim: int = field( + default=512, metadata={"help": "decoder input dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=2048, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field(default=6, metadata={"help": "num decoder layers"}) + decoder_attention_heads: int = field( + default=8, metadata={"help": "num 
decoder attention heads"} + ) + decoder_normalize_before: bool = field( + default=False, metadata={"help": "apply layernorm before each decoder block"} + ) + no_decoder_final_norm: bool = field( + default=False, + metadata={"help": "don't add an extra layernorm after the last decoder block"}, + ) + adaptive_softmax_cutoff: Optional[str] = field( + default=None, + metadata={ + "help": "comma separated list of adaptive softmax cutoff points. " + "Must be used with adaptive_loss criterion" + }, + ) + adaptive_softmax_dropout: float = field( + default=0, + metadata={"help": "sets adaptive softmax dropout for the tail projections"}, + ) + adaptive_softmax_factor: float = field( + default=4, metadata={"help": "adaptive input factor"} + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings (outside self attention)" + }, + ) + share_decoder_input_output_embed: bool = field( + default=False, metadata={"help": "share decoder input and output embeddings"} + ) + character_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, uses character embedding convolutions to produce token embeddings" + }, + ) + character_filters: str = field( + default="[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]", + metadata={"help": "size of character embeddings"}, + ) + character_embedding_dim: int = field( + default=4, metadata={"help": "size of character embeddings"} + ) + char_embedder_highway_layers: int = field( + default=2, + metadata={"help": "number of highway layers for character token embeddder"}, + ) + adaptive_input: bool = field( + default=False, metadata={"help": "if set, uses adaptive input"} + ) + adaptive_input_factor: float = field( + default=4, metadata={"help": "adaptive input factor"} + ) + adaptive_input_cutoff: Optional[str] = field( + default=None, + metadata={"help": "comma separated list of adaptive input cutoff points."}, + ) + tie_adaptive_weights: 
bool = field( + default=False, + metadata={ + "help": "if set, ties the weights of adaptive softmax and adaptive input" + }, + ) + tie_adaptive_proj: bool = field( + default=False, + metadata={ + "help": "if set, ties the projection weights of adaptive softmax and adaptive input" + }, + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + layernorm_embedding: bool = field( + default=False, metadata={"help": "add layernorm to embedding"} + ) + no_scale_embedding: bool = field( + default=False, metadata={"help": "if True, dont scale embeddings"} + ) + checkpoint_activations: bool = field( + default=False, metadata={"help": "checkpoint activations at each layer"} + ) + offload_activations: bool = field( + default=False, + metadata={"help": "move checkpointed activations to CPU after they are used."}, + ) + # config for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "LayerDrop probability for decoder"} + ) + decoder_layers_to_keep: Optional[str] = field( + default=None, + metadata={ + "help": "which layers to *keep* when pruning as a comma-separated list" + }, + ) + # config for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020) + quant_noise_pq: float = field( + default=0.0, + metadata={"help": "iterative PQ quantization noise at training time"}, + ) + quant_noise_pq_block_size: int = field( + default=8, + metadata={"help": "block size of quantization noise at training time"}, + ) + quant_noise_scalar: float = field( + default=0.0, + metadata={ + "help": "scalar quantization noise and scalar quantization at training time" + }, + ) + # config for Fully Sharded Data Parallel (FSDP) training + min_params_to_wrap: int = field( + default=DEFAULT_MIN_PARAMS_TO_WRAP, + metadata={ + "help": ( + "minimum number of params for a layer to be wrapped with 
FSDP() when " + "training with --ddp-backend=fully_sharded. Smaller values will " + "improve memory efficiency, but may make torch.distributed " + "communication less efficient due to smaller input sizes. This option " + "is set to 0 (i.e., always wrap) when --checkpoint-activations or " + "--offload-activations are passed." + ) + }, + ) + # config for "BASE Layers: Simplifying Training of Large, Sparse Models" + base_layers: Optional[int] = field( + default=0, metadata={"help": "number of BASE layers in total"} + ) + base_sublayers: Optional[int] = field( + default=1, metadata={"help": "number of sublayers in each BASE layer"} + ) + base_shuffle: Optional[int] = field( + default=1, + metadata={"help": "shuffle tokens between workers before computing assignment"}, + ) + # NormFormer + scale_fc: Optional[bool] = field( + default=False, + metadata={"help": "Insert LayerNorm between fully connected layers"}, + ) + scale_attn: Optional[bool] = field( + default=False, metadata={"help": "Insert LayerNorm after attention"} + ) + scale_heads: Optional[bool] = field( + default=False, + metadata={"help": "Learn a scale coefficient for each attention head"}, + ) + scale_resids: Optional[bool] = field( + default=False, + metadata={"help": "Learn a scale coefficient for each residual connection"}, + ) + + # xFormers arguments + decoder_xformers_att_config: Optional[str] = field( + default=None, + metadata={ + "help": "config for xFormers library attention, defined in xformers.components.attention.AttentionConfig", + }, + ) + + # options from other parts of the config + add_bos_token: bool = II("task.add_bos_token") + tokens_per_sample: int = II("task.tokens_per_sample") + max_target_positions: Optional[int] = II("task.max_target_positions") + tpu: bool = II("common.tpu") + + +@register_model("transformer_lm", dataclass=TransformerLanguageModelConfig) +class TransformerLanguageModel(FairseqLanguageModel): + @classmethod + def hub_models(cls): + def moses_fastbpe(path): + return 
{"path": path, "tokenizer": "moses", "bpe": "fastbpe"} + + def spm(path): + return {"path": path, "tokenizer": "space", "bpe": "sentencepiece"} + + return { + "transformer_lm.gbw.adaptive_huge": "https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_gbw_huge.tar.bz2", + "transformer_lm.wiki103.adaptive": "https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_wiki103.v2.tar.bz2", + "transformer_lm.wmt19.en": moses_fastbpe( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.en.tar.bz2" + ), + "transformer_lm.wmt19.de": moses_fastbpe( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.de.tar.bz2" + ), + "transformer_lm.wmt19.ru": moses_fastbpe( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt19.ru.tar.bz2" + ), + "transformer_lm.wmt20.en": spm( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt20.en.tar.gz" + ), + "transformer_lm.wmt20.ta": spm( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt20.ta.tar.gz" + ), + "transformer_lm.wmt20.iu.news": spm( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt20.iu.news.tar.gz" + ), + "transformer_lm.wmt20.iu.nh": spm( + "https://dl.fbaipublicfiles.com/fairseq/models/lm/wmt20.iu.nh.tar.gz" + ), + } + + def __init__(self, decoder): + super().__init__(decoder) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + if args.decoder_layers_to_keep: + args.decoder_layers = len(args.decoder_layers_to_keep.split(",")) + + if safe_getattr(args, "max_target_positions", None) is None: + args.max_target_positions = safe_getattr( + args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS + ) + + if args.character_embeddings: + embed_tokens = CharacterTokenEmbedder( + task.source_dictionary, + eval(args.character_filters), + args.character_embedding_dim, + args.decoder_embed_dim, + args.char_embedder_highway_layers, + ) + elif args.adaptive_input: + embed_tokens = AdaptiveInput( + len(task.source_dictionary), + task.source_dictionary.pad(), + 
args.decoder_input_dim, + args.adaptive_input_factor, + args.decoder_embed_dim, + options.eval_str_list(args.adaptive_input_cutoff, type=int), + args.quant_noise_pq, + args.quant_noise_pq_block_size, + ) + else: + embed_tokens = cls.build_embedding( + args, task.source_dictionary, args.decoder_input_dim + ) + + if args.tie_adaptive_weights: + assert args.adaptive_input + assert args.adaptive_input_factor == args.adaptive_softmax_factor + assert ( + args.adaptive_softmax_cutoff == args.adaptive_input_cutoff + ), "{} != {}".format( + args.adaptive_softmax_cutoff, args.adaptive_input_cutoff + ) + assert args.decoder_input_dim == args.decoder_output_dim + + decoder = TransformerDecoder( + args, task.target_dictionary, embed_tokens, no_encoder_attn=True + ) + return cls(decoder) + + @classmethod + def build_embedding(cls, args, dictionary, embed_dim, path=None): + embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad()) + return embed_tokens + + +def base_lm_architecture(args): + # backward compatibility for older model checkpoints + if safe_hasattr(args, "no_tie_adaptive_proj"): + # previous models defined --no-tie-adaptive-proj, so use the existence of + # that option to determine if this is an "old" model checkpoint + args.no_decoder_final_norm = True # old models always set this to True + if args.no_tie_adaptive_proj is False: + args.tie_adaptive_proj = True + if safe_hasattr(args, "decoder_final_norm"): + args.no_decoder_final_norm = not args.decoder_final_norm + + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.0) + + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 512) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 2048) + args.decoder_layers = safe_getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 8) + args.adaptive_softmax_cutoff = safe_getattr(args, 
"adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = safe_getattr(args, "adaptive_softmax_dropout", 0) + args.adaptive_softmax_factor = safe_getattr(args, "adaptive_softmax_factor", 4) + args.decoder_learned_pos = safe_getattr(args, "decoder_learned_pos", False) + args.activation_fn = safe_getattr(args, "activation_fn", "relu") + + args.decoder_layerdrop = safe_getattr(args, "decoder_layerdrop", 0) + args.decoder_layers_to_keep = safe_getattr(args, "decoder_layers_to_keep", None) + args.quant_noise_pq = safe_getattr(args, "quant_noise_pq", 0) + args.quant_noise_pq_block_size = safe_getattr(args, "quant_noise_pq_block_size", 8) + args.quant_noise_scalar = safe_getattr(args, "quant_noise_scalar", 0) + + args.base_layers = safe_getattr(args, "base_layers", 0) + args.base_sublayers = safe_getattr(args, "base_sublayers", 1) + args.base_shuffle = safe_getattr(args, "base_shuffle", False) + + args.add_bos_token = safe_getattr(args, "add_bos_token", False) + args.no_token_positional_embeddings = safe_getattr( + args, "no_token_positional_embeddings", False + ) + args.share_decoder_input_output_embed = safe_getattr( + args, "share_decoder_input_output_embed", False + ) + args.character_embeddings = safe_getattr(args, "character_embeddings", False) + + args.decoder_output_dim = safe_getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = safe_getattr( + args, "decoder_input_dim", args.decoder_embed_dim + ) + + # Model training is not stable without this + args.decoder_normalize_before = True + args.no_decoder_final_norm = safe_getattr(args, "no_decoder_final_norm", False) + + args.adaptive_input = safe_getattr(args, "adaptive_input", False) + args.adaptive_input_factor = safe_getattr(args, "adaptive_input_factor", 4) + args.adaptive_input_cutoff = safe_getattr(args, "adaptive_input_cutoff", None) + + args.tie_adaptive_weights = safe_getattr(args, "tie_adaptive_weights", False) + args.tie_adaptive_proj = safe_getattr(args, 
"tie_adaptive_proj", False) + + args.no_scale_embedding = safe_getattr(args, "no_scale_embedding", False) + args.layernorm_embedding = safe_getattr(args, "layernorm_embedding", False) + args.checkpoint_activations = safe_getattr(args, "checkpoint_activations", False) + args.offload_activations = safe_getattr(args, "offload_activations", False) + args.scale_fc = safe_getattr(args, "scale_fc", False) + args.scale_attn = safe_getattr(args, "scale_attn", False) + args.scale_heads = safe_getattr(args, "scale_heads", False) + args.scale_resids = safe_getattr(args, "scale_resids", False) + if args.offload_activations: + args.checkpoint_activations = True + + +@register_model_architecture("transformer_lm", "transformer_lm_big") +def transformer_lm_big(args): + args.decoder_layers = safe_getattr(args, "decoder_layers", 12) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 16) + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_wiki103") +@register_model_architecture("transformer_lm", "transformer_lm_baevski_wiki103") +def transformer_lm_baevski_wiki103(args): + args.decoder_layers = safe_getattr(args, "decoder_layers", 16) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 8) + args.dropout = safe_getattr(args, "dropout", 0.3) + args.adaptive_input = safe_getattr(args, "adaptive_input", True) + args.tie_adaptive_weights = safe_getattr(args, "tie_adaptive_weights", True) + args.adaptive_input_cutoff = safe_getattr( + args, "adaptive_input_cutoff", "20000,60000" + ) + args.adaptive_softmax_cutoff = safe_getattr( + args, "adaptive_softmax_cutoff", "20000,60000" + ) + args.adaptive_softmax_dropout = safe_getattr(args, "adaptive_softmax_dropout", 0.2) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + 
args.activation_dropout = safe_getattr(args, "activation_dropout", 0.1) + args.no_decoder_final_norm = safe_getattr(args, "no_decoder_final_norm", True) + args.tie_adaptive_proj = safe_getattr(args, "tie_adaptive_proj", True) + transformer_lm_big(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gbw") +@register_model_architecture("transformer_lm", "transformer_lm_baevski_gbw") +def transformer_lm_baevski_gbw(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 512) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.no_decoder_final_norm = safe_getattr(args, "no_decoder_final_norm", True) + transformer_lm_big(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt") +def transformer_lm_gpt(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 768) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 3072) + args.decoder_layers = safe_getattr(args, "decoder_layers", 12) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 12) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt2_small") +def transformer_lm_gpt2_small(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1024) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 4096) + args.decoder_layers = safe_getattr(args, "decoder_layers", 24) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 16) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + 
base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt2_tiny") +def transformer_lm_gpt2_tiny(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 64) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 64) + args.decoder_layers = safe_getattr(args, "decoder_layers", 2) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 1) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt2_medium") +def transformer_lm_gpt2_medium(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1280) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 5120) + args.decoder_layers = safe_getattr(args, "decoder_layers", 36) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 20) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt2_big") +def transformer_lm_gpt2_big(args): + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1600) + args.decoder_ffn_embed_dim = safe_getattr(args, "decoder_ffn_embed_dim", 6400) + args.decoder_layers = safe_getattr(args, "decoder_layers", 48) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 25) + args.dropout = safe_getattr(args, "dropout", 0.1) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.1) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", 
"transformer_lm_gpt2_big_wide") +def transformer_lm_gpt2_big_wide(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 2048) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8192) + args.decoder_layers = getattr(args, "decoder_layers", 24) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 32) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt2_bigger") +def transformer_lm_gpt2_bigger(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 2048) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 8192) + args.decoder_layers = getattr(args, "decoder_layers", 48) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 32) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) + + +def base_gpt3_architecture(args): + args.decoder_input_dim = args.decoder_embed_dim + args.decoder_output_dim = args.decoder_embed_dim + args.decoder_ffn_embed_dim = safe_getattr( + args, "decoder_ffn_embed_dim", args.decoder_embed_dim * 4 + ) + # GPT-3 used learned positional embeddings, rather than sinusoidal + args.decoder_learned_pos = safe_getattr(args, "decoder_learned_pos", True) + args.dropout = safe_getattr(args, "dropout", 0.0) + args.attention_dropout = safe_getattr(args, "attention_dropout", 0.0) + args.activation_fn = safe_getattr(args, "activation_fn", "gelu") + args.share_decoder_input_output_embed = True + base_lm_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_small") +def transformer_lm_gpt3_small(args): + # 125M params + args.decoder_layers = 
safe_getattr(args, "decoder_layers", 12) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 768) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 12) + base_gpt3_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_medium") +def transformer_lm_gpt3_medium(args): + # 350M params + args.decoder_layers = safe_getattr(args, "decoder_layers", 24) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1024) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 16) + base_gpt3_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_large") +def transformer_lm_gpt3_large(args): + # 760M params + args.decoder_layers = safe_getattr(args, "decoder_layers", 24) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 1536) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 16) + base_gpt3_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_xl") +def transformer_lm_gpt3_xl(args): + # 1.3B params + args.decoder_layers = safe_getattr(args, "decoder_layers", 24) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 2048) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 32) + base_gpt3_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_2_7") +def transformer_lm_gpt3_2_7(args): + # 2.7B params + args.decoder_layers = safe_getattr(args, "decoder_layers", 32) + args.decoder_embed_dim = safe_getattr(args, "decoder_embed_dim", 2560) + args.decoder_attention_heads = safe_getattr(args, "decoder_attention_heads", 32) + base_gpt3_architecture(args) + + +@register_model_architecture("transformer_lm", "transformer_lm_gpt3_6_7") +def transformer_lm_gpt3_6_7(args): + # 6.7B params + args.decoder_layers = safe_getattr(args, "decoder_layers", 32) + args.decoder_embed_dim = 
@register_model_architecture("transformer_lm", "transformer_lm_gpt3_13")
def transformer_lm_gpt3_13(args):
    """Fill in the GPT-3 13B-parameter preset, then the GPT-3 base defaults.

    Each value is only a fallback: whatever ``safe_getattr`` finds on
    ``args`` takes precedence over the preset.
    """
    preset = (
        ("decoder_layers", 40),
        ("decoder_embed_dim", 5120),
        ("decoder_attention_heads", 40),
    )
    for attr_name, fallback in preset:
        setattr(args, attr_name, safe_getattr(args, attr_name, fallback))
    base_gpt3_architecture(args)
@dataclass
class SpeechUnitLanguageModelConfig(TransformerLanguageModelConfig):
    """Options for the multi-stream speech unit language model.

    Extends the plain transformer LM config with input-masking knobs for the
    three input streams (unit, duration, f0). For every stream there is a
    segment-masking probability, a segment length and a length-distribution
    type; duration and f0 additionally have a probability of masking the
    whole sequence.
    """

    # --- unit stream: segment masking only ---
    mask_unit_seg_prob: float = field(
        default=0.0, metadata={"help": "probability to mask a segment of unit sequence"}
    )
    mask_unit_seg_leng: int = field(
        default=5, metadata={"help": "length of unit segment mask"}
    )
    mask_unit_seg_type: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose unit mask length"}
    )

    # --- duration stream: whole-sequence and segment masking ---
    mask_dur_prob: float = field(
        default=0.0, metadata={"help": "probability to mask entire duration sequence"}
    )
    mask_dur_seg_prob: float = field(
        default=0.0,
        metadata={"help": "probability to mask a segment of duration sequence"},
    )
    mask_dur_seg_leng: int = field(
        default=5, metadata={"help": "length of duration segment mask"}
    )
    mask_dur_seg_type: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose duration mask length"}
    )

    # --- f0 stream: whole-sequence and segment masking ---
    # The help text previously said "duration" (copy-paste from mask_dur_prob).
    mask_f0_prob: float = field(
        default=0.0, metadata={"help": "probability to mask entire f0 sequence"}
    )
    mask_f0_seg_prob: float = field(
        default=0.0, metadata={"help": "probability to mask a segment of f0 sequence"}
    )
    mask_f0_seg_leng: int = field(
        default=5, metadata={"help": "length of f0 segment mask"}
    )
    mask_f0_seg_type: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose f0 mask length"}
    )
def apply_seq_dropout(self, inp, mask_prob, mask_val):
    """Replace whole rows of ``inp`` with ``mask_val`` with probability ``mask_prob``.

    Args:
        inp: 2-D tensor (its size is unpacked into exactly two values).
            It is modified IN PLACE when masking is active.
        mask_prob: per-row probability of masking the entire row; masking is
            skipped unless this is greater than 0.
        mask_val: value written into every position of a masked row.

    Returns:
        ``(inp, mask_indices)`` where ``mask_indices`` is a boolean tensor of
        the same shape as ``inp`` marking the masked positions.
    """
    num_rows, row_len = inp.size()

    if not mask_prob > 0:
        # Masking disabled: input is untouched and the mask is all False.
        return inp, torch.zeros_like(inp).bool()

    # One uniform draw per row decides whether that whole row is masked.
    row_selected = np.random.uniform(0, 1, (num_rows,)) < mask_prob
    row_selected = torch.from_numpy(row_selected).to(inp.device)
    # Broadcast the per-row decision across every position of the row.
    mask_indices = row_selected.unsqueeze(1).expand(-1, row_len)
    inp[mask_indices] = mask_val
    return inp, mask_indices
class MultiStreamTransformerDecoder(TransformerDecoder):
    """Transformer decoder that reads several aligned input streams at once.

    The first stream is embedded with ``embed_tokens``. Every further stream
    is either embedded by the matching entry of ``embed_other_list`` or, when
    that entry is ``None``, used as a raw value with a trailing dimension of
    size 1. All stream representations are summed before the decoder layers.
    The output projection produces ``sum(channel_sizes)`` features, which are
    returned as one slice per channel.
    """

    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        embed_other_list,
        no_encoder_attn,
        channel_sizes,
    ):
        """Build the decoder and the per-stream embedding/projection modules.

        Args:
            args: model arguments, forwarded to ``TransformerDecoder``.
            dictionary: dictionary forwarded to ``TransformerDecoder``.
            embed_tokens: embedding for the first (token) stream; its
                ``embedding_dim`` is the width all streams are brought to.
            embed_other_list: one entry per extra stream; an embedding
                module, or ``None`` for a stream that is fed in as raw values.
            no_encoder_attn: forwarded to ``TransformerDecoder``.
            channel_sizes: output width of each prediction channel; the
                output projection has ``sum(channel_sizes)`` features.
        """
        super().__init__(
            args, dictionary, embed_tokens, no_encoder_attn=no_encoder_attn
        )

        # embed each channel and project if dimensions do not match
        self.embed_other_list = torch.nn.ModuleList(embed_other_list)
        self.proj_other_list = torch.nn.ModuleList()
        dim = embed_tokens.embedding_dim
        for embed_other in embed_other_list:
            # a stream without an embedding contributes a single raw feature
            other_dim = 1 if embed_other is None else embed_other.embedding_dim
            self.proj_other_list.append(
                nn.Linear(other_dim, dim) if other_dim != dim else None
            )

        # transformer output to prediction
        self.channel_sizes = channel_sizes
        # replaces whatever output projection the parent constructor set up
        self.project_out_dim = Linear(
            embed_tokens.embedding_dim, sum(channel_sizes), bias=False
        )

    def extract_features_scriptable(
        self,
        prev_output_tokens,
        encoder_out: Optional[Dict[str, List[Tensor]]],
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        full_context_alignment: bool = False,
        alignment_layer: Optional[int] = None,
        alignment_heads: Optional[int] = None,
    ):
        """Run the decoder on a tuple of input streams.

        Args:
            prev_output_tokens: sequence whose first element is the token
                stream and whose remaining elements are the extra streams, in
                the same order as ``embed_other_list``.
            encoder_out: optional encoder outputs; only the first entries of
                ``"encoder_out"`` and ``"encoder_padding_mask"`` are used.
            incremental_state: when given, only the last time step of every
                stream is fed through the layers.
            full_context_alignment: when True no self-attention mask is built.
            alignment_layer: layer whose attention is returned; defaults to
                the last layer.
            alignment_heads: if set, only this many leading heads are
                averaged into the returned attention.

        Returns:
            ``(result, extra)`` where ``result`` is a list with one
            ``B x T x channel_size`` tensor per entry of ``channel_sizes`` and
            ``extra`` holds ``"attn"`` and ``"inner_states"``.
        """
        if alignment_layer is None:
            alignment_layer = self.num_layers - 1

        # XXX: first multi-channel change start
        # split the token stream from the remaining streams
        prev_output_tokens, *other_channels = prev_output_tokens
        # XXX: first multi-channel change end

        # embed positions
        positions = None
        if self.embed_positions is not None:
            positions = self.embed_positions(
                prev_output_tokens, incremental_state=incremental_state
            )

        if incremental_state is not None:
            # incremental decoding: keep only the newest step of every stream
            prev_output_tokens = prev_output_tokens[:, -1:]
            other_channels = [o[:, -1:] for o in other_channels]
            if positions is not None:
                positions = positions[:, -1:]

        # embed tokens and positions
        x = self.embed_scale * self.embed_tokens(prev_output_tokens)

        # XXX: second multi-channel change start
        # streams without an embedding become a size-1 feature in x's dtype
        other_channels = [
            o.unsqueeze(-1).to(dtype=x.dtype) if emb is None else emb(o)
            for o, emb in zip(other_channels, self.embed_other_list)
        ]
        # bring every stream to the token embedding width where needed
        other_channels = [
            o if proj_other is None else proj_other(o)
            for o, proj_other in zip(other_channels, self.proj_other_list)
        ]
        # streams are fused by plain summation (not scaled by embed_scale)
        for o in other_channels:
            x = x + o
        # XXX: second multi-channel change end

        if self.quant_noise is not None:
            x = self.quant_noise(x)

        if self.project_in_dim is not None:
            x = self.project_in_dim(x)

        if positions is not None:
            x += positions

        if self.layernorm_embedding is not None:
            x = self.layernorm_embedding(x)

        x = self.dropout_module(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # the padding mask is derived from the token stream only
        self_attn_padding_mask: Optional[Tensor] = None
        if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any():
            self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx)

        # decoder layers
        attn: Optional[Tensor] = None
        inner_states: List[Optional[Tensor]] = [x]
        for idx, layer in enumerate(self.layers):
            if incremental_state is None and not full_context_alignment:
                # presumably the causal (future-masking) mask from the parent
                # class — TODO confirm against TransformerDecoder
                self_attn_mask = self.buffered_future_mask(x)
            else:
                self_attn_mask = None

            x, layer_attn, _ = layer(
                x,
                encoder_out["encoder_out"][0]
                if (encoder_out is not None and len(encoder_out["encoder_out"]) > 0)
                else None,
                encoder_out["encoder_padding_mask"][0]
                if (
                    encoder_out is not None
                    and len(encoder_out["encoder_padding_mask"]) > 0
                )
                else None,
                incremental_state,
                self_attn_mask=self_attn_mask,
                self_attn_padding_mask=self_attn_padding_mask,
                need_attn=bool((idx == alignment_layer)),
                need_head_weights=bool((idx == alignment_layer)),
            )
            inner_states.append(x)
            if layer_attn is not None and idx == alignment_layer:
                attn = layer_attn.float().to(x)

        if attn is not None:
            if alignment_heads is not None:
                attn = attn[:alignment_heads]

            # average probabilities over heads
            attn = attn.mean(dim=0)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        if self.project_out_dim is not None:
            x = self.project_out_dim(x)
        else:
            # __init__ always sets project_out_dim, so this is not expected
            assert False

        # XXX: the last change start
        # cut the fused projection into consecutive per-channel slices
        result = []
        start = 0
        for channel_size in self.channel_sizes:
            end = start + channel_size
            result.append(x[:, :, start:end])
            start = end
        # the slices must consume the projection exactly
        # NOTE(review): `end` is unbound if channel_sizes is empty
        assert end == x.size(-1)
        # XXX: the last change end

        return result, {"attn": [attn], "inner_states": inner_states}
def pad_to_multiple(x, multiple, dim=-1, value=0):
    """Right-pad ``x`` along ``dim`` so its size is a multiple of ``multiple``.

    Inspired from
    https://github.com/lucidrains/local-attention/blob/master/local_attention/local_attention.py#L41

    Args:
        x: tensor to pad, or ``None``.
        multiple: the size along ``dim`` is rounded up to a multiple of this.
        dim: dimension to pad. Both negative and non-negative indices are
            accepted (previously a non-negative ``dim`` silently padded the
            last dimension instead).
        value: fill value for the padded positions.

    Returns:
        ``(padded, remainder)`` where ``remainder`` is the number of elements
        appended. ``(None, 0)`` when ``x`` is ``None``; ``(x, 0)`` with the
        very same tensor object when no padding is needed.
    """
    if x is None:
        return None, 0

    tsz = x.size(dim)
    # Exact integer arithmetic: elements missing up to the next multiple.
    remainder = -tsz % multiple
    if remainder == 0:
        return x, 0

    # F.pad takes (left, right) pairs starting from the LAST dimension, so a
    # zero pair is needed for every dimension that comes after `dim`.
    dims_after = (-1 - dim) if dim < 0 else (x.dim() - 1 - dim)
    pad_offset = (0, 0) * dims_after

    return F.pad(x, (*pad_offset, 0, remainder), value=value), remainder
@dataclass
class Wav2VecConfig(FairseqDataclass):
    """Configuration for the wav2vec model registered as ``"wav2vec"``.

    Groups the options for the convolutional feature extractor, the
    aggregator (CNN or GRU), the optional vector quantizer and the
    contrastive prediction head. Layer specifications are stored as strings
    holding a Python list of ``(dim, kernel_size, stride)`` tuples.
    """

    # --- prediction / negative sampling ---
    prediction_steps: int = field(
        default=12, metadata={"help": "number of steps ahead to predict"}
    )
    sample_distance: Optional[int] = field(
        default=None,
        metadata={
            "help": "sample distance from target. does not work properly with cross-sampling"
        },
    )
    cross_sample_negatives: int = field(
        default=0, metadata={"help": "num of cross sampled negatives"}
    )
    num_negatives: int = field(
        default=10, metadata={"help": "num of sampled negatives"}
    )

    # --- layer specifications ---
    conv_feature_layers: str = field(
        default="[(512, 10, 5), (512, 8, 4), (512, 4, 2), (512, 4, 2), (512, 4, 2), (512, 1, 1), (512, 1, 1), (512, 1, 1)]",
        metadata={
            "help": "convolutional feature extraction layers [(dim, kernel_size, stride), ...]"
        },
    )
    conv_aggregator_layers: str = field(
        default="[(512, 2, 1), (512, 3, 1), (512, 4, 1), (512, 5, 1), (512, 6, 1), (512, 7, 1), (512, 8, 1), (512, 9, 1), (512, 10, 1), (512, 11, 1), (512, 12, 1), (512, 13, 1)]",
        metadata={
            "help": "convolutional aggregator layers [(dim, kernel_size, stride), ...]"
        },
    )

    # --- dropouts ---
    dropout: float = field(
        default=0.0, metadata={"help": "dropout to apply within the model"}
    )
    dropout_features: float = field(
        default=0.0, metadata={"help": "dropout to apply to the features"}
    )
    dropout_agg: float = field(
        default=0.0, metadata={"help": "dropout to apply after aggregation step"}
    )

    # --- aggregator and convolution details ---
    aggregator: AGGREGATOR_CHOICES = field(
        default="cnn", metadata={"help": "type of aggregator to use"}
    )
    gru_dim: int = field(default=512, metadata={"help": "GRU dimensionality"})
    no_conv_bias: bool = field(
        default=False, metadata={"help": "if set, does not learn bias for conv layers"}
    )
    agg_zero_pad: bool = field(
        default=False,
        metadata={"help": "if set, zero pads in aggregator instead of repl pad"},
    )
    skip_connections_feat: bool = field(
        default=False,
        metadata={"help": "if set, adds skip connections to the feature extractor"},
    )
    skip_connections_agg: bool = field(
        default=True,
        metadata={"help": "if set, adds skip connections to the aggregator"},
    )
    residual_scale: float = field(
        default=0.5, metadata={"help": "scales residual by sqrt(value)"}
    )
    log_compression: bool = field(
        default=True,
        metadata={"help": "if set, adds a log compression to feature extractor"},
    )
    balanced_classes: bool = field(
        default=False,
        metadata={"help": "if set, loss is scaled to balance for number of negatives"},
    )
    project_features: PROJECT_FEATURES_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, features are projected using the (same or new) aggregator"
        },
    )
    non_affine_group_norm: bool = field(
        default=False, metadata={"help": "if set, group norm is not affine"}
    )
    offset: str = field(
        default="auto",
        metadata={
            "help": "if set to 'auto', it is computed automatically from the receptive field, else set to int value"
        },
    )
    # The help text used to be a copy of the `offset` help above, which
    # misdescribed this option on the command line.
    activation: ACTIVATION_CHOICES = field(
        default="relu",
        metadata={"help": "activation function to use (relu or gelu)"},
    )

    # --- vector quantization ---
    vq_type: VQ_TYPE_CHOICES = field(
        default="none", metadata={"help": "which type of quantizer to use"}
    )
    vq_vars: int = field(
        default=320,
        metadata={"help": "project to this many vector quantized variables per group"},
    )
    vq_groups: int = field(
        default=2, metadata={"help": "number of groups of latent variables"}
    )
    vq_dim: int = field(
        default=0,
        metadata={
            "help": "uses this dimensionality for quantized vectors. 0 to use model dim // groups"
        },
    )
    vq_depth: int = field(
        default=1, metadata={"help": "number of layers for vq weight projection"}
    )
    combine_groups: bool = field(
        default=False, metadata={"help": "if set, variables are shared among groups"}
    )
    vq_temp: Tuple[float, float, float] = field(
        default=(2.0, 0.5, 0.999995),
        metadata={
            "help": "temperature for latent variable sampling with gumbel softmax. should be a tuple of 3 values (start, end, decay)"
        },
    )
    vq_gamma: float = field(
        default=0.25,
        metadata={"help": "gamma parameter for kmeans style vector quantization"},
    )

    # interpolated from the criterion's config rather than set directly
    infonce: bool = II("criterion.infonce")
"auto": + jin = 0 + rin = 0 + for _, k, stride in feature_enc_layers: + if rin == 0: + rin = k + rin = rin + (k - 1) * jin + if jin == 0: + jin = stride + else: + jin *= stride + offset = math.ceil(rin / jin) + + offset = int(offset) + + def make_aggregator(): + if cfg.aggregator == "cnn": + agg_layers = eval(cfg.conv_aggregator_layers) + agg_dim = agg_layers[-1][0] + feature_aggregator = ConvAggegator( + conv_layers=agg_layers, + embed=embed, + dropout=cfg.dropout, + skip_connections=cfg.skip_connections_agg, + residual_scale=cfg.residual_scale, + non_affine_group_norm=cfg.non_affine_group_norm, + conv_bias=not cfg.no_conv_bias, + zero_pad=cfg.agg_zero_pad, + activation=activation, + ) + elif cfg.aggregator == "gru": + agg_dim = cfg.gru_dim + feature_aggregator = nn.Sequential( + TransposeLast(), + nn.GRU( + input_size=embed, + hidden_size=agg_dim, + num_layers=1, + dropout=cfg.dropout, + ), + TransposeLast(deconstruct_idx=0), + ) + else: + raise Exception("unknown aggregator type " + cfg.aggregator) + + return feature_aggregator, agg_dim + + self.feature_aggregator, agg_dim = make_aggregator() + + self.wav2vec_predictions = Wav2VecPredictionsModel( + in_dim=agg_dim, + out_dim=embed, + prediction_steps=cfg.prediction_steps, + n_negatives=cfg.num_negatives, + cross_sample_negatives=cfg.cross_sample_negatives, + sample_distance=cfg.sample_distance, + dropout=cfg.dropout, + offset=offset, + balanced_classes=cfg.balanced_classes, + infonce=cfg.infonce, + ) + + self.dropout_feats = nn.Dropout(p=cfg.dropout_features) + self.dropout_agg = nn.Dropout(p=cfg.dropout_agg) + + if cfg.project_features == "none": + self.project_features = None + elif cfg.project_features == "same": + self.project_features = self.feature_aggregator + elif cfg.project_features == "new": + self.project_features, _ = make_aggregator() + + def forward(self, source): + result = {} + + features = self.feature_extractor(source) + if self.vector_quantizer: + q_res = self.vector_quantizer(features) + 
def norm_block(is_layer_norm, dim, affine=True):
    """Build the normalization module used inside the conv blocks.

    Args:
        is_layer_norm: when true, return an ``Fp32LayerNorm`` over ``dim``
            wrapped between two ``TransposeLast`` modules; otherwise return a
            single-group ``Fp32GroupNorm``.
        dim: number of features/channels to normalize.
        affine: whether the norm has learnable affine parameters.

    Returns:
        The constructed ``nn.Module``.
    """
    if not is_layer_norm:
        return Fp32GroupNorm(1, dim, affine=affine)

    # presumably TransposeLast moves channels last for LayerNorm and back
    # again afterwards — TODO confirm against fairseq.modules.TransposeLast
    return nn.Sequential(
        TransposeLast(),
        Fp32LayerNorm(dim, elementwise_affine=affine),
        TransposeLast(),
    )
class ZeroPad1d(nn.Module):
    """Pad the last dimension of the input using ``F.pad`` defaults.

    ``pad_left`` elements are added before and ``pad_right`` elements after
    the existing data along the last dimension.
    """

    def __init__(self, pad_left, pad_right):
        super().__init__()
        self.pad_left, self.pad_right = pad_left, pad_right

    def forward(self, x):
        """Return ``x`` padded by ``(pad_left, pad_right)`` on its last dim."""
        padding = (self.pad_left, self.pad_right)
        return F.pad(x, padding)
class Wav2VecPredictionsModel(nn.Module):
    """Contrastive prediction head.

    Projects the aggregated context into one prediction per future step with
    a transposed convolution, samples negative targets, and scores every
    prediction against the true target and the negatives with a dot product
    over the channel dimension.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        prediction_steps,
        n_negatives,
        cross_sample_negatives,
        sample_distance,
        dropout,
        offset,
        balanced_classes,
        infonce,
    ):
        """Store sampling options and build the step projection.

        Args:
            in_dim: channel dimension of the context input ``x``.
            out_dim: channel dimension of the targets ``y``.
            prediction_steps: number of future steps produced per position.
            n_negatives: negatives sampled from the same batch row.
            cross_sample_negatives: negatives sampled across the whole batch.
            sample_distance: optional cap on the index range used for
                same-row negatives.
            dropout: dropout applied to the projected predictions.
            offset: distance between a position and its first predicted step.
            balanced_classes: if set (and not ``infonce``), negatives get a
                weight of ``1 / n_negatives``.
            infonce: selects the output layout and label type in ``forward``.
        """
        super().__init__()

        self.n_negatives = n_negatives
        self.cross_sample_negatives = cross_sample_negatives
        self.sample_distance = sample_distance
        # kernel (1, prediction_steps) turns a trailing size-1 axis into S steps
        self.project_to_steps = nn.ConvTranspose2d(
            in_dim, out_dim, (1, prediction_steps)
        )
        self.dropout = nn.Dropout(p=dropout)
        self.offset = offset
        self.balanced_classes = balanced_classes
        self.infonce = infonce

    def sample_negatives(self, y):
        """Sample negative targets for every time step of ``y``.

        Args:
            y: target features; its shape is unpacked as ``(bsz, fsz, tsz)``.

        Returns:
            Tensor of negatives permuted to ``N x B x C x T`` with
            ``N = n_negatives + cross_sample_negatives``.
        """
        bsz, fsz, tsz = y.shape

        y = y.transpose(0, 1)  # BCT -> CBT
        y = y.contiguous().view(fsz, -1)  # CBT => C(BxT)

        cross_high = tsz * bsz
        high = tsz if self.sample_distance is None else min(tsz, self.sample_distance)
        assert high > 1

        # NOTE(review): this draw is overwritten or replaced below on every
        # path that reaches the indexing; it still advances the RNG state.
        neg_idxs = torch.randint(low=0, high=high, size=(bsz, self.n_negatives * tsz))

        with torch.no_grad():
            if self.n_negatives > 0:
                # each time index repeated n_negatives times (assuming
                # buffered_arange(tsz) yields 0..tsz-1 — TODO confirm)
                tszs = (
                    buffered_arange(tsz)
                    .unsqueeze(-1)
                    .expand(-1, self.n_negatives)
                    .flatten()
                )

                # draw from one fewer index, then shift indices at or above
                # the frame's own position so a frame is never its own negative
                neg_idxs = torch.randint(
                    low=0, high=high - 1, size=(bsz, self.n_negatives * tsz)
                )
                neg_idxs[neg_idxs >= tszs] += 1

            if self.cross_sample_negatives > 0:
                tszs = (
                    buffered_arange(tsz)
                    .unsqueeze(-1)
                    .expand(-1, self.cross_sample_negatives)
                    .flatten()
                )

                # same skip trick, but over the flattened batch*time range
                cross_neg_idxs = torch.randint(
                    low=0,
                    high=cross_high - 1,
                    size=(bsz, self.cross_sample_negatives * tsz),
                )
                cross_neg_idxs[cross_neg_idxs >= tszs] += 1

        if self.n_negatives > 0:
            # shift row i's indices into the flattened (B*T) axis
            # NOTE(review): the stride is `high`, which equals tsz only when
            # sample_distance does not cap it — confirm this is intended
            for i in range(1, bsz):
                neg_idxs[i] += i * high
        else:
            # NOTE(review): cross_neg_idxs is unbound if both negative counts
            # are 0
            neg_idxs = cross_neg_idxs

        if self.cross_sample_negatives > 0 and self.n_negatives > 0:
            neg_idxs = torch.cat([neg_idxs, cross_neg_idxs], dim=1)

        negs = y[..., neg_idxs.view(-1)]
        negs = negs.view(
            fsz, bsz, self.n_negatives + self.cross_sample_negatives, tsz
        ).permute(
            2, 1, 0, 3
        )  # to NxBxCxT

        return negs

    def forward(self, x, y):
        """Score step-wise predictions against targets and negatives.

        Args:
            x: context features; after projection its shape is unpacked as
                ``(bsz, dim, tsz, steps)``.
            y: target features passed to ``sample_negatives``.

        Returns:
            ``(predictions, labels)``. With ``infonce`` the predictions are
            viewed as ``(-1, copies)`` and labels are all-zero longs;
            otherwise predictions stay flat, labels are 1.0 for positives and
            0 elsewhere, and labels become ``(labels, weights)`` when
            ``balanced_classes`` is set.
        """

        x = x.unsqueeze(-1)
        x = self.project_to_steps(x)  # BxCxTxS
        x = self.dropout(x)

        negatives = self.sample_negatives(y)
        y = y.unsqueeze(0)
        # index 0 along the first axis is the true target, the rest negatives
        targets = torch.cat([y, negatives], dim=0)  # Copies x B x C x T

        copies = targets.size(0)
        bsz, dim, tsz, steps = x.shape
        steps = min(steps, tsz - self.offset)

        # flat buffer sized as the sum over steps i of
        # (tsz - offset - i) * bsz * copies; checked by the assert below
        predictions = x.new(
            bsz * copies * (tsz - self.offset + 1) * steps
            - ((steps + 1) * steps // 2) * copies * bsz
        )
        if self.infonce:
            # one label per prediction row; the positive sits at column 0
            labels = predictions.new_full(
                (predictions.shape[0] // copies,), 0, dtype=torch.long
            )
        else:
            labels = torch.zeros_like(predictions)
        # NOTE(review): divides by zero if balanced_classes is set while
        # n_negatives == 0
        weights = (
            torch.full_like(labels, 1 / self.n_negatives)
            if self.balanced_classes and not self.infonce
            else None
        )

        start = end = 0
        for i in range(steps):
            offset = i + self.offset
            end = start + (tsz - offset) * bsz * copies
            # NOTE(review): offset == 0 would make `:-offset` an empty slice
            if self.infonce:
                # candidates last, so each (t, b) row lists all copies
                predictions[start:end] = torch.einsum(
                    "bct,nbct->tbn", x[..., :-offset, i], targets[..., offset:]
                ).flatten()
            else:
                # candidates first, so the leading 1/copies of the chunk are
                # the scores for the true target
                pos_num = (end - start) // copies
                predictions[start:end] = torch.einsum(
                    "bct,nbct->nbt", x[..., :-offset, i], targets[..., offset:]
                ).flatten()
                labels[start : start + pos_num] = 1.0
                if weights is not None:
                    weights[start : start + pos_num] = 1.0
            start = end
        assert end == predictions.numel(), "{} != {}".format(end, predictions.numel())

        if self.infonce:
            predictions = predictions.view(-1, copies)
        else:
            if weights is not None:
                labels = (labels, weights)

        return predictions, labels
0000000..0faba77 --- /dev/null +++ b/fairseq/fairseq/models/wav2vec/wav2vec2.py @@ -0,0 +1,1499 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from dataclasses import dataclass, field +from typing import List, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.distributed import fsdp_wrap +from fairseq.models import BaseFairseqModel, register_model +from fairseq.distributed.fully_sharded_data_parallel import FullyShardedDataParallel +from fairseq.modules import ( + Fp32GroupNorm, + Fp32LayerNorm, + GradMultiply, + GumbelVectorQuantizer, + LayerNorm, + MultiheadAttention, + RelPositionalEncoding, + SamePad, + TransposeLast, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.conformer_layer import ConformerWav2Vec2EncoderLayer +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import buffered_arange, index_put, is_xla_tensor + +from .utils import pad_to_multiple + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) +LAYER_TYPE_CHOICES = ChoiceEnum(["transformer", "conformer", "trf_adp"]) + + +@dataclass +class Wav2Vec2Config(FairseqDataclass): + extractor_mode: EXTRACTOR_MODE_CHOICES = field( + default="default", + metadata={ + "help": "mode for feature extractor. 
default has a single group norm with d " + "groups in the first conv block, whereas layer_norm has layer norms in " + "every block (meant to use with normalize=True)" + }, + ) + encoder_layers: int = field( + default=12, metadata={"help": "num encoder layers in the transformer"} + ) + encoder_embed_dim: int = field( + default=768, metadata={"help": "encoder embedding dimension"} + ) + encoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "encoder embedding dimension for FFN"} + ) + encoder_attention_heads: int = field( + default=12, metadata={"help": "num encoder attention heads"} + ) + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="gelu", metadata={"help": "activation function to use"} + ) + layer_type: LAYER_TYPE_CHOICES = field( + default="transformer", metadata={"help": "layer type in encoder"} + ) + # dropouts + dropout: float = field( + default=0.1, metadata={"help": "dropout probability for the transformer"} + ) + attention_dropout: float = field( + default=0.1, metadata={"help": "dropout probability for attention weights"} + ) + activation_dropout: float = field( + default=0.0, metadata={"help": "dropout probability after activation in FFN"} + ) + encoder_layerdrop: float = field( + default=0.0, metadata={"help": "probability of dropping a tarnsformer layer"} + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + dropout_features: float = field( + default=0.0, + metadata={"help": "dropout to apply to the features (after feat extr)"}, + ) + + final_dim: int = field( + default=0, + metadata={ + "help": "project final representations and targets to this many dimensions." 
+ "set to encoder_embed_dim is <= 0" + }, + ) + layer_norm_first: bool = field( + default=False, metadata={"help": "apply layernorm first in the transformer"} + ) + conv_feature_layers: str = field( + default="[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]", + metadata={ + "help": "string describing convolutional feature extraction layers in form of a python list that contains " + "[(dim, kernel_size, stride), ...]" + }, + ) + conv_bias: bool = field( + default=False, metadata={"help": "include bias in conv encoder"} + ) + logit_temp: float = field( + default=0.1, metadata={"help": "temperature to divide logits by"} + ) + quantize_targets: bool = field( + default=False, metadata={"help": "use quantized targets"} + ) + quantize_input: bool = field( + default=False, metadata={"help": "use quantized inputs"} + ) + same_quantizer: bool = field( + default=False, metadata={"help": "use same quantizer for inputs and targets"} + ) + target_glu: bool = field( + default=False, metadata={"help": "adds projection + glu to targets"} + ) + feature_grad_mult: float = field( + default=1.0, metadata={"help": "multiply feature extractor var grads by this"} + ) + quantizer_depth: int = field( + default=1, + metadata={"help": "number of quantizer layers"}, + ) + quantizer_factor: int = field( + default=3, + metadata={ + "help": "dimensionality increase for inner quantizer layers (if depth > 1)" + }, + ) + latent_vars: int = field( + default=320, + metadata={"help": "number of latent variables V in each group of the codebook"}, + ) + latent_groups: int = field( + default=2, + metadata={"help": "number of groups G of latent variables in the codebook"}, + ) + latent_dim: int = field( + default=0, + metadata={ + "help": "if > 0, uses this dimensionality for latent variables. 
" + "otherwise uses final_dim / latent_groups" + }, + ) + + # masking + mask_length: int = field(default=10, metadata={"help": "mask length"}) + mask_prob: float = field( + default=0.65, metadata={"help": "probability of replacing a token with mask"} + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose mask length"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument (used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + mask_min_space: int = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + require_same_masks: bool = field( + default=True, + metadata={ + "help": "whether to number of masked timesteps must be the same across all " + "examples in a batch" + }, + ) + mask_dropout: float = field( + default=0.0, + metadata={"help": "percent of masks to unmask for each sample"}, + ) + + # channel masking + mask_channel_length: int = field( + default=10, metadata={"help": "length of the mask for features (channels)"} + ) + mask_channel_prob: float = field( + default=0.0, metadata={"help": "probability of replacing a feature with 0"} + ) + mask_channel_before: bool = False + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument (used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, metadata={"help": "whether to allow channel masks to overlap"} + ) + mask_channel_min_space: int = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + + # negative 
selection + num_negatives: int = field( + default=100, + metadata={"help": "number of negative examples from the same sample"}, + ) + negatives_from_everywhere: bool = field( + default=False, + metadata={"help": "sample negatives from everywhere, not just masked states"}, + ) + cross_sample_negatives: int = field( + default=0, metadata={"help": "number of negative examples from the any sample"} + ) + codebook_negatives: int = field( + default=0, metadata={"help": "number of negative examples codebook"} + ) + + # positional embeddings + conv_pos: int = field( + default=128, + metadata={"help": "number of filters for convolutional positional embeddings"}, + ) + conv_pos_groups: int = field( + default=16, + metadata={"help": "number of groups for convolutional positional embedding"}, + ) + pos_conv_depth: int = field( + default=1, + metadata={"help": "depth of positional encoder network"}, + ) + + latent_temp: Tuple[float, float, float] = field( + default=(2, 0.5, 0.999995), + metadata={ + "help": "temperature for latent variable sampling. 
" + "can be tuple of 3 values (start, end, decay)" + }, + ) + max_positions: int = field(default=100000, metadata={"help": "Max positions"}) + checkpoint_activations: bool = field( + default=False, + metadata={"help": "recompute activations and save memory for extra compute"}, + ) + + # FP16 optimization + required_seq_len_multiple: int = field( + default=2, + metadata={ + "help": "pad the input to encoder such that the sequence length is divisible by multiple" + }, + ) + crop_seq_to_multiple: int = field( + default=1, + metadata={ + "help": "crop convolutional feature extractor output such that the sequence length is divisible by multiple" + }, + ) + + # Conformer + depthwise_conv_kernel_size: int = field( + default=31, + metadata={ + "help": "depthwise-conv-kernel-size for convolution in conformer layer" + }, + ) + attn_type: str = field( + default="", + metadata={"help": "if espnet use ESPNET MHA"}, + ) + pos_enc_type: str = field( + default="abs", + metadata={"help": "Positional encoding type to use in conformer"}, + ) + fp16: bool = field(default=False, metadata={"help": "If fp16 is being used"}) + + # Adapter num + adp_num: int = field( + default=-1 + ) + adp_dim: int = field( + default=64 + ) + adp_act_fn: str = field( + default="relu" + ) + adp_trf_idx: str = field( + default="all", + ) + + +@register_model("wav2vec2", dataclass=Wav2Vec2Config) +class Wav2Vec2Model(BaseFairseqModel): + def __init__(self, cfg: Wav2Vec2Config): + super().__init__() + self.cfg = cfg + + feature_enc_layers = eval(cfg.conv_feature_layers) + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim and not cfg.quantize_input + else None + ) + + self.crop_seq_to_multiple = cfg.crop_seq_to_multiple + + self.mask_prob = 
cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_before = cfg.mask_channel_before + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + + self.quantizer = None + self.input_quantizer = None + + self.n_negatives = cfg.num_negatives + self.cross_sample_negatives = cfg.cross_sample_negatives + self.codebook_negatives = cfg.codebook_negatives + self.negatives_from_everywhere = cfg.negatives_from_everywhere + + self.logit_temp = cfg.logit_temp + + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + if cfg.quantize_targets: + vq_dim = cfg.latent_dim if cfg.latent_dim > 0 else final_dim + self.quantizer = GumbelVectorQuantizer( + dim=self.embed, + num_vars=cfg.latent_vars, + temp=cfg.latent_temp, + groups=cfg.latent_groups, + combine_groups=False, + vq_dim=vq_dim, + time_first=True, + weight_proj_depth=cfg.quantizer_depth, + weight_proj_factor=cfg.quantizer_factor, + ) + self.project_q = nn.Linear(vq_dim, final_dim) + else: + self.project_q = nn.Linear(self.embed, final_dim) + + if cfg.quantize_input: + if cfg.same_quantizer and self.quantizer is not None: + vq_dim = final_dim + self.input_quantizer = self.quantizer + else: + vq_dim = cfg.latent_dim if cfg.latent_dim > 0 else cfg.encoder_embed_dim + self.input_quantizer = GumbelVectorQuantizer( + dim=self.embed, + num_vars=cfg.latent_vars, + temp=cfg.latent_temp, + 
groups=cfg.latent_groups, + combine_groups=False, + vq_dim=vq_dim, + time_first=True, + weight_proj_depth=cfg.quantizer_depth, + weight_proj_factor=cfg.quantizer_factor, + ) + self.project_inp = nn.Linear(vq_dim, cfg.encoder_embed_dim) + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + encoder_cls = TransformerEncoder + if cfg.layer_type == "conformer" and cfg.pos_enc_type in ["rel_pos", "rope"]: + encoder_cls = ConformerEncoder + + self.encoder = encoder_cls(cfg) + self.layer_norm = LayerNorm(self.embed) + + self.target_glu = None + if cfg.target_glu: + self.target_glu = nn.Sequential( + nn.Linear(final_dim, final_dim * 2), nn.GLU() + ) + + self.final_proj = nn.Linear(cfg.encoder_embed_dim, final_dim) + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + return state_dict + + @classmethod + def build_model(cls, cfg: Wav2Vec2Config, task=None): + """Build a new model instance.""" + + return cls(cfg) + + def apply_mask( + self, + x, + padding_mask, + mask_indices=None, + mask_channel_indices=None, + ): + B, T, C = x.shape + + if self.mask_channel_prob > 0 and self.mask_channel_before: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + if self.mask_prob > 0: + if mask_indices is None: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + 
require_same_masks=self.cfg.require_same_masks, + mask_dropout=self.cfg.mask_dropout, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x = index_put(x, mask_indices, self.mask_emb) + else: + mask_indices = None + + if self.mask_channel_prob > 0 and not self.mask_channel_before: + if mask_channel_indices is None: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x = index_put(x, mask_channel_indices, 0) + + return x, mask_indices + + def sample_negatives(self, y, num, padding_count=None): + + if self.n_negatives == 0 and self.cross_sample_negatives == 0: + return y.new(0) + + bsz, tsz, fsz = y.shape + y = y.view(-1, fsz) # BTC => (BxT)C + + # FIXME: what happens if padding_count is specified? 
+ cross_high = tsz * bsz + high = tsz - (padding_count or 0) + with torch.no_grad(): + assert high > 1, f"{bsz,tsz,fsz}" + + if self.n_negatives > 0: + tszs = ( + buffered_arange(num) + .unsqueeze(-1) + .expand(-1, self.n_negatives) + .flatten() + ) + + neg_idxs = torch.randint( + low=0, high=high - 1, size=(bsz, self.n_negatives * num) + ) + neg_idxs[neg_idxs >= tszs] += 1 + + if self.cross_sample_negatives > 0: + tszs = ( + buffered_arange(num) + .unsqueeze(-1) + .expand(-1, self.cross_sample_negatives) + .flatten() + ) + + cross_neg_idxs = torch.randint( + low=0, + high=cross_high - 1, + size=(bsz, self.cross_sample_negatives * num), + ) + cross_neg_idxs[cross_neg_idxs >= tszs] += 1 + + if self.n_negatives > 0: + neg_idxs = neg_idxs + (torch.arange(bsz).unsqueeze(1) * high) + else: + neg_idxs = cross_neg_idxs + + if self.cross_sample_negatives > 0 and self.n_negatives > 0: + neg_idxs = torch.cat([neg_idxs, cross_neg_idxs], dim=1) + + negs = y[neg_idxs.view(-1)] + negs = negs.view( + bsz, num, self.n_negatives + self.cross_sample_negatives, fsz + ).permute( + 2, 0, 1, 3 + ) # to NxBxTxC + return negs, neg_idxs + + def compute_preds(self, x, y, negatives): + + neg_is_pos = (y == negatives).all(-1) + y = y.unsqueeze(0) + targets = torch.cat([y, negatives], dim=0) + + logits = torch.cosine_similarity(x.float(), targets.float(), dim=-1) + logits = logits / self.logit_temp + logits = logits.type_as(x) + + if is_xla_tensor(logits) or neg_is_pos.any(): + if not hasattr(self, "_inftensor"): + fillval = -float(2**30) + self._inftensor = ( + torch.tensor(fillval).to(x.device) + if is_xla_tensor(logits) + else float("-inf") + ) + logits[1:] = index_put(logits[1:], neg_is_pos, self._inftensor) + + return logits + + def _get_feat_extract_output_lengths(self, input_lengths: torch.LongTensor): + """ + Computes the output length of the convolutional layers + """ + + def _conv_out_length(input_length, kernel_size, stride): + return torch.floor((input_length - kernel_size) / 
stride + 1) + + conv_cfg_list = eval(self.cfg.conv_feature_layers) + + for i in range(len(conv_cfg_list)): + input_lengths = _conv_out_length( + input_lengths, conv_cfg_list[i][1], conv_cfg_list[i][2] + ) + + return input_lengths.to(torch.long) + + def forward( + self, + source, + padding_mask=None, + mask=True, + features_only=False, + layer=None, + mask_indices=None, + mask_channel_indices=None, + padding_count=None, + corpus_key=None, + ): + + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None and padding_mask.any(): + input_lengths = (1 - padding_mask.long()).sum(-1) + # apply conv formula to get real output_lengths + output_lengths = self._get_feat_extract_output_lengths(input_lengths) + + padding_mask = torch.zeros( + features.shape[:2], dtype=features.dtype, device=features.device + ) + + # these two operations makes sure that all values + # before the output lengths indices are attended to + padding_mask[ + ( + torch.arange(padding_mask.shape[0], device=padding_mask.device), + output_lengths - 1, + ) + ] = 1 + padding_mask = (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])).bool() + else: + padding_mask = None + + time_steps_to_drop = features.size(1) % self.crop_seq_to_multiple + if time_steps_to_drop != 0: + features = features[:, :-time_steps_to_drop] + unmasked_features = unmasked_features[:, :-time_steps_to_drop] + if padding_mask is not None: + padding_mask = padding_mask[:, :-time_steps_to_drop] + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = 
self.dropout_features(unmasked_features) + + num_vars = None + code_ppl = None + prob_ppl = None + curr_temp = None + + if self.input_quantizer: + q = self.input_quantizer(features, produce_targets=False) + features = q["x"] + num_vars = q["num_vars"] + code_ppl = q["code_perplexity"] + prob_ppl = q["prob_perplexity"] + curr_temp = q["temp"] + features = self.project_inp(features) + + if mask: + x, mask_indices = self.apply_mask( + features, + padding_mask, + mask_indices=mask_indices, + mask_channel_indices=mask_channel_indices, + ) + if not is_xla_tensor(x) and mask_indices is not None: + # tpu-comment: reducing the size in a dynamic way causes + # too many recompilations on xla. + y = unmasked_features[mask_indices].view( + unmasked_features.size(0), -1, unmasked_features.size(-1) + ) + else: + y = unmasked_features + else: + x = features + y = unmasked_features + mask_indices = None + + x, layer_results = self.encoder( + x, padding_mask=padding_mask, layer=layer, corpus_key=corpus_key + ) + + if features_only: + return { + "x": x, + "padding_mask": padding_mask, + "features": unmasked_features, + "layer_results": layer_results, + } + + if self.quantizer: + if self.negatives_from_everywhere: + q = self.quantizer(unmasked_features, produce_targets=False) + y = q["x"] + num_vars = q["num_vars"] + code_ppl = q["code_perplexity"] + prob_ppl = q["prob_perplexity"] + curr_temp = q["temp"] + y = self.project_q(y) + + negs, _ = self.sample_negatives( + y, + mask_indices[0].sum(), + padding_count=padding_count, + ) + y = y[mask_indices].view(y.size(0), -1, y.size(-1)) + + else: + q = self.quantizer(y, produce_targets=False) + y = q["x"] + num_vars = q["num_vars"] + code_ppl = q["code_perplexity"] + prob_ppl = q["prob_perplexity"] + curr_temp = q["temp"] + + y = self.project_q(y) + + negs, _ = self.sample_negatives( + y, + y.size(1), + padding_count=padding_count, + ) + + if self.codebook_negatives > 0: + cb_negs = self.quantizer.sample_from_codebook( + y.size(0) * 
y.size(1), self.codebook_negatives + ) + cb_negs = cb_negs.view( + self.codebook_negatives, y.size(0), y.size(1), -1 + ) # order doesnt matter + cb_negs = self.project_q(cb_negs) + negs = torch.cat([negs, cb_negs], dim=0) + else: + y = self.project_q(y) + + if self.negatives_from_everywhere: + negs, _ = self.sample_negatives( + unmasked_features, + y.size(1), + padding_count=padding_count, + ) + negs = self.project_q(negs) + else: + negs, _ = self.sample_negatives( + y, + y.size(1), + padding_count=padding_count, + ) + + if not is_xla_tensor(x): + # tpu-comment: reducing the size in a dynamic way causes + # too many recompilations on xla. + x = x[mask_indices].view(x.size(0), -1, x.size(-1)) + + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + + x = self.final_proj(x) + x = self.compute_preds(x, y, negs) + + result = { + "x": x, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + if prob_ppl is not None: + result["prob_perplexity"] = prob_ppl + result["code_perplexity"] = code_ppl + result["num_vars"] = num_vars + result["temp"] = curr_temp + + return result + + def quantize(self, x): + assert self.quantizer is not None + x = self.feature_extractor(x) + x = x.transpose(1, 2) + x = self.layer_norm(x) + return self.quantizer.forward_idx(x) + + def extract_features( + self, source, padding_mask, mask=False, layer=None, corpus_key=None + ): + res = self.forward( + source, + padding_mask, + mask=mask, + features_only=True, + layer=layer, + corpus_key=corpus_key, + ) + return res + + def get_logits(self, net_output): + logits = net_output["x"] + logits = logits.transpose(0, 2) + logits = logits.reshape(-1, logits.size(-1)) + return logits + + def get_targets(self, sample, net_output, expand_steps=True): + x = net_output["x"] + return x.new_zeros(x.size(1) * x.size(2), dtype=torch.long) + + def get_extra_losses(self, net_output): + pen = [] + + if "prob_perplexity" in net_output: + pen.append( + (net_output["num_vars"] - 
class ConvFeatureExtractionModel(nn.Module):
    """Stack of 1-D convolution blocks applied to a B x T input.

    Every block is ``Conv1d -> Dropout -> [norm] -> GELU``. Which norm is used
    depends on ``mode``:

    * ``"layer_norm"``: a layer norm in every block.
    * ``"default"``: a group norm in the first block only, none afterwards.
    """

    def __init__(
        self,
        conv_layers: List[Tuple[int, int, int]],
        dropout: float = 0.0,
        mode: str = "default",
        conv_bias: bool = False,
    ):
        """Build the conv stack.

        Args:
            conv_layers: one ``(dim, kernel_size, stride)`` triple per block.
            dropout: dropout probability used inside every block.
            mode: ``"default"`` or ``"layer_norm"`` (see class docstring).
            conv_bias: whether the convolutions carry a bias term.
        """
        super().__init__()

        assert mode in {"default", "layer_norm"}

        def block(
            n_in,
            n_out,
            k,
            stride,
            is_layer_norm=False,
            is_group_norm=False,
            conv_bias=False,
        ):
            def make_conv():
                conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias)
                nn.init.kaiming_normal_(conv.weight)
                return conv

            assert not (
                is_layer_norm and is_group_norm
            ), "layer norm and group norm are exclusive"

            # Norm layers are sized from this block's own output width
            # (n_out) instead of a variable captured from the enclosing loop.
            if is_layer_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    nn.Sequential(
                        TransposeLast(),
                        Fp32LayerNorm(n_out, elementwise_affine=True),
                        TransposeLast(),
                    ),
                    nn.GELU(),
                )
            if is_group_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    Fp32GroupNorm(n_out, n_out, affine=True),
                    nn.GELU(),
                )
            return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU())

        in_d = 1  # forward() adds a single input channel
        self.conv_layers = nn.ModuleList()
        for i, cl in enumerate(conv_layers):
            assert len(cl) == 3, "invalid conv definition: " + str(cl)
            (dim, k, stride) = cl

            self.conv_layers.append(
                block(
                    in_d,
                    dim,
                    k,
                    stride,
                    is_layer_norm=mode == "layer_norm",
                    is_group_norm=mode == "default" and i == 0,
                    conv_bias=conv_bias,
                )
            )
            in_d = dim

    def forward(self, x):
        """Run the conv stack on ``x`` and return the B x C x T' features."""
        # BxT -> BxCxT
        x = x.unsqueeze(1)

        for conv in self.conv_layers:
            x = conv(x)

        return x
attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + adapter_num=args.adp_num, + adapter_dim=args.adp_dim, + adapter_act_fn=args.adp_act_fn, + ) + else: + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + ) + + layer = fsdp_wrap(layer) + if args.checkpoint_activations: + layer = checkpoint_wrapper(layer) + return layer + + def __init__(self, args: Wav2Vec2Config, skip_pos_conv: bool = False, override_encoder_layer: int = None): + super().__init__() + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + + pos_conv_depth = getattr(args, "pos_conv_depth", 1) + if pos_conv_depth > 1: + num_layers = args.pos_conv_depth + k = max(3, args.conv_pos // num_layers) + + def make_conv_block(e, k, g, l): + return nn.Sequential( + *[ + nn.Sequential( + nn.Conv1d( + e, + e, + kernel_size=k, + padding=k // 2, + groups=g, + ), + SamePad(k), + TransposeLast(), + LayerNorm(e, elementwise_affine=False), + TransposeLast(), + nn.GELU(), + ) + for _ in range(l) + ] + ) + + self.pos_conv = make_conv_block( + self.embedding_dim, k, args.conv_pos_groups, num_layers + ) + elif skip_pos_conv: + self.pos_conv = None + else: + self.pos_conv = make_conv_pos( + self.embedding_dim, + args.conv_pos, + args.conv_pos_groups, + is_batch_norm=args.conv_pos_batch_norm + if hasattr(args, "conv_pos_batch_norm") + else False, + ) + + if override_encoder_layer is None: + encoder_layers = args.encoder_layers + else: + encoder_layers = override_encoder_layer + + self.layers = nn.ModuleList( + 
[self.build_encoder_layer(args, layer_idx=ii) for ii in range(encoder_layers)] + ) + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None, corpus_key=None): + x, layer_results = self.extract_features( + x, padding_mask, layer, corpus_key=corpus_key + ) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features( + self, + x, + padding_mask=None, + tgt_layer=None, + min_layer=0, + corpus_key=None, + ): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + if self.pos_conv is not None: + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + layer_results = [] + r = None + + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() if self.layerdrop > 0 else 1 + if not self.training or (dropout_probability > self.layerdrop): + layer_check = layer + if isinstance(layer, FullyShardedDataParallel): + layer_check = layer.unwrapped_module + if (corpus_key is None) or ( + not isinstance(layer_check, ( + TransformerSentenceEncoderWithAdapterLayer, + ) + ) + ): + x, (z, lr) = layer( + x, self_attn_padding_mask=padding_mask, need_weights=False + ) + else: + x, (z, lr) = layer( + x, + 
class ConformerEncoder(TransformerEncoder):
    """Encoder built from Conformer layers with "rel_pos" or "rope" positions."""

    def build_encoder_layer(self, args, **kwargs):
        """Create one (optionally FSDP-wrapped / checkpointed) Conformer layer.

        ``**kwargs`` is accepted and ignored: the parent constructor calls
        ``build_encoder_layer(args, layer_idx=ii)``, which this override used
        to reject with a TypeError.
        """
        layer = ConformerWav2Vec2EncoderLayer(
            embed_dim=self.embedding_dim,
            ffn_embed_dim=args.encoder_ffn_embed_dim,
            attention_heads=args.encoder_attention_heads,
            dropout=args.dropout,
            depthwise_conv_kernel_size=args.depthwise_conv_kernel_size,
            activation_fn="swish",
            attn_type=args.attn_type,
            pos_enc_type=args.pos_enc_type,
            use_fp16=args.fp16,  # only used for rope
        )
        layer = fsdp_wrap(layer)
        if args.checkpoint_activations:
            layer = checkpoint_wrapper(layer)
        return layer

    def __init__(self, args):
        """Set up positional encoding and the Conformer layer stack.

        Raises:
            Exception: if ``args.pos_enc_type`` is neither ``"rel_pos"`` nor
                ``"rope"``.
        """
        super().__init__(args)
        self.args = args
        self.dropout = args.dropout
        self.embedding_dim = args.encoder_embed_dim
        self.pos_enc_type = args.pos_enc_type
        max_source_positions = self.max_positions()

        if self.pos_enc_type == "rel_pos":
            self.embed_positions = RelPositionalEncoding(
                max_source_positions, self.embedding_dim
            )
        elif self.pos_enc_type == "rope":
            self.embed_positions = None
        else:
            raise Exception("Unsupported positional encoding type")

        # The parent constructor already built a layer stack through the
        # overridden build_encoder_layer; it is rebuilt here, as before.
        self.layers = nn.ModuleList(
            [self.build_encoder_layer(args) for _ in range(args.encoder_layers)]
        )
        self.layer_norm_first = args.layer_norm_first
        self.layer_norm = LayerNorm(self.embedding_dim)
        self.layerdrop = args.encoder_layerdrop

        self.apply(init_bert_params)

    def extract_features(self, x, padding_mask=None, tgt_layer=None, corpus_key=None):
        """Run the Conformer stack over ``x`` (B x T x C in, B x T x C out).

        Args:
            x: input features, B x T x C.
            padding_mask: optional mask; masked positions of ``x`` are zeroed
                and the mask is forwarded to every layer.
            tgt_layer: if given, per-layer ``(x, z)`` results are collected
                and the loop stops after this layer index.
            corpus_key: accepted and ignored. The inherited ``forward`` passes
                ``corpus_key=...``, which this override used to reject with a
                TypeError.

        Returns:
            ``(x, layer_results)``.
        """
        if padding_mask is not None:
            x = index_put(x, padding_mask, 0)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # NOTE(review): x is T x B x C here (it was just transposed); the
        # previous comment said "B X T X C". Confirm the layout that
        # RelPositionalEncoding expects.
        position_emb = None
        if self.pos_enc_type == "rel_pos":
            position_emb = self.embed_positions(x)

        if not self.layer_norm_first:
            x = self.layer_norm(x)

        x = F.dropout(x, p=self.dropout, training=self.training)

        layer_results = []
        r = None
        for i, layer in enumerate(self.layers):
            # LayerDrop: during training a layer is skipped when the draw
            # does not exceed self.layerdrop.
            dropout_probability = np.random.random()
            if not self.training or (dropout_probability > self.layerdrop):
                x, z = layer(
                    x,
                    self_attn_padding_mask=padding_mask,
                    need_weights=False,
                    position_emb=position_emb,
                )
                if tgt_layer is not None:
                    layer_results.append((x, z))
            if i == tgt_layer:
                r = x
                break

        if r is not None:
            x = r

        # T x B x C -> B x T x C
        x = x.transpose(0, 1)

        return x, layer_results
def __init__(
    self,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    activation_fn: str = "relu",
    layer_norm_first: bool = False,
) -> None:
    """Build the self-attention and feed-forward sub-blocks of one layer.

    Args:
        embedding_dim: width of the layer input/output; used as the size of
            the attention module, both layer norms and the ``fc1``/``fc2`` pair.
        ffn_embedding_dim: hidden width between ``fc1`` and ``fc2``.
        num_attention_heads: head count passed to ``MultiheadAttention``.
        dropout: probability for ``dropout1`` and ``dropout3``.
        attention_dropout: dropout passed to ``MultiheadAttention``.
        activation_dropout: probability for ``dropout2``.
        activation_fn: name resolved through ``utils.get_activation_fn``.
        layer_norm_first: stored as ``self.layer_norm_first``.
    """

    super().__init__()
    # Initialize parameters
    self.embedding_dim = embedding_dim
    self.dropout = dropout
    self.activation_dropout = activation_dropout

    # Initialize blocks
    # NOTE: submodules are created in a fixed order; keep it, since creation
    # order determines parameter registration order.
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.self_attn = MultiheadAttention(
        self.embedding_dim,
        num_attention_heads,
        dropout=attention_dropout,
        self_attention=True,
    )

    self.dropout1 = nn.Dropout(dropout)
    self.dropout2 = nn.Dropout(self.activation_dropout)
    self.dropout3 = nn.Dropout(dropout)

    self.layer_norm_first = layer_norm_first

    # layer norm associated with the self attention layer
    self.self_attn_layer_norm = LayerNorm(self.embedding_dim)
    self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim)
    self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim)

    # layer norm associated with the position wise feed-forward NN
    self.final_layer_norm = LayerNorm(self.embedding_dim)
class AdapterFast(nn.Module):
    """Bank of ``adapter_num`` bottleneck adapters stored as stacked tensors.

    Implements adapter modules directly with 3D tensor weights as parameters,
    without using a ``ModuleList``, to speed up training throughput. Each
    adapter computes ``layer_norm -> linear(input_dim -> hidden_dim) ->
    activation -> linear(hidden_dim -> input_dim)``; ``forward`` picks the
    adapter by index.

    Args:
        adapter_num: number of independent adapters.
        input_dim: feature size of the adapter input and output.
        hidden_dim: bottleneck size.
        act_fn: one of ``"relu"``, ``"gelu"`` or ``"selu"``.

    Raises:
        ValueError: if ``act_fn`` is not one of the supported names.
    """

    # Supported activation names mapped to their module constructors.
    _ACTIVATIONS = {"relu": nn.ReLU, "gelu": nn.GELU, "selu": nn.SELU}

    def __init__(self, adapter_num, input_dim, hidden_dim, act_fn):
        super().__init__()

        activation_cls = (
            self._ACTIVATIONS.get(act_fn) if isinstance(act_fn, str) else None
        )
        if activation_cls is None:
            raise ValueError(f"unsupported {act_fn}")

        self.adapter_num = adapter_num
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # Down- and up-projection weights/biases, one slice per adapter.
        self.W_a = nn.Parameter(torch.empty(adapter_num, hidden_dim, input_dim))
        self.W_b = nn.Parameter(torch.empty(adapter_num, input_dim, hidden_dim))
        self.b_a = nn.Parameter(torch.empty(adapter_num, hidden_dim))
        self.b_b = nn.Parameter(torch.empty(adapter_num, input_dim))

        # Per-adapter layer-norm scale and shift.
        self.ln_W = nn.Parameter(torch.empty(adapter_num, input_dim))
        self.ln_b = nn.Parameter(torch.empty(adapter_num, input_dim))

        self.act_fn = activation_cls()
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise every adapter slice and reset the layer norms to identity."""
        for ii in range(self.adapter_num):
            # The order of the random initialisers is kept so that a fixed
            # seed yields the same weights as before.
            nn.init.kaiming_uniform_(self.W_a[ii], a=math.sqrt(5))
            nn.init.kaiming_uniform_(self.W_b[ii], a=math.sqrt(5))
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_a[ii])
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            nn.init.uniform_(self.b_a[ii], -bound, bound)
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_b[ii])
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            nn.init.uniform_(self.b_b[ii], -bound, bound)

        nn.init.ones_(self.ln_W)
        nn.init.zeros_(self.ln_b)

    def forward(self, x, adapter_id):
        """Apply adapter ``adapter_id`` to ``x`` (normalised over the last dim).

        Returns the adapter output only; adding the residual is left to the
        caller.
        """
        ii = adapter_id
        h = F.layer_norm(x, (self.input_dim,), self.ln_W[ii], self.ln_b[ii])
        h = F.linear(h, self.W_a[ii], self.b_a[ii])
        h = self.act_fn(h)
        return F.linear(h, self.W_b[ii], self.b_b[ii])

    def extra_repr(self):
        return ('adapter={}, input_dim={}, hidden_dim={}'.format(self.adapter_num, self.input_dim, self.hidden_dim))
def forward(
    self,
    x: torch.Tensor,
    self_attn_mask: torch.Tensor = None,
    self_attn_padding_mask: torch.Tensor = None,
    need_weights: bool = False,
    att_args=None,
    corpus_key=None,
):
    """Run the parent layer, then add the adapter output as a residual.

    ``corpus_key`` must be given and all of its items must be equal; its
    first item selects which adapter is applied. Returns the same
    ``(x, (attn, layer_result))`` structure as the parent layer.
    """
    hidden, (attn, layer_result) = super().forward(
        x=x,
        self_attn_mask=self_attn_mask,
        self_attn_padding_mask=self_attn_padding_mask,
        need_weights=need_weights,
        att_args=att_args,
    )

    # A batch may only contain a single corpus, hence a single adapter.
    assert corpus_key is not None
    assert len(set(corpus_key)) == 1, f"corpus_key items are not same {corpus_key}"

    adapter_out = self.adapter_layer(hidden, corpus_key[0])
    return hidden + adapter_out, (attn, layer_result)
+ +import contextlib +import copy +import logging +import math +import re +from argparse import Namespace +from dataclasses import dataclass, field +from typing import Any, Optional + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from omegaconf import II, MISSING, open_dict + +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import ( + BaseFairseqModel, + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, +) +from fairseq.models.wav2vec.wav2vec2 import MASKING_DISTRIBUTION_CHOICES, LAYER_TYPE_CHOICES, AdapterFast +from fairseq.modules import LayerNorm, PositionalEmbedding, TransformerDecoderLayer +from fairseq.tasks import FairseqTask + +logger = logging.getLogger(__name__) + + +@dataclass +class Wav2Vec2AsrConfig(FairseqDataclass): + w2v_path: str = field( + default=MISSING, metadata={"help": "path to wav2vec 2.0 model"} + ) + no_pretrained_weights: bool = field( + default=False, metadata={"help": "if true, does not load pretrained weights"} + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + + final_dropout: float = field( + default=0.0, + metadata={"help": "dropout after transformer and before final projection"}, + ) + dropout: float = field( + default=0.0, metadata={"help": "dropout probability inside wav2vec 2.0 model"} + ) + attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights inside wav2vec 2.0 model" + }, + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN inside wav2vec 2.0 model" + }, + ) + + # masking + apply_mask: bool = field( + default=False, metadata={"help": "apply masking during fine-tuning"} + ) + mask_length: int = 
field( + default=10, metadata={"help": "repeat the mask indices multiple times"} + ) + mask_prob: float = field( + default=0.5, + metadata={ + "help": "probability of replacing a token with mask (normalized by length)" + }, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose masks"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument (used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + mask_min_space: Optional[int] = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + require_same_masks: bool = field( + default=True, + metadata={ + "help": "whether to number of masked timesteps must be the same across all " + "examples in a batch" + }, + ) + mask_dropout: float = field( + default=0.0, + metadata={"help": "percent of masks to unmask for each sample"}, + ) + + # channel masking + mask_channel_length: int = field( + default=10, metadata={"help": "length of the mask for features (channels)"} + ) + mask_channel_prob: float = field( + default=0.0, metadata={"help": "probability of replacing a feature with 0"} + ) + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument (used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, metadata={"help": "whether to allow channel masks to overlap"} + ) + freeze_finetune_updates: int = field( + default=0, metadata={"help": "dont finetune wav2vec for this many updates"} + ) + feature_grad_mult: float = field( + default=0.0, metadata={"help": "reset feature grad 
mult in wav2vec 2.0 to this"} + ) + layerdrop: float = field( + default=0.0, metadata={"help": "probability of dropping a layer in wav2vec 2.0"} + ) + drop_path: float = 0 + mask_channel_min_space: Optional[int] = field( + default=1, + metadata={"help": "min space between spans (if no overlap is enabled)"}, + ) + mask_channel_before: bool = False + normalize: bool = II("task.normalize") + update_alibi: bool = True + data: str = II("task.data") + # this holds the loaded wav2vec args + w2v_args: Any = None + offload_activations: bool = field( + default=False, metadata={"help": "offload_activations"} + ) + min_params_to_wrap: int = field( + default=int(1e8), + metadata={ + "help": "minimum number of params for a layer to be wrapped with FSDP() when " + "training with --ddp-backend=fully_sharded. Smaller values will " + "improve memory efficiency, but may make torch.distributed " + "communication less efficient due to smaller input sizes. This option " + "is set to 0 (i.e., always wrap) when --checkpoint-activations or " + "--offload-activations are passed." 
@register_model("wav2vec_ctc", dataclass=Wav2Vec2CtcConfig)
class Wav2VecCtc(BaseFairseqModel):
    """CTC model that wraps a ``Wav2VecEncoder`` and treats its output as logits."""

    def __init__(self, cfg: Wav2Vec2CtcConfig, w2v_encoder: BaseFairseqModel):
        super().__init__()
        self.cfg = cfg
        self.w2v_encoder = w2v_encoder
        self.blank_weight = cfg.blank_weight
        self.blank_mode = cfg.blank_mode

    def upgrade_state_dict_named(self, state_dict, name):
        super().upgrade_state_dict_named(state_dict, name)
        return state_dict

    @classmethod
    def build_model(cls, cfg: Wav2Vec2CtcConfig, task: FairseqTask):
        """Build a new model instance."""
        vocab_size = len(task.target_dictionary)
        return cls(cfg, Wav2VecEncoder(cfg, vocab_size))

    def get_logits(self, net_output, normalize=False):
        """Return the logits of ``net_output`` with blank and padding handling.

        Note that the tensor under ``net_output["encoder_out"]`` is modified
        in place, and ``net_output["padding_mask"]`` may be replaced by a
        left-padded copy.
        """
        logits = net_output["encoder_out"]

        if self.blank_weight != 0:
            if self.blank_mode not in ("add", "set"):
                raise Exception(f"invalid blank mode {self.blank_mode}")
            if self.blank_mode == "add":
                logits[..., 0] += self.blank_weight
            else:
                logits[..., 0] = self.blank_weight

        pad_mask = net_output["padding_mask"]
        if pad_mask is not None and pad_mask.any():
            # Padded frames get -inf on every class except index 0.
            fill_row = torch.ones(logits.size(-1), device=logits.device) * float(
                "-inf"
            )
            fill_row[0] = 0

            if logits.size(0) > pad_mask.size(1):
                pad_mask = F.pad(pad_mask, (1, 0), value=False)
                net_output["padding_mask"] = pad_mask

            logits[pad_mask.T] = fill_row.type_as(logits)

        if not normalize:
            return logits
        return utils.log_softmax(logits.float(), dim=-1)

    def get_normalized_probs(self, net_output, log_probs):
        """Get normalized probabilities (or log probs) from a net's output."""
        scores = self.get_logits(net_output).float()
        normalizer = utils.log_softmax if log_probs else utils.softmax
        return normalizer(scores, dim=-1)

    def forward(self, **kwargs):
        return self.w2v_encoder(**kwargs)
@register_model("wav2vec_seq2seq", dataclass=Wav2Vec2Seq2SeqConfig)
class Wav2Vec2Seq2SeqModel(FairseqEncoderDecoderModel):
    """Encoder-decoder ASR model: ``Wav2VecEncoder`` plus ``TransformerDecoder``."""

    def __init__(self, encoder, decoder):
        super().__init__(encoder, decoder)

    @classmethod
    def build_model(cls, cfg: Wav2Vec2Seq2SeqConfig, task: FairseqTask):
        """Build a new model instance.

        The instance is created through ``cls`` so that subclasses get an
        object of their own type.
        """

        assert (
            cfg.autoregressive
        ), "Please set task.autoregressive=true for seq2seq asr models"

        tgt_dict = task.target_dictionary

        def build_embedding(dictionary, embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            return Embedding(num_embeddings, embed_dim, padding_idx)

        decoder_embed_tokens = build_embedding(tgt_dict, cfg.decoder_embed_dim)

        encoder = cls.build_encoder(cfg)
        decoder = cls.build_decoder(cfg, tgt_dict, decoder_embed_tokens)

        return cls(encoder, decoder)

    @classmethod
    def build_encoder(cls, cfg: Wav2Vec2AsrConfig):
        """Create the encoder; no output size is passed."""
        return Wav2VecEncoder(cfg)

    @classmethod
    def build_decoder(cls, cfg: Wav2Vec2Seq2SeqConfig, tgt_dict, embed_tokens):
        """Create the transformer decoder over ``tgt_dict``."""
        return TransformerDecoder(cfg, tgt_dict, embed_tokens)

    def forward(self, **kwargs):
        """Encode with ``kwargs``, then decode with the same ``kwargs`` plus the encoder output."""
        encoder_out = self.encoder(**kwargs)
        decoder_out = self.decoder(encoder_out=encoder_out, **kwargs)
        return decoder_out

    def upgrade_state_dict_named(self, state_dict, name):
        super().upgrade_state_dict_named(state_dict, name)
        return state_dict
cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "require_same_masks": getattr(cfg, "require_same_masks", True), + "pct_holes": getattr(cfg, "mask_dropout", 0), + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_before": cfg.mask_channel_before, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "checkpoint_activations": cfg.checkpoint_activations, + "offload_activations": cfg.offload_activations, + "min_params_to_wrap": cfg.min_params_to_wrap, + # d2v multi args + "encoder_dropout": cfg.dropout, + "drop_path": getattr(cfg, "drop_path", 0), + "mask_dropout": getattr(cfg, "mask_dropout", 0), + "zero_mask": getattr(cfg, "zero_mask", False), + "local_grad_mult": cfg.feature_grad_mult, + "layerdrop": cfg.layerdrop, + "prenet_layerdrop": cfg.layerdrop, + "prenet_dropout": cfg.dropout, + "post_mlp_drop": cfg.dropout, + "encoder_zero_mask": getattr(cfg, "zero_mask", False), + "inverse_mask": False, + "learned_alibi_scale": getattr(cfg, "update_alibi", True), + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + w2v_args.criterion = None + w2v_args.lr_scheduler = None + + cfg.w2v_args = w2v_args + + logger.info(w2v_args) + + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = 
w2v_args = convert_namespace_to_omegaconf(w2v_args) + + self.is_d2v_multi = "data2vec_multi" in w2v_args.model.get("_name", None) + + if not self.is_d2v_multi: + model_normalized = w2v_args.task.get( + "normalize", w2v_args.model.get("normalize", False) + ) + assert cfg.normalize == model_normalized, ( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for both pre-training and here" + ) + + with open_dict(w2v_args): + args_replacement = ["checkpoint_activations", "layer_type", + "adp_num", "adp_dim", + "adp_act_fn", "adp_trf_idx"] + for _args in args_replacement: + if hasattr(cfg, _args) and getattr(cfg, _args, None) is not None: + w2v_args.model[_args] = getattr(cfg, _args, None) + + if hasattr(cfg, "checkpoint_activations") and cfg.checkpoint_activations: + with open_dict(w2v_args): + w2v_args.model.checkpoint_activations = cfg.checkpoint_activations + + w2v_args.task.data = cfg.data + task = tasks.setup_task(w2v_args.task, from_checkpoint=True) + model = task.build_model(w2v_args.model, from_checkpoint=True) + model.remove_pretraining_modules() + d = w2v_args.model.encoder_embed_dim + else: + assert cfg.normalize + + if hasattr(w2v_args.task, "audio"): + w2v_args.task.audio.data = cfg.data + else: + w2v_args.task.data = cfg.data + task = tasks.setup_task(w2v_args.task, from_checkpoint=True) + + model = task.build_model(w2v_args.model, from_checkpoint=True) + + model.remove_pretraining_modules(modality="audio") + d = w2v_args.model.embed_dim + + if state is not None and not cfg.no_pretrained_weights: + if cfg.load_ema: + assert "_ema" in state["model"] + for k in state["model"]["_ema"]: + mk = "encoder." 
def load_model_weights(self, state, model, cfg):
    """Copy the pretrained weights in ``state["model"]`` into ``model``.

    Args:
        state: checkpoint dict; only ``state["model"]`` is read. In the
            non-FSDP branch entries are deleted from it in place.
        model: module that receives the weights.
        cfg: config providing ``ddp_backend`` and, for models that have
            ``modality_encoders``, ``zero_mask``.
    """
    if cfg.ddp_backend == "fully_sharded":
        from fairseq.distributed import FullyShardedDataParallel

        for name, module in model.named_modules():
            if "encoder.layers" in name and len(name.split(".")) == 3:
                # Only for layers, we do a special handling and load the weights one by one
                # We dont load all weights together as that wont be memory efficient and may
                # cause oom
                new_dict = {
                    k.replace(name + ".", ""): v
                    for (k, v) in state["model"].items()
                    if name + "." in k
                }
                assert isinstance(module, FullyShardedDataParallel)
                with module.summon_full_params():
                    module.load_state_dict(new_dict, strict=True)
                module._reset_lazy_init()

        # Once layers are loaded, filter them out and load everything else.
        # Raw string: "\d" in a normal string literal is an invalid escape
        # sequence. The pattern itself is unchanged.
        layer_key = re.compile(r"encoder.layers.\d.")
        already_loaded = {k for k in state["model"] if layer_key.match(k)}

        new_big_dict = {
            k: v for (k, v) in state["model"].items() if k not in already_loaded
        }

        model.load_state_dict(new_big_dict, strict=False)
    else:
        # These entries are removed before the strict load below.
        for k in ("_ema", "target_proj", "decoder"):
            state["model"].pop(k, None)

        if hasattr(model, "modality_encoders"):
            if "modality_encoders.AUDIO.encoder_mask" not in state["model"]:
                model.modality_encoders["AUDIO"].encoder_mask = None
            elif not cfg.zero_mask:
                model.modality_encoders["AUDIO"].encoder_mask = None
                del state["model"]["modality_encoders.AUDIO.encoder_mask"]

            # Keep only the AUDIO modality encoder weights.
            for k in list(state["model"].keys()):
                if k.startswith("modality_encoders.") and not k.startswith(
                    "modality_encoders.AUDIO"
                ):
                    del state["model"][k]

        # NOTE(review): this dumps the whole module tree to stdout on every
        # load; consider routing it through the module logger instead.
        print(model)
        model.load_state_dict(state["model"], strict=True)
def reorder_encoder_out(self, encoder_out, new_order):
    """Reorder the batch entries of ``encoder_out`` in place and return it.

    ``encoder_out["encoder_out"]`` is indexed along dim 1 and
    ``encoder_out["padding_mask"]`` along dim 0; ``None`` entries are left
    untouched.
    """
    for key, batch_dim in (("encoder_out", 1), ("padding_mask", 0)):
        value = encoder_out[key]
        if value is not None:
            encoder_out[key] = value.index_select(batch_dim, new_order)
    return encoder_out
def forward(
    self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused
):
    """
    Args:
        prev_output_tokens (LongTensor): previous decoder outputs of shape
            `(batch, tgt_len)`, for teacher forcing. A list of 1-D tensors
            of differing lengths is also accepted and is right-padded into
            one batch tensor.
        encoder_out (Tensor, optional): output from the encoder, used for
            encoder-side attention
        incremental_state (dict): dictionary used for storing state during
            :ref:`Incremental decoding`

    Returns:
        tuple:
            - the decoder's output of shape `(batch, tgt_len, vocab)`
            - a dictionary with any model-specific outputs
    """

    if isinstance(prev_output_tokens, list):
        max_len = max(len(seq) for seq in prev_output_tokens)
        # Allocate as int64 directly: a float32 buffer cannot represent
        # token ids above 2**24 exactly.
        # NOTE(review): the fill value is 0, not self.padding_idx -- confirm
        # that this is intended.
        padded = torch.zeros(
            [len(prev_output_tokens), max_len],
            dtype=torch.long,
            device=prev_output_tokens[0].device,
        )
        for row, seq in enumerate(prev_output_tokens):
            padded[row, : len(seq)] = seq
        prev_output_tokens = padded

    prev_output_tokens = prev_output_tokens.long()
    x, extra = self.extract_features(
        prev_output_tokens, encoder_out, incremental_state
    )
    x = self.output_layer(x)
    return x, extra
def output_layer(self, features, **kwargs):
    """Project features to the vocabulary size."""
    # The projection matrix is either the input embedding matrix or the
    # separate ``embed_out`` parameter.
    if self.share_input_output_embed:
        projection = self.embed_tokens.weight
    else:
        projection = self.embed_out
    return F.linear(features, projection)
def Embedding(num_embeddings, embedding_dim, padding_idx):
    """Create an ``nn.Embedding`` with scaled-normal initialisation.

    Weights are drawn from ``N(0, embedding_dim ** -0.5)``. When
    ``padding_idx`` is given, that row is set to zero afterwards.

    Args:
        num_embeddings: vocabulary size.
        embedding_dim: size of each embedding vector.
        padding_idx: index of the padding row, or ``None`` for no padding row.
    """
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5)
    # Indexing with ``None`` would select the whole matrix (it adds an axis),
    # so the zero-fill must be skipped when there is no padding row.
    if padding_idx is not None:
        nn.init.constant_(m.weight[padding_idx], 0)
    return m
@dataclass
class Wav2Vec2ClassificationConfig(Wav2Vec2AsrConfig):
    """Options for the wav2vec 2.0 classification model, on top of the ASR config."""

    # Width of the latent layer between the encoder output and the class
    # projection. MeanPoolingFast falls back to the encoder width when None.
    latent_embed_dim: Optional[int] = field(
        default=None, metadata={"help": "latent dim (encoder w2v -> latent -> class"}
    )
    # Name of the pooling head that get_pooling_layer() is asked to build.
    pooling: str = field(
        default="first_token",
        metadata={"help": "pooling layer choices"},
    )
    # Activation used inside the pooling heads; they resolve the name through
    # utils.get_activation_fn.
    activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field(
        default="gelu", metadata={"help": "activation function to use"}
    )
@register_model("wav2vec_classification", dataclass=Wav2Vec2ClassificationConfig)
class Wav2VecClassification(BaseFairseqModel):
    """Sequence classifier: a wav2vec encoder followed by a pooling head."""

    # TODO: Can be shared/merged with ASR model class as w2v_encoder params are common.
    def __init__(
        self,
        cfg: Wav2Vec2ClassificationConfig,
        w2v_encoder: BaseFairseqModel,
        pooling_layer,
    ):
        """Store the config, the encoder and the pooling head."""
        super().__init__()
        self.cfg = cfg
        self.w2v_encoder = w2v_encoder
        self.pooling_layer = pooling_layer

    def upgrade_state_dict_named(self, state_dict, name):
        """Run the parent upgrade hook and hand back the same ``state_dict``."""
        super().upgrade_state_dict_named(state_dict, name)
        return state_dict

    @classmethod
    def build_model(cls, cfg: Wav2Vec2ClassificationConfig, task: FairseqTask):
        """Build a new model instance."""
        encoder = Wav2VecEncoder(cfg, None)
        transformer_layers = encoder.w2v_model.encoder.layers
        head = get_pooling_layer(
            cfg,
            transformer_layers[-1].embedding_dim,
            len(task.target_dictionary),
            len(transformer_layers),
        )
        return cls(cfg, encoder, head)

    def get_normalized_probs(self, net_output, log_probs):
        """Get normalized probabilities (or log probs) from a net's output."""
        normalize = utils.log_softmax if log_probs else utils.softmax
        return normalize(net_output.float(), dim=-1)

    def get_logits(self, net_output):
        """Return the network output unchanged; it already holds the logits."""
        return net_output

    def forward(self, **kwargs):
        """Encode the input and pool the last-layer features into class scores."""
        encoded = self.w2v_encoder(**kwargs)
        return self.pooling_layer(
            last_layer_feats=encoded["encoder_out"],  # TxBxC
            padding_mask=encoded["padding_mask"],  # BxT
            # all_layer_feats=encoded["layer_results"],
        )

    # def forward_latent(self, **kwargs):
    #     encoder_out_dict = self.w2v_encoder(**kwargs)
    #     w2v_encoder_out = encoder_out_dict["encoder_out"]
    #     w2v_encoder_padding_mask = encoder_out_dict["encoder_padding_mask"]
    #     w2v_encoder_layer_results = encoder_out_dict["layer_results"]
    #     return self.pooling_layer.forward_latent(
    #         last_layer_feats=w2v_encoder_out,
    #         padding_mask=w2v_encoder_padding_mask,
    #         all_layer_feats=w2v_encoder_layer_results,
    #     )
def get_pooling_layer(
    cfg: "Wav2Vec2ClassificationConfig",
    encoder_embed_dim: int,
    num_targets: int,
    encoder_layers: int,
):
    """Build the pooling/classification head named by ``cfg.pooling``.

    Args:
        cfg: classification config; ``cfg.pooling`` selects the head.
        encoder_embed_dim: feature width produced by the encoder.
        num_targets: number of output classes.
        encoder_layers: encoder depth, only consumed by the ``"elmo"`` head.

    Returns:
        The constructed head module.

    Raises:
        NotImplementedError: if ``cfg.pooling`` names no known head.
    """
    pooling = cfg.pooling
    if pooling == "elmo":
        # NOTE(review): this head's forward() requires ``all_layer_feats``;
        # confirm the calling model supplies it before selecting "elmo".
        return LayerWeightedMeanPooling(
            cfg, encoder_embed_dim, num_targets, encoder_layers
        )
    heads = {
        "first_token": FirstToken,
        "mean": MeanPoolingFast,
        "mean_amsoftmax": MeanPoolingFastAMSoftmax,
        "max": MaxPoolingFast,
    }
    if pooling not in heads:
        raise NotImplementedError(f"{cfg.pooling} has not been implemented yet.")
    return heads[pooling](cfg, encoder_embed_dim, num_targets)


def _valid_frame_mask(last_layer_feats, padding_mask):
    """Turn a BxT padding mask into a BxT validity mask in the feature dtype.

    Returns ``None`` when there is no padding mask, i.e. every frame is valid.
    """
    if padding_mask is None:
        return None
    return (~padding_mask).to(last_layer_feats.dtype)


class Pooling(nn.Module):
    """Base head that owns the final ``encoder_embed_dim -> num_targets`` projection."""

    def __init__(
        self,
        cfg: "Wav2Vec2ClassificationConfig",
        encoder_embed_dim: int,
        num_targets: int,
    ):
        super().__init__()
        self.projection = Linear(encoder_embed_dim, num_targets)

    def forward(self, last_layer_feats, **kwargs):
        """Subclasses must implement the pooling strategy."""
        raise NotImplementedError()


class FirstToken(Pooling):
    """Classify from the first time step of the encoder output."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, last_layer_feats, **kwargs):
        """Project the first frame.

        Args:
            last_layer_feats: TxBxD encoder features.

        Returns:
            BxK class scores.
        """
        # Features are time-major, so the first frame is index 0 on axis 0;
        # ``[:, 0]`` would pick the first batch item instead.
        return self.projection(last_layer_feats[0])


def fn_mean(x, mask):
    """Average over time, ignoring masked-out frames.

    Args:
        x: TxBxD
        mask: BxT validity mask (non-zero = keep), or None to keep every frame
    Return:
        y: BxD
    """
    if mask is None:
        return x.sum(0) / x.shape[0]
    weights = mask.t()[:, :, None]  # BxT -> TxBx1
    # A fully padded sequence has zero valid frames; clamp the count so the
    # result is 0 rather than NaN from 0/0.
    return (x * weights).sum(0) / weights.sum(0).clamp(min=1)


class MeanPoolingFast(nn.Module):
    """Linear -> masked mean over time -> activation -> class projection."""

    def __init__(
        self,
        cfg: "Wav2Vec2ClassificationConfig",
        encoder_embed_dim: int,
        num_targets: int,
        **kwargs,
    ):
        super().__init__()
        self.activation_fn = utils.get_activation_fn(cfg.activation_fn)
        # Fall back to the encoder width when no latent size is configured.
        self.latent_embed_dim = (
            cfg.latent_embed_dim
            if cfg.latent_embed_dim is not None
            else encoder_embed_dim
        )
        logging.debug(f"| {self.latent_embed_dim=}")
        self.linear = Linear(encoder_embed_dim, self.latent_embed_dim)
        self.projection = Linear(self.latent_embed_dim, num_targets)

    def forward(self, last_layer_feats, padding_mask, **kwargs):
        """
        Arguments
            features - [TxBxD] Acoustic feature with shape
            padding_mask - [BxT] Padding Mask (True = padded), may be None
        Returns
            [BxK] class scores
        """
        feat = self.forward_latent(last_layer_feats, padding_mask)
        feat = self.activation_fn(feat)
        return self.projection(feat)

    def forward_latent(self, last_layer_feats, padding_mask, **kwargs):
        """
        Arguments
            features - [TxBxD] Acoustic feature with shape
            padding_mask - [BxT] Padding Mask (True = padded), may be None
        Returns
            [BxL] pooled latent features, before activation and projection
        """
        feat_mask = _valid_frame_mask(last_layer_feats, padding_mask)
        feat = self.linear(last_layer_feats)
        return fn_mean(feat, feat_mask)


class MeanPoolingFastAMSoftmax(MeanPoolingFast):
    """Mean pooling head that outputs cosine similarities for AM-softmax."""

    def __init__(
        self,
        cfg: "Wav2Vec2ClassificationConfig",
        encoder_embed_dim: int,
        num_targets: int,
        **kwargs,
    ):
        super().__init__(cfg, encoder_embed_dim, num_targets, **kwargs)
        # Cosine scoring has no bias term.
        self.projection = Linear(self.latent_embed_dim, num_targets, bias=False)
        nn.init.xavier_normal_(self.projection.weight, gain=1)

    def forward(self, last_layer_feats, padding_mask, **kwargs):
        """
        Arguments
            features - [TxBxD] Acoustic feature with shape
            padding_mask - [BxT] Padding Mask (True = padded), may be None
        Returns
            [BxK] cosine similarity between each pooled feature and each
            class weight vector
        """
        feat_mask = _valid_frame_mask(last_layer_feats, padding_mask)
        feat = self.linear(last_layer_feats)  # T,B,L
        feat = fn_mean(feat, feat_mask)  # B,L
        feat = self.activation_fn(feat)
        feat_norm = F.normalize(feat, p=2, dim=-1)  # B,L
        # projection.weight is K,L: normalise each class vector to unit
        # length first, then transpose, so the product is a true cosine.
        weight_norm = F.normalize(self.projection.weight, p=2, dim=-1).t()  # L,K
        return feat_norm @ weight_norm


def fn_max(x, mask):
    """Maximum over time, ignoring masked-out frames.

    Args:
        x: TxBxD
        mask: BxT validity mask (non-zero = keep), or None to keep every frame
    Return:
        y: BxD
    """
    if mask is None:
        return x.max(0)[0]
    valid = mask.t()[:, :, None].to(torch.bool)  # BxT -> TxBx1
    # Padded frames must never win the max, so push them to -inf.
    return x.masked_fill(~valid, float("-inf")).max(0)[0]


class MaxPoolingFast(Pooling):
    """Linear -> masked max over time -> activation -> class projection."""

    def __init__(
        self,
        cfg: "Wav2Vec2ClassificationConfig",
        encoder_embed_dim: int,
        num_targets: int,
        **kwargs,
    ):
        super().__init__(cfg, encoder_embed_dim, num_targets)
        self.activation_fn = utils.get_activation_fn(cfg.activation_fn)
        self.linear = Linear(encoder_embed_dim, encoder_embed_dim)

    def forward(self, last_layer_feats, padding_mask, **kwargs):
        """
        Arguments
            features - [TxBxD] Acoustic feature with shape
            padding_mask - [BxT] Padding Mask (True = padded), may be None
        Returns
            [BxK] class scores
        """
        feat_mask = _valid_frame_mask(last_layer_feats, padding_mask)
        feat = self.linear(last_layer_feats)
        feat = fn_max(feat, feat_mask)
        feat = self.activation_fn(feat)
        return self.projection(feat)


class LayerWeightedMeanPooling(MeanPoolingFast):
    """Elmo-style weighted average representation."""

    def __init__(
        self,
        cfg: "Wav2Vec2ClassificationConfig",
        encoder_embed_dim: int,
        num_targets: int,
        encoder_layers: int,
    ):
        super().__init__(cfg, encoder_embed_dim, num_targets)
        self.num_layers = encoder_layers
        # One learnable scalar per encoder layer, softmax-normalised in forward.
        self.weights = nn.Parameter(torch.ones(encoder_layers))

    def forward(self, last_layer_feats, padding_mask, all_layer_feats):
        """Mean-pool a learned convex combination of all encoder layers.

        Arguments
            last_layer_feats - unused here; the mix of ``all_layer_feats``
                replaces it
            padding_mask - [BxT] Padding Mask (True = padded), may be None
            all_layer_feats - sequence of per-layer feature tensors, all of
                the same shape
        Returns
            [BxK] class scores
        """
        if not self.training:
            msg = (
                f"Number of layers in input features = {len(all_layer_feats)}."
                f" Expected {self.num_layers} layers."
            )
            assert len(all_layer_feats) == self.num_layers, msg

        # Stack up all layers and reshape to (num_layers, features)
        all_layer_feats_stacked = torch.stack(all_layer_feats, dim=0)
        num_layers, *original_feat_shape = all_layer_feats_stacked.shape
        all_layer_feats_stacked_flat = all_layer_feats_stacked.view(num_layers, -1)

        # Weighted average
        normalized_weights = F.softmax(self.weights, dim=-1)
        weighted_avg_features = (
            normalized_weights.unsqueeze(-1) * all_layer_feats_stacked_flat
        ).sum(dim=0)
        weighted_avg_features = weighted_avg_features.view(*original_feat_shape)

        # Mean Pooling on weighted average features.
        return super().forward(weighted_avg_features, padding_mask)
@register_model("wav2vec2_laser", dataclass=Wav2Vec2CtcConfig)
class Wav2VecLaser(Wav2VecCtc):
    """CTC wav2vec model whose forward pass max-pools the encoder output over time."""

    def __init__(self, cfg: Wav2Vec2CtcConfig, w2v_encoder: BaseFairseqModel):
        """Initialise the parent and record the fine-tuning freeze settings."""
        super().__init__(cfg, w2v_encoder)
        self.num_updates = 0
        self.freeze_finetune_updates = cfg.freeze_finetune_updates

    @classmethod
    def build_model(cls, cfg: Wav2Vec2CtcConfig, task: FairseqTask):
        """Build a new model instance."""
        return cls(cfg, Wav2VecEncoder(cfg, 1024))

    def forward(self, **kwargs):
        """Return the time-wise maximum of the scaled encoder output.

        The parent forward is expected to return a dict with ``encoder_out``
        and ``padding_mask``; the output is scaled by 0.01 before pooling
        over axis 0.
        """
        output = super().forward(**kwargs)
        scaled = output["encoder_out"] * 0.01
        pad_mask = output["padding_mask"]
        if pad_mask is None or not pad_mask.any():
            return scaled.max(dim=0)[0]
        # Set padded outputs to -inf so they are not selected by max-pooling
        masked = (
            scaled.float()
            .masked_fill_(pad_mask.T.unsqueeze(-1), float("-inf"))
            .type_as(scaled)
        )
        return masked.max(dim=0)[0]
class XMODHubInterface(RobertaHubInterface):
    """Hub interface for X-MOD that threads ``lang_id`` through to the model."""

    def extract_features(
        self,
        tokens: torch.LongTensor,
        return_all_hiddens: bool = False,
        lang_id=None,
    ) -> torch.Tensor:
        """Run the model in features-only mode.

        Args:
            tokens: token ids; a 1-D tensor is treated as a batch of one.
            return_all_hiddens: return every layer's states instead of only
                the last layer's features.
            lang_id: passed to the model unchanged.

        Returns:
            The last layer's features, or a list of per-layer states
            transposed to batch-first when ``return_all_hiddens`` is set.

        Raises:
            ValueError: if the sequence is longer than the model supports.
        """
        if tokens.dim() == 1:
            tokens = tokens.unsqueeze(0)
        length = tokens.size(-1)
        limit = self.model.max_positions()
        if length > limit:
            raise ValueError(
                "tokens exceeds maximum length: {} > {}".format(length, limit)
            )
        features, extra = self.model(
            tokens.to(device=self.device),
            features_only=True,
            return_all_hiddens=return_all_hiddens,
            lang_id=lang_id,
        )
        if not return_all_hiddens:
            return features  # just the last layer's features
        # convert from T x B x C -> B x T x C
        return [state.transpose(0, 1) for state in extra["inner_states"]]

    def predict(
        self,
        head: str,
        tokens: torch.LongTensor,
        return_logits: bool = False,
        lang_id=None,
    ):
        """Score ``tokens`` with the named classification head.

        Returns raw logits when ``return_logits`` is set, otherwise
        log-probabilities over the last axis.
        """
        on_device = tokens.to(device=self.device)
        features = self.extract_features(on_device, lang_id=lang_id)
        logits = self.model.classification_heads[head](features)
        return logits if return_logits else F.log_softmax(logits, dim=-1)
@register_model("xmod")
class XMODModel(XLMRModel):
    """XLM-R style model whose encoder accepts a per-call ``lang_id``."""

    @classmethod
    def hub_models(cls):
        """Map released checkpoint names to their download URLs."""
        archives = {
            "xmod.base": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.81.1M.tar.gz",
            "xmod.large.prenorm": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.large.prenorm.81.500k.tar.gz",
            "xmod.base.13.125k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.13.125k.tar.gz",
            "xmod.base.30.125k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.30.125k.tar.gz",
            "xmod.base.30.195k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.30.195k.tar.gz",
            "xmod.base.60.125k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.60.125k.tar.gz",
            "xmod.base.60.265k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.60.265k.tar.gz",
            "xmod.base.75.125k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.75.125k.tar.gz",
            "xmod.base.75.269k": "https://dl.fbaipublicfiles.com/fairseq/models/xmod/xmod.base.75.269k.tar.gz",
        }
        return archives

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file="model.pt",
        data_name_or_path=".",
        bpe="sentencepiece",
        **kwargs,
    ):
        """Load a checkpoint through ``hub_utils`` and wrap it in an XMODHubInterface."""
        from fairseq import hub_utils

        loaded = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs,
        )
        return XMODHubInterface(loaded["args"], loaded["task"], loaded["models"][0])

    @classmethod
    def build_model(cls, args, task):
        """Build a new model instance."""

        from omegaconf import OmegaConf

        # OmegaConf configs are unlocked while defaults are filled in and
        # locked again once the encoder has been built.
        is_omegaconf = OmegaConf.is_config(args)
        if is_omegaconf:
            OmegaConf.set_struct(args, False)

        # make sure all arguments are present
        base_architecture(args)

        if not hasattr(args, "max_positions"):
            if not hasattr(args, "tokens_per_sample"):
                args.tokens_per_sample = task.max_positions()
            args.max_positions = args.tokens_per_sample

        encoder = XMODEncoder(args, task.source_dictionary)

        if is_omegaconf:
            OmegaConf.set_struct(args, True)

        return cls(args, encoder)

    def forward(
        self,
        src_tokens,
        features_only=False,
        return_all_hiddens=False,
        classification_head_name=None,
        lang_id=None,
        **kwargs,
    ):
        """Encode ``src_tokens`` and optionally apply a classification head.

        Naming a classification head forces features-only encoding, and the
        head is then applied to the encoder output.

        Returns:
            ``(output, extra)`` as produced by the encoder, with ``output``
            replaced by the head's result when a head is named.
        """
        use_head = classification_head_name is not None
        if use_head:
            features_only = True
        x, extra = self.encoder(
            src_tokens, features_only, return_all_hiddens, lang_id=lang_id, **kwargs
        )
        if use_head:
            x = self.classification_heads[classification_head_name](x)
        return x, extra
class XMODEncoder(RobertaEncoder):
    """XMOD encoder."""

    def build_encoder(self, args, dictionary, embed_tokens):
        """Create the X-MOD transformer encoder and apply BERT-style init."""
        sentence_encoder = XMODTransformerEncoder(args, dictionary, embed_tokens)
        sentence_encoder.apply(init_bert_params)
        return sentence_encoder

    def forward(
        self,
        src_tokens,
        features_only=False,
        return_all_hiddens=False,
        masked_tokens=None,
        lang_id=None,
        **unused,
    ):
        """
        Args:
            src_tokens (LongTensor): input tokens of shape `(batch, src_len)`
            features_only (bool, optional): skip LM head and just return
                features. If True, the output will be of shape
                `(batch, src_len, embed_dim)`.
            return_all_hiddens (bool, optional): also return all of the
                intermediate hidden states (default: False).
            masked_tokens: forwarded to the output layer when the LM head
                is applied.
            lang_id: forwarded to the sentence encoder.

        Returns:
            tuple:
                - the LM output of shape `(batch, src_len, vocab)`
                - a dictionary of additional data, where 'inner_states'
                  is a list of hidden states (``None`` unless
                  *return_all_hiddens* is set).
        """
        features, extra = self.extract_features(
            src_tokens, return_all_hiddens=return_all_hiddens, lang_id=lang_id
        )
        if features_only:
            return features, extra
        return self.output_layer(features, masked_tokens=masked_tokens), extra

    def extract_features(
        self, src_tokens, return_all_hiddens=False, lang_id=None, **kwargs
    ):
        """Run the sentence encoder and return batch-first features.

        An optional ``token_embeddings`` keyword is passed on to the
        sentence encoder.

        Returns:
            ``(features, {"inner_states": ...})`` where ``inner_states`` is
            the encoder's ``encoder_states`` list when *return_all_hiddens*
            is set and ``None`` otherwise.
        """
        encoder_out = self.sentence_encoder(
            src_tokens,
            return_all_hiddens=return_all_hiddens,
            lang_id=lang_id,
            token_embeddings=kwargs.get("token_embeddings"),
        )
        # T x B x C -> B x T x C
        features = encoder_out["encoder_out"][0].transpose(0, 1)
        inner_states = None
        if return_all_hiddens:
            inner_states = encoder_out["encoder_states"]
        return features, {"inner_states": inner_states}
class XMODTransformerEncoder(TransformerEncoder):
    """Transformer encoder built from X-MOD layers; passes ``lang_id`` to every layer."""

    def build_encoder_layer(self, cfg):
        """Create one X-MOD encoder layer, optionally checkpointed, then FSDP-wrapped."""
        layer = XMODTransformerEncoderLayerBase(cfg)
        checkpoint = cfg.checkpoint_activations
        if checkpoint:
            offload_to_cpu = cfg.offload_activations
            layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu)
        # if we are checkpointing, enforce that FSDP always wraps the
        # checkpointed layer, regardless of layer size
        min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0
        layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap)
        return layer

    def forward(
        self,
        src_tokens,
        src_lengths: Optional[torch.Tensor] = None,
        return_all_hiddens: bool = False,
        token_embeddings: Optional[torch.Tensor] = None,
        lang_id=None,
    ):
        """
        Thin wrapper that delegates to :meth:`forward_scriptable`.

        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (torch.LongTensor): lengths of each source sentence of
                shape `(batch)`
            return_all_hiddens (bool, optional): also return all of the
                intermediate hidden states (default: False).
            token_embeddings (torch.Tensor, optional): precomputed embeddings
                default `None` will recompute embeddings
            lang_id: passed unchanged to every encoder layer

        Returns:
            dict: the dictionary produced by :meth:`forward_scriptable`
        """
        return self.forward_scriptable(
            src_tokens,
            src_lengths,
            return_all_hiddens,
            token_embeddings,
            lang_id=lang_id,
        )
        # TorchScript doesn't support super() method so that the scriptable Subclass
        # can't access the base class model in Torchscript.
        # Current workaround is to add a helper function with different name and
        # call the helper function from scriptable Subclass.

    def forward_scriptable(
        self,
        src_tokens,
        src_lengths: Optional[torch.Tensor] = None,
        return_all_hiddens: bool = False,
        token_embeddings: Optional[torch.Tensor] = None,
        lang_id=None,
    ):
        """
        Args:
            src_tokens (LongTensor): tokens in the source language of shape
                `(batch, src_len)`
            src_lengths (torch.LongTensor): accepted for interface
                compatibility; the returned lengths are recomputed from
                *src_tokens*
            return_all_hiddens (bool, optional): also return all of the
                intermediate hidden states (default: False).
            token_embeddings (torch.Tensor, optional): precomputed embeddings
                default `None` will recompute embeddings
            lang_id: passed unchanged to every encoder layer

        Returns:
            dict (every value is a list):
                - **encoder_out**: one-element list with the last encoder
                  layer's output of shape `(src_len, batch, embed_dim)`
                - **encoder_padding_mask**: one-element list with the
                  positions of padding elements of shape `(batch, src_len)`
                - **encoder_embedding**: one-element list with the (scaled)
                  embedding lookup of shape `(batch, src_len, embed_dim)`
                - **encoder_states** (List[Tensor]): all intermediate
                  hidden states of shape `(src_len, batch, embed_dim)`.
                  Only populated if *return_all_hiddens* is True.
                - **src_tokens**: always an empty list
                - **src_lengths**: one-element list with the non-padding
                  token count per sentence, shape `(batch, 1)`
        """
        # compute padding mask
        encoder_padding_mask = src_tokens.eq(self.padding_idx)
        # On XLA devices the padding path is always taken, whether or not any
        # padding is present.
        has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any()

        x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings)

        # account for padding while computing the representation
        if has_pads:
            x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x))

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        encoder_states = []

        if return_all_hiddens:
            # The first entry is the embedding output, before any layer.
            encoder_states.append(x)

        # encoder layers
        for layer in self.layers:
            x = layer(
                x,
                encoder_padding_mask=encoder_padding_mask if has_pads else None,
                lang_id=lang_id,
            )
            if return_all_hiddens:
                assert encoder_states is not None
                encoder_states.append(x)

        if self.layer_norm is not None:
            x = self.layer_norm(x)

        # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in
        # `forward` so we use a dictionary instead.
        # TorchScript does not support mixed values so the values are all lists.
        # The empty list is equivalent to None.
        src_lengths = (
            src_tokens.ne(self.padding_idx)
            .sum(dim=1, dtype=torch.int32)
            .reshape(-1, 1)
            .contiguous()
        )
        return {
            "encoder_out": [x],  # T x B x C
            "encoder_padding_mask": [encoder_padding_mask],  # B x T
            "encoder_embedding": [encoder_embedding],  # B x T x C
            "encoder_states": encoder_states,  # List[T x B x C]
            "src_tokens": [],
            "src_lengths": [src_lengths],
        }
# Default adapter languages for each released X-MOD configuration. Stored as
# tuples so the shared constants cannot be mutated through ``args.languages``.
_XMOD_13_LANGUAGES = (
    "ar_AR", "en_XX", "fi_FI", "fr_XX", "hi_IN", "id_ID", "ka_GE", "ko_KR",
    "ru_RU", "sw_KE", "ta_IN", "th_TH", "vi_VN",
)

_XMOD_30_LANGUAGES = (
    "ar_AR", "cs_CZ", "en_XX", "eu_ES", "fi_FI", "fr_XX", "hi_IN", "hr_HR",
    "hu_HU", "hy_AM", "id_ID", "it_IT", "ka_GE", "ko_KR", "lt_LT", "ml_IN",
    "mn_MN", "ms_MY", "pl_PL", "ro_RO", "ru_RU", "si_LK", "sk_SK", "sq_AL",
    "sv_SE", "sw_KE", "ta_IN", "th_TH", "tl_XX", "vi_VN",
)

_XMOD_60_LANGUAGES = (
    "af_ZA", "am_ET", "ar_AR", "be_BY", "bn_IN", "ca_ES", "cs_CZ", "cy_GB",
    "da_DK", "en_XX", "eo_EO", "et_EE", "eu_ES", "fa_IR", "fi_FI", "fr_XX",
    "ga_IE", "gl_ES", "gu_IN", "ha_NG", "hi_IN", "hr_HR", "hu_HU", "hy_AM",
    "id_ID", "is_IS", "it_IT", "ka_GE", "ko_KR", "ku_TR", "la_VA", "lt_LT",
    "lv_LV", "mk_MK", "ml_IN", "mn_MN", "ms_MY", "ne_NP", "nl_XX", "no_XX",
    "pl_PL", "ps_AF", "pt_XX", "ro_RO", "ru_RU", "sa_IN", "sd_PK", "si_LK",
    "sk_SK", "sl_SI", "so_SO", "sq_AL", "sr_RS", "sv_SE", "sw_KE", "ta_IN",
    "te_IN", "th_TH", "tl_XX", "vi_VN",
)

_XMOD_75_LANGUAGES = (
    "af_ZA", "am_ET", "ar_AR", "as_IN", "be_BY", "bn_IN", "br_FR", "bs_BA",
    "ca_ES", "cs_CZ", "cy_GB", "da_DK", "en_XX", "eo_EO", "et_EE", "eu_ES",
    "fa_IR", "fi_FI", "fr_XX", "fy_NL", "ga_IE", "gd_GB", "gl_ES", "gu_IN",
    "ha_NG", "hi_IN", "hr_HR", "hu_HU", "hy_AM", "id_ID", "is_IS", "it_IT",
    "jv_ID", "ka_GE", "kn_IN", "ko_KR", "ku_TR", "la_VA", "lt_LT", "lv_LV",
    "mg_MG", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "ms_MY", "ne_NP", "nl_XX",
    "no_XX", "om_KE", "or_IN", "pa_IN", "pl_PL", "ps_AF", "pt_XX", "ro_RO",
    "ru_RU", "sa_IN", "sd_PK", "si_LK", "sk_SK", "sl_SI", "so_SO", "sq_AL",
    "sr_RS", "su_ID", "sv_SE", "sw_KE", "ta_IN", "te_IN", "th_TH", "tl_XX",
    "vi_VN", "xh_ZA", "yi_DE",
)

_XMOD_81_LANGUAGES = (
    "en_XX", "id_ID", "vi_VN", "ru_RU", "fa_IR", "sv_SE", "ja_XX", "fr_XX",
    "de_DE", "ro_RO", "ko_KR", "hu_HU", "es_XX", "fi_FI", "uk_UA", "da_DK",
    "pt_XX", "no_XX", "th_TH", "pl_PL", "bg_BG", "nl_XX", "zh_CN", "he_IL",
    "el_GR", "it_IT", "sk_SK", "hr_HR", "tr_TR", "ar_AR", "cs_CZ", "lt_LT",
    "hi_IN", "zh_TW", "ca_ES", "ms_MY", "sl_SI", "lv_LV", "ta_IN", "bn_IN",
    "et_EE", "az_AZ", "sq_AL", "sr_RS", "kk_KZ", "ka_GE", "tl_XX", "ur_PK",
    "is_IS", "hy_AM", "ml_IN", "mk_MK", "be_BY", "la_VA", "te_IN", "eu_ES",
    "gl_ES", "mn_MN", "kn_IN", "ne_NP", "sw_KE", "si_LK", "mr_IN", "af_ZA",
    "gu_IN", "cy_GB", "eo_EO", "km_KH", "ky_KG", "uz_UZ", "ps_AF", "pa_IN",
    "ga_IE", "ha_NG", "am_ET", "lo_LA", "ku_TR", "so_SO", "my_MM", "or_IN",
    "sa_IN",
)


def _set_xmod_adapter_defaults(
    args,
    languages,
    *,
    adapter_layer_norm=False,
    adapter_reuse_layer_norm=True,
    ln_before_adapter=True,
):
    """Fill in the adapter options every X-MOD architecture shares.

    Values already present on ``args`` are kept; only missing ones receive
    the given defaults. ``languages`` is copied into a fresh list.
    """
    args.ffn_modules = getattr(args, "ffn_modules", False)
    args.adapter_modules = getattr(args, "adapter_modules", True)
    args.adapter_layer_norm = getattr(args, "adapter_layer_norm", adapter_layer_norm)
    args.adapter_reuse_layer_norm = getattr(
        args, "adapter_reuse_layer_norm", adapter_reuse_layer_norm
    )
    args.ln_before_adapter = getattr(args, "ln_before_adapter", ln_before_adapter)
    args.languages = getattr(args, "languages", list(languages))


@register_model_architecture("xmod", "xmod_base_13")
def xmod_base_13_architecture(args):
    """Base-size X-MOD with adapters for 13 languages."""
    _set_xmod_adapter_defaults(args, _XMOD_13_LANGUAGES)
    base_architecture(args)


@register_model_architecture("xmod", "xmod_base_30")
def xmod_base_30_architecture(args):
    """Base-size X-MOD with adapters for 30 languages."""
    _set_xmod_adapter_defaults(args, _XMOD_30_LANGUAGES)
    base_architecture(args)


@register_model_architecture("xmod", "xmod_base_60")
def xmod_base_60_architecture(args):
    """Base-size X-MOD with adapters for 60 languages."""
    _set_xmod_adapter_defaults(args, _XMOD_60_LANGUAGES)
    base_architecture(args)


@register_model_architecture("xmod", "xmod_base_75")
def xmod_base_75_architecture(args):
    """Base-size X-MOD with adapters for 75 languages."""
    _set_xmod_adapter_defaults(args, _XMOD_75_LANGUAGES)
    base_architecture(args)


@register_model_architecture("xmod", "xmod_base")
def xmod_base_architecture(args):
    """Base-size X-MOD with adapters for 81 languages."""
    _set_xmod_adapter_defaults(args, _XMOD_81_LANGUAGES)
    base_architecture(args)


@register_model_architecture("xmod", "xmod_large_prenorm")
def xmod_large_prenorm_architecture(args):
    """Large pre-norm X-MOD with adapters for 81 languages.

    Unlike the base variants, adapters get their own layer norm, do not
    reuse the layer's final layer norm, and take the residual before it.
    """
    _set_xmod_adapter_defaults(
        args,
        _XMOD_81_LANGUAGES,
        adapter_layer_norm=True,
        adapter_reuse_layer_norm=False,
        ln_before_adapter=False,
    )
    # args.bottleneck = getattr(args, "bottleneck", 8)
    args.bottleneck = getattr(args, "bottleneck", 4)

    args.encoder_normalize_before = getattr(args, "encoder_normalize_before", True)
    args.encoder_layers = getattr(args, "encoder_layers", 24)
    args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024)
    args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096)
    args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16)
    base_architecture(args)


# Every function above used to be named ``roberta_base_architecture``, so the
# module-level name ended up bound to the last definition. Keep that binding
# for any code that still imports the old name.
roberta_base_architecture = xmod_large_prenorm_architecture
class Adapter(nn.Module):
    """Bottleneck feed-forward adapter: down-project, activate, up-project."""

    def __init__(self, cfg, red_fac=2):
        """Build the two projections and the optional activation dropout.

        Args:
            cfg: config providing ``encoder_embed_dim`` and, optionally,
                ``quant_noise_pq``, ``quant_noise_pq_block_size``,
                ``activation_fn``, ``activation_dropout`` / ``relu_dropout``.
            red_fac: reduction factor; the hidden width is
                ``encoder_embed_dim // red_fac``.
        """
        super(Adapter, self).__init__()
        self.cfg = cfg
        self.embed_dim = cfg.encoder_embed_dim
        self.quant_noise = getattr(cfg, "quant_noise_pq", 0)
        self.quant_noise_block_size = getattr(cfg, "quant_noise_pq_block_size", 8) or 8
        activation_name = getattr(cfg, "activation_fn", "relu") or "relu"
        self.activation_fn = utils.get_activation_fn(activation=activation_name)

        bottleneck_dim = self.embed_dim // red_fac
        down_proj = nn.Linear(self.embed_dim, bottleneck_dim)
        self.fc1 = quant_noise(
            down_proj,
            p=self.quant_noise,
            block_size=self.quant_noise_block_size,
        )
        up_proj = nn.Linear(bottleneck_dim, self.embed_dim)
        self.fc2 = quant_noise(
            up_proj,
            p=self.quant_noise,
            block_size=self.quant_noise_block_size,
        )

        dropout_p = getattr(cfg, "activation_dropout", 0) or 0
        if dropout_p == 0:
            # for backwards compatibility with models that use cfg.relu_dropout
            dropout_p = getattr(cfg, "relu_dropout", 0) or 0
        self.activation_dropout_module = FairseqDropout(
            float(dropout_p), module_name=self.__class__.__name__
        )

    def forward(self, x):
        """Apply the adapter; the output has the same width as the input.

        Activation dropout is applied unless ``cfg.adapter_dropout`` exists
        and is falsy.
        """
        hidden = self.activation_fn(self.fc1(x))
        if getattr(self.cfg, "adapter_dropout", True):
            hidden = self.activation_dropout_module(hidden)
        return self.fc2(hidden)
def lang_adapter(self, lang_id, x):
    """Route each slice of ``x`` along dim 1 through its language adapter.

    Args:
        lang_id: sequence of language codes, one per index of dim 1 of
            ``x``; ``None`` means ``"en_XX"`` for every index.
        x: input tensor; it is split and re-concatenated along dim 1.

    Returns:
        ``x`` unchanged when ``cfg.adapter_modules`` is absent or falsy,
        otherwise the adapter output passed through ``dropout_module`` and
        ``residual_connection``.
    """
    if not getattr(self.cfg, "adapter_modules", False):
        return x

    if lang_id is None:
        lang_id = ["en_XX"] * x.shape[1]

    # Run-length encode consecutive identical languages so that every run
    # goes through its adapter in a single call.
    run_langs = [lang_id[0]]
    run_sizes = [1]
    for current in lang_id[1:]:
        if current == run_langs[-1]:
            run_sizes[-1] += 1
        else:
            run_langs.append(current)
            run_sizes.append(1)

    ln_before_adapter = bool(getattr(self.cfg, "ln_before_adapter", False))

    normed = x
    if self.cfg.adapter_layer_norm:
        normed = self.adapter_layer_norm(x)
    elif self.cfg.adapter_reuse_layer_norm:
        normed = self.final_layer_norm(x)

    # The residual is taken after the layer norm only when
    # cfg.ln_before_adapter is set; otherwise it is the raw input.
    residual = normed if ln_before_adapter else x

    adapted = []
    for run_lang, chunk in zip(run_langs, torch.split(normed, run_sizes, 1)):
        # The "_rom" / "_zaw" suffixes are stripped before the adapter lookup.
        key = str(run_lang.replace("_rom", "").replace("_zaw", ""))
        adapted.append(self.adapter_modules[key](chunk))
    out = torch.cat(adapted, 1)

    out = self.dropout_module(out)
    return self.residual_connection(out, residual)
self.final_layer_norm(x) + return x diff --git a/fairseq/fairseq/modules/__init__.py b/fairseq/fairseq/modules/__init__.py new file mode 100644 index 0000000..dcfda9b --- /dev/null +++ b/fairseq/fairseq/modules/__init__.py @@ -0,0 +1,106 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""isort:skip_file""" + +from .adaptive_input import AdaptiveInput +from .adaptive_softmax import AdaptiveSoftmax +from .base_layer import BaseLayer +from .beamable_mm import BeamableMM +from .character_token_embedder import CharacterTokenEmbedder +from .conv_tbc import ConvTBC +from .cross_entropy import cross_entropy +from .downsampled_multihead_attention import DownsampledMultiHeadAttention +from .dynamic_convolution import DynamicConv, DynamicConv1dTBC, DynamicConv_scripatable +from .dynamic_crf_layer import DynamicCRF +from .ema_module import EMAModuleConfig, EMAModule +from .fairseq_dropout import FairseqDropout +from .fp32_batch_norm import Fp32BatchNorm +from .fp32_group_norm import Fp32GroupNorm +from .fp32_instance_norm import Fp32InstanceNorm +from .gelu import gelu, gelu_accurate +from .grad_multiply import GradMultiply +from .gumbel_vector_quantizer import GumbelVectorQuantizer +from .kmeans_vector_quantizer import KmeansVectorQuantizer +from .layer_drop import LayerDropModuleList +from .layer_norm import Fp32LayerNorm, LayerNorm +from .learned_positional_embedding import LearnedPositionalEmbedding +from .lightweight_convolution import LightweightConv, LightweightConv1dTBC +from .linearized_convolution import LinearizedConvolution +from .location_attention import LocationAttention +from .lstm_cell_with_zoneout import LSTMCellWithZoneOut +from .multihead_attention import MultiheadAttention +from .positional_embedding import PositionalEmbedding +from .same_pad import SamePad, SamePad2d +from .scalar_bias import ScalarBias +from 
# Public names re-exported by this package. Every entry must match a name
# bound by the imports above, because ``from <package> import *`` looks each
# one up as an attribute of the module and raises AttributeError otherwise.
# Fixes: "ESPNETMultiheadedAttention" -> "ESPNETMultiHeadedAttention" (the
# name actually imported) and the duplicated "PositionalEmbedding" entry.
__all__ = [
    "AdaptiveInput",
    "AdaptiveSoftmax",
    "BaseLayer",
    "BeamableMM",
    "CharacterTokenEmbedder",
    "ConvTBC",
    "cross_entropy",
    "DownsampledMultiHeadAttention",
    "DynamicConv1dTBC",
    "DynamicConv",
    "DynamicConv_scripatable",
    "DynamicCRF",
    "EMAModule",
    "EMAModuleConfig",
    "FairseqDropout",
    "Fp32BatchNorm",
    "Fp32GroupNorm",
    "Fp32LayerNorm",
    "Fp32InstanceNorm",
    "gelu",
    "gelu_accurate",
    "GradMultiply",
    "GumbelVectorQuantizer",
    "KmeansVectorQuantizer",
    "LayerDropModuleList",
    "LayerNorm",
    "LearnedPositionalEmbedding",
    "LightweightConv1dTBC",
    "LightweightConv",
    "LinearizedConvolution",
    "LocationAttention",
    "LSTMCellWithZoneOut",
    "MultiheadAttention",
    "PositionalEmbedding",
    "SamePad",
    "SamePad2d",
    "ScalarBias",
    "SinusoidalPositionalEmbedding",
    "TransformerSentenceEncoderLayer",
    "TransformerSentenceEncoder",
    "TransformerDecoderLayer",
    "TransformerEncoderLayer",
    "TransposeLast",
    "VGGBlock",
    "unfold1d",
    "ESPNETMultiHeadedAttention",
    "RelPositionMultiHeadedAttention",
    "RelPositionalEncoding",
    "RotaryPositionalEmbedding",
    "RotaryPositionMultiHeadedAttention",
]
class AdaptiveInput(nn.Module):
    """Embedding split into frequency bands of decreasing width.

    The vocabulary is partitioned by ``cutoff``; band ``i`` has an embedding
    of width ``initial_dim // factor**i`` followed by a bias-free linear
    projection to ``output_dim``.

    Args:
        vocab_size: vocabulary size; appended to ``cutoff`` when it exceeds
            the last cutoff, otherwise it must equal the last cutoff.
        padding_idx: padding index, passed to the first band's embedding.
        initial_dim: embedding width of the first band.
        factor: per-band divisor applied to the embedding width.
        output_dim: width every band is projected to.
        cutoff: increasing upper bounds (exclusive) of the bands.
        q_noise: quantization-noise amount forwarded to ``quant_noise``.
        qn_block_size: block size forwarded to ``quant_noise``.
    """

    def __init__(
        self,
        vocab_size: int,
        padding_idx: int,
        initial_dim: int,
        factor: float,
        output_dim: int,
        cutoff: List[int],
        q_noise: float = 0,
        qn_block_size: int = 8,
    ):
        super().__init__()

        if vocab_size > cutoff[-1]:
            cutoff = cutoff + [vocab_size]
        else:
            assert (
                vocab_size == cutoff[-1]
            ), "cannot specify cutoff larger than vocab size"

        self.cutoff = cutoff
        self.embedding_dim = output_dim
        self.padding_idx = padding_idx

        self.embeddings = nn.ModuleList()
        lower = 0
        for band, upper in enumerate(self.cutoff):
            band_dim = int(initial_dim // (factor**band))
            # Only the first band's embedding receives a padding index;
            # every later band is built with padding_idx=None.
            band_padding = padding_idx if band == 0 else None
            self.embeddings.append(
                nn.Sequential(
                    nn.Embedding(upper - lower, band_dim, band_padding),
                    quant_noise(
                        nn.Linear(band_dim, output_dim, bias=False),
                        q_noise,
                        qn_block_size,
                    ),
                )
            )
            lower = upper

        def init_weights(m):
            if isinstance(m, nn.Embedding):
                nn.init.normal_(m.weight, mean=0, std=m.weight.shape[1] ** -0.5)
                nn.init.constant_(m.weight[padding_idx], 0)
            elif hasattr(m, "weight"):
                nn.init.xavier_uniform_(m.weight)

        self.apply(init_weights)

        self.register_buffer("_float_tensor", torch.FloatTensor(1))

    def weights_for_band(self, band: int):
        """Return ``(embedding_weight, projection_weight)`` of ``band``."""
        embedding, projection = self.embeddings[band][0], self.embeddings[band][1]
        return embedding.weight, projection.weight

    def forward(self, input: torch.Tensor):
        """Embed ``input`` and return a tensor of shape ``input.shape + (embedding_dim,)``."""
        # NOTE: the output buffer is allocated uninitialised; only positions
        # whose index falls inside some band are written below.
        result = self._float_tensor.new(input.shape + (self.embedding_dim,))
        lower = 0
        for band, upper in enumerate(self.cutoff):
            mask = input.lt(upper)
            if band > 0:
                mask.mul_(input.ge(lower))
                # Shift indices so they are relative to the start of the band.
                chunk_input = input[mask] - lower
            else:
                chunk_input = input[mask]
            if mask.any():
                result[mask] = self.embeddings[band](chunk_input)
            lower = upper
        return result
self.word_proj(input.view(inp_sz, -1)) + out[:, self.num_words :] = self.class_proj(input.view(inp_sz, -1)) + return out + + +class AdaptiveSoftmax(nn.Module): + """ + This is an implementation of the efficient softmax approximation for + graphical processing units (GPU), described in the paper "Efficient softmax + approximation for GPUs" (http://arxiv.org/abs/1609.04309). + """ + + def __init__( + self, + vocab_size, + input_dim, + cutoff, + dropout, + factor=4.0, + adaptive_inputs=None, + tie_proj=False, + q_noise=0, + qn_block_size=8, + ): + super().__init__() + + if vocab_size > cutoff[-1]: + cutoff = cutoff + [vocab_size] + else: + assert ( + vocab_size == cutoff[-1] + ), "cannot specify cutoff larger than vocab size" + + output_dim = cutoff[0] + len(cutoff) - 1 + + self.vocab_size = vocab_size + self.cutoff = cutoff + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + self.input_dim = input_dim + self.factor = factor + self.q_noise = q_noise + self.qn_block_size = qn_block_size + + self.lsm = nn.LogSoftmax(dim=1) + + if adaptive_inputs is not None: + self.head = TiedHeadModule( + adaptive_inputs.weights_for_band(0), + input_dim, + len(cutoff) - 1, + self.q_noise, + self.qn_block_size, + ) + else: + self.head = quant_noise( + nn.Linear(input_dim, output_dim, bias=False), + self.q_noise, + self.qn_block_size, + ) + + self._make_tail(adaptive_inputs, tie_proj) + + def init_weights(m): + if ( + hasattr(m, "weight") + and not isinstance(m, TiedLinear) + and not isinstance(m, TiedHeadModule) + ): + nn.init.xavier_uniform_(m.weight) + + self.apply(init_weights) + + self.register_buffer("version", torch.LongTensor([1])) + + def _make_tail(self, adaptive_inputs=None, tie_proj=False): + self.tail = nn.ModuleList() + for i in range(len(self.cutoff) - 1): + dim = int(self.input_dim // self.factor ** (i + 1)) + + tied_emb, tied_proj = ( + adaptive_inputs.weights_for_band(i + 1) + if adaptive_inputs is not None + else (None, None) + 
def adapt_target(self, target):
    """Rewrite ``target`` into one head target plus one target per tail band.

    Args:
        target: tensor of vocabulary indices; it is flattened first.

    Returns:
        ``(new_target, target_idxs)``. ``new_target[0]`` is the flattened
        target in which every index belonging to tail band ``i`` is replaced
        by ``cutoff[0] + i``. ``new_target[i + 1]`` holds the indices of band
        ``i`` relative to the band start, and ``target_idxs[i]`` the flat
        positions they came from; both are ``None`` for a band with no
        targets.
    """
    flat = target.view(-1)
    head_target = flat.clone()
    new_target = [head_target]
    target_idxs = []

    bands = zip(self.cutoff[:-1], self.cutoff[1:])
    for band, (lower, upper) in enumerate(bands):
        in_band = flat.ge(lower) & flat.lt(upper)
        # In the head, a whole tail band is represented by a single class id.
        head_target[in_band] = self.cutoff[0] + band

        if not in_band.any():
            target_idxs.append(None)
            new_target.append(None)
            continue

        target_idxs.append(in_band.nonzero(as_tuple=False).squeeze(1))
        new_target.append(flat[in_band] - lower)

    return new_target, target_idxs
+ """ + + bsz, length, dim = input.size() + input = input.contiguous().view(-1, dim) + + if target is not None: + _, target_idxs = self.adapt_target(target) + else: + target_idxs = None + + head_y = self.head(input) + log_probs = head_y.new_zeros(input.size(0), self.vocab_size) + + head_sz = self.cutoff[0] + len(self.tail) + log_probs[:, :head_sz] = self.lsm(head_y) + tail_priors = log_probs[:, self.cutoff[0] : head_sz].clone() + + for i in range(len(self.tail)): + start = self.cutoff[i] + end = self.cutoff[i + 1] + + if target_idxs is None: + tail_out = log_probs[:, start:end] + tail_out.copy_(self.tail[i](input)) + log_probs[:, start:end] = self.lsm(tail_out).add_( + tail_priors[:, i, None] + ) + elif target_idxs[i] is not None: + idxs = target_idxs[i] + tail_out = log_probs[idxs, start:end] + tail_out.copy_(self.tail[i](input[idxs])) + log_probs[idxs, start:end] = self.lsm(tail_out).add_( + tail_priors[idxs, i, None] + ) + + log_probs = log_probs.view(bsz, length, -1) + return log_probs diff --git a/fairseq/fairseq/modules/base_layer.py b/fairseq/fairseq/modules/base_layer.py new file mode 100644 index 0000000..e823f7b --- /dev/null +++ b/fairseq/fairseq/modules/base_layer.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch.nn as nn +import torch +import sys +from fairseq import utils +from fairseq.distributed import utils as distributed_utils +from fairseq.modules.layer_norm import LayerNorm + + +class BaseLayer(nn.Module): + def __init__(self, args): + super().__init__() + self.num_workers = distributed_utils.get_data_parallel_world_size() + expert_centroids = torch.empty(self.num_workers, args.decoder_embed_dim) + torch.nn.init.orthogonal_(expert_centroids, gain=0.1) + self.register_parameter( + "expert_centroids", torch.nn.Parameter(expert_centroids) + ) + self.expert_network = nn.Sequential( + *([BaseSublayer(args) for _ in range(args.base_sublayers)]) + ) + self.expert_id = distributed_utils.get_data_parallel_rank() + self.shuffle = args.base_shuffle + self.cpp = self.load_assignment() + + # Add a special attribute to the expert parameters, so we know not to sync their gradients + for param in self.expert_network.parameters(): + param.expert = True + + def forward(self, input_features, *args, **kwargs): + features = input_features.reshape(-1, input_features.size(-1)) + is_training = input_features.requires_grad + + if self.shuffle and is_training: + # Send each token to a random worker, to break correlations within the batch + shuffle_sort = torch.randperm(features.size(0), device=features.device) + features = All2All.apply(features[shuffle_sort]) + + with torch.no_grad(): + # Compute similarity of each token to each expert, for routing + token_expert_affinities = features.matmul( + self.expert_centroids.transpose(0, 1) + ) + + # Compute which token goes to which expert + sort_by_expert, input_splits, output_splits = ( + self.balanced_assignment(token_expert_affinities) + if is_training + else self.greedy_assignment(token_expert_affinities) + ) + # Swap these tokens for the right ones for our expert + routed_features = All2All.apply( + features[sort_by_expert], output_splits, input_splits + ) + + if routed_features.size(0) > 0: + # Mix in the expert network based 
def inverse_sort(self, order):
    """Return the index that undoes ``order``: ``xs == xs[order][inverse_sort(order)]``.

    Args:
        order: 1-D integer index tensor (a permutation).

    Returns:
        Tensor like ``order`` where entry ``order[i]`` holds ``i``.
    """
    positions = torch.arange(0, order.size(0), device=order.device)
    inverse = torch.empty_like(order)
    inverse.scatter_(0, order, positions)
    return inverse
class All2All(torch.autograd.Function):
    """Autograd-aware wrapper around ``torch.distributed.all_to_all_single``."""

    @staticmethod
    def forward(ctx, xs, input_splits=None, output_splits=None):
        """Exchange ``xs`` between workers and return the received tensor.

        The split sizes are stored on ``ctx`` so that ``backward`` can run
        the exchange with input and output splits swapped. When
        ``output_splits`` is ``None`` the receive buffer has the shape of
        ``xs``; otherwise its first dimension is ``sum(output_splits)``.
        """
        ctx.input_splits = input_splits
        ctx.output_splits = output_splits

        if output_splits is None:
            ys = torch.empty_like(xs)
        else:
            received_shape = [sum(output_splits)] + list(xs.size()[1:])
            ys = xs.new_empty(size=received_shape)

        torch.distributed.all_to_all_single(
            ys, xs, output_split_sizes=output_splits, input_split_sizes=input_splits
        )
        return ys

    @staticmethod
    def backward(ctx, grad_output):
        """Send gradients back along the reverse route of ``forward``."""
        if ctx.input_splits is None:
            result = torch.empty_like(grad_output)
        else:
            grad_shape = [sum(ctx.input_splits)] + list(grad_output.size()[1:])
            result = grad_output.new_empty(size=grad_shape)

        torch.distributed.all_to_all_single(
            result,
            grad_output,
            output_split_sizes=ctx.input_splits,
            input_split_sizes=ctx.output_splits,
        )
        # The two split-size arguments of forward receive no gradient.
        return result, None, None
class BeamableMM(nn.Module):
    """This module provides an optimized MM for beam decoding with attention.

    It leverages the fact that the source side of the input is replicated
    beam times and the target side of the input is of width one. This layer
    speeds up inference by replacing the inputs
    {(bsz x 1 x nhu), (bsz x sz2 x nhu)} with smaller inputs
    {(bsz/beam x beam x nhu), (bsz/beam x sz2 x nhu)}.
    """

    def __init__(self, beam_size=None):
        super().__init__()
        self.beam_size = beam_size

    def forward(self, input1, input2):
        """Return ``input1.bmm(input2)``, using the beam shortcut when it applies."""
        beam_shortcut = (
            not self.training  # test mode
            and self.beam_size is not None  # beam size is set
            and input1.dim() == 3  # only support batched input
            and input1.size(1) == 1  # single time step update
        )
        if not beam_shortcut:
            return input1.bmm(input2)

        bsz, beam = input1.size(0), self.beam_size

        # bsz x 1 x nhu --> bsz/beam x beam x nhu
        queries = input1[:, 0, :].unfold(0, beam, beam).transpose(2, 1)

        # Keep only the first of every `beam` consecutive rows of input2.
        sources = input2.unfold(0, beam, beam)[:, :, :, 0]

        # use non batched operation if bsz = beam
        if queries.size(0) == 1:
            output = torch.mm(queries[0, :, :], sources[0, :, :])
        else:
            output = queries.bmm(sources)
        return output.view(bsz, 1, -1)

    def set_beam_size(self, beam_size):
        """Set the beam size used by the inference shortcut."""
        self.beam_size = beam_size
def set_vocab(self, vocab, max_char_len):
    """Build the word-index to character-index table for ``vocab``.

    Each of the first ``vocab.nspecial`` entries maps to an all-zero row.
    Every other word is UTF-8 encoded, each byte is shifted by one (0 is the
    padding index), and the row is zero-padded or truncated to
    ``max_char_len``. The table is stored on ``self.word_to_char`` and the
    vocabulary on ``self.vocab``.

    Args:
        vocab: vocabulary supporting ``len()``, integer indexing that yields
            strings, and an ``nspecial`` attribute.
        max_char_len: number of character slots per word.
    """
    word_to_char = torch.LongTensor(len(vocab), max_char_len)

    truncated = 0
    for word_idx in range(len(vocab)):
        if word_idx < vocab.nspecial:
            char_idxs = [0] * max_char_len
        else:
            encoded = vocab[word_idx].encode()
            # +1 for padding
            char_idxs = [byte + 1 for byte in encoded]
            char_idxs.extend([0] * (max_char_len - len(encoded)))
        if len(char_idxs) > max_char_len:
            truncated += 1
            char_idxs = char_idxs[:max_char_len]
        word_to_char[word_idx] = torch.LongTensor(char_idxs)

    if truncated > 0:
        logger.info(
            "truncated {} words longer than {} characters".format(
                truncated, max_char_len
            )
        )

    self.vocab = vocab
    self.word_to_char = word_to_char
class Highway(torch.nn.Module):
    """
    A `Highway layer <https://arxiv.org/abs/1505.00387>`_.
    Adopted from the AllenNLP implementation.

    Each layer is one ``Linear(input_dim, 2 * input_dim)``: the first half
    of its output is the candidate transform, the second half the gate.
    """

    def __init__(self, input_dim: int, num_layers: int = 1):
        super().__init__()
        self.input_dim = input_dim
        self.layers = nn.ModuleList(
            nn.Linear(input_dim, 2 * input_dim) for _ in range(num_layers)
        )
        self.activation = nn.ReLU()

        self.reset_parameters()

    def reset_parameters(self):
        """Initialise weights (Xavier normal) and biases of every layer."""
        split = self.input_dim
        for linear in self.layers:
            # As per comment in AllenNLP:
            # We should bias the highway layer to just carry its input forward. We do that by
            # setting the bias on `B(x)` to be positive, because that means `g` will be biased to
            # be high, so we will carry the input forward. The bias on `B(x)` is the second half
            # of the bias vector in each Linear layer.
            nn.init.constant_(linear.bias[split:], 1)

            nn.init.constant_(linear.bias[:split], 0)
            nn.init.xavier_normal_(linear.weight)

    def forward(self, x: torch.Tensor):
        """Apply every highway layer: ``x = g * x + (1 - g) * act(proj(x))``."""
        for linear in self.layers:
            candidate, gate_logits = linear(x).chunk(2, dim=-1)
            transformed = self.activation(candidate)
            carry = torch.sigmoid(gate_logits)
            x = carry * x + (carry.new_tensor([1]) - carry) * transformed
        return x
def unwrap_checkpoint(m: torch.nn.Module):
    """
    Unwrap a module and its children from checkpoint_wrapper.

    For ``m`` and every submodule, a saved ``precheckpoint_forward`` is
    restored as ``forward`` and a saved ``old_deepcopy_method`` is restored
    as ``__deepcopy__``; the saved attribute is deleted afterwards. Modules
    without these attributes are left untouched.

    Returns:
        The same module ``m``, modified in place.
    """
    # (attribute holding the saved original, attribute it is restored to)
    restorations = (
        ("precheckpoint_forward", "forward"),
        ("old_deepcopy_method", "__deepcopy__"),
    )
    for submodule in m.modules():
        for saved_name, target_name in restorations:
            if hasattr(submodule, saved_name):
                setattr(submodule, target_name, getattr(submodule, saved_name))
                delattr(submodule, saved_name)
    return m
+ kwarg_keys, flat_args = pack_kwargs(*args, **kwargs) + parent_ctx_dict = {"offload": offload_to_cpu} + output = CheckpointFunction.apply( + original_forward, parent_ctx_dict, kwarg_keys, *flat_args + ) + if isinstance(output, torch.Tensor): + return output + else: + packed_non_tensor_outputs = parent_ctx_dict["packed_non_tensor_outputs"] + if packed_non_tensor_outputs: + output = unpack_non_tensors(output, packed_non_tensor_outputs) + return output + + +def pack_kwargs(*args, **kwargs) -> Tuple[List[str], List[Any]]: + """ + Usage:: + + kwarg_keys, flat_args = pack_kwargs(1, 2, a=3, b=4) + args, kwargs = unpack_kwargs(kwarg_keys, flat_args) + assert args == [1, 2] + assert kwargs == {"a": 3, "b": 4} + """ + kwarg_keys = [] + flat_args = list(args) + for k, v in kwargs.items(): + kwarg_keys.append(k) + flat_args.append(v) + return kwarg_keys, flat_args + + +def unpack_kwargs( + kwarg_keys: List[str], flat_args: List[Any] +) -> Tuple[List[Any], Dict[str, Any]]: + if len(kwarg_keys) == 0: + return flat_args, {} + args = flat_args[: -len(kwarg_keys)] + kwargs = {k: v for k, v in zip(kwarg_keys, flat_args[-len(kwarg_keys) :])} + return args, kwargs + + +def split_non_tensors( + mixed: Union[torch.Tensor, Tuple[Any]] +) -> Tuple[Tuple[torch.Tensor], Dict[str, List[Any]]]: + """ + Usage:: + + x = torch.Tensor([1]) + y = torch.Tensor([2]) + tensors, packed_non_tensors = split_non_tensors((x, y, None, 3)) + recon = unpack_non_tensors(tensors, packed_non_tensors) + assert recon == (x, y, None, 3) + """ + if isinstance(mixed, torch.Tensor): + return (mixed,), None + tensors = [] + packed_non_tensors = {"is_tensor": [], "objects": []} + for o in mixed: + if isinstance(o, torch.Tensor): + packed_non_tensors["is_tensor"].append(True) + tensors.append(o) + else: + packed_non_tensors["is_tensor"].append(False) + packed_non_tensors["objects"].append(o) + return tuple(tensors), packed_non_tensors + + +def unpack_non_tensors( + tensors: Tuple[torch.Tensor], + packed_non_tensors: 
Dict[str, List[Any]], +) -> Tuple[Any]: + if packed_non_tensors is None: + return tensors + assert isinstance(packed_non_tensors, dict) + mixed = [] + is_tensor_list = packed_non_tensors["is_tensor"] + objects = packed_non_tensors["objects"] + assert len(tensors) + len(objects) == len(is_tensor_list) + obj_i = tnsr_i = 0 + for is_tensor in is_tensor_list: + if is_tensor: + mixed.append(tensors[tnsr_i]) + tnsr_i += 1 + else: + mixed.append(objects[obj_i]) + obj_i += 1 + return tuple(mixed) + + +class CheckpointFunction(torch.autograd.Function): + """Similar to the torch version, but support non-Tensor outputs. + + The caller is expected to provide a dict (*parent_ctx_dict*) that will hold + the non-Tensor outputs. These should be combined with the Tensor *outputs* + by calling ``unpack_non_tensors``. + """ + + @staticmethod + def forward(ctx, run_function, parent_ctx_dict, kwarg_keys, *args): + if torch.is_grad_enabled(): # grad may be disabled, e.g., during validation + checkpoint.check_backward_validity(args) + + ctx.run_function = run_function + ctx.kwarg_keys = kwarg_keys + ctx.fwd_rng_state = utils.get_rng_state() + + tensor_inputs, packed_non_tensor_inputs = split_non_tensors(args) + if parent_ctx_dict["offload"]: + ctx.fwd_device = tuple(x.device for x in tensor_inputs) + ctx.grad_requirements = tuple(x.requires_grad for x in tensor_inputs) + tensor_inputs = tuple( + x.to(torch.device("cpu"), non_blocking=True) for x in tensor_inputs + ) + + else: + ctx.fwd_device, ctx.grad_requirements = None, None + + ctx.save_for_backward(*tensor_inputs) + ctx.packed_non_tensor_inputs = packed_non_tensor_inputs + + with torch.no_grad(): + unpacked_args, unpacked_kwargs = unpack_kwargs(kwarg_keys, args) + outputs = run_function(*unpacked_args, **unpacked_kwargs) + + if isinstance(outputs, torch.Tensor): + return outputs + else: + # Autograd Functions don't like non-Tensor outputs. 
We can split the + # non-Tensor and Tensor outputs, returning the former by reference + # through *parent_ctx_dict* and returning the latter directly. + outputs, packed_non_tensor_outputs = split_non_tensors(outputs) + parent_ctx_dict["packed_non_tensor_outputs"] = packed_non_tensor_outputs + return outputs + + @staticmethod + def backward(ctx, *args): + if not torch.autograd._is_checkpoint_valid(): + raise RuntimeError( + "Checkpointing is not compatible with .grad(), please use .backward() if possible" + ) + + tensor_inputs: Tuple = ctx.saved_tensors + tensor_inputs = checkpoint.detach_variable(tensor_inputs) + if ctx.fwd_device is not None: + tensor_inputs = [ + t.to(ctx.fwd_device[i], non_blocking=True) + for i, t in enumerate(tensor_inputs) + ] + for i, need_grad in enumerate(ctx.grad_requirements): + tensor_inputs[i].requires_grad = need_grad + inputs = unpack_non_tensors(tensor_inputs, ctx.packed_non_tensor_inputs) + + # Store the current states. + bwd_rng_state = utils.get_rng_state() + + # Set the states to what it used to be before the forward pass. + utils.set_rng_state(ctx.fwd_rng_state) + + with torch.enable_grad(): + unpacked_args, unpacked_kwargs = unpack_kwargs(ctx.kwarg_keys, inputs) + outputs = ctx.run_function(*unpacked_args, **unpacked_kwargs) + tensor_outputs, _ = split_non_tensors(outputs) + # Set the states back to what it was at the start of this function. 
+ utils.set_rng_state(bwd_rng_state) + + # Run backward() with only Tensors that require grad + outputs_with_grad = [] + args_with_grad = [] + for i in range(len(tensor_outputs)): + if tensor_outputs[i].requires_grad: + outputs_with_grad.append(tensor_outputs[i]) + args_with_grad.append(args[i]) + if len(outputs_with_grad) == 0: + raise RuntimeError( + "None of the outputs have requires_grad=True, " + "this checkpoint() is not necessary" + ) + + torch.autograd.backward(outputs_with_grad, args_with_grad) + + grads = tuple( + inp.grad if isinstance(inp, torch.Tensor) else None for inp in inputs + ) + return (None, None, None) + grads diff --git a/fairseq/fairseq/modules/conformer_layer.py b/fairseq/fairseq/modules/conformer_layer.py new file mode 100644 index 0000000..964af24 --- /dev/null +++ b/fairseq/fairseq/modules/conformer_layer.py @@ -0,0 +1,301 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +from typing import Optional + +import torch + +from fairseq.modules import ( + ESPNETMultiHeadedAttention, + LayerNorm, + MultiheadAttention, + RelPositionMultiHeadedAttention, + RotaryPositionMultiHeadedAttention, +) +from fairseq.utils import get_activation_fn + + +class ConvolutionModule(torch.nn.Module): + """Convolution block used in the conformer block""" + + def __init__( + self, + embed_dim, + channels, + depthwise_kernel_size, + dropout, + activation_fn="swish", + bias=False, + export=False, + ): + """ + Args: + embed_dim: Embedding dimension + channels: Number of channels in depthwise conv layers + depthwise_kernel_size: Depthwise conv layer kernel size + dropout: dropout value + activation_fn: Activation function to use after depthwise convolution kernel + bias: If bias should be added to conv layers + export: If layernorm should be exported to jit + """ + super(ConvolutionModule, self).__init__() + assert ( + depthwise_kernel_size - 1 + ) % 2 == 0, "kernel_size should be a odd number for 'SAME' padding" + self.layer_norm = LayerNorm(embed_dim, export=export) + self.pointwise_conv1 = torch.nn.Conv1d( + embed_dim, + 2 * channels, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + ) + self.glu = torch.nn.GLU(dim=1) + self.depthwise_conv = torch.nn.Conv1d( + channels, + channels, + depthwise_kernel_size, + stride=1, + padding=(depthwise_kernel_size - 1) // 2, + groups=channels, + bias=bias, + ) + self.batch_norm = torch.nn.BatchNorm1d(channels) + self.activation = get_activation_fn(activation_fn)(channels) + self.pointwise_conv2 = torch.nn.Conv1d( + channels, + embed_dim, + kernel_size=1, + stride=1, + padding=0, + bias=bias, + ) + self.dropout = torch.nn.Dropout(dropout) + + def forward(self, x): + """ + Args: + x: Input of shape B X T X C + Returns: + Tensor of shape B X T X C + """ + x = self.layer_norm(x) + # exchange the temporal dimension and the feature dimension + x = x.transpose(1, 2) + + # GLU mechanism + x = self.pointwise_conv1(x) # 
(batch, 2*channel, dim) + x = self.glu(x) # (batch, channel, dim) + + # 1D Depthwise Conv + x = self.depthwise_conv(x) + x = self.batch_norm(x) + x = self.activation(x) + + x = self.pointwise_conv2(x) + x = self.dropout(x) + return x.transpose(1, 2) + + +class FeedForwardModule(torch.nn.Module): + """Positionwise feed forward layer used in conformer""" + + def __init__( + self, + input_feat, + hidden_units, + dropout1, + dropout2, + activation_fn="swish", + bias=True, + ): + """ + Args: + input_feat: Input feature dimension + hidden_units: Hidden unit dimension + dropout1: dropout value for layer1 + dropout2: dropout value for layer2 + activation_fn: Name of activation function + bias: If linear layers should have bias + """ + + super(FeedForwardModule, self).__init__() + self.layer_norm = LayerNorm(input_feat) + self.w_1 = torch.nn.Linear(input_feat, hidden_units, bias=bias) + self.w_2 = torch.nn.Linear(hidden_units, input_feat, bias=bias) + self.dropout1 = torch.nn.Dropout(dropout1) + self.dropout2 = torch.nn.Dropout(dropout2) + self.activation = get_activation_fn(activation_fn)(hidden_units) + + def forward(self, x): + """ + Args: + x: Input Tensor of shape T X B X C + Returns: + Tensor of shape T X B X C + """ + x = self.layer_norm(x) + x = self.w_1(x) + x = self.activation(x) + x = self.dropout1(x) + x = self.w_2(x) + return self.dropout2(x) + + +class ConformerEncoderLayer(torch.nn.Module): + """Conformer block based on https://arxiv.org/abs/2005.08100. 
We currently don't support relative positional encoding in MHA""" + + def __init__( + self, + embed_dim, + ffn_embed_dim, + attention_heads, + dropout, + use_fp16, + depthwise_conv_kernel_size=31, + activation_fn="swish", + attn_type=None, + pos_enc_type="abs", + ): + """ + Args: + embed_dim: Input embedding dimension + ffn_embed_dim: FFN layer dimension + attention_heads: Number of attention heads in MHA + dropout: dropout value + depthwise_conv_kernel_size: Size of kernel in depthwise conv layer in convolution module + activation_fn: Activation function name to use in convulation block and feed forward block + attn_type: MHA implementation from ESPNET vs fairseq + pos_enc_type: Positional encoding type - abs, rope, rel_pos + """ + self.pos_enc_type = pos_enc_type + super(ConformerEncoderLayer, self).__init__() + + self.ffn1 = FeedForwardModule( + embed_dim, + ffn_embed_dim, + dropout, + dropout, + ) + + self.self_attn_layer_norm = LayerNorm(embed_dim, export=False) + self.self_attn_dropout = torch.nn.Dropout(dropout) + if attn_type == "espnet": + if self.pos_enc_type == "rel_pos": + self.self_attn = RelPositionMultiHeadedAttention( + embed_dim, + attention_heads, + dropout=dropout, + ) + elif self.pos_enc_type == "rope": + self.self_attn = RotaryPositionMultiHeadedAttention( + embed_dim, attention_heads, dropout=dropout, precision=use_fp16 + ) + elif self.pos_enc_type == "abs": + self.self_attn = ESPNETMultiHeadedAttention( + embed_dim, + attention_heads, + dropout=dropout, + ) + else: + raise Exception(f"Unsupported attention type {self.pos_enc_type}") + else: + # Default to fairseq MHA + self.self_attn = MultiheadAttention( + embed_dim, + attention_heads, + dropout=dropout, + ) + + self.conv_module = ConvolutionModule( + embed_dim=embed_dim, + channels=embed_dim, + depthwise_kernel_size=depthwise_conv_kernel_size, + dropout=dropout, + activation_fn=activation_fn, + ) + + self.ffn2 = FeedForwardModule( + embed_dim, + ffn_embed_dim, + dropout, + dropout, + 
activation_fn=activation_fn, + ) + self.final_layer_norm = LayerNorm(embed_dim, export=False) + + def forward( + self, + x, + encoder_padding_mask: Optional[torch.Tensor], + position_emb: Optional[torch.Tensor] = None, + ): + """ + Args: + x: Tensor of shape T X B X C + encoder_padding_mask: Optional mask tensor + positions: + Returns: + Tensor of shape T X B X C + """ + residual = x + x = self.ffn1(x) + x = x * 0.5 + residual + residual = x + x = self.self_attn_layer_norm(x) + if self.pos_enc_type == "rel_pos": + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + pos_emb=position_emb, + need_weights=False, + ) + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + need_weights=False, + ) + x = self.self_attn_dropout(x) + x = x + residual + + residual = x + # TBC to BTC + x = x.transpose(0, 1) + x = self.conv_module(x) + # BTC to TBC + x = x.transpose(0, 1) + x = residual + x + + residual = x + x = self.ffn2(x) + + layer_result = x + + x = x * 0.5 + residual + + x = self.final_layer_norm(x) + return x, (attn, layer_result) + + +class ConformerWav2Vec2EncoderLayer(ConformerEncoderLayer): + """Encoder layer for Wav2vec2 encoder""" + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + position_emb=None, + ): + return super().forward(x, self_attn_padding_mask, position_emb) diff --git a/fairseq/fairseq/modules/conv_tbc.py b/fairseq/fairseq/modules/conv_tbc.py new file mode 100644 index 0000000..65e17ec --- /dev/null +++ b/fairseq/fairseq/modules/conv_tbc.py @@ -0,0 +1,53 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch +from torch import nn +from torch.nn.modules.utils import _single +from torch import Tensor + + +class ConvTBC(torch.nn.Module): + """1D convolution over an input of shape (time x batch x channel) + + The implementation uses gemm to perform the convolution. This implementation + is faster than cuDNN for small kernel sizes. + """ + + def __init__(self, in_channels, out_channels, kernel_size, padding=0): + super(ConvTBC, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _single(kernel_size) + self.padding = _single(padding) + + self.weight = torch.nn.Parameter( + torch.Tensor(self.kernel_size[0], in_channels, out_channels) + ) + self.bias = torch.nn.Parameter(torch.Tensor(out_channels)) + + self.reset_parameters() + + def reset_parameters(self): + nn.init.xavier_normal_(self.weight) + nn.init.zeros_(self.bias) + + def conv_tbc(self, input: Tensor): + return torch.conv_tbc( + input.contiguous(), self.weight, self.bias, self.padding[0] + ) + + def forward(self, input: Tensor): + return self.conv_tbc(input) + + def __repr__(self): + s = ( + "{name}({in_channels}, {out_channels}, kernel_size={kernel_size}" + ", padding={padding}" + ) + if self.bias is None: + s += ", bias=False" + s += ")" + return s.format(name=self.__class__.__name__, **self.__dict__) diff --git a/fairseq/fairseq/modules/cross_entropy.py b/fairseq/fairseq/modules/cross_entropy.py new file mode 100644 index 0000000..286c00e --- /dev/null +++ b/fairseq/fairseq/modules/cross_entropy.py @@ -0,0 +1,59 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging + +import torch +import torch.nn.functional as F + +logger = logging.getLogger(__name__) + + +def _cross_entropy_pytorch(logits, target, ignore_index=None, reduction="mean"): + lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32) + return F.nll_loss( + lprobs, + target, + ignore_index=ignore_index, + reduction=reduction, + ) + + +try: + import xentropy_cuda + from apex.contrib import xentropy + + def cross_entropy(logits, target, ignore_index=-100, reduction="mean"): + if logits.device == torch.device("cpu"): + return _cross_entropy_pytorch(logits, target, ignore_index, reduction) + else: + if not getattr(cross_entropy, "_has_logged_once", False): + logger.info("using fused cross entropy") + cross_entropy._has_logged_once = True + + half_to_float = logits.dtype == torch.half + losses = xentropy.SoftmaxCrossEntropyLoss.apply( + logits, + target, + 0.0, + ignore_index, + half_to_float, + ) + if reduction == "sum": + return losses.sum() + elif reduction == "mean": + if ignore_index >= 0: + return losses.sum() / target.ne(ignore_index).sum() + else: + return losses.mean() + elif reduction == "none": + return losses + else: + raise NotImplementedError + +except ImportError: + + def cross_entropy(logits, target, ignore_index=-100, reduction="mean"): + return _cross_entropy_pytorch(logits, target, ignore_index, reduction) diff --git a/fairseq/fairseq/modules/cuda_utils.cu b/fairseq/fairseq/modules/cuda_utils.cu new file mode 100644 index 0000000..924f852 --- /dev/null +++ b/fairseq/fairseq/modules/cuda_utils.cu @@ -0,0 +1,202 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +template <typename U, typename V> +constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) { + return (a + b - 1) / b; +} + +template <int FS, int SB, int padding_l, typename scalar_t> +__inline__ __device__ void zeroSharedMem(scalar_t* data) { + /* + Given an array of length FS + SB, zero out the first padding_l and last + (FS - padding_l) values in the array + */ + + int tid = threadIdx.x; + + if (FS < SB) { + // zero all if we have enough threads in a block to do all of them + if (tid < padding_l || tid > SB - FS + padding_l - 1) { + data[tid] = scalar_t(0.0); + } + } else { + // otherwise zero out one block at a time + const int numIterations = divUp<int, int>(FS, SB); + for (int i = 0; i < numIterations; i++) { + int offset = i * SB; + if (tid + offset < padding_l) { + data[tid + offset] = scalar_t(0.0); + } else if (tid + offset < FS) { + data[SB + tid + offset] = scalar_t(0.0); + } + } + } +} + +template <typename scalar_t> +__inline__ __device__ scalar_t warpReduce(scalar_t data) { + /* + Reduce an array within each warp. After processing all values in warp will + caontain the sum of all original values in that warp. + + data - pointer to data to reduce + */ + data += __shfl_xor_sync(SHFL_MASK, data, 16); + data += __shfl_xor_sync(SHFL_MASK, data, 8); + data += __shfl_xor_sync(SHFL_MASK, data, 4); + data += __shfl_xor_sync(SHFL_MASK, data, 2); + data += __shfl_xor_sync(SHFL_MASK, data, 1); + return data; +} + +template <typename scalar_t> +__inline__ __device__ scalar_t blockReduce(scalar_t data) { + /* + Reduce an entire array on the block level. After processing, the + first value in the array will contain the reduced sum. 
+ + data - pointer to data to reduce + */ + + static __shared__ scalar_t warpSum[32]; + const int tid = threadIdx.x; + int wid = tid / 32; + int lane = tid % 32; + + __syncthreads(); + + // reduce each warp then write to shared memory + scalar_t sum = warpReduce(data); + if (lane == 0) { + warpSum[wid] = sum; + } + + __syncthreads(); + + scalar_t v; + // perform final sum of partial warp sums + if (tid < blockDim.x / 32) { + v = warpSum[lane]; + } else { + v = scalar_t(0.0); + } + + if (wid == 0) { + v = warpReduce(v); + } + __syncthreads(); + + return v; +} + +void checkCudaStatus(cudaError_t status, int lineNumber = -1) { + if (status != cudaSuccess) { + std::cout << cudaGetErrorString(status) << " at line " << lineNumber + << std::endl; + std::cout << "Exiting" << std::endl; + exit(1); + } +} + +template <int FS, int SB, int padding_l, typename scalar_t> +__device__ void load_input_to_shared( + const scalar_t* input, // global memory + int inputOffset, + int sequenceLength, + int iteration, + int numIterations, + bool no_prev, + scalar_t* output /* shared memory */) { + /* + Load a block size of input into shared memory with + right and left overhang of total size FS. If previously + loaded memory, overlap will be shifted over to reduce + global memory access + + input - pointer to start of channel sequence + inputOffset - how far in the sequence to start loading + sequenceLength - total length of sequence + iteration - which block of sequence we are loading + numIterations - total number of blocks to load + no_prev - whether to load the whole block if the previous block + wasn't loaded + output - shared memory to write input to + */ + + const int tid = threadIdx.x; + + // Load the left "overhang" of input + if (iteration > 0) { + if (padding_l < SB) { + // load all at once + if (tid < padding_l) { + output[tid] = + (no_prev) ? 
input[inputOffset - padding_l + tid] : output[tid + SB]; + } + } else { + // load in chunks of size SB + int numIterations = divUp<int, int>(padding_l, SB); + for (int i = 0; i < numIterations; i++) { + int offset = i * SB; + if ((tid + offset) < padding_l) { + output[tid + offset] = (no_prev) + ? input[inputOffset - padding_l + tid + offset] + : output[tid + offset + SB]; + } + } + } + } + + // Load the right "overhang" of input + if (iteration < (numIterations - 1)) { + const int elementsLeft = sequenceLength - (iteration + 1) * SB; + + if ((FS - padding_l) < SB) { + // load all at once + if (tid < (FS - padding_l)) { + output[padding_l + SB + tid] = (tid < elementsLeft) + ? input[inputOffset + SB + tid] + : scalar_t(0.0); + } + } else { + // load in chunks of size SB + int numIterations = divUp<int, int>(FS - padding_l, SB); + for (int i = 0; i < numIterations; i++) { + int offset = i * SB; + if ((tid + offset) < (FS - padding_l)) { + output[padding_l + SB + tid + offset] = + ((tid + offset) < elementsLeft) + ? input[inputOffset + SB + tid + offset] + : scalar_t(0.0); + } + } + } + } + + // We should also clear out the right "overhang" + if (iteration == (numIterations - 1)) { + if ((FS - padding_l) < SB) { + // clear out all at once + if (tid < (FS - padding_l)) { + output[padding_l + SB + tid] = scalar_t(0.0); + } + } else { + // clear in chunks of size SB + int numIterations = divUp<int, int>(FS - padding_l, SB); + for (int i = 0; i < numIterations; i++) { + int offset = i * SB; + if ((tid + offset) < (FS - padding_l)) { + output[padding_l + SB + tid + offset] = scalar_t(0.0); + } + } + } + } + output[tid + padding_l] = ((inputOffset + tid) < sequenceLength) + ? 
input[inputOffset + tid] + : scalar_t(0.0); +} diff --git a/fairseq/fairseq/modules/downsampled_multihead_attention.py b/fairseq/fairseq/modules/downsampled_multihead_attention.py new file mode 100644 index 0000000..5e42942 --- /dev/null +++ b/fairseq/fairseq/modules/downsampled_multihead_attention.py @@ -0,0 +1,317 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +# + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.scalar_bias import scalar_bias + + +class SingleHeadAttention(nn.Module): + """ + Single-head attention that supports Gating and Downsampling + """ + + def __init__( + self, + out_channels, + embed_dim, + head_dim, + head_index, + dropout=0.0, + bias=True, + project_input=True, + gated=False, + downsample=False, + num_heads=1, + ): + super().__init__() + self.embed_dim = embed_dim + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + self.head_index = head_index + self.head_dim = head_dim + self.project_input = project_input + self.gated = gated + self.downsample = downsample + self.num_heads = num_heads + self.projection = None + + k_layers = [] + v_layers = [] + if self.downsample: + k_layers.append(Downsample(self.head_index)) + v_layers.append(Downsample(self.head_index)) + out_proj_size = self.head_dim + else: + out_proj_size = self.head_dim * self.num_heads + if self.gated: + k_layers.append(GatedLinear(self.embed_dim, out_proj_size, bias=bias)) + self.in_proj_q = GatedLinear(self.embed_dim, out_proj_size, bias=bias) + v_layers.append(GatedLinear(self.embed_dim, out_proj_size, bias=bias)) + else: + k_layers.append(Linear(self.embed_dim, out_proj_size, bias=bias)) + self.in_proj_q = Linear(self.embed_dim, out_proj_size, bias=bias) + 
v_layers.append(Linear(self.embed_dim, out_proj_size, bias=bias)) + + self.in_proj_k = nn.Sequential(*k_layers) + self.in_proj_v = nn.Sequential(*v_layers) + + if self.downsample: + self.out_proj = Linear(out_proj_size, self.head_dim, bias=bias) + else: + self.out_proj = Linear(out_proj_size, out_channels, bias=bias) + + self.scaling = self.head_dim**-0.5 + + def forward( + self, + query, + key, + value, + mask_future_timesteps=False, + key_padding_mask=None, + use_scalar_bias=False, + ): + """Input shape: Time x Batch x Channel + Self-attention can be implemented by passing in the same arguments for + query, key and value. Future timesteps can be masked with the + `mask_future_timesteps` argument. Padding elements can be excluded from + the key by passing a binary ByteTensor (`key_padding_mask`) with shape: + batch x src_len, where padding elements are indicated by 1s. + """ + src_len, bsz, out_channels = key.size() + tgt_len = query.size(0) + assert list(query.size()) == [tgt_len, bsz, out_channels] + assert key.size() == value.size() + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.downsample: + size = bsz + else: + size = bsz * self.num_heads + + k = key + v = value + q = query + if self.project_input: + q = self.in_proj_q(q) + k = self.in_proj_k(k) + v = self.in_proj_v(v) + src_len = k.size()[0] + q *= self.scaling + + if not self.downsample: + q = q.view(tgt_len, size, self.head_dim) + k = k.view(src_len, size, self.head_dim) + v = v.view(src_len, size, self.head_dim) + + q = q.transpose(0, 1) + k = k.transpose(0, 1) + v = v.transpose(0, 1) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + if mask_future_timesteps: + assert ( + query.size() == key.size() + ), "mask_future_timesteps only applies to self-attention" + attn_weights *= torch.tril( + attn_weights.data.new([1]).expand(tgt_len, tgt_len).clone(), + diagonal=-1, + )[:, :: self.head_index + 1 if self.downsample else 
1].unsqueeze(0) + attn_weights += torch.triu( + attn_weights.data.new([-math.inf]).expand(tgt_len, tgt_len).clone(), + diagonal=0, + )[:, :: self.head_index + 1 if self.downsample else 1].unsqueeze(0) + tgt_size = tgt_len + if use_scalar_bias: + attn_weights = scalar_bias(attn_weights, 2) + v = scalar_bias(v, 1) + tgt_size += 1 + + if key_padding_mask is not None: + # don't attend to padding symbols + if key_padding_mask.max() > 0: + if self.downsample: + attn_weights = attn_weights.view(bsz, 1, tgt_len, src_len) + else: + attn_weights = attn_weights.view( + size, self.num_heads, tgt_len, src_len + ) + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + -math.inf, + ) + attn_weights = attn_weights.view(size, tgt_len, src_len) + attn_weights = F.softmax(attn_weights, dim=-1) + attn_weights = self.dropout_module(attn_weights) + + attn = torch.bmm(attn_weights, v) + if self.downsample: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.head_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim) + + attn = self.out_proj(attn) + + return attn, attn_weights + + +class DownsampledMultiHeadAttention(nn.ModuleList): + """ + Multi-headed attention with Gating and Downsampling + """ + + def __init__( + self, + out_channels, + embed_dim, + num_heads, + dropout=0.0, + bias=True, + project_input=True, + gated=False, + downsample=False, + ): + self.embed_dim = embed_dim + self.num_heads = num_heads + self.head_dim = embed_dim // num_heads + self.downsample = downsample + self.gated = gated + self.project_input = project_input + assert self.head_dim * num_heads == embed_dim + + if self.downsample: + attention_heads = [] + for index in range(self.num_heads): + attention_heads.append( + SingleHeadAttention( + out_channels, + self.embed_dim, + self.head_dim, + index, + dropout, + bias, + self.project_input, + self.gated, + self.downsample, + self.num_heads, + ) + ) + 
super().__init__(modules=attention_heads) + self.out_proj = Linear(embed_dim, out_channels, bias=bias) + else: + # either we have a list of attention heads, or just one attention head + # if not being downsampled, we can do the heads with one linear layer instead of separate ones + super().__init__() + self.attention_module = SingleHeadAttention( + out_channels, + self.embed_dim, + self.head_dim, + 1, + dropout, + bias, + self.project_input, + self.gated, + self.downsample, + self.num_heads, + ) + + def forward( + self, + query, + key, + value, + mask_future_timesteps=False, + key_padding_mask=None, + use_scalar_bias=False, + ): + src_len, bsz, embed_dim = key.size() + tgt_len = query.size(0) + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + assert key.size() == value.size() + + tgt_size = tgt_len + if use_scalar_bias: + tgt_size += 1 + + attn = [] + attn_weights = [] + if self.downsample: + for attention_head_number in range(self.num_heads): + # call the forward of each attention head + _attn, _attn_weight = self[attention_head_number]( + query, + key, + value, + mask_future_timesteps, + key_padding_mask, + use_scalar_bias, + ) + attn.append(_attn) + attn_weights.append(_attn_weight) + full_attn = torch.cat(attn, dim=2) + full_attn = self.out_proj(full_attn) + return full_attn, attn_weights[0].clone() + else: + _attn, _attn_weight = self.attention_module( + query, + key, + value, + mask_future_timesteps, + key_padding_mask, + use_scalar_bias, + ) + attn.append(_attn) + attn_weights.append(_attn_weight) + full_attn = torch.cat(attn, dim=2) + full_attn_weights = torch.cat(attn_weights) + full_attn_weights = full_attn_weights.view( + bsz, self.num_heads, tgt_size, src_len + ) + full_attn_weights = full_attn_weights.sum(dim=1) / self.num_heads + return full_attn, full_attn_weights + + +class Downsample(nn.Module): + """ + Selects every nth element, where n is the index + """ + + def __init__(self, index): + 
super().__init__() + self.index = index + + def forward(self, x): + return x[:: self.index + 1] + + +def Linear(in_features, out_features, dropout=0.0, bias=True): + """Weight-normalized Linear layer (input: B x T x C)""" + m = nn.Linear(in_features, out_features, bias=bias) + m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features)) + m.bias.data.zero_() + return nn.utils.weight_norm(m) + + +def GatedLinear(in_features, out_features, dropout=0.0, bias=True): + """Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units""" + return nn.Sequential( + Linear(in_features, out_features * 4, dropout, bias), + nn.GLU(), + Linear(out_features * 2, out_features * 2, dropout, bias), + nn.GLU(), + Linear(out_features, out_features, dropout, bias), + ) diff --git a/fairseq/fairseq/modules/dynamic_convolution.py b/fairseq/fairseq/modules/dynamic_convolution.py new file mode 100644 index 0000000..0ff02cd --- /dev/null +++ b/fairseq/fairseq/modules/dynamic_convolution.py @@ -0,0 +1,526 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.incremental_decoding_utils import ( + FairseqIncrementalState, + with_incremental_state, +) +from fairseq.modules.fairseq_dropout import FairseqDropout +from torch import Tensor + +from .unfold import unfold1d + + +def DynamicConv( + input_size, + kernel_size=1, + padding_l=None, + num_heads=1, + weight_dropout=0.0, + weight_softmax=False, + renorm_padding=False, + bias=False, + conv_bias=False, + query_size=None, + in_proj=False, +): + if torch.cuda.is_available(): + try: + from fairseq.modules.dynamicconv_layer import DynamicconvLayer + + return DynamicconvLayer( + input_size, + kernel_size=kernel_size, + padding_l=padding_l, + num_heads=num_heads, + weight_dropout=weight_dropout, + weight_softmax=weight_softmax, + renorm_padding=renorm_padding, + bias=bias, + conv_bias=conv_bias, + query_size=query_size, + ) + except ImportError as e: + print(e) + return DynamicConv1dTBC( + input_size, + kernel_size=kernel_size, + padding_l=padding_l, + num_heads=num_heads, + weight_dropout=weight_dropout, + weight_softmax=weight_softmax, + renorm_padding=renorm_padding, + bias=bias, + conv_bias=conv_bias, + query_size=query_size, + ) + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + + +@with_incremental_state +class DynamicConv1dTBC(nn.Module): + """Dynamic lightweight convolution taking T x B x C inputs + Args: + input_size: # of channels of the input + kernel_size: convolution channels + padding_l: padding to the left when using "same" padding + num_heads: number of heads used. 
@with_incremental_state
class DynamicConv1dTBC(nn.Module):
    """Dynamic lightweight convolution taking T x B x C inputs.

    The convolution filters are not stored parameters: they are predicted
    per time step and per head by ``weight_linear`` from the query (or from
    the input itself).

    Args:
        input_size: # of channels of the input
        kernel_size: width of the convolution kernel
        padding_l: padding to the left when using "same" padding
        num_heads: number of heads used; each head predicts ``kernel_size``
            filter taps per time step
        weight_dropout: the drop rate of the DropConnect to drop the weight
        weight_softmax: normalize the weight with softmax before the convolution
        renorm_padding: re-normalize the filters to ignore the padded part
            (only the non-padding parts sum up to 1)
        bias: use bias in the filter-predicting linear layer (only applied
            when ``in_proj`` is False)
        conv_bias: bias of the convolution
        query_size: specified when feeding a different input as the query
        in_proj: project the input and generate the filter together

    Shape:
        Input: TxBxC, i.e. (timesteps, batch_size, input_size)
        Output: TxBxC, i.e. (timesteps, batch_size, input_size)

    Attributes:
        weight_linear: linear layer predicting the filters (and, with
            ``in_proj``, the projected input as well)
        conv_bias: learnable bias of shape ``(input_size)``, or None
    """

    def __init__(
        self,
        input_size,
        kernel_size=1,
        padding_l=None,
        num_heads=1,
        weight_dropout=0.0,
        weight_softmax=False,
        renorm_padding=False,
        bias=False,
        conv_bias=False,
        query_size=None,
        in_proj=False,
    ):
        super().__init__()
        self.input_size = input_size
        self.query_size = input_size if query_size is None else query_size
        self.kernel_size = kernel_size
        self.padding_l = padding_l
        self.num_heads = num_heads
        self.weight_dropout_module = FairseqDropout(
            weight_dropout, module_name=self.__class__.__name__
        )
        self.weight_softmax = weight_softmax
        self.renorm_padding = renorm_padding

        if in_proj:
            # One projection yields both the new input (input_size channels)
            # and the filters (num_heads * kernel_size values).
            self.weight_linear = Linear(
                self.input_size, self.input_size + num_heads * kernel_size * 1
            )
        else:
            self.weight_linear = Linear(
                self.query_size, num_heads * kernel_size * 1, bias=bias
            )
        if conv_bias:
            # Uninitialised here; zeroed by reset_parameters() below.
            self.conv_bias = nn.Parameter(torch.Tensor(input_size))
        else:
            self.conv_bias = None
        self.reset_parameters()

    @property
    def in_proj(self):
        """Whether ``weight_linear`` also projects the input.

        Derived from the output width of ``weight_linear`` rather than stored.
        """
        return (
            self.weight_linear.out_features
            == self.input_size + self.num_heads * self.kernel_size
        )

    def reset_parameters(self):
        """Re-initialise ``weight_linear`` and zero ``conv_bias`` if present."""
        self.weight_linear.reset_parameters()
        if self.conv_bias is not None:
            nn.init.constant_(self.conv_bias, 0.0)

    def forward(self, x, incremental_state=None, query=None, unfold=None):
        """Assuming the input, x, of the shape T x B x C and producing an output in the shape T x B x C
        args:
            x: Input of shape T x B x C, i.e. (timesteps, batch_size, input_size)
            incremental_state: A dict to keep the state
            unfold: unfold the input or not. If not, we use the matrix trick instead
            query: use the specified query to predict the conv filters
        """
        unfold = (
            x.size(0) > 512 if unfold is None else unfold
        )  # use unfold mode as default for long sequence to save memory
        # Incremental decoding is only handled by the unfolded path.
        unfold = unfold or (incremental_state is not None)
        # A separate query cannot be combined with in_proj.
        assert query is None or not self.in_proj

        if query is None:
            query = x
        if unfold:
            output = self._forward_unfolded(x, incremental_state, query)
        else:
            output = self._forward_expanded(x, incremental_state, query)

        if self.conv_bias is not None:
            output = output + self.conv_bias.view(1, 1, -1)
        return output

    def _forward_unfolded(self, x, incremental_state, query):
        """The conventional implementation of convolutions.
        Unfolding the input by having a window shifting to the right."""
        T, B, C = x.size()
        K, H = self.kernel_size, self.num_heads
        R = C // H  # channels per head
        assert R * H == C == self.input_size

        if self.in_proj:
            # Split the joint projection into the new input and the filters.
            proj = self.weight_linear(x)
            x = proj.narrow(2, 0, self.input_size).contiguous()
            weight = (
                proj.narrow(2, self.input_size, H * K).contiguous().view(T * B * H, -1)
            )
        else:
            weight = self.weight_linear(query).view(T * B * H, -1)

        # renorm_padding is only implemented in _forward_expanded
        assert not self.renorm_padding or incremental_state is not None

        if incremental_state is not None:
            # Append the new step to the cached inputs along a 4th axis.
            input_buffer = self._get_input_buffer(incremental_state)
            if input_buffer is None:
                input_buffer = x.new()
            x_unfold = torch.cat([input_buffer, x.unsqueeze(3)], dim=3)
            if self.kernel_size > 1:
                # Keep only the last kernel_size - 1 steps for the next call.
                self._set_input_buffer(
                    incremental_state, x_unfold[:, :, :, -self.kernel_size + 1 :]
                )
            x_unfold = x_unfold.view(T * B * H, R, -1)
        else:
            padding_l = self.padding_l
            # Kernel longer than the sequence: keep only the last T taps.
            if K > T and padding_l == K - 1:
                weight = weight.narrow(1, K - T, T)
                K, padding_l = T, T - 1
            # unfold the input: T x B x C --> T' x B x C x K
            x_unfold = unfold1d(x, K, padding_l, 0)
            x_unfold = x_unfold.view(T * B * H, R, K)

        if self.weight_softmax and not self.renorm_padding:
            weight = F.softmax(weight, dim=1)
        weight = weight.narrow(1, 0, K)

        if incremental_state is not None:
            # Use only as many taps as there are buffered time steps.
            weight = weight[:, -x_unfold.size(2) :]
            K = weight.size(1)

        if self.weight_softmax and self.renorm_padding:
            weight = F.softmax(weight, dim=1)

        weight = self.weight_dropout_module(weight, inplace=False)

        output = torch.bmm(x_unfold, weight.unsqueeze(2))  # T*B*H x R x 1
        output = output.view(T, B, C)
        return output

    def _forward_expanded(self, x, incremental_stat, query):
        """Turn the convolution filters into band matrices and do matrix multiplication.
        This is faster when the sequence is short, but less memory efficient.
        This is not used in the decoder during inference.

        Note: ``incremental_stat`` is accepted for signature symmetry with
        ``_forward_unfolded`` but is not read in this method.
        """
        T, B, C = x.size()
        K, H = self.kernel_size, self.num_heads
        R = C // H  # channels per head
        assert R * H == C == self.input_size
        if self.in_proj:
            proj = self.weight_linear(x)
            x = proj.narrow(2, 0, self.input_size).contiguous()
            weight = (
                proj.narrow(2, self.input_size, H * K).contiguous().view(T * B * H, -1)
            )
        else:
            weight = self.weight_linear(query).view(T * B * H, -1)

        if not self.renorm_padding:
            if self.weight_softmax:
                weight = F.softmax(weight, dim=1)
            weight = self.weight_dropout_module(weight, inplace=False)
        weight = weight.narrow(1, 0, K).contiguous()
        weight = weight.view(T, B * H, K).transpose(0, 1)

        x = x.view(T, B * H, R).transpose(0, 1)
        if self.weight_softmax and self.renorm_padding:
            # turn the convolution filters into band matrices
            # -inf outside the band so the softmax below ignores those cells.
            weight_expanded = weight.new(B * H, T, T + K - 1).fill_(float("-inf"))
            # Row stride of T + K (one more than the row width) shifts each
            # row's K-wide window one column to the right.
            weight_expanded.as_strided(
                (B * H, T, K), (T * (T + K - 1), T + K, 1)
            ).copy_(weight)
            weight_expanded = weight_expanded.narrow(2, self.padding_l, T)
            # normalize the weight over valid positions like self-attention
            weight_expanded = F.softmax(weight_expanded, dim=2)
            weight_expanded = self.weight_dropout_module(weight_expanded, inplace=False)
        else:
            P = self.padding_l
            # For efficiency, we cut the kernel size and reduce the padding when the kernel is larger than the length
            if K > T and P == K - 1:
                weight = weight.narrow(2, K - T, T)
                K, P = T, T - 1
            # turn the convolution filters into band matrices
            weight_expanded = weight.new_zeros(B * H, T, T + K - 1, requires_grad=False)
            weight_expanded.as_strided(
                (B * H, T, K), (T * (T + K - 1), T + K, 1)
            ).copy_(weight)
            weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
        output = torch.bmm(weight_expanded, x)
        output = output.transpose(0, 1).contiguous().view(T, B, C)
        return output

    def reorder_incremental_state(self, incremental_state, new_order):
        """Reorder the cached input buffer along dim 1 according to ``new_order``."""
        input_buffer = self._get_input_buffer(incremental_state)
        if input_buffer is not None:
            input_buffer = input_buffer.index_select(1, new_order)
            self._set_input_buffer(incremental_state, input_buffer)

    def _get_input_buffer(self, incremental_state):
        """Fetch this module's ``"input_buffer"`` entry from the incremental state."""
        return utils.get_incremental_state(self, incremental_state, "input_buffer")

    def _set_input_buffer(self, incremental_state, new_buffer):
        """Store ``new_buffer`` as this module's ``"input_buffer"`` entry."""
        return utils.set_incremental_state(
            self, incremental_state, "input_buffer", new_buffer
        )

    def extra_repr(self):
        """Return the configuration summary used in the module's repr."""
        s = "{}, kernel_size={}, padding_l={}, num_heads={}, weight_softmax={}, conv_bias={}, renorm_padding={}, in_proj={}".format(
            self.input_size,
            self.kernel_size,
            self.padding_l,
            self.num_heads,
            self.weight_softmax,
            self.conv_bias is not None,
            self.renorm_padding,
            self.in_proj,
        )

        if self.query_size != self.input_size:
            s += ", query_size={}".format(self.query_size)
        if self.weight_dropout_module.p > 0.0:
            s += ", weight_dropout={}".format(self.weight_dropout_module.p)
        return s
class DynamicConv_scripatable(nn.Module, FairseqIncrementalState):
    """Dynamic lightweight convolution taking T x B x C inputs.

    Variant of the dynamic convolution that always uses the unfolded
    implementation and carries type annotations on its state-handling
    methods.

    Args:
        input_size: # of channels of the input
        kernel_size: width of the convolution kernel
        padding_l: padding to the left when using "same" padding
        num_heads: number of heads used; each head predicts ``kernel_size``
            filter taps per time step
        weight_dropout: the drop rate of the DropConnect to drop the weight
        weight_softmax: normalize the weight with softmax before the convolution
        renorm_padding: re-normalize the filters to ignore the padded part
            (only the non-padding parts sum up to 1)
        bias: use bias in the filter-predicting linear layer
        conv_bias: add a learnable bias to the convolution output
        query_size: specified when feeding a different input as the query
        in_proj: project the input and generate the filter together

    Shape:
        Input: TxBxC, i.e. (timesteps, batch_size, input_size)
        Output: TxBxC, i.e. (timesteps, batch_size, input_size)

    Attributes:
        weight_linear: linear layer predicting the filters
        conv_bias: parameter of shape ``(1, 1, input_size)``; always
            allocated, but only added to the output when ``has_conv_bias``
        has_conv_bias: whether ``conv_bias`` is applied in ``forward``
    """

    def __init__(
        self,
        input_size,
        kernel_size=1,
        padding_l=None,
        num_heads=1,
        weight_dropout=0.0,
        weight_softmax=False,
        renorm_padding=False,
        bias=False,
        conv_bias=False,
        query_size=None,
        in_proj=False,
    ):
        super().__init__()
        self.input_size = input_size
        self.query_size = input_size if query_size is None else query_size
        self.kernel_size = kernel_size
        self.padding_l = padding_l
        self.num_heads = num_heads
        self.weight_dropout_module = FairseqDropout(
            weight_dropout, module_name=self.__class__.__name__
        )
        self.weight_softmax = weight_softmax
        self.renorm_padding = renorm_padding

        if in_proj:
            self.weight_linear = Linear(
                self.input_size, self.input_size + num_heads * kernel_size * 1
            )
        else:
            self.weight_linear = Linear(
                self.query_size, num_heads * kernel_size * 1, bias=bias
            )
        self.in_proj = (
            self.weight_linear.out_features
            == self.input_size + self.num_heads * self.kernel_size
        )
        self.has_conv_bias = conv_bias
        # The parameter exists even when conv_bias is disabled, so it must be
        # initialised deterministically: uninitialised memory here would end
        # up in checkpoints.
        self.conv_bias = nn.Parameter(torch.zeros(1, 1, input_size))
        self.init_incremental_state()

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise ``weight_linear`` and zero ``conv_bias``."""
        self.weight_linear.reset_parameters()
        nn.init.constant_(self.conv_bias, 0.0)

    def forward(
        self,
        x,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        query: Optional[Tensor] = None,
    ):
        """Assuming the input, x, of the shape T x B x C and producing an output in the shape T x B x C
        args:
            x: Input of shape T x B x C, i.e. (timesteps, batch_size, input_size)
            incremental_state: A dict to keep the state
            query: use the specified query to predict the conv filters
        """
        # A separate query cannot be combined with in_proj.
        assert query is None or not self.in_proj

        if query is None:
            query = x

        output = self._forward_unfolded(x, incremental_state, query)

        if self.has_conv_bias:
            output = output + self.conv_bias
        return output

    def _forward_unfolded(
        self,
        x,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
        query,
    ):
        """The conventional implementation of convolutions.
        Unfolding the input by having a window shifting to the right."""
        T, B, C = x.size()
        K, H = self.kernel_size, self.num_heads
        R = C // H  # channels per head
        assert R * H == C == self.input_size

        TxBxH = T * B * H

        if self.in_proj:
            # Split the joint projection into the new input and the filters.
            proj = self.weight_linear(x)
            x = proj.narrow(2, 0, self.input_size).contiguous()
            weight = proj.narrow(2, self.input_size, H * K).contiguous().view(TxBxH, -1)
        else:
            weight = self.weight_linear(query).view(TxBxH, -1)

        # renorm_padding is only implemented in _forward_expanded
        assert not self.renorm_padding or incremental_state is not None

        if incremental_state is not None:
            input_buffer = self._get_input_buffer(incremental_state)
            if input_buffer is not None:
                x_unfold = torch.cat([input_buffer, x.unsqueeze(3)], dim=3)
            else:
                x_unfold = x.unsqueeze(3).clone()
            if self.kernel_size > 1:
                # Keep only the last kernel_size - 1 steps for the next call.
                self._set_input_buffer(
                    incremental_state, x_unfold[:, :, :, -self.kernel_size + 1 :]
                )
            x_unfold = x_unfold.view(TxBxH, R, -1)
        else:
            padding_l = self.padding_l
            # Kernel longer than the sequence: keep only the last T taps.
            if K > T and padding_l == K - 1:
                weight = weight.narrow(1, K - T, T)
                K, padding_l = T, T - 1
            # unfold the input: T x B x C --> T' x B x C x K
            x_unfold = unfold1d(x, K, padding_l, 0.0)
            x_unfold = x_unfold.view(TxBxH, R, K)

        if self.weight_softmax and not self.renorm_padding:
            weight = F.softmax(weight, dim=1)
        weight = weight.narrow(1, 0, K)

        if incremental_state is not None:
            # Use only as many taps as there are buffered time steps.
            weight = weight[:, -(x_unfold.size(2)) :]
            K = weight.size(1)

        if self.weight_softmax and self.renorm_padding:
            weight = F.softmax(weight, dim=1)

        weight = self.weight_dropout_module(weight, inplace=False)

        output = torch.bmm(x_unfold, weight.unsqueeze(2))  # T x B x H x R x 1
        output = output.view(T, B, C)
        return output

    def reorder_incremental_state(
        self,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
        new_order: Tensor,
    ):
        """Reorder the cached input buffer along dim 1 according to ``new_order``."""
        input_buffer = self._get_input_buffer(incremental_state)
        if input_buffer is not None:
            input_buffer = input_buffer.index_select(1, new_order)
            self._set_input_buffer(incremental_state, input_buffer)

    def _get_input_buffer(
        self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
    ):
        """Return the cached input buffer, or None when nothing is stored."""
        result = self.get_incremental_state(incremental_state, "input_buffer")
        if result is not None and "input_buffer" in result:
            return result["input_buffer"]
        else:
            return None

    def _set_input_buffer(
        self,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
        new_buffer: Optional[Tensor],
    ):
        """Store ``new_buffer`` and return the (possibly updated) state dict."""
        result = self.set_incremental_state(
            incremental_state, "input_buffer", {"input_buffer": new_buffer}
        )
        if result is not None:
            incremental_state = result
        return incremental_state

    def extra_repr(self):
        """Return the configuration summary used in the module's repr."""
        s = "{}, kernel_size={}, padding_l={}, num_heads={}, weight_softmax={}, conv_bias={}, renorm_padding={}, in_proj={}".format(  # noqa
            self.input_size,
            self.kernel_size,
            self.padding_l,
            self.num_heads,
            self.weight_softmax,
            # conv_bias is always a Parameter here; the flag says whether it is used.
            self.has_conv_bias,
            self.renorm_padding,
            self.in_proj,
        )

        if self.query_size != self.input_size:
            s += ", query_size={}".format(self.query_size)
        if self.weight_dropout_module.p > 0.0:
            s += ", weight_dropout={}".format(self.weight_dropout_module.p)
        return s
def logsumexp(x, dim=1):
    """Compute log-sum-exp over ``dim`` in float32, cast back to ``x``'s dtype."""
    return torch.logsumexp(x.float(), dim=dim).type_as(x)


class DynamicCRF(nn.Module):
    """Dynamic CRF layer is used to approximate the traditional
    Conditional Random Fields (CRF)
    $P(y | x) = 1/Z(x) exp(sum_i s(y_i, x) + sum_i t(y_{i-1}, y_i, x))$

    where in this function, we assume the emission scores (s) are given,
    and the transition score is a |V| x |V| matrix $M$

    in the following two aspects:
     (1) it used a low-rank approximation for the transition matrix:
         $M = E_1 E_2^T$
     (2) it used a beam to estimate the normalizing factor Z(x)

    Args:
        num_embedding: vocabulary size |V|.
        low_rank: rank of the transition-matrix factorisation.
        beam_size: default number of candidate tokens kept per position.
    """

    def __init__(self, num_embedding, low_rank=32, beam_size=64):
        super().__init__()

        self.E1 = nn.Embedding(num_embedding, low_rank)
        self.E2 = nn.Embedding(num_embedding, low_rank)

        self.vocb = num_embedding
        self.rank = low_rank
        self.beam = beam_size

    def extra_repr(self):
        """Return the configuration summary used in the module's repr."""
        return "vocab_size={}, low_rank={}, beam_size={}".format(
            self.vocb, self.rank, self.beam
        )

    def forward(self, emissions, targets, masks, beam=None):
        """
        Compute the conditional log-likelihood of a sequence of target tokens given emission scores

        Args:
            emissions (`~torch.Tensor`): Emission score are usually the unnormalized decoder output
                ``(batch_size, seq_len, vocab_size)``. We assume batch-first
            targets (`~torch.LongTensor`): Sequence of target token indices
                ``(batch_size, seq_len)``
            masks (`~torch.ByteTensor` or `~torch.BoolTensor`): Mask tensor
                with the same size as targets
            beam (int, optional): beam size overriding the module default

        Returns:
            `~torch.Tensor`: approximated log-likelihood
        """
        numerator = self._compute_score(emissions, targets, masks)
        denominator = self._compute_normalizer(emissions, targets, masks, beam)
        return numerator - denominator

    def forward_decoder(self, emissions, masks=None, beam=None):
        """
        Find the most likely output sequence using Viterbi algorithm.

        Args:
            emissions (`~torch.Tensor`): Emission score are usually the unnormalized decoder output
                ``(batch_size, seq_len, vocab_size)``. We assume batch-first
            masks (`~torch.ByteTensor` or `~torch.BoolTensor`): Mask tensor
                with the same size as targets
            beam (int, optional): beam size overriding the module default

        Returns:
            tuple: ``(scores, tokens)`` -- per-position scores and the
            decoded `~torch.LongTensor` sequence from the CRF model
        """
        return self._viterbi_decode(emissions, masks, beam)

    def _compute_score(self, emissions, targets, masks=None):
        """Score the target path: emission scores plus low-rank transitions."""
        emission_scores = emissions.gather(2, targets[:, :, None])[:, :, 0]  # B x T
        transition_scores = (self.E1(targets[:, :-1]) * self.E2(targets[:, 1:])).sum(2)

        scores = emission_scores
        scores[:, 1:] += transition_scores

        if masks is not None:
            scores = scores * masks.type_as(scores)
        return scores.sum(-1)

    def _compute_normalizer(self, emissions, targets=None, masks=None, beam=None):
        """Approximate log Z(x) by summing over a beam of tokens per position."""
        # HACK: we include "target" which is a heuristic for training
        # HACK: we use a beam of tokens to approximate the normalizing factor (which is bad?)

        beam = beam if beam is not None else self.beam
        batch_size, seq_len = emissions.size()[:2]
        if masks is not None:
            # torch.where expects a boolean condition; accept byte masks too.
            masks = masks.bool()
        if targets is not None:
            # Force the gold token into the beam by giving it an infinite score.
            # (np.float was removed in NumPy 1.24; the builtin is equivalent.)
            _emissions = emissions.scatter(2, targets[:, :, None], float("inf"))
            beam_targets = _emissions.topk(beam, 2)[1]
            beam_emission_scores = emissions.gather(2, beam_targets)
        else:
            beam_emission_scores, beam_targets = emissions.topk(beam, 2)
        beam_transition_score1 = self.E1(beam_targets[:, :-1])  # B x (T-1) x K x D
        beam_transition_score2 = self.E2(beam_targets[:, 1:])  # B x (T-1) x K x D
        beam_transition_matrix = torch.bmm(
            beam_transition_score1.view(-1, beam, self.rank),
            beam_transition_score2.view(-1, beam, self.rank).transpose(1, 2),
        )
        beam_transition_matrix = beam_transition_matrix.view(batch_size, -1, beam, beam)

        # compute the normalizer in the log-space
        score = beam_emission_scores[:, 0]  # B x K
        for i in range(1, seq_len):
            next_score = score[:, :, None] + beam_transition_matrix[:, i - 1]
            next_score = logsumexp(next_score, dim=1) + beam_emission_scores[:, i]

            if masks is not None:
                # Masked positions keep the previous score unchanged.
                score = torch.where(masks[:, i : i + 1], next_score, score)
            else:
                score = next_score

        # Sum (log-sum-exp) over all possible tags
        return logsumexp(score, dim=1)

    def _viterbi_decode(self, emissions, masks=None, beam=None):
        """Run beam-restricted Viterbi; return per-step scores and tokens."""
        # HACK: we use a beam of tokens to approximate the normalizing factor (which is bad?)

        beam = beam if beam is not None else self.beam
        batch_size, seq_len = emissions.size()[:2]
        if masks is not None:
            # torch.where expects a boolean condition; accept byte masks too.
            masks = masks.bool()
        beam_emission_scores, beam_targets = emissions.topk(beam, 2)
        beam_transition_score1 = self.E1(beam_targets[:, :-1])  # B x (T-1) x K x D
        beam_transition_score2 = self.E2(beam_targets[:, 1:])  # B x (T-1) x K x D
        beam_transition_matrix = torch.bmm(
            beam_transition_score1.view(-1, beam, self.rank),
            beam_transition_score2.view(-1, beam, self.rank).transpose(1, 2),
        )
        beam_transition_matrix = beam_transition_matrix.view(batch_size, -1, beam, beam)

        traj_tokens, traj_scores = [], []
        finalized_tokens, finalized_scores = [], []

        # compute the best path scores in the log-space
        score = beam_emission_scores[:, 0]  # B x K
        # Identity back-pointers, used at masked positions.
        dummy = (
            torch.arange(beam, device=score.device).expand(*score.size()).contiguous()
        )

        for i in range(1, seq_len):
            traj_scores.append(score)
            _score = score[:, :, None] + beam_transition_matrix[:, i - 1]
            _score, _index = _score.max(dim=1)
            _score = _score + beam_emission_scores[:, i]

            if masks is not None:
                score = torch.where(masks[:, i : i + 1], _score, score)
                index = torch.where(masks[:, i : i + 1], _index, dummy)
            else:
                score, index = _score, _index
            traj_tokens.append(index)

        # now running the back-tracing and find the best
        best_score, best_index = score.max(dim=1)
        finalized_tokens.append(best_index[:, None])
        finalized_scores.append(best_score[:, None])

        for idx, scs in zip(reversed(traj_tokens), reversed(traj_scores)):
            previous_index = finalized_tokens[-1]
            finalized_tokens.append(idx.gather(1, previous_index))
            finalized_scores.append(scs.gather(1, previous_index))

        finalized_tokens.reverse()
        finalized_tokens = torch.cat(finalized_tokens, 1)
        # Map beam slots back to vocabulary ids.
        finalized_tokens = beam_targets.gather(2, finalized_tokens[:, :, None])[:, :, 0]

        finalized_scores.reverse()
        finalized_scores = torch.cat(finalized_scores, 1)
        # Convert cumulative path scores into per-position increments.
        finalized_scores[:, 1:] = finalized_scores[:, 1:] - finalized_scores[:, :-1]

        return finalized_scores, finalized_tokens
def gen_forward():
    """Write ``dynamicconv_cuda_forward.cu`` into the current directory.

    The generated C++ dispatches ``dynamicconv_forward_kernel`` on the
    runtime filter size and left padding.  For each supported filter size the
    thread-block size is the first entry of ``block_sizes`` strictly greater
    than it (32 if there is none), and two paddings are emitted: ``k // 2``
    and ``k - 1``.
    """
    filter_sizes = [3, 5, 7, 15, 31, 63, 127, 255]
    block_sizes = [32, 64, 128, 256]

    prologue = """
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "dynamicconv_cuda.cuh"

std::vector<at::Tensor> dynamicconv_cuda_forward(at::Tensor input, at::Tensor weight, int padding_l) {

    at::DeviceGuard g(input.device());
    const auto minibatch = input.size(0);
    const auto numFeatures = input.size(1);
    const auto sequenceLength = input.size(2);

    const auto numHeads = weight.size(1);
    const auto filterSize = weight.size(2);

    const auto numFiltersInBlock = numFeatures / numHeads;
    const dim3 blocks(minibatch, numFeatures);

    auto output = at::zeros_like(input);
    auto stream = at::cuda::getCurrentCUDAStream();
"""

    switch_open = """
    switch(filterSize) {
"""

    case_tmpl = """
        case {k}:
"""

    launch_tmpl = """
            if (padding_l == {pad}) {{
                AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "dynamicconv_forward", ([&] {{
                    dynamicconv_forward_kernel<{k}, {b_size}, {pad}, scalar_t>
                    <<<blocks, {b_size}, 0, stream>>>(
                            input.data<scalar_t>(),
                            weight.data<scalar_t>(),
                            minibatch,
                            sequenceLength,
                            numFeatures,
                            numFiltersInBlock,
                            numHeads,
                            output.data<scalar_t>());
                }}));
            }} else
"""

    unsupported_padding = """
            {
                std::cout << "WARNING: Unsupported padding size - skipping forward pass" << std::endl;
            }
            break;\n
"""

    epilogue = """
        default:
            std::cout << "WARNING: Unsupported filter length passed - skipping forward pass" << std::endl;
    }

    return {output};
}
"""

    # Assemble the pieces in emission order, then write them out in one go.
    pieces = [prologue, switch_open]
    for k in filter_sizes:
        threads = next((b for b in block_sizes if b > k), 32)
        pieces.append(case_tmpl.format(k=k))
        pieces.extend(
            launch_tmpl.format(k=k, b_size=threads, pad=pad)
            for pad in (k // 2, k - 1)
        )
        pieces.append(unsupported_padding)
    pieces.append(epilogue)

    with open("dynamicconv_cuda_forward.cu", "w") as forward:
        forward.write("".join(pieces))
def gen_backward():
    """Write ``dynamicconv_cuda_backward.cu`` into the current directory.

    The generated C++ first branches on ``sequenceLength`` (multiples of 32
    up to 512) and then on the filter size.  Within a sequence-length bucket
    the thread-block size equals the bucket bound while it does not exceed
    the per-filter threshold, and the per-filter minimum block size
    otherwise; the trailing ``else`` branch always uses the minimum.
    """
    filter_sizes = [3, 5, 7, 15, 31, 63, 127, 255]
    seq_thresholds = [512, 512, 512, 512, 512, 380, 256, 256]
    fallback_blocks = [64, 64, 64, 64, 64, 64, 128, 256]
    seq_bounds = [32 * step for step in range(1, 17)]

    prologue = """
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "dynamicconv_cuda.cuh"

std::vector<at::Tensor> dynamicconv_cuda_backward(at::Tensor gradOutput, int padding_l, at::Tensor input, at::Tensor weight) {

    at::DeviceGuard g(input.device());
    const auto minibatch = input.size(0);
    const auto numFeatures = input.size(1);
    const auto sequenceLength = input.size(2);

    const auto numHeads = weight.size(1);
    const auto filterSize = weight.size(2);

    const auto numFiltersInBlock = numFeatures / numHeads;
    auto numChunks = 1;

    auto gradInput = at::zeros_like(input);
    auto gradWeight = at::zeros_like(weight);
    auto stream = at::cuda::getCurrentCUDAStream();

    dim3 blocks(minibatch, numHeads, numChunks);
"""

    seq_branch_tmpl = """
    if (sequenceLength < {seq}) {{
        switch(filterSize) {{
"""

    case_tmpl = """
            case {k}:
"""

    chunks_tmpl = """
                numChunks = int(ceilf(sequenceLength/float({b_size})));
                blocks = dim3(minibatch, numHeads, numChunks);
"""

    launch_tmpl = """
                if (padding_l == {p}) {{
                    AT_DISPATCH_FLOATING_TYPES_AND_HALF(gradOutput.scalar_type(), "dynamicconv_backward", ([&] {{
                        dynamicconv_backward_kernel<{k}, {b_size}, {p}, scalar_t>
                        <<<blocks, {b_size}, 0, stream>>>(
                                    gradOutput.data<scalar_t>(),
                                    input.data<scalar_t>(),
                                    weight.data<scalar_t>(),
                                    minibatch,
                                    sequenceLength,
                                    numFeatures,
                                    numFiltersInBlock,
                                    numHeads,
                                    gradWeight.data<scalar_t>(),
                                    gradInput.data<scalar_t>());
                    }}));
                }} else
"""

    unsupported_padding = """
                {
                    std::cout << "WARNING: Unsupported padding size - skipping backward pass" << std::endl;
                }
                break;\n
"""

    unsupported_filter = """
            default:
                std::cout << "WARNING: Unsupported filter length passed - skipping backward pass" << std::endl;
        }
"""

    else_link = """
    } else
"""

    last_branch_open = """
    {
        switch(filterSize) {
"""

    epilogue = """
    }
    return {gradInput, gradWeight};
}
"""

    def write_case(out, k, b_size):
        # One ``case`` arm: chunk setup, a launch per padding, then the fallback.
        out.write(case_tmpl.format(k=k))
        out.write(chunks_tmpl.format(b_size=b_size))
        for p in (k // 2, k - 1):
            out.write(launch_tmpl.format(k=k, b_size=b_size, p=p))
        out.write(unsupported_padding)

    with open("dynamicconv_cuda_backward.cu", "w") as backward:
        backward.write(prologue)
        for seq in seq_bounds:
            backward.write(seq_branch_tmpl.format(seq=seq))
            for k, limit, floor in zip(filter_sizes, seq_thresholds, fallback_blocks):
                write_case(backward, k, seq if seq <= limit else floor)
            backward.write(unsupported_filter)
            backward.write(else_link)
        backward.write(last_branch_open)
        for k, floor in zip(filter_sizes, fallback_blocks):
            write_case(backward, k, floor)
        backward.write(unsupported_filter)
        backward.write(epilogue)


if __name__ == "__main__":
    gen_forward()
    gen_backward()
+ */ + +#include <torch/extension.h> +#include <vector> + +std::vector<at::Tensor> +dynamicconv_cuda_forward(at::Tensor input, at::Tensor filters, int padding_l); + +std::vector<at::Tensor> dynamicconv_cuda_backward( + at::Tensor gradOutput, + int padding_l, + at::Tensor input, + at::Tensor filters); + +#define CHECK_CUDA(x) \ + AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +std::vector<at::Tensor> +dynamicconv_forward(at::Tensor input, at::Tensor filters, int padding_l) { + CHECK_INPUT(input); + CHECK_INPUT(filters); + + return dynamicconv_cuda_forward(input, filters, padding_l); +} + +std::vector<at::Tensor> dynamicconv_backward( + at::Tensor gradOutput, + int padding_l, + at::Tensor input, + at::Tensor filters) { + CHECK_INPUT(gradOutput); + CHECK_INPUT(input); + CHECK_INPUT(filters); + + return dynamicconv_cuda_backward(gradOutput, padding_l, input, filters); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &dynamicconv_forward, "dynamicconv forward (CUDA)"); + m.def("backward", &dynamicconv_backward, "dynamicconv backward (CUDA)"); +} diff --git a/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh b/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh new file mode 100644 index 0000000..44baf21 --- /dev/null +++ b/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh @@ -0,0 +1,50 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include <ATen/ATen.h> +#include <c10/cuda/CUDAStream.h> + +#include <cuda.h> +#include <cuda_fp16.h> +#include <cuda_runtime.h> + +#include <algorithm> +#include <functional> +#include <iostream> +#include <stdexcept> +#include <utility> +#include <vector> + +#include <assert.h> +#include <math.h> +#include <stdlib.h> + +#define SHFL_MASK 0xffffffff + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void dynamicconv_forward_kernel( + const scalar_t* input, + const scalar_t* weight, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + int numHeads, + scalar_t* output); + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void dynamicconv_backward_kernel( + const scalar_t* gradOutput, // B * C * T + const scalar_t* input, // B * C * T + const scalar_t* weight, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + int numHeads, + scalar_t* gradWeight, + scalar_t* gradInput); // B * H * k * T diff --git a/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu b/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu new file mode 100644 index 0000000..4630f1e --- /dev/null +++ b/fairseq/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu @@ -0,0 +1,176 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "../cuda_utils.cu"
#include "dynamicconv_cuda.cuh"
#include "dynamicconv_cuda_backward.cu"
#include "dynamicconv_cuda_forward.cu"

// Forward pass of the dynamic convolution.
//
// FS is filter size and kernels are specialized for filter sizes; SB is the
// number of threads per block and padding_l the left padding.
//
// Each block handles one (batch, feature) pair (blockIdx.x, blockIdx.y) and
// walks the sequence in tiles of SB positions. The index arithmetic below
// addresses input/output as B x C x T and weight as B x H x FS x T.
// zeroSharedMem, load_input_to_shared and divUp come from ../cuda_utils.cu.
template <int FS, int SB, int padding_l, typename scalar_t>
__global__ void dynamicconv_forward_kernel(
    const scalar_t* input,
    const scalar_t* weight,
    int minibatch,
    int sequenceLength,
    int numFeatures,
    int numFiltersInBlock,
    int numHeads,
    scalar_t* output) {
  assert(blockDim.x == SB);

  const int tid = threadIdx.x;
  const int batchIdx = blockIdx.x;
  const int featureIdx = blockIdx.y;
  // numFiltersInBlock consecutive features share one head's filters.
  const int head = featureIdx / numFiltersInBlock;

  // Start of this (batch, feature) row in the B x C x T tensors.
  const int IOOffset =
      batchIdx * numFeatures * sequenceLength + featureIdx * sequenceLength;
  const scalar_t* inputFeature = &input[IOOffset];
  scalar_t* outputFeature = &output[IOOffset];

  // Per-thread copy of the FS filter taps for the current time step.
  scalar_t filter[FS];

  // Shared tile of the input: SB positions plus room for the filter window.
  __shared__ scalar_t tempInput[SB + FS];
  zeroSharedMem<FS, SB, padding_l>(tempInput);

  const int numIterations = divUp<int, int>(sequenceLength, SB);

  for (int i = 0; i < numIterations; ++i) {
    // Make sure the previous tile is no longer being read before reloading.
    __syncthreads();
    const int inputOffset = i * SB;
    load_input_to_shared<FS, SB, padding_l>(
        inputFeature,
        inputOffset,
        sequenceLength,
        i,
        numIterations,
        false,
        tempInput);
    __syncthreads();
    // Threads past the end of the sequence produce no output.
    if (inputOffset + tid < sequenceLength) {
#pragma unroll
      for (int k = 0; k < FS; ++k) {
        // Tap k of this head's filter at time step (i * SB + tid).
        const int filterOffset = batchIdx * numHeads * FS * sequenceLength +
            head * FS * sequenceLength + k * sequenceLength + i * SB + tid;
        filter[k] = weight[filterOffset];
      }

      scalar_t out = scalar_t(0.0);
#pragma unroll
      for (int k = 0; k < FS; ++k) {
        out += filter[k] * tempInput[tid + k];
      }

      outputFeature[inputOffset + tid] = out;
    }
  }
}

// Backward pass of the dynamic convolution.
//
// Each block handles one (batch, head, sequence chunk) triple
// (blockIdx.x, blockIdx.y, blockIdx.z) and loops over the numFiltersInBlock
// features of that head. It writes gradInput for every feature and
// accumulates gradWeight across them.
template <int FS, int SB, int padding_l, typename scalar_t>
__global__ void dynamicconv_backward_kernel(
    const scalar_t* gradOutput, // B * C * T
    const scalar_t* input, // B * C * T
    const scalar_t* weight,
    int minibatch,
    int sequenceLength,
    int numFeatures,
    int numFiltersInBlock,
    int numHeads,
    scalar_t* gradWeight,
    scalar_t* gradInput) { // B * H * k * T

  assert(blockDim.x == SB);

  // each block operates on a single batch and filter head
  const int tid = threadIdx.x;
  const int batchIdx = blockIdx.x;
  const int headIdx = blockIdx.y;
  const int chunkIdx = blockIdx.z;

  const int numChunks = divUp<int, int>(sequenceLength, SB);
  const int inputOffset = chunkIdx * SB;

  // initialize shared memory for output gradient and input
  __shared__ scalar_t tempGradOutput[SB + FS];
  __shared__ scalar_t tempInput[SB + FS];
  // Right-hand padding; the output-gradient tile is loaded with this offset.
  const int padding = FS - padding_l - 1;

  zeroSharedMem<FS, SB, padding>(tempGradOutput);
  zeroSharedMem<FS, SB, padding_l>(tempInput);

  // initialize local filter and weight gradient sum arrays
  scalar_t tempGradSum[FS];
  scalar_t bfilter[FS];
  for (int k = 0; k < FS; ++k) {
    tempGradSum[k] = scalar_t(0.0);

    // Taps are read in reversed order (FS - k - 1) at the shifted position;
    // positions outside the sequence contribute zero.
    int idxOffset = inputOffset + tid + k - padding;
    if (idxOffset >= 0 && idxOffset < sequenceLength) {
      int bfilterOffset = batchIdx * numHeads * FS * sequenceLength +
          headIdx * FS * sequenceLength + (FS - k - 1) * sequenceLength +
          idxOffset;
      bfilter[k] = weight[bfilterOffset];
    } else {
      bfilter[k] = scalar_t(0.0);
    }
  }

  // iterate over filter block
  for (int featureIdx = 0; featureIdx < numFiltersInBlock; ++featureIdx) {
    __syncthreads();

    // load input and output gradient for this channel and chunk
    const int IOOffset = batchIdx * numFeatures * sequenceLength +
        (headIdx * numFiltersInBlock + featureIdx) * sequenceLength;
    const scalar_t* inputFeature = &input[IOOffset];
    const scalar_t* gradOutputFeature = &gradOutput[IOOffset];
    scalar_t* gradInputFeature = &gradInput[IOOffset];

    load_input_to_shared<FS, SB, padding>(
        gradOutputFeature,
        inputOffset,
        sequenceLength,
        chunkIdx,
        numChunks,
        true,
        tempGradOutput);
    load_input_to_shared<FS, SB, padding_l>(
        inputFeature,
        inputOffset,
        sequenceLength,
        chunkIdx,
        numChunks,
        true,
        tempInput);
    __syncthreads();

    // sum input and weight gradients
    scalar_t out = scalar_t(0.0);
#pragma unroll
    for (int k = 0; k < FS; ++k) {
      // The weight gradient accumulates over all features of the head.
      tempGradSum[k] += tempInput[tid + k] * tempGradOutput[tid + padding];
      out += bfilter[k] * tempGradOutput[tid + k];
    }

    if (inputOffset + tid < sequenceLength) {
      gradInputFeature[inputOffset + tid] = out;
    }
  }

  // Start of this (batch, head) slab in gradWeight.
  const int gradOffset =
      batchIdx * numHeads * FS * sequenceLength + headIdx * FS * sequenceLength;
  scalar_t* gradWeightFeature = &gradWeight[gradOffset];

  // write weight gradient
  if (inputOffset + tid < sequenceLength) {
    for (int k = 0; k < FS; ++k) {
      const int outputOffset = k * sequenceLength + inputOffset + tid;
      gradWeightFeature[outputOffset] = tempGradSum[k];
    }
  }
}
def reset_parameters(self):
    """Re-initialise the filter projection and the optional output bias.

    ``weight_linear.weight`` is filled with Xavier-uniform values.  When the
    layer owns a ``conv_bias`` it is zeroed, and the projection's own bias is
    zeroed with it if the projection was built with one.
    """
    nn.init.xavier_uniform_(self.weight_linear.weight)
    if self.conv_bias is not None:
        nn.init.constant_(self.conv_bias, 0.0)
        # The projection is stored as ``weight_linear``; the previous spelling
        # (``weight_linaer``) raised AttributeError on every layer built with
        # conv_bias=True.  Its bias is None when the layer is created with
        # bias=False, so only initialise it when it exists.
        if self.weight_linear.bias is not None:
            nn.init.constant_(self.weight_linear.bias, 0.0)
def reorder_incremental_state(self, incremental_state, new_order):
    """Permute the cached input buffer along dimension 1.

    Does nothing when no buffer has been stored in ``incremental_state``;
    otherwise the buffer is re-indexed with ``new_order`` and written back.
    """
    cached = self._get_input_buffer(incremental_state)
    if cached is None:
        return
    self._set_input_buffer(incremental_state, cached.index_select(1, new_order))
def _forward_expanded(self, x, incremental_stat, query):
    """Turn the convolution filters into band matrices and do matrix multiplication.
    This is faster when the sequence is short, but less memory efficient.
    This is not used in the decoder during inference.

    Args:
        x: input whose three dimensions are unpacked as (T, B, C); C must
            equal ``self.input_size`` and be divisible by ``self.num_heads``.
        incremental_stat: accepted for signature parity with
            ``_forward_unfolded``; never read in this method.
        query: tensor fed to ``self.weight_linear`` to predict the filters.

    Returns:
        Tensor viewed as (T, B, C).
    """
    T, B, C = x.size()
    K, H = self.kernel_size, self.num_heads
    R = C // H  # channels handled by each head
    assert R * H == C == self.input_size
    # One row of predicted filter taps per (time step, batch element, head).
    weight = self.weight_linear(query).view(T * B * H, -1)

    if not self.renorm_padding:
        # Without renorm_padding the softmax (if enabled) and the dropout are
        # applied to the raw taps, before they are laid out as a band matrix.
        if self.weight_softmax:
            weight = F.softmax(weight, dim=1)
        weight = self.weight_dropout_module(weight, inplace=False)
    weight = weight.narrow(1, 0, K).contiguous()
    weight = weight.view(T, B * H, K).transpose(0, 1)  # B*H x T x K

    x = x.view(T, B * H, R).transpose(0, 1)  # B*H x T x R
    if self.weight_softmax and self.renorm_padding:
        # turn the convolution filters into band matrices
        # -inf everywhere outside the band, so the softmax below assigns those
        # positions zero weight.
        weight_expanded = weight.new(B * H, T, T + K - 1).fill_(float("-inf"))
        # The strided view uses a row stride of T + K on rows that are only
        # T + K - 1 wide, so each successive row starts one column further to
        # the right; copying the taps through it writes them along a diagonal.
        weight_expanded.as_strided(
            (B * H, T, K), (T * (T + K - 1), T + K, 1)
        ).copy_(weight)
        weight_expanded = weight_expanded.narrow(2, self.padding_l, T)
        # normalize the weight over valid positions like self-attention
        weight_expanded = F.softmax(weight_expanded, dim=2)
        weight_expanded = self.weight_dropout_module(weight_expanded, inplace=False)
    else:
        P = self.padding_l
        # For efficiency, we cut the kernel size and reduce the padding when the kernel is larger than the length
        if K > T and P == K - 1:
            weight = weight.narrow(2, K - T, T)
            K, P = T, T - 1
        # turn the convolution filters into band matrices
        # Same diagonal-write trick as above, but on a zero background because
        # no softmax is applied afterwards in this branch.
        weight_expanded = weight.new_zeros(B * H, T, T + K - 1, requires_grad=False)
        weight_expanded.as_strided(
            (B * H, T, K), (T * (T + K - 1), T + K, 1)
        ).copy_(weight)
        weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
    # (B*H x T x T) @ (B*H x T x R): every output step mixes the input steps
    # selected by its own row of the band matrix.
    output = torch.bmm(weight_expanded, x)
    output = output.transpose(0, 1).contiguous().view(T, B, C)
    return output
+dynamicconv_cpu_forward(float* input, float* filters, int padding_l); + +std::vector<float*> dynamicconv_cpu_backward( + float* gradOutput, + int padding_l, + float* input, + float* filters); + +std::vector<float*> +dynamicconv_forward(float* input, float* filters, int padding_l) { + return dynamicconv_cpu_forward(input, filters, padding_l); +} + +std::vector<float*> dynamicconv_backward( + float* gradOutput, + int padding_l, + float* input, + float* filters) { + return dynamicconv_cpu_backward(gradOutput, padding_l, input, filters); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &dynamicconv_forward, "dynamicconv forward (CPU)"); + m.def("backward", &dynamicconv_backward, "dynamicconv backward (CPU)"); +} diff --git a/fairseq/fairseq/modules/dynamicconv_layer/setup.py b/fairseq/fairseq/modules/dynamicconv_layer/setup.py new file mode 100644 index 0000000..6a21f7e --- /dev/null +++ b/fairseq/fairseq/modules/dynamicconv_layer/setup.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + + +setup( + name="dynamicconv_layer", + ext_modules=[ + CUDAExtension( + name="dynamicconv_cuda", + sources=[ + "dynamicconv_cuda.cpp", + "dynamicconv_cuda_kernel.cu", + ], + ), + ], + cmdclass={"build_ext": BuildExtension}, +) diff --git a/fairseq/fairseq/modules/ema_module.py b/fairseq/fairseq/modules/ema_module.py new file mode 100644 index 0000000..f0ece84 --- /dev/null +++ b/fairseq/fairseq/modules/ema_module.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +""" +Used for EMA tracking a given pytorch module. 
class EMAModule:
    """Exponential Moving Average of Fairseq Models

    Holds a shadow module whose parameters follow a source model as
    ``ema = decay * ema + (1 - decay) * param``.  The caller is responsible
    for invoking :meth:`step` and, if desired, adjusting the decay through
    :meth:`set_decay`.
    """

    def __init__(
        self,
        model,
        config: "EMAModuleConfig",
        copy_model=True,
        device=None,
        skip_keys=None,
    ):
        """
        @param model model to initialize the EMA with
        @param config EMAModuleConfig-like object; ``ema_decay``, ``ema_fp32``,
                      ``add_missing_params`` and ``log_norms`` are read
        @param copy_model If true, track a deep copy of ``model`` with
                      gradients disabled; otherwise use ``model`` itself.
        @param device If provided, copy EMA to this device (e.g. gpu).
                      Otherwise EMA is in the same device as the model.
        @param skip_keys Parameter names that are copied verbatim on every
                      step instead of being averaged.
        """
        if copy_model:
            self.model = copy.deepcopy(model)
            self.model.requires_grad_(False)
        else:
            self.model = model

        self.config = config
        self.decay = config.ema_decay
        self.skip_keys = skip_keys or set()
        self.add_missing_params = config.add_missing_params
        self.fp32_params = {}

        if device is not None:
            logging.info(f"Copying EMA model to device {device}")
            self.model = self.model.to(device=device)

        if self.config.ema_fp32:
            self.build_fp32_params()

        # Norm logging is only possible when the fused l2norm kernel imported.
        self.log_norms = config.log_norms and multi_tensor_l2norm_available
        self.logs = {}

    def build_fp32_params(self, state_dict=None):
        """
        Store a copy of the EMA params in fp32.

        If state dict is passed, the EMA params is copied from
        the provided state dict. Otherwise, it is copied from the
        current EMA model parameters.

        Raises:
            RuntimeError: if the module was configured with ``ema_fp32=False``.
        """
        if not self.config.ema_fp32:
            raise RuntimeError(
                "build_fp32_params should not be called if ema_fp32=False. "
                "Use ema_fp32=True if this is really intended."
            )

        if state_dict is None:
            state_dict = self.model.state_dict()

        def _to_float(t):
            # Integer / bool tensors are kept as they are.
            return t.float() if torch.is_floating_point(t) else t

        for param_key in state_dict:
            if param_key in self.fp32_params:
                if param_key == "__sq_mom":
                    # The "__sq_mom" entry is replaced wholesale, not copied into.
                    self.fp32_params[param_key] = state_dict[param_key]
                else:
                    self.fp32_params[param_key].copy_(state_dict[param_key])
            else:
                self.fp32_params[param_key] = _to_float(state_dict[param_key])
                if "__sq_mom" in self.fp32_params:
                    self.fp32_params["__sq_mom"][param_key] = torch.zeros_like(
                        self.fp32_params[param_key]
                    )

    def restore(self, state_dict, build_fp32_params=False):
        """Load data from a model spec into EMA model"""
        self.model.load_state_dict(state_dict, strict=False)
        if build_fp32_params:
            self.build_fp32_params(state_dict)

    def set_decay(self, decay, weight_decay=None):
        """Set the EMA decay; ``weight_decay`` is stored only when given."""
        self.decay = decay
        if weight_decay is not None:
            self.weight_decay = weight_decay

    def get_decay(self):
        """Return the decay currently in use."""
        return self.decay

    def _step_internal(self, new_model):
        """One update of the EMA model based on new model weights

        Raises:
            ValueError: if a parameter of ``new_model`` and its EMA
                counterpart have different shapes.
        """
        decay = self.decay

        ema_state_dict = {}
        ema_params = (
            self.fp32_params if self.config.ema_fp32 else self.model.state_dict()
        )

        new_p = []
        ema_p = []

        for key, param in new_model.named_parameters():
            if isinstance(param, dict):
                continue

            if not self.add_missing_params and key not in ema_params:
                continue

            try:
                ema_param = ema_params[key]
            except KeyError:
                # First sighting of this parameter: seed the EMA from it.
                ema_param = (
                    param.float().clone() if param.ndim == 1 else copy.deepcopy(param)
                )
                ema_params[key] = ema_param

            if param.shape != ema_param.shape:
                raise ValueError(
                    "incompatible tensor shapes between model param and ema param: "
                    "{} vs. {}".format(param.shape, ema_param.shape)
                )

            if "version" in key:
                # Do not decay a model.version pytorch param
                continue

            lr = 1 - decay

            if key in self.skip_keys or not param.requires_grad:
                # Skipped / frozen parameters are mirrored, not averaged.
                ema_params[key].copy_(param.to(dtype=ema_param.dtype).data)
                ema_param = ema_params[key]
            else:
                if self.log_norms:
                    new_p.append(param)
                    ema_p.append(ema_param)

                # In place: ema = (1 - lr) * ema + lr * param
                ema_param.mul_(1 - lr)
                ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=lr)

            ema_state_dict[key] = ema_param

        # Buffers are taken from the new model unchanged.
        for key, param in new_model.named_buffers():
            ema_state_dict[key] = param

        if self.log_norms:
            if "model_norm" in self.logs:
                self.prev_model_norm = self.logs["model_norm"]

            chunk_size = 2048 * 32
            has_inf = torch.zeros(
                (1, 1), dtype=torch.int, device=next(new_model.parameters()).device
            )

            new_norm = multi_tensor_l2norm(chunk_size, has_inf, [new_p], False)
            old_norm = multi_tensor_l2norm(chunk_size, has_inf, [ema_p], False)

            self.logs["model_norm"] = new_norm[0]
            self.logs["ema_norm"] = old_norm[0]

        self.restore(ema_state_dict, build_fp32_params=False)

    @torch.no_grad()
    def step(self, new_model):
        """Fold the current weights of ``new_model`` into the EMA (no grad)."""
        self._step_internal(new_model)

    def reverse(self, model):
        """
        Load the model parameters from EMA model.
        Useful for inference or fine-tuning from the EMA model.

        Returns the ``model`` that was passed in.
        """
        d = self.model.state_dict()
        if "_ema" in d:
            del d["_ema"]

        model.load_state_dict(d, strict=False)
        return model
def forward_attention(self, value, scores, mask):
    """Compute attention context vector.
    Args:
        value: Transformed value B X n_head X T2 X d_k.
        scores: Attention score B X n_head X T1 X T2
        mask: Optional mask; singleton dims are inserted at positions 1 and 2
            and it is broadcast against ``scores``.  Non-zero entries are
            excluded from the softmax.
    Returns:
        torch.Tensor: Transformed value B X T1 X d_model
            weighted by the attention score B X T1 X T2
    """
    if mask is not None:
        blocked = mask.unsqueeze(1).unsqueeze(2).to(bool)
        scores = scores.masked_fill(
            blocked, float("-inf")
        )  # (batch, head, time1, time2)
    # The attention map is kept on the module for later inspection.
    self.attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

    context = torch.matmul(self.dropout(self.attn), value)  # (batch, head, time1, d_k)
    batch_size = value.size(0)
    merged = (
        context.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)
    )  # (batch, time1, d_model)
    return self.linear_out(merged)  # (batch, time1, d_model)
def rel_shift(self, x):
    """Compute relative positional encoding.
    Args:
        x: Input tensor B X n_head X T X 2T-1
    Returns:
        torch.Tensor: Output tensor.
    """
    batch, heads, time1, width = x.size()

    # Prepend one zero column, then reinterpret the padded block with its last
    # two dimensions swapped in size; dropping the first row of that view and
    # viewing it back shifts the entries of every row relative to the input.
    zero_col = x.new_zeros((batch, heads, time1, 1))
    padded = torch.cat([zero_col, x], dim=-1)
    padded = padded.view(batch, heads, width + 1, time1)
    shifted = padded[:, :, 1:].view_as(x)

    # only keep the positions from 0 to time2
    x = shifted[:, :, :, : width // 2 + 1]

    if self.zero_triu:
        ones = torch.ones((x.size(2), x.size(3)), device=x.device)
        x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :]

    return x
class RotaryPositionMultiHeadedAttention(ESPNETMultiHeadedAttention):
    """Multi-head self-attention with rotary position embeddings.

    Queries and keys are rotated per head by ``apply_rotary_pos_emb`` before
    the standard scaled dot-product attention of the parent class is applied.
    """

    def __init__(
        self,
        n_feat,
        n_head,
        dropout,
        precision,
        rotary_emd_base=10000,
    ):
        """Construct an RotaryPositionMultiHeadedAttention object.

        Args:
            n_feat: The number of features.
            n_head: The number of heads.
            dropout: Dropout rate.
            precision: ``"fp16"`` selects ``torch.half`` for the rotary
                embedding; any other value selects ``torch.float``.
            rotary_emd_base: Passed to the rotary embedding as ``base``.
        """
        super().__init__(n_feat, n_head, dropout)
        self.rotary_ndims = self.d_k  # also try self.d_k//2
        # Resolve the dtype from the argument.  The argument used to be
        # overwritten with torch.float before this comparison, which made the
        # "fp16" branch unreachable and the parameter a no-op.
        rotary_dtype = torch.half if precision == "fp16" else torch.float

        self.rotary_emb = RotaryPositionalEmbedding(
            self.rotary_ndims, base=rotary_emd_base, precision=rotary_dtype
        )

    def forward(self, query, key, value, key_padding_mask=None, **kwargs):
        """Compute rotary position attention.
        Args:
            query: Query tensor T X B X C
            key: Key tensor T X B X C
            value: Value tensor T X B X C
            key_padding_mask: Mask forwarded unchanged to ``forward_attention``
        Returns:
            Tuple of (output tensor T X B X D, None).
        Notes:
            Assumes self attn
        """

        T, B, C = value.size()
        # Split the channel dimension into heads so the rotation is per head.
        query = query.view(T, B, self.h, self.d_k)
        key = key.view(T, B, self.h, self.d_k)
        value = value.view(T, B, self.h, self.d_k)
        cos, sin = self.rotary_emb(value, seq_len=T)
        query, key = apply_rotary_pos_emb(
            query, key, cos, sin, offset=0
        )  # offset is based on layer_past

        # Merge the heads again; forward_qkv re-splits after its projections.
        query = query.view(T, B, self.h * self.d_k)
        key = key.view(T, B, self.h * self.d_k)
        value = value.view(T, B, self.h * self.d_k)

        # TBD to BTD
        query = query.transpose(0, 1)
        key = key.transpose(0, 1)
        value = value.transpose(0, 1)

        q, k, v = self.forward_qkv(query, key, value)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)
        scores = self.forward_attention(v, scores, key_padding_mask)
        scores = scores.transpose(0, 1)
        return scores, None
class FairseqDropout(nn.Module):
    """Dropout layer that can optionally stay active at inference time."""

    def __init__(self, p, module_name=None):
        super().__init__()
        self.p = p
        self.module_name = module_name
        self.apply_during_inference = False

    def forward(self, x, inplace: bool = False):
        """Apply dropout when enabled; otherwise return ``x`` itself."""
        enabled = self.p > 0 and (self.training or self.apply_during_inference)
        if not enabled:
            return x
        return F.dropout(x, p=self.p, training=True, inplace=inplace)

    def make_generation_fast_(
        self,
        name: str,
        retain_dropout: bool = False,
        retain_dropout_modules: Optional[List[str]] = None,
        **kwargs
    ):
        """Decide whether dropout stays on during generation.

        Nothing happens unless ``retain_dropout`` is set.  When it is, dropout
        is kept for this module if ``retain_dropout_modules`` is None or lists
        this module's ``module_name``; the other outcomes are only logged.
        """
        if not retain_dropout:
            return

        if retain_dropout_modules is not None and self.module_name is None:
            logger.warning(
                "Cannot enable dropout during inference for module {} "
                "because module_name was not set".format(name)
            )
            return

        # if None, apply to all modules
        keep = (
            retain_dropout_modules is None
            or self.module_name in retain_dropout_modules
        )
        if keep:
            logger.info(
                "Enabling dropout during inference for module: {}".format(name)
            )
            self.apply_during_inference = True
        else:
            logger.info("Disabling dropout for module: {}".format(name))
class Fp32BatchNorm(nn.Module):
    """Batch norm that always computes in fp32 and casts the result back.

    Wraps ``nn.SyncBatchNorm`` when ``sync`` is requested and the global world
    size is larger than one, and ``nn.BatchNorm1d`` otherwise.  Remaining
    positional and keyword arguments are forwarded to the wrapped layer.
    """

    def __init__(self, sync=False, *args, **kwargs):
        super().__init__()

        if sync:
            from fairseq.distributed import utils

            # Synchronisation is pointless with a single worker.
            if utils.get_global_world_size() == 1:
                sync = False

        if sync:
            self.bn = nn.SyncBatchNorm(*args, **kwargs)
        else:
            self.bn = nn.BatchNorm1d(*args, **kwargs)

        self.sync = sync

    def forward(self, input):
        """Normalise ``input`` in fp32 and return it in the input's dtype."""
        # The wrapped layer may have been cast down (e.g. by ``.half()``);
        # bring its statistics and affine parameters back to fp32 first.
        if self.bn.running_mean.dtype != torch.float:
            if self.sync:
                self.bn.running_mean = self.bn.running_mean.float()
                self.bn.running_var = self.bn.running_var.float()
                if self.bn.affine:
                    try:
                        self.bn.weight = self.bn.weight.float()
                        self.bn.bias = self.bn.bias.float()
                    except Exception:
                        # Best-effort fallback: if the direct assignment is
                        # rejected, cast the whole wrapped layer instead.
                        # Narrowed from a bare ``except`` so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        self.bn.float()
            else:
                self.bn.float()

        output = self.bn(input.float())
        return output.type_as(input)
class Fp32InstanceNorm(nn.InstanceNorm1d):
    """Instance norm evaluated in fp32, with an optional channels-last layout.

    Accepts the arguments of ``nn.InstanceNorm1d`` plus the keyword
    ``transpose_last``; when that is truthy, dimensions 1 and 2 of the input
    are swapped before normalisation and swapped back afterwards.
    """

    def __init__(self, *args, **kwargs):
        # Strip our own option before the remaining kwargs reach the parent.
        self.transpose_last = kwargs.pop("transpose_last", False)
        super().__init__(*args, **kwargs)

    def forward(self, input):
        swap = self.transpose_last
        if swap:
            input = input.transpose(1, 2)

        scale = None if self.weight is None else self.weight.float()
        shift = None if self.bias is None else self.bias.float()
        output = F.instance_norm(
            input.float(),
            running_mean=self.running_mean,
            running_var=self.running_var,
            weight=scale,
            bias=shift,
            use_input_stats=self.training or not self.track_running_stats,
            momentum=self.momentum,
            eps=self.eps,
        )

        if swap:
            output = output.transpose(1, 2)
        return output.type_as(input)
def gelu_accurate(x):
    """Tanh approximation of GELU.

    See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin
    Gimpel. The ``sqrt(2 / pi)`` coefficient is computed on first use and
    cached on the function object as ``gelu_accurate._a``.
    """
    try:
        coeff = gelu_accurate._a
    except AttributeError:
        coeff = gelu_accurate._a = math.sqrt(2 / math.pi)
    inner = coeff * (x + 0.044715 * torch.pow(x, 3))
    return 0.5 * x * (1 + torch.tanh(inner))


def gelu(x: torch.Tensor) -> torch.Tensor:
    """Apply GELU in fp32 and return the result in ``x``'s dtype."""
    as_fp32 = x.float()
    return torch.nn.functional.gelu(as_fp32).type_as(x)


class GradMultiply(torch.autograd.Function):
    """Pass values through in forward; multiply the gradient by ``scale`` in backward."""

    @staticmethod
    def forward(ctx, x, scale):
        # Remember the factor for the backward pass.
        ctx.scale = scale
        # NOTE(review): legacy Tensor.new constructor; presumably builds a new
        # tensor object over x's data -- confirm before modernizing.
        return x.new(x)

    @staticmethod
    def backward(ctx, grad):
        # No gradient flows to ``scale`` itself.
        scaled = grad * ctx.scale
        return scaled, None
+ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class GumbelVectorQuantizer(nn.Module): + def __init__( + self, + dim, + num_vars, + temp, + groups, + combine_groups, + vq_dim, + time_first, + activation=nn.GELU(), + weight_proj_depth=1, + weight_proj_factor=1, + hard=True, + std=0, + ): + """Vector quantization using gumbel softmax + + Args: + dim: input dimension (channels) + num_vars: number of quantized vectors per group + temp: temperature for training. this should be a tuple of 3 elements: (start, stop, decay factor) + groups: number of groups for vector quantization + combine_groups: whether to use the vectors for all groups + vq_dim: dimensionality of the resulting quantized vector + time_first: if true, expect input in BxTxC format, otherwise in BxCxT + activation: what activation to use (should be a module). this is only used if weight_proj_depth is > 1 + weight_proj_depth: number of layers (with activation in between) to project input before computing logits + weight_proj_factor: this is used only if weight_proj_depth is > 1. 
scales the inner dimensionality of + projections by this factor + """ + super().__init__() + + self.groups = groups + self.combine_groups = combine_groups + self.input_dim = dim + self.num_vars = num_vars + self.time_first = time_first + self.hard = hard + + assert ( + vq_dim % groups == 0 + ), f"dim {vq_dim} must be divisible by groups {groups} for concatenation" + + var_dim = vq_dim // groups + num_groups = groups if not combine_groups else 1 + + self.vars = nn.Parameter(torch.FloatTensor(1, num_groups * num_vars, var_dim)) + if std == 0: + nn.init.uniform_(self.vars) + else: + nn.init.normal_(self.vars, mean=0, std=std) + + if weight_proj_depth > 1: + + def block(input_dim, output_dim): + return nn.Sequential(nn.Linear(input_dim, output_dim), activation) + + inner_dim = self.input_dim * weight_proj_factor + self.weight_proj = nn.Sequential( + *[ + block(self.input_dim if i == 0 else inner_dim, inner_dim) + for i in range(weight_proj_depth - 1) + ], + nn.Linear(inner_dim, groups * num_vars), + ) + else: + self.weight_proj = nn.Linear(self.input_dim, groups * num_vars) + nn.init.normal_(self.weight_proj.weight, mean=0, std=1) + nn.init.zeros_(self.weight_proj.bias) + + if isinstance(temp, str): + import ast + + temp = ast.literal_eval(temp) + assert len(temp) == 3, f"{temp}, {len(temp)}" + + self.max_temp, self.min_temp, self.temp_decay = temp + self.curr_temp = self.max_temp + self.codebook_indices = None + + def set_num_updates(self, num_updates): + self.curr_temp = max( + self.max_temp * self.temp_decay**num_updates, self.min_temp + ) + + def get_codebook_indices(self): + if self.codebook_indices is None: + from itertools import product + + p = [range(self.num_vars)] * self.groups + inds = list(product(*p)) + self.codebook_indices = torch.tensor( + inds, dtype=torch.long, device=self.vars.device + ).flatten() + + if not self.combine_groups: + self.codebook_indices = self.codebook_indices.view( + self.num_vars**self.groups, -1 + ) + for b in range(1, 
self.groups): + self.codebook_indices[:, b] += self.num_vars * b + self.codebook_indices = self.codebook_indices.flatten() + return self.codebook_indices + + def codebook(self): + indices = self.get_codebook_indices() + return ( + self.vars.squeeze(0) + .index_select(0, indices) + .view(self.num_vars**self.groups, -1) + ) + + def sample_from_codebook(self, b, n): + indices = self.get_codebook_indices() + indices = indices.view(-1, self.groups) + cb_size = indices.size(0) + assert ( + n < cb_size + ), f"sample size {n} is greater than size of codebook {cb_size}" + sample_idx = torch.randint(low=0, high=cb_size, size=(b * n,)) + indices = indices[sample_idx] + + z = self.vars.squeeze(0).index_select(0, indices.flatten()).view(b, n, -1) + return z + + def to_codebook_index(self, indices): + res = indices.new_full(indices.shape[:-1], 0) + for i in range(self.groups): + exponent = self.groups - i - 1 + res += indices[..., i] * (self.num_vars**exponent) + return res + + def forward_idx(self, x): + res = self.forward(x, produce_targets=True) + return res["x"], res["targets"] + + def forward(self, x, produce_targets=False): + + result = {"num_vars": self.num_vars * self.groups} + + if not self.time_first: + x = x.transpose(1, 2) + + bsz, tsz, fsz = x.shape + x = x.reshape(-1, fsz) + x = self.weight_proj(x) + x = x.view(bsz * tsz * self.groups, -1) + + with torch.no_grad(): + _, k = x.max(-1) + hard_x = ( + x.new_zeros(*x.shape) + .scatter_(-1, k.view(-1, 1), 1.0) + .view(bsz * tsz, self.groups, -1) + ) + hard_probs = torch.mean(hard_x.float(), dim=0) + result["code_perplexity"] = torch.exp( + -torch.sum(hard_probs * torch.log(hard_probs + 1e-7), dim=-1) + ).sum() + + avg_probs = torch.softmax( + x.view(bsz * tsz, self.groups, -1).float(), dim=-1 + ).mean(dim=0) + result["prob_perplexity"] = torch.exp( + -torch.sum(avg_probs * torch.log(avg_probs + 1e-7), dim=-1) + ).sum() + + result["temp"] = self.curr_temp + + if self.training: + x = F.gumbel_softmax(x.float(), 
tau=self.curr_temp, hard=self.hard).type_as( + x + ) + else: + x = hard_x + + x = x.view(bsz * tsz, -1) + + vars = self.vars + if self.combine_groups: + vars = vars.repeat(1, self.groups, 1) + + if produce_targets: + result["targets"] = ( + x.view(bsz * tsz * self.groups, -1) + .argmax(dim=-1) + .view(bsz, tsz, self.groups) + .detach() + ) + + x = x.unsqueeze(-1) * vars + x = x.view(bsz * tsz, self.groups, self.num_vars, -1) + x = x.sum(-2) + x = x.view(bsz, tsz, -1) + + if not self.time_first: + x = x.transpose(1, 2) # BTC -> BCT + + result["x"] = x + + return result diff --git a/fairseq/fairseq/modules/kmeans_attention.py b/fairseq/fairseq/modules/kmeans_attention.py new file mode 100644 index 0000000..0088d1e --- /dev/null +++ b/fairseq/fairseq/modules/kmeans_attention.py @@ -0,0 +1,744 @@ +import math +from functools import reduce, wraps +from inspect import isfunction +from operator import mul + +import torch +import torch.nn as nn +import torch.nn.functional as F +from aml.multimodal_video.utils.einops.lib import rearrange, repeat +from aml.multimodal_video.utils.einops.lib.layers.torch import Rearrange + +from fairseq.modules.local_attention import LocalAttention + +# constants + +TOKEN_SELF_ATTN_VALUE = -5e4 +KMEAN_INIT_ITERS = 10 + +# helper functions + + +def exists(val): + return val is not None + + +def identity(x, *args, **kwargs): + return x + + +def default(x, d): + if not exists(x): + return d if not isfunction(d) else d() + return x + + +def cast_tuple(x): + return x if isinstance(x, tuple) else (x,) + + +def cache_fn(f): + cache = None + + @wraps(f) + def cached_fn(*args, **kwargs): + nonlocal cache + if exists(cache): + return cache + cache = f(*args, **kwargs) + return cache + + return cached_fn + + +def to(t): + return {"device": t.device, "dtype": t.dtype} + + +def find_modules(nn_module, type): + return [module for module in nn_module.modules() if isinstance(module, type)] + + +def is_empty(t): + return t.nelement() == 0 + + +def 
max_neg_value(tensor): + return -torch.finfo(tensor.dtype).max + + +def batched_index_select(values, indices): + last_dim = values.shape[-1] + return values.gather(2, expand_dim(indices, -1, last_dim)) + + +def merge_dims(ind_from, ind_to, tensor): + shape = list(tensor.shape) + arr_slice = slice(ind_from, ind_to + 1) + shape[arr_slice] = [reduce(mul, shape[arr_slice])] + return tensor.reshape(*shape) + + +def expand_dim(t, dim, k): + t = t.unsqueeze(dim) + expand_shape = [-1] * len(t.shape) + expand_shape[dim] = k + return t.expand(*expand_shape) + + +def scatter_mean(src, t, index, dim, eps=1e-5): + numer = src.scatter_add(dim, index, t) + denom = src.scatter_add(dim, index, torch.ones_like(t)) + return numer / (denom + eps) + + +def split_at_index(dim, index, t): + pre_slices = (slice(None),) * dim + l = (*pre_slices, slice(None, index)) + r = (*pre_slices, slice(index, None)) + return t[l], t[r] + + +def reshape_dim(t, dim, split_dims): + shape = list(t.shape) + num_dims = len(shape) + dim = (dim + num_dims) % num_dims + shape[dim : dim + 1] = split_dims + return t.reshape(shape) + + +def ema(old, new, decay): + if not exists(old): + return new + return old * decay + new * (1 - decay) + + +def ema_inplace(moving_avg, new, decay): + if is_empty(moving_avg): + moving_avg.data.copy_(new) + return + moving_avg.data.mul_(decay).add_(new, alpha=(1 - decay)) + + +# helper classes + + +def map_first_tuple_or_el(x, fn): + if isinstance(x, tuple): + return (fn(x[0]),) + x[1:] + return fn(x) + + +class Chunk(nn.Module): + def __init__(self, chunks, fn, along_dim=-1): + super().__init__() + self.dim = along_dim + self.chunks = chunks + self.fn = fn + + def forward(self, x, **kwargs): + if self.chunks <= 1: + return self.fn(x, **kwargs) + chunks = x.chunk(self.chunks, dim=self.dim) + return torch.cat([self.fn(c, **kwargs) for c in chunks], dim=self.dim) + + +class PreNorm(nn.ModuleList): + def __init__(self, norm_class, dim, fn): + super().__init__() + self.norm = 
norm_class(dim) + self.fn = fn + + def forward(self, x, **kwargs): + x = self.norm(x) + return self.fn(x, **kwargs) + + +class ReZero(nn.Module): + def __init__(self, fn): + super().__init__() + self.residual_weight = nn.Parameter(torch.zeros(1)) + self.fn = fn + + def forward(self, x, **kwargs): + x = self.fn(x, **kwargs) + return map_first_tuple_or_el(x, lambda t: t * self.residual_weight) + + +class ScaleNorm(nn.Module): + def __init__(self, dim, eps=1e-5): + super().__init__() + self.g = nn.Parameter(torch.ones(1)) + self.eps = eps + + def forward(self, x): + def norm(t): + n = torch.norm(t, dim=-1, keepdim=True).clamp(min=self.eps) + return t / n * self.g + + return map_first_tuple_or_el(x, norm) + + +class ProjectInOut(nn.Module): + def __init__(self, fn, dim_in, dim_out, project_out=True): + super().__init__() + self.fn = fn + self.project_in = nn.Linear(dim_in, dim_out) + self.project_out = nn.Linear(dim_out, dim_in) if project_out else identity + + def forward(self, x, **kwargs): + x = self.project_in(x) + x, loss = self.fn(x, **kwargs) + x = self.project_out(x) + return x, loss + + +class MatrixMultiply(nn.Module): + def __init__(self, tensor, transpose=False): + super().__init__() + self.tensor = tensor + self.transpose = transpose + + def forward(self, x): + tensor = self.tensor + if self.transpose: + tensor = tensor.t() + return x @ tensor + + +# positional embeddings + + +class DepthWiseConv1d(nn.Module): + def __init__(self, dim_in, dim_out, kernel_size, stride=1, bias=True, causal=False): + super().__init__() + self.padding = ( + ((kernel_size - 1), 0) if causal else (kernel_size // 2, kernel_size // 2) + ) + + self.net = nn.Sequential( + nn.Conv1d( + dim_in, + dim_in, + kernel_size=kernel_size, + groups=dim_in, + stride=stride, + bias=bias, + ), + nn.Conv1d(dim_in, dim_out, 1, bias=bias), + ) + + def forward(self, x): + x = F.pad(x, self.padding, value=0.0) + return self.net(x) + + +class FixedPositionalEmbedding(nn.Module): + def __init__(self, 
dim, max_seq_len): + super().__init__() + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) + position = torch.arange(0, max_seq_len, dtype=torch.float) + sinusoid_inp = torch.einsum("i,j->ij", position, inv_freq) + emb = torch.cat((sinusoid_inp.sin(), sinusoid_inp.cos()), dim=-1) + self.register_buffer("emb", emb) + + def forward(self, x): + return self.emb[None, : x.shape[1], :].to(x) + + +def rotate_every_two(x): + x = rearrange(x, "... (d j) -> ... d j", j=2) + x1, x2 = x.unbind(dim=-1) + x = torch.stack((-x2, x1), dim=-1) + return rearrange(x, "... d j -> ... (d j)") + + +def apply_rotary_pos_emb(q, k, sinu_pos): + sinu_pos = rearrange(sinu_pos, "() n (j d) -> n j d", j=2) + sin, cos = sinu_pos.unbind(dim=-2) + sin, cos = map(lambda t: repeat(t, "b n -> b (n j)", j=2), (sin, cos)) + q, k = map(lambda t: (t * cos) + (rotate_every_two(t) * sin), (q, k)) + return q, k + + +# kmeans related function and class + + +def update_kmeans_on_backwards(module): + module.kmean_modules = find_modules(module, Kmeans) + + def hook(_, grad_in, grad_out): + for m in module.kmean_modules: + m.update() + + return module.register_backward_hook(hook) + + +def similarity(x, means): + return torch.einsum("bhld,hcd->bhlc", x, means) + + +def dists_and_buckets(x, means): + dists = similarity(x, means) + _, buckets = torch.max(dists, dim=-1) + return dists, buckets + + +def batched_bincount(index, num_classes, dim=-1): + shape = list(index.shape) + shape[dim] = num_classes + out = index.new_zeros(shape) + out.scatter_add_(dim, index, torch.ones_like(index, dtype=index.dtype)) + return out + + +def kmeans_iter(x, means, buckets=None): + b, h, _, d, dtype, num_clusters = *x.shape, x.dtype, means.shape[1] + + if not exists(buckets): + _, buckets = dists_and_buckets(x, means) + + bins = batched_bincount(buckets, num_clusters).sum(0, keepdim=True) + zero_mask = bins.long() == 0 + + means_ = buckets.new_zeros(b, h, num_clusters, d, dtype=dtype) + means_.scatter_add_(-2, 
expand_dim(buckets, -1, d), x) + means_ = F.normalize(means_.sum(0, keepdim=True), dim=-1).type(dtype) + + means = torch.where(zero_mask.unsqueeze(-1), means, means_) + means = means.squeeze(0) + return means + + +def distribution(dists, window_size): + _, topk_indices = dists.topk(k=window_size, dim=-2) + indices = topk_indices.transpose(-2, -1) + return indices.reshape(*indices.size()[:2], -1) + + +class Kmeans(nn.Module): + def __init__( + self, num_heads, head_dim, num_clusters, ema_decay=0.999, commitment=1e-4 + ): + super().__init__() + self.commitment = commitment + self.ema_decay = ema_decay + + self.register_buffer("means", torch.randn(num_heads, num_clusters, head_dim)) + self.register_buffer("initted", torch.tensor(False)) + self.num_new_means = 0 + self.new_means = None + + @torch.no_grad() + def init(self, x): + if self.initted: + return + _, h, _, d, device, _ = *x.shape, x.device, x.dtype + + num_clusters = self.means.shape[1] + + means = x.transpose(0, 1).contiguous().view(h, -1, d) + num_samples = means.shape[1] + + if num_samples >= num_clusters: + indices = torch.randperm(num_samples, device=device)[:num_clusters] + else: + indices = torch.randint(0, num_samples, (num_clusters,), device=device) + + means = means[:, indices] + + for _ in range(KMEAN_INIT_ITERS): + means = kmeans_iter(x, means) + + self.num_new_means = 0 + self.means.data.copy_(means) + self.initted.data.copy_(torch.tensor(True)) + + @torch.no_grad() + def update(self, new_means=None): + new_means = default(new_means, self.new_means) + assert exists(new_means), "new kmeans has not been supplied" + ema_inplace(self.means, new_means, self.ema_decay) + + del self.new_means + self.new_means = None + self.num_new_means = 0 + + def forward(self, x, update_means=False): + self.init(x) + + b, dtype = x.shape[0], x.dtype + means = self.means.type(dtype) + x = F.normalize(x, 2, dim=-1).type(dtype) + + with torch.no_grad(): + dists, buckets = dists_and_buckets(x, means) + + routed_means = 
batched_index_select(expand_dim(means, 0, b), buckets) + loss = F.mse_loss(x, routed_means) * self.commitment + + if update_means: + with torch.no_grad(): + means = kmeans_iter(x, means, buckets) + self.new_means = ema( + self.new_means, means, self.num_new_means / (self.num_new_means + 1) + ) + self.num_new_means += 1 + + return dists, loss + + +# kmeans attention class + + +class KmeansAttention(nn.Module): + def __init__( + self, + num_clusters, + window_size, + num_heads, + head_dim, + causal=False, + dropout=0.0, + ema_decay=0.999, + commitment=1e-4, + context_window_size=None, + receives_context=False, + num_mem_kv=0, + shared_qk=False, + ): + super().__init__() + self.num_heads = num_heads + self.num_clusters = num_clusters + self.head_dim = head_dim + + self.window_size = window_size + self.context_window_size = default(context_window_size, window_size) + self.causal = causal + + self.shared_qk = shared_qk + self.receives_context = receives_context + self.kmeans = Kmeans(num_heads, head_dim, num_clusters, ema_decay, commitment) + self.dropout = nn.Dropout(dropout) + + self.num_mem_kv = max(num_mem_kv, 1 if causal and not shared_qk else 0) + self.mem_key = nn.Parameter( + torch.randn(num_heads, num_clusters, self.num_mem_kv, head_dim) + ) + self.mem_value = nn.Parameter( + torch.randn(num_heads, num_clusters, self.num_mem_kv, head_dim) + ) + + def forward(self, q, k, v, query_mask=None, key_mask=None, **kwargs): + b, h, t, d, kv_t, wsz, c_wsz, nc, device, dtype = ( + *q.shape, + k.shape[2], + self.window_size, + self.context_window_size, + self.num_clusters, + q.device, + q.dtype, + ) + is_reverse = kwargs.pop("_reverse", False) + + out = torch.zeros_like(q, dtype=dtype) + + update_kmeans = self.training and not is_reverse + + key_mask = ( + default(key_mask, query_mask) if not self.receives_context else key_mask + ) + kv_wsz = wsz if not self.receives_context else c_wsz + + wsz = min(wsz, t) + kv_wsz = min(kv_wsz, kv_t) + + if not self.shared_qk or 
self.receives_context: + dists, aux_loss = self.kmeans(torch.cat((q, k), dim=2), update_kmeans) + q_dists, k_dists = split_at_index(2, t, dists) + indices = distribution(q_dists, wsz) + kv_indices = distribution(k_dists, kv_wsz) + else: + dists, aux_loss = self.kmeans(q, update_kmeans) + k = F.normalize(k, dim=-1).to(q) + indices = distribution(dists, wsz) + kv_indices = indices + + q = batched_index_select(q, indices) + k = batched_index_select(k, kv_indices) + v = batched_index_select(v, kv_indices) + + reshape_with_window = lambda x: x.reshape(b, h, nc, -1, d) + q, k, v = map(reshape_with_window, (q, k, v)) + + m_k, m_v = map( + lambda x: expand_dim(x, 0, b).to(q), (self.mem_key, self.mem_value) + ) + k, v = map(lambda x: torch.cat(x, dim=3), ((m_k, k), (m_v, v))) + + dots = torch.einsum("bhnid,bhnjd->bhnij", q, k) * (d**-0.5) + + mask_value = max_neg_value(dots) + + if exists(query_mask) or exists(key_mask): + query_mask = default( + query_mask, lambda: torch.ones((b, t), device=device).bool() + ) + key_mask = default( + key_mask, lambda: torch.ones((b, kv_t), device=device).bool() + ) + + q_mask = expand_dim(query_mask, 1, h).gather(2, indices) + kv_mask = expand_dim(key_mask, 1, h).gather(2, kv_indices) + q_mask, kv_mask = map(lambda t: t.reshape(b, h, nc, -1), (q_mask, kv_mask)) + mask = q_mask[:, :, :, :, None] * kv_mask[:, :, :, None, :] + mask = F.pad(mask, (self.num_mem_kv, 0), value=1) + dots.masked_fill_(~mask, mask_value) + del mask + + if self.causal: + q_mask, kv_mask = map( + lambda t: t.reshape(b, h, nc, -1), (indices, kv_indices) + ) + mask = q_mask[:, :, :, :, None] >= kv_mask[:, :, :, None, :] + mask = F.pad(mask, (self.num_mem_kv, 0), value=1) + dots.masked_fill_(~mask, mask_value) + del mask + + if self.shared_qk: + q_mask, kv_mask = map( + lambda t: t.reshape(b, h, nc, -1), (indices, kv_indices) + ) + mask = q_mask[:, :, :, :, None] == kv_mask[:, :, :, None, :] + mask = F.pad(mask, (self.num_mem_kv, 0), value=0) + dots.masked_fill_(mask, 
TOKEN_SELF_ATTN_VALUE) + del mask + + dots = dots.softmax(dim=-1) + dots = self.dropout(dots) + + bo = torch.einsum("bhcij,bhcjd->bhcid", dots, v) + so = torch.reshape(bo, (b, h, -1, bo.shape[-1])).type(dtype) + out = scatter_mean(out, so, indices.unsqueeze(-1).expand_as(so), -2) + return out, aux_loss + + +# feedforward + + +class GELU_(nn.Module): + def forward(self, x): + return ( + 0.5 + * x + * ( + 1 + + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))) + ) + ) + + +GELU = nn.GELU if hasattr(nn, "GELU") else GELU_ + + +class FeedForward(nn.Module): + def __init__(self, dim, mult=4, dropout=0.0, activation=None, glu=False): + super().__init__() + activation = default(activation, GELU) + + self.glu = glu + self.w1 = nn.Linear(dim, dim * mult * (2 if glu else 1)) + self.act = activation() + self.dropout = nn.Dropout(dropout) + self.w2 = nn.Linear(dim * mult, dim) + + def forward(self, x, **kwargs): + if not self.glu: + x = self.w1(x) + x = self.act(x) + else: + x, v = self.w1(x).chunk(2, dim=-1) + x = self.act(x) * v + + x = self.dropout(x) + x = self.w2(x) + return x + + +# self attention + + +class SelfAttention(nn.Module): + def __init__( + self, + dim, + max_seq_len, + heads, + local_attn_heads, + window_size, + dim_head=None, + local_attn_window_size=None, + local_attn_radius_blocks=1, + causal=False, + attn_dropout=0.0, + dropout=0.0, + kmeans_ema_decay=0.999, + commitment_factor=1e-4, + receives_context=False, + context_window_size=None, + rel_pos_emb=True, + num_mem_kv=0, + shared_qk=False, + conv_query_kernel=9, + ): + super().__init__() + assert ( + dim_head or (dim % heads) == 0 + ), "hidden dimension must be divisible by number of heads" + assert ( + max_seq_len % window_size + ) == 0, "maximum sequence length must be divisible by the target window size" + assert ( + local_attn_heads <= heads + ), "number of local attention heads must be less than total heads" + assert not ( + receives_context and local_attn_heads > 0 + ), "local 
attention cannot be used for self attention with context" + assert not ( + receives_context and causal + ), "contextual attention layer cannot be causal" + + local_attn_window_size = default(local_attn_window_size, window_size) + context_window_size = default(context_window_size, window_size) + + self.shared_qk = shared_qk + self.receives_context = receives_context + self.heads = heads + self.local_attn_heads = local_attn_heads + self.global_attn_heads = heads - local_attn_heads + + self.causal = causal + self.window_size = window_size + + dim_head = default(dim_head, dim // heads) + dim_heads = dim_head * heads + self.dim_head = dim_head + + num_clusters = max_seq_len // window_size + + # local + + local_dim_heads = dim_head * self.local_attn_heads + + if self.local_attn_heads > 0: + rel_pos_emb_config = (dim_head, local_attn_heads) if rel_pos_emb else None + self.local_attn = LocalAttention( + dim=dim_head, + window_size=local_attn_window_size, + causal=causal, + dropout=attn_dropout, + rel_pos_emb_config=rel_pos_emb_config, + look_backward=local_attn_radius_blocks, + look_forward=0 if causal else local_attn_radius_blocks, + ) + self.local_to_qkv = nn.Linear(dim, 3 * local_dim_heads) + + # global + + global_dim_heads = dim_head * self.global_attn_heads + + if self.global_attn_heads > 0: + self.global_attn = KmeansAttention( + num_clusters, + window_size, + self.global_attn_heads, + dim_head, + causal=causal, + dropout=attn_dropout, + ema_decay=kmeans_ema_decay, + commitment=commitment_factor, + receives_context=receives_context, + num_mem_kv=num_mem_kv, + shared_qk=shared_qk, + ) + + self.to_q = nn.Sequential( + Rearrange("b n c -> b c n"), + DepthWiseConv1d(dim, global_dim_heads, conv_query_kernel, causal=causal), + Rearrange("b c n -> b n c"), + ) + + self.to_v = nn.Linear(dim, global_dim_heads, bias=False) + + if not self.shared_qk: + self.to_k = nn.Linear(dim, global_dim_heads, bias=False) + + # out + + self.to_out = nn.Linear(dim_heads, dim, bias=False) + 
self.dropout = nn.Dropout(dropout) + + def forward( + self, + query, + key, + value, + context=None, + key_padding_mask=None, + context_mask=None, + pos_emb=None, + **kwargs + ): + assert not ( + self.receives_context and not exists(context) + ), "context must be passed if self attention is set to receive context" + input_mask = key_padding_mask + x = query.transpose(0, 1) + b, t, _, h, dh = *x.shape, self.heads, self.dim_head + has_local, has_global = map( + lambda x: x > 0, (self.local_attn_heads, self.global_attn_heads) + ) + + split_heads = ( + lambda v: reshape_dim(v, -1, (-1, dh)).transpose(1, 2).contiguous() + ) + + if has_local: + local_qkv = self.local_to_qkv(x).chunk(3, dim=-1) + lq, lk, lv = map(split_heads, local_qkv) + + if has_global: + kv_input = x if not self.receives_context else context + + q, v = self.to_q(x), self.to_v(kv_input) + + if not self.shared_qk: + k = self.to_k(kv_input) + else: + k = self.to_q(kv_input) if self.receives_context else q + + q, k, v = map(split_heads, (q, k, v)) + + out = [] + total_loss = torch.tensor(0.0, requires_grad=True, **to(x)) + + if has_local: + local_out = self.local_attn(lq, lk, lv, input_mask=input_mask) + out.append(local_out) + + if has_global: + if not self.receives_context and exists(pos_emb): + q, k = apply_rotary_pos_emb(q, k, pos_emb) + + global_out, loss = self.global_attn( + q, k, v, query_mask=input_mask, key_mask=context_mask + ) + total_loss = total_loss + loss + + out.append(global_out) + + out = torch.cat(out, dim=1) + out = out.reshape(b, h, t, -1).transpose(1, 2).reshape(b, t, -1) + out = self.dropout(out.transpose(0, 1)) + # out = self.to_out(out) + return out, total_loss diff --git a/fairseq/fairseq/modules/kmeans_vector_quantizer.py b/fairseq/fairseq/modules/kmeans_vector_quantizer.py new file mode 100644 index 0000000..1015c38 --- /dev/null +++ b/fairseq/fairseq/modules/kmeans_vector_quantizer.py @@ -0,0 +1,128 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
class KmeansVectorQuantizer(nn.Module):
    def __init__(
        self, dim, num_vars, groups, combine_groups, vq_dim, time_first, gamma=0.25
    ):
        """Vector quantization using straight pass-through estimator (i.e. kmeans)

        Args:
            dim: input dimension (channels)
            num_vars: number of quantized vectors per group
            groups: number of groups for vector quantization
            combine_groups: whether to use the vectors for all groups
            vq_dim: dimensionality of the resulting quantized vector
            time_first: if true, expect input in BxTxC format, otherwise in BxCxT
            gamma: commitment loss coefficient
        """
        super().__init__()

        self.groups = groups
        self.combine_groups = combine_groups
        self.input_dim = dim
        self.num_vars = num_vars
        self.vq_dim = vq_dim
        self.time_first = time_first

        assert (
            vq_dim % groups == 0
        ), f"dim {vq_dim} must be divisible by groups {groups} for concatenation"

        # Each group quantizes its own var_dim-sized slice of the output.
        self.var_dim = vq_dim // groups
        # With combine_groups a single codebook is stored and shared by all groups.
        num_groups = groups if not combine_groups else 1

        # Codebook laid out as (num_vars, num_groups, var_dim).
        self.embedding = nn.Parameter(
            0.01 * torch.randn(num_vars, num_groups, self.var_dim)
        )
        # Grouped 1x1 convolution followed by an fp32 group norm.
        self.projection = nn.Sequential(
            nn.Conv1d(dim, dim, kernel_size=1, groups=groups, bias=False),
            Fp32GroupNorm(groups, dim),
        )
        self.gamma = gamma
        self.mse_mean = nn.MSELoss(reduction="mean")

    def _pass_grad(self, x, y):
        """Manually set gradient for backward pass.
        for y = f(x), ensure that during the backward pass,
        dL/dy = dL/dx regardless of f(x).
        Returns:
            y, with the gradient forced to be dL/dy = dL/dx.
        """

        # Value equals y; the only differentiable term is x.
        return y.detach() + (x - x.detach())

    @property
    def expand_embedding(self):
        """Codebook with a group axis of size ``self.groups``."""
        if self.combine_groups:
            # Broadcast the single shared codebook across every group.
            return self.embedding.expand(self.num_vars, self.groups, self.var_dim)
        return self.embedding

    def forward_idx(self, x):
        """Quantize ``x`` and return ``(quantized, code indices)``."""
        res = self.forward(x, produce_targets=True)
        return res["x"], res["targets"]

    def forward(self, x, produce_targets=False):
        """Quantize ``x`` against the codebook.

        Returns a dict with keys ``"num_vars"``, ``"x"`` (quantized output),
        ``"code_perplexity"``, ``"kmeans_loss"`` and, when ``produce_targets``
        is true, ``"targets"`` (the selected code indices).
        """

        result = {"num_vars": self.num_vars}

        if self.time_first:
            x = x.transpose(1, 2)

        bsz, fsz, tsz = x.shape

        ze = self.projection(x)
        # (bsz, groups, var_dim, tsz) -> (bsz, tsz, groups, var_dim)
        ze_ = ze.view(bsz, self.groups, self.var_dim, tsz).permute(0, 3, 1, 2)
        # L2 distance from every projected vector to every code:
        # (num_vars, bsz, tsz, groups)
        d = (
            (ze_.unsqueeze(0) - self.expand_embedding.unsqueeze(1).unsqueeze(1))
            .view(self.num_vars, bsz, tsz, self.groups, -1)
            .norm(dim=-1, p=2)
        )
        # Nearest code per (batch, time, group).
        idx = d.argmin(dim=0)
        # Gather the chosen codes group by group, then restore the
        # (bsz, groups * var_dim, tsz) layout of ``ze``.
        zq = (
            torch.stack(
                [
                    self.expand_embedding[idx[..., group], group]
                    for group in range(self.groups)
                ],
                dim=-2,
            )
            .view(bsz, tsz, self.groups * self.var_dim)
            .permute(0, 2, 1)
        )
        assert ze.shape == zq.shape, (ze.shape, zq.shape)
        # Forward value is zq; gradients flow to ze.
        x = self._pass_grad(ze, zq)

        with torch.no_grad():
            # One-hot code assignments, used only for the usage statistic below.
            hard_x = (
                idx.new_zeros(bsz * tsz * self.groups, self.num_vars)
                .scatter_(-1, idx.view(-1, 1), 1.0)
                .view(bsz * tsz, self.groups, -1)
            )
            hard_probs = torch.mean(hard_x.float(), dim=0)
            # exp(entropy) of code usage per group, summed over groups.
            result["code_perplexity"] = torch.exp(
                -torch.sum(hard_probs * torch.log(hard_probs + 1e-7), dim=-1)
            ).sum()

        if produce_targets:
            result["targets"] = idx

        if self.time_first:
            x = x.transpose(1, 2)  # BCT -> BTC
        result["x"] = x

        ze = ze.float()
        zq = zq.float()
        # First term moves the codes toward the projections, second term
        # moves the projections toward the codes (weighted by gamma).
        latent_loss = self.mse_mean(zq, ze.detach())
        commitment_loss = self.mse_mean(ze, zq.detach())

        result["kmeans_loss"] = latent_loss + self.gamma * commitment_loss

        return result
class LayerDropModuleList(nn.ModuleList):
    """
    A LayerDrop implementation based on :class:`torch.nn.ModuleList`
    (LayerDrop is described in https://arxiv.org/abs/1909.11556).

    A fresh set of random draws is made every time the list is iterated, so
    each pass may visit a different subset of layers. In evaluation mode
    every layer is always visited.

    Usage::

        layers = LayerDropModuleList(p=0.5, modules=[layer1, layer2, layer3])
        for layer in layers:  # this might iterate over layers 1 and 3
            x = layer(x)
        for layer in layers:  # this might iterate over all layers
            x = layer(x)
        for layer in layers:  # this might not iterate over any layers
            x = layer(x)

    Args:
        p (float): probability of dropping out each layer
        modules (iterable, optional): an iterable of modules to add
    """

    def __init__(self, p, modules=None):
        super().__init__(modules)
        self.p = p

    def __iter__(self):
        # One uniform draw per layer, sampled up front on every iteration.
        draws = torch.empty(len(self)).uniform_()
        for draw, layer in zip(draws, super().__iter__()):
            if self.training and not (draw > self.p):
                # Dropped for this pass.
                continue
            yield layer
import torch
import torch.nn as nn
import torch.nn.functional as F

try:
    from apex.normalization import FusedLayerNorm as _FusedLayerNorm

    has_fused_layernorm = True

    class FusedLayerNorm(_FusedLayerNorm):
        """apex ``FusedLayerNorm`` that runs under the input's CUDA device."""

        @torch.jit.unused
        def forward(self, x):
            if not x.is_cuda:
                return super().forward(x)
            else:
                # Make the input's device current for the duration of the
                # fused call.
                with torch.cuda.device(x.device):
                    return super().forward(x)

except ImportError:
    # apex is optional; LayerNorm() falls back to torch.nn.LayerNorm.
    has_fused_layernorm = False


def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False):
    """Build a layer-norm module, preferring apex's fused kernel.

    Args:
        normalized_shape: shape argument forwarded to the layer-norm class.
        eps: numerical-stability epsilon forwarded to the layer-norm class.
        elementwise_affine: forwarded to the layer-norm class.
        export: when true, always return :class:`torch.nn.LayerNorm`. Forced
            to true while TorchScript scripting or tracing is active.

    Returns:
        ``FusedLayerNorm`` when not exporting, CUDA is available and apex
        was importable; otherwise ``torch.nn.LayerNorm``.
    """
    if torch.jit.is_scripting() or torch.jit.is_tracing():
        export = True
    if not export and torch.cuda.is_available() and has_fused_layernorm:
        return FusedLayerNorm(normalized_shape, eps, elementwise_affine)
    return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine)


class Fp32LayerNorm(nn.LayerNorm):
    """LayerNorm computed in float32 and cast back to the input's dtype."""

    def forward(self, input):
        # Input, weight and bias are all promoted to float32 for the
        # normalisation; only the result is cast back.
        output = F.layer_norm(
            input.float(),
            self.normalized_shape,
            self.weight.float() if self.weight is not None else None,
            self.bias.float() if self.bias is not None else None,
            self.eps,
        )
        return output.type_as(input)


# ---- patch boundary: fairseq/fairseq/modules/learned_positional_embedding.py (new file) ----
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict, Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from fairseq import utils
from torch import Tensor


class LearnedPositionalEmbedding(nn.Embedding):
    """
    This module learns positional embeddings up to a fixed maximum size.
    Padding ids are ignored by either offsetting based on padding_idx
    or by setting padding_idx to None and ensuring that the appropriate
    position ids are passed to the forward function.
    """

    def __init__(
        self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int]
    ):
        super().__init__(num_embeddings, embedding_dim, padding_idx)
        self.onnx_trace = False
        if self.padding_idx is not None:
            # Rows 0..padding_idx are not used for real positions.
            self.max_positions = self.num_embeddings - self.padding_idx - 1
        else:
            self.max_positions = self.num_embeddings

    def forward(
        self,
        input: Tensor,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        positions: Optional[Tensor] = None,
    ):
        """Input is expected to be of size [bsz x seqlen].

        Args:
            input: token tensor; only its size, device and dtype are used
                in the incremental path.
            incremental_state: when given (and ``positions`` is None), a
                single position ``padding_idx + input.size(1)`` is embedded.
            positions: pre-computed position ids; only allowed when
                ``padding_idx`` is None. When both ``positions`` and
                ``padding_idx`` are None the position computation below
                cannot succeed, so callers must supply ``positions``.

        Returns:
            The embeddings looked up for the (computed or given) positions.
        """
        assert (positions is None) or (
            self.padding_idx is None
        ), "If positions is pre-computed then padding_idx should not be set."

        if positions is None:
            if incremental_state is not None:
                # positions is the same for every token when decoding a single step
                # Without the int() cast, it doesn't work in some cases when exporting to ONNX
                positions = torch.zeros(
                    (1, 1), device=input.device, dtype=input.dtype
                ).fill_(int(self.padding_idx + input.size(1)))
            else:
                positions = utils.make_positions(
                    input, self.padding_idx, onnx_trace=self.onnx_trace
                )
        return F.embedding(
            positions,
            self.weight,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )


# ---- patch boundary: fairseq/fairseq/modules/lightconv_layer/__init__.py (new file) ----
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +from .lightconv_layer import LightconvLayer # noqa diff --git a/fairseq/fairseq/modules/lightconv_layer/cuda_function_gen.py b/fairseq/fairseq/modules/lightconv_layer/cuda_function_gen.py new file mode 100644 index 0000000..a25433d --- /dev/null +++ b/fairseq/fairseq/modules/lightconv_layer/cuda_function_gen.py @@ -0,0 +1,289 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +def gen_forward(): + + kernels = [3, 5, 7, 15, 31, 63, 127, 255] + seqs = [32 * x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]] + + head = """ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "lightconv_cuda.cuh" + +std::vector<at::Tensor> lightconv_cuda_forward(at::Tensor input, at::Tensor filters, int padding_l) { + + at::DeviceGuard g(input.device()); + const auto minibatch = input.size(0); + const auto numFeatures = input.size(1); + const auto sequenceLength = input.size(2); + + const auto numHeads = filters.size(0); + const auto filterSize = filters.size(1); + + const auto numFiltersInBlock = numFeatures / numHeads; + + const dim3 blocks(minibatch, numFeatures); + + auto output = at::zeros_like(input); + auto stream = at::cuda::getCurrentCUDAStream(); +""" + + sequence_if = """ + if (sequenceLength <= {seq}) {{ + switch(filterSize) {{ +""" + + case_k = """ + case {k}: +""" + + main_block = """ + if (padding_l == {pad}) {{ + AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "lightconv_forward", ([&] {{ + lightconv_forward_kernel<{k}, {b_size}, {pad}, scalar_t> + <<<blocks, {b_size}, 0, stream>>>( + input.data<scalar_t>(), + filters.data<scalar_t>(), + minibatch, + sequenceLength, + numFeatures, + numFiltersInBlock, + output.data<scalar_t>()); + }})); + }} else +""" + + 
bad_padding = """ + { + std::cout << "WARNING: Unsupported padding size - skipping forward pass" << std::endl; + } + break; +""" + + bad_filter = """ + default: + std::cout << "WARNING: Unsupported filter length passed - skipping forward pass" << std::endl; + } +""" + + con_else = """ + } else +""" + + final_else = """ + { + switch(filterSize) { +""" + + final_return = """ + } + + return {output}; +} +""" + + with open("lightconv_cuda_forward.cu", "w") as forward: + forward.write(head) + for seq in seqs: + forward.write(sequence_if.format(seq=seq)) + for k in kernels: + forward.write(case_k.format(k=k)) + for pad in [k // 2, k - 1]: + forward.write(main_block.format(k=k, b_size=seq, pad=pad)) + forward.write(bad_padding) + forward.write(bad_filter) + forward.write(con_else) + + forward.write(final_else) + for k in kernels: + forward.write(case_k.format(k=k)) + for pad in [k // 2, k - 1]: + forward.write(main_block.format(k=k, b_size=seq, pad=pad)) + forward.write(bad_padding) + forward.write(bad_filter) + forward.write(final_return) + + +def gen_backward(): + + head = """ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "lightconv_cuda.cuh" + +std::vector<at::Tensor> lightconv_cuda_backward( + at::Tensor gradOutput, + int padding_l, + at::Tensor input, + at::Tensor filters) { + + // gradWrtInput + const int minibatch = input.size(0); + const int numFeatures = input.size(1); + const int sequenceLength = input.size(2); + + const int numHeads = filters.size(0); + const int filterSize = filters.size(1); + + const dim3 gradBlocks(minibatch, numFeatures); + const dim3 weightGradFirstpassShortBlocks(minibatch, numHeads); + const dim3 weightGradSecondpassBlocks(numHeads, filterSize); + + const int numFiltersInBlock = numFeatures / numHeads; + + auto gradInput = at::zeros_like(input); + auto gradFilters = at::zeros_like(filters); + + at::DeviceGuard g(input.device()); + auto stream = at::cuda::getCurrentCUDAStream(); + + switch(filterSize) { +""" + + sequence_if = """ + if (sequenceLength <= {seq}) {{ +""" + + case_k = """ + case {k}: +""" + + main_block = """ + if (padding_l == {p}) {{ + AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "lightconv_backward", ([&] {{ + lightconv_grad_wrt_input_kernel<{k}, {b_size}, {p}, scalar_t> + <<<gradBlocks, {b_size}, 0, stream>>>( + gradOutput.data<scalar_t>(), + filters.data<scalar_t>(), + minibatch, + sequenceLength, + numFeatures, + numFiltersInBlock, + gradInput.data<scalar_t>()); + +""" + + weight_grad_short = """ + at::Tensor tempSumGradFilters = at::zeros({{minibatch, numHeads, filterSize}}, input.options().dtype(at::kFloat)); + lightconv_grad_wrt_weights_firstpass_short_kernel<{k}, {b_size}, {p}, scalar_t> + <<<weightGradFirstpassShortBlocks, {b_size}, 0, stream>>>( + input.data<scalar_t>(), + gradOutput.data<scalar_t>(), + minibatch, + sequenceLength, + numFeatures, + numFiltersInBlock, + numHeads, + tempSumGradFilters.data<float>() + ); + + lightconv_grad_wrt_weights_secondpass_short_kernel<{k}, {b_size}, scalar_t> + <<<weightGradSecondpassBlocks, {b_size}, 0, stream>>>( + tempSumGradFilters.data<float>(), + 
minibatch, + numFiltersInBlock, + gradFilters.data<scalar_t>() + ); + }})); + }} else +""" + + weight_grad = """ + at::Tensor tempSumGradFilters = at::zeros({{minibatch, numFeatures, filterSize}}, input.options().dtype(at::kFloat)); + lightconv_grad_wrt_weights_firstpass_kernel<{k}, {b_size}, {p}, scalar_t> + <<<gradBlocks, {b_size}, 0, stream>>>( + input.data<scalar_t>(), + gradOutput.data<scalar_t>(), + minibatch, + sequenceLength, + numFeatures, + numFiltersInBlock, + tempSumGradFilters.data<float>() + ); + + lightconv_grad_wrt_weights_secondpass_kernel<{k}, {b_size}, scalar_t> + <<<weightGradSecondpassBlocks, {b_size}, 0, stream>>>( + tempSumGradFilters.data<float>(), + minibatch, + numFiltersInBlock, + gradFilters.data<scalar_t>() + ); + }})); + }} else +""" + + bad_padding = """ + { + std::cout << "WARNING: Unsupported padding size - skipping backward pass" << std::endl; + } +""" + + breakout = """ + break; +""" + + bad_filter = """ + default: + std::cout << "WARNING: Unsupported filter length passed - skipping backward pass" << std::endl; +""" + + con_else = """ + } else +""" + + final_else = """ + { + switch(filterSize) { +""" + + last_return = """ + } + return {gradInput, gradFilters}; +} +""" + + kernels = [3, 5, 7, 15, 31, 63, 127, 255] + seqs = [32 * x for x in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]] + thresh = [32, 32, 64, 128, 256, -1, -1, -1] + max_mem = [-1, -1, -1, -1, -1, 192, 96, 64] + + with open("lightconv_cuda_backward.cu", "w") as backward: + backward.write(head) + for (k, t, mem) in zip(kernels, thresh, max_mem): + backward.write(case_k.format(k=k)) + for seq in seqs: + if (t == -1 or seq <= t) and (mem == -1 or seq < mem): + backward.write(sequence_if.format(seq=seq)) + for p in [k // 2, k - 1]: + backward.write(main_block.format(k=k, b_size=seq, p=p)) + backward.write(weight_grad_short.format(k=k, b_size=seq, p=p)) + backward.write(bad_padding) + else: + for p in [k // 2, k - 1]: + backward.write(main_block.format(k=k, 
b_size=32, p=p)) + backward.write(weight_grad.format(k=k, b_size=32, p=p)) + backward.write(bad_padding) + backward.write(breakout) + break + backward.write(con_else) + backward.write(bad_filter) + backward.write(last_return) + + +if __name__ == "__main__": + gen_forward() + gen_backward() diff --git a/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cpp b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cpp new file mode 100644 index 0000000..ece47a8 --- /dev/null +++ b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cpp @@ -0,0 +1,51 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <torch/extension.h> +#include <vector> + +std::vector<at::Tensor> +lightconv_cuda_forward(at::Tensor input, at::Tensor filters, int padding_l); + +std::vector<at::Tensor> lightconv_cuda_backward( + at::Tensor gradOutput, + int padding_l, + at::Tensor input, + at::Tensor filters); + +#define CHECK_CUDA(x) \ + AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +std::vector<at::Tensor> +lightconv_forward(at::Tensor input, at::Tensor filters, int padding_l) { + CHECK_INPUT(input); + CHECK_INPUT(filters); + + return lightconv_cuda_forward(input, filters, padding_l); +} + +std::vector<at::Tensor> lightconv_backward( + at::Tensor gradOutput, + int padding_l, + at::Tensor input, + at::Tensor filters) { + CHECK_INPUT(gradOutput); + CHECK_INPUT(input); + CHECK_INPUT(filters); + + return lightconv_cuda_backward(gradOutput, padding_l, input, filters); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &lightconv_forward, "lighconv forward (CUDA)"); + m.def("backward", &lightconv_backward, "lighconv backward (CUDA)"); +} diff --git 
a/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cuh b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cuh new file mode 100644 index 0000000..610ab39 --- /dev/null +++ b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda.cuh @@ -0,0 +1,79 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <ATen/ATen.h> +#include <c10/cuda/CUDAStream.h> + +#include <cuda.h> +#include <cuda_runtime.h> + +#include <algorithm> +#include <functional> +#include <iostream> +#include <stdexcept> +#include <utility> +#include <vector> + +#include <assert.h> +#include <stdlib.h> + +#define SHFL_MASK 0xffffffff + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_forward_kernel( + const scalar_t* input, + const scalar_t* filters, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + scalar_t* output); + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_input_kernel( + const scalar_t* input, + const scalar_t* filters, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + scalar_t* output); + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_firstpass_short_kernel( + const scalar_t* input, + const scalar_t* gradInput, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + int numHeads, + float* output); + +template <int FS, int SB, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_secondpass_short_kernel( + const float* input, + const int minibatch, + const int numFiltersInBlock, + scalar_t* output); + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_firstpass_kernel( + const scalar_t* input, + const scalar_t* gradInput, + 
int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + float* output); + +template <int FS, int SB, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_secondpass_kernel( + const float* input, + const int minibatch, + const int numFiltersInBlock, + scalar_t* output); diff --git a/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu new file mode 100644 index 0000000..cdf31d5 --- /dev/null +++ b/fairseq/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu @@ -0,0 +1,400 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "../cuda_utils.cu" +#include "lightconv_cuda.cuh" +#include "lightconv_cuda_backward.cu" +#include "lightconv_cuda_forward.cu" + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_forward_kernel( + const scalar_t* input, + const scalar_t* filters, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + scalar_t* output) { + const int tid = threadIdx.x; + const int batchIdx = blockIdx.x; + const int featureIdx = blockIdx.y; + const int filterIdx = featureIdx / numFiltersInBlock; + + const int IOOffset = + numFeatures * sequenceLength * batchIdx + featureIdx * sequenceLength; + const scalar_t* inputFeature = &input[IOOffset]; + scalar_t* outputFeature = &output[IOOffset]; + const scalar_t* inputFilter = &filters[filterIdx * FS]; + + assert(blockDim.x == SB); + + scalar_t filter[FS]; +#pragma unroll + for (int i = 0; i < FS; ++i) { + filter[i] = inputFilter[i]; + } + + __shared__ scalar_t temp[SB + FS]; + zeroSharedMem<FS, SB, padding_l>(temp); + + const int numIterations = divUp<int, int>(sequenceLength, SB); + + for (int i = 0; i < numIterations; ++i) { + // Read input into shared memory + const int inputOffset 
= i * SB; + + load_input_to_shared<FS, SB, padding_l>( + inputFeature, + inputOffset, + sequenceLength, + i, + numIterations, + (numIterations == 1), + temp); + + __syncthreads(); + + scalar_t out = 0; +#pragma unroll + for (int j = 0; j < FS; ++j) { + out += filter[j] * temp[tid + j]; + } + + // Write output + const int outputOffset = inputOffset; + if ((outputOffset + tid) < sequenceLength) { + outputFeature[outputOffset + tid] = out; + } + + __syncthreads(); + } +} + +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_input_kernel( + const scalar_t* input, + const scalar_t* filters, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + scalar_t* output) { + // input grad kernel is similar to forward kernel + const int tid = threadIdx.x; + const int batchIdx = blockIdx.x; + const int featureIdx = blockIdx.y; + const int filterIdx = featureIdx / numFiltersInBlock; + + const int IOOffset = + numFeatures * sequenceLength * batchIdx + featureIdx * sequenceLength; + const scalar_t* inputFeature = &input[IOOffset]; + scalar_t* outputFeature = &output[IOOffset]; + const scalar_t* inputFilter = &filters[filterIdx * FS]; + + assert(blockDim.x == SB); + + scalar_t filter[FS]; + +// The only change is loading the filter in reverse +#pragma unroll + for (int i = 0; i < FS; ++i) { + filter[i] = inputFilter[FS - i - 1]; + } + + __shared__ scalar_t temp[SB + FS]; + const int padding = FS - padding_l - 1; + zeroSharedMem<FS, SB, padding>(temp); + + __syncthreads(); + + const int numIterations = divUp<int, int>(sequenceLength, SB); + + for (int i = 0; i < numIterations; ++i) { + // Read input into shared memory + const int inputOffset = i * SB; + + load_input_to_shared<FS, SB, padding>( + inputFeature, + inputOffset, + sequenceLength, + i, + numIterations, + false, + temp); + + __syncthreads(); + + scalar_t out = 0; +#pragma unroll + for (int j = 0; j < FS; ++j) { + out += filter[j] * temp[tid + j]; + } + 
+ // Write output + const int outputOffset = inputOffset; + if ((outputOffset + tid) < sequenceLength) { + outputFeature[outputOffset + tid] = out; + } + + __syncthreads(); + } +} + +// This is by far the most expensive kernel in terms of time taken. +// Can be 16x slower than the forward or grad_wrt_input when filter size is 31 +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_firstpass_short_kernel( + const scalar_t* input, + const scalar_t* gradInput, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + int numHeads, + float* output) { + const int tid = threadIdx.x; + const int batchIdx = blockIdx.x; + const int filterIdx = blockIdx.y; + + const int numIterations = divUp<int, int>(sequenceLength, SB); + + float* tempOutputGradWeight = &output[filterIdx * FS * minibatch]; + + assert(blockDim.x == SB); + + __shared__ scalar_t tempInput[SB + FS]; + __shared__ scalar_t tempGradInput[SB + FS]; + + // local weight accumulation + float accumWeights[FS]; + + // Initialize memory + for (int i = 0; i < FS; ++i) { + accumWeights[i] = float(0.0); + } + + // loop over each sequence within filterblock + for (int idxInFilterBlock = 0; idxInFilterBlock < numFiltersInBlock; + ++idxInFilterBlock) { + const int featureOffset = batchIdx * numFeatures * sequenceLength + + (filterIdx * numFiltersInBlock + idxInFilterBlock) * sequenceLength; + const scalar_t* inputFeature = &input[featureOffset]; + const scalar_t* gradInputFeature = &gradInput[featureOffset]; + + zeroSharedMem<FS, SB, padding_l>(tempInput); + zeroSharedMem<FS, SB, (FS / 2)>(tempGradInput); + __syncthreads(); + + for (int i = 0; i < numIterations; ++i) { + const int inputOffset = i * SB; + + load_input_to_shared<FS, SB, padding_l>( + inputFeature, + inputOffset, + sequenceLength, + i, + numIterations, + false, + tempInput); + load_input_to_shared<FS, SB, (FS / 2)>( + gradInputFeature, + inputOffset, + sequenceLength, + i, + 
numIterations, + false, + tempGradInput); + + __syncthreads(); + + const int gradIndex = (FS / 2) + tid; + scalar_t tempGrad = tempGradInput[gradIndex]; + +#pragma unroll + for (int j = 0; j < FS; j++) { + const int inputIndex = tid + j; + accumWeights[j] += tempInput[inputIndex] * tempGrad; + } + + __syncthreads(); + } + } + + // Row-major sum + for (int filterWeightIdx = 0; filterWeightIdx < FS; ++filterWeightIdx) { + float temp; + if (tid < sequenceLength) { + temp = accumWeights[filterWeightIdx]; + } else { + temp = float(0.0); + } + + const int outputOffset = filterWeightIdx * minibatch + batchIdx; + + temp = blockReduce(temp); + + if (tid == 0) { + tempOutputGradWeight[outputOffset] = temp; + } + } +} + +template <int FS, int SB, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_secondpass_short_kernel( + const float* input, + const int minibatch, + const int numFiltersInBlock, + scalar_t* output) { + assert(blockDim.x == SB); + + const int tid = threadIdx.x; + + const int filterIdx = blockIdx.x; + const int filterWeightIdx = blockIdx.y; + + const int inputOffset = + filterIdx * FS * minibatch + filterWeightIdx * minibatch; + const float* tempInput = &input[inputOffset]; + + // read into shared memory for reduction + int readIndex = tid; + + float sum = 0.0; + while (readIndex < minibatch) { + sum += tempInput[readIndex]; + readIndex += SB; + } + + float temp = blockReduce(sum); + + if (tid == 0) { + output[blockIdx.x * FS + blockIdx.y] = temp; + } +} + +// This is by far the most expensive kernel in terms of time taken. 
+// Can be 16x slower than the forward or grad_wrt_input when filter size is 31 +template <int FS, int SB, int padding_l, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_firstpass_kernel( + const scalar_t* input, + const scalar_t* gradInput, + int minibatch, + int sequenceLength, + int numFeatures, + int numFiltersInBlock, + float* output) { + assert(blockDim.x == SB); + + const int tid = threadIdx.x; + const int batchIdx = blockIdx.x; + const int featureIdx = blockIdx.y; + const int filterIdx = featureIdx / numFiltersInBlock; + const int idxInFilterBlock = featureIdx % numFiltersInBlock; + + const int numIterations = divUp<int, int>(sequenceLength, SB); + + float temp; + + __shared__ scalar_t tempInput[SB + FS]; + __shared__ scalar_t tempGradInput[SB + FS]; + zeroSharedMem<FS, SB, padding_l>(tempInput); + zeroSharedMem<FS, SB, (FS / 2)>(tempGradInput); + __syncthreads(); + + float accumWeights[FS]; + + for (int i = 0; i < FS; ++i) { + accumWeights[i] = float(0.0); + } + + const int IOOffset = + batchIdx * numFeatures * sequenceLength + featureIdx * sequenceLength; + const scalar_t* inputFeature = &input[IOOffset]; + const scalar_t* gradInputFeature = &gradInput[IOOffset]; + float* tempOutputGradWeight = + &output[filterIdx * FS * minibatch * numFiltersInBlock]; + + for (int i = 0; i < numIterations; ++i) { + const int inputOffset = i * SB; + + load_input_to_shared<FS, SB, padding_l>( + inputFeature, + inputOffset, + sequenceLength, + i, + numIterations, + false, + tempInput); + load_input_to_shared<FS, SB, (FS / 2)>( + gradInputFeature, + inputOffset, + sequenceLength, + i, + numIterations, + false, + tempGradInput); + __syncthreads(); + +#pragma unroll + for (int j = 0; j < FS; ++j) { + accumWeights[j] += tempInput[tid + j] * tempGradInput[tid + (FS / 2)]; + } + + __syncthreads(); + } + + // Row-major sum + for (int filterWeightIdx = 0; filterWeightIdx < FS; ++filterWeightIdx) { + // Write to shared memory before reduction + if (tid < 
sequenceLength) { + temp = accumWeights[filterWeightIdx]; + } else { + temp = float(0.0); + } + + temp = blockReduce(temp); + + const int outputOffset = filterWeightIdx * minibatch * numFiltersInBlock + + batchIdx * numFiltersInBlock + idxInFilterBlock; + + if (tid == 0) { + tempOutputGradWeight[outputOffset] = temp; + } + } +} + +template <int FS, int SB, typename scalar_t> +__global__ void lightconv_grad_wrt_weights_secondpass_kernel( + const float* input, + const int minibatch, + const int numFiltersInBlock, + scalar_t* output) { + assert(blockDim.x == SB); + const int tid = threadIdx.x; + + // What is the id within a minibatch + const int filterIdx = blockIdx.x; + const int filterWeightIdx = blockIdx.y; + + const int inputOffset = filterIdx * FS * minibatch * numFiltersInBlock + + filterWeightIdx * minibatch * numFiltersInBlock; + const float* tempInput = &input[inputOffset]; + + int readIndex = tid; + + float sum = float(0.0); + while (readIndex < (minibatch * numFiltersInBlock)) { + sum += tempInput[readIndex]; + readIndex += SB; + } + + float temp = blockReduce(sum); + + if (tid == 0) { + output[blockIdx.x * FS + blockIdx.y] = temp; + } +} diff --git a/fairseq/fairseq/modules/lightconv_layer/lightconv_layer.py b/fairseq/fairseq/modules/lightconv_layer/lightconv_layer.py new file mode 100644 index 0000000..e7e597f --- /dev/null +++ b/fairseq/fairseq/modules/lightconv_layer/lightconv_layer.py @@ -0,0 +1,137 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import lightconv_cuda
import torch
import torch.nn.functional as F
from fairseq import utils
from fairseq.incremental_decoding_utils import with_incremental_state
from fairseq.modules.fairseq_dropout import FairseqDropout
from torch import nn
from torch.autograd import Function


class lightconvFunction(Function):
    """Autograd bridge to the compiled ``lightconv_cuda`` extension."""

    @staticmethod
    def forward(ctx, x, weights, padding_l):
        ctx.padding_l = padding_l
        outputs = lightconv_cuda.forward(x, weights, padding_l)
        variables = [x, weights]
        ctx.save_for_backward(*variables)
        return outputs[0]

    @staticmethod
    def backward(ctx, grad_output):
        outputs = lightconv_cuda.backward(
            grad_output.contiguous(), ctx.padding_l, *ctx.saved_tensors
        )
        grad_input, grad_weights = outputs
        # padding_l is a plain Python value and receives no gradient.
        return grad_input, grad_weights, None


@with_incremental_state
class LightconvLayer(nn.Module):
    """Lightweight convolution backed by the ``lightconv_cuda`` kernels.

    Args:
        input_size: number of channels; also the length of the bias vector.
        kernel_size: width of each head's filter.
        padding_l: left padding, passed through to the CUDA kernel.
        weight_softmax: softmax-normalise each filter before use.
        num_heads: number of filters; ``weight`` is ``(num_heads, kernel_size)``.
        weight_dropout: dropout probability applied to the filter weights.
        bias: when true, learn a per-channel bias that is added to the output.
    """

    def __init__(
        self,
        input_size,
        kernel_size=1,
        padding_l=None,
        weight_softmax=False,
        num_heads=1,
        weight_dropout=0.0,
        bias=False,
    ):
        super(LightconvLayer, self).__init__()
        self.input_size = input_size
        self.kernel_size = kernel_size
        self.padding_l = padding_l
        self.num_heads = num_heads
        self.weight_softmax = weight_softmax
        self.weight_dropout_module = FairseqDropout(
            weight_dropout, module_name=self.__class__.__name__
        )

        self.weight = nn.Parameter(torch.Tensor(num_heads, kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(input_size))
        else:
            self.bias = None
        self.reset_parameters()

    def upgrade_state_dict_named(self, state_dict, name):
        """Squeeze 3-D ``(H, 1, K)`` weights in ``state_dict`` to ``(H, K)``.

        Only existing keys are reassigned, so mutating the dict while
        iterating over it is safe here.
        """
        prefix = name + "." if name != "" else ""
        for k, v in state_dict.items():
            if k.endswith(prefix + "weight"):
                if v.dim() == 3 and v.size(1) == 1:
                    state_dict[k] = v.squeeze(1)

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            nn.init.constant_(self.bias, 0.0)

    def forward(self, x, incremental_state=None):
        """Apply the convolution to ``x``, whose size is unpacked as (T, B, C).

        With ``incremental_state`` the buffered previous inputs are combined
        with ``x`` through a batched matmul; otherwise the CUDA kernel is
        used. In both paths the output has the same T x B x C layout and the
        optional bias is added per channel.
        """
        # during inference time, incremental BMM is faster
        if incremental_state is not None:
            T, B, C = x.size()
            H = self.num_heads
            R = C // H
            input_buffer = self._get_input_buffer(incremental_state)
            if input_buffer is None:
                input_buffer = x.new()
            x_unfold = torch.cat([input_buffer, x.unsqueeze(3)], dim=3)
            if self.kernel_size > 1:
                # Keep only the last kernel_size - 1 steps for the next call.
                self._set_input_buffer(
                    incremental_state, x_unfold[:, :, :, -self.kernel_size + 1 :]
                )
            x_unfold = x_unfold.view(T * B * H, R, -1)

            weight = self.weight
            if self.weight_softmax:
                weight = F.softmax(weight.float(), dim=1).type_as(weight)

            # Early steps have fewer buffered inputs than the kernel width.
            weight = weight[:, -x_unfold.size(2) :]

            K = weight.size(1)

            weight = (
                weight.view(1, H, K)
                .expand(T * B, H, K)
                .contiguous()
                .view(T * B * H, K, 1)
            )

            weight = self.weight_dropout_module(weight)
            output = torch.bmm(x_unfold, weight)  # T*B*H x R x 1
            output = output.view(T, B, C)

        # during training time, use CUDA kernel
        else:
            x = x.permute(1, 2, 0).contiguous()
            weight = self.weight
            if self.weight_softmax:
                weight = F.softmax(self.weight, -1)
            if self.weight_dropout_module.p:
                weight = self.weight_dropout_module(weight)
            output = lightconvFunction.apply(x, weight, self.padding_l).permute(
                2, 0, 1
            )

        # The bias parameter used to be created but never applied, so
        # bias=True had no effect on the output and left an unused parameter.
        if self.bias is not None:
            output = output + self.bias.view(1, 1, -1)
        return output

    def reorder_incremental_state(self, incremental_state, new_order):
        input_buffer = self._get_input_buffer(incremental_state)
        if input_buffer is not None:
            input_buffer = input_buffer.index_select(1, new_order)
            self._set_input_buffer(incremental_state, input_buffer)

    def _get_input_buffer(self, incremental_state):
        return utils.get_incremental_state(self, incremental_state, "input_buffer")

    def _set_input_buffer(self, incremental_state, new_buffer):
        return utils.set_incremental_state(
            self, incremental_state, "input_buffer", new_buffer
        )

    def half(self):
        # Cast floating-point tensors only; everything else is left as is.
        return self._apply(lambda t: t.half() if t.is_floating_point() else t)


# ---- patch boundary: fairseq/fairseq/modules/lightconv_layer/setup.py (new file) ----
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension


setup(
    name="lightconv_layer",
    ext_modules=[
        CUDAExtension(
            "lightconv_cuda",
            [
                "lightconv_cuda.cpp",
                "lightconv_cuda_kernel.cu",
            ],
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
)


# ---- patch boundary: fairseq/fairseq/modules/lightweight_convolution.py (new file) ----
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.incremental_decoding_utils import with_incremental_state +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.unfold import unfold1d + + +def LightweightConv( + input_size, + kernel_size=1, + padding_l=None, + num_heads=1, + weight_dropout=0.0, + weight_softmax=False, + bias=False, +): + if torch.cuda.is_available(): + try: + from fairseq.modules.lightconv_layer import LightconvLayer + + return LightconvLayer( + input_size, + kernel_size=kernel_size, + padding_l=padding_l, + num_heads=num_heads, + weight_dropout=weight_dropout, + weight_softmax=weight_softmax, + bias=bias, + ) + except ImportError as e: + print(e) + return LightweightConv1dTBC( + input_size, + kernel_size=kernel_size, + padding_l=padding_l, + num_heads=num_heads, + weight_dropout=weight_dropout, + weight_softmax=weight_softmax, + bias=bias, + ) + + +class LightweightConv1d(nn.Module): + """Lightweight Convolution assuming the input is BxCxT + This is just an example that explains LightConv clearer than the TBC version. + We don't use this module in the model. + + Args: + input_size: # of channels of the input and output + kernel_size: convolution channels + padding: padding + num_heads: number of heads used. The weight is of shape + `(num_heads, 1, kernel_size)` + weight_softmax: normalize the weight with softmax before the convolution + + Shape: + Input: BxCxT, i.e. (batch_size, input_size, timesteps) + Output: BxCxT, i.e. 
(batch_size, input_size, timesteps) + + Attributes: + weight: the learnable weights of the module of shape + `(num_heads, 1, kernel_size)` + bias: the learnable bias of the module of shape `(input_size)` + """ + + def __init__( + self, + input_size, + kernel_size=1, + padding=0, + num_heads=1, + weight_softmax=False, + bias=False, + weight_dropout=0.0, + ): + super().__init__() + self.input_size = input_size + self.kernel_size = kernel_size + self.num_heads = num_heads + self.padding = padding + self.weight_softmax = weight_softmax + self.weight = nn.Parameter(torch.Tensor(num_heads, 1, kernel_size)) + + if bias: + self.bias = nn.Parameter(torch.Tensor(input_size)) + else: + self.bias = None + self.weight_dropout_module = FairseqDropout( + weight_dropout, module_name=self.__class__.__name__ + ) + self.reset_parameters() + + def reset_parameters(self): + nn.init.xavier_uniform_(self.weight) + if self.bias is not None: + nn.init.constant_(self.bias, 0.0) + + def forward(self, input): + """ + input size: B x C x T + output size: B x C x T + """ + B, C, T = input.size() + H = self.num_heads + + weight = self.weight + if self.weight_softmax: + weight = F.softmax(weight, dim=-1) + + weight = self.weight_dropout_module(weight) + # Merge every C/H entries into the batch dimension (C = self.input_size) + # B x C x T -> (B * C/H) x H x T + # One can also expand the weight to C x 1 x K by a factor of C/H + # and do not reshape the input instead, which is slow though + input = input.view(-1, H, T) + output = F.conv1d(input, weight, padding=self.padding, groups=self.num_heads) + output = output.view(B, C, T) + if self.bias is not None: + output = output + self.bias.view(1, -1, 1) + + return output + + +@with_incremental_state +class LightweightConv1dTBC(nn.Module): + """Lightweight Convolution assuming the input is TxBxC + Args: + input_size: # of channels of the input + kernel_size: convolution channels + padding_l: padding to the left when using "same" padding + num_heads: 
number of heads used. The weight is of shape (num_heads, 1, kernel_size) + weight_dropout: the drop rate of the DropConnect to drop the weight + weight_softmax: normalize the weight with softmax before the convolution + bias: use bias + + Shape: + Input: TxBxC, i.e. (timesteps, batch_size, input_size) + Output: TxBxC, i.e. (timesteps, batch_size, input_size) + + Attributes: + weight: the learnable weights of the module of shape + `(num_heads, 1, kernel_size)` + bias: the learnable bias of the module of shape `(input_size)` + """ + + def __init__( + self, + input_size, + kernel_size=1, + padding_l=None, + num_heads=1, + weight_dropout=0.0, + weight_softmax=False, + bias=False, + ): + super().__init__() + self.input_size = input_size + self.kernel_size = kernel_size + self.padding_l = padding_l + self.num_heads = num_heads + self.weight_dropout_module = FairseqDropout( + weight_dropout, module_name=self.__class__.__name__ + ) + self.weight_softmax = weight_softmax + + self.weight = nn.Parameter(torch.Tensor(num_heads, 1, kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(input_size)) + else: + self.bias = None + + self.reset_parameters() + self.onnx_trace = False + + def reset_parameters(self): + nn.init.xavier_uniform_(self.weight) + if self.bias is not None: + nn.init.constant_(self.bias, 0.0) + + def forward(self, x, incremental_state=None, unfold=False): + """Assuming the input, x, of the shape T x B x C and producing an output in the shape T x B x C + args: + x: Input of shape T x B x C, i.e. (timesteps, batch_size, input_size) + incremental_state: A dict to keep the state + unfold: unfold the input or not. 
If not, we use the matrix trick instead + """ + unfold = unfold or (incremental_state is not None) + + if unfold: + output = self._forward_unfolded(x, incremental_state) + else: + output = self._forward_expanded(x, incremental_state) + + if self.bias is not None: + output = output + self.bias.view(1, 1, -1) + return output + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def _forward_unfolded(self, x, incremental_state): + """The conventional implementation of convolutions. + Unfolding the input by having a window shifting to the right.""" + T, B, C = x.size() + K, H = self.kernel_size, self.num_heads + R = C // H + assert R * H == C == self.input_size + + weight = self.weight.view(H, K) + if incremental_state is not None: + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is None: + input_buffer = x.new() + x_unfold = torch.cat([input_buffer, x.unsqueeze(3)], dim=3) + if self.kernel_size > 1: + self._set_input_buffer( + incremental_state, x_unfold[:, :, :, -self.kernel_size + 1 :] + ) + x_unfold = x_unfold.view(T * B * H, R, -1) + else: + # unfold the input: T x B x C --> T' x B x C x K + x_unfold = unfold1d(x, self.kernel_size, self.padding_l, 0) + x_unfold = x_unfold.view(T * B * H, R, K) + + if self.weight_softmax: + weight = utils.softmax(weight, dim=1, onnx_trace=self.onnx_trace).type_as( + weight + ) + + if incremental_state is not None: + weight = weight[:, -x_unfold.size(2) :] + K = weight.size(1) + + weight = ( + weight.view(1, H, K).expand(T * B, H, K).contiguous().view(T * B * H, K, 1) + ) + + weight = self.weight_dropout_module(weight) + output = torch.bmm(x_unfold, weight) # T*B*H x R x 1 + output = output.view(T, B, C) + return output + + def _forward_expanded(self, x, incremental_state): + """Turn the convolution filters into band matrices and do matrix multiplication. + This is faster when the sequence is short, but less memory efficient. + This is not used in the decoder during inference. 
+ """ + T, B, C = x.size() + K, H = self.kernel_size, self.num_heads + R = C // H + assert R * H == C == self.input_size + + weight = self.weight.view(H, K) + if self.weight_softmax: + weight = utils.softmax(weight, dim=1, onnx_trace=self.onnx_trace).type_as( + weight + ) + weight = weight.view(1, H, K).expand(T * B, H, K).contiguous() + weight = weight.view(T, B * H, K).transpose(0, 1) + + x = x.view(T, B * H, R).transpose(0, 1) + P = self.padding_l + if K > T and P == K - 1: + weight = weight.narrow(2, K - T, T) + K, P = T, T - 1 + # turn the convolution filters into band matrices + weight_expanded = weight.new_zeros(B * H, T, T + K - 1, requires_grad=False) + weight_expanded.as_strided((B * H, T, K), (T * (T + K - 1), T + K, 1)).copy_( + weight + ) + weight_expanded = weight_expanded.narrow(2, P, T) + weight_expanded = self.weight_dropout_module(weight_expanded) + + output = torch.bmm(weight_expanded, x) + output = output.transpose(0, 1).contiguous().view(T, B, C) + return output + + def reorder_incremental_state(self, incremental_state, new_order): + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + input_buffer = input_buffer.index_select(1, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + def _get_input_buffer(self, incremental_state): + return utils.get_incremental_state(self, incremental_state, "input_buffer") + + def _set_input_buffer(self, incremental_state, new_buffer): + return utils.set_incremental_state( + self, incremental_state, "input_buffer", new_buffer + ) + + def extra_repr(self): + s = "{}, kernel_size={}, padding_l={}, num_heads={}, weight_softmax={}, bias={}".format( + self.input_size, + self.kernel_size, + self.padding_l, + self.num_heads, + self.weight_softmax, + self.bias is not None, + ) + if self.weight_dropout_module.p > 0.0: + s += ", weight_dropout={}".format(self.weight_dropout_module.p) + return s diff --git a/fairseq/fairseq/modules/linearized_convolution.py 
b/fairseq/fairseq/modules/linearized_convolution.py new file mode 100644 index 0000000..1c7a9f0 --- /dev/null +++ b/fairseq/fairseq/modules/linearized_convolution.py @@ -0,0 +1,125 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.incremental_decoding_utils import with_incremental_state + +from .conv_tbc import ConvTBC + +from typing import Dict, Optional +from torch import Tensor + + +@with_incremental_state +class LinearizedConvolution(ConvTBC): + """An optimized version of nn.Conv1d. + + At training time, this module uses ConvTBC, which is an optimized version + of Conv1d. At inference time, it optimizes incremental generation (i.e., + one time step at a time) by replacing the convolutions with linear layers. + Note that the input order changes from training to inference. + """ + + def __init__(self, in_channels, out_channels, kernel_size, **kwargs): + super().__init__(in_channels, out_channels, kernel_size, **kwargs) + self._linearized_weight = None + self.register_backward_hook(self._clear_linearized_weight) + + def state_dict(self, destination=None, prefix="", keep_vars=False): + state = ConvTBC.state_dict(self, destination, prefix, keep_vars=keep_vars) + # don't store redundant _linearized_weight in checkpoints + if prefix + "_linearized_weight" in state: + del state[prefix + "_linearized_weight"] + return state + + def upgrade_state_dict_named(self, state_dict, name): + prefix = name + "." 
if name != "" else "" + if prefix + "_linearized_weight" in state_dict: + del state_dict[prefix + "_linearized_weight"] + + @torch.jit.export + def forward( + self, + input, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + ): + """ + Args: + incremental_state: Used to buffer signal; if not None, then input is + expected to contain a single frame. If the input order changes + between time steps, call reorder_incremental_state. + Input: + Time x Batch x Channel during training + Batch x Time x Channel during inference + """ + if incremental_state is None: + output = self.conv_tbc(input) + if self.kernel_size[0] > 1 and self.padding[0] > 0: + # remove future timesteps added by padding + output = output[: -self.padding[0], :, :] + return output + + # reshape weight + weight = self._get_linearized_weight() + kw = self.kernel_size[0] + + bsz = input.size(0) # input: bsz x len x dim + if kw > 1: + input = input.data + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is None: + input_buffer = input.new(bsz, kw, input.size(2)).zero_() + self._set_input_buffer(incremental_state, input_buffer) + else: + # shift buffer + input_buffer[:, :-1, :] = input_buffer[:, 1:, :].clone() + # append next input + input_buffer[:, -1, :] = input[:, -1, :] + input = input_buffer + with torch.no_grad(): + output = F.linear(input.view(bsz, -1), weight, self.bias) + return output.view(bsz, 1, -1) + + @torch.jit.unused + def reorder_incremental_state( + self, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + input_buffer = input_buffer.index_select(0, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + @torch.jit.unused + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ): + return utils.get_incremental_state(self, incremental_state, 
"input_buffer") + + @torch.jit.unused + def _set_input_buffer( + self, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + new_buffer, + ): + return utils.set_incremental_state( + self, incremental_state, "input_buffer", new_buffer + ) + + @torch.jit.unused + def _get_linearized_weight(self): + if self._linearized_weight is None: + kw = self.kernel_size[0] + weight = self.weight.transpose(2, 1).transpose(1, 0).contiguous() + assert weight.size() == (self.out_channels, kw, self.in_channels) + return weight.view(self.out_channels, -1) + return self._linearized_weight + + @torch.jit.unused + def _clear_linearized_weight(self, *args): + self._linearized_weight = None diff --git a/fairseq/fairseq/modules/location_attention.py b/fairseq/fairseq/modules/location_attention.py new file mode 100644 index 0000000..dbbbfb9 --- /dev/null +++ b/fairseq/fairseq/modules/location_attention.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch.nn as nn +import torch +import torch.nn.functional as F + + +class LocationAttention(nn.Module): + """ + Attention-Based Models for Speech Recognition + https://arxiv.org/pdf/1506.07503.pdf + + :param int encoder_dim: # projection-units of encoder + :param int decoder_dim: # units of decoder + :param int attn_dim: attention dimension + :param int conv_dim: # channels of attention convolution + :param int conv_kernel_size: filter size of attention convolution + """ + + def __init__( + self, + attn_dim, + encoder_dim, + decoder_dim, + attn_state_kernel_size, + conv_dim, + conv_kernel_size, + scaling=2.0, + ): + super(LocationAttention, self).__init__() + self.attn_dim = attn_dim + self.decoder_dim = decoder_dim + self.scaling = scaling + self.proj_enc = nn.Linear(encoder_dim, attn_dim) + self.proj_dec = nn.Linear(decoder_dim, attn_dim, bias=False) + self.proj_attn = nn.Linear(conv_dim, attn_dim, bias=False) + self.conv = nn.Conv1d( + attn_state_kernel_size, + conv_dim, + 2 * conv_kernel_size + 1, + padding=conv_kernel_size, + bias=False, + ) + self.proj_out = nn.Sequential(nn.Tanh(), nn.Linear(attn_dim, 1)) + + self.proj_enc_out = None # cache + + def clear_cache(self): + self.proj_enc_out = None + + def forward(self, encoder_out, encoder_padding_mask, decoder_h, attn_state): + """ + :param torch.Tensor encoder_out: padded encoder hidden state B x T x D + :param torch.Tensor encoder_padding_mask: encoder padding mask + :param torch.Tensor decoder_h: decoder hidden state B x D + :param torch.Tensor attn_prev: previous attention weight B x K x T + :return: attention weighted encoder state (B, D) + :rtype: torch.Tensor + :return: previous attention weights (B x T) + :rtype: torch.Tensor + """ + bsz, seq_len, _ = encoder_out.size() + if self.proj_enc_out is None: + self.proj_enc_out = self.proj_enc(encoder_out) + + # B x K x T -> B x C x T + attn = self.conv(attn_state) + # B x C x T -> B x T x C -> B x T x D + attn = self.proj_attn(attn.transpose(1, 2)) 
+ + if decoder_h is None: + decoder_h = encoder_out.new_zeros(bsz, self.decoder_dim) + dec_h = self.proj_dec(decoder_h).view(bsz, 1, self.attn_dim) + + out = self.proj_out(attn + self.proj_enc_out + dec_h).squeeze(2) + out.masked_fill_(encoder_padding_mask, -float("inf")) + + w = F.softmax(self.scaling * out, dim=1) + c = torch.sum(encoder_out * w.view(bsz, seq_len, 1), dim=1) + return c, w diff --git a/fairseq/fairseq/modules/lstm_cell_with_zoneout.py b/fairseq/fairseq/modules/lstm_cell_with_zoneout.py new file mode 100644 index 0000000..2733089 --- /dev/null +++ b/fairseq/fairseq/modules/lstm_cell_with_zoneout.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch.nn as nn + + +class LSTMCellWithZoneOut(nn.Module): + """ + Zoneout: Regularizing RNNs by Randomly Preserving Hidden Activations + https://arxiv.org/abs/1606.01305 + """ + + def __init__( + self, prob: float, input_size: int, hidden_size: int, bias: bool = True + ): + super(LSTMCellWithZoneOut, self).__init__() + self.lstm_cell = nn.LSTMCell(input_size, hidden_size, bias=bias) + self.prob = prob + if prob > 1.0 or prob < 0.0: + raise ValueError( + "zoneout probability must be in the range from " "0.0 to 1.0." 
+ ) + + def zoneout(self, h, next_h, prob): + if isinstance(h, tuple): + return tuple([self.zoneout(h[i], next_h[i], prob) for i in range(len(h))]) + + if self.training: + mask = h.new_zeros(*h.size()).bernoulli_(prob) + return mask * h + (1 - mask) * next_h + + return prob * h + (1 - prob) * next_h + + def forward(self, x, h): + return self.zoneout(h, self.lstm_cell(x, h), self.prob) diff --git a/fairseq/fairseq/modules/multihead_attention.py b/fairseq/fairseq/modules/multihead_attention.py new file mode 100644 index 0000000..262132d --- /dev/null +++ b/fairseq/fairseq/modules/multihead_attention.py @@ -0,0 +1,910 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn.functional as F +from torch import Tensor, nn +from torch.nn import Parameter + +try: + from xformers.components.attention import build_attention + from xformers.components.attention.utils import maybe_merge_masks + + _xformers_available = True +except ImportError: + _xformers_available = False + +from fairseq import utils +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.quant_noise import quant_noise +from fairseq.models.fairseq_incremental_decoder import FairseqIncrementalDecoder + + +# TODO: move this into xformers? +# TODO: uint8 input type should just output a bool +def _mask_for_xformers(mask: Tensor, to_dtype: Optional[torch.dtype] = None): + """ + call to pytorch multihead accepts three mask types: + - ByteTensor where non-zero means to mask + - FloatTensor which is an additive mask + - BoolTensor where True means to mask + xFormers currently accepts boolean and additive maks. For boolean masks + the values have opposite meaning. For a BoolTensor True mean to keep the value. 
+ """ + float_types = [torch.float, torch.float16] + # If an input mask is a float it is an additive mask. Otherwise it is either uint8 or bool. + additive = mask.dtype in float_types + # If to_dype is not specified, keep same dtype as mask. + to_dtype = mask.dtype if to_dtype is None else to_dtype + to_additive = to_dtype in float_types + + if additive: + if to_additive: + return mask.to(to_dtype) + mask = mask < 0 + + if to_additive: + # return additive mask + new_mask = torch.zeros_like(mask, dtype=to_dtype) + new_mask = new_mask.masked_fill_(mask, -float("inf")) + return new_mask + + # In xFormers True is value to keep rather than value to mask + mask = ~mask.to(torch.bool) + mask = mask.to(to_dtype) + return mask + + +class MultiheadAttention(FairseqIncrementalDecoder): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + dictionary=None, + q_noise=0.0, + qn_block_size=8, + # TODO: pass in config rather than string. 
+ # config defined in xformers.components.attention.AttentionConfig + xformers_att_config: Optional[str] = None, + xformers_blocksparse_layout: Optional[ + torch.Tensor + ] = None, # This should be part of the config + xformers_blocksparse_blocksize: Optional[ + int + ] = 16, # This should be part of the config + ): + super().__init__(dictionary) + + xformers_att_config = utils.eval_str_dict(xformers_att_config) + self.use_xformers = xformers_att_config is not None + if self.use_xformers and not _xformers_available: + raise ImportError("\n\n Please install xFormers.") + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == self.embed_dim + ), "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim**-0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, ( + "Self-attention requires query, key and " "value to be of the same size" + ) + + self.k_proj = quant_noise( + nn.Linear(self.kdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.v_proj = quant_noise( + nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.q_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + self.out_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + self.beam_size = 1 + 
self.reset_parameters() + + if self.use_xformers: + xformers_att_config["dropout"] = xformers_att_config.get("dropout", dropout) + xformers_att_config["num_heads"] = xformers_att_config.get( + "num_heads", num_heads + ) + + if xformers_blocksparse_layout is not None: + # Could be part of a single config passed only once + xformers_att_config["block_size"] = xformers_blocksparse_blocksize + xformers_att_config["layout"] = xformers_blocksparse_layout + xformers_att_config["name"] = "blocksparse" + + self.attention = build_attention(xformers_att_config) + + self.onnx_trace = False + self.skip_embed_dim_check = False + self.init_incremental_state() + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def reset_parameters(self): + if self.qkv_same_dim: + # Empirically observed the convergence to be much better with + # the scaled initialization + nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2)) + else: + nn.init.xavier_uniform_(self.k_proj.weight) + nn.init.xavier_uniform_(self.v_proj.weight) + nn.init.xavier_uniform_(self.q_proj.weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.out_proj.bias is not None: + nn.init.constant_(self.out_proj.bias, 0.0) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + def _get_reserve_head_index(self, num_heads_to_keep: int): + k_proj_heads_norm = [] + q_proj_heads_norm = [] + v_proj_heads_norm = [] + + for i in range(self.num_heads): + start_idx = i * self.head_dim + end_idx = (i + 1) * self.head_dim + k_proj_heads_norm.append( + torch.sum( + torch.abs( + self.k_proj.weight[ + start_idx:end_idx, + ] + ) + ).tolist() + + torch.sum(torch.abs(self.k_proj.bias[start_idx:end_idx])).tolist() + ) + q_proj_heads_norm.append( + torch.sum( + torch.abs( + self.q_proj.weight[ 
+ start_idx:end_idx, + ] + ) + ).tolist() + + torch.sum(torch.abs(self.q_proj.bias[start_idx:end_idx])).tolist() + ) + v_proj_heads_norm.append( + torch.sum( + torch.abs( + self.v_proj.weight[ + start_idx:end_idx, + ] + ) + ).tolist() + + torch.sum(torch.abs(self.v_proj.bias[start_idx:end_idx])).tolist() + ) + + heads_norm = [] + for i in range(self.num_heads): + heads_norm.append( + k_proj_heads_norm[i] + q_proj_heads_norm[i] + v_proj_heads_norm[i] + ) + + sorted_head_index = sorted( + range(self.num_heads), key=lambda k: heads_norm[k], reverse=True + ) + reserve_head_index = [] + for i in range(num_heads_to_keep): + start = sorted_head_index[i] * self.head_dim + end = (sorted_head_index[i] + 1) * self.head_dim + reserve_head_index.append((start, end)) + return reserve_head_index + + def _adaptive_prune_heads(self, reserve_head_index: List[Tuple[int, int]]): + new_q_weight = [] + new_q_bias = [] + new_k_weight = [] + new_k_bias = [] + new_v_weight = [] + new_v_bias = [] + new_out_proj_weight = [] + + for ele in reserve_head_index: + start_idx, end_idx = ele + new_q_weight.append( + self.q_proj.weight[ + start_idx:end_idx, + ] + ) + new_q_bias.append(self.q_proj.bias[start_idx:end_idx]) + + new_k_weight.append( + self.k_proj.weight[ + start_idx:end_idx, + ] + ) + + new_k_bias.append(self.k_proj.bias[start_idx:end_idx]) + + new_v_weight.append( + self.v_proj.weight[ + start_idx:end_idx, + ] + ) + new_v_bias.append(self.v_proj.bias[start_idx:end_idx]) + + new_out_proj_weight.append(self.out_proj.weight[:, start_idx:end_idx]) + + new_q_weight = torch.cat(new_q_weight).detach() + new_k_weight = torch.cat(new_k_weight).detach() + new_v_weight = torch.cat(new_v_weight).detach() + new_out_proj_weight = torch.cat(new_out_proj_weight, dim=-1).detach() + new_q_weight.requires_grad = True + new_k_weight.requires_grad = True + new_v_weight.requires_grad = True + new_out_proj_weight.requires_grad = True + + new_q_bias = torch.cat(new_q_bias).detach() + new_q_bias.requires_grad 
= True + + new_k_bias = torch.cat(new_k_bias).detach() + new_k_bias.requires_grad = True + + new_v_bias = torch.cat(new_v_bias).detach() + new_v_bias.requires_grad = True + + self.q_proj.weight = torch.nn.Parameter(new_q_weight) + self.q_proj.bias = torch.nn.Parameter(new_q_bias) + + self.k_proj.weight = torch.nn.Parameter(new_k_weight) + self.k_proj.bias = torch.nn.Parameter(new_k_bias) + + self.v_proj.weight = torch.nn.Parameter(new_v_weight) + self.v_proj.bias = torch.nn.Parameter(new_v_bias) + + self.out_proj.weight = torch.nn.Parameter(new_out_proj_weight) + + self.num_heads = len(reserve_head_index) + self.embed_dim = self.head_dim * self.num_heads + self.q_proj.out_features = self.embed_dim + self.k_proj.out_features = self.embed_dim + self.v_proj.out_features = self.embed_dim + + def _set_skip_embed_dim_check(self): + self.skip_embed_dim_check = True + + def _pad_masks( + self, + key_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor], + ) -> Tuple[Optional[Tensor], Optional[Tensor]]: + if attn_mask is not None: + shape = attn_mask.size()[:-1] + torch.Size([1]) + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(shape)], dim=-1) + if key_padding_mask is not None: + shape = key_padding_mask.size()[:-1] + torch.Size([1]) + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(shape), + ], + dim=-1, + ) + return key_padding_mask, attn_mask + + def _add_bias( + self, + k: Tensor, + v: Tensor, + key_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor], + bsz: int, + ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: + assert self.bias_k is not None + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + key_padding_mask, attn_mask = self._pad_masks( + key_padding_mask=key_padding_mask, attn_mask=attn_mask + ) + return k, v, key_padding_mask, attn_mask + + def _append_zero_attn( + self, + k: Tensor, + v: Tensor, + 
key_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor], + ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: + zero_attn_shape = k.size()[:-2] + torch.Size([1]) + k.size()[-1:] + k = torch.cat( + [k, torch.zeros(zero_attn_shape, dtype=k.dtype, device=k.device)], dim=-2 + ) + v = torch.cat( + [v, torch.zeros(zero_attn_shape, dtype=v.dtype, device=v.device)], dim=-2 + ) + key_padding_mask, attn_mask = self._pad_masks( + key_padding_mask=key_padding_mask, attn_mask=attn_mask + ) + return k, v, key_padding_mask, attn_mask + + def _xformers_attn_forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + need_weights: bool = True, + attn_mask: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + + tgt_len, bsz, embed_dim = query.size() + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == tgt_len + + if self.self_attention: + key = query + value = query + elif self.encoder_decoder_attention: + value = key + + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + + if self.bias_k is not None: + assert self.bias_v is not None + k, v, attn_mask, key_padding_mask = self._add_bias( + k, v, attn_mask, key_padding_mask, bsz + ) + + def fold_heads(x): + return ( + x.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + def split_heads(x): + return ( + x.contiguous() + .view(-1, bsz, self.num_heads, self.head_dim) + .transpose(0, 1) + .transpose(1, 2) + ) + + massage = split_heads if self.attention.requires_head_dimension else fold_heads + q = massage(q) + if k is not None: + k = massage(k) + if v is not None: + v = massage(v) + + if self.add_zero_attn: + k, v, key_padding_mask, attn_mask = self._append_zero_attn( + k=k, v=v, key_padding_mask=key_padding_mask, attn_mask=attn_mask + ) + + kwargs = {} + + if attn_mask is not None and 
self.attention.supports_attention_mask: + attn_mask = _mask_for_xformers(attn_mask, to_dtype=q.dtype) + kwargs["att_mask"] = attn_mask + + if key_padding_mask is not None: + key_padding_mask = _mask_for_xformers(key_padding_mask, to_dtype=torch.bool) + if not self.attention.requires_separate_masks: + attn_mask = maybe_merge_masks( + attn_mask, + key_padding_mask, + batch_size=bsz, + src_len=k.size(-2), + tgt_len=q.size(-2), + num_heads=self.num_heads, + ) + key_padding_mask = None + kwargs["att_mask"] = attn_mask + if self.attention.supports_key_padding_mask: + kwargs["key_padding_mask"] = key_padding_mask + + y = self.attention(q, k, v, **kwargs) + + y = ( + y.view(bsz, self.num_heads, tgt_len, self.head_dim) + .transpose(1, 2) + .flatten(start_dim=2, end_dim=3) + .transpose(0, 1) + ) + assert list(y.size()) == [tgt_len, bsz, embed_dim] + + # Dropout not needed because already applied in attention. + # It is applied to the attention weights before matmul with v. + y = self.out_proj(y) + + # TODO: support returning attention weights if needed. + return y, None + + def forward( + self, + query: Tensor, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). 
+ before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + if not self.skip_embed_dim_check: + assert ( + embed_dim == self.embed_dim + ), f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert value is not None + assert src_len, key_bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. + and not torch.jit.is_scripting() + # The Multihead attention implemented in pytorch forces strong dimension check + # for input embedding dimention and K,Q,V projection dimension. 
+ # Since pruning will break the dimension check and it is not easy to modify the pytorch API, + # it is preferred to bypass the pytorch MHA when we need to skip embed_dim_check + and not self.skip_embed_dim_check + ): + assert key is not None and value is not None + + if self.use_xformers: + return self._xformers_attn_forward( + query, key, value, key_padding_mask, need_weights, attn_mask + ) + + else: + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask.bool() if key_padding_mask is not None else None, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + if self.beam_size > 1 and bsz == key.size(1): + # key is [T, bsz*beam_size, C], reduce to [T, bsz, C] + key = key.view(key.size(0), -1, self.beam_size, key.size(2))[ + :, :, 0, : + ] + if key_padding_mask is not None: + key_padding_mask = key_padding_mask.view( + -1, self.beam_size, key_padding_mask.size(1) + )[:, 0, :] + 
k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k, v, attn_mask, key_padding_mask = self._add_bias( + k, v, attn_mask, key_padding_mask, bsz + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + kv_bsz = bsz # need default value for scripting + if k is not None: + kv_bsz = k.size(1) + k = ( + k.contiguous() + .view(-1, kv_bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, kv_bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + kv_bsz = _prev_key.size(0) + prev_key = _prev_key.view(kv_bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + assert kv_bsz == _prev_value.size(0) + prev_value = _prev_value.view( + kv_bsz * self.num_heads, -1, self.head_dim + ) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=kv_bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = 
k.view(kv_bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view( + kv_bsz, self.num_heads, -1, self.head_dim + ) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == kv_bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k, v, key_padding_mask, attn_mask = self._append_zero_attn( + k=k, v=v, key_padding_mask=key_padding_mask, attn_mask=attn_mask + ) + + if self.encoder_decoder_attention and bsz != kv_bsz: + attn_weights = torch.einsum( + "bxhtd,bhsd->bxhts", + q.view((kv_bsz, -1, self.num_heads) + q.size()[1:]), + k.view((kv_bsz, self.num_heads) + k.size()[1:]), + ) + attn_weights = attn_weights.reshape((-1,) + attn_weights.size()[-2:]) + else: + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.view( + kv_bsz, -1, self.num_heads, tgt_len, src_len + ) + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1) + .unsqueeze(2) + .unsqueeze(3) + .to(torch.bool), + 
float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn: Optional[Tensor] = None + if self.encoder_decoder_attention and bsz != kv_bsz: + attn = torch.einsum( + "bxhts,bhsd->bxhtd", + attn_probs.view( + ( + kv_bsz, + -1, + self.num_heads, + ) + + attn_probs.size()[1:] + ), + v.view( + ( + kv_bsz, + self.num_heads, + ) + + v.size()[1:] + ), + ) + attn = attn.reshape((-1,) + attn.size()[-2:]) + else: + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, self.embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights + + @staticmethod + def _append_prev_key_padding_mask( + key_padding_mask: Optional[Tensor], + prev_key_padding_mask: Optional[Tensor], + batch_size: int, + src_len: int, + static_kv: bool, + ) -> Optional[Tensor]: + # saved key padding masks have shape (bsz, seq_len) + if prev_key_padding_mask is not None and static_kv: + 
new_key_padding_mask = prev_key_padding_mask + elif prev_key_padding_mask is not None and key_padding_mask is not None: + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1 + ) + # During incremental decoding, as the padding token enters and + # leaves the frame, there will be a time when prev or current + # is None + elif prev_key_padding_mask is not None: + if src_len > prev_key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - prev_key_padding_mask.size(1)), + device=prev_key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), filler.float()], dim=1 + ) + else: + new_key_padding_mask = prev_key_padding_mask.float() + elif key_padding_mask is not None: + if src_len > key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - key_padding_mask.size(1)), + device=key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [filler.float(), key_padding_mask.float()], dim=1 + ) + else: + new_key_padding_mask = key_padding_mask.float() + else: + new_key_padding_mask = prev_key_padding_mask + return new_key_padding_mask + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + new_order: Tensor, + ): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer_k = input_buffer[k] + if input_buffer_k is not None: + if self.encoder_decoder_attention: + if input_buffer_k.size(0) * self.beam_size == new_order.size(0): + return incremental_state + elif self.beam_size > 1: + input_buffer[k] = input_buffer_k.index_select( + 0, + new_order.reshape(-1, self.beam_size)[:, 0] + // self.beam_size, + ) + else: + input_buffer[k] = input_buffer_k.index_select(0, new_order) + else: + input_buffer[k] = input_buffer_k.index_select(0, 
new_order) + incremental_state = self._set_input_buffer(incremental_state, input_buffer) + return incremental_state + + def set_beam_size(self, beam_size): + """Used for effiecient beamable enc-dec attention""" + self.beam_size = beam_size + + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ) -> Dict[str, Optional[Tensor]]: + result = self.get_incremental_state(incremental_state, "attn_state") + if result is not None: + return result + else: + empty_result: Dict[str, Optional[Tensor]] = {} + return empty_result + + def _set_input_buffer( + self, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + buffer: Dict[str, Optional[Tensor]], + ): + return self.set_incremental_state(incremental_state, "attn_state", buffer) + + def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int): + return attn_weights + + def upgrade_state_dict_named(self, state_dict, name): + prefix = name + "." if name != "" else "" + items_to_add = {} + keys_to_remove = [] + for k in state_dict.keys(): + if k.endswith(prefix + "in_proj_weight"): + # in_proj_weight used to be q + k + v with same dimensions + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.weight"] = state_dict[k][:dim] + items_to_add[prefix + "k_proj.weight"] = state_dict[k][dim : 2 * dim] + items_to_add[prefix + "v_proj.weight"] = state_dict[k][2 * dim :] + + keys_to_remove.append(k) + + k_bias = prefix + "in_proj_bias" + if k_bias in state_dict.keys(): + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.bias"] = state_dict[k_bias][:dim] + items_to_add[prefix + "k_proj.bias"] = state_dict[k_bias][ + dim : 2 * dim + ] + items_to_add[prefix + "v_proj.bias"] = state_dict[k_bias][2 * dim :] + + keys_to_remove.append(prefix + "in_proj_bias") + + for k in keys_to_remove: + del state_dict[k] + + for key, value in items_to_add.items(): + state_dict[key] = value diff --git 
# --- fairseq/fairseq/modules/positional_embedding.py ---
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Optional

import torch.nn as nn

from .learned_positional_embedding import LearnedPositionalEmbedding
from .sinusoidal_positional_embedding import SinusoidalPositionalEmbedding


def PositionalEmbedding(
    num_embeddings: int,
    embedding_dim: int,
    padding_idx: Optional[int],
    learned: bool = False,
    auto_expand: bool = True,
):
    """Build a learned or sinusoidal positional-embedding module.

    Args:
        num_embeddings: number of positions the table should cover.
        embedding_dim: size of each positional vector.
        padding_idx: index of the padding symbol, or ``None``. When given,
            the table is enlarged by ``padding_idx + 1`` rows because
            position ids are offset past the padding index.
        learned: if True return a ``LearnedPositionalEmbedding`` (normal
            init with std ``embedding_dim ** -0.5``, padding row zeroed);
            otherwise return a ``SinusoidalPositionalEmbedding``.
        auto_expand: forwarded to ``SinusoidalPositionalEmbedding`` only.

    Returns:
        The constructed positional-embedding module.
    """
    if learned:
        # if padding_idx is specified then offset the embedding ids by
        # this index and adjust num_embeddings appropriately
        # TODO: The right place for this offset would be inside
        # LearnedPositionalEmbedding. Move this there for a cleaner implementation.
        if padding_idx is not None:
            num_embeddings = num_embeddings + padding_idx + 1
        m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx)
        nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5)
        if padding_idx is not None:
            nn.init.constant_(m.weight[padding_idx], 0)
    else:
        # The original expression ``num_embeddings + padding_idx + 1`` raised
        # TypeError for padding_idx=None even though the learned branch
        # accepts None. Treat a missing padding index as 0 here.
        # NOTE(review): assumes SinusoidalPositionalEmbedding itself accepts
        # padding_idx=None -- confirm against that class.
        offset = 0 if padding_idx is None else padding_idx
        m = SinusoidalPositionalEmbedding(
            embedding_dim,
            padding_idx,
            init_size=num_embeddings + offset + 1,
            auto_expand=auto_expand,
        )
    return m
# --- fairseq/fairseq/modules/positional_encoding.py ---

import math

import torch
import torch.nn as nn


def _sinusoid_table(position, d_model):
    """Return the ``(len(position), d_model)`` sin/cos table.

    ``position`` is a float column vector of shape ``(L, 1)``. Even columns
    hold ``sin(position * div_term)`` and odd columns hold the matching
    ``cos`` values.
    """
    div_term = torch.exp(
        torch.arange(0, d_model, 2, dtype=torch.float32)
        * -(math.log(10000.0) / d_model)
    )
    angles = position * div_term
    table = torch.zeros(position.size(0), d_model)
    table[:, 0::2] = torch.sin(angles)
    # For an odd d_model there is one fewer odd column than even columns, so
    # trim the cos term; for an even d_model the slice is a no-op.
    table[:, 1::2] = torch.cos(angles[:, : d_model // 2])
    return table


class PositionalEncoding(nn.Module):
    """Absolute sinusoidal positional encoding added to a scaled input.

    Args:
        d_model: Embedding dimension.
        dropout_rate: Dropout rate applied to the encoded output.
        max_len: Number of positions pre-computed at construction time.
        reverse: Whether to reverse the input position (the last time step
            gets position 0).
    """

    def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False):
        """Construct a PositionalEncoding object."""
        super().__init__()
        self.d_model = d_model
        self.reverse = reverse
        self.xscale = math.sqrt(self.d_model)
        self.dropout = nn.Dropout(p=dropout_rate)
        # Plain attribute (not a registered buffer); it is moved to the
        # input's device/dtype lazily inside extend_pe.
        self.pe = None
        self.extend_pe(torch.tensor(0.0).expand(1, max_len))

    def extend_pe(self, x):
        """Ensure ``self.pe`` covers ``x.size(1)`` positions on x's device/dtype."""
        if self.pe is not None:
            if self.pe.size(1) >= x.size(1):
                # Table is long enough; only fix up dtype/device if needed.
                if self.pe.dtype != x.dtype or self.pe.device != x.device:
                    self.pe = self.pe.to(dtype=x.dtype, device=x.device)
                return
        if self.reverse:
            position = torch.arange(
                x.size(1) - 1, -1, -1.0, dtype=torch.float32
            ).unsqueeze(1)
        else:
            position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1)
        pe = _sinusoid_table(position, self.d_model).unsqueeze(0)
        self.pe = pe.to(device=x.device, dtype=x.dtype)

    def forward(self, x: torch.Tensor):
        """Add positional encoding.

        Args:
            x (torch.Tensor): Input tensor B X T X C
        Returns:
            torch.Tensor: Encoded tensor B X T X C
        """
        self.extend_pe(x)
        x = x * self.xscale + self.pe[:, : x.size(1)]
        return self.dropout(x)


class RelPositionalEncoding(nn.Module):
    """Relative positional encoding module (new implementation).

    Args:
        max_len: Maximum input length pre-computed at construction time.
        d_model: Embedding dimension.
    """

    def __init__(self, max_len, d_model):
        """Construct a RelPositionalEncoding object."""
        super().__init__()
        self.d_model = d_model
        self.pe = None
        self.extend_pe(torch.tensor(0.0).expand(1, max_len))

    def extend_pe(self, x):
        """Ensure ``self.pe`` covers relative offsets for ``x.size(1)`` steps."""
        if self.pe is not None:
            # self.pe contains both positive and negative parts
            # the length of self.pe is 2 * input_len - 1
            if self.pe.size(1) >= x.size(1) * 2 - 1:
                if self.pe.dtype != x.dtype or self.pe.device != x.device:
                    self.pe = self.pe.to(dtype=x.dtype, device=x.device)
                return
        # Suppose `i` is the position of the query vector and `j` the position
        # of the key vector. Positive relative positions are used when keys
        # are to the left (i>j) and negative relative positions otherwise (i<j).
        position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1)
        pe_positive = _sinusoid_table(position, self.d_model)
        pe_negative = _sinusoid_table(-1 * position, self.d_model)

        # Reverse the order of positive indices and concat both positive and
        # negative indices. This is used to support the shifting trick
        # as in https://arxiv.org/abs/1901.02860
        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)
        # Drop offset 0 from the negative half; it is already in pe_positive.
        pe_negative = pe_negative[1:].unsqueeze(0)
        pe = torch.cat([pe_positive, pe_negative], dim=1)
        self.pe = pe.to(device=x.device, dtype=x.dtype)

    def forward(self, x: torch.Tensor):
        """Return the relative positional embeddings for ``x``.

        Args:
            x : Input tensor T X B X C.
        Returns:
            torch.Tensor: Positional embedding of shape (2*T-1) X 1 X C,
            ordered from offset +(T-1) down to -(T-1).
        """
        x = x.transpose(0, 1)  # Change TBC to BTC
        self.extend_pe(x)
        center = self.pe.size(1) // 2  # index of relative offset 0
        pos_emb = self.pe[:, center - x.size(1) + 1 : center + x.size(1)]
        pos_emb = pos_emb.transpose(0, 1)  # change to TBC
        return pos_emb


# --- fairseq/fairseq/modules/quant_noise.py ---
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


def quant_noise(module, p, block_size):
    """
    Wraps modules and applies quantization noise to the weights for
    subsequent quantization with Iterative Product Quantization as
    described in "Training with Quantization Noise for Extreme Model Compression"

    Args:
        - module: nn.Module
        - p: amount of Quantization Noise
        - block_size: size of the blocks for subsequent quantization with iPQ

    Returns:
        The same ``module`` object. When ``p > 0`` a forward pre-hook is
        registered that, in training mode, overwrites ``module.weight.data``
        with a block-masked copy rescaled by ``1 / (1 - p)``.

    Remarks:
        - Module weights must have the right sizes wrt the block size
        - Only Linear, Embedding and Conv2d modules are supported for the moment
        - For more detail on how to quantize by blocks with convolutional weights,
          see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks"
        - We implement the simplest form of noise here as stated in the paper
          which consists in randomly dropping blocks
    """

    # if no quantization noise, don't register hook
    if p <= 0:
        return module

    # supported modules
    assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d))

    # test whether module.weight has the right sizes wrt block_size
    is_conv = module.weight.ndim == 4

    # 2D matrix
    if not is_conv:
        assert (
            module.weight.size(1) % block_size == 0
        ), "Input features must be a multiple of block sizes"

    # 4D matrix
    else:
        # 1x1 convolutions
        if module.kernel_size == (1, 1):
            assert (
                module.in_channels % block_size == 0
            ), "Input channels must be a multiple of block sizes"
        # regular convolutions
        else:
            k = module.kernel_size[0] * module.kernel_size[1]
            assert k % block_size == 0, "Kernel size must be a multiple of block size"

    def _forward_pre_hook(mod, inputs):
        # no noise for evaluation
        if not mod.training:
            return

        weight = mod.weight
        if not is_conv:
            in_features = weight.size(1)
            out_features = weight.size(0)

            # split weight matrix into blocks and randomly drop selected blocks
            mask = torch.zeros(
                in_features // block_size * out_features, device=weight.device
            )
            mask.bernoulli_(p)
            mask = mask.repeat_interleave(block_size, -1).view(-1, in_features)
        elif mod.kernel_size == (1, 1):
            in_channels = mod.in_channels
            out_channels = mod.out_channels

            mask = torch.zeros(
                int(in_channels // block_size * out_channels),
                device=weight.device,
            )
            mask.bernoulli_(p)
            mask = mask.repeat_interleave(block_size, -1).view(-1, in_channels)
            # The weight is (out, in, 1, 1). Give the mask the same rank so
            # the out-of-place masked_fill below cannot broadcast the result
            # to (out, in, out, in).
            mask = mask.unsqueeze(2).unsqueeze(3)
        else:
            # one Bernoulli draw per (out, in) kernel, repeated over the kernel
            mask = torch.zeros(weight.size(0), weight.size(1), device=weight.device)
            mask.bernoulli_(p)
            mask = (
                mask.unsqueeze(2)
                .unsqueeze(3)
                .repeat(1, 1, mod.kernel_size[0], mod.kernel_size[1])
            )

        # scale weights and apply mask
        mask = mask.to(torch.bool)  # x.bool() is not currently supported in TorchScript
        s = 1 / (1 - p)
        mod.weight.data = s * weight.masked_fill(mask, 0)

    module.register_forward_pre_hook(_forward_pre_hook)
    return module
# --- fairseq/fairseq/modules/quantization/pq/em.py ---
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import random
from collections import Counter

import torch


class EM:
    """
    EM algorithm used to quantize the columns of W to minimize

                         ||W - W_hat||^2

    Args:
        - W: weight matrix of size (in_features x out_features)
        - n_centroids: number of centroids (size of codebook)
        - n_iter: number of k-means iterations
        - eps: for cluster reassignment when an empty cluster is found
        - max_tentatives for cluster reassignment when an empty cluster is found
        - verbose: print error after each iteration

    Remarks:
        - If one cluster is empty, the most populated cluster is split into
          two clusters
        - All the relevant dimensions are specified in the code
    """

    def __init__(
        self, W, n_centroids=256, n_iter=20, eps=1e-6, max_tentatives=30, verbose=True
    ):
        self.W = W
        self.n_centroids = n_centroids
        self.n_iter = n_iter
        self.eps = eps
        self.max_tentatives = max_tentatives
        self.verbose = verbose
        self.centroids = torch.Tensor()
        self.assignments = torch.Tensor()
        self.objective = []

    def initialize_centroids(self):
        """
        Initializes the centroids by sampling random columns from W.
        """

        _, out_features = self.W.size()
        indices = torch.randint(
            low=0, high=out_features, size=(self.n_centroids,)
        ).long()
        self.centroids = self.W[:, indices].t()  # (n_centroids x in_features)

    def step(self, i):
        """
        There are two standard steps for each iteration: expectation (E) and
        minimization (M). The E-step (assignment) is performed with an exhaustive
        search and the M-step (centroid computation) is performed with
        the exact solution.

        Args:
            - i: step number

        Raises:
            - EmptyClusterResolveError: propagated from
              resolve_empty_clusters() when empty clusters cannot be fixed

        Remarks:
            - The E-step heavily uses PyTorch broadcasting to speed up computations
              and reduce the memory overhead
        """

        # assignments (E-step)
        self.assign()
        n_empty_clusters = self.resolve_empty_clusters()

        # centroids (M-step)
        for k in range(self.n_centroids):
            W_k = self.W[:, self.assignments == k]  # (in_features x size_of_cluster_k)
            self.centroids[k] = W_k.mean(dim=1)  # (in_features)

        # book-keeping
        obj = (self.centroids[self.assignments].t() - self.W).norm(p=2).item()
        self.objective.append(obj)
        if self.verbose:
            logging.info(
                f"Iteration: {i},\t"
                f"objective: {obj:.6f},\t"
                f"resolved empty clusters: {n_empty_clusters}"
            )

    def _cluster_census(self):
        """Return (Counter of cluster sizes, set of cluster ids with no members)."""
        counts = Counter(self.assignments.tolist())
        empty_clusters = set(range(self.n_centroids)) - set(counts.keys())
        return counts, empty_clusters

    def resolve_empty_clusters(self):
        """
        If one cluster is empty, the most populated cluster is split into
        two clusters by shifting the respective centroids. This is done
        iteratively for a fixed number of tentatives.

        Returns:
            - the number of empty clusters found before any splitting

        Raises:
            - EmptyClusterResolveError: if empty clusters remain after
              max_tentatives + 1 splitting attempts
        """

        # empty clusters
        counts, empty_clusters = self._cluster_census()
        n_empty_clusters = len(empty_clusters)

        tentatives = 0
        while empty_clusters:
            # given an empty cluster, find most populated cluster and split it into two
            k = random.choice(list(empty_clusters))
            m = counts.most_common(1)[0][0]
            e = torch.randn_like(self.centroids[m]) * self.eps
            self.centroids[k] = self.centroids[m].clone()
            self.centroids[k] += e
            self.centroids[m] -= e

            # recompute assignments and check for empty clusters again
            self.assign()
            counts, empty_clusters = self._cluster_census()

            # Give up only when clusters are STILL empty on the last allowed
            # attempt; an attempt that just fixed everything must not raise.
            if empty_clusters and tentatives == self.max_tentatives:
                logging.info(
                    f"Could not resolve all empty clusters, {len(empty_clusters)} remaining"
                )
                raise EmptyClusterResolveError
            tentatives += 1

        return n_empty_clusters

    def compute_distances(self):
        """
        For every centroid m, computes

                          ||M - m[None, :]||_2

        Returns:
            - tensor of size (n_centroids x out_features)

        Raises:
            - RuntimeError: if the computation still fails once the centroids
              have been split into single-centroid chunks (e.g. a shape
              mismatch between W and the centroids)

        Remarks:
            - We rely on PyTorch's broadcasting to speed up computations
              and reduce the memory overhead
            - Without chunking, the sizes in the broadcasting are modified as:
              (n_centroids x n_samples x out_features) -> (n_centroids x out_features)
            - The broadcasting computation is automatically chunked so that
              the tensors fit into the memory of the GPU
        """

        nb_centroids_chunks = 1
        n_rows = self.centroids.size(0)

        while True:
            try:
                return torch.cat(
                    [
                        (self.W[None, :, :] - centroids_c[:, :, None]).norm(p=2, dim=1)
                        for centroids_c in self.centroids.chunk(
                            nb_centroids_chunks, dim=0
                        )
                    ],
                    dim=0,
                )
            except RuntimeError:
                # Halving the chunk size only helps while chunks hold more
                # than one centroid. Past that point the error is not a
                # memory problem, so re-raise instead of looping forever.
                if nb_centroids_chunks >= n_rows:
                    raise
                nb_centroids_chunks *= 2

    def assign(self):
        """
        Assigns each column of W to its closest centroid, thus essentially
        performing the E-step in train().

        Remarks:
            - The function must be called after train() or after loading
              centroids using self.load(), otherwise it will return empty tensors
        """

        distances = self.compute_distances()  # (n_centroids x out_features)
        self.assignments = torch.argmin(distances, dim=0)  # (out_features)

    def save(self, path, layer):
        """
        Saves centroids, assignments and the objective history.

        Args:
            - path: folder used to save centroids and assignments
            - layer: name used as the file-name prefix
        """

        torch.save(self.centroids, os.path.join(path, "{}_centroids.pth".format(layer)))
        torch.save(
            self.assignments, os.path.join(path, "{}_assignments.pth".format(layer))
        )
        torch.save(self.objective, os.path.join(path, "{}_objective.pth".format(layer)))

    def load(self, path, layer):
        """
        Loads centroids, assignments and the objective history from a given path

        Args:
            - path: folder use to load centroids and assignments
            - layer: name used as the file-name prefix
        """

        # NOTE(review): torch.load unpickles the files; only load from
        # trusted locations.
        self.centroids = torch.load(
            os.path.join(path, "{}_centroids.pth".format(layer))
        )
        self.assignments = torch.load(
            os.path.join(path, "{}_assignments.pth".format(layer))
        )
        self.objective = torch.load(
            os.path.join(path, "{}_objective.pth".format(layer))
        )


class EmptyClusterResolveError(Exception):
    """Raised when EM.resolve_empty_clusters cannot populate every cluster."""
+ +from .qconv import PQConv2d # NOQA +from .qemb import PQEmbedding # NOQA +from .qlinear import PQLinear # NOQA diff --git a/fairseq/fairseq/modules/quantization/pq/modules/qconv.py b/fairseq/fairseq/modules/quantization/pq/modules/qconv.py new file mode 100644 index 0000000..d15ec19 --- /dev/null +++ b/fairseq/fairseq/modules/quantization/pq/modules/qconv.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair + + +class PQConv2d(nn.Module): + """ + Quantized counterpart of nn.Conv2d module. Stores the centroid, the assignments + and the non-quantized biases. The full weight is re-instantiated at each forward + pass and autograd automatically computes the gradients with respect to the + centroids. + + Args: + - centroids: centroids of size n_centroids x block_size + - assignments: assignments of the centroids to the subvectors + of size self.out_channels x n_blocks + - bias: the non-quantized bias, must be either torch.Tensor or None + + Remarks: + - We refer the reader to the official documentation of the nn.Conv2d module + for the other arguments and the behavior of the module. + - Performance tests on GPU show that this implementation is 10% slower than + the non-quantized nn.Conv2d module for a standard training loop. + - During the backward, the gradients are averaged by cluster and not summed. + This explains the hook registered to the centroids. 
+ """ + + def __init__( + self, + centroids, + assignments, + bias, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode="zeros", + ): + super(PQConv2d, self).__init__() + self.block_size = centroids.size(1) + self.n_centroids = centroids.size(0) + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.padding_mode = padding_mode + # check compatibility + if in_channels // groups * np.prod(self.kernel_size) % self.block_size != 0: + raise ValueError("Wrong PQ sizes") + if len(assignments) % out_channels != 0: + raise ValueError("Wrong PQ sizes") + if in_channels % groups != 0: + raise ValueError("in_channels must be divisible by groups") + if out_channels % groups != 0: + raise ValueError("out_channels must be divisible by groups") + # define parameters + self.centroids = nn.Parameter(centroids, requires_grad=True) + self.register_buffer("assignments", assignments) + self.register_buffer("counts", torch.bincount(assignments).type_as(centroids)) + if bias is not None: + self.bias = nn.Parameter(bias) + else: + self.register_parameter("bias", None) + # register hook for averaging gradients per centroids instead of summing + self.centroids.register_hook(lambda x: x / self.counts[:, None]) + + @property + def weight(self): + return ( + self.centroids[self.assignments] + .reshape(-1, self.out_channels, self.block_size) + .permute(1, 0, 2) + .reshape( + self.out_channels, self.in_channels // self.groups, *self.kernel_size + ) + ) + + def forward(self, x): + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + + def extra_repr(self): + s = "{in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}" + if self.padding != (0,) * len(self.padding): + s += ", 
class PQEmbedding(nn.Module):
    """
    Product-quantized stand-in for nn.Embedding.

    Only the codebook (``centroids``) and the per-block codeword indices
    (``assignments``) are stored. The dense embedding matrix is rebuilt from
    them every time ``weight`` is read, i.e. on every forward pass.

    Args:
        - centroids: codebook of size n_centroids x block_size
        - assignments: centroid index of every block; it is passed to
          torch.bincount and its length must be a multiple of num_embeddings
        - num_embeddings: number of rows of the reconstructed weight
        - embedding_dim: embedding size, must be a multiple of block_size
        - padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse:
          forwarded unchanged to F.embedding
        - _weight: accepted but not used by this module

    Raises:
        - ValueError: if the sizes above are not compatible
    """

    def __init__(
        self,
        centroids,
        assignments,
        num_embeddings,
        embedding_dim,
        padding_idx=None,
        max_norm=None,
        norm_type=2.0,
        scale_grad_by_freq=False,
        sparse=False,
        _weight=None,
    ):
        super().__init__()

        # codebook geometry
        self.block_size = centroids.size(1)
        self.n_centroids = centroids.size(0)
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim

        # a negative padding index counts from the end of the vocabulary
        if padding_idx is not None:
            if padding_idx < 0:
                assert (
                    padding_idx >= -self.num_embeddings
                ), "Padding_idx must be within num_embeddings"
                padding_idx = self.num_embeddings + padding_idx
            elif padding_idx > 0:
                assert (
                    padding_idx < self.num_embeddings
                ), "Padding_idx must be within num_embeddings"
        self.padding_idx = padding_idx

        # options handed to F.embedding as-is
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse

        # every embedding must split into whole blocks ...
        if self.embedding_dim % self.block_size != 0:
            raise ValueError("Wrong PQ sizes")
        # ... and every embedding must own the same number of blocks
        if len(assignments) % self.num_embeddings != 0:
            raise ValueError("Wrong PQ sizes")

        # the centroids are trainable; assignments and their histogram are buffers
        self.centroids = nn.Parameter(centroids, requires_grad=True)
        self.register_buffer("assignments", assignments)
        usage = torch.bincount(assignments).type_as(centroids)
        self.register_buffer("counts", usage)

    @property
    def weight(self):
        """Dense (num_embeddings x embedding_dim) matrix rebuilt from the codebook."""
        codewords = self.centroids[self.assignments]
        # assignments are laid out block-major: (n_blocks, num_embeddings, block_size)
        per_block = codewords.reshape(-1, self.num_embeddings, self.block_size)
        per_embedding = per_block.permute(1, 0, 2)
        return per_embedding.flatten(1, 2)

    def forward(self, input):
        """Look up ``input`` indices in the reconstructed weight."""
        dense_weight = self.weight
        return F.embedding(
            input,
            dense_weight,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )

    def extra_repr(self):
        """Summarize the module; non-default options only, then the PQ geometry."""
        pieces = ["{num_embeddings}, {embedding_dim}"]
        if self.padding_idx is not None:
            pieces.append("padding_idx={padding_idx}")
        if self.max_norm is not None:
            pieces.append("max_norm={max_norm}")
        if self.norm_type != 2:
            pieces.append("norm_type={norm_type}")
        if self.scale_grad_by_freq is not False:
            pieces.append("scale_grad_by_freq={scale_grad_by_freq}")
        if self.sparse is not False:
            pieces.append("sparse=True")
        pieces.append("n_centroids={n_centroids}, block_size={block_size}")

        return ", ".join(pieces).format(**self.__dict__)
class PQ(EM):
    """
    Quantizes the layer weights W with the standard Product Quantization
    technique. This learns a codebook of codewords or centroids of size
    block_size from W. For further reference on using PQ to quantize
    neural networks, see "And the Bit Goes Down: Revisiting the Quantization
    of Neural Networks", Stock et al., ICLR 2020.

    PQ is performed in two steps:
    (1) The matrix W (weights of a fully-connected or convolutional layer)
        is reshaped to (block_size, -1).
            - If W is fully-connected (2D), its columns are split into
              blocks of size block_size.
            - If W is convolutional (4D), its filters are split along the
              spatial dimension.
    (2) We apply the standard EM/k-means algorithm to the resulting reshaped matrix.

    Args:
        - W: weight to quantize, of size (out_features x in_features) for a
          linear layer or (out_channels x in_channels x k_h x k_w) for a
          convolution
        - block_size: size of the blocks (subvectors)
        - n_centroids: number of centroids
        - n_iter: number of k-means iterations
        - eps: for cluster reassignment when an empty cluster is found
        - max_tentatives: for cluster reassignment when an empty cluster is found
        - verbose: print information after each iteration

    Remarks:
        - block_size must be compatible with the shape of W
        - n_centroids, eps, max_tentatives and verbose are forwarded to the
          EM base class, which is defined outside this file
    """

    def __init__(
        self,
        W,
        block_size,
        n_centroids=256,
        n_iter=20,
        eps=1e-6,
        max_tentatives=30,
        verbose=True,
    ):
        # block_size must be set before _reshape, which reads it
        self.block_size = block_size
        W_reshaped = self._reshape(W)
        super(PQ, self).__init__(
            W_reshaped,
            n_centroids=n_centroids,
            n_iter=n_iter,
            eps=eps,
            max_tentatives=max_tentatives,
            verbose=verbose,
        )

    def _reshape(self, W):
        """
        Reshapes the matrix W as explained in step (1) and records the
        original layer dimensions on self so that decode() can undo it.

        Raises:
            - AssertionError: if the layer size is not a multiple of block_size
            - NotImplementedError: if W is neither 2D nor 4D
        """

        # fully connected: by convention the weight has size out_features x in_features
        if len(W.size()) == 2:
            self.out_features, self.in_features = W.size()
            assert (
                self.in_features % self.block_size == 0
            ), "Linear: in_features must be a multiple of block_size"
            return (
                W.reshape(self.out_features, -1, self.block_size)
                .permute(2, 1, 0)
                .flatten(1, 2)
            )

        # convolutional: we reshape along the spatial dimension
        elif len(W.size()) == 4:
            self.out_channels, self.in_channels, self.k_h, self.k_w = W.size()
            assert (
                self.in_channels * self.k_h * self.k_w
            ) % self.block_size == 0, (
                "Conv2d: in_channels * k_h * k_w must be a multiple of block_size"
            )
            return (
                W.reshape(self.out_channels, -1, self.block_size)
                .permute(2, 1, 0)
                .flatten(1, 2)
            )
        # not implemented
        else:
            raise NotImplementedError(W.size())

    def encode(self):
        """
        Performs up to self.n_iter EM steps.

        The loop stops early, keeping the current state, as soon as a step
        raises EmptyClusterResolveError.
        """

        self.initialize_centroids()
        for i in range(self.n_iter):
            try:
                self.step(i)
            except EmptyClusterResolveError:
                # deliberate best-effort: keep the centroids/assignments found so far
                break

    def decode(self):
        """
        Returns the encoded full weight matrix. Must be called after
        the encode function.
        """

        # fully connected case: k_h is only recorded by _reshape for 4D weights
        if "k_h" not in self.__dict__:
            return (
                self.centroids[self.assignments]
                .reshape(-1, self.out_features, self.block_size)
                .permute(1, 0, 2)
                .flatten(1, 2)
            )

        # convolutional case
        else:
            return (
                self.centroids[self.assignments]
                .reshape(-1, self.out_channels, self.block_size)
                .permute(1, 0, 2)
                .reshape(self.out_channels, self.in_channels, self.k_h, self.k_w)
            )
def quantize_model_(
    model,
    size_tracker,
    layers_to_quantize,
    block_sizes_config,
    n_centroids_config,
    step=0,
    n_iter=15,
    eps=1e-6,
    max_tentatives=100,
    remove_weights=False,
    verbose=True,
    state_dict=None,
):
    """
    Quantize a model in-place by stages. All the targeted
    layers are replaced by their quantized counterpart,
    and the model is ready for the finetuning of the
    centroids in a standard training loop (no modifications
    required). Note that we do not quantize biases.

    Args:
        - model: a nn.Module
        - size_tracker: useful for tracking quantization statistics; its
          update(weight, block_size, n_centroids) is called once per layer
        - layers_to_quantize: a list containing regexps for
          filtering the layers to quantize at each stage according
          to their name (as in model.named_parameters())
        - block_sizes_config: dict like
          {
              'Conv2d': ('kernel_size', {'(3, 3)': 9, '(1, 1)': 4}),
              'Linear': ('in_features', {'*': 8})
          }
          For instance, all conv2d layers with kernel size 3x3 have
          a block size of 9 and all Linear layers are quantized with
          a block size of 8, irrespective of their size.
        - n_centroids_config: dict like
          {
              'Conv2d': ('kernel_size', {'*': 256}),
              'Linear': ('in_features', {'*': 256})
          }
          For instance, all conv2d layers are quantized with 256 centroids
        - step: the layers to quantize inplace corresponding
          to layers_to_quantize[step]
        - n_iter, eps, max_tentatives: forwarded to the PQ quantizer
        - remove_weights: forwarded to get_layers
        - verbose: log progress (only on the master process)
        - state_dict: optional checkpoint; when given together with
          n_iter == 0, centroids and assignments are filled with random
          placeholders whose shapes match the checkpoint, because they are
          expected to be overwritten when the checkpoint is loaded

    Returns:
        The list of names of the layers that were quantized.

    Raises:
        - ValueError: if a selected module is not a Linear, Embedding or Conv2d
    """

    quantized_layers = get_layers(
        model, layers_to_quantize[step], remove_weights=remove_weights
    )

    # book-keeping: only the master process (or a non-distributed run) logs
    is_master_process = (not dist.is_initialized()) or dist.get_rank() == 0
    verbose = verbose and is_master_process

    for layer in quantized_layers:

        # get block size and centroids
        module = attrgetter(layer)(model)
        block_size = get_param(module, layer, block_sizes_config)
        n_centroids = get_param(module, layer, n_centroids_config)
        if verbose:
            logging.info(
                f"Quantizing layer {layer} with block size {block_size} and {n_centroids} centroids"
            )

        # quantize layer (biases are kept in full precision)
        weight = module.weight.data.clone()
        has_bias = any(name == "bias" for name, _ in module.named_parameters())
        bias = module.bias.data.clone() if has_bias else None
        quantizer = PQ(
            weight,
            block_size,
            n_centroids=n_centroids,
            n_iter=n_iter,
            eps=eps,
            max_tentatives=max_tentatives,
            verbose=verbose,
        )

        # quantization performed on all GPUs with same seed
        quantizer.encode()
        centroids = quantizer.centroids.contiguous()
        assignments = quantizer.assignments.contiguous()

        # If n_iter = 0 and state_dict is provided, then
        # we initialize random assignments and centroids to
        # random values of the appropriate dimensions
        # because the quantized model parameters will be
        # overwritten by the state_dict later on.
        if n_iter == 0 and state_dict:
            # Keep the placeholders on the same device as the layer they
            # replace. Tensor.cuda()/Tensor.to() return a new tensor, so the
            # result has to be re-bound to take effect.
            device = weight.device
            # Initialize random centroids of the correct size
            centroids = torch.rand(centroids.size()).to(device)
            # Get counts and assignment keys from layer in loaded checkpoint.
            counts_key = layer + "." + "counts"
            assignment_key = layer + "." + "assignments"
            # Get number of different bins to include.
            counts = state_dict[counts_key].shape[0]
            # Initialize random assignments of the correct size
            # with an appropriate number of bins.
            num_assignments = state_dict[assignment_key].shape[0]
            num_extra = num_assignments - counts
            logging.debug(
                "placeholder quantization for %s: %d bins, %d assignments, %d extra",
                layer,
                counts,
                num_assignments,
                num_extra,
            )
            # every bin appears at least once so that bincount has `counts` entries
            assignments_bins = torch.arange(counts)
            # the upper bound of randint is exclusive: `counts` covers all bins
            # and stays valid when there is a single bin
            assignments_rand = torch.randint(0, counts, (num_extra,))
            assignments = torch.cat((assignments_bins, assignments_rand), 0).to(
                device
            )

        # broadcast results to make sure weights are up-to-date
        if dist.is_initialized():
            dist.broadcast(centroids, 0)
            dist.broadcast(assignments, 0)

        # instantiate the quantized counterpart
        if isinstance(module, nn.Linear):
            quantized_module = PQLinear(
                centroids,
                assignments,
                bias,
                module.in_features,
                module.out_features,
            )
        elif isinstance(module, nn.Embedding):
            quantized_module = PQEmbedding(
                centroids,
                assignments,
                module.num_embeddings,
                module.embedding_dim,
            )
        elif isinstance(module, nn.Conv2d):
            quantized_module = PQConv2d(
                centroids,
                assignments,
                bias,
                module.in_channels,
                module.out_channels,
                module.kernel_size,
                stride=module.stride,
                padding=module.padding,
                dilation=module.dilation,
                groups=module.groups,
                padding_mode=module.padding_mode,
            )
        else:
            raise ValueError(f"Module {module} not yet supported for quantization")

        # replace layer by its quantized counterpart
        attrsetter(layer)(model, quantized_module)

        # update statistics
        size_tracker.update(weight, block_size, n_centroids)

    # return name of quantized layers
    return quantized_layers
class SizeTracker(object):
    """
    Class to keep track of the compressed network size with iPQ.

    Args:
        - model: a nn.Module

    Remarks:
        - The compressed size is the sum of three components
          for each layer in the network:
          (1) Storing the centroids given by iPQ in fp16
          (2) Storing the assignments of the blocks in int8
          (3) Storing all non-compressed elements such as biases
        - This cost is only valid if we use 256 centroids (then
          indexing can indeed be done with int8).
        - All sizes are expressed in MB and assume 4 bytes per
          non-quantized parameter.
    """

    def __init__(self, model):
        self.model = model
        self.size_non_compressed_model = self.compute_size()
        # starts as the full model and shrinks as layers get quantized
        self.size_non_quantized = self.size_non_compressed_model
        self.size_index = 0
        self.size_centroids = 0
        self.n_quantized_layers = 0

    def compute_size(self):
        """
        Computes the size of the model (in MB), counting 4 bytes per parameter.
        """

        n_params = sum(p.numel() for _, p in self.model.named_parameters())
        return n_params * 4 / 1024 / 1024

    def update(self, W, block_size, n_centroids):
        """
        Updates the running statistics when quantizing a new layer.

        Args:
            - W: the full-precision weight of the layer being quantized
            - block_size: size of the blocks used for this layer
            - n_centroids: number of centroids used for this layer
        """

        # bits per weights
        bits_per_weight = np.log2(n_centroids) / block_size
        self.n_quantized_layers += 1

        # size of indexing the subvectors of size block_size (in MB)
        size_index_layer = bits_per_weight * W.numel() / 8 / 1024 / 1024
        self.size_index += size_index_layer

        # size of the centroids stored in float16 (in MB)
        size_centroids_layer = n_centroids * block_size * 2 / 1024 / 1024
        self.size_centroids += size_centroids_layer

        # the quantized weight no longer counts as a non-compressed element (in MB)
        size_uncompressed_layer = W.numel() * 4 / 1024 / 1024
        self.size_non_quantized -= size_uncompressed_layer

    def __repr__(self):
        size_compressed = (
            self.size_index + self.size_centroids + self.size_non_quantized
        )
        # a repr must not raise: a model without parameters has size 0
        if size_compressed:
            compression_ratio = self.size_non_compressed_model / size_compressed
        else:
            compression_ratio = float("inf")
        return (
            f"Non-compressed model size: {self.size_non_compressed_model:.2f} MB. "
            f"After quantizing {self.n_quantized_layers} layers, size "
            f"(indexing + centroids + other): {self.size_index:.2f} MB + "
            f"{self.size_centroids:.2f} MB + {self.size_non_quantized:.2f} MB = "
            f"{size_compressed:.2f} MB, compression ratio: {compression_ratio:.2f}x"
        )
def convert_yaml_to_tuple(yaml_dictionary):
    """Turn a yaml mapping holding the entries `key` and `value` into the
    pair ``(key, value)``."""
    key = yaml_dictionary["key"]
    value = yaml_dictionary["value"]
    return key, value
class ActivationQuantizer:
    """
    Fake scalar quantization of the activations using a forward hook.

    Args:
        - module: a nn.Module for which we quantize the *post-activations*
        - p: proportion of activations to quantize, set by default to 1
        - update_step: to recompute quantization parameters
        - bits: number of bits for quantization
        - method: choose among {"tensor", "histogram", "channel"}
        - clamp_threshold: stored on the instance; the hook itself clamps to
          the range derived from scale and zero_point

    Remarks:
        - scale and zero_point are reset every update_step forward passes,
          so that emulate_int recomputes them only then
        - For the list of quantization methods and number of bits, see ops.py
        - To remove the hook from the module, simply call self.handle.remove()
        - When the module is not in training mode, all activations are quantized
        - The quantization noise is added through detach(), which gives the
          straight-through estimator for the gradients
    """

    def __init__(
        self,
        module,
        p=1,
        update_step=1000,
        bits=8,
        method="histogram",
        clamp_threshold=5,
    ):
        self.module = module

        # quantization settings
        self.p = p
        self.bits = bits
        self.method = method
        self.clamp_threshold = clamp_threshold

        # refresh schedule of scale / zero_point
        self.update_step = update_step
        self.counter = 0

        # the hook is installed right away; the handle allows removing it
        self.handle = None
        self.register_hook()

    def register_hook(self):
        """Install the forward hook on self.module and keep its handle."""

        def quantize_hook(module, x, y):
            # forget the quantization parameters every update_step calls
            refresh = self.counter % self.update_step == 0
            if refresh:
                self.scale = None
                self.zero_point = None
            self.counter += 1

            # QuantNoise while training, full quantization otherwise
            if self.module.training:
                p = self.p
            else:
                p = 1

            # quantized version of the (detached) output
            quantized = emulate_int(
                y.detach(),
                bits=self.bits,
                method=self.method,
                scale=self.scale,
                zero_point=self.zero_point,
            )
            y_q, self.scale, self.zero_point = quantized

            # entries drawn with probability 1 - p receive no noise
            keep_mask = torch.zeros_like(y)
            keep_mask.bernoulli_(1 - p)
            noise = (y_q - y).masked_fill(keep_mask.bool(), 0)

            # straight-through estimator: clamp y, add the detached noise
            low = -self.scale * self.zero_point
            high = self.scale * (2**self.bits - 1 - self.zero_point)
            clamped = torch.clamp(y, low.item(), high.item())
            return clamped + noise.detach()

        self.handle = self.module.register_forward_hook(quantize_hook)
in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + False, + _pair(0), + groups, + bias, + padding_mode, + ) + + # quantization parameters + self.p = p + self.bits = bits + self.method = method + self.update_step = update_step + self.counter = 0 + + def _conv_forward(self, input, weight): + if self.padding_mode != "zeros": + return F.conv2d( + F.pad(input, self._padding_repeated_twice, mode=self.padding_mode), + weight, + self.bias, + self.stride, + _pair(0), + self.dilation, + self.groups, + ) + return F.conv2d( + input, + weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + + def forward(self, input): + # train with QuantNoise and evaluate the fully quantized network + p = self.p if self.training else 1 + + # update parameters every 100 iterations + if self.counter % self.update_step == 0: + self.scale = None + self.zero_point = None + self.counter += 1 + + # quantize weight + weight_quantized, self.scale, self.zero_point = emulate_int( + self.weight.detach(), + bits=self.bits, + method=self.method, + scale=self.scale, + zero_point=self.zero_point, + ) + + # mask to apply noise + mask = torch.zeros_like(self.weight) + mask.bernoulli_(1 - p) + noise = (weight_quantized - self.weight).masked_fill(mask.bool(), 0) + + # using straight-through estimator (STE) + clamp_low = -self.scale * self.zero_point + clamp_high = self.scale * (2**self.bits - 1 - self.zero_point) + weight = ( + torch.clamp(self.weight, clamp_low.item(), clamp_high.item()) + + noise.detach() + ) + + # return output + output = self._conv_forward(input, weight) + return output + + def extra_repr(self): + return ( + "in_channels={}, out_channels={}, kernel_size={}, stride={}, " + "padding={}, dilation={}, groups={}, bias={}, quant_noise={}, " + "bits={}, method={}".format( + self.in_channels, + self.out_channels, + self.kernel_size, + self.stride, + self.padding, + self.dilation, + self.groups, + self.bias is not None, + self.p, + 
class IntEmbedding(nn.Module):
    """
    Quantized counterpart of the nn.Embedding module that applies QuantNoise during training.

    Args:
        - num_embeddings: number of tokens
        - embedding_dim: embedding dimension
        - padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse:
          forwarded unchanged to F.embedding
        - _weight: optional initial weight of shape
          (num_embeddings, embedding_dim); randomly initialized when None
        - p: amount of noise to inject (0 = no quantization, 1 = quantize all the weights)
        - bits: number of bits
        - method: choose among {"tensor", "histogram", "channel"}
        - update_step: recompute scale and zero_point every update_steps iterations

    Remarks:
        - We use the straight-through estimator so that the gradients
          back-propagate nicely in the network, this is implemented with
          the detach() trick
        - Parameters scale and zero_point are recomputed every update_step
          forward pass to reduce the overhead
        - At test time, the weights are fully quantized
    """

    def __init__(
        self,
        num_embeddings,
        embedding_dim,
        padding_idx=None,
        max_norm=None,
        norm_type=2.0,
        scale_grad_by_freq=False,
        sparse=False,
        _weight=None,
        p=0,
        update_step=1000,
        bits=8,
        method="histogram",
    ):
        super(IntEmbedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        # a negative padding index counts from the end of the vocabulary
        if padding_idx is not None:
            if padding_idx > 0:
                assert (
                    padding_idx < self.num_embeddings
                ), "Padding_idx must be within num_embeddings"
            elif padding_idx < 0:
                assert (
                    padding_idx >= -self.num_embeddings
                ), "Padding_idx must be within num_embeddings"
                padding_idx = self.num_embeddings + padding_idx
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        if _weight is None:
            self.weight = nn.Parameter(torch.Tensor(num_embeddings, embedding_dim))
            self.reset_parameters()
        else:
            assert list(_weight.shape) == [
                num_embeddings,
                embedding_dim,
            ], "Shape of weight does not match num_embeddings and embedding_dim"
            self.weight = nn.Parameter(_weight)
        self.sparse = sparse

        # quantization parameters
        self.p = p
        self.bits = bits
        self.method = method
        self.update_step = update_step
        self.counter = 0

    def reset_parameters(self):
        """Draw the weight from a normal distribution and zero the padding row."""
        nn.init.normal_(self.weight)
        if self.padding_idx is not None:
            with torch.no_grad():
                self.weight[self.padding_idx].fill_(0)

    def forward(self, input):
        """Embed ``input`` using the noised / quantized weight."""
        # train with QuantNoise and evaluate the fully quantized network
        p = self.p if self.training else 1

        # reset scale and zero_point every update_step forward passes
        if self.counter % self.update_step == 0:
            self.scale = None
            self.zero_point = None
        self.counter += 1

        # quantize weight
        weight_quantized, self.scale, self.zero_point = emulate_int(
            self.weight.detach(),
            bits=self.bits,
            method=self.method,
            scale=self.scale,
            zero_point=self.zero_point,
        )

        # mask to apply noise: entries drawn with probability 1 - p get none
        mask = torch.zeros_like(self.weight)
        mask.bernoulli_(1 - p)
        noise = (weight_quantized - self.weight).masked_fill(mask.bool(), 0)

        # using straight-through estimator (STE)
        clamp_low = -self.scale * self.zero_point
        clamp_high = self.scale * (2**self.bits - 1 - self.zero_point)
        weight = (
            torch.clamp(self.weight, clamp_low.item(), clamp_high.item())
            + noise.detach()
        )

        # return output
        output = F.embedding(
            input,
            weight,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )
        return output

    def extra_repr(self):
        """Summarize the module; non-default options first, then the quantization settings."""
        s = "{num_embeddings}, {embedding_dim}"
        if self.padding_idx is not None:
            s += ", padding_idx={padding_idx}"
        if self.max_norm is not None:
            s += ", max_norm={max_norm}"
        if self.norm_type != 2:
            s += ", norm_type={norm_type}"
        if self.scale_grad_by_freq is not False:
            s += ", scale_grad_by_freq={scale_grad_by_freq}"
        if self.sparse is not False:
            s += ", sparse=True"
        # leading ", " keeps this field separated from the previous one
        s += ", quant_noise={p}, bits={bits}, method={method}"
        return s.format(**self.__dict__)
+ - Parameters scale and zero_point are recomputed every update_step + forward pass to reduce the overhead + - At test time, the weights are fully quantized + """ + + def __init__( + self, + in_features, + out_features, + bias=True, + p=0, + update_step=3000, + bits=8, + method="histogram", + ): + super(IntLinear, self).__init__() + self.in_features = int(in_features) + self.out_features = int(out_features) + self.weight = torch.nn.Parameter(torch.Tensor(out_features, in_features)) + self.chosen_bias = bias + if self.chosen_bias: + self.bias = torch.nn.Parameter(torch.Tensor(out_features)) + else: + self.register_parameter("bias", None) + self.reset_parameters() + + # quantization parameters + self.p = p + self.bits = bits + self.method = method + self.update_step = update_step + self.counter = 0 + + def reset_parameters(self): + nn.init.xavier_uniform_(self.weight) + if self.chosen_bias: + nn.init.constant_(self.bias, 0.0) + return + + def forward(self, input): + # train with QuantNoise and evaluate the fully quantized network + p = self.p if self.training else 1 + + # update parameters every 100 iterations + if self.counter % self.update_step == 0: + self.scale = None + self.zero_point = None + self.counter += 1 + + # quantize weight + weight_quantized, self.scale, self.zero_point = emulate_int( + self.weight.detach(), + bits=self.bits, + method=self.method, + scale=self.scale, + zero_point=self.zero_point, + ) + + # mask to apply noise + mask = torch.zeros_like(self.weight) + mask.bernoulli_(1 - p) + noise = (weight_quantized - self.weight).masked_fill(mask.bool(), 0) + + # using straight-through estimator (STE) + clamp_low = -self.scale * self.zero_point + clamp_high = self.scale * (2**self.bits - 1 - self.zero_point) + weight = ( + torch.clamp(self.weight, clamp_low.item(), clamp_high.item()) + + noise.detach() + ) + + # return output + output = F.linear(input, weight, self.bias) + return output + + def extra_repr(self): + return "in_features={}, 
import torch

try:
    import torch.ao.quantization as quantization
except ImportError:
    import torch.quantization as quantization


def emulate_int(w, bits, method, scale=None, zero_point=None):
    """Fake-quantize ``w`` with the emulator selected by ``method``.

    Args:
        w: tensor to quantize.
        bits: number of bits of the emulated integer grid.
        method: suffix of the module-level ``emulate_int8_<method>`` function
            to dispatch to ("histogram", "channel" or "tensor").
        scale: previously computed scale, or ``None`` to recompute it.
        zero_point: previously computed zero point (used with ``scale``).

    Returns:
        tuple: ``(quantized_w, scale, zero_point)``.

    Raises:
        KeyError: if no ``emulate_int8_<method>`` function exists.
    """
    q = globals()[f"emulate_int8_{method}"]
    return q(w, scale=scale, zero_point=zero_point, bits=bits)


def quantize(w, scale, zero_point, bits=8):
    """Round ``w`` onto the ``bits``-wide integer grid and map it back.

    Values are scaled, shifted by ``zero_point``, rounded, clamped to
    ``[0, 2**bits - 1]`` and then de-quantized, so the result has the same
    type as ``w`` but only takes representable values.
    """
    # In the default behavior, max_val = 255.
    max_val = 2**bits - 1
    return (
        torch.clamp(torch.round(w / scale + zero_point), 0, max_val) - zero_point
    ) * scale


def _qparams_like(w, scale, zero_point):
    """Move freshly observed qparams to the device and dtype of ``w``."""
    # Follow the weight's device instead of forcing CUDA: this keeps CPU-only
    # runs working and avoids landing on the wrong GPU.
    scale = scale.to(device=w.device, dtype=w.dtype)
    zero_point = zero_point.to(device=w.device, dtype=w.dtype)
    return scale, zero_point


def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):
    """Quantize ``w`` using qparams from a ``HistogramObserver``.

    The observer only runs when ``scale`` is ``None``; otherwise the supplied
    ``scale``/``zero_point`` are reused unchanged.
    """
    if scale is None:
        obs = quantization.observer.HistogramObserver()
        obs.to(device=w.device)
        _ = obs(w.float())
        scale, zero_point = obs.calculate_qparams()
        scale, zero_point = _qparams_like(w, scale, zero_point)
    return quantize(w, scale, zero_point, bits=bits), scale, zero_point


def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):
    """Quantize ``w`` using symmetric per-channel min/max qparams.

    Channels are taken along the last axis (``ch_axis=-1``). The observer only
    runs when ``scale`` is ``None``.
    """
    if scale is None:
        obs = quantization.observer.PerChannelMinMaxObserver(
            ch_axis=-1, qscheme=torch.per_channel_symmetric
        )
        obs.to(device=w.device)
        _ = obs(w)
        # NOTE(review): unpacking three values relies on the observer's
        # ``get_qparams`` API; confirm it matches the installed torch version.
        scale, zero_point, ch_axis = obs.get_qparams()
        scale, zero_point = _qparams_like(w, scale, zero_point)
    return quantize(w, scale, zero_point, bits=bits), scale, zero_point


def emulate_int8_tensor(w, scale=None, zero_point=None, bits=8):
    """Quantize ``w`` using per-tensor min/max qparams.

    The observer only runs when ``scale`` is ``None``; otherwise the supplied
    ``scale``/``zero_point`` are reused unchanged.
    """
    if scale is None:
        obs = quantization.observer.MinMaxObserver()
        obs.to(device=w.device)
        _ = obs(w)
        scale, zero_point = obs.calculate_qparams()
        scale, zero_point = _qparams_like(w, scale, zero_point)
    return quantize(w, scale, zero_point, bits=bits), scale, zero_point
+ +import logging +from operator import attrgetter + +import torch.distributed as dist +import torch.nn as nn + +from ..pq.utils import attrsetter, get_layers +from .modules import ActivationQuantizer, IntConv2d, IntEmbedding, IntLinear + + +MAPPING = {nn.Linear: IntLinear, nn.Embedding: IntEmbedding, nn.Conv2d: IntConv2d} + + +def quantize_model_( + model, p=0.2, bits=8, update_step=3000, method="histogram", remove_weights=False +): + """ + Replaces all modules with their scalar quantized counterpart and + registers hooks to quantize the post-ativations of those modules. + + Args: + - model: a nn.Module + - p: amount of noise (0 for no noise, 1 to quantize all the weights/activations) + - bits: number of bits + - update_step: update quantization parameters every update_step steps + """ + # quantize all layers + # remove weights indicates whether the weights extension should be removed, in addition to + # weight_orig and weight extension on names + quantized_layers = get_layers(model, "(.*?)", remove_weights=remove_weights) + + for layer in quantized_layers: + + # book-keeping + is_master_process = (not dist.is_initialized()) or ( + dist.is_initialized() and dist.get_rank() == 0 + ) + + # recover module + module = attrgetter(layer)(model) + if is_master_process: + logging.info( + f"Quantizing layer {layer} with bits={bits} and QuantNoise={p}" + ) + + # quantization params + q_params = { + "p": p, + "update_step": update_step, + "bits": bits, + "method": method, + "counter": 0, + } + + # instantiate the quantized counterpart + if isinstance(module, tuple(MAPPING.keys())): + QuantizedModule = MAPPING[module.__class__] + quantized_module = QuantizedModule.__new__(QuantizedModule) + params = module.__dict__ + params.update(q_params) + quantized_module.__dict__.update(params) + + else: + if is_master_process: + logging.info(f"Module {module} not yet supported for quantization") + continue + + # activation quantization + a_q = ActivationQuantizer(quantized_module, p=0, 
class RotaryPositionalEmbedding(torch.nn.Module):
    """Cache of cosine/sine tables used by rotary positional embeddings.

    Reference: https://blog.eleuther.ai/rotary-embeddings/
    Paper: https://arxiv.org/pdf/2104.09864.pdf
    """

    def __init__(self, dim, base=10000, precision=torch.half):
        """Set up the inverse frequencies and an empty table cache.

        Args:
            dim: Dimension of embedding
            base: Base value for exponential
            precision: precision to use for numerical values (stored only)
        """
        super().__init__()
        self.precision = precision
        exponents = torch.arange(0, dim, 2).float() / dim
        self.register_buffer("inv_freq", 1.0 / (base**exponents))
        # Nothing is cached yet: both tables start with zero rows.
        self.seq_len_cached = 0
        self.cos_cached = torch.empty(self.seq_len_cached, 1, 1, dim)
        self.sin_cached = torch.empty(self.seq_len_cached, 1, 1, dim)

    def forward(self, x, seq_len: int = 0):
        """Return the cached ``(cos, sin)`` tables, growing them if needed.

        Args:
            x: Input x with T X B X C (only its device is used here)
            seq_len: Sequence length of input x

        Returns:
            tuple: ``(cos, sin)`` tensors of shape ``(cached_len, 1, 1, dim)``;
            the cache only grows, so ``cached_len`` may exceed ``seq_len``.
        """
        if seq_len <= self.seq_len_cached:
            return self.cos_cached, self.sin_cached

        self.seq_len_cached = seq_len
        steps = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
        angles = torch.einsum("i,j->ij", steps, self.inv_freq)
        # Duplicate the angles so the table spans the full embedding width.
        table = torch.cat((angles, angles), dim=-1).to(x.device)
        rows, width = table.size(0), table.size(1)
        self.cos_cached = table.cos().view(rows, 1, 1, width)
        self.sin_cached = table.sin().view(rows, 1, 1, width)
        return self.cos_cached, self.sin_cached
from torch import nn


class SamePad(nn.Module):
    """Trim trailing positions from the last axis of a 3-D tensor.

    For causal use ``kernel_size - 1`` positions are removed; otherwise one
    position is removed for even kernel sizes and none for odd ones.
    """

    def __init__(self, kernel_size, causal=False):
        super().__init__()
        if not causal:
            self.remove = 0 if kernel_size % 2 else 1
        else:
            self.remove = kernel_size - 1

    def forward(self, x):
        """Return ``x`` without its last ``self.remove`` entries on axis 2."""
        if self.remove <= 0:
            return x
        return x[:, :, : -self.remove]


class SamePad2d(nn.Module):
    """Trim one trailing row and column of a 4-D tensor for even kernels."""

    def __init__(self, kernel_size):
        super().__init__()
        self.remove = 0 if kernel_size % 2 else 1

    def forward(self, x):
        """Return ``x`` with ``self.remove`` entries cut from axes 2 and 3."""
        assert len(x.size()) == 4
        if self.remove <= 0:
            return x
        return x[:, :, : -self.remove, : -self.remove]
class SinusoidalPositionalEmbedding(nn.Module):
    """This module produces sinusoidal positional embeddings of any length.

    Padding symbols are ignored.
    """

    def __init__(self, embedding_dim, padding_idx, init_size=1024, auto_expand=True):
        """Pre-compute ``init_size`` embeddings of width ``embedding_dim``.

        Args:
            embedding_dim: width of each positional embedding.
            padding_idx: index of the padding symbol; ``None`` is stored as 0.
            init_size: number of positions to pre-compute.
            auto_expand: whether ``forward`` may replace the stored table with
                a larger one when a longer input is seen.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx if padding_idx is not None else 0
        # Non-persistent: the table is recomputed, never saved in checkpoints.
        self.register_buffer(
            "weights",
            SinusoidalPositionalEmbedding.get_embedding(
                init_size, embedding_dim, padding_idx
            ),
            persistent=False,
        )
        self.max_positions = int(1e5)
        self.auto_expand = auto_expand
        self.onnx_trace = False

    def prepare_for_onnx_export_(self):
        """Switch ``forward`` to its ONNX-traceable code paths."""
        self.onnx_trace = True

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """Drop deprecated keys from ``state_dict`` before the default load."""
        # Ignore some deprecated keys that were used in older versions
        deprecated_keys = ["weights", "_float_tensor"]
        for key in deprecated_keys:
            if prefix + key in state_dict:
                del state_dict[prefix + key]
        super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)

    @staticmethod
    def get_embedding(
        num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None
    ):
        """Build sinusoidal embeddings.

        This matches the implementation in tensor2tensor, but differs slightly
        from the description in Section 3.5 of "Attention Is All You Need".

        Args:
            num_embeddings: number of positions (rows) to generate.
            embedding_dim: width of each embedding; odd widths get one
                zero-padded trailing column.
            padding_idx: row to zero out, or ``None`` to leave all rows as is.

        Returns:
            Tensor of shape ``(num_embeddings, embedding_dim)`` whose first
            half of columns holds sines and second half cosines.
        """
        half_dim = embedding_dim // 2
        # Clamp the denominator: with embedding_dim 2 or 3 there is a single
        # frequency and ``half_dim - 1`` would be zero (ZeroDivisionError).
        emb = math.log(10000) / max(half_dim - 1, 1)
        emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
        emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(
            1
        ) * emb.unsqueeze(0)
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(
            num_embeddings, -1
        )
        if embedding_dim % 2 == 1:
            # zero pad
            emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
        if padding_idx is not None:
            emb[padding_idx, :] = 0
        return emb

    def forward(
        self,
        input,
        incremental_state: Optional[Any] = None,
        timestep: Optional[Tensor] = None,
        positions: Optional[Any] = None,
    ):
        """Input is expected to be of size [bsz x seqlen].

        Returns the positional embeddings for ``input``: a single position
        expanded over the batch when ``incremental_state`` is given, otherwise
        one embedding per token looked up via ``utils.make_positions``.
        """
        bspair = torch.onnx.operators.shape_as_tensor(input)
        bsz, seq_len = bspair[0], bspair[1]
        max_pos = self.padding_idx + 1 + seq_len
        weights = self.weights

        if max_pos > self.weights.size(0):
            # If the input is longer than the number of pre-computed embeddings,
            # compute the extra embeddings on the fly.
            # Only store the expanded embeddings if auto_expand=True.
            # In multithreading environments, mutating the weights of a module
            # may cause trouble. Set auto_expand=False if this happens.
            weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos, self.embedding_dim, self.padding_idx
            ).to(self.weights)
            if self.auto_expand:
                self.weights = weights

        if incremental_state is not None:
            # positions is the same for every token when decoding a single step
            pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len
            if self.onnx_trace:
                return (
                    weights.index_select(index=self.padding_idx + pos, dim=0)
                    .unsqueeze(1)
                    .repeat(bsz, 1, 1)
                )
            return weights[self.padding_idx + pos, :].expand(bsz, 1, -1)

        positions = utils.make_positions(
            input, self.padding_idx, onnx_trace=self.onnx_trace
        )
        if self.onnx_trace:
            flat_embeddings = weights.detach().index_select(0, positions.view(-1))
            embedding_shape = torch.cat(
                (bsz.view(1), seq_len.view(1), torch.tensor([-1], dtype=torch.long))
            )
            embeddings = torch.onnx.operators.reshape_from_tensor_shape(
                flat_embeddings, embedding_shape
            )
            return embeddings
        return (
            weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
        )
# Used for Ai(2) calculations - beginning of [l-c, l] range
def compute_checkpoint(self, word_index):
    """Return the first index of the summary range for ``word_index``.

    A non-zero index that falls exactly on a stride boundary closes the
    window before it, so its range starts ``expressivity`` positions back.
    Any other index uses the last ``expressivity`` positions of the stride
    window that contains it.
    """
    closes_window = word_index % self.stride == 0 and word_index != 0
    if closes_window:
        return word_index - self.expressivity

    window_start = math.floor(word_index / self.stride) * self.stride
    return window_start + self.stride - self.expressivity
!= 0: + subset_one = set( + range(word_index - self.stride, min(absolute_max, word_index + 1)) + ) + else: + subset_one = set( + range( + max(0, rounded_index - self.stride), + min(absolute_max, rounded_index + 1), + ) + ) + + # Subset 2 - summary per window + # If bidirectional, subset 2 is the same for every index + subset_two = set() + if not self.is_bidirectional: + subset_two = self.compute_subset_summaries(absolute_max) + + return subset_one.union(subset_two) + + # Compute sparse mask - if bidirectional, can pre-compute and store + def buffered_sparse_mask(self, tensor, tgt_len, src_len): + assert tgt_len > self.stride + sparse_mask = torch.empty((tgt_len, src_len)).float().fill_(float("-inf")) + + # If bidirectional, subset 2 is the same for every index + subset_summaries = set() + if self.is_bidirectional: + subset_summaries = self.compute_subset_summaries(tgt_len) + + for i in range(tgt_len): + fixed_attention_subset = self.compute_fixed_attention_subset(i, tgt_len) + fixed_attention_subset = fixed_attention_subset.union(subset_summaries) + included_word_indices = torch.LongTensor(list(fixed_attention_subset)) + sparse_mask[i].index_fill_(0, included_word_indices, 0) + return sparse_mask.type_as(tensor) + + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + sparse_mask = self.buffered_sparse_mask(attn_weights, tgt_len, src_len) + sparse_mask = sparse_mask.unsqueeze(0).expand( + bsz * self.num_heads, tgt_len, src_len + ) + attn_weights += sparse_mask diff --git a/fairseq/fairseq/modules/sparse_transformer_sentence_encoder.py b/fairseq/fairseq/modules/sparse_transformer_sentence_encoder.py new file mode 100644 index 0000000..f41ec09 --- /dev/null +++ b/fairseq/fairseq/modules/sparse_transformer_sentence_encoder.py @@ -0,0 +1,96 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
class SparseTransformerSentenceEncoder(TransformerSentenceEncoder):
    """
    Sparse implementation of the TransformerSentenceEncoder
    - see SparseMultiheadAttention

    The base encoder is built first; its ``layers`` are then replaced with
    ``SparseTransformerSentenceEncoderLayer`` instances and the first
    ``n_trans_layers_to_freeze`` of the new layers are frozen.
    """

    def __init__(
        self,
        padding_idx: int,
        vocab_size: int,
        num_encoder_layers: int = 6,
        embedding_dim: int = 768,
        ffn_embedding_dim: int = 3072,
        num_attention_heads: int = 8,
        dropout: float = 0.1,
        attention_dropout: float = 0.1,
        activation_dropout: float = 0.1,
        max_seq_len: int = 256,
        num_segments: int = 2,
        use_position_embeddings: bool = True,
        offset_positions_by_padding: bool = True,
        encoder_normalize_before: bool = False,
        apply_bert_init: bool = False,
        activation_fn: str = "relu",
        learned_pos_embedding: bool = True,
        embed_scale: float = None,
        freeze_embeddings: bool = False,
        n_trans_layers_to_freeze: int = 0,
        export: bool = False,
        is_bidirectional: bool = True,
        stride: int = 32,
        expressivity: int = 8,
    ) -> None:

        super().__init__(
            padding_idx,
            vocab_size,
            num_encoder_layers,
            embedding_dim,
            ffn_embedding_dim,
            num_attention_heads,
            dropout,
            attention_dropout,
            activation_dropout,
            max_seq_len,
            num_segments,
            use_position_embeddings,
            offset_positions_by_padding,
            encoder_normalize_before,
            apply_bert_init,
            activation_fn,
            learned_pos_embedding,
            embed_scale,
            freeze_embeddings,
            n_trans_layers_to_freeze,
            export,
        )

        # Swap the dense layers created by the base class for sparse ones.
        sparse_layers = []
        for _ in range(num_encoder_layers):
            sparse_layers.append(
                SparseTransformerSentenceEncoderLayer(
                    embedding_dim=self.embedding_dim,
                    ffn_embedding_dim=ffn_embedding_dim,
                    num_attention_heads=num_attention_heads,
                    dropout=dropout,
                    attention_dropout=attention_dropout,
                    activation_dropout=activation_dropout,
                    activation_fn=activation_fn,
                    export=export,
                    is_bidirectional=is_bidirectional,
                    stride=stride,
                    expressivity=expressivity,
                )
            )
        self.layers = nn.ModuleList(sparse_layers)

        # The replacement layers are new modules, so freeze them again here.
        for layer_idx in range(n_trans_layers_to_freeze):
            frozen_layer = self.layers[layer_idx]
            if frozen_layer is None:
                continue
            for param in frozen_layer.parameters():
                param.requires_grad = False
100644 index 0000000..19e035d --- /dev/null +++ b/fairseq/fairseq/modules/transformer_layer.py @@ -0,0 +1,562 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import utils +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import LayerNorm, MultiheadAttention +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.quant_noise import quant_noise + + +class TransformerEncoderLayerBase(nn.Module): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.encoder.normalize_before* to ``True``. 
+ + Args: + cfg (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, cfg, return_fc=False): + super().__init__() + self.cfg = cfg + self.return_fc = return_fc + self.embed_dim = cfg.encoder.embed_dim + self.quant_noise = cfg.quant_noise.pq + self.quant_noise_block_size = cfg.quant_noise.pq_block_size + self.self_attn = self.build_self_attention(self.embed_dim, cfg) + self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export) + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=self.__class__.__name__ + ) + self.activation_fn = utils.get_activation_fn(activation=cfg.activation_fn) + activation_dropout_p = cfg.activation_dropout + if activation_dropout_p == 0: + # for backwards compatibility with models that use cfg.relu_dropout + activation_dropout_p = cfg.relu_dropout or 0 + self.activation_dropout_module = FairseqDropout( + float(activation_dropout_p), module_name=self.__class__.__name__ + ) + self.normalize_before = cfg.encoder.normalize_before + self.fc1 = self.build_fc1( + self.embed_dim, + cfg.encoder.ffn_embed_dim, + self.quant_noise, + self.quant_noise_block_size, + ) + self.fc2 = self.build_fc2( + cfg.encoder.ffn_embed_dim, + self.embed_dim, + self.quant_noise, + self.quant_noise_block_size, + ) + + self.final_layer_norm = LayerNorm(self.embed_dim, export=cfg.export) + + def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise( + nn.Linear(input_dim, output_dim), p=q_noise, block_size=qn_block_size + ) + + def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise( + nn.Linear(input_dim, output_dim), p=q_noise, block_size=qn_block_size + ) + + def _get_fc_rank(self, remove_num: int) -> List[int]: + f1_filter_param = [] + for i in range(self.fc1.out_features): + f1_filter_param.append( + torch.sum(torch.abs(self.fc1.weight[i])) + + torch.sum(torch.abs(self.fc2.weight[:, i])) + + torch.abs(self.fc1.bias[i]) + ) + return sorted( + 
def _prune_fc_layer(self, remove_index: List[int]):
    """Rebuild ``fc1`` and ``fc2`` without the listed hidden units.

    Rows of ``fc1.weight``/``fc1.bias`` and columns of ``fc2.weight`` whose
    index appears in ``remove_index`` are dropped; ``fc2.bias`` is carried
    over unchanged. Both layers are re-created through ``quant_noise`` and
    receive the surviving values as fresh trainable parameters.

    Args:
        remove_index: integer indices of the hidden units to remove.
    """
    # Set membership avoids a linear scan of the list for every hidden unit.
    pruned = set(remove_index)

    kept_rows = [i for i in range(self.fc1.out_features) if i not in pruned]
    new_fc1_weight = torch.stack([self.fc1.weight[i] for i in kept_rows]).detach()
    new_fc1_weight.requires_grad = True

    new_fc1_bias = torch.stack([self.fc1.bias[i] for i in kept_rows]).detach()
    new_fc1_bias.requires_grad = True

    self.fc1 = quant_noise(
        nn.Linear(self.fc1.in_features, self.fc1.out_features - len(remove_index)),
        p=self.quant_noise,
        block_size=self.quant_noise_block_size,
    )
    self.fc1.weight = torch.nn.Parameter(new_fc1_weight)
    self.fc1.bias = torch.nn.Parameter(new_fc1_bias)

    kept_cols = [i for i in range(self.fc2.in_features) if i not in pruned]
    new_fc2_weight = torch.stack(
        [self.fc2.weight[:, i] for i in kept_cols], dim=-1
    ).detach()
    new_fc2_weight.requires_grad = True

    # The output width is unchanged, so the bias is reused as is.
    new_fc2_bias = self.fc2.bias.detach()
    new_fc2_bias.requires_grad = True

    self.fc2 = quant_noise(
        nn.Linear(self.fc2.in_features - len(remove_index), self.fc2.out_features),
        p=self.quant_noise,
        block_size=self.quant_noise_block_size,
    )
    self.fc2.weight = torch.nn.Parameter(new_fc2_weight)
    self.fc2.bias = torch.nn.Parameter(new_fc2_bias)
def forward(
    self,
    x,
    encoder_padding_mask: Optional[Tensor],
    attn_mask: Optional[Tensor] = None,
):
    """
    Run self-attention followed by the feed-forward block, each wrapped in
    dropout, a residual connection and (pre- or post-) layer normalization.

    Args:
        x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)`
        encoder_padding_mask (ByteTensor): binary ByteTensor of shape
            `(batch, seq_len)` where padding elements are indicated by ``1``.
        attn_mask (ByteTensor): binary tensor of shape `(tgt_len, src_len)`,
            where `tgt_len` is the length of output and `src_len` is the
            length of input, though here both are equal to `seq_len`.
            `attn_mask[tgt_i, src_j] = 1` means that when calculating the
            embedding for `tgt_i`, we exclude (mask out) `src_j`. This is
            useful for strided self-attention.

    Returns:
        encoded output of shape `(seq_len, batch, embed_dim)`; when
        ``self.return_fc`` is set (and not scripting) a tuple of that output
        and the raw ``fc2`` result.
    """
    pre_norm = self.normalize_before

    # Masked entries (original value 1) become a large negative number and
    # unmasked entries stay 0. -inf is avoided on purpose: in some edge cases
    # the pre-softmax attention weight of a padded query element would become
    # -inf, which results in NaN in model parameters.
    if attn_mask is not None:
        fill_value = -1e8 if x.dtype == torch.float32 else -1e4
        attn_mask = attn_mask.masked_fill(attn_mask.to(torch.bool), fill_value)

    # --- self-attention sub-layer ---
    shortcut = x
    if pre_norm:
        x = self.self_attn_layer_norm(x)
    x, _ = self.self_attn(
        query=x,
        key=x,
        value=x,
        key_padding_mask=encoder_padding_mask,
        need_weights=False,
        attn_mask=attn_mask,
    )
    x = self.residual_connection(self.dropout_module(x), shortcut)
    if not pre_norm:
        x = self.self_attn_layer_norm(x)

    # --- feed-forward sub-layer ---
    shortcut = x
    if pre_norm:
        x = self.final_layer_norm(x)
    hidden = self.activation_dropout_module(self.activation_fn(self.fc1(x)))
    fc_result = self.fc2(hidden)

    x = self.residual_connection(self.dropout_module(fc_result), shortcut)
    if not pre_norm:
        x = self.final_layer_norm(x)

    if self.return_fc and not torch.jit.is_scripting():
        return x, fc_result
    return x
def __init__(
    self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False
):
    """Build the sub-modules of one decoder layer from *cfg*.

    Args:
        cfg: configuration object; this constructor reads ``cfg.decoder.*``,
            ``cfg.dropout``, ``cfg.quant_noise.*``, ``cfg.cross_self_attention``,
            ``cfg.activation_fn``, ``cfg.activation_dropout``,
            ``cfg.relu_dropout`` and ``cfg.export``, plus the optional
            ``scale_attn`` / ``scale_heads`` / ``scale_fc`` / ``scale_resids``
            switches (looked up with a default of ``False``).
        no_encoder_attn (bool, optional): when true, no encoder-attention
            module or layer norm is created (both attributes are ``None``).
        add_bias_kv (bool, optional): forwarded to ``build_self_attention``.
        add_zero_attn (bool, optional): forwarded to ``build_self_attention``.
    """
    super().__init__()
    self.embed_dim = cfg.decoder.embed_dim
    self.dropout_module = FairseqDropout(
        cfg.dropout, module_name=self.__class__.__name__
    )
    # Quantization-noise settings are stored first because the build_* helpers
    # below read them from ``self``.
    self.quant_noise = cfg.quant_noise.pq
    self.quant_noise_block_size = cfg.quant_noise.pq_block_size

    self.cross_self_attention = cfg.cross_self_attention

    self.self_attn = self.build_self_attention(
        self.embed_dim,
        cfg,
        add_bias_kv=add_bias_kv,
        add_zero_attn=add_zero_attn,
    )
    # Optional layer norm applied to the self-attention output ("scale_attn").
    self.attn_ln = (
        LayerNorm(self.embed_dim)
        if utils.safe_getattr(cfg, "scale_attn", False)
        else None
    )
    self.nh = self.self_attn.num_heads
    self.head_dim = self.self_attn.head_dim
    scale_heads = utils.safe_getattr(cfg, "scale_heads", False)
    # Optional learnable per-head scale, initialised to ones ("scale_heads").
    self.c_attn = (
        nn.Parameter(torch.ones((self.nh,)), requires_grad=True)
        if scale_heads
        else None
    )

    self.activation_fn = utils.get_activation_fn(activation=cfg.activation_fn)
    activation_dropout_p = cfg.activation_dropout
    if activation_dropout_p == 0:
        # for backwards compatibility with models that use cfg.relu_dropout
        activation_dropout_p = cfg.relu_dropout or 0
    self.activation_dropout_module = FairseqDropout(
        float(activation_dropout_p), module_name=self.__class__.__name__
    )
    self.normalize_before = cfg.decoder.normalize_before

    self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export)

    if no_encoder_attn:
        self.encoder_attn = None
        self.encoder_attn_layer_norm = None
    else:
        self.encoder_attn = self.build_encoder_attention(self.embed_dim, cfg)
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export)

    # Optional layer norm over the FFN hidden activations ("scale_fc").
    self.ffn_layernorm = (
        LayerNorm(cfg.decoder.ffn_embed_dim)
        if utils.safe_getattr(cfg, "scale_fc", False)
        else None
    )
    # Optional learnable per-dimension weight, initialised to ones
    # ("scale_resids").
    self.w_resid = (
        nn.Parameter(
            torch.ones(
                self.embed_dim,
            ),
            requires_grad=True,
        )
        if utils.safe_getattr(cfg, "scale_resids", False)
        else None
    )

    # Feed-forward network: embed_dim -> ffn_embed_dim -> embed_dim.
    self.fc1 = self.build_fc1(
        self.embed_dim,
        cfg.decoder.ffn_embed_dim,
        self.quant_noise,
        self.quant_noise_block_size,
    )
    self.fc2 = self.build_fc2(
        cfg.decoder.ffn_embed_dim,
        self.embed_dim,
        self.quant_noise,
        self.quant_noise_block_size,
    )

    self.final_layer_norm = LayerNorm(self.embed_dim, export=cfg.export)
    # Default for returning attention weights; starts out enabled.
    self.need_attn = True

    self.onnx_trace = False
def forward(
    self,
    x,
    encoder_out: Optional[torch.Tensor] = None,
    encoder_padding_mask: Optional[torch.Tensor] = None,
    incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
    prev_self_attn_state: Optional[List[torch.Tensor]] = None,
    prev_attn_state: Optional[List[torch.Tensor]] = None,
    self_attn_mask: Optional[torch.Tensor] = None,
    self_attn_padding_mask: Optional[torch.Tensor] = None,
    need_attn: bool = False,
    need_head_weights: bool = False,
):
    """Run self-attention, optional encoder attention and the FFN.

    Args:
        x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)`
        encoder_out (Tensor, optional): encoder states used as key/value of
            the encoder attention; also prepended to the self-attention
            keys/values when ``self.cross_self_attention`` is enabled.
        encoder_padding_mask (ByteTensor, optional): binary
            ByteTensor of shape `(batch, src_len)` where padding
            elements are indicated by ``1``.
        incremental_state (dict, optional): cache passed through to the
            attention modules.
        prev_self_attn_state (list, optional): ``[prev_key, prev_value]`` and
            optionally a third ``prev_key_padding_mask`` entry, written into
            the self-attention buffer; requires ``incremental_state``.
        prev_attn_state (list, optional): same layout, written into the
            encoder-attention buffer; requires ``incremental_state``.
        self_attn_mask (Tensor, optional): attention mask for self-attention.
        self_attn_padding_mask (Tensor, optional): key padding mask for
            self-attention.
        need_attn (bool, optional): return attention weights
        need_head_weights (bool, optional): return attention weights
            for each head (default: return average over heads).

    Returns:
        tuple ``(x, attn, self_attn_state)``: ``x`` is the encoded output of
        shape `(seq_len, batch, embed_dim)`; ``attn`` is whatever the last
        executed attention call returned as its second value;
        ``self_attn_state`` is a list of cached self-attention tensors when
        ``self.onnx_trace`` is set and ``incremental_state`` is given,
        otherwise ``None``.
    """
    # Per-head weights can only be returned if weights are returned at all.
    if need_head_weights:
        need_attn = True

    # ---- self-attention sub-layer ----
    residual = x
    if self.normalize_before:
        x = self.self_attn_layer_norm(x)
    if prev_self_attn_state is not None:
        # Seed the self-attention cache with externally provided key/value
        # (and optionally key-padding-mask) tensors.
        prev_key, prev_value = prev_self_attn_state[:2]
        saved_state: Dict[str, Optional[Tensor]] = {
            "prev_key": prev_key,
            "prev_value": prev_value,
        }
        if len(prev_self_attn_state) >= 3:
            saved_state["prev_key_padding_mask"] = prev_self_attn_state[2]
        assert incremental_state is not None
        self.self_attn._set_input_buffer(incremental_state, saved_state)
    _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state)
    # With cross-self-attention, the encoder states are prepended to the
    # self-attention keys/values unless a "prev_key" is already cached in the
    # incremental state.
    if self.cross_self_attention and not (
        incremental_state is not None
        and _self_attn_input_buffer is not None
        and "prev_key" in _self_attn_input_buffer
    ):
        if self_attn_mask is not None:
            assert encoder_out is not None
            # Zero columns are prepended for the encoder positions.
            self_attn_mask = torch.cat(
                (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1
            )
        if self_attn_padding_mask is not None:
            if encoder_padding_mask is None:
                assert encoder_out is not None
                # No encoder padding mask given: use an all-zero one of shape
                # (encoder_out.size(1), encoder_out.size(0)).
                encoder_padding_mask = self_attn_padding_mask.new_zeros(
                    encoder_out.size(1), encoder_out.size(0)
                )
            self_attn_padding_mask = torch.cat(
                (encoder_padding_mask, self_attn_padding_mask), dim=1
            )
        assert encoder_out is not None
        y = torch.cat((encoder_out, x), dim=0)
    else:
        y = x

    x, attn = self.self_attn(
        query=x,
        key=y,
        value=y,
        key_padding_mask=self_attn_padding_mask,
        incremental_state=incremental_state,
        need_weights=False,
        attn_mask=self_attn_mask,
    )
    if self.c_attn is not None:
        # Scale each head's slice of the output by its learned factor.
        tgt_len, bsz = x.size(0), x.size(1)
        x = x.view(tgt_len, bsz, self.nh, self.head_dim)
        x = torch.einsum("tbhd,h->tbhd", x, self.c_attn)
        x = x.reshape(tgt_len, bsz, self.embed_dim)
    if self.attn_ln is not None:
        x = self.attn_ln(x)
    x = self.dropout_module(x)
    x = self.residual_connection(x, residual)
    if not self.normalize_before:
        x = self.self_attn_layer_norm(x)

    # ---- encoder-attention sub-layer (skipped without module or encoder_out) ----
    if self.encoder_attn is not None and encoder_out is not None:
        residual = x
        if self.normalize_before:
            x = self.encoder_attn_layer_norm(x)
        if prev_attn_state is not None:
            # Seed the encoder-attention cache the same way as above.
            prev_key, prev_value = prev_attn_state[:2]
            saved_state: Dict[str, Optional[Tensor]] = {
                "prev_key": prev_key,
                "prev_value": prev_value,
            }
            if len(prev_attn_state) >= 3:
                saved_state["prev_key_padding_mask"] = prev_attn_state[2]
            assert incremental_state is not None
            self.encoder_attn._set_input_buffer(incremental_state, saved_state)

        x, attn = self.encoder_attn(
            query=x,
            key=encoder_out,
            value=encoder_out,
            key_padding_mask=encoder_padding_mask,
            incremental_state=incremental_state,
            static_kv=True,
            # Weights are requested explicitly, or outside training when
            # self.need_attn is still enabled.
            need_weights=need_attn or (not self.training and self.need_attn),
            need_head_weights=need_head_weights,
        )
        x = self.dropout_module(x)
        x = self.residual_connection(x, residual)
        if not self.normalize_before:
            x = self.encoder_attn_layer_norm(x)

    # ---- feed-forward sub-layer ----
    residual = x
    if self.normalize_before:
        x = self.final_layer_norm(x)

    x = self.activation_fn(self.fc1(x))
    x = self.activation_dropout_module(x)
    if self.ffn_layernorm is not None:
        x = self.ffn_layernorm(x)
    x = self.fc2(x)
    x = self.dropout_module(x)
    if self.w_resid is not None:
        # Learned per-dimension weight applied to the residual branch.
        residual = torch.mul(self.w_resid, residual)
    x = self.residual_connection(x, residual)
    if not self.normalize_before:
        x = self.final_layer_norm(x)
    if self.onnx_trace and incremental_state is not None:
        # When tracing for ONNX, the cached self-attention tensors are
        # returned explicitly as the third tuple element.
        saved_state = self.self_attn._get_input_buffer(incremental_state)
        assert saved_state is not None
        if self_attn_padding_mask is not None:
            self_attn_state = [
                saved_state["prev_key"],
                saved_state["prev_value"],
                saved_state["prev_key_padding_mask"],
            ]
        else:
            self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]]
        return x, attn, self_attn_state
    return x, attn, None
class AugTransformerDecoderLayerBase(TransformerDecoderLayerBase):
    """Decoder layer block augmented with an additional cross-attention.

    This decoder block is processed with the sequence of the following sub-modules.
        self-attention -> cross-attention (first) -> cross-attention (second) -> FFN

    Args:
        cfg (argparse.Namespace): parsed command-line arguments
        add_bias_kv (bool, optional): forwarded to the base layer.
        add_zero_attn (bool, optional): accepted for interface compatibility;
            the base layer is always built with ``add_zero_attn=False``.
        encoder_attn_merge_type (str, optional): the way to combine outputs from
            two cross-attention modules. If "sequential" is set, two cross-attention
            modules are stacked sequentially. If "parallel" is set, they are processed
            in parallel and combined before feeding it to FFN (default: sequential).
        dropnet_ratio (float, optional): a probability to drop each cross-attention
            module during training (default: 0.0).
    """

    def __init__(
        self,
        cfg,
        add_bias_kv=False,
        add_zero_attn=False,
        encoder_attn_merge_type="sequential",
        dropnet_ratio=0.0,
    ):
        # NOTE(review): ``add_zero_attn`` is not forwarded (a literal False is
        # passed). Kept as-is so existing configurations behave the same;
        # confirm whether this is intentional.
        super().__init__(
            cfg,
            no_encoder_attn=False,
            add_bias_kv=add_bias_kv,
            add_zero_attn=False,
        )
        # The first cross-attention and its norm are (re)built here; the
        # statement order is kept so module construction order is unchanged.
        self.encoder_attn = self.build_encoder_attention(self.embed_dim, cfg)
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export)
        self.encoder_attn2 = self.build_encoder_attention(self.embed_dim, cfg)
        # Only the sequential variant needs a separate norm for the second
        # cross-attention; the parallel variant shares the first one.
        if encoder_attn_merge_type == "sequential":
            self.encoder_attn_layer_norm2 = LayerNorm(self.embed_dim, export=cfg.export)
        else:
            self.encoder_attn_layer_norm2 = None

        self.encoder_attn_merge_type = encoder_attn_merge_type
        self.dropnet_ratio = dropnet_ratio

    @staticmethod
    def _seed_attn_buffer(
        attn_module,
        prev_state: Optional[List[torch.Tensor]],
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
    ) -> None:
        """Write ``[prev_key, prev_value(, prev_key_padding_mask)]`` into *attn_module*'s cache."""
        if prev_state is None:
            return
        prev_key, prev_value = prev_state[:2]
        saved_state: Dict[str, Optional[Tensor]] = {
            "prev_key": prev_key,
            "prev_value": prev_value,
        }
        if len(prev_state) >= 3:
            saved_state["prev_key_padding_mask"] = prev_state[2]
        assert incremental_state is not None
        attn_module._set_input_buffer(incremental_state, saved_state)

    def forward(
        self,
        x,
        encoder_out: Optional[torch.Tensor] = None,
        encoder_padding_mask: Optional[torch.Tensor] = None,
        encoder_out_aug: Optional[torch.Tensor] = None,
        encoder_padding_mask2: Optional[torch.Tensor] = None,
        incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
        prev_self_attn_state: Optional[List[torch.Tensor]] = None,
        prev_attn_state: Optional[List[torch.Tensor]] = None,
        self_attn_mask: Optional[torch.Tensor] = None,
        self_attn_padding_mask: Optional[torch.Tensor] = None,
        need_attn: bool = False,
        need_head_weights: bool = False,
    ):
        """
        Args:
            x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)`
            encoder_out (Tensor): key/value source of the first cross-attention
                (required).
            encoder_padding_mask (ByteTensor, optional): binary
                ByteTensor of shape `(batch, src_len)` where padding
                elements are indicated by ``1``.
            encoder_out_aug (Tensor): key/value source of the second
                cross-attention (required).
            encoder_padding_mask2 (ByteTensor, optional): key padding mask for
                the second cross-attention.
            incremental_state (dict, optional): cache passed to the attention
                modules.
            prev_self_attn_state (list, optional): cached self-attention state
                to restore; requires ``incremental_state``.
            prev_attn_state (list, optional): cached cross-attention state to
                restore; requires ``incremental_state``.
            self_attn_mask (Tensor, optional): self-attention mask.
            self_attn_padding_mask (Tensor, optional): self-attention key
                padding mask.
            need_attn (bool, optional): return attention weights
            need_head_weights (bool, optional): return attention weights
                for each head (default: return average over heads).

        Returns:
            tuple ``(x, attn, attn2, self_attn_state)``: ``x`` is the encoded
            output of shape `(seq_len, batch, embed_dim)`; ``attn`` / ``attn2``
            are the second return values of the first / second cross-attention
            (``attn2`` is ``None`` when the second cross-attention was dropped);
            ``self_attn_state`` is only populated when ONNX tracing with an
            incremental state, otherwise ``None``.

        Raises:
            NotImplementedError: if ``encoder_attn_merge_type`` is neither
                "sequential" nor "parallel".
        """
        if need_head_weights:
            need_attn = True
        # Cross-attention weights are requested explicitly, or outside
        # training when self.need_attn is still enabled.
        want_weights = need_attn or (not self.training and self.need_attn)

        # ---- self-attention sub-layer ----
        residual = x
        if self.normalize_before:
            x = self.self_attn_layer_norm(x)
        self._seed_attn_buffer(self.self_attn, prev_self_attn_state, incremental_state)
        _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state)
        # With cross-self-attention, encoder states are prepended to the
        # self-attention keys/values unless a "prev_key" is already cached.
        if self.cross_self_attention and not (
            incremental_state is not None
            and _self_attn_input_buffer is not None
            and "prev_key" in _self_attn_input_buffer
        ):
            if self_attn_mask is not None:
                assert encoder_out is not None
                self_attn_mask = torch.cat(
                    (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1
                )
            if self_attn_padding_mask is not None:
                if encoder_padding_mask is None:
                    assert encoder_out is not None
                    encoder_padding_mask = self_attn_padding_mask.new_zeros(
                        encoder_out.size(1), encoder_out.size(0)
                    )
                self_attn_padding_mask = torch.cat(
                    (encoder_padding_mask, self_attn_padding_mask), dim=1
                )
            assert encoder_out is not None
            y = torch.cat((encoder_out, x), dim=0)
        else:
            y = x

        x, attn = self.self_attn(
            query=x,
            key=y,
            value=y,
            key_padding_mask=self_attn_padding_mask,
            incremental_state=incremental_state,
            need_weights=False,
            attn_mask=self_attn_mask,
        )
        if self.c_attn is not None:
            # Scale each head's slice of the output by its learned factor.
            tgt_len, bsz = x.size(0), x.size(1)
            x = x.view(tgt_len, bsz, self.nh, self.head_dim)
            x = torch.einsum("tbhd,h->tbhd", x, self.c_attn)
            x = x.reshape(tgt_len, bsz, self.embed_dim)
        if self.attn_ln is not None:
            x = self.attn_ln(x)
        x = self.dropout_module(x)
        x = self.residual_connection(x, residual)
        if not self.normalize_before:
            x = self.self_attn_layer_norm(x)

        assert encoder_out is not None
        assert encoder_out_aug is not None

        # Fix: in "sequential" mode the second cross-attention is skipped
        # whenever dropnet returns [2, 0]; without this default the final
        # ``return`` raised UnboundLocalError on ``attn2``.
        attn2: Optional[Tensor] = None

        if self.encoder_attn_merge_type == "sequential":
            ratios = self.get_dropnet_ratio()

            # first encoder attention
            if ratios[0] > 0:
                residual = x
                if self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)
                self._seed_attn_buffer(
                    self.encoder_attn, prev_attn_state, incremental_state
                )

                x, attn = self.encoder_attn(
                    query=x,
                    key=encoder_out,
                    value=encoder_out,
                    key_padding_mask=encoder_padding_mask,
                    incremental_state=incremental_state,
                    static_kv=True,
                    need_weights=want_weights,
                    need_head_weights=need_head_weights,
                )
                x = self.dropout_module(x)
                x = self.residual_connection(x, residual)
                if not self.normalize_before:
                    x = self.encoder_attn_layer_norm(x)
                x = ratios[0] * x

            # second encoder attention
            if ratios[1] > 0:
                residual = x
                if self.normalize_before:
                    x = self.encoder_attn_layer_norm2(x)
                # NOTE(review): the same ``prev_attn_state`` is written into
                # both cross-attention caches; preserved from the original.
                self._seed_attn_buffer(
                    self.encoder_attn2, prev_attn_state, incremental_state
                )

                x, attn2 = self.encoder_attn2(
                    query=x,
                    key=encoder_out_aug,
                    value=encoder_out_aug,
                    key_padding_mask=encoder_padding_mask2,
                    incremental_state=incremental_state,
                    static_kv=True,
                    need_weights=want_weights,
                    need_head_weights=need_head_weights,
                )
                x = self.dropout_module(x)
                x = self.residual_connection(x, residual)
                if not self.normalize_before:
                    x = self.encoder_attn_layer_norm2(x)
                x = ratios[1] * x

        elif self.encoder_attn_merge_type == "parallel":
            residual = x
            if self.normalize_before:
                x = self.encoder_attn_layer_norm(x)
            self._seed_attn_buffer(self.encoder_attn, prev_attn_state, incremental_state)

            # Both cross-attentions read the same (optionally normalised) query.
            x1, attn = self.encoder_attn(
                query=x,
                key=encoder_out,
                value=encoder_out,
                key_padding_mask=encoder_padding_mask,
                incremental_state=incremental_state,
                static_kv=True,
                need_weights=want_weights,
                need_head_weights=need_head_weights,
            )
            x2, attn2 = self.encoder_attn2(
                query=x,
                key=encoder_out_aug,
                value=encoder_out_aug,
                key_padding_mask=encoder_padding_mask2,
                incremental_state=incremental_state,
                static_kv=True,
                need_weights=want_weights,
                need_head_weights=need_head_weights,
            )
            x1 = self.dropout_module(x1)
            x2 = self.dropout_module(x2)
            ratios = self.get_dropnet_ratio()
            # Weighted sum of the two branches before the shared residual add.
            x = ratios[0] * x1 + ratios[1] * x2
            x = self.residual_connection(x, residual)
            if not self.normalize_before:
                x = self.encoder_attn_layer_norm(x)

        else:
            raise NotImplementedError(self.encoder_attn_merge_type)

        # ---- feed-forward sub-layer ----
        residual = x
        if self.normalize_before:
            x = self.final_layer_norm(x)

        x = self.activation_fn(self.fc1(x))
        x = self.activation_dropout_module(x)
        if self.ffn_layernorm is not None:
            x = self.ffn_layernorm(x)
        x = self.fc2(x)
        x = self.dropout_module(x)
        if self.w_resid is not None:
            residual = torch.mul(self.w_resid, residual)
        x = self.residual_connection(x, residual)
        if not self.normalize_before:
            x = self.final_layer_norm(x)
        if self.onnx_trace and incremental_state is not None:
            saved_state = self.self_attn._get_input_buffer(incremental_state)
            assert saved_state is not None
            if self_attn_padding_mask is not None:
                self_attn_state = [
                    saved_state["prev_key"],
                    saved_state["prev_value"],
                    saved_state["prev_key_padding_mask"],
                ]
            else:
                self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]]
            return x, attn, attn2, self_attn_state
        return x, attn, attn2, None

    def get_dropnet_ratio(self):
        """Return the ``[first, second]`` weights of the two cross-attentions.

        Without dropnet (or outside training) both branches are kept:
        ``[1, 1]`` for "sequential" and ``[0.5, 0.5]`` for "parallel". During
        training with ``dropnet_ratio > 0`` a uniform sample decides whether
        only the first (``[2, 0]`` / ``[1, 0]``) or only the second
        (``[0, 2]`` / ``[0, 1]``) branch is used. For any other merge type
        ``None`` is returned.
        """
        if self.encoder_attn_merge_type == "sequential":
            keep_both, first_only, second_only = [1, 1], [2, 0], [0, 2]
        elif self.encoder_attn_merge_type == "parallel":
            keep_both, first_only, second_only = [0.5, 0.5], [1, 0], [0, 1]
        else:
            return None

        if self.dropnet_ratio > 0:
            # The sample is drawn even in eval mode so the NumPy RNG stream
            # advances exactly as before.
            frand = float(uniform(0, 1))
            if self.training:
                if frand < self.dropnet_ratio:
                    return first_only
                if frand > 1 - self.dropnet_ratio:
                    return second_only
        return keep_both
def init_bert_params(module):
    """Apply BERT-style initialisation to a single module.

    Intended to be passed to ``nn.Module.apply``. Depending on the type of
    *module*:

    1. ``nn.Linear``: weight drawn from N(0, 0.02); bias (if any) zeroed.
    2. ``nn.Embedding``: weight drawn from N(0, 0.02); the ``padding_idx``
       row (if any) zeroed.
    3. ``MultiheadAttention``: the ``q_proj``, ``k_proj`` and ``v_proj``
       weights drawn from N(0, 0.02).

    Any other module is left untouched.
    """

    def _fill_normal(tensor):
        # with FSDP, module params will be on CUDA, so the sample is drawn on
        # a CPU copy and moved back; this keeps the RNG stream identical with
        # and without FSDP
        sampled = tensor.cpu().normal_(mean=0.0, std=0.02)
        tensor.copy_(sampled.to(tensor.device))

    if isinstance(module, nn.Linear):
        _fill_normal(module.weight.data)
        bias = module.bias
        if bias is not None:
            bias.data.zero_()
    if isinstance(module, nn.Embedding):
        _fill_normal(module.weight.data)
        pad_row = module.padding_idx
        if pad_row is not None:
            module.weight.data[pad_row].zero_()
    if isinstance(module, MultiheadAttention):
        for projection in (module.q_proj, module.k_proj, module.v_proj):
            _fill_normal(projection.weight.data)
def __init__(
    self,
    padding_idx: int,
    vocab_size: int,
    num_encoder_layers: int = 6,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    layerdrop: float = 0.0,
    max_seq_len: int = 256,
    num_segments: int = 2,
    use_position_embeddings: bool = True,
    offset_positions_by_padding: bool = True,
    encoder_normalize_before: bool = False,
    apply_bert_init: bool = False,
    activation_fn: str = "relu",
    learned_pos_embedding: bool = True,
    embed_scale: Optional[float] = None,
    freeze_embeddings: bool = False,
    n_trans_layers_to_freeze: int = 0,
    export: bool = False,
    traceable: bool = False,
    q_noise: float = 0.0,
    qn_block_size: int = 8,
) -> None:
    """Build embeddings and the stack of encoder layers.

    Args:
        padding_idx: index of the padding token in the vocabulary.
        vocab_size: number of rows of the token embedding.
        num_encoder_layers: number of encoder layers to stack.
        embedding_dim: model width.
        ffn_embedding_dim: hidden width of each layer's feed-forward net.
        num_attention_heads: heads per self-attention module.
        dropout: dropout probability for embeddings and layers.
        attention_dropout: dropout probability passed to each layer.
        activation_dropout: dropout probability passed to each layer.
        layerdrop: if > 0, layers are held in a ``LayerDropModuleList``
            with this probability.
        max_seq_len: size passed to the positional embedding.
        num_segments: number of segment embeddings; none are created if 0.
        use_position_embeddings: whether to create positional embeddings.
        offset_positions_by_padding: if true, ``padding_idx`` is passed to
            the positional embedding, otherwise ``None``.
        encoder_normalize_before: if true, a layer norm is applied to the
            embeddings.
        apply_bert_init: if true, ``init_bert_params`` is applied to all
            sub-modules after construction.
        activation_fn: activation name passed to each layer.
        learned_pos_embedding: passed as ``learned`` to the positional
            embedding.
        embed_scale: optional multiplier stored for use on the embeddings.
        freeze_embeddings: if true, gradients are disabled for all embedding
            modules and the embedding layer norm.
        n_trans_layers_to_freeze: number of leading layers whose parameters
            get ``requires_grad = False``.
        export: passed to ``LayerNorm`` and to each layer.
        traceable: stored on the instance as ``self.traceable``.
        q_noise: quantization-noise amount; if > 0 an extra noised linear
            projection is created.
        qn_block_size: block size for quantization noise.
    """

    super().__init__()
    self.padding_idx = padding_idx
    self.vocab_size = vocab_size
    self.dropout_module = FairseqDropout(
        dropout, module_name=self.__class__.__name__
    )
    self.layerdrop = layerdrop
    self.max_seq_len = max_seq_len
    self.embedding_dim = embedding_dim
    self.num_segments = num_segments
    self.use_position_embeddings = use_position_embeddings
    self.apply_bert_init = apply_bert_init
    self.learned_pos_embedding = learned_pos_embedding
    self.traceable = traceable

    self.embed_tokens = self.build_embedding(
        self.vocab_size, self.embedding_dim, self.padding_idx
    )
    self.embed_scale = embed_scale

    # Optional bias-free square projection wrapped with quantization noise.
    if q_noise > 0:
        self.quant_noise = apply_quant_noise_(
            nn.Linear(self.embedding_dim, self.embedding_dim, bias=False),
            q_noise,
            qn_block_size,
        )
    else:
        self.quant_noise = None

    self.segment_embeddings = (
        nn.Embedding(self.num_segments, self.embedding_dim, padding_idx=None)
        if self.num_segments > 0
        else None
    )

    self.embed_positions = (
        PositionalEmbedding(
            self.max_seq_len,
            self.embedding_dim,
            padding_idx=(self.padding_idx if offset_positions_by_padding else None),
            learned=self.learned_pos_embedding,
        )
        if self.use_position_embeddings
        else None
    )

    if encoder_normalize_before:
        self.emb_layer_norm = LayerNorm(self.embedding_dim, export=export)
    else:
        self.emb_layer_norm = None

    # LayerDropModuleList is only used when layerdrop is enabled.
    if self.layerdrop > 0.0:
        self.layers = LayerDropModuleList(p=self.layerdrop)
    else:
        self.layers = nn.ModuleList([])
    self.layers.extend(
        [
            self.build_transformer_sentence_encoder_layer(
                embedding_dim=self.embedding_dim,
                ffn_embedding_dim=ffn_embedding_dim,
                num_attention_heads=num_attention_heads,
                dropout=self.dropout_module.p,
                attention_dropout=attention_dropout,
                activation_dropout=activation_dropout,
                activation_fn=activation_fn,
                export=export,
                q_noise=q_noise,
                qn_block_size=qn_block_size,
            )
            for _ in range(num_encoder_layers)
        ]
    )

    # Apply initialization of model params after building the model
    if self.apply_bert_init:
        self.apply(init_bert_params)

    def freeze_module_params(m):
        # Tolerates None so optional sub-modules can be passed directly.
        if m is not None:
            for p in m.parameters():
                p.requires_grad = False

    if freeze_embeddings:
        freeze_module_params(self.embed_tokens)
        freeze_module_params(self.segment_embeddings)
        freeze_module_params(self.embed_positions)
        freeze_module_params(self.emb_layer_norm)

    # Freeze the first n_trans_layers_to_freeze layers of the stack.
    for layer in range(n_trans_layers_to_freeze):
        freeze_module_params(self.layers[layer])
def forward(
    self,
    tokens: torch.Tensor,
    segment_labels: torch.Tensor = None,
    last_state_only: bool = False,
    positions: Optional[torch.Tensor] = None,
    token_embeddings: Optional[torch.Tensor] = None,
    attn_mask: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Embed *tokens* and run them through every encoder layer.

    Args:
        tokens: B x T matrix of token indices.
        segment_labels: optional B x T matrix of segment labels.
        last_state_only: if true, only the final layer output is kept in
            the returned states.
        positions: optional positions forwarded to the positional embedding.
        token_embeddings: optional pre-computed embeddings used instead of
            looking up ``tokens``.
        attn_mask: optional mask forwarded to each layer as
            ``self_attn_mask``.

    Returns:
        ``(inner_states, sentence_rep)``. ``inner_states`` holds T x B x C
        tensors (stacked into one tensor when ``self.traceable``), and
        ``sentence_rep`` is the final state at the first position.
    """
    on_tpu = tokens.device.type == "xla"

    # Padding mask for multi-head attention. It is dropped when nothing is
    # padded, except in traceable mode or on TPU where it is always kept.
    padding_mask = tokens.eq(self.padding_idx)
    if not (self.traceable or on_tpu or padding_mask.any()):
        padding_mask = None

    x = token_embeddings if token_embeddings is not None else self.embed_tokens(tokens)

    if self.embed_scale is not None:
        x = x * self.embed_scale

    if self.embed_positions is not None:
        x = x + self.embed_positions(tokens, positions=positions)

    if self.segment_embeddings is not None and segment_labels is not None:
        x = x + self.segment_embeddings(segment_labels)

    if self.quant_noise is not None:
        x = self.quant_noise(x)

    if self.emb_layer_norm is not None:
        x = self.emb_layer_norm(x)

    x = self.dropout_module(x)

    # Zero the embeddings at padded positions.
    if padding_mask is not None:
        keep = 1 - padding_mask.unsqueeze(-1).type_as(x)
        x = x * keep

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    collect_all = not last_state_only
    inner_states = []
    if collect_all:
        inner_states.append(x)

    for encoder_layer in self.layers:
        x, _ = encoder_layer(
            x, self_attn_padding_mask=padding_mask, self_attn_mask=attn_mask
        )
        if collect_all:
            inner_states.append(x)

    sentence_rep = x[0, :, :]

    if last_state_only:
        inner_states = [x]

    if self.traceable:
        return torch.stack(inner_states), sentence_rep
    return inner_states, sentence_rep
def __init__(
    self,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    activation_fn: str = "relu",
    export: bool = False,
    q_noise: float = 0.0,
    qn_block_size: int = 8,
    init_fn: Optional[Callable] = None,
) -> None:
    """Build the self-attention block, the feed-forward net and their norms.

    Args:
        embedding_dim: model width.
        ffn_embedding_dim: hidden width of the feed-forward net.
        num_attention_heads: number of self-attention heads.
        dropout: dropout probability after attention and after the FFN.
        attention_dropout: dropout probability inside the attention module.
        activation_dropout: dropout probability after the FFN activation.
        activation_fn: name resolved through ``utils.get_activation_fn``.
        export: passed to ``LayerNorm``.
        q_noise: quantization-noise amount for attention and linear layers.
        qn_block_size: block size for quantization noise.
        init_fn: optional zero-argument callable invoked before any
            sub-module is created.
    """
    super().__init__()

    # Caller-provided hook, run before the sub-modules are built.
    if init_fn is not None:
        init_fn()

    # Initialize parameters
    self.embedding_dim = embedding_dim
    self.num_attention_heads = num_attention_heads
    self.attention_dropout = attention_dropout
    self.q_noise = q_noise
    self.qn_block_size = qn_block_size

    self.dropout_module = FairseqDropout(
        dropout, module_name=self.__class__.__name__
    )
    self.activation_dropout_module = FairseqDropout(
        activation_dropout, module_name=self.__class__.__name__
    )

    # Initialize blocks
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.self_attn = self.build_self_attention(
        self.embedding_dim,
        num_attention_heads,
        dropout=attention_dropout,
        self_attention=True,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )

    # layer norm associated with the self attention layer
    self.self_attn_layer_norm = LayerNorm(self.embedding_dim, export=export)

    # Feed-forward network: embedding_dim -> ffn_embedding_dim -> embedding_dim.
    self.fc1 = self.build_fc1(
        self.embedding_dim,
        ffn_embedding_dim,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )
    self.fc2 = self.build_fc2(
        ffn_embedding_dim,
        self.embedding_dim,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )

    # layer norm associated with the position wise feed-forward NN
    self.final_layer_norm = LayerNorm(self.embedding_dim, export=export)
def forward(
    self,
    x: torch.Tensor,
    self_attn_mask: Optional[torch.Tensor] = None,
    self_attn_padding_mask: Optional[torch.Tensor] = None,
):
    """Run the self-attention and feed-forward sub-layers.

    Both sub-layers follow the same pattern: sub-layer, dropout, residual
    add, then layer norm on the sum.

    Args:
        x: layer input.
        self_attn_mask: optional mask passed to the attention module as
            ``attn_mask``.
        self_attn_padding_mask: optional mask passed as ``key_padding_mask``.

    Returns:
        ``(x, attn)``: the layer output and the second value returned by
        the attention module (called with ``need_weights=False``).
    """
    # ---- self-attention sub-layer ----
    attn_input = x
    attn_output, attn = self.self_attn(
        query=attn_input,
        key=attn_input,
        value=attn_input,
        key_padding_mask=self_attn_padding_mask,
        need_weights=False,
        attn_mask=self_attn_mask,
    )
    x = self.self_attn_layer_norm(attn_input + self.dropout_module(attn_output))

    # ---- feed-forward sub-layer ----
    ffn_input = x
    hidden = self.activation_dropout_module(self.activation_fn(self.fc1(ffn_input)))
    ffn_output = self.dropout_module(self.fc2(hidden))
    x = self.final_layer_norm(ffn_input + ffn_output)
    return x, attn
"""
transpose last 2 dimensions of the input
"""

import torch.nn as nn


class TransposeLast(nn.Module):
    """Swap the last dimension of the input with ``tranpose_dim``.

    Args:
        deconstruct_idx: when not ``None``, the input is first indexed with
            this value (``x[deconstruct_idx]``) and the result is transposed.
        tranpose_dim: dimension exchanged with the last one (default: ``-2``).
            The spelling of this keyword is part of the public interface.
    """

    def __init__(self, deconstruct_idx=None, tranpose_dim=-2):
        super().__init__()
        self.tranpose_dim = tranpose_dim
        self.deconstruct_idx = deconstruct_idx

    def forward(self, x):
        """Return ``x`` (or ``x[deconstruct_idx]``) with the two dims swapped."""
        index = self.deconstruct_idx
        selected = x if index is None else x[index]
        return selected.transpose(self.tranpose_dim, -1)


# diff --git a/fairseq/fairseq/modules/unfold.py b/fairseq/fairseq/modules/unfold.py
# new file mode 100644
# index 0000000..bbaafbd
# --- /dev/null
# +++ b/fairseq/fairseq/modules/unfold.py
# @@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch.nn.functional as F


def unfold1d(x, kernel_size: int, padding_l: int, pad_value: float = 0):
    """unfold T x B x C to T x B x C x K

    The first dimension is padded with ``padding_l`` entries in front and
    ``kernel_size - 1 - padding_l`` entries behind (filled with
    ``pad_value``); element ``[t, b, c, k]`` of the result then reads entry
    ``t + k`` of the padded tensor. The result is a strided view over the
    padded copy, so its windows share memory with each other.

    For ``kernel_size <= 1`` the input is returned with a trailing dimension
    of size one and no padding is applied.
    """
    if kernel_size <= 1:
        return x.unsqueeze(3)

    steps, batch, channels = x.size()
    padding_r = kernel_size - 1 - padding_l
    padded = F.pad(x, (0, 0, 0, 0, padding_l, padding_r), value=pad_value)
    # Moving one step along T and one step along K both advance by one
    # (B * C)-sized row of the padded tensor.
    row_stride = batch * channels
    return padded.as_strided(
        (steps, batch, channels, kernel_size),
        (row_stride, channels, 1, row_stride),
    )


# diff --git a/fairseq/fairseq/modules/vggblock.py b/fairseq/fairseq/modules/vggblock.py
# new file mode 100644
# index 0000000..ee5ee19
# --- /dev/null
# +++ b/fairseq/fairseq/modules/vggblock.py
# @@ -0,0 +1,116 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +from collections.abc import Iterable +from itertools import repeat + +import torch +import torch.nn as nn + + +def _pair(v): + if isinstance(v, Iterable): + assert len(v) == 2, "len(v) != 2" + return v + return tuple(repeat(v, 2)) + + +def infer_conv_output_dim(conv_op, input_dim, sample_inchannel): + sample_seq_len = 200 + sample_bsz = 10 + x = torch.randn(sample_bsz, sample_inchannel, sample_seq_len, input_dim) + # N x C x H x W + # N: sample_bsz, C: sample_inchannel, H: sample_seq_len, W: input_dim + x = conv_op(x) + # N x C x H x W + x = x.transpose(1, 2) + # N x H x C x W + bsz, seq = x.size()[:2] + per_channel_dim = x.size()[3] + # bsz: N, seq: H, CxW the rest + return x.contiguous().view(bsz, seq, -1).size(-1), per_channel_dim + + +class VGGBlock(torch.nn.Module): + """ + VGG motibated cnn module https://arxiv.org/pdf/1409.1556.pdf + + Args: + in_channels: (int) number of input channels (typically 1) + out_channels: (int) number of output channels + conv_kernel_size: convolution channels + pooling_kernel_size: the size of the pooling window to take a max over + num_conv_layers: (int) number of convolution layers + input_dim: (int) input dimension + conv_stride: the stride of the convolving kernel. + Can be a single number or a tuple (sH, sW) Default: 1 + padding: implicit paddings on both sides of the input. + Can be a single number or a tuple (padH, padW). Default: None + layer_norm: (bool) if layer norm is going to be applied. Default: False + + Shape: + Input: BxCxTxfeat, i.e. (batch_size, input_size, timesteps, features) + Output: BxCxTxfeat, i.e. 
(batch_size, input_size, timesteps, features) + """ + + def __init__( + self, + in_channels, + out_channels, + conv_kernel_size, + pooling_kernel_size, + num_conv_layers, + input_dim, + conv_stride=1, + padding=None, + layer_norm=False, + ): + assert ( + input_dim is not None + ), "Need input_dim for LayerNorm and infer_conv_output_dim" + super(VGGBlock, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.conv_kernel_size = _pair(conv_kernel_size) + self.pooling_kernel_size = _pair(pooling_kernel_size) + self.num_conv_layers = num_conv_layers + self.padding = ( + tuple(e // 2 for e in self.conv_kernel_size) + if padding is None + else _pair(padding) + ) + self.conv_stride = _pair(conv_stride) + + self.layers = nn.ModuleList() + for layer in range(num_conv_layers): + conv_op = nn.Conv2d( + in_channels if layer == 0 else out_channels, + out_channels, + self.conv_kernel_size, + stride=self.conv_stride, + padding=self.padding, + ) + self.layers.append(conv_op) + if layer_norm: + conv_output_dim, per_channel_dim = infer_conv_output_dim( + conv_op, input_dim, in_channels if layer == 0 else out_channels + ) + self.layers.append(nn.LayerNorm(per_channel_dim)) + input_dim = per_channel_dim + self.layers.append(nn.ReLU()) + + if self.pooling_kernel_size is not None: + pool_op = nn.MaxPool2d(kernel_size=self.pooling_kernel_size, ceil_mode=True) + self.layers.append(pool_op) + self.total_output_dim, self.output_dim = infer_conv_output_dim( + pool_op, input_dim, out_channels + ) + + def forward(self, x): + for i, _ in enumerate(self.layers): + x = self.layers[i](x) + return x diff --git a/fairseq/fairseq/nan_detector.py b/fairseq/fairseq/nan_detector.py new file mode 100644 index 0000000..bd0f911 --- /dev/null +++ b/fairseq/fairseq/nan_detector.py @@ -0,0 +1,108 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch


logger = logging.getLogger(__name__)


class NanDetector:
    """
    Detects the first NaN or Inf in forward and/or backward pass and logs, together with the module name

    Intended to be used as a context manager: hooks are registered on every
    sub-module at construction time and removed again on exit.
    """

    def __init__(self, model, forward=True, backward=True):
        self.bhooks = []
        self.fhooks = []
        self.forward = forward
        self.backward = backward
        self.named_parameters = list(model.named_parameters())
        self.reset()

        # Tag each sub-module with its qualified name so the hooks can report
        # where a bad value showed up.
        for module_name, submodule in model.named_modules():
            submodule.__module_name = module_name
            self.add_hooks(submodule)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # For every parameter that has a gradient, record the parameter norm
        # and keep the gradient itself when its L2 norm is NaN/Inf.
        param_norms = {}
        bad_gradients = {}
        for name, param in self.named_parameters:
            if param.grad is None:
                continue
            grad_norm = torch.norm(param.grad.data.float(), p=2)
            param_norms[name] = param.norm().item()
            if torch.isnan(grad_norm).any() or torch.isinf(grad_norm).any():
                bad_gradients[name] = param.grad.data
        if bad_gradients:
            logger.info("Detected nan/inf grad norm, dumping norms...")
            logger.info(f"norms: {param_norms}")
            logger.info(f"gradients: {bad_gradients}")

        self.close()

    def add_hooks(self, module):
        """Register the enabled forward/backward hooks on ``module``."""
        if self.forward:
            handle = module.register_forward_hook(self.fhook_fn)
            self.fhooks.append(handle)
        if self.backward:
            handle = module.register_backward_hook(self.bhook_fn)
            self.bhooks.append(handle)

    def reset(self):
        """Re-arm reporting for both the forward and the backward pass."""
        self.has_printed_f = False
        self.has_printed_b = False

    def _detect(self, tensor, name, backward):
        """Return a message if ``tensor`` holds NaN/Inf values, else ``None``."""
        # single value tensors (like the loss) will not provide much info
        if not torch.is_floating_point(tensor) or tensor.numel() < 2:
            return None
        with torch.no_grad():
            if torch.isnan(tensor).any():
                err = "NaN"
            elif torch.isinf(tensor).any():
                err = "Inf"
            else:
                return None
        return f"{err} detected in output of {name}, shape: {tensor.shape}, {'backward' if backward else 'forward'}"

    def _apply(self, module, inp, x, backward):
        """Check ``x`` (a tensor or a nested dict/list/tuple of them)."""
        if isinstance(x, dict):
            for value in x.values():
                self._apply(module, inp, value, backward)
            return
        if isinstance(x, (list, tuple)):
            for value in x:
                self._apply(module, inp, value, backward)
            return
        if not torch.is_tensor(x):
            return

        if isinstance(inp, tuple) and len(inp) > 0:
            inp = inp[0]
        err = self._detect(x, module.__module_name, backward)
        if err is None:
            return
        if torch.is_tensor(inp) and not backward:
            err += f" input max: {inp.max().item()}, input min: {inp.min().item()}"

        logger.warning(err)
        # Only the first hit per direction is reported.
        if backward:
            self.has_printed_b = True
        else:
            self.has_printed_f = True

    def fhook_fn(self, module, inp, output):
        if self.has_printed_f:
            return
        self._apply(module, inp, output, backward=False)

    def bhook_fn(self, module, inp, output):
        if self.has_printed_b:
            return
        self._apply(module, inp, output, backward=True)

    def close(self):
        """Remove every hook registered by this detector."""
        for handle in self.fhooks:
            handle.remove()
        for handle in self.bhooks:
            handle.remove()


# diff --git a/fairseq/fairseq/ngram_repeat_block.py b/fairseq/fairseq/ngram_repeat_block.py
# new file mode 100644
# index 0000000..4eb5030
# --- /dev/null
# +++ b/fairseq/fairseq/ngram_repeat_block.py
# @@ -0,0 +1,120 @@
# Originally from Microsoft Corporation.
# Licensed under the MIT License.
""" Wrapper for ngram_repeat_block cuda extension """
import math
import warnings
from typing import List

import torch
from torch import nn

try:
    from fairseq import ngram_repeat_block_cuda

    EXTENSION_BUILT = True
except ImportError:
    EXTENSION_BUILT = False


def is_cuda_extension_usable() -> bool:
    """Check whether ngram_repeat_block_cuda is built properly

    Returns ``False`` when the extension could not be imported, when CUDA is
    not available, or when a small smoke test of the kernel raises a
    ``RuntimeError`` (in which case a rebuild hint is emitted as a warning).
    """
    if not EXTENSION_BUILT or not torch.cuda.is_available():
        return False
    bsz = 2
    tokens = torch.tensor([[4, 4, 3, 2], [1, 2, 3, 4]], dtype=torch.long, device="cuda")
    lprobs = torch.rand((8, 12), device="cuda")
    try:
        outputs = ngram_repeat_block_cuda.forward(tokens, lprobs, bsz, 3, 4, 3)
        outputs = outputs + 4  # This line breaks if the extension is built incorrectly.
        return True
    except RuntimeError:
        # The two sentences used to be concatenated without a separating space.
        warnings.warn(
            "NGramRepeatBlock extension must be rebuilt. "
            'Run TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0" python setup.py build_ext --inplace'
        )
        return False


class NGramRepeatBlock(nn.Module):
    """Wrapper class for calling ngram_repeat_block cuda extension

    Falls back to a pure Python implementation when the extension is not
    usable or ``use_extension`` is ``False``.
    """

    def __init__(self, no_repeat_ngram_size: int, use_extension: bool = True):
        super().__init__()
        self.use_extension = is_cuda_extension_usable() if use_extension else False
        self.no_repeat_ngram_size = no_repeat_ngram_size

    def reset_parameters(self):
        pass

    @torch.jit.unused
    def call_cuda_extension(
        self,
        tokens,
        lprobs,
        bsz: int,
        beam_size: int,
        step: int,
    ):
        # Note the argument order expected by the extension: step before beam_size.
        return ngram_repeat_block_cuda.forward(
            tokens, lprobs, bsz, step, beam_size, self.no_repeat_ngram_size
        )

    def forward(
        self,
        tokens,
        lprobs,
        bsz: int,
        beam_size: int,
        step: int,
    ):
        """
        Args:
            tokens(Tensor): Input tokens(Bsz*beam, seq_len)
            lprobs(Tensor): likelihood probability,
            Expected to be updated in place.(Bsz*beam, vocab_size)
            bsz(int): batch size
            step(int): current step
            beam_size(int): beam size

        The n-gram size is taken from ``self.no_repeat_ngram_size``.
        """
        msg = f"expected {bsz *beam_size} got"
        assert tokens.size(0) == bsz * beam_size, f"{msg} {tokens.size(0)}"
        assert lprobs.size(0) == bsz * beam_size, f"{msg} {lprobs.size(0)}"
        if self.use_extension:
            return self.call_cuda_extension(tokens, lprobs, bsz, beam_size, step)

        else:
            return self._no_repeat_ngram(
                tokens,
                lprobs,
                bsz,
                beam_size,
                step,
            )

    def _no_repeat_ngram(self, tokens, lprobs, bsz: int, beam_size: int, step: int):
        """For each hypothesis generate a list of previous ngrams and set associated lprobs to -inf

        The last ``no_repeat_ngram_size - 1`` tokens ending at position
        ``step`` are compared with every earlier window of the same length;
        the token that followed a matching window is banned. Positions are
        taken relative to ``step`` rather than to the end of ``tokens``, so
        entries after ``step`` in a longer buffer are ignored. ``tokens`` is
        expected to hold at least ``step + 1`` entries per row.

        NOTE(review): with ``no_repeat_ngram_size <= 1`` nothing is banned,
        which is what this fallback already did for size 1 — confirm whether
        unigram blocking is wanted here.
        """
        banned_tokens = [
            torch.jit.annotate(List[int], []) for bbsz_idx in range(bsz * beam_size)
        ]
        prefix_len = self.no_repeat_ngram_size - 1
        check_start_pos = step + 2 - self.no_repeat_ngram_size
        if prefix_len > 0 and check_start_pos >= 0:
            cpu_tokens: List[List[int]] = tokens.cpu().tolist()
            for bbsz_idx in range(bsz * beam_size):
                hyp = cpu_tokens[bbsz_idx]
                # A negative slice from the end would pick up whatever sits
                # after `step` in a longer buffer; index by step instead.
                ngram_to_check = hyp[check_start_pos : step + 1]
                for i in range(check_start_pos):
                    if ngram_to_check == hyp[i : i + prefix_len]:
                        banned_tokens[bbsz_idx].append(hyp[i + prefix_len])
        neg_inf = torch.tensor(-math.inf).to(lprobs)
        for bbsz_idx in range(bsz * beam_size):
            lprobs[bbsz_idx][
                torch.tensor(banned_tokens[bbsz_idx], dtype=torch.int64)
            ] = neg_inf
        return lprobs


# diff --git a/fairseq/fairseq/optim/__init__.py b/fairseq/fairseq/optim/__init__.py
# new file mode 100644
# index 0000000..be783be
# --- /dev/null
# +++ b/fairseq/fairseq/optim/__init__.py
# @@ -0,0 +1,48 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+"""isort:skip_file""" + +import importlib +import os + +from fairseq import registry +from fairseq.optim.bmuf import FairseqBMUF # noqa +from fairseq.optim.fairseq_optimizer import ( # noqa + FairseqOptimizer, + LegacyFairseqOptimizer, +) +from fairseq.optim.amp_optimizer import AMPOptimizer +from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer +from fairseq.optim.shard import shard_ +from omegaconf import DictConfig + +__all__ = [ + "AMPOptimizer", + "FairseqOptimizer", + "FP16Optimizer", + "MemoryEfficientFP16Optimizer", + "shard_", +] + +( + _build_optimizer, + register_optimizer, + OPTIMIZER_REGISTRY, + OPTIMIZER_DATACLASS_REGISTRY, +) = registry.setup_registry("--optimizer", base_class=FairseqOptimizer, required=True) + + +def build_optimizer(cfg: DictConfig, params, *extra_args, **extra_kwargs): + if all(isinstance(p, dict) for p in params): + params = [t for p in params for t in p.values()] + params = list(filter(lambda p: p.requires_grad, params)) + return _build_optimizer(cfg, params, *extra_args, **extra_kwargs) + + +# automatically import any Python files in the optim/ directory +for file in sorted(os.listdir(os.path.dirname(__file__))): + if file.endswith(".py") and not file.startswith("_"): + file_name = file[: file.find(".py")] + importlib.import_module("fairseq.optim." + file_name) diff --git a/fairseq/fairseq/optim/adadelta.py b/fairseq/fairseq/optim/adadelta.py new file mode 100644 index 0000000..f1a2154 --- /dev/null +++ b/fairseq/fairseq/optim/adadelta.py @@ -0,0 +1,47 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch.optim + +from . 
import LegacyFairseqOptimizer, register_optimizer + + +@register_optimizer("adadelta") +class Adadelta(LegacyFairseqOptimizer): + def __init__(self, args, params): + super().__init__(args) + self._optimizer = torch.optim.Adadelta(params, **self.optimizer_config) + + @staticmethod + def add_args(parser): + """Add optimizer-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--adadelta-rho', type=float, default=0.9, metavar='RHO', + help='coefficient used for computing a running average of squared gradients') + parser.add_argument('--adadelta-eps', type=float, default=1e-6, metavar='EPS', + help='term added to the denominator to improve numerical stability') + parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', + help='weight decay') + parser.add_argument('--anneal-eps', action='store_true', help='flag to anneal eps') + # fmt: on + + @property + def optimizer_config(self): + """ + Return a kwarg dictionary that will be used to override optimizer + args stored in checkpoints. This allows us to load a checkpoint and + resume training using a different set of optimizer args, e.g., with a + different learning rate. + """ + return { + "lr": self.args.lr[0], + "rho": self.args.adadelta_rho, + "eps": self.args.adadelta_eps, + "weight_decay": self.args.weight_decay, + } + + @property + def supports_flat_params(self): + return True diff --git a/fairseq/fairseq/optim/adafactor.py b/fairseq/fairseq/optim/adafactor.py new file mode 100644 index 0000000..042ae92 --- /dev/null +++ b/fairseq/fairseq/optim/adafactor.py @@ -0,0 +1,268 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import math + +import torch +import torch.optim + +from . 
import LegacyFairseqOptimizer, register_optimizer + + +@register_optimizer("adafactor") +class FairseqAdafactor(LegacyFairseqOptimizer): + def __init__(self, args, params): + super().__init__(args) + self._optimizer = Adafactor(params, **self.optimizer_config) + + @staticmethod + def add_args(parser): + """Add optimizer-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--adafactor-eps', default='(1e-30, 1e-3)', metavar="E", + help='epsilons for Adafactor optimizer') + parser.add_argument('--clip-threshold', type=float, default=1.0, metavar="C", + help='threshold for clipping update root mean square') + parser.add_argument('--decay-rate', type=float, default=-0.8, metavar="D", + help='decay rate of the second moment estimator') + parser.add_argument('--beta1', type=float, default=None, metavar="B", + help='beta for first moment estimator. Optional') + parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', + help='weight decay') + parser.add_argument('--scale-parameter', action='store_true', + help='scale learning rate by root mean square of parameter') + parser.add_argument('--relative-step', action='store_true', + help='set learning rate to inverse square root of timestep,' + 'otherwise use external learning rate') + parser.add_argument('--warmup-init', action='store_true', + help='use relative step for warm-up learning rate schedule') + # fmt: on + + @property + def optimizer_config(self): + """ + Return a kwarg dictionary that will be used to override optimizer + args stored in checkpoints. This allows us to load a checkpoint and + resume training using a different set of optimizer args, e.g., with a + different learning rate. + Note : Convergence issues empirically observed with fp16 on. + Might require search for appropriate configuration. 
+ """ + return { + "lr": self.args.lr[0], + "eps": eval(self.args.adafactor_eps), + "clip_threshold": self.args.clip_threshold, + "decay_rate": self.args.decay_rate, + "beta1": self.args.beta1, + "weight_decay": self.args.weight_decay, + "scale_parameter": self.args.scale_parameter, # defaults to False + "relative_step": self.args.relative_step, # defaults to False + "warmup_init": self.args.warmup_init, + } + + +class Adafactor(torch.optim.Optimizer): + """Implements Adafactor algorithm. + + This implementation is based on: + `Adafactor: Adaptive Learning Rates with Sublinear Memory Cost` + (see https://arxiv.org/abs/1804.04235) + + Note that this optimizer internally adjusts the learning rate + depending on the *scale_parameter*, *relative_step* and + *warmup_init* options. To use a manual (external) learning rate + schedule you should set `scale_parameter=False` and + `relative_step=False`. + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): external learning rate (default: None) + eps (tuple[float, float]): regularization constans for square gradient + and parameter scale respectively (default: (1e-30, 1e-3)) + clip_threshold (float): threshold of root mean square of + final gradient update (default: 1.0) + decay_rate (float): coefficient used to compute running averages of square + gradient (default: -0.8) + beta1 (float): coefficient used for computing running averages of gradient + (default: None) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + scale_parameter (bool): if True, learning rate is scaled by root mean square of + parameter (default: True) + relative_step (bool): if True, time-dependent learning rate is computed + instead of external learning rate (default: True) + warmup_init (bool): time-dependent learning rate computation depends on + whether warm-up initialization is being used (default: False) + """ + + def __init__( + self, + params, + 
lr=None, + eps=(1e-30, 1e-3), + clip_threshold=1.0, + decay_rate=-0.8, + beta1=None, + weight_decay=0.0, + scale_parameter=True, + relative_step=True, + warmup_init=False, + ): + if lr is not None and relative_step: + raise ValueError("Cannot combine manual lr and relative_step options") + if warmup_init and not relative_step: + raise ValueError("warmup_init requires relative_step=True") + + defaults = dict( + lr=lr, + eps=eps, + clip_threshold=clip_threshold, + decay_rate=decay_rate, + beta1=beta1, + weight_decay=weight_decay, + scale_parameter=scale_parameter, + relative_step=relative_step, + warmup_init=warmup_init, + ) + super(Adafactor, self).__init__(params, defaults) + + @property + def supports_memory_efficient_fp16(self): + return True + + @property + def supports_flat_params(self): + return False + + def _get_lr(self, param_group, param_state): + rel_step_sz = param_group["lr"] + if param_group["relative_step"]: + min_step = ( + 1e-6 * param_state["step"] if param_group["warmup_init"] else 1e-2 + ) + rel_step_sz = min(min_step, 1.0 / math.sqrt(param_state["step"])) + param_scale = 1.0 + if param_group["scale_parameter"]: + param_scale = max(param_group["eps"][1], param_state["RMS"]) + return param_scale * rel_step_sz + + def _get_options(self, param_group, param_shape): + factored = len(param_shape) >= 2 + use_first_moment = param_group["beta1"] is not None + return factored, use_first_moment + + def _rms(self, tensor): + return tensor.norm(2) / (tensor.numel() ** 0.5) + + def _approx_sq_grad(self, exp_avg_sq_row, exp_avg_sq_col): + r_factor = ( + (exp_avg_sq_row / exp_avg_sq_row.mean(dim=-1, keepdim=True)) + .rsqrt_() + .unsqueeze(-1) + ) + c_factor = exp_avg_sq_col.unsqueeze(-2).rsqrt() + return torch.mul(r_factor, c_factor) + + def step(self, closure=None): + """Performs a single optimization step. + + Args: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data + if grad.dtype in {torch.float16, torch.bfloat16}: + grad = grad.float() + if grad.is_sparse: + raise RuntimeError("Adafactor does not support sparse gradients.") + + state = self.state[p] + grad_shape = grad.shape + + factored, use_first_moment = self._get_options(group, grad_shape) + # State Initialization + if len(state) == 0: + state["step"] = 0 + + if use_first_moment: + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(grad) + if factored: + state["exp_avg_sq_row"] = torch.zeros(grad_shape[:-1]).to(grad) + state["exp_avg_sq_col"] = torch.zeros( + grad_shape[:-2] + grad_shape[-1:] + ).to(grad) + else: + state["exp_avg_sq"] = torch.zeros_like(grad) + + state["RMS"] = 0 + else: + if use_first_moment: + state["exp_avg"] = state["exp_avg"].to(grad) + if factored: + state["exp_avg_sq_row"] = state["exp_avg_sq_row"].to(grad) + state["exp_avg_sq_col"] = state["exp_avg_sq_col"].to(grad) + else: + state["exp_avg_sq"] = state["exp_avg_sq"].to(grad) + + p_data_fp32 = p.data + if p.data.dtype in {torch.float16, torch.bfloat16}: + p_data_fp32 = p_data_fp32.float() + + state["step"] += 1 + state["RMS"] = self._rms(p_data_fp32) + group["lr"] = self._get_lr(group, state) + + beta2t = 1.0 - math.pow(state["step"], group["decay_rate"]) + update = (grad**2) + group["eps"][0] + if factored: + exp_avg_sq_row = state["exp_avg_sq_row"] + exp_avg_sq_col = state["exp_avg_sq_col"] + + exp_avg_sq_row.mul_(beta2t).add_( + update.mean(dim=-1), alpha=1.0 - beta2t + ) + exp_avg_sq_col.mul_(beta2t).add_( + update.mean(dim=-2), alpha=1.0 - beta2t + ) + + # Approximation of exponential moving average of square of gradient + update = self._approx_sq_grad(exp_avg_sq_row, exp_avg_sq_col) + update.mul_(grad) + else: + exp_avg_sq = state["exp_avg_sq"] + + 
exp_avg_sq.mul_(beta2t).add_(update, alpha=1.0 - beta2t) + update = exp_avg_sq.rsqrt().mul_(grad) + + update.div_( + (self._rms(update) / group["clip_threshold"]).clamp_(min=1.0) + ) + update.mul_(group["lr"]) + + if use_first_moment: + exp_avg = state["exp_avg"] + exp_avg.mul_(group["beta1"]).add_(update, alpha=1 - group["beta1"]) + update = exp_avg + + if group["weight_decay"] != 0: + p_data_fp32.add_( + p_data_fp32, alpha=-group["weight_decay"] * group["lr"] + ) + + p_data_fp32.add_(-update) + + if p.data.dtype in {torch.float16, torch.bfloat16}: + p.data.copy_(p_data_fp32) + + return loss diff --git a/fairseq/fairseq/optim/adagrad.py b/fairseq/fairseq/optim/adagrad.py new file mode 100644 index 0000000..4f53954 --- /dev/null +++ b/fairseq/fairseq/optim/adagrad.py @@ -0,0 +1,40 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch.optim + +from . import LegacyFairseqOptimizer, register_optimizer + + +@register_optimizer("adagrad") +class Adagrad(LegacyFairseqOptimizer): + def __init__(self, args, params): + super().__init__(args) + self._optimizer = torch.optim.Adagrad(params, **self.optimizer_config) + + @staticmethod + def add_args(parser): + """Add optimizer-specific arguments to the parser.""" + # fmt: off + parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', + help='weight decay') + # fmt: on + + @property + def optimizer_config(self): + """ + Return a kwarg dictionary that will be used to override optimizer + args stored in checkpoints. This allows us to load a checkpoint and + resume training using a different set of optimizer args, e.g., with a + different learning rate. 
+ """ + return { + "lr": self.args.lr[0], + "weight_decay": self.args.weight_decay, + } + + @property + def supports_flat_params(self): + return False diff --git a/fairseq/fairseq/optim/adam.py b/fairseq/fairseq/optim/adam.py new file mode 100644 index 0000000..678ec7c --- /dev/null +++ b/fairseq/fairseq/optim/adam.py @@ -0,0 +1,239 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import math +from collections.abc import Collection +from dataclasses import dataclass, field +from typing import Any, List + +import torch +import torch.distributed as dist +import torch.optim +from fairseq.dataclass import FairseqDataclass +from fairseq.optim import FairseqOptimizer, register_optimizer +from fairseq.optim.fused_adam import get_fused_adam_class +from omegaconf import II, OmegaConf + + +logger = logging.getLogger(__name__) + + +@dataclass +class FairseqAdamConfig(FairseqDataclass): + adam_betas: Any = field( + default=(0.9, 0.999), metadata={"help": "betas for Adam optimizer"} + ) + adam_eps: float = field( + default=1e-8, metadata={"help": "epsilon for Adam optimizer"} + ) + weight_decay: float = field(default=0.0, metadata={"help": "weight decay"}) + use_old_adam: bool = field( + default=False, metadata={"help": "Use fairseq.optim.adam.Adam"} + ) + fp16_adam_stats: bool = field( + default=False, metadata={"help": "use FP16 stats (with automatic scaling)"} + ) + # TODO common vars below in parent + tpu: bool = II("common.tpu") + lr: List[float] = II("optimization.lr") + + +@register_optimizer("adam", dataclass=FairseqAdamConfig) +class FairseqAdam(FairseqOptimizer): + """Adam optimizer for fairseq. + + Important note: this optimizer corresponds to the "AdamW" variant of + Adam in its weight decay behavior. As such, it is most closely + analogous to torch.optim.AdamW from PyTorch. 
+ """ + + def __init__(self, cfg: FairseqAdamConfig, params): + super().__init__(cfg) + fused_adam_cls = get_fused_adam_class() + use_fused_adam = ( + not getattr(cfg, "use_old_adam", False) + and fused_adam_cls is not None + and torch.cuda.is_available() + ) + if getattr(cfg, "tpu", False): + if self.cfg.fp16_adam_stats: + raise NotImplementedError("--fp16-adam-stats is only supported on GPU") + # on TPUs we use the Adam defined here, since it + # automatically casts gradients to FP32 + self._optimizer = Adam(params, **self.optimizer_config) + elif use_fused_adam: + logger.info("using FusedAdam") + self._optimizer = fused_adam_cls( + params, use_fp16_stats=self.cfg.fp16_adam_stats, **self.optimizer_config + ) + else: + if self.cfg.fp16_adam_stats: + raise NotImplementedError( + "--fp16-adam-stats is only supported with FusedAdamV1" + ) + self._optimizer = Adam(params, **self.optimizer_config) + + @property + def optimizer_config(self): + """ + Return a kwarg dictionary that will be used to override optimizer + args stored in checkpoints. This allows us to load a checkpoint and + resume training using a different set of optimizer args, e.g., with a + different learning rate. + """ + return { + "lr": self.cfg.lr[0] + if isinstance(self.cfg.lr, Collection) + else self.cfg.lr, + "betas": eval(self.cfg.adam_betas) + if isinstance(self.cfg.adam_betas, str) + else OmegaConf.to_container(self.cfg.adam_betas), + "eps": self.cfg.adam_eps, + "weight_decay": self.cfg.weight_decay, + } + + def average_params(self): + """Reduce Params is only used during BMUF distributed training.""" + state_dict = self.optimizer.state_dict() + total_gpus = float(dist.get_world_size()) + + for _, value in state_dict["state"].items(): + value["exp_avg"] /= total_gpus + value["exp_avg_sq"] /= total_gpus + dist.all_reduce(value["exp_avg"], op=dist.ReduceOp.SUM) + dist.all_reduce(value["exp_avg_sq"], op=dist.ReduceOp.SUM) + + +class Adam(torch.optim.Optimizer): + r"""Implements Adam algorithm. 
+ + This implementation is modified from torch.optim.Adam based on: + `Fixed Weight Decay Regularization in Adam` + (see https://arxiv.org/abs/1711.05101) + + It has been proposed in `Adam: A Method for Stochastic Optimization`_. + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + amsgrad (boolean, optional): whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__( + self, + params, + lr=1e-3, + betas=(0.9, 0.999), + eps=1e-8, + weight_decay=0, + amsgrad=False, + ): + defaults = dict( + lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad + ) + super(Adam, self).__init__(params, defaults) + + @property + def supports_memory_efficient_fp16(self): + return True + + @property + def supports_flat_params(self): + return True + + def step(self, closure=None): + """Performs a single optimization step. + + Args: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: + continue + grad = p.grad.data + if grad.dtype in {torch.float16, torch.bfloat16}: + grad = grad.float() + if grad.is_sparse: + raise RuntimeError( + "Adam does not support sparse gradients, please consider SparseAdam instead" + ) + amsgrad = group.get("amsgrad", False) + + p_data_fp32 = p.data + if p.data.dtype in {torch.float16, torch.bfloat16}: + p_data_fp32 = p_data_fp32.float() + + state = self.state[p] + + # State initialization + if len(state) == 0: + state["step"] = 0 + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like(p_data_fp32) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like(p_data_fp32) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state["max_exp_avg_sq"] = torch.zeros_like(p_data_fp32) + else: + state["exp_avg"] = state["exp_avg"].to(p_data_fp32) + state["exp_avg_sq"] = state["exp_avg_sq"].to(p_data_fp32) + if amsgrad: + state["max_exp_avg_sq"] = state["max_exp_avg_sq"].to( + p_data_fp32 + ) + + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + beta1, beta2 = group["betas"] + + state["step"] += 1 + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + if amsgrad: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
@register_optimizer("adamax")
class FairseqAdamax(LegacyFairseqOptimizer):
    """fairseq optimizer wrapper that builds an :class:`Adamax` instance."""

    def __init__(self, args, params):
        super().__init__(args)
        adamax_kwargs = self.optimizer_config
        self._optimizer = Adamax(params, **adamax_kwargs)

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        # (flags, argparse keyword options), registered in this order.
        option_table = (
            (
                ("--adamax-betas",),
                {
                    "default": "(0.9, 0.999)",
                    "metavar": "B",
                    "help": "betas for Adam optimizer",
                },
            ),
            (
                ("--adamax-eps",),
                {
                    "type": float,
                    "default": 1e-8,
                    "metavar": "D",
                    "help": "epsilon for Adam optimizer",
                },
            ),
            (
                ("--weight-decay", "--wd"),
                {
                    "default": 0.0,
                    "type": float,
                    "metavar": "WD",
                    "help": "weight decay",
                },
            ),
            (
                ("--no-bias-correction",),
                {
                    "default": False,
                    "action": "store_true",
                    "help": "disable bias correction",
                },
            ),
        )
        for flags, options in option_table:
            parser.add_argument(*flags, **options)

    @property
    def optimizer_config(self):
        """
        Return a kwarg dictionary that will be used to override optimizer
        args stored in checkpoints. This allows us to load a checkpoint and
        resume training using a different set of optimizer args, e.g., with a
        different learning rate.
        """
        args = self.args
        # NOTE(review): eval() runs whatever expression --adamax-betas holds;
        # acceptable for trusted command lines only.
        betas = eval(args.adamax_betas)
        return {
            "lr": args.lr[0],
            "betas": betas,
            "eps": args.adamax_eps,
            "weight_decay": args.weight_decay,
            "bias_correction": not args.no_bias_correction,
        }
class Adamax(torch.optim.Optimizer):
    """Implements Adamax algorithm (a variant of Adam based on infinity norm).

    It has been proposed in `Adam: A Method for Stochastic Optimization`__.

    Compared to the version in PyTorch, this version implements a fix for weight decay.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        bias_correction (bool, optional): enable bias correction (default: True)

    Raises:
        ValueError: if ``lr``, ``eps`` or ``weight_decay`` is negative, or a
            beta is outside ``[0, 1)``.

    __ https://arxiv.org/abs/1412.6980
    """

    def __init__(
        self,
        params,
        lr=2e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        bias_correction=True,
    ):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))

        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            bias_correction=bias_correction,
        )
        super(Adamax, self).__init__(params, defaults)

    @property
    def supports_memory_efficient_fp16(self):
        return True

    @property
    def supports_flat_params(self):
        return True

    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError("Adamax does not support sparse gradients")

                # Half-precision parameters are updated through an fp32 copy
                # that is written back at the end of the iteration.
                p_data_fp32 = p.data
                if p.data.dtype in {torch.float16, torch.bfloat16}:
                    p_data_fp32 = p_data_fp32.float()

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state["step"] = 0
                    state["exp_avg"] = torch.zeros_like(p_data_fp32)
                    state["exp_inf"] = torch.zeros_like(p_data_fp32)
                else:
                    state["exp_avg"] = state["exp_avg"].to(p_data_fp32)
                    state["exp_inf"] = state["exp_inf"].to(p_data_fp32)

                exp_avg, exp_inf = state["exp_avg"], state["exp_inf"]
                beta1, beta2 = group["betas"]
                eps = group["eps"]

                state["step"] += 1

                # Update biased first moment estimate.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                # Update the exponentially weighted infinity norm.
                # abs() is deliberately out-of-place: for fp32 gradients
                # `grad` aliases p.grad, and an in-place abs_() would leave
                # the caller's gradient sign-stripped after the step.
                torch.max(
                    exp_inf.mul_(beta2),
                    grad.abs(),
                    out=exp_inf,
                )

                step_size = group["lr"]
                if group["bias_correction"]:
                    bias_correction = 1 - beta1 ** state["step"]
                    step_size /= bias_correction

                if group["weight_decay"] != 0:
                    p_data_fp32.add_(
                        p_data_fp32, alpha=-group["weight_decay"] * group["lr"]
                    )

                p_data_fp32.addcdiv_(exp_avg, exp_inf.add(eps), value=-step_size)

                if p.data.dtype in {torch.float16, torch.bfloat16}:
                    p.data.copy_(p_data_fp32)

        return loss
class AMPOptimizer(optim.FairseqOptimizer):
    """
    Wrap an *optimizer* to support AMP (automatic mixed precision) training.
    """

    def __init__(self, cfg: DictConfig, params, fp32_optimizer, **kwargs):
        """
        Args:
            cfg (omegaconf.DictConfig): fairseq config; ``cfg.optimizer`` and
                ``cfg.common`` are read here
            params (iterable): accepted for signature compatibility, unused
            fp32_optimizer: the wrapped fairseq optimizer
        """
        super().__init__(cfg.optimizer)
        self.fp32_optimizer = fp32_optimizer
        amp_kwargs = {"init_scale": cfg.common.fp16_init_scale}
        # growth_interval is the number of overflow-free steps before the
        # scale grows, i.e. the configured scale window (not the init scale).
        scale_window = getattr(cfg.common, "amp_scale_window", None)
        if scale_window is not None:
            amp_kwargs["growth_interval"] = scale_window
        self._grad_scaler = torch.cuda.amp.GradScaler(**amp_kwargs)
        self.min_loss_scale = cfg.common.min_loss_scale

    @classmethod
    def build_optimizer(cls, cfg: DictConfig, params, **kwargs):
        """
        Args:
            cfg (omegaconf.DictConfig): fairseq args
            params (iterable): iterable of parameters to optimize
        """
        fp32_optimizer = optim.build_optimizer(cfg.optimizer, params)
        return cls(cfg, params, fp32_optimizer, **kwargs)

    def backward(self, loss):
        """Computes the sum of gradients of the given tensor w.r.t. graph leaves.

        Compared to :func:`fairseq.optim.FairseqOptimizer.backward`, this
        function additionally dynamically scales the loss to avoid gradient
        underflow.
        """
        self._grad_scaler.scale(loss).backward()

    def step(self):
        """Step the wrapped optimizer through the grad scaler, then update the scale."""
        self.scaler.step(self.fp32_optimizer)
        self.scaler.update()

    def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm.

        Gradients are unscaled first. A non-finite norm is logged as an
        overflow, or raises once the next loss scale would reach the minimum.

        Raises:
            FloatingPointError: if the next loss scale is at or below
                ``min_loss_scale``.
        """
        self.scaler.unscale_(self.optimizer)
        grad_norm = self.fp32_optimizer.clip_grad_norm(max_norm, aggregate_norm_fn)
        if not torch.isfinite(grad_norm).all():
            new_loss_scale = self.next_loss_scale
            if new_loss_scale <= self.min_loss_scale:
                raise FloatingPointError(
                    (
                        "AMP: Minimum loss scale reached ({}). Your loss is probably exploding. "
                        "Try restarting training or use fp32. {}"
                    ).format(self.min_loss_scale, new_loss_scale)
                )
            else:
                logger.info(
                    f"AMP: overflow detected, setting scale to {new_loss_scale}"
                )
        return grad_norm

    @property
    def scaler(self):
        return self._grad_scaler

    @property
    def next_loss_scale(self):
        """Loss scale the grad scaler would back off to after an overflow."""
        return self.scaler.get_scale() * self.scaler.get_backoff_factor()

    @property
    def optimizer(self):
        return self.fp32_optimizer.optimizer

    @optimizer.setter
    def optimizer(self, optimizer):
        self.fp32_optimizer.optimizer = optimizer

    @property
    def lr_scheduler(self):
        return getattr(self.fp32_optimizer, "lr_scheduler", None)

    @property
    def optimizer_config(self):
        return self.fp32_optimizer.optimizer_config

    def get_lr(self):
        return self.fp32_optimizer.get_lr()

    def set_lr(self, lr):
        self.fp32_optimizer.set_lr(lr)

    def all_reduce_grads(self, module):
        self.fp32_optimizer.all_reduce_grads(module)

    @property
    def supports_flat_params(self):
        return self.fp32_optimizer.supports_flat_params
class FairseqBMUF(FairseqOptimizer):
    """
    Implements incremental block distributed data parallelism similar to
    https://ieeexplore.ieee.org/document/7472805

    Paper title: Scalable training of deep learning machines by incremental
    block training with intra-block parallel optimization and blockwise
    model-update filtering
    """

    def __init__(self, cfg: FairseqBMUFConfig, optimizer):
        super().__init__(cfg)
        self._optimizer = optimizer
        self._num_updates = 0

        # Settings copied from the config.
        self.sync_iter = cfg.global_sync_iter
        self.block_momentum = cfg.block_momentum
        self.block_lr = cfg.block_lr
        self.warmup_iteration = cfg.warmup_iterations
        self.use_nbm = cfg.use_nbm
        self.average_sync = self.cfg.average_sync
        self.world_size = self.cfg.distributed_world_size

        self._reset_local_data()
        self.initial_state = self._optimizer.state_dict()

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        gen_parser_from_dataclass(parser, FairseqBMUFConfig())

    @property
    def optimizer(self):
        return self._optimizer.optimizer

    @property
    def optimizer_config(self):
        return self._optimizer.optimizer_config

    def get_lr(self):
        return self._optimizer.get_lr()

    def set_lr(self, lr):
        self._optimizer.set_lr(lr)

    def state_dict(self):
        return self._optimizer.state_dict()

    def load_state_dict(self, state_dict, optimizer_overrides=None):
        self._optimizer.load_state_dict(state_dict, optimizer_overrides)
        self.initial_state = self._optimizer.state_dict()

    def multiply_grads(self, c):
        """Multiplies grads by a constant *c*."""
        self._optimizer.multiply_grads(c)

    def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
        """Clips gradient norm."""
        return self._optimizer.clip_grad_norm(max_norm, aggregate_norm_fn)

    def average_params(self):
        self._optimizer.average_params()

    def _block_sync(self):
        """Run one block synchronisation; does nothing for a single worker."""
        if self.world_size <= 1:
            return
        uses_momentum = self.block_momentum != 0

        # (Step-1) Difference between the previously synced model and the
        # current local model
        if uses_momentum:
            self._calc_grad()

        # (Step-2) Average gradient from all GPUs
        self._avg_grad_from_all_gpus()

        # (Step-3) Calculate global momentum and update the global model
        if uses_momentum:
            self._update_global_model()

        # (Step-4) Average local optimizer params
        if self.average_sync:
            self.average_params()

    def _is_warmup_end(self):
        # True exactly on the update that completes warmup
        return self.get_num_updates() == self.warmup_iteration

    def _is_bmuf_iter(self):
        # True on every sync_iter-th update once warmup is over
        updates = self.get_num_updates()
        return updates > self.warmup_iteration and updates % self.sync_iter == 0

    def _warmup_sync(self, root_rank=0):
        """Broadcast *root_rank*'s parameters and reset the local BMUF buffers."""
        if self.world_size <= 1:
            return
        # Broadcast the local model to all gpus
        for local_param in self.params:
            dist.broadcast(local_param.data, src=root_rank)

        # Update local optimizer state
        if not self.average_sync:
            self._optimizer.load_state_dict(self.initial_state)
        else:
            self._optimizer.average_params()

        self._reset_local_data()

    def step(self, closure=None):
        """Performs a single optimization step."""
        self._optimizer.step(closure)
        self.set_num_updates(self.get_num_updates() + 1)
        if self._is_warmup_end():
            self._warmup_sync()
            return
        if self._is_bmuf_iter():
            self._block_sync()

    def zero_grad(self):
        """Clears the gradients of all optimized parameters."""
        self._optimizer.zero_grad()

    def get_num_updates(self):
        """Get the number of parameters updates."""
        return self._num_updates

    def set_num_updates(self, num_updates):
        """Set the number of parameters updates."""
        self._num_updates = num_updates

    @torch.no_grad()
    def _reset_local_data(self):
        # (Step-0) Initialize global momentum parameters and store global copy on each gpu
        self.global_params = [torch.zeros_like(p.data) for p in self.params]
        self.smoothed_grads = [p.data.new_zeros(p.data.size()) for p in self.params]
        self.grads = [p.data.new_zeros(p.data.size()) for p in self.params]

        # keep a local snapshot of the model for the next bmuf sync
        for local_param, snapshot in zip(self.params, self.global_params):
            snapshot.copy_(local_param.data)

    @torch.no_grad()
    def _calc_grad(self):
        # global_params holds the copy from the previously finished
        # synchronisation and param.data is the current local parameter, so
        # the "grad" is previously synced model minus current local model.
        pairs = zip(self.params, self.global_params)
        for slot, (local_param, snapshot) in enumerate(pairs):
            self.grads[slot] = snapshot - local_param.data

    def _avg_grad_from_all_gpus(self):
        for slot, local_param in enumerate(self.params):
            if self.block_momentum == 0:
                sync_para = local_param.data
            else:
                sync_para = self.grads[slot]
            sync_para /= float(dist.get_world_size())
            dist.all_reduce(sync_para, op=dist.ReduceOp.SUM)

    @torch.no_grad()
    def _update_global_model(self):
        # all gpus share the same value of smoothed_grad, since it is always
        # computed on synchronized gradients.
        rows = zip(self.params, self.global_params, self.smoothed_grads, self.grads)
        for slot, (local_param, snapshot, prev_smoothed, grad) in enumerate(rows):
            # snapshot is the last synchronized parameter.
            # smoothed_grad(t) = BM * smoothed_grad(t-1) + BM_lr * grad(t)
            smoothed = self.block_momentum * prev_smoothed + self.block_lr * grad
            local_param.data.copy_(snapshot - smoothed)

            # A Nesterov momentum here is to do a partial weight update before
            # calculating the gradient
            if self.use_nbm:
                local_param.data.copy_(
                    local_param.data - self.block_momentum * smoothed
                )

            # backup for the next synchronization.
            self.smoothed_grads[slot] = smoothed
            snapshot.copy_(local_param.data)
@dataclass
class OptimizerAndSchedulerConfig(FairseqDataclass):
    optimizer: Any = None
    lr_scheduler: Optional[Any] = None
    lr: List = II("optimization.lr")
    lr_float: Optional[
        float
    ] = None  # this makes it easier to sweep on learning rate with auto sweepers


@dataclass
class CompositeOptimizerConfig(FairseqDataclass):
    groups: Dict[str, Any] = field(
        default_factory=lambda: {},
        metadata={
            "help": "optimizer name -> optimizer OptimizerAndSchedulerConfig. "
            "Configures a different optimizer and (optionally) lr scheduler for each parameter group"
        },
    )
    dynamic_groups: bool = field(
        default=False,
        metadata={
            "help": "create groups dynamically based on parameters, if set to False, all parameters needs to have group_names"
        },
    )


@register_optimizer("composite", dataclass=CompositeOptimizerConfig)
class FairseqCompositeOptimizer(FairseqOptimizer):
    """Builds one optimizer (and optionally one LR scheduler) per parameter group.

    Parameters are grouped by their ``param_group`` attribute (``"default"``
    when absent). A parameter's ``optim_overrides`` dict may scale the group's
    learning rate and weight decay.
    """

    # Annotations only: the dicts are created per instance in __init__ so two
    # composite optimizers never share (and leak into) the same registry.
    optimizers: Dict[str, FairseqOptimizer]
    lr_schedulers: Dict[str, FairseqLRScheduler]
    lr_scheduler: FairseqLRScheduler = None
    _optimizer: torch.optim.Optimizer

    def __init__(self, cfg: CompositeOptimizerConfig, params):
        super().__init__(cfg)

        self.optimizers = {}
        self.lr_schedulers = {}

        assert (
            len(params) > 1
        ), "Composite optimizer only works when there are multiple parameter groups (try fp16_no_flatten_grads: true)"

        def dict_hash(dictionary: Dict[str, Any]) -> str:
            # Stable md5 digest of the key-sorted JSON dump; used as the
            # group name for dynamically created groups.
            import hashlib
            import json

            dhash = hashlib.md5()
            encoded = json.dumps(dictionary, sort_keys=True).encode()
            dhash.update(encoded)
            return dhash.hexdigest()

        groupped_params = defaultdict(list)
        overrides = defaultdict(dict)
        if not cfg.dynamic_groups:
            for p in params:
                group = getattr(p, "param_group", "default")
                override_config = getattr(p, "optim_overrides", None)
                if override_config is not None and bool(override_config):
                    overrides[group] = override_config
                else:
                    assert (
                        override_config is None or override_config == overrides[group]
                    ), f"For group {group}, different overrides found {override_config} v/s {overrides[group]}"
                groupped_params[group].append(p)

            # Every member of a group must carry the same overrides as the
            # group's first parameter.
            for members in groupped_params.values():
                override_config = getattr(members[0], "optim_overrides", None)
                if override_config is not None:
                    for other in members[1:]:
                        assert override_config == getattr(
                            other, "optim_overrides", None
                        ), f" {str(override_config)} != {str(getattr(other, 'optim_overrides', None))}"
        else:
            for p in params:
                group = getattr(p, "param_group", "default")
                override_config = getattr(p, "optim_overrides", None)
                if override_config is not None:
                    # Note: this writes "group_name" into the parameter's own
                    # override dict before hashing it.
                    override_config["group_name"] = group
                    group_name = dict_hash(override_config)
                    overrides[group_name] = override_config
                else:
                    group_name = group
                groupped_params[group_name].append(p)

        self.optimizers_config = {}
        for group, group_params in groupped_params.items():
            p_group = group
            if group in overrides and "group_name" in overrides[group]:
                p_group = overrides[group]["group_name"]
            # Prefer a config entry keyed by the (possibly hashed) group name,
            # falling back to the entry for the original group name.
            explicit_group_present = group in cfg.groups
            group_cfg = cfg.groups[group if explicit_group_present else p_group]
            optimizer_config = copy.deepcopy(group_cfg.optimizer)
            scheduler_config = copy.deepcopy(group_cfg.lr_scheduler)

            if getattr(group_cfg, "lr_float", None) is not None:
                with open_dict(optimizer_config):
                    optimizer_config.lr = [group_cfg.lr_float]

            if group in overrides and "optimizer" in overrides[group]:
                optimizer_overrides = overrides[group]["optimizer"]
                with open_dict(optimizer_config):
                    if "lr_scale" in optimizer_overrides:
                        lr_scale = optimizer_overrides["lr_scale"]
                        optimizer_config.lr = [
                            lr * lr_scale for lr in optimizer_config.lr
                        ]

                        if explicit_group_present:
                            logger.info(
                                f"For group:{group}, config as well as override present for lr"
                            )

                    # The original tested for an "optimizer_config" key, which
                    # optimizer configs do not define, so weight_decay_scale
                    # was silently ignored. Test for the key that is scaled.
                    if (
                        "weight_decay_scale" in optimizer_overrides
                        and "weight_decay" in optimizer_config
                    ):
                        weight_decay_scale = optimizer_overrides["weight_decay_scale"]
                        optimizer_config.weight_decay = (
                            optimizer_config.weight_decay * weight_decay_scale
                        )
                        if explicit_group_present:
                            logger.info(
                                f"For group:{group}, config as well as override present for weight_decay"
                            )

            # lr_scheduler is optional; only touch it when one is configured.
            if scheduler_config is not None:
                with open_dict(scheduler_config):
                    scheduler_config.lr = optimizer_config.lr
            self.optimizers[group] = _build_optimizer(optimizer_config, group_params)
            self.optimizers_config[group] = optimizer_config
            if scheduler_config is not None:
                self.lr_schedulers[group] = build_lr_scheduler(
                    scheduler_config, self.optimizers[group]
                )
        logger.info("Optimizers for different groups are as below")
        for group in self.optimizers_config.keys():
            logger.info(f"Group : {group}:{self.optimizers_config[group]}")
        if len(self.lr_schedulers) > 0:
            assert len(self.lr_schedulers) == len(self.optimizers), (
                f"Please provide an lr scheduler for each optimizer to use pass_through scheduler. "
                f"Optimizers: {self.optimizers}; Lr scheds: {self.lr_schedulers}"
            )
            self.lr_scheduler = CompositeLRScheduler(self.lr_schedulers)

        self._optimizer = CompositeOptimizer(self.optimizers)

    @property
    def supports_groups(self):
        return True

    @property
    def param_groups(self):
        """Yield every param group of every wrapped optimizer."""
        for opt in self.optimizers.values():
            for group in opt.param_groups:
                yield group

    def get_lr(self):
        """Return the current learning rate."""
        k = (
            "default"
            if "default" in self.optimizers
            else next(iter(self.optimizers.keys()))
        )
        return self.optimizers[k].param_groups[0]["lr"]

    def state_dict(self):
        """Return a dict mapping group name to that optimizer's state dict."""
        return {k: s.state_dict() for k, s in self.optimizers.items()}

    def load_state_dict(self, state_dict, optimizer_overrides=None):
        """Load per-group optimizer state dicts, ignoring unknown groups."""
        for k, state in state_dict.items():
            if k not in self.optimizers:
                # skip extra keys like "loss_scale" added by fp16 optimizer
                continue

            overrides = (
                optimizer_overrides[k]
                if isinstance(optimizer_overrides, dict) and k in optimizer_overrides
                else None
            )
            self.optimizers[k].load_state_dict(state, optimizer_overrides=overrides)
class CompositeOptimizer(torch.optim.Optimizer):
    """Fans ``step`` and ``zero_grad`` out to a dict of named optimizers.

    Note: ``torch.optim.Optimizer.__init__`` is not called here.
    """

    def __init__(self, optimizers: Dict[str, FairseqOptimizer]):
        self.optimizers = optimizers

    @property
    def supports_memory_efficient_fp16(self):
        return all(o.supports_memory_efficient_fp16 for o in self.optimizers.values())

    @property
    def supports_flat_params(self):
        return all(o.supports_flat_params for o in self.optimizers.values())

    def step(self, closure=None, groups=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
            groups (container, optional): names of the optimizers to step;
                all optimizers are stepped when ``None``.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for k, opt in self.optimizers.items():
            if groups is None or k in groups:
                opt.step()

        return loss

    def zero_grad(self):
        for opt in self.optimizers.values():
            opt.zero_grad()


class CompositeLRScheduler(FairseqLRScheduler):
    """Drives a dict of named LR schedulers as a single scheduler."""

    def __init__(self, lr_schedulers):
        super().__init__(None, None)

        self.lr_schedulers = lr_schedulers

    def state_dict(self):
        """Return the LR scheduler state dict."""
        return {k: s.state_dict() for k, s in self.lr_schedulers.items()}

    def load_state_dict(self, state_dict):
        """Load an LR scheduler state dict."""
        for k, state in state_dict.items():
            self.lr_schedulers[k].load_state_dict(state)

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        for s in self.lr_schedulers.values():
            s.step_begin_epoch(epoch)

    def step(self, epoch, val_loss=None):
        """Update the learning rate at the end of the given epoch."""
        # Forward val_loss as well: it used to be dropped here, so wrapped
        # schedulers never received the validation loss.
        for s in self.lr_schedulers.values():
            s.step(epoch, val_loss)

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        return {k: s.step_update(num_updates) for k, s in self.lr_schedulers.items()}
# Probe for DeepSpeed once at import time; CPUAdam refuses to build without it.
try:
    import deepspeed
except ImportError:
    has_deepspeed = False
else:
    has_deepspeed = True


def _get_cpu_adam():
    """Return DeepSpeed's CPU Adam implementation.

    Uses ``CPUAdamBuilder().load()`` and falls back to ``DeepSpeedCPUAdam``
    when that raises ``ImportError``.
    """
    try:
        from deepspeed.ops.op_builder import CPUAdamBuilder

        cpu_adam_op = CPUAdamBuilder().load()
        return cpu_adam_op
    except ImportError:
        # fbcode
        from deepspeed.ops.adam import DeepSpeedCPUAdam as ds_opt_adam

        return ds_opt_adam


@dataclass
class FairseqCPUAdamConfig(FairseqDataclass):
    adam_betas: str = field(
        default="(0.9, 0.999)", metadata={"help": "betas for Adam optimizer"}
    )
    adam_eps: float = field(
        default=1e-8, metadata={"help": "epsilon for Adam optimizer"}
    )
    weight_decay: float = field(default=0.0, metadata={"help": "weight decay"})
    fp16_adam_stats: bool = field(
        default=False, metadata={"help": "use FP16 stats (with automatic scaling)"}
    )
    # TODO common vars below in parent
    lr: List[float] = II("optimization.lr")


@register_optimizer("cpu_adam", dataclass=FairseqCPUAdamConfig)
class FairseqCPUAdam(FairseqOptimizer):
    """Adam optimizer for fairseq, optimized for CPU tensors.

    Important note: this optimizer corresponds to the "AdamW" variant of
    Adam in its weight decay behavior. As such, it is most closely
    analogous to torch.optim.AdamW from PyTorch.
    """

    def __init__(self, cfg: DictConfig, params):
        super().__init__(cfg)
        cpu_adam_kwargs = self.optimizer_config
        self._optimizer = CPUAdam(params, **cpu_adam_kwargs)

    @property
    def optimizer_config(self):
        """
        Return a kwarg dictionary that will be used to override optimizer
        args stored in checkpoints. This allows us to load a checkpoint and
        resume training using a different set of optimizer args, e.g., with a
        different learning rate.
        """
        cfg = self.cfg
        # lr may be configured as a collection (first entry is used) or a scalar
        if isinstance(cfg.lr, Collection):
            learning_rate = cfg.lr[0]
        else:
            learning_rate = cfg.lr
        # NOTE(review): eval() runs whatever expression adam_betas holds;
        # acceptable for trusted configs only.
        betas = eval(cfg.adam_betas)
        return {
            "lr": learning_rate,
            "betas": betas,
            "eps": cfg.adam_eps,
            "weight_decay": cfg.weight_decay,
            "use_fp16_stats": cfg.fp16_adam_stats,
        }
class CPUAdam(torch.optim.Optimizer):
    """Adam whose update runs on CPU through DeepSpeed's ``adam_update`` op.

    Args:
        params (iterable): parameters to optimize or dicts defining groups
        lr (float, optional): learning rate (default: 1e-3)
        bias_correction (bool, optional): passed through to the DeepSpeed op
        betas (Tuple[float, float], optional): moment coefficients
        eps (float, optional): denominator epsilon (default: 1e-8)
        weight_decay (float, optional): weight decay (default: 0)
        use_fp16_stats (bool, optional): keep the two moment buffers in fp16
            with a per-buffer scale factor (default: False)

    Raises:
        ImportError: if DeepSpeed is not installed.
    """

    # Class-wide counter that hands each instance a distinct DeepSpeed id.
    optimizer_id = 0

    def __init__(
        self,
        params,
        lr=1e-3,
        bias_correction=True,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        use_fp16_stats=False,
    ):
        defaults = {
            "lr": lr,
            "bias_correction": bias_correction,
            "betas": betas,
            "eps": eps,
            "weight_decay": weight_decay,
        }
        super().__init__(params, defaults)

        self.use_fp16_stats = use_fp16_stats
        self.FLOAT16_MAX = 65504.0

        if not has_deepspeed:
            raise ImportError("Please install DeepSpeed: pip install deepspeed")

        self.opt_id = CPUAdam.optimizer_id
        CPUAdam.optimizer_id = CPUAdam.optimizer_id + 1

        self.ds_opt_adam = _get_cpu_adam()
        adamw_mode = True
        self.ds_opt_adam.create_adam(
            self.opt_id, lr, betas[0], betas[1], eps, weight_decay, adamw_mode
        )

    @property
    def supports_memory_efficient_fp16(self):
        return True

    @property
    def supports_flat_params(self):
        return True

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): re-evaluates the model (with grad
                enabled) and returns the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        # Only synchronize when CUDA exists; the unconditional call raised on
        # CPU-only hosts.
        if torch.cuda.is_available():
            torch.cuda.synchronize()

        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue

                state = self.state[p]
                if len(state) == 0:
                    state["step"] = 0
                    dtype = torch.float16 if self.use_fp16_stats else p.data.dtype
                    # gradient momentums
                    state["exp_avg"] = torch.zeros_like(
                        p.data, dtype=dtype, device="cpu"
                    )
                    # gradient variances
                    state["exp_avg_sq"] = torch.zeros_like(
                        p.data, dtype=dtype, device="cpu"
                    )
                    if self.use_fp16_stats:
                        assert torch.is_floating_point(p.data)
                        state["exp_avg_scale"] = 1.0
                        state["exp_avg_sq_scale"] = 1.0

                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]

                p_data_bak = p.data  # backup of the original data pointer

                # The DeepSpeed op is fed fp32 CPU tensors.
                p.data = p.data.to(dtype=torch.float32, device="cpu")
                p.grad.data = p.grad.data.to(dtype=torch.float32, device="cpu")

                if self.use_fp16_stats:
                    # Expand the fp16 buffers back to fp32 with their scales.
                    exp_avg = exp_avg.float() * state["exp_avg_scale"]
                    exp_avg_sq = exp_avg_sq.float() * state["exp_avg_sq_scale"]

                state["step"] += 1
                beta1, beta2 = group["betas"]

                self.ds_opt_adam.adam_update(
                    self.opt_id,
                    state["step"],
                    group["lr"],
                    beta1,
                    beta2,
                    group["eps"],
                    group["weight_decay"],
                    group["bias_correction"],
                    p.data,
                    p.grad.data,
                    exp_avg,
                    exp_avg_sq,
                )

                # If the conversion above produced a new tensor, copy the
                # result into the original storage and restore the pointer.
                if p_data_bak.data_ptr() != p.data.data_ptr():
                    p_data_bak.copy_(p.data)
                    p.data = p_data_bak

                if self.use_fp16_stats:

                    def inf_norm(t):
                        return torch.norm(t, float("inf"))

                    # from github.com/openai/jukebox/blob/master/jukebox/utils/fp16.py
                    state["exp_avg_scale"], state["exp_avg_sq_scale"] = (
                        1e-8 + inf_norm(exp_avg) / self.FLOAT16_MAX,
                        1e-8 + inf_norm(exp_avg_sq) / self.FLOAT16_MAX,
                    )
                    state["exp_avg"], state["exp_avg_sq"] = (
                        (exp_avg / state["exp_avg_scale"]).half(),
                        (exp_avg_sq / state["exp_avg_sq_scale"]).half(),
                    )

        return loss
class DynamicLossScaler(object):
    """Loss scale that grows after overflow-free windows and shrinks on overflow.

    ``update`` multiplies the scale by ``scale_factor`` whenever the number of
    iterations since the last overflow is a multiple of ``scale_window``.
    ``check_overflow`` divides it by ``scale_factor`` (never below
    ``threshold`` when one is set) once the share of overflowing iterations
    since the last rescale reaches ``tolerance``.
    """

    def __init__(
        self,
        init_scale=2.0**15,
        scale_factor=2.0,
        scale_window=2000,
        tolerance=0.0,
        threshold=None,
        min_loss_scale=1e-4,
    ):
        # configuration
        self.loss_scale = init_scale
        self.scale_factor = scale_factor
        self.scale_window = scale_window
        self.tolerance = tolerance
        self.threshold = threshold
        self.min_loss_scale = min_loss_scale
        # bookkeeping
        self._iter = 0
        self._last_overflow_iter = -1
        self._last_rescale_iter = -1
        self._overflows_since_rescale = 0

    def scale(self, outputs):
        """Return *outputs* multiplied by the current loss scale."""
        return self.loss_scale * outputs

    def update(self):
        """Advance one iteration, growing the scale at a window boundary."""
        iters_since_overflow = self._iter - self._last_overflow_iter
        if iters_since_overflow % self.scale_window == 0:
            self.loss_scale *= self.scale_factor
            self._last_rescale_iter = self._iter
        self._iter += 1

    def _decrease_loss_scale(self):
        self.loss_scale /= self.scale_factor
        if self.threshold is None:
            return
        # clamp from below at the configured threshold
        if self.threshold > self.loss_scale:
            self.loss_scale = self.threshold

    def check_overflow(self, grad_norm):
        """Handle an inf/nan *grad_norm*; a finite norm is a no-op.

        Raises:
            FloatingPointError: the decreased scale is at or below
                ``min_loss_scale`` (the previous scale is restored first).
            OverflowError: any other overflow, carrying the scale now in use.
        """
        # detect inf and nan (nan is the only value unequal to itself)
        overflowed = grad_norm == float("inf") or grad_norm != grad_norm
        if not overflowed:
            return

        scale_before = self.loss_scale
        iters_since_rescale = self._iter - self._last_rescale_iter

        self._last_overflow_iter = self._iter
        self._overflows_since_rescale += 1
        overflow_share = self._overflows_since_rescale / float(iters_since_rescale)
        if overflow_share >= self.tolerance:
            self._decrease_loss_scale()
            self._last_rescale_iter = self._iter
            self._overflows_since_rescale = 0

        if self.loss_scale <= self.min_loss_scale:
            # Use FloatingPointError as an uncommon error that parent
            # functions can safely catch to stop training.
            self.loss_scale = scale_before
            raise FloatingPointError(
                (
                    "Minimum loss scale reached ({}). Your loss is probably exploding. "
                    "Try lowering the learning rate, using gradient clipping or "
                    "increasing the batch size."
                ).format(self.min_loss_scale)
            )

        self._iter += 1
        raise OverflowError("setting loss scale to: " + str(self.loss_scale))
def load_state_dict(self, state_dict, optimizer_overrides=None):
    """Restore the wrapped optimizer from *state_dict*.

    Settings of the live optimizer (e.g. the learning rate) should generally
    win over those stored in the checkpoint, so that training can resume with
    new optimizer args. Any entries in *optimizer_overrides* are therefore
    written into every parameter group after loading.
    """
    self.optimizer.load_state_dict(state_dict)

    # Nothing to override: keep the freshly loaded hyper-parameters.
    if optimizer_overrides is None or len(optimizer_overrides) == 0:
        return

    # Apply the latest learning rate, momentum, etc. to each group.
    for param_group in self.param_groups:
        param_group.update(optimizer_overrides)
def multiply_grads(self, c):
    """Scale every existing gradient in place by the constant *c*.

    *c* may be a Python number or a tensor; a tensor is moved to the device
    of the gradients it multiplies. Sparse gradients are scaled one by one,
    dense gradients are bucketed by device and dtype and scaled with a
    single ``torch._foreach_mul_`` call per bucket.
    """
    c_is_tensor = torch.is_tensor(c)

    def factor_on(device):
        # Only tensors need to follow the gradients onto their device.
        return c.to(device) if c_is_tensor else c

    dense_buckets = defaultdict(lambda: defaultdict(list))
    for param in self.params:
        grad = param.grad
        if grad is None:
            continue
        if grad.is_sparse:
            grad.data.mul_(factor_on(grad.device))
            continue
        dense_buckets[grad.device][grad.dtype].append(grad.data)

    for device, grads_by_dtype in dense_buckets.items():
        for grad_list in grads_by_dtype.values():
            torch._foreach_mul_(grad_list, factor_on(device))
def broadcast_global_state_dict(self, state_dict):
    """Broadcast a global state dict to all ranks.

    Useful for optimizers that shard state between ranks. The call is
    delegated to the wrapped optimizer when it provides a method of the same
    name; otherwise *state_dict* is returned unchanged.
    """
    inner = self.optimizer
    if not hasattr(inner, "broadcast_global_state_dict"):
        return state_dict
    return inner.broadcast_global_state_dict(state_dict)
@classmethod
def build_fp32_params(cls, args, params, flatten=True):
    """Create FP32 copies of *params* together with FP32 gradient buffers.

    Args:
        args: config object; only ``pipeline_model_parallel``,
            ``distributed_no_spawn`` and ``pipeline_devices`` are read, and
            only when ``flatten`` is true.
        params: the model parameters. It is iterated more than once and, in
            the flat path, indexed (``device_params[0]``), so it must be a
            sequence rather than a one-shot generator.
        flatten: when true, pack all parameters of a device into one flat
            FP32 ``torch.nn.Parameter``; when false, build one FP32
            ``Parameter`` per input parameter.

    Returns:
        ``flatten=True``: a dict mapping a device index to the flat FP32
        parameter for that device (its ``.grad`` is pre-allocated).
        ``flatten=False``: a list of FP32 parameters in the order of
        *params*, each with a zero-filled ``.grad``.
    """
    # create FP32 copy of parameters and grads
    if flatten:
        # Pipeline parallelism is only honoured together with
        # ``distributed_no_spawn``.
        is_pipeline_parallel = getattr(
            args, "pipeline_model_parallel", False
        ) and getattr(args, "distributed_no_spawn", False)
        total_param_size = sum(p.data.numel() for p in params)
        # Default: a single flat buffer keyed by the current CUDA device.
        devices = [torch.cuda.current_device()]
        if is_pipeline_parallel:
            devices = list(set(args.pipeline_devices))
        fp32_params = {}
        for device in devices:
            if is_pipeline_parallel:
                # Only the parameters that live on this device.
                device_param_size = sum(
                    p.data.numel() for p in params if p.device.index == device
                )
                device_params = [p for p in params if p.device.index == device]
            else:
                device_param_size = total_param_size
                device_params = params
            # Allocate a float buffer of the right size on the device of the
            # first parameter; contents are overwritten by the copy loop below.
            fp32_params[device] = (
                device_params[0].new(0).float().new(device_param_size)
            )
            offset = 0
            for p in device_params:
                # Copy each parameter, flattened, into its slice of the buffer.
                numel = p.data.numel()
                fp32_params[device][offset : offset + numel].copy_(p.data.view(-1))
                offset += numel
            fp32_params[device] = torch.nn.Parameter(fp32_params[device])
            # NOTE(review): the grad buffer is allocated with ``.new(size)``
            # and not explicitly zeroed here -- presumably it is filled before
            # first use; confirm against the callers.
            fp32_params[device].grad = fp32_params[device].data.new(
                device_param_size
            )
        return fp32_params
    else:
        fp32_params = []
        for p in params:
            p32 = torch.nn.Parameter(p.data.float())
            # Carry over the marker attributes found on the source parameter.
            if hasattr(p, "expert"):
                p32.expert = True
            elif hasattr(p, "base_expert"):
                p32.base_expert = True
            p32.grad = torch.zeros_like(p32.data)
            if hasattr(p, "param_group"):
                p32.param_group = p.param_group
            if hasattr(p, "optim_overrides"):
                p32.optim_overrides = p.optim_overrides
            fp32_params.append(p32)
        return fp32_params
def _sync_fp32_params_to_fp16(self):
    """Copy the FP32 master weights back into the FP16 model parameters.

    With per-parameter FP32 copies, only parameters that require grad are
    refreshed. With flat FP32 buffers (one per device), each FP16 parameter
    receives its consecutive slice of the buffer for its device.
    """
    if not self.has_flat_params:
        for half_param, master_param in zip(self.fp16_params, self.fp32_params):
            if half_param.requires_grad:
                half_param.data.copy_(master_param.data)
        return

    devices = list(self.fp32_params.keys())

    # Group the FP16 parameters by the device index they live on.
    params_by_device = defaultdict(list)
    for half_param in self.fp16_params:
        params_by_device[half_param.device.index].append(half_param)

    for device in devices:
        start = 0
        for half_param in params_by_device[device]:
            stop = start + half_param.data.numel()
            flat_slice = self.fp32_params[device].data[start:stop]
            half_param.data.copy_(flat_slice.view_as(half_param.data))
            start = stop
def zero_grad(self):
    """Clear the gradients of all optimized parameters.

    FP16 gradients are dropped (set to ``None``), the FP32 gradient buffers
    are zeroed in place, and the pending gradient multiplier is reset for the
    next update: to ``1 / loss_scale`` when a loss scaler is active, and to
    ``1.0`` otherwise.

    Raises:
        RuntimeError: if ``has_flat_params`` is true but ``self.fp32_params``
            is neither a tensor nor a dict.
    """
    for p in self.fp16_params:
        p.grad = None

    if self.has_flat_params:
        if torch.is_tensor(self.fp32_params):
            self.fp32_params.grad.zero_()
        elif isinstance(self.fp32_params, dict):
            for fp32_params in self.fp32_params.values():
                fp32_params.grad.zero_()
        else:
            raise RuntimeError("self.fp32_params must be a tensor or dict")
    else:
        for p32 in self.fp32_params:
            if p32.grad is not None:
                p32.grad.zero_()
    self._needs_sync = False

    if self.scaler is not None:
        self._multiply_factor = 1.0 / float(self.scaler.loss_scale)
    else:
        # Without a loss scaler (bf16) the multiplier must still be reset.
        # Otherwise, when the optimizer steps with an explicit ``scale`` and
        # ``_unscale_grads`` is never called, clip coefficients from earlier
        # updates would keep accumulating. This mirrors
        # ``_MemoryEfficientFP16OptimizerMixin.zero_grad``.
        self._multiply_factor = 1.0
def load_state_dict(self, state_dict, optimizer_overrides=None):
    """Restore the wrapped optimizer (and loss scale) from *state_dict*.

    Settings of the live optimizer (e.g. the learning rate) should generally
    win over those stored in the checkpoint; *optimizer_overrides* is passed
    through to the wrapped optimizer for that purpose.
    """
    if "loss_scale" in state_dict and self.scaler is not None:
        self.scaler.loss_scale = state_dict["loss_scale"]

    self.wrapped_optimizer.load_state_dict(state_dict, optimizer_overrides)

    # Hack: PyTorch casts loaded optimizer state to the dtype of the current
    # parameters. With --memory-efficient-fp16 the parameters are FP16 but
    # the optimizer state is FP32 and must stay that way, so the original
    # (uncast) state is put back after loading -- unless the optimizer opts
    # out of this workaround.
    if getattr(self.optimizer, "disable_mem_eff_fp16_loading_hack", False):
        return

    live_groups = self.optimizer.param_groups
    saved_groups = state_dict["param_groups"]
    saved_ids = chain(*(group["params"] for group in saved_groups))
    live_params = chain(*(group["params"] for group in live_groups))
    param_for_saved_id = dict(zip(saved_ids, live_params))

    for saved_id, saved_state in state_dict["state"].items():
        if saved_id in param_for_saved_id:
            self.optimizer.state[param_for_saved_id[saved_id]] = saved_state
def clip_grad_norm(self, max_norm, aggregate_norm_fn=None):
    """Clip the gradient norm and feed it to the dynamic loss scaler.

    The gradients themselves are not modified here: the wrapped optimizer is
    only asked for the norm (``max_norm=0``), and any clipping is folded into
    ``self._multiply_factor``. Returns the gradient norm after applying the
    pending multiplier.
    """
    limit = float(max_norm)
    pending_factor = self._multiply_factor
    grad_norm = pending_factor * self.wrapped_optimizer.clip_grad_norm(
        0, aggregate_norm_fn
    )

    if self.scaler is None:
        # No loss scaler: clip with a tensor coefficient capped at 1.
        if limit > 0.0:
            self._multiply_factor *= (limit / (grad_norm + 1e-6)).clamp_(max=1)
        return grad_norm

    norm_value = float(grad_norm)
    if limit > 0.0 and norm_value > limit:
        self._multiply_factor *= limit / norm_value

    # Detect overflow and let the scaler adjust the loss scale.
    self.scaler.check_overflow(norm_value)
    return grad_norm
def __init__(
    self, cfg: DictConfig, params, optimizer, allow_unsupported=False, **kwargs
):
    """Wrap *optimizer* and configure the dynamic loss scaler from *cfg*.

    Raises:
        ValueError: if *optimizer* does not support memory-efficient FP16
            (and ``allow_unsupported`` is false), or if the scale window has
            to be derived but ``--update-freq`` is a schedule with more than
            one entry.
    """
    if not (allow_unsupported or optimizer.supports_memory_efficient_fp16):
        raise ValueError(
            "Unsupported optimizer: {}".format(optimizer.__class__.__name__)
        )

    super().__init__(getattr(cfg, "optimizer", None))
    self.wrapped_optimizer = optimizer

    common = cfg.common
    scale_window = getattr(common, "fp16_scale_window", None)
    if scale_window is None:
        # Derive the window from the data-parallel size and update frequency.
        update_freq = cfg.optimization.update_freq
        if len(update_freq) > 1:
            raise ValueError(
                "--fp16-scale-window must be given explicitly when using a "
                "custom --update-freq schedule"
            )
        data_parallel_size = int(
            cfg.distributed_training.distributed_world_size
            / common.model_parallel_size
        )
        scale_window = int(2**14 / data_parallel_size / update_freq[0])

    if getattr(common, "bf16", False):
        # Loss scaling is disabled for bfloat16.
        self.scaler = None
    else:
        self.scaler = DynamicLossScaler(
            init_scale=common.fp16_init_scale,
            scale_window=scale_window,
            tolerance=common.fp16_scale_tolerance,
            threshold=common.threshold_loss_scale,
            min_loss_scale=common.min_loss_scale,
        )
def get_fused_adam_class():
    """Return the best available FusedAdam implementation, or ``None``.

    The "deprecated" ``fused_adam_cuda`` extension from apex is tried first:
    it is a bit faster than the main interface because the apex optimizer
    itself is not used (build apex with ``--deprecated_fused_adam`` to get
    it). If it is missing, the newer apex multi-tensor interface is used when
    it is importable and reports itself as available.
    """
    global fused_adam_cuda
    import importlib

    try:
        fused_adam_cuda = importlib.import_module("fused_adam_cuda")
    except ImportError:
        pass
    else:
        return FusedAdamV1

    # Fall back to the newer interface.
    try:
        from apex.multi_tensor_apply import multi_tensor_applier
        from apex.optimizers import FusedAdam as _FusedAdam  # noqa

        applier_ready = multi_tensor_applier.available
    except ImportError:
        return None

    if applier_ready:
        return FusedAdamV2
    return None
Requires Apex to be installed via + ``python setup.py install --cuda_ext --cpp_ext``. + + It has been proposed in `Adam: A Method for Stochastic Optimization`_. + + Compared to the original version in Apex, the fairseq version casts grads + and params to FP32 internally to support ``--memory-efficient-fp16``. + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups. + lr (float, optional): learning rate. (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square. (default: (0.9, 0.999)) + eps (float, optional): term added to the denominator to improve + numerical stability. (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + amsgrad (boolean, optional): whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) NOT SUPPORTED in FusedAdam! + eps_inside_sqrt (boolean, optional): in the 'update parameters' step, + adds eps to the bias-corrected second moment estimate before + evaluating square root instead of adding it to the square root of + second moment estimate as in the original paper. (default: False) + .. _Adam: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. 
def step(self, closure=None, grads=None, scale=1.0, grad_norms=None):
    """Performs a single optimization step.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
        grads (list of tensors, optional): weight gradient to use for the
            optimizer update. May be a generator or flat list (treated as a
            single parameter group) or a list of lists, one per group. If
            gradients have type torch.half, parameters are expected to be in
            type torch.float. (default: None, i.e. use ``p.grad``)
        scale (float, optional): factor to divide gradient tensor values
            by before applying to weights. (default: 1)
        grad_norms (list, optional): one gradient norm per parameter group;
            only read for groups whose ``max_grad_norm`` is positive.
            (default: None)

    Returns:
        The value returned by *closure*, or ``None`` if no closure is given.

    Raises:
        RuntimeError: if a gradient is sparse.
    """
    loss = None
    if closure is not None:
        loss = closure()

    # Normalize ``grads`` into one entry per parameter group.
    if grads is None:
        grads_group = [None] * len(self.param_groups)
    # backward compatibility
    # assuming a list/generator of parameter means single group
    elif isinstance(grads, types.GeneratorType):
        grads_group = [grads]
    elif type(grads[0]) != list:
        grads_group = [grads]
    else:
        grads_group = grads

    if grad_norms is None:
        grad_norms = [None] * len(self.param_groups)

    for group, grads_this_group, grad_norm in zip(
        self.param_groups, grads_group, grad_norms
    ):
        if grads_this_group is None:
            grads_this_group = [None] * len(group["params"])

        # compute combined scale factor for this group
        combined_scale = scale
        if group.get("max_grad_norm", 0) > 0:
            # norm is in fact norm*scale
            clip = ((grad_norm / scale) + 1e-6) / group["max_grad_norm"]
            if clip > 1:
                combined_scale = clip * scale

        # The kernel receives the flag as an integer.
        bias_correction = 1 if group.get("bias_correction", 1) else 0

        for p, grad in zip(group["params"], grads_this_group):
            # note: p.grad should not ever be set for correct
            # operation of mixed precision optimizer that sometimes
            # sends None gradients
            if p.grad is None and grad is None:
                continue
            if grad is None:
                grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError(
                    "FusedAdam does not support sparse gradients, "
                    "please consider SparseAdam instead"
                )

            if p.device.type == "cpu":
                # CPU parameter: work on a CUDA FP32 copy; the result is
                # copied back into ``p`` after the kernel call below.
                p_data_fp32 = p.data.cuda(non_blocking=True).float()
                out_p = torch.tensor([], dtype=torch.float)
            else:
                p_data_fp32 = p.data.float()
                out_p = p.data

            state = self.state[p]

            # State initialization
            dtype = torch.float16 if self.use_fp16_stats else p_data_fp32.dtype
            if len(state) == 0:
                state["step"] = 0
                # Exponential moving average of gradient values
                state["exp_avg"] = torch.zeros_like(p_data_fp32, dtype=dtype)
                # Exponential moving average of squared gradient values
                state["exp_avg_sq"] = torch.zeros_like(p_data_fp32, dtype=dtype)
                if self.use_fp16_stats:
                    state["exp_avg_scale"] = 1.0
                    state["exp_avg_sq_scale"] = 1.0
            else:
                # Existing state: make sure it sits on the working device in
                # the expected dtype.
                device = p_data_fp32.device
                state["exp_avg"] = state["exp_avg"].to(device, dtype)
                state["exp_avg_sq"] = state["exp_avg_sq"].to(device, dtype)

            exp_avg = state["exp_avg"]
            exp_avg_sq = state["exp_avg_sq"]
            if self.use_fp16_stats:
                # Stats are stored as scaled FP16; expand them to FP32 for
                # the update. They are re-quantized after the kernel call.
                assert exp_avg.dtype == torch.float16
                exp_avg = exp_avg.float() * state["exp_avg_scale"]
                exp_avg_sq = exp_avg_sq.float() * state["exp_avg_sq_scale"]
            beta1, beta2 = group["betas"]

            # State without a per-parameter step counter falls back to the
            # group's counter.
            if "step" not in state:
                state["step"] = group["step"]

            state["step"] += 1

            with torch.cuda.device(p_data_fp32.device):
                # NOTE(review): presumably updates p_data_fp32, out_p,
                # exp_avg and exp_avg_sq in place -- confirm against the
                # fused_adam_cuda extension.
                fused_adam_cuda.adam(
                    p_data_fp32,
                    out_p,
                    exp_avg,
                    exp_avg_sq,
                    grad,
                    group["lr"],
                    beta1,
                    beta2,
                    group["eps"],
                    combined_scale,
                    state["step"],
                    self.eps_mode,
                    bias_correction,
                    group["weight_decay"],
                )

            if p.device.type == "cpu":
                p.data.copy_(p_data_fp32, non_blocking=True)

            if self.use_fp16_stats:

                def inf_norm(t):
                    return torch.norm(t, float("inf"))

                # Rescale so the largest magnitude maps to FLOAT16_MAX, then
                # store the stats back as FP16.
                # from github.com/openai/jukebox/blob/master/jukebox/utils/fp16.py
                state["exp_avg_scale"], state["exp_avg_sq_scale"] = (
                    1e-8 + inf_norm(exp_avg) / self.FLOAT16_MAX,
                    1e-8 + inf_norm(exp_avg_sq) / self.FLOAT16_MAX,
                )
                state["exp_avg"], state["exp_avg_sq"] = (
                    (exp_avg / state["exp_avg_scale"]).half(),
                    (exp_avg_sq / state["exp_avg_sq_scale"]).half(),
                )

    return loss
try:
    from apex.multi_tensor_apply import multi_tensor_applier
    from apex.optimizers import FusedAdam

    class FusedAdamV2(FusedAdam):
        """
        Apex ``FusedAdam`` subclass used by fairseq.

        Compared to the original version in Apex, the fairseq version casts grads
        and params to FP32 internally to support ``--memory-efficient-fp16``.
        """

        def __init__(self, *args, use_fp16_stats=False, **kwargs):
            """Build the optimizer; all positional/keyword args go to Apex.

            Raises:
                NotImplementedError: if ``use_fp16_stats`` is requested.
                Exception: if the parent class did not set ``multi_tensor_adam``.
            """
            if use_fp16_stats:
                raise NotImplementedError(
                    "--fp16-adam-stats is only supported with FusedAdamV1"
                )
            super().__init__(*args, **kwargs)
            if not hasattr(self, "multi_tensor_adam"):
                raise Exception(
                    "Apex installation is outdated. Please install an updated version of apex."
                )

        @property
        def supports_memory_efficient_fp16(self):
            return True

        @property
        def supports_flat_params(self):
            return True

        def step(
            self,
            closure=None,
            grads=None,
            output_params=None,
            scale=None,
            grad_norms=None,
        ):
            """Performs a single optimization step.

            Args:
                closure: optional callable; when given it is invoked once and
                    its return value is returned from this method.
                grads, output_params, scale, grad_norms: accepted so the
                    signature matches the other fused Adam variant; they are
                    not read by this implementation.

            Returns:
                The value returned by ``closure``, or ``None``.
            """
            loss = None
            if closure is not None:
                loss = closure()

            for group in self.param_groups:
                bias_correction = 1 if group["bias_correction"] else 0
                beta1, beta2 = group["betas"]

                # assume same step across group now to simplify things
                # per parameter step can be easily support by making it tensor, or pass list into kernel
                if "step" in group:
                    group["step"] += 1
                else:
                    group["step"] = 1

                # create lists for multi-tensor apply
                g_16, p_16, orig_p_16, m_16, v_16 = [], [], [], [], []
                g_32, p_32, m_32, v_32 = [], [], [], []

                for p in group["params"]:
                    if p.grad is None:
                        continue
                    if p.grad.data.is_sparse:
                        raise RuntimeError(
                            "FusedAdam does not support sparse gradients, "
                            "please consider SparseAdam instead"
                        )

                    state = self.state[p]
                    # State initialization
                    if len(state) == 0:
                        # Exponential moving average of gradient values
                        state["exp_avg"] = torch.zeros_like(p.data, dtype=torch.float)
                        # Exponential moving average of squared gradient values
                        state["exp_avg_sq"] = torch.zeros_like(
                            p.data, dtype=torch.float
                        )
                    else:
                        # Existing state is moved to the parameter's device and
                        # kept in FP32 regardless of the parameter dtype.
                        state["exp_avg"] = state["exp_avg"].to(
                            device=p.data.device, dtype=torch.float
                        )
                        state["exp_avg_sq"] = state["exp_avg_sq"].to(
                            device=p.data.device, dtype=torch.float
                        )

                    if p.dtype == torch.float16:
                        # FP16 params are updated on FP32 copies and written
                        # back to the original tensors after the kernel runs.
                        g_16.append(p.grad.data.float())
                        p_16.append(p.data.float())
                        orig_p_16.append(p.data)
                        m_16.append(state["exp_avg"])
                        v_16.append(state["exp_avg_sq"])
                    elif p.dtype == torch.float32:
                        g_32.append(p.grad.data)
                        p_32.append(p.data)
                        m_32.append(state["exp_avg"])
                        v_32.append(state["exp_avg_sq"])
                    else:
                        raise RuntimeError("FusedAdam only support fp16 and fp32.")

                if not g_16 and not g_32:
                    # Nothing to update. This also covers an empty param group,
                    # where the loop variable ``p`` used below would be unbound
                    # (or left over from a previous group).
                    continue

                with torch.cuda.device(p.device):
                    if len(g_16) > 0:
                        multi_tensor_applier(
                            self.multi_tensor_adam,
                            self._dummy_overflow_buf,
                            [g_16, p_16, m_16, v_16],
                            group["lr"],
                            beta1,
                            beta2,
                            group["eps"],
                            group["step"],
                            self.adam_w_mode,
                            bias_correction,
                            group["weight_decay"],
                        )
                        # Copy the updated FP32 values back into the FP16 params.
                        for orig_p, updated_p in zip(orig_p_16, p_16):
                            orig_p.copy_(updated_p.data)
                    if len(g_32) > 0:
                        multi_tensor_applier(
                            self.multi_tensor_adam,
                            self._dummy_overflow_buf,
                            [g_32, p_32, m_32, v_32],
                            group["lr"],
                            beta1,
                            beta2,
                            group["eps"],
                            group["step"],
                            self.adam_w_mode,
                            bias_correction,
                            group["weight_decay"],
                        )

            return loss

except ImportError:
    # Apex is optional; without it FusedAdamV2 is simply not defined.
    pass
@register_optimizer("lamb")
class FairseqLAMB(LegacyFairseqOptimizer):
    """LAMB optimizer, backed by Apex's ``FusedLAMB``."""

    def __init__(self, args, params):
        """Create the wrapped ``FusedLAMB`` optimizer.

        Raises:
            ImportError: if Apex cannot be imported; the original import
                error is attached as the cause.
        """
        super().__init__(args)
        try:
            from apex.optimizers import FusedLAMB

            self._optimizer = FusedLAMB(params, **self.optimizer_config)
        except ImportError as err:
            raise ImportError("Please install apex to use LAMB optimizer") from err

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        # fmt: off
        parser.add_argument('--lamb-betas', default='(0.9, 0.999)', metavar='B',
                            help='betas for LAMB optimizer')
        parser.add_argument('--lamb-eps', type=float, default=1e-8, metavar='D',
                            help='epsilon for LAMB optimizer')
        parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD',
                            help='weight decay')
        # fmt: on

    @property
    def optimizer_config(self):
        """
        Return a kwarg dictionary that will be used to override optimizer
        args stored in checkpoints. This allows us to load a checkpoint and
        resume training using a different set of optimizer args, e.g., with a
        different learning rate.
        """
        betas = self.args.lamb_betas
        if isinstance(betas, str):
            # NOTE(security): eval() executes arbitrary Python. The value
            # comes from the command line / config, so it must be trusted.
            # Kept as eval() so existing expressions keep working.
            betas = eval(betas)
        # A non-string value (e.g. an already-parsed tuple) is used as is
        # instead of failing inside eval().
        return {
            "lr": self.args.lr[0],
            "betas": betas,
            "eps": self.args.lamb_eps,
            "weight_decay": self.args.weight_decay,
        }

    @property
    def supports_flat_params(self):
        return False
# setup_registry returns the builder, the registration decorator and the two
# lookup tables for the ``--lr-scheduler`` option, in that order.
(
    build_lr_scheduler_,
    register_lr_scheduler,
    LR_SCHEDULER_REGISTRY,
    LR_SCHEDULER_DATACLASS_REGISTRY,
) = registry.setup_registry(
    "--lr-scheduler",
    base_class=FairseqLRScheduler,
    default="fixed",
)


def build_lr_scheduler(cfg: DictConfig, optimizer):
    """Build the LR scheduler described by ``cfg`` for ``optimizer``.

    Thin wrapper that forwards both arguments to the registry builder.
    """
    scheduler = build_lr_scheduler_(cfg, optimizer)
    return scheduler


# automatically import any Python files in the optim/lr_scheduler/ directory
for file in sorted(os.listdir(os.path.dirname(__file__))):
    if file.startswith("_") or not file.endswith(".py"):
        # skip private modules (including this __init__) and non-Python files
        continue
    # keep everything before the first ".py", as the module name
    file_name = file.partition(".py")[0]
    importlib.import_module("fairseq.optim.lr_scheduler." + file_name)
@dataclass
class CosineLRScheduleConfig(FairseqDataclass):
    # Number of linear warmup updates before the cosine cycles start.
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    # A negative value is replaced by ``min_lr`` in the scheduler constructor.
    warmup_init_lr: float = field(
        default=-1,
        metadata={
            "help": "initial learning rate during warmup phase; default is cfg.lr"
        },
    )
    lr: List[float] = field(
        default=II("optimization.lr"),
        metadata={"help": "max learning rate, must be more than cfg.min_lr"},
    )
    min_lr: float = field(default=0.0, metadata={"help": "min learning rate"})
    t_mult: float = field(
        default=1.0, metadata={"help": "factor to grow the length of each period"}
    )
    # A non-positive value makes the scheduler derive the period from max_update.
    lr_period_updates: float = field(
        default=-1, metadata={"help": "initial number of updates per period"}
    )
    lr_shrink: float = field(
        default=0.1, metadata={"help": "shrink factor for annealing"}
    )
    # This is not required, but is for convenience in inferring lr_period_updates
    max_update: int = II("optimization.max_update")


@register_lr_scheduler("cosine", dataclass=CosineLRScheduleConfig)
class CosineLRSchedule(FairseqLRScheduler):
    """Cyclical learning rate schedule shaped by the cosine function.

    See https://arxiv.org/pdf/1608.03983.pdf for details.

    An optional warmup phase raises the learning rate linearly from
    ``--warmup-init-lr`` to the configured max learning rate (``--lr``).

    During warmup::

      lr = cfg.warmup_init_lr + update_num * lr_step

    After warmup::

      lr = min_lr + 0.5*(max_lr - min_lr)*(1 + cos(pi * t_curr / t_i))

    where ``t_curr`` is the number of updates elapsed inside the current
    period, ``t_i`` is the length of the current period (multiplied by
    ``t_mult`` after every period), and ``min_lr``/``max_lr`` are scaled by
    ``lr_shrink`` once per completed period.
    """

    def __init__(self, cfg: CosineLRScheduleConfig, fairseq_optimizer):
        super().__init__(cfg, fairseq_optimizer)
        lr_is_collection = isinstance(cfg.lr, Collection)
        if lr_is_collection and len(cfg.lr) > 1:
            raise ValueError(
                "Cannot use a fixed learning rate schedule with cosine."
                f" Consider --lr-scheduler=fixed instead. ({cfg.lr})"
            )

        self.max_lr = cfg.lr[0] if lr_is_collection else cfg.lr
        # min_lr is clamped (in the config itself) so it never exceeds max_lr
        if self.max_lr < cfg.min_lr:
            cfg.min_lr = self.max_lr

        warmup_end_lr = self.max_lr
        # a negative warmup_init_lr means "start from min_lr"
        if cfg.warmup_init_lr < 0:
            cfg.warmup_init_lr = cfg.min_lr

        self.t_mult = cfg.t_mult
        self.period = cfg.lr_period_updates

        if self.period <= 0:
            # no explicit period: use everything left after warmup
            assert (
                cfg.max_update > 0
            ), "Either --max_update or --lr-period-updates must be set"
            self.period = cfg.max_update - cfg.warmup_updates

        # per-update increment applied while warming up
        self.lr_step = (
            (warmup_end_lr - cfg.warmup_init_lr) / cfg.warmup_updates
            if cfg.warmup_updates > 0
            else 1
        )

        self.warmup_updates = cfg.warmup_updates
        self.lr_shrink = cfg.lr_shrink

        # initial learning rate
        self.lr = cfg.warmup_init_lr
        self.optimizer.set_lr(self.lr)

    def step(self, epoch, val_loss=None):
        """Record the validation loss; the LR is untouched at epoch ends."""
        super().step(epoch, val_loss)
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Set and return the learning rate for update ``num_updates``."""
        warmup = self.cfg.warmup_updates
        if num_updates < warmup:
            self.lr = self.cfg.warmup_init_lr + num_updates * self.lr_step
            self.optimizer.set_lr(self.lr)
            return self.lr

        elapsed = num_updates - warmup
        if self.t_mult == 1:
            # all periods have the same length
            cycle = math.floor(elapsed / self.period)
            cycle_len = self.period
            pos_in_cycle = elapsed - (self.period * cycle)
        else:
            # period lengths form a geometric series with ratio t_mult
            cycle = math.floor(
                math.log(
                    1 - elapsed / self.period * (1 - self.t_mult), self.t_mult
                )
            )
            cycle_len = self.t_mult**cycle * self.period
            pos_in_cycle = (
                elapsed
                - (1 - self.t_mult**cycle) / (1 - self.t_mult) * self.period
            )

        shrink = self.lr_shrink**cycle
        floor_lr = self.cfg.min_lr * shrink
        peak_lr = self.max_lr * shrink

        self.lr = floor_lr + 0.5 * (peak_lr - floor_lr) * (
            1 + math.cos(math.pi * pos_in_cycle / cycle_len)
        )

        self.optimizer.set_lr(self.lr)
        return self.lr
class FairseqLRScheduler(object):
    """Base class for learning rate schedulers.

    Stores the config and optimizer and tracks the lowest validation loss
    passed to :meth:`step` in ``self.best``.
    """

    def __init__(self, cfg, optimizer):
        super().__init__()
        # ``None`` is accepted; anything else must be a FairseqOptimizer.
        if optimizer is not None:
            if not isinstance(optimizer, FairseqOptimizer):
                raise ValueError("optimizer must be an instance of FairseqOptimizer")
        self.cfg = cfg
        self.optimizer = optimizer
        self.best = None

    @classmethod
    def add_args(cls, parser):
        """Add arguments to the parser for this LR scheduler."""
        config_cls = getattr(cls, "__dataclass", None)
        if config_cls is None:
            return
        gen_parser_from_dataclass(parser, config_cls())

    def state_dict(self):
        """Return the LR scheduler state dict."""
        snapshot = {"best": self.best}
        return snapshot

    def load_state_dict(self, state_dict):
        """Load an LR scheduler state dict."""
        self.best = state_dict["best"]

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        pass

    def step(self, epoch, val_loss=None):
        """Update the learning rate at the end of the given epoch.

        The base implementation only keeps the smallest ``val_loss`` seen.
        """
        if val_loss is None:
            return
        self.best = val_loss if self.best is None else min(self.best, val_loss)

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        return self.optimizer.get_lr()


class LegacyFairseqLRScheduler(FairseqLRScheduler):
    """Scheduler base for argparse-style ``args`` instead of a config.

    The parent constructor is not called; the state is set up here and the
    optimizer is mandatory.
    """

    def __init__(self, args: Namespace, optimizer):
        if not isinstance(optimizer, FairseqOptimizer):
            raise ValueError("optimizer must be an instance of FairseqOptimizer")
        self.args = args
        self.optimizer = optimizer
        self.best = None
@dataclass
class FixedLRScheduleConfig(FairseqDataclass):
    # Epoch from which the LR is annealed; ``None`` disables annealing.
    force_anneal: Optional[int] = field(
        default=None,
        metadata={"help": "force annealing at specified epoch"},
    )
    lr_shrink: float = field(
        default=0.1,
        metadata={"help": "shrink factor for annealing, lr_new = (lr * lr_shrink)"},
    )
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    # One learning rate per epoch; the last entry is reused for later epochs.
    lr: List[float] = II("optimization.lr")


@register_lr_scheduler("fixed", dataclass=FixedLRScheduleConfig)
class FixedLRSchedule(FairseqLRScheduler):
    """Follow a per-epoch list of learning rates, with optional linear
    warmup and optional geometric annealing from ``force_anneal`` onwards."""

    def __init__(self, cfg: FixedLRScheduleConfig, optimizer):
        super().__init__(cfg, optimizer)

        self.lr = cfg.lr[0]
        # fraction of ``self.lr`` applied while warming up
        self.warmup_factor = 1.0 / cfg.warmup_updates if cfg.warmup_updates > 0 else 1

    def state_dict(self):
        """Return the scheduler state (only the current base LR)."""
        return {"lr": self.lr}

    def load_state_dict(self, state_dict):
        """Restore the base LR when the checkpoint carries one."""
        if "lr" not in state_dict:
            return
        self.lr = state_dict["lr"]

    def get_next_lr(self, epoch):
        """Return the base learning rate to use for ``epoch``."""
        schedule = self.cfg.lr
        anneal_from = self.cfg.force_anneal
        if anneal_from is not None and epoch >= anneal_from:
            # anneal based on lr_shrink
            return schedule[-1] * self.cfg.lr_shrink ** (epoch + 1 - anneal_from)
        # use fixed LR schedule
        return schedule[min(epoch - 1, len(schedule) - 1)]

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        self.lr = self.get_next_lr(epoch)
        self.optimizer.set_lr(self.warmup_factor * self.lr)
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        warmup = self.cfg.warmup_updates
        if warmup > 0 and num_updates < warmup:
            self.warmup_factor = (num_updates + 1) / float(warmup)
            new_lr = self.warmup_factor * self.lr
        else:
            new_lr = self.lr
        self.optimizer.set_lr(new_lr)
        return self.optimizer.get_lr()
@register_lr_scheduler("inverse_sqrt", dataclass=InverseSquareRootLRScheduleConfig)
class InverseSquareRootSchedule(FairseqLRScheduler):
    """Decay the LR based on the inverse square root of the update number.

    We also support a warmup phase where we linearly increase the learning rate
    from some initial learning rate (``--warmup-init-lr``) until the configured
    learning rate (``--lr``). Thereafter we decay proportional to the number of
    updates, with a decay factor set to align with the configured learning rate.

    During warmup::

      lrs = torch.linspace(cfg.warmup_init_lr, cfg.lr, cfg.warmup_updates)
      lr = lrs[update_num]

    After warmup::

      decay_factor = cfg.lr * sqrt(cfg.warmup_updates)
      lr = decay_factor / sqrt(update_num)

    With ``warmup_updates == 0`` there is no warmup phase and the schedule
    behaves as if the warmup had length 1: ``lr = cfg.lr / sqrt(update_num)``,
    with ``update_num`` treated as 1 for the very first update.
    """

    def __init__(self, cfg: InverseSquareRootLRScheduleConfig, optimizer):
        """Validate the config and set the initial learning rate.

        Raises:
            ValueError: if more than one learning rate is configured.
        """
        super().__init__(cfg, optimizer)
        if isinstance(cfg.lr, Collection) and len(cfg.lr) > 1:
            raise ValueError(
                "Cannot use a fixed learning rate schedule with inverse_sqrt."
                " Consider --lr-scheduler=fixed instead."
            )
        warmup_end_lr = cfg.lr[0] if isinstance(cfg.lr, Collection) else cfg.lr
        if cfg.warmup_init_lr < 0:
            cfg.warmup_init_lr = 0 if cfg.warmup_updates > 0 else warmup_end_lr

        # linearly warmup for the first cfg.warmup_updates
        if cfg.warmup_updates > 0:
            self.lr_step = (warmup_end_lr - cfg.warmup_init_lr) / cfg.warmup_updates
        else:
            # No warmup phase: the increment is never used, and dividing by
            # zero here would crash before training starts.
            self.lr_step = 0.0

        # then, decay prop. to the inverse square root of the update number.
        # max(..., 1) leaves any positive warmup unchanged and keeps the decay
        # factor non-zero when there is no warmup.
        self.decay_factor = warmup_end_lr * max(cfg.warmup_updates, 1) ** 0.5

        # initial learning rate
        self.lr = cfg.warmup_init_lr
        self.optimizer.set_lr(self.lr)

    def step(self, epoch, val_loss=None):
        """Update the learning rate at the end of the given epoch."""
        super().step(epoch, val_loss)
        # we don't change the learning rate at epoch boundaries
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        if num_updates < self.cfg.warmup_updates:
            self.lr = self.cfg.warmup_init_lr + num_updates * self.lr_step
        else:
            # num_updates can only be 0 here when there is no warmup;
            # clamp it so 0 ** -0.5 does not raise.
            self.lr = self.decay_factor * max(num_updates, 1) ** -0.5
        self.optimizer.set_lr(self.lr)
        return self.lr
@register_lr_scheduler("manual")
class ManualSchedule(LegacyFairseqLRScheduler):
    """Decay the LR on a manual schedule.

    ``--epoch2lr`` and ``--update2lr`` are dict literals mapping an epoch /
    update number to a learning rate. A key may be a single number, a
    comma-separated list (``"1,3,5"``) or an inclusive range (``"1-10"``).
    The entry with the largest key not exceeding the current epoch/update
    is applied.
    """

    def __init__(self, args, optimizer):
        super().__init__(args, optimizer)

        self.epoch2lr = self.parse_manuallr_args(args.epoch2lr)
        self.update2lr = self.parse_manuallr_args(args.update2lr)
        logger.info("@@@ ManualSchedule epoch2lr={}".format(self.epoch2lr))
        logger.info("@@@ ManualSchedule update2lr={}".format(self.update2lr))

        # Initial LR: epoch 1 entry, else update 1 entry, else --lr.
        if 1 in self.epoch2lr:
            self.lr = self.epoch2lr[1]
        elif 1 in self.update2lr:
            self.lr = self.update2lr[1]
        else:
            self.lr = args.lr[0]
        self.optimizer.set_lr(self.lr)  # Set the beginning of the epoch.

    def parse_manuallr_args(self, lr_args_str):
        """Parse a dict literal into ``{int step: float lr}``.

        Raises:
            ValueError: if the string does not evaluate to a dict.
        """
        lr_dict = ast.literal_eval(lr_args_str.replace(" ", ""))
        if not isinstance(lr_dict, dict):
            raise ValueError("epoch2lr/update2lr must be able to be evaluated to a dict")

        lr_args = {}
        logger.info("@@@ after parsing input dictionary lr_dict = {}".format(lr_dict))
        for key, val in lr_dict.items():
            # Keys may be given as plain ints ({1: 0.1}); normalise to str so
            # the substring checks below do not raise TypeError.
            key = str(key)
            if "," in key:
                for k in key.split(","):
                    lr_args[int(k)] = float(val)
            elif "-" in key:
                bounds = key.split("-")
                s = int(bounds[0])
                e = int(bounds[1])
                for k in range(s, e + 1, 1):
                    lr_args[k] = float(val)
            else:
                lr_args[int(key)] = float(val)

        return lr_args

    @staticmethod
    def add_args(parser):
        """Add arguments to the parser for this LR scheduler."""
        # fmt: off
        parser.add_argument(
            "--epoch2lr",
            type=str,
            metavar="DICT",
            default="{}",
            help="a dictionary used to set lr for each epoch manually",
        )
        parser.add_argument(
            "--update2lr",
            type=str,
            metavar="DICT",
            default="{}",
            help="a dictionary used to set lr for each update manually",
        )
        # fmt: on

    def state_dict(self):
        """Return the scheduler state (only the current LR)."""
        return {"lr": self.lr}

    def load_state_dict(self, state_dict):
        """Restore the LR when the checkpoint carries one."""
        if "lr" in state_dict:
            self.lr = state_dict["lr"]

    def get_next_lr(self, epoch):
        """Return the LR for ``epoch``; keep the current LR if none applies."""
        manual_keys = [k for k in self.epoch2lr if k <= epoch]
        if manual_keys:
            manual_lr = self.epoch2lr[max(manual_keys)]
        else:
            logger.warning(
                "@@@ epoch={} does not exist in manual lr input. epoch2lr={}...".format(
                    epoch,
                    # preview at most the first 10 entries
                    list(self.epoch2lr.items())[:10],
                )
            )
            manual_lr = self.optimizer.get_lr()
        return manual_lr

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        self.lr = self.get_next_lr(epoch)
        self.optimizer.set_lr(self.lr)
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        manual_keys = [k for k in self.update2lr if k <= num_updates]
        if manual_keys:
            manual_lr = self.update2lr[max(manual_keys)]
        else:
            logger.warning(
                "num_updates={} does not exist in manual lr input update2lr={}...".format(
                    num_updates,
                    # preview at most the first 10 entries
                    list(self.update2lr.items())[:10],
                )
            )
            manual_lr = self.optimizer.get_lr()

        self.optimizer.set_lr(manual_lr)
        return self.optimizer.get_lr()
@dataclass
class PassThroughScheduleConfig(FairseqDataclass):
    # This scheduler has no options of its own.
    pass


@register_lr_scheduler("pass_through", dataclass=PassThroughScheduleConfig)
class PassThroughScheduleSchedule(FairseqLRScheduler):
    """Delegate lr scheduling to the optimizer.

    Every call is forwarded to ``optimizer.lr_scheduler``.
    """

    def __init__(self, cfg: PassThroughScheduleConfig, optimizer):
        super().__init__(cfg, optimizer)
        has_own_scheduler = (
            hasattr(optimizer, "lr_scheduler") and optimizer.lr_scheduler is not None
        )
        assert (
            has_own_scheduler
        ), "Pass-through schedule can only be used with optimizers with their own schedulers"

    def state_dict(self):
        """Return the state dict of the optimizer's own scheduler."""
        inner = self.optimizer.lr_scheduler
        return inner.state_dict()

    def load_state_dict(self, state_dict):
        """Load ``state_dict`` into the optimizer's own scheduler."""
        inner = self.optimizer.lr_scheduler
        inner.load_state_dict(state_dict)

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        inner = self.optimizer.lr_scheduler
        return inner.step_begin_epoch(epoch)

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        inner = self.optimizer.lr_scheduler
        return inner.step_update(num_updates)
@dataclass
class PolynomialDecayLRScheduleConfig(FairseqDataclass):
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    # From this epoch on, get_next_lr keeps the optimizer's current LR.
    force_anneal: Optional[int] = field(
        default=None,
        metadata={"help": "force annealing at specified epoch"},
    )
    end_learning_rate: float = field(
        default=0.0,
        metadata={"help": "learning rate to decay to"},
    )
    power: float = field(
        default=1.0,
        metadata={"help": "decay exponent"},
    )
    total_num_update: float = field(
        default=II("optimization.max_update"),
        metadata={"help": "total number of updates over which to decay learning rate"},
    )
    lr: List[float] = II("optimization.lr")


@register_lr_scheduler("polynomial_decay", dataclass=PolynomialDecayLRScheduleConfig)
class PolynomialDecayLRSchedule(FairseqLRScheduler):
    """Linear warmup followed by polynomial decay to ``end_learning_rate``.

    After warmup the learning rate is::

      pct_remaining = 1 - (num_updates - warmup) / (total_num_update - warmup)
      lr = (lr - end_learning_rate) * pct_remaining ** power + end_learning_rate

    and it stays at ``end_learning_rate`` from ``total_num_update`` onwards.
    """

    def __init__(self, cfg: PolynomialDecayLRScheduleConfig, optimizer):
        super().__init__(cfg, optimizer)

        assert cfg.total_num_update > 0

        self.lr = cfg.lr[0]
        # fraction of ``self.lr`` applied while warming up
        self.warmup_factor = 1.0 / cfg.warmup_updates if cfg.warmup_updates > 0 else 1
        self.end_learning_rate = cfg.end_learning_rate
        self.total_num_update = cfg.total_num_update
        self.power = cfg.power
        self.optimizer.set_lr(self.warmup_factor * self.lr)

    def get_next_lr(self, epoch):
        """Return the base learning rate to use for ``epoch``."""
        anneal_from = self.cfg.force_anneal
        if anneal_from is not None and epoch >= anneal_from:
            # past the anneal epoch: keep whatever the optimizer currently uses
            return self.optimizer.get_lr()
        # use fixed LR schedule
        schedule = self.cfg.lr
        return schedule[min(epoch, len(schedule) - 1)]

    def step_begin_epoch(self, epoch):
        """Update the learning rate at the beginning of the given epoch."""
        self.lr = self.get_next_lr(epoch)
        self.optimizer.set_lr(self.warmup_factor * self.lr)
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Update the learning rate after each update."""
        warmup = self.cfg.warmup_updates
        if warmup > 0 and num_updates <= warmup:
            # linear warmup towards self.lr
            self.warmup_factor = num_updates / float(warmup)
            new_lr = self.warmup_factor * self.lr
        elif num_updates >= self.total_num_update:
            new_lr = self.end_learning_rate
        else:
            span = self.lr - self.end_learning_rate
            remaining = 1 - (num_updates - warmup) / (
                self.total_num_update - warmup
            )
            new_lr = span * remaining ** (self.power) + self.end_learning_rate
        self.optimizer.set_lr(new_lr)
        return self.optimizer.get_lr()
@dataclass
class ReduceLROnPlateauLRScheduleConfig(FairseqDataclass):
    lr_shrink: float = field(
        default=0.1, metadata={"help": "shrink factor for annealing"}
    )
    lr_threshold: float = field(
        default=1e-4,
        metadata={
            "help": (
                "threshold for measuring the new optimum, to only focus on "
                "significant changes"
            )
        },
    )
    lr_patience: int = field(
        default=0,
        metadata={
            "help": (
                "number of epochs with no improvement after which learning rate will "
                "be reduced"
            )
        },
    )
    warmup_updates: int = field(
        default=0,
        metadata={"help": "warmup the learning rate linearly for the first N updates"},
    )
    # A negative value is resolved in the scheduler constructor.
    warmup_init_lr: float = field(
        default=-1,
        metadata={
            "help": "initial learning rate during warmup phase; default is cfg.lr"
        },
    )
    lr: List[float] = II("optimization.lr")
    # Selects "max" or "min" mode of the wrapped plateau scheduler.
    maximize_best_checkpoint_metric: bool = II(
        "checkpoint.maximize_best_checkpoint_metric"
    )


@register_lr_scheduler(
    "reduce_lr_on_plateau", dataclass=ReduceLROnPlateauLRScheduleConfig
)
class ReduceLROnPlateauLRSchedule(FairseqLRScheduler):
    """
    Shrink the LR by a factor whenever the validation metric plateaus,
    using ``torch.optim.lr_scheduler.ReduceLROnPlateau``.

    An optional warmup phase first raises the learning rate linearly from
    ``--warmup-init-lr`` to the configured learning rate (``--lr``); the
    plateau logic only takes over once warmup has finished.

    During warmup::

      lrs = torch.linspace(
          cfg.warmup_init_lr, cfg.lr, cfg.warmup_updates
      )
      lr = lrs[update_num]
    """

    def __init__(self, cfg: ReduceLROnPlateauLRScheduleConfig, optimizer):
        super().__init__(cfg, optimizer)
        if len(cfg.lr) > 1:
            raise ValueError(
                "Cannot use a fixed learning rate schedule with reduce_lr_on_plateau."
                " Consider --lr-scheduler=fixed instead."
            )
        plateau_mode = "max" if cfg.maximize_best_checkpoint_metric else "min"
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer.optimizer,
            patience=cfg.lr_patience,
            factor=cfg.lr_shrink,
            mode=plateau_mode,
            threshold=cfg.lr_threshold,
        )
        warmup_end_lr = cfg.lr[0]
        has_warmup = cfg.warmup_updates > 0
        # if no warm up, sets initial lr to be cfg.lr[0]
        if cfg.warmup_init_lr < 0:
            cfg.warmup_init_lr = 0 if has_warmup else warmup_end_lr

        # linearly warmup for the first cfg.warmup_updates
        if has_warmup:
            self.lr_step = (warmup_end_lr - cfg.warmup_init_lr) / cfg.warmup_updates

        # this flag is either set from arg when no warm up, or set by
        # step_update() when warmup finishes
        self.warmup_end = not has_warmup

        # initial learning rate
        # this self.lr is used only during init and/or warm up period
        if self.warmup_end:
            self.lr = warmup_end_lr
        else:
            self.lr = cfg.warmup_init_lr
        self.optimizer.set_lr(self.lr)

    def state_dict(self):
        """Return the LR scheduler state dict."""
        wrapped = self.lr_scheduler
        return {
            "best": wrapped.best,
            "last_epoch": wrapped.last_epoch,
        }

    def load_state_dict(self, state_dict):
        """Load an LR scheduler state dict."""
        wrapped = self.lr_scheduler
        wrapped.best = state_dict["best"]
        if "last_epoch" in state_dict:
            wrapped.last_epoch = state_dict["last_epoch"]

    def step(self, epoch, val_loss=None):
        """
        Feed ``val_loss`` to the plateau scheduler once warmup is over;
        otherwise only record the epoch. Returns the optimizer's current LR.
        """
        plateau_active = val_loss is not None and self.warmup_end is True
        if plateau_active:
            self.lr_scheduler.step(val_loss)
        else:
            self.lr_scheduler.last_epoch = epoch
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """
        Update the learning rate after each update."""
        warmup = self.cfg.warmup_updates
        # if there is warmup
        if warmup > 0:
            if num_updates <= warmup:
                self.lr = self.cfg.warmup_init_lr + num_updates * self.lr_step
                self.optimizer.set_lr(self.lr)
            elif self.warmup_end is False:
                # first update past the warmup window
                self.warmup_end = True
        # else do nothing
        return self.optimizer.get_lr()
StepLRSchedule(FairseqLRScheduler): + """Decay learning rate every k updates by a fixed factor""" + + def __init__(self, cfg: StepLRScheduleConfig, fairseq_optimizer): + super().__init__(cfg, fairseq_optimizer) + self.max_lr = cfg.lr[0] if isinstance(cfg.lr, Collection) else cfg.lr + self.min_lr = cfg.min_lr + self.lr_deacy_period = cfg.lr_deacy_period + self.lr_decay = cfg.lr_decay + self.warmup_updates = cfg.warmup_updates + self.warmup_init_lr = ( + cfg.warmup_init_lr if cfg.warmup_init_lr >= 0 else self.min_lr + ) + + assert self.lr_deacy_period > 0 + assert self.lr_decay <= 1 + assert self.min_lr >= 0 + assert self.max_lr > self.min_lr + + if cfg.warmup_updates > 0: + # linearly warmup for the first cfg.warmup_updates + self.warmup_lr_step = ( + self.max_lr - self.warmup_init_lr + ) / self.warmup_updates + else: + self.warmup_lr_step = 1 + + # initial learning rate + self.lr = self.warmup_init_lr + self.optimizer.set_lr(self.lr) + + def step(self, epoch, val_loss=None): + """Update the learning rate at the end of the given epoch.""" + super().step(epoch, val_loss) + # we don't change the learning rate at epoch boundaries + return self.optimizer.get_lr() + + def step_update(self, num_updates): + """Update the learning rate after each update.""" + if num_updates < self.cfg.warmup_updates: + self.lr = self.warmup_init_lr + num_updates * self.warmup_lr_step + else: + curr_updates = num_updates - self.cfg.warmup_updates + lr_mult = self.lr_decay ** (curr_updates // self.lr_deacy_period) + self.lr = max(self.max_lr * lr_mult, self.min_lr) + + self.optimizer.set_lr(self.lr) + return self.lr diff --git a/fairseq/fairseq/optim/lr_scheduler/tri_stage_lr_scheduler.py b/fairseq/fairseq/optim/lr_scheduler/tri_stage_lr_scheduler.py new file mode 100644 index 0000000..4d5547c --- /dev/null +++ b/fairseq/fairseq/optim/lr_scheduler/tri_stage_lr_scheduler.py @@ -0,0 +1,175 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
@register_lr_scheduler("tri_stage", dataclass=TriStageLRScheduleConfig)
class TriStageLRSchedule(FairseqLRScheduler):
    """Tri-stage learning rate scheduler.

    Implement the learning rate scheduler in https://arxiv.org/pdf/1904.08779.pdf

    Similar to the inverse_square_root scheduler, but tri_stage learning rate
    employs three stages of LR scheduling:

        - warmup stage, starting from `lr` * `init_lr_scale`, linearly
          increased to `lr` in `warmup_steps` iterations

        - hold stage, after `warmup_steps`, keep the LR as `lr` for `hold_steps`
          iterations

        - decay stage, after hold stage, decay LR exponentially to
          `lr` * `final_lr_scale` in `decay_steps`;
          after that LR is kept as `final_lr_scale` * `lr`

    During warmup::

      init_lr = cfg.init_lr_scale * cfg.lr
      lrs = torch.linspace(init_lr, cfg.lr, cfg.warmup_steps)
      lr = lrs[update_num]

    During hold::

      lr = cfg.lr

    During decay::

      decay_factor = - math.log(cfg.final_lr_scale) / cfg.decay_steps
      lr = cfg.lr * exp(- (update_num - warmup_steps - hold_steps) * decay_factor)

    After that::

      lr = cfg.lr * cfg.final_lr_scale
    """

    def __init__(self, cfg: TriStageLRScheduleConfig, optimizer):
        """Derive the stage lengths and rates from *cfg* and set the initial LR.

        Args:
            cfg: scheduler configuration. When ``cfg.phase_ratio`` is set, the
                three stage lengths are computed from ``cfg.max_update``;
                otherwise ``warmup_steps``/``hold_steps``/``decay_steps`` are
                used as given.
            optimizer: optimizer wrapper whose rate is driven through
                ``set_lr``.

        Raises:
            ValueError: if more than one learning rate is configured.
            AssertionError: if the phase ratios do not add up to 1, if
                ``max_update`` is not positive while ratios are used, or if
                all three stage lengths are zero.
        """
        super().__init__(cfg, optimizer)
        if len(cfg.lr) > 1:
            raise ValueError(
                "Cannot use a fixed learning rate schedule with tri-stage lr."
                " Consider --lr-scheduler=fixed instead."
            )

        # calculate LR at each point
        self.peak_lr = cfg.lr[0]
        self.init_lr = cfg.init_lr_scale * cfg.lr[0]
        self.final_lr = cfg.final_lr_scale * cfg.lr[0]

        if cfg.phase_ratio is not None:
            assert cfg.max_update > 0
            # Compare with a tolerance: ratios such as (0.1, 0.2, 0.7) do not
            # sum to exactly 1.0 in binary floating point.
            assert math.isclose(
                sum(cfg.phase_ratio), 1.0
            ), "phase ratios must add up to 1"
            self.warmup_steps = int(cfg.max_update * cfg.phase_ratio[0])
            self.hold_steps = int(cfg.max_update * cfg.phase_ratio[1])
            self.decay_steps = int(cfg.max_update * cfg.phase_ratio[2])
        else:
            self.warmup_steps = cfg.warmup_steps
            self.hold_steps = cfg.hold_steps
            self.decay_steps = cfg.decay_steps

        assert (
            self.warmup_steps + self.hold_steps + self.decay_steps > 0
        ), "please specify steps or phase_ratio"

        self.warmup_rate = (
            (self.peak_lr - self.init_lr) / self.warmup_steps
            if self.warmup_steps != 0
            else 0
        )
        # decay_steps defaults to 0; guard the division the same way as
        # warmup_rate so a schedule without a decay stage can be built.
        self.decay_factor = (
            -math.log(cfg.final_lr_scale) / self.decay_steps
            if self.decay_steps != 0
            else 0
        )

        # initial learning rate
        self.lr = self.init_lr
        self.optimizer.set_lr(self.lr)

    def _decide_stage(self, update_step):
        """
        return stage, and the corresponding steps within the current stage

        Stages are numbered 0 (warmup), 1 (hold), 2 (decay) and 3 (constant
        final LR). The decay stage includes its end point, so the step at
        exactly ``warmup + hold + decay`` still belongs to stage 2.
        """
        if update_step < self.warmup_steps:
            # warmup state
            return 0, update_step

        offset = self.warmup_steps

        if update_step < offset + self.hold_steps:
            # hold stage
            return 1, update_step - offset

        offset += self.hold_steps

        if update_step <= offset + self.decay_steps:
            # decay stage
            return 2, update_step - offset

        offset += self.decay_steps

        # still here ? constant lr stage
        return 3, update_step - offset

    def step(self, epoch, val_loss=None):
        """Update the learning rate at the end of the given epoch."""
        super().step(epoch, val_loss)
        # we don't change the learning rate at epoch boundaries
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Update the learning rate after each update.

        Args:
            num_updates: number of optimizer updates performed so far.

        Returns:
            The learning rate that was just applied to the optimizer.

        Raises:
            ValueError: if ``_decide_stage`` reports an unknown stage.
        """
        stage, steps_in_stage = self._decide_stage(num_updates)
        if stage == 0:
            self.lr = self.init_lr + self.warmup_rate * steps_in_stage
        elif stage == 1:
            self.lr = self.peak_lr
        elif stage == 2:
            self.lr = self.peak_lr * math.exp(-self.decay_factor * steps_in_stage)
        elif stage == 3:
            self.lr = self.final_lr
        else:
            raise ValueError("Undefined stage")

        self.optimizer.set_lr(self.lr)

        return self.lr
@register_lr_scheduler("triangular", dataclass=TriangularLRScheduleConfig)
class TriangularLRSchedule(FairseqLRScheduler):
    """Cyclical learning rate that ramps linearly between two bounds.

    See https://arxiv.org/pdf/1506.01186.pdf for details.
    """

    def __init__(self, cfg: TriangularLRScheduleConfig, optimizer):
        """Store the cycle parameters and start at the lower bound.

        Raises:
            ValueError: if more than one learning rate is configured.
            AssertionError: if ``cfg.max_lr`` is not greater than the
                configured learning rate.
        """
        super().__init__(cfg, optimizer)
        if len(cfg.lr) > 1:
            raise ValueError(
                "Cannot use a fixed learning rate schedule with triangular."
                " Consider --lr-scheduler=fixed instead."
            )

        base_lr = cfg.lr[0]
        assert cfg.max_lr > base_lr, "max_lr must be more than lr"

        # The configured lr is the bottom of the triangle, max_lr its top.
        self.min_lr = base_lr
        self.max_lr = cfg.max_lr
        # Half a period: the number of updates spent on each slope.
        self.stepsize = cfg.lr_period_updates // 2
        self.lr_shrink = cfg.lr_shrink
        self.shrink_min = cfg.shrink_min

        # initial learning rate
        self.lr = self.min_lr
        self.optimizer.set_lr(self.lr)

    def step(self, epoch, val_loss=None):
        """Epoch-boundary hook; the rate itself is left untouched here."""
        super().step(epoch, val_loss)
        return self.optimizer.get_lr()

    def step_update(self, num_updates):
        """Set and return the learning rate for the given update count."""
        half_period = self.stepsize
        cycle = math.floor(num_updates / (2 * half_period))

        # Both bounds (or only the upper one) shrink once per finished cycle.
        scale = self.lr_shrink**cycle
        upper = self.max_lr * scale
        lower = self.min_lr * scale if self.shrink_min else self.min_lr

        # Distance from the peak of the current cycle, in half-periods.
        x = abs(num_updates / half_period - 2 * (cycle + 1) + 1)
        self.lr = lower + (upper - lower) * max(0, (1 - x))

        self.optimizer.set_lr(self.lr)
        return self.lr
class NAG(Optimizer):
    """Nesterov accelerated gradient optimizer with learning-rate correction.

    Each parameter group remembers the rate used on the previous step
    (``lr_old``) so the momentum buffer can be rescaled when the rate changes.
    """

    def __init__(self, params, lr=required, momentum=0, weight_decay=0):
        defaults = {
            "lr": lr,
            "lr_old": lr,
            "momentum": momentum,
            "weight_decay": weight_decay,
        }
        super().__init__(params, defaults)

    @property
    def supports_memory_efficient_fp16(self):
        return True

    @property
    def supports_flat_params(self):
        return True

    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by *closure*, or ``None`` without a closure.
        """
        loss = closure() if closure is not None else None
        half_dtypes = (torch.float16, torch.bfloat16)

        for group in self.param_groups:
            decay = group["weight_decay"]
            mu = group["momentum"]
            lr = group["lr"]
            prev_lr = group.get("lr_old", lr)
            # Ratio between the current and the previous learning rate.
            lr_ratio = lr / prev_lr if prev_lr > 0 else lr

            for p in group["params"]:
                if p.grad is None:
                    continue

                # Half-precision parameters are updated on a float32 copy
                # that is written back at the end.
                is_half = p.data.dtype in half_dtypes
                work = p.data.float() if is_half else p.data

                grad = p.grad.data.float()
                state = self.state[p]
                if "momentum_buffer" in state:
                    state["momentum_buffer"] = state["momentum_buffer"].to(grad)
                else:
                    state["momentum_buffer"] = torch.zeros_like(grad)
                velocity = state["momentum_buffer"]

                if decay != 0:
                    work.mul_(1 - lr * decay)
                work.add_(velocity, alpha=mu * mu * lr_ratio)
                work.add_(grad, alpha=-(1 + mu) * lr)

                velocity.mul_(mu * lr_ratio).add_(grad, alpha=-lr)

                if is_half:
                    p.data.copy_(work)

            group["lr_old"] = lr

        return loss
@register_optimizer("sgd")
class SGD(LegacyFairseqOptimizer):
    """Legacy fairseq wrapper around ``torch.optim.SGD``."""

    def __init__(self, args, params):
        super().__init__(args)
        sgd_kwargs = self.optimizer_config
        self._optimizer = torch.optim.SGD(params, **sgd_kwargs)

    @staticmethod
    def add_args(parser):
        """Add optimizer-specific arguments to the parser."""
        # fmt: off
        parser.add_argument(
            '--momentum', default=0.0, type=float, metavar='M',
            help='momentum factor',
        )
        parser.add_argument(
            '--weight-decay', '--wd', default=0.0, type=float, metavar='WD',
            help='weight decay',
        )
        # fmt: on

    @property
    def optimizer_config(self):
        """
        Keyword arguments handed to ``torch.optim.SGD``.

        They override optimizer args stored in checkpoints, which makes it
        possible to load a checkpoint and resume training with a different
        set of optimizer args, e.g., with a different learning rate.
        """
        args = self.args
        return dict(
            lr=args.lr[0],
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )

    @property
    def supports_flat_params(self):
        return True
def shard_(optimizer, group):
    """Replace ``optimizer.optimizer`` in place with a fairscale ``OSS`` wrapper.

    Args:
        optimizer: fairseq optimizer wrapper; its ``optimizer`` attribute is
            read for the parameter groups and optimizer class, then rebound to
            the new wrapper. Its ``optimizer_config`` supplies the keyword
            arguments.
        group: process group passed to ``OSS``.

    Raises:
        ImportError: if fairscale could not be imported.
    """
    if not _has_fairscale:
        raise ImportError(
            "\n\nPlease install the fairscale package:" "\n\n pip install fairscale"
        )

    class FairseqOSS(OSS):
        @property
        def disable_mem_eff_fp16_loading_hack(self):
            return True

        def __getattr__(self, name):
            # Only "supports*" capability flags are forwarded to the wrapped
            # optimizer; every other missing attribute is a normal error.
            if not name.startswith("supports") or not hasattr(self.optim, name):
                raise AttributeError(
                    "'FairseqOSS' object has no attribute {0!r}".format(name)
                )
            return getattr(self.optim, name)

        def broadcast_global_state_dict(
            self, state_dict: Dict[str, Any]
        ) -> Dict[str, Any]:
            """
            Broadcasts the entire state_dict to all other ranks
            each rank is responsible to load their own partition of data
            """
            return utils.broadcast_object(
                state_dict,
                src_rank=0,
                group=self.group,
            )

    inner = optimizer.optimizer
    inner_cls = type(inner)

    optimizer.optimizer = FairseqOSS(
        inner.param_groups,
        inner_cls,
        group=group,
        **optimizer.optimizer_config
    )
def get_generation_parser(interactive=False, default_task="translation"):
    """Build the "Generation" parser.

    Dataset, distributed-training, generation and checkpoint arguments are
    always registered; interactive arguments only when *interactive* is true.
    """
    parser = get_parser("Generation", default_task)

    # (registration function, extra keyword arguments), applied in order.
    steps = [
        (add_dataset_args, {"gen": True}),
        (add_distributed_training_args, {"default_world_size": 1}),
        (add_generation_args, {}),
        (add_checkpoint_args, {}),
    ]
    if interactive:
        steps.append((add_interactive_args, {}))

    for register, extra in steps:
        register(parser, **extra)
    return parser
def parse_args_and_arch(
    parser: argparse.ArgumentParser,
    input_args: Optional[List[str]] = None,
    parse_known: bool = False,
    suppress_defaults: bool = False,
    modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None,
):
    """
    Parse command-line arguments, adding model/task/registry-specific options
    to *parser* along the way.

    Args:
        parser (ArgumentParser): the parser
        input_args (List[str]): strings to parse, defaults to sys.argv
        parse_known (bool): only parse known arguments, similar to
            `ArgumentParser.parse_known_args`
        suppress_defaults (bool): parse while ignoring all default values
        modify_parser (Optional[Callable[[ArgumentParser], None]]):
            function to modify the parser, e.g., to set default values

    Returns:
        The parsed namespace, or a ``(namespace, extra)`` tuple when
        *parse_known* is true, where ``extra`` holds the unrecognized strings.

    Raises:
        RuntimeError: if ``--arch`` names an architecture that is in neither
            the architecture registry nor the model registry.
        ValueError: if both ``--fp16`` and TPU mode end up enabled.
    """
    if suppress_defaults:
        # Parse args without any default values. This requires us to parse
        # twice, once to identify all the necessary task/model args, and a second
        # time with all defaults set to None.
        parsed = parse_args_and_arch(
            parser,
            input_args=input_args,
            parse_known=parse_known,
            suppress_defaults=False,
        )
        # With parse_known the inner call returns (namespace, extra); only the
        # namespace is needed to learn which option names exist.
        args = parsed[0] if parse_known else parsed
        suppressed_parser = argparse.ArgumentParser(add_help=False, parents=[parser])
        suppressed_parser.set_defaults(**{k: None for k in vars(args)})
        if parse_known:
            args, extra = suppressed_parser.parse_known_args(input_args)
        else:
            args = suppressed_parser.parse_args(input_args)
            extra = None
        explicit = argparse.Namespace(
            **{k: v for k, v in vars(args).items() if v is not None}
        )
        return (explicit, extra) if parse_known else explicit

    from fairseq.models import ARCH_MODEL_REGISTRY, ARCH_CONFIG_REGISTRY, MODEL_REGISTRY

    # Before creating the true parser, we need to import optional user module
    # in order to eagerly import custom tasks, optimizers, architectures, etc.
    usr_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
    usr_parser.add_argument("--user-dir", default=None)
    usr_args, _ = usr_parser.parse_known_args(input_args)
    utils.import_user_module(usr_args)

    if modify_parser is not None:
        modify_parser(parser)

    # The parser doesn't know about model/criterion/optimizer-specific args, so
    # we parse twice. First we parse the model/criterion/optimizer, then we
    # parse a second time after adding the *-specific arguments.
    # If input_args is given, we will parse those args instead of sys.argv.
    args, _ = parser.parse_known_args(input_args)

    # Add model-specific args to parser.
    if hasattr(args, "arch"):
        model_specific_group = parser.add_argument_group(
            "Model-specific configuration",
            # Only include attributes which are explicitly given as command-line
            # arguments or which have default values.
            argument_default=argparse.SUPPRESS,
        )
        if args.arch in ARCH_MODEL_REGISTRY:
            ARCH_MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        elif args.arch in MODEL_REGISTRY:
            MODEL_REGISTRY[args.arch].add_args(model_specific_group)
        else:
            raise RuntimeError(
                "unknown model architecture: {!r}".format(args.arch)
            )

    if hasattr(args, "task"):
        from fairseq.tasks import TASK_REGISTRY

        TASK_REGISTRY[args.task].add_args(parser)
    if getattr(args, "use_bmuf", False):
        # hack to support extra args for block distributed data parallelism
        from fairseq.optim.bmuf import FairseqBMUF

        FairseqBMUF.add_args(parser)

    # Add *-specific args to parser.
    from fairseq.registry import REGISTRIES

    for registry_name, REGISTRY in REGISTRIES.items():
        choice = getattr(args, registry_name, None)
        if choice is not None:
            cls = REGISTRY["registry"][choice]
            if hasattr(cls, "add_args"):
                cls.add_args(parser)
            elif hasattr(cls, "__dataclass"):
                gen_parser_from_dataclass(parser, cls.__dataclass())

    # Modify the parser a second time, since defaults may have been reset
    if modify_parser is not None:
        modify_parser(parser)

    # Parse a second time.
    if parse_known:
        args, extra = parser.parse_known_args(input_args)
    else:
        args = parser.parse_args(input_args)
        extra = None

    # Post-process args.
    # A missing or None validation batch size falls back to the training one.
    if getattr(args, "batch_size_valid", None) is None:
        args.batch_size_valid = args.batch_size
    if hasattr(args, "max_tokens_valid") and args.max_tokens_valid is None:
        args.max_tokens_valid = args.max_tokens
    if getattr(args, "memory_efficient_fp16", False):
        args.fp16 = True
    if getattr(args, "memory_efficient_bf16", False):
        args.bf16 = True
    args.tpu = getattr(args, "tpu", False)
    args.bf16 = getattr(args, "bf16", False)
    if args.bf16:
        args.tpu = True
    if args.tpu and args.fp16:
        raise ValueError("Cannot combine --fp16 and --tpu, use --bf16 on TPUs")

    if getattr(args, "seed", None) is None:
        args.seed = 1  # default seed for training
        args.no_seed_provided = True
    else:
        args.no_seed_provided = False

    if getattr(args, "update_epoch_batch_itr", None) is None:
        if hasattr(args, "grouped_shuffling"):
            args.update_epoch_batch_itr = args.grouped_shuffling
        else:
            args.grouped_shuffling = False
            args.update_epoch_batch_itr = False

    # Apply architecture configuration.
    if hasattr(args, "arch") and args.arch in ARCH_CONFIG_REGISTRY:
        ARCH_CONFIG_REGISTRY[args.arch](args)

    if parse_known:
        return args, extra
    else:
        return args
def add_preprocess_args(parser):
    """Register the preprocessing options on *parser*.

    All options are added to a single "Preprocessing" argument group so they
    are listed together in ``--help``.

    Args:
        parser: the ``argparse.ArgumentParser`` to extend.

    Returns:
        The same *parser*, for chaining.
    """
    group = parser.add_argument_group("Preprocessing")
    # fmt: off
    group.add_argument("-s", "--source-lang", default=None, metavar="SRC",
                       help="source language")
    group.add_argument("-t", "--target-lang", default=None, metavar="TARGET",
                       help="target language")
    group.add_argument("--trainpref", metavar="FP", default=None,
                       help="train file prefix (also used to build dictionaries)")
    group.add_argument("--validpref", metavar="FP", default=None,
                       help="comma separated, valid file prefixes "
                            "(words missing from train set are replaced with <unk>)")
    group.add_argument("--testpref", metavar="FP", default=None,
                       help="comma separated, test file prefixes "
                            "(words missing from train set are replaced with <unk>)")
    group.add_argument("--align-suffix", metavar="FP", default=None,
                       help="alignment file suffix")
    group.add_argument("--destdir", metavar="DIR", default="data-bin",
                       help="destination dir")
    group.add_argument("--thresholdtgt", metavar="N", default=0, type=int,
                       help="map words appearing less than threshold times to unknown")
    group.add_argument("--thresholdsrc", metavar="N", default=0, type=int,
                       help="map words appearing less than threshold times to unknown")
    group.add_argument("--tgtdict", metavar="FP",
                       help="reuse given target dictionary")
    group.add_argument("--srcdict", metavar="FP",
                       help="reuse given source dictionary")
    group.add_argument("--nwordstgt", metavar="N", default=-1, type=int,
                       help="number of target words to retain")
    group.add_argument("--nwordssrc", metavar="N", default=-1, type=int,
                       help="number of source words to retain")
    group.add_argument("--alignfile", metavar="ALIGN", default=None,
                       help="an alignment file (optional)")
    # Registered on the group like every other option here; parsing is the
    # same as adding it to the parser directly, only the help layout differs.
    group.add_argument('--dataset-impl', metavar='FORMAT', default='mmap',
                       choices=get_available_dataset_impl(),
                       help='output dataset implementation')
    group.add_argument("--joined-dictionary", action="store_true",
                       help="Generate joined dictionary")
    group.add_argument("--only-source", action="store_true",
                       help="Only process the source language")
    group.add_argument("--padding-factor", metavar="N", default=8, type=int,
                       help="Pad dictionary size to be multiple of N")
    group.add_argument("--workers", metavar="N", default=1, type=int,
                       help="number of parallel workers")
    group.add_argument("--dict-only", action='store_true',
                       help="if true, only builds a dictionary and then exits")
    # fmt: on
    return parser
def add_speech_generation_args(parser):
    """Create the "Speech Generation" argument group and return it."""
    speech_group = parser.add_argument_group("Speech Generation")
    # NOTE: remove_bpe is not needed
    add_common_eval_args(speech_group)

    # fmt: off
    eos_option = dict(
        default=0.5,
        type=float,
        help='terminate when eos probability exceeds this',
    )
    speech_group.add_argument('--eos_prob_threshold', **eos_option)
    # fmt: on
    return speech_group
def get_args(
    data: Union[str, Path],
    task: str = "translation",
    arch: str = "transformer",
    **overrides
):
    """Build a training namespace for *data*, *task* and *arch*.

    The training parser is run on a synthetic command line made of the data
    path plus ``--task`` and ``--arch``; every keyword in *overrides* is then
    set on the resulting namespace as an attribute.
    """
    training_parser = get_training_parser(task)
    command_line = [str(data), "--task", task, "--arch", arch]
    namespace = parse_args_and_arch(training_parser, command_line)

    for name in overrides:
        setattr(namespace, name, overrides[name])

    return namespace
class Quantizer(object):
    """Drive iterative product quantization (PQ) during training.

    The quantization config lists the layers to quantize at each stage.
    Training time is split evenly between stages, measured either in epochs
    or in updates, and ``step()`` advances to the next stage.
    """

    def __init__(self, config_path, max_epoch, max_update):
        """Load the quantization config and derive the stage schedule.

        Args:
            config_path: path to a YAML quantization config; when falsy, the
                config is built from an empty mapping instead.
            max_epoch: total number of training epochs (values <= 0 mean
                "not set").
            max_update: total number of training updates (values <= 0 mean
                "not set").

        Raises:
            ImportError: if the ``yaml`` module cannot be imported.
            AssertionError: if the configured limit is not evenly divisible by
                the number of quantization stages, or if not exactly one of
                *max_epoch* and *max_update* is set.
        """
        try:
            import yaml
        except ImportError as err:
            # The importable module is "yaml", but the distribution to install
            # is named "pyyaml".
            raise ImportError(
                "Please install yaml with: pip install pyyaml"
            ) from err

        # parse config
        if config_path:
            with open(config_path) as config_file:
                config = quantization_options.parse_config_yaml(
                    yaml.safe_load(config_file)
                )
        else:
            config = quantization_options.parse_config_yaml({})

        self.n_centroids_config = config["n_centroids"]
        self.block_sizes_config = config["block_sizes"]
        self.layers_to_quantize = config["layers_to_quantize"]

        # We assume that training will run for a fixed number of epochs
        # (or updates) and that we should train for equal durations
        # between iterations of PQ.
        num_iterations = len(self.layers_to_quantize)
        if max_epoch > 0:
            assert max_epoch % num_iterations == 0, (
                "for iterative PQ, --max-epoch (={}) must be evenly divisible by "
                "len(layers_to_quantize) (={})".format(max_epoch, num_iterations)
            )
            self.epoch_schedule = max_epoch // num_iterations
        else:
            self.epoch_schedule = None
        if max_update > 0:
            assert max_update % num_iterations == 0, (
                "for iterative PQ, --max-update (={}) must be evenly divisible by "
                "len(layers_to_quantize) (={})".format(max_update, num_iterations)
            )
            self.update_schedule = max_update // num_iterations
        else:
            self.update_schedule = None
        # Exactly one schedule must be active: this fails both when the two
        # limits are given together and when neither is given.
        assert (self.epoch_schedule is not None) ^ (
            self.update_schedule is not None
        ), "for iterative PQ, must specify exactly one of --max-update and --max-epoch"

        # 0 is a special value for quantization step, which will force
        # the first call to begin_epoch() to call step()
        self.quantization_step = 0

    def set_trainer(self, trainer):
        """Attach the trainer and start tracking the size of its model."""
        self.trainer = trainer
        self.size_tracker = pq.SizeTracker(self.trainer.get_model())

    def step(self):
        """Move to the next stage of quantization."""
        if self.quantization_step >= len(self.layers_to_quantize):
            # Maybe we just finished the last training step or we loaded
            # a checkpoint for an iterative PQ model which previously
            # finished training. Either way, don't quantize again.
            return

        logger.info(
            "quantizing model (step={}; layers_to_quantize[step]={})".format(
                self.quantization_step, self.layers_to_quantize[self.quantization_step]
            )
        )
        quantized_layers = pq.quantize_model_(
            self.trainer.get_model(),
            self.size_tracker,
            self.layers_to_quantize,
            self.block_sizes_config,
            self.n_centroids_config,
            step=self.quantization_step,
        )
        logger.info("quantized layers: {}".format(quantized_layers))
        logger.info(self.size_tracker)

        self.quantization_step += 1

        # reinitialize the Trainer since model parameters have changed
        self.trainer.reinitialize()

    def begin_epoch(self, epoch):
        """Called at the beginning of each epoch (epochs start at 1)."""
        if (
            (
                self.epoch_schedule is not None
                and epoch > 0
                and (epoch - 1) % self.epoch_schedule == 0
            )
            # we always step once in the beginning, even if using
            # update-based quantization
            or self.quantization_step == 0
        ):
            self.step()

    def step_update(self, num_updates):
        """Called at the end of each step."""
        if (
            self.update_schedule is not None
            and num_updates > 0
            and num_updates % self.update_schedule == 0
        ):
            self.step()

    def state_dict(self):
        """Return the quantizer configuration and progress as a plain dict."""
        return {
            "n_centroids_config": self.n_centroids_config,
            "block_sizes_config": self.block_sizes_config,
            "layers_to_quantize": self.layers_to_quantize,
            "epoch_schedule": self.epoch_schedule,
            "update_schedule": self.update_schedule,
            "quantization_step": self.quantization_step,
        }

    def load_state_dict(self, state_dict):
        """Restore the fields written by ``state_dict()``.

        Raises:
            KeyError: if *state_dict* lacks one of the expected keys.
        """
        self.n_centroids_config = state_dict["n_centroids_config"]
        self.block_sizes_config = state_dict["block_sizes_config"]
        self.layers_to_quantize = state_dict["layers_to_quantize"]
        self.epoch_schedule = state_dict["epoch_schedule"]
        self.update_schedule = state_dict["update_schedule"]
        self.quantization_step = state_dict["quantization_step"]
# Maps each registry name (without the leading "--", dashes replaced by
# underscores) to its class registry, default choice and dataclass registry.
REGISTRIES = {}


def setup_registry(registry_name: str, base_class=None, default=None, required=False):
    """Create a named registry and return its build/register helpers.

    Args:
        registry_name: CLI-style name; must start with ``"--"``. The prefix is
            stripped and dashes become underscores.
        base_class: if given, every registered class must subclass it.
        default: stored in ``REGISTRIES`` as the registry's default choice.
        required: if True, ``build_x`` raises when no choice can be determined.

    Returns:
        ``(build_x, register_x, REGISTRY, DATACLASS_REGISTRY)``.

    NOTE(review): when ``registry_name`` is already present in ``REGISTRIES``
    this returns ``None`` rather than the 4-tuple, so a caller that unpacks
    the result would fail on a second call with the same name.
    """
    assert registry_name.startswith("--")
    registry_name = registry_name[2:].replace("-", "_")

    REGISTRY = {}
    # NOTE(review): nothing in this function adds to this set, so the
    # duplicate-class-name check in register_x_cls cannot trigger as written.
    REGISTRY_CLASS_NAMES = set()
    DATACLASS_REGISTRY = {}

    # maintain a registry of all registries
    if registry_name in REGISTRIES:
        return  # registry already exists
    REGISTRIES[registry_name] = {
        "registry": REGISTRY,
        "default": default,
        "dataclass_registry": DATACLASS_REGISTRY,
    }

    def build_x(cfg: Union[DictConfig, str, Namespace], *extra_args, **extra_kwargs):
        """Resolve the registered choice named by ``cfg`` and build it.

        The choice comes from ``cfg._name`` (DictConfig), from ``cfg`` itself
        (str), or from the attribute named after the registry (anything else).
        Returns ``None`` when no choice is set and the registry is not
        required; an unregistered choice raises ``KeyError`` from the lookup.
        """
        if isinstance(cfg, DictConfig):
            choice = cfg._name

            if choice and choice in DATACLASS_REGISTRY:
                from_checkpoint = extra_kwargs.get("from_checkpoint", False)
                dc = DATACLASS_REGISTRY[choice]
                cfg = merge_with_parent(dc(), cfg, remove_missing=from_checkpoint)
        elif isinstance(cfg, str):
            choice = cfg
            if choice in DATACLASS_REGISTRY:
                # a bare name gets the dataclass defaults as its config
                cfg = DATACLASS_REGISTRY[choice]()
        else:
            choice = getattr(cfg, registry_name, None)
            if choice in DATACLASS_REGISTRY:
                cfg = DATACLASS_REGISTRY[choice].from_namespace(cfg)

        if choice is None:
            if required:
                raise ValueError("{} is required!".format(registry_name))
            return None

        cls = REGISTRY[choice]
        # prefer a ``build_<registry_name>`` attribute over the constructor
        if hasattr(cls, "build_" + registry_name):
            builder = getattr(cls, "build_" + registry_name)
        else:
            builder = cls

        # ``from_checkpoint`` is consumed here and not forwarded to the builder
        if "from_checkpoint" in extra_kwargs:
            del extra_kwargs["from_checkpoint"]

        return builder(cfg, *extra_args, **extra_kwargs)

    def register_x(name, dataclass=None):
        """Return a class decorator that registers a class under ``name``."""

        def register_x_cls(cls):
            if name in REGISTRY:
                raise ValueError(
                    "Cannot register duplicate {} ({})".format(registry_name, name)
                )
            if cls.__name__ in REGISTRY_CLASS_NAMES:
                raise ValueError(
                    "Cannot register {} with duplicate class name ({})".format(
                        registry_name, cls.__name__
                    )
                )
            if base_class is not None and not issubclass(cls, base_class):
                raise ValueError(
                    "{} must extend {}".format(cls.__name__, base_class.__name__)
                )

            if dataclass is not None and not issubclass(dataclass, FairseqDataclass):
                raise ValueError(
                    "Dataclass {} must extend FairseqDataclass".format(dataclass)
                )

            cls.__dataclass = dataclass
            if cls.__dataclass is not None:
                DATACLASS_REGISTRY[name] = cls.__dataclass

                # store a default-valued config node under this registry's group
                cs = ConfigStore.instance()
                node = dataclass()
                node._name = name
                cs.store(name=name, group=registry_name, node=node, provider="fairseq")

            REGISTRY[name] = cls

            return cls

        return register_x_cls

    return build_x, register_x, REGISTRY, DATACLASS_REGISTRY
class BaseScorer(ABC):
    """Abstract scorer that accumulates reference/prediction pairs.

    Subclasses implement :meth:`score` and :meth:`result_string` over the
    collected ``self.ref`` and ``self.pred`` lists.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        # parallel lists: ref[i] is the reference for pred[i]
        self.ref = []
        self.pred = []

    def add_string(self, ref, pred):
        """Record one reference/prediction pair."""
        self.ref.append(ref)
        self.pred.append(pred)

    @abstractmethod
    def score(self) -> float:
        """Return the metric value for everything added so far."""
        pass

    @abstractmethod
    def result_string(self) -> str:
        """Return a human-readable summary of the metric."""
        pass


# Creates the "scoring" registry. ``register_scorer`` must be defined before
# the sibling modules are imported by the loop at the bottom of this file.
_build_scorer, register_scorer, SCORER_REGISTRY, _ = registry.setup_registry(
    "--scoring", default="bleu"
)


def build_scorer(choice, tgt_dict):
    """Build the scorer selected by ``choice``.

    ``choice`` is either a ``DictConfig`` (its ``_name`` is the scorer name)
    or the scorer name itself. ``"bleu"`` is special-cased: its config is
    filled from ``tgt_dict``'s pad/eos/unk indices. Every other choice is
    delegated to the registry builder, which does not receive ``tgt_dict``.
    """
    _choice = choice._name if isinstance(choice, DictConfig) else choice

    if _choice == "bleu":
        # imported here, not at module top, because bleu itself imports
        # BaseScorer/register_scorer from this module
        from fairseq.scoring import bleu

        return bleu.Scorer(
            bleu.BleuConfig(pad=tgt_dict.pad(), eos=tgt_dict.eos(), unk=tgt_dict.unk())
        )
    return _build_scorer(choice)


# automatically import any Python files in the current directory
# (names starting with "_" are skipped)
for file in sorted(os.listdir(os.path.dirname(__file__))):
    if file.endswith(".py") and not file.startswith("_"):
        module = file[: file.find(".py")]
        importlib.import_module("fairseq.scoring." + module)
@dataclass
class BertScoreScorerConfig(FairseqDataclass):
    """Options for the BERTScore scorer."""

    bert_score_lang: str = field(default="en", metadata={"help": "BERTScore language"})


@register_scorer("bert_score", dataclass=BertScoreScorerConfig)
class BertScoreScorer(BaseScorer):
    """Scorer that reports the mean of the third value returned by
    ``bert_score.score`` over all collected prediction/reference pairs."""

    def __init__(self, cfg):
        super().__init__(cfg)
        # bert_score is optional, so it is only imported when this scorer is built
        try:
            import bert_score as bert_score_module
        except ImportError:
            raise ImportError("Please install BERTScore: pip install bert-score")

        self.scores = None
        self._bert_score = bert_score_module
        self.cfg = cfg

    def add_string(self, ref, pred):
        """Record one reference/prediction pair."""
        self.ref.append(ref)
        self.pred.append(pred)

    def score(self, order=4):
        """Compute per-pair scores, cache them in ``self.scores`` and return their mean.

        ``order`` is accepted for interface parity with other scorers and is unused.
        """
        language = self.cfg.bert_score_lang
        _, _, third = self._bert_score.score(self.pred, self.ref, lang=language)
        self.scores = third
        self.scores = self.scores.numpy()
        return np.mean(self.scores)

    def result_string(self, order=4):
        """Return the mean score formatted to four decimals."""
        mean_score = self.score()
        return f"BERTScore: {mean_score:.4f}"
class BleuStat(ctypes.Structure):
    """C-layout accumulator passed by reference to the libbleu functions.

    Holds reference/prediction lengths plus match and count fields for
    n-gram orders 1 through 4.
    """

    _fields_ = [
        ("reflen", ctypes.c_size_t),
        ("predlen", ctypes.c_size_t),
        ("match1", ctypes.c_size_t),
        ("count1", ctypes.c_size_t),
        ("match2", ctypes.c_size_t),
        ("count2", ctypes.c_size_t),
        ("match3", ctypes.c_size_t),
        ("count3", ctypes.c_size_t),
        ("match4", ctypes.c_size_t),
        ("count4", ctypes.c_size_t),
    ]


@dataclass
class SacrebleuConfig(FairseqDataclass):
    """Options for the sacreBLEU-backed scorer."""

    sacrebleu_tokenizer: EvaluationTokenizer.ALL_TOKENIZER_TYPES = field(
        default="13a", metadata={"help": "tokenizer"}
    )
    sacrebleu_lowercase: bool = field(
        default=False, metadata={"help": "apply lowercasing"}
    )
    sacrebleu_char_level: bool = field(
        default=False, metadata={"help": "evaluate at character level"}
    )


@register_scorer("sacrebleu", dataclass=SacrebleuConfig)
class SacrebleuScorer(BaseScorer):
    """Corpus BLEU over strings, tokenized with ``EvaluationTokenizer``."""

    def __init__(self, cfg):
        super(SacrebleuScorer, self).__init__(cfg)
        import sacrebleu

        self.sacrebleu = sacrebleu
        self.tokenizer = EvaluationTokenizer(
            tokenizer_type=cfg.sacrebleu_tokenizer,
            lowercase=cfg.sacrebleu_lowercase,
            character_tokenization=cfg.sacrebleu_char_level,
        )

    def add_string(self, ref, pred):
        """Tokenize and record one reference/prediction pair."""
        self.ref.append(self.tokenizer.tokenize(ref))
        self.pred.append(self.tokenizer.tokenize(pred))

    def _score(self, order=4):
        """Return the sacreBLEU result object; only ``order == 4`` is supported."""
        if order != 4:
            raise NotImplementedError
        # tokenization and lowercasing are performed by self.tokenizer instead.
        return self.sacrebleu.corpus_bleu(self.pred, [self.ref], tokenize="none")

    def score(self, order=4):
        """Return the numeric corpus BLEU score."""
        return self._score(order).score

    def result_string(self, order=4):
        """Return sacreBLEU's formatted result line."""
        return self._score(order).format()


@dataclass
class BleuConfig(FairseqDataclass):
    """Special-token indices used by the tensor-based BLEU scorer."""

    pad: int = field(default=1, metadata={"help": "padding index"})
    eos: int = field(default=2, metadata={"help": "eos index"})
    unk: int = field(default=3, metadata={"help": "unk index"})


@register_scorer("bleu", dataclass=BleuConfig)
class Scorer(object):
    """BLEU over token-id tensors, accumulated by the ``libbleu`` extension.

    Statistics live in a :class:`BleuStat` that the C functions update in
    place; the Python side only derives precision, brevity penalty and the
    final score from those counters.
    """

    def __init__(self, cfg):
        self.stat = BleuStat()
        self.pad = cfg.pad
        self.eos = cfg.eos
        self.unk = cfg.unk

        try:
            from fairseq import libbleu
        except ImportError as e:
            sys.stderr.write(
                "ERROR: missing libbleu.so. run `pip install --editable .`\n"
            )
            raise e

        self.C = ctypes.cdll.LoadLibrary(libbleu.__file__)

        self.reset()

    def reset(self, one_init=False):
        """Re-initialise the statistics via ``bleu_one_init`` or ``bleu_zero_init``."""
        if one_init:
            self.C.bleu_one_init(ctypes.byref(self.stat))
        else:
            self.C.bleu_zero_init(ctypes.byref(self.stat))

    def add(self, ref, pred):
        """Accumulate statistics for one reference/prediction pair.

        Raises:
            TypeError: if either argument is not a ``torch.IntTensor``.
        """
        if not isinstance(ref, torch.IntTensor):
            raise TypeError("ref must be a torch.IntTensor (got {})".format(type(ref)))
        if not isinstance(pred, torch.IntTensor):
            raise TypeError("pred must be a torch.IntTensor(got {})".format(type(pred)))

        # don't match unknown words
        rref = ref.clone()
        assert not rref.lt(0).any()
        rref[rref.eq(self.unk)] = -999

        rref = rref.contiguous().view(-1)
        pred = pred.contiguous().view(-1)

        self.C.bleu_add(
            ctypes.byref(self.stat),
            ctypes.c_size_t(rref.size(0)),
            ctypes.c_void_p(rref.data_ptr()),
            ctypes.c_size_t(pred.size(0)),
            ctypes.c_void_p(pred.data_ptr()),
            ctypes.c_int(self.pad),
            ctypes.c_int(self.eos),
        )

    def score(self, order=4):
        """Return BLEU on a 0-100 scale using n-gram orders ``1..order``."""
        psum = sum(
            math.log(p) if p > 0 else float("-Inf") for p in self.precision()[:order]
        )
        return self.brevity() * math.exp(psum / order) * 100

    def precision(self):
        """Return the four n-gram precisions (0 where the count is 0)."""

        def ratio(a, b):
            return a / b if b > 0 else 0

        return [
            ratio(self.stat.match1, self.stat.count1),
            ratio(self.stat.match2, self.stat.count2),
            ratio(self.stat.match3, self.stat.count3),
            ratio(self.stat.match4, self.stat.count4),
        ]

    def brevity(self):
        """Return the brevity penalty ``min(1, exp(1 - reflen / predlen))``.

        With no predicted tokens the ratio is undefined; return the limit of
        the formula (0.0) when there are reference tokens, and 1.0 when both
        lengths are zero, instead of raising ``ZeroDivisionError``.
        """
        if self.stat.predlen == 0:
            return 1.0 if self.stat.reflen == 0 else 0.0
        r = self.stat.reflen / self.stat.predlen
        return min(1, math.exp(1 - r))

    def result_string(self, order=4):
        """Return a one-line summary: score, precisions, BP, ratio and lengths."""
        assert order <= 4, "BLEU scores for order > 4 aren't supported"
        fmt = "BLEU{} = {:2.2f}, {:2.1f}"
        for _ in range(1, order):
            fmt += "/{:2.1f}"
        fmt += " (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})"
        bleup = [p * 100 for p in self.precision()[:order]]
        # Same convention as precision(): report 0 when the denominator is 0
        # so that summarising empty statistics does not raise.
        length_ratio = (
            self.stat.predlen / self.stat.reflen if self.stat.reflen > 0 else 0.0
        )
        return fmt.format(
            order,
            self.score(order=order),
            *bleup,
            self.brevity(),
            length_ratio,
            self.stat.predlen,
            self.stat.reflen
        )
@dataclass
class ChrFScorerConfig(FairseqDataclass):
    """The chrF scorer takes no options."""

    pass


@register_scorer("chrf", dataclass=ChrFScorerConfig)
class ChrFScorer(BaseScorer):
    """Corpus-level chrF computed with sacreBLEU."""

    def __init__(self, args):
        super(ChrFScorer, self).__init__(args)
        import sacrebleu

        self.sacrebleu = sacrebleu

    def add_string(self, ref, pred):
        """Record one reference/prediction pair."""
        self.ref.append(ref)
        self.pred.append(pred)

    def _score(self, order=4):
        """Return the sacreBLEU chrF result object.

        Raises:
            NotImplementedError: if ``order`` is not 4.
        """
        if order != 4:
            raise NotImplementedError
        return self.sacrebleu.corpus_chrf(self.pred, [self.ref])

    def score(self, order=4):
        """Return the numeric chrF score.

        The value is read from the result object; the formatted string
        produced by :meth:`result_string` has no ``score`` attribute.
        """
        return self._score(order).score

    def result_string(self, order=4):
        """Return sacreBLEU's formatted chrF result line."""
        return self._score(order).format()
@dataclass
class MeteorScorerConfig(FairseqDataclass):
    """The METEOR scorer takes no options."""

    pass


@register_scorer("meteor", dataclass=MeteorScorerConfig)
class MeteorScorer(BaseScorer):
    """Mean of per-pair METEOR scores computed with NLTK."""

    def __init__(self, args):
        super(MeteorScorer, self).__init__(args)
        try:
            import nltk
        except ImportError:
            raise ImportError("Please install nltk to use METEOR scorer")

        self.nltk = nltk
        self.scores = []

    def add_string(self, ref, pred):
        """Record one reference/prediction pair."""
        self.ref.append(ref)
        self.pred.append(pred)

    def score(self, order=4):
        """Score every pair, cache the list in ``self.scores`` and return the mean.

        ``order`` is accepted for interface parity with other scorers and is unused.
        """
        self.scores = [
            self.nltk.translate.meteor_score.single_meteor_score(r, p)
            for r, p in zip(self.ref, self.pred)
        ]
        return np.mean(self.scores)

    def result_string(self, order=4):
        """Return the mean METEOR score formatted to four decimals."""
        return f"METEOR: {self.score():.4f}"


# Compare the whole major-version component; looking only at the first
# character would misread a two-digit major version such as "10.x" as 1.
SACREBLEU_V2_ABOVE = int(sb.__version__.split(".")[0]) >= 2


class EvaluationTokenizer(object):
    """A generic evaluation-time tokenizer, which leverages built-in tokenizers
    in sacreBLEU (https://github.com/mjpost/sacrebleu). It additionally provides
    lowercasing, punctuation removal and character tokenization, which are
    applied after sacreBLEU tokenization.

    Args:
        tokenizer_type (str): the type of sacreBLEU tokenizer to apply.
        lowercase (bool): lowercase the text.
        punctuation_removal (bool): remove punctuation (based on unicode
        category) from text.
        character_tokenization (bool): tokenize the text to characters.
    """

    SPACE = chr(32)
    # stands in for a real space when text is split into characters
    SPACE_ESCAPE = chr(9601)
    _ALL_TOKENIZER_TYPES = (
        sb.BLEU.TOKENIZERS
        if SACREBLEU_V2_ABOVE
        else ["none", "13a", "intl", "zh", "ja-mecab"]
    )
    ALL_TOKENIZER_TYPES = ChoiceEnum(_ALL_TOKENIZER_TYPES)

    def __init__(
        self,
        tokenizer_type: str = "13a",
        lowercase: bool = False,
        punctuation_removal: bool = False,
        character_tokenization: bool = False,
    ):

        assert (
            tokenizer_type in self._ALL_TOKENIZER_TYPES
        ), f"{tokenizer_type}, {self._ALL_TOKENIZER_TYPES}"
        self.lowercase = lowercase
        self.punctuation_removal = punctuation_removal
        self.character_tokenization = character_tokenization
        # the tokenizer is looked up differently before and after sacreBLEU 2
        if SACREBLEU_V2_ABOVE:
            self.tokenizer = sb.BLEU(tokenize=str(tokenizer_type)).tokenizer
        else:
            self.tokenizer = sb.tokenizers.TOKENIZERS[tokenizer_type]()

    @classmethod
    def remove_punctuation(cls, sent: str):
        """Remove punctuation based on Unicode category.

        A space-separated token is dropped only when every character in it
        has a Unicode category starting with ``"P"``.
        """
        return cls.SPACE.join(
            t
            for t in sent.split(cls.SPACE)
            if not all(unicodedata.category(c)[0] == "P" for c in t)
        )

    def tokenize(self, sent: str):
        """Tokenize ``sent``, then apply the optional post-processing steps.

        Order: sacreBLEU tokenizer, punctuation removal, character
        tokenization (spaces become ``SPACE_ESCAPE``), lowercasing.
        """
        tokenized = self.tokenizer(sent)

        if self.punctuation_removal:
            tokenized = self.remove_punctuation(tokenized)

        if self.character_tokenization:
            tokenized = self.SPACE.join(
                list(tokenized.replace(self.SPACE, self.SPACE_ESCAPE))
            )

        if self.lowercase:
            tokenized = tokenized.lower()

        return tokenized
@dataclass
class WerScorerConfig(FairseqDataclass):
    """Options for the word-error-rate scorer."""

    wer_tokenizer: EvaluationTokenizer.ALL_TOKENIZER_TYPES = field(
        default="none", metadata={"help": "sacreBLEU tokenizer to use for evaluation"}
    )
    wer_remove_punct: bool = field(
        default=False, metadata={"help": "remove punctuation"}
    )
    wer_char_level: bool = field(
        default=False, metadata={"help": "evaluate at character level"}
    )
    wer_lowercase: bool = field(default=False, metadata={"help": "lowercasing"})


@register_scorer("wer", dataclass=WerScorerConfig)
class WerScorer(BaseScorer):
    """Word error rate: summed edit distance over summed reference length."""

    def __init__(self, cfg):
        super().__init__(cfg)
        self.reset()
        # editdistance is optional, so it is only imported when this scorer is built
        try:
            import editdistance as ed
        except ImportError:
            raise ImportError("Please install editdistance to use WER scorer")
        self.ed = ed
        options = self.cfg
        self.tokenizer = EvaluationTokenizer(
            tokenizer_type=options.wer_tokenizer,
            lowercase=options.wer_lowercase,
            punctuation_removal=options.wer_remove_punct,
            character_tokenization=options.wer_char_level,
        )

    def reset(self):
        """Zero the running edit distance and reference length."""
        self.distance = 0
        self.ref_length = 0

    def add_string(self, ref, pred):
        """Tokenize one pair and add its edit distance and reference length."""
        reference_tokens = self.tokenizer.tokenize(ref).split()
        hypothesis_tokens = self.tokenizer.tokenize(pred).split()
        self.distance += self.ed.eval(reference_tokens, hypothesis_tokens)
        self.ref_length += len(reference_tokens)

    def result_string(self):
        """Return the WER formatted to two decimals."""
        wer = self.score()
        return f"WER: {wer:.2f}"

    def score(self):
        """Return WER as a percentage, or 0 when no reference tokens were seen."""
        if self.ref_length > 0:
            return 100.0 * self.distance / self.ref_length
        return 0
class Search(nn.Module):
    """Base class for search strategies used during generation.

    Stores the special-token indices and vocabulary size taken from
    ``tgt_dict`` and defines no-op constraint hooks that subclasses may
    override.
    """

    def __init__(self, tgt_dict):
        super().__init__()
        self.pad = tgt_dict.pad()
        self.unk = tgt_dict.unk()
        self.eos = tgt_dict.eos()
        self.vocab_size = len(tgt_dict)
        self.src_lengths = torch.tensor(-1)
        self.supports_constraints = False
        self.stop_on_max_len = False

    def step(
        self, step, lprobs, scores, prev_output_tokens=None, original_batch_idxs=None
    ):
        """Take a single search step.

        Args:
            step: the current search step, starting at 0
            lprobs: (bsz x input_beam_size x vocab_size)
                the model's log-probabilities over the vocabulary at the current step
            scores: (bsz x input_beam_size x step)
                the historical model scores of each hypothesis up to this point
            prev_output_tokens: (bsz x step)
                the previously generated output tokens
            original_batch_idxs: (bsz)
                the tensor with the batch indices, in the range [0, bsz)
                this is useful in case there has been applied a re-ordering
                and we need to know the original indices

        Return: A tuple of (scores, indices, beams) where:
            scores: (bsz x output_beam_size)
                the scores of the chosen elements; output_beam_size can be
                larger than input_beam_size, e.g., we may return
                2*input_beam_size to account for EOS
            indices: (bsz x output_beam_size)
                the indices of the chosen elements
            beams: (bsz x output_beam_size)
                the hypothesis ids of the chosen elements, in the range [0, input_beam_size)
        """
        raise NotImplementedError

    @torch.jit.export
    def set_src_lengths(self, src_lengths):
        """Store the source lengths for later use by the search strategy."""
        self.src_lengths = src_lengths

    @torch.jit.export
    def init_constraints(self, batch_constraints: Optional[Tensor], beam_size: int):
        """Initialize constraint states for constrained decoding (if supported).

        The base implementation does nothing and returns ``None``.

        Args:
            batch_constraints: (torch.Tensor, optional)
                the list of constraints, in packed form
            beam_size: (int)
                the beam size
        """
        pass

    def prune_sentences(self, batch_idxs: Tensor):
        """
        Removes constraint states for completed sentences (if supported).
        This is called from sequence_generator._generate() when sentences are
        deleted from the batch.

        Args:
            batch_idxs: Indices of *sentences* whose constraint state should be *kept*.
        """
        pass

    def update_constraints(self, active_hypos: Tensor):
        """
        Updates the constraint states by selecting the beam items that are retained.
        This is called at each time step of sequence_generator._generate() when
        the set of 2 * {beam_size} candidate hypotheses are reduced to the beam size.

        Args:
            active_hypos: (batch size, beam size)
                list of integers denoting, for each sentence, which beam candidate items
                should be kept.
        """
        pass


class BeamSearch(Search):
    """Standard beam search: keep the highest-scoring candidates per sentence."""

    def __init__(self, tgt_dict):
        super().__init__(tgt_dict)
        self.constraint_states = None

    @torch.jit.export
    def step(
        self,
        step: int,
        lprobs,
        scores: Optional[Tensor],
        prev_output_tokens: Optional[Tensor] = None,
        original_batch_idxs: Optional[Tensor] = None,
        candidate_multiple: int = 2,
    ):
        """Return ``(scores, indices, beams)`` for the top candidates.

        Up to ``candidate_multiple * beam_size`` candidates are returned per
        sentence; see :meth:`Search.step` for the tensor layouts.
        """
        bsz, beam_size, vocab_size = lprobs.size()

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            lprobs = lprobs[:, ::beam_size, :].contiguous()
        else:
            # make probs contain cumulative scores for each hypothesis
            assert scores is not None
            lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1)

        top_prediction = torch.topk(
            lprobs.view(bsz, -1),
            k=min(
                # Take the best `candidate_multiple` (default 2) x beam_size
                # predictions. We'll choose the first beam_size of these which
                # don't predict eos to continue with.
                candidate_multiple * beam_size,
                lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
        )
        scores_buf = top_prediction[0]
        indices_buf = top_prediction[1]
        # Project back into relative indices and beams
        beams_buf = torch.div(indices_buf, vocab_size, rounding_mode="trunc")
        indices_buf = indices_buf.fmod(vocab_size)

        # beams_buf now holds hypothesis ids and indices_buf vocabulary ids,
        # both relative to a single sentence
        return scores_buf, indices_buf, beams_buf


class PrefixConstrainedBeamSearch(Search):
    """Beam search restricted to tokens allowed by a user-supplied function.

    ``prefix_allowed_tokens_fn(batch_id, prefix_tokens)`` must return the
    vocabulary indices permitted as the next token for that hypothesis.
    """

    def __init__(self, tgt_dict, prefix_allowed_tokens_fn):
        super().__init__(tgt_dict)
        self.prefix_allowed_tokens_fn = prefix_allowed_tokens_fn
        self.stop_on_max_len = True

    @torch.jit.export
    def apply_mask(self, x, prev_output_tokens, original_batch_idxs):
        """Build an additive mask: 0 for allowed tokens, ``-inf`` elsewhere.

        ``x`` has one row per hypothesis (``step`` passes it reshaped to
        ``(bsz * beam_size, 1, vocab_size)``); the returned mask has the same
        shape as ``x``.
        """
        beam_size = x.shape[0] // original_batch_idxs.shape[0]
        # repeat each original batch index once per beam so that it lines up
        # with the flattened hypothesis rows
        original_batch_idxs = (
            original_batch_idxs.unsqueeze(-1).repeat((1, beam_size)).flatten().tolist()
        )

        mask = torch.full_like(x, -math.inf)
        for sent_i, (sent, batch_i) in enumerate(
            zip(prev_output_tokens, original_batch_idxs)
        ):
            mask[sent_i, :, self.prefix_allowed_tokens_fn(batch_i, sent)] = 0

        return mask

    @torch.jit.export
    def step(
        self,
        step: int,
        lprobs: Tensor,
        scores: Tensor,
        prev_output_tokens: Tensor,
        original_batch_idxs: Tensor,
    ):
        """Return ``(scores, indices, beams)`` for the top ``beam_size`` allowed candidates.

        NOTE: the mask is added to ``lprobs`` in place, so the caller's tensor
        is modified.
        """
        bsz, beam_size, vocab_size = lprobs.size()

        lprobs += self.apply_mask(
            lprobs.view(bsz * beam_size, 1, vocab_size),
            prev_output_tokens,
            original_batch_idxs,
        ).view(bsz, beam_size, vocab_size)

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            lprobs = lprobs[:, ::beam_size, :].contiguous()
        else:
            # make probs contain cumulative scores for each hypothesis
            assert scores is not None
            lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1)

        top_prediction = torch.topk(
            lprobs.view(bsz, -1),
            k=min(
                # Take the best beam_size predictions. We'll choose the first
                # beam_size of these which don't predict eos to continue with.
                beam_size,
                lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
        )
        scores_buf = top_prediction[0]
        indices_buf = top_prediction[1]
        # Same explicit truncating division as BeamSearch.step; the `//`
        # operator on tensors is deprecated in the torch 1.x series.
        beams_buf = torch.div(indices_buf, vocab_size, rounding_mode="trunc")
        indices_buf = indices_buf.fmod(vocab_size)
        return scores_buf, indices_buf, beams_buf
+ """ + + def __init__(self, tgt_dict, representation): + super().__init__(tgt_dict) + self.representation = representation + self.vocab_size = len(tgt_dict) + self.num_cands = 0 + self.supports_constraints = True + + @torch.jit.export + def init_constraints(self, batch_constraints: Optional[Tensor], beam_size: int): + self.constraint_states = [] + for constraint_tensor in batch_constraints: + if self.representation == "ordered": + constraint_state = OrderedConstraintState.create(constraint_tensor) + elif self.representation == "unordered": + constraint_state = UnorderedConstraintState.create(constraint_tensor) + + self.constraint_states.append([constraint_state for i in range(beam_size)]) + + @torch.jit.export + def prune_sentences(self, batch_idxs: Tensor): + self.constraint_states = [ + self.constraint_states[i] for i in batch_idxs.tolist() + ] + + @torch.jit.export + def update_constraints(self, active_hypos: Tensor): + if self.constraint_states: + batch_size = active_hypos.size(0) + for sentid in range(batch_size): + self.constraint_states[sentid] = [ + self.constraint_states[sentid][i] for i in active_hypos[sentid] + ] + + @torch.jit.export + def step( + self, + step: int, + lprobs: Tensor, + scores: Optional[Tensor], + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + """ + A constrained step builds a large candidates list from the following: + - the top 2 * {beam_size} items over the whole beam + - for each item in the beam + - the top {each_k} (default 1) + - all next constraints + We then compute the constrained state of each beam item, and assign + stripe codes: 0 to the best in each bank, 1 to the 2nd-best, and so + on. We then sort by (stripe, score), and truncate the list at + 2 * beam size. + + Args: + step: the decoder step + lprobs: (batch size, beam size, target vocab) + the target-vocab distributions for each item in the beam. 
+ Retrun: A tuple of (scores, indices, beams, constraints) where: + scores: (batch, output beam size) + the scores of the chosen elements + indices: (batch, output beam size) + the target vocab indices of the chosen elements + beams: (batch, output beam size) + the 0-indexed hypothesis ids of the chosen elements + constraints: (batch, output beam size) + the new constraint states + """ + each_k = 1 + device = lprobs.device + + batch_size, beam_size, vocab_size = lprobs.size() + + self.num_cands = min( + # Just take the k-best. We'll get another k from the 1-best from each + # row, plus more from the constraints + beam_size * 2, + lprobs.view(batch_size, -1).size(1) - 1, # -1 so we never select pad + ) + + # STEP 0: Preliminary. Prevent EOS for unfinished hyps across all batch items + constraint_states = self.constraint_states + if constraint_states and step > 0: + not_finished_indices = [] + for sentno, sent_constraints in enumerate(constraint_states): + for beamno, state in enumerate(sent_constraints): + index = sentno * beam_size + beamno + if not state.finished: + not_finished_indices.append(index) + not_finished_indices = torch.tensor(not_finished_indices) + if not_finished_indices.numel() > 0: + lprobs.view(batch_size * beam_size, -1)[ + not_finished_indices, self.eos + ] = -math.inf + + if step == 0: + # at the first step all hypotheses are equally likely, so use + # only the first beam entry for each batch item + lprobs = lprobs[:, ::beam_size, :].contiguous() + else: + # make probs contain cumulative scores for each hypothesis + assert scores is not None + lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1) + + top_prediction = torch.topk( + lprobs.view(batch_size, -1), + self.num_cands, + ) + scores_buf, indices_buf = top_prediction + # Project back into relative indices and beams + beams_buf = indices_buf // vocab_size + indices_buf = indices_buf.fmod(vocab_size) + + # Short circuit if there are no constraints in this batch + if not constraint_states: 
+ return scores_buf, indices_buf, beams_buf + + # STEP 1: get top-1 from each hypothesis across all sentences in the batch + if step > 0: + top_scores, top_indices = torch.topk( + lprobs.view(batch_size * beam_size, -1), + k=each_k, + dim=1, + ) + top_scores = top_scores.view(batch_size, -1) + top_indices = top_indices.view(batch_size, -1) + scores_buf = torch.cat((scores_buf, top_scores), dim=1) + indices_buf = torch.cat((indices_buf, top_indices), dim=1) + new_beams = torch.arange(0, beam_size, device=device).repeat(batch_size, 1) + beams_buf = torch.cat((beams_buf, new_beams), dim=1) + + # Now, process sentences in the batch one by one. + new_scores_buf = torch.zeros((batch_size, 2 * beam_size), device=device) + new_indices_buf = torch.zeros((batch_size, 2 * beam_size), device=device).long() + new_beams_buf = torch.zeros((batch_size, 2 * beam_size), device=device).long() + for sentno, states in enumerate(constraint_states): + scores, indices, beams, new_states = self.step_sentence( + step, + sentno, + lprobs[sentno], + constraint_states[sentno], + beams_buf[sentno].clone(), + indices_buf[sentno].clone(), + scores_buf[sentno].clone(), + ) + new_scores_buf[sentno] = scores + new_indices_buf[sentno] = indices + new_beams_buf[sentno] = beams + self.constraint_states[sentno] = new_states + + return new_scores_buf, new_indices_buf, new_beams_buf + + @torch.jit.export + def step_sentence( + self, + step: int, + sentno: int, + lprobs: Tensor, + constraint_states: List[List[ConstraintState]], + beams_buf: Tensor, + indices_buf: Tensor, + scores_buf: Tensor, + ): + """Does per-sentence processing. Adds all constraints for each + hypothesis to the list of candidates; then removes duplicates, + sorts, and dynamically stripes across the banks. All tensor inputs + are collapsed to those pertaining to a single input sentence. 
+ """ + device = lprobs.device + + # STEP 2: Add all constraints for each beam item + for beamno, state in enumerate(constraint_states): + next_tokens = torch.tensor(list(state.next_tokens()), device=device).long() + if next_tokens.numel() != 0: + indices_buf = torch.cat((indices_buf, next_tokens)) + next_beams = ( + torch.tensor(beamno, device=device) + .repeat(next_tokens.size(0)) + .long() + ) + beams_buf = torch.cat((beams_buf, next_beams)) + next_values = lprobs[beamno].take(next_tokens.view(-1)) + scores_buf = torch.cat((scores_buf, next_values)) + + # At the 0th time step, there is just one beam item + if step == 0: + break + + # STEP 3: Compute the "bank" for each candidate. This is the + # number of constraints it's generated. We need this so that + # we can do round-robin allocation of the beam across these + # banks. If C is the number of constraints, we select the best + # item in bank C, then the best in bank C-1, etc, followed by + # the 2nd-best in bank C, the 2nd-best in bank C-1, etc, and so + # on, until the maximum beam size. We accomplish this by + # creating a sort key and striping across the banks. + + # Compute the new states for all candidates + cands_size = indices_buf.size(0) + constraint_states = [ + constraint_states[beams_buf[i]].advance(indices_buf[i]) + for i in range(cands_size) + ] + + banks = torch.tensor([state.bank for state in constraint_states], device=device) + + # STEP 4: Sort + num_constraint_tokens = len(state.tokens) + + # Sort by keys (bank, score) (i.e., sort banks together, and scores + # within banks). AFAIK pytorch doesn't support either stable sort or + # multi-key sorting, so we have to hack this. 
+ MAX_SCORE = -100 + sort_key = (num_constraint_tokens - banks) * MAX_SCORE + scores_buf + sort_values, sort_indices = sort_key.sort(dim=0, descending=True) + scores_buf = scores_buf[sort_indices] + indices_buf = indices_buf[sort_indices] + beams_buf = beams_buf[sort_indices] + banks = banks[sort_indices] + + # Sort the constraints to follow suit + constraint_states = [constraint_states[i] for i in sort_indices] + + # STEP 5: Remove duplicates. The topk calls (overall and + # per-row) plus the per-row generation of constraints will + # produce duplicates. Here we remove them. + + def roll(t): + """Rolls a 1d tensor left by 1. + + [0, 1, 2, 3, 4] becomes [4, 0, 1, 2, 3] + """ + return torch.cat((t[-1].unsqueeze(0), t[0:-1]), dim=0) + + # We map candidates (beam, token_id) to a single dimension. + # This is then shifted by 1. We can then easily identify + # duplicates and create a mask that identifies unique + # extensions. + uniques_mask = beams_buf * (self.vocab_size + 1) + indices_buf + uniques_mask = roll(uniques_mask) != uniques_mask + + # Use the mask to pare down the data structures + scores_buf = torch.masked_select(scores_buf, uniques_mask) + indices_buf = torch.masked_select(indices_buf, uniques_mask) + beams_buf = torch.masked_select(beams_buf, uniques_mask) + banks = torch.masked_select(banks, uniques_mask) + i = 1 + for mask in uniques_mask[1:]: + if not mask: + constraint_states.pop(i) + i += mask + + # STEP 6: Assign IDs round-robin across banks, sort, and + # truncate. Now that the candidates are sorted by (bank, + # score) and uniqed, we dynamically allocate the {beam_size} + # beam by striping across the candidates. These stripes will + # be used as sort keys to do round-robin selection. This is + # accomplished in a single pass with offsets. Sorting by + # highest-banks (furthest-along hypotheses) first ensures + # progress through the constraints. 
+ # + # e.g., BANKS: 3 3 3 2 2 2 2 1 1 1 0 0 + # OLD STRIPES: 0 1 2 0 1 2 3 0 1 2 0 1 + # NEW STRIPES: 0 1+4 2+8 0+1 1+5 2+9 3+11 0+2 1+6 2+10 0+3 1+7 + # = 0 5 10 1 6 11 13 2 7 12 3 8 + # + # Sorting by this then gives the following banks: + # + # 3 2 1 0 3 2 1 0 3 2 1 2 + # + # We'll take the top {beam_size} of these. + stripe_offsets = [offset * (len(banks) + 1) for offset in range(len(banks) + 1)] + stripes = torch.zeros_like(banks) + cur_bank_count = -1 + cur_bank = banks[0] + for i, bank in enumerate(banks): + if bank != cur_bank: + cur_bank_count = 0 + cur_bank = bank + else: + cur_bank_count += 1 + stripes[i] = num_constraint_tokens - bank + stripe_offsets[cur_bank_count] + + # STEP 7: Sort by the stripes values + sort_values, sort_indices = stripes.sort(dim=0) + scores_buf = scores_buf[sort_indices] + indices_buf = indices_buf[sort_indices] + beams_buf = beams_buf[sort_indices] + constraint_states = [constraint_states[i] for i in sort_indices] + + # STEP 8: Truncate to the candidates size! 
+ scores_buf = scores_buf[: self.num_cands] + indices_buf = indices_buf[: self.num_cands] + beams_buf = beams_buf[: self.num_cands] + + return scores_buf, indices_buf, beams_buf, constraint_states + + +class LengthConstrainedBeamSearch(Search): + def __init__(self, tgt_dict, min_len_a, min_len_b, max_len_a, max_len_b): + super().__init__(tgt_dict) + self.min_len_a = min_len_a + self.min_len_b = min_len_b + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.beam = BeamSearch(tgt_dict) + self.needs_src_lengths = True + + def step( + self, + step: int, + lprobs, + scores, + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + min_lens = self.min_len_a * self.src_lengths + self.min_len_b + max_lens = self.max_len_a * self.src_lengths + self.max_len_b + lprobs[step < min_lens, :, self.eos] = -math.inf + lprobs[step >= max_lens, :, self.eos] = 0 + return self.beam.step(step, lprobs, scores) + + +class DiverseBeamSearch(Search): + """Diverse Beam Search. + + See "Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence + Models" for details. + + We implement cumulative diversity penalty here as default, optionally provide Hamming diversity described + in the original paper, and a way to interpolate between the two through diversity_discount. + + Take the example below for illustration of cumulative diversity implemented. + A) I like dogs. + B) I like ____. + C) There are ___. + And we are at step=2, trying to fill in the blank: + + Hamming diversity: + Penalty for B from A is 1 for "dogs" and 0 for any other words like "cats". + Penalty for C from A is 1 for "dogs" and 0 for any other words like "cats". + + Cumulative diversity (default): + Penalty for B from A is 3 for "dogs" and 0 for any other words like "cats". + Penalty for C from A is 1 for "dogs" and 0 for any other words like "cats". + B and C differ because B matches with A for "I" and "like" at respective steps incurring 2 cumulative penalty. 
+ + Using divesrity_discount to interpolate between the two: + if diverstiy_discount = 0.5, then + Penalty for B from A is 1.75 (1 + 0.5 + 0.25) for "dogs" and 0 for any other words like "cats". + Penalty for C from A is 1 for "dogs" and 0 for any other words like "cats". + "I" and "like" matched for B and A at step 0 and 1 respectively. Since "I" is two steps away and "like" is one step away, they are discounted by (0.5)^2 and 0.5 respectively. + When diversity_discount = 0, we recover Hammning diversity and when diversity_discount = 1, we recover cumulative diversity. + + NB: During beam search for each diversity group, `candidate_mutiple` is set to 1 rather than BeamSearch default(2). + This is to ensure we have final `beam_size` candidates so that no diversity groups would be dropped during final token selection in sequence generation. + For full backwards compatibility, use diversity_discount=0 and candidate_multiple=2. + + """ + + def __init__( + self, + tgt_dict, + num_groups, + diversity_strength, + diversity_discount=1.0, + candidate_multiple=1, + ): + super().__init__(tgt_dict) + self.num_groups = num_groups + self.diversity_strength = -diversity_strength + self.beam = BeamSearch(tgt_dict) + self.diversity_discount = diversity_discount + self.candidate_multiple = candidate_multiple + + # Float tensor to keep track of overlap between groups. + # Each token shared at the same step between two groups is counted as one. + # Then token counts are discounted by `diversity_discount` for every next timestep. + # Once initialized, dimension is batch_size * num_groups * num_groups. 
+ self.group_overlap = torch.empty(0) + + @torch.jit.export + def step( + self, + step: int, + lprobs, + scores, + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + bsz, beam_size, vocab_size = lprobs.size() + if beam_size % self.num_groups != 0: + raise ValueError( + "DiverseBeamSearch requires --beam to be divisible by the number of groups" + ) + + # initialize diversity penalty + diversity_buf = torch.zeros(lprobs[:, 0, :].size()).to(lprobs) + + scores_G, beams_G = [], [] + + # pre-allocating tensor for indices for all groups + indices_G_stacked = torch.empty( + bsz, + int(beam_size / self.num_groups) * self.candidate_multiple, + self.num_groups, + dtype=torch.long, + device=lprobs.device, + ) + + for g in range(self.num_groups): + lprobs_g = lprobs[:, g :: self.num_groups, :] + scores_g = scores[:, g :: self.num_groups, :] if step > 0 else None + + diversity_buf.zero_() + # apply diversity penalty + if g > 0: + indices_ = indices_G_stacked[:, :, :g] + if step > 0: + penalty_val = 1 + self.group_overlap[original_batch_idxs, g, :g] + penalty_val = penalty_val.unsqueeze(1) + else: + penalty_val = torch.ones(bsz, 1, 1) + diversity_buf.scatter_add_( + 1, + indices_.reshape(bsz, -1), + penalty_val.expand(indices_.size()) + .reshape(bsz, -1) + .to(diversity_buf), + ) + + lprobs_g = torch.add( + lprobs_g, + other=diversity_buf.unsqueeze(1), + alpha=self.diversity_strength, + ) + else: + lprobs_g = lprobs_g.contiguous() + + scores_buf, indices_buf, beams_buf = self.beam.step( + step, lprobs_g, scores_g, candidate_multiple=self.candidate_multiple + ) + beams_buf.mul_(self.num_groups).add_(g) + + scores_G.append(scores_buf.clone()) + beams_G.append(beams_buf.clone()) + + indices_G_stacked[:, :, g] = indices_buf + + # interleave results from different groups + scores_buf = torch.stack(scores_G, dim=2).view(bsz, -1) + indices_buf = indices_G_stacked.view(bsz, -1) + beams_buf = torch.stack(beams_G, dim=2).view(bsz, -1) + # 
find num of overlapped tokens for each group pair + # then discount it for next timestamp + overlap = self.diversity_discount * torch.sum( + indices_G_stacked.unsqueeze(2).eq(indices_G_stacked.unsqueeze(3)), dim=1 + ) + if step == 0: + self.group_overlap = overlap + else: + self.group_overlap[original_batch_idxs] = ( + self.group_overlap[original_batch_idxs] * self.diversity_discount + + overlap + ) + + return scores_buf, indices_buf, beams_buf + + +class Sampling(Search): + sampling_topk: int + sampling_topp: float + + def __init__(self, tgt_dict, sampling_topk=-1, sampling_topp=-1.0): + super().__init__(tgt_dict) + self.sampling_topk = sampling_topk + self.sampling_topp = sampling_topp + + def _sample_topp(self, lprobs): + """Sample among the smallest set of elements whose cumulative probability mass exceeds p. + + See `"The Curious Case of Neural Text Degeneration" + (Holtzman et al., 2019) <https://arxiv.org/abs/1904.09751>`_. + + Args: + lprobs: (bsz x input_beam_size x vocab_size) + the model's log-probabilities over the vocabulary at the current step + + Return: A tuple of (trimed_probs, truncated_indices) where: + trimed_probs: (bsz x input_beam_size x ?) + the model's probabilities over the elements selected to sample from. The + width of the third dimension is determined by top-P. + truncated_indices: (bsz x input_beam_size x ?) + the indices of the chosen elements. + """ + probs = lprobs.exp_() + + # sort the last dimension (vocab dimension) in descending order + sorted_probs, sorted_indices = probs.sort(descending=True) + + # compute a mask to indicate the words to be included in the top-P set. + cumsum_probs = sorted_probs.cumsum(dim=2) + mask = cumsum_probs.lt(self.sampling_topp) + + # note that mask was computed by 'lt'. One more word needs to be included + # so that the cumulative probability mass can exceed p. 
+ cumsum_mask = mask.cumsum(dim=2) + last_included = cumsum_mask[:, :, -1:] + last_included.clamp_(0, mask.size()[2] - 1) + mask = mask.scatter_(2, last_included, 1) + + # truncate unnecessary dims. + max_dim = last_included.max() + truncated_mask = mask[:, :, : max_dim + 1] + truncated_probs = sorted_probs[:, :, : max_dim + 1] + truncated_indices = sorted_indices[:, :, : max_dim + 1] + + # trim the words that are not in top-P by setting their probabilities + # to 0, so that they would not be sampled later. + trim_mask = ~truncated_mask + trimed_probs = truncated_probs.masked_fill_(trim_mask, 0) + return trimed_probs, truncated_indices + + @torch.jit.export + def step( + self, + step: int, + lprobs, + scores, + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + bsz, beam_size, vocab_size = lprobs.size() + + if step == 0: + # at the first step all hypotheses are equally likely, so use + # only the first beam + lprobs = lprobs[:, ::beam_size, :].contiguous() + + if self.sampling_topp > 0: + # only sample from the smallest set of words whose cumulative probability mass exceeds p + probs, top_indices = self._sample_topp(lprobs) + elif self.sampling_topk > 0: + # only sample from top-k candidates + lprobs, top_indices = lprobs.topk(self.sampling_topk) + probs = lprobs.exp_() + else: + probs = lprobs.exp_() + + # dummy data to be consistent with true branch for type check + top_indices = torch.empty(0).to(probs) + # sample + if step == 0: + indices_buf = torch.multinomial( + probs.view(bsz, -1), + beam_size, + replacement=True, + ).view(bsz, beam_size) + else: + indices_buf = torch.multinomial( + probs.view(bsz * beam_size, -1), + 1, + replacement=True, + ).view(bsz, beam_size) + + if step == 0: + # expand to beam size + probs = probs.expand(bsz, beam_size, -1) + + # gather scores + scores_buf = torch.gather(probs, dim=2, index=indices_buf.unsqueeze(-1)) + scores_buf = scores_buf.log_().view(bsz, -1) + + # remap indices if 
using top-k or top-P sampling + if self.sampling_topk > 0 or self.sampling_topp > 0: + indices_buf = torch.gather( + top_indices.expand(bsz, beam_size, -1), + dim=2, + index=indices_buf.unsqueeze(-1), + ).squeeze(2) + + if step == 0: + beams_buf = indices_buf.new_zeros(bsz, beam_size) + else: + beams_buf = torch.arange(0, beam_size).to(indices_buf).repeat(bsz, 1) + # make scores cumulative + scores_buf.add_( + torch.gather(scores[:, :, step - 1], dim=1, index=beams_buf) + ) + + return scores_buf, indices_buf, beams_buf + + +class DiverseSiblingsSearch(Search): + """ + Beam search with diverse siblings. + + See "A Simple, Fast Diverse Decoding Algorithm for Neural Generation" for details. + https://arxiv.org/abs/1611.08562 + + 1/ Calculate hypotheses for each beam + 2/ Intra-sibling ordering + 3/ Rewrite scores + 4/ Choose top K hypotheses + + if diversity_rate == 0 is equivalent to BeamSearch + """ + + def __init__(self, tgt_dict, diversity_rate): + super().__init__(tgt_dict) + self.diversity_rate = diversity_rate + self.beam = BeamSearch(tgt_dict) + + def step( + self, + step: int, + lprobs, + scores, + prev_output_tokens: Optional[Tensor] = None, + original_batch_idxs: Optional[Tensor] = None, + ): + bsz, beam_size, vocab_size = lprobs.size() + k = min( + # Take the best 2 x beam_size predictions. We'll choose the first + # beam_size of these which don't predict eos to continue with. 
+ beam_size * 2, + lprobs.view(bsz, -1).size(1) - 1, # -1 so we never select pad + ) + s_list: List[Tensor] + i_list: List[Tensor] + s_list = [torch.empty(0).to(lprobs) for i in range(beam_size)] + i_list = [torch.LongTensor().to(device=lprobs.device) for i in range(beam_size)] + sibling_score = torch.arange(1, k + 1).to(lprobs) * self.diversity_rate + + if step == 0: + return self.beam.step(step, lprobs, scores) + lprobs.add_(scores[:, :, step - 1].unsqueeze(-1)) + + # 1/ Calculate hypotheses for each beam + for i in range(beam_size): + torch.topk(lprobs[:, i, :].view(bsz, -1), k, out=(s_list[i], i_list[i])) + i_list[i].fmod_(vocab_size) + + # 2/ Intra-sibling ordering by default from topk + 3/ Rewrite scores + s_list[i].sub_(sibling_score) + + # 4/ Choose top K hypotheses + indices = torch.stack(i_list, dim=1).view(bsz, -1) + + final_scores = torch.empty(0).to(lprobs) + final_indices = torch.LongTensor().to(device=lprobs.device) + final_beams = torch.LongTensor().to(device=lprobs.device) + (final_scores, final_indices) = torch.topk( + torch.stack(s_list, dim=1).view(bsz, -1), + k, + ) + + final_beams = final_indices // k + + for i in range(bsz): + final_indices[i] = indices[i][final_indices[i]] + + return final_scores, final_indices, final_beams diff --git a/fairseq/fairseq/sequence_generator.py b/fairseq/fairseq/sequence_generator.py new file mode 100644 index 0000000..78db504 --- /dev/null +++ b/fairseq/fairseq/sequence_generator.py @@ -0,0 +1,1020 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import math +import sys +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from fairseq import search, utils +from fairseq.data import data_utils +from fairseq.models import FairseqIncrementalDecoder +from fairseq.ngram_repeat_block import NGramRepeatBlock + + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + tokens_to_suppress=(), + ): + """Generates translations of a given source sentence. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + 
else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos}) + if symbols_to_strip_from_output is not None + else {self.eos} + ) + + self.token_indices_to_suppress: Optional[Tensor] = None + token_indices_to_suppress = [] + for token_string in tokens_to_suppress: + token_index = tgt_dict.index(token_string) + assert token_index != self.unk + token_indices_to_suppress.append(token_index) + if len(token_indices_to_suppress) > 0: + self.token_indices_to_suppress = torch.Tensor( + token_indices_to_suppress + ).long() + + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.model.set_decoder_beam_size(self.beam_size) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. + # As a module attribute, setting it would break in multithread + # settings when the model is shared. 
+ self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + if self.lm_model is not None: + self.lm_model.eval() + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. + + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. 
+ Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate( + self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs + ) -> List[List[Dict[str, Tensor]]]: + """Generate translations. Match the api of other fairseq generators. 
+ + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + # if src_lengths exists in net_input (speech_to_text dataset case), then use it + if "src_lengths" in net_input: + src_lengths = net_input["src_lengths"] + else: + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)) + .long() + .sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception( + "expected src_tokens or source in net input. 
input keys: " + + str(net_input.keys()) + ) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. audio features) + bsz, src_len = src_tokens.size()[:2] + beam_size = self.beam_size + + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + assert ( + self.min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"): + encoder_outs = self.model.forward_encoder(net_input) + + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. + assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_tokens) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = self.eos if bos_token is None else bos_token + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. 
+ cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_tokens).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_tokens.device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device) + + reorder_state: Optional[Tensor] = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + with torch.autograd.profiler.record_function( + "EnsembleModel: forward_decoder" + ): + lprobs, avg_attn_scores 
= self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + ) + + if self.lm_model is not None: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + lprobs += probs + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + else: + if step < self.min_len: + # minimum length constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + if self.token_indices_to_suppress is not None: + lprobs[:, self.token_indices_to_suppress] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + 
lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) + eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. 
+ if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. + + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. 
Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. + assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). + active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, 
def replicate_first_beam(self, tensor, mask, beam_size: int):
    """Copy the first beam of each selected sentence over all of its beams.

    Args:
        tensor: tensor whose leading dimension is reshaped into groups of
            ``beam_size`` rows (one group per sentence).
        mask: index (used as a boolean mask by the caller) selecting, along
            the sentence dimension, which groups get overwritten.
        beam_size: number of rows per sentence group.

    Returns:
        The data flattened back to ``(-1, tensor.size(-1))``. The write goes
        through a view, so the storage of ``tensor`` is modified in place.
    """
    width = tensor.size(-1)
    grouped = tensor.view(-1, beam_size, width)
    # Keep only beam 0 of the selected groups; the size-1 beam axis
    # broadcasts across every beam when assigned back.
    leading = grouped[mask][:, :1, :]
    grouped[mask] = leading
    return grouped.view(-1, width)
information in `finalized`, and change `finished` accordingly. + A sentence is finalized when {beam_size} finished items have been collected for it. + + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. 
def is_finished(
    self,
    step: int,
    unfin_idx: int,
    max_len: int,
    finalized_sent_len: int,
    beam_size: int,
):
    """Report whether decoding of one sentence is complete.

    A sentence is complete once ``beam_size`` hypotheses have been finalized
    for it, or once ``step`` equals ``max_len``. ``unfin_idx`` is accepted
    for interface compatibility and is not consulted.

    Asserts that no more than ``beam_size`` hypotheses were collected.
    """
    assert finalized_sent_len <= beam_size
    beam_is_full = finalized_sent_len == beam_size
    hit_length_limit = step == max_len
    return beam_is_full or hit_length_limit
self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
class SequenceGeneratorWithAlignment(SequenceGenerator):
    """Sequence generator that also attaches an alignment to every hypothesis."""

    def __init__(
        self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs
    ):
        """Generates translations of a given source sentence.

        Produces alignments following "Jointly Learning to Align and
        Translate with Transformer Models" (Garg et al., EMNLP 2019).

        Args:
            left_pad_target (bool, optional): Whether or not the
                hypothesis should be left padded or not when they are
                teacher forced for generating alignments.
            print_alignment (str, optional): ``"hard"`` or ``"soft"``;
                selects which extraction helper is stored in
                ``self.extract_alignment``.
        """
        super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs)
        self.left_pad_target = left_pad_target

        # NOTE(review): any other value leaves ``extract_alignment`` unset, so
        # ``generate`` would fail with AttributeError -- callers must pass one
        # of the two supported modes.
        if print_alignment == "hard":
            self.extract_alignment = utils.extract_hard_alignment
        elif print_alignment == "soft":
            self.extract_alignment = utils.extract_soft_alignment

    @torch.no_grad()
    def generate(self, models, sample, **kwargs):
        """Run generation, then fill in the ``"alignment"`` entry of each hypothesis.

        ``models`` is accepted for interface compatibility; the ensemble held
        in ``self.model`` is the one that is used.

        Returns:
            The ``finalized`` structure produced by ``_generate`` with the
            ``"alignment"`` field of each of the ``bsz * beam_size``
            hypotheses replaced by the extracted alignment.
        """
        finalized = super()._generate(sample, **kwargs)

        bsz = sample["net_input"]["src_tokens"].shape[0]
        beam_size = self.beam_size
        (
            src_tokens,
            src_lengths,
            prev_output_tokens,
            tgt_tokens,
        ) = self._prepare_batch_for_alignment(sample, finalized)
        if any(getattr(m, "full_context_alignment", False) for m in self.model.models):
            attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens)
        else:
            attn = [
                finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0)
                for i in range(bsz * beam_size)
            ]

        # Alignment extraction runs on CPU. The former guard compared a
        # ``torch.device`` with the string "cpu", which is not a meaningful
        # device test; ``.to("cpu")`` is already a no-op for CPU tensors, so
        # the transfer is simply done unconditionally.
        src_tokens = src_tokens.to("cpu")
        tgt_tokens = tgt_tokens.to("cpu")
        attn = [a.to("cpu") for a in attn]

        # Process the attn matrix to extract alignments.
        for i in range(bsz * beam_size):
            alignment = self.extract_alignment(
                attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos
            )
            finalized[i // beam_size][i % beam_size]["alignment"] = alignment
        return finalized

    def _prepare_batch_for_alignment(self, sample, hypothesis):
        """Build the teacher-forcing batch used to compute alignments.

        Source tokens and lengths are repeated ``beam_size`` times per
        sentence; hypothesis tokens are collated twice, once with EOS moved
        to the beginning (decoder input) and once unchanged (targets).

        Returns:
            Tuple ``(src_tokens, src_lengths, prev_output_tokens, tgt_tokens)``.
        """
        src_tokens = sample["net_input"]["src_tokens"]
        bsz = src_tokens.shape[0]
        src_tokens = (
            src_tokens[:, None, :]
            .expand(-1, self.beam_size, -1)
            .contiguous()
            .view(bsz * self.beam_size, -1)
        )
        src_lengths = sample["net_input"]["src_lengths"]
        src_lengths = (
            src_lengths[:, None]
            .expand(-1, self.beam_size)
            .contiguous()
            .view(bsz * self.beam_size)
        )
        prev_output_tokens = data_utils.collate_tokens(
            [beam["tokens"] for example in hypothesis for beam in example],
            self.pad,
            self.eos,
            self.left_pad_target,
            move_eos_to_beginning=True,
        )
        tgt_tokens = data_utils.collate_tokens(
            [beam["tokens"] for example in hypothesis for beam in example],
            self.pad,
            self.eos,
            self.left_pad_target,
            move_eos_to_beginning=False,
        )
        return src_tokens, src_lengths, prev_output_tokens, tgt_tokens
class SequenceScorer(object):
    """Scores the target for a given source sentence."""

    def __init__(
        self,
        tgt_dict,
        softmax_batch=None,
        compute_alignment=False,
        eos=None,
        symbols_to_strip_from_output=None,
    ):
        """Store scoring options.

        Args:
            tgt_dict: object exposing ``pad()`` and ``eos()``.
            softmax_batch: maximum number of positions normalized per chunk;
                a falsy value means no chunking (``sys.maxsize``).
            compute_alignment: whether ``generate`` extracts hard alignments
                when attention is available.
            eos: overrides ``tgt_dict.eos()`` when not ``None``.
            symbols_to_strip_from_output: optional set of symbols; EOS is
                always added to it.
        """
        self.pad = tgt_dict.pad()
        self.eos = tgt_dict.eos() if eos is None else eos
        self.softmax_batch = softmax_batch or sys.maxsize
        assert self.softmax_batch > 0
        self.compute_alignment = compute_alignment
        self.symbols_to_strip_from_output = (
            symbols_to_strip_from_output.union({self.eos})
            if symbols_to_strip_from_output is not None
            else {self.eos}
        )

    @torch.no_grad()
    def generate(self, models, sample, **kwargs):
        """Score a batch of translations.

        Each model in ``models`` is run on ``sample["net_input"]``; the
        probabilities assigned to ``sample["target"]`` are averaged over the
        models (in probability space when there is more than one model, then
        converted to log).

        Returns:
            A list with one single-element list per batch row; the element is
            a dict with keys ``tokens``, ``score``, ``attention``,
            ``alignment`` and ``positional_scores``.
        """
        net_input = sample["net_input"]

        def batch_for_softmax(dec_out, target):
            # assumes decoder_out[0] is the only thing needed (may not be correct for future models!)
            first, rest = dec_out[0], dec_out[1:]
            bsz, tsz, dim = first.shape
            if bsz * tsz < self.softmax_batch:
                yield dec_out, target, True
            else:
                flat = first.contiguous().view(1, -1, dim)
                flat_tgt = target.contiguous().view(flat.shape[:-1])
                s = 0
                while s < flat.size(1):
                    e = s + self.softmax_batch
                    yield (flat[:, s:e],) + rest, flat_tgt[:, s:e], False
                    s = e

        def gather_target_probs(probs, target):
            probs = probs.gather(
                dim=2,
                index=target.unsqueeze(-1),
            )
            return probs

        orig_target = sample["target"]

        # compute scores for each model in the ensemble
        avg_probs = None
        avg_attn = None
        for model in models:
            model.eval()
            decoder_out = model(**net_input)
            attn = decoder_out[1] if len(decoder_out) > 1 else None
            if isinstance(attn, dict):
                attn = attn.get("attn", None)

            batched = batch_for_softmax(decoder_out, orig_target)
            probs, idx = None, 0
            try:
                for bd, tgt, is_single in batched:
                    # get_normalized_probs reads the target from the sample,
                    # so the current chunk is swapped in temporarily.
                    sample["target"] = tgt
                    curr_prob = model.get_normalized_probs(
                        bd, log_probs=len(models) == 1, sample=sample
                    ).data
                    if is_single:
                        probs = gather_target_probs(curr_prob, orig_target)
                    else:
                        if probs is None:
                            probs = curr_prob.new(orig_target.numel())
                        step = curr_prob.size(0) * curr_prob.size(1)
                        end = step + idx
                        tgt_probs = gather_target_probs(
                            curr_prob.view(tgt.shape + (curr_prob.size(-1),)), tgt
                        )
                        probs[idx:end] = tgt_probs.view(-1)
                        idx = end
            finally:
                # Always hand the caller's sample back intact, even when a
                # model raises while a chunk is swapped in.
                sample["target"] = orig_target

            probs = probs.view(sample["target"].shape)

            if avg_probs is None:
                avg_probs = probs
            else:
                avg_probs.add_(probs)
            if attn is not None:
                if torch.is_tensor(attn):
                    attn = attn.data
                else:
                    attn = attn[0]
                if avg_attn is None:
                    avg_attn = attn
                else:
                    avg_attn.add_(attn)
        if len(models) > 1:
            avg_probs.div_(len(models))
            avg_probs.log_()
            if avg_attn is not None:
                avg_attn.div_(len(models))

        bsz = avg_probs.size(0)
        hypos = []
        start_idxs = sample["start_indices"] if "start_indices" in sample else [0] * bsz
        for i in range(bsz):
            # remove padding from ref
            ref = (
                utils.strip_pad(sample["target"][i, start_idxs[i] :], self.pad)
                if sample["target"] is not None
                else None
            )
            tgt_len = ref.numel()
            avg_probs_i = avg_probs[i][start_idxs[i] : start_idxs[i] + tgt_len]
            score_i = avg_probs_i.sum() / tgt_len
            if avg_attn is not None:
                avg_attn_i = avg_attn[i]
                if self.compute_alignment:
                    alignment = utils.extract_hard_alignment(
                        avg_attn_i,
                        sample["net_input"]["src_tokens"][i],
                        sample["target"][i],
                        self.pad,
                        self.eos,
                    )
                else:
                    alignment = None
            else:
                avg_attn_i = alignment = None
            hypos.append(
                [
                    {
                        "tokens": ref,
                        "score": score_i,
                        "attention": avg_attn_i,
                        "alignment": alignment,
                        "positional_scores": avg_probs_i,
                    }
                ]
            )
        return hypos
class SpeechGenerator(object):
    """Base class holding a model, an optional vocoder and optional global CMVN stats."""

    def __init__(self, model, vocoder, data_cfg: S2TDataConfig):
        """Store the model/vocoder and load global CMVN statistics if configured.

        Args:
            model: model kept on ``self.model``.
            vocoder: callable turning features into a waveform, or ``None``.
            data_cfg: config whose ``global_cmvn_stats_npz`` is either
                ``None`` or the path of an ``.npz`` archive; the ``"mean"``
                and ``"std"`` entries are read by ``gcmvn_denormalize``.
        """
        self.model = model
        self.vocoder = vocoder
        stats_npz_path = data_cfg.global_cmvn_stats_npz
        self.gcmvn_stats = None
        if stats_npz_path is not None:
            # ``np.load`` on an .npz returns a lazy NpzFile that keeps the
            # archive open and re-reads it on every lookup. Materialize the
            # arrays once and close the file.
            with np.load(stats_npz_path) as stats:
                self.gcmvn_stats = {key: stats[key] for key in stats.files}

    def gcmvn_denormalize(self, x):
        """Undo global mean/variance normalization: ``x * std + mean``.

        Returns ``x`` unchanged when no statistics were loaded. Asserts that
        ``x`` is 3-dimensional and that its last dimension matches the
        length of the stored mean and std.
        """
        # x: B x T x C
        if self.gcmvn_stats is None:
            return x
        mean = torch.from_numpy(self.gcmvn_stats["mean"]).to(x)
        std = torch.from_numpy(self.gcmvn_stats["std"]).to(x)
        assert len(x.shape) == 3 and mean.shape[0] == std.shape[0] == x.shape[2]
        x = x * std.view(1, 1, -1).expand_as(x)
        return x + mean.view(1, 1, -1).expand_as(x)

    def get_waveform(self, feat):
        """Run the vocoder on ``feat`` and squeeze dim 0; ``None`` without a vocoder."""
        # T x C -> T
        return None if self.vocoder is None else self.vocoder(feat).squeeze(0)
out_lens.clone() + cur_out_lens.masked_fill_(cur_out_lens.eq(self.max_iter), step + 1) + _, cur_eos_out, cur_extra = model.forward_decoder( + prev_feat_out, + encoder_out=encoder_out, + incremental_state=incremental_state, + target_lengths=cur_out_lens, + speaker=sample["speaker"], + **kwargs, + ) + cur_eos_prob = torch.sigmoid(cur_eos_out).squeeze(2) + feat.append(cur_extra["feature_out"]) + attn.append(cur_extra["attn"]) + eos_prob.append(cur_eos_prob) + + cur_finished = cur_eos_prob.squeeze(1) > self.eos_prob_threshold + out_lens.masked_fill_((~finished) & cur_finished, step + 1) + finished = finished | cur_finished + if finished.sum().item() == bsz: + break + prev_feat_out = cur_extra["feature_out"] + + feat = torch.cat(feat, dim=1) + feat = model.decoder.postnet(feat) + feat + eos_prob = torch.cat(eos_prob, dim=1) + attn = torch.cat(attn, dim=2) + alignment = attn.max(dim=1)[1] + + feat = feat.reshape(bsz, -1, raw_dim) + feat = self.gcmvn_denormalize(feat) + + eos_prob = eos_prob.repeat_interleave(n_frames_per_step, dim=1) + attn = attn.repeat_interleave(n_frames_per_step, dim=2) + alignment = alignment.repeat_interleave(n_frames_per_step, dim=1) + out_lens = out_lens * n_frames_per_step + + finalized = [ + { + "feature": feat[b, :out_len], + "eos_prob": eos_prob[b, :out_len], + "attn": attn[b, :, :out_len], + "alignment": alignment[b, :out_len], + "waveform": self.get_waveform(feat[b, :out_len]), + } + for b, out_len in zip(range(bsz), out_lens) + ] + + if has_targ: + assert sample["target"].size(-1) == out_dim + tgt_feats = sample["target"].view(bsz, -1, raw_dim) + tgt_feats = self.gcmvn_denormalize(tgt_feats) + tgt_lens = sample["target_lengths"] * n_frames_per_step + for b, (f, l) in enumerate(zip(tgt_feats, tgt_lens)): + finalized[b]["targ_feature"] = f[:l] + finalized[b]["targ_waveform"] = self.get_waveform(f[:l]) + return finalized + + +class MultiDecoderSpeechGenerator(SpeechGenerator): + def __init__( + self, + models, + args, + vocoder, + data_cfg, + 
tgt_dict_mt, + max_iter: int = 6000, + eos_prob_threshold: float = 0.5, + eos_mt=None, + symbols_to_strip_from_output=None, + ): + super().__init__(models[0], vocoder, data_cfg) + self.max_iter = max_iter + self.eos_prob_threshold = eos_prob_threshold + + self.tgt_dict_mt = tgt_dict_mt + self.eos_mt = eos_mt + + from examples.speech_to_speech.unity.sequence_generator import SequenceGenerator + from fairseq import search + + self.text_generator = SequenceGenerator( + models, + tgt_dict_mt, + beam_size=max(1, getattr(args, "beam", 5)), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 200), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + search_strategy=search.BeamSearch(tgt_dict_mt), + eos=eos_mt, + symbols_to_strip_from_output=symbols_to_strip_from_output, + ) + + @torch.no_grad() + def generate(self, model, sample, has_targ=False, **kwargs): + model.eval() + + src_tokens = sample["net_input"]["src_tokens"] + src_lengths = sample["net_input"]["src_lengths"] + bsz, src_len = src_tokens.size()[:2] + n_frames_per_step = model.decoder.n_frames_per_step + out_dim = model.decoder.out_dim + raw_dim = out_dim // n_frames_per_step + + # initialize + encoder_out = model.forward_encoder( + src_tokens, src_lengths, speaker=sample["speaker"] + ) + + prefix_tokens = None + constraints = None + bos_token = None + + mt_decoder = getattr(model, f"{model.mt_task_name}_decoder") + + # 1. 
MT decoder + finalized_mt = self.text_generator.generate_decoder( + [encoder_out], + src_tokens, + src_lengths, + sample, + prefix_tokens, + constraints, + bos_token, + aux_task_name=model.mt_task_name, + ) + + # extract decoder output corresponding to the best hypothesis + max_tgt_len = max([len(hypo[0]["tokens"]) for hypo in finalized_mt]) + prev_output_tokens_mt = ( + src_tokens.new_zeros(src_tokens.shape[0], max_tgt_len) + .fill_(mt_decoder.padding_idx) + .int() + ) # B x T + for i, hypo in enumerate(finalized_mt): + i_beam = 0 + tmp = hypo[i_beam]["tokens"].int() # hyp + eos + prev_output_tokens_mt[i, 0] = self.text_generator.eos + if tmp[-1] == self.text_generator.eos: + tmp = tmp[:-1] + prev_output_tokens_mt[i, 1 : len(tmp) + 1] = tmp + + text = "".join([self.tgt_dict_mt[c] for c in tmp]) + text = text.replace("_", " ") + text = text.replace("▁", " ") + text = text.replace("<unk>", " ") + text = text.replace("<s>", "") + text = text.replace("</s>", "") + if len(text) > 0 and text[0] == " ": + text = text[1:] + sample_id = sample["id"].tolist()[i] + print("{} (None-{})".format(text, sample_id)) + + mt_decoder_out = mt_decoder( + prev_output_tokens_mt, + encoder_out=encoder_out, + features_only=True, + ) + x = mt_decoder_out[0].transpose(0, 1) + + mt_decoder_padding_mask = None + if prev_output_tokens_mt.eq(mt_decoder.padding_idx).any(): + mt_decoder_padding_mask = prev_output_tokens_mt.eq(mt_decoder.padding_idx) + + # 2. TTS encoder + if getattr(model, "synthesizer_encoder", None) is not None: + synthesizer_encoder_out = model.synthesizer_encoder( + x, + mt_decoder_padding_mask, + ) + else: + synthesizer_encoder_out = { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [mt_decoder_padding_mask] + if mt_decoder_padding_mask is not None + else [], # B x T + "encoder_embedding": [], + "encoder_states": [], + "src_tokens": [], + "src_lengths": [], + } + + # 3. 
TTS decoder + incremental_state = {} + feat, attn, eos_prob = [], [], [] + finished = src_tokens.new_zeros((bsz,)).bool() + out_lens = src_lengths.new_zeros((bsz,)).long().fill_(self.max_iter) + + prev_feat_out = encoder_out["encoder_out"][0].new_zeros(bsz, 1, out_dim) + for step in range(self.max_iter): + cur_out_lens = out_lens.clone() + cur_out_lens.masked_fill_(cur_out_lens.eq(self.max_iter), step + 1) + _, cur_eos_out, cur_extra = model.forward_decoder( + prev_feat_out, + encoder_out=synthesizer_encoder_out, + incremental_state=incremental_state, + target_lengths=cur_out_lens, + speaker=sample["speaker"], + **kwargs, + ) + cur_eos_prob = torch.sigmoid(cur_eos_out).squeeze(2) + feat.append(cur_extra["feature_out"]) + attn.append(cur_extra["attn"]) + eos_prob.append(cur_eos_prob) + + cur_finished = cur_eos_prob.squeeze(1) > self.eos_prob_threshold + out_lens.masked_fill_((~finished) & cur_finished, step + 1) + finished = finished | cur_finished + if finished.sum().item() == bsz: + break + prev_feat_out = cur_extra["feature_out"] + + feat = torch.cat(feat, dim=1) + feat = model.decoder.postnet(feat) + feat + eos_prob = torch.cat(eos_prob, dim=1) + attn = torch.cat(attn, dim=2) + alignment = attn.max(dim=1)[1] + + feat = feat.reshape(bsz, -1, raw_dim) + feat = self.gcmvn_denormalize(feat) + + eos_prob = eos_prob.repeat_interleave(n_frames_per_step, dim=1) + attn = attn.repeat_interleave(n_frames_per_step, dim=2) + alignment = alignment.repeat_interleave(n_frames_per_step, dim=1) + out_lens = out_lens * n_frames_per_step + + finalized = [ + { + "feature": feat[b, :out_len], + "eos_prob": eos_prob[b, :out_len], + "attn": attn[b, :, :out_len], + "alignment": alignment[b, :out_len], + "waveform": self.get_waveform(feat[b, :out_len]), + } + for b, out_len in zip(range(bsz), out_lens) + ] + + if has_targ: + assert sample["target"].size(-1) == out_dim + tgt_feats = sample["target"].view(bsz, -1, raw_dim) + tgt_feats = self.gcmvn_denormalize(tgt_feats) + tgt_lens = 
class NonAutoregressiveSpeechGenerator(SpeechGenerator):
    """Speech generator that obtains every output frame from one model call."""

    @torch.no_grad()
    def generate(self, model, sample, has_targ=False, **kwargs):
        """Run ``model`` on ``sample`` and return one result dict per item.

        Each dict contains ``feature`` (denormalized frames), ``waveform``
        (``self.get_waveform`` applied to those frames) and ``attn`` (a tensor
        built from the durations derived from the model's ``log_dur_out``).
        With ``has_targ`` set, ``targ_feature`` and ``targ_waveform`` computed
        from ``sample["target"]`` are added.  ``**kwargs`` is accepted but not
        used.
        """
        model.eval()

        net_input = sample["net_input"]
        # Unpacking into two names requires a two-dimensional src_tokens.
        bsz, _ = net_input["src_tokens"].size()
        frames_per_step = model.encoder.n_frames_per_step
        raw_dim = model.encoder.out_dim // frames_per_step

        outputs = model(
            src_tokens=net_input["src_tokens"],
            src_lengths=net_input["src_lengths"],
            prev_output_tokens=net_input["prev_output_tokens"],
            incremental_state=None,
            target_lengths=sample["target_lengths"],
            speaker=sample["speaker"],
        )
        feat, feat_post, out_lens, log_dur_out, _, _ = outputs
        # The second feature output wins whenever the model returns one.
        chosen = feat if feat_post is None else feat_post
        feat = self.gcmvn_denormalize(chosen.view(bsz, -1, raw_dim))

        # Durations: round(exp(log_dur) - 1), floored at zero.
        dur_out = torch.clamp(
            torch.round(torch.exp(log_dur_out) - 1).long(), min=0
        )

        def duration_plot_data(durations):
            # 1-based position of each entry, repeated ``duration`` times.
            return [
                pos + 1
                for pos, dur in enumerate(durations)
                for _ in range(dur.item())
            ]

        def frames_for(idx, length):
            # A non-positive length yields a single all-zero frame instead.
            if length > 0:
                return feat[idx, :length]
            return feat.new_zeros([1, raw_dim])

        out_lens = out_lens * frames_per_step
        finalized = []
        for idx, length in zip(range(bsz), out_lens):
            finalized.append(
                {
                    "feature": frames_for(idx, length),
                    "waveform": self.get_waveform(frames_for(idx, length)),
                    "attn": feat.new_tensor(duration_plot_data(dur_out[idx])),
                }
            )

        if not has_targ:
            return finalized

        tgt_feats = self.gcmvn_denormalize(
            sample["target"].view(bsz, -1, raw_dim)
        )
        tgt_lens = sample["target_lengths"] * frames_per_step
        for idx, (tgt_feat, tgt_len) in enumerate(zip(tgt_feats, tgt_lens)):
            reference = tgt_feat[:tgt_len]
            finalized[idx]["targ_feature"] = reference
            finalized[idx]["targ_waveform"] = self.get_waveform(
                tgt_feat[:tgt_len]
            )
        return finalized


class TeacherForcingAutoRegressiveSpeechGenerator(AutoRegressiveSpeechGenerator):
    """Speech generator that feeds ``prev_output_tokens`` from the sample to the model."""

    @torch.no_grad()
    def generate(self, model, sample, has_targ=False, **kwargs):
        """Run ``model`` once on ``sample`` and return one result dict per item.

        Outputs are cut to ``sample["target_lengths"]`` (scaled by the
        decoder's ``n_frames_per_step``).  Each dict contains ``feature``,
        ``eos_prob``, ``attn``, ``alignment`` and ``waveform``; with
        ``has_targ`` set, ``targ_feature`` and ``targ_waveform`` are added.
        ``**kwargs`` is accepted but not used.
        """
        model.eval()

        net_input = sample["net_input"]
        src_tokens = net_input["src_tokens"]
        src_lens = net_input["src_lengths"]
        prev_out_tokens = net_input["prev_output_tokens"]
        step_lens = sample["target_lengths"]
        frames_per_step = model.decoder.n_frames_per_step
        raw_dim = model.decoder.out_dim // frames_per_step
        bsz = src_tokens.shape[0]

        feat, eos_prob, extra = model(
            src_tokens,
            src_lens,
            prev_out_tokens,
            incremental_state=None,
            target_lengths=step_lens,
            speaker=sample["speaker"],
        )

        attn = extra["attn"]  # B x T_s x T_t
        # Index of the maximum along dim 1 for every output step.
        alignment = attn.max(dim=1)[1]
        feat = self.gcmvn_denormalize(feat.reshape(bsz, -1, raw_dim))

        # Expand per-step quantities to per-frame resolution.
        eos_prob = eos_prob.repeat_interleave(frames_per_step, dim=1)
        attn = attn.repeat_interleave(frames_per_step, dim=2)
        alignment = alignment.repeat_interleave(frames_per_step, dim=1)
        tgt_lens = sample["target_lengths"] * frames_per_step

        finalized = []
        for idx, tgt_len in zip(range(bsz), tgt_lens):
            finalized.append(
                {
                    "feature": feat[idx, :tgt_len],
                    "eos_prob": eos_prob[idx, :tgt_len],
                    "attn": attn[idx, :, :tgt_len],
                    "alignment": alignment[idx, :tgt_len],
                    "waveform": self.get_waveform(feat[idx, :tgt_len]),
                }
            )

        if not has_targ:
            return finalized

        tgt_feats = self.gcmvn_denormalize(
            sample["target"].view(bsz, -1, raw_dim)
        )
        for idx, (tgt_feat, tgt_len) in enumerate(zip(tgt_feats, tgt_lens)):
            finalized[idx]["targ_feature"] = tgt_feat[:tgt_len]
            finalized[idx]["targ_waveform"] = self.get_waveform(
                tgt_feat[:tgt_len]
            )
        return finalized
# Registries filled in by ``register_task``.
TASK_DATACLASS_REGISTRY = {}  # task name -> config dataclass (when one was given)
TASK_REGISTRY = {}  # task name -> task class
TASK_CLASS_NAMES = set()  # class names already registered


def setup_task(cfg: FairseqDataclass, **kwargs):
    """Resolve the task class selected by ``cfg`` and call its ``setup_task``.

    Two config styles are handled:

    * ``cfg.task`` is a string (legacy configs): the class is looked up in
      ``TASK_REGISTRY`` and, if a dataclass is registered for that name,
      ``cfg`` is converted with ``from_namespace``.
    * otherwise ``cfg._name`` is used: if a dataclass is registered for that
      name, ``cfg`` is merged into a fresh dataclass instance.

    Args:
        cfg: task configuration.
        **kwargs: forwarded to the task's ``setup_task``; ``from_checkpoint``
            is also read here to decide whether missing keys are removed
            while merging.

    Returns:
        Whatever the selected task class' ``setup_task`` returns.

    Raises:
        KeyError: ``cfg.task`` is a string that is not in ``TASK_REGISTRY``.
        AssertionError: no task could be inferred from ``cfg``.
    """
    task = None
    task_name = getattr(cfg, "task", None)

    if isinstance(task_name, str):
        # legacy tasks
        task = TASK_REGISTRY[task_name]
        if task_name in TASK_DATACLASS_REGISTRY:
            dc = TASK_DATACLASS_REGISTRY[task_name]
            cfg = dc.from_namespace(cfg)
    else:
        task_name = getattr(cfg, "_name", None)

        if task_name and task_name in TASK_DATACLASS_REGISTRY:
            remove_missing = kwargs.get("from_checkpoint", False)
            dc = TASK_DATACLASS_REGISTRY[task_name]
            cfg = merge_with_parent(dc(), cfg, remove_missing=remove_missing)
            task = TASK_REGISTRY[task_name]

    assert (
        task is not None
    ), f"Could not infer task type from {cfg}. Available argparse tasks: {TASK_REGISTRY.keys()}. Available hydra tasks: {TASK_DATACLASS_REGISTRY.keys()}"

    return task.setup_task(cfg, **kwargs)


def register_task(name, dataclass=None):
    """
    New tasks can be added to fairseq with the
    :func:`~fairseq.tasks.register_task` function decorator.

    For example::

        @register_task('classification')
        class ClassificationTask(FairseqTask):
            (...)

    .. note::

        All Tasks must implement the :class:`~fairseq.tasks.FairseqTask`
        interface.

    Registering a name that is already present is a no-op: the decorator
    returns the class that was registered first.

    Args:
        name (str): the name of the task
        dataclass (optional): config dataclass for the task; must extend
            ``FairseqDataclass``. When given it is recorded in
            ``TASK_DATACLASS_REGISTRY`` and stored in the hydra
            ``ConfigStore`` under group ``"task"``.

    Raises:
        ValueError: the class does not extend ``FairseqTask``, its class name
            is already registered under another task name, or ``dataclass``
            does not extend ``FairseqDataclass``.
    """

    def register_task_cls(cls):
        # Keep the first registration instead of failing on a repeated name.
        if name in TASK_REGISTRY:
            return TASK_REGISTRY[name]

        if not issubclass(cls, FairseqTask):
            raise ValueError(
                "Task ({}: {}) must extend FairseqTask".format(name, cls.__name__)
            )
        if cls.__name__ in TASK_CLASS_NAMES:
            raise ValueError(
                "Cannot register task with duplicate class name ({})".format(
                    cls.__name__
                )
            )
        TASK_REGISTRY[name] = cls
        TASK_CLASS_NAMES.add(cls.__name__)

        if dataclass is not None and not issubclass(dataclass, FairseqDataclass):
            raise ValueError(
                "Dataclass {} must extend FairseqDataclass".format(dataclass)
            )

        cls.__dataclass = dataclass
        if dataclass is not None:
            TASK_DATACLASS_REGISTRY[name] = dataclass

            cs = ConfigStore.instance()
            node = dataclass()
            node._name = name
            cs.store(name=name, group="task", node=node, provider="fairseq")

        return cls

    return register_task_cls


def get_task(name):
    """Return the task class registered under ``name`` (``KeyError`` if absent)."""
    return TASK_REGISTRY[name]


def import_tasks(tasks_dir, namespace):
    """Import every task module or package found directly in ``tasks_dir``.

    Entries whose name starts with ``_`` or ``.`` are ignored; ``*.py`` files
    and directories are imported as ``namespace.<name>``. For each imported
    name that is also a registered task, an ``argparse`` parser describing
    the task's arguments is stored in this module's globals as
    ``<name>_parser``.

    Args:
        tasks_dir: directory to scan.
        namespace: dotted package name the modules are imported under.
    """
    for file in os.listdir(tasks_dir):
        if file.startswith(("_", ".")):
            continue

        if file.endswith(".py"):
            # Strip only the trailing extension so that a name containing
            # ".py" elsewhere is not cut short.
            task_name = os.path.splitext(file)[0]
        elif os.path.isdir(os.path.join(tasks_dir, file)):
            task_name = file
        else:
            continue

        importlib.import_module(namespace + "." + task_name)

        # expose `task_parser` for sphinx
        if task_name in TASK_REGISTRY:
            parser = argparse.ArgumentParser(add_help=False)
            group_task = parser.add_argument_group("Task name")
            # fmt: off
            group_task.add_argument('--task', metavar=task_name,
                                    help='Enable this task with: ``--task=' + task_name + '``')
            # fmt: on
            group_args = parser.add_argument_group(
                "Additional command-line arguments"
            )
            TASK_REGISTRY[task_name].add_args(group_args)
            globals()[task_name + "_parser"] = parser


# automatically import any Python files in the tasks/ directory
tasks_dir = os.path.dirname(__file__)
import_tasks(tasks_dir, "fairseq.tasks")
@dataclass
class AudioClassificationConfig(AudioPretrainingConfig):
    """Audio pre-training config extended with a dictionary-location override."""

    target_dictionary: Optional[str] = field(
        default=None, metadata={"help": "override default dictionary location"}
    )


@register_task("audio_classification", dataclass=AudioClassificationConfig)
class AudioClassificationTask(AudioPretrainingTask):
    """Task for audio classification tasks.

    Wraps the datasets loaded by the parent task with label targets and
    reports ``accuracy`` from per-batch correct/total counts.
    """

    cfg: AudioClassificationConfig

    def __init__(
        self,
        cfg: AudioClassificationConfig,
    ):
        super().__init__(cfg)
        self.state.add_factory("target_dictionary", self.load_target_dictionary)
        # Use the module logger, like the rest of this file.
        logger.info(f"=== Number of labels = {len(self.target_dictionary)}")

    def load_target_dictionary(self):
        """Load ``dict.<labels>.txt`` or return ``None`` when no labels are set.

        The file is read from ``cfg.target_dictionary`` when that is set and
        from ``cfg.data`` otherwise. It is loaded without special symbols.
        """
        if self.cfg.labels:
            target_dictionary = self.cfg.data
            if self.cfg.target_dictionary:  # override dict
                target_dictionary = self.cfg.target_dictionary
            dict_path = os.path.join(target_dictionary, f"dict.{self.cfg.labels}.txt")
            logger.info("Using dict_path : {}".format(dict_path))
            return Dictionary.load(dict_path, add_special_symbols=False)
        return None

    def _with_targets(self, dataset, label_path, text_compression_level):
        """Return ``dataset`` wrapped in an ``AddTargetDataset`` with labels from ``label_path``."""
        skipped_indices = getattr(dataset, "skipped_indices", set())
        text_compressor = TextCompressor(level=text_compression_level)
        with open(label_path, "r") as f:
            # Drop the label lines of examples the audio dataset skipped.
            labels = [
                text_compressor.compress(l)
                for i, l in enumerate(f)
                if i not in skipped_indices
            ]

        assert len(labels) == len(dataset), (
            f"labels length ({len(labels)}) and dataset length "
            f"({len(dataset)}) do not match"
        )

        process_label = LabelEncoder(self.target_dictionary)

        # NOTE: text_compression_level is not forwarded to AddTargetDataset
        # here, although the labels themselves are compressed above.
        return AddTargetDataset(
            dataset,
            labels,
            pad=self.target_dictionary.pad(),
            eos=self.target_dictionary.eos(),
            batch_targets=True,
            process_label=process_label,
            label_len_fn=label_len_fn,
            add_to_input=False,
        )

    def load_dataset(
        self, split: str, task_cfg: AudioClassificationConfig = None, **kwargs
    ):
        """Load ``split`` through the parent task and attach label targets.

        Without ``multi_corpus_keys`` the labels come from
        ``<data>/<split>.<labels>``. With it, ``split`` is parsed as a
        comma-separated list of ``key:file_name`` pairs; each corpus gets its
        own label file and the corpora are combined in a
        ``MultiCorpusDataset`` (or used directly when there is only one).
        """
        super().load_dataset(split, task_cfg, **kwargs)
        task_cfg = task_cfg or self.cfg
        assert task_cfg.labels is not None
        text_compression_level = getattr(
            TextCompressionLevel, str(self.cfg.text_compression_level)
        )
        data_path = self.cfg.data

        if task_cfg.multi_corpus_keys is None:
            label_path = os.path.join(data_path, f"{split}.{task_cfg.labels}")
            self.datasets[split] = self._with_targets(
                self.datasets[split], label_path, text_compression_level
            )
            return

        target_dataset_map = OrderedDict()

        multi_corpus_keys = [
            k.strip() for k in task_cfg.multi_corpus_keys.split(",")
        ]
        corpus_idx_map = {k: idx for idx, k in enumerate(multi_corpus_keys)}

        data_keys = [k.split(":") for k in split.split(",")]

        multi_corpus_sampling_weights = [
            float(val.strip())
            for val in task_cfg.multi_corpus_sampling_weights.split(",")
        ]
        data_weights = []
        for key, file_name in data_keys:
            k = key.strip()
            label_path = os.path.join(
                data_path, f"{file_name.strip()}.{task_cfg.labels}"
            )
            target_dataset_map[k] = self._with_targets(
                self.dataset_map[split][k], label_path, text_compression_level
            )
            # Weights are listed in the order of ``multi_corpus_keys``.
            data_weights.append(multi_corpus_sampling_weights[corpus_idx_map[k]])

        if len(target_dataset_map) == 1:
            self.datasets[split] = list(target_dataset_map.values())[0]
        else:
            self.datasets[split] = MultiCorpusDataset(
                target_dataset_map,
                distribution=data_weights,
                seed=0,
                sort_indices=True,
            )

    @property
    def source_dictionary(self):
        """This task has no source dictionary."""
        return None

    @property
    def target_dictionary(self):
        """Return the :class:`~fairseq.data.Dictionary` for the language
        model."""
        return self.state.target_dictionary

    def train_step(self, sample, model, *args, **kwargs):
        """Run the parent train step on long targets and add accuracy counts."""
        sample["target"] = sample["target"].to(dtype=torch.long)
        loss, sample_size, logging_output = super().train_step(
            sample, model, *args, **kwargs
        )
        self._log_metrics(sample, model, logging_output)
        return loss, sample_size, logging_output

    def valid_step(self, sample, model, criterion):
        """Run the parent validation step on long targets and add accuracy counts."""
        sample["target"] = sample["target"].to(dtype=torch.long)
        loss, sample_size, logging_output = super().valid_step(sample, model, criterion)
        self._log_metrics(sample, model, logging_output)
        return loss, sample_size, logging_output

    def _log_metrics(self, sample, model, logging_output):
        """Store the batch's ``_correct`` and ``_total`` counts in ``logging_output``."""
        # Named ``counts`` so the imported ``metrics`` module is not shadowed.
        counts = self._inference_with_metrics(
            sample,
            model,
        )
        logging_output["_correct"] = counts["correct"]
        logging_output["_total"] = counts["total"]

    def _inference_with_metrics(self, sample, model):
        """Run ``model`` on ``sample`` and count correct arg-max predictions.

        Returns:
            dict with ``correct`` (number of matching predictions) and
            ``total`` (number of targets).
        """
        with torch.no_grad():
            net_output = model(**sample["net_input"])
            lprobs = (
                model.get_normalized_probs(net_output, log_probs=True).cpu().detach()
            )
            # Column 0 of the target is compared against the prediction.
            target_list = sample["target"][:, 0].detach().cpu()
            predicted_list = torch.argmax(lprobs, 1).detach().cpu()  # B,C->B

        return {
            "correct": torch.sum(target_list == predicted_list).item(),
            "total": len(target_list),
        }

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate ``_correct`` / ``_total`` and derive ``accuracy`` from them."""
        super().reduce_metrics(logging_outputs, criterion)

        zero = torch.scalar_tensor(0.0)
        correct, total = 0, 0
        for log in logging_outputs:
            correct += log.get("_correct", zero)
            total += log.get("_total", zero)
        metrics.log_scalar("_correct", correct)
        metrics.log_scalar("_total", total)

        if total > 0:

            def _fn_accuracy(meters):
                if meters["_total"].sum > 0:
                    return utils.item(meters["_correct"].sum / meters["_total"].sum)
                return float("nan")

            metrics.log_derived("accuracy", _fn_accuracy)
class LabelEncoder(object):
    """Callable that encodes a label line with a fixed dictionary."""

    def __init__(self, dictionary):
        self.dictionary = dictionary

    def __call__(self, label):
        # No EOS is appended and symbols missing from the dictionary are not added.
        return self.dictionary.encode_line(
            label, append_eos=False, add_if_not_exist=False
        )


def label_len_fn(label):
    """Return the number of pieces ``label`` splits into on single spaces."""
    return len(label.split(" "))


@dataclass
class AudioFinetuningConfig(AudioPretrainingConfig):
    """Audio pre-training config extended with WER/BLEU evaluation options."""

    # Options for reporting WER metrics during validation. Only applicable to
    # Seq2Seq models during fine-tuning
    eval_wer: bool = field(
        default=False, metadata={"help": "compute WER for Seq2Seq models"}
    )
    eval_wer_config: GenerationConfig = field(
        default_factory=lambda: GenerationConfig(),
        metadata={"help": "beam search config for evaluating wer during training"},
    )
    eval_wer_tokenizer: Any = field(
        default=None,
        metadata={"help": "tokenizer config for evaluating wer during training"},
    )
    eval_wer_post_process: str = field(
        default="letter",
        metadata={
            "help": "remove BPE tokens before scoring (can be sentencepiece, letter, and more)"
        },
    )
    eval_bleu: bool = field(
        default=False, metadata={"help": "evaluation with BLEU scores"}
    )
    eval_bleu_detok: Optional[str] = field(
        default=None,
        metadata={
            "help": "detokenize before computing BLEU (e.g., 'moses'); "
            "required if using --eval-bleu; use 'space' to disable "
            "detokenization; see fairseq.data.encoders for other options"
        },
    )
    eval_bleu_detok_args: str = field(
        default="{}", metadata={"help": "args for building the tokenizer, if needed"}
    )
    eval_tokenized_bleu: bool = field(
        default=False, metadata={"help": "compute tokenized BLEU instead of sacrebleu"}
    )
    eval_bleu_remove_bpe: Optional[str] = field(
        default=None, metadata={"help": "remove BPE before computing BLEU"}
    )
    eval_bleu_args: str = field(
        default="{}",
        metadata={
            "help": "generation args for BLUE scoring, e.g., "
            '\'{"beam": 4, "lenpen": 0.6}\''
        },
    )
    eval_bleu_print_samples: bool = field(
        default=False, metadata={"help": "print sample generations during validation"}
    )
    autoregressive: bool = field(
        default=False,
        metadata={
            "help": "required for autoregressive decoders (like seq2seq models); "
            "adds 'prev_output_tokens' to input and appends eos to target"
        },
    )
    rebuild_batches: bool = True
    target_dictionary: Optional[str] = field(
        default=None,
        metadata={
            "help": "override default dictionary location"
        }
    )


@register_task("audio_finetuning", dataclass=AudioFinetuningConfig)
class AudioFinetuningTask(AudioPretrainingTask):
    """Audio task that adds label targets and optional WER/BLEU validation.

    WER and BLEU are only computed when the matching ``eval_*`` flag and
    ``cfg.autoregressive`` are both set.
    """

    cfg: AudioFinetuningConfig

    def __init__(
        self,
        cfg: AudioFinetuningConfig,
    ):
        super().__init__(cfg)
        self.blank_symbol = "<s>"

        self.state.add_factory("target_dictionary", self.load_target_dictionary)

    def load_target_dictionary(self):
        """Load ``dict.<labels>.txt`` or return ``None`` when no labels are set.

        The file is read from ``cfg.target_dictionary`` when that is set and
        from ``cfg.data`` otherwise.
        """
        if self.cfg.labels:
            target_dictionary = self.cfg.data
            if self.cfg.target_dictionary:  # override dict
                target_dictionary = self.cfg.target_dictionary
            dict_path = os.path.join(target_dictionary, f"dict.{self.cfg.labels}.txt")
            logger.info('Using dict_path : {}'.format(dict_path))
            return Dictionary.load(dict_path)
        return None

    def _with_targets(self, dataset, label_path, task_cfg, text_compression_level):
        """Return ``dataset`` wrapped in an ``AddTargetDataset`` with labels from ``label_path``."""
        skipped_indices = getattr(dataset, "skipped_indices", set())
        text_compressor = TextCompressor(level=text_compression_level)
        with open(label_path, "r") as f:
            # Drop the label lines of examples the audio dataset skipped.
            labels = [
                text_compressor.compress(l)
                for i, l in enumerate(f)
                if i not in skipped_indices
            ]

        assert len(labels) == len(dataset), (
            f"labels length ({len(labels)}) and dataset length "
            f"({len(dataset)}) do not match"
        )

        process_label = LabelEncoder(self.target_dictionary)

        return AddTargetDataset(
            dataset,
            labels,
            pad=self.target_dictionary.pad(),
            eos=self.target_dictionary.eos(),
            batch_targets=True,
            process_label=process_label,
            label_len_fn=label_len_fn,
            add_to_input=task_cfg.get("autoregressive", False),
            text_compression_level=text_compression_level,
        )

    def load_dataset(
        self, split: str, task_cfg: AudioFinetuningConfig = None, **kwargs
    ):
        """Load ``split`` through the parent task and attach label targets.

        Without ``multi_corpus_keys`` the labels come from
        ``<data>/<split>.<labels>``. With it, ``split`` is parsed as a
        comma-separated list of ``key:file_name`` pairs; each corpus gets its
        own label file and the corpora are combined in a
        ``MultiCorpusDataset`` (or used directly when there is only one).
        """
        super().load_dataset(split, task_cfg, **kwargs)

        task_cfg = task_cfg or self.cfg
        assert task_cfg.labels is not None
        text_compression_level = getattr(
            TextCompressionLevel, str(self.cfg.text_compression_level)
        )
        data_path = self.cfg.data

        if task_cfg.multi_corpus_keys is None:
            label_path = os.path.join(data_path, f"{split}.{task_cfg.labels}")
            self.datasets[split] = self._with_targets(
                self.datasets[split], label_path, task_cfg, text_compression_level
            )
            return

        target_dataset_map = OrderedDict()

        multi_corpus_keys = [k.strip() for k in task_cfg.multi_corpus_keys.split(",")]
        corpus_idx_map = {k: idx for idx, k in enumerate(multi_corpus_keys)}

        data_keys = [k.split(":") for k in split.split(",")]

        multi_corpus_sampling_weights = [
            float(val.strip())
            for val in task_cfg.multi_corpus_sampling_weights.split(",")
        ]
        data_weights = []
        for key, file_name in data_keys:
            k = key.strip()
            label_path = os.path.join(
                data_path, f"{file_name.strip()}.{task_cfg.labels}"
            )
            target_dataset_map[k] = self._with_targets(
                self.dataset_map[split][k],
                label_path,
                task_cfg,
                text_compression_level,
            )
            # Weights are listed in the order of ``multi_corpus_keys``.
            data_weights.append(multi_corpus_sampling_weights[corpus_idx_map[k]])

        if len(target_dataset_map) == 1:
            self.datasets[split] = list(target_dataset_map.values())[0]
        else:
            self.datasets[split] = MultiCorpusDataset(
                target_dataset_map,
                distribution=data_weights,
                seed=0,
                sort_indices=True,
            )

    @property
    def target_dictionary(self):
        """Return the :class:`~fairseq.data.Dictionary` for the language
        model."""
        return self.state.target_dictionary

    def valid_step(self, sample, model, criterion):
        """Run the parent validation step and add WER/BLEU counts when enabled."""
        loss, sample_size, logging_output = super().valid_step(sample, model, criterion)
        if self.cfg.eval_wer and self.cfg.autoregressive:
            # Local names avoid shadowing the imported ``metrics`` module.
            wer = self._inference_with_wer(self.sequence_generator, sample, model)
            logging_output["_num_char_errors"] = wer["num_char_errors"]
            logging_output["_num_chars"] = wer["num_chars"]
            logging_output["_num_word_errors"] = wer["num_word_errors"]
            logging_output["_num_words"] = wer["num_words"]
        if self.cfg.eval_bleu and self.cfg.autoregressive:
            bleu = self._inference_with_bleu(self.sequence_generator, sample, model)
            logging_output["_bleu_sys_len"] = bleu.sys_len
            logging_output["_bleu_ref_len"] = bleu.ref_len
            # we split counts into separate entries so that they can be
            # summed efficiently across workers using fast-stat-sync
            assert len(bleu.counts) == 4
            for i in range(4):
                logging_output[f"_bleu_counts_{i}"] = bleu.counts[i]
                logging_output[f"_bleu_totals_{i}"] = bleu.totals[i]
        return loss, sample_size, logging_output

    def build_model(self, model_cfg: FairseqDataclass, from_checkpoint=False):
        """Build the model and, if enabled, the generator/tokenizer for WER or BLEU.

        When both ``eval_wer`` and ``eval_bleu`` are enabled, the BLEU branch
        runs last and its generator and tokenizer replace the WER ones.
        """
        model = super().build_model(model_cfg, from_checkpoint)

        if self.cfg.eval_wer and self.cfg.autoregressive:
            self.sequence_generator = self.build_generator(
                [model],
                self.cfg.eval_wer_config,
            )
            if self.cfg.eval_wer_tokenizer:
                self.tokenizer = encoders.build_tokenizer(self.cfg.eval_wer_tokenizer)
            else:
                self.tokenizer = None
        if self.cfg.eval_bleu and self.cfg.autoregressive:
            assert self.cfg.eval_bleu_detok is not None, (
                "--eval-bleu-detok is required if using --eval-bleu; "
                "try --eval-bleu-detok=moses (or --eval-bleu-detok=space "
                "to disable detokenization, e.g., when using sentencepiece)"
            )
            detok_args = json.loads(self.cfg.eval_bleu_detok_args)
            self.tokenizer = encoders.build_tokenizer(
                Namespace(tokenizer=self.cfg.eval_bleu_detok, **detok_args)
            )
            gen_args = json.loads(self.cfg.eval_bleu_args)
            gen_args = Namespace(**gen_args)
            self.sequence_generator = self.build_generator([model], gen_args)

        return model

    def _inference_with_wer(self, generator, sample, model):
        """Generate hypotheses for ``sample`` and count character/word edit errors.

        Returns:
            dict with ``num_char_errors``, ``num_chars``, ``num_word_errors``
            and ``num_words`` summed over the batch.
        """
        import editdistance

        def decode(toks):
            s = self.target_dictionary.string(
                toks.int().cpu(),
                self.cfg.eval_wer_post_process,
                escape_unk=True,
            )
            if self.tokenizer:
                s = self.tokenizer.decode(s)
            return s

        num_word_errors, num_char_errors = 0, 0
        num_chars, num_words = 0, 0
        gen_out = self.inference_step(generator, [model], sample, None)
        for i in range(len(gen_out)):
            # Only the first hypothesis of each sentence is scored.
            hyp = decode(gen_out[i][0]["tokens"])
            ref = decode(
                utils.strip_pad(sample["target"][i], self.target_dictionary.pad()),
            )
            num_char_errors += editdistance.eval(hyp, ref)
            num_chars += len(ref)
            hyp_words = hyp.split()
            ref_words = ref.split()
            num_word_errors += editdistance.eval(hyp_words, ref_words)
            num_words += len(ref_words)

        return {
            "num_char_errors": num_char_errors,
            "num_chars": num_chars,
            "num_word_errors": num_word_errors,
            "num_words": num_words,
        }

    def _inference_with_bleu(self, generator, sample, model):
        """Generate hypotheses for ``sample`` and return ``sacrebleu.corpus_bleu`` on them."""
        import sacrebleu

        def decode(toks, is_ref):
            s = self.target_dictionary.string(
                toks.int().cpu(),
                self.cfg.eval_bleu_remove_bpe,
                # The default unknown string in fairseq is `<unk>`, but
                # this is tokenized by sacrebleu as `< unk >`, inflating
                # BLEU scores. Instead, we use a somewhat more verbose
                # alternative that is unlikely to appear in the real
                # reference, but doesn't get split into multiple tokens.
                unk_string=("UNKNOWNTOKENINREF" if is_ref else "UNKNOWNTOKENINHYP"),
            )
            if self.tokenizer:
                s = self.tokenizer.decode(s)
            return s

        gen_out = self.inference_step(generator, [model], sample)
        hyps, refs = [], []
        for i in range(len(gen_out)):
            hyps.append(decode(gen_out[i][0]["tokens"], is_ref=False))
            refs.append(
                decode(
                    utils.strip_pad(sample["target"][i], self.target_dictionary.pad()),
                    is_ref=True,  # don't count <unk> as matches to the hypo
                )
            )
        if self.cfg.eval_bleu_print_samples:
            # Only the first sentence of the batch is logged.
            logger.info("H-{} {}".format(sample["id"][0], hyps[0]))
            logger.info("T-{} {}".format(sample["id"][0], refs[0]))

        eval_tokenization = "none" if self.cfg.eval_tokenized_bleu else "13a"
        return sacrebleu.corpus_bleu(hyps, [refs], tokenize=eval_tokenization)

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate WER/BLEU counts and register ``uer``, ``wer`` and ``bleu``."""
        super().reduce_metrics(logging_outputs, criterion)

        if self.cfg.eval_wer:
            zero = torch.scalar_tensor(0.0)
            num_char_errors = sum(
                log.get("_num_char_errors", zero) for log in logging_outputs
            )
            num_chars = sum(log.get("_num_chars", zero) for log in logging_outputs)
            num_word_errors = sum(
                log.get("_num_word_errors", zero) for log in logging_outputs
            )
            num_words = sum(log.get("_num_words", zero) for log in logging_outputs)
            metrics.log_scalar("_num_char_errors", num_char_errors)
            metrics.log_scalar("_num_chars", num_chars)
            metrics.log_scalar("_num_word_errors", num_word_errors)
            metrics.log_scalar("_num_words", num_words)
            if num_chars > 0:
                metrics.log_derived(
                    "uer",
                    lambda meters: meters["_num_char_errors"].sum
                    * 100.0
                    / meters["_num_chars"].sum
                    if meters["_num_chars"].sum > 0
                    else float("nan"),
                )
            if num_words > 0:
                metrics.log_derived(
                    "wer",
                    lambda meters: meters["_num_word_errors"].sum
                    * 100.0
                    / meters["_num_words"].sum
                    if meters["_num_words"].sum > 0
                    else float("nan"),
                )
        if self.cfg.eval_bleu:
            len_keys = ["_bleu_sys_len", "_bleu_ref_len"]
            count_keys = [f"_bleu_counts_{i}" for i in range(4)]
            total_keys = [f"_bleu_totals_{i}" for i in range(4)]
            for k in len_keys + count_keys + total_keys:
                metrics.log_scalar(k, sum(log.get(k, 0) for log in logging_outputs))

            import sacrebleu

            metrics.log_derived(
                "bleu",
                lambda meters: sacrebleu.compute_bleu(
                    correct=[meters[k].sum for k in count_keys],
                    total=[meters[k].sum for k in total_keys],
                    sys_len=meters["_bleu_sys_len"].sum,
                    ref_len=meters["_bleu_ref_len"].sum,
                    smooth_method="exp",
                ).score,
            )
import FairseqTask, register_task + + +logger = logging.getLogger(__name__) + + +@dataclass +class AudioMaskingConfig: + feature_encoder_spec: str = II("model.modalities.audio.feature_encoder_spec") + mask_prob: float = II("model.modalities.audio.mask_prob") + mask_prob_adjust: float = II("model.modalities.audio.mask_prob_adjust") + mask_length: int = II("model.modalities.audio.mask_length") + inverse_mask: bool = II("model.modalities.audio.inverse_mask") + mask_dropout: float = II("model.modalities.audio.mask_dropout") + clone_batch: int = II("model.clone_batch") + expand_adjacent: bool = False + non_overlapping: bool = False + + +@dataclass +class AudioPretrainingConfig(FairseqDataclass): + data: str = field(default=MISSING, metadata={"help": "path to data directory"}) + labels: Optional[str] = field( + default=None, + metadata={"help": "extension of the label file to load, used for fine-tuning"}, + ) + multi_corpus_keys: Optional[str] = field( + default=None, + metadata={"help": "Comma separated names for loading multi corpus datasets"}) + multi_corpus_sampling_weights: Optional[str] = field( + default=None, + metadata={"help": "Comma separated string of sampling weights corresponding to the multi_corpus_keys"}) + binarized_dataset: bool = field( + default=False, + metadata={ + "help": "if true, loads binarized dataset (useful for very large datasets). " + "See examples/wav2vec/scripts/binarize_manifest.sh" + }, + ) + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
audio files will be up/down sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={"help": "if set, normalizes input to have 0 mean and unit variance"}, + ) + enable_padding: bool = field( + default=False, metadata={"help": "pad shorter samples instead of cropping"} + ) + max_sample_size: Optional[int] = field( + default=None, metadata={"help": "max sample size to crop to for batching"} + ) + min_sample_size: Optional[int] = field( + default=None, metadata={"help": "min sample size to skip small examples"} + ) + num_batch_buckets: int = field( + default=0, + metadata={"help": "number of buckets"}, + ) + tpu: bool = II("common.tpu") + text_compression_level: ChoiceEnum([x.name for x in TextCompressionLevel]) = field( + default="none", + metadata={ + "help": "compression level for texts (e.g. audio filenames, " + "target texts): none/low/high (default: none). " + }, + ) + + rebuild_batches: bool = True + precompute_mask_config: Optional[AudioMaskingConfig] = None + + post_save_script: Optional[str] = None + + subsample: float = 1 + seed: int = II("common.seed") + + +@register_task("audio_pretraining", dataclass=AudioPretrainingConfig) +class AudioPretrainingTask(FairseqTask): + """ """ + + cfg: AudioPretrainingConfig + + @classmethod + def setup_task(cls, cfg: AudioPretrainingConfig, **kwargs): + """Setup the task (e.g., load dictionaries). 
+ + Args: + cfg (AudioPretrainingConfig): configuration of this task + """ + + return cls(cfg) + + def load_dataset(self, split: str, task_cfg: FairseqDataclass = None, **kwargs): + data_path = self.cfg.data + task_cfg = task_cfg or self.cfg + + # upgrade old task + if isinstance(task_cfg, Namespace): + if not hasattr(task_cfg, "autoregressive"): + task_cfg.autoregressive = not task_cfg.criterion == "ctc" + + text_compression_level = getattr( + TextCompressionLevel, str(self.cfg.text_compression_level) + ) + + compute_mask = getattr(task_cfg, "precompute_mask_config", None) is not None + mask_args = {} + if compute_mask: + mask_args = task_cfg.precompute_mask_config + + if getattr(task_cfg, "binarized_dataset", False): + self.datasets[split] = BinarizedAudioDataset( + data_path, + split=split, + sample_rate=task_cfg.get("sample_rate", self.cfg.sample_rate), + max_sample_size=self.cfg.max_sample_size, + min_sample_size=self.cfg.min_sample_size, + pad=task_cfg.labels is not None or task_cfg.enable_padding, + normalize=task_cfg.normalize, + num_buckets=self.cfg.num_batch_buckets or int(self.cfg.tpu), + compute_mask=compute_mask, + **mask_args, + ) + else: + if task_cfg.multi_corpus_keys is None: + manifest_path = os.path.join(data_path, "{}.tsv".format(split)) + + self.datasets[split] = FileAudioDataset( + manifest_path=manifest_path, + sample_rate=task_cfg.get("sample_rate", self.cfg.sample_rate), + max_sample_size=self.cfg.max_sample_size, + min_sample_size=self.cfg.min_sample_size, + pad=task_cfg.labels is not None or task_cfg.enable_padding, + normalize=task_cfg.normalize, + num_buckets=self.cfg.num_batch_buckets or int(self.cfg.tpu), + text_compression_level=text_compression_level, + compute_mask=compute_mask, + **mask_args, + ) + else: + dataset_map = OrderedDict() + self.dataset_map = {} + multi_corpus_keys = [k.strip() for k in task_cfg.multi_corpus_keys.split(",")] + corpus_idx_map = {k: idx for idx, k in enumerate(multi_corpus_keys)} + data_keys = 
[k.split(":") for k in split.split(",")] + + multi_corpus_sampling_weights = [float(val.strip()) for val in task_cfg.multi_corpus_sampling_weights.split(",")] + data_weights = [] + + for key, file_name in data_keys: + + k = key.strip() + manifest_path = os.path.join(data_path, "{}.tsv".format(file_name.strip())) + + # TODO: Remove duplication of code from the if block above + dataset_map[k] = FileAudioDataset( + manifest_path=manifest_path, + sample_rate=task_cfg.get("sample_rate", self.cfg.sample_rate), + max_sample_size=self.cfg.max_sample_size, + min_sample_size=self.cfg.min_sample_size, + pad=task_cfg.labels is not None or task_cfg.enable_padding, + normalize=task_cfg.normalize, + num_buckets=self.cfg.num_batch_buckets or int(self.cfg.tpu), + text_compression_level=text_compression_level, + compute_mask=compute_mask, + corpus_key=corpus_idx_map[k], + **mask_args, + ) + + data_weights.append(multi_corpus_sampling_weights[corpus_idx_map[k]]) + + self.dataset_map[split] = dataset_map + + if len(dataset_map) == 1: + self.datasets[split] = list(dataset_map.values())[0] + else: + self.datasets[split] = MultiCorpusDataset(dataset_map, distribution=data_weights, seed=0, sort_indices=True) + + if getattr(task_cfg, "subsample", 1) < 1: + self.datasets[split] = SubsampleDataset( + self.datasets[split], + task_cfg.subsample, + shuffle=True, + seed=task_cfg.seed, + ) + + if self.cfg.tpu and task_cfg.inferred_w2v_config.mask_channel_prob == 0.0: + logger.info( + "Pretraining on TPUs may suffer convergence " + "issues when training with `mask_channel_prob` value of " + "0. You may want to set this to a low value close to 0." 
+ ) + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return sys.maxsize, sys.maxsize + + def build_model(self, model_cfg: FairseqDataclass, from_checkpoint=False): + model = super().build_model(model_cfg, from_checkpoint) + + actualized_cfg = getattr(model, "cfg", None) + if actualized_cfg is not None: + # if "w2v_args" in actualized_cfg: + if hasattr(actualized_cfg, "w2v_args"): + model_cfg.w2v_args = actualized_cfg.w2v_args + + return model + + def post_save(self, cp_path, num_updates): + if self.cfg.post_save_script is not None: + logger.info(f"launching {self.cfg.post_save_script}") + import os.path as osp + from fairseq.file_io import PathManager + + eval_cp_path = osp.join( + osp.dirname(cp_path), f"checkpoint_eval_{num_updates}.pt" + ) + + print(cp_path, eval_cp_path, osp.dirname(cp_path)) + + assert PathManager.copy( + cp_path, eval_cp_path, overwrite=True + ), f"Failed to copy {cp_path} to {eval_cp_path}" + + import subprocess + import shlex + + subprocess.call(shlex.split(f"{self.cfg.post_save_script} {eval_cp_path}")) diff --git a/fairseq/fairseq/tasks/cross_lingual_lm.py b/fairseq/fairseq/tasks/cross_lingual_lm.py new file mode 100644 index 0000000..8f8fe7e --- /dev/null +++ b/fairseq/fairseq/tasks/cross_lingual_lm.py @@ -0,0 +1,191 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import itertools +import logging +import os +from collections import OrderedDict + +import numpy as np +from fairseq import tokenizer, utils +from fairseq.data import ConcatDataset, Dictionary, TokenBlockDataset, data_utils +from fairseq.data.legacy.masked_lm_dataset import MaskedLMDataset +from fairseq.data.legacy.masked_lm_dictionary import MaskedLMDictionary +from fairseq.data.multi_corpus_sampled_dataset import MultiCorpusSampledDataset +from fairseq.tasks import LegacyFairseqTask, register_task + + +logger = logging.getLogger(__name__) + + +@register_task("cross_lingual_lm") +class CrossLingualLMTask(LegacyFairseqTask): + """ + Task for training cross-lingual language models. + + For more details look at: https://arxiv.org/pdf/1901.07291.pdf + + Args: + dictionary (Dictionary): the dictionary for the input of the task + """ + + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + parser.add_argument( + "data", + help="colon separated path to data directories list, \ + will be iterated upon during epochs in round-robin manner", + ) + parser.add_argument( + "--tokens-per-sample", + default=512, + type=int, + help="max number of total tokens over all segments" " per sample", + ) + parser.add_argument( + "--monolingual-langs", + default="en", + type=str, + help="comma separated list of languages for which we" + " want to train XLM on", + ) + parser.add_argument( + "--shuffle", + action="store_true", + help="shuffle each monolingual dataset while" " training", + ) + + def __init__(self, args, dictionary): + super().__init__(args) + self.dictionary = dictionary + self.seed = args.seed + self.distributed_world_size = args.distributed_world_size + self.langs2id = self._lang_to_id(args.monolingual_langs) + + def _lang_to_id(self, languages: str): + """ + Build a map from languages to ids. These ids are used as segment labels + for cross-lingual LM training. 
+ """ + lang2id = {} + langs = [l.strip() for l in languages.split(",")] + for id, lang in enumerate(langs): + lang2id[lang] = id + return lang2id + + @classmethod + def load_dictionary(cls, filename): + return MaskedLMDictionary.load(filename) + + @classmethod + def build_dictionary( + cls, filenames, workers=1, threshold=-1, nwords=-1, padding_factor=8 + ): + d = MaskedLMDictionary() + for filename in filenames: + Dictionary.add_file_to_dictionary( + filename, d, tokenizer.tokenize_line, workers + ) + d.finalize(threshold=threshold, nwords=nwords, padding_factor=padding_factor) + return d + + @property + def target_dictionary(self): + return self.dictionary + + @classmethod + def setup_task(cls, args, **kwargs): + """Setup the task.""" + dictionary = MaskedLMDictionary.load(os.path.join(args.data, "dict.txt")) + logger.info("dictionary: {} types".format(len(dictionary))) + return cls(args, dictionary) + + def _load_single_lang_dataset(self, split, epoch): + loaded_datasets = [] + + paths = utils.split_paths(self.args.data) + assert len(paths) > 0 + data_path = paths[(epoch - 1) % len(paths)] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + path = os.path.join(data_path, split_k) + + ds = data_utils.load_indexed_dataset( + path, self.dictionary, self.args.dataset_impl + ) + if ds is None: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + # Since we append each block with the classification_token, + # we need to effectively create blocks of length + # tokens_per_sample-1 + loaded_datasets.append( + TokenBlockDataset( + ds, + ds.sizes, + self.args.tokens_per_sample - 1, + pad=self.dictionary.pad(), + eos=self.dictionary.eos(), + ) + ) + + logger.info( + "{} {} {} examples".format(data_path, split_k, len(loaded_datasets[-1])) + ) + + if len(loaded_datasets) == 1: + dataset = loaded_datasets[0] + sizes = dataset.sizes + else: + dataset = ConcatDataset(loaded_datasets) + 
sizes = np.concatenate([ds.sizes for ds in loaded_datasets]) + + return dataset, sizes + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + """ + dataset_map = OrderedDict() + + for lang in self.langs2id.keys(): + # Datasets are expected to be in "split.lang" format (Eg: train.en) + language_split = "{}.{}".format(split, lang) + + block_dataset, sizes = self._load_single_lang_dataset( + split=language_split, epoch=epoch + ) + + dataset_map[lang] = MaskedLMDataset( + dataset=block_dataset, + sizes=sizes, + vocab=self.dictionary, + pad_idx=self.dictionary.pad(), + mask_idx=self.dictionary.mask(), + classif_token_idx=self.dictionary.eos(), + sep_token_idx=self.dictionary.eos(), + shuffle=getattr(self.args, "shuffle", False), + has_pairs=False, + segment_id=self.langs2id[lang], + seed=self.seed, + ) + + self.datasets[split] = MultiCorpusSampledDataset(dataset_map) + logger.info( + "{} {} {} examples".format( + utils.split_paths(self.args.data)[epoch - 1], + split, + len(self.datasets[split]), + ) + ) diff --git a/fairseq/fairseq/tasks/denoising.py b/fairseq/fairseq/tasks/denoising.py new file mode 100644 index 0000000..57b824d --- /dev/null +++ b/fairseq/fairseq/tasks/denoising.py @@ -0,0 +1,296 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Optional + +import numpy as np +from omegaconf import II, MISSING + +from fairseq import utils +from fairseq.data import ( + AppendTokenDataset, + DenoisingDataset, + Dictionary, + IdDataset, + NestedDictionaryDataset, + NumelDataset, + PadDataset, + PrependTokenDataset, + StripTokenDataset, + TokenBlockDataset, + data_utils, +) +from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq.data.shorten_dataset import maybe_shorten_dataset +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.tasks import FairseqTask, register_task + +from ..data.indexed_dataset import get_available_dataset_impl + +logger = logging.getLogger(__name__) + +SAMPLE_BREAK_MODE_CHOICES = ChoiceEnum(["none", "complete", "complete_doc", "eos"]) +SHORTEN_METHOD_CHOICES = ChoiceEnum(["none", "truncate", "random_crop"]) +MASK_LENGTH_CHOICES = ChoiceEnum(["subword", "word", "span-poisson"]) + + +@dataclass +class DenoisingConfig(FairseqDataclass): + data: str = field( + default=MISSING, + metadata={"help": "path to data directory"}, + ) + bpe: Optional[str] = field( + default=None, + metadata={"help": "TODO"}, + ) + tokens_per_sample: int = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments " + "per sample for dataset" + }, + ) + sample_break_mode: SAMPLE_BREAK_MODE_CHOICES = field( + default="complete_doc", + metadata={ + "help": 'If omitted or "none", fills each sample with tokens-per-sample ' + 'tokens. If set to "complete", splits samples only at the end ' + "of sentence, but may include multiple sentences per sample. " + '"complete_doc" is similar but respects doc boundaries. ' + 'If set to "eos", includes only one sentence per sample.' 
+ }, + ) + replace_length: int = field( + default=0, + metadata={"help": "TODO, should only allow -1, 0 and 1"}, + ) + mask: float = field( + default=0.0, + metadata={"help": "fraction of words/subwords that will be masked"}, + ) + mask_random: float = field( + default=0.0, + metadata={"help": "instead of using [MASK], use random token this often"}, + ) + insert: float = field( + default=0.0, + metadata={"help": "insert this percentage of additional random tokens"}, + ) + permute: float = field( + default=0.0, + metadata={"help": "take this proportion of subwords and permute them"}, + ) + rotate: float = field( + default=0.5, + metadata={"help": "rotate this proportion of inputs"}, + ) + poisson_lambda: float = field( + default=3.0, + metadata={"help": "randomly shuffle sentences for this proportion of inputs"}, + ) + shuffle_instance: float = field( + default=0.0, + metadata={"help": "shuffle this proportion of sentences in all inputs"}, + ) + mask_length: MASK_LENGTH_CHOICES = field( + default="subword", + metadata={"help": "mask length to choose"}, + ) + permute_sentences: int = field( + default=-1, + metadata={ + "help": "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)" + }, + ) + seed: int = II("common.seed") + shorten_method: SHORTEN_METHOD_CHOICES = field( + default="none", + metadata={ + "help": "if not none, shorten sequences that exceed --tokens-per-sample" + }, + ) + shorten_data_split_list: str = field( + default="", + metadata={ + "help": "comma-separated list of dataset splits to apply shortening to, " + 'e.g., "train,valid" (default: all dataset splits)' + }, + ) + max_source_positions: int = field( + default=1024, + metadata={"help": "max number of tokens in the source sequence"}, + ) + max_target_positions: int = field( + default=1024, + metadata={"help": "max number of tokens in the target sequence"}, + ) + dataset_impl: Optional[ChoiceEnum(get_available_dataset_impl())] = II( + "dataset.dataset_impl" + ) + + 
+@register_task("denoising", dataclass=DenoisingConfig) +class DenoisingTask(FairseqTask): + """ + Denoising task for applying sequence to sequence denoising. (ie. BART) + """ + + cfg: DenoisingConfig + + def __init__(self, cfg, dictionary): + super().__init__(cfg) + self.dictionary = dictionary + + # add mask token + self.mask_idx = self.dictionary.add_symbol("<mask>") + + @classmethod + def setup_task(cls, cfg: DenoisingConfig, **kwargs): + """Setup the task.""" + paths = utils.split_paths(cfg.data) + assert len(paths) > 0 + dictionary = Dictionary.load(os.path.join(paths[0], "dict.txt")) + logger.info("dictionary: {} types".format(len(dictionary))) + if not hasattr(cfg, "shuffle_instance"): + cfg.shuffle_instance = False + return cls(cfg, dictionary) + + def _load_dataset_split(self, split, epoch, combine): + paths = utils.split_paths(self.cfg.data) + assert len(paths) > 0 + data_path = paths[(epoch - 1) % len(paths)] + split_path = os.path.join(data_path, split) + + dataset = data_utils.load_indexed_dataset( + split_path, + self.dictionary, + self.cfg.dataset_impl, + combine=combine, + ) + if dataset is None: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, split_path) + ) + + dataset = StripTokenDataset(dataset, self.dictionary.eos()) + + dataset = maybe_shorten_dataset( + dataset, + split, + self.cfg.shorten_data_split_list, + self.cfg.shorten_method, + self.cfg.tokens_per_sample, + self.cfg.seed, + ) + + # create continuous blocks of tokens + dataset = TokenBlockDataset( + dataset, + dataset.sizes, + self.cfg.tokens_per_sample - 2, + # one less for <s> and one for </s> + pad=self.dictionary.pad(), + eos=self.dictionary.eos(), + break_mode=self.cfg.sample_break_mode, + document_sep_len=0, + ) + logger.info("loaded {} blocks from: {}".format(len(dataset), split_path)) + + # prepend beginning-of-sentence token (<s>, equiv. 
to [CLS] in BERT) + dataset = PrependTokenDataset(dataset, self.source_dictionary.bos()) + dataset = AppendTokenDataset(dataset, self.source_dictionary.eos()) + return dataset + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + """ + dataset = self._load_dataset_split(split, epoch, combine) + + mask_whole_words = ( + get_whole_word_mask(self.cfg.bpe, self.source_dictionary) + if self.cfg.mask_length != "subword" + else None + ) + + self.datasets[split] = DenoisingDataset( + dataset, + dataset.sizes, + self.dictionary, + self.mask_idx, + mask_whole_words, + shuffle=self.cfg.shuffle_instance, + seed=self.cfg.seed, + mask=self.cfg.mask, + mask_random=self.cfg.mask_random, + insert=self.cfg.insert, + rotate=self.cfg.rotate, + permute_sentences=self.cfg.permute_sentences, + bpe=self.cfg.bpe, + replace_length=self.cfg.replace_length, + mask_length=self.cfg.mask_length, + poisson_lambda=self.cfg.poisson_lambda, + ) + logger.info( + "Split: {0}, Loaded {1} samples of denoising_dataset".format( + split, + len(self.datasets[split]), + ) + ) + + def build_dataset_for_inference(self, src_tokens, src_lengths, **kwargs): + """ + Generate batches for inference. We assume that the input begins with a + bos symbol (`<s>`) and ends with an eos symbol (`</s>`). 
+ """ + pad = self.source_dictionary.pad() + eos = self.source_dictionary.eos() + src_dataset = TokenBlockDataset( + src_tokens, + src_lengths, + block_size=self.cfg.tokens_per_sample - 2, # for <s> and </s> + pad=pad, + eos=eos, + break_mode=self.cfg.sample_break_mode, + document_sep_len=0, + ) + prev_output_tokens = PrependTokenDataset( + StripTokenDataset(src_dataset, eos), eos + ) + src_dataset = PadDataset(src_dataset, pad_idx=pad, left_pad=False) + return NestedDictionaryDataset( + { + "id": IdDataset(), + "net_input": { + "src_tokens": src_dataset, + "src_lengths": NumelDataset(src_dataset, reduce=False), + "prev_output_tokens": PadDataset( + prev_output_tokens, pad_idx=pad, left_pad=False + ), + }, + "target": src_dataset, + }, + sizes=[np.array(src_lengths)], + ) + + def max_positions(self): + """Return the max sentence length allowed by the task.""" + return (self.cfg.max_source_positions, self.cfg.max_target_positions) + + @property + def source_dictionary(self): + """Return the source :class:`~fairseq.data.Dictionary`.""" + return self.dictionary + + @property + def target_dictionary(self): + """Return the target :class:`~fairseq.data.Dictionary`.""" + return self.dictionary diff --git a/fairseq/fairseq/tasks/fairseq_task.py b/fairseq/fairseq/tasks/fairseq_task.py new file mode 100644 index 0000000..e39d1d6 --- /dev/null +++ b/fairseq/fairseq/tasks/fairseq_task.py @@ -0,0 +1,708 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +import warnings +from argparse import Namespace +from typing import Any, Callable, Dict, List + +import torch +from fairseq import search, tokenizer, utils +from fairseq.logging import metrics +from fairseq.data import Dictionary, FairseqDataset, data_utils, encoders, iterators +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import gen_parser_from_dataclass +from fairseq.optim.amp_optimizer import AMPOptimizer +from omegaconf import DictConfig + + +logger = logging.getLogger(__name__) + + +class StatefulContainer(object): + def __init__(self): + self._state = dict() + self._factories = dict() + + def add_factory(self, name, factory: Callable[[], Any]): + self._factories[name] = factory + + def merge_state_dict(self, state_dict: Dict[str, Any]): + self._state.update(state_dict) + + @property + def state_dict(self) -> Dict[str, Any]: + return self._state + + def __getattr__(self, name): + if name not in self._state and name in self._factories: + self._state[name] = self._factories[name]() + + if name in self._state: + return self._state[name] + + raise AttributeError(f"Task state has no factory for attribute {name}") + + +class FairseqTask(object): + """ + Tasks store dictionaries and provide helpers for loading/iterating over + Datasets, initializing the Model/Criterion and calculating the loss. + + Tasks have limited statefulness. In particular, state that needs to be + saved to/loaded from checkpoints needs to be stored in the `self.state` + :class:`StatefulContainer` object. For example:: + + self.state.add_factory("dictionary", self.load_dictionary) + print(self.state.dictionary) # calls self.load_dictionary() + + This is necessary so that when loading checkpoints, we can properly + recreate the task state after initializing the task instance. 
+ """ + + @classmethod + def add_args(cls, parser): + """Add task-specific arguments to the parser.""" + dc = getattr(cls, "__dataclass", None) + if dc is not None: + gen_parser_from_dataclass(parser, dc()) + + @staticmethod + def logging_outputs_can_be_summed(criterion) -> bool: + """ + Whether the logging outputs returned by `train_step` and `valid_step` can + be summed across workers prior to calling `aggregate_logging_outputs`. + Setting this to True will improves distributed training speed. + """ + return criterion.logging_outputs_can_be_summed() + + def __init__(self, cfg: FairseqDataclass, **kwargs): + self.cfg = cfg + self.datasets = dict() + self.dataset_to_epoch_iter = dict() + self.state = StatefulContainer() + + @classmethod + def load_dictionary(cls, filename): + """Load the dictionary from the filename + + Args: + filename (str): the filename + """ + return Dictionary.load(filename) + + @classmethod + def build_dictionary( + cls, filenames, workers=1, threshold=-1, nwords=-1, padding_factor=8 + ): + """Build the dictionary + + Args: + filenames (list): list of filenames + workers (int): number of concurrent workers + threshold (int): defines the minimum word count + nwords (int): defines the total number of words in the final dictionary, + including special symbols + padding_factor (int): can be used to pad the dictionary size to be a + multiple of 8, which is important on some hardware (e.g., Nvidia + Tensor Cores). + """ + d = Dictionary() + for filename in filenames: + Dictionary.add_file_to_dictionary( + filename, d, tokenizer.tokenize_line, workers + ) + d.finalize(threshold=threshold, nwords=nwords, padding_factor=padding_factor) + return d + + @classmethod + def setup_task(cls, cfg: DictConfig, **kwargs): + """Setup the task (e.g., load dictionaries). 
+ + Args: + cfg (omegaconf.DictConfig): parsed command-line arguments + """ + return cls(cfg, **kwargs) + + def has_sharded_data(self, split): + return os.pathsep in getattr(self.cfg, "data", "") + + def load_dataset( + self, + split: str, + combine: bool = False, + task_cfg: FairseqDataclass = None, + **kwargs, + ): + """Load a given dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + combine (bool): combines a split segmented into pieces into one dataset + task_cfg (FairseqDataclass): optional task configuration stored in the checkpoint that can be used + to load datasets + """ + raise NotImplementedError + + def dataset(self, split): + """ + Return a loaded dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + + Returns: + a :class:`~fairseq.data.FairseqDataset` corresponding to *split* + """ + from fairseq.data import FairseqDataset + + if split not in self.datasets: + raise KeyError("Dataset not loaded: " + split) + if not isinstance(self.datasets[split], FairseqDataset): + raise TypeError("Datasets are expected to be of type FairseqDataset") + return self.datasets[split] + + def filter_indices_by_size( + self, indices, dataset, max_positions=None, ignore_invalid_inputs=False + ): + """ + Filter examples that are too large + + Args: + indices (np.array): original array of sample indices + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). 
+ Returns: + np.array: array of filtered sample indices + """ + indices, ignored = dataset.filter_indices_by_size(indices, max_positions) + if len(ignored) > 0: + if not ignore_invalid_inputs: + raise Exception( + ( + "Size of sample #{} is invalid (={}) since max_positions={}, " + "skip this example with --skip-invalid-size-inputs-valid-test" + ).format(ignored[0], dataset.size(ignored[0]), max_positions) + ) + logger.warning( + ( + "{:,} samples have invalid sizes and will be skipped, " + "max_positions={}, first few sample ids={}" + ).format(len(ignored), max_positions, ignored[:10]) + ) + return indices + + def can_reuse_epoch_itr(self, dataset): + # We can reuse the epoch iterator across epochs as long as the dataset + # hasn't disabled it. We default to ``False`` here, although in practice + # this will be ``True`` for most datasets that inherit from + # ``FairseqDataset`` due to the base implementation there. + return getattr(dataset, "can_reuse_epoch_itr_across_epochs", False) + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + Get an iterator that yields batches of data from the given dataset. + + Args: + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). 
+ required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + seed (int, optional): seed for random number generator for + reproducibility (default: 1). + num_shards (int, optional): shard the data iterator into N + shards (default: 1). + shard_id (int, optional): which shard of the data iterator to + return (default: 0). + num_workers (int, optional): how many subprocesses to use for data + loading. 0 means the data will be loaded in the main process + (default: 0). + epoch (int, optional): the epoch to start the iterator from + (default: 1). + data_buffer_size (int, optional): number of batches to + preload (default: 0). + disable_iterator_cache (bool, optional): don't cache the + EpochBatchIterator (ignores `FairseqTask::can_reuse_epoch_itr`) + (default: False). + skip_remainder_batch (bool, optional): if set, discard the last + batch in each training epoch, as the last batch is often smaller than + local_batch_size * distributed_word_size (default: ``True``). + grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. 
+ update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + logger.info(f"can_reuse_epoch_itr = {can_reuse_epoch_itr}") + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + def make_batches(dataset, epoch): + logger.info(f"creating new batches for epoch {epoch}") + + # get indices ordered by example size + with data_utils.numpy_seed(seed + epoch): + indices = dataset.ordered_indices() + + # filter examples that are too large + if max_positions is not None: + indices = self.filter_indices_by_size( + indices, dataset, max_positions, ignore_invalid_inputs + ) + + # create mini-batches with given size constraints + batches = dataset.batch_by_size( + indices, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + return batches + + reuse_dataloader = getattr(self.cfg, "reuse_dataloader", True) + persistent_workers = getattr(self.cfg, "persistent_workers", True) + rebuild_batches = getattr(self.cfg, "rebuild_batches", False) + logger.info(f"reuse_dataloader = {reuse_dataloader}") + logger.info(f"rebuild_batches = {rebuild_batches}") + + if rebuild_batches: + logger.info("batches will be rebuilt for each epoch") + batch_sampler = make_batches + else: + batch_sampler = make_batches(dataset, epoch) + + # return a reusable, sharded iterator + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + 
def build_generator(
    self,
    models,
    args,
    seq_gen_cls=None,
    extra_gen_cls_kwargs=None,
    prefix_allowed_tokens_fn=None,
):
    """
    Build a :class:`~fairseq.SequenceGenerator` instance for this
    task.

    When ``args.score_reference`` is truthy a
    :class:`~fairseq.sequence_scorer.SequenceScorer` is returned instead and
    none of the search options below are consulted.

    Args:
        models (List[~fairseq.models.FairseqModel]): ensemble of models
        args (fairseq.dataclass.configs.GenerationConfig):
            configuration object (dataclass) for generation. Every option
            is read with ``getattr`` and a default, so missing attributes
            are tolerated.
        seq_gen_cls (optional): generator class to instantiate. When
            ``None``, ``SequenceGeneratorWithAlignment`` is used if
            ``args.print_alignment`` is truthy, else ``SequenceGenerator``.
        extra_gen_cls_kwargs (Dict[str, Any]): extra options to pass
            through to SequenceGenerator
        prefix_allowed_tokens_fn (Callable[[int, torch.Tensor], List[int]]):
            If provided, this function constrains the beam search to
            allowed tokens only at each step. The provided function
            should take 2 arguments: the batch ID (`batch_id: int`)
            and a unidimensional tensor of token ids (`inputs_ids:
            torch.Tensor`). It has to return a `List[int]` with the
            allowed tokens for the next generation step conditioned
            on the previously generated tokens (`inputs_ids`) and
            the batch ID (`batch_id`). This argument is useful for
            constrained generation conditioned on the prefix, as
            described in "Autoregressive Entity Retrieval"
            (https://arxiv.org/abs/2010.00904) and
            https://github.com/facebookresearch/GENRE.
            When ``None``, ``args.prefix_allowed_tokens_fn`` is used if set.

    Returns:
        the ``SequenceScorer`` described above, or an instance of
        *seq_gen_cls* configured with the selected search strategy.

    Raises:
        ValueError: if more than one of sampling, diverse beam groups,
            match-source-len and a positive diversity rate is requested.
        AssertionError: if ``sampling_topk``/``sampling_topp`` is set
            without ``sampling``.
    """
    # Reference scoring short-circuits generation entirely.
    if getattr(args, "score_reference", False):
        from fairseq.sequence_scorer import SequenceScorer

        return SequenceScorer(
            self.target_dictionary,
            compute_alignment=getattr(args, "print_alignment", False),
        )

    from fairseq.sequence_generator import (
        SequenceGenerator,
        SequenceGeneratorWithAlignment,
    )

    # Choose search strategy. Defaults to Beam Search.
    sampling = getattr(args, "sampling", False)
    sampling_topk = getattr(args, "sampling_topk", -1)
    sampling_topp = getattr(args, "sampling_topp", -1.0)
    diverse_beam_groups = getattr(args, "diverse_beam_groups", -1)
    diverse_beam_strength = getattr(args, "diverse_beam_strength", 0.5)
    match_source_len = getattr(args, "match_source_len", False)
    diversity_rate = getattr(args, "diversity_rate", -1)
    constrained = getattr(args, "constraints", False)
    # An explicit argument wins over the value stored on ``args``.
    if prefix_allowed_tokens_fn is None:
        prefix_allowed_tokens_fn = getattr(args, "prefix_allowed_tokens_fn", None)
    # Count how many of the four mutually exclusive strategies were requested.
    if (
        sum(
            int(cond)
            for cond in [
                sampling,
                diverse_beam_groups > 0,
                match_source_len,
                diversity_rate > 0,
            ]
        )
        > 1
    ):
        raise ValueError("Provided Search parameters are mutually exclusive.")
    assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling"
    assert sampling_topp < 0 or sampling, "--sampling-topp requires --sampling"

    # The first matching branch wins, so the order below is a priority order.
    if sampling:
        search_strategy = search.Sampling(
            self.target_dictionary, sampling_topk, sampling_topp
        )
    elif diverse_beam_groups > 0:
        search_strategy = search.DiverseBeamSearch(
            self.target_dictionary, diverse_beam_groups, diverse_beam_strength
        )
    elif match_source_len:
        # this is useful for tagging applications where the output
        # length should match the input length, so we hardcode the
        # length constraints for simplicity
        search_strategy = search.LengthConstrainedBeamSearch(
            self.target_dictionary,
            min_len_a=1,
            min_len_b=0,
            max_len_a=1,
            max_len_b=0,
        )
    # NOTE(review): this branch tests ``> -1`` while the exclusivity check
    # above tests ``> 0``, so a rate of exactly 0 selects
    # DiverseSiblingsSearch without being counted -- confirm this is intended.
    elif diversity_rate > -1:
        search_strategy = search.DiverseSiblingsSearch(
            self.target_dictionary, diversity_rate
        )
    elif constrained:
        search_strategy = search.LexicallyConstrainedBeamSearch(
            self.target_dictionary, args.constraints
        )
    elif prefix_allowed_tokens_fn:
        search_strategy = search.PrefixConstrainedBeamSearch(
            self.target_dictionary, prefix_allowed_tokens_fn
        )
    else:
        search_strategy = search.BeamSearch(self.target_dictionary)

    # A non-empty dict passed by the caller is reused (and may be mutated
    # below); ``None`` or an empty dict is replaced by a fresh one.
    extra_gen_cls_kwargs = extra_gen_cls_kwargs or {}
    if seq_gen_cls is None:
        if getattr(args, "print_alignment", False):
            seq_gen_cls = SequenceGeneratorWithAlignment
            extra_gen_cls_kwargs["print_alignment"] = args.print_alignment
        else:
            seq_gen_cls = SequenceGenerator

    return seq_gen_cls(
        models,
        self.target_dictionary,
        beam_size=getattr(args, "beam", 5),
        max_len_a=getattr(args, "max_len_a", 0),
        max_len_b=getattr(args, "max_len_b", 200),
        min_len=getattr(args, "min_len", 1),
        normalize_scores=(not getattr(args, "unnormalized", False)),
        len_penalty=getattr(args, "lenpen", 1),
        unk_penalty=getattr(args, "unkpen", 0),
        temperature=getattr(args, "temperature", 1.0),
        match_source_len=getattr(args, "match_source_len", False),
        no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0),
        search_strategy=search_strategy,
        **extra_gen_cls_kwargs,
    )
def inference_step(
    self, generator, models, sample, prefix_tokens=None, constraints=None
):
    """Run *generator* on *sample* with gradient tracking disabled.

    Args:
        generator: object exposing ``generate(models, sample, ...)``
        models: models handed to the generator unchanged
        sample: the mini-batch handed to the generator unchanged
        prefix_tokens (optional): forwarded as ``prefix_tokens``
        constraints (optional): forwarded as ``constraints``

    Returns:
        whatever ``generator.generate`` returns
    """
    generate_kwargs = {
        "prefix_tokens": prefix_tokens,
        "constraints": constraints,
    }
    # Inference never needs autograd bookkeeping.
    with torch.no_grad():
        hypotheses = generator.generate(models, sample, **generate_kwargs)
    return hypotheses
def reduce_metrics(self, logging_outputs, criterion):
    """Aggregate logging outputs from data parallel training.

    Tasks that still override the deprecated ``aggregate_logging_outputs``
    hook are routed through it; otherwise ``wpb``/``wps``/``bsz`` are logged
    from the ``ntokens``/``nsentences`` entries and the criterion class is
    asked to reduce its own metrics.

    Args:
        logging_outputs: per-worker logging dicts
        criterion: criterion whose class provides ``reduce_metrics``
    """
    # backward compatibility for tasks that override aggregate_logging_outputs
    inherited_impl = FairseqTask.aggregate_logging_outputs
    bound_impl = self.aggregate_logging_outputs.__func__
    if bound_impl is not inherited_impl:
        utils.deprecation_warning(
            "Tasks should implement the reduce_metrics API. "
            "Falling back to deprecated aggregate_logging_outputs API."
        )
        aggregated = self.aggregate_logging_outputs(logging_outputs, criterion)
        for name, value in aggregated.items():
            metrics.log_scalar(name, value)
        return

    has_ntokens = any("ntokens" in log for log in logging_outputs)
    if has_ntokens:
        total_tokens = sum(log.get("ntokens", 0) for log in logging_outputs)
        metrics.log_scalar("wpb", total_tokens, priority=180, round=1)
        metrics.log_speed("wps", total_tokens, priority=90, round=1)
    else:
        warnings.warn(
            "ntokens not found in Criterion logging outputs, cannot log wpb or wps"
        )

    has_nsentences = any("nsentences" in log for log in logging_outputs)
    if has_nsentences:
        total_sentences = sum(log.get("nsentences", 0) for log in logging_outputs)
        metrics.log_scalar("bsz", total_sentences, priority=190, round=1)
    else:
        warnings.warn(
            "nsentences not found in Criterion logging outputs, cannot log bsz"
        )

    criterion.__class__.reduce_metrics(logging_outputs)
class LegacyFairseqTask(FairseqTask):
    """Task base class configured from an ``argparse.Namespace``.

    The parent is initialised with ``None`` as its config; the parsed
    arguments are kept on ``self.args`` instead.
    """

    def __init__(self, args: Namespace):
        super().__init__(None)
        self.args = args
        self.datasets = {}
        self.dataset_to_epoch_iter = {}

    @classmethod
    def setup_task(cls, args: Namespace, **kwargs):
        """Setup the task (e.g., load dictionaries).

        Args:
            args (argparse.Namespace): parsed command-line arguments
        """
        return cls(args, **kwargs)

    def has_sharded_data(self, split):
        """Return True when ``args.data`` lists several paths.

        Paths are considered sharded when the value contains ``os.pathsep``.
        A missing or ``None`` ``data`` attribute counts as not sharded.
        """
        # ``data`` may be present but None (e.g. an Optional config field);
        # fall back to "" so the membership test cannot raise TypeError.
        data = getattr(self.args, "data", "") or ""
        return os.pathsep in data

    def build_model(self, args: Namespace, from_checkpoint=False):
        """
        Build the :class:`~fairseq.models.BaseFairseqModel` instance for this
        task.

        Args:
            args (argparse.Namespace): parsed command-line arguments
            from_checkpoint (bool): forwarded to ``models.build_model``

        Returns:
            a :class:`~fairseq.models.BaseFairseqModel` instance
        """
        from fairseq import models, quantization_utils

        model = models.build_model(args, self, from_checkpoint)
        model = quantization_utils.quantize_model_scalar(model, args)
        return model

    def build_criterion(self, args: Namespace, from_checkpoint=False):
        """
        Build the :class:`~fairseq.criterions.FairseqCriterion` instance for
        this task.

        Args:
            args (argparse.Namespace): parsed command-line arguments
            from_checkpoint (bool): forwarded to
                ``criterions.build_criterion``, mirroring
                ``FairseqTask.build_criterion`` so callers can treat legacy
                and dataclass-based tasks uniformly

        Returns:
            a :class:`~fairseq.criterions.FairseqCriterion` instance
        """
        from fairseq import criterions

        return criterions.build_criterion(
            args, self, from_checkpoint=from_checkpoint
        )
class LabelEncoder(object):
    """Callable that encodes a label string with a fixed dictionary."""

    def __init__(self, dictionary: Dictionary) -> None:
        self.dictionary = dictionary

    def __call__(self, label: str) -> List[str]:
        """Encode *label* without appending EOS or adding unseen symbols.

        NOTE(review): the annotated return type is ``List[str]``; the actual
        value is whatever ``dictionary.encode_line`` returns -- confirm.
        """
        encode_options = {"append_eos": False, "add_if_not_exist": False}
        return self.dictionary.encode_line(label, **encode_options)
@register_task("hubert_pretraining", dataclass=HubertPretrainingConfig)
class HubertPretrainingTask(FairseqTask):
    """HuBERT pre-training / fine-tuning task.

    Dictionaries are loaded lazily through ``self.state`` factories: a single
    ``target_dictionary`` when fine-tuning, otherwise a list of
    ``dictionaries`` (one per configured label type).
    """

    cfg: HubertPretrainingConfig

    def __init__(
        self,
        cfg: HubertPretrainingConfig,
    ) -> None:
        super().__init__(cfg)

        logger.info(f"current directory is {os.getcwd()}")
        logger.info(f"HubertPretrainingTask Config {cfg}")

        self.cfg = cfg
        self.fine_tuning = cfg.fine_tuning

        # Register the loader under the name the matching property reads.
        if cfg.fine_tuning:
            self.state.add_factory("target_dictionary", self.load_dictionaries)
        else:
            self.state.add_factory("dictionaries", self.load_dictionaries)

        self.blank_symbol = "<s>"

    @property
    def source_dictionary(self) -> Optional[Dictionary]:
        """This task exposes no source dictionary."""
        return None

    @property
    def target_dictionary(self) -> Optional[Dictionary]:
        """Dictionary registered on ``self.state`` for fine-tuning."""
        return self.state.target_dictionary

    @property
    def dictionaries(self) -> List[Dictionary]:
        """Dictionaries registered on ``self.state`` for pre-training."""
        return self.state.dictionaries

    @classmethod
    def setup_task(
        cls, cfg: HubertPretrainingConfig, **kwargs
    ) -> "HubertPretrainingTask":
        """Instantiate the task from *cfg*; extra keyword arguments are ignored."""
        return cls(cfg)

    def load_dictionaries(self):
        """Load ``dict.<label>.txt`` for every configured label type.

        Returns:
            the first dictionary when fine-tuning, otherwise the full list.
        """
        # Reuse get_label_dir() so the directory rule lives in one place.
        label_dir = self.get_label_dir()
        dictionaries = [
            Dictionary.load(f"{label_dir}/dict.{label}.txt")
            for label in self.cfg.labels
        ]
        return dictionaries[0] if self.cfg.fine_tuning else dictionaries

    def get_label_dir(self) -> str:
        """Return ``cfg.label_dir``, falling back to ``cfg.data`` when unset."""
        if self.cfg.label_dir is None:
            return self.cfg.data
        return self.cfg.label_dir

    def load_dataset(self, split: str, **kwargs) -> None:
        """Build the :class:`HubertDataset` for *split* and store it.

        The manifest is read from ``<cfg.data>/<split>.tsv`` and the labels
        from ``<label_dir>/<split>.<label>``. The result is stored in
        ``self.datasets[split]``; nothing is returned.
        """
        manifest = f"{self.cfg.data}/{split}.tsv"
        dictionaries = (
            [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries
        )
        pad_list = [dictionary.pad() for dictionary in dictionaries]
        eos_list = [dictionary.eos() for dictionary in dictionaries]
        procs = [LabelEncoder(dictionary) for dictionary in dictionaries]
        label_dir = self.get_label_dir()
        paths = [f"{label_dir}/{split}.{label}" for label in self.cfg.labels]

        # hubert v1: pad_audio=True, random_crop=False;
        self.datasets[split] = HubertDataset(
            manifest,
            sample_rate=self.cfg.sample_rate,
            label_paths=paths,
            label_rates=self.cfg.label_rate,
            pad_list=pad_list,
            eos_list=eos_list,
            label_processors=procs,
            max_keep_sample_size=self.cfg.max_keep_size,
            min_keep_sample_size=self.cfg.min_sample_size,
            max_sample_size=self.cfg.max_sample_size,
            pad_audio=self.cfg.pad_audio,
            normalize=self.cfg.normalize,
            store_labels=False,
            random_crop=self.cfg.random_crop,
            single_target=self.cfg.single_target,
        )

    def max_positions(self) -> Tuple[int, int]:
        """Return ``(sys.maxsize, sys.maxsize)``, i.e. no effective limit."""
        return (sys.maxsize, sys.maxsize)

    def filter_indices_by_size(
        self, indices: np.ndarray, *args, **kwargs
    ) -> np.ndarray:
        """Return *indices* unchanged; this task does no size filtering here."""
        return indices
' + 'If set to "eos", includes only one sentence per sample.' + }, + ) + tokens_per_sample: int = field( + default=1024, + metadata={"help": "max number of tokens per sample for LM dataset"}, + ) + output_dictionary_size: int = field( + default=-1, metadata={"help": "limit the size of output dictionary"} + ) + self_target: bool = field(default=False, metadata={"help": "include self target"}) + future_target: bool = field( + default=False, metadata={"help": "include future target"} + ) + past_target: bool = field(default=False, metadata={"help": "include past target"}) + add_bos_token: bool = field( + default=False, metadata={"help": "prepend beginning of sentence token (<s>)"} + ) + max_target_positions: Optional[int] = field( + default=None, metadata={"help": "max number of tokens in the target sequence"} + ) + shorten_method: SHORTEN_METHOD_CHOICES = field( + default="none", + metadata={ + "help": "if not none, shorten sequences that exceed --tokens-per-sample" + }, + ) + shorten_data_split_list: str = field( + default="", + metadata={ + "help": "comma-separated list of dataset splits to apply shortening to, " + 'e.g., "train,valid" (default: all dataset splits)' + }, + ) + pad_to_fixed_length: Optional[bool] = field( + default=False, + metadata={"help": "pad to fixed length"}, + ) + pad_to_fixed_bsz: Optional[bool] = field( + default=False, + metadata={"help": "boolean to pad to fixed batch size"}, + ) + + # TODO common vars below add to parent + seed: int = II("common.seed") + batch_size: Optional[int] = II("dataset.batch_size") + batch_size_valid: Optional[int] = II("dataset.batch_size_valid") + dataset_impl: Optional[ChoiceEnum(get_available_dataset_impl())] = II( + "dataset.dataset_impl" + ) + data_buffer_size: int = II("dataset.data_buffer_size") + tpu: bool = II("common.tpu") + use_plasma_view: bool = II("common.use_plasma_view") + plasma_path: str = II("common.plasma_path") + + +@register_task("language_modeling", dataclass=LanguageModelingConfig) +class 
@classmethod
def setup_dictionary(cls, args, **kwargs):
    """Load the input and output dictionaries for the language model.

    Args:
        args: parsed arguments; reads ``data`` and ``output_dictionary_size``

    Returns:
        tuple ``(dictionary, output_dictionary)``. Both are ``None`` when
        ``args.data`` is falsy. The output dictionary is a
        :class:`TruncatedDictionary` when ``output_dictionary_size >= 0``,
        otherwise the input dictionary itself.
    """
    if not args.data:
        # Without a data directory there is nothing to load.
        return (None, None)

    data_dirs = utils.split_paths(args.data)
    assert len(data_dirs) > 0
    vocab = Dictionary.load(os.path.join(data_dirs[0], "dict.txt"))
    logger.info("dictionary: {} types".format(len(vocab)))

    if args.output_dictionary_size >= 0:
        return (vocab, TruncatedDictionary(vocab, args.output_dictionary_size))
    return (vocab, vocab)
def load_dataset(
    self, split: str, epoch=1, combine=False, **kwargs
) -> None:
    """Load a given dataset split.

    The resulting :class:`MonolingualDataset` is stored in
    ``self.datasets[split]``; nothing is returned.

    Args:
        split (str): name of the split (e.g., train, valid, valid1, test)
        epoch (int): 1-based epoch, used to pick a data path round-robin
            from the paths listed in ``args.data``
        combine (bool): forwarded to ``data_utils.load_indexed_dataset``

    Raises:
        FileNotFoundError: if no indexed dataset exists for the split
    """
    paths = utils.split_paths(self.args.data)
    assert len(paths) > 0

    # Rotate through the configured data directories, one per epoch.
    data_path = paths[(epoch - 1) % len(paths)]
    split_path = os.path.join(data_path, split)

    # each process has its own copy of the raw data (likely to be an np.memmap)
    dataset = data_utils.load_indexed_dataset(
        split_path, self.dictionary, self.args.dataset_impl, combine=combine
    )
    if dataset is None:
        raise FileNotFoundError(f"Dataset not found: {split} ({split_path})")

    dataset = maybe_shorten_dataset(
        dataset,
        split,
        self.args.shorten_data_split_list,
        self.args.shorten_method,
        self.args.tokens_per_sample,
        self.args.seed,
    )
    dataset = TokenBlockDataset(
        dataset,
        dataset.sizes,
        self.args.tokens_per_sample,
        pad=self.dictionary.pad(),
        eos=self.dictionary.eos(),
        break_mode=self.args.sample_break_mode,
        include_targets=True,
        use_plasma_view=self.args.use_plasma_view,
        split_path=split_path,
        plasma_path=self.args.plasma_path,
    )

    add_eos_for_other_targets = (
        self.args.sample_break_mode is not None
        and self.args.sample_break_mode != "none"
    )
    fixed_pad_length = None
    if self.args.pad_to_fixed_length:
        fixed_pad_length = self.args.tokens_per_sample

    pad_to_bsz = None
    if self.args.pad_to_fixed_bsz:
        # Validation splits use their own batch size.
        pad_to_bsz = (
            self.args.batch_size_valid if "valid" in split else self.args.batch_size
        )

    self.datasets[split] = MonolingualDataset(
        dataset=dataset,
        sizes=dataset.sizes,
        src_vocab=self.dictionary,
        tgt_vocab=self.output_dictionary,
        add_eos_for_other_targets=add_eos_for_other_targets,
        shuffle=True,
        targets=self.targets,
        add_bos_token=self.args.add_bos_token,
        fixed_pad_length=fixed_pad_length,
        pad_to_bsz=pad_to_bsz,
    )
def inference_step(
    self, generator, models, sample, prefix_tokens=None, constraints=None
):
    """Generate from the language model with gradient tracking disabled.

    Generation is conditioned on BOS when ``args.add_bos_token`` is set and
    on EOS otherwise. When no *prefix_tokens* are given and the sample has
    non-empty ``src_tokens``, those are used as the prefix, with the leading
    column dropped if every row starts with the conditioning token.

    Raises:
        NotImplementedError: if *constraints* is not ``None``
    """
    with torch.no_grad():
        # Generation will always be conditioned on bos_token
        use_bos = getattr(self.args, "add_bos_token", False)
        vocab = self.source_dictionary
        bos_token = vocab.bos() if use_bos else vocab.eos()

        if constraints is not None:
            raise NotImplementedError(
                "Constrained decoding with the language_modeling task is not supported"
            )

        # SequenceGenerator doesn't use src_tokens directly, we need to
        # pass the `prefix_tokens` argument instead
        if prefix_tokens is None:
            src_tokens = sample["net_input"]["src_tokens"]
            if src_tokens.nelement():
                starts_with_bos = src_tokens[:, 0].eq(bos_token).all()
                prefix_tokens = src_tokens[:, 1:] if starts_with_bos else src_tokens

        return generator.generate(
            models, sample, prefix_tokens=prefix_tokens, bos_token=bos_token
        )
@register_task("legacy_masked_lm")
class LegacyMaskedLMTask(LegacyFairseqTask):
    """Task for training a Masked LM (BERT) model.

    Args:
        args: parsed command-line arguments; ``args.seed`` is kept as the
            task seed.
        dictionary (Dictionary): the dictionary for the input of the task
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument(
            "data",
            help="colon separated path to data directories list, \
                            will be iterated upon during epochs in round-robin manner",
        )
        parser.add_argument(
            "--tokens-per-sample",
            default=512,
            type=int,
            help="max number of total tokens over all segments"
            " per sample for BERT dataset",
        )
        parser.add_argument(
            "--break-mode", default="doc", type=str, help="mode for breaking sentence"
        )
        parser.add_argument("--shuffle-dataset", action="store_true", default=False)

    def __init__(self, args, dictionary):
        super().__init__(args)
        self.dictionary = dictionary
        self.seed = args.seed

    @classmethod
    def load_dictionary(cls, filename):
        """Load a ``BertDictionary`` from *filename*."""
        return BertDictionary.load(filename)

    @classmethod
    def build_dictionary(
        cls, filenames, workers=1, threshold=-1, nwords=-1, padding_factor=8
    ):
        """Build a finalized ``BertDictionary`` from the given text files.

        Every file is tokenized with ``tokenizer.tokenize_line`` and added to
        the dictionary before it is finalized with the supplied ``threshold``,
        ``nwords`` and ``padding_factor``.
        """
        d = BertDictionary()
        for filename in filenames:
            Dictionary.add_file_to_dictionary(
                filename, d, tokenizer.tokenize_line, workers
            )
        d.finalize(threshold=threshold, nwords=nwords, padding_factor=padding_factor)
        return d

    @property
    def target_dictionary(self):
        """Targets share the input dictionary."""
        return self.dictionary

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Setup the task: load ``dict.txt`` from the first data path."""
        paths = utils.split_paths(args.data)
        assert len(paths) > 0
        dictionary = BertDictionary.load(os.path.join(paths[0], "dict.txt"))
        logger.info("dictionary: {} types".format(len(dictionary)))

        return cls(args, dictionary)

    def load_dataset(self, split, epoch=1, combine=False):
        """Load a given dataset split.

        The data directory is chosen round-robin from ``args.data`` by
        *epoch*. Shards named ``split``, ``split1``, ``split2``, ... are read
        until one is missing; when *combine* is false only the first shard is
        used.

        Args:
            split (str): name of the split (e.g., train, valid, test)
            epoch (int): 1-based epoch number used to pick the data directory
            combine (bool): load and concatenate all numbered shards

        Raises:
            FileNotFoundError: if the first shard of *split* does not exist.
        """
        loaded_datasets = []

        paths = utils.split_paths(self.args.data)
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]
        # The original call passed data_path as a %-format argument without a
        # placeholder, which makes the logging module report a formatting
        # error instead of emitting the path.
        logger.info("data_path: %s", data_path)

        for k in itertools.count():
            split_k = split + (str(k) if k > 0 else "")
            path = os.path.join(data_path, split_k)
            ds = indexed_dataset.make_dataset(
                path,
                impl=self.args.dataset_impl,
                fix_lua_indexing=True,
                dictionary=self.dictionary,
            )

            if ds is None:
                if k > 0:
                    # ran out of numbered shards
                    break
                else:
                    raise FileNotFoundError(
                        "Dataset not found: {} ({})".format(split, data_path)
                    )

            # Seed per shard so block-pair construction is reproducible.
            with data_utils.numpy_seed(self.seed + k):
                loaded_datasets.append(
                    BlockPairDataset(
                        ds,
                        self.dictionary,
                        ds.sizes,
                        self.args.tokens_per_sample,
                        break_mode=self.args.break_mode,
                        doc_break_size=1,
                    )
                )

            logger.info(
                "{} {} {} examples".format(data_path, split_k, len(loaded_datasets[-1]))
            )

            if not combine:
                break

        if len(loaded_datasets) == 1:
            dataset = loaded_datasets[0]
            sizes = dataset.sizes
        else:
            dataset = ConcatDataset(loaded_datasets)
            sizes = np.concatenate([ds.sizes for ds in loaded_datasets])

        self.datasets[split] = MaskedLMDataset(
            dataset=dataset,
            sizes=sizes,
            vocab=self.dictionary,
            pad_idx=self.dictionary.pad(),
            mask_idx=self.dictionary.mask(),
            classif_token_idx=self.dictionary.cls(),
            sep_token_idx=self.dictionary.sep(),
            shuffle=self.args.shuffle_dataset,
            seed=self.seed,
        )
@dataclass
class MaskedLMConfig(FairseqDataclass):
    # Configuration for the "masked_lm" task; each field's help text below is
    # the user-facing description.
    data: str = field(
        default=MISSING,
        metadata={
            "help": "colon separated path to data directories list, \
                            will be iterated upon during epochs in round-robin manner"
        },
    )
    sample_break_mode: SAMPLE_BREAK_MODE_CHOICES = field(
        default="none",
        metadata={
            "help": 'If omitted or "none", fills each sample with tokens-per-sample '
            'tokens. If set to "complete", splits samples only at the end '
            "of sentence, but may include multiple sentences per sample. "
            '"complete_doc" is similar but respects doc boundaries. '
            'If set to "eos", includes only one sentence per sample.'
        },
    )
    tokens_per_sample: int = field(
        default=1024,
        metadata={"help": "max number of tokens per sample for LM dataset"},
    )
    mask_prob: float = field(
        default=0.15,
        metadata={"help": "probability of replacing a token with mask"},
    )
    leave_unmasked_prob: float = field(
        default=0.1,
        metadata={"help": "probability that a masked token is unmasked"},
    )
    random_token_prob: float = field(
        default=0.1,
        metadata={"help": "probability of replacing a token with a random token"},
    )
    freq_weighted_replacement: bool = field(
        default=False,
        metadata={"help": "sample random replacement words based on word frequencies"},
    )
    mask_whole_words: bool = field(
        default=False,
        metadata={"help": "mask whole words; you may also want to set --bpe"},
    )
    mask_multiple_length: int = field(
        default=1,
        metadata={"help": "repeat the mask indices multiple times"},
    )
    mask_stdev: float = field(
        default=0.0,
        metadata={"help": "stdev of the mask length"},
    )
    shorten_method: SHORTEN_METHOD_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, shorten sequences that exceed --tokens-per-sample"
        },
    )
    shorten_data_split_list: str = field(
        default="",
        metadata={
            "help": "comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)'
        },
    )
    seed: int = II("common.seed")

    include_target_tokens: bool = field(
        default=False,
        metadata={
            "help": "include target tokens in model input. this is used for data2vec"
        },
    )
    include_index: bool = field(
        default=True,
        metadata={"help": "include index in model input. this is used for data2vec"},
    )
    skip_masking: bool = field(
        default=False,
        metadata={"help": "skip masking at dataset"},
    )
    # subsample_train: float = field(
    #     default=1,
    #     metadata={"help": "shorten training set for debugging"},
    # )
    d2v2_multi: bool = field(
        default=False,
        metadata={"help": "prepare dataset for data2vec_multi"},
    )


@register_task("masked_lm", dataclass=MaskedLMConfig)
class MaskedLMTask(FairseqTask):
    """Task for training masked language models (e.g., BERT, RoBERTa)."""

    # The docstring must be the first statement of the class body; it used to
    # follow this annotation, which left ``MaskedLMTask.__doc__`` empty.
    cfg: MaskedLMConfig

    def __init__(self, cfg: MaskedLMConfig, dictionary=None):
        super().__init__(cfg)
        self.dictionary = dictionary or self.load_dict(cfg)

        # add mask token
        self.mask_idx = self.dictionary.add_symbol("<mask>")

    @classmethod
    def setup_task(cls, cfg: MaskedLMConfig, **kwargs):
        """Create the task with the dictionary loaded from ``cfg.data``."""
        dictionary = cls.load_dict(cfg)
        return cls(cfg, dictionary)

    @classmethod
    def load_dict(cls, cfg):
        """Load ``dict.txt`` from the first path listed in ``cfg.data``."""
        paths = utils.split_paths(cfg.data)
        assert len(paths) > 0
        dictionary = Dictionary.load(os.path.join(paths[0], "dict.txt"))
        logger.info("dictionary: {} types".format(len(dictionary)))
        return dictionary

    def _load_dataset_split(self, split, epoch, combine):
        """Load *split* as token blocks with ``<s>`` prepended to each block.

        The data directory is picked round-robin from ``cfg.data`` by *epoch*.

        Raises:
            FileNotFoundError: if the indexed dataset for *split* is missing.
        """
        paths = utils.split_paths(self.cfg.data)
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]
        split_path = os.path.join(data_path, split)

        dataset = data_utils.load_indexed_dataset(
            split_path,
            self.source_dictionary,
            combine=combine,
        )
        if dataset is None:
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, split_path)
            )

        dataset = maybe_shorten_dataset(
            dataset,
            split,
            self.cfg.shorten_data_split_list,
            self.cfg.shorten_method,
            self.cfg.tokens_per_sample,
            self.cfg.seed,
        )

        # create continuous blocks of tokens
        dataset = TokenBlockDataset(
            dataset,
            dataset.sizes,
            self.cfg.tokens_per_sample - 1,  # one less for <s>
            pad=self.source_dictionary.pad(),
            eos=self.source_dictionary.eos(),
            break_mode=self.cfg.sample_break_mode,
        )
        logger.info("loaded {} blocks from: {}".format(len(dataset), split_path))

        # prepend beginning-of-sentence token (<s>, equiv. to [CLS] in BERT)
        return PrependTokenDataset(dataset, self.source_dictionary.bos())

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Load a given dataset split.

        Builds masked source/target views of the split and stores the result
        in ``self.datasets[split]``, ordered by a seeded permutation and then
        by source size.

        Args:
            split (str): name of the split (e.g., train, valid, test)
        """
        dataset = self._load_dataset_split(split, epoch, combine)

        # create masked input and targets
        # NOTE(review): this class only ever stores ``self.cfg``; ``self.args``
        # is not assigned anywhere in this file, so this branch looks like it
        # raises AttributeError when ``mask_whole_words`` is enabled. The
        # config also has no ``bpe`` field to pass instead -- confirm the
        # intended argument against ``get_whole_word_mask`` before changing.
        mask_whole_words = (
            get_whole_word_mask(self.args, self.source_dictionary)
            if self.cfg.mask_whole_words
            else None
        )

        src_dataset, tgt_dataset = MaskTokensDataset.apply_mask(
            dataset,
            self.source_dictionary,
            pad_idx=self.source_dictionary.pad(),
            mask_idx=self.mask_idx,
            seed=self.cfg.seed,
            mask_prob=self.cfg.mask_prob,
            leave_unmasked_prob=self.cfg.leave_unmasked_prob,
            random_token_prob=self.cfg.random_token_prob,
            freq_weighted_replacement=self.cfg.freq_weighted_replacement,
            mask_whole_words=mask_whole_words,
            mask_multiple_length=self.cfg.mask_multiple_length,
            mask_stdev=self.cfg.mask_stdev,
            skip_masking=self.cfg.skip_masking,
        )

        with data_utils.numpy_seed(self.cfg.seed):
            shuffle = np.random.permutation(len(src_dataset))

        target_dataset = RightPadDataset(
            tgt_dataset,
            pad_idx=self.source_dictionary.pad(),
        )

        if self.cfg.d2v2_multi:
            dataset = self._d2v2_multi_dataset(src_dataset)
        else:
            dataset = self._regular_dataset(src_dataset, target_dataset)

        self.datasets[split] = SortDataset(
            dataset, sort_order=[shuffle, src_dataset.sizes]
        )

    def _regular_dataset(self, src_dataset, target_dataset):
        """Assemble the standard ``net_input``/``target`` sample dictionary."""
        input_dict = {
            "src_tokens": RightPadDataset(
                src_dataset,
                pad_idx=self.source_dictionary.pad(),
            ),
            "src_lengths": NumelDataset(src_dataset, reduce=False),
        }
        if self.cfg.include_target_tokens:
            input_dict["target_tokens"] = target_dataset
        if self.cfg.include_index:
            input_dict["src_id"] = IdDataset()

        dataset = NestedDictionaryDataset(
            {
                "id": IdDataset(),
                "net_input": input_dict,
                "target": target_dataset,
                "nsentences": NumSamplesDataset(),
                "ntokens": NumelDataset(src_dataset, reduce=True),
            },
            sizes=[src_dataset.sizes],
        )
        return dataset

    def _d2v2_multi_dataset(self, src_dataset):
        """Assemble the sample dictionary used when ``cfg.d2v2_multi`` is set.

        The model input carries ``source``, ``id`` and ``padding_mask``; no
        ``target`` entry is produced.
        """
        input_dict = {
            "source": RightPadDataset(
                src_dataset,
                pad_idx=self.source_dictionary.pad(),
            ),
            "id": IdDataset(),
            "padding_mask": RightPaddingMaskDataset(src_dataset),
        }

        dataset = NestedDictionaryDataset(
            {
                "id": IdDataset(),
                "net_input": input_dict,
                "nsentences": NumSamplesDataset(),
                "ntokens": NumelDataset(src_dataset, reduce=True),
            },
            sizes=[src_dataset.sizes],
        )
        return dataset

    def build_dataset_for_inference(self, src_tokens, src_lengths, sort=True):
        """Wrap raw token sequences for inference.

        Sequences are split with the "eos" break mode, right-padded and
        prefixed with ``<s>``; when *sort* is true the result is ordered by
        ``src_lengths``.
        """
        src_dataset = RightPadDataset(
            TokenBlockDataset(
                src_tokens,
                src_lengths,
                self.cfg.tokens_per_sample - 1,  # one less for <s>
                pad=self.source_dictionary.pad(),
                eos=self.source_dictionary.eos(),
                break_mode="eos",
            ),
            pad_idx=self.source_dictionary.pad(),
        )
        src_dataset = PrependTokenDataset(src_dataset, self.source_dictionary.bos())
        src_dataset = NestedDictionaryDataset(
            {
                "id": IdDataset(),
                "net_input": {
                    "src_tokens": src_dataset,
                    "src_lengths": NumelDataset(src_dataset, reduce=False),
                },
            },
            sizes=src_lengths,
        )
        if sort:
            src_dataset = SortDataset(src_dataset, sort_order=[src_lengths])
        return src_dataset

    @property
    def source_dictionary(self):
        return self.dictionary

    @property
    def target_dictionary(self):
        return self.dictionary

    def begin_epoch(self, epoch, model):
        """Forward the epoch number to the model."""
        model.set_epoch(epoch)

    def max_positions(self):
        """Return ``cfg.tokens_per_sample``."""
        return self.cfg.tokens_per_sample
@dataclass
class MultilingualDenoisingConfig(DenoisingConfig):
    # Extends DenoisingConfig with per-language sampling options.
    multilang_sampling_alpha: float = field(
        default=1.0,
        metadata={"help": "smoothing alpha for sample ratios across multiple datasets"},
    )
    add_lang_token: bool = field(
        default=False,
        metadata={"help": ""},
    )
    langs: Optional[str] = field(
        default=None,
        metadata={"help": "language ids we are considering"},
    )
    no_whole_word_mask_langs: str = field(
        default="",
        metadata={
            "help": "languages without spacing between words don't support whole word masking"
        },
    )
    train_subset: str = II("common.train_subset")
    valid_subset: str = II("common.valid_subset")


@register_task("multilingual_denoising", dataclass=MultilingualDenoisingConfig)
class MultilingualDenoisingTask(DenoisingTask):
    """Denoising task over several languages stored as sub-directories."""

    cfg: MultilingualDenoisingConfig

    @classmethod
    def setup_task(cls, cfg: MultilingualDenoisingConfig, **kwargs):
        """Setup the task.

        Loads ``dict.txt`` from the first data path and, when
        ``cfg.add_lang_token`` is set, adds one ``[lang]`` symbol per language.
        Languages come from ``cfg.langs`` or, if unset, from the
        sub-directories of the first data path.
        """
        paths = cfg.data.split(":")
        assert len(paths) > 0
        dictionary = Dictionary.load(os.path.join(paths[0], "dict.txt"))

        data_path = paths[0]
        if cfg.langs is None:
            languages = sorted(
                [
                    name
                    for name in os.listdir(data_path)
                    if os.path.isdir(os.path.join(data_path, name))
                ]
            )
        else:
            languages = cfg.langs.split(",")

        if cfg.add_lang_token:
            for lang in languages:
                dictionary.add_symbol("[{}]".format(lang))

        logger.info("dictionary: {} types".format(len(dictionary)))
        if not hasattr(cfg, "shuffle_instance"):
            cfg.shuffle_instance = False
        return cls(cfg, dictionary)

    def __init__(self, cfg: MultilingualDenoisingConfig, dictionary):
        super().__init__(cfg, dictionary)
        self.dictionary = dictionary

        # add mask token
        self.mask_idx = self.dictionary.add_symbol("<mask>")
        self.cfg = cfg

    def _get_sample_prob(self, dataset_lens):
        """
        Get smoothed sampling probability by languages. This helps low resource
        languages by upsampling them.

        Each language's share of ``dataset_lens`` is raised to
        ``cfg.multilang_sampling_alpha`` and the result is renormalized.
        """
        prob = dataset_lens / dataset_lens.sum()
        smoothed_prob = prob**self.cfg.multilang_sampling_alpha
        smoothed_prob = smoothed_prob / smoothed_prob.sum()
        return smoothed_prob

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Load a given dataset split.

        One denoising dataset is built per language. For the training subset
        the languages are resampled by the smoothed probabilities; for other
        splits each language is also registered as ``<split>_<lang>`` and
        ``cfg.valid_subset`` is rewritten to list those splits.

        Args:
            split (str): name of the split (e.g., train, valid, test)

        Raises:
            FileNotFoundError: if a language's indexed dataset is missing.
        """
        paths = self.cfg.data.split(":")
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]

        if self.cfg.langs is None:
            languages = sorted(
                [
                    name
                    for name in os.listdir(data_path)
                    if os.path.isdir(os.path.join(data_path, name))
                ]
            )
        else:
            languages = self.cfg.langs.split(",")
            for name in languages:
                p = os.path.join(data_path, name)
                assert os.path.exists(p), "data not found: {}".format(p)

        logger.info("Training on {0} languages: {1}".format(len(languages), languages))
        # The mapping used to be passed as a %-format argument without a
        # placeholder, so logging reported a formatting error and the mapping
        # was never shown.
        logger.info(
            "Language to id mapping: %s",
            {lang: idx for idx, lang in enumerate(languages)},
        )

        mask_whole_words = get_whole_word_mask(self.cfg.bpe, self.dictionary)
        language_without_segmentations = self.cfg.no_whole_word_mask_langs.split(",")
        lang_datasets = []
        for language in languages:
            split_path = os.path.join(data_path, language, split)

            dataset = data_utils.load_indexed_dataset(
                split_path,
                self.source_dictionary,
                self.cfg.dataset_impl,
                combine=combine,
            )
            if dataset is None:
                raise FileNotFoundError(
                    "Dataset not found: {} ({})".format(split, split_path)
                )

            end_token = (
                self.source_dictionary.index("[{}]".format(language))
                if self.cfg.add_lang_token
                else self.source_dictionary.eos()
            )

            # create continuous blocks of tokens
            dataset = TokenBlockDataset(
                dataset,
                dataset.sizes,
                # two less: <s> is prepended and end_token appended below
                self.cfg.tokens_per_sample - 2,
                pad=self.source_dictionary.pad(),
                eos=end_token,
                break_mode=self.cfg.sample_break_mode,
            )
            logger.info("loaded {} blocks from: {}".format(len(dataset), split_path))

            # prepend beginning-of-sentence token (<s>, equiv. to [CLS] in BERT)
            dataset = PrependTokenDataset(dataset, self.source_dictionary.bos())
            dataset = AppendTokenDataset(dataset, end_token)

            lang_mask_whole_words = (
                mask_whole_words
                if language not in language_without_segmentations
                else None
            )
            lang_dataset = DenoisingDataset(
                dataset,
                dataset.sizes,
                self.dictionary,
                self.mask_idx,
                lang_mask_whole_words,
                shuffle=self.cfg.shuffle_instance,
                seed=self.cfg.seed,
                mask=self.cfg.mask,
                mask_random=self.cfg.mask_random,
                insert=self.cfg.insert,
                rotate=self.cfg.rotate,
                permute_sentences=self.cfg.permute_sentences,
                bpe=self.cfg.bpe,
                replace_length=self.cfg.replace_length,
                mask_length=self.cfg.mask_length,
                poisson_lambda=self.cfg.poisson_lambda,
                eos=None
                if not self.cfg.add_lang_token
                else self.source_dictionary.index("[{}]".format(language)),
            )
            lang_datasets.append(lang_dataset)

        dataset_lengths = np.array(
            [len(d) for d in lang_datasets],
            dtype=float,
        )
        logger.info(
            "loaded total {} blocks for all languages".format(
                int(dataset_lengths.sum()),
            )
        )
        if split == self.cfg.train_subset:
            # For train subset, additionally up or down sample languages.
            sample_probs = self._get_sample_prob(dataset_lengths)
            logger.info(
                "Sample probability by language: {}".format(
                    {
                        lang: "{0:.4f}".format(sample_probs[idx])
                        for idx, lang in enumerate(languages)
                    }
                )
            )
            size_ratio = (sample_probs * dataset_lengths.sum()) / dataset_lengths
            logger.info(
                "Up/Down Sampling ratio by language: {}".format(
                    {
                        lang: "{0:.2f}".format(size_ratio[idx])
                        for idx, lang in enumerate(languages)
                    }
                )
            )

            resampled_lang_datasets = [
                ResamplingDataset(
                    lang_dataset,
                    size_ratio=ratio,
                    seed=self.cfg.seed,
                    epoch=epoch,
                    # sample with replacement only when upsampling
                    replace=ratio >= 1.0,
                )
                for lang_dataset, ratio in zip(lang_datasets, size_ratio)
            ]
            dataset = ConcatDataset(
                resampled_lang_datasets,
            )
        else:
            dataset = ConcatDataset(lang_datasets)
            lang_splits = [split]
            for lang_id, lang_dataset in enumerate(lang_datasets):
                split_name = split + "_" + languages[lang_id]
                lang_splits.append(split_name)
                self.datasets[split_name] = lang_dataset

            # Expose the per-language splits through the valid subset list.
            if split in self.cfg.valid_subset:
                self.cfg.valid_subset = self.cfg.valid_subset.replace(
                    split, ",".join(lang_splits)
                )

        with data_utils.numpy_seed(self.cfg.seed + epoch):
            shuffle = np.random.permutation(len(dataset))

        self.datasets[split] = SortDataset(
            dataset,
            sort_order=[
                shuffle,
                dataset.sizes,
            ],
        )
@dataclass
class MultilingualLanguageModelingConfig(FairseqDataclass):
    # Configuration for the "multilingual_language_modeling" task; each field's
    # help text is the user-facing description.
    # TODO common var add to parent
    data: Optional[str] = field(
        default=None, metadata={"help": "path to data directory"}
    )
    sample_break_mode: SAMPLE_BREAK_MODE_CHOICES = field(
        default="none",
        metadata={
            "help": 'If omitted or "none", fills each sample with tokens-per-sample '
            'tokens. If set to "complete", splits samples only at the end '
            "of sentence, but may include multiple sentences per sample. "
            '"complete_doc" is similar but respects doc boundaries. '
            'If set to "eos", includes only one sentence per sample.'
        },
    )
    tokens_per_sample: int = field(
        default=1024,
        metadata={"help": "max number of tokens per sample for LM dataset"},
    )
    output_dictionary_size: int = field(
        default=-1, metadata={"help": "limit the size of output dictionary"}
    )
    self_target: bool = field(default=False, metadata={"help": "include self target"})
    future_target: bool = field(
        default=False, metadata={"help": "include future target"}
    )
    past_target: bool = field(default=False, metadata={"help": "include past target"})
    add_bos_token: bool = field(
        default=False, metadata={"help": "prepend lang id token <dialect>"}
    )
    max_source_positions: Optional[int] = field(
        default=None, metadata={"help": "max number of tokens in the source sequence"}
    )
    max_target_positions: Optional[int] = field(
        default=None, metadata={"help": "max number of tokens in the target sequence"}
    )
    pad_to_fixed_length: Optional[bool] = field(
        default=False, metadata={"help": "pad to fixed length"}
    )
    pad_to_fixed_bsz: Optional[bool] = field(
        default=False, metadata={"help": "boolean to pad to fixed batch size"}
    )

    multilang_sampling_alpha: Optional[float] = field(
        default=1.0,
        metadata={
            # help text previously read "sample rations"
            "help": "smoothing alpha for sample ratios across multiple datasets"
        },
    )

    shorten_method: SHORTEN_METHOD_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, shorten sequences that exceed --tokens-per-sample"
        },
    )
    shorten_data_split_list: str = field(
        default="",
        metadata={
            "help": "comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)'
        },
    )

    langs: str = field(
        default="",
        metadata={
            "help": "comma-separated list of languages (default: all directories in data path)"
        },
    )
    baseline_model_langs: str = field(
        default="",
        metadata={
            "help": "comma-separated list of languages in the baseline model (default: none)"
        },
    )
    # TODO: legacy parameter kept for compatibility
    baseline_model: str = field(
        default="",
        metadata={"help": "path to the baseline model (default: none)"},
    )

    lang_to_offline_shard_ratio: str = field(
        default="",
        metadata={
            "help": "absolute path of tsv file location to indicate lang to offline shard ratio.",
        },
    )
    # TODO common vars below add to parent
    seed: int = II("common.seed")
    dataset_impl: Optional[ChoiceEnum(get_available_dataset_impl())] = II(
        "dataset.dataset_impl"
    )
    data_buffer_size: int = II("dataset.data_buffer_size")
    tpu: bool = II("common.tpu")
    batch_size: Optional[int] = II("dataset.batch_size")
    batch_size_valid: Optional[int] = II("dataset.batch_size_valid")
    train_subset: str = II("common.train_subset")
    valid_subset: str = II("common.valid_subset")
@staticmethod
def _get_langs(args, epoch=1):
    """Return ``(languages, data_path)`` for the given epoch.

    ``data_path`` is chosen round-robin from ``args.data`` by *epoch*.
    ``languages`` is the sorted list of its sub-directories, restricted to
    the comma-separated ``args.langs`` when that option is non-empty.

    Raises:
        AssertionError: if ``args.data`` yields no paths, or if a language
            named in ``args.langs`` has no directory under ``data_path``.
    """
    paths = utils.split_paths(args.data)
    assert len(paths) > 0
    data_path = paths[(epoch - 1) % len(paths)]

    languages = sorted(
        name
        for name in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, name))
    )
    if args.langs:
        keep_langs = set(args.langs.split(","))
        languages = [lang for lang in languages if lang in keep_langs]
        # Same condition as comparing the two lengths (languages is a
        # duplicate-free subset of keep_langs), but the failure now says
        # which directories are missing.
        missing = sorted(keep_langs.difference(languages))
        assert not missing, "language directories not found in {}: {}".format(
            data_path, missing
        )

    return languages, data_path
def load_dataset(self, split: str, epoch=1, combine=False, **kwargs):
    """Load a given dataset split.

    One ``MonolingualDataset`` is built per language. For the training
    subset the languages are resampled according to the smoothed sampling
    probabilities (optionally weighted by the offline shard ratio file); for
    other splits each language is also registered as ``<split>_<lang>`` and
    ``args.valid_subset`` is rewritten to list those splits.

    Args:
        split (str): name of the split (e.g., train, valid, test)
        epoch (int): 1-based epoch, used to pick the data path and as part
            of the resampling/shuffling seed
        combine (bool): forwarded to ``data_utils.load_indexed_dataset``

    Raises:
        FileNotFoundError: if a language's indexed dataset is missing.
        AssertionError: if the shard ratio file does not exist or lacks a
            language.
    """
    languages, data_path = MultilingualLanguageModelingTask._get_langs(
        self.args, epoch
    )
    lang_to_offline_shard_ratio = None
    if self.args.lang_to_offline_shard_ratio != "":
        lang_to_offline_shard_ratio = {}
        assert os.path.exists(
            self.args.lang_to_offline_shard_ratio
        ), "provided offline shard ratio file doesn't exist: {0}".format(
            self.args.lang_to_offline_shard_ratio
        )
        with open(self.args.lang_to_offline_shard_ratio) as fin:
            for line in fin:
                line = line.strip()
                if not line:
                    # tolerate blank lines instead of failing the unpack
                    continue
                lang, ratio = line.split("\t")
                lang_to_offline_shard_ratio[lang] = float(ratio)

        logger.info(
            "Found offline sharded ratio: %s",
            lang_to_offline_shard_ratio,
        )

    if split == self.args.train_subset:
        logger.info(
            "Training on {0} languages: {1}".format(len(languages), languages)
        )
    else:
        logger.info(
            "Evaluating on {0} languages: {1}".format(len(languages), languages)
        )

    # Leave room for the language token when one is prepended.
    tokens_per_sample = self.args.tokens_per_sample - int(self.args.add_bos_token)

    fixed_pad_length = None
    if self.args.pad_to_fixed_length:
        fixed_pad_length = self.args.tokens_per_sample

    pad_to_bsz = None
    if self.args.pad_to_fixed_bsz:
        pad_to_bsz = (
            self.args.batch_size_valid if "valid" in split else self.args.batch_size
        )

    lang_datasets = []
    for language in languages:
        split_path = os.path.join(data_path, language, split)
        dataset = data_utils.load_indexed_dataset(
            split_path, self.dictionary, self.args.dataset_impl, combine=combine
        )
        if dataset is None:
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, split_path)
            )

        dataset = maybe_shorten_dataset(
            dataset,
            split,
            self.args.shorten_data_split_list,
            self.args.shorten_method,
            tokens_per_sample,
            self.args.seed,
        )

        dataset = TokenBlockDataset(
            dataset,
            dataset.sizes,
            tokens_per_sample,
            pad=self.dictionary.pad(),
            eos=self.dictionary.eos(),
            break_mode=self.args.sample_break_mode,
            include_targets=True,
        )

        add_eos_for_other_targets = (
            self.args.sample_break_mode is not None
            and self.args.sample_break_mode != "none"
        )
        src_lang_idx, tgt_lang_idx = None, None
        if self.args.add_bos_token:
            src_lang_idx = self.dictionary.index(lang_token(language))
            tgt_lang_idx = self.output_dictionary.index(lang_token(language))

        lang_datasets.append(
            MonolingualDataset(
                dataset=dataset,
                sizes=dataset.sizes,
                src_vocab=self.dictionary,
                tgt_vocab=self.output_dictionary,
                add_eos_for_other_targets=add_eos_for_other_targets,
                shuffle=True,
                targets=self.targets,
                fixed_pad_length=fixed_pad_length,
                pad_to_bsz=pad_to_bsz,
                add_bos_token=self.args.add_bos_token,
                src_lang_idx=src_lang_idx,
                tgt_lang_idx=tgt_lang_idx,
            )
        )

    dataset_lengths = np.array(
        [len(d) for d in lang_datasets],
        dtype=float,
    )
    logger.info(
        "loaded total {} blocks for all languages".format(
            # lengths are held as floats; log the count as an integer
            int(dataset_lengths.sum()),
        )
    )
    if split == self.args.train_subset:
        if lang_to_offline_shard_ratio is not None:
            dataset_lengths_ratio_multiplier = []
            for lang in languages:
                assert (
                    lang in lang_to_offline_shard_ratio
                ), "Lang: {0} missing in offline shard ratio file: {1}".format(
                    lang,
                    self.args.lang_to_offline_shard_ratio,
                )
                dataset_lengths_ratio_multiplier.append(
                    lang_to_offline_shard_ratio[lang]
                )
            dataset_lengths_ratio_multiplier = np.array(
                dataset_lengths_ratio_multiplier
            )
            true_dataset_lengths = (
                dataset_lengths * dataset_lengths_ratio_multiplier
            )
        else:
            true_dataset_lengths = dataset_lengths
        # For train subset, additionally up or down sample languages.
        sample_probs = self._get_sample_prob(true_dataset_lengths)

        logger.info(
            "Sample probability by language: %s",
            {
                lang: "{0:.4f}".format(sample_probs[idx])
                for idx, lang in enumerate(languages)
            },
        )
        size_ratio = (sample_probs * true_dataset_lengths.sum()) / dataset_lengths
        # TODO: add an option for shrinking all size ratios to below 1
        # if self.args.multilang_sampling_alpha != 1:
        #   size_ratio /= size_ratio.max()

        # Fix numeric errors in size ratio computation
        #   0.999999999999999999 -> 1
        #   1.000000000000000002 -> 1
        size_ratio = np.round(size_ratio, 8)

        logger.info(
            "Up/Down Sampling ratio by language: %s",
            {
                lang: "{0:.2f}".format(size_ratio[idx])
                for idx, lang in enumerate(languages)
            },
        )
        logger.info(
            "Actual dataset size by language: %s",
            {
                lang: "{0:.2f}".format(len(lang_datasets[idx]))
                for idx, lang in enumerate(languages)
            },
        )
        resampled_lang_datasets = [
            ResamplingDataset(
                lang_dataset,
                size_ratio=ratio,
                seed=self.args.seed,
                epoch=epoch,
                # sample with replacement only when strictly upsampling
                replace=ratio > 1.0,
            )
            for lang_dataset, ratio in zip(lang_datasets, size_ratio)
        ]
        logger.info(
            "Resampled dataset size by language: %s",
            {
                lang: "{0:.2f}".format(len(resampled_lang_datasets[idx]))
                for idx, lang in enumerate(languages)
            },
        )
        dataset = ConcatDataset(resampled_lang_datasets)
    else:
        dataset = ConcatDataset(lang_datasets)
        lang_splits = [split]
        for lang_id, lang_dataset in enumerate(lang_datasets):
            split_name = split + "_" + languages[lang_id]
            lang_splits.append(split_name)
            self.datasets[split_name] = lang_dataset

        # [TODO]: This is hacky for now to print validation ppl for each
        # language individually. Maybe need task API changes to allow it
        # in more generic ways.
        if split in self.args.valid_subset:
            self.args.valid_subset = self.args.valid_subset.replace(
                split, ",".join(lang_splits)
            )

    with data_utils.numpy_seed(self.args.seed + epoch):
        shuffle = np.random.permutation(len(dataset))

    self.datasets[split] = SortDataset(
        dataset,
        sort_order=[
            shuffle,
            dataset.sizes,
        ],
    )
bos_token + if getattr(self.args, "add_bos_token", False): + src_lang_idx = self.dictionary.index(lang_token(language)) + bos_token = src_lang_idx or self.source_dictionary.bos() + else: + bos_token = self.source_dictionary.eos() + + if constraints is not None: + raise NotImplementedError( + "Constrained decoding with the language_modeling task is not supported" + ) + + # SequenceGenerator doesn't use src_tokens directly, we need to + # pass the `prefix_tokens` argument instead + if prefix_tokens is None and sample["net_input"]["src_tokens"].nelement(): + prefix_tokens = sample["net_input"]["src_tokens"] + if prefix_tokens[:, 0].eq(bos_token).all(): + prefix_tokens = prefix_tokens[:, 1:] + + return generator.generate( + models, sample, prefix_tokens=prefix_tokens, bos_token=bos_token + ) + + def eval_lm_dataloader( + self, + dataset, + max_tokens: Optional[int] = 36000, + batch_size: Optional[int] = None, + max_positions: Optional[int] = None, + num_shards: int = 1, + shard_id: int = 0, + num_workers: int = 1, + data_buffer_size: int = 10, + # ensures that every evaluated token has access to a context of at least + # this size, if possible + context_window: int = 0, + ): + if context_window > 0: + dataset = LMContextWindowDataset( + dataset=dataset, + tokens_per_sample=self.args.tokens_per_sample, + context_window=context_window, + pad_idx=self.source_dictionary.pad(), + ) + return self.get_batch_iterator( + dataset=dataset, + max_tokens=max_tokens, + max_sentences=batch_size, + max_positions=max_positions, + ignore_invalid_inputs=True, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + data_buffer_size=data_buffer_size, + ) + + @property + def source_dictionary(self): + """Return the :class:`~fairseq.data.Dictionary` for the language + model.""" + return self.dictionary + + @property + def target_dictionary(self): + """Return the :class:`~fairseq.data.Dictionary` for the language + model.""" + return self.output_dictionary diff --git 
@register_task("multilingual_masked_lm")
class MultiLingualMaskedLMTask(LegacyFairseqTask):
    """Task for training masked language models (e.g., BERT, RoBERTa).

    Every sub-directory of the selected data directory is treated as one
    language. For the training subset the per-language datasets are
    up/down sampled (see ``--multilang-sampling-alpha``) before being
    concatenated; for every other split they are concatenated unchanged and
    additionally registered under ``<split>_<language>``.
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument(
            "data",
            # Implicit concatenation instead of a backslash continuation so
            # the help text does not contain the source indentation.
            help="colon separated path to data directories list, "
            "will be iterated upon during epochs in round-robin manner",
        )
        parser.add_argument(
            "--sample-break-mode",
            default="complete",
            choices=["none", "complete", "complete_doc", "eos"],
            help='If omitted or "none", fills each sample with tokens-per-sample '
            'tokens. If set to "complete", splits samples only at the end '
            "of sentence, but may include multiple sentences per sample. "
            '"complete_doc" is similar but respects doc boundaries. '
            'If set to "eos", includes only one sentence per sample.',
        )
        parser.add_argument(
            "--tokens-per-sample",
            default=512,
            type=int,
            help="max number of total tokens over all segments "
            "per sample for BERT dataset",
        )
        parser.add_argument(
            "--mask-prob",
            default=0.15,
            type=float,
            help="probability of replacing a token with mask",
        )
        parser.add_argument(
            "--leave-unmasked-prob",
            default=0.1,
            type=float,
            help="probability that a masked token is unmasked",
        )
        parser.add_argument(
            "--random-token-prob",
            default=0.1,
            type=float,
            help="probability of replacing a token with a random token",
        )
        parser.add_argument(
            "--freq-weighted-replacement",
            action="store_true",
            help="sample random replacement words based on word frequencies",
        )
        parser.add_argument(
            "--mask-whole-words",
            default=False,
            action="store_true",
            help="mask whole words; you may also want to set --bpe",
        )
        parser.add_argument(
            "--multilang-sampling-alpha",
            type=float,
            default=1.0,
            help="smoothing alpha for sample rations across multiple datasets",
        )

    def __init__(self, args, dictionary):
        """Store the dictionary and register the ``<mask>`` symbol in it."""
        super().__init__(args)
        self.dictionary = dictionary
        self.seed = args.seed

        # add mask token
        self.mask_idx = dictionary.add_symbol("<mask>")

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Build the task, loading ``dict.txt`` from the first data path."""
        paths = utils.split_paths(args.data)
        assert len(paths) > 0
        dictionary = Dictionary.load(os.path.join(paths[0], "dict.txt"))
        logger.info("dictionary: {} types".format(len(dictionary)))
        return cls(args, dictionary)

    def _get_whole_word_mask(self):
        """Return a per-symbol "begins a word" tensor, or ``None``.

        ``None`` is returned when ``--mask-whole-words`` is off, and also
        when it is on but no BPE could be built (previously that case left
        the result unbound and raised ``UnboundLocalError``).
        """
        mask_whole_words = None
        if not self.args.mask_whole_words:
            return mask_whole_words

        bpe = encoders.build_bpe(self.args)
        if bpe is None:
            return mask_whole_words

        def is_beginning_of_word(i):
            if i < self.source_dictionary.nspecial:
                # special elements are always considered beginnings
                return True
            tok = self.source_dictionary[i]
            if tok.startswith("madeupword"):
                return True
            try:
                return bpe.is_beginning_of_word(tok)
            except ValueError:
                return True

        mask_whole_words = torch.ByteTensor(
            list(map(is_beginning_of_word, range(len(self.source_dictionary))))
        )
        return mask_whole_words

    def _get_sample_prob(self, dataset_lens):
        """
        Get smoothed sampling probability by languages. This helps low resource
        languages by upsampling them.

        Args:
            dataset_lens: array of per-language dataset lengths.

        Returns:
            The length shares raised to ``--multilang-sampling-alpha`` and
            renormalised to sum to one.
        """
        prob = dataset_lens / dataset_lens.sum()
        smoothed_prob = prob**self.args.multilang_sampling_alpha
        smoothed_prob = smoothed_prob / smoothed_prob.sum()
        return smoothed_prob

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Load a given dataset split.

        Args:
            split (str): name of the split (e.g., train, valid, test)
            epoch (int): selects the data path in round-robin order and
                seeds resampling and shuffling.
            combine (bool): forwarded to ``data_utils.load_indexed_dataset``.

        Raises:
            FileNotFoundError: if a language directory has no data for
                ``split``.
        """
        paths = utils.split_paths(self.args.data)
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]

        languages = sorted(
            name
            for name in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, name))
        )

        is_train_split = split == self.args.train_subset
        logger.info(
            "%s on %d languages: %s",
            "Training" if is_train_split else "Evaluating",
            len(languages),
            languages,
        )
        # The mapping is passed as a logging argument, so the message needs
        # a placeholder for it to be rendered at all.
        logger.info(
            "Language to id mapping: %s",
            {lang: idx for idx, lang in enumerate(languages)},
        )

        mask_whole_words = self._get_whole_word_mask()
        lang_datasets = []
        for lang_id, language in enumerate(languages):
            split_path = os.path.join(data_path, language, split)

            dataset = data_utils.load_indexed_dataset(
                split_path,
                self.source_dictionary,
                self.args.dataset_impl,
                combine=combine,
            )
            if dataset is None:
                raise FileNotFoundError(
                    "Dataset not found: {} ({})".format(split, split_path)
                )

            # create continuous blocks of tokens
            dataset = TokenBlockDataset(
                dataset,
                dataset.sizes,
                self.args.tokens_per_sample - 1,  # one less for <s>
                pad=self.source_dictionary.pad(),
                eos=self.source_dictionary.eos(),
                break_mode=self.args.sample_break_mode,
            )
            logger.info("loaded {} blocks from: {}".format(len(dataset), split_path))

            # prepend beginning-of-sentence token (<s>, equiv. to [CLS] in BERT)
            dataset = PrependTokenDataset(dataset, self.source_dictionary.bos())

            src_dataset, tgt_dataset = MaskTokensDataset.apply_mask(
                dataset,
                self.source_dictionary,
                pad_idx=self.source_dictionary.pad(),
                mask_idx=self.mask_idx,
                seed=self.args.seed,
                mask_prob=self.args.mask_prob,
                leave_unmasked_prob=self.args.leave_unmasked_prob,
                random_token_prob=self.args.random_token_prob,
                freq_weighted_replacement=self.args.freq_weighted_replacement,
                mask_whole_words=mask_whole_words,
            )

            lang_dataset = NestedDictionaryDataset(
                {
                    "net_input": {
                        "src_tokens": PadDataset(
                            src_dataset,
                            pad_idx=self.source_dictionary.pad(),
                            left_pad=False,
                        ),
                        "src_lengths": NumelDataset(src_dataset, reduce=False),
                    },
                    "target": PadDataset(
                        tgt_dataset,
                        pad_idx=self.source_dictionary.pad(),
                        left_pad=False,
                    ),
                    "nsentences": NumSamplesDataset(),
                    "ntokens": NumelDataset(src_dataset, reduce=True),
                    "lang_id": RawLabelDataset([lang_id] * src_dataset.sizes.shape[0]),
                },
                sizes=[src_dataset.sizes],
            )
            lang_datasets.append(lang_dataset)

        dataset_lengths = np.array(
            [len(d) for d in lang_datasets],
            dtype=float,
        )
        logger.info(
            "loaded total {} blocks for all languages".format(
                dataset_lengths.sum(),
            )
        )
        if is_train_split:
            # For train subset, additionally up or down sample languages.
            sample_probs = self._get_sample_prob(dataset_lengths)
            logger.info(
                "Sample probability by language: %s",
                {
                    lang: "{0:.4f}".format(sample_probs[idx])
                    for idx, lang in enumerate(languages)
                },
            )
            size_ratio = (sample_probs * dataset_lengths.sum()) / dataset_lengths
            logger.info(
                "Up/Down Sampling ratio by language: %s",
                {
                    lang: "{0:.2f}".format(size_ratio[idx])
                    for idx, lang in enumerate(languages)
                },
            )

            resampled_lang_datasets = [
                ResamplingDataset(
                    lang_dataset,
                    size_ratio=ratio,
                    seed=self.args.seed,
                    epoch=epoch,
                    replace=ratio >= 1.0,
                )
                for lang_dataset, ratio in zip(lang_datasets, size_ratio)
            ]
            dataset = ConcatDataset(resampled_lang_datasets)
        else:
            dataset = ConcatDataset(lang_datasets)
            lang_splits = [split]
            for lang_id, lang_dataset in enumerate(lang_datasets):
                split_name = split + "_" + languages[lang_id]
                lang_splits.append(split_name)
                self.datasets[split_name] = lang_dataset

            # [TODO]: This is hacky for now to print validation ppl for each
            # language individually. Maybe need task API changes to allow it
            # in more generic ways.
            if split in self.args.valid_subset:
                self.args.valid_subset = self.args.valid_subset.replace(
                    split, ",".join(lang_splits)
                )

        # Seeded by seed + epoch so the shuffle is reproducible per epoch.
        with data_utils.numpy_seed(self.args.seed + epoch):
            shuffle = np.random.permutation(len(dataset))

        self.datasets[split] = SortDataset(
            dataset,
            sort_order=[
                shuffle,
                dataset.sizes,
            ],
        )

    def build_dataset_for_inference(self, src_tokens, src_lengths, sort=True):
        """Wrap raw token sequences into a dataset suitable for inference.

        Each input is kept as its own sample (``break_mode="eos"``), padded
        and prefixed with the beginning-of-sentence symbol. When ``sort`` is
        true the result is ordered by ``src_lengths``.
        """
        src_dataset = PadDataset(
            TokenBlockDataset(
                src_tokens,
                src_lengths,
                self.args.tokens_per_sample - 1,  # one less for <s>
                pad=self.source_dictionary.pad(),
                eos=self.source_dictionary.eos(),
                break_mode="eos",
            ),
            pad_idx=self.source_dictionary.pad(),
            left_pad=False,
        )
        src_dataset = PrependTokenDataset(src_dataset, self.source_dictionary.bos())
        src_dataset = NestedDictionaryDataset(
            {
                "id": IdDataset(),
                "net_input": {
                    "src_tokens": src_dataset,
                    "src_lengths": NumelDataset(src_dataset, reduce=False),
                },
            },
            sizes=src_lengths,
        )
        if sort:
            src_dataset = SortDataset(src_dataset, sort_order=[src_lengths])
        return src_dataset

    @property
    def source_dictionary(self):
        """Dictionary used for the (masked) input tokens."""
        return self.dictionary

    @property
    def target_dictionary(self):
        """Dictionary used for the targets; the same object as the source."""
        return self.dictionary
def _lang_token(lang: str):
    """Return the dictionary symbol that represents language *lang*."""
    return "__{}__".format(lang)


def _lang_token_index(dic: Dictionary, lang: str):
    """Return language token index."""
    token_idx = dic.index(_lang_token(lang))
    assert (
        token_idx != dic.unk_index
    ), "cannot find language token for lang {}".format(lang)
    return token_idx


@register_task("multilingual_translation")
class MultilingualTranslationTask(LegacyFairseqTask):
    """A task for training multiple translation models simultaneously.

    We iterate round-robin over batches from multiple language pairs, ordered
    according to the `--lang-pairs` argument.

    The training loop is roughly:

        for i in range(len(epoch)):
            for lang_pair in args.lang_pairs:
                batch = next_batch_for_lang_pair(lang_pair)
                loss = criterion(model_for_lang_pair(lang_pair), batch)
                loss.backward()
            optimizer.step()

    In practice, `next_batch_for_lang_pair` is abstracted in a FairseqDataset
    (e.g., `RoundRobinZipDatasets`) and `model_for_lang_pair` is a model that
    implements the `FairseqMultiModel` interface.

    During inference it is required to specify a single `--source-lang` and
    `--target-lang`, which indicates the inference language direction.
    `--lang-pairs`, `--encoder-langtok`, `--decoder-langtok` have to be set to
    the same value as training.
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument("data", metavar="DIR", help="path to data directory")
        parser.add_argument(
            "--lang-pairs",
            default=None,
            metavar="PAIRS",
            help="comma-separated list of language pairs (in training order): en-de,en-fr,de-fr",
        )
        parser.add_argument(
            "-s",
            "--source-lang",
            default=None,
            metavar="SRC",
            help="source language (only needed for inference)",
        )
        parser.add_argument(
            "-t",
            "--target-lang",
            default=None,
            metavar="TARGET",
            help="target language (only needed for inference)",
        )
        parser.add_argument(
            "--left-pad-source",
            default="True",
            type=str,
            metavar="BOOL",
            help="pad the source on the left (default: True)",
        )
        parser.add_argument(
            "--left-pad-target",
            default="False",
            type=str,
            metavar="BOOL",
            help="pad the target on the left (default: False)",
        )
        try:
            parser.add_argument(
                "--max-source-positions",
                default=1024,
                type=int,
                metavar="N",
                help="max number of tokens in the source sequence",
            )
            parser.add_argument(
                "--max-target-positions",
                default=1024,
                type=int,
                metavar="N",
                help="max number of tokens in the target sequence",
            )
        except ArgumentError:
            # this might have already been defined. Once we transition this
            # to hydra it should be fine to add it here.
            pass
        parser.add_argument(
            "--upsample-primary",
            default=1,
            type=int,
            help="amount to upsample primary dataset",
        )
        parser.add_argument(
            "--encoder-langtok",
            default=None,
            type=str,
            choices=["src", "tgt"],
            metavar="SRCTGT",
            help="replace beginning-of-sentence in source sentence with source or target "
            "language token. (src/tgt)",
        )
        parser.add_argument(
            "--decoder-langtok",
            action="store_true",
            help="replace beginning-of-sentence in target sentence with target language token",
        )

    def __init__(self, args, dicts, training):
        """Record the per-language dictionaries and the active language pairs.

        In training mode the pairs come from ``args.lang_pairs``; otherwise
        the single ``source_lang-target_lang`` direction is used.
        """
        super().__init__(args)
        self.dicts = dicts
        self.training = training
        self.lang_pairs = (
            args.lang_pairs
            if training
            else ["{}-{}".format(args.source_lang, args.target_lang)]
        )
        # eval_lang_pairs for multilingual translation is usually all of the
        # lang_pairs. However for other multitask settings or when we want to
        # optimize for certain languages we want to use a different subset.
        # Thus the eval_lang_pairs class variable is provided for classes
        # that extend this class.
        self.eval_lang_pairs = self.lang_pairs
        # model_lang_pairs will be used to build encoder-decoder model pairs
        # in models.build_model(). This allows multitask type of sub-class
        # can build models other than the input lang_pairs
        self.model_lang_pairs = self.lang_pairs
        self.langs = list(dicts.keys())

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Create the task from ``args`` after preparing its dictionaries."""
        dicts, training = cls.prepare(args, **kwargs)
        return cls(args, dicts, training)

    @classmethod
    def update_args(cls, args):
        """Normalise ``args`` in place.

        Converts the left-pad flags with ``utils.eval_bool`` and splits a
        comma-separated ``lang_pairs`` string into a list.

        Raises:
            ValueError: if ``--lang-pairs`` was not given.
        """
        args.left_pad_source = utils.eval_bool(args.left_pad_source)
        args.left_pad_target = utils.eval_bool(args.left_pad_target)

        pairs = args.lang_pairs
        if pairs is None:
            raise ValueError(
                "--lang-pairs is required. List all the language pairs in the training objective."
            )
        if isinstance(pairs, str):
            args.lang_pairs = pairs.split(",")

    @classmethod
    def prepare(cls, args, **kargs):
        """Load one dictionary per language and decide the run mode.

        Returns:
            ``(dicts, training)`` where ``dicts`` maps each language (sorted)
            to its dictionary and ``training`` is true only when neither a
            source nor a target language was specified.
        """
        cls.update_args(args)

        unique_langs = set()
        for lang_pair in args.lang_pairs:
            unique_langs.update(lang_pair.split("-"))
        sorted_langs = sorted(unique_langs)

        training = args.source_lang is None and args.target_lang is None
        wants_lang_tokens = args.encoder_langtok is not None or args.decoder_langtok

        # load dictionaries
        dicts = OrderedDict()
        for lang in sorted_langs:
            paths = utils.split_paths(args.data)
            assert len(paths) > 0
            dicts[lang] = cls.load_dictionary(
                os.path.join(paths[0], "dict.{}.txt".format(lang))
            )
            if len(dicts) > 0:
                # every dictionary must share the special symbol indices of
                # the first one
                first_dict = dicts[sorted_langs[0]]
                assert dicts[lang].pad() == first_dict.pad()
                assert dicts[lang].eos() == first_dict.eos()
                assert dicts[lang].unk() == first_dict.unk()
            if wants_lang_tokens:
                for lang_to_add in sorted_langs:
                    dicts[lang].add_symbol(_lang_token(lang_to_add))
            logger.info("[{}] dictionary: {} types".format(lang, len(dicts[lang])))
        return dicts, training

    def get_encoder_langtok(self, src_lang, tgt_lang):
        """Return the symbol index that ends source sentences.

        This is EOS unless ``--encoder-langtok`` selects the source or the
        target language token (looked up in the source dictionary).
        """
        mode = self.args.encoder_langtok
        if mode is None:
            return self.dicts[src_lang].eos()
        chosen_lang = src_lang if mode == "src" else tgt_lang
        return _lang_token_index(self.dicts[src_lang], chosen_lang)

    def get_decoder_langtok(self, tgt_lang):
        """Return the symbol index that starts target sentences."""
        if self.args.decoder_langtok:
            return _lang_token_index(self.dicts[tgt_lang], tgt_lang)
        return self.dicts[tgt_lang].eos()

    def alter_dataset_langtok(
        self,
        lang_pair_dataset,
        src_eos=None,
        src_lang=None,
        tgt_eos=None,
        tgt_lang=None,
    ):
        """Wrap a dataset so language tokens replace the EOS/BOS symbols.

        The dataset is returned untouched when neither ``--encoder-langtok``
        nor ``--decoder-langtok`` is in effect. A side whose required
        arguments are missing is left unchanged (its EOS is passed as
        ``None``).
        """
        use_encoder_tok = self.args.encoder_langtok is not None
        if not use_encoder_tok and not self.args.decoder_langtok:
            return lang_pair_dataset

        replace_src = (
            use_encoder_tok
            and src_eos is not None
            and src_lang is not None
            and tgt_lang is not None
        )
        if replace_src:
            new_src_eos = self.get_encoder_langtok(src_lang, tgt_lang)
        else:
            new_src_eos = None
            src_eos = None

        replace_tgt = (
            self.args.decoder_langtok and tgt_eos is not None and tgt_lang is not None
        )
        if replace_tgt:
            new_tgt_bos = self.get_decoder_langtok(tgt_lang)
        else:
            new_tgt_bos = None
            tgt_eos = None

        return TransformEosLangPairDataset(
            lang_pair_dataset,
            src_eos=src_eos,
            new_src_eos=new_src_eos,
            tgt_bos=tgt_eos,
            new_tgt_bos=new_tgt_bos,
        )

    def load_dataset(self, split, epoch=1, **kwargs):
        """Load a dataset split."""
        paths = utils.split_paths(self.args.data)
        assert len(paths) > 0
        data_path = paths[(epoch - 1) % len(paths)]

        per_pair = []
        for lang_pair in self.lang_pairs:
            src, tgt = lang_pair.split("-")
            raw_dataset = load_langpair_dataset(
                data_path,
                split,
                src,
                self.dicts[src],
                tgt,
                self.dicts[tgt],
                combine=True,
                dataset_impl=self.args.dataset_impl,
                upsample_primary=self.args.upsample_primary,
                left_pad_source=self.args.left_pad_source,
                left_pad_target=self.args.left_pad_target,
                max_source_positions=self.args.max_source_positions,
                max_target_positions=self.args.max_target_positions,
            )
            tagged_dataset = self.alter_dataset_langtok(
                raw_dataset,
                src_eos=self.dicts[src].eos(),
                src_lang=src,
                tgt_eos=self.dicts[tgt].eos(),
                tgt_lang=tgt,
            )
            per_pair.append((lang_pair, tagged_dataset))

        if self.training:
            eval_key = None
        else:
            eval_key = "%s-%s" % (self.args.source_lang, self.args.target_lang)

        self.datasets[split] = RoundRobinZipDatasets(
            OrderedDict(per_pair),
            eval_key=eval_key,
        )

    def build_dataset_for_inference(self, src_tokens, src_lengths, constraints=None):
        """Build a single-direction dataset for generation.

        Raises:
            NotImplementedError: if ``constraints`` is given.
        """
        if constraints is not None:
            raise NotImplementedError(
                "Constrained decoding with the multilingual_translation task is not supported"
            )

        lang_pair = "%s-%s" % (self.args.source_lang, self.args.target_lang)
        pair_dataset = self.alter_dataset_langtok(
            LanguagePairDataset(src_tokens, src_lengths, self.source_dictionary),
            src_eos=self.source_dictionary.eos(),
            src_lang=self.args.source_lang,
            tgt_eos=self.target_dictionary.eos(),
            tgt_lang=self.args.target_lang,
        )
        return RoundRobinZipDatasets(
            OrderedDict([(lang_pair, pair_dataset)]),
            eval_key=lang_pair,
        )

    def build_model(self, args, from_checkpoint=False):
        """Build the multi-model after checking task/model arg consistency.

        Raises:
            ValueError: if the language pairs or language-token options of
                the task and of ``args`` disagree, or if the built model is
                not a ``FairseqMultiModel``.
        """
        # Update args -> the fact that the constructor here
        # changes the args object doesn't mean you get the same one here
        self.update_args(args)

        # Check if task args are consistent with model args
        problems = []
        if len(set(self.args.lang_pairs).symmetric_difference(args.lang_pairs)) != 0:
            problems.append(
                "--lang-pairs should include all the language pairs {}.".format(
                    args.lang_pairs
                )
            )
        if self.args.encoder_langtok != args.encoder_langtok:
            problems.append(
                "--encoder-langtok should be {}.".format(args.encoder_langtok)
            )
        if self.args.decoder_langtok != args.decoder_langtok:
            problems.append(
                "--decoder-langtok should {} be set.".format(
                    "" if args.decoder_langtok else "not"
                )
            )
        if len(problems) > 0:
            raise ValueError(" ".join(problems))

        from fairseq import models

        model = models.build_model(args, self, from_checkpoint)
        if isinstance(model, FairseqMultiModel):
            return model
        raise ValueError(
            "MultilingualTranslationTask requires a FairseqMultiModel architecture"
        )

    def _per_lang_pair_train_loss(
        self, lang_pair, model, update_num, criterion, sample, optimizer, ignore_grad
    ):
        """Run forward and backward for one language pair."""
        loss, sample_size, logging_output = criterion(
            model.models[lang_pair], sample[lang_pair]
        )
        if ignore_grad:
            loss *= 0
        optimizer.backward(loss)
        return loss, sample_size, logging_output

    def train_step(
        self, sample, model, criterion, optimizer, update_num, ignore_grad=False
    ):
        """Accumulate loss and gradients over every pair present in *sample*.

        Returns:
            ``(loss, sample_size, logging_output)`` summed over the pairs;
            ``logging_output`` also carries ``"<pair>:<key>"`` entries.
        """
        model.train()
        from collections import defaultdict

        agg_loss, agg_sample_size, agg_logging_output = 0.0, 0.0, defaultdict(float)
        curr_lang_pairs = []
        for candidate in self.model_lang_pairs:
            if sample[candidate] is not None and len(sample[candidate]) != 0:
                curr_lang_pairs.append(candidate)

        for idx, lang_pair in enumerate(curr_lang_pairs):
            # Skip gradient synchronisation for every pair but the last one.
            defer_sync = (
                self.args.distributed_world_size > 1
                and hasattr(model, "no_sync")
                and idx < len(curr_lang_pairs) - 1
            )
            if defer_sync:
                sync_context = model.no_sync()
            else:
                sync_context = contextlib.ExitStack()  # dummy contextmanager

            with sync_context:
                loss, sample_size, logging_output = self._per_lang_pair_train_loss(
                    lang_pair,
                    model,
                    update_num,
                    criterion,
                    sample,
                    optimizer,
                    ignore_grad,
                )
            agg_loss += loss.detach().item()
            # TODO make summing of the sample sizes configurable
            agg_sample_size += sample_size
            for k in logging_output:
                agg_logging_output[k] += logging_output[k]
                agg_logging_output[f"{lang_pair}:{k}"] += logging_output[k]
        return agg_loss, agg_sample_size, agg_logging_output

    def _per_lang_pair_valid_loss(self, lang_pair, model, criterion, sample):
        """Evaluate the criterion for one language pair."""
        return criterion(model.models[lang_pair], sample[lang_pair])

    def valid_step(self, sample, model, criterion):
        """Sum the validation loss over the evaluation pairs in *sample*."""
        model.eval()
        with torch.no_grad():
            from collections import defaultdict

            agg_loss, agg_sample_size, agg_logging_output = 0.0, 0.0, defaultdict(float)
            for lang_pair in self.eval_lang_pairs:
                has_batch = (
                    lang_pair in sample
                    and sample[lang_pair] is not None
                    and len(sample[lang_pair]) != 0
                )
                if not has_batch:
                    continue
                loss, sample_size, logging_output = self._per_lang_pair_valid_loss(
                    lang_pair, model, criterion, sample
                )
                agg_loss += loss.data.item()
                # TODO make summing of the sample sizes configurable
                agg_sample_size += sample_size
                for k in logging_output:
                    agg_logging_output[k] += logging_output[k]
                    agg_logging_output[f"{lang_pair}:{k}"] += logging_output[k]
        return agg_loss, agg_sample_size, agg_logging_output

    def inference_step(
        self, generator, models, sample, prefix_tokens=None, constraints=None
    ):
        """Generate with the target language token (or EOS) as first token."""
        with torch.no_grad():
            if not self.args.decoder_langtok:
                bos_token = self.target_dictionary.eos()
            else:
                bos_token = _lang_token_index(
                    self.target_dictionary, self.args.target_lang
                )
            return generator.generate(
                models,
                sample,
                prefix_tokens=prefix_tokens,
                constraints=constraints,
                bos_token=bos_token,
            )

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate logging outputs and log the summed size statistics."""
        with metrics.aggregate():
            # pass 'sample_size', 'nsentences', 'ntokens' stats to fairseq_task
            super().reduce_metrics(logging_outputs, criterion)
            for stat_name in ["sample_size", "nsentences", "ntokens"]:
                total = sum(l[stat_name] for l in logging_outputs)
                metrics.log_scalar(stat_name, total)

    @property
    def source_dictionary(self):
        """First loaded dictionary in training, else the source-language one."""
        if not self.training:
            return self.dicts[self.args.source_lang]
        return next(iter(self.dicts.values()))

    @property
    def target_dictionary(self):
        """First loaded dictionary in training, else the target-language one."""
        if not self.training:
            return self.dicts[self.args.target_lang]
        return next(iter(self.dicts.values()))

    def max_positions(self):
        """Return the max sentence length allowed by the task."""
        limits = (self.args.max_source_positions, self.args.max_target_positions)
        if len(self.datasets.values()) == 0:
            return {"%s-%s" % (self.args.source_lang, self.args.target_lang): limits}

        per_pair_limits = OrderedDict()
        for split in self.datasets.keys():
            for key in self.datasets[split].datasets.keys():
                per_pair_limits[key] = limits
        return per_pair_limits
+ +import logging +import os +import sys +from typing import Dict, List, Optional, Tuple + +import numpy as np + +from dataclasses import dataclass, field +from fairseq.data import Dictionary, HubertDataset +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from omegaconf import MISSING + +logger = logging.getLogger(__name__) + + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, + append_eos=False, + add_if_not_exist=False, + ) + + +@dataclass +class MultiresHubertPretrainingConfig(FairseqDataclass): + data: str = field(default=MISSING, metadata={"help": "path to data directory"}) + fine_tuning: bool = field( + default=False, metadata={"help": "set to true if fine-tuning Hubert"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr50", "ltr25"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: float = field( + default=-1.0, + metadata={"help": "label frame rate. -1.0 for sequence label"}, + ) + # label_rate: 1,2,2,5 + # (imply (1,2), (2,5)) + # if base label_rate = 50 + # (1,2), (2,5) --> label rates 50, 25, 10 + label_rate_ratios: List[int] = field(default=MISSING, metadata={"help": "tuple for label rates e.g., [(1,2), (2,5)]"}) + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
@register_task("multires_hubert_pretraining", dataclass=MultiresHubertPretrainingConfig)
class MultiresHubertPretrainingTask(FairseqTask):
    """
    Multiresolution HuBERT Pretraining Task.
    The task is based on `HubertPretrainingTask` but extended to multiresolution.

    One label stream (and one dictionary) is configured per entry of
    ``cfg.labels``.  An empty-string entry is represented by ``None`` in the
    per-label dictionary / path / pad / eos lists built by this task.
    """

    cfg: MultiresHubertPretrainingConfig

    def __init__(
        self,
        cfg: MultiresHubertPretrainingConfig,
    ) -> None:
        """Store the config and register the lazy dictionary factory."""
        super().__init__(cfg)

        logger.info(f"current directory is {os.getcwd()}")
        logger.info(f"MultiresHubertPretrainingTask Config {cfg}")

        self.cfg = cfg
        self.fine_tuning = cfg.fine_tuning

        # Fine-tuning exposes a single target dictionary; pre-training exposes
        # the whole per-label list.  Both are produced by load_dictionaries.
        if cfg.fine_tuning:
            self.state.add_factory("target_dictionary", self.load_dictionaries)
            self.res_number = 1
        else:
            self.state.add_factory("dictionaries", self.load_dictionaries)

        self.blank_symbol = "<s>"

    @property
    def source_dictionary(self) -> Optional[Dictionary]:
        """This task has no source dictionary."""
        return None

    @property
    def target_dictionary(self) -> Optional[Dictionary]:
        """Dictionary registered under ``target_dictionary`` in the task state."""
        return self.state.target_dictionary

    @property
    def dictionaries(self) -> List[Dictionary]:
        """Per-label dictionaries registered under ``dictionaries`` in the task state."""
        return self.state.dictionaries

    @classmethod
    def setup_task(
        cls, cfg: MultiresHubertPretrainingConfig, **kwargs
    ) -> "MultiresHubertPretrainingTask":
        """Build the task from its config; extra keyword arguments are ignored."""
        return cls(cfg)

    def load_dictionaries(self):
        """Load ``dict.<label>.txt`` for every configured label.

        Returns:
            The first dictionary when fine-tuning, otherwise the full list
            (with ``None`` in place of empty-string labels).
        """
        label_dir = self.get_label_dir()
        # NOTE(review): this is the length of the directory *path string*,
        # not the number of label streams.  Left unchanged because the
        # consumers of `res_number` are not visible here -- confirm whether
        # `len(self.cfg.labels)` was intended.
        self.res_number = len(label_dir)
        # `!=` (equality), not `is not`: identity against a str literal is
        # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
        dictionaries = [
            (Dictionary.load(f"{label_dir}/dict.{label}.txt") if label != "" else None)
            for label in self.cfg.labels
        ]
        return dictionaries[0] if self.cfg.fine_tuning else dictionaries

    def get_label_dir(self) -> str:
        """Return ``cfg.label_dir`` when set, otherwise ``cfg.data``."""
        if self.cfg.label_dir is None:
            return self.cfg.data
        return self.cfg.label_dir

    def load_dataset(self, split: str, **kwargs) -> None:
        """Build the ``HubertDataset`` for ``split`` and store it in ``self.datasets``.

        Also (re)computes ``self.label_rates`` and ``self.label_rate_ratios``
        from ``cfg.label_rate`` and the flat ``cfg.label_rate_ratios`` list.
        """
        manifest = f"{self.cfg.data}/{split}.tsv"
        dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries
        pad_list = [(d.pad() if d is not None else None) for d in dicts]
        eos_list = [(d.eos() if d is not None else None) for d in dicts]
        procs = [LabelEncoder(d) for d in dicts]
        label_dir = self.get_label_dir()
        paths = [
            (f"{label_dir}/{split}.{label}" if label != "" else None)
            for label in self.cfg.labels
        ]

        base_rate = self.cfg.label_rate
        self.label_rates = [base_rate]
        label_rate_ratios = self.cfg.label_rate_ratios
        self.label_rate_ratios = []
        # The config stores the ratios flattened: [up0, down0, up1, down1, ...].
        # Each pair derives the next label rate from the previous one.
        for i in range(len(label_rate_ratios) // 2):
            upsample_rate = label_rate_ratios[i * 2]
            downsample_rate = label_rate_ratios[i * 2 + 1]
            self.label_rate_ratios.append((upsample_rate, downsample_rate))
            base_rate = base_rate * upsample_rate // downsample_rate
            self.label_rates.append(base_rate)

        # hubert v1: pad_audio=True, random_crop=False;
        self.datasets[split] = HubertDataset(
            manifest,
            sample_rate=self.cfg.sample_rate,
            label_paths=paths,
            label_rates=self.label_rates,
            pad_list=pad_list,
            eos_list=eos_list,
            label_processors=procs,
            max_keep_sample_size=self.cfg.max_keep_size,
            min_keep_sample_size=self.cfg.min_sample_size,
            max_sample_size=self.cfg.max_sample_size,
            pad_audio=self.cfg.pad_audio,
            normalize=self.cfg.normalize,
            store_labels=False,
            random_crop=self.cfg.random_crop,
        )

    def max_positions(self) -> Tuple[int, int]:
        """Return ``(sys.maxsize, sys.maxsize)``: the task imposes no length limit."""
        return (sys.maxsize, sys.maxsize)

    def filter_indices_by_size(self, indices: np.array, *args, **kwargs) -> np.array:
        """Return ``indices`` unchanged; no size-based filtering is done here."""
        return indices
def label_len_fn(label):
    """Return the number of single-space-separated fields in ``label``.

    Consecutive spaces yield empty fields that are counted too, so the
    result is always one more than the number of space characters.
    """
    return label.count(" ") + 1
@register_task("nlu_finetuning", dataclass=NLUFinetuningConfig)
class NLUFinetuningTask(AudioPretrainingTask):
    """Fine-tuning task that attaches text targets to an audio dataset.

    On top of the parent task's dataset it loads ``<split>.<labels>`` target
    lines, and during validation it can optionally report character/word
    error rates, exact-match and tree error rates (``eval_wer_parse``) and
    BLEU (``eval_bleu``).  All of these metrics are only computed when
    ``cfg.autoregressive`` is set.
    """

    cfg: NLUFinetuningConfig

    def __init__(
        self,
        cfg: NLUFinetuningConfig,
    ):
        super().__init__(cfg)
        self.blank_symbol = "<s>"

        self.state.add_factory("target_dictionary", self.load_target_dictionary)

    def load_target_dictionary(self):
        """Load ``dict.<labels>.txt`` from the data directory, or return ``None``
        when no labels are configured."""
        if self.cfg.labels:
            dict_path = os.path.join(self.cfg.data, f"dict.{self.cfg.labels}.txt")
            return Dictionary.load(dict_path)
        return None

    def load_dataset(self, split: str, task_cfg: NLUFinetuningConfig = None, **kwargs):
        """Load the parent dataset for ``split`` and wrap it with its targets.

        Label lines whose index is listed in the dataset's ``skipped_indices``
        attribute (if it has one) are dropped so labels stay aligned with
        the remaining samples.
        """
        super().load_dataset(split, task_cfg, **kwargs)

        task_cfg = task_cfg or self.cfg
        assert task_cfg.labels is not None
        text_compression_level = getattr(
            TextCompressionLevel, str(self.cfg.text_compression_level)
        )
        data_path = self.cfg.data
        label_path = os.path.join(data_path, f"{split}.{task_cfg.labels}")
        skipped_indices = getattr(self.datasets[split], "skipped_indices", set())
        text_compressor = TextCompressor(level=text_compression_level)
        with open(label_path, "r") as f:
            labels = [
                text_compressor.compress(line)
                for index, line in enumerate(f)
                if index not in skipped_indices
            ]

        assert len(labels) == len(self.datasets[split]), (
            f"labels length ({len(labels)}) and dataset length "
            f"({len(self.datasets[split])}) do not match"
        )

        process_label = LabelEncoder(self.target_dictionary)

        self.datasets[split] = AddTargetDataset(
            self.datasets[split],
            labels,
            pad=self.target_dictionary.pad(),
            eos=self.target_dictionary.eos(),
            batch_targets=True,
            process_label=process_label,
            label_len_fn=label_len_fn,
            add_to_input=task_cfg.get("autoregressive", False),
            text_compression_level=text_compression_level,
        )

    @property
    def target_dictionary(self):
        """Return the :class:`~fairseq.data.Dictionary` for the language
        model."""
        return self.state.target_dictionary

    def valid_step(self, sample, model, criterion):
        """Run the parent validation step and add the enabled evaluation counters
        to ``logging_output`` (keys are prefixed with ``_``)."""
        loss, sample_size, logging_output = super().valid_step(sample, model, criterion)
        # Locals are deliberately not called `metrics`: that name is the
        # logging module imported at file level.
        if self.cfg.eval_wer_parse and self.cfg.autoregressive:
            parse_stats = self._inference_with_wer_parse(
                self.sequence_generator, sample, model
            )
            for key in (
                "num_char_errors",
                "num_chars",
                "num_word_errors",
                "num_words",
                "num_em_errors",
                "num_ems",
                "num_tree_errors",
                "num_trees",
            ):
                logging_output["_" + key] = parse_stats[key]
        if self.cfg.eval_wer and self.cfg.autoregressive:
            wer_stats = self._inference_with_wer(self.sequence_generator, sample, model)
            for key in ("num_char_errors", "num_chars", "num_word_errors", "num_words"):
                logging_output["_" + key] = wer_stats[key]
        if self.cfg.eval_bleu and self.cfg.autoregressive:
            bleu = self._inference_with_bleu(self.sequence_generator, sample, model)
            logging_output["_bleu_sys_len"] = bleu.sys_len
            logging_output["_bleu_ref_len"] = bleu.ref_len
            # we split counts into separate entries so that they can be
            # summed efficiently across workers using fast-stat-sync
            assert len(bleu.counts) == 4
            for i in range(4):
                logging_output[f"_bleu_counts_{i}"] = bleu.counts[i]
                logging_output[f"_bleu_totals_{i}"] = bleu.totals[i]
        return loss, sample_size, logging_output

    def build_model(self, model_cfg: FairseqDataclass):
        """Build the model and, when evaluation is enabled, the generator and
        tokenizer used by the validation metrics.

        Note that the BLEU branch runs last, so when both WER and BLEU
        evaluation are enabled its generator and tokenizer are the ones kept.
        """
        model = super().build_model(model_cfg)

        if (self.cfg.eval_wer or self.cfg.eval_wer_parse) and self.cfg.autoregressive:
            self.sequence_generator = self.build_generator(
                [model],
                self.cfg.eval_wer_config,
            )
            if self.cfg.eval_wer_tokenizer:
                self.tokenizer = encoders.build_tokenizer(self.cfg.eval_wer_tokenizer)
            else:
                self.tokenizer = None
        if self.cfg.eval_bleu and self.cfg.autoregressive:
            assert self.cfg.eval_bleu_detok is not None, (
                "--eval-bleu-detok is required if using --eval-bleu; "
                "try --eval-bleu-detok=moses (or --eval-bleu-detok=space "
                "to disable detokenization, e.g., when using sentencepiece)"
            )
            detok_args = json.loads(self.cfg.eval_bleu_detok_args)
            self.tokenizer = encoders.build_tokenizer(
                Namespace(tokenizer=self.cfg.eval_bleu_detok, **detok_args)
            )
            gen_args = json.loads(self.cfg.eval_bleu_args)
            gen_args = Namespace(**gen_args)
            self.sequence_generator = self.build_generator([model], gen_args)

        return model

    def _inference_with_wer_parse(self, generator, sample, model):
        """Generate hypotheses and count char/word/exact-match/tree errors.

        Character, exact-match and tree statistics are computed on the
        token-list rendering (see ``post_process``); word statistics are
        computed on the ``decode`` rendering.
        """
        import editdistance

        def decode(toks):
            s = self.target_dictionary.string(
                toks.int().cpu(),
                self.cfg.eval_wer_post_process,
                escape_unk=True,
            )
            if self.tokenizer:
                s = self.tokenizer.decode(s)
            return s

        def decode_to_list(toks):
            def token_string(i):
                if i == self.target_dictionary.unk():
                    return self.target_dictionary.unk_string(False)
                return self.target_dictionary[i]

            return [token_string(i) for i in toks]

        def is_ont_token(token):
            # Tokens carrying a square bracket are the ones compared as the "tree".
            return "[" in token or "]" in token

        def post_process(tokens):
            # Drop EOS and "|", turn "_" into a space, and put a space after
            # every bracket token.
            out = []
            for w in tokens:
                if w == self.target_dictionary.eos_word or w == "|":
                    continue
                if w == "_":
                    out.append(" ")
                else:
                    out.append(w)
                if is_ont_token(w):
                    out.append(" ")
            return out

        num_word_errors, num_char_errors = 0, 0
        num_chars, num_words = 0, 0
        num_em_errors, num_ems = 0, 0
        num_tree_errors, num_trees = 0, 0
        gen_out = self.inference_step(generator, [model], sample, None)
        for i, hypos in enumerate(gen_out):
            hyp_tokens = hypos[0]["tokens"]
            ref_tokens = utils.strip_pad(
                sample["target"][i], self.target_dictionary.pad()
            )
            hyp_list = post_process(decode_to_list(hyp_tokens))
            ref_list = post_process(decode_to_list(ref_tokens))

            hyp = "".join(hyp_list).strip()
            ref = "".join(ref_list).strip()
            num_chars += len(ref)
            num_char_errors += editdistance.eval(hyp, ref)

            hyp_tree = [word for word in hyp_list if is_ont_token(word)]
            ref_tree = [word for word in ref_list if is_ont_token(word)]

            hyp_before = decode(hyp_tokens).split()
            ref_before = decode(ref_tokens).split()
            num_word_errors += editdistance.eval(hyp_before, ref_before)
            num_words += len(ref_before)

            if hyp != ref:
                num_em_errors += 1
            if hyp_tree != ref_tree:
                num_tree_errors += 1
            num_ems += 1
            num_trees += 1

        return {
            "num_char_errors": num_char_errors,
            "num_chars": num_chars,
            "num_word_errors": num_word_errors,
            "num_words": num_words,
            "num_ems": num_ems,
            "num_em_errors": num_em_errors,
            "num_trees": num_trees,
            "num_tree_errors": num_tree_errors,
        }

    def _inference_with_wer(self, generator, sample, model):
        """Generate hypotheses and count character- and word-level edit errors
        against the pad-stripped targets."""
        import editdistance

        def decode(toks):
            s = self.target_dictionary.string(
                toks.int().cpu(),
                self.cfg.eval_wer_post_process,
                escape_unk=True,
            )
            if self.tokenizer:
                s = self.tokenizer.decode(s)
            return s

        num_word_errors, num_char_errors = 0, 0
        num_chars, num_words = 0, 0
        gen_out = self.inference_step(generator, [model], sample, None)
        for i, hypos in enumerate(gen_out):
            hyp = decode(hypos[0]["tokens"])
            ref = decode(
                utils.strip_pad(sample["target"][i], self.target_dictionary.pad()),
            )
            num_char_errors += editdistance.eval(hyp, ref)
            num_chars += len(ref)
            hyp_words = hyp.split()
            ref_words = ref.split()
            num_word_errors += editdistance.eval(hyp_words, ref_words)
            num_words += len(ref_words)

        return {
            "num_char_errors": num_char_errors,
            "num_chars": num_chars,
            "num_word_errors": num_word_errors,
            "num_words": num_words,
        }

    def _inference_with_bleu(self, generator, sample, model):
        """Generate hypotheses and return the ``sacrebleu.corpus_bleu`` result
        for this batch."""
        import sacrebleu

        def decode(toks, is_ref):
            s = self.target_dictionary.string(
                toks.int().cpu(),
                self.cfg.eval_bleu_remove_bpe,
                # The default unknown string in fairseq is `<unk>`, but
                # this is tokenized by sacrebleu as `< unk >`, inflating
                # BLEU scores. Instead, we use a somewhat more verbose
                # alternative that is unlikely to appear in the real
                # reference, but doesn't get split into multiple tokens.
                unk_string=("UNKNOWNTOKENINREF" if is_ref else "UNKNOWNTOKENINHYP"),
            )
            if self.tokenizer:
                s = self.tokenizer.decode(s)
            return s

        gen_out = self.inference_step(generator, [model], sample)
        hyps, refs = [], []
        for i, hypos in enumerate(gen_out):
            hyps.append(decode(hypos[0]["tokens"], is_ref=False))
            refs.append(
                decode(
                    utils.strip_pad(sample["target"][i], self.target_dictionary.pad()),
                    is_ref=True,  # don't count <unk> as matches to the hypo
                )
            )
        if self.cfg.eval_bleu_print_samples:
            logger.info("H-{} {}".format(sample["id"][0], hyps[0]))
            logger.info("T-{} {}".format(sample["id"][0], refs[0]))

        eval_tokenization = "none" if self.cfg.eval_tokenized_bleu else "13a"
        return sacrebleu.corpus_bleu(hyps, [refs], tokenize=eval_tokenization)

    def reduce_metrics(self, logging_outputs, criterion):
        """Aggregate the per-worker evaluation counters and register the derived
        percentage metrics (``uer``, ``wer``, ``em_error``, ``tree_error``) and
        ``bleu``."""
        super().reduce_metrics(logging_outputs, criterion)

        def percentage(numerator_key, denominator_key):
            # Derived meter: 100 * numerator / denominator, NaN when the
            # denominator meter is empty.
            def compute(meters):
                if meters[denominator_key].sum > 0:
                    return (
                        meters[numerator_key].sum
                        * 100.0
                        / meters[denominator_key].sum
                    )
                return float("nan")

            return compute

        if self.cfg.eval_wer or self.cfg.eval_wer_parse:
            zero = torch.scalar_tensor(0.0)

            def total(key):
                return sum(log.get(key, zero) for log in logging_outputs)

            num_char_errors = total("_num_char_errors")
            num_chars = total("_num_chars")
            num_word_errors = total("_num_word_errors")
            num_words = total("_num_words")
            metrics.log_scalar("_num_char_errors", num_char_errors)
            metrics.log_scalar("_num_chars", num_chars)
            metrics.log_scalar("_num_word_errors", num_word_errors)
            metrics.log_scalar("_num_words", num_words)
            if num_chars > 0:
                metrics.log_derived(
                    "uer", percentage("_num_char_errors", "_num_chars")
                )
            if num_words > 0:
                metrics.log_derived(
                    "wer", percentage("_num_word_errors", "_num_words")
                )
            if self.cfg.eval_wer_parse:
                num_em_errors = total("_num_em_errors")
                num_ems = total("_num_ems")
                metrics.log_scalar("_num_em_errors", num_em_errors)
                metrics.log_scalar("_num_ems", num_ems)
                num_tree_errors = total("_num_tree_errors")
                num_trees = total("_num_trees")
                metrics.log_scalar("_num_tree_errors", num_tree_errors)
                metrics.log_scalar("_num_trees", num_trees)

                if num_ems > 0:
                    metrics.log_derived(
                        "em_error", percentage("_num_em_errors", "_num_ems")
                    )
                if num_trees > 0:
                    metrics.log_derived(
                        "tree_error", percentage("_num_tree_errors", "_num_trees")
                    )

        if self.cfg.eval_bleu:
            len_keys = ["_bleu_sys_len", "_bleu_ref_len"]
            count_keys = [f"_bleu_counts_{i}" for i in range(4)]
            total_keys = [f"_bleu_totals_{i}" for i in range(4)]
            for k in len_keys + count_keys + total_keys:
                metrics.log_scalar(k, sum(log.get(k, 0) for log in logging_outputs))

            import sacrebleu

            metrics.log_derived(
                "bleu",
                lambda meters: sacrebleu.compute_bleu(
                    correct=[meters[k].sum for k in count_keys],
                    total=[meters[k].sum for k in total_keys],
                    sys_len=meters["_bleu_sys_len"].sum,
                    ref_len=meters["_bleu_ref_len"].sum,
                    smooth_method="exp",
                ).score,
            )
class PiecewiseLinearFn:
    """Piecewise linear function. Can be configured with a string.

    The function is defined by ``(x, y)`` control points sorted in ascending
    order.  Between two consecutive points the value is linearly
    interpolated.  For any ``x`` that falls in no segment (including ``x``
    below the first point) the ``y`` of the last point is returned.
    """

    def __init__(self, pieces: Sequence[Tuple[int, float]]):
        # Compare against a list so that tuples (or any other sequence) of
        # pieces are accepted; `sorted` always returns a list.
        assert list(pieces) == sorted(
            pieces
        ), f"PiecewiseLinearFn configuration should be sorted, received: {pieces}"

        self.pieces = pieces

    def __call__(self, x: int) -> float:
        """Evaluate the function at ``x``."""
        for (x_a, y_a), (x_b, y_b) in zip(self.pieces, self.pieces[1:]):
            if x_a <= x <= x_b:
                if x_b == x_a:
                    # Zero-width segment (a jump): use the left value instead
                    # of dividing by zero.  This matches how a jump later in
                    # the list evaluates, since the preceding segment wins.
                    return y_a
                return y_a + (x - x_a) * (y_b - y_a) / (x_b - x_a)

        return self.pieces[-1][1]

    @staticmethod
    def from_string(configuration: str) -> "PiecewiseLinearFn":
        """
        Parse the configuration of lambda coefficient (for scheduling).
        x = "3"                  # lambda will be a constant equal to x
        x = "0:1,1000:0"         # lambda will start from 1 and linearly decrease
                                 # to 0 during the first 1000 iterations
        x = "0:0,1000:0,2000:1"  # lambda will be equal to 0 for the first 1000
                                 # iterations, then will linearly increase to 1 until iteration 2000

        A plain number (``int`` or ``float``) is accepted as well and gives a
        constant function.

        Raises:
            ValueError: if the configuration cannot be parsed or its points
                are not sorted; the underlying error is attached as the cause.
        """
        if isinstance(configuration, (int, float)):
            return PiecewiseLinearFn([(0, float(configuration))])

        try:
            parts = configuration.split(",")
            if len(parts) == 1:
                return PiecewiseLinearFn([(0, float(configuration))])

            pieces = [(int(t), float(v)) for t, v in (s.split(":") for s in parts)]
            return PiecewiseLinearFn(pieces)
        except Exception as err:
            # Conversion boundary: every parsing failure is reported as a
            # ValueError, keeping the original error as its cause.
            raise ValueError(
                f"Invalid PiecewiseLinearFn configuration: {configuration!r}"
            ) from err

    @staticmethod
    def one() -> "PiecewiseLinearFn":
        """Return the constant function equal to 1.0."""
        return PiecewiseLinearFn([(0, 1.0)])
type=int, + help='amount to upsample primary dataset') + try: + parser.add_argument('--max-source-positions', default=1024, type=int, metavar='N', + help='max number of tokens in the source sequence') + parser.add_argument('--max-target-positions', default=1024, type=int, metavar='N', + help='max number of tokens in the target sequence') + except ArgumentError: + # this might have already been defined. Once we transition this to hydra it should be fine to add it here. + pass + parser.add_argument('--truncate-source', action='store_true', default=False, + help='truncate source to max-source-positions') + parser.add_argument('--num-batch-buckets', default=0, type=int, metavar='N', + help='if >0, then bucket source and target lengths into N ' + 'buckets and pad accordingly; this is useful on TPUs ' + 'to minimize the number of compilations') + + # Denoising args + parser.add_argument('--max-word-shuffle-distance', default=3.0, type=float, metavar='N', + help='maximum word shuffle distance for denoising autoencoding data generation') + parser.add_argument('--word-dropout-prob', default=0.1, type=float, metavar='N', + help='word dropout probability for denoising autoencoding data generation') + parser.add_argument('--word-blanking-prob', default=0.2, type=float, metavar='N', + help='word blanking probability for denoising autoencoding data generation') + + # Backtranslation args + parser.add_argument('--lambda-bt', default="1.0", type=str, metavar='N', + help='back-translation weight') + parser.add_argument('--lambda-dae', default="1.0", type=str, metavar='N', + help='denoising auto-encoder weight') + + # Evaluation args + parser.add_argument('--generate-one-by-one', action='store_true', + help='generate one sentence at a time for backtranslation') + + parser.add_argument('--eval-bleu', action='store_true', + help='evaluation with BLEU scores') + parser.add_argument('--eval-bleu-detok', type=str, default="space", + help='detokenize before computing BLEU (e.g., "moses"); 
@classmethod
def setup_task(cls, args, **kwargs):
    """Setup the task (e.g., load dictionaries).

    Normalises the ``left_pad_*`` flags on ``args`` in place, splits the
    comma-separated language options and loads the shared dictionary from
    ``dict.txt`` in the first data directory.

    Args:
        args (argparse.Namespace): parsed command-line arguments
    """
    args.left_pad_source = options.eval_bool(args.left_pad_source)
    args.left_pad_target = options.eval_bool(args.left_pad_target)

    paths = utils.split_paths(args.data)
    assert len(paths) > 0
    assert args.mono_langs is not None

    mono_langs = args.mono_langs.split(",")
    # --valid-lang-pairs defaults to None; calling .split on it raised an
    # AttributeError before the task could even be built.  An empty list
    # lets setup succeed when no validation pair was given.
    if args.valid_lang_pairs:
        valid_lang_pairs = args.valid_lang_pairs.split(",")
    else:
        valid_lang_pairs = []

    # load dictionary
    dict_path = os.path.join(paths[0], "dict.txt")
    common_dict = cls.load_dictionary(dict_path)

    return cls(args, common_dict, mono_langs, valid_lang_pairs)
def load_bt_dataset(self, data_path: str, lang: str) -> FairseqDataset:
    """Build the back-translation dataset for one monolingual language.

    The dataset is made of (tgt, tgt) pairs: the source side is the
    monolingual data with the language token prepended, the target side is
    the same monolingual data.  Turning it into a (generated_src, tgt) pair
    happens on the fly during training.
    """
    monolingual = data_utils.load_indexed_dataset(
        data_path, self.common_dict, self.args.dataset_impl
    )
    assert monolingual is not None, f"No dataset found for {lang}"

    source_side = PrependTokenDataset(
        monolingual, _lang_token_index(self.dictionary, lang)
    )
    pair_dataset = self._langpair_dataset(source_side, monolingual)

    logger.info(
        f"mono_lang = {lang} "
        f"lang token index = {_lang_token_index(self.dictionary, lang)} "
        f"lang token = {_lang_token(lang)}"
    )

    return self._prepend_lang_bos_to_target(pair_dataset, lang)
def build_model(self, args, from_checkpoint=False):
    """Build the model plus the generators used for back-translation and BLEU.

    After the parent builds the model, the special tokens are added to the
    shared dictionary and model, and one beam-size-1 ``SequenceGenerator``
    is created per monolingual language.  When ``args.eval_bleu`` is set, a
    detokenizer and a separate BLEU generator are built as well.
    """
    # torch.autograd.set_detect_anomaly(True)
    model = super().build_model(args, from_checkpoint)

    add_secial_tokens_to_dict_and_model(self.common_dict, model, self.mono_langs)

    self.sequence_generators = {
        mono_lang: SequenceGenerator(
            [model],
            tgt_dict=self.dictionary,
            beam_size=1,
            max_len_a=1.3,
            max_len_b=5,
            min_len=5,
            # keep 1 to be able to prepend bos
            max_len=model.max_decoder_positions() - 1,
        )
        for mono_lang in self.mono_langs
    }

    if not getattr(args, "eval_bleu", False):
        return model

    detok_name = getattr(args, "eval_bleu_detok", None)
    assert detok_name is not None, (
        "--eval-bleu-detok is required if using --eval-bleu; "
        "try --eval-bleu-detok=moses (or --eval-bleu-detok=space "
        "to disable detokenization, e.g., when using sentencepiece)"
    )
    # The JSON options may be absent or None; both fall back to "{}".
    detok_options = json.loads(getattr(args, "eval_bleu_detok_args", "{}") or "{}")
    tokenizer_config = Namespace(tokenizer=detok_name, **detok_options)
    self.tokenizer = encoders.build_tokenizer(tokenizer_config)

    generation_options = json.loads(getattr(args, "eval_bleu_args", "{}") or "{}")
    self.bleu_sequence_generator = self.build_generator(
        [model], Namespace(**generation_options)
    )

    return model
+ * At the start of this function, `smp` has the same input and target: + |--------------------------------------------------------| + | smp['net_input']['src_tokens'] | smp['target'] | + | (from data) __en__ hello world | __en__ hello world | + |--------------------------------------------------------| + + * We call generator.generate(smp, bos_token = token("ro")), + and copy the result as input + * At the end, `smp` has the translation to other language. + |--------------------------------------------------------| + | smp['net_input']['src_tokens'] | smp['target'] | + | (generated) __ro__ salut lume | __en__ hello world | + |--------------------------------------------------------| + + """ + bos_token = _lang_token_index(self.dictionary, other_lang) + generated = self.sequence_generators[orig_lang].generate( + models=[], sample=smp, bos_token=bos_token + ) + + max_lngth = max([gn[0]["tokens"].size(0) for gn in generated]) + net_input = smp["net_input"] + n_src_tokens = torch.empty( + size=(len(generated), max_lngth + 1), dtype=net_input["src_tokens"].dtype + ) + n_src_lengths = torch.empty( + len(generated), dtype=net_input["src_lengths"].dtype + ) + + for i, gn in enumerate(generated): + tokens = gn[0]["tokens"] + tokens_size = tokens.size(0) + padding_needed = max_lngth - tokens_size + tokens = torch.cat([tokens.new([bos_token]), tokens]) + tokens = F.pad(tokens, (0, padding_needed), value=self.dictionary.pad()) + n_src_tokens[i] = tokens + n_src_lengths[i] = tokens_size + 1 + + device = net_input["src_tokens"].device + # This seems to be important + del net_input["src_tokens"] + del net_input["src_lengths"] + net_input["src_tokens"] = n_src_tokens.to(device) + net_input["src_lengths"] = n_src_lengths.to(device) + + def generate(self, smp, model): + model.eval() + orig_lang = ( + self.dictionary[smp["net_input"]["src_tokens"][0][0]] + .replace(" ", "") + .replace("_", "") + ) + bos_token = smp["net_input"]["prev_output_tokens"][0][0] + with torch.no_grad(): + 
def get_other_lang(self, lang):
    """Pick the language to pair with ``lang`` for back-translation.

    Every language other than the first one in ``self.mono_langs`` is
    paired with the first one. The first language is paired with the
    second when exactly two languages exist, otherwise with a randomly
    drawn one among the remaining languages.
    """
    # TODO: allow more complex mapping
    langs = self.mono_langs
    primary = langs[0]
    if lang != primary:
        return primary

    if len(langs) == 2:
        partner_idx = 1
    else:
        partner_idx = np.random.randint(1, len(langs))
    return langs[partner_idx]
def get_bos_token_from_sample(self, sample):
    """Return the dictionary index of the language token to decode into.

    The source language is read from the first token of every source
    sentence (all sentences must share it, otherwise ``.item()`` raises),
    and the target language is whatever ``get_other_lang`` pairs with it.

    Args:
        sample: batch dict with ``net_input.src_tokens``.

    Returns:
        int: index of the target language token.
    """
    net_input = sample["net_input"]
    source_lang_token_id = torch.unique(net_input["src_tokens"][:, 0]).item()
    source_lang_token = self.dictionary[source_lang_token_id]

    # Language tokens are built as "__{lang}__". Strip only that wrapper so
    # codes that contain an underscore themselves (e.g. "en_XX") survive;
    # removing every "_" would turn them into a language name that matches
    # nothing in mono_langs.
    if source_lang_token.startswith("__") and source_lang_token.endswith("__"):
        source_lang = source_lang_token[2:-2]
    else:
        # Not a wrapped language token: keep the previous normalisation.
        source_lang = source_lang_token.replace("_", "")

    target_lang_token_id = _lang_token_index(
        self.dictionary, self.get_other_lang(source_lang)
    )

    return target_lang_token_id
@torch.no_grad()
def extend_embedding(
    emb: nn.Module, new_vocab_size: int, copy_from_token_id: int
) -> None:
    """Grow the vocabulary dimension of an embedding-like module in place.

    Works on any module with a ``weight`` of shape ``(vocab, dim)``.
    Existing rows are kept, and every new row is initialised with a copy of
    row ``copy_from_token_id``. ``num_embeddings`` / ``out_features`` are
    updated when the module has them. A ``bias``, if present, is extended
    with zeros.

    Args:
        emb: module to extend.
        new_vocab_size: target number of rows; must not be smaller than the
            current number.
        copy_from_token_id: index of the row used to initialise new rows.

    Raises:
        AssertionError: if ``new_vocab_size`` is smaller than the current
            weight or bias size.
    """
    old_emb_data = emb.weight.data
    (old_vocab_size, dim) = old_emb_data.shape
    assert new_vocab_size >= old_vocab_size

    if new_vocab_size > old_vocab_size:
        # Allocate with the dtype/device of the existing weight (as is done
        # for the bias below); a bare torch.zeros would silently move the
        # parameter to CPU and to the default dtype.
        new_emb_data = torch.zeros(
            (new_vocab_size, dim),
            dtype=old_emb_data.dtype,
            device=old_emb_data.device,
        )
        new_emb_data[:old_vocab_size, :] = old_emb_data
        # initialize new embeddings
        new_emb_data[old_vocab_size:, :] = old_emb_data[copy_from_token_id]
        emb.weight.data = new_emb_data
        if hasattr(emb, "num_embeddings"):
            emb.num_embeddings = new_vocab_size
        if hasattr(emb, "out_features"):
            emb.out_features = new_vocab_size

    if getattr(emb, "bias", None) is None:
        return

    # Fix the bias.
    # Bias shape can be different from the previous vocab size
    # if the weight matrix was shared and already extended but not the bias.
    (old_vocab_size,) = emb.bias.shape
    assert new_vocab_size >= old_vocab_size
    if new_vocab_size > old_vocab_size:
        old_bias = emb.bias.data
        new_bias = torch.zeros(
            (new_vocab_size,), dtype=old_bias.dtype, device=old_bias.device
        )
        new_bias[:old_vocab_size] = old_bias
        emb.bias.data = new_bias
@contextlib.contextmanager
def assert_weights_have_changed(model: nn.Module):
    """Context manager asserting that ``model``'s parameters were modified.

    A checksum (the sum of all parameter values) is taken before and after
    the managed block; both values are logged, and an ``AssertionError`` is
    raised when they are equal. Yields ``model``.
    """

    def weight_total(module: nn.Module) -> float:
        # Sum of every parameter's element sum, accumulated in Python.
        total = 0
        for param in module.parameters():
            total += param.sum().item()
        return total

    before = weight_total(model)
    yield model
    after = weight_total(model)

    logger.info(
        f"initial_checksum={before} -> final_checksum={after}"
    )
    assert before != after, "Model hasn't changed !"
# ported from UnsupervisedMT
def parse_lambda_config(x):
    """
    Parse the configuration of lambda coefficient (for scheduling).

    x = "3"                  # lambda will be a constant equal to x
    x = "0:1,1000:0"         # lambda will start from 1 and linearly decrease
                             # to 0 during the first 1000 iterations
    x = "0:0,1000:0,2000:1"  # lambda will be equal to 0 for the first 1000
                             # iterations, then will linearly increase to 1
                             # until iteration 2000

    Args:
        x (str): either a single number, or a comma-separated list of
            ``step:value`` pairs with strictly increasing integer steps.

    Returns:
        tuple: ``(initial_value, schedule)`` where ``schedule`` is ``None``
        for a constant and otherwise a list of ``(step, value)`` tuples.

    Raises:
        AssertionError: if a pair is malformed or steps are not strictly
            increasing non-negative integers.
    """
    entries = x.split(",")
    if len(entries) == 1:
        return float(x), None

    # The schedule syntax always uses ":" between step and value. Splitting
    # on os.pathsep only worked on POSIX, where it happens to be ":"; on
    # Windows it is ";" and every schedule was rejected.
    pairs = [entry.split(":") for entry in entries]
    assert all(len(pair) == 2 for pair in pairs), (
        "each schedule entry must look like step:value"
    )
    assert all(step.isdigit() for step, _ in pairs), (
        "schedule steps must be non-negative integers"
    )
    assert all(
        int(pairs[i][0]) < int(pairs[i + 1][0]) for i in range(len(pairs) - 1)
    ), "schedule steps must be strictly increasing"
    return float(pairs[0][1]), [(int(step), float(value)) for step, value in pairs]
+ """ + + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + # fmt: off + MultilingualTranslationTask.add_args(parser) + parser.add_argument('--lambda-parallel-config', default="1.0", type=str, metavar='CONFIG', + help='cross-entropy reconstruction coefficient (parallel data). ' + 'use fixed weight during training if set to floating point number. ' + 'use piecewise linear function over number of updates to schedule the ' + 'weight with the format: w0:step0,w1:step1,...') + parser.add_argument('--lambda-denoising-config', default="0.0", type=str, metavar='CONFIG', + help='Cross-entropy reconstruction coefficient (denoising autoencoding)' + 'use fixed weight during training if set to floating point number. ' + 'use piecewise linear function over number of updates to schedule the ' + 'weight with the format: w0:step0,w1:step1,...') + parser.add_argument('--lambda-otf-bt-config', default="0.0", type=str, metavar='CONFIG', + help='cross-entropy reconstruction coefficient (on-the-fly back-translation parallel data)' + 'use fixed weight during training if set to floating point number. 
' + 'use piecewise linear function over number of updates to schedule the ' + 'weight with the format: w0:step0,w1:step1,...') + parser.add_argument('--bt-max-len-a', default=1.1, type=float, metavar='N', + help='generate back-translated sequences of maximum length ax + b, where x is the ' + 'source length') + parser.add_argument('--bt-max-len-b', default=10.0, type=float, metavar='N', + help='generate back-translated sequences of maximum length ax + b, where x is the ' + 'source length') + parser.add_argument('--bt-beam-size', default=1, type=int, metavar='N', + help='beam size used in beam search of online back-translation') + parser.add_argument('--max-word-shuffle-distance', default=3.0, type=float, metavar='N', + help='maximum word shuffle distance for denoising autoencoding data generation') + parser.add_argument('--word-dropout-prob', default=0.1, type=float, metavar='N', + help='word dropout probability for denoising autoencoding data generation') + parser.add_argument('--word-blanking-prob', default=0.2, type=float, metavar='N', + help='word blanking probability for denoising autoencoding data generation') + # fmt: on + + def __init__(self, args, dicts, training): + super().__init__(args, dicts, training) + self.lambda_parallel, self.lambda_parallel_steps = parse_lambda_config( + args.lambda_parallel_config + ) + self.lambda_otf_bt, self.lambda_otf_bt_steps = parse_lambda_config( + args.lambda_otf_bt_config + ) + self.lambda_denoising, self.lambda_denoising_steps = parse_lambda_config( + args.lambda_denoising_config + ) + if self.lambda_denoising > 0.0 or self.lambda_denoising_steps is not None: + denoising_lang_pairs = [ + "%s-%s" % (tgt, tgt) + for tgt in {lang_pair.split("-")[1] for lang_pair in args.lang_pairs} + ] + self.model_lang_pairs = self.model_lang_pairs + denoising_lang_pairs + self.backtranslate_datasets = {} + self.backtranslators = {} + + @classmethod + def setup_task(cls, args, **kwargs): + dicts, training = 
def load_dataset(self, split, epoch=1, **kwargs):
    """Load a dataset split.

    Builds up to three groups of datasets and zips them round-robin into
    ``self.datasets[split]``:

    * parallel data, keyed by ``"src-tgt"``;
    * on-the-fly back-translation data, keyed by ``"bt:src-tgt"``
      (train splits only);
    * denoising auto-encoding data, keyed by ``"denoising:src-tgt"``
      (train splits only).

    A group is loaded when its lambda weight is positive or has a
    schedule; parallel data is additionally always loaded for non-train
    splits.

    Args:
        split: name of the split (e.g. a name starting with ``"train"``).
        epoch: 1-based epoch, used to cycle through the data paths.

    Raises:
        FileNotFoundError: if parallel data is requested but none is
            found, or if the monolingual file needed for back-translation
            is missing.
    """
    paths = utils.split_paths(self.args.data)
    assert len(paths) > 0
    # Cycle through the configured data directories, one per epoch.
    data_path = paths[(epoch - 1) % len(paths)]

    def split_exists(split, src, tgt, lang):
        # Monolingual files are probed by passing tgt=None, which the first
        # branch formats as the literal "None" in "{split}.{src}-None.{lang}".
        if src is not None:
            filename = os.path.join(
                data_path, "{}.{}-{}.{}".format(split, src, tgt, lang)
            )
        else:
            # NOTE(review): no visible caller passes src=None; this branch
            # ignores `lang` and would format src as "None" - confirm intent.
            filename = os.path.join(
                data_path, "{}.{}-None.{}".format(split, src, tgt)
            )
        return indexed_dataset.dataset_exists(filename, impl=self.args.dataset_impl)

    def load_indexed_dataset(path, dictionary):
        return data_utils.load_indexed_dataset(
            path, dictionary, self.args.dataset_impl
        )

    # load parallel datasets
    src_datasets, tgt_datasets = {}, {}
    if (
        self.lambda_parallel > 0.0
        or self.lambda_parallel_steps is not None
        or not split.startswith("train")
    ):
        for lang_pair in self.lang_pairs:
            src, tgt = lang_pair.split("-")
            # Files may be stored under either direction of the pair.
            if split_exists(split, src, tgt, src):
                prefix = os.path.join(
                    data_path, "{}.{}-{}.".format(split, src, tgt)
                )
            elif split_exists(split, tgt, src, src):
                prefix = os.path.join(
                    data_path, "{}.{}-{}.".format(split, tgt, src)
                )
            else:
                # No parallel data for this pair: skip it silently.
                continue
            src_datasets[lang_pair] = load_indexed_dataset(
                prefix + src, self.dicts[src]
            )
            tgt_datasets[lang_pair] = load_indexed_dataset(
                prefix + tgt, self.dicts[tgt]
            )
            logger.info(
                "parallel-{} {} {} examples".format(
                    data_path, split, len(src_datasets[lang_pair])
                )
            )
        # Parallel data was requested, so at least one pair must exist.
        if len(src_datasets) == 0:
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, data_path)
            )

    # back translation datasets
    backtranslate_datasets = {}
    if (
        self.lambda_otf_bt > 0.0 or self.lambda_otf_bt_steps is not None
    ) and split.startswith("train"):
        for lang_pair in self.lang_pairs:
            src, tgt = lang_pair.split("-")
            if not split_exists(split, tgt, None, tgt):
                raise FileNotFoundError(
                    "Dataset not found: backtranslation {} ({})".format(
                        split, data_path
                    )
                )
            # Monolingual target-language file: "{split}.{tgt}-None.{tgt}".
            filename = os.path.join(
                data_path, "{}.{}-None.{}".format(split, tgt, tgt)
            )
            dataset = load_indexed_dataset(filename, self.dicts[tgt])
            # Target-only view: the monolingual text used as the input of
            # the back-translation function.
            lang_pair_dataset_tgt = LanguagePairDataset(
                dataset,
                dataset.sizes,
                self.dicts[tgt],
                left_pad_source=self.args.left_pad_source,
                left_pad_target=self.args.left_pad_target,
            )
            # (src, tgt) view over the same data; only its collater is used
            # below, to batch the generated pairs.
            lang_pair_dataset = LanguagePairDataset(
                dataset,
                dataset.sizes,
                src_dict=self.dicts[src],
                tgt=dataset,
                tgt_sizes=dataset.sizes,
                tgt_dict=self.dicts[tgt],
                left_pad_source=self.args.left_pad_source,
                left_pad_target=self.args.left_pad_target,
            )
            backtranslate_datasets[lang_pair] = BacktranslationDataset(
                tgt_dataset=self.alter_dataset_langtok(
                    lang_pair_dataset_tgt,
                    src_eos=self.dicts[tgt].eos(),
                    src_lang=tgt,
                    tgt_lang=src,
                ),
                # Populated by build_model; a missing key raises KeyError.
                backtranslation_fn=self.backtranslators[lang_pair],
                src_dict=self.dicts[src],
                tgt_dict=self.dicts[tgt],
                output_collater=self.alter_dataset_langtok(
                    lang_pair_dataset=lang_pair_dataset,
                    src_eos=self.dicts[src].eos(),
                    src_lang=src,
                    tgt_eos=self.dicts[tgt].eos(),
                    tgt_lang=tgt,
                ).collater,
            )
            logger.info(
                "backtranslate-{}: {} {} {} examples".format(
                    tgt,
                    data_path,
                    split,
                    len(backtranslate_datasets[lang_pair]),
                )
            )
            # Also keep a reference on the task itself.
            self.backtranslate_datasets[lang_pair] = backtranslate_datasets[
                lang_pair
            ]

    # denoising autoencoder
    noising_datasets = {}
    if (
        self.lambda_denoising > 0.0 or self.lambda_denoising_steps is not None
    ) and split.startswith("train"):
        for lang_pair in self.lang_pairs:
            _, tgt = lang_pair.split("-")
            # Unlike back-translation, a missing monolingual file is skipped.
            if not split_exists(split, tgt, None, tgt):
                continue
            filename = os.path.join(
                data_path, "{}.{}-None.{}".format(split, tgt, tgt)
            )
            # Loaded twice: one copy is noised (source), one stays clean
            # (target).
            tgt_dataset1 = load_indexed_dataset(filename, self.dicts[tgt])
            tgt_dataset2 = load_indexed_dataset(filename, self.dicts[tgt])
            noising_dataset = NoisingDataset(
                tgt_dataset1,
                self.dicts[tgt],
                seed=1,
                max_word_shuffle_distance=self.args.max_word_shuffle_distance,
                word_dropout_prob=self.args.word_dropout_prob,
                word_blanking_prob=self.args.word_blanking_prob,
            )
            noising_datasets[lang_pair] = self.alter_dataset_langtok(
                LanguagePairDataset(
                    noising_dataset,
                    tgt_dataset1.sizes,
                    self.dicts[tgt],
                    tgt_dataset2,
                    tgt_dataset2.sizes,
                    self.dicts[tgt],
                    left_pad_source=self.args.left_pad_source,
                    left_pad_target=self.args.left_pad_target,
                ),
                src_eos=self.dicts[tgt].eos(),
                src_lang=tgt,
                tgt_eos=self.dicts[tgt].eos(),
                tgt_lang=tgt,
            )
            logger.info(
                "denoising-{}: {} {} {} examples".format(
                    tgt,
                    data_path,
                    split,
                    len(noising_datasets[lang_pair]),
                )
            )

    def language_pair_dataset(lang_pair):
        # Wrap the loaded parallel data of one pair, with language tokens.
        src, tgt = lang_pair.split("-")
        src_dataset, tgt_dataset = src_datasets[lang_pair], tgt_datasets[lang_pair]
        return self.alter_dataset_langtok(
            LanguagePairDataset(
                src_dataset,
                src_dataset.sizes,
                self.dicts[src],
                tgt_dataset,
                tgt_dataset.sizes,
                self.dicts[tgt],
                left_pad_source=self.args.left_pad_source,
                left_pad_target=self.args.left_pad_target,
            ),
            self.dicts[src].eos(),
            src,
            self.dicts[tgt].eos(),
            tgt,
        )

    # Key order: parallel pairs, then "bt:" entries, then "denoising:" ones.
    self.datasets[split] = RoundRobinZipDatasets(
        OrderedDict(
            [
                (lang_pair, language_pair_dataset(lang_pair))
                for lang_pair in src_datasets.keys()
            ]
            + [
                (_get_bt_dataset_key(lang_pair), dataset)
                for lang_pair, dataset in backtranslate_datasets.items()
            ]
            + [
                (_get_denoising_dataset_key(lang_pair), dataset)
                for lang_pair, dataset in noising_datasets.items()
            ]
        ),
        # Outside training only the requested source-target pair is used.
        eval_key=None
        if self.training
        else "%s-%s" % (self.args.source_lang, self.args.target_lang),
    )
def train_step(
    self, sample, model, criterion, optimizer, update_num, ignore_grad=False
):
    """Run forward/backward on every active sub-task of one batch.

    The parallel, on-the-fly back-translation and denoising parts of
    ``sample`` are processed in that order, each only when its lambda
    weight is positive. Losses are scaled by the corresponding weight
    (or zeroed when ``ignore_grad`` is set) before the backward pass.

    Args:
        sample: dict keyed by ``"src-tgt"``, ``"bt:src-tgt"`` and
            ``"denoising:src-tgt"``.
        model: multi-model exposing per-pair models in ``model.models``.
        criterion: callable returning ``(loss, sample_size, logging_output)``.
        optimizer: object whose ``backward(loss)`` runs the backward pass.
        update_num: current update count; lambda schedules are refreshed
            when it is positive.
        ignore_grad: multiply the loss by zero instead of by the weight.

    Returns:
        tuple: ``(agg_loss, agg_sample_size, agg_logging_output)``; the
        logging output holds every criterion key summed over sub-tasks,
        plus one entry per sub-task key summing all of its logged values.
    """
    from collections import defaultdict

    model.train()

    if update_num > 0:
        self.update_step(update_num)

    agg_loss, agg_sample_size = 0.0, 0.0
    # Keys are accumulated with "+=", so missing keys must default to 0;
    # a plain dict raised KeyError on the first logged value.
    agg_logging_output = defaultdict(float)

    def forward_backward(model, samples, logging_output_key, weight):
        nonlocal agg_loss, agg_sample_size, agg_logging_output
        if samples is None or len(samples) == 0:
            return
        loss, sample_size, logging_output = criterion(model, samples)
        if ignore_grad:
            loss *= 0
        else:
            loss *= weight
        optimizer.backward(loss)
        agg_loss += loss.detach().item()
        # TODO make summing of the sample sizes configurable
        agg_sample_size += sample_size
        for k in logging_output:
            agg_logging_output[k] += logging_output[k]
            agg_logging_output[logging_output_key] += logging_output[k]

    if self.lambda_parallel > 0.0:
        for lang_pair in self.lang_pairs:
            forward_backward(
                model.models[lang_pair],
                sample[lang_pair],
                lang_pair,
                self.lambda_parallel,
            )

    if self.lambda_otf_bt > 0.0:
        for lang_pair in self.lang_pairs:
            sample_key = _get_bt_dataset_key(lang_pair)
            forward_backward(
                model.models[lang_pair],
                sample[sample_key],
                sample_key,
                self.lambda_otf_bt,
            )

    if self.lambda_denoising > 0.0:
        for lang_pair in self.lang_pairs:
            _, tgt = lang_pair.split("-")
            sample_key = _get_denoising_dataset_key(lang_pair)
            # Denoising uses the tgt-tgt auto-encoding model.
            forward_backward(
                model.models["{0}-{0}".format(tgt)],
                sample[sample_key],
                sample_key,
                self.lambda_denoising,
            )

    return agg_loss, agg_sample_size, agg_logging_output
@dataclass
class SentencePredictionConfig(FairseqDataclass):
    """Configuration of the sentence prediction task.

    Covers the data location, the number of outputs, optional special
    tokens added to the inputs, shuffling/shortening of the dataset and a
    few values interpolated from other config sections.
    """

    data: str = field(default=MISSING, metadata={"help": "path to data directory"})
    # -1 means "unset"; the task's setup_task asserts the value is > 0.
    num_classes: int = field(
        default=-1,
        metadata={"help": "number of classes or regression targets"},
    )
    init_token: Optional[int] = field(
        default=None,
        metadata={"help": "add token at the beginning of each batch item"},
    )
    separator_token: Optional[int] = field(
        default=None,
        metadata={"help": "add separator token between inputs"},
    )
    # When True the loaded dataset is used as is, without the seeded
    # random-permutation sort order.
    no_shuffle: bool = field(
        default=False,
    )
    shorten_method: SHORTEN_METHOD_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, shorten sequences that exceed tokens_per_sample"
        },
    )
    shorten_data_split_list: str = field(
        default="",
        metadata={
            "help": "comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)'
        },
    )
    add_prev_output_tokens: bool = field(
        default=False,
        metadata={
            "help": "add prev_output_tokens to sample, used for encoder-decoder arch"
        },
    )
    max_positions: int = field(
        default=512,
        metadata={"help": "max tokens per example"},
    )

    # The three values below are interpolated from other config sections
    # (criterion.* and common.seed) rather than set on the task directly.
    regression_target: bool = II("criterion.regression_target")
    classification_head_name: str = II("criterion.classification_head_name")
    seed: int = II("common.seed")

    # Selects the "source"/"padding_mask" net_input layout instead of
    # "src_tokens"/"src_lengths" when the dataset is built.
    d2v2_multi: bool = field(
        default=False,
        metadata={"help": "prepare dataset for data2vec_multi"},
    )
Path not found" in str(e): + logger.warning(f"dataset {e} not found") + dataset = None + else: + raise e + return dataset + + input0 = make_dataset("input0", self.source_dictionary) + assert input0 is not None, "could not find dataset: {}".format( + get_path("input0", split) + ) + input1 = make_dataset("input1", self.source_dictionary) + + if self.cfg.init_token is not None: + input0 = PrependTokenDataset(input0, self.cfg.init_token) + + if input1 is None: + src_tokens = input0 + else: + if self.cfg.separator_token is not None: + input1 = PrependTokenDataset(input1, self.cfg.separator_token) + + src_tokens = ConcatSentencesDataset(input0, input1) + + with data_utils.numpy_seed(self.cfg.seed): + shuffle = np.random.permutation(len(src_tokens)) + + src_tokens = maybe_shorten_dataset( + src_tokens, + split, + self.cfg.shorten_data_split_list, + self.cfg.shorten_method, + self.max_positions(), + self.cfg.seed, + ) + + if self.cfg.d2v2_multi: + net_input = { + "source": RightPadDataset( + src_tokens, + pad_idx=self.source_dictionary.pad(), + ), + "id": IdDataset(), + "padding_mask": RightPaddingMaskDataset(src_tokens), + } + else: + net_input = { + "src_tokens": RightPadDataset( + src_tokens, + pad_idx=self.source_dictionary.pad(), + ), + "src_lengths": NumelDataset(src_tokens, reduce=False), + } + if self.cfg.add_prev_output_tokens: + prev_tokens_dataset = RightPadDataset( + RollDataset(src_tokens, 1), + pad_idx=self.dictionary.pad(), + ) + net_input.update( + prev_output_tokens=prev_tokens_dataset, + ) + + dataset = { + "id": IdDataset(), + "net_input": net_input, + "nsentences": NumSamplesDataset(), + "ntokens": NumelDataset(src_tokens, reduce=True), + } + + if not self.cfg.regression_target: + label_dataset = make_dataset("label", self.label_dictionary) + if label_dataset is not None: + dataset.update( + target=OffsetTokensDataset( + StripTokenDataset( + label_dataset, + id_to_strip=self.label_dictionary.eos(), + ), + offset=-self.label_dictionary.nspecial, + ) + ) 
+ else: + label_path = "{0}.label".format(get_path("label", split)) + if os.path.exists(label_path): + + def parse_regression_target(i, line): + values = line.split() + assert ( + len(values) == self.cfg.num_classes + ), f'expected num_classes={self.cfg.num_classes} regression target values on line {i}, found: "{line}"' + return [float(x) for x in values] + + with open(label_path) as h: + dataset.update( + target=RawLabelDataset( + [ + parse_regression_target(i, line.strip()) + for i, line in enumerate(h.readlines()) + ] + ) + ) + + nested_dataset = NestedDictionaryDataset( + dataset, + sizes=[src_tokens.sizes], + ) + + if self.cfg.no_shuffle: + dataset = nested_dataset + else: + dataset = SortDataset( + nested_dataset, + # shuffle + sort_order=[shuffle], + ) + + logger.info("Loaded {0} with #samples: {1}".format(split, len(dataset))) + + self.datasets[split] = dataset + return self.datasets[split] + + def build_model(self, cfg, from_checkpoint=False): + from fairseq import models + + with open_dict(cfg) if OmegaConf.is_config(cfg) else contextlib.ExitStack(): + cfg.max_positions = self.cfg.max_positions + + model = models.build_model(cfg, self, from_checkpoint) + + model.register_classification_head( + self.cfg.classification_head_name, + num_classes=self.cfg.num_classes, + ) + + return model + + def max_positions(self): + return self.cfg.max_positions + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary + + @property + def label_dictionary(self): + return self._label_dictionary diff --git a/fairseq/fairseq/tasks/sentence_prediction_adapters.py b/fairseq/fairseq/tasks/sentence_prediction_adapters.py new file mode 100644 index 0000000..afe5569 --- /dev/null +++ b/fairseq/fairseq/tasks/sentence_prediction_adapters.py @@ -0,0 +1,56 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
@register_task("sentence_prediction_adapters", dataclass=SentencePredictionConfig)
class SentencePredictionAdapterTask(SentencePredictionTask):
    """Sentence prediction task that disables gradients on embeddings and adapters.

    The model is built exactly as in :class:`SentencePredictionTask`; afterwards
    ``requires_grad`` is set to ``False`` on the sentence encoder's embedding
    parameters and on every layer's adapter parameters.
    """

    @staticmethod
    def _freeze(module):
        """Set ``requires_grad = False`` on every parameter of *module*."""
        for parameter in module.parameters():
            parameter.requires_grad = False

    def build_model(self, cfg, from_checkpoint=False):
        """Build the classification model, then freeze embeddings and adapters.

        Args:
            cfg: model configuration, handled as in the parent task.
            from_checkpoint: forwarded to the parent ``build_model``. Accepting
                it keeps this override signature-compatible with the parent,
                so callers that pass ``from_checkpoint=True`` no longer fail
                with a ``TypeError``.

        Returns:
            The built model with the classification head registered.
        """
        # The parent sets cfg.max_positions, builds the model and registers
        # the classification head.
        model = super().build_model(cfg, from_checkpoint)

        sentence_encoder = model.encoder.sentence_encoder

        logger.info("Freezing Embedding Parameters")
        self._freeze(sentence_encoder.embed_positions)
        self._freeze(sentence_encoder.layernorm_embedding)
        self._freeze(sentence_encoder.embed_tokens)

        logger.info("Freezing Adapters")
        for k, v in sentence_encoder.layers._modules.items():
            logger.info("Freezing Adapters in Layer " + str(k))
            if hasattr(v, "adapter_layer_norm"):
                logger.info("Freezing Adapter LN")
                self._freeze(v.adapter_layer_norm)
            # NOTE(review): adapter modules are frozen for every layer, whether
            # or not it has an adapter layer norm — confirm against upstream.
            self._freeze(v.adapter_modules)

        return model
@register_task("sentence_ranking")
class SentenceRankingTask(LegacyFairseqTask):
    """
    Ranking task on multiple sentences.

    Args:
        dictionary (Dictionary): the dictionary for the input of the task
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument("data", metavar="FILE", help="file prefix for data")
        parser.add_argument(
            "--num-classes", type=int, help="number of sentences to be ranked"
        )
        parser.add_argument(
            "--init-token",
            type=int,
            help="add token at the beginning of each batch item",
        )
        parser.add_argument(
            "--separator-token", type=int, help="add separator token between inputs"
        )
        parser.add_argument("--no-shuffle", action="store_true")
        parser.add_argument(
            "--shorten-method",
            default="none",
            choices=["none", "truncate", "random_crop"],
            help="if not none, shorten sequences that exceed --tokens-per-sample",
        )
        parser.add_argument(
            "--shorten-data-split-list",
            default="",
            help="comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)',
        )
        parser.add_argument(
            "--max-option-length", type=int, help="max length for each option"
        )

    def __init__(self, args, dictionary):
        super().__init__(args)
        self.dictionary = dictionary

    @classmethod
    def load_dictionary(cls, args, filename, source=True):
        """Load the dictionary from the filename and add the ``<mask>`` symbol.

        Args:
            filename (str): the filename
        """
        dictionary = Dictionary.load(filename)
        dictionary.add_symbol("<mask>")
        return dictionary

    @classmethod
    def setup_task(cls, args, **kwargs):
        """Create the task after loading ``<data>/input0/dict.txt``."""
        assert (
            args.criterion == "sentence_ranking"
        ), "Must set --criterion=sentence_ranking"

        # load data dictionary
        data_dict = cls.load_dictionary(
            args,
            os.path.join(args.data, "input0", "dict.txt"),
            source=True,
        )
        logger.info("[input] dictionary: {} types".format(len(data_dict)))
        # Instantiate via ``cls`` so subclasses get an instance of themselves.
        return cls(args, data_dict)

    def load_dataset(self, split, combine=False, **kwargs):
        """Load a given dataset split (e.g., train, valid, test).

        ``input0`` is combined with each of ``input1`` .. ``input<num_classes>``
        and the result is stored in ``self.datasets[split]``. If
        ``<data>/label/<split>.label`` exists, its integer lines become the
        ``target`` entry.

        Raises:
            FileNotFoundError: if one of the input datasets cannot be loaded.
        """

        def get_path(kind, split):
            return os.path.join(self.args.data, kind, split)

        def make_dataset(kind, dictionary):
            split_path = get_path(kind, split)

            dataset = data_utils.load_indexed_dataset(
                split_path,
                dictionary,
                self.args.dataset_impl,
                combine=combine,
            )
            if dataset is None:
                # Fail here with a clear message rather than later with an
                # opaque error when the missing dataset is wrapped or measured.
                raise FileNotFoundError(
                    "Dataset not found: {} ({})".format(split, split_path)
                )
            return dataset

        input0 = make_dataset("input0", self.source_dictionary)
        input_options = [
            make_dataset("input{idx}".format(idx=idx + 1), self.source_dictionary)
            for idx in range(self.args.num_classes)
        ]

        if self.args.separator_token is not None:
            input0 = PrependTokenDataset(input0, self.args.separator_token)

        src_tokens = []
        for input_option in input_options:
            if self.args.init_token is not None:
                input_option = PrependTokenDataset(input_option, self.args.init_token)
            if self.args.max_option_length is not None:
                input_option = TruncateDataset(
                    input_option, self.args.max_option_length
                )
            src_token = ConcatSentencesDataset(input_option, input0)
            src_token = maybe_shorten_dataset(
                src_token,
                split,
                self.args.shorten_data_split_list,
                self.args.shorten_method,
                self.args.max_positions,
                self.args.seed,
            )
            src_tokens.append(src_token)

        # The permutation is drawn under a fixed seed so the order is
        # reproducible for a given --seed.
        with data_utils.numpy_seed(self.args.seed):
            shuffle = np.random.permutation(len(src_tokens[0]))

        dataset = {
            "id": IdDataset(),
            "nsentences": NumSamplesDataset(),
            "ntokens": NumelDataset(src_tokens[0], reduce=True),
        }

        # One ``net_input<k>`` entry per candidate, numbered from 1.
        for option_number, src_token in enumerate(src_tokens, start=1):
            dataset.update(
                {
                    "net_input{idx}".format(idx=option_number): {
                        "src_tokens": RightPadDataset(
                            src_token,
                            pad_idx=self.source_dictionary.pad(),
                        ),
                        "src_lengths": NumelDataset(src_token, reduce=False),
                    }
                }
            )

        label_path = "{}.label".format(get_path("label", split))
        if os.path.exists(label_path):
            with open(label_path) as h:
                dataset.update(
                    target=RawLabelDataset([int(line.strip()) for line in h])
                )

        nested_dataset = NestedDictionaryDataset(
            dataset,
            sizes=[np.maximum.reduce([src_token.sizes for src_token in src_tokens])],
        )

        if self.args.no_shuffle:
            dataset = nested_dataset
        else:
            dataset = SortDataset(
                nested_dataset,
                # shuffle
                sort_order=[shuffle],
            )

        logger.info("Loaded {0} with #samples: {1}".format(split, len(dataset)))

        self.datasets[split] = dataset
        return self.datasets[split]

    def build_model(self, args, from_checkpoint=False):
        """Build the model and register a single-output ranking head."""
        from fairseq import models

        model = models.build_model(args, self, from_checkpoint)

        model.register_classification_head(
            getattr(args, "ranking_head_name", "sentence_classification_head"),
            num_classes=1,
        )

        return model

    def max_positions(self):
        """Return ``args.max_positions``."""
        return self.args.max_positions

    @property
    def source_dictionary(self):
        """Return the task dictionary (shared by source and target)."""
        return self.dictionary

    @property
    def target_dictionary(self):
        """Return the task dictionary (shared by source and target)."""
        return self.dictionary
try:
    import examples.simultaneous_translation  # noqa

    import_successful = True
except Exception:
    # Optional dependency: any ordinary failure while importing the examples
    # package only disables the simul tasks. ``Exception`` (rather than
    # ``BaseException``) lets KeyboardInterrupt and SystemExit propagate.
    import_successful = False


logger = logging.getLogger(__name__)


def check_import(flag):
    """Raise ``ImportError`` unless *flag* is truthy.

    Args:
        flag: result of the module-level attempt to import
            ``examples.simultaneous_translation``.

    Raises:
        ImportError: if *flag* is falsy.
    """
    if not flag:
        raise ImportError(
            "'examples.simultaneous_translation' is not correctly imported. "
            "Please considering `pip install -e $FAIRSEQ_DIR`."
        )
+ +import logging +import os +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +from omegaconf import II, MISSING + +from fairseq import utils +from fairseq.data import ( + AppendTokenDataset, + Dictionary, + IdDataset, + NestedDictionaryDataset, + NumelDataset, + PadDataset, + PrependTokenDataset, + StripTokenDataset, + TokenBlockDataset, + data_utils, +) +from fairseq.data.shorten_dataset import maybe_shorten_dataset +from fairseq.data.span_mask_tokens_dataset import SpanMaskedTokensDataset +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.tasks import FairseqTask, register_task + +from ..data.indexed_dataset import get_available_dataset_impl + +logger = logging.getLogger(__name__) + +SAMPLE_BREAK_MODE_CHOICES = ChoiceEnum(["none", "complete", "complete_doc", "eos"]) +SHORTEN_METHOD_CHOICES = ChoiceEnum(["none", "truncate", "random_crop"]) + + +@dataclass +class SpanMaskedLMConfig(FairseqDataclass): + shuffle: bool = field( + default=False, + ) + noise_density: float = field( + default=0.15, + metadata={"help": "What fraction of the tokens to select as noise"}, + ) + mean_noise_span_length: float = field( + default=3, + metadata={"help": "Mean noise span length, must be >= 1"}, + ) + data: str = field( + default=MISSING, + metadata={ + "help": "colon separated path to data directories list, " + "will be iterated upon during epochs in round-robin manner" + }, + ) + sample_break_mode: SAMPLE_BREAK_MODE_CHOICES = field( + default="none", + metadata={ + "help": 'If omitted or "none", fills each sample with tokens-per-sample ' + 'tokens. If set to "complete", splits samples only at the end ' + "of sentence, but may include multiple sentences per sample. " + '"complete_doc" is similar but respects doc boundaries. ' + 'If set to "eos", includes only one sentence per sample.' 
@register_task("span_masked_lm", dataclass=SpanMaskedLMConfig)
class SpanMaskedLMTask(FairseqTask):
    """
    Span masked language modeling task. (ie. T5)
    """

    cfg: SpanMaskedLMConfig

    def __init__(self, cfg, dictionary):
        super().__init__(cfg)
        self.dictionary = dictionary

    @classmethod
    def setup_task(cls, cfg: SpanMaskedLMConfig, **kwargs):
        """Load ``dict.txt`` from the first data path and create the task."""
        data_dirs = utils.split_paths(cfg.data)
        assert len(data_dirs) > 0
        dict_file = os.path.join(data_dirs[0], "dict.txt")
        vocab = Dictionary.load(dict_file)
        logger.info("dictionary: {} types".format(len(vocab)))
        # Configs without a ``shuffle`` attribute default to no shuffling.
        if not hasattr(cfg, "shuffle"):
            cfg.shuffle = False
        return cls(cfg, vocab)

    def _load_dataset_split(self, split, epoch, combine):
        """Load one split as token blocks wrapped in bos/eos tokens.

        The data directory is chosen round-robin from ``cfg.data`` by epoch.

        Raises:
            FileNotFoundError: if the indexed dataset cannot be loaded.
        """
        data_dirs = utils.split_paths(self.cfg.data)
        assert len(data_dirs) > 0
        chosen_dir = data_dirs[(epoch - 1) % len(data_dirs)]
        split_path = os.path.join(chosen_dir, split)

        raw = data_utils.load_indexed_dataset(
            split_path,
            self.dictionary,
            self.cfg.dataset_impl,
            combine=combine,
        )
        if raw is None:
            raise FileNotFoundError(
                "Dataset not found: {} ({})".format(split, split_path)
            )

        stripped = StripTokenDataset(raw, self.dictionary.eos())

        shortened = maybe_shorten_dataset(
            stripped,
            split,
            self.cfg.shorten_data_split_list,
            self.cfg.shorten_method,
            self.cfg.tokens_per_sample,
            self.cfg.seed,
        )

        # create continuous blocks of tokens; two positions are reserved for
        # the <s> and </s> added below
        block_size = self.cfg.tokens_per_sample - 2
        blocks = TokenBlockDataset(
            shortened,
            shortened.sizes,
            block_size,
            pad=self.dictionary.pad(),
            eos=self.dictionary.eos(),
            break_mode=self.cfg.sample_break_mode,
            document_sep_len=0,
        )
        logger.info("loaded {} blocks from: {}".format(len(blocks), split_path))

        # prepend beginning-of-sentence token (<s>, equiv. to [CLS] in BERT)
        with_bos = PrependTokenDataset(blocks, self.source_dictionary.bos())
        return AppendTokenDataset(with_bos, self.source_dictionary.eos())

    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
        """Load a given dataset split into ``self.datasets``.

        Args:
            split (str): name of the split (e.g., train, valid, test)
        """
        token_blocks = self._load_dataset_split(split, epoch, combine)

        masked = SpanMaskedTokensDataset(
            token_blocks,
            self.dictionary,
            noise_density=self.cfg.noise_density,
            mean_noise_span_length=self.cfg.mean_noise_span_length,
            shuffle=self.cfg.shuffle,
            seed=self.cfg.seed,
        )
        self.datasets[split] = masked
        logger.info(
            "Split: {0}, Loaded {1} samples of span_masked_tokens_dataset".format(
                split,
                len(self.datasets[split]),
            )
        )

    def build_dataset_for_inference(self, src_tokens, src_lengths, **kwargs):
        """
        Generate batches for inference. We assume that the input begins with a
        bos symbol (`<s>`) and ends with an eos symbol (`</s>`).
        """
        vocab = self.source_dictionary
        pad = vocab.pad()
        eos = vocab.eos()

        blocks = TokenBlockDataset(
            src_tokens,
            src_lengths,
            block_size=self.cfg.tokens_per_sample - 2,  # for <s> and </s>
            pad=pad,
            eos=eos,
            break_mode=self.cfg.sample_break_mode,
            document_sep_len=0,
        )
        # Decoder input: the block with eos stripped and an eos prepended.
        shifted = PrependTokenDataset(StripTokenDataset(blocks, eos), eos)
        padded_src = PadDataset(blocks, pad_idx=pad, left_pad=False)

        net_input = {
            "src_tokens": padded_src,
            "src_lengths": NumelDataset(padded_src, reduce=False),
            "prev_output_tokens": PadDataset(shifted, pad_idx=pad, left_pad=False),
        }
        return NestedDictionaryDataset(
            {
                "id": IdDataset(),
                "net_input": net_input,
                "target": padded_src,
            },
            sizes=[np.array(src_lengths)],
        )

    def max_positions(self):
        """Return the max sentence length allowed by the task."""
        source_limit = self.cfg.max_source_positions
        target_limit = self.cfg.max_target_positions
        return (source_limit, target_limit)

    @property
    def source_dictionary(self):
        """Return the source :class:`~fairseq.data.Dictionary`."""
        return self.dictionary

    @property
    def target_dictionary(self):
        """Return the target :class:`~fairseq.data.Dictionary`."""
        return self.dictionary
@dataclass
class SpeechDLMConfig(FairseqDataclass):
    """Configuration for the ``speech_dlm_task`` task."""

    data: Optional[str] = field(
        default=None, metadata={"help": "path to data directory"}
    )
    channels: Optional[str] = field(
        default=None,
        metadata={
            # Adjacent literals are concatenated, so the separating space must
            # be written explicitly.
            "help": 'comma-separated list of channels to load e.g., "unitA,unitB" '
            "(default: load all possible channels in the data path)"
        },
    )
    channel_weights: Optional[str] = field(
        default=None,
        metadata={
            "help": "comma-separated list of weights for different losses "
            "(default: None, which means all losses are treated equally)"
        },
    )
    sample_break_mode: SAMPLE_BREAK_MODE_CHOICES = field(
        default="none",
        metadata={
            "help": 'If omitted or "none", fills each sample with tokens-per-sample '
            'tokens. If set to "complete", splits samples only at the end '
            "of sentence, but may include multiple sentences per sample. "
            '"complete_doc" is similar but respects doc boundaries. '
            'If set to "eos", includes only one sentence per sample.'
        },
    )
    tokens_per_sample: int = field(
        default=1024,
        metadata={"help": "max number of tokens per sample for LM dataset"},
    )
    output_dictionary_size: int = field(
        default=-1, metadata={"help": "limit the size of output dictionary"}
    )
    # The four prediction switches below are typed ``str`` ("True"/"False")
    # rather than ``bool``: the original note calls this a workaround for
    # giving such options a default of True. The task parses the strings.
    next_unit_prediction: str = field(
        default="False",
        metadata={
            "help": "Perform Next Unit Prediction, expected str input ('True' or 'False')"
        },
    )
    edge_unit_prediction: str = field(
        default="True",
        metadata={
            "help": "Perform Edge Unit Prediction, expected str input ('True' or 'False')"
        },
    )
    duration_prediction: str = field(
        default="True",
        metadata={
            "help": "Perform Duration Prediction, expected str input ('True' or 'False')"
        },
    )
    delayed_duration_target: str = field(
        default="True",
        metadata={
            "help": "Perform Delayed Duration Prediction, expected str input ('True' or 'False') "
            "(default: 'True')"
        },
    )
    max_target_durations: Optional[int] = field(
        default=256,
        metadata={"help": "max duration considered (cut off to this value)"},
    )
    add_bos_token: bool = field(
        default=False, metadata={"help": "prepend beginning of sentence token (<s>)"}
    )
    max_target_positions: Optional[int] = field(
        default=None, metadata={"help": "max number of tokens in the target sequence"}
    )
    shorten_method: SHORTEN_METHOD_CHOICES = field(
        default="none",
        metadata={
            "help": "if not none, shorten sequences that exceed --tokens-per-sample"
        },
    )
    shorten_data_split_list: str = field(
        default="",
        metadata={
            "help": "comma-separated list of dataset splits to apply shortening to, "
            'e.g., "train,valid" (default: all dataset splits)'
        },
    )
    # TODO common vars below add to parent
    # The values below are interpolated from the named top-level config keys.
    seed: int = II("common.seed")
    dataset_impl: Optional[ChoiceEnum(get_available_dataset_impl())] = II(
        "dataset.dataset_impl"
    )
    data_buffer_size: int = II("dataset.data_buffer_size")
    tpu: bool = II("common.tpu")
+ """ + + def __init__(self, args, dicts, output_dicts=None, targets=None): + super().__init__(args) + self.dicts = dicts + self.output_dicts = output_dicts or dicts + + if targets is None: + targets = ["next"] + self.targets = targets + + self.channels = list(dicts.keys()) + + if args.channel_weights is not None: + self.channel_weights = [float(w) for w in args.channel_weights.split(",")] + else: + self.channel_weights = [1.0 for _ in self.channels] + assert len(self.channel_weights) == len( + self.channels + ), "number of channel_weights must be the same as number of channels" + + assert str(args.next_unit_prediction).lower() in [ + "true", + "false", + ], f"Expected to be a string of boolean, found {args.next_unit_prediction}" + assert str(args.edge_unit_prediction).lower() in [ + "true", + "false", + ], f"Expected to be a string of boolean, found {args.edge_unit_prediction}" + assert str(args.duration_prediction).lower() in [ + "true", + "false", + ], f"Expected to be a string of boolean, found {args.duration_prediction}" + assert str(args.delayed_duration_target).lower() in [ + "true", + "false", + ], f"Expected to be a string of boolean, found {args.delayed_duration_target}" + self.next_unit_prediction = bool( + str(args.next_unit_prediction).lower() == "true" + ) + self.edge_unit_prediction = bool( + str(args.edge_unit_prediction).lower() == "true" + ) + self.duration_prediction = bool(str(args.duration_prediction).lower() == "true") + self.delayed_duration_target = bool( + str(args.delayed_duration_target).lower() == "true" + ) + + self.max_target_durations = args.max_target_durations + + @classmethod + def setup_dictionary(cls, args, **kwargs): + """The dictionaries will be a dict over channel keys and values of type + ~fairseq.data.Dictionary. 
+ """ + paths = utils.split_paths(args.data) + assert len(paths) > 0 + data_path = paths[0] + + dicts = None + output_dicts = None + if args.channels is None: + sorted_channels = sorted( + name[5:-4] + for name in os.listdir(data_path) + if name[:5] == "dict." and name[-4:] == ".txt" + ) + else: + sorted_channels = sorted(args.channels.split(",")) + logger.info("channels: {}".format(sorted_channels)) + # load dictionaries + dicts = OrderedDict() + output_dicts = OrderedDict() + for channel in sorted_channels: + dictionary = Dictionary.load( + os.path.join(data_path, "dict.{}.txt".format(channel)) + ) + logger.info("[{}] dictionary: {} types".format(channel, len(dictionary))) + output_dictionary = dictionary + if args.output_dictionary_size >= 0: + output_dictionary = TruncatedDictionary( + dictionary, args.output_dictionary_size + ) + dicts[channel] = dictionary + output_dicts[channel] = output_dictionary + if len(dicts) > 0: + assert dicts[channel].pad() == dicts[sorted_channels[0]].pad() + assert dicts[channel].bos() == dicts[sorted_channels[0]].bos() + assert dicts[channel].eos() == dicts[sorted_channels[0]].eos() + assert dicts[channel].unk() == dicts[sorted_channels[0]].unk() + return (dicts, output_dicts) + + @classmethod + def setup_task(cls, args, **kwargs): + """Setup the task (e.g., load dictionaries). 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + dicts, output_dicts = cls.setup_dictionary(args, **kwargs) + + targets = [] + if str(getattr(args, "next_unit_prediction", "false")).lower() == "true": + targets.append("next") + if str(getattr(args, "edge_unit_prediction", "false")).lower() == "true": + targets.append("edge") + if str(getattr(args, "duration_prediction", "false")).lower() == "true": + targets.append("duration") + if len(targets) == 0: + # standard language modeling + targets = ["next"] + + return cls(args, dicts, output_dicts, targets=targets) + + def build_model(self, args): + model = super().build_model(args) + for target in self.targets: + if target not in model.supported_targets: + raise ValueError("Unsupported SpeechDLM target: {}".format(target)) + return model + + def load_dataset( + self, split: str, epoch=1, combine=False, **kwargs + ) -> SpeechDLMDataset: + """Load a given dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + """ + paths = utils.split_paths(self.args.data) + assert len(paths) > 0 + + data_path = paths[(epoch - 1) % len(paths)] + + channel_datasets = {} + for channel in self.channels: + split_path = os.path.join(data_path, split + "." 
+ channel) + dictionary = self.dicts[channel] + output_dictionary = self.output_dicts[channel] + + dataset = data_utils.load_indexed_dataset( + split_path, dictionary, self.args.dataset_impl, combine=combine + ) + + if dataset is None: + raise FileNotFoundError( + "[{}] Dataset not found: {} ({})".format(channel, split, split_path) + ) + + dataset = maybe_shorten_dataset( + dataset, + split, + self.args.shorten_data_split_list, + self.args.shorten_method, + self.args.tokens_per_sample, + self.args.seed, + ) + + dataset = TokenBlockDataset( + dataset, + dataset.sizes, + self.args.tokens_per_sample, + pad=dictionary.pad(), + eos=dictionary.eos(), + break_mode=self.args.sample_break_mode, + include_targets=True, + ) + + add_eos_for_other_targets = ( + self.args.sample_break_mode is not None + and self.args.sample_break_mode != "none" + ) + + channel_datasets[channel] = MonolingualDataset( + dataset=dataset, + sizes=dataset.sizes, + src_vocab=dictionary, + tgt_vocab=output_dictionary, + add_eos_for_other_targets=add_eos_for_other_targets, + shuffle=False, + targets=["future"], + add_bos_token=self.args.add_bos_token, + ) + + self.datasets[split] = SpeechDLMDataset( + datasets=channel_datasets, + targets=self.targets, + max_target_durations=self.max_target_durations, + shuffle=True, + ) + + def build_dataset_for_inference(self, src_tokens, src_lengths, **kwargs): + """ + Generate batches for inference. We prepend an eos token to src_tokens + (or bos if `--add-bos-token` is set) and we append a <pad> to target. + This is convenient both for generation with a prefix and LM scoring. 
+ """ + src_datasets = {} + tgt_datasets = {} + for channel in src_tokens[0]: + dataset = StripTokenDataset( + TokenBlockDataset( + [src_tokens[i][channel] for i in range(len(src_tokens))], + src_lengths, + block_size=None, # ignored for "eos" break mode + pad=self.source_dictionaries[channel].pad(), + eos=self.source_dictionaries[channel].eos(), + break_mode="eos", + ), + # remove eos from (end of) target sequence + self.source_dictionaries[channel].eos(), + ) + src_dataset = PrependTokenDataset( + dataset, + token=( + self.source_dictionaries[channel].bos() + if getattr(self.args, "add_bos_token", False) + else self.source_dictionaries[channel].eos() + ), + ) + tgt_dataset = AppendTokenDataset( + dataset, token=self.source_dictionaries[channel].pad() + ) + + src_datasets[channel] = src_dataset + tgt_datasets[channel] = tgt_dataset + + return NestedDictionaryDataset( + { + "id": IdDataset(), + "net_input": { + "src_tokens": OrderedDict( + [ + ( + channel, + PadDataset( + src_datasets[channel], + pad_idx=self.source_dictionaries[channel].pad(), + left_pad=False, + ), + ) + for channel in src_datasets + ] + ), + "src_lengths": NumelDataset( + next(iter(src_datasets.values())), reduce=False + ), + }, + "target": OrderedDict( + [ + ( + channel, + PadDataset( + tgt_datasets[channel], + pad_idx=self.source_dictionaries[channel].pad(), + left_pad=False, + ), + ) + for channel in tgt_datasets + ] + ), + }, + sizes=[np.array(src_lengths)], + ) + + def inference_step( + self, generator, models, sample, prefix_tokens=None, constraints=None + ): + with torch.no_grad(): + # Generation will always be conditioned on bos_token + if getattr(self.args, "add_bos_token", False): + bos_token = self.source_dictionary.bos() + else: + bos_token = self.source_dictionary.eos() + + if constraints is not None: + raise NotImplementedError( + "Constrained decoding with the SpeechDLM task is not supported" + ) + # SequenceGenerator doesn't use src_tokens directly, we need to + # pass the 
`prefix_tokens` argument instead + if prefix_tokens is None: + prefix_tokens = {} + for channel in sample["net_input"]["src_tokens"]: + if sample["net_input"]["src_tokens"][channel].nelement(): + prefix_tokens_channel = sample["net_input"]["src_tokens"][ + channel + ] + if prefix_tokens_channel[:, 0].eq(bos_token).all(): + prefix_tokens_channel = prefix_tokens_channel[:, 1:] + prefix_tokens[channel] = prefix_tokens_channel + else: + prefix_tokens = None + break + return generator.generate( + models, sample, prefix_tokens=prefix_tokens, bos_token=bos_token + ) + + def eval_lm_dataloader( + self, + dataset, + max_tokens: Optional[int] = 36000, + batch_size: Optional[int] = None, + max_positions: Optional[int] = None, + num_shards: int = 1, + shard_id: int = 0, + num_workers: int = 1, + data_buffer_size: int = 10, + # ensures that every evaluated token has access to a context of at least + # this size, if possible + context_window: int = 0, + ): + if context_window > 0: + dataset = LMContextWindowDataset( + dataset=dataset, + tokens_per_sample=self.args.tokens_per_sample, + context_window=context_window, + pad_idx=self.source_dictionary.pad(), + ) + return self.get_batch_iterator( + dataset=dataset, + max_tokens=max_tokens, + max_sentences=batch_size, + max_positions=max_positions, + ignore_invalid_inputs=True, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + data_buffer_size=data_buffer_size, + ).next_epoch_itr(shuffle=False) + + @property + def source_dictionary(self): + """Return the :class:`~fairseq.data.Dictionary` for the language + model.""" + return self.dicts[self.channels[0]] + + @property + def target_dictionary(self): + """Return the :class:`~fairseq.data.Dictionary` for the language + model.""" + return self.output_dicts[self.channels[0]] + + @property + def source_dictionaries(self): + """Return the dict of :class:`~fairseq.data.Dictionary` for the + multichannel language model.""" + return self.dicts + + @property + def 
target_dictionaries(self): + """Return the dict of :class:`~fairseq.data.Dictionary` for the + multichannel language model.""" + return self.output_dicts + + def build_generator(self, models, args, extra_gen_cls_kwargs=None): + + from fairseq.models.speech_dlm.sequence_generator import ( + multichannel_search, + MultichannelSequenceGenerator, + ) + + # Choose search strategy. Defaults to Beam Search. + sampling = getattr(args, "sampling", False) + sampling_topk = getattr(args, "sampling_topk", -1) + sampling_topp = getattr(args, "sampling_topp", -1.0) + assert ( + sampling_topk < 0 or sampling + ), "--sampling-topk requires sampling (not beam search)" + assert ( + sampling_topp < 0 or sampling + ), "--sampling-topp requires sampling (not beam search)" + + if sampling: + search_strategy = multichannel_search.ContiguousMultichannelSampling( + self.target_dictionaries, sampling_topk, sampling_topp + ) + else: + search_strategy = multichannel_search.ContiguousMultichannelBeamSearch( + self.target_dictionaries + ) + + extra_gen_cls_kwargs = extra_gen_cls_kwargs or {} + + return MultichannelSequenceGenerator( + models, + self.target_dictionaries, + beam_size=getattr(args, "beam", 5), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 500), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + search_strategy=search_strategy, + duration_temperature=getattr(args, "duration_temperature", 1.0), + **extra_gen_cls_kwargs, + ) diff --git a/fairseq/fairseq/tasks/speech_to_speech.py b/fairseq/fairseq/tasks/speech_to_speech.py new file mode 100644 index 0000000..5aaaa95 --- /dev/null +++ b/fairseq/fairseq/tasks/speech_to_speech.py @@ -0,0 +1,597 
@@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import json +import logging +import math +from argparse import Namespace +from pathlib import Path +from typing import List + +import torch +import torch.nn as nn + +from fairseq import utils +from fairseq.data import Dictionary +from fairseq.data.audio.data_cfg import MultitaskConfig, S2SDataConfig +from fairseq.data.audio.speech_to_speech_dataset import SpeechToSpeechDatasetCreator +from fairseq.data.audio.speech_to_text_dataset import ( + SpeechToTextDataset, + TextTargetMultitaskData, +) +from fairseq.tasks import LegacyFairseqTask, register_task +from fairseq.tasks.speech_to_text import DummyMultiTask +from fairseq.tasks.text_to_speech import batch_mel_cepstral_distortion + +logger = logging.getLogger(__name__) + + +class StackUnitSequenceGenerator(nn.Module): + def __init__(self, tgt_dict, vocab_size): + super().__init__() + self.pad = tgt_dict.pad() + self.eos = tgt_dict.eos() + self.unk = tgt_dict.unk() + self.offset = len(tgt_dict) - vocab_size + self.vocab_size = vocab_size + + def pack_units(self, input: torch.Tensor, n_frames_per_step) -> torch.Tensor: + if n_frames_per_step <= 1: + return input + + bsz, _, n = input.shape + assert n == n_frames_per_step + + scale = [ + pow(self.vocab_size, n_frames_per_step - 1 - i) + for i in range(n_frames_per_step) + ] + scale = torch.LongTensor(scale).squeeze(0).to(input.device) + mask = input >= self.offset + res = ((input - self.offset) * scale * mask).sum(dim=2) + self.offset + return res + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + # currently only support viterbi search for stacked units + model = models[0] + model.eval() + + max_len = model.max_decoder_positions() + # TODO: incorporate max_len_a and max_len_b + + src_tokens = sample["net_input"]["src_tokens"] + src_lengths = 
sample["net_input"]["src_lengths"] + bsz, src_len, _ = src_tokens.size() + n_frames_per_step = model.decoder.n_frames_per_step + + # initialize + encoder_out = model.forward_encoder( + src_tokens, src_lengths, speaker=sample["speaker"] + ) + incremental_state = {} + pred_out, attn, scores = [], [], [] + finished = src_tokens.new_zeros((bsz,)).bool() + + prev_output_tokens = src_lengths.new_zeros((bsz, 1)).long().fill_(self.eos) + for _ in range(max_len): + cur_out, cur_extra = model.forward_decoder( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + ) + + lprobs = model.get_normalized_probs([cur_out], log_probs=True) + # never select pad, unk + lprobs[:, :, self.pad] = -math.inf + lprobs[:, :, self.unk] = -math.inf + + cur_pred_lprob, cur_pred_out = torch.max(lprobs, dim=2) + scores.append(cur_pred_lprob) + pred_out.append(cur_pred_out) + + prev_output_tokens = torch.cat( + ( + prev_output_tokens, + self.pack_units( + cur_pred_out.view(bsz, 1, n_frames_per_step), n_frames_per_step + ), + ), + dim=1, + ) + + attn.append(cur_extra["attn"][0]) + + cur_finished = torch.any(cur_pred_out.squeeze(1) == self.eos, dim=1) + finished = finished | cur_finished + if finished.sum().item() == bsz: + break + + pred_out = torch.cat(pred_out, dim=1).view(bsz, -1) + attn = torch.cat(attn, dim=2) + alignment = attn.max(dim=1)[1] + attn = attn.repeat_interleave(n_frames_per_step, dim=2) + alignment = alignment.repeat_interleave(n_frames_per_step, dim=1) + scores = torch.cat(scores, dim=1) + eos_idx = (pred_out == self.eos).nonzero(as_tuple=True) + out_lens = src_lengths.new_zeros((bsz,)).long().fill_(max_len) + for b, l in zip(eos_idx[0], eos_idx[1]): + out_lens[b] = min(l, out_lens[b]) + + hypos = [ + [ + { + "tokens": pred_out[b, :out_len], + "attn": attn[b, :, :out_len], + "alignment": alignment[b, :out_len], + "positional_scores": scores[b, :out_len], + "score": utils.item(scores[b, :out_len].sum().data), + } + ] + for b, out_len in 
zip(range(bsz), out_lens) + ] + + return hypos + + +@register_task("speech_to_speech") +class SpeechToSpeechTask(LegacyFairseqTask): + @classmethod + def add_args(cls, parser): + parser.add_argument("data", help="manifest root path") + parser.add_argument( + "--config-yaml", + type=str, + default="config.yaml", + help="Configuration YAML filename (under manifest root)", + ) + parser.add_argument( + "--multitask-config-yaml", + type=str, + default=None, + help="Configuration YAML filename for the multitasks (under manifest root)", + ) + parser.add_argument( + "--max-source-positions", + default=6000, + type=int, + metavar="N", + help="max number of tokens in the source sequence", + ) + parser.add_argument( + "--max-target-positions", + default=1024, + type=int, + metavar="N", + help="max number of tokens in the target sequence", + ) + parser.add_argument( + "--target-is-code", + action="store_true", + help="set if target is discrete unit instead of spectrogram", + ) + parser.add_argument( + "--target-code-size", type=int, default=None, help="# discrete units" + ) + parser.add_argument( + "--n-frames-per-step", + type=int, + default=1, + help="# stacked frames, use 0 for reduced discrete unit sequence", + ) + parser.add_argument("--eval-inference", action="store_true") + parser.add_argument( + "--eval-args", + type=str, + default="{}", + help='generation args for speech-to-unit model , e.g., \'{"beam": 5, "max_len_a": 1}\', as JSON string', + ) + parser.add_argument("--eos-prob-threshold", type=float, default=0.5) + parser.add_argument( + "--mcd-normalize-type", + type=str, + default="targ", + choices=["targ", "pred", "path"], + ) + parser.add_argument( + "--vocoder", + type=str, + default="griffin_lim", + choices=["griffin_lim", "hifigan", "code_hifigan"], + ) + parser.add_argument("--spec-bwd-max-iter", type=int, default=8) + parser.add_argument( + "--infer-target-lang", + type=str, + default="", + help="target language for inference", + ) + + def __init__(self, 
args, tgt_dict, infer_tgt_lang_id=None): + super().__init__(args) + self.tgt_dict = tgt_dict + self.data_cfg = S2SDataConfig(Path(args.data) / args.config_yaml) + + self.multitask_tasks = {} + self.tgt_dict_mt = None + self.eos_token_mt = None + if getattr(args, "multitask_config_yaml", None) is not None: + multitask_cfg = MultitaskConfig( + Path(args.data) / args.multitask_config_yaml + ) + first_pass_task_idx = multitask_cfg.first_pass_decoder_task_index + for i, (task_name, task_config) in enumerate( + multitask_cfg.get_all_tasks().items() + ): + task_obj = DummyMultiTask( + task_config, + task_config.tgt_dict, + first_pass=i == first_pass_task_idx, + ) + self.multitask_tasks[task_name] = task_obj + if task_obj.is_first_pass_decoder: + self.tgt_dict_mt = task_obj.target_dictionary + if task_config.prepend_bos_and_append_tgt_lang_tag: + self.eos_token_mt = task_config.eos_token + assert not isinstance(self.eos_token_mt, List) + + if not self.eos_token_mt: + raise Warning( + "Please provide eos_token in --multitask-config-yaml to replace eos in sequence generator" + ) + + self._infer_tgt_lang_id = infer_tgt_lang_id + + @classmethod + def setup_task(cls, args, **kwargs): + data_cfg = data_cfg = S2SDataConfig(Path(args.data) / args.config_yaml) + tgt_dict = None + infer_tgt_lang_id = None + if args.target_is_code: + if data_cfg.prepend_tgt_lang_tag_as_bos: + # dictionary with language tags + dict_path = Path(args.data) / data_cfg.vocab_filename + if not dict_path.is_file(): + raise FileNotFoundError( + f"Dict has to be provided when setting prepend_tgt_lang_tag_as_bos: true, but dict not found: {dict_path}" + ) + tgt_dict = Dictionary.load(dict_path.as_posix()) + + # target langauge for inference + if args.infer_target_lang != "": + tgt_lang_tag = SpeechToTextDataset.LANG_TAG_TEMPLATE.format( + args.infer_target_lang + ) + infer_tgt_lang_id = tgt_dict.index(tgt_lang_tag) + assert infer_tgt_lang_id != tgt_dict.unk() + else: + assert args.target_code_size is not None 
+ + tgt_dict = Dictionary() + for i in range(args.target_code_size): + tgt_dict.add_symbol(str(i)) + logger.info(f"dictionary size: " f"{len(tgt_dict):,}") + + if getattr(args, "train_subset", None) is not None: + if not all(s.startswith("train") for s in args.train_subset.split(",")): + raise ValueError('Train splits should be named like "train*".') + + assert args.n_frames_per_step >= 1 + assert ( + not args.eval_inference + or (args.target_is_code and args.vocoder == "code_hifigan") + or (not args.target_is_code and args.vocoder != "code_hifigan") + ) + + return cls(args, tgt_dict, infer_tgt_lang_id=infer_tgt_lang_id) + + def build_criterion(self, args): + from fairseq import criterions + + if len(self.multitask_tasks) > 0: + if self.args.target_is_code and not args._name.startswith("speech_to_unit"): + raise ValueError( + "set --criterion speech_to_unit for speech-to-unit loss with multitask" + ) + elif not self.args.target_is_code and not args._name.startswith( + "speech_to_spectrogram" + ): + raise ValueError( + "set --criterion speech_to_spectrogram for speech-to-spectrogram loss with multitask" + ) + + return criterions.build_criterion(args, self) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + self.datasets[split] = SpeechToSpeechDatasetCreator.from_tsv( + root=self.args.data, + data_cfg=self.data_cfg, + splits=split, + is_train_split=split.startswith("train"), + epoch=epoch, + seed=self.args.seed, + target_is_code=self.args.target_is_code, + tgt_dict=self.target_dictionary, + n_frames_per_step=self.args.n_frames_per_step, + multitask=self.multitask_tasks, + ) + + @property + def target_dictionary(self): + return self.tgt_dict + + @property + def target_dictionary_mt(self): + return self.tgt_dict_mt + + @property + def source_dictionary(self): + return None + + def max_positions(self): + return self.args.max_source_positions, self.args.max_target_positions + + def build_model(self, args, from_checkpoint=False): + 
args.input_feat_per_channel = self.data_cfg.input_feat_per_channel + args.input_channels = self.data_cfg.input_transformed_channels + args.target_speaker_embed = self.data_cfg.target_speaker_embed is not None + args.n_frames_per_step = self.args.n_frames_per_step + + model = super().build_model(args, from_checkpoint) + + if len(self.multitask_tasks) > 0: + from fairseq.models.speech_to_speech.s2s_transformer import ( + S2STransformerMultitaskModelBase, + ) + + assert isinstance(model, S2STransformerMultitaskModelBase) + + if self.args.eval_inference: + self.eval_gen_args = json.loads(self.args.eval_args) + self.generator = self.build_generator( + [model], Namespace(**self.eval_gen_args) + ) + + return model + + def build_generator_dual_decoder( + self, + models, + args, + extra_gen_cls_kwargs=None, + ): + from examples.speech_to_speech.unity.sequence_generator_multi_decoder import ( + MultiDecoderSequenceGenerator, + ) + + return MultiDecoderSequenceGenerator( + models, + self.target_dictionary, + self.target_dictionary_mt, + beam_size=max(1, getattr(args, "beam", 1)), + beam_size_mt=max(1, getattr(args, "beam_mt", 1)), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 200), + max_len_a_mt=getattr(args, "max_len_a_mt", 0), + max_len_b_mt=getattr(args, "max_len_b_mt", 200), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + **extra_gen_cls_kwargs, + ) + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + + if not self.args.target_is_code or self.args.eval_inference: + from fairseq.models.text_to_speech.vocoder import get_vocoder + + self.vocoder = get_vocoder(self.args, 
self.data_cfg) + self.vocoder = ( + self.vocoder.cuda() + if torch.cuda.is_available() and not self.args.cpu + else self.vocoder.cpu() + ) + + has_dual_decoder = getattr(models[0], "mt_task_name", None) is not None + + if self.args.target_is_code: + if self.args.n_frames_per_step == 1: + if has_dual_decoder: + seq_generator = self.build_generator_dual_decoder( + models, + args, + extra_gen_cls_kwargs=extra_gen_cls_kwargs, + ) + else: + seq_generator = super().build_generator( + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=extra_gen_cls_kwargs, + ) + else: + assert ( + getattr(args, "beam", 1) == 1 and getattr(args, "nbest", 1) == 1 + ), "only support viterbi search for stacked units" + seq_generator = StackUnitSequenceGenerator( + self.tgt_dict, + self.args.target_code_size, + ) + else: + if has_dual_decoder: + if getattr(args, "teacher_forcing", False): + raise NotImplementedError + else: + from fairseq.speech_generator import MultiDecoderSpeechGenerator + + generator = MultiDecoderSpeechGenerator + + lang_token_ids_aux = { + i + for s, i in self.tgt_dict_mt.indices.items() + if TextTargetMultitaskData.is_lang_tag(s) + } + + if extra_gen_cls_kwargs is None: + extra_gen_cls_kwargs = {} + extra_gen_cls_kwargs[ + "symbols_to_strip_from_output" + ] = lang_token_ids_aux + + eos_id_mt = ( + self.tgt_dict_mt.index(self.eos_token_mt) + if self.eos_token_mt + else None + ) + assert eos_id_mt != self.tgt_dict_mt.unk() + extra_gen_cls_kwargs["eos_mt"] = eos_id_mt + + seq_generator = generator( + models, + args, + self.vocoder, + self.data_cfg, + self.target_dictionary_mt, + max_iter=self.args.max_target_positions, + eos_prob_threshold=self.args.eos_prob_threshold, + **extra_gen_cls_kwargs, + ) + else: + if getattr(args, "teacher_forcing", False): + from fairseq.speech_generator import ( + TeacherForcingAutoRegressiveSpeechGenerator, + ) + + generator = TeacherForcingAutoRegressiveSpeechGenerator + logger.info("Teacher forcing mode for generation") + else: + 
from fairseq.speech_generator import AutoRegressiveSpeechGenerator + + generator = AutoRegressiveSpeechGenerator + + seq_generator = generator( + models[0], + self.vocoder, + self.data_cfg, + max_iter=self.args.max_target_positions, + eos_prob_threshold=self.args.eos_prob_threshold, + ) + + return seq_generator + + def train_step( + self, sample, model, criterion, optimizer, update_num, ignore_grad=False + ): + for task_name, task_obj in self.multitask_tasks.items(): + criterion.set_multitask_loss_weight( + task_name, task_obj.args.get_loss_weight(update_num) + ) + if task_name in model.multitask_decoders: + model.multitask_decoders[task_name].train() + + loss, sample_size, logging_output = super().train_step( + sample, model, criterion, optimizer, update_num, ignore_grad + ) + return loss, sample_size, logging_output + + def valid_step(self, sample, model, criterion): + for task_name in self.multitask_tasks.keys(): + if task_name in model.multitask_decoders: + model.multitask_decoders[task_name].eval() + loss, sample_size, logging_output = super().valid_step(sample, model, criterion) + + if self.args.eval_inference: + hypos, inference_losses = self.valid_step_with_inference( + sample, model, self.generator + ) + for k, v in inference_losses.items(): + assert k not in logging_output + logging_output[k] = v + + return loss, sample_size, logging_output + + def valid_step_with_inference(self, sample, model, generator): + if self.args.target_is_code: + hypos = generator.generate([model], sample) + tgt_lens = ( + sample["target_lengths"] - 1 + ) * self.args.n_frames_per_step # strip <eos> + for b, (f, l) in enumerate(zip(sample["target"], tgt_lens)): + hypos[b][0]["targ_waveform"] = self.vocoder( + {"code": f[:l] - 4}, # remove <bos>, <pad>, <eos>, <unk> + dur_prediction=self.eval_gen_args.get("dur_prediction", False), + ) + if len(hypos[b][0]["tokens"]) > 0: + hypos[b][0]["waveform"] = self.vocoder( + {"code": hypos[b][0]["tokens"] - 4}, + 
dur_prediction=self.eval_gen_args.get("dur_prediction", False), + ) + else: + hypos[b][0]["waveform"] = torch.flip( + hypos[b][0]["targ_waveform"], dims=[0] + ) + else: + hypos = [ + [hypo] for hypo in generator.generate(model, sample, has_targ=True) + ] + + losses = { + "mcd_loss": 0.0, + "targ_frames": 0.0, + "pred_frames": 0.0, + "path_frames": 0.0, + "nins": 0.0, + "ndel": 0.0, + } + rets = batch_mel_cepstral_distortion( + [hypo[0]["targ_waveform"] for hypo in hypos], + [hypo[0]["waveform"] for hypo in hypos], + self.data_cfg.output_sample_rate, + normalize_type=None, + ) + for d, extra in rets: + pathmap = extra[-1] + losses["mcd_loss"] += d.item() + losses["targ_frames"] += pathmap.size(0) + losses["pred_frames"] += pathmap.size(1) + losses["path_frames"] += pathmap.sum().item() + losses["nins"] += (pathmap.sum(dim=1) - 1).sum().item() + losses["ndel"] += (pathmap.sum(dim=0) - 1).sum().item() + losses["norm_frames"] = losses[ + f"{getattr(self.args, 'mcd_normalize_type', 'targ')}_frames" + ] + + return hypos, losses + + def inference_step( + self, generator, models, sample, prefix_tokens=None, constraints=None + ): + with torch.no_grad(): + if self._infer_tgt_lang_id is not None: + return generator.generate( + models, + sample, + prefix_tokens=prefix_tokens, + constraints=constraints, + bos_token=self._infer_tgt_lang_id, + ) + else: + return super().inference_step( + generator, + models, + sample, + prefix_tokens=prefix_tokens, + constraints=constraints, + ) diff --git a/fairseq/fairseq/tasks/speech_to_text.py b/fairseq/fairseq/tasks/speech_to_text.py new file mode 100644 index 0000000..8840821 --- /dev/null +++ b/fairseq/fairseq/tasks/speech_to_text.py @@ -0,0 +1,350 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +from argparse import Namespace +from pathlib import Path +from typing import List + +from fairseq.data import Dictionary, encoders +from fairseq.data.audio.audio_utils import get_features_or_waveform +from fairseq.data.audio.data_cfg import MultitaskConfig +from fairseq.data.audio.speech_to_text_dataset import ( + S2TDataConfig, + SpeechToTextDataset, + SpeechToTextDatasetCreator, + TextTargetMultitaskData, +) +from fairseq.tasks import LegacyFairseqTask, register_task + +logger = logging.getLogger(__name__) + + +@register_task("speech_to_text") +class SpeechToTextTask(LegacyFairseqTask): + @classmethod + def add_args(cls, parser): + parser.add_argument("data", help="manifest root path") + parser.add_argument( + "--config-yaml", + type=str, + default="config.yaml", + help="Configuration YAML filename (under manifest root)", + ) + parser.add_argument( + "--multitask-config-yaml", + type=str, + default=None, + help="Configuration YAML filename for the multitasks (under manifest root)", + ) + parser.add_argument( + "--max-source-positions", + default=6000, + type=int, + metavar="N", + help="max number of tokens in the source sequence", + ) + parser.add_argument( + "--max-target-positions", + default=1024, + type=int, + metavar="N", + help="max number of tokens in the target sequence", + ) + + def __init__(self, args, tgt_dict): + super().__init__(args) + self.tgt_dict = tgt_dict + self.data_cfg = S2TDataConfig(Path(args.data) / args.config_yaml) + self.speaker_to_id = self._get_speaker_to_id() + if ( + self.data_cfg.prepend_tgt_lang_tag + and self.data_cfg.prepend_bos_and_append_tgt_lang_tag + ): + raise ValueError( + "Please set only one of the two options to avoid adding target token multiple times" + ) + + self.multitask_tasks = {} + self.tgt_dict_mt = None + self.eos_token_mt = None + if getattr(args, "multitask_config_yaml", None) is not None: + multitask_cfg = MultitaskConfig( + Path(args.data) / args.multitask_config_yaml + ) + 
first_pass_task_idx = multitask_cfg.first_pass_decoder_task_index + for i, (task_name, task_config) in enumerate( + multitask_cfg.get_all_tasks().items() + ): + task_obj = DummyMultiTask( + task_config, + task_config.tgt_dict, + first_pass=i == first_pass_task_idx, + ) + self.multitask_tasks[task_name] = task_obj + if task_obj.is_first_pass_decoder: + self.tgt_dict_mt = task_obj.target_dictionary + if task_config.prepend_bos_and_append_tgt_lang_tag: + self.eos_token_mt = task_config.eos_token + assert not isinstance(self.eos_token_mt, List) + + if not self.eos_token_mt: + raise Warning( + "Please provide eos_token in --multitask-config-yaml to replace eos in sequence generator" + ) + + def _get_speaker_to_id(self): + speaker_to_id = None + speaker_set_filename = self.data_cfg.config.get("speaker_set_filename") + if speaker_set_filename is not None: + speaker_set_path = Path(self.args.data) / speaker_set_filename + with open(speaker_set_path) as f: + speaker_to_id = {r.strip(): i for i, r in enumerate(f)} + return speaker_to_id + + @classmethod + def setup_task(cls, args, **kwargs): + data_cfg = S2TDataConfig(Path(args.data) / args.config_yaml) + dict_path = Path(args.data) / data_cfg.vocab_filename + if not dict_path.is_file(): + raise FileNotFoundError(f"Dict not found: {dict_path.as_posix()}") + tgt_dict = Dictionary.load(dict_path.as_posix()) + logger.info( + f"dictionary size ({data_cfg.vocab_filename}): " f"{len(tgt_dict):,}" + ) + + if getattr(args, "train_subset", None) is not None: + if not all(s.startswith("train") for s in args.train_subset.split(",")): + raise ValueError('Train splits should be named like "train*".') + return cls(args, tgt_dict) + + def build_criterion(self, args): + from fairseq import criterions + + if self.data_cfg.prepend_tgt_lang_tag and args.ignore_prefix_size != 1: + raise ValueError( + 'Please set "--ignore-prefix-size 1" since ' + "target language ID token is prepended as BOS." 
+ ) + return criterions.build_criterion(args, self) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + is_train_split = split.startswith("train") + pre_tokenizer = self.build_tokenizer(self.args) + bpe_tokenizer = self.build_bpe(self.args) + self.datasets[split] = SpeechToTextDatasetCreator.from_tsv( + root=self.args.data, + cfg=self.data_cfg, + splits=split, + tgt_dict=self.tgt_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + is_train_split=is_train_split, + epoch=epoch, + seed=self.args.seed, + speaker_to_id=self.speaker_to_id, + multitask=self.multitask_tasks, + ) + + @property + def target_dictionary(self): + return self.tgt_dict + + @property + def target_dictionary_mt(self): + return self.tgt_dict_mt + + @property + def source_dictionary(self): + return None + + def max_positions(self): + return self.args.max_source_positions, self.args.max_target_positions + + def build_model(self, args, from_checkpoint=False): + args.input_feat_per_channel = self.data_cfg.input_feat_per_channel + args.input_channels = self.data_cfg.input_channels + args.speaker_to_id = self.speaker_to_id + return super(SpeechToTextTask, self).build_model(args, from_checkpoint) + + def build_generator_dual_decoder( + self, + models, + args, + extra_gen_cls_kwargs, + ): + from examples.speech_to_speech.unity.sequence_generator_multi_decoder import ( + MultiDecoderSequenceGenerator, + ) + + lang_token_ids_aux = { + i + for s, i in self.tgt_dict_mt.indices.items() + if TextTargetMultitaskData.is_lang_tag(s) + } + + extra_gen_cls_kwargs["symbols_to_strip_from_output"].update(lang_token_ids_aux) + + eos_id_mt = ( + self.tgt_dict_mt.index(self.eos_token_mt) if self.eos_token_mt else None + ) + assert eos_id_mt != self.tgt_dict_mt.unk() + extra_gen_cls_kwargs["eos_mt"] = eos_id_mt + + return MultiDecoderSequenceGenerator( + models, + self.target_dictionary, + self.target_dictionary_mt, + beam_size=max(1, getattr(args, "beam", 1)), + beam_size_mt=max(1, 
getattr(args, "beam_mt", 1)), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 200), + max_len_a_mt=getattr(args, "max_len_a_mt", 0), + max_len_b_mt=getattr(args, "max_len_b_mt", 0), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + len_penalty_mt=getattr(args, "lenpen_mt", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + **extra_gen_cls_kwargs, + ) + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + if self.data_cfg.prepend_tgt_lang_tag and args.prefix_size != 1: + raise ValueError( + 'Please set "--prefix-size 1" since ' + "target language ID token is prepended as BOS." + ) + lang_token_ids = { + i + for s, i in self.tgt_dict.indices.items() + if SpeechToTextDataset.is_lang_tag(s) + } + + if extra_gen_cls_kwargs is None: + extra_gen_cls_kwargs = {} + extra_gen_cls_kwargs["symbols_to_strip_from_output"] = lang_token_ids + + eos_token = ( + args.eos_token + if "eos_token" in args and args.eos_token is not None + else self.data_cfg.config.get("eos_token", None) + ) + + if self.data_cfg.prepend_bos_and_append_tgt_lang_tag and not eos_token: + raise Warning( + "Please provide --eos_token to replace eos in sequence generator" + ) + + eos_id = self.tgt_dict.index(eos_token) if eos_token else None + extra_gen_cls_kwargs["eos"] = eos_id + + has_dual_decoder = getattr(models[0], "mt_task_name", None) is not None + + if has_dual_decoder: + return self.build_generator_dual_decoder( + models, + args, + extra_gen_cls_kwargs=extra_gen_cls_kwargs, + ) + else: + return super().build_generator( + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=extra_gen_cls_kwargs, + ) + + def train_step( + self, sample, model, 
criterion, optimizer, update_num, ignore_grad=False + ): + for task_name, task_obj in self.multitask_tasks.items(): + criterion.set_multitask_loss_weight( + task_name, task_obj.args.get_loss_weight(update_num) + ) + if task_name in model.multitask_decoders: + model.multitask_decoders[task_name].train() + + loss, sample_size, logging_output = super().train_step( + sample, model, criterion, optimizer, update_num, ignore_grad + ) + return loss, sample_size, logging_output + + def valid_step(self, sample, model, criterion): + for task_name, task_obj in self.multitask_tasks.items(): + if task_name in model.multitask_decoders: + model.multitask_decoders[task_name].eval() + loss, sample_size, logging_output = super().valid_step(sample, model, criterion) + + return loss, sample_size, logging_output + + def build_tokenizer(self, args): + logger.info(f"pre-tokenizer: {self.data_cfg.pre_tokenizer}") + return encoders.build_tokenizer(Namespace(**self.data_cfg.pre_tokenizer)) + + def build_bpe(self, args): + logger.info(f"tokenizer: {self.data_cfg.bpe_tokenizer}") + return encoders.build_bpe(Namespace(**self.data_cfg.bpe_tokenizer)) + + def get_interactive_tokens_and_lengths(self, lines, encode_fn): + n_frames = [get_features_or_waveform(p).shape[0] for p in lines] + return lines, n_frames + + def build_dataset_for_inference(self, src_tokens, src_lengths, **kwargs): + return SpeechToTextDataset( + "interactive", False, self.data_cfg, src_tokens, src_lengths + ) + + +class DummyMultiTask(LegacyFairseqTask): + def __init__(self, args, tgt_dict, first_pass=False): + super().__init__(args) + self.tgt_dict = tgt_dict + self.first_pass = first_pass + + @property + def target_dictionary(self): + return self.tgt_dict + + @property + def is_first_pass_decoder(self): + return self.first_pass + + def inference_step( + self, generator, models, sample, prefix_tokens=None, constraints=None + ): + if self.args.decoder_type == "ctc": + model = models[0] # only support single model + 
encoder_out = model(**sample) + if hasattr(model, "get_logits"): + emissions = model.get_logits( + encoder_out + ) # no need to normalize emissions + else: + emissions = model.get_normalized_probs(encoder_out, log_probs=True) + return generator.decode( + emissions.transpose(0, 1).float().cpu().contiguous() + ) + else: + raise NotImplementedError("only ctc decoder is supported at the moment") + + def build_generator( + self, models, args, seq_gen_cls=None, extra_gen_cls_kwargs=None + ): + if self.args.decoder_type == "ctc": + from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder + + return W2lViterbiDecoder(args, self.tgt_dict) + else: + raise NotImplementedError("only ctc decoder is supported at the moment") diff --git a/fairseq/fairseq/tasks/speech_ulm_task.py b/fairseq/fairseq/tasks/speech_ulm_task.py new file mode 100644 index 0000000..b9d3019 --- /dev/null +++ b/fairseq/fairseq/tasks/speech_ulm_task.py @@ -0,0 +1,224 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. + +import logging +import sys +import torch +from dataclasses import dataclass, field +from typing import List, Optional, Tuple + +from fairseq.data import Dictionary +from fairseq.data.codedataset import ExpressiveCodeDataConfig, CodeDataset +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from omegaconf import MISSING, DictConfig + + +logger = logging.getLogger(__name__) + + +class UnitDictionary(Dictionary): + """ + A fixed-sized Dictionary that operates on integer-valued tokens + wth a trivial (identity) token <-> id mapping. + Special symbols (bos, eos, ...) have ids above n_units. 
+ """ + + def __init__( + self, + *, # begin keyword-only arguments + n_units, + bos="<s>", + pad="<pad>", + eos="</s>", + unk="<unk>", + extra_special_symbols=None, + clip=False, + ): + self.n_units = n_units + self.bos_word, self.unk_word, self.pad_word, self.eos_word = bos, unk, pad, eos + self.clip = clip + + self.symbols = [] + self.count = [] + self.indices = {} + for i in range(n_units): + self.add_symbol(str(i)) + + self.bos_index = self.add_symbol(bos) + self.pad_index = self.add_symbol(pad) + self.eos_index = self.add_symbol(eos) + self.unk_index = self.add_symbol(unk) + + if extra_special_symbols: + for s in extra_special_symbols: + self.add_symbol(s) + self.nspecial = len(self.symbols) + + def encode_line(self, line, append_eos=True, prepend_bos=False) -> torch.IntTensor: + words = [int(x) for x in line.split()] + if self.clip: + words = [min(self.n_units - 1, word) for word in words] + if prepend_bos: + words = [self.bos_index] + words + if append_eos: + words.append(self.eos_index) + ids = torch.IntTensor(words) + return ids + + +@dataclass +class SpeechUnitModelingConfig(FairseqDataclass): + data: str = field(default=MISSING, metadata={"help": "Path to data config.json"}) + max_token_duration: int = field( + default=20, metadata={"help": "all token durations are capped to this value"} + ) + tokens_per_sample: int = field( + default=1024, metadata={"help": "tokens in a sample"} + ) + max_target_positions: int = field( + default=1024, metadata={"help": "max target positions"} + ) + + # duration modeling + ignore_duration_input: bool = field( + default=False, metadata={"help": "whether token durations should be zeroed out"} + ) + discrete_duration: bool = field( + default=False, metadata={"help": "treat duration as discrete variable"} + ) + # F0 modeling + ignore_f0_input: bool = field( + default=False, metadata={"help": "whether F0 should be zeroed out"} + ) + discrete_f0: bool = field( + default=False, metadata={"help": "load quantized f0. 
get bin from config"} + ) + log_f0: bool = field( + default=False, metadata={"help": "whether f0 should be modeled in log space"} + ) + normalize_f0_mean: bool = field( + default=False, metadata={"help": "whether normalize f0 by speaker mean"} + ) + normalize_f0_std: bool = field( + default=False, metadata={"help": "whether normalize f0 by speaker stddev"} + ) + interpolate_f0: bool = field( + default=False, + metadata={"help": "whether interpolate f0 for non-voiced segments"}, + ) + + # input/output streams + stream_shifts: str = field( + default="0,0", + metadata={ + "help": ( + "comma-separated integer list denoting right-shift for " + "duration and pitch streams" + ) + }, + ) + + +@register_task("speech_unit_modeling", dataclass=SpeechUnitModelingConfig) +class SpeechUnitLanguageModelingTask(FairseqTask): + def __init__(self, cfg: SpeechUnitModelingConfig) -> None: + super().__init__(cfg) + assert not self.cfg.normalize_f0_std or self.cfg.normalize_f0_mean + + self.data_config = ExpressiveCodeDataConfig(cfg.data) + self._source_dictionary = self._target_dictionary = UnitDictionary( + n_units=self.data_config.n_units + ) + self._source_duration_dictionary = self._target_duration_dictionary = ( + UnitDictionary(n_units=self.cfg.max_token_duration + 1, clip=True) + if self.cfg.discrete_duration + else None + ) + self._source_f0_dictionary = self._target_f0_dictionary = ( + UnitDictionary(n_units=self.data_config.f0_vq_n_units) + if self.cfg.discrete_f0 + else None + ) + + self._channel_names = ["token", "duration", "f0"] + self._channel_sizes = [ + len(self.target_dictionary), + len(self.target_duration_dictionary) if self.cfg.discrete_duration else 1, + len(self.target_f0_dictionary) if self.cfg.discrete_f0 else 1, + ] + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return self._source_dictionary + + @property + def source_duration_dictionary(self) -> Optional[Dictionary]: + return self._source_duration_dictionary + + @property + def 
source_f0_dictionary(self) -> Optional[Dictionary]: + return self._source_f0_dictionary + + @property + def channel_names(self) -> List[str]: + return self._channel_names + + @property + def channel_sizes(self) -> List[int]: + return self._channel_sizes + + @property + def dictionary(self) -> Optional[Dictionary]: + return self._source_dictionary + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self._target_dictionary + + @property + def target_duration_dictionary(self) -> Optional[Dictionary]: + return self._target_duration_dictionary + + @property + def target_f0_dictionary(self) -> Optional[Dictionary]: + return self._target_f0_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return [self._dictionaries[l] for l in self.cfg.labels] + + @classmethod + def setup_task( + cls, cfg: SpeechUnitModelingConfig, **kwargs + ) -> "SpeechUnitLanguageModelingTask": + return cls(cfg) + + def load_dataset(self, split: str, **kwargs) -> None: + self.datasets[split] = CodeDataset( + manifest=self.data_config.manifests[split], + dictionary=self.source_dictionary, + dur_dictionary=self.source_duration_dictionary, + f0_dictionary=self.source_f0_dictionary, + config=self.data_config, + discrete_dur=self.cfg.discrete_duration, + discrete_f0=self.cfg.discrete_f0, + log_f0=self.cfg.log_f0, + normalize_f0_mean=self.cfg.normalize_f0_mean, + normalize_f0_std=self.cfg.normalize_f0_std, + interpolate_f0=self.cfg.interpolate_f0, + shifts=self.cfg.stream_shifts, + ) + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def build_criterion(self, cfg: DictConfig): + import fairseq.criterions + + return fairseq.criterions.build_criterion(cfg, self) diff --git a/fairseq/fairseq/tasks/text_to_speech.py b/fairseq/fairseq/tasks/text_to_speech.py new file mode 100644 index 0000000..82e7e66 --- /dev/null +++ b/fairseq/fairseq/tasks/text_to_speech.py @@ -0,0 +1,501 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os +import os.path as op + +import torch +import torch.nn.functional as F +import numpy as np + +from fairseq.data.audio.text_to_speech_dataset import TextToSpeechDatasetCreator +from fairseq.tasks import register_task +from fairseq.tasks.speech_to_text import SpeechToTextTask +from fairseq.speech_generator import ( + AutoRegressiveSpeechGenerator, + NonAutoregressiveSpeechGenerator, + TeacherForcingAutoRegressiveSpeechGenerator, +) + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + + +try: + from tensorboardX import SummaryWriter +except ImportError: + logger.info("Please install tensorboardX: pip install tensorboardX") + SummaryWriter = None + + +@register_task("text_to_speech") +class TextToSpeechTask(SpeechToTextTask): + @staticmethod + def add_args(parser): + parser.add_argument("data", help="manifest root path") + parser.add_argument( + "--config-yaml", + type=str, + default="config.yaml", + help="Configuration YAML filename (under manifest root)", + ) + parser.add_argument( + "--max-source-positions", + default=1024, + type=int, + metavar="N", + help="max number of tokens in the source sequence", + ) + parser.add_argument( + "--max-target-positions", + default=1200, + type=int, + metavar="N", + help="max number of tokens in the target sequence", + ) + parser.add_argument("--n-frames-per-step", type=int, default=1) + parser.add_argument("--eos-prob-threshold", type=float, default=0.5) + parser.add_argument("--eval-inference", action="store_true") + parser.add_argument("--eval-tb-nsample", type=int, default=8) + parser.add_argument("--vocoder", type=str, default="griffin_lim") + parser.add_argument("--spec-bwd-max-iter", type=int, default=8) + + def 
__init__(self, args, src_dict): + super().__init__(args, src_dict) + self.src_dict = src_dict + self.sr = self.data_cfg.config.get("features").get("sample_rate") + + self.tensorboard_writer = None + self.tensorboard_dir = "" + if args.tensorboard_logdir and SummaryWriter is not None: + self.tensorboard_dir = os.path.join(args.tensorboard_logdir, "valid_extra") + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + is_train_split = split.startswith("train") + pre_tokenizer = self.build_tokenizer(self.args) + bpe_tokenizer = self.build_bpe(self.args) + self.datasets[split] = TextToSpeechDatasetCreator.from_tsv( + self.args.data, + self.data_cfg, + split, + self.src_dict, + pre_tokenizer, + bpe_tokenizer, + is_train_split=is_train_split, + epoch=epoch, + seed=self.args.seed, + n_frames_per_step=self.args.n_frames_per_step, + speaker_to_id=self.speaker_to_id, + ) + + @property + def target_dictionary(self): + return None + + @property + def source_dictionary(self): + return self.src_dict + + def get_speaker_embeddings_path(self): + speaker_emb_path = None + if self.data_cfg.config.get("speaker_emb_filename") is not None: + speaker_emb_path = op.join( + self.args.data, self.data_cfg.config.get("speaker_emb_filename") + ) + return speaker_emb_path + + @classmethod + def get_speaker_embeddings(cls, args): + embed_speaker = None + if args.speaker_to_id is not None: + if args.speaker_emb_path is None: + embed_speaker = torch.nn.Embedding( + len(args.speaker_to_id), args.speaker_embed_dim + ) + else: + speaker_emb_mat = np.load(args.speaker_emb_path) + assert speaker_emb_mat.shape[1] == args.speaker_embed_dim + embed_speaker = torch.nn.Embedding.from_pretrained( + torch.from_numpy(speaker_emb_mat), + freeze=True, + ) + logger.info( + f"load speaker embeddings from {args.speaker_emb_path}. " + f"train embedding? 
{embed_speaker.weight.requires_grad}\n" + f"embeddings:\n{speaker_emb_mat}" + ) + return embed_speaker + + def build_model(self, cfg, from_checkpoint=False): + cfg.pitch_min = self.data_cfg.config["features"].get("pitch_min", None) + cfg.pitch_max = self.data_cfg.config["features"].get("pitch_max", None) + cfg.energy_min = self.data_cfg.config["features"].get("energy_min", None) + cfg.energy_max = self.data_cfg.config["features"].get("energy_max", None) + cfg.speaker_emb_path = self.get_speaker_embeddings_path() + model = super().build_model(cfg, from_checkpoint) + self.generator = None + if getattr(cfg, "eval_inference", False): + self.generator = self.build_generator([model], cfg) + return model + + def build_generator(self, models, cfg, vocoder=None, **unused): + if vocoder is None: + vocoder = self.build_default_vocoder() + model = models[0] + if getattr(model, "NON_AUTOREGRESSIVE", False): + return NonAutoregressiveSpeechGenerator(model, vocoder, self.data_cfg) + else: + generator = AutoRegressiveSpeechGenerator + if getattr(cfg, "teacher_forcing", False): + generator = TeacherForcingAutoRegressiveSpeechGenerator + logger.info("Teacher forcing mode for generation") + return generator( + model, + vocoder, + self.data_cfg, + max_iter=self.args.max_target_positions, + eos_prob_threshold=self.args.eos_prob_threshold, + ) + + def build_default_vocoder(self): + from fairseq.models.text_to_speech.vocoder import get_vocoder + + vocoder = get_vocoder(self.args, self.data_cfg) + if torch.cuda.is_available() and not self.args.cpu: + vocoder = vocoder.cuda() + else: + vocoder = vocoder.cpu() + return vocoder + + def valid_step(self, sample, model, criterion): + loss, sample_size, logging_output = super().valid_step(sample, model, criterion) + + if getattr(self.args, "eval_inference", False): + hypos, inference_losses = self.valid_step_with_inference( + sample, model, self.generator + ) + for k, v in inference_losses.items(): + assert k not in logging_output + 
logging_output[k] = v + + picked_id = 0 + if self.tensorboard_dir and (sample["id"] == picked_id).any(): + self.log_tensorboard( + sample, + hypos[: self.args.eval_tb_nsample], + model._num_updates, + is_na_model=getattr(model, "NON_AUTOREGRESSIVE", False), + ) + return loss, sample_size, logging_output + + def valid_step_with_inference(self, sample, model, generator): + hypos = generator.generate(model, sample, has_targ=True) + + losses = { + "mcd_loss": 0.0, + "targ_frames": 0.0, + "pred_frames": 0.0, + "nins": 0.0, + "ndel": 0.0, + } + rets = batch_mel_cepstral_distortion( + [hypo["targ_waveform"] for hypo in hypos], + [hypo["waveform"] for hypo in hypos], + self.sr, + normalize_type=None, + ) + for d, extra in rets: + pathmap = extra[-1] + losses["mcd_loss"] += d.item() + losses["targ_frames"] += pathmap.size(0) + losses["pred_frames"] += pathmap.size(1) + losses["nins"] += (pathmap.sum(dim=1) - 1).sum().item() + losses["ndel"] += (pathmap.sum(dim=0) - 1).sum().item() + + return hypos, losses + + def log_tensorboard(self, sample, hypos, num_updates, is_na_model=False): + if self.tensorboard_writer is None: + self.tensorboard_writer = SummaryWriter(self.tensorboard_dir) + tb_writer = self.tensorboard_writer + for b in range(len(hypos)): + idx = sample["id"][b] + text = sample["src_texts"][b] + targ = hypos[b]["targ_feature"] + pred = hypos[b]["feature"] + attn = hypos[b]["attn"] + + if is_na_model: + data = plot_tts_output( + [targ.transpose(0, 1), pred.transpose(0, 1)], + [f"target (idx={idx})", "output"], + attn, + "alignment", + ret_np=True, + suptitle=text, + ) + else: + eos_prob = hypos[b]["eos_prob"] + data = plot_tts_output( + [targ.transpose(0, 1), pred.transpose(0, 1), attn], + [f"target (idx={idx})", "output", "alignment"], + eos_prob, + "eos prob", + ret_np=True, + suptitle=text, + ) + + tb_writer.add_image( + f"inference_sample_{b}", data, num_updates, dataformats="HWC" + ) + + if hypos[b]["waveform"] is not None: + targ_wave = 
def save_figure_to_numpy(fig):
    """Return the rendered canvas of ``fig`` as a ``(height, width, 3)`` uint8 array.

    The figure must already be drawn (``fig.canvas.draw()``) before this is
    called. The returned array is a writable copy of the pixel data.

    ``np.frombuffer`` replaces the deprecated binary mode of ``np.fromstring``.
    Canvases without ``tostring_rgb`` (dropped by newer Matplotlib releases)
    are read through ``buffer_rgba`` with the alpha channel removed.
    """
    canvas = fig.canvas
    if hasattr(canvas, "tostring_rgb"):
        width, height = canvas.get_width_height()
        pixels = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        # frombuffer yields a read-only view; copy so callers get a writable array.
        return pixels.reshape((height, width, 3)).copy()
    rgba = np.asarray(canvas.buffer_rgba(), dtype=np.uint8)
    return rgba[..., :3].copy()
def batch_dynamic_time_warping(distance, shapes=None):
    """full batched DTW without any constraints

    distance: (batchsize, max_M, max_N) matrix
    shapes: optional per-entry sizes, indexed as ``shapes[b][0]`` (M) and
        ``shapes[b][1]`` (N); entries must support ``.item()`` after
        subtraction. When None, every entry is backtraced from the
        bottom-right corner ``(max_M - 1, max_N - 1)``.

    Returns a tuple ``(cumdist, backptr, pathmap)``:
        cumdist: accumulated distance, same shape as ``distance``
        backptr: int32 pointer codes (0=left, 1=up-left, 2=up)
        pathmap: int32 map with 1 on the cells visited by the backtrace
    """
    # ptr: 0=left, 1=up-left, 2=up
    ptr2dij = {0: (0, -1), 1: (-1, -1), 2: (-1, 0)}

    bsz, m, n = distance.size()
    cumdist = torch.zeros_like(distance)
    # -1 marks "not set yet"; every cell is overwritten below.
    backptr = torch.zeros_like(distance).type(torch.int32) - 1

    # initialize
    # First row / first column can only be reached by moving left / up, so
    # their cumulative cost is a running sum along that row / column.
    cumdist[:, 0, :] = distance[:, 0, :].cumsum(dim=-1)
    cumdist[:, :, 0] = distance[:, :, 0].cumsum(dim=-1)
    backptr[:, 0, :] = 0
    # The second assignment wins at cell (0, 0); the backtrace stops there
    # before reading it.
    backptr[:, :, 0] = 2

    # DP with optimized anti-diagonal parallelization, O(M+N) steps
    for offset in range(2, m + n - 1):
        # All interior cells with i + j == offset; they depend only on
        # earlier anti-diagonals, so they are updated together.
        ind = antidiag_indices(offset, 1, m, 1, n)
        # Stack order matches the pointer codes: left, up-left, up.
        c = torch.stack(
            [
                cumdist[:, ind[0], ind[1] - 1],
                cumdist[:, ind[0] - 1, ind[1] - 1],
                cumdist[:, ind[0] - 1, ind[1]],
            ],
            dim=2,
        )
        v, b = c.min(axis=-1)
        backptr[:, ind[0], ind[1]] = b.int()
        cumdist[:, ind[0], ind[1]] = v + distance[:, ind[0], ind[1]]

    # backtrace
    pathmap = torch.zeros_like(backptr)
    for b in range(bsz):
        i = m - 1 if shapes is None else (shapes[b][0] - 1).item()
        j = n - 1 if shapes is None else (shapes[b][1] - 1).item()
        dtwpath = [(i, j)]
        # NOTE(review): the walk stops silently once the path holds 10000
        # cells, so longer alignments yield a path that does not reach (0, 0).
        while (i != 0 or j != 0) and len(dtwpath) < 10000:
            assert i >= 0 and j >= 0
            di, dj = ptr2dij[backptr[b, i, j].item()]
            i, j = i + di, j + dj
            dtwpath.append((i, j))
        # Reverse so the path runs from the start cell to the end cell.
        dtwpath = dtwpath[::-1]
        indices = torch.from_numpy(np.array(dtwpath))
        pathmap[b, indices[:, 0], indices[:, 1]] = 1

    return cumdist, backptr, pathmap
def get_divisor(pathmap, normalize_type):
    """Return the normalisation divisor selected by ``normalize_type``.

    ``None`` gives 1, ``"len1"`` the first dimension of ``pathmap``,
    ``"len2"`` its second dimension and ``"path"`` the sum of its entries.

    Raises:
        ValueError: for any other ``normalize_type``.
    """
    if normalize_type is None:
        return 1

    # Each measure is wrapped in a callable so only the selected one runs.
    measures = (
        ("len1", lambda: pathmap.size(0)),
        ("len2", lambda: pathmap.size(1)),
        ("path", lambda: pathmap.sum().item()),
    )
    for label, measure in measures:
        if normalize_type == label:
            return measure()

    raise ValueError(f"normalize_type {normalize_type} not supported")
+ + y1: list of waveforms + y2: list of waveforms + sr: sampling rate + """ + + try: + import torchaudio + except ImportError: + raise ImportError("Please install torchaudio: pip install torchaudio") + + if mfcc_fn is None or mfcc_fn.sample_rate != sr: + melkwargs = { + "n_fft": int(0.05 * sr), + "win_length": int(0.05 * sr), + "hop_length": int(0.0125 * sr), + "f_min": 20, + "n_mels": 80, + "window_fn": torch.hann_window, + } + mfcc_fn = torchaudio.transforms.MFCC( + sr, n_mfcc=13, log_mels=True, melkwargs=melkwargs + ).to(y1[0].device) + return batch_compute_distortion( + y1, + y2, + sr, + lambda y: mfcc_fn(y).transpose(-1, -2), + compute_rms_dist, + normalize_type, + ) diff --git a/fairseq/fairseq/tasks/translation.py b/fairseq/fairseq/tasks/translation.py new file mode 100644 index 0000000..6897ebe --- /dev/null +++ b/fairseq/fairseq/tasks/translation.py @@ -0,0 +1,498 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def load_langpair_dataset(
    data_path,
    split,
    src,
    src_dict,
    tgt,
    tgt_dict,
    combine,
    dataset_impl,
    upsample_primary,
    left_pad_source,
    left_pad_target,
    max_source_positions,
    max_target_positions,
    prepend_bos=False,
    load_alignments=False,
    truncate_source=False,
    append_source_id=False,
    num_buckets=0,
    shuffle=True,
    pad_to_multiple=1,
    prepend_bos_src=None,
):
    """Load the indexed shards of one split into a ``LanguagePairDataset``.

    Shards are looked up under ``data_path`` as ``split``, ``split1``,
    ``split2``, ... with the file prefix ``{shard}.{src}-{tgt}.`` or, failing
    that, ``{shard}.{tgt}-{src}.``. Only the first shard is loaded unless
    ``combine`` is true.

    Options applied after loading, in this order:
        truncate_source: strip EOS, truncate to ``max_source_positions - 1``
            tokens and re-append EOS on each source shard.
        prepend_bos: prepend ``bos()`` of each dictionary to source and target;
            otherwise, if ``prepend_bos_src`` is given, prepend that token to
            the source only.
        append_source_id: append the index of ``"[src]"`` / ``"[tgt]"`` to the
            source / target and pass the target tag index on as ``eos``.
        load_alignments: attach ``{split}.align.{src}-{tgt}`` when it exists.

    ``max_target_positions`` is accepted but never read in this function.

    Raises:
        FileNotFoundError: when the first shard exists in neither direction.
    """

    def split_exists(split, src, tgt, lang, data_path):
        # True when the indexed file for this shard/direction/language exists.
        filename = os.path.join(data_path, "{}.{}-{}.{}".format(split, src, tgt, lang))
        return indexed_dataset.dataset_exists(filename, impl=dataset_impl)

    src_datasets = []
    tgt_datasets = []

    for k in itertools.count():
        # Shard 0 has no numeric suffix; later shards are split1, split2, ...
        split_k = split + (str(k) if k > 0 else "")

        # infer langcode
        if split_exists(split_k, src, tgt, src, data_path):
            prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, src, tgt))
        elif split_exists(split_k, tgt, src, src, data_path):
            prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, tgt, src))
        else:
            # Running out of numbered shards ends the scan; a missing first
            # shard is an error.
            if k > 0:
                break
            else:
                raise FileNotFoundError(
                    "Dataset not found: {} ({})".format(split, data_path)
                )

        src_dataset = data_utils.load_indexed_dataset(
            prefix + src, src_dict, dataset_impl
        )
        if truncate_source:
            src_dataset = AppendTokenDataset(
                TruncateDataset(
                    StripTokenDataset(src_dataset, src_dict.eos()),
                    max_source_positions - 1,
                ),
                src_dict.eos(),
            )
        src_datasets.append(src_dataset)

        tgt_dataset = data_utils.load_indexed_dataset(
            prefix + tgt, tgt_dict, dataset_impl
        )
        # The target side is optional: a None result is simply not collected.
        if tgt_dataset is not None:
            tgt_datasets.append(tgt_dataset)

        logger.info(
            "{} {} {}-{} {} examples".format(
                data_path, split_k, src, tgt, len(src_datasets[-1])
            )
        )

        if not combine:
            break

    # Either every shard has a target side or none of them does.
    assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0

    if len(src_datasets) == 1:
        src_dataset = src_datasets[0]
        tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None
    else:
        # The first shard is sampled with ratio upsample_primary, the rest with 1.
        sample_ratios = [1] * len(src_datasets)
        sample_ratios[0] = upsample_primary
        src_dataset = ConcatDataset(src_datasets, sample_ratios)
        if len(tgt_datasets) > 0:
            tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios)
        else:
            tgt_dataset = None

    if prepend_bos:
        assert hasattr(src_dict, "bos_index") and hasattr(tgt_dict, "bos_index")
        src_dataset = PrependTokenDataset(src_dataset, src_dict.bos())
        if tgt_dataset is not None:
            tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos())
    elif prepend_bos_src is not None:
        logger.info(f"prepending src bos: {prepend_bos_src}")
        src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src)

    eos = None
    if append_source_id:
        src_dataset = AppendTokenDataset(
            src_dataset, src_dict.index("[{}]".format(src))
        )
        if tgt_dataset is not None:
            tgt_dataset = AppendTokenDataset(
                tgt_dataset, tgt_dict.index("[{}]".format(tgt))
            )
        # The target language tag is handed to the dataset as its EOS symbol.
        eos = tgt_dict.index("[{}]".format(tgt))

    align_dataset = None
    if load_alignments:
        # Alignments are looked up by the unsuffixed split name only.
        align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt))
        if indexed_dataset.dataset_exists(align_path, impl=dataset_impl):
            align_dataset = data_utils.load_indexed_dataset(
                align_path, None, dataset_impl
            )

    tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None
    return LanguagePairDataset(
        src_dataset,
        src_dataset.sizes,
        src_dict,
        tgt_dataset,
        tgt_dataset_sizes,
        tgt_dict,
        left_pad_source=left_pad_source,
        left_pad_target=left_pad_target,
        align_dataset=align_dataset,
        eos=eos,
        num_buckets=num_buckets,
        shuffle=shuffle,
        pad_to_multiple=pad_to_multiple,
    )
train_subset: str = II("dataset.train_subset") + dataset_impl: Optional[ChoiceEnum(get_available_dataset_impl())] = II( + "dataset.dataset_impl" + ) + required_seq_len_multiple: int = II("dataset.required_seq_len_multiple") + + # options for reporting BLEU during validation + eval_bleu: bool = field( + default=False, metadata={"help": "evaluation with BLEU scores"} + ) + eval_bleu_args: Optional[str] = field( + default="{}", + metadata={ + "help": 'generation args for BLUE scoring, e.g., \'{"beam": 4, "lenpen": 0.6}\', as JSON string' + }, + ) + eval_bleu_detok: str = field( + default="space", + metadata={ + "help": "detokenize before computing BLEU (e.g., 'moses'); required if using --eval-bleu; " + "use 'space' to disable detokenization; see fairseq.data.encoders for other options" + }, + ) + eval_bleu_detok_args: Optional[str] = field( + default="{}", + metadata={"help": "args for building the tokenizer, if needed, as JSON string"}, + ) + eval_tokenized_bleu: bool = field( + default=False, metadata={"help": "compute tokenized BLEU instead of sacrebleu"} + ) + eval_bleu_remove_bpe: Optional[str] = field( + default=None, + metadata={ + "help": "remove BPE before computing BLEU", + "argparse_const": "@@ ", + }, + ) + eval_bleu_print_samples: bool = field( + default=False, metadata={"help": "print sample generations during validation"} + ) + + +@register_task("translation", dataclass=TranslationConfig) +class TranslationTask(FairseqTask): + """ + Translate from one (source) language to another (target) language. + + Args: + src_dict (~fairseq.data.Dictionary): dictionary for the source language + tgt_dict (~fairseq.data.Dictionary): dictionary for the target language + + .. note:: + + The translation task is compatible with :mod:`fairseq-train`, + :mod:`fairseq-generate` and :mod:`fairseq-interactive`. 
+ """ + + cfg: TranslationConfig + + def __init__(self, cfg: TranslationConfig, src_dict, tgt_dict): + super().__init__(cfg) + self.src_dict = src_dict + self.tgt_dict = tgt_dict + + @classmethod + def setup_task(cls, cfg: TranslationConfig, **kwargs): + """Setup the task (e.g., load dictionaries). + + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + paths = utils.split_paths(cfg.data) + assert len(paths) > 0 + # find language pair automatically + if cfg.source_lang is None or cfg.target_lang is None: + cfg.source_lang, cfg.target_lang = data_utils.infer_language_pair(paths[0]) + if cfg.source_lang is None or cfg.target_lang is None: + raise Exception( + "Could not infer language pair, please provide it explicitly" + ) + + # load dictionaries + src_dict = cls.load_dictionary( + os.path.join(paths[0], "dict.{}.txt".format(cfg.source_lang)) + ) + tgt_dict = cls.load_dictionary( + os.path.join(paths[0], "dict.{}.txt".format(cfg.target_lang)) + ) + assert src_dict.pad() == tgt_dict.pad() + assert src_dict.eos() == tgt_dict.eos() + assert src_dict.unk() == tgt_dict.unk() + logger.info("[{}] dictionary: {} types".format(cfg.source_lang, len(src_dict))) + logger.info("[{}] dictionary: {} types".format(cfg.target_lang, len(tgt_dict))) + + return cls(cfg, src_dict, tgt_dict) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. 
+ + Args: + split (str): name of the split (e.g., train, valid, test) + """ + paths = utils.split_paths(self.cfg.data) + assert len(paths) > 0 + if split != self.cfg.train_subset: + # if not training data set, use the first shard for valid and test + paths = paths[:1] + data_path = paths[(epoch - 1) % len(paths)] + + # infer langcode + src, tgt = self.cfg.source_lang, self.cfg.target_lang + + self.datasets[split] = load_langpair_dataset( + data_path, + split, + src, + self.src_dict, + tgt, + self.tgt_dict, + combine=combine, + dataset_impl=self.cfg.dataset_impl, + upsample_primary=self.cfg.upsample_primary, + left_pad_source=self.cfg.left_pad_source, + left_pad_target=self.cfg.left_pad_target, + max_source_positions=self.cfg.max_source_positions, + max_target_positions=self.cfg.max_target_positions, + load_alignments=self.cfg.load_alignments, + truncate_source=self.cfg.truncate_source, + num_buckets=self.cfg.num_batch_buckets, + shuffle=(split != "test"), + pad_to_multiple=self.cfg.required_seq_len_multiple, + ) + + def build_dataset_for_inference(self, src_tokens, src_lengths, constraints=None): + return LanguagePairDataset( + src_tokens, + src_lengths, + self.source_dictionary, + tgt_dict=self.target_dictionary, + constraints=constraints, + ) + + def build_model(self, cfg, from_checkpoint=False): + model = super().build_model(cfg, from_checkpoint) + if self.cfg.eval_bleu: + detok_args = json.loads(self.cfg.eval_bleu_detok_args) + self.tokenizer = encoders.build_tokenizer( + Namespace(tokenizer=self.cfg.eval_bleu_detok, **detok_args) + ) + + gen_args = json.loads(self.cfg.eval_bleu_args) + self.sequence_generator = self.build_generator( + [model], Namespace(**gen_args) + ) + return model + + def valid_step(self, sample, model, criterion): + loss, sample_size, logging_output = super().valid_step(sample, model, criterion) + if self.cfg.eval_bleu: + bleu = self._inference_with_bleu(self.sequence_generator, sample, model) + logging_output["_bleu_sys_len"] = 
bleu.sys_len + logging_output["_bleu_ref_len"] = bleu.ref_len + # we split counts into separate entries so that they can be + # summed efficiently across workers using fast-stat-sync + assert len(bleu.counts) == EVAL_BLEU_ORDER + for i in range(EVAL_BLEU_ORDER): + logging_output["_bleu_counts_" + str(i)] = bleu.counts[i] + logging_output["_bleu_totals_" + str(i)] = bleu.totals[i] + return loss, sample_size, logging_output + + def reduce_metrics(self, logging_outputs, criterion): + super().reduce_metrics(logging_outputs, criterion) + if self.cfg.eval_bleu: + + def sum_logs(key): + import torch + + result = sum(log.get(key, 0) for log in logging_outputs) + if torch.is_tensor(result): + result = result.cpu() + return result + + counts, totals = [], [] + for i in range(EVAL_BLEU_ORDER): + counts.append(sum_logs("_bleu_counts_" + str(i))) + totals.append(sum_logs("_bleu_totals_" + str(i))) + + if max(totals) > 0: + # log counts as numpy arrays -- log_scalar will sum them correctly + metrics.log_scalar("_bleu_counts", np.array(counts)) + metrics.log_scalar("_bleu_totals", np.array(totals)) + metrics.log_scalar("_bleu_sys_len", sum_logs("_bleu_sys_len")) + metrics.log_scalar("_bleu_ref_len", sum_logs("_bleu_ref_len")) + + def compute_bleu(meters): + import inspect + + try: + from sacrebleu.metrics import BLEU + + comp_bleu = BLEU.compute_bleu + except ImportError: + # compatibility API for sacrebleu 1.x + import sacrebleu + + comp_bleu = sacrebleu.compute_bleu + + fn_sig = inspect.getfullargspec(comp_bleu)[0] + if "smooth_method" in fn_sig: + smooth = {"smooth_method": "exp"} + else: + smooth = {"smooth": "exp"} + bleu = comp_bleu( + correct=meters["_bleu_counts"].sum, + total=meters["_bleu_totals"].sum, + sys_len=int(meters["_bleu_sys_len"].sum), + ref_len=int(meters["_bleu_ref_len"].sum), + **smooth, + ) + return round(bleu.score, 2) + + metrics.log_derived("bleu", compute_bleu) + + def max_positions(self): + """Return the max sentence length allowed by the task.""" + 
return (self.cfg.max_source_positions, self.cfg.max_target_positions) + + @property + def source_dictionary(self): + """Return the source :class:`~fairseq.data.Dictionary`.""" + return self.src_dict + + @property + def target_dictionary(self): + """Return the target :class:`~fairseq.data.Dictionary`.""" + return self.tgt_dict + + def _inference_with_bleu(self, generator, sample, model): + import sacrebleu + + def decode(toks, escape_unk=False): + s = self.tgt_dict.string( + toks.int().cpu(), + self.cfg.eval_bleu_remove_bpe, + # The default unknown string in fairseq is `<unk>`, but + # this is tokenized by sacrebleu as `< unk >`, inflating + # BLEU scores. Instead, we use a somewhat more verbose + # alternative that is unlikely to appear in the real + # reference, but doesn't get split into multiple tokens. + unk_string=("UNKNOWNTOKENINREF" if escape_unk else "UNKNOWNTOKENINHYP"), + ) + if self.tokenizer: + s = self.tokenizer.decode(s) + return s + + gen_out = self.inference_step(generator, [model], sample, prefix_tokens=None) + hyps, refs = [], [] + for i in range(len(gen_out)): + hyps.append(decode(gen_out[i][0]["tokens"])) + refs.append( + decode( + utils.strip_pad(sample["target"][i], self.tgt_dict.pad()), + escape_unk=True, # don't count <unk> as matches to the hypo + ) + ) + if self.cfg.eval_bleu_print_samples: + logger.info("example hypothesis: " + hyps[0]) + logger.info("example reference: " + refs[0]) + if self.cfg.eval_tokenized_bleu: + return sacrebleu.corpus_bleu(hyps, [refs], tokenize="none") + else: + return sacrebleu.corpus_bleu(hyps, [refs]) diff --git a/fairseq/fairseq/tasks/translation_from_pretrained_bart.py b/fairseq/fairseq/tasks/translation_from_pretrained_bart.py new file mode 100644 index 0000000..0fd7a5b --- /dev/null +++ b/fairseq/fairseq/tasks/translation_from_pretrained_bart.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from fairseq import utils +from fairseq.data import LanguagePairDataset + +from . import register_task +from .translation import TranslationTask, load_langpair_dataset + + +@register_task("translation_from_pretrained_bart") +class TranslationFromPretrainedBARTTask(TranslationTask): + """ + Translate from source language to target language with a model initialized with a multilingual pretrain. + + Args: + src_dict (~fairseq.data.Dictionary): dictionary for the source language + tgt_dict (~fairseq.data.Dictionary): dictionary for the target language + + .. note:: + + The translation task is compatible with :mod:`fairseq-train`, + :mod:`fairseq-generate` and :mod:`fairseq-interactive`. + + The translation task provides the following additional command-line + arguments: + + .. argparse:: + :ref: fairseq.tasks.translation_parser + :prog: + """ + + @staticmethod + def add_args(parser): + """Add task-specific arguments to the parser.""" + # fmt: off + TranslationTask.add_args(parser) + parser.add_argument('--langs', type=str, metavar='LANG', + help='comma-separated list of monolingual language, ' + 'for example, "en,de,fr". These should match the ' + 'langs from pretraining (and be in the same order). ' + 'You should always add all pretraining language idx ' + 'during finetuning.') + parser.add_argument('--prepend-bos', action='store_true', + help='prepend bos token to each sentence, which matches ' + 'mBART pretraining') + # fmt: on + + def __init__(self, args, src_dict, tgt_dict): + super().__init__(args, src_dict, tgt_dict) + self.langs = args.langs.split(",") + for d in [src_dict, tgt_dict]: + for l in self.langs: + d.add_symbol("[{}]".format(l)) + d.add_symbol("<mask>") + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. 
def build_generator(self, models, args, **unused):
    """Build the scorer or generator used at inference time.

    Both variants use the index of the ``[<target_lang>]`` symbol in the
    target dictionary as their end-of-sentence id.

    Args:
        models: models handed to ``SequenceGenerator``.
        args: generation options, read with ``getattr`` and defaults.
        **unused: accepted and ignored.

    Returns:
        A ``SequenceScorer`` when ``args.score_reference`` is truthy,
        otherwise a ``SequenceGenerator``.
    """
    if getattr(args, "score_reference", False):
        from fairseq.sequence_scorer import SequenceScorer

        return SequenceScorer(
            self.target_dictionary,
            eos=self.tgt_dict.index("[{}]".format(self.args.target_lang)),
        )

    from fairseq.sequence_generator import SequenceGenerator

    search_kwargs = {
        "beam_size": getattr(args, "beam", 5),
        "max_len_a": getattr(args, "max_len_a", 0),
        "max_len_b": getattr(args, "max_len_b", 200),
        "min_len": getattr(args, "min_len", 1),
        "normalize_scores": (not getattr(args, "unnormalized", False)),
        "len_penalty": getattr(args, "lenpen", 1),
        "unk_penalty": getattr(args, "unkpen", 0),
        "temperature": getattr(args, "temperature", 1.0),
        "match_source_len": getattr(args, "match_source_len", False),
        "no_repeat_ngram_size": getattr(args, "no_repeat_ngram_size", 0),
    }
    lang_tag_index = self.tgt_dict.index("[{}]".format(self.args.target_lang))
    return SequenceGenerator(
        models,
        self.target_dictionary,
        eos=lang_tag_index,
        **search_kwargs,
    )
@register_task(
    "translation_from_pretrained_xlm", dataclass=TranslationFromPretrainedXLMConfig
)
class TranslationFromPretrainedXLMTask(TranslationTask):
    """
    A TranslationTask whose dictionaries are loaded with MaskedLMDictionary,
    so that data binarized with the MaskedLMDictionary class can be read.

    Use this task for the whole pipeline when training an NMT model from a
    pretrained XLM checkpoint: binarizing the NMT data, training with the
    pretrained XLM checkpoint, and evaluating the trained model.
    """

    @classmethod
    def load_dictionary(cls, filename):
        """Load the masked LM dictionary stored in ``filename``.

        Args:
            filename (str): the filename

        Returns:
            The result of ``MaskedLMDictionary.load(filename)``.
        """
        dictionary = MaskedLMDictionary.load(filename)
        return dictionary
def inject_noise(self, target_tokens):
    """Corrupt ``target_tokens`` according to ``self.cfg.noise``.

    Supported settings are ``random_delete``, ``random_mask``,
    ``full_mask`` and ``no_noise`` (the input is returned as is).

    Args:
        target_tokens: 2-D integer tensor of token ids, one row per sentence
            (the helpers sort and gather along dimension 1).

    Returns:
        The corrupted token tensor.

    Raises:
        NotImplementedError: if ``self.cfg.noise`` is none of the above.
    """

    def _random_delete(target_tokens):
        pad = self.tgt_dict.pad()
        bos = self.tgt_dict.bos()
        eos = self.tgt_dict.eos()

        max_len = target_tokens.size(1)
        is_pad = target_tokens.eq(pad)

        # One random key per position; bos/eos are pinned to 0 and padding
        # to 1 before sorting the keys along each row.
        keys = target_tokens.clone().float().uniform_()
        keys.masked_fill_(target_tokens.eq(bos) | target_tokens.eq(eos), 0.0)
        keys.masked_fill_(is_pad, 1)
        keys, order = keys.sort(1)
        lengths = is_pad.size(1) - is_pad.float().sum(1, keepdim=True)

        # do not delete <bos> and <eos> (we assign 0 score for them)
        keep_count = (
            2 + ((lengths - 2) * keys.new_zeros(keys.size(0), 1).uniform_()).long()
        )
        dropped = keys.sort(1)[1] >= keep_count

        # Blank the dropped slots in sorted order, then gather back using the
        # original positions (dropped slots are pushed to the end).
        shuffled = target_tokens.gather(1, order).masked_fill_(dropped, pad)
        restore = order.masked_fill_(dropped, max_len).sort(1)[1]
        survivors = shuffled.gather(1, restore)

        # Trim trailing columns that are padding in every row.
        longest = survivors.ne(pad).sum(1).max()
        return survivors[:, :longest]

    def _random_mask(target_tokens):
        pad = self.tgt_dict.pad()
        bos = self.tgt_dict.bos()
        eos = self.tgt_dict.eos()
        unk = self.tgt_dict.unk()

        maskable = (
            target_tokens.ne(pad) & target_tokens.ne(bos) & target_tokens.ne(eos)
        )
        keys = target_tokens.clone().float().uniform_()
        # Special tokens get a key above every random key, so they sort last.
        keys.masked_fill_(~maskable, 2.0)
        mask_count = maskable.sum(1).float()
        mask_count = mask_count * mask_count.clone().uniform_()
        mask_count = mask_count + 1  # make sure to mask at least one token.

        _, order = keys.sort(1)
        chosen = new_arange(order) < mask_count[:, None].long()
        return target_tokens.masked_fill(chosen.scatter(1, order, chosen), unk)

    def _full_mask(target_tokens):
        pad = self.tgt_dict.pad()
        bos = self.tgt_dict.bos()
        eos = self.tgt_dict.eos()
        unk = self.tgt_dict.unk()

        special = (
            target_tokens.eq(bos) | target_tokens.eq(eos) | target_tokens.eq(pad)
        )
        return target_tokens.masked_fill(~special, unk)

    def _no_noise(target_tokens):
        return target_tokens

    # Compared with ``==`` in the same order as the original if/elif chain.
    handlers = (
        ("random_delete", _random_delete),
        ("random_mask", _random_mask),
        ("full_mask", _full_mask),
        ("no_noise", _no_noise),
    )
    for noise_name, handler in handlers:
        if self.cfg.noise == noise_name:
            return handler(target_tokens)
    raise NotImplementedError
def train_step(
    self, sample, model, criterion, optimizer, update_num, ignore_grad=False
):
    """Run one training step on a freshly noised copy of the target.

    Args:
        sample: batch dict; ``sample["prev_target"]`` is set to the result
            of ``self.inject_noise(sample["target"])``.
        model: model to train; switched to training mode.
        criterion: callable returning ``(loss, sample_size, logging_output)``.
        optimizer: object whose ``backward(loss)`` is invoked.
        update_num: not used by this method.
        ignore_grad: when true the loss is multiplied by 0 before backward.

    Returns:
        Tuple ``(loss, sample_size, logging_output)``.
    """
    model.train()
    noisy_target = self.inject_noise(sample["target"])
    sample["prev_target"] = noisy_target

    loss, sample_size, logging_output = criterion(model, sample)
    if ignore_grad:
        # Kept as an in-place multiply, exactly like the original.
        loss *= 0
    optimizer.backward(loss)

    return loss, sample_size, logging_output
def get_time_gap(s, e):
    """Return the elapsed time between two POSIX timestamps as a string.

    Args:
        s: start timestamp in seconds (e.g. a ``time.time()`` value).
        e: end timestamp in seconds.

    Returns:
        ``str`` of the corresponding ``datetime.timedelta``,
        e.g. ``"0:01:05.250000"``.
    """
    # Subtract the raw timestamps rather than two naive local datetimes:
    # a local wall-clock difference is off by the DST shift whenever a
    # transition falls between ``s`` and ``e``.
    return str(datetime.timedelta(seconds=e - s))
@staticmethod
def add_args(parser):
    """Add task-specific arguments to the parser."""
    # Options owned by this task, registered in the order listed:
    # (flags, keyword arguments for ``parser.add_argument``).
    task_options = (
        (
            ("-s", "--source-lang"),
            dict(default=None, metavar="SRC", help="inference source language"),
        ),
        (
            ("-t", "--target-lang"),
            dict(default=None, metavar="TARGET", help="inference target language"),
        ),
        (
            ("--lang-pairs",),
            dict(
                default=None,
                metavar="PAIRS",
                help="comma-separated list of language pairs (in training order): en-de,en-fr,de-fr",
                action=FileContentsAction,
            ),
        ),
        (
            ("--keep-inference-langtok",),
            dict(
                action="store_true",
                help="keep language tokens in inference output (e.g. for analysis or debugging)",
            ),
        ),
    )
    for flags, spec in task_options:
        parser.add_argument(*flags, **spec)

    # Sampling and data-manager options are contributed by their own classes.
    SamplingMethod.add_arguments(parser)
    MultilingualDatasetManager.add_args(parser)
def check_dicts(self, dicts, source_langs, target_langs):
    """Assert that each side of the language pairs uses one shared dictionary.

    The check is skipped when ``self.args.source_dict`` or
    ``self.args.target_dict`` is set.

    Args:
        dicts: mapping from language code to its dictionary.
        source_langs: source language codes; the first is the reference.
        target_langs: target language codes; the first is the reference.

    Raises:
        AssertionError: if a language's dictionary differs from the first
            dictionary on the same side.
    """
    if self.args.source_dict is not None or self.args.target_dict is not None:
        # no need to check whether the source side and target side are sharing dictionaries
        return
    src_dict = dicts[source_langs[0]]
    tgt_dict = dicts[target_langs[0]]
    # The two message halves are parenthesised together so that both end up
    # in the AssertionError; previously the second half was a separate
    # no-op statement and was never shown.
    for src_lang in source_langs:
        assert src_dict == dicts[src_lang], (
            "Diffrent dictionary are specified for different source languages; "
            "TranslationMultiSimpleEpochTask only supports one shared dictionary across all source languages"
        )
    for tgt_lang in target_langs:
        assert tgt_dict == dicts[tgt_lang], (
            "Diffrent dictionary are specified for different target languages; "
            "TranslationMultiSimpleEpochTask only supports one shared dictionary across all target languages"
        )
def build_generator(
    self,
    models,
    args,
    seq_gen_cls=None,
    extra_gen_cls_kwargs=None,
):
    """Build the generator, stripping the target language token by default.

    Unless ``args.keep_inference_langtok`` is set, and provided a target
    language-token spec is configured in ``self.args.langtoks["main"]``,
    the decoder language token is passed to the generator as
    ``symbols_to_strip_from_output``.

    Args:
        models: models to generate with.
        args: generation arguments.
        seq_gen_cls: optional generator class, forwarded to the parent.
        extra_gen_cls_kwargs: optional extra keyword arguments for the
            generator class; the caller's dict is not modified.

    Returns:
        Whatever the parent class's ``build_generator`` returns.
    """
    if not getattr(args, "keep_inference_langtok", False):
        _, tgt_langtok_spec = self.args.langtoks["main"]
        if tgt_langtok_spec:
            tgt_lang_tok = self.data_manager.get_decoder_langtok(
                self.args.target_lang, tgt_langtok_spec
            )
            # Copy first so a dict supplied by the caller is left untouched.
            extra_gen_cls_kwargs = dict(extra_gen_cls_kwargs or {})
            extra_gen_cls_kwargs["symbols_to_strip_from_output"] = {tgt_lang_tok}

    # Forward the caller's seq_gen_cls; it used to be replaced by a
    # hard-coded None, silently ignoring the argument.
    return super().build_generator(
        models,
        args,
        seq_gen_cls=seq_gen_cls,
        extra_gen_cls_kwargs=extra_gen_cls_kwargs,
    )
def create_batch_sampler_func(
    self,
    max_positions,
    ignore_invalid_inputs,
    max_tokens,
    max_sentences,
    required_batch_size_multiple=1,
    seed=1,
):
    """Return a ``(dataset, epoch) -> batch_sampler`` closure.

    The closure orders the dataset indices under ``seed``, optionally
    filters them by ``max_positions``, batches them with the given size
    limits, and logs timing and memory usage for each stage.

    Args:
        max_positions: size limit for filtering; ``None`` skips filtering.
        ignore_invalid_inputs: forwarded to ``filter_indices_by_size``.
        max_tokens: forwarded to ``dataset.batch_by_size``.
        max_sentences: forwarded to ``dataset.batch_by_size``.
        required_batch_size_multiple: forwarded to ``dataset.batch_by_size``.
        seed: seed for ``data_utils.numpy_seed`` while ordering indices.
    """

    def construct_batch_sampler(dataset, epoch):
        # Name of the first registered split holding this dataset (None if
        # there is none); it is only used to tag the log lines.
        owners = [name for name in self.datasets if self.datasets[name] == dataset]
        split = owners[0] if owners else None

        # NEW implementation
        if epoch is not None:
            # initialize the dataset with the correct starting epoch
            dataset.set_epoch(epoch)

        # get indices ordered by example size
        began = time.time()
        logger.info(f"start batch sampler: mem usage: {data_utils.get_mem_usage()}")

        with data_utils.numpy_seed(seed):
            indices = dataset.ordered_indices()
        logger.info(
            f"[{split}] @batch_sampler order indices time: {get_time_gap(began, time.time())}"
        )
        logger.info(f"mem usage: {data_utils.get_mem_usage()}")

        # filter examples that are too large
        if max_positions is not None:
            stage_began = time.time()
            indices = self.filter_indices_by_size(
                indices, dataset, max_positions, ignore_invalid_inputs
            )
            logger.info(
                f"[{split}] @batch_sampler filter_by_size time: {get_time_gap(stage_began, time.time())}"
            )
            logger.info(f"mem usage: {data_utils.get_mem_usage()}")

        # create mini-batches with given size constraints
        stage_began = time.time()
        batch_sampler = dataset.batch_by_size(
            indices,
            max_tokens=max_tokens,
            max_sentences=max_sentences,
            required_batch_size_multiple=required_batch_size_multiple,
        )

        logger.info(
            f"[{split}] @batch_sampler batch_by_size time: {get_time_gap(stage_began, time.time())}"
        )
        logger.info(
            f"[{split}] per epoch batch_sampler set-up time: {get_time_gap(began, time.time())}"
        )
        logger.info(f"mem usage: {data_utils.get_mem_usage()}")

        return batch_sampler

    return construct_batch_sampler
+ grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. + update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + # initialize the dataset with the correct starting epoch + assert isinstance(dataset, FairseqDataset) + if dataset in self.dataset_to_epoch_iter: + return self.dataset_to_epoch_iter[dataset] + if self.args.sampling_method == "RoundRobin": + batch_iter = super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + update_epoch_batch_itr=update_epoch_batch_itr, + ) + self.dataset_to_epoch_iter[dataset] = batch_iter + return batch_iter + + construct_batch_sampler = self.create_batch_sampler_func( + max_positions, + ignore_invalid_inputs, + max_tokens, + max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + ) + + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=construct_batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + ) + return epoch_iter diff --git a/fairseq/fairseq/token_generation_constraints.py b/fairseq/fairseq/token_generation_constraints.py new file mode 100644 index 0000000..e708dc5 --- /dev/null +++ b/fairseq/fairseq/token_generation_constraints.py @@ -0,0 +1,506 @@ +# 
class ConstraintState:
    """Base class of the constraint-state classes in this module; it holds no state."""

    def __init__(self):
        """Nothing to initialise."""
class ConstraintNode:
    """
    A node of the trie that stores unordered constraints.
    """

    def __init__(self, token: int = None, parent=None):
        # Token labelling this node (None for the root).
        self.token = None if token is None else int(token)
        # Parent node (None at the root).
        self.parent = parent
        # How many constraints end exactly at this node.
        self.terminal = 0
        # Child nodes, keyed by their token.
        self.children = {}

        # Cumulative number of constraints from this point in the trie
        # forward.
        self.num_constraints = 0

    @property
    def id(self):
        return self.token

    def __str__(self):
        is_terminal = self.terminal != 0
        return f"[{self.token}].{is_terminal}#{self.num_constraints}"

    def __getitem__(self, key: int):
        """Return the child labelled ``key``, or None if there is none."""
        return self.children.get(key)

    def next_tokens(self) -> Set[int]:
        """The set of child labels."""
        return set(self.children)

    @staticmethod
    def create(constraints: List[List[int]]):
        """Build a trie from a list of token sequences and return its root."""
        root = ConstraintNode()
        for sequence in constraints:
            root.add_sequence(sequence)
        return root

    @staticmethod
    def print_graph(node: "ConstraintNode"):
        """Return a parenthesised, depth-first rendering of the subtree."""
        if not node.children:
            return str(node)
        rendered = [f"({node}"]
        rendered.extend(
            " " + ConstraintNode.print_graph(child)
            for child in node.children.values()
        )
        rendered.append(")")
        return "".join(rendered)

    def token_counts(self) -> Counter:
        """Returns a counter of the number of times each token is used
        in a constraint.
        """
        counts = Counter()
        # Explicit stack over all descendants (the node itself is excluded).
        stack = list(self.children.values())
        while stack:
            current = stack.pop()
            counts[current.id] += current.num_constraints
            stack.extend(current.children.values())
        return counts

    def tokens(self) -> Set[int]:
        """Returns the set of tokens in constraints."""
        return set(self.token_counts())

    def add_sequence(self, sequence: List[int]):
        """Adds a constraint, represented as a list of integers, to
        the trie."""
        assert len(sequence) > 0

        # Walk down the trie, creating missing nodes along the way.
        node = self
        for raw_token in sequence:
            token = int(raw_token)
            if token not in node.children:
                node.children[token] = ConstraintNode(token, parent=node)
            node = node.children[token]

        # Mark the end of the constraint and bump the cumulative counters
        # on the final node and every ancestor up to the root.
        node.terminal += 1
        node.num_constraints += 1
        ancestor = node.parent
        while ancestor is not None:
            ancestor.num_constraints += 1
            ancestor = ancestor.parent
def next_tokens(self) -> Set[int]:
    """Return the set of tokens that could come next.

    Tokens extending the root state are always included; when the current
    node is not the root, tokens extending the current node are added.
    """
    away_from_root = self.node != self.root
    allowed = self.root.next_tokens()
    if away_from_root:
        allowed = allowed.union(self.node.next_tokens())
    return allowed
class ConstraintSequence:
    """A set of (possibly multi-token) constraints stored as one flat list.

    The constraints are concatenated in the order given, and ``endpoints``
    records, for every position of the flat list, whether that position is
    the last token of a constraint.

    Attributes:
        sequences: flat list of all constraint tokens, as ints.
        endpoints: list of bools parallel to ``sequences``.
        num_tokens: total number of constraint tokens.
        tokens: set of distinct tokens appearing in any constraint.
    """

    def __init__(self, sequences: List[List[int]]):
        """Build the flat representation from a list of constraints.

        Args:
            sequences: the constraints, each a sequence of token ids.
                Empty constraints are ignored.
        """
        self.sequences = []
        self.endpoints = []
        self.num_tokens = 0
        self.tokens = set()
        for sequence in sequences:
            # Cast every token with int(), as ConstraintNode.add_sequence
            # does, so that set membership and counting work on plain ints.
            # NOTE(review): presumably callers may pass tensor slices here;
            # confirm against unpack_constraints.
            tokens = [int(token) for token in sequence]
            if not tokens:
                # An empty constraint would append an endpoint flag without a
                # matching token and misalign ``endpoints`` and ``sequences``.
                continue
            self.tokens.update(tokens)
            self.num_tokens += len(tokens)
            self.endpoints += [False] * (len(tokens) - 1) + [True]
            self.sequences += tokens

    def __getitem__(self, key: int):
        return self.sequences[key]

    def __len__(self):
        return len(self.sequences)

    def __str__(self):
        return str(self.sequences)

    def token_counts(self):
        """Return a ``Counter`` mapping each token to its number of occurrences.

        ``OrderedConstraintState.token_counts`` calls this method on its
        sequence, so it has to exist on this class.
        """
        return Counter(self.sequences)
+ """ + + def __init__(self, sequence: ConstraintSequence, state: int = -1): + self.sequence = sequence + self.state = state + + @staticmethod + def create(constraint_tensor: torch.Tensor): + constraint_list = unpack_constraints(constraint_tensor) + return OrderedConstraintState(ConstraintSequence(constraint_list), -1) + + def __str__(self): + return f"{self.state}/{self.bank}x{self.num_completed}" + + def __copy__(self): + return OrderedConstraintState(self.sequence, self.state) + + def copy(self): + return self.__copy__() + + @property + def num_completed(self): + if self.state == -1: + return 0 + count = len( + list(filter(lambda x: x, self.sequence.endpoints[0 : self.state + 1])) + ) + return count + + @property + def is_root(self): + return self.state == -1 + + @property + def name(self): + if self.state == -1: + return "ROOT" + else: + return str(self.sequence[self.state]) + + @property + def bank(self) -> int: + return self.state + 1 + + @property + def finished(self): + return self.state + 1 == len(self.sequence) + + @property + def token_counts(self): + return self.sequence.token_counts() + + @property + def tokens(self): + return self.sequence.tokens + + @property + def num_constraint_tokens(self): + return sum(self.token_counts.values()) + + def next_tokens(self) -> Set[int]: + """Returns the list of tokens that could come next. + These are (a) all tokens extending the root state and, for + non-root states, additionally all tokens extending the current + state.""" + + tokens = set() + if self.state > 0: + tokens.add(self.sequence[0]) + if not self.finished: + tokens.add(self.sequence[self.state + 1]) + return tokens + + def advance(self, token: int): + """Reads in a token and advances the state. Here's how it works. + + We can advance to the next state if: + - there is a matching child + - its path isn't blocked + + A path is blocked when all constraints that are descendants of + that node have already been generated, in the current state. 
# Matches any run of one or more whitespace characters.
SPACE_NORMALIZER = re.compile(r"\s+")


def tokenize_line(line):
    """Split *line* into a list of whitespace-separated tokens.

    Every run of whitespace is first collapsed to a single space, the result
    is stripped, and the remaining text is split on whitespace.
    """
    collapsed = SPACE_NORMALIZER.sub(" ", line)
    return collapsed.strip().split()
+ +""" +Train a network across multiple GPUs. +""" + +import contextlib +import logging +import os +import sys +import time +from argparse import Namespace +from itertools import chain +from typing import Any, Dict, List + +import torch +from omegaconf import OmegaConf + +from fairseq import checkpoint_utils, models, optim, utils +from fairseq.dataclass.configs import FairseqConfig +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.distributed import utils as distributed_utils +from fairseq.file_io import PathManager +from fairseq.logging import meters, metrics +from fairseq.models.ema import build_ema +from fairseq.nan_detector import NanDetector +from fairseq.optim import lr_scheduler +from fairseq.utils import safe_hasattr + +logger = logging.getLogger(__name__) + + +class Trainer(object): + """Main class for data parallel training. + + This class supports synchronous distributed data parallel training, + where multiple workers each have a full model replica and gradients + are accumulated across workers before each update. We use + :class:`~torch.nn.parallel.DistributedDataParallel` to handle + communication of the gradients across workers. + """ + + def __init__(self, cfg: FairseqConfig, task, model, criterion, quantizer=None): + + if isinstance(cfg, Namespace): + logger.warning( + "argparse.Namespace configuration is deprecated! 
Automatically converting to OmegaConf" + ) + cfg = convert_namespace_to_omegaconf(cfg) + + self.cfg = cfg + self.task = task + + # catalog shared parameters + shared_params = _catalog_shared_params(model) + self.tpu = cfg.common.tpu + self.cuda = torch.cuda.is_available() and not cfg.common.cpu and not self.tpu + if self.cuda: + self.device = torch.device("cuda") + elif self.tpu: + self.device = utils.get_tpu_device() + else: + self.device = torch.device("cpu") + + if self.is_fsdp: + import fairscale + + if self.cfg.common.bf16: + raise ValueError( + "FullyShardedDataParallel is not compatible with --bf16 or " + "--memory-efficient-bf16" + ) + if self.cfg.distributed_training.zero_sharding != "none": + raise ValueError( + "FullyShardedDataParallel is not compatible with --zero-sharding " + "option (it's already built in)" + ) + if ( + max(self.cfg.optimization.update_freq) > 1 + and fairscale.__version__ < "0.4.0" + ): + raise RuntimeError( + "Please update to fairscale 0.4.0 or newer when combining " + "--update-freq with FullyShardedDataParallel" + ) + else: + if ( + hasattr(self.cfg.distributed_training, "cpu_offload") + and self.cfg.distributed_training.cpu_offload + ): + raise ValueError("--cpu-offload requires --ddp-backend=fully_sharded") + + # copy model and criterion to current device/dtype + self._criterion = criterion + self._model = model + if not self.is_fsdp: + if cfg.common.fp16: + assert not cfg.common.amp, "Cannot use fp16 and AMP together" + self._criterion = self._criterion.half() + self._model = self._model.half() + elif cfg.common.bf16: + self._criterion = self._criterion.to(dtype=torch.bfloat16) + self._model = self._model.to(dtype=torch.bfloat16) + elif cfg.common.amp: + self._amp_retries = 0 + if ( + not cfg.distributed_training.pipeline_model_parallel + # the DistributedFairseqModel wrapper will handle moving to device, + # so only handle cases which don't use the wrapper + and not self.use_distributed_wrapper + ): + self._criterion = 
self._criterion.to(device=self.device) + self._model = self._model.to(device=self.device) + self.pipeline_model_parallel = cfg.distributed_training.pipeline_model_parallel + self.last_device = None + if self.cuda and self.pipeline_model_parallel: + self.last_device = torch.device( + cfg.distributed_training.pipeline_devices[-1] + ) + + # check that shared parameters are preserved after device transfer + for shared_param in shared_params: + ref = _get_module_by_path(self._model, shared_param[0]) + for path in shared_param[1:]: + logger.info( + "detected shared parameter: {} <- {}".format(shared_param[0], path) + ) + _set_module_by_path(self._model, path, ref) + + self._dummy_batch = None # indicates we don't have a dummy batch at first + self._lr_scheduler = None + self._num_updates = 0 + self._num_xla_compiles = 0 # for TPUs + self._optim_history = None + self._optimizer = None + self._warn_once = set() + self._wrapped_criterion = None + self._wrapped_model = None + self._ema = None + + # TODO(myleott): support tpu + if self.cuda and self.data_parallel_world_size > 1: + self._grad_norm_buf = torch.cuda.DoubleTensor(self.data_parallel_world_size) + else: + self._grad_norm_buf = None + + self.quantizer = quantizer + if self.quantizer is not None: + self.quantizer.set_trainer(self) + + # get detailed cuda environment + if self.cuda: + self.cuda_env = utils.CudaEnvironment() + if self.data_parallel_world_size > 1: + self.cuda_env_arr = distributed_utils.all_gather_list( + self.cuda_env, group=distributed_utils.get_global_group() + ) + else: + self.cuda_env_arr = [self.cuda_env] + if self.data_parallel_rank == 0: + utils.CudaEnvironment.pretty_print_cuda_env_list(self.cuda_env_arr) + else: + self.cuda_env = None + self.cuda_env_arr = None + + metrics.log_start_time("wall", priority=790, round=0) + + self._start_time = time.time() + self._previous_training_time = 0 + self._cumulative_training_time = None + + def reinitialize(self): + """Reinitialize the Trainer, 
typically after model params change.""" + self._lr_scheduler = None + self._optimizer = None + self._wrapped_criterion = None + self._wrapped_model = None + + @property + def data_parallel_world_size(self): + if self.cfg.distributed_training.distributed_world_size == 1: + return 1 + return distributed_utils.get_data_parallel_world_size() + + @property + def data_parallel_process_group(self): + return distributed_utils.get_data_parallel_group() + + @property + def data_parallel_rank(self): + if self.cfg.distributed_training.distributed_world_size == 1: + return 0 + return distributed_utils.get_data_parallel_rank() + + @property + def is_data_parallel_master(self): + # NOTE: this returns true for all model parallel replicas with data + # parallel rank 0 + return self.data_parallel_rank == 0 + + @property + def use_distributed_wrapper(self) -> bool: + return ( + self.data_parallel_world_size > 1 and not self.cfg.optimization.use_bmuf + ) or (self.is_fsdp and self.cfg.distributed_training.cpu_offload) + + @property + def should_save_checkpoint_on_current_rank(self) -> bool: + """Indicates whether to save checkpoints on the current DDP rank.""" + if ( + self.is_fsdp and self.cfg.distributed_training.use_sharded_state + ) or getattr(self.cfg.model, "base_layers", 0) > 0: + return True + else: + return self.is_data_parallel_master + + @property + def always_call_state_dict_during_save_checkpoint(self) -> bool: + if self.is_fsdp and not self.cfg.distributed_training.use_sharded_state: + # FSDP calls communication collective when consolidating checkpoints + return True + else: + return False + + @property + def checkpoint_suffix(self) -> str: + """Suffix to add to the checkpoint file name.""" + if self.is_fsdp and self.cfg.distributed_training.use_sharded_state: + return self.cfg.checkpoint.checkpoint_suffix + "-shard{0}".format( + self.data_parallel_rank + ) + else: + return self.cfg.checkpoint.checkpoint_suffix or "" + + @property + def criterion(self): + if 
self._wrapped_criterion is None: + if utils.has_parameters(self._criterion) and self.use_distributed_wrapper: + self._wrapped_criterion = models.DistributedFairseqModel( + self.cfg.distributed_training, + self._criterion, + process_group=self.data_parallel_process_group, + device=self.device, + ) + else: + self._wrapped_criterion = self._criterion + return self._wrapped_criterion + + @property + def model(self): + if self._wrapped_model is None: + if self.use_distributed_wrapper: + self._wrapped_model = models.DistributedFairseqModel( + self.cfg.distributed_training, + self._model, + process_group=self.data_parallel_process_group, + device=self.device, + ) + else: + self._wrapped_model = self._model + return self._wrapped_model + + @property + def ema(self): + if self._ema is None: + self._build_ema() + return self._ema + + def _build_ema(self): + if self.cfg.ema.store_ema: + self._ema = build_ema(self._model, self.cfg.ema, self.device) + logger.info("Exponential Moving Average Shadow Model is initialized.") + + @property + def optimizer(self): + if self._optimizer is None: + self._build_optimizer() + return self._optimizer + + @property + def lr_scheduler(self): + if self._lr_scheduler is None: + self._build_optimizer() # this will initialize self._lr_scheduler + return self._lr_scheduler + + def _build_optimizer(self): + + if ( + self.cfg.optimization.debug_param_names + and self.cfg.common.fp16_no_flatten_grads + ): + params = [] + self.param_names = [] + + for n, p in chain( + self.model.named_parameters(), self.criterion.named_parameters() + ): + if p.requires_grad: + params.append(p) + self.param_names.append(n) + else: + params = list( + filter( + lambda p: p.requires_grad, + chain(self.model.parameters(), self.criterion.parameters()), + ) + ) + + if self.is_fsdp and self.cfg.common.fp16: + # FullyShardedDataParallel always uses MemoryEfficientFP16 wrapper, + # mostly for the grad scaling. 
But if we don't have the + # --memory-efficient-fp16 flag set, then we're effectively doing + # regular --fp16 and can allow the use of optimizers that would + # otherwise be unsupported by MemoryEfficientFP16Optimizer. + allow_unsupported = not self.cfg.common.memory_efficient_fp16 + self._optimizer = optim.MemoryEfficientFP16Optimizer.build_optimizer( + self.cfg, params, allow_unsupported=allow_unsupported + ) + elif self.cfg.common.fp16 or self.cfg.common.bf16 or self.cfg.common.amp: + if self.cuda and torch.cuda.get_device_capability(0)[0] < 7: + logger.info( + "NOTE: your device does NOT support faster training with --fp16 or --amp, " + "please switch to FP32 which is likely to be faster" + ) + if ( + self.cfg.common.memory_efficient_fp16 + or self.cfg.common.memory_efficient_bf16 + ): + self._optimizer = optim.MemoryEfficientFP16Optimizer.build_optimizer( + self.cfg, params + ) + elif self.cfg.common.amp: + self._optimizer = optim.AMPOptimizer.build_optimizer(self.cfg, params) + else: + self._optimizer = optim.FP16Optimizer.build_optimizer(self.cfg, params) + else: + if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7: + logger.info( + "NOTE: your device may support faster training with --fp16 or --amp" + ) + self._optimizer = optim.build_optimizer(self.cfg.optimizer, params) + + if self.is_fsdp: + assert ( + not self.cfg.optimization.use_bmuf + ), "--ddp-backend=fully_sharded is not compatible with BMUF" + assert self._optimizer.supports_flat_params, ( + "--ddp-backend=fully_sharded is only compatible with pointwise " + "optimizers (e.g., Adam, AdamW, Adadelta, Adamax, SGD, etc.). 
" + "However, the sharding will result in slightly different results when " + "using non-pointwise optimizers (e.g., Adagrad, Adafactor, LAMB)" + ) + + if self.cfg.optimization.use_bmuf: + self._optimizer = optim.FairseqBMUF( + self.cfg.bmuf, + self._optimizer, + ) + + if self.cfg.distributed_training.zero_sharding == "os": + if ( + self.cfg.common.fp16 + and not self.cfg.common.memory_efficient_fp16 + and not self.cfg.common.memory_efficient_bf16 + ) and not self.cfg.common.fp16_no_flatten_grads: + raise ValueError( + "ZeRO is incomptabile with fp16 and flattened grads. " + "Please use --fp16-no-flatten-grads" + ) + else: + optim.shard_(self._optimizer, self.data_parallel_process_group) + + # We should initialize the learning rate scheduler immediately after + # building the optimizer, so that the initial learning rate is set. + self._lr_scheduler = lr_scheduler.build_lr_scheduler( + self.cfg.lr_scheduler, + self.optimizer, + ) + self._lr_scheduler.step_update(0) + + @property + def is_fsdp(self): + return self.cfg.distributed_training.ddp_backend == "fully_sharded" + + def consolidate_optimizer(self): + """For OSS, we need to consolidate the state dict.""" + if self.cfg.checkpoint.no_save_optimizer_state: + return + self._gathered_optim_state = None + if hasattr(self.optimizer.optimizer, "consolidate_state_dict"): + self.optimizer.optimizer.consolidate_state_dict() + elif self.is_fsdp and not self.model.use_sharded_state: + st = self.model.gather_full_optim_state_dict( + self.optimizer + ) # only returns on rank 0 + self._gathered_optim_state = st + + def state_dict(self): + state_dict = { + "args": None, # legacy + "cfg": ( + OmegaConf.to_container(self.cfg, resolve=True, enum_to_str=True) + if OmegaConf.is_config(self.cfg) + else self.cfg + ), + "model": self.model.state_dict(), + "criterion": ( + self.criterion.state_dict() + if utils.has_parameters(self.criterion) + else None + ), + "optimizer_history": (self._optim_history or []) + + [ + { + 
"criterion_name": self.get_criterion().__class__.__name__, + "optimizer_name": self.optimizer.__class__.__name__, + "lr_scheduler_state": self.lr_scheduler.state_dict(), + "num_updates": self.get_num_updates(), + } + ], + "task_state": self.task.state_dict() if self.task is not None else {}, + "extra_state": { + "metrics": metrics.state_dict(), + "previous_training_time": self.cumulative_training_time(), + }, + } + if self.cfg.ema.store_ema: + # Save EMA model state as extra state + state_dict["extra_state"]["ema"] = self.ema.get_model().state_dict() + if self.cfg.ema.ema_fp32: + # Save EMA params in fp32 + state_dict["extra_state"]["ema_fp32_params"] = self.ema.fp32_params + if not self.cfg.checkpoint.no_save_optimizer_state: + if self._gathered_optim_state is not None: + state_dict["last_optimizer_state"] = self._gathered_optim_state + self._gathered_optim_state = None + else: + state_dict["last_optimizer_state"] = self.optimizer.state_dict() + if self.is_fsdp: + # save meta data for recombining checkpoint upon loading + state_dict["fsdp_metadata"] = self.model.local_metadata_dict() + return state_dict + + def save_checkpoint(self, filename, extra_state): + """Save all training state in a checkpoint file.""" + if self.should_save_checkpoint_on_current_rank: + + logger.info(f"Saving checkpoint to {os.path.abspath(filename)}") + # call state_dict on all ranks in case it needs internal communication + state_dict = utils.move_to_cpu(self.state_dict()) + state_dict["extra_state"].update(extra_state) + + checkpoint_utils.torch_persistent_save( + state_dict, + filename, + async_write=self.cfg.checkpoint.write_checkpoints_asynchronously, + ) + logger.info(f"Finished saving checkpoint to {os.path.abspath(filename)}") + return os.path.abspath(filename) + return None + + def load_checkpoint( + self, + filename, + reset_optimizer=False, + reset_lr_scheduler=False, + optimizer_overrides=None, + reset_meters=False, + ): + """ + Load all training state from a checkpoint file. 
+ rank = 0 will load the checkpoint, and then broadcast it to all + other ranks. + """ + extra_state, self._optim_history, last_optim_state = None, [], None + + logger.info(f"Preparing to load checkpoint {filename}") + is_distributed = self.data_parallel_world_size > 1 + bexists = PathManager.isfile(filename) + if bexists: + load_on_all_ranks = ( + self.cfg.checkpoint.load_checkpoint_on_all_dp_ranks + # TPUs don't support broadcast yet, so load checkpoints + # on every worker for now + or self.tpu + # FSDP requires loading checkpoint shards on all ranks + or (self.is_fsdp and self.cfg.distributed_training.use_sharded_state) + or getattr(self.cfg.model, "base_layers", 0) > 0 + ) + + if load_on_all_ranks or self.data_parallel_rank == 0: + state = checkpoint_utils.load_checkpoint_to_cpu( + filename, load_on_all_ranks=load_on_all_ranks + ) + last_optim_state = state.get("last_optimizer_state", None) + + # If doing zero_sharding, do not broadcast global optimizer + # state. Later we will broadcast sharded states to each rank + # to avoid memory from exploding. 
+ if ( + not load_on_all_ranks + and self.cfg.distributed_training.zero_sharding == "os" + and "last_optimizer_state" in state + and is_distributed + ): + state["last_optimizer_state"] = "SHARDED" + else: + last_optim_state = None + state = None + + if is_distributed and not load_on_all_ranks: + state = distributed_utils.broadcast_object( + state, + src_rank=0, + group=self.data_parallel_process_group, + dist_device=self.device, + ) + if self.data_parallel_rank > 0: + last_optim_state = state.get("last_optimizer_state", None) + + # load model parameters + try: + if ( + "optimizer_history" in state + and len(state["optimizer_history"]) > 0 + and "num_updates" in state["optimizer_history"][-1] + ): + self.model.set_num_updates( + state["optimizer_history"][-1]["num_updates"] + ) + + # this is the code related to AdaPrune + # In short, it removes redundant heads in multi-head attention module based on heads importance provided + # For more info, please refer to the paper: https://openreview.net/forum?id=_CMSV7FTzGI + # The idea of prune in mha can be summarized as + # Fine tune model (e.g. roberta encoder) on a certain datasets with regularization + # After the model is trained. User could use get_reserve_head_index and _adaptive_prune_heads functions to get the top X heads with most importance. + # Then user uses the rank to prune a new roberta encoder and save the pruned ckpt manually. + # User will fine tune the the new roberta encoder via the ckpt saved above + # To get rid of registering different pruned version of Roberta, I use the argument --mha-heads-to-keep to prune the Roberta model into a pruned version which matches the pruned ckpt. 
+ if ( + safe_hasattr(self.model, "args") + and safe_hasattr(self.model.args, "mha_heads_to_keep") + and self.model.args.mha_heads_to_keep != -1 + ): + logger.info( + f"Prune model: keep {self.model.args.mha_heads_to_keep} heads for each multihead attention module" + ) + for layer in self.model.encoder.sentence_encoder.layers: + reserve_head_index = layer.self_attn._get_reserve_head_index( + num_heads_to_keep=self.model.args.mha_heads_to_keep + ) + layer.self_attn._adaptive_prune_heads( + reserve_head_index=reserve_head_index + ) + layer.self_attn._set_skip_embed_dim_check() + logger.info(self.model) + # this is the code related to AdaPrune + # In short, it removes redundant units in feedforward layer in each transformer layer based on importance + # For more info, please refer to the paper: https://openreview.net/forum?id=_CMSV7FTzGI + # The idea of prune in ffn can be summarized as + # Fine tune model (e.g. roberta encoder) on a certain datasets with regularization + # After the model is trained. User could use _get_fc_rank and _prune_fc_layer functions to get the top X units with most importance. + # Then user uses the rank to prune a new roberta encoder and save the pruned ckpt manually. + # User will fine tune the the new roberta encoder via the ckpt saved above + # To get rid of registering different pruned version of Roberta, I use the argument --ffn-blocks-to-remove to prune the Roberta model into a pruned version which matches the pruned ckpt. 
+ if ( + safe_hasattr(self.model, "args") + and safe_hasattr(self.model.args, "ffn_blocks_to_remove") + and self.model.args.ffn_blocks_to_remove != -1 + ): + logger.info( + f"Prune model: remove {self.model.args.ffn_blocks_to_remove} ffn blocks for each transformer layer" + ) + for layer in self.model.encoder.sentence_encoder.layers: + remove_index = layer._get_fc_rank( + remove_num=self.model.args.ffn_blocks_to_remove + ) + layer._prune_fc_layer(remove_index=remove_index) + logger.info(self.model) + + self.model.load_state_dict( + state["model"], strict=True, model_cfg=self.cfg.model + ) + # save memory for later steps + del state["model"] + if utils.has_parameters(self.get_criterion()): + self.get_criterion().load_state_dict( + state["criterion"], strict=True + ) + del state["criterion"] + + except Exception: + raise Exception( + "Cannot load model parameters from checkpoint {}; " + "please ensure that the architectures match.".format(filename) + ) + extra_state = state["extra_state"] + self._optim_history = state["optimizer_history"] + + if last_optim_state is not None and not reset_optimizer: + # rebuild optimizer after loading model, since params may have changed + self._build_optimizer() + + # only reload optimizer and lr_scheduler if they match + last_optim = self._optim_history[-1] + assert ( + last_optim["criterion_name"] == self.get_criterion().__class__.__name__ + ), f"Criterion does not match; please reset the optimizer (--reset-optimizer). {last_optim['criterion_name']} vs {self.get_criterion().__class__.__name__}" + assert ( + last_optim["optimizer_name"] == self.optimizer.__class__.__name__ + ), f"Optimizer does not match; please reset the optimizer (--reset-optimizer). 
{last_optim['optimizer_name']} vs {self.optimizer.__class__.__name__}" + + if not reset_lr_scheduler: + self.lr_scheduler.load_state_dict(last_optim["lr_scheduler_state"]) + + if self.is_fsdp and not self.model.use_sharded_state: + # if use_sharded_state, the last_optim_state is already sharded, skip this + last_optim_state = self.model.get_shard_from_optim_state_dict( + last_optim_state + ) + elif not load_on_all_ranks and is_distributed: + last_optim_state = self.optimizer.broadcast_global_state_dict( + last_optim_state + ) + + self.optimizer.load_state_dict(last_optim_state, optimizer_overrides) + + self.set_num_updates(last_optim["num_updates"]) + + if extra_state is not None: + itr_state = extra_state["train_iterator"] + epoch = itr_state["epoch"] + + if "previous_training_time" in extra_state: + self._previous_training_time = extra_state["previous_training_time"] + self._start_time = time.time() + + self.lr_step(epoch) + + if ( + itr_state.get("version", 1) >= 2 + and itr_state["iterations_in_epoch"] == 0 + ): + # reset meters at start of epoch + reset_meters = True + + if "metrics" in extra_state and not reset_meters: + metrics.load_state_dict(extra_state["metrics"]) + + # reset TimeMeters, since their start times don't make sense anymore + for meter in metrics.get_meters("default"): + if isinstance(meter, meters.TimeMeter): + meter.reset() + + if self.cfg.ema.store_ema: + if "ema" not in extra_state: + logger.warn( + "EMA not found in checkpoint. But store_ema is True. " + "EMA is re-initialized from checkpoint." 
+ ) + self.ema.restore( + state["model"], build_fp32_params=self.cfg.ema.ema_fp32 + ) + else: + logger.info("Loading EMA from checkpoint") + self.ema.restore(extra_state["ema"], build_fp32_params=False) + + if self.cfg.ema.ema_fp32: + if "ema_fp32_params" in extra_state: + logger.info("Loading EMA fp32 params from checkpoint") + self.ema.build_fp32_params(extra_state["ema_fp32_params"]) + else: + logger.info( + "Building EMA fp32 params from EMA model in checkpoint" + ) + self.ema.build_fp32_params() + + logger.info( + "Loaded checkpoint {} (epoch {} @ {} updates)".format( + filename, epoch, self.get_num_updates() + ) + ) + + else: + logger.info("No existing checkpoint found {}".format(filename)) + + return extra_state + + def get_train_iterator( + self, + epoch, + combine=True, + load_dataset=True, + data_selector=None, + shard_batch_itr=True, + disable_iterator_cache=False, + ): + """Return an EpochBatchIterator over the training set for a given epoch.""" + if load_dataset: + logger.info("loading train data for epoch {}".format(epoch)) + self.task.load_dataset( + self.cfg.dataset.train_subset, + epoch=epoch, + combine=combine, + data_selector=data_selector, + tpu=self.tpu, + ) + batch_iterator = self.task.get_batch_iterator( + dataset=self.task.dataset(self.cfg.dataset.train_subset), + max_tokens=self.cfg.dataset.max_tokens, + max_sentences=self.cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + self.task.max_positions(), + self.model.max_positions(), + self.cfg.dataset.max_tokens, + ), + ignore_invalid_inputs=True, + required_batch_size_multiple=self.cfg.dataset.required_batch_size_multiple, + seed=(self.cfg.common.seed + epoch) + if self.cfg.dataset.update_ordered_indices_seed + else self.cfg.common.seed, + num_shards=self.data_parallel_world_size if shard_batch_itr else 1, + shard_id=self.data_parallel_rank if shard_batch_itr else 0, + num_workers=self.cfg.dataset.num_workers, + epoch=epoch, + 
data_buffer_size=self.cfg.dataset.data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=self.cfg.optimization.skip_remainder_batch, + grouped_shuffling=self.cfg.dataset.grouped_shuffling, + update_epoch_batch_itr=self.cfg.dataset.update_epoch_batch_itr, + ) + self.reset_dummy_batch(batch_iterator.first_batch) + return batch_iterator + + def get_valid_iterator( + self, + subset, + disable_iterator_cache=False, + ): + """Return an EpochBatchIterator over given validation subset for a given epoch.""" + batch_iterator = self.task.get_batch_iterator( + dataset=self.task.dataset(subset), + max_tokens=self.cfg.dataset.max_tokens_valid, + max_sentences=self.cfg.dataset.batch_size_valid, + max_positions=utils.resolve_max_positions( + self.task.max_positions(), + self.model.max_positions(), + ), + ignore_invalid_inputs=self.cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=self.cfg.dataset.required_batch_size_multiple, + seed=self.cfg.common.seed, + num_shards=self.data_parallel_world_size, + shard_id=self.data_parallel_rank, + num_workers=self.cfg.dataset.num_workers, + # always pass a fixed "epoch" to keep validation data consistent + # across training epochs + epoch=1, + data_buffer_size=self.cfg.dataset.data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=False, + ) + self.reset_dummy_batch(batch_iterator.first_batch) + return batch_iterator + + def begin_epoch(self, epoch): + """Called at the beginning of each epoch.""" + logger.info("begin training epoch {}".format(epoch)) + + self.lr_step_begin_epoch(epoch) + + if self.quantizer is not None: + self.quantizer.begin_epoch(epoch) + + # task specific setup per epoch + self.task.begin_epoch(epoch, self.get_model()) + + if self.tpu: + import torch_xla.core.xla_model as xm + + xm.rendezvous("begin_epoch") # wait for all workers + xm.mark_step() + + def begin_valid_epoch(self, epoch): + """Called at the beginning of each 
validation epoch.""" + + # task specific setup per validation epoch + self.task.begin_valid_epoch(epoch, self.get_model()) + + def reset_dummy_batch(self, batch): + self._dummy_batch = batch + + @metrics.aggregate("train") + def train_step(self, samples, raise_oom=False): + """Do forward, backward and parameter update.""" + self._set_seed() + self.model.train() + self.criterion.train() + self.zero_grad() + + metrics.log_start_time("train_wall", priority=800, round=0) + + # If EMA is enabled through store_ema=True + # and task.uses_ema is True, pass the EMA model as a keyword + # argument to the task. + extra_kwargs = {} + if self.cfg.ema.store_ema and getattr(self.task, "uses_ema", False): + extra_kwargs["ema_model"] = self.ema.get_model() + + has_oom = False + + # forward and backward pass + logging_outputs, sample_size, ooms = [], 0, 0 + for i, sample in enumerate(samples): # delayed update loop + sample, is_dummy_batch = self._prepare_sample(sample) + + def maybe_no_sync(): + """ + Whenever *samples* contains more than one mini-batch, we + want to accumulate gradients locally and only call + all-reduce in the last backwards pass. + """ + if ( + self.data_parallel_world_size > 1 + and hasattr(self.model, "no_sync") + and i < len(samples) - 1 + # The no_sync context manager results in increased memory + # usage with FSDP, since full-size gradients will be + # accumulated on each GPU. It's typically a better tradeoff + # to do the extra communication with FSDP. 
+ and not self.is_fsdp + ): + return self.model.no_sync() + else: + return contextlib.ExitStack() # dummy contextmanager + + try: + with maybe_no_sync(): + # forward and backward + loss, sample_size_i, logging_output = self.task.train_step( + sample=sample, + model=self.model, + criterion=self.criterion, + optimizer=self.optimizer, + update_num=self.get_num_updates(), + ignore_grad=is_dummy_batch, + **extra_kwargs, + ) + del loss + + logging_outputs.append(logging_output) + sample_size += sample_size_i + + # emptying the CUDA cache after the first step can + # reduce the chance of OOM + if self.cuda and self.get_num_updates() == 0: + torch.cuda.empty_cache() + except RuntimeError as e: + if "out of memory" in str(e): + self._log_oom(e) + has_oom = True + if raise_oom: + raise e + else: + raise e + except Exception: + self.consolidate_optimizer() + self.save_checkpoint( + os.path.join(self.cfg.checkpoint.save_dir, "crash.pt"), {} + ) + raise + + if has_oom: + logger.warning( + "attempting to recover from OOM in forward/backward pass" + ) + ooms += 1 + self.zero_grad() + if self.cuda: + torch.cuda.empty_cache() + + if self.cfg.distributed_training.distributed_world_size == 1: + return None + + if self.tpu and i < len(samples) - 1: + # tpu-comment: every XLA operation before marking step is + # appended to the IR graph, and processing too many batches + # before marking step can lead to OOM errors. 
+ # To handle gradient accumulation use case, we explicitly + # mark step here for every forward pass without a backward pass + self._xla_markstep_and_send_to_cpu() + + if is_dummy_batch: + if torch.is_tensor(sample_size): + sample_size.zero_() + else: + sample_size *= 0.0 + + if torch.is_tensor(sample_size): + sample_size = sample_size.float() + else: + sample_size = float(sample_size) + + # gather logging outputs from all replicas + if self._sync_stats(): + train_time = self._local_cumulative_training_time() + ( + logging_outputs, + ( + sample_size, + ooms, + total_train_time, + ), + ) = self._aggregate_logging_outputs( + logging_outputs, sample_size, ooms, train_time, ignore=is_dummy_batch + ) + self._cumulative_training_time = ( + total_train_time / self.data_parallel_world_size + ) + + overflow = False + try: + with torch.autograd.profiler.record_function("reduce-grads"): + # reduce gradients across workers + self.optimizer.all_reduce_grads(self.model) + if utils.has_parameters(self.criterion): + self.optimizer.all_reduce_grads(self.criterion) + + with torch.autograd.profiler.record_function("multiply-grads"): + # multiply gradients by (data_parallel_size / sample_size) since + # DDP normalizes by the number of data parallel workers for + # improved fp16 precision. + # Thus we get (sum_of_gradients / sample_size) at the end. + # In case of fp16, this step also undoes loss scaling. + # (Debugging note: Some optimizers perform this scaling on the + # fly, so inspecting model.parameters() or optimizer.params may + # still show the original, unscaled gradients.) + numer = ( + self.data_parallel_world_size + if not self.cfg.optimization.use_bmuf or self._sync_stats() + else 1 + ) + self.optimizer.multiply_grads(numer / (sample_size or 1.0)) + # Note: (sample_size or 1.0) handles the case of a zero gradient, in a + # way that avoids CPU/device transfers in case sample_size is a GPU or + # TPU object. The assumption is that the gradient itself is also 0. 
+ + with torch.autograd.profiler.record_function("clip-grads"): + # clip grads + grad_norm = self.clip_grad_norm(self.cfg.optimization.clip_norm) + + # check that grad norms are consistent across workers + # on tpu check tensor is slow + if not self.tpu: + if ( + not self.cfg.optimization.use_bmuf + and self.cfg.distributed_training.ddp_backend != "slowmo" + ): + self._check_grad_norms(grad_norm) + if not torch.isfinite(grad_norm).all(): + # in case of AMP, if gradients are Nan/Inf then + # optimizer step is still required + if self.cfg.common.amp: + overflow = True + else: + # check local gradnorm single GPU case, trigger NanDetector + raise FloatingPointError("gradients are Nan/Inf") + + with torch.autograd.profiler.record_function("optimizer"): + # take an optimization step + self.task.optimizer_step( + self.optimizer, model=self.model, update_num=self.get_num_updates() + ) + if self.cfg.common.amp and overflow: + if self._amp_retries == self.cfg.common.amp_batch_retries: + logger.info("AMP: skipping this batch.") + self._amp_retries = 0 + else: + self._amp_retries += 1 + return self.train_step( + samples, raise_oom + ) # recursion to feed in same batch + + except FloatingPointError: + + self.consolidate_optimizer() + self.save_checkpoint( + os.path.join(self.cfg.checkpoint.save_dir, "crash.pt"), {} + ) + + # re-run the forward and backward pass with hooks attached to print + # out where it fails + self.zero_grad() + with NanDetector(self.get_model()): + for _, sample in enumerate(samples): + sample, _ = self._prepare_sample(sample) + self.task.train_step( + sample, + self.model, + self.criterion, + self.optimizer, + self.get_num_updates(), + ignore_grad=False, + **extra_kwargs, + ) + raise + except OverflowError as e: + overflow = True + logger.info( + f"NOTE: gradient overflow detected, ignoring gradient, {str(e)}" + ) + + if hasattr(self, "param_names") and hasattr( + self.optimizer, "fp32_optimizer" + ): + for p, n in 
zip(self.optimizer.fp32_optimizer.params, self.param_names): + if torch.isinf(p.grad).any() or torch.isnan(p.grad).any(): + logger.info(f"overflow in param {n}") + + grad_norm = torch.tensor(0.0).cuda() + self.zero_grad() + except RuntimeError as e: + if "out of memory" in str(e): + self._log_oom(e) + logger.error("OOM during optimization, irrecoverable") + raise e + + # Some distributed wrappers (e.g., SlowMo) need access to the optimizer + # after the step + if hasattr(self.model, "perform_slowmo"): + self.model.perform_slowmo( + self.optimizer.optimizer, getattr(self.optimizer, "fp32_params", None) + ) + + logging_output = None + if not overflow or self.cfg.distributed_training.ddp_backend == "slowmo": + self.set_num_updates(self.get_num_updates() + 1) + + if self.cfg.ema.store_ema: + # Step EMA forward with new model. + self.ema.step( + self.get_model(), + self.get_num_updates(), + ) + metrics.log_scalar( + "ema_decay", + self.ema.get_decay(), + priority=10000, + round=5, + weight=0, + ) + + if self.tpu: + import torch_xla.core.xla_model as xm + + # mark step on TPUs + self._xla_markstep_and_send_to_cpu() + + # only log stats every log_interval steps + # this causes wps to be misreported when log_interval > 1 + logging_output = {} + if self.get_num_updates() % self.cfg.common.log_interval == 0: + # log memory usage + mem_info = xm.get_memory_info(self.device) + gb_free = mem_info["kb_free"] / 1024 / 1024 + gb_total = mem_info["kb_total"] / 1024 / 1024 + metrics.log_scalar( + "gb_free", gb_free, priority=1500, round=1, weight=0 + ) + metrics.log_scalar( + "gb_total", gb_total, priority=1600, round=1, weight=0 + ) + logging_outputs = self._xla_markstep_and_send_to_cpu( + logging_outputs + ) + logging_output = self._reduce_and_log_stats( + logging_outputs, sample_size, grad_norm + ) + + # log whenever there's an XLA compilation, since these + # slow down training and may indicate opportunities for + # optimization + self._check_xla_compilation() + else: + if 
self.cuda and self.cuda_env is not None: + # log minimum free memory over the iteration + gb_used = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024 + torch.cuda.reset_peak_memory_stats() + gb_free = self.cuda_env.total_memory_in_GB - gb_used + metrics.log_scalar( + "gb_free", gb_free, priority=1500, round=1, weight=0 + ) + + # log stats + logging_output = self._reduce_and_log_stats( + logging_outputs, sample_size, grad_norm + ) + + # clear CUDA cache to reduce memory fragmentation + if ( + self.cuda + and self.cfg.common.empty_cache_freq > 0 + and ( + (self.get_num_updates() + self.cfg.common.empty_cache_freq - 1) + % self.cfg.common.empty_cache_freq + ) + == 0 + ): + torch.cuda.empty_cache() + + if self.cfg.common.fp16 or self.cfg.common.amp: + metrics.log_scalar( + "loss_scale", + ( + self.optimizer.scaler.loss_scale + if self.cfg.common.fp16 + else self.optimizer.scaler.get_scale() + ), + priority=700, + round=4, + weight=0, + ) + + metrics.log_stop_time("train_wall") + return logging_output + + @metrics.aggregate("valid") + def valid_step(self, sample, raise_oom=False): + """Do forward pass in evaluation mode.""" + if self.tpu: + import torch_xla.core.xla_model as xm + + xm.rendezvous("valid_step") # wait for all workers + + # If EMA is enabled through store_ema=True + # and task.uses_ema is True, pass the EMA model as a keyword + # argument to the task. 
+ extra_kwargs = {} + if self.cfg.ema.store_ema and getattr(self.task, "uses_ema", False): + extra_kwargs["ema_model"] = self.ema.get_model() + + with torch.no_grad(): + self.model.eval() + self.criterion.eval() + + sample, is_dummy_batch = self._prepare_sample(sample) + + try: + _loss, sample_size, logging_output = self.task.valid_step( + sample, self.model, self.criterion, **extra_kwargs + ) + except RuntimeError as e: + if "out of memory" in str(e): + self._log_oom(e) + if not raise_oom: + logger.warning( + "ran out of memory in validation step, retrying batch" + ) + for p in self.model.parameters(): + if p.grad is not None: + p.grad = None # free some memory + if self.cuda: + torch.cuda.empty_cache() + return self.valid_step(sample, raise_oom=True) + raise e + + logging_outputs = [logging_output] + if is_dummy_batch: + if torch.is_tensor(sample_size): + sample_size.zero_() + else: + sample_size *= 0.0 + + # gather logging outputs from all replicas + if self.data_parallel_world_size > 1: + logging_outputs, (sample_size,) = self._aggregate_logging_outputs( + logging_outputs, + sample_size, + ignore=is_dummy_batch, + ) + + # log validation stats + if self.tpu: + logging_outputs = self._xla_markstep_and_send_to_cpu(logging_outputs) + logging_output = self._reduce_and_log_stats(logging_outputs, sample_size) + + return logging_output + + def zero_grad(self): + self.optimizer.zero_grad() + + def lr_step_begin_epoch(self, epoch): + """Adjust the learning rate at the beginning of the epoch.""" + self.lr_scheduler.step_begin_epoch(epoch) + # prefer updating the LR based on the number of steps + return self.lr_step_update() + + def lr_step(self, epoch, val_loss=None): + """Adjust the learning rate at the end of the epoch.""" + self.lr_scheduler.step(epoch, val_loss) + # prefer updating the LR based on the number of steps + return self.lr_step_update() + + def lr_step_update(self): + """Update the learning rate after each update.""" + new_lr = 
self.lr_scheduler.step_update(self.get_num_updates()) + if isinstance(new_lr, dict): + for k, v in new_lr.items(): + metrics.log_scalar(f"lr_{k}", v, weight=0, priority=300) + new_lr = new_lr.get("default", next(iter(new_lr.values()))) + else: + metrics.log_scalar("lr", new_lr, weight=0, priority=300) + return new_lr + + def get_lr(self): + """Get the current learning rate.""" + return self.optimizer.get_lr() + + def get_model(self): + """Get the (non-wrapped) model instance.""" + return self._model + + def get_criterion(self): + """Get the (non-wrapped) criterion instance.""" + return self._criterion + + def get_meter(self, name): + """[deprecated] Get a specific meter by name.""" + from fairseq import meters + + if "get_meter" not in self._warn_once: + self._warn_once.add("get_meter") + utils.deprecation_warning( + "Trainer.get_meter is deprecated. Please use fairseq.metrics instead." + ) + + train_meters = metrics.get_meters("train") + if train_meters is None: + train_meters = {} + + if name == "train_loss" and "loss" in train_meters: + return train_meters["loss"] + elif name == "train_nll_loss": + # support for legacy train.py, which assumed this meter is + # always initialized + m = train_meters.get("nll_loss", None) + return m or meters.AverageMeter() + elif name == "wall": + # support for legacy train.py, which assumed this meter is + # always initialized + m = metrics.get_meter("default", "wall") + return m or meters.TimeMeter() + elif name == "wps": + m = metrics.get_meter("train", "wps") + return m or meters.TimeMeter() + elif name in {"valid_loss", "valid_nll_loss"}: + # support for legacy train.py, which assumed these meters + # are always initialized + k = name[len("valid_") :] + m = metrics.get_meter("valid", k) + return m or meters.AverageMeter() + elif name == "oom": + return meters.AverageMeter() + elif name in train_meters: + return train_meters[name] + return None + + def get_num_updates(self): + """Get the number of parameters updates.""" + 
return self._num_updates + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self._num_updates = num_updates + self.lr_step_update() + if self.quantizer: + self.quantizer.step_update(self._num_updates) + metrics.log_scalar("num_updates", self._num_updates, weight=0, priority=200) + + def clip_grad_norm(self, clip_norm): + def agg_norm_fn(total_norm): + total_norm = total_norm.cuda().float() ** 2 + total_norm = distributed_utils.all_reduce( + total_norm, group=self.data_parallel_process_group + ) + return total_norm**0.5 + + should_agg_norm = self.is_fsdp and ( + self.data_parallel_process_group is not None + or torch.distributed.is_initialized() + ) + return self.optimizer.clip_grad_norm( + clip_norm, aggregate_norm_fn=agg_norm_fn if should_agg_norm else None + ) + + def cumulative_training_time(self): + if self._cumulative_training_time is None: + # single GPU + return self._local_cumulative_training_time() + else: + return self._cumulative_training_time + + def _local_cumulative_training_time(self): + """Aggregate training time in seconds.""" + return time.time() - self._start_time + self._previous_training_time + + def _fp_convert_sample(self, sample): + def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + + def apply_bfloat16(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.bfloat16) + return t + + if self.cfg.common.fp16: + sample = utils.apply_to_sample(apply_half, sample) + + if self.cfg.common.bf16: + sample = utils.apply_to_sample(apply_bfloat16, sample) + + return sample + + def _prepare_sample(self, sample, is_dummy=False): + if sample == "DUMMY": + raise Exception( + "Trying to use an uninitialized 'dummy' batch. This usually indicates " + "that the total number of batches is smaller than the number of " + "participating GPUs. Try reducing the batch size or using fewer GPUs." 
+ ) + + if sample is None or len(sample) == 0: + assert ( + self._dummy_batch is not None and len(self._dummy_batch) > 0 + ), "Invalid dummy batch: {}".format(self._dummy_batch) + sample, _ = self._prepare_sample(self._dummy_batch, is_dummy=True) + return sample, True + + # Given that PCIe/NVLink bandwidth is significantly smaller than DRAM bandwidth + # it makes sense to do the format conversion on the CPU and then transfer + # a smaller buffer to the device. This also saves GPU memory capacity. + + if self.cfg.common.on_cpu_convert_precision: + sample = self._fp_convert_sample(sample) + + if self.cuda: + if self.pipeline_model_parallel: + if "target" in sample: + sample["target"] = utils.move_to_cuda( + sample["target"], device=self.last_device + ) + else: + sample = utils.move_to_cuda(sample) + elif self.tpu and is_dummy: + # the dummy batch may not be on the appropriate device + sample = utils.move_to_cuda(sample, device=self.device) + + if not self.cfg.common.on_cpu_convert_precision: + sample = self._fp_convert_sample(sample) + + if self._dummy_batch == "DUMMY": + self._dummy_batch = sample + + return sample, False + + def _set_seed(self): + # Set seed based on args.seed and the update number so that we get + # reproducible results when resuming from checkpoints + seed = self.cfg.common.seed + self.get_num_updates() + utils.set_torch_seed(seed) + + def _sync_stats(self): + # Return True if it's using multiple GPUs and DDP or multiple GPUs with + # BMUF and it's a bmuf sync with warmup iterations completed before. 
+ if self.data_parallel_world_size == 1: + return False + elif self.cfg.optimization.use_bmuf: + return ( + self.get_num_updates() + 1 + ) % self.cfg.bmuf.global_sync_iter == 0 and ( + self.get_num_updates() + 1 + ) > self.cfg.bmuf.warmup_iterations + else: + return True + + def _log_oom(self, exc): + msg = "OOM: Ran out of memory with exception: {}".format(exc) + logger.warning(msg) + if torch.cuda.is_available() and hasattr(torch.cuda, "memory_summary"): + for device_idx in range(torch.cuda.device_count()): + logger.warning(torch.cuda.memory_summary(device=device_idx)) + sys.stderr.flush() + + def _aggregate_logging_outputs( + self, + logging_outputs: List[Dict[str, Any]], + *extra_stats_to_sum, + ignore=False, + ): + if self.task.__class__.logging_outputs_can_be_summed(self.get_criterion()): + return self._fast_stat_sync_sum( + logging_outputs, *extra_stats_to_sum, ignore=ignore + ) + else: + return self._all_gather_list_sync( + logging_outputs, *extra_stats_to_sum, ignore=ignore + ) + + def _all_gather_list_sync( + self, + logging_outputs: List[Dict[str, Any]], + *extra_stats_to_sum, + ignore=False, + ): + """ + Sync logging outputs across workers. all_gather_list_sync is + suitable when logging outputs are complex types. + """ + if self.tpu: + raise NotImplementedError + if ignore: + logging_outputs = [] + results = list( + zip( + *distributed_utils.all_gather_list( + [logging_outputs] + list(extra_stats_to_sum), + max_size=getattr(self.cfg.common, "all_gather_list_size", 16384), + group=self.data_parallel_process_group, + ) + ) + ) + logging_outputs, extra_stats_to_sum = results[0], results[1:] + logging_outputs = list(chain.from_iterable(logging_outputs)) + extra_stats_to_sum = [sum(s) for s in extra_stats_to_sum] + return logging_outputs, extra_stats_to_sum + + def _fast_stat_sync_sum( + self, + logging_outputs: List[Dict[str, Any]], + *extra_stats_to_sum, + ignore=False, + ): + """ + Sync logging outputs across workers. 
fast_stat_sync_sum is + faster than all_gather_list_sync, but is only suitable when + logging outputs are scalars and can be summed. Note that + *logging_outputs* cannot contain any nested dicts/lists. + """ + data = {} + for i, stat in enumerate(extra_stats_to_sum): + data["extra_stats_" + str(i)] = stat + if len(logging_outputs) > 0: + log_keys = list(logging_outputs[0].keys()) + for k in log_keys: + if not ignore: + v = sum(log[k] for log in logging_outputs if k in log) + else: + v = logging_outputs[0][k] + v = torch.zeros_like(v) if torch.is_tensor(v) else 0 + data["logging_outputs_" + k] = v + else: + log_keys = None + + data = distributed_utils.all_reduce_dict( + data, device=self.device, group=self.data_parallel_process_group + ) + + extra_stats_to_sum = [ + data["extra_stats_" + str(i)] for i in range(len(extra_stats_to_sum)) + ] + if log_keys is not None: + logging_outputs = [{k: data["logging_outputs_" + k] for k in log_keys}] + else: + logging_outputs = [] + return logging_outputs, extra_stats_to_sum + + def _check_grad_norms(self, grad_norm): + """Check that grad norms are consistent across workers.""" + if self._grad_norm_buf is not None: + self._grad_norm_buf.zero_() + self._grad_norm_buf[self.data_parallel_rank] = grad_norm + distributed_utils.all_reduce( + self._grad_norm_buf, group=self.data_parallel_process_group + ) + + def is_consistent(tensor): + max_abs_diff = torch.max(torch.abs(tensor - tensor[0])) + return ( + ( + torch.isfinite(tensor).all() + and (max_abs_diff / (tensor[0] + 1e-6) < 1e-6).all() + ) + or (self.cfg.common.amp and not torch.isfinite(tensor).all()) + # in case of amp non-finite grads are fine + ) + + if not is_consistent(self._grad_norm_buf): + pretty_detail = "\n".join( + "rank {:3d} = {:.8f}".format(r, n) + for r, n in enumerate(self._grad_norm_buf.tolist()) + ) + error_detail = "grad_norm across the workers:\n{}\n".format( + pretty_detail + ) + # use FloatingPointError to trigger NanDetector + raise FloatingPointError( + 
"Fatal error: gradients are inconsistent between workers. " + "Try --ddp-backend=legacy_ddp. " + "Or are you mixing up different generation of GPUs in training?" + + "\n" + + "-" * 80 + + "\n{}\n".format(error_detail) + + "-" * 80 + ) + + def _reduce_and_log_stats(self, logging_outputs, sample_size, grad_norm=None): + if grad_norm is not None and ( + not torch.is_tensor(grad_norm) or torch.isfinite(grad_norm) + ): + metrics.log_speed("ups", 1.0, priority=100, round=2) + metrics.log_scalar("gnorm", grad_norm, priority=400, round=3) + if self.cfg.optimization.clip_norm > 0: + metrics.log_scalar( + "clip", + torch.where( + grad_norm > self.cfg.optimization.clip_norm, + grad_norm.new_tensor(100), + grad_norm.new_tensor(0), + ), + priority=500, + round=1, + ) + + with metrics.aggregate() as agg: + if logging_outputs is not None: + self.task.reduce_metrics(logging_outputs, self.get_criterion()) + del logging_outputs + + # extra warning for criterions that don't properly log a loss value + if "loss" not in agg: + if "loss" not in self._warn_once: + self._warn_once.add("loss") + logger.warning( + "Criterion.reduce_metrics did not log a 'loss' value, " + "which may break some functionality" + ) + metrics.log_scalar("loss", -1) + + # support legacy interface + if self.tpu: + logging_output = {} + else: + logging_output = agg.get_smoothed_values() + logging_output["sample_size"] = sample_size + for key_to_delete in ["ppl", "wps", "wpb", "bsz"]: + if key_to_delete in logging_output: + del logging_output[key_to_delete] + return logging_output + + def _check_xla_compilation(self): + import torch_xla.debug.metrics as met + + compile_stats = met.metric_data("CompileTime") + if compile_stats is None: + return + num_xla_compiles = compile_stats[0] + if num_xla_compiles > self._num_xla_compiles: + logger.warning( + "XLA compilation detected on device #{}; too many of these can lead " + "to slow training, but we expect a few in the beginning".format( + 
self.cfg.distributed_training.distributed_rank + ) + ) + self._num_xla_compiles = num_xla_compiles + + def _xla_markstep_and_send_to_cpu(self, data=None): + import torch_xla.core.xla_model as xm + + xm.mark_step() + if data is not None: + from fairseq.utils import xla_device_to_cpu + + return xla_device_to_cpu(data) + + +def _catalog_shared_params(module, memo=None, prefix=""): + if memo is None: + first_call = True + memo = {} + else: + first_call = False + for name, param in module._parameters.items(): + param_prefix = prefix + ("." if prefix else "") + name + if param not in memo: + memo[param] = [] + memo[param].append(param_prefix) + for name, m in module._modules.items(): + if m is None: + continue + submodule_prefix = prefix + ("." if prefix else "") + name + _catalog_shared_params(m, memo, submodule_prefix) + if first_call: + return [x for x in memo.values() if len(x) > 1] + + +def _get_module_by_path(module, path): + path = path.split(".") + for name in path: + module = getattr(module, name) + return module + + +def _set_module_by_path(module, path, value): + path = path.split(".") + for name in path[:-1]: + module = getattr(module, name) + setattr(module, path[-1], value) diff --git a/fairseq/fairseq/utils.py b/fairseq/fairseq/utils.py new file mode 100644 index 0000000..4d4b350 --- /dev/null +++ b/fairseq/fairseq/utils.py @@ -0,0 +1,951 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import collections +import contextlib +import copy +import importlib +import logging +import os +import sys +import warnings +from itertools import accumulate +from typing import TYPE_CHECKING, Callable, Dict, List, Optional + +import torch +import torch.nn.functional as F +from torch import Tensor + +if TYPE_CHECKING: + from fairseq.modules.multihead_attention import MultiheadAttention + +try: + from amp_C import multi_tensor_l2norm + + multi_tensor_l2norm_available = True +except ImportError: + multi_tensor_l2norm_available = False + +try: + import torch_xla.core.xla_model as xm +except ImportError: + xm = None + + +logger = logging.getLogger(__name__) + + +MANIFOLD_PATH_SEP = "|" + + +class FileContentsAction(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + if nargs is not None: + raise ValueError("nargs not allowed") + super(FileContentsAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + from fairseq.file_io import PathManager + + if PathManager.isfile(values): + with PathManager.open(values) as f: + argument = f.read().strip() + else: + argument = values + setattr(namespace, self.dest, argument) + + +def split_paths(paths: str, separator=os.pathsep) -> List[str]: + return ( + paths.split(separator) if "://" not in paths else paths.split(MANIFOLD_PATH_SEP) + ) + + +def load_ensemble_for_inference(filenames, task, model_arg_overrides=None): + from fairseq import checkpoint_utils + + deprecation_warning( + "utils.load_ensemble_for_inference is deprecated. " + "Please use checkpoint_utils.load_model_ensemble instead." 
+ ) + return checkpoint_utils.load_model_ensemble( + filenames, arg_overrides=model_arg_overrides, task=task + ) + + +def apply_to_sample(f, sample): + if hasattr(sample, "__len__") and len(sample) == 0: + return {} + + def _apply(x): + if torch.is_tensor(x): + return f(x) + elif isinstance(x, collections.OrderedDict): + # OrderedDict has attributes that needs to be preserved + od = collections.OrderedDict( + (key, _apply(value)) for key, value in x.items() + ) + od.__dict__ = x.__dict__ + return od + elif isinstance(x, dict): + return {key: _apply(value) for key, value in x.items()} + elif isinstance(x, list): + return [_apply(x) for x in x] + elif isinstance(x, tuple): + return tuple(_apply(x) for x in x) + elif isinstance(x, set): + return {_apply(x) for x in x} + else: + return x + + return _apply(sample) + + +def move_to_cuda(sample, device=None): + device = device or torch.cuda.current_device() + + def _move_to_cuda(tensor): + # non_blocking is ignored if tensor is not pinned, so we can always set + # to True (see github.com/PyTorchLightning/pytorch-lightning/issues/620) + return tensor.to(device=device, non_blocking=True) + + return apply_to_sample(_move_to_cuda, sample) + + +def move_to_cpu(sample): + def _move_to_cpu(tensor): + # PyTorch has poor support for half tensors (float16) on CPU. + # Move any such tensors to float32. 
+ if tensor.dtype in {torch.bfloat16, torch.float16}: + tensor = tensor.to(dtype=torch.float32) + return tensor.cpu() + + return apply_to_sample(_move_to_cpu, sample) + + +def move_to_tpu(sample): + + import torch_xla.core.xla_model as xm + + device = xm.xla_device() + + def _move_to_tpu(tensor): + return tensor.to(device) + + return apply_to_sample(_move_to_tpu, sample) + + +def get_incremental_state( + module: "MultiheadAttention", + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + key: str, +) -> Optional[Dict[str, Optional[Tensor]]]: + """Helper for getting incremental state for an nn.Module.""" + return module.get_incremental_state(incremental_state, key) + + +def set_incremental_state( + module: "MultiheadAttention", + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]], + key: str, + value: Dict[str, Optional[Tensor]], +) -> Optional[Dict[str, Dict[str, Optional[Tensor]]]]: + """Helper for setting incremental state for an nn.Module.""" + if incremental_state is not None: + result = module.set_incremental_state(incremental_state, key, value) + if result is not None: + incremental_state = result + return incremental_state + + +def load_align_dict(replace_unk): + if replace_unk is None: + align_dict = None + elif isinstance(replace_unk, str) and len(replace_unk) > 0: + # Load alignment dictionary for unknown word replacement if it was passed as an argument. + align_dict = {} + with open(replace_unk, "r") as f: + for line in f: + cols = line.split() + align_dict[cols[0]] = cols[1] + else: + # No alignment dictionary provided but we still want to perform unknown word replacement by copying the + # original source word. 
+ align_dict = {} + return align_dict + + +def print_embed_overlap(embed_dict, vocab_dict): + embed_keys = set(embed_dict.keys()) + vocab_keys = set(vocab_dict.symbols) + overlap = len(embed_keys & vocab_keys) + logger.info("found {}/{} types in embedding file".format(overlap, len(vocab_dict))) + + +def parse_embedding(embed_path): + """Parse embedding text file into a dictionary of word and embedding tensors. + + The first line can have vocabulary size and dimension. The following lines + should contain word and embedding separated by spaces. + + Example: + 2 5 + the -0.0230 -0.0264 0.0287 0.0171 0.1403 + at -0.0395 -0.1286 0.0275 0.0254 -0.0932 + """ + embed_dict = {} + with open(embed_path) as f_embed: + next(f_embed) # skip header + for line in f_embed: + pieces = line.rstrip().split(" ") + embed_dict[pieces[0]] = torch.Tensor( + [float(weight) for weight in pieces[1:]] + ) + return embed_dict + + +def load_embedding(embed_dict, vocab, embedding): + for idx in range(len(vocab)): + token = vocab[idx] + if token in embed_dict: + embedding.weight.data[idx] = embed_dict[token] + return embedding + + +def replace_unk(hypo_str, src_str, alignment, align_dict, unk): + from fairseq import tokenizer + + # Tokens are strings here + hypo_tokens = tokenizer.tokenize_line(hypo_str) + # TODO: Very rare cases where the replacement is '<eos>' should be handled gracefully + src_tokens = tokenizer.tokenize_line(src_str) + ["<eos>"] + for i, ht in enumerate(hypo_tokens): + if ht == unk: + src_token = src_tokens[alignment[i]] + # Either take the corresponding value in the aligned dictionary or just copy the original value. 
+ hypo_tokens[i] = align_dict.get(src_token, src_token) + return " ".join(hypo_tokens) + + +def post_process_prediction( + hypo_tokens, + src_str, + alignment, + align_dict, + tgt_dict, + remove_bpe=None, + extra_symbols_to_ignore=None, +): + hypo_str = tgt_dict.string( + hypo_tokens, remove_bpe, extra_symbols_to_ignore=extra_symbols_to_ignore + ) + if align_dict is not None: + hypo_str = replace_unk( + hypo_str, src_str, alignment, align_dict, tgt_dict.unk_string() + ) + if align_dict is not None or remove_bpe is not None: + # Convert back to tokens for evaluating with unk replacement or without BPE + # Note that the dictionary can be modified inside the method. + hypo_tokens = tgt_dict.encode_line(hypo_str, add_if_not_exist=True) + return hypo_tokens, hypo_str, alignment + + +def make_positions(tensor, padding_idx: int, onnx_trace: bool = False): + """Replace non-padding symbols with their position numbers. + + Position numbers begin at padding_idx+1. Padding symbols are ignored. + """ + # The series of casts and type-conversions here are carefully + # balanced to both work with ONNX export and XLA. In particular XLA + # prefers ints, cumsum defaults to output longs, and ONNX doesn't know + # how to handle the dtype kwarg in cumsum. 
+ mask = tensor.ne(padding_idx).int() + return (torch.cumsum(mask, dim=1).type_as(mask) * mask).long() + padding_idx + + +def strip_pad(tensor, pad): + return tensor[tensor.ne(pad)] + + +def buffered_arange(max, device="cpu"): + if not hasattr(buffered_arange, "buf"): + buffered_arange.buf = torch.LongTensor().to(device) + if max > buffered_arange.buf.numel(): + buffered_arange.buf.resize_(max) + torch.arange(max, out=buffered_arange.buf) + return buffered_arange.buf[:max] + + +def convert_padding_direction( + src_tokens, padding_idx, right_to_left: bool = False, left_to_right: bool = False +): + assert right_to_left ^ left_to_right + pad_mask = src_tokens.eq(padding_idx) + if not pad_mask.any(): + # no padding, return early + return src_tokens + if left_to_right and not pad_mask[:, 0].any(): + # already right padded + return src_tokens + if right_to_left and not pad_mask[:, -1].any(): + # already left padded + return src_tokens + max_len = src_tokens.size(1) + buffered = torch.empty(0).long() + if max_len > 0: + torch.arange(max_len, out=buffered) + range = buffered.type_as(src_tokens).expand_as(src_tokens) + num_pads = pad_mask.long().sum(dim=1, keepdim=True) + if right_to_left: + index = torch.remainder(range - num_pads, max_len) + else: + index = torch.remainder(range + num_pads, max_len) + return src_tokens.gather(1, index) + + +def item(tensor): + # tpu-comment: making this a no-op for xla devices. 
+ if torch.is_tensor(tensor) and tensor.device.type == "xla": + return tensor.detach() + if hasattr(tensor, "item"): + return tensor.item() + if hasattr(tensor, "__getitem__"): + return tensor[0] + return tensor + + +def multi_tensor_total_norm(grads, chunk_size=2048 * 32) -> torch.Tensor: + per_device_grads = {} + norms = [] + for grad in grads: + device = grad.device + cur_device_grads = per_device_grads.get(device) + if cur_device_grads is None: + cur_device_grads = [] + per_device_grads[device] = cur_device_grads + cur_device_grads.append(grad) + for device in per_device_grads.keys(): + cur_device_grads = per_device_grads[device] + if device.type == "cuda": + # TODO(msb) return has_inf + has_inf = torch.zeros((1, 1), dtype=torch.int, device=device) + with torch.cuda.device(device): + norm = multi_tensor_l2norm( + chunk_size, has_inf, [cur_device_grads], False + ) + norms.append(norm[0].to(torch.cuda.current_device())) + else: + norms += [torch.norm(g, p=2, dtype=torch.float32) for g in cur_device_grads] + total_norm = torch.norm(torch.stack(norms)) + return total_norm + + +@torch.no_grad() +def clip_grad_norm_(params, max_norm, aggregate_norm_fn=None) -> torch.Tensor: + def grad_exists(p): + return p is not None and getattr(p, "grad", None) is not None + + if isinstance(params, torch.Tensor): + params = [params] + params = list(params) + grads = [ + p.grad.detach() for p in params if grad_exists(p) and not hasattr(p, "expert") + ] + expert_grads = [ + p.grad.detach() for p in params if grad_exists(p) and hasattr(p, "expert") + ] + + if len(grads) == 0: + if len(params) > 0: + return params[0].new_tensor(0.0) + else: + return torch.tensor(0.0) + + if len(grads) == 1: + total_norm = torch.norm(grads[0], p=2, dtype=torch.float32) + else: + if multi_tensor_l2norm_available: + total_norm = multi_tensor_total_norm(grads) + else: + if torch.cuda.is_available(): + warnings.warn( + "amp_C fused kernels unavailable, disabling multi_tensor_l2norm; " + "you may get better 
performance by installing NVIDIA's apex library" + ) + device = torch.cuda.current_device() + elif grads[0].device.type == "xla": + device = grads[0].device + else: + device = torch.device("cpu") + total_norm = torch.norm( + torch.stack( + [torch.norm(g, p=2, dtype=torch.float32).to(device) for g in grads] + ) + ) + + if aggregate_norm_fn is not None: + total_norm = aggregate_norm_fn(total_norm) + + if max_norm > 0: + max_norm = float(max_norm) + clip_coef = (max_norm / (total_norm + 1e-6)).clamp_(max=1) + torch._foreach_mul_(grads + expert_grads, clip_coef) + + return total_norm + + +def fill_with_neg_inf(t): + """FP16-compatible function that fills a tensor with -inf.""" + return t.float().fill_(float("-inf")).type_as(t) + + +def _match_types(arg1, arg2): + """Convert the numerical argument to the same type as the other argument""" + + def upgrade(arg_number, arg_structure): + if isinstance(arg_structure, tuple): + return tuple([arg_number] * len(arg_structure)) + elif isinstance(arg_structure, dict): + arg = copy.deepcopy(arg_structure) + for k in arg: + arg[k] = upgrade(arg_number, arg_structure[k]) + return arg + else: + return arg_number + + if isinstance(arg1, float) or isinstance(arg1, int): + return upgrade(arg1, arg2), arg2 + elif isinstance(arg2, float) or isinstance(arg2, int): + return arg1, upgrade(arg2, arg1) + + return arg1, arg2 + + +def resolve_max_positions(*args): + """Resolve max position constraints from multiple sources.""" + + def map_value_update(d1, d2): + updated_value = copy.deepcopy(d1) + for key in d2: + if key not in updated_value: + updated_value[key] = d2[key] + else: + updated_value[key] = min(d1[key], d2[key]) + return updated_value + + def nullsafe_min(l): + minim = None + for item in l: + if minim is None: + minim = item + elif item is not None and item < minim: + minim = item + return minim + + max_positions = None + for arg in args: + if max_positions is None: + max_positions = arg + elif arg is not None: + max_positions, arg 
= _match_types(max_positions, arg) + if isinstance(arg, float) or isinstance(arg, int): + max_positions = min(max_positions, arg) + elif isinstance(arg, dict): + max_positions = map_value_update(max_positions, arg) + else: + max_positions = tuple(map(nullsafe_min, zip(max_positions, arg))) + + return max_positions + + +def import_user_module(args): + module_path = getattr(args, "user_dir", None) + if module_path is not None: + module_path = os.path.abspath(args.user_dir) + if not os.path.exists(module_path) and not os.path.isfile( + os.path.dirname(module_path) + ): + fairseq_rel_path = os.path.join(os.path.dirname(__file__), args.user_dir) + if os.path.exists(fairseq_rel_path): + module_path = fairseq_rel_path + else: + fairseq_rel_path = os.path.join( + os.path.dirname(__file__), "..", args.user_dir + ) + if os.path.exists(fairseq_rel_path): + module_path = fairseq_rel_path + else: + raise FileNotFoundError(module_path) + + # ensure that user modules are only imported once + import_user_module.memo = getattr(import_user_module, "memo", set()) + if module_path not in import_user_module.memo: + import_user_module.memo.add(module_path) + + module_parent, module_name = os.path.split(module_path) + if module_name not in sys.modules: + sys.path.insert(0, module_parent) + importlib.import_module(module_name) + + tasks_path = os.path.join(module_path, "tasks") + if os.path.exists(tasks_path): + from fairseq.tasks import import_tasks + + import_tasks(tasks_path, f"{module_name}.tasks") + + models_path = os.path.join(module_path, "models") + if os.path.exists(models_path): + from fairseq.models import import_models + + import_models(models_path, f"{module_name}.models") + elif module_path in sys.modules[module_name].__path__: + logger.info(f"--user-dir={module_path} has already been imported.") + else: + raise ImportError( + "Failed to import --user-dir={} because the corresponding module name " + "({}) is not globally unique. 
Please rename the directory to " + "something unique and try again.".format(module_path, module_name) + ) + + +def softmax(x, dim: int, onnx_trace: bool = False): + if onnx_trace: + return F.softmax(x.float(), dim=dim) + else: + return F.softmax(x, dim=dim, dtype=torch.float32) + + +def log_softmax(x, dim: int, onnx_trace: bool = False): + if onnx_trace: + return F.log_softmax(x.float(), dim=dim) + else: + return F.log_softmax(x, dim=dim, dtype=torch.float32) + + +def get_perplexity(loss, round=2, base=2): + from fairseq.logging.meters import safe_round + + if loss is None: + return 0.0 + try: + return safe_round(base**loss, round) + except OverflowError: + return float("inf") + + +def deprecation_warning(message, stacklevel=3): + # don't use DeprecationWarning, since it's ignored by default + warnings.warn(message, stacklevel=stacklevel) + + +def relu_squared(x: torch.Tensor): + return F.relu(x).pow(2) + + +def get_activation_fn(activation: str) -> Callable: + """Returns the activation function corresponding to `activation`""" + from fairseq.modules import gelu, gelu_accurate + + if activation == "relu": + return F.relu + elif activation == "relu_squared": + return relu_squared + elif activation == "gelu": + return gelu + elif activation == "gelu_fast": + deprecation_warning( + "--activation-fn=gelu_fast has been renamed to gelu_accurate" + ) + return gelu_accurate + elif activation == "gelu_accurate": + return gelu_accurate + elif activation == "tanh": + return torch.tanh + elif activation == "linear": + return lambda x: x + elif activation == "swish": + return torch.nn.SiLU + else: + raise RuntimeError("--activation-fn {} not supported".format(activation)) + + +def get_available_activation_fns() -> List: + return [ + "relu", + "gelu", + "gelu_fast", # deprecated + "gelu_accurate", + "tanh", + "linear", + ] + + +@contextlib.contextmanager +def model_eval(model): + is_training = model.training + model.eval() + yield + model.train(is_training) + + +def 
has_parameters(module): + try: + next(module.parameters()) + return True + except StopIteration: + return False + + +def get_rng_state(): + state = {"torch_rng_state": torch.get_rng_state()} + if xm is not None: + state["xla_rng_state"] = xm.get_rng_state() + if torch.cuda.is_available(): + state["cuda_rng_state"] = torch.cuda.get_rng_state() + return state + + +def set_rng_state(state): + torch.set_rng_state(state["torch_rng_state"]) + if xm is not None: + xm.set_rng_state(state["xla_rng_state"]) + if torch.cuda.is_available(): + torch.cuda.set_rng_state(state["cuda_rng_state"]) + + +class set_torch_seed(object): + def __init__(self, seed): + assert isinstance(seed, int) + self.rng_state = get_rng_state() + + torch.manual_seed(seed) + if xm is not None: + xm.set_rng_state(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + + def __enter__(self): + return self + + def __exit__(self, *exc): + set_rng_state(self.rng_state) + + +def parse_alignment(line): + """ + Parses a single line from the alingment file. + + Args: + line (str): String containing the alignment of the format: + <src_idx_1>-<tgt_idx_1> <src_idx_2>-<tgt_idx_2> .. + <src_idx_m>-<tgt_idx_m>. All indices are 0 indexed. + + Returns: + torch.IntTensor: packed alignments of shape (2 * m). 
+ """ + alignments = line.strip().split() + parsed_alignment = torch.IntTensor(2 * len(alignments)) + for idx, alignment in enumerate(alignments): + src_idx, tgt_idx = alignment.split("-") + parsed_alignment[2 * idx] = int(src_idx) + parsed_alignment[2 * idx + 1] = int(tgt_idx) + return parsed_alignment + + +def get_token_to_word_mapping(tokens, exclude_list): + n = len(tokens) + word_start = [int(token not in exclude_list) for token in tokens] + word_idx = list(accumulate(word_start)) + token_to_word = {i: word_idx[i] for i in range(n)} + return token_to_word + + +def extract_hard_alignment(attn, src_sent, tgt_sent, pad, eos): + tgt_valid = ( + ((tgt_sent != pad) & (tgt_sent != eos)).nonzero(as_tuple=False).squeeze(dim=-1) + ) + src_invalid = ( + ((src_sent == pad) | (src_sent == eos)).nonzero(as_tuple=False).squeeze(dim=-1) + ) + src_token_to_word = get_token_to_word_mapping(src_sent, [eos, pad]) + tgt_token_to_word = get_token_to_word_mapping(tgt_sent, [eos, pad]) + alignment = [] + if len(tgt_valid) != 0 and len(src_invalid) < len(src_sent): + attn_valid = attn[tgt_valid] + attn_valid[:, src_invalid] = float("-inf") + _, src_indices = attn_valid.max(dim=1) + for tgt_idx, src_idx in zip(tgt_valid, src_indices): + alignment.append( + ( + src_token_to_word[src_idx.item()] - 1, + tgt_token_to_word[tgt_idx.item()] - 1, + ) + ) + return alignment + + +def extract_soft_alignment(attn, src_sent, tgt_sent, pad, eos): + tgt_valid = ((tgt_sent != pad)).nonzero(as_tuple=False) + src_valid = ((src_sent != pad)).nonzero(as_tuple=False).squeeze(dim=-1) + alignment = [] + if len(tgt_valid) != 0 and len(src_valid) != 0: + attn_valid = attn[tgt_valid, src_valid] + alignment = [ + ["{:.6f}".format(p) for p in src_probs.tolist()] for src_probs in attn_valid + ] + return alignment + + +def new_arange(x, *size): + """ + Return a Tensor of `size` filled with a range function on the device of x. + If size is empty, using the size of the variable x. 
+ """ + if len(size) == 0: + size = x.size() + return torch.arange(size[-1], device=x.device).expand(*size).contiguous() + + +def get_tpu_device(): + return xm.xla_device() + + +def tpu_data_loader(itr): + import torch_xla.core.xla_model as xm + import torch_xla.distributed.parallel_loader as pl + + from fairseq.data import iterators + + xm.rendezvous("tpu_data_loader") # wait for all workers + xm.mark_step() + device = xm.xla_device() + return iterators.CountingIterator( + pl.ParallelLoader(itr, [device]).per_device_loader(device), + start=getattr(itr, "n", 0), + total=len(itr), + ) + + +def is_xla_tensor(tensor): + return torch.is_tensor(tensor) and tensor.device.type == "xla" + + +def index_put(tensor, indices, value): + if is_xla_tensor(tensor): + for _ in range(indices.dim(), tensor.dim()): + indices = indices.unsqueeze(-1) + if indices.size(-1) < tensor.size(-1): + indices = indices.expand_as(tensor) + tensor = torch.mul(tensor, ~indices) + torch.mul(value, indices) + else: + tensor[indices] = value + return tensor + + +def xla_device_to_cpu(dat): + import torch_xla.core.xla_model as xm + + return xm._maybe_convert_to_cpu(dat) + + +class CudaEnvironment(object): + def __init__(self): + cur_device = torch.cuda.current_device() + prop = torch.cuda.get_device_properties("cuda:{}".format(cur_device)) + self.name = prop.name + self.major = prop.major + self.minor = prop.minor + self.total_memory_in_GB = prop.total_memory / 1024 / 1024 / 1024 + + @staticmethod + def pretty_print_cuda_env_list(cuda_env_list): + """ + Given a list of CudaEnviorments, pretty print them + """ + num_workers = len(cuda_env_list) + center = "CUDA enviroments for all {} workers".format(num_workers) + banner_len = 40 - len(center) // 2 + first_line = "*" * banner_len + center + "*" * banner_len + logger.info(first_line) + for r, env in enumerate(cuda_env_list): + logger.info( + "rank {:3d}: ".format(r) + + "capabilities = {:2d}.{:<2d} ; ".format(env.major, env.minor) + + "total memory = 
{:.3f} GB ; ".format(env.total_memory_in_GB) + + "name = {:40s}".format(env.name) + ) + logger.info(first_line) + + +def csv_str_list(x): + return x.split(",") + + +def eval_str_list(x, type=float): + if x is None: + return None + if isinstance(x, str): + x = eval(x) + try: + return list(map(type, x)) + except TypeError: + return [type(x)] + + +def eval_str_dict(x, type=dict): + if x is None: + return None + if isinstance(x, str): + x = eval(x) + return x + + +def eval_bool(x, default=False): + if x is None: + return default + try: + return bool(eval(x)) + except TypeError: + return default + + +def reset_logging(): + root = logging.getLogger() + for handler in root.handlers: + root.removeHandler(handler) + root.setLevel(os.environ.get("LOGLEVEL", "INFO").upper()) + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter( + logging.Formatter( + fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + ) + root.addHandler(handler) + + +def safe_getattr(obj, k, default=None): + """Returns obj[k] if it exists and is not None, otherwise returns default.""" + from omegaconf import OmegaConf + + if OmegaConf.is_config(obj): + return obj[k] if k in obj and obj[k] is not None else default + + return getattr(obj, k, default) + + +def safe_hasattr(obj, k): + """Returns True if the given key exists and is not None.""" + return getattr(obj, k, None) is not None + + +def hotreload_function(name=None): + """ + Decorator to function to enable hot-reload for debugging. + It allows you to debug a function without having reloading all heavy models, dataset loading and + preprocessing, allow faster debugging. 
+ If you want to change model or dataset loading, consider relaunching your code + ----------------------------------- + This will run the decorated function func: + if func run successful: + It will pause, allow user to edit code, and prompt user to: + Press enter to re-run the function with updated code + Type "done" to finish the function, return output + Type "disable" to stop pausing this function and let code continue without pause + Ctril + C to terminal + if func raise error: + it will prompt user to + 1. Edit code, and press enter to retry + 2. Ctrl + C to terminate + 3. Type "raise" to raise that exception + * Requirements: + 0. Fairseq was installed with `pip install --editable .` + 1. pip install jurigged[develoop] + 2. set environment HOTRELOAD_PAUSE=1 CUDA_LAUNCH_BLOCKING=1 + 3. Run on only 1 GPU (no distributed) + * How to use: + 1. in python, import and decorate the top-level function to be re-run after code edits: + ```python + from fairseq.utils import hotreload_function + .... + @hotreload_function("train_step") + def train_step(self, sample ....): + .... + .... + ``` + 2. in bash run scripts: + ```bash + watch_dir=<home>/fairseq-py/fairseq/tasks # directory to watch for file changes + export CUDA_VISIBLE_DEVICES=0 # single-gpu + HOTRELOAD_PAUSE=1 CUDA_LAUNCH_BLOCKING=1 python -m jurigged -w ${watch_dir} --poll 2 -v train.py ...... + ``` + * NOTE: + 1. -w ${watch_dir} specify all the files to be watched for changes + once functions, class, ... 
code are changed, all instances in the process will get updated (hot-reload) + * Limitation: + * Currently distributed debugging not working + * Need to launch train.py locally (cannot submit jobs) + """ + try: + import jurigged + except ImportError as e: + logger.warning("Please install jurigged: pip install jurigged[develoop]") + raise e + from fairseq.distributed import utils as distributed_utils + import traceback + + def hotreload_decorator(func): + assert callable(func), f"not callable: {func}" + jname = name or func.__name__ + logger.info(f"jurigged-hotreload:Apply jurigged on {jname}:{func.__name__}") + HOTRELOAD_PAUSE = bool(os.environ.get("HOTRELOAD_PAUSE", 0)) + cublk = bool(os.environ.get("CUDA_LAUNCH_BLOCKING", 0)) + prefix = f"HOTRELOAD:{jname}:[cublk={cublk}]" + hot_reload_state = {"disable": False} + + def func_wrapper(*args, **kwargs): + if not HOTRELOAD_PAUSE or hot_reload_state["disable"]: + return func(*args, **kwargs) + world_size = distributed_utils.get_global_world_size() + assert ( + world_size <= 1 + ), f"HOTRELOAD_PAUSE:{jname} currently cannot do distributed training" + success = False + while not success: + try: + output = func(*args, **kwargs) + # success = True + end_action = input( + f"{prefix}: PAUSE, you may edit code now. Enter to re-run, ctrl+C to terminate, " + f'type "done" to continue (function still being watched), or type "disable" to stop pausing this function :' + ) + if end_action.strip().lower() in ["disable", "done"]: + success = True + else: + logger.warning( + f"{prefix}: action={end_action} function will re-run now." + ) + except Exception as e: + action = input( + f"{prefix}:ERROR: \n{traceback.format_exc()}\n" + f'Edit code to try again: enter to continue, ctrl+C to terminate, or type "raise" to raise the exception: ' + ) + if action.strip().lower() == "raise": + raise e + + if end_action.strip().lower() == "disable": + logger.warning( + f"{prefix}: Stop pausing {jname}. 
The function is still being watched and newly editted code will take effect " + f"if the {jname} is called again later." + f' "unset HOTRELOAD_PAUSE" before relaunch to disable hotreload and' + f" remove @hotreload_function decorator in the code." + ) + hot_reload_state["disable"] = True + return output + + return func_wrapper + + return hotreload_decorator diff --git a/fairseq/fairseq/version.txt b/fairseq/fairseq/version.txt new file mode 100644 index 0000000..26acbf0 --- /dev/null +++ b/fairseq/fairseq/version.txt @@ -0,0 +1 @@ +0.12.2 diff --git a/fairseq/fairseq_cli/__init__.py b/fairseq/fairseq_cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/fairseq_cli/eval_lm.py b/fairseq/fairseq_cli/eval_lm.py new file mode 100644 index 0000000..dbd1450 --- /dev/null +++ b/fairseq/fairseq_cli/eval_lm.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +Evaluate the perplexity of a trained language model. 
+""" + +import logging +import math +import os +import sys +from argparse import Namespace +from typing import Iterable, List, Optional + +import torch +from omegaconf import DictConfig + +import fairseq +from fairseq import checkpoint_utils, distributed_utils, options, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from fairseq.logging.meters import StopwatchMeter +from fairseq.sequence_scorer import SequenceScorer + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.eval_lm") + + +def eval_lm( + models: List[fairseq.models.FairseqModel], + source_dictionary: fairseq.data.Dictionary, + batch_iterator: Iterable, + post_process: Optional[str] = None, + output_word_probs: bool = False, + output_word_stats: bool = False, + target_dictionary: Optional[fairseq.data.Dictionary] = None, + softmax_batch: int = 0, + remove_bos_token: bool = False, + device: Optional[torch.device] = None, +): + """ + Args: + models (List[~fairseq.models.FairseqModel]): list of models to + evaluate. Models are essentially `nn.Module` instances, but + must be compatible with fairseq's `SequenceScorer`. + source_dictionary (~fairseq.data.Dictionary): dictionary for + applying any relevant post processing or outputing word + probs/stats. + batch_iterator (Iterable): yield batches of data + post_process (Optional[str]): post-process text by removing BPE, + letter segmentation, etc. Valid options can be found in + fairseq.data.utils.post_process, although not all options + are implemented here. 
+ output_word_probs (Optional[bool]): output words and their + predicted log probabilities + output_word_stats (Optional[bool]): output word statistics such + as word count and average probability + target_dictionary (Optional[~fairseq.data.Dictionary]): output + dictionary (defaults to *source_dictionary*) + softmax_batch (Optional[bool]): if BxT is more than this, will + batch the softmax over vocab to this amount of tokens, in + order to fit into GPU memory + remove_bos_token (Optional[bool]): if True, confirm that the + first token is the beginning-of-sentence symbol (according + to the relevant dictionary) and remove it from the output + device (Optional[torch.device]): device to use for evaluation + (defaults to device of first model parameter) + """ + if target_dictionary is None: + target_dictionary = source_dictionary + if device is None: + device = next(models[0].parameters()).device + + gen_timer = StopwatchMeter() + scorer = SequenceScorer(target_dictionary, softmax_batch) + + score_sum = 0.0 + count = 0 + + if post_process is not None: + if post_process in {"subword_nmt", "@@ "}: + bpe_cont = post_process.rstrip() + bpe_toks = { + i + for i in range(len(source_dictionary)) + if source_dictionary[i].endswith(bpe_cont) + } + else: + raise NotImplementedError( + f"--post-process={post_process} is not implemented" + ) + bpe_len = len(bpe_cont) + else: + bpe_toks = None + bpe_len = 0 + + word_stats = dict() + + for sample in batch_iterator: + if "net_input" not in sample: + continue + + sample = utils.move_to_cuda(sample, device=device) + + gen_timer.start() + hypos = scorer.generate(models, sample) + gen_timer.stop(sample["ntokens"]) + + for i, hypos_i in enumerate(hypos): + hypo = hypos_i[0] + sample_id = sample["id"][i] + + tokens = hypo["tokens"] + tgt_len = tokens.numel() + pos_scores = hypo["positional_scores"].float() + + if remove_bos_token: + assert hypo["tokens"][0].item() == target_dictionary.bos() + tokens = tokens[1:] + pos_scores = 
pos_scores[1:] + + skipped_toks = 0 + if bpe_toks is not None: + for i in range(tgt_len - 1): + if tokens[i].item() in bpe_toks: + skipped_toks += 1 + pos_scores[i + 1] += pos_scores[i] + pos_scores[i] = 0 + + inf_scores = pos_scores.eq(float("inf")) | pos_scores.eq(float("-inf")) + if inf_scores.any(): + logger.info( + "skipping tokens with inf scores:", + target_dictionary.string(tokens[inf_scores.nonzero()]), + ) + pos_scores = pos_scores[(~inf_scores).nonzero()] + score_sum += pos_scores.sum().cpu() + count += pos_scores.numel() - skipped_toks + + if output_word_probs or output_word_stats: + w = "" + word_prob = [] + is_bpe = False + for i in range(len(tokens)): + w_ind = tokens[i].item() + w += source_dictionary[w_ind] + if bpe_toks is not None and w_ind in bpe_toks: + w = w[:-bpe_len] + is_bpe = True + else: + word_prob.append((w, pos_scores[i].item())) + + next_prob = None + ind = i + 1 + while ind < len(tokens): + if pos_scores[ind].item() != 0: + next_prob = pos_scores[ind] + break + ind += 1 + + word_stats.setdefault(w, WordStat(w, is_bpe)).add( + pos_scores[i].item(), next_prob + ) + is_bpe = False + w = "" + if output_word_probs: + logger.info( + str(int(sample_id)) + + " " + + ( + "\t".join( + "{} [{:2f}]".format(x[0], x[1]) for x in word_prob + ) + ) + ) + + avg_nll_loss = ( + -score_sum / count / math.log(2) if count > 0 else 0 + ) # convert to base 2 + logger.info( + "Evaluated {:,} tokens in {:.1f}s ({:.2f} tokens/s)".format( + gen_timer.n, gen_timer.sum, 1.0 / gen_timer.avg if gen_timer.avg > 0 else 0 + ) + ) + + if output_word_stats: + for ws in sorted(word_stats.values(), key=lambda x: x.count, reverse=True): + logger.info(ws) + + return { + "loss": avg_nll_loss, + "perplexity": 2**avg_nll_loss, + } + + +class WordStat(object): + def __init__(self, word, is_bpe): + self.word = word + self.is_bpe = is_bpe + self.log_prob = 0 + self.next_word_prob = 0 + self.count = 0 + self.missing_next_words = 0 + + def add(self, log_prob, next_word_prob): + 
"""increments counters for the sum of log probs of current word and next + word (given context ending at current word). Since the next word might be at the end of the example, + or it might be not counted because it is not an ending subword unit, + also keeps track of how many of those we have seen""" + if next_word_prob is not None: + self.next_word_prob += next_word_prob + else: + self.missing_next_words += 1 + self.log_prob += log_prob + self.count += 1 + + def __str__(self): + return "{}\t{}\t{}\t{}\t{}\t{}".format( + self.word, + self.count, + self.log_prob, + self.is_bpe, + self.next_word_prob, + self.count - self.missing_next_words, + ) + + +def main(cfg: DictConfig, **unused_kwargs): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + utils.import_user_module(cfg.common) + + logger.info(cfg) + + if cfg.eval_lm.context_window > 0: + # reduce tokens per sample by the required context window size + cfg.task.tokens_per_sample -= cfg.eval_lm.context_window + + # Initialize the task using the current *cfg* + task = tasks.setup_task(cfg.task) + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, model_args, task = checkpoint_utils.load_model_ensemble_and_task( + [cfg.common_eval.path], + arg_overrides=eval(cfg.common_eval.model_overrides), + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + task=task, + ) + + use_fp16 = cfg.common.fp16 + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + if use_cuda: + torch.cuda.set_device(cfg.distributed_training.device_id) + + # Optimize ensemble for generation and set the source and dest dicts on the model + # (required by scorer) + for model in models: + if use_fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + assert len(models) > 0 + + 
logger.info( + "num. model params: {:,}".format(sum(p.numel() for p in models[0].parameters())) + ) + + # Load dataset splits + task.load_dataset(cfg.dataset.gen_subset) + dataset = task.dataset(cfg.dataset.gen_subset) + logger.info( + "{} {} {:,} examples".format( + cfg.task.data, cfg.dataset.gen_subset, len(dataset) + ) + ) + + itr = task.eval_lm_dataloader( + dataset=dataset, + max_tokens=cfg.dataset.max_tokens or 36000, + batch_size=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + *[model.max_positions() for model in models] + ), + num_shards=max( + cfg.dataset.num_shards, + cfg.distributed_training.distributed_world_size, + ), + shard_id=max( + cfg.dataset.shard_id, + cfg.distributed_training.distributed_rank, + ), + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + context_window=cfg.eval_lm.context_window, + ) + + itr = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + results = eval_lm( + models=models, + source_dictionary=task.source_dictionary, + batch_iterator=itr, + post_process=cfg.common_eval.post_process, + output_word_probs=cfg.eval_lm.output_word_probs, + output_word_stats=cfg.eval_lm.output_word_stats, + target_dictionary=task.target_dictionary, + softmax_batch=cfg.eval_lm.softmax_batch, + remove_bos_token=getattr(cfg.task, "add_bos_token", False), + ) + + logger.info( + "Loss (base 2): {:.4f}, Perplexity: {:.2f}".format( + results["loss"], results["perplexity"] + ) + ) + + return results + + +def cli_main(): + parser = options.get_eval_lm_parser() + args = options.parse_args_and_arch(parser) + + distributed_utils.call_main(convert_namespace_to_omegaconf(args), main) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/generate.py b/fairseq/fairseq_cli/generate.py new file mode 100644 index 0000000..b875783 --- 
/dev/null +++ b/fairseq/fairseq_cli/generate.py @@ -0,0 +1,417 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Translate pre-processed data with a trained model. +""" + +import ast +import logging +import math +import os +import sys +from argparse import Namespace +from itertools import chain + +import numpy as np +import torch +from omegaconf import DictConfig + +from fairseq import checkpoint_utils, options, scoring, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from fairseq.logging.meters import StopwatchMeter, TimeMeter + + +def main(cfg: DictConfig): + + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + assert cfg.common_eval.path is not None, "--path required for generation!" + assert ( + not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam + ), "--sampling requires --nbest to be equal to --beam" + assert ( + cfg.generation.replace_unk is None or cfg.dataset.dataset_impl == "raw" + ), "--replace-unk requires a raw text dataset (--dataset-impl=raw)" + + if cfg.common_eval.results_path is not None: + os.makedirs(cfg.common_eval.results_path, exist_ok=True) + output_path = os.path.join( + cfg.common_eval.results_path, + "generate-{}.txt".format(cfg.dataset.gen_subset), + ) + with open(output_path, "w", buffering=1, encoding="utf-8") as h: + return _main(cfg, h) + else: + return _main(cfg, sys.stdout) + + +def get_symbols_to_strip_from_output(generator): + if hasattr(generator, "symbols_to_strip_from_output"): + return generator.symbols_to_strip_from_output + else: + return {generator.eos} + + +def _main(cfg: DictConfig, output_file): + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + 
level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=output_file, + ) + logger = logging.getLogger("fairseq_cli.generate") + + utils.import_user_module(cfg.common) + + if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None: + cfg.dataset.max_tokens = 12000 + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + + # Load dataset splits + task = tasks.setup_task(cfg.task) + + # Set dictionaries + try: + src_dict = getattr(task, "source_dictionary", None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + overrides = ast.literal_eval(cfg.common_eval.model_overrides) + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides=overrides, + task=task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + + # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config + task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task) + + if cfg.generation.lm_path is not None: + overrides["data"] = cfg.task.data + + try: + lms, _ = checkpoint_utils.load_model_ensemble( + [cfg.generation.lm_path], arg_overrides=overrides, task=None + ) + except: + logger.warning( + f"Failed to load language model! 
Please make sure that the language model dict is the same " + f"as target dict and is located in the data dir ({cfg.task.data})" + ) + raise + + assert len(lms) == 1 + else: + lms = [None] + + # Optimize ensemble for generation + for model in chain(models, lms): + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(cfg.generation.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(cfg.dataset.gen_subset), + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + task.max_positions(), *[m.max_positions() for m in models] + ), + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=cfg.distributed_training.distributed_world_size, + shard_id=cfg.distributed_training.distributed_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + # Initialize generator + gen_timer = StopwatchMeter() + + extra_gen_cls_kwargs = {"lm_model": lms[0], "lm_weight": cfg.generation.lm_weight} + generator = task.build_generator( + models, cfg.generation, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + # Handle tokenization and BPE + tokenizer = task.build_tokenizer(cfg.tokenizer) + bpe = task.build_bpe(cfg.bpe) + + def decode_fn(x): + if bpe is not None: + x = 
bpe.decode(x) + if tokenizer is not None: + x = tokenizer.decode(x) + return x + + scorer = scoring.build_scorer(cfg.scoring, tgt_dict) + + num_sentences = 0 + has_target = True + wps_meter = TimeMeter() + for sample in progress: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if "net_input" not in sample: + continue + + prefix_tokens = None + if cfg.generation.prefix_size > 0: + prefix_tokens = sample["target"][:, : cfg.generation.prefix_size] + + constraints = None + if "constraints" in sample: + constraints = sample["constraints"] + + gen_timer.start() + hypos = task.inference_step( + generator, + models, + sample, + prefix_tokens=prefix_tokens, + constraints=constraints, + ) + num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i, sample_id in enumerate(sample["id"].tolist()): + has_target = sample["target"] is not None + + # Remove padding + if "src_tokens" in sample["net_input"]: + src_tokens = utils.strip_pad( + sample["net_input"]["src_tokens"][i, :], tgt_dict.pad() + ) + else: + src_tokens = None + + target_tokens = None + if has_target: + target_tokens = ( + utils.strip_pad(sample["target"][i, :], tgt_dict.pad()).int().cpu() + ) + + # Either retrieve the original sentences or regenerate them from tokens. 
+ if align_dict is not None: + src_str = task.dataset(cfg.dataset.gen_subset).src.get_original_text( + sample_id + ) + target_str = task.dataset(cfg.dataset.gen_subset).tgt.get_original_text( + sample_id + ) + else: + if src_dict is not None: + src_str = src_dict.string(src_tokens, cfg.common_eval.post_process) + else: + src_str = "" + if has_target: + target_str = tgt_dict.string( + target_tokens, + cfg.common_eval.post_process, + escape_unk=True, + extra_symbols_to_ignore=get_symbols_to_strip_from_output( + generator + ), + ) + + src_str = decode_fn(src_str) + if has_target: + target_str = decode_fn(target_str) + + if not cfg.common_eval.quiet: + if src_dict is not None: + print("S-{}\t{}".format(sample_id, src_str), file=output_file) + if has_target: + print("T-{}\t{}".format(sample_id, target_str), file=output_file) + + # Process top predictions + for j, hypo in enumerate(hypos[i][: cfg.generation.nbest]): + hypo_tokens, hypo_str, alignment = utils.post_process_prediction( + hypo_tokens=hypo["tokens"].int().cpu(), + src_str=src_str, + alignment=hypo["alignment"], + align_dict=align_dict, + tgt_dict=tgt_dict, + remove_bpe=cfg.common_eval.post_process, + extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator), + ) + detok_hypo_str = decode_fn(hypo_str) + if not cfg.common_eval.quiet: + score = hypo["score"] / math.log(2) # convert to base 2 + # original hypothesis (after tokenization and BPE) + print( + "H-{}\t{}\t{}".format(sample_id, score, hypo_str), + file=output_file, + ) + # detokenized hypothesis + print( + "D-{}\t{}\t{}".format(sample_id, score, detok_hypo_str), + file=output_file, + ) + print( + "P-{}\t{}".format( + sample_id, + " ".join( + map( + lambda x: "{:.4f}".format(x), + # convert from base e to base 2 + hypo["positional_scores"] + .div_(math.log(2)) + .tolist(), + ) + ), + ), + file=output_file, + ) + + if cfg.generation.print_alignment == "hard": + print( + "A-{}\t{}".format( + sample_id, + " ".join( + [ + "{}-{}".format(src_idx, 
tgt_idx) + for src_idx, tgt_idx in alignment + ] + ), + ), + file=output_file, + ) + if cfg.generation.print_alignment == "soft": + print( + "A-{}\t{}".format( + sample_id, + " ".join( + [",".join(src_probs) for src_probs in alignment] + ), + ), + file=output_file, + ) + + if cfg.generation.print_step: + print( + "I-{}\t{}".format(sample_id, hypo["steps"]), + file=output_file, + ) + + if cfg.generation.retain_iter_history: + for step, h in enumerate(hypo["history"]): + _, h_str, _ = utils.post_process_prediction( + hypo_tokens=h["tokens"].int().cpu(), + src_str=src_str, + alignment=None, + align_dict=None, + tgt_dict=tgt_dict, + remove_bpe=None, + ) + print( + "E-{}_{}\t{}".format(sample_id, step, h_str), + file=output_file, + ) + + # Score only the top hypothesis + if has_target and j == 0: + if ( + align_dict is not None + or cfg.common_eval.post_process is not None + ): + # Convert back to tokens for evaluation with unk replacement and/or without BPE + target_tokens = tgt_dict.encode_line( + target_str, add_if_not_exist=True + ) + hypo_tokens = tgt_dict.encode_line( + detok_hypo_str, add_if_not_exist=True + ) + if hasattr(scorer, "add_string"): + scorer.add_string(target_str, detok_hypo_str) + else: + scorer.add(target_tokens, hypo_tokens) + + wps_meter.update(num_generated_tokens) + progress.log({"wps": round(wps_meter.avg)}) + num_sentences += ( + sample["nsentences"] if "nsentences" in sample else sample["id"].numel() + ) + + logger.info("NOTE: hypothesis and token scores are output in base 2") + logger.info( + "Translated {:,} sentences ({:,} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)".format( + num_sentences, + gen_timer.n, + gen_timer.sum, + num_sentences / gen_timer.sum, + 1.0 / gen_timer.avg, + ) + ) + if has_target: + if cfg.bpe and not cfg.generation.sacrebleu: + if cfg.common_eval.post_process: + logger.warning( + "BLEU score is being computed by splitting detokenized string on spaces, this is probably not what you want. 
Use --sacrebleu for standard 13a BLEU tokenization" + ) + else: + logger.warning( + "If you are using BPE on the target side, the BLEU score is computed on BPE tokens, not on proper words. Use --sacrebleu for standard 13a BLEU tokenization" + ) + # use print to be consistent with other main outputs: S-, H-, T-, D- and so on + print( + "Generate {} with beam={}: {}".format( + cfg.dataset.gen_subset, cfg.generation.beam, scorer.result_string() + ), + file=output_file, + ) + + return scorer + + +def cli_main(): + parser = options.get_generation_parser() + # TODO: replace this workaround with refactoring of `AudioPretraining` + parser.add_argument( + "--arch", + "-a", + metavar="ARCH", + default="wav2vec2", + help="Model architecture. For constructing tasks that rely on " + "model args (e.g. `AudioPretraining`)", + ) + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/hydra_train.py b/fairseq/fairseq_cli/hydra_train.py new file mode 100644 index 0000000..607340a --- /dev/null +++ b/fairseq/fairseq_cli/hydra_train.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os + +import hydra +import torch +from hydra.core.hydra_config import HydraConfig +from omegaconf import OmegaConf, open_dict + +from fairseq import distributed_utils, metrics +from fairseq.dataclass.configs import FairseqConfig +from fairseq.dataclass.initialize import add_defaults, hydra_init +from fairseq.dataclass.utils import omegaconf_no_object_check +from fairseq.utils import reset_logging +from fairseq_cli.train import main as pre_main + +logger = logging.getLogger("fairseq_cli.hydra_train") + + +@hydra.main(config_path=os.path.join("..", "fairseq", "config"), config_name="config") +def hydra_main(cfg: FairseqConfig) -> float: + _hydra_main(cfg) + + +def _hydra_main(cfg: FairseqConfig, **kwargs) -> float: + add_defaults(cfg) + + if cfg.common.reset_logging: + reset_logging() # Hydra hijacks logging, fix that + else: + # check if directly called or called through hydra_main + if HydraConfig.initialized(): + with open_dict(cfg): + # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126) + cfg.job_logging_cfg = OmegaConf.to_container( + HydraConfig.get().job_logging, resolve=True + ) + + with omegaconf_no_object_check(): + cfg = OmegaConf.create( + OmegaConf.to_container(cfg, resolve=True, enum_to_str=True) + ) + OmegaConf.set_struct(cfg, True) + + try: + if cfg.common.profile: + with torch.cuda.profiler.profile(): + with torch.autograd.profiler.emit_nvtx(): + distributed_utils.call_main(cfg, pre_main, **kwargs) + else: + distributed_utils.call_main(cfg, pre_main, **kwargs) + except BaseException as e: + if not cfg.common.suppress_crashes: + raise + else: + logger.error("Crashed! 
" + str(e)) + + # get best val and return - useful for sweepers + try: + best_val = metrics.get_smoothed_value( + "valid", cfg.checkpoint.best_checkpoint_metric + ) + except: + best_val = None + + if best_val is None: + best_val = float("inf") + + return best_val + + +def cli_main(): + try: + from hydra._internal.utils import get_args + + cfg_name = get_args().config_name or "config" + except: + logger.warning("Failed to get config name from hydra args") + cfg_name = "config" + + hydra_init(cfg_name) + hydra_main() + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/hydra_validate.py b/fairseq/fairseq_cli/hydra_validate.py new file mode 100644 index 0000000..cb6f761 --- /dev/null +++ b/fairseq/fairseq_cli/hydra_validate.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os +import sys +from itertools import chain + +import torch +from hydra.core.hydra_config import HydraConfig +from omegaconf import OmegaConf, open_dict +import hydra + +from fairseq import checkpoint_utils, distributed_utils, utils +from fairseq.dataclass.configs import FairseqConfig +from fairseq.dataclass.initialize import add_defaults, hydra_init +from fairseq.dataclass.utils import omegaconf_no_object_check +from fairseq.distributed import utils as distributed_utils +from fairseq.logging import metrics, progress_bar +from fairseq.utils import reset_logging + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.validate") + + +@hydra.main(config_path=os.path.join("..", "fairseq", "config"), config_name="config") +def hydra_main(cfg: FairseqConfig) -> float: + return _hydra_main(cfg) + + +def 
_hydra_main(cfg: FairseqConfig, **kwargs) -> float: + add_defaults(cfg) + + if cfg.common.reset_logging: + reset_logging() # Hydra hijacks logging, fix that + else: + # check if directly called or called through hydra_main + if HydraConfig.initialized(): + with open_dict(cfg): + # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126) + cfg.job_logging_cfg = OmegaConf.to_container( + HydraConfig.get().job_logging, resolve=True + ) + + with omegaconf_no_object_check(): + cfg = OmegaConf.create( + OmegaConf.to_container(cfg, resolve=True, enum_to_str=True) + ) + OmegaConf.set_struct(cfg, True) + + assert ( + cfg.dataset.max_tokens is not None or cfg.dataset.batch_size is not None + ), "Must specify batch size either with --max-tokens or --batch-size" + + distributed_utils.call_main(cfg, validate, **kwargs) + + +def validate(cfg): + utils.import_user_module(cfg.common) + + use_fp16 = cfg.common.fp16 + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + + if use_cuda: + torch.cuda.set_device(cfg.distributed_training.device_id) + + if cfg.distributed_training.distributed_world_size > 1: + data_parallel_world_size = distributed_utils.get_data_parallel_world_size() + data_parallel_rank = distributed_utils.get_data_parallel_rank() + else: + data_parallel_world_size = 1 + data_parallel_rank = 0 + + overrides = {"task": {"data": cfg.task.data}} + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [cfg.common_eval.path], + arg_overrides=overrides, + suffix=cfg.checkpoint.checkpoint_suffix, + ) + model = models[0] + + # Move models to GPU + for model in models: + model.eval() + if use_fp16: + model.half() + if use_cuda: + model.cuda() + + # Print args + logger.info(saved_cfg) + + # Build criterion + criterion = task.build_criterion(saved_cfg.criterion, from_checkpoint=True) + criterion.eval() + + for subset in 
cfg.dataset.valid_subset.split(","): + try: + task.load_dataset(subset, combine=False, epoch=1, task_cfg=saved_cfg.task) + dataset = task.dataset(subset) + except KeyError: + raise Exception("Cannot find dataset: " + subset) + + # Initialize data iterator + itr = task.get_batch_iterator( + dataset=dataset, + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + task.max_positions(), + *[m.max_positions() for m in models], + ), + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=data_parallel_world_size, + shard_id=data_parallel_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + prefix=f"valid on '{subset}' subset", + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + + log_outputs = [] + for i, sample in enumerate(progress): + sample = utils.move_to_cuda(sample) if use_cuda else sample + + if use_fp16: + sample = utils.apply_to_sample(apply_half, sample) + + _loss, _sample_size, log_output = task.valid_step(sample, model, criterion) + with metrics.aggregate() as agg: + task.reduce_metrics([log_output], criterion) + progress.log(agg.get_smoothed_values(), step=i) + # progress.log(log_output, step=i) from vision + log_outputs.append(log_output) + + if data_parallel_world_size > 1: + log_outputs = distributed_utils.all_gather_list( + log_outputs, + max_size=cfg.common.all_gather_list_size, + group=distributed_utils.get_data_parallel_group(), + ) + log_outputs = list(chain.from_iterable(log_outputs)) + + with metrics.aggregate() as agg: + 
task.reduce_metrics(log_outputs, criterion) + log_output = agg.get_smoothed_values() + + progress.print(log_output, tag=subset, step=i) + + +def cli_main(): + try: + from hydra._internal.utils import get_args + + cfg_name = get_args().config_name or "config" + except: + logger.warning("Failed to get config name from hydra args") + cfg_name = "config" + + hydra_init(cfg_name) + hydra_main() + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/interactive.py b/fairseq/fairseq_cli/interactive.py new file mode 100644 index 0000000..03265d0 --- /dev/null +++ b/fairseq/fairseq_cli/interactive.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Translate raw text with a trained model. Batches data on-the-fly. +""" + +import ast +import fileinput +import logging +import math +import os +import sys +import time +from argparse import Namespace +from collections import namedtuple + +import numpy as np +import torch + +from fairseq import checkpoint_utils, distributed_utils, options, tasks, utils +from fairseq.dataclass.configs import FairseqConfig +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.token_generation_constraints import pack_constraints, unpack_constraints +from fairseq_cli.generate import get_symbols_to_strip_from_output + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.interactive") + + +Batch = namedtuple("Batch", "ids src_tokens src_lengths constraints") +Translation = namedtuple("Translation", "src_str hypos pos_scores alignments") + + +def buffered_read(input, buffer_size): + buffer = [] + with fileinput.input(files=[input], 
openhook=fileinput.hook_encoded("utf-8")) as h: + for src_str in h: + buffer.append(src_str.strip()) + if len(buffer) >= buffer_size: + yield buffer + buffer = [] + + if len(buffer) > 0: + yield buffer + + +def make_batches(lines, cfg, task, max_positions, encode_fn): + def encode_fn_target(x): + return encode_fn(x) + + if cfg.generation.constraints: + # Strip (tab-delimited) contraints, if present, from input lines, + # store them in batch_constraints + batch_constraints = [list() for _ in lines] + for i, line in enumerate(lines): + if "\t" in line: + lines[i], *batch_constraints[i] = line.split("\t") + + # Convert each List[str] to List[Tensor] + for i, constraint_list in enumerate(batch_constraints): + batch_constraints[i] = [ + task.target_dictionary.encode_line( + encode_fn_target(constraint), + append_eos=False, + add_if_not_exist=False, + ) + for constraint in constraint_list + ] + + if cfg.generation.constraints: + constraints_tensor = pack_constraints(batch_constraints) + else: + constraints_tensor = None + + tokens, lengths = task.get_interactive_tokens_and_lengths(lines, encode_fn) + + itr = task.get_batch_iterator( + dataset=task.build_dataset_for_inference( + tokens, lengths, constraints=constraints_tensor + ), + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=max_positions, + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + ).next_epoch_itr(shuffle=False) + for batch in itr: + ids = batch["id"] + src_tokens = batch["net_input"]["src_tokens"] + src_lengths = batch["net_input"]["src_lengths"] + constraints = batch.get("constraints", None) + + yield Batch( + ids=ids, + src_tokens=src_tokens, + src_lengths=src_lengths, + constraints=constraints, + ) + + +def main(cfg: FairseqConfig): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + start_time = time.time() + total_translate_time = 0 + + utils.import_user_module(cfg.common) + + if cfg.interactive.buffer_size 
< 1: + cfg.interactive.buffer_size = 1 + if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None: + cfg.dataset.batch_size = 1 + + assert ( + not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam + ), "--sampling requires --nbest to be equal to --beam" + assert ( + not cfg.dataset.batch_size + or cfg.dataset.batch_size <= cfg.interactive.buffer_size + ), "--batch-size cannot be larger than --buffer-size" + + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + + # Setup task, e.g., translation + task = tasks.setup_task(cfg.task) + + # Load ensemble + overrides = ast.literal_eval(cfg.common_eval.model_overrides) + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, _model_args = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides=overrides, + task=task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + + # Set dictionaries + src_dict = task.source_dictionary + tgt_dict = task.target_dictionary + + # Optimize ensemble for generation + for model in models: + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + # Initialize generator + generator = task.build_generator(models, cfg.generation) + + # Handle tokenization and BPE + tokenizer = task.build_tokenizer(cfg.tokenizer) + bpe = task.build_bpe(cfg.bpe) + + def encode_fn(x): + if tokenizer is not None: + x = tokenizer.encode(x) + if bpe is not None: + x = bpe.encode(x) + return x + + def decode_fn(x): + if bpe is not None: + x = bpe.decode(x) + if tokenizer 
is not None: + x = tokenizer.decode(x) + return x + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(cfg.generation.replace_unk) + + max_positions = utils.resolve_max_positions( + task.max_positions(), *[model.max_positions() for model in models] + ) + + if cfg.generation.constraints: + logger.warning( + "NOTE: Constrained decoding currently assumes a shared subword vocabulary." + ) + + if cfg.interactive.buffer_size > 1: + logger.info("Sentence buffer size: %s", cfg.interactive.buffer_size) + logger.info("NOTE: hypothesis and token scores are output in base 2") + logger.info("Type the input sentence and press return:") + start_id = 0 + for inputs in buffered_read(cfg.interactive.input, cfg.interactive.buffer_size): + results = [] + for batch in make_batches(inputs, cfg, task, max_positions, encode_fn): + bsz = batch.src_tokens.size(0) + src_tokens = batch.src_tokens + src_lengths = batch.src_lengths + constraints = batch.constraints + if use_cuda: + src_tokens = src_tokens.cuda() + src_lengths = src_lengths.cuda() + if constraints is not None: + constraints = constraints.cuda() + + sample = { + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + }, + } + translate_start_time = time.time() + translations = task.inference_step( + generator, models, sample, constraints=constraints + ) + translate_time = time.time() - translate_start_time + total_translate_time += translate_time + list_constraints = [[] for _ in range(bsz)] + if cfg.generation.constraints: + list_constraints = [unpack_constraints(c) for c in constraints] + for i, (id, hypos) in enumerate(zip(batch.ids.tolist(), translations)): + src_tokens_i = utils.strip_pad(src_tokens[i], tgt_dict.pad()) + constraints = list_constraints[i] + results.append( + ( + start_id + id, + src_tokens_i, + hypos, + { + "constraints": constraints, + "time": translate_time / 
len(translations), + }, + ) + ) + + # sort output to match input order + for id_, src_tokens, hypos, info in sorted(results, key=lambda x: x[0]): + src_str = "" + if src_dict is not None: + src_str = src_dict.string(src_tokens, cfg.common_eval.post_process) + print("S-{}\t{}".format(id_, src_str)) + print("W-{}\t{:.3f}\tseconds".format(id_, info["time"])) + for constraint in info["constraints"]: + print( + "C-{}\t{}".format( + id_, + tgt_dict.string(constraint, cfg.common_eval.post_process), + ) + ) + + # Process top predictions + for hypo in hypos[: min(len(hypos), cfg.generation.nbest)]: + hypo_tokens, hypo_str, alignment = utils.post_process_prediction( + hypo_tokens=hypo["tokens"].int().cpu(), + src_str=src_str, + alignment=hypo["alignment"], + align_dict=align_dict, + tgt_dict=tgt_dict, + remove_bpe=cfg.common_eval.post_process, + extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator), + ) + detok_hypo_str = decode_fn(hypo_str) + score = hypo["score"] / math.log(2) # convert to base 2 + # original hypothesis (after tokenization and BPE) + print("H-{}\t{}\t{}".format(id_, score, hypo_str)) + # detokenized hypothesis + print("D-{}\t{}\t{}".format(id_, score, detok_hypo_str)) + print( + "P-{}\t{}".format( + id_, + " ".join( + map( + lambda x: "{:.4f}".format(x), + # convert from base e to base 2 + hypo["positional_scores"].div_(math.log(2)).tolist(), + ) + ), + ) + ) + if cfg.generation.print_alignment: + alignment_str = " ".join( + ["{}-{}".format(src, tgt) for src, tgt in alignment] + ) + print("A-{}\t{}".format(id_, alignment_str)) + + # update running id_ counter + start_id += len(inputs) + + logger.info( + "Total time: {:.3f} seconds; translation time: {:.3f}".format( + time.time() - start_time, total_translate_time + ) + ) + + +def cli_main(): + parser = options.get_interactive_generation_parser() + args = options.parse_args_and_arch(parser) + distributed_utils.call_main(convert_namespace_to_omegaconf(args), main) + + +if __name__ == 
"__main__": + cli_main() diff --git a/fairseq/fairseq_cli/preprocess.py b/fairseq/fairseq_cli/preprocess.py new file mode 100644 index 0000000..2ba9e09 --- /dev/null +++ b/fairseq/fairseq_cli/preprocess.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Data pre-processing: build vocabularies and binarize training data. +""" + +import logging +import os +import shutil +import sys +import typing as tp +from argparse import Namespace +from itertools import zip_longest + +from fairseq import options, tasks, utils +from fairseq.binarizer import ( + AlignmentDatasetBinarizer, + FileBinarizer, + VocabularyDatasetBinarizer, +) +from fairseq.data import Dictionary + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.preprocess") + +##################################################################### +# file name tools +##################################################################### + + +def _train_path(lang, trainpref): + return "{}{}".format(trainpref, ("." 
+ lang) if lang else "") + + +def _file_name(prefix, lang): + fname = prefix + if lang is not None: + fname += ".{lang}".format(lang=lang) + return fname + + +def _dest_path(prefix, lang, destdir): + return os.path.join(destdir, _file_name(prefix, lang)) + + +def _dict_path(lang, destdir): + return _dest_path("dict", lang, destdir) + ".txt" + + +def dataset_dest_prefix(args, output_prefix, lang): + base = os.path.join(args.destdir, output_prefix) + if lang is not None: + lang_part = f".{args.source_lang}-{args.target_lang}.{lang}" + elif args.only_source: + lang_part = "" + else: + lang_part = f".{args.source_lang}-{args.target_lang}" + + return "{}{}".format(base, lang_part) + + +def dataset_dest_file(args, output_prefix, lang, extension): + return "{}.{}".format(dataset_dest_prefix(args, output_prefix, lang), extension) + + +##################################################################### +# dictionary tools +##################################################################### + + +def _build_dictionary( + filenames, + task, + args, + src=False, + tgt=False, +): + assert src ^ tgt + return task.build_dictionary( + filenames, + workers=args.workers, + threshold=args.thresholdsrc if src else args.thresholdtgt, + nwords=args.nwordssrc if src else args.nwordstgt, + padding_factor=args.padding_factor, + ) + + +##################################################################### +# bin file creation logic +##################################################################### + + +def _make_binary_dataset( + vocab: Dictionary, + input_prefix: str, + output_prefix: str, + lang: tp.Optional[str], + num_workers: int, + args: Namespace, +): + logger.info("[{}] Dictionary: {} types".format(lang, len(vocab))) + + binarizer = VocabularyDatasetBinarizer( + vocab, + append_eos=True, + ) + + input_file = "{}{}".format(input_prefix, ("." 
+ lang) if lang is not None else "") + full_output_prefix = dataset_dest_prefix(args, output_prefix, lang) + + final_summary = FileBinarizer.multiprocess_dataset( + input_file, + args.dataset_impl, + binarizer, + full_output_prefix, + vocab_size=len(vocab), + num_workers=num_workers, + ) + + logger.info(f"[{lang}] {input_file}: {final_summary} (by {vocab.unk_word})") + + +def _make_binary_alignment_dataset( + input_prefix: str, output_prefix: str, num_workers: int, args: Namespace +): + + binarizer = AlignmentDatasetBinarizer(utils.parse_alignment) + + input_file = input_prefix + full_output_prefix = dataset_dest_prefix(args, output_prefix, lang=None) + + final_summary = FileBinarizer.multiprocess_dataset( + input_file, + args.dataset_impl, + binarizer, + full_output_prefix, + vocab_size=None, + num_workers=num_workers, + ) + + logger.info( + "[alignments] {}: parsed {} alignments".format( + input_file, final_summary.num_seq + ) + ) + + +##################################################################### +# routing logic +##################################################################### + + +def _make_dataset( + vocab: Dictionary, + input_prefix: str, + output_prefix: str, + lang: tp.Optional[str], + args: Namespace, + num_workers: int, +): + if args.dataset_impl == "raw": + # Copy original text file to destination folder + output_text_file = _dest_path( + output_prefix + ".{}-{}".format(args.source_lang, args.target_lang), + lang, + args.destdir, + ) + shutil.copyfile(_file_name(input_prefix, lang), output_text_file) + else: + _make_binary_dataset( + vocab, input_prefix, output_prefix, lang, num_workers, args + ) + + +def _make_all(lang, vocab, args): + if args.trainpref: + _make_dataset( + vocab, args.trainpref, "train", lang, args=args, num_workers=args.workers + ) + if args.validpref: + for k, validpref in enumerate(args.validpref.split(",")): + outprefix = "valid{}".format(k) if k > 0 else "valid" + _make_dataset( + vocab, validpref, outprefix, lang, 
args=args, num_workers=args.workers + ) + if args.testpref: + for k, testpref in enumerate(args.testpref.split(",")): + outprefix = "test{}".format(k) if k > 0 else "test" + _make_dataset( + vocab, testpref, outprefix, lang, args=args, num_workers=args.workers + ) + + +def _make_all_alignments(args): + if args.trainpref and os.path.exists(args.trainpref + "." + args.align_suffix): + _make_binary_alignment_dataset( + args.trainpref + "." + args.align_suffix, + "train.align", + num_workers=args.workers, + args=args, + ) + if args.validpref and os.path.exists(args.validpref + "." + args.align_suffix): + _make_binary_alignment_dataset( + args.validpref + "." + args.align_suffix, + "valid.align", + num_workers=args.workers, + args=args, + ) + if args.testpref and os.path.exists(args.testpref + "." + args.align_suffix): + _make_binary_alignment_dataset( + args.testpref + "." + args.align_suffix, + "test.align", + num_workers=args.workers, + args=args, + ) + + +##################################################################### +# align +##################################################################### + + +def _align_files(args, src_dict, tgt_dict): + assert args.trainpref, "--trainpref must be set if --alignfile is specified" + src_file_name = _train_path(args.source_lang, args.trainpref) + tgt_file_name = _train_path(args.target_lang, args.trainpref) + freq_map = {} + with open(args.alignfile, "r", encoding="utf-8") as align_file: + with open(src_file_name, "r", encoding="utf-8") as src_file: + with open(tgt_file_name, "r", encoding="utf-8") as tgt_file: + for a, s, t in zip_longest(align_file, src_file, tgt_file): + si = src_dict.encode_line(s, add_if_not_exist=False) + ti = tgt_dict.encode_line(t, add_if_not_exist=False) + ai = list(map(lambda x: tuple(x.split("-")), a.split())) + for sai, tai in ai: + srcidx = si[int(sai)] + tgtidx = ti[int(tai)] + if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk(): + assert srcidx != src_dict.pad() + assert srcidx != 
src_dict.eos() + assert tgtidx != tgt_dict.pad() + assert tgtidx != tgt_dict.eos() + if srcidx not in freq_map: + freq_map[srcidx] = {} + if tgtidx not in freq_map[srcidx]: + freq_map[srcidx][tgtidx] = 1 + else: + freq_map[srcidx][tgtidx] += 1 + align_dict = {} + for srcidx in freq_map.keys(): + align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get) + with open( + os.path.join( + args.destdir, + "alignment.{}-{}.txt".format(args.source_lang, args.target_lang), + ), + "w", + encoding="utf-8", + ) as f: + for k, v in align_dict.items(): + print("{} {}".format(src_dict[k], tgt_dict[v]), file=f) + + +##################################################################### +# MAIN +##################################################################### + + +def main(args): + # setup some basic things + utils.import_user_module(args) + + os.makedirs(args.destdir, exist_ok=True) + + logger.addHandler( + logging.FileHandler( + filename=os.path.join(args.destdir, "preprocess.log"), + ) + ) + logger.info(args) + + assert ( + args.dataset_impl != "huffman" + ), "preprocessing.py doesn't support Huffman yet, use HuffmanCodeBuilder directly." 
+ + # build dictionaries + + target = not args.only_source + + if not args.srcdict and os.path.exists(_dict_path(args.source_lang, args.destdir)): + raise FileExistsError(_dict_path(args.source_lang, args.destdir)) + + if ( + target + and not args.tgtdict + and os.path.exists(_dict_path(args.target_lang, args.destdir)) + ): + raise FileExistsError(_dict_path(args.target_lang, args.destdir)) + + task = tasks.get_task(args.task) + + if args.joined_dictionary: + assert ( + not args.srcdict or not args.tgtdict + ), "cannot use both --srcdict and --tgtdict with --joined-dictionary" + + if args.srcdict: + src_dict = task.load_dictionary(args.srcdict) + elif args.tgtdict: + src_dict = task.load_dictionary(args.tgtdict) + else: + assert ( + args.trainpref + ), "--trainpref must be set if --srcdict is not specified" + src_dict = _build_dictionary( + { + _train_path(lang, args.trainpref) + for lang in [args.source_lang, args.target_lang] + }, + task=task, + args=args, + src=True, + ) + tgt_dict = src_dict + else: + if args.srcdict: + src_dict = task.load_dictionary(args.srcdict) + else: + assert ( + args.trainpref + ), "--trainpref must be set if --srcdict is not specified" + src_dict = _build_dictionary( + [_train_path(args.source_lang, args.trainpref)], + task=task, + args=args, + src=True, + ) + + if target: + if args.tgtdict: + tgt_dict = task.load_dictionary(args.tgtdict) + else: + assert ( + args.trainpref + ), "--trainpref must be set if --tgtdict is not specified" + tgt_dict = _build_dictionary( + [_train_path(args.target_lang, args.trainpref)], + task=task, + args=args, + tgt=True, + ) + else: + tgt_dict = None + + # save dictionaries + + src_dict.save(_dict_path(args.source_lang, args.destdir)) + if target and tgt_dict is not None: + tgt_dict.save(_dict_path(args.target_lang, args.destdir)) + + if args.dict_only: + return + + _make_all(args.source_lang, src_dict, args) + if target: + _make_all(args.target_lang, tgt_dict, args) + + # align the datasets if needed + 
if args.align_suffix: + _make_all_alignments(args) + + logger.info("Wrote preprocessed data to {}".format(args.destdir)) + + if args.alignfile: + _align_files(args, src_dict=src_dict, tgt_dict=tgt_dict) + + +def cli_main(): + parser = options.get_preprocessing_parser() + args = parser.parse_args() + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/score.py b/fairseq/fairseq_cli/score.py new file mode 100644 index 0000000..0b207be --- /dev/null +++ b/fairseq/fairseq_cli/score.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +BLEU scoring of generated translations against reference translations. +""" + +import argparse +import os +import sys + +from fairseq.data import dictionary +from fairseq.scoring import bleu + + +def get_parser(): + parser = argparse.ArgumentParser( + description="Command-line script for BLEU scoring." 
+ ) + # fmt: off + parser.add_argument('-s', '--sys', default='-', help='system output') + parser.add_argument('-r', '--ref', required=True, help='references') + parser.add_argument('-o', '--order', default=4, metavar='N', + type=int, help='consider ngrams up to this order') + parser.add_argument('--ignore-case', action='store_true', + help='case-insensitive scoring') + parser.add_argument('--sacrebleu', action='store_true', + help='score with sacrebleu') + parser.add_argument('--sentence-bleu', action='store_true', + help='report sentence-level BLEUs (i.e., with +1 smoothing)') + # fmt: on + return parser + + +def cli_main(): + parser = get_parser() + args = parser.parse_args() + print(args) + + assert args.sys == "-" or os.path.exists( + args.sys + ), "System output file {} does not exist".format(args.sys) + assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref) + + dict = dictionary.Dictionary() + + def readlines(fd): + for line in fd.readlines(): + if args.ignore_case: + yield line.lower() + else: + yield line + + if args.sacrebleu: + import sacrebleu + + def score(fdsys): + with open(args.ref) as fdref: + print(sacrebleu.corpus_bleu(fdsys, [fdref]).format()) + + elif args.sentence_bleu: + + def score(fdsys): + with open(args.ref) as fdref: + scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk()) + for i, (sys_tok, ref_tok) in enumerate( + zip(readlines(fdsys), readlines(fdref)) + ): + scorer.reset(one_init=True) + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + print(i, scorer.result_string(args.order)) + + else: + + def score(fdsys): + with open(args.ref) as fdref: + scorer = bleu.Scorer( + bleu.BleuConfig( + pad=dict.pad(), + eos=dict.eos(), + unk=dict.unk(), + ) + ) + for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + 
print(scorer.result_string(args.order)) + + if args.sys == "-": + score(sys.stdin) + else: + with open(args.sys, "r") as f: + score(f) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/train.py b/fairseq/fairseq_cli/train.py new file mode 100644 index 0000000..f771bff --- /dev/null +++ b/fairseq/fairseq_cli/train.py @@ -0,0 +1,581 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Train a new model on one or across multiple GPUs. +""" + +import argparse +import logging +import math +import os +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple + +# We need to setup root logger before importing any fairseq libraries. +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.train") + +import numpy as np +import torch +from omegaconf import DictConfig, OmegaConf + +from fairseq import checkpoint_utils, options, quantization_utils, tasks, utils +from fairseq.data import data_utils, iterators +from fairseq.data.plasma_utils import PlasmaStore +from fairseq.dataclass.configs import FairseqConfig +from fairseq.dataclass.initialize import add_defaults +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.distributed import fsdp_enable_wrap, fsdp_wrap +from fairseq.distributed import utils as distributed_utils +from fairseq.file_io import PathManager +from fairseq.logging import meters, metrics, progress_bar +from fairseq.model_parallel.megatron_trainer import MegatronTrainer +from fairseq.trainer import Trainer + + +def main(cfg: FairseqConfig) -> None: + if isinstance(cfg, argparse.Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + 
utils.import_user_module(cfg.common) + add_defaults(cfg) + + if ( + distributed_utils.is_master(cfg.distributed_training) + and "job_logging_cfg" in cfg + ): + # make hydra logging work with ddp (see # see https://github.com/facebookresearch/hydra/issues/1126) + logging.config.dictConfig(OmegaConf.to_container(cfg.job_logging_cfg)) + + assert ( + cfg.dataset.max_tokens is not None or cfg.dataset.batch_size is not None + ), "Must specify batch size either with --max-tokens or --batch-size" + metrics.reset() + + if cfg.common.log_file is not None: + handler = logging.FileHandler(filename=cfg.common.log_file) + logger.addHandler(handler) + + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + if distributed_utils.is_master(cfg.distributed_training): + checkpoint_utils.verify_checkpoint_directory(cfg.checkpoint.save_dir) + + # Print args + logger.info(cfg) + + if cfg.checkpoint.write_checkpoints_asynchronously: + try: + import iopath # noqa: F401 + except ImportError: + logging.exception( + "Asynchronous checkpoint writing is specified but iopath is " + "not installed: `pip install iopath`" + ) + return + + # Setup task, e.g., translation, language modeling, etc. + task = tasks.setup_task(cfg.task) + + assert cfg.criterion, "Please specify criterion to train a model" + + # Build model and criterion + if cfg.distributed_training.ddp_backend == "fully_sharded": + with fsdp_enable_wrap(cfg.distributed_training): + model = fsdp_wrap(task.build_model(cfg.model)) + else: + model = task.build_model(cfg.model) + criterion = task.build_criterion(cfg.criterion) + logger.info(model) + logger.info("task: {}".format(task.__class__.__name__)) + logger.info("model: {}".format(model.__class__.__name__)) + logger.info("criterion: {}".format(criterion.__class__.__name__)) + logger.info( + "num. shared model params: {:,} (num. 
trained: {:,})".format( + sum( + p.numel() for p in model.parameters() if not getattr(p, "expert", False) + ), + sum( + p.numel() + for p in model.parameters() + if not getattr(p, "expert", False) and p.requires_grad + ), + ) + ) + + logger.info( + "num. expert model params: {} (num. trained: {})".format( + sum(p.numel() for p in model.parameters() if getattr(p, "expert", False)), + sum( + p.numel() + for p in model.parameters() + if getattr(p, "expert", False) and p.requires_grad + ), + ) + ) + + # Load valid dataset (we load training data below, based on the latest checkpoint) + # We load the valid dataset AFTER building the model + if not cfg.dataset.disable_validation: + data_utils.raise_if_valid_subsets_unintentionally_ignored(cfg) + if cfg.dataset.combine_valid_subsets: + task.load_dataset("valid", combine=True, epoch=1) + else: + for valid_sub_split in cfg.dataset.valid_subset.split(","): + task.load_dataset(valid_sub_split, combine=False, epoch=1) + + # (optionally) Configure quantization + if cfg.common.quantization_config_path is not None: + quantizer = quantization_utils.Quantizer( + config_path=cfg.common.quantization_config_path, + max_epoch=cfg.optimization.max_epoch, + max_update=cfg.optimization.max_update, + ) + else: + quantizer = None + + # Build trainer + if cfg.common.model_parallel_size == 1: + trainer = Trainer(cfg, task, model, criterion, quantizer) + else: + trainer = MegatronTrainer(cfg, task, model, criterion) + logger.info( + "training on {} devices (GPUs/TPUs)".format( + cfg.distributed_training.distributed_world_size + ) + ) + logger.info( + "max tokens per device = {} and max sentences per device = {}".format( + cfg.dataset.max_tokens, + cfg.dataset.batch_size, + ) + ) + + # Load the latest checkpoint if one is available and restore the + # corresponding train iterator + extra_state, epoch_itr = checkpoint_utils.load_checkpoint( + cfg.checkpoint, + trainer, + # don't cache epoch iterators for sharded datasets + 
disable_iterator_cache=task.has_sharded_data("train"), + ) + if cfg.common.tpu: + import torch_xla.core.xla_model as xm + + xm.rendezvous("load_checkpoint") # wait for all workers + + max_epoch = cfg.optimization.max_epoch or math.inf + lr = trainer.get_lr() + + # TODO: a dry run on validation set to pin the memory + valid_subsets = cfg.dataset.valid_subset.split(",") + if not cfg.dataset.disable_validation: + for subset in valid_subsets: + logger.info('begin dry-run validation on "{}" subset'.format(subset)) + itr = trainer.get_valid_iterator(subset).next_epoch_itr( + shuffle=False, set_dataset_epoch=False # use a fixed valid set + ) + if cfg.common.tpu: + itr = utils.tpu_data_loader(itr) + for _ in itr: + pass + # TODO: end of dry run section + + train_meter = meters.StopwatchMeter() + train_meter.start() + while epoch_itr.next_epoch_idx <= max_epoch: + if lr <= cfg.optimization.stop_min_lr: + logger.info( + f"stopping training because current learning rate ({lr}) is smaller " + "than or equal to minimum learning rate " + f"(--stop-min-lr={cfg.optimization.stop_min_lr})" + ) + break + + # train for one epoch + valid_losses, should_stop = train(cfg, trainer, task, epoch_itr) + if should_stop: + break + + # only use first validation loss to update the learning rate + lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0]) + + epoch_itr = trainer.get_train_iterator( + epoch_itr.next_epoch_idx, + # sharded data: get train iterator for next epoch + load_dataset=task.has_sharded_data("train"), + # don't cache epoch iterators for sharded datasets + disable_iterator_cache=task.has_sharded_data("train"), + ) + train_meter.stop() + logger.info("done training in {:.1f} seconds".format(train_meter.sum)) + + # ioPath implementation to wait for all asynchronous file writes to complete. + if cfg.checkpoint.write_checkpoints_asynchronously: + logger.info( + "ioPath PathManager waiting for all asynchronous checkpoint " + "writes to finish." 
+ ) + PathManager.async_close() + logger.info("ioPath PathManager finished waiting.") + + +def should_stop_early(cfg: DictConfig, valid_loss: float) -> bool: + # skip check if no validation was done in the current epoch + if valid_loss is None: + return False + if cfg.checkpoint.patience <= 0: + return False + + def is_better(a, b): + return a > b if cfg.checkpoint.maximize_best_checkpoint_metric else a < b + + prev_best = getattr(should_stop_early, "best", None) + if prev_best is None or is_better(valid_loss, prev_best): + should_stop_early.best = valid_loss + should_stop_early.num_runs = 0 + return False + else: + should_stop_early.num_runs += 1 + if should_stop_early.num_runs >= cfg.checkpoint.patience: + logger.info( + "early stop since valid performance hasn't improved for last {} runs".format( + cfg.checkpoint.patience + ) + ) + return True + else: + return False + + +@metrics.aggregate("train") +def train( + cfg: DictConfig, trainer: Trainer, task: tasks.FairseqTask, epoch_itr +) -> Tuple[List[Optional[float]], bool]: + """Train the model for one epoch and return validation losses.""" + # Initialize data iterator + itr = epoch_itr.next_epoch_itr( + fix_batches_to_gpus=cfg.distributed_training.fix_batches_to_gpus, + shuffle=(epoch_itr.next_epoch_idx > cfg.dataset.curriculum), + ) + update_freq = ( + cfg.optimization.update_freq[epoch_itr.epoch - 1] + if epoch_itr.epoch <= len(cfg.optimization.update_freq) + else cfg.optimization.update_freq[-1] + ) + itr = iterators.GroupedIterator( + itr, + update_freq, + skip_remainder_batch=cfg.optimization.skip_remainder_batch, + ) + if cfg.common.tpu: + itr = utils.tpu_data_loader(itr) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_file=cfg.common.log_file, + log_interval=cfg.common.log_interval, + epoch=epoch_itr.epoch, + aim_repo=( + cfg.common.aim_repo + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + aim_run_hash=( + cfg.common.aim_run_hash + if 
distributed_utils.is_master(cfg.distributed_training) + else None + ), + aim_param_checkpoint_dir=cfg.checkpoint.save_dir, + tensorboard_logdir=( + cfg.common.tensorboard_logdir + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + wandb_project=( + cfg.common.wandb_project + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + wandb_run_name=os.environ.get( + "WANDB_NAME", os.path.basename(cfg.checkpoint.save_dir) + ), + azureml_logging=( + cfg.common.azureml_logging + if distributed_utils.is_master(cfg.distributed_training) + else False + ), + ) + progress.update_config(_flatten_config(cfg)) + + trainer.begin_epoch(epoch_itr.epoch) + + valid_subsets = cfg.dataset.valid_subset.split(",") + should_stop = False + num_updates = trainer.get_num_updates() + logger.info("Start iterating over samples") + for i, samples in enumerate(progress): + with metrics.aggregate("train_inner"), torch.autograd.profiler.record_function( + "train_step-%d" % i + ): + log_output = trainer.train_step(samples) + + if log_output is not None: # not OOM, overflow, ... 
+ # log mid-epoch stats + num_updates = trainer.get_num_updates() + if num_updates % cfg.common.log_interval == 0: + stats = get_training_stats(metrics.get_smoothed_values("train_inner")) + progress.log(stats, tag="train_inner", step=num_updates) + + # reset mid-epoch stats after each log interval + # the end-of-epoch stats will still be preserved + metrics.reset_meters("train_inner") + + end_of_epoch = not itr.has_next() + valid_losses, should_stop = validate_and_save( + cfg, trainer, task, epoch_itr, valid_subsets, end_of_epoch + ) + + if should_stop: + break + + # log end-of-epoch stats + logger.info("end of epoch {} (average epoch stats below)".format(epoch_itr.epoch)) + stats = get_training_stats(metrics.get_smoothed_values("train")) + progress.print(stats, tag="train", step=num_updates) + + # reset epoch-level meters + metrics.reset_meters("train") + return valid_losses, should_stop + + +def _flatten_config(cfg: DictConfig): + config = OmegaConf.to_container(cfg) + # remove any legacy Namespaces and replace with a single "args" + namespace = None + for k, v in list(config.items()): + if isinstance(v, argparse.Namespace): + namespace = v + del config[k] + if namespace is not None: + config["args"] = vars(namespace) + return config + + +def validate_and_save( + cfg: DictConfig, + trainer: Trainer, + task: tasks.FairseqTask, + epoch_itr, + valid_subsets: List[str], + end_of_epoch: bool, +) -> Tuple[List[Optional[float]], bool]: + num_updates = trainer.get_num_updates() + max_update = cfg.optimization.max_update or math.inf + + # Stopping conditions (and an additional one based on validation loss later + # on) + should_stop = False + if num_updates >= max_update: + should_stop = True + logger.info( + f"Stopping training due to " + f"num_updates: {num_updates} >= max_update: {max_update}" + ) + + training_time_hours = trainer.cumulative_training_time() / (60 * 60) + if ( + cfg.optimization.stop_time_hours > 0 + and training_time_hours > 
cfg.optimization.stop_time_hours + ): + should_stop = True + logger.info( + f"Stopping training due to " + f"cumulative_training_time: {training_time_hours} > " + f"stop_time_hours: {cfg.optimization.stop_time_hours} hour(s)" + ) + + do_save = ( + (end_of_epoch and epoch_itr.epoch % cfg.checkpoint.save_interval == 0) + or should_stop + or ( + cfg.checkpoint.save_interval_updates > 0 + and num_updates > 0 + and num_updates % cfg.checkpoint.save_interval_updates == 0 + and num_updates >= cfg.dataset.validate_after_updates + ) + ) + do_validate = ( + ( + (not end_of_epoch and do_save) # validate during mid-epoch saves + or (end_of_epoch and epoch_itr.epoch % cfg.dataset.validate_interval == 0) + or should_stop + or ( + cfg.dataset.validate_interval_updates > 0 + and num_updates > 0 + and num_updates % cfg.dataset.validate_interval_updates == 0 + ) + ) + and not cfg.dataset.disable_validation + and num_updates >= cfg.dataset.validate_after_updates + ) + + # Validate + valid_losses = [None] + if do_validate: + valid_losses = validate(cfg, trainer, task, epoch_itr, valid_subsets) + + should_stop |= should_stop_early(cfg, valid_losses[0]) + + # Save checkpoint + if do_save or should_stop: + cp_path = checkpoint_utils.save_checkpoint( + cfg.checkpoint, trainer, epoch_itr, valid_losses[0] + ) + if cp_path is not None and hasattr(task, "post_save"): + task.post_save(cp_path, num_updates) + + return valid_losses, should_stop + + +def get_training_stats(stats: Dict[str, Any]) -> Dict[str, Any]: + stats["wall"] = round(metrics.get_meter("default", "wall").elapsed_time, 0) + return stats + + +def validate( + cfg: DictConfig, + trainer: Trainer, + task: tasks.FairseqTask, + epoch_itr, + subsets: List[str], +) -> List[Optional[float]]: + """Evaluate the model on the validation set(s) and return the losses.""" + + if cfg.dataset.fixed_validation_seed is not None: + # set fixed seed for every validation + utils.set_torch_seed(cfg.dataset.fixed_validation_seed) + + 
trainer.begin_valid_epoch(epoch_itr.epoch) + valid_losses = [] + for subset_idx, subset in enumerate(subsets): + logger.info('begin validation on "{}" subset'.format(subset)) + + # Initialize data iterator + itr = trainer.get_valid_iterator(subset).next_epoch_itr( + shuffle=False, set_dataset_epoch=False # use a fixed valid set + ) + if cfg.common.tpu: + itr = utils.tpu_data_loader(itr) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + epoch=epoch_itr.epoch, + prefix=f"valid on '{subset}' subset", + aim_repo=( + cfg.common.aim_repo + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + aim_run_hash=( + cfg.common.aim_run_hash + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + aim_param_checkpoint_dir=cfg.checkpoint.save_dir, + tensorboard_logdir=( + cfg.common.tensorboard_logdir + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + wandb_project=( + cfg.common.wandb_project + if distributed_utils.is_master(cfg.distributed_training) + else None + ), + wandb_run_name=os.environ.get( + "WANDB_NAME", os.path.basename(cfg.checkpoint.save_dir) + ), + ) + + # create a new root metrics aggregator so validation metrics + # don't pollute other aggregators (e.g., train meters) + with metrics.aggregate(new_root=True) as agg: + for i, sample in enumerate(progress): + if ( + cfg.dataset.max_valid_steps is not None + and i > cfg.dataset.max_valid_steps + ): + break + trainer.valid_step(sample) + + # log validation stats + # only tracking the best metric on the 1st validation subset + tracking_best = subset_idx == 0 + stats = get_valid_stats(cfg, trainer, agg.get_smoothed_values(), tracking_best) + + if hasattr(task, "post_validate"): + task.post_validate(trainer.get_model(), stats, agg) + + progress.print(stats, tag=subset, step=trainer.get_num_updates()) 
+ + valid_losses.append(stats[cfg.checkpoint.best_checkpoint_metric]) + return valid_losses + + +def get_valid_stats( + cfg: DictConfig, + trainer: Trainer, + stats: Dict[str, Any], + tracking_best: bool, +) -> Dict[str, Any]: + stats["num_updates"] = trainer.get_num_updates() + if tracking_best and hasattr(checkpoint_utils.save_checkpoint, "best"): + key = "best_{0}".format(cfg.checkpoint.best_checkpoint_metric) + best_function = max if cfg.checkpoint.maximize_best_checkpoint_metric else min + stats[key] = best_function( + checkpoint_utils.save_checkpoint.best, + stats[cfg.checkpoint.best_checkpoint_metric], + ) + return stats + + +def cli_main( + modify_parser: Optional[Callable[[argparse.ArgumentParser], None]] = None +) -> None: + parser = options.get_training_parser() + args = options.parse_args_and_arch(parser, modify_parser=modify_parser) + + cfg = convert_namespace_to_omegaconf(args) + + if cfg.common.use_plasma_view: + server = PlasmaStore(path=cfg.common.plasma_path) + logger.info( + f"Started plasma server pid {server.server.pid} {cfg.common.plasma_path}" + ) + + if args.profile: + with torch.cuda.profiler.profile(): + with torch.autograd.profiler.emit_nvtx(): + distributed_utils.call_main(cfg, main) + else: + distributed_utils.call_main(cfg, main) + + # if cfg.common.use_plasma_view: + # server.server.kill() + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/fairseq_cli/validate.py b/fairseq/fairseq_cli/validate.py new file mode 100644 index 0000000..4617b6d --- /dev/null +++ b/fairseq/fairseq_cli/validate.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +import sys +from argparse import Namespace +from itertools import chain + +import torch +from omegaconf import DictConfig + +from fairseq import checkpoint_utils, distributed_utils, options, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import metrics, progress_bar +from fairseq.utils import reset_logging + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("fairseq_cli.validate") + + +def main(cfg: DictConfig, override_args=None): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + utils.import_user_module(cfg.common) + + reset_logging() + + assert ( + cfg.dataset.max_tokens is not None or cfg.dataset.batch_size is not None + ), "Must specify batch size either with --max-tokens or --batch-size" + + use_fp16 = cfg.common.fp16 + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + + if use_cuda: + torch.cuda.set_device(cfg.distributed_training.device_id) + + if cfg.distributed_training.distributed_world_size > 1: + data_parallel_world_size = distributed_utils.get_data_parallel_world_size() + data_parallel_rank = distributed_utils.get_data_parallel_rank() + else: + data_parallel_world_size = 1 + data_parallel_rank = 0 + + if override_args is not None: + overrides = vars(override_args) + overrides.update(eval(getattr(override_args, "model_overrides", "{}"))) + else: + overrides = None + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [cfg.common_eval.path], + arg_overrides=overrides, + suffix=cfg.checkpoint.checkpoint_suffix, + ) + model = models[0] + + # Move models to GPU + for model in models: + model.eval() + if use_fp16: + model.half() + if use_cuda: + 
model.cuda() + + # Print args + logger.info(saved_cfg) + + # Build criterion + criterion = task.build_criterion(saved_cfg.criterion) + criterion.eval() + + for subset in cfg.dataset.valid_subset.split(","): + try: + task.load_dataset(subset, combine=False, epoch=1, task_cfg=saved_cfg.task) + dataset = task.dataset(subset) + except KeyError: + raise Exception("Cannot find dataset: " + subset) + + # Initialize data iterator + itr = task.get_batch_iterator( + dataset=dataset, + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + task.max_positions(), + *[m.max_positions() for m in models], + ), + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=data_parallel_world_size, + shard_id=data_parallel_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + prefix=f"valid on '{subset}' subset", + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + log_outputs = [] + for i, sample in enumerate(progress): + sample = utils.move_to_cuda(sample) if use_cuda else sample + _loss, _sample_size, log_output = task.valid_step(sample, model, criterion) + progress.log(log_output, step=i) + log_outputs.append(log_output) + + if data_parallel_world_size > 1: + log_outputs = distributed_utils.all_gather_list( + log_outputs, + max_size=cfg.common.all_gather_list_size, + group=distributed_utils.get_data_parallel_group(), + ) + log_outputs = list(chain.from_iterable(log_outputs)) + + with metrics.aggregate() as agg: + task.reduce_metrics(log_outputs, criterion) + log_output = agg.get_smoothed_values() + + progress.print(log_output, tag=subset, step=i) + + +def 
cli_main(): + parser = options.get_validation_parser() + args = options.parse_args_and_arch(parser) + + # only override args that are explicitly given on the command line + override_parser = options.get_validation_parser() + override_args = options.parse_args_and_arch(override_parser, suppress_defaults=True) + + distributed_utils.call_main( + convert_namespace_to_omegaconf(args), main, override_args=override_args + ) + + +if __name__ == "__main__": + cli_main() diff --git a/fairseq/hubconf.py b/fairseq/hubconf.py new file mode 100644 index 0000000..5949e27 --- /dev/null +++ b/fairseq/hubconf.py @@ -0,0 +1,73 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +"""isort:skip_file""" + +import functools +import importlib + + +dependencies = [ + "dataclasses", + "hydra", + "numpy", + "omegaconf", + "regex", + "requests", + "torch", +] + + +# Check for required dependencies and raise a RuntimeError if any are missing. +missing_deps = [] +for dep in dependencies: + try: + importlib.import_module(dep) + except ImportError: + # Hack: the hydra package is provided under the "hydra-core" name in + # pypi. We don't want the user mistakenly calling `pip install hydra` + # since that will install an unrelated package. 
+ if dep == "hydra": + dep = "hydra-core" + missing_deps.append(dep) +if len(missing_deps) > 0: + raise RuntimeError("Missing dependencies: {}".format(", ".join(missing_deps))) + + +# only do fairseq imports after checking for dependencies +from fairseq.hub_utils import ( # noqa; noqa + BPEHubInterface as bpe, + TokenizerHubInterface as tokenizer, +) +from fairseq.models import MODEL_REGISTRY # noqa + + +# torch.hub doesn't build Cython components, so if they are not found then try +# to build them here +try: + import fairseq.data.token_block_utils_fast # noqa +except ImportError: + try: + import cython # noqa + import os + from setuptools import sandbox + + sandbox.run_setup( + os.path.join(os.path.dirname(__file__), "setup.py"), + ["build_ext", "--inplace"], + ) + except ImportError: + print( + "Unable to build Cython components. Please make sure Cython is " + "installed if the torch.hub model you are loading depends on it." + ) + + +# automatically expose models defined in FairseqModel::hub_models +for _model_type, _cls in MODEL_REGISTRY.items(): + for model_name in _cls.hub_models().keys(): + globals()[model_name] = functools.partial( + _cls.from_pretrained, + model_name, + ) diff --git a/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/__init__.py b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/__init__.py new file mode 100644 index 0000000..4884f5b --- /dev/null +++ b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +__version__ = "0.1" diff --git a/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/config.py b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/config.py new file mode 100644 index 0000000..91926c4 --- /dev/null +++ b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/config.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +from dataclasses import dataclass, field + +from hydra.core.config_store import ConfigStore + +from hydra_plugins.hydra_submitit_launcher.config import SlurmQueueConf + + +@dataclass +class DependencySubmititConf(SlurmQueueConf): + """Slurm configuration overrides and specific parameters""" + + _target_: str = ( + "hydra_plugins.dependency_submitit_launcher.launcher.DependencySubmititLauncher" + ) + + +ConfigStore.instance().store( + group="hydra/launcher", + name="dependency_submitit_slurm", + node=DependencySubmititConf(), + provider="dependency_submitit_slurm", +) diff --git a/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/launcher.py b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/launcher.py new file mode 100644 index 0000000..b3fcf79 --- /dev/null +++ b/fairseq/hydra_plugins/dependency_submitit_launcher/hydra_plugins/dependency_submitit_launcher/launcher.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +import logging +import os +import subprocess +from pathlib import Path +from typing import Any, List, Sequence + +from hydra.core.singleton import Singleton +from hydra.core.utils import JobReturn, filter_overrides +from omegaconf import OmegaConf + +log = logging.getLogger(__name__) + +from .config import DependencySubmititConf +from hydra_plugins.hydra_submitit_launcher.submitit_launcher import BaseSubmititLauncher + + +class DependencySubmititLauncher(BaseSubmititLauncher): + _EXECUTOR = "slurm" + + def launch( + self, job_overrides: Sequence[Sequence[str]], initial_job_idx: int + ) -> Sequence[JobReturn]: + + # lazy import to ensure plugin discovery remains fast + import submitit + + assert self.config is not None + + num_jobs = len(job_overrides) + assert num_jobs > 0 + + next_script = None + + for jo in job_overrides: + if next_script is None: + for item in jo: + if "next_script=" in item: + next_script = item + break + assert ( + next_script is not None + ), "job overrides must contain +next_script=path/to/next/script" + jo.remove(next_script) + + idx = next_script.find("=") + next_script = next_script[idx + 1 :] + + params = self.params + # build executor + init_params = {"folder": self.params["submitit_folder"]} + specific_init_keys = {"max_num_timeout"} + + init_params.update( + **{ + f"{self._EXECUTOR}_{x}": y + for x, y in params.items() + if x in specific_init_keys + } + ) + init_keys = specific_init_keys | {"submitit_folder"} + executor = submitit.AutoExecutor(cluster=self._EXECUTOR, **init_params) + + # specify resources/parameters + baseparams = set(OmegaConf.structured(DependencySubmititConf).keys()) + params = { + x if x in baseparams else f"{self._EXECUTOR}_{x}": y + for x, y in params.items() + if x not in init_keys + } + executor.update_parameters(**params) + + log.info( + f"Submitit '{self._EXECUTOR}' sweep output dir : " + f"{self.config.hydra.sweep.dir}" + ) + sweep_dir = Path(str(self.config.hydra.sweep.dir)) + 
sweep_dir.mkdir(parents=True, exist_ok=True) + if "mode" in self.config.hydra.sweep: + mode = int(str(self.config.hydra.sweep.mode), 8) + os.chmod(sweep_dir, mode=mode) + + job_params: List[Any] = [] + for idx, overrides in enumerate(job_overrides): + idx = initial_job_idx + idx + lst = " ".join(filter_overrides(overrides)) + log.info(f"\t#{idx} : {lst}") + job_params.append( + ( + list(overrides), + "hydra.sweep.dir", + idx, + f"job_id_for_{idx}", + Singleton.get_state(), + ) + ) + + jobs = executor.map_array(self, *zip(*job_params)) + + for j, jp in zip(jobs, job_params): + job_id = str(j.job_id) + task_id = "0" if "_" not in job_id else job_id.split("_")[1] + sweep_config = self.config_loader.load_sweep_config(self.config, jp[0]) + dir = sweep_config.hydra.sweep.dir + + dir = ( + dir.replace("[", "") + .replace("]", "") + .replace("{", "") + .replace("}", "") + .replace(",", "_") + .replace("'", "") + .replace('"', "") + ) + + subprocess.call( + [next_script, job_id, task_id, dir], + shell=False, + ) + + return [j.results()[0] for j in jobs] diff --git a/fairseq/hydra_plugins/dependency_submitit_launcher/setup.py b/fairseq/hydra_plugins/dependency_submitit_launcher/setup.py new file mode 100644 index 0000000..bf79546 --- /dev/null +++ b/fairseq/hydra_plugins/dependency_submitit_launcher/setup.py @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# type: ignore +from pathlib import Path + +from read_version import read_version +from setuptools import find_namespace_packages, setup + +setup( + name="dependency-submitit-launcher", + version=read_version("hydra_plugins/dependency_submitit_launcher", "__init__.py"), + author="Alexei Baevski", + author_email="abaevski@fb.com", + description="Dependency-supporting Submitit Launcher for Hydra apps", + packages=find_namespace_packages(include=["hydra_plugins.*"]), + classifiers=[ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Operating System :: MacOS", + "Operating System :: POSIX :: Linux", + "Development Status :: 4 - Beta", + ], + install_requires=[ + "hydra-core>=1.0.4", + "submitit>=1.0.0", + ], + include_package_data=True, +) diff --git a/fairseq/pyproject.toml b/fairseq/pyproject.toml new file mode 100644 index 0000000..4d84c9b --- /dev/null +++ b/fairseq/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = [ + "setuptools>=18.0", + "wheel", + "cython", + "numpy>=1.21.3", + "torch>=1.10", +] +build-backend = "setuptools.build_meta" + +[tool.black] +extend-exclude = ''' +( +^/examples/| +^/fairseq/model_parallel/megatron| +^/build/ +) +''' + +[tool.isort] +profile = "black" +known_third_party = "_cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml" 
+skip_gitignore = true diff --git a/fairseq/release_utils.py b/fairseq/release_utils.py new file mode 100644 index 0000000..69a5e8d --- /dev/null +++ b/fairseq/release_utils.py @@ -0,0 +1,72 @@ +import argparse +from typing import Tuple + + +def get_next_version(release_type) -> Tuple[Tuple[int, int, int], str, str]: + current_ver = find_version("fairseq/version.txt") + version_list = [int(x) for x in current_ver.strip("'").split(".")] + major, minor, patch = version_list[0], version_list[1], version_list[2] + if release_type == "patch": + patch += 1 + elif release_type == "minor": + minor += 1 + patch = 0 + elif release_type == "major": + major += 1 + minor = patch = 0 + else: + raise ValueError( + "Incorrect release type specified. Acceptable types are major, minor and patch." + ) + + new_version_tuple = (major, minor, patch) + new_version_str = ".".join([str(x) for x in new_version_tuple]) + new_tag_str = "v" + new_version_str + return new_version_tuple, new_version_str, new_tag_str + + +def find_version(version_file_path) -> str: + with open(version_file_path) as f: + version = f.read().strip() + return version + + +def update_version(new_version_str) -> None: + """ + given the current version, update the version to the + next version depending on the type of release. 
+ """ + + with open("fairseq/version.txt", "w") as writer: + writer.write(new_version_str) + + +def main(args): + if args.release_type in ["major", "minor", "patch"]: + new_version_tuple, new_version, new_tag = get_next_version(args.release_type) + else: + raise ValueError("Incorrect release type specified") + + if args.update_version: + update_version(new_version) + + print(new_version, new_tag) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Versioning utils") + parser.add_argument( + "--release-type", + type=str, + required=True, + help="type of release = major/minor/patch", + ) + parser.add_argument( + "--update-version", + action="store_true", + required=False, + help="updates the version in fairseq/version.txt", + ) + + args = parser.parse_args() + main(args) diff --git a/fairseq/scripts/__init__.py b/fairseq/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/scripts/average_checkpoints.py b/fairseq/scripts/average_checkpoints.py new file mode 100644 index 0000000..49f4f9d --- /dev/null +++ b/fairseq/scripts/average_checkpoints.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import collections +import os +import re + +import torch + +from fairseq.file_io import PathManager + + +def average_checkpoints(inputs): + """Loads checkpoints from inputs and returns a model with averaged weights. + + Args: + inputs: An iterable of string paths of checkpoints to load from. + + Returns: + A dict of string keys mapping to various values. The 'model' key + from the returned dict should correspond to an OrderedDict mapping + string parameter names to torch Tensors. 
+ """ + params_dict = collections.OrderedDict() + params_keys = None + new_state = None + num_models = len(inputs) + + for fpath in inputs: + with PathManager.open(fpath, "rb") as f: + state = torch.load( + f, + map_location=( + lambda s, _: torch.serialization.default_restore_location(s, "cpu") + ), + ) + # Copies over the settings from the first checkpoint + if new_state is None: + new_state = state + + model_params = state["model"] + + model_params_keys = list(model_params.keys()) + if params_keys is None: + params_keys = model_params_keys + elif params_keys != model_params_keys: + raise KeyError( + "For checkpoint {}, expected list of params: {}, " + "but found: {}".format(f, params_keys, model_params_keys) + ) + + for k in params_keys: + p = model_params[k] + if isinstance(p, torch.HalfTensor): + p = p.float() + if k not in params_dict: + params_dict[k] = p.clone() + # NOTE: clone() is needed in case of p is a shared parameter + else: + params_dict[k] += p + + averaged_params = collections.OrderedDict() + for k, v in params_dict.items(): + averaged_params[k] = v + if averaged_params[k].is_floating_point(): + averaged_params[k].div_(num_models) + else: + averaged_params[k] //= num_models + new_state["model"] = averaged_params + return new_state + + +def last_n_checkpoints(paths, n, update_based, upper_bound=None): + assert len(paths) == 1 + path = paths[0] + if update_based: + pt_regexp = re.compile(r"checkpoint_\d+_(\d+)\.pt") + else: + pt_regexp = re.compile(r"checkpoint(\d+)\.pt") + files = PathManager.ls(path) + + entries = [] + for f in files: + m = pt_regexp.fullmatch(f) + if m is not None: + sort_key = int(m.group(1)) + if upper_bound is None or sort_key <= upper_bound: + entries.append((sort_key, m.group(0))) + if len(entries) < n: + raise Exception( + "Found {} checkpoint files but need at least {}", len(entries), n + ) + return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)[:n]] + + +def main(): + parser = argparse.ArgumentParser( + 
description="Tool to average the params of input checkpoints to " + "produce a new checkpoint", + ) + # fmt: off + parser.add_argument('--inputs', required=True, nargs='+', + help='Input checkpoint file paths.') + parser.add_argument('--output', required=True, metavar='FILE', + help='Write the new checkpoint containing the averaged weights to this path.') + num_group = parser.add_mutually_exclusive_group() + num_group.add_argument('--num-epoch-checkpoints', type=int, + help='if set, will try to find checkpoints with names checkpoint_xx.pt in the ' + 'path specified by input, and average last this many of them.') + num_group.add_argument('--num-update-checkpoints', type=int, + help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by' + ' input, and average last this many of them.') + num_group.add_argument('--num-best-checkpoints', type=int, default=0, + help='if set, will try to find checkpoints with names checkpoint_best_ee_xx.pt in the path specified by' + ' input, and average last this many of them.') + parser.add_argument('--checkpoint-upper-bound', type=int, + help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, ' + 'when using --num-update-checkpoints, this will set an upper bound on which update to use' + 'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be' + ' averaged.' 
+ 'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would' + ' be averaged assuming --save-interval-updates 500' + ) + # fmt: on + args = parser.parse_args() + print(args) + + num = None + is_update_based = False + if args.num_update_checkpoints is not None: + num = args.num_update_checkpoints + is_update_based = True + elif args.num_epoch_checkpoints is not None: + num = args.num_epoch_checkpoints + + assert args.checkpoint_upper_bound is None or ( + args.num_epoch_checkpoints is not None + or args.num_update_checkpoints is not None + ), "--checkpoint-upper-bound requires --num-epoch-checkpoints or --num-update-checkpoints" + assert ( + args.num_epoch_checkpoints is None or args.num_update_checkpoints is None + ), "Cannot combine --num-epoch-checkpoints and --num-update-checkpoints" + + if num is not None: + args.inputs = last_n_checkpoints( + args.inputs, + num, + is_update_based, + upper_bound=args.checkpoint_upper_bound, + ) + print("averaging checkpoints: ", args.inputs) + + if args.num_best_checkpoints > 0: + args.inputs = list( + sorted( + args.inputs, + key=lambda x: float( + os.path.basename(x).split("_")[-1].replace(".pt", "") + ), + ) + ) + args.inputs = args.inputs[: args.num_best_checkpoints] + for path in args.inputs: + print(os.path.basename(path)) + new_state = average_checkpoints(args.inputs) + with PathManager.open(args.output, "wb") as f: + torch.save(new_state, f) + print("Finished writing averaged checkpoint to {}".format(args.output)) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/build_sym_alignment.py b/fairseq/scripts/build_sym_alignment.py new file mode 100644 index 0000000..0ca5c18 --- /dev/null +++ b/fairseq/scripts/build_sym_alignment.py @@ -0,0 +1,97 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+""" +Use this script in order to build symmetric alignments for your translation +dataset. +This script depends on fast_align and mosesdecoder tools. You will need to +build those before running the script. +fast_align: + github: http://github.com/clab/fast_align + instructions: follow the instructions in README.md +mosesdecoder: + github: http://github.com/moses-smt/mosesdecoder + instructions: http://www.statmt.org/moses/?n=Development.GetStarted +The script produces the following files under --output_dir: + text.joined - concatenation of lines from the source_file and the + target_file. + align.forward - forward pass of fast_align. + align.backward - backward pass of fast_align. + aligned.sym_heuristic - symmetrized alignment. +""" + +import argparse +import os +from itertools import zip_longest + + +def main(): + parser = argparse.ArgumentParser(description="symmetric alignment builer") + # fmt: off + parser.add_argument('--fast_align_dir', + help='path to fast_align build directory') + parser.add_argument('--mosesdecoder_dir', + help='path to mosesdecoder root directory') + parser.add_argument('--sym_heuristic', + help='heuristic to use for symmetrization', + default='grow-diag-final-and') + parser.add_argument('--source_file', + help='path to a file with sentences ' + 'in the source language') + parser.add_argument('--target_file', + help='path to a file with sentences ' + 'in the target language') + parser.add_argument('--output_dir', + help='output directory') + # fmt: on + args = parser.parse_args() + + fast_align_bin = os.path.join(args.fast_align_dir, "fast_align") + symal_bin = os.path.join(args.mosesdecoder_dir, "bin", "symal") + sym_fast_align_bin = os.path.join( + args.mosesdecoder_dir, "scripts", "ems", "support", "symmetrize-fast-align.perl" + ) + + # create joined file + joined_file = os.path.join(args.output_dir, "text.joined") + with open(args.source_file, "r", encoding="utf-8") as src, open( + args.target_file, "r", encoding="utf-8" + ) as 
tgt: + with open(joined_file, "w", encoding="utf-8") as joined: + for s, t in zip_longest(src, tgt): + print("{} ||| {}".format(s.strip(), t.strip()), file=joined) + + bwd_align_file = os.path.join(args.output_dir, "align.backward") + + # run forward alignment + fwd_align_file = os.path.join(args.output_dir, "align.forward") + fwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v > {FWD}".format( + FASTALIGN=fast_align_bin, JOINED=joined_file, FWD=fwd_align_file + ) + assert os.system(fwd_fast_align_cmd) == 0 + + # run backward alignment + bwd_align_file = os.path.join(args.output_dir, "align.backward") + bwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v -r > {BWD}".format( + FASTALIGN=fast_align_bin, JOINED=joined_file, BWD=bwd_align_file + ) + assert os.system(bwd_fast_align_cmd) == 0 + + # run symmetrization + sym_out_file = os.path.join(args.output_dir, "aligned") + sym_cmd = "{SYMFASTALIGN} {FWD} {BWD} {SRC} {TGT} {OUT} {HEURISTIC} {SYMAL}".format( + SYMFASTALIGN=sym_fast_align_bin, + FWD=fwd_align_file, + BWD=bwd_align_file, + SRC=args.source_file, + TGT=args.target_file, + OUT=sym_out_file, + HEURISTIC=args.sym_heuristic, + SYMAL=symal_bin, + ) + assert os.system(sym_cmd) == 0 + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/check_installation.py b/fairseq/scripts/check_installation.py new file mode 100644 index 0000000..e5a9d9d --- /dev/null +++ b/fairseq/scripts/check_installation.py @@ -0,0 +1,36 @@ +from pathlib import Path +import os + +cwd = Path(".").resolve() +print("running 'check_installation.py' from:", cwd) + +# Old versions of numpy/torch can prevent loading the .so files +import torch + +print("torch:", torch.__version__) +import numpy + +print("numpy:", numpy.__version__) + +import fairseq + +print("Fairseq installed at:", fairseq.__file__) +import fairseq.criterions +import fairseq.dataclass.configs + +import _imp + +print("Should load following .so suffixes:", _imp.extension_suffixes()) + +so_files = 
list(Path(fairseq.__file__).parent.glob("*.so")) +so_files.extend(Path(fairseq.__file__).parent.glob("data/*.so")) +print("Found following .so files:") +for so_file in so_files: + print(f"- {so_file}") + +from fairseq import libbleu + +print("Found libbleu at", libbleu.__file__) +from fairseq.data import data_utils_fast + +print("Found data_utils_fast at", data_utils_fast.__file__) diff --git a/fairseq/scripts/compare_namespaces.py b/fairseq/scripts/compare_namespaces.py new file mode 100644 index 0000000..bc24db6 --- /dev/null +++ b/fairseq/scripts/compare_namespaces.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +"""Helper script to compare two argparse.Namespace objects.""" + +from argparse import Namespace # noqa + + +def main(): + + ns1 = eval(input("Namespace 1: ")) + ns2 = eval(input("Namespace 2: ")) + + def keys(ns): + ks = set() + for k in dir(ns): + if not k.startswith("_"): + ks.add(k) + return ks + + k1 = keys(ns1) + k2 = keys(ns2) + + def print_keys(ks, ns1, ns2=None): + for k in ks: + if ns2 is None: + print("{}\t{}".format(k, getattr(ns1, k, None))) + else: + print( + "{}\t{}\t{}".format(k, getattr(ns1, k, None), getattr(ns2, k, None)) + ) + + print("Keys unique to namespace 1:") + print_keys(k1 - k2, ns1) + print() + + print("Keys unique to namespace 2:") + print_keys(k2 - k1, ns2) + print() + + print("Overlapping keys with different values:") + ks = [k for k in k1 & k2 if getattr(ns1, k, "None") != getattr(ns2, k, "None")] + print_keys(ks, ns1, ns2) + print() + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/compound_split_bleu.sh b/fairseq/scripts/compound_split_bleu.sh new file mode 100644 index 0000000..1972fdd --- /dev/null +++ b/fairseq/scripts/compound_split_bleu.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +if [ $# -ne 1 ]; then + echo "usage: $0 GENERATE_PY_OUTPUT" + exit 1 +fi + +GEN=$1 + +SYS=$GEN.sys +REF=$GEN.ref + +if [ $(tail -n 1 $GEN | grep BLEU | wc -l) -ne 1 ]; then + echo "not done generating" + exit +fi + +grep ^H $GEN | 
awk -F '\t' '{print $NF}' | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $SYS +grep ^T $GEN | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $REF +fairseq-score --sys $SYS --ref $REF diff --git a/fairseq/scripts/constraints/extract.py b/fairseq/scripts/constraints/extract.py new file mode 100644 index 0000000..437b373 --- /dev/null +++ b/fairseq/scripts/constraints/extract.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +"""Extracts random constraints from reference files.""" + +import argparse +import random +import sys + + +def get_phrase(words, index, length): + assert index < len(words) - length + 1 + phr = " ".join(words[index : index + length]) + for i in range(index, index + length): + words.pop(index) + return phr + + +def main(args): + + if args.seed: + random.seed(args.seed) + + for line in sys.stdin: + constraints = [] + + def add_constraint(constraint): + constraints.append(constraint) + + source = line.rstrip() + if "\t" in line: + source, target = line.split("\t") + if args.add_sos: + target = f"<s> {target}" + if args.add_eos: + target = f"{target} </s>" + + if len(target.split()) >= args.len: + words = [target] + + num = args.number + + choices = {} + for i in range(num): + if len(words) == 0: + break + segmentno = random.choice(range(len(words))) + segment = words.pop(segmentno) + tokens = segment.split() + phrase_index = random.choice(range(len(tokens))) + choice = " ".join( + tokens[phrase_index : min(len(tokens), phrase_index + args.len)] + ) + for j in range( + phrase_index, min(len(tokens), phrase_index + args.len) + ): + tokens.pop(phrase_index) + if phrase_index > 0: + words.append(" ".join(tokens[0:phrase_index])) + if phrase_index + 1 < len(tokens): + words.append(" ".join(tokens[phrase_index:])) + choices[target.find(choice)] = choice + + # 
mask out with spaces + target = target.replace(choice, " " * len(choice), 1) + + for key in sorted(choices.keys()): + add_constraint(choices[key]) + + print(source, *constraints, sep="\t") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--number", "-n", type=int, default=1, help="number of phrases") + parser.add_argument("--len", "-l", type=int, default=1, help="phrase length") + parser.add_argument( + "--add-sos", default=False, action="store_true", help="add <s> token" + ) + parser.add_argument( + "--add-eos", default=False, action="store_true", help="add </s> token" + ) + parser.add_argument("--seed", "-s", default=0, type=int) + args = parser.parse_args() + + main(args) diff --git a/fairseq/scripts/constraints/validate.py b/fairseq/scripts/constraints/validate.py new file mode 100644 index 0000000..d531ad9 --- /dev/null +++ b/fairseq/scripts/constraints/validate.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import sys + + +"""Reads in a fairseq output file, and verifies that the constraints +(C- lines) are present in the output (the first H- line). Assumes that +constraints are listed prior to the first hypothesis. 
+""" + +constraints = [] +found = 0 +total = 0 +for line in sys.stdin: + if line.startswith("C-"): + constraints.append(line.rstrip().split("\t")[1]) + elif line.startswith("H-"): + text = line.split("\t")[2] + + for constraint in constraints: + total += 1 + if constraint in text: + found += 1 + else: + print(f"No {constraint} in {text}", file=sys.stderr) + + constraints = [] + +print(f"Found {found} / {total} = {100 * found / total:.1f}%") diff --git a/fairseq/scripts/convert_dictionary.lua b/fairseq/scripts/convert_dictionary.lua new file mode 100644 index 0000000..14ee8c9 --- /dev/null +++ b/fairseq/scripts/convert_dictionary.lua @@ -0,0 +1,34 @@ +-- Copyright (c) Facebook, Inc. and its affiliates. +-- +-- This source code is licensed under the MIT license found in the +-- LICENSE file in the root directory of this source tree. +-- +-- Usage: convert_dictionary.lua <dict.th7> +require 'fairseq' +require 'torch' +require 'paths' + +if #arg < 1 then + print('usage: convert_dictionary.lua <dict.th7>') + os.exit(1) +end +if not paths.filep(arg[1]) then + print('error: file does not exit: ' .. arg[1]) + os.exit(1) +end + +dict = torch.load(arg[1]) +dst = paths.basename(arg[1]):gsub('.th7', '.txt') +assert(dst:match('.txt$')) + +f = io.open(dst, 'w') +for idx, symbol in ipairs(dict.index_to_symbol) do + if idx > dict.cutoff then + break + end + f:write(symbol) + f:write(' ') + f:write(dict.index_to_freq[idx]) + f:write('\n') +end +f:close() diff --git a/fairseq/scripts/convert_model.lua b/fairseq/scripts/convert_model.lua new file mode 100644 index 0000000..61b9213 --- /dev/null +++ b/fairseq/scripts/convert_model.lua @@ -0,0 +1,108 @@ +-- Copyright (c) Facebook, Inc. and its affiliates. +-- +-- This source code is licensed under the MIT license found in the +-- LICENSE file in the root directory of this source tree. 
+-- +-- Usage: convert_model.lua <model_epoch1.th7> +require 'torch' +local fairseq = require 'fairseq' + +model = torch.load(arg[1]) + +function find_weight_norm(container, module) + for _, wn in ipairs(container:listModules()) do + if torch.type(wn) == 'nn.WeightNorm' and wn.modules[1] == module then + return wn + end + end +end + +function push_state(dict, key, module) + if torch.type(module) == 'nn.Linear' then + local wn = find_weight_norm(model.module, module) + assert(wn) + dict[key .. '.weight_v'] = wn.v:float() + dict[key .. '.weight_g'] = wn.g:float() + elseif torch.type(module) == 'nn.TemporalConvolutionTBC' then + local wn = find_weight_norm(model.module, module) + assert(wn) + local v = wn.v:float():view(wn.viewOut):transpose(2, 3) + dict[key .. '.weight_v'] = v + dict[key .. '.weight_g'] = wn.g:float():view(module.weight:size(3), 1, 1) + else + dict[key .. '.weight'] = module.weight:float() + end + if module.bias then + dict[key .. '.bias'] = module.bias:float() + end +end + +encoder_dict = {} +decoder_dict = {} +combined_dict = {} + +function encoder_state(encoder) + luts = encoder:findModules('nn.LookupTable') + push_state(encoder_dict, 'embed_tokens', luts[1]) + push_state(encoder_dict, 'embed_positions', luts[2]) + + fcs = encoder:findModules('nn.Linear') + assert(#fcs >= 2) + local nInputPlane = fcs[1].weight:size(1) + push_state(encoder_dict, 'fc1', table.remove(fcs, 1)) + push_state(encoder_dict, 'fc2', table.remove(fcs, #fcs)) + + for i, module in ipairs(encoder:findModules('nn.TemporalConvolutionTBC')) do + push_state(encoder_dict, 'convolutions.' .. tostring(i - 1), module) + if nInputPlane ~= module.weight:size(3) / 2 then + push_state(encoder_dict, 'projections.' .. 
tostring(i - 1), table.remove(fcs, 1)) + end + nInputPlane = module.weight:size(3) / 2 + end + assert(#fcs == 0) +end + +function decoder_state(decoder) + luts = decoder:findModules('nn.LookupTable') + push_state(decoder_dict, 'embed_tokens', luts[1]) + push_state(decoder_dict, 'embed_positions', luts[2]) + + fcs = decoder:findModules('nn.Linear') + local nInputPlane = fcs[1].weight:size(1) + push_state(decoder_dict, 'fc1', table.remove(fcs, 1)) + push_state(decoder_dict, 'fc2', fcs[#fcs - 1]) + push_state(decoder_dict, 'fc3', fcs[#fcs]) + + table.remove(fcs, #fcs) + table.remove(fcs, #fcs) + + for i, module in ipairs(decoder:findModules('nn.TemporalConvolutionTBC')) do + if nInputPlane ~= module.weight:size(3) / 2 then + push_state(decoder_dict, 'projections.' .. tostring(i - 1), table.remove(fcs, 1)) + end + nInputPlane = module.weight:size(3) / 2 + + local prefix = 'attention.' .. tostring(i - 1) + push_state(decoder_dict, prefix .. '.in_projection', table.remove(fcs, 1)) + push_state(decoder_dict, prefix .. '.out_projection', table.remove(fcs, 1)) + push_state(decoder_dict, 'convolutions.' .. tostring(i - 1), module) + end + assert(#fcs == 0) +end + + +_encoder = model.module.modules[2] +_decoder = model.module.modules[3] + +encoder_state(_encoder) +decoder_state(_decoder) + +for k, v in pairs(encoder_dict) do + combined_dict['encoder.' .. k] = v +end +for k, v in pairs(decoder_dict) do + combined_dict['decoder.' .. k] = v +end + + +torch.save('state_dict.t7', combined_dict) diff --git a/fairseq/scripts/count_docs.py b/fairseq/scripts/count_docs.py new file mode 100644 index 0000000..58d85af --- /dev/null +++ b/fairseq/scripts/count_docs.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+""" +Count the number of documents and average number of lines and tokens per +document in a large file. Documents should be separated by a single empty line. +""" + +import argparse +import gzip +import sys + +import numpy as np + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("--gzip", action="store_true") + args = parser.parse_args() + + def gopen(): + if args.gzip: + return gzip.open(args.input, "r") + else: + return open(args.input, "r", encoding="utf-8") + + num_lines = [] + num_toks = [] + with gopen() as h: + num_docs = 1 + num_lines_in_doc = 0 + num_toks_in_doc = 0 + for i, line in enumerate(h): + if len(line.strip()) == 0: # empty line indicates new document + num_docs += 1 + num_lines.append(num_lines_in_doc) + num_toks.append(num_toks_in_doc) + num_lines_in_doc = 0 + num_toks_in_doc = 0 + else: + num_lines_in_doc += 1 + num_toks_in_doc += len(line.rstrip().split()) + if i % 1000000 == 0: + print(i, file=sys.stderr, end="", flush=True) + elif i % 100000 == 0: + print(".", file=sys.stderr, end="", flush=True) + print(file=sys.stderr, flush=True) + + print("found {} docs".format(num_docs)) + print("average num lines per doc: {}".format(np.mean(num_lines))) + print("average num toks per doc: {}".format(np.mean(num_toks))) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/read_binarized.py b/fairseq/scripts/read_binarized.py new file mode 100644 index 0000000..a414095 --- /dev/null +++ b/fairseq/scripts/read_binarized.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse + +from fairseq.data import Dictionary, data_utils, indexed_dataset + + +def get_parser(): + parser = argparse.ArgumentParser( + description="writes text from binarized file to stdout" + ) + # fmt: off + parser.add_argument('--dataset-impl', help='dataset implementation', + choices=indexed_dataset.get_available_dataset_impl()) + parser.add_argument('--dict', metavar='FP', help='dictionary containing known words', default=None) + parser.add_argument('--input', metavar='FP', required=True, help='binarized file to read') + # fmt: on + + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + + dictionary = Dictionary.load(args.dict) if args.dict is not None else None + dataset = data_utils.load_indexed_dataset( + args.input, + dictionary, + dataset_impl=args.dataset_impl, + default="lazy", + ) + + for tensor_line in dataset: + if dictionary is None: + line = " ".join([str(int(x)) for x in tensor_line]) + else: + line = dictionary.string(tensor_line) + + print(line) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/rm_pt.py b/fairseq/scripts/rm_pt.py new file mode 100644 index 0000000..6cd063d --- /dev/null +++ b/fairseq/scripts/rm_pt.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import os +import re +import shutil +import sys + + +pt_regexp = re.compile(r"checkpoint(\d+|_\d+_\d+|_[a-z]+)\.pt") +pt_regexp_epoch_based = re.compile(r"checkpoint(\d+)\.pt") +pt_regexp_update_based = re.compile(r"checkpoint_\d+_(\d+)\.pt") + + +def parse_checkpoints(files): + entries = [] + for f in files: + m = pt_regexp_epoch_based.fullmatch(f) + if m is not None: + entries.append((int(m.group(1)), m.group(0))) + else: + m = pt_regexp_update_based.fullmatch(f) + if m is not None: + entries.append((int(m.group(1)), m.group(0))) + return entries + + +def last_n_checkpoints(files, n): + entries = parse_checkpoints(files) + return [x[1] for x in sorted(entries, reverse=True)[:n]] + + +def every_n_checkpoints(files, n): + entries = parse_checkpoints(files) + return [x[1] for x in sorted(sorted(entries)[::-n])] + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Recursively delete checkpoint files from `root_dir`, " + "but preserve checkpoint_best.pt and checkpoint_last.pt" + ) + ) + parser.add_argument("root_dirs", nargs="*") + parser.add_argument( + "--save-last", type=int, default=0, help="number of last checkpoints to save" + ) + parser.add_argument( + "--save-every", type=int, default=0, help="interval of checkpoints to save" + ) + parser.add_argument( + "--preserve-test", + action="store_true", + help="preserve checkpoints in dirs that start with test_ prefix (default: delete them)", + ) + parser.add_argument( + "--delete-best", action="store_true", help="delete checkpoint_best.pt" + ) + parser.add_argument( + "--delete-last", action="store_true", help="delete checkpoint_last.pt" + ) + parser.add_argument( + "--no-dereference", action="store_true", help="don't dereference symlinks" + ) + args = parser.parse_args() + + files_to_desymlink = [] + files_to_preserve = [] + files_to_delete = [] + for root_dir in args.root_dirs: + for root, _subdirs, files in os.walk(root_dir): + if args.save_last > 0: + to_save = 
last_n_checkpoints(files, args.save_last) + else: + to_save = [] + if args.save_every > 0: + to_save += every_n_checkpoints(files, args.save_every) + for file in files: + if not pt_regexp.fullmatch(file): + continue + full_path = os.path.join(root, file) + if ( + not os.path.basename(root).startswith("test_") or args.preserve_test + ) and ( + (file == "checkpoint_last.pt" and not args.delete_last) + or (file == "checkpoint_best.pt" and not args.delete_best) + or file in to_save + ): + if os.path.islink(full_path) and not args.no_dereference: + files_to_desymlink.append(full_path) + else: + files_to_preserve.append(full_path) + else: + files_to_delete.append(full_path) + + if len(files_to_desymlink) == 0 and len(files_to_delete) == 0: + print("Nothing to do.") + sys.exit(0) + + files_to_desymlink = sorted(files_to_desymlink) + files_to_preserve = sorted(files_to_preserve) + files_to_delete = sorted(files_to_delete) + + print("Operations to perform (in order):") + if len(files_to_desymlink) > 0: + for file in files_to_desymlink: + print(" - preserve (and dereference symlink): " + file) + if len(files_to_preserve) > 0: + for file in files_to_preserve: + print(" - preserve: " + file) + if len(files_to_delete) > 0: + for file in files_to_delete: + print(" - delete: " + file) + while True: + resp = input("Continue? 
(Y/N): ") + if resp.strip().lower() == "y": + break + elif resp.strip().lower() == "n": + sys.exit(0) + + print("Executing...") + if len(files_to_desymlink) > 0: + for file in files_to_desymlink: + realpath = os.path.realpath(file) + print("rm " + file) + os.remove(file) + print("cp {} {}".format(realpath, file)) + shutil.copyfile(realpath, file) + if len(files_to_delete) > 0: + for file in files_to_delete: + print("rm " + file) + os.remove(file) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/sacrebleu.sh b/fairseq/scripts/sacrebleu.sh new file mode 100644 index 0000000..c10bf2b --- /dev/null +++ b/fairseq/scripts/sacrebleu.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +if [ $# -ne 4 ]; then + echo "usage: $0 TESTSET SRCLANG TGTLANG GEN" + exit 1 +fi + +TESTSET=$1 +SRCLANG=$2 +TGTLANG=$3 + +GEN=$4 + +if ! command -v sacremoses &> /dev/null +then + echo "sacremoses could not be found, please install with: pip install sacremoses" + exit +fi + +grep ^H $GEN \ +| sed 's/^H\-//' \ +| sort -n -k 1 \ +| cut -f 3 \ +| sacremoses detokenize \ +> $GEN.sorted.detok + +sacrebleu --test-set $TESTSET --language-pair "${SRCLANG}-${TGTLANG}" < $GEN.sorted.detok diff --git a/fairseq/scripts/shard_docs.py b/fairseq/scripts/shard_docs.py new file mode 100644 index 0000000..97232c3 --- /dev/null +++ b/fairseq/scripts/shard_docs.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Split a large file into shards while respecting document boundaries. Documents +should be separated by a single empty line. 
+""" + +import argparse +import contextlib + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("--num-shards", type=int) + args = parser.parse_args() + + assert args.num_shards is not None and args.num_shards > 1 + + with open(args.input, "r", encoding="utf-8") as h: + with contextlib.ExitStack() as stack: + outputs = [ + stack.enter_context( + open(args.input + ".shard" + str(i), "w", encoding="utf-8") + ) + for i in range(args.num_shards) + ] + + doc = [] + first_doc = [True] * args.num_shards + + def output_doc(i): + if not first_doc[i]: + outputs[i].write("\n") + first_doc[i] = False + for line in doc: + outputs[i].write(line) + doc.clear() + + num_docs = 0 + for line in h: + if line.strip() == "": # empty line indicates new document + output_doc(num_docs % args.num_shards) + num_docs += 1 + else: + doc.append(line) + output_doc(num_docs % args.num_shards) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/split_train_valid_docs.py b/fairseq/scripts/split_train_valid_docs.py new file mode 100644 index 0000000..ff15978 --- /dev/null +++ b/fairseq/scripts/split_train_valid_docs.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Split a large file into a train and valid set while respecting document +boundaries. Documents should be separated by a single empty line. 
+""" + +import argparse +import random +import sys + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("sample_output", help="train output file") + parser.add_argument("remainder_output", help="valid output file") + parser.add_argument("-k", type=int, help="remainder size") + parser.add_argument( + "--lines", action="store_true", help="split lines instead of docs" + ) + args = parser.parse_args() + + assert args.k is not None + + sample = [] + remainder = [] + num_docs = [0] + + def update_sample(doc): + if len(sample) < args.k: + sample.append(doc.copy()) + else: + i = num_docs[0] + j = random.randrange(i + 1) + if j < args.k: + remainder.append(sample[j]) + sample[j] = doc.copy() + else: + remainder.append(doc.copy()) + num_docs[0] += 1 + doc.clear() + + with open(args.input, "r", encoding="utf-8") as h: + doc = [] + for i, line in enumerate(h): + if line.strip() == "": # empty line indicates new document + update_sample(doc) + else: + doc.append(line) + if args.lines: + update_sample(doc) + if i % 1000000 == 0: + print(i, file=sys.stderr, end="", flush=True) + elif i % 100000 == 0: + print(".", file=sys.stderr, end="", flush=True) + if len(doc) > 0: + update_sample(doc) + print(file=sys.stderr, flush=True) + + assert len(sample) == args.k + + with open(args.sample_output, "w", encoding="utf-8") as out: + first = True + for doc in sample: + if not first and not args.lines: + out.write("\n") + first = False + for line in doc: + out.write(line) + + with open(args.remainder_output, "w", encoding="utf-8") as out: + first = True + for doc in remainder: + if not first and not args.lines: + out.write("\n") + first = False + for line in doc: + out.write(line) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/spm_decode.py b/fairseq/scripts/spm_decode.py new file mode 100644 index 0000000..7d7b68b --- /dev/null +++ b/fairseq/scripts/spm_decode.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# Copyright 
(c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse + +import sentencepiece as spm + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="sentencepiece model to use for decoding" + ) + parser.add_argument("--input", required=True, help="input file to decode") + parser.add_argument("--input_format", choices=["piece", "id"], default="piece") + args = parser.parse_args() + + sp = spm.SentencePieceProcessor() + sp.Load(args.model) + + if args.input_format == "piece": + + def decode(input): + return "".join(sp.DecodePieces(input)) + + elif args.input_format == "id": + + def decode(input): + return "".join(sp.DecodeIds(input)) + + else: + raise NotImplementedError + + def tok2int(tok): + # remap reference-side <unk> (represented as <<unk>>) to 0 + return int(tok) if tok != "<<unk>>" else 0 + + with open(args.input, "r", encoding="utf-8") as h: + for line in h: + if args.input_format == "id": + print(decode(list(map(tok2int, line.rstrip().split())))) + elif args.input_format == "piece": + print(decode(line.rstrip().split())) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/spm_encode.py b/fairseq/scripts/spm_encode.py new file mode 100644 index 0000000..f91e0bb --- /dev/null +++ b/fairseq/scripts/spm_encode.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import contextlib +import sys + +import sentencepiece as spm + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="sentencepiece model to use for encoding" + ) + parser.add_argument( + "--inputs", nargs="+", default=["-"], help="input files to filter/encode" + ) + parser.add_argument( + "--outputs", nargs="+", default=["-"], help="path to save encoded outputs" + ) + parser.add_argument("--output_format", choices=["piece", "id"], default="piece") + parser.add_argument( + "--min-len", + type=int, + metavar="N", + help="filter sentence pairs with fewer than N tokens", + ) + parser.add_argument( + "--max-len", + type=int, + metavar="N", + help="filter sentence pairs with more than N tokens", + ) + args = parser.parse_args() + + assert len(args.inputs) == len( + args.outputs + ), "number of input and output paths should match" + + sp = spm.SentencePieceProcessor() + sp.Load(args.model) + + if args.output_format == "piece": + + def encode(input): + return sp.EncodeAsPieces(input) + + elif args.output_format == "id": + + def encode(input): + return list(map(str, sp.EncodeAsIds(input))) + + else: + raise NotImplementedError + + if args.min_len is not None or args.max_len is not None: + + def valid(line): + return (args.min_len is None or len(line) >= args.min_len) and ( + args.max_len is None or len(line) <= args.max_len + ) + + else: + + def valid(lines): + return True + + with contextlib.ExitStack() as stack: + inputs = [ + stack.enter_context(open(input, "r", encoding="utf-8")) + if input != "-" + else sys.stdin + for input in args.inputs + ] + outputs = [ + stack.enter_context(open(output, "w", encoding="utf-8")) + if output != "-" + else sys.stdout + for output in args.outputs + ] + + stats = { + "num_empty": 0, + "num_filtered": 0, + } + + def encode_line(line): + line = line.strip() + if len(line) > 0: + 
line = encode(line) + if valid(line): + return line + else: + stats["num_filtered"] += 1 + else: + stats["num_empty"] += 1 + return None + + for i, lines in enumerate(zip(*inputs), start=1): + enc_lines = list(map(encode_line, lines)) + if not any(enc_line is None for enc_line in enc_lines): + for enc_line, output_h in zip(enc_lines, outputs): + print(" ".join(enc_line), file=output_h) + if i % 10000 == 0: + print("processed {} lines".format(i), file=sys.stderr) + + print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) + print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/fairseq/scripts/spm_train.py b/fairseq/scripts/spm_train.py new file mode 100644 index 0000000..9db668f --- /dev/null +++ b/fairseq/scripts/spm_train.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys + +import sentencepiece as spm + + +if __name__ == "__main__": + spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/fairseq/scripts/test_fsdp.sh b/fairseq/scripts/test_fsdp.sh new file mode 100644 index 0000000..1f428a0 --- /dev/null +++ b/fairseq/scripts/test_fsdp.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +rm -rf fsdp_dummy +mkdir -p fsdp_dummy +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train /private/home/sshleifer/data-bin/stories_mmap \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 256 --batch-size 8 \ + --arch transformer_lm_gpt2_tiny \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 5 --log-format json --log-interval 1 \ + --save-interval-updates 5 --save-dir fsdp_dummy --disable-validation \ + --restore-file x.pt "$@" + +# Now we try to load the checkpoint +CUDA_VISIBLE_DEVICES=0,1 fairseq-train /private/home/sshleifer/data-bin/stories_mmap \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 256 --batch-size 8 \ + --arch transformer_lm_gpt2_tiny \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 2 --log-format json --log-interval 1 \ + --save-interval-updates 2 --save-dir fsdp_dummy diff --git a/fairseq/setup.cfg b/fairseq/setup.cfg new file mode 100644 index 0000000..3fa679d --- /dev/null +++ b/fairseq/setup.cfg @@ -0,0 +1,4 @@ +[flake8] +max-line-length = 127 +extend-ignore = E203, W503 +extend-exclude = fairseq/model_parallel/megatron diff --git a/fairseq/setup.py b/fairseq/setup.py new file mode 100644 index 0000000..dae0608 --- /dev/null 
+++ b/fairseq/setup.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import subprocess +import sys + +from setuptools import Extension, find_packages, setup +from torch.utils import cpp_extension + +if sys.version_info < (3, 6): + sys.exit("Sorry, Python >= 3.6 is required for fairseq.") + + +def write_version_py(): + with open(os.path.join("fairseq", "version.txt")) as f: + version = f.read().strip() + + # write version info to fairseq/version.py + with open(os.path.join("fairseq", "version.py"), "w") as f: + f.write('__version__ = "{}"\n'.format(version)) + return version + + +version = write_version_py() + + +with open("README.md") as f: + readme = f.read() + + +if sys.platform == "darwin": + extra_compile_args = ["-stdlib=libc++", "-O3"] +else: + extra_compile_args = ["-std=c++11", "-O3"] + + +class NumpyExtension(Extension): + """Source: https://stackoverflow.com/a/54128391""" + + def __init__(self, *args, **kwargs): + self.__include_dirs = [] + super().__init__(*args, **kwargs) + + @property + def include_dirs(self): + import numpy + + return self.__include_dirs + [numpy.get_include()] + + @include_dirs.setter + def include_dirs(self, dirs): + self.__include_dirs = dirs + + +extensions = [ + Extension( + "fairseq.libbleu", + sources=[ + "fairseq/clib/libbleu/libbleu.cpp", + "fairseq/clib/libbleu/module.cpp", + ], + extra_compile_args=extra_compile_args, + ), + NumpyExtension( + "fairseq.data.data_utils_fast", + sources=["fairseq/data/data_utils_fast.pyx"], + language="c++", + extra_compile_args=extra_compile_args, + ), + NumpyExtension( + "fairseq.data.token_block_utils_fast", + sources=["fairseq/data/token_block_utils_fast.pyx"], + language="c++", + extra_compile_args=extra_compile_args, + ), +] + + +extensions.extend( + [ + cpp_extension.CppExtension( + 
"fairseq.libbase", + sources=[ + "fairseq/clib/libbase/balanced_assignment.cpp", + ], + ), + cpp_extension.CppExtension( + "fairseq.libnat", + sources=[ + "fairseq/clib/libnat/edit_dist.cpp", + ], + ), + cpp_extension.CppExtension( + "alignment_train_cpu_binding", + sources=[ + "examples/operators/alignment_train_cpu.cpp", + ], + ), + ] +) +if "CUDA_HOME" in os.environ: + extensions.extend( + [ + cpp_extension.CppExtension( + "fairseq.libnat_cuda", + sources=[ + "fairseq/clib/libnat_cuda/edit_dist.cu", + "fairseq/clib/libnat_cuda/binding.cpp", + ], + ), + cpp_extension.CppExtension( + "fairseq.ngram_repeat_block_cuda", + sources=[ + "fairseq/clib/cuda/ngram_repeat_block_cuda.cpp", + "fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu", + ], + ), + cpp_extension.CppExtension( + "alignment_train_cuda_binding", + sources=[ + "examples/operators/alignment_train_kernel.cu", + "examples/operators/alignment_train_cuda.cpp", + ], + ), + ] + ) + +cmdclass = {"build_ext": cpp_extension.BuildExtension} + +if "READTHEDOCS" in os.environ: + # don't build extensions when generating docs + extensions = [] + if "build_ext" in cmdclass: + del cmdclass["build_ext"] + + # use CPU build of PyTorch + dependency_links = [ + "https://download.pytorch.org/whl/cpu/torch-1.7.0%2Bcpu-cp36-cp36m-linux_x86_64.whl" + ] +else: + dependency_links = [] + + +if "clean" in sys.argv[1:]: + # Source: https://bit.ly/2NLVsgE + print("deleting Cython files...") + + subprocess.run( + ["rm -f fairseq/*.so fairseq/**/*.so fairseq/*.pyd fairseq/**/*.pyd"], + shell=True, + ) + + +extra_packages = [] +if os.path.exists(os.path.join("fairseq", "model_parallel", "megatron", "mpu")): + extra_packages.append("fairseq.model_parallel.megatron.mpu") + + +def do_setup(package_data): + setup( + name="fairseq", + version=version, + description="Facebook AI Research Sequence-to-Sequence Toolkit", + url="https://github.com/pytorch/fairseq", + classifiers=[ + "Intended Audience :: Science/Research", + "License :: OSI 
Approved :: MIT License", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ], + long_description=readme, + long_description_content_type="text/markdown", + install_requires=[ + "cffi", + "cython", + "hydra-core>=1.0.7,<1.1", + "omegaconf<2.1", + "numpy>=1.21.3", + "regex", + "sacrebleu>=1.4.12", + "torch>=1.13", + "tqdm", + "bitarray", + "torchaudio>=0.8.0", + "scikit-learn", + "packaging", + ], + extras_require={ + "dev": ["flake8", "pytest", "black==22.3.0"], + "docs": ["sphinx", "sphinx-argparse"], + }, + dependency_links=dependency_links, + packages=find_packages( + exclude=[ + "examples", + "examples.*", + "scripts", + "scripts.*", + "tests", + "tests.*", + ] + ) + + extra_packages, + package_data=package_data, + ext_modules=extensions, + test_suite="tests", + entry_points={ + "console_scripts": [ + "fairseq-eval-lm = fairseq_cli.eval_lm:cli_main", + "fairseq-generate = fairseq_cli.generate:cli_main", + "fairseq-hydra-train = fairseq_cli.hydra_train:cli_main", + "fairseq-interactive = fairseq_cli.interactive:cli_main", + "fairseq-preprocess = fairseq_cli.preprocess:cli_main", + "fairseq-score = fairseq_cli.score:cli_main", + "fairseq-train = fairseq_cli.train:cli_main", + "fairseq-validate = fairseq_cli.validate:cli_main", + ], + }, + cmdclass=cmdclass, + zip_safe=False, + ) + + +def get_files(path, relative_to="fairseq"): + all_files = [] + for root, _dirs, files in os.walk(path, followlinks=True): + root = os.path.relpath(root, relative_to) + for file in files: + if file.endswith(".pyc"): + continue + all_files.append(os.path.join(root, file)) + return all_files + + +if __name__ == "__main__": + try: + # symlink examples into fairseq package so package_data accepts them + fairseq_examples = os.path.join("fairseq", "examples") + if "build_ext" not in sys.argv[1:] and not os.path.exists(fairseq_examples): + 
os.symlink(os.path.join("..", "examples"), fairseq_examples) + + package_data = { + "fairseq": ( + get_files(fairseq_examples) + + get_files(os.path.join("fairseq", "config")) + ) + } + do_setup(package_data) + finally: + if "build_ext" not in sys.argv[1:] and os.path.islink(fairseq_examples): + os.unlink(fairseq_examples) diff --git a/fairseq/tests/__init__.py b/fairseq/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/tests/distributed/__init__.py b/fairseq/tests/distributed/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/tests/distributed/test_bmuf.py b/fairseq/tests/distributed/test_bmuf.py new file mode 100644 index 0000000..995d0db --- /dev/null +++ b/fairseq/tests/distributed/test_bmuf.py @@ -0,0 +1,204 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import functools +import random +import unittest +from multiprocessing import Manager + +import torch +import torch.nn as nn +from omegaconf import OmegaConf + +from fairseq import optim +from fairseq.distributed import utils as distributed_utils + + +class Model(nn.Module): + def __init__(self, input_size, output_size): + super(Model, self).__init__() + self.fc = nn.Linear(input_size, output_size) + + def forward(self, input): + output = self.fc(input) + return output + + +def setup_model_loss_criterion(cfg, args, rank, is_cuda): + """ + setup model, criterion and optimizer based on input args + """ + args.distributed_rank = rank + cfg.distributed_training.distributed_rank = args.distributed_rank + if cfg.distributed_training.distributed_world_size > 1: + distributed_utils.distributed_init(cfg) + torch.manual_seed(1) + model = Model(args.input_size, args.nb_classes) + loss_fn = nn.CrossEntropyLoss() + if is_cuda: + model = model.cuda() + loss_fn = loss_fn.cuda() + + optimizer = optim.sgd.SGD(args, 
model.parameters()) + optimizer = optim.FairseqBMUF(cfg=cfg.bmuf, optimizer=optimizer) + + return model, loss_fn, optimizer + + +def train_step(input, target, model, loss_fn, optimizer, **unused): + """Do forward, backward and parameter update.""" + model.train() + output = model(input) + loss = loss_fn(output, target) + optimizer.backward(loss) + optimizer.step() + + +def single_gpu_training(cfg, args, rank, iterations, shared_results): + + is_cuda = torch.cuda.is_available() + if is_cuda: + torch.cuda.set_device(rank) + + model, loss_fn, optimizer = setup_model_loss_criterion(cfg, args, rank, is_cuda) + + for _ in range(iterations): + input = torch.randn(1, args.input_size) + target = torch.empty(args.batch_size, dtype=torch.long).random_(args.nb_classes) + + if is_cuda: + input = input.cuda() + target = target.cuda() + train_step(input, target, model, loss_fn, optimizer) + + results = [] + for param in model.parameters(): + if len(results) == 0: + results = param.flatten().cpu().data + else: + results = torch.cat((results, param.flatten().cpu().data), 0) + + shared_results[rank] = results + + +def setup_args(): + args = argparse.Namespace() + args.global_sync_iter = 20 + args.block_momentum = 0.875 + args.block_lr = 0.5 + args.input_size = 5 + args.nb_classes = 2 + args.batch_size = 1 + args.lr = [1e-3] + args.momentum = 0 + args.weight_decay = 0 + args.warmup_iterations = 0 + args.use_nbm = True + args.average_sync = True + args.global_sync_iter = 1 + args.model_parallel_size = 1 + args.distributed_backend = "gloo" + + args.distributed_world_size = 2 + port = random.randint(10000, 20000) + args.distributed_init_method = "tcp://localhost:{port}".format(port=port) + args.distributed_init_host = "localhost" + args.distributed_port = port + 1 + args.local_world_size = args.distributed_world_size + + cfg = OmegaConf.create() + cfg.optimization = OmegaConf.create() + cfg.common = OmegaConf.create() + cfg.distributed_training = OmegaConf.create() + cfg.dataset = 
OmegaConf.create() + cfg.bmuf = OmegaConf.create() + cfg.optimizer = OmegaConf.create() + + cfg.bmuf.global_sync_iter = args.global_sync_iter + cfg.bmuf.block_momentum = args.block_momentum + cfg.bmuf.block_lr = args.block_lr + cfg.dataset.batch_size = args.batch_size + cfg.optimization.lr = args.lr + cfg.optimizer.momentum = args.momentum + cfg.optimizer.weight_decay = args.weight_decay + cfg.bmuf.warmup_iterations = args.warmup_iterations + cfg.bmuf.use_nbm = args.use_nbm + cfg.bmuf.average_sync = args.average_sync + cfg.common.model_parallel_size = args.model_parallel_size + cfg.distributed_training.distributed_backend = args.distributed_backend + cfg.distributed_training.distributed_world_size = args.distributed_world_size + cfg.bmuf.distributed_world_size = args.distributed_world_size + cfg.distributed_training.distributed_init_method = args.distributed_init_method + cfg.distributed_training.distributed_port = args.distributed_port + + return cfg, args + + +@unittest.skipIf(torch.cuda.device_count() < 2, "test requires 2 GPUs") +class TestBMUF(unittest.TestCase): + def bmuf_process(self, cfg, args, iterations): + results = Manager().dict() + torch.multiprocessing.spawn( + fn=functools.partial(single_gpu_training, cfg, args), + args=(iterations, results), + nprocs=args.distributed_world_size, + join=True, + ) + return results + + def test_bmuf_sync(self): + # Train model for 1 iteration and do bmuf sync without doing warmup + cfg, args = setup_args() + iterations = 1 + results = self.bmuf_process(cfg, args, iterations) + # Make sure params in both machines are same + assert len(results) == 2 + self.assertAlmostEqual(results[0], results[1]) + + def test_warmup_sync(self): + # Train model for 20 iteration and do warmup sync without doing bmuf sync + cfg, args = setup_args() + args.warmup_iterations = 20 + cfg.bmuf.warmup_iterations = args.warmup_iterations + iterations = 20 + results = self.bmuf_process(cfg, args, iterations) + # Make sure params in both machines 
are same + assert len(results) == 2 + self.assertAlmostEqual(results[0], results[1]) + + def test_warmup_sync_bmuf_sync(self): + # Train model for 25 iteration and do warmup sync after 20 iteration + # and bmuf sync after 25 iteration + cfg, args = setup_args() + args.warmup_iterations = 20 + args.global_sync_iter = 5 + cfg.bmuf.warmup_iterations = args.warmup_iterations + cfg.bmuf.global_sync_iter = args.global_sync_iter + iterations = 25 + results = self.bmuf_process(cfg, args, iterations) + # Make sure params in both machines are same + assert len(results) == 2 + self.assertAlmostEqual(results[0], results[1]) + + def test_single_gpu_bmuf(self): + # Train model for 5 iterations and use GPU 1 + cfg, args = setup_args() + args.distributed_world_size = 1 + args.warmup_iterations = 5 + cfg.distributed_training.distributed_world_size = args.distributed_world_size + cfg.bmuf.distributed_world_size = args.distributed_world_size + cfg.bmuf.warmup_iterations = args.warmup_iterations + iterations = 20 + results = self.bmuf_process(cfg, args, iterations) + assert len(results) == 1 + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/distributed/test_distributed_timeout_wrapper.py b/fairseq/tests/distributed/test_distributed_timeout_wrapper.py new file mode 100644 index 0000000..996093c --- /dev/null +++ b/fairseq/tests/distributed/test_distributed_timeout_wrapper.py @@ -0,0 +1,52 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import signal +import time +import unittest + +import torch +from torch import nn + +from fairseq.distributed import DistributedTimeoutWrapper + + +class ModuleWithDelay(nn.Module): + def __init__(self, delay): + super().__init__() + self.delay = delay + + def forward(self, x): + time.sleep(self.delay) + return x + + +class TestDistributedTimeoutWrapper(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_no_timeout(self): + module = DistributedTimeoutWrapper(ModuleWithDelay(1), 0, signal.SIGINT) + module(torch.rand(5)) + module.stop_timeout() + + def test_timeout_safe(self): + module = DistributedTimeoutWrapper(ModuleWithDelay(1), 10, signal.SIGINT) + module(torch.rand(5)) + module.stop_timeout() + + def test_timeout_killed(self): + with self.assertRaises(KeyboardInterrupt): + module = DistributedTimeoutWrapper(ModuleWithDelay(5), 1, signal.SIGINT) + module(torch.rand(5)) + module.stop_timeout() + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/distributed/test_module_proxy_wrapper.py b/fairseq/tests/distributed/test_module_proxy_wrapper.py new file mode 100644 index 0000000..2ac1a87 --- /dev/null +++ b/fairseq/tests/distributed/test_module_proxy_wrapper.py @@ -0,0 +1,74 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +import torch +from torch import nn + +from fairseq.distributed import ModuleProxyWrapper + +from .utils import objects_are_equal + + +class MockDDPWrapper(nn.Module): + """A simple wrapper with an interface similar to DistributedDataParallel.""" + + def __init__(self, module): + super().__init__() + self.module = module + + def forward(self, x): + return self.module(x) + + +class Model(nn.Module): + def __init__(self): + super().__init__() + self.linear = nn.Linear(5, 10) + self.xyz = "hello" + + def forward(self, x): + return self.linear(x) + + def get_xyz(self): + return self.xyz + + +class TestModuleProxyWrapper(unittest.TestCase): + def _get_module(self): + module = Model() + wrapped_module = MockDDPWrapper(module) + wrapped_module = ModuleProxyWrapper(wrapped_module) + return wrapped_module, module + + def test_getattr_forwarding(self): + wrapped_module, module = self._get_module() + assert module.xyz == "hello" + assert module.get_xyz() == "hello" + assert wrapped_module.xyz == "hello" + + wrapped_module.xyz = "world" + assert wrapped_module.xyz == "world" + assert module.get_xyz() == "hello" + + def test_state_dict(self): + wrapped_module, module = self._get_module() + assert objects_are_equal(wrapped_module.state_dict(), module.state_dict()) + + def test_load_state_dict(self): + wrapped_module, module = self._get_module() + wrapped_module.load_state_dict(module.state_dict()) + input = torch.rand(4, 5) + torch.testing.assert_allclose(wrapped_module(input), module(input)) + + def test_forward(self): + wrapped_module, module = self._get_module() + input = torch.rand(4, 5) + torch.testing.assert_allclose(wrapped_module(input), module(input)) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/distributed/test_utils.py b/fairseq/tests/distributed/test_utils.py new file mode 100644 index 0000000..30f995b --- /dev/null +++ b/fairseq/tests/distributed/test_utils.py @@ -0,0 +1,124 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import functools +import sys +import unittest + +import torch + +from fairseq.distributed import utils as dist_utils + +from .utils import objects_are_equal, spawn_and_init + + +class DistributedTest(unittest.TestCase): + def setUp(self): + if not torch.cuda.is_available(): + raise unittest.SkipTest("CUDA not available, skipping test") + if sys.platform == "win32": + raise unittest.SkipTest("NCCL doesn't support Windows, skipping test") + if torch.cuda.device_count() < 2: + raise unittest.SkipTest("distributed tests require 2+ GPUs, skipping") + + +class TestBroadcastObject(DistributedTest): + def test_str(self): + spawn_and_init( + functools.partial( + TestBroadcastObject._test_broadcast_object, "hello world" + ), + world_size=2, + ) + + def test_tensor(self): + spawn_and_init( + functools.partial( + TestBroadcastObject._test_broadcast_object, + torch.rand(5), + ), + world_size=2, + ) + + def test_complex(self): + spawn_and_init( + functools.partial( + TestBroadcastObject._test_broadcast_object, + { + "a": "1", + "b": [2, torch.rand(2, 3), 3], + "c": (torch.rand(2, 3), 4), + "d": {5, torch.rand(5)}, + "e": torch.rand(5), + "f": torch.rand(5).int().cuda(), + }, + ), + world_size=2, + ) + + @staticmethod + def _test_broadcast_object(ref_obj, rank, group): + obj = dist_utils.broadcast_object( + ref_obj if rank == 0 else None, src_rank=0, group=group + ) + assert objects_are_equal(ref_obj, obj) + + +class TestAllGatherList(DistributedTest): + def test_str_equality(self): + spawn_and_init( + functools.partial( + TestAllGatherList._test_all_gather_list_equality, + "hello world", + ), + world_size=2, + ) + + def test_tensor_equality(self): + spawn_and_init( + functools.partial( + TestAllGatherList._test_all_gather_list_equality, + torch.rand(5), + ), + world_size=2, + ) + + def test_complex_equality(self): + 
spawn_and_init( + functools.partial( + TestAllGatherList._test_all_gather_list_equality, + { + "a": "1", + "b": [2, torch.rand(2, 3), 3], + "c": (torch.rand(2, 3), 4), + "d": {5, torch.rand(5)}, + "e": torch.rand(5), + "f": torch.rand(5).int(), + }, + ), + world_size=2, + ) + + @staticmethod + def _test_all_gather_list_equality(ref_obj, rank, group): + objs = dist_utils.all_gather_list(ref_obj, group) + for obj in objs: + assert objects_are_equal(ref_obj, obj) + + def test_rank_tensor(self): + spawn_and_init( + TestAllGatherList._test_all_gather_list_rank_tensor, world_size=2 + ) + + @staticmethod + def _test_all_gather_list_rank_tensor(rank, group): + obj = torch.tensor([rank]) + objs = dist_utils.all_gather_list(obj, group) + for i, obj in enumerate(objs): + assert obj.item() == i + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/distributed/utils.py b/fairseq/tests/distributed/utils.py new file mode 100644 index 0000000..be4e19c --- /dev/null +++ b/fairseq/tests/distributed/utils.py @@ -0,0 +1,65 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import functools +import tempfile + +import torch + + +def spawn_and_init(fn, world_size, args=None): + if args is None: + args = () + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + torch.multiprocessing.spawn( + fn=functools.partial(init_and_run, fn, args), + args=( + world_size, + tmp_file.name, + ), + nprocs=world_size, + join=True, + ) + + +def distributed_init(rank, world_size, tmp_file): + torch.distributed.init_process_group( + backend="nccl", + init_method="file://{}".format(tmp_file), + world_size=world_size, + rank=rank, + ) + torch.cuda.set_device(rank) + + +def init_and_run(fn, args, rank, world_size, tmp_file): + distributed_init(rank, world_size, tmp_file) + group = torch.distributed.new_group() + fn(rank, group, *args) + + +def objects_are_equal(a, b) -> bool: + if type(a) is not type(b): + return False + if isinstance(a, dict): + if set(a.keys()) != set(b.keys()): + return False + for k in a.keys(): + if not objects_are_equal(a[k], b[k]): + return False + return True + elif isinstance(a, (list, tuple, set)): + if len(a) != len(b): + return False + return all(objects_are_equal(x, y) for x, y in zip(a, b)) + elif torch.is_tensor(a): + return ( + a.size() == b.size() + and a.dtype == b.dtype + and a.device == b.device + and torch.all(a == b) + ) + else: + return a == b diff --git a/fairseq/tests/gpu/__init__.py b/fairseq/tests/gpu/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fairseq/tests/gpu/test_binaries_gpu.py b/fairseq/tests/gpu/test_binaries_gpu.py new file mode 100644 index 0000000..5caf94c --- /dev/null +++ b/fairseq/tests/gpu/test_binaries_gpu.py @@ -0,0 +1,590 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import contextlib +import json +import logging +import os +import tempfile +import unittest +from io import StringIO + +import torch + +from fairseq import options +from fairseq_cli import train +from tests.utils import ( + create_dummy_data, + generate_main, + preprocess_lm_data, + preprocess_translation_data, + train_language_model, + train_translation_model, +) + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestMultiGPU(unittest.TestCase): + @staticmethod + def parse_logs(logfile): + logs = [] + for ln in open(logfile, "r").readlines(): + try: + logs.append(json.loads(ln)) + except json.JSONDecodeError: + continue + return logs + + @property + def world_size(self): + return torch.cuda.device_count() + + def train_flags(self, mu): + return [ + "--memory-efficient-fp16", + "--update-freq", + "1", + "--seed", + "1", + "--log-format", + "json", + "--max-update", + str(mu), + "--tokens-per-sample", + "20", + "--batch-size", + "2", + "--share-decoder-input-output-embed", + "--optimizer", + "adam", + "--max-valid-steps", + "1", + "--pad-to-fixed-length", + "--sample-break-mode", + "none", + ] + + def _test_resume_multilingual_training( + self, extra_clargs, arch="transformer_lm_gpt2_tiny" + ): + languages = ["en_XX", "fr_XX", "zh_CN"] + save_interval = 5 + mu = 10 + flags = ( + self.train_flags(mu) + + ["--save-interval-updates", str(save_interval), "--log-interval", "1"] + + extra_clargs + ) + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fp16") as data_dir: + log = os.path.join(data_dir, "train.log") + create_dummy_data( + data_dir, + num_examples=int( + mu * 20 * self.world_size * 1.5 + ), # make sure enough data for max updates + languages=languages, + ) + preprocess_lm_data(data_dir, languages) + train_language_model( + data_dir, + arch, + flags + ["--log-file", log], + task="multilingual_language_modeling", + world_size=self.world_size, + ) + log2 = os.path.join(data_dir, "resume.log") 
+ ckpt_name = f"checkpoint_1_{save_interval}.pt" + restore_file = os.path.join(data_dir, ckpt_name) + train_language_model( + data_dir, + arch, + flags + + ["--log-file", log2, "--restore-file", restore_file, "--no-save"], + task="multilingual_language_modeling", + world_size=self.world_size, + ) + + l1 = self.parse_logs(log) + assert ( + int(l1[-1]["train_num_updates"]) == mu + ), f"The first run did not complete {mu} updates. Add more data" + l2 = self.parse_logs(log2) + + if int(l2[0]["num_updates"]) != save_interval + 1: + all_ckpt_files = [ + x for x in os.listdir(data_dir) if x.endswith(".pt") + ] + import shutil + + shutil.move(data_dir, "last_failed_resume") + raise AssertionError( + f"Likely failed to load {ckpt_name}. {all_ckpt_files} \n LOGS: {l1} \n\n {l2}. " + ) + for k in [ + "train_loss", + "train_num_updates", + "train_ppl", + "train_gnorm", + ]: + from_scratch, resumed = float(l1[-1][k]), float(l2[-1][k]) + # This fails without rounding! + assert ( + from_scratch == resumed + ), f"difference at {k} {from_scratch} != {resumed}" + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestTranslationGPU(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_fp16_multigpu(self): + self._test_multigpu("test_fp16", ["--fp16"]) + + def test_slowmo_multigpu(self): + self._test_multigpu( + "test_slowmo", ["--ddp-backend", "slowmo", "--nprocs-per-node", "1"] + ) + + def test_slowmo_single_node_multigpu(self): + self._test_multigpu( + "test_slowmo_single_node", + ["--ddp-backend", "slowmo", "--nprocs-per-node", "2"], + ) + + def _test_multigpu(self, test_name, test_args): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory(test_name) as data_dir: + log = os.path.join(data_dir, "train.log") + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + 
"fconv_iwslt_de_en", + test_args + ["--log-file", log], + world_size=min(torch.cuda.device_count(), 2), + ) + generate_main(data_dir) + assert os.path.exists(log) + + @staticmethod + def parse_logs(logfile): + logs = [] + for ln in open(logfile, "r").readlines(): + try: + logs.append(json.loads(ln)) + except json.JSONDecodeError: + continue + return logs + + def test_resume_training_fsdp(self): + self._test_resume_training(["--ddp-backend", "fully_sharded"]) + + def test_resume_training_fsdp_sharded_state(self): + self._test_resume_training( + ["--ddp-backend", "fully_sharded", "--use-sharded-state"] + ) + + def test_resume_training_noc10d(self): + self._test_resume_training([]) + + def _test_resume_training(self, extra_clargs, arch="fconv_iwslt_de_en"): + flags = [ + "--fp16", + "--log-format", + "json", + "--max-update", + "10", + "--save-interval-updates", + "2", + "--log-interval", + "1", + ] + extra_clargs + world_size = min(torch.cuda.device_count(), 2) + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fp16") as data_dir: + log = os.path.join(data_dir, "train.log") + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + arch, + flags + ["--log-file", log], + world_size=world_size, + ) + log2 = os.path.join(data_dir, "resume.log") + restore_file = os.path.join(data_dir, "checkpoint_1_2.pt") + train_translation_model( + data_dir, + arch, + flags + ["--log-file", log2, "--restore-file", restore_file], + world_size=world_size, + ) + + l1 = self.parse_logs(log) + l2 = self.parse_logs(log2) + assert int(l2[0]["num_updates"]) == 3, f"{l1}\n\n {l2}" + for k in [ + "train_loss", + "train_num_updates", + "train_ppl", + "train_gnorm", + ]: + from_scratch, resumed = l1[-1][k], l2[-1][k] + assert ( + from_scratch == resumed + ), f"difference at {k} {from_scratch} != {resumed}" + + def test_memory_efficient_fp16(self): + with contextlib.redirect_stdout(StringIO()): + with 
tempfile.TemporaryDirectory("test_memory_efficient_fp16") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, "fconv_iwslt_de_en", ["--memory-efficient-fp16"] + ) + generate_main(data_dir) + + def test_transformer_fp16(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "64", + "--decoder-embed-dim", + "64", + "--fp16", + ], + run_validation=True, + ) + generate_main(data_dir) + + @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") + def test_amp(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_amp") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model(data_dir, "fconv_iwslt_de_en", ["--amp"]) + generate_main(data_dir) + + @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") + def test_transformer_amp(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "64", + "--decoder-embed-dim", + "64", + "--amp", + ], + run_validation=True, + ) + generate_main(data_dir) + + @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") + def test_levenshtein_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_levenshtein_transformer" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, 
["--joined-dictionary"]) + train_translation_model( + data_dir, + "levenshtein_transformer", + [ + "--apply-bert-init", + "--early-exit", + "6,6,6", + "--criterion", + "nat_loss", + ], + task="translation_lev", + ) + gen_config = [ + "--task", + "translation_lev", + "--iter-decode-max-iter", + "9", + "--iter-decode-eos-penalty", + "0", + "--print-step", + ] + # non-ensemble generation + generate_main(data_dir, gen_config) + # ensemble generation + generate_main( + data_dir, + gen_config, + path=os.pathsep.join( + [ + os.path.join(data_dir, "checkpoint_last.pt"), + os.path.join(data_dir, "checkpoint_last.pt"), + ] + ), + ) + + def test_fsdp_checkpoint_generate(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fsdp_sharded") as data_dir: + log = os.path.join(data_dir, "train.log") + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + world_size = min(torch.cuda.device_count(), 2) + train_translation_model( + data_dir, + "fconv_iwslt_de_en", + ["--log-file", log, "--ddp-backend", "fully_sharded"], + world_size=world_size, + ) + generate_main(data_dir) + assert os.path.exists(log) + + def test_fsdp_sharded_checkpoint_generate(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fsdp_sharded") as data_dir: + log = os.path.join(data_dir, "train.log") + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + world_size = min(torch.cuda.device_count(), 2) + train_translation_model( + data_dir, + "fconv_iwslt_de_en", + [ + "--log-file", + log, + "--ddp-backend", + "fully_sharded", + "--use-sharded-state", + ], + world_size=world_size, + ) + generate_main(data_dir, ["--checkpoint-shard-count", str(world_size)]) + assert os.path.exists(log) + + +def _quantize_language_model(data_dir, arch, extra_flags=None, run_validation=False): + train_parser = options.get_training_parser() + train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + 
"language_modeling", + data_dir, + "--arch", + arch, + "--optimizer", + "adam", + "--lr", + "0.0001", + "--criterion", + "adaptive_loss", + "--adaptive-softmax-cutoff", + "5,10,15", + "--max-tokens", + "500", + "--tokens-per-sample", + "500", + "--save-dir", + data_dir, + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + train.main(train_args) + + # try scalar quantization + scalar_quant_train_parser = options.get_training_parser() + scalar_quant_train_args = options.parse_args_and_arch( + scalar_quant_train_parser, + [ + "--task", + "language_modeling", + data_dir, + "--arch", + arch, + "--optimizer", + "adam", + "--lr", + "0.0001", + "--criterion", + "adaptive_loss", + "--adaptive-softmax-cutoff", + "5,10,15", + "--max-tokens", + "500", + "--tokens-per-sample", + "500", + "--save-dir", + data_dir, + "--max-update", + "3", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + "--quant-noise-scalar", + "0.5", + ] + + (extra_flags or []), + ) + train.main(scalar_quant_train_args) + + # try iterative PQ quantization + quantize_parser = options.get_training_parser() + quantize_args = options.parse_args_and_arch( + quantize_parser, + [ + "--task", + "language_modeling", + data_dir, + "--arch", + arch, + "--optimizer", + "adam", + "--lr", + "0.0001", + "--criterion", + "adaptive_loss", + "--adaptive-softmax-cutoff", + "5,10,15", + "--max-tokens", + "50", + "--tokens-per-sample", + "50", + "--max-update", + "6", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + "--restore-file", + os.path.join(data_dir, "checkpoint_last.pt"), + "--reset-optimizer", + "--quantization-config-path", + os.path.join( + os.path.dirname(__file__), "transformer_quantization_config.yaml" + ), + ] + + (extra_flags or []), + ) + 
train.main(quantize_args) + + +@unittest.skipIf( + int(torch.__version__[2]) < 10, reason="quantized kernels are only supported on CPU" +) +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestQuantization(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_quantization(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_quantization") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + # tests both scalar and iterative PQ quantization + _quantize_language_model(data_dir, "transformer_lm") + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestOptimizersGPU(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_flat_grads(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_flat_grads") as data_dir: + # Use just a bit of data and tiny model to keep this test runtime reasonable + create_dummy_data(data_dir, num_examples=10, maxlen=5) + preprocess_translation_data(data_dir) + with self.assertRaises(RuntimeError): + # adafactor isn't compatible with flat grads, which + # are used by default with --fp16 + train_translation_model( + data_dir, + "lstm", + [ + "--required-batch-size-multiple", + "1", + "--encoder-layers", + "1", + "--encoder-hidden-size", + "32", + "--decoder-layers", + "1", + "--optimizer", + "adafactor", + "--fp16", + ], + ) + # but it should pass once we set --fp16-no-flatten-grads + train_translation_model( + data_dir, + "lstm", + [ + "--required-batch-size-multiple", + "1", + "--encoder-layers", + "1", + "--encoder-hidden-size", + "32", + "--decoder-layers", + "1", + "--optimizer", + "adafactor", + "--fp16", + "--fp16-no-flatten-grads", + ], + ) + + +if __name__ == "__main__": + unittest.main() diff 
--git a/fairseq/tests/gpu/test_ema_gpu.py b/fairseq/tests/gpu/test_ema_gpu.py new file mode 100644 index 0000000..33fb560 --- /dev/null +++ b/fairseq/tests/gpu/test_ema_gpu.py @@ -0,0 +1,215 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from copy import deepcopy +from dataclasses import dataclass +from typing import Optional + +import torch + +from fairseq.models.ema import EMA + + +class DummyModule(torch.nn.Module): + def __init__(self) -> None: + """LightningModule for testing purposes + + Args: + epoch_min_loss_override (int, optional): Pass in an epoch that will be set to the minimum + validation loss for testing purposes (zero based). If None this is ignored. Defaults to None. + """ + super().__init__() + self.layer = torch.nn.Linear(in_features=32, out_features=2) + self.another_layer = torch.nn.Linear(in_features=2, out_features=2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.layer(x) + return self.another_layer(x) + + +@dataclass +class EMAConfig(object): + ema_decay: float = 0.99 + ema_start_update: int = 0 + ema_fp32: bool = False + ema_seed_model: Optional[str] = None + ema_update_freq: int = 1 + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestEMAGPU(unittest.TestCase): + def assertTorchAllClose(self, x, y, atol=1e-8, rtol=1e-5, msg=None): + diff = x.float() - y.float() + diff_norm = torch.norm(diff) + other_norm = torch.norm(y.float()) + + if msg is None: + msg = "|input - other| > {} + {} * |other|".format(atol, rtol) + + self.assertLessEqual( + diff_norm, + atol + rtol * other_norm, + msg=msg, + ) + + def test_ema(self): + model = DummyModule().cuda() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig() + ema = EMA(model, config) + + # set decay + 
ema._set_decay(config.ema_decay) + self.assertEqual(ema.get_decay(), config.ema_decay) + + # get model + self.assertEqual(ema.get_model(), ema.model) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + # EMA step + x = torch.randn(32).cuda() + y = model(x) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + ema_state_dict = ema.get_model().state_dict() + + for key, param in model.state_dict().items(): + prev_param = state[key] + ema_param = ema_state_dict[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + self.assertTorchAllClose( + ema_param, + config.ema_decay * prev_param + (1 - config.ema_decay) * param, + ) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + # Load EMA into model + model2 = DummyModule().cuda() + ema.reverse(model2) + + for key, param in model2.state_dict().items(): + ema_param = ema_state_dict[key] + self.assertTrue(torch.allclose(ema_param, param)) + + def test_ema_fp32(self): + model = DummyModule().cuda().half() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig(ema_fp32=True) + ema = EMA(model, config) + + x = torch.randn(32).cuda() + y = model(x.half()) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + for key, param in model.state_dict().items(): + prev_param = state[key] + ema_param = ema.get_model().state_dict()[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + self.assertIn(key, ema.fp32_params) + + # EMA update is done in fp32, and hence the EMA param must be + # closer to the EMA update done in fp32 than in fp16. 
+ self.assertLessEqual( + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ) + .half() + .float() + ), + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param + (1 - config.ema_decay) * param + ).float() + ), + ) + self.assertTorchAllClose( + ema_param, + ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ).half(), + ) + + def test_ema_fp16(self): + model = DummyModule().cuda().half() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig(ema_fp32=False) + ema = EMA(model, config) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + x = torch.randn(32).cuda() + y = model(x.half()) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + for key, param in model.state_dict().items(): + prev_param = state[key] + ema_param = ema.get_model().state_dict()[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + + # EMA update is done in fp16, and hence the EMA param must be + # closer to the EMA update done in fp16 than in fp32. 
+ self.assertLessEqual( + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param + (1 - config.ema_decay) * param + ).float() + ), + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ) + .half() + .float() + ), + ) + self.assertTorchAllClose( + ema_param, + config.ema_decay * prev_param + (1 - config.ema_decay) * param, + ) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/gpu/transformer_quantization_config.yaml b/fairseq/tests/gpu/transformer_quantization_config.yaml new file mode 100644 index 0000000..de31d81 --- /dev/null +++ b/fairseq/tests/gpu/transformer_quantization_config.yaml @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# This file defines example configuration arguments for quantizing +# a transformer model with product quantization + +n_centroids: + Linear: + key: in_features + value: {"*": 8} + Embedding: + key: embedding_dim + value: {"*": 8} + +block_sizes: + Linear: + key: fuzzy_name + value: {fc: 8, attn: 4, emb: 4} + Embedding: + key: fuzzy_name + value: {emb: 8} + +layers_to_quantize: + - decoder\\.layers\\.\d+\\.fc[12] + - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01] + - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj) diff --git a/fairseq/tests/speech/__init__.py b/fairseq/tests/speech/__init__.py new file mode 100644 index 0000000..dba99e4 --- /dev/null +++ b/fairseq/tests/speech/__init__.py @@ -0,0 +1,210 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from argparse import Namespace +import os +import re +import unittest +from pathlib import Path +from tqdm import tqdm +from typing import List, Dict, Optional +import torch +from fairseq.checkpoint_utils import load_model_ensemble_and_task +from fairseq.scoring.wer import WerScorer +from fairseq.scoring.bleu import SacrebleuScorer +from fairseq import utils +import zipfile + +S3_BASE_URL = "https://dl.fbaipublicfiles.com/fairseq" + + +class TestFairseqSpeech(unittest.TestCase): + @classmethod + def download(cls, base_url: str, out_root: Path, filename: str): + url = f"{base_url}/{filename}" + path = out_root / filename + if not path.exists(): + torch.hub.download_url_to_file(url, path.as_posix(), progress=True) + return path + + def _set_up(self, dataset_id: str, s3_dir: str, data_filenames: List[str]): + self.use_cuda = torch.cuda.is_available() + self.root = Path.home() / ".cache" / "fairseq" / dataset_id + self.root.mkdir(exist_ok=True, parents=True) + os.chdir(self.root) + self.base_url = ( + s3_dir if re.search("^https:", s3_dir) else f"{S3_BASE_URL}/{s3_dir}" + ) + for filename in data_filenames: + self.download(self.base_url, self.root, filename) + + def set_up_librispeech(self): + self._set_up( + "librispeech", + "s2t/librispeech", + [ + "cfg_librispeech.yaml", + "spm_librispeech_unigram10000.model", + "spm_librispeech_unigram10000.txt", + "librispeech_test-other.tsv", + "librispeech_test-other.zip", + ], + ) + + def set_up_ljspeech(self): + self._set_up( + "ljspeech", + "s2/ljspeech", + [ + "cfg_ljspeech_g2p.yaml", + "ljspeech_g2p_gcmvn_stats.npz", + "ljspeech_g2p.txt", + "ljspeech_test.tsv", + "ljspeech_test.zip", + ], + ) + + def set_up_sotasty_es_en(self): + self._set_up( + "sotasty_es_en", + "s2t/big/es-en", + [ + "cfg_es_en.yaml", + "spm_bpe32768_es_en.model", + "spm_bpe32768_es_en.txt", + "sotasty_es_en_test_ted.tsv", + "sotasty_es_en_test_ted.zip", + ], + ) + + def set_up_mustc_de_fbank(self): + self._set_up( + "mustc_de_fbank", + 
"https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/must_c/en_de", + [ + "config.yaml", + "spm.model", + "dict.txt", + "src_dict.txt", + "tgt_dict.txt", + "tst-COMMON.tsv", + "tst-COMMON.zip", + ], + ) + + def download_and_load_checkpoint( + self, + checkpoint_filename: str, + arg_overrides: Optional[Dict[str, str]] = None, + strict: bool = True, + ): + path = self.download(self.base_url, self.root, checkpoint_filename) + _arg_overrides = arg_overrides or {} + _arg_overrides["data"] = self.root.as_posix() + models, cfg, task = load_model_ensemble_and_task( + [path.as_posix()], arg_overrides=_arg_overrides, strict=strict + ) + if self.use_cuda: + for model in models: + model.cuda() + + return models, cfg, task, self.build_generator(task, models, cfg) + + def build_generator( + self, + task, + models, + cfg, + ): + return task.build_generator(models, cfg) + + @classmethod + def get_batch_iterator(cls, task, test_split, max_tokens, max_positions): + task.load_dataset(test_split) + return task.get_batch_iterator( + dataset=task.dataset(test_split), + max_tokens=max_tokens, + max_positions=max_positions, + num_workers=1, + ).next_epoch_itr(shuffle=False) + + @classmethod + def get_wer_scorer( + cls, tokenizer="none", lowercase=False, remove_punct=False, char_level=False + ): + scorer_args = { + "wer_tokenizer": tokenizer, + "wer_lowercase": lowercase, + "wer_remove_punct": remove_punct, + "wer_char_level": char_level, + } + return WerScorer(Namespace(**scorer_args)) + + @classmethod + def get_bleu_scorer(cls, tokenizer="13a", lowercase=False, char_level=False): + scorer_args = { + "sacrebleu_tokenizer": tokenizer, + "sacrebleu_lowercase": lowercase, + "sacrebleu_char_level": char_level, + } + return SacrebleuScorer(Namespace(**scorer_args)) + + @torch.no_grad() + def base_test( + self, + ckpt_name, + reference_score, + score_delta=0.3, + dataset="librispeech_test-other", + max_tokens=65_536, + max_positions=(4_096, 1_024), + arg_overrides=None, + strict=True, + 
score_type="wer", + ): + models, _, task, generator = self.download_and_load_checkpoint( + ckpt_name, arg_overrides=arg_overrides, strict=strict + ) + if not self.use_cuda: + return + + batch_iterator = self.get_batch_iterator( + task, dataset, max_tokens, max_positions + ) + if score_type == "bleu": + scorer = self.get_bleu_scorer() + elif score_type == "wer": + scorer = self.get_wer_scorer() + else: + raise Exception(f"Unsupported score type {score_type}") + + progress = tqdm(enumerate(batch_iterator), total=len(batch_iterator)) + for batch_idx, sample in progress: + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + hypo = task.inference_step(generator, models, sample) + for i, sample_id in enumerate(sample["id"].tolist()): + tgt_str, hypo_str = self.postprocess_tokens( + task, + sample["target"][i, :], + hypo[i][0]["tokens"].int().cpu(), + ) + if batch_idx == 0 and i < 3: + print(f"T-{sample_id} {tgt_str}") + print(f"H-{sample_id} {hypo_str}") + scorer.add_string(tgt_str, hypo_str) + + print(scorer.result_string() + f" (reference: {reference_score})") + self.assertAlmostEqual(scorer.score(), reference_score, delta=score_delta) + + def postprocess_tokens(self, task, target, hypo_tokens): + tgt_tokens = utils.strip_pad(target, task.tgt_dict.pad()).int().cpu() + tgt_str = task.tgt_dict.string(tgt_tokens, "sentencepiece") + hypo_str = task.tgt_dict.string(hypo_tokens, "sentencepiece") + return tgt_str, hypo_str + + def unzip_files(self, zip_file_name): + zip_file_path = self.root / zip_file_name + with zipfile.ZipFile(zip_file_path, "r") as zip_ref: + zip_ref.extractall(self.root / zip_file_name.strip(".zip")) diff --git a/fairseq/tests/speech/test_convtransformer_simul_trans.py b/fairseq/tests/speech/test_convtransformer_simul_trans.py new file mode 100644 index 0000000..0562404 --- /dev/null +++ b/fairseq/tests/speech/test_convtransformer_simul_trans.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
class TestConvtransformerSimulTrans(TestFairseqSpeech):
    """Checkpoint-loading smoke test for the simultaneous ConvTransformer."""

    def setUp(self):
        data_files = ["config_gcmvn_specaug.yaml", "dict.txt", "dev.tsv"]
        self._set_up("simul", "speech_tests/simul", data_files)

    def test_waitk_checkpoint(self):
        """Only test model loading since fairseq currently doesn't support inference of simultaneous models"""
        overrides = {
            "config_yaml": "config_gcmvn_specaug.yaml",
            "load_pretrained_encoder_from": None,
        }
        # The four-way unpack doubles as a check on the shape of the result.
        _models, _cfg, _task, _generator = self.download_and_load_checkpoint(
            "checkpoint_best.pt", arg_overrides=overrides
        )
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestLibrispeechDualInputWavTransformer(TestFairseqSpeech):
    """WER regression test for the LibriSpeech dual-input wav transformer."""

    def setUp(self):
        self._set_up(
            "librispeech_wvtrasnformer",
            "s2t",
            [
                "librispeech_flac_test-other.tsv",
                "librispeech_flac_test-other.zip",
            ],
        )
        # Model artefacts live under a different base URL than the data above.
        base_url = "https://dl.fbaipublicfiles.com/joint_speech_text_4_s2t/acl2022/librispeech/finetuned"
        for filename in (
            "checkpoint_ave_10.pt",
            "spm.model",
            "src_dict.txt",
            "tgt_dict.txt",
            "config.yaml",
        ):
            self.download(base_url, self.root, filename)

    def import_user_module(self):
        """Register the speech_text_joint_to_text example as a user module."""
        examples_dir = (
            Path(fairseq.__file__).parent.parent / "examples/speech_text_joint_to_text"
        )
        Arg = namedtuple("Arg", ["user_dir"])
        utils.import_user_module(Arg(str(examples_dir)))

    @torch.no_grad()
    def test_librispeech_dualinput_wav_transformer_checkpoint(self):
        self.import_user_module()
        overrides = {
            "config_yaml": "config.yaml",
            "load_pretrained_speech_text_encoder": "",
            "load_pretrained_speech_text_decoder": "",
            "beam": 10,
            "nbest": 1,
            "lenpen": 1.0,
            "load_speech_only": True,
        }
        self.base_test(
            "checkpoint_ave_10.pt",
            4.6,
            dataset="librispeech_flac_test-other",
            max_tokens=800000,
            max_positions=(800000, 1024),
            arg_overrides=overrides,
        )
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestDualInputS2TTransformer(TestFairseqSpeech):
    """BLEU regression test for the MuST-C en-de dual-input S2T transformer."""

    def setUp(self):
        self.set_up_mustc_de_fbank()

    def import_user_module(self):
        """Register the speech_text_joint_to_text example as a user module."""
        examples_dir = (
            Path(fairseq.__file__).parent.parent / "examples/speech_text_joint_to_text"
        )
        Arg = namedtuple("Arg", ["user_dir"])
        utils.import_user_module(Arg(str(examples_dir)))

    @torch.no_grad()
    def test_mustc_de_fbank_dualinput_s2t_transformer_checkpoint(self):
        self.import_user_module()

        # Load the averaged checkpoint with decoding options overridden.
        ckpt_path = self.download(self.base_url, self.root, "checkpoint_ave_10.pt")
        models, cfg, task = load_model_ensemble_and_task(
            [ckpt_path.as_posix()],
            arg_overrides={
                "data": self.root.as_posix(),
                "config_yaml": "config.yaml",
                "load_pretrain_speech_encoder": "",
                "load_pretrain_text_encoder_last": "",
                "load_pretrain_decoder": "",
                "beam": 10,
                "nbest": 1,
                "lenpen": 1.0,
                "load_speech_only": True,
            },
        )
        if self.use_cuda:
            for model in models:
                model.cuda()
        generator = task.build_generator(models, cfg)

        split = "tst-COMMON"
        task.load_dataset(split)
        batches = task.get_batch_iterator(
            dataset=task.dataset(split),
            max_tokens=250_000,
            max_positions=(10_000, 1_024),
            num_workers=1,
        ).next_epoch_itr(shuffle=False)

        tokenizer = task.build_tokenizer(cfg.tokenizer)
        bpe = task.build_bpe(cfg.bpe)

        def decode_fn(x):
            # Not called below; kept because the original test defines it.
            if bpe is not None:
                x = bpe.decode(x)
            if tokenizer is not None:
                x = tokenizer.decode(x)
            return x

        scorer = SacrebleuScorer(
            Namespace(
                sacrebleu_tokenizer="13a",
                sacrebleu_lowercase=False,
                sacrebleu_char_level=False,
            )
        )
        pad = task.tgt_dict.pad
        for batch_idx, sample in tqdm(enumerate(batches), total=len(batches)):
            if self.use_cuda:
                sample = utils.move_to_cuda(sample)
            hypo = task.inference_step(generator, models, sample)
            for i, sample_id in enumerate(sample["id"].tolist()):
                tgt_tokens = utils.strip_pad(sample["target"][i, :], pad()).int().cpu()
                hypo_tokens = hypo[i][0]["tokens"].int().cpu()
                tgt_str = task.tgt_dict.string(tgt_tokens, "sentencepiece")
                hypo_str = task.tgt_dict.string(hypo_tokens, "sentencepiece")
                # Echo the first three pairs of the first batch for eyeballing.
                if batch_idx == 0 and i < 3:
                    print(f"T-{sample_id} {tgt_str}")
                    print(f"D-{sample_id} {hypo_str}")
                scorer.add_string(tgt_str, hypo_str)

        reference_bleu = 27.3
        result = scorer.result_string()
        print(result + f" (reference: {reference_bleu})")
        # The score is read as the third whitespace-separated field of the
        # scorer's result string.
        res_bleu = float(result.split()[2])
        self.assertAlmostEqual(res_bleu, reference_bleu, delta=0.3)
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestFastSpeech2(TestFairseqSpeech):
    """Mel-cepstral-distortion regression test for FastSpeech2 on LJSpeech."""

    def setUp(self):
        self.set_up_ljspeech()

    @torch.no_grad()
    def test_ljspeech_fastspeech2_checkpoint(self):
        overrides = {
            "config_yaml": "cfg_ljspeech_g2p.yaml",
            "vocoder": "griffin_lim",
            "fp16": False,
        }
        models, cfg, task, generator = self.download_and_load_checkpoint(
            "ljspeech_fastspeech2_g2p.pt", arg_overrides=overrides
        )

        batches = self.get_batch_iterator(task, "ljspeech_test", 65_536, 4_096)
        total_mcd, n_samples = 0.0, 0
        for sample in tqdm(batches, total=len(batches)):
            if self.use_cuda:
                sample = utils.move_to_cuda(sample)
            hypos = generator.generate(models[0], sample, has_targ=True)
            targets = [hypo["targ_waveform"] for hypo in hypos]
            predictions = [hypo["waveform"] for hypo in hypos]
            rets = batch_mel_cepstral_distortion(targets, predictions, sr=task.sr)
            total_mcd += sum(d.item() for d, _ in rets)
            n_samples += len(sample["id"].tolist())

        mcd = round(total_mcd / n_samples, 1)
        reference_mcd = 3.2
        print(f"MCD: {mcd} (reference: {reference_mcd})")
        self.assertAlmostEqual(mcd, reference_mcd, delta=0.1)
class TestS2STransformer(TestFairseqSpeech):
    """BLEU regression test for the speech-to-unit transformer checkpoint."""

    def setUp(self):
        data_files = [
            "dev_shuf200.tsv",
            "src_feat.zip",
            "config_specaug_lb.yaml",
            "vocoder",
            "vocoder_config.json",
        ]
        self._set_up("s2s", "speech_tests/s2s", data_files)

    def test_s2s_transformer_checkpoint(self):
        overrides = {
            "config_yaml": "config_specaug_lb.yaml",
            "multitask_config_yaml": None,
            "target_is_code": True,
            "target_code_size": 100,
            "eval_inference": False,
        }
        self.base_test(
            ckpt_name="s2u_transformer_reduced_fisher.pt",
            reference_score=38.3,
            dataset="dev_shuf200",
            arg_overrides=overrides,
            score_type="bleu",
            strict=False,
        )

    def postprocess_tokens(self, task, target, hypo_tokens):
        """Render target/hypothesis tokens without sentencepiece post-processing."""
        dictionary = task.tgt_dict
        stripped = utils.strip_pad(target, dictionary.pad()).int().cpu()
        return dictionary.string(stripped), dictionary.string(hypo_tokens)
class TestS2TConformer(TestFairseqSpeech):
    """WER regression test for the LibriSpeech S2T conformer checkpoint."""

    def setUp(self):
        self.set_up_librispeech()

    def test_librispeech_s2t_conformer_s_checkpoint(self):
        overrides = {"config_yaml": "cfg_librispeech.yaml"}
        self.base_test(
            ckpt_name="librispeech_conformer_rel_pos_s.pt",
            reference_score=12,
            arg_overrides=overrides,
        )


# --- fairseq/tests/speech/test_s2t_transformer.py ---
class TestS2TTransformer(TestFairseqSpeech):
    """WER regression test for the LibriSpeech S2T transformer checkpoint."""

    def setUp(self):
        self.set_up_librispeech()

    def test_librispeech_s2t_transformer_s_checkpoint(self):
        overrides = {"config_yaml": "cfg_librispeech.yaml"}
        self.base_test(
            ckpt_name="librispeech_transformer_s.pt",
            reference_score=9,
            arg_overrides=overrides,
        )
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestTTSTransformer(TestFairseqSpeech):
    """Mel-cepstral-distortion regression test for the TTS transformer."""

    def setUp(self):
        self.set_up_ljspeech()

    @torch.no_grad()
    def test_ljspeech_tts_transformer_checkpoint(self):
        overrides = {
            "config_yaml": "cfg_ljspeech_g2p.yaml",
            "vocoder": "griffin_lim",
            "fp16": False,
        }
        models, cfg, task, generator = self.download_and_load_checkpoint(
            "ljspeech_transformer_g2p.pt", arg_overrides=overrides
        )

        batches = self.get_batch_iterator(task, "ljspeech_test", 65_536, 1024)
        total_mcd, n_samples = 0.0, 0
        for sample in tqdm(batches, total=len(batches)):
            if self.use_cuda:
                sample = utils.move_to_cuda(sample)
            hypos = generator.generate(models[0], sample, has_targ=True)
            targets = [hypo["targ_waveform"] for hypo in hypos]
            predictions = [hypo["waveform"] for hypo in hypos]
            rets = batch_mel_cepstral_distortion(targets, predictions, sr=task.sr)
            total_mcd += sum(d.item() for d, _ in rets)
            n_samples += len(sample["id"].tolist())

        mcd = round(total_mcd / n_samples, 1)
        reference_mcd = 3.3
        print(f"MCD: {mcd} (reference: {reference_mcd})")
        self.assertAlmostEqual(mcd, reference_mcd, delta=0.1)
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestWav2Vec2(TestFairseqSpeech):
    """WER regression tests for wav2vec 2.0 checkpoints decoded with Viterbi."""

    def setUp(self):
        self._set_up(
            "librispeech_w2v2",
            "conformer/wav2vec2/librispeech",
            [
                "test_librispeech-other.ltr",
                "test_librispeech-other.tsv",
                "test_librispeech-other_small.ltr_100",
                "test_librispeech-other_small.tsv",
                "test-other.zip",
                "dict.ltr.txt",
                "dict.ltr_100.txt",
            ],
        )
        self.unzip_files(
            "test-other.zip",
        )

    def test_transformer_w2v2(self):
        self.base_test(
            ckpt_name="transformer_oss_small_100h.pt",
            reference_score=38,
            score_delta=1,
            dataset="test_librispeech-other",
            max_tokens=1000000,
            max_positions=(700000, 1000),
            arg_overrides={
                "task": "audio_finetuning",
                "labels": "ltr",
                "nbest": 1,
                "tpu": False,
            },
            strict=False,
        )

    def test_conformer_w2v2(self):
        self.base_test(
            ckpt_name="conformer_LS_PT_LS_FT_rope.pt",
            reference_score=4.5,
            score_delta=1,
            dataset="test_librispeech-other_small",
            max_tokens=1000000,
            max_positions=(700000, 1000),
            arg_overrides={
                "task": "audio_finetuning",
                "labels": "ltr_100",
                "nbest": 1,
                "tpu": False,
            },
            strict=True,
        )

    def build_generator(self, task, models, cfg):
        """Return a ``W2lViterbiDecoder`` with ``cfg.nbest`` forced to 1.

        Raises:
            Exception: if the decoder module cannot be imported. The original
                import error is chained as ``__cause__`` so the real reason
                stays visible in the traceback.
        """
        try:
            from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder
        except Exception as err:
            raise Exception(
                "Cannot run this test without flashlight dependency"
            ) from err
        with open_dict(cfg):
            cfg.nbest = 1
        return W2lViterbiDecoder(cfg, task.target_dictionary)

    def postprocess_tokens(self, task, target, hypo_tokens):
        """Render target/hypothesis tokens with "letter" post-processing."""
        tgt_tokens = utils.strip_pad(target, task.target_dictionary.pad()).int().cpu()
        tgt_str = task.target_dictionary.string(tgt_tokens)
        tgt_str = post_process(tgt_str, "letter")

        hypo_pieces = task.target_dictionary.string(hypo_tokens)
        hypo_str = post_process(hypo_pieces, "letter")
        return tgt_str, hypo_str


# --- fairseq/tests/speech/test_xm_transformer.py ---
class TestXMTransformer(TestFairseqSpeech):
    """BLEU regression test for the 600M es-en XM transformer checkpoint."""

    def setUp(self):
        self.set_up_sotasty_es_en()

    # TODO: investigate increases BLEU score (30.42 -> 31.74)
    def test_sotasty_es_en_600m_checkpoint(self):
        self.base_test(
            ckpt_name="xm_transformer_600m_es_en_md.pt",
            reference_score=31.74,
            score_delta=0.2,
            max_tokens=3_000_000,
            max_positions=(1_000_000, 1_024),
            dataset="sotasty_es_en_test_ted",
            arg_overrides={"config_yaml": "cfg_es_en.yaml"},
            score_type="bleu",
        )
DEFAULT_TEST_VOCAB_SIZE = 100


# ///////////////////////////////////////////////////////////////////////////
# utility function to setup dummy dict/task/input
# ///////////////////////////////////////////////////////////////////////////


def get_dummy_dictionary(vocab_size=DEFAULT_TEST_VOCAB_SIZE):
    """Return a ``Dictionary`` with ``vocab_size`` extra symbols "0", "1", ...

    Each symbol is added through ``add_symbol(symbol, 1000)``.
    """
    dummy_dict = Dictionary()
    # add dummy symbols to satisfy vocab size
    for index in range(vocab_size):
        dummy_dict.add_symbol("{}".format(index), 1000)
    return dummy_dict


class DummyTask(LegacyFairseqTask):
    """Minimal task exposing a dummy dictionary as source/target dictionary."""

    def __init__(self, args):
        super().__init__(args)
        self.dictionary = get_dummy_dictionary()
        # CTC criteria need an explicit blank symbol in the vocabulary.
        if getattr(self.args, "ctc", False):
            self.dictionary.add_symbol("<ctc_blank>")
        self.tgt_dict = self.dictionary

    @property
    def target_dictionary(self):
        return self.dictionary


def get_dummy_task_and_parser():
    """
    To build a fairseq model, we need a dummy parser and task. This function
    creates both to facilitate model/criterion tests.

    Note: ``DummyTask`` is used as the dummy task. You may want to use
    another task by providing another function.

    Returns:
        ``(task, parser)``
    """
    parser = argparse.ArgumentParser(
        description="test_dummy_s2s_task", argument_default=argparse.SUPPRESS
    )
    DummyTask.add_args(parser)
    args = parser.parse_args([])
    task = DummyTask.setup_task(args)
    return task, parser


def get_dummy_input(T=100, D=80, B=5, K=100):
    """Build a random forward-input dict for encoder-decoder models.

    Args:
        T: max sequence length
        D: feature vector dimension
        B: batch size
        K: target dimension size (exclusive upper bound of target token ids)

    Returns:
        dict with ``src_tokens`` (B, T, D), ``src_lengths`` (B,) sorted in
        descending order, and padded ``prev_output_tokens``.
    """
    forward_input = {}
    feature = torch.randn(B, T, D)
    # this (B, T, D) layout is just a convention, you can override it by
    # writing your own _prepare_forward_input function
    src_lengths = torch.from_numpy(
        np.random.randint(low=1, high=T, size=B, dtype=np.int64)
    )
    src_lengths[0] = T  # make sure the maximum length matches
    prev_output_tokens = []
    for b in range(B):
        token_length = np.random.randint(low=1, high=src_lengths[b].item() + 1)
        tokens = np.random.randint(low=0, high=K, size=token_length, dtype=np.int64)
        prev_output_tokens.append(torch.from_numpy(tokens))

    prev_output_tokens = fairseq_data_utils.collate_tokens(
        prev_output_tokens,
        pad_idx=1,
        eos_idx=2,
        left_pad=False,
        move_eos_to_beginning=False,
    )
    # Sort by length (descending) and reorder the features to match.
    # NOTE(review): prev_output_tokens is not reordered with sorted_order —
    # confirm that is intended for purely random dummy data.
    src_lengths, sorted_order = src_lengths.sort(descending=True)
    forward_input["src_tokens"] = feature.index_select(0, sorted_order)
    forward_input["src_lengths"] = src_lengths
    forward_input["prev_output_tokens"] = prev_output_tokens

    return forward_input
def _current_postion_info():
    """Return " (at <this file>:<caller line>)" for use in failure messages."""
    frame = currentframe()
    file_name = os.path.basename(getframeinfo(frame).filename)
    caller_line = frame.f_back.f_lineno
    return " (at {}:{})".format(file_name, caller_line)


def check_encoder_output(encoder_output, batch_size=None):
    """Validate the structure of an encoder's forward output.

    The output must be a dict holding
    - encoder_out: a float32 torch.Tensor
    - encoder_padding_mask: None, or a 2-d uint8/bool torch.Tensor whose
      size(1) equals ``batch_size`` when that is given

    Returns:
        ``(True, None)`` on success, otherwise ``(False, message)``.
    """
    if not isinstance(encoder_output, dict):
        return (
            False,
            "FairseqEncoderModel.forward(...) must be a dict" + _current_postion_info(),
        )

    for required_key in ("encoder_out", "encoder_padding_mask"):
        if required_key not in encoder_output:
            return (
                False,
                "FairseqEncoderModel.forward(...) must contain "
                + required_key
                + _current_postion_info(),
            )

    out = encoder_output["encoder_out"]
    if not isinstance(out, torch.Tensor):
        return False, "encoder_out must be a torch.Tensor" + _current_postion_info()
    if out.dtype != torch.float32:
        return False, "encoder_out must have float32 dtype" + _current_postion_info()

    mask = encoder_output["encoder_padding_mask"]
    if mask is None:
        return True, None

    if not isinstance(mask, torch.Tensor):
        return (
            False,
            "encoder_padding_mask must be a torch.Tensor" + _current_postion_info(),
        )

    # torch.bool is only accepted when this torch build provides it.
    accepted_dtypes = [torch.uint8]
    if hasattr(torch, "bool"):
        accepted_dtypes.append(torch.bool)
    if mask.dtype not in accepted_dtypes:
        return (
            False,
            "encoder_padding_mask must have dtype of uint8" + _current_postion_info(),
        )

    if mask.dim() != 2:
        return (
            False,
            "we expect encoder_padding_mask to be a 2-d tensor, in shape (T, B)"
            + _current_postion_info(),
        )

    if batch_size is not None and mask.size(1) != batch_size:
        return (
            False,
            "we expect encoder_padding_mask to be a 2-d tensor, with size(1)"
            + " being the batch size"
            + _current_postion_info(),
        )
    return True, None


def check_decoder_output(decoder_output):
    """Validate the structure of a decoder's forward output.

    The output must be a 2-element tuple whose
    - first element is a torch.Tensor
    - second element can be anything (reserved for future use)

    Returns:
        ``(True, None)`` on success, otherwise ``(False, message)``.
    """
    if not isinstance(decoder_output, tuple):
        return False, "FariseqDecoder output must be a tuple" + _current_postion_info()

    if len(decoder_output) != 2:
        return (
            False,
            "FairseqDecoder output must be 2-elem tuple" + _current_postion_info(),
        )

    head = decoder_output[0]
    if not isinstance(head, torch.Tensor):
        return (
            False,
            "FariseqDecoder output[0] must be a torch.Tensor" + _current_postion_info(),
        )

    return True, None
# ///////////////////////////////////////////////////////////////////////////
# Base Test class
# ///////////////////////////////////////////////////////////////////////////


def _assert_normalized_probs_are_tagged(test_case):
    """Shared body of ``test_get_normalized_probs`` for the model base classes.

    Does nothing unless both ``test_case.model`` and
    ``test_case.forward_input`` are set.
    """
    if not (test_case.model and test_case.forward_input):
        return
    forward_output = test_case.model.forward(**test_case.forward_input)
    logprob = test_case.model.get_normalized_probs(forward_output, log_probs=True)
    prob = test_case.model.get_normalized_probs(forward_output, log_probs=False)

    # in order for different models/criterion to play with each other
    # we need to know whether the logprob or prob output is batch_first
    # or not. We assume an additional attribute will be attached to logprob
    # or prob. If you find your code failed here, simply override
    # FairseqModel.get_normalized_probs, see example at
    # https://fburl.com/batch_first_example
    test_case.assertTrue(hasattr(logprob, "batch_first"))
    test_case.assertTrue(hasattr(prob, "batch_first"))

    test_case.assertTrue(torch.is_tensor(logprob))
    test_case.assertTrue(torch.is_tensor(prob))


class TestBaseFairseqModelBase(unittest.TestCase):
    """
    This class is used to facilitate writing unittest for any class derived from
    `BaseFairseqModel`.
    """

    @classmethod
    def setUpClass(cls):
        # The base class itself carries no runnable tests.
        if cls is TestBaseFairseqModelBase:
            raise unittest.SkipTest("Skipping test case in base")
        super().setUpClass()

    def setUpModel(self, model):
        self.assertTrue(isinstance(model, BaseFairseqModel))
        self.model = model

    def setupInput(self):
        # NOTE(review): subclasses define ``setUpInput`` (capital U); this
        # lower-case no-op is kept only for backward compatibility.
        pass

    def setUp(self):
        self.model = None
        self.forward_input = None


class TestFairseqEncoderDecoderModelBase(TestBaseFairseqModelBase):
    """
    base code to test FairseqEncoderDecoderModel (formerly known as
    `FairseqModel`) must be derived from this base class
    """

    @classmethod
    def setUpClass(cls):
        if cls is TestFairseqEncoderDecoderModelBase:
            raise unittest.SkipTest("Skipping test case in base")
        super().setUpClass()

    def setUpModel(self, model_cls, extra_args_setters=None):
        """Build ``model_cls`` from default args, optionally patched by setters."""
        self.assertTrue(
            issubclass(model_cls, (FairseqEncoderDecoderModel, FairseqModel)),
            msg="This class only tests for FairseqModel subclasses",
        )

        task, parser = get_dummy_task_and_parser()
        model_cls.add_args(parser)

        args = parser.parse_args([])

        if extra_args_setters is not None:
            for args_setter in extra_args_setters:
                args_setter(args)
        model = model_cls.build_model(args, task)
        self.model = model

    def setUpInput(self, input=None):
        self.forward_input = get_dummy_input() if input is None else input

    def setUp(self):
        super().setUp()

    def test_forward(self):
        if self.model and self.forward_input:
            forward_output = self.model.forward(**self.forward_input)
            # for FairseqEncoderDecoderModel, forward returns a tuple of two
            # elements, the first one is a Torch.Tensor
            succ, msg = check_decoder_output(forward_output)
            self.assertTrue(succ, msg=msg)
            self.forward_output = forward_output

    def test_get_normalized_probs(self):
        _assert_normalized_probs_are_tagged(self)


class TestFairseqEncoderModelBase(TestBaseFairseqModelBase):
    """
    base class to test FairseqEncoderModel
    """

    @classmethod
    def setUpClass(cls):
        if cls is TestFairseqEncoderModelBase:
            raise unittest.SkipTest("Skipping test case in base")
        super().setUpClass()

    def setUpModel(self, model_cls, extra_args_setters=None):
        """Build ``model_cls`` from default args, optionally patched by setters."""
        self.assertTrue(
            issubclass(model_cls, FairseqEncoderModel),
            msg="This class is only used for testing FairseqEncoderModel",
        )
        task, parser = get_dummy_task_and_parser()
        model_cls.add_args(parser)
        args = parser.parse_args([])
        if extra_args_setters is not None:
            for args_setter in extra_args_setters:
                args_setter(args)

        model = model_cls.build_model(args, task)
        self.model = model

    def setUpInput(self, input=None):
        self.forward_input = get_dummy_input() if input is None else input
        # get_dummy_input() is originally for s2s, here we delete extra dict
        # items, so it can be used for EncoderModel / Encoder as well
        self.forward_input.pop("prev_output_tokens", None)

    def setUp(self):
        super().setUp()

    def test_forward(self):
        if self.forward_input and self.model:
            bsz = self.forward_input["src_tokens"].size(0)
            forward_output = self.model.forward(**self.forward_input)

            # we expect forward_output to be a dict with the following
            # key/value pairs:
            # - encoder_out: a Torch.Tensor
            # - encoder_padding_mask: a binary Torch.Tensor
            succ, msg = check_encoder_output(forward_output, batch_size=bsz)
            self.assertTrue(succ, msg=msg)
            self.forward_output = forward_output

    def test_get_normalized_probs(self):
        _assert_normalized_probs_are_tagged(self)


class TestFairseqEncoderBase(unittest.TestCase):
    """
    base class to test FairseqEncoder
    """

    @classmethod
    def setUpClass(cls):
        if cls is TestFairseqEncoderBase:
            raise unittest.SkipTest("Skipping test case in base")
        super().setUpClass()

    def setUpEncoder(self, encoder):
        self.assertTrue(
            isinstance(encoder, FairseqEncoder),
            msg="This class is only used for test FairseqEncoder",
        )
        self.encoder = encoder

    def setUpInput(self, input=None):
        self.forward_input = get_dummy_input() if input is None else input
        # get_dummy_input() is originally for s2s, here we delete extra dict
        # items, so it can be used for EncoderModel / Encoder as well
        self.forward_input.pop("prev_output_tokens", None)

    def setUp(self):
        self.encoder = None
        self.forward_input = None

    def test_forward(self):
        if self.encoder and self.forward_input:
            bsz = self.forward_input["src_tokens"].size(0)

            forward_output = self.encoder.forward(**self.forward_input)
            succ, msg = check_encoder_output(forward_output, batch_size=bsz)
            self.assertTrue(succ, msg=msg)
            self.forward_output = forward_output


class TestFairseqDecoderBase(unittest.TestCase):
    """
    base class to test FairseqDecoder
    """

    @classmethod
    def setUpClass(cls):
        if cls is TestFairseqDecoderBase:
            raise unittest.SkipTest("Skipping test case in base")
        super().setUpClass()

    def setUpDecoder(self, decoder):
        self.assertTrue(
            isinstance(decoder, FairseqDecoder),
            msg="This class is only used for test FairseqDecoder",
        )
        self.decoder = decoder

    def setUpInput(self, input=None):
        self.forward_input = get_dummy_encoder_output() if input is None else input

    def setUpPrevOutputTokens(self, tokens=None):
        if tokens is None:
            self.encoder_input = get_dummy_input()
            self.prev_output_tokens = self.encoder_input["prev_output_tokens"]
        else:
            self.prev_output_tokens = tokens

    def setUp(self):
        self.decoder = None
        self.forward_input = None
        self.prev_output_tokens = None

    def test_forward(self):
        if (
            self.decoder is not None
            and self.forward_input is not None
            and self.prev_output_tokens is not None
        ):
            forward_output = self.decoder.forward(
                prev_output_tokens=self.prev_output_tokens,
                encoder_out=self.forward_input,
            )
            succ, msg = check_decoder_output(forward_output)
            self.assertTrue(succ, msg=msg)
            # Store the result as forward_output, like the sibling base
            # classes do, instead of clobbering the encoder-output input.
            self.forward_output = forward_output


class DummyEncoderModel(FairseqEncoderModel):
    """Encoder-only model wrapping ``DummyEncoder`` for criterion tests."""

    def __init__(self, encoder):
        super().__init__(encoder)

    @classmethod
    def build_model(cls, args, task):
        return cls(DummyEncoder())

    def get_logits(self, net_output):
        # Inverse of sigmoid to use with BinaryCrossEntropyWithLogitsCriterion as
        # F.binary_cross_entropy_with_logits combines sigmoid and CE
        return torch.log(
            torch.div(net_output["encoder_out"], 1 - net_output["encoder_out"])
        )

    def get_normalized_probs(self, net_output, log_probs, sample=None):
        lprobs = super().get_normalized_probs(net_output, log_probs, sample=sample)
        lprobs.batch_first = True
        return lprobs


class DummyEncoder(FairseqEncoder):
    """Encoder that returns its input features unchanged plus a padding mask."""

    def __init__(self):
        super().__init__(None)

    def forward(self, src_tokens, src_lengths):
        mask, max_len = lengths_to_encoder_padding_mask(src_lengths)
        return {"encoder_out": src_tokens, "encoder_padding_mask": mask}


class CrossEntropyCriterionTestBase(unittest.TestCase):
    """Base class for criterion tests; subclasses must set ``criterion_cls``."""

    @classmethod
    def setUpClass(cls):
        if cls is CrossEntropyCriterionTestBase:
            raise unittest.SkipTest("Skipping base class test case")
        super().setUpClass()

    def setUpArgs(self):
        args = argparse.Namespace()
        args.sentence_avg = False
        args.threshold = 0.1  # to use with BinaryCrossEntropyWithLogitsCriterion
        return args

    def setUp(self):
        args = self.setUpArgs()
        self.model = DummyEncoderModel(encoder=DummyEncoder())
        self.criterion = self.criterion_cls.build_criterion(args, task=DummyTask(args))

    def get_src_tokens(self, correct_prediction, aggregate):
        """
        correct_prediction: True if the net_output (src_tokens) should
        predict the correct target
        aggregate: True if the criterion expects net_output (src_tokens)
        aggregated across time axis

        Returns a one-hot float tensor of shape (2, 2) when ``aggregate`` is
        true, otherwise (2, 10, 2).
        """
        predicted_idx = 0 if correct_prediction else 1
        shape = (2, 2) if aggregate else (2, 10, 2)
        src_tokens = torch.zeros(shape, dtype=torch.float)
        # Put all the mass on the predicted class along the last axis.
        src_tokens[..., predicted_idx] = 1.0
        return src_tokens

    def get_target(self, soft_target):
        """Return a (2, 2) float one-hot target, or a (2, 10) long zero target."""
        if soft_target:
            target = torch.zeros((2, 2), dtype=torch.float)
            target[:, 0] = 1.0
        else:
            target = torch.zeros((2, 10), dtype=torch.long)
        return target

    def get_test_sample(self, correct, soft_target, aggregate):
        src_tokens = self.get_src_tokens(correct, aggregate)
        target = self.get_target(soft_target)
        L = src_tokens.size(1)
        return {
            "net_input": {"src_tokens": src_tokens, "src_lengths": torch.tensor([L])},
            "target": target,
            "ntokens": src_tokens.size(0) * src_tokens.size(1),
        }
+ +import unittest + +import numpy as np +import torch +from examples.speech_recognition.data.collaters import Seq2SeqCollater + + +class TestSeq2SeqCollator(unittest.TestCase): + def test_collate(self): + + eos_idx = 1 + pad_idx = 0 + collater = Seq2SeqCollater( + feature_index=0, label_index=1, pad_index=pad_idx, eos_index=eos_idx + ) + + # 2 frames in the first sample and 3 frames in the second one + frames1 = np.array([[7, 8], [9, 10]]) + frames2 = np.array([[1, 2], [3, 4], [5, 6]]) + target1 = np.array([4, 2, 3, eos_idx]) + target2 = np.array([3, 2, eos_idx]) + sample1 = {"id": 0, "data": [frames1, target1]} + sample2 = {"id": 1, "data": [frames2, target2]} + batch = collater.collate([sample1, sample2]) + + # collate sort inputs by frame's length before creating the batch + self.assertTensorEqual(batch["id"], torch.tensor([1, 0])) + self.assertEqual(batch["ntokens"], 7) + self.assertTensorEqual( + batch["net_input"]["src_tokens"], + torch.tensor( + [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [pad_idx, pad_idx]]] + ), + ) + self.assertTensorEqual( + batch["net_input"]["prev_output_tokens"], + torch.tensor([[eos_idx, 3, 2, pad_idx], [eos_idx, 4, 2, 3]]), + ) + self.assertTensorEqual(batch["net_input"]["src_lengths"], torch.tensor([3, 2])) + self.assertTensorEqual( + batch["target"], + torch.tensor([[3, 2, eos_idx, pad_idx], [4, 2, 3, eos_idx]]), + ) + self.assertEqual(batch["nsentences"], 2) + + def assertTensorEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertEqual(t1.ne(t2).long().sum(), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/speech_recognition/test_cross_entropy.py b/fairseq/tests/speech_recognition/test_cross_entropy.py new file mode 100644 index 0000000..b05400e --- /dev/null +++ b/fairseq/tests/speech_recognition/test_cross_entropy.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from examples.speech_recognition.criterions.cross_entropy_acc import ( + CrossEntropyWithAccCriterion, +) + +from .asr_test_base import CrossEntropyCriterionTestBase + + +class CrossEntropyWithAccCriterionTest(CrossEntropyCriterionTestBase): + def setUp(self): + self.criterion_cls = CrossEntropyWithAccCriterion + super().setUp() + + def test_cross_entropy_all_correct(self): + sample = self.get_test_sample(correct=True, soft_target=False, aggregate=False) + loss, sample_size, logging_output = self.criterion( + self.model, sample, "sum", log_probs=True + ) + assert logging_output["correct"] == 20 + assert logging_output["total"] == 20 + assert logging_output["sample_size"] == 20 + assert logging_output["ntokens"] == 20 + + def test_cross_entropy_all_wrong(self): + sample = self.get_test_sample(correct=False, soft_target=False, aggregate=False) + loss, sample_size, logging_output = self.criterion( + self.model, sample, "sum", log_probs=True + ) + assert logging_output["correct"] == 0 + assert logging_output["total"] == 20 + assert logging_output["sample_size"] == 20 + assert logging_output["ntokens"] == 20 diff --git a/fairseq/tests/speech_recognition/test_data_utils.py b/fairseq/tests/speech_recognition/test_data_utils.py new file mode 100644 index 0000000..a72e0b6 --- /dev/null +++ b/fairseq/tests/speech_recognition/test_data_utils.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+import unittest + +import torch +from examples.speech_recognition.data import data_utils + + +class DataUtilsTest(unittest.TestCase): + def test_normalization(self): + sample_len1 = torch.tensor( + [ + [ + -0.7661, + -1.3889, + -2.0972, + -0.9134, + -0.7071, + -0.9765, + -0.8700, + -0.8283, + 0.7512, + 1.3211, + 2.1532, + 2.1174, + 1.2800, + 1.2633, + 1.6147, + 1.6322, + 2.0723, + 3.1522, + 3.2852, + 2.2309, + 2.5569, + 2.2183, + 2.2862, + 1.5886, + 0.8773, + 0.8725, + 1.2662, + 0.9899, + 1.1069, + 1.3926, + 1.2795, + 1.1199, + 1.1477, + 1.2687, + 1.3843, + 1.1903, + 0.8355, + 1.1367, + 1.2639, + 1.4707, + ] + ] + ) + out = data_utils.apply_mv_norm(sample_len1) + assert not torch.isnan(out).any() + assert (out == sample_len1).all() diff --git a/fairseq/tests/speech_recognition/test_vggtransformer.py b/fairseq/tests/speech_recognition/test_vggtransformer.py new file mode 100644 index 0000000..4dc73b8 --- /dev/null +++ b/fairseq/tests/speech_recognition/test_vggtransformer.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +# import models/encoder/decoder to be tested +from examples.speech_recognition.models.vggtransformer import ( + TransformerDecoder, + VGGTransformerEncoder, + VGGTransformerModel, + vggtransformer_1, + vggtransformer_2, + vggtransformer_base, +) + +# import base test class +from .asr_test_base import ( + DEFAULT_TEST_VOCAB_SIZE, + TestFairseqDecoderBase, + TestFairseqEncoderBase, + TestFairseqEncoderDecoderModelBase, + get_dummy_dictionary, + get_dummy_encoder_output, + get_dummy_input, +) + + +class VGGTransformerModelTest_mid(TestFairseqEncoderDecoderModelBase): + def setUp(self): + def override_config(args): + """ + vggtrasformer_1 use 14 layers of transformer, + for testing purpose, it is too expensive. For fast turn-around + test, reduce the number of layers to 3. 
+ """ + args.transformer_enc_config = ( + "((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 3" + ) + + super().setUp() + extra_args_setter = [vggtransformer_1, override_config] + + self.setUpModel(VGGTransformerModel, extra_args_setter) + self.setUpInput(get_dummy_input(T=50, D=80, B=5, K=DEFAULT_TEST_VOCAB_SIZE)) + + +class VGGTransformerModelTest_big(TestFairseqEncoderDecoderModelBase): + def setUp(self): + def override_config(args): + """ + vggtrasformer_2 use 16 layers of transformer, + for testing purpose, it is too expensive. For fast turn-around + test, reduce the number of layers to 3. + """ + args.transformer_enc_config = ( + "((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 3" + ) + + super().setUp() + extra_args_setter = [vggtransformer_2, override_config] + + self.setUpModel(VGGTransformerModel, extra_args_setter) + self.setUpInput(get_dummy_input(T=50, D=80, B=5, K=DEFAULT_TEST_VOCAB_SIZE)) + + +class VGGTransformerModelTest_base(TestFairseqEncoderDecoderModelBase): + def setUp(self): + def override_config(args): + """ + vggtrasformer_base use 12 layers of transformer, + for testing purpose, it is too expensive. For fast turn-around + test, reduce the number of layers to 3. + """ + args.transformer_enc_config = ( + "((512, 8, 2048, True, 0.15, 0.15, 0.15),) * 3" + ) + + super().setUp() + extra_args_setter = [vggtransformer_base, override_config] + + self.setUpModel(VGGTransformerModel, extra_args_setter) + self.setUpInput(get_dummy_input(T=50, D=80, B=5, K=DEFAULT_TEST_VOCAB_SIZE)) + + +class VGGTransformerEncoderTest(TestFairseqEncoderBase): + def setUp(self): + super().setUp() + + self.setUpInput(get_dummy_input(T=50, D=80, B=5)) + + def test_forward(self): + print("1. test standard vggtransformer") + self.setUpEncoder(VGGTransformerEncoder(input_feat_per_channel=80)) + super().test_forward() + print("2. 
test vggtransformer with limited right context") + self.setUpEncoder( + VGGTransformerEncoder( + input_feat_per_channel=80, transformer_context=(-1, 5) + ) + ) + super().test_forward() + print("3. test vggtransformer with limited left context") + self.setUpEncoder( + VGGTransformerEncoder( + input_feat_per_channel=80, transformer_context=(5, -1) + ) + ) + super().test_forward() + print("4. test vggtransformer with limited right context and sampling") + self.setUpEncoder( + VGGTransformerEncoder( + input_feat_per_channel=80, + transformer_context=(-1, 12), + transformer_sampling=(2, 2), + ) + ) + super().test_forward() + print("5. test vggtransformer with windowed context and sampling") + self.setUpEncoder( + VGGTransformerEncoder( + input_feat_per_channel=80, + transformer_context=(12, 12), + transformer_sampling=(2, 2), + ) + ) + + +class TransformerDecoderTest(TestFairseqDecoderBase): + def setUp(self): + super().setUp() + + dict = get_dummy_dictionary(vocab_size=DEFAULT_TEST_VOCAB_SIZE) + decoder = TransformerDecoder(dict) + dummy_encoder_output = get_dummy_encoder_output(encoder_out_shape=(50, 5, 256)) + + self.setUpDecoder(decoder) + self.setUpInput(dummy_encoder_output) + self.setUpPrevOutputTokens() diff --git a/fairseq/tests/tasks/test_denoising.py b/fairseq/tests/tasks/test_denoising.py new file mode 100644 index 0000000..5c22168 --- /dev/null +++ b/fairseq/tests/tasks/test_denoising.py @@ -0,0 +1,96 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import unittest +from tempfile import TemporaryDirectory + +from fairseq import options +from fairseq.binarizer import FileBinarizer, VocabularyDatasetBinarizer +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.tasks.denoising import DenoisingTask +from tests.utils import build_vocab, make_data + + +class TestDenoising(unittest.TestCase): + def test_denoising(self): + with TemporaryDirectory() as dirname: + + # prep input file + raw_file = os.path.join(dirname, "raw") + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + + # binarize + binarizer = VocabularyDatasetBinarizer(vocab, append_eos=False) + split = "train" + bin_file = os.path.join(dirname, split) + dataset_impl = "mmap" + FileBinarizer.multiprocess_dataset( + input_file=raw_file, + binarizer=binarizer, + dataset_impl=dataset_impl, + vocab_size=len(vocab), + output_prefix=bin_file, + ) + + # setup task + train_args = options.parse_args_and_arch( + options.get_training_parser(), + [ + "--task", + "denoising", + "--arch", + "bart_base", + "--seed", + "42", + "--mask-length", + "word", + "--permute-sentences", + "1", + "--rotate", + "0", + "--replace-length", + "-1", + "--mask", + "0.2", + dirname, + ], + ) + cfg = convert_namespace_to_omegaconf(train_args) + task = DenoisingTask(cfg.task, binarizer.dict) + + # load datasets + original_dataset = task._load_dataset_split(bin_file, 1, False) + task.load_dataset(split) + masked_dataset = task.dataset(split) + + iterator = task.get_batch_iterator( + dataset=masked_dataset, + max_tokens=65_536, + max_positions=4_096, + ).next_epoch_itr(shuffle=False) + mask_index = task.source_dictionary.index("<mask>") + for batch in iterator: + for sample in range(len(batch)): + net_input = batch["net_input"] + masked_src_tokens = net_input["src_tokens"][sample] + masked_src_length = net_input["src_lengths"][sample] + masked_tgt_tokens = batch["target"][sample] + + sample_id = batch["id"][sample] + original_tokens = 
original_dataset[sample_id] + original_tokens = original_tokens.masked_select( + masked_src_tokens[:masked_src_length] == mask_index + ) + masked_tokens = masked_tgt_tokens.masked_select( + masked_src_tokens == mask_index + ) + + assert masked_tokens.equal(original_tokens) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/tasks/test_masked_lm.py b/fairseq/tests/tasks/test_masked_lm.py new file mode 100644 index 0000000..215cd35 --- /dev/null +++ b/fairseq/tests/tasks/test_masked_lm.py @@ -0,0 +1,78 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import unittest +from tempfile import TemporaryDirectory + +from fairseq.binarizer import FileBinarizer, VocabularyDatasetBinarizer +from fairseq.tasks.masked_lm import MaskedLMConfig, MaskedLMTask +from tests.utils import build_vocab, make_data + + +class TestMaskedLM(unittest.TestCase): + def test_masks_tokens(self): + with TemporaryDirectory() as dirname: + + # prep input file + raw_file = os.path.join(dirname, "raw") + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + + # binarize + binarizer = VocabularyDatasetBinarizer(vocab, append_eos=False) + split = "train" + bin_file = os.path.join(dirname, split) + FileBinarizer.multiprocess_dataset( + input_file=raw_file, + binarizer=binarizer, + dataset_impl="mmap", + vocab_size=len(vocab), + output_prefix=bin_file, + ) + + # setup task + cfg = MaskedLMConfig( + data=dirname, + seed=42, + mask_prob=0.5, # increasing the odds of masking + random_token_prob=0, # avoiding random tokens for exact match + leave_unmasked_prob=0, # always masking for exact match + ) + task = MaskedLMTask(cfg, binarizer.dict) + + original_dataset = task._load_dataset_split(bin_file, 1, False) + + # load datasets + task.load_dataset(split) + masked_dataset = task.dataset(split) + + mask_index = 
task.source_dictionary.index("<mask>") + iterator = task.get_batch_iterator( + dataset=masked_dataset, + max_tokens=65_536, + max_positions=4_096, + ).next_epoch_itr(shuffle=False) + for batch in iterator: + for sample in range(len(batch)): + net_input = batch["net_input"] + masked_src_tokens = net_input["src_tokens"][sample] + masked_src_length = net_input["src_lengths"][sample] + masked_tgt_tokens = batch["target"][sample] + + sample_id = batch["id"][sample] + original_tokens = original_dataset[sample_id] + original_tokens = original_tokens.masked_select( + masked_src_tokens[:masked_src_length] == mask_index + ) + masked_tokens = masked_tgt_tokens.masked_select( + masked_tgt_tokens != task.source_dictionary.pad() + ) + + assert masked_tokens.equal(original_tokens) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/tasks/test_multilingual_denoising.py b/fairseq/tests/tasks/test_multilingual_denoising.py new file mode 100644 index 0000000..a0227f6 --- /dev/null +++ b/fairseq/tests/tasks/test_multilingual_denoising.py @@ -0,0 +1,98 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import unittest +from tempfile import TemporaryDirectory + +from fairseq import options +from fairseq.binarizer import FileBinarizer, VocabularyDatasetBinarizer +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.tasks.multilingual_denoising import MultilingualDenoisingTask +from tests.utils import build_vocab, make_data + + +class TestMultilingualDenoising(unittest.TestCase): + def test_multilingual_denoising(self): + with TemporaryDirectory() as dirname: + + # prep input file + lang_dir = os.path.join(dirname, "en") + os.mkdir(lang_dir) + raw_file = os.path.join(lang_dir, "raw") + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + + # binarize + binarizer = VocabularyDatasetBinarizer(vocab, append_eos=False) + split = "train" + bin_file = os.path.join(lang_dir, split) + dataset_impl = "mmap" + FileBinarizer.multiprocess_dataset( + input_file=raw_file, + binarizer=binarizer, + dataset_impl=dataset_impl, + vocab_size=len(vocab), + output_prefix=bin_file, + ) + + # setup task + train_args = options.parse_args_and_arch( + options.get_training_parser(), + [ + "--task", + "multilingual_denoising", + "--arch", + "bart_base", + "--seed", + "42", + "--mask-length", + "word", + "--permute-sentences", + "1", + "--rotate", + "0", + "--replace-length", + "-1", + "--mask", + "0.2", + dirname, + ], + ) + cfg = convert_namespace_to_omegaconf(train_args) + task = MultilingualDenoisingTask(cfg.task, binarizer.dict) + + # load datasets + original_dataset = task._load_dataset_split(bin_file, 1, False) + task.load_dataset(split) + masked_dataset = task.dataset(split) + + iterator = task.get_batch_iterator( + dataset=masked_dataset, + max_tokens=65_536, + max_positions=4_096, + ).next_epoch_itr(shuffle=False) + mask_index = task.source_dictionary.index("<mask>") + for batch in iterator: + for sample in range(len(batch)): + net_input = batch["net_input"] + masked_src_tokens = net_input["src_tokens"][sample] + masked_src_length 
= net_input["src_lengths"][sample] + masked_tgt_tokens = batch["target"][sample] + + sample_id = batch["id"][sample] + original_tokens = original_dataset[sample_id] + original_tokens = original_tokens.masked_select( + masked_src_tokens[:masked_src_length] == mask_index + ) + masked_tokens = masked_tgt_tokens.masked_select( + masked_src_tokens == mask_index + ) + + assert masked_tokens.equal(original_tokens) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/tasks/test_span_masked_lm.py b/fairseq/tests/tasks/test_span_masked_lm.py new file mode 100644 index 0000000..d289cf8 --- /dev/null +++ b/fairseq/tests/tasks/test_span_masked_lm.py @@ -0,0 +1,106 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import unittest +from tempfile import TemporaryDirectory + +from fairseq import options +from fairseq.binarizer import FileBinarizer, VocabularyDatasetBinarizer +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.tasks.span_masked_lm import SpanMaskedLMTask +from tests.utils import build_vocab, make_data + + +class TestSpanMaskedLM(unittest.TestCase): + def test_masks_token_spans(self): + with TemporaryDirectory() as dirname: + + # prep input file + raw_file = os.path.join(dirname, "raw") + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + + # binarize + binarizer = VocabularyDatasetBinarizer(vocab, append_eos=False) + split = "train" + bin_file = os.path.join(dirname, split) + dataset_impl = "mmap" + + FileBinarizer.multiprocess_dataset( + input_file=raw_file, + binarizer=binarizer, + dataset_impl=dataset_impl, + vocab_size=len(vocab), + output_prefix=bin_file, + ) + + # adding sentinel tokens + for i in range(100): + vocab.add_symbol(f"<extra_id_{i}>") + + # setup task + train_args = options.parse_args_and_arch( + options.get_training_parser(), + [ + 
"--task", + "span_masked_lm", + "--arch", + "bart_base", + "--seed", + "42", + dirname, + ], + ) + cfg = convert_namespace_to_omegaconf(train_args) + task = SpanMaskedLMTask(cfg.task, binarizer.dict) + + # load datasets + original_dataset = task._load_dataset_split(bin_file, 1, False) + task.load_dataset(split) + masked_dataset = task.dataset(split) + + iterator = task.get_batch_iterator( + dataset=masked_dataset, + max_tokens=65_536, + max_positions=4_096, + ).next_epoch_itr(shuffle=False) + num_tokens = len(vocab) + for batch in iterator: + for sample in range(len(batch)): + sample_id = batch["id"][sample] + original_tokens = original_dataset[sample_id] + masked_src_tokens = batch["net_input"]["src_tokens"][sample] + masked_src_length = batch["net_input"]["src_lengths"][sample] + masked_tgt_tokens = batch["target"][sample] + + original_offset = 0 + masked_tgt_offset = 0 + extra_id_token = len(vocab) - 1 + for masked_src_token in masked_src_tokens[:masked_src_length]: + if masked_src_token == extra_id_token: + assert ( + masked_src_token == masked_tgt_tokens[masked_tgt_offset] + ) + extra_id_token -= 1 + masked_tgt_offset += 1 + while ( + original_offset < len(original_tokens) + and masked_tgt_tokens[masked_tgt_offset] + != extra_id_token + ): + assert ( + original_tokens[original_offset] + == masked_tgt_tokens[masked_tgt_offset] + ) + original_offset += 1 + masked_tgt_offset += 1 + else: + assert original_tokens[original_offset] == masked_src_token + original_offset += 1 + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_activation_checkpointing.py b/fairseq/tests/test_activation_checkpointing.py new file mode 100644 index 0000000..647a957 --- /dev/null +++ b/fairseq/tests/test_activation_checkpointing.py @@ -0,0 +1,79 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +import torch +import torch.nn as nn +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from torch.utils.checkpoint import checkpoint + + +class Model(nn.Module): + def __init__( + self, use_pytorch_checkpoint=False, use_fairseq_checkpoint=False, **kwargs + ): + super().__init__() + torch.manual_seed(0) + self.use_pytorch_checkpoint = use_pytorch_checkpoint + self.ffn = nn.Sequential( + nn.Linear(32, 128), + # add a Dropout layer to test RNG save/restore + nn.Dropout(p=0.5), + nn.Linear(128, 32), + ) + if use_fairseq_checkpoint: + self.ffn = checkpoint_wrapper(self.ffn, **kwargs) + self.out = nn.Linear(32, 1) + + def forward(self, x): + if self.use_pytorch_checkpoint: + x = checkpoint(self.ffn, x) + else: + x = self.ffn(x) + return self.out(x) + + +class TestActivationCheckpointing(unittest.TestCase): + def _test_checkpoint_wrapper(self, device, log_memory_usage=False): + def get_loss_and_gnorm(model): + torch.manual_seed(1) + input = torch.rand(2, 16, 32).requires_grad_(True).to(device) + model.zero_grad() + loss = model(input).sum() + loss.backward() + gnorm = torch.norm( + torch.stack([torch.norm(p.grad.detach()) for p in model.parameters()]) + ) + return {"loss": loss, "gnorm": gnorm} + + model = Model().to(device) + no_cpt = get_loss_and_gnorm(model) + + model = Model(use_pytorch_checkpoint=True).to(device) + pyt_cpt = get_loss_and_gnorm(model) + torch.testing.assert_allclose(no_cpt["loss"], pyt_cpt["loss"]) + torch.testing.assert_allclose(no_cpt["gnorm"], pyt_cpt["gnorm"]) + + model = Model(use_fairseq_checkpoint=True).to(device) + fairseq_cpt = get_loss_and_gnorm(model) + torch.testing.assert_allclose(no_cpt["loss"], fairseq_cpt["loss"]) + torch.testing.assert_allclose(no_cpt["gnorm"], fairseq_cpt["gnorm"]) + + model = Model(use_fairseq_checkpoint=True, offload_to_cpu=True).to(device) + fairseq_cpt_offload = get_loss_and_gnorm(model) + torch.testing.assert_allclose(no_cpt["loss"], fairseq_cpt_offload["loss"]) + 
torch.testing.assert_allclose(no_cpt["gnorm"], fairseq_cpt_offload["gnorm"]) + + def test_checkpoint_wrapper_cpu(self): + self._test_checkpoint_wrapper(device=torch.device("cpu")) + + @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") + def test_checkpoint_wrapper_cuda(self): + self._test_checkpoint_wrapper(device=torch.device("cuda")) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_amp_optimizer.py b/fairseq/tests/test_amp_optimizer.py new file mode 100644 index 0000000..4d6073a --- /dev/null +++ b/fairseq/tests/test_amp_optimizer.py @@ -0,0 +1,75 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import copy +import unittest + +import torch +from torch.cuda.amp import GradScaler, autocast + +from fairseq.optim import build_optimizer + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestGradientScalingAMP(unittest.TestCase): + def setUp(self): + self.x = torch.tensor([2.0]).cuda().half() + weight = 3.0 + bias = 5.0 + self.error = 1.0 + self.target = torch.tensor([self.x * weight + bias + self.error]).cuda() + self.loss_fn = torch.nn.L1Loss() + + self.model = torch.nn.Linear(1, 1) + self.model.weight.data = torch.tensor([[weight]]) + self.model.bias.data = torch.tensor([bias]) + self.model.cuda() + self.params = list(self.model.parameters()) + + self.namespace_dls = argparse.Namespace( + optimizer="adam", + lr=[0.1], + adam_betas="(0.9, 0.999)", + adam_eps=1e-8, + weight_decay=0.0, + threshold_loss_scale=1, + min_loss_scale=1e-4, + ) + self.scaler = GradScaler( + init_scale=1, + growth_interval=1, + ) + + def run_iter(self, model, params, optimizer): + optimizer.zero_grad() + with autocast(): + y = model(self.x) + loss = self.loss_fn(y, self.target) + self.scaler.scale(loss).backward() + self.assertEqual(loss, torch.tensor(1.0, 
device="cuda:0", dtype=torch.float16)) + + self.scaler.unscale_(optimizer) + grad_norm = optimizer.clip_grad_norm(0) + self.assertAlmostEqual(grad_norm.item(), 2.2361, 4) + + self.scaler.step(optimizer) + self.scaler.update() + self.assertEqual( + model.weight, + torch.tensor([[3.1]], device="cuda:0", requires_grad=True), + ) + self.assertEqual( + model.bias, + torch.tensor([5.1], device="cuda:0", requires_grad=True), + ) + self.assertEqual(self.scaler.get_scale(), 2.0) + + def test_automatic_mixed_precision(self): + model = copy.deepcopy(self.model) + params = list(model.parameters()) + optimizer = build_optimizer(self.namespace_dls, params) + + self.run_iter(model, params, optimizer) diff --git a/fairseq/tests/test_average_checkpoints.py b/fairseq/tests/test_average_checkpoints.py new file mode 100644 index 0000000..f348b56 --- /dev/null +++ b/fairseq/tests/test_average_checkpoints.py @@ -0,0 +1,134 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import collections +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +from scripts.average_checkpoints import average_checkpoints +from torch import nn + + +class ModelWithSharedParameter(nn.Module): + def __init__(self): + super(ModelWithSharedParameter, self).__init__() + self.embedding = nn.Embedding(1000, 200) + self.FC1 = nn.Linear(200, 200) + self.FC2 = nn.Linear(200, 200) + # tie weight in FC2 to FC1 + self.FC2.weight = nn.Parameter(self.FC1.weight) + self.FC2.bias = nn.Parameter(self.FC1.bias) + + self.relu = nn.ReLU() + + def forward(self, input): + return self.FC2(self.ReLU(self.FC1(input))) + self.FC1(input) + + +class TestAverageCheckpoints(unittest.TestCase): + def test_average_checkpoints(self): + params_0 = collections.OrderedDict( + [ + ("a", torch.DoubleTensor([100.0])), + ("b", torch.FloatTensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])), + ("c", torch.IntTensor([7, 8, 9])), + ] + ) + params_1 = collections.OrderedDict( + [ + ("a", torch.DoubleTensor([1.0])), + ("b", torch.FloatTensor([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])), + ("c", torch.IntTensor([2, 2, 2])), + ] + ) + params_avg = collections.OrderedDict( + [ + ("a", torch.DoubleTensor([50.5])), + ("b", torch.FloatTensor([[1.0, 1.5, 2.0], [2.5, 3.0, 3.5]])), + # We expect truncation for integer division + ("c", torch.IntTensor([4, 5, 5])), + ] + ) + + fd_0, path_0 = tempfile.mkstemp() + fd_1, path_1 = tempfile.mkstemp() + torch.save(collections.OrderedDict([("model", params_0)]), path_0) + torch.save(collections.OrderedDict([("model", params_1)]), path_1) + + output = average_checkpoints([path_0, path_1])["model"] + + os.close(fd_0) + os.remove(path_0) + os.close(fd_1) + os.remove(path_1) + + for (k_expected, v_expected), (k_out, v_out) in zip( + params_avg.items(), output.items() + ): + self.assertEqual( + k_expected, + k_out, + "Key mismatch - expected {} but found {}. 
" + "(Expected list of keys: {} vs actual list of keys: {})".format( + k_expected, k_out, params_avg.keys(), output.keys() + ), + ) + np.testing.assert_allclose( + v_expected.numpy(), + v_out.numpy(), + err_msg="Tensor value mismatch for key {}".format(k_expected), + ) + + def test_average_checkpoints_with_shared_parameters(self): + def _construct_model_with_shared_parameters(path, value): + m = ModelWithSharedParameter() + nn.init.constant_(m.FC1.weight, value) + torch.save({"model": m.state_dict()}, path) + return m + + tmpdir = tempfile.mkdtemp() + paths = [] + path = os.path.join(tmpdir, "m1.pt") + m1 = _construct_model_with_shared_parameters(path, 1.0) + paths.append(path) + + path = os.path.join(tmpdir, "m2.pt") + m2 = _construct_model_with_shared_parameters(path, 2.0) + paths.append(path) + + path = os.path.join(tmpdir, "m3.pt") + m3 = _construct_model_with_shared_parameters(path, 3.0) + paths.append(path) + + new_model = average_checkpoints(paths) + self.assertTrue( + torch.equal( + new_model["model"]["embedding.weight"], + (m1.embedding.weight + m2.embedding.weight + m3.embedding.weight) / 3.0, + ) + ) + + self.assertTrue( + torch.equal( + new_model["model"]["FC1.weight"], + (m1.FC1.weight + m2.FC1.weight + m3.FC1.weight) / 3.0, + ) + ) + + self.assertTrue( + torch.equal( + new_model["model"]["FC2.weight"], + (m1.FC2.weight + m2.FC2.weight + m3.FC2.weight) / 3.0, + ) + ) + shutil.rmtree(tmpdir) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_backtranslation_dataset.py b/fairseq/tests/test_backtranslation_dataset.py new file mode 100644 index 0000000..dffc3b4 --- /dev/null +++ b/fairseq/tests/test_backtranslation_dataset.py @@ -0,0 +1,123 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import unittest

import tests.utils as test_utils
import torch
from fairseq.data import (
    BacktranslationDataset,
    LanguagePairDataset,
    TransformEosDataset,
)
from fairseq.sequence_generator import SequenceGenerator


class TestBacktranslationDataset(unittest.TestCase):
    """Checks one collated batch of BacktranslationDataset per EOS-handling mode."""

    def setUp(self):
        """Load the shared sequence-generator fixture and wrap its tokens in a dataset."""
        # sequence_generator_setup() yields six values; keep all of them on
        # the test case so the helper below can reach them.
        fixture = test_utils.sequence_generator_setup()
        (
            self.tgt_dict,
            self.w1,
            self.w2,
            self.src_tokens,
            self.src_lengths,
            self.model,
        ) = fixture

        # The fixture's source tokens are reused as the data to backtranslate.
        self.tgt_dataset = test_utils.TestDataset(data=self.src_tokens)
        self.cuda = torch.cuda.is_available()

    def _backtranslation_dataset_helper(
        self,
        remove_eos_from_input_src,
        remove_eos_from_output_src,
    ):
        """Build a BacktranslationDataset, fetch its first batch and verify it.

        Args:
            remove_eos_from_input_src: forwarded as ``remove_eos_from_src`` to
                the dataset handed to the generator, and as
                ``append_eos_to_tgt`` to the output collater.
            remove_eos_from_output_src: forwarded as ``remove_eos_from_src`` to
                the output collater; when true the expected source loses its
                last column.
        """
        dictionary = self.tgt_dict
        eos_idx = dictionary.eos()

        monolingual = LanguagePairDataset(
            src=self.tgt_dataset,
            src_sizes=self.tgt_dataset.sizes,
            src_dict=dictionary,
            tgt=None,
            tgt_sizes=None,
            tgt_dict=None,
        )

        seq_gen = SequenceGenerator(
            [self.model],
            tgt_dict=dictionary,
            max_len_a=0,
            max_len_b=200,
            beam_size=2,
            unk_penalty=0,
        )

        def backtranslate(sample):
            return seq_gen.generate([self.model], sample)

        # Input side: optionally strip EOS from the sentences given to the
        # generator.
        generator_input = TransformEosDataset(
            dataset=monolingual,
            eos=eos_idx,
            remove_eos_from_src=remove_eos_from_input_src,
        )
        # Output side: if EOS was stripped from the input src, it has to be
        # appended back to the output tgt.
        hypothesis_collater = TransformEosDataset(
            dataset=monolingual,
            eos=eos_idx,
            append_eos_to_tgt=remove_eos_from_input_src,
            remove_eos_from_src=remove_eos_from_output_src,
        ).collater

        bt_dataset = BacktranslationDataset(
            tgt_dataset=generator_input,
            src_dict=dictionary,
            backtranslation_fn=backtranslate,
            output_collater=hypothesis_collater,
            cuda=self.cuda,
        )
        loader = torch.utils.data.DataLoader(
            bt_dataset,
            batch_size=2,
            collate_fn=bt_dataset.collater,
        )
        batch = next(iter(loader))

        pad_idx = dictionary.pad()
        w1, w2 = self.w1, self.w2

        # Note that we sort by src_lengths and add left padding, so actually
        # ids will look like: [1, 0]
        expected_src = torch.LongTensor(
            [[w1, w2, w1, eos_idx], [pad_idx, pad_idx, w1, eos_idx]]
        )
        if remove_eos_from_output_src:
            expected_src = expected_src[:, :-1]
        expected_tgt = torch.LongTensor([[w1, w2, eos_idx], [w1, w2, eos_idx]])

        self.assertTensorEqual(expected_src, batch["net_input"]["src_tokens"])
        self.assertTensorEqual(expected_tgt, batch["target"])

    def test_backtranslation_dataset_no_eos_in_output_src(self):
        # (remove_eos_from_input_src, remove_eos_from_output_src)
        self._backtranslation_dataset_helper(False, True)

    def test_backtranslation_dataset_with_eos_in_output_src(self):
        # (remove_eos_from_input_src, remove_eos_from_output_src)
        self._backtranslation_dataset_helper(False, False)

    def test_backtranslation_dataset_no_eos_in_input_src(self):
        # (remove_eos_from_input_src, remove_eos_from_output_src)
        self._backtranslation_dataset_helper(True, False)

    def assertTensorEqual(self, t1, t2):
        """Assert that both tensors have the same size and no differing element."""
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        num_mismatched = t1.ne(t2).long().sum()
        self.assertEqual(num_mismatched, 0)


if __name__ == "__main__":
    unittest.main()

# ---------------------------------------------------------------------------
# Patch metadata for the next file, preserved from the original text:
# diff --git a/fairseq/tests/test_binaries.py b/fairseq/tests/test_binaries.py
# new file mode 100644
# index 0000000..41d9210
# --- /dev/null
# +++ b/fairseq/tests/test_binaries.py
# @@ -0,0 +1,1915 @@
# ---------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +import contextlib +import json +import logging +import os +import random +import sys +import tempfile +import unittest +from packaging import version +from io import StringIO +from typing import Dict, List + +import torch + +from fairseq import options +from fairseq_cli import eval_lm, train +from tests.utils import ( + create_dummy_data, + create_laser_data_and_config_json, + generate_main, + preprocess_lm_data, + preprocess_summarization_data, + preprocess_translation_data, + train_language_model, + train_translation_model, +) + +try: + import transformers # noqa + + has_hf_transformers = True +except ImportError: + has_hf_transformers = False + + +class TestTranslation(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_fconv(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fconv") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model(data_dir, "fconv_iwslt_de_en") + generate_main(data_dir) + + def test_raw(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fconv_raw") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, ["--dataset-impl", "raw"]) + train_translation_model( + data_dir, "fconv_iwslt_de_en", ["--dataset-impl", "raw"] + ) + generate_main(data_dir, ["--dataset-impl", "raw"]) + + def test_update_freq(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_update_freq") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, "fconv_iwslt_de_en", ["--update-freq", "3"] + ) + generate_main(data_dir) + + def test_max_positions(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_max_positions") as data_dir: + create_dummy_data(data_dir) + 
preprocess_translation_data(data_dir) + with self.assertRaises(Exception) as context: + train_translation_model( + data_dir, + "fconv_iwslt_de_en", + ["--max-target-positions", "5"], + ) + self.assertTrue( + "skip this example with --skip-invalid-size-inputs-valid-test" + in str(context.exception) + ) + train_translation_model( + data_dir, + "fconv_iwslt_de_en", + [ + "--max-target-positions", + "5", + "--skip-invalid-size-inputs-valid-test", + ], + ) + with self.assertRaises(Exception) as context: + generate_main(data_dir) + generate_main(data_dir, ["--skip-invalid-size-inputs-valid-test"]) + + def test_generation(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_sampling") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model(data_dir, "fconv_iwslt_de_en") + generate_main( + data_dir, + [ + "--sampling", + "--temperature", + "2", + "--beam", + "2", + "--nbest", + "2", + ], + ) + generate_main( + data_dir, + [ + "--sampling", + "--sampling-topk", + "3", + "--beam", + "2", + "--nbest", + "2", + ], + ) + generate_main( + data_dir, + [ + "--sampling", + "--sampling-topp", + "0.2", + "--beam", + "2", + "--nbest", + "2", + ], + ) + generate_main( + data_dir, + [ + "--diversity-rate", + "0.5", + "--beam", + "6", + ], + ) + with self.assertRaises(ValueError): + generate_main( + data_dir, + [ + "--diverse-beam-groups", + "4", + "--match-source-len", + ], + ) + generate_main(data_dir, ["--prefix-size", "2"]) + generate_main(data_dir, ["--retain-dropout"]) + + def test_eval_bleu(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_eval_bleu") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "fconv_iwslt_de_en", + [ + "--eval-bleu", + "--eval-bleu-print-samples", + "--eval-bleu-remove-bpe", + "--eval-bleu-detok", + "space", + "--eval-bleu-args", + '{"beam": 4, 
"min_len": 10}', + ], + ) + + def test_lstm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lstm") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "lstm_wiseman_iwslt_de_en", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--decoder-out-embed-dim", + "8", + ], + ) + generate_main(data_dir) + + def test_lstm_bidirectional(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lstm_bidirectional") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "lstm", + [ + "--encoder-layers", + "2", + "--encoder-bidirectional", + "--encoder-hidden-size", + "16", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--decoder-out-embed-dim", + "8", + "--decoder-layers", + "2", + ], + ) + generate_main(data_dir) + + def test_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + ], + run_validation=True, + ) + generate_main(data_dir) + + def test_multilingual_transformer(self): + # test with all combinations of encoder/decoder lang tokens + encoder_langtok_flags = [ + [], + ["--encoder-langtok", "src"], + ["--encoder-langtok", "tgt"], + ] + decoder_langtok_flags = [[], ["--decoder-langtok"]] + with contextlib.redirect_stdout(StringIO()): + for i in range(len(encoder_langtok_flags)): + for j in range(len(decoder_langtok_flags)): + enc_ltok_flag = encoder_langtok_flags[i] + dec_ltok_flag = decoder_langtok_flags[j] + with 
tempfile.TemporaryDirectory( + f"test_multilingual_transformer_{i}_{j}" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + arch="multilingual_transformer", + task="multilingual_translation", + extra_flags=[ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + ] + + enc_ltok_flag + + dec_ltok_flag, + lang_flags=["--lang-pairs", "in-out,out-in"], + run_validation=True, + extra_valid_flags=enc_ltok_flag + dec_ltok_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--task", + "multilingual_translation", + "--lang-pairs", + "in-out,out-in", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ltok_flag + + dec_ltok_flag, + ) + + @unittest.skipIf( + sys.platform.lower() == "darwin", "skip latent depth test on MacOS" + ) + def test_multilingual_translation_latent_depth(self): + # test with latent depth in encoder, decoder, or both + encoder_latent_layer = [[], ["--encoder-latent-layer"]] + decoder_latent_layer = [[], ["--decoder-latent-layer"]] + with contextlib.redirect_stdout(StringIO()): + for i in range(len(encoder_latent_layer)): + for j in range(len(decoder_latent_layer)): + if i == 0 and j == 0: + continue + enc_ll_flag = encoder_latent_layer[i] + dec_ll_flag = decoder_latent_layer[j] + with tempfile.TemporaryDirectory( + f"test_multilingual_translation_latent_depth_{i}_{j}" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data( + data_dir, extra_flags=["--joined-dictionary"] + ) + train_translation_model( + data_dir, + arch="latent_multilingual_transformer", + task="multilingual_translation_latent_depth", + extra_flags=[ + "--user-dir", + "examples/latent_depth/latent_depth_src", + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--share-encoders", + "--share-decoders", + "--sparsity-weight", + "0.1", + 
] + + enc_ll_flag + + dec_ll_flag, + lang_flags=["--lang-pairs", "in-out,out-in"], + run_validation=True, + extra_valid_flags=[ + "--user-dir", + "examples/latent_depth/latent_depth_src", + ] + + enc_ll_flag + + dec_ll_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--user-dir", + "examples/latent_depth/latent_depth_src", + "--task", + "multilingual_translation_latent_depth", + "--lang-pairs", + "in-out,out-in", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ll_flag + + dec_ll_flag, + ) + + def test_translation_multi_simple_epoch(self): + # test with all combinations of encoder/decoder lang tokens + encoder_langtok_flags = [ + [], + ["--encoder-langtok", "src"], + ["--encoder-langtok", "tgt"], + ] + decoder_langtok_flags = [[], ["--decoder-langtok"]] + with contextlib.redirect_stdout(StringIO()): + for i in range(len(encoder_langtok_flags)): + for j in range(len(decoder_langtok_flags)): + enc_ltok_flag = encoder_langtok_flags[i] + dec_ltok_flag = decoder_langtok_flags[j] + with tempfile.TemporaryDirectory( + f"test_translation_multi_simple_epoch_{i}_{j}" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data( + data_dir, extra_flags=["--joined-dictionary"] + ) + train_translation_model( + data_dir, + arch="transformer", + task="translation_multi_simple_epoch", + extra_flags=[ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--sampling-method", + "temperature", + "--sampling-temperature", + "1.5", + "--virtual-epoch-size", + "1000", + ] + + enc_ltok_flag + + dec_ltok_flag, + lang_flags=["--lang-pairs", "in-out,out-in"], + run_validation=True, + extra_valid_flags=enc_ltok_flag + dec_ltok_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--task", + "translation_multi_simple_epoch", + "--lang-pairs", + "in-out,out-in", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ltok_flag + + dec_ltok_flag, + ) + + def 
test_translation_multi_simple_epoch_no_vepoch(self): + # test with all combinations of encoder/decoder lang tokens + with contextlib.redirect_stdout(StringIO()): + enc_ltok_flag = ["--encoder-langtok", "src"] + dec_ltok_flag = ["--decoder-langtok"] + with tempfile.TemporaryDirectory( + "test_translation_multi_simple_epoch_dict" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, extra_flags=[]) + train_translation_model( + data_dir, + arch="transformer", + task="translation_multi_simple_epoch", + extra_flags=[ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--sampling-method", + "temperature", + "--sampling-temperature", + "1.5", + ] + + enc_ltok_flag + + dec_ltok_flag, + lang_flags=["--lang-pairs", "in-out"], + run_validation=True, + extra_valid_flags=enc_ltok_flag + dec_ltok_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--task", + "translation_multi_simple_epoch", + "--lang-pairs", + "in-out", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ltok_flag + + dec_ltok_flag, + ) + + def test_translation_multi_simple_epoch_dicts(self): + # test with all combinations of encoder/decoder lang tokens + with contextlib.redirect_stdout(StringIO()): + enc_ltok_flag = ["--encoder-langtok", "src"] + dec_ltok_flag = ["--decoder-langtok"] + with tempfile.TemporaryDirectory( + "test_translation_multi_simple_epoch_dict" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, extra_flags=[]) + train_translation_model( + data_dir, + arch="transformer", + task="translation_multi_simple_epoch", + extra_flags=[ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--sampling-method", + "temperature", + "--sampling-temperature", + "1.5", + "--virtual-epoch-size", + "1000", + ] + + enc_ltok_flag + + dec_ltok_flag, + lang_flags=["--lang-pairs", 
"in-out"], + run_validation=True, + extra_valid_flags=enc_ltok_flag + dec_ltok_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--task", + "translation_multi_simple_epoch", + "--lang-pairs", + "in-out", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ltok_flag + + dec_ltok_flag, + ) + + def test_translation_multi_simple_epoch_src_tgt_dict_spec(self): + # test the specification of explicit --src-dict and --tgt-dict + with contextlib.redirect_stdout(StringIO()): + enc_ltok_flag = ["--encoder-langtok", "src"] + dec_ltok_flag = ["--decoder-langtok"] + with tempfile.TemporaryDirectory( + "test_translation_multi_simple_epoch_dict" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, extra_flags=[]) + train_translation_model( + data_dir, + arch="transformer", + task="translation_multi_simple_epoch", + extra_flags=[ + "--source-dict", + f"{data_dir}/dict.in.txt", + "--target-dict", + f"{data_dir}/dict.out.txt", + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--sampling-method", + "temperature", + "--sampling-temperature", + "1.5", + "--virtual-epoch-size", + "1000", + ] + + enc_ltok_flag + + dec_ltok_flag, + lang_flags=["--lang-pairs", "in-out"], + run_validation=True, + extra_valid_flags=enc_ltok_flag + dec_ltok_flag, + ) + generate_main( + data_dir, + extra_flags=[ + "--task", + "translation_multi_simple_epoch", + "--lang-pairs", + "in-out", + "--source-lang", + "in", + "--target-lang", + "out", + ] + + enc_ltok_flag + + dec_ltok_flag, + ) + + def test_transformer_cross_self_attention(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_transformer_cross_self_attention" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + 
"--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--no-cross-attention", + "--cross-self-attention", + ], + run_validation=True, + ) + generate_main(data_dir, extra_flags=[]) + + @unittest.skipIf( + version.parse(torch.__version__) > version.parse("1.8"), + "skip for latest torch versions", + ) + def test_transformer_pointer_generator(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_transformer_pointer_generator" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_summarization_data(data_dir) + train_translation_model( + data_dir, + "transformer_pointer_generator", + extra_flags=[ + "--user-dir", + "examples/pointer_generator/pointer_generator_src", + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--alignment-layer", + "-1", + "--alignment-heads", + "1", + "--source-position-markers", + "0", + ], + run_validation=True, + extra_valid_flags=[ + "--user-dir", + "examples/pointer_generator/pointer_generator_src", + ], + ) + generate_main( + data_dir, + extra_flags=[ + "--user-dir", + "examples/pointer_generator/pointer_generator_src", + ], + ) + + def test_lightconv(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lightconv") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "lightconv_iwslt_de_en", + [ + "--encoder-conv-type", + "lightweight", + "--decoder-conv-type", + "lightweight", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + ], + ) + generate_main(data_dir) + + def test_dynamicconv(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_dynamicconv") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "lightconv_iwslt_de_en", + [ + 
"--encoder-conv-type", + "dynamic", + "--decoder-conv-type", + "dynamic", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + ], + ) + generate_main(data_dir) + + def test_cmlm_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_cmlm_transformer") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, ["--joined-dictionary"]) + train_translation_model( + data_dir, + "cmlm_transformer", + [ + "--apply-bert-init", + "--criterion", + "nat_loss", + "--noise", + "full_mask", + "--pred-length-offset", + "--length-loss-factor", + "0.1", + ], + task="translation_lev", + ) + generate_main( + data_dir, + [ + "--task", + "translation_lev", + "--iter-decode-max-iter", + "9", + "--iter-decode-eos-penalty", + "0", + "--print-step", + ], + ) + + def test_nonautoregressive_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_nonautoregressive_transformer" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, ["--joined-dictionary"]) + train_translation_model( + data_dir, + "nonautoregressive_transformer", + [ + "--apply-bert-init", + "--src-embedding-copy", + "--criterion", + "nat_loss", + "--noise", + "full_mask", + "--pred-length-offset", + "--length-loss-factor", + "0.1", + ], + task="translation_lev", + ) + generate_main( + data_dir, + [ + "--task", + "translation_lev", + "--iter-decode-max-iter", + "0", + "--iter-decode-eos-penalty", + "0", + "--print-step", + ], + ) + + # def test_nat_crf_transformer(self): + # with contextlib.redirect_stdout(StringIO()): + # with tempfile.TemporaryDirectory('test_nat_crf_transformer') as data_dir: + # create_dummy_data(data_dir) + # preprocess_translation_data(data_dir, ['--joined-dictionary']) + # train_translation_model(data_dir, 'nacrf_transformer', [ + # '--apply-bert-init', '--criterion', + # 'nat_loss', '--noise', 'full_mask', '--pred-length-offset', 
+ # '--length-loss-factor', '0.1', + # '--word-ins-loss-factor', '0.5', + # '--crf-lowrank-approx', '1', + # '--crf-beam-approx', '1' + # ], task='translation_lev') + # generate_main(data_dir, [ + # '--task', 'translation_lev', + # '--iter-decode-max-iter', '0', + # '--iter-decode-eos-penalty', '0', + # '--print-step', + # ]) + + def test_iterative_nonautoregressive_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_iterative_nonautoregressive_transformer" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, ["--joined-dictionary"]) + train_translation_model( + data_dir, + "iterative_nonautoregressive_transformer", + [ + "--apply-bert-init", + "--src-embedding-copy", + "--criterion", + "nat_loss", + "--noise", + "full_mask", + "--stochastic-approx", + "--dae-ratio", + "0.5", + "--train-step", + "3", + ], + task="translation_lev", + ) + generate_main( + data_dir, + [ + "--task", + "translation_lev", + "--iter-decode-max-iter", + "9", + "--iter-decode-eos-penalty", + "0", + "--print-step", + ], + ) + + def test_insertion_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_insertion_transformer") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir, ["--joined-dictionary"]) + train_translation_model( + data_dir, + "insertion_transformer", + [ + "--apply-bert-init", + "--criterion", + "nat_loss", + "--noise", + "random_mask", + ], + task="translation_lev", + ) + generate_main( + data_dir, + [ + "--task", + "translation_lev", + "--iter-decode-max-iter", + "9", + "--iter-decode-eos-penalty", + "0", + "--print-step", + ], + ) + + def test_mixture_of_experts(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_moe") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + 
"transformer_iwslt_de_en", + [ + "--task", + "translation_moe", + "--user-dir", + "examples/translation_moe/translation_moe_src", + "--method", + "hMoElp", + "--mean-pool-gating-network", + "--num-experts", + "3", + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + ], + ) + generate_main( + data_dir, + [ + "--task", + "translation_moe", + "--user-dir", + "examples/translation_moe/translation_moe_src", + "--method", + "hMoElp", + "--mean-pool-gating-network", + "--num-experts", + "3", + "--gen-expert", + "0", + ], + ) + + def test_alignment(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_alignment") as data_dir: + create_dummy_data(data_dir, alignment=True) + preprocess_translation_data(data_dir, ["--align-suffix", "align"]) + train_translation_model( + data_dir, + "transformer_align", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--load-alignments", + "--alignment-layer", + "1", + "--criterion", + "label_smoothed_cross_entropy_with_alignment", + ], + run_validation=True, + ) + generate_main(data_dir) + + def test_laser_lstm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_laser_lstm") as data_dir: + laser_config_file = create_laser_data_and_config_json(data_dir) + train_translation_model( + laser_config_file.name, + "laser_lstm", + [ + "--user-dir", + "examples/laser/laser_src", + "--weighting-alpha", + "0.3", + "--encoder-bidirectional", + "--encoder-hidden-size", + "512", + "--encoder-layers", + "5", + "--decoder-layers", + "1", + "--encoder-embed-dim", + "320", + "--decoder-embed-dim", + "320", + "--decoder-lang-embed-dim", + "32", + "--save-dir", + data_dir, + "--disable-validation", + ], + task="laser", + lang_flags=[], + ) + + def test_laser_transformer(self): + with contextlib.redirect_stdout(StringIO()): + with 
tempfile.TemporaryDirectory("test_laser_transformer") as data_dir: + laser_config_file = create_laser_data_and_config_json(data_dir) + train_translation_model( + laser_config_file.name, + "laser_transformer", + [ + "--user-dir", + "examples/laser/laser_src", + "--weighting-alpha", + "0.3", + "--encoder-embed-dim", + "320", + "--decoder-embed-dim", + "320", + "--decoder-lang-embed-dim", + "32", + "--save-dir", + data_dir, + "--disable-validation", + ], + task="laser", + lang_flags=[], + ) + + def test_alignment_full_context(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_alignment") as data_dir: + create_dummy_data(data_dir, alignment=True) + preprocess_translation_data(data_dir, ["--align-suffix", "align"]) + train_translation_model( + data_dir, + "transformer_align", + [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--load-alignments", + "--alignment-layer", + "1", + "--criterion", + "label_smoothed_cross_entropy_with_alignment", + "--full-context-alignment", + ], + run_validation=True, + ) + generate_main(data_dir) + + def test_transformer_layerdrop(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer_layerdrop") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "3", + "--decoder-layers", + "3", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--encoder-layerdrop", + "0.01", + "--decoder-layerdrop", + "0.01", + ], + ) + generate_main(data_dir) + generate_main( + data_dir, + [ + "--model-overrides", + "{'encoder_layers_to_keep':'0,2','decoder_layers_to_keep':'1'}", + ], + ) + + +class TestStories(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def 
test_fconv_self_att_wp(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fconv_self_att_wp") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + config = [ + "--encoder-layers", + "[(128, 3)] * 2", + "--decoder-layers", + "[(128, 3)] * 2", + "--decoder-attention", + "True", + "--encoder-attention", + "False", + "--gated-attention", + "True", + "--self-attention", + "True", + "--project-input", + "True", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--decoder-out-embed-dim", + "8", + "--multihead-self-attention-nheads", + "2", + ] + train_translation_model(data_dir, "fconv_self_att_wp", config) + generate_main(data_dir) + + # fusion model + os.rename( + os.path.join(data_dir, "checkpoint_last.pt"), + os.path.join(data_dir, "pretrained.pt"), + ) + config.extend( + [ + "--pretrained", + "True", + "--pretrained-checkpoint", + os.path.join(data_dir, "pretrained.pt"), + "--save-dir", + os.path.join(data_dir, "fusion_model"), + ] + ) + train_translation_model(data_dir, "fconv_self_att_wp", config) + + +class TestLanguageModeling(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_fconv_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_fconv_lm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "fconv_lm", + [ + "--decoder-layers", + "[(850, 3)] * 2 + [(1024,4)]", + "--decoder-embed-dim", + "280", + "--optimizer", + "nag", + "--lr", + "0.1", + ], + ) + eval_lm_main(data_dir) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_transformer_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer_lm") as data_dir: + 
create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "transformer_lm", + ["--add-bos-token", "--nval", "1"], + run_validation=True, + ) + eval_lm_main(data_dir) + eval_lm_main(data_dir, extra_flags=["--context-window", "25"]) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_normformer_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer_lm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "transformer_lm", + [ + "--add-bos-token", + "--nval", + "1", + "--scale-fc", + "--scale-heads", + "--scale-attn", + "--scale-fc", + ], + run_validation=True, + ) + eval_lm_main(data_dir) + eval_lm_main(data_dir, extra_flags=["--context-window", "25"]) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_transformer_lm_with_adaptive_softmax(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_transformer_lm_with_adaptive_softmax" + ) as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "transformer_lm", + [ + "--add-bos-token", + "--criterion", + "adaptive_loss", + "--adaptive-softmax-cutoff", + "5,10,15", + ], + run_validation=True, + ) + eval_lm_main(data_dir) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_lightconv_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lightconv_lm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "lightconv_lm", + ["--add-bos-token"], + 
run_validation=True, + ) + eval_lm_main(data_dir) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_lstm_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lstm_lm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "lstm_lm", + ["--add-bos-token"], + run_validation=True, + ) + eval_lm_main(data_dir) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + def test_lstm_lm_residuals(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_lstm_lm_residuals") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_language_model( + data_dir, + "lstm_lm", + ["--add-bos-token", "--residuals"], + run_validation=True, + ) + eval_lm_main(data_dir) + generate_main( + data_dir, + [ + "--task", + "language_modeling", + "--sample-break-mode", + "eos", + "--tokens-per-sample", + "500", + ], + ) + + @unittest.skipIf(not has_hf_transformers, "skip test if transformers is missing") + def test_transformer_xl_bptt_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_transformer_xl_bptt_lm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + task_flags = [ + "--user-dir", + "examples/truncated_bptt", + "--task", + "truncated_bptt_lm", + "--batch-size", + "2", + "--tokens-per-sample", + "50", + ] + train_language_model( + data_dir=data_dir, + arch="transformer_xl", + extra_flags=task_flags + + [ + "--n-layer", + "2", + ], + task="truncated_bptt_lm", + run_validation=True, + extra_valid_flags=task_flags, + ) + eval_lm_main(data_dir, extra_flags=task_flags) + # Train with activation offloading + train_language_model( + data_dir=data_dir, + 
arch="transformer_xl", + extra_flags=task_flags + + [ + "--n-layer", + "2", + "--offload-activations", + ], + task="truncated_bptt_lm", + run_validation=True, + extra_valid_flags=task_flags, + ) + + +class TestMaskedLanguageModel(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_legacy_masked_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_legacy_mlm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_legacy_masked_language_model(data_dir, "masked_lm") + + def test_roberta_masked_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_roberta_mlm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_masked_lm( + data_dir, "roberta_base", extra_flags=["--encoder-layers", "2"] + ) + + def test_roberta_sentence_prediction(self): + num_classes = 3 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_roberta_head") as data_dir: + create_dummy_roberta_head_data(data_dir, num_classes=num_classes) + preprocess_lm_data(os.path.join(data_dir, "input0")) + preprocess_lm_data(os.path.join(data_dir, "label")) + train_roberta_head(data_dir, "roberta_base", num_classes=num_classes) + + def test_roberta_regression_single(self): + num_classes = 1 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_roberta_regression_single" + ) as data_dir: + create_dummy_roberta_head_data( + data_dir, num_classes=num_classes, regression=True + ) + preprocess_lm_data(os.path.join(data_dir, "input0")) + train_roberta_head( + data_dir, + "roberta_base", + num_classes=num_classes, + extra_flags=["--regression-target"], + ) + + def test_roberta_regression_multiple(self): + num_classes = 3 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + 
"test_roberta_regression_multiple" + ) as data_dir: + create_dummy_roberta_head_data( + data_dir, num_classes=num_classes, regression=True + ) + preprocess_lm_data(os.path.join(data_dir, "input0")) + train_roberta_head( + data_dir, + "roberta_base", + num_classes=num_classes, + extra_flags=["--regression-target"], + ) + + def test_linformer_roberta_masked_lm(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_linformer_roberta_mlm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_masked_lm( + data_dir, + "linformer_roberta_base", + extra_flags=[ + "--user-dir", + "examples/linformer/linformer_src", + "--encoder-layers", + "2", + ], + ) + + def test_linformer_roberta_sentence_prediction(self): + num_classes = 3 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_linformer_roberta_head") as data_dir: + create_dummy_roberta_head_data(data_dir, num_classes=num_classes) + preprocess_lm_data(os.path.join(data_dir, "input0")) + preprocess_lm_data(os.path.join(data_dir, "label")) + train_roberta_head( + data_dir, + "linformer_roberta_base", + num_classes=num_classes, + extra_flags=["--user-dir", "examples/linformer/linformer_src"], + ) + + def test_linformer_roberta_regression_single(self): + num_classes = 1 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + "test_linformer_roberta_regression_single" + ) as data_dir: + create_dummy_roberta_head_data( + data_dir, num_classes=num_classes, regression=True + ) + preprocess_lm_data(os.path.join(data_dir, "input0")) + train_roberta_head( + data_dir, + "linformer_roberta_base", + num_classes=num_classes, + extra_flags=[ + "--regression-target", + "--user-dir", + "examples/linformer/linformer_src", + ], + ) + + def test_linformer_roberta_regression_multiple(self): + num_classes = 3 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory( + 
"test_linformer_roberta_regression_multiple" + ) as data_dir: + create_dummy_roberta_head_data( + data_dir, num_classes=num_classes, regression=True + ) + preprocess_lm_data(os.path.join(data_dir, "input0")) + train_roberta_head( + data_dir, + "linformer_roberta_base", + num_classes=num_classes, + extra_flags=[ + "--regression-target", + "--user-dir", + "examples/linformer/linformer_src", + ], + ) + + def _test_pretrained_masked_lm_for_translation(self, learned_pos_emb, encoder_only): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_mlm") as data_dir: + create_dummy_data(data_dir) + preprocess_lm_data(data_dir) + train_legacy_masked_language_model( + data_dir, + arch="masked_lm", + extra_args=("--encoder-learned-pos",) if learned_pos_emb else (), + ) + with tempfile.TemporaryDirectory( + "test_mlm_translation" + ) as translation_dir: + create_dummy_data(translation_dir) + preprocess_translation_data( + translation_dir, extra_flags=["--joined-dictionary"] + ) + # Train transformer with data_dir/checkpoint_last.pt + train_translation_model( + translation_dir, + arch="transformer_from_pretrained_xlm", + extra_flags=[ + "--decoder-layers", + "1", + "--decoder-embed-dim", + "32", + "--decoder-attention-heads", + "1", + "--decoder-ffn-embed-dim", + "32", + "--encoder-layers", + "1", + "--encoder-embed-dim", + "32", + "--encoder-attention-heads", + "1", + "--encoder-ffn-embed-dim", + "32", + "--pretrained-xlm-checkpoint", + "{}/checkpoint_last.pt".format(data_dir), + "--activation-fn", + "gelu", + "--max-source-positions", + "500", + "--max-target-positions", + "500", + ] + + ( + ["--encoder-learned-pos", "--decoder-learned-pos"] + if learned_pos_emb + else [] + ) + + (["--init-encoder-only"] if encoder_only else []), + task="translation_from_pretrained_xlm", + ) + + def test_pretrained_masked_lm_for_translation_learned_pos_emb(self): + self._test_pretrained_masked_lm_for_translation(True, False) + + def 
test_pretrained_masked_lm_for_translation_sinusoidal_pos_emb(self): + self._test_pretrained_masked_lm_for_translation(False, False) + + def test_pretrained_masked_lm_for_translation_encoder_only(self): + self._test_pretrained_masked_lm_for_translation(True, True) + + def test_r4f_roberta(self): + num_classes = 3 + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_r4f_roberta_head") as data_dir: + create_dummy_roberta_head_data(data_dir, num_classes=num_classes) + preprocess_lm_data(os.path.join(data_dir, "input0")) + preprocess_lm_data(os.path.join(data_dir, "label")) + train_roberta_head( + data_dir, + "roberta_base", + num_classes=num_classes, + extra_flags=[ + "--user-dir", + "examples/rxf/rxf_src", + "--criterion", + "sentence_prediction_r3f", + "--spectral-norm-classification-head", + ], + ) + + +def train_legacy_masked_language_model(data_dir, arch, extra_args=()): + train_parser = options.get_training_parser() + # TODO: langs should be in and out right? 
+ train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + "cross_lingual_lm", + data_dir, + "--arch", + arch, + # Optimizer args + "--optimizer", + "adam", + "--lr-scheduler", + "reduce_lr_on_plateau", + "--lr-shrink", + "0.5", + "--lr", + "0.0001", + "--stop-min-lr", + "1e-09", + # dropout, attention args + "--dropout", + "0.1", + "--attention-dropout", + "0.1", + # MLM args + "--criterion", + "legacy_masked_lm_loss", + "--masked-lm-only", + "--monolingual-langs", + "in,out", + "--num-segment", + "5", + # Transformer args: use a small transformer model for fast training + "--encoder-layers", + "1", + "--encoder-embed-dim", + "32", + "--encoder-attention-heads", + "1", + "--encoder-ffn-embed-dim", + "32", + # Other training args + "--max-tokens", + "500", + "--tokens-per-sample", + "500", + "--save-dir", + data_dir, + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--dataset-impl", + "raw", + "--num-workers", + "0", + ] + + list(extra_args), + ) + train.main(train_args) + + +class TestOptimizers(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_optimizers(self): + with contextlib.redirect_stdout(StringIO()): + with tempfile.TemporaryDirectory("test_optimizers") as data_dir: + # Use just a bit of data and tiny model to keep this test runtime reasonable + create_dummy_data(data_dir, num_examples=10, maxlen=5) + preprocess_translation_data(data_dir) + optimizers = ["adafactor", "adam", "nag", "adagrad", "sgd", "adadelta"] + last_checkpoint = os.path.join(data_dir, "checkpoint_last.pt") + for optimizer in optimizers: + if os.path.exists(last_checkpoint): + os.remove(last_checkpoint) + train_translation_model( + data_dir, + "lstm", + [ + "--required-batch-size-multiple", + "1", + "--encoder-layers", + "1", + "--encoder-hidden-size", + "32", + "--decoder-layers", + "1", + "--optimizer", + optimizer, + ], + ) + 
generate_main(data_dir) + + +def read_last_log_entry( + logs: List[logging.LogRecord], logger_name: str +) -> Dict[str, float]: + for x in reversed(logs): + if x.name == logger_name: + return json.loads(x.message) + raise ValueError(f"No entries from {logger_name} found in captured logs") + + +class TestActivationCheckpointing(unittest.TestCase): + base_flags = [ + "--encoder-layers", + "2", + "--decoder-layers", + "2", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--restore-file", + "x.pt", + "--log-format", + "json", + "--log-interval", + "1", + "--max-update", + "2", + ] + + def _train(self, data_dir, extra_flags): + with self.assertLogs() as logs: + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + self.base_flags + extra_flags, + run_validation=True, + extra_valid_flags=["--log-format", "json"], + ) + return logs.records + + def test_activation_offloading_does_not_change_metrics(self): + """Neither ----checkpoint-activations nor --offload-activations should change loss""" + with tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir: + + with self.assertLogs(): + create_dummy_data(data_dir, num_examples=20) + preprocess_translation_data(data_dir) + offload_logs = self._train(data_dir, ["--offload-activations"]) + baseline_logs = self._train(data_dir, []) + + assert len(baseline_logs) == len(offload_logs) + + baseline_valid_stats = read_last_log_entry(baseline_logs, "valid") + offload_valid_stats = read_last_log_entry(offload_logs, "valid") + baseline_train_stats = read_last_log_entry(baseline_logs, "train") + offload_train_stats = read_last_log_entry(offload_logs, "train") + + assert ( + baseline_train_stats["train_loss"] == offload_train_stats["train_loss"] + ) + assert ( + baseline_valid_stats["valid_loss"] == offload_valid_stats["valid_loss"] + ) + + def test_activation_checkpointing_does_not_change_metrics(self): + """--checkpoint-activations should not change loss""" + + with 
tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir: + with self.assertLogs(): + create_dummy_data(data_dir, num_examples=20) + preprocess_translation_data(data_dir) + ckpt_logs = self._train(data_dir, ["--checkpoint-activations"]) + baseline_logs = self._train(data_dir, []) + assert len(baseline_logs) == len(ckpt_logs) + + baseline_train_stats = read_last_log_entry(baseline_logs, "train") + ckpt_train_stats = read_last_log_entry(ckpt_logs, "train") + assert baseline_train_stats["train_loss"] == ckpt_train_stats["train_loss"] + + baseline_valid_stats = read_last_log_entry(baseline_logs, "valid") + ckpt_valid_stats = read_last_log_entry(ckpt_logs, "valid") + assert baseline_valid_stats["valid_loss"] == ckpt_valid_stats["valid_loss"] + + +def create_dummy_roberta_head_data( + data_dir, num_examples=100, maxlen=10, num_classes=2, regression=False +): + input_dir = "input0" + + def _create_dummy_data(filename): + random_data = torch.rand(num_examples * maxlen) + input_data = 97 + torch.floor(26 * random_data).int() + if regression: + output_data = torch.rand((num_examples, num_classes)) + else: + output_data = 1 + torch.floor(num_classes * torch.rand(num_examples)).int() + with open(os.path.join(data_dir, input_dir, filename + ".out"), "w") as f_in: + label_filename = filename + ".label" if regression else filename + ".out" + with open(os.path.join(data_dir, "label", label_filename), "w") as f_out: + offset = 0 + for i in range(num_examples): + # write example input + ex_len = random.randint(1, maxlen) + ex_str = " ".join(map(chr, input_data[offset : offset + ex_len])) + print(ex_str, file=f_in) + # write example label + if regression: + class_str = " ".join(map(str, output_data[i].numpy())) + print(class_str, file=f_out) + else: + class_str = "class{}".format(output_data[i]) + print(class_str, file=f_out) + offset += ex_len + + os.mkdir(os.path.join(data_dir, input_dir)) + os.mkdir(os.path.join(data_dir, "label")) + _create_dummy_data("train") + 
_create_dummy_data("valid") + _create_dummy_data("test") + + +def train_masked_lm(data_dir, arch, extra_flags=None): + train_parser = options.get_training_parser() + train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + "masked_lm", + data_dir, + "--arch", + arch, + "--optimizer", + "adam", + "--lr", + "0.0001", + "--criterion", + "masked_lm", + "--batch-size", + "500", + "--required-batch-size-multiple", + "1", + "--save-dir", + data_dir, + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + train.main(train_args) + + +def train_roberta_head(data_dir, arch, num_classes=2, extra_flags=None): + train_parser = options.get_training_parser() + train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + "sentence_prediction", + data_dir, + "--arch", + arch, + "--encoder-layers", + "2", + "--num-classes", + str(num_classes), + "--optimizer", + "adam", + "--lr", + "0.0001", + "--criterion", + "sentence_prediction", + "--max-tokens", + "500", + "--max-positions", + "500", + "--batch-size", + "500", + "--save-dir", + data_dir, + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + "1", + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + train.main(train_args) + + +def eval_lm_main(data_dir, extra_flags=None): + eval_lm_parser = options.get_eval_lm_parser() + eval_lm_args = options.parse_args_and_arch( + eval_lm_parser, + [ + data_dir, + "--path", + os.path.join(data_dir, "checkpoint_last.pt"), + "--no-progress-bar", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + eval_lm.main(eval_lm_args) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_binarizer.py b/fairseq/tests/test_binarizer.py new file mode 100644 index 0000000..50075ea --- /dev/null +++ b/fairseq/tests/test_binarizer.py @@ -0,0 +1,122 @@ +# 
Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import os +import typing as tp +import unittest +from tempfile import TemporaryDirectory + +from fairseq.binarizer import BinarizeSummary, FileBinarizer, VocabularyDatasetBinarizer +from fairseq.data import Dictionary, indexed_dataset +from tests.utils import make_data, sizes + + +def build_vocab(data: tp.List[tp.List[str]]) -> Dictionary: + d = Dictionary() + for s in data: + for token in s: + d.add_symbol(token) + d.finalize() + return d + + +class TestBinarizer(unittest.TestCase): + def compare_ds_data(self, summary, data, prefix, impl, vocab): + self.assertEqual(summary.num_seq, len(data)) + self.assertEqual(summary.num_tok, sum([len(s) for s in data])) + + dataset = indexed_dataset.make_dataset(prefix, impl) + + self.assertEqual(len(dataset), len(data)) + decoded = [vocab.string(dataset[i]).split() for i in range(0, len(dataset))] + + self.assertEqual(decoded, data) + data_sizes = [i.item() for i in dataset.sizes] + self.assertEqual(data_sizes, sizes(data)) + + def test_can_binarize_line(self): + data = make_data(length=1) + vocab = build_vocab(data) + + binarizer = VocabularyDatasetBinarizer( + vocab, + ) + + sentence = data[0] + summary = BinarizeSummary() + + tensor = binarizer.binarize_line( + " ".join(sentence), + summary, + ) + + self.assertEqual(len(tensor), len(sentence) + 1) + + self.assertEqual(summary.num_tok, len(sentence) + 1) + self.assertEqual(summary.num_seq, 1) + + def test_can_binarize_file_chunk(self): + # test without multiprocess logic + with TemporaryDirectory() as dirname: + raw_file = os.path.join(dirname, "raw1") + prefix = os.path.join(dirname, "test1") + impl = "mmap" + + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + + binarizer = VocabularyDatasetBinarizer( + vocab, + append_eos=False, + ) + + summary = 
FileBinarizer._binarize_chunk_and_finalize( + binarizer, + raw_file, + offset_start=0, + offset_end=-1, + output_prefix=prefix, + dataset_impl=impl, + vocab_size=len(vocab), + ) + + self.compare_ds_data(summary, data, prefix, impl, vocab) + + def test_can_multiprocess(self): + with TemporaryDirectory() as dirname: + raw_file = os.path.join(dirname, "raw1") + prefix = os.path.join(dirname, "test1") + impl = "mmap" + data = make_data(out_file=raw_file) + vocab = build_vocab(data) + binarizer = VocabularyDatasetBinarizer( + vocab, + append_eos=False, + ) + # with one worker + summary = FileBinarizer.multiprocess_dataset( + raw_file, + impl, + binarizer, + output_prefix=prefix, + vocab_size=len(vocab), + num_workers=1, + ) + + self.compare_ds_data(summary, data, prefix, impl, vocab) + + # with multiple worker + prefix_multi = os.path.join(dirname, "test2") + summary = FileBinarizer.multiprocess_dataset( + raw_file, + impl, + binarizer, + output_prefix=prefix_multi, + vocab_size=len(vocab), + num_workers=3, + ) + + self.compare_ds_data(summary, data, prefix_multi, impl, vocab) diff --git a/fairseq/tests/test_character_token_embedder.py b/fairseq/tests/test_character_token_embedder.py new file mode 100644 index 0000000..24940eb --- /dev/null +++ b/fairseq/tests/test_character_token_embedder.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +import torch +from fairseq.data import Dictionary +from fairseq.modules import CharacterTokenEmbedder + + +class TestCharacterTokenEmbedder(unittest.TestCase): + def test_character_token_embedder(self): + vocab = Dictionary() + vocab.add_symbol("hello") + vocab.add_symbol("there") + + embedder = CharacterTokenEmbedder( + vocab, [(2, 16), (4, 32), (8, 64), (16, 2)], 64, 5, 2 + ) + + test_sents = [["hello", "unk", "there"], ["there"], ["hello", "there"]] + max_len = max(len(s) for s in test_sents) + input = torch.LongTensor(len(test_sents), max_len + 2).fill_(vocab.pad()) + for i in range(len(test_sents)): + input[i][0] = vocab.eos() + for j in range(len(test_sents[i])): + input[i][j + 1] = vocab.index(test_sents[i][j]) + input[i][j + 2] = vocab.eos() + embs = embedder(input) + + assert embs.size() == (len(test_sents), max_len + 2, 5) + self.assertAlmostEqual(embs[0][0], embs[1][0]) + self.assertAlmostEqual(embs[0][0], embs[0][-1]) + self.assertAlmostEqual(embs[0][1], embs[2][1]) + self.assertAlmostEqual(embs[0][3], embs[1][1]) + + embs.sum().backward() + assert embedder.char_embeddings.weight.grad is not None + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-6) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_checkpoint_utils.py b/fairseq/tests/test_checkpoint_utils.py new file mode 100644 index 0000000..f8cd943 --- /dev/null +++ b/fairseq/tests/test_checkpoint_utils.py @@ -0,0 +1,125 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import contextlib +import logging +import os +import tempfile +import unittest +from io import StringIO +from unittest.mock import patch + +from fairseq import checkpoint_utils +from tests.utils import ( + create_dummy_data, + preprocess_translation_data, + train_translation_model, +) +import torch + + +class TestCheckpointUtils(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + @contextlib.contextmanager + def _train_transformer(self, seed, extra_args=None): + if extra_args is None: + extra_args = [] + with tempfile.TemporaryDirectory(f"_train_transformer_seed{seed}") as data_dir: + create_dummy_data(data_dir) + preprocess_translation_data(data_dir) + train_translation_model( + data_dir, + "transformer_iwslt_de_en", + [ + "--encoder-layers", + "3", + "--decoder-layers", + "3", + "--encoder-embed-dim", + "8", + "--decoder-embed-dim", + "8", + "--seed", + str(seed), + ] + + extra_args, + ) + yield os.path.join(data_dir, "checkpoint_last.pt") + + def test_load_model_ensemble_and_task(self): + # with contextlib.redirect_stdout(StringIO()): + with self._train_transformer(seed=123) as model1: + with self._train_transformer(seed=456) as model2: + ensemble, cfg, task = checkpoint_utils.load_model_ensemble_and_task( + filenames=[model1, model2] + ) + self.assertEqual(len(ensemble), 2) + + # after Transformer has been migrated to Hydra, this will probably + # become cfg.common.seed + self.assertEqual(ensemble[0].args.seed, 123) + self.assertEqual(ensemble[1].args.seed, 456) + + # the task from the first model should be returned + self.assertTrue("seed123" in task.cfg.data) + + # last cfg is saved + self.assertEqual(cfg.common.seed, 456) + + def test_prune_state_dict(self): + with contextlib.redirect_stdout(StringIO()): + extra_args = ["--encoder-layerdrop", "0.01", "--decoder-layerdrop", "0.01"] + with self._train_transformer(seed=1, extra_args=extra_args) as model: + ensemble, cfg, 
task = checkpoint_utils.load_model_ensemble_and_task( + filenames=[model], + arg_overrides={ + "encoder_layers_to_keep": "0,2", + "decoder_layers_to_keep": "1", + }, + ) + self.assertEqual(len(ensemble), 1) + self.assertEqual(len(ensemble[0].encoder.layers), 2) + self.assertEqual(len(ensemble[0].decoder.layers), 1) + + def test_torch_persistent_save_async(self): + state_dict = {} + filename = "async_checkpoint.pt" + + with patch(f"{checkpoint_utils.__name__}.PathManager.opena") as mock_opena: + with patch( + f"{checkpoint_utils.__name__}._torch_persistent_save" + ) as mock_save: + checkpoint_utils.torch_persistent_save( + state_dict, filename, async_write=True + ) + mock_opena.assert_called_with(filename, "wb") + mock_save.assert_called() + + def test_load_ema_from_checkpoint(self): + dummy_state = {"a": torch.tensor([1]), "b": torch.tensor([0.1])} + with patch(f"{checkpoint_utils.__name__}.PathManager.open") as mock_open, patch( + f"{checkpoint_utils.__name__}.torch.load" + ) as mock_load: + + mock_load.return_value = {"extra_state": {"ema": dummy_state}} + filename = "ema_checkpoint.pt" + state = checkpoint_utils.load_ema_from_checkpoint(filename) + + mock_open.assert_called_with(filename, "rb") + mock_load.assert_called() + + self.assertIn("a", state["model"]) + self.assertIn("b", state["model"]) + self.assertTrue(torch.allclose(dummy_state["a"], state["model"]["a"])) + self.assertTrue(torch.allclose(dummy_state["b"], state["model"]["b"])) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_checkpoint_utils_for_task_level_attributes.py b/fairseq/tests/test_checkpoint_utils_for_task_level_attributes.py new file mode 100644 index 0000000..53ab401 --- /dev/null +++ b/fairseq/tests/test_checkpoint_utils_for_task_level_attributes.py @@ -0,0 +1,172 @@ +#!/usr/bin/env fbpython +# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +import contextlib +import logging +import unittest +from io import StringIO +from unittest.mock import MagicMock, patch + +import torch +from fairseq import checkpoint_utils, data +from omegaconf import OmegaConf + + +def mock_trainer(epoch, num_updates, iterations_in_epoch): + trainer = MagicMock() + trainer.load_checkpoint.return_value = { + "train_iterator": { + "epoch": epoch, + "iterations_in_epoch": iterations_in_epoch, + "shuffle": False, + }, + "FakeTask": checkpoint_dict()["FakeTask"], + } + trainer.get_num_updates.return_value = num_updates + trainer.task.__class__.__name__ = "FakeTask" + trainer.task.get_checkpoint_dict.return_value = checkpoint_dict() + trainer.task.set_checkpoint_dict = MagicMock() + + return trainer + + +def checkpoint_dict(): + return { + "FakeTask": { + "observer_stats": { + ( + 4, + 16, + "MovingAveragePerChannelMinMax", + "MovingAveragePerChannelMinMax", + ): {"mod1": 1, "mod2": 2, "mod3": 3} + } + } + } + + +def mock_dict(): + d = MagicMock() + d.pad.return_value = 1 + d.eos.return_value = 2 + d.unk.return_value = 3 + return d + + +def get_trainer_and_epoch_itr(epoch, epoch_size, num_updates, iterations_in_epoch): + tokens = torch.LongTensor(list(range(epoch_size))).view(1, -1) + tokens_ds = data.TokenBlockDataset( + tokens, + sizes=[tokens.size(-1)], + block_size=1, + pad=0, + eos=1, + include_targets=False, + ) + trainer = mock_trainer(epoch, num_updates, iterations_in_epoch) + dataset = data.LanguagePairDataset( + tokens_ds, tokens_ds.sizes, mock_dict(), shuffle=False + ) + epoch_itr = data.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=[[i] for i in range(epoch_size)], + ) + return trainer, epoch_itr + + +def get_mock_cfg(finetune_from_model): + cfg_mock = OmegaConf.create( + { + "checkpoint": { + "save_dir": None, + "optimizer_overrides": "{}", + "reset_dataloader": False, + "reset_meters": False, + "reset_optimizer": False, + "reset_lr_scheduler": False, + "finetune_from_model": 
finetune_from_model, + "model_parallel_size": 1, + "restore_file": "checkpoint_last.pt", + "no_save": False, + "save_interval_updates": 0, + "no_last_checkpoints": False, + "keep_interval_updates": 0, + "keep_last_epochs": 0, + "keep_best_checkpoints": 0, + }, + "common": { + "model_parallel_size": 1, + }, + } + ) + return cfg_mock + + +class TestCheckpointsForTaskLevelAttributes(unittest.TestCase): + def setUp(self) -> None: + self.cfg_mock = get_mock_cfg(None) + self.patches = { + "os.makedirs": MagicMock(), + "os.path.join": MagicMock(), + "os.path.isfile": MagicMock(return_value=True), + "os.path.isabs": MagicMock(return_value=False), + "fairseq.file_io.PathManager.exists": MagicMock(return_value=False), + } + self.applied_patches = [patch(p, d) for p, d in self.patches.items()] + [p.start() for p in self.applied_patches] + logging.disable(logging.CRITICAL) + + self.trainer, self.epoch_itr = get_trainer_and_epoch_itr(2, 150, 200, 50) + self.trainer.get_train_iterator = MagicMock(return_value=self.epoch_itr) + self.epoch_itr.next_epoch_itr(shuffle=False) + + checkpoint_utils.save_checkpoint( + self.cfg_mock.checkpoint, self.trainer, self.epoch_itr, None + ) + + def tearDown(self): + patch.stopall() + logging.disable(logging.NOTSET) + + def test_verify_checkpoint(self) -> None: + cp_dict = self.trainer.task.get_checkpoint_dict() + self.assertTrue(len(cp_dict) == 1) + self.assertTrue("FakeTask" in cp_dict) + self.assertTrue("observer_stats" in cp_dict["FakeTask"]) + self.assertTrue(len(cp_dict["FakeTask"]["observer_stats"]) == 1) + self.assertTrue( + ( + 4, + 16, + "MovingAveragePerChannelMinMax", + "MovingAveragePerChannelMinMax", + ) + in cp_dict["FakeTask"]["observer_stats"] + ) + self.assertTrue( + cp_dict["FakeTask"]["observer_stats"][ + ( + 4, + 16, + "MovingAveragePerChannelMinMax", + "MovingAveragePerChannelMinMax", + ) + ] + == {"mod1": 1, "mod2": 2, "mod3": 3} + ) + + def test_load_checkpoint(self) -> None: + with contextlib.redirect_stdout(StringIO()): 
+ # Now, load checkpoint to ensure the respective logic works as expected + _, epoch_itr = checkpoint_utils.load_checkpoint( + self.cfg_mock.checkpoint, self.trainer + ) + + self.trainer.task.set_checkpoint_dict.assert_called_once_with( + checkpoint_dict()["FakeTask"] + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_concat_dataset.py b/fairseq/tests/test_concat_dataset.py new file mode 100644 index 0000000..d94aeff --- /dev/null +++ b/fairseq/tests/test_concat_dataset.py @@ -0,0 +1,58 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from fairseq.data import LanguagePairDataset, TokenBlockDataset +from fairseq.data.concat_dataset import ConcatDataset +from tests.test_train import mock_dict + + +class TestConcatDataset(unittest.TestCase): + def setUp(self): + d = mock_dict() + tokens_1 = torch.LongTensor([1]).view(1, -1) + tokens_ds1 = TokenBlockDataset( + tokens_1, + sizes=[tokens_1.size(-1)], + block_size=1, + pad=0, + eos=1, + include_targets=False, + ) + self.dataset_1 = LanguagePairDataset( + tokens_ds1, tokens_ds1.sizes, d, shuffle=False + ) + tokens_2 = torch.LongTensor([2]).view(1, -1) + tokens_ds2 = TokenBlockDataset( + tokens_2, + sizes=[tokens_2.size(-1)], + block_size=1, + pad=0, + eos=1, + include_targets=False, + ) + self.dataset_2 = LanguagePairDataset( + tokens_ds2, tokens_ds2.sizes, d, shuffle=False + ) + + def test_concat_dataset_basics(self): + d = ConcatDataset([self.dataset_1, self.dataset_2]) + assert len(d) == 2 + assert d[0]["source"][0] == 1 + assert d[1]["source"][0] == 2 + + d = ConcatDataset([self.dataset_1, self.dataset_2], sample_ratios=[1, 2]) + assert len(d) == 3 + assert d[0]["source"][0] == 1 + assert d[1]["source"][0] == 2 + assert d[2]["source"][0] == 2 + + d = ConcatDataset([self.dataset_1, self.dataset_2], sample_ratios=[2, 
1]) + assert len(d) == 3 + assert d[0]["source"][0] == 1 + assert d[1]["source"][0] == 1 + assert d[2]["source"][0] == 2 diff --git a/fairseq/tests/test_constraints.py b/fairseq/tests/test_constraints.py new file mode 100644 index 0000000..d766d51 --- /dev/null +++ b/fairseq/tests/test_constraints.py @@ -0,0 +1,275 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from typing import List + +import torch + +from fairseq.token_generation_constraints import ( + ConstraintNode, + OrderedConstraintState, + UnorderedConstraintState, + pack_constraints, +) + + +def tensorize(constraints: List[List[int]]) -> torch.Tensor: + return [torch.tensor(x) for x in constraints] + + +class TestHelperRoutines(unittest.TestCase): + def setUp(self): + self.examples = [ + ([[]], torch.tensor([[0]])), + ([[], []], torch.tensor([[0], [0]])), + ([[torch.tensor([1, 2])], []], torch.tensor([[1, 1, 2, 0], [0, 0, 0, 0]])), + ( + [ + [ + torch.tensor([3, 1, 2]), + torch.tensor([3]), + torch.tensor([4, 5, 6, 7]), + ], + [], + [torch.tensor([1, 8, 9, 10, 1, 4, 11, 12])], + ], + torch.tensor( + [ + [3, 3, 1, 2, 0, 3, 0, 4, 5, 6, 7, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 8, 9, 10, 1, 4, 11, 12, 0, 0, 0], + ] + ), + ), + ] + + def test_packing(self): + """Ensures the list of lists of tensors gets packed correctly.""" + for batch_constraints, expected_tensor in self.examples: + packed = pack_constraints(batch_constraints) + assert torch.equal(packed, expected_tensor) + + +class TestUnorderedConstraintState(unittest.TestCase): + def setUp(self): + # Tuples of (contraint set, expected printed graph, token counts per node) + self.examples = [ + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 
[7].True#1))))", # noqa + {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1}, + ), + ([], "[None].False#0", {}), + (tensorize([[0]]), "([None].False#1 [0].True#1)", {0: 1}), + ( + tensorize([[100000, 1, 2, 3, 4, 5]]), + "([None].False#1 ([100000].False#1 ([1].False#1 ([2].False#1 ([3].False#1 ([4].False#1 [5].True#1))))))", + {100000: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1}, + ), + ( + tensorize([[1, 2], [1, 2]]), + "([None].False#2 ([1].False#2 [2].True#2))", + {1: 2, 2: 2}, + ), + ( + tensorize([[1, 2], [3, 4]]), + "([None].False#2 ([1].False#1 [2].True#1) ([3].False#1 [4].True#1))", + {1: 1, 2: 1, 3: 1, 4: 1}, + ), + ] + + self.sequences = [ + ( + self.examples[0][0], + [], + {"bank": 0, "num_completed": 0, "finished": False, "is_root": True}, + ), + ( + self.examples[0][0], + [1, 2], + {"bank": 2, "num_completed": 0, "finished": False, "is_root": False}, + ), + ( + self.examples[0][0], + [1, 2, 94], + {"bank": 1, "num_completed": 1, "finished": False, "is_root": True}, + ), + ( + self.examples[0][0], + [1, 3, 999, 1, 4], + {"bank": 4, "num_completed": 2, "finished": False, "is_root": False}, + ), + ( + self.examples[0][0], + [1, 3, 999, 1, 4, 999], + {"bank": 4, "num_completed": 2, "finished": False, "is_root": True}, + ), + ( + self.examples[0][0], + [4, 5, 6, 8], + {"bank": 2, "num_completed": 1, "finished": False, "is_root": True}, + ), + ( + self.examples[0][0], + # Tricky, because in last three, goes down [1->4] branch, could miss [1] and [4->5] + # [[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]], + [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5], + {"bank": 14, "num_completed": 6, "finished": True, "is_root": False}, + ), + ( + self.examples[0][0], + [1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117], + {"bank": 14, "num_completed": 6, "finished": True, "is_root": True}, + ), + ( + tensorize([[1], [2, 3]]), + # Should not be able to get credit for entering 1 a second time + [1, 1], + {"bank": 1, "num_completed": 1, "finished": False, "is_root": True}, + ), + ( + 
self.examples[4][0], + [1, 2, 1, 2], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": False}, + ), + ( + self.examples[4][0], + [1, 2, 1, 2, 1], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": True}, + ), + ( + self.examples[5][0], + [1, 2, 3, 4, 5], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": True}, + ), + ] + + def test_graphs(self): + """ + Test whether unordered graph systems are created correctly. + """ + for example in self.examples: + constraints, expected, gold_counts = example + c = ConstraintNode.create(constraints) + assert ( + ConstraintNode.print_graph(c) == expected + ), f"got {ConstraintNode.print_graph(c)}, expected {expected}" + assert ( + c.token_counts() == gold_counts + ), f"{c} got {c.token_counts()} wanted {gold_counts}" + + def test_next_tokens(self): + """ + Tests that the set of next tokens is correct. + """ + for example in self.examples: + constraints, expected, gold_counts = example + root = ConstraintNode.create(constraints) + + root_tokens = set(root.children.keys()) + for sequence in constraints: + state = UnorderedConstraintState(root) + for token in sequence: + all_tokens = root_tokens.union(state.node.children.keys()) + assert ( + all_tokens == state.next_tokens() + ), f"ALL {all_tokens} NEXT {state.next_tokens()}" + state = state.advance(token) + + def test_sequences(self): + for constraints, tokens, expected in self.sequences: + state = UnorderedConstraintState.create(pack_constraints([constraints])[0]) + for token in tokens: + state = state.advance(token) + result = {} + for attr in expected.keys(): + result[attr] = getattr(state, attr) + + assert ( + result == expected + ), f"TEST({tokens}) GOT: {result} WANTED: {expected}" + + +class TestOrderedConstraintState(unittest.TestCase): + def setUp(self): + self.sequences = [ + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [], + {"bank": 0, "num_completed": 0, "finished": False, "is_root": True}, + ), + ( + 
tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2], + {"bank": 2, "num_completed": 0, "finished": False, "is_root": False}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2, 94], + {"bank": 0, "num_completed": 0, "finished": False, "is_root": True}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 3, 999, 1, 4], + {"bank": 0, "num_completed": 0, "finished": False, "is_root": True}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2, 3, 999, 999], + {"bank": 3, "num_completed": 1, "finished": False, "is_root": False}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2, 3, 77, 1, 3, 1], + {"bank": 6, "num_completed": 2, "finished": False, "is_root": False}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5], + {"bank": 14, "num_completed": 6, "finished": True, "is_root": False}, + ), + ( + tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]), + [1, 2, 999, 1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117], + {"bank": 14, "num_completed": 6, "finished": True, "is_root": False}, + ), + ( + tensorize([[1], [2, 3]]), + [1, 1], + {"bank": 1, "num_completed": 1, "finished": False, "is_root": False}, + ), + ( + tensorize([[1, 2], [1, 2]]), + [1, 2, 1, 2], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": False}, + ), + ( + tensorize([[1, 2], [1, 2]]), + [1, 2, 1, 2, 1], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": False}, + ), + ( + tensorize([[1, 2], [3, 4]]), + [1, 2, 3, 4, 5], + {"bank": 4, "num_completed": 2, "finished": True, "is_root": False}, + ), + ] + + def test_sequences(self): + for i, (constraints, tokens, expected) in enumerate(self.sequences): + state = OrderedConstraintState.create(pack_constraints([constraints])[0]) + for token in tokens: + state = state.advance(token) + result 
= {} + for attr in expected.keys(): + result[attr] = getattr(state, attr) + assert ( + result == expected + ), f"TEST({tokens}) GOT: {result} WANTED: {expected}" + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_convtbc.py b/fairseq/tests/test_convtbc.py new file mode 100644 index 0000000..3a3c9b9 --- /dev/null +++ b/fairseq/tests/test_convtbc.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +import torch.nn as nn +from fairseq.modules import ConvTBC + + +class TestConvTBC(unittest.TestCase): + def test_convtbc(self): + # ksz, in_channels, out_channels + conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1) + # out_channels, in_channels, ksz + conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1) + + conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2)) + conv_tbc.bias.data.copy_(conv1d.bias.data) + + input_tbc = torch.randn(7, 2, 4, requires_grad=True) + input1d = input_tbc.data.transpose(0, 1).transpose(1, 2) + input1d.requires_grad = True + + output_tbc = conv_tbc(input_tbc) + output1d = conv1d(input1d) + + self.assertAlmostEqual( + output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data + ) + + grad_tbc = torch.randn(output_tbc.size()) + grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous() + + output_tbc.backward(grad_tbc) + output1d.backward(grad1d) + + self.assertAlmostEqual( + conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data + ) + self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data) + self.assertAlmostEqual( + input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data + ) + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/fairseq/tests/test_data_utils.py b/fairseq/tests/test_data_utils.py new file mode 100644 index 0000000..c48d02c --- /dev/null +++ b/fairseq/tests/test_data_utils.py @@ -0,0 +1,136 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import numpy as np + +from fairseq.data.data_utils_fast import batch_by_size_fn, batch_by_size_vec + + +class TestBatchBySize(unittest.TestCase): + @classmethod + def batch_by_size_baseline( + cls, + indices, + num_tokens_vec, + max_tokens, + max_sentences, + bsz_mult, + ): + """Simple, reliable and slow implementation of batch by size""" + batches = [] + start = 0 + while start < len(indices): + for end in range(start + 1, len(indices) + 1): + max_val = max(num_tokens_vec[pos] for pos in range(start, end)) + sent_count = end - start + num_tokens = max_val * sent_count + overflow = num_tokens > max_tokens > 0 or sent_count > max_sentences > 0 + terminate = overflow or end == len(indices) + if overflow: + sent_count -= 1 + if terminate: + if sent_count > bsz_mult: + sent_count = sent_count - sent_count % bsz_mult + batches.append(indices[start : start + sent_count]) + start = start + sent_count + break + return batches + + @classmethod + def _get_error_message( + cls, max_sentences, max_tokens, bsz_mult, num_tokens_vec, validation, results + ): + return f"""Reference batch_by_size implementation should produce + same output as the baseline method. 
+ Params: + max_sentences={max_sentences}, + max_tokens={max_tokens}, + bsz_mult={bsz_mult}, + num_tokens_vec={num_tokens_vec}, + expected_batches={validation}, + returned_batches={results}""" + + def _compare_results( + self, + indices_len, + batch_by_size_impl, + max_sentences, + max_tokens, + bsz_mult, + num_tokens_vec, + ): + indices = np.array(list(range(indices_len))) + validation = self.batch_by_size_baseline( + indices, + num_tokens_vec, + max_tokens=max_tokens, + max_sentences=max_sentences, + bsz_mult=bsz_mult, + ) + results = batch_by_size_impl( + indices, + num_tokens_vec, + max_tokens=max_tokens, + max_sentences=max_sentences, + bsz_mult=bsz_mult, + ) + error_msg = self._get_error_message( + max_sentences, max_tokens, bsz_mult, num_tokens_vec, validation, results + ) + self.assertEqual(len(validation), len(results), error_msg) + for first, second in zip(validation, results): + self.assertTrue(np.array_equal(first, second), error_msg) + + def _run_compare_with_baseline_sweep(self, batch_by_size_impl): + """Compare reference batch_by_size implementation with batch_by_size_baseline + across a dense grid of hyperparam values""" + MAX_MAX_TOKENS = 10 + NUM_TOKENS_VECS_COUNT = 5 + for indices_len in [10, 11]: # try odd and even len of indices + for max_sentences in range(0, indices_len + 2): + for max_tokens in range(0, MAX_MAX_TOKENS): + for bsz_mult in range(1, max(MAX_MAX_TOKENS, indices_len) + 2): + for _ in range(NUM_TOKENS_VECS_COUNT): + num_tokens_vec = np.random.randint( + 0, max_tokens + 1, size=indices_len + ) + self._compare_results( + indices_len, + batch_by_size_impl, + max_sentences, + max_tokens, + bsz_mult, + num_tokens_vec, + ) + + +class TestBatchBySizeVec(TestBatchBySize): + def test_compare_with_baseline(self): + self._run_compare_with_baseline_sweep(batch_by_size_vec) + + +class TestBatchBySizeFn(TestBatchBySize): + def test_compare_with_baseline(self): + def batch_by_size_fn_wrapper( + indices, + num_tokens_vec, + max_tokens, + 
max_sentences, + bsz_mult, + ): + def num_tokens_fn(idx): + return num_tokens_vec[idx] + + return batch_by_size_fn( + indices, num_tokens_fn, max_tokens, max_sentences, bsz_mult + ) + + self._run_compare_with_baseline_sweep(batch_by_size_fn_wrapper) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_dataclass_utils.py b/fairseq/tests/test_dataclass_utils.py new file mode 100644 index 0000000..231f86b --- /dev/null +++ b/fairseq/tests/test_dataclass_utils.py @@ -0,0 +1,87 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from argparse import ArgumentParser +from dataclasses import dataclass, field + +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import gen_parser_from_dataclass + + +@dataclass +class A(FairseqDataclass): + data: str = field(default="test", metadata={"help": "the data input"}) + num_layers: int = field(default=200, metadata={"help": "more layers is better?"}) + + +@dataclass +class B(FairseqDataclass): + bar: A = field(default=A()) + foo: int = field(default=0, metadata={"help": "not a bar"}) + + +@dataclass +class D(FairseqDataclass): + arch: A = field(default=A()) + foo: int = field(default=0, metadata={"help": "not a bar"}) + + +@dataclass +class C(FairseqDataclass): + data: str = field(default="test", metadata={"help": "root level data input"}) + encoder: D = field(default=D()) + decoder: A = field(default=A()) + lr: int = field(default=0, metadata={"help": "learning rate"}) + + +class TestDataclassUtils(unittest.TestCase): + def test_argparse_convert_basic(self): + parser = ArgumentParser() + gen_parser_from_dataclass(parser, A(), True) + args = parser.parse_args(["--num-layers", "10", "the/data/path"]) + self.assertEqual(args.num_layers, 10) + self.assertEqual(args.data, "the/data/path") + + def test_argparse_recursive(self): + 
parser = ArgumentParser() + gen_parser_from_dataclass(parser, B(), True) + args = parser.parse_args(["--num-layers", "10", "--foo", "10", "the/data/path"]) + self.assertEqual(args.num_layers, 10) + self.assertEqual(args.foo, 10) + self.assertEqual(args.data, "the/data/path") + + def test_argparse_recursive_prefixing(self): + self.maxDiff = None + parser = ArgumentParser() + gen_parser_from_dataclass(parser, C(), True, "") + args = parser.parse_args( + [ + "--encoder-arch-data", + "ENCODER_ARCH_DATA", + "--encoder-arch-num-layers", + "10", + "--encoder-foo", + "10", + "--decoder-data", + "DECODER_DATA", + "--decoder-num-layers", + "10", + "--lr", + "10", + "the/data/path", + ] + ) + self.assertEqual(args.encoder_arch_data, "ENCODER_ARCH_DATA") + self.assertEqual(args.encoder_arch_num_layers, 10) + self.assertEqual(args.encoder_foo, 10) + self.assertEqual(args.decoder_data, "DECODER_DATA") + self.assertEqual(args.decoder_num_layers, 10) + self.assertEqual(args.lr, 10) + self.assertEqual(args.data, "the/data/path") + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_dataset.py b/fairseq/tests/test_dataset.py new file mode 100644 index 0000000..a3e3970 --- /dev/null +++ b/fairseq/tests/test_dataset.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import unittest +from typing import Sequence + +from fairseq.data import LanguagePairDataset, ListDataset, RoundRobinZipDatasets +from tests.test_train import mock_dict + + +def lang_pair_dataset(lengths: Sequence[int]) -> LanguagePairDataset: + tokens = [[i] * l for i, l in enumerate(lengths)] + return LanguagePairDataset(ListDataset(tokens), lengths, mock_dict()) + + +def sample(id: int, length: int): + return {"id": id, "source": [id] * length, "target": None} + + +class TestDataset(unittest.TestCase): + def setUp(self): + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_round_robin_zip_datasets(self): + long_dataset = lang_pair_dataset([10, 9, 8, 11]) + short_dataset = lang_pair_dataset([11, 9]) + + dataset = RoundRobinZipDatasets({"a": long_dataset, "b": short_dataset}) + # Dataset is now sorted by sentence length + dataset.ordered_indices() + assert dataset.longest_dataset is long_dataset + self.assertEqual(dict(dataset[0]), {"a": sample(2, 8), "b": sample(1, 9)}) + # The item 2 of dataset 'a' is with item (2 % 2 = 0) of dataset 'b' + self.assertEqual(dict(dataset[2]), {"a": sample(0, 10), "b": sample(1, 9)}) + + def test_round_robin_zip_datasets_filtered(self): + long_dataset = lang_pair_dataset([10, 20, 8, 11, 1000, 7, 12]) + short_dataset = lang_pair_dataset([11, 20, 9, 1000]) + + dataset = RoundRobinZipDatasets({"a": long_dataset, "b": short_dataset}) + # Dataset is now sorted by sentence length + idx = dataset.ordered_indices() + idx, _ = dataset.filter_indices_by_size(idx, {"a": 19, "b": 900}) + self.assertEqual(list(idx), [0, 1, 2, 3, 4]) + self.assertEqual(dict(dataset[0]), {"a": sample(5, 7), "b": sample(2, 9)}) + self.assertEqual(dict(dataset[2]), {"a": sample(0, 10), "b": sample(1, 20)}) + self.assertEqual(dict(dataset[4]), {"a": sample(6, 12), "b": sample(0, 11)}) + + def test_round_robin_zip_datasets_filtered_with_tuple(self): + long_dataset = lang_pair_dataset([10, 20, 8, 
11, 1000, 7, 12]) + short_dataset = lang_pair_dataset([11, 20, 9, 1000]) + + dataset = RoundRobinZipDatasets({"a": long_dataset, "b": short_dataset}) + # Dataset is now sorted by sentence length + idx = dataset.ordered_indices() + idx, _ = dataset.filter_indices_by_size(idx, 19) + self.assertEqual(list(idx), [0, 1, 2, 3, 4]) + self.assertEqual(dict(dataset[0]), {"a": sample(5, 7), "b": sample(2, 9)}) + self.assertEqual(dict(dataset[2]), {"a": sample(0, 10), "b": sample(2, 9)}) + self.assertEqual(dict(dataset[4]), {"a": sample(6, 12), "b": sample(2, 9)}) diff --git a/fairseq/tests/test_dictionary.py b/fairseq/tests/test_dictionary.py new file mode 100644 index 0000000..dc9d71b --- /dev/null +++ b/fairseq/tests/test_dictionary.py @@ -0,0 +1,145 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import io +import os +import string +import tempfile +import unittest + +import torch +from fairseq import tokenizer +from fairseq.data import Dictionary + + +class TestDictionary(unittest.TestCase): + def test_finalize(self): + txt = [ + "A B C D", + "B C D", + "C D", + "D", + ] + ref_ids1 = list( + map( + torch.IntTensor, + [ + [4, 5, 6, 7, 2], + [5, 6, 7, 2], + [6, 7, 2], + [7, 2], + ], + ) + ) + ref_ids2 = list( + map( + torch.IntTensor, + [ + [7, 6, 5, 4, 2], + [6, 5, 4, 2], + [5, 4, 2], + [4, 2], + ], + ) + ) + + # build dictionary + d = Dictionary() + for line in txt: + d.encode_line(line, add_if_not_exist=True) + + def get_ids(dictionary): + ids = [] + for line in txt: + ids.append(dictionary.encode_line(line, add_if_not_exist=False)) + return ids + + def assertMatch(ids, ref_ids): + for toks, ref_toks in zip(ids, ref_ids): + self.assertEqual(toks.size(), ref_toks.size()) + self.assertEqual(0, (toks != ref_toks).sum().item()) + + ids = get_ids(d) + assertMatch(ids, ref_ids1) + + # check finalized dictionary + d.finalize() + 
finalized_ids = get_ids(d) + assertMatch(finalized_ids, ref_ids2) + + # write to disk and reload + with tempfile.NamedTemporaryFile(mode="w") as tmp_dict: + d.save(tmp_dict.name) + d = Dictionary.load(tmp_dict.name) + reload_ids = get_ids(d) + assertMatch(reload_ids, ref_ids2) + assertMatch(finalized_ids, reload_ids) + + def test_overwrite(self): + # for example, Camembert overwrites <unk>, <s> and </s> + dict_file = io.StringIO( + "<unk> 999 #fairseq:overwrite\n" + "<s> 999 #fairseq:overwrite\n" + "</s> 999 #fairseq:overwrite\n" + ", 999\n" + "▁de 999\n" + ) + d = Dictionary() + d.add_from_file(dict_file) + self.assertEqual(d.index("<pad>"), 1) + self.assertEqual(d.index("foo"), 3) + self.assertEqual(d.index("<unk>"), 4) + self.assertEqual(d.index("<s>"), 5) + self.assertEqual(d.index("</s>"), 6) + self.assertEqual(d.index(","), 7) + self.assertEqual(d.index("▁de"), 8) + + def test_no_overwrite(self): + # for example, Camembert overwrites <unk>, <s> and </s> + dict_file = io.StringIO( + "<unk> 999\n" "<s> 999\n" "</s> 999\n" ", 999\n" "▁de 999\n" + ) + d = Dictionary() + with self.assertRaisesRegex(RuntimeError, "Duplicate"): + d.add_from_file(dict_file) + + def test_space(self): + # for example, character models treat space as a symbol + dict_file = io.StringIO(" 999\n" "a 999\n" "b 999\n") + d = Dictionary() + d.add_from_file(dict_file) + self.assertEqual(d.index(" "), 4) + self.assertEqual(d.index("a"), 5) + self.assertEqual(d.index("b"), 6) + + def test_add_file_to_dict(self): + counts = {} + num_lines = 100 + per_line = 10 + with tempfile.TemporaryDirectory("test_sampling") as data_dir: + filename = os.path.join(data_dir, "dummy.txt") + with open(filename, "w", encoding="utf-8") as data: + for c in string.ascii_letters: + line = f"{c} " * per_line + for _ in range(num_lines): + data.write(f"{line}\n") + counts[c] = per_line * num_lines + per_line += 5 + + dict = Dictionary() + Dictionary.add_file_to_dictionary( + filename, dict, tokenizer.tokenize_line, 10 + 
) + dict.finalize(threshold=0, nwords=-1, padding_factor=8) + + for c in string.ascii_letters: + count = dict.get_count(dict.index(c)) + self.assertEqual( + counts[c], count, f"{c} count is {count} but should be {counts[c]}" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_ema.py b/fairseq/tests/test_ema.py new file mode 100644 index 0000000..bd2cf2c --- /dev/null +++ b/fairseq/tests/test_ema.py @@ -0,0 +1,275 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from copy import deepcopy +from dataclasses import dataclass +import pytest +from typing import Optional +from unittest.mock import patch + +import torch + +from fairseq.models.ema import EMA + + +class DummyModule(torch.nn.Module): + def __init__(self) -> None: + """LightningModule for testing purposes + + Args: + epoch_min_loss_override (int, optional): Pass in an epoch that will be set to the minimum + validation loss for testing purposes (zero based). If None this is ignored. Defaults to None. 
+ """ + super().__init__() + self.layer = torch.nn.Linear(in_features=32, out_features=2) + self.another_layer = torch.nn.Linear(in_features=2, out_features=2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.layer(x) + return self.another_layer(x) + + +@dataclass +class EMAConfig(object): + ema_decay: float = 0.99 + ema_start_update: int = 0 + ema_fp32: bool = False + ema_seed_model: Optional[str] = None + ema_update_freq: int = 1 + + +class TestEMA(unittest.TestCase): + def assertTorchAllClose(self, x, y, atol=1e-8, rtol=1e-5, msg=None): + diff = x.float() - y.float() + diff_norm = torch.norm(diff) + other_norm = torch.norm(y.float()) + + if msg is None: + msg = "|input - other| > {} + {} * |other|".format(atol, rtol) + + self.assertLessEqual( + diff_norm, + atol + rtol * other_norm, + msg=msg, + ) + + def test_ema(self): + model = DummyModule() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig() + ema = EMA(model, config) + + # set decay + ema._set_decay(config.ema_decay) + self.assertEqual(ema.get_decay(), config.ema_decay) + + # get model + self.assertEqual(ema.get_model(), ema.model) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + # EMA step + x = torch.randn(32) + y = model(x) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + ema_state_dict = ema.get_model().state_dict() + + for key, param in model.state_dict().items(): + prev_param = state[key] + ema_param = ema_state_dict[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + self.assertTorchAllClose( + ema_param, + config.ema_decay * prev_param + (1 - config.ema_decay) * param, + ) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + # Load EMA into model + model2 = DummyModule() + ema.reverse(model2) + + for key, param in 
model2.state_dict().items(): + ema_param = ema_state_dict[key] + self.assertTrue(torch.allclose(ema_param, param)) + + # Check that step_internal is called once + with patch.object(ema, "_step_internal", return_value=None) as mock_method: + ema.step(model) + mock_method.assert_called_once_with(model, None) + + def _test_ema_start_update(self, updates): + model = DummyModule() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig(ema_start_update=1) + ema = EMA(model, config) + + # EMA step + x = torch.randn(32) + y = model(x) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model, updates=updates) + ema_state_dict = ema.get_model().state_dict() + + self.assertEqual(ema.get_decay(), 0 if updates == 0 else config.ema_decay) + + for key, param in model.state_dict().items(): + ema_param = ema_state_dict[key] + prev_param = state[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + if updates == 0: + self.assertTorchAllClose( + ema_param, + param, + ) + else: + self.assertTorchAllClose( + ema_param, + config.ema_decay * prev_param + (1 - config.ema_decay) * param, + ) + + # Check that step_internal is called once + with patch.object(ema, "_step_internal", return_value=None) as mock_method: + ema.step(model, updates=updates) + mock_method.assert_called_once_with(model, updates) + + def test_ema_before_start_update(self): + self._test_ema_start_update(updates=0) + + def test_ema_after_start_update(self): + self._test_ema_start_update(updates=1) + + def test_ema_fp32(self): + dtype = torch.float + + model = DummyModule().to(dtype) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig(ema_fp32=True) + ema = EMA(model, config) + + x = torch.randn(32) + y = model(x.to(dtype)) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + for key, param in 
model.state_dict().items(): + prev_param = state[key] + ema_param = ema.get_model().state_dict()[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + self.assertIn(key, ema.fp32_params) + + # EMA update is done in fp32, and hence the EMA param must be + # closer to the EMA update done in fp32 than in fp16. + self.assertLessEqual( + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ) + .to(dtype) + .float() + ), + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param + (1 - config.ema_decay) * param + ).float() + ), + ) + self.assertTorchAllClose( + ema_param, + ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ).to(dtype), + ) + + @pytest.mark.skipif( + not torch.cuda.is_available(), + reason="CPU no longer supports Linear in half precision", + ) + def test_ema_fp16(self): + model = DummyModule().cuda().half() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + state = deepcopy(model.state_dict()) + config = EMAConfig(ema_fp32=False) + ema = EMA(model, config) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + x = torch.randn(32).cuda() + y = model(x.half()) + loss = y.sum() + loss.backward() + optimizer.step() + + ema.step(model) + + for key, param in model.state_dict().items(): + prev_param = state[key] + ema_param = ema.get_model().state_dict()[key] + + if "version" in key: + # Do not decay a model.version pytorch param + continue + + # EMA update is done in fp16, and hence the EMA param must be + # closer to the EMA update done in fp16 than in fp32. 
+ self.assertLessEqual( + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param + (1 - config.ema_decay) * param + ).float() + ), + torch.norm( + ema_param.float() + - ( + config.ema_decay * prev_param.float() + + (1 - config.ema_decay) * param.float() + ) + .half() + .float() + ), + ) + self.assertTorchAllClose( + ema_param, + config.ema_decay * prev_param + (1 - config.ema_decay) * param, + ) + + # Since fp32 params is not used, it should be of size 0 + self.assertEqual(len(ema.fp32_params), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_espnet_multihead_attention.py b/fairseq/tests/test_espnet_multihead_attention.py new file mode 100644 index 0000000..ee71dd0 --- /dev/null +++ b/fairseq/tests/test_espnet_multihead_attention.py @@ -0,0 +1,176 @@ +import torch +import numpy as np +import unittest +from fairseq.modules import ( + ESPNETMultiHeadedAttention, + RelPositionMultiHeadedAttention, + RotaryPositionMultiHeadedAttention, +) + +torch.use_deterministic_algorithms(True) + + +class TestESPNETMultiHeadedAttention(unittest.TestCase): + def setUp(self) -> None: + self.T = 3 + self.B = 1 + self.C = 2 + torch.manual_seed(0) + self.sample = torch.randn(self.T, self.B, self.C) # TBC + self.sample_scores = torch.randn(self.B, 1, self.T, self.T) + self.MHA = ESPNETMultiHeadedAttention(self.C, 1, dropout=0) + + def test_forward(self): + expected_scores = torch.tensor( + [[[0.1713, -0.3776]], [[0.2263, -0.4486]], [[0.2243, -0.4538]]] + ) + scores, _ = self.MHA(self.sample, self.sample, self.sample) + self.assertTrue( + np.allclose( + expected_scores.cpu().detach().numpy(), + scores.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + def test_forward_qkv(self): + expected_query = torch.tensor( + [[[[-1.0235, 0.0409], [0.4008, 1.3077], [0.5396, 2.0698]]]] + ) + expected_key = torch.tensor( + [[[[0.5053, -0.4965], [-0.3730, -0.9473], [-0.7019, -0.1935]]]] + ) + expected_val = torch.tensor( + [[[[-0.9940, 0.5403], [0.5924, 
-0.7619], [0.7504, -1.0892]]]] + ) + sample_t = self.sample.transpose(0, 1) + query, key, val = self.MHA.forward_qkv(sample_t, sample_t, sample_t) + self.assertTrue( + np.allclose( + expected_query.cpu().detach().numpy(), + query.cpu().detach().numpy(), + atol=1e-4, + ) + ) + self.assertTrue( + np.allclose( + expected_key.cpu().detach().numpy(), + key.cpu().detach().numpy(), + atol=1e-4, + ) + ) + self.assertTrue( + np.allclose( + expected_val.cpu().detach().numpy(), + val.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + def test_forward_attention(self): + expected_scores = torch.tensor( + [[[0.1627, -0.6249], [-0.2547, -0.6487], [-0.0711, -0.8545]]] + ) + scores = self.MHA.forward_attention( + self.sample.transpose(0, 1).view(self.B, 1, self.T, self.C), + self.sample_scores, + mask=None, + ) + self.assertTrue( + np.allclose( + expected_scores.cpu().detach().numpy(), + scores.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + +class TestRelPositionMultiHeadedAttention(unittest.TestCase): + def setUp(self) -> None: + self.T = 3 + self.B = 1 + self.C = 2 + torch.manual_seed(0) + self.sample = torch.randn(self.T, self.B, self.C) # TBC + self.sample_x = torch.randn(self.B, 1, self.T, self.T * 2 - 1) + self.sample_pos = torch.randn(self.B, self.T * 2 - 1, self.C) + self.MHA = RelPositionMultiHeadedAttention(self.C, 1, dropout=0) + + def test_rel_shift(self): + expected_x = torch.tensor( + [ + [ + [ + [-0.7193, -0.4033, -0.5966], + [-0.8567, 1.1006, -1.0712], + [-0.5663, 0.3731, -0.8920], + ] + ] + ] + ) + x = self.MHA.rel_shift(self.sample_x) + self.assertTrue( + np.allclose( + expected_x.cpu().detach().numpy(), + x.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + def test_forward(self): + expected_scores = torch.tensor( + [ + [[-0.9609, -0.5020]], + [[-0.9308, -0.4890]], + [[-0.9473, -0.4948]], + [[-0.9609, -0.5020]], + [[-0.9308, -0.4890]], + [[-0.9473, -0.4948]], + [[-0.9609, -0.5020]], + [[-0.9308, -0.4890]], + [[-0.9473, -0.4948]], + [[-0.9609, -0.5020]], + 
[[-0.9308, -0.4890]], + [[-0.9473, -0.4948]], + [[-0.9609, -0.5020]], + [[-0.9308, -0.4890]], + [[-0.9473, -0.4948]], + ] + ) + scores, _ = self.MHA(self.sample, self.sample, self.sample, self.sample_pos) + self.assertTrue( + np.allclose( + expected_scores.cpu().detach().numpy(), + scores.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + +class TestRotaryPositionMultiHeadedAttention(unittest.TestCase): + def setUp(self) -> None: + self.T = 3 + self.B = 1 + self.C = 2 + torch.manual_seed(0) + self.sample = torch.randn(self.T, self.B, self.C) # TBC + self.MHA = RotaryPositionMultiHeadedAttention( + self.C, 1, dropout=0, precision=None + ) + + def test_forward(self): + expected_scores = torch.tensor( + [[[-0.3220, -0.4726]], [[-1.2813, -0.0979]], [[-0.3138, -0.4758]]] + ) + scores, _ = self.MHA(self.sample, self.sample, self.sample) + self.assertTrue( + np.allclose( + expected_scores.cpu().detach().numpy(), + scores.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_export.py b/fairseq/tests/test_export.py new file mode 100644 index 0000000..3e9a48d --- /dev/null +++ b/fairseq/tests/test_export.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import tempfile +import unittest + +import torch + +from fairseq.data.dictionary import Dictionary +from fairseq.models.transformer import TransformerModel +from fairseq.modules import multihead_attention, sinusoidal_positional_embedding +from fairseq.tasks.fairseq_task import LegacyFairseqTask + +DEFAULT_TEST_VOCAB_SIZE = 100 + + +class DummyTask(LegacyFairseqTask): + def __init__(self, args): + super().__init__(args) + self.dictionary = get_dummy_dictionary() + if getattr(self.args, "ctc", False): + self.dictionary.add_symbol("<ctc_blank>") + self.src_dict = self.dictionary + self.tgt_dict = self.dictionary + + @property + def source_dictionary(self): + return self.src_dict + + @property + def target_dictionary(self): + return self.dictionary + + +def get_dummy_dictionary(vocab_size=DEFAULT_TEST_VOCAB_SIZE): + dummy_dict = Dictionary() + # add dummy symbol to satisfy vocab size + for id, _ in enumerate(range(vocab_size)): + dummy_dict.add_symbol("{}".format(id), 1000) + return dummy_dict + + +def get_dummy_task_and_parser(): + """ + Return a dummy task and argument parser, which can be used to + create a model/criterion. 
+ """ + parser = argparse.ArgumentParser( + description="test_dummy_s2s_task", argument_default=argparse.SUPPRESS + ) + DummyTask.add_args(parser) + args = parser.parse_args([]) + task = DummyTask.setup_task(args) + return task, parser + + +def _test_save_and_load(scripted_module): + with tempfile.NamedTemporaryFile() as f: + scripted_module.save(f.name) + torch.jit.load(f.name) + + +class TestExportModels(unittest.TestCase): + def test_export_multihead_attention(self): + module = multihead_attention.MultiheadAttention(embed_dim=8, num_heads=2) + scripted = torch.jit.script(module) + _test_save_and_load(scripted) + + def test_incremental_state_multihead_attention(self): + module1 = multihead_attention.MultiheadAttention(embed_dim=8, num_heads=2) + module1 = torch.jit.script(module1) + module2 = multihead_attention.MultiheadAttention(embed_dim=8, num_heads=2) + module2 = torch.jit.script(module2) + + state = {} + state = module1.set_incremental_state(state, "key", {"a": torch.tensor([1])}) + state = module2.set_incremental_state(state, "key", {"a": torch.tensor([2])}) + v1 = module1.get_incremental_state(state, "key")["a"] + v2 = module2.get_incremental_state(state, "key")["a"] + + self.assertEqual(v1, 1) + self.assertEqual(v2, 2) + + def test_positional_embedding(self): + module = sinusoidal_positional_embedding.SinusoidalPositionalEmbedding( + embedding_dim=8, padding_idx=1 + ) + scripted = torch.jit.script(module) + _test_save_and_load(scripted) + + @unittest.skipIf( + torch.__version__ < "1.6.0", "Targeting OSS scriptability for the 1.6 release" + ) + def test_export_transformer(self): + task, parser = get_dummy_task_and_parser() + TransformerModel.add_args(parser) + args = parser.parse_args([]) + model = TransformerModel.build_model(args, task) + scripted = torch.jit.script(model) + _test_save_and_load(scripted) + + @unittest.skipIf( + torch.__version__ < "1.6.0", "Targeting OSS scriptability for the 1.6 release" + ) + def 
test_export_transformer_no_token_pos_emb(self): + task, parser = get_dummy_task_and_parser() + TransformerModel.add_args(parser) + args = parser.parse_args([]) + args.no_token_positional_embeddings = True + model = TransformerModel.build_model(args, task) + scripted = torch.jit.script(model) + _test_save_and_load(scripted) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_file_chunker_utils.py b/fairseq/tests/test_file_chunker_utils.py new file mode 100644 index 0000000..5cded04 --- /dev/null +++ b/fairseq/tests/test_file_chunker_utils.py @@ -0,0 +1,63 @@ +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import shutil +import tempfile +import unittest +from typing import Optional + + +class TestFileChunker(unittest.TestCase): + _tmpdir: Optional[str] = None + _tmpfile: Optional[str] = None + _line_content = "Hello, World\n" + _num_bytes = None + _num_lines = 200 + _num_splits = 20 + + @classmethod + def setUpClass(cls) -> None: + cls._num_bytes = len(cls._line_content.encode("utf-8")) + cls._tmpdir = tempfile.mkdtemp() + with open(os.path.join(cls._tmpdir, "test.txt"), "w") as f: + cls._tmpfile = f.name + for _i in range(cls._num_lines): + f.write(cls._line_content) + f.flush() + + @classmethod + def tearDownClass(cls) -> None: + # Cleanup temp working dir. 
+ if cls._tmpdir is not None: + shutil.rmtree(cls._tmpdir) # type: ignore + + def test_find_offsets(self): + from fairseq.file_chunker_utils import find_offsets + + offsets = find_offsets(self._tmpfile, self._num_splits) + self.assertEqual(len(offsets), self._num_splits + 1) + (zero, *real_offsets, last) = offsets + self.assertEqual(zero, 0) + for i, o in enumerate(real_offsets): + self.assertEqual( + o, + self._num_bytes + + ((i + 1) * self._num_bytes * self._num_lines / self._num_splits), + ) + self.assertEqual(last, self._num_bytes * self._num_lines) + + def test_readchunks(self): + from fairseq.file_chunker_utils import Chunker, find_offsets + + offsets = find_offsets(self._tmpfile, self._num_splits) + for start, end in zip(offsets, offsets[1:]): + with Chunker(self._tmpfile, start, end) as lines: + all_lines = list(lines) + num_lines = self._num_lines / self._num_splits + self.assertAlmostEqual( + len(all_lines), num_lines, delta=1 + ) # because we split on the bites, we might end up with one more/less line in a chunk + self.assertListEqual( + all_lines, [self._line_content for _ in range(len(all_lines))] + ) diff --git a/fairseq/tests/test_file_io.py b/fairseq/tests/test_file_io.py new file mode 100644 index 0000000..af7c4ce --- /dev/null +++ b/fairseq/tests/test_file_io.py @@ -0,0 +1,59 @@ +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import shutil +import sys +import tempfile +import unittest +from typing import Optional +from unittest.mock import MagicMock + + +class TestFileIO(unittest.TestCase): + + _tmpdir: Optional[str] = None + _tmpfile: Optional[str] = None + _tmpfile_contents = "Hello, World" + + @classmethod + def setUpClass(cls) -> None: + cls._tmpdir = tempfile.mkdtemp() + with open(os.path.join(cls._tmpdir, "test.txt"), "w") as f: + cls._tmpfile = f.name + f.write(cls._tmpfile_contents) + f.flush() + + @classmethod + def tearDownClass(cls) -> None: + # Cleanup temp working dir. + if cls._tmpdir is not None: + shutil.rmtree(cls._tmpdir) # type: ignore + + def test_file_io(self): + from fairseq.file_io import PathManager + + with PathManager.open(os.path.join(self._tmpdir, "test.txt"), "r") as f: + s = f.read() + self.assertEqual(s, self._tmpfile_contents) + + def test_file_io_oss(self): + # Mock iopath to simulate oss environment. + sys.modules["iopath"] = MagicMock() + from fairseq.file_io import PathManager + + with PathManager.open(os.path.join(self._tmpdir, "test.txt"), "r") as f: + s = f.read() + self.assertEqual(s, self._tmpfile_contents) + + def test_file_io_async(self): + # ioPath `PathManager` is initialized after the first `opena` call. + try: + from fairseq.file_io import PathManager + + _asyncfile = os.path.join(self._tmpdir, "async.txt") + f = PathManager.opena(_asyncfile, "wb") + f.close() + + finally: + self.assertTrue(PathManager.async_close()) diff --git a/fairseq/tests/test_fp16_optimizer.py b/fairseq/tests/test_fp16_optimizer.py new file mode 100644 index 0000000..27085a1 --- /dev/null +++ b/fairseq/tests/test_fp16_optimizer.py @@ -0,0 +1,111 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import logging +import unittest + +import torch +from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer +from omegaconf import OmegaConf + + +@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU") +class TestGradientScaling(unittest.TestCase): + def setUp(self): + self.x = torch.tensor([2.0]).cuda().half() + weight = 3.0 + bias = 5.0 + self.error = 1.0 + self.target = torch.tensor([self.x * weight + bias + self.error]).cuda().half() + self.loss_fn = torch.nn.L1Loss() + + self.model = torch.nn.Linear(1, 1) + self.model.weight.data = torch.tensor([[weight]]) + self.model.bias.data = torch.tensor([bias]) + self.model.cuda().half() + self.params = list(self.model.parameters()) + + self.cfg_dls = OmegaConf.create( + { + "optimization": { + "lr": [0.1], + }, + "optimizer": { + "_name": "adam", + "lr": [0.1], + "adam_betas": "(0.9, 0.999)", + "adam_eps": 1e-8, + "weight_decay": 0.0, + }, + "common": { + "fp16_init_scale": 1, + "fp16_scale_window": 1, + "fp16_scale_tolerance": 1, + "threshold_loss_scale": 1, + "min_loss_scale": 1e-4, + "tpu": False, + }, + } + ) + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def run_iter(self, model, params, optimizer): + optimizer.zero_grad() + y = model(self.x) + loss = self.loss_fn(y, self.target) + optimizer.backward(loss) + self.assertEqual(loss, torch.tensor(1.0, device="cuda:0", dtype=torch.float16)) + + grad_norm = optimizer.clip_grad_norm(0) + self.assertAlmostEqual(grad_norm.item(), 2.2361, 4) + + optimizer.step() + self.assertEqual( + model.weight, + torch.tensor( + [[3.0996]], device="cuda:0", dtype=torch.float16, requires_grad=True + ), + ) + self.assertEqual( + model.bias, + torch.tensor( + [5.1016], device="cuda:0", dtype=torch.float16, requires_grad=True + ), + ) + self.assertEqual(optimizer.scaler.loss_scale, 2.0) + + def test_mixed_precision(self): + model = copy.deepcopy(self.model) + params = 
list(model.parameters()) + optimizer = FP16Optimizer.build_optimizer(self.cfg_dls, params) + + self.run_iter(model, params, optimizer) + self.assertTrue( + all( + torch.all( + fp32_params.eq( + torch.tensor( + [3.1000, 5.1000], device="cuda:0", requires_grad=True + ) + ) + ) + for fp32_params in optimizer.fp32_params.values() + ) + ) + + def test_memory_efficient(self): + model = copy.deepcopy(self.model) + params = list(model.parameters()) + optimizer = MemoryEfficientFP16Optimizer.build_optimizer(self.cfg_dls, params) + + self.run_iter(model, params, optimizer) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_hf_hub.py b/fairseq/tests/test_hf_hub.py new file mode 100644 index 0000000..5cfef70 --- /dev/null +++ b/fairseq/tests/test_hf_hub.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch + +try: + import huggingface_hub +except ImportError: + huggingface_hub = None + +from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub + + +@unittest.skipIf(not huggingface_hub, "Requires huggingface_hub install") +class TestHuggingFaceHub(unittest.TestCase): + @torch.no_grad() + def test_hf_fastspeech2(self): + hf_model_id = "facebook/fastspeech2-en-ljspeech" + models, cfg, task = load_model_ensemble_and_task_from_hf_hub(hf_model_id) + self.assertTrue(len(models) > 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_huffman.py b/fairseq/tests/test_huffman.py new file mode 100644 index 0000000..85d0c72 --- /dev/null +++ b/fairseq/tests/test_huffman.py @@ -0,0 +1,179 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import typing as tp +import unittest +from collections import Counter +from tempfile import NamedTemporaryFile, TemporaryDirectory + +from fairseq.data import Dictionary, indexed_dataset +from fairseq.data.huffman import ( + HuffmanCodeBuilder, + HuffmanCoder, + HuffmanMMapIndexedDataset, + HuffmanMMapIndexedDatasetBuilder, +) +from tests.utils import POPULATION, make_data, sizes + + +def make_counts(data: tp.List[tp.List[str]]) -> Counter: + return Counter([symbol for sentence in data for symbol in sentence]) + + +def make_code_builder(data: tp.List[tp.List[str]]) -> HuffmanCodeBuilder: + builder = HuffmanCodeBuilder() + for sentence in data: + builder.add_symbols(*sentence) + return builder + + +class TestCodeBuilder(unittest.TestCase): + def test_code_builder_can_count(self): + data = make_data() + counts = make_counts(data) + builder = make_code_builder(data) + + self.assertEqual(builder.symbols, counts) + + def test_code_builder_can_add(self): + data = make_data() + counts = make_counts(data) + builder = make_code_builder(data) + + new_builder = builder + builder + + self.assertEqual(new_builder.symbols, counts + counts) + + def test_code_builder_can_io(self): + data = make_data() + builder = make_code_builder(data) + + with NamedTemporaryFile() as tmp_fp: + builder.to_file(tmp_fp.name) + other_builder = HuffmanCodeBuilder.from_file(tmp_fp.name) + + self.assertEqual(builder.symbols, other_builder.symbols) + + +class TestCoder(unittest.TestCase): + def test_coder_can_io(self): + data = make_data() + builder = make_code_builder(data) + coder = builder.build_code() + + with NamedTemporaryFile() as tmp_fp: + coder.to_file(tmp_fp.name) + other_coder = HuffmanCoder.from_file(tmp_fp.name) + + self.assertEqual(coder, other_coder) + + def test_coder_can_encode_decode(self): + data = make_data() + builder = make_code_builder(data) + coder = builder.build_code() + + encoded = [coder.encode(sentence) for sentence in data] + decoded = [[n.symbol for n in 
coder.decode(enc)] for enc in encoded] + + self.assertEqual(decoded, data) + + unseen_data = make_data() + unseen_encoded = [coder.encode(sentence) for sentence in unseen_data] + unseen_decoded = [ + [n.symbol for n in coder.decode(enc)] for enc in unseen_encoded + ] + self.assertEqual(unseen_decoded, unseen_data) + + +def build_dataset(prefix, data, coder): + with HuffmanMMapIndexedDatasetBuilder(prefix, coder) as builder: + for sentence in data: + builder.add_item(sentence) + + +class TestHuffmanDataset(unittest.TestCase): + def test_huffman_can_encode_decode(self): + data = make_data() + builder = make_code_builder(data) + coder = builder.build_code() + + with TemporaryDirectory() as dirname: + prefix = os.path.join(dirname, "test1") + build_dataset(prefix, data, coder) + dataset = HuffmanMMapIndexedDataset(prefix) + + self.assertEqual(len(dataset), len(data)) + decoded = [list(dataset.get_symbols(i)) for i in range(0, len(dataset))] + + self.assertEqual(decoded, data) + data_sizes = [i.item() for i in dataset.sizes] + self.assertEqual(data_sizes, sizes(data)) + + def test_huffman_compresses(self): + data = make_data() + builder = make_code_builder(data) + coder = builder.build_code() + + with TemporaryDirectory() as dirname: + prefix = os.path.join(dirname, "huffman") + build_dataset(prefix, data, coder) + + prefix_mmap = os.path.join(dirname, "mmap") + mmap_builder = indexed_dataset.make_builder( + indexed_dataset.data_file_path(prefix_mmap), + "mmap", + vocab_size=len(POPULATION), + ) + dictionary = Dictionary() + for c in POPULATION: + dictionary.add_symbol(c) + dictionary.finalize() + for sentence in data: + mmap_builder.add_item(dictionary.encode_line(" ".join(sentence))) + mmap_builder.finalize(indexed_dataset.index_file_path(prefix_mmap)) + + huff_size = os.stat(indexed_dataset.data_file_path(prefix)).st_size + mmap_size = os.stat(indexed_dataset.data_file_path(prefix_mmap)).st_size + self.assertLess(huff_size, mmap_size) + + def 
test_huffman_can_append(self): + data1 = make_data() + builder = make_code_builder(data1) + coder = builder.build_code() + + with TemporaryDirectory() as dirname: + prefix1 = os.path.join(dirname, "test1") + build_dataset(prefix1, data1, coder) + + data2 = make_data() + prefix2 = os.path.join(dirname, "test2") + build_dataset(prefix2, data2, coder) + + prefix3 = os.path.join(dirname, "test3") + + with HuffmanMMapIndexedDatasetBuilder(prefix3, coder) as builder: + builder.append(prefix1) + builder.append(prefix2) + + dataset = HuffmanMMapIndexedDataset(prefix3) + + self.assertEqual(len(dataset), len(data1) + len(data2)) + + decoded1 = [list(dataset.get_symbols(i)) for i in range(0, len(data1))] + self.assertEqual(decoded1, data1) + + decoded2 = [ + list(dataset.get_symbols(i)) for i in range(len(data1), len(dataset)) + ] + self.assertEqual(decoded2, data2) + + data_sizes = [i.item() for i in dataset.sizes] + self.assertEqual(data_sizes[: len(data1)], sizes(data1)) + self.assertEqual(data_sizes[len(data1) : len(dataset)], sizes(data2)) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_inference_dropout.py b/fairseq/tests/test_inference_dropout.py new file mode 100644 index 0000000..353ac67 --- /dev/null +++ b/fairseq/tests/test_inference_dropout.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import unittest + +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models.transformer import TransformerModel +from tests.test_sequence_generator import get_dummy_task_and_parser + + +class TestInferenceDropout(unittest.TestCase): + def setUp(self): + self.task, self.parser = get_dummy_task_and_parser() + TransformerModel.add_args(self.parser) + self.args = self.parser.parse_args([]) + self.args.encoder_layers = 2 + self.args.decoder_layers = 1 + logging.disable(logging.CRITICAL) + + def tearDown(self): + logging.disable(logging.NOTSET) + + def test_sets_inference_dropout_to_true(self): + self.args.retain_dropout = True + self.transformer_model = TransformerModel.build_model(self.args, self.task) + cfg = convert_namespace_to_omegaconf(self.args) + self.transformer_model.prepare_for_inference_(cfg) + assert self.transformer_model.encoder.dropout_module.apply_during_inference + assert self.transformer_model.decoder.dropout_module.apply_during_inference + for layer in self.transformer_model.encoder.layers: + assert layer.dropout_module.apply_during_inference + + def test_inference_dropout_false_by_default(self): + self.transformer_model = TransformerModel.build_model(self.args, self.task) + cfg = convert_namespace_to_omegaconf(self.args) + self.transformer_model.prepare_for_inference_(cfg) + assert not self.transformer_model.encoder.dropout_module.apply_during_inference + assert not self.transformer_model.decoder.dropout_module.apply_during_inference + for layer in self.transformer_model.encoder.layers: + assert not layer.dropout_module.apply_during_inference + for layer in self.transformer_model.decoder.layers: + assert not layer.dropout_module.apply_during_inference + + def test_applies_training_mode(self): + self.transformer_model = TransformerModel.build_model(self.args, self.task) + assert self.transformer_model.encoder.dropout_module.training + for layer in self.transformer_model.encoder.layers: + assert 
layer.dropout_module.training + + self.transformer_model.eval() + assert not self.transformer_model.decoder.dropout_module.training + for layer in self.transformer_model.encoder.layers: + assert not layer.dropout_module.training + + def test_retain_modules(self): + self.args.retain_dropout = True + self.args.retain_dropout_modules = [ + "TransformerEncoder", + "TransformerEncoderLayer", + ] + self.transformer_model = TransformerModel.build_model(self.args, self.task) + cfg = convert_namespace_to_omegaconf(self.args) + self.transformer_model.prepare_for_inference_(cfg) + assert self.transformer_model.encoder.dropout_module.apply_during_inference + assert not self.transformer_model.decoder.dropout_module.apply_during_inference + for layer in self.transformer_model.decoder.layers: + assert not layer.dropout_module.apply_during_inference diff --git a/fairseq/tests/test_iopath.py b/fairseq/tests/test_iopath.py new file mode 100644 index 0000000..48230a6 --- /dev/null +++ b/fairseq/tests/test_iopath.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +from unittest import mock + + +class TestIOPath(unittest.TestCase): + def test_no_iopath(self): + from .test_reproducibility import TestReproducibility + + with mock.patch.dict("sys.modules", {"iopath": None}): + # reuse reproducibility tests, which are e2e tests that should cover + # most checkpoint related functionality + TestReproducibility._test_reproducibility(self, "test_reproducibility") + + def test_no_supports_rename(self): + from .test_reproducibility import TestReproducibility + + with mock.patch("fairseq.file_io.PathManager.supports_rename") as mock_fn: + mock_fn.return_value = False + TestReproducibility._test_reproducibility(self, "test_reproducibility") + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_iterators.py b/fairseq/tests/test_iterators.py new file mode 100644 index 0000000..2e2eb2f --- /dev/null +++ b/fairseq/tests/test_iterators.py @@ -0,0 +1,194 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +from fairseq.data import iterators, ListDataset + + +class TestIterators(unittest.TestCase): + def test_counting_iterator_index(self, ref=None, itr=None): + # Test the indexing functionality of CountingIterator + if ref is None: + assert itr is None + ref = list(range(10)) + itr = iterators.CountingIterator(ref) + else: + assert len(ref) == 10 + assert itr is not None + + self.assertTrue(itr.has_next()) + self.assertEqual(itr.n, 0) + self.assertEqual(next(itr), ref[0]) + self.assertEqual(itr.n, 1) + self.assertEqual(next(itr), ref[1]) + self.assertEqual(itr.n, 2) + itr.skip(3) + self.assertEqual(itr.n, 5) + self.assertEqual(next(itr), ref[5]) + itr.skip(2) + self.assertEqual(itr.n, 8) + self.assertEqual(list(itr), [ref[8], ref[9]]) + self.assertFalse(itr.has_next()) + + def test_counting_iterator_length_mismatch(self): + ref = list(range(10)) + # When the underlying iterable is longer than the CountingIterator, + # the remaining items in the iterable should be ignored + itr = iterators.CountingIterator(ref, total=8) + self.assertEqual(list(itr), ref[:8]) + # When the underlying iterable is shorter than the CountingIterator, + # raise an IndexError when the underlying iterable is exhausted + itr = iterators.CountingIterator(ref, total=12) + self.assertRaises(IndexError, list, itr) + + def test_counting_iterator_take(self): + # Test the "take" method of CountingIterator + ref = list(range(10)) + itr = iterators.CountingIterator(ref) + itr.take(5) + self.assertEqual(len(itr), len(list(iter(itr)))) + self.assertEqual(len(itr), 5) + + itr = iterators.CountingIterator(ref) + itr.take(5) + self.assertEqual(next(itr), ref[0]) + self.assertEqual(next(itr), ref[1]) + itr.skip(2) + self.assertEqual(next(itr), ref[4]) + self.assertFalse(itr.has_next()) + + def test_grouped_iterator(self): + # test correctness + x = list(range(10)) + itr = iterators.GroupedIterator(x, 1) + self.assertEqual(list(itr), [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]) + 
itr = iterators.GroupedIterator(x, 4) + self.assertEqual(list(itr), [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]) + itr = iterators.GroupedIterator(x, 5) + self.assertEqual(list(itr), [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) + + # test the GroupIterator also works correctly as a CountingIterator + x = list(range(30)) + ref = list(iterators.GroupedIterator(x, 3)) + itr = iterators.GroupedIterator(x, 3) + self.test_counting_iterator_index(ref, itr) + + def test_sharded_iterator(self): + # test correctness + x = list(range(10)) + itr = iterators.ShardedIterator(x, num_shards=1, shard_id=0) + self.assertEqual(list(itr), x) + itr = iterators.ShardedIterator(x, num_shards=2, shard_id=0) + self.assertEqual(list(itr), [0, 2, 4, 6, 8]) + itr = iterators.ShardedIterator(x, num_shards=2, shard_id=1) + self.assertEqual(list(itr), [1, 3, 5, 7, 9]) + itr = iterators.ShardedIterator(x, num_shards=3, shard_id=0) + self.assertEqual(list(itr), [0, 3, 6, 9]) + itr = iterators.ShardedIterator(x, num_shards=3, shard_id=1) + self.assertEqual(list(itr), [1, 4, 7, None]) + itr = iterators.ShardedIterator(x, num_shards=3, shard_id=2) + self.assertEqual(list(itr), [2, 5, 8, None]) + + # test CountingIterator functionality + x = list(range(30)) + ref = list(iterators.ShardedIterator(x, num_shards=3, shard_id=0)) + itr = iterators.ShardedIterator(x, num_shards=3, shard_id=0) + self.test_counting_iterator_index(ref, itr) + + def test_counting_iterator_buffered_iterator_take(self): + ref = list(range(10)) + buffered_itr = iterators.BufferedIterator(2, ref) + itr = iterators.CountingIterator(buffered_itr) + itr.take(5) + self.assertEqual(len(itr), len(list(iter(itr)))) + self.assertEqual(len(itr), 5) + + buffered_itr = iterators.BufferedIterator(2, ref) + itr = iterators.CountingIterator(buffered_itr) + itr.take(5) + self.assertEqual(len(buffered_itr), 5) + self.assertEqual(len(list(iter(buffered_itr))), 5) + + buffered_itr = iterators.BufferedIterator(2, ref) + itr = iterators.CountingIterator(buffered_itr) 
+ itr.take(5) + self.assertEqual(next(itr), ref[0]) + self.assertEqual(next(itr), ref[1]) + itr.skip(2) + self.assertEqual(next(itr), ref[4]) + self.assertFalse(itr.has_next()) + self.assertRaises(StopIteration, next, buffered_itr) + + ref = list(range(4, 10)) + buffered_itr = iterators.BufferedIterator(2, ref) + itr = iterators.CountingIterator(buffered_itr, start=4) + itr.take(5) + self.assertEqual(len(itr), 5) + self.assertEqual(len(buffered_itr), 1) + self.assertEqual(next(itr), ref[0]) + self.assertFalse(itr.has_next()) + self.assertRaises(StopIteration, next, buffered_itr) + + def test_epoch_batch_iterator_skip_remainder_batch(self): + reference = [1, 2, 3] + itr1 = _get_epoch_batch_itr(reference, 2, True) + self.assertEqual(len(itr1), 1) + itr2 = _get_epoch_batch_itr(reference, 2, False) + self.assertEqual(len(itr2), 2) + itr3 = _get_epoch_batch_itr(reference, 1, True) + self.assertEqual(len(itr3), 2) + itr4 = _get_epoch_batch_itr(reference, 1, False) + self.assertEqual(len(itr4), 3) + itr5 = _get_epoch_batch_itr(reference, 4, True) + self.assertEqual(len(itr5), 0) + self.assertFalse(itr5.has_next()) + itr6 = _get_epoch_batch_itr(reference, 4, False) + self.assertEqual(len(itr6), 1) + + def test_grouped_iterator_skip_remainder_batch(self): + reference = [1, 2, 3, 4, 5, 6, 7, 8, 9] + itr1 = _get_epoch_batch_itr(reference, 3, False) + grouped_itr1 = iterators.GroupedIterator(itr1, 2, True) + self.assertEqual(len(grouped_itr1), 1) + + itr2 = _get_epoch_batch_itr(reference, 3, False) + grouped_itr2 = iterators.GroupedIterator(itr2, 2, False) + self.assertEqual(len(grouped_itr2), 2) + + itr3 = _get_epoch_batch_itr(reference, 3, True) + grouped_itr3 = iterators.GroupedIterator(itr3, 2, True) + self.assertEqual(len(grouped_itr3), 1) + + itr4 = _get_epoch_batch_itr(reference, 3, True) + grouped_itr4 = iterators.GroupedIterator(itr4, 2, False) + self.assertEqual(len(grouped_itr4), 1) + + itr5 = _get_epoch_batch_itr(reference, 5, True) + grouped_itr5 = 
iterators.GroupedIterator(itr5, 2, True) + self.assertEqual(len(grouped_itr5), 0) + + itr6 = _get_epoch_batch_itr(reference, 5, True) + grouped_itr6 = iterators.GroupedIterator(itr6, 2, False) + self.assertEqual(len(grouped_itr6), 1) + + +def _get_epoch_batch_itr(ref, bsz, skip_remainder_batch): + dsz = len(ref) + indices = range(dsz) + starts = indices[::bsz] + batch_sampler = [indices[s : s + bsz] for s in starts] + dataset = ListDataset(ref) + itr = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + skip_remainder_batch=skip_remainder_batch, + ) + return itr.next_epoch_itr() + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_label_smoothing.py b/fairseq/tests/test_label_smoothing.py new file mode 100644 index 0000000..04c0f97 --- /dev/null +++ b/fairseq/tests/test_label_smoothing.py @@ -0,0 +1,123 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import copy +import unittest + +import tests.utils as test_utils +import torch +from fairseq.criterions.cross_entropy import CrossEntropyCriterion +from fairseq.criterions.label_smoothed_cross_entropy import ( + LabelSmoothedCrossEntropyCriterion, +) + + +class TestLabelSmoothing(unittest.TestCase): + def setUp(self): + # build dictionary + self.d = test_utils.dummy_dictionary(3) + vocab = len(self.d) + self.assertEqual(vocab, 4 + 3) # 4 special + 3 tokens + self.assertEqual(self.d.pad(), 1) + self.assertEqual(self.d.eos(), 2) + self.assertEqual(self.d.unk(), 3) + pad, eos, unk, w1, w2, w3 = 1, 2, 3, 4, 5, 6 # noqa: F841 + + # build dataset + self.data = [ + # the first batch item has padding + { + "source": torch.LongTensor([w1, eos]), + "target": torch.LongTensor([w1, eos]), + }, + { + "source": torch.LongTensor([w1, eos]), + "target": torch.LongTensor([w1, w1, eos]), + }, + ] + self.sample = next(test_utils.dummy_dataloader(self.data)) + + # build model + self.args = argparse.Namespace() + self.args.sentence_avg = False + self.args.report_accuracy = False + self.args.probs = ( + torch.FloatTensor( + [ + # pad eos unk w1 w2 w3 + [0.05, 0.05, 0.1, 0.05, 0.3, 0.4, 0.05], + [0.05, 0.10, 0.2, 0.05, 0.2, 0.3, 0.10], + [0.05, 0.15, 0.3, 0.05, 0.1, 0.2, 0.15], + ] + ) + .unsqueeze(0) + .expand(2, 3, 7) + ) # add batch dimension + self.task = test_utils.TestTranslationTask.setup_task(self.args, self.d, self.d) + self.model = self.task.build_model(self.args) + + def test_nll_loss(self): + self.args.label_smoothing = 0.1 + nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task) + smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion( + self.args, self.task + ) + nll_loss, nll_sample_size, nll_logging_output = nll_crit( + self.model, self.sample + ) + smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit( + self.model, self.sample + ) + self.assertLess(abs(nll_loss - nll_logging_output["loss"]), 1e-6) + 
self.assertLess(abs(nll_loss - smooth_logging_output["nll_loss"]), 1e-6) + + def test_padding(self): + self.args.label_smoothing = 0.1 + crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task) + loss, _, logging_output = crit(self.model, self.sample) + + def get_one_no_padding(idx): + # create a new sample with just a single batch item so that there's + # no padding + sample1 = next(test_utils.dummy_dataloader([self.data[idx]])) + args1 = copy.copy(self.args) + args1.probs = args1.probs[idx, :, :].unsqueeze(0) + model1 = self.task.build_model(args1) + loss1, _, _ = crit(model1, sample1) + return loss1 + + loss1 = get_one_no_padding(0) + loss2 = get_one_no_padding(1) + self.assertAlmostEqual(loss, loss1 + loss2) + + def test_reduction(self): + self.args.label_smoothing = 0.1 + crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task) + loss, _, logging_output = crit(self.model, self.sample, reduce=True) + unreduced_loss, _, _ = crit(self.model, self.sample, reduce=False) + self.assertAlmostEqual(loss, unreduced_loss.sum()) + + def test_zero_eps(self): + self.args.label_smoothing = 0.0 + nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task) + smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion( + self.args, self.task + ) + nll_loss, nll_sample_size, nll_logging_output = nll_crit( + self.model, self.sample + ) + smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit( + self.model, self.sample + ) + self.assertAlmostEqual(nll_loss, smooth_loss) + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-6) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_lm_context_window.py b/fairseq/tests/test_lm_context_window.py new file mode 100644 index 0000000..165e04a --- /dev/null +++ b/fairseq/tests/test_lm_context_window.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch + +from fairseq.data import MonolingualDataset +from fairseq.tasks.language_modeling import LanguageModelingConfig, LanguageModelingTask +from tests import utils as test_utils + + +class TestLMContextWindow(unittest.TestCase): + def test_eval_dataloader(self): + dictionary = test_utils.dummy_dictionary(10) + assert len(dictionary) == 14 # 4 extra special symbols + assert dictionary.pad() == 1 + + dataset = test_utils.TestDataset( + [ + torch.tensor([4, 5, 6, 7], dtype=torch.long), + torch.tensor([8, 9, 10, 11], dtype=torch.long), + torch.tensor([12, 13], dtype=torch.long), + ] + ) + dataset = MonolingualDataset(dataset, sizes=[4, 4, 2], src_vocab=dictionary) + + config = LanguageModelingConfig(tokens_per_sample=4) + task = LanguageModelingTask(config, dictionary) + + eval_dataloader = task.eval_lm_dataloader( + dataset=dataset, + batch_size=1, + context_window=2, + num_workers=0, + ) + + batch = next(eval_dataloader) + assert batch["net_input"]["src_tokens"][0].tolist() == [4, 5, 6, 7, 1, 1] + assert batch["target"][0].tolist() == [4, 5, 6, 7, 1, 1] + + batch = next(eval_dataloader) + assert batch["net_input"]["src_tokens"][0].tolist() == [6, 7, 8, 9, 10, 11] + assert batch["target"][0].tolist() == [1, 1, 8, 9, 10, 11] + + batch = next(eval_dataloader) + assert batch["net_input"]["src_tokens"][0].tolist() == [10, 11, 12, 13] + assert batch["target"][0].tolist() == [1, 1, 12, 13] + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_lstm_jitable.py b/fairseq/tests/test_lstm_jitable.py new file mode 100644 index 0000000..38f79d1 --- /dev/null +++ b/fairseq/tests/test_lstm_jitable.py @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
def get_dummy_dictionary(vocab_size=DEFAULT_TEST_VOCAB_SIZE):
    """Return a ``Dictionary`` extended with ``vocab_size`` placeholder symbols.

    The added symbols are the decimal strings ``"0"`` .. ``str(vocab_size - 1)``.

    Args:
        vocab_size: number of placeholder symbols to add.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dummy_dict = Dictionary()
    # add dummy symbols to satisfy vocab size
    # NOTE(review): 1000 is passed as the second positional argument of
    # add_symbol, presumably the symbol count; confirm against Dictionary.
    for symbol_id in range(vocab_size):
        dummy_dict.add_symbol(str(symbol_id), 1000)
    return dummy_dict
class TestJitLSTMModel(unittest.TestCase):
    """TorchScript checks for ``LSTMModel``: export round trip and eager parity."""

    def _build_default_model(self):
        """Return ``(model, task)`` built from parser defaults with an empty criterion."""
        task, parser = get_dummy_task_and_parser()
        LSTMModel.add_args(parser)
        args = parser.parse_args([])
        args.criterion = ""
        return LSTMModel.build_model(args, task), task

    def _test_save_and_load(self, scripted_module):
        """Save the scripted module to a temporary file and load it back."""
        with tempfile.NamedTemporaryFile() as f:
            scripted_module.save(f.name)
            torch.jit.load(f.name)

    def assertTensorEqual(self, t1, t2):
        """Assert both tensors match exactly after NaN entries are dropped from each."""
        t1 = t1[~torch.isnan(t1)]  # can cause size mismatch errors if there are NaNs
        t2 = t2[~torch.isnan(t2)]
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)

    def test_jit_and_export_lstm(self):
        """Script the model and make sure it survives a save/load round trip."""
        model, _ = self._build_default_model()
        scripted_model = torch.jit.script(model)
        self._test_save_and_load(scripted_model)

    def test_assert_jit_vs_nonjit_(self):
        """Feed identical random batches to the eager and scripted models."""
        model, task = self._build_default_model()
        model.eval()
        scripted_model = torch.jit.script(model)
        scripted_model.eval()
        vocab_size = len(task.source_dictionary)
        num_trials = 100
        # Inject random input and check output
        seq_lens = torch.randint(1, 10, (num_trials,)).tolist()
        batch_sizes = torch.randint(1, 10, (num_trials,)).tolist()
        for seq_len, num_samples in zip(seq_lens, batch_sizes):
            src_tokens = torch.randint(0, vocab_size, (num_samples, seq_len))
            src_lengths = torch.randint(1, seq_len + 1, (num_samples,))
            src_lengths, _ = torch.sort(src_lengths, descending=True)
            # Force the first sample to have seq_len
            src_lengths[0] = seq_len
            prev_output_tokens = torch.randint(0, vocab_size, (num_samples, 1))
            result = model(src_tokens, src_lengths, prev_output_tokens, None)
            scripted_result = scripted_model(
                src_tokens, src_lengths, prev_output_tokens, None
            )
            self.assertTensorEqual(result[0], scripted_result[0])
            self.assertTensorEqual(result[1], scripted_result[1])
class TestMetrics(unittest.TestCase):
    """Checks how ``metrics.aggregate`` contexts nest, isolate and share scalars."""

    @staticmethod
    def _smoothed_loss(aggregator):
        """Return the smoothed "loss" value held by an aggregator."""
        return aggregator.get_smoothed_values()["loss"]

    def test_nesting(self):
        # The expected 1.5 shows the outer context also sees the inner log.
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as inner:
                metrics.log_scalar("loss", 2)

        self.assertEqual(self._smoothed_loss(outer), 1.5)
        self.assertEqual(self._smoothed_loss(inner), 2)

    def test_new_root(self):
        # With new_root=True the inner log is expected not to reach the outer.
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as isolated:
                metrics.log_scalar("loss", 2)

        self.assertEqual(self._smoothed_loss(outer), 1)
        self.assertEqual(self._smoothed_loss(isolated), 2)

    def test_nested_new_root(self):
        with metrics.aggregate() as level1:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as level2:
                metrics.log_scalar("loss", 2)
                with metrics.aggregate() as level3:
                    metrics.log_scalar("loss", 3)
                    with metrics.aggregate(new_root=True) as level4:
                        metrics.log_scalar("loss", 4)
            metrics.log_scalar("loss", 1.5)

        # Checked innermost first, as in the logging order's reverse.
        self.assertEqual(self._smoothed_loss(level4), 4)
        self.assertEqual(self._smoothed_loss(level3), 3)
        self.assertEqual(self._smoothed_loss(level2), 2.5)
        self.assertEqual(self._smoothed_loss(level1), 1.25)

    def test_named(self):
        agg_name = str(uuid.uuid4())
        metrics.reset_meters(agg_name)

        with metrics.aggregate(agg_name):
            metrics.log_scalar("loss", 1)

        # Logged outside the named context: expected to be ignored by it.
        metrics.log_scalar("loss", 3)

        with metrics.aggregate(agg_name):
            metrics.log_scalar("loss", 2)

        self.assertEqual(metrics.get_smoothed_values(agg_name)["loss"], 1.5)

    def test_nested_duplicate_names(self):
        agg_name = str(uuid.uuid4())
        metrics.reset_meters(agg_name)

        with metrics.aggregate(agg_name):
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as anonymous:
                with metrics.aggregate(agg_name):
                    metrics.log_scalar("loss", 2)
            metrics.log_scalar("loss", 6)

        self.assertEqual(metrics.get_smoothed_values(agg_name)["loss"], 3)
        self.assertEqual(self._smoothed_loss(anonymous), 2)
class TestMultiCorpusDataset(unittest.TestCase):
    """Checks that ``MultiCorpusDataset`` samples its corpora at the requested ratio."""

    @staticmethod
    def _make_pair_dataset(token_values, dictionary):
        """Wrap the given token ids, one per block, in a ``LanguagePairDataset``."""
        tokens = torch.LongTensor(list(token_values)).view(1, -1)
        token_blocks = TokenBlockDataset(
            tokens,
            sizes=[tokens.size(-1)],
            block_size=1,
            pad=0,
            eos=1,
            include_targets=False,
        )
        return LanguagePairDataset(
            token_blocks, token_blocks.sizes, dictionary, shuffle=False
        )

    def setUp(self):
        d = mock_dict()
        # dataset_1 holds only odd ids and dataset_2 only even ids, so the
        # parity of a sampled item identifies the corpus it came from.
        self.dataset_1 = self._make_pair_dataset(range(1, 5000, 2), d)
        self.dataset_2 = self._make_pair_dataset(range(0, 5000, 2), d)

    def _test_sample_helper(
        self,
        distribution,
    ):
        """Sample one epoch and verify the corpus ratio and unique item count.

        Args:
            distribution: two weights, for ``dataset_1`` and ``dataset_2``.
        """
        m = MultiCorpusDataset(
            OrderedDict({0: self.dataset_1, 1: self.dataset_2}),
            distribution=distribution,
            seed=0,
            sort_indices=True,
        )
        m.set_epoch(1)
        indices = m.ordered_indices()
        count_sample_from_first_dataset = 0
        items = set()
        for i in indices:
            item = m[i]["source"].item()
            if item % 2 == 1:
                count_sample_from_first_dataset += 1

            items.add(item)
        sample_from_first_ds_percentage = (
            1.0 * count_sample_from_first_dataset / len(indices)
        )
        self.assertLess(
            abs(sample_from_first_ds_percentage - distribution[0]),
            0.01,
        )
        # Each corpus contributes at most its own size in unique items. The
        # second term previously clamped with len(self.dataset_1); the result
        # is unchanged here because both corpora have the same length.
        self.assertEqual(
            len(items),
            int(
                min(len(self.dataset_1), len(indices) * distribution[0])
                + min(len(self.dataset_2), len(indices) * distribution[1])
            ),
        )
        print(distribution)

    def test_multi_corpus_dataset(self):
        for distribution in [[0.5, 0.5], [0.1, 0.9], [0.9, 0.1], [0.0, 1.0]]:
            self._test_sample_helper(distribution=distribution)
class TestMultiCorpusSampledDataset(unittest.TestCase):
    """Checks which corpus ``MultiCorpusSampledDataset`` draws from when collating."""

    @staticmethod
    def _single_token_dataset(token_id, dictionary):
        """Build a ``LanguagePairDataset`` whose only source token is ``token_id``."""
        tokens = torch.LongTensor([token_id]).view(1, -1)
        token_blocks = TokenBlockDataset(
            tokens,
            sizes=[tokens.size(-1)],
            block_size=1,
            pad=0,
            eos=1,
            include_targets=False,
        )
        return LanguagePairDataset(
            token_blocks, token_blocks.sizes, dictionary, shuffle=False
        )

    def setUp(self):
        d = mock_dict()
        # Token 1 marks the first corpus and token 2 the second.
        self.dataset_1 = self._single_token_dataset(1, d)
        self.dataset_2 = self._single_token_dataset(2, d)

    def _test_sample_helper(
        self,
        expected_sample_from_first_ds_percentage,
        num_samples=1000,
        sampling_func=None,
    ):
        """Collate ``num_samples`` batches and compare the first-corpus share.

        Args:
            expected_sample_from_first_ds_percentage: expected fraction of
                batches whose first source token is 1.
            num_samples: number of batches to collate.
            sampling_func: optional sampler forwarded to the dataset; when
                None the dataset is built without that argument.
        """
        # To make sure test is not flaky
        np.random.seed(0)
        extra_kwargs = {}
        if sampling_func is not None:
            extra_kwargs["sampling_func"] = sampling_func
        m = MultiCorpusSampledDataset(
            OrderedDict({0: self.dataset_1, 1: self.dataset_2}),
            **extra_kwargs,
        )
        m.ordered_indices()
        hits_on_first = 0
        for _ in range(num_samples):
            batch = m.collater([m[0], m[1]])
            if batch["net_input"]["src_tokens"][0] == 1:
                hits_on_first += 1
        observed_share = 1.0 * hits_on_first / num_samples
        deviation = abs(observed_share - expected_sample_from_first_ds_percentage)
        self.assertLess(deviation, 0.01)

    def test_multi_corpus_sampled_dataset_uniform_sample(self):
        self._test_sample_helper(expected_sample_from_first_ds_percentage=0.5)

    def test_multi_corpus_sampled_dataset_weighted_sample(self):
        def naive_weighted_sample(weights):
            """Return a sampler picking index ``i`` with probability ``weights[i]``."""

            def f(input):
                threshold = np.random.random()
                running_total = 0
                for position, weight in enumerate(weights):
                    running_total += weight
                    if running_total > threshold:
                        return position

            return f

        self._test_sample_helper(
            expected_sample_from_first_ds_percentage=0.9,
            sampling_func=naive_weighted_sample(weights=[0.9, 0.1]),
        )
# FIXME: some tests fail when decimal=2, fix this and set decimal to 2
def assert_almost_equal(x, y, decimal=1, err_msg=""):
    """Assert that ``x`` and ``y`` agree to ``decimal`` decimal places.

    Tensor arguments are moved to the CPU, detached and converted to NumPy
    arrays first; any other argument is passed through unchanged.

    Raises:
        AssertionError: if the values differ beyond the requested precision.
    """
    import numpy.testing as npt

    actual, desired = (
        value.cpu().detach().numpy() if isinstance(value, torch.Tensor) else value
        for value in (x, y)
    )
    npt.assert_array_almost_equal(actual, desired, err_msg=err_msg, decimal=decimal)


def _reset_seeds():
    """Seed the torch (CPU and CUDA) and ``random`` generators with 0."""
    for seed_fn in (
        torch.manual_seed,
        torch.random.manual_seed,
        random.seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(0)


def _get_mask(to_dtype: torch.dtype, dim0: int, dim1: int):
    """Return a random ``(dim0, dim1)`` mask of the requested dtype.

    For ``torch.float`` the mask is additive: randomly chosen positions hold
    ``-inf`` and the rest hold 0. For any other dtype the mask holds random
    0/1 values cast to that dtype.
    """
    random_bits = torch.randint(0, 2, (dim0, dim1))
    if to_dtype != torch.float:
        return random_bits.to(dtype=to_dtype)
    blocked = random_bits.to(dtype=torch.bool)
    return blocked.to(dtype=to_dtype).masked_fill(blocked, -float("inf"))
@pytest.mark.skipif(not torch.cuda.is_available(), reason="blocksparse requires gpu")
@pytest.mark.skip(reason="not part of latest xformers")
@pytest.mark.parametrize("device", ["cuda"])
@pytest.mark.parametrize("add_zero_attn", [False])
@pytest.mark.parametrize("batch_size", [20])
@pytest.mark.parametrize("embedding", [64])
@pytest.mark.parametrize("seq_len", [64])
@pytest.mark.parametrize("num_heads", [4])
def test_xformers_blocksparse_parity(
    device,
    add_zero_attn,
    batch_size,
    embedding,
    seq_len,
    num_heads,
):
    """Compare xformers attention with and without a blocksparse layout.

    Two ``MultiheadAttention`` modules are built after resetting the seeds,
    one with an all-ones blocksparse layout and one with no layout. Both
    receive the same half-precision inputs, and the outputs and input
    gradients are compared with ``assert_almost_equal``.
    """

    xformers_att_config = '{"name": "scaled_dot_product"}'
    xformers_blocksparse_blocksize = 16
    xformers_blocksparse_layout = torch.ones(
        seq_len // xformers_blocksparse_blocksize,
        seq_len // xformers_blocksparse_blocksize,
        dtype=torch.int32,
    )

    q = torch.rand(seq_len, batch_size, embedding).to(device).half()
    q.requires_grad = True
    k = torch.rand(seq_len, batch_size, embedding).to(device).half()
    k.requires_grad = True
    v = torch.rand(seq_len, batch_size, embedding).to(device).half()
    v.requires_grad = True

    # Independent leaf copies so each module accumulates its own gradients.
    q_ = q.detach().clone().half()
    q_.requires_grad = True
    k_ = k.detach().clone().half()
    k_.requires_grad = True
    v_ = v.detach().clone().half()
    v_.requires_grad = True

    _reset_seeds()
    xf_blocksparse_mha = (
        MultiheadAttention(
            embedding,
            num_heads,
            dropout=0.0,
            add_zero_attn=add_zero_attn,
            xformers_att_config=xformers_att_config,
            xformers_blocksparse_layout=xformers_blocksparse_layout,
            xformers_blocksparse_blocksize=xformers_blocksparse_blocksize,
        )
        .to(device)
        .half()
    )

    xf_blocksparse_output, _ = xf_blocksparse_mha(
        q,
        k,
        v,
    )

    _reset_seeds()
    xformers_mha = (
        MultiheadAttention(
            embedding,
            num_heads,
            dropout=0.0,
            add_zero_attn=add_zero_attn,
            xformers_att_config=xformers_att_config,
            xformers_blocksparse_layout=None,
        )
        .to(device)
        .half()
    )

    xformers_output, _ = xformers_mha(
        q_,
        k_,
        v_,
    )

    # account for when nan != nan
    rand = random.uniform(0, 1)
    xformers_output = xformers_output.masked_fill(xformers_output.isnan(), rand)
    xf_blocksparse_output = xf_blocksparse_output.masked_fill(
        xf_blocksparse_output.isnan(), rand
    )

    assert_almost_equal(xformers_output, xf_blocksparse_output)

    # Each loss is now named after the output it is computed from (the names
    # were swapped before); the backward order is unchanged.
    loss_xformers = torch.norm(xformers_output)
    loss_blocksparse = torch.norm(xf_blocksparse_output)
    loss_xformers.backward()
    loss_blocksparse.backward()

    # NOTE(review): six out-of-place masked_fill calls on q/k/v and their
    # copies used to sit here. Their results were discarded, so they had no
    # effect and were removed. If NaN gradients should be neutralised before
    # the comparison, apply the fill to the ``.grad`` tensors instead.

    assert_almost_equal(q.grad, q_.grad)
    assert_almost_equal(k.grad, k_.grad)
    assert_almost_equal(v.grad, v_.grad)
+ _reset_seeds() + xformers_mha = MultiheadAttention( + embedding, + num_heads, + dropout=0.0, + xformers_att_config=xformers_att_config, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + ).to(device) + xformers_output, _ = xformers_mha( + q, + k, + v, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + + _reset_seeds() + original_mha = MultiheadAttention( + embedding, + num_heads, + dropout=0.0, + xformers_att_config=None, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + ).to(device) + original_output, _ = original_mha( + q_, + k_, + v_, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask, + static_kv=static_kv, + ) + + # account for when nan != nan + if xformers_output.isnan().any() or original_output.isnan().any(): + rand = random.uniform(0, 1) + xformers_output = xformers_output.masked_fill(xformers_output.isnan(), rand) + original_output = original_output.masked_fill(original_output.isnan(), rand) + + # torch.equal works for cpu, on cuda allclose is needed. + assert torch.allclose( + xformers_output, original_output, atol=1e-06 + ), f"max diff is {torch.max(torch.abs(xformers_output - original_output))}" + + loss_xformers = torch.norm(xformers_output) + loss_original = torch.norm(original_output) + loss_xformers.backward() + loss_original.backward() + + # torch.equal works for cpu, on cuda allclose is needed. 
def test_mask_padding_parity():
    """Check ``MultiheadAttention._pad_masks`` against the inlined legacy padding."""

    def old_padding_code(key_padding_mask, attn_mask):
        """Append one zero column to each mask that is not None."""
        padded_attn = attn_mask
        if padded_attn is not None:
            extra_col = padded_attn.new_zeros(padded_attn.size(0), 1)
            padded_attn = torch.cat([padded_attn, extra_col], dim=1)

        padded_kp = key_padding_mask
        if padded_kp is not None:
            extra_col = torch.zeros(padded_kp.size(0), 1).type_as(padded_kp)
            padded_kp = torch.cat([padded_kp, extra_col], dim=1)
        return padded_kp, padded_attn

    # values don't matter for this test.
    mha = MultiheadAttention(
        embed_dim=8,
        num_heads=2,
        dropout=0.0,
        add_bias_kv=True,
        add_zero_attn=True,
    )

    key_padding_mask = torch.rand((8, 64))
    attn_mask = torch.rand((64, 64))

    reference_masks = old_padding_code(key_padding_mask, attn_mask)
    candidate_masks = mha._pad_masks(key_padding_mask, attn_mask)
    kp_mask_new, a_mask_new = candidate_masks
    mask_pairs = list(zip(reference_masks, (kp_mask_new, a_mask_new)))

    # Sizes are compared for both masks before any values are compared.
    for reference, candidate in mask_pairs:
        assert reference.size() == candidate.size()
    for reference, candidate in mask_pairs:
        assert torch.equal(reference, candidate)
class TestMultiheadAttention(unittest.TestCase):
    """Unit tests for the padding-mask merge and head pruning of ``MultiheadAttention``."""

    def test_append_prev_key_padding_mask(self):
        bsz = 1
        src_len = 4

        def bool_mask(*bits):
            """Return a ``(1, len(bits))`` boolean tensor built from 0/1 flags."""
            return torch.tensor([list(bits)]).bool()

        # Each case is (current mask, previous mask, expected merged mask);
        # the first two are passed positionally in that order below.
        cases = [
            # no padding mask
            (None, None, None),
            # current padding mask only
            (bool_mask(1), None, bool_mask(0, 0, 0, 1)),
            # previous padding mask only
            (None, bool_mask(0, 1, 0), bool_mask(0, 1, 0, 0)),
            # both padding masks
            (bool_mask(1), bool_mask(0, 1, 0), bool_mask(0, 1, 0, 1)),
            # current padding mask already spans src_len
            (bool_mask(0, 1, 0, 1), None, bool_mask(0, 1, 0, 1)),
            # previous padding mask already spans src_len
            (None, bool_mask(0, 1, 0, 1), bool_mask(0, 1, 0, 1)),
        ]
        for current, previous, expected in cases:
            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
                current,
                previous,
                batch_size=bsz,
                src_len=src_len,
                static_kv=False,
            )

            if key_padding_mask is None:
                self.assertIsNone(expected)
                continue

            self.assertTrue(
                torch.all(torch.eq(key_padding_mask, expected)),
                f"Unexpected resultant key padding mask: {key_padding_mask}"
                f" given current: {current} and previous: {previous}",
            )
            self.assertEqual(key_padding_mask.size(0), bsz)
            self.assertEqual(key_padding_mask.size(1), src_len)

    def test_pruning_heads(self):
        embed_dim = 768
        num_heads = 12
        num_heads_to_keep = 8
        dummy_input = torch.randn(32, 2, embed_dim)
        mha = MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)
        kept_heads = mha._get_reserve_head_index(num_heads_to_keep=num_heads_to_keep)
        mha._adaptive_prune_heads(reserve_head_index=kept_heads)
        mha._set_skip_embed_dim_check()
        # The pruned module must still accept inputs of the original width.
        mha(query=dummy_input, key=dummy_input, value=dummy_input)
        # Pruning drops heads; the per-head dimension stays the same.
        self.assertEqual(mha.head_dim, embed_dim / num_heads)
        self.assertEqual(mha.num_heads, num_heads_to_keep)
def _get_test_data_with_bpe_cont_marker(self, append_eos=True):
    """
    Args:
        append_eos: if True, each input sentence in the source tokens tensor
            will have an EOS appended to the end.

    Returns:
        vocabs: BPE vocab with continuation markers as suffixes to denote
            non-end of word tokens. This is the standard BPE format used in
            fairseq's preprocessing.
        x: input tensor containing numberized source tokens, with EOS at the
            end if append_eos is true
        src_lengths: and source lengths.
    """
    vocab = Dictionary()
    # Symbols are added in this fixed order, as in the original listing.
    for symbol in ("he@@", "llo", "how", "are", "y@@", "ou", "n@@", "ew", "or@@", "k"):
        vocab.add_symbol(symbol)

    src_tokens = [
        ["he@@", "llo", "n@@", "ew", "y@@", "or@@", "k"],
        ["how", "are", "y@@", "ou"],
    ]
    # The assignment target used to be written twice; one is enough.
    x, src_lengths = self._convert_src_tokens_to_tensor(
        vocab=vocab, src_tokens=src_tokens, append_eos=append_eos
    )
    return vocab, x, src_lengths
def _convert_src_tokens_to_tensor(
    self, vocab: "Dictionary", src_tokens: List[List[str]], append_eos: bool
):
    """Numberize tokenized sentences into a padded, time-major tensor.

    Args:
        vocab: dictionary providing ``index``, ``pad`` and ``eos``.
        src_tokens: one list of token strings per sentence.
        append_eos: if True, EOS is written right after each sentence's last
            token and counted in the returned lengths.

    Returns:
        A pair ``(x, src_lengths)``. ``x`` is a ``LongTensor`` of shape
        (longest length, number of sentences), padded with ``vocab.pad()``.
        ``src_lengths`` is a ``LongTensor`` with one length per sentence.
    """
    # If we have to append EOS, we include EOS in counting src length
    eos_slots = 1 if append_eos else 0
    src_len = [len(sentence) + eos_slots for sentence in src_tokens]

    # default=0 keeps an empty batch from raising in max().
    x = torch.LongTensor(len(src_tokens), max(src_len, default=0)).fill_(vocab.pad())
    for i, sentence in enumerate(src_tokens):
        for j, token in enumerate(sentence):
            x[i][j] = vocab.index(token)
        if append_eos:
            # Index by sentence length rather than the leaked loop variable,
            # which is unbound or stale when the sentence is empty.
            x[i][len(sentence)] = vocab.eos()

    x = x.transpose(1, 0)
    return x, torch.LongTensor(src_len)


def assert_eos_at_end(self, x, x_len, eos):
    """Asserts last token of every sentence in x is EOS

    Args:
        x: token tensor indexed as ``x[position][sentence]``.
        x_len: length of each sentence.
        eos: token id expected at position ``x_len[i] - 1`` of sentence ``i``.
    """
    for i, length in enumerate(x_len):
        last_token = x[length - 1][i]
        self.assertEqual(
            last_token,
            eos,
            (
                "Expected eos (token id {eos}) at the end of sentence {i} "
                "but got {other} instead"
                # Report the token that was checked; x[i][-1] used to be
                # printed here, which is a different element.
            ).format(i=i, eos=eos, other=last_token),
        )
max_shuffle_distance: int, + vocab: Dictionary, + expected_shufle_maps: List[Dict[int, int]], + expect_eos_at_end: bool, + bpe_end_marker=None, + ): + """ + This verifies that with a given x, x_len, max_shuffle_distance, and + vocab, we get the expected shuffle result. + + Args: + x: Tensor of shape (T x B) = (sequence_length, batch_size) + x_len: Tensor of length B = batch_size + max_shuffle_distance: arg to pass to noising + expected_shuffle_maps: List[mapping] where mapping is a + Dict[old_index, new_index], mapping x's elements from their + old positions in x to their new positions in x. + expect_eos_at_end: if True, check the output to make sure there is + an EOS at the end. + bpe_end_marker: str denoting the BPE end token. If this is not None, we + set the BPE cont token to None in the noising classes. + """ + bpe_cont_marker = None + if bpe_end_marker is None: + bpe_cont_marker = "@@" + + with data_utils.numpy_seed(1234): + word_shuffle = noising.WordShuffle( + vocab, bpe_cont_marker=bpe_cont_marker, bpe_end_marker=bpe_end_marker + ) + x_noised, l_noised = word_shuffle.noising( + x, x_len, max_shuffle_distance=max_shuffle_distance + ) + + # For every example, we have a different expected shuffle map. We check + # that each example is shuffled as expected according to each + # corresponding shuffle map. 
def test_word_shuffle_with_eos(self):
    """Word shuffle on EOS-terminated data from ``_get_test_data_with_bpe_cont_marker``."""
    vocab, x, x_len = self._get_test_data_with_bpe_cont_marker(append_eos=True)
    # Arguments shared by both checks below.
    common = dict(x=x, x_len=x_len, vocab=vocab, expect_eos_at_end=True)

    # Assert word shuffle with max shuffle distance 0 causes input to be
    # unchanged
    identity_maps = [
        self.generate_unchanged_shuffle_map(example_len) for example_len in x_len
    ]
    self.assert_word_shuffle_matches_expected(
        max_shuffle_distance=0, expected_shufle_maps=identity_maps, **common
    )

    # Assert word shuffle with max shuffle distance 3 matches our expected
    # shuffle order
    shuffled_maps = [
        self.generate_unchanged_shuffle_map(x_len[0]),
        {0: 0, 1: 3, 2: 1, 3: 2},
    ]
    self.assert_word_shuffle_matches_expected(
        max_shuffle_distance=3, expected_shufle_maps=shuffled_maps, **common
    )
def assert_no_eos_at_end(self, x, x_len, eos):
    """Check that no sentence of the time-major batch ``x`` ends with EOS.

    ``x`` is indexed as ``x[position][sentence]`` and ``x_len`` gives the
    length of each sentence, so the last token of sentence ``i`` sits at
    ``x[x_len[i] - 1][i]``.
    """
    for sent_idx, sent_len in enumerate(x_len):
        last_token = x[sent_len - 1][sent_idx]
        failure_msg = (
            "Expected no eos (token id {eos}) at the end of sentence {i}.".format(
                eos=eos, i=sent_idx
            )
        )
        self.assertNotEqual(last_token, eos, failure_msg)
def test_noising_dataset_with_eos(self):
    """Noise an EOS-terminated batch and compare it with the expected tensors."""
    src_dict, src_tokens, _ = self._get_test_data_with_bpe_cont_marker(
        append_eos=True
    )

    # Format data for src_dataset: one pad-stripped tensor per sentence
    src_tokens_no_pad = [
        utils.strip_pad(tensor=sentence, pad=src_dict.pad())
        for sentence in torch.t(src_tokens)
    ]
    batch = self._get_noising_dataset_batch(
        src_tokens_no_pad=src_tokens_no_pad, src_dict=src_dict
    )

    eos, pad = src_dict.eos(), src_dict.pad()

    # Generated noisy source as source
    noisy_rows = [[4, 5, 10, 11, 8, 12, 13, eos], [pad, pad, pad, 6, 8, 9, 7, eos]]
    expected_src = torch.LongTensor(noisy_rows)
    # Original clean source as target (right-padded)
    clean_rows = [[4, 5, 10, 11, 8, 12, 13, eos], [6, 7, 8, 9, eos, pad, pad, pad]]
    expected_tgt = torch.LongTensor(clean_rows)

    self.assertTensorEqual(expected_src, batch["net_input"]["src_tokens"])
    self.assertTensorEqual(expected_tgt, batch["target"])
def assertTensorEqual(self, t1, t2):
    """Fail unless ``t1`` and ``t2`` have the same size and equal elements."""
    self.assertEqual(t1.size(), t2.size(), "size mismatch")
    # Number of positions at which the two tensors disagree.
    mismatch_count = t1.ne(t2).long().sum()
    self.assertEqual(mismatch_count, 0)
def mk_sample(tokens: Sequence[int], batch_size: int = 2) -> Dict[str, Any]:
    """Build a toy batch that repeats ``tokens`` ``batch_size`` times.

    The same tensor is used for ``src_tokens`` and ``prev_output_tokens``;
    ``target`` is that tensor with its first column dropped.
    """
    row = torch.tensor(tokens, dtype=torch.long)
    batch = torch.stack([row] * batch_size)
    src_lengths = torch.tensor([len(tokens)] * batch_size, dtype=torch.long)
    net_input = {
        "src_tokens": batch,
        "prev_output_tokens": batch,
        "src_lengths": src_lengths,
    }
    return {"net_input": net_input, "target": batch[:, 1:]}
"decoder_ffn_embed_dim": 14, + "decoder_attention_heads": 4, + # Disable dropout so we have comparable tests. + "dropout": 0, + "attention_dropout": 0, + "activation_dropout": 0, + "encoder_layerdrop": 0, + } + + args = fairseq.options.get_args( + data, + task="online_backtranslation", + mono_langs=",".join(languages), + valid_lang_pairs=f"{languages[0]}-{languages[1]}", + tokens_per_sample=256, + language_mapping=language_mapping, + **kwargs, + ) + task = obt.OnlineBackTranslationTask.setup_task(args) + # we need to build the model to have the correct dictionary + model = task.build_model(task.args) + return task, model + + def tmp_path(self, test_case: str) -> Path: + return Path(tempfile.mkdtemp(test_case, dir=self.tmp_dir)) + + def test_lang_tokens(self): + task, model = self.obt_task(["en", "ro", "zh"]) + assert obt._lang_token("en") in task.dictionary + assert obt._lang_token("ro") in task.dictionary + assert obt._lang_token("zh") in task.dictionary + + en_bos = obt._lang_token_index(task.common_dict, "en") + assert "en" == task.common_dict[en_bos].strip("_") + zh_bos = obt._lang_token_index(task.common_dict, "zh") + assert "zh" == task.common_dict[zh_bos].strip("_") + zh_sample = mk_sample([zh_bos, 16, 14, 12, 10]) + + # we expect to receive the bos token for translation + assert task.get_bos_token_from_sample(zh_sample) == en_bos + + def test_backtranslate_sample(self): + task, model = self.obt_task(["en", "ro", "zh"]) + + en_bos = obt._lang_token_index(task.common_dict, "en") + zh_bos = obt._lang_token_index(task.common_dict, "zh") + sample = mk_sample([zh_bos, 16, 14, 12, 10]) + + task.backtranslate_sample(sample, "zh", "en") + target_zh = list(sample["target"][0]) + assert target_zh == [16, 14, 12, 10] # original zh sentence + generated_en = sample["net_input"]["src_tokens"][0] + assert generated_en[0] == en_bos + + def test_train_dataset(self): + data = self.tmp_path("test_train_dataset") + mk_dataset(20, 10, data / "en" / "train.bin") + mk_dataset(10, 
10, data / "zh" / "train.bin") + task, model = self.obt_task(["en", "zh"], data) + task.load_dataset("train") + + en_bos = obt._lang_token_index(task.common_dict, "en") + zh_bos = obt._lang_token_index(task.common_dict, "zh") + + train = task.datasets["train"] + train.ordered_indices() + train.prefetch([0, 19]) + sample_0 = train[0] + sample_19 = train[19] + self.assertEqual( + set(sample_0.keys()), {"en-BT", "en-DENOISE", "zh-BT", "zh-DENOISE"} + ) + for sample in (sample_0, sample_19): + self.assertEqual(sample["en-BT"]["source"][0], en_bos) + # bt target isn't ready to look at. + self.assertEqual(sample["en-DENOISE"]["source"][0], en_bos) + # TODO What could we check on the target side ? + + for i in range(10): + # Zh dataset is shorter, and is wrapped around En dataset. + train.prefetch([i, i + 10]) + self.assertEqual( + list(train[i]["zh-DENOISE"]["source"]), + list(train[i + 10]["zh-DENOISE"]["source"]), + ) + self.assertEqual(train[i]["zh-DENOISE"]["source"][0].item(), zh_bos) + + # Sorted by increasing len + self.assertLess( + len(sample_0["en-BT"]["source"]), len(sample_19["en-BT"]["source"]) + ) + + def test_valid_dataset(self): + data = self.tmp_path("test_valid_dataset") + mk_dataset(10, 21, data / "valid.en-zh.en.bin") + mk_dataset(10, 21, data / "valid.en-zh.zh.bin") + + task, model = self.obt_task(["en", "zh"], data) + valid = task.load_dataset("valid") + en_bos = obt._lang_token_index(task.common_dict, "en") + + assert valid is not None + valid.prefetch(range(10)) + sample_0 = valid[0] + sample_9 = valid[9] + self.assertEqual(sample_0["id"], 0) + self.assertEqual(sample_9["id"], 9) + self.assertEqual(sample_0["source"][0], en_bos) + self.assertEqual(sample_9["source"][0], en_bos) + # TODO: could we test the target side ? 
def assertFnMatch(self, fn, values):
    """Assert that ``fn`` maps every key of ``values`` to its associated value."""
    for arg in values:
        expected = values[arg]
        actual = fn(arg)
        message = f"Fn has wrong value: fn({arg}) = {actual} != {expected}"
        self.assertEqual(actual, expected, message)
PlasmaView( + data_server_1, dummy_path, 1, plasma_path=server_1_path.name + ) + assert len(arr1.client.list()) == 1 + assert (arr1.array == data_server_1).all() + arr2 = PlasmaView(data_server_2, dummy_path, 1, plasma_path=server_2_path) + assert (arr2.array == data_server_2).all() + assert (arr1.array == data_server_1).all() + server.kill() + + def test_hash_collision(self): + data_server_1 = np.array([0, 1]) + data_server_2 = np.array([2, 3]) + arr1 = PlasmaView(data_server_1, dummy_path, 1, plasma_path=self.path) + assert len(arr1.client.list()) == 1 + arr2 = PlasmaView(data_server_2, dummy_path, 1, plasma_path=self.path) + assert len(arr1.client.list()) == 1 + assert len(arr2.client.list()) == 1 + assert (arr2.array == data_server_1).all() + # New hash key based on tuples + arr3 = PlasmaView( + data_server_2, dummy_path, (1, 12312312312, None), plasma_path=self.path + ) + assert ( + len(arr2.client.list()) == 2 + ), "No new object was created by using a novel hash key" + assert ( + arr3.object_id in arr2.client.list() + ), "No new object was created by using a novel hash key" + assert ( + arr3.object_id in arr3.client.list() + ), "No new object was created by using a novel hash key" + del arr3, arr2, arr1 + + @staticmethod + def _assert_view_equal(pv1, pv2): + np.testing.assert_array_equal(pv1.array, pv2.array) + + def test_putting_same_array_twice(self): + data = np.array([4, 4, 4]) + arr1 = PlasmaView(data, dummy_path, 1, plasma_path=self.path) + assert len(self.client.list()) == 1 + arr1b = PlasmaView( + data, dummy_path, 1, plasma_path=self.path + ) # should not change contents of store + arr1c = PlasmaView( + None, dummy_path, 1, plasma_path=self.path + ) # should not change contents of store + + assert len(self.client.list()) == 1 + self._assert_view_equal(arr1, arr1b) + self._assert_view_equal(arr1, arr1c) + PlasmaView( + data, dummy_path, 2, plasma_path=self.path + ) # new object id, adds new entry + assert len(self.client.list()) == 2 + + new_client = 
def test_plasma_store_full_raises(self):
    """Putting an array larger than the store raises ``PlasmaStoreFull``."""
    with tempfile.NamedTemporaryFile() as new_path:
        server = PlasmaStore.start(path=new_path.name, nbytes=10000)
        try:
            with self.assertRaises(plasma.PlasmaStoreFull):
                # 10000 float64 values need 80000 bytes, far more than the
                # 10000 bytes the store was started with
                PlasmaView(
                    np.random.rand(10000, 1), dummy_path, 1, plasma_path=new_path.name
                )
        finally:
            # Stop the store even when the assertion above fails, so a
            # failing test does not leave it running.
            server.kill()
def _test_reproducibility(
    self,
    name,
    extra_flags=None,
    delta=0.0001,
    resume_checkpoint="checkpoint1.pt",
    max_epoch=3,
):
    """Check that resuming from a checkpoint reproduces an uninterrupted run.

    Trains a small translation model with ``--max-epoch max_epoch`` in one
    go, renames ``resume_checkpoint`` to ``checkpoint_last.pt``, trains again
    with the same flags and compares the last logged train/valid statistics
    of both runs.

    Args:
        name: suffix of the temporary data directory.
        extra_flags: extra command-line flags appended to both training runs.
        delta: maximum absolute difference tolerated for each statistic.
        resume_checkpoint: checkpoint file inside the data directory that is
            renamed to ``checkpoint_last.pt`` before the second run.
        max_epoch: value passed as ``--max-epoch`` to both runs.
    """

    def get_last_log_stats_containing_string(log_records, search_string):
        # Walk the given records newest-first and decode the first JSON
        # stats line that mentions ``search_string``.
        for log_record in reversed(log_records):
            if isinstance(log_record.msg, str) and search_string in log_record.msg:
                return json.loads(log_record.msg)
        # Fail with a readable message instead of returning None and
        # crashing later on ``None[k]``.
        self.fail("no log record containing {!r} was found".format(search_string))

    if extra_flags is None:
        extra_flags = []

    def train(data_dir):
        # Both runs must use exactly the same flags; build a fresh list for
        # every call.
        test_binaries.train_translation_model(
            data_dir,
            "fconv_iwslt_de_en",
            [
                "--dropout",
                "0.0",
                "--log-format",
                "json",
                "--log-interval",
                "1",
                "--max-epoch",
                str(max_epoch),
            ]
            + extra_flags,
        )

    with tempfile.TemporaryDirectory(name) as data_dir:
        with self.assertLogs() as logs:
            test_binaries.create_dummy_data(data_dir)
            test_binaries.preprocess_translation_data(data_dir)

        # reference run: train all epochs without interruption
        with self.assertLogs() as logs:
            train(data_dir)
        train_log = get_last_log_stats_containing_string(logs.records, "train_loss")
        valid_log = get_last_log_stats_containing_string(logs.records, "valid_loss")

        # second run: make ``resume_checkpoint`` the last checkpoint and
        # train again, resuming from it
        os.rename(
            os.path.join(data_dir, resume_checkpoint),
            os.path.join(data_dir, "checkpoint_last.pt"),
        )
        with self.assertLogs() as logs:
            train(data_dir)
        train_res_log = get_last_log_stats_containing_string(
            logs.records, "train_loss"
        )
        valid_res_log = get_last_log_stats_containing_string(
            logs.records, "valid_loss"
        )

        for k in ["train_loss", "train_ppl", "train_num_updates", "train_gnorm"]:
            self.assertAlmostEqual(
                float(train_log[k]), float(train_res_log[k]), delta=delta
            )
        for k in [
            "valid_loss",
            "valid_ppl",
            "valid_num_updates",
            "valid_best_loss",
        ]:
            self.assertAlmostEqual(
                float(valid_log[k]), float(valid_res_log[k]), delta=delta
            )
resume_checkpoint="checkpoint_1_3.pt", + max_epoch=1, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_resampling_dataset.py b/fairseq/tests/test_resampling_dataset.py new file mode 100644 index 0000000..ccb53a2 --- /dev/null +++ b/fairseq/tests/test_resampling_dataset.py @@ -0,0 +1,103 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import collections +import unittest + +import numpy as np +from fairseq.data import ListDataset, ResamplingDataset + + +class TestResamplingDataset(unittest.TestCase): + def setUp(self): + self.strings = ["ab", "c", "def", "ghij"] + self.weights = [4.0, 2.0, 7.0, 1.5] + self.size_ratio = 2 + self.dataset = ListDataset( + self.strings, np.array([len(s) for s in self.strings]) + ) + + def _test_common(self, resampling_dataset, iters): + assert len(self.dataset) == len(self.strings) == len(self.weights) + assert len(resampling_dataset) == self.size_ratio * len(self.strings) + + results = {"ordered_by_size": True, "max_distribution_diff": 0.0} + + totalfreqs = 0 + freqs = collections.defaultdict(int) + + for epoch_num in range(iters): + resampling_dataset.set_epoch(epoch_num) + + indices = resampling_dataset.ordered_indices() + assert len(indices) == len(resampling_dataset) + + prev_size = -1 + + for i in indices: + cur_size = resampling_dataset.size(i) + # Make sure indices map to same sequences within an epoch + assert resampling_dataset[i] == resampling_dataset[i] + + # Make sure length of sequence is correct + assert cur_size == len(resampling_dataset[i]) + + freqs[resampling_dataset[i]] += 1 + totalfreqs += 1 + + if prev_size > cur_size: + results["ordered_by_size"] = False + + prev_size = cur_size + + assert set(freqs.keys()) == set(self.strings) + for s, weight in zip(self.strings, self.weights): + freq = freqs[s] / totalfreqs + expected_freq = weight / 
sum(self.weights) + results["max_distribution_diff"] = max( + results["max_distribution_diff"], abs(expected_freq - freq) + ) + + return results + + def test_resampling_dataset_batch_by_size_false(self): + resampling_dataset = ResamplingDataset( + self.dataset, + self.weights, + size_ratio=self.size_ratio, + batch_by_size=False, + seed=0, + ) + + results = self._test_common(resampling_dataset, iters=1000) + + # For batch_by_size = False, the batches should be returned in + # arbitrary order of size. + assert not results["ordered_by_size"] + + # Allow tolerance in distribution error of 2%. + assert results["max_distribution_diff"] < 0.02 + + def test_resampling_dataset_batch_by_size_true(self): + resampling_dataset = ResamplingDataset( + self.dataset, + self.weights, + size_ratio=self.size_ratio, + batch_by_size=True, + seed=0, + ) + + results = self._test_common(resampling_dataset, iters=1000) + + # For batch_by_size = True, the batches should be returned in + # increasing order of size. + assert results["ordered_by_size"] + + # Allow tolerance in distribution error of 2%. + assert results["max_distribution_diff"] < 0.02 + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_roberta.py b/fairseq/tests/test_roberta.py new file mode 100644 index 0000000..14f01f9 --- /dev/null +++ b/fairseq/tests/test_roberta.py @@ -0,0 +1,344 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import functools +import unittest +from typing import Any, Dict, Sequence + +import fairseq +import fairseq.options +import fairseq.tasks +import torch +from tests.utils import dummy_dictionary + +VOCAB_SIZE = 100 + + +@fairseq.tasks.register_task("fake_task") +class FakeTask(fairseq.tasks.LegacyFairseqTask): + def __init__(self, args): + super().__init__(args) + self.dictionary = dummy_dictionary(VOCAB_SIZE - 4) + assert len(self.dictionary) == VOCAB_SIZE + + @property + def source_dictionary(self): + return self.dictionary + + @property + def target_dictionary(self): + return self.dictionary + + +@functools.lru_cache() +def get_toy_model( + device: str, + architecture: str = "roberta_enc_dec", + **extra_args: Any, +): + assert device in ("gpu", "cpu") + kwargs = { + "arch": architecture, + # Use characteristics dimensions + "encoder_layers": 3, + "encoder_embed_dim": 12, + "encoder_ffn_embed_dim": 14, + "encoder_attention_heads": 4, + "decoder_layers": 3, + "decoder_embed_dim": 12, + "decoder_ffn_embed_dim": 14, + "decoder_attention_heads": 4, + # Disable dropout so we have comparable tests. 
def mk_sample(
    lang: str, device: str, tok: Sequence[int] = None, batch_size: int = 2
) -> Dict[str, Any]:
    """Build a toy batch of ``batch_size`` identical sentences.

    When ``tok`` is empty or None a fixed default sentence is used: one for
    ``lang == "en"`` and a longer one for every other language. With
    ``device == "gpu"`` the tensors are moved to CUDA.
    """
    assert device in ("gpu", "cpu")
    if not tok:
        tok = (
            [10, 11, 12, 13, 14, 15, 2]
            if lang == "en"
            else [20, 21, 22, 23, 24, 25, 26, 27, 2]
        )

    row = torch.tensor(tok, dtype=torch.long)
    batch = torch.stack([row] * batch_size)
    if device == "gpu":
        batch = batch.cuda()

    # Lengths live on the same device as the tokens.
    src_lengths = torch.tensor(
        [len(tok)] * batch_size, dtype=torch.long, device=batch.device
    )
    net_input = {
        "src_tokens": batch,
        "prev_output_tokens": batch,
        "src_lengths": src_lengths,
    }
    return {"net_input": net_input, "target": batch[:, 1:]}
ids[shared_id] = group + + def test_roberta_shared_params(self): + _, roberta = get_toy_model("cpu", architecture="roberta") + self.assertSharing( + roberta, + [ + [ + "encoder.sentence_encoder.embed_tokens.weight", + "encoder.lm_head.weight", + ] + ], + ) + + _, roberta = get_toy_model( + "cpu", architecture="roberta", untie_weights_roberta=True + ) + self.assertSharing( + roberta, + [ + ["encoder.sentence_encoder.embed_tokens.weight"], + ["encoder.lm_head.weight"], + ], + ) + + def test_roberta_enc_dec_shared_params(self): + # 3 distinct embeddings + _, enc_dec = get_toy_model("cpu", architecture="roberta_enc_dec") + self.assertSharing( + enc_dec, + [ + ["encoder.embed_tokens.weight"], + ["decoder.embed_tokens.weight"], + ["decoder.output_projection.weight"], + ], + ) + + # 2 distinct embeddings, one for encoder, one for decoder + _, enc_dec = get_toy_model( + "cpu", architecture="roberta_enc_dec", share_decoder_input_output_embed=True + ) + self.assertSharing( + enc_dec, + [ + ["encoder.embed_tokens.weight"], + [ + "decoder.embed_tokens.weight", + "decoder.output_projection.weight", + ], + ], + ) + + # shared embeddings + _, enc_dec = get_toy_model( + "cpu", architecture="roberta_enc_dec", share_all_embeddings=True + ) + self.assertSharing( + enc_dec, + [ + [ + "encoder.embed_tokens.weight", + "decoder.embed_tokens.weight", + "decoder.output_projection.weight", + ] + ], + ) + + def test_roberta_max_positions_is_correctly_set(self): + device = "cpu" + task, model = get_toy_model(device) + max_pos = model.max_decoder_positions() + self.assertEqual(max_pos, 256) + self.assertEqual(max_pos, model.decoder.max_positions()) + self.assertEqual(max_pos, model.encoder.max_positions()) + self.assertEqual(max_pos, model.encoder.embed_positions.max_positions) + + sentence = [31 for _ in range(max_pos)] + sample = mk_sample("en", device, sentence, batch_size=1) + self.assertEqual(list(sample["net_input"]["src_lengths"]), [max_pos]) + 
self.assertEqual(len(sample["net_input"]["src_tokens"][0]), max_pos) + x, _ = model.forward(**sample["net_input"]) + self.assertEqual(x.shape, (1, max_pos, VOCAB_SIZE)) + + @cpu_gpu + def test_roberta_forward_backward(self, device: str): + _, model = get_toy_model(device) + sample = mk_sample("en", device) + en_tokens = sample["net_input"]["src_tokens"] + (bs, l) = en_tokens.shape + # Forward + logits, _ = model(**sample["net_input"]) + self.assertEqual(logits.shape, (bs, l, VOCAB_SIZE)) + + # Backward + loss = logits.sum() + loss.backward() + + @cpu_gpu + def test_roberta_forward_backward_bs1(self, device: str): + _, model = get_toy_model(device) + sample = mk_sample("en", device, batch_size=1) + o, _ = model.forward(**sample["net_input"]) + loss = o.sum() + sample2 = mk_sample("ro", device, batch_size=1) + o, _ = model.forward(**sample2["net_input"]) + loss += o.sum() + loss.backward() + + @cpu_gpu + def test_roberta_batching(self, device: str): + """ + Checks that the batch of size 2 give twice the same results than the batch of size 1. + """ + _, model = get_toy_model(device) + sample = mk_sample("en", device, batch_size=1) + slen = sample["net_input"]["src_lengths"][0] + sample2 = mk_sample("en", device, batch_size=2) + with torch.no_grad(): + z = model.encoder.forward( + sample["net_input"]["src_tokens"], sample["net_input"]["src_lengths"] + ) + z = z["encoder_out"][-1] + logits, _ = model.forward(**sample["net_input"]) + + z2 = model.encoder.forward( + sample2["net_input"]["src_tokens"], sample["net_input"]["src_lengths"] + ) + z2 = z2["encoder_out"][-1] + logits2, _ = model.forward(**sample2["net_input"]) + + self.assertEqual(z.shape, (slen, 1, 12)) + self.assertEqual(z2.shape, (slen, 2, 12)) + self.assertTensorEqual(logits2[0], logits2[1]) + self.assertTensorEqual(logits[0], logits2[0]) + + @cpu_gpu + def test_roberta_incremental_decoder(self, device: str): + """ + Checks that incremental decoding yields the same result than non incremental one. 
+ """ + task, model = get_toy_model(device) + + en_sample = mk_sample("en", device) + en_tokens = en_sample["net_input"]["src_tokens"] + ro_sample = mk_sample("ro", device) + ro_tokens = ro_sample["net_input"]["src_tokens"] + + en_enc = model.encoder.forward( + en_tokens, src_lengths=en_sample["net_input"]["src_lengths"] + ) + (bs, tgt_len) = ro_tokens.shape + + # Decode without incremental state + ro_dec, _ = model.decoder.forward(ro_tokens, encoder_out=en_enc) + self.assertEqual(ro_dec.shape, (bs, tgt_len, VOCAB_SIZE)) + self.assertTensorEqual(ro_dec[0], ro_dec[1]) + + # Decode with incremental state + inc_state = {} + ro_dec_inc = [] + for i in range(tgt_len): + ro, _ = model.decoder.forward( + ro_tokens[:, : i + 1], encoder_out=en_enc, incremental_state=inc_state + ) + self.assertEqual(ro.shape, (bs, 1, VOCAB_SIZE)) + ro_dec_inc.append(ro) + + for i in range(tgt_len): + # Intra-batch + self.assertTensorEqual(ro_dec_inc[i][0], ro_dec_inc[i][1]) + # Incremental vs non-incremental + self.assertTensorEqual(ro_dec_inc[i][:, 0], ro_dec[:, i]) + + @cpu_gpu + def test_regularize_for_adaprune_in_roberta(self, device: str): + _, model = get_toy_model( + device=device, + architecture="roberta_base", + mha_reg_scale_factor=0.000375, + ffn_reg_scale_factor=0.000375, + ) + sample = mk_sample("en", device, batch_size=1) + task_loss, _ = model.forward(**sample["net_input"]) + head_loss = model._get_adaptive_head_loss() + ffn_loss = model._get_adaptive_ffn_loss() + loss = task_loss.sum() + head_loss + ffn_loss + loss.backward() + + @cpu_gpu + def test_ffn_prune_for_adaprune_in_roberta(self, device: str): + _, model = get_toy_model( + device=device, + architecture="roberta_base", + ) + sample = mk_sample("en", device, batch_size=1) + for layer in model.encoder.sentence_encoder.layers: + fc1_original_size = layer.fc1.out_features + remove_index = layer._get_fc_rank(remove_num=2) + layer._prune_fc_layer(remove_index=remove_index) + self.assertEqual(layer.fc1.out_features, 
fc1_original_size - 2) + + task_loss, _ = model.forward(**sample["net_input"]) + + +def params(model, name): + if "." not in name: + return getattr(model, name) + + prefix, name = name.split(".", 1) + return params(getattr(model, prefix), name) diff --git a/fairseq/tests/test_rotary_positional_embedding.py b/fairseq/tests/test_rotary_positional_embedding.py new file mode 100644 index 0000000..7c44e86 --- /dev/null +++ b/fairseq/tests/test_rotary_positional_embedding.py @@ -0,0 +1,85 @@ +import torch +import numpy as np +import unittest +from fairseq.modules.rotary_positional_embedding import apply_rotary_pos_emb +from fairseq.modules import RotaryPositionalEmbedding + + +class TestRotaryPositionalEmbedding(unittest.TestCase): + def setUp(self) -> None: + self.T = 3 + self.B = 1 + self.C = 2 + torch.manual_seed(0) + self.sample = torch.randn(self.T, self.B, self.C) # TBC + self.rope_pos_emd = RotaryPositionalEmbedding(dim=self.C) + + def test_forward(self): + expected_cos = torch.tensor( + [[[[1.0000, 1.0000]]], [[[0.5403, 0.5403]]], [[[-0.4161, -0.4161]]]] + ) + expected_sin = torch.tensor( + [[[[0.0000, 0.0000]]], [[[0.8415, 0.8415]]], [[[0.9093, 0.9093]]]] + ) + cos, sin = self.rope_pos_emd(self.sample, self.T) + self.assertTrue( + np.allclose( + expected_cos.cpu().detach().numpy(), + cos.cpu().detach().numpy(), + atol=1e-4, + ) + ) + self.assertTrue( + np.allclose( + expected_sin.cpu().detach().numpy(), + sin.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + def test_apply_rotary_pos_emb(self): + cos, sin = self.rope_pos_emd(self.sample, self.T) + query = self.sample.view(self.T, self.B, 1, self.C) + expected_query = torch.tensor( + [[[[1.5410, -0.2934]]], [[[-1.6555, -1.5263]]], [[[1.7231, -0.4041]]]] + ) + new_query, new_key = apply_rotary_pos_emb(query, query, cos, sin) + self.assertTrue( + np.allclose( + expected_query.cpu().detach().numpy(), + new_query.cpu().detach().numpy(), + atol=1e-4, + ) + ) + self.assertTrue( + np.allclose( + 
expected_query.cpu().detach().numpy(), + new_key.cpu().detach().numpy(), + atol=1e-4, + ) + ) + + def test_jit_compile_rope_module(self): + module_scripted = torch.jit.script(self.rope_pos_emd) + apply_rotary_scripted = torch.jit.script(apply_rotary_pos_emb) + # Test several different lengths + for T in [3, 5, 10]: + sample = torch.randn(T, self.B, self.C) + # Run forward pass with the original module + cos_original, sin_original = self.rope_pos_emd(sample, T) + query = sample.view(T, self.B, 1, self.C) + new_query, new_key = apply_rotary_pos_emb(query, query, cos_original, sin_original) + + # Run forward pass with the scripted module + cos_scripted, sin_scripted = module_scripted(sample, T) + new_query_scripted, new_key_scripted = apply_rotary_scripted(query, query, cos_scripted, sin_scripted) + + # Ensure the outputs are the same + self.assertTrue(torch.allclose(cos_original, cos_scripted)) + self.assertTrue(torch.allclose(sin_original, sin_scripted)) + self.assertTrue(torch.allclose(new_query, new_query_scripted)) + self.assertTrue(torch.allclose(new_key, new_key_scripted)) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_sequence_generator.py b/fairseq/tests/test_sequence_generator.py new file mode 100644 index 0000000..2e42df0 --- /dev/null +++ b/fairseq/tests/test_sequence_generator.py @@ -0,0 +1,744 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import math +import tempfile +import unittest + +import numpy as np +import torch + +import tests.utils as test_utils +from fairseq import search +from fairseq.data.dictionary import Dictionary +from fairseq.models.transformer import TransformerModel +from fairseq.ngram_repeat_block import NGramRepeatBlock +from fairseq.sequence_generator import EnsembleModel, SequenceGenerator +from fairseq.tasks.fairseq_task import LegacyFairseqTask + +DEFAULT_TEST_VOCAB_SIZE = 100 + + +class DummyTask(LegacyFairseqTask): + def __init__(self, args): + super().__init__(args) + self.dictionary = get_dummy_dictionary() + if getattr(self.args, "ctc", False): + self.dictionary.add_symbol("<ctc_blank>") + self.src_dict = self.dictionary + self.tgt_dict = self.dictionary + + @property + def source_dictionary(self): + return self.src_dict + + @property + def target_dictionary(self): + return self.dictionary + + +def get_dummy_dictionary(vocab_size=DEFAULT_TEST_VOCAB_SIZE): + dummy_dict = Dictionary() + # add dummy symbol to satisfy vocab size + for id, _ in enumerate(range(vocab_size)): + dummy_dict.add_symbol("{}".format(id), n=1000) + return dummy_dict + + +def get_dummy_task_and_parser(): + """ + to build a fariseq model, we need some dummy parse and task. This function + is used to create dummy task and parser to faciliate model/criterion test + + Note: we use FbSpeechRecognitionTask as the dummy task. 
You may want + to use other task by providing another function + """ + parser = argparse.ArgumentParser( + description="test_dummy_s2s_task", argument_default=argparse.SUPPRESS + ) + DummyTask.add_args(parser) + args = parser.parse_args([]) + task = DummyTask.setup_task(args) + return task, parser + + +class TestJitSequenceGeneratorBase(unittest.TestCase): + def setUp(self): + self.task, self.parser = get_dummy_task_and_parser() + eos = self.task.tgt_dict.eos() + src_tokens = torch.randint(3, 50, (2, 10)).long() + src_tokens = torch.cat((src_tokens, torch.LongTensor([[eos], [eos]])), -1) + src_lengths = torch.LongTensor([2, 10]) + self.sample = { + "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths} + } + TransformerModel.add_args(self.parser) + args = self.parser.parse_args([]) + args.encoder_layers = 2 + args.decoder_layers = 1 + self.transformer_model = TransformerModel.build_model(args, self.task) + + def assertOutputEqual(self, hypo, pos_probs): + pos_scores = torch.FloatTensor(pos_probs).log() + self.assertTensorSizeEqual(hypo["positional_scores"], pos_scores) + self.assertTensorSizeEqual(pos_scores.numel(), hypo["tokens"].numel()) + + def assertTensorSizeEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-4) + + def assertTensorEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertEqual(t1.ne(t2).long().sum(), 0) + + def assertHypoEqual(self, h1, h2): + "Check two hypos are equal" + self.assertTensorEqual(h1["tokens"], h2["tokens"]) + self.assertAlmostEqual(h1["positional_scores"], h2["positional_scores"]) + self.assertLess(abs(h1["score"] - h2["score"]), 1e-6) + self.assertAlmostEqual(h1["attention"], h2["attention"]) + + def _test_save_and_load(self, scripted_module): + with tempfile.NamedTemporaryFile() as f: + 
scripted_module.save(f.name) + torch.jit.load(f.name) + + +JIT_MSG = "Targeting OSS scriptability for the 1.6 release" + + +@unittest.skipIf(torch.__version__ < "1.6.0", JIT_MSG) +class TestJitSequenceGenerator(TestJitSequenceGeneratorBase): + def test_export_transformer(self): + model = self.transformer_model + torch.jit.script(model) + + def test_ensemble_sequence_generator(self): + model = self.transformer_model + generator = SequenceGenerator( + [model], + self.task.tgt_dict, + beam_size=2, + no_repeat_ngram_size=2, + max_len_b=10, + ) + scripted_model = torch.jit.script(generator) + self._test_save_and_load(scripted_model) + + def test_export_ensemble_model(self): + model = self.transformer_model + ensemble_models = EnsembleModel([model]) + torch.jit.script(ensemble_models) + + +class TestExportSearch(unittest.TestCase): + def setUp(self): + task, _ = get_dummy_task_and_parser() + self.tgt_dict = task.tgt_dict + self.min_top1_prob = 0.4 + + def test_export_diverse_bs(self): + search_strategy = search.DiverseBeamSearch( + self.tgt_dict, num_groups=2, diversity_strength=0.0 + ) + torch.jit.script(search_strategy) + + def test_export_sampling(self): + low_sampling_topp = self.min_top1_prob / 2.0 + search_strategy = search.Sampling( + self.tgt_dict, sampling_topp=low_sampling_topp + ) + torch.jit.script(search_strategy) + + def test_export_diverse_siblings_search(self): + search_strategy = search.DiverseSiblingsSearch( + self.tgt_dict, diversity_rate=0.5 + ) + torch.jit.script(search_strategy) + + +class TestSequenceGeneratorBase(unittest.TestCase): + def assertHypoTokens(self, hypo, tokens): + self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens)) + + def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0): + pos_scores = torch.FloatTensor(pos_probs).log() + self.assertAlmostEqual(hypo["positional_scores"], pos_scores) + self.assertEqual(pos_scores.numel(), hypo["tokens"].numel()) + score = pos_scores.sum() + if normalized: + score /= 
pos_scores.numel() ** lenpen + self.assertLess(abs(score - hypo["score"]), 1e-6) + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-4) + + def assertTensorEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertEqual(t1.ne(t2).long().sum(), 0) + + +class TestSequenceGenerator(TestSequenceGeneratorBase): + def setUp(self): + ( + self.tgt_dict, + self.w1, + self.w2, + src_tokens, + src_lengths, + self.model, + ) = test_utils.sequence_generator_setup() + self.sample = { + "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths} + } + + def test_with_normalization(self): + generator = SequenceGenerator([self.model], self.tgt_dict, beam_size=2) + hypos = generator.forward(self.sample) + eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 1.0]) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos]) + self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0]) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0]) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w2, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6]) + + def test_without_normalization(self): + # Sentence 1: unchanged from the normalized case + # Sentence 2: beams swap order + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, normalize_scores=False + ) + hypos = generator.forward(self.sample) + eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 1.0], normalized=False) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos]) + self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 
1.0], normalized=False) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.6], normalized=False) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w2, w1, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.4, 1.0], normalized=False) + + def test_with_lenpen_favoring_short_hypos(self): + lenpen = 0.6 + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, len_penalty=lenpen + ) + hypos = generator.forward(self.sample) + eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 1.0], lenpen=lenpen) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w2, w1, w2, eos]) + self.assertHypoScore(hypos[0][1], [0.1, 0.9, 0.9, 1.0], lenpen=lenpen) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.6], lenpen=lenpen) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w2, w1, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.4, 1.0], lenpen=lenpen) + + def test_with_lenpen_favoring_long_hypos(self): + lenpen = 5.0 + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, len_penalty=lenpen + ) + hypos = generator.forward(self.sample) + eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w2, w1, w2, eos]) + self.assertHypoScore(hypos[0][0], [0.1, 0.9, 0.9, 1.0], lenpen=lenpen) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w1, eos]) + self.assertHypoScore(hypos[0][1], [0.9, 1.0], lenpen=lenpen) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, w1, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.4, 1.0], lenpen=lenpen) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w2, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.6], 
lenpen=lenpen) + + def test_maxlen(self): + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, max_len_b=2 + ) + hypos = generator.forward(self.sample) + eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 1.0]) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w2, w2, eos]) + self.assertHypoScore(hypos[0][1], [0.1, 0.1, 0.6]) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.6]) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w2, w2, eos]) + self.assertHypoScore(hypos[1][1], [0.3, 0.9, 0.01]) + + def test_encoder_with_different_output_len(self): + args = self.model.encoder.args + task = test_utils.TestTranslationTask.setup_task( + args, self.tgt_dict, self.tgt_dict + ) + reshaping_model = test_utils.TestReshapingModel.build_model(args, task) + generator = SequenceGenerator( + [reshaping_model], self.tgt_dict, beam_size=2, max_len_b=2 + ) + hypos = generator.forward(self.sample) + for sent in [0, 1]: + for beam in [0, 1]: + assert hypos[sent][beam]["attention"] is not None + + def test_generation_with_additional_input(self): + args = self.model.encoder.args + task = test_utils.TestTranslationTask.setup_task( + args, self.tgt_dict, self.tgt_dict + ) + add_input_model = test_utils.TestAdditionalInputModel.build_model(args, task) + generator = SequenceGenerator([add_input_model], self.tgt_dict, beam_size=2) + sample = self.sample.copy() + sample["net_input"]["fancy_other_input"] = sample["net_input"]["src_tokens"] + hypos = generator.forward(self.sample) + eos, w1 = self.tgt_dict.eos(), self.w1 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 1.0]) + + +@unittest.skipUnless(torch.cuda.is_available(), "") +class TestRepeatNgramBlocking(TestSequenceGeneratorBase): + 
@classmethod + def setUpClass(cls): + ( + cls.tgt_dict, + cls.w1, + cls.w2, + src_tokens, + src_lengths, + cls.model, + ) = test_utils.sequence_generator_setup() + return cls + + def test_finds_repetitive_tokens(self): + bsz, vocab_size, beam_size, step = 2, 4, 1, 3 + generated_tok = torch.tensor( + [[2, 2, 2, 2], [3, 3, 3, 3]], dtype=torch.long, device="cuda" + ) + lprobs = torch.zeros((beam_size * bsz, vocab_size), device="cuda") + desired_result = lprobs.new_tensor( + [[0.0, 0.0, -math.inf, 0.0], [0.0, 0.0, 0.0, -math.inf]] + ) + + cuda_ext_result, baseline_result = self._compare_cuda_ext_to_default_implem( + bsz, beam_size, generated_tok, lprobs, step, 2 + ) + self.assertTensorEqual(cuda_ext_result, desired_result) + self.assertTensorEqual(baseline_result, desired_result) + + @unittest.skipIf(torch.__version__ < "1.6.0", JIT_MSG) + def test_jit_no_extension(self): + bsz, vocab_size, beam_size, step = 2, 4, 1, 3 + generated_tok = torch.tensor( + [[2, 2, 2, 2], [3, 3, 3, 3]], dtype=torch.long, device="cuda" + ) + lprobs = torch.zeros((beam_size * bsz, vocab_size), device="cuda") + blocker = NGramRepeatBlock(2, use_extension=False) + base_result = blocker(generated_tok, lprobs.clone(), bsz, beam_size, step) + scripted_blocker = torch.jit.script(blocker) + jit_result = scripted_blocker( + generated_tok, lprobs.clone(), bsz, beam_size, step + ) + self.assertTensorEqual(base_result, jit_result) + + def test_ngram_blocking_same_as_default_implem(self): + """Test that cuda extension returns same things as default impl in many settings.""" + vocab_size = 4 + step = 6 + for _ in range(2): + block_param = np.random.choice([1, 2, 3, 4]) + batch_size = np.random.randint(1, 8) + beam_size = np.random.choice([1, 2, 4, 8]) + lprobs = torch.zeros((beam_size * batch_size, vocab_size), device="cuda") + + generated_tok = torch.tensor( + np.random.randint( + 0, vocab_size, size=(batch_size * beam_size, step + 1) + ), + device="cuda", + dtype=torch.long, + ) + 
self._compare_cuda_ext_to_default_implem( + batch_size, + beam_size, + generated_tok, + lprobs, + step, + block_param, + ) + + def _compare_cuda_ext_to_default_implem( + self, bsz, beam_size, generated_tok, lprobs, step, block_param + ): + """Assert that cuda extension and default implem return the same thing.""" + blocker = NGramRepeatBlock(block_param) + assert blocker.use_extension, "Extension not compiled" + cuda_ext_result = blocker( + generated_tok, + lprobs.clone(), + bsz, + beam_size, + step, + ) + blocker.use_extension = False + baseline_result = blocker( + generated_tok, + lprobs.clone(), + bsz, + beam_size, + step, + ) + self.assertTensorEqual(cuda_ext_result, baseline_result) + blocker.use_extension = True + return cuda_ext_result, baseline_result + + +class TestDiverseBeamSearch(TestSequenceGeneratorBase): + def setUp(self): + # construct dummy dictionary + d = test_utils.dummy_dictionary(vocab_size=2) + self.assertEqual(d.pad(), 1) + self.assertEqual(d.eos(), 2) + self.assertEqual(d.unk(), 3) + self.eos = d.eos() + self.w1 = 4 + self.w2 = 5 + + # construct source data + self.src_tokens = torch.LongTensor( + [ + [self.w1, self.w2, self.eos], + [self.w1, self.w2, self.eos], + ] + ) + self.src_lengths = torch.LongTensor([2, 2]) + + args = argparse.Namespace() + unk = 0.0 + args.beam_probs = [ + # step 0: + torch.FloatTensor( + [ + # eos w1 w2 + # sentence 1: + [0.0, unk, 0.9, 0.1], # beam 1 + [0.0, unk, 0.9, 0.1], # beam 2 + # sentence 2: + [0.0, unk, 0.7, 0.3], + [0.0, unk, 0.7, 0.3], + ] + ), + # step 1: + torch.FloatTensor( + [ + # eos w1 w2 + # sentence 1: + [0.0, unk, 0.6, 0.4], + [0.0, unk, 0.6, 0.4], + # sentence 2: + [0.25, unk, 0.35, 0.4], + [0.25, unk, 0.35, 0.4], + ] + ), + # step 2: + torch.FloatTensor( + [ + # eos w1 w2 + # sentence 1: + [1.0, unk, 0.0, 0.0], + [1.0, unk, 0.0, 0.0], + # sentence 2: + [0.9, unk, 0.1, 0.0], + [0.9, unk, 0.1, 0.0], + ] + ), + ] + + task = test_utils.TestTranslationTask.setup_task(args, d, d) + self.model = 
task.build_model(args) + self.tgt_dict = task.target_dictionary + + def test_diverse_beam_search(self): + search_strategy = search.DiverseBeamSearch( + self.tgt_dict, num_groups=2, diversity_strength=0.0 + ) + generator = SequenceGenerator( + [self.model], + self.tgt_dict, + beam_size=2, + search_strategy=search_strategy, + ) + sample = { + "net_input": { + "src_tokens": self.src_tokens, + "src_lengths": self.src_lengths, + } + } + hypos = generator.forward(sample) + eos, w1, w2 = self.eos, self.w1, self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0]) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w1, w1, eos]) + self.assertHypoScore(hypos[0][1], [0.9, 0.6, 1.0]) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9]) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w2, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.4, 0.9]) + + +class TestDiverseSiblingsSearch(TestDiverseBeamSearch): + def assertHypoScore( + self, hypo, pos_probs, sibling_rank, diversity_rate, normalized=True, lenpen=1.0 + ): + pos_scores = torch.FloatTensor(pos_probs).log() + pos_scores.sub_(torch.Tensor(sibling_rank) * diversity_rate) + self.assertAlmostEqual(hypo["positional_scores"], pos_scores) + self.assertEqual(pos_scores.numel(), hypo["tokens"].numel()) + score = pos_scores.sum() + if normalized: + score /= pos_scores.numel() ** lenpen + self.assertLess(abs(score - hypo["score"]), 1e-6) + + def test_diverse_beam_search(self): + search_strategy = search.DiverseSiblingsSearch( + self.tgt_dict, diversity_rate=0.5 + ) + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, search_strategy=search_strategy + ) + sample = { + "net_input": { + "src_tokens": self.src_tokens, + "src_lengths": self.src_lengths, + } + } + hypos = generator.forward(sample) + eos, w1, w2 = self.eos, self.w1, 
self.w2 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, w1, eos]) + self.assertHypoScore(hypos[0][0], [0.9, 0.6, 1.0], [0, 1, 1], 0.5) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w1, w2, eos]) + self.assertHypoScore(hypos[0][1], [0.9, 0.4, 1.0], [0, 2, 1], 0.5) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w2, eos]) + self.assertHypoScore(hypos[1][0], [0.7, 0.4, 0.9], [0, 1, 1], 0.5) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w1, eos]) + self.assertHypoScore(hypos[1][1], [0.7, 0.35, 0.9], [0, 2, 1], 0.5) + + +class TestTopPSamplingSearch(TestSequenceGeneratorBase): + def setUp(self): + # construct dummy dictionary + d = test_utils.dummy_dictionary(vocab_size=2) + self.assertEqual(d.pad(), 1) + self.assertEqual(d.eos(), 2) + self.assertEqual(d.unk(), 3) + self.eos = d.eos() + self.w1 = 4 + self.w2 = 5 + + # construct source data + self.src_tokens = torch.LongTensor( + [ + [self.w1, self.w2, self.eos], + [self.w1, self.w2, self.eos], + ] + ) + self.src_lengths = torch.LongTensor([2, 2]) + + args = argparse.Namespace() + unk = 0.0 + # The minimal probability of top 2 tokens. + self.min_top2_prob = 0.75 + # The minimal probability of the top 1 token. 
+ self.min_top1_prob = 0.4 + + w1_prob = self.min_top1_prob + w2_prob = self.min_top2_prob - self.min_top1_prob + eos_prob = 1 - self.min_top2_prob + + args.beam_probs = [ + # step 0: + torch.FloatTensor( + [ + # eos w1 w2 + [0.0, unk, 1.0, 0.0], + [0.0, unk, 1.0, 0.0], + [0.0, unk, 1.0, 0.0], + [0.0, unk, 1.0, 0.0], + ] + ), + # step 1: + torch.FloatTensor( + [ + # eos w1 w2 + [eos_prob, unk, w1_prob, w2_prob], + [eos_prob, unk, w1_prob, w2_prob], + [eos_prob, unk, w1_prob, w2_prob], + [eos_prob, unk, w1_prob, w2_prob], + ] + ), + # step 2: + torch.FloatTensor( + [ + # eos w1 w2 + [1.0, unk, 0.0, 0.0], + [1.0, unk, 0.0, 0.0], + [1.0, unk, 0.0, 0.0], + [1.0, unk, 0.0, 0.0], + ] + ), + ] + + task = test_utils.TestTranslationTask.setup_task(args, d, d) + self.model = task.build_model(args) + self.tgt_dict = task.target_dictionary + + def test_topp_sampling_search_low_prob(self): + # Given a prob low enough to top-P sampling, we expect only the top + # 1 token to be sampled, which always results in the same output. 
+ low_sampling_topp = self.min_top1_prob / 2.0 + search_strategy = search.Sampling( + self.tgt_dict, sampling_topp=low_sampling_topp + ) + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, search_strategy=search_strategy + ) + sample = { + "net_input": { + "src_tokens": self.src_tokens, + "src_lengths": self.src_lengths, + } + } + hypos = generator.forward(sample) + eos, w1 = self.eos, self.w1 + # sentence 1, beam 1 + self.assertHypoTokens(hypos[0][0], [w1, w1, eos]) + self.assertHypoScore(hypos[0][0], [1.0, 0.4, 1.0]) + # sentence 1, beam 2 + self.assertHypoTokens(hypos[0][1], [w1, w1, eos]) + self.assertHypoScore(hypos[0][1], [1.0, 0.4, 1.0]) + # sentence 2, beam 1 + self.assertHypoTokens(hypos[1][0], [w1, w1, eos]) + self.assertHypoScore(hypos[1][0], [1.0, 0.4, 1.0]) + # sentence 2, beam 2 + self.assertHypoTokens(hypos[1][1], [w1, w1, eos]) + self.assertHypoScore(hypos[1][1], [1.0, 0.4, 1.0]) + + def test_topp_sampling_search_high_prob(self): + # Given a prob high enough to top-P sampling, any of the top 2 + # tokens could be sampled. This can cause different outputs. 
+ high_sampling_topp = (self.min_top1_prob + self.min_top2_prob) / 2.0 + search_strategy = search.Sampling( + self.tgt_dict, sampling_topp=high_sampling_topp + ) + generator = SequenceGenerator( + [self.model], self.tgt_dict, beam_size=2, search_strategy=search_strategy + ) + sample = { + "net_input": { + "src_tokens": self.src_tokens, + "src_lengths": self.src_lengths, + } + } + hypos = generator.forward(sample) + eos, w1, w2 = self.eos, self.w1, self.w2 + # sentence 1, beam 1 + self.assertTrue( + self.hypoTokens(hypos[0][0], [w1, w1, eos]) + or self.hypoTokens(hypos[0][0], [w1, w2, eos]) + ) + self.assertTrue( + self.hypoScore(hypos[0][0], [1.0, 0.4, 1.0]) + or self.hypoScore(hypos[0][0], [1.0, 0.35, 1.0]) + ) + + # sentence 1, beam 2 + self.assertTrue( + self.hypoTokens(hypos[0][1], [w1, w1, eos]) + or self.hypoTokens(hypos[0][1], [w1, w2, eos]) + ) + self.assertTrue( + self.hypoScore(hypos[0][1], [1.0, 0.4, 1.0]) + or self.hypoScore(hypos[0][1], [1.0, 0.35, 1.0]) + ) + + # sentence 2, beam 1 + self.assertTrue( + self.hypoTokens(hypos[1][0], [w1, w1, eos]) + or self.hypoTokens(hypos[1][0], [w1, w2, eos]) + ) + self.assertTrue( + self.hypoScore(hypos[1][0], [1.0, 0.4, 1.0]) + or self.hypoScore(hypos[1][0], [1.0, 0.35, 1.0]) + ) + + # sentence 2, beam 2 + self.assertTrue( + self.hypoTokens(hypos[1][1], [w1, w1, eos]) + or self.hypoTokens(hypos[1][1], [w1, w2, eos]) + ) + self.assertTrue( + self.hypoScore(hypos[1][1], [1.0, 0.4, 1.0]) + or self.hypoScore(hypos[1][1], [1.0, 0.35, 1.0]) + ) + + def hypoTokens(self, hypo, tokens): + return self.tensorEqual(hypo["tokens"], torch.LongTensor(tokens)) + + def hypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0): + pos_scores = torch.FloatTensor(pos_probs).log() + if not self.almostEqual(hypo["positional_scores"], pos_scores): + return False + if pos_scores.numel() != hypo["tokens"].numel(): + return False + score = pos_scores.sum() + if normalized: + score /= pos_scores.numel() ** lenpen + return abs(score - 
hypo["score"]) < 1e-6 + + def almostEqual(self, t1, t2): + return t1.size() == t2.size() and (t1 - t2).abs().max() < 1e-4 + + def tensorEqual(self, t1, t2): + return t1.size() == t2.size() and t1.ne(t2).long().sum() == 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_sequence_scorer.py b/fairseq/tests/test_sequence_scorer.py new file mode 100644 index 0000000..42f9447 --- /dev/null +++ b/fairseq/tests/test_sequence_scorer.py @@ -0,0 +1,120 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import unittest + +import tests.utils as test_utils +import torch +from fairseq.sequence_scorer import SequenceScorer + + +class TestSequenceScorer(unittest.TestCase): + def test_sequence_scorer(self): + # construct dummy dictionary + d = test_utils.dummy_dictionary(vocab_size=2) + self.assertEqual(d.pad(), 1) + self.assertEqual(d.eos(), 2) + self.assertEqual(d.unk(), 3) + eos = d.eos() + w1 = 4 + w2 = 5 + + # construct dataloader + data = [ + { + "source": torch.LongTensor([w1, w2, eos]), + "target": torch.LongTensor([w1, w2, w1, eos]), + }, + { + "source": torch.LongTensor([w2, eos]), + "target": torch.LongTensor([w2, w1, eos]), + }, + { + "source": torch.LongTensor([w2, eos]), + "target": torch.LongTensor([w2, eos]), + }, + ] + data_itr = test_utils.dummy_dataloader(data) + + # specify expected output probabilities + args = argparse.Namespace() + unk = 0.0 + args.beam_probs = [ + # step 0: + torch.FloatTensor( + [ + # eos w1 w2 + [0.0, unk, 0.6, 0.4], # sentence 1 + [0.0, unk, 0.4, 0.6], # sentence 2 + [0.0, unk, 0.7, 0.3], # sentence 3 + ] + ), + # step 1: + torch.FloatTensor( + [ + # eos w1 w2 + [0.0, unk, 0.2, 0.7], # sentence 1 + [0.0, unk, 0.8, 0.2], # sentence 2 + [0.7, unk, 0.1, 0.2], # sentence 3 + ] + ), + # step 2: + torch.FloatTensor( + [ + # eos w1 w2 + [0.10, unk, 0.50, 0.4], # 
sentence 1 + [0.15, unk, 0.15, 0.7], # sentence 2 + [0.00, unk, 0.00, 0.0], # sentence 3 + ] + ), + # step 3: + torch.FloatTensor( + [ + # eos w1 w2 + [0.9, unk, 0.05, 0.05], # sentence 1 + [0.0, unk, 0.00, 0.0], # sentence 2 + [0.0, unk, 0.00, 0.0], # sentence 3 + ] + ), + ] + expected_scores = [ + [0.6, 0.7, 0.5, 0.9], # sentence 1 + [0.6, 0.8, 0.15], # sentence 2 + [0.3, 0.7], # sentence 3 + ] + + task = test_utils.TestTranslationTask.setup_task(args, d, d) + model = task.build_model(args) + scorer = SequenceScorer(task.target_dictionary) + for sample in data_itr: + hypos = task.inference_step(scorer, [model], sample) + for id, hypos_id in zip(sample["id"].tolist(), hypos): + self.assertHypoTokens(hypos_id[0], data[id]["target"]) + self.assertHypoScore(hypos_id[0], expected_scores[id]) + + def assertHypoTokens(self, hypo, tokens): + self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens)) + + def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0): + pos_scores = torch.FloatTensor(pos_probs).log() + self.assertAlmostEqual(hypo["positional_scores"], pos_scores) + self.assertEqual(pos_scores.numel(), hypo["tokens"].numel()) + score = pos_scores.sum() + if normalized: + score /= pos_scores.numel() ** lenpen + self.assertLess(abs(score - hypo["score"]), 1e-6) + + def assertAlmostEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertLess((t1 - t2).abs().max(), 1e-4) + + def assertTensorEqual(self, t1, t2): + self.assertEqual(t1.size(), t2.size(), "size mismatch") + self.assertEqual(t1.ne(t2).long().sum(), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_sparse_multihead_attention.py b/fairseq/tests/test_sparse_multihead_attention.py new file mode 100644 index 0000000..3e32b25 --- /dev/null +++ b/fairseq/tests/test_sparse_multihead_attention.py @@ -0,0 +1,114 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
    def test_sparse_multihead_attention(self):
        """Build sparse masks for stride=4, expressivity=1 and compare them
        with hand-written expectations (bidirectional and unidirectional).

        NOTE(review): both comparisons below compute
        ``torch.all(torch.eq(...))`` but discard the result, so as written
        this test cannot fail on a mask mismatch. It only checks that
        ``buffered_sparse_mask`` runs. Consider wrapping the comparisons in
        ``self.assertTrue(...)`` once the expected masks are confirmed.
        """
        # Only passed through to buffered_sparse_mask; values are random.
        attn_weights = torch.randn(1, 8, 8)
        # Expected 8x8 additive mask: 0 = may attend, -inf = masked out.
        bidirectional_sparse_mask = torch.tensor(
            [
                [0, 0, 0, 0, 0, float("-inf"), float("-inf"), 0],
                [0, 0, 0, 0, 0, float("-inf"), float("-inf"), 0],
                [0, 0, 0, 0, 0, float("-inf"), float("-inf"), 0],
                [0, 0, 0, 0, 0, float("-inf"), float("-inf"), 0],
                [float("-inf"), float("-inf"), float("-inf"), 0, 0, 0, 0, 0],
                [float("-inf"), float("-inf"), float("-inf"), 0, 0, 0, 0, 0],
                [float("-inf"), float("-inf"), float("-inf"), 0, 0, 0, 0, 0],
                [float("-inf"), float("-inf"), float("-inf"), 0, 0, 0, 0, 0],
            ]
        )

        bidirectional_attention = SparseMultiheadAttention(
            16, 1, stride=4, expressivity=1, is_bidirectional=True
        )
        bidirectional_attention_sparse_mask = (
            bidirectional_attention.buffered_sparse_mask(attn_weights, 8, 8)
        )
        # NOTE(review): result is not asserted (see docstring).
        torch.all(
            torch.eq(bidirectional_attention_sparse_mask, bidirectional_sparse_mask)
        )

        # Expected mask for the unidirectional (is_bidirectional=False) case.
        sparse_mask = torch.tensor(
            [
                [
                    0,
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                ],
                [
                    0,
                    0,
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                ],
                [
                    0,
                    0,
                    0,
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                ],
                [
                    0,
                    0,
                    0,
                    0,
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                ],
                [0, 0, 0, 0, 0, float("-inf"), float("-inf"), float("-inf")],
                [
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    0,
                    0,
                    0,
                    float("-inf"),
                    float("-inf"),
                ],
                [
                    float("-inf"),
                    float("-inf"),
                    float("-inf"),
                    0,
                    0,
                    0,
                    0,
                    float("-inf"),
                ],
                [float("-inf"), float("-inf"), float("-inf"), 0, 0, 0, 0, 0],
            ]
        )

        attention = SparseMultiheadAttention(
            16, 1, stride=4, expressivity=1, is_bidirectional=False
        )
        attention_sparse_mask = attention.buffered_sparse_mask(attn_weights, 8, 8)

        # NOTE(review): result is not asserted (see docstring).
        torch.all(torch.eq(attention_sparse_mask, sparse_mask))
dtype=torch.long), + torch.tensor([8, 7, 6, 1], dtype=torch.long), + torch.tensor([9, 1], dtype=torch.long), + ] + ds = self._build_dataset(data, block_size=3, pad=0, eos=1, break_mode="none") + self.assertEqual(ds[0].tolist(), [5, 4, 3]) + self.assertEqual(ds[1].tolist(), [2, 1, 8]) + self.assertEqual(ds[2].tolist(), [7, 6, 1]) + self.assertEqual(ds[3].tolist(), [9, 1]) + + def test_complete_break_mode(self): + data = [ + torch.tensor([5, 4, 3, 2, 1], dtype=torch.long), + torch.tensor([8, 7, 6, 1], dtype=torch.long), + torch.tensor([9, 1], dtype=torch.long), + ] + ds = self._build_dataset( + data, block_size=6, pad=0, eos=1, break_mode="complete" + ) + self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1]) + self.assertEqual(ds[1].tolist(), [8, 7, 6, 1, 9, 1]) + + data = [ + torch.tensor([4, 3, 2, 1], dtype=torch.long), + torch.tensor([5, 1], dtype=torch.long), + torch.tensor([1], dtype=torch.long), + torch.tensor([6, 1], dtype=torch.long), + ] + ds = self._build_dataset( + data, block_size=3, pad=0, eos=1, break_mode="complete" + ) + self.assertEqual(ds[0].tolist(), [4, 3, 2, 1]) + self.assertEqual(ds[1].tolist(), [5, 1, 1]) + self.assertEqual(ds[2].tolist(), [6, 1]) + + def test_4billion_tokens(self): + """Regression test for numpy type promotion issue https://github.com/numpy/numpy/issues/5745""" + data = [torch.tensor(list(range(10000)), dtype=torch.long)] * 430000 + ds = self._build_dataset( + data, block_size=6, pad=0, eos=1, break_mode="complete" + ) + ds[-1] # __getitem__ works + start, end = ds.slice_indices[-1] + assert end > 4294967295 # data must be sufficiently large to overflow uint32 + assert not isinstance( + end + 1, float + ) # this would also raise, since np.uint64(1) + 1 => 2.0 + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_train.py b/fairseq/tests/test_train.py new file mode 100644 index 0000000..02ef94c --- /dev/null +++ b/fairseq/tests/test_train.py @@ -0,0 +1,247 @@ +# Copyright (c) Facebook, Inc. 
def mock_trainer(epoch, num_updates, iterations_in_epoch):
    """Return a ``MagicMock`` standing in for a trainer.

    ``load_checkpoint()`` yields a state dict whose ``train_iterator`` entry
    records ``epoch`` and ``iterations_in_epoch`` (with ``shuffle`` off), and
    ``get_num_updates()`` yields ``num_updates``.
    """
    iterator_state = {
        "epoch": epoch,
        "iterations_in_epoch": iterations_in_epoch,
        "shuffle": False,
    }
    fake = MagicMock()
    fake.get_num_updates.return_value = num_updates
    fake.load_checkpoint.return_value = {"train_iterator": iterator_state}
    return fake
self.patches = { + "os.makedirs": MagicMock(), + "os.path.join": MagicMock(), + "os.path.isfile": MagicMock(return_value=True), + "os.path.isabs": MagicMock(return_value=False), + "fairseq.file_io.PathManager.exists": MagicMock(return_value=False), + } + self.applied_patches = [patch(p, d) for p, d in self.patches.items()] + [p.start() for p in self.applied_patches] + logging.disable(logging.CRITICAL) + + def tearDown(self): + patch.stopall() + logging.disable(logging.NOTSET) + + def test_load_partial_checkpoint(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(2, 150, 200, 50) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + + _, epoch_itr = checkpoint_utils.load_checkpoint( + self.cfg_mock.checkpoint, trainer + ) + + self.assertEqual(epoch_itr.epoch, 2) + self.assertEqual(epoch_itr.iterations_in_epoch, 50) + + itr = epoch_itr.next_epoch_itr(shuffle=False) + self.assertEqual(epoch_itr.epoch, 2) + self.assertEqual(epoch_itr.iterations_in_epoch, 50) + + self.assertEqual(next(itr)["net_input"]["src_tokens"][0].item(), 50) + self.assertEqual(epoch_itr.iterations_in_epoch, 51) + + for _ in range(150 - 52): + next(itr) + self.assertEqual(epoch_itr.iterations_in_epoch, 149) + self.assertTrue(itr.has_next()) + next(itr) + self.assertFalse(itr.has_next()) + + itr = epoch_itr.next_epoch_itr(shuffle=False) + self.assertTrue(itr.has_next()) + self.assertEqual(epoch_itr.epoch, 3) + self.assertEqual(epoch_itr.iterations_in_epoch, 0) + + def test_load_full_checkpoint(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(2, 150, 300, 150) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + + _, epoch_itr = checkpoint_utils.load_checkpoint( + self.cfg_mock.checkpoint, trainer + ) + itr = epoch_itr.next_epoch_itr(shuffle=False) + + self.assertEqual(epoch_itr.epoch, 3) + self.assertEqual(epoch_itr.iterations_in_epoch, 0) + 
self.assertEqual(next(itr)["net_input"]["src_tokens"][0].item(), 0) + + def test_load_no_checkpoint(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(1, 150, 0, 0) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + self.patches["os.path.isfile"].return_value = False + + _, epoch_itr = checkpoint_utils.load_checkpoint( + self.cfg_mock.checkpoint, trainer + ) + itr = epoch_itr.next_epoch_itr(shuffle=False) + + self.assertEqual(epoch_itr.epoch, 1) + self.assertEqual(epoch_itr.iterations_in_epoch, 0) + self.assertEqual(next(itr)["net_input"]["src_tokens"][0].item(), 0) + + def test_finetune_from_model_args_conflict(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(1, 150, 0, 0) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + + for arg in [ + "reset_optimizer", + "reset_lr_scheduler", + "reset_meters", + "reset_dataloader", + ]: + with self.subTest(arg=arg): + cfg_mock = get_mock_cfg("/temp/checkpoint_pretrained.pt") + cfg_mock["checkpoint"][arg] = True + with self.assertRaises(Exception) as context: + _, _ = checkpoint_utils.load_checkpoint( + cfg_mock.checkpoint, trainer + ) + + self.assertTrue( + "--finetune-from-model can not be set together with either --reset-optimizer" + " or reset_lr_scheduler or reset_meters or reset_dataloader" + in str(context.exception) + ) + + def test_finetune_from_model(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(1, 150, 0, 0) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + from_model_path = "/temp/checkpoint_pretrained.pt" + + def mock_finetune_exist(path): + if path == from_model_path: + return True + else: + return False + + self.patches[ + "fairseq.file_io.PathManager.exists" + ].side_effect = mock_finetune_exist + cfg_mock = get_mock_cfg(from_model_path) + cfg_mock.checkpoint.restore_file = "checkpoint_last.pt" + _, _ 
= checkpoint_utils.load_checkpoint(cfg_mock.checkpoint, trainer) + ( + checkpoint_path, + reset_optimizer, + reset_lr_scheduler, + optimizer_overrides, + ) = trainer.load_checkpoint.call_args[0] + reset_meters = trainer.load_checkpoint.call_args[1]["reset_meters"] + self.assertTrue(reset_optimizer) + self.assertTrue(reset_lr_scheduler) + self.assertTrue(reset_meters) + + def test_finetune_from_model_resume(self): + with contextlib.redirect_stdout(StringIO()): + trainer, epoch_itr = get_trainer_and_epoch_itr(1, 150, 0, 0) + trainer.get_train_iterator = MagicMock(return_value=epoch_itr) + from_model_path = "/temp/checkpoint_pretrained.pt" + + # launch second time + # both restore_file=checkpoint_last.pt and finetune_from_model are set + def mock_finetune_exist(path): + if path == from_model_path or path.endsWith("checkpoint_last.pt"): + return True + else: + return False + + self.patches[ + "fairseq.file_io.PathManager.exists" + ].side_effect = mock_finetune_exist + cfg_mock = get_mock_cfg(from_model_path) + cfg_mock.checkpoint.restore_file = "checkpoint_last.pt" + _, _ = checkpoint_utils.load_checkpoint(cfg_mock.checkpoint, trainer) + ( + checkpoint_path, + reset_optimizer, + reset_lr_scheduler, + optimizer_overrides, + ) = trainer.load_checkpoint.call_args[0] + reset_meters = trainer.load_checkpoint.call_args[1]["reset_meters"] + self.assertFalse(reset_optimizer) + self.assertFalse(reset_lr_scheduler) + self.assertFalse(reset_meters) + + +if __name__ == "__main__": + unittest.main() diff --git a/fairseq/tests/test_transformer.py b/fairseq/tests/test_transformer.py new file mode 100644 index 0000000..de5c5bd --- /dev/null +++ b/fairseq/tests/test_transformer.py @@ -0,0 +1,65 @@ +import argparse +import unittest +from typing import Any, Dict, Sequence + +import torch +from fairseq.models import transformer + +from tests.test_roberta import FakeTask + + +def mk_sample(tok: Sequence[int] = None, batch_size: int = 2) -> Dict[str, Any]: + if not tok: + tok = [10, 11, 12, 
def mk_transformer(**extra_args: Any):
    """Build a tiny, dropout-free ``TransformerModel`` for tests.

    Keyword arguments override the defaults below before the
    ``tiny_architecture`` defaults are filled in. The torch RNG is seeded
    with 0 before the model is built.
    """
    defaults = {
        # Use characteristics dimensions
        "encoder_embed_dim": 12,
        "encoder_ffn_embed_dim": 14,
        "decoder_embed_dim": 12,
        "decoder_ffn_embed_dim": 14,
        # Disable dropout so we have comparable tests.
        "dropout": 0,
        "attention_dropout": 0,
        "activation_dropout": 0,
        "encoder_layerdrop": 0,
    }
    # Caller-supplied values win over the defaults from the parser.
    args = argparse.Namespace(**{**defaults, **extra_args})
    transformer.tiny_architecture(args)

    torch.manual_seed(0)
    return transformer.TransformerModel.build_model(args, FakeTask(args))
+ +import unittest + +import torch +from fairseq import utils + + +class TestUtils(unittest.TestCase): + def test_convert_padding_direction(self): + pad = 1 + left_pad = torch.LongTensor( + [ + [2, 3, 4, 5, 6], + [1, 7, 8, 9, 10], + [1, 1, 1, 11, 12], + ] + ) + right_pad = torch.LongTensor( + [ + [2, 3, 4, 5, 6], + [7, 8, 9, 10, 1], + [11, 12, 1, 1, 1], + ] + ) + + self.assertAlmostEqual( + right_pad, + utils.convert_padding_direction( + left_pad, + pad, + left_to_right=True, + ), + ) + self.assertAlmostEqual( + left_pad, + utils.convert_padding_direction( + right_pad, + pad, + right_to_left=True, + ), + ) + + def test_make_positions(self): + pad = 1 + left_pad_input = torch.LongTensor( + [ + [9, 9, 9, 9, 9], + [1, 9, 9, 9, 9], + [1, 1, 1, 9, 9], + ] + ) + left_pad_output = torch.LongTensor( + [ + [2, 3, 4, 5, 6], + [1, 2, 3, 4, 5], + [1, 1, 1, 2, 3], + ] + ) + right_pad_input = torch.LongTensor( + [ + [9, 9, 9, 9, 9], + [9, 9, 9, 9, 1], + [9, 9, 1, 1, 1], + ] + ) + right_pad_output = torch.LongTensor( + [ + [2, 3, 4, 5, 6], + [2, 3, 4, 5, 1], + [2, 3, 1, 1, 1], + ] + ) + + self.assertAlmostEqual( + left_pad_output, + utils.make_positions(left_pad_input, pad), + ) + self.assertAlmostEqual( + right_pad_output, + utils.make_positions(right_pad_input, pad), + ) + + def test_clip_grad_norm_(self): + params = torch.nn.Parameter(torch.zeros(5)).requires_grad_(False) + grad_norm = utils.clip_grad_norm_(params, 1.0) + self.assertTrue(torch.is_tensor(grad_norm)) + self.assertEqual(grad_norm, 0.0) + + params = [torch.nn.Parameter(torch.zeros(5)) for i in range(3)] + for p in params: + p.grad = torch.full((5,), fill_value=2.0) + grad_norm = utils.clip_grad_norm_(params, 1.0) + exp_grad_norm = torch.full((15,), fill_value=2.0).norm() + self.assertTrue(torch.is_tensor(grad_norm)) + self.assertEqual(grad_norm, exp_grad_norm) + + grad_norm = utils.clip_grad_norm_(params, 1.0) + self.assertAlmostEqual(grad_norm, torch.tensor(1.0)) + + def 
def write_empty_file(path):
    """Create ``path`` as an empty file (truncating it if present)."""
    handle = open(path, "w")
    handle.close()
    assert os.path.exists(path)
make_lm_config(data_dir, extra_flags=extra_flags) + raise_if_valid_subsets_unintentionally_ignored(cfg) + + def test_default_raises(self): + with self.assertRaises(ValueError): + self._test_case(["valid", "valid1"], []) + with self.assertRaises(ValueError): + self._test_case( + ["valid", "valid1", "valid2"], ["--valid-subset", "valid,valid1"] + ) + + def partially_specified_valid_subsets(self): + with self.assertRaises(ValueError): + self._test_case( + ["valid", "valid1", "valid2"], ["--valid-subset", "valid,valid1"] + ) + # Fix with ignore unused + self._test_case( + ["valid", "valid1", "valid2"], + ["--valid-subset", "valid,valid1", "--ignore-unused-valid-subsets"], + ) + + def test_legal_configs(self): + self._test_case(["valid"], []) + self._test_case(["valid", "valid1"], ["--ignore-unused-valid-subsets"]) + self._test_case(["valid", "valid1"], ["--combine-val"]) + self._test_case(["valid", "valid1"], ["--valid-subset", "valid,valid1"]) + self._test_case(["valid", "valid1"], ["--valid-subset", "valid1"]) + self._test_case( + ["valid", "valid1"], ["--combine-val", "--ignore-unused-valid-subsets"] + ) + self._test_case( + ["valid1"], ["--valid-subset", "valid1"] + ) # valid.bin doesn't need to be ignored. 
class TestCombineValidSubsets(unittest.TestCase):
    """Train a tiny LM with two validation subsets and inspect the logs."""

    def _train(self, extra_flags):
        """Run a zero-update training job and return the captured log messages.

        The ``valid`` split is copied to ``valid1`` so that two validation
        subsets exist on disk.
        """
        with self.assertLogs() as logs:
            with tempfile.TemporaryDirectory("test_transformer_lm") as data_dir:
                create_dummy_data(data_dir, num_examples=20)
                preprocess_lm_data(data_dir)

                shutil.copyfile(f"{data_dir}/valid.bin", f"{data_dir}/valid1.bin")
                shutil.copyfile(f"{data_dir}/valid.idx", f"{data_dir}/valid1.idx")
                train_language_model(
                    data_dir,
                    "transformer_lm",
                    ["--max-update", "0", "--log-format", "json"] + extra_flags,
                    run_validation=False,
                )
        return [x.message for x in logs.records]

    def test_combined(self):
        """--combine-valid-subsets loads valid1 but reports no valid1 metric."""
        flags = ["--combine-valid-subsets", "--required-batch-size-multiple", "1"]
        logs = self._train(flags)
        # unittest assertions (not bare ``assert``) so the checks survive -O.
        # valid1 is mentioned somewhere in the logs (it was loaded)
        self.assertTrue(any("valid1" in x for x in logs))
        # metrics are combined: no per-subset valid1 perplexity
        self.assertFalse(any("valid1_ppl" in x for x in logs))

    def test_subsets(self):
        """Listing both subsets explicitly reports a metric for each one."""
        flags = [
            "--valid-subset",
            "valid,valid1",
            "--required-batch-size-multiple",
            "1",
        ]
        logs = self._train(flags)
        # metrics are reported separately for each requested subset
        self.assertTrue(any("valid_ppl" in x for x in logs))
        self.assertTrue(any("valid1_ppl" in x for x in logs))
def dummy_dataloader(
    samples,
    padding_idx=1,
    eos_idx=2,
    batch_size=None,
):
    """Return an iterator over collated batches built from ``samples``.

    Samples lacking an ``"id"`` key receive their list position as id (the
    dicts are modified in place). When ``batch_size`` is ``None`` all
    samples go into a single batch.
    """
    # add any missing data to samples
    for position, sample in enumerate(samples):
        sample.setdefault("id", position)

    def _collate(batch):
        return collate(batch, padding_idx, eos_idx)

    # create dataloader
    loader = torch.utils.data.DataLoader(
        TestDataset(samples),
        batch_size=len(samples) if batch_size is None else batch_size,
        collate_fn=_collate,
    )
    return iter(loader)
def create_dummy_data(
    data_dir, num_examples=100, maxlen=20, alignment=False, languages=None
):
    """Write random train/valid/test ``.in``/``.out`` text files.

    Every file holds ``num_examples`` lines of 1..``maxlen`` space-separated
    lowercase letters. With ``languages`` the six files are written into
    one sub-directory of ``data_dir`` per language, otherwise into
    ``data_dir`` itself. With ``alignment`` a ``<split>.align`` file of
    random ``src-tgt`` index pairs is added for each split, read from the
    files directly under ``data_dir``.
    """

    def _create_dummy_data(dir, filename):
        # Character codes for 'a'..'z', one pool large enough for the file.
        codes = 97 + torch.floor(26 * torch.rand(num_examples * maxlen)).int()
        cursor = 0
        with open(os.path.join(dir, filename), "w") as out:
            for _ in range(num_examples):
                length = random.randint(1, maxlen)
                letters = map(chr, codes[cursor : cursor + length])
                print(" ".join(letters), file=out)
                cursor += length

    def _create_dummy_alignment_data(filename_src, filename_tgt, filename):
        src_path = os.path.join(data_dir, filename_src)
        tgt_path = os.path.join(data_dir, filename_tgt)
        out_path = os.path.join(data_dir, filename)
        with open(src_path, "r") as src_f, open(tgt_path, "r") as tgt_f, open(
            out_path, "w"
        ) as out:
            for src_line, tgt_line in zip(src_f, tgt_f):
                src_len = len(src_line.split())
                tgt_len = len(tgt_line.split())
                avg_len = (src_len + tgt_len) // 2
                num_alignments = random.randint(avg_len // 2, 2 * avg_len)
                src_indices = torch.floor(torch.rand(num_alignments) * src_len).int()
                tgt_indices = torch.floor(torch.rand(num_alignments) * tgt_len).int()
                pairs = [
                    "{}-{}".format(s, t) for s, t in zip(src_indices, tgt_indices)
                ]
                print(" ".join(pairs), file=out)

    splits = ("train", "valid", "test")
    files_to_write = [f"{split}.{side}" for split in splits for side in ("in", "out")]

    if languages is None:  # En only dummy dataset
        target_dirs = [data_dir]
    else:
        target_dirs = [os.path.join(data_dir, lang) for lang in languages]

    for target_dir in target_dirs:
        if languages is not None:
            os.makedirs(target_dir, exist_ok=True)
        for name in files_to_write:
            _create_dummy_data(target_dir, name)

    if alignment:
        for split in splits:
            _create_dummy_alignment_data(
                f"{split}.in", f"{split}.out", f"{split}.align"
            )
"--destdir", + lang_dir, + ] + ) + preprocess.main(preprocess_args) + shutil.copyfile( + os.path.join(data_dir, languages[0], "dict.txt"), + os.path.join(data_dir, "dict.txt"), + ) + + +def preprocess_translation_data(data_dir, extra_flags=None): + preprocess_parser = options.get_preprocessing_parser() + preprocess_args = preprocess_parser.parse_args( + [ + "--source-lang", + "in", + "--target-lang", + "out", + "--trainpref", + os.path.join(data_dir, "train"), + "--validpref", + os.path.join(data_dir, "valid"), + "--testpref", + os.path.join(data_dir, "test"), + "--thresholdtgt", + "0", + "--thresholdsrc", + "0", + "--destdir", + data_dir, + ] + + (extra_flags or []), + ) + preprocess.main(preprocess_args) + + +def preprocess_summarization_data(data_dir, extra_flags=None): + preprocess_parser = options.get_preprocessing_parser() + preprocess_args = preprocess_parser.parse_args( + [ + "--source-lang", + "in", + "--target-lang", + "out", + "--trainpref", + os.path.join(data_dir, "train"), + "--validpref", + os.path.join(data_dir, "valid"), + "--testpref", + os.path.join(data_dir, "test"), + "--thresholdtgt", + "0", + "--thresholdsrc", + "0", + "--joined-dictionary", + "--destdir", + data_dir, + ] + + (extra_flags or []), + ) + preprocess.main(preprocess_args) + + +def create_laser_data_and_config_json(data_dir): + src_langs = ["de", "fr", "ru", "tr", "zh"] + tgt_langs = ["en", "es"] + config_json = {} + config_train_json = [] + src_vocab = None + tgt_vocab = None + + for src_lang in src_langs: + for tgt_lang in tgt_langs: + langpair_folder = f"{src_lang}-{tgt_lang}" + + langpair_path = os.path.join(data_dir, langpair_folder) + os.mkdir(langpair_path) + create_dummy_data(langpair_path) + preprocess_translation_data(langpair_path, ["--dataset-impl", "cached"]) + + src_vocab = os.path.join(langpair_path, "dict.in.txt") + tgt_vocab = os.path.join(langpair_path, "dict.out.txt") + config_train_json.append( + { + "id": 0 if tgt_lang == "en" else 1, + "src": 
os.path.join(langpair_path, "train.in-out.in"), + "tgt": os.path.join(langpair_path, "train.in-out.out"), + } + ) + + config_json["src_vocab"] = src_vocab + config_json["tgt_vocab"] = tgt_vocab + config_json["train"] = config_train_json + + with open(os.path.join(data_dir, "laserconfig.json"), "w") as config_file: + json.dump(config_json, config_file) + + return config_file + + +def train_translation_model( + data_dir, + arch, + extra_flags=None, + task="translation", + run_validation=False, + lang_flags=None, + extra_valid_flags=None, + world_size=1, +): + if lang_flags is None: + lang_flags = [ + "--source-lang", + "in", + "--target-lang", + "out", + ] + train_parser = options.get_training_parser() + train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + task, + data_dir, + "--save-dir", + data_dir, + "--arch", + arch, + "--optimizer", + "nag", + "--lr", + "0.05", + "--max-tokens", + "500", + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + str(world_size), + "--num-workers", + "0", + ] + + lang_flags + + (extra_flags or []), + ) + + cfg = convert_namespace_to_omegaconf(train_args) + distributed_utils.call_main(cfg, train.main) + + if run_validation: + # test validation + validate_parser = options.get_validation_parser() + validate_args = options.parse_args_and_arch( + validate_parser, + [ + "--task", + task, + data_dir, + "--path", + os.path.join(data_dir, "checkpoint_last.pt"), + "--valid-subset", + "valid", + "--max-tokens", + "500", + "--no-progress-bar", + "--num-workers", + "0", + ] + + lang_flags + + (extra_valid_flags or []), + ) + validate.main(validate_args) + + +def generate_main(data_dir, extra_flags=None, path=None): + if extra_flags is None: + extra_flags = [ + "--print-alignment", + ] + if path is None: + path = os.path.join(data_dir, "checkpoint_last.pt") + generate_parser = options.get_generation_parser() + generate_args = options.parse_args_and_arch( + generate_parser, + [ + data_dir, + "--path", 
+ path, + "--beam", + "3", + "--batch-size", + "64", + "--max-len-b", + "5", + "--gen-subset", + "valid", + "--no-progress-bar", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + + # evaluate model in batch mode + generate.main(generate_args) + + # evaluate model interactively + generate_args.buffer_size = 0 + generate_args.input = "-" + generate_args.batch_size = None + orig_stdin = sys.stdin + sys.stdin = StringIO("h e l l o\n") + interactive.main(generate_args) + sys.stdin = orig_stdin + + +class TestDataset(torch.utils.data.Dataset): + def __init__(self, data): + super().__init__() + self.data = data + self.sizes = None + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return len(self.data) + + +class TestTranslationTask(LegacyFairseqTask): + def __init__(self, args, src_dict, tgt_dict, model): + super().__init__(args) + self.src_dict = src_dict + self.tgt_dict = tgt_dict + self.model = model + + @classmethod + def setup_task(cls, args, src_dict=None, tgt_dict=None, model=None): + return cls(args, src_dict, tgt_dict, model) + + def build_model(self, args, from_checkpoint=False): + return TestModel.build_model(args, self) + + @property + def source_dictionary(self): + return self.src_dict + + @property + def target_dictionary(self): + return self.tgt_dict + + +class TestModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def build_model(cls, args, task): + encoder = TestEncoder(args, task.source_dictionary) + decoder = TestIncrementalDecoder(args, task.target_dictionary) + return cls(encoder, decoder) + + +class TestEncoder(FairseqEncoder): + def __init__(self, args, dictionary): + super().__init__(dictionary) + self.args = args + + def forward(self, src_tokens, src_lengths=None, **kwargs): + return EncoderOut( + encoder_out=src_tokens, + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + 
src_lengths=None, + ) + + def reorder_encoder_out(self, encoder_out, new_order): + return EncoderOut( + encoder_out=encoder_out.encoder_out.index_select(0, new_order), + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + src_lengths=None, + ) + + +class TestIncrementalDecoder(FairseqIncrementalDecoder): + def __init__(self, args, dictionary): + super().__init__(dictionary) + assert hasattr(args, "beam_probs") or hasattr(args, "probs") + args.max_decoder_positions = getattr(args, "max_decoder_positions", 100) + self.args = args + + def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None): + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + bbsz = prev_output_tokens.size(0) + vocab = len(self.dictionary) + src_len = encoder_out.encoder_out.size(1) + tgt_len = prev_output_tokens.size(1) + + # determine number of steps + if incremental_state is not None: + # cache step number + step = utils.get_incremental_state(self, incremental_state, "step") + if step is None: + step = 0 + utils.set_incremental_state(self, incremental_state, "step", step + 1) + steps = [step] + else: + steps = list(range(tgt_len)) + + # define output in terms of raw probs + if hasattr(self.args, "probs"): + assert ( + self.args.probs.dim() == 3 + ), "expected probs to have size bsz*steps*vocab" + probs = self.args.probs.index_select(1, torch.LongTensor(steps)) + else: + probs = torch.FloatTensor(bbsz, len(steps), vocab).zero_() + for i, step in enumerate(steps): + # args.beam_probs gives the probability for every vocab element, + # starting with eos, then unknown, and then the rest of the vocab + if step < len(self.args.beam_probs): + probs[:, i, self.dictionary.eos() :] = self.args.beam_probs[step] + else: + probs[:, i, self.dictionary.eos()] = 1.0 + + # random attention + attn = torch.rand(bbsz, tgt_len, src_len) + + dev = prev_output_tokens.device + return probs.to(dev), {"attn": 
[attn.to(dev)]} + + def get_normalized_probs(self, net_output, log_probs, _): + # the decoder returns probabilities directly + probs = net_output[0] + if log_probs: + return probs.log() + else: + return probs + + def max_positions(self): + return self.args.max_decoder_positions + + +class TestReshapingEncoder(FairseqEncoder): + def __init__(self, args, dictionary): + super().__init__(dictionary) + self.args = args + + def forward(self, src_tokens, src_lengths=None, **kwargs): + b_sz, t_sz = src_tokens.shape + padding_needed = t_sz % 2 + x = src_tokens + if padding_needed > 0: + padding_needed = 2 - padding_needed + x = F.pad(x, (0, padding_needed)) + + return EncoderOut( + encoder_out=x.view(b_sz, -1, 2), + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + src_lengths=None, + ) + + def reorder_encoder_out(self, encoder_out, new_order): + return EncoderOut( + encoder_out=encoder_out.encoder_out.index_select(0, new_order), + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + src_lengths=None, + ) + + +class TestReshapingModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def build_model(cls, args, task): + encoder = TestReshapingEncoder(args, task.source_dictionary) + decoder = TestIncrementalDecoder(args, task.target_dictionary) + return cls(encoder, decoder) + + +class TestAdditionalInputEncoder(FairseqEncoder): + def __init__(self, args, dictionary): + super().__init__(dictionary) + self.args = args + + def forward(self, src_tokens, src_lengths=None, **kwargs): + assert "fancy_other_input" in kwargs + assert kwargs["fancy_other_input"] is not None + return EncoderOut( + encoder_out=src_tokens, + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + src_lengths=None, + ) + + def reorder_encoder_out(self, encoder_out, new_order): + return EncoderOut( + 
encoder_out=encoder_out.encoder_out.index_select(0, new_order), + encoder_padding_mask=None, + encoder_embedding=None, + encoder_states=None, + src_tokens=None, + src_lengths=None, + ) + + +class TestAdditionalInputModel(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder): + super().__init__(encoder, decoder) + + @classmethod + def build_model(cls, args, task): + encoder = TestAdditionalInputEncoder(args, task.source_dictionary) + decoder = TestIncrementalDecoder(args, task.target_dictionary) + return cls(encoder, decoder) + + def forward(self, src_tokens, src_lengths, prev_output_tokens, **kwargs): + encoder_out = self.encoder(src_tokens, src_lengths=src_lengths, **kwargs) + decoder_out = self.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) + return decoder_out + + +def train_language_model( + data_dir, + arch, + extra_flags=None, + run_validation=False, + extra_valid_flags=None, + task="language_modeling", + world_size=1, +): + train_parser = options.get_training_parser() + train_args = options.parse_args_and_arch( + train_parser, + [ + "--task", + task, + data_dir, + "--arch", + arch, + "--optimizer", + "adam", + "--lr", + "0.0001", + "--max-tokens", + "500", + "--tokens-per-sample", + "500", + "--save-dir", + data_dir, + "--max-epoch", + "1", + "--no-progress-bar", + "--distributed-world-size", + str(world_size), + "--ddp-backend", + "no_c10d", + "--num-workers", + "0", + ] + + (extra_flags or []), + ) + cfg = convert_namespace_to_omegaconf(train_args) + distributed_utils.call_main(cfg, train.main) + + if run_validation: + # test validation + validate_parser = options.get_validation_parser() + validate_args = options.parse_args_and_arch( + validate_parser, + [ + "--task", + task, + data_dir, + "--path", + os.path.join(data_dir, "checkpoint_last.pt"), + "--valid-subset", + "valid", + "--max-tokens", + "500", + "--no-progress-bar", + "--num-workers", + "0", + ] + + (extra_valid_flags or []), + ) + validate.main(validate_args) 
+ + +def sizes(data): + return [len(sentence) for sentence in data] + + +POPULATION = string.ascii_letters + string.digits + + +def make_sentence() -> tp.List[str]: + length = random.randint(10, 50) + return random.choices( + population=POPULATION, k=length, weights=range(1, len(POPULATION) + 1) + ) + + +def make_data(length=1000, out_file=None) -> tp.List[tp.List[str]]: + data = ( + [make_sentence() for _ in range(0, length)] + # add all the symbols at least once + + [list(string.ascii_letters), list(string.digits)] + ) + if out_file is not None: + with open(out_file, "w", encoding="utf-8") as out: + for s in data: + print(" ".join(s), file=out) + + return data + + +def build_vocab(data: tp.List[tp.List[str]]) -> Dictionary: + d = Dictionary() + for s in data: + for token in s: + d.add_symbol(token) + d.finalize() + return d diff --git a/fairseq/train.py b/fairseq/train.py new file mode 100644 index 0000000..321de3d --- /dev/null +++ b/fairseq/train.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Legacy entry point. Use fairseq_cli/train.py or fairseq-train instead. 
+""" + +from fairseq_cli.train import cli_main + + +if __name__ == "__main__": + cli_main() diff --git a/inference_av2av.py b/inference_av2av.py new file mode 100644 index 0000000..6f6ddae --- /dev/null +++ b/inference_av2av.py @@ -0,0 +1,165 @@ +import os +import argparse +import numpy as np +import torch +import torch.nn.functional as F + +from fairseq import utils +from fairseq_cli.generate import get_symbols_to_strip_from_output + +from av2unit.inference import load_model as load_av2unit_model +from unit2unit.inference import load_model as load_unit2unit_model +from unit2av.inference import load_model as load_unit2av_model, load_speaker_encoder_model + +from util import process_units, extract_audio_from_video, save_video + +class AVSpeechToAVSpeechPipeline: + def __init__(self, + av2unit_model, av2unit_task, + unit2unit_task, unit2unit_generator, + unit2av_model, speaker_encoder, + use_cuda=False + ): + self.av2unit_model = av2unit_model + self.av2unit_task = av2unit_task + self.unit2unit_task = unit2unit_task + self.unit2unit_generator = unit2unit_generator + self.unit2av_model = unit2av_model + self.speaker_encoder = speaker_encoder + self.use_cuda = use_cuda + + def process_av2unit(self, lip_video_path, audio_path): + task = self.av2unit_task + video_feats, audio_feats = task.dataset.load_feature((lip_video_path, audio_path)) + audio_feats, video_feats = torch.from_numpy(audio_feats.astype(np.float32)) if audio_feats is not None else None, torch.from_numpy(video_feats.astype(np.float32)) if video_feats is not None else None + if task.dataset.normalize and 'audio' in task.dataset.modalities: + with torch.no_grad(): + audio_feats = F.layer_norm(audio_feats, audio_feats.shape[1:]) + + collated_audios, _, _ = task.dataset.collater_audio([audio_feats], len(audio_feats)) + collated_videos, _, _ = task.dataset.collater_audio([video_feats], len(video_feats)) + + sample = {"source": { + "audio": collated_audios, "video": collated_videos, + }} + sample = 
utils.move_to_cuda(sample) if self.use_cuda else sample + + pred = task.inference( + self.av2unit_model, + sample, + ) + pred_str = task.dictionaries[0].string(pred.int().cpu()) + + return pred_str + + def process_unit2unit(self, unit): + task = self.unit2unit_task + unit = list(map(int, unit.strip().split())) + unit = task.source_dictionary.encode_line( + " ".join(map(lambda x: str(x), process_units(unit, reduce=True))), + add_if_not_exist=False, + append_eos=True, + ).long() + unit = torch.cat([ + unit.new([task.source_dictionary.bos()]), + unit, + unit.new([task.source_dictionary.index("[{}]".format(task.source_language))]) + ]) + + sample = {"net_input": { + "src_tokens": torch.LongTensor(unit).view(1,-1), + }} + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + + pred = task.inference_step( + self.unit2unit_generator, + None, + sample, + )[0][0] + + pred_str = task.target_dictionary.string( + pred["tokens"].int().cpu(), + extra_symbols_to_ignore=get_symbols_to_strip_from_output(self.unit2unit_generator) + ) + + return pred_str + + def process_unit2av(self, unit, audio_path, video_path, bbox_path): + unit = list(map(int, unit.strip().split())) + + sample = { + "code": torch.LongTensor(unit).view(1,-1), + "spkr": torch.from_numpy(self.speaker_encoder.get_embed(audio_path)).view(1,1,-1), + } + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + + wav, video, full_video, bbox = self.unit2av_model(sample, video_path, bbox_path, dur_prediction=True) + + return wav, video, full_video, bbox + +def main(args): + use_cuda = torch.cuda.is_available() and not args.cpu + + av2unit_model, av2unit_task = load_av2unit_model(args.av2unit_path, args.modalities, use_cuda=use_cuda) + unit2unit_task, unit2unit_generator = load_unit2unit_model(args.utut_path, args.src_lang, args.tgt_lang, use_cuda=use_cuda) + cfg_path = os.path.join("unit2av", "config.json") + unit2av_model = load_unit2av_model(args.unit2av_path, cfg_path, args.tgt_lang, 
use_cuda=use_cuda) + speaker_encoder_model = load_speaker_encoder_model(os.path.join("unit2av", "encoder.pt"), use_cuda=use_cuda) + + pipeline = AVSpeechToAVSpeechPipeline( + av2unit_model, av2unit_task, + unit2unit_task, unit2unit_generator, + unit2av_model, speaker_encoder_model, + use_cuda=use_cuda + ) + + temp_audio_path = os.path.splitext(args.in_vid_path)[0]+".temp.wav" + lip_video_path = os.path.splitext(args.in_vid_path)[0]+".lip.mp4" + bbox_path = os.path.splitext(args.in_vid_path)[0]+".bbox.pkl" + extract_audio_from_video(args.in_vid_path, temp_audio_path) + + src_unit = pipeline.process_av2unit(lip_video_path, temp_audio_path) + tgt_unit = pipeline.process_unit2unit(src_unit) + tgt_audio, tgt_video, full_video, bbox = pipeline.process_unit2av(tgt_unit, temp_audio_path, args.in_vid_path, bbox_path) + + save_video(tgt_audio, tgt_video, full_video, bbox, args.out_vid_path) + + os.remove(temp_audio_path) + +def cli_main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--in-vid-path", type=str, required=True, help="File path of source video input" + ) + parser.add_argument( + "--out-vid-path", type=str, required=True, help="File path of translated video output" + ) + parser.add_argument( + "--src-lang", type=str, required=True, + choices=["en","es","fr","it","pt"], + help="source language" + ) + parser.add_argument( + "--tgt-lang", type=str, required=True, + choices=["en","es","fr","it","pt"], + help="target language" + ) + parser.add_argument( + "--modalities", type=str, default="audio,video", help="input modalities", + choices=["audio,video","audio","video"], + ) + parser.add_argument( + "--av2unit-path", type=str, required=True, help="path to the mAV-HuBERT pre-trained model" + ) + parser.add_argument( + "--utut-path", type=str, required=True, help="path to the UTUT pre-trained model" + ) + parser.add_argument( + "--unit2av-path", type=str, required=True, help="path to the Unit AV Renderer" + ) + parser.add_argument("--cpu", 
action="store_true", help="run on CPU") + args = parser.parse_args() + main(args) + +if __name__ == "__main__": + cli_main() diff --git a/notebooks/check_checkpoint.ipynb b/notebooks/check_checkpoint.ipynb new file mode 100644 index 0000000..b599267 --- /dev/null +++ b/notebooks/check_checkpoint.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2e8cb32a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Top Level Keys ===\n", + "dict_keys(['audio', 'video'])\n", + "\n", + "\n", + "=== Audio model Keys ===\n", + "dict_keys(['en', 'es', 'fr', 'it', 'pt'])\n", + "\n", + "=== Keys (Layer Names) - First 10 ===\n", + "conv_pre.bias\n", + "conv_pre.weight_g\n", + "conv_pre.weight_v\n", + "ups.0.bias\n", + "ups.0.weight_g\n", + "ups.0.weight_v\n", + "ups.1.bias\n", + "ups.1.weight_g\n", + "ups.1.weight_v\n", + "ups.2.bias\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# 체크포인트 파일 경로 (확인하고 싶은 파일 경로로 변경하세요)\n", + "checkpoint_path = \"/home/2022113135/av2av/checkpoints/unit_av_renderer.pt\" \n", + "\n", + "try:\n", + " # CPU로 로드 (GPU가 없어도 확인 가능하게)\n", + " checkpoint = torch.load(checkpoint_path, map_location='cpu')\n", + " \n", + " # 1. 최상위 키(Key) 확인\n", + " print(\"=== Top Level Keys ===\")\n", + " print(checkpoint.keys())\n", + " print(\"\\n\")\n", + "\n", + " checkpoint = checkpoint['audio']\n", + " print(\"=== Audio model Keys ===\")\n", + " print(checkpoint.keys())\n", + " print()\n", + " checkpoint = checkpoint['en']\n", + "\n", + " # 2. 'generator' 혹은 모델 State Dict 키 내부 확인\n", + " # (보통 'generator', 'model', 'state_dict' 등의 키를 사용함)\n", + " if 'generator' in checkpoint:\n", + " print(\"=== Generator State Dict Keys (Layer Names) - First 10 ===\")\n", + " # 너무 많으니까 처음 10개만 출력\n", + " for key in list(checkpoint['generator'].keys())[:10]:\n", + " print(key)\n", + " \n", + " # 3. 
모델의 Weight Shape 확인 (하나만 예시로)\n", + " first_key = list(checkpoint['generator'].keys())[0]\n", + " print(f\"\\nShape of '{first_key}': {checkpoint['generator'][first_key].shape}\")\n", + " \n", + " elif 'state_dict' in checkpoint:\n", + " print(\"=== State Dict Keys (Layer Names) - First 10 ===\")\n", + " for key in list(checkpoint['state_dict'].keys())[:10]:\n", + " print(key)\n", + "\n", + " else:\n", + " # 키가 없고 바로 State Dict인 경우\n", + " print(\"=== Keys (Layer Names) - First 10 ===\")\n", + " for key in list(checkpoint.keys())[:10]:\n", + " print(key)\n", + "\n", + "except FileNotFoundError:\n", + " print(f\"Error: 파일을 찾을 수 없습니다. 경로를 확인해주세요: {checkpoint_path}\")\n", + "except Exception as e:\n", + " print(f\"Error 발생: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "de3a590a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Top Level Keys ===\n", + "dict_keys(['generator'])\n", + "\n", + "\n", + "=== Generator Keys ===\n", + "conv_pre.bias\n", + "conv_pre.weight_g\n", + "conv_pre.weight_v\n", + "ups.0.bias\n", + "ups.0.weight_g\n", + "ups.0.weight_v\n", + "ups.1.bias\n", + "ups.1.weight_g\n", + "ups.1.weight_v\n", + "ups.2.bias\n", + "ups.2.weight_g\n", + "ups.2.weight_v\n", + "ups.3.bias\n", + "ups.3.weight_g\n", + "ups.3.weight_v\n", + "ups.4.bias\n", + "ups.4.weight_g\n", + "ups.4.weight_v\n", + "resblocks.0.convs1.0.bias\n", + "resblocks.0.convs1.0.weight_g\n", + "resblocks.0.convs1.0.weight_v\n", + "resblocks.0.convs1.1.bias\n", + "resblocks.0.convs1.1.weight_g\n", + "resblocks.0.convs1.1.weight_v\n", + "resblocks.0.convs1.2.bias\n", + "resblocks.0.convs1.2.weight_g\n", + "resblocks.0.convs1.2.weight_v\n", + "resblocks.0.convs2.0.bias\n", + "resblocks.0.convs2.0.weight_g\n", + "resblocks.0.convs2.0.weight_v\n", + "resblocks.0.convs2.1.bias\n", + "resblocks.0.convs2.1.weight_g\n", + "resblocks.0.convs2.1.weight_v\n", + "resblocks.0.convs2.2.bias\n", + 
"resblocks.0.convs2.2.weight_g\n", + "resblocks.0.convs2.2.weight_v\n", + "resblocks.1.convs1.0.bias\n", + "resblocks.1.convs1.0.weight_g\n", + "resblocks.1.convs1.0.weight_v\n", + "resblocks.1.convs1.1.bias\n", + "resblocks.1.convs1.1.weight_g\n", + "resblocks.1.convs1.1.weight_v\n", + "resblocks.1.convs1.2.bias\n", + "resblocks.1.convs1.2.weight_g\n", + "resblocks.1.convs1.2.weight_v\n", + "resblocks.1.convs2.0.bias\n", + "resblocks.1.convs2.0.weight_g\n", + "resblocks.1.convs2.0.weight_v\n", + "resblocks.1.convs2.1.bias\n", + "resblocks.1.convs2.1.weight_g\n", + "resblocks.1.convs2.1.weight_v\n", + "resblocks.1.convs2.2.bias\n", + "resblocks.1.convs2.2.weight_g\n", + "resblocks.1.convs2.2.weight_v\n", + "resblocks.2.convs1.0.bias\n", + "resblocks.2.convs1.0.weight_g\n", + "resblocks.2.convs1.0.weight_v\n", + "resblocks.2.convs1.1.bias\n", + "resblocks.2.convs1.1.weight_g\n", + "resblocks.2.convs1.1.weight_v\n", + "resblocks.2.convs1.2.bias\n", + "resblocks.2.convs1.2.weight_g\n", + "resblocks.2.convs1.2.weight_v\n", + "resblocks.2.convs2.0.bias\n", + "resblocks.2.convs2.0.weight_g\n", + "resblocks.2.convs2.0.weight_v\n", + "resblocks.2.convs2.1.bias\n", + "resblocks.2.convs2.1.weight_g\n", + "resblocks.2.convs2.1.weight_v\n", + "resblocks.2.convs2.2.bias\n", + "resblocks.2.convs2.2.weight_g\n", + "resblocks.2.convs2.2.weight_v\n", + "resblocks.3.convs1.0.bias\n", + "resblocks.3.convs1.0.weight_g\n", + "resblocks.3.convs1.0.weight_v\n", + "resblocks.3.convs1.1.bias\n", + "resblocks.3.convs1.1.weight_g\n", + "resblocks.3.convs1.1.weight_v\n", + "resblocks.3.convs1.2.bias\n", + "resblocks.3.convs1.2.weight_g\n", + "resblocks.3.convs1.2.weight_v\n", + "resblocks.3.convs2.0.bias\n", + "resblocks.3.convs2.0.weight_g\n", + "resblocks.3.convs2.0.weight_v\n", + "resblocks.3.convs2.1.bias\n", + "resblocks.3.convs2.1.weight_g\n", + "resblocks.3.convs2.1.weight_v\n", + "resblocks.3.convs2.2.bias\n", + "resblocks.3.convs2.2.weight_g\n", + "resblocks.3.convs2.2.weight_v\n", + 
"resblocks.4.convs1.0.bias\n", + "resblocks.4.convs1.0.weight_g\n", + "resblocks.4.convs1.0.weight_v\n", + "resblocks.4.convs1.1.bias\n", + "resblocks.4.convs1.1.weight_g\n", + "resblocks.4.convs1.1.weight_v\n", + "resblocks.4.convs1.2.bias\n", + "resblocks.4.convs1.2.weight_g\n", + "resblocks.4.convs1.2.weight_v\n", + "resblocks.4.convs2.0.bias\n", + "resblocks.4.convs2.0.weight_g\n", + "resblocks.4.convs2.0.weight_v\n", + "resblocks.4.convs2.1.bias\n", + "resblocks.4.convs2.1.weight_g\n", + "resblocks.4.convs2.1.weight_v\n", + "resblocks.4.convs2.2.bias\n", + "resblocks.4.convs2.2.weight_g\n", + "resblocks.4.convs2.2.weight_v\n", + "resblocks.5.convs1.0.bias\n", + "resblocks.5.convs1.0.weight_g\n", + "resblocks.5.convs1.0.weight_v\n", + "resblocks.5.convs1.1.bias\n", + "resblocks.5.convs1.1.weight_g\n", + "resblocks.5.convs1.1.weight_v\n", + "resblocks.5.convs1.2.bias\n", + "resblocks.5.convs1.2.weight_g\n", + "resblocks.5.convs1.2.weight_v\n", + "resblocks.5.convs2.0.bias\n", + "resblocks.5.convs2.0.weight_g\n", + "resblocks.5.convs2.0.weight_v\n", + "resblocks.5.convs2.1.bias\n", + "resblocks.5.convs2.1.weight_g\n", + "resblocks.5.convs2.1.weight_v\n", + "resblocks.5.convs2.2.bias\n", + "resblocks.5.convs2.2.weight_g\n", + "resblocks.5.convs2.2.weight_v\n", + "resblocks.6.convs1.0.bias\n", + "resblocks.6.convs1.0.weight_g\n", + "resblocks.6.convs1.0.weight_v\n", + "resblocks.6.convs1.1.bias\n", + "resblocks.6.convs1.1.weight_g\n", + "resblocks.6.convs1.1.weight_v\n", + "resblocks.6.convs1.2.bias\n", + "resblocks.6.convs1.2.weight_g\n", + "resblocks.6.convs1.2.weight_v\n", + "resblocks.6.convs2.0.bias\n", + "resblocks.6.convs2.0.weight_g\n", + "resblocks.6.convs2.0.weight_v\n", + "resblocks.6.convs2.1.bias\n", + "resblocks.6.convs2.1.weight_g\n", + "resblocks.6.convs2.1.weight_v\n", + "resblocks.6.convs2.2.bias\n", + "resblocks.6.convs2.2.weight_g\n", + "resblocks.6.convs2.2.weight_v\n", + "resblocks.7.convs1.0.bias\n", + "resblocks.7.convs1.0.weight_g\n", + 
"resblocks.7.convs1.0.weight_v\n", + "resblocks.7.convs1.1.bias\n", + "resblocks.7.convs1.1.weight_g\n", + "resblocks.7.convs1.1.weight_v\n", + "resblocks.7.convs1.2.bias\n", + "resblocks.7.convs1.2.weight_g\n", + "resblocks.7.convs1.2.weight_v\n", + "resblocks.7.convs2.0.bias\n", + "resblocks.7.convs2.0.weight_g\n", + "resblocks.7.convs2.0.weight_v\n", + "resblocks.7.convs2.1.bias\n", + "resblocks.7.convs2.1.weight_g\n", + "resblocks.7.convs2.1.weight_v\n", + "resblocks.7.convs2.2.bias\n", + "resblocks.7.convs2.2.weight_g\n", + "resblocks.7.convs2.2.weight_v\n", + "resblocks.8.convs1.0.bias\n", + "resblocks.8.convs1.0.weight_g\n", + "resblocks.8.convs1.0.weight_v\n", + "resblocks.8.convs1.1.bias\n", + "resblocks.8.convs1.1.weight_g\n", + "resblocks.8.convs1.1.weight_v\n", + "resblocks.8.convs1.2.bias\n", + "resblocks.8.convs1.2.weight_g\n", + "resblocks.8.convs1.2.weight_v\n", + "resblocks.8.convs2.0.bias\n", + "resblocks.8.convs2.0.weight_g\n", + "resblocks.8.convs2.0.weight_v\n", + "resblocks.8.convs2.1.bias\n", + "resblocks.8.convs2.1.weight_g\n", + "resblocks.8.convs2.1.weight_v\n", + "resblocks.8.convs2.2.bias\n", + "resblocks.8.convs2.2.weight_g\n", + "resblocks.8.convs2.2.weight_v\n", + "resblocks.9.convs1.0.bias\n", + "resblocks.9.convs1.0.weight_g\n", + "resblocks.9.convs1.0.weight_v\n", + "resblocks.9.convs1.1.bias\n", + "resblocks.9.convs1.1.weight_g\n", + "resblocks.9.convs1.1.weight_v\n", + "resblocks.9.convs1.2.bias\n", + "resblocks.9.convs1.2.weight_g\n", + "resblocks.9.convs1.2.weight_v\n", + "resblocks.9.convs2.0.bias\n", + "resblocks.9.convs2.0.weight_g\n", + "resblocks.9.convs2.0.weight_v\n", + "resblocks.9.convs2.1.bias\n", + "resblocks.9.convs2.1.weight_g\n", + "resblocks.9.convs2.1.weight_v\n", + "resblocks.9.convs2.2.bias\n", + "resblocks.9.convs2.2.weight_g\n", + "resblocks.9.convs2.2.weight_v\n", + "resblocks.10.convs1.0.bias\n", + "resblocks.10.convs1.0.weight_g\n", + "resblocks.10.convs1.0.weight_v\n", + "resblocks.10.convs1.1.bias\n", + 
"resblocks.10.convs1.1.weight_g\n", + "resblocks.10.convs1.1.weight_v\n", + "resblocks.10.convs1.2.bias\n", + "resblocks.10.convs1.2.weight_g\n", + "resblocks.10.convs1.2.weight_v\n", + "resblocks.10.convs2.0.bias\n", + "resblocks.10.convs2.0.weight_g\n", + "resblocks.10.convs2.0.weight_v\n", + "resblocks.10.convs2.1.bias\n", + "resblocks.10.convs2.1.weight_g\n", + "resblocks.10.convs2.1.weight_v\n", + "resblocks.10.convs2.2.bias\n", + "resblocks.10.convs2.2.weight_g\n", + "resblocks.10.convs2.2.weight_v\n", + "resblocks.11.convs1.0.bias\n", + "resblocks.11.convs1.0.weight_g\n", + "resblocks.11.convs1.0.weight_v\n", + "resblocks.11.convs1.1.bias\n", + "resblocks.11.convs1.1.weight_g\n", + "resblocks.11.convs1.1.weight_v\n", + "resblocks.11.convs1.2.bias\n", + "resblocks.11.convs1.2.weight_g\n", + "resblocks.11.convs1.2.weight_v\n", + "resblocks.11.convs2.0.bias\n", + "resblocks.11.convs2.0.weight_g\n", + "resblocks.11.convs2.0.weight_v\n", + "resblocks.11.convs2.1.bias\n", + "resblocks.11.convs2.1.weight_g\n", + "resblocks.11.convs2.1.weight_v\n", + "resblocks.11.convs2.2.bias\n", + "resblocks.11.convs2.2.weight_g\n", + "resblocks.11.convs2.2.weight_v\n", + "resblocks.12.convs1.0.bias\n", + "resblocks.12.convs1.0.weight_g\n", + "resblocks.12.convs1.0.weight_v\n", + "resblocks.12.convs1.1.bias\n", + "resblocks.12.convs1.1.weight_g\n", + "resblocks.12.convs1.1.weight_v\n", + "resblocks.12.convs1.2.bias\n", + "resblocks.12.convs1.2.weight_g\n", + "resblocks.12.convs1.2.weight_v\n", + "resblocks.12.convs2.0.bias\n", + "resblocks.12.convs2.0.weight_g\n", + "resblocks.12.convs2.0.weight_v\n", + "resblocks.12.convs2.1.bias\n", + "resblocks.12.convs2.1.weight_g\n", + "resblocks.12.convs2.1.weight_v\n", + "resblocks.12.convs2.2.bias\n", + "resblocks.12.convs2.2.weight_g\n", + "resblocks.12.convs2.2.weight_v\n", + "resblocks.13.convs1.0.bias\n", + "resblocks.13.convs1.0.weight_g\n", + "resblocks.13.convs1.0.weight_v\n", + "resblocks.13.convs1.1.bias\n", + 
"resblocks.13.convs1.1.weight_g\n", + "resblocks.13.convs1.1.weight_v\n", + "resblocks.13.convs1.2.bias\n", + "resblocks.13.convs1.2.weight_g\n", + "resblocks.13.convs1.2.weight_v\n", + "resblocks.13.convs2.0.bias\n", + "resblocks.13.convs2.0.weight_g\n", + "resblocks.13.convs2.0.weight_v\n", + "resblocks.13.convs2.1.bias\n", + "resblocks.13.convs2.1.weight_g\n", + "resblocks.13.convs2.1.weight_v\n", + "resblocks.13.convs2.2.bias\n", + "resblocks.13.convs2.2.weight_g\n", + "resblocks.13.convs2.2.weight_v\n", + "resblocks.14.convs1.0.bias\n", + "resblocks.14.convs1.0.weight_g\n", + "resblocks.14.convs1.0.weight_v\n", + "resblocks.14.convs1.1.bias\n", + "resblocks.14.convs1.1.weight_g\n", + "resblocks.14.convs1.1.weight_v\n", + "resblocks.14.convs1.2.bias\n", + "resblocks.14.convs1.2.weight_g\n", + "resblocks.14.convs1.2.weight_v\n", + "resblocks.14.convs2.0.bias\n", + "resblocks.14.convs2.0.weight_g\n", + "resblocks.14.convs2.0.weight_v\n", + "resblocks.14.convs2.1.bias\n", + "resblocks.14.convs2.1.weight_g\n", + "resblocks.14.convs2.1.weight_v\n", + "resblocks.14.convs2.2.bias\n", + "resblocks.14.convs2.2.weight_g\n", + "resblocks.14.convs2.2.weight_v\n", + "conv_post.bias\n", + "conv_post.weight_g\n", + "conv_post.weight_v\n", + "dict.weight\n", + "spkr.weight\n", + "spkr.bias\n", + "dur_predictor.conv1.0.weight\n", + "dur_predictor.conv1.0.bias\n", + "dur_predictor.ln1.weight\n", + "dur_predictor.ln1.bias\n", + "dur_predictor.conv2.0.weight\n", + "dur_predictor.conv2.0.bias\n", + "dur_predictor.ln2.weight\n", + "dur_predictor.ln2.bias\n", + "dur_predictor.proj.weight\n", + "dur_predictor.proj.bias\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# 체크포인트 파일 경로 (확인하고 싶은 파일 경로로 변경하세요)\n", + "checkpoint_path = \"/home/2022113135/gyucheol/NetfLips/av2av-main/unit2av/checkpoint/zeroth-hubert/g_00500000\"\n", + "# CPU로 로드 (GPU가 없어도 확인 가능하게)\n", + "checkpoint_ko = torch.load(checkpoint_path, map_location='cpu')\n", + "\n", + "# 1. 
최상위 키(Key) 확인\n", + "print(\"=== Top Level Keys ===\")\n", + "print(checkpoint_ko.keys())\n", + "print(\"\\n\")\n", + "\n", + "checkpoint_ko = checkpoint_ko['generator']\n", + "print(\"=== Generator Keys ===\")\n", + "for key in checkpoint_ko.keys():\n", + " print(key)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a7ff3197", + "metadata": {}, + "outputs": [], + "source": [ + "checkpoint_path = \"/home/2022113135/av2av/checkpoints/unit_av_renderer.pt\" \n", + "checkpoint = torch.load(checkpoint_path)\n", + "\n", + "checkpoint['audio']['ko'] = checkpoint_ko" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e521077", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "odict_keys(['conv_pre.bias', 'conv_pre.weight_g', 'conv_pre.weight_v', 'ups.0.bias', 'ups.0.weight_g', 'ups.0.weight_v', 'ups.1.bias', 'ups.1.weight_g', 'ups.1.weight_v', 'ups.2.bias', 'ups.2.weight_g', 'ups.2.weight_v', 'ups.3.bias', 'ups.3.weight_g', 'ups.3.weight_v', 'ups.4.bias', 'ups.4.weight_g', 'ups.4.weight_v', 'resblocks.0.convs1.0.bias', 'resblocks.0.convs1.0.weight_g', 'resblocks.0.convs1.0.weight_v', 'resblocks.0.convs1.1.bias', 'resblocks.0.convs1.1.weight_g', 'resblocks.0.convs1.1.weight_v', 'resblocks.0.convs1.2.bias', 'resblocks.0.convs1.2.weight_g', 'resblocks.0.convs1.2.weight_v', 'resblocks.0.convs2.0.bias', 'resblocks.0.convs2.0.weight_g', 'resblocks.0.convs2.0.weight_v', 'resblocks.0.convs2.1.bias', 'resblocks.0.convs2.1.weight_g', 'resblocks.0.convs2.1.weight_v', 'resblocks.0.convs2.2.bias', 'resblocks.0.convs2.2.weight_g', 'resblocks.0.convs2.2.weight_v', 'resblocks.1.convs1.0.bias', 'resblocks.1.convs1.0.weight_g', 'resblocks.1.convs1.0.weight_v', 'resblocks.1.convs1.1.bias', 'resblocks.1.convs1.1.weight_g', 'resblocks.1.convs1.1.weight_v', 'resblocks.1.convs1.2.bias', 'resblocks.1.convs1.2.weight_g', 'resblocks.1.convs1.2.weight_v', 'resblocks.1.convs2.0.bias', 'resblocks.1.convs2.0.weight_g', 
'resblocks.1.convs2.0.weight_v', 'resblocks.1.convs2.1.bias', 'resblocks.1.convs2.1.weight_g', 'resblocks.1.convs2.1.weight_v', 'resblocks.1.convs2.2.bias', 'resblocks.1.convs2.2.weight_g', 'resblocks.1.convs2.2.weight_v', 'resblocks.2.convs1.0.bias', 'resblocks.2.convs1.0.weight_g', 'resblocks.2.convs1.0.weight_v', 'resblocks.2.convs1.1.bias', 'resblocks.2.convs1.1.weight_g', 'resblocks.2.convs1.1.weight_v', 'resblocks.2.convs1.2.bias', 'resblocks.2.convs1.2.weight_g', 'resblocks.2.convs1.2.weight_v', 'resblocks.2.convs2.0.bias', 'resblocks.2.convs2.0.weight_g', 'resblocks.2.convs2.0.weight_v', 'resblocks.2.convs2.1.bias', 'resblocks.2.convs2.1.weight_g', 'resblocks.2.convs2.1.weight_v', 'resblocks.2.convs2.2.bias', 'resblocks.2.convs2.2.weight_g', 'resblocks.2.convs2.2.weight_v', 'resblocks.3.convs1.0.bias', 'resblocks.3.convs1.0.weight_g', 'resblocks.3.convs1.0.weight_v', 'resblocks.3.convs1.1.bias', 'resblocks.3.convs1.1.weight_g', 'resblocks.3.convs1.1.weight_v', 'resblocks.3.convs1.2.bias', 'resblocks.3.convs1.2.weight_g', 'resblocks.3.convs1.2.weight_v', 'resblocks.3.convs2.0.bias', 'resblocks.3.convs2.0.weight_g', 'resblocks.3.convs2.0.weight_v', 'resblocks.3.convs2.1.bias', 'resblocks.3.convs2.1.weight_g', 'resblocks.3.convs2.1.weight_v', 'resblocks.3.convs2.2.bias', 'resblocks.3.convs2.2.weight_g', 'resblocks.3.convs2.2.weight_v', 'resblocks.4.convs1.0.bias', 'resblocks.4.convs1.0.weight_g', 'resblocks.4.convs1.0.weight_v', 'resblocks.4.convs1.1.bias', 'resblocks.4.convs1.1.weight_g', 'resblocks.4.convs1.1.weight_v', 'resblocks.4.convs1.2.bias', 'resblocks.4.convs1.2.weight_g', 'resblocks.4.convs1.2.weight_v', 'resblocks.4.convs2.0.bias', 'resblocks.4.convs2.0.weight_g', 'resblocks.4.convs2.0.weight_v', 'resblocks.4.convs2.1.bias', 'resblocks.4.convs2.1.weight_g', 'resblocks.4.convs2.1.weight_v', 'resblocks.4.convs2.2.bias', 'resblocks.4.convs2.2.weight_g', 'resblocks.4.convs2.2.weight_v', 'resblocks.5.convs1.0.bias', 'resblocks.5.convs1.0.weight_g', 
'resblocks.5.convs1.0.weight_v', 'resblocks.5.convs1.1.bias', 'resblocks.5.convs1.1.weight_g', 'resblocks.5.convs1.1.weight_v', 'resblocks.5.convs1.2.bias', 'resblocks.5.convs1.2.weight_g', 'resblocks.5.convs1.2.weight_v', 'resblocks.5.convs2.0.bias', 'resblocks.5.convs2.0.weight_g', 'resblocks.5.convs2.0.weight_v', 'resblocks.5.convs2.1.bias', 'resblocks.5.convs2.1.weight_g', 'resblocks.5.convs2.1.weight_v', 'resblocks.5.convs2.2.bias', 'resblocks.5.convs2.2.weight_g', 'resblocks.5.convs2.2.weight_v', 'resblocks.6.convs1.0.bias', 'resblocks.6.convs1.0.weight_g', 'resblocks.6.convs1.0.weight_v', 'resblocks.6.convs1.1.bias', 'resblocks.6.convs1.1.weight_g', 'resblocks.6.convs1.1.weight_v', 'resblocks.6.convs1.2.bias', 'resblocks.6.convs1.2.weight_g', 'resblocks.6.convs1.2.weight_v', 'resblocks.6.convs2.0.bias', 'resblocks.6.convs2.0.weight_g', 'resblocks.6.convs2.0.weight_v', 'resblocks.6.convs2.1.bias', 'resblocks.6.convs2.1.weight_g', 'resblocks.6.convs2.1.weight_v', 'resblocks.6.convs2.2.bias', 'resblocks.6.convs2.2.weight_g', 'resblocks.6.convs2.2.weight_v', 'resblocks.7.convs1.0.bias', 'resblocks.7.convs1.0.weight_g', 'resblocks.7.convs1.0.weight_v', 'resblocks.7.convs1.1.bias', 'resblocks.7.convs1.1.weight_g', 'resblocks.7.convs1.1.weight_v', 'resblocks.7.convs1.2.bias', 'resblocks.7.convs1.2.weight_g', 'resblocks.7.convs1.2.weight_v', 'resblocks.7.convs2.0.bias', 'resblocks.7.convs2.0.weight_g', 'resblocks.7.convs2.0.weight_v', 'resblocks.7.convs2.1.bias', 'resblocks.7.convs2.1.weight_g', 'resblocks.7.convs2.1.weight_v', 'resblocks.7.convs2.2.bias', 'resblocks.7.convs2.2.weight_g', 'resblocks.7.convs2.2.weight_v', 'resblocks.8.convs1.0.bias', 'resblocks.8.convs1.0.weight_g', 'resblocks.8.convs1.0.weight_v', 'resblocks.8.convs1.1.bias', 'resblocks.8.convs1.1.weight_g', 'resblocks.8.convs1.1.weight_v', 'resblocks.8.convs1.2.bias', 'resblocks.8.convs1.2.weight_g', 'resblocks.8.convs1.2.weight_v', 'resblocks.8.convs2.0.bias', 'resblocks.8.convs2.0.weight_g', 
'resblocks.8.convs2.0.weight_v', 'resblocks.8.convs2.1.bias', 'resblocks.8.convs2.1.weight_g', 'resblocks.8.convs2.1.weight_v', 'resblocks.8.convs2.2.bias', 'resblocks.8.convs2.2.weight_g', 'resblocks.8.convs2.2.weight_v', 'resblocks.9.convs1.0.bias', 'resblocks.9.convs1.0.weight_g', 'resblocks.9.convs1.0.weight_v', 'resblocks.9.convs1.1.bias', 'resblocks.9.convs1.1.weight_g', 'resblocks.9.convs1.1.weight_v', 'resblocks.9.convs1.2.bias', 'resblocks.9.convs1.2.weight_g', 'resblocks.9.convs1.2.weight_v', 'resblocks.9.convs2.0.bias', 'resblocks.9.convs2.0.weight_g', 'resblocks.9.convs2.0.weight_v', 'resblocks.9.convs2.1.bias', 'resblocks.9.convs2.1.weight_g', 'resblocks.9.convs2.1.weight_v', 'resblocks.9.convs2.2.bias', 'resblocks.9.convs2.2.weight_g', 'resblocks.9.convs2.2.weight_v', 'resblocks.10.convs1.0.bias', 'resblocks.10.convs1.0.weight_g', 'resblocks.10.convs1.0.weight_v', 'resblocks.10.convs1.1.bias', 'resblocks.10.convs1.1.weight_g', 'resblocks.10.convs1.1.weight_v', 'resblocks.10.convs1.2.bias', 'resblocks.10.convs1.2.weight_g', 'resblocks.10.convs1.2.weight_v', 'resblocks.10.convs2.0.bias', 'resblocks.10.convs2.0.weight_g', 'resblocks.10.convs2.0.weight_v', 'resblocks.10.convs2.1.bias', 'resblocks.10.convs2.1.weight_g', 'resblocks.10.convs2.1.weight_v', 'resblocks.10.convs2.2.bias', 'resblocks.10.convs2.2.weight_g', 'resblocks.10.convs2.2.weight_v', 'resblocks.11.convs1.0.bias', 'resblocks.11.convs1.0.weight_g', 'resblocks.11.convs1.0.weight_v', 'resblocks.11.convs1.1.bias', 'resblocks.11.convs1.1.weight_g', 'resblocks.11.convs1.1.weight_v', 'resblocks.11.convs1.2.bias', 'resblocks.11.convs1.2.weight_g', 'resblocks.11.convs1.2.weight_v', 'resblocks.11.convs2.0.bias', 'resblocks.11.convs2.0.weight_g', 'resblocks.11.convs2.0.weight_v', 'resblocks.11.convs2.1.bias', 'resblocks.11.convs2.1.weight_g', 'resblocks.11.convs2.1.weight_v', 'resblocks.11.convs2.2.bias', 'resblocks.11.convs2.2.weight_g', 'resblocks.11.convs2.2.weight_v', 'resblocks.12.convs1.0.bias', 
'resblocks.12.convs1.0.weight_g', 'resblocks.12.convs1.0.weight_v', 'resblocks.12.convs1.1.bias', 'resblocks.12.convs1.1.weight_g', 'resblocks.12.convs1.1.weight_v', 'resblocks.12.convs1.2.bias', 'resblocks.12.convs1.2.weight_g', 'resblocks.12.convs1.2.weight_v', 'resblocks.12.convs2.0.bias', 'resblocks.12.convs2.0.weight_g', 'resblocks.12.convs2.0.weight_v', 'resblocks.12.convs2.1.bias', 'resblocks.12.convs2.1.weight_g', 'resblocks.12.convs2.1.weight_v', 'resblocks.12.convs2.2.bias', 'resblocks.12.convs2.2.weight_g', 'resblocks.12.convs2.2.weight_v', 'resblocks.13.convs1.0.bias', 'resblocks.13.convs1.0.weight_g', 'resblocks.13.convs1.0.weight_v', 'resblocks.13.convs1.1.bias', 'resblocks.13.convs1.1.weight_g', 'resblocks.13.convs1.1.weight_v', 'resblocks.13.convs1.2.bias', 'resblocks.13.convs1.2.weight_g', 'resblocks.13.convs1.2.weight_v', 'resblocks.13.convs2.0.bias', 'resblocks.13.convs2.0.weight_g', 'resblocks.13.convs2.0.weight_v', 'resblocks.13.convs2.1.bias', 'resblocks.13.convs2.1.weight_g', 'resblocks.13.convs2.1.weight_v', 'resblocks.13.convs2.2.bias', 'resblocks.13.convs2.2.weight_g', 'resblocks.13.convs2.2.weight_v', 'resblocks.14.convs1.0.bias', 'resblocks.14.convs1.0.weight_g', 'resblocks.14.convs1.0.weight_v', 'resblocks.14.convs1.1.bias', 'resblocks.14.convs1.1.weight_g', 'resblocks.14.convs1.1.weight_v', 'resblocks.14.convs1.2.bias', 'resblocks.14.convs1.2.weight_g', 'resblocks.14.convs1.2.weight_v', 'resblocks.14.convs2.0.bias', 'resblocks.14.convs2.0.weight_g', 'resblocks.14.convs2.0.weight_v', 'resblocks.14.convs2.1.bias', 'resblocks.14.convs2.1.weight_g', 'resblocks.14.convs2.1.weight_v', 'resblocks.14.convs2.2.bias', 'resblocks.14.convs2.2.weight_g', 'resblocks.14.convs2.2.weight_v', 'conv_post.bias', 'conv_post.weight_g', 'conv_post.weight_v', 'dict.weight', 'spkr.weight', 'spkr.bias', 'dur_predictor.conv1.0.weight', 'dur_predictor.conv1.0.bias', 'dur_predictor.ln1.weight', 'dur_predictor.ln1.bias', 'dur_predictor.conv2.0.weight', 
'dur_predictor.conv2.0.bias', 'dur_predictor.ln2.weight', 'dur_predictor.ln2.bias', 'dur_predictor.proj.weight', 'dur_predictor.proj.bias'])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checkpoint['audio']['ko'].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ceea9d6f", + "metadata": {}, + "outputs": [], + "source": [ + "output_path = '/home/2022113135/av2av/checkpoints/unit_av_renderer_withKO.pt'\n", + "torch.save(checkpoint, output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d852a6ac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lip-bbox", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/sample_inference_g_500000.ipynb b/notebooks/sample_inference_g_500000.ipynb new file mode 100644 index 0000000..4537b77 --- /dev/null +++ b/notebooks/sample_inference_g_500000.ipynb @@ -0,0 +1,1426 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "aee17ce7", + "metadata": {}, + "source": [ + "# Sample Inference Results\n", + "\n", + "This notebook runs the `inference_unit2a.py` script to generate audio and displays the results.\n", + "\n", + "**Model Checkpoint**: `g_best` (Zeroth-Hubert)\n", + "**Dataset**: All subjects in `test_data_01/003`" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "98dc1317", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Scanning for files in /home/2022113135/datasets/zeroth/test_data_01/003...\n", + "Subject 104: found 3 pairs\n", + "Subject 105: found 1 
pairs\n", + "Subject 112: found 1 pairs\n", + "Subject 118: found 3 pairs\n", + "Subject 121: found 3 pairs\n", + "Subject 126: found 3 pairs\n", + "Subject 132: found 2 pairs\n", + "Subject 137: found 2 pairs\n", + "Subject 147: found 1 pairs\n", + "Subject 149: found 3 pairs\n", + "Total pairs found: 22\n", + "Output directory: /home/2022113135/gyucheol/NetfLips/av2av-main/output/g_500000\n" + ] + } + ], + "source": [ + "import os\n", + "import subprocess\n", + "import IPython.display as ipd\n", + "\n", + "# Paths\n", + "ROOT_DIR = \"/home/2022113135\"\n", + "PROJECT_DIR = os.path.join(ROOT_DIR, \"gyucheol/NetfLips/av2av-main\")\n", + "INFERENCE_SCRIPT = os.path.join(PROJECT_DIR, \"inference_unit2a.py\")\n", + "CHECKPOINT = os.path.join(PROJECT_DIR, \"unit2av/checkpoint/zeroth-hubert/g_00500000\")\n", + "CONFIG = os.path.join(PROJECT_DIR, \"unit2av/checkpoint/zeroth-hubert/config.json\")\n", + "OUTPUT_DIR = os.path.join(PROJECT_DIR, \"output/g_500000\")\n", + "\n", + "# Directories\n", + "WAV_ROOT = \"/home/2022113135/datasets/zeroth/test_data_01/003\"\n", + "PT_ROOT = \"/home/2022113135/datasets/final_unit2a_split/test\"\n", + "\n", + "# Find all subjects\n", + "subjects = sorted([d for d in os.listdir(WAV_ROOT) if os.path.isdir(os.path.join(WAV_ROOT, d))])\n", + "\n", + "file_pairs = []\n", + "\n", + "print(f\"Scanning for files in {WAV_ROOT}...\")\n", + "\n", + "for subject in subjects:\n", + " subject_dir = os.path.join(WAV_ROOT, subject)\n", + " wav_files = sorted([f for f in os.listdir(subject_dir) if f.endswith('.wav')])\n", + " \n", + " found_count = 0\n", + " for wav_file in wav_files:\n", + " if found_count >= 3:\n", + " break\n", + " \n", + " # Construct expected pt filename\n", + " # items: 104_003_0253.wav -> 104_003_0253_preprocessed.pt\n", + " base_name = os.path.splitext(wav_file)[0]\n", + " pt_filename = f\"{base_name}_preprocessed.pt\"\n", + " pt_path = os.path.join(PT_ROOT, pt_filename)\n", + " \n", + " if os.path.exists(pt_path):\n", + " 
file_pairs.append({\n", + " \"subject\": subject,\n", + " \"wav\": os.path.join(subject_dir, wav_file),\n", + " \"pt\": pt_path\n", + " })\n", + " found_count += 1\n", + " \n", + " print(f\"Subject {subject}: found {found_count} pairs\")\n", + "\n", + "print(f\"Total pairs found: {len(file_pairs)}\")\n", + "print(f\"Output directory: {OUTPUT_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "55f538b6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 104_003_0253_preprocessed.pt...\n", + "Processing 104_003_0577_preprocessed.pt...\n", + "Processing 104_003_0993_preprocessed.pt...\n", + "Processing 105_003_2905_preprocessed.pt...\n", + "Processing 112_003_0107_preprocessed.pt...\n", + "Processing 118_003_0522_preprocessed.pt...\n", + "Processing 118_003_0836_preprocessed.pt...\n", + "Processing 118_003_1091_preprocessed.pt...\n", + "Processing 121_003_0527_preprocessed.pt...\n", + "Processing 121_003_0791_preprocessed.pt...\n", + "Processing 121_003_2994_preprocessed.pt...\n", + "Processing 126_003_1107_preprocessed.pt...\n", + "Processing 126_003_2205_preprocessed.pt...\n", + "Processing 126_003_2432_preprocessed.pt...\n", + "Processing 132_003_1366_preprocessed.pt...\n", + "Processing 132_003_2657_preprocessed.pt...\n", + "Processing 137_003_1351_preprocessed.pt...\n", + "Processing 137_003_1614_preprocessed.pt...\n", + "Processing 147_003_1675_preprocessed.pt...\n", + "Processing 149_003_0927_preprocessed.pt...\n", + "Processing 149_003_2332_preprocessed.pt...\n", + "Processing 149_003_2621_preprocessed.pt...\n" + ] + } + ], + "source": [ + "# Run Inference for each file\n", + "for pair in file_pairs:\n", + " input_pt = pair['pt']\n", + " print(f\"Processing {os.path.basename(input_pt)}...\")\n", + " \n", + " command = [\n", + " \"python\", INFERENCE_SCRIPT,\n", + " \"--checkpoint\", CHECKPOINT,\n", + " \"--config\", CONFIG,\n", + " \"--input_file\", input_pt,\n", + " 
\"--output_folder\", OUTPUT_DIR,\n", + " \"--device\", \"cuda\"\n", + " ]\n", + " \n", + " # Run the command\n", + " result = subprocess.run(command, capture_output=True, text=True, cwd=PROJECT_DIR)\n", + " \n", + " if result.returncode != 0:\n", + " print(f\"Error processing {input_pt}:\")\n", + " print(result.stderr)\n", + " else:\n", + " # Optional: Print only if needed, to avoid clutter\n", + " # print(result.stdout)\n", + " pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "578d74d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------------------------\n", + "g_500000 Inference Results\n", + "------------------------------------------------------------\n", + "\n", + "################################################################################\n", + " SUBJECT: 104\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 104_003_0253\n", + "Original Audio: 104_003_0253.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 104_003_0253_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 104_003_0577\n", + "Original Audio: 104_003_0577.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 104_003_0577_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 104_003_0993\n", + "Original Audio: 104_003_0993.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 104_003_0993_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " 
" + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 105\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 105_003_2905\n", + "Original Audio: 105_003_2905.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 105_003_2905_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source 
src=\"data:audio/x-wav;base64,UklGRiSlBQBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAAZGF0YQClBQD4//v/9//2//j//P/+//v//v///wIA+/8AAP////8AAP////8BAAQAAQAAAAEABAAGAAgABAAFAAYAAgAFAAAA/f////7///8CAAMAAwACAP///v///wEA/v/8//r/+v/5/wAA+v/8//7//f/8//z/+//+//z//P/8//7//P/+//3/+P/6//j/9f/4//f/+P/8//n//P/0//f///////z/+P////v//P/7//z/+v/4//f/9//z//b/+P/8/////P/9//7//f/+//7//P/9//7//f///wEAAgD+//v//f/9/wAAAgD+//v/+//6//r//P/7//3//v/+//7/+v/8//r/9//4//b/+f/+//z//v8CAP///P///wAA/f/4//r//v/+/wEA/f////z//P/9/wIA/v/9//z//f/8//v/+v/5//7/+v/9//v/+f/8/wEA///+/wIA/v/8//v/AgACAPv/AAADAAIAAQD//wEAAgAEAAQAAgACAP//AAD//wMA+//5/wAA+v/7//z/+f////z/+P/7//r/+P/6//v/9//1//b/8//1//T/8v/2//b/9v/3//n/+f/3//P/9v/z//r/+P/6//n/+f/6//r/9v/3//v/+v/7//r//f/6//v//f/6//z/AAD//wQAAQD///3/AQD+/wIA///9//3/AQAEAPv/AQD9//3/BQAAAP///f/5//7//P//////AAD+//7/AAD+/wQA/f/5//7//f/+//z/+P/5//b/9v/3//r/9//7//z//P/6//v//f///wEAAAAEAAEA/v/7/wAAAwD9////AwD//////f/7//f/+f/2//L/+f/5//n/+v////7//v8BAP7//f/9/wAAAQABAPr/+//0/wEA/v/6//3/+v/7//T/+P/1//j/9f/7//L/8//0//P/9//5//z//P/4//r//P/5//r//f/4//b/+f/3//H/8v/w/+7/6v/u/+//9P/x//D/9v/y//L/9//6//T/8P/v/+7/8P/4//r/9v/3//f/9v/2//X/9v/0//v/9//1//r/AAAAAAQABgAFAAkABgACAAUABAADAAsABgADAAEAAwACAP7/AgAAAAMAAwD9/wEA//8AAAIABAAFAAgAAQD///z//v/6//v//f/3//j/9v/4//n/+f/5//n//P/+/wAAAAABAPX/+//4//X/9f/w//f/9P/z//b/9v/0//T/8//u//L/9f/1//D/8v/x/+3/8v/v//L/+f/9//n/+f/+//H/9v/7//n//f/3//z//P/4//f/9f/2//T/+v/5//X/+v/3//v/+//3//j/+f/3//X/9P/2//n//P/6//f/+P/3//n/+P/7//f/8//z//r/+P/4//r/9//1//f//f/9/////v/6//n/AgD6/wEA/f8DAAUA/v8CAPz/+//7//z//f8BAAUA/v8CAP7/AwAIAAQABAD+////CAAAAAEAAwACAAMAAQAEAP3//P////7///8AAAEAAwD8//7/AQD9/wAA//8DAP//+P/9//z//P/9//z/+f/6//z/+v/4//r/+f/3//f//P/8//v//f/0//3//v/9//r//P/5//n/+v/2//j/+P/6//v//f/7//3//P/8/////v/8//3/+//7/wAA+/8CAAUA/f////r/+P/9//n//P///wAA/v8AAPv//P/9//v//f/5//v//P/7//r/+f/6//n/+v/5//j/+v/x//X/9v/2//b/9f/5//b/9v/5//n/9f/z//b/+//7//X/9//7//v//f/7/wAA/v/5/wEAAAACAPz//f/9//r//P/6/wEAAAABAP7/BQABAAEAAQACAAMAAwABAAMA/v8FAAcAAQAAAAUACgABAAYADAAJAAUA/v8FAAoACAAOABIAEA
AQABMADwANAA0ABwAPAAYABwD//wQADQALAAIAAwACAAAA+f/0//r/+v/r/+3/8//8//7/CwASAB4AHQAiADMAMwArAC4AOAA1ACsAMgA1ADIAHgAmABUAGgAWAAcAGAAnACUAIwBIAGIAOQBBAGMAVwA2AFYAUwARADYAYgBqAHoAigCaAJ0A1QC1ABQBjwHsAHsBfgWWB9oB6PxL+8v6Wf6L/Wz7xfu4+nr6f/mk+C35H/n/92f6Sfu4/LP/Yv9MAB0AIf9gAOkAqQBgAZgBBwImAhECkgIUA6gCIQLEArYCDQPcA0kDagPHA44D8QN1A9oCPgMHA5oCwwKqAp8CegMtBHYDjAJTAtcBlAHNAeIBmgFYASMBrQCUAKkA6f/8/6cATgC1/0v/lP97/+3+mP5z/jH+oP01/f/8Rv2u/Y390fyO/B/9NP2s/Iv8/fxI/UP95vwa/VP9Rf1O/Tf9dv27/ZT9sf0p/o7+jf5X/k/+U/5g/or+pP6N/pD+2v73/vL+bv+B/1H/zf81ABkAFABiAMMAngBsANYABAENAS0BYwFXAXQBaAEsAXUBpQEKAjYC5wEUAkAC4QHXAb0BpQGOAXIBpgGcAeUBOALoAbcBaQFWAYEBSQGGASQBBgFzAQ8BowDBANcAlACdAJEAqgB+ADsADgDO/9T/EQBoAGAAPAAmAAgA2f/t/+b/vf+l/0f/DP92/8z/Pv/E/g7/s/+1//H+4v4Z/y3/Ov/u/uD+BP/7/sT+Xf5L/lz+Q/5k/o7+x/4m/0P/OP9U/0D/3P40/2H/Z/+z/5P/lv+M/0//XP+f/6D/0f8hAFQASAAFADQAWwBhAGwAYwAxAEoAlgB9AFcAYQCZAJIAhQCVAIEAgACMAHQAQgBZAEcALwBcAFAALwAgAEUAOAAeAEUAFQAvAFYARgBWAEoAOQAsABgADgAMABAAHgD//+P/4P/t/+3/9P8OAPv/8f/2/+v/2P/l//L/6v/u/+7/4f/J/9L/4f/j//T/AgD5//D/+//8//r/AgD4//L/6//t/+D/3v/2//3/9v8CAPD/3v/x//n/AAAAAPL/6//x//P/7P/m/93/6f/t/+H/4v/j/+H/2f/S/83/z//Q/8v/zP/K/9P/zf/L/8r/z//b/9//4//Z/8z/yv/O/+D/8v/4//f/8f/e/9j/2f/X/+j/9P/1//D/6f/u/+z/6//t//v/BQAQABMAAADw/wMACwALABQAHAAWAAwABAAJAAkAEAAOAA0ABgAOABIADAAIAP7/+v/2//3/DgANAAMACAD5/wMABAD+//r/+P/3//X/+f/2//n/+//3//P/+P/3//P/6P/s/+3/6P/p//H/6f/i/+X/5f/m/+f/4P/j/9//4v/p//P/9P/4//v/AQAAAAEAAQD1//P/8v/1//L/9f/6//T/+v/2//v/+P/w//P/7f/w/+3/9f/5//7///8DAAcA+//5//P/+v8BAP7/+P/5//n/+v/7//r/+/8AAP7//P/8/wAABAADAAIAAgAEAPv/+/////3//v/7////+P/8//7/9v/z//j/9//5//n//v/9//P//f////f/9f/v/+//8//4/wAABQAFAP//+P/7//z/+//2//j/9f/5//n/AQAJAAgABgAMABIADgAGAAwADQAKAAMABwAJAAIABwAGAP//AwACAAIABgABAAcAAQADAAYAAwACAP//CAADAAUABAD+//b/+P/6//b/9P/3//D/8P/v//H/+P/4//z//f/5//r/+/8EAAIAAQADAAUAAAD8//z/+v/8////AAADAAQADAAPAA4AAwAEAAUABAADAAEACQAKAAkACQACAAEAAQD///3/BAAGAAIABwAKAAoACQAHAAgACAAMAAgACgAGAAYABQAIAAkADAAJAAgACgAJAAMA//8AAPn/+f/5//b/+f/3//b/+v/+//v/+v/4//n/9P/w//T/+P/5//3//P/9//j//P/9//r//v
8EAAUA//8AAP///f////v////+//r//v////3//v8AAAEABAACAP7/BAADAAAA/v8AAP3/AQD+////AgAGAAYABgAFAAIA/f8DAP///P/9/////f8AAPn/+/8AAP3//v8AAAEA/f/8//3/+//9/wEA/P8CAAAA/v8AAAMAAgADAAIA/v/8/wEACAABAAQA/v/+/wgABgAGAAYAAQAFAAEABQAEAAIABgAHAAkABgANAAMA/P/9//7/AwAAAPn/+/////v/+P/8//z/AgABAAAA/////wEACAAIAAMACAACAAEA/v8AAAcAAAABAAMAAgAFAAIAAAD9/wAA/P/4//7//v/+//3/AQACAAMABQAEAAIA/////wEA/v/4//r/9P8AAP///P/+////AAD7/wEA//8BAAEABwACAAIAAAD9/wIAAAABAAUA//8BAAIAAQAAAAIA/f8BAAIA/f/8////AwAAAAEAAAACAAUAAgD+/wEA/v/8/wEAAAD9//v//v/7//r/AQACAP7/AgAGAAMAAgAFAAgAAgAAAP//AAD+//3//P8FAAMA/f///wAA/P8AAAIAAwAMAAQABgAEAP//AwD+/wQAAwD//wEA//8BAPv//f/8//v//f/7//r/+//8//z/+v/5//f/9//9//3/+//6//n/+P/5//j/9f/5//v//P/5/wAABwAHAAQAAAACAAIA/v8AAAEA/f8BAAAAAAABAAAAAgADAAEAAgD9//7//P/9////AQD+/wAAAQD+////AgAAAPn/AAD//wMA/v/9/wEABAACAAEAAgD+//r/9//5//n/9P/4//v/+v/5//j/+P/4//r/+v/2//j/+f/7//f/+v/2//b/+v/2//b/9v/2//r/9//0//v/+P/0//n//v/5//X/9f/2//X/9P/2//z//f/7//3//v/+//r/+P/4//X////8//3/+//+/////v/5//v//P/7/////f8CAPn/+v/7//r//v8AAP7////8//f/9//7//j//f/8//n/+v/7/wAA+v/9//r//f8AAPv/9f/3//X/9//0//n//f/0//f/+f/7//b//v/5//f/+f/6//n/9//2//b/8//z//b/9f/2//r/9f/3//T/9P/4//r/+P/1//j/9v/2//b/+f/+//n/+f/5//j/+//6//r/9v/5//n/9f/6//n/+f/3//j/+P/4//f/8//2//v/9v/4//n/9//0//b/+v/2//L/8v/2//j/8//x//H/9f/v//X/8f/y//X/8P/v//H/8f/z//H/7//0//b/9//9//r/+v/4//n/9v/1//b/9v/5//j/+P/4//r/+v/6//f/+v/7//j/+P/6//f/+P/7/wMAAgABAP///f/+//z/AwAAAP3/AQABAPv/AAD9//3//f/+/wAA//8AAPz//v/+//r/+v/5//f/9v/2//j/8v/0//H/8P/z/+//8f/u//H/8v/0//j/9//1//b/8v/1//j/9v/z//P/9f/x//H/8//0//f/9//4//7/AQD7/wAA/f/+//z/+/8BAP7/AAAAAP///v8AAAAA/v8AAPf///8CAAAA/v/+//7//P8BAP/////9//7/AAD///7/+f/7//7//P////v/+f/5//b/+P/7//T/+f/9//n/8v/9//v/9//7//j/+f/5//f/9//3//r/+f/5//f/9//5//X/9//4//j/9//3//r/+f/3//f/+v/2//r/+//+////AgD///r/+/8BAAAABAAEAAgACAAIAAUABQAGAAkABgAIAAkACQAIAA0ADAALAAoADAAMAAsABgAHAAQABQAKAAMACwACAAMAAwABAAEABwAAAAAAAAD+//7//P/9//3//f/9//v/9v/4//r/+//8//v/+P/2//r/+v/2//j/+v/0//b/8v/1//n/+P/4//X/9//7//z/9//3//j//f/6//T/+//7//v//f/9//b//P/+//7/AAD//wIABQ
AAAAMABAAEAAEAAQAAAAAAAgABAAYABAADAAMABAAGAAcABwAFAAYABgAHAAQABAAEAAIAAgADAAIA//8AAAEA/f8AAPn/+f/5//n//P/8//7//P///wMAAQAEAAgACAAHAAMAAAAAAPf/AAAAAPr////+//z/AQD9//z/+//5//n/9f/+//7/AwAAAAEAAAAEAAUAAAAAAAIABAD9/wIABAD/////AAAFAAMAAwAAAPv//v/7/wAAAwAAAP7/BAD9/wEA+P/8/wEAAwAEAP//BgABAAMABAD///v////9//n/+v////v/9//3//j//f/3//v/+f/6//3//v/+//7//v/+//j/+f/8//z/+f/4//n/9f/1//X/9v/4//j/+P/8//3/9//6//b/+P/1//T/+f/4//v//P/9//z/AAAAAP//AQD7/wEAAAD+//3//f////7////8//7////9/wEAAgABAPr//P8BAAIAAQAAAPz//v/7//3/AAD8//7/AgD///v/BAACAAIABAABAAIABAADAAAA/v8CAAQAAAABAAEABAD+/////f/9//z//P/+//v/+//4//f/9f/4//n/+/////7//v/4//f/+v/5//v//P///wEAAAD9//7//f////7////9//3//f8FAAAA+v/8//7//v////r/+P/4//v//v/4/wAA9//3//r/9//1//7/+f/6//j/9v/5//n/+v/7//v//P/7//n/+//5//n/+f/5//r/+P/5//z/+f/7//3///////f//P8AAP///f/+/wIAAAACAAEABAADAAIAAAD9//3/+f/6//3/AgD5////AAD+/wIABAAAAAEAAwD+//7//v8CAAcAAwADAAUABQAFAAEAAQD+//3/AAD+/wEA/f/+//3//P/9//3///8AAAAAAQABAAIA/////////v/9//7/9//4//v/+f/6//7///8DAAEAAgD//wEAAgD+//7//P/6/wMAAAD+/wEAAQD7//7//P8BAAEA/v/+//z//v/9/wEA/f////3/AQD9////AAD7//3//v/8//z//P/+//r/+//8////AgAAAAAA/v/9/wEA///8//3/+v/3//L/9P/9//v/9v/2//3/+f/7//n/9//8//n/+//7//v/+f/5//z//P/7//z//v/+/wIAAgD///3/AgABAAAABAAFAAYAAwAEAAIA//8AAP//+//7//v/AAAFAAYABgAFAAYAAgD//wEAAQAAAAIABAAEAAYAAgACAAIAAAADAAMABgACAP//AAAFAAIABAABAAAA/v/+//r////6//n/+P////v/9//5//X/+//6//z/+//2//P/+f/0//P/9//5//r/+f/+//7/+v/8//z/+v/7//v/+//8/wEAAAD7//n/+P/5//n////6//n//f/6//7//P/3//z//f/5//z/+v/3//j/+//4//X/+//6//3//P/5//v/+//7//f/+//+//r/+/8AAP3//v/5//7/+v/6//z/AAD9//z/AAD///////8BAAQABAABAP7////+//v//v///wAA/f8AAP//AQD+/////f/9/wEA+P/8//v//P/+//z//P/6//r//P/5//r//v8FAP7/9//5//r////4//P/9v/2//j/+P/y//b/9P/z//X/+//4//r/+//4//X/+P/4//b/+f/5//7//P/8//j/+v/6//r/+P/6//j/9//6//7//P/9//v/+P/5//3//P/8////AAACAAMABAACAP7/AAABAPv/+f8CAPr///8BAAQABAD+/wAAAQAAAP3/AAAEAAUA+//9//7///8CAAAABgAEAPv/AgD///v/9//4//X/9P/2//T/9P/3//f/9//1//z/9v/2//L/8v/0//P/9f/2//r/9v/2//b/9f/2//b/9//z//b/9//3//f/9v/5//b/+v/4//n/+/////7/AAD9//z/BAD+/wEAAQD//wAACA
ACAAAAAAD8//7///8DAAAA/f////r////4//f/+v/6//v/+//8//7//f/8//3//P/+////AQD//wMAAQD///7//v/9//n//f///wAA9//5//3/AAD///n//P8AAPz/AgD+//z////8//T/9v/1//X/9f/1//H/7f/z//X/+P/+//7//v/+/wIA+f/8//7/+P/2//P/9f/2//X/+P/9//3//v8BAAEA+v/7//v//f/9//n/+//8//z/9//4//b/8//y//P/8f/2//T/9P/0//j/9v/2//j/+f/4//j/+//2//j/+v/+/wAAAwABAP//AAD///z/AAD6//3/AAD+//7/AAD///7//f///wAA/P/5//7///////z//v/+//r/9//4//b/8//6//X//P/0//T/9v/3//f/+v/2//n/9f/z//P/8//z//b/9//5//j/8v/z//j/+P/5//r/+P/1//n/+//5//3/AQD7//3/+//+/wAA///9//v//f8AAAEA+//9//z/AAD9//n/+v/+///////9//n/+//6//z/+//4//n/+v/2//X/9v/1//L/8v/x//H/9P/x//X/9P/z//L/8v/1//X/9f/y//P/9f/1//L/8v/1//H/8P/1//T/8f/y//T/9P/2/+7/8f/1//L/9P/2//f/9f/2//3//P8BAAYABwAJAAYAAQABAP3///8AAPn/AAD+//3/AQD+//v/+//9//n/9//+//7//v/9/wMAAQAFAAUAAwAAAP7//f/4//j/9//x//X/9f/3//P/9f/z/+7/8f/x//X/8//y//P/9P/r/+3/6v/r/+3/7//x/+3/8//w//P/9f/0//P/+f/3//b/+f////v/+f/4//z/AAD5//z/+v/3//v//v8AAP////8CAP7/AAADAAMA/f/8/wEA+//4//r/+//8//3//f8HAAkAAwAHAAMABAAAAP7/AQD9/wEAAQAFAAQACQAKAAUABgD//wUABgADAP///v/9//n//P/5//r/+v/7//3//f/7//P/9P/5//f//f/6//b/9//0//X/9v/v//X/+//3//H/+//7//b/+//6//n/+v/6//r/+v8BAAEAAgD//wIABQD+//7//P/+//////8BAAUAAwABAAIA/P/+/wEAAAADAAkABAD9//z/BQD+//7///8FAAcABAAAAPz//P8AAPv////9//3//P8EAP7/+////wQAAQAAAPv/AQD+//v////7/wMA+P/4//z/+P/3/wEA+f/7//v/+//9//z//v8AAP7//v/9//f/9v/5//7//f/9//v/+f/+/wAA/f/+/////v/9//v//v8DAAEA///7/wEABAAHAAcACAAIAAgABwAGAAYAAQABAAUABgABAAUAAwADAAUAAwD+/////f/+//7//P///wMAAQAAAAAA/v8CAP7//f/7////AgAAAAEA+/////7//P/5//j/+v/6//f/+f/6//b/9v/4//j/+//3//b/+//+/wEAAgAFAAMAAwAGAAEA/P/6//v//f/2//f/+f/4/wAA///6/////P/5/wMABgALAAYABgAFAAAA/v/3//z/+//3//H//P/9/wAA///9/wEAAAABAP3/9//7//7////6/wAAAAD5//3//P/2//f/9P/v//L/8f/0//H/8v/3//f/+//4//r///8DAP7//P/1//f/+f/y//f//v/7//7///8AAAcAAQAAAP7///8BAAQAAgABAAkADQAKAAsABwACAP3/+v/3//H/8v/z//z//f/7/wAAAgD9//X/8f/y//T/8f/4/wAAAQD8//3/AQD///v//v/4//P/+P/w/+f/4//l/+v/8//8/wsADwACAOz/0//B//f/YACYAKEAywAKAcwA7/9L/4f/1v/R/9L/n/9T/1X/XP+Y/0IAfAATANH/dP8P/03/4/9kAOoAMQHHABAAuv/Z/+z/sv/C//P//P8oAP//jP9X/2z/7/
+BAHIAFAD8/yIAggDVABcBkgG0AboACgBUAKEAvAB7APX/kv8n/93+Lv++//n/sv9i/1//F/+i/sD+Zv8IAG0AfAA2AA4Auf9U/3z///9XAEYA8P+G/0X/Q/+B/6D/uf/j/xUAvf9l/5b/w/+1/8r/+/8DAAwAIwAuADwARQBrAKoAuACdAJsApwCIAFgAQgAtABMADwAAANf/0P/h/8b/ev9C/1r/nv++/7j/tP/J/8//0P/J/+P/LQBTADEAGQAWAAYADgAgACAAFAAgADcAHQD8/xgAUgBDACQAGgAdACIABwD2/wsAIAAeABYAFQAJAOX/r/+Z/67/xf/K/9r/7v/e/93/8f8NAAMAzf+p/6f/w//s/y8ASABIADkAIwAfABAADwA1AFAAPwAcAO//0v/J/6r/tP/g//7/BwD+//H/+P8EABAAGQAsADIAIAABAOf/6v/5/wwAHgA+AEYALAAdABwA/f/L/5//kP+R/5r/uv/g/wkAIQBNAE4AGwADAAIABAAOABEA/f/Y/6//nv+d/6T/rf/H/9//9P8LABcADgAKACcAPAAwACAADQAKABcAFwA8AF4ASwAXAAQA+f/e/9//6//3//z/AAD6//L/+P8EAA8ACQAIAAQA/v8JAA8ACwAMABQAEQAcABYABwAWABAABwAYABEA/f/5//H/9P/5//n/BgAQABkAAADf/+b/BAAUABwAFwARAAsABgABAA0ADgD+/+f/5v/l/+r//P/0/+z/4//d/9D/z//g/+//6//m//P/9P/4//7/AwAOABMAEQALAAAA9//6//r/AAAPABAACgACAPv/9v/4/wMABAAAAAQA/P/6/wAAAQASAA8ACAAFAP7/9//2//j/+f/3//r//P8BAAMAAAACAAYABwACAAQAAgD9/wIACQALAA0ACwATABEAEAAMABMAEAAXABcACgAGAAkABQADAAQADgAMAAQA///w//n/9f/4//T/7//s/+//8f/w//L/+v8AAPr//f/3//P/9P/s/+n/6//p//H/8f/5//3//P/7//n/+//6//3/+f/x//L/8v/1//j/+P8BAAEA/v/6//b/9f/6//7//v8BAAMABAAFAAEAAAABAAYACAAJAA4ADAD///z/AAD8//3/AAADAAMAAAACAAEA/v8BAP3//P////z/+f/5////AgAFAAgACgAFAP///v/x//7/AAACAAQA+//5//f/+//1//b/+v8EAPv/+f/4//L/9f/3//v//f///wQAAQD8//j/+f/z//D/8//1//T//f////3/+/8DAAwACgABAPj/9v/w//H/+v8AAPz/+f/3//n/9f/y//L/8//3//j/+f/8/////f/6////+v/4//v//v/8/wAABAAIAA0ACAADAPz/+f/5/wEA/P/3//b/8v/1//T/+//9////AQD7//v/+f/z//L/+P/8//z/+P/8//n/+//2//b/+f/6/wAA//8EAAQAAQAAAAIA/v/5//3/+v/9//L/9f/4//X/9P/v//D/8f/w//j/+v/6////+//w//X/+//7//b/9//1/+//8f/x//f/+v/8//v//P////b//f8BAP3//f/4//f/9//z//X/+//4//v////8//n//P/7//r/+//3//f/+f/5//r/9//4//b/9v/4//v/AAD8//r/9P/x//L/8f/7/wEAAwD+//v/8v/x//X/+/8AAAIA///5//b/+v/0//T/8//4//7//f/5//f/9f/5//j/+f/6//j/8//3//X/9v/0//v/9v/0//D/9v/0/+//8//v//j/8v/u//X/9P/y//n/7//0//L/8v/x//P/9v/5//v//P/9//j/9v/6/wAA/f/8//r/+f/8//z/+P/7//z/+f/7//r//v8BAP3////4//n//P8AAPz/+f/6/wEAAQD9//7/AQACAAQABgAEAAAA/////wEA+f/2//v/+P/4//P/9P/0//X/7//r/+3/5v
/o/+f/5//k/+H/5f/l/+P/4P/h/+f/6f/l/+T/6f/l/+X/6v/p/+f/6//v/+r/6v/p/+3/8f/z//b/+P/9//r/+/8AAAEABQAKAA0ADQALAAoACAD+/wQAAgD7/wAA/f/+/wUAAwADAAIABAAEAAMABAACAAYAAgAFAAMACwAHAAMAAAAAAAEA/f/8//j/8v/1//X/+P/z//X/+P/1//T/9v/5//z/+v/5//v/8//4//T/8f/x//X/9//z//n/9P/3//r/+f/6//3/AAD9//z//v/2//T/9P/7/wIA/f////7//P/9/wAAAgAFAAIAAAD9////AAD+//3/AAAGAAEA/P/5//r/9//z//X/+v////v/+//6/////P/4//T/8f/3//j/+f/0//T/9f/0//X/9f/6//r/+f/v/+r/6P/q/+z/7v/x//D/7//x//b//v8FAAkABQACAAQAAAD2//j//f8DAAAA9v/2//f/9v/9/w0AEwARAA8ACAAFAAwAFAAYABQAEwAEAPf/+P8GABwAJwAbAAIA9//8/wAA+//9/wsAEQAPAAQA/v8FAPv/+//9////AgAHAAgAAgD7//L/7f/u//b///8CAP//AgD0/+X/4//q//T/9//z//P/7//t/+f/9v/7//n/+P/5//T/6v/1//3/AgASAA0A9v/9/+v/8//3/wIACgAKAAQA9//z//b///8DAAgABgADAP///f8CAAcABQADAP//AQAEAAsADgALAAcAAAD9/wEACgAUABUADgACAP7//f8BAAYACgAHAPf/6f/f/+X/6//4//3//P/5//L/9f8AAAEABAD+//X/7P/4/wcADwALAAUABQD4/+r/6//3/wcAEgAVABMADQD9/+H/xP/P//v/IAAtACkAHgADAOj/4P/y/xwARQBUADUADgDu/97/2f/l/wEAEwAVAA4ABAD///7/EAApADMAKQAsACMAFgAJAP7/+v/s/9P/1//Z/9z/6P/+/xAAEAANAAYABQADAAUA/f/6//X/7f/l/9j/3f/l/+r/7//8/w8ADwABAP//BgAEAPP/8P/4//D/3P/i//7/EQAOAAcA/f/p/9v/4v/z//b/+P/z/+j/3v/d//H/BQAaACwAJgAcABYADQANABAADQAJAAgA/P/w/+r/7f/v//v/BwAJAPv///8SABgAEwAQABoAGgAQAAsADwAHAPj///8LAAAA+/8PABcADAD9/wEADAAFAO//5P/c/8f/vv/B/8z/1//n////FgAjACkALQAtACgAJwAvAEAAMgAJAOr/5f/g/9r/8P8LABkAEAAHAP7//f8JAA0A8//n//z/BgACAAUAGQAvAC8AHgANAAkACwAMAAMAAwAUABMA8//J/7n/zP/w/wsAAADy//7////6//n/CAAuADkAJgALAAkAFgAYAAgA7//S/8r/1v/Y/7n/p//K/+3/2/+t/6//0//4/xAAEwAGAA0AAwDj/83/+f8uAD8AIQAMAAYA8P/n/+//+v8GAP3/9//U/7z/8P/u/9r/vv/J/+T/8v/d/9D/7f8QACIAJgAiACYATABQAC0AKABEAD4ACADE/6r/wf/n//H//f8kAEAAKADu/8z/4/8TADYAJQD2/9H/xf+//8H/6v8rAEgAOAA+AGAAgACQAHkAPwATAPn/5P/f/+T/8f/p/63/af9d/5H/5f83AFUAOQACAN3/7v8NACMANgAzAAIAuP95/4H/xP/w//T///8dACcAEwAlAFIASwAHAN//1v/o/w4ARAB4AHgARQD1/8L/3f8yAGMAXABFAAwAr/9j/1z/bv9z/5H/sP+Y/1D/Q/+L/+D/EwBCAFkARAAlABUAIwAyACsAHwAWAPH/xf/J/wwAOAAUAN//zP/K/8L/oP+c/9j/EgAMANz/w//L/9f/yP/R/xMALwAFANX/vP+w/8j/8f8eACoACQAJADYAPAAkADUAaQBkACoAIABOAFUAMwAYACQAIg
Dk/7L/uv+//6n/tf/e/wAAEQAXADMAZQB1AHcAkACWAGYAJQAmAD8AIADi/9f/5/+7/2r/Z//M/xcADwAZAEIAKwAAAA4AIQAeAAUA5v8dAHsAXwAZAAAAwf9i/z//af+i/67/pP/D/+n/uP+M/8j/OABYAB0A8f8aAD0A+/+L/1r/iP/L/8T/f/90/9H/DQDL/3P/jP8CAD4ADADi/wUAQgAtANL/w/8zAHQALQD2/zAASAACAJ//Vv8r/0j/v//8/67/hP/D//X/y/+0/zYA4AD9AIYAFAD4/xUALgAgAAMA7P/d/5//Of8T/2b/2P8MAPf/AgBdAKoApACUAHgATAAHAOD/QgDxAOsAXwAUAPT/0v/J//X/aADSAMQAKwBq/zL/uf9qAG8AQgAqAC0AWACMAJwAcABbAHAAVgABAAcAYAByAOP/Df+s/hT/ov/Y//X/PQBhABAAk/9O/zT/g/9BAJUALQDL/8n/5f/u/wAAJwBCAA0AhP8H/9T+vP60/pH+MP4p/sX+Gv/+/h3/if/l//D/t//4//oAsAGMAREBiAABAHP/Bf86/xYAcQDq/3f/Z/9U/0n/bf/s/48AzACRADwA5P/H/93/vv+M/9n/ewDvAPkA7QD8ALoABABo/0n/iP+h/47/hv+9//7/8//y/6gAgQGrAaoBygFYAXgAIQBxAGkA1f+C/9b/OAD//7H/FQCXAIEAYQCdALIAlABuADcAx/86/77+gv6L/sD+Bv9b/5b/yf8MAEgAYgB3AG8APAD3/9L/tf+Q/6D/EQBhAFoAZgClAK4AeABHAFAAOADp/+3/NgDq/1v/ef8AAPr/zP9HANcAugB3AMQAKAGPAGv/Zv/9/2v/ov4X/5T/Iv+0/tD+N/91/43/IQDAAHUA8f8RADsAyv9u/7z/EwDo/4T/N/86/4D/e/9U/6z/+//U/+v/VwA9ANT/aP/q/tT+a/8HAFIAbABwAHYAfACEAOAARAEuAdUAjQD9/3L/sv9jAIUATgBCAGUAnADQAB4BiQF3AS0BPQEiAbgAqAClADwAvf+G/4H/lf+y/+T/KABVAD8AHQAiAHAA4QDiADsA0v/r/7//Sf9a//3/cwAqAM7/EQBHAML/Rv9l/5H/Xv8Z/x7/cP9y/w7/DP9m/3b/gv/n/1QAXwAKAJb/UP8b/wz/cP/y//T/2P/p/9f/kv+c//L/YQBpAA8A9/8fAPf/zv8GAGEAMwC//7P/6P8SACsAhAC9AHIA8v8RAFUALQArAEkAOgD8/4L/Rv/b/4cA0wAFAf8AqwApAGH/Fv+w/yYABQARAFkAkACiAGwADAABAF0AxwDiAIYA/f/S/6D/G//k/i//SP8v/2D/wP/q/+7/6v/u/+X/1f8YAIAAbAA8AE8ASwATALL/TP9s/7D/k/+O/8v/2/+7/6n/+v+NALkAhACaAMgAzQDQALcAggCrABsBbAGqAQoCDwKFAbcAGAC6/4H/Pf8g/2X/nf9i//7+3P5F//r/MgAAABsAZQBrAEEAPgAnALz/TP8h/+v+g/5p/tf+Q/80/+7+/P45/0f/Tv+y/xUAEwD0//P/x/+d/8f/FwBBAEMALAAnADsAGwCx/2X/Vv9f/2r/Yv9P/2r/cv9O/2j/2v8pADQASQClAOUAvACaAN0A9ACPADAAMgBOAFUAZgCOAGMAGwBMAIYAPQAIAFUA3QD/AIAA///5/9n/fv+C/8//9P8FABEA9//V/9v/MwCtAN8AuQBjADMAEwDo/9X/8v/9/7D/YP+C/9v/8v/V/9X/zv+X/0//V/+x/9P/i/98/+T/KgAbABQAJQATAOf/4f8TACQArv9b/6r/JAAoAPH/9/9GAFwA+f+Q/6D/7/8MAAwALAAzAP3/uP+I/5v/AgBAAA0Auf+M/6T/wf+R/1//ff+s/4//Y/9l/4f/hP9W/yf/NP+S/zUApgCKACsALQBWABgAvP/l/ykA8/+l/7L/yv+z/8D/AwAyACYAEg
AoAC8AEQAaAFgAOwDG/5H/v//v/woAFwBBAFMADADK//L/EgD4//r/PABdADMAyP+1/yUAfgCJAIQAkwCiAHcAMAA1AJQAywCCAE4ARAAbAJ7/U/+l/yIAJwDN/8b//P/t/7///P85ABYA/f8gABMA8//6//3/2P+z/8//IwBHACQA4/+3/63/uf/M/wkAOQAKAKr/lf/L/+T/t/+X/6f/pf+D/57/7P8IAO//3/+9/37/Vv9w/7r/x/+A/zn/P/9o/3r/rP8aAJgAngAmANb/8f/X/3r/kv8TAC0A0v+S/7b/6v/x/wQANAAFAIT/JP8S/x//TP+i/woAUQBeAC8Awv+P/xAAxQAHAdsAtQCOADMA1v/j/z0AegBzAE4AJQDx/6j/iP/j/5kA/wABAcAAXQAfAAoA+/86ALgA6gCmAAgAZf91/wcATwBDAEkADgCI/07/kv/f/yEAhQACAQIBdQAVACwALQD7/wgAXQBbAO3/pP+7/9n/2P/h/97/tf+t/+3/UwCNAG0ADgDv/yoAPADu/+v/UwB8AFoAPAANAOn/v/9z/6j/cgDLAKwAogBAAGv/0f7N/hX/O/8f/yP/Ff9//v39L/7i/pn/OACIAIgARwDQ/27/Qv8k/zH/hf+t/4D/cv94/1z/WP+6/zcAYQAaAP7/TgBjAAUA/f9wAJgAKgCN/zD/TP+e/y0A9gBUAeMARAD6//v/TQARAcEBrgHHANv/vP9EAK0AyQDNAN4AmwDU/0j/vv+7AEABBQF1AAwA5v+2/6r/NgAVAWEB4AA2AB0AXABlAEIASwBIAB8A+/+r/yT/8v5G/8D/BQATACAANAAqAO3/uP+w/8b/2P/T/9v/0v+n/4H/ff+O/93/RQBiACIA+v8KAAgA9f8DANT/S/+z/oX+FP8IAFkAIQARAB4ACQCs/1L/o/9kALUAQQCm/4L/pf/e/8v/2P/q/+//HwBYAFMAHwALACMAMwAdAF4A6AAKAZkA8f+H/7n/EAAYAN//cP8F/+P+4P7Y/vb+Xv/P/+P/m/+g/yIAqgDUALwAoQBnAOn/hP+g/wMAOwA0AOj/Tv/I/sn+Nv+w//r/PQCdALsAdwBqAN0AagGtAaEBawElAaYACwDC/+j/IwAmAAYA3P+R/zf/Cf9L//3/lgClAIoAZgAVAMv/1v8HAD0AegCyALUAfQBJADYAHADC/0v/GP8a/zz/d//K/x8ANwAQAAIACwARADEASwBIAD8ALgD0/3n/Fv9F/+H/QQAXAOH/7f/p/7b/nP+q/7//x/+z/6H/kv9u/03/Sf9b/23/if/K/yUAcwBoADQAWgC3AL0AgwB4AJ0AggAUANr/BQAGALX/iP+e/6L/iP+X/9X/5v+8/67/wP+X/1b/d//b//T/1P/a/wgAIgAcABsAKwAUANH/wv/J/4//Yf9//7L/5P8XABwALgCLAN4A7QDXAM8A9ADnAHkAIQAjACIADQAbADIABwDC/8b/6f/c/83/8/8VAP7/y//A/+T/3/+y/77/FgBeAG4AawByAHgAVQAoADsAcwCOAHwARQDI/zX/F/94/7z/yv/u/xIAIAAdABMAKQBRAGcAbABWACgAFgAEAML/d/9s/4v/mv+U/6L/uv/B/7n/sf+l/7f/AABHACcA5f/u/w0A4/+Y/6r/EwA7ABoAGQA0ABcAzv+t/7v/zv/X/+D/BgArABAA+v8uAGgAcgB3ALMA8ADUAGYAHQACANH/gP9E/03/i/+N/1P/Sf+b/87/vf+S/5n/zf/k/+X/6P/g/+3/8v/e/9f///9FAEIALgAVAAYA5v/p//X/0//H/+//HAAzACUADgAeABoAFAArADwAVAB9AFMADgAZACkA9f/1/0sAfgBWAAEArf9z/2//uv/2/97/uv/k/xwAMAA5AEgAXgB7AKYAxwC9AKgAmQByAFYATQAwAPD/uv+y/53/Xv87/1L/ff+u/9z/+v8JAP3/5P/4/zcAZQBMAAwA3P
++/43/Yv9x/6D/xv/4/zgASAAiACgAYgB4ADoA9P/F/6T/jP+g//X/NAAtAAMA7v/6/xkALQAsAA0AzP+F/2D/cP+S/6z/5P8cAAMApP+T//7/hQDZAPoA3ACDAB8A3v/J/8b/vv/B/8P/n/9//4L/i/+O/6r/8f9QAJoApgB0AE0AWwBWAA4Aw//T//L/vv90/33/vf/C/6L/q//M/9f/7/8gAC0AGgAmADoADQDE/7b/2v8IACIAHQAFAAEAAAD8/wIAFgA6AEIAGgDy//L/9v/f/9T/8v8SABMACwARABUAJQA7ADIA+/+//6r/pv+t/8n/+/8jACIAHAAsAEAAUABkAGEAQgAkAA8A8//G/5r/kv+4/+7/CwALABUAMgBGAEkASgBRADsAAADm/wUAHQAHANz/r/+S/5b/yv8LACsAGgDi/8v/2//i/9X/6P8dACoA6/+v/77/9P8OAA4ACwAJAO//wv+2/9b/AgAbABEA/v/7/+z/3P/f//n/EQARAP7/6//l/+v/8P8LADEAPgAgAOH/u/+//9j/5//6/x8AMgAbAA4AJwAxABUA/P8DAAYA3/+r/6T/s//A/8v/2P/t/xwAQgBPAFsAdQBwAFcAKwALAPn/6//u//v/EQAaAAcAAQDz/+D/7P/W//D/EwAgAAgA4/++/5//h/+D/6f/1P/q/+z/+v/8//3/BAAPABgAIwAVAPn/6P/p/97/v/+u/8X/9f8YAC8ATwCDAKMAdwAlAPT/8v/1//P/+f8FAPb/0f/E/9n/9v8PACwAOwA3ACUAEAAIABAAGwAWAPn/3P/W/+n/+/8AABAAIwAiAAQA5//h//n/HQApAA0A6f/Y/97/7P/8/xQAMAAyABEA6f/n//7/BQDv/93/z/+3/6X/qf/D/97/4P/I/7j/wv/U/9//8/8KAAwA/v/z/wAAFQAfABwADwD5/+7/9f/1//X/AgAVAAgA6//n//b/+P/o/9v/5P/8/wIA//8JABsAKgAfAA4AEgAiACwAEADo/93//f8NAOn/wf/A/9z/6//r//H/+v/y/97/1v/c//H/EgAqACQACADt/+T/5//1/xMAKQApABwADQAJABIAKQAtABwACwD7/+P/z//S/+z/+//1/+r/3f/Z/+H//v8QAAkA8v/i/9r/0P/a//7/IQAtADEANgAyACgAKABGAHAAeQBZADMAHAAHAO7/5v/s/+z/2P/I/8b/yf/L/87/0v/T/8z/xf/G/8T/yP/T/9z/z/+y/5//rf/K/97/6//5/wcADgABAPb//v8FAAIA8v/h/+f///8OAAwAAgD8//r/+P/s/+v/AQAbACsAKwAiAAQA3f/L/+P/FQA/AEkALQAAAOP/0//T/+P/AQAUAA8ABwALAA4AAAAAAAwAGQAdAB8AIQAWAA8ACQAIAAEA/P8DABUAIQAmABgABgD6//X/7v/p/+j/7//x/+j/0//U/9X/0v/V/+L/5//i//P/BAANAB4AGgAMABkACAAFAAAABgAJAAkABwABAP3/+f8BAAMAAQAAAAUAAwD4/+3/6//v/+//8P/2//v///8DAAIABgATABUAEAACAPz/+f/1//P/+f/9//r/9f/2//n/9v/0//f//P/4/+7/6P/n/+b/5P/n/+z/5//k/+H/5v/v////CgANAAwAAAD5//H/7//y//f//P/7//X/8f/1/wEADAAPAA8ACQD+//f/9f/6/wAABwAIAAAA+//1//f/+v8BAAUAAwAGAAIAAwADAAQACQAOABAADAAEAAIAAQD5/wMABgAAAPr/9f/6/wMABQAJAAcACwAMAA0ACwAEAAcACQAFAPr//P/4//X/8v/y//f/+v/9//f/8v/3//z/AQD//wUACwAIAAMA///9/wEAAwAAAAAA/P////z/+v/8/wEAAQD+/wQAAwAIAAoABwAGAAYAAwD+//3//P/z/+7/8P/0//n/9v/x/+7/7//1//r/+v/8//v/+/
/0//D/8P/w/+z/6v/t/+f/5f/l/+f/5f/g/+H/4//q/+//9P/3//n/8v/s/+f/4//o//H//f8EAAoACgAFAAgADwAaACEAIAATAAMA9P/o/+P/5//x//X/8//w/+v/7v/x//r/AgAHAAYA///2//r///8HAAsABgD///7//P///wUABwAHAAsABwACAAEABwAOABMAGAAUAA0ACwAKABMAHwAiABkAFgAUAAsA+P/k/+b/7f/v/+7/7v/v/+H/2v/Y/9z/4v/0/wMAAQD6/+//6//q/+7/+P8CAAYACwAAAPP/8P/y//r//f/6//z/+//6//b/AQAHAAYAAgD8//T/6f/x//j/AQAUABEA+v/5/+L/4v/h/+z/+P/+//3/7//k/+L/6//1//3/AgADAP3/9f/z//j//v8CAAAA///8//v//P/7/wAABQADAP///f8CAAEAAAADAAgABgAGAAYABgAFAPv/9//3//3/+v/8//7/+v/9//3//v8CAAMABAD+//r/+v////7/AwAJAAsAEAAJAAYABAADAAUABwAKAAcACAAHAP//9P/1//v/AgAGAAYACwALAAgABgAFAAcABwAIAAAAAQD+//n/+f/5//v/+//8//3/AAAFAAUAAAD8//n/9f/8//r/+f/9//n/9v/2//P/+f/7//r/+v/8/wAA//////z/AAD+/wIA/f8CAAIA/P/9/////f/8//7/AQAAAAMAAgAFAAgABwAGAAUAAQAGAAQA//8AAAAAAgD+//3/AAD+//n/9///////AwAAAPv//f/8//7///8AAAIA/v///wEA/v///wAAAAABAAAA/f/6/wMAAwAAAAIABwAEAAAAAgAAAAEAAwACAP7/+//7//7/AQD+//v/+//8//n/9//7//3/AAACAAMABQAGAAMAAgABAAAAAQACAAYAAgD7//3/AgAAAAQABQAGAAUABAAAAAQA/v/+////CAADAAAA///2//3/+//+/////P/8/wAA+f/3//n/+//9//3/AgAGAAMABAADAP7//f/8//z///8FAAQA/P/2//L/8f/z//v/+v/5//7/+P/7//3/9/8BAAAA/v8DAAIA/f///wMAAAD9/wIAAgAGAAMA/v////3/+//3//z//v/8////AwADAAMA/P////3//v/8/wAAAgAGAAoABQACAAAAAQADAAIABAD///z/+f/0//f/9v/7//j/+//4//v/+P/1//X/9//9//b/9//0//T/9f/0//f/9//1//b/7//w//L/9f/y/+//8f/y//b/8P/q/+z/7P/w//T/8P/1//X/9f/1//3///8BAAEA/v/9/////P/6//v/+P/9/wAAAwAAAAIAAwD///z//P/6//r//v8CAAEAAgABAAAAAQAEAAIAAwAGAAQABAADAAYACAAEAAUABgAAAPz//v/0/wAAAgAFAAYAAAAAAPv//f/6//3/AQAJAP3/+//5//T/9v/2//z/+//2//z/+f/2//L/9f/y//H/9P/0//H/+P/1//T/9v/8//3//f/4//T/8v/v/+//8//5//X/9v/3//f/9v/2//X/8v/1//b/9//5//n/+//4//z/+f/4//b/+//7/wAAAQAAAAQAAgD///////8CAAwABgAGAAcAAAAAAP3/AgD+//v//f/4//7/9v/0//P/9v/4//n/+P/8//v/+//7//n/+f/4//7//f8AAP///f/6//z/+//4//v//f/+//b//P8CAAUAAgD8//z////7/wAA/f/6//3/+v/0//X/+P/5//z/+//5//X/+v/7//3/AAACAAAAAQADAP7///8DAP//+//7//v//v/6//v/AAADAAMABgAHAAIA///9//7//f/5//r//f/8//j/9//3//X/9v/2//P/9v/1//X/8f/z//H/8v/1//T/8//y//P/9P/z//L/+P/4//n/+//+//z/+P/0//f/9P/0//j/+v/7//v/+/
/8//z//P/7//r/+f8CAAAAAQD//wIAAwD///v//v/8//n//f/4//7/9P/z//f/9v/4//3/+v/7//j/9f/2//j/9//7//v//P/9//v//f/9/wAA//8AAAIA/P/6//7/+v/7//z//P/+//T/+f/8//z//P/8//z//P/8//v/+//4//n/+P/2//f/+v/4//n//P/3//n/+P/5//n/+P/2//f/9f/1//X/9P/2//n/9f/1//f/9P/4//f/9//0//f/+f/2//n/9f/2//X/9f/1//T/9P/x//T/+P/2//T/+P/3//X/+f/5//n/9f/2//r//P/6//n/+f/8//j/+//4//n//P/4//T/9f/0//b/9f/y//j/+v/6/wAA///8//v//P/6//j/+//5//z/+v/9//z//v/9//v/+P/4//j/9P/2//f/8//0//f//P/8//r/+P/2//b/9P/7//v/9//5//r/9P/4//T/9f/5//r//f/7/wAA/P///wEA/f/8/////f/8//z/AAD6//n/+P/4//v/9f/3//X/9f/4//r//f/8//v//P/2//n//f/7//b/9f/3//H/8f/x//L/8//z//X/+//+//f//P/6//n/+P/3//v/+P/7//z//P/7////AAD9//7/+f////7//P/7//r/+//3//z/+v/7//n//P/+//3//f/6//v//f/8/wAA/f/6//v/+v/6//7/+P/8//7/+//2/////v/8/wAA/f/9//7//v/9//z/AQABAP///v8AAAIA/f/9//7////+////AgACAAAA/v////v//v8AAAEAAwAGAAIA+//5/wEA/v8BAAEABgAGAAQAAAABAAAAAgD//wMAAwABAAIACgAGAAQABQAIAAcABwAAAAIAAQACAAcAAQAKAAEAAQAEAAAA//8IAAAAAgACAAAAAwACAAQAAwACAAIAAgD9//3///8BAP///v/9//z//v8AAPv//P/+//z/+//2//j//v/+//7/+/8AAAIABAAAAAIAAgAGAAMA/f8BAAEAAAAEAAUA/v8CAAIAAQABAP7///8DAP7/AAACAAAA//8BAP//////////BAABAAIAAwADAAYABgAEAAEAAwADAAEA/v///wAA///+/////f/+/wEAAgAAAAUAAAABAAEAAwAHAAUABwAGAAgACAAEAAUABwAJAAgABQABAAAA9/////7/9//8//3//P8EAAUABgAFAAMAAgD9/wMAAQAEAP//AAD//wMAAQD+//7/AAACAPv//v8BAPz/+//9/wQAAQACAAEA/P/+//v//P////7/+/8DAPn////1//n///////7/+v8FAAAAAgAFAAEA/P/+//3/+v/6/wEA+//4//v///8FAP//AwABAAEAAQABAAAAAAD+////+//5//z//f/8//z//f/4//b/9P/z//L/8P/x//X/+P/0//f/9v/3//T/8//z//H/8//z//b/9//9/wEAAQAFAAIABAAAAP//+v/4//n/9//4//b/+//8//z/AgAEAAUA/v///wEAAQD+//7//P/+//3//v8CAP//AAADAAAA/f8FAAMAAAAFAAMABAAFAAUAAwAAAAIAAQD+//7/AQAIAAYABQAAAP3/+v/6//v/+v/7//j/9P/x/+//8P/u//X//P////z//P/9//j/9v/1//f/+f/7//v//v/+/wIAAAD+//z/+f/3////+v/2//n//v/9//7/9//z//T/9v/8//X////5//f////5//P/AAD2//n/9//3//f/+f/4//b/9P/0//X/8//2//P/9//1//b/+P/3//f/+v/2//j/+f/8//z/9f/6//7////+//7////7//v/+f/9//v/+//7//r/+P/y//H/8v/3/+//9P/5//n///8EAAEAAwAEAP7//P/7////BAD+////BAAEAAcAAgABAP3/+/////7/AwD//wAAAAD+//z//P/+/wEABAAFAAYABw
AEAAIA/v/9//3//v/2//j/+v/2//f/+P/4//3//P8AAP3/AQACAP3//P/5//f////6//j//f/8//b/+f/2//z//P/5//v/+v/8//v/AAD7//7//P8AAPz//v/+//n/+v/8//n/9v/3//r/9//4//r//v8CAAAAAQD///3/AAD9//r/+//4//b/8P/y//v/+v/1//P/+v/3//v/+f/3//v/9v/4//r/+v/6//n//f/+//v//P/9//v//P/9//v/+f8BAAEA//8EAAYABgACAAMAAQD//////v/4//T/9f/5/wAAAgADAAUACAAFAAAAAQAAAP//AQADAAUACAAFAAYABwAGAAcABwALAAUA/v8AAAYAAwAFAAQABAADAAEA/P8BAPz/+v/5/wIA/P/4//n/8f/5//f/+P/4//L/8v/2//D/7v/y//T/9v/4//7/AgAAAAIAAQD+//3/+//7//7/BQAFAAAA/f/6//r/+v8BAP3//P////v/AAD+//n/AQAAAP7/AQD///n/+v/9//r/9f/6//n//f/8//j/+v/5//f/8//5//v/+f/8/wIAAQACAPv////7//z//f8CAAAAAQAFAAEA///+//7/AgADAAQA///9//n/9P/4//b/+v/4//v/+v/9//r/+v/4//n//v/0//j/9v/2//j/9v/3//f/9//6//X/+f/9/wMA/f/2//n/+/////j/8//4//f/+v/7//X/+v/4//f/+f8BAAAAAQACAP///P/+/////f/+//7/BAACAAIA/v8BAAEA///7//3/+v/5//v/AAD+///////7//z////8//z//v/+/wEABAAGAAUAAAACAAIA+f/1//r/8f/7//3/AAABAPr/+v/5//r/+P/7/wAABAD3//j/+v/4//z/+/8AAP7/9v/9//v/+P/0//b/8v/w//P/8v/w//X/9f/2//X//v/6//r/9f/y//P/8P/y//X//P/5//r/+f/5//n/+P/4//X/+P/5//v/+//6//3/+v/+//z/+v/8/wMAAQACAAAAAQAHAAAAAgADAP//AAALAAQAAwADAP7/AQAAAAUAAgAAAAEA+/8AAPf/9P/2//f/+f/6//v/AQAAAAEAAQAAAAIAAgAGAAQACwAJAAYABAAEAAIA/P8AAAMABAD3//r//v8AAAAA+v/9/wEA/P8DAP///v8BAP3/8//0//X/9P/y//H/7f/p/+//8P/0//v//f/8//3/AQD5//3////6//r/9v/3//j/9f/4//z/+//+/wEAAAD6//3//f/9////+//8//z//f/7//v/+f/3//j/+P/2//v/9//3//b/+f/4//b/+v/7//v//P////r//P/+/wEABAAGAAUAAQABAAIA/f8CAP7/AwAGAAQAAgAFAAMAAgAAAAEAAgD+//z/AgACAAAA//8CAAEA/P/4//z/+v/1//v/+f8AAPj/9//7//v//P8BAPz//v/8//z//P/8//3/AAD//wEAAgD9//7/AwAGAAUABwAGAAEABQAIAAUABgAIAAIAAgD+/wEAAwACAAEA/f/9///////5//n/9//6//f/8f/0//b/9v/2//b/8v/0//T/9f/0//D/8v/1/+//8P/y//H/7v/s/+r/6v/s/+v/7//v/+3/7v/u//P/8v/z//H/8v/z//X/8//z//f/9f/3//v/+v/4//v//P/6////+f/7//3//f8CAAQABgAFAAcADgALABIAFAATABMADgAHAAcAAAAFAAUA/v8FAAQAAQAGAAIA/v/+//3/+//3/wEA/v8AAPv////+/wAA///7//b/9v/3/+//8v/x/+z/7f/t//H/7v/t/+v/6f/r/+r/8v/0//L/8f/2//D/9P/r/+//9P/2//f/9P/8//j//P/+//3/+/8AAAAA/v///wQAAAD9//z//f8AAPn//P/5//n/+//9//7//f/9//7/+P/5//3//v/5//j/+v/1//L/8f
/y//P/8//y//j/+P/z//b/8v/z/+//7//z//L/8//y//T/9P/4//j/9//5//L/+f/6//j/9f/1//X/8//0//L/8//y//L/9v/2//b/8f/0//n/+/////7//P////3/AAAFAAAAAwAHAAUAAAAJAAkACAAMAAkACAAKAAkABgAFAAsADQAKAAkACwANAAUABAACAAEAAAAAAAEAAAD9//r/+P/y//T/9v/2//r//P/4//H/7v/z/+//8P/x//b/+P/2//L/8v/w//P/8f/1//T/8//0//z/+P/w//T/9v/4//j/8//0//P/9f/6//X//v/1//b/+//5//b/AAD6//z/+//7/////f////////8AAP///P/9//3//v/9//z/+//7//3/AAD8////AQAAAP//+P/6//7//f/9//r/AQAAAAQAAgAFAAQABwADAPv//f/8//n/+f/8//b//f/+//z/AAD+//v//f/6//r/+f/5//v/AAAAAAAAAgABAAUAAAADAAEA//8FAAUABAAAAAAA/v/6//n/+v/9/wIAAwAFAAcACgAJAAkACAAKAAMABAD+//z//v/7////AAADAAUABAACAAEAAwAGAAMAAQAAAP3/BwAEAP//AgAAAPr//f/8/////v/6//j/9f/4//j//v/5//7/+/8AAPz//f////v//P/7//v/+//4//v/9//4//f/+//9//r//v/8//v/AAD+//r//v/7//v/9f/5/wEA///6//f//v/7//7/+//7//7//v8AAAAAAwACAAEAAwADAAEAAgADAAMABQAFAAIAAAAGAAYABgALAA4ADAAJAAcAAwAAAAIAAAD8//v/+//+/wMAAgABAAEAAgD///v/AAABAAEAAgAFAAUABgACAAMAAwACAAMABAAHAAMA//8BAAYAAwAFAAIAAgD///7/+v/+//r/+v/5/wEA/f/6//z/+P////z////+//j/9//9//f/9//7//v//f/9/wEAAgD9////AQD//wAAAgAAAAEABgADAP///f/7//z//P8CAPz/+v////v//f/9//f//f/9//n/+//6//j/+f/7//n/9f/7//v//f/8//j/+v/7//v/9//8//3/+//8/wAA/f////v////7//z//v8BAP7//v8BAAEAAgACAAQABwAHAAUAAQACAAEA/v8CAAEABAAAAAIAAgADAAAAAAD//wAAAwD6/////f/9/wAA/v/+//v/+v/9//r/+//+/wYA///4//v/+/8AAPj/8v/2//b/+P/4//L/9//1//T/9f/7//j/+f/8//n/9v/5//r/+P/7//r/AQD+//3/+f/8//z/+//6//7/+//7//3/AAD+//3/+v/2//j//P/7//3///8AAAIABAADAAEA/P///wAA+v/4/wIA+v8AAAEAAwADAPz//v/+//3/+v/9/wIAAwD5//v//f/9/wAA//8EAAIA+P/+//v/+P/z//X/8//y//X/9f/0//j/9v/2//X//P/3//f/9P/z//f/9v/3//j//f/4//b/9v/1//f/9//5//X/+P/5//n/+P/3//r/9//6//j/+f/7/wIA//8AAPz//P8DAPz/AAAAAP3//P8HAP///v////r//v///wQAAAD7//v/9//9//X/9P/4//f/+P/4//n/+//5//n/+//7//3///8BAP7/AwABAP7//P/8//v/9//7//z//v/0//X/+P/6//v/9f/6//z/+v8AAP7//P////v/8v/2//b/9f/0//P/8P/t//P/9f/3//z//f/8//z////4//r//f/5//z/+f/5//v/+f/6//v/+P/6//3/+//z//n/+v/7//7/+f/5//r/+v/3//b/9//1//X/9f/z//j/8//0//X/+P/2//P/9f/1//L/9P/5//P/9v/4//r/+//+//z/+P/6//v/+f8AAPr///8CAP7//P8AAP7//f/8////AQD///r///8BAP///P
/9//z/+P/2//j/9v/z//n/9//6//T/9P/1//T/9f/3//P/9f/0//L/8//y//X/9v/1//n/+v/z//P/+P/5//r/+v/3//T/+v/6//n//f8BAPn//P/8//z////+//7/+v/8//7//v/3//n/+P/8//n/9//5//z//v/8//z/+v/7//r/+//8//b/+f/7//X/9//6//j/9f/1//P/8//0//D/8//y//T/8//w//P/9//0//L/9f/4//f/8v/0//n/9P/y//j/+P/2//j/+f/4//z/7v/x//X/8f/0//b/+P/z//X//P/7/wAABQAGAAgAAAD5//z/9//6//n/8//9//r/+f////z/+P/4//r/9v/0//v/+/////z/AgABAAgABwAEAAQABAADAP3///8AAPj//f/9//7/+P/6//j/8v/2//T/+P/4//f/9v/5//D/8v/r/+7/8v/y//L/7v/3//L/9v/2//b/9P/5//r/9//4//3/+f/4//b/+f/8//b/+P/2//b/9v/4//z/+v/6//v/+v/5//z//f/7//v////6//f/9f/3//j/+//6////AAD9//7/+//9//r/+P/5//j/+v/7//7//v8DAAUAAgAFAAQABgAFAAMAAgABAAAA/v/+//z//v8AAAIABgAKAAgAAQABAAUABAAFAAMA/f/9//z//P/+//b/+f/+//j/9f8AAAEA/P/7//f/+P/7//v//P/5//v/9v/1//P/9f/9/wAA///3//L/9f/5//r/+f/8/wAA///5//r//v/4//3/AgD9//X/8//3//X/9v/2//3/AgAGAAQAAwAEAAcAAwAAAAAAAQACAAYAAQAAAAAA///8/wEA9//x//L/+v/6/+//9f/x//D//P/9//f/BgD9/wMAAwADAAUACAAIAAkACQAGAAYABwAJAAYACgAEAAEAAwADAAMABwADAAgABwAKAAkAAwAKAAgACAAFAAQAAgD//wMAAwALAAcAAgADAAMA///1//b/8//7//j//P8AAP//AwAHAAUAAwAFAP7//v/9/wIABgD7////BAAGAAsACAAFAAEA//////7/AgD+//3/+//5//X/+P/9/wEAAwAFAAgACQAGAAMAAQD//wcABAD6//n/+v/6//f///8CAAUABQAJAAkACwAJAAUABgAEAAIACwACAAMACAAFAAEAAwD+/wUABQD//wEAAgAHAAUABwAEAAgACAAJAAEAAwABAPr//v/+//v//P8AAAIA/v8AAAAA//8CAAEAAAD9/wAABgAEAAAAAgD+//3/9v/0/wAAAAD7//v/AgD8//7/AAD//wUAAQADAAUAAwAFAAIAAwAEAAEABAAHAAkADAAOAAsACQANAAoABwAJAAwACwAGAAoACAAIAAgACAADAP7///8CAAcAAwD+//3////8//f//v8DAAQAAwACAAUABgAAAAAAAAD+/wEABAAJAAYAAAACAAgABgAHAAMABgACAAEA/P/+//r/+v/8/wAA/f///wEA9/8AAP7//v/5//P/+f/9//X/9P////7/+v/6///////1//j//v///wEAAwAHAAoADAALAAIA+v/y//T/+P//////AQAJAAUABQAFAAEABgD///v//P/5//L/9f/+//3/9v/8////+//1//T/+//6//j/8//5//v/+v/7//3/+//+//v/AAD//wIAAgAFAAIABQAHAAQABgAFAAsACgAGAAoABQABAAEA9v/9////AQD9/wIA/f8EAPv/+P/1//r//v/x//X/7//z//f/9f/1//P/8//4//L/9P/3//r/9f/y//f/+f/+//f/9P/5//n/9v/5//b/+P/z//L/9P/5//X//v8CAAAA/P///wIAAAABAP3/AgABAAAA/v/+/wEA+v/6//3/+v/4//n/+v/0//P/8v/x//j/+v/6//3/BAAGAAYABgAFAAIAAQABAAEA+//1//j/7//9//r/+f/8//X/+P
/0//f/8//z//b//P/v//H/9P/v//H/9f/5//f/8P/2//f/9P/w//H/7//v//D/8P/v//X/9v/3//X/+v/8//3/+P/0//n/9//2//3/AgD7//f/9P/x/+//8f/y//H/9f/3//z//P/8//3//P////v/+f/4/////f//////AQAEAP7//P/7//n/9/8AAPb/+P/3//f/9v/0//7//v8AAP//+f/8//j/9v/0//f/9v/2//D/8v/t/+7/7P/u/+//8P/2//X/+v/7//z/+v/8//z/+P/7//v//P/x//b/+P/1//L/7v/w//L/7//4//f/9v/6//j/8P/z//f/9v/0//L/8P/q/+7/7//x//T/9P/0//X/+f/w//P/9v/1//f/8//z//P/8v/z//X/8f/y//f/9P/v//P/9f/z//b/9P/z//L/8P/y//D/8f/0//f/9v/1//j/8f/2//T/9P/y/+//8//2//X/8v/0/+7/7//y//b/+f/8//n/9//2//r/9v/6//b/+/8BAP///v/+//r//P/4//r//P/8//X/+f/3//r/+v/7//b/9P/y//T/8v/z//b/8v/6//f/9f/1//P/9f/6//H/9//4//n/9P/1//n/+//2//n//f/1//T/9v/8//r/+P/3//b/+//6//f//P/5//X/8//0//b/9v/1//f/8P/x//P/9P/y//H/8v/1//b/8//z//f/+f/5//b/9//1//X/9//5//P/8v/3//X/8//1//j/+v/9//v/+v/7//j/+v/4//j/+P/3//f/+f/3//P/9P/1//P/8f/y//b/8f/x//T/8f/w//L/9P/z//X/9P/2//n/+//7//v/AQD7//3//f/8//j/+v/7//r/9v/3//f/8//4//j/9P/5//P/9P/+//7//v/8//3//f/7//3/+v/+//3//f/7/wAA+v/4//f/+//9//3/AAADAAIACAAHAAoACAAHAAkABwACAAUABgAHAAcABwAKAAYACgAIAAcACAAJAAcAAgAEAP7//P/9//v/9v/2//z/+f/3//v/+f/5//n/+f////////8FAAgABwAHAAoACAAEAAMABAD+//z/BgADAAQAAQD5//f/8P/2//n/+v/v//X/+f/7//7///8FAPn/+P/4//f/7//n/+j/8f/z/+3/7v/p/+v/8//4//H/9P/9/wQABgARABwAGAASABgAFwAVAB4AKgAkACQALwAyABcACAAPAP//CgAmAC8AKwAjAAEABwARAOv/AwD2/+X/AgDh/8//3//D/9v/8P/Q/+b/3P/Y/+n/2f/i/+T/8f/y//j/AwDm/+3/3P/K/9r/4v8JAN3/iv//AKMD8QQ5AuT/xgCvAOf/Lf//ALX/yv4nACr+Ef6I/zD+Qf05//r/Tv9F/2j/qv9UAHkAcgCJAGoAOADv/8z/4P9NAMQA5P9sAB0BLAAPADwAJgFQAGgAkQF7ALcArQDi/k3+BADD/9P/ugAWAHH/Sf80AKb/hv5y/yAAyv6S/xAA2P6X/4EAYAB5/xEBjgHw/4kBIAIJAZ8BFQK0ANj/9gB6AL3/ewA7/1n+m/+cAScA8P0CAYoAu/8eAVYBfQFjARwBCv9V/zoAFf+B/4YARv8J/zgBpgBYAAcCjAAe/5AAoP8B/4j/e/9FAXwAdADrAIL+2/+LAO//dAHqAK0A3AChAbkBd/+eATUBnv+MALABpQE3/zQAh/39/B/+8Puo/M39mPwR/PT8Lf0a/r3+2P5o/wMBrwHAAGoCcgOuA3IDMQJwAgYCtgH3AVgCaQIwAWD/LAARATQAHwBU/0v+Kv4v/j39tP3X/ST9Jf2b/u7+T/67ACkCFQJ9AZsCJQMAAksD0wJIAcsB3wHRAAf+ev7v/tn8L/1Z/UL+wP7w/jf/vf5e/07/dP9wAPEA+gFkAkECJQPPA4MD0gDk/8kBRgCC/RT9Dv2+/fn90v3i/Oj74vzU/d/9Pf4cAbwBqgAQAVcCDQ
IcAwoDvQArAjsDOwHt/84CHAGX/ysAmf8C/wX/4wBe/4H9J/8q/0L+M//A/sP+Hv9WAO//of9eAZEAGwAsAGUAGgCh/6//Uv9lAO8AgP9J/zcAxgDP/z7/YgBX/+D+mv+E/+L/VP8b/9f+J/9ZAI4AGwEgAUgAyv8tAKr/d/8w/1L/fwCq/97/d/9a/0ABkwB4AK8BlQFSATwChwI7ARMBpgALAI3/Yv4e/zQAiP98/uT+Mf81/6b/vf6D/iX/Pf/i/j//KQEBAVIByQHt/9YANAEjAHYA4QAOAkMB/P/t/77+l/1c/ff87/z4/a7+2P79/g8AOwCo/lj/qgCLABQBrgGfAHn/yP9h/xL/MQDd/6b+IQB0AG8ASAFYAXUAJP/Z/0v+z/2f/rv+3P5F/5wBRgLCAZgCvgLmAeoBBwJdAaz/nAD+AHD+H/5a/oL9PP06/lf9q/zs/YT9g/2k/xEBhwEVAxkCHgKeBAQEiAMaAyEDHAJM//X+1/++/+D9Tv3k/Az9U/2P+7z7Kf0d/vL9R/47/7T+Z/7T/XX/TwKhAUEBKwJ4AYIBMQLCADgBMQIEAqwBTQB6AVEBkv8PADP+av0p/3v/vP6p/skANAH0AOL/P/6G/jb/PAFcAgUC4wCP/tX9/vxT/G3+LwF4AtYBiQH9/w7/Mf9G/nX9Fv0+/zP/zP1G/+T+n/4aAIwAAwJ4AqQAzwDyAJoAiQH//77+ngCV/679hf7f/4MApAAoAdMA1P+i/gD+v/yq/G7+s/yc+pD72v0+/wz/yACcA3ADuAFpAQACBQI6AsYB9v7S/QH+/Pwa/Ln7hP2G/zL+yf1sAccD/gPDAkEAmQB+AHD+hv74/pj/vQCr/hX7dvw9/+H96f0zAcEDwAVQBZICGQLNAXEAUQAvAGEA9AFDAU392PwI/sP9zv/WAWoCPgQJBIMB7wD5/6b/RAKxAtwAIAH+AW8ABACBAA4BGwMbAmn/2f0I/Tb9D/7x/dj91f/Y/9P+Hv91//IArgNgBEwDngPlAXX9sfo1+VD5W/vr/e4AVwJdALv/EgJOAJL8mv3k/1IAz/85/hP+GP4u/a39kv2M/Vr9YP24/h0AWgFqAZ4BTABO/uX9tvt4+1r/4AKQAXz//QIPBHMAsv5t/nj9tf2K/y4AOgCu/9oAiQGP/qL/1gMaAlr/BQErBDwFSAKlAV0EUwPT///9qf2a/9QCcANpA/EDnQO5BDwC9vwF/UcA6QCq/Ez8JwFUAkj/gP3f/Gr8o/8PA1QC6QGRA0oEYgHV/dL9/v4gANv+9/xM/UP9qvzf+3384v2GAdUEGQNlAk8EpQSDAYD7iPmD+/j5Gvj9+wcB9QRWB/AFoQYwB4sEBAILALb/6P9mAGQBEf8D/Qb+Gv3o+hH60/v2/qsCUwU9BZ0FVAW9BIAChf6//pj+ff1j/S79Xf4Z/wQArv+E/Tn9W/2q+7X54fja+N/4b/i4+Bj7fv64/2P/xACTAPb/TwFuADQA4AMrB30HegZsA08AF/48+j72W/bw+c79kgBkApUGhAr/CD4F7gSzBbgFvQTfA8UFUQYIBTEEJgKpALMBOQNvBF8EpQSrBGsDQQPYAav/WP7R/5gCaAITApwCRgLpAMf+Tvye+578kPyU+zP6G/rX+yD7C/jL9xH6tPjv9gL5rPoR/Jr8yvth/F38mfpH+g38Dv+cAQwDdAPyAcb+cPsr+gX79PxxAFIEdwYQCSgL0gmBBl0E0gRtBf4DpAOIBqMHmwXZAp8BagKIA5cDFQPRBQgJ4AhsBoQEHASaAigAcf4s/3v/1P2A/Mb7Ivyd+yv7zPvF+7/7M/uM+bT3QfZs9UL1IfYx+Mv5z/iM+Az6cfkn+Rn6IfoE/OP/pgEKApgD/wSJBQkFtAMSAxkCDAFmAkEEtgQrBVMG2Qa3BhcH0gUWBBoFDwcZCLIGtQXKB0YHIgQiBGkFUQUiBVsESgRzBREE/ALNBNAERQJBAB0AV/+r/Ez6H/
mN+EL3xPZn+Iz5DviR9YH0q/Sx9PPyCfIM9tD5Rvol+2f9iQBdAb/+//52AnkC6v8r/7EAigKFAer+Qv85AHn/E//q/d79ZwFSAwYDAAadCXcJ4gf5Bk0HEQmSCGAHkQjiCKEH/gVZBcQFpAUtBJcC5QKjAyIDfwLAAjED2gJ4AakAAgHj/xz9zfot+fP3/Pab9QP0nPMa9JD0b/SV8/XyIvJq8PrvGfQw+iL98v8JBowL5wzmCvAJBwqxBvIBUP8E/Rr6hPf09o34WvlO+rj+gQSBBnkG8Qg6C1UKwwj7B4YI1QlNCA4GcQbdBZcEAQSjA0oFFgaBBKwEJAVnA58B+gAfAe8AOQCAAPUBvgLOAHv+Pf7k/L/4h/Sf8T7wMe8c7jbuoe7x7irv4O1T7NnvKvgz/xgDbwkJEz4YxhdXFnwVgRKpC58Dlv1i+C/zIu+A7fjvgfQp+A79bQOSCCwMcA4LD7cOgw2fCtwG+gNwAez+Pf28+3P7wf0FADIC2wR2BxgK0Aq8Ca8I/gbBA6QAdP7a/OX7zfuU/NX9QP/c/1f/yv0K+iX1T/Gk7BLoF+bp5ETlIeje6UvsXPRg/9kIGxA/F0wflyJaHgEaHRcPD3sEsPxW9ZvuC+q/5zrqU+5s8aH4hwEmBl4KLA+AEMQP0w09DDcMIwnAA2UB5/+R/N35NPmw+gH+ggDqAecFaQvEDUEMsgohCpUHqQMpAXMAFgAg/z7/dAENA0MCNgHCADH++/dl8CXqYeXb4Dbd39vY3d7iTOmW8Bz7xAjWFJMdxyTtKKYnxyF/GfoP6wWN+/7xYeut53Ll9OUG6hjw6vWn+xoCAwcRCXAKRgsCCnII5geOB3oHzAWKAoEAK/+S/ID6r/o//Y8BrQR4BjIL7Q9QEDgPdw5yDbcKnAZEA7YBsADB/6kAYwIVA8wC7QCP/Uv4m+8D57/hF94v2xzaEtxU4m3qsfCt95gDsRH5G74hNCYVKZAm5BxFESUIq/6A81Drc+eU5qPoiOt+77X2bfzK/qYCigXVBH4FDwbtAk0BvwIoBIsE+ALFAdgCkgIS//z8YP+YAxcGZwelChoQ4BMPFPMSqBLAEdsNIghrAyEB1/9V/f/7j/39/gL/+fy/+DX19O/l5lngpN1g20Xc+9//5N3tKPch/WwEaA9+GWQfZSH8IQ4i8hxjEUoGgP5R9oTteOde5hTpN+yc7p7zPvvWAAsDAwQjBFIDXgHO/XH7o/wC/94AWwKkA9sFAAhMB9oFmQZnCAYKBQuTC6wNshAYEiwRYg/VDkYOQws1B7oE4AP4ApAA4/3U/aP+F/xR9xT0H/Gf61rlNeFq37LgR+T/55btz/Wj/HMBNQcKDyEXIxu+GzEewR5cFxsNrgXc/if2pOyp5kLnoeq66zXuh/WU/EP/OQCKAD//O/19+tv3PvjX+gT+ZAK5BugJ3gwCDlMM8wm4B9AFrATsAywEfQa8Ci4OGg8UEKMSqxPbECQMzQhrB2UFCwG6/SX+uf7S+6P3zvSO8iDvROod5nvlTehl7F7w8vTr+rMAgQOeBNIHEA1jEPAQaRLTFckVKA/XBtABEv3C9Fbs1Olz7KLut+4E8R33bfy0/Nb5yPgp+Zz3j/UM9kD6zgAJBnwJqw2mELcPMQyyB8wCu/6T/LD8mf4GAkAH2Q00E3YVbhajF4gWXxE7C2wHHQU9Atn+j/3j/nf/Mv36+Qv43/Yp9EDwyu4T8YX0f/cb+tn8y/9OAAr8uPfv+Az94f4EAOQFiQ+bE/UOnQp8CQAFHPyT8+/vI/Ev8VHu4u9O9vH5lvnh+Dv5g/kh+JX1WPUA+cv94QFTBSwIjQp/C+YIMAQyAXQAWwCxADgC1gb3DS0TMBXyFp8Yzhd3ExsN5QeeBOYA/fxh+2H8dv4EAH4ATAGMAm0CvQCu/tn8J/yp/P787PsY+nz4D/br8GPrB+ve75vz6/XY/McGbAs3ClwJgQqOCdQDgf2r/D/+xPvP9mv1Q/
du93L0kvFl8UPyg/FJ8Njx8vZ4/BkAtAKeBb4HSgfeBGACLwF8AisFFQegCekOpRSwF/MXtRe7Fx0VUg53B6QDHwHz/X/7O/wyAOADfAUbB5IJ5wrtCUwHgwRvAokAf/6r/Gn6xfcP9afwjOmb4+TiP+a+6C/r5PPmAGIIfgmTDMkRXRLYDbUJJAgpBnkBC/tl9qr03fIH7+XrqetJ7RPubO0c77H0qfkt/An/jQKYBAMFEASOAjoCVQPmBC0GEAjqDKoSihQjFHQVoBYOFKQO5gknB84EVAEB/yQAXQMcBikIdgoxDdgO8Q0mC/wHgQQKAFT7Vvh49ibzde8D7njseOhd5Znmbuqq7Q3xuPf6/x4F/QbiCW8ObRDtDeEK3wlrB+AAFPo79hf0TPFB7kPt1e5s8LbwG/Go8+L39Pqf+zL86P0C/wT+Dfyt++/9CgFuAxgG0AoREasVRxfBGOkaSRqSFQIQKQx9CUYGPQPvAlsFUgfFB0UInAmwCuoJQQfUBL0DKQJ6/8/9ZP1i/Hj6Q/gC9XfwLuva5lnlWear6Ons5PLZ+N79IgIBBnAJoQpnCVAILwdkA2j+Wfvw+TT4PvYe9Xf1dvaC9m/1x/Q99ff1cvVE9DL0KvVA9Tv0evSc99j7H//aAigJSRBVFW0YERv+HOUcIRraFRcSbQ88DEcI2gXqBR4G/ARbBOYF2wdCCM0HuwegBxsHQAYDBbwD9wLaAS//mfvw94bzt+6O6sXn8ubj5/jpnO2i8Ur1e/kk/Rf/uwCeAvYDQQR7A98C7AJvAfL+F/6L/SD7Q/mP+IP2rvMW8rXwDe9W7vruRPBE8mj0S/Ya+XP94wGzBf8J+g5ME/YVixfWGDEZJBhNFkgUHRL0D4cN3wqyCHYHvwbwBd4EnwTCBRQHaAd8BxMIvQhCCFYGAARFAnEAD/1f+K70HfLP7qnrlOrd6hzs/u0l73Twk/Nf9u33RPqY/REAMwG7AQ8C/QGgAV8AgP5u/Rz9fPtG+YD4OPhx9jb1EvWp88rxc/E28efwPvIW9WX4PvxuAJsEVglfDiUSiBTSFuYYYxlJGN0WiRVMExQQ1Aw3Cv8HLwbxBIwECQU1BmcHGAgRCJ4HBwdKBroEKwIvAPT/3f9Q/tj76fpD/Kf7FPb98CXwve586tbndukJ7oryDPXG99D8fwDd/7/+0gDqApABsP5F/vv/hAB2/uL8hf1p/Sb6QPb684LyCvAu7UXsL+7a8FTzafd4/RYDmgdtDMwQCROcFIYWyRbYFFMTKxI2D9ULRQreCKoGngUzBvoG3QfWCNIJHwtIDAoMfQrfCIIHgQUwA68BxgDB/63+Kv1p+3j5mPb68hnwZu6o7cHsF+v16ifu+fE49Kv2tvrk/X3+x/5+AegEAQZ4BcsF9AX4A8cA/v28+vb2FfNU7y3tiO2g7bXsvO4P9Df4RPv//+YERghCCzoNuA3MDusPIw5mC6gKHQpHCMgHBAnaCYkK7ws6DQYOXw4iDpUNXA0JDdULxglXCH8IuwiVB1MGZwWFA40AsPzD97jzVvHW7s/soO3k7pTt6eyK8C/1k/fs+Bf7vP2T/4AAFwKFBCQGOQaSBSoE7gFN/zT8e/jH9Sb0+PEa8JHwZPEJ8QbyEPVB90r4SvpY/Zj/7AC+AlEFPwfRBx8IJAn7CdsJjAlcCnoMjw6nDt0N6w64EPAPBA7MDfQNLAzwCWQJrgniCDwHJAZxBQUE8gF3/zX8hPiM9ejyAvBT7onv6vGG8gnykfTJ+Sn82PqE+83+BABn/9kAgQNsBOYDKwNMAnIBov9a/Pz53PnI+Pv14/Tn9dD17fTu9Dv1wvXz9lX3NPcZ+Ub8g/0T/qAA1wPhBbYHoAlxCxAOkRBHEZcRzBJTEwcSdxAoD0oNXAtZCl4J9gcBB4MG4wVkBeEElQOXAZn/7fzj+Af1IfM48nzxMvKi9BD30Pjz+Qr7Ef0f/0v/G/8JAA4Ay/4//zcBowFOADMAng
GPAQf/ofyh+9X6MvkO93b1qPUp9hv0GfFD8XbzvfMc8xb12fik+4X9OQDYA2sHhgk7CqYLVw58D5YOCQ93EcYRGg97DTUO6A2EC1sJkwgaCFEHbAaDBXMERQOBAUv/lf24+9r3DfRb8/3zd/Pl9Kf5j/xx+8P6hvxj/ur+gv5r/hAAtAEEAZYAMAMrBbYDTAJ8AyYEvAHk/bj7Nfvr+az2J/Rd9Bf11vMT8hzy5PNz9cz1ffZE+Wb85f17/9cC3wU5B7EI4AolDI0MLA0+DiAP9g40DbILHAxpDFIKYAjmCK4JXwhUBlkF5wS7A20BzP7Y/Dv6oPUT8ZDvMfC/8PLxbvV3+U37yPsX/tIBrgM6A6YDCgVUBPUB1gG5AwgE5AIyAyMEHQNNAJn9evuo+T/3fPSt81X13PUN9E/zCPVj9n72+Pa3+HD7NP6C/50AAwTFBwUI7AZKCIQKVQqACaAKaAyyDAUMzAtXDHwMUQuvCWQJ5wndCAQGDATOA68CCP87+yL5A/c/84nvEO4n71HxYPNx9mz7Ff9c/4H/vgE5A9kCBwMpBOgDqwLdAnEEUQVdBeQEywOxAlsBCv4s+mz4xPfc9fHz6vNO9Uz27fUr9Sn2e/iv+Qr6I/wO/5AA8wGPBKAGXQeIB/AHRwkICzML4gq9DNMOrQ3SCzMMqwwMC+MIOAemBRwEUgJ0AMH/X/9x/c769fiM9ufy0O/u7uzv7PE19Cr36fol/rj/rQD5Af8CIAO1AjkCUALlAnwDBwTaBFsFEAVlBG8DZwHU/qv8ufp7+Nf2I/bu9d/1m/XH9JT0EfbO90z4Pvnc+6P+fACRAnkFPwi/CTIKuAoqDF8NKg2/DOgN0Q4aDZIKAQoiCvYIzwbzBNsDJQOlAUr/oP00/QH81PhS9SLzh/EC8ELvAPBo8mn2a/rP/DT+hADnAicDIALpAbAB0gDKAEICAATOBeUGjgYKBvEFdgSgAT7/qv3D+4H5p/fS9l72ZfVA9PjzhvRN9R/2rPdA+kj9KwAMAysGGwn2CtoLlgx+Dc4Nsw0DDoQOyA0gDCQL0wqhCX0HdAXgA4ICRAHL/zr+A/3W+7f58fZK9MLxPu/N7QPuee/+8Wn1evjw+ob9DgCbAYQCJgOFA7MDxwMNBEgF/Qb0B4cHVQZDBRcEvAEV/6D90/xO+7/5Ifkg+Xz4BPe39aj1hPYz96X3Cfln++L9aACSA+AGVQmVCrILqA2ID90Pmg/VD10PHQ2ICsoIcgfsBXIEXwPbAlICcgHdALEAcv+4/LL51vZr86fvwezK63DsDe6y8Jf0Z/gP+zz9+f+sAmkE8QTsBP8EVQWYBcgFBgZvBjoGEwX+A2gD7QGv/0H+uP3E/C77vPn4+Dr4zvYr9af0evXR9uf3T/nb+zL/TwIuBQAIdAphDOMNxA5BD1MPuA6CDe4L5An1B64GtAVqBFcD5gKvAoUCkgIlAtsALv85/S36MPYa8qHuI+wm68Xrue1K8GbzVfeN+93+bAGeA2UFnQbzBk0GDgbuBsQHqgegB6sHxAYwBacDmQE9/3H91PuU+b73xPbY9Tb1l/VM9iD3kfhl+kL8rP50ARwEkQbKCGUKZAsrDNwMEw3hDAUMfAo/CXEIFweTBbQERwQDBPgDuQNjAzMDcQKDAGL+ufzS+gD46vT98U3vh+0z7Znt3+688Tj1G/gc+2T+YQHfA+YFLgdECIYJZQp1CjgK7QlDCYMHBgW2ApYAwf2b+n74iPeM9l315/SZ9ZD2bfd3+Pz5x/ub/Vr/dQE/BPEGeggmCRAKAAsbC7cKhApnCscJzQi6B3MGxAQyAxQCXQHxAOIACQEyAWsBrwFTAfH/Bf7O+8/4XfVK8hXw3e7Z7uvvNPI59U74NPsk/h0B4wMaBvkH2glPC5ILTQv+CkQKwQiTBtMD7gBK/vz7JPoV+Wj46ffM9xL4OPhR+ML4sfnX+iL8zf2b/wgBqQL7BD4Hagj8CKcJJwoZCnQJtQ
gzCIAHIwZIBJICPQE4AGH/I/+C/9n/qv+F/67/mf/P/of9q/s4+YL2//MB8gfxGvFb8q/0xPfo+rj9MQDDAnEFtgdJCXAKBwvXCgIK1wgtByYFYQPpATIAZ/6//FP7Sfqz+WL5PPk7+W/5/PnH+ln7HvyS/Vj/3wBTAqED0QQuBl0HwwfiByMI8wc9B2cGMwXuAxgDagJ+AeMAowA6AOP/GABpAGcAFgBb//X9H/w4+lb4O/aH9NTz8PM39Bz1Mvc4+u78L//UAaoEkAbNB1oJjwpjCnUJjghsB+4FVQRGAjIA9/5L/lj9LvwO+3T6rfrX+jL62/lU+r76APvf+wb9F/5Y/8oA5wHsAgEEwgQTBToF9QRVBCwEWwT9A20DWwNNA5UCpAHaADgAqf8L/0X+Av5H/g3+N/2I/Mf7j/pz+fH4ovib+Ob4mPk5+7b9xv94AbsD2gXMBkUH8wdhCDUIyAckBx4G6QTnAwID9wGqAGn/iP7Y/fH8PPzp+637ZPtu+9/7M/xL/KD8C/1H/bH9jf5G/4T/uv9kABMBUQE3AYMBTQLMAooCFwK1Af0ACwAn/2P+DP4I/gf+Ov7V/kH/af+v/xUARwAHAFT/t/5p/gv+rv3L/UX+D/9MAKgBZgIUA0AEMAVmBUkFGAWtBCYEpgMbA3ICXgEyAJH/cP8h/9r+9P4j//n+hP4s/kf+b/4e/qb9e/1K/dn8n/z//Hn9uP0L/o7+0f7U/hT/jf/M/8P/0P8JAPb/g/8T//D+2/51/vT94v34/er9Gf7U/o7/LQDYAG0BkQFiAToBWQF5AXYBpAEEAjoChwJyA30EzwTkBCoFLwXaBDUEOAMXAoEBLAFzANH/tv+W/0H/Q/97/6//2P/S/2//Hv/b/mb+9P3N/Yf9K/0U/ev8bfwx/En8K/wJ/In8M/1r/Zj9cf54/zgA1wBdAZoBmQFRAaEAo/+j/vz9t/2X/bP9Kv7+/rL/HgCaAEkB4QEzAmUCvQJJA8gDEQRLBJgE1gSBBLsDPAMMA6cCSwIqAgQC2QHNAXsB/ADUAMoAVgDX/7P/2v/h/9//+P/8/7v/aP8m/7D+1f33/H38NfzP+277IvsB+w37K/tf+wD89fzM/ZL+h/9rAPsAKAELAbkAHgAg/zL+kv0k/cj8t/wL/bT9qv7B/4kA+ABeAdEB/wEMAi8CeALJAgUDNwOAA8oD4wPEA7IDzAPdA50DNAOyAh0CowFIAeYA1gAcAVoBggGkAZgBswEaAjYCxwF/AWgB8QA8AMr/ef8t/wD/s/45/h/+Ov4f/hn+Zf5z/kf+a/7B/sn+4f43/0j/Cf/x/tb+Uv6u/Wr9Uv0J/cX83/ws/XH9kv3f/WT+vP74/o3/EQBGALwAdgHRAdcBAAIuAvgBggEBAYkAGgCS/93+SP74/fD9RP7v/q3/iAB5ATMCkgLDAscCpQKEAnkCSQL7AcoBvAGVASIBtgCYAGwA//+9//D/7P++//3/SwAIAKr/zv/8//D/OgDSACMBBwHgAK8AVwCM/43+7/2k/RH9g/yj/Cb9iP3y/Zf+Ev9q/+//SQA1AA0AKgAsAO//u/+7/6f/bP8p//r+6/4H/zD/b//c/2sA5QBFAZkB4QH2AbQBMwHjANIAuACFAI4AwwDSAKwAvQDTAK4AbwBiAF8ASwA8AD0AHwDc/6n/3v8aABkAPgCAAPgANgEsARwBDwG6AAQASf/J/nr+N/4L/vH9BP5P/sb+R//A/zoAmAB4APP/e/9N///+h/5C/m3+sv7c/gn/Vf+e/9D/z/+w/6L/uP/Q/9P/y//H/8j/t/+l/6v/tf/H/+r/FwAtADQATwB9AJgAnADBACcBZgFrAYcBswGRAUIBHwH7AIYAFADx/+7/9/8bAE8AWwBMAEcATgA/AAEAx/+Y/1v/Lv9K/4H/qf/E/+D/4v/Z/+P/6P/W/8//4P/X/53/nf/p/xgA+//L/73/r/+M/5P/uf+9/7X/vP+w/3f/W/
9q/1//Gf/Z/uH+A/8F/wH/NP+M/7v/rf+S/5D/s//W/+b/DgBxAMAAuwCsAMYAuwBsAAYAvv+V/4f/of/i/zEAYwBdADIA/P/G/47/Y/9M/1H/Wv94/7j/EQB3ANIACgETAfcAwQByABsA0v+m/5H/hP9i/1T/af+B/5H/qf/I/+P/AAALAO7/uv+E/1//Qf8d/wP/Ef8n/yj/JP9T/5T/wf/f/w0AOQA7ACkAFgD4/+P/5P/y/+T/2v/m//L/9P8KADcAYACMAMkA+gD9AOwA2ACcAEcACgD///f///83AI0AzgAAAScBOwFIAVABKwHiAKMAhABWABYA9v/y//H/3v+2/6r/wf/R/8T/uv+9/5n/QP/t/qv+cv5I/kr+UP5V/mn+ov7S/uz+Fv9Z/4j/o//F/+n/8P/a/7n/kf9k/0X/Mv8s/0b/cv+l/9L//v8qAD8AQQAwABkA8v/a/8T/vP/Q//3/KwBsAMEADwFFAX8BlAGUAYkBeAFOARsB3ACtAHcAVABFADwAPQBCAFUAXgBlAJEApwCaALEAqQCZAGcAMgASAPv/3P+x/5r/if+I/3v/df97/4H/a/9N/yn/Bv/r/uL+3P7h/vz+J/9M/2f/dv+M/5b/hP9j/17/ZP9g/1r/cv+P/57/sP/O/+D/7P8EACAAPABuAJ4A0AACASoBJwEZAQ8B8wDOALcAnABpADQAHQARAA4AKQBgAI0ArADEANAAxgC2AKcAmQCHAH8AagBRAE4ATQA6AB0A+//d/7v/mP9x/1D/Qv86/zb/L/8z/0T/Vf9l/3b/jv+a/5v/l/+b/6T/qP+n/6b/r//C/8r/0P/Q/87/x/+9/67/pv+o/6P/mP+O/47/kf+V/63/y//r/wIAGAAiACoALQApAB4AFAAMAPn/5f/j/+j/+/8bAEsAdgCVAKUAowCRAIMAcwBrAIAArgDoABwBTQF8AYwBewFNARUB1ACRAE0AHAAGAAkAHAAzAEsAWQBNACwA+v/I/5r/c/9h/2f/f/+e/7T/wf/A/6v/jP9o/0H/GP///u3+6v71/gT/Gv8x/0n/W/9r/2//d/+D/5T/rf/X/w0AQgB8ALgA5QABAQoBCgH3ANYAqgB/AFQALQANAPb/6P/g/93/3//f/9v/1v/V/9P/1P/Z/9n/2P/P/73/qP+T/4L/ev96/4L/j/+g/7T/zf/h/+7/9f/7//z/8//t/+r/5P/s/wQAHQA7AFsAeQCLAJAAiQCAAHcAcgBwAHIAdwB7AHoAbABZAEkAOwApABYACgADAPL/6P/k/93/1v/S/9T/0P/J/73/tP+t/6v/rv+7/83/4P/2/wkAEwAcACIAIQAUABIADgAQABEAJAArADMASwBoAHQAZQBbAE0AMQAtABoA/P8LAPH/6//f/9f/zP/F/7//t/+4/7b/u/+//8P/u/+v/6H/jf98/3D/af9o/23/eP9//4z/lf+g/6r/s/++/8r/0//Y/97/7f/2////BwAOABYAHgAqADsAVwByAJEAsQDIAN0A4wDdANEAvAClAIoAbgBXAEIAMQAiABUADQADAPn/7//h/9D/vf+y/7L/sf+4/8L/0//d/+n/8//6//z//P8CAAYABgABAP/////4/+7/7f/n/9b/x/+5/6v/mP+V/5b/nP+o/7n/xP/O/9b/4P/m/+z/+f8GAAwAFQAkAC8AMAAwADAALQAiABMAAwD0/+T/1f/X/9r/4v/x/wkAGAAoADMAOwBBAEIASABNAFEAVQBXAFcAUABIADsAKwAdABIACQAEAAQAAgACAPz/9f/r/9v/zv+9/6//pv+j/6f/rf+2/7z/wv/B/8D/u/+4/7j/tv++/8H/xv/L/8z/0P/R/9P/1v/Z/9v/5v/1/wQAEQAfACwAOQBDAEkATgBSAFEASwBEADkAKgAfABUADgAJAAcACAAOABIAEgAWABcAFwAYABoAHAAbABwAHAAYABMADQAHAP7/9f/t/+j/4/
/f/9z/1v/S/8//z//N/9D/1f/d/+j/8P/7/wYACgAPABUAGgAYABUAEQANAAAA9v/x/+j/3f/Z/9b/1P/T/9T/2v/f/+b/6v/u//j/+P/5//f/9v/u/+X/3//c/9T/1f/V/9v/3P/e/+D/6f/u//j/CQAWACAAKgA2ADwAPwA/AEEAQQA9ADkANgAxAC0ALQAsACkAKAAmACUAIAAdABcADwADAP//8//l/+X/6P/p/9r/2v/c/9b/4f/k/9f/5v/d/+L/3v/h/9//4P/X/8//zf/I/8X/xf/J/8z/zv/U/93/6P/v//v/BAALAAsACgAMAAoACQALAAgABgAHAAgACwAPABQAFQASABMAEgARAA4ACAD+//f/6//p/+r/6f/p/+v/7P/y//b//v8BAAcACgALAAoACQAJAAkACQAIAAkACAABAAMAAAAEAAQAAwACAP//+f/z/+//8v/2//T/+P/9//z/AgAJABIADgAOABAACgD///b/7v/l/9v/0P/I/8L/vP+4/7L/tv+3/7j/uP++/8f/2P/d/+X/9P/6/wAADAATACAAKAAsAC0ALQAuACcAIgAfABkAEQATAAgABwACAAIACAAFAAMABwAGAAMAAwAFAAYAAAD+//7/+v/7//n/9v/3//b/9f/q/+r/6v/r/+n/5//m/+b/4//Y/9j/2f/W/9f/2f/g/+T/6f/2/wMAFAAhADYASABQAFgAaABqAGgAagBrAGYAYwBbAD0AIQDr/7D/cP8x/wH/2f7Q/tf+8f4g/2z/xf8hAI0ACQGCAeIBMAJ2ApICfQI+AtgBPAGFALb/2P76/Sf9fvwG/Mb72fsz/Nf8tP2t/r7/yQC6AW4C5wIzAzcD8gKCAvkBQwF5ALX/AP9k/uD9hf1T/VD9ff3N/Uz+3P5y/w8ArQAjAXwBqwG6AacBbgETAbgAUgDs/4//Pv8F/+D+3v4A/z7/m/8VAJsAGAGOAewBLAJGAjkCAQKvAUwB2ABqABoA2f+y/6b/sP/O//f/LwBjAJUAswDDAL4AkQBMAO7/Zv+8/g3+YP3F/Ez8Bvzx+yv8rvyR/aP+6f9NAcICLgRqBWQGGwd0B1YHuga3BWgEuQL8ABz/I/1G+7X5VfhO99D2tvYO9/L3Vfn/+vz8H/9NAV8DHwWEBloHlAc3B0QG2AQQAx8BI/8+/Zv7avqj+Uv5Zvn7+eP6CPxO/bH+FABSAW0CXgMZBJQE3QT4BNMEdQT3A2IDvwIVAngB4wByABkAzv+j/4L/a/9H/yT//P7B/n7+Qf4Y/uv95/0Q/l/+zf5X/xEAyAB0AQ8CkQLeAvAC0AJ6AvMBPQFtAIn/o/7Y/RX9c/z9+7v7pvuz+/37gfwr/QT++P7z/+cAzAGRAhMDYwOLA3oDNAPXAogCKwLPAYYBNQH1ALkAeAAuAO7/xP+C/z3/9f6l/mb+Rv4X/gH+If5w/tj+Z/8JALUAbwHoAUICawJMAv8BogEgAXsA+/93//P+hv4m/tr9nf2Z/c39Iv6e/kT/8/+hAGIBAwJ/At8CAAPlAoMC4wENASEAK/9O/rz9Qf0U/T79jP37/YL+5/4y/5D/w/+1/7f/sf+A/1T/Nf/0/ov+bv5a/iD+GP5n/rz+Lf/v/70AbQEpAugCXQOUA5QDXgPeAi4CegHOABUAi/9N/yD/D/80/2z/n//b/yUAXABxAIAAkABxACkA8f+o/yf/wP6Q/mT+SP5p/qj+AP97/yYAwABIAdYBUwJ0AoQCdQIIAn0B6QBuAAIAmP9//4X/df+c//b/KwB5AOUALgFOAWkBTgH6AHkA3/80/2f+n/3+/KT8ePxt/L78Tf3g/bD+hP82ANAAPgFtAVsBIQG6ADMAm/8G/2f+6P15/Tn9C/0U/Tz9b/3R/Vn+3/6Q/1kAMQEsAjkDLwQsBQMGwgZnB9YHAgjzB40HyQa0BVwEjQKMAG7+KPzd+c/3DvbK9CL09vNp9JH1MvdQ+cf7gf48Ae4DdA
aUCNIJXgopCskIcwaAA+T/oftj94fz+e+K7VjsS+ye7ZfwsPRs+cn+6ATxCi0QxhR4GL0agBvXGqoYJRXOEOwLlwZZATj9Bvpj9w72aPbE9735ZPws/2EBJgNZBAEEHALn/+/8Gvgy8unsbOeP4Uvd39pZ2TXapd7j5DHsYvbPAqEO9hmaJcUvBzeVOzg9EzudNbotSSPFFsQJ8vzC8EvmYN7z2G7WudaB2a3e6eX/7QT20P1aBVQLKg/9EMoQiA5ECqEEAf6y9gvv0+fk4THdfdmH193YOt1I48bqS/VWAxsS7h6nKhc2Qz+4Q8tD0ECtOlsxKyWMFm8HcPli7KvgnNcr0mTQkNES1fvaRuOY7J31l/3WBEcLHRA6EvERQhBGDYYIPQLO+yv2xPD06lfmkuPR4DTeO99h5BLqnvCR+3kJbxalITwsxTVXPMc+szy5NocuwiQJGIoIwfkF7kTkpdu71TDUc9bN2hbgeuYp7tv14vtVANADyQb5CPAJswnbCPEHcQa3A0QALP1T+nP2ofHV7TPrGOfL4qrjdelk7ofzBf+oDtcbYCauMIQ58T59P8E5HTA6JqgaMgqI+E3rguJM22LVAtPM1a/bleEU53rtxPQt+wn/SQEjBEcHZQh6B6kGywZOBnQDNv9L/FH68fWj7+vrJerw5PzeEOHK6ffv2fQwAYkTiSJuLMg00TvyP+c+KjfaKwYhTBWqBVz1FOqu4+TeCtu12eHbk+Es58Tpwexm8+j5WfzT/Jb/tQQwCB0IeQfOCFwK0QgnBOD//vyu98/vOOom5rbe8dhA3j3pNu8F9gUHsxoFKCcxCDkvP1ZBgjwDMuYmWxztD48A0PHa6EPlduFu3ErbP98v423kUeUV6WXvIPRb9Sf4aP8YBsYIZQrhDAgPSw+lCzsFPQHs/aL00ekx5QHhqtdn0wvd3ul48Fb77RD5JDYwfTjPPxRDN0EBOjsu4iHnFiAKu/q07TLmKOFI3CbZwdj32p7f8OPH5iDs0vT6+28AfwVkDDMSeBQcFAsThhGkDZ8GG/+I+FLwn+Un3TrY2dC8yczNZttN5zXycASjG7ouAjpxQAhF60V0QKU16yhDHUES8wS39jTsTeWn3wfbtNe61ubZsd5v4Vrmz/Dk+kMBeQdTD0sW3xlKGXQW1xPWD04I1v+T+Ozw5Odt32DZw9P/zH7LaNXL4+/uLvwqEToooTeOPgJEp0jgRrE9tzEJJ6Qceg5y/sbxzeer3n3XWNJnz0DRntZ52ynhZuuv98sAGgfsDoYXihsZGjwXZBRPD/0Gr/3h9ULvQOec3trZWNg51BjQkNVa44rvBPqSCtkgXjM0POJA+UbASMlADzTuKD4e6Q/Z/oTvs+Pv2RfR8coVyS/MltNM3AHlLfCK/bAICxDsFgcegSFaH98amxZlENAGp/xn9ObtMufz3jLZWNls2YTVB9a74XDx2vxtCOcbODFRPBs/JkN1Rj1BuzPQJIAY+wsi+/roRtvk0R3KwcTrw2zJz9Sl4cTse/gYBioSpRm0HngjkyWiIrQcRxbEDtsEP/mb7pfn4OIF3WPXydZQ2sfaoNhZ33Xwh/7WBlMWfC1UPRhBRkJTRbxD+jeDJi4XPQoh+n/mwdU1zCfH9cLkwYrINNZ55enyDwBKD8cdMSaCKccr4izoKJUfeRTdCbL+bfHC4z3aJ9au0yDQCM+J1D/cV97v4OXuhgDJCtkUIScYOidDmEMBRE9ERTykKm8XYwhQ+tznu9SayVHGucRDxG3Js9b356n2kQLND1UeaSg+K38qCitXKo8iSBULCf7+HfNh5FrXDNHy0PbQ1M5x0QLdGeez6A7scfm4CSgUAh0/LUc/C0YARMlD7kHTNXghYg13/tzwGt+nzrvHocezxxfJhNHd4OPwu/23CMcUtiDxJxUpUyj/J/wkBxxYD9gC6fbW6dfbb9Dfy/bMyM7A0FPXc+J863XvuvR8AKENihcxIgsxnT/rRvZF9E
ETPBYvOBrDBHD05uWk1jzLu8dAyj/PntV532vtWvueBUANHxVxHRMjWiThI80jayGcGTsNhv8T8vfjkNUxy6HIWMxa0aPW/99D7ev1uvZy+e0DzA++F4IgCTCdQEpGw0HTPcI4KShWDnf4Bev13o/Ri8nVyz7UYNvM4FTqA/ihApYHqgtyEw8dlSK8InMiQSMLINMU7wTG9X/nZdhHy3PFC8lV0QPYB9+766v3gPmc9pb7CghbEQoXiCN+OPVG3Eb4QdQ+hTTBHYQED/Jj5bHZ+s4Iy43Q49hI3mTjYOyP+OIC8Qc9DUsYdSMOJ3olJCSzIo4bTwxl+nHrmN4J0szIxMeRzjfX692B5Z3v5/YO99r16vu1B7oRuRvKLFxAlkkQRxNC2jpKKtoR+/lP6fnevtXwzRTOc9Uw3FngleZq8dP9rwb0DIQVyx/DJRklYiHvHq4bbxKzAzX1oOkZ3zjUaMyrzALUZdvj38vmRvJ5+Dj1FvVj/10LzRNhIOY0+EWaSblE6D55NCgg2gX573LkitzX0QTMNNG52ezeuuOr7ET61wa/DaMSahoOIq0jpR+/GyEaKBZ9Czv97PBE53fcC9CdyOzLwtWG3DvguOq5+Mv73/aT+iMIdBOdG3spcjxiSO1GUT4mNcMnrREz+ezozuCj2EzRntDI1fXcIuRw6471pAJ0DhEWJxsYIMMjXyIVHMAVxBHfC0YAMvJG57be0dRJzBDM59Mg3PjgrejV9Cj6K/ay9+QEmxFeGQ8mtTgRRf5DrDrzMEgl3BH6+o7spOa73+/XMdVZ2PzdZOM56Qfz4QASDg8XER0eIkQlNSPtG2QUlw9HCfz9XfHr5+3fJdYYzX3LStK22c/dC+aq8wz6MfdT+oIIjBUHHWYoazkxRFdB0Ta9LJ0h5Q86++7uBOsS5WHcL9jX2mDgdOSD6IryYAKSEAIZrB4HJPQm3iPJG1cUjA8vCKX7ku4U5RDd+tKuyUzJLdIj2l/dv+V79K76xvbK+oMLYxmrHzoqmTp1QqA7yC8bJ/IcPgvL+JLw1e5E6KjdE9gF25HgquLJ5R/yOgQMEn8ZPCCfJxEqeyTiGyIWdxHmBzD6Q+4y5hnemNJUyDDID9HU1kHYkeJ19ED7mfZg+0IOvx1HI8Mr2TsdRGI7qCxaIxcayQgs9l/uQe416Yre/NeI2pHgAeMY5jnz/QbxFUcdDSO9Ke4rtCXhG3sVOxEICC/6Pu6C5nHeD9I6xpXEssx10nLTN93Q8T397PiV+0IQrSIFJ/MrYzoyRCU9pSxeIUoaEQzh+B/vu+/57PnhU9ji2Brf1+HY48/v0wTXFjMfRCPbKHcs1SfsHJ8ULREnCn38Q++J5xLhc9Wtx+DCYckYz23Pddd967b4Q/YW+cINFyIrKHcs5jljRPs9cCyNH0kYzwsB+mDwmfLM8rjoQ93s2preSd9C36zpDf/jE8keyiIRKG8tXCqCHvMTCBCyCur9+O8p6f7lXd1Z0PzIxsuA0e/RuNM84l3yP/S69NcFzB2VKVEsmDXuQsZByC+9Hg8XfQ5h/v/x0fLX9e3uquIj3LjcP95f39Lml/jyDtAe7SO3JUgq2CoTIH8SrA1rDHIDdvRf6pzm4+DT1bXL5sp+0m/WidR12nDpavBT7xH7thXYKDstMDQMQldDpDE4Hi4V9w5SAuf1V/Qt+PP0/+jp3dra99wJ3zbkC/PKCeUdISYwJnUohypiIqAUPg75DVMI6foF7zfpfeON2Y/PssxT0sfXCNaF1nriQ+3u7FTyPguIJTcuVjIiPxVEgzSEHpUTVg+GBfT4W/XF+Vj5ZO083u3Xk9q03RviCfB7BtwbRSfAJ9Yl2iUtITEWNg+UD8QNvgM/9+7tZOaR3UzTecy4zjDW2NeU1G3Zl+eO7mrt7fo+GscwgjRrOKlBhD2HKHkUgQs6BwIAffhh+FX87fZo5TbXRtd73I3gM+q9/DoTmSRWKRglfCKgIl8elhW7EGoSyQ
8xAqPxjed24cPYM88JzPrQY9hN2u7WzNq15q7qhO2+BD0luTXMN1M8DkFjNpseKgxcB8oGXADF+Hv5G/sY8DfdINRg12rdFuWu87oIyx1fKGElKiCiIN4g2Rl6EgwVkhgSDnf6WO225ordx9I5zR3PbtaU2/3WSNOo31TuSezK7XwKRCtJM3wxbjtZQbMxMxrxD98P+wnw/Jf1MffJ9d3oSNnG1AjbnOIT6vT3owwoHzsnaSU3IUcgEB9gGBYTmBUUFsMJRfeN6dbhbNtz1ODPP9Gg1zXbLNce1m7hTutF6ZjuzwoYKnQ1bTVcO4Q/PzNeHa8OQgr7Btv+4/ZN9sf2Ye353bLWnNoy4rXrK/uADuQfRyn7JSIeOh13HckWFBJpFgwY6wsQ+FTqguMg28HRKM5G0p7aSN/P2s/VDN0f6Y7qIu64CaIrsTarNAg7cT48LhQW9AnNCT8IZgDL+rn8I/xu71bdQ9TI1+bfBepO+TQMXB2nJdEiXhwqGa0XZBWAFb4ZjxtGEZf9c+sq37LWm9HD0HvUSNvB38jcTNdd2tfjuefk7mwJAyqoOfw51TpxOVwqjBQJB/wFBwnLBiIBwP9K/fvvNN0H1FrXxt8h66H7hQ8DIH4lDh/9FfYTNRfaF94XGh7MIEYSG/r06OjfUNcf0dzSq9kQ4MDh3Nv61R7dGupa7JTvlwjcJwoyIS6OMEYyNiUPFLUNog/wD6gKCgPm/df40OwY3X/Xvt/d6bDwafvlClUXhBuoFxURjA9CE9oU3hXCGwAfSRSl/9ntWeQI36na9tiD27PgiOMu35nZI9095W7m9ul3/7AceitAK3Qsqi8kKYoa3Q9eD34S7Q5pBVz/yvx79Yjn5duf23Xk2O7f+BUEHRAHGBwVRgvqB5sM0hBrFBwcZSF8GucIffcT7Cnkx97W3THhRuXk5IXeDNZk1S3gn+iD6MbznBAwJtUo1SdNKyQpAR5UFBwSWBRjFFYOXwZrARP6Z+sV3nHcE+Pv6wf2kQBvCo4RxhGfCgAEtAbTD7MWuhsFIx8kxBWp/wDwvuiO42jff9+Q4pvkHuLn2mDW59uc49HkeuqD/xwXriAEI1go5yjUH4EXsBYLGc4YDRQSDaUGdv4e8bjjkd/a5BHsdPLC+fIBWAlIDGAIfQOhBbUN9xRoG+MhCyJ0F7MG2ff079rsYurV58PlL+Q84tncTtZM1/Xf0uX+5zfygQUfFNYY0h1wJiwo5CAwG0kb1hvuGGATAg5+CRcB6fLb5Xrg9eIt6ZnwnPhlAEsGDgc4AkH/IwRfDZ4WGyAdJh4iFBRrA6n2U+8O7UXt4eqP5bnf89jL0Z3Py9Y842zr6O6m+KwHGg8cEi8c8ybkJgwi0iFJI9cgZRu7FegQzgoFAALyJudL4mDi2+bh7qP37/5wAvT/WPtV/UEIBRUGH+EmDShwHQoMLv2D9L3xyfFq8FXsheYQ30rXvtF30uvcJOmE7Mjts/iEBTsKtA9hHcIoHCmQJSomZyZuIL0YzBTmEQgLeP8S8oLnVuFF36riZes79Vb8Df9Z/Qf8owBXChcVNx5lI3whWRdeCYD/MPvW9wb02++O6Q/ic9qa0tPO5tNl3xHpkuxM8aj8JwYzCkYS/x/4J6kncid0KQEo6yGbG+cWfhHnB7z5u+tt4xjg3N5z4cfpK/Rf+ov7Q/wFAO4Gqw9eGYAh5yOmHbERjQae/4D8hPrr9WHuueWa3MrUvNCG0XHZauYl7+fxMfej/8QEYQnBEw0g/SbQKZIsWC1hKGkfRhccEQMKSP8L83/p7eMn4VDhQ+W97Gv1pvpE+1T8oQH9CEcQmhjrHlUeIxewDd0F5wGXAHj9gfaV7kPngN/518DULNkn4sfoE+z98Dn4Vf4IBMkNKhorIvAkyydzKs8nKCAlGbMUSA+WBnT8r/PW67bk8ODN4rnolu8/9TT4+fmO/RgD/giNDjkUOhiaFqcPzgiNBE8Bs/5P+2v1Pe+i6Q/jv9
2d3bnhi+ar6a/svvLd+vsAigb5Dxkarh5QIGYjbCVkI28eWRhNEswLnAJ8+D7xeuxB6fXomOvL77fzZvYR+Db50/vyAIgGdguPEc0V3hOyDcsHJwTtAXb/Ufzv947yjO376J7ldeYa6onr5uof7VP0sfwlAi0HRQ/xFoMaMxwuHvAe2xuBFYsPNQwqCagDkvxj9nvx4u1L7HPtufCx9JP3Nfid9yP50f1LAp8F5Qo+EIEP/AnYBZIEbgN+AFT8/vg/9yf0Y+/d7DvuTfAd8NbusfHy+eYAKAMpBoAMeBECE/QTnhWkFtMUfBDkDD0LtggVAzz8xPed9gX2tvP+8XDzOPZe9hD0Y/Nu9nX60/3kAbQG6gmMCacGCAWbBaQEBQFY/o79gvxy+nH4QfcA9srzJvNs98v90gADAa4CGwbQCMYKjQxiDkoQFBB2Cz0GNAX9BZkDHv8t/Rr+Av1H+FT0xvQW98b3CveZ99r5c/ul+/38OwEUBvIHwwbHBKwDxQMzA0wA1f2i/q//x/1t+/b6mPsj/RP/FgB2ATEE8QWpBX8FlgXPBZEHCwlQB6UE8AKCAN39Qvzw+l36U/rW+NP2WfbT9fr0hfZ2+c760ftx/VD+V/9PAQIDhQS9BcEFdwVEBTwDnwBmAN0BdwIIAoABegEkAmQC6QE5AvkDqwXiBlYICQlXCKkGIwTUAfwAcAC1/3j/m/4N/Mr4Y/XQ8nvywvPs9F72zfcL+JL3ovdF+Iv5p/yHAeYEfQVtBTsFeAQOBP8DkwRPBosG7QRPBcwG2wV5BHgEcASTBIYFdAaeBvIFwgS5AzgCpgDpAA8CmQGDACoAJ/8+/Mj41PYf93b4XfkR+eL3tPXy8rjxNfNg9SP3dvn8+4H9/v0O/pv+5f8jAr8FvgmEDMINKg38C3gMRQ21DK8MBg2vC+wIwAVbA4sCJgFg/oP84fuC+7P7+ftg/M38pPsU+nX6tvvj/H7+XP82/o38Mfu8+R/4fvZA9BvycPKy9Gj1L/Sy8gTyBvRL+Iz8HgLLCOwLcQs4DIoPzxLlE1QUzBV0Fj4UjxAXDakJWAXCAHj9iPuU+fj3TfeN9oj0xPI99Fb4cftP/av/9gEqAxwDBgJ+AVwBBP+T+vH2afXk9FLzDfDJ7ErrLuxE70Dz7/fi/UcDLAZzCPULMBDHE4IWYxk/HHocBBrFFzAVyQ8BCSIEmwHP/lz6ePUV8inwx+5e7nrw5PSV+fb8zv7g/ygB/wLgBOcFxgV7A6X+BvoN+HT2F/M+7zjscurI6gnsVu2G8Jn2w/2NAnEDmgThCZUPQhKCFd8aJh6nHSEcLxphFhIR/QvrB3EE2/+n+dXzH/Co7SDsxewy8Lz0QPgv+sL7Bv6YANMCLQWFB5YH3QOu/hj7QvnB94n1RfJK73/thexg7NTtJfFE9iv7Sv1//v0BpwaJCq4O9xPVGC4c9h1MHm8dehspGBUU0g/UCmQEPv1O9s7wc+2e62vr9u2X8QjzkvLA8xX4F/3oAFcEwAYvBpQD0QCg/vj97f0O/FT4vfTd8efvnu6p7oXx0vV5+Cr5lvlN+2L+/wGSBioN2hMpGCQbCh7KH3Eflx1vG+MYZBQPDXcE7PwY9/Dxn+3/6nDqJ+ug62Hrt+yj8Bj1ufkT/xQDKQS9AxoDZAIrAqgCzwICAS39Kfl39h/0wvEg8Z/ya/MN8tfwWPLM9XX5yv3pA4QK7A/bFDYa8B6mIYwifSJEIRgeThiPELEInQGI+hP00e+a7afr0uj65WvlEegX7D3wqPX1+xEA6QA2AW8C6gPEBf4HsAg5B3cEwQBE/Pz3XvUQ9Qv15vKa72vu2O/68U/0Xvhi/mgEtglgD6wVFxujHikhLyNiI24gBxuIFCEOBghxAc760PWp8WHsy+Zb45niReSY597rZvFi9z77L/1x/20CuQWCCYEMhA02DbALiwiBBJkAtv1g+0H3GfGV7YHuaO9w7s/uo/Fa9Sn6gwBZB7MNSh
MLGH8b/BxLHJ0a6xjmFrgTTQ89CgcFI/8r+BfxoOtO6LvmdObt5zzrQ+9t8qb0CveL+tz+MAMoB4UKcAzTDGwMEAsnCN0ESgJc/9/6t/bF9MzzGfKY8Kzw3vGM86L2C/tP/0oDVwi+DWQRdRPwFFwVtxSaEzESyhA9DyMMUAcMAun8mPcM82vwz++s8E3yZfPg88r0Zvbn+PT8UgE6BCMGJwibCbgJ5AgHCDQH/ATOADj9W/tl+e32N/WB9Lf0j/VN9ur2XPiR+kz9igCcA/4F6gc8Cf4JewqiCkEK/Am3CYQICAa1Av7+cPul+BP37Pby99b4BvlY+V36B/x0/tYAcgIjBIkGBggFCP8HgggTCLgFQQJu/3z9S/sP+Sj4OvgH+Nj3mPjN+ez6a/yh/mgBPwRDBiwHNgcxB4cHeAe4Bn8G6gZTBpIDqf8T/Hb5oPeU9mf2wva79lz2svYN+Nf5afzU/9MCqAQzBlEHOge8BhQH7AdRB6kErgF2/9z8yPk4+HT42vj9+L35zfrT+zX9Bv+9AHkCSwTMBZQGBQe8B58I/wg9CQYKcwrICH4FeAIWAKT93ftl+zP7LvrT+Mv3TPd299X4e/sb/oP/egC9AZMCwAJ+AyUFaAb2BSYE7gGb/yj9e/sg+zL7oPoM+jj6wvpu+0X8QP2V/iQAQAG2AU8CNgPwA3UEhQU0B4EITgikBncErgJpAa0ApQDNAH3/hfy++SL4Tfep91z5UPt0/Ab9vf2F/r7+E//vAJgDtQQNBCADHQJQAJD+4/3h/Vr9hfxM/IP8TfxD/Ej9w/7C/4cAPAHDAXECfwNmBBgFBAYKB1IHdwYEBaUDyQKaAtkC7gIzAqwAfP5E/Ov60vqk++j8yv0R/kn+ov7F/gj/WADJAuIETwVaBA0DcAF3/8z9+Pxb/G37s/qw+hH7hfsR/Mz8wP3i/r3/UwAJAcUBDAJAAjQD1wTXBZMFowTRA0EDzQJgAjkCNwKVAb7/dP0b/BP8RPwr/Fz80/wq/Vz9rP19/hMAPwL7A4AE7QP7AsIBPQAq//n+nf6t/Q39G/3W/Ab8pPtL/CD9Z/2Q/WT+df8gALkAeQEIAiQCJQI+AjkCBQLaAbwBnAF3AS4B1wArAB//W/5k/rT+qf5J/lT+ov6P/q/+5P9rATgCYgKOApoCNwLHAcYBygFlAcgAOwDA/03/wv5c/oj+9f7U/kH+Rf4s/wEAPACOAIUBmQJSA4wDNQM1Ag8BhQCNAH4AcQCIAHAANAAVAOf/tP+G/y7/uf5j/uL9Vf1c/Qb+iP6K/on+7v5D/0P/Ef8W/2f/sf+h/53/7/8XALz/e/+W/9T/yf+G/3T/gv+t/8T/AQDSAPcBcAIQAqsBcwEDAW8AZQDrAFgBMAHrAMMAbgAOAA0AAgCj/03/Nv/h/jT+z/1E/uv+6f5w/l3+3P5Z/4v/3f9QAF0ADgDZ/7T/q/8HAIcApAB5AEUAFADG/27/bP/u/5oASQEJApcCYAKSAbcAPQBQAMwANgFoAUABhgB5/8z+k/6Q/uP+lv8YADAAHQDG/zj/9v67/gP+RP05/dL9i/4t/6z/p/9D/+P+w/7u/p//mQBtAcMBngEYAbIAjQBPAAwAQgDrAI8B3gEFAtsBOwGkAHYAjQDFACQBggGtAWABxgB2AGEAOwAfAF0ArgCxAGIAGADW/2//kP54/e/8C/3+/AX91f3M/tr+gf6V/sX+v/4H/8H/YgC9ABgBQwELAaQAUQAcAAcAEwB/AA4BFAFVAIn/L/83/53/TQDPAA8BGwHLAD8AEgBNAKsAAQFRAWUBNAGtAPD/cf9d/xj/Qv6S/Vf98/y0/Ev9Nv6c/vP+kv/u/x0AvwB8AccB+gFhAlkCsQEJAakAZwBVAI4A4QDVAGwA4P9A/7f+qf5I/yUAvAANATMBEAGQAEEAeQD4ACIB9ACqAEkAlP+5/kL+iP7k/nj+jP0n/TP9Bv33/L790v5q/77/PADAAC4BuQ
FLAqoC8ALjAlAChQHwAHoALgAwAH0AxgDjAIQAyf9B/zH/cv8NAPwApwGOAQMBegDp/3b/pv9UAOAAyABMANH/PP9X/qX9xv1W/l7+xP1M/Tv9Nf13/UT+I/+W////mgANAU4BwQFKApACjAJwAvUBLgGCACYAyP9w/1v/pv/I/3z//f6d/pP+3P5+/1cACQFVATgB7AC1AHwAfwC+APAA3QB9AOr/B/8D/k39TP2P/YP9TP13/ej9Of53/hr/AQCbAN4AKwGYAQUCYAK1AvcCMwMZA20ChgHhAF4A3v+J/3z/h/94/0H/8P6P/kz+Zf7j/pr/YADxAEgBbgFeARoB1wDDAMAAjgBIAPH/a/+l/uz9lP2e/bX9mf2C/bn9N/7c/nv/HwDBAFoB2gE3AoECzwIFA/YCqQJZAucBPQGIAAAAo/9Y/wD/w/7Q/gv/Hv/1/tD+3v4M/1j/zf92ACoBowG4AXoBFAHOAI8ASwD0/5v/Hv9d/of9+Pze/Pj87vzh/Dr94f1x/vP+l/9JANYAPAGmATMCxQIiAxEDngL8AVwBuQAyAN3/wv+s/3v/Nv8b/xj/8P6p/pP+tP7p/if/df/b/0sAnADGAO4ANAFdAVUBJwH9AMEATwCh/wf/vf6L/hX+lf2I/d39Lf6J/vj+af/P/zQApgBBAf8BrAL8AvMCuQJQArQBJQHOAKoAmACPAIgAjACDAF4A+v97/y3/C//5/hT/VP+R/5z/fP9c/2X/n//+/2UAuwDUAK0APwCf/wn/2/7v/tf+k/5v/mv+a/5i/nD+ov72/nT/BQCrAFsB5QERAvEBwQGAATAB1wCcAI4AowCsAJwAiAB3ADsAy/9R/+3+t/6+/uD+5f6y/nX+X/5y/p3+GP/N/3YA5gBOAZEBfwEjAccAhgBUAAwAn/85/+z+vv6j/oD+Z/5l/oX+0P44/+D/nAA1AZwB5QEeAiQC+gGsAXMBYQFYATwBJQEcAfIAYQCT/9r+Zf4b/vP97f0J/hn+DP7t/Q/+kv5q/1EAEgGaAfoBFALPAUoB5AC1AHkAIQCH/xv/3v6r/nr+XP5q/pL+yf4T/5v/TwAJAZQB2QHmAeoB0wGhAXcBfQGpAcQBpwFqARQBkwDR///+a/4m/gX+AP78/fL92f3C/cr9Fv6w/n3/OQDGAEoBvgHtAcABewFxAXEBIwGBAOv/mv9c/xD/wP5y/kv+Wf6R/vb+j/9YABsBpAHiAeYByAGXAWYBPQErASoBIAHtAIMA/v98/wH/kf4w/vL97/0T/iP++/28/bH99f1t/vH+hf8uANUASgF+AW4BQgEXAfAA0gC1AJAATwD5/7j/i/9c/yL/BP8b/1f/mv/e/zwAsQAgAV4BZwFYAT8BKgETAeoAyAC6AJgATgDp/4z/QP/n/oL+Nv4e/iz+OP5N/nf+qv7h/iz/oP9JAPsAgQHQAe0B6QHCAXgBNAH8ALMAOACW/yD//f7//gH/Af8m/4b/7v9HAJEA7QBUAZcBrQGOAVUBCgGtAFkAKgAJAOT/0//U/8r/kv9F///+wv6W/pH+lP6l/sX+4f7//in/Z/+k/+b/MwCcAAUBVwF6AWgBOQHgAIEALAD4/8//oP9e/xP/3P7J/uD+EP9u//H/kwAcAYQBuQGvAaABYwENAeEAwAB/AB4Aof83/xb/E/8J/9v+d/4m/gr+MP52/tD+Fv8p//P+kf5n/nv+1v5r//b/SAB1AHEANgD1/+n/IgCDALcA1QDOAHYAQQBKAHEA3gA5AVABSgE3ASMBOgFoAaoBuwFKAdkAmwB6AG0AQQD4/5T/G/+Z/lv+Ev7E/d39HP5l/tL+Vf+9/9L/tf/O/ygAxACaAQICiAEKAaIALQA/AHIAfwCPAGMADwDt/7j/iQBdAh8D2wIeAr8AUACtAXUDbgRZBM0CGf9w/G/84vwq/oAABAGK/yL9S/rc+Fr5Tvvd/QwANgBg/tD8nftD+3z9t//tAL8C9QKtAkMDrg
JkAj0D2AKNAXUABP4M/KX81/0t/9QBpQO4AlAAM/5L/a/9KgD4AjwEUQXbA+kAnQDdAK0BCwOTAugAjv96/Wb7QPxU/L77YP1M/RX83fv4+9/8BADIAY8B7gLhAeH/sP+D/xgB1wOEBOoDJwMqACT9TfzX+yn+IwL/AiACNwAQ/qn9Av6q/lAAygKeA5IE7wTAA8MCBgGP/yb+4/1K/rr+JP9U//z/+QDXAHX/0P5i/Wj9eACyAQYCxgIkAXz+Rv5z/kX/tgGWAYkBzgA6/rr8O/yp/Mr9SP8IAIAAjv8B/tL+7/9PAX4D6wOdA6ID5wFA/7X+E/9c/24AAwHrAFgBIgESAJn/VP/a/+7/Vf5+/cf9Iv5v/u/+WP8w/1f/9P5+/Tr9a/3w/BT98f3V/4IBgQJrAhsBUgCi/2v+uf1i/nf+sP9eAT8BcwEDAej+pv5uACwBZwKnAycDkgK7ARUABwC3AZEC0QL+AkICDwFEAFP/6P0M/U39mf31/Yz+uv0z/Ef7kvov+3n97f8JAtoDEwQKBLQEJQTqAtMCagPJA2oENwROA5YCRQE4/1f9Kvx8+8D7NfwZ/JL8gv1j/Yj8yvuZ+9v8P/68/wMCoAMmBAAE0QLDAQICEgJlAocDiwNhAooBz/+t/cv8Gf3v/Rr/0P+B/+7+BP+H/4H/3//FAPoAzgBHATwB9wB1AdgA3/9mAI0A6v+Q/8P+Q/53/mP+KP6N/oT+9v3N/TX94fzp/cL+Av/q/1AAJwB/AKb/PP69/hr/Cv9RAC0BMQHdALX/qf7b/or/hADXAZQC7gInA4ICPwG6AOIAPwHrAdgBxwAgAOb/8v8VAEwAnQC2/07+Xv3B/ML8Xf0A/or+G/9a/3v+7/1f/qL+MP9fAAoBowA/AP7/4P9tALYBrgJMA3IDWgLDAQ0CvQH6AZ4DiQRkBDgEsAPGAl8C2wJ8A8ADuANSA38C7AAJ/xn+2P2H/af93P0X/ZT8yftz+h36OPpw+kT7tfuK+5P7MPtQ+mD6y/oC+zX8pf1W/rn+Jv58/cj9d/2W/Wj/GAGiAs0EqQWgBCYEQQT9A74ENgakBtsGjwedB6gHDQjJBiIFngT3A7kDBwQMAsD/Qf9i/ab7Efz0+/f7Uv2g/f38FP33/Iv8Ovya+4P6/vg1+H/4EviF+IL6IfrD+Pf4s/fO9kv52fvc/YgBJwSYBBIGXQdUBuAGkglXCx0O1xAdDxAMpAqcCCYHWQfBBRQETwSdAkf+/Pkq9QjxGfFX85v0CPeQ+tD6FvtX/Wv8P/tz/w4CIwF9A2YFPQPJAsoDnQIhApAB9f7f/Nr6YflL+wr8AfoX+/78QPvr+u/9zgD+A+YHegpiC6oK0AldCn4KNgoHDGsOWQ5ZDCgJOQTb/hT8rftH+1v6y/na+MP1NPL48DXxs/EZ9J/3ffkc+sn6h/u++1P7XP1eAtsEKwQ7BW4FSQHQ/mn/1/zX+uP+OAHg/Z38Dv47/S79AwDXAZACswNbAyED8ANwA/0DOAaKBp8GSAi/B54EcAKXAZgAv/+V/2YA9gGkAncBOQBg/zP+MP+/Aa8BxQHgBG8FiAOGBEQFVQOyA+AFowU+BcsF/gN5AOn9l/s9+eb3WPcT90L2ivO372bsXOku6Fzquuy47131qvi3+DD7sv5GAGwCQAaECqIOfBKEFYgWbRW9EwISRBAODgMMHQtSCWUFmgEA/iL5yvX89Gr0w/Sq9k33gfaO9nn3pPkD/QgA/ANRCcoMXg2oDT4Oog68D5sQcw9kDSEM7QnFBtIDZv+E+fX0UvDA6jnnJeTj30be1d7S3d7fb+dp7FDt5fGZ+Lv8AAKCCG4MkxHLGKobuxvWHPYb5RmGGlkZ6xPxD1wNxwetAaj9QvlJ9ZH0ivRy8inwK+9/7tnuz/AR84P2/PtiAB8D8wYtCocLwg4OE30UoBVVFwUWURPbERAPdwrkBdcAffzt+KjyXOz96PTjYN723M/aGtf12n3jPe
bv5iXsJvJ393v9vQIFCasQShZlGy0gmiDyH7EiuiNjIbggoR+TGuQUmw+OCIgB+/xM+RH1L/HM7cbqM+gF5g/lu+Vj54fqPe/R89b33vyGArIH+Qu7D3ETdBZ3GGEZeBkSGIwW8BS/EHgKKwX9/3H50PPW7m/pd+XN4avcMtka19TU8den4BrlA+Yq7Ar0a/cm/PoEIQyMEpcb7SK7JbImlydMKE8oVyd7JVIiOB3dFpQPZgfi/576T/Zb8WLsdej15GHi/OEn4sLhouPO59zrCfFC98T8ZAKHCIkNxREnFgoaJByAHPMcqx1kHCsZaBV6EL4KNwUQ/9n3XfCb6GDiSd7g2fPVY9Sa0jrQ9NEu2Gbfx+U36r/vqfm1AqcIrhHPG80hZii6Lwcxfi/zL/UuYixEK5wnWB9nF9QPYwWk+1X1Xe+G6fPkG+Cc26PZ8tjf2HTaGt1I4RDoI+/79IP70QJgCjsT7xowHxojFCglK0gr1ilmJw0kJR8DGXgSpApmAbL4V/AQ5wfeF9bWz+/LiMglxbnEkMWBxoLOcdyo4+vlEu8Z+4UCcwo9FfgedyhCMoM4cDqTOT83HzUAM14vGSooI+cahRIICa399/Ob7UPn6uAd3WPZE9SU0SfTtdRH1g3bW+L/6fHxEvoCAo0KtBPyG5QiEii+LB4wPzEyMNUtqyqxJlUhbRnrD5oGdv2h84fpUOC/2FXT584UytXF8sOhw/PDDMaBy77V5+H56Bntt/bOAr8L5BUwIZYoYS9DN0U6bDh2NtQ0XzNyMUYsOSQ4HAYUaQprAP/2K+8+6UvjNNzu1V3RW858zk/RQNSi2MjfBecA7oH2e/9uCOAS7xxzJP4qxDBgNB82AzbMM7EwIC2UJzMgexcFDcICzvqa8mDoN9/n10bQkclCxXDBob45v7XBIMRlydnTyd845x3r7PMkAt0MFRWqIAQrCzD7NPg5ljmONsw1bzW0MnAu+SfhHn0VuwuzACP3f+8755vfhtlq04fOJc1wzbTOgdLN15PdP+XF7QL2wP/rCRUTJR3mJpQsXzDwNIo3ljakNFIysC4AKkgjcRkfDyQGXP159H3rYeJY23TWKc89xmLC1sFHv16/SMSWx67LCtgf5nrsMPFm+3AHLxJzHDclTSzrMs03+znzOQA4ezXEM68wFyrhIUoZKQ/yBNT7IPLd6HnilNyp1b7QFM7uy5vMDtFx1ejZOeGb6RXx0PmMA44MsxZeIfQoJC7XMmU1NDViNI8zsjEdLpAofyEXGewOqwSW/NP0wut947vc6tX/zjvJYcXBw8HDsMSmxuPJQ87B1PnfQu0o9Nr3eQNcErEZdB8AKeUuzTBnNFk19TAXLtstjSrdJPUfBxm+Di8E5/rO8jjsNua34J/ck9go1DvSQNNV1bXZuODT59Dun/Zm/ogGLQ9jFqMcKyQGK8Mu6TDsMd8wkC4vKyYmkiBNGwEVgg36BJj7Q/NR7U/nsd8i2VXVndIA0O/NIM1Rzs3QO9On1hDcieBa5qHzXQBXAmsFBhKFGkcbox7tI9YlkicJKB4k9SClH7Abphd6FMwNwwY+Ay398/Py7k/sL+go5WLi6d4430XiEOMB5QHrcvD49Dn7SgF6B+MOZBSOF1wc8CEKJUkmwSZcJdsiCCGfHhga+xRyELULWga//2j4avM68NXrNOe648DgMt8o397ept4T34TfdeB84sXlZ+oG7pTxUflA/4H9jv4dBskIjAdwCn8NkQ05D44RPBFPD0YNgQxsDdUMdQmnBxwI2AZWBGgCu/7X+oX62PkE90T3pviJ91b4bPvg+9T7U/9rAvkDPAcdC74NvhBuEioRJxENE5MSORHnEUURJA41DNQKFgfUA3ACBQCS/cj8JfvL+LT3dPX/8ofzb/Ke7WjtePH48E/tHusf6SPpOOvM6cfnLuoL7MTrKO0Y7UXs3vAz90r52Pod/+wCNwZ5Ck8NLA+7EtQVBhe4F9kWjxSUE/IStR
AuDq0L8Qj4BnIEjQAf/pz95fxS/U7/PQDr/9j/kwCZAlwEDgTGBBoIQgpiCgwLLAwRDYwOkw/fDogNxQxmDKULwwn1Bo0EIwPzAV4A7f2p+mX43PdS9VDvA+tr6mnooeMA4ADeI9wb27XaBNoZ2gDbZdwj3wfihOSE6efwhfYy+wACnwjQDUwTmhe7GRocCx44Hk4f/yDIH0MdnhvTF4cTvRKaEVEN8QnQCNsGPQOwALoA+wBiAO0A+AFMAYkA5AEvBMEFAQd1CDEK0AvODE0NfA2KDcANoQ0iDO0JtQjUB0oF3QEq/0b8fvhh9abyTu8J7THsKeqn5jTk3OKr4Afe79yu3aDeHN934Gnit+NQ5f/o0e1h8XT0+fkNALIC/gMkB+8J4AtQD3ERChFIElMUYhP1EG4P5Q36DDoNswtECXwJTQr+CPgHTwg6CFQICArAC1EMVQ53EVASeRIyFI4V9xVDFvcVXBVKFUkU7hEmEPgNFgosBtACAP/2+4T6kvhV9XTyAPBA7fjqmOmy6DboZOg06IDnu+dt6KznQOdC6R/r5eom64zswO2q74nxmvFr8l71a/fd90j5Lfty/EH+Yv9p/q79zv7S/1kAigCn/zwAfQKZAp8BmgNjBq0Hogl3C4QMqA+eE1gVthdOG7kcCh47IP8flB9iIZQhUx89Hg4doxm5FWsSsg7jCicIYAWjAUn+lPuP+ND1WvPA8GbvRO9B7tTssuze7AXstuub7GLt0O2I7jLv5u888TLyRPJw8sryEfMl9Ef13/SC9AP1/vQZ9Zj1hfR384j0fvSU8l3y6/KO8tXz2vUy9vD27/hR+gv8Yf6X/w8CwwbjCWkLsA4GEi0UeRdaGoAbbx2PH5gfBiBzIbMgUB/jHlAdABsuGVsWPhNcEdwOrAvRCPQECwFg/9H99fq++N72SPQh80fzwvFy7xXv+e/37wnvkO4e767v/O/p8MDxv/GU8jz0r/Rc9Lf04vRb9E70SfT281n0+vTL9Jz04fSg9BH0A/R69FL1/vUf9n/2X/cl+EL5C/vM/KX+SAGpAyoF+wZFCXULAQ6AECYSnhPCFa0Xuhi5GcEaOhs5G5gaERnNF80X6xfGFpgU1hEaD/gMmQoRBzIEQAMGAgv/OvxR+gH4uvWA9G/z8fGD8Szy8fGU8M/vSPDe8ArxrvHU8rTzefQt9SX12PRO9S72h/an9vz26PZ39lf2Mfa19WH1G/W29Lf0ePSU84TzSfTA9Jr1HPd5+Aj6vvsW/eD+SwFxAwEGKwnUC/wN/A+zEasTAxZxFxQYWBlKGpUZbBnyGXEYkBbuFvsW9hSBEscPxgzJCj0JIgfMBE0CDwCr/gT9RPo++Kz3CfcM9i/18fNA8yb06vSh9O/0q/XP9Rz2bPYG9gD25vY89+r20/aL9iv2PPYB9iv1v/St9HT0bPQv9B7zivJG8wT0efQ19er1n/bZ9xP55vkP+wP9o/9ZAnwE/gWfB3oJDwuPDGEOIRCQEZ4SJRNmE6ATZRPDElsSFhKLEdwQaxCsD/sN/gtyCi8JJAhsB98GGAZlBGkCiwFWAXAAm/9t/97+1P0q/Wv8gPvl+m/65vk7+Xz46veJ9wL3Yfb+9Xr1fPSP8x7zn/L88dHxvvF78WjxdPFZ8afxLvJF8kvy8fI/9On1r/ck+cv6pPwE/kT/sgAbAuUDFgboBxgJDwr/CsoLOwx/DNAMKA2HDdMNjw3oDF8M/QunC0MLpAr8Ce4JPwrjCSIJ+Ag0CcEI5QdkBwkHdwYMBogFpgThA1ADaAJ4AQwBpgDG//7+U/4u/er78frY+av49/c+9/31oPR383fypvEp8bHwIfC+75bvge+v7zrwB/Ei8nDzt/Ta9fH2a/g9+sD7Ff2N/hoARgEzAhgDtQMqBB0FLAZ7BoEGCgd+B4oH3gc7CCAIIQh6CFoICQhCCMUIzQiYCKII1Ai6CK0IHQleCfoItAioCCUIeQdhB4EHTQfOBiQGZg
W0BPID7QLbAQ0BMQDo/nP9GPzF+of5Wfj/9s31EPVj9J/z//Jt8s7xofH98TXyL/KO8kDzs/M+9Dv1YPZe94f42vnV+nD7Mvxq/X7+Ov8wADkB6wGBAukCJwOkAzIEbQSzBF8F+AURBvMF4AX+BVAGkAZ0BnEG4gY/BwUHsAb2BnoHqwe6B9wHywewB70H1gfAB30HNgfaBh8GJwVsBOgDMAM2AjMBLABF/3D+ef2Y/P/7Lfsa+nL56fgP+Ff39fZ69tz1hfV19Xr1gPWO9cP1//X29en1VfYV98D3Zfg1+fz5jPoR+9j7hfz4/Nf9/P6V/7v/PgANAY4B4wGEAjIDpAMEBGgEiASuBEUF4gU2Bn4G1AYBB/YG2wbmBh4HfAfaB+oHgAfqBqsGrQZtBuMFrwWzBVUFowQBBHMDHAMHA60C1gE3AQABcwB2/6v+N/6m/bT8yfs2+5z6y/k6+fT4evjf9633ifcG99b2OveE95H38vd0+Jv40vh1+SL6lvoC+6X7jvxF/aH9FP7g/n7/2/9sABMBbwGxAQgCQwJ1At8CYAOmA80DCwRFBFwEawSgBN4EIQVhBXMFOAUZBUgFUwUUBRoFeAWkBYIFbAVjBUEFKwUhBfwEpARKBAsEvQMeA2EC3QFaAYwAlP+9/vX9Lv1f/HL7l/o7+iH6nvn1+On4CPnC+JP43PgM+SP5ifn9+R/6XvoP+437t/sO/H78sfzG/Bz9tP0R/pP+Ef9l/4X/p//8/2oAtADfACEBRQF1AdQBHwIjAlQCyAL+Au8CJAOJA/YDbwTGBNsEIgWTBcsF3gUcBnMGqgayBpkGcAY/BusFgwVKBekEHQRsA+wCFgIVAXYAyP/I/gz+jv3e/Cf8uftv+0f7QPsI+7j6wPru+tz62voh+1b7dPuX+6X7tfvx+y78efzq/Eb9Yf2E/d79FP4T/jr+i/6a/mz+ef6e/oD+Zf6D/p7+mv6t/vj+Of9Y/7D/YgD+ADYBcAHiAYICDANLA70DqQRwBagFvQX6BRIGEwY6BkgGJQYCBsgFZAUKBaIE/ANaA/ICcALHAScBiwAKAJn/Cv+L/lL+Df6b/Uz9Mv0C/bH8mPy2/Kf8d/xz/JL8nPx+/Hb8ofzR/Nj85PwL/RX9JP1r/Zz9ff1s/b/9C/4U/v/9B/5B/nP+ef6J/tL+Jv8r/xP/Ov+B/8D/7/82AKcAFwFeAaQBEQJjAogC1QJUA64D1QMFBEIEYQRQBCcECgQBBNwDogNsAzMD1gJzAjQC/AGwAWcBNAH0AJkAWQBJADMAAQDe/8f/kf89/w3/Fv8M/+D+xv6y/oL+R/4l/hT+C/4p/kf+Pv4X/v39+v0E/v792/3E/bT9gf1F/Tj9Nf0n/Sr9If31/Nz88Pwm/XP9uP3u/Uf+yf5B/6r/DwBpAOEAbgHjAUECpgLzAhYDOgNQAzkDLQNTA3UDSgP1AqsCfwJjAjoCAgLdAcEBiAFMARQB3AC0AKoAngB5AFoAPwAvABoACgAMABYACgD1/+z/9f/9//z/+P8JABAA///i/8//uf+d/3b/U/8w//3+yf6O/lP+Ff7O/aH9Xv0V/Qb92PzK/Mf83fz0/Bf9R/1+/c79KP6Y/g//i////3MA1AAlAW8BvgEAAi8CSwJdAlQCRgI0AhgC7wHPAa8BgQFPAS4BEgHvAMEAlwB1AFoARgA8ADYANgAzACQAEAANABcALQBHAF4AeQCTAKQAqwC9ANsA+wAKARMBGwEWAQEB8ADcALwAjgBVABgAz/92/yD/0/6O/kj+Df7W/Zr9X/02/S/9Pf1M/XH9tv33/ST+Sf6I/uT+Tf+2/wwATQCGALEA1gD1ACEBWQF/AXUBNQH7AOkA5wDbAMsAzQDKAKgAaAAxACYAKQAuAC8AJQACANf/sv+S/37/hf+i/7T/uf+t/7H/w//l/xIASgCKALkA0ADdAPEAFwEyAUUBWQFhAU0BJAH8ANMAowBvAEEABgDG/4r/Tv
8F/8P+mf6C/mj+Uf5J/lP+YP5s/ob+rP7P/vz+MP9f/47/sf/d/wgAKQBGAGYAhwCXAJ4AoQCoAKgApwCkAJ4AlgCPAH8AbQBqAGYAYwBbAFEAQAAhAAkA+f/o/9X/xP+6/7L/pv+i/6j/tf/E/93//v8XACoAQQBeAH8AmwC0AMcA0ADTAMwAvgCxAKIAigBkAD8AFgDq/77/nv+I/3X/Yf9G/zD/Hv8R/wr/Cf8T/yT/Mv88/0j/TP9c/3T/kv+t/8T/1v/p//r//f8HABwAKgAtAC0ANQAyACgAOQA+ADUAUAB2AIAAiQCMAHMAewCCADsAGADc/4v/8/9I/7n+mgCvAa0Ak/8B/8H+SP9V/7j/WABSALUAagAOAGcAmACJAI8A0gCXAFIAjwBRAPv/RgBCAPr/FwAkAAEACwD+////bACIAEIAHQAOABIAFwA2ACQAwf/a/87/vP93/3z/Yv8R/zr/AP8V/zv/B/8v/3L/P/8d/0v/Wv9B/27/s/+6/7z/4//M/5b/x//0/+L/7f8wAB4AHQAhAP3/KAAsAAcANwBXADkAUQCDAFEAXADAAH8AZQDDAMYArgC1AI4AcACDAJQAdQBjAHIAVgAiADkAPQDk/wEAEADv/+H/+P8GAK7/zv/2/8f/5f/6/8L/n//f/yAA+v8MACwAJADo//r/CgC/////AQDo//j/8//E/9H/7/+1//v/2f+N/8r/o/+V/4r/ev9Q/+D+DP8P/zH/Y/8+/7X/b/+r/yAApf99ALYAAQCyAHcA5wDcAG0AaQFYAf4BiwH5AFkCPQHcAC0C5gB//6YADABM/8cAcf/p/lH/T/8l/27+OP9I/7D+jwDX/nv/pgAsADoBp/48AXX/gP9BAer/cwBl/loAKf/D/fP/0P7Z/hcA2ADaACIAIwCn//3/Af+//k4BBf57/q8AXv5sAUQAOv9hAAH/8/71/gUAm/9qAH4Aof8MAg8ArAG1AhD/qgJwAh//+AIXAfL+YwCi/8sATv+q//b+rv74ACX+wP9FAZz/4v9MAbIAeQAaAt4AQwF+AaYA7AGjALP/ggHV/pP+AQHd/sb+rACy/XL+RAEd/Qb/S//V/vQAav8U/zkBpAAy/yMBBP/hAFUAD//NANEAnwBs/ub/HwBE//n/fv40/xf/OP+b/hf+UADs/cz/l/6z/7wAB/5RApb9ngHBACn/zQF/AHcBX/3qAsr/uP9CBAf/vQHEAUgAWwCnAdEAGADeAb4AnAA1AZUBxwCx/y7/7v/8ALX+4f+cAQD8nwApAPz7VARu/Pj+uwEo/QUDA/4TAUQB5v3X/zj/QQEfAYkAeQBNAOkAmv8X/TECY/8a/WYEGv2W/lsCfPnSAMP/Mv0XAJ39IABoAEz/cAACABr+YQGTADH+yQH2AL395gGcAJr+dQC1/1z/cP6IADwBGv/mANcCwf4dAZwDmP/w/noBBwH9/4f/EwBmAgX+lQKnAb/+IQDV/7ABTP3dAV8B6f1oAXH93/99/7j/VAF7/DkCNv9U/fwCE/7t/iUA0/60/e0AMwFF/UYC8f4RASYBdv0LAGwA1AAr/8UAvQGK/kv/KgC5/l3+FP/w/8z+7/+qAEEA9f7jAnkBkP2QAigA5gC4/5EAaQFy/3kApf+l/+gBGAA1ACkB5f6eAUcA9P+NACgA9wDv/qX/3wAU/owAYQFf/IABOgLA/NMBUgDs/vcB/P0lAZP9JP75AST9wf4cAFUBjgBV/r3/4//T/gT/+P7R/nX/xgDZ/on//f82//oAv/1MAZb/7/4jAuD+g/0qAtMBDP6eAPj9jgBiAEn/mgChAYgBtfwv/4cCPv/e/xIAJP0uAfcBaf7OAcwAYP78AkgATP6GA8sB1QC8Asv9n/9MA8L/cwH2/+P/VwC2AJL/SgDjAJv8/f9m/Q//EP4n/jsCCfwKAG4AU/8NAv//TwG5/2cAtAFK/kQByQAc/+D/4f7RAOT9sP9nARn+pwA3/3f/zv
8Y/nH/ZQAjAP/8oAGUAS3/ywCo/nYBJwAOAf0BSP8rAngCogAoARYCmv///mAB//1b/e7/8v68/SL/7v+Z/8H/h/4IAMUB7gAJAIz/hwPjAUX94wBpA+kA9v+ZAOUAkwEgAKcAjvyw/DACk/3a/VD/PP7z/1H77/y9AC789AEA///7agLc/b8AngH//ZgAyP6BANYA0f/fAWUARgFGAdn+n/6AAXAAXv+xAdL+0P0RARwDI//l/zYC7v0Y/3X/ewF0/23+1QTT/nX8egQLAyf/hQMpA5j+zAGSBJIAJ//tAhT+vP2aAMoAb/4I/JQCOP7e+4QB8/yw/EwB/v56/oz/YAL1AfD+MAJhAcgB3gIcAMr/mQCmAqwA0v1WAGv+oP6S/cP81P/8+5v9hP8S+6T+hgDD/A0Chv/d/hoEbv0nAUQDYv54A9UADwD5AhoA/ABfATcAiv3//2T/D/9pAez8YgDt/8b97P+A/qT/nAC9/yMB6AD7/14B5wLxACgA3ALaADoBqgBwAfIAdgCkASn+IAAgADv+4QAn/y7/Qf8Z/z8B8v1bAOb/Mv8QACv+WQHr/W7/iQBf/nIAOP8hANL/igECARf9SgAbAEz/HwEdAOT+Yf9pAFn//P9LAWYAmf/E/3H/lv6zALYBq/95/5oAsQBr/08A9ADKAHwBggDmAAoAcAEHA3kAxgAMABMAXQALAloBQf78Acv+w/3a/+H+jf63/DL/Bf0v/Yj/H/9j/bn8iv8V/JP9ZAFR/tz9y/1G/+f+CP4+AFz+hv4Z/4b+Uv/4/zoBif7Y/w0AJAGrAg4DOwViAjwEagX1A94GzAYyBzYGMQbjBhkEfQWABC4DNwLZAL4AcP9WATAADv8HALb/wv3B/a3+F/xa/cr8E/wi/cD6S/3h/Iv6hvzD+a/5efo9+F/4rvfj9iT2O/Ww9av2SPaX98z3vfi6/0UB6QHfBOMGAgvaC+MJfAsaDbsNxw9FDHsJBQ7+C30HygXJAUICPgNzAHP7Gviz+Sj4rfOg9q/3lvT98zvx1PGI94/9y/sk+bv+CgKYA84JuA3HDFoMYwkkB7UMZhIyEXwMbQfoBvUJjwkTCYkG9wDH/377g/cD+1P75vcz9C7xWvG38x/1lfTT86Lz7vWP9xD1BfVe9pf0YvXV9pj3RPvh/iH9AvxdAJkD5wjnC6YKlAyUDlwQ4xD2DxIQoQ/oDLQIWAOFASEDwv5a+ir47vb39UzzG/Vn9Cf1ffk098b26fo1/0ED5AYCCzcNMg/aEAwSGxRMF0QXpBJ/D8QMsQjABVoDEABv/L34zfRb7+ftNO7c6zfrN+oQ6Vrojeg36iTqxuoF7uTsvOpO9Nr/MwNbBocDdQI9DrYUtxGLE2UYzBUbEr4R5Q8XDx8QygwRAuX7a/3K+EbzLfMv8HbrZup168Ds4PDy9GP38Pnm/3sGvArKEBEVhxh1HH8dnB/8IrAiDB92GdsTHRARDE0HHAFT+uP2oPLW66bqU+qj5y/nFuan5WTnz+lU6zLqFuyM7oDslO2F8Lrw+fTEAfoJ0APRAOwGGgllDeATbxKZEBYSmxBuDEMOXBKaDNUFqwOd/nX6lPrG+J30ifFV7/bsYu2F8cDx4vNU+bP7DAA3B5cMAhAwFN8XeBukHFUctB1pHRIbihehEqUO+gwECawCUv0N+iz3KvQ+8WXtQOtV6unnMeZg5xbnbuME477kxuJG4jblbed06WXuOfy4CfUHFQKqB6EVQxwUGhkcriFHITIbyhjHGHYWfBXxD3ICEPxw/sD4T+6n69jpNuP14Cjil+Fn5enpaexf8CL2If+CB1wMeRC+Fi8cRiBeI58kUCaAJRohSx1zG9sZrRWZDIUDDgAP/i75m/Ol7yjsIuWf4EfiS+HA4ePi/NyJ2xHjLee25IDmqOvr69ryLAV3DsMJUwnlEn4cvh55HvsfPSPmIogahhZQFSsRUg6JBdz4gvTD9Qjwz+W+4SLis98J22nb79
6y4lbn/eoe75D38QEECLkLXRJdGqQeeiKtJjcoZSn2KNYleyHBHu0cUxdTDgEHtwMaAY/7pPWo8d/sT+dj46/i5eIS43biWt8T32Lj0eas5c/lfOtM7hvxnf9rDrcOJgsLERobKx96H6YgCiLYH10Z/hSNEQEPcgwhAzX3UfGm8Wzvlebo3wTgTN7J2mDbYN534QTl2+kC7oH0If9hByYKBw9yFzMf4iQUJzYn3CgdLOYqjiXFId0fzRsiFAcM3waKAmr8avVM8Dzu2+rk5Y/io+Co4IPiMeMI4qHh1eNc6MnruuzF8EP2cPdC+uEHDRUsFEwRVBZDHIYcVRyzHq8c7BfvEx4Okwk9CEMFnv258/TtS+3G6WHj1N/d3f3cSt2e3XXfVuQL6iruR/Jd+TkCmgouEAYUSxsSI5ImbCeXKIIqICwBK/okfx4LHL8YnBCeCLgDvP4B+CzxXeyD6QHomOWM4IrdgN4U4DniiOQW5bjmB+wa7wHvp/Kz+cP8HP0zB2IWRxh9EtUUvBuSHRUdwh42HR0YhxN+D7wKywZPA3X7TvH866HrzejE4DfbfdsM2zvagt1U4aTjbOgA8Fz3v/7LB2sPBRNrGCwiXSn1K78suyw3LOwrNSvyJloegBa6EfMLUwRn/Zj2ve9d6xfoe+T94oHhYN7H3bHf9eBT41nn6el+7HHymffR+Lr6J/8/AML9HgVQF+AdDhNWDvUVQhq9GmgbvBYFEVcPkg1wCTAEIQER/djytejb5yLqIOXi3PDZptu93X/gleRW59/pVvE++jUAvAeYEC0VexhiIAgpwCsGK9grDyz/KXEoRyakHnsULA5BCoEEgvyx817soucS5dvjKeFk3q/eHeBX4abjp+ZA6kzwcfVr9yj7Iv88/z4BVwULA5cCsw95G3UWFQ2oD/4XABh0Eh0SrRE/C7IHYwb0/3z7k/sQ9YfqR+dQ6CDm8eD+3Q3f8N984b/mGOsb7cXy4voXAO0GehDsFCoWdh3hJ8or7imXKWwryyrRJ/Yk2R+0FkEOXQnyA2L8OPVl7gDoT+Q046fi/+G/4ariEeQo5q/q7+9J84r1aPiX/QEEeQX6AdcBqQNnADkBPxD6GR0OTQNdCZYOHQq+B5MJZAiqBccD6ACZ/j//TP9t+N3u6u7+9MrxCOhY5WDpguv96tTrp+4Y8rz1ivne/1oI1Q2ODp4Qnhl6Ix0lbiKyIlMjkSJTI8chRhlZDzQKkgdrAjr71PSb7jzpD+i76NHnhOj36XDph+qx7Rfx5vVZ+EX3MPvRAdcADf7ZAFQC0f0e+J38yQqYDcwAxPyfBHsGdAXQCHAHRgPCBX8IzwU+BOMFkQPZ++33afvy/GX2N+5K6xXsBO5V74Dtf+uO7t3ziPfI/BkEHAjQCAMObxniIKQgxiCnI40kUCTAJDUikBomEtwMXwkLBe7+6/a27ufqJexs7IPo/OVG6PXqvOtY7KHt0/BC9Vb2wPUN+Df5o/gU+6v61PSl9+AFdQzFBMX/CwaiDP4KRwntDJQNdwoyC1UMPAqvCTcIEwFe+7H7G/th9jbwPuuQ6T/pH+is55bnwuhu7DHwBvWX/QkE7ASICbMWsSGIIyIj5CXpKPcpiyqmKPcgJxdREWoOqQnQAo/6PvFk6ynrsutr6WPm/eSL5dPmAuii6sLtR+4G7u/wK/YN+r36KPrl+f33BPkzBU8S0Q0NAj8DwAriDX8QvRA0DA4KWgsVC7sJFQknB0UBLfol+Az6yfd48OrqKOlQ6DLou+le6rDpnOt18OD1dP2IBTkIiAnuEv0f1yRWI0glGykuKYsoQCiNIjIYqRBUDQMK5gN9+xrz8OzC6l7rD+rH5p3lJeZI5pLmZedl6oTvwvCU7lzxAPc9+Ln4Tfvp+Sb4pAE6EF4RzwXXAVIKDRDRDtoOQA+8DHELCgyCCnAHbwanBNb90vYT9vz2+vK/7AXoveXL5vLoNOjo5+XrQvBz85z6jwMVB1EJYB
IOHrIj4yQMJ9Up8Cr/KhUqYiWHHPEUDxHBDD0G/f6D9obuVesT6yjpWObN40vh3eD34izlyeYR6AzpAusJ8HH2x/jw95H6XP30+2cBAhEpF4cMTgW1DIQVLRW4EVQQUw6bDIUMcgkHBP4CkwLo+oby9/ER823ub+cu44bjNOaW5irl9Obf60Pw4vTf/CAGPQtcDU0UECHgKeYqxSpcLAwt8iyALLsnJx1SE0IOigqHBEf8ovPY7C7poOfb5YDj0uFi4cbh7+FF4uHlxOtG7Uzrje709Y/5rPvO/0n/b/yjBUYW8xcOC1YGVg68E3gRaA9qDhAL1Qi2CFMG+gK8AWn+Svf98YzxLPKp7vDnAuQI5Z/nc+ja57fpF++a8/X2Zf6YBzsLcA0mF8Yj1CjMJ3go6SrVK6krPCpSJHIZLhDlDI8KgARn+8/w2Oh453jooeZ3477god8M4WvieuMl5/Hqi+uN7AbzYvvR/cP8cv+UADT+RQeXGW8bqwxNB6sPphUjFMIQhg1jCoYITgf8BBcCigAc/TD1Vu+u8Ljx/Ovk5K3ikuSb55vprelT6izuivNc+U8CGAtbDQ8OPhcdJT4rYimZKKsq3yskLF8rTyUZGlMRTQ1ACegC//r/8STqseb75uTlPeId4Hfg2OBO4fLiNOZn6nLs1ezV8Qn6o/yq+wMA+QOB/yQA3RDiG2AQSgWRDHEUQhElDyMQrgtYBnkGQAavAvYA0f+3+DDvMO0z8bHww+jr4Ljg3+W/6LLo0uq/7mjyifcZAMYJDw+9EPwW8yKbKm8qvymXK4osGyzsKrglsxtlEvIMDgnQA5L7PfGP6ZXmc+U75aLk0eD43KvdB+Gk5FHoFerS6dbr8fOG/Ur/PvxJAPMEEAH+BFkXXh0bDlMFRw5RFkcUFxDpDIAJuQhqCTkGQQG6/5P9hvaX75HuU/AH7eLke+FM5V3nSOaS6Ffsqu4A8/P4rwCLCk0PtA41FY4jKit/KYkowyp5LGIsdypdJZAcmRPxDeoJIgMf+q7ywuzf5wTmPuTb37neGOE+4BzfZ+GJ5Kvpzu0T7Jnu2Ph4/B775gBQA9T9cgVFGVkcBQ0UBj8P+BcsFXkOAgxgCjAJiwmPBXP/7f8GAIb2L+1v7r7yF+8A5nnhZeTh6FrqdOl06mfv5/Tw+PH/RgmTDbQO+xViIvYoxiddJ8Mq+ixNLDQqiCSxGoUSrA4RC8sE5vyx89fqhOaf5rnmOeTG31zcitxj32Djoedf6XToVupO8mf7vf4x/dX+jgO/Ai0CdBGTIRgXugSaCVUWuxV4EkMRmwojBkgI8waXAkoCfgHc+UTwpe7d89vyMeiq4KLjNOgq6B7naudC6nDw1fWO+1cFLwx7DIgRJSDxKiMrqSkIK/wrUy3YLmwqCR/mFMkP8QspBqD+Ifbg7F7lJOP044XiEuAV35LdadwQ3wnlfOpG66Hp8+6k+S79y/siAdgGhgK1/oIKLxqOFkAJYgvPE2gRQg8hEoYN1AWaBjcI+wPPAH0AGvyJ88vw3PVo9o3tjeUk5RzoYuoK667qWOsB7lPyx/nDAtMHZAkTDj4ZjySwJ2QlXSYPKkIrxCp5KWEj0hj0EN8NuwokBe38UfIX6mnomela6BPkut+z3tDg3eLc4/bkPOYL6XTuFPWj+e/5UvxPBGAE3PseBEkaWRsDCdEFIRJpFeUP8A6wDnMJcAaYCJEIlATcATH/qvgW9Nv2w/iE8rrpI+f/6ZTrOOr36H7pH+3/8av0CvoWBLAIpAenDmgdAiUeJEskhyYeKI4qZixsJ9wbHxMhEWUPxwi0/z/27uxX6Hfpf+nh5J/fEd2Y3dLgTOS75YTlDOba6bfx+vgn+sT6kwFOAwP7GP5FE5YdDQ8mAmEIwBJUFBcRJgteBGcF6QyKDWgGbAKBAXz8DPf++Eb8Lved7dPooekq7DLtNutu6fzqwe528yL6HwKmBugGjgt+GQ0lGiWpIkkmLyp+KpkrVi
o5IFkUdRBfEFEMjAM6+EzuourR6wHrN+Wt3yjfDuHe4R/hs+Do4/boGuri64Xzifgn+UP+VgF2+tz7gQ8WG4YOAwM7C6MVdhJnDeYOrA0+CfMJBgt5ByEFBASB/dj12/VZ+hn5mO9J567n1utU7APp4OYM6rLup/CL9ev//QS+Ar8HcxdVIjwhIB+lI/0ovClwKf4ouyKuGMwTuBI8DmMHDgAV9qvtzew57oPqw+Mj34Dej+AJ4mjiDOTc5ezlZujj8ET4DfgT+B3/qwCQ+a8A+BelHAYIMwA8EOIZVxTbD2YNwgh0CeEOKA0sBLL/MgGa/eL05PST+/f2EukC5EfqIO/466PmYufK7EPw8fJa++oEbQVTA00NQB6sJEchCyEdJIol8igBLXEnjhnEEUQTuBNoDagC7vce8cXvRfAz7V3mdOCm307iOeJh35Lha+ee5yHmwOyt9I71Bfdv/db8V/Zc/6gVmRmvBjT/2A0HF0oR7Q5jEE8K1gdXDyEPzwVCBOEF/f3r9Z344/yu91vtjudD6UHtxOxq6H3n5Oql7R3x//gmAFcB1wKFDNQZuR9cHjYfciN4JWgmdCj1JRodKBZ0FF4S5w0SCBf/S/VH8Qby1/Ak7JXmoePp40DkGuQN5r7n+uXQ5dTsEPVY9hT17/my/Q34PvigCxsa0wzm+yEDCBMFFXoPOQycCLgGXQqeDOAIPAV0BBMBb/qD+DP9N/1D8q3nnemP8KLvGOqg6BDrcO688uX4Z/9XAdEAUQgzFm8cfhrNGlEdFB8/I88m5SL1GoYWzxS7Es8PRgsaBKD6RPOd8xb2xfEb6yzo3eXG5E7n1+jg57Lmd+YG6wnygPIb8gf7HQHh+EzzugMtGUUUVP4J/B8NTBXlEIoNDgqpBDoGQw00DvMIQgT0/vX43/jg/Z79FfS16PTlseye8WDuRuqP6srsg/Hu+Z4AeQKDA5YHjhBYG00f4RsVGmkchx/PIvoi9RvHEgEPRw9cDygMRQMj+f30A/YY90f1Qu7B5u/nv+1O7WHpxujj6FzpX+5C9LT1qvYO+6b6r/Rb+0ISQhsvBs3zeADcFUoWXQvPB+UGPwSDBx4OpQzxA7b9cPsO+pb6Jfye+I3vGukR64jw+fAh7ZHrsux/78j2lv/8AUz/wwFjDL0XCxxpGaYVZBZzHR0kTyEoF8gPMg6qDzYQhwu2Aqv7EfmJ+YH60feA8Xbtve3R7bzseexL7H/st+6S8e3zQfYc+TH8Vvwa+Zv8Cg4kG+IN7Peo+qUPqhY3C5kCCgIWASoBjgStBsIE0P90+ab16PYl+9T8DvdI7SPqivB79sL0APF48aD01PleAU8FoQNCBFwLnRNpFy4WzRPKE7MVdhiNGhMW+AqOBA4HmglABm/+dPV18Rr2Uvo+9iLxxvFQ8770J/hp9+PxCvNf/D8BCf40/Kn9G/8sA1oJpQ5IEsQM3/2p+VkILBa6ElkB9/Bf8+ID3Qg+/nz0GvDK8BX4mfuf9YTxI/TT8+3wYfTm+R354PWy9kP8pQPPBtUDdQJICEYRYhUHEpkOjxIBGJ8W8RLVECoLBgYoCCsHI/6H+RD61vTA7dvu6vL48WDvlO9v7sfrXvHA/ooDlPts9tv5lAEgETEd0Q9o+h4EoB9xILsJQQIkDGQNlQbUB/IIq/1s88z2lPrw9E/w5u9X67fnW+9c98byOetj7X70mPd1+Ff7CP+7/73+7wOcDyYToQlWBLwNXxnSGosUnA+vEOcTmRHdC/MKnQulBC/63vch/ED6CPDh6Nfq/O6f743rl+OR4CHsxPuc/MXyqfGk/OEIARAAE68SgRDEEXUWihioG8of5xWXArgEUxiTFLv3We3EACINbPxh50bn4PDI7xfoAueZ6oLqcOmR7bT0cvjk9lrzsPblBCsQCQpw/TcARhAyGBYTMRHlFvcVTAxCCBIPeBVMEeUFn/4T/zkBpf339AbyyveV93rsG+dL7ZHvd+o26Wjs8+
6p9NP6O/hz9p4EyhPBD7kGcwzLFyEYnhFWE3cbbxmDDbALLxWdFJAGOf3o/W//Cfwt83XpIukJ8uX1uO3/49Xmd/T6+Zjxhu01988AUAEO/wAAZgMxBrsGRwi7DMkPVwwrBnEH/BBbFRcObAWUBIsK+A4QCXr96fiG+zz+3v249rDqxuWA6wXuvOqB7THxHOi631nsFwLfCaYCa/sTAOANchgDGwAZuRXLF+oc4xjiEpoYvxmaCb79KgGRAnP9yvm18cDnVewx94b0euyD7oXxv+8l9Y8AhgK6+JryqvgBAQsDGAAq+0f5KAE2Ch0FH/pU/XEKzw2gBU8BXQYPDCkMzQlBB6cDdQHQA78HmQiGAz762fUH+l/8+fik9nfzGuxH6wP1Cvku743mhO2f/F8D1v/7+5b7FQJiEuQbdxD6Aq8KoBzFHh4PkgN+BWsLewypCNgDJwEaAED+wPvG+/H95Py39xz1bvjx/SICKwIP/Hn0y/JG9z37Iflm84LxNPMF9If0BPiz/h8FmAWmAi8F0gpyDN0OyBTEE5QK8AeADsMPwgYJAIkAv/9R+nv2+PXk8sTroug47nT0U/RQ8sPzMPgz/ggFTgjwBIEBaAfcEi4WoguJ//H/kggCCjYDEP+e/C/3HPej/YL9rfeo+bn+5PzI+7YBrQf0CaEKwwi8Bt4J6gx3CMkELglaCSH+jvVe+Hz8rflP9NnvFunT4/fnTu7q65voqO0R8y/0E/uNBy0N9g02E7UWLBWmGSMhiRy7EXwRoReME3IGAgABAbj8vfXA9KHx2+ft5fDt9e6G55LmG+4R9VL4Wvnq+Z/9vwe2E6wVBQzvB28R1hhkFhsUaRIHCysG6AgTCekDmP/k+pz19fS19xX5Zvd681/yd/cQ/Yv82/lz/CMDBQa8Atv/CAOVCU4NKgymCEkGEwnvDU4JmPzH9+H7ePsj9PLsIOgR5yvqk+r65YDmb/BT+Kn2uPM9+YcD5gkTDEMNJgxPCzcQvROtDVgJHg5vD50JQghoCskGiwDw/ZL9fP4wAKz+4fo1+cb4Mvn9+978o/nD+FT8NP3S+nz7NgC3BLMEWf8e/Gn/HQIQAaMBXAKr/nb64PrI/S4AuwLZBRYGrQF3/Nb8TAUeD5MOGgWWAjoLdRGJDvYINAZcCDYNRAvJAYr88/2P/Mj2mfT5+Mr7yPXo7LHtfPeA/Qf6WvMt8Tf1IPtk+1nzs+2s9Ob9j/xN9/X3ifn3+SYAtQiKC5QLtw39DnwPjxPCGOUY5hKYC5oJZQ0FDz8J+P9g+ZH5X/7//nn3Lu9078731vzi90LzSfjf/Tv7YffR+cT/jgNjASX7K/hJ+yz/uf8a/sv8bP2s/lX8F/oYADUHWAP2/IYAHgjRC+wMGwy9CicL7AplCWsJ5gc9Avf+Pv8F/D73EvgF+m326fEj8cDx0fNP+LT58vUY9PD40v5U/7n8fP4tBZQJsAYvAZ0BuAeuCvgGCAI2AHsA3wC1AIIAxQDLAO7/Tf+D/6MAhwOYBcAD0gKDBSIFqgENAugDcwKoAa8CQgE4APsC7AOSAP3+yQEXBbUE6AENASoB+/6w/4kHAwskAl360v6zBUEDqvys+eT43/jP+or7X/kZ+Eb6VPz5+un4PPvA/h79R/p+/EH/z/4N/p38r/uaAF8H5AY5AvQBAwbNC4EREhLIDOQJEg1zD4kMNAlVCDAFQ/6V+db52flc9SbvmutT65zsKe5r7iLtoewb7pLvtPBL8wv3fPq1/SkATwEhAgME4wfmDLIOCgsdCKwJCApKBwIHuAi1BnUDCwRfBTUF9wbrCEQHBwVXBTMGPgW7Ao0AmP+J/pv8//qo+e331vba9k72mvV/92L7EP1i/ET9aQDNAjcEOgakB7QH2gjeCo0LSgx+DE8KAwmTCREI5gVMBY0CrP44/tj9gfoW+EL4lvmB+nb5lvjI+9r/OAAn//f/0AHvA1YGJwecBCMB1wCTApsBff6A/OH66v
gA+Or3kPit+f74l/am9vj4hvrw+4f9//36/poB4QPGBNADHQJnA+EG0QaXAkz+hf1nAI8Baf3f+Hz4U/rD+4T7n/lD+Aj5a/oC+9P6yvoB/D79evwr/DL/pQIpA5QCKQOvBHwHSQuuDEQKpAhyCwAPpQ30CHkG0gZfB24GFQM6/17+of9w/2L9e/vn+g/7v/ne94n4mvnE98L2vfgY+Rj4L/tbANEBrADfAHUBGgLEBOAH4Qe8BT8ErANVAxsDRwPMAx8DVgE5AWMCsgFNAKMAnAEXAaT/Zf+xAI4BYwD//a78X/2B/sv9gPsm+lb7PP1b/Qv81vt//Xz/0QCoARoClAJ8AzsEEQTJAwwEIwRhAxICxQBVAL8AUQDR/XT72PtB/fT85Pv3+3P8cfy4/Ir97P0u/mf/jAAmAF3/CQCMARMC+QBM/0L+6/11/W78NPtb+pr5rviw9lP0U/XI+Pj4lvdw+Vv70/vK/a7/OwGEBesJ/QmtB0IHZws2EgkTcgwSClIPWxEoDR4KlggIBv8ENwVjAS36Yfc/+5P9bfls9FfzvPR3+AP8u/kI9vv5p/+Y/rr8pP5+/8r/5QEbASP+vf+OAzYDxgAZAdoCWQM+A+UDLQUQBuMF6AQSA4QB2QJ/BA4Bf/tF+pP7v/qj+Kf3+veY+I35LPoo+tP7tf+lAWoALf/2/xwD6wZgCDgHvAVTBbwFwwU2BCMCKQHkAKz/+PxD+v/5H/u0+lT5v/iv+E36vfzn+3T6nf1JAZgAdv65/S7/tAKTBCQDNALeAjwD/wMIBU0FDAb/BggGLQVVBjEHowbPBUoEWgLsASMCJAAV/XH8Tv2p++33T/U29Dr0MfWZ9LHyKfMB9dL0o/TQ9pT5efuv/XAA2gIyBSYIXQrpCZoJUwzGDssNMwwRDKELCgvOCy8M9wk+B2AG6AUCBOIBWQA5/rr7kfoX+if44PVl9eb0avOi8yv1SfXJ9Tr4dvna+L/5kPwY/1IAKAFpAkoDuwP5BCAGVgZaB7AIHgibBvYFjQUABI8C9gJ4A+0B5gDxAdoBRwBbAE4BiQBD/1j/4v/6/9r/Xv+L/VP7t/oe+1b69vgJ+D72/PN386304vVx9qv20fZO+M77Mf9+ABgBcwJCBIkG0wgVCukKswv0C2YMHA1RDF8KLQm6CMYH+wWBBOwE7wWXBLMBvf/4/kL/GQAj/+37Cvpr+hb6YPj29rb1q/Qo9YD2ufbo9Yn1LPdG+m382P1L/4kAiAKNBbsGdQWrBd4HswjzByUHXQYBBhcGWQVqA4wBiQBYANT/hf7F/Rf+8P2T/VD+DP8h/y0AuwH/ASkCRAN9A9wCHgONA50CMwGCACAAbv95/sX9OP1S/Kn71/vd+wb7Q/o2+nX6mvrZ+iL7PPtI+4X7g/sE+8D6xPuL/Wf+Ov7Q/kYASgEDAuECVANqA+0D9QSdBakF4AVPBo0FKwQeBLgEPwQvA/EBkADh/7P/Fv+U/r7+KP9c/+3+6P3y/YP/cwBi/6n9ufzf/FH9lPx8+uj4TfmZ+u36ivpv+9T9nAB2A6cFhAalB7YKCA5SD4IPKxCVEMsPlw7tDfkMywrDBxoE4f+y/C37L/nv9c/yDfC+7X3sRuwd7aXud+8g8CnyDPUq+NT7Vv8TAmwEfgZvCK0K1AwyDgQODwwQCq0JignsB0wFyAI/AFH9evqn+Cv3TPU09ODz0vLl8XHypvPv9MH2pvgf+i/7cPyw/gcBYQKZA04EWgPNAtkDpQSHBAUE7QLwAZgBXQH4AaoD2ASzBC8EGwQVBckGYAesBmYGxga1BggG8QSpA0kDPgPNAZb/9/3d/I78Kf0b/c/7J/uU+wv8wvxL/p//ngA7AhEEWgWuBnYI4AkUCgcJ6QdtB/oGWAYgBqcFygOPAQcAff4U/ZT8Rfyt+7D66vg697X2uPb49uD20PVM9cP1SvWt9Ev2D/hq94b2Rfdj+Hv5QPvE/J79j/+QAkMEGAR9BKIGOA
lpCrYJOghjB4cHewdSBvgDewEoALv/af7X+/b5x/nx+Xn5t/gz+LL4rfpA/Hb8aP3l/9QB2wJIBB8F+wT4BQIITwhdB6EHYAi6ByEG7ASRBGYEUgNvAU8ANQCU/2P+mf3Q/Jf7BfvV+jz6XPro+9X8UvwN/Ln8f/2z/uAAsALgAokCQQPSBLMFMQVCBEkElARbA+oABf/I/Xf8Afs7+ZT2LfTL83n0EvRZ8yP0pfXa9tv4u/sO/q0AngURC/kNfQ/kEX8UNhaOFy8YfhdfFgkVqBJvD9ILzwf/A6AAEP0v+dj1CPOJ8P7uPu5t7Tjtju7J8APzYPWm9+H5wPyf/00BdAL1AxQF4gRgA2UBFgDp/rD80PnO9s/zdPJb8pTwQ+5J7xnybPJe8TnyKfUy+Ur+CwNNBfQFGgiPC8EMPwyPDscSjROIEVsQUg9kDQkMHAr1BcICqQJTArz/pv1T/Q79iPzT+2n6PvqD/WUBuwIpA9sEIgfvCN0JIAq+CgMMkAyOC88J3Qe0BcYD6QH8/3r++/xj+tv3l/Zb9Rr0vvMq88rxhfEc8sfxFvJf9B/2sfX39J70WfQ49Vf3TPmJ+mv7gPz4/T7/iQDQAr8FFQixCfAKFwzwDAENbwzUC2MLWgsIC4cJiQc7BR0CVv9g/YL7vfof+5f6Pvmm+Af5WPpL/DX+CgBDAuQExwc3CtgLmA29DwsRUhF6ERwRrg+dDaMLlAlbB/IEFQKm/mT7fvhT9TTy+u+I7nDtZ+yn6rLoFOhH6MDnk+fc6PPq7eya7bDsRO3Y8G70z/YC+fH6xf24AgIHOwgoCe8LQQ+YEf8R0REtE8wUohSgE5QRWQ7DDAEMfgiTBCMDBgEt/cL6APpY+Uf5yfmc+gn8KP3m/RoAWgPaBVYIWgvrDD4NhQ4EEEIQfxAWEc4QkA+yDZ0LuwliB0sEAAJ6AIT+dfxz+sn3l/Vm9OzyRfHY8DXx+PAj8BzvAO5g7aDtc+057B3s6O0B78LuGO9G8BPyZfUM+EH4l/mZ/SYBhQPWBa0HAAlYCm0LxgsqC6wKcwv+C+wKoAniCLkHqwVKA3kBmQCqAGAB4gHiATMCXAPYBE4GzgecCfILnQ7GEA4SERPWE1wTrxEWEOYOew2eC4cJMgeaBAgCh/+F/NL5UvjP9qD0+PLu8anwfO++7irulO0C7ZzsneyQ7FTsuOxM7fvsr+yJ7STv5vD48lb1Mvie++n+bQENA0oE+wX9B5EJvwrFC0EMYgw5DPkK2QhUB7sGMQaBBQ8FsgRFBLIDHAPOAvoCRQNJBHkGiQjHCX4LZg08Dh0PoBDUEe8SvhMTEwwSvhHRD30MvQqjCT8HBgU/A8MAOf4C/Fz57fY99dfzzfOY9KHzOfKc8qTyDPH079bvHvDl8AXxS/AU7yjsUOqO7Obtnuv+65fv//AN8aby5/S094L7FP/IAjEGaAcDCIYKhAxSDJkMgA3JDKgLWQtQCu0Iqwh/CH4HvAYiBg4FGQT6A+oEFgeZCbULug1yD1kQKhFXEtAScxISE74UyxQqEn8PNg6iDPoJYAfeBEYCjgCP/r763PY09K3xW+8f7X7qJ+lo6RDpB+hi567mluYy6HfpEemn6eDrxe1R79LwMfJN9UT5Uvu3/aQB/wP4BYkJZAvmClQLKgy5Cx0LJQpjCEMHEAakA8MBugDE/jL9b/27/Yf9R/6Q/9QA0gIPBTEHgApKDkoRSBRoF3MZnhp+G7YbNBujGkUaLhl2FnUTchEBD84KtwaCAwQAgPx7+d/1H/K/7zLu9Oy1663p5ed8563mFeXQ5L3lYuaQ56nofOdn5pDoTuuW61nrX+2d8VT1dfdv+on+IgEhA68GRQnjCNwI8gqjDLQMLwyZCxgL/AloB/EEhwNtAW3/bf94/1T+Nf5B/wMAAgG/AjIF8gjiDNIPzRIDFigYVhlrGk8b4xtqHEwc5hqhGJEWjBRsEQkNsQgfBfcBhf4r+sD1vPKv8C3uzO
sZ6lLokOZC5STkZeNx49/j/OOc4xPjrOMK5t7n7OdE6WbtS/Ed9NL39fsr/5sC2AYOCmcLVQzkDVQPdA9eDjQNFAwiClcHqAR7AlUAEf56/IP7Tvo4+ZT5OvsQ/er+agHTBCgIzArCDXARuBR0F/8Z8RsGHcodiB6kHlYdRRucGakXdhTmELsNWAqpBpACsP32+Aj1ZfFo7grsQek65uDjAeJ34DTfG97w3a7elt5+3q/gYeMy5G/lCOnN7CTwJ/Xp+sH+IgLcBuUKWQzpDHoOTxDHECAQhA9aDrYLYQhJBR8CDP/A/Eb7A/qv+Gn3tPYV93H4b/pY/TQBJgW/CFIM/w+CE4cWMBn4Gzwech9lIAEhFCAZHp8cHhtOGLsUiBFaDqQKRAaQAfv8sfiT9Czxv+5I7Ajp/OUS5MPic+Fr4DbgjeCk4OXgxeJ45XvmAefR6ajtY/DY8/r4Af2R/0QDoAeMCewJcAt3DSMO8Q2YDWAMDgovBygEUgHc/lf87fk2+Iv2Y/Tt8grzXPSx9gT66f3JAV8FBQkoDWoRIhXMGLkczh+MIRgjNCRwI6chdyAmH4ccGBmfFbcRMw1SCD0DNf5l+Tr1BfLH7s3qYuco5UzjtOG/4C7gHeCA4Jvgi+Fw5A3n3Ofh6Ubuu/Ee9En4+vyt/w4C2AXpCOMJPQpJC8wMQA0oDKoKHgl7Bj8DjQAZ/jf7Tfj+9TT0SvIp8B/vBfAA8m708/do/GoA7wN1CM4NiRKmFt4awh59IW0jLiUeJqMliiRkI5shph68GpsWaxKqDXYIoAPs/vX5nfUz8m/uVeoO54TkeeJP4e7gKOHQ4dHhBeK45BDoyei46QnuPPKH9ET4MP0jAE0CjAViCM0JUAqdCqELfAyTC/kJnggOBkcCGv84/Nr4Fvbw84nxqu8R7hXs0esJ7lXw1fI69+H7ov9dBFcKwQ92FOkYsRzDHy4iwiMFJbIlHSUfJB4jiCBSHEEYRhRsD58KkwYrAjD9qfjH9Onw7Ox36VPnQuYy5U3kg+Tp5DHkYuQI6PPrWux17U/yj/WA9iH6Cv9BAYACUwUOCKYI6wf1B1oJ2QlSCO0G7QXsArD+5fvP+fb2m/RA87nxoe9+7Ujs+OzC7uvwffRq+bL9BQEABakJHA6gEoMX0BvrHjMh6iJ+I5AiBSGwHxge2RsOGUcVhhAIDNIH7QLz/dz5lfbo8/3wV+2k6qTpE+jy5SnmuedW51znsOva8ALyX/Lo9Tn5Uvq//EIBOgQKBasGTAmZCXkHywYDCHoHUAVOBDMD8P9V/OT5f/fo9Lby0fBt73TuAO3k6+3sPe8N8aTz6Pft+0z/0gM9CfkNKRInFnwZ0RsjHQgeLx9wH+4djhzIG2oZOhU8EYQNrAkrBucCQf+L+xv49fQF8g3vrezf63vrWOpU6tXr8Otk6w7vHfXh9h32KPmx/RP/fACyBJcH2ActCUULhwoTCPMGrwbMBXME9gLDALX9Qvro9gD0vfEs8BnvU+7P7RrtsuzR7W/wJfMD9vH5bv55AkkGPwpEDjgSmBXuF4EZeRqmGmIatRnsF4oVrRNMETkN4QhcBQYC3f4//EL5w/XM8mnwMO7F7EvsT+xB7QXvAPA78ATzOPnU/bL+AAFFBtUIXwhHCoMNFQ7+DUMPLg92DBgJwwYzBewCif/H/Nb6V/eZ8kvvd+2m60HqBuqb6lXr8Oti7bvwhPR895v7PQFOBU0IIA0KEksUBhaXGMoZNBmNGNkX4RWhEncPrAysCI8Dav87/Fv4VvSj8brvWe2x6lHpTOlN6ffp2+yh78rvW/Hq+AkBPAMjBFUKPhFyEosSjRZtGZcX9RapGPgV9A+pDYYMqQeUAo//X/sd9h7ycO746kDpQOjV5g/m+OWb5qzpbe6t8Vn0uvh6/V0BOAYjDN0QfRTQF5MZuRkqGp8arhnlF0UVSBHVDB4IfQIq/bv4u/Ol7gTrhed/44DhouFz4YrhleMt5q7nqe
ml77L4Yf4PAQII0RDuE70W6R3uIQkhSiJNJHshkRzqGYUXixJyDGUHfQIH/LD1NvFw7frpUud95PXhquHV4oPkIOiq7MjvRfOA+Jn9YQI3CBgOoxJEFpIYqBkDGzscNBxFGyQZ5xRyELQM2wdhAcf6pfQ57yzqw+Ra4G3eCN202rjZydoB3FbeGOWe7qb0LPfQ/b0HgQxEDywYrCCPIa8ibSe6J8QjQyIrIYQc7xUmECgMlgfp/0/5QPdX8/Pq7+UH5mnklOG14mflQuZh6FHtSfKI9sv7hQLyCLkMJQ8/E2cXJBl5GsUbmRpIGL4WaxSXEBQMiQZ2AAj7cfW570nrBufR4S3eWd013DHZu9gw35Tnb+mt6Fbv5fl6/9YDlQyEE1oV5BlRIT8jdCFgInEicR5HG9sZixXhDkMKXgfnAe35/PSg82fw3evL6iTrOek66Enqp+zf7mjzBfmY/Pv+3wLRB1kL+Q1IEdgTyRO+EqoS4REnD0YMpQnpBY8B5f2j+lz2B/F+7YLsOunh48Xk7Oqu6xjof+mN7kHx6fNw+an/xAPnBXIJTRA/FR8VpBWlGPEYRRc7F9AVQhLhDxMNkAjoBC4BBP0K+/b4RfVd8wzyOe8N73HxfPJS9NT3avkG+wX/TQIIBW0I8QkeCoELoAzDDNMM2wqnBtADWgIIANj8S/lS9jP02e8m6lTrlPEQ8WLqtOiw7Onv2/Kv+OD9zP1Y/ccDfAviDEEN4hCAEpUROBOdFhkXYxPeD8sPVg4UCZAG2wawAt38rvui+i72J/PM8lbyvPLE9Nv1KfbL93X6wf2XAWcE4QWiByUKmgwqDsQNOwyuC+sK/wcnBSEDJ/97+lD5efnJ9Mrr4eY07PnzE/JZ6annzu1V8VLz+PjY+/v5FP1pBYwKYAz6DX0NaA0gEaYUmxSTEq4Qdw/FDRwLggk5CDcEh/+8/Qn9Gftm+N31pfQI9SX2lffa+B75x/lT/LT/ggINBIoE6QX1B4wIIAklCpUIUAUxBCMDAACQ/vr+w/v281bvDPVZ/Yn5t+0V60vxe/Sj9ib7cvp+9UL4SAJ8COkGhwMxBFYISgyEDooO/Qv/Ce4K5gvwCo0JSQfTA6ECdwOGAfb9E/0k/Mb5l/oX/c77qfgz+ML60v6SAccA8f9XAjgFQAaDBj0G8wSeA8oDVgSoAkwAo/+7+sjw0vK4AGMB8PAu6Vzy9PhD99T3Zvq4+L73S/4ZBhMGCwPFBIoIHguxDbsOewypCigLEAv7CcwIfQamAxoCjgHTAPb+LPxY+h36j/oC/Ef9zPsJ+Sn5oPxXAGkBpQAZARYC4ALkBSEJBgftAt0C8QOcA44D8wDq+gb6T/48+cjr4uzn+079J/Dl67bzHvc69nX6TP10+Rn5HAJ/CvgItATcB6gN0A4mD3wQZg4vDMANkg2vCn0JRQdTAuH/gAFiAQX9vPkx+jf5FPYa+An9Pfs89r32bftSADcD9AF/ALkCyQThBW4IAwlZBOD/ZwEVBDQAtfq3/DX8uO2+5z35+AKH86jmWO83+QL3W/W1+jT9k/r3/H0GNw3oCggGfghiEC4TWg8qDOML7AxvDXQK9wZsBqsDCgC9AVsC2P0o/Ej8iPg/+Gv+Kf9Z+WP2+vjg/iYDCwGv/p0B0gPgAx0GFAcWBPoBMAKgAaX/MgBSA3X+P+5x6bH7GgW786/jK+x8+Yr4bvME97z7jvh1+JgCWgkBBeYBEAaJC1AQ3hHaCxUHtgu2D4cLcgdcB4EEGgC2AJMDUwLx/GP3zfbz+8b/b/2l+Or1Xfjf/20EkQAi/Jb+XQMQBXUEZwP0AaQAdwDe/zz9mvyZAGX+++6Q6DL8zwl++HbkNe1AALkAHPZT9uD+6f9f/osGhw64CVIDjQfMD98S+g4vCSMIdQroCbcGRgXHA6f+6PqK/hkDqwDD+tb3tfli/iUBXf5v+dH3zPsRAmoDlP+q/s4AQwA3AE8DOgMy/6L9r/2B+xL8ag
Hd/lbuNufu+SAIGfsH6mLtw/lg/tD8Svt7/LT+5v8nBHQMcg/dCXYFLwoYEtsQ6wg4BysJCgZPBAgIugWH/Er6Uv9cAB/8RPox+176tPrN/wwCuPwy+kj/FgP4Af0ANgG/AN7//v8NAfAAGP9Z/T/6XPb5+Mv/Svv46jDoFvvoBRf25uU77hr/qQB7+FH4sgHFByIGTAcIDycStw1HC7YOURPQEm0L9gWBCKEJbwQhARUBlPxD9476XgCt/G70V/Xf/Lz+Vvws/a39YPtb/skFKgYWAcQCOwiWBR3/9f+iAy0BI/zq+KL1ivbj/N/4H+iO5ef3AQDB75rl4vJRAJT8wvZi/GQFoAklCg8K0Q0EFUQWBBEqEBMUGBMqDrsLOwkkBJgCIQT1/Tf09/Sc+5H6x/Ou7xPySfn3/BH5ivcK/Kb/bQEtBGMGvwbmBVwFKQfCBgsClQHtAzz8ufIx+QcDZ/ms5RfmUPkJ/yjv5efv8qn4afa0+yICvACeAmwK+A3yDuQSWhUqE/4QVRIfFQAUGg29BmcGlgecA937U/al9Wr36PVh8TPvze8O8nX3yPpJ90z25f0MBMcD+wPdBtIHuAQeAQ8EcApOBYX34fjqB5YEluxP55P8tAY39qvp/fAu+Rz6qP1mAOX7mfx4CPAPMA0WDYMRyRBGDbkQpRcSFSEJgQNmCfMMEweF/oj3NfRz+Fv8nvXw6/DqxfA594H5NfXQ8ff2qP8QBHADpgG+AU4C0AGxBQwKewD19BAAWw4i/53pSPL0Bh4EdPNx8GT5j/zN/OQBsQPP/g0BlwtaD8cM2Q1FD7ELmQsfEyQWFA15AoIC5QjPCf0CBPks8Yjyl/uL/Mrwg+dk66/0mfmL+Hj0cvN0+tsDUwZWA4YC8wIFAd0DMgtjBuX4j/0FDfgDW+298WcGcQTq84DxMPpk+0r77wHOAw3+///MCUcOYQ7dD1MObQk1CxwVpxgoDYAAOQF1Ch4NHwPx9r7yR/bx+sL4me9M65DvzfFS8nL3XfcY8yX56AEdANT/TQZ3Az79sQWLDecBZ/nSB54PRvqM7PP+lQtI/OTwefkB/mb4T/2JBiQCzPtgA5cN2w10CyMNXA2VCoMOXhagEu8FXwOfCqYKXQQVADz6yvTb96r7CvZT7mPsUu5f8oz3Lvfi8O3wrvpWACb+tv4N/zL6YP7uCYcDxPXEAKwPsABS7x36PQm4Anf4Fvx9/636af6/CdIH+v38AsEPJxAZDA4Pnw+DCsMMPBXAEngFOADcBgUKXwOx/JP4yPNN9S77GfbQ6zft3/EO8Njz2vpF91L0MPyWAPf/+QKAALj6DgFTCDAAXfsaCN4L7fhZ8J0CQw0b/1b03fxbAgv+TwLWCIoC7v2BCH4RNA2VCaANEQ4xCnoOeRToDGsBfQIlCWsG6v7X+rz2YfNl9mr4xfGw67btNPE28qj1g/j39TH2Hf2gAKv/5ADg/sn7ngJDB2b9zftuC6QKSPTb8cMH3wsp/aX5PwDzAAECXQqGC5QCqwJ1Dg8TgwzhCqMPTA4nCTgMuBHQCv/+av7cA7ABmfos9gvy3+839Ej1E+6B6xXwcvCK8Hr4Ifz79974O/6gABgCyAHv/fL+fwO//8v7qwWtC0n83O+m/eoNggWu+N79aQRtApwHiQ/CCTwDuQvrFAcT3g53D3APGgzODAoRSwzeAOz98gAP//L55vUx8O/t2PLs8pPrZ+rL8ATxR+7r8o762/xh+536o/0WBCYEav3b/XcE9v/3+LkEQQ8m/dXsif0vDmIEHftOAvgDj/56Bv8Rag0BBDoJeBRhFOMO7g+iEHQLvAvAEM8K8f7h/VoCU/609oHzW/AB7QrvXfHR7B7oper973XxZ/N5+Oj5fPig/FkD9gMIAUH/YgEABawBQPwtBdsOrf8x7578Mw/rBlP6k/93BHgC6ghaEWcMQQWrChgUlBRQD3sOzQ6WClUJxQ4gDDv+UfkF/1T9yPSH8U/v0u
qy673vrext53jrovHK8EzxjPkY/uf7ef0bA7oFwgS6AeIAhAWSBYb8LPxiCgMLsvVn7i8DKBBIAn/3uf8xBiMGWAwbENAIYQd+Eu0XBBIJDgMQlQ/ZCw8MpA2IBoH7Y/on/u35CPKA7QDpw+in78zuouVs5vvvt/E+8Cj2Wfz2/Tf/BAGiBPEI6AXV/ykDUAlLAx77cQOHDOP+9+5V+iILtgON+R8AcAN9/zIIRhIXC2oD+wteFkoUYA6NDlAPeAvzCvoNOwgQ/IH5ff5p+9Xy5O4A7HboW+vO74LrwOX96TbyrvS79g/8If3M/DYEOAr0B68FXQS9AxMIpAjX/l/9AgoQCKbz/u9SA2gKufx59i7+DwOUBT8MRAwUBU4IEhQpFh4Pbg3HEBgPYgldCosN1QSL+IL4af1W+R7x9OyG6bPnG+3X767pHOgj75/y/fMK/K8ABf4s/w0FtAjiCKoFcwKpBWoI/ABw+8QEcAsR/SPu0feqCF4EG/lp/DUBvQBsCKsQ4QpnBcYNeRUnEyIQBhGvDiYJxAkLDhAJs/yY9xP6p/lM9MXueOpU6ZLseu126i3rdvDv8bLyZ/mMADoCTAHXAS0FjgktCCYCaQKcBxUD1vpWAiYMsf6O7Jr1XQfSA0b50ftM/+z90gW0EEENegRECe4UHxazEIAQcxAIC38Kjg/8CtH9IPkC/Rz8/vUH8evr4eg07XzxQO3P6LPsM/PE9Sb5sf5O/7b9cAPwCacI+gQCA70DrQcwBif9Qv7ECYEEJvEd8dECFgWr+aL4MP7M/9cElg37DMkGbwrvE0cVzRCcEaASxgwHCZgN5A2xApb5/vk2+8X3pvJf7WLpU+rR7kXvU+tv7AjyvfQ790D+zwEfAFUBjAWeCLgH0QJ6AYcG8gSn+hv7dAciBonzbe1Z/M8GIv9s96z7awByBMYNOBCzBxsIZRRRGM0STxEaEiUOHQuHDaYMYgOM+tb5VPvo9xXyIO316QzsY/DZ7ZHqNvCn9cDzKfYT/wEDPAK/A8YFkwf0CJIFEgJxBM4EH/52/CgFFAZW9sLtxftJB3P+8PXW+xgCGQTYCQUNFAm0CSESVRYwEi4QHBM9EQoKngkGDZYGVvvX+DH7+fcC8knuR+tN6q7tue9V7Qftq/E49kP4e/xXAgsDkAHkBaUKAgnBBJQCjwTlBrAAn/kAAS8ISvvA7mD3pAJt/tT4Nvws/wMBWghJDisLewmwEDUVehGoEA4UWRC6CRIK7gurBsH9A/qx+ZD2t/GK70jubuup6s7tne/c7zbzrvXJ9gb9kwQfBIcC/gWMCDUILwaHAsYC0gX0/wX4kv35BID8DvER9Tb+4v5W/Hj9nv/jAfYIVw8+DX4Kyg/+FZ8U1BEeEjoROg3OCmsK8AUp/sz67Pqt9jnx5u+m7t3rlewA75TuT/D49H32NPd2/YwDIQQRAyIEIQehB0gDlwDDA7YCEPoz+D4BAQNj9hDwzfnoAZ79ofqo/7UC8QQeC34NYAuSDpcUIxTWEDMR5BKhECcMmwniBpkB//wq/Oz5CfPu7QPuB+5R7Wjuh+3t7Bfy9PeR+MH4M/2BA88GDgVYAxMGIQctAzwCSgOX/X/34Px0BCD+VfGv8hcASAPq+r36wgLeBZMHpgx+DvQNxhCWE18TRhL1ESUR/g0SCkYInwUNAGT7Qvlm9mzxPO6G7TntHe3a7Cft2u9l9Cr3MvjV+k4ACwUbBVMD/gSzBiQEmQHlATQAq/r9+PT+TAFw9wPwxPh9ApT+0vnC/y4GxgZSCRkPURFUEOcRLxVhFe0TRxNjEDMLVgmmCPAC8/uX+fT3lPJ07qTtK+xP6+vssu2C7d/wA/ak+Mj5Y/1IAz4GgwRaBNkGRAXHAMgA1ALK/pf3s/juAHgAmPQg8cH83wN4/gH9PgTCB6II0w6DE8MQkRA0Fu0WiRJGEqgSqAzsByMIYQS2+073Pfju9YTvXOsq6zHs7+zd7VzumO6z8Rb4DP
x7/Hn/tAPPA2AE0QajBET/wP5uAtwAhvgQ9zcAEwHl9NTywP/9BJX9e/zBBbALxAvbDWIRbhEKE+kXABceEaYQXhLQDOsGYAZvArX5kvbS+N31NO746eXqXe0p7sLtne1y7r3yyvnP/In7fv5KBLIEOgRdBlAEHv9J/18C3f+T+er5/f+y/mH2a/cAAUAC6vyO/zQIswtMC/gNfhJQE8ITBReaFvgRuBC+EDUMRwgABsz/7PgY+Bf5mPMJ7DjqL+zy7Bvs5esu7bXvyvOk+PP6BvwUAKcDWAPPBCEHxQMK/1sAsgKX/qP5kfss/8f7r/YW+ugAUQAg/VYCsAqIC2YKjQ8zFVEVTBR4FeMVBhSmEboOIwoKB6METv6j91P2Gfbf8NnqhOk36wLsueuJ7BHuoPAe9ab5l/s7/UgBRQQvBOgEwgU1A+cAswGqAOb74/qv/wEAZ/h19uj/QQVr/zX9HgZnDhQOCwzKD0QWRxcXFAoULRVEEwYQtgvCBsgECgKG+sv0jfTU88Du/+ll6VjrmewI7MHs4u9d87L2KPpI/FP+RAKaBO0D/gPmA+0BsAEKAvn9pvlf/OkAH/6694n4DQFdBRYBWv8fB50PLg9EDHoQLRd4F84T3RJJExoS+w74Cf4EeQLP/gz4wPSU9dvy3+v26Inr8u2E7Srslu2A8bH1MPkf++L7rf4jA8IEmgPVAtQBCQJxBIYBQfoZ+z8CIwGy+ZD5gf9hA1gDKgITBAEKXA9uDyYOXhHtFjkXxRIsEQASHRDRC5UHZAOl/1T7kPYR9XD0k++o6frpiu3O7d/rtOui7vjyUPdO+vr60fspADcE+wPrAp0C4QDJAcwEu//O+P39QASZ/QD3zfxMA74BlACtBKEH3wkTD64Puw2zEqcXQhSLEHoR5BDIDU8KJAa9AJj8EPpO97P1ovFE65bqAO9U79vrwupp7Wnz6ffG9/r3Nftm/+UDIQUpAqUAqgEwAl4EbARe+9H2RwF4B+v8Kvad/q8F7ANfAqoGjwqWC3cPxBEgEOkSFxb7EUwPbxAnDX0JpAjpA8378PcA+Zn4gPO/6xbp4e2m8aHuaeo37CbyNPdV+Oj3RPrD/aAAUQSDBDkAbf+bAj0DsAH+/rL66fwtBDUBnfiw/FYGrQQMAFMFCA0fDUgLFA/YEjcT/hN1Ek8O0Q5eEJUKKAQvA18A9vjs9db36vTf7eLplev074LweezV61XxCvbm9zL5V/pf/K3/SgKsA+MCKABW/5gB1AJCAET8kvvQ/z8CoP1V+30CzgcLA6wBtwsrEZMMowyREu0THxMNExMPTAynDtcMpQR7AKsAqfx/9uH0YPWW8ijtmeor7q/xHPCT7nLw6/MD+P/6P/tL/Ib/xAAKAd4CSQIPAFABbAG2/a39PwHOAE794fwL/14CKQXkBKoDmQaBDW8Q0w2DDXwRKROaEQ4Q+QwZChYK3wfPATf+Gfyl9/f0MvXU8j/u9+wn7sHvMvFi8a7xsPNK9wj7q/yA/IL+3wE/AjwBJQK1Af//ugCl/yH8UP1hAIH9nPoc/goBuwB1AnIFJwdtCi0OLg5uDmUSfRSDEeYOgw+ZDhcLRgdgA8f/Bf4J+0r1AfLn8ZrwD+7e7bTu1e4y8ELz3/UE9wb5Jfz5/tMA9wG/AoQCPAKLAg4BSP/2/zP+RPpJ/ML/K/xX+aD9DAHhABcC5gUjCa0KIw3iDqAPIxNAFBgPVQ2fECkPlgnUBIUB1f8R/gf68vST8dvvvu938Jjvpe4d757xs/a2+Yr43fnO/pIB6QEDA7oDjALiAZMC9wAp/pL+HP7D+iP7Svzf+Wj73P6t/Tb/OgQwB1gJtgnNCnIPABJ2EUsQ8gzpDKIQtAzxBZ4DwgA3/r79KPpa9Yny7fBY8L3wEPJK8fXv9fIy+MD6dvqC/Ov/IwFvAloEEQQmAlkBNQGoAKr/Vv7o+yP6ZvwW/sn6+/ko/3IAl/4wA/8IFginB7sLCQ7fDmgQYQ
6aCzoNag0lCTIFlQPxAa392PqU+tD32/Pf8NTwN/TX9HTxrvBU9V36RvyA+zn83wBPA18CVgRKBb8BaAAuAjIB2/68/mT8M/o//Yz9lfrT/M/+2vxU/z8FWAgrB6oFRwnWDcMOLA50C0wJIAxwDSgJ/gQBA4IBQf40/C/8Xfh38xnz1vNE9GD1d/NW8tv2gfu7/Mf7Yv3xAbgDYANRBHIDyAFWAuABCwDQ/nL9svte+7j8Wv2S+qP5Af+tAUf/pQEICEUJvgcVClAMTw3nDnUMlQksC9oKhQcEBfcBnP/O/R/7/PhD9430EfHq8Vb1q/T/8dTys/dQ++P7sfsZ/l0CdgPxAnQEtgT3AsABxAAIAMz/j/5e+wf7qv3F+1P5OP2VAMz9Lf3wAyEJuwcbBkYIXAzVDj4N9AkJCZsLrgwTCCUE1wK7AGL+Y/yY+kj4YfS+8cPzIvZn9fbzu/N+96r8rv1x/AX/aAPoA7sCGATCBY4DEwEuAvEBqv+3//z+SP0x/l79bfsL/18B/fwD/fcDBAiSBaICygV3C48MngkcBzoHjwmvCXsFhQL/Ab0A8P0Q/ID7Evlk9TnzD/Sh9tX2I/Rw9Ln4wPvW/Ef9a/6MAVsDvwJFBLQFvAMBAqIC0ALgAYQBDv/k/MT/nACa/XP9Qv+n/iz+HgKmBFUCpwGVBG0GFAezB7cFogP5BaoHagRBArsBMgBQ/qr8kvyV+1H3y/T59sP4s/gQ9+P1pfkT/kj/rf0w/scCnwS4A8gEjwVoBNEDngTQA80B6wFJAf3/CABo/7v+/f/o/wX9bPxnAN8DZgEU/8UBwwNSBCoFKQQTAkYCLAQeBBACAgBV/vT9MABI/oT55fmA+v35Avt++lT58vrG/Hv9Uv7B/tkAcAJ6AWMCuQR+BD8D1AIeA5gDdQJSAUsBAAATAFEBXf+t/tH/l/8H/6sAQAQKAIX9egWEA8z9SgOaBCYBiwGVAfgA9gF7ACT+hP3f/VX+Rf19+wL78fuN+5T6oPsp/PH7I/1s/ib/owAzAcIA/AECBNQE8gElAUgF4wPhAMwB1QHMAE4AtABy/3j/pwHY/cT7cwJ2BPL+5PtuAFQFtwL2/u3/NAJaAoABCgFaAAoAXABe/5f/Tf/8/LT8PfxU/OP9kvxw+hf8Nf/+/rX8zv21AHkBPwGBAKACEwUOAlgBDAR2AoEBxAI/AY8A6AEWAa//TQGwAdH9Ov0VA8YE8P0E+o4A7QYlAsH8Jf/CAToD5QE4/XT/xQMbAN/8yf8yAjD/3Pu5/fz/P/+p/YH8/P4JAfr9u/yWAGcD2ABn/h8BrwSdA2kA8//wAjcEugGoAccBXQBcAnUDYgC6/2gBIwKgARkAJAAW/3X9sQAfAnr79/kMAaQBeP2s/QH+HP9QAuz/uftU/goCIAG+/W/7E/+8Aob+4fsv/t7+Sv/L/lP9Rv+5AKT/O/+LAKMCgwKCADYBBgRLBA4CRgFlAigEXwPs/zkAmAPuAzEAtf0WAe4D6P/u+xz+XwEqAHP9cP1uAD0CUv8I/c//yQIwAP78Sf/GAc//W/2T/i8Bf/8a/L39iAAy/0P9lP2i/0ABCQAc/qH/BwLOASAAYf9/AZkDxAJ8AGT/tgEYBJ8BVf5/AHkDzQGN/+D/cAG/AU7/Jf49AF8AJf4B/nD/7f/A/ln9rP6UAaUApP1m/4MCqgHV/1P/UACUAeIA7P5z/sP/+AAiAPP9Cv4yAHIA0/5s/p//qQCeAMYA/AA0AOwARgHhAEkBZQCM/1gASAKHAWr9fv42BN8B1/r4/LoC8wBO/GT80/5MACUAgv21/JX/egCZ/vn+RgCK/zj/7P/e/zz/Wv/b/6n/mf7B/rn/Nv49/TMAngB2/V/+dgElAlcBNv8tAKID6wJrAQ0BMAErBA0Dbf+2AP8AnQCyAR//5P1BAcEBkP1A+z/9PgASANv7XvoK/18Dj/5K+ur/wAKGAZoBNP/O/48DwQAJ/zYA2/1Y/Xb+6P
5y/9z96vxz/xoBEgHyADUAgQLCAwYBIgCJAewDjgK4/Fj/7QTwAIn+Gf8O/8cBvQBm/Wr/dgDJ/Nv8igFiADH6zfpRAQcDuP5Z+7b93AHmAZv/Gf5S/nwBWgMD/1b80gBqAkL+D/x+/mYBrADE/bL81P+kA7kCMf9J/8kD+wWHAocAEwLSASIC1wGS/pb90f7V/3IAzP0I+4L/RgI3/oT82P31/j4AQQAj/hz9Wv2q/9IBpP9q/Ln87gEkBDT/7vuY/jsDOwNR/aD7ogGJBLsAL/0R/gcBSQJAAH79cv5oAMQANgEuABD/jAKPA7D+Yf4CAtsBDADhANYAh/9g/3X/EQAN//r9Nf5D/j0At/9V/Ez/7AHn/QD+rgH4AE/+t/6EATECnv/c/sMC9wN7AMP+jABlA5ADeP5K/GABxwKQ/yr+ov54ALAAPP+kAJkCPQDO/lgBRQKCANL9U/za/rMBZP4i+pn8sQGAAr/86flt/xADrv+c+3/8HQGKAq79wftMACoCEv9W/ST/wQF4ATj+1f7SAh8Cef6g/nUB+gJLAKT9fADmAoEA6P1T/+EBrgCO/kr/pwDIAO7/OP7b/nQBzwF8/+X9lQAiA+oAPf7n/iMCIgKH/Xf9/gBTAOH9Jv0w/sD/rv7E/Mr91wDFATH+2P6EA4kCYP/3/z8CQgOFARYBYgOgA+4CHAN0AqUBRwK6A90Byf1bAMgDKf+8+0v9sP+T/4n7Hvt9/o/+Wf7L/SP8oP4cASf/Df4H/4oAiAEGAF7/5AByANn+2/8OADD9cv76AVEAf/4W/3j/1gCxAtcBx/4q/9oDXAXbACP+BgCCAqcBE/8V/+X+f/2W/pgAHQAL/kb9BgDNAkwBzv5q/zUCbQOj/1n8MgB7A84AxPtS++gAkAIj/zD9o/0pAAEB2v4s/ygAxf+7AAUBZwAYAKgAyQJQAzABaAJxBGcCywC5AfUCZAHT/9wATQCL/sD+FP9C/13+Kv3U/54Aw/3L/f/+j/5t/iv+S/61/ir+d/3l/WD/4f7c/Qn/rACkAPn/JADWAPQBWgIUAu0ABwEbA68CKAEzAucCjALgAW8A7wBRAvcAcv4G/tb/6f85/Tr8Tv39/aT9qfyt/Cz+/f+2/wL+wv5IAaACkgHG//EADwNrApkACwDYAOkBFQGw/pT+QgBmAAX/iP7y/3gA4P8lAAsBDQFhAOsACAJIApQBCwDpAJoCcwCq/nwAEgITAcX+Ev4ZAF4BfwDz/xMA2/9G/7X/YwGCAon/C/x6/oYBOgB4/YL8yP56AI7+9vzS/eT/JgHg/fX6Vf/yAi0AIP75/sEBYgMbACP++wBAAmUBAACx/oAArQEhAOT/pv+C//v/Mf+r/+oA7v/0/0EBpgDy/74ATQHrAL//vf98AE3/RP4l/xMAzP+P/gX/BAFFASYAmADWAWgCNQLTADkB5AI+ArwAHwDQ//L/8/9J/yb/WP4M/oz/7/6H/a7+//7U/av9Jf7G/kL+RP61/xn++vzC/28A7v29/JP+eQAk/xH+5/9nAYQBYQH5AQMEaAVSBK0DyAUXBxAF+gLqA4UEZALtAHUAMwDW/7b+xf3P/bn+nf65/Rr9hP3K/g7+xfvU/IT+zftA+oX8lvxr+Xv54fuO/HP91/7I/xQBbwHqAYsFzgckB3MGTQbQCPQLMAs5CNIGcAecB04FIQJpAA7/P/wM+Zb3wfcg9ibzB/PU89vy8vLN9ND0l/Sw9sf4+vnS+i/87f9UA8kDTAUTCU8LkwtqDagPnw8+D1UPEA9qDnkNbgueCBAHMwYcA/j+rfwv+9D4UvZz9Pny/PHR8aTxz/CZ8Y/z4fOd9IH27faV9w/7Zv4M/df6SP6pBFMHtQWtBp8LQw/nDoMONREfE20SqRAfD4IO3w0pC1oHqQXDBEwBCv1L+/T5k/ZG82jyAfN48gzwR+9k8QzyPPFp8qDyx/KW9rX3s/QT9379pv1B+vP7aQLsBmQGWQZdC6IPHA
+uD1MSKhTGFEETKBH8EHMRRA84CoAG0wXiA+j+xfr/+EP37vRS8qDwzvH08vTw3u9o8m70UvTS8w70G/VZ9mL3y/aN9fr4wv7e/UH5QvxHBYcHZASaB44N6w6/D1YS6BPyFZEWghMvEWkRDhI7DioHgASxBWgD9Pxm94P1ifUs8/ju9e0p8KPxsvBr7zPyFPa89J7yPvR09kb4J/nm9sT0h/mA/0X8v/ct/W4FcwY0BVQIrQ3AEFQRxhNUFu4XhxnFFzUU7RPmFCMQ8QdVBcMGiAOs+0/3KvfB9InwKu7p7WXukO5i7sfuqfH49CH0HPL884X23/YK9/n1avOG9sL9Bv7D+Fv6igPjB6oF6AahDOoPGhHmEyIXyhmfGloYLxW7FL4V4hF/CY8EhgVnBEj9gvbc9NT02PEm7l7t2e6i8AHwF+508ar2EvVq8dzyTvU59t31K/Ix73v0dv19/Vb1sfaxAxQKwQYYB2kOXxJPE0kWRhjxGcgcdRumFe4TbBbIEzIKpQOiBM4ERv4791j1YvXo8v3uz+2z7xDx3+8c7mrvkPNn9MjwQe+18YjzT/MG8ZbttvD/+PT5oPRs93MCvAe7BVkH3w02EvETtRaIGtsdzR7xHFYaAhk9GdcViQ3MB4kH9QVF/w/5NfdE9mTy3u5d7wXwiO6T7dft0u/A8qLxau7o7vHwJPKY8pbvnuwV8Rf5u/pL9Vz03/5JCV8JmwjWDQMSchQQGUocgB1TH6YeHRsAGqsa9BbsDgAJQwd3BcT/cPgY9Yf0P/FU7PDrdO4P7gXseew27xryIvMy8ELtou9+8nLwDu+X71nu6fBn+K/5/fQQ+C4CKQc9B+gJQg/rEoIVtxlYHbUeWB/THhocnBlIGIYVkQ9ACSgHdwZWAFP4kPY19nnw9+sF7kPwve447Tbuh/CX8vTyi/F38E3xZ/Ir8Z7vq+/K7oXwYfix+zf1FfQcALoK2AofCcsLlxADFocbQR7lHcQdNB5PHXMbRhkoFHoMvAcwB7UFCv939tbzKvWJ8f3rhOwK7kHsXuzr7rnwtfIX9CnyVfC88hr1WPJE7wbxK/KA8QD3cfwf97Xz8P0BCbgK5AkBDM0PBhWuGlEesB6IHc8dMB4IHNcXHhNEDbUH9AU3BX//+vYu89HzsfGe7A/ske8B7w3sce378KryRfPH8vHwH/GD8+fzcvEP8NXwUfKv9qb7l/l59KL58wWBC6sKNgvxDcQSpRmSHdAcqxvuG+gcMRwwGCUTHw7YCJUFEAWHAi/8tPa/9HnzYfD97nLxS/IM77bt0+8N8ZPyPvU39QnzPvJN8rfyc/Nv8pXxk/Us/EP9Uvhv+F0ByAicCWwJ3AoODXkRHBf6GTYa8RlsGMsV+xOSEk8PEAoUBX8C7QENAID8lPkR9x30jPLD9B345vZg8pfwC/IZ9Fb2bfYf9G7zc/OT8bXxu/NE89Hzsvek+UH4KvgU/MkB5QSnBdUHYwotDHAPOBOdFakXYBfIEyESoxJrEMgLBQgiBtMF3ASeAcr/a/8q/Fn4yPg0/Ev9yPnb9FTzOvS59P/12Pa49A3yl/DO79Pxi/MJ8kfz/PbS9rX1Eviy+xz/wQFcApEDTQaWB28IPQssDyIS6REVD2oOIRAXEHYOggw5CtUJVws6C/QJuAdqA7QAfgF/AnUBNP74+OX0//ND9N30S/Tm79nqAOrR6xfuwu+i8B/zh/WY9AT1kPn7/HT+XABLARUCGwPjAmEElgjxC28Ngw1DDOkMhw/FD0kPPBApEDMQKxKqExoTKA96CdwH2ginBukBRf3Q+EH1E/MX8j7ynvCu69nmyeX56LPsBe0u7aTwcfNf9Or2KfqK+7r73Ptg/BD+iP+//74ANQPwBJAFzAYbCfMLSQ6ZDnsPTxNpFjcXuBfIF1AX9BUyEx0SFBJ9DYcFhv+t/Aj7ufeW8n/v7u1o6kfnNucb6HDpIezC7k3wm/Ev86b1HviA+JD3Yffi94D4YP
nC+gD8CP1f/9oC9QQNBgcJ3Q0xEp4VKRgdGXQZ4RqxHAIdcxoeFkcTOBEVDFYFjADx/PH4D/WT8QLvLu1T67Xqr+xK7jHuMe/n8O3xpvNQ9fH1zvYj9+/1IfUP9SD1dfVP9YH0gfUk+ej8bP8pAsgGBA3dElgXJBuBHscg8iFOIsEhqR8HHDkYhhR7D5UIMQFl+0D4i/XR8Cvsoekw6LPnpOm87MPu2u/F8OPxy/OK9lP5Zvrh+LD2gvZ495T3/va+9iv2DPX09AL3H/ph/WUB2AWHCbMNLxONF0Mash3CIBQhxR9kHq4cwhkXFbkPjwriBCz/0/qp9vnxP+5r68ToUOgn6ifrk+o56hnrnu3m8GvzQ/V/9iD3efiN+h38rPw1/N36YvpK+yf8Lf1O/8UBWAR3B+MKNA4dEcYT+habGUkaWBoSG1wbERoUF0ATjg+ECwEH8wKO/s355PUM8lTuCe0u7Zjrkugn57ro6OsY7grvf/Dw8jP1x/bY9235uvvF/Lj7ZvsT/Wz+zv6y/6kBSQRpBrsHtAlgDH4O2BCWEysVuBXEFs8XpRdSFmgUNBIkD0QL6QcaBYIBi/zU9xL1kfMZ8d/tC+sI6YTom+mN6szqCuyy7lrxWvO89Bf2nffY+Pn5bvuw/Aj+i/+WADEC1gRsBgIHkwj7Cv8MXA92EUYSNxMUFXQWjxaXFbcTOhJbEJkMxQg+BscDrgBf/c/51PaD9KLxle6u7G/rYOrx6VPqjOtt7UjvBPFw8oLzD/VD99j48/kL/AL/QwGWAiYEPAYQCGwJpApzDLsOghAfEWYRShKfE/wTFxMaEksRiQ+CDH0JgwfWBeQDrQFY/0D9CPuZ91D0QPPS8ozwVe2B62XrOex+7Z3uTe9u76bv2/Ce8v/z7PUk+SD8wP1I/+IBuQTABpgIzArzDGEOiw/VEC0SWhPVE4oTgROOE/gRwA6gC40JhwhUB1QFbgO5AeD+ffte+Yv4a/fw9Mzxce/t7TTt4O067y7v4e1V7U/u1O/c8Pjx3PT4+Hn74vse/ZEAAwQHBtMHCwrLCwwNEQ9bEcwSZRNTE1YT4RNtE+IQPQ0YCkwIgQddBsQESAPzAEL9iPoj+hr6Bvip9B7ytPB97xvvD/C+8LLvAe6V7czuCPAn8NvwG/Rl+NT6pvuv/Q8BlAOABfIH/glTCxENaA+EEQMT5hN0FPwUGBWjE5UQPg33CtoJiAhVBugElATjAg7/vPtk+lT56/Yi9Cfy2/DA73zvXPAx8d7wAfC+707wvvCs8H7xcPQ0+Jn6iPs1/S4AZQIrA1kEagY3COkJcwwpDxkRRhL3EmATrRMLE+gQCQ6kCwIKwAixBxEHNQbHA7n/EPz0+Vb49/Ws8wLyYfDl7uHuJvAg8fjwXPCJ8NPx8fI/83b0R/fO+TL7jPxq/t7/xADHAfMCBASWBdwHVwquDPMOwxACEt0SThO+EgER3A5pDbsMwAtgCv0JuQneBuUBGP4U/AH6OffD9Hzy3e/q7QHuXO8P8GTv0O7/7xzyA/NX8331+/hW+4X85P1A/8f/MgD4AJwBCwIyAz4FiAesCaELZg3MDt0PWRAUEEcPVA6mDYkNVQ2lDGQM+gtUCcEEMAEw/+v82Pkg9470l/E/777uOO8w71Tu8O1I73jxhfIg84r1Cvkp+xr8Yv2n/gL/VP8XAJoAvQCHAVMDcgVCB+QIZgqoC84M0Q1RDvMNhA2UDQcOOg4xDoQOMA6LC1QHSASLAkIANP10+pf3MPRX8QPwau/e7jnu2u2S7jPwIPFv8VzzLfdD+nb7XPzm/db+A/9z/z4AfgC9ANkBfAPPBBQGnQf3CEgK1wtJDbsNrw0CDtwOsg9QECcRlxEgEJ8MOAnJBlAEEAF3/ef5d/Zq807xB/BD75bu6u3v7SnvpPCJ8djykPWi+JT6pfvo/Bv+l/6u/rr+mP5+/o/+6P7V/40BdwPmBFQGWQglCgsLjguGDP0NpA86EUESVB
JjEWEPuAzJCccGewP6/6f8u/kp98j0qfLv8HjvSe7J7XTuz+8T8bnyivXP+C37bvyB/bj+if/X//T/vP8D/x7+xf0S/sD+c/81AFoBJAMyBRkH0QhvChUM7w0dEC4SWxNsE0wS7Q/nDPkJ/wbCA2sATv0p+i332fQn86LxMfBW73XvS/Cl8YXz7/V3+Ij6J/yf/fr+AgCxAAEB3wBNAIj//f6a/kX+GP4X/j3+sv57/6wAJQLQA7sF0AexCWsLEQ0+Dp4OZg6oDVkMoAqhCEUGxwOWAZX/bP37+sP4CveK9VL0rvPC82P0bvUc90v5HPuD/Pn9Rf8IAJgAOAF0AU0BQAEMAZ8ANQDj/3r/Av+G/i/+Ff4f/hr+Wf5H/4gAjgFaAg4DggOPA1kDTANxA0AD2wKFAiwCpwEFAWYAuf8K/3j+/v3I/ev9UP7Y/l7/2/9vABkBtQEjAn4CywKXAhoC1wGNARgBvgBwAPb/m/8t/8H+fP5a/kn+Yv6//jP/Zf9N/+n+W/7E/Sj9i/zx+1379vrT+hn7tvuJ/KD9Df/UAO0C6wR+Bt0HOAlcCjsLmQtUC04KjQhuBhgEcQHB/k38b/ow+aX4PfjG98/3k/h/+YH6+/vK/X7/qwB9AdYBbQEZAM39Hftc+J/1vPKb7w/s+ui05y7p/ew18k34if+tB1oPlxUMG7Eg2CWWKd8rZyxAKnkkchvUEKIG6v3f9ivxP+3C6nToteWt44zjAebz6hrxdPfS/SMDHga5BrgFlQO4AJ/9c/qM9mry8+285lzditku4RTuWfe+/QQHMRNeHC8gQSOSKW4wJjMhMYYs6CXLG2oNf/188CTpJebO4+rg5t8o4bXiEOUQ6TDu/fU1AM4H1ArIDIAOBA5pCyAHZQHJ+071Iuy645XfN9t402HSUuIM+lgHHgrtEDcfTyncKussmjZBQTFBcDawKTwdGg7Y+zTqv9+J3hzgJd4Y2l/Zo90Q4k/k2el19b8BlAsMFKIZwBnTFb4QZQzlCN4Dz/t28lLpst9q1y7U/9Kvz9XR8uPQ/WoMyw6RFBQiECzTLQwvQjbqPfA6PyzHGzAPiQIh8u3idtz13jDizN6P2Sfc1OWS7W/xUvhUBS0S4hdaGNcZgRvOFwwPiAcuBHgAiPcf7ADjmdu+1X7UPNVI1PHZpO5yB6kTgBV0G78nsS9hL74uIjQWOCsvoxs1Cuj/fPWQ5p/aXdlF3nngQ96I3sDmTPI5+dv8nwRpENsXuBeTFagV6hTPDhUFef6Q/In4p+6r5ADfQ9tt19/W/9nC26rfdfAJCeQX5RqpHqcmmCt5Kswo5isfLwwoTxZoBSD8T/Rg6GDeVt6D5cTquOme6Krv9PvpA7QG7guRFfobEhoEFXsT+BLRDJABFvnh9pT0Qeze4trek92d2jbZz9z44EDk6++XBbUWeBvPHDMgJiK5IEoffiENJUUh3BIsAsf3gfGU6YriUePk6/zzv/Tj8ab0av5SB4IKOwxvEtIYwhYWEGMOXA8tCiz/aPaf86XyA+0K5HXfZeB74I7f6+PW68DumPD0/DwPJhnNGdYaIR7RHk0cWhrPGwMdHBZIBzv7ofbV8q3scene7Sb2i/nY9mn2wf2/BwIMYgseDyQWDRahDwoNsw4+DPoCD/jq8MztHerp4zLfpd/J4fvfLN7946zqGeu973j/IQ5fE6YW6xv9HY8bchn5Gg4ezxskEQUFFv7j+Sr0fe+b8DT26vhD9qT0APoSBDELqwtyDKUSrhYbE2wP1hDJEZYMcgKH+C7y1ezn5f/fDt864bLg0t2M4P/nhOqr6cDxMwRWFNoavh2ZIIQfqhvDGugcFh/QHDoTmwe9/xf5qfGp7XLvSvPt9C/0BPUB+u8AoAVZB9AJgA65EGUO1wzZDrYOnQfK/If0Q+8t6d/hut1L38bi7eIQ4o/kRufl523tMPzLDKEX1B12IfMgIxznF0IZqB4+ILcZ5w6pBWL+9fU37jDtG/
Kb9dT03PLB8x364wEfBfkG4gvDD4QOBgy1DZ0QqwxiAmD6KPbB8GDpp+Oc4h3lFuas5efo5+zb7K7uePg4BR8PnBUHGdEZfRh8FvkW5BpFHXQZGxCQBqcAxvpy8gnu9fDz9Iz0MvGV8Av3FQAZBL4Ezwd7C9sKHwgTCRANOAxDBLv8KPq1+Kzz9uw76sfs+e7M7TnuoPPU+B35s/d7+9oEWgx4DusQuRTyEzMPQQ1AD+kQOg5ECJYEJgN7/uv2y/IF9Xb4Cvjc9vn7HQWGCCgE8ACNAu4Czv93/1EEqwefAx38mvcF9l7zNO9S7XzwivVE91j3ofpX//3/Lvz0+fn9RAMiBasHfwtNC9oG5gL/AfIDKgUPBJEFaApTCysG/wCEABcC/gBm/4cEHw53EO8IIQGy/Z/6rPYM9T/4CP1a/HP2xPHC8Rv0svQ+9Kz4+/9XAj8CZAayCsAJrgXzAY4A1ABiAEkBowRyBC//NPs7+fj26ffg/ckDrAZGCUYLMQmtBOkCbgQGBqwIGg1UDm0IpP9Q+Ur1GPJB8WjzxPWP9Cnxgu+G7+/vLfJw9pb7fAGcBpIK/w3aD/8Ptw5iDPcKyAnwBloETAJ5/iT65vdg9i71z/XM+CX9hwHMBOcGzgdvB6AGOwe1CUQMDwzZCCIEKP7V9/PzifJD8WnxOPNe8mjv2u7P8EzyjPO593T/ZwVUCEsMjQ9aD44PdxDOD3IPUg4YCcIDIQJCAIf7iPel9kf3VPcm+PD6b/z/+7/9DAIRBS0GeghxCiMI5gJF/7/9rfvz9+H1fPbH9NbxCvP087XxnfEk9Fv3gftB/Q//CQckDhgO4Q7EEg8UTxOmElMQFw3+CrwIzwO8/Tn7dvu2+OH1nPjf+6X66Pgd+W/7kQAiBfcEggMdA4wA6/yx+gf4fPf3+Av2WPPE9az0DPBl8Wr2H/nR++r+HwBDAkQHBwzTDloPuA/6EAUQBg6yDrcN2Qd/AokAPv+t/Wb8kPyA/br85PrY+r374PwjALgDZARmA2sCdQAo/R/7N/uj+dT0R/Ln8d3u9Ozw7zLyWfL+9JD5yvzs/sYCPwkZD+ESehVZFagS/BFaE7kSLhC3DckJJAI5+mv4evoV+gL5EPw3/ZH4LffD+/D9E/6MAgsIqQkHCaIG8gGz/bX77Phv9DTxl+8u7FfnAuV35kTqne568mP3tf2HAkwGIgzrEjMZpR2CHaca5xnBGIwUDBHQDqsJTgEU+dnzP/Ke8jLzDPUJ99D36fn3/KP+4gGyB64LJg6cEEUP6An4BDcBSvyE9gnx2+tR5ujhFeDI36PhX+e67przO/b9+14GmQ4NE4Yb3iXEJ4ci5h41HbwZnxT+Dk8IGwGJ+XbxZOvN6mbt9O5F79bxT/fx+tP7t/+OB+QN7RDiEqETGBHQCs4D+/4G+3j0x+zA5j/iMN933eTcneBX6BPtr+6H9SwBUgqfEfIaICS4KJEnTCT5ItwhMB0jFUQN7AZI/pPySepB6RzqA+pD6/zujfNu9o73+fq8A04N2BGjE/4VbBVlD0wIOQMm/1D6aPNc6+nlnuJm3gHcjN/b5UfpZerz79H6qwTSDOsWLSCBJEMl2CR6JFUk2CGpG2cUdQ2kA1n3Xu6r65brkOu56xbtp/Cf83vzqvSj/DcGsQoWDbIQMxKUDs8I4QMaAXj/9PrJ8onsH+m45A7hgeGj5RXqLuvT7GX0wf3PBNUM4RVIHBEgJyIKI38kPCRXH+UY6BK3ChoAY/bg8PPvNPAh7irtpe9+8d3xp/MX+Fn+BARdB60JwAvPChcGuQEgAboBE//f+NryNu9g7CDpEOjN6sztYO2T7Szzhvp+//kE2wzFE7EXTBrnHKMfTyG0H0MbpRY1EeMInP/n+QP4fPah8y/ynvPS9KHzjfIS9Xn6uv6cAKQCaAWMBSoCj/4h/goACgBk/L/4w/bO85DvW+3R7gPxsu/L7b/wFPaH+V/95wPgCp4PrhIZFucZrx
waHTwbkRj5FMEO5AY1ASz+mfuq+NL2xPbI9gr1BvNk9IL49/qe+6H9TQAkAKb9MPuq+nj8lf2w+8T5G/kx96v0+vNQ9TD3UvZH82nzsvZ9+I36UgBIBy4MUw9KEv0VSxlhGokZbxj5FoUTVQ0dB58DSgFI/Rf6Pfrt+u/4R/UX9Nz2Kvno+PX5hvwL/LD4v/Wy9aD4evqC+Mb2tPfT91L2zvX79sb49fgR9w32lvdR+Qn78v6JBCAJCwzDDmsS5RWpF38XKxbmFHATbQ8lClUHaQVKAaH9y/zm/NL7ZPmK90j4h/kU+SL5dPqi+Y32DPSI85X0VvXw8z7y5vKC9Fj1evaZ+Jr6yvrB+RL6Svwy/gsAXAMVB48JrAslDvMQMRSXFmIWnRRVE+oRqA7/CqIIfwZUA0EACP5+/Ej7Avo0+fP5vPoy+rH5VPkt93f08vI98mPyrPJT8Q3w3fAd8v3yFfXr96X5xfkI+kP8nP/cARkENgeoCVgLgQ3nD8sSABYKF34VvxNEEvEP7QxLCksIvAYuBFEAhf3h/Pj7Fvpq+bL68/vi+8X6M/nt9mL0YvJV8RrxAfHY7yzuvO2p7qrv9/BQ87f13/YT+H/6Fv6mAvgGrAkjDAYPPxE/FJ0Y4RqpGYsXoRX0EqQPMgxhCXkHcQWzAXL9/frx+bD4lfcU+Cv6g/uP+sb49fcA9zP1QPMg8jLyIvJv8HLuRu2j7JDtze8J8eLxMfRt9zX7TQBWBRAJeAzTD50SvBWIGTwdqB8hHxgcEhkvFvwRJg1wCWAG4wKp/qf6Avia9g712/PN9Fb3yPhm+LT3n/cq9wz2MvXr9D70gPIv8FDuPO207CXtge5971TwYfL79Sn6iv4hA9AHxgvFDikSxxZbG8MelyCJIEUfCB20GCwTxA5KCxgHggJA/gL7yvjV9UnyT/Fi8631yPai90D4uPcA9gX1l/UC9rH0NfLt743uqu1D7Vnuf/A28dTwHPKn9cD5Dv4MA40IXQ3IENoTzhcfHCIfdSCuIDog0B1xGHESIA5mCmkFfQA0/ej68/f286/wPfAQ8qnzQvRc9aX28vXQ843zzvSq9Ony6vAl7w7ud+2S7ZrvTvIL8/zypPT098z7IQDtBNIJBg4fESAUdRgGHYQfrCC5IWAhDh5FGHUSSg6kCvQFCAImACH+O/qp9a3yafKn80P0jPTC9UT2nfSk8onyvfLl8OPtMey+6ynrler960HwMvQL9cT1VPl8/er/uAJcB8ELIQ5FEB0UoxhMHNseRyCmIGYfVRtRFVsQvAzkCNUECgKnANv+Svu19zH2mfVr9KPzZPTd9bb10vOy8tHyEvG77MjoFuc+5mnlguYq6+vwB/QM9VL3ivuy/9sCiQbtChEOig/GERQWxhp8HVIeAB9pH0cdaRgdEz8P1Av7B/AEpwM0Arz+efoK+Ln3VPdK9kH2YPeV9wX2i/RJ9L3zk/Cb67vnreX941Tj4eUW64PvtPHB84n39/vE/4wDGwhCDAUPYBHlFDsZiRzXHS4ebR5BHUIZURT7EPIOCwxeCAAGjgRVAU78x/hs+Bz5W/jm9tf2efeu9u30uvR79TH0RPA57G/p/OZw5B/kh+f46wDuJO+G8jn31/o2/gIDbwiaDFkPDBIBFocaGx1hHa0dJx4xHFoXMBOSEaoQ+Q1qChUIBgZvAXP7Lvj49+D3nfbN9Sn2LPaz9MPyN/Jh8tXwSe1Y6ufoa+f/5Q3nWuqf7OrsRO4e8oz2bPrt/gAEbQjbC+0OlxI0F7QalhsNHLcd2R22GpwWSRQREw8R+A20C1AKKweQAcb8cvv++vf4xPZi9nP2xfSU8u3xsvKP8jPw9OwN6xXqfegr5zrorupN7FXtre+I87X3nvu9/xEE6gfSCgIOnxLeFp4YOBnEGhYc4RrIF/AUnxOjEqsQOA7QDJILvAc3Aqn+qv1K/LL55/ch99T1QvMy8RrxwvGs8A7u8+ul6gfp5udh6Jbpqe
oj7H7uXfFt9ND3k/uY/xcDwgWpCAIN6RH7FAQWeRfqGeMa6xhLFhEV0xTQE8sRDhAqDx8NpQjYA/8Agf/8/Nz54ffl9lj1dvJh8LXwkvFF8NbtZuyL63DqSept63Lsyey97ZnvvvEW9M722PlX/bAAMwMaBt0KmQ+bEUsSfRSCF2AYqRZOFfsVCRctFkAULRNwEsgPJAvcBvwDdwE7/rj6UfjG9qD0gvG171PwG/Hh7zHu0e3s7YftlO1T7szuQe+L8LPxMfI+8031TPdK+av7Wv7pAU0G3gk/DAsPnxJBFc8VZhU+FlsYihkqGZMZiBoKGTAUvw63CgEHZwJc/W757PYR9Gjwru1f7SbuRu6z7ZHtJO7s7pjvffB88ebxl/HQ8fTy9/Mr9Kz01/X/9mX40vrZ/eQAuANkBoAJIw0MENkR6BMwFz0anBu9HDYf3iB0Hp0YExNVDxgLwAS+/hb7APho873uLOwa7J3sP+yf66Ls3O6r8CPyUvQ+9pz2FfYA9j/2Xvbk9dX0FPQb9FX0AfUI98H58vt9/nYCywY7CkANoBCdFAsZJx1XIE8jZyU9JE4fwRlaFV0QBQrFAyf+yPiP8+Pucus56vvpY+mh6fDrse7i8KHzPfcB+hD7+vrT+ur6kPr3+OD2D/WP8wry2PBk8CLx8vKj9WL55f0hAg8GwAqEEIYWXRwmIjUntCnQKGMlLSElHUsYnhF6Ch0E0P3o9qvwEuxS6ZLnGuaK5VLnrOop7hXyp/YV+un7r/2y/4MApv+E/Qv7IPmY9x714PFK79/tMO3Y7dvvhfKt9br5Yf7YAxQLKxNXGpEgfSXPJ40nUCaQJHIhoByoFj8QfQk1AlX7p/Um8SXtkOla50TneuhR6iPt0PAj9Mz2ZPnq+7n9av4X/oP9Fv1d/Gf6t/cK9cfyEvEW8HXvae+j8E3zIfdW/LcCSgl0DysVyhnjHAwfwCCQIQAhzR54G+wXlBOcDY8H1gKg/tT5OfXV8fLvM+8M7/Xufu9A8W7zm/Vj+BH7d/x//Ob7Ovvm+jf6kviQ9uf0I/NQ8ULwF/A/8A3x6vLT9fP5Lv9OBBIJ+g1zEm0VWRcXGZIaOBvHGooZcBdfFDURPQ7LCqUGYgIl/kH6NPcQ9V3z6vEX8VLxg/Ih9KP1Bvc/+GD5Tvq9+lL6nPkN+Wr4RfcY9kD1a/SR80vzvPP59Mf2s/i0+rD9lAEqBcMH/glbDHkO2w/LEN0R6xJzE7oTzxMdEyQRcw6nC7cIRwXSAdD++/ta+Sv3i/XA9Lj03vT/9An2yvdF+VH6IPv++kf6APr8+Yj5BPnC+Lz4NvkA+lL6bPou+8X8Y/7A/8YAowF9AjEDcwPnA/UENAZXB7cIBwr7CqML/Au2CyELeQpxCeIH/wUkBC0CvgCU/yT+8fyD/L/8Ev02/eL8hvwq/DX7dPnS96j2vfUw9Vf1/vXy9hv4dfn2+p/8Bf4j/y4ALAERAhcD/wNXBGsElwRvBFUE5wSoBZYFFwXMBJQEDwRZA70CVAImAt0BUQEQAYcBbwJRA2gEeAX2BZ0F3wTDA14CowCQ/lP8DvrF99f15PTP9Nv07PRg9UH2Uvea+K35dvqF+wL9Xv6J/7oA/QEJA+gDdgSRBGkEOATPA2MDRANKAycD4AJ8AugBVAE9AacBYgJkA98EcwarB0gIdQgvCH8HYwbZBBgDYQHI/z7+wvx6+4H6v/kh+dj4CPms+Uv6k/q8+jr7/fum/BP9qP2b/qn/VQCdALgAsQA0AGr/CP9Q/6r/tf96/1j/jv8CAGcABwFCAtID8wSEBfQFsQZTBzQHTAYhBR8ECQPOAcYALQAAAMH/CP8d/q39Zv37/J38evxS/B38D/z++9n7y/us+3T7dPvA+837tPvI+wH8OvyL/N78Ff1s/Rr+zP5g/wEA0gDbAdYCeAMWBCIFcgYlB+YGRAbiBYcFkATrAp0BVwGkAZUBNQFIAfsBaAL4AUkBPQFnAeMAsP
+d/hf+tf3+/DP88vsk/Oz7+/oJ+ub5JPog+uL57Plf+vj6Vvvb+zP9Mf/jANcBpQK8A6YEEwVNBbsFWQajBggG1wTRA0YD1gJrAksCoAL7AhcD/wLoAuICyAKAAu0BKgGDAO//Wv/R/pb+iv5W/tv9Xv0d/ST9Lf3z/In8J/zF+yz7ffpN+sb6VvuK+7b7MvwO/fv9vv5J/8j/MwBBAOH/mv+w//n/BwALAHwAdgF0AikD5gPvBMsFCAa0BT0FFAWjBNkD6AI5AqUB5QDl/wL/j/5B/u39l/2S/dH9Gf42/lX+jv7F/sr+s/7C/iD/q/8RABUA8P/X/5//Gv9x/ur9YP2U/L37IvsN+3b7SPxy/c3+JwB+Ae8CYQS2BdQGeQeBB+oG8wXmBNoDyQLAAckA1P/E/sr9Lf0O/UP9mP3z/Ub+Yv5j/pD+J/8NAN4AhgEAAkYCVQJZAk0CCAJ1AYIALf9+/cv7dfqC+b74G/i39973o/js+af7xf0QACAChQNOBNwEiwUjBjcGxAULBUIEWgMyAicBegAaAKf/9f4s/sX9z/0C/kH+sP57/0QA1ABPAQMC9wLRA2MEogShBFMErAO6ArgB1gAFABr/Cv4H/Vj8+/vY++v7VvwO/dD9Q/6B/rr+/v47/1f/cP+L/37/KP+U/gH+n/1v/UL9A/3A/Jf8iPyf/Ov8cf0g/sn+b/8lAOcAxAGJAh8DhQO1A7YDqAOYA3sDPQPcAmoC9QGFAUEBOAFRAXcBnwHnAXECJgMFBO0EvgVHBlEG1QUEBRwEMgMeAtAAX//y/Yz8LvvW+bD42vdS9wr3+vZH9wT4I/l++uP7TP22/hYARgEeAqAC5gL0ArsCNAJ5AbAA2f/n/u39Bv1a/PT74vsn/Lz8hf1+/p7/6QBdAsID/AT0BbIGNAdQBxkHxAZpBtYF4wSTA0ICCAHc/7r+sP3r/Gn8Dfzh+/77k/x3/Vr+Cf+C/+L/PAByAG8AXQBAABoAwP8z/7X+Yf4u/v790f3K/Rj+rv57/1kAOwEiAvQCsgNpBA0FhAWVBTwFjASbA4YCXwEnAOz+rv1s/Cj7/Pkt+dD4vvjZ+CH5zfnJ+vL7L/1q/or/eAAlAZEB0QHiAeMBsgEsAXQAvf87/9f+d/43/lH+u/5M/97/hgBnAYgCzgP6BMcFRgaiBvIGEQfXBlMGtQUBBT0EPgMPAtQAw//q/kH+uP1P/RH9IP1n/b39EP56/vf+TP9t/5r/6v8XAOz/c//c/iT+Mf0v/ED7ZfqI+bL4+fev97v3GPj0+Eb6tPvq/B3+f//w/7b+AgB2BgsLJAmMBvoHuQlvB58CnAERBjgGO/3g92f+/gQABNQCwAQEBbgCKwHEAY0ExgcJCdQGpQPrAlwCm/94/Ib72vsO+zX50Pcg9mzyEPH89Lb5pPym/4YE4Ac0BGr/8AQPDhcNmAeACUYPcg5dBwEEuQbEBbr8vfVm96b5bfiD9qX21Phu+Nf1KPZV+ff79/xB/cb+PAHTAf//6v9jAoAE4gPoAJX/IgCC/6z90fyt/Wr+Yfys+On3v/lZ+b32OPc5+jr7j/ok+z39BP8JABYBrAL2BG8HPAkYCt8KFwwvDacNjQ0cDRUNNA7zDkkOmg2IDUsMyAnEB4cGjQRqAfn9S/vt+WT4yPXt8+TzpPMT8hnwKe/p79/wR/DR75vxn/Nh84vyv/Mj9mf3kfd6+cz9KwEsAuQDFwerCMcHZAjfC7oNVA2oDrMRoBKGDwwLygkQC9kJrwVDBEQF7QIW/rH7Sfv6+Xf4dffW9gj39vY39fvzt/Sm9ZH2VviH+S36BfzX/TD+tP5VAPkAQf+o/i8EEgyeDaMKDAzhEDkQRQpMB9IIggltB2YFmQWXBokFzwGW/hL+XP7O/Ov65fu5/oL/b/24+hr5t/gu+Hf2qPWS99D4s/ZH9NHz9PIq8SPxxPLj81n0DvXM9VT2e/aq9or49vsb/cr6VvsdAvYIjAqZCZALph
B9EksO3QurD5USyBAxEB0SahJpEtwTThIGDgcNzw0sC34GvANiA6QC7P5Z+mr4cPdW9BjxCvB27zvubu3Y7TDvTvA48DLwc/EV81v0ZfUr9wz7cf94AMX+AwA7BRUIUwXEAoQF2gmnCRoHSwc0CSYKyQofCzYKBwm+CHIILAcDBqQF8QRCAz8BY//z/c/84PlH9THzTfTL82jxdfGB8yL0LfSm9Rv3hves+Aj7dfyK/A794P4AASkC/gIKBc0G/ATmAeUCYQZNB0cHFgn4ChcMEw15DRIOpg9zENcP4w+gEAAQjw5aDtwNzAn5A1kADP5G+sT1q/KW8MvukO087SXtZOz+6uXpnOn96TfqPerW687va/PG9Bn2y/ja+mT7rfxf/6gABgAVArgHEAsEC4sNChPLFNsSmBPIFpsXHBZrFb8VABX0EtUQRg9pDYEKdQdbBdoCcf53+tX42faC8vTuuu5P8CHxZPCV7/nwYfPg81rzu/Q++Mr6Q/p2+Kv4Q/p4+ob6Lv2XANgAS/+aAOwDAgQIAmoEvgnQCm4JMAxdEKkQCBB9ESES4RBwD24NuQo+CYoIeAaEA8UA/P03+mT1CPEr7g3sO+pS6Zfp9eqE7E3tNO4J8Ajy8/Pi9V73fvi5+Un7Sf5YAtsE0QY8CtILLArhCqcOPA9kDLQM6RDiEmkRXxHKE80UGhNaETMQLg6tCzkKoggiBfQBQgEDADH8Ufik9ezyue/h60fo7ed86n7sEe7377XwEPJi9av2cvXr9rr6/fuK+xL9wv9bAPD/wgGhBOcDhQDrAF4ErAO7AK0DqQkkChIInAqoDukOMw6OD14QrQ+QD4YPfQ3KCgEKogl9ByEEMgEq//P8Qvlc9Zvz9vJB8b7v6u+Q8HbxgfNJ9WX2pfhP+jT50/jA+oH7cvtI/KP8r/7DAl4Cnf4fAPED/wI8AVsDxAYUCUoKWwtCDeYOIw+iD7gQVRDnDjYOPQ3eCmcIPwZ8Ay0A9fzx+QH34vPx74HsKOuX6sbpFeo86yTsC+4a8Urz8vSJ99X5rfpT/Ej/mACGAJoCrgX5BcUE+gWTCLIITQehCCMMhQ0oDaQOsBBSEHsPnhC5EeIQ8w8qEHAPvwzUCUAHQwQuAfb9xPoN+H31+/JB8frvHu7+7MTtbO7i7avuxvE59HL1Gvjj+s36Afr8+qn7mPow+nj86/6M/V/7Pf5XAnIBzwAIBcIH6AbQCO8M+Q08DnAR2xTeFfwVLxZzFdoS5w6+C1cJtQWEARL/Tv0B+uP1ZPLJ767tGuxw67/rcexo7Vzv6vHY86n13Pfh+OT4f/oF/dT89PsI/7wD+gO2ASoDvgZHBsoDcAXkCDUJWQlXDJMOeA5ZD+IRCxN2EhUS0hH+D54MIQkCBuQCpP+q/Mr5U/YQ8xHxN+9I7G/qYOtD7GXrpOuk7tHxEPS99oT57Ppx+1n8L/3H/CL8y/2eAT4DQgHTANYD4wS+Ak0D/wZkCD4IpwqODVIOVw9AEm0U1BQhFYYVWRR4EZoO6AtmCBQEjQDJ/WH6QvYL8wvxGu857ZbsEu0G7Yrsku1Z8Ovy5fQ19475DPvw++P8gf0Z/X788f19ADoABv5M/7sCAwJU/4MBBAaWBiAGNAmPDFwNug4AEg8UTRQHFaEVexMoD20LmwjDBBkA2PzX+q/3zfOn8YnwfO607K3s3Ow77ITso+6g8ar03vcj+739yP73/qX/8/9D/hr9Gf8xAfX/G/6H/w4C5QHTAG0CIgVWBpUHiApODUsPSRK8FQMXDxfPF3AXdBS8EL0N2AnZBMD/W/uR97vz+O9D7WXrTOlM6Ajpjuma6aPrVe8y8uz09fif/Dr+CP9xAKABHgG6/+3/2wExArn/ev67APIBjf+0/hwC2QQqBQ0Hvwp3DeQPKRNQFSQWfhcpGAMWhBKkD6QMhgjCA/3+2frG93D06O+F7KrrAeuL6Y7pLOt67Bzu+PBl9MH3BfvS/eX/FAF+ARoCgw
JyAcEAGwLaAUj+A/2y/xYAYP20/X0BlwN8BGoHEgtdDdgPJxMTFYcVcxbQFhYUDxCBDbkKjwUqAGD8pvhe9J/wye2Q6yfqeuly6WzqH+wt7sfwpPOC9tj5P/2W/0gB9QIOBIoEWQTbArUBSQKVAdD9Bvw3/pr+e/yv/cEBygM1BRkJ/QzdDrwRsRU4F6MWXRcdGBUVPxD5DNUJFAUoAMf7I/fZ8uPvOu0v6kjosuhJ6tHrb+3B78jy8vWu+BH7nf0uAM0BgwJOA+wDPQPlAX8B+ABb/or73fty/Wr8SfsR/scBZgO+BbsJpAweD/oSgxVeFeAVPRepFWcRXg43DFMIeANA//X6SvZv8l/vVuwh6qTp6Omu6vjsbfA283j1xPiO/N7+PwDnAdwCeQJkAsgCXwFW/z7/IP8//LD5mvrQ+9/6Y/vv/q0BdAN4B1YMAg9PEf8UjxemF7AX7BfJFbkRVw72Ct4FpwDd/Bb5TPQ68OPtD+xN6mjpsekz6/XtyfD68rH1l/lH/cv/cQGvAooD0QNmA0cClQAh/8j+Gf4E+4f4Pvpl/EH7MfsA/4gCsQRxCKwMVw9DEvsVJxiCGMMYeBgcFkcSaA5yCrIFlgAT/D/4ZvSj8NvtH+yC6ovpY+pw7CjuAvAB89D2fPqY/SQAJQJaA38DRAPRAjUBDP+P/rP+F/yX+A35SPtU+v/4zPtm/0cBxASACXAMMA+aE7sWgRdNGP4YdRdRFBIRVw3cCHEEAgBC+0T39POO8MjtEuyX6szpNeuG7fju2/Ca9N74JPzc/sgB7wMQBCYD8gK/AqUADf6u/eD9fPs0+FL4O/rZ+XH5QPxR/yMBNQXFCu8NEBDyE60X1BjsGO4YNRf3E8MQFA1RCJ4Djf+r+7n3vvNo8FvuCe1U63bqC+y17jTwxfE29UP5Xvw5/woC6ANVBNQDGgPcAc//r/3h/G38OPpu92T38fhf+Ib3cPoG/5wBdgR2Cd8N1BBzFOAXRBmpGREa8xiiFYUReA0tCUsETv8J+6H3rfOV7zztYOxV66zqyOup7S3vB/E69PT3Sftb/oEB5wOnBDcEggOLAoQA1/03/On7tPqv9wH2V/eC+Fb4Avrw/VkBewQICYYNbRB0EyEXaxn4GQUaABktFpUSmA7hCdUE3/8z+zz3xfNq8Ojtq+xr63bqcevA7VrvEfEu9Nn3+/rX/a4A9gIOBNUD/QLuAZwAoP6g/F77SPpf+J/2B/eB+Bf5r/qd/mUCYwWZCTwOeBE7FGAXsxlwGloaXBnJFkMTgw8VCycGSgFu/Nn3DfS38K3ttuvR6unpzenV69Hup/Bn8s313fnW/Ej/5AFTAzsDwQJUAsgAE/5A/Jv72fmV9pH1lveX+Hr4B/t4/zQCGwVbCu8OYxFoFPEXexmrGRQaQRnYFfER4Q6bCtkEIAB6/Ln3CPNR8AXuNuua6dnpa+qT69LtUPDD8hr2Dfpa/dL/LQIBBEoEdQPEAocB5/4E/VP8Q/rh9p325fgx+WL4QfspAPEC2AXlCigPqBEgFYwYihm4GV4aEBkFFT4RSA6eCeQDof/S+7v2BvJA7+zsZupJ6X/p/ulw62zuifEA9Pv29vqX/icBAgNTBKQEJgRKA18BVf4q/Pj6j/jG9Rz2VfiZ+An5pfyVAOUCggaqC1cP0hFVFUsYMRmXGfsZbxgjFSYS6A4cCtUESwB7+yr21/Hf7sTr/ugC6KTouumK6y/uEfFG9Cf4kvsT/poA9wIFBPED3AMJA+AAiv4M/d/6c/d09Xn2xfeC9/n4VP3SACgDvQfpDMkPjBJVFj0YVhhLGbwZFheTE4ERog5dCT0EhQAB/ED2tPHG7pPrfehq59vnoeht6u7tb/FR9Az4m/zd/5cBagMSBW8F2wQLBDcCrP+f/Xn7VvgN9pr2/Pcz+Nb5Qf7gAR4EXAi+De0QNBNzFpAYuRg6GZoZTxflE9wR+w6dCT4EZgDW++H1CfHs7aXqiec95pDmXe
c66WHss+/W8qH22foj/oQAvwKqBGEFYAXBBLwCHgCB/tf8jfk/9y742/n2+UH7Bf9gAgUFVwnPDTkQ/BLzFoQYvhdtGFUZtRbDEqgQ4w2sCJ0Dxf/n+vL0ePBl7eTpoubs5eTmiuf66LzszPCY8yj32PsO/9cAPgNYBagFSQXSBDoD4wDw/rL87vnq+Of5MfoQ+p/8wQBOA+QFUQo3DpsQpROHFgEXkBZVFxgX2BOGEBgPSAwKB14C7v5o+h/1DvEO7q7q3+cZ55XnNuhO6iruYvE89MP4YP2M/z0BCgSKBVUFXQXRBIICfABY/+/81flD+bX6DvtK+xz+jgE/A5wF+gkoDegOKhKiFZIW/hZeGN0XQhRHEbAPmgstBbIAk/21+HPzKfBe7enpDehq6KTou+gg6xfv5PFK9Ir4U/0nAAkCZgQwBrMGdwYXBcsCNAGf/zf8Mvmt+TT7vfq4+tf9BwFrAu4ExQiOCwAOpRE8FOQUUxYLGIcWABNEEakP0gr0BDoBsf04+B7zKPBm7SvqzuhP6ZPpQOoQ7YrwsfIs9Xn5Zf2x/+ABdgTcBQ0GxgVyBCECHQAz/kr7Yfn5+Zj67PlI+wf/SQF7ApQFhwlcDBIPDBILFGgVQRcYGGoWNxQCE0IQ0Aq2BVECJ/56+OzzPPFZ7jrrs+m96QfqL+uL7Znvd/EA9TX5wvur/f4A5gPIBAgF9wRoAzwBFAA6/nD6CPil+Zv7vPp5+gz+VgGhApEFcwmrC2AOdBK2FIoVmBcUGScXdRQzEzkQOwreBKABk/0S+OHzaPGr7r7reOpb6i/q2Ool7VfvovAH81H31vrL/Eb/KAL3A58EqAR1A5UBMgCT/q37dflT+of85/w1/U8AhwOaBJoGfwr0DEkOMBFkFKoVdxZ2F4oW6RO8EfYO6wmGBJIAYPwj9z/zQfHW7vfrx+pS66br9Ots7TXvjfDE8iX24/gt+yH+LAEeA0UEtATKA3cC1gFwAEr9Tvvg/Cr/+/4X/1kCmwWaBgsIlAohDNwNDhEXE4YTIxW+FiYVQBJeECgNfgd0Alr/mPtn9qry8/DE7qzrfOo+62PrbOs07WTvOvDa8ZL1A/k0+/D9cgEiBJsFWQa6BcAD+gGuAG7+vvzN/XX/J//2/2QDEgXaBHkGCwlFChYMFA/9EFUSthT5FT8UEhKwEBkNNgdTAkP/Tfs+9tPyLPEr79fs9+vg67rrU+zu7dnuK+9r8U71QPhx+jP+dAJBBSQHPQhuB6QFhgSxAq//gv42AAEBYgCzAbsDuAOyAz8FGQZvBjEJLg0XD7MQzhNCFTMT6xCRD9gL6AWWAeX+Evuk9uHz6vFE7xftVOyz6xnr7uus7afuhu8n8vf14/hm+wT/8AIABh4I5wgKCPUGKQX6Abb/NAAgAXIAuACOAhgDLwJ1AosDMwTcBS4JMAziDnkSPhXSFN0SsRF9D1AKyATQAYL/dPsd95j0l/Ky7z7ty+uf6j/qg+sR7c7tZe/98pH2D/kF/EQA8ANUBnsIiglVCFsGxwQTAiH/Ev+oAEwA6v/5ARID8wHMAeECeQPyBEEIzwucDpkR+xOuE9YRhhCNDjwK1QViAx8BWP0r+fH1/PK37/zss+tp68vr++yQ7tHvffFO9FH3oPlc/Pn/RQMQBlcI9Qj6B0kHzgXZAf3+WAD/ATwACP8DATgCFwHYAHMBhgGSA30IwgskDVMQShMyEtsOBA02C8QHwwR1A9sB0v7h+z/5cPUG8XTuoO0Y7V7tHu+t8EHxxvJm9Qj3VPhh+2j/kQJLBbIHRAguB+QFvQMpAC/+8v/oAeMA9f9xAdYB4v9h/8IAmgGFAwIIAwwCDk4QVBIbESIOnwxOC8QIQwfSBj4E+v+C/C/5m/Sx8O3uye6m7xbxTvLX8v7yX/MS9Mf04fXq+HD9WAE1BFcGdgbrBCQEwgJ1/gv8TP98AvoAhP8XAXwBXf+x/lMADAKJBO8I+wwZD1gQpxCiDk0LaA
kxCSUJTQkHCewG9gMsASv9Mvic9O/yYvKb8vDyIPNA8y3z0vK+8pnz1vXE+Df7XP3E/6MB9AGEAfsB7AHd/of8jv/fAg0BWP9XAdMBp/8RAOwC3gN8BBgIOgx5DVgNrg3ODHIKawk/Cr8K6wotC9cJ9gXAAfD+PfwM+bH2R/YY91/3p/Xq8ljxJvEq8eTxDfS49sv4kfo2/L/8h/zi/ZEAiwB7/T79+ADiAcb+Ff41AFwADQBcAn4ECgWABtwIZwocCzkLsgoNCosJ7QjNCJUJegkWCAEHzgUqAzgAqv6h/UD8ZPuy+4r7pvlV90H20PVN9Ur1CfYH9/r34vi4+QX6a/ox/Bv+sP3e/NH+8gD0/xn+wf5lAJMAxwBnAoID0wMZBXYGwAZmB3oIdwgMCCUIRQg0CEIIGQhMBxwGEgXEAzICVAHXAO7/V/9a/2n+FPw3+l/5f/h19+b2rvZX9mD2G/fP9z349PiR+ln8svwL/Nr89/6e/0L/oQC2AmoD3AMdBY0FygTbBPsFcAZhBvIGhgdZB80GDQYpBc0EeQQnA2EBBwAL/xX+F/0I/LX7gvxF/QH9HPxQ+x77IPuP+qD5XfmM+b/5bfoT+y77Evxe/uP/gv8U/1UADgIQAmsBlgLLBIQFOwVuBaAFEAWwBAsFOQVZBfcFPAZMBSYE7gMjBNMDKAPAAlICUwHA/3r+yP0W/Ur8SPwX/TP9JvxS+yj7qfrH+UP5zvgi+NT3PviT+Jr4K/mg+rL7hPvJ+9H9l/+l/zIATgJ/A2IDSQSjBcQFsQWPBlwHgAfRByoIwwcPB6IGUQYIBuAFnAXoBAYEJQMuAuAAp/8g/wn/E/9g/2L/nv7h/a79J/0l/GP74voc+lD50vhv+NL3fPcv+BP5xfhe+Oz5Dvxd/FL8Bv7T/1cAQQG8AkADjgMPBYMGAgfWBwgJGgl2CDUIvAfzBrgGpQaeBUsEigNzAqcAJf83/mL93Pyu/D/8Yfvf+t/65Pq0+qH6q/qZ+oz6ovp3+uX5rflL+uD6qvqr+uf7OP2F/dT9t/5J/4j/fwDUAZACXAMPBcEGuQeXCE4JYwkcCb8ICgiQB8AHuAe2Bo8F3QTSA+gB5P97/o/9AP2Z/Kn7lvpj+tb6wPpI+kL6bfo5+jL6ifp4+iH6xfot/KD8P/zK/EL+zf5g/q7+kP/u/20ApwFlAmAC+wJjBCsFWgXcBWMGegZXBiQGzQW/BRIGAgZTBccEaQRLA5UBGwAQ/wb+B/0u/HT7ufpG+lD6T/ra+YX5gvl1+Y35AvpO+o/6s/t1/U7+Tv4i/9MAiQERASwB+AEmAiYC0QJPAygDbAM5BIMEeATMBPcEmwQmBMYDVgM7A30DaQOrAggCzQFaASIA0/4i/rr9HP1T/K37EPun+qX6nPpH+vz5D/oz+mf6tfrc+gP75/tO/TT+k/6W/zgBGAIjAoQCQgOVA8MDRASJBGwEowRGBXYFWAWXBbIFJAV7BPoDVwP/AhoDvwLhAXwBbgGoAIj/BP/c/m/+A/6s/QT9SfwK/Of7g/s9+0X7JPsJ+2b7qPtN+x37sfsg/P/7aPy//db+F/+E/3AA9AAbAbQBQQJgAtcCAATIBAgFlQUtBhYGrwV2BREFagQfBB8EuwMGA08CkwGXAJb/0f4t/rH9hP1m/eL8SPwF/Pj7pfsx+wD7GPtQ+5f7qPuF+5P75/sO/O77DPz0/Db+F/+U/0sAEgFYAZsBSALLAikD9gPmBHIF5gVYBk4G8gXABXEFwgQ/BDgE7QP8Av8BOAFXAFL/Tf52/dX8c/wq/Nn7Y/sF++j68/rX+pf6gPrj+qL7Pfxo/JP8Ov3s/Qb+DP7Y/hQA3QA+AbcBUgKyAtkCDANWA6sDLAStBAkFWQWiBZoFSgUABagE/gNhAyYD3ALzAcwADwCJ/53+hv3Q/IH8OPzw+8D7g/sz+yD7XvuF+2T7bvvt+5r8D/1c/aj9I/6x/ub+xv4+/1kAIAFJAbEBTA
J8AowC2ALyAugCSgPXA/gD/AM/BEIExgNYAwYDbwLZAaIBWQGZAMv/Rf+4/uT9Cv2k/HD8LPwA/Pz79/vm+/n7Pvx2/Jr8zPxN/fb9gv7S/iD/hv/C/7b/0f9ZAPMAOwFjAb0BFwJEAl4CfgKpAvQCWwOZA80DCQQpBPoDqANoAw4DbwLfAZoBYgHLAAAATv+//gv+SP22/GP8Nfwe/AL87/v7+y78ZfyW/MT8Cf1q/fP9a/6//gz/d//O/8r/o//X/2IAxQDJANUAEgFWAYkBvwH8AVACxQJIA54D6gM8BFAEIATvA7cDSAOqAi8CwgEhAUQAgP/P/hD+Vf3S/Gr8Gfz8+wr8A/wC/DP8lPzz/Dn9ev3l/XP+DP9y/7L///9cAIEAWgAoAEkApgDVAMsA0AAVAWkBsAHoAR0CeALXAhUDKgNTA3ADWgMZA94ClgIpAp4BHwGvACsAcf+k/u39Vv3H/Er88fva++37BPwJ/CL8a/zL/Cj9b/2s/RT+tv5I/5X/1v89ALgA6gDNAMIAFwGKAbcBowG4AQUCUAJ7AqAC2gIZAz4DWQNiA2QDVwMmA8sCZQL5AYAB8ABdANX/Q/+a/uz9S/3Y/IX8Pvz3++L7CfxD/F38a/yh/BT9i/3k/Tj+u/5P/8n/FwBYALwAGwEzASMBRgG1AQ0CKAIpAkcChwLDAuoCGANGA2EDVgM6AxMD2gJ6Ag0CswFbAd8ASgDK/2r///5p/r39OP3j/Kb8Yfwp/CX8XPyO/Ir8dfyZ/PL8Vv2W/c79Lf65/kf/lP+1//v/cgC8ALoAywBJAfABOgIpAh4CWAKlAr4CsALQAhcDUQM+AwUDxAJ6AhYCmQElAccAZQASALv/TP++/ir+pP1D/fb8v/yX/IX8n/zi/Cn9W/2E/b39Df57/vr+bf+7////VQCnANwAFQFiAZ0BvQHwASUCPwJQAnMCkAKhAqMCwwLzAgwDCgOJAwgE4gJ1AFv/JQGKBFcFHwIBALMAPgBx/mn9YP0G/lH9J/y5+8D7Dvy9+Sn6avxj/JP8L/z0+5H8lPwV/QD+kv7f/mn+u/84AGz/fQB8AeUBVgLAASkC5QSIBAYDZQN9ArYBqAJ2Agn/YP7UBOoJxwYQAqEBxALnAQYA9wBAAQsAs/6F+5/6Yfol+1T8yfgA+e/78/kE+1783fkg/OX9ev49AD//mQBlAxQEdwONAjUEMQXeBHAFWAXCA44DggUUBC0EswS+A0YErAIeAQQCOwKTAET/Cv+t/23/g/3I/J/9w/zH+w/8BPxL+xT7kfz5/O37Avzp/GD9wf0L/13/5f7C/xEA0wDXAFAA2gFyAjcBZgFFAqMCowI8AhMC7v/7AbMDiQFrAQ4CjgEZAKUAaQARAiMCGgBkANsAnQCu/6z/d//r/sn95v3y/cf85fz3/In8tftK/K780Ppl/Kj9h/3f/Zb9C/8Y/xEARgHoAF4C6wIMA50DYANbAwwDYwN/AsMBfAJuAqYCHwLpAbUBFwIGAq8APgARASYBsv/j/64AggDD/zL/Iv+M/2H/lP4n/vr9d/0Q/fT8mvxe/Nz7Y/zr/Hv8H/1x/+//kv7D/2oAMwDuAIYBvQJLAwYDwQNoAzgCcQIzAlYBegA1AbwA5P+F/8H/xgEoAPf/NgAkAMYA4f9yAFYBGwPuAj8BQf9Z/uYA//9k/nn/BgBg/lb85Py3/ar91P3x/af9N/13/DT9Xf4C//X/KwEdAZv/mQBBAeb/+gC4AuICJgK5AAkA4gB1Ac8B1AF3AecAsv8y/q79x/+zAXgCIQHaAAUClgCx/uz+xAHZAR8ARP/P/aP9Wv2x/Er9YP68/v39cv1/+1v7cv++/3P/hP/D/tX+m/50ADQBNwK1AkgCxgJ4AcEAEgLaAU0BUAJTApgAPgBRARkAUwBSAUABjwCMAIoBUQFuAPf+TwDNAMD/4v9oAL4Ar/5m/pL/uP++/6v+7f4J/uv8jv4W/7P+M/1W/j3/C/
78/qH/uwA5AGEADAIfAXsAmwCTAfYAxgB8AqICRgJ8AdkCSAJPAKgAkAADAboA3/8I/yj/6P9g/mX+gv+X/9T/EgCd/xf+tf65/yT/j/4Z/xIAZ/9j/nf+J/9A/yz/pf/x/nD9a/52/x3+Mv6QANoBYwBt/5EAwQAPAEgAPwH5AJoAqwC1/wv/4/9GAXsAvf8NAAsAsf+r/s//uAFfAcf/cf/u/1D/yP5V/wIAgQBZAKP/jv8A/+H+/v/1/5z/SAD6/8P/BgBAAAIBXgHNAC4A4gCqAL0A4gHQAZgBCgF5AAgBBAGYAA8BtwHQAHH/3v/B/6T/IQANAFcApgCNALr/ef6S/mH/af/M/hv+Xv4p/ib+Pv7f/Y7+p/4V/sj8n/wE/kL+tP2D/RD/xv9m/xgATwD0AAEC/QEkAZ0B/QL5AnwCSwJhA7MDfwIUA+MDyQOkA/ACKgKAAeQB3wG6AZQB7wClANb/V/8G/8v/MwAl/3P+DP5G/jH9jfx+/U/9Sfzr+gT6OvmW+Iz43vik+UT55vic+b35qPly+in9nf+0/x7/rP7u/ycBHAJiBI0G6Af3BnYG1gZABjcH3ghGCu4K1Ak7CI8GGwSzApQC4QGOARQBdf9G/kj9tPwM/V79r/3C/T7+Pv6j/Q3+Dv93ABgBEAENAXgAowDl/+T+LwAbAPL+lf7F/bf8M/ul+bz4k/cQ90H3BPft90P4sPdh+Hj4ifm5/B//DgCoAEcCdATABJIE9wb+Cf0KCwrJCJUIIAhmBuAFowZvBWADXQHr/gH+F/5d/Zb9uP67/Vz8Qfy2/Kn9Mv/sAUUDrQM2BHIEGAU1BYQFmga0BzQHAAYhBX0EXgOiAcQBDQEB/7T9JPvZ+Ej3rfWR9dT05vKi8tHzi/Tk8mDyxfUT91r2RPiP+zn+4/+XAS0DZgPaA1AF5weVCncL8wqRCr8JWgeHBi8HzAY0BcwDnwLXAOn+Bf3f/GX9if02/YT83vwf/b38e/0m/98A6wEZAxAEtgTvBXAGIgcFB3AGkAavBvoFxQSWBJMEUQMcAdv+nvx5+s348vfj93733fSZ8Qbx/fL89C31n/V694r40PdH+E77Mv4uASMEYAXFBWwG1gcQCeAJjAvCDKAMZAsbCcUHEwdnBRID0QF3AZb/qf0F/KT63vkt+Wn4Efg2+Sb6ovpj+zX8vv2//zABQQJFA6gE0QUnBsoGWQfqB6sIqwgnCLEHWQfwBSUEsQJyAWkAz/6d/P76GPoA+TT3ePR68kHyGPO389nzX/Xk9tT2+PY5+Kj6pP0FAR0EfAWZBQUGMgeHB9QHWgldCj4J7QdQB7QFyQPfAmQCnQFyAAP+ZfwF/PX6jvlv+eP6CPuL+oP7xfxL/UL+wP/qAFkC3wMDBWUGZQdUB98HpAkbChoJXAm3CUAI0gbhBcsDSAIUArwADf9z/vn79/c+99711/EL8gj1XvSp83D1tfRR87j0Cfa197v7w/6l/5gCpgVxBE0D+QRhBsAHGgoJC5YKOArxCMQGSQU/BD4DEQPeAkABcv9+/sH8ZvqC+Yj5c/mL+RT6jvsl/PH7xvwT/gT/MgBYAsYEcAZnB2UI6ghACMwHHAgGCGIHiwbSBQYGQQYJBB4BnwCYANj9xvl6+S38D/yi+Hj2HvbU9IPz8/Ml9Zn1CvaY98z5W/r6+MX5p/2m/+3+1wDABLUFQgQ1BIQFLQYPBoMGRwf9Bg8G/gQ6A/cArf+5/woANv+U/d38GP1j/Fr77/vz/Cb9Qf06/gIAqwE+AlkCkwNNBZEFRAUABmsHMAj+B6wHmQcFByYGzAVhBZMEdARoBLgC1v+p/WT+JQDc/Vb6APxj/h377PaF9dz0Bfar+Bb5P/i5+Mv4Evj+97f43/q5/UX+6vzK/an/t/6b/aT/cALjA9MDYALUAUMDsAP0Au4DBgWaA9wBdAElAS4B3QEmAjMCIgIbAcf/Gf8t/xgAXwG7AQUB8wD0AUcCjQGWASMDDA
UyBSIEKQQrBe8FQwYpBhoGNgbSBeYE7QMCAzQC1gFCAYL/8v2m/VH9wfuk+Uf46fhJ+oj50/eV+BX6N/i+9G30z/ZI+PL3SveY9yX4PPci9qH3jfpf/Gr9cv6Q/r7+FQEPBDwFiAXqBvAItAlCCEEGCga5Bt0F7AMmA7kDowM3AisBAAGbAD4A3AD3AZQCrAIpA3IEFQUDBfkFVAeNB1sHWgeVBwoIbQd6BaUE2gRNBIYDfgMfA9oBGAF5AIj+gvxq/Db9xvzy+iv51PgT+Tb3L/Rp9Kf2MfYh9HzzE/MP8h3ySvNq9HX1NfaH9l73fvgX+bD6dv2D/gP+af/LAYYC0ALaA/wEnwW8BWUFugUYB9gHtwclCFQIIQcmBnEG2wa1BvgGqweNB+AGhQZlBlEGEAa8BckFGQYQBjIG6AbUBmoFKQRZBCUF/wQyBAQE2gNaAlkAaP8y/9T+N/6Y/Q/9JPyt+tv5E/oL+nH5Avlv+Ej3vvbf9m32EvZh9g327fQS9GvzTvPV87XzavMI9PbzYvMF9Xj34ffp93H5bftO/dr+5P8WAmEFWAfHB7YILAp0CzMNlg6HDkUOhg6EDkMOdA3iCxELLguZCnIJyghKCLgHAwfqBQAFdQSjAzEDtwNlA9oBQwGMATABBgFUAc8Awv8h/xz/tf/h/4v+ov1s/sr+zf3K/EL8Lfw6/E37pfmy+FX4yPfD9hn1OfNw8ozyc/L28ebwZ+9H73fwkfBe8O/xkPP38zL1X/fv+IH6CP2G/1AB5gLGBE8H2QkzCyoM/A0/D0UP2A/3EKAQjg9HDxQPOA4GDbIL5gp7CkAJswf7BlYGVQXiBMoEVgTZA3QDTgOiA3cDUAIJAioDrQMBA2gCzAHuAJwAhADF//b+cf7B/dT8q/tW+qX5vPlw+Wj4cffB9hr2jfUh9Z30EvSX8wDzgPIl8qjxXPG78THyJvK78nP07/VS9rn2EPgQ+uD7Dv03/hMAEAJEA+0D9gSYBnMI6QmzChkLiQsODLsMRg1zDZkN7w3ZDUUNAQ0pDQwNpQxuDC4MiQu0Cu8JiAl/CR8JEgjcBqsFQwTzAu0B8gD+//f+qP0f/ML6yvlU+RH5y/iO+GP4Zfii+N74P/nM+eL5Yvk5+Zz51/mq+Wb5+PhY+Mv3c/c89zD3GPex9iL2w/WU9VD1JfWo9a/2Vvdk97v33PhJ+i/7y/sq/WD/MwFgAsADewX2BlQIvgkpC5oM9A3gDpoPOxCaEM8QEhE1EQARuBBOEDIPgw0xDHYLagrUCE0H/wWGBOUCQgH1/y//kf6d/Yz8nfvX+k36E/q0+V75kfm6+S75k/il+CL5evmW+ZX53vlP+j36j/kx+WP5MflD+G73U/d19yj3WvaO9TX1JPUW9Qz1VfXf9aT2ZPfp93n4q/lt+//8CP74/moAcAJSBJMFpQYXCGkJWQpUC4QMaA33DVkOkw6kDnAOJw4eDggOiA3ODCEMSgt3Ct8JOAkiCOYGyQXWBPsDBAPnAQ0BYgB8/2D+i/3+/HP84fuG+1D75Poq+qH5qfnx+dv5VPn5+Pr4n/jI90v3U/cJ93D2OPYz9ur1xfUi9nf2fPah9v32a/fj94n4Ufn3+XP62fqN+6f8qf3//TT+Df80AK0AqAB0ASsDhQTHBAwFOga+B58IIgnNCY8KKAu3CzUMdwyBDKEMyAyRDOsLZAtbC0ULTwrxCCgI4wclB80FzgRHBEoD3gHvAGoAaP8y/mf9ufxq+8/58vjF+FH4Sfdu9lj2dPYR9pD1vfVi9sv2+PYy95n3FviW+Pv4VfmY+b758/lI+nf6dvq5+jL7dPta+1X7tPsE/N77pfv/+7z8S/3D/WH+Ev+s/2UAYgGJAngDZgSgBdgGkAc2CE4JagoRC44LUAwIDSUN3wzMDN8MvQxSDMsL+QrhCckIswdpBjkFdQSsA2QC7wDq/yz/a/6h/Rb9p/wQ/HP7//pt+r/5d/l5+R35Yfjb96f3cv
dp93n3Vfcj9zL3T/cN9/j2X/fL9/z3PPh2+Jf49PiC+dj5Cfp3+gH7cfvM+y38h/z9/KH9QP67/kb/6P+ZAGIBVQJXA1IEHAW6BYUGqgfDCFsJqwkjCsEKCAvtCtsKDws3C88KJgqyCS0JVQiwB04HrQbLBesEGQRHA2ICqQE9AZ4AgP9f/oz95PyM/En8qvvr+lv6+Pm++a75ovlv+d34MfgN+Br4rfdl97D3rvcB94H2tvZj99z3xfdw94X3TPhN+cn53/lG+ur6Yvv8+/z8uP14/S/9Lv57/9f/EgCxABgBUAEiAiwDGwQyBWoGNgduB9wH1AjSCZYK8ArTCt8KNwt3CzYLngpPCvsJIwn8BwMHWQZoBSgEMAO2Aj0CdAGhALn/4P50/mX+Cf5X/en8ePxq+zv63Pko+k/6z/mq+Lf3lvf+90z4Q/g0+FH4yPfT9tX2offb93z3IvfX9mj2PPbE9i73C/do93b4mPi19+v3SPlq+n37qPyj/ZD+Zv9tAMoBbANABTQHeQguCTIKYwtpDIcNTQ6fDu0OGA+2DiMOMw4KDk0MeQpjCq0KbwltBwYGBAUXBFYDpAK0AY8Adf9o/q39mP3o/bT9sPzL+8/7/Pu6+6f78/vp+xf7HfrB+Z/59/ge+JL3Sfev9nb1mfRC9LzzLvMU83byC/Fl8ATxvfFE8izzTfSy9Pn0W/Yp+Tf8B/7U/o0AlwOIBsYISQo5C+kMNw+xEG4R6xHkEa0RSxFoEFwPcg6KDXgM8gooCbkHywb3BUQFngSBA1ACMALoAhcDYAK+AdABSwK/AtYCCgIkAYEBbgIoAhUBXgDN/+H++/2H/e78lPv6+f/4bPhA9+P1ovTr8ifxTfDK7+/uE+4t7bXrZOqU6oXsJe7T7bzt2O+58uj0xfY7+Vn8Sv93AcEDlQZwCbYLDw16DvsQxBKoEmYS3hLbEj0SjxGIEDMPNQ5LDdILrwpSClwJiQeEBu8GIgdyBucF/wVDBkgGCAYCBoYGFwdtBu8EbgTIBPYEbAT1AlkBRQBQ/wP+6Py8+xv6ePgJ95X1L/TJ8hHxN+/07Xbt2uyx61Lre+vO6Yfndunn7XTueeyQ7XXwcfIn9fz46/v2/SgAtQKpBXcJ/gwbDiYOyw84EkIT2xKzEc8QshCkEBgQbQ4MDKsKSgpbCXoIHAiWBiMEnANYBToGMAUcBG4ENAWmBVcGiAdkCDkIrgePB0EIfQnjCV4IaAaFBRoF8wMiAjYAnv7J/EL6x/cM9mr0FfKk79DtWOy/6u3oqecP6L7nruMT4TPlyekD6XrnJemU6/ztWvIy97f5ifu//9gE+AjoDEcQpRGEElwVwRiXGdAXSBa6FTkVtBS1Ex8Rxg2jC3AKXgnNCJgH2AT5AhsEKQbSBc4DlAN9BSoGLAVkBSoH5gdpBx4HSwf3B+IIwAhDB+wFlAVSBSgEUwLvAF7/k/zY+T74ZPau8yXx7u7H7MTqYuh35tjmKOa04OTdhuNx6D/mMuSH5nnp/+tj8HT1svdy+Kr8qQOZCCkLtw3DDzURwxTrGOwYxxXzFA8WDhVQEnsQ8w5PDIgJfAeGBssGZwZ+AwYB0QKVBuQGcwSHBLgHdwlgCIcI9goGDCAL9ArMC0gMZQzLC0sKNgncCNwHvAVsA80BAgAl/c75PPcy9WDyQe+87cnsruna5UTl2ua25MfehN3a4zHoB+V+4k3mfur664PwpfZ89xL4af8IB4UJmwtYDvkPJBOzF6sYDxZvFC4VkhXyEjIPfA0vDEsJxAZsBbcDzwLYAiUBP/8vAUkF4QX1AxAFFwmwCssJRQsoDmUOcg0iDosPHxDID2YO5wwfDFYL7gnnB/ME4gFc/2f8Rvm79lfzfu4V6wvq9ujT5abhX+As4uPfh9po3e7kdORa4FLjluj96tnuTvRg90X5fv0oBNYJkQynDs4QbhJ0Fc4ZuRk3FQITshSoFF0QDwz2CT4H4QNXAlIBRv8g/q/9+f
sc/B4AVQMHAnIBTgUxCYQJxQnQDNcPgBCyEDcSqRN5FKEUhhNyEUYQ6w+KDvcLMQmGBh4D9P4p+zf4sPQU75fp5ufT5/Dj9t7X3gjfy9ns1jjdRuLY3kLdhOKz5j/pke628xL2bPnu/8AG2wqQDXMRxRMrFN8XGx3sGqAUUxQQF40UkA55C+AJTwaUArEAfP/5/aH8UvpC+Mz68f8aAKH8Y/7RBHgHNQaPB1oLTQ02DpgQXBKrEg4ULhVgE8ER5hJVE4AQnwxFCrEIsgWgAJT75/fD85ju4OrF6BDl/d/V3vnfC9un1FXYZuCa39fZFtyU4xjnH+pC8Dj0W/bl/O0ERwlyDGURmhQMFEkW9By1HvUX4RO6FkMXDhIqDagKmwcjBMUBVP+J/Uz9HPtF9if2uPyp//j67PgL/0kFxwWaBW4IkQvHDSgQ+BGpEj8UcBbZFRoTLRMUFTMTsg4ZDD0LeghLA7P98vl59g3x1+wO697myeAe4B3jKN/F1fnUbN5646HdS9o84Ybn9Og/7eLyIPWW+UICMgejCAIO4xS0FfUTARhzHWEbNRUnFJYWzRQtDz0KzgavBCMDY/9++m/5i/pK+Cr1R/fU+777HPlr+9MBEgW0BIYGaAvlDgMQVhF1EzoVTxbEFgEWkxQfFMITNBGCDRELRgnEBX8Ae/vD97nzsu5r67Tpy+T83kPgM+Sy3g3V4tfi4q/kdt4v3ljkDepW8Pv1//Wb9on/mQm4DEsNQhF5FT4WdBfjG/McCBdYErwTpBSrECoLrwX8ABgAkQBD/En2qPXw9v70nfTp+GD69/bC9xv/lgQUBDwD6QZMDVYRghHoEMYSthbcGEIX1hQ6FaAWShQ4D18M8gu7CU0EUP6V+hb4Q/NX7dTqaumh423fP+NG5NLah9W93m/mi+FN3WviK+kQ7v/zSffG9vL6iAUhDd0NUA9CFIYWORbFGWse4xoqEwcSORVpE2sNBQi8Ahj/j/+c/rz4wfTs9Zb1QvPf9aT6//he9bb5LALkBBMDQARACUkOARFzEV0RPxO/FtUXgRUmFBsWxRYkEnUMQgtsC0QHdwC/+435TPYa8AXrNeoK6EnhP9/8493hrNiP2NXiKuY537bet+jT7kLvtvMz+cL7TQLKCrMM7A1OFYMajRc6FlEcTCA1GhATABReFh8S3gqoBl8E1gHd/vn5bPaF9xn4f/O58OX1xPqu+Pr1OPpUATsEDQS7BeYJ8g3MENUR3RFDEx0WkxYeFEwT3RQwFPYOdwr6CT0JHARj/Tb62vjk9H7uhOpB6ULmY+Gk4JPjBuCh2I3bhuWI5QLei+D967/w9e8C9Or5Zv55BS4LiQvrDnYXGhqvFWoWcB0bH4gX6REdFV0XwhDOB88EIAZ0BeX/GPjl9Wz6APwn9kvyhPZh+yH7B/o0/aoB6APvBMQGyglYDaEPtQ8kEEASLxRrE2oRPBEVEksQfQtECPEHkgaVAaj7BvjJ9cLyru4M6+7niOSt4rnkwOXN3pLZfuGg6p7mUeBe5urv4vKz9E34IvviADsJpAvjCzoSIBh2Fl8UcRjVHKQaFBTGEHUT0BTQDi8GzwJ6BE0Eo/7d96X3zvu2+sf14PYL/OL78fhS+00BHQTRAssBIwV8CzMPRg1ICnMMwhH+EhoPbQztDW0PiAwqB+4EHgWYAl/9ifkV9z3zbu+a7bLr7ebB4lvky+eA5bned97Y5jvsFehl5aLsFPRs9Un39Psq/5kDfwrlDKYNfBOdFwMVwxRwGhMc0RZ7EoYSSxMWEVAMwAZrA+QDZgOk/rf6qvsk/Hr5TfnI/NT9Tvt2+7b/VAMwA8MB8gJaB48KAQrmCEoKywybDYgMbAvWCxMMCwr2BvsE7wNeAT/9vfq1+ef1DfCU7YftS+tX51bk0uNz5hHni+Gd39rms+s36Jvnw+509Pn1efgu/Pz/JwW4CS4Luw33EmEVYhQbFYgYzRn8FmgT/hK4ExwRWgwhCS4ILA
cwBMX/HP6GAIoAyPvD+cz9ZgA0/n393v+EAaYBSAK2AyEGPwisBzoG2Ad6CxgMygmUB4sHvAigCCQG1AJqAc//hP2//ML7rPcM8yryYvLL78/rB+n458ro+OhP5YLiZuUU6fLnI+cy6yPvxfDG8l72efrQ/u4C6wT9BuoLshByEZYRcRQ8FnAVihTFFF4UpxFDDroMsQyPCw8ISwSOA2QF0AXaApz/aQBYA6sD9gGGAY0CmAM5BDoEGQXyBV0FxgT4BIYGdQilB5UEbwSGBpsF+wIbApQBIf90/QD9nft2+VL36fS18hnyMfAM7c7rh+vr6cvnCOf/5oLnKuha6GjpDuxg7gnw7PI591n6XPxl/yQDgAY6CekKuAwID/gP8w+gEJ4ROxEFD80MHQ3lDR8MTwlEB7gGKQjfCDUGsQPVBCkHEwd4BQoF/gUQB50HGgeFBn0HvQjXBw0H9geKCAUIMweZBloGgAYlBTgDkwJiAeH/hP9U/mL7fvlj+Dn2OvQs8iPvWO1U7bvrTuj65qnnfecT55nnCug66ffqBez47cfwxfIb9dL39/nJ/OL/KQLyBJIHXAibCdkMPQ7zDQYOtg0UDjsPsw5NDPAKeQvUC8IKMgnhB7AHkAhjCPoGVQYDB58HFAfWBn8IgQm5CMwHzAeXCUgLDQqzB8gH/whyCBMH+wWCBWoENgKrABcAHf8x/aP6dvjv98n2QPS08q7xt+9F7oXtGOyT62HrReps6uLrnesG66Tsbe5E7+nwS/KT8+b2QPkA+l/86P6YADgCowNGBd4GDAg8CAsIpgmtCtYJvAhHCJoJcwpdCd4H/gd3CfwJbglACQMKcwqVCh8LOgwCDUEN4AyRDMQN3w5jDogNDg0IDTsN+wuOCkoKDgk5B4cFQQQ2A7sAWP7n/Bb78/ja9t70ZPMk8tTvEe6d7ePsiOuN6trqu+uH6xLrX+wG7sLuR/Cx8dLyIfWP9/b4OPr++6X90v7z/1EBFQIHAvQB2QLUA3MDqQIlAlsCWgPDAxMDSQLuAl8EKwUUBb8FfAdcCM0IwgmLC3cNBw6nDYgORRBAEc4R4xGtEcQRRBI4EnMRJREMEL0O/w3sDAYLjQj5BmUFxgLy/7P9jfuf+ST3C/Rn8lfxre+j7bfsv+yq7BHsr+uL7IjtUO6g7yvx+fE583n1P/fN+Dj61vqe+zX9ZP7L/pr+J/60/rr/i/9s/gb+m/4Y/0//HP/w/pv/bQDlAFUBXQI+A8QDsQQBBo4HdgjlCNkJLQt5DKQNfg7cDm0PixA/EVURWRGjERoRKxDkD2QPQA5eDKcKRAmZB6MFBwQIAg7/Kf0S/GL6Evji9W30gPOx8obxgfBF8FTwTvCl8HTxE/Kd8mTzhvTx9UD3G/ht+Bf5TfpR+6X7j/uU+/37iPy0/KH8lvym/Nb8N/2R/aP9xv1Q/ub+VP/l/3sADgHpAcICfgN3BLAFfAYCB1cI8AmOCssK7AtCDbgNFg6pDhYPFQ/UDs8OkA70DdcMuAvhCsgJYwj7BlkFeQPaAXEAJ/+B/aP7JvpK+aD4nfdZ9nD1PPVo9W71JvXj9BX1hvUM9qj2+/YB9/72dfck+Jz4kPgM+MH3dfh7+Vv5j/jC+Gr5vfkm+rz64/rX+pr70/yJ/e/9kv4s/+b/DQFfAu0CEAP2A04FZAb3BngHJQgaCQwKigr/CpgLRQx/DKQMHQ1JDS0NsQwJDGAL4wosCukItweOBi0FpwOOAnoB8v+P/rn9Af3/+x/7XfrP+Z/5dvkg+aX4bPiD+KH4x/iQ+Br41ffx9zX4CPiY91L3Cff19mH3lfc499T29faF9xv4oPjJ+M74LPlA+nf78fv9+3j8l/2e/ln/DwACAeEBMAINA54E3wUwBiAG5QZNCD4JhwmgCaUJEgriCgcLvgqkCs0KiArMCf0JUQrcCDgH9gaxBh4GRwXHA0ICwQGOAYQAYf+o/tr9Bv3d/AD9ZPyK+yz77vrS+t76jv
rm+R/55PgY+cj4OPis93/3rfeG9zf3X/eZ93D3ZvfQ92n4sPii+OP4XPnU+Xj68PpH+9T7fvx8/ZX+M/+8/3gAXwFtAlUDygP4A+sE+AUvBokGcQfdB+kHkwgjCSsJOQl7CbAJmglNCQ8J7AhlCKsHYQcDBxwGJgWxBCgESQO0AkECNQEeAOP/vP/z/hv+Y/1C/Xn9qvzJ+5r7fftE+5b6B/om+h76gPke+UL5iPkc+Zv44PgC+f34FvnM+LH4gfn++bD5hPk7+h37CPvo+oX71vxR/c38u/3v/k7/BQCYAEYBCwKCAkwDIASRBMsEWwUnBjAGnQZyBzoHTAfeB14IzwhRCKEHvAcKCKMHHgfIBjsG6AUJBakEmgQIA4YCGwLaAPYAnACx/9b+Gv7E/kf+Nf0E/c/8r/y/+4374vtC+z/7vfqD+qP6Afpz+hL6VPli+hH6Ivn++Qb6S/r9+Yf5/foL+5j68/pq+xj84vs//OP82vzk/Yb+df5F/+//oAAuATAB8AJZA/ICUwTEBFAF1wVSBvAGbgYeB/EH7AYTB54H+gbDBmsGTwZCBk4F/wTaBBoEBwRlA8cCowIuAswBIQEHAd8A+f/t/8X/9f6+/oj+Rv7N/W/9mv3g/FT8Kf11/If7Afzk+6v7NPs/+4b71/ra+iv7tPrf+tb6dfqX+nX6zfrH+nH6B/sF/Ij7sfu2/F78xf0t/ij+hv/O/yYAJAHLATECzgLkAmgDIAR1BPwEmQQdBfgF3ASuBaoGLgVwBYkF0QUPBiYE8wTTBJQDRgSIA+EC4gJ8Al8CigHwADMB9QA4AJr/AAAXAEf/sv7I/h//8/4i/u79ff5H/n/93P3N/ZT99PwP/QL9wvxf/Yf74Pzm/J372/yk+xL8BP2L+kP8lv1h+w79rfy7/KD9OPxW/QX/8vys/gwB8v0YADMCZQCCAiwC7AEWBM4ChwIYBAUE7QMeBK8DAQSTBBAEIwTJA9gDnwPMAwQEQgMqA54COQJFApMCmgEMAbQATQBnAOj/7P9h/zf/xf6j/8L+9v2X/+z+k/6U/rf+ZP/v/r39j/8j/zD9+f60/Sb9NP68+yT93f2Y+r/8XP39+4r9PP2w/Bv+M/0j/Vf+Ov6r/gL+X/4S/3X/mP70/qsAYQCnAJQAHgHNAWwCfwJJAooCZQO4AwcC/AJEA8gDvgPnAbED8QMJApsCAgLfAjUDzP/EAtUBMgDMArn/pgCmAZX/RQCLAIEARwBF/zkAFQH1/oT/FgBw/2r/hP4XAGj+8f0nAA79Hf6F/p78cv7s/Vr8Ov5i/jj8w/1y/Z79kf2k/Tj+cP2o/tz94v3W/uv+pf7j/vL+2ADv/53/fgEiAKEAzgAlATMBrwCNAS8BCQE3AUoBtwFAAZIB2QL/ARMC0AKsAYgCKQLyAZsCCgIpAq8BqgGzAckBsQEUAZkAiQHmANn/OQHzAL7/hAD/ADQALwGM/icA/P8W/kkB9v1m/ZEApv31/W7+d/zs/s78cP3Y/mn7rf4v/qz8Y/44/ZD/Bf5C/fr/ZP09/0z/zv7EADz+Uf9P/w8AOQGh/ssAiwAsAH4B7wAZASQBWwElAVIBaAGMAhcBOAJ+Amj/uQOaArL/kgOGAWQBEwP//zcCoAHBAIQBlACnAe0AkwBbAYkAOAAmAkwAeQBDAE8AZQGz/igAqv+f//7+8v4i/qX+8gDJ+xT/Lv+n/F3/j/2F/bT+wP5g/Vf+Cf/J/Wz/K/5M/sb/u/w3/yQAZv0ZABD/d/9o/67+BQGd/k3/xwAMAI8AxwAPAecA6ACMALIAEwEaAZoAHAFEAdb/FwK4Aon/IAEtAtEB9QHZ/0wBxgGfAYoBgf+TAVYBh//SAZn/yf8PAi3+xv+rABf/GQBLAEf+mP/rAev9LgDHAFf+FAFk/2T+KQB7/9D++f6R/8b+qP42/7r+/P6T/q/+SP96/rb+sf+3/t7/ZP55/5kA5f2D/9v/Q/8LAOsAjwBZ//EARA
EiATMBeP8dAg4BYwGZAWb/DQLIAJcA+AEjAXgBtQE/AC8BbgIBAJcB/wC8/t8AuAGT/2b/Jv8W//wB+/4u/ngAAf7c/9L/aP1gANL/qf7l/+/9if+1AUf+oP7n/6P+7gFtAAv8uP/BAZv+G/+W/3b+t/8qAI79YP7p/0z+qP/u/qP96f8TAcH+Tf6KAIsBPwCx/jIA6wFcAhwA+P9MATIBZAINAaX+4ABfA4j/9/93AdkCOAEa/xoCev61AdgDcf7U/vwBjAEIALEAJv9bACf/KQH//nn95gIx/6f9tf+c/yv/rv/t/zr+Uv/q/7T+iwAd/pz/jAF2/f7/SwBm/k7+NgHh/kn9KgKi/KH+PgC6/UsBrP4wAEoA7/2PAToA6v+iATb/LP8AAdv/KQLIAL/9QAI4Ab0AKwCGAS4D8P6PAqwAUgALBbT+oQDhApj/JwIm/wAAsQIW/noAoAFL/6X/VAGxAIP/D/9N/7QAZP4h/0sBsf7R/Pz/qf/X/m//L/5v/1P/BP+T/oP/PQDY/uv+WP1Q/8D/DP1g/+T+af1eAF0Awfsn/wYB7//0/rH9KQFU/vP/NAHL/fb/LAK3AA0AOgHuAMkBx/9ZAqQBmv5OAj8Czf8XAvYCUf9vAcIBoQHWAW8AlAEqAsD/KAKEAoX+twOKAOT9NgNUAuMA3QBi/yX+tgHRAQ7+0/6a/vf//f8U/0z+1P7cAP77pf6g/yj9pv9T/h/9fv0X/4L9L/4x/tX9sP9n/WD+5wBs/i/+L/+j/h3/Z/+m/xP+bv9w/+z+zQBLANsBEwBHAKgC5QByArMDaABmARUEJgLRAQoCMwIVAmwBiAFrASgC7QDwAD4BXv+CAoEBFv9bASsA+QH3AJD+s/8JABMB1v34/YMBaP9T/eL93v4I/z/+L/xT/A3/cv2f/ZD8hfxr/zT9R/xN/l//z/3g/rH+JP8LAdH/SwA8AFkBjAK5AfcAuwBYAwgC8wAwAisAaAHwAJsA2wB0/w8BKv/n/hwCBf9QAHgCWf4lAZUBNgDDAY3/v/9gAAkBFgGy/hYANgBe/57/6f5Q/pL9OP4y/m7+mf0//aH+4/yB/Ub/8/9n/q/9UAKbAA3/CgFlAkQDTgDtAEoDPAMbAnwByAL7ApMCvwLaAcoBVAKhAqYATACXAZX/IQBN/yP8sv00/zv7g/p3/Kj9IP9b/Cj8VQEiAeD+WgJ4ASQB8wHAAMoDHgJeAgUDyP8JAVcC9wLk/0j9j/6F/mz/e/zd+5/9//uY/Gv8nf31/U/8Yf5F/4//rf+0AL4CXQHEALUAEQLRAioBvwDT//r/t//4/gX/Mf4M/b79DP6A/Wn+//48AZj+yv2yAj8CKwEHAoUBvAI8AywCoQOqAxQDxAN3A5wEkwQ+BIYEKQS6A6kCKAPTAZUAlQBc/5v+Ef0m/Kn9Ov2/9+r3KPtI+mH4rPXn9n75+fnE9/727vsr/r79lf1c/YIAFwWBBU8EwQUtBnUHwAb6BcsJ5Aj9BNgEJgXABsYF2gEzAfL+PQGTAvr9BP6M/lv++v4L/db/NQGK/eL9wv9rAV0BwABeADX/6QCvAKr+Fv8G/nL9yf3Y/DT8bvrf+d/6Dfjy+c/8avjw+A75i/vv/f/6rf3g/iT+CAJHBWME7QOmBOEHcglEBxwIQAhSBykILQYtBAsGSAP3//3/IgDm/2n9dvzf+zX8V/6T/dP7uvyM/tn/P/96/0QBNAHJBP4ClQCkBJICigHGAp8BfwHS/6f/LP9X/JT9/fyr+KfzIfQ0/j//WfU58RD0B/sV/iT6K/rd/NT9BACYAlkFTQePBt4DrwZxDEUN1glGBgMHzghCCEYGfQT/AjgABf7O/qP/HP/4+1L5Yfzb/iP+Vv27/aX+//49AigDpQGnA3cDBANEBd4FEASaAHcBhAPr/pL95/6K+tn0a/TS+3j+J/ej8Yrw+/W//Dz7UPfm9sb7N/9h/psBWAVwBf8C8wMECvQNkgvnBYcHRQ
p9CDwIoAaCBBECNgBSAs0At/3D/aT7rfqr++D83fzo+1j9hf6Y//oAaAKQAscA2QBWA8UD2AGUAAEBDQEs/7b/V/9K+vH1uPfo/1QAQ/Tz74P2E/v9+mj4o/fI+pX9HP2O/UwDvgh1Bl0BAwW/DXcPaQpcBSkHiAphCvAHKgP8AcUC2QB0/vf+IACQ/R76MPsI/Xj9lf6y/DP7JP7zABYCnwGm/6MAxgJ7Az0Bff8DA9UAzPq9AF4EsPfD8SL7HwHb+2LzAfHz9mv7MvpI+FX4/P5fAnL9Bv4RBuUL1QjXA+wF3A15EdgLPgf6B2MLWQxTB5EDsARJBIsBEP7l/KD+I/1O+VL3OPkw/PL7X/tt+sD65v7f/wr9Gf7QAO7/4/1F/60B3P5u/Eb/+P3r81TysAFVBcv0k+2j9V/9WP1P+Yv38fmDAI8EXwDaAOcJ3Ay2Bi8FTg78E/EOoweHBtoKsg0RCYMBlf9OAlQDB/9o+hD7Kvxm+vn5OfrV+hf9/vyt+6j81wAJBEQBkv4nAIcC3QLjANP/WP9k/dIAiAKI9dju0ftXBSf8LO7S7hT6yf4c+X/1Afq/ADAEzACk/nAIxxBaCtMCLAgMExQU4QrDBMgHeQ1xDMACm/2TAX4DVf7d+Az5+/oO+hD3//d2+576v/l//PX+7v5hAJUBfgAtAYgBuwC8AqEC2/zi/f0Esv5J8HLzAwO/Agr00e4q99n9Yfu596T4nv3vBO4Dh/72A88OhhCmBj0DIxCqGK8P1QSHBpsNsA18Bv3/BP8uAcX/D/rb9p/3lfcm9cf0Avfc9i/2QPhP+oX6pP3wAKz/p/8DAc8AvAOQBp//aPu7BYAICfeg8JoAMAb6+pby0PYr/dL8c/us+lH8dAOJB0kCxAAEC8kRegpCA+oLHxY9ERYHkwbLDAMN3QW7AfgBTQAD/Wf6+fdh9kf3K/bC8VLzsPj89mPzm/fc/BT86voD/rcAJf/A/vMClwOW/Tz/hAg+A7TzMfffBgQEy/fh97b9fv0H/MP9tP7o/y0FugZdAksFoQ1GDrAHGwajDnwTRQy9BH0HXgs0B38BWABK/8r7j/py+TX1qvPw9Rvzre9J9PX4Vffn9WH5xvzE/fb+agDj/o7+ZQTKBkX+AfypCjML0PTd8y0IPQk4/L36qP+u/jv/JwIFAgIC0wRJBusEFwZtC4YN/wYaA2AKChFhC8MCbgNvBSADBgKp/4L5UPfM+qX5j/NX8kT2sPRV8F/0Rvkm9kj2KPxN+7H5ef+WALX7h/8/BWf+/PogCJULj/hg8eADfQ7VAab4qv57AVkBFgYeBfMAuwX8CkMH4QWADd0R/QmtAlAJyRJCD50Dx/+YA0EFBgML/gf3zPWY+xP6UfDP7kb1oPOc72P0f/g59jD3r/vm/Ob88/72/nT9+wF1BpEA6PtJBwIOB/0q82MDJg4FA0j7DP7l/kQBBgchBSv/xQJrCT0HrAPXCF0OWwjEAKwFcg8nDdUBN/6oAU0DIgJ9/TT2FvXl+i76cPIo8WP20vWx8vj2cvuk+ej5Mv00/f/9NADs/bL9dAMkASr7CgRDC/79f/T4//0I5wIq/scA4gAr/+wEuQhBAwcCyQjpCSIFiAh7D4ALlwJDBisQgQyDAKr+kQQSBEr+AvtJ+Ff2HvlJ+Uny+u+s9U32ivGr9OH7S/yY+Yr6pf3I/1sAiP1C/D8ACgLq/QT/1Ab9AjP3ovohB+IELP2AAPwB3P4FBKYJywVOA0AIGgtqCeIJyA1sDUkH3wasDgkQDgbc/5gCxgNOAHr8ifes9Cf31vYE8UjvLPOy8nfvwvKH+cv6kfiy+Rb+JQHuAC/+Ov2sAc0Df/7R/W0GkAUx+ev4QQQzBcn+kv/VAKH+5QGiBykH4wRnB2QKsQmPCukODw+UCF0GQgyAD6kI6wBmAfwCGgBY/NP4LPWO9Hf1fvKk7mbwfPOz8XfxL/ei+pf5FPsA/rP/NQFo/+X8KwHsBEf/HP
2JBaIGqfuo+I8C4QaSANv9VQLwA2cCTAabCdAG5wfdDP4KfghkDoIRRApvBVwLtA+kBx3/CwEMBNH+yfcr9ir2W/Qc8z/x2e2X7sPysPH37/f2Ov39+tT4mPx5ATgCuv76/e8C2gPw/Rz/IQghBt35RflXBKsHgwF9/ywC8gKBBX4KTAnlBHIIMA/5DVgKogzRDu4KNglIDVsMCwTD/4cCnwLF/Gf3lfR58kPzWPRY77PqS+7m8UjvI/Bu92j7d/ns+A79fwG+AVT/Lv/RAIIADgDaBFAIXQC092v+OwmxBT3+uQCtAwYDHwiyDLYIMwfnDAwPQw3vDvMQgQ2dCSEMXQ+aCtwBtv9OArH/CPm19BXyM/Fz8sfvBesO7GHvCu5i7XrzUPoj+y/3Ifcl/10FhQFY/B/+7gBHAeME5Ag+Agz4avxNCZcI+/1y/r0F/QVbBtoLyQtyCJwMyhBSDkoOXxJGEQ8LmQrCDzMOjwT4/m0AjAD8+yn2YfHU78nw7O4B6+Dqvu097S3r1+8q+RH8QPYS9NP8PwWCAnX85f0yATQB3AVkC6gCQPZ4/WEMMAnC/fb+/gRxBYsIqQ3ZCgEHNQznEYoQFQ8oEWgQ2wswDAQRDw4vA7L+FgJMAZP62vRx8N/uVfE38Cbq/Ocl62nsw+sV7wv2d/nZ9f7zlPzDBEYBoPvC/OL+zgDSB3gL5wAu9kv+SgwNCX39mf57BR4G8QicDj8M4wgGDqgS5hGqEasS/BBgDecNrxH4DScDC/8tAsMAFPpY9Hzvx+218Jvv4+ig5mnqzOvk6pjulvZP+hL2UPRI/ccEIAFV/Yj+c/6BAn0NVAxB+1X2cgZKEJ0Fg/svAKgHMAqqDD4NRgn4CfIRMhV9EUMQTBIPESQOgQ8FECgIu/8tAYcDcPwa9DXxmO7x7JvuROzq5W7kzehV61zrx+/W9yD55vMG9uv/IAOg/8b+6f2m/UUGRQ+XB/H3J/qICpYNzwBK/K4ESwoUC7cMdAyKC0sQVRZJFY4RwhLwFLkREw5kD7gNtQR6/nkAMgCl+HPy/O4V6uXo0uyq66PliOPN5dbof+6T9h/5sPNM8hv8FAQAAU39Z/6r/sMDOw5nC+P7SfkmBxgOQgXh/Z0CMgpGDRMPxg4yDCcP5BY/GI8T2RIiFYITWg8oD5cPUAiG/q/8Bv/x+mnyuO1e69nnPed66CzmZOQh5r7mt+i58Wr5dPcz8wz4QAFeAzwA4P6//isCZgy/DmQABfhRBHkPcwgL/xUB3QfADdMRdhA3DEYOJxZRGf4ULRISFPgTmxCwDgMNsgfOAIj9bPx++LLxWuzW6WTohOhS6Abkx+BW5Xzq1urj7VP0Y/Yi9tP6HQGJAfP+gv//AEgDFQtkDosCHfpxBQ8QAAlvAaYEsQg1DAgSTBIBDm8PWhUdGFIWVxR1FKYS0g7ODoUOcwZf/bv7Lvwr+JDxP+tz5xPna+fo5YTjhuLn44TmWunN7nH1xPUk86P42QE7Aw0Ar/+oACwEkAz2Dp8DOfvPA8sOGAwJBBkDJQf2C08RQRNCEKIPoRSMGIkXUBVeFLcSBhB5DmoMKwaX/iX8EvzG9uTuXuqy5/nlteZY5g/j+uAW4yDnnepE79z0/PVi9EP5+QEDBFkBAQG8AVUEYgvUDpwH6f/VA58MKg0MBwYFqQieDH8P6xHYEZgQNBN7FzgXhBRsE7ARjA7kDDkL3AWM/pT6YvkD9YTu4uv96TrkkeEP5VLma+Mj4hvkOugN7zL2UviD9Y/2Ff8MBY8Cq/90AHIC3QgLEMsK/v7//0ULYQ9bCQ0EuQXyC8URTBROEpoPwxLwGGMZahXUEyATRxBuDVkLQwenAIL6FveQ9KHwsuyK6VDl8uFb43vm8uUv47fhp+Tw7Tf3Mfhl9JX1h/1WBfYFwAFrAO8CwQe5DrgP6ARd/fsFZBFBEFQIpgTSByAQOhjjGJsSOg+/E3EZYhlEFc0QcwzoCZ4JZAfdAMr44/
IJ8P7uUu2E6VDkAuHZ4v3mU+fz49Dio+Yf7sT1X/hT9jv3C/5+BDUFBAKaADgDwQfBDKkODgja/9QDMg7LD2wK4QekCJ4M8BRUGukX3xLoEZ4VNhkdGGkSxAvAB48HyAavABz4QPLH7jvswuqd6IrkfuGg4i/mYOew5Wflb+kv8Ij2wvlz+cv5TP91BqkHXQNfAYkEBgmfC2MLngbRAP8ClAsNDnYIPgUVCH4NEhN3Fu8VahPOEosVeBizF9AT6Q5iCRQFmAPrAGb6KvQc8LHr6Ofh5qrmFuWL4+3jhub16GXpK+sN8ZP35vov/GD+IgLVBTEHQAZlBLoD5gY/DPAMVgY6/+r+zgQNCo0JYQaOBVwH6QubEgYWeRS2EiYT4RTUFksWsxFUCxYGswLjAH3+X/ku8uvqGeYI5ovoBOkJ55TkHeOK5enrCfHS8vP05vjc/P3/ewLYBFUHWAgDB00FNgVwBjcHZAUqAQ/+Kv9QA8UGHgdrBR4ESwaNDL0RGRIiEYYS9BSMFtMWwxTOEIcMMQguBFwBDv6B+Q31lO//6Hzm2uhi6nLpAuhg5ufmSOtH8B/0WffO+H36sP4VAsAD8QVNBzMHVgdoBxQHwQYmBL3/g/4QAZIDSQWRBqQGnwU2BfMHfw7IE/cTTRLQEasR3xHMEVIPegtNCLcEiwDX/ZH7vPaM8Dvsx+r26prry+si6zHq0url7fzx2vRl9sb4a/yN/uT+PADGASwBwQA/ArsDIQQIA8v/w/03AKcDpgQvBFACtQGkBlwN7A8WELkQbxGIEj8T3xG4DxMOigxBC0gJtQWEAgEAFPzK9nry9vA68rrz3PKI7xnsc+tG7qfxePPC9Cj2gffH+RT8GPx1+h363Ptf/mMAUAFXAIf+WP92A74FOQNlAU8FoQtODjkNKA1TD78QjRCMEFwQPQ9mDrANLguEB3IEFgIf/8n6yPbk9Fv0RPQa9IvybfBG8fHzwvTP9Dz2EPgI+qX8bf95AYsB6P49/HP8z/0q/W/72fo0+6f6S/gJ9nD3AfzH/9wB+QOdBYwHaAvvDgYQexBoEcoSWRRLFHgS8RBuDygNpQqTBvMAXf0T++32KPMr8YHuiuyH7RLvH+9Z72Xx4PUL+5j9KP1U/B793/+KAngCkQBFAB0CVwMRAq7/6f2m/Iz78Pu9/VH+6Pzt/EoAWgR2BpQHxwj5CcILqw4lEV4R7Q9jDxIQBw+dC6QI4gVMAeX8nfrw9y70rvFb8C3uOOyA7IvuDfD68Nfy//S19qn55PzK/Xn+9gBeAiACLgMkBTEF4wO9AwgFpQVcBY8FrwWDBBMDSQIgAtMC1gPwAy4EGAa3CNYJkAm8CW4KFQoUCWEIvAZeBAwDxQFr/yb9Hvtg+EH2AvZK9q70z/HE8JHy5PQg9l72+vWr9cz2tvgC+eb38viu/Nb/aQH+ArUEPAbzB64JHAtKDLIMIQzbCi4KDQtQC64IkwVZBHUD+AGJAU4CQgITAboAdQGPAYEA4/7m/Ej72PpU+5H75Prx+af5kfk7+Wr5kfrv+5b8//s++6r7tPyy/LL7BvuM+6v83v2C/00BGAJtAukDxwYTCSwJHAj4B7gI/Ai3B0YFVgOXAnYCywEt/0z8y/v7+9z54vaa9WT2Xviw+cT5Xvqh+878jP7q/4L/f//OALoB2QImBesGoAehB5MGmgVtBTcE4gGPAIoAvwCWAIj/4v4VALcBRQJdAmYCAwNbBO8EOwTPAwYEaQQeBEMCwQAdAVkAff12+2T6kPgq9+32mPZ19ffzHvOB9AH3cveV9ln38/h0+kn9jwDUAYsCugQxBzIIpweSBmsGPwcxB8wFVAV4BgoH1QXpA78CAAOyA7cDRgONAlABRAFAA7wEjQSpBLUEkQM+A6sDKgLx/zb/8f72/cX8q/vE+un50/im9yD3fvc5+Gr4yPfx9lr3Evm4+gT8kf2r/iD/MgCXAZEBbQHIAtQDQgOSA4oFcgaxBXUFLg
YXBgsFUQQIBIcDcQLdAaYC/QKbAdcAaQFaASMB+AEeAtIA2P8VAKQAqgAsAMn/2v4N/Zz89v20/Wf7hfpT+0P6b/e59nL52ftj+8v6u/zh/if/Kf8YANcATwFQAscDAAUjBREEGQTNBQQGQgStAy0EsgMNAx8D4AL1AaAAdP/C/h3+df0Z/rT/OQC+/1IADwJ+A9wDPQMxAm8B5QAkACD//v2M/KL78ft7/Lf7tfo9+8P8tf2R/rj/9v99/wEAagFZAlsCdAJhAy4EYQNKAtgCBgTSA8oCMgLbAfUA0f8B/yj+gvwI+3r6BPoc+eL4sfkZ+5P8dP0n/rr/twHjAvkCmwKpAtQDwgS4A+cBZQFWAcYAxQA3AZ8Arv/e/xABMgKXAigCjwEQAZMAHQDL/9b/vADLAQcC1QEgAkoC2wGZAawB8wCz/03/av/Y/hz+0f1k/Yn8+vvQ+1T7Lvqh+bf6h/w1/VP9if4gAEwABwACAfIBPAGKABoBNAH1/2b/PAAGAScBpQFSAkcCywHwAY4CqgL+AcsA7/9QAHkBGgL2ATICvQITA1wDdAP6AsoCHwP/Ai8CUwGCAL7/wv6b/Rn9eP1O/UX8ofuh+0j7Bvvt+1b9qf0s/TP9yf0b/mX+5f7W/of9Ovxq/MT9bP4X/hr+9f7Z/5oAcgHJAUEBLwE0AtsCqgJoAgsCrwE6AicD6QL6AegBoALPAokCoQLwAsoCmAJjAowBugDOALEAv//g/jX+/fwC/JT8Ev5r/qf9wP0V/xwAXgA2AEX//v1C/iwAcwEAAWMA2ADQAYEC1gK5AhQCHAGVAKgAWAAd/wP+bv3M/CP8Bvw7/Hn8vvzY/LP81fwo/Tj9XP0p/t/+rP5N/pv+3P6G/oT+Rf+f/0P/Zv9iANAAlAANAfMBygExAZMBcAJpAu0BOQISA4EDoQP3A04EKgQyBOMEbQUTBXUESAQ9BMoD9gL6AS8B0AA0AJD+3/xC/DL86Pu0+6T7PvuI+jn6b/qY+pD66vp0+0n7nPrh+jX89PyG/Dr8k/zv/Ef96f0u/q/9bf1X/mz/jf+Y/4wARwFJAcIB1wJWA1kDrAMnBDwEHwRoBBcFnAWRBe4EHQSZA4QDjgMNA9oBsgA0AEQAZgBTAPf/7P96ANkAfgA9AKMAEAHoAKEAmwCBAFIAgwDcAMUAYgA1ACAAs//k/i3+jv3v/E38ufsy+8j6hfqc+t765frl+k77mvtp+0T7cvud+/H7jPz6/P38E/2c/X/+Xv/X/6b/VP/h/1QBWQJCAioC4gJwAy4D/AJVA3EDYAO1AwEEwwPWA54E/gR+BEIEsgTTBD4EDwR7BMoE7wQwBTEFrgRVBMMERwX7BBsELwNBApoBQAFZANX+wv1I/Yj8pPs8+7761Pkc+bL4APgZ92v2MvYz9j/2XvZ/9o32vvZU9yr4Ffk++pb7vfyU/Xz+Tf+9/0YAfwHoAvYDkAS0BKQEHAXoBVEGMwa9BQ4FtAQDBYUFcgXlBGcEUQSFBJwEWwTFAzQDPgPrA3EETwTNA6ED5QNvBOcEywQFBCcDpgI1Ar4BoQG5AUUBeADj/3L/pv7m/ZP9If1R/NX7ifuL+pD52fln+vL5UPkv+ev4ofjo+Dn5Cfn/+Bj5y/iY+Cf50Pn0+U/6fvvv/Nn9LP6W/m//rwAQAjQDlAMgA/8C1ANiBAkElQNhAwoDsgKgApkCdAKzAi4DiAPbA2sEKQXzBe0GIQhDCe4JEAoGCmEKJgtKC2IKYQnwCFsIAwdEBcsDzQLdAVAASf75/Nz8uPzT+8/6Rfrf+Vf5ePjp9kD1pvSR9L3zbfKc8UTxNfGJ8RXyqPJu82T0lPUn9/H4gvri+4X9hv9VAX4CWQOsBBwGDQe3B0EIRggECNkHZAc1Bt4EOATmA64C8QA1ALQAVgH0AeMCHQQUBboFoAbcBxYJPwodCykL3woTC0QL+gqzCmEKSwn+ByYHNwapBO8CawHl/xn+VPz5+q75Tf
hi9/b2e/bP9T31pfQR9KPzTPPA8vfxVPHf8FHwAfAr8LrwN/L+9MH34vlP/Ez/8QH0A5EF5gbxB4cIZQjmBzEHWga5BdcEJwNdAS0ARP9p/q39Hv3//CX9T/38/Ur/tAByAn4EcAZ3CJUKGAyIDZUPhBGeEgMTAhPmErUSPhJuEd8PaA0dC0QJmgblApb/iv3A+w758fUz8+TwLe8z7lPtM+xF6+HqsupT6gDqUeq86gPreuzz7qDwVvKn9Qv5XfsK/iIBiAP1BdMI6QoPDPYMnA2fDa0M/gqFCQoIsgVIA1ABz/5E/PX6a/rI+bX5tvo9/Jr9A/+8AJYCbAScBhcJXQtsDcUP+xGiE+cU2BUsFt4VMxUFFPgRnQ+0DWwL1gfPA1AAxvwQ+Yf1b/Kt70ztnutv6kjpP+j/54Xo6+jg6Pro9+jw6HzqoO0v8LDyrfY++1H/ZQNvBxALMA5pEMERUxK6EWUQ2A6WDIoJrgYLBDMBaf7y+5/5jvfX9Sf06PJ88mfyn/IC9Dv2Qfij+hj+8QGyBRIK5w41EycXBhsOHtUf3CBeIc0g7h4SHK4YwhRJEKsLDQc/Ao39iPn99TLyhe7N66zpiefW5aLktuOL4zDkrORQ5f/muOiz6WDrVe5A8brz+fYo+1T/DwPFBooK3g2XEPkSpBQCFUkUFhPjEG4NnwnvBRoCFP5c+kv32vSj8tXwE/Aj8GvwZvF089L1Wfh1++f+SQIbBqgKPA9bE10XWRvUHkghryL/IlAi3yBpHssarhZXEmYN7wdTAgT9dfh29NLwmO2N6tDnD+bQ5EbjPOJZ4r3i+OLK4y/lbeY36JHrX+9K8qX1cvof/9QCrgZvCkMNVBB/E9UU5xTtFAAU1hFbDzEMRgiGBPgAEv00+QP2T/PM8KzuE+0o7D/sP+3f7l3xoPRE+HT8JAH9BUQLEBF1FjIbjB8yIwAm2CdcKKcnEiZdI4EfIRsTFgkQ6wkqBDr+gPjt8+Pv+evu6LDmu+Rv44XiPOFU4HjgwuDz4K7hlOJ5453l+OhG7I7v1PPL+I79JgL5BocLgw+SEysX2RhDGfsZuxkHF8cT1RCaDLgHrAMZ/4D5RfVc8nvuperV6P7nWucD6CXq4exm8C71Bvv0AIgG2gwgFBoafx4tI10nWik0KqoqZinCJusjLSD0Gm4V5w+wCUADaf0F+NzyDu4H6tfmFeTX4crgbeDB33bfR+Av4dnhSOPQ5KDle+fz6gPuhfD+81j44vzPAZQGawqIDnETGhcGGe0aTBz5G7EacBh9FC8QpgvaBdD/Tvpk9NvuCOt95xLk0eKG41PkROaO6krv5vMk+iwBHAdGDWMUShqrHt0iryY2KXMq2Cp3Kv4oMiaVInEeORkrEx8N+QZwAHP6b/WV8ALsfOif5bficODa3jzd5tt+26vbRdyg3WLfG+E549Dl4+gG7dnx2vWQ+dr+2AQvCdwM4xHTFv0ZsxxpH2EgkB9ZHtkbJhftEd4MfAZD//X4KfNP7ZnoB+Xn4WfgWuFZ4zLm/erj8K32B/3HA50JLQ9pFSIbSR8TIz0nUSp1K/IrOyz5KhMohiTmH+kZ4xMCDnAH2wAt+8T1rvBN7BDoQOSV4SPfgdzc2gLaFNk72Z7ahduT3C/f9eG54zHmlep579nzVPia/RQDxAcLDHgQOhQ+F+0ahR75HyIgLCDvHvYbSxhXEzQNHQfaAM35UPPp7QvpZOVY46ri3eOH5o/pu+1M82X45PxNAtgHxQxOEgYYhxwEIUwmECq5Kz8tpi7MLZoqkCbgIfgbLhUtDuAGbP8q+VD0S++y6bHlb+N+4KHcndmz1xrW9dSH1A3V2dYW2V7bbt6b4bPj0eY47ALxN/RY+XMA1wWxCkARMxd1G1UgSSVVJ2EncidoJggjrB7UGcAT3wwuBnf/kPjD8Q3sGOg45Rbj+OKo5JfmlOlE7pHyDPYN+xkBUwb3C6sSzhiQHs0kISqRLbAv6DAFMZMvCi
zeJgchbxr2EmgLygMs/Ev1WO+J6ezjD98l2+DXsdTU0UDQq88jz33Pz9G81AnXudmA3QjhBuTy55PtxvOj+e//NwdpDswU+xpyIIEkQCiBK1QsEisXKdsl3yD/Gq4Uyw0CB54AfPqg9H/vPOva50vlrON04+vkPuf36f/td/MK+Yb+0wThC9sSmhnjHy8l9Cm3LuMxczKpMaMwIi4OKYkihRvdE5ILWwNH+ybzC+wg5kDgNtpi1SzSg88Jzb7Lyst0zBTOJdFz1HrXbNsc4OHjjOdR7JnwcPSE+iACyQcADXkUcxtPH04iMSX6JZMlQSVOI4IfRhwaGVcUHA96CnoFSQCv+9D2fPFy7abqKujD5rPnWuou7vryIvin/YUD9gjSDb8SyhfsHFEi+yaBKq8tbTACMQYvvitwJ6UhLxriEVsJtgBs+PjwQerZ45jezNo8143T4NAxzwHOrM0NzhrP0NHc1dHZ7d164mvm5Onm7enwPvLC9NH5Bv9RA3cI9g5nFWwawh3FHywh7yFyIWofhRziGZ8XqxS6ENcMgQnvBaYB3vxH+ED0C/G07lHtIO0875TzPfib/OYBogcJDL4P7xO8FwQb7h7tIrslNyiyKlwrgil2JpEivhwmFQgN0wSW/Lr01O0g6IPjm9923KzZrtbZ07nR889hzgrOYc8g0uDVUNpu3+bkL+my6z/uKPHc8YLxsPTx+sL/AgS8CpYRchahGm4ddR1IHSQeah3uGgkZNBe2FLsSQxBSDFAJBgfTAlz+uvvm+Bf25/Uz92f4ufvTAM0EEwjqCxIPNBElE/MUDhfBGRccJx6yIOcilyOhIgAg4RsnF3kRLwqfAhr80PV67yjq2uVC4jDfRtxv2SfXQ9Wg09zS29JM0yPVrthu3PLfMuSz6BDsSe4L8Hfxa/Jy81v12fjT/YEDJwnuDaARKRVKGKAYjhb1FewW+RXOEycT0hIdETQPXg3FChoIKAYKBKcBXgBxACwBmwJvBIsGdwnWDOkO0Q9VEVsTTRR3FEcV6xZ6GG8Z0RkTGu0ZThgFFccQMgxWByACZvzm9o3y2+7k6kDnzeSs4i3g2N3v203ae9l82ZDZStqB3FXf9uH35FDoB+sb7cfu9u/H8IXx5/IA9fT2q/no/lsE7wbnCQIPORFhEKIRDBNyEc4QUhKXEdMPpxAgETwPPw5LDjkNIAzTC0QLCwvoC+kMsg0JD1kQbBFpEqYSVRKfErkSuxErEbwRrxGaEMwPHA+mDboLLAmDBRYCaP8C/Nr3bPQZ8gDwXO2t6jfpWOjI5iblV+Sp4yPjf+PS45vjYuQb5v7muue/6bvr0uzr7bzuqe697j7vb+8t8HrysfVW+QX9JQBeA1wHKQrjCsALZw1eDsEOWA9pEM0REhP6Ez0VLxZSFhoX+heDF2kXeBhCGCAXlRe2F5wVLBQ1FMESHxDlDiAOKgxACvYIMgcPBSUD7wBu/kv8tfoQ+Qj3R/Wb9DL0A/PD8VXx7vD/7zLv6O6h7qjuNO+C76rvQvCb8CDwqu9w78Du6+257RnuTe5F7hHudu1+7LnrS+tC693rKu0x73Pyw/a/+oX90/+rAmQFeAbRBuEIlwxQD1AR4BRJGUMctx7RIfQj8CSYJt4neSZjJMojUiJlHpsaShjeFcMSVg/qC6cIcAXPAXj9pPi+9G3yPvBY7YXrwesY7NDrFOzX7KLt8O5v8KPxgfOv9Uf3Efkn+y78Bv3R/qL/xf5F/pf9cfs8+W33gfQG8hXxr++27evs/+sW6hTpnuhG55rmFud95zXpeeyt7q7wD/Se9pv4cPyC/3gBjAbfDL4QfhUZHMQgliStKT0tsS5zMGcxYDAmL2ktPyo7Jx8kwB/PG+YX7xEIDHIHfAG5+tT1efG67JTpeOcX5fbjVeRh5Jfkm+Wy5mjo7+rs7Dbv1/JF9i75DP2aAM0CiwVqCL4IiQfQBjsFTQK3/4P9QvuB+SH4d/bP9N7yyvAd7zLtk+
rm6Gvopudc54rox+mz6qvsFe+j8LbysPU4+A/7S/+YA6UHowypEboV9hkUHvcgiyM2JnwnqCcTKDUoRicNJr4kkyK9H5Yc0BhcFMAP5ArhBf8AQ/z792H0GfFL7njsy+ri6L3nseeS53jnlejQ6v3sWe9n8sn18/gP/PP+BwFmAsEDKwThArsBYQH1//X9CP0x/KP6avkM+Bz2k/R+88fxvO+Q7inu5e237dntcO5R72DwrvFq8wP11Pa3+ff8j//AAu0GcwqCDSQRiRRvF2Iavxx4Hi8gGCHbIGsgsB9RHgQdehvpGE4WERTyEOoMLQmiBbkBCf7L+tb3dfXR80ryuvBz74juyu1b7WTtxO2V7mXwr/K69NT2mvlM/Cj+lv/pAJ0BnQGCAREBBQAa/5X+sf2R/AH8OvvC+Y34o/fw9Sz0LPMP8s7wm/Dz8Mnw/fBO8tXzS/W/9kL4NfqK/HL+VQDJAv0ENwf1CfALQw2DD68RdRI2E1IUrxSpFPIU1RSKFHwUOhTLEzUT8BFcEO4OmwyECcUGBARCAUv/e/2N+4360vki+Ib20PXe9I3z3PLe8jPz8/PA9Kz1M/fe+Pr50/qk+w/8A/y8+0D7zvp5+iD6D/qL+vX6HPuH++X7qvtr+zb7rvpO+kj6SPpM+pr6Cft9+yL8uvwx/cT9cP4q/wAA4gDXAQ4DWQRJBRoGEgfhB2wIDAmLCcEJ9Ak+CmYKkArMCicLuwsjDA4Mygt/C8EKjQliCCYH8wUqBZkE2wNUA9kC4QGjAGL/yv0E/Ib6NvkX+GL39va99t72Hvci9xD37va09o72gfZh9l72o/YL94X3PPgD+bH5qvrX+7r8aP0//uX+EP86/3//hP9X/3L/s//k/zkA3wBnAb0BGgJgAmQCaQJ2AlUCPwJYAk8CSwJnAnsCjgLwAlkDiAPUA2UE5ARvBSkG6wbQB+MIkgnECeEJ0AlLCa4IJAjEB6MHgQfxBmEGxAWuBDYDtAHh/xr+Fv0y/Mz6v/lk+fD4QfiE96T23vVV9ar02PM/8wXzYvMo9OL0sPUa9734FfpY+338bv26/lQAXgESAggDtwPzA0AEWgQLBBoEZwRWBB4EGQTdA4MDMAOiAukBdgH6AFUA0f9M/67+b/5l/gj+wP0N/n3+yP5l/2cAagFxApoD0wThBYkGCAfKB1oIgwjKCEYJPgn2CN4IhwicB5QGfQUlBNICjgEgALn+mf10/CX78Pnh+ND3lfZk9Vz0dvPJ8n/yjvLf8qnz6/Q+9lP3d/i9+cv6zvsi/YP+r///AGgCcgMvBOcEZQV/BaMF7gX4BfwFLgZKBggGiAXdBBsEQQMuAhwBTAB+/8X+Uf7U/T395/zB/IT8jPwA/Yz9JP4Q/18AigF4AnUDiwRbBdEFMwadBtsGAgciBw8HzAaEBhsGbgWyBOED3QLKAcQAtP+Q/nX9dPx/+2v6R/kn+EP3cfa49UX1QPWD9e/1mfZb9xj45vi3+Xn6Nfsl/FP9Sf4w/ysAIQHsAbkCdwPkAycErQQrBToFQAV5BXMFFAW7BE0EiQO5AiUCYAFvANr/mv8e/3D+Bv7b/a39nP3E/fX9Qf7n/qr/MwCdADoB4AFFAn0CxQIZA18DrwP5Aw8EAATqA58DDANiAs0BQAGwAB4An/8u/6j+JP6f/e78EfxZ+8v6Jvp5+Rb5Afn8+Cj5j/n9+UT6pfot+5P70/sz/MD8M/2c/Vb+Lv/J/3sAcAE5AqcCIwO2AxgEUASNBMIExASgBHwETAT2A3EDCgPWAngC1wFdAQcBoQBCAPr/t/+L/6n/1//U/+P/NQCOALAAuwDBAMIAxADhAAABEgEcAUgBewFlAQUBtgCAADEA4P+n/3n/T/9B/zb/8P6K/i/+1v1S/cH8Z/xA/A387vsO/Ef8YPx0/JL8mPyJ/Iv8k/yD/Hj8o/zm/CX9dv37/Zb+Gf+Q/xkAggDgAF8B3wEhAmMCzgIbAxYDIgNbA3ADTQ
NOA2sDXAM2AzQDPQMcA+sC2gLAAo8CWgJFAi4CDQLoAb8BfgElAbwAYwAPAML/h/9m/0H/F//m/rb+g/5S/jj+Ov5d/pr+2f4f/2b/nf+r/5T/df9d/0//R/9R/2r/k//I//H/8P/F/43/Rf/d/lL+2P2B/Un9IP0W/Sj9Uv2Q/dL9+v0Y/jj+d/7T/ib/bP+8/x4AfgDHAAQBPwGVAQACVwKbAvYCTgOEA54DtAO0A54DgANpA04DMQMYA/4C1AKRAisCtAExAagAJQC8/3H/Pf8N/+P+wP6c/mD+Hv7m/bz9o/2p/cv9Cf5V/rP+DP9U/3P/iv+f/73/6f8sAIkA2QApAW0BfAFuASsBrwAzAIj/8f5y/hT+0f2y/a39wP3e/fL9AP4A/vX93P3I/cD9xf3e/Q/+Tv6r/h7/nv8bAJkAFQGVAQYCaQLCAh0DdQPBAwcEQwRpBHsEeARNBAIEmQMWA30C1AEzAZ4AIgDB/4f/YP82/w3/4/6s/l3+Ef7U/af9kf2d/db9KP6D/uD+M/9w/5v/uf/T//P/FgBEAHcAswDuABgBMAE6AS0BAwHGAHgAGwDM/5D/X/81/x//EP/z/sP+jP5W/hD+xv2F/VT9Mv0l/TD9Xf2v/RL+if4Z/7X/UwDbAFcBzgE4Ao0CzgIEAykDOwNBAzQDDAPgArYCfwIzAt0BmgFJAdwAbgAYAMz/hf9I/yb/E//5/uf+2f7C/qP+f/5m/lX+T/5f/of+wP4C/0v/lf/R//f/CQASABUAEAAHAAwAHgAsADIAKwAMAOP/vP+X/3D/Vf9W/2T/c/+E/33/Yf8w/+j+jv4u/uP9uf23/dn9Hf6E/gr/l/8aAJQA+ABHAYcBuwHdAfQBAwITAhUCDgICAvMB4wHLAacBjAFoAUQBGwHuAMIAkgBjADwAHQAAAOr/4P/f/9D/v/+3/6r/l/+J/47/n/+5/97/DwA+AGAAdwCBAH8AbQBRADUAGAD//+D/w/+e/2v/KP/g/pX+Vv4v/hj+GP4r/kr+cf6Q/pr+lf6H/mn+Sf4r/iD+J/5J/n/+y/4w/6b/GAB+ANMAFwFGAV4BZwFqAWwBcQFyAXMBdwFzAXEBZAFKAS0BDQHtAMkAsACZAI0AhQB9AHMAaQBXAEUAMgAgABMAFgAeADEAQwBcAHQAiACMAI0AeABdAEYAMwAZAPf/5P/X/7n/pf98/zX/Cv+6/nj+QP4j/hj+H/43/mH+l/7E/vv+Jf9A/03/WP9Y/1P/VP9h/3T/lv/D/wEAQgCEALsA6wAGARQBEAH8ANwAtwCPAGsARwAtAB8AFAAUABsAIwArADMAPQBHAFMAXgBwAIkApgDHAPAAGwFEAWwBjQGjAbMBrgGbAXsBVAEsAf4A0QCuAJIAewBoAE0AKwD7/7v/cP8W/7v+Zv4X/tb9pP2G/Xn9ev2J/Z/9uP3c/Qv+Q/5//sD+B/9L/4j/uv/k////FAAhAC0AQQBZAHoAnAC+ANwA6wDoANAAtgCOAGEAOgAaAAgA+//3//L/9//4//H/8v/u/+7/6v/w//v/DQAoAE0AdQCeAM0A/gAqAVABdQGaAa4BtwG3AawBmAF5AVUBMwEPAewAxwCWAGEALQD1/7j/eP9B/w7/1/6m/nv+Vf4w/hf+D/4U/iH+RP53/q3+6/4g/1X/gf+e/7H/vf/G/8n/z//Z/+r//v8aADAAPwBLAFEASQA6ADAAIQATAAgACgARABUAHAAlAC4ALAAlACUAIwAeAB4AJQAtADMAPABOAGQAewCXAL8A6wATATcBVwFlAWEBTgEzARAB7gDOALMAmgCDAG0ATwAwAA4A5f+4/4n/W/8n//b+zv6s/pb+h/6B/oP+lP6s/sv+8P4Z/0T/a/+H/5z/sv++/8n/1P/i/+z/9v8AAAgADgAXACEAJgAnAC0AMAApACUAJAAmACYAIgAfAB8AEAALAAgABgAEAAcADgAOABMAEQAWABwAJgA1AEwAZQ
CHAKgAwwDYAOwA9QD2AOcA2wDIALQAnACPAHMAVQBIAEIANgAWAAQA9v/Z/87/tv+Q/4r/Yv9N/zf/L/8p/yj/Lv81/z//R/9a/2f/c/+A/47/mP+f/6f/r/+1/7z/w//M/9T/3v/m/+3/8//8/////v/8//z//P/9/wIACwAYACMAMAA7AEQASABKAEcARQBGAEYARwBOAFkAYQBmAGsAbQBvAG4AawBrAGoAagBpAGEAXwBZAFIASgBDAD8APQA1AC0AIAAPAPz/4//K/7j/p/+Z/43/hv+D/4L/g/+E/4j/kf+c/6v/uv/K/9f/4f/k/+X/3//W/8v/wv+7/7j/tv+4/73/wv/G/8v/zP/J/8v/yf/J/87/0P/X/93/6P/u//n/AwALABoAIwAtADYAQQBJAFEAWgBmAHMAfgCKAJgAoACmAKoAsQCwAKwApACYAIcAcwBdAEkAMgAdAA8A/f/w/+T/2f/O/8P/u/+w/6X/nf+W/5H/j/+S/5n/pf+q/7T/vf/C/8n/yv/O/9L/0//W/9j/2f/V/9H/z//O/8v/zP/R/9b/2//h/+X/6f/s/+r/6P/n/+f/5//o/+v/6v/s/+z/6v/p/+z/7v/1/wAACwAXACUANAA/AEsAVwBjAHAAfACGAJAAlwCaAJgAkACEAHQAZABSAEEAMQAmABcACAD6/+f/1P+//63/nv+R/4n/hv+J/4z/l/+i/63/tP+8/8X/yv/O/9T/1P/a/+H/5P/p/+v/7P/q/+r/6v/o/+v/7//1//b/9//2//X/7f/p/+r/7P/u//D/9v/+//3//v8BAAEA/v///wMACAANABQAHgAnADMAPgBNAFkAaAB4AIIAiACMAI4AjACAAH0AdgByAGsAbABcAEcAPQA6AC4AEAAAAPH/2P/U/8H/nv+g/4L/ef9q/2f/Y/9j/2T/Zv9t/3L/fP+H/5P/mv+k/63/tf/B/8v/1f/f/+f/8P/z//n/+//8/wMACgASABYAHAAbABsAHgAhACMAJQAlACUAIQAcABgAFQAMAAoADQALAAoABwAGAAcABgAJABIAHAAlAC4ANQA8AD4ARABGAFAAVQBYAFkAUQBKAEQANgAuACMAHAAVABAACwACAPz/8f/t/+r/5P/g/9z/2f/U/9D/0//U/87/0P/Q/87/yP/G/8T/xP/E/8f/w//G/8f/x//J/87/1P/e/+H/6P/z//r/AAAHAAgADQARABUAFQAaABwAHAAeAB4AIQAhACAAGgAZABYAGQAZABYAEwAMAAcAAAD6//r/9v/0//L/8P/s/+j/6v/q/+r/6f/t//D/8P/z//b//f8AAAEAAwAEAAUABAAGAAIAAAD5//f/8P/y//D/7f/y//H/9//4//T/9f/u/+//7P/u/+3/7//v/+7/6//s/+v/7v/v//L/9//4//j/+v////7/+////wIABgAGAAsADQATABMAEgAXABQAFQAUABYAGAAXABMAEQAMAAIABQAGAAUAAAABAAEA/v8BAAAA/v/9//z//v/8//7/9v/2//P/7f/t/+v/5//p/+j/6//u/+z/7//z//H/8f/z//P/8f/y//P/8f/w/+z/7f/r/+v/6v/r/+3/7f/r/+v/7f/t/+3/7P/u//H/+P/3//j/+v/3//b/+v/3//r//v/8//n/+//+//v//P/7/wAABAAGAAkACwAJAAoABgAEAAIAAAD+//7/9//3//f/9//0//f/9P/y//L/8v/0//D/9P/0/+7/8//x/+n/8//q/+3/6v/q/+n/7P/s/+7/7v/t/+//8f/y//L/9//z//L/9f/4//b/9//5//n/+f/6//j/9v/3//j/+f/4//f/+v/5//z//v///wEAAQADAAAAAAD9/wAA/f/7//z/+f/6//z//////wAAAAD9//r/+f/6//z////+////AAACAAAA+v/z/+3/6f/o/+j/6v/r/+v/6v/n/+L/4P/g/9z/3P/e/9//4P
/i/+X/5P/o/+v/7v/u//X/9//1//X/8v/1//X/8//v//H/9//3//v//v/+//3/AQAFAAYACwAIAAcADgAOAA4ACQAHAAYABQADAP7///8AAAIAAgAHAAQABAAAAAMAAwAAAAAAAQABAAMAAQAAAP7//P/7//r/9//6//3/AQAFAAcACgAJAAkACAAHAAcACAAFAAMABgADAAEAAAAAAP7/+v/5//j/+P/5//b/9v/y//H/9//3//n/+//5//f/+//9//7/+f/2//b/9//3//7//v8AAAkAAgAEAAMABwAFAAQAAgAIAAkACAAMAAYADAAMAAkABgAEAAQAAgAAAP7//P/8//v/9v/3//L/9P/3//v/AAADAAUADwAVABIAAwAAAAMA+//x//H/7//0/wcADgAGAPn////7//z/CQAGAAEA+f/2//b/BgABAPX/+f/0//7/AQD3//v///8GABAACAAJAAIAAgABAPj/9v8BAAYACQD+/wQABgAEAAUAAwAFAA4AFQAMAAcABQAJABUADAADAAIA/v8HAAsAEAAYAB4AFAAMAAoAEQAVAAoABwAMABAACwAPABEAEwATABgAHAAcABoAHAAWAA0ACQAFAP///f8MAAcACAANAAwA///w//X/8f/a/9f/yP/O/9v/2//l/+T/6//0/+X/4v/e/+D/5v/s/+v/6v/k/+7/9//m/wAA8P/p/w8A9f8EACsAMAAgABIAGwAKAAYAFwAfADMABwD+/wgA8P8JAMj/+v84AA4ASgAyADYAXgBNADYADwA2AAMAGQCSAGQAbgC4AIUAdgCWAFMA2v+y/zb/BP8R////DAJdAVP/Tv5H/vf/zP8O/i//UgC/AK8BywLhAlcBHgGVAB8BLgIMArEBjABwAGb/kv60/j7+mP2v/Aj9S/y2/Nr9Xf1G/o7+lP6h/yAARAC/AAYBQwFpAVMB6AIiBVEGOwbTBYQFMAPqAkYGngYqBAIAEP0p/ZL9u/4o/Un7KfyF+337b/0g/3QAkgDZAJcAUwHSAyEDCwMJBWEFrgOJAikDTgJH/7/+rf0r+zT85Pua+nD7Bfyv+rn5a/p0+vv6VP2i/sD+hgB5AtcDRgSaBIUDtwEqAd3/Bf9l/yf/IP6Q/Gr7R/tM++v7Gf3U/ZT9nP68/xEA/QGSA+kCSgJqAjQC0gI5BKsDGQIvA2QDKgKbAbMBUwEvAUECbQAE/+8AxQGZAFUAvwCQAPwAhwGNAHQAbgIqAj0AfgETA8YBkQADAN3/JgAHAIz/Lf93AD4AKv7a/q4ACQD5/vz+OP7G/kT/0/6V/qf+cADt/4L+kv4r/nT+0f4P/zT/2v4Z/9r+BP4h/m7+LP6h/S79a/1F/sL+5/7X/mL+u/6H/2v+lP63/9X/gwDgAPEArwGLAyMDPwLqAmQDmAPyA7EErQQUBUMFXATSAzsEugThAygCTgDm/rH+Rv4x/an8+fsA+3T6UPpP+mH6OvqN+RT5nfi4+FH43Pc/+Lz3ZPgY+Uf32PbN+EX7uPzq/En/RAJuBGMH2Aj0CdMLkwzmDDUNeg69DjUNdwyeC3oKsQk7CA4GKgMuAd3/RP2O+6f6kPk/+Z35Hvqc+vj7J/36/cn+fgA5AsMCWwPvA5wDagOLA3oCJAHb/17+Yfs0+ED2lvPQ8L/ut+xH67jscvHb9rf4uvZq9r36GQKCBgIHaAraDn4PhQ/xErAVaRTAENgLIwlgCW8HpgBb+jD4SfVD8TLwnPGy8XDx5fHp8vv3v/4nAoMDSgYXClkNKhDQEvYUYBUcFA8SsxAbEGcO2QmoAw7+kvi088fvfesb5yfjPd8G3PfaX9xn33bhh+Zy8vb9MwNTBc4J0BFhG9Efvh4BIIwhwB32GMwXBxWiDXIF1f3e93f1XvGr6TTl/uQK4xbiwORO56jqTfCc9Ub61QEKCk0PThTPGWYdYR+QIB0gEh/IHYcaohUZEd0MggeFALP6t/aB8ZbsJeoC59viEO
Kw4VfdjNyV4ezhsN+K6WL5bf1b+5oBkQtyEa0Vwhj5Gl4fMSHeHAQaCxwAG8QTEAuJBJcCswCU9/ztx+tA6/TnZuUv5Kflp+l97LvtnfHy+UYBxgRICEAPbBfdG3kcDR2GH4oh8h90G8YWNBIHDZwGuf6z9yjzou6z6ATjH9/d21bXnNSN1zPYaNd043P1vPdw8N71jQXCEFIW3xp1IK4mHCkEJQ4hFCPtIw0c9Q+nCIkHsgP39yDsVec45lfiYt3H3jbj4+Qj5mfp5u/h+Az/QwHDBjYRrRmVHNQb6xz6ITUkuCDfHToceRcSEbkKMQOn/BD4mPCH5hbhPeAT3KXUXtMl1jvTW9Pd5Vn6kfdr7+T65QzNFUgcuCBeIoEoRSwmJgkiSiUUJMcYcAuhBRMG/gCa8o/nx+UN5F7fF94z3kjd2uC15ufoYO7t+BT/FALYCCIS+RitG18dVSFtJVElHyPaIEAc8hVyEOcJbQH/+uj0Wevy4wPhl9pe0YDPQNE0zdjN6t6Q7o/s2Orn+OMJLxKkGPIgDicSLPsvYi69Kv8q+Sm7IPoTrA0ZC5oBcPLK6NPl/+BR2tjYHNo52Xbbw+Ir6JTsL/Wv/Y8CRAqaFSUbWBsgH2EkRSTSIRshuB0wFjEQeAvrAyz9rvj17+3lt+HH3YLWR9KX0eTOddAL3+jx4ffo8WT0rAZhF6kcWiHTKHIsai4oL+MpryXUJrsg1xD9Bu0FFv+08LHknt2s2pDaLNgJ1VDXAt5X5CTqkvCt+D0B+AdUECkbNCG9IiYmACnYKCUpRSiqIZkY6RHFDK0GuP609JPpJ+Hr23fVJM/7zTbLJMWJzPri2OwN4zLkN/n2CLoO4hdiIHcmrS/CMkwsVCvMMIktECGMFpARVA1MAz/0aukM5YjgR9u42B7XTdfd22rgDeNi6Qby7vicAQEMJBJKFU8bPSLiJHcl/yZIJd4fzRu/F4sRBAssAoT2/+6w6yHkKNre1azRicmJzDXg7evN4SPdbu4GAUMIAA/0F1QfeyaEKvooNClmLmEvNyZcGnAVdhNJCsP7RvFv60bl3N4V2uzWgtb12SPeb+CC5Cfs7fPe+7EF6A3oEkkYBB8EI/MiPyO7I74faxktFuUSaAtBA9T7avLS6fLlaeP/2q/OTs4d3ePjatoH2x3rzvPS98ADJg6VFPkdESSmJF8o5i3cLX4oSCPxH48aFBEWBzD/Gvj88MbqiOVO4WvfxN4N3crbZ97T49HpAPJP+zMB1wVkDRcUexc2G7ofeCGoHosZ/RZcFiYRwwftABL7bvSJ8W3rj9tI05PdFOec4BfXTNs26RTwa/AO+VQICxJOFQUW4xnKI9wpQSbiIeQhtSHxHJsTBwp1BDIASPjq7rrp2OhV53niJd/t4Xrmw+en6dPw/Pj5/ekCkAkWD1ATdhgoHGUc4hstG+4XoRJ6DBAH7QLX/Aj2MPOE7CveCdp85bbp8d6h2Xfjd+6T8Ijy9vyKCV4QHhNiFdUZfSIMKN4iphtBHLwc4hQ8C+8FtAFF+3jzJu166qjqXenG5GDh6+Lr5rjqI/Bn9rv6hP4cBJ8K0xC1FWAYqxlBGU4X/RbrFkkRMQnUA2r9h/b/9CHyP+cg3fTdWebg6iTl/+Ep7JH1vPb/+vMEvw78FTcXWBU2GQ8hlSKCHKMXRxb2EhcMCAWP/wz8zPg8827toOzH7r3skeYW5O3oc+878lH0gPnc/sgCmQdZDNEPpRMDFzcXXBTAECUO4QuACM0D/v0T9/vx9e5O6MvgCeRC7BXqL+NM5kDwOPcz+vv9ZgYXEIEUhRQ+F1oe4SJSINIaGRgmF7gT7gy0BUgAT/sn9RjvQuuO61ftQeqp5PjlsuzY8EfzhPeV+5z/hgUEDCURbxTgFcUV+xPDERQRPg9/CdIDtf9s+c/0VfTx7RDin+CA6gXuM+d948np9vJt+I37OgCFB60PShT2FJIXBB49IfAcixYSFX0VyxA5CIsBRv
0q+ab0E/CQ7Snt4Orn5qXnqe1z8lT0aPa/+f39wASIDLQQsBBqEJYREhN6E5cRSA07CDwDjf6r+iv4y/We70DmJOPT6SvvveuC5zHqePFz+Bn9qQGaCNQNCw8XEm4ZWh+cH50b0hfgFqEW7xJVDOwFiQCM+/H2VPMJ8QPusej95Ljn4e0O8e3wTPKr9qz8rAPICeEMKg4lDwMQnxEaEyARFQy5B9IEZwHt/Gz4cPRG76PpXOlE7pTvEOwL6mjs5vH59+b8XAJDB5MIcAnNDgYW+hnPGe8XtBZdFhkV0BHEDZYJ5AOD/fb4vvZR9FnvIumu5u7pZO5f8JLx+fN996f7OQCsBVML5w1fDZkNNw+uD20OcwwfCqEGXQFP/J/5IPcV8mvtO+578bTvV+rE6nLwzPMC9nL77AA+BJYHFgrXC/AQWhcnGGEW9xfzFwsUnxH9DyoMNgcyAZj7Vvls90/yQO1I63/r/exU75zxUvPY9Bn4zf2jA3YHxwkWC6ALPAwNDQANuwvKCb0GkAJf/xL+Rvzf9yTzYfK59O/zn++a7tTyIPZa9vr3o/y0AXEFcAc8CfkM7BD/EZAR7hFBErwQ3A11C3wJ9QU3ARn96Ppl+db1QPF471Lw5/AY8Qry0/NE9g/5I/yE/98C/QWgBw4IUgq4DFUKywbgBi0GNwO6AsQCC/7b9xr2tffU9/70zfGg8u/2xfjT9+H5Lv7BACwDVQfOCn8Mng0vDrcOQxDDEOUN1grzCZUHygKK/7L+iP2a+lH2U/PF8xD11PO/8kX14Ph9+kr7Ov0zAHQDKwatB+AIFwoVCqQIdwehBvwEqgOcA4ACh/7e+Rj4svjI99z0XfTN9Z70xPKs9G74tvso/m//RQHEBBcIuQlRCtsKVAtjCzcLFQpeB3EEZQLnAHH/s/1i+zf5q/fX9tP2jvcK+SP6OfrK+x3/mAFIA0EFZwaYB/oJEwuNCWMIXAhBB/gFkQUeBEUBBv6W+kT5xPqo+QX10/I580ny4fGk81z1SfaZ9/f4h/oY/u0CvQXYBoEIVwrzC98MrAusCYIJogkkB6YDlAG2/z/9Qfss+RH4qPm++of4DvdB+uj+9QBlAQICBwQiCBAL8wk7CckK8QoFCrgJ+gdhBVsECAKY/ff7ofxL+j/2XfTG8krxxPGK8fLvEfFK8xnzFfS8+AD8LvxG/vIBugSfB7oJSAokCz0MgwsICrIJ5QhPBw8GgQNTAGj/YP8x/Un61PnL+ir7e/t8+xf8y/68ARoDtQPIBAgGBgc0CEMIZgeHB/EGUAMuADYBOAI3/5v7Evoa+an3cvaY9bH0tvRJ9bD1JfbZ9o34yPpv/GD+sgCpAjQE6QUGCAoJSAo8C0kKFQnRCAEIKAZPBCEDcAFw/vr8S/wU+076Qvr8+p37bfwS/d3+sAAiATsCvAL0ApADXQRzBFIDSwNkA3cCwgAS//n9Fvwz+gz53Pho+ab4Efho+On4y/k8+hb7kvzK/YD+8/+PAjQDZgNtBH8FlQbYBsUGFgacBV0GUQX6AvYCjALHAML+Zf1s/dL9YP5S/Q/9Tv/F/1b+0/76/6UAqQFxAu4C+wLvAhMCSgK2BIEEZQGm/yv/xv2A/Gb85PtZ+6L7jPo8+RH60vsp/Vf9Jf1X/Rz+K/+lAO0AQwFYAmwBvAG5AvQCxQLRAkoCOAD9/xIADgCB/4T+0v5H/rL+ZP75/ED+rv8VAAMADwCiAM4BcAMCA2cCwAMbBBMDBgPhAusCmgPgApMAv/5w/2P/f/2v/UL9uvwy/Xb86PzI/Cr9Gv9q/rP+w/80ACIBFgCPACgC+AC8AYAByf9UAXEAhf6C/v39Ff7F/VT97Pxh/E79sf2k/CX9SP6r/2kASgDhAAMCxQJ4AiID9wOHBDcEPQNrA9YClwPtAuYA+QHFANv/Ov49/OP9O/2f/XX9a/s//bL93v1t/yb/fQD/ABMA6AAxAc8CJAQYAckB1gIBAesAy//H/z7/zv
0y/fH7xPxZ/sT8KvyW/fn9o/7F/Tn/bAECAQYBrwCRAOkAQAKnAyIEsANEAnkBkAGuAi0CggDXAI3+ev20/XD8X/1x/Vn84vqo+nn9tf4B/r/+Tv8PAEkBEgGaAfMCggM4AwICyQIbAzUCZgNbAhoAHwBoACj/tf/XADz+hf41/80ASACH/iIBi/9mALsCPACBATwCdQEjApwAvALZAusBMgThADD/lgD7/Sj+zv1W+8j75Pvl+oL6BvwO/c78h/3Z+wr9wv5D/m0APABmATsAgwDhAnQBtQPOAl0BpgH7/6YCLwFp/5b/l/1j/X39gP7m/Tr+yv2m/R3/Q/+mAFgCVgJnAm8E2gEDBF0GfgT3A4ADfAMDAeoAWgEsAU3/Z/5h/5v98/zd/aL8nf2R/rL94PyX/RwAX/+2AFwA9v/1Ac8BSgFFARECNQL7AeEBhgFvATYBCAEHAKj/CQCA/gH+Mf85/439Vv1E/iL99f5t/kT91v8G/0v/rwDUAO0AmgDxAHYBawAEAHoBlwCVAM0A8/9m/jT/BQBX/Sv/cf+2/bD+Vv7D/i//FQB6APv+mP/k/8UAfQG7ARYD3QLLAnYCngKaA90CZQKqAesBYAHr/2wBMwCF//b/cP51/q/+/v7b/sv+5v/K/0T/u//n/0wAlAH2AEEB0wCd/ywBaABK/0QAdwBO/47+Hf/1/gr+DP58/i/+Wf4G/vj9fP4T/4z/JP84AB4AVv+2ADYBLwEHAmgBHgFAAUcB2AHQAJoAiACn/z3/8/69/kP/Lv/p/h7/Rf6R//X9CP8/AYAASgEaAg8C8ABMA2ECNgLoAjUCZALDAIQBvwFDACsBhgD9/s3/N/+P/9H+8P+q/yv+Qv8I/gT/CAD2/0oA+P+9AKsAiABMAboAgwGaAMf/DQEVADn/rv9h/wL/bP4k/mv+v/1T/u79jf7m/mL/HgAI/9AATgDoAGQBkwDNAWIBggGpAGYAwAETAbQAsAApAMT/jv/qAI7/E//A/zX/TAAD/9D/GQDh/okAMv/Y/5QAmP8oAZwAsAAiAXUBIAFC/x0BBQAu/5IAZwCz/4H+Cf/E/rn9xv1c/fP+c/5R/pD/If8JAJf/KgB+AHsAJgEpAQ0CogDTANACLwGPAX8AwQAkAfr/0v+l/ub/5v4W/8T+S/5S/pn+xP5Z/isAnv/R/43/6v+TAAwA3ABWARQCrAHxAG4B0QBgAcsBvAH/AC7/4P9d/8P/FgCS//v+q/53/yn+6/7d/4X/IQAeAJX/j//8/1YBJQHlAA0CAwEtAbkAgwAbAZEAfAHN/0r/8v4v/un+aP7W/1X9o/3K/S/96v94/oT/Xv/L/xEAW/8CAWr/uQA6ALL/3f/M/v4A8P/0AOcAHAAGAvYAFAAKAJ4AkQAQAAkAfP8CAP7/EgBMAEEA0wDT/+4AuQBpAPcAEQDDANj/HQBfAB4AggAtAMQABf9V/+n/6/6o/zv/yv8G/4T++f6j/6v/lf6p/y3/Vv+a/0D/rv++//7/KgAKAFr/DP8tAGMAKP/k/0cA9v/6/08AVwDk/gr/zf/9/3AAq/9h/87/jgBHAKr/rP9nACYB7f+bAPoA6QDrAJQAzgBvAMUAHAC8/3kAFADo/57/EwBxAIr/8P88AEoAdAA6ADUANAAGAB0AZwC/AHEAWABOACsA7P/i/xkBKQAbAAAAWv+q/xH/+f/P/4n/FP8z/mr/Cv9y/+b/xv/n/wf/Vf9E/8v/qgAgABQAvwCb/wP/cACs/zUA9f9o/4z/PP9ZAOf/7f+U//X+KP+G/8r/uf9tAAIAO/9HAL3/IAATARQAFQG1/yQAjwFiAO4AjgDe/8L/ff8YACkAqP/U/zz/9f/E/7D/bgDO/6///f/T/5T/HwBj/2wAw/+Y//MAMgAlAGEA2P/Q/64AWv94/zkAuv/n/+P/z//Y/9H/IgDu/5f/UgCBABEABAABAJAASABeAIsAZ/+f/3cAFwAYADUA2v+6/8j/g/
+W/9X/QP8d/wn/lADi/7b/HQAr//b/v/9pACMAkwCrAKf/OACDAAgAJQCCAD0A/f+IALAAjP+WALEAFADA/8T/GQD5/6kAewA8/7b/IABm/5UAmv/g/7n/KgCaAAD/CAArACQA+f8AAO3/c/8rAJT/0f8CAVYAnf9H/+7/2f+3/8H//f4QAHP/Of+i/7X/IgAS/8n/MwCn//v/vwCWAMn/7ABkAGH/PQCN/zoAFgBz/0YAMf8AACb/Sf+MAJ7/sv+J/7T/vP/T/zAACgDt/97/agCq/xoA4ADA/9cAHgDB/ywAIv/o/8v/sADGAG//IQA7AMH/TgBWAFv/awBTAL3/9v9q/18A1f/U/2MA7v8tAFUAxQDEALsAQgCq/4EA8f+X//IA0v8DALH/x//s/8P+ZQCv/woAk/+7/98Axv4KAK3/e/9cAIn/NADo/y4ARQCSADABYADTAGj/t//SAOz/SwARALn/pv/U/xT/bP/+/zr/vv8i/33/fP/r/2oB3P9IAHYAVQBVAKAAqgEs/zkACwFQ/yAAGwBM/+L/iQD6/2D/e/9//+P+Y/8SABP/Dv+i/8r+9P61/6v/TAAjAN3/EADr/8MAxv+6/0EBNQBSAJIAWP/T/y8A/P8wAM7/1/+q/xP/lf9SAPv//P+A/9D+nAA/ACkAegGr/0MApgCFAJoA+/9EAeQA6f96ADEABQASALwAegB0/0kAj/8fAHH/gQCwABYAHwA1/0IAJgApAOL/SgAFAJb/tP+g/ykAdQBrAIEANgD3ADcACwDDAMD/1P8K/iEAbP9E/loAJP8m/x4A7f8P/0L/SwAjALH/EgCgADgA/v93ABv/8gBkAYb/1QCc/7f/ev+X/yoAT/5PAIf/rP+TAIf/SwBh/38ALwAYAFgAkv/eAPD/bQCgAC0AhACc/5UAdv/D/zgAev9WAJ7/ef/M/8f/if8yAGP/ev/2/0X/CQCO/+z/JQAJAGYAEgBgAAsACQAIAAMAPAAZADoA+f9mAM3/5v8bAI3/ZQCy/0kA3P/S//3/oP/oAHr/pwBZAGMAeAGG/9IAlQAdAJUACwBJAPL/NwDJ/3j/pQDG/wEAVwAp/7D/nf/t/xUAjf/d/+P/cf+u/z4ALQCCACUAcf+q//X///8tAEYACQC7/47/OQBx/8X/tgBW/3j/xv9i/xEA8v9r/1oA7f/M/wUACwBsAD0AcADw/04A6f9JAKUAIwA4AFkAKQDr/lcA7P98/0YAnf+6/9X/+f+5/yH/pv8AAE7/SQCM/2L/BQBBAEYAqf+5AMf/7f9bACcAjwB9ALAANwD5/yEAJAAxAM//rv/Q/03/t/9l/3EA/P9k/1oAU/+h//z/nv+i/87/NQBPAN//gv+5/1MAagAgAFwA1f9IAPf/TwCaAPP/5P+L//L/yv8+ACMAHgA8AMn/HgCu/4//gADT/2MANwAeAHoAjP/JANv/LgB3ALr/NQDV/xUAhwAbAP7/1P+b/7f/yP/n/5H/fv+2//j/+/8CABgAJgDJ/5X/rgDl/3n/WwDL/5v/GQA1AGD/KABwAFb/5v8kAOr/CQBYAMv/qf/0/wkA8/+e/20AXv+e/0kAnv+l/xoAUAD4/2YA1v/C/7z/NQCQ/73/iwCv/+//GwDw/7v/r/+1/yoAjf9PABkArP8xAJb/gQCV/wkAUQAXAFgAi//e/0QAQwBMAD7/LgAWAPz/ugBE/+D/IgCq//D/YwBk/0AAVQBq/1YA4P9MAPD/3v8KAPb/1P/E/6wAuv/+/0EApv9QACQA8P/S/wYAyv+a/5QAnf/Q/w8AZ/9jAH//2v9jAM7/7/8jACMA/v8fAMb/HABbABcAAQDAADoAm/8WAKf/cAASAOr/w/+z//P/Mf+wAKz/sf+4/2D/VACq/0EA//+eAAAAPgA0AWj/kP/p/7L/uf+E/5L/T/85ALj/Hf/9AL4AdwArAGoAdACO/zwA0v8AABwAlP9N/1j/FgDF/5cAtACg/+X/cf
9W/47/HwB/AEH/uAAeAMj/qwBB/4QAy/8QAKMA8f/k/xb/8f8jAAgAlv+2/s//0f9L/53/fP/o/xcA///f/zMAUf8dANMAzP+jAFIAkgA0AIkA8QDi/7f/pP9yAIv/w/+yALr/HQCz/5EAnv9KACoBwP7M/zUAAwD9/4wAoQCE/83/XwA+AOf/NABoADf/ZACEAP3+QACg/y0AkQC0/y4Ax/+e/6//vP9sAK//qf+T/xEAIQAg/+UAkP+k/y8ApP+XAHP/HwBMABgA7/82/10AFABtAHEAtP+MAIn/1v8sAEAANACx/x4AZAADAPv+bQDD/wUANQBN/9b/sv8vAHsAkAAJAEP/mv/2/8//pwDGALD/Qv8YAOz/4v+pACQAEgA3/9b/dgALAKAA+f+a/6P/0/8cALb/6v8JAMv/AgB8////JwAKAHEAFwA4ABP/JQC0/5j/4wCP/1IAlf8hAL4AUP8QABoAtP/X/8H/1/+k/zoAngAFAFcAcwDB/7z/jgA8AEQAZgDs/+v/EACNALX/9P/A/wUAagAVAC8AmP/7/1IADwBiACAAkP/B/wwASQDf/4oABgBu//3/wf97/8P/iAC2/5X/pf/H/8P/fv85AMf/BABAAOn/+//M/4QAPQDA/1kA6f8GAP7/z/8OAOX/MAAAAC0ADADw/yYA3v9AACUASQCc/wwA+P+K/68Auf8JAC4A0/+4AID/p//5/7H/dwB2/5L/CwAZACwA4P8RAAIA5//a/+3/BwAnAL//sv96AFkAc/8AADQAsf8rACcA6v/H/2EARgCd//7//v8VACcAxf8WAPn/7v9/AKr/wf9bAOL/QQDo/53/6v+x/0YAqv+6/wIAnP8VAH3/HQApAIb/BQDJ/+T/EgD//9L/dv9IAHMAq//M/8b/uf/I/wMATwBRAOf/cQDr/wQAXwDT/4kA0/8gAA0A8v+gAAsACQDK/7H/fgDU/3D/IQB3//3/+v9o/xUA1//t/+r/zv9DAPT/OwA0AB0AnAAuAIEADgAzANEAPAB0AFQAGABTAFMAn/+z/73/Xv97/xD/dP8u/8f+uf6j/tP+iv7C/o/+l/4M/27/9f8nAJ4AIQGNAScCFAP9A/kDmgTBBGcE5ASQBI8EYwQKBIADPgJkAZYAw/8m/+39zfwY/JH6XPlV+Bb3rfZe9u/12fU19e701/S79Jj10fZx+Oz6ovxq/jIBqQKvBC4HlQlTDWwQSRLgEnsRVxEIEbUPoA/DDTQLrAccA7X/+vxU+zv6y/i89m70qPIP8tfxDvMx9R32GffX95n3xvcM+ZX6lPx8/mH/YP+m/q/++wAeBRgJLwuOC0EKTwg1BxkGCAa+ByYIuwWdAqj/I/6s/fz90f7C/hv/3/6C/dT8gv24/iz/uv4S/8/+ev4B/43+Tf+OAOAAoP/E/TP9M/2e/Zn9Ef3P/Br81vq5+bT45/hI+Vn5AvqD+oX8HAFlBEQD9QPaB/oIrQd2CMsK/guzDagNVgrMCMUIEQcQBUIDIAGU/iz8sPpo+Qf5Yfhd9jP1lvVp9i/3Wvil+eP6k/ym/nYAUQKTBJ0FdAXbBA8EdgPVAWoAWACK//3+Yv18+9v5lPaw9Ij4xf+iALr8u/2aAF7/MwDpA8UG6AnoC5UKSwe2CREOewvFBx0IagZtAfP+a/7c/GD7VPly9kf1dPUW9Ub05vUG+En50vvI/YX/kQIqBbkElQS9Bi0H8gXFBUgF3QONAs0Awf2k+i/4efTB8ib4pP4l/uH6FPs//X0AXgSOB5kL0A88D/IIhwbbCnsNYwukBhgCUQDJ/hn74feo9z/4OfRz77Du3PB19GH2ePb29qn6AgAbA2MFVwjyCoULHwv0C8ANZQ74C4EHGgTSAer/Qf1L+aL1LvPE8O3uU/OA+6f8sfdz91v8sAEkBqkJqA0mEdoRsA1kC+QQ4xTbD0sH4AL2AKv9wPpB91D0x/Ha7KbnuOeE7QXxCfHt8Xr0FPjg/GgCMA
d8CwwOig0RDWkPfhGrEMwNOgpWBhQDwwA4/Cf3/PQ68tbsfupe8JT4ifoq9obzDPmXAPUDoAhlDzoUrhOQEP0PdhJIFlQUaAwhBmoDP/8k+jL5BPfy8HLrVOno6dTrfu7C7yXyVfb0+Wr+IAQBCr0Now+KEFIRaxTFFtMUUxAzDKcISQQ0Adz9IfcS8JjrE+g45UjnWu/R9D7y8+668kn7mgGYBZUMmRKRFLUSQBAmE+8WGBbvD54IagRpAd38+Pjk99f0We6y6Q/rne6c8JjxYPPI9mb6J/4ZA8sIow21Dx8P7Q5iEBwTDhP0DV4IbwQTAMz7E/mv9IruEeoJ56Xj1eWq8J/6wPt/96f31v4cCacPgxLZF9sa8Ra8EXMRgxV0FWEOBgYnAIX8Evll9pL0OfJ27vDp9ui27UHzhPUG9gj3wfk9/vcCyAf2C00O5A3QDOENaw+yDwgNlAdxAgr/Zvye+YT2IfIc7rXsr+r159rr2faK/i38KvhA/aEFUQq6DfEQSBT3FagTxw7yDqETvBBRB0IBoP4h+wP4gfex9krzs+817qPvzvMi96j3Pfi0+sj9PgECBbcIyAuPDXYNzAwADtsPqQ6HCoAHIQTA/m/78Pky9rTxoe9h7RnpOuYQ6zH2afzx+MX0+/cN/50FJAsnEPcUJxUiEAkNyg/eE4QRsQkrBLYB7v2F+U/4JvlI96zxg+047rfxd/VI93/3Evme/NIAogTcCLAMTA4vDosNTg7gD7wPhAzLBwcDJ/5o+kf5CvjZ8/7w6e8H7HzoCu1F9k77oPom+Hv44f24BK4I5AzOEm0TDA5WC30NfhD9DkwInALVAKL/Wvx2+sX6SfnY9NvwbfFY9Tn4R/hT9w34V/sV/3ABqQT9CCELWAqtCScLCQ1yDR8L4Ab6ArP/kf1E/An6HPdO9SHzF++c7HHx2/k++xr2+vZ7/Tz/dABUB0ENXw0qDAgLlgq9DbUQSA2iBwgHcAYpAYH9Bf/i/w38NfcD9rv3KfnU+Jr37vaQ+Gf7y/zA/UcAHANUBPAETAZOCG8J9gjrB6wGjQS0AYf/mv1z+vn3vfc79cPvtO4l9Cn4+PUu81f1dPnx+yD/JQTuB00KkwrOCBoK8Q7mD/kKugcLCPEGvAOYAXkB1AC9/bn5TPgw+tT7mvq6+JX4ffnp+hT97P4fAcMD9wQoBe4GxgmXCpIJpwjWB5wF6AIoAcX+bfvn+QX5evS673Pxh/ZV9wD0jfJq9eH4wPrp/WoDegfSCFAIMAidC7QPqg5tCgQJAgpkCFAEbAJwAmQA1vtt+O33UvmQ+cP2zvSa9/36Xfrl+aH9+wHfAnwCSASWBzkJXwjHBnEG2QaCBWYCgQBa/0/8ofnZ+LH1yvH48074YPf29Oz1wvdy+R383f7uAvYHjAndB8UIMw1TEIEP3gyaC5oLuwnoBTgDKQLh/+T6ZPas9sD4hPfl9Kj0dPY++Jf50PoQ/SYA+wCFAGwDQQiRCfsHQwetB4AHbgbtAxIBsf8F/uP6lvjU9Wjy4fN8+M/2cfE+8i324vZw+Dj+DwQHB6AHYgaNB18OnhMTEfUMTwzxC1IJTwb8A24CXADS+yL3+vZJ+bv45PWh9NX1Yvj8+iX8Jf2wAG0DiQKfA94ICQxJC0AKKgmMBz4HKwfDBEUBJf8Y/Vf5CfUg8yH1Vfd89IHvke+88/T11fZu+uj/LQMCBJMEkAYMC04PUA/eC6oKsguWCb0FnwQxBCoByvzs+Pn2O/gQ+hT4vPTy9KL3u/ml+179VP4UAA4DyQTnBcAITgssCxcKmgkDCVYI+wasA6kARACN/pT49fNk9W74mfcM9EPxLfLW9Wr4UPqX/uwCNgRuAykEnghjDUsNjgrrCQEKNgiJBc4DHQMiASb9HfqL+Y/5FPn69+T2EPd9+JP5R/pK/Bb/OgBdAFoCzwUYCOIIHAkvCdkI9Ae3BpIFBgSZAVz/r/2W+tH25fUd93H2c/T68+H0Df
ac95X59vxGAW8DNgNgBOUH9gqlC3cKagkWCdIHpQW9BNoEaQM2ADb9TfwF/aH8avr9+Bb5QvmT+dz6gfxe/lcAsgEHAx8FagZRBjUHQAkQCZ4GwwTUA1MCrQC4/2T+8Prm9tf1VvdY93b1cfQ+9Wv2LvfA+AL8TP/dAA8BRgKvBYQI9QeFBjkHawilB+kFAQWiBHgDOgHX/jT+1f66/e/6GPpz+zr8IPz+/MT+HwDFAAIClAToBhoHiQYhByQIqwecBX0DEgJ9AKD+9v3p/Vb7BPc/9b72t/cF93z25fbe9y35bfqI/Mr/ygE5AV0BGQSRBpwGmAU/BQ4Gmga3BY4EWASCA0EB2/83AOf/pv0g+0P6T/vm/M78IvxU/RD/Gv+u/3EChwRZBDUEQQVIBkkGSwXBA1ACtwA2/xL/bv9i/dr5XfhA+R36SPrB+fT44PjS+Xb7vf2I/7b/cf+YAA0DiAXbBvAGsgY3BwoIQgjeB+oGAgUOA6ICDAPXAe3+2fsW+iD62Pqg+jT6p/r5+sH6cPxRAM0CUAIWAuID2AUXBv4E/wONA0cCOABs/6X/G/4d+zz5F/l0+ar5HPkY+J73NPhn+fr6f/yc/Ub+ef/8Ab4EBAZVBhEH5AfpB+UHKAjyB6kGngS/AuwBRQE9/338Bvu/+oL6nPqG+zr8Zvyg/KD9CAD9AvYDgANoBJAGXgeXBpwF3QTvA6ECRgE5AL3+Zvw2+mb5hPmz+TH58Peg9qj2WvjP+rX8e/1q/Uf+AAH6A24FPAY0B9IH0QfQBy4IhgiSB/sEMQLLAC8Atf5f/Ev65vh1+JD4pvgP+Zj6D/zN/En+tQDBAjkEjgXiBq8HtwcxB10GPAVABK0D+gJBAaP+Nfz5+or69Pm++LD3Rfca90T3wvjU+kj8X/2D/hYAGQPvBW8GrgZECNcITgiYCNYIQge7BEACOwAu/3T+dPyU+dD3z/du+Nf4HPnz+WP78vx7/jgAuAEKA5IEQQY3By8HlgbNBQEFJwRxA9ACJgFf/hb8N/vU+jf64PhL99D2ifcw+Bb5y/qu/BD+bf9nAQsEFgbnBmEHNwjiCAQJpggMCAMHLgXXAjUBYABi/6z9hPuk+R/5Dvoc+xv7+vqe+6X8Ff7v/1IBNgKXA2MFPwZHBkYGEAZABVQEXAMgAnEAE/6J+yb6nvnm+Nb39PaU9gr32ve9+B765Psm/VD+WADrAqcEWwWkBUwGeQeOCJkIoQdOBhcF/wMWAxICoQDP/tz8K/t8+rH6GfsU++j6Kvtj/GX+RQByAa4CSAS0BWMGeAY2BrUF0gSsA+ICOgJwANj91/vW+iX6wflA+UX4nvfz98n4Ifrt+4L9eP6n/6oBvwO8BDIFAQbiBh8H9QbNBnUGUQWkAzACUQGKAED/VP2N+576ffqE+oz63vqA+/L7Wfyg/az/7wBtAZECZQRRBR4FrAQgBD0DkwIpAjcBZv9I/Wb7M/rD+Qn6Svrf+Vv58/l3+yP9t/72/88ALAIlBKAFEgZhBvAGWAeBB70HswffBmAFBAQ2A5gCugFSAGT+nPyc+2j7U/tG+1D7efut+zX8Wv2y/oT/SwCEAaYC4QKoAn8C6QEQAf8AIAHa/5j9J/xr+7H6MPoq+uv5ZPk7+dT5GvvF/Cj+CP8WAM8BWwMuBNME5wUKB50HkwdhBx0HYgYbBQUEXANsAsAABP+c/ZH80vth+wr7HPuX++j7GfwO/bf+FQApAZoC7QN3BE0E3wNBA4cC5gGeAUkBHwAh/qj8RfxC/N77g/tB+9L6h/oP+2b87P0Q/wEAHAGJAq0DTwTuBP8F0AbbBosGQwaXBUAEiwI9AYcAi//N/fH74vqv+or6O/o9+tL6XPuu+3D80P0t/08AcAGqAngDsQNuA+wCXgInAjACpgE3AN/+Fv5J/Yb8c/yH/D78zPvX+338pv0E/wAAqwDsAWEDOgSABOUEfwX/BRgGDAYDBpMFOgSyAtYBPwFWAAr/wv
2n/Kn7H/v1+uT62foF+3v7XfyX/b/+jv9eAE0B4QHKAZoBWAHHAHQAkgAFALD+h/0O/db8sPxu/MD7G/sr+5v7jPw1/tX/hwAcAWsC2AOrBFkFMwbiBgkH0gaaBnQG8QWcBPQC4wE6ASQAev4S/Sb8nvtK+x37JvuN+wf8SPzc/F/+DAAVAa0BPgKVArgCjALlAVkBVAENAdj/Z/7L/aX9M/1t/Pn7x/t++yb7X/ts/PX9Q/8eAO4AUwLiA/AEcAX6BYEGowZLBtQFZQWoBCQDRQHl/xD/HP7S/G37S/qg+XT5mPkN+rn6UvvD+6z8U/5NANMBlwLCAuECLwNrAywDpQIgAnsBlgCq/9r+WP7h/Vj93/yI/C78Dfxr/Fn9k/6w/2IANwGXAiYEOwXtBWYGsAa6BpUGSgayBY0EAANsAUIAVv9D/vT8kPsz+nj5ZfmX+cT5F/qs+of7rvwd/oL/pQB5ARYCbwK1AtAChgL4AXkB0ADd/wn/mf4B/lD90fxq/Pj73/sn/L78sf31/vz/2wALAqYDEgUOBqUGAgcbBwIH2QafBuEFcASlAgkBzP+x/oz9afwx+xb6aPlY+aX5WvpQ+xf8s/zf/YX/EAELAp4C+AJDA2YDCANhAvMBbgFpAEv/v/5k/qX9mPzD+y375vrf+iD7xvvZ/N79zP4JAMYBbQO6BMYFmgYZB4UHvgd5B8AGiwXRAx4C0QCp/1T+BP2l+yb6AvnF+BL5avno+aj6nfsH/aP+NACdAcsCewPqA0AEWgQUBJgD8AIoAiABHgBU/4/+j/2r/Cf80vtk+1n72/uj/IT9Z/49/2AA6gGfA+IElgUXBpkG6wa8Bk8GuAWIBM0CRgErAAH/zP2A/Bz78Pl7+Wj5d/nO+Y/6XPtG/In9Gf9zAHMBIwKqAvkCHAP5Ao4CLgLzAT8BMgBu/+X+DP4w/bL8avwa/Aj8Svzz/Pz9Jf8SABMBXwKtA7kExwWrBjEHUQcZB70GYQaWBRkEdAIQAcX/kP5m/TT8+PpL+kT6afqV+jX7JPz7/Nr9T//9APwBHAIvAoECwAJ7AvIBjwEpAVkAK/8C/i/9jvzm+1b7IPv1+tL6+fqj+6r8x/3M/vj/WQHLAhIEMwUBBn0GtQbfBu8GmgalBTkErQJFAREA/f7d/Yv8N/uB+mf6evqc+hz76Pu1/Ir9vP4kAGsBNAKNArYC2QKoAggCRgHCAEYAhP+x/g7+ev0W/fH85Pza/PT8UP0P/iP/SwAwASQCZQOfBG0FIAaxBtYGkwZbBj0G1wXeBF4DtQExAPb+D/4x/RH82von+u750/nb+Xv6evtS/CP9dP71/xkBsQHoAfcBCQLeAU4BngA4ANX/E/8l/oj9Gv2m/FD8SPxj/J/8G/3o/QL/OgBIAVcChAOwBJ0FTwbGBgIH7Qa6BnkGBwY2BfQDcQIAAb7/nf6W/Zj8hPux+lT6TvpY+pz6Lfvu+778wf32/jMAJgGtAdEB5AGwAfMAEAB///z+OP5b/cH8efxH/A38DvxZ/Mj8XP1W/rv/PgF4AoYDrwTdBbQGPweXB6cHWQfaBlwG3wUaBckDEwJXAOX+xP25/LP7ufoV+tL5zPnu+YX6cftS/FP9uf4vAFAB+gFdAosCiwIyAnoBpQDx/yr/K/4h/XL8//un+3D7fPu4+xL8svyq/ez+XQC8ARoDmQTlBesGoAcJCBYI0AdnB+YGQgZLBfkDVwKnABz/0/29/Lz7r/rV+WP5V/mF+ef5cvoa+9j72vwY/oX/xQB/AcEB+QEIAqQBAgGjAFYAk/+G/sj9VP3M/EH8AfwW/HT8JP0x/oj/BQFzAsUDGQVlBm4HNwihCJoIIQiVBywHwAbsBZIE1wIDAWP/CP7j/M37s/qp+d/4jvib+Pf4mfla+iH7MPyP/eD+tv87ALAAGgE9ARQB5gDbAIQAlP94/qf9//xe/OT7w/u6+877P/wj/Vz+w/8oAZgCIASyBQgH9geLCM0Iow
goCJUH7Qb8BaAE/QJaAc//Y/4Z/fH7z/rP+Sv5C/k6+Z35P/oV+wv8N/2g/iYAeQF2AhMDcAN+AzADsAIbAlsBXABB/0X+bv2p/Pn7cfsc+xr7ZvsU/Cb9jf4eAKYBFAN7BMYF2gaSB9IHvweEBxUHcQZ6BTMEoQLYABb/jf1U/Eb7O/op+VH4C/gx+KD4Svk7+k/7g/z3/Zj/FgFWAkAD4QM1BCkExAMpA3QCfgFCAPb+yf3Y/Bz8oPtn+1/7iPv2+8r8C/6e/1IB7QJrBNIFGgcFCIUItAizCHUI2wfoBroFQARtAn0Aw/5R/RD8yPqI+Vz4l/df96H3K/ji+Mj52/o0/PT96f+kAeECswMNBAgEzQNtA9gC4QGKACr/5P23/Kz77Ppu+h76M/qx+nz7mvwO/rn/agEQA6YENwaGB3EICwlmCXsJLwmGCHEH7gUrBFcCmAAA/5T9H/ys+k75XfgG+Cr4j/g4+SX6S/un/Fr+LQDgAUcDVQQKBVoFRgX6BGIEXQPrATwAiv7r/IP7cPqR+dz4dPh/+AH53/ky+9H8ff4kANwBoAMrBX0GeAcmCIkInghiCM8HzgZnBcgDHgKTACT/pP0I/GP6CvkV+KX3pvf595z4kPm5+lH8M/4SAJIBvAKdA0YEvgTfBLcEYwTBA5kCEgGj/2b+Nf0l/E77ufp0+p36KfsF/B39Vf7I/0EBrwIgBJkFywakByoIfwi1CJIICAgMB7oFRgTJAlgB5P9h/q/87/pa+Vj4yveA94v3B/jm+Aj6X/vy/Kb+TgCYAXkCEAN8A5QDXwMBA48CtgFYAND+cP0//D37a/rO+W/5k/kp+gz7Xfz9/Zz/HQGmAjUEogXVBr0HdgjdCPwI0ghXCH8HSgbZBEgDswEpANH+l/08/Pn6D/ql+Xr5lPkF+sj6x/vl/An+YP+kAGIBpgHqAQYCigHdAKEAegCX/w7+y/z1+zD7gfoj+gj6Wfo6+1P8Wf2W/icAvgE5A8sELwY1B+4HfwjJCLEIRwivB9wGqQVUBOcCLQFY/7v9VPwJ+x/6g/kV+ef4Ovns+cv6xfv8/En+av9/ALEBlAK/AlcCGwLCAYgAcP+K/4v/0v2V++H6fvvt+437Uvsg/KH9cP+JAUcDywSaBv0HkAhrCS0KmwmlCHcIPAgSBxYFHAOhAbEAVv8M/VH7efpA+iH6tfjS9gD3ePj/+KL5s/ql+2H9DP9A/8n/DQGLAMD+If55/tn9//tV/IX/ov6p9831tvyMAAH9jvqX/Nv/2gPSBsoGzwaOCYEMCg08DCEMNgyQCiMIige7Bz0F7wC7/qL+Bv6V/Kf65fj6+BD6tfmz+O74FPpT+6T85P0t/0AAUgEvAuwBVAG1AKL+dfzx/BP9hPmh9+X63PvQ9ib0ZPjW/AD9MfyD/ZgAqATmB4oIUAggCvQM2A3qDDgMKAz4CoYI9gaDBuMEjwGp/pD9XP2R/N/6A/k3+Nv4OPmr+Gn46fiP+Rj7//zK/Yv+AQDlAPwAMwHPACH/oP3z/Zj++ft1+cn8p/8I+2b3yfu8/17+Y/5dAQUDGgTHBkMJ7QkVCikLmwuMCo0Kcwt5CQEG7gTGBM4Czv80/Wf7ifrr+eL44ved98j3Dvie+OL4MvlV+rP7rPy2/a3+9v9YAdsAyf9KAGj/b/w5/JL9FPtu+In7gv4T+/f2Yvnh/jIAPv5S/4ED/QU5B90J9wsCDE8Mxg3+Dv0Owg2rC5QJQwiZB+YF/gEe/hf9aP3I+wT5rff192v4+/d39xz4Oflv+VT6Wfy//eb+MwDAAE4BewHB/7n90vy2/Fb9yvtN+PH6ggA3/Jj17vqZAm8Aav1TAbIFiwYgByUJ4wojC3ELJQwIC7IJqQrJCbgFjQOYA/YB9/70/NT7i/rs+Lb3HPhH+YD4z/YV+CP7Lfw/+0D71f0LAZsBwgDIAb8CEQL7AKD+1vzS/mH/8vny+H//4v9Z+a73GP17AaYAzv5IAMoEdA
hGCDMHvAkODWAMDgpeCnsLzwoDCMoE5wPuA1YBvf2Z/HL8bPqe9/j2hfgt+WD3uvV499j6HfxY+6n7p/22/xcBSwHJAJYB1QEJ/2D8P/1k/i/84vf09zP+lP+b+MH2zv6bAsv+IP/pBAcHzAUHCPsLoAzQC+oMYA1pDIgM5AtZCAgGHAYQBL7/mP3q/cH8c/mC94f47/mD+dX3DffU+FX7NfxK+0b7+v2sAMsAlAAuAcgA5v/L/jn8gfvX/RP8uPVE95n/g/729af1Sf5cAkf/uv2jACsFJwhmB3QGBwqeDZ8LnAhZCoIMMQr7BSkEJQQ9A67/oPuV++r8svlT9aX2q/kj+WT3Bffv+FH8of1W/AD9g/8DAXAC/gJfAdwB6ALs/gf8jv/R//X5Vvfs+iX/iv319qz3IgEcA+r8J/5PBs0HZwSJBjcLjAuHCkELfAprCasKYQnwA7oBSgMDAZ77gPrA+9T4EfW29YD3w/fX9/f25vZz+mD9y/wA/Gr9YADMAmgCbgE3AlQCvACn/nH8mPyY/kn7vvXt+VgBEvwm9BX5DQGqAHr+CgHzA9oFtAiCCfsIlgvHDXkLpAmqC40MgwlrBaMDgQOGARr91vqi+4X6rPYJ9bX3tvmL+Aj3O/fq+YH9M/71/Fb+WwFSAyME9gKNAfcCrwIY/gz8TP7L/TH5VPeH+/7+6fqe9aL5vwDn/6X80P/XBBgFqgP8BfgJGAosCLgIcgnDCIQICwdPAygCqAIu/776YvvG/M/4uPR59pX5kfnr+O74qPlg/E3/JgDl/48AhgJzBOwEVAT8AmsBSgHGAGr9LPw//n77B/ag+YkAUfxV9NH3PQC+AXf/H/82AeUFWwk7B3QFeQndDEoKEAd9CLsK8AiPBGYChgIQAXb9KPt2+9v63ffw9Q/43/pS+lD4hfjW+4D/aAAp/6z//gF7BKsFoAPuAPgBtQJh/pX7QP4S/lT4fPZM+3j+LPuq9Xb36P9bAv78h/2gBPUFVANNBrkKqQkxCDcK6wpsCQIJJwjTBCwDfwNrAAL8S/xR/cP5MPYg9xX5Uflk+UP5XfnG+wj/ZQAfAJAAhQJpBH8E0QMzAw8C4QBA/9z8UP3z/vb5dfS1+kABh/nj8cH4zgBL/+T8Xf71AOoEQwcUBfYEqAlgCzgI2wYQCRoK7gdaBF8CXQIrAbH9kPvY+436rfda99H5NvtA+mn5z/pR/v0AUgEHARsCHwTOBb0F7QNhA6IDsgDZ/KP9P/9D+9f1+val/DH99PYU9Nb6VwHz/v/6D/84BsYGqgMsBrQLrAsTCcIJvAqwCkgKrQZCA/UEjATH/Sf6t/zx/Hb4WfWu9l/59fmw+J/45frs/cT/PgCHABcCegQ+BcEDDAMiA34BS/7V+9X77vwe+oD0X/YB/UL6HfPI9hP/9f3J+qb/ywRDBBwFCAl+CjgLSA3zC4QJKQxSDtkIYQPVBCQG7AB4+4T7dvyg+c/1ZvXt93X5O/hR9735qf0OACIAUgCqAmYFjAU9BLEDvgPOAnL/fPtZ/Pz+kfkQ8pT2Fv8i+4fxYvNQ/SUB0v3Q/DEBPQbJCCEIpAeuC7UP1gzeCE8LLA7+CtUFpwN2AzsCIv7Q+e359voQ91LzL/a7+a/4Xfef+MH78P84ApIBEAK/BKgGkwYcBXoDHAOvAAn8oPt7/fn4ufJO9Ur7K/op9D3zCPuJAW/+LPvdAWkJUwhfBoMK8Q4hD1YNBwxYDHENaQtRBnkDDwRIAkj8YPiq+SL62fVS8hX0cvcj+Cn3Vffe+VD+PQG3AA8A8ALUBtoG6wOiAx4ERwD2+8X8FP6++f/zT/U+/NP8wvRr89P8dgFu/Q7+FQW+B2gGDQnaDXsOJw2tDU4N6wu9DC8MFgb2AcoDXwJo+wP5X/tg+Rf0KfM79ln40fd09uD2gfoe/7EAdv8jANkDZwZmBe0DYwOFARP+vfs7/PT8Fvil8SX2Hf+C+lTwS/X0ABIAy/szARwH5Q
ZiCMsM9w0UDgsQwQ6EC0YN1Q+kCo8DWAPRBBMAVvpT+g77YPdP84HzP/Y990L1CvQE96T7Q/5t/pf+OgETBWAGGgXZA48DBQKq/iX99/7F/HD0XPRk/p3+H/NC8n7+lQHc+xr+NgW7BpQHGwx5DfsMIhAjEVkNfAwND78MsQbqA7YDKQFV/Hj5V/kv+Ofz6/D48sH11fTf8tfzP/dQ+5n9Tf1k/soCtgUwBcMDdgMAAw0AnPwn/Vf9+vYd9Ob7i/7m9Fvy7vw6AVH8rf2vBDkHSQhIDCYOIQ7zEKIStQ/gDeYP/g5VCTUF2gS6Ahn9jPka+kb5PvR/8AHyxvRN9GXypvI89X/5EP27/VL+ogE4BeQGogWqA+IDyQJW/p39yf9K+bTyV/qZACv3d/B5+ekAC/5r/CMBGQaSCPAK1wy1DfEP4xJPEtcOQQ4+DzEMoAY9BF0DNv+V+QL4L/lJ9mnw/O518hD05vGK8Eny2PbH+yP9LPyr/lcEWAdlBf8CpgPdA1UA+v1k/5b8zfW/93//P/x58mj1+v9WAbD9JgBZBfcIugz7DTQNGxCZFBQU9w/8DegOnA51CdoDegJ/AHb6xPYQ+Kz2V/DB7DrvDvKM8cfvZe8W86T57vyc+/j7HgHZBg0HygKnASYEBQO7/+v/i/139gX3yv/n/+X1wPNa/ZQEUAKQ//EDgAryDdYOAw+YEAAVBhfyEcINaBBZESMKFAMdAs8A5fr49Wf1Y/Q58H7swey/7//wBe+h7c7x7vgN/HX6OfsOAdQGQwe1A84CyQSYA28ANQBK/kL4l/el/i8AuPfl8wr8FgSVAv7//QOoCaUNgA8KD8MQKhYwF3cRUA7NEBoRigp1AwoBff+i+ov1C/Qv8ynvierc6uPuGvCK7T7s8PC6+PX7E/rj+v0APQeQCOEEgwLtBHQG5gMZAgcAIPrx+KUB9gQY+tjyxPs6Bh8E8f4aAqkJ7Q4pD2sNFxBgFlUXNRFXDY8PrRCUCnECRf+0/o/6cPQm8rzxBO5G6dPp4O2k7vbrZuuq8EH4j/vQ+Wf6RAFhCFQIhwPeAicGKwaIA4UCCgA+++z7MAIRAuj4QfW4/YAF2wKu/l0C3AmZDkAOFQwED1gWWRcUEK0MUhAsEaYKXQP//yz+o/oj9mPzW/F67cfp/eq97qbu1uvB6/nwRfhQ+z35ZPpFAnAIUQehBKwFjwfJBqUFRgUhAvf87f2zBDAEJfrh9p7/bAVnATH+5gGhCH0NFgwBCU4NfxUTFqgOqgrdDYQQjAv3ApT+zv1T+xn3K/Tz8F3sXuoE7RDvvux46ozsEPIE9734G/i2+e//KgZoBvQDNwX1BhMF8ATABqcCRfxW/jsEYQLX+v73yvyKA+gDN/5d/tsHxg4PDAsIOAz6FHYWDA9PC04PBxIfDmsGt/8k/hP/Vfsz9P/vC+5r653rzO7k7R/qsuvr8er11Pdm+nP82f4uBPkIzwiDB80I+QdeBNIE8QbkAVr7WP5dBAsBvfjw9p782QBw/qb68v3DBscJLQaGB3UOIBFdD0cPlg9GD6EPKg0JB5UDawNAAM754/X99Evy2+2F7Jnue++r7WftdfHh9qf5bfoT/Jz/3QRGCcgI6AWIBhYJCQhsBEgD2APHAMD8a/7oAEr7jfXd+Uv+Q/lw9g3+JQQcAkwB4QRxCH4MQQ+dC/sIgA9PFA0OgQfMCNcJGgUtADb++/sg+DH0QfEW8d3yrvGK7cfuofWu9+jzBvW1+/j+dv/XAkIFJgSPBc8ISQcKBYoH8Qf5AXj/1wNBBLn9jfoE/D/7bPqs+4X5X/c8/FMAUP5K/6kEBgYBBsYKZg7iDU4OxA6fDAUMNA3ACesCugAoAr3/Tvnl9Rz3ufbb8knxtPM29Q30+vI187T2Sv0qAEn8avujAwkL2Qg5BOQFVAosCtgFUgPzBDwFzv+a+Uf5U/vH+aP11PJt8/D2DPgB9pz5iAI+BAf/gQEKDP8Qaw1PCssMRB
KYEzgNdAehCdIMGggLAOL7LPu7+vX4LfUY8g7ywPLp8V3xKvPi9i35Vviz+G79oQJUBa8GLQdCBzgJpQsxC3IJsAiIBjUDpwFdANT9ovt2+OT0J/Qm9CPzHPWQ90X1bfQq+uD+pf9pASEEigZBChQNJg0JDaQNAA23C9YKaAmQB7kFOwK5/fH7tPxt+8f2C/OA9JP33vaq9EX2aPmB+s77D/9eAmEFDQfcBRQG8goEDeQHVwUlCdAJIAV/AED8qfoa/ln9QPVJ8jn3cfi09Az0wfa+95X2l/ZE+nn+pP86/17/OQAKBFYJkAnRBp0HjAmdChoNxgx8B8UFuQgqB14Bpv78/hH/Uv1F+oj4/fia+Wf6D/zj/X7/KgC8/7UB/QYbCc0E+AHpBYIKyQgoAzEAMwHwAHf8v/jA+IT2nfDr7Sfwp/F+8Ajvt+6G8PH0gPgp+Xb7pQFQBb8E2gZZDAQPtQ5yDzwRhhEgENUNrwtYCtEIWwWUAdf/kP6m/JT7FvuS+in72fuG+3b8gP4x/wkAdwLPA7YDuQQgBrEFiQQyBAQFdwV8Amr9VPzH/nr+PPrD9Z3zLPWr9371ePDE72LyN/NO8xD18faR+Az7vf0d/7r/uAGfBbYIWAmACO0H1QgdC80LFAlABmAGZQZ8BF0EJAa5Bd4DfgOXBCoGjQZEBBQCMANlBREFqgIoAB///v9SADH+4/yN/pr/1P1y/Cn9pf52/3z+X/yH/GD/LQGBAEr/df64/kIAcADw/Wf8Nv2L/OD58Pjh+Mf2SvT48yb0h/P78sHywvKN9KH3cflN+rL8PwBPA0gGSQkMC50LOgxuDagO6g4JDQAKdgiUCJMHkARxAaf/x/4C/mP84fr4++v+IwCEAMUCtQV+B2kIDAliCo8MDg3XCjwJqwlxCewGuQORAdcA+f8G/c74X/bE9oT4lfiu9TbzYfTl9cH05PPy9Dv1yvQc9eP0ffQv9qP3pPYj99X60/wg/LX8j/+5AvkEegVeBWoHrQp7C6YKOgtCDGALBgkgB+MGVAcMBoYC7f9tANsB/gBu/hn9SP7+/8j/m/5V/4sBhgIOAooCVASPBSgFqgSaBegGPQYuBIQDVATZA4sBTf+J/q/+5P2e+3j6c/uc+4b5Q/iq+Zb70PsA+xP7+vzi/qr+Hf3A/Kn9GP4f/dv7ZPvx+uX54fn2+qj67PiM+Oj5c/tg/Jn8xvyw/tIBUgObAogCkASkBtMGCwb5BUwGBQakBaEFqQULBcwDCwMlBMoFiAXjA48D9wRiBoYG9wXpBbsGYwd1B2IHQgfgBnYG1gUjBaMEQgN7AKT+Xf75/Lz5Yvdo9qH1FPW09LXzRfOX9PH1C/a69qb4//la+h77gfxf/a79Bv58/vj+Jf+K/q396f20/uj9G/ys/CT/nP8m/qL+bwG4A/gDFAMxA3sF1wdMB7IE9AOhBVsG0gSpA+MDogNtAg8CBwM4BCUEwQI2Ah0EWAYvBq0EpgQYBv4Gkga8BQEFbwSGA0QCbwFSAYgAT/6B/OH8nv08/Bz6+flK+5v7P/r++KT5UvtD+6n5Sflv+uf6MvqM+bP5iPoI+//5z/i6+X77/fp++Tn6d/yE/S797fz9/Q4AYAEHAbMA1QElA0EDPAOLA2YDbgNmBKgEqQOIA4gEoAQtBJYEDQUDBXIFtQVFBb4FRgdvByYG8AXLBtoGGgZFBcIE2gQLBQsEdgLgAR0CswE8AIT+i/2P/av9iPyR+s35+voC/Bf7S/n4+Fv6h/v7+v/5i/oH/DT8dvuW+2v8j/z7+5376/uA/Fv8WPv2+iT8h/07/QL8OPwA/u7+XP4e/gz/zf/g/xEAogD1APoAuQCZAI8B9gLkAhcCkgKmA/kDRATLBNUE5wSLBagFPQWIBQkGswUuBdoEmwQJBcMF7QQVA+ECbQTIBOIC3QDbAP4B1AHI/yf+qv77/1b/Nf2M/KD9wv3E/JT87PyT/EH8O/zr+/v7bvya+wD6Dv
o7+9z6hvlC+fL5VPoz+mr5xPjH+Yj7i/ui+lv7wfwi/cH9Nf9CAMMAuQGpAhoDtAPABDkFxwRpBN4EXAXpBAIEngODA7ADTwQ5BM8CSgKHAyMEGgOjApkDPATPA6oDLAR9BIQEtwTKBJgEUgTbAzMD+QLyAkICEAFBAAUAxP/g/pj95/w+/TL9zft5+p/63PoU+lP5Qfke+cX4rPh++C74Y/jJ+Nb4Ivk8+jT7V/tr+yz8qv0d/1z/d/6G/hEA7gBvAA8AlQAeAWIBcwFBAekAFwHBASMC/AEXArcC9QKdAvoCIAS3BIAEXASFBPMEmAXnBV8F9QRmBd0FfQWxBDwERAQ7BJYDtQJSAkkC9AE8AekAKgEYATsAfP+y/+7/Cv/U/WP9W/3d/Nz71fqv+in7lfq7+CL4vfnA+or5efhR+XL6fvqd+nL79Pvh+y/86fyP/TD+rf6y/rv+b/9EAH4AcQCgAA4BYAGPAcUBNwJFAvYBTAJHA1MDQQImAl4D4AMzA/oCvQNYBDYE/ANaBCAFegXoBCoEkwSrBZAFKgRAA7wDbQTsA38CtgHFAVcB+v8T/03/Pv/+/dD82vxc/SH9T/yA+yH7WPuv+6n7u/v2++r7rvu/+yv8afwn/LD7qPtc/Ob8n/yF/Hb9Jf7C/Zv9ZP7o/qL+c/6j/tf+9/77/v/+NP+Z/7X/iv+W//7/fgALAXQBgAHFAZcCUQPAA1oEFgWWBfEFbgbWBuIG+gYeBxUHwgZIBqUF/ARqBMMDIAOLApcBaACj//r+E/6N/Uj9vfxh/Db89vul+9z7QvwM/Bj81Pw+/Z79Vf6Y/oX+0/7G/mT9lfyJ/MP6tfnq+z3/WQOGBMgAF/wK+jj7ifvr+1v+K/5x/GP9p/1V/wkDZQInALEAdAKhAwIDkAK/Aj0DzARuBJ0DYQT0A5kCZAKNAxgEuQPfAx8DfQGoAEQA8P/K/7oA1gEVAQ0ANQDLAPgA1QB4AL/+Rf2s/LT6mPnP+nH6kvme+hD68vhY+QX6mPrG+3n9Wv4fAGcCgQN5BPAFjAc6CDgIUAcWBpAFEwUyBMwCDwIFAQL/4/2b/PP65Poj+4v6nfrq+i365PmK+sr6wPvT/cL+Xv/WAAQC4gLRA/kE4gUCBskFMQUMBdsFrQX4BLgEkgR8BNYDHQMlAqYB0QFGAdYAGgCu/+r/xf/m/0wAqwDQAKYAFwDZ/6//MP/C/rj9gfz0+oj4IPbY81vxpu8B7/jstul86gnwH/VK+Ij7P/8MAygHbgp7DdoRfxUqFdcRLQ/wDEsJYgSQ/1z8jfrn92L0//Gx8mH1SPdm+ND69f5OA40HgQvzD+wU6hftGIgZgxleGIcW0BMOEEgMwgiMBIP/Tfpc9kHzf+/S6/Pp7+ik5xHmJeQ34iviieOW47HjhOZC6fbrd/ZqCAIVdBaQFiAbuh4AHwUgTSJ8InYekhQzBuf5kPOI7qHn+eKh4Zvd/daf05PWed/d65H22f1rBi0RWhotIiUr5TJ+NXwzmC6PJ5kg+hpPFM8KQgDK9W7rFuNn3ineWOFd5XDokevK79j0ufpNAS8HzQoiDKALQwg9BN8CmQDj+QT0pfDB6h3lSulW9iT/Mv45/NL/gQQSCIAOlBbAG60aIxJ1BS37bfVI8tzvxO297PLrCuc/4GzhDuvP87b6VgQkDigWSR3EIgMneyq3KsglVx41GN0T5A7TB+b+WvY88IHsOeqF6gPuU/Kg9XL4+/pD/pADFAn9CzUMgQoqCDwFTv+M9vrvJexe54ziS9/E2d7UGdcg2y3f3vAvDU8bmxoJIdUtyzBdL0kz9jOuKxcfHQ2m9knnAuEg2Y/OR8uizcPLRshIzQDbMusw+5wJzharJHkwWTeUO4w9cDp1MlknjBoND4UGwPyq7wDksN2D24TcbODo5sTviPhD/p4CUAnGEYwY4BuhG4wYTBT/DgUGcPrg8Ebo2dyt05fPeMuXyBTMo9Gt1uLj+v7BHu8xjj
U3OhVETUPsON0z6jA+JrIVkAAY5ejPTsgOwfm3nbkpwkHFZsiR0zLk2/dEDKwdOi8NQA9HBka3RLc/SzO2JFQWhgf8+zn0OerH3qvYBNni3ADkXe3u9vkAQAolEKAUYhp8H7ggjh2/F1ASGQ0pBQD7CfLE6nTjJdpZ0DLKEcvxz8nTw9jv4gvsbPK8AyUj7TlkOgk2BjoeORAt7iOEIFcYXgjS8ubZ3cfPwyrCVb3Rv/7KjdJY1NHaXeuPAnYYuSeeNG5BuEV/QIc6eDRGKiUdEg8yAYL2MvDF66TmLuPq4wzoYe8g+qkFsw68FMAXthe6FRITvBLiE7IQjQiGABD4Ce5X55nkVOGC36Dfatv/1qrck+SG5jztk/iN+d33EwaQHtUtuiuoJRgoTycyGuQOEw4SDvMFn/RN4QPWdNIezzzNd9Nr3//nsula7Hr5qg1LHDgjVS3aN8k1pisRJX4gdRebCaT79fJR8KPvLO/x7zjzOPn7/z8Gpg2TFS8awBnUFaEQ+wubCHoG9wLg++b0Y/E/7tLqNejb5q7oBesO6lLoM+iS6truUvB68DP1Rfhf934BMhnVJUcd3BUPGigbYhREEFIRahBZB0X18OJ82rzaD9rj15fcA+fB6+Lp5u1v/DIM/BU+HDQk6ipOKNMfsRpWF0EQ+QXX/C75+/og/Vv9q/2o/z8C/gQoCtIR1hcZGVkVIw8KCS0EUwIaA/8BWv6n+of2ZfHj7YTt2+7i7zfwxe576Trj6uOA6kLu/e+P9DT09+7a94EQNx4RGFoVbx9UIvUWCQ8SEcQPGwQu8nziqt2G3zjcDNjw3VPpGPE38+70HAAJEN4VYBaLH4spjyf8HlYZSBUTDdEBjPqg+9oBeAR6/x75Jvio+uD+nQdfE7cashnQEikLAQb0BO4HogtmCvID5Pxf99DykfAP8b3yQ/Qh86XtvOS33MDdLOeD7WHtQfCQ8ujsNvEwDJ0hkhyPExkYDRuWEsYKegxiD3YHsPVU5mbf7t1f3Qbc690d5oHu3fAM9BsBLBAbFSYWAR9mKA8mZh2wGeMYTRHZA+364vpA/ij/rP1I/F78Vf2Q/5YFVRDlGm4eJhooE2oMywY/BWgIXQtiCq0EU/wz9NztFeto7KntCu3065HmEd3924/lB+1J8X73TvZY7YnxMAkiHdkcpxVDF6AXDgxeAZQDQgn8BFP2vecy4Mfca9pE227hmeyz9u/4CfoQBEEQ2BTVF8sg4SiMJRcauRJMEJ8IbfzF9gH6mP/oAuIClwDt/ur/cAVCEEIblh7tGLcPVQiIA1sBRQMsCAIKhQVp/+H7Rfh986jw4O9x7uvrH+gY4tXcxt7P5q7th/J/9v30s/N1AtgbvCTgFpAMhhGSEkEHwP8yA8YDJPj25hLbu9cD2XLbW+CI6u72/vwO/H3+hwmiFNgYQBtBIXIkGx5fFF0PgAs7A6b6KvdT+D/88QDaA4cEwgUQCu8QNxf3GRUZGxUGDhIG7AC9ABAEvgWtA4kCPwMDAEP3B/Eg8+H1D/Gw6ELiXt+t4Y7lO+e+7or6pvmk80oE4iIXKtUX+AppCz0Ia/5h+ZP7e/wr9J/kr9jz143dIeAM4h7tcfvw/z/+pgESC8ESdhWcGZUgoiAUGXoTWBCGCmUCz/tz+RH62vs+AUoI0wmMBngFGQmDD1sVcRbQEesK7QNx/QL7Kv8/BQkIxQfSBMb/+/sV+fj0yPDp6+HkEN9W3jnhy+Ua72X7f/9k/TcJHyWCNP4oShePESUNhv+482DybvM07nrhsdIgzsPVBNvT3C/pmvuJBFUDNALQB4EQWRUjGCAcih2EGUkSewvYBkADjf/B+yH5kfonAagIJgw1Cy4J/gk5DisR8g5+Cp8H6QOe/Uf5IfupASoI1gnmBU4Anv1c/RT5TO8v6B3mfONR5N3qYu5187n+GQLRAaoVbjHYNMAjkxeNEVsEGvRy7jzyY/KL6ELanNF90xHbreGm6Zr2iA
F2AzEA1v+kBe8M7RA+FSwavRfYDnoJ4Qg5B+wCs/+Z/lr72PbN+HUBSAlsDOYMUA50EQQTaBC8CxsJFAgsBP/9Wfux/Av+EP2P+S/34fip+HXy5usX6HXl9ubH7YTzXPcIADUHDAWECXkiyDezMWQePBaREuQE7fXy8Vbztu825D/WJdHe1+XfWOQK7cn7JAYnBucCJgUoDJ0Q3hCREmAVyRHFCLAD6QI6/6z4yvRq8wLywfJX+KIAXAjODssT1hZQGP0XWhWdEbkOrwsOBjj+fPf68yzyLO9P7Cvt5PCi8irxx++L7gHst+2j9W/7wf2uBL0JOgcwEcwtrDoSKXUWjBV3En0C1PY0+Lf4xu7g3nXR786Y15jhGenz9OQEOQ2RCYMErwcLD6gR2BCbE6UVGw6wApr+Qv7n+VT1PPXY9WT0uvQm+Sj+cgIiCMsNXhEmFKwVjxSEEhoRhQ5LCdcCFP1S+FvzNu7O6wDsDOtY6ArnnehW6ZHngOuL9r79HQHOCDoLnwdbGM03YDxyJFwXmxtHFYYDK/o5+ZD0XOqo3YrS39Af2jjlxO0S+n0KpxKuDg4LDw/rEmgQvQ7rEiITjAlmAQ0BA//p9670O/Z+9Z7zP/a6+p37vfuf/wIFnAmdDlgSmhINEjQSQg/eCNsD8wBz/PH1Se8a6gDn3OPs32/f7uL/5KbmM+75+HcAmgfrDjkN1ArxHlA7OztQI3wWahUqC4T7PPW19EnxSusq4wDcpd3v5ubt8fJn/nkM0REfDsQKFwyaDdkLDQwgEbUR+wkFBWgFOgGx+Z/4z/sa/ND6nvtt/Nn5k/Zm9038pQLqCDkNdA8kEYURKg+1DDgM8wqaBQ79HPSI7F7lMt0m1lvTm9QG16HbmeZW9UEDfhF7GZUTChNdK6w/2DN+HN4WshLwADDzbvGk7znrFukH5iviZOWe7/v1/PjtBNYTHhXWDcEL7grfBnMHKg4XEaIMawpADbsIkPus9XH6oP2V/GP97/4C/bL3UfKl8Wb45wOADaMR6hNhFQ4SLQv3BgcHbgfrA3z7cfHG6JDf2dWR0ZfRVNG+1Uri3O7G+MgHdRVnFBERQSTDPmg9USUsGe4WsAaf8d3rYe6y7E7pj+bb4yHlIuxV8kT30AOeFPgZNBPVDlMPhAvcBSEJNBHKELsJLgmJDPUF3fiC9En4pfqN/J4AoAFP/cT3+fQD+AQBVgtIEoYVNxYyE1oM+gX1A34FHAbCAG32Iu235LjX0cyFzWHSp9Q33BLqpPQo/tsLcBIzEUMgmj+dSa40Sx/+GekMfvLb4eXgeuO449Hfy9kX22bkO+y78Vf9oRDYH+8glhnwFSQW+RJvDRcNPhBnDToHeQWBAlr5XvKs8kX0r/VP/O4FDAk6BAX/+P6OA7IJ9g55EjAU1RO7D6UHyAFsAwwGAgH89qruV+YV2rXPc8y9zD3RP9yk5vjst/hBBxcNMhYrNJpOoUu5NQ4oSB2bBkXymuz56XrhbteXzFbFDcoH1xzj2vCPBWIaByP/HvYaJRxLGm4VvBa/GlAWuwvgBDwANfi48Xvz7/fC9/L2mvoH/qT9nf0nAGME3wqTETUUZxMmEyETmhBcDGQJ/QfEBBH9cvFX4zLXWNFtzpzMC9Gy2YHfHeVJ7vb1+wEdIFxEXFKSR505DjQwJsIOjQBF/Rr34ej81PzAWbjOveTGCdED5Rv+9A46FIsUixcIH58lqiarJVckkB1FEuAJZwTL/CL2cfRc8frqVeoo8vb4o/kz+Xf8RwLMBx0NdxO0GVgdkx16GrsUuw7cCcQD2fmp7WDi7dc2zM7DhMWUzBbRHdeI4lTrm/WwEj856EsmSFxDV0M5OX4mNRtQFpULLff720bAmLDfsMy488N217nw4QOEC+8MXxAOGvMm4y9WMOApUCEWGeYQyQi+Ae/8jfi58PLlhuAI5SztV/LU9eT6pgDnBaoKOQ+XFY4dIiLRIPAbKRW7DTYFMPlc6+DfSd
XQypXFLMVuxBDHedG52yznsQMOLAxEskTYQZRFCES9Ofox7yweIaoLR+6Ozc23LrOitvu8f8qf3hDwy/fw+QABlBGWJb4ymjRwL3AojiDGGJ8Sjw3ACXoFFfxh7hjlhuTz597ppOri7ELw3fOg+Oj/IgtPGQ4k4yYnJNkerRiKEWoGNPed56TZDs70xWG/wrl+uzPGzdFE4A3/RCXNN+s3ejz0ReRGXELcP4A7Ni8EGQ36y9hJwiW7lLr5vOPGg9V44E3m2+pd9H0G1Bv0Kkswui6SKuUnpSf/JUUg5RjwEPQEU/ZR7cfrp+su6P7h7Nyx3D3iW+t49v0DAhLCG+UfjCGLIvAiyiDhF5wH6fOP3xbPssZqwV27RrxbxBXLUdiK9+UY2yeJLD01UT+9Q9ZE5UUEQ0M3iSFHBY3r29pWz0rHLsaByQnN1dCY0wvYmuUV+vMLqRcHH9ciEyV4KSsvgzFhML4tUSb/FwkJBwCG+orzvOk13xrXjdL20SvXpOLr8FP+HAmCEH4Wdh0iJIolZx93E9UCTO+y3jjTvMnCw+fDa8V5yLjX3PLDCWkUBBulJjg1pT4AQbVArz05NA0kOxDY/DfuxOPe2aDRpM4C0ADSENQC2Qfjn/Di/esH2w21EtUaEibNLsUxRTKjMukuAiVRGkcTVwwXAaTye+W125rVp9MP1oLc7eUy7931SPyiBhUUyh7WIioh4hkgC2/4t+gd3hbYgtXR0lzOJ85/2TjsQvqCAGEHwhMBIYsrKjQ4O0s+mjpHL6AeAA5pAaP2MOtr4rXeId1Q2kbWtNRS2JnfJ+hl8GH3zf4JCegUuh6JJcoq0i4UMDQuKyv7JugerxLUBJ/33utY4v/bYNk/2mvd9OCY5JLqD/TJ/9wKHRIWFXsU6w8ECJT/hPiV86rvIOrV4xniiOdc75jzuPUo+7ADvwvIEgYZnB1kIJwg4RzvFoARLgwaBQb9X/Zi8snv5ez/6bnokOl065rt8u/y8Uf0SfkxAScJ5Q83FusbriBJJdcomSjtI7ccPRT6CrABifn28kLt2ufW4ybin+J+5QHrXfL9+aAArQWpCNkIYgZCAzwBhgBH/7/6q/Nq8D70mPgN93Lz+PLK9Cb47v2BBPIJwA7fEZIRfA+JDT8LBghoBUsEBwMIAL/70va/8Z7tIey97cnw7fJY9Oj2FfuQ/y0DTAawCiARIBigHdsfTB6kGvwW1xNuEMILMgY9AS79jflp9iz0ePLE8Zrzr/dU+9z9uQA2A5gDlwJ6AeX/iPwT9/jxYfEt9d/2bPJD7Jfpr+p67tn0sfu7AAQEMgY4B8MHewjoCHcJ7Qq8C1AJUgQvAHT9fPow+L/4qfr6+tj4L/Yx9Vr2x/h1+2r+awJAB5oLkQ5lD08ORw2rDKALBwtWCxYLdwkwBkACggA0ATUBUgDzAAIDVATZBNwFcAejCK0JtAoCCsEGMAKe/Db29vAp7rDtmO+O8UbwPO3T7KbvXvTt+ZH+hQF2BLoHBAkACKMGkwWcBF8DjQCR+zH3EfVt883xpPHk8pr0X/a798H4xPqP/QcAxwKXBlIKLw02D8EPiQ7bDP4KKgnbCBkJTgc3BWoF2AWtBBgDnAGdAEsBcgNzBhwKAg1YDd8K7AdaB4UIxgeIA1f9m/iL99/2mPIg7mjt/O2x7X3uS/He9Yr7Y/8CABsAGwFQAXkAx/8L/4L+z/0w+2f3l/Xt9cz24PcA+Zv5nfre+8T7lPsZ/cv+wP+DAQcEggWGBuQHWwkyCz0NUQ1oC8wKNQuGCfQFTQMOAhoBpP+S/ZT8Bv7UALkDkAYCCSgLYw2WDpMNTAz1DHYNVQq4BNr/Ofyf+On0QvGC7qvt3e1U7Q/tr+6B8ePzjfXS9kT4sfql/df/LwFiAroDZgTUAzoD5QPcBJ4ExQP6AqsBXv8S/Tf8T/wS/Nz7nPz+/Rz/sP+VABwCbANnA2cClAHsAHj/5P3j/YP/gwG0AvcCdgTRCJAMTA
wiC8AMPw/yDvQMwgwMDuQM+QemArP/Qv0v+Vf1I/Tf8xTy1+8r7xbvOe4v7ZvtAPDo8pb07fWC+F37KP2J/rgAfAMKBmwImQqDC90KAgroCSkKVgkMB0gEfgEs/j77MPqI+rf61vpZ+6j74/ut/Kb98/6KAT4EFAW2BDEEmwO0A0AF7wcOC8YNtA5jDRALIQnHB3cGXgXCBDEEbgLU/gj64PUG8yTx4fDG8kP1k/Zp9kv1oPMw8pLx7/El80P14PeB+sL9bAFzA9YDmwSnBskJygyRDakM6AtWCtcGwgMXAzEDAAKLAOb/7P4P/Xn7dPqu+Tr56Pjp+ED6afwB/mz/LAIlBgAK0wzuDgYR1RJJEzASvBAgD9gLBwd3AmH+PvpP96z11/Mb8lPxIPBP7qztRe6z7n3vWvF+89H0a/Xe9VX3Y/re/T0BcwUmCXoJ4gfmB6wJTQxxD9QQYg7eCQ0FLADu/A79Mv7Y/Qj93vxk/NP7KPxU/Rb/uAA7AAD+gf1r/8IALwGDAnoEzgU7Bm0GkQhvDUARYBAuDTwLFAr/BxkFbAI4AdEAff7q+T32xvTz8zDzz/IP8qnwTO4K64fpsOsp7o7u9e5L8Hfxp/Pa92r9VQSdC68PSRB/E0kbmCDhHsAa9RhhFrMNRgFg+XH4B/h18zTuROwj7OTq7uk47Rr1/PyHAWEEUgi2DIUPmRHrFGkYdRnBF6UUVRAlC8AGQgQqAvL+Ofs++N717fNL8/30Nfie+hH77Prj+8T99v5X/r78afse+Sv0RO6N6oXoQOZK5IDjbuPy5Lbose7U9kP/FQYHD3ceKC0MMDQprCMxI04hFRrQEZcNGgrn/u/szt8l3Hrb8NkV2/rguuiL7k3yx/heBdoTAR7RI7IowSxCListzCkkIz0Y6wkE+wLwveu469briOrY6Kfo5Ops7/n1VP4pB9ANXxCwDrkKEAePBJICPwB6/J32DO+45tvetdkB2GnZ99xa4Fzip+V762byovx5DSIh5S3/LuUq2SmQK6MpKSTkH3cadAzH9hTj1deY07LSN9Ob1Xnalt+24lPoavaUCp8dTitYMwk3NzfONPgwcyufIgwW0wYh91vq2OMt44fkOuVm5lHqxfA++GYAZgktElQYvhljFg4RKAy7ByQDqP6w+Q3zSuv440zdfden08nS9NQY2hfhG+hB7jf0nfoMBJwVDS3sO+84+i6EKpsoQiKDGW4TAQ0J/inm0dGKyVrJNsu3z0bYXeFS52HsNfZHBwUcsC3EOEU+wj4GOrIyFiw+JecZQgjD893hpdZG1FzZy+Di5pzsKPQF/t8IdhMEHRQkMSYrIX4XIw9XCv0FFP839uTt/OaR4K/bHdtE3VHepd1E3JHcIuKl6uHx4/he/5UAbwMnFWYs4jK/Ju0bvRxQHwIZpw2ICI8GWvll4WvQ/M1Y0TPUENoJ5mzz/PpY/ScDbRF0IgoulDNKNpA0eitSH5AW9BBXCZj9SfHT6FLli+W96K7ufvbH/pAG2A0tFB8Y8hhOFwUUXA9mCooFHQBT+Yzx+Or451fnceaW5c7mO+l+6oDq5+nP6aPsQPE381Dy5vN79vb3CgJhGO8nTSGnEnQPjhSdFQ8T0hN+FDoJ3fCW2hXTC9if307lXutm8qb1UPSR9r8DAxe7JQksvS3gLEgnYh5qGEoWxBG5B8/7RfKH7QnvTvWa+o37gvti/sgDEQo0EV0YYhzVGTIQYgR4/P/5mPkp+KP0tu886tfkCeFK4bbm1O6z9Rf5UflN9xfzzO8R8Dfwl+177WPvVu0h758AcBUHGOENwQvOE+YYvxdEGBAbhRaqBbPwNeJ+3f3eieEN5ZPsfPTV9s/3Fv/wCzoY6SHdKZwuqCzSI2YahBUsEsYLuwTx/yD8dPl6+jn9k/2H/OX8Sf4IAP0DrAokEcwTLxFSCgECP/xM+oP5gvd99Qn0ZvCP6j/nH+nY7fbylvdK+ir5dvPD6prj8eIS55vpQ+
pp6hPmO+QY9wUasSzeJG8bYh6gHtETJAupDBUPAAcH9I3gxdV01KPXQ9686vL5VQSkB0IJbQ4CF2offiUjKVgppyQSHaYWfxFhC68FEgP2AbgA0gEsBkcITAT4/ab6kvtF/xsEqgiZC+4LrwmYBpQE7QMuA8sAM/z+9TnvK+nz5TXnFetS7fLsou1k8Dfxne7b6rzmi+JB4aHilOOo54Tw2fTR9jUJDyjNNMwoyxpJFR4QQwmOBlcHSwVs/P3tbuCu2Ufamt7S5VHyhQFsC/sNnQ6BEeQWVB6dJs4sxC7qKzclHB0SFTQNVQZyABv6x/TX9Fb5pvsm+Xn2z/ck/OUBwAkjFOMdjyHHHIsSwghgApP91fjW9HfwlumY4VXcKtt43Tvi/ebT6Znrgex/6zDqierF65rtQfEj9QL37/ge/F/73fhwBbAg2iwCHXcJcASkBEMCcAFSAggBvfs98QHmRuII53nunvcXBFIPsRT+Fc4VcxbNGjUiuCf3KIUmYSCkGLUSDg3IBMX8MvhU9NvwgvT7/4AJAAt9CJYGFQZ+CJ4OehX3GLAWlg57A5T6DPZB86TwP+/97JXmOt552QnaSt2N4anmreq068Lr2+2E8Rr1HPhB+c34dPla+kr43PTo8gzuyufp8dANZxxBEC0EqQnYEi4SrQ5FEPoQXgkU+zLtpOY76qTzSP3MBnYQkRanF5MXLBrlHtwi1ySUJAYgahZ8DHAHXQUNASL84vvl/t0AJwPyCLUPlhJhEJAL6wdjB9oH0wZjBN4Abfsr9PztUuxG7i7v5Oxw6inqHurx5zLlzuT05pPqBO7G7/jxxfa4+nj7c/sv+kD1ju6i6XvmLeLO3uzfnN+K3eDs+xO0MUUxqCkSMGs0TiYdEy4JfgPJ+i/uGODo1hnb+Ot//nEMPRl9JfMr3SqDJjcjEiKBIY4epRefDicHmwOJAlwBkgCAAaECbwLpArIGOgvGCyUJ6ga9BLYA9/vU+EP3FvWT8dnugO6Y723wTvCb7+/upO2g60rq2el76Xfqku6w9L/54vru+Oj4b/us+hH1UO9v6hXjn9mC0QzN0sxb0nTd+ewcBxwxblTEWmdK2TcyLdkfMg1K/mn4G/WQ6s/bMdQC2VXoqf3CEygmYDMQO1A7tDQ1LbooCSUgH8AXZA68AU327e8A7OXpN+46+GIAKAQ8CEgP5hS3E0ELpP/c9qryTO/c6tznJujO6kLuMvKk9kj6R/z5+3H4hvJ47RHqxOZg5Qrpq+839Gb1z/TM8g7wI+8D8Jbu7+hV4ZfaXNc22KPd3uj8970IaR4cOn5LZ0POKg0dFR2jGPILfQQhB1EEgvJG34DblehJ/ScRvh+6KYAx6TQRMngtsivRKk8muhvnCdP05OYI5qvrwe0f7vX0TgCGBhQH+AlSEHoSRQsc/sbyge2+7JftGu5q7QHtye6R8j/3/Pvp/xAB8PwL883n0OAr3yjgxOIW6NDuq/Lj8TfwAPLM9hj7wPx2/Ln5cPLg6Qfn2ujj6K3puO3660jkIesAC6kumTtYNLUvNzUNNpQrSSJvJdoqLyF8CATwm+Im4F/nDfeqCMgVZB74I8ckzSGZIEQkjyfWIUgQLfoN7GDpdelu54Pp3fIi+4X7BfhC94j7+wF8BeAEXAI6/of4E/ML7+XsVe4L9Er58/bM7YXm9eRZ5kvq9e9W82fzhPLD8C3uMu6p8mX4jfqo+B32m/Td9Mn38PpE+4f5N/bG8DHsOOsF7KbvQvbU9zb2bAL0HqY2YjwSOXg7P0OjQlc1xCZXHhAWCQYx8kLmEuYu6/jvyfNp+Pr/fgl6EN0Uoxu0JIgo5CFmE80Eb/zp+L/03e646ononeSD39bfCegA82f7HQD+ARoCrgHQAD7/0fyf+GXy8esb5/nk2ubS7EP0DPoU/DH6y/a79Dr1rvYX97b2PvVD8HbpJ+fk7If39AGOCroQBBGfCo4CEPxw9/T2z/rP/ef8mvt6+2j5Ef
gfA0IftTsARKw4uyqTJcsjIB/8G8UdVxpuCD3wjuAy3oPlvPLrAZALUAtABeP/JwAdCIsTShuqGi0Q9/7H7ijlWeMp53zsyfBW8rbvROyz7JTwtfSs+Ef8OPxs97TyDfKT9HT4L/uZ+kz4SPfx9wr5x/my+mD7Hvos+AD4mvlf/GAADwXiB3YEevuc9Gn1A/zBAlAGbAm7DCwL7wUeA/kBOADr/0YApf3n+Lj2F/jU90z2s/3OEzMvtDp1MEIk3CJyIdUYhxIzFR0YjA0v9fng/Nz24OHjjell9EX8kvop9NXzW/2/CZYSgRcEF+0OJQLG9i/wD++/8S/0T/MK74vpaObl5vTp7+7h8+H2Avgq93v2jfqYArMJMA5zEHIQqQ2UCN0E8AQhBpsFUwPM/wv7S/U+8cXytvgU/lsABgBr/xgBqgSnB1gJzAs4D7IPjgu5BaEAn/3O/NP7CfqF+Cr2JvFI6XznavleGcYrTyTlFvIWWRywGDUSCheIHgUUgviC4JrYftx15JDun/fA+Vv0Qu1o6oTvHPsYBkoLZwrABO78+few+Tj/ygMfBoYFQwFv+/D21fWO9+/48vfH9BDxZvCl9Ev7WAJnChsSOBVeE8sQ1A44DJoKtAosChEH/wCn+n733/bR9lj4Nvw0AVMFqAeDCW0LHgudCNsHWAjlBmcFQAVXBEABgfzG90nzx+x954nnBOmJ6WXsdfI5+WH/KgXZCc4MVg+zE2oY1BjIFAUQmwlpADT51PRW7pTm3+Ix5Z/pWusU7GPwpvb1+6IBDQd4CZgL5w8NE+ISlRPJFkgX9BBmCOwDHAFZ/P75KfwP/Xn60/cp9e/xf/Nh/BQFtwf8B/AJBAu8CDYHmwrXDwwS3hCLDTYIegKC/zwARAJkA4oCGf9g+SP0ZPIt8xT10vku/4n/Lvw6+0D9OP5W/TH9HP0++qr0/+1b6InpqPFb9MHrr+Vy63LwxerB5y/1nAjsDogHVANYCYcQrBEUEVYUFxg4FTILqAHY/3IDIQN2/c/7GQEPA/X8dfeP+Ab8//4lA6AJlxDHE60Q/gpSCFIL9RLZGIsWQg+kCP8AQfca8mL2y/6zA8sCT/1N9zL2T/r1/m8CFwVqBjUGGwSxAEcAfASpB5oFXAH9/cb73PoY+/n7avuD9obwo+4/7m/sFO7Y9Dz77v3f/WP7g/Zd8c/uxu+68d/w/O5R7vXpO+Hk3tzmmfDo+HsCJg3xFJEUJQ9SEDQaqiMBKAsnBSFDGXcS8QuDB+MHUwphCGIARfiP9dX2U/gV+vj85/6V/gz+m/8DA7kHqQxFD3UOaQzzCioKVwoQDAEOoAwHBsj9CflX+Ez5FftJ/In6s/al8qHuK+2k8Hn2Gvse/Sj8w/rG+1L9ofwg/Bj/gAOOBZYEYwG3/Zn7SPov+JH3IfmP+rH77Poj9aPuoOwr7eru3O/m6kLkxONc57Ls2fVg/zYFLwgVCU0IPgq4E8gg2SZ/Ik4buxcwFtkUdBZ8GksbZRYLDsgFOwDM/Sn9h/0b/jf99PlC96X4tvy5//oBygT4BTgEywHj/5D+a/4I/ov7Ivmy+Nn43/cJ9hD1/vWX95T5DP1RAA8AQf3T+2/97ADpA80EHAUpBxcJsgdrBBIC5gAEAID+g/su+MP1wfQQ9mH4T/g+9iD1KPYR+Ob4X/mP+3b8CPnf9MXyUfDk7QPwCPX39Uv0JfbN+UL7Wf3MAx4MYRG+E/UV8BaaFQMVXxb5FmEWjhaJFiETog32Cl0LBwo6BpkDbAI2ABT9AfsN+hD5x/dL98D3Svgp+Xb7Gv75/k3+jv3e+/f45fco+lv8NPxA+3r6kvgJ9jz11fY4+vT+CAOvA20BuP9g/xD+W/xM/gQEYQgZCU4IWQYTA1MBPQKOA8ME9QV6BVMC/P1X+r/4nvhs+Mn3mPaf8+nvBe9E8dryvfIW8wr1/fdi+mr75fxsABwEagWfBLADqAVrDBITkxJqDYgKigomCm8ILA
e8CDgL6QnYBfYCigGtAQMEVgaFBj8G/QZbB40GYgX0AysCeADn/lf9tfvV+L301PGD8RXyTPKn8yH3P/pw+7/81v+PAwsGwAZxB3gJlAqbCF8GkgatBwEIjwdDBjEEpgF6/77+EP7o+kH3yvXe9N7y1fFi88H12PYS98/20/XH9QX43vka+aX37fad9Zzz6PJE9GT28fjN/P8AjAMgBSwH8AgmCS8I/wdGCkINew2GC2gK3AmrCD0IowmCCxYNVw4YDtMLdAl1CIsINwnmCVsJzQbiAp3/dv50/jP9b/sN++76L/kH92T2ufeC+i79I/4h/u7+3wBRA4YFnga+BmIG4ARFAmUALwC0AGgAw/4C/HH5Mfj09wD4afjI+G/4gvdB9zX40Phm96j1gvXh9f708fIU8WPw8e/a7svuuPEQ9tT4jfms+jX+AwMoBh8HnAhZDAsQwxChDyQQcBJPE6QR8A/lDysQGQ/MDSkO4Q41DdUJUAeYBecCb/8M/fH8+P3o/Uz8JfvQ+3X97P52ABQCHQMsA6ACZgLmAr4CaQFtAEsAQwBtAM4AYQC3/iL9afwx/BP8EPw4/Kz8cfyt+mL4XPed9yn37PQB8wDzovN58+zy9fLN8wP0y/IL8mnzz/QW9M3y5/KU8130c/Z7+jP/pwJOBKcFlAdpCS4LqA0wELYRuRFuELwO7Q0rDvUOAxAYEW4RHRHqEI4QXg+4DcQLqwkwCC0HYQXGAosA9v6H/S/81vur/Cn9nvwt/MT8yv26/q//ogBzAckB4gB7/wL/Af/p/Vf8yfvw+0/76/lX+QX6cPpE+mL6Q/oD+Vz3ZPYx9hv26fWF9Zr06vIi8ejvUe8b75PvUvBJ8P7vF/Eb8xb0+fPc9NP3JPsa/Rb/wwIeB10KQwx9DVoPvhHXEo8SqBK0EgES9BH0EoATcBMhE/gRHBC3DhMOCQ5ADu4NOQyLCQcHGAWBA6QCaAJjAWD+2/o6+Tr5sPjg92n46vmb+kb6SfrQ+y3+//+5AOEAIQFVAW8Atf6f/U398fxd/Kv7YvtQ+yH6pvev9fL0GfQU8iHwhe8W72rt6uuh6yTsee2J72TxcPMk9hb4VPjB+GP6afvS+mb6pPve/bv/lAFKBAQHQwlWDEMQshOTFpQZpBuTG08avhkCGrwZbxiWFiAUXBG/Dh0MxAmqCLwH1gWaA0cBff4v/Gn7dPtL+xf7h/pj+Xn4u/iP+Zj67/tb/Vj+tf4x/lb9+fxs/QL+F/6r/fD88/uz+pL5ifnO+gb8qvv4+R/4rvYg9QjzVPAT7i3t+Ox07MnrTuu361btJ++K8ArzxPY8+UX5uvh8+QT7oPt6+6H80P9jA2MGXglsDEcPcRLaFQwZRhyJHxohTiCVHsMcDBrnFm0UIBJwDx4Nygq/B+wEPgOHAUT/t/2l/Sb+I/59/dn8b/xH/H78s/xd/Cf86Pys/WP9Qv07/j3/7f7S/ev8WfyN+6b6Kvr7+TH5s/eP9lz2Zfan9lv3xfck9zv2cfUc9IHy7/H+8THxdu/07ZTsMevY6jjsN+737zvxFfJm82L21Pmi++L8ZgB6BVsJEgxfD6QSbxSoFXgXnRnDG/wd5x5mHeUaAhlsF44V6RNYEg8Q6Ax1CR8GrwNdAjEBZv/s/Yb9Rv3G/B/9Nv7i/ur+3v57/ob9r/xy/B/8tvvq+4T8gfz9+5b7TPsK+wz7KPsk+yr7EPsx+mL4TfbE9MHzEvOC8ujxF/F58IzwgvGO8nHy7vC87/HvS/C+77HvvvAy8ZTwg/Du8Un0f/Zs+O/6R/63AWIFkglrDVEQqxLTFPsWSBm9G6gdjB62HicelBzHGqMZkRgrFqIS0g7nCgkHNwT0AjUC2wAX/3f92Ptf+g76KPun/MT9WP4J/in97vyX/SD+Xv7Q/h//Vf7//F78efyW/KT8zvzL/AD8JPrV9wb21PQJ9HLzy/KV8ffvxe7G7uLvOfHg8cvxQPF58OLv3+9c8Drx//
Hn8ffwePBg8XDz2fUa+Ff6O/2jANEDvAbnCQENVw9BEc4TjRaIGM4ZrhoSG7UaEhquGW8ZoBgNF/kUlhL6DzMNrQqgCMkGmwSIAiYBIwCY/hf9hvxB/EL7BPqN+aj5svn5+YL6nfoK+v75LftY/FL85/s+/On8Bf2N/P37Xft4+lj5Mvga9/X18vRS9L3zx/It8qXyZ/NL877yhPJz8hbyxPHu8WnymvJL8trx1fFl8jLzA/Qp9f32pPkF/acA0gNEBn8IXwusDlURMxNyFQQYqBlSGg0bBhxvHB4ciBttGnEY4hV/EzIRZQ4sC1EI2wU4A6wAzv6r/dT8DPws+xT61Pjw99z3Tvim+Kv42vhR+fv5xPqO+9P7mPvU+9v8jv0q/TT8OvsC+qH4v/d79+H2qPVf9DTzN/II8uTyAvSB9F708vOW86rzMfSh9BT0+vLl8Qnxj/Ci8AzxbPE+8gH0kfa8+Yr9UQFbBAoHawpfDgYS1hT4FnEYNhm4GY0a/RtrHZodeBwLG3oZFRcxFMIRiA+pDHQJ5QbLBF8CGgD5/oH+VP2J+1r6Mfpt+rz6HPt2+zP7jvpr+uX6Vftx+5P7z/uR+8b6LPri+W/5zPhS+OL3+vaT9Uv0ffMc8y/zk/Mi9Jj0nPQt9MXztfPX8/HzHfQH9Cbz6fEw8eLwQ/Bl72XvnvBb8qn0MvhK/H7/xAG1BHcI7gsPD4oSAxaBGOMZDhtpHFMdWB3WHEQcRxt2GTsXEBWmEs8PEA20CnIIPAZ3BCkD9gG7AKX/of56/a38uPwG/cD8Cfya+4v7hPtI++v6d/rz+T35fPgM+O33gPel9vH1c/WT9FXzhPIe8sHxz/Gh8mbzN/Or8rPy/fLW8r3yM/Of807zvfKI8n7yKPLd8RzyoPIN8+Dz/vVK+c/8FwBPA5UGWwl9C7UNjRBUE1kV0RYaGOgYMhl1GcIZWxklGLsWdRUWFIYS+xCTDw8OUwxwCqUILAdWBi8GOAbaBckEMQOQAW8Al//F/gj+Ov31+x76Wvg39532QfYs9hD2TvXl87DyIfK+8SLxrfB48Bzwg+8374Hv/+9F8GLwaPBS8DXwWfDv8NPxp/I080fzvPL68efx1PJd9Lz1xvYS+Jb6W/6CAlkGrAmEDAoPhBFYFEgXzBlhG+objxuRGmcZThhEF/kVbhSzEtIQsg5FDNUJAAgkB80GgQYMBrEFgwWbBfgFYwaRBnMGNwbYBS4FUQRMA+QB+P/N/df7O/qm+Bj3tfUV9ODxZ+8w7WDr/Onz6DHo1Oe+53/nKedZ5w/o3ei06a3qn+uq7APuye+18dzy2vIv89n1SPqF/aD+nf+7AfoDgQZRCkYOWhADEV8SAhXmFiQXJxdKFx8W9RO1EocSaBG9DiIMaQqkCPQGfAYVB2oHpgfcCAAL6ww9Dm0P3RB/EvkTDhVIFX0UARNTEXIP7QyCCZ8F5gF5/vD6xPbU8avs8+cM5P7ght6k3EjbeNp02jfbAtxm3Prcmd6H4SzlhuhC67ztSvBn8qnzNfav/NQEjQklCi8KMgubDJ4OnREgE0kR+w7tDjcPow4HDuIMTwomBxMFzQRcBYMEPgKxAGQA5wCGAooEkgW0BmgJFg3eEHEUxhY0GKYaKx4rIbwitiIRIbAesxw4G9IYBhRjDdAGYAFz/Cn3NPHS6u/k4+DP3jXd1tpB2L/WttaY157Y/dkI3AzeEeCU4n3laegS6tHp2+u69PD+FgE1/ksAVQZvCTgL6A7fEKUONwzZDNUOHA7TCuUIKQi0BVgDtgPyAy0BZf32+8j9cwBAAhQEOgYdCFoL+RCtFrYZuBp/HVMjSChtKfonmCVYIz4iZSHCHY8W2A7kCSYHrQJA+7nzw+3W6a/oP+iT5jrkV+G+3m7ett+V4Izh0eJA5JXnKuuT6y/spOw/58nmxvavBUkBLfW59BX+ggPnAvUC6wER/bH8yAN+CDMFs/+X/FX7tvvv/Yj/Gf2596H0hP
cx/mICuAHrAFMD0QY2C4IT/ho0GtEWEB3eKQQwvCylJwoljyXDKMwoIR93EF0J2AqPCnEDq/jp7w7tkO5f8KvvYOvX5srma+mq6kvrr+ol6HrpaO+T8SvxGPRr9EnthOok+HcJ8AbR9Ijv4/pYAxgEgAK++8Pyl/Me/ZsBqvs29DrySPJ/8g32tvmw9ufuJuxr84f8Qf7I+7D9TQNmB0EM2BOoGDAYYxiTHiIokSwdKN8gyh4eI14njyMOF6MLPAnVC/4KYwM3+drzLfU4+C35OPcB8ybxuvIy87ry3fIS8g7y1vRg9qD1ufeq+m31y+3a9Z0IyQvT+QLvlvfUALAC+QI//rXzvfDJ+bH/bPkS8fnuNe7I7IrvzPOq8iTt2+nt7LHz9vYM9kT3uPrx/MIAQgiND4oSgxGHE40d+SVHJI0eTRwKHrIh7SH2GesNGwcpCPcJqwV+/d/3O/Zx9/v6Df2J+iH45viA+Mf3wfqi/ZT8YPvP+6X7NP3QAEH8uPGd+N0QWhgvBbL39P59B5cIqQi+AvP1+fCp+EL+//c97uDq8+nF5unnz+1T7brmaOWY6jvw8/Sa+Bf6rvsI//ACnwh+EHwUBxFUDigWWyEKIVsX6RFUE48WQBhsEwwH8/w//eUBEgHB+rn0xPG28jD4/P2m/vr79/l0+Vj8ZwGNAz8DUwLc/2X/twLzBOX/UfUn+CMPvxk8B0r6uAPFCnEKqA1nCfz7KPkDAt8DP/qf8YTw0+/y6gnpQ+z3687mruQh6GLuL/Pn9Gj3Wvsr/a//XgcpEPcSag+/DpYXXyByHvsVjhDBEVUWmxV2CwX/Svm9+pL7vvaU8Jrt1+vz62jxd/eW98z0y/Mh9QL6hv/X/wIAUQNgAbr+fASjBHP47/c+DGcZQA/LA+cIyRH+ErUUURffD0AF7QVWCS8DAvtm+IX08+yg6kXvF/BL6XnkR+hE7/HzX/Yg9874Bf54A4MH6wz+EBQQDxCMFm4dUR07GMQSvw52DroPCgv8/8f27/M19LDy/O3T6U3pJOuV7SjwhvJY9D/zQO8r8NP3EfwG/NT9SP3S+tX+sv4+8oPy3guZG5ML7fopBWgWjxiVFy4auRIfCJ4LMhCrB/r9ovvb90PwB+528pry/ekY5dbqjPHV9Z36Qfv1+1QD7QiPC6MTsxrBF9MTUhorJYsmWxykEZ0N0Q1nDcAHi/sf8Dvup/Hj8Z3tOOfU5fTrbu917+fym/Kc7TrvGvUP9ln0OfX19kH1IvGi8azxSO3K930R/BWLAPb5WgyXG3EeeR4uG+0RZQzeELcR/wTa+sD6L/Ru6iDtrfCV6kfmuuhW7V/yPfc7/OsArARQCV4OHRTOG2cfXRyzG6EhcyboIucZGROsDrgJCQY9AV/2uuws7C7uee236+npIesw8Zz26/fb9uP0k/Va+aL49/Is8tn0RfGE66vs2evn5G3saQW8D80AGvkeCAIX/xgdGzofDxp0EBkN8AqMBG7+wPsz97frK+Pv5zvt/+eE5ZfsnfIo9Zj5S/8gBfULIxLwFUMZZh5XIkciJCIrJGAilBnrD44KRgczAy/+ZPUs6ebjm+uo8/jwkuya73b1Ofrg/YD+KP6M/oX8lvli+Yb5Nfhq9Fzts+fW4qrkpPu/EzwJyPTi/jsUXhy7IagjXBlvD2QPwxA/C/D+B/Y49FruwuMW4+npx+jZ45Dnhe4p8Wr2KwAXBe8GyQ2iFG8XxxxII8QiJh++HqYcWRU/DocKvARq+ir01PKq6s7gjuTy7hbwXuu07AX1d/0rAWYBZgEtABj9pvpG+U35nPhL8hTsmuzt6uXpivsgEhIQrgGhBOQUTh+4I5AoJSaDGEAPCxFEDuEB7fcd8w7t7uWl4+Hmsufc4yvlLO0C82D4LwH9Bm0J/Q5fFssaeRzSHDMbxxjlGMwYhxJPCaYD+P7n96bxaO1h6U3mqOb46VftSvAB8/DzavYf/gwEqAFf/Rj8EfpS+HD4Wv
Ww8bTyNPBy6g/yEQeYEpYKRAMtDVAbvR53H+wiQB5rEyUQWg/qBWb6aPXw8kju4emA6nHuTu427eLzc/yV/7oD9wrmDuQROxdpGKUVTxV6E0gObA3DD9QMtgRs/Uj6qPih86Dsr+kh6gzq7OrX7V3w7/Eh8+H0s/j1+xD7cvkn+gP54vWQ82rw6u5U8tDxH+7T+PcM/BBOBZcCbA2IFkgasx7YHvgVYw/dEa0QIwST+Ar3u/b+8APtCfDL9AP3HvqoABgGeQhhDR8VVRitF1EZ6RlxFaoPLwueCPgIYgmFBvcAHfqF9WL1CPVH8gHxovBl7pLswu1m8R71DPcG96b0UvKw85T1W/Sd8+P0DfXC9GT01O+h7RD8HRGhEfABJQBaDeQSjQ5WDbQODQ1FC/cLdwkf/mHzpfWz+rz1g/KJ+ED8C/x6/3cFGwtRD3ETahmaGqMVOBVbFygQBAWpACQAwv9wANH/3vuQ9zP4mfxg+2f0ifOV+PH5n/kT/K79ffuz9anvTO7B79HwdfOD9JPw2vCP9TLx9OrY9oEMShRpDdAKqBGgEkMJMgZ1C6oIIAIzBWUGmPtF8nDzU/aW9ETzP/gt/pv9ePxdA3ULfA5OEn4Y+RtBHIYb8xipEuwI0P/i+vb4jPjA+d35MfcL9MHxrPEA9Yj47fo+APkFpgXVAVL/Bfu68ybw2PPq+Hv4afQD9JX0Ve526gj3Cwh2Cz4INgvBEIIROQ5VDJAMnwikAv4CLwNt+kbzlvaQ+Z/1DvJZ80P31Prc/QkDyAiFDP4O0A+YD0ES2BavFr8QIQobBf8AWv4y/R77Vvd29MryMfBB7WrtjPL4+D77FPuh/bUAqwD2/YL5yvUq93L75Px2/Ij8svh48OXuNPtoCiQNSQcbCOANFg20CKEJIg0fDZgLpwp6BZL7PPYX+FT5d/il+eT8dP+9/q39sQH4Bo8K7g6PEDQOnw7KEN4NLgeJAtACWQXMBCkCmwB1/K71x/Jm84jz5PTN9xz4rfRV8MrvN/RD9zv0pPDp8B/y7/M89wr5nfq//J35BPc/AsMR4xK0CfgFkQYbA+z+vQJvC04NggfmA78Chf3W+GH7LACrAoYEOQagBmoEmAJYByMPmRH2D/4O6w1WC0oHSgIk/v38RP+/AQQBTgDXAsMD9P84/Hz9cwLuBHQBnfv99r/yeO9l77fwku5t6hjp4+hx6Kfqie6k87v4Qvde9a4D2RngHlcTRA44EtUNov9o+mACoAVh/Rr4d/li9RjtyOy39ID8OALoCeYQzQ+fCg0N9xTEFrkT2hVBGy0Z7w4zBoABsfvb9V30CvWa9vv7fAHzAJP93v0hAx4IngapAk8CmP9p+TP4C/gj8bLpvuWo45LmJet/62zuhvIK74LzjwxZIBEbwhDoFNIb6hN/BPUBYggFBGP2Ce8e7eHoAuaS6IbsOvGN+iAEgwd8B0cJeQ49FScZwBmyG9keXx6jGD0PBgUC/UT3FvK/7pLv6/K19aD3sfnm+0j+dAI2CBoL4AhDBigIXAthBxT7ge+v6RHl7eB94qDnWuuJ7uDv/ev37ZYD7B1/IyMbKR6oJhkf7g5JCKYGUwCh+nD4zfEr5zviz+J+4wPmI+46+pkE1gheCvgN1hBzElEXbR1YHrga4hhwFy8O5f8o+Nb3N/h29zn1cfG17+jwg/E387n68wSyCn4KHQjABuUGeAfqBY0Awfmj8tfoDt9r2/TdkuMZ6jXuFe+B9IsHfx/TJtAe0h58JRIgzxPqDr4McAZX/+X59/IZ6Nzfy+DN4zjj8uf99QQCfAUbB4ALDQ9WEdwWjxyuG00YpxnrGskSPARE+4/6xPoS+cT3RPZq9IHzYvFN7pDwsvl1ArwFPwamCPQLhwtsB5cCkvx59PvqGeGD2xbdBeAE4hHmdugQ6G7wWgfbHo8oqSqMLyAu1x+dEigOHQk2AHr6wvga9Nbpn+Bn34XiK+Yp7hL72gaODQkRlxNlFBMUsx
VGF4oWYRfFGbIWCg2kAyP+nfoN+HH4pvrZ+S725/R09uH2LPdT+wICKgYwBzAIQwlNCdwIeAZbAEn4JO884xLWFs5f0AzaMuP45GTgSuQV/fYbqCk7KvkvQDfIMLQe1g7FB9gEff/59jnuO+YA36fYBNVb2G7lD/j+By0RCxYFGRYZLxd0FuYXHBoQG6EZyBT3C5gBJvrE9iX29/gR/vn/ovwq+BH2IPbk94/7twHvCZoQPxIJEOkMQAtZC14HCfw78PTnyN2X0c7JfMnMzxDZIt0a3NLmfwTPIaAt4i8hNRo4BS9NH5gTKw0mByn/L/Wd6pnh99nW0pzQE9l+6a/5EAb4DoQUhxZOFqAWnRnDHQ4gWSB5HvMWxQmL/SL2VfGO7wj0+/sjAJr+/Pu2+2L8Mf1IAPsFLQzIETIVVhToEMsN/gpwBv3+lvWf65DfP9HRx4/Io80i0tHX0txZ4ZnwWQ8OK68y0jGFNy05xCqFF90NzwnUAlf62vMd7ZXjydkZ1NnTJ9mY5Tn2QwO9CrIQeRb9GAwY3RclGr8cyx0yG8QS2AYI/af2BvKQ8Rr3VP1q/zv//v+WASYD+wTIBrkINwwXEDYRABC4DrcM0AksB/YBtfYH6ffcg9EYySnJ4dCs2cLgveW35LrkuvamFrIrYC8lNFE9Djk2Jq0VJA1OBQf8ofVo8q3so+KG2aTVdtY63Ijog/dnAxkMyhLmFq0YBBohGuoWqxTVFqgV/wq8/k75Wvdc9D70/vnJ/w4BFAJABhwK5Qs3DgQQQQ7DC2oMswwOCD4D1gOnBXsD8P47+Y3wYuWA2gXU19MM16Taz98S5ffjtN+t528BKhwtJwsr/TUWPKIu6xpFEn0NUQLM+CD3M/RY6gHgx9rk2cXc/+M37in6EAZ3DoIS4RSxFxIYPhNwDgAPdg/SCccCz/79+iT3z/eG/IcA5QKtBKQGkgr9D9wSkRCuDP0LpAwhCYcCGP+O/zMAYwBtAAT/uPwc+UXxZOcx4GPb6teI17zXOdhY3mXlneUM6/cDkiOXM4M0Izi4PF0wKxgoCVEDePkr7Ljm6eqV7BHmluNw617yX/Nc9uH+KAhFDG0LuQumDvUOxAuHCUgKDgwJC/MFxACW/nf+//9SAbD/LAAxCM0QkBHhDs8QKBW9E40LOARmAjwBgfwR+Db4VvsZ/if/AP9B/oj7O/TK6PHdWNYJ0WbOZdGj2sjnbPPP9xn7iwtFJPkt1CacJfQrwyQODxH/tPtF+OXvH+4m9q76uPan8wT0hPBL68btgfUr+s3+UAgEEFQQFw57DO0I7wMDAkADhwPBAvoERAk7C38Mnw79DLUHtwaFDMES4hP3ETARXhCPC48Ci/kr9Y32iPln+gj8UgEVBVMBffmk8oHqnt5F1IPQbdIc2Q7m5vabAm8CXPxG/zANiRSVDpQOvhvcIJsTbAYtBhUIZQDE9wv6iwAy/urzY+yl68vrIurF64Hzvv1UBrAJ/gfxBo8IAAgdBE0CjgXbCY4KeAlWC8kPKxITEJcLqwmKDWITWRSiEIYMNQiJAi79BPpG+Rb7Lf87BPIGZAXlAeL+jvtD95DzAfG97XzoAeO/383f3eJj51ntxPQ9+CLzsu4Y+BkHTQp0COMSRCJaJBoczhe7FiIPFgN2/JX6OfVs7e3ni+On37/e0OBO5TvuVftFBx0OSxExFLQVJhNqDwMOIg5qEO0UvBapFJITOBPsDUwD1fke9iX3VPrw/QkCwAcMDf8NFAzrClsK8wk/CsEJGwc0A6b+/Ph38Xbo4+B83W7dSt6Y4PHlx+uQ7ynzp/e7+ST3IfOv9dMBGg92Fl4djCYpKXwhFxf4DZcDnve37Z3npOOf3+/bWtsw3wrlwOuD9LP+uQdFDuYSGRe/GnocRxycGh4XqRK4DnwKqQTL/0D+D/1M+Sn2IfcO+8wAiwiREOsVGBgWGDcW/xLdDzcNtAlMBWEBcf3P96nxp+x86KPlfeTs48fkbu
jz7FXwrvJz9Lz2lfjj9uXxmvDy+v0L0RXgF0od2CRZItoTJQR1+3P06edz3N/byuH74wvjZecR8er31/o9AJsJHhKKFncYBBpZGlsXCxFiCS0Dtv7f+T/1WvT993P+yAWqC84PsBOIFkAXfBgwG3wbNReMEaUMxAa2/uL2xvJ+8cnvxe7w8Qf3XPnM+Wz7G/4r/+j8uvmX+Bn3HPKe7Ofp+eiU6H3pcOxe8Gv1kP8ODSUT9A9JDsYPJgs9AJz5Q/rf+u32Y/QD+eT+vv3P+IP4m/vq+qn2VfUs+UL93P1l/Zv/6wJmA2wCRwWyCwoQthF5FZMbdR+DICwhdx8oGEgOWgYbAJj6uvaq9IP06/ao+jD9nP62AQAHNgu0DA0OeQ+GDV0H/v/b+QX1xvBH7SzqY+b14k3hOuB730Dhr+Z47LHuUvDe+K4Gyw/TEkYXwhxTG9ASBQt5BgkBjfm09DP13/ZC9bby2/Gx8ELul+wq7MXsx+/d9bj8EQNeC9EVgx0oIUQjECMyHwsabxZpFOgREw4vCaUChPpL8/3uv+3A7+70l/wwBm8QvRhFHeYegR70GqYTJAu1AyH9Ifda8v7u0uxE66jpzedC5XjhfN0E2/Datd0949nqOfOB+R7+qAfEFd8bexi0GMwc0hccCuMA0//B/tb4D/O58273pvXh7j3r7OsU7KXqSus28Tv7rgQGDPgTGhyRILMglh07GEoTjQ/WChUHWQjAC78KxQUPAdv9wPrZ90r48f0TBvUNRBWiGoccRhxjGW4R7QYG/0n5B/Pw7rrvYfLN8770jPVJ9KTv5+h74lDdYdop29Xf6eY17j3zjPaR/HUFFwv8C7cOFhV7FzATWxDkEskS/wqiAnD/Ifwq89LodONl4oXhpeC84zbsFvZm/oYGxw7uFUcbtx6AH6Id0ho6GHUUXg+gC9IJ5gacAaD8u/r0+l77Sf2uAhUKuRAjFRMXXhcJFr4RbgvKBakBFf63+zT7Mvsw+8X7XPtH+Gf03PCm6zzkH93W2ALYxtls3fLjLuyH8c7ykfagAIoJmAxXEWkbMSL4IDweQx4ZHM4S0wXr+jLykuhv3oHXstYk2rneGeU97ib4egHrCA4O7RJoGJkb+xqkGeoZdRmSFYQQ5Q2TDJQJpQRDAEb+av5K/7wAqwN7CHUNhxDTEa8SKRPWEeYNRwl1BsAEdAGq/RX8jftY+eL1JfMw8Z3u5eqU5xDlv+FS3rDc6dxT3lThT+Z97e70Hfox/4AIpBRJHHMd2h5gIhEgwBV6DAQIcQJ5+NjuiOnm5iTk8eFD49XoW++09Dv6UwD+BEEI9gr9DIwPVxOXFUcVIxXkFbIUVRDaC1YKjAnXBmEF7wcCCyIMmg0fEL4QmA64CzkJkgb8AwUCwwCTAJgBXAJvAtAC9wKZAPr7Q/gL9kvyTu3Z6sHpaOZL4jjfGNzR2b7aZ94L42npLPT9AN8IRA1dFkMgxiA2GhEVdhHMC8AESP+a/Db7Bvl79kf1n/Xv9PbxLO9w7pHuiu+I8ezz0/d7/lUFpAqHEK0XSB34H3QhUSLRIGccMRf0Ej4O3wePAY78T/i39Tz1Nfau+TYA1AZzC80PXxTyFT8THRCYDt0KCgRW/lP6GvS97F7oN+Zn49ng6t+f317fh9+a4K7jLuiR7K3xlfhJ/7QDjgYDCt4NzA+dD5IPORCmDxcNRwprCIkGvwJI/XT4YfXv8SDtYukW6UzrDu678ff32P/TBrgMexI9F5Ya9Ry5HXccOxrmFtERQwzLB1oE7AF+APP/ewAhAsoDsQXuCGAMBw4PDrAN0wx6CgMH6wNEAQ3+U/sg+tz4vvWV8qbxqfDO7K3oLucn5vvjpeIz49XkUecb61/vPvOr9x/8DP6u/vMAjwMcBEMEKAcQDFMO0w0bD1URow92CgUFXQCz+/r1HvAt7U3tFu7b7vTwRPVJ+/EAbQUTCj0PmhMxFrkXsxlEG2sa9BfBFUATvQ8fDGEJgw
eUBvgF0gTEAx0EngR/A0ICkQL3AvoBlwARAK//Of4o/Nj6jPmV91v2JPb39B7zLPK78DTtZunE5sjkrOMj5EPmAeps7uPy5vfb/BMABQI9BFQGcAboBKUEnwZbCMYIhwlTCxgM9gk7BnoDswC2+2b2gvOq8QbwPPDR8qf2kfv9ACgGBQtYD9cSbhX/FgEY/BisGXEZNRi3FpwVgBNoD+UKTQenA63/rvyo+xv85vyo/c/+IACJAH//q/2X+2H5QPeR9cn0rfVC99/3Pfg0+Yj4QvWt8cTuFOtX57/lFOba5s7oUu3c8iP3k/pT/lABvgGyAJQAoAExAzcFaAdICmwOvRGdEeYPww7TC6EEfvyW9oPx9et76HHp/O1382r5tgBsCM0OlBNvF+4a0R1CH1AfLB+8HoActRj6FBARSQwZB0wCpf5s/HH63vhY+Sr7NPym/EP9iv0L/d379/n49x/2U/Ra893zGfWY9ln4bPk/+Yb4+fbw833wBO4j7I3q7Olm6l3r4+xL7+Px//Pn9iv7Uv8iAxIIvw01EhQVPhciGEwW5xGMDH0GJf+F907xcuwK6ZboG+ve7uLz0fpfApkI5A0aE4MXZxqFHG8eCiDSIG0gVR8AHl4byRaHEVgMRga7/2L6sfZD9BzzFvPs85b1dPel+CH5Ffmz+Ev4Ifhj+Fn5x/o//H793/2h/Gb63/dO9MbvD+yR6Zrn0ObO58Hpc+y877ryxvWR+dT8IP/+AZQFrghkCwAOBRCVEf0SuBLVD7MLRwd/AUH6bfPt7rDsDezJ7cvycPkVAPkGIQ7VE0wXxBnRG4McyRvYGl4aZxk3F9QUJBPlEDkNgAnKBvUDSgD7/BX7lfmk9zT2/PW89b/0IfRs9GD00vMe9J/1LPdp+P75I/yx/dj9OP15/Mv6vfeO9M7xsu7U6xDqqei453zoVeq07E7wzvSs+Q3/8APeB/oL4w8pEikThBP3EjIRNA77CX4FQAGl/Pn3CvXw8/TzlfXc+Pb83gEdB88LQhB1FGMXbRkpG7kb6RrSGR8YYRXAEnAQDA08CX0GGQTKAIP9qfvJ+kT5Jvf39QX2bvXG81Hzt/Sk9b711/bZ+Cr6nfq4+r36SfrY+M/29fQZ8z7x0++j7tvtEe647oPvyPBU8r3zHfVU9uL3Xvp5/R4BoAVwCsYOZhLJFA0VnxP6ELcMGAeoAdv8q/ja9S/1Y/bg+EP8nwBwBZ0JTw0wEScUwBUzF0AYZRfRFbIU1xKcD8EMFAtWCbIGagTAAzgDQwEs/zH+P/1v+3n5rPc19kX1E/R48ufxLPIT8vnxlfKn8xT1nvXC9ID0LfVh9NfyEfMN9E30HvVn9qf2pfZz9773Mfeo9kn2wPaa+HD6RvwAAPgEEAklDGMOsg89EEwPSgyGCCoFVQLe/4H9+/so/dT/WwFEA8QHUAytDtYQDRRBFo4WRRY6FnYVpBNJEakOygv3CBMGFQOUABf/BP7F/JH78/p8+tP5uvhn97H2y/ar9in2TfYW93f3S/fJ9uj1j/Si8ozw4e5V7RfsMuyS7QzvJ/F29JL39/m1/D//vAD6ASkDVAPbAqcCjQKVAcP/a/5N/gP+Jf2k/bL/DQEPAioEAwaOBj0HCQj3B70HBggzCEMIoQiSCfcKfQzjDagPwBE6E5kThxMmE8kRIA/hC8UIqQVxAor//vzp+r75FPm791L2D/Ye9jD1KfQV9If02vRF9Ur2DPi6+dv65/u7/KP84/ud+nj46fXY887xrO+g7v3u5u9h8cXzvPYX+m79CQBxAgkFiwa4BtMGWAacBN0CbwES/8D8Gfw9/Gj84v2hAKwDJgfRCvEN3xAbE7ETbhPoEgkRUg4oDAoKrgdDBokF5gQSBfUFYQaNBr4GVwZXBRoEPAIhAKb+M/0m+6r5MPmI+ID3LveA96z3svcT+Lz4T/mv+RH6b/qL+lb6CPqA+Vj4ufYv9XbzSfFk77bu9+6n71rxm/RW+KP73f
4eAoUEzwVYBvUFigS8AsYAjP50/Dr70frj+rr7uv1IAN8CvwUwCZQMMQ8BEYsSkRMzE8ARfBA1DwwNwgp9CX0IEgfsBZYFcwUiBc0E1QTnBD4EBwMdAg4BJ/9e/Zz8vftO+qb5Hvpo+iP6Mfrq+nr7WPtK+/D7Svzc+5T7ivub+tz4ZPcC9gv0K/Ic8YLw1++H7zDwt/Fw81/1I/iU+5X+MQH0AywG8AbsBs4G8AWyAzEBh/8w/lv85frT+qz7m/wn/vQAPQQoB1wK+w2HEM4R8xKzE8oS8RCRDxAOpAsZCTgHiAXgA3ACaAHnAM8AnQCTABQBYwEEAboAeQCJ/zf+Of0R/J36jfkV+a74afiq+HP5Lvpe+on6Fvsc+y/6S/nR+Dn4WffO9pv2mfbE9un2JvfH95H4P/kz+l/7l/wn/un/OQFSAm8DJwQJBHgDvQLGAWwA+P77/YH9Lv1g/Y7+PwAHAjkEvgYWCTMLNg3FDtYPkhC4EEAQbw8XDh0MIApGCPsFhAPDAaEASP/v/VP9Nv3d/EX8APwq/Bz83Pv1+1L8qPwW/YX9qP2r/b79Xv2H/Nb7R/tp+o35BPlw+O/33vfa9573n/cV+ED4GPhc+Cn5tPkB+vD6Yfx4/WH+t///ANMBcgLbAvgC7gKqAh0CogEiAWsA5/+8/53/4/+5AMUB9AK0BLEGcQggCv0Lpw2cDgEPKQ/mDs0N7gvgCcQHQgWQAj8AY/7t/OT7UPs0+4v7MPwt/U/+Lf/v/+gAfAEiAZMATwCG/wf+pvyY+z/60Pi599X2Jvb39RP2YvY092v4pPnf+gL89vzN/XX+j/4l/rX9TP2t/Pz7tPv7+3z8L/09/oP/iAB6AV8C4gL2AgkDMAMQA7kCowLPAvACHwOqA2EEFwXgBfEGEQjnCKwJowpSC08L/gqdCr8JSwitBgIFBQPnAAH/W/3T+5v67PnL+S36F/tm/PX9jf8FARoCrAKvAi0CPQHw/1r+qfz7+kn5kPf69cv07/N087LzmvTa9Xv3qfnF+1j9qf7w/7QAyADAAL0AYgDX/4P/NP+x/m7+g/63/vL+Xf8IAN4AewHMASgCnwLlAu4CGgOJAw0EigQlBeYFrAaSB5QIXAmyCfEJHArCCbkIfwdOBvkEbAPfAaQAp/++/gj+qP2V/Zv9wv0X/qP+RP/q/4wAJQGEAX0BJwGiAK//T/7d/Hv7Avqj+Lz3Offy9gn3h/dR+CP54Pmq+lH7xvsN/Ev8fvyq/N38I/1s/db9X/7Q/hz/YP+y/+v/DwAfAC8AYgC6APMAEwFlAf4BdAKrAgIDqQNTBKME3wRbBfEFXgagBvUGTgd+B18H7gYyBkoFNQT6AtABzgAEAJD/Xv89/zL/WP+A/33/Y/9d/2z/fv95/43/9f9dAIAAmAC9AIsA4/8E/+n9ifwS+5/5WPhZ9672ePbW9oz3b/jJ+Wj7w/zY/d3+m//n/wAA/f+4/1L/D//o/rv+l/6n/vb+Uv+1/zUA3wCVAUMC9AKZAzcE5wRsBZwFtQXJBZMFEgWkBGQEDASHAzYDJgMLA7cCgAKTApQCWgIjAjICPAIVAt8BtgFzAfoAZwDQ/zz/tf5F/uD9jf1j/YH9n/2f/bn9Df5d/n7+gv5u/jH+u/0C/Q38AfsQ+kb5x/ih+Or4hflt+on7tPzO/a3+aP/7/zAA+P+f/yv/kv7u/W39S/16/dz9gf59/6wA3QH/AhAE/QSuBRsGSwZCBgoGsQU9BbsEJgSRAxEDpAIwAscBjwF+AXwBjQHFATICigKjArACowJHArQBDgFZAL//Q//d/o/+dP5y/mr+av55/qD+yP7T/rL+af7z/UP9evy7+zX74fq9+uf6S/vT+2T89fyP/Sf+wP5K/8L/MACSAMYAxwCVADQAp//7/lH+y/15/VP9av2t/Rz+wf6c/5YAoAHLAvoD8wSbBf0FEwbNBR8FTwSEA7YC/QGDAUkBSwF3AbkB/gFGAoICng
KXAl0C/wGDAdgAFABf/7j+If69/aP9sP3J/Q3+fP7O/vT+DP8T//X+uP5w/in+y/1o/Rj9zfyU/Gz8W/xK/ED8Uvxb/Fr8kPzo/Hv9I/7w/tb/uQBlAdIBFQIVAt8BbwHVAD8AtP8j/6r+eP64/kX/AgD9ADACYANWBAQFegWXBVQFwATsAwYDGwI7AX0A9f+g/4X/kv/F/xoAhQD4AG4B0gEEAv4BzwF0AccA3f/6/kP+tv07/fP88fwo/WT9g/2n/e39Sv6Y/sX+4f4T/z3/G//F/nT+VP5L/kT+Tf55/sr+Hf9M/2b/k//F/9//2P/S//X/QQCpABQBcQHOARgCKAICAskBnwGaAZoBoAGwAckB6wHwAd4B0wHZAcoBmQFLAfsApAA0ALj/W/83/zj/RP9a/4D/xf8DABkAGQAkACcA+v+a/zP/2f5o/uD9Yf0W/Q39KP1U/af9NP7l/nP/u//0/zYAUQAqAOn/y/+6/5b/b/9U/0r/UP9Z/1X/Sv9U/2b/Zf9w/6b/9v9NAK0AAQExAT0BOQEdAe4AxgC0AMEA2wDqAPkAJwFNAVIBSQFHAS0B8ACiAEYA6f+c/1//Kv8G///+BP8U/zH/cv/B/w8AYQCuAOYACgESARMB/wDeALwAlgBuAEAAEQAGAAsABwAEAAkADgAFAOz/0f/N/+f/EwBTAKwAHwF5AZkBjAFsATEB2AB6AEgANwAwACsAGQDx/7z/cv///mX+zv04/ZX87ftU+9L6d/pQ+kL6Svqf+jD70ft9/Gb9cf5//4cAlQGaApwDmARxBQIGZgaoBqsGUgawBe0EGgQ2AzcCPQF2AM7/I/+i/mH+Tv5S/lL+a/6r/vv+P/+m/y4A4gCsAWgCAgODA80D2AOYA+8C6QHsANr/cf7t/MP7tPqI+Xj4zvdb9+72vvb89nv3S/iM+SP7vvx4/o0AawLuA0YFfwZSB60H1wetByEHcQauBZIEcQN6AnYBgADW/0P/5/7V/sr+of6s/gj/Qv9T/6H/EABoAMgAYAH+Ac8CvwOTBEIF4gUoBukFMQXvA0oCpwDe/sP83Ppw+Qj4UPbJ9P3zefMb83rzzvSA9p34S/sA/qAAtQPmBlsJNQu7DFQN5QzAC+sJfQcOBccCawBR/g79HPwg+4/6evpo+mL6svpV+/z70/z7/SH/LwBtAbcCuAOVBI0FVgbaBkwHpQefB2gH+gYABqkEYgPEAY//Tf1A+9r4SPYD9MLxiu8h7q/tuu2x7mfxFfWj+H/8IwGcBTkJXwwbD5AQ2hBxEBEPhgzNCR0HDwQrATX/nf01/I/7ZvsB+7z60/rW+vT6ovuD/Gb9p/4aADIBUAK9A/sE+AUuB5gI0wmxCk8LkQsqCxUKbAg4BsADLgFV/kT7h/gf9p/zNvGH717uhu097ertj+8Q8sT0tPdW+3L/WAP5BnUKXg00D+APbA8WDgIMZQmmBiEEiQEX/0b9y/tF+lb5Kvko+WH5NfpA+zz8c/3H/tr/twB0AVACTgNKBFIFtQY5CHIJdgpdC68LUQufCnoJngdbBewCKwAC/aL5DPaV8o7v1+x/6l/ph+l26qnsbvCj9B35bP6/A/EHugskDwgRiBGAEWkQ9Q08C7EIygXPAk0ANf5S/O/6Dfqh+af58/l5+jr75/uN/H79dP4q//T/zwCIAXECmAOLBJkFJweNCFkJKQrnCuYKVApxCcIHhgV4AycByf1n+qD3U/SN8JTtQ+ug6cvpiuuS7fXwTPau+xwA3wStCSwNeA/bEO4Q4g8WDpoLtgjgBRYDiwCp/gz90ftD+xT76/om+9j7NvxJ/Jv8Av0X/VT9zf1E/hj/iQDsAUADEwUwB+AIMApoC04MrQx+DK4LYQq1CFwGMgOq/xP8Xvh29Nbw2O2u6yXqKekQ6WHqseyC7wXzf/dR/PAASAVkCf0Mpw8lEaMRQxH3D8gNLAteCHQFfQLR/3b9ifsf+j/50vjh+FP58fmc+lX7Hf
zy/LD9if6w//4AbQIlBOwFqAeNCYQLBg0aDtEO3Q4NDmAM1gnXBqsDHQAm/GP4+PTe8UPvI+1D65LqfOu57NztpPDi9KT42Puj/7EDCAd0CRsLCQyKDE0M+gpLCfcHZgZRBHgCMgHB/1P+aP2w/Of7uPv2++n76vtg/I78WvyM/AP9Yv04/qD/HAH/An8FyAe+CQMMDg7xDjMPIA8cDiwM0AnMBmMDNgDp/O34T/Ws8hDwRO1169jqDOs27I/ulPF99Tr6h/4DAtIFhwmRC2sMVQ2EDVMMowoPCSAH9gTSArEAvf5R/Rz82frX+Wn5Rfkf+QD5PvnU+XP6HvsP/CH9b/4fAOcBigNfBWQHJwmgCvgLDw3ODe4NMg3KC/0JlwesBIUBP/68+lz3RvQ08ULuZ+wS7LbszO0F8LTzD/gI/JT/MQMABzsKAwytDEQNYw02DBwKQgh/BnYEiQLMAPr+x/07/VP8Vvtr+9H7dfsL+1f7oPuV+7P7GPzk/DH+if+oABQC6QOCBboG3AcCCeIJWgpyCkkKwAnGCEEHOwXfAnoA7f35+v73yvXb823xXO+27qnu2u5I8AjzCPZY+TD9IAHCBPYHOgprC/8LBwztCggJGgdkBZwD4AGhAA0Aqf8u/8P+uf7n/rr++v0e/aX8VvzN+2D7jftn/Jr97f5qAGMCgQQuBqIHQAmTCjELVQsxC4AKNwlMB8UE2QHx/gf89/j99fLzpfIo8abvTe8A8BDxj/LV9LH3CPtc/hMBawMgBlEIDwksCYcJRAn9B3MGGwWwAzECzAC+/xf/tf5k/ir+Of6k/uz+8/7i/qz+PP77/bn9WP1h/ff9rf60/zoB5QKaBNEGCQmfCiQMww1aDsUN3wySC0kJmQbHA5gAPf07+mb3n/QX8jbwK+8O73fvG/BE8d7zC/fs+C/6/PwwALQBygLfBJwGUgezB6QHJAcPB5UG5QR1AzsD2wKcAUgAlv/y/uT9wfwC/Gj78fq5+qH6evq4+ln7+vu3/Az+7f9SAu8ETQdpCb0Lww1+DikOtQ3bDAgLugiLBjcEJwLVABb/ZPya+q75VPe481fxxfAI8T3xWfGd8gn26/jh+An5MPzo/m3/3gD+A2gG7Ac1CdQJxAmVCYYIQgaDBPID1QK0AFX/Iv99/hv9/PuD+yX7wfpg+kj6kPor+/n74/zk/Tr/1QCOAhIEYQWtBvYH6AiCCcsJGAqDCqsKVAoACp0JhgjcBrAEfQEi/pn7/PgH9kT0rvMW8/7xtvB077buke7x7i3wKPOe9zb8YACVBGUI+wo5DLkMPA3MDYgNwQxhDIgLNQktBtcCXP8i/Gf5KPf59Zb1afWf9Z72wvea+Bz6qfzZ/l4AGQLHA54EIAVsBZMFYwaVBxsIrwhQCv0LOgykC4ELIwsXCXsGVQSmAWz+rPuE+Ov09/KB8ifxnu/z7zPxOfHI8Ebxc/Lf81v2B/p1/nsDPQhgC3YNrQ+UEPIODQ3CDIEMpgqKCAIHtwS4APb7w/ey9H/ySPFU8dnyhPU4+Ln5zfpq/Mb9Pv7q/oMA4AJVBT0HrAhrCvYLIQy2C3UMYA32DOMLEwswCpMIogV7AgwBbgBF/n77uvkK+DH1xfG07v7s1ey87Y3vevI/9S/34/jD+gj9RwDZA/gGZgoBDosPvg4/DZsLSwmHBq0DYAHW//b9+fq4+D742/fo9gX3Gfgd+Vr6Afxx/R7/ZwGdA00FyQarB+cH8gdcB+oFGAVVBR8FEgTdA4sEcgReA2MCSgH//z7/rv7h/Rr+Tf+c/zP/p//i/9L+5v1//VP8OvsK+376cfnK+S/72vsC/M/8G/45/2T/4f4Q/z4A4AB0AEUA1ACqAFr/Df5T/Zb8JvxK/FD8B/zL/Kb+2P8dACYBuwJ1A6IDXgR1BWUGrwYSBisFxgT2AzwCvQAIAFb/yf7A/rn+of50/7YAPgHnAaoD/wQhBTgFmQXABecFzgXvBAQEzgPoAooAHv
7o/O/7Sfqa+B34pfgl+Vj5BPoS+7T7yvvX++z7NPz0/Lb9jP3U/MH8Vv0h/UD8EPzr/Kv9sv2q/cz+xwAwAtsC6wMcBU8FqAQuBNwDcgP9AoUCEwL6AawB2wAvAOr/hf8L/5D+Sf60/vD/SQGWAv4DTAVJBgEHJAcUB2QHbwc3Bl8EvAJQAfv/9f5G/t39ef0A/aX8hvwn/AH8yPzZ/Qn+6/1A/j7+A/2l+0D7Tvvt+kv6Sfod+5L79vpO+ov67Prs+l/74fyB/tT/LwGLAnQDBAROBIsEGAWkBVAFZgSHA0MCSgCe/rz9MP2R/Eb8UvyP/Bj9R/7T/44BVgPLBLUFawYrB8QH8wfdB4cH1gbvBfEE2gPxAowCawLMAfAAlABrAKL/0f7//qj/Iv+J/ab8xfwo/Mn6T/re+vT6Zvru+aL5G/lz+Lz3ZPfu90L5hPpn+038qf0s/1EALwFkAqwDiwQzBeYFxwWqBEQDzgEgAGz+zfzt+1P8Rv20/Sr+V/90AOQAQQGlAR8CIwNuBCYFuwWNBtwGYAYBBv0FxQUeBZoErgQRBcAEFAQgBEoEVgP8ATcBegBP/5b+h/5t/h/+4/1Q/VX8U/t++nf5XPiK9xD3nfaF9vz2t/db+D/5TfpB+2/8/v1P/08AXAFLAtMCDAPTAnECKgLcARYB8v/u/lP+8P3E/dL9Hv5H/lb+l/73/hv/nP/2AJMC3QMXBRkGkgbnBlMHegeZB9AHige8BjQG5gVmBfwE/QTbBJMEiAQzBBsDLwLEAQ4BBgBa/6H+lf2U/Jn7RPpE+bf4yffm9kD3Lfif+Nj4UvmU+dX5dfrz+ir7/PsU/ZD9B/4q////OwCxAG4BswGrAZYBUgHVAH4AGgCS/xj/uv52/nH+g/6p/kL/TAAUAYsBCQKZAhYDkAP8A40EVAUZBn0GowbaBkcHrgfiB+UHEAglCKsHowaZBaoEiwMTAr4A8v9N/zz+Cv0g/Fj7V/pa+Wj4dvfO9rT2hfYr9lT2F/eO98P3kPjz+TH7cfzw/WH/gwCqAbACVwO8A0UEoQSYBDgEswMMA1gClAG/AOf/I/9j/pz96/x3/DL8Mfxy/Pf8uv2q/nr/TABMAZQC5QMwBY8GHgicCbYKTgugC8ELawtsCiUJ6ge4BkkFswMnAuIAuf9G/nf83/qf+V74BvcG9nb1IvX/9Af1QPUp9oX3lvhW+Xj64fvw/Nf99f5SAKwBpAIIAysDXANWAxAD0wLvAhkD7AJZArgBHAFRAA//l/1x/Lv7Mvu3+nD6w/qU+4f8av2E/vj/nwEXA24E5wWYByQJPQoBC6ILAwwPDLEL/goUCggJrAcSBm4E7QJwAfL/j/5W/Sn8APvU+cD4wfff9jz25/Wt9Xb1ifVd9rf3+fjt+QH7a/zY/e/+zP/IAPgB6AJQA2YDkwOqA2kD5QJ+AkkC6AEMAfb/A/8k/vn8k/tu+uz5xvm6+cj5R/os+xT83PzE/R7/zgB1AhoE5wXMB2cJeQokC5kLzwuqCyoLhwrhCSIJEgjABjwFqQP5ATMAd/7t/If7N/r5+Nf35/Yr9qH1ZPV79eb1l/am9xL5pfr8+/787P33/un/sgB1AVYCMgO4A88DzAPkA/QDwANtAzED/gJmAjkBz/+K/nH9WPxU+6j6b/pt+lT6Y/rY+s37Af1E/rT/eAFmAyAFegaMB4IIVwnrCToKXQpWCgYKUglTCEgHHAbZBHIDAgKcADT/sv0f/Jj6Qfkh+D73h/b19Zj1e/WG9dD1g/ah9/L4WPqt++b8J/6F/84ABQJUA50EjwUIBgYGtwVQBc8EDgQcAxkC/ACn/zH+sPx4+6z6N/r9+Qv6UvrP+mn7H/wJ/Sz+j/8kAb0CQgSeBb8GkgcoCKoIKAlrCX0JTgnSCP0H0gZmBeoDkwIzAZf/7f1H/Jn6CPm797b2L/Yu9l/2hfYB9wn4D/k8+i/8pP4SASkDfATcBDQF2QVGBnYG8gYhB0wGzQ
TvAqkApf5f/YD8zft9+x77KPoS+aP4p/gt+Vn68/t3/bv++f8cAWAC/APQBZwHIgkUCjwKvQkECQoI+wbyBcsEBwPUAHf+9fuV+Yz3+vXW9A30RfOl8k3y8fL49Mj3sfo8/jkC/wRYBpQHrAlwDDgPjRBGELMPVw5MC/oGiQNiAbr+ZftU+GT1vfI88W7wvO908MnylPWF+C/7E/1F/7YCJwbMCGoLyw3EDs0Oaw4IDT0L3gn3BxMFcgIoANb8QfnU9s309/Ip8qbxvvDW8Jjxw/FG8hD0IvYc+Bb7bP5GAMQDrAq1Dg4P2hEWFjoWFxQdE+kQjg2WC7sIxAKS/g7+8Pqb9Yv0P/Uq8w/y8/O29ED1GfgP+kL6Pvyg/3sBNAMzBk8IzAhHCYEJdQiQBywHwwXoA74COwHR/tH8u/ud+qT5hvm0+Wn5XPmV+Qv5P/hY+Av5o/l4+QP5k/ib9/H2M/jN/M4DnAg6COIFowUqBpMFJAbbCQ0NTAvzBSUC5AB+/3z9mP1b/+L+qvuP+Hv2nva9+cb8D/5gAJkDhgTOBHMHXgqUC5YMQg2zC50JvAhNB+kFDgYLBcYBAv/t/Ov5m/gr+tr75/xq/v3+MP5H/gT/gP6s/koAev/p+wP5gfYZ9LXyzPCF7vbta+ze6EjqO/SZ/SkAngKzCOALYwqbCtAO2RM2FoQTKg6BCsYHPwKh+935GfzS+iL1HfGC8TH0SPZU9+T65gEGB2IIBAoLDUUP3g8iD48N/wy0DP0JJgZwBDcD5f/+/ND8IP0C/dD92/5//1QB4wOXBUQHxwgDCIIF6AJRALD97Pov9wLzPu9x617n/eNo4mHjGeXu5kPuhfvMAwwEJgd/Du8PXA3FD+8UHRbSEgEMFQSG/kr6XvTh7xHxOfQ182zve+/K9OL4vvjd+rMDNAzkDpIQvROdFLMSNhDmDdkMnAztCe0ECAFP/6f+5f4lAFMCQQT6BLEF4QdcChwMiA0TDq0MwwmDBeMA3P6Y/hD7xfQm8Brscebz4/LlXefZ5zDqdOya7drvZfZpBGUTWhfEFLYUExJhChoGKQYGBd4BcPw985brW+lr6OzoiO6B9Mv1Q/bi+O/7BwBfBb4JiQ5PE4kTDhHsDxQO6AkpBmwE/wN0A3ICiwFBAZUBeAIuBHoHlQv8DbENmw1AD50PIw3cCyoMXAkoBKkA5/3c+pf4vPTL7izrc+kN50/nputZ7k7vL/Mx9lH1k/jBBDUR1hTmERgPmwweBin+1fxmAEn+vvX87o/s3evj64nsJvDN91T92/vM+QL+ZgMHBPUDYwfjCw0N8wpZCbIJGAm2BeoBCwEHA/8EBARjAQ8B6QL3A9cFQQvsEN4SWhLbEAUPMQ5wDTILlQmcCK0Ebv4B+hj44PaU9dX0UPX69T30VvGo8qH4g/w9/G79ff/E+5b1jvdqAt4KTAlQBDcD7v+k9rHv1vEn+GT5TvQ/8UPzLPR98qHzf/m4/+QAsv2+/PAAxATlAu3/+AKyB5YG1gK6AskEEwXZAvz/UQCmBN4GnwSkAw8GGghbCLoIMQtwDw0SHhCWDJALZwztC7IJsAfHBusEswDv/Dn8tfy3/GL9TP6C/Rj8pvrq91n2g/ne++r4a/cK+S70M+sz7bL61QSPBKABFgOXA8z78/Ei89/78vzX9Yzznvfc+OL1yPT5+K//pQGT/YH9uQRLBzcAVPt0/+sDQwI7AKECuAX0BUMDzwAiA7MIUgpICGoIDQr5CHAGKwcqCysOuA60Db4L+wn6CPcH0QdXCMcHDQeaBusDkAAhADsANf5T/eP9J/vP9e/y5vGy8evyc/N78/vzWfBd7Nr0+QQTC78FQwMQBY4AHvfn8774jfyQ+Zb0zvRj92n3FfdV+cP9MgLlACz6Qvlw/mP9UviA+q8AvQK7AoAD/gMuBdwGdAZEB1wM+g/FDQcKqAeGBDcC5wRPCoANzQ0gDVsLQggtBzoK4Q19D84OHAvrBfoCWg
Hy/Wj7T/xQ+zz2QfN58n3vMO8U9Fv1JfMI9gD4x/Hp7Jf0KQP3C/sKPQeABYoBn/oM91z59f3I/tr5AvWN9Nn0+fM59ar55P3J/hD8Avkq+kr9rPwa/AABYwW7BCIFXAcdBgME7QQYBUoFkQliDDMJNgZQBjIFnQRlCdgOeQ/VDnEP4Q1rC3MLTAwrDRwPgA61CM0BT/7p/Of5JfZa9S32JfXw8ojwaO6E8Kb0C/TX9Of6QfhH7nTyCQOfCWIEngQXCj0HG/5n+hz95ACLAJD5tvTF9zb3U+6S6xj0x/qL+cb3w/rI/v3+k/yd/ToEBgrIB/MB4AL8BoMEG/+p/8MF4AvQDHIIYwWtBmQHPQaFCXsSIBj5FDAOUQrOCRwKRQpxC4AN5wxYBkf9Q/jq9hz1E/Ve+Jb5Ivff887vI+658un2EPaY9zb8X/mz8GfztwPhDE4GqgL5CB4JCf9p+Wj9dQKlAAn5m/Ol9Ab25fCF68TwE/u4/VT6Afs4/yQB5v+o/VL/cQVeBwoDKAK9BjcI1gRTA30GKgv4DdwMlQjTBXMHbgmVCLoJoQ84EwcQzQsuCy0L4glFCUgJyAeiBKn/W/kz9fn00PUQ9uj3mfry+cD1HfKG8tf1Lffk9RX3Afh98crrivVKBjAL3AYOCT8Plwpi/UP57/9EAhz7WfSQ86b0QfIG7VbsHfTf/KL9vPrg/R0Cpv4J+Tf78gK8BhQF/gQ/CBMKYQhUBosHJQt1DUENKws4CAcGWgUXBhkJOw4OEp0Rmg6xCz4J6wfmCCwK+wiVBR8BFPwg+K32vPZC9/X5A/6N/Q34IfSJ8wzzJ/IF8YTvGvAm8PLrBu9FAYcPGAplA9cKSA7OAa/5HQGeB/cBIveo7zLvEvL572fsx/Eb/D3/Kfrx9mb7iQEAAZr9UAJ/C0YMRwYlBQUJgwnVBWgEnAeeC0cMjgmBBp0FagbXBsQHOQwDEh4Tag5TCYMHEAhFCSEKLwkrBsYBZvzF+BH6b/1v/n/+8f4i/Uv5RPVD8FTr3urX7L3rGetZ7+rwJu9E+P8L4RUkDssG4QliCkEBtvsFATkEmfv07uboBeun72XxgPMw+7ECTgHS+b339v2UAvr/b/4VBD0JVwaJAZkDKgkfCpsGFAWXCM4N5w45C6kHegd/CIgHwwYmCoUPchBbC1cG2wUlB+oGAAeRCAoJtAZsAgv+c/u6+vn5ufhF+Iv37fNi7vjqLOsP7NfrOu4I81TyR+6F9ssJwBHOCZwGNQsnCEEA6wBfBrYGQQDK9WHtRO2C8ZHyuvOV+g4ALPz886nyU/mn/jX/5wFdCDMLSQjvBQsIVAsrC5UIWAg5C4INYg0zCzcHAwPOAWEEPwj2C6IPGhGZDXoHfATBBSoIFwqKC58KIAYYAAz7jvfe9gv6r/y4+W71//KB7WLnSOiI64fsdfD/9Jvwn+yl+ZgLZwycAj8CWAnPCaIDsQIDCvgNJgTw8tDrT/Dj8Rbtee5a+YP/S/nb8CDyh/ra/2n/VQAuCOIPFA2sBHcEPwvnDHkIBwfnCkUPHg/kCDYCewLRBjEIrwk2D5gScBC/DJ4JVQgPC5sNcwvaCOQHnwME/R76V/q1+az5f/ve+qX1i++964frOO1r7DjrE+/u8QLsQule+B4KfQjF/2YF3g7DCosDJAmHEogPSgEO80ntyvDs8pruq+/y+HT6zu/B6L/vTfyBAT//+AExC00NWQZ1A1sI5wuOChMJwwmgDF0PgA04CFQFGQVcBCEFrQpJEoEVYxGtChcHxwaOCLUMnxAsEQQOdQcU/9H4zfbA92f5gPow+rz2c/Aa6hbnM+pd7w/wOe3h7ATrBufd73oFGBBjCNEDqQlNC8IF/wRBDLMQWAmm+q/w4+7f737vyO/P8wj3vvLQ6zbuMfl5AB8BTQMtCEAJQQbNAwkFbApyDgkMLglODAcRNxBEC04I+wiICqULDw1nD5ERohCyC5UGUAWxB8MKOwzCC9MJbw
UM/7v68/lf+ub7WP5x/u36TfXs7rHqQ+sj7ZvrF+sV7TnpvOJo6Zz6UgR0BCsH6QwaDREJZAgQC8oMLgkm/+r1PfNu8W7tSO6y9FH3h/P+7w/xHPXk+AL7t/0rAmAE/wHy/0sCggbJCUANZRFgFf0XwRbHEWUNwwv+CzwOjBH6EYYOagpNB3UElQO/BdYH2AeABqcDaACW/+YACgJJAkIBJP+D/Sn80/iw9DryU/CR7ILo8+eO6aXmOuIt6Ij3nwHI/z79lATgDMUJkASuCsgSMA4RAs38K/64/D33PPT/9Tn4avVl7oPrdPBZ9FDxmu+u9dD7n/yi/s4FhwyoDswNlg35EeoZZB6jHGEYLRTcDygM/AmeCVYKSgr+B60DG/+H/QwAnAO+BcUHkwmWCOgF5wMrArQBrAJtACz6XfWI8RXsg+nl6rDpYueX6Vjrc+fH5HLtd/1/BhYGSgigDsUMBAQZAvsHWQv4CPEDb//g/rz/Qfrn8xH2bfiF8hHtYO7g8AHxI/CX7+bzv/w1AWUAFAOgCl8PYA9QETUYcR6xH4IcOBapEFUPcw9lDTgMmA1kDIkF7f09+9H9bgIiBnsIEgo3CacEAwDK/iv/YP8S/1L8Iffl8UTscueD5vXnxekv7THux+gD5tTvtPyD/6T+xwTaCtQHlAImBFAK1A1NC+wGSQbDBp0BKPpn+Ev6RfmP9ejx5e8p7xztB+qx6rDv8vTT+HP9KgRFChMNRA/7E/YYKxshGkEWyxJKEscRhA85DwEQMw02CSAIfQayAtgCiQcrCrEKHAxZC7MFI//K+8X6APqJ+sL7EflU8gzstuZz4vviv+db7FLwMvJv8I/xTPlq/pr86vweA5EGdwTdA18GegcJB+gGogbWBf4FIwYiA4v97Pno96jzfu+m72nwe+027CvxTfaV+Dj9qAOuBjQIYAu1DnIRChQtFRkU1BGDEBwSjBUYF04V/BD+C0wJ+wi0CD0KJA4uDpQHXAD3++H4i/nt/Wj+qvla9T3xOetV6O7q/+3H7uPvxfD97f3pBexR9GT7m/w3+/n7W/50//n/kwPzCfIMrQkZBW0E+gVhBYgD0ANJBI4Bpfx1+AP2OPUP9WP02vMj9WP28/Qr9Er4Wf2t/kAA0wUyCpYKkwukDjsR+hNcF3cYxBYRFUIUihPoEowS1xHMD4sMWgm7BQQBFf6s/W37Vfck9VT0lPLZ76Hsoepn61vt7+4e8ZfzdfXn9ln3Z/hQ/XwA2vvq+Pj+zALP/sH8MAC4AuUAVfz4+sIAZQcMB+wD1wOaA3oAZvwG+Yj4MvpN+dD12PP08vDxrPLE9Aj34/uLAgMGTwe5CtcNcQ09DcoQXhadGncbVBrUGREZBRbEEjERoxCHECUPxwqbBmIFOAO6/AP2f/Nz8snu3+rw6ZrqA+s26+XrpO6I8pvzjfJG9Nj3s/ig9lX0IPTP9vX5Yvt7/O/+twDM/zj/KwNrCJ8J+whLClkKtAaNA1kDwgJ7AAH/Zv7k+xf4SvXE87ryg/Lb8+z2C/qX+9v8yf8CA7kE9AZKDLkShRbKF90Y4hmmGQIYfxZWFr0WOBZ9FK4RbA4eCxQGh/6z+Jb3RveY8/LuMext6r7og+gk60rw1/Qq9nj1rPVC91f4VvfV9Uv2Pvj3+FD4uPjR+uT7//pR+5P+QwKkBE4GrQe9BwgGEwRsAysDIAJaAL3+kP0//LL64fkg+Yz3uvbh9xD63PyN//YA7wEjBNYGSQnTDPERfhadGJkYKhj1F70WUBQIE7MTwRMKEZsMFwgyA2X9p/gV94D2w/NN8Jvuqe4F8OvxlvJC8uvxwfEJ84j1q/Xh86jzd/QJ9F7zhPMV9YH4X/u5++T8zQCTA7UDZQTQBZEF9APzAjMDgQRaBdsDcwDl/fD84/ty+kz6P/uJ+4r6VPlF+aH6Tfwy/moB3QXLCVEMgg5xETsUtxVyFs0XwRmoGn4ZJRdVFJ8QiAwUCasFJgJP/4v8i/
iQ9I7yEPIG8qvyg/PH8+nzK/TC8/Dyw/Jo89fzwfMF9EL1ifZZ9474jPod/Ez8mvta+0H8xf1D/+UANQJTAqYBfgHNAe0B2gGeAdQAgf97/SL7jfn3+EH4h/eT96z4jfon/Ln8i/1GAJ0E8giYDPkPUxM/FhwYgBiCGJIZyBp8GdoVXRLXD3ENnwp3B8sEhwIC/6L6B/g+95n2d/Xz8yTyr/Cm78LudO7z7hTvEe+X8B3zx/TM9Zv2XPfc+Mn6j/u6+5785f3W/pj/IQCUAP0AWAENAjUDggMcA24DewN9Adz+Jv2++0H6PfnV+D352fl++fT4DvpY/Jv+5wC5A/8GVgqwDD4OzRCyFLwXuBjAGH0ZnxrOGVIWIhOVEcgOzgm/BVkD6wBy/rb8PvuI+ST3D/T08P/uxe5170PvTO747S/ub+7N75/yUfXZ9m/4WfpG+477Af3l/kb/uP7v/pL/5v/g/wsAjQA/AXwBBAHIAGABSQHd/1P+IP1C+1X5bPgo+Nn3GPiX+MH4DfpQ/eIAGAS+B5sLvA78EIoSzRMPFToWixb/FU4VmBQqEwsRvw7sDPwLCAu0CJ4FrAJm/5j7MPhm9V/zBPKQ8Mruse2W7SvuIe8q8Brxo/IH9UD3PfgP+c76QPzu+0H7Vvxh/j//Wv9IAKwB3AELAVYAcQB4AZwCWgJlAbYAEP+4+/r4QPhD+An4XPgI+Qv51vjj+TP8B/8FAl8F9AgEDCgOPBCREsAUaBYdF3kWGxX+EzUTEBKBEMgOyAz9CZUGjgNpAeL+fvuu+E33svXt8nTwwe/f74zvI++q76vwC/EB8SDywvSp97z5UfsN/fz+UgDiAGUBVQL/AiUDFQN2At8AvP+4/zr/8f2s/f/9HP1S+176BvoN+dv3TPcq90b3ufdq+JL5Y/uH/eP/6wKIBhYKxg17ETsUnhWIFiMXzRadFcoUUhSIEqgOuQp6CNAG8gPqAFr/Lv6d+4X4evZ/9Yr0i/PR8ivyFvEw8FXwBfFk8WXytPQd94z4CvqZ/A7/8f87AHcBIgPTAwUEhwQZBe4E9AO4AtoBSgFdAKT+wfwT+3b5FPgO9/n1/vR79H306/QF9uX3YPry/GL/IAJeBaMIzQsRDzYSKBTTFDAVjBUoFRIU9RKXESEPsgscCDEFqgLy/4L97PuW+gr5qvdi9q70SPPF8l3yuvGR8a/xqfE88h70ZPbd95P4m/mI+yT+xgAyA1oFCQc+CEcJ0QlLCTYIeQeBBo8EKQLx/0X97PnS9qj0RfMw8gTxaPAa8cfyafTS9QP4hPs5/wwCjQScB9oKlA3XD8ER5BJZE3YTMhNKEiER+g9fDtsL1QjBBdICGADM/TT8RPtd+hP5qvdS9tv0LfPJ8VvxfPFT8Rjxo/Gr8uzzCPYd+av7of1NAKcDYQZACOcJdQtCDDAMrQuvCu4IcgfaBn8FPgJR/qv6UfeR9KHy6/B/7xrvo+9h8Inx1fMd95H6wf0GAXwEXAdWCTgLjQ2FD6AQGxEoEaEQARDGD4QPlA4kDYQLbAmDBnYDBAEE//P86foc+Sv3w/S08pnxkfAX7zLuUO7h7sHvLvHe8uj08Pe3+/n+vwH5BMQI0wteDS8OIA9jD2wOJg0ODFoKAQilBQMDnv/k+7r4SvY69JTyaPGs8FvwzvAz8hL09vUo+AX7A/5SAFUCpgTgBmIImQkdC7oMrg0xDqoO/A7VDmQOCQ6RDV8MQAqSB90EeQIxAL794vrd9/z0VPIF8EvuWu0M7VztQu6d7x7x7/Jc9VL4afu0/lcCGQZICZ8LVQ11Dt4O7A7RDg4OMgzqCeoH0wU6A74Ak/5V/CH6jfhi9+r1d/TS87PzXPMw8yn0+PWl9zT5Uvul/Zv/bgF6A4UFVAc4CU8L9QzMDRoOYg7ODhIP1Q4GDqYM4wrICAMGlQI7/2b80vkW9370RPI68I7uv+3t7f7utPDf8i71c/fN+VP82v4+AacDKgYlCBkJcA
n1CYoKwwpnCtAJ9AjKB3gG8wQ/A+IB6QDF/zL+hfzv+mT5/vcB94D2Ivba9fX1kfYg94r3ffgn+iH89f21/30BQAPzBN4G3ginCkoM6g0pD9EP4A9iDwYOugvUCNwF4QLB/6X84fk59+D0SPNN8m/x0vAu8YzyOfTp9c33zvmS+yn9tv4qAEsBYgKLA7cEtQVuBqgGlgZ6Bn8GdAZYBjgG9QVpBccEHwQqA9oBjABe/9z98Ptn+n/50/gt+MT3gPcy9yD3w/fk+Gf6OPxc/pYAjAJJBBkG4weVCR4LeQyLDRgOAw47Dc4LvwkjB1cEzQGb/1b9AfvO+PT2VfUL9FrzYPO78y701fSx9Xv2jPfp+E36u/tL/db+VwDgARYEogb4B/kH+gfnBz8HrAYSByMIUgjaBtcEYAMLAkIAtP4Z/iv+ov0A/HH67Pm0+bb52vl4+dz4Fflc+hD81f2j/0IB+AIABZkG5wcbCWcKwAw8D5MPdA4VDegKbQg1BtkD3wHT/2D83vhw9kv0Y/L28Gjve+7q7rPvXPB88ULyevNj90P7q/sY/Ff/5wPiCOALRAsiC5YNtA5LDeQLzwrcCSoJpQdHBRkDHwBB/fr8Jv0W+4H5D/pj+qP5U/ml+YH5kPmt+s37Dvxt/Gf+WgGEAyQFaAfcCIsIHwlxC38M5At+C/0KhwncB1EGpQTeAgwB6/6U/CX6SPeT9MjyUvEg8A3wbPA67nzqduzu9F/4lfP388P9BQX7Bb4HPAvsDeMQthKeEYEQDRFMEIEMxwejBYQFYgOj/sT7b/sT+u/4CfpR+Tv2mfb1+Rr7Pfp/+s783/+zAT8C6QMXBvgFtQW6B/YIQQgPCPoHuwaJBv4GKwW5AjkCyQH8/yv+O/xz+RP35PRe8unxMfMH8qbuAuw37fPzZfcG8WztYvZjADgC0gBIAlsJGRFIEM4LcQ/OFScUORCIEaISwA+tDAIK2gYmBKcAffwk+8j7wfkh9/D3p/i69wv5RfrI+Lf64gDlApQA+QGIBtoHaQY8Bh8IzAkXCTkH/gY/B8UFegMiAab+F/00/PH5LPZD9PH1IfYT7yDomuxD9Zb0ZO2M7CH2Df9y/B/30f3HB+gGKAQ1CioQcRBkEKgQ2hAjE08S3guOCU0M7gjJAS8AhQC6/VP7Nfpe+Vb5P/ly+NX40PoH/eX+p//2/5MC7wVGBr4FHghyCmoJzQciCMYHUwWhAmMAcv4+/dL7EvkG9r31JPgq9qbsOuoU9E/2+e137YH17fnK+vz5mfkvAP4FNQIMAewJGg/rC5IKpg1JEGYQcA3nClEMSgxlB5sDJwTeA2z/ffp9+ar7ZPsm+B73xvmH/C7+Tf8hALoBgQR+BoMH6AiCCskK+QnaCbEJFgdjAz8BNv9I/JL5z/WD8+D14/Ms6JbmYfUU+o7sC+hx9+AAk/lc91MBmQS3/2sD1Qm/B/cHogyUCekGPgwoDNYDsQK6CFIISQKd/yYBKgMkAmn9P/p6/Z8At/y7+Kb8WAGa/+n8hv9HBGIF8QL0A8QIfQkLB/QHDAmfB38H/QVgANv9i/7N+kb1hPNr8yT0cvEt57PmevUl+D7p7uU498gAn/gq9TABWgslCD0E4AroEgsSzgwtDLEP1xA0DdQHswXIB1YHWAEU/WP+HP+a+3P4f/m++or43PZ1+S78LfyR/Ib+VgC5AjgF9wXUBT8I1wunCykJHQrOCwEJcQTVAXf/cfyA+HLzSPJR9HLv/uWf51fxYvCZ5nvmyfGz98P1tfeh/bsB9wXgCPIIfAxDEroRWQ9SEjUVyhCPCpsKMw20CMYAeP9LATr/zPvz+TP44feK+c/40/al+If7HvsB/LsAzwKMABICFghqCmsIiwibCgELeAojCSYFYwI8A2AA2Pj99738tPc26/Lo3PDh9ETwGeg750Hzuvs+8y3uZ/vHBOz9F/2YCdgONAodCvQN+w7rDsENMQqhCCIKsQhCA9kA3gLsAWT8/v
l9/Fn9Evup+Xj6q/s5/Mn8/P1C/qz+rgFyBLkDFASTB7IIUwccCFwIsgWNBPcErwEv/OP64fzo+nrxTemA7437vPZB58bqGPxB/PDxtPe4A/sA0vz4BTIPYQ15CTELZQ5hD0gORQrJBTcGaQjjBJX+XP36/j79Vfpt+pz7tvoA+mD8nP4a/ar8zADVAT7/xQEbBe0BngFyB+wGrAGeAzIGGQIBARsELADy+If68P+M/J/xHO2R9S/9g/Ya7DbwOPzt/Iv2xPkHAr4Bfv+fBc0MlgxECXwIlQvwD0QNhwTTAjwIjgZN/qv8AACl/j77m/qt+/795f6H+5H6XABHBDwB6v52AqIGJgZuAy8DPQXeBZIDjgEOAmEBKP5l/Ib96fv29Vf0Cfo5+sLvAeuH9Dj8u/YK7yf0DwAHAOb4nv6yCYUHIQOzCV8QYRDEDuwL+Qo7D6gOLwUBAQoGegWN/FT44vta/e/4GfV99y39IP77+X75nv/1BH4DbAAVA2cIywh1BqwHTwlhB9cF0QYyBZ3/9fzm/if98fS68WX3U/dy7QrqDPAY84Tyi/Jd8Rb1jv7n/7/71AFPC+0J8QfyDgYUOhE4DvwObRC5DrwJggWrBD0EVQDz+TH38vlE+qf0EvKa90j8Ifq+9wH8EQJlA64CCQVWBxMI9QmICmUI/wcTCUcGjwJYAl8AEvzx+gj5wPM48yf28vHl6nft6fMZ8w/wN/NL+CX60vs4/wcDsAaqCEEJ+gysEtgS9g4dDzoSkBCmChcHwwf+BtwAkvqG+iz8Tvh78xD0FvZL92X5+fjr98n9bAT1AtMBsgfGCloIVQn3CwAKwQefB+QEUQHmAG//UvrX9K/zmfYT9Xfsa+lc8aD2+fHs7UTy6foF/xr87vtJBKsLZgrNCHUO+hNDEYENTxD4Ee0MawjoBzMGoANRAc379vZD+Rj7uPWB8oX2t/jE99P5qvtM/NIAlgQxA5ME2gkSCtcHhQk1CjIHFwVCA7cADwDl/kD5CvTB9Qr5nvQt6+LpAfRw+f/xWO1n9pD/NP15+i0BHwgvCAEIJQwHEZQS3Q8zDVIPxxHeDNoFPwVvBlsC6PzK+jT5z/Y09sb1rPTc9TL4kfgc+qf+oQEeAigE1wYdCVoLmwoFCdAK1gpWBWkCfwRjAq37gPem9qP3Tfdn75/on+8++KvxQOnc8BX8Qfo/9rb8FQTCBGIGMAvrDfMOURBSEHUP1Q8VD3cKjwf+CCEH+/9A/Hf9Kfwg+JL1bPSU9fD4ffhN9Uz4yP8BAkf/yAFJCHgJcAfmCUIMZglGBygH2wQzBMcEbf0z9kH8VACV89zptPJA+S3x7+tn8cn1IvfV+D349fpaAzkGfANdB68Odg7AC3cOTRGkDigL+wpaCyIIBwOPAIwAif7R+RX3Yfel9zH3MvYs9Qn3mfvH/Kb72/6jA2EE9QQlCFkJIwh1ByYGKwbUB9IDHfzK/MEB1vyq8srwjvVn+Kf1EO+67sH3hfup9b31nf7sAhUBywLUCHMMQQyJC6QMLQ53DcsKLgn2CFoHxwMOAYMAiP+C/B/6lfki+fn4n/nW+OH45ftS/Wb92QDMA1MDZASiBj4HvQeEBrEDgQV7B9gAuPre/lMCdPsX88TxfPZl+g/2Eu4n8T/7UPog9In50QH4/x7/GgZaCl8KcwuWC9ILxg7nDQ8I3wYcCjEH+v96/jwAfP0p+R34z/eR90X5tvlQ+Df5kPz1/vL/wgBMAy8GEQZHBqYJzQkUBaMEQQhnBnMAZ/1B/fz+lP4K9dXtxPZh/gzz2elK8wT8w/cm9O73KP2JAUkD4AJgBgQMlgzZC8cOdg/mCz4KPgsECiAG1QF9/sz9xP4Y/EH1/vHK9sf7i/h38oD04/yAAcT/W/4oAdUFqAioCHcIaAmGCZsHJwaJB5gHYwHa+nP9NgJO/MTx3vC39kb4ZfQv8AfxTveq+eD1KPhSAFcBW/9aBRsMVww5DGcNlw2QD24Q1A
vQCL0JoQZQAXQBtQCh+bH1e/jy+Kb1JfQM9Hv1Lfqe/Bf72ftRADoEJgbRBkcH8wicCZsHBwfJCAwHDwIdAAgAnv4q/Tb63/Mg8hz4fvnC8bbufPXi+JX1efel/Yn9gfyNA78KqgkjB3IKlA8cEHkNRQwbCxIJBwlgCEgCwPyu/lUAGftS9iT3sPcl9yz5h/kM95f4K/5dAJEAcAKhA6gE/QdFCR0GmgSgBikHxANd/4D+IwGj/yj3T/MX+ZH70fRX8OXzpPcv95T2HPjE+uT9tgASA54FowfACIMKMQ2vDSkLKAo5DBUMFghABGYCbwFDAXT/3fmA9iP6HP2w+G/0nvfc+3T7z/qb/az/AgCeAuUFQwVDBMIGRgfNA0sE8waDAf36EgBBBLr5NvEn+Lv87fWX8oz2gPf090n7T/oW+n8BAAU4AVoEvQzPDHsJ+Qu/DgUNYQviCqsItgVZBCUDiAAP/fH5C/ni+TX5pfYH9dD1Y/jI+hT6aPna/CgAfACiAqAFhQRPBP4GQwWhArsG0Abo/LP6CQMDAuD3X/XC95D3b/mn+qH06/ED+hv/g/sV+9//9QF/A+MIcAywCkIJ4AsqD7AOAAs9CI4HuQe0BpgCHv3Q+qT75vpc+Bz2xvOR8y34TfvG96716foAAJj/UgDvA2oEqgMCB3cJ4gUNA/kFGgd6Ad784P40AeP99/Zz9Nv56P0z+PvxS/Yx/Sn8rfkN/akAMQKqBasItwhuCcwLVg14DQkMogmECLoIdgdIBMv/0fuY+1v9s/pM9M/y7PaB+LL1M/Tk9mX6n/z1/T//vgDoAmcFsAZHBhkGFAduBq0EPgUkBM3+x/04An3+ovRh9gv+8fh48cr2CP1R+uf4QfwA/toAygX3BHcC4QddDooMyQgQCo8L7wnlCP0HDAQ3AAEAwv8d/Zn6ZPjg9Yn28vkX+QX0F/Q4+pP96/sn/Ef/8gCNApEG0Ae/BLoE7gdSB9sENgXRAh7+YACyA6T8hfS19/b78ff59Mf3RvhF96T6//xx/KT/twNbA/ME6gphDNcIQQnSDLcLGQiYCKMJgQVoAX0BlwD8/DD7g/oZ+E735PgL+Mf1avdf+9v7TPoe/FYA1QHaARYELwbiBeUFLAdMB4QFBgP4AMUAAgFp/tn5v/f/9x34cvfd9VD1Svib+tr4bvn7/l8BjP/qAfsHMwoICRkJdQqrC0AMBAt3CEcGQAVABDwCRf/H/CL73vlx+dX4avcY96P4tvnj+TT7Y/1c/zMBYAO+BBMFOgY0CEEIFAccB+MFrwIjAvECxf/Y+rf5afl79hz1kPdJ98Hy1fJ/+Lz6rvlB+3D9Gv/MA+MH9ga8BvAKSg1CC4gK+gt/CsAHNgcVBecAj/8T/5765PZu+Pr4vPXZ9HD3Ofg++Or6tP2T/oMApgO6BHAFzAjLCnwIhwfcCZYJ4wZIBeQB7f1y/9X/QvjL8+z3ZPfH8HDxu/Yr9e/yNPdj+nv6oP72AjEBpAF/CaMN9wiRB8AMMA4tC8wKFgq3BS8E/AWHAwf9p/ke+uD68vlD96T0tPU2+of8u/oa+/z/4wKCArQEPwi+B9QGYgmbCuUI2QewBZoBMQHCArb+RPiP90D49PPA8C30Afe580LwLfON+fH8xfvV+nz+owRUCEIIXAdkCDILPA1QDMIJOwgyCNAHPAUrAQj/CP+k/RD6xPcI+Nf49Pjj+Pz48Pmz/Or/4QC2AN8C7gYhCYsI/gdNCWIKJQn4BtIFnARjArf/Mv3B+vj45feX9mH0rPKw8n/zMvQd9Zf1NPYs+H/6Mf2UADMCfALRBOIHwwhKCfMKcwuFCcEH1AdTCBgHNwQqAYT/Jv/y/jL+ovyQ+gr6cvxe/8b/7/7a/6kCjAU2B3EHPwe1B/MIYQmYB/8EggTtBMICs/58/GP8zfvz+AT1IPOm9Bb2pvTJ8mrzGPV19uv3f/kg+pr6Lv2EAPcBswKdA6wDPARQBkYHDgaEBEID/g
FWApcDVgLe/sj9GP8m/8b+IgCoAGr/AQDsAnoEOAQKBcYGNQetB0IJaglFB98FMgbABSQDagDe/8v/t/18+7X6gPoW+lj5SfhZ+PP5B/ue+sb6p/x2/gH+3/zP/W3/Vf9x/iv+jv6Y/2EAq//6/nD/Xv9T/gD+lf5I/tL8Hvxl/Db8vPsx/B791/0d/zoB2ALTA68EEwY8CMsJegm5CIUJLQu7CiQIDgc4CM8HOQRxAdcB4AJNAQv+/Pz7/ev9pPzq+9H7QvzW/OL83vwX/Qn9BP2c/VL+q/0//IT82f1b/dT7/PoL+9f7Efzg+u75i/pd+xn7uvpb+6r8b/1t/dT9O//DAKgBTAJ/AkQCRwM2BTgFRgOBArIDAAWfBNMCfAEHAiAD+QIAAjsBtQCKANoAOAH8AEAACwAZASACAAJxAZ4BHQKmAjEDEANJAv8BbAJTAsEBdQFfAdsA0f87/2z/Wv9D/pT9Ov63/ob9Q/xC/Sv/9P5w/TX9L/5E//z/gv9b/mP+X/9m/1v+2P1N/kP+Uv1O/Db8MP2K/UL8fftK/Av9Pv20/dT9iP1O/rf/EgDS/2gAeAH6ARICHwJpAr4CsgIpAmEBHwHVATsCKAFaAHUBkgKxApcCaQJfArkDawUGBYQDawOXBHEFMgXYA1AC7QGvAvACkAHN/xj/XP+3/7D/7P7M/S39dv0S/hP+bv2z/GP8vPxh/Wj9yvza/KH9C/4D/h3+Y/58/jz+8P0m/nP+3/0I/Vv9Qf7M/dP8K/21/ZD93/0O/h/9BP3T/gsAb//R/pH/3QDFATkCkQKjAqgCQwNOBLgEIwSPA6QDpANlA34DdwNTAh0BpgH9AlkCUgAHAHIBAAIMAR8AVwBQAcsBFQFsADwBYALKAXkAdwAsAQcBEAAn/+n+Cv/A/sj9Jv1j/eX9u/0Q/f782/2K/nv+Hv61/dv9C/8PACT/Uf0r/az+Zf9p/gn9oPxV/Qz+6v0o/fD8mP1n/vj+Yv9d/0r/PQCfAfgBkAHqAdcCLAM6A2QDgAOzA/QDjgMOAz0DRgNzAvgBKwITAqMBYAH6ALgAQAHDARQBFwBoAF0BUgGPAAkAuf/T/30AwwAQAAj/4/7R/3UA0v+8/pL+L/9r/xb/lP4q/uX96v3h/X/9Nf0X/cD8V/xl/LH8xvy+/Mz8d/2R/gj/uf7N/pb/9v+Q/zf/ff/k//X/BgAcAPD/oP/3/60AxQBnAHgA4QAaAVwBmwFbAQ8BlAGNAr8CTQJHAgQDYwMrAzsDXwPWAnkC7gIBAxcCVgFhAXsBLAGJALn/RP+b/yUAtP+z/qD+d//c/5L/Sv82/33/OgBwALv/Vf+0/8H/YP9V/zz/mf44/mv+ev4b/sj9cf0Y/Tn94P0N/nD98Pwf/af9HP4n/rr9a/3w/eT+W/8+/yj/Wv8FAAgBZQHVAGgABAHrAfoBYQH6APQANAGqAaIBswAqANoAggFcAQQBvQCPACkB8wGmAcgAxwCAAf4B6wFqAcgAxABAATIBggAKAM3/hf+A/7b/hP/0/or+kv4D/1X//f5W/nD+OP94/wb/1/7x/uj+KP+U/13/wP7m/pH/pf85/yH/Xf9//3T/a/9n/1L/Nv8q/zj/Uf9p/1H/Lv9f/+v/UwAJAJr/3P/QAG0B5AArAKYAowG3ARIBrwCdALsAKQFsAdMAFQAuAOoASgEOAYMAPACHABwBQwHqAHsALABaAO8AMgG0ABAA/P9cAJ0AWgC//z//Q/+B/2T/2P51/qz+8v6K/gD++v0R/gb+Ff4L/sf97P2A/rH+u/4P/0P/GP9W/+L/1/90/3j/nf+h/9H/CwDY/6D/0P80ADYA5v/N/8L/wv8hAJYAbwD3/93/QQAPAaQBOAGIAM8A3QF2AhcCfgFDAZ0BAAIRArUBDwHGABoBRwHOAFwAXwBXAFEAkABqAA8ARgCOAFwAawDKAFkAqP8ZAIIAr/9X/9//ZP9R/qD+Ef9L/sb9Lv4h/pb9u/
00/jL+Rv6V/p3+k/6v/gj/I//N/tL+Jv9O/2b/kP+7/w0APQBjAI4AwAD8AAQBAwFOAXYBQgEuATABFQG0AJMAyQCQACkAXwDIALIAbQBDAHIAmgCqAF8AKgCjAL0AWwA7AH8AqgCCAKIApACnAOgA6gCsAKcAzwCfAB8A6v/N/1X/Uv/X/i/+If+q/+f+P/45/t7+8/69/k3/ff8h/6v/+f9y/+j/jf+c/lP/0/9Z//H+rv4a/2j/Yf93/xn/U/+b/5L/tP/Z/0IAOgBJAFIAQADhAOAAWAD8ADgBvwDnAKUA2QCqAL4AVAG1AIcAhADMANgAjABKADwAXwCSAJAAQwBQAEEAXgCRAG8AmADGAK0AuwCnAKYACQHVAGgAYQB5AGwAHQDT/3P/bf9M///+3v6L/mT+e/6w/l7+Xv7D/qX+nf61/s3++f74/h3//f4D/43/cv9V/5z/rv8O/yL/1/++/yT/d//k/3r/4P/8/w4AYwC0ANEAtQBjAfsBoAG8ATkC7gHmAQQCqwFQAX4BtwGoAUYBJQGlAIYALgHbAMcAHQCm/00ALACU/0H/YP8d/6v+D/8j/zz/HP/+/lH/W//O/4n/Lv/V/4z/Fv+h/zD/jv83/8L+Yf8W/4P/8v4S/6L/NP+B//L/4f8z/4r/UwAYAB4A3//n/38ANQBIAGoAYACwANAAMAFlAOIAIgEwAQcBZgBqAWkAfQDeAFQAUQDW/y0AKQAt/0QAaAC1/x0BkgCzAL4Abf+YABAAJv/aABoA+v4UAIwAKQByALj/jP+t/17/SP8N/w0AvP44/37/5P5gAAkAWgDo/+P/xQBbADIAwwAVAF3/cv+L//gAUf93/qD+sP7Y/8X+Kf8VAL//YP8xAO3/PAB3AM3/rwC7AKoAtQCNADwA8ACbAPz/bwHh/53/LAFi//7/QgAU/3EA7f8DANgAMwBEAFEA8v8EAVQAl//jAPn/iwDGAGIAeADw/8gAuwBQAEwADQA9ADIABAC3/9j/RQCJ/3X/vv/2/xoAWv+E/zYAwv82//n/kP9u/5j/xf6p/0v/YP8dAP3+BP8///H+Zv9K/2v/7v+//uT/fwBY/3AA2f/q/1cA5/+qACUAFwC6AO3/yv93AMoAVgDj/yoAQgAbAYoA5/81AZsABQDkANEA9QDcAEgA0wAaAe4A3wBjALIAiAF8AIYAgwBnABkACf8/AMT/mf/B/+3+8v/B/1f/Sf8Z/jj/9f/t/jD/7/5L/hb/kf9O/uv+Df+l/hD/Ev+9AHX/YP93ALb/mwBbAFMAvQAQ/4QAGwG9//UA5f/FAMH/xf8GAV//Vv8//2b/TQB9AML/bv8c/5H/+gD2/7MAaADT/v0AegDM/0EBlf+vAG7/qf+YAZAAEACK/74A8P/GAOUA2P4lADQBlv+aAGUA6/+yALP/4P8rAMv/9/9JAFX/bgHRAF8A+//3/+sAcwCTALX+RwA9/yz/eQAj/3b/6P7k/6v/0P6RAKX/L/8+AA0AAwBGACEAfP9G/wUAGgBa/8r/kP+u/vH/mACA/m//q/93/+3/Z/94ADH+WP/AABD/ZwA1ALkAJgBU/7cAjQDg/8QAkv/y/8EBLwFCAOP/VgDC/+H/df8MATAAvP/w/9r+AgC3AA8BKv7i/zgAS//fANf/RgAkAfH//P1//2oCUwDn/m//vv70AEYBzf7r/v//D/9BAG3/Bv9EAVT/fv80/8/+IAEoAbj/pv/aANsAdwBJAZsAugANAEz/hwCR/ykASv9x/3r/sf6eAIL/gwCn/5f+EgG+/1j/QAGY/2j/JAC5/4wAa/9l/4gAWwAbABgA9QCAAIcByv9h/z8C8gAbAPL+TABkAdf+kf76/r3/vwCeAHP+1P+FAf8AUgBj/hkC5wAs/i8AAP+q/7QAQ/6+/XH/HwAqAHT+BP5HADcAaP5j/83/GAHEAMb8Q/9fAmkBhgB6/i4AOgKTANj/4f/9/5MBYgCp/s8AgQCpAI3/hv
64APz/1ADl//P+5v+P/ycB0P60/soA7v4SABkAq/8VAAQASQDU//T+W/+pAID+kf8YARL/sP+GAOgA7f8RAUUBHf37APkBcf9B/6b/ngHS/j//0ACM//X/UQF1/5/+UQIFA4L/Av4kAsIACwA2AmgAOf+LAHEAR/+h/2QAJgDv/UT+YQANAO3+5gDW/m3+mAFN/07/0gAZAAoAqADNACUAQwDLAFwAIADf/hkAawAW/qz/p/+G/ln/0v4t/vH/Yf9v/3YACP5NAT8Blv7DAUQAFwEkAtX/ywDdAIgAKAA6AMD/rgAoAKP9HgEIAIj+vP8e/g7/1P9F/ygAX/+W/s8ACwFD/w8A1wDVAKkBcQDuAJQA2QD7AQL/EgDIAAEAeQAs/wgAkQCK/00A3v4M/xMB/QCr/nX9lQDR/0X/g/8r/2IA6f95AJv/0f9TAVMAFf83/7cATgBN/53/IQC+AOD/TP/G/zsAYgHm/3P+Zv/mAAoBBAA8ANcA9ADI/7H/IAD2ASkBWv67/8D/2QBTAWL+sv9oACb/YgCPAFT/ygBBAav93v9HAA0AVwDJ/YwAaP/o/k7/V/9r/+f+YgCR/kj/6wBSAGAAlP5r/5gA0P8nAXEAAv9o/8QAJQExAFoANwBfAIz/kwEZATf/BwINAGf/VAGIAYUBFwDZADEBUwFwAqMAWP8nAKMBUf9mAG8BYP8dAGz/9wDiAHsA//+A/eP/SAD0/yUAD/5v/pL/UADt/oH8dP7f/mn+d/6i/az+Dv6P/jL+wPzG/mP/ov3C/Vj9rv4TABf+8/2b/bb/cP8D/qX/Yf9+AC8ANwFxATQAAQLYARwCZwLGAaQCEQOqAqgC0APkA8YCLQKjAzMEBgN6ApQBkwGoAXgCTgFKAEsCJAHeAK8BOwHwAf8Axv/cABEClAJjAScACgFQAuUBJABDABcBzf/e/7j/+v0a/Un8FPsa+gv5ovfK9kH1EPW29ID0RPWc9F31XPad96D4e/p8/Pr8+v5aAdACugMIBawG6AhTCY8JxAsnDPgLMQwWC9cJcQnEB7UGeAXyA1kDdgG0AHD/zv6I/zf+lf4I/wz/PwBX//j/kQE/AYEC/wIvA24EEQTdBOYFRgXyBLkELARoAxcDnwHA///9gPwF+1P5Gve883nyj/D47mDuu+0w7e3rmusJ7BXvd/JX9L7yO/RM+7f9+f+rAkoExgfgC2wOyg1LELcSyRHrEQkTZBKcENcNywrLCeAHzAWQAeT+Jv8n/c37Lvum+pv63/oO+1P8Q/3A/YL+aP9DApsEMgXwBZkGwwhLC2YLFwt6CrQKRAuWCVkIJAfUA6QB3//K/Lj6E/hq9b3yivC97znt/uqN6trpb+mz6iPqj+mu6mXtAPN29Rf2PPdJ+rr+lwHlBfYHVQkSDB0NWw9XEW4SjxInEEIQ2BI3EaEN+wu2CesH+AU7A30Bo/6k/X77Qvqq+/n6mvr1+X/6kf3O/0oAPgHbAm0FtQe7CFIKnQpMCxgM4QszDCUMNQsfCQ0HVQVgBOgBGP8S/an5Zves9FnyOfDz7SfsXOnD50Ln8Ofn57rnx+eO5+bo8Ov18mH2u/TE9af5UQCqBIUGMAjeCvYOMhFGE9YUsRaAFk4U9RM2FtIVXxD9DJ0KggjMB5kEMwBi/e779voa+gj6Zvqq+X74Xvpg/cz/UQHrAKAC7QVwCG0KNwtsC7QMig31De8NYg0nDaULkghuBg4G3QJx/y792/kM97nztPFv7lnrQ+qh56blkeQB5V3kNOTu5RbmI+X85QPuKfNt8TfxiPXl/NMBTgSjBI4IKg/FESgSNRPAF1oYmxVrFEgVaBd9EyEO1ApBCbwJlgYuArr+ev2w/cH7dfpu+0b8fvuk+9L9DwFYA4oDRgT9BtcJyAtlDQ0NBA5wD98OpA53DtcOPQ3jCdgHigayBOYA2fx++Uf2FvPx72Xstume5+nksOJZ4Vfi/+H24B3ibeRo5f/jQ+nS70jx5/
Fb9Mj6z/8sA8YFvgjyDdIR8BKJE7oXbBprGMwWKBdpGL0WLRLRDb0LTQuSCCsEUACS/xP+RvsY+h76uPtW+4b6Zfsb/xYDvgNKBAwG7AgwDJ4NFw0fDqYPUBCtD+4ObQ8FDtILQwmVBzYGNgMeAC78R/lp9n/ze/CX7Kbpg+eP5YPjK+I04cHgxuDG4rLjXeG+4Rnna+4S8CDvUvK39yP+YQGRA1cI0g1aEYQRuhMZGFIbnhn7FSYXhRngGHETkA7NDUEMkAmnBU0CxQCL/279efuU/HD+cv3X+sn7PgCGA5sDZALDAzEIaAz4Cw0KZQvvDukPGA6tDcAO9g5FDJsJYAhECJAGZgGl/P/6xPqX96Lyf+7n65Dq7+jx5c/iw+EL4R3gleBU4hHi3d4b4I7nuO6F7m7sTvFq+b//kgKWBBgIlw7yEm4TuBY3G8Ec2xkxGI0bCx0BGsQTVxCAEZQQHQ3TB3AE/AOmAmAAOv/z/8T/wf2T/Mv+nAErAugA1/8UAt0FzAd5BuMFUgdUCfMJfgk0CkEK0AkbCd0IawhaB1gFfQI+AM3+9/wy+Rz2VvMJ8fTtDup+56bkHOM14ebeud1p32rgXdzQ3GzjpelH6lHoAe0D9j/8QP6BAEUGmQ0zEkoT6BRLGfkdSh4PHIQc/x1wHXAa4xdjFicUkxEsDjoMywpRCEcF0QLoAq4CBAEx/+T+EwDHAMEAYwAtAYoChgP9Au4CNQRzBWcFIgRwBG8GnAe5BT4ENQRPBK0DtAEeADr+Rf3a+yP5sPZu9M3xNe+37BvqvOek5CbipODn4BPhlt722yHdB+ML50jnlOcd6z/y4/ip/UIBAwU3ClsP1hMYF9kZIxx+HNocxB39HnYeRhs2GJ0WHxYUFQwS0g25CwgLQwqHCFIGAwUHBB0EGQTQA3sD0gImAwwD5gI/A9YCZQF4AB0B4wFOASIAFv+K/vn+oP9u/jf8CfzD+8T6cvpU+YT3Hfam9SP09vGW8VLw6e257OrruupX6UDp8+gS50Tne+hI6Hjpnutb7UvvFfJX9T34KPwQALICOgXICO0M+A6NENUSKBP+EzYWYharFCAUrhT+E8gSNRLAEYAQ7w+ED4kODg5TDqYNOQw1DKMMZQyUCzcKNgn1CNwIyAetBREEgAPrAgkByv67/aP84/rg+L72LPWB9KLzQvEU76zuue7H7W/s5Ovj60DsYex568/rl+027rLtLe337mHyzPJ08azyU/Ub+B/5yfi/+vT9FwC3AGUB1QMCBv4GZAe1B9cIjAlDCX0IlAhACbwIJwgXCG0IDgnsCE8JiQqFC1cMsAzaDYAPmxBXEXIR0BGgEuoS4BIHEmIRjRAfD9QNXQzICrkIbwZOBG4CNACd/Xj7LPnV9r70gvJY8IjuzuwH68vpGekM6NDmjuYL5xjn5uao5z3pcupc6xTtae9M8ZTzjPVV9zf6Iv0z/1MACAIxBZEG+wYSCFMIKAmZCfkIYQioB+IGTAb1BakF2ARBA+EC5QPcBIEEcAQsBR0GnQe0CHUK9QuLDE4O3g/fEMoS0BMfE0ET6BMwFK8ToRHUDxUPLw7BC7UIeQZjBB4CPf9U/IP5zPa49Gbyme+K7fTr/OmH6AXoluf05i7mHuYO58ToEOpT6knro+2J8IjyrvPY9TD4C/pr/FX+qP+VAQED0wPdBCgGRgclB3wGkAbaBrQG8QWaBBIEoASxBIsDrAIqA98DXwRgBHkE3QVhBwwIdAghCr4MzA1ADq4OGBCYEi8TeRL1EZESnhOREmQQEQ88DrAMQgrHB6YFjgM+ATX+6vvp+a73pfUX8/PwHfB271rtretB623r1+th6+fqFOsH7Jztke49783w5PHj8sL07Pbo+D36C/tA/En+aQBqAaQB6wGTAroD5AOaA5ADiwO+A0gD6QI/AzYDRgNeA0sDFwQvBc4FBwaQBhgI0gmCCikLGAz0DBwO/w5LD7YPDBD7D/8PwQ8yD2kOfQ
1jDB0L9QmNCHsGiwQoAzIB9P4c/Vr7ivmv9wj2ZvQe817yN/HJ707vv++F78bu9O7x73vw0PCP8V3yEvMz9Gn1LfYm9474mfmC+r772fx7/S3+Of8gAHUAhADLAGMB5QHZAaEBywFVAqgCyQJOA+kDXgTcBHgFjAbjB5AIswg1CZoKJgyWDIIM8gyyDWwOpw5ADg4OPw4PDusMvwtiC9QKYwmrBysGyQSvA1QCZACr/qf9qfwH+4T5qPjq9+/2yfXl9HH0QvQb9ITz2PLX8kPzevN185Hz2fNk9C71n/Xo9aX2nPdc+Lb4Gvn6+fb60vsO/Ov7xPz7/Z7+ev5Q/jf/QwCxAOkAOwEBAjMD/wNZBDMFeQZ2B/wHwwgfClIL6wvhCxMMBA3gDQEOeg0CDSEN3Qw3DLEL7ArdCb8I1AfDBqEFoQRjA88BxQAAAMD+rP3+/BL89fp0+uT58fia+Iz4Lvh59wr3MvdZ9133TPf49hH3xfcw+On38ve1+AL5xfje+Hz5xvmk+aD58vmC+t76uPp0+gP7KPyC/AT8+Psn/Xv+AP8Q/4b/1ABeAmQDsQNcBFUGxgfyB80IYgqXCyEMbwzJDF0NdA7XDtcNcQ3uDZgNdAxtC7MKjglCCAkHiQUDBPQCrQHp/9P+Xv4i/Vb7rfqa+gj6Rvnk+Ff42vc++GT4wvel90r4M/jk94H4PflE+SP5efnv+WP6uPqG+lL6wvov+yL75/q7+uj6L/sg+wD7A/sy+2L7XPvD+3z8HP1Q/V/9NP66/9sAHgGEAagCFgR+BXEGAwfhB/4IIQrhClILxgsFDCEMbgyKDBoMsAs4C5MK6Ak6CTQIHAdiBkoFIwQ2AyIC9gDS//7+Ov5T/ZT84/tS+yD79PpO+qv5ofkD+v/5lfl7+Zn5uPne+ev51Pn++Vn6YvpN+p/66/rR+uf6OvtH+y77Tvtk+1P7ZPuA+3v7d/u4+/77OfyG/PL8fv0K/q/+VP/7/+EA6QGuAk8DQwRTBSEG3QacBy0Itwg7CZgJvAnfCfsJzAmECTUJ2ghjCK4H8AYzBnQFtATMA+ICDgJYAYMAs//2/kj+wP1d/cj8+vt/+3X7O/vP+pP6NPr/+SD6ZvpH+gz6HPpT+ob61foS++X6sfoI+3n7d/tR+0f7NPs8+5n7wvt/+3j71PsX/EL8r/wb/Tn9if1P/g7/g/8AAJAAGQHZAbsCTwOiAyYE5QSIBQIGcAa5BgAHaAevB6cHrgfOB7wHiwdMB+cGdgYqBt4FNAVrBO4DawO3AhgCfAHCAC8Az/9U/6T+Of4c/sT9PP3p/ND8qPxW/AT8u/uN+5b7efsW+776rPqj+nT6OvoK+un55/np+dv56PkG+ir6TPqH+tb6JfuD+9P7FfyI/Cn9of0J/pP+Mf+8/0YA3wBjAd4BdAIEA1oDsQM0BJcE0gQWBWAFgAWGBcEF6wWyBYIFpgWcBS8F6QS9BFwEFQTkA2IDuQJyAl4CAAJqAQgBzACkAIcAQwDS/6f/uv+H/xn/zv7F/qn+Wf7t/aX9b/1D/ef8Y/wb/AX8svsz++P6xPqu+oL6Uvoi+i/6ZPqD+ov6ofrt+lX7sfv6+0b8x/xb/dH9T/7d/oD/MAC6AEYB8QGMAgYDcQPqA10EogToBDIFPwU/BW8FggVJBQoF9gTQBJcEbAQ8BM4DfgN/Az0DswJJAh0C4QFvASQB7ACXAHMAVgAHAOj/8f/w/7H/dP+L/5b/dv9B//X+vv6o/mv+/f2d/VT9//yZ/Dj8/fvI+177/vrm+t36tfqc+p36sfrV+vH6Jvt6+7/7KPyW/PT8j/0c/qD+Wf/K/yUAwwBtAagBEAKsAq8CGAN8A4IDEARNBAwE+gN1BLkEpQSuBI4EyQSxBI0EdQTKA+4DqwO+AvIC4QIuAswBVwE+AeIAugC9ADsAKQAUABQA5v+j//D/vv+S/4f/D/8u/wz/kv6t/kT+zP3g/Vr9Nv3s/JX8rPxg/F38Av
zH+yb8APzW+yn8Hvw4/JP8i/zC/N78IP2k/Y39Hf6z/of+Ev+I/8b/dgDiACcBcAHGAXQCowKhAgQDUAN3A4wDkgPKA78DxgP8A60DzwP3A7oDggNoA28DTAMBA+0CgAIuAl0C5wFuATcBGgHOAGYAWAAvAL//xf+3/yz/Qv8m/9n+w/6P/qL+PP4T/kv+xP2y/a79Q/0R/fL8+Pyw/H38ovzG/Jf8lPym/LH8M/1a/Yr9sP3U/UD+b/6f/s3+JP9R/2H/yv8eAIQAnADQAFgBcAHdAQEC5AGgAnUCTQL1AocCvwIOA6kCMAPuArYCJgP4AiUD6QKWAjQD4wJIAn8CiAI/AggCtQGGAaQBRwECAbYAlACxABUAbgCK/3D/zP9D/+3+cf6v/vv9//2w/YD9eP2h/CX9ZP0v/Ar9fP2e/J/9YP1D/VX92/wu/cz9kf0B/sv+J/0J/jcAev+j/1L/LP/I/7z/TQA0AP8APwDAAMMBBQH/AdIB6AEpAr0BjgK/AvUB1AJ3AloBZwL4AXwCRAIAAZwByQGYAcoBzQGiAY4CawH4AY0CMQFWAugBjQGbArAB4wCNAWYA8ADaAD//VQDL/hL+Tf9q/eH95v1v/Lb9Y/0A/a39Xv3p/H/9Jv1I/ZH9yfzN/Yf9p/00/h3+4v3q/dv+zf4n//H+vf6b/7f/pv/s/7b/fABxABUADQHRAP8AGAHSAKkBjgEOAYQBNAFLAZ4B0gC5AZMB7wBSApcBOgFeAlsBCQJuAsoBxgL1AVwClQLcAZkCqQECAsQB0gAwApcAKABQASL/AQARAFj+y/5//mT+Zf61/aj98f1l/vD9SPy4/sL+2Pzl/nv9Nf55/3n9CP7O/f3+EwDJ/Wf+/f6H/sD+Df6x/o7+Xv8e/3D+/f+Q/+//d//F/rIARAEQAAMAmABHAEcBXAG8//sAPQGYAHYA3gBpAoMBgAGGAecBwQLeAdACrgHvAGID3gEpAjICJAFMA7sAgADTAi8A/f/kAPH+KAAlAfn+mP4e/rz+7//I/XX/3v50/FcAZ/7I/TgAKf3R/lf+o/2W/+j+sP7M/qv+MP6V/6QAFv5I/m8BPf5N/wEBi/51/7//F/8O/+7+M/9wACb+V//uAGD/qf/fACwAAQD+ABkA7QDo/84A2QH1AIwAtQDGAZIBRQFWAYQBjgEmAm8BiQETAsYBbwHE/7YB2QHO/zEBhQDz/iQBoAEu/mcASQBX/8j/5f7p/yL+af8r/7v9Rv9Y/4X/AP9Z/aH+bf/i/bX/4P3A/en/1P5t/vH+7/4k/sX+t/1GAJH/O/4uAOr9hf7cADMAw/6y/yj/uv/xAC4ALgD6AK0Ad/52AI0CkP/kALYA0v6qAlQCcf8LAWQBvf9tAk8B7f7FAr8BeQAJAYAAlQHiAc8AgwGJAQMBFwEXAVMBLwHw/77/WwAh/9cAo/7c/kgAyv2k/9L/XP8j/wX/WP9c/xgAwv/o/gn/xv8tADr/4/2p/6X/m/4m/17/m/83//D/Jf6Z/tcBcf/N/Sr/agCnAE7/rv7l/tn/9gCRAEj/zP9BAUwByv/G/5UCnQDG/mIA9P8xAKsBMP+g/e0AewE0/2n/QgCv/8IAoP8GAG4AhgDjAVb+k/8eAn8BUwDk/rEA3gBeAOr/Cv9Y/6YAAAAY/uL/ff85/5r/qv6x/zP/IgBtAH7/8/8uAEUB+f6D/+0Ayf/SAIn/EgARANL/6AAbALD+5v7SABT/kP9rANX+8P99AFoANv+uAMcB1P3HAWwBjf/CAHUAHQLx/pgA1AFQ/pcAsAFB/6b/WQAjASkAqv31AMsA//2eAaIA//xtAesAl/1aACQAev4v/6P/i/8tAFb/5QADAOn+iAHKAF7/kQAPAaP/dgAPAUMAuADK/zT/hAHd/qD+vAE8/nr+KwBT/+j/RP6R/rQBI/9q/wgBZ/7YARwCd/7KAHQBYQIpAUL/ugFkAVIAxACTAEP/5wB+ADX9zP9LAAb+Av
7J/EP+g/85/bv+Of2Z/cwAUP6I/ev+X/+H/xEALv67/5oAW/8pACD/YQAJAdoAkACyAG0CnQF0ACMC1QEmAm4DmwIhAb8BOwPHAfUB7AGLAY4ABAEJAcT/BgEDAFf+r/4o/6X/cf91/rr+Ev/H/2wAZP8SALIAeQAnAND/ywCbAeoB1gBjAPkBogHQADQBrQCAAW8B0f/c/yn/awDMADH9Xv6S/tX7WfzJ+2X6Eft8+uL3Q/gS+XD6Aflh9TH6ofqT+tT8Ffs//ED/UAGnAI8BdQVABcIFcgYbCNcJ5QguCjUKEAlRCuAKUgnXCEsI2QfsBqUD6gW9A4YCzANM/8v+hf8P/97/RP6A/Un+h/2a/qj+h/2d/q79rvyH/Jf7K/zK+ub4kfjL9xv3e/Qs8yLzRfFy8RP1ePfs9nr1KfbX+Rf8QgB/AZMCegaoBlUJJAz8DWwP8gx1C0YNyAyyCwgKQwVnBHgD8//4/hT9EvuU+ab3e/hu+en4Uvm4+R37XP7v/08BmwPnBYgIPQoqCyALfgxlDq4OsQ0eDG0JIgfIBbYDjgHq/Zr6ofaa873zvfBW7szt8ung5xPqh+8U8xHvCu4i8373pftc/ksB9gMPB8EK2AqgDgAT4xKHEMsNBxDuEUsP1wq/BtgEAwQdATL9zfqy+Bb32/Of8vHzSvV59ZDz1vQg+df8ov5+AJYD8gc6C3AMAg79D7wSABO2EVIQew71DJYKkAdvBHsBKv0w+MbzLPJa8Ovr9Omt5/vkkuRE5/Pr0+2v6xHtM/SN+AH9rwGFBDEJPA1nD1ER5hW3GYIZ6BV2EzYV8hVkEX4LnAeQAxwBBP2g93f0+/Gb8CvtGesy7XvuC+467hLxw/WD+k79EQBZBaAJPw31D5ERkBOaFrEXVRYeFsMU/hDRDagLvgd8AjD9nvhE8truiO4x6XflzOWN4mTe+eIj6jPsUOtx7Bzy5/ccANEFyweQDawSvBWcF5MbsiBTIOEbvhfuF2waRhZNDPwFIwMNAGP7kfU28Ensi+pr50TkcuUm6f/oyOYT6hDxmfd8+279cQHuB4gOpRFsEpUUSBhnGgIaThi+FX0TLBFqDQAIhQR0/1v4QPPM75fssudh5Svj/9zX2ofigum66WvlU+gm9B76qf4ABXEJ3Q+tFiQY1RmJILUkNiOsHRQboh3LHN8VqQxsB0AF1wBD+pDzpu7L63vpx+V647Xkbec55qnkvOqJ8sf2hvlV/eUClwmqDysSlRP9Frka4Bq1GfEZjhhTFfkQ8QzkCF8EmP6993Pxoe2T6o7kfuDL3qLaH9n54Tnn2uGI4bjqnvT4+Tn/BARHCgQU5xnUGhMe7SU/KaclSiHLIPYiSh/JFBINMQqjBoMAJvg58J/rYemB5lPhNN+h4d3iQeE+4/nqLvEY9Qn5Mv62Be0NbROoFO4Wvxx/IK4fQh5cHYIbCBjYEmYNgQgCA1H7Y/Nv7YjoPuNy3RDaV9eG0xTV7NyX4OLd/N6J6Pzz+PdY+x8GBA86FOEa4B67IVEpnSzbJy4kMCZCKC8h3RboEoIPnAg7ARD5YvIG7+DqZuQE3w3gG+O84GfdtuEm62Lw//Gf9Tr91gaKDooRvhImGMge4SF5IDYe/h4HHngZMRReD+cJqQOV/Dv0CO7F6f/iw9wK2ujUYtHk1WDbPdt32cLdMOeG8PH1FPqXAqwN/xSTGOkdWSS0KZwrvihqJgApnyiLIGoYkxQEEOAIFQJ/+hnzAu+q6nHk+d8e4DTho9513crhN+nJ7nTx+fTS/LMGjA1lEd4T4heFHosjgiKmIHghXB+VGkUWKRHeCkIFGf6Z9AzuTekM4rrcddm70frNG9YR3WPZXtaY3P/mdfDX9MX4gQPeDpwVEho0HpIkoiz1LHonTicOKyEp2h9yF7cSeg7GCNkAI/jh8KLs5OiM44vea90P323elN3y4VXp/e0D8gD4QP5JBi8P7xM2FbgZeyBOJDMkPSKVIDofex
yQFkYQ0Qm9AXj67PTZ7enkat7X2ofV5s2fz+TasN1o1drTNOBP7T70+vlO/2UFeBD4G84fyyKAKl8v7isBKCErMy2+Jn0crRXlEPoKqwW8/g70Outc6PHlAeDq29/cF93b2iTdieS56invavT0+Fv/Hwt+E3ATFBV4HRsk/yT0IoQg6h9RH7QauBLvC3UFEv2F9QTw3uiB353Zftapz4vK39KY3iPbsNE21m7nOPWF+7X+rALXDEAaRiKkJGcpTTDwMJcrLyrSLrEtYyJBFu0QOQ67B9f+6fR067nmDuXK32vZ6tjn23jbktlc3mLoke/p8lj3tf5NCRAUThfOFiAd3yZsKUImTSRNI9YhOx7kFvAOtgh/APr1yu6/6SvhfdiS1P3O0sfuy0jaO9060SfPRN9m8V36a/3TATkLJRiXId0knijqMH81TzAwKz8uGDFEKZEbKhNPEPMK/QD89jXuiueU4wbfo9ix1bzYK9oh2HPaxuIw6unv8vaZ/T4FhQ8MF+wY4BvDI8AqCCtJJuAibSLpIAgbeRIkCqABefgt8ezqAeKR2eLUDM/5xhjHWNKk27fXH9A91ujoUPdV/F0BtglyEwIevCWAKm8wEDYiNbMuuywtMWIvNiL2FZ4QRwssA0f7jvIE6FThqd+U26PVYtaN2srYUteb3xrqTO929Cj8lgNxDIwWvhtIHNYglylzLVUqNCbkI/wgzxy6FjAOHwW3/JDznOpv5BLe0dXg0LjNQMZQxTXTjN6z2FHTjd0L7VD5mgPhCNANZBnvIycoEC2FM3A2zzOrLWcrDS5+KksdiRGwC3YF0P3N9jrtzuGx3Kzcw9j004TVhNgX2JbaCeOw62XxIvf4/ogH2xD1GV0eRR7jIT8qLi79KuclEyKaHgYb/BSSCz4Cjvk68M/oJ+Nr2z3UBdEwzarGtcfL1JffFtyJ1o/fQvJdAGAHtgtOEUcb6iY8LBgtEjJTN3wzICuJKVcsnCdhGmUO3QeyAlL83fPb6H3fR9ya2/XXbNQX1lHZfdl63DPmcu8c9A35xQAWCroUBR3MHgYebyJcKg4uBSsyJOgeURy6GBsS3QlTAEL1MuzN5t/gntgl053QncrUxGzLXtqL3lfX99dn5rr1+v9GCMQN3xMtH2Eo1SoILxQ2dTYnMOIr8izALFgk/hb6DXoJ7AIm+n/xBOhh4Krd8drU1TnVu9l52jfYXN3t6Bnx+fSY+dkALQvjFcEa5hrQHdgkESrXKYImESPoH04bbxWWDxgJ2P+N9ELseefp4HLZztXU0HbIasky1r/cotbd1EzgEu7M9hT+DAalDgsYUx/aI/goYy+pM6cx2CsHKq4rNSc6HAgUcw8qCLn+Cvf277boa+O13w7bVdfU2BDdb91f3Czid+xS82b3QP3GBHYN2BUyGU4ZCB63JaQneCQhIhMgkhwAGK4R/gpLBWj9VvPA66DlA98228fX9s5JynvTu9252ujVZNyj5/Lwdvli/2gFZRBOGvYeFCSDKhAvOTAQLZ8pQirWKaki4Bg0ErkMeAVH/qz32e7P5t/j3OG33DDaYt3r3gvdxd+w6JXwV/Vy+Rv+dgW6DzEXSBieGGEdZiM8JSgj+CBEHz0bXxRFDucJzwPw+ubxWeoP5PLeptq71TXPx8uL0QTblttW1vzauejY8cn2a/6WBYQMNBdUH/ohQyavLDMu5yqMKYIqZyigIaEZdBMqDvgH/gC6+aHxeurr5gzlleFJ3i7eh9/v3zniGek58Pnznfex/eEEFQ2pFAsXeBZyGrchgCRHIiwg2h4iHKAXDhI9DB4Gnf6a9rfv5ejE4nrft9on0WXNZNX020nYRdQe2bHjf+1983f3L/6BCDASFxjNHFQjSClmKnMo0igXK6IpEiO/G7kWKhIVDPoEtP3V9v/w3OsN56njbuKJ4f7f498542nolewg8NP0tvopAYYH6gycEDIUHhmdHa8fKCCqINQf6RywGU4WdBFyCwIF8P
0Q95zw6unN45Te9df+0WfTXtnN2CPTvtMm3CTlCuzA8Ib1CP/GCdoO7BKYG7Ej8yacJ2InHyduKFQoZSNEHToZthMKDOUFHwA8+UX0lfBK6qfkO+Rc5RHkBuNU5EDnlet68L30M/nL/nwEQAl8DUASvxf4GyQd/hzyHbweXR0fG88YnBSoDtQICAPX/LT3sPK+6l/hI9yK3YLgKN3l1M/RrNfA3qvhduQF64Ty6PdT/KUCDwwWFWIYDBinGkUgZiRfJL8hvB+ZHqgbbRakEd4NYQmCAwn9HPcy8yzxMu4C6iHok+g06JznYOkV7W/xJvU999P5XP+HBYUKbw/iEiQUbBY1GeoZPxs3Hf8aOhX7EV0QDAyfBnMBsft09rXxO+w36e/oJuXo3WXa6tsU3yHi0uNP5Rrq4e/W8rf2zP2CA2wHBwzYDpMQORVeGasYnxfBGWEaKxcjFBsTURFTDbgI7gSdARr+hPp89m3yKfGL8bju8epp7IXvku9x8Bb0dfdD+yQAMQP0BQ4LqQ82Eo0VQhj/F7gXnBgvGJ0WNRUlElIMigaqA3wCpf7g9sPw9e7R7InoVOW+47HjRObW5yPllOSt6prw+vFt88f3BPxL/mIAwQMiB9YIfAmCCm0MSA7CDssNHg1WDQENgwtaCrsJIAhbBcsCIwElANn+IPwZ+dP3yfcE97H14/UL+D76sfqA+iH8+v/GAyUGiAiACw8NMw39DZYP8xAeEg8SXg/7CwML2wo3CCsEEwFT/pX78Pgs9r/0s/T88hzw0O8L8fXwKfFk8qjyE/PK9ZT4JPmH+dD6D/vh+pD8o/4Q/u37svtI/d/9f/1v/ST9r/wu/k4Axv9a/lv/AQFgARoCEAOuAv4BaQIsBFcGSAePBgsGFQf0CEYKRwqHCbAJ4ApdC2wKUQmnCH0I9AhNCYsI0QfUB08HCgZbBSkFdQRNA1QCkAFyAJT+Uv2F/ZT9ofxA/Hv8GPz5+zn9A/4p/XD8o/yc/Db8m/tB+qL43fff9oL07/GE8M/vvu567Kzp8egm67vsGuuH6bPro++i8in1o/di+r3+RgPOBWkI7gwJERMTgRTfFTgWTBYFF3QXNRZvE/8Pdg3ZDMYMTwtMCHcEswHRASIDMwKx/6D+j/83AbYCCQO3AkoDIARUBNQFjwgxCWYHXwZLBuwF3AX7BU4EJgHM/qj9SPzC+hr6v/lV+O71iPPj8e7wBPDx7hLuauxy6X7nAuiE6aTqUuug6xfsU+6E8qv2LflR+zD+ywAnA4kGUwpIDDsMdgxNDiYQBxAoDkYM3gtkDN0LRQl8BhIGLwd0BlsEIgR8BdgFAAZyB+IIaAk3CmYLNgxADUMO6g3xDNcM/AxIDHQLsAozCVYHxgWVA/0Ad/8a/vr6uvd19pr1f/Pz8N7utu2Q7Xztyexh7J7s1ezA7NzsEO5D8JnxNPEf8fHyyvXI+NT6uPpa+lX8tP55/6oAYwK/AXcANALwBKIEQgL6APcB+gObBHwD4wL3AzYFsAX4BYgGzge0CfUKIwvVC00NKw4GDz0RdxLYEDkQqRJoE6MQGw/1D/MPYA7nC+QINAccB3IFeAH2/Wv8kPtI+WH1jvOR9D/zKe8n7jvwKfDm7ZDsAO1Y7invr+567vTvuPHb8QjyVvRw9u71xPWP+OD6ZfpI+Yz6BP5A/8n9Xf3P/hwAswC7AU4CkQIsA1kDDwPgAmIDJgUKBrwE3QMeBfIGSQfEBs8H4gooDXANzA1bD7YQpBAhEY8TLxUlE9sPlw/hEFoQkQ6WDN4KtQgzBtgEuANzASH/KP2X+gr46vZ99vPzQPAN79TvG+8A7Evpfukw68Xqneig6B7qZunH6PvqpO2+7nnvfPHU9Gb3OPkp/Gn/6wDzAToEJQbfBrIHNQldClUKmAmzCEUIjQgTCDgG3AR6BF4E0wROBZ8EwwPjA0UFTQeGCK4ISQnhCuMLAgxLDSwQGBLjEWMRbRGGEXYRkxGLEdkPsg
y/CigKmwhgBpkEAQIE/mH69ffJ9hH1YfAw60zpFumv5jPjD+Ii5D7mfeXr5Gvn8+nt6njtufFu9AT2Hvmn/UkA0P8QAVEGuQlLCNwGIQh+CY0JKwmzCL0HbQaSBOABkP/u/tn/SQC4/hn9mv0U/0wA8wGwA2UE4gRFBnIH2AiIDPQQBhKtEDER4hNmFnYYHRkXF48UbxNrEjYQcA02CmcGWwJG/sH6OfgV9nDzFPDx7CLraelA5nTjt+IX46Pkied26V7p1+ro7lvynfX5+hkA6QJYBdUHrQiECZ4MKA+GDQwKcwjnBpEDmAE+AWT++vkz+JP2FvLm7wPzqvQk8tvx9fVu+b76xfznAL0FIwqyDT0QfhP9F0EbOhwNHTcfpCAmH0kcHxq5FwMUfxCNDVAJqgPt/s77G/h48/nvcO0t6nznO+be5Ebk5uU154LmJuk68u74+Pej95P+/QU9B+oGagpaDlcOMQ3gDdYNxQtYCU8GbgKo/+b9FPpn9BrwO+2s6XXm2OV75iPmROWA5QXozOy98QL27PohAcoHjA56FFoZ6x76JGgovCiZKAYpIygGJdEgTByUF18S+AstBej/rPvc9nfxiOsd5p/jQeNS4qzh/+Eo4tziruXv6BzsJPQnAZsHPAPYAksOchhOGYgZZx3tHqQb1hduFhgVeBFXDGUGg/8K+TT0uu/M6SXjl93N2czX2deb2NfYK9p93tjjtej47h73E/8DBqIMIRPBGaYgcyb0KZsr9Sv8KjAp7iZuI1AesxjPEgEMCgXT/sb4RfOf7nbphOTa4d7gc+Dc4AjhxuDY4oXm1+cC6hD0bQO6DPQJ0wR7CvgXTyC+ITEiMSK8IO4eWxwNGXsWOBNLDKkCZvqb9WXypO1J5mze89jU1o7WfNb/1QLXCdtF4JrkdeqQ8/D8nwQLDJUTEBphICsneyx4LrcttCutKFkkOx/3GQkUuAxCBMj7J/Sb7ovr2+ge5OLextyp3W/fbOKh5fLlp+Xe6ajv9/Hl938I+BRbD2oHIxBSHr0ifiRFKcUomSJhH68eZBuwFwwVkQ4sA2b5q/Tp8JfqvOO33jDadNXp0zXWYNdS19Db5eLP5tPrTPZsAA0Hmg5EF2UdEyIOKGEteS6WLHQqCSeGIJkZOxWFEB0Ihv4f9nPuH+nE5m3jH9532ljZkNl927/eQ+Do3h/hVefo6F/t4QGuEmsLMgJ8DQAfuSaIKRos+itBKbkmiyVFJDkiqB6VFmkKqwBW/Yr6ifGl5uDgbN3q2DPXt9Zq0pfQYNeW3rThE+hN8vH5mP8uCd0VAh/2I6wo3CxeLrEu7i6pLNAmMiBeGkgTugoiA0r8b/SR7DnmE+Co2Q3WqdXe1UrX39mj2MjUAdie4Dnir+Rn+OUKmwVD/bwIpxmrIp0qPjCwLjMsUyyWK8UpzSjOJQsduQ9yBDUAi/2J9KLpUeQA4CXZDtba1trU/dLe15Tej+HI5ozxjvr7/zIJPRb6HhwjciipLskx4zGzMGMtLifoIIMcFhefDk8FMPze8lXrq+Ye4sLbn9Wg0VnQndLD1jzX1dJU0mTYnNtB4AL0uAcFBBb5lQFDFfch2CiZLnQwMi3JKvss9i3CKpgmKh92EWAFaQGf/aPzlekk42PcxtUo1FPUvtB7ztDTgdsG4U/oY/H9+EUBww0uG70jIykGMKI2wzhjOKo49TYcMHgn9yAvGnERhwhZ/0j1cOxK5ZHdKdbv0dbPAc5Xzs/Pss10yyXQ4tZd1z/bvvDyBW4BZvcjBeEZgCFFKg02+jTcLUEuqjDvLlAsLShHHp8PVATbAMD81/Ck5Jnemtme0y3RR9Exz/LMpdDR2Eng9+ep7zD1tP7kEC0hNyaEJ8UunjhXPvU/Nj1RNIoq8CYjJh0gsBS0B4D73/KN7lTqL+Ib2B3RXs7vzebPkdFCzmDK+ssdzsTSHOe4/4H/WO8T82YNriMPLHst+CsSKysvwDTPNBMvYCi0IJ0VNA
tcBvsB0vZ96NXeXtq617zVgdJizJ7IrMx41W/eA+ay6TrsK/fjCk8bfSAWITknmDIZPCVAij2TNB8stCp0KzUllhgKDPABOvpl9RnxC+lh3QLUmNGx00rWZdaj0YnNf9Eu1qvVhd/C9tP/6/N78fQFDxwyJgAs+i9YLZcrNjJwNTguaie8I64ZCAxTBa4BLfgG61fgatnx1NbSQdHyzI3IJ8ok0QTZKeHW6NTtdPQiA8oULR9RIzQp9DCKNgo7fz0zOa8v3CjqJp4jtBoJDxoF4/x/9GjsLeXf3rTaOdYs0JnPl9NC0ZrN2NJe1p3Tj93c9qgCUPZ/8qQJ5B/TIpMm6DIQNkIynzXKN0owmiqYKWcgMw8QBYkDufua6jLe1dqd1k/P28tfyxnIcsaOzSTXnNzS49HtPPUP/5wQcB/4I+gmvy5mNmM6XjztOlYztyklJbMjgR3bEq8He/t98aftHepk4ezXKdL9z6PQ1tCuzmvOq9HS0rHQ8tik8sQEw/rw8MgCLxxLKTEwYDK3MD01rDw6O+Ez4S/yLNQj2RTJB80Bd/xa8HPhcddz0oLP/M2pyjrEAsT1zaTVxdcr4CTt7fRq/SENDhvBIfQnozBkNzw7mz36OucxICsrKxYpkR63ENYFuP0o9vrtO+Ul3XfWA9CZyrjKE843zKzHVslrzNHNo9zh9yn/ku5F7okKNiQ6LEMuOTKHN3Q8/D/kPws6+jLkLQkkZRTgCh0I4fxo6TXdI9ny0+rN9sl1xVvCbsaCzmnUxtpx5NPsWfQYAmoUGCAJI6snFDL2Oow/sT8QOC4udyxqLgEovBowDkkDwPrn9Pjt2+Sq2h3QR8oyzInPVs16x9jFHMoszBTPFuLl+WL56uvO86sOVCKwK7QxDzPpMVs2bz7KPws5kzGUKcsdsRO7D84INvh8533f1Nsd14DSEc3dxlTGZsyH0aPW9uBX6X3r0/IDBocYJR6RHsAlRjCbN0M8LDsOMgwqQyrkKvciVBWHCfcBCv0D+KzveePl2FbUEdLez3XQwM/yyi3K5M16zZPR2+Z7+5f2Sek29ZUU1CUiJ4Ur5y90L0Q1vz25O0gz/yzXJPMYYQ//CnAFpPer5q/dGNyl2ZvUFs/DyhTKz81w1Ofd+eZY6UrrifmxDq4acRxRH84npTFhOsc+vTl7LnYoNirWKSMhKBTfCEQAM/n/8vbrS+Nr22/U+s0LzfbPVc2gyaHNIs/zy1vW4e349yTwrvFFBvoYax+DJkgxljS7M9I3uTrONq8xbSwgIjIVXA2oCW8Ae/Cc4+TdU9rd1RrR2cwYynrKXc8o14rejuMZ50vvxgDCEjQZgBlkIMoswzayO1U6/jI7LJcrXS0PKVwdhxB3Bov+0/jP81zrod+/1JLO+87F0bDOOchMyLrMz8yMzpffMPXU9vfuuPkODxga+yMqMXszPS9ENHU87johNOovjikkHV4SJw4hCAb6UOvg4qzd1di01SzSk8sjyM3NpdUW2r7fb+Yk66D0nAYnFVQYchkjIg8u0jVKOes3UDAqKdgpQCxrJk0aCA5JA2v8v/nx9Cjqpt1S1NLPodCM0qvO48hNyWvLAMpF0RPp1/pV84/q9/njErIh+ik/LTUrsi7cON08UDiqMkUt/CQMG+8SuwzfA5X2TemV4KrcHtpy1QvPcsqRymbPiNYA3p/jreU/6vr5WQ1rFRQVcBnOIy4utzZ2OXAyuyiJJ7MsUizrItUV6QksAoX/YPxF86bmr9vX1A3TGtUI1BnMnsYFyiHMDM2R3Ovy2/N65q3rqgRPGjokvyWMJPcoVjR8PXU8rTRXL/8rTSRHGgUULw5BAoXz4uhW41jfmdq81CrOk8pcza7TtNnQ3lPhiOMB7nEASQ79ECoRphipJYYwfTVEMxwrByZLKhkvdym6HPIRtwqFBecBh/z98WDkedny1WzX0ddR0znLgMckzAvP/M6+3I3y9fNw55jtMAZiGIUg9yZ6KkcqCS
8SORk8+TVUMKMrhSJgGToVzQ5kALrxFOqT5SffjNgT0xrOIsx1zvfRctau3Y3jYuaZ7fz8EQumD6YR1RntJNEsNjH2MaguNCsTK1ErbicZILoXIg+JB9sBDfvP8H/nvOHV2z7W59MY0MHKXss6zbzIQ8sl4GHzQO6f4iruWwl6FxwaUSIxKvgq7jDOOhI6ezPmMQ4vYCUOHOAWExCZA/X16ux65kPfaNnf1CnPUssFzbLRPdZh2tbdg+IV7Kn5jAXxC7UPWhYCIWgr+DDKMPgsZyrtLE4wWyyOIrgaqRRRDQYGUf6F9c7tSOXF2xHY3te80Y/KzMoKyzTGBMnj2/js0OhX4FrtrQQCD8QV8yJSKQYopy5XOek5PDXqM40w+CfNICUcUBOLBYz56vFL6i3haNui2IfSj8vqy5LRkdXm2IHdjuBF5vv0wwPxB3kIMBAmHG4lXCuNLXsrESmWKgouxy3hJwMeuBPIDMoJVwa6/Tzxe+aD4KrdWduQ1tLPAc0GzQ/IzseM2q3uneh02ZDh+/tqD6cXyhkkGZ0f1i/yOrU4vTJDMdIwGC1gJpQeNBaXDL0B9vYt7jXopeLk2rnSqs6zz7XSRtYE2aHYwtkH5Vv1qP1O/uoBEAyCGNsjtyr2KaElRicHL0Uzli8LJ6EdJxdkFRwTyArn/gH01+s959TkOeCL1xHQq87PzWHJKcwR3KrlU9yr1bzjRflYBAEIOA2eFPsbbiWTL4Uz9zGTMZwxbS6+KQYl4x3wE9UJzgBY+BDw3+dF4N/ZGNUr0lbSUtW91rfUCNYS4Z3uO/Mn8tT3TwaTFG0b0hvsHA0j2CqWLx8whizIJmAjESMBIeMaThKXB7L88PZa9Rfwq+Vi3E7YJdf00gTMr83l2WPfE9aez5DadO1997b6cQExClQR0Rl3I64rvzBdMSkwVDBYL5Mr8SYsIPUVhwyFBUD9YPQp7enkVtwX2BzY1Ncl1YbSidOk2cHii+l96z3u4Pc9BQIOIxGAFfAcBiPrJhwqcSsRK2MqLSghJJEfdhq6FCoOQgaP/qb39O/o6bjlw97Q2NnX29EDyCPMa9yj4ajX4dFp2Z3ob/fz/gkCowkxE9wXJx5jKWUwXjD/LlstCSvpKfMnCSKjGMANvQVOARH6o+8o6F3in9sA1xjUZNEh00jYvNj415TeSuiY8BX6lwHvBGwLlhZhIC0oyy01LWsqzix1MLwu9ijIIgod2xbDDw4J0AJd+onwr+hZ4njcD9hH1CnQ9Mx6yuPJbNG13jLjH91p3UTsnf0oBmYLCxS/G6MfniViL9Q0xjHMLY4ucC7hKAohqhmsEroLJgNM+Cjvt+k25FncidTJ0C7TQdbj06/RIdbX3H7k0e4U9VP5pgRtD3oTcBv/JnEsfy7BLwouXC5qMaguPSZgHzgZeRFCCv4CLPtV9I3sxOED2bLWsdaJ0x/Nx8d2yenQRdYt1lHXYORd+msBcvU09qUQLieXJ5Ei4CcqMKMyyjJrM3QxYyw8JagbdBMQD3MJ+P6g8abmseHM31/bitM9zCLL4s8M04/T5tiD4gPpyO269hQDOQ7UFV8c1ySBK/csBS9DNf04KTZwL1snWyF2H98behGIBKT6LfPO6vfho9uQ1lTPZsmAyQvK+sbHyC/Qj87oytzeYf+WBQD4LPnXDEoiozEpNuoyMzLANBY33zqFPKE1tycTGdUPgw4iC5L80euO4ZTZytMN07DQCcn6wzXGScvZ0Z7c3OUf6PrsifwXDvEYyB4oI+0o7zFgO0Y/PTvlM90vCC8LLHglzxtvDhIBMPkL9J/rIuGX1wPN6sNbxLzJmcYjv3rBv8isybTRAe53A5n5L/A3BT0iCC7RMd83gzpdN7c4oUCLQdw34y1DJHQXpA5gC5QBw/De4zLbNNSj0CTN0MWNwI/CZMcAzcbWqeCM5A3pL/ihDaMakhsOHigrjDnDPxA/gjrqNLAz9zWsMs4mCxnWDfkEIv4F98zrON
yEzqjIp8isyUrHjL8vvNvDusjOxvPTdPF6/5z13vKVCZIl5TLfNq05qDjKN2xAbUjFQa0zyCowIxUZARF8CY38Ie1p4ejYX9ML0SfMTsSTwW3EYMeLzo/c1+RN4z/nMfstFIIf6R26INgsDzquROdGnzsDLkYtOTTBMnskqxH1Am37Qvi08XnjPtQFywrGdsQdxiTD5r27wBzEscFbyzzo0/zz9l/tJ/yWHPMwMTTWNKM1IDbqPtVJQ0flOREvtCbSHSYXrhA9BPDykORZ3OXX3tMpzr7HVsMTwi3EX8vS1wPiLuM05IPznAy7HFIfjB+CJWgyyUH2R88+zTFdL1g0DjRVKbkYCApQAfz7l/Tl6GfcO9N7zH3G2MS3xVXC77+gxOPG38cZ2bHzJPld7Yf0PxJSJhQqSy/9NUc2FjoORQlHVzuUMC0r1iJIGIARLwr6+xnsUeIs3bfXBdEJy8zGB8TgxG7LYNQ5253fnuOH7ND+7RIRHKQcEyHwLF46OEIyQFQ3XTBIMIYyxi1XIHwSDwnKAJT4o/Bo5pTb8dIEy3TH7slNx8zAzsNoyIDEgMpL5Rj4z+8b7IUBbxcBH0MoYzNKNII0dD03Qos76jOIMHEqHB+AFfoQUwho953qAef54DvWK9Er0DDKjMUfy/TRLtO72KPj8+hW7tH/oBK9GW0cOCO9K0c0qTssPHk1NS+8LfktOyrNIGYUOAiJ/gn5yvS0673e69SC0EXPX85oytzFNMX+xJ7GOtWS6kzvz+Z97FcC0BHuGNYi/iooLBUwnTlLPTo4rDKpLi8okiAGGzkUgghS/I3zmOt247TcaNeN0lrMUskpzwfVKNRU2F7iIeah7B7/dAzvDp0VJiC7Jr0u6TZyNaQvJy+xL60seihzIXcWZw7jCA3/0/Wg8J/nMd2z2ALV2857yzLKx8YUw4jI1Np36ATixNxk7XoBjgj/D2YadB1jIp8wuzgZNa8yRzR7MfYqFiZkIVQZEg4dA2T77/OZ6eLgmtuu0xLNm89J0/DQBNF81trajuGt7gX67f+QBVsNxRdEI6EqZixzLNQtKTHhM+AwqijwIWkecxkpEdIHe/+597TuneRQ3qzbxdVlzgDMisiNw1XM0N+n4sjV7tay6Sj5tQGYCSkNbRCMHdYsKDDeLJ4vejMwMM8smC0mKbwd1RTLDtkF2vtN9HbsAuJM2rDYr9dL1GfShtJq0pHUUdxC5rDtQ/NR+YEANgpnFkcf2CA4IakmiS3sMNAwIC2vJgoiZyBFHBsT+AgOASP6afNH7bbmL9/d2bnVYM6/y5rV5t1r2cDUFNnq4TTtoPbE93f5gAQjEYkZTyBmJBkliCZJKqItFi3yJ1MhcBulFfwP3QnSAeb4tfGZ7Kzoh+W84szejNmL1j/aGeP56P7nq+dz7sb43AJ2CnkM2wzeEUYZgB+bJOYlCSLrHbsc3xzIG6gWYA0cBbEB5/+P+wr2TvF162Dk7eBM5LLoKecZ4tjgDuVD62fw+fOu9XL2gftgBnsPnBHWEIARlxOuF/ob6RrEFdwSuxAmDDcI/wV1Abj6o/WN8kzwgu/M7fbnJuJt4//pnO5y7n3sH+5c9Z794gKHBtgJRAy6D4wVNhujHXwcOxrcGa8akhnCFVsQkQqNBocEPwHd+6r3SvQe7x3s5u4q8LPqKudi66TvDvD68WD13/Wo9gz8KAKIBDkFxgY1CWQMqg87EWAQvg5BDr8NmwrvBsMGygYSAlH8W/pe+QX3RPT978TqqOlN7cHvBu7l63TtkPE59Uf4xPtP/+wBBQVJCvYPOBKrEGAPqRHfFVgXcBQbEEcNywuTCrgJqwjsBBj+EPrW/AH/IPoN9eb0gvQV8570FvZ39JPz6vVk+UX8m/1e/qgA/wNgBikIkgmKCTQKpwwlDW0KpAizCGwHTQR0AcT/Kf7B+2z4hfTi8OXvnvIh9K/wwu2j7yTy+fKM9O32c/nf+/n8lf4fA/oGwAfJCCAKfQ
mcCcELSAzGCigJsQarBTsIRwjhAjQArQOQBckBCv6n/TP9l/uf+0H8SPqw95n4KPsW/Gr82f1I/2AAiQLNBfUHIwjTCO8K/gtFC/8KgwssC4UJJgf+BJYDBgJV/y78K/n29jr27vUc9ITxbPAF8e3xvfKd8zb1/PdM+rH6mfv0/lkCnwPSAyoEtgQ7BVEFOAVhBeAE+wOCBB8EHACV/bgAKgM3AFz9mP3x/an9Xv1m/Jv7ovt3+z77pvvo+3H8qf1c/jT/4QESBE8EwAWXCMkJQwoTDEkNswxiDN4M0wx+DHkLYQi3BPoCfQI/AQD/4vuk+AT3JfcB9+f1yfRn9MD0yPWY96L5a/rx+Rb69vs7/nn/HACnAKkAvwB3AT4B9P9aACICkQHA/iv+dAA1Aeb+Kv1m/Wv9tfxW/E38aPzb/Er98fxs/M78+f3V/p3/DQFwArwCxQK6AwsF6wW9BpUHvwd2B4EHhwftBmIGjQblBiYGFARgAi4CUQJoASEAS//I/ob+c/4C/u797/7R/6H/Qv8h/wn//P4X/6f/FwD6/jn9Of0q/qr9sPwU/ef9iv2P/Eb8mPxI/JL7lfv0+7D7J/v/+gf7EPtI+3r7wfuD/IT9MP7C/nb/TgA2ARICzQKYAzoElwQLBXEFHwVTBHUEpgXmBT8EcQIZAkMCxwEuAcIAjf8W/gP+OP/V/07/Df+J/7r/WQBPAoADswJMAhQDxAOhBO0EAANiAcACRQQLAyMBowDhAKwAAgBh/47+E/3g+9P72vsA+3X6p/ou+jD5Sfne+Xn5IfnU+Y369vrA+2z8h/yV/bb//gBNAS8CBAOGAhUCMgOhBNkEDwQtA3kCCgKsAUABygBrAOb/Df9D/kX+qf7G/hr/gf9K/4T/GgGHAoUCNgKNAoADogRaBHgCJQIZBBwFPwSCAzIDGANTAx8DpwJwAr4B1wC3AEEA//7y/rX/3f5a/VL9wf1w/SL93PzQ+yH7vvtB/ML7kvsO/Fj8lPxG/c/94P1Z/q7/3QAyASAB4QCiAHMAEwBz/xL/yP4b/nn9dP3C/ar9Tv1i/Q/+4f7b/2YAyf9d/x8AiACVALUBQAJQADf/rQELBPwC4wESAzUDxQFoAnQEHgSGAscCtgNSA2sCPwJaAt4BFwHVAMUAYgAmAAkAMP8O/uj9p/7N/v79nf0v/o7+pP4b/1P/Bv+c//8ALgE1APz/jACiAEAAyv8V/23+Bf6j/Wr9lv2B/cn8EPws/C794/2B/TX9lf2c/Yb9lv7K/1j/b/4p/6sAGgHCAOoAOgE+AY4BZALxAt8ClgJ9AmMCUwKZAuoCngLtAYUBfwHAAQIC8wFSASoAPP9R//3/+P9N/+L+z/68/g7/hP9//67/1ACeAR0BwAA0AYABlgG7AUwBTwDO/8n/pf9e/wH/Pf59/WH9k/13/T39Cv22/F78P/xx/BD9ff28/Ln7kPxo/hT/G/+i/5X/O/83AJABcQHSAO8ASAEuARwBcQHQAawBBwHGAB4BLQEkAYQBcwGSAGAALgFMAXUAMAB5AGgAWQC2AIQAxP8dAM0BqwIIApsBEwJjAqQCKwMWAw0CcwGlAWoBlABKAFoAvf8A/wr//v4x/p/9t/17/fj8LP20/WD9d/xV/Gn9Yf6d/sf+9P6n/sb+x/9ZAP3/5/8/AGQAgwCxAE0Aw//4/zYApf8P/03/AgBHAND/Jv8z/7n/5f+W/z//Ov/I/0oAy//x/iz/LwDVAOkAxwDCAHgBdQKPAgQCUAIUAw8D0gL4ApIC0QEMAsACWwJjAd0AfADV/1D/Lf8y/+H+If6D/XP9lP3J/Qj+9f2Z/bn9YP7g/hL/j//x/7X/h//+/3AAcACIAIsA5f+K/zgA+wDoAC4An/8+/yr/av+N/1L/8/6n/nX+hf6F/oT+5/5s/0X/xf7a/pH/FQAuAEoAoADGALAA2QCMAU0CjwIlAoUBhQF8Aj4DqwKXAWUBowF6AWkBdQG9ANX/rf
/H/57/tf/B/zD/mP6t/gD/Qv9//4//YP9K/2z/m/+k/7D/+P8WALX/aP/F/z4AFwDg/+7/0v++/zIAjAA+AN//4f/c/7f/lf95/13/RP8s/w7/3f69/q7+rv7Z/lz/u/99/zb/v/+FAH0A/f8dAKwAEwFIAScBxQDpAG4BUAHCALEAzwCIADcARgBDAB4AEAAXAO//oP9S/yX/RP+y//f/tf9a/4r/6P/p/+7/QgB6AIcApACTAGwArQAHAfIApwCkAKMAiABvAFQAGgD3/+D/jf81/yL/L/8f/9n+gf5U/of+9v49/wX/rP7E/nD/DQAzADkAhwDHAMwA+ABBATABHQFDASwBwgCsAMwApwBrAGcAIgCN/zz/XP9o/0T/QP9e/1H/Qf9v/67/pv+2/wkAGADo//z/TwB6AH8AigCMAJ4A3gAgAQ0BwwCPAIcAigCEAE8AAADM/8n/vv+H/0n/Mf8o/yH/JP8Z//L+HP+O/7T/gf+n/zMAfABpAHMAkwCMAKAA+QDmAE0A/f8yADYA4f++/9T/oP9E/yr/HP/j/vf+I//V/nv+5f56/4T/ZP+A/5b/u/80AHkAQgBZAOUAEQHNAMgACAEUAQoBDgH6AOEA3QDUALAAewA3ABkAIwADAJv/Vf9X/3r/m/+j/4f/e/99/3X/lf/5/xkABQAvAHgAcwBuAK4A0ACbAJwA0gCoABUA1//y/9X/n/+k/5r/R/8H/xv/O/8K//T+5f6r/oz+2f4g/zb/af+2//b/DAArAF0AlwCvAKYAqADSAPIA9QAKARkB7wDAAMUAwQBzAB4ACwAaAAYA2P+c/3j/Zf9i/2L/X/9q/5n/s/+R/4L/uv8LAD0AVgCDALEAyQDZAPUABgHrAL8AkgBXACwAIgASANz/nP9p/1P/SP88/yD/4/68/sn+1f7N/tL+//5A/33/lf9u/0T/h//5/ysAIgAiAC4ANgBWAIIAiwCMAK0A0wDXAMoAngBlADQALgA2AB0ABAAQAAcA0/+2/6f/kf+h/+T/DAADAAQAHwA3AFkAjACnAKEAsgDNAMMAxgDaANMA0AD1AB4BSQFPAY8B6wFBAT3/c/0X/KD65vlI+tX6vfvO/bT/JAGhAsQDUAR1BHYEtAMoApMAb//N/kz+1/3O/Y/+Hf9u/y0A1gAuAYABpwFBAZQA7//E/oX9J/wa+wL7Vvuf/IT+3wBjA5YFtAcBCREJighRBzQFZgLQ/yz9Q/pn+CH3n/bv9jP4H/qP/Bj/3wC5Ai8ExQS5BIsEPwRjA0UCaQGSAIn/pf48/jn+bP4N/8n/aADkAEcBcAHlAA8ARf9W/pz9EP27/Jv81vw8/cb9nv5p/xcA7ACcAYoBdAGfAWIBHgEqASIB5gDZAN0AkwApABkAKAD9/yUAJQDj/+f/2P+w/8D/2//7/xYABgC+/7b/9P/q/+T/AQAOANj/IwDOAN4A6gBOAXEBXwGRAa8BoAGhAXMBIAG5ADwAyf9b/wL/n/4v/hP+Nv42/mf+5P48/5X/3P/s/ykAgQBmADcA///h/+7/2f8TAEwAagDMAC0BGwH4ABoBPQFoAWkBWwEZAe4AxAAUAEX/Z/7x/fT9uP3B/eb9Bv4o/jb+Fv6O/jj/N/+q/0sAaQBMADkAOQASAPj/x/9l/1D/Xf9y/1L/Nf9d/zz/Xf97/2L/c/9X/2//Vf8i/2z/hP/w/2UA5wBrAVUBewGrAWgB8QDsALUACgCs/2T/Wv+Z/1IADQGnAZECPgNrAz0DSAMjA3sC0wEPAXMA6P9i/83+9v0w/cf8evxD/Dj8g/xH/Tf+hv8EASsCZQNwBLcErQQHBIMD4AKUAXkAW/+d/v79pf1E/dj8Mv27/f392P04/rj+0f66/qX+If9d/3z/rP8ZAIQAlQDMAMsAagBVAI4AigB3ALsA7wCfAAgAg/8C/27+6v1e/SX9b/1Y/q3/0ADkAcgCQAMdA6gC9QH1APb/U//v/lj+Av4g/or+/f
6A/ykAmQDJAM0AqgBbAPT/YP9H/0v/Bf/v/k//uP/z/y0AOgBpAFcA3v98/0T/G/8i/4f/CwBlAOsAjQG4AX8BXwEhAWwAuv8d/4X+Kv5U/pj++P5d/9b/ggD5ABEBIgE4AQIB8AC+ACwAxv/H//T/VQB9AF8AtgAcAesAiABLAO//uv+I/yX/H/9C/4b/OgCAAFsAzAADAWwAGAC6///+n/7Q/gf/Ff9K/7n/DQDm/9D/x/+i/4D/av9W/zT/n/82AF8ALgATAHIABgEuARUBDwHzAMQAQgBs/9X+Sv+i/y7/Mv8SAMoA4wA/AVEBcgEXAl8CRQLrAUYB4gAKAFv+mv13/QX9a/1j/i//JAA3AZMBWQFTAboA0/86/zX+Rf3o/DP81/tP/Jb8N/1H/rD+q/7l/vL+cf7U/V/9Fv3n/H/8ffwe/Tv9Y/1r/tr+8f7N/6YAQwHdATIC2gKWA08D5AJBA6wD3wOXBG8FAQY/B7AI+wgACSQJhQiiB5cGFgX4A8MCYwFuAD3/fP6i/nj+Ev5Q/nP+IP4p/uf9Kv24/G38Dfxc+7362/pG+9/6U/qJ+qH6N/rr+cv5K/lU+Nn3F/fs9T/1MvVq9dn1tvak+Pf7dP9yAnIFpghmC1INfw6UDqAOYg+VDuMLAApSCEcFZALH/6v8vPp++v75vvkE++j89v4OAV8CwAO8BfoGkAfKCHYJPQnLCX4KuAmQCBEIIAeZBQUEFgL3/3D+EP13+0H6mPlV+S35sfj29+L3BPhQ97H2wPYN9uv0hPSV863xq/D57y7uGe017cPtQPBO9J33qvuuAsEJhw5aEsYVChjKGJcWBhInDnoKtgT1/ln7R/i19TX1ovUJ9sD3efrK/Ej/fgJxBfMH2gp/DVwPShH9Ev0TGRWpFe0UvBN5EpEQ8Q1BCmcGVwMFADP8fvkk+Af3bvbo9uv32/j/+Rb7t/uG/Ov8a/wm/I77vPla+Pz2JvSa8RvwLu567IvrSeoS6YPo4eft52jpzuo87Ub0V/yOAHkGwhCcF+wZrByQHsIdJxsvFkQQlAtZBs3/uvor99LzLPIV8nvx3fGX9D33Rfkf/b4BuQVACoEOFhJBFkAZKRr8GhsbJhmzFvgT2g+xC0UIcAS9APP9ufts+tP5oviV9+z3ufdb9nL2pve097b3DfmH+rf7/Pwv/kr/1/8P/5v9vPsI+MDyAe5N6aHjId/i3C/bktp13Gfgbueq8aD6SAIJDYMXTR0cIXsjryImINUbXBS9DFcHawLk/Hr4DfaJ9ZH1kPQf9Lr1d/du+PH6n/9ABPgIRQ/eFe0aBx+kIo4kXiOBH0kb+xbFEMUJzAR5Acr9fPrb+Bv4HvdD9iX2Jva09WD1y/Vl9qn2bPeu+Gj5Avo3+7/7X/sL+wH6jved9GPxvO026vDmQ+Sm4hrhqN/o3yrhweGu40Tq0PO5+xUDjA23GCsgRyRaJiUm5SPoHocWng2ABikAxvmX9Hrx7/C/8nb0tvUO+aX9kAD5Au0G7wqDDqASWxYMGa0bkR1lHbcbZBlWFpISLA6SCSMGUgRzAjsAff/p/5L/Xf7D/Ij6Pvgj9nzzaPFX8SXypfKS8x71q/Y/90v2/fTu8wTyje/D7XHsNutY6t3pj+kM6YTo/Ofr5uzl8OVx54PsS/SH+u0A+AqPFPUZNR1hHwwgtx5wGicURw63CTsFMgDu+2P6VPui/F392f6rAVUEGAbhB8wJ4QtBDjUQaBGREr8TkBSuFM4TChOsEyQU2xJdEfsQSBC9DfEJSAbgAq7+y/k79Yjxk+4l7IDqGOoA65jsBO7g7v/vgPEU8qXxavGc8drxFPIa8l/y/vL68nPyz/E28EntJOpF54nkHeJJ4SrkIetX8xT7EAS1DiMYVR3/HpUfWB/SHCAYEhNID/kMrwrZB6AFGAVwBc0EAgNrAfQAWwGnAW4CuwQDCEQLTA6uEIYS3BMmFBgUgRTiFFgV5hWwFRoVZhTMESMNIQ
jqAgP9/fak8bHtdevZ6YPo2+ik6gjslewB7Urteu2Z7UjtfO0v7yXxcPLz86v1svb/9ln2svSW8tbv8+sf6I7lhONZ4vvkouvg8nb5DgHuCYgRsxUyFy0YVhnOGQAZlRcWF68X5xckFiUTIxGlDwgN4ghcBLkBAwERACj/dwBQAxgGOQjdCUsLhgyrDSMPshBVEnsUYhYiFwAXLBbFEyEPgQmXBLT/lPkf9Cnx5e7d66XpDukZ6fHocOjU59fnL+gP6C3orOn96xbuO/D18uz1Dfia+CD4GvdA9YfyNe8j6yHnCuU85MXjiOY17tH2Uf0LBBAMhxLVFQ8XNhjEGmAdsR0GHcMdgR7OHOQYcRSxEHMNgQkxBbICYAJiAogCJgNLBB8GgQeXByoIoQm6Cg8MYQ6FEFkSbxSFFbAUJxOcEYQO5QnIBVwCNv6i+cT1E/P08BfugOsJ62XrU+q86PPnWedm5oHlGeXy5Rfoj+qY7SjxYvNz9Jn1O/Wb8lbwtO7B68rokOff5m7m6Oji7yv44/2PAqQJOxGmFFYV3BeAG2IdWR6LHxsgFyDwHl4bnhasEisP3AtGCU4HkQbcB5gIVQeiBj8Hcga7BCMERwR7BCwFnwb1CMELSw51EMkRABKPEUwQig28CTUG2gJO/iT5uPWT87Twgu0K7EvsIew86ivoqudW5y/lzuLT4qzkjuab6I7rCO/U8d7yafKD8UfwZO5E7D7qqei/507nTugc7Sb18/ukAE0H5Q/cFEIVKxaUGdUcqR6/H7UgICKcItUf2hpsFm8TixGCD7EMZQuwDDANAwpNBjMF1QQXA9IBfQLtA8UEigX8BtkIXgrmC6YNfw6XDtsO5Q1zCp4GhgNr/x76IvY+9PzybPEZ8K7vNO8t7f/pWuf85Cji5t9j36vgLOPz5enoZexO7z7w8e/X75/vnO6a7SftXuxe6yHrROvp68LvIfhsAL8FFAxeFDQZfRmpGf4bqx7BHwIgyCCOIeogDx5hGV0VpBOZEp4QWw54DSANCwvMBuAC2gAuAKz/wP+uAYoECgZwBiQHwgceCDAJdgooCz4MjA3CDHEJkwXsAdX90vnt9lz1UPTt8nTxzO/w7ALpUuWD4nXgKd8N38jgvuMH5qDnr+lZ68zrC+yo7H/te+4173zva+/b7h7u/O0s7nrvLPWq/pwF7gkQEZwYPRu2Gr8a8BvFHUsfih/HH2Mh2CESH2Eb8BghF+oUFRIjD18NCwzvCJQE4QFlATQB6v+K/+EBfwR9BL4DYARLBYcFsQVEBlYHxAhICdMHsgRGASX+5/rG90j1yfM481jyHvBe7afqiueF5J7iguFs4eni9OSW5kros+mE6pXrXuxb7EPtRe9A8FfwKPHh8Tnx3u8I7/fuO/CR9Dz9ewZyDDkScxmyHPgaYBpeHK4dfB7vH74gqiG3IikgMxujGIwX/hTqEYoPTQ5XDVgK4wVWA5UCIwEz/+3+ZgCfAYMBFQFVAQcCkgLaAi8DBARUBQUG/QQVAw4BgP6J+/74//aK9TX0ffLW8EDvgOxl6SDn+uQU42bigeJO4wrlt+YY6EPqi+yV7aXup/AO8q7yx/Nz9Hvza/Lu8ejwcvCR8l746QC3By4MmREIF58ZDxr7GZUanxy8HsUeNR72H3AhUB9XHCgbxBkcFxgUQBFUD/wNhQumB38EXAOaAigA7v3I/cj9cvxe+/763fqH+5X82fxS/fr+GQB1/2n+yv3h/If7Dvpt+aD5L/mm9732ifad9PbwsO1o607pk+cT5iXlMeYF6GLoDukE6+jrPewm7l3wG/L683H1+vUT9pD1tvQr9Un2dPjH/m4FoAe/Cr8Q7hJoEtcT2BQRFmkaLB3NHMwexCFuIdgfdB4rHYodHx45HEYasxmSFxoTew7fCSEFQwEU/if7Cfkc93f00vL88TzwIu997/DvKPGX83b13fau+J35TfnR+Cn58Pqe/Ar9tv3B/vb9rvq+9srz9f
C87SDstex37dztcO557lXuQO/e8GTyJPSp9iT5Y/oq+gr60vrx+vT5bPqd/RQB+gI/BM8FiwfHCHsISQcaCOUKHg3yDZgOjxB5E+IUbhTJFAQXLxnHGVYZRhntGc8ZXxcKFOoRDRDTDFAJkAZuAzwAfP0A+k/2Y/Rp8ynyg/Ha8XLypvJb8hvyR/KM8k3zDPVC99z43/n2+kz7HPq0+HP3pvVU9KPzhfJS8v7yYvL38XrzCvW19nX5qfsD/Z3+VP/J/nj+0P5i/6T/+f58/tr/VAGcAMT/xgAmAuACyAMmBLME0wZ+CM0IeApTDf0O6A/7EOMRahOEFfoVeRUGFkcW5xRWEm0PSw10C2MIHQUxAx8BNP6O+x354Pad9U71y/RA9Mj0ePXD9KHzqfNQ9H/00PRP9n74/PmZ+kD7JPyA/P37Avs9+rX51PgD+Kr3w/dr+Cj5rvnE+n78jP2Z/XH9gP2a/VH9bvwx/Fv97v3t/OP8zP7i/1z/i//YACsC5QJFA8ED2AQsBjUHIQgcCXIKMgzODAIMWgwGDuQO7Q4EDx0PjA9wD0YNFQt1CpwJyAcKBp8EHgNHAc/+OPwk+rb4Ofhm+Ar4vvdf+EL4ePbl9I/0ivR89Mf0wfWK98f4hPj09/b3sPfe9of25PZ29y/4xPgc+Sn6cPvH+1T8Tf5OAHgBbgI/AwIEuwRmBMoDjQRTBe0ExQR3BfYF1wVtBS4FagU6BXwEZgTjBC0FywXdBoMH1AdgCHgIogfpBgAHOwcWB6oGQAYQBv8FHAWJA90CNQPMAn8BwAB+AJH/Fv6//NP7SPvT+oz6mvpr+tn5vPmm+Y/4WfdM98z37vcI+FD4jvjf+PL4ZvjR99X3Ofh/+J34+Pjc+fj6u/sd/LH8ev0u/uP+d//k/8AA3QE9Ag0CUwIfA6oDdgNnA28EnQXLBekFfQa7Bl4GLwZIBj8GRwbPBnQHsge8B/sH+QdNB7sGvga0Bi8GxwXGBa4FdQXuBDEEHQSWBPUDSQJHAeEAwf8E/qT85Ptp+/L6jPoU+lT5wfjD+IT4XPfD9rT3kPiH+Lv4W/mz+an5I/lc+AX4Vfjb+Gz5C/qf+pb7+/wB/nP+Ov9NABgBrAESAncCVgM7BGAEKwR+BNoEbgShA3cDEQSEBDUE0AMIBCAESwOZAo0CXgISAmsC5wIOA0QDZQMnA9gChgIWAr0BvAHoARkCnQJtA+cDDgRWBL4EfwRAAyYC6QGDAWYAmP+m/4r/r/6W/ab8yvsx+6v6AfqP+V35U/k++db4Sfgg+Dn4FPjT97/38/dS+Nj4Yvnw+c369vsR/ez90v7R/7MAjgE8ArwCgQOZBEgFlAXQBQ0GNAbyBU0FAQVmBagFSQUYBSUFygQXBIgD0QIOArUBwQHWAeoBCQITAr8BIQGJAPL/YP8w/4v/AACHAGQBEgI2AjUCQwLoASABgABJADAA9f/M/9n/tf8h/2D+wv03/cH8mPxZ/Ob7p/u2+5D7BPt4+h363fl0+e34hPif+AL5T/mR+Sn6/Pq4+1D83/ys/cn+xv92AEMBUAI+A9gDaQS8BNUECwVKBT4F0ARXBGME0gTPBEQE6wP4A8ADPwPGAj8C1wHvARcC3gGEAWcBcAFDAZ4Ax/89/z3/mP/W//X/fQBKAZoBMwG/AJUAUgDc/5P/o/+a/2v/X/8y/67+/f1k/RT90/yM/Hb8mfyA/Cj8+Pvf+5n7K/vs+un6+frU+qv63vpQ+3j7d/vZ+2n82fw1/bj9c/4//9//ZwBtAZYCLgOHA1EEDgVLBUwFhQW6BZQFPwUsBWMFTwXBBF4EXQQ4BI4D4wKgAqYClQJNAu0BowFnAR8BwAAvAIL/GP80/23/Zf9T/4P/zv+n//P+S/4y/kT++P2a/a795/21/TL97/zy/Nj8i/xa/IP8svy4/Nn8If1J/Tf9If0b/Rz99vzN/OT8H/0q/RP9KP1h/Xn9cP2D/bn9+P1a/u7+pP9WAA8B0A
F9AucCSQPFAxsEKARVBKMEqwRiBEoEcwR+BCAExAO5A7gDaQMeAywDXgNgAzcDHQP/AtMCmAIoAo0B9wCfAHgAQQDa/4X/ef9y//3+M/6N/WH9Zv0r/dj83vwq/TP91PyI/JX8u/y5/Jn8m/zn/G/97/01/k/+dP6g/qn+hv5N/iH+Iv5S/nz+bf5K/mz+s/7T/rv+pv6//g7/c//A//n/XwAOAbYBBwI+ApIC+wIvA0EDTwNVAy8DFAMFA/YC1AK6ArICsgKvAoUCVgJuAqACpgJ2Aj0CLQIrAuQBZgHxAJ4AWQAKALX/d/9V/0z/Pv8k/x7/Lf8h/9z+df4e/t79n/1L/fP8yfzU/Or8H/1m/aj94v36/Qn+NP5V/ob+7f5M/9L/HwAhAFQAjwCmAI0AUQAyAB4AFAAIAPj//v/b//X/NQAOAEkAmACXAD4BcQE9AeECxgTQAq3+zv6lAokEmwLJAS4D5AFo/+/94f4CAF8A3wEeARUBWAC0/hcAzADkAN4A9AA3APP+If+//ir/af+D/hAAIQJ0A2AC/P+sAQMBIf4y/ir+Zv6E/u79mv3c/Zf9hfsT/Cr9p/uB/Gn+Tf58/+sB+gEHAhkD5wE9Ak8CkgCnAgoESQIBAWL/4f5j/zT/oQATAGj+Af6K/bD/sAD2/xQAYwApAfEBhALMAvgDCQOKABkB1gHh/x3/Rf49/XX9hvyb/b7+tf2g/jQAJgHUAfAB9gJfBMQDTAJEAscC8gIDA2AD+gM6A8QBogHlAewBNwHV/8f+8/1b/qn98/sA/P37Nfvk+lz7kvzQ/YD+w/1U/Sr/FwBrAP0ALQFQAlUCHQFyASQC5QGQAL7/y/+C/+r+Zv3x/DH+zv5D/j791fzn/Ar9d/1o/Y79gv+0ALb/eP9aAB8B0wHEAc4AKwFkAjQBPgAeAQgCpgJ8A2wEuAM9AysDqgJGA1kEBQQ5A5ACdQE9AKH/4P7i/br+K/4Q/Ej8tPyh+wf8af0x/fr77fvQ/Df8bvx3/UH+mf+WABcBcADUAOkB4gDAAGQB7AGLAlcBEwHHAWMBNAEOAagCRwQ/BJcE5QS7BD0EVwOcApsCVgJ4AocBPAAZAFj+tf07/eP7s/vF+s75GPjN9k/3hfYo9gP3vviM+7j8Fv3R/a/+A/9c/4IBuQLiAiIF8wbVBr0GBwdzByAIMQjPCPMJSgniCLUHgQWuBIQEeATnA3cDVwOzANX9wfz0+hX69/ga91T2f/X29NrzpvLs8yT1+/R69az1q/XF9tz3zPcU+QD/YQQzBCwDZgM0BTAH2AZUBxcJKApiCfEGRQXhA4ED4QR/BIkDZgQlBHQAr/xl/Vj/SP/a//8APAH5AJcApQAbAXYCYAThBL8E1gRmBAwDPAI8AtsCpwNvA9gCiAHe/8P+e/1V/Ir7Gfqw+LT3SfYn9MXyG/N58s3xLvXj+iX9n/tH+9L8Jf54ALUEQQj/CdsKlgrwB+wElQWwBy4G3ANBBKADtf8I/EX6qfiR98r3gvcc9iL27/cp+Z/5X/vl/k8DZgZlB9kIGQzoDWkNzw3tDowPig/HDoMNDwxlCq4HWwSmAV//kPwQ+W31DPLl7hfsL+nG5jDm8uUf5ernEvA89r32Jvf4+okAuQXCCbQN4hKVF0gYmRUnE/oS9RIuEF4NkQyHCQkDUf2R+OPywu9L8Fzw0+6F7R3tc+157rLwEPUX+ln+YgIiBnEJoQy7DykS4BM+FSoWQBaPFUYU7REoD1oMpgk7Bo4BMv2B+Xf1M/Ge7ejqe+gE5uvjbeJi4jPj4uTB66X2XPt1+LD5AAJeCVINzhFyF98b5R0qHNkXmhbbGFYXshF2Dl8MfwfeAIH5ZPIS7i3tmeyI6v3nS+ec6M7qE+0Z8J/16vtSATsFFQgsDFoR+xQBF+0YIBnhF60XXRbuETUNjgqrB3IDO/49+Nvz5vCV7CDoceV94pneE9143sTef+A+6k32jfiK9GX4qgJTCdkNoR
VeHbch1SMoIuMc8RqVHTsdAhiAE8wQHAwIBAT6TPEt7RXtYOuh5m/j9+Ln4qfj6uVR6WzvY/dc/SoBbQV4C1cSKxfgGVEc9B3pHYMdPhzvGFsVHhNpEEMLTgRi/YP3IfJL7KnmV+Ka3pTaTtfo1VjV/9W43CnpkfB+76/xOPy9BCUIfw+4GociASjJKl4n1CSDJ40mtx86GyMZoBPVCuoAJ/c18W/v7OvV5fvgSd5S3XzdSt694dzolvBV9r36t/97BgoOfhSJGUUdJx/PHwAgXh/GHU8bmRnyF3QT1Av/A4j9i/ek8ffrsOZ64dXbv9Z+07HQZNDx2NLk7eVL4TTnHPOu+ZAA6AshFq8eLyXOJKYhviOXKK4payb8IpEfNxkkD50Dtvre9mf12fDI6SXk1N/D2/LZNtt93hnkAuvs7+LytPfU/1QJ9hGiGFkd0CA3I2gkJiTpIgMiOiLwIAQb4hK0DEcHzf/k97/xI+x55WLeb9hA04HO0c461x/fDd0u2d/f++my7dz0XQTPD/gVER7ZIf0fviOiKqQq0idpJ8AkQR7hFTgMQwQcAE38xvV87tDnqeGH3YncR9223njhkuV+6Q3tB/KY+bACnwpZEBEVLhn9G2YeoCDfIYgiDCNuIcQbgRR+D8YLeAZeAN765/M366TjFd1T1gnSY9RU2wjeq9l52NfebuQC6hz2KgKjCHsQfxnAG4EcHSLlJtEnxSeZJsEiOx3/FasNSAf2ApD9V/aa7yXps+I139Deq96E3ybip+QD513ryfL6+wkE0AnvDqATLBeMGhoe/iB/IzwlbSSmIFAbCBbjEZwNsgd3AcL7sfQe7GXkCN3B1RTV/dvx3dnWPdSe2mjfjePp7fr4DAHtCZURxhRgGBQfFSW0J/cnbSe/JWIhwxo0FB8P0ApdBR3+//UL7r7mvuHh3+beXd0I3jLhFOOH5B/qP/M/+xMBhgYBDAQR4hWmGo4ecCGqI7ckpCMKIMwbrhgFFdYO3QdiAdb59PEu62Ljy9qf1yTbwdxy15rSrdQ52jrgjOgU8tf6uwPpCj4OABIGGrAhqyTHJVwngCfzJFAgmhrgFagS2A1lBSv81fSa7oHopuMA4Srgd+AC4QnhPeLU58HwC/ig/LkBaghLDvkRExVkGWseqiHnIYAgPh7oGqYX2RMSDmsIqAOu/Cb0oOy65IvdCdwl3qHc/tch1nfYC9yx4D3ovfGD+t4BmgddC1EPuRUvHPcfXiIvJA4k6SFMHn0ZcRXsEg0PaAiaAPz4dPK07bjpiOaX5QvmFOaC5c7lZemQ8Mf39vzbAdMGzAqaDrIS9xXLGGEcTx+6HsoawhYsFO4R0Q46CnQE0P0q9hDupubw4PXf4eNe5J7dldgJ2kTd7eEx6tXyOfpvAYQFPQVJB30OLBXAGMMb+h3JHecbfxjlE0gR+hCJDr8HHQCD+rT1x/Ap7eDrSOzU7EvsEOus6x7w2vab/AcBhQUICk0Nbw+RETgUVhdfGtwbVRo7FhUSlg9UDZAJAgXEAEv8Ffe+8FXo7+G446voKedz4rvhFePr5Ejp0u5H9SP++ARRBSkFWAmoDfUP2hFsE9MUoBWHE8MOOQvkCU8IeQRz/377Vfgk9BjwnO4m76bvoe+k73LwlfPV+GL9ggBtBJ0Jxg00EFAS4hQ9F6sYkBndGU0YORVuEp4PmgsfB/oC1P6X+rz1XO8J6kvpdupT6GTk3uJl42XkWeeR7CnyePf5+4f+7gDYBTALnw3sDkkRPRMKE0IRuA5vDBwLhgktBjwCG/+m+xf3lfMh8gXxru8+7+LvSPFi9Or4iPyZ/i4BRAUPCXsLEA6LEQEUUBSkE28TkxO4EjMQFg1/CiEIJQX+APj7Tfgn9gryk+xm65Ttl+yz6YnpFeqi6mnumvPk9jf6fP4OAnoFSAnXCygNbg6QDyYQGBAEDy0Nigv5CUcHkQMUAEj9TfqO9wb24PQC82jx1fC58OfxvPVw+tz85/
2LAEkE5AYdCX8MoA8REXkRlBF3EUIRhRDGDnkMAwqtB2kFzwGl/JH4Tvb78rjuwOzl7NLraOl36BXqQuwq7kfxrPVX+Yv8dQA7BHQHngrSDFANcQ1zDqQPkw/WDfALzAqQCHEE8gDs/q38Pvob+WL4kfZ/9HHzJPMq9MH3Dfw9/kD/EwFVA2wFHwhPC/YNjA/mDy8P0w4zD5gOcAxBCrgIEQcrBMn/UvtZ+O31jfIu77Dtle3O7EXr5urq617t0u+A88/2C/oO/iMBfwLIBPIHXAnwCbULdQ1LDqwOww2YC/wJ1Ah1Bn0DYgFk/738j/qF+Yf4qfdR99j25fY++RD9tP/VAHUC2QTNBjQIQwrnDGkOGw6dDfwN3Q0ZDJsJmwdGBgMFfgIL/sn5CvfC82PvMe307S/ukewP65Xqm+uN7hfyBvWu+Nn8Df/E/9wBQAWPB4MImAkNC20MFw0vDPMJgQihCB4IZwWUAukAtP7I+zv6Ffr6+aP55fg/+Iv53fy8//EANAKaBP4GPAhrCYALbg3mDb8N5A28Dc8MRQsfCcgG4ATCAmH/afup9+Dzj/BB76XvYe9+7bPrO+sH7EPu9PGk9Sj5KP0TAOsAhwLoBTYIBQlYCqcL6wu0C5sKCwgYBu4FngWgA1MBm/92/d76YPll+cz5x/lf+Sz5A/pG/M/+TwBQAV4DNwbuB88IdgpmDDcNkg0zDocOBA6aDHgKPgg9BigEfgEV/lD6nfal8pXvd+928L7u5Osk65/rQ+y47nDy4vUz+gD/IAHLAXwEyAe/CcALhQ1uDZwM4wvjCTMHJAYjBqgE8QGE/0v91voT+Yn4xfge+XD5+PmA+kr7EP1Q/wUB3gJ1BVYHDwjzCG4KtguJDPEMDg3FDHYLUAk6BzoFGQP6AKz+4fuK+Hn01PDh7w7xkPAv7ivt0O107iXw//Lh9Z/54/0dADUBrAOOBpwIhwpCDCINPw29DEELEgljB5oGfQVRA/wAzv40/O/5Cfnv+IX4XPjP+EH5H/pb/OP+ogCVAjUFHwcmCEEJlAqyC2EMxgwYDecMiQtVCSYHEAXzAtoAqf5D/ID5yvXd8dPvpu+u7g3tE+0e7nzu7O/88qf1O/gF/Fz/cwEEBPgGnwiiCTULlAwEDTEN0QwyC4AJcAixBvsD1QE0ACT+ZvxS+xr6o/i690L3QPel+Gr72f0t/1gARAJcBCMG3QfDCWsLPQxhDIUMWgwlC2kJ5gdCBlQEGQI7/y78wfkM95TzGPF38L3vMO6B7T7uOe+R8MbyRvXD98v6zf0iAHICLAU0BxYI2wgZCiALjQtbC+IKeAq0Cb8HFwUFA3oB4f+T/nn9I/yo+nb5gvhS+NX5jvyW/nb/dgA4AuEDRgXyBvcIvgqNC3MLNgssC7kKtQmfCGMH3wXSA98Alv0v+zr5YvZJ84bxhPAA76vtvu3F7kHwH/IY9OT1RfhM+/T9LADRAnIF2AY9B9wH2giaCaMJQgkDCesI6we5BW8DygF+AFv/Zv5p/Sr89Pr6+UX5vfnU+1j+rP8nACwBsAIxBNMFxAeECZQK0QqHCksKdgqJCvMJ0wiQB+kFnwPlAFP+PPwH+uX2CPS+8tfx2O9G7oDueO838E7x6fIp9Tr4Svtq/Wj/5wEQBEYFUga9BzkJUAqMCvkJgwlICRwIAQZYBFoDGAKNAFT/D/6z/KT7tfrv+X76f/xB/s3+U/+MAOkBKgO+BIwGGgj2CPsIsQi9CPIIxggPCCYHVQZvBcwDkgGf//n9ovvY+On22fWG9BLzQ/L48ZTxhPEr8qnzzfUQ+Mv5G/uc/Lv+7AB1Ap4DKgXtBgYIeAiuCN0I2wg8CMYGHQXMA00CnQBd/4z+if18/KT79/oF+wP8+fxw/Tf+xP9FAVICagMCBYUGXAefB/IHggixCA8IYAcmB8wGyQVsBPwCZQGM/4b90/vC+rX5N/i89qL1WvRK80zzX/TK9Sr36/cd+O/4tPov/GD9Lv
8+AcQC3gPRBMEF2Qa0B6oHzQbsBT4FSwQZAykCggGVAHv/hv4I/h3+jv7W/gj/nf92AP8AZwEcAiUDEASEBHsEsQRVBaEFWQU4BT4FEAXBBFAEogMJAz4CsQAS/zH+fP1i/Dn7Ufpe+V/4efcI9zj3xfcI+N/33/eY+I/5Rfod+6T8Sf5//2kAawFzAmoDAwQPBAsEIwTYAyYDkwIUAmYBrwDL/7j+O/6d/uj+1f4J/7D/LACBABcBFwLoAkEDgAPeAywEcQS9BPYEFQVOBV8F9gRwBCwE/ANTAwgC0wA+ALH/eP5U/bH8Hfx5+wT7ffpV+tf65voT+v359vqU+5n7EPzh/LH9cv4R/1X/0/+dAMQAUwBRANgA7ABxAD8AZQBdAB8Asf9X/1n/5f9wAHYAZAB8AHQAXwCpAB4BmwEoArICBQNGA3IDqwMPBH0EswT8BDMF3gRLBP8DnAO0Ao0BywCTAEIAOf9B/u79lf2u/Bn8e/ww/WH9Qv1R/Z/95v0E/vj9If6b/h//Ov/8/qz+iv5f/tv9Mv3z/O/8rPxs/Jn88Pzl/Kb8qfzv/I39hP5r//3/jwA4AbYB8gFUAgADiQOZA6ED6gMhBO8DrAObA6wDpgNuAyQDDgP0AjQC+gBSAGMAOwB3//f+3P6S/hn+8/05/uX+rv8VADYAqQAuAToBFgE3AVIBMQH9ALAAQwDW/0T/bv6Y/QX9bvzC+1v7X/ts+0n7EPsZ+537g/x7/Vj+Cf+o/zgAogD8AIQBNQLTAh0DLAMYAwoD/QL4Au4C4wLMAo8CLgLNAXQBBAF3ACgAJQASALP/a/84/+j+m/6X/uD+c/83APMAbwHUAV4CAwOAA+kDTwSYBJcEOgSZA+ACMQJ7AYgAkP/F/vT95fzn+yX7hPrP+Tf5Eflz+Rv6wfoz+6f7TPz//Kj9dv6H/7EAmQEXAj8CRgI/AhoCygGMAXgBYwEOAY4ACgCa/yD/kf4W/ur9+f0K/vX90/2q/Zj9yv04/tv+xv+kADcBtgGEAmgDLATIBFcFuQW8BXAF7QR5BAcEWwOYAgYCgQG/ANn/A/9//jL+1f1m/UT9e/2//cT9qf2b/bb9Bf6M/kT/BACbAAMBUgGRAakBrwG9AbgBhwFBAe4AlAA4AN3/hP8s/7X++v0p/bD8qfzU/Nf8l/xN/Ez8h/zr/IT9a/5f/xQAfADaAFUB5gFFAlkCawKJAnACDgK1AZwBcAEkAbAANwD0/5//YP88/1z/f/+L/3r/b/+e/+D/FAAhAEIAjgD2AEwBhQGcAbsB8wEZAg4C3QGdAUABxwBmADkAMgArAA4ApP/+/mD+Ff4J/gD+5f3E/Z39Uf3e/KD85fx+/RD+g/7o/kX/fP+n/wcAbgCmALEAtgC2AIsAWwBGAEIALQAFAOz/+f8HAPD/vf+g/6z/x//Y/+j/CAAhAAcA1v/m/0gAuwAeAW0BsAHlAfsBDQIuAlwCfAJ0AlMCKwL+Ac0BqAF/AVIBHAHxAMEAbADz/33/Qf8q/wX/z/6o/pr+mf6K/mz+f/7R/hX/K/8e/xj/D//u/sL+nP6H/oL+c/5S/jj+Nv5Y/oL+rv7e/gv/HP8S/w//J/87/0H/Ov8u/yP/Jf85/27/uf8JAGQAwAAfAWMBdgFxAXABkAG6AcsBzQHaAd4BygGmAZsBqgG0AZsBWgEFAboAZgAZAOr/+v8mAC8ABwDT/9P///8sADcAOwA7ACMA6/+p/4T/fv9r/0j/K/8a/z3/dv+v/8v/z/++/6P/c/8j/9z+o/6D/nH+WP4j/t/9uv3M/fv9PP5+/uP+Uf+1/wIAKABDAF0AfQCkAMsA6QACAR8BIwEJARYBSwEsAYgATAA4AY4D5gRjA1wB/gCKAQIBTQDEAFIBxQDa/3z/+P+VAMP/wf5j/i/+QP7P/Uj9Yv13/db9X/7h/ob/5v8uAEEAYQDeACwBVAE5AdYAqwAUAF7/IP/M/pb+vf55/kP+kP65/gn/eP
+h/7n/cP8h/zD/W/95/+r/YwBPAM0AtQDIADkB5wA6AVEBVgFVAQkBHgH+AMAA/QDUAL0ACAGuAI0A6QB+AKwA6QD7ACMBFQGhAf0BTAIgA5YHFwrdBQYCnwAbAAv/4f0c/Uf9ef1I/K/7VfzD/Nz83vw0/IL79vz2/Qb99f0x/br8r/7J/Tj+3f6J/lv/fgDpAYsB2AHBAZsACgGAAAoATwC0/4cAhP8A/k/+UPy3+y/87PuE/BX9HP7Y/gwARgGAAdgBNwLjAUABzQFKAqYBHwF0ARgC6AFiAcwBxQFSAVQBmQG4AZ0BLAG5AKcAcwCxAWAChwKDAwwENgQFBJ0ETQRxBOYEFwSnAzsD3AKgAVQAUf8Z/pf9L/3F+0f7zPsJ/CX8Tvwf/Q79Af0s/Vn9sv3c/bz9AP2m/Xv+NP63/SD9Vf1k/df96/6U/lP+L/+V/03/Qv+r/2X/nf+lAFwAWQBfAbsB0wGsAoUDIASQBEUECwNPAogDjQNSAisC6wH/AHsArABIAbABGwJeAY4A1gBWAMH/LP+a/4cAygCFAWABWwAnAE8Arv/4/h//Hv82/lD9CPy2+gf6G/ly+MT31fcL+Sb56vme+4z8Of1V/sv+sv1//Ub+vv14/ZH+0P4b/3sAZQG6Aa8CXQQtBbQGoQi5CCsIlgfDBnAFeAR9A5sBegD2/0r+jPxf/dL+W/+XAOoB6gJIBAEGRQYABqsHYAmCCSIJIAk5CUMJWQk8CF4GEwUnAwAAvf0f/Hr6E/nD96P2dPUV9cb0DfQ69OD0U/UC9oP2G/ZM9WT0ufJC8VjxYfF28mb3Lf69AswE1gYGCewKkgzjDT4PYA/9DEMIXwJB/Qj6fPfl9G30Q/Zc9/73l/mq/JQBSgceDP8P/hOgFpEWXBUrFFwS+Q8SDQYJyQTUAb7/SP1d+2n6+vjt9p71/PQH9Vn2Vfgb+kb8Vv4J//z+zf4I/sz86vok+Fn1YPKE7mLrTOnS5pbmienb6yzzQAVnE0YTJxPEGKQZ7xZoGBYZohUhEl8Lvf1Y8vLt1ujj4nXjF+mg7QjxGPSM90/+gAc7D0EVDByFIfUhbB6hGaAUsQ9/CsoEZADE/kz9S/mQ9eX1sfgC+5H8JP6c/30AAgA4/w0AmAJ7BLsDHgEs/sH6rvav8njvye0p7dTqKOe45erliuQ25VTrbfGz9h8EtBV7HOYbNx8rIUscsBctFgETtwx4AxL4Je6T50viDN8m4MPl3+279EP4BPzgArIJwg4PFYgcgiDhHz4dXxlRE2gM4wZHAoT+qfzw++L5Wfc39wD6pf3sACME4wZlCI8HqQQHAl0BVwLMAmsB2v6q/AX6jPUl8bfvbPD/7yvtHOkL5UjjruUN6YDrCvLh+5cBkgiCGe0lciIUHsgeRRhODb4IKgVk/iz5+fFA5zDid+Sa5efl6Oyq90H+3QANAysHFg59FKkXaRtrIFwg4RkaEvwLegZOAX/9Vfr59zH4KfrL+gD7oP1mAoAHNAxtDyYQ3A74C8wG7gCc/bb9k/4u/TH5s/Vv80HvD+sT7ETvKvDX8Hzw5uwH7QbzmPaj+JL+qwCC/IgBdhBBFq0QBA1SDWgLeQT0/CX89f8e/iv1ou4r7hLvRO747ULy9/ouAav/qfy+AYIK0QxUDRcVVx3YG0kUSw8bDccJEQXHAYsBQAPSAzMCOQEhA7cFDQdrCRgO4BFOEd4MLQfWAUj+rPxA/KH8h/yx+Uj1gPGH7mrtA+8Z8RzyGfPN8rrvnO207s3wU/Mf9276kvnL9fP5+wlhFdAQrApVDWwNZwUGAbsD5AS3AWv8bvS+7jrwCfD16r3tnPiT/PP5vfv/AHQFxQksDj4UXhqZGiAWQBMXEtEPiwyfCUYI4Qg6Ck4JwAXjAyUFLAV5A3kEXAgFC7kJEgX2/zz8ZPpl+jb7BPzT/bD+AvuV9a3zL/RH9G71zvey+O72NvJx60jmUeZN6bzq2+vr7rnvsu9q+fQKCR
QgEmoTCRkZF/IOiQoWCrEJUgWK+VztYunZ59vhvN9w6JHz6viq+gT9LgKyCaUOZRE2GLUfwh+UGxEazhkGF0MS7Q16C9AK7wgDBEP+mfom+Fb1cfSI+Nr/SgWrBlcGmganB5YJbwxIDuENhgvwBgQAiPnc9eDzOvLt8Arv+us06cnnuOaG5KThJeGr4v7iveT16uPwOfigCkAgRSYTHpkXqxR0D+oKGAq6CVcGG/2a7Z/fstrg3EjhBug18qf8lQFfAGgA+gYuEBwXmByUIlQnTSfeIhgeVBr7FQsQFwllA+MA3/7y+aTzw++z7i3vMPLl+L4BPgotEXwWhBpEHeIcTRiZEQMLfgOg+l7yoew76XfmNOPj4CjhNeNY5Wbm9Oau6AXrsuuW61Ltle9l8Sf18fg4+iMCPRQrHnYWNA42DvcLbAb3BuEJngj8A0H5Rul93+DfIeJV5Tzv2vs+A74EDAUHCSESIB0SJvArLy9KLrQnqR6rF1MS8Au9Ax/7l/TD8Urwhu5+7xT26f+9ByINPxOSGT4cvBoIGMsVNRN7DnQHef8c+Jbxhevk5THixeDT3kfbk9nI25TfTuO857/sxvCo8yj1vvRW9ZL3zPeT95r5n/nY+RIFXBYsGwgSoAvbDDULOQQKAOEBEARJ/QXtOeFS4STl5+lA9mcHsxRUGoUZTBizG7MhvCRVJKokJyQXHEYO1gSRArkAv/uo+Fn7GgDsAjwF3wdrCssNgRC8EN8QChNaFBYRpgomBTUBCv0M+Tj1GPD56VHkCt8G20zaldw/32zhNORy55PqfO618+H4PPyV/Nv5EPZ88/nxV/B878DwQPFR73fy3AFUE+QW3xBhEcQW5BHoBZICbQfIB+H+r/N58N/11vl9+eL+ggxhFjwVDRC3EOIWVxu9GyccKx+oIOwbWBUDFBkVLBJMDOYHXgZ+BkMFnQHH/pT/ggHQAHj/cgF8BPMCoP1r+f72UfUQ9X71NvTM8Njrkebq4ibiSOTM523qYOyh7nXv7O5g8FbzR/QO8yvyOPHq7c/p/egu6sXpEevl8A71lvcTBdscSSlZIoMaExyQGz0ROwYTBJIG6AER9JDo1eb2637z5/yLCQ8YLyE1IcweuiHEKLksQStBKYkoqCN6GGENlAcGBG39ePWl8Tzzc/Zn94v2XPeW+7kAIQN7A6oF7Qh7ByMAtPip9Rr10/Mn8bvuQuye55fhzd0k3rrhUeYa6mDtX/CR8d7wBPE99EH4+vhz9YHvuulK5l7kneOv53Dw/fUj+foGHB7qKM4g9hjsG0EdSBP5B0gGbwkzBpb67/D6847/sAWDB5AQpB0yIoweCR1DIkQoRyjXI0wgYx3HFkkNYAdPBmEDHPwR9gX1ZPc9+t/73fsP+xn7J/z7/Fb9K/4W/0D+gPsV+br4g/m6+dz3APMm7Abm3eGJ37DfxuL85RXm3+SX5ZjnR+sY89z7//83AGD9AfW96S7kc+Ri47DkSO/v9ory/vYDEAYinRsvE2YapSG+G/wUfhirH+ofxhRrBGz9WQPlB7cFOAolGGkdMRMGCWkL5BS3GRUYkxgoHg0f0hVLC4AI/ggQBUD+afpm+yb86fde8nnyZfdN+vT4p/cn+cP6wPlb+Ez6wv67AFP++vlR9S7vvefg4czfeuCG4eHhtOIe5YDox+s673bzMPhA/Af+r/zk+SX2CvD76d3nKOcH5Rvl8+k+7j/xlP7/FgIkXB3rGaommDGKLnIpcC7JNE4sGRbxAsT9ZwGCAcr96QDNCTsLfgKG/KYEUxTLGy0ZwRjJG44YRhCqDPEOFRAZC88Bpfqc+O/2BfJj7Qvt3u737Zvpqece7VD1K/iW9+76PgCx/+D6Bvk4+mP50PV18ffstejP5YTkiuRs5RTnw+mv64TrJuwM8J31fvke+of4UPV78CPu+fCj8+jzWvmBAhABh/hi/u8R1R2+H1Ij4ijJLKct+yjxIZcgayHtGAgKlARKCiUMig
WEAdADrgUZBNICjAbxDuQUxROVD8ANpw6ZDo0LJQlVCHMELP1p9sXxIfBn8Jbu2ulV57/pX+3J7r7wwvUI+gD6ffiW+Pz4+vg4+UP4A/bT9I3zFO/66QTpkus47WzsCusi66DsqO408jb4Lv4kAQQBhP+v/YT7PvqI+2P9qf3Z/X/+l/yO98n2jAAiDloWrxhHGfUbNiL+J9UpiiuQMJAxiCeQGpgTPA5+BrEBmgGsALj7CPVE8Yjyr/XY9yP8MAPnBtcF4gWRCLAJwgidCLwIBQc1A7T9cPYB8J7sSeo/59PlAeZw5ODh+OJ/6JXuj/IZ9Uv3Yvmw+1L+YgAmAosD1gHX/Cn51/j9+TL7N/xw/AT7ePdS80XykfUF+tn8zv0z/en7nPqH+Ef3YvkI/hIB1wDBAWEJCBIUEUkL1Q9WHckk1SWoKk0v9yq+IZAa+xYxF84WGhHhCAECavvd9CLwNO8m8lf0iPGI7knwhvNz9v/6e/8YAgEDdgGe/wQC4QXcBXMDTAEX/fn1iO/R7M3s/OxQ693nxeRJ4zTjP+YH7RT1VPyLAa8EKwhXCycLTgrHDL4Pww7ACoEH3AVoA+T+qPpd+OT28/Rk8u3vu+5s7kDu6+5P8Qf1Rvls/If7hvmSAPEPMxmPGfgcPCOWI5cfEx7RIJQkKSNIGwEUjRB/DBoG+wDU/mT9Hvos9FvvD/Cz8zj11/XV9+P4ZvdA9VzzzfK79VT6cfwq/CH76Pn4+Ob4i/q0/uECdAOhAPn8DPkt9gv2FPdr+ET6jvlE9nL11fe2+n7+QQKuA+IDFATaAt0BSgQ4CBYKvQn0B5IFbwNHAYn//P6k/fX56fVa8xbxmO4a7eXug/S1+D73QvcL/2QHVAo3DfcTgRpzHLEaDxiAFjIWYhW0E2sSrRAQDSIIGAPo/wP/cf0a+/X6NPsV97vwF+6X79DwMvDc7xrye/VH9iL1sfbU+jf9pP1R/ywC5wMABOgDFQVDB3oHOgTt/x3+Fv9Q//f9tv7/ADsA/Pwf+6f6rvrV/MAAsAJ8Am8CpQFh/5H+7QAdBJsFxgUwBdcC5/6h+9754/hN+Pv37PYo9O/wI/BV8bfxOPJE9a34Bvp8+7b/7AV7C/4OWhJ/F28cTh5DHqwe9B5IHJEWChAyCsMF0wEC/Eb12PAb7mnqVeew5/LqQ+5E8JnxgfPY9QT4QPrY/Kb/HQNdBrwHdAjXCT8JyQX/A54FtwZnBZsDKgNBBMMEIAPiATADLgUFBlkFugKZ/0D+AP4m/XX8Qvzt+7D7h/sR+037Sfzk/Nb8zvsF+Sj3QPm3+7f5RvbP9Ony0+827tbuTPH28630qvQD9tD3pvpoAEkH+Qy7Eg4YohoXHNsdeR3BGt0YYxhXF18UEg/0CCkE6/81+q70t/G28OjvwO6R7cvtHfDF8k70gvbj+pn/cQKpBMsHAQvMDLkMgwtbCi4JPAcTBaADiQI9AZn/aP0K+y/6Tfux/MX9lv+nAPf+yvxM/fn+K/+x/ib/YP+u/d769/j69533L/gZ+Jj1DPOa8u/xRu847bDtLO93733ume5Q8cz0sPc2+zkAgwU1Ci4OIREaE/oUNxdGGfUacBzLHLgasxbzEvUQlA9xDEkIJQX+AQD9C/hF9bH03PWZ9xj4G/hv+dL7LP6gAJADpAabCDQI6gUcBPoDYQQyBPQCTgBF/fT6U/mT+K/5rvus/ET8wvsg/GD8mvux+5/9OP/t/s79B/1a/P76WvmK+Fr4c/fx9Yb0FfOs8QTxnvDh70nvyO+m8c3z1vRn9cn26Pid+yf/BwPMBl8KYA1kD4wQrxGCE+8UWBR5EvUQpg+JDm4ObQ6xDBIKeAjjB4oHVwcSB4IGRQVQA0IBkv/J/Tr87ftl/ED8MPx//eb+4/6c/in/HQDlAIEB7QF1ApkCfwGh/0z+iv2u/N/7nvt9+/v6KvpF+bf4x/jK+Lv4WPlQ+o76ffrh+gj7Afpj+EH3yPZt9tH16P
RB9LXzX/Lz8H/x1PPX9YD2nPb29hD4xPnd++b+6QJZBp0InQq0DNMO/hDkEtUUwRZ1F/gWnBaMFtUVgRT5EicRzw4mDJ8J9wbyA98ANv4y/NX60/kK+TX4SPes9v/2Rfha+u78LP9CAPQAOQJgA2cDDgMbA74ChQFAAJH/9P5q/aH77frg+jv6DPnH9/z2A/c099L2tfbG9zD5wfmY+Tv57Pht+Cn3NvWk89LywPJW86PziPIY8RTxNPJN83/0XvYX+Vv8VP+nAZwEfAhgC/kMKw8BEjEUMhanGBsavxnQGCkYWRc2FhsV0RP+ELIM4ggaBhYDMABy/lP9l/tR+dX3/vcn+TT69/r3+0v9h/4m/3f/JgARAbsBKgI4AgQCqgGhALH+M/0u/cT9TP3R+2v6LfmB9+P13PR+9O30+fW69lP3pvgt+kv6pPnt+Xb6nPns9732HPYb9eLzcfNm86by9fFX8lDzePRg9sj46vrQ/HL/UQOcBygLJA5mEcMUCBfMF5oYnBoaHJgb/BkpGK0VEhMJEdYO1QvdCDwGVANxAI/+S/3u+5D61Pm8+bz5UfmZ+OX3B/iO+dr7k/2F/nv/fACfAHYAZAEVA7cD0gIYASX/T/0J/CL7YPoQ+sr5qvgD9wX2ffbB92j46feX9+333vce98L2Qfey9+f2IvXR837zMfPO8iHzPvQc9Uj1XvWL9i75B/yq/Sn/IQKoBfkH5QnZDOIPwBGPE/AVsxe8GOoZSRr1GNUWIxWjE+IR9g87DjoMmAmpBrcD2gBn/nz8fvoN+DP2xfVs9iT3ffe+90T4+Pj9+Z77/v1dACACEAONA9MDBwTeAwADtwG7AL3/G/5C/EH7tfqi+Tz4XPe/9ub1NPVJ9Y31dfU59fv0p/T99Dz2Fvfe9tD2WPdu97P2WPbx9mr3NfcC97f3Lvl8+iT7rfvo/AT/3gE3BeEIPwz4DhkR0RJeFBwWzhe5GFcYHRejFe4T9BFtEF8P8A19C4EIiwW/AkgAB/6j+5j5afi299z2R/Zo9uj2SPfh9xX5n/op/B/+hgB1AlIDrQPyA+kDSgNzArIB6wDE/7L+4P1N/Nz5Jfi49wn3IPWD803zlfOz8zL0A/WT9fX1h/YB9zH3+PZz9vz1qfWN9S72d/d0+Ib4qvjU+Vv7Sfwu/fj+ewHqA1kGDgnIC+ANWg+4EH8SpBRkFoMXExiiF/oV9BOXEm4RhQ8eDeAKYghnBbMCxgAD/+T80fo7+Z33BfZp9fj1hfbH9rr3gfn1+t/7Rv2Y/7MBtgINA3YDeQOiAn8BwQBGAH3/RP7J/Cr7U/l/9z/2yfXo9e/1dvWl9Az05/NG9DH1D/Y79iz2L/ak9Yv0HvSt9G/1FfYR99337vck+F/5APs+/IL9U/+HAf0DzQa5CWYMyg6wEBsSoxNqFZAW7BYtFwgXZxUBE10RXRDQDpsMYQoRCEUFjwKUAOP+0vwh+576UvoM+YP3+PYk90L3ffdj+MT5RfsE/fj+hwA4AbIBjQJKA0MDpQLbAc8Aa/8B/s/84/sX+zr6EvkZ+ML3nPcR9232EfaF9Wb0j/O48zf0GvSl82HzCfOA8oXyi/Po9M71cPZq99z4d/oN/JT9Mf8sAbADaQYGCXALtg2ND/EQPhKuE9gUVRWHFXkVvBQwE6URixAnD+0MWwo1CGEGawSLAjEBCACZ/hT93/vJ+pP5p/hp+Gb4SfhI+MP4q/nj+nf8Sf6i/1sAzwBSAcMB3wHiAc0BVwE9ALj+Nv3/++T64/kP+Wr4c/cx9kL14vS19IP0n/TL9Kr0K/Sp81nzMPMj83/zXPRs9Q72+PX+9Qv35fiv+h38o/1b/2oBIgQkB6UJjwtdDUIP8RB6EiUUuxWsFnYWUxUGFCkTrRLlEVQQHg6zCzwJuAZlBI4CIQHg/7f+a/3p+5362flh+Qz55fjh+Ob4NPky+rr7Q/11/oT/oACUAesBzAHHAcoBQQFAAFD/N/5g/Df6pf
iY92j2VPXN9Jj0Q/QK9Dz0fPRw9Gr0YfTa8/LyivKy8tPy9/KO8zz0o/Rc9f32IPkm+w79BP/5APgCPgXSBz0KSAwADlYPShBbEdASURRhFbsVZxWHFH0TkBKKEQ4QCg7fC9IJyAeeBa4DJALHAE//9P3b/O37OfsW+0T7TfsU+9v60/r0+oP7uPwH/vz+lP/4/wcAqP8o/9f+gP70/UH9R/zj+lb58vfL9rn13fQ19K3zU/ND83vzCfTQ9H71yfWu9T31pfQF9Lnz3/Nq9DH13/Vv9lP3zfjP+gP9ZP/FAQ0EPAZoCJsKswx/DgAQRhGDEs8TFBX2FRwWnhXRFPET/RLuEZUQnQ5IDA4K8AeIBQAD2gA5/+/96/wW/F/7zPqe+uL6TPuL+9L7Kfyi/ET9I/7//pL/tv+R/2L/N/8a/xf/Av9l/iv9ovsi+uP4v/eV9kv1C/Qk8+DyLvOl89rzCvRp9Nn0ZvUe9oz2WPbL9YP1SvWr9E70fPRE9GHzPPS0+OL90P+R/yABsgRGCBsM1w/MESQSYhK9E+MVFBeKFpMV6BS3FBMVdxT0ERAPmgybCqQJZQhRBfsB0f9//iH+e/4O/nb8N/tJ+8n8Vv6C/sv9Ff2h/MD8tv09/pf92PwM/af9qP3x/E38Hfx2+/r5svg++B34kvc+9nv0XPNn81X0hfWW9hz39fY797f4Cfr++UD5Pfi59o31O/U89fT0X/Nk8Nrv9vO8+D76KvoK/M0AnAZzC0sPZhJJFNcVHBiNGmEczBxrG8sZXxkpGcIXBhVuEecNUwuwCcsHmwSyAGj9Z/v/+pT7V/uQ+VT4ufmh/FD++/0t/en83/yh/Ur/awBQANT/fP8z/73+vP1x/C77e/la91L1r/Oz8j7yAPEh75vufu/F8KPyhPR99U72ZvcO+Kr4g/la+YD4X/gg+Jz2T/UO9ZHzp/DO8MT2sfyi/Mn6UP61BZcLbQ8FE78VqBY4GMEb2R3pHD8bVxkRFwwWnxWmEzEQSgxlCRcJvglfCIwFTwP/AXUBoQHFAZQAZf6l/Y3/twHJAY4Ay/+X/1D/Bf+R/uD8nPrQ+er5B/li9/n1wfTC88zyI/Ft77Dun+5r7hbube4s8DHyTvOM9Hz2PPiE+T36xfqy+8r7t/pb+v75B/gk99P32vQe8LjykfvI/iP6//iAAMoILgzgDboQqxMmFmQY5xkYGmkZSxidFt8UcRRVFKwR3QyECVEJogqtCmAI/wWxBSsGhwaMB8wHhAWAAx4FcQjMCOEFVQOWAqMByf8M/rP7hPfI8/DysvJr8K/tROy467zrL+xC7CHso+zY7ffuiO9e8Lby+fQf9W/1DPdQ9+z2m/hZ+aH38vdi+dX3cPd5+nH4yvFu9AgBjAbn/oX5OwHjC2sO9Q05EHYRtRLWFjgYOhXNEwwUOhJ0D1YPrRCwDkcJZAbFCDkMmgy8CtAJGwuVDTAQ0BFtEYEPkA7JDxURbA8rCzwH3wSpAsf/k/zE+E/0w/DJ7ujsV+rV5xfme+Xr5Q/nnec95/nnBut17jXwRvCK8B3zm/ZP9yz14PJH8qbzcfTM8xfz1/Fl8iL2L/Wn71r0FARaCzIDiP3XBr0TYxZOE2ITIxSoEyIVcBXAEdAOBg4hCx8HuAaJCaMKXQcrA7MDMAk+Dt4OYA0mD1cU6hdFGasb0Bz8GUAYShqUGTsTTA0ICogFFQDJ/Pv4avIf7U3rWeoD6K3km+LQ4rvjaeRx5QTmc+Zc6Avrf+19767vT+/R8MfxKvC47oHure3J7HPtju6J7uju0/Cb8BTwqvnzCYcNPgM7AdsOaBqpGRsW4BaaFp0TmxMSFbURswuTB8sEVwO/A0EE5AOoAn8BCgQSC9IQPhIoE5YW2RpVHrgh6iP7ISAenB3mHsgbBxSADVUKcQZKABr7oPeJ8+HuYeu16QDpnOe05dbkTeTN4zrlKOjK6QTq2ekF6s7rQu627vfskeos6DznOOhK6UDolu
Uc5Rvojung6vv1uwTtBBH74PztC2YVdBNJE+QWPRTwEDcVyhaOEJQLkwlSBnIDEgNgBJgEtQFaABUFLwudDnoRMRQzF4UcHCKRJWknNibDIg4ioSPLIJYYdxG2DXMJhwPo/pD7f/Zl8OTt/u5B7lDrrupu6zLqcOnR61buQO7H7GLsPe147ePrs+hV5Q/jOOLz4vPjfeGl3f/eUOL94MbjKPOE/1b7SvTX+l0I2g2JCwMMGQ83Du0MGA/JDyMOggz/B4gCXwJLBaIFUAMtAW4CngfjDO4P/BGpFLoZ2R94I0kmPyn9JzglZyaqJnEhkBvMF7ISkguzBi0FoAGc+fnyX/Ju80TyrPA07rPqduua8Avz1PHz73/uFu+98HzvrOvB5wjktuGe4VHh7d8O33/efNxk2qTfd+/++2f3IO5k85gAZga4BWMFTAUnBN4D2gQXBXkElAScAir+cf3hAdkENAQJA+oCVwbUDJYQKBFKFJEaoB5/HysiLScwKXIowChhJjMgRx4oHwUYdQ0YDOIN+QWA+mP4if3L/tD5iPTI8h70Bffp+Oz24/M19L703vGe7mLt3uwc6wLnmuNr49viPOHg4Z7gddwi4cbwtfll8mzqm/Hx/sgBg/2J/v0ARf5G+yb7Ofx7/Nb5g/Y59vL2F/ij+yz+k/3K/oADxAdhCiEOKxRGGZwaMxuWHpki0iP+IrIisyEJHhQbBxoyFlMRSRDSDqkIKgNNA18G7wYtA6L+Z/28/18CCQMhAnr/6fpL96718vNx8hvwieof5nvlkeO14brj1ePb3wPjC/JD/QX3fu0x8zn9af0L/KD/UwBw+/X2T/Xw9GP1Yfaq9Mfu/+p57vbzNfZo9yb68f29ARgFlwgpDeQRYBVfFrUV+xbDGnAcBhoRF0UWXBbiFP4Qag2mDZgPwA2JCckIkQuyDfAMMQq5CEYL7w4xDt8JLQcFBsECAv73+TT2JPIT7l/q2OdT5/HodegH5untk/+2A0r0pe6W/IEF0gAS/A/8yfzA+mL05+/+8G3yIvGt7X3oh+ZH6wHwWe+Y7lnzdvrM/oUANwMbCXoO2Q4wDRgOxw+CD/UNlQxhDGwMIAocBvwDwQVVCfYJNgfwBn0KOgzECv4Kqw6CEWYQaQ5KD/sQxw/sCwcHBQJr/p/89fqS95zz3/Lt84vw8u2B+awLKQ5qACT6YgSBDVYIeAFEAyUCUvgp8VXxa/N48vrs5ObW5TDp4+z77iLwDvMC+EP7Y/yJ/uABxAStBUQESATdBuIGEgPGACUCwAOTAlL/2fxE/e//mwGiAG0A9AJ7BAUElwV2CDgJhgkbC3AMQw05DRsLNwhPBS8CIgA3/jL81PzL/br7S/qP+tn8CwoLHRkd5QvkBhQSNBj+EQoKVwclA1P4OO8M7mPu/O3t7gPtOOno64T05vnz+iv9+/9QAcoDSgYFBSAEtAXFA3X+lftY+xr7d/mT9qv0qfT39Aj10PXy9xP7if10/t7/5gJ0BfEF3ASlA0cDSgOyAooBm/8f/Cr57Pfg9I/xHvRO+CT44/hm/WD/ogHxDzMmtC6ZIuQWihkSHi0ZGBMSEvgMhwA+9fruNOuO6STsqvGG9Pj0KPpeAVMDDgaWDeYQeQ81EMUPJAu6B8IHVgWb/QP3V/Z59VHx0/Aj9A3zEe7260TuqfJC94T7Sv8GAeQBtQTvBeAC7f92/lP9nPxW+evyaO6x7LHpXOTA4mHph/Bn8073cvn3+u0NISsrMOQeOhmhIhwkCRkVEVwSqA/1AUb0cu3X6C7pt/F997P2DPszBnULxwgFC20VxhoiGHAXHBahDukJnAqwBt3+hPwN/qz7NfZf9ar5dPn88tDu0u5U78bx3va5+yL/ZgFoAicC2ABi/5r+Kv2f+XH1Z/DP56feuNwA4TLjTeRK697x6O9y9OwOwCcVJCkXiBxcJh4gRxOLDpsNtwfz/OPxbOsd7ALw3fAT8Yf4HgVHDRkP9w8tE3oX3R
lrGO0TeBCFEA8Q0Ao7Axn/uP+2AJX/1/8dA2EDL/1+9avyevV1+b/6R/yBAJ0BPP2J+kz8u/1c/ZH8zvr69svwTuk7497gI+Lc5FLoFO107zjwMvxVEz0e4xTzDqUXeBsREKUEWwOWA3X8xvAH6PLlp+h57OzvevUBABIMTRK2Eu0UgBoDHEQXXhL0D8wOiQ5MDBQElPkO9Tn3Yfk4+4ACQgzjDYEFI/2b/EMBMAWeB5cJnQjlAqL8Q/rk+vX79/uP+lX3HPCk5iTjZ+Zo6tjvoPZc91LyBvV+B/0bjR9bF04VchQeCKX6wPqmAZ0BPvpn8nHrQuMQ3g/ip+yr92wCFgzVD74Omw9fE4EVChTsEUkQZwv9Axz/lvvK9Z7xXPPO98X6df1EASIEqATABF0GmQieCekIMwcJBR0D5AJLBIwEPgIg/of3S+8s6uTqC+7+72ny6PfT+o/4kf24EQ0j/R49EtMP8w+yBmH/ZwMYB34A3PS065Hm1OSs5uTrDvO8/E0HdgvDCAwJuw94FFgSFg8uDhMLPQSH/jH7h/Z58OrsuOzE7R3w5/WL/fwCkQV1BkgFQQOkA98FhwZMBisH8AbOAj386/W08DDt0uwo70DxkfJ69VL4l/cU+c8HtR1oJo8eeRnLG5kXpAzNCGMLFgYe98/rOOp86jrokuoy9eH/NwSTBE0EwAWgCg4QgxIXFFMW0RZrEwsNBQeIA2X/GvdN7r3pc+is6C/rKvC29b345/jP+tsALQbTBj0GKggOCl4HUQA9+UD1y/K87oTqi+rC68jm89945gb9AxGPErcLuw03FUoWOxSNGGkfQhtjCZT2vO007Bftv/G/+QMBKAUcBJb98vdg+xgGeg11DtcPcRKXEYoOmA1nD7ASShSiD3UF3fzb+FD17fAo8E3zAvS+8enyP/dc+P33//wvBeUItQaDAWv7BPcv9U3ztfAg8Zny9Owa5L7pR/8RDlYKmgPlBT4KiwpsCx8QzxToEWgEHvS16kzpp+zQ8mv6nwJoCRcKkwQSAQUGig5KETYO5QpiB1QDNgKWA0cE7gU6CusMUQqkBrkGqAcbBZIBVwDP//3+5v5a/tz7O/lt+Zr6wfg+9in3SvfP8ubuw++18Y7ydPWP+cb4Evc8AVwTGBkTDp4ESQM8AE37QP7HBv8IHgEc9rTuJutq6iHtePM6/IoFDwu1CRAGPggcEBIV3hPjEYwQWwtmAvP6Z/c99gz2M/YF91z6XADIBWwIagoPDS8OUg0FDXkMrgdo/y/6JPlD9uXwwu4b79Ds6uki7GDx9fTB+QgByQKeAGUJQR3NJpYcnA8TCrcDz/nV9oL9tQEw/Izzwu/K7rzrmelZ7wD7vARcCfMJ/gckB4gKqA8gEwQW6BfCFD0L3f9I97TxPO8V8ZX0hPY2+O75T/lK+Zb/YgmKDbULAQuFCgoFGv7Q+rz43vTq8BPuJ+tP6OLp4fBN9XfzP/V4AuATnhwOGggXnRkjGWwObwJHAPkBnvt58vDyUvhX9RPtOe2S9v3+yAJGBaoHjggnB+UEgAWGCkEQGBOeFBgWMBN2CH/6kfFB8YL2nfsg/fv7j/re+MP2v/ez/ZwDPwXMBEYEUgJj/h356fOd8GDvTO6l7anv3PAy63vorPgTEOQVNQ4ND+YW9BTuC/gKpg9yDCwBMvjV9CnzbvFb8Mfxo/er/6gCKADTALUFrQZjBG4GhQsjDggQzhOfFT8RKQq8Ba8DswG0AMoBCQIi/Yb07u8C84j4+/s7/xIDXwKS/IL5BPzu/Gv5evae9XHznO+c7fDur+5D7K7yPwS/D8ALhQe1DSYSAQzsB1sN6w+zBWH1/utk7aLwre6s7rr3KQLlAwEAYgAUB6gNiA+uDqcOQA97DSAJMgaOBQIDcf3A+SX8CQPtCAUKxAcDBV4BxvwD/PwBZAg/B0oAifuq+uj4XPXW8+30Q/Xd8ojuc+u37a/xvPDJ8bUATBXBG8USjgyxDoUNeQ
b0BI4LEw5WA/jxXedz51zq1unQ66z3JgS4BP/+LADsBmQKfQutD18V3RZsEoYLXgYHBCMCuf2x9+f1CfoG/X76bfiK/GwC2AOrAh4EdQeYB2oDIP+b/Vj7y/WC8fPwAe8F7GPuH/K27xTvf/u8DdUWzxTfEbYSdRAnCSIG/grNDfEF0/fI7QvrQOpo6Nzr7/dNA1oEPf+A/3gGPQzVDZEOoBC3ErYRoAxxCOoIDgnJAkL6K/cR+L31EfFA8cD2gvsB/iYBogSJBXsEpwN6A2QDEwJH/mv4VvGU6kHmsuaf6TnrF/GsAVcRhxH+CpUMgA/LCZYGsRCQG/0W4gYu+jT0Je1r5anmefPhASQHPAN3/nP9ivz2+bP76wWREuMYVBhRF7gXxBTmDGUFIQKZAQL/cfeO7uTqrOs67NHvpPmoAx8I8QfDBtkGjAdeBSr/Lvge9Mfv5ecA5bPsqPI+8qv5kQlwDz8IagQoCbQLcQnmCikRhRN5C+P7de+B7VXxNvMP9i7+4gPe/r70I/J2+UEBrgTfCNAQ3xamFf0PnA2yDyQQcwwWCT0IigVa/Qj0CPBk8WL0Qvfz+AT6Z/wB/hj8hfu6AE8FcQEL+nD2KvNj7art0/Zo/In6Wf4ADNwU3Q9hB74FzwctB2AEpQLuAisB2/gT7wDv+ffS/SP8Hvvd/hwBl/4J/A3+HQSvCnQOPg+DDs0LMgjXBmgH9AboBbwE5AEw/mz8YPsf+Uz5cP6VAm4BlP/b/j/6WPTs9ML5Xft6+lz7xfuD9yTz/PXs+pf5zfeB/YUGng3zEc4QMg2LDnUQuQhk/tj+dwIx+lvv/fF4+E/zeOxj8sL87f3B+vH92wTUCCQLNA5cEIAQ4w8PDx0OCg2zCtUFEQDh/Fr8yPmg81Xubu0E72Ly7/meA1IIcQW2/xX86frQ+l76Svnt+DD5xfbE8VLxmPqIBhoLtQr6DPYN8wjeBY8L7g9DCPD7nfe7+Ob2YPZB/roGKQXb/M732Pb59l76BwLOCBwLuwk3Bg4EDwccDYQSkxfGGuEWmQucADH8I/ou9RryVPP88Ifqxuh27q32Vvyg/V/9bv7s/p/75/eK+4cDRQLr+U/7qQW8CTcGgAVICccKDgY4/tj7hQFPBB38IvTg93j9nPjs8fn1R/8uARb+VwL9C4oMuwMGAGUF5Qr3CzML5AozCtIG2QB2/lgEygoFB6b9n/qA/H373vnR/MoA1/6R9+bwG+/W8LzyLPQ3+H7+yP/Z+sj59gDcB6YIYgkzEBYWkg/eAgj/iwLrACD6+/iw/pL/Y/jj8wD3ZflG99T1YfjG/GAAsgEtAm0FDgvqDFsKNgvpECoR5Qm1BhMKMQlnArz+Hv+7/Nr2f/Ls8tP2NfrW+QD4qPkZ/ef7VviW+eH8Kvsa+cn9lQO2A9sCgAQvBccEXwaRCHEJswkGCC8D+f6z/d37EfmI+lr+/vt19HjyDfdm+Yz5FP5kA0oDoQKSBY8H4QjCDfURcw9uCh8IUgYaA94BEgO1Amj/+/pI9tvyw/Jt9QX4m/gC9+X0hPM78i/y8ve6AacGhQR/AysGyAVHAWQBfAmaEBcPewqnCaEIYQOv/sv94v3L/I76MvbD8c/w1PI/9Nf13vo5AJEAGQBNBMYIRwqADf0RXxEEDXUL6ArzBlcDzAPYA/H/6vp9+Bb42Pbr8yPz2/b5+fj4vviN+yv8rfld+Z76uPq9/BIBGQGZ/dL9BgH+AdICYQhGD1UP7gkWBwUHAgUTAu8ArwCd/5n8DPc/8kTxIvJ18tzznfaj+GD6m/0LAuIG9gr3DccRPxbIFooS9A0gCz8IpgOA/Xv5V/ns9+3yEPEP9Ir1OvUo99/4OPkT/FkAQgEBADYAmgHUAaAAgwC5BKEKOAsiBwMFMwNk/uD8rwHjBIEDqgH5/if6bveV+PL5cvpz/En+jfwG+TT4l/oe/Vb+4AAHBn8Jigd8BTIHlQdFBU4G+Qj8BpAD1QLl/jj2i/
Lz9tb5MPhm+Mb6ivnE9Tb27PvRANMCuQS3BosGJwVxBVsHIgh+B28GMAR+AT8BkgICAvb/pf4v/HD3VfTx9mz8ev/C/zH/Jf3M+QT4zPkh/mwDogdJCXkJxAkmCT4HEAcoCfgI4QSb/+367vaF9N3zh/Qs9qD3a/fi9qH4tPsh/XL+PAK0Bj8JEArDCSoJCwkjCNkF3wTOBR8FZQGi/UH71Pj49dT0Ivaw98z3Tfgp+67+bgDZAT8EZwWTBPUE8wYTCNoI/wl0CNMDjACB/1f9G/ua+7D83vuO+rf5bfku+4D+7P/S/2ABTQNYAnYAUAAxAGj+5fyi/MT8vP20/7sAKAAQ/7r+GACzAnsEGQXzBIIDWwFBAAMA9P+6AJgBKQDO/Lz5ZvgV+Y36gPo1+lH8Af8YANgBpgQMBngGDweABnMF3wWsBt0FFASGAfr9iPuG++j7l/vl+738//wc/UP9Cf2J/Jn8lv0K/xgALAEMA3EEaAMuAWgAIwInBfoGkgbZBR4FDALe/SH9Yv8fAIr+Hf3C+6v5r/jL+T77wPwq/90AGwAE/wMAXAKYA1UDTAMuBPgDkQF9/34AjAKbARD/WP/1ACQAOv7v/Y7+b/4V/YD7c/v2/KH98vwO/cT+3P8S//X9c/5OACQCJAMMBDoFfgXpA7QB2P+T/oz+df/V//H/TwBv/2H8mflp+Sn7SP28/xgCHQPWAoYC0wLZAy8FZgZXB54HSgbLA60BQQCK/gL94vxP/TD8Uvoj+kT7i/uD+6j8SP4x/8L/mwCHAQICQQK4AkEDbANIA+QCmAKnAm0CGQG6/6f/0P9e/pv8R/xU/FT7j/oF+7H71/vD+wD8kv0xAAUCjQKLA3YFfgbbBSQF/gSgBNADvgJMAT0At/9C/uD7CfuM/FP+E/8gANkB7QH0/qX7V/to/fH++v/uAbcDVAN0ATwAEAG1A9oFXwXjA+wCyQBO/Ur7BPvo+mr7kPxo/FH7Wft0/J/9Lf/GAJMBDgLIAvoCbwIyAt4CoQPEA0sDcgI/AaT/Tv5w/r7/NwD2/kv9+vv++gL7dPws/k3/3v/4/6//XP86/zAArgLdBNUEigMKAi0AN/+zAN8ClwMdA8MBs/5y+976xfyA/tH/aAGhAa//3/29/ZT+oP+zAA8BfADo/4P/yf6Z/vb/wAGUAhcDvwPcA04DmQKhAdQAjQDg/0v+NP0K/c78V/w6/H38bv3t/rf/sv+VAEsCgQIbAf8AxgJ4A88BdQA8AXwCCAJyAC//gv4F/rP9Uf1K/Un+ef8E/yr9dfwp/k0AAwEhAQICtQLTAUMA/v8qAU8C/wGtABMATQCC/+/9BP62/wEAdv5K/Xf9nf5QAFQBMQEIAQwBBgC6/jT/egFlA6sD8wJZArMBlADv/90AQwI0Ap8AHf8Y/vP8r/tZ+1D8mv1N/s7+m/9eAI4AewDMAFoBKQGPAKcA3AD5/xP/bP8HANb/n//R/8z/kf/G/z8AgwAuADX/H/6L/U39fv2Y/iMA2AC8AK8AvQCBAMUAVwJIBOoEkQRvBA8EoQJoAXsBnQGMAE3/9v7P/iz+D/7o/pP/Ov+7/hn/7v8jAOj/MwDdAKYAkf8u/wgAkgA4ABQARAB//1D+Y/5F/y3/iP6a/sL+Yf5d/uv+gv8IAFMACQDq/3oA6gCyAGEAzf/c/mP+xP5Y/xMABAGUAXMBNwFSAWsBHQHaAEEB7gF+AQAAGf9x/4T/qv4z/v/+3//O/2H/nf8vABYAR/8J/+T/kQAFAFT/mv8qADwAlABrAd0BuAGeAVsBFgHEASkDdAOMAr8BBQGW/x/+if2w/dH9p/1b/Ur9cf3O/bH+JwBHAcAB+QEYAvEBvAF5ARoBxwA6APb+mv0l/T/9Dv0H/dL91v7a/ib+H/7i/iD/8f6x/2ABPALVAXMBqAHHAaoBxQFbAg0DOQNLAsEA6v9kABkBGAHVAAQBwwBg/wr+F/7d/lT/RP8A/8z+pv6F/uz+HA
CkAXcCeQIyAgEC6QHvAUECZgL/AfwA1//r/gz+Jf1n/DH8avy8/PH8iv20/q//7P8CAG4ArwBBAO//agD4AKIALgBeAGUAnv8E/0L/of+V/5j/kf88/+P+wv6i/s7+Qv+j//v/PQD4/3z/nf9NAM8AOgHiASwCiQHQAMMA6QDPAOEA+AC+AD4AXP/n/ez8RP0O/nX+Fv8MAJ0A7ACVASICWwKZAp8CCQKGAX0BWgHmALYAwgB6AMb/Nf/0/v3+Kv9o/4r/hf8z/8z+1v5W/6v/6v9tALsAHQA6/yD/j/+m/7f/PQCAAL//w/6R/r7+3/5B/6T/Vv+a/kP+AP66/Sz+eP9TAEQAGgA2ACsAEwBAALUAUgG+AWgBwACqAOsAlgDi/6f/xP9m/8r+1/6h/0gAWAATAOP/uf9u/0v/vv+VAG8B7gHqAYkBSAFOAUQB/wD4AD8BMwGCAOH/0f/7/8v/b/9W/0X/s/4U/jT+0P4r/2P/ov97//z+1v4H/2f/FgC/ALgAZgBRAAgAh/+3/z8AOQDZ/8v/k//8/sT+EP8V/87+8v5a/1n/W//0/5YAaADD/4v/4f8wACUA2v+//9H/df+m/oX+YP9HAKQA0wDuAM8AmwCsAPMAPgF4AY0BMgGlAHIAjwBdAPH/AwB6AEYAf/9Y/+H/9/+t/93/ngBTAXUBzAAmACwALwCL/0D/1P9XAAkAe/9D/07/cf+0/w4AVgAuAJv/RP9c/2z/rv88AFoA3v9w/0D/Mf9+/xsARgDY/3T/Qv8J/y7/zv9WAHMAVwDp/0v/7f7j/ur+Lf+L/6L/ef+M/9L/PQC7ADUBnQECAtwBHwGcAJIAfAA/ACsAPwACAHT/5v6q/gj/s/9/ABIBRgEEAX4A8v+K/3T/s/8VACsAAQDL/5f/S/8s/3T//P9gAHsATAAYAB0ALgACAO//BgDQ/0H//v44/5b/z/8IABoA2f95/0X/SP+e////MQA0AAYArf9w/3j/q//6/1QAegBEAMr/W/9G/5P/3v8bAF4AlABjAND/Xf9o/9X/UgCtAO0A+gDUALwA0gDFAJsAiwB1ACQAu/96/6L/BwBIAFUAbQBaAAoABwB+AKwAjgDXADUBywA1AHYAHwE+ARIB/QDAABQAOv+u/sT+BP8B/+r+7v65/oT+0/5I/4L/uf/O/3T/KP9d/4r/iP+z/+r/s/9P/wb/6v40/+L/QwAdAPT//f/o/7X/lv+d/5//d/8k/wT/Ff8I//D+TP/k/z0AWgC2AFgBwQG9AcYBBAIFApQBJQHnAKUAXQAoAOX/yf/Y/8H/h/+g//3/SABlAHMARgDZ/2v/Gf/0/if/jf/V/9j/qf9d/zD/S//J/3QA1QCxAGEAKADR/2//p/86AFQAxP9G/yX/Kf9b/+z/XAA8ANv/o/+e/9j/UgDZABIB6wBqANj/pv/k/yMANwAfAMb/Pv/0/hX/iP8AAGgArwDSAMMAnwB/AGgAVwBCABoAKwB7ALYAdgDP/0T/Ev8F//f+EP+j/0oAUQDW/9v/bACpAGQAMwA0ABUAtP9g/1X/tv/9/7v/av++/0cAaQCZAEYBggH5AIUAQgDU/7z/IABqAFQAMgAcALX/VP+F/+r/1P+E/3D/O//x/vz+K//t/pb+iP6Y/sP+R//I/9//z//6//v/wv/e/zcAIAC//2r/PP88/4z/1P/p/xsARwAbAP//MQB9ALIAvACxAJYASgD4/2cAAQHAAIUA8QAEAboA4ADuAJMAXwBeAAwAoP+6/3AAxwB5AE8AfwCIAF0AMABSAMsA5gA+ALj/6f8sAP7//v8lAPL/h/9b/3L/fv9S/1v/i/9w/5L/QAB4ABoANQCDAN//AP8z/9X/BABJALYAtwBaACoAHgDp//T/RQAgAIP/CP/D/mv+Kv4v/n3+Gv+n/8D/CwBhAB0A6P8xAG8AmwDXAOgArACCAJAAbQASABoAoQDPACsAjP+z/93/X/9F/3n/M/8h/5b/z/
/z/3kA4ADWAAQBRAHxAJwAswCTAAUAr/+4/6b/S/8a/2f/dP8k/4b/aQBbANX/7f8jAPD/uv+D/0T///5s/gv+ff4t/87/igAJAesAYgDa/+z/0ADdARgCyAFqAXwARP/x/jn/Zf+s/wwABQCi/4T/9P+iAGYB2gHBAWABJQERAfwAxABSAJz/NP9g/5P/xP9/AIsBAwJYAXwAdQB5AB4AVwCqAA4AJ/+S/hH+5f17/nf/XgAIAeMAMQDr/6j/6f67/iz/Nv/j/qj+of7e/tn+hP7I/pj/3v+Z/4r/DgCmAHAAef8v/8X/4f+y/8L/dP9M/8H/+//l//b/5f/I/63/jv/W/0wAbgC3AMkAKQDb/2oAEwEdAbQA1gBfAfgA/f8GALcA0wBfALr/Tv+V/+L/rf/G/7AAcgE5AccATQEaAucBHwHaAPAApwDQ/yr/+P6g/lb+xf5r/3//kf8IAEEAGgAWACwAVwA+AMn/jf+c/5v/CP9f/nn+4/4t/8z/EQB4//P+Mv+S/17/G/9H//3+av7F/sL/RgAxAKj/Yv/3/3oAUgBSAJUArABaAHn/6f5I/yr/qP6v//AA4QCzADEBpAF2ASEBYQHEAckBtwEhAVoALwDm/zL/Af/g/y0BgAHTACMAkf+Y/z8Avf/U/gsAmwFRADP+k/4LALb/o/4n/7sAagGZAOz/LQD//8//d//2/ZL9t/4M/xz/kP+r/7f/6P/T/10AxQHnAh8DHwKhAL3/Uv9y/qD9ff4kAPb/Nf8S/8X+C/+c/04AWwFPAV8Akv+U/tD9D/7B/uX+Hv+b/ysAkwDFAGkBrAGpAGT/5/50/kD+9v5+/zQAwgDF//3+rP97APMAtQFLAoYBCACR/30A7ADr/1j/FwCaAHL/hf4XAIkBzQBMANoA+gBXAKf/zv8nAMv/Dv8a/x4AXgBc/yD/vP9oAPEAqQCoAIAB1QFPAa4A0P+P/xEApv9D/yQAzgBMAFT/+P5+/3v/P//x//gArAGiAY0AY/9L/7f/p//b/4UAogA8AI7/a/8oAAP/JP0G/v3+jP46/zwA+/8Q/8P9Z/2h/tn/3gBCAlAC2wACAMD/yP9X/4b+sP8ZATEAe/92/yv/ZP9A//3+7P+dAPH/9//SANMAsABRAGb/kf8MAZsBDQFjAZ0BSgAm/0v/KwDpAGwArf8sAIH/HP7y/tL/h/9g/+n+t/7R/0gA6/+EAJQAZQDiAKAAoQAgAYEAw/+7/6r/6v/HAOoAjADkAMIAaP9I/2IArQCsAOsAaQAl/3v+Qf9iAFMABQDaACsBLADq/6wAtAAaAMH/3P9WAPf/df5d/uL/+//o/qn+Tf9UAJYAJQDOAIgBZAD3/uf+e//b/+3/1f/L/+n/nv+A/ln+dv/w/4r/uf51/iT/Rv/d/gf/qP8TACUACwBaAGIBUAIpAnABgQDv/6QAjgBD/zz/i/9D/1//u/47/qz+pP7P/jv/Kf9dAHUB6f9s/6MAuwD1APAB4AHiAV8C0gHtAHwAagC/AMUA2f9f/wQADABU/6/+iP5L/zMAGQBv/7n/oQCMABgAngByAYABYQCz/6oAiQDs/6wArf8r/jv/+P5T/Rf+vP4//qT+E/+B/x0Ajv9u/5UA5AAuARECiwGrAJIB5AFSAIb/uv8ZAEYAZ/9m/8z/6f6U/un/SgAO/23//f9w/9P/SAB2AKAA+v/h/4YAUQBmACABzwBm/x3/2f+m/3z+Pf4rANABgQHSAKIAFgBQ/3b/VwAPAa8BzwD9/af9EQCJAPz+kf5U/5cAiAEBAWT/OP9JAHsAMQCQ/wMAIAFtABz+wv2a//H/lf5T/nb/DQCb/z//JwCVAHEAsgC/AMwAfgBNAHUApgCGAFgATQAQ/3v+dv/g/4YA0gDj/vv9mP8VAOH+df9RARACmgFJAE3/9f8RAV8BwQHyADb/Ev+T/3f/Kv80/yEAfAB3/37/MgA6ACcADQAVACAAnf8k//z/JQF4AJb/PQ
Cn/3X+UQBhAkYBv/9B/1b/4v/X/yYA1AAlAEr/yP++/2j/EABYACgAqf+1/lL+tf6O/6cAEwF8ALD/TP+w//3/mQC5ARkBh//T/hP/7v/ZAHgBPwFNAHz/df+3/6gA/gG3AYP/0f1+/jMAtgBcAMMAfQH6AKf/k/+WAPoAtADc/yn/XP9G/xz/5v+CANf/Jv82/5j/dgAYAf0AXQCM/9/+UP7Q/U7+FAD/AJoAxgAFASgAuf8oAQoCegELARcACf88/+X/wP9z/6P/Kv+v/uf+zf6v/t/+Q/+8/3v+P/0b/7MA2v8H/yP/0f/m/9r+pf+IAbQBHgG7ACEA8v84AET/2/4fAKcAFwDa/xgABgEKAsUBsQCsADcCYgN5AtIAtgB2AbkAOP/1/tH/iABfAFwAYgCM/zf/fv++/20AkQC8/2v/qv+z/xH/CP+N/8f+Mf4a/zIAFQBp//X/ogAHAIT/mQBGAQMA/v5e/4X/Af8z/97/zv9m/5//IgApAAMAfwAwAVEAC/9J/04AngAnAOz/FgDR/wP/7v7W/6QAUQCP/xsAJgFBARYBngCj/2P/sf9UANAADwBq/9n/VADf/zD/CABVAboAUf+0/wIB5QAuAFwAbQAQAPv/ogBvAUIBkQB0AGYAsf8K/5D/iABnAA//3f04/u7+lv5r/kX/xP8M/yT+Of60/3AAgP/H/ov/IgBK/w3/UAB0AdIAFQC4/0H/IwB0AY8B9gADAAj/I/8UAF8A0P9e/xT/Ov/9/uv+DQCpAKUAWwAp/7v+/f+MAWICewGaAE0BFAE6ACgBUAJ8AjICkQDw/04BRwFkACMAFQCMANb/e/5O/9EAygDX/3L/kf+5//z/7v90/1H/6f/gAHQApP4v/lb/TwCr/+j+0/9IAGv/nP7a/Z3+fgC2APT/pP8y/7r+eP5//m7/TgDi/3//ff8l/0//l/8p/+j+W//i/yoAEADH/7T/7v9MAPn/s/+GAAMClwL/AEH/sv/PAHoAhf+jAJ4ByP/f/vD/PgA7AAMAwv9VAPD/m/96ALoAVgDnAJ8BrwDA/6X/xP+cAFgB8QC0ADEAd/6U/Zn+yf8zAbUBa/+y/ZX+hv8iAOkALQHDAIf/g/7c/uP/YAAiAEkAigDF/1b/yP/d/+D/l/9Y/y8ANAAp/wP/gf8VABwBPwEHACYAiQHUATAB8QDLAOf/H//G/4gAKAASAHoA6/8R/yb/xP9BAMIA0ABFAJL/Q//2/10Aa/9T/00AWwDo/14A7QAKAP/+K/9U/1v/UP+L/3MAagCK//r+ev7h/kIAfwAUAKUA0AA6/xr+N/+K/8T+ev/TAJsBqgBS/yAAmACE/8f/jQBAAN3/lP9M/xb/k/8jALz/P//4/9sA5P8b/nv+0QBPAan/9f8JAVkAov8fAAcBFAFKAC0AVAAUAM//3P+HABYBIAEJATUAU/8UAOEAVABXAN4AKgBD/yn/xP6//sT/UADE/1D/o/9vAHUAgP+x/xwBugF4AZ8Awf9M/9r+E//2/3EAbgCC/2j+3P7w/3D/sP6m/3gA2f9X/9r/WQH7AZ4Ax/+zADcC3QEdANEAhAFdAHwAMACM/tT+6v9n/5T/0ADDAMYAwgCU/+v+Mf+v/wAAu//k/j3+K/6E/n7/sQAgAagAVABYABQA+/9NAEAA8P8YAPb///4u/qD+k/8b/y7+hv5x/ykAHACP/zMATQHBADP///4LAAABhAFwAKT+iv47/+r/cwB9ADAAyf9P/9r+1v7u/y4BBgHB//b+Xv+hAMwBmAEyAagBGgG//1H/TgBWAnUDhAEa/g/9f/6h/33/uv+yAGkAtv57/aX+gQHVAoEBVQAiACcAxQDyAAMAIgBaAUMBWADH/00AuAGTAc3/vP7H/qf/hwBDAFX/S//l/4X/KP+e/4oAqQGZAeH/Kf/9/8sA+AASAOL/0gCv/8H97/7uAIkAx/7F/Vj+P//a/hL+Nf/VAH4ADv+f/nb/PwCaALsAnA
D4/43/RwCEAV8B6/9f/9j/GwDMAJ0B1ADZ//D/Yf/g/u//GQELAb//Nf6r/Sj+VP9cAOb/2v5C/9D/z/6+/rsA4gG8ACj/7P72/+IA8AAwAZkB1AAdAAEAYf/P/w8B5QDP/0r/6/+3AEUAPv/C/98A6QCzAAABWQFIAV4BtwBa/8P/CQItA78Bh//I/hj/xv4R/3QAVwCv/qv9U/1t/Sr+IP/X/1cAvwDtAFUAw/+IADQBhwDE/2z/4/7q/ur/swBDAT4BFgAa/yv/3P/qAJYB/QAw/xH+2f4HABcA3P8VAIgAtgCl/5/+EQCfAZUAbv8dAHEBcgErAM7/IwASAMX/sv86AN7/bP6O/pX/1/+b/3b/yf8dAH4A+gDEADIA7ADIASQBPQESAqkBtQCV/6r+Mf9D/4L+4f7b/3sARABD/zT+Zf7T/xMAbP81APEAXgBT/zP/3wDCAJb+zv4hAPv/3f+TAGoACf+k/bf9Nv///0gAVQE1ATP/if4GAAMBHABp/8wAKgKuAaoAv/+6/78AcACi/5UAHQEYAKL/2v8XAOL/fP5//bf+pQAyAagAFgD//x4ALQBuAEIB5AFgAX8AWgC7/wb/GADBAIL/bf49/sj+ov9e/2X/qwAfAJj+k/90AbIB9gCqACUBIgETAPD/SAGWAUwAOf/Z/vH+a/93/7v/uwB0AFf+5/yg/VX/JQApAOcAdwFPAH//tgCkAakAGQC/AM4AEAAn/3P+9f7e/1P/Rv5w/mD/MAB5ALQAnAH6AYYAG//X/3sBngGqAJcAJAHkAJv/Tv79/p8ALgBe/vb9qf4Z/83+NP6a/of/HP9H/pn+xf9SAQkCdgFnAFD/bv9tARMCmQDh/8n/aP9b/07/5P+WAHH/lP6i/zMAjgAgAeb/1/6I/xsAoAB3ATgBtQDUAFMAg/+J/2cAQQGUAOT+3/5cAMYACQB+/6v/FgAxAIYA7ACYAEMAKgA9AKIAnABQABcAuP+e/8n/RwB7AC//5/1x/jH/jP5w/sv/IgBy/+H/rwAjAEL/Wf8RAOEAUQGGAU0BlwDXALEBYwEgATcBKQAT/zr/6v8WAEv/4P3s/Tz/J//f/rD/zf/v/8oAnADj/0EAHAEJARcA5v99AFAAkP+s/wkAVv8r/s7+OAEJAtkAIQD5/3//J/8u/0P/b/8LANX/Gv5c/ooAlABK/3T/NgDfAG8BugBO/wH/3f/PAH4AOv+e/7wATwAQ/+X+KgDMALf/Pf9YAHoAWv9u/0AAJwBPAF8AnP8e//n+zP+TAOL/e/8BALn/6/45/wIAhADcANAAFwCR/+j/aQBTADAAywB4AZABJwE4AK3/JwDfABoBQwBQ/2n/lP9+/4L/V/9//9L/rP/P/1MArgAuAVMBkwAVAE0AXgAwAA4Avv+5/2UAGACq/vj+VAD+/1P/wv9AAFYA9P9l/0b/sf8rAFsA9/9+/wYA4wCTALv/qv9c/9z+5f/eADQAov+M/zv/KP/n/iH/TgBGAL3/yf+D/7L/ggCUADUABQCj/37/3f9PAG4A5/+v/nL+hv+J/77+pf/+ALcAJgApANP/FQA3AZ0BygCy/9H/ZgB0AK4AvwDl/53/jwDpAJYAdwCLADsASP+T/qr+4P4L/7H/LwAIAAsA+P+Y/9b/5ABlAawAev8F/zz/g/8bAOQAIAGGAFn/1P79/yUBmgDg/yYAcwBUALj/NP/t/sT+Cf+r/xEABAAeAGQA+f9g/+7/kgCKAKMAigDH/0P/V/+z/+H/mv+4/4AAtAA3AP3/pwDWAQoCywDg/xQApgC4AFMAGADq/4H/Ov9u/ykAugC8AMIAbgBj/8D+8P7y/x0BCwEHAED/w/6O/kb/ZgBqAK7/lf9y/y3/6v8fAJP/x//t/3T/qv5Q/lf/dAD6//n+yf4X/0n/Kf+T/50AEgGKAPT/2f8/AHcAKwBNAKsANAAk/8n+v/+IAB4AmP/r/1YAUABzAMsAhABNAIwAYwBlANsA0g
CKADQA9f9EAEYAvf+o/97/GQB2AEcA2v9NAN4AqwBUAL0AZQEkAVwA9P/5/wcAIQB7ADkAa//n/pL+q/4u/3f/w/+4/23/v/+S/4D+nv6j/+z/4////1YArQDm/9T+TP9OAMkAigAHAIgA4wBpAFMADgCm/1sA7gA7AH3/uP9FAEsAif/p/jX/i/+c/7j/Yf/G/l7/ogC1AFoAKAH+AXsBDwEtAfUA7wCMAGT/DP/B/+r/of9I/+L+Vf/C/0H/HP/M/3IARADF/9T/GgD4/0D/7f7E/7gAoQB9/3D+i/6J/54AxgD3/xL/7/6a/+X/Uv+X/3cA+v/e/rr+rv/kAP0Az/+N/2gAZQC0/1X/sf/wAKcBawC4/uL+SADSAC4Ao/9QAG8B+gBP/2b/7wByAX4AYP+a//MAPQEnAL//VQCfAHMAigCoAEUAjgB5Ad8ASP/L/4kBwQD3/jH/lgAPAWX/x/2q/t//rv+Q/83/LQA7AKf/mf/8/8r/vv+E//X+DwCUAboAGf/J/m//UQBrAMb/mP/3/xQAXf99/vD+KwBnAJn/Gv9C/7D/QQDiANkA/v/I/58AFAGvABIAvP8gAFwAW//K/igAcQGfAL7+UP5Y/+7/q/+k/5n/Y/8cAMsAv/+4/qb/HgFFATsAhP9fAE4BWAC8/uf+vv+I//H+6f6l/wkAcf8u/3f/4//BAE4BDQEBAWUBuAGsAScBiQA/AEkAbv8V/qT+FwAbAHf/3f5F/oX+L//m/ygBswHMAA4Azf9E/9T+GP/U/48AvgCqAIcAVQCZAAEBhwBb/7b+z/5p/yYABQDP/wUAcv8Z/wgAvADIAAMBIgF9AIH/NP/s/1sAx/+H/30ARwFXAP/+f/9zAFEAYwCtAEMAtv+P/6r/af/O/qX+5v7h/r3+O/8MAPz/wv8JACEAWgD2AEUBLAG+AP7/m//J/+L/BQBNAD0Auf92/6z/CwAaAO//9v9/AEQBCwG//zv/2/9WABEAzf+0/1L/Xv/c/xMAXQDe//b+Pv8l/37+RP94AGMA7v+t/5b/y/80APMAXAEDAcgAzwBdALP/Nf8v/+H/ggBjACsAa/+M/jr/GgDa/8X/PgBTAP3/5f9rAOgARQAd/yT/ZgAeAc8AUwDn/1T/Av9b/08A8QB0ANf/JQDR/8P+If8mAFAA4f9T/6T/lQCCABgAmgCYAAQAiQBsAU4BjQD2/8H/cv8D/0X/VADuAJYA3f98/5v/2//H/w0ALQGvAX0A9/65/qn/VwApAD8A6QDbAEsAZACXAPL/h//W/5n/9v7n/g//a//9/wIAxf+q/1z/mv99AAABYQGLAaIAcP/6/h7/dv+X/3D/AwCtAM3/v/5n/zAA2v8R/77+d/+HAIsA+v/T/+f/nv9s/wsA+wA0Aa0AiwDSAPj/5/6Z/9z/q/5N/k//dACOAID/Nf8TAOr/X/9DACcBRAEMAf3/Z//0//n/qv9SAOMAGAFJAYMAUP8V/7T/pADhAMP/EP+5/zEAIgBBAJoAogBSAIEALAF3AT8BzQC5AN0AWQCQ/yr//P5P/4T/e//x/3r/DP4f/mz/BwD8/8D/SP8p/9T/tgC0AAEArf8PALIAGAEhAfIAkQB4ANwAzwBDAAAAzv9Z///+dP9CACsAsf4e/sz/MQElAcEARQAJAEsAPgAHADEADgCL/yz/+/4n/8z/+v+e/xP/ef7T/icAOAFyAVcBLwGGAKL/lf8+AE4A2v8NAF0Am//h/k//x/+H/6P/nQB1ATYBGQA4/4z/XgDgAMwABwCS/3//Df+//lT/egDhANX/3v5m/8j/g/9EAPgAEwA5/03/Ov9V/wEAyADqACAAjP/q/wIAYv/z/xgBtwDS/97/+v/s/1UAxgBsAIH/bP+RAD8BawBc/1D/2/8lACUAFAAAAMz/q//Y/+T/AQCHAMMAggBYADwAJgA/ACAAAwBdAJQAHgBi/wX/S//m/3cAigDm/3j/nf/K/wUAdwC9AE4AO/
/e/rb/pwD1AMUADQB8/+n/iwCiAIwAPQCz/0v/Vf/P//L/bv8p/3D/rP+J/4b/7f84AC8A+v/F/6//vf/b/yYAuQDgAHAA/v+6/4//Zf9d/2IAkQGnAJf+TP5i/wIAIgBoAMcAuAAaAG7/MP+y/6EAzAByAEUA/P9q/yf/xP/PAHABCAEJAFf/Av/M/s/+G/+j//n/n/8u/5j/EQD7/y0A8gBOAQEBlwBnADAAzf/J/ygAVABMAOf/Ov9B/97/2f+J/+v/qgDeAA4APf8x/4z/vv/a/xIAJQD5/wgAQwAiAFkA8wANAasASwDi/2z/Mf+K/w0A+f94/1H/m//l/x4AeADQABUB9wBXAPf/IQA5AD8AVwAPAGb/Ev9e/+//QQBNAEYAEgCz/3r/gf+k/9H/9//v/7H/c/9t/4P/m//B/y0AnACNABYA0P/C/8H/9P8vACgA+//l/yMAWAD5/1v/Q/+R/7j/vP8GAFIALwDJ/77/AAANANv/z/8lAHcARADI/3j/kv/N/8z/uP/q/ysAOwAyAD0ALwAEAPb/JAByALMAwwCpAF4A/v/c//j/FQAsAD4AOgDw/2j/Qv/J/zsANQAjACkABQDh//b/KABEAEAAIgDX/2z/e////w8Akv9c/4r/p/+A/1//d/93/zv/N/+H/7v/z/8WAEsAEAC1/7D/1//6/2sAAQHxAJUAiwCKAHEAYQA0ACIABgCT/2z/4v8+AFsAWQABAKf/qP/o/z8AdQBaABwA0f9+/5r/NADCANMAsgCQAIoA2wAQAc8AhABPAAUA1P+5/5j/of99/9X+Yv6c/vb+H/9Z//b/iwB8AOr/jf+2//7/AgC//1j/+/7s/lf/n/9j/wH/4/4J/y//PP+g/04AcgDs/5f/uv8PAFEASwBAAH0AkgBxAF4AUgBpAIYAWQD9/8f/0v/v/wQAEgAuABsAtP9I/3r/UAAYATkBvQAdAOT/AAAeAE0AvwDmAG0A8//z/xMAMACBAKIAYAAHABIAUgAqAOf/CAAwAOf/h/+h/xMANAAHANb/tP+a/5v/tf+y/47/lP+g/3n/av+7//z//v/J/5H/nP/P//j/DQATACEAGgDj/83/5f8YAAEA4//E/6b/q//I//P/BQDq/7T/oP++/wwAXQCCAGMAEwDJ/9r/MQB1AHkAXwBHAB8A7P/n//f/6//t/w0A+v/E/7L/4v8PAAYA6v/0/xUAFAAKABYAHgAgABkA5v/G/+L/+f8CABAAGAAJAM7/of/A//j/CAAFAPr/8f8FAA8ACQDy/9v/5//8//P/8f8HABYA/v/e/+j/DgAkABIA9f/4//f/7P/i/+n/9v/5/wYAGgAxAC4AHQAaAB8AHwAZABkAEgACAPv/BAASABoADgADACEARQBJADcAKgAqABoA/f///wYA+v/r/+v/+P8GAA8ADAADAP3/BgATAB8AFwAIAAQAAAD5//D/4f/b/+r////5/+T/2P/d//f/AwAIABoAEwD+/wMAHAAcAAUA9v/t/9//2P/q//v/9P/r//H/8//u//n/DwAWABgAIAAOAP7/AQALAB0AGQALAAEAAwAFAAIADAAQAAAA9//8//T/5v/1/xIAHgATAA0ADgD+//P/AAAUAA8ABQAFAAYA7P/d//L/AAAJABYAFAAIAAMAAAAIAAwABQAKABMAFAABAO//+/8UABcAAwDo/9f/3P/2/w8AJAAiAAkA6P/g/+3/CAAkACEADgD1/+P/1P/e//z/EwAIAPz/BAADAP7/AAAQAB8AGAAJAP3/+P/8/w8AFwAbAB8AFgAPAAEA8v/p/+r/+f/+//X/9//1//n/9P/m//j/BQALAAoABgAEAAUABAAAAPX/9//+/wMA/P/y//v/CQAPAAQAAwACAAMACQAJAAUACwAHAAUA+P/8/wYAFgAXABoAFwARABcAFgAMAAoAAQD6/+3/4//x//X////r/+H/3P/f/93/5v/v//H/7//u/+3/5P
/0////AAD6/+//8f/6//r/+v/3//n/8v/r/+r/8//9/wAA/f/w/+v/+/8AAPv/+f/1//f/8v/x//f//v8AAAQADAAGAAoACgD//wMABQAAAAUADAANAAUA/P/3//L/9v8BAAMAAwAAAPr/8//0//r/+//+/wMA/v8AAP//9v/1//7/EAAaABUACAADAPj/8v/3/+7/AgAIAAcA/v/t//n/+v/u/9//4P/t/wIA9v/u//L/7//u/+//+//5/wQAFQAKAPP/4//n/+r/7f/t//n/BAAIAPz/8f/0/wcAFgATAAIA+P/+//z/+v8BAP//9f/5//7/AQD0/+n/7P/2//7/+//6/wAA/f/o/+T/8//z//P/9//6//T/+v/8/wEACwACAP3/9f/q/+7//v/3/+3/6//l/+3/9/8DAAAA//8AAPv/+v/y/+v/9P8CAAIAAgD+//n/9P/2//X/9//6//j//f/5//r//f/8//r/AQAHAAQAAwD9//v/+/8DAAMA///3/+z/6//y//b/AAAAAAEABgAEAPn//v8BAP7/9P/1//b/6f/k/+X/6//z//b/8//1//T/7P/3//7/9P/w/+f/7f/8//n/8P/x/+3/9v8CAPn/8P/4/wEA///4//H/8v/5/wIABQD5//f/7f/p/+7/9/8EAAAA/f/y//L/9//7/wAAAgACAAMAAwDz/+v/8v/6/wMABgACAP7/+/////v/+f/x//3/AQD8//z//P/4////+//5//3/+v/w/+3/7v/1//v//P/z//T/9f/7//T/8//4/+3/+P/5//v/+v/w/+r/8P/m/+3/8f/5//X/8v/2//b/+P/8//7/9//5//7/BwACAPf/6//t//n//P///wgAAgDy//b///8FAAMA+v/4//H/9v///wUA///7//n//f/3//P/9v/2//b/8//0//T/8v/1//X////2//D/7//y//r/+v/4//P/9//1//f/9//x//b/8P/u/+//8v/5//n/8P/t/+//9//5//b/9v/9//z//f/+//z/+P/8//7/+/8DAPr//P/+//f/+P/9/wYAAwADAAEA9//6/wgACwAOABAAEQAJAPr/AwAHAPz/+f/z//H/9//6//7////9//j/+P/9//7/BgAEAAEA+v8DAAQAAwADAAAA//8BAAQA/f/y//z/AQD7//H/+f/6//D/8//z//P/+P/y/+3/9P/w//H/7//1//b/9//3//j/AgAAAAUABAAAAPj//f8FAPz/+v/9//f/9f/1//v/BgAGAAIA/v8AAAEAAwAOABIADgAMAAkABAABAAQABwAOABAADQALAAcABwAIAAgA/v8AAAYACgAKAAMABQD8//b/+v/0//L/9v/4//j//f/7//T/+/8BAP7/9//z//b/+P/8//n/+v/7//3//f8CAAYADAAIAAQAAQAFAAQA/v8CAAAACAACAAAACAAHAP///v8EAAAAAwAJAA0ACgD//wMADAANAA4ADQAKAAQABAADAAMACwAOAAoA///+//7//P/5//b/+v/9//n////8//n/+v/7//r/+v/5//b/9f/z//L/8f/w//L/+//7//z/AAAGAAQA///9//3//P8AAPz/AAADAAMA/f////z//v8CAAEAAQD///7//v///wgABwAEAA4AAAALAAwACAAEAAgAAgAGAAYA///8/wUADQACAAMA/f8BAA4ADAALAAoAAQABAPz/AgAEAAMACQANAA4ABgAJAP//+//8//3/BwABAPb/+f8CAP3/+f/9//T////9//n////6//H/+f8AAPP//f8BAAcAFgAPAP7/7//4//7/BgAKAAAA+v/v/+3/7//r//f/8P/h/+f/+P/9////+v/y//f/BQAIAAMA+v/4/wMA//8UABMADwAMAPn/+//6//v//P/3//X/AAD2//f/8v/t//P/AwAPABEAEgAOAAcAAAD9//r/+v8DAAIABwAFAAAA+P/z//L/+f
8BAAcABAD5/wAA9//1/xAAFAAEAP//+//v/+3/7//w/+7/8P/y//L/8//z/+v/8P8CAAIA+f/1//f/9v8AAAAAAAAKAAQA/f/6//P/9f/9//n/+P/4//n////+/wsAEAAQABMACQAGAP3//v8AAAIA/////wAA/P/4//r/+v/+/wAA/v8HAAEA/f8BAAAA9//5//z/+P/8//3/+f/w//v/AgD7//H/6f/x//P/9P/9/wAA/f/7//r/9f/3//f/9f/0//P/9f/w//H/8f/1//n/+//7//7/+//v//P/+//4//P/8P/2//z/9f/1//r/9//3//7//v/7//7////6//7///8BAAAA/P8AAPz/+v/6/wAAAAD8/wAA/f8CAAAAAwD///z/AAACAP//+v/6//j/9f/2//j/+//8//r/9v/1//b/8v/1//H/9v/6//v/+v/5//f/9//1//b/9v/x/+//9v/z//T/8//6//j/9P/x//P/8//z//j/+P/8//b/8//6//f/8//7//T/+f/4//X/9P/1//P/9//2//b/9f/1//n/+f8AAP3//f8AAAAA//8AAP7/AQD///3//P/4//n/+f/4//n/+f/6//v//v/9//z//P/9//z/+v/6//v/+//4//f/9f/z//D/8f/y/+//6//v//D/7//v//P/9v/5//X/9//6//n/+v/5//n/9v/2//r/9//4//X/9v/1//X/9P/z//f/8//z//j/9f/1//b/+P/1//b/+v/4//X/+P/8//v/+v/2//j/+P/2//X/8f/z//T/7//t/+3/7v/y/+7/6v/y//L/8f/5//f/+P/1//v/+//4//b/8v/0//L/8f/v//L/7//u/+//8v/2//X/9//2//T/+P/2//r/+f/6//v/9//0//H/9f/2//f/+f/9//r////+/wAABQAHAAcABAAHAAEAAAABAAAA/f/6//3/+f/7//r/9f/4//f/+v////r//P/+/wAA//8BAAYABQAAAP///v////3/AQD///v/AAD3//j/9v/5//n/+//7//3//v/9//z/+v/7//n/+P/6//r/+v/3//b/9f/z//H/8f/y/+//8f/x//H/7P/t//D/6//v/+7/7//u//T/9//8/wMAAgAEAAMABAAEAAAA/v/9//n/9f/9////+//5//v/+/8AAAIABAABAP//AQAKAAUABwANABMADwAJAAUABwAEAAQA///7/wAACgASABwAIQAbABYACwDe/8H/p/+P/7v/Yv/8/wYDEQRhAYb/pf9mAD8AOP8dAPD/Av8hAGr/wP5b/xH///4b/2j/Xv/c/lD/ef9Z/5z/tv9z/6f//P/1/xUAGQBsAJEARQBMAGAAPwBhAFcAVACBAEIAMQBEAH8APwAtAEAAIABBADwAHQD1/wkACAD8/+T/3P/0/8z/pP/C/7f/of+s/43/sv/J/9b/AgDR/xMAcQAqAFUAkwBMAIgAjAAwAI4AbwArAFkAFgANABoAHwAQAO//UABrABkAHAAvAAoA6f/n//T/4P+y/9//oP+l/8f/+/4K/3r/Av+x/n//cP8O/8H/+v9BAFMAXwB3AFYA1wDyAOkAQAETAaIAlAAWAacA/P+kACwA/v9nACIA3P/A//z/Iv9U/+3/Jv/I/3j/Yv/l/1z/dP8Y/87/sP+a/zYAPwDS/wQA7gA/AGEA5wCrAAoAJgCwAJQAMgAdANH/ngBSAMj//ADu/+7/+P+OABMA0f5qAIn/bf92AMz/af85/zwAKwDI/sP/fABk/0sA5//V/8oAvP9iAPj+7f90AD7/FgC7/zkAW/9CAJ3/gv6mAKP/Gv/2APQAAQG+AH//ZQFYAK7/wACG/3P/1/++/w0ANgCX/08Ae/96/5j/Bf8IAcr/ev/WAVUAsQDHAJUAXQF7ABcB+P+M/yQBkv9b/lX/8v4mAF3+RP4tAHf+/f8f/1j/TwHX/w4AegAyAYABDwDQAZkApgDhAfL/ig
Aa/4gAKf9k/roAZP5Z/wAAyP4W/5f/of8r/zj/xP9j/7L/gv6yAMYApP7XAAf/eQDA/0X/AwE7AAgBBf9sAH4Akv4FAEcA2wA6/6MATP/M/ukBNP8j/4wAtgHtAKT/AALd/zkArwDA/xwAd/8MAV7+uv/e/zf/3QGI//IAlADN/yUAUv/6ACX/nQDb/2v+7wBxAP3/Fv87ASgB/f8QAP39UAB+/6H+3/8p/2IBPv7i/1EBO/4UAsv/5ABnAAL/ZgKM/gX/2ABpAAUBRf+7/kH/rgDW/3L9i/+iAbT+yv73AYP+jv+zAQv/Jv/Q/7AC8QHqAFIB6P/A/9H/iv8B/ov+ZP9T/eD+GwCU/gr+DwBnAFD9SgAkAk7/+/62AaMBFP8YAXcBHgCrAaoAfAEoAdEA5QKwANcBGgL8/+f/BgGj/+z85v9S/jX9iP6o/Z3+uf6V/7H+J/6eAe7/mf5TAZgCmQAf/lgAGAAhAB0Ahf9AAU0AFAKF/43+ZQFBALn/6P0UAaX+9PwXANj/pQBY/okAjQBZ/7sABf81AY8BfgEUAdD/r/9W/3cAtP5g/9EAV/+E/07/4AEVA3QA3gEZAVD/hQDdAM3/Of8OAd3/bf4a/nz+tQEiAKf+HAB1AfIAof5VAJMBWQILAQQB7gHF/iEBSwEa/5/+f/7S/17/9f6A/xsA9f5T/c78K/1J/Wz+nv+Q/rr+owBXAdIALQHWAioFpwLR/1oAcgE3AQ//mP7F/qT9a/s4/VT9j/11/1D9Xf/TAJb/oAFeAcr/CQAgAZsAPgGlBIECUgL/AOL/fQBS/zYAyQDPAOT/MP9l/S/+X/7q/dn+XP8nAJr/ev9V//n+oADlAD8ATQKxAdsALwFOAb8BgAFSAmoBdACe//j/JQAj/kD/QP5K/+H/W/4c/j7+egGZAMkAOAKcATsCYgBY/woAVf+F/vX+IwBeAK0AtQB8AUoBegDbAfT/e/9n/2z+8v+n/4D/8ADp/4j+j/99/r39gACuAbD/e/9gAG7+wP2W/6cA+/+eADwD/QA//qj/JABc/xf/KgAQAAD/BADC/+P91P4RAGb/Vv6i/pcAYgBP/+v/7P8X/9L9Ef1D/rr+wv7u/t/+bf6n/g4AJ//B/xEC0QEyAJgBTAPcA/UDWgIoAR//RwDB/XH7J/6I/zD+5vtI/hIAEAHdASMCtQMzA0IDCQTHAxkEMQS7Arn/fP/S/zf+of1p/c3/QQAN/rb9Bf72/XL9bf5z/iP/xwL5AEP9ZwCDA24BQwCPAWADIARaAR4ApwDN/50Akv/E/Pf8sf2b/Zv8zfrS/CP+U/ww/t8BbgJPAcEA0f/n/+wA6QGrAwcCEALMAPb8m/7U/9X/qABT/0AA9ACv/wUAXQAcATsAM/60/cH9tv0D/T3+ywBWAaEA8f7i/gYAkAHOAqoB7gFgAYv/VvwB+wn+FgCcAPr+1/9HAPT+LACO/7D/6gJYA9IAIP4P/aX+k/78/d4A+AFkAd8BbP93/f//KwPjAsEBzAJCAx4Bbf5x/1ABiAEuAV3+cfsP+6z9iABzAU0E2wd2BzQDuwD8AfkBkQGoAEf/yvwa+pP4C/fr+Pn8DAHDAbQAKgOEBBoDRQG//5QAbwA0/bL6AvqL+/n9b/2x+0j+fQFp/xP9LgAfA9cBdQA2/7T8bfsd+377vP2s/wYCAgSgAqkBLAIbAggDkwR/BXMGjQU3A5sB9P+cAAQDVgUnBgUFjQX/BF0DOgJyApQFQwUNAsr/o/5z/uD8afxi/0EChQJIART/tvzC+7b7M/uW/FIAogCw/dP5J/a49Br0cPTU9tH56fqQ/Ir++/sh+AD4ufrK+6j7J/3v/0gBsgBaAZr/gvz9/Pv+qwF8A/UEXggeDGIKCgXfAyED4wJCBcsHawnMCdML1QyoC44IWgVoBHAE7wTCBNMEWgP+AWEBV/9w/58A8f+UALkBtgJNA0kCuAFLAJP9S/oi+Fj3K/aV9E/0Affs+Jz4Af
h396L3VfiU90P1xPQW9h33QvYS9Yn3yfnE+K34Sv3xARkDxgRYBzcIVwcUBmkFGwYoBc8DHgZMCHQHYwZnBgMHLQisBwoHLQgFCQMJ9QerBhMHCAicB/MGfAfqCK0KDAoEB4YF5QRpA9sAn/4O/uv8jvtT+j34XPgZ+ef3gPai9Uz2Q/Zl9Wr1uPSJ8/PxvfBZ77jtV+3j613sA/Fs9XD5hf96BqgKqgqrCQkKyQjSBF8C5wG5AFf/xP17/YD/LwFxAiwFsQiODA0QVhGVEJ0PZQ+rDZAJfgdvCE8IMAaAAZEBrAd7CToH7QYQCuoKvwZ6ASb/kv55/Tj8pPvF/Ff9s/zF+/75F/gj9k/0NvK27k/sueo+6QzqAuox6Qjqf+uB8JT5ywFOB74M5xJ1FcQT0RHLD3kMRAc8Acr8nvmM9VPyT/FL8Xrzvvd9/DICewgDDoQSqBSdFOET+xFmDrUK1Qe3BC0AEv11/Cn8Bf28/tYBvAUdB3MHCQltCRoIywYKBlkFBwTvAbcAmADL/nH8bfq395T0E/H+7Fzp9Ofn5/TneufI5gPnIudT6f7zegKaC+0TNh7HI1shIRuUFWUPjQaY/a74SPZp8p3tv+rR6s3sOPAe9Tn64v/NBjoNNhEME1AUrRTyEtgOOgq2BpgCD/6/+uP4qPm6/AUA0wJ1BeIJhA3FDIoKFApeC2cK9gavBXAGeQVlAsn/xf6Y/eb6C/eJ8q7tHOmP5WPifd/33VnePuFi5fbowvCQAaMT5h3vI9AqoS0rJ7EbSxH9CIj/U/V07gfs9Orw6FDngOhP7KvwufRT+Uz/6wUfDJAR0hRkFgcXQRTGDlMJbAMo/jH6yvaV9YL3bvut/wsDeweIDaUR1hGPEAMRyBDZDOIIKwc/Ba4BKP5Q/aD9avs3+Pf1jvF+6vDjad+i3VLdtNzu3UTiC+gx7jH2VgRQF6wlRSwcMm81FC7HHvYPhwQi+s7uVeaA5CHlNeWG57/r+e+m9Hn57v15As8GjgqPDSQPtw72DUwNOgpHBXEA2/uL+Dn22PRx90r+mwSpCJINDhR9GKQYLxaFFB4U+g9iCO8D7AJyAUr+V/tO/JH+LvwD+HL0ze1c5e7dxddz1WHWCdin3GzlLO/D+FUCEQ6NHfYoGSvHK6UsgCXqF0kLaQFr+QnxyOmp6Gzqw+q/7MTwJvRf9/L6dP6EARUEwgbOCAYJRAgwCS0KuQdXBEsCQf+H+6f4n/jd/BkCBgbICpgRRBhyGwMbARrEGPQU6g3eBvsCAwBx/K76xPuQ/i4AiP7Y+8n4tPHx5kncQNMYzX/LnM4l1sjie/KDAJ0KHRNMHWEm5Sd9JPYiICD5FmQLXwIW/Af2X+/Q60btMu8N7wrvy/AX9CH3evhj+iv+zwDTAZcC+wIrBUMItgfIBT8FwwO5Adf/Dv50//oDqweuClEPVxVjGkUcEBwbHBYbZRV3DI8FDQE8/Az4Fvc0+nj/kQGG/8D9h/vF8wnnrdpS0pzOWs861FDe8uyP+1kG9A3WFeAd8iC6HgEdxxuTFgsOawbCAbr9xvcP80nzEPS68fHuou458C3xc/BU8XH11fh0+hz9tQC7A+0FuAYvBlcFBgSKAuMBFQLGAt8EeQj5C5EOjRFEFQ8Y3xiVGOgXsxVYEbgMRAldBg8DNQD5/xcC0QJkAED9UvpX9c3sNOIM2U3Ux9OB1jjds+dH9N4AMQriDjITthd4FwYUSxJ+EJULEQWZAGL/wf2L+an2hfar9QbzCPEs8ZbyBfP38vn05Pc9+SX6sPsn/Yz+2P/DACcCqAN3BEoFBAaaBd4EBgZLCaILOgx1DiYTXBZkFnoWuhiRGswY5RRxEn4QswsABU0Ab/7m/Gz55vTN8azvFevk5BLh998R4YHlSewE9On89wXZC8wMQAtaCm8IswMgAdcC1QILAOH+lP+q/yD+ovov+MH4m/i09c3zH/Ss9D71P/ZJ90z48PiZ+EL4XflC+1/9p/
9tATIDnQXNBhsGdgXSBcgGpAcuCCoKdA76EiEWQRlgHacgeSDgHAkYFxOmDEcEbvyy9/31p/Tg8Y3wS/Jr80HyNPFy8XTzhvak+JX6x/1GAN7/IPz391H3CfjB9jf3BfwyAWMDMAM0A0kE7APJADz9vfsI+7L4tPUq9YH2L/eu9hP2qfbq9zz3JvXx9Iv3jPp//Fz+VgG1BNUGEweQBtIGCAgJCVIJ9gm4DD0RwhShFg4ZBBxVHKYYrROJDxwLkAWLAHP9k/w+/Ij6Lvkt+kj7uPo/+ef3Hvh1+Zf5OflY+pn7jfpV94DzJ/Ff8ZTy3fPp967+mwP5BK0FSgdeB/0DPAAa/pv7cfjy9ebzdvNi9Ej08fOb9LL0QPRS9PL0rPZw+UH8Df/wAa4E2AaPB58GtgUuBlkHdwcvBz8J1A3YERUUTRYZGXYavxivFJgQSw1MCfwD+P/o/jb/d//b/+4ApgInA0AB5f4v/Qv74PhK9/n1H/Uc9DDxvO0Q7fzuyPAK8+/3mv3AALYBUwIiA7YDMQM4Adv/t/9R/gL7rvj091P3CvaJ9PvzbPRY9M/zW/R79nX5W/yF/oIAAgNTBbUFXQSIAy0EGgWHBWwGVgnvDeERaRQhF+wZ6BpsGVwW4BJlD7kK8wTHAF///v5//nL+k/9iAVQCVwH0/nr81vpK+SX3J/Uv9Dv0R/Ri8nnvOO/L8XbzC/SE9u/6Ef6g/kP/fwLhBfgFxAQ4BUAFWAJ8/Sv5c/Zt9B3xJO7B7srxVfO189z18vmC/SD/JABLAjwEAARxArwBLQITA/sDrwV1CXwOYhJiFWEZfR38HogdnxohF4MScgzWBb0A+f1H/NX6ufrA/KX/TQFpAQEBdACW/u368/Y19BXyKfCj70bw1O/97hnwpvKk9MP2lPoO/8sBugLgAxQGmgeiBkcEOAOSAkv/9flx9hr1hvN+8fnwbvKy9Ej24fYP+Or6o/2r/vb+8v8+AXoBsAAMARcDKQXqBoYJVQ3lEeEViRjGGrQctxwnGsgVOxEoDYoI2AIa/ub7Lvvx+sz7pf2P/w0BywE5AUD//vtQ+C312fIs8bPwfPHl8Wvw9u4l8BPzVfWM9z77fv9VAocDsQSCBkUHqgUeA88ByQDV/R/5zPUH9XH0LfLL8OLyAPYY97/3IfpF/QT/+/6i/nb/tgDyALoAQwK8BfYICAvPDfcRaRUEFw0YXxn4GV0Y9BQJEXoN5wmjBacB+P8RANr/Sv/e/64BPwMLAzkB1/7l+7/3ffMg8Avu1O0c7xnwf/B88crzovb7+BX7Kv6LAeEC6QFIASkChwL3AKD/AQAeAKb97/mo9yP36vVo8wbyT/Nw9TH2CfYg96T5ofsk/In8B/4fAHcBDQJ6A5EGlAnvCsIL8g3NEIMSPxODFDwWrxbDFO8R+Q9cDl4LuweqBRoFyQOaAboAfgHuASIBqP8B/uj75/gp9Rvyq/B18CrxAPOk9Mz0w/RP9kL4Sfld+mz8F/5K/nn+PAB5AqEDiQMCAzcCkAB+/T36APhi9nD0s/JR8mXzYvSm9Fv1OPde+Rf7P/x7/Xb/kAFxAvkCAwXIB+MIywjfCVgMTw59D2ARbhR+FgIWfxT0EyETaRCPDGoJYwd9BSkDdQEsAYgBLwHO/zr+lvym+dz1TPOH8ozyOvNr9E/1fvV29Yj15/WS9rv3ZPkT+xH8/vzz/rcBbgMnAywCFAKSAcr+y/qS+OH3DfZH87Xya/Rz9fT01vT89dD3Vfk6+lP7nv37/yIBQQKQBHUGDwerB/YIOgqtC5YNrA8WEmsUPBWlFPwTXBOnEQwPYAw4Cu4I3gf2Bd0DngKcAfn//v0C/DH6Zfii9nb1TfWw9Wz2GPe49lD1pvQr9X31HfXZ9V34vPrO+2P9NABLArkC8QIPA00C2wAs/+T8afrt9yj10PIa8kLySvKa8gv0Y/be+L36VvxR/mYAwAF2AgkDNATYBd4GCgcVCFoKKwyCDX8Pcx
GaEj0TnhOzE6ETchLkD3oNoQvPCIAFpwO5AtgAof4T/X/7j/n092H2CfUM9Tv2cPdU+Bv4o/aI9X71VPWb9Z73Nvpa++z7tf1hAEYC3wK2AoIC9AEyAEj9H/sT+n/4Hfbq9Fr1+vVv9V30tfTC9i74XPg1+U/7Hv0c/s7+NgC0AgoF3AU6BgIIjQo7DJUN0A/AEswUJBXqFD4VoBTfEZMOFgxfCUcGugOnAQsARv8s/ln8bfsC+/z4wvYP9tP15/Wy99D5JvqO+YL57fmD+pH6ffqq+3X9uv16/e7+cgGqAjgClgHIASEBHf6y+nj5+/hc98718vX19kD3bvbC9Tz2W/fa9+b3A/mM+4/9Ov5r/yYCRwS4BGUFggdyCXIK5guMDk8RDRNvE1oTnhM7E8kQrg28CycKjweyBJwCDgE0/xv9Yvsr+o34Q/aV9HX0N/Vg9uP3Dvkn+fL4T/kG+jr6hPrZ+679kP79/ocACwOaBEUEKgPkApYCfQBd/ej77PvU+jr42vZb91f33/XU9DX1V/Ya9yf3mvev+fH73fzZ/W4AMgOUBDgFjAZ+COwJewp2C4UNaA8FEDEQmBD9EKIQPA9FDegL9QrpCNAFZQO+AaP/GP24+gL4KvUN8+fxp/HP8hj1s/fH+cz6QvtK/Fb9wf1Q/pb/qAAIAS4B+AFmA1sECwTAAywE3gPlAdL/t/6Y/YD7c/mB+FD4cve39V/0UfTZ9DD10fWU9wL6Hvy8/cz/XAJpBGIFDAYkB2UIDAmXCfIKrwx1DX4Nvw0sDtMNtAyRC+UKIQpyCOAFewOBAQ3/FPy++eb3XfVi8rPwwPCX8bny6/TK97/5ZfoW+0X8m/25/mn/GQBLASwCwAJZBKUGmAd2B14H+gavBdgDuQHS/1z+zvyf+pz4gvey9i/1+vNy9Lv1ZvYu9zP59/uL/sQAyALoBNsGyQfJBxQIqQj3CIkJ+ApPDOIM6gwCDYYN6g27DHgK1QhnB50ETQH+/lL9MfuQ+JP1ofJY8OjuK+7M7vLw0fOF9oP4kvnA+pv8Rf4Y/9L/LwEcA8UEBga2B9EJ9wrECswJoAgcB+cEMAIwAA7/mv1a+5r51fge+Pb2LPYY9kn2XfbR9jn4gfrO/Mn+6wB9A3kFMAanBusHDwmDCXMKOAwyDcoMPAxfDEgMPguTCewHbga8BEECq/8K/qz8L/pJ9wf1z/IJ8Lvtz+xh7RnvvfG69GX3CPlF+j38p/45ADEBwQIVBVoHNQmsCukLpAxgDOgKHQmYB98FWAPLAC3/I/56/F76H/nz+DT4tvYY9qL2Gve099j4ZPqL/DP/mgHsA3sGbwifCbQKeAv8C54M+wxmDHsLngqsCcYI6QdEBjAE2gIaAvAAgv/5/Qj8lfm39iXzke/67ILrxeqO633uEfIr9N/1DPmq/AT/wgBiAuED/QVDCD8JYQr3DKEOnw2BDFEM3QrDBzsFuwMFAoX/9vwD+8X5ZviY9k/1afUn9pL29vZ1+PX6c/2k/zMCCgVaB6UIcwmCCtYLVQwQDCIMXQyFCwgKFAmNCDYHFAU2A+UBdQDL/g/9Vftn+S73uPRV8uLvn+1M7A7snOx97l7xE/RD9p74L/vV/WcAegICBOkFUQiVCo0MjQ7bD+EPIw9BDmAMFAlnBW0CxP/8/Jb6Ivkp+BP3DfbT9TT20fZ29zn4rPk//PX+KQG1A5kGZAhcCQ8L2QwhDcoMNw3FDTkN7gtZCo0IcAYXBIABNv+r/aD8ovvY+kX6Yfm491z1nPIK8NPtP+yd61jsGu6T8G3zx/Zu+uz9rwDcAiEFbQcYCbEKAQ1ZD3UQnxD6D18OEQwSCS4FvAGp/+z9uPs1+tn5pvmy+KD3IPfe9ln2PvZH9zH5ZPva/aEAzAM8BzAKvwt5DIgNfg6zDlYO5g1EDcwLZAl1BpcDlQBU/YL69fj799r29/Xu9Rz2jfU99NjyZPHe78Duoe5k7wrxS/Mb9pn5lP3mAAwD/w
SJB98JVguiDHEO4w+yDwkObQzlCngIUAW2AtwANf93/f77JvvP+j36FPkN+MD3vPeo9yv49vmC/CT/uAFBBJgGmAjeCeAKTwymDTEOQw4PDvMMpAoJCHAFfgJo/8/8d/o/+In2gfX59PH01fTm82nyUfGh8CbwHfAW8eHy7fTZ9l75zfxuAAYDrQSmBioJCwsODN0Mdw2EDRUNBQwvCvoHuQXuAgUAP/6k/cj8UPsq+uT54flq+ab4dfgF+eL5zfo5/FD+ygBCA2oF/AY+CI0JzwqpCx0MKwzZC2QLUAryBwoFbAKq/zT8uPga9rP09PMn8xbyz/G18pnzw/Me9AD12PV69k73ufgv+7v9M/9dAKECCAW0BmMIIQpPCxgMcgwvDJQLngqOCLUFewM1ArwA6v6f/SX98/yM/Mf7MfsL+yv7fftI/Gb9k/76/34BowKdA9sEBQarBkQH8AcwCOYHXQdgBgsFyAN9AosADP5k+8/4dPZt9JzydvFU8RDyVfPw9Hf2Jvgu+gX8Gv0Z/nX/vwCxAaYCpgOjBKQFewasBokGjAbOBgcH7QY7BnYF+wRaBEsDXgKhAcwA8/87/3z+/f0I/if+BP4Q/p/+Vf9QAPkBlANyBMUE5wTeBKMEIQSaA18DRQO3Aq0BgQB2/1r+8vwv+5L5cvht96v1mPNR8hTyF/I78hXzBvWL9yb6sfx7/zYCbAQdBkUH4gdoCIMI7AcQB4gGAgZZBfgEowS9A7kCOgIDAqEBIwGjAA0Ao/+U/7n/EACAANgAMQHXAcYCuwNcBOEEjQUhBjUGAgbDBXMFpQR+A1YCKAGe/+j9Qvya+t34dfeI9sv1P/UX9dH0ofQP9aP1vfUo9lj3w/gw+vX7A/45AHgCdQT2BUMHFAgTCKMHkAdyB60GnQUBBaIE+APiArcB5wB2AOv/Of/V/jf/zf/w/w8A1wDaAZYCPAMMBP0EFAYPB58H6wdNCHEIBAglBxAGkwSkAmQAFP7T++b5Pvh99ov0HfNA8nDxsPC98KvxGfOP9MT1Offa+e/8Tv8+AY8DzgVSB0sIDwmWCbYJ9QiQB2kGiAUYBAYCJwAu/5/+y/3H/CD8A/wk/B/8Rfwu/dv+hgDYARADdATuBWkHmAhWCdYJOQpBCs4J9AjfB30G1wTJAngAOf4Z/Kz5FPcJ9Qz0p/Mk82Py/PFg8mnzQvTk9Bb2L/hN+hL8Ev6lAEkDngWlB3MJGQtnDN0MoQwIDB4Lhwk9B88EqwKyAK/+sPwO+yP6/flT+tf6b/tY/H79tf79/24B5gI7BFEFOQYlBxoI0QgZCfcIjQjnB/QGYgVzA1sB+v4g/Br5TPb08yby4/At8Cvw4fAN8mDz8/S99rL4tPrA/MP+vAB6AvMDNgVQBjgH8geFCPYIRgl1CU4J9AhjCEYHjgVlAwkByP7W/EH7B/pY+Vv57Pnd+iX8tf2P/38BOwODBFsF7AWDBg4HMAfdBkgGnwXOBMoDowIuAXf/t/0O/Hn6BPmt95L2wfUq9bP0fPSM9Bv1Sfb+9wn6YfzN/hgBEAO5BAUGEgfNBygI5QcdBy4GYwWZBOgDRQO4AkoC4wF5AQ8BwABqAAcA1//M/7//0v/Q/9f/BQB0APwAWAG0AU8CHwPRA2AEpASABP0DOQMpAugAdf/5/Xj8GPsF+lD5yfhA+LD3Offv9uX2HveM9wL4vfjg+Wr7IP3E/kgA4wGmA2AFtQaIB+wHGAgYCNoHTAd0BjUFvAM/AtgAj/+I/uf9mf2d/RP+5v70/x4BJALsAoYD6wMCBOADrgNSA9YCYwL0AVoBygBkAAkAnv8k/4P+zP0a/X78t/vs+lD6u/kY+Zz4VfhI+Hb4yPgt+c75u/rE+7L8iv1j/lP/XwCAAYgCcwNLBCkFEgb+BpcHpQdTB98GNAY6BegDbgIEAa//ff6U/R39Jv1n/dX9lP6k/84A3AHDArEDvASpBS8GXQZIBuYFHwX6A5ICFwGH/9z9If
yd+nv5xvhk+GL4xfhl+QP6jvoc+8z7jfxV/Sf+Gv8wADABBALQApYDNgR8BFkE9wOoA08DvwLzAUoB4QCjAG0ALQADABgAPABDADoAOAAtAP//r/9X/yD/Bv/0/uz+/P4q/17/oP8KAKUAVgHuAUcCdQKaAqMCUwKLAXAARP8s/jH9P/xn+7f6M/rN+YL5bfmv+Tz6Bvv++zD9hP7I/9AAjQEzAtYCXAPKAysEiQTYBPUEyQRvBPsDegPjAjcChgH7AJIASwAaAPP/5//w/w8ASgClACABogEQAlICfgKGAngCQgLYAU8B3gCMAEEA/f+f/yn/pv4w/tj9pf1//Uf9+Pyg/GD8R/xg/Jz82fwP/VL9l/3V/R3+j/4p/9L/egAJAZMBLwK5AgcDGgMbAxsDHQP7AqcCMAKzAS8BngAEAJH/Q/8J/8v+rv7T/iv/if/I/xQAjwAWAZ8BAQI/Ao8CrQKQAikCpgETAXMArv/B/tb9Cv1r/N77bPsl+xD7Kftm+9L7cvw2/fj9ov4n/5b/BgBdAJYAtgDsADgBewGrAcgB4wERAj0CXQJ6Ao4ClgJ9AkAC/wHCAXsBKAHOAG4AIADj/67/hP9p/2v/gv+q/+H/KQCKAPkAbwHlAUsCkAKwAqsCiAI/As4BPwGeAPP/LP8+/jf9Ofxt+9z6gfpI+k36f/ra+mr7JfwL/QD+8/7r/+cA3AGzAlgDxQMDBBUE/wO9A2wD/QJqArgB8gBBALb/Wv8r/xf/HP8x/z7/Tf9r/53/0P///x0AQwBtAJIAoACWAI0AhgB2AE4AHwDk/77/qv+a/5T/kf+M/2n/MP/s/p/+Sf7t/Zb9R/0H/dT8uvyy/Lj83vwU/WD9u/0m/qb+M//Q/3MAGAG1AUYCwQIOAzcDPQMfA9YCaQLcATYBhwDV/0j/0P54/kP+L/44/l/+oP7+/m//6f9vAPoAjAEDAl4CkQKdAoMCSwIAApsBNgHJAFcA3f9j//X+lP48/u39qP1q/TX9Ff3+/Oz84/zx/AX9E/00/YT9Kf7d/jr/Uf9l/5z/DgCsADkBBwLVArkCFwKqAVUBDgHbAGcA8P+3/2r/Ev/m/vb+1f6P/mj+KP7//ff96v0G/lX+yv4+/5L/qf+R/5H/t//s/xgADADa/5z/Wf8R/8n+gP4t/uP9n/18/Yj9q/3N/ej9A/4d/lH+kP7W/iD/Y/+y//3/TgCcAOQAKAFnAZgBsQHDAbcBpAGVAYYBgQGSAbEB2QH2AQICCQIYAigCLwIpAhcC9AHIAZ0BeAFOARoB4wCmAGUAJgD1/9H/uP+p/6L/ov+v/6T/mv+M/4X/iP+J/3n/X/9N/zX/G/8J//T+4P7U/sf+wv7Q/u/+F/87/1n/cv+Q/7j/3f8NADoAXAByAGkATwAxABQABwAFAAAA9v/t/+3/5//l/+T/3P/l//3/GwA2AEwAVwBsAIgAmgCkAJgAhwB9AHEAZgBYAE0ATABVAF8AXABRAEkASQAyABoAGAAlADoAQgBHAEYAYwCGAJgAmQCgAJ4AkgCJAH0AfACOALkA0ADhAPoAFgEnASsBOAFBAUwBMgEaAQ8BBwHsAMgArgChAK4AtwClAIsAhgCIAIYAfgCIAJwAqgCvALAApwCqAMAAvgCJAFYATwBmAIYAjgCvALwAmgCCAKIArwCYAIMAfACWAKMAgQBmAHsAdQBcAGcAYwCBAJcAhQCWAJwAsADwAAYB+QAOAdAAmwChAEIA7P/l/5f/Of9l/3j/Lv8i/zP/S/9K/2//BQB2AGQARwAnAZUC1gKPASsAm//q/xQBZwEjANb/XQCvANcAXf+R/pb/RP9r/gkAOwA/////CgD8/34A6QAsAa4BuQE0AYUB6QEUAQcBTQFnABkApf/r/mj/BADO/wv/cf4Q//D/u//5/z8AMQCiACIARAA/AXYAcgDpAHoAEQHPAPD/uAB3ABwAcQD+/xcANADx/wQAlf9m/3b/Uf91/5j/Yf
8x/zH/C/8v/2//Df9D/9r/rv+r/+f/+/8dAB4ALgBeAGAAOAC7/7T/NAD9/7f/GAAyAPv/EADL/3X/ff8t/+/+JP8+/0v/5P6O/hr/DP+L/q/+pf6L/sr+ff4w/qf+Af+4/nj+2v4Z/xf/Zv+B/43/8f+c/9b+QP+h/1f/UP86/0T/hf8G/47+g/6V/jr/Lf/I/ln/o/9b/0v/1/8ZABIASgBUAPH/FgBkAM7/wv9OAIcATwBw/xD/Ef8h/0D/YP5m/gL/fv40/hD+Gf59/r/+Iv/x/mL+zf4y/8r/xP96/pn/iwEmAJv+KP/F/2oA8/8p/zUAxQA7AIMA+QBOAYgB2wH3AZUBUgL4ARYBRgK1AukB/AEYAjUCCgIdARgBfAH2AKcA2QChABQAm/+V/1//9v7Y/mn+4v23/Xf9//wN/Sr9k/xH/IH8lPzG/Nv80PyO/TL+qf2x/RD//f8QAMT/VgANAj8C6wAJAdgCVAPOAIv/GAMsBS4BCP76/0kCmgAq/b/9nQCl/z79Df6n/xEA8v8M/9H/4AFyAcMAigK2A50DKgQWBJEEoAYfBiIERwXCBoIF6AO2A2AElwP2AMH/fwD5/2P9k/ug+8L7xvpB+WX4U/he+Dr4dfgR+b/5gfoz+3f8jv3X/SP/sgDNAHkBOgMMBKAEOgVVBVQFagXsBdwF9QTCBPUECAQvA+MCmAJMAtwAvf+rAAwBLv84/tD++P5o/s391f38/gQA5P8uAFsBOQLgApwDFQRSBPwErwVzBSYFXQWyBJQDSgOZAjkBmwDo/2n+Q/1s/Fr7MvqF+Yb5lvhv9pL3T/y4/Er47PfB/UcCTgGa/XT9PwO4BdQAdgB+BQsFtwGZAhME+gNCAx8BAwDqAc4Bqv4l/tj/J/+M/eH9gf5s/lT++v1n/qwAcwFIAFEBxQM1BNMDswTqBD4E3QQ2BUgDggK/ApMA/P6b/7H96frz+yn8QPi19p75e/rK+Cz42/Z4+Dv/kv9Z+fb7nAMrAh//sQSGCJ4E/wNGCU4KRgf1BZ0FbQa9B/IE5gDmANsBRQCh/SP8AvxQ/L37Qvv8+wX9Hf5l/ir+Nf85AesBPwGpALIBIQT1A+sBeAI1An3/vf+3AGD8e/p5/fz5dval/FP9M/Eb8Ir9rP7c9Dnzv/qw/zj/cP0w/V4CVAgxBuQEZQvDDGIIzQliDHwJagbABYYEfgIRAaT+w/to+z78Z/pX9wT4X/t9+1P4Ifie+9L++/9OAPwAPwKpBBQHFQfVBf4FjgVAA14C6AJDAf/7R/ht/G4AyPg97pLwLPvd/dP0Yu3E9B4AyPzD9NH6SAO9ABoA3gXECIUJowkMB4AHdAtVCpMFAgTpBMcElAHD/ZP9NP6e+7H5fvvo/Fr8PPwV/XL+WAFlA4wC1QEbBM4HDQlgB+sFewU1BFEDHATTARn9CvzS+/D7hv9A+wju9vAvAXb+be9m8Dj7Qf0y/P788vpF/nUHNwdkA3QJXw4wCjoIFQulC+4IEwY+AxICzgIEADP6gPjK+sX5vPWn9Lv2S/jN90v3Wfkc/aD+t/8aA4cE/wNPB7oKfAhoBmEH9wWlBZ4I3QP1+kD/4gfj/43xAfOW/Vn+4fWN8YP0Y/l4/HH6/veh/M0CqgLKAaoECwhYCmIJ/QbYCAYLPAcKBKYFZwXqABv9Vfw4/Tn9PPr/9sH3Yfuo/Eb6LfrU/sMC4AL6AoAECQe9CcMJnAhQCW0HeQOABboHwf/W96L8FgK2+oHu9u3Z+Gn8cvEb6eLwy/s9+in0Gfh6AJ4B/QBOBkMM3w0oDV0MhQ6QE5ESgwpaB+4LmQzVBFD/1P/g/vL71fqH+LT1B/dW+HP2j/aA+pj9O/7n/aIA6AVsBvUEGwh6B3kCQAXlCKMDeAHmAon7pfmbA1L/I+0z7UT+h/6a7hvqzfaI/pD4S/RB+MH/wQVWBC0CQQm8D4gNPQs6DR0Qrg5cCowIHwj/Bf0C7v4F+hv5kvoc+JDza/
OH9vX32PfB+ET7Wf2xAJwEGgW/BF4I3gtrCuMIHQnyBrQGeQnlA9b5pf0XB3D+hu0s7fr54fxU8rbrr/AR+AT6q/Xb9DP+SQX+ASYBxgeGDnkQQAuECOcPQBPZCr4G9wldCLIC0f6y+yn6pvre90DznPKY9LL1TvQT9CP5jP6O/oH+FwOxB7wJRwpJCncKgAhZBaAHJAswBUH8UP7cBE//be8z7Cb6S/318HTqrfID/HL5V/N0+EwBtACR/ysF2wr5Du4PxgoHCmESIRRTChgFvwj2Ce4EHv/z+lP5MvqO+P3zhvJs9Jz0CPRq9q/6t/xn/Db+ZAMEB8kILAySDOcGowT2CTQMUwaf/5P7yvvBAZsAJPEz6C7z0f0F9gjpz+rv9738Hff39e37fgJxB5sHzgazDfoTCRDaDGEQtBGnDuYL3gjGBboEkAKB/If3bvcb9zbzmvAS8m/zmvLs82T4NvtM+zP+3ANIBhwHiQsXDqAK2Ai3CmcKGQm9BgX/y/rJAYUFUfrh62PsyvlN/VXw6+mI8qT5dvpM+R/6sQGECFEFxwQKDugSxQ8XDSoN6g4dEJMMbQYlA2ECOADE+6r36vSb87fypPHE8cfynfLG8lb3Hv2t/9cAHAJaBFYLzhAXDSQJnwoyC7MLcQyDBTf8Ev47BS4BwPLH65bz3fuf92juJezi8vj6jfkk9UX8EgbwA5QBQwpaEtERXw02C+gO+hIfD2cIiwYgBisDo/8e/Ez4/PX+8z7xkvBv8kvzI/KD8vf28vyL//b+mP+/BAcLQgzMCQwJjAnnCZ0Kiwl7BGz+/P2gAzQCavOk6vr0Af+R9/jqp+25+t37DvSK+ZQFdQPK/lQGDg9oET4Q7Ao1CYoQSxNZCy0G1wanBE8Bbf+M+5v3rvXO8sXx2PSA9Wny/fHx9cb79P4t/TL+DQZpCRAH/gpZEL0MgAn1DEMNfQh/BUMEjgJm/077UPk298rxrvAD9EvxBO3e8Qf3qPTZ9AH7Uv+vALMBBwTpCWQO3Av9CbsOhhE4DC0HXQkfDIQGrP3e+3f+vPxp91zzJfIC9ET2vPW29F/22/p//0YB7wD3A34JMAu+CUQLRA6zDE4KCgxiDM4GhwEpAowD9P5t9970uPZn9oLyj+/H7nTvGPNY99P1JvKi9rwApwSdAGX/5Ab6DiYOfwgvCb4PRBHvCh8GgwZrB+4Egf/p+mD6BPvK90rz0fNs98/3rvXE9rf72f8mAID/MAPjCYkK/wXdB1EPbw8SCDcFKQkjCxAGQAD/AH4Cjv0h91T1avdB+nz2oOyp68z1Ifr89uz0xvKB9QQCuwiXABD8OAUPDuENIQrPCPAJxAooC9QLTAijAfkABAQLABb6Q/sg/YH5VPV79Vj6Hf5C+S7zA/oiBT8DQfzI/ykHCAgmBuAGHAhSCAMIhAduBwcH/wQHAoT/wv/AAMf7lfON80X6kvro8iPu9fEt+Zj5W/IQ8dv7hgLU+5L5NATWCiUHXwQgByEMzw8PDQwHjgfPDHAMGwazAfMCrwQOABP58vkh/z79UPZa9B/4L/x3/RD7c/jG+nIADwSABKsDYAKgAwQI8Aq2CDAE4AKrBqgIJwL++mv9gAFv/N/0iPSw90f3zPJH73TxWveQ+r/4DPYi+ZsAIANwAjIG8ghXByALKxKGEBUM2gxQDXML5QpsCG4DkAGIAcf9xvlM+aP3TfTo9EL3p/Uz9FT48Pu7+s76YP/LAkQDOwQAByUJ2gmBCuYI1QRZBM0H9gZF/zL4uvhn/yoAB/QG6KHtuP0p/wfu6efK+CEFkf3h9/MAogjOCHoL5w6BDqQPYBE2D0IOrA5FCpwF6QUCBA793/nC++L4TfBg7Z7z1vfC8wnvhfGj9277X/2r/hX+OADSBrQKKArMCjILLQh3B3QKYAmyA3b+mvox/LoCT/386bPnZfwrARbuwuYY9TH/m/0x/OD8wwLODEoOVQozDhwVSBV4EagOSxAxEtQLxQ
PBA+ADo/5u+pn14u8P8Vz1TvOW7iftQO8w9UH7/Pvn+DX6/gEFCNkIIAvfDH4Jtwi8CykLLAolCAj+6PfEARMImfmm55jqEfuR/Svv4ujY8RP6+/x1/en8zwAbCagNZQ3IDUQRHhU/E7cO/g6tDyoLVwZZBP8BkP1G+P308/Ne8obx9vA47iHtofBh9Ez3DPr++o786QDuBTsL3g2gCgQINAp4DCgNpgs8BNP7If5rB/gEyfL55h/w9v1P+4TuUOlh8BX89ADQ/Pv6iAGBCbgNxA8uEfkRrRHzECoRkRHaDlMIiwPYA+MDDv4T9kjysPIw81zxOu+w7rLu3O8I9WT64voh+/j/mgSXB44MBhCJDi4MhwrtCbwMqgwsA+b5Lf2aBUUBLu/J5cLxZP2J9Rjpner789T69P7K/pn8JgFSCwoRLxE3EdoR1xF+EloUqBNzDX0FKwMLBvoDQ/pT8uzwz/Ea8T/ub+w/7rbvHu+h8ZL49P1I/1D/EwIsCekQGxMIDzIKwgoHEI4QEgmyAJ/7qPyEA3gAFO3O4ubv+/uG8z/n4+hG8037Jv6s/bT9zgOWDVUR5w95ErUWLRVTEtYTxhSSD3sIlwUlBY4C5Py59I3vHfL7833uK+ri6yvtC+6w81n5tvrR+2f/QgRpCvoQahOGD8ILMQ7+ETQQzAokBID8U/xZBHwAo+zl4lvuAPjp723kCudd8ij45fef9+n7rAVODlkOdgzXEksbkxo1FSYVaRa9Em0OxwpdBUUBSf049nzxJfH67hDrL+nB6ATrtO5G7qjvtvhm/4X/gwL2B3oMsBPBFxsT5A8aEnQR2A4ODdsErfr8/VwFk/mr5KvjYPJJ9erqyOFB5JzyjPvw9dX0Xv8dB7wLLRHAElcVBhviGdkVuxiDGXARbQrGCO8FXAAw+g7zUO467hjtFOn152Tpneh46t7xn/bp9yr9igIDBYgKghE2FSQWWRNwEYUVxhVSDmEKOAct/kb8awKO+UPlIOLn7xL0Xedw3XDl7PN99gjzevUc/QEIuRCnD/EOnBi/H7QcRRkHGfQXnhXOEKwJJQVTAaP5bPOK8V/toegF6SzoreRS5y/tmu5V8V/34Prh/0wIPgu+DPMTIRgWFi0VNBQJElcSZxBpB3L+rv0EA+H+/ugr2y3quvj+6gHbAuAb7Hfz4vdn9Xn0TAH3DqIPVg/KFpocHxwwG7QcVxwzFwAQIQtLCdQFU/5L9mXwDu3d66jq++cL5qvlIOhI7mLzWfUj+dn98wBUB60PZhOYFZcX2hTOE30YqhdTD2IJ4QSD/ij96f83+Ujmndnp4jX1OPMN35XW8OSK+Kr9M/dd9hYBhQzcEp4XaBqWGiobYxxMHbsdmxrNEXII9wUsBxMC2vWU7PrqiOsR6jToTOfM5VXmi+0w9hT4+PfA/YIEegnJDxUU8BTYFn0XkBW+FigXdhBOCuUH5ADo+fL82fwh7AvaK90v7ufwxuH02a3iqu9++cT67vWQ+0ILXxLgEnIXfBuhHNke7R+NHA4YkhXyEcIJ1AEq/2j8I/Tb64bpwumK6OHmZ+YQ5i7oUfB09033s/i3AecHBgq7EPEVJRMnExgYiBfFE9oRRQ1GB/UEyAFw+lb1evRO7eHdstuq7Jry2+L33N/osPJS+S3+if32AbAMgBEwFgMeUR1XGUscdB9SHdEYJhPpDUgJEQP8/az6sfLA6rLrI+zS5ZLkn+tW7k3sQ/AG+L77Hf90Ba8HyAfIDjEWZhSIEg8WGhZzEvUP5QtJB9IEJAEh+uHxDe+19HPwfNnE0lrqy/Zg567cgOPc8Bj/CgUp/s/+3A3JF9gZQB1zHicbnxqNHTIeTBonEuwIWwTfA7UAFfm08LTq+OnL6yPqDOe85ljogu0x9n/5vPcm/NoFYQqiCm4NhA8WEJAVyRroFdAPThJLEyEMTQWHAQn8jfUB8X7sU+hD6N7pYOIA1gDc6PM5+hrnGOLn+N8LuAluBRgLix
JDGB4e8B8BHc0bFB3OG/kWohFjDfMIwwID/OD20fMP8WXsN+fz5qPqvevI6hbsc+9e9Ur98wACAMoC8QowEK4PTxDNE6cVqxbiF/IUag//DGYLewYUAL362PUK77boT+kx7NrlBNzx2SjeE+d18obwt+TH7BUHUw+VA7kF4RaPG7MXrh1fIwoc9xb6HYsf7BJgCbYLQAsvAKz2/PQ99Ivwjeuc5mXmEewk75TsPOuH70/4NgCbAL/9WQMED0cTsQ92EJUWARmzFYASHhKKENULLwiIBbn+bPa08oruFuiE54DpoeJn13TW5OLb7jvsx+L955X8zwgMBW0Duw0nGb4bJRueIH0lnR4JF68cuSBMFbAKMQsyCZ4A3Pqc9nvwI+3R7HzpG+Tw427q5e4o7Gnr5/P8/Mv/xAEJBpwKnBDVFUkWHBbqGEMasBcAFr4VZxHgCQ0GiQOL+37zxO8f6YrjX+f85CPVRNA434joN+Ip4Yrs/Pbu+XT6hwCYDcsV2hKXE1YegSQHI5kiLCEFHSMcGxx5FpkMRgS4AdkAKPkN7tvqjOv85pfjF+Za5KXgeOil8t3x2vL8+wUBhAT4DdMT2xMnFyYcPx72HZIb+hkUGvkUCAw1CSAIlf+R9kLzkOt54Nbe8+C73KrYt9bG1X/dCuld6O/jDO3D/McDYQO2BVcQpRxIH8EbGh7UJOknyCUHH5IZChvmGbMQxwgpA1z7RPn3+BDrMd1j49LrsuOZ3KXij+gC6qzvO/YG+FP8IAdIDzsQQBPDGxMfxRvtHlklyx/jFmcZsxpQEHwH9QOA/EX1XfEh6RbgadtW2D7ag9zI1N/Nr9R24HDnC+yE8BL0f/eGAIASxx0vFa0N/xz1L90uBSOiH+8jgybaIjEZgRE9DwUJaP049zv2aO/u5JvfQN8G4N3fLN3E21riUe378Unxo/WQAUYNsxLwE98WsR32JSQpASQpIBEkOyMvGdoTDxLhCTgCwP5Z9arpyubl5OzcfNVC0hLRvNFX1DbYydeK0+DbK/Of/Yf03vQqCL4ZKB1YHEokqC5aLDgnSDH/OKsqgBsTHecb6RBvCkIF//bz6xnr7egq4lTb19R60+PZnt+Q4Dzhq+R87qD8XAQaBZ8JOhVBIWgmDCWlJbgqqiwTKvsn4SPkGp0UuxMVDRv/c/bN88nsvuEr2snVudLS0JvN+8oZzwLXltde0onZOPFp/3H2Y/N3CCgcCh7mHSsjwCgcMPwz6C2IKd8rKCfxG/4XDRZEC2n/dvns8/7sD+YL3xraJtjU16TZtdsE23/caeWt76D0nvlwAsgKnRLsGscf9SL2KC0t4SsdKlQqbymvJN0bjhSIEqIMMP9S9+D2EfCO4l/avNg31k3RMM4OzfDKds2Z2vXhldaS1CPxbwl3/rn0AgnEHSwgViM5KjEpoioENZk3li4BKV0n+x8hGe8X+g5B/UT2TvjI71LjaN8E24HTL9N61tHV89b63NfgzeR/8eX+KgF/AmAQLR66Hu4fhCskMdYqhSoCMoMv1iQgIXYe5xKPCSMH6v4R8m7sb+oY4pjWFNFF0QXOQshTyaDNJs861uTe2doo21XyPQimBnIBOArrGkYphi6WKyUqai4FMz0zPTAvLPojLxmKFHMTjAlV+lzyWu4E5uneQNwq1t/PctPP2H/VG9Xb4Cjry+369KcBxQrbETMZxR0ZIxErGy5rK5wspjACLpAlTyCqHZ0WcQ32Bmv/nPU88QDuyuAM1dzYwtsxzzLGI8zx0D7QBdeU34LdEN2m6877jwF7B/kNFQ7kFhIsuTImJt0i9i4JMkkq6CdcJsAarBHfE1wQ8gDz9xr2pusz4JLhLOKx1xbSE9bF1vjYBeUk6GrgfukoAe0H+gA7CcMbkSGSILsn0C6LLdUsay7kKuYmNyeoIHQRZgtIDgoGWvPH6nzrhuXN2p3UttC3zcXNCM07yxPORNU23IzfzN875Sr25Qh5D/4LKQ1rGjQpwS
wZKq0rfC7tLVAsqCnVJNIdfhNuClUJ1Acb+0fsw+dX5/7hdNoC1m/VXdU71NHXyOH85tDlfe12/nkHVwcfDfIXiB1NI1otqS8aKdIovy+CMFEoxB+zGmIWlxF7Cjb/Z/X/8uDuc+K32YPaCtjTzqnL/M+20AnPNdPD11fZJ+Th8xvzQO9DBCAc5BdSEHwe+ytdKjUpPS0gLX0ptyRUHWEaNxw0FD8E3fwS/Cn4avI/6uvfgNtQ3jXeQdhy1WfbrOKE4tbix+2g+X/8AAGmDAgTmBWGIEwqnifHJYgrZS0WKxkqvSShG/QYNhhzD7MC5/q390jyKujj3g/cD9sG1hTRu9DN0AbR7tSp2MTbdeQL7Z3vwPamAuUEoQVKExwgRR87HY4gJCQJKgQuZSZkGrcahCPjH9cMJQGgBW8Frfdc7NrokOQ/4W3fktfa0evYRuBT3RDdf+b67srz7fuZBF8JyhCaHHcjSiQCKV0wzy93K4otpS5aJt4enB2jF+cMNQf+AMr0NO336tniPthE1NbU19T+0XHO6c5701zYl9x84aLpbvEu8afzoQcPGZ8RiwYfEi0m/yvIJAYeeR5TJOQpHiccHDsURRMpENwIRgMO/enyZepj5w7lF+CB2kfXV9dh2Sbcwt9G42nnee96+an/kwQUDsEY7h1yIEYnTS87MCItcy0WLYApWSe0I5ca6BLFDt0GEv2Z9z/yGulR4Wjeht2V2VnSec4c0v7XhtmO1gzWJN3N5uXr8/DA91f4mfkiClMbmRgcEW0X1x8VIq0nrys1I4oaxR6GIygcsxA6C8wI8QP8/M71cu+26aHkVeFB32PbzNeb2X7e2+BZ49vqb/PD+I//Ywr5EuQWqR3LJ88qDChBK2MxcS9SKMYkSCTlIZgb8xJ5C0QG3QCh+RHxNun44/3gnt1v2d7Wa9e+2DzY1tgK36Tkc+If4P7n5PPT+Eb2lvIY9/IGHBPbDdQF9wx0GHAYixXuGtoelhlRFgoaVxq1FaQRhQudBKEDwgPm++/v+ukq67vrCucA48vjhuVx5wDszO+f8g/6ggJCBYkJuhSEHNEbShw6I/soTii2JdYjdiC3HbgdghsOFOIL4AbiAmT9bPcd8gHtVukJ56TjzOAI4nTjwuGD4efkZ+dt5wjpLOw46t3mMO4/+VP5Z/Nb8Zr2/gN7DfkFA/5RBycUExQ4D/AT7BtrGOoQyBT7GTYSwQnnCaEGfv7W/Lz9FPhv8FTuDe/V7nPvNfF38G7vsPQd/qECbQFQA0cMsxP0E24UbBrtHkUeaR3THrQfBhztFYgUNRX+DiAGJQTLBCn/Q/YW8jvxOO8m7YbrGecr40bmqetR6vzmh+lf7I/qlOpU7l7uJOq76rPwKPN98U/zwfRx8RX2KweCDtYB9PrGCg4apBRXDE4StRgkFLkRwhYEFocNOQiPB3oGMARSABj7Cvi299z2y/Wz9Yn2RfgP+Zr5Yv8QB1EH3gQLCiASbxRrEwQVdhfSFmwWhBmuGWgTeg4RD/gOwQqxBYsCUgAd/kX8P/r99pjzuvCS7fTqeunG5y7m6eWX5drkeOYf6XznhuPB5ZHrguq65i3rMvLk8yvzQfE18RD9UQ3rCtb7SAAgF2Adew+GDekbVx7pE/wTDRyXGbQP5wrYCosLJgoVA3r6aPgl+zz77fbq8iz0cfnI+q34O/weA6EDrQLOCLwPoxDeECUUZRY5FsQWSxdiFaUUzRZDFRAPZwy9DN8ITgJa/sb7uPd+8gHtFelN553kat/d3Kze9N7X3IndDeDS4KHilegI7QTriOqr8oj4X/Wt9ZP9Uf9F+/X9rgS4CBcMbwtgBWwHvxQGGdwO+Au2FY4XLhA4EXIVDA5/Bq8KIw00BqMBlAJiASn/TP+a/XP79P4oA6v/4PubAWIIyAYiBb8JJg50DmIOARF/FBwUDhGUEIkRcBCKDdgJhQZqBKMAzfoL+Eb3lPLs7DDrBupg55vlP+Rc4o/iH+
UL50rnTOj760/wQfK89Ab5LPrf+Jn6sP29/Zn6bve59+/6efuj9irvc+3I+IYE7P3D8VD4DAmcDcMIJwqYEq0WnBOjFJ4d3yB1GiEWdxa3GFEcVBmPDQcI5g0BEEkI0wF/AuMDywAb/d/+mgEp/n/5b/v/ADUDqACh/Xj/YARsBLAA/gCsAyECA/9e/8sARv+a+2H4vveA+R35u/T48OzwzvKO81jxLO6F7tXvt+2o7ZjxAfIn8L3wZvHW8VrzG/OM8YDxZvO79C7yK/AP9ev1Dusn6XD7wwd2/a7zivzXC2QTdBPOEjUYhiF6JYYlLCl3LZ8rOiWqIackiSYLHu0RtA3ODo0M5gRk/GX3g/Un8xXwF+8Q7xnsZegz6uLw9PTR8y3y5/Rj+zkBjAM8A0oCJQPXBv8JGwnFBqAF6ANxAvQCEgJX/jr7aPir9Nfz/fQW823udeoy6W/rje1U7Ozqt+tP7Q/vdO8+7nHvG/Jj8bbxbfXu9CzzQPhW+c/wvPH+AecJTQLg/0AKbhP5FckaZiPyJ4goyCkKLJowcDXFMkkpJiNXJH4lZB5TER8IGwWtAW/6E/JZ69vl9uCf3gjgCeGy3RXbrN9w6DPuxu998Wz27/zoAqgHaArpCtILVg52EA4RJxAXDSYJAwf9BTUDQP4I+mf4efb/8Sbuoe2T7Wjrtej55xfpMOqT6Y7psOys7wPvb+7v8JLzB/Xn9r74yfiA+EL83wDA/Sv5qwDtDbcSTBARDq0QABs7JYEmOyRxJpUr9yxYKYwp7S71KqEcMBZGGWAWtAwQBKH73PTD83jxfOiQ4U7htd+K2yTdeOKK4sDfq+Pn7HHzyPZ7+tb9cQGjCF0QdRJoEUoSbRMsE0wTgxJfDX0FRAH1Ad//yvj588Pxau1F61DtMexa6MzmZefV6VjsHux27I/vGfIR9bb5VfzV/TMBHATWBGQGEQkrCcMGEwZkB94I3QlqB/8A6v+uCusVgROrCXQIWRJXHMkecxzOGnUaFRv5HEkdMhrNFJkNEAfOBJwE7P+x9uLv4+0F7P7n3+SN44vhBuAx4vDmC+oW67rtl/MA+kb/vAO4Bk0IWQvxDz0S1hAEDmILdgnkB6kFNAJf/QL59veX+Av35fQI9HDzs/Nz9XD2zfXH9dX3QvtJ/ZX8HfzP/r4BTwLFAkAEUgULB/kIbAh+BzgIqwerBQcERgOQApj/Zf3nAJ0CCfwK+AT+SAZoCpwI0gJEA5gOURfQEwMOgg8uE08SvQ8mDx8NrQd3A6sCHAHH/Nv3NfKO7FLrS+0k6/7k5uL+5vzqwOtP7BPuDfGi9lD9AgGVAoAFVwkgDGsOXBDpD3kN3wytDZ0M4wpqCTQHkQVcBC4CSABl/13+Gf2B+8/5QPmB+Qr6vPpw+p35t/k1+739MgB0AJn/uwDOA0MGtwaIBY0EXwXOBl8G7QM3AR3/qfwY+ov4WPfC9Vj1GvVR8v3xNPulBXkEWfz1+30FiQ5lDhMJ0Qc4C7INmwynCX0HKgW0ABD9xvzV+yX4ePRy8Qrvfu9F8cTwUO/Q8LL0WPdJ+c39jwJLBH4GHguZDlcQIhLPEvIRWxKEFOUU7BC5C6YJzwjqBYUDvQK1//b59PYQ+WX7nviO84ryzPVD+c36CvoY+Xz7/wBhBJsD0wJwBekIcwlVCCUJOArFB2EElQTqBJ0Am/sh+m74afV+863wmOu86Evrge5i6y7kVOSX7lD2CPTv7znzh/s6AoQEMQZfCkkNgwwuDtQTdBWCEkwQ4A4xDvoOXA2xBiwAtv96ASf+zPhR+D76q/ld+Qf7rvtT+zr9VAAHAjYDzwRgBZYFQge6CRoLAAvaCekImgk6CvgIogYJBA4CCQIVAl7/8Pyb/Xv+XP3Q/PH98/7R/nL+//1h/dX+jwGiAYEA0gDFAMX/sv+T/wn+v/v6+db4TPdy9Vb0i/Jf7yftUO0S7gDtpuqd6rjsF+5M7+/v++158e/+cwavABn/og
g9EBoSeRNsFtwaTBtVFxUYdh2ZHYkXNhEqDaALOgt9CB4C8/qf9kD1zvQd9DTyF++v7CDtcfGU9ej0VPOX9mz8LAETA/QCoAReCcsNhw/UDugNpg/WEZkQyg0cDEoKPAcyBAgCUQDK/Zj5ufUb9WP2U/ax9FvzWPNT9Yn4qPmA9+72RvoQ/cb8uPyG/RP95fy9/qH+Cft5+U/7+vuE+of5gfl2+Xr4Fvdt9yP5E/sk/Hz6Rvgb+4MBNwOZ/p38iAKRCX0JgwVhBk0MWhDDEP4Pyw7lDkcRpBGfDYwJBQkpCeMFZAAw/ZT89voa96TztfL/8ivyfPAg8PDxa/Rv9sP31fiM+6oAlwToBeIH+Ar5DMsOGhG2EWoQqA/sD0gPJg2zCrEHgwPb/8j9RPsS997yavAj70juVe6m7ibuRO6r8Gj0Svcy+ZT70f5+AagDRQaRCJYJ+An/CUkJpwjACNsHrgRDAb3/3/7A/CT5wfYz94H25vLw8S71pffX9z73gvaz9wz71/3Y/kL+BP8NBPUGOAMpA08KxgwhCQIIrwgPCKcIuwrZCvcHQQUwBAcD2AHyAXMAfvuZ9+73Y/l2+GH1LPO38y/2M/j0+Ev5evqz/Nn/CwTHByMJCQkoCvMMIA95D6MOvAxFCgcJbAiLBbsAkP1J/Ff6afeB9QD1QvT58nzzmPUo9+b4oPvf/UAAowPxBS4HLglrC6MMrwxcDIEM3wufCeMH/gYGBcQBrf6x/NP7rvrw9+n0HfS59LLzt/E08U3yofQR9t/zJfIN9h35HPai9aH6cfww+636j/nD+m3/vACq/p8A3AU7B44FrQYQC84NmQx7CQcI3wkaDAULOgeOA0cCXQPSAhH/3/ur+mb5Q/hV+PX3U/Yx9Wn1A/bq9xf7Pv07/lcA0gKCBMMG5gmPC6ML+QzRD5EQwg6xDSMOmg77DTwLpgfYBRcFvwNmAsUASP4g/Iz6Hfma+CT5PPkQ+Fb3CPhx+Uv7mfxw/HT96v/8AH8BnALzAf7/hP/j/xb/lv20/Hr8y/qp99b2Sfhi9zH10fWS9hn1TvXr9iP2tvUk+M34Xvcm+Kf6Bfym+537uv4CAgMBzP60AF4FxQiTCRYJvwl2DNsOow8KEDMQVg9YDlsNTQsBCT8HzgQQAkYAzv6K/GP6bvnw+aj6p/oC+w/8JfxH/ND+zAFUAp4CeAQDBrUGBQgUCRMJSAmxCb8I0AbJBaIFMAQIART+PPwi+5L6XvkF9x71W/VI9lr2U/cb+QX5NPkI/A3+0f1U/hYA1QAzAL7/nf+g/j/95Pyg/Of6iPjN95L4VfjC9sb2cPjM+dz70/5h/0b+QAALBPAExATuBpIIYgefBnEHDAhACEkI6AeJB7QGiAVnBcEF4QSjBAQG7AXnA2gDpwT3BIoEoATLAxoCVQKrAzwCQP83/v798Pxo/LX76/kB+Q351Pcn95z4rPl0+Vn6UvzO/RT/lwC8AeAB1gLjBRAI1wbvBMcEiAXJBr8HoQYcBNYCrALMAQcAV/7C/OD60fiE9wX3O/ar9Jfz2vO/9D/1XfXG9VT37Pnh+wX8Gfzi/YAAUAJfA8AETAbZBukGbgdfCNQJZQuxC6AKGgoXC7kLywriCYsJKAhcBnwFMARDAuoBEAJoAJH+mv4R/z/+3fyE/MD8kfwy/Hz7UPrN+Yj5bvj790r5v/qJ+nD44/ZJ+Mb6k/pP+Nn3+fkT+9v51fm0/Dr/y/8OALMBFQV1CBEJkAe+B6wK3gyPC9UImgixCYoHrAP0AoADTQGk/RL7fvlR+ZX5ffcz9Jbz5PRO9VD1BvZB96D4vPkL+2/9DgDUAbUClgPzBQ0JmAkVCDAJoAxeDVcLnAplCz4LuAqEChEJGQedBtAGlwWzA10CQwGa//r9m/1W/Zr74/lc+Qr5tviJ+N33Rfdf91b38fZf94v4Afnn+Fz5MPpc+xf9If4V/mL+sv9sAesC0wPdA7QDXgTUBbsGgAYoBiMGxA
VXBW4FzwSgArkAPAC//9H9KPuB+SD5Yvjp9j/2a/YV9uj16PY7+D/5q/oJ/OX8U/4EAIYArADhATkD0QNcBGEEUAMDA5AEgQWCBKwDWgTdBPED9gJ8A18EAAQxA+ACegIgApACBANrArQB6QEvAnMBlADpAEACKAMSA4MCIgJDAtECHAPRAi8CPwFPAP3/v/+0/p39R/3h/ET8dPwj/TX93PzZ/Jr9xv6l/zwAowCwAFoBvgIrA+YCXgN+A28CwAFeAQoA/v4w/+f+Nv2d+3n6Rvl7+E/4iPc49rb1u/Vc9er0p/Sm9GD1rvZN9yH3+vZ89/b4AfuA/F79SP5p/44AFALkA3wFtgb2BwUJiwn6CckKXQtJCwsL4gpRCogJ8Qj+B4cGlQVABacErAO7AuIBZQGTAbYBfQGOAQoCkgIrA8sD7gPtA18E/gQ3BQkFdgSWA7AC/QEyAdv/Tf5b/db8xPsZ+vH4wPhG+eT5rPm0+Kn4/fkE+wL73vo3++H79PvP+oL5gvkF+vz5mfkn+TX4Mvf79sH3bvjl97/2nvb793T53vnd+av6fvxV/oL/YgBsAZcC9gOzBW4HRQhJCGIIMAkfCoUKOwqtCYIJ8QnVCYAIOgdMB54HLAeZBmQG4AU3BR0FNgXdBF0E9QNqA/kC0gJ6AqQB8ADGAJoA9f8j/2b+Ev4F/tX9Jf1i/HL8AP3L/M77XPuD+4v7i/uP+wL7XPpw+rr6UPq/+QT62vr5+i36Z/l7+RD6gPqJ+jD6n/lL+Wn5xfnf+Xb55PgH+df5dPqM+tb6p/vU/CP+K/+3/5cAPwL/AxgFAAYhB2gItQnlCsALWgzFDAsNdg3PDaYNDQ1YDHsLpAonCm0J+gd8BsYFKQURBAQDOQIhAd7/JP/B/iT+jf0Y/Yj8CvwP/CP8yvt8+5T7ivtH+2L7oPtS++v67vrM+iD6svnO+bP5Ovka+Tn5//jj+Gz54fnf+Sv62/og+zb7yPtl/G78XvyW/K78ifyk/Lv8Xfw0/M38M/3c/M78uv3f/tf/5ADsAYcCXwMaBegG+wfZCN4JkQoPC/ULuQywDGYMpAy8DCAMWQvNCi4KfQnxCCUIBgcdBo8FzgTNA/oCVgKGAWsAaf/S/oT+9P0H/S782fvK+3r7zvpE+hv6E/rH+Vj5LvlN+Ur5DPnY+Mb41vjh+Mb4pvjO+E75lfmQ+eP5m/oj+3f79Pua/FX9C/6D/sH+Iv+m/9f/z//m//v/vP9n/03/Vv9N/0n/OP9Z//z/+ACpARwCAANhBK4FhQZHBzgIDwmKCbgJzgnsCQIK2wluCcYIHAiJBxQHfgaoBbsE3wMMA0cChwG+AO//Uf/V/jj+i/0n/eL8jPwb/Kz7Vvst+xP72vqJ+ln6VPpJ+kb6ifoA+yz74Prh+pb7O/w0/Ar8ZPzU/Kn8T/yM/EX9sv2Q/Vn9jP0u/t7+UP+a/xcA3wCrAUECtQI1A7oDEQQyBDYEHQTyA60DbwMcA8cCaQIyAisCOQI+AlYCngIHA1gDywNfBMME5QTOBA8FaAVVBeQEiAQuBIQDzAJMArkB3AAVAIf/rv6a/Qz9/fyb/L77FPvL+qX6e/pf+j76EfrZ+bf5xPkA+iT6FPoI+jf6jfoB+5f7CfxO/Mb8n/11/vj+aP8fANwARgGhAQ8CUwJpAqwCAwP8Ap4CYgKAAs4C6wK5AocCkQKfAqoC+gJNAzMD+gIhA10DKgPVAugCCgOrAicCGAJJAiQCywGhAacBqAGnAbgB1gHPAbcBxAHfAaoBRwFLAXMB9AD7/37/mP9C/1f+p/1+/R/9W/zY+7n7ivsc+9f6u/qZ+nj6o/r6+iD7Bfsf+5r74/u6+8b7ePwb/Tb9Qv3K/XH+7v5t/w8AsAAzAZcB+QGcAlEDuAPmAx0EWgR8BKUE7QQCBbYEXwRKBEoEEgS0A4MDewMmA5oCWwJGAvkBogGlAZoBHgHAAOoA+gB0AAIAFAAJAJn/Vf9O/w//mP
5q/lv+Ff65/Xn9Pv30/MX8qvx5/Fr8Ufwa/Of79/sl/An86Psc/ET8JPwg/Gz8qvzJ/Pr8LP1S/aL9DP5N/nf+6P5y/7L/0f8ZAGMAnQDXAPUA/wA3AaAB4gHmAf8BMwJ5AtgCQQOTA94DQwSUBMMEDAViBYYFgQV7BVAF+AS+BJYELASaAyIDqQL9AWgBFQGzADEAwP92/yX/3/7D/rb+j/5L/ir+If4T/vv93f3A/Z79Wf0Y/fL83vy9/Jf8YPwW/OT71PvR+8L71Pvt++P72fsS/Gz8sPwD/Xf9xP3v/Uj+zf5I/6X/8/9FAHgAmwDqADcBTgFfAY4BzQHvAecB+QEiAl0CgwKKAnYCbQKBApACmwKBAkgCCALUAb8BmwGLAYgBXwEyAQsBCQHpAPQA/ADmAJ8AWwA/ABUA3v/E/7L/Pv/P/rb+1v6W/lX+lP6b/mr+ZP6N/rT+wv75/g7/wP6+/gP/Dv/e/p3+f/56/j/+DP7o/b39uv2r/Z/9t/3X/fX9BP4w/oL+vP4W/6n/7/8MAEgAogAfAWYBxQEoAjgCVAKLAsYC6wIBAxEDEQP0AusCzwK0AoUCRwJLAioC1QGEAW0BQQHaAJEAkAB7ACwA2v94/yP/Cv/q/pz+df5a/lH+Af7F/bz91f0Y/gv+5P2p/cL9/P0N/t793/0d/tT96P08/nj+sP7B/ur+6v5C/7j/oP+1/8//AAACABkAUwBhAE0Az/8YAHIAfQCRAGkAtgBrAJcABwHhAEcBFQFhAYABRwGMAQwBRgF7ATABTAEhAeYAFwF6Ab0A1QDHAKMAUAGIAP0AYQA3AKcA/v91APT/CAD0//v+4f+f/4//KgBi/z3/wP5u/pr+sv5u/oD+vf5R/Z79qf4q/+//u/7z/dr+Vf4S/wb/Ff+h/4f+U//2/mj/aAAmACgAWwAvACgBigGAAZEBuQA9AVsBZgFWAW4BuwCMAJ0AoABSAdUAJwHPAC4BXAEdAesB8AGwAcgBvgHNAfMBdAG0AaUB7gAjAbcAHADBAMn/jP+B/8j+Sf/9/tj+Bf/O/pT+MP7l/Q7+BP72/ar9Wf2a/Vn9rf2X/T39dP28/QX+6P3z/Sz+Sv6X/qz+q/79/gr/av84/1P/3//s/0IAUwCEAIwA1wDvABwBcQFdAcMBCwIDAjgCegL0ATACfgJcAr0CngJAAkQCggJUAi8CTwJPAhcCvAHWAcoBtQHcAecBpAFEAS0BMgEgAfoA5wCjAOUAHwAhAE8ATf+B/1//KP8t/4D+kv4O/vD9h/7F/U79ev2q/Xf9ZP02/S39GP3K/Kn9wfw5/Yj99fx5/U790v3e/dv9QP6U/q7+YP+A/1T/AAD2/1wAaQCOADEBDAGHAcwB7AFpAn8CwQKaAgQDhANRAxoDNgOCAzEDSQP/Ah0D/gKFAjoCiQEeAlkBMwFLAdgA9wCKACwBVQCfANUA6P+DACIA8//Q/5z/2/8R/z3/Fv9T/vH+Pv54/jL+Kv5H/oX9aP5Y/XL9M/6W/f79sP0F/gf+7P3s/dL9Vv6j/Yj+kv64/jT/6/4y/yT/av8F/3r/af+g/7D/0/9hAEoAEAG+AIYB9QGNATICxgEJApoCPwIrAi4CRwIeArYBDQIGAskBCAIQArEBaAHkAVEBwwFYAawAqQGjABgBSAAWAN4A8f+PADYAJwDq/5j/1f+I/v3+MP+B/hf/6v66/tH+g/5K/hX+Uv7Y/SL+Iv7s/SP+hf20/dX9Dv4T/pz9Uf6K/qn+Z/59/rr/Dv+T/yf/lf8oAIr/OQBv/9r/GACZ/9r/9P8qAKwAQQBFAPcAnADkAOcAUwG2AcQBqQGAAWgCdgIVAg8CTwJkAm0CSgLoAccBUAF3AZABJAEqAbQAZgCBADcACwBrAA0AhP8FAGT/ov9f/83/3v/f/pX/9v6S/yr/+/4s/8f+6f/n/r/+w/6//vf+JP4u/yP+Of6Q/Wr9Tf9p/hz/4P7f/z8A0P+xAD//5P
/I/23/c/+U/n7/GP9r/zT/qP7//0UAtf9V/0YADwDK/1IA3f+UALYA9f8aACABJQGjAG4CywE+Ab0B2AARAbAAFwEgAYoAcAE8AMQA0QCP/14ACwDHAKIAvAA+AJn/1gBqAI0ALADj/8r/zP8WAFv/bv/y/5n/7/5v/3b/EP5h/sf+ef7k/vL+fP/i/mT/fv+7/hv/0P7S//3+Uv/KAGD/9v85ANgAbwCPAG8Bu/9IAAYBuADg/z8BSAG5/z4AbQAhAOn/tQAsAOv+rQAbAJn+dQDm/xgAIQA0AEkAHP8sAJH/AgDRAPn/VgD2/6AAKwB+/2AB7f/x/xsAMACNAGz/AgDo/0kAVf+M/2EA0P9AADAALwAtAKv/7v9j/zoADQD8/iUA5/+L/7X+Pf9o/zD/0v9g/z7/Yf9i/xQAdQDW/8T/cv+h/9r/fgDdANn/6v+DAFUANADdANgAWgAjACgAJAA2ACkBbwDm/xUA5P+SAC4Ax/+I/6n/OgBV/7z/LgB/ALwAWAAsAPz+dQDQ/5//hQDm/5wAWf/e/7AA3v9FAB4AZv9M/1b/S/9G/9D/UwD1/wMAEACH/5//6wCnAE4AxgAxAB4AFgCOAHcAwf85AA8AUf/q/9b/Mv+I//j/9//g/7P/0f7A/nD/kP9C/1sAEwCM/07/5P5I/43/kABWAMv/7v+EABgA+P8XAW4ApgD+AMkAMAAdAC0BWgBcAJ0ASgBmAKf/kP9K/5r/ogDH/ycAjQAjANH/r/80ACYA+wD6/1IATgCu/3wAcP93AIsAKADJAJP/jf9+/4b/BABe/0j/Jf+//wAAHAB5AEgAoADQ/1j/0//8/9T/1f+kAKQArv/2/5H/Hf/U/wQALP/a/iYAff9S/qj+/f4A/wf/bv9d/0n/QAB5AND/QwCUAKMA2gDMALEAwwDnALQA6v8aAIwAMABRABsARwBmAP7/ZgBHACgAJgAKAA0AX//2/4gAUwB6ADwAoACiAHwAzQDnANUAGgHAAFsAnAAqAOb/Bv/j/h//gf6s/jn+Qv79/YL9Mv6Z/YP9Hf7B/eP99P22/Yn9Gf0E/TH9yPyq/J38K/y4+pX5Dfr8+lD9Zv8IAo4EMAa4B/oHyAiDCXoKKAqgCZ4JHQisBh4EVQICAcT+hf1w/Db7j/r3+br55vov/f//tAIdBVgHjghWCSMKaQqnCuAJuwi1BrUDxQEO/zH8T/rM+Br3MfXD8+3xKfBl74vuoe2V7fDtk+2K7of0gPxqAfMCTQVFCEMKGgwkDgcSyxUZFV4P7wccAvr9cvoM+HT2tfRM8ojuCurL6fDt4fJz91X8twCTA2QHgAtcENEWRhx6HmkeixyNGEQT3w4vDGAIGwQa/9z4NPQn8qzwA/DH8lH2Mvh0+Vj6efrn+pr7FPsG+0f7YPm79Sjz/vCF7xX0+P3wAzEE8QSkBZEEywSXCEEOShOYFRYSAwpfA/b+YPnA9Jj0JvU58u3saenq6Qnt0fKH+Yz/QAbfCz0Oag8NEuwUfxWaFKASVg+qC3IHLgOo/0L96/rS91z1qPTb9Y/3u/m3/P3/7gKpBFcFzwVqBkoFNgKn/7T9zfrI96T0aO+f6gTpMegr5/7trP4OC/ILigtzDisPkg+QE90XABwiHW0UdwS5+KzzvO8G7cDtHvAd8NPs+ueb5lDuA/p0AucIXRDpFLsTOhITE+UToxLFDmsIiQIN/3/83vlx+Fj5UfnK9oP1I/c1+rb+0wLbBEUGkgfpBkoEIANDAysCzf+l+yr1Hu8k7Ibp9+Ue5X3oZusm7tz31QeqEw0WyBX9GEEctBukGngcwB0TGNgJH/rF8AftrunP5nbove128IPuLO0H8+z93QUyCjUQGBeSGU4YqBaZFDEROwxfBS3+z/lE+Af3WPQb8sHwQO/T73zz1fih/zwGwgl6ChoK1Qj6BzcIYQd0BEAAuvnU8DLpJeaX5UzkP+Nv5XDonuo98g0DYhRLGxwcwR9BI3cgiR
uvGnIbTxfYCvr6P+8K6aLkOeD+3ivk9OqJ7XvvzPcqBO0MfBKUGC0emR8ZHVgZFxV9EG8KPAK5+pT2RPUL9Q/0i/LB8aXxKPLy9Nr6bAKkCA0MwwxdC+AJegmXCIUG/wOS/1b3Ce1/497dwd0o31ve8uCD6IXsve9ZAPgXdyKlIjwlGCiKJI4c5RZrFSwTkwlK+hjtvuWC4BfaONe23RPpBfES9Wj73QVKD2IVFBtCIfYkdiSbH9EXug8UCBkAnPjU85Xy0/PP85bxwvCs8hv1pvjd/jUGmAzkEM4RDhBLDo8NYAysCXgFZv9r9tbryeHM2gXZ6NlB2W3be+N86FrqKvhfEiUkQiYiJ2krgynEIEAZExUeEVkIbPkP6sngOdyv1v3SY9mz5kDxtfcn/2AJChPRGLgbhx9TI8sihB0JFlgO2AZK/lb1S/AZ8U/0zfSC8l7x//Ku9UL5//7tBtoOERNQEuQPdw4rDkUOvwyOB4MAp/h47XrgjNoy3OnbotrP36nl9uah74IG4Rw8JE8jHiZGKXAkJRtoFk8V4w5F/xftUuEH3Ybactci2Yrjf+9R9f/4xALREDgZXBqVHGUhTiGIG3EVxA8HCckABPfu7p/tMvGf85HzyfOg9LP0qPUy+jgBeAh1DjkRxBAVD8cMrgvVC9EIIALC+2/0ierm4hnhyeD13y/jlui76ejupgS8H44pPCMjITkljSGpFtMPog7vDDkDnO8W3z/bfdvV2JbaxuZb9nP/gwFqBMINsBfqGT8YthuSH+cadBLCDFkGQP1A9Ovsgekf7BnxU/MN89b0VPli/GD+7QMWDAIRbg9UCkMHLQceB+kEwv9o+rX1u+2t5V/lQOjz6BnrIu4+7X/vyABGGoIn9iMeIOghCB6iEWgJuwuxDV0EP/RH6cLlb+MH38jep+nC+HkACwIvBuENoxPyE2ARZxOkGL0XqxG9DvIMAQXe+NDvBOxb7KTvUfNL9cH2APhB+K/58P4oBgYLtQtMCkkJCgicBAAADvy/91XxSOop5knm6ucO6jLuo/G08RL25QYZG1QioR5kH2QjLh1xD60H5AZmA+X4ou1l6vnrkerc6ebv1/gYAvMJWAumC7QQzREiC5QHOguLDjcOMg7vDhoMhwUQ/qf3BfWt9rP3wvUV9GD01/Te9XP5+v7HA3UG1AZhBRcEtATpBGABtvpd8p3pxuQd5CfkAucz7tvy4/Jy+fkKFxhrFmwTWRmXHFsVwwwZCtcImQGz9Cjqweeg6n3tn/Av90QBrQq6DWwLMwzsECgRIQxTCdcJXglJB/wFiwZvB44GYgQ3AjgBKwJ4AgL/SfvC+gH7Gvqw+W374v61AKr+8/vP/EP/vf4F/Hv5AfWL76Ttme0Q7brxe/lt93jwRvlqDrkXDxLoD0cUZhKXCEABkgBsAU/8pPCd5x7p5u5L75ruyfawA8cLuQ5EEKcS4BUmFf8OLgoECU0FDP1G97f4JPxi/Dv8yv/9BdwLOA4/DPIJOQnbBsYCaQCPAOIAif6g+RX2vPSW8wDz6vJl8gryB/Jj88H2Tvgo+C78rgBX/kb8WweAGSkflRUPDtAL6gLv9VrxTPXv+f75s/S473zwP/Lt8Irz8v37CLcPIRNKE64RshAXDvsHRQNnAvH/5fr99wL3A/bc9jj6Qf6kAs0IbQ6tDnsKEwdVBW8EHAVLBUoDpwGW/vr1rOyl6gHtTu0L7dHuQPDs8tn4TfyB/vAHoRAjDI8HYhLxHqAcYROaDhgJU/1Y8bjszO6H8ULxZvC78Vj1PflO+33+HQdXEH8T6xIUEngQvw2vCQYGKwURA2380/Wu8sLxiPE18pT1T/x5A4sIFAvDClMIYAa7BTgFUQUJBi8DG/zI9mj03u8Q63Pqx+oT6v3rTfDq9IX6CQFWBwwMcQ17DJ8N2xXjH7Yf6Bb0DxIIYvmn6yvoluxC8UXylvPv+G783fkq+AL+7QhUEl0WTBYfFfoSQA7yB0MEAA
QEA8b/pfx9+Zv0ju9k7UPvm/Ui//wG7wmkChwKrwa3A7cE7gW6BEYD5/9T9w7ta+WS4O7eK+KG6FLvSPf5/vL/JPynAD4Qdh3WH3MdfhzxFkYHzfau8avzvPKr8cP2KP3k/aj6/vi7+wcC4gemCjYMug1RDDgHQgOfA8QGgAsEEFgRww9lDPYFef1I9ljyFPKm9CD4Zvsn/mwAhgHiAOsBegacCUUJjwh1BhgAJ/c675DqIuhD5nnl7eaS6Knn2ehf9ugMchukHZgfDiHSF9AIvv8W/RT8w/nY9BjwUu5Q7QTs5O4u+KYDLA1UE6UVcBR2EDkK5gOmADIATQBSArgGSAg2BfYCwQMsBWkGYQcuB5kFtgFa+5T1F/RL9xn8DgDTAjsD/wHnASoBCv7+/Cf+8vtl9rPwX+tb6H3o5ufb51Lzmwq7GiwaLhf5FsYPBgUKANv+M/+D/236rvJ37wbuwerY6zX1mgDTCWYRDBUAFNkRbQ4HCPgCiAKzAhkB5P4h/ND47vZ1+P38dQMMC+UPMQ4JCL8B6Pxx+nP7IAADBn4IKgZWAp3+M/rx9Ur0r/VC9/j2k/Wi8t3uOu0j7IXrwPXzDv8lyimUIX4ZcQ2r/ELx4e6C8S31W/d09831i/Kj8FPynPfOAQgPxxfSGMIVDhD3BxoBr/4uAEwCbQNuAxYA6fiM88Pz/fZ/+3UBqgaIB7kDqv7g+zz8t/+9BjgONBAjDIYHUgMV/LDzVe9B74Hw4fCJ7+Dsguz+7Q/qGOcZ960TeCTVJQcmzyTGGO8GGfr09Nr02/Tf8ArttO3r7J/pee3d+T0GlxDyGe8cFBiFESUMXganAk4EswZGBSICYv5X+A/zEfNx9nP66QDgB88IJwOk/Tr7D/rI+5sCagroDdINMQzmBm7+6fhj9630jPEN8EXtouic4+7bodf45J8B8RfxHfYh3yjaIygSiwRtAQQC8f8L+bvwtexg603nTeSD6tL56gk7EzYVCxXkFGsRbAoVBf0DBgW7BaQFwwRpA9wBIgC9/sz+SQBrAcwA1/7W/PH7GfxK/Eb9kgAgBAkFVAXSB6cJSAhLBecAu/nU8T3rpeX84VfiJOOP4hjrdAFgFXAZKRgKG5wZlg+9BowEywTnAXj5xO6w56nl5eRp5nbv1v5EDTIWfhmvGAkVaA7jBWL/Wv2k/W79Pf3Q/YL+kP/gAQQFcwg8DAwPUg6vCS4Dl/ws95b08fXx+b39JgAWAv0DIAW5BYQG4AaOBfsB8/u59Pjt0+g55srk4eL95sf5sREaG/8VfBJ2EnEMXgNEAR8FRQc2BDr8vPEl6KnikeHY5a/xRAODEy0bnxotF0oSWArXAef91/3g/HL6kfgm9/72x/m2/goFDQ5yFxAawRPMCnYDA/yf9Xf0Jfdb+bv6Avzv/AD/9wI4Bm8GHATq/z36kvQ+8Ortu+8i88Lwu+yV9VIIcRE+D1gQyROmDswEdAA8/9L8y/uT+9v4YvWr8qPutusD8Bn7pQbuDusTwhUtFPwODQgBA44AD/4y+4v5sfiE93H3tPnO/VkEHQ0TE/sR6gzdBxgC5/vM+Jv5nfoh+tn5x/qh/Dv/DgGIAM7+vfx7+av1NfP/8873ePkJ9QLyV/sPC6cRiA4GD2EUMRJYBkr9zPyK/fL5lPY8+Pj65fhQ8xPwGvOq/DgIkw5SD6oQuxHbC5MCvf9JApsCKgFbApUCXv0G9wv1Y/isAJEKrw8fDokJsQJ2+ZHzX/Us+lf8nf1+AIwC5gGb/7X8Mvpw+C/2r/P28R3y7PSO9vzxHe2h9KEEpQypDPESUB0PHJcONAJF/fv7q/hM9FL1I/sm/EL15e588Lf4sgJqCtMOTBIxFLEPRQaeAJQBAgT1BYMJXgtvBgH9Jfb+9Q/8LAQGCcEJUwhlA9n6+/RY9iz7rf5aADQBKQKqAzsEagIKAA/+tflC8t3rYuqR7HnvBfCo7rnzbgLsDb8NoA5dGCkcpRCvBHwDMQ
SC/Rn16vI+9Ijywu0M62/uNfi7BGUNyQ/eEZcUcw9RAz/9nwAqBM4ERghrDKwJ8AH1/D/9TAFKB1cL0AoXB9oAK/hC8R/xKfbZ+sH97v9VAcACAAW3BuoGLQWGAIn5dPJr7Bjo6Og67a3tPO0N+NsJbRHYDxEThBcFEQ0FlgDLAWoAPPzC+IH2IfSj8I7tse5O9Z7/tQpfEnIUYhM2EL0ICgDT/Fv+xv9YAeADZQQkAukAOwJhBGkH6guCDsQLHQX5/Xn3x/I78kz10vgC+wz8ov1/AX0FTgaABbEEyQDQ+JnxYu3n6u3rLe9J74ny7QMaGFQbuxRWFGcS8gV3+137Ev3N+mz3mvNQ7yftPu6b8Z331wFcDnQXChmjFKUP0QvJBYD++fum/Yn9vPrp+Cr5cfo8/YIBQAZiDPYSvRTZDhoGGf/0+JPzQPLn9Uv69Puj/Dr/JgK7AhoCMgHp/dr3MfLA7avp8+i87Ejv2PLjA+8ceiaKHd8WuhQECaT54/b7+/L6o/Sf77LqOOZ357DtZPVsAIEPcxshHjQawRUPEi8MMAQK/yz/lf9p+4b1FfOM9Kj3U/uK/9EFiA1eEZsN3gYeAx8Bcv0x+iD7Pv7S/jD9wf3+AIoCpAAY/TT4YvF161roPudu6cfuD/Lo9SIGdh3eJaIdlxn5Gp0R+gGy/Jv+B/tw8cDn0uAL33zjneof8xsAYhAQHJYdUhg9FNcS1A66B+gDrwT3A8v+o/gU9fH0evck+jP81QG/CoEOXgkZAzYBQQAI/l/9VP96AYwBrf+v/iEAcgGMAO79/fgY8Y7p1uTl4rTlGOxb79rzIwdLIEAn6B3UGZsa7hFdBIH/9gD2/e3z6edx32zdE+G45z3xP//hD7gbiB0dGUYWthS+DoUGPwPxAwACfPyk98b1ifZa+W38Iv+IBPsLnA7DCfsDXAG//mb7S/qp+1/9Av6+/dX+RQKEBNQCiv6G+OPwGOt46Ann4en58Xr1oPRLAKIXSiLBGpgWnRrnFRgILAG4AWn+m/VJ7IXjRN744HHo8u9++vYKRxnzHAUazRhfF0sQEAc1AnABUgD//Ob4Tvab9vb4HPtZ/W8DWwxnECcMywUPAhX/OPx0+9T8f/6F/vT8FfzT/Bz9yPvr+Hv0TPBv7jjtl+wc8ZT3lfeV+dcKzB0SHgAVYhP7EYMHE/5o/kUB3/3i9CPr3uTS42zmGOty9AAE1ROqG+caRhhMFvgQYAiXApsBUQHt/nX7fvgt9i71IPZy+bj/VQgaD8wPpQuDBi4BoPte+bv8eQE5Akf/1fzj+4T6b/gG9n/zifIe8yLyKPEr9Xv5G/cH91AHuBwnH2oUixNWFl4LZP0j/d0Bzv2F8ynqbOUA5pXoSusb8/4CThNyGnoZ3RjaGAgT/gj5Ar4CuQL5/5j8Pflq9bbyiPLe9Hv7PwZVDioOqAlVBtYD+wDO/zUAK//O/Aj7O/nB99z3bfev9M/xiPC/76Tv8/Qk/aj7ufKS+I8PZxt7Eg8OhBYHFrMICAFvAbz/Pflj7+rlOOUY7Cnvyu8y+WIIzxFMFOEUDxWhFFQTzA9kDBIMMAoSAiX4VPOT8lHxrO868pD6kQMaB1UG6gfRDKQOnAp4BrIF7wLK+njySO908ETzDPWu897x6/KM8pTvpPIC+1b87/w+DYMfmBwkDyILXgv6BbMBUwSiB+oEifpJ68XfYN6S4/3qWfZYBisUKRc9Ef0NuBC5EXQPRhC8E1QSSwp/AJH4B/NP8H3v7u/88zf7C/+z/fL+Jgb/CyEMvAumDZ8M1AUi/f729vSz9OPxyu1h7MfrMenq6Y3xkPdc95D8KQ6aHT0djhYZFXoSMAnVAscE4QZcAVr1D+j83nbdZeFM6Dr02AReEeMS+A6rDjMSZBWOF9sZxhouFq4KTv0e9TryGPD/7S3vhPO69pX2CPYG+RAAVgjzDSEQjBFjEQoM0gIW/P35I/iR9OPxdO/66iDmjOXQ6aft8v
Dl/XsVoCQ/IEcXTRQMDpYCZf8EB7QNUQkY+nnpNODw3dLfFOnc+U4JRRASDvMGywOhCB8P+RLYF+gcxhoPEAUErf0l/FP6xfa88/Hxc+8f67TncOoq9aEC+wsmEGMSmBLpDpcJXgeUB/sDnPoS8UTpQODU2n/fdubt6Sb2Aw+FH0kc7xWrFToRGgkSCE4M0A6vCi77Y+eU3sbhNOUS62j7oQvkDp4JAATX/0UB/Aj7Do0TYRoRG04QEwX4ASQDFARQBO0AwvgT8DDp8+Jl4rvtmf3RBiEK4wzLDbsKTgfeBrUHbwaAApH7X/HY5vbhh+PL5Uro3/JZB7gafSBEGeUQiQx3B70ALACPCMoNSgTi8sfoRecN5mzme/CvAScQfBS2DqoHuAZmB80DlAIvChkRkg50CcQJtQu9CWgEBv4G+aL3Lva/7yXrA/DK9Zz1z/gJA8IJ7wl7CoIMegzVCjUH3P6Z84zqvuRF4oDlpu2v99kFqhUbGCsL9QIzCHwMbQl/C8gTahJ3AX3tZONl4+jmx+yk9/oEIQ1VDDIGsQHMA4kJ9AubC5MMegxnCMEDywKFBTYIAgjfAy/+KvoJ95vyhu/18cH4Of8WA/QDvAHR/t79aP5PAKQElQeqA637EfVC76Hr8u9V96X4E/v9CGcWGxMZB2IDLAeTBzkENQU8CvoI9/zc7j7qgO/K9Gz2p/ogAx8HOwMh/58AewRgBzMLYRBdEhIPBAooCCoJDgjlA7YAW/4T+on0nu9v7J3sXfCG9en6uAFTCOQJBAeVBfAFYwQ7AeL+TP2I+6X3xPPP99b/+/2x9ob7swrWEWsLFgb9CYcLwQFg9CXvE/TT+WH5BPpXA3ILAQeV/sr/jAbtBTT/lvzf/RD9DvtH+yb9/f8BBosOVRWVF0AV4w12BJT+6/pB9Dvuo+1D7NnmYOgZ9ej+CgC1A6oLYw4LC5oHfgSdACP9hfvd+2L7WPdL89b1yv/dCjYOawcMAs8HfQ7MCM39GPwUAPf7OfE87ub2cv6T/Yf8xAJ8CWYJRgj/DA8SdxDMCasCX/6g/df7yvfY+d4BhgER+dH76AuRE3oLfAPRAOX4Vu597GnvuvAK9Mf22/Pb8xX+dggNDIURahlqFmcJbgAK/Qn6Dfph/JL3ce186Y/sZPKl/J8JLRH3EzIX3hRYB//5cvfb+J32ZvWh+eX9CPwd+Jf5IAEBC1kSchIXDaEHvAF3+/f7GwTOBvb8xPL877rsJ+pT9W0JMxTzEasL/QNZ+1r3GPlr+1j/Ywb0BXz59vC09+8CGQedCNQO4xSmD0sBs/jC+X74PfBi7CH0F/0P/hH+ugOQCGIIzQcSCbEJBAjJBPEC1gL8/634TvT2+WkBCP4c99b6XQIqAR3+cQOJChoJfgO4ADn9bPe39pj5N/n++/YGPgoD/RryqPQA9/71BgGOFUEbUQ0H/zD4t/T99DD6KQLjCfoMYAfr/X75NPvQ/bP/jAKmAhv7ivRw+df/Fvwk+V4ANAWN//f7vwQcEfoTUhFtEnsQWwPr9EPxsvN18fTstu008evz7/hQAC8H1w2HFOsVMA7AA7f+N/uq9bj0WPkd+5n5NfwbAsoD/QKcBzUPbw5UBj8CIAJ5/5L6NPX28sT3Pfwq96XwjfdLB+4NaApdCykPFwjQ+YrzB/eP+nT8EgB6Aen9A/uj+8/+PQadENwUbw6VAhb3e+6i6eDn/+r/82X9TwKnBOoGEQlVCq0KcQmACOcO2xi2E+z+WO7n6qfq9+cb7Tr+QQ0RD20HqAA6AXgFmwYNBvoJZQ/gCiT83O9f7v3zdPeb9pr5XwLjB7IFZANUBloIIAYmBEICZ/+M/aj8bPvI+Fv2M/jw/CgAmgBVAGkDnArHDvMH9flK8iDy4e7m6+Lyr//cCWgL4QSCAK4GoxHAFLERhRHMC0j6zurX5LnjYul+96wDBAezCYwPkxCZDNYLswywCHcB7/uE9ZrsVemT663sG/EA/CkIuA
80EF8PGxHIDZUGqgVBCXMI/QAn9/jxT/O28yHvS/Ds+6IE2QPe/4H9Gf8AA9oF7QcSCkcM2gtIBRr8FfSo7PfnzepB9VsD/RCkGSUYiAyM/pf0fe+H8I/4mgNwChYK9AM3+nPxle7a8Qz67gbsFK4bohfMD0UIt/za7i3mVOac7ar2af3mAqwHigolCyAKnwkIC40LYQh3AuD8IPkF9pPxme9r9W/9jgA+BOcLDhALDUcHnQMWAbL7cfbV9P3z7/I58/fzTvbO/J4EMQkqDYQUfhqwFnUNigmjCfsEQvt58hTuIO1W7hDxAPYZ/iYF3gPW/N/9hwimD1cOXAtrCVgCEfZX8EvxMe0U6d/txPI18njz6PrXAhgFQAYaClYJwwH//CD+I/8t/h8AXwarDI8S0xffFIcKvQUBCKgG2P168znu5+sb54Ll/u6e/U8JORHVE1QPKQnfBsQG9QVLBisJmQtaCDD9+u6D5kDnh+159X4AQg9YGWMXkg71BlIC7P+2AMcF2QtJDLgFhPtr8LvnAuUp6e7ziwHjCksMqQhMBH8Ajf2L/nAEEQtjDZkKNgUW/1v4tfK88dH2Xf4RBCYEB/0k89Xt7O017f7qEvDA/IwDpP6Y+EX6iAJwDaMXXB6/IXshhxkKCcr3ie+H8Q31NfSB80/2XvYg8TXtpvFa/uMM8xeTH8Qj+R/bEZ4AL/VX8InvZvSs/LAAHv4y+pD22/DH7Mnxmf2vBswIIgggBwcDU/1L/GUBIwiSC+gKtgfGAcj5Q/T+8zv3dvsUACEFRggYBucASP6z/pX/DwFBA3YF9gfFCXUIdQTEAWsBv/4K+ZX2t/mL/dz/qQFvAj8BUgAzARgBqv6h/Xz+vfzr96rzxfHC8un2/PzfAgAInA2FEiISbwwjCMEHWQV7/lL5V/kO+VL0Iu9r7tPwrvNX+DQBEQxhFMoYnxkRFXYLSAIZ/lT9d/wP+7X5D/dp8+Dwj++E74Hzjvqy/+0BnAMOBMEBKv4++zb5Ofkh/PP/ngLABLkGTQcKBt8ELQaqCeoM0w6ADn8KCwQx/kr5vfSC86T4TgDcA78DqwQ2BfMBmv3K/O/+2gBsAfsA0P6/+zT6Cvux/HP/oAQvCf4H8AEh/cP7uvoU+Sv5vfr0+aL2R/TT8y30ifZJ+00AbwSuB/0IEgjZBuoFyANuAccBKwSYBAkDeQO8BBkC9f6SAd0GoAgiCJYI7wcUA/r8A/ok+cX31/dW+tb7evtW/CD/EQJ2BTwKXA0ACzUFuQDn+wX09+0x73T0XPiA+nj9JQGoAnABw/9PAB4FyQtgDhELHQZLArn+g/uC+lP9VgQQDRISnRDGDGkK5wcvAgP7avYH9Vvzz++v7ZnvI/NU9U/2pfea+lcAqAfnDE0OYQwZB9P/kPl19R7z1fOz92X8s/8/AGj+lP2D/lr+Af7dAPEEsgUIA9z/A/3y+lf7Cv6pAQUHjQ0NEW4PggvfBxYFaQJv/mr5c/b29hv4vfdr+Fv8NQGbAxQELAWZB3UJPwn3B7sGrAMI/mb5kfiF+Sv6MvuT/RAA7ADVAKUB5ALNA1gFEAdqBjIENgMDAk79+PeA9+D6Cf1z/sgBQwRzAg3/IP4H/z7/Nv9eAIkB3P8e/A36X/r6+iz8jP5iAPAAeQF9Acf/SP1C/Of81/2W/oL/fADfAVoDPwMxAZ8A7QOhBwMH0QNaAngC+ABN/YT66vql/Mr9Ef+WAK0B8wLMA9sCkAFmAZoAY/4m/YH+DQC3/pz7L/pQ+l36RvuR/icDrwaVCGgJJgiYA1H++fs1+y35NPdD+Cn7gPwE/FT8cP54AVkEBQb+BWYFrQSHAur+XvyG/KD9xPzr+jP60fkP+Uv68v6TBBgIsQmbCvQJ0ga6Aqr/Bf8XAJMAT//W/V394fyl+zr7fv3oAcEFjQcjCMUHJQXcAPn9Of5wAPMCZAQjBL0CjwF9AEv+/vth/OX+5/83/lH8r/uX+wv8wv3b/ykB8g
GwAgICa/8v/Tz9p/01/Hj60/rj+3D7evpq+wT+XwCVAs0FtAgDCTkHQgWUA4gBFf96/Zn8Fvvt+JT3b/dj+Mn6n/6zAjUG8wgiCh0J4QZKBCgBZv0r+vP47/l6+4P8l/1g/yQBiQHqAFkBygIMA+kBWwGqAVMBDgBy/10ASgEjABb+A/7//+UB9gLAA7cE7ASSAlH+SvvL+h785f2T/gH+Gv6Z/vn8LPoH+q78hv9PAq0FUQiBCWgJ+AYqAvz90Psv+vL4lPmY+0n9Ov7J/pP+F/59/vD/YwFoApYD7wTnBMoCJwBg/6AApAGSAV4CFgTwAzMBpf7a/cD9Vf0t/Ub96/yb/H39Sv+dAY0E9wYbB3gFBQR7Akf/8vv4+oj7cvsh+1H7i/u/+2T8EP30/c7/+gH9AgMDNwOEA+gCZgEEAHf/SP9I/+D/cgD7/1f/cf9y/+/+I//7/xIAff+D/zYArwBSAMj/JAANAbUAHf83/gP/SQD/ADoBiwEjAm0CLwGk/t78F/3t/fv9Bv4N/x4ARgAKAC0AqQBRAbgBmQEeAW4ANf8j/nf+7P8eAdUBUQJoAkUCWAKHApcCYwLGAYgAw/6e/Mf6JvoL+8T8aP7N/8AB8wOMBEMDhQIsA3YDnQLDAdAAzP5F/M36hfqm+jz79/wn/1AAFABI/5L+hv5b/18AswAPARECggJkARwA4P/P/zP/Gv9MADYCwANrBDQEVAOZAU7/ov1M/Xf9Nf3h/Fb9B/7e/Uz99v0nAG8CnANEBAIFEQW3A+QBuwAvAHz/i/7S/Z39l/1+/X/9/f37/g0AcQAZAKz/iP9s/3z/RwAxAjUEtQSCA8kBbABV/2r+mf7g/9MAKgCD/kb93PyT/Cb8i/yM/v4AcgLUAqQCwgEoAPX+F/4I/cL8Sf5gAPkAZgAUAPX/GP8L/jv+wv9IASAChgJ3AqABOgCu/n/9Ov33/eT+yv/9AEUCwgJ7AmUC/wLbA3sEfwThA7wCBwG//nT87Pqy+m37Nfy1/J79EP9rAIYB7gJcBM4E9wOZAl4BPgDd/oH9ufy0/AH9Ef3l/Fn9pP7i/0kALwA3AJcANQG3AY4BAgG+AH0Apv/q/jj/OgCcAA8Ak//A/6X/+v7d/vD/UQFTAvoCEANLAj0BZwB6/1P+wP38/RP+mP1s/e/9pP5M/0EA/wBAAcABxgIsA4kCMAJlAtABBwCG/i7+A/6F/WL93P1Y/qH+xv7//qb/8AATAoICwQIhA+oC3AHPAG8AZgANAEH/gf7w/WD9Lf3//WL/UQCZAOMALAG3AMT/XP9R/+j+X/4Z/q39f/1d/pb/wP+L/2AArgEAAsYBogEfATYAfP/x/sz+df+AAK0Az//8/vn+eP/g/xwAawCSAFAA6P/h/4EAsAGjAvUCBAPYArcBNQCd/9b/+v/M/0j/nf4q/vL9b/0U/ez9zP9SAR0CsgL2AjECiQAl/9j+PP+2/wYABACv/zD/Yv5o/fv8v/0B/+v/uwC8AUgCFwKoAScBTACa/2//PP/T/vL+v/9wAGEAv/8A/7z+Gf+0/28AwwERA/YCYwHS/+z+Kf5l/Tn9p/1V/tr+3/5l/l3+Nv8dAG0A7gD9AdgC3QJnAvwBqgEsAYgADgDP/2//yv5e/kr+Rv5j/sb+S/+6/08A5wBCAY0BCAI8AvIBpgGNAfkA8P88//H+U/67/fz9yP4J/9T+sv7C/un+U//M/1AAPgFSAqgC+AECAUMASv82/un9M/55/rL+S/+6/3f/Sf+8/wwACACWAH8ByQFmAeAA/f/t/n/+2v4h/2z/PAAIAS0BNgGFAbIBogGqAaABTwHkAIAAtf+J/rP9nP3a/Tn+3v6//14AiABmAEwAegDrAEYBigHRAe0BdgGyAPz/T/+4/kT+3v3a/Xn+Mv9h/4X/KwC7AIoAUgDjANcBQAIAAmkBxgD8/9z+uf1y/R7+3/44/3j/3f9cAG8ACgDh/ykAKQCx/3P/qv+u/x7/lv
6l/vr+K/8V//D+6/4B/xr/Qf+D/xsA+wCmAccBtwG4AaoBggFXAfAAbQD5/3H/+v4A/1H/pP8OAIEAkwB7ALoAKwFKAV8BowGwAWABCgGdAOH/IP/D/oL+KP4Q/oX+E/9a/5z/GABWABkAtf99/3T/sv8sAKEAxwCuAGgACADN/8z/zP/c//n/+v/h/8r/if9S/4r/IACkAPsAJAEIAZEA1v8p/wj/dP8sAOgAgwG4AWIBvAAhAJT/C/+u/qn+9v5A/zb/N/+H/9X/tf9V/y3/bv/N/yYAggDKAOIA8wDyAGgAjf8+/53/AwAWAC8AYABpAAwAef/v/r3+9f54/xUArwAPATcBNAHgAFMA9//Y/8H/pf+t/6v/c/8t/xn/Gv8J//b+B/88/47/3P/3/+T/4P/1//f/2//p/xUALAArAA4Aqv9j/6X/MAB5AJwA5gAsAQwBtgCWALcA7AARAS4BIQGvABQAnf9J/xH/Qv/v/6MA3AC6AJwAlgB3AEoAQABVAFAAMgAMAOL/rP9//1X/Kf8T/zv/Zv9v/4//+/9JACoA2f/H/83/1f/3/zYAYABwAGcADQBw/xL/F/9L/1D/Nf9o/5//1v/S/67/qf/t/zEALgAcAC4AZACNAKMApwCoAJsAZgDr/1j/C/8k/0T/Qf9J/5f/2P/N/43/fv/S/10ArwCsAJgArwCiADcAv/+1/wMASgBVAFAAagCMAIIARQAAAOP/6//+/xYAOQBuAK4AyACaAGEAdACXAI4AggCNAJAAiwB0ACYAof8b/77+iv5l/mj+yf5e/6D/Zf8v/0b/Pf8A///+dv8lAL8ADAEIAdsAvQBaAIz/2v7Y/lj/x/8IACsAJAATACUAMAA0AKMAZwHEAXcBIwEEAcwAfABNACgA/P/o/9//vv+T/4f/lv+Z/5H/m//f/zIAUQAiAOH/wP/Q/9L/mf9Z/17/d/9d/1j/3P+fABUBMAEOAakANwD//+f/t/+p/+P//v+9/3z/Z/9R/zv/cf/N/w0ARgCRALEAgwBBAAsA3f/H/8P/rv+I/4L/df84//H+/f5U/53/o/+b/7j/5v/r/9L/0/8CACkAVACgANYA2gDoABkBEQHDAJQAiABNAPX/0P+9/5z/hv+R/6H/0P81AJcAyADVAM0AoQBEANT/fv9v/3//iv+e//P/UQBXABsA/P8VADUAOAAcAPH/xP9//xL/lv5n/pX+3/4P/1H/zv9pAOgAGAEbATkBXgFCAd4AXQDo/6H/gv9+/6D/+v9lAK4A4gDvAKsAUgAnAAAAnv9b/4//AgBGAE4AQwAzAAMAvv+W/53/z/8vAJQAjwAVAKn/dv85/wL/Ev9T/6z/EgA9AAkAzf/W/+z/w/+h/8z/JgBLACEA1P+t/5z/g/9j/1T/b//E/zUAjACnAMgA6gDEAEQAtf9h/zz/Nf9T/3b/uP8GADMAVgBIAGoArQDLAKsAiwCJAGcAGgCh/yn/z/6o/qf+yv4p/9X/swBsAcsB3wG6AXUBFAGdAB0A5P/3/w4A7/+5/6b/uP/X//r/FQA1AEoAGACW//7+kf5x/pD+v/74/l3/3f8pACkAEgAaADkAWABWACoA8f+s/0T/sf4x/v/9Ff5O/pz++/5w//j/dQDUACYBhAHxAS0CIAL8AeYBwAF0AScBAgHhAK4AfABcAEAALAAfAA8A7P/T/8//xv+2/77/0P/c/+H/AQA4AFwAbgCCAIMAXAANALX/WP/9/rP+ev5X/lP+bv6P/qf+1f5D/9j/ZAD3AI8BHgJ+AosCOwLcAZIBTAHuAJ8AXAANAKz/Nf+U/vv9h/0v/c38gfx8/Kv89Pxl/e39hv4o/5//5/8VACYAJQAYAOH/cf8X/+j+vP6j/tH+Bf8f/yv/Mf8W//j+7f7r/uL+9f4n/3v/+f+SAEQBEgLUAmgDygMWBEUEYQR1BJUEnARgBPIDfQPcAg8CVAEBAeAA2wD5ACwBMgH3AKcAPgC6/3b/iv
+8//H/SQCwANAAoABjAAUAkv85/w//3v6n/lX+s/3Z/An8NPt8+iP6JfoL+ub57/nK+Tr57/gK+f/49PjP+Vb7gvyA/dL+uf/0/6EAzAFwAkkDUwU+B94HMAilCFsIiQcbB5gGuAUPBYUEXwMMAu0AzP+i/sT9R/38/Lj8nPy+/P/8P/2A/Z39s/0i/tf+mP98AIcBmQKnA6UElgWbBlgHqwcDCD0IzgfyBvsF0gRjA+kBagDW/mP9SPwa+7r5fPiO97j25fUP9Yz0dfSg9Aj1tvVW9vb2xfdc+Gv4bvjG+F75CvrB+sL7af1s/yABrAJvBPYFNwfACDgKCQsEDCsNTg2WDAUMHwuXCTUIGAdpBaEDegJZAeb/9f64/nX+I/5F/qj+AP9//18AIAHFAY4CbgP8A2cEzwT3BNQEiATxA+MC0wEbAWAAaP96/rP9pPx0+4L6qfnH+HT4wPji+LX40fjV+DT4gvc899v2mfbh9i/3Jvdc9/f3afiY+CP5H/pI+6n8YP48ADsCTgQ3BrAH0giwCXYK/Qr9CqQKPwpzCQQIQgaEBJ4C3gCt/87+3/11/cT99/0F/s/+KgB+AQAD1wRqBqgH5QjdCWMK6wqFC/YLPQwjDGsLeAo1CUAH/QTuAqoASP5E/I36mvjD9kT1gvNj8cvv5u4e7qPtKu4X76DvR/BN8e3xXvJo84H0L/WW9uv40vpB/Ir+TAFnA1YF4QcjCosL2AwGDj8O3w2LDeMMlQtBChQJoAcQBp4EBANDAaP/NP7j/Pb7p/vo+178H/1P/sj/fAF7A7IFEwhuCnEM6g0jDxMQmBDEELsQPhA9DwAOfwxRCtUHbgWLAhf//vtR+WP2pPOz8ffv+e0o7ODqj+le6B7oWuh96Hrpk+se7bztXu/w8avzEfW999T6NP2z/7ICIwUjB28JVgtODCANsA1/DTAN9AwbDA8LVwoyCVAHYQVRA8AAOP4z/DT6gPjz93z4gfk4+8n9ogBvA5AGuAlcDKkOBxEaE4gUkhWMFhsX1RYSFgcVNBOhEBIOTQvAByEEFgHx/bH6Fvh69Sfyru6F60zoSuUa4wri1+GE4jDkPebQ503pveuf7n7wNPIh9XP47fps/WMAEAOOBRYIFAp5C8QMlQ2dDT0NvwwzDFoL9AlXCLwG5wSyAm0AOf5B/O36Hvp9+XH5Tfqh+1n9ov9jAkEFVAiNC7wOkBEKFCkWvReQGNwYmxiOFzwWChU3E3oQ2A17C1sIuwSQAX7+7Ppv9xj0UPCE7Hrp1OaU5Ebjo+JY4rjiguMJ5OfkCefj6VvsZO5A8fH0+PcS+u78uQD1A7MGqQkODGENXw6gDmgN2gvrCnkJEQcLBYcDeQEK/w/9LPsp+dr3r/e796v3pvgG+679XgAMBEsIzgvdDj0SLhXpFloY1hmCGjAavRktGRoYwhYKFZkSyw8SDTsKHAfMA6gA2v24+r32avKQ7hLrAuj95Qrlp+QH5V7mLedg5lTm1eh16x/sWe3y8Ib01fZY+Q38bv5sAcwE/wabCHoKbQvVCpsJewh2ByQGEAToAV0Aov4A/Ib55vdB9sz0lPRX9Tb2wPeF+tX9PwFNBdoJsA2/EC4UqxfhGVEbWh0BH9YesR3QHGAbFBmdFvoT6hDaDcEKRAfWA54AHf0x+Rj1zfDb7JHpduY95IXjbuOZ44PkROU65Vzmcuna6xrt7e8S9Iv2H/jE+tz9kQBbAyMGQAjbCdAKsgqwCXAIlgfkBk4FswJhAK3+HvyO+LX14/P+8WLwEPCP8FrxBvPa9S75y/xIAXIGDwv0Dj4TyBcqG70drCD5IkEjfCLEIXAgRx4WHKQZfhYeE8UP4gt2B1IDmv+r+z73+PI777jrgOgW5pTkkeMi437jvOM4423jyuUR6Rfriexq7y7zCfZp+G77D//OAjEGmAhcCvILfQycC0gKWQmUCFIHFwU9Ap7/sfzT+Av10vJ68R7wU++q733wZfE38wD2Z/
kF/tMDQQmhDQsSdhZ6GWgbih2uH8EgDyEtIY4gsR4DHO4YaRWrESwO4ApuB/IDqABH/Wr5rPVt8kzvYexC6pvoC+dF5mnmIOaT5bHmZulS6wXsge0N8Nbx6fL69Er41ftd/4sC8gQgBzYJMQqsCR8JqQngCeUHtwRZAg4AC/x/9+v0qPO+8eLvn+8c8Cvwx/DC8mr16/gb/twDcQijDLIRFxaKGMQasx3PH58gHyEGIe0fUx4aHMoYMRUgErAOlwrBBncDXgA8/eH5o/ZY9JHyh/Cn7ofte+x46zDrdupO6KDnYOoE7QHtfe0g8CLy9fKG9O32rfn0/Nf/pwGUA/IFQwcYB9cGSAeGBzQGigPYAKH+w/vH91f0n/Jj8cnvyO7w7qrvAPEl8+D1Svnh/S4D3AfzC4sQRhXnGJYbRh6rIIwhRSHcIO0f3x11Gy0ZRRbZEqQPPwzyB6gDXQBp/Rf6JPcm9V/zRvFW75/t7+u+6lXqFOpC6XToLume637t0O2o7t3woPKj8771Afkv/C7/DQIIBJIFSQclCLwHoQfrB1wHlwU6A0YAAP2l+Qr2AvNV8ejvVO5C7qPv6fBt8kD1bfiP+9H/pASFCEIMBBFHFR4YuBqbHYIfASAcIC4gZR/FHQYcNhrbF94UnhEaDmgKlgacAuH+9Pt7+RD30fSY8n7wu+4H7VrrR+pI6cfnBOfW56/oz+iB6QjrOOw07QbvDPJ69b/4mftf/lkB5wN0BUUGOAd0CCYJlQgWB3MFhgPJAIH9rfqw+Pz2IfXP83nz5vPD9CP21ffw+dL8aQDAA4EGqAmnDUsRKhT6Fs0ZqxuIHPIcQR0qHVgc4Br0GMMWMhRiEUYOAgvCB8kE5AH5/iv8lvn+9if0pvGi77ftA+zb6pzpIuhd56XnsecS5xfnS+hZ6RDq6utX76XyQPVs+Fz8lP/gAQgEyQUWB9UIGAq9CcUIfAibB9cEmAFv/6D94foG+IL23fV19eL1LPdo+Bv6Pf3QAEoDjAUBCcQMvQ+lEgEW3hilGtwblBytHIMcHBzNGuwYORdCFT8SyA7fCx0J0QW9An0AP/5u+5v4+vV38xrx3e7W7IPreOqe6HjmreXA5S/lg+TK5HPlWeZa6FbrXe6/8b31nfkI/WwAfAOiBUAH9AhACooKAQo8CUUItgZXBN4B/v8i/sH7vPmy+Eb4Lvi9+Kj5ufqO/E3/6gHvA2QG4gmFDcgQEBRzF0UaHxwUHaAd/B3oHQkdrxsvGnEYHRYdE6gPKwzYCHMF7gHN/jH8U/m+9T3yO+9X7Jrpq+c15mbkXOJt4b/h++Gi4efhc+Om5e3nr+pr7vfyffdI+4n+7wEjBUQHYQh5CcAKnQtLC+wJdQhHB20FmgLv/zD+hPx++un4V/hT+Lz4ifmG+gr8jP5qAd8DRwZaCQ4NyBAEFBUXQhr+HKMeVx+KH1IfzB7NHQ0c/BntFygVRhE4DZYJ8gUZAo7+mPu5+E71tfF47mrrUujq5Vrk2uIh4SHgFuBx4L/gEeH64bjj5eWd6F7s9fBe9QT5LvyC/+0CnwUaBzoIoQnLCtYK8gm4CKYHWwYxBJ8BrP86/n78yPrf+a/57vmZ+sf7cf2b/ywCBwXLB4cKwA1rEboUnBdlGs0cTx4KHzwf7R46Hj4dvRu4GWUXqhRFEYUNvAkDBl0CC/8S/D/5RfY280bwdu236m3owOZH5YnjAOJW4T3hJeHY4BvheeJd5Jzmkull7YLxWPXL+CL8iP+NAqEEGwabBwcJmAlMCXMIVQcoBswExAKhABj/wP0K/IP68Pkn+qD6efve/Mj+NAEcBDYHMAolDWgQyhPOFloZnxtSHUseth7EHmoeqB17HNcatRhDFoQTYhDnDDwJuwWBAmD/WPxj+V72OPMp8C3tUOrd5wTmN+Ql4k3gVN8R3w7fGd+e3wThT+MP5izp4ewp8WP1+vgO/DH/LgJxBPUFKwdhCEsJMQk5CDEHegZpBZoDiw
H4/9j+u/2A/Lr72vu7/N/9Jf/GACUDCwb3CJsLJQ4JEUQURxe0GaQbWR2hHi0f5R40HnMdghztGs4YihYkFC4Rqg0DCogGLgPL/1b8/vju9Qfz1++A7HPp8+bK5L/ij+Cw3tTd4N3z3fzdyd6Y4NHiNOUD6LXrP/DK9HT4f/u2/uQBVQTfBeAG8QfUCO0IAgjuBjAGYQWuA3ABxv/q/v/94fw8/Ij8gP2v/s3/PgGOA6gGtAk9DKsOpxEAFf0XLRruG6YdEB+PHz0fuB5CHkQdYhvtGKsWmRTvEVYOgAooBwUEgwDP/Fj5Lvbb8lXv8OsJ6Z3moeSm4l3gK94P3RzdUd1c3ebdX9/C4bXk0uc965vvbvSK+L77w/6gAeIDpQUNBygICglLCacIrAfvBskF4wPQAV0AYf93/n/94/wj/Sn+W/9sAOsBXQRrB0YKngwoD0ISeBUMGAMawht0HZse1B5gHtodUR1VHJgahxhzFikUVRERDq4KZQcVBJoACP2u+X32RfPf73zsYOnM5o3kYeIT4CreB92i3IvcstyX3VffnOFI5IDncOvG7+/zqvf2+kv+SwFrA+kEXAauBzcI5Ac7B7IG6gV7BHsCvACV/53+df2a/H78HP0J/hL/dQCXAl4FZwg5C+UNvhDCE4IWxRiwGo4cGh7vHv0eth5nHtMdlBy2GpAYVhbeE/cQpg04CtsGcAPq/4b8Ovnt9ZXyQu/66w3pkOZp5DjiCeAt3h7d29wG3WTdQt7s30Li/eQ86B7sVvBU9OH3H/s7/vgACgOPBOcFIwfcB6kH9gY+BoYFSgSBAr8Agv+a/rj99/zr/LD94P4EAFUBTgP8Be0Ipgs4DgYRBRTPFhIZ3xppHKIdWR6CHi4esR0QHekbABq4F2IV3RLpD2EMnAgrBecBZ/6Z+vf2s/OE8Brt2ukz5zjlUOMr4TTfLd7w3erdCd6y3iDgIuJp5Bvna+pO7j7yufXU+O778v5hARsDaQSZBYkGsQYdBlcFwATsA4cC4QCi/9L+Ev42/cL8Mf1S/p7/5QCAAuIExweXCg4Nhw9XEkEVuxebGUAb1xwbHqQebx4JHqQd6hxhGzwZCBfhFEMS4A41C+cHvgQxATn9evk59hbzj+/76/zo1Obp5L3isOCS30zfP98135zf3uDO4gbld+dz6ibu/fFM9Rb4+voH/psARgJoA3kEbgXBBUgFbQTEAyUDFwKDACv/bv7W/fr8UvyJ/KD9//4zAH4BdQMRBrcI5Qr5DG0PMhK3FLEWTBjYGSkb0Ru8G2sbQxvsGtUZEBg3FnUUVxJ4DyUM7AjVBYcC4f5D++H3sfRx8Unui+uX6SLon+b75NzjkOPG4w/kb+RM5fnmE+kl61rtM/B183f22fjx+ir9Tf+xAFkB2QFxAr8CVQJbAZoATgDC/6/+if0M/f78zPyT/Oj8If7K/zcBYwIDBGsGDAkkC9kMwA4aEVcTAxUwFlEXXhjVGIgY5hdoF9wWpxXJE+kRVBCTDjEMYAmqBh4EggG8/gX8lvle9w71ufKx8DrvGu757L7r1eqN6r7q8uok66TruuxA7r7vRvF18zj2mvgZ+kH7rPwo/hz/Z/97/8P/3/9g/3D+yP2c/VP9ifzM+7v7GPxI/FP83PxD/v7/cgG0Ak0EcwakCFIKqQsmDd8OdRCZEUsSyxIVE/ISPBIxETQQRw8EDlQMigr+CKUHJgZPBHMC/gC8/2T+8/x7+yT62viF91D2mfVN9RH1f/St8ynzUvO+8+rz7fN89KP1svZf90j42vl7+2n8s/z6/IX96P2g/eP8YvxG/BL8gPsG+yP7mvva+9X7Bfy//LP9jP5r/6gAVQL0Ay8FTgajByoJZgoXC5MLNgzYDBwN5gyDDCsMqQvECokJXwhlB1IGDgXOA8oCCwJWAXwApv8i/8v+V/7T/Uf90fxR/MT7QPv8+ur60vqn+mz6Tfp3+q/6sfqT+sT6QfuH+3z7l/sB/Fv8S/
z3+8v72vvT+477OvsP+wj7/fr7+jf7zvuC/AH9dv0+/lj/WQAdAekB8AIIBO4EkAUWBpkGBAcMB7kGUAYPBtcFbQXiBGME8ANhA6MCzwEZAYUA2/81/7j+bP5G/i7+F/4o/pj+Lv+j//v/XgDUADgBggHHAT4C0QI2A0kDPgMwA/4CiQLRAQYBXgC6/9v+4v0k/aD8H/x9++/6nfp8+lr6Rfpt+sn6QvvF+3L8dP2k/rP/fAAvAe8BuQJZA9gDSQSoBOUE5QS+BIQEKQSOA5oCcgFqAHT/if6o/eL8UPzq+2f7q/oB+q35mPmE+Xb5j/na+WD6Avu6+6X8yf3l/tn/5QA1ApcDwwTYBecG5AehCP0IEAkGCdgIXgiOB48GaAUlBNQCbAEYAPj+9/38/Db8sftb+yz7NvuR+zD88fy5/an+y//gALsBeAJMA0AEEwWLBcAF6wUBBtMFQwWDBLcDsgJSAcD/TP4C/a77RvoG+Rz4fffy9mv2CPbX9cL1rvWk9dr1Tvbo9pn3bfhh+W76p/vs/Dz+of8lAbQCNgSNBb8G+gc4CTsK4AouC0QLGwuhCuwJBQkICPgGrQU2BMoCcwEuAPj+7/1G/QL94/zc/Bz9t/2B/lH/MQAzAVcCZwM7BN8EcwXtBSkGKwYVBugFcQWpBKQDjwJrARMAgv7y/If7L/rc+KL3qPb59X/1KvXk9Mb01PT99C71WfWN9dn1RPbH9l33F/j8+PH57fr2+yL9f/7y/2EBygI8BKwF7wbzB9cImQkfClYKQwrwCXoJ8QhJCH0HnwbWBRMFTgSHA+UCfwI4AgYC7QH+ATkClQL4AlsDxANBBNIERwWlBfUFIAYLBsAFUwXMBCEEPQMwAhIB+v/d/qv9cvxV+176gvm2+PH3Oveh9iP2v/Vm9RD1zfSr9Jr0nPSz9NP0EPVx9dz1R/bK9nj3SPg2+T/6aPup/Pj9Uv+xAB0CgwPXBPcF8wbKB3AI6AhCCYQJkAlZCfgIlwg2CM0HYwf7BqcGbgY9BhoG/wXqBdAFrQWSBZAFpQWjBaEFtAXgBQUG/AXABWcF8wRPBH0DnAK4AdUA2P/J/sH91/wL/Ej7fPqz+QP5Z/jW90j33faQ9jD2z/Vu9Qr1wvR79EL0F/QP9Cn0VvSV9Pn0hPUx9gX39/cR+U36qvsO/Xn+5f9NAaUC4gP6BOYFrQZZB98HRAiQCM0ICQlJCYAJowm3CdMJ4AngCdsJ3gniCdIJqglyCTcJAAnJCIcINAjfB4MHHQehBiMGpwUdBXQErwPRAtsB0ACz/47+bP1U/ED7K/ok+TT4cvfN9jv2wvVm9SH17vTM9Lr0rvSa9Ib0gPSW9MT0CPVg9dL1bPYy9x/4Gfkc+iX7MPw2/SL+9f6q/0EAyAA7AagBFQKFAusCTgO+A0AExARFBcIFQQbNBlkH3gdiCNwIUgnGCScKdAqjCsIK0ArDCqAKaQooCtIJbwn+CIsIDAiEB+kGOAZxBYwEkAN8AlMBIwDq/qz9bvw4+xf6FPkm+E/3jvbs9W71EPXC9Iv0dPR+9J30zfQb9Xn15/Vo9vr2pvdi+CL55/mx+oL7UfwW/dX9lf5I/9j/UQC+AC4BkwHrATUChQLnAlsDywM6BLwETAXgBWoG4QZSB78HKQh+CMsIFQlhCZ0JwgncCekJ4wnKCZ8JbgkpCcoIQgioB/YGLQZGBT8EHgP0Ab0Af/81/vD8uvuV+oT5g/if9+D2QPbC9WH1F/Xq9NH0zvTi9BX1Y/W89SP2lfYd9633PvjU+Gr5Cvqq+kT72vtt/Aj9p/1B/tj+Zv/g/1wA1wBXAcoBNwKbAgMDewP9A4UEEQWiBTcGzQZpB/wHkAgfCa8JJAqLCucKMQthC3YLcAtLCw0LtwpCCrMJDglVCH4HmQalBakEmgOEAmwBVgBD/zD+G/0L/P/6BvoR+TP4YPel9vn1bfUF9cL0m/SG9Jf0wfT99GD1zfUw9rX2Jfeb9wj4gf
gC+YT5+vlw+uL6Xfvh+2/8Bv2Y/Sv+wP5a//b/mAAvAcMBUQLmAn8DIwTMBHwFHgaxBj4HyAdBCKoIEwl+CdAJCwo3ClcKZgpfCkUKFArYCX0JDwmTCPcHSAeDBrEFxwTVA+AC5gHrAPH///4T/in9R/xw+6r68/lP+bz4O/jQ94T3SPcP9+722fbT9tP28/Yl9133l/fL9wj4TfiQ+Nz4Jflz+b/5E/p2+uz6avvw+378F/2s/T7+1f5y/xwAxABjAQICoQJHA+kDhQQlBcMFUAbXBk8HtwcNCGIIrwjnCAsJIQkdCQgJ2wieCFAI9weMBwcHfwbeBTIFegS7A/YCLAJgAZMA0v8b/2n+vf0X/X787/ti++f6dPoM+qr5WPkS+dD4k/hn+Ez4OPgu+DP4O/hP+Gj4j/i6+Or4Jvlq+bP5A/pZ+rn6HfuH+/z7a/zg/GD95v1t/vL+g/8XALAAQwHRAWUC+wKSAycEtQRBBcAFNgagBvkGRQeGB7sH2QflB+QH1ge5B4oHTwcCB6sGQgbMBUwFvgQoBI8D+QJXArcBGAF3AOH/Uf/N/k3+0v1a/ej8gfwf/MX7c/sk++H6pvpu+jn6E/r3+d75xvm1+bP5s/m0+b/51Pn1+Rr6Rfp5+rL67foy+4D71Psq/I/8/Pxw/er9ZP7l/m3/+v+GABcBoQEpAq0CMAOwAykEnAQFBWsFwgULBksGgQarBsAGxwbABqQGfwZPBgsGtgVbBfoEiAQSBJADDgOLAgUCfQH9AHsA//+M/yD/uv5a/gD+rf1b/RT90/ya/GT8OfwK/OP7xfut+5T7cvtf+0j7Kvsi+xz7DPse+x77M/tO+3D7mvvI+/j7M/x3/Lj8BP1V/an9Bf5m/sb+If+I//X/ZQDdAFEByQE/ArUCJwOTA/gDVQSnBO8EJQVPBW8FhQWDBXYFXwU8BQ0F0gSOBEYE+gOpA1gDCQOxAlwCEQK+AWMBEAHAAGwAHQDS/4b/Nv/0/rb+cv4t/vT9wf2R/Wr9Qv0k/Qv98vzg/Mz8u/yt/Jn8hPxz/Gr8XPxT/Fb8W/xp/ID8nPy+/Oz8Iv1a/ZX92/0o/nf+yP4a/2//xv8hAHoA1AAvAYUB1gEgAmcCpwLkAhYDQANrA4gDnAOsA7sDvAO3A7EDoQOOA3IDTwMlA/MCywKYAlsCHALgAaYBZgEiAeAAoABcABoA4f+r/3v/Sf8b/+3+w/6d/nr+Wv4+/iT+D/78/eX90v3A/bL9pf2X/Y79iP2D/YH9gv2L/Zn9rf3G/eT9Af4l/k3+cv6h/sz+A/88/3D/qf/f/xYASQB/ALUA5wAXAUcBdgGhAckB8AEQAioCSgJfAmwCcgJ8AnoCcAJnAloCSQI0AhsC/wHiAcYBqgGLAWkBRgEmAQYB5ADCAJ8AewBVAC0ABQDh/7r/kv9t/0j/Jv8K/+3+zf6v/pv+jP57/mz+W/5N/j3+L/4p/iL+G/4b/hr+HP4i/iv+P/5W/mj+hP6q/sv+7/4b/0b/cv+h/8r/9v8gAEoAbACPAK4AygDkAP0AFwEtAT0BUgFmAXEBgAGHAZQBngGhAaMBqAGiAZsBlAGBAWgBVAFGATABGgEAAe8A3gDIAK4AlgB+AGkAUQA6ACIACwDz/9r/vf+m/5H/e/9j/03/NP8b/wv//P7s/tf+y/7C/rH+qv6j/pb+nf6V/p3+of6p/rL+wP7P/uL+/P4N/yb/Qv9h/4L/of+//97//v8bADQATgBpAH4AkACjALMAwQDRAOMA8QD7AAEBBwEJAQ0BEAETARMBDAEFAQEB+wD2APQA6gDaAMoAuwCyAKgAnACKAHkAaQBYAEgAOgApABcAAwDs/9f/wf+v/5//i/91/2T/Vf9I/zr/LP8b/wv/+f7t/uX+5P7k/uH+4f7n/vH+Av8X/yn/Nv9M/2X/e/+Q/6v/wf/X//H/BwAdADMARwBXAGYAdwCCAI0AkQCUAJkAnwCgAJwAmgCWAJYAmw
CbAJ0AmwCcAJwAnQCfAJkAlgCRAIkAfwB7AG8AagBiAGIAYABcAFUATQBFAEIANwArACAAEgAEAPT/4//U/8H/rf+d/4z/f/9v/2H/VP9G/zv/M/8p/yH/Hv8a/xj/Gv8g/yj/LP84/0T/U/9o/3r/j/+j/7n/0//o//r/DAAcACoAOQBCAE8AWABkAGsAbAByAIYAiACDAIIAfQCMAJkAqQCxALYAtwDDAMgAxQDEALYAqwCjAJ0AjwB+AGYAWABSAEcAJQAGAPT/4v/V/7z/tP+v/6//uf/H/8j/vf+s/5v/hP+U/8//sf9U/3f/XgB4ASEBr//7/kn/gP/o/sP+Mf8o/4z+I/4n/mz+qP42/g3+jP75/jr/CP8b/6T/4f/7/xIAJgBNAHwAqADDANcA/wAqASoBKQEvAToBHgEJARIBFwEgAScBIgEHAQEBBAEKAQABzwDNANsAxgCxAKwArgCfAIsAXwAxAC0ANwAjAN3/zf/n/9z/u/+b/5z/ov+F/3P/Wv8q/yb/Bv/Y/t/+xv7Z/t7+6/4q/y//Tf9o/5v/ov+l/9r/kf9Q/8r+Kf8+AbUB+P+a/2YADwBD/9L+gv/N/9r+if8ZAKf/jf82/77/v/85AKoAcP/4/4YAgwAQAR0BKgHHAPgAtQBkABYBnAC1AOMA6P/p/0EAhAFRAbr/fwE5AaT/QgCgAP4ASADD/3v/Zf/E/1H/OgACAc4ApQGYATEB0AENAmAArP55/0MAogAq/3L+HACS/ub7RPu++yj+Gf+i/I381P2C//YABgGdAKP/EAH0AScCyQMCBkMG/wI2AaEACQE2AVv/W/3f+1f7G/qJ+Zb62vvv/BX8tvvy/XcAkgFbAXACqgNmA1YDzQPKBEcGKAZRBSsFOQR3BMsFYgVABKUBbP+X/uP9n/73/L76DvtX+rf59fr2+8f8Cf4z/27/CwBDAoUDhwRCBuYFUgOVAnUDbwMTAhACBQEN/lL+nv50/U79Ff3K+/X6Bvs7+gP6ffs6/Bb8cf0g/+z/HQDXANsARQCXAJr/o/56/0r/DP6P/NL7fPwE/av9jP66/xsAxwArAewA/wJrBNQCzwEEAh4CwwJuAzkC/wCHApsC8ACSAJIBowEBAcoBSgBQ/zoBwwGSAFoA/wDwAFABUgIwAjQCNQNxAt4AGQJ6A2kBBQACAIr/sv/Z/9H/Y/9ZAJQA1/6Z/+QBFgGg//b/oP85AC8AHv81/q7+MgH2/8X92v2Z/eP9GP4D/qP9TP0K/k39YfwZ/Zz9gf0f/bf8uvyB/oD/3P6f/pX+m/9tACP/Qv+KAOMAOQGpAHwAUQE0AmMBQgCOAAcBXgHMAWECAAPZA6oDmQIKAiIDawSsA0cC5wBpADsBkAAg//L+7v71/er8rfzD/NP8hPzE+5r7GvzP/Mb8evyk/UH+xP4w/9n9k/1z/n3/sv/P/kX/IgBxABkBqwATAF4APwCO/07/sgBMAQUBGwIdA/kDBAW/BVcGAAZ+BlUHaQYFBtUFAAVlBNED+gK6AfMA9//N/oP+2v6S/oj99PyX/Fn8+fw6/Y38IPwS/Lb8//zb/L78yfv2+qX6w/kT+WH46/Vm8xLyiPKf9BH2l/bJ9m/4Yf0bA3MGqQm4Di4RyxBoEiMVFRbKFMYRdA5CDMYKtwYRAe/9YPyI+Zz2I/W19AX1JvVf9O/0tvcx+uT7M/7GAfgEXga+B24K2gzcDYkOcA3QCocIawXmAWz+b/rS9U3ww+sl6ZDlFuKi4eXfrN5T5oH0Evxr+Rr5LwJvDlQUPBWVGJAcGRyhGGYWjRV7FNUQtQcz/7/+vP0l9Ufu8u2k7C7qGerv6krtc/Eb9UT4VP7DBcIKKA5OEigXOxoSGpoYyBgjGcMXthRND/IIkAQ2AbX80vc/8+7ux+tm6sLofOWv48bjweHN3WDhsO4j92Hz8vEY/WQJfQ1JEHEV4BgyG+8bgBgUFzkalxeMDBQFWARRAQH6O/OS7orsuu
t16nPqeO2c8YP0zfWY+dkBzgjkCj0MUxETFt0WBBihGrIa5hezFcETqA5YCKoEwv+Z+DT0Ne8t6NXlOubx4VncIN1x30Tcpdxx6i334vY695gBsgsmEAwWNxvxHEkiNiWPHmoZKxoQF4gNxANm/27+4vcW7Fzmx+VP4yDi3eMW5tzocO4b9qz7hQAfCFoNNhDtFyMevRtHGjAeSh6fGcoV/RF6DJYGCAFr+1X3X/Sg73TpV+XN5JPjet8l3trf3Nyb3Rju3/28/dv8twLRCW0WDyHHIMoh8yWFJEYg9ht9GA4W7QsM/pT7qPzC87zoyuLv33ThceXs5XXlVOuH9T/7T/5nBVIMcQ9UFMYaDB0JGkYYWxpmGLYRyg70DCgF3/0W+wn3rfKj8A3rXuJO4LjjruKq3ingl+EY3wvma/dW/7z7Vv6uCIYSkBtmIZkiiCTiJjglfCBTHPsZEhUwCkgA//3a+r7vZ+SI36befN8+4aviYOUF7LnzuvjV/p8HlQ1lEcEYmh+WHykedR8tHn0ZghZMEwAMcQOw/FL3sPIj7bPl7d3X2c/bu9zK2SLbNdua1r7gsfjz/8r4AP1rCQ4VCCHnJi0nrSqtLewqPyVKHzIbCRb3CuP/5vud9+HrN98X2fjXkNjC2yXfgeBo50T0zvu9AKcKfRJ8F8AhnSrFKbYnuCi3JmYhRxyHFlMO5wO2+hP18e+A6Zvhftie1BHY+tjO1gTawNuq2MjgqfVLAMX7yf3DC58YnCBaJX4mByc0KX8prCO+GvcVOhEyBk77/fd09PzoZ92011rVutcB3gjg/+Ds6i74EP+xAz0LthJFGukjESohKAMliCWpI7ockhboEUcK7v8f9gHx/+6b6BHek9Z31TnZO92y3k7gzd2u2nbpHgLCB64BQQZpEF4axiaSLeMqbSi5KZkoMyL3GXATCAtpAOT4YfQm7VniBdhx0s3T+dhU3Lvcb+FJ7s36dgBhBTMNPRR6HOAm3yocKIIojynGIzIcThh7E5YJzPw58pbtp+rm4eDWj9I10hPRPdZN4Hze3NRL3cj2wAMXAG8EKxPmG5cj6S0XMFUu2TBAL64mlx44GPcP8ANh+Wv0wu7c40XZSNJZ0X3X69qv2XrfH+0r+Fz+jgO5ClkVgSCUKEEsKCyJLMMtCiltH5UZGxUhCkX7XO+v6Unmdd1K0ZTM8c5J0MTTcNqS18bSC+Kr+ncBx/xXBO8V0x/0I0osVjGiL7AxATKIJn8bhRcFD28BQPhP81nrkOAt2E7TyNF21e7YFdko4Y/xi/pv/NcDMw9mGNggECdRKX0rIi6nLZIoiSCqGhIXiQw7/Nryee8852fbndT/0kHRKNHh1ynbftKh0jHp4PqF9l/4uQreFqEbMyUOLZwvsTLmM3ounSZBIS4bYg+jAJH4oPXL6/DbJtLR0CjS8NNt1cjXbeDp7hv5cf3ZA7EMRxXaH9wp3CxJLLUs5ys6KIEioRuIE7IHPvm78cjvDOcS2tDSKc4dyjfP7NhK1i3NStZN7/r65fceAJARrhlcIhAvlTLpM3Y50TaVKwgk0x/5FrsH3vmq8gbsCuGM1dbNFc0u0/vWJtYm2wbpbvaB/XECBgvYFrEgWSf1K/ot2i7ML50rnSF4GtQW+g3h/lDxXupf5vLeotXHzjbKUcx01ybaOM1PztTqJwJe/iX5rAa1Gekk5isyMWMyRTWiOW8yASRHHo8aaQu2+l/zTu7j42/XHM+wzc/Re9Z510bZA+Wh9Tv+/QBeCK8U6h9JJ1sp1CgAKycucSucIs4ZIhUzEMEDqPM465TpIORz2GvPrs0xz2fTvth+0wXN0+CnA0IJGvkG/foScCMELjcyXC2VKwsyvTPLKdAcohRaC9f8IPFF7Vnpr92Nzy/KNNER3Lbfst1C4i7y+AKpCn0NoROhG6IixSjWKtcoLSi0Jtofixi0FPwPYAWh9DboeOjd6Inez9X80rrOuNHB3urfp9Ov14Dz1A
jhBOr9+gmPG4slKC/0MWspySmnMQgq7hq6FZMPHwC18jfuC+zW4/vWBM9x0T/bIuMp483k9fEmAqQJcQyxEUMYGh8eJQIngSTFIQkhKB+qGE0RpgyHBKX2P+2s6/Dn+94B2NfW69Yq1kzbKuNr3nXZ9u0cCDEIcwFDCnIWhCAULWIxkSp+Ju0oBig0HwQVqQxOALPxuexI7s/mUde6zqTS69tw4xnm8uVH7S7/mQzEDcoNihOFG/YiFifEJNcfJx2IG+oXYRLHDOgFFPpj7VXq6OzQ5w/dQtZy07LUYN1b5argGtnc5ET/6AuYBv4GqhKAHRIpizL6LisoUinjJ0EfgReuD2sDofaJ7wntYuiq3n7VRdOE2eDjkejD55fug/6oCcQMsw+dFGQa7R/1Ipwihx9pHNMabhaZDgMKdwZq/Lvwwup06Kzla+DY2kHYt9fP25fm7eg03j/iYfw7DAkIiQeUEKcZtCO9LAssrCREInQkPyC0FNEKtAIP+CnwZe1d6J3ertaJ1gfcAeLm5mrqje/++8EJGA8PEHQTmBizHhMkgyOTHtUaKRlkFtoP1ggKBSL/GPMz6iLqfekE4orZOdXN1Vzeu+vl7J/eK90u9yYPOQ1PBi0NaxchIdIr1CvKItEgmSMyHwQVugsMA3P4cO/Y6z7qN+Qb22/WcdoZ5KfrSu3a7ur44Qf2EIUSqBOrF/Qbwh8lIj0gphtbGUwWbA7fB5QGzgIM95rpsOQW6Gfo/OB82dLXIt3F6cPwTOTF3EbzCw24DLUFlQthFZwdOyaDKOYhSB38H4AfbBVdCpYD7/p78aztveu05Lnbe9ju2zjidujW6zDu/PeLBl8ODRBLE9oYWh7LIhsjOh+UG+UZDxcZEIAHEgSMAkj4D+rm5YboTeXw3FDX6da43AfooO0K5N3dhvEWDIQObwWACuIWSx+9JkMpIiNVHkcfvx0RFqcMZwTM+jDxAu0A7cToId9Y2AvbPeRK67Psue7391gFJw+DEoAUfBn+HZof1iCZIB4dthljFb4MUgWAA6gAQvZs6NLhxeSx5lfhL9s22YPdFepi8o7o/eF69X4MUQ2QBsAK7hTuHJ4i7CShICkb6BsnHJ0TVgmOAxf7hvD/7Nvs5ubH3Z/Z09wX5L/qr+1p8Gj6nAhVEPwRTRX2GsEfCSO4I+Ug8xtmGBIWVg8hBTMAjf2s8qzmzuQ75gnjdt7x2UPYD+EO7t/sg+EZ5wAAxA5fCWYGRg8mGCIfGyX2IoQdUh6KHtIWbQ08CM8C8feh7Qjr4Oo/5Q7d8tnn3kLoLe7L7izySP3zCRcR/BMsF4MbIh+mIW4iaB/zGeEVNxE6CZICs/8g+sLu+uQC46jkZOQZ4rveCd2s5HLyOvJ55VLrkAYjFF8KGQSJDRkaByFSIyYgBhl+F0obYxY3CRMCJv989crrd+sq7GHlYN2a3MriHevw8GbyafXMAHIO2hOfE3QWtxvtHnkgHCDKG14VsRHuDvwHaACQ/Mn16OkP5BfnY+g15aXh3t2h4IHuKPYQ62zjx/U4EFsTNAUSA34QZxwVIkEiUBrcE6EXCRkDENoGpgFf+cju+eqI7UnsFuRM3CfdiOfK8WjzjfK1+nYIIxEHFJsWIRrzHJUe+h9MH1IZDxI4DgIKJwTe/7f5ze7f5kTms+Yl5cvkRuUX5PjlSPAK9lvrzOcwAR0Y9w3A/kwGFBVtHfohER7jEkIP6xTLFZAMGgKw+/b0xu217ePxVe7U42/fD+Yb8K/1K/b29mX+Kwt+EzwUbhTYFqsZPR21Hq4ZrhGTDMEJfQbfAvf+E/ja7HzlT+kX7w7sr+YM5nbmgus1+Bn6Kuu56HP/ORKXDcwC/QN7DhgZ5R60G8ERPA4zExISXggUAhv/6vfR7yTuuu9e7aTnBuR45hDvP/aX9m34DwKPC3QPZxHnE7IWahnZGhgaYxbgEGgN6QrKBK3+PfxJ933u1OkA6gDp3Oak59rp8Ook7xP3rv
bR7WHyLweSEW0JEgTFCfoRMxnzHLcYQBBNDXQQyw9BB7j+Z/k49HbwrvDx8F/t0eji6HjutfQR+J36kf/KBlUNzxBUEgkU4xXSF3oZNxiIEgMMTgjeBcIBff0D+rbzf+p95sPqTO5u6r/lE+ce7Jn0E/36+HjucvUbCQAPMQeeBZkLRRDDFLkYJRVcDTcMwQ6NC0gEQP/m+j72KfSL9PzzGPB7687rM/GQ9hz5wvqa/zcHBgwCDTMO0Q99EY4VfBihFF4NXwlUCF0GbQIF/lf4BPF97enuCu556qvooeZR54Txrvyb+kzvbO/RALsP9w3WBhMGoQmSEV8aFxjJDZMJrAv6CyYIfwJF/Uj47PWT91L3RvIg7ofuC/JS9ln4CvkN/WcEDAqGC4ALzAzgDuAQhBNSFAQQjgoXCOAFagMQAsX+9/dg8Rnu6O3l7QPsWOnx53nrEPUZ+x32KfEr+e4GFAzcB50GKQwTEZ4TbhZxFO8MqglTC7IJ6gQ9AKP5N/Rw9Wj3DfWs8fbvhPHq9Wf4CPmR/GsCRQfTCS0KswqmDLIOUhH5EgcQ2At3CksIVANp/5z92vqi9WfwFO7d7RzsxukH6ivs0PDv9nP2K/FX9TIDtwpcB4EGqQ1VEvkQPRJSFAsQPwytDGgJDgTqAUH9E/Z29Gz3mPdI9NXxsvKi9Zr4nfpT/VwCQwcoCEcH5AgXDM8NNA7QDjkPUg3CCOgEgwPnAVj+rvo5+FX1lvGW7p/sxOpY6krt2PKv9qv0U/BE9LYBbAxWDBUIcQc2Cw4RDxT+EcsPUA7rCSsFlAORAuL/i/vI93H2sPVr9e32oPZR9BD17/gw/X4B2QP0AzcFLwi4CrwMWg5xDxsQow7NCRwFxgNaA20AUvzP+PP1IfPn7qrqgOrh7BDuEfDv84D1cPQx9h/+bAe7CkgJGAoYDlsR4hB1DwAQgA56CW8GqQVcA6X/hPtk9i/01Paq94f18vWp9pD1Lfg2/jUB4gHhAxkGKwjjCaAJKAr5DEgOjw2MC90GkgL8AXsBB/6o+UX1RfEO7u3rcuu3637s8/Dd9pn31vRN948BYwtuDOcJ0gzYEFIQ3w+UEN0PIA41CvIEcwIHAcf9R/mW9AXzA/Zf94X1kPWB95/4ufq9/60FwwhiCBgHiQfECaUL8QsuC1AJ7gaGBW0EPgIpAJ39GvnN9Inyo/A87h/rfOmB6xjvH/Rv+ff4g/bQ/HAIxA6sDjAORhB0EYsPZQ3YC7kJrgfhBLsB5wDd/9j6GPZP9Rv28PaP99H3mPnr/FT/nQAvA60G8Qd9BnQGdAmiC+gKEwqtCqoKnwchA+cAlgDz/oL7q/b68avv8u3i6pTpoeqc7ATyXPnt+8v5/Pp6A2YMXA02C7MNlw/iDeEN1w1gCiMHnQSMAOP9Y/7z/Ef4DPam+I36Pvn9+Mj7Kf4X/2wBPgW6Br4ELwJRAjYG8AkfCV0IIQvlC1oIMQQpAWIAXgDD/LP2fvNt81byQu/e7J3rDewk8Xj4b/u8+sL7zwDSCagRlhESDQ0LEQt/C5YMYgv2BzoFLwJ7/kv9bP1q+zP5OPrh/D/+d/4T/xX/vP23/QIBawTjA+f/f/2K/xQD3gRgBn4IxAdZA0cBOgTyBR4DN/9F+2/3q/UG9DTwcu057drtEPDl9J35Kfvq+hn+BwZkDL4MJAzfDRwOpgsrCsUJ7gdQBD0BLwD2/kf8pftG/Tf9Q/ws/cP+if/C/4IA7QEgAtYAJgD5/rL8XvwP/2oC/ATpBgMJ2wmWB4AFwAWrBB4BGv5l/Pn59fVM8TruSO3B7ffvYPOG9tX5Rf3P/6ICpgdUDaAPTw00CogJSgreCeQH2ARYAVL+y/zA/Jv9o/4J/xv+ef0b/wYCngP3AoIBKgEaAcn/hf7w/in/lf3c+0L8+/6tArcFKAfbBmMGWwdcB8UD4P/b/nb9ZfjL8vLvCO/k7iTw4fIp9r747/vCAjkKHAzoCgsMEQ3ACjcIZQ
f7BSMDBQEzAML+8PtH+rD7Yv3Y+3H6Av1MALIAf/8x/pz9V/8sAvACLgLLAd8BJQL9AQoBWgG4A7sFggWmA78B4QCf/1X9W/yU/B774Pe89B7zW/Mw9Sb4P/vb/GL9Of+yAqgFjQePCb0LOQy4CWwFvALkAUEAZP4p/rz9iPxE/Fj9CP+JACEB6wEjAyQDKAJyARoBbgFRAe//1/4u/9gA9QKXA6sDLAWRBosGbQUVA+cA+gCoAVsAm/7s/JD5TvWo8tPxlPLj9Q/5Bfj89SX5qQBhBucGJQZ3B9wHZwbqBdMFCgRPASL+cfsa+yX8Ifwf/CH+wQFbBDoEBwNdA9QDbAInAXsBzAEuAXr/E/3O+6j9agH4A3cEKAWvBpsGVAQ2A88EqQbfBRICR/1G+XH1AfJd8czzafYy92P2UPaq+QYA/AT6BZgGyAhbCOAE9wJVA90CrgA7/in9kPz1+vr5+/sS/zMACwBGARkDBwMrAp8C3gJGAngCywKoASwAZP+V/ygBMANFBGcEuwOVAnAB3wDCAaMDDQRNAs//wf0y/Jv6fvhg9wD4J/iE91b3cPf/+QoA7gSjBsoHJQgXCF8JkwntBTIBi/4A/cH6e/jl9wD6bf3L/4EATwFRAy4FVwU7BQAGxAXtAl7/A/44/2gAeADJACUC4AKvAdwASgIZBMwEHAQfAoQAo//w/OT4aPYc9gr2mvWB9gn50frT+6/+3AKaBKQDbwMNBGIC1/9r/xEAiP+G/Yj7PPua+yj8GP7qAGYDywQfBPMCjAOQBO0DjwJEAnEDXQN9AO/9Xf77/0UAQf8bAPoDMwY8BNQCBQQbBNwB+P9OABsBOwD7/r39MfuN+KT3dve795H5Hvyf/RQAKQVRCFAF2QHjAnEDRQBq/vP/yQDa/qf9av8cAaf/ufxB+5r87P+8Ah0E7AXbBo8DG/5U/FX/SAK9Ad3/zP9wAFj+HPvZ+xMALQIXAYIACgJJBGIF1gQzBDIECAN8/+v6pveW96z4Yvfj9Q35Wv7y/xb/FwHDBZYIqwduBSwEMAPXAHv9Lfu7+/38gPx5/Kn+rwABAmkDNgTJBJ8FsAUIBAEBT//TAOwB0f7b+pv5qvmu+fT67P56BGEImwktCQQHYANVAID+CP4u/9H/gP3K+fn2XvWu9C71H/he/dwAdwB0AAgDaQQGAz0BmAESA9sB1f7W/ioAo/+1/oz9vfzn/m8CnAScBs8HtAV7AqsA2/8TAEAAs/4P/vz/agHVAH//fP5G/3kBZQNrBTIH4AbTBEMCOQB4/x7+8vu+/Gb/q/7++gP5Svk8+aL5Ff2ZAHsAnf7X/XX+aQCUAh0DKQKgAeABwAElAJn9f/sh+2H8Ov4rALABXQEPAAcAxABYAVICFwPxAqsCzQKkAnkBv/8///T/zf/q/on/WQGEAtUCJQPtAnUBOgAcAPb/EwCPATcCv/5B+jv55Pj59vf3RPxp/lz9Zvx6/e3/OALWA9cE+wSVA0EBrP+v/wUAZ/9g/or+0P9/AMD/3f7V/nn/YQB7AZYCngPiA5ICvAAXAPD/of9dADQC/gJQAgMCAgOmA38CxQAaAO//SP+i/h//ggAkAcL/uP2v/Fr80/s5+wT7Efzu/e/9tPsU+2v9vv+DAEQBmwLtAhAB2v7L/mYAHwGlAAgA4v9aALgAif+y/Yr9NP/9AE8CTQPgA08DcQHz/0wAQAGoAQYC4gL8A6kEMARyA2kDTgPaAqECPgKsAT8BagAY/+r9LPza+W73XPW/9Af2zPi7/AL/M/6//Sz/vwCYAt4EPQZHBewBm/5//TP9fvx8/Gb9OP40/2AALQE/AeMA4ABWAZ8B2QGQAdP/6v1W/tUA+wKgA+wDUQQcBDME/gUxCPsIUAgUB7oF1QOwAJD9+fvl+zH8O/u4+I/2W/V69Hj1rPmR/oEAJf9k/W/9mP6h/2QAxABsAD3/hv7Q/1MCfQMaA8oCDAOSAgsBlP+t/ov9sPwc/XP+ff
/h/zwArQCnADkBUwOUBa0GrQYwBX0D6QMMBrgH/wcVB8kFpAMYAEb9zvyY/Sf+b/15+734fPVr8vLw+fED9p/7vv61/br8LP/ZAjIFzwbEB64GiwN/AH7/CgDw/1//t/+oAN8Aqv9e/bH7h/tl/Bb+iwCFAkoDJQO7Au8CjQTlBh8ILwcdBcgDvwPIA6cD6QRoB+0HtAXcArYAnf/J/lv9Zvz++uT2yvE17s7ruuul73n2QfxP/Uf81/7DA1cHYwtwDwIOYQgHBOgAfP0T+3/6Kfub/CL+Kv4z/Hz5Y/i8+fT8ywHVBl8JEAksB9QE1APMBAUGCgdhBxYG7wQ2BVAF2QXVB9EI8QZCA9H/e/4W/8f/O/8w/EH2T++16ZvnYenz7Rn1sv1oAyUERwNgBFQGjgcuCd4KIgpeBjYBXf1K+5z5J/nF+qH8k/7m/2z+3Ps4+0/80v5/AqgFuQfECA0IgAaQBXgFIgbsBscGIQapBWsFTAUABYwEgQRkBL8D/AJLAvwAEv4q+cXzPu/k6ybqPOtW70b1gfvQ/hL9WPvc/2IHvQu4DsYS7hLZCw4DhP0I+WX0MPMv9eP33PoO/Mb5bvem9+v5IP78A7YJhA2+DQQLiwhFB/QFNwWGBesFkQUzBHUCNAL2A1AGUAemBs8FuARFAvv/d/5B+/H15fAR7XrqLOmA6VDsH/KW+C38av7QBCYOkhNtFbEWQhPJCf0APfwK+SL3yfe6+cr62/qN+d/29vQ19k76uP+GBb4KTA0LDBoJSAegBsoGLQjECXIJ3QaOA3QBgQFSA7MFYwcZCNUHUAUQASH+Ov29+zP4IPO87SnpqeYV547qQvG9+HT8/PwQAdEJvA8OEUET4xRtD7QFI/+m++b4Cvjo+CX6dfor+dX2zPNc8XDzhvopAtgHMQxSDhUN/AnvB5QHLAjDCXoL1wrYBzoEWAGnADcCjgT2BvwHHAa6AhkA2v4B/fD3f/He7Mrp5Ocx6ObqzfDZ+Cn9mfuV/LUELA1kEjgXlhk9FbsMDgXZ/rf40/Pi8sH0IfYC9iL0d/BQ7s7wzvYD/oYFYAy2EG0QBA3CCvYKbwxEDmoOnQuOB7sDIgDx/fT+YAL8BJoFSgWQBNADCwMBAKD5LPIk7NDo3Ofi5/LpmvCk+TL+zfxp/Q8E2gogEPEX0x0VGXUNowWwAST7K/Vd9dP3Rfh297L0CfC87BLuJ/Tu++YCMAmZDRoOUAxfC+ULfg0hDw8PGQzLBjIBz/2k/QUA+wKuBBIFeATqAgwC0AK9AgT/Lfjv75bnGOFY3nfgi+nm9oP+Jf23/X8FQwwuD4EUcBqTGLERWg32CGYBzPo59371yfSr8nHu6On553/qp/CF+GEBnwnVDtoQaxD4Dq8PwxIZFPcQ/gqcBTsCq//6/g8C3gWrBq4FBwQgAXr//wByApj/hvjK8NDqhOaF41rkHeuJ9fP8xvxH+5UAWwjnDF0TsxuiG5oTxA25CvEETP0N+NT1YPNo7yvrg+dU5W/n6+4Q+Mf/kAYtDO4OLQ99DwQRURN3FaAVOhLpC60EcP9s/mwAPgLFAlICzQCU/h7+0wD6AlAA3vqj9eXvl+lU5Z3l9Oqh82r59vaL8yf6HgVUCsIPSRtnIfYZzA9KDEsJwQHe+zf6GvjE82Xu4+dr4qbh8uaZ8Mf63AJJCVkN+g0nDjgRUBb/GScZAxRBDYQGfwC4/X//DAN8BEUD/QD//X/7efxZ/8f+0fob9wvzJu3W56DlpOfK7pj3zPpm97T3uQFNDHQRUBeWHdkbexSzD3oMNQcwAeT7I/eE8nzskOWu4NTgdOYe70v4JwHzCNoNIQ+nDmQPNhO+GNEbCRlMEZcICwLb/Zn8Tv8KAw4E0gLa/4z7jfna+zn+cPwM94LxIe6M7Bjrv+pv7oX1lvnU9nj1kf1LCC4OyRO1Gv0aIRSuDr4MtAnyAxX+X/nT80rteOdb42vir+bh7mH3Lf+yBqILeQz8C68NaB
EnFUcYgRlKFgAP1AYeAcX/xQHFA7QDtgHZ/nj7ofg/+aH8Nv0K+ir2//FT7THq2+kL7cbzKvkL9yHy1PapAvEIuAtTE8QZTBc7ESoPtA5gCh0EEf/o+Q70Y+4k6HPjt+Ny6FjvlvbC/PkBxQXRB5cKUBBRF8ccKR7xGq8UJA0TBkICwAKiBAcEywBy/Vj6LffI9un5AfyC+vb3SvX48Gjsq+pa7T/0YPti/FH20fJ4+QIE0Am6DJMRshQJE9oQ/Q8ODs0JKwQS/1n7TPYz77jonuX95nDr/u8W9IP4rfwVAKoDOwnPETAbSSGFIVoc9RSDDkMJHwa9BtMHzwQt/xr5G/OO8Fn0zvkp+sz1p/Kl8k7ySvBu7w7y4ffz/C772PSl9Q//EwaIB6cJ1QwcDU0MIw7fD24NzQj1BNIACvzP9rjwqOvY6ubs2e7J8Krz1vZj+Tv8uAH0CXUS+hkXH/Ue3xnDExoPSAyqC24LcwgmAkD7m/XE8TzxSfXD+ST5FvXw8lbzGPRk9HX08PXS+UP7B/cg9G/43P3j/6QDMQppDLAKfQz6EPgRsw/jC40G6gFr/iD5YPPN7x/ucO3a7CDs8+2I8nz2uPrrAYoKwxIWGp8epx7NGikVsRCJDjwNoQpIBWD+QflM9ZPxoPH+9ST5lviN9jf1NfXJ9s/4UvlH+RX6FPg388/yT/g//GH9RQD6A8MFHQg0DkQVqBZyEZwLZQhuBVQBfvzH99vzWfDz64foCOmW7BLwrPKy9lj+8QfrDxoWWRthHUkbrhYvEqQQUhEUDz4JYAPf/WP3GfOA9ED4Ovg49dXztvQM9c7zg/La8v703fV38UTuj/XA/iX+G/tw/0EFogcxC18RHBZhFrgSwQ3aCTcGPwHs+xf4f/SV8EjtsuoZ6nPs+e6y8QL4hQDuCK0RcxjRGhwaNxcqE10RwhFaEdMO+QkVA1H83Pbd9FD44ftu+eb1WfaL9nX0wfJI8k70DfiP9xjx4+2W8+X5APqC+dj9AQO8Bt0L9hFAFvcW7hPvD14NJAqzBJf+y/lT9lfznO8n7O7qi+sM7Pbt9vPi/DIGmg5FFF4VUhP/EGkP+Q8JE24UnhBSCkIEyv2n+Gn5f/6+/1T6FPU69AP1T/QR8nDwC/JS9R30v+007Ffyr/bn92r7jv+7AuwHQg5gEgoUohSUFEkTthCWDLcGmACs/ET6DfeS8rruy+xX7AztXPAu9s78aQTaC3MPvA7BDMsL6AwdEK8SRxI7DxgLiQZCAjsAwgGyA/kBQP0d+a/2AfVg8g/vlO+g87fy6+sP6BLqFe2l75D0uPslANMBEgchDuQQjxP3F+8XtxVdFYcRIgmPAtX/z/0f+gP1BfHA7hztQe2D8ET1s/pWAVQGyQZHBRgFsAYnClYPlBJMEWUObQzJCYMHtwfTB2gFHQJk/sP5BfXB8B3upe7V8Jfw5uu25i/nKexg7p3vT/aU/ZAAoQXIDQQSuhOnFtEXVBdUF9EULA4eCEIFSQNl/8H5BfW38n/wy+117uHyd/d7/NwBKAMUASUBvAJkBNcIJg4lD80NmQxtCQ4GSQfSCnAKgwWSALX9R/td93fy9u7c7nTwme7n6BjnUes/7ofvgPTf+f78dwK4CcMN/Q9KE9MVHBaWFSMUaxB+CzoIKgYQA/D+BPvj98P1DvSG81n1g/dM+dn8hP/s/VX7//r1+8T+dwMMBxQIfQgqCCsGcgXACGgMzQsACFEE8wHc/777cvYN9NT0YPQN8avtUO3M7lHvWvAg9WP60PwGAAEFAAghC74P5hA4EEsSWxJvDfYIxweNBwwGfgI7/3H+0/0v+/D4rPjT+UP8bP7D/dT6vfiY+Oj4Nvqv/VUBOgPLBL4FQwUGBvoIsQoNCgEJ/AcSBtoCP/6a+u359fia9BfxIPL/80HzsvJp9ET2m/jD/LD/hwEFBZYHmQf8B40IlAfcBf4DswKpA+EEtQMBAnIBjwA//1z++v1l/tv/Cw
GvAMP+Pvy6+Xn4nPlD/A7/rgGQA64E9AQeBZoHMQtmCwgJzQfoBsoEWAI+//H8EP1Z+zv2jvTJ9nT2xPVr+Jb5U/my+4n9eP3F/mUA6gBmAawBawEcAUYArP+BANQBYgKTAogCMgIaAjsCQwJ5AuICPwNHAxAC8v4t/C/7Svvx/CgAHgK8AgwEKQW6BZIH3gk8CswI1gbUBJ0DXAIo/4r8JP2L/LP4uPaK92P3X/fg+W/8+Pws/Uf+F//E/uD+zP/N/0f/zv+U/2X9j/vJ+8n8Kf0C/bj9af9PACgAQwC+AMgB5gOqBSEFwgJvAEv/mP5F/sD/OgJ0A1cEPwUQBaEF/QfbCL8H2ga4BRkEwQJQAFf92PxA/cn6yffq9/z40fiu+bz7kPzj/F7+tv/+//X/+P86ALYAdQCN/0/+/fyq/An+J//U/pn+9f61/if+NP7s/m0AXgKxA78D7gHc/oH8Bfwh/Y7/LQKdAyMEVATtA18EFgeXCfEIvAbDBKgCLQFkAGH+5vxX/rT+5/vq+k/8d/z1/B//lf/r/rr/RQAPACMA9P8RAMUAxwATADP/mv1y/NX8m/3J/f79hf7z/tj+n/75/uH/NwG0Am0D0QL+ALL+6fyt/Df+mgAmAnoCewK9AnwD/AQaBnQFigPvARcBqgCv/9T9Dv0U/gf+Vfwc/Bf94Pwa/SX/OgCQ/5j/PADY//H+EP8PAMoA5gCwAPT/pP7j/cH++//7/+z/0gANAYb/qf3O/CT9s/59AE0BpgDV/qT8Ovtb+yj9zv/XAZMCrwJrAkECTwPlBNQEjwPYAmcCuQHTAEz/oP7J/8r/qP3O/KT9B/6C/sP/kAC3ADgB/wHjAegAgAAWAY8B1AHqARMBvP99/0kA4QAAAR8BdAGWAcwAUP8z/tr9JP4W//H/eP/y/dH8Sfx7/Ov97/8WAV0BQQHMAK8AxQEUAxgDAwJfAWsBWAGcAJ//Y/+3/yj/8v3B/WD+df6P/kz/rv99/8X/UwA6AOv/HwBaAFQAUQB3AHcAWABdAI0AogCOAGkATgBEAAsAVf+R/l/+Wf5s/uz+Cf83/kn95vzn/If9of5b/43/s/+f/7b/0wBCAnwC3wFMAfIAzACuAA0Ad//K/xsARf8U/74AuQHOACUAIACs/5z/UgCoAI0AfgBeABQABwBsACIByAEfAiUCAgLTAXcB3ACgAPgA9gA+AK3/Xv8+/8v/rwCsAMf/x/4v/jz+6P69/24A+wAaAYYA+/8lAGEAJADy/wwAdAC7AB4A9v61/hr/6P6W/j7/QAB6AB4AwP8y/9T+Tv/x/73/g/+3/3r/uP5f/oX+wP4H/2f/k/9y/zz/Jf9X/7f/yf9i/+n+yf4j/zAAiwH+ATgBZgDy/6T/EAA5AcoBowFIAZQAKgC/ACQBtABuAKEAvwDHAGoAtf+R/8j/Nf/E/tz/HQHAANj/bv/t/pn+TP8KAOj/u//k/9z/5v8zAG8AZQA6ABAALwBaACoAuv9b/w//tv4m/r393/1N/vP++f+aAH4AIQDm/+7/eQBTAfEBGwLyAYsBMgE/AV4B9gBkAB4AQgBqADIApv9h/6T/m//7/hr/FwBsACMA7P9R/8z+Kf+W/0X/H/+H/9v/yv/U/wQAAwAfAHsAXwA7AK4ArAAaAOD/dP/o/h3/Gf+e/lL/qAAwAQIBkQDo/7//CgArAGAAdQDz/5H/z/9XAKQAVgCH/xn/PP9y/5P/vf/F/2b/zP7X/sT/pQCpAEoA6/94/0v/kf+b/2b/c/97/yf/Cf9X/7X/3P8OADgAZwDNAAcBygCcAKoAhABLABUAvP/F/1gA4ADdAG4A0f94/4T/1/9HANAAFAH4AMQAwADuAAYB5QDMAOIAHQEdAcsAbAD//2b/9v4I/1T/hf+o/5j/Nf+z/oL+u/7w/vn+F//+/tH+D/9Y/0b/Yf+X/8T/HgB/AIsAngC6AHEA5/+G/z7/Kv/M/88AGwGGAKj/DP8Q/6f/Gg
BoANwA+QCEADgAPABPAHYAXwAhAFUAkgCGAIAAngCRAC4AzP8AAIIA2QDEAEYAu/90/1T/Mv/S/nn+r/7r/un+Jv91/4b/xf9JAH8ArQAkAYIBWQHoAEQArf9j/0v/Q/+c/y4AGwCU/1f/Yf+Y/xwAnQDLALEAWAAiAF4A3AD5ALwAYgA+AEMALAATABUA7f+m/6f/z//w/zcAbABfAEIAJwAOABQA5/+R/1n/a/+W/5P/Yf9X/37/p//1/5EAEQFxAa4BdgEGAbwAZwAPAOz/AAD3/6v/S/8o/2b/xf8QAHcAuQCQAHsAdgBvAHQAZQAeAMT/dv8d/9T+cf5L/mn+YP6c/gT/hv5J/hAAhwKDAs3/E//NAPv/wv3x/aj+mP44/oT9S/5b/1j+D/6f/xcAmwDFAnMDlwGMAO4AUQGJAV0BcwFkAaYAdgABAXkAaABoAjoDGQJlAAMAeAEgAjYBUwC1/wT/Sv/k/1T/fP6n/sH+Pv5s/kj/FwBsAMoAnQAxALUAzACOAMsARQAgAJ4AeABmACEAjf+o/9b/z/95ACkBjQAhAMgAhwB4/yz/v/4//o3+l/4K/vD95v16/cL9gv5y/rz+AABsACYAUQCfAAMBJgHBAJIAuADkAMwAVAAyAHMArgDsACoBdAEEAk8CMgIOAtIBbwEBAW8Avf/E/uL9Sf1c/CX7Ovr6+S36qfoG+8P6lvon+8b7KvyL/Kz8YP0L/3MAowFRA8sEBwaRB7wIOwl3CVQJHAkBCYEIYwdPBkgFEATZAmgB7v8b/5j+rP2k/If8Ev0z/db89fxw/VL+iv8HABEATgAqAGr/cf4U/VP73vmE+Lv2S/Xg9a/2KfOZ7m/yH/sR/Yb4efeU/P0CPwfXB+0GKgkrDmYRuBEMEVoQFBDxD5YPVA50C5IHLgQoArsAG/7c+aX2ZfWW9J/zcPIv8S3x6PLy9Gj3MvoD/Lb9wgC7A1kG2wjGCN4GHQfiBzQF2AE9ANH9z/oz+tD52fPL61HtMfdm+pbzXPDY9aH8cwKWBtEFbgXCC0cT3hRME0YTQxObET4Qig9hDaQITwKM/Sv8yfpU9prxlu/C74rwn/Db72PwYfKc9BH48fwVAegD6QX/B7cLYBAcEhMPRgtNC7UMrwnzAzUAcvw1+Ff4vfd17QHk5+lX9MHzOu3R7OPyqfmX/+IDHAWQBwEPrhWRFnEWNRh1FxwUxRKCEicPTwg6AfD8EvtP+PXxp+tl62XuJO6961/rz+1S8nL39PpK/YkAvgUIC50N1Q5cEn8VvBKjDt0ObQ5BCUMDcv5y+eP22PfA8zLlitsV5ln0dfCl5nTpF/Lc+C0BqgatBo4KnRP8GJ8ZYxvqHZcbPRa9FLYVkBGTB//9h/n5+Kf2Z+8u6GDnl+uj7c7qkujn64fxTfZC+3j/qAIpCPENyg+jEUcWhhghFe0Pqg6XD5oKowHJ/cz63PNC8sbybOWA2PziePMA8OTjhOVz8r76jv33AsEHqwo5E2Mb1BkcGdUehR5nFtASyBQBEpYIiP9f+pH3A/UV8Gfpjuf86zjup+uA6hzuS/Qb+vr9vAG5BosLOhAZFDoVoRb8GZ8YchFNDtIPOwvOAEj7NPlv8njsj+1V5w/WI9XQ6mnzeONG3prv3Po2+uIAZAl9C10Trx4oHvEZsB/pJO8ccxKJEyAXrw4NALT4qfeS9dPwl+q35IDkmept7Q7qjumH7kn0WvtwAy4HZggnDbgTKRefGLUb/By0FkwObg7bED8JZ/7G+bHzi+qb6gju2uGv0YfZt+6x7ubgIOaE9lr5e/wHCVMOyxBpGxkgqBsTHtEl7yPbGE0ToRV9Eg0Gdvuq9l7ye+7k6z7mheBl4yDqaOpg56/poPCk96j+xwQgCG4MBBRAGIkXhBl5HjYeihaUDoMN4g4YCL77f/XY8kDs1+jV6ELczc5E2uDvcO4B4HrjsfYrAMH/Pgd4ESYVsxzHI3oeThzrJIoktRcGEdwT+x
FLBo36C/XO8bLtQunm5EfjoeaP6onrv+tM7hHzevkKAg8KsgznDKMSshrVHOQcKB+bHT4WAw+pC9QJQQTE+kHztu2G5v3jIed43tPNN9PO6obvVOIW4z3xR/kJ/zcJ8Q8wEmoYcB8THzIf6yVWJcEYNROIGHQWUAnh/aD4o/Xd8hXvQuhC4pLkj+tQ7Prom+tu8qT3gf4mBrMHzweJDgUWAxhSGcAblRrhFbQSjBHeDacFF/yo9nj2JPTX61rmF+RJ2ZHRd+GR9IXtlN+f5P3z1/5MBlELtQwDDroV2B/WIE8emCHqH90UgRJlGaoU3APf+Er3BfcL9brwlOjH4qXnoO/k7l/qPezR8rH6aAPFBvoDdwXyDr0WqRejF+sYBheFEkoRPRL1DnYG8fxJ9yj3UvYw70Xo9uMG2tXUJOXU9A/ph9uy5jj3RfuoALsILwr9DYMXbBoyGUwehyI8HOESiRPhGScW5Qc1/jf9z/s/9x7yCeyU6c3tiu5P6RDq6PAv9EX3Iv7UAJb/HQNBCukPABSXFaUToRDSDxoSgRPNDvEF0/95/Xv74fcd8orsHefS3urbYejD86HrBeH66Kz3Ef3j/6EEhginDjIUdRPHFC4dYCD5GCIRfRATE5EQRAhKAqQAEv1H9+LzdPJc8WPwIe547Eru4/CX8m/24/v3/i8AeQLOBlsMRxAlEWsRuBEAESQR1hE8D54JTAR3ALf9g/r49IXu8OfE4OPffumg79LoAuIy51TyRfnF+xAB4AmiDqsOnRD2FOAZCx3OGC8SORMJFYoO7AfnBusDZv1P+aH3mva59T/xWOuR7L/yC/SU8qf1d/qG/XkArQKlBeILUBA8DwcOhQ/qEIYR6hA0DZoI9gURA3z+G/tU+P7yh+sc5kLopO8r8JPnEORP6zzyDPWI+UH/BQTUB70HVAdlDYMUfBMrDtEMkg6iDSIJKgbtBvMFagB0+4z6ofrP+BX2MfWb9nT3H/aV9jT7XP/1/2cAGQM2BngIPgp3C2AMhQxKC24KagtnC+8HvAPPAUEBOgESAF/6cvPq88D4yPd+83LzjPQT9Jj0K/VZ9ln6ff1w/QX+PQH8A6EDKwG2AeoF8gbxAooArwHFAnUCMQEmAGEA4P+0/TH9AP8U/5b8FPsY/jEDYgM8/3f/QwTHBq0G7gbbBrIGCwdRBl8FcQatBksDUgChATMFtAVSAPn63fxwAUEAGPv4+Fn72v1Y/fb7VvxW/Tf+Wv7S/OX8XQCzABz8z/ou/k7+mfoz+bP6RPvZ+rr6Tvrd+Tv6Xvqt+0n+J/23+ev7AgIRBBsCFQHmAk0GSQcnBUQFXAiKCNoFZwQrBKIE9wTIAkMA+ADCApUCzP8h/Nv97QPfAxv+V/5kAyUEWwL7AlMD7QIrBBoEUgJMA2IF9gO2AJf/sgBeAWv/H/xL+sP5FPnU+ET4cvad9WD2jPU29HD1dvc4+bL8I/95/Zj8ugCRBSQHWAcjCDwIWgciB+AHhQcqBQ8C2v9O/wUApQAwAEf+HvtT+Wv7sf2V/K/7p/3W/hT/UQD/AGIBPwOeBB0EFQSEBb8GsgaEBVgEdwQRBbADkwB4/lL+A/8R/0n9X/v5+yn9qfud+dv5afvc/LP9Q/3P/B/+LgAYAVYB0gF/At0CVgJuAYcBzAFdAO79hPyd/AP+9/8kAOj8y/ms++H/wf9M/N77O/7+/mT+c/41/4cA+AHFAbwAsQHEBNMG3gUWBOMELQdfB4cF7wMLA1sC6wELASb//f0u/lT9hPqg+A75dfrs+xb9Kf0M/Zf+hwGjAycERAQ6BW4GggbDBZ0FsgXFBPEC3gB6/2QAbwIIAZ77kPh5+3T+R/zW+L/4PfqX+hD6vvnl+qr9Yv9a/rn9+P/rAt4D5AIQAi0DnwSdA8ABzAFFAnUBHQDC/rP9zP0M/uz8q/t2+yD7xPoE/CD+yv5g/un+9gDDAhIDEwMMBM0EgAQGBPoDvAMoA2ICQgGiAPgBbA
MaAZ38VvzfAEEDKwBQ/ZX+twC/ANn/if98AE8CdwL6/+T+zwGvBFUDUQBJALMCgAN+AV//Mf8GAOH/+f3X+5n7Ff1u/Zz79vk/+ub60/qD+zT9mv3A/CT9zv6q//z/xwB/AXkBZgGoATwCCgNQA0oC1gBVAIcBfwMAAzH/c/1wAH0CeQB5/o/+Nv+T/zv/l/6M/4YBfgHJ/6T/YwFWAwwE2QICAukDzgWxBOYCvgIIAycD5QI4AWD/kP8wAMT+gfzJ++37ofuJ++/75/v5+9D8aP1q/RP+TP8rAGcAJgAqADkBEAJrAXQAOAAwALIAjQFxAJX9//wr/wQArf4o/n7+5v0r/ZP9KP4x/p3+OP/e/t/+QwBxAVsBRwHIAVgCxwLiAqACXgIDAtMB/wG9AQwBRgHZARYBxv9u/0X/Af+W/4EANwBs/23/uf+m//b/+gCuARwBjABXAVwCJQKbASABCwDS/2ABpQFT/w7+O//b/97+9/3Z/fX9Bv6E/av8lvyU/W3+QP6l/SH+p/9aANX/3P+7AEABUgE0AdQAuwA0ATcB2v+d/j3/mABdAOz+CP73/e79RP4l/23/8/5P/0AAXwCLAMcBhQIyAjICAgPBAxEE2gM5A7wCFQMTBB4ENAKYAE8BQALTAAr/DP81/2/+2P1D/Wn8Zfxc/bb9Of1H/TX+uv5U/gj+vP6z/wUAIgBiADYA6/8vACwAtP8MAKQA9f/A/mb+MP7m/Sr+RP6y/Xz9B/67/sb+S/47/gD/pP+2/wAA6ACOAdsBNAI3AhQCvwJMAxoCYQBXAIEB7QFBAd8A6ACgABEAw/+4//H/MwAIAHf/Sv/m/7UAtABIAJAAWAGrAbcBpAFkAX8B+AGoAbEAXQC7ALgA9v8C/4D+t/5m/4T/x/4S/kH+1/7x/pP+hP72/o//AgA7ABkA8/8RAC4AEgANADYAagBLAJX/9f5j/ysAFQCI/33/fv8R/6X+ov7R/iL/bP9W/9L+q/5R/wgA+P+Y/7z/VgDAANkArwB5AHgApADMAN8A7wC/ACIAfP9V/6H/EAB7AFgAov9N/+b/kQDTAAIBAgGNAEYAmQAgAVABJAGsACsABgB8AA8B9gAJABT/Af/K/wsAOv90/oj+3v4C/9f+pP4G/7L/oP8y/37/XgDNAHYA+P8GAIcA5AC7AFsAIgA/AEAA7P+4//v/IACl/+b+p/7l/k//j/90/x//Ev9V/5D/p//O/x4AmgDQALAArgALAYMBmgERAZwA4gCjAdcBJAFeAIkALAE1AZ0AEACy/6z/tv9y/0P/z/9aAPj/Q/94/1sAzwB6AOb/yP8mAG0ASwD6/4T/8P7g/iX/Hv/+/kj/Tf+r/jD+Xv6f/tP+Gv8I/5v+t/4+/3H/kv/h/+L/9/95ALsAkwDUADgB+gCTAK4ABwEqAaAArP9w/y0AgQDx/0f//v77/jP/QP8c/1D/9/9mAI4AxAAzAcUBEwLKAW4BegGXAWUBKwHIAPj/UP9I/x//kf6D/vv+u/4O/hv+nP7c/lr//v/k/57/TAAMAfEA7gCbAc8BYgExAUUBTAFzAVYBmwDv/wAAVQBBAJH/3f7W/iX/1/44/hv+Xf5q/m7+bv5u/uX+uv/n/4f/of9aAOkAFQEOAQwBKgFkAW4BLAHGAI4AcAAbAIj/Rv96/4//G/+F/lv+w/4M/8z+g/62/jL/9P+vANEAswBZAVoCtQKHAnECXQJgAl0CwAHvANAArwDC/+T+4P7m/n7+Bf63/bT9NP5r/uT9wf1+/vn+9/5N/wwAlQDzACABGQE6AZMBpQGGAXQBQAHJAGYAAgCY/1r/JP+J/s/9ev2w/Sn+mP5o/uv9Bv75/vL/TQBOAIEA4ABWAcwBQQKYAmsCDQK1AWYBSAFdAT8BmADi/3f/V/9B/yz/2/5m/iT+Rv5e/lH+dv7y/lH/cv+l/zQA8gCbAe8BCAIoAmsCmwKYAkUCrgEOAcgAqgBGAKv/J/+R/vD9lv
2T/bT9yv2g/XP9sP1R/tf+Nv+F/9L/OgDlAGwBfQFOATgBGQHhAMoA5QDmAJwA8/9m/1b/hv99/0L/Af/s/gv/P/9B/zf/gv8AABwAAQBgADABmgFwAT8BegH5AXkCkQIeAoUBXAFOAesAfABgAAgALP9O/tn9u/3o/R3+7f1u/Wz9Hf7y/mz/rf/5/00AcgCSAP0AfQFsAfkAswCWAIIAdgA0AMD/a/9J//7+1f4C/x3/zP5t/kz+cv7P/jv/P/8O/zH/yP9rAOkAFAEAAe0ABgE4AWkBewF6AUABrAAPAPX/QQBXAPT/W//5/jj/yv8SAAEAHAA4AAQA7P9MAJgAoQCvAKYAZQBiAKoAwgCXAHoASQD7/9X/CABVAGYAHQDf/+n/+v/Z/93/3f+D/xz/Fv8k/w//KP9m/03/Cv8L/0P/dv/P/zUAUQAsAEQAgACFAFQAQgBNAEcAGADX/7b/sv+M/1z/WP+F/57/o/+3/+L/CwBiANEACgH4APwACAHXAJwArgCxAG8AMAAHALv/o//V/+n/tv+k/7j/p/9x/2//nv+t/2H/Bv/6/lT/l/+S/4f/xP8OACIAFwBQAKMApgBJANT/oP/U/x8A+/+F/2z/u//w/wMAIgBFAGQAdwCNAKgAvQCmAJAAegBTABsA+//S/5j/ZP9H/wr/yP6+/gX/Sv9c/1H/Xv99/63/2/8MACsAKwAoADEAPABBAEwAewCYAIMAeQBxAHwAiACaALIAmgBMAA8ABQD5/+D/sf9//1b/U/9m/5f/zf/t/+v/BQBTAK8A7QAlAUABKQEFAQcBCwHyAMYAnwBKANr/jf96/2b/S/8t/wP/2v70/jn/XP9u/7X/6//e/9b/FQBXAIEApQCvAKEAtADMALEAeQBaAD0ACgDR/4z/LP/u/tL+pv5u/nD+oP7F/s3+4v4c/5P//v8GAO//HgBzAKsAuwDLAOkACgH4ALYAhwCEAHMAOgDx/73/lf9r/zH/FP8c/zD/Hv/u/tT+8/43/4L/pv/N/xAAWwCaAP0AVwF4AXsBlgG9AcEBmgFDAc8AiQBkABYAo/9u/0n/6f6C/m3+mP7W/vv+7P7V/iP/t/8dAEIAfgDKAPwAHwE4ATkBTQFOAQMBrQCgAJcATQDe/4j/Uv9c/4v/fv84/y3/Zv+H/4D/n//g/xUALgA4AFAAoQAHAUQBVAFdAWQBZgFUASsB/ADXAJoAMwDC/4v/dP8+/93+mv6T/pL+av5w/qT+0P7n/gj/Mf95/9v/HAAhACcAVAB4AH4AkgCoAKgAgwBRACAADQAAAOH/pP9k/yn/Av/s/uH+5/4G/xH/Af8J/0f/jv/A/+L/DwA+AGkAhACjAM0A8QDwAM4AowCLAH4AcABNAB4A8//h/9X/y/+9/83/5//v/+//AAARADEAWwB1AG4AdwCYAL8A1gDjANYAtQCkAJ4AjwByAFAAKQDy/8D/qf+1/77/p/95/1L/Rf9j/4X/h/9w/2f/av95/5b/zf8CAB0AGQAZADAAYgCVALAApgCFAG8AewB1AFoAKwAPAO//yf+x/7b/yP/O/83/wv+r/8L/6/8EAC8AFAAAAPj/CQAjADYANgAZAAAA7//5/w4AJAAoABkAAwDt/+L/6P/r/9//u/+a/4H/fv98/4P/mP+r/6n/k/9//4H/pf/U/9//y/+4/7f/yP/g/+z/6v/l/9v/3f/w/wYAHQAgABAABQAPAC0ATABWAEUAKAAtAFMAfgCNAH0AXABSAGEAagBfAFMAQAAWAOr/1P/d/+7/8//j/9P/2//x////AwAPABsAIQAdACAAJAAxACcABwD9/xgAMgAjAP3/3f/b//X/CQADAOj/1P/W/+v//v8RABcACQACAAwADwAJAAUACAAJAPn/3P/N/8f/xP+6/7P/rP+r/67/uv++/8P/0P/j//T/BAATACQAMgBEAFIAVwBeAGwAcwBlAE8ASwBKAEAALAAZAAkA/v/s/9P/v/
+//7j/n/+E/4P/j/+O/4b/g/+L/5f/pP+q/7T/xv/O/9H/zP/U/+P/6f/k/97/3P/j//L/9f/t/+7/8P/7/wYACwAMABEAHwAlAB4AJAAvADgAOgA5ADYAOgBAAD4ANQAyADMAJAARABUAHgAlABkAAQDz/+//8P/4//3//f8AAPr/6//r//v/DwAHAPf/6f/m//D/8v/r//T/AQD3/+P/2//f/+r/8P/u/+X/4//q/+3/6P/3/wYA+//u/+//7v/v//n/AAD9//j/6P/r//r/CgAOABUAFwANAAsAGQAgACcAMQApABQABgADAP3//f/4/+L/0v/V/8//zP/V/9j/2P/e/+T/5f/w//v/DwAVAA4ABgAEAAQACgANAAcA8//j/9r/2P/b/+T/1v/C/7r/x//O/9D/1P/X/93/2v/q//X/+f8LABAABgATAAsADAALABMADwADAPL/5v/n/+X/6v/m/97/1v/Q/8v/1f/p//T/7f/n/+//+/8KABgAHwAnADUAOwA7AEIATQBYAFcATAA4ACwAKQAgABEAAADq/8v/u/+s/6n/r/+w/6z/pv+k/63/wP/U/+T/8f///wkACgAMABMAJgA1ADAAGgAGAAAAAgAGAAgABQABAPj/6//m//L/BAAOAAYA9//z////EgAjAC8ALgAgAA8ACwAQABoAHQAPAPH/1f/C/8P/y//N/8f/uf+q/53/o/+7/9D/2P/T/8P/wf/Y/+7/AAAJAAcABQAFABEAJAA5AEIAOwAwACoALAAxADgANQArABoABgABAAIA///1/+P/2P/J/7z/tf+1/7r/vP+5/7j/vP/J/97/7v/8/wAAAQAEAAoAFAAfACIAHgAYAA8ABwAEAAQADQAMAAcA+v/y//b/+f8GAAoADAAMAAAA9v/5/wwAHQAoACEAEgAMABIAHwAkAB0AGQABAOz/8P/x////BQD8/+v/3//i//b/CwASAAsA+//x/+3/8f/9/wQABgD5/+j/5v/r//z/AwD9//j/9v/7/wQABwAHAAUA/v/2//L/7f/t/+j/5f/g/+D/5P/n/+7/+f8CAAQAAwAHABMAGgAfABwAEwAKAP//+/8DAAEA9v/q/+P/6P/t/+T/4f/a/9z/5P/g/+T/2f/V/9T/0P/b/+3/+P/7//X/+f/5//3/AgAHAAAA//8IAAUABgAHAAoAGAAbABoAJAAbAB0AIQAfABsAEAAIAAsADgAcABYAAwAFAAAACQALABEAHwAcABgAEQAGAA0AEAASAPT/5//n/+P/1f/K/9D/zP/S/9f/2P/a/+D/8v/l/7L/oP+x/+f/CwAGAA0ADgALABIAHwBGAGAATwBDADQAMgA9ABgAJQBQADIAUAA5AAkAEwADABsADQAVABIA0//c/97/1f/2/xIAHwAiABUADADl/wAAHQAOAFUAXACbAJoA/P9o/0j/bADhAA4AY/85/yv/8v52/ln+wf5p/9b/j/8e/yH/v/5H/m/+iP4r/7b/WP/M/rH+RP/T/6D/dgCpAZkBFwF4ANgABQEiAW4BgAG2AYgBGwG9AB0BEQEfAWkBKAHFAFQAoACuAIwA1gCdABkA7P+u/8//xP9Y/0//y/6I/n/+7v3J/Z79sv30/Un+FP8PAGcBbgK0A/AFBweVB/4I4QjHCNsITgdmBTIDxgE2AG/+y/1S/E/6xfjy9+73n/cK+MT42Pjt+aD6PvrT+nn7//u2+876I/sY+yX6H/gK9oD0hPEe7zbuvuwC7374OgOzCNgIewjIClEQ0Ba/G2sgwyWXJvYc4A+rCAAFmQE7/uz5W/Uc8KHoQeCo3f7kP/Bx+Hz9GQIkBzkM7RCSFgAeSCNQIjcclBQTDvQIpQS5AKL87/dJ8UTpKOTB5Czodet477DzZ/Wl9CT0APUN9yn6EfwZ/JH8Ifza95zzrfOF9aD49QItEJUT9g7dCzoNsBDcEpsTDBVlFXYPkQPZ93DxdfAU8Z7vNu6P7yvw3O
2B7Enx2/uuBroOGRXqGnUfByICI08j5iKDHy4XnwtYAcD6jPVj7+noIOTT4d3h4+Og6GHw5fiM/+YD8AZXCiMPJBQrFncUOxGbDJkF6f2d94/yN+7T6UXjnNvz1bbT69NM1tjaBeJk6x72LgW1GKslTifDKaAwEjIKLXwoxSPjGnANUvwJ7Gbj5+Cl24TUSdSW2nXfi+FO5lbybwT7FCkfBSghMyc6RDk9NYUw1yjSHeoQQQMu+HHxSOsc4+3cMdsk3Ojfk+et8db7FQQ4CWEMdQ9PEo8UixUdFPwPUAkmAM/27vB/7jns+ecq4ineRd0R3RHdCuG45xTs9fDL+B79IAFwEa0nRDA6KRgiDiFBH1kYQxEcDv0Iw/su6ULa9dWr2iTfueA757Ty7ff19av58AcwGFMjxCiXK+gtSyvtIS8ZlhSeDpAEv/qL86vv+u/s8Y7xl/D48if3sfrp/0YImg8OEmIQowzaB4IDzwH8AVEBe/6/+dnyh+vg56PowOqq7STxN/Fj7Tvqr+hP6TvupPPc81rzZPao9HTuSvYVD4MhySCLGQgXixSuDD4GHAm5D2wNGQC98bnp4Oe86PTpV/Br/WwFVwJ1/g0CJQpZEdcW2xzhItkjwh1VFg0T+BHoDW0HYgPqAhAENwQpATv8Ivn89pvzCfJ89kb/RgZlCPMGDANY/oT8+/1D/5n/+/60+zb2n/Hy72Dyb/f7+n/7xvkQ9Qjud+iR5GzgmOD05cTnrOYU7enyQ+/R9G4OjCTnJaEfpR/nIIoazQ/mCtcNGQ2g/jnrLeJx4mbi3+Hn5sPyeP20/8z+KgWLEVobrCCxJqsu0TD9KcIhZxxRFvIMfgK++X30nvLL8Y7vPey36knsMO//8ov6twXWDioSHxIaEbQORwz8CloJlwanAkH7n/C3503koeXq6NnrM+4a8DrwEu7Z687rnewk7CDtmPF99NX0QPkL/kn6hfinCHMc+RznE6IUMxf7DosEqQJTBsIGwP6j8vHr1exS76rwIvXn/08KDQwVCQwM/RTkGtsbfB8MJ5Ao1SAKGZ8VaRAXBo782fdx9dnzYfRM9Ur0M/PG89X08vYa/QAGmAutDHQM3QqgB+cFmAWiAycAJP0t+f/xTOp+5xzoTeYo4zzjdOXh5gbpiu3h8pb2K/ci9Vzzr/SF9gP1d/SF9/32oPSD/oISvhxIGfwYix+tHmoU4QtXCBwHVANk+f7vbe747+XtIe1I9aMC5gtlEHYWZSCpKG8qEykOKQMnYR8mFr0OJwbN+k3we+na5ZfmUuwB8z33Avv8/t8AmwJmB/cM5g5BDaEKigc5BHQCRAIdATX+1/mn8wvsR+VO4dTfJ98U3yrg9uEa5fDqLvIh+Kv7m/x7+9j4jPVE9Pj07vQT9pz4Qfdz9nIEBx3+KXsnTCQ2I2EbYguQ+2704fRA9M/vCe7+8TD4e/0iA+oMZhrXJMkouigSJz0knh4XFtQOvgpyB0QD+/1c+BD0qfAA7vjui/Uo/2oHYwvdC4YKZQe2A08BVQDL//r9wfo8+Hr2UPSE81P1Gve19dfxgO5Z7Avp9+SO45vluOi16/DvCPYb/HUAHQIcAF/79/T67bfpEene6Y7s2vHE9KLzVPoOEdIoZDBwLLQqhykQHS0IXvqK+Jf4dPMk7i7vFvad/SADfQkUFF0g9icGKdYmXCSvIHAa+hOqDmkIuAG0/Qz6yfLr62zr7u9T9V/8IgabDa4ObAsSBn3/R/rN97f2bPal95j5yPkB+Ij3hfnX+k35i/aC81Hvjum641TgIeDK4nron++59bj7QwIqBgwFsQC/+hfzr+ye6qzqOOqO7C3x1u9g7Mf3rhP9KW0tlyv8Luwr0htyC1sEzQItAK/7/fga+wYB1AUPCEYNkhewH9QgtB65HCYZkxLRC7cHmATX/yb7P/ig9V/yR/D38Q/4tABQCZQP4hHAD1oKLQMN/Fv2KfJw70ru/e1Q7tTvT/IR9Vb4Vf
vs+3D5s/X68UzuHeu36fTqx+1B8Qj1CPh2+pn9bP8s/QH5Q/Vj8ETqkuf76N/oruhr7aLwTPDx/OMb+DKaMn8tYDEwMDshORNLEMQQigwWBYr+Kfp7+KH4APqq/tUHbhKkGaIbIBs+GzQanhU3EIsLkAVU/2X7rffG8Vztlu558wP45vxOBIcKBAsWB5wBJvtr9ZTy0/Gs8a3y0/TV9Y70VfNK9LP1WvWf9Gb03vII8O3tGO1F7druLfKu9tn5svpl/In/kf8J+4T1uPCM633mo+X66ZTvifXd+kX7C/wUC0EkGzKqMLAw2DRVMI4iDhYND/gJ+gPy/UH5ufVi9Hf2Hfs7AWEJpBMoHAgfrx3NGmgW/xDPC+MFIv9E+sb34PMH7Y3oNusQ8Xr1l/q9AX4G8AWtAWH8+fcl9afzgfNG9JP1xvbk9c7yqvC58T70aPVe9U31kvSH8ibwbe8X8fPygPPU9HT3Jfkz+l/72vov+D71SfNX8q/xjvHq82D4U/xk/kAABwgsGlgsQDEzLrMtKCtYIG0TnAoHB6AGLAV0Aa/+/P54AFYAVAD8BM0N/BVmGlsaOxc1E0AOlgd6ALr69fd/90/1S/CG7E/s8e3k7wzzovgB/6kCkwJEAED9dPo5+G/2V/XW9LTzefH37rDsH+x574/0Evds9xH49feQ9aHyLPLO9IT4h/uV/dP+Qf9x/s/7S/iH9Y3zkvGs8Eryc/RZ9iL5Cvrt+XMENBvKK2wtAiyjLVEqIx6hEUENZA5VDoQLcgf7Akn+afkz9oT3rv73CegTkhc/FoITdQ4WByYCDgGhAHoAaQGZ/6j3PO5F6vnq8utf7sD01fsoAF8BkP6c+Iv0/PNz8wnzrPXg+Rr7lfiE9UD0uPPo8qTyEPNn9J/2rPcM9/D2mviM+2z+2f+NANEBiQK9AL/7+PSS73ntZO6O8BHysfQW+Sv5s/UP/HcQXSToLOUvvTIpMFMlLBluEdsNxgzBC8MHagHC+8H2N/F27knzS/6UCW8RDhVHFZUTzA++CnsHbwY1Bm0G0gS8/vX1gu6y6X3moeVW6bnwTPfi+kL8nftY+Tb3CvbT9d/3Ivwl/2r+2/sg+sj4y/Xm8Ynwn/KK9QD34ffU+bv79Pt3+9/7Z/2V/80Avf9o/Uj6QfV077nseO7v8RD1G/ml/Ez9twJeEhUgxiMVJpQqSiohI54a8hQcEn8PPQu5BTL/t/iJ81jvsO0R8rH7JAYGDl8SzRN3E8wRow+ADcYKQggxB+sET/6U9ejvmu3k6p3nOOjV7WnzKPbC9+r4N/nn+cn6xvoS+2v9ZgAyAdP/Zf5S/Uj6D/W+8Djv1u8i8sr1Xfri/mIBNwFg/wL9FPw3/R/+xv2o/Jz6kPe281fwHO9G8PLz6Peo+Yn+TgxeGwAimiOXJi0obiTzHRgYJBTKEXQO+AdHAGr6i/Vz7x7r6e2j9rP/gAa5C9EPHhJIEjoRfRBMD7oMgwqICD0DPvuW9Fvwru1S7DnsJe6J8UP0efU19XX0ufUU+Zr7av29ACgEQAQeARL+XPwh+uL2GPXi9SH3rfYn9WH1iffB+A355vr8/RYBPgN9Alz/3vuI9+fytPD18GLyRfVc+Gn4OffV/EEJzhJCF3Ic9SKLJg4l1iBoHesZ/hTMDyUKOQOD/bb5EPWZ8GDwU/Sh+JX7cP9WBR0KxwvaDUsRThFKDjgOLg9eCikDXADl/gD6IfX884XzQ/EP72PuS+7a7m3x5vQI90/5L/0b/379Zv0CAXoDygJ4AVUAC/4U+i72ifTH9Gj16/aC+aX7M/zL+8f7xfuu+d/16PKR8RDxa/He8qz1ePka+7j5ePuBBDIPhhU0GZIdsyFrIr8fnxzbGpsZQheiEioMbwUL/4v4Q/JO7i7v2PMA+Kv61f4DBDoHGAmMC64NFg98EcATkxFUCxEG/wIc/1D60PYu9dHzk/Fb707uRu6g7/Dx0/Ll8kf11fgH+rr5gvun/3IC8w
EFAHL+VP3z+2j6Xfp4/AP/YwCi/0z9Lvt5+jb7qvs++af0bPA87bDrluwV76rzdvqs/gn+Mv+gBvoORRMVFpQapx4qH9Qd3hxUG34YWhW7EYUM+AUEAHb7Qvd+9Fz1QfiR+sP8lv9pAWICfwR6BzoJ6Qk5C3YM7QqQBsYC7ADG/sP7Rfqg+ur5Cvf08xfyEfGq8NXwNfGj8eny6/SI9Y/0vPVI+g3+4/40/4EA8QBK/0T9pPzb/Nr8/PyF/ez98v38/aT95/uB+Az01O+j7aftAu/O8cv2E/z0/dL9ZAFSCJIMCw8GFN0YQxqfGrkbxxsoGnkYVBfXFEYQYwunB20EBgE6/h79KP2d/GX7JfuH+6n7LPzI/BD8f/v7/Ib+/f1Z/e7+dAH4AaEASgDWAF0ARv+v/jT+EP47/rn86fh79U/0c/MD8dbuqO+P8rT0xvXw9vP3u/gG+rj7J/2R/kQAlgGMAVkAmv8NAHgAdv8B/dP5g/ZO9LTz8vP+9I73jvrU+8j7eP1YAZwEXgeSC9sPxBKhFYUYdhmKGLMXfBf0FUcSIA5NC6AJ/Qb+AwcDNAPdAc7/+f6B/qH9Av0f/FD6cPll+tv6Ufnx97D4Evph+sD6gPy//hcAVgDY/1v/Of8I/wD+GPwB+jz4Efbw8k/wCfCc8SzzG/T19N71h/Yp97P4T/vb/en/BwKpAxgEQQRMBWsG0gUGA4//5fzd+in55fi7+jX9K/5K/a/8xP3p/oD/hgE6BTsIkQq5DWgQPxHKEbQSYhJ+EIsOxwzACr4IDAdnBhQH1AemBxMHdQZDBagD0gFG/7/8Bfw5/OH6evhq94X39/a49QH1V/XB9r34KPrc+ib8YP4KAOj/1v7r/dD8evpb9031NPXg9Sv2SfbY9r33Xfg++D74tfm9+5/8Qv0K/zMB7gKVBLsFZQWYA08BSv/7/cf9Wf9OAnEELwTJAhsCzwGwAN3/+wBPA2cFqQc3CvEL6wwUDvoOpg5jDVEMbAvdCasH+AW1BcMF3wTRA38DMANXAmIBBQD7/Xv8Wfy/+4f5wffu92744fcg9+r25PZA9yX47viT+Sb7gv3w/vL+6/4b/2f+WvwK+uX4O/nL+ZX5OPmP+UH6mvpX+hL6qPqr+yn8svwX/iAAXQJGBPgEZAQmA8gBxQBoALUAPQLIBG4G+AXYBIMEiQQzBCAE5ATKBVgGLwfsB8UHeQfTB/cHXgeNBiYGhAU8BBsD4QKqA9gEwAU3BpoGxgbgBdQDMAHc/rf9QP03/Kv61/nO+U354fd49tH15PVl9v/2jfdF+KL5Yfud/O78Mf0N/rz+HP7B/E78AP1o/dj8QPxX/KT8zPz5/E794P3E/oT/kv9d//r/uAF9A/8DcQN7AmIBVQC5/+//fgERBAUGIQY9BcAEhATdAzYDLQNPAxsDMAOGAzsDWALyASMC/gF+AW8BygHKAcsBfQLdA/4EbwWyBaEFxQReAxQCzACu/2P/ov83/w7+DP08/P36ufn9+GX45vc8+C/5hvk9+bb5FPsX/H38bf07/5oAlgDd/6v/BgDw/x7/Lv66/Z79iP15/e79A/8oAPQA0AHbArkDWwT4BEIFuQRLA3QBtv+Y/mT+K/+lAFICUgNBA+UCtwJ8AmYC0AIoA7gCAQJ9AbsAqP8C/wv/d/8OAK0AAgE3AXQBbAGXAWoCZwPWA7IDIwPqAXEATv9V/nr9f/1F/nL+nf3H/C38b/uC+rX5Wfmu+Xn6/Prq+vv6yPv1/NH9WP4F/xsA6QDGADAAFgBMACoAsf9X/z//bv/X/28ARQFvApYDVASkBO4ERwVkBdkErwMdAisASP5M/Un94v0X//cAtQI0A8MCsgJIA6wDoQOkA3kDnwJbASMA0v5x/X78T/x7/PP81f3t/ub/egC7AP8AbQFhAa4A/f+Y/xP/Xv7o/XP9Jf22/br+8v6F/mv+kP4Y/i/9jvxZ/FX8fPy4/Nn8Av2N/Tv+hv6g/kT/WQ
AXAWoB6QGOAuUCwgJJArEBYAFkAYEBogEPArcCQgOEA7YD+AM3BFIELgR8AzYCvQB+/5f+A/7//d7+IgDxABABKQGhASUCcQK4Av4ChAJmAV8AX/8M/r/8I/z7+777wfs5/Kz80vxK/Sb+1f4a/07/b/8E/1T+9/3N/WX9K/2l/XP+7v4A/9j+mP5Y/if+9v0B/mn+9P5O/13/Ov8O/xn/dv/B//b/eQBYAfwBKQIyAmwCvwLsAs8CiAI+AjACNQINAtkB5gEUAiwCUQKyAhgDXQNgA/8CEQK+AHH/f/4v/tH+EwAOATMB5ADHAM8AvQC5APcAGQGvAOr/HP89/g794ftR+337sfvU+378oP1o/sr+cf9KAMYA9AAOAaAAev9q/sP98/wN/Av86/yZ/dX9Tv7n/i//cf/2/2kAtgA1AbYBqwE/AfoA3QCQAEkATAB1AJAA7QCYASMCZwLEAiMD9gJDAqUBUwE0ATEBZwHHASkCXwJgAjkCBALeAdwBsQEOAQsAFf9G/p39a/3n/Zv+4f69/rz+Ef9S/3H/v/8bABQAqP88/6v+2P0Q/ZD8P/wa/CX8gPwn/er9nP5E/wwAugD7AOQArQAuAGL/xv58/jn+Df5t/iD/g/+X/9//NgBhAHcAjgCmAO0AXQGdAYkBcwGEAY8BZwFRAXIBmgGcAZEBhAGMAbAB4wH3AfAB8AHiAaUBegGKAcgB/wEXAhkCGAIPAuwBkwEEAUQAZf+I/tz9Zf00/YD9Sf4A/zz/Pf9m/6D/tv+w/6L/h/9F/+H+aP7S/UH92fyX/Fn8TfyE/PT8h/0n/rz+Sv/a/0IAZQBsAGwAWAAqAAgA6P/J/8j//P8+AFgAWwBuAIIAdwBiAIgA4gBAAYMBrQG9Ab0BwwHrARgCPAJUAk8CDAKiAUMBFwEBAf8AEAEyAToBIQHxANkA1ADbAOQA6QDvAOUAxgB/AAEAg//+/l7+3/2A/X/9z/01/l7+R/4o/jH+Sv5M/lP+gP7A/uf+6/7H/oX+NP7r/bD9hf13/Zv96v1S/rH+D/9z/+D/WADOACoBYQFkATYB3AB/AD0AIgAlADwAVgBvAIEAkgC6AB8BpQEzAqgC8gIJA/oC4ALDAqYCkwKAAlYC9gGCARwB1ACmAJEAngDYACUBUQE9AfoApABHAOL/gP9J/1T/dP9n/w3/nv46/tf9gf1f/Zz9If6U/q/+fP49/hn+FP4m/k/+kP7Q/vX+/f7w/s7+q/6B/lL+Lv4f/iL+Mv5W/ov+y/4Z/3v/9f91AOcAMAFBAR0B7gDeAOUA7QDjANgAxQCtAIwAdwCBALwAFwFjAYkBiwGFAYkBnwHKAfABCgIKAuQBiwElAdEArwDBAOsAEAEaARYBCQHhAJQAOwACAPX/AQAIAPn/yv+C/z//A//R/qj+kv6J/or+jf58/m3+bv5t/nr+nP7O/gf/SP9w/3D/Tv8j//X+0/67/qj+hv5Z/iT+7/3b/fD9Pf64/lX/CwC0ADUBhQGgAZkBdAFAAQUB2gDZAM4ArQCLAFQACgDn//X/8//w/wIA+v/M/7T/zv8VAJgAQwHUARoCJQIBAp8BJAHVAMcA0wD9ADABSAFAASUB8gCuAHwAbQBdAEYASABMADsADwDL/2v/9/59/g7+sv18/XP9gf2p/fD9Sf6r/gz/Z/+1//7/OABaAFsAQwAFAKn/Uf8K/9/+4f76/iD/Rf91/7L/9/9MAKoA/wA3AT0BFQHYAI0APwAKAPj/AwANABMAHQAjABgABAD0/9v/tP+D/27/Zf9z/6P/5P8iAFcAhACFAHoAeQB4AHYAbgBPABoAxv+D/1P/OP9A/27/lv+5/+X//f8RAEcAjQDFAPIABQH6ANMArwCRAHkAdACEAIwAkwCTAJkAkwCBAG8AVQAzACYAGwAVABYAFgAGAOH/tP+M/3P/ff+z//7/TwCOAJ8AhwBIAOj/h/9J/yf/G/8n/1T/iP+m/6X/kv9o/xz/sv
5Q/tX9Tf3E/DX8mvv2+mj6+/my+aP51/lq+k37Xvyb/QD/TAB3AZYChwMuBNoEqAU2BpsGHgduB2IHWAeHB5sHqQfKB+gHnwcFB1sGagUVBM0CwgGSAET/Nv41/fX71Poi+oX5/fgH+aj5ivq8+2b9Pv/7ANgCpgQDBssGMAcZBz0GwwQVAwkBqv6B/Jn6vvgs9/v17vQk9JDz5vJ58pryyPLS8pfzsfQ59Sj2SPh5+qn8RQCaBO0H4ApIDusQIRLsEn8TERPKEUMQTQ6bC6II1wUNAyEAmv2x+9v5UPii9133B/c/9xj48fjn+Wv7E/2A/ksAeAJJBPcFvwdSCZIKzAurDA8NNA3vDPMLZgpOCLMFzQKl/0n8Hfkr9lzz+fAz7+bt/+yc7LPs++xp7R3uFe8X8BDxSvLq87z1xvdg+oH97ADsBHIJlA0REaMUJhhkGk4buxsZG6sYWhWcEaEMHQdPAqj9x/jg9CDyuu8c7sbtFO7I7orwDPOi9ZD4pvuB/mYBewQ4B7oJOgx8DnQQAhKjEpYSVxKGEdwP5w2+C/cIzAWNAiH/fPvI92L0ifHi7kLsSOr96K/nveYC57/nMOho6cTrq+0X727xd/RM93v6u/6iA5cImA3NEoIXCxsUHpYgJiGzH20dDBrnFMgOcwi5ASX7nvXS8GPsWuk36NXnFOjc6ebsRfBL9Av5q/0HAp4GDAumDswRmxR3Fm8XGRgYGCUXlRXQE5cRvA6bC2wI/AQ6AXL93flD9ozyQu+w7Dfq8ueW5gPmleW15bPmf+cX6LTp5utI7fTu5PLr98v76/+gBssN0xJpF84c3CBrIgsjoyKwHycbSBY2EJgI+wAb+ofzau1x6NDkq+Ls4YTin+RB6KPsg/Ew9wT9FAKgBkELaQ+YElwVBhgPGv8aTBs9Gy8aJhjKFeMSDg/1CuQGQQKF/Xz53vWC8sTveO2Z633qlOli6K7neues5s/lFeaI5lDmq+cS7GTx/vX0+14ElQzyEqoYSh4eIoEjySONInweRRkvFL4N0gXD/gT5h/Pv7svrMuls5xvnsOes6JzqVe168GD0ZfgF/EQAHgUSCcAMCxGtFCUXbRn1GgEbaRpOGQwXLxRnETQOlwpABy4EJQFE/kT7Bfj69ADyqe6E68jo6eU543PhDuDP3jffBOLl5bzqwPGE+msD3wvoE/Ya1SArJc4mxiVBIx8f9RjFEU8KeQIX+0X1pPCi7J3pI+jz50nozOgt6qbsVO/A8Xz0sffx+kr+tQH/BEMIywswD/0RJRS8FaMWhBZfFX8TYBH3DhEMJgnTBrEEQAL7/yz+DPxV+aT2CPTH8ATtw+nF5kbjkOBq4FHi0OTi6OXvkPjYAM0IWxFzGcMfiCSgJzoooSaQI4MedBeVD8gHJQDn+Gny9+zl6Evmy+Rg5ErlM+eC6SjsFu8G8hD1dPgX/Lr/YQM+BzULzQ6nEdITdBUkFnoVCRQ2EsoPwgy8CfcGWQTsAez/DP7n+6T5qfe29QrzoO+c7Gvqhefy44zix+M+5R3nCOxV85j6HgJLCtARcRjGHpEjkiXjJV8l+yJOHnMYKxK1C34Fcv9b+Rb0K/At7anqEumH6LroXel26t/rbu1z72by+vWg+bL9gAJuB8ILlw/ZEucU2BXbFZcUDhLtDoAL8geuBMAB9v7F/Ev7v/n495T2IvXg8lrw2e156qTmZeQq5D3k1eRk6Ezvsvac/aMFVA4EFpIcByJVJUcm+CULJIcfdhl0E3UNRgeDAYb8KfjD9DPyPvAK76vutu7m7jfvyO+v8N7xgfPh9bf41Puk//EDBgi5Cx0PuhFHExAU1BNKEsUPwwxXCXcFhgHK/V36YffU9MnyCPE474HtX+w26zLpUucd5+vncOi/6Z3t8vIh+EP+yQUhDaUT5hnqHqchzCK2IqMgYRw+FxoSvQwkBwQCdv1a+Qz25fNI8vjwhvAB8ZDx9/G58v7zpvXS92v6JP0YAKwDkw
fZCl0Nnw+qEfwSDhMiEr4Qww4QDNcIHgUKARL9VPl09bDxme7z603pSucR5nnk++Kt4wLm5edb6snvo/YP/HcBSgg3Dm8SzRaQGvcbHhwKHHka1BanEnAOvgnxBGwA6vsC+Gf1uPOU8mXyHvNE9KD1Lfeo+Bj61vvf/bn/aAFsA9YFQwhfCkgMTQ45EKoRUxJREt8RqhBODjgL4AfyA2D/1fqS9oDyqu4y61XoSeaC5NzikeIW5AnmT+h17HnynPiE/vcEUgvUEIkVHBnSGgQbWhpoGP4UuBAzDKkHTQMa/wP7LPcM9NjxdvCc75Hvs/Cw8sj0/faz+cz80v9rAqQEjwZ5CGgK8gvcDG4NDA6hDsQORg6gDf8MwAudCTIHjARQAcn9mvqK9yH0pfDE7Xjr8Og25orkqeTs5QPoxuuP8Y/41//cBoANlBOEGLUb8xxjHGYaWxcKE5wNEwgiA2T+5PkJ9rTyBPC+7mvuWu4474fxS/QQ9x36P/09AEED/QWtB8wIVAryC6YMrgzyDFcNSQ3fDDwMLAvBCSQI5AXCApb/ufyW+UL2mvNc8QHvB+2L65fpk+ca5xLobens643wpPaE/f0EUQz1EiAZRB4lIYwhSCCoHUsZfBMIDYkGPgCc+rT1SPFs7fDq0umG6Q7q4+sL7/7y6fan+rX++wKjBkYJOwu7DBwObw/1D3YP+A7WDh0ObQxPCjEI9QVMA/n/a/wm+TD2bvME8d3uGO3R6wjrgeqn6WnoNOjC6aPrfu1u8Zn3C/64BDQMWxO4Gc4fFiTjJNcjGSL5HRgXmg90CEoBnPoQ9Ujwneye6hLqRuov6zvtbvD/84z3Efuz/mICzQWbCMgK1QzVDmgQYhH3EUkSRxLFEVsQCQ5UC0kIVQS+/zn7APf28mfvm+zd6vHpTukS6Yvp5Omc6czp5+oa7MPt7vA19fD56P/2BqcNyxPMGY8e/SB4IYMgzR1MGWIT0gxrBnMAvPq09bDxqu617B7sbewp7cDuufH49Jj3Wvr0/ZwBlQRNBycK9Qx9D4wR9xL5E6cUrRS5E8kRZw+eDA8JoQQQAKP7Gve/8ijvLuzx6dzoeuhN6O3oF+qS6hHrDu2C70zx+fNt+B/9tQE9B/cMzhFUFkUaIBz8G+0aYBjtE6cOXgnXA6f+d/ry9tbz0PGo8Pbv8e+A8P7wDfIW9CL2K/g+++f+IAKEBW8J4AynD2MShRSBFfoV+RXSFJkS3w/CDCIJ5gRrAEL8d/i29JDxdO/c7avsrexz7a7trO2+7prw2fFl8tnzgfbj+AL7Gf52AXgENQgEDO8NCg/iELcRNRBiDtQMNgr7BkEEXAEV/nb7d/lw97r1oPTv86nz+/OZ9Hz1Dvdj+ef7iP6MAdIE/gfXCkgNVA/xEAkSVBLcEdAQRQ8vDaYKpgdiBBkBtv1s+v33V/bB9MrzevS79Qr2dvZu+HX60PqE+iP79Pu++xX7HPue+xP8vfyd/Vn+YP/CALMBCwJwAtcC5gLaApYCogGMAOD/CP/Q/Qr9sfw9/N/74/sE/Fn8/Pyz/VX+LP9sAMYB0ALnA5AFOgf/B5sIoQkrCuoJhwnZCIoHJQbABNwCBwHW/8n+qv1q/cj9pv1u/S7+N/+g/8//dAAlATAB1QC5AI8A7v87/6n+0v36/Kj8Yfyn+yz7Xfug+7z7I/yI/Fr8APz/+8/7Pfvw+hb7HPse+3/7D/yv/K392f6l/1UAZAF4AgcDUAPUA1sEwAQSBVwFdgV5BWYFAwUzBDMDBAKmAI7/zv78/Xj96P2c/r/+Jf+FAO0BeQIgA08EJwWDBekF+QWBBU4FUgWZBHgDuALqAYgAK/8k/hT9Hvyt+4j7LfvE+qD6gPpd+m76dfpB+l/65/pU+6X7bPx5/Vr+P/8ZAHYAqAD0AMoAMQD5/9v/av8q/1X/YP9o/63/2f+v/4b/bf89//v+tP6W/q7+qP5v/nL+5f5x//T/mAClAe4CGwQ0BWoGgg
dlCCAJbQkpCacI4QebBgcFeAPzAZUAjf+p/rH97fx+/ED8//vC+5v7yPsv/Kn8Mv0N/jX/ZABoATICtAITA0oDDAM2AkIBgwBz/wr+zPzQ++L6IPqZ+Rr5vfjd+Fv51fll+kD7TfxG/Qv+k/4L/4f/xf+m/4v/fv+F/9D/QQClAGEBgAJlA+wDdQTZBNkEsQR+BPkDYgM5AwcDbwIFAvIBoQEhAeQAqgAyANH/y//U/8j/8P+LAFAB/wHVAusDxwRmBQ4GXAYeBrYFFQX6A4YC/QA9/2b9yfso+o34ifcV97/2Dvc0+ED5J/qb+xz9s/3r/WT+iP4M/rn9vv1+/Sz9lP1f/sH+Mf8kAO0AKQFtAbwBfgHxAIAA1P/9/nf+Nf79/Qz+bf6z/gn/rf8wAE0AmgBAAagB5gGNAnUDEwSVBH0FggY2B9QHpwhOCXgJQgnNCO4HkAbTBPkCIgE7/1z9/PtK+9D6j/r5+sz7bPz9/Nf9Yv5T/jn+Qv7o/Wb9U/2E/Z392v1x/uf+9/7z/t7+W/6B/cb8Jfxr+9T6q/qt+sP6IPvD+0n8zfx+/SL+jf73/m7/z/8eAHQAzAAYAWoB0QE3AqICHAOZAwkEhQT8BEUFYAVvBUgFzQQyBIIDowLHATQBygBbADwAfAC2AK8A2gA4AVQBTgGZAeYB5QH3AWICnAJ6Am8CcQIQAl4BwAAHANn+iP16/Gr7Lfo4+cb4hPhO+If4K/nf+YH6O/sH/Lf8Tv3g/Wv+7v5W/6z/9P8uAFMAgwDLACQBdgHiAXcCBANLA2oDbgMuA50C7QE9AYUA4/94/0P/QP9v/93/XwDoAG4B8gFvAukCSwOYA/IDXQS1BOoEHQU2BQkFpQQpBGoDVwJTAYIAqf+7/jH+Dv7l/Zj9f/1//VT98vyi/GX8+/t6+0v7WPtU+1v7t/sr/Ij84fxf/fr9a/7U/mn/FwCPAOgAWQGoAZQBUQEJAXsAt/8T/5j+Hv7J/dL9Gf5d/rf+Iv+C/+D/WQDTAEgBuwEtApYCAQNeA54D2wMdBC8E+wOkAzUDnALrATcBkQD7/5//hv+W/7T/9f9OAJMArwCoAIUAQQDq/6D/X/8j//v+8P7+/hP/Kf9b/6P/3/8LAEUAfgCnALwApQBVAMj/IP9d/ov9yvww/M37o/u1+/f7Z/z4/Jz9Pf7x/qz/TwDYAGIB2QEaAkgCfwKfAowCVQIAAncBvAAbAJH/C//D/sL+3P72/jT/h//K/xIAaQC+APkAMwGBAcYB/wEwAmUCiwKMAnoCYQIjAtcBoQF2AUUBNAFGAVABNQEGAbcAKABn/6X+7P0x/aT8Z/xx/LP8K/3c/Zn+M/+7/zAAcACIAKAApwCMAHwAmQCwAMUA7AD4AL8ATgCw/9v+2/3W/PX7J/uI+kL6Wfq9+lb7JPwh/TD+MP8mAB0B+wG5AlwD2AMcBB8E/wPFA2YD+wKLAisC4wG5AaABnQGlAZwBdgE9AdsAVQC9/yz/uP5c/hz+FP5C/ob+8f6d/2cAFQGuAS0CbAJuAlECKwIWAh4CLgIfAuMBegHVAOj//v5H/rv9bf1n/Y39pv2o/Zz9f/1F/QP95Pz2/BX9P/2c/SL+of4V/3b/rf/h/zAAeQCfAJMAegBgADIA/P/w/xwAYwCiANwA7QDTALUAjQByAFoAUQBuAKYA3wAZAWsBvwH2ARICEwITAv0BzgGlAYIBSAEGAbsAPACp/0f/PP9E/yL/GP91//r/YQDGAD4BmgGZAUIBxQAJACX/V/6j/R/93/wU/aD9JP6f/h//r/81AGcAfABlAC0A8v+1/zv/rf5Y/h3+yf1a/f38yfy9/L386fxv/VP+Tf8iAKYA3wD/ADEBVgFfAWoBowH6AUICYwJkAnwCwgL0AugCtQKBAj0CsgHHAJn/gP7e/cT9Kf7//g0A9gCPAdoB6QHFAXwBRQEkAdQALwBN/13+hP3u/MX8DP3M/en+/f+fAPAAOgGUAd0B6g
HHAWwB5wA8AIb/4f5V/s/9R/3m/M/8Cv2M/VD+Kv/m/28AjQBEABUAeAAYAXgBggGPAYwBOQHTAMsAOgGwAbUBGAEEAOb+I/7S/eP9P/7d/pv/GgAyAFMA6ADSAa8CMwM9A8QCtgFUAB7/Tf7S/b79K/6//g//Ev8Z/3X/HwDVAG8BxgGxASgBVQB///D+rf6s/sL+w/5a/oL91vyz/Mv8svyr/DL9+v2A/uj+mv+MAHgBJgKQAqYCXQLJARMBUwC1/3H/of/q/wwACgD+/9H/kv+n/zkA4QBKAXwBfAEhAYkAMABKAHIAhgDnAJQBvAEaAU0Ayv92/yf/Kf/c/8YAGAHDAGkAewD8AMQB3QIiBO4EsgSeA2kCQAHj/83+dv5c/rj9mvyk+yX7DPuA+6/8ZP7z//4AwAFrAsQCuQJyAvgBGwH5/7/+uv0+/Rf9zvx6/ID8G/3g/V7+D/98AO8BWQIMAusBwQEMAQMAcv/h/+8AkQFzAQABZACG/5n+Rf4Z/44AwQHrAf4Akf9j/hT+wv71/ycBGwKkAqACqQEhAAv/rv6J/l/+jv7t/tr+IP6G/bT9lP7D/xkBLQK6Aq4CGAJDAXwA3v9j/93+QP7i/SL+qP4Q/0H/W/+k/w0AMABBAOIAwwG0AXEAJP+M/nX+pv49/woAfACOALAA0ADdAEUB6QEDAjwBDwD6/hj+mP0K/n//MwEcAgsC5gFQAvECMgPnApcCQQIOAY/+5PuC+q/6jvtU/Kr81/w//e79uf6L/8QAsQKfBEMFLQQkAoEASQDrACcBgwC6/yn/Tv5w/c79xf8bAlUDhwOrA7wDMwOwAg0DqANdAzwCBAHS/6v+8f3z/XD+3v7Z/mD+6/3e/VP+BP/C/1EAYwDi/2T/nv91AA8BCwHeAKYAo/+r/d/7IPvr+rX6mfqw+tb6Cvst+wn74fpJ+1j8ev3x/ab9V/3H/ej+IwAaAQsCJQPqA6gD4gK/Am8DwwMlAxUCGQEgAE3/Wv+JACcCfwOOBGYFyQX7BZkGkQfxBy8HpgUCBJoCmwENAeIA3ADMAKMAWAACAOb/MQDAACQBGQHtADMBpwFXARQAyf4U/tP9oP08/Y38ivto+nT53fjN+GH5dfpG+zT7a/rA+Zr51fk3+tP6ifsB/Bb8Cvxc/Hb9cv/cAZ4D0wPwAm8CywJIA1YDYAOAA84C/gCG/33/ZwCfAVYDHgWZBXwEggPMA3MEfQTPBAMG7gZuBjkFsATjBAUFGwVqBSwFzQMoAj0BvwAVAJT/3v95AFsAmv/4/qD+Ov7//R/+Gf5g/U38bfuh+s75gfkC+pD6Tvpj+WP4uPd994/3s/ff9x/4rviE+SH6bPoG+6n8aP8WAhcDtQLnAlUEqAXPBQIF4wP3AjwCPgHD/1r++/3s/gsAVgBdAE4B+AJvBIcF7QaICEcJrgiAB48GRQZ+BrUGpQYsBjIFUATaAykDWwJ1AlwDrANyAjEAZ/7p/Vj+Af8f/3H+if00/Xj9qf1+/U/9fP3C/av91/xL+635afgk97L1pvQt9PHzxvPq83X0ivWf92b6NvzM+5T7EP/tBJYIHQlbCbAJeAgrBr8EyQQnBYcEBAPpAPr9F/vU+VD6m/sn/d7+gQDuARUDLATEBfkHzAmfChELhQs9CwYKuwjxBzQHAAaXBHUDjgKkAcIA9f+3/y4AXgAo/3b9+Pwb/qz/dQBcAI//Ef5l/IH7f/vM+w38MvyX+4H5evYk9ELzOPMt8xLzI/OH8/LzAfSI9KX2Cvow/bP+Bf+rAIMEVgf1BpkFVQUMBRYEmAPuA2AEBAUzBvcGuAUbA5YBnAGKARgBQgHjAZsBMwDC/iP+pP4PAS0FuAh8CsoLVw38DU4NfQyLDN8M3QsDCUIFbwEr/r38MP0v/aX73fks+cn5Nfu9/OX9dP5K/n39avzd+zf84/zw/Ab8lPo9+Rf4Ffe99jL3bPes9hH1BvMc8Q3wGvC38GrxVfJQ8w307P
XQ+vUBNgjnC1kOvBCeEqcTLRTXE+YRZw5LCnYGwgLJ/m/7dfku+D/3/vaD9yv5yPuX/mABVgSSB6sLaRAeFNcVSBYiFjgVMRNvEJYNVgrwBQkB0/wS+YX1O/OO8rTyb/Mb9Zv3FvoM/Pn9CQA/AfsA+/9c/2D/Ef+q/Xv7PPkW9+308fKf8VHxrPHT8dbxpfG08M3vNPBu8bTyP/T39RP3Nvcq+L38FgQ7CWUKDAv2DUsS9hU6F34VJBIPD+wLlQdhA0wBtwAq/+j74fgR+DX5Wfv3/ecALAS5B7YKlQxuDooRLxUcFzYWvhNZEckOTAuvB+4EXwKY/qL5PfUA85ny1/Ja84b0+PVH97b4SPrl+5f9av/hAOkAS/+m/df8t/sT+qH4Kfdn9YrzxvGv8Fnwb/Cn8NDwDfHF8YfyCvMa9Gf1XPa290P5Wfm/+bf/KwrAD/UMOgohDR4SVhVBF3AYwRfOFNoQPQ31CeEGyQQfA+j/F/zE+pH7/fvd+3X89/1LAOsCsgWWCRcOVBBhDwoOKg7VDhgO6QtoCqoJ9AbJAe38nPks93X1KvRp8rvwYfA18UnyFvOJ8xL05fRr9f71W/dt+L34SPnZ+Zb5J/mI+ZT6HfuQ+rX5OPnK+A74AfcT9jT1ivPZ8a7xI/JE8nrzBPVV9Wv4AwJODM8OnQswDJETnhuHHwsh2iEdIIobwRYsEzsQjQ32CboDW/wN+OL3S/iR9kb02PQh+Rf+jgGqBKoIXwwcDtwNPQ3LDZYOzg2pC0wJzga2A6L/y/rP9uT00fN08T3uBuyI64Tsxe0W7k7utO/b8QD07PX797j6Df19/S39kP04/mT/+QD7AOL+ZfxM+j75XfmS+Nf1A/Pi8EnvO++J8AnyzvO49Yv2Ave/+6kHZBT5FzoUwBO/GUQhOibAJyAmASIZHbsYExSyDuAJ2wRP/oz4ivUZ9Fvzh/Ij8T/xrfSu+VD+0wExBE8GMAgtCS4KoAvbC8YKaAmCBwwFyAIdABv87fep9JjxP+7J66zqYOqn6kDrtuvF7KXvjPN09pP4k/vg/rQAWgGbApAEJwV+A28BqgB4AE3/QvyV+Iv2BPZX9cb03/Tq9OH0GfWP9Rb3LPlp+T751P2FBwIQ2BGrD4QRbhlhIeEl0icqJ1gkLCFuHQoZUBXPEIoJRAHR+tj2UPRR8V7tvuou6y/uPfKR9bP3Yvqz/QQAIQLiBNMGvgfmCNEJrAmrCKMGbAOf/1f87flb94Lzce/q7MPr7eon6qbpS+re7HbwtPO99nP6pv7wAS8DhQPqBOgGtQfYBkEFjQNZAY7+y/ya/K/74vgX9sH09/Rm9gv4EvmU+RD6G/pR+TP7IwRxD4QSBw6LDb8Uix1gI2MlYiSSIg8hgh5EGnQV4RCPCwEEuPsR9krzYvC763vnUuao6NLscfC48QLzGPey+3r+JgF+BBEHSwlWCxsM9wurC9UJfgXKAO/95/vs92Py9u2H6zrqXum56BPpWevK7tTx0vQx+TL+AgInBKwFAwiNCg4LFwrhCZQJwgciBXcCHgDl/Sr7CfmI+GH44/d79+P2y/bt9+T3j/WF9pz/sgpbDbgIngiAEeEbgiGeIoMhFyH3IT8gBBtnFoQSIgysA+z7qPao8/bvsOlp5MnjouZG6s7sYu3E7nLzAfmS/LD+ggDBAgcGIgm2CjkLQAsxCm0HHQRmAvYBj/+B+rr16fJg8UHw0+7r7QPv1/Dc8VDzk/Zb+w8AbALsAqQEjAcQCfsIyQj2CN0IiAciBbIC4QDS/x3+yvpv+MT4j/lw+YP4Cfcm9zf4BvYd9MD6xgW5Ck4JeAi6DK8VHR7bIDsfOx0rHPsbxxpJFugPwglbA3H9g/mc9dTvt+mo5XTk5OXv6CDrfutE7FDvgvPW9837QP6M/5YCQAgnDbYOYA4KDk0NHQxSC+sJYAZ+AVX9Qfqv9yz1r/L38KzwXfE38vnyT/Qg95f6/PzX/nMBoQNWBC0FNA
dpCaQKvQleBocDDwPQApwBxf8o/ZX7xfsi+4L5m/hH9xz1HPO38mf3y/8FA8b/e//aBcYOEBbgGJgXahaSF1UZrxk7F1gS1wzqBtYALP0/++z2RfDh6oXok+kz7OXsgOsl63btEfLz9mX5nfl0+/AAVQcxC9AMEQ44D1QQthGsEdIO8AohCBwGewN//yD74Pfh9W30V/OC8lDy7fJq8/zztPbB+0cAqgFdAMz/vgIKB6MIzwefBi0FJwT1A7MCAgC1/VH8aftS+mP4n/bw9bT0avJo8gX3dfw9/uX95P8eBSIL4A/IEs8TqROREykU9hSQFJ8R0gzuB1AEUAKQ/1H6FPX+8Y3wsvAD8cTvRe9x8DHxWfKj9dr46Pnk+k3+DAMUB/QJwAu2DB8ORxBHEdIPRw1GC4EJ6Qb2AqT+xPtQ+ur4MPc29YLzifPn9JD1E/YG+Er6rfuW/Mb9o/+uAd0CaQOqA08DqgKiARIASf87/zv+qvzo+3X7oPp0+TX47fdo+fr6Xfu4+0X9DQBRA68FOwcJCZYKagt4DKMNMQ6eDc0LcgmsB3UGvgQCAtf+RPzP+mL6zPrr+sX5ffhz+PT4ovl8+mz62Pm3+vj8e/+nAS4DOQRdBd0GjAikCXoJrghBCK8HUwajBPACXwE8AMX+9/wr/Pr7IvsZ+qX5H/o0+1L7E/q0+Qv73vwr/s/9IPwG/NT9eP6B/fH8Uv17/oP/Qv+1/uX+av5c/bz9K//v/zIACQAv/63+Ff+8/ykA9v+c/zAAfwFTAnwCKAJrAQEBdQEPAuEBDgE6AH3/Ov/y/84AwgCoAD8BqwHXAccCrAMhAwEC+AHgAtYDeASHBN0DewM+BFIFVwU+BAsDiAJFAmgB7v/E/n7+lv4L/r38pPuq+3D8qfwk/CP84vyx/Q7+zP2H/TX+I/9q/7H/CADB/xP/v/4U/2b/6v4R/jD+LP9s/2v+4f27/gMA2wBDASwB9gAuAcoBEAJoAQ8A7P6w/hf/Bv91/t/9Kf3J/A/9QP0K/d/8ePyx+4n7YfyJ/TT+W/6Z/mz/0gCeAtoDxgMoA6sDYQWbBqEGUgYKBqgFkQW6BW0FgASdAwMDJQLoAKT/wv6A/nH+k/1A/Mz7OvyE/D38sPti+4P72fsk/Ij8D/2o/SX+hP4m/x0AfgDm/4f/KgDpAAgB2QDZAP0A8QC8ABgBLQJzA0gESwTVA+MDcwS9BG0EWQOhATAAev8G/1X+iP33/KH8ZfxU/IT8w/z0/AH9iPz++0b8R/0L/lD+dv7G/mf/gwCAAYQBDQGbATADPwQcBKEDjgPqA2QEjgT6AwkDiQKAAkECYQEOACD/8/7F/gL+B/2D/L38+Pxo/GT70frb+k77v/vG+5b7tfsS/K/8s/3G/ir/8/7m/qn/9ADKAdMBzQH8ARUCNwJxAt4CxQOLBNcE9wToBMsECwVNBfkEEATeAtwBVQHdAEAAsv9R/yf/Nf8M/53+Xv5O/hD+pf09/UX9tP3i/ZX9WP2Y/Tb+xf4Z/y//M/+a/7wA5QFAAgYC0AHaARUCMQLbAR8BkQB0AEwApv/t/qP+jP4Y/mL98/zz/An9Fv0L/cf8bfxY/Gj8YfxJ/ID8If3H/RP+QP5t/rj+LP98/5P/6f/NAM4BXgKWAqYC6gKtA88EvQVDBk0GDAblBfAF2QVdBXAEQQMeAk8BvQBLAPD/mf8w/+v+Bv9c/2z/+f5R/tr9uf3H/Zf9Sv0Z/QL9CP1z/RL+Vv4V/vT9if6s/5gADAE1AWQB4gE9Ai4CrgEqAd0ApQAvAIT/If8B/7j+CP5k/TL9Zv2P/Xn9Kv3Z/Mf85vzN/KP8y/w6/YP9y/1i/g//Mf/s/uT+Yv8kAM8AUQGsAbYBdQFfAcoBgAJTA/cDKgQCBB0EuQRABRgFLwTvAvwBkgFoAQEBNABS/wf/Y/+y/6j/b//7/pj+o/7A/pr+jf6b/lf+8P24/ZL9lf0Y/q7+u/6Z/tD+b/8XAKMAKg
F6AYIBuAEXAiYCygFlAQEBswCKADkAlf8Z/w//DP+X/vL9nP3N/R7+If7T/Zn9zf1H/mj+If4W/qD+Iv8V/8z+//6O////XADNAPsAvwCLAMoAZwEGAk0CawJ9AoICxAJGA24DJgPrApMC6wF/AXQBJgFDAFv/CP9A/5D/o/9S/8H+U/52/vX+Df+S/i7+Cv7m/e/9J/4Q/rv9tP3l/eL9EP6v/k3/o/8TAJUA0gDYABsBlgHTAZABKAHrAL0AagAWAOz/3/++/5X/UP/t/q3+1P7+/uH+rv61/tn+D/9l/7P/zf/c/+v/AgAzAHYAowDIAOMA2gCmAH0AjgDsAFUBgAFEAQ4BSwH3AaAC2QJ1At0BogHYAfIBpgEYAX8A9v+t/5H/jf+H/37/VP8B/8z+7f4o/yH/xv51/kr+Kv4E/u/98v0E/hj+IP4Y/jv+sP5c/+H/KQBcAKwACQFdAZQBnAGJAYwBjAFJAcsAbwBKADYAKgA0AA0Aq/9G/xr/Jv9E/1P/Uf9L/2//qv/U/+f/+f8XAEAAZwCBAGUAJwAAABUANAA3ABoAAgDz/wgAQwCLAMgACQE0ATwBSQF/AY0BQAG9AGMAUwBFAAsAq/89/w3/F/8x/1D/KP8E//3+9v7e/sf+sP6W/of+cv5f/l3+kf7d/hT/HP8n/1L/jP/K/x0AYgCHAJEAuAD3ADgBaAGUAaIBjQGAAY8BkAFuATQBAAHAAG0AHQDv/9L/uP+c/4v/j/+g/6j/sv/V/xEAUgCLAKUAoQCTAIwAfwBoAEgAMwAdAAUA9P/s//D/+f/5//3/FwBAAFYATABEAFQAYQBQAB8A7//a/9j/v/9+/0j/TP9k/1D/Cv/T/sn+2P7d/sn+qf6l/q/+rP6l/tL+Iv9M/0T/Q/90/8f/BgAUAPn/8/8eAGUAggCRAMEADgFQAYcBtgHcAekB0wGVAVABKwElAeoAbQDy/8P/0v/a/8X/t/+5/8X/3P8KAC8ARgBIAEkAUgBgAF8AUQA0ABgA/v/j/8X/of9//2j/Wv9P/0T/Rf9M/0//U/9h/3r/jP+K/4b/k/+t/7b/nf9x/1//YP9g/0v/J/8N/wX/CP8M/wz/FP8n/0P/Yf+H/77/BwBAAFkAaQCUAMEA4wDnANsA0ADNANUA6wD9AAkBEAEjATYBRwFPAVkBVQE5AREB+QDnAMcAkgBeADgAIAAOAPb/2P/D/8L/z//U/8//y//Q/9H/z//S/+D/6P/T/6b/hf99/4r/jP9y/0X/L/9E/2j/cf9b/0P/RP9j/3//fv9w/2r/cP95/33/ef90/2r/YP9a/1//av9o/0r/OP9I/3n/mv+d/5L/n//a/x8ARwBIAFAAdACmALwAtQCrALkA3QDwAOQAzgDmABMBNAEpAQgB/QAVASIBBQHBAI0AegB0AE8AEgDW/8D/w//G/7v/sP+r/7D/w//X/+r/8v/w/+T/0//X/9j/wf+j/3f/cf9//4j/dP9X/0v/Xv93/3j/bP9d/2H/dv+P/5b/mf+e/6T/sf+8/8n/0//X/9T/yf/G/87/2//Y/83/wf/P/+7/DgAZABQAFgAvAFYAdwB8AG4AcACAAJkArQCoAJkAmACwAM0A0ADAALcAwwDfAOQAzwCpAJIAjAB9AFYAHwDx/9b/yP+y/5v/i/+F/47/mP+m/6//uf++/8L/w//N/9H/vP+j/5j/pv+v/6n/lP+E/47/qP+1/6j/nP+h/7D/s/+r/5//mv+k/6b/mv+M/4//of+s/6r/rf+4/8X/2f/i/+H/6v8BABsAJwAtADQAPQBQAGcAaQBZAE8AVQBtAHcAbwBgAFgAagCEAJAAiwB6AHYAhgCfAKkAnwCKAIcAigCGAG4AQgAfABEACADq/7j/j/+R/6f/s/+o/47/gv+Y/8j/4v/f/9f/1f/O/6//hP91/3b/e/93/2n/W/9e/2//jf+Y/5z/n/+b/6T/uP+4/7T/tf+h/63/uP+0/77/1/
/Z/8//0f/E/8D/z//W/9z/8f8OACAAQQBhAGwAjQCbAI0AqgCtAAYBugEeAUgC1gckCm4F6P9y+z360v25AHwCSAMQAH38j/zl/Dj+8AGTA7YDUgNfAV8AqQDeAFgBQgGnAFMA4v7O/df9Rv03/XD99PyW/Bj8x/q4+eb5EfvI/P3+//8eABsAVf9Y/4AA7AEGA5MD9ALoAVwB9wDqACEBEAF9AJj/8f7o/ib/M/8Z///+u/6G/rD++v7p/vv+h//G/9j/TwADAacBFQIpAgkC4wEYAqICfQLFAZcBXwHfAP0ANwEBARcBPwH6AI8A+/+7/34A7gCEAN4AxgA9ABoBtQHLAKoBwAMpAvv/7v8T/9v9Nv7s/UL90P1x/Qj9q/1b/VP9Kv4K/j7+EP8L/63/KgHQAMQABQJnAZgAlwCD/9D+vf+2/+L+hP8s/2T+Kf/O/jn+xf7E/kD+mP4h/6L/twCOALMACgLKAQEAsP8BAXYBJQLtAcn/Tf+m//v+hP8NAJP+0/1M/vz93v6v/0z/kv+B/zH/fAC3ARABSQHYAUEBiAE+ArsBywEsAhoBjQCWAGwARQGtAQABiQAyAMP/Gf/X//4AmgCTAKL/cP4J/+v/JQCRABgAIP9AACAAF/8a/8D+rf6v/6f//P5n/+n+kf5i/0v/aP/y/5r/ev9S//D+JwCgAeoA1QAMAcX/df+d/1L/1v8eALf/GAD1/yX/dv8pAAUA7P/V/0H/gv8EAPz/AgA+AKQAqwC/APQAuABRALn/Qv/Z/2sASAAQAHv/8P6+/7YApADZAJoAyP8jAKoAnwA6AUMBtAADAcAAeP8y/0X/Hv+u/5X/2v6M/qn+v/6j/iL+zv32/UH+Pf5v/gD/Jf+O/+D/sf8OAK0AxQBhAcoBLQEhATgB6ADpAKgANgBeAEIA+P9YAJ0AjwCiAGIA+/9KAOEAcgEwAqYCQwLgAWcCiwINAswBmgGOAXIB5wBCAOr/PP9Y/qf9Hv3i/Kn8dfyd/LT8kfzA/FH8r/sm/Gf8XfzB/Pn87fxw/bX9ef3I/Tv+TP49/gn/l/8l/9z+4P7e/nD/UwBDAS4CwAKCAycEeASRBeAGZwc5CAMJ+QivCI0IEQigB2cH2wYhBjkFGgQ2A1oCFQHl//3+7/21/AD8svsN+7r6vvpC+vD5Wfqm+kb6XvkA+Y355vmY+Wz5+fhR+ET41/fL9rT2//ab9mP2WfY09vj18PU79ur2W/nc/sgE6wZRBtYHHwxWD08QTBGZEzMVhxQRE40RjQ+xDQILSwYUAn4AGv9U+/f2J/Si8jPy4fK380b06/WK+Ln7C/8oArcFpQnvDLcPwBIqFegWFBiuF8kVKRQ3EhAOngg4A4P+f/pO9rHxoO0h6q7m3OPW4V/gHODG4DPh3+GE45/l9udh6uDsJPAw9B74uP0vBzgPMA8jDH4PbhZ3GVkaPBtXGhAZPRjNFMwPNw2nC9UHVQI5/sD7jvgM9PrvFO6f7j7wxPFg8+b0zfYT+z8B4wUnCXIN5hHXFNAXWxvcHZAelx1dG7kYGRZZE6gPZApRBGH+DflA9ALwwuwb6ubm1eMr4n7hNeFr4bbi0OTO5XHlEuf26ibtUu6I8t32+vjeALYNbRFhDegONxT4FI0UfhZGFo4UoRRPETMKhAc8CHcFhgCS/MD4wPY49X3vLOv57Qrx1fBb8uH0g/V8+B7/YQRlCC0O4BOfF/sasx4xIn4k1yM4IY8fYB2xGNITPg//CMICEP6B+IPyau+h7RzqDeck5pblCOXV5ALlJuZy56Xnvec86DbpDOu97N3v/PRo+Kf+ZwtwEfMKjgiXD2kSbhFnFNsTCw+dDgANdAWCATcCLf8g+5r6F/kG94b1AfDK6xDvUPGW72bxjvRl9fr4w/44AiIH0w6lEwkWMhoKH1ojOyaFJaYjkSMvIuYdpBmHFRYQHQoiBM/9O/iA9KfxKu476/vphegD5qnkP+VV5hvn9uf66JLpCu
qY7JPwRPKP8lf1wvcv+Br/Jgv2C34DSASBC2YLXwoYDrMO6w51ElIPVwenB2gMZwpABTcEgwP0/kL5nfO370Dydfbl8m7ttu2W7dfrbu9X9Rb5a//SB6ALhw2ZE9wamR/mI/om4Ca6JZEjPx95G24YYxMADuMJJwQ1/UH49fOk7+rtd+zg6FbmkOXb5LbluOgA7FXvpvIV9HbzO/Px8wXzOPH18Kvw5fAY9Nn1Y/Mj8nb2i/yk/pL+TgNAC8kOwg8ME7UVcxaHF64WMxPGEXcRbg3MB94EIQLg/JD3BfRy8Z3vxO2X64rquOtH7tbx6/ZO/WQDDwjwC0YPtRL1F3Ec3B0MINoiaiErHd4ZjRd5FogUwA7fCAoG1AJR/Yb4wfUs9LTz9vL07yLtCu2+7OjqMOu37DHsKuxr7PzpkOje6FvmB+QS5tHo/+mt6wPu1+8d9FX8vQIYBpYLTBBYENARchb/GCMaHBu2GKITjg8EDHcIYwbrBEkCfP9D/Sn7dvnc+Bn5wPlL+uz6kPx+/jr/7f+QASwDRAXhB1MJzwoxDokRhhJ8EmkSpBEBEZ8ROBJRESQPbgxcCRAGqgLu/779L/uE+Er2UfNg7zvsQ+qu6Nrn9ObX5JrjrePf4lXj+OYs6rPrbO6C8enyavSO92772f8CBBUGTQemCSYKpAceBogGuAafBWEDIgJIA7kEeQQxA+kBBAIbBHoFhAVlBxUKXgv8DF4OKg4hDysRYhEPEfoRDxIdENINcQz1ChAJWAchBV0CHQFAAdEA9//S/zQAegDq/3X+kP0a/vX+5/7d/Rz8r/mf96b2pvUL9BvzMvIx8PbudO+r72nv3u+d8GDxV/PT9bL34vkn/Cn83voM+zD8s/0b/z//kP5F/mD9s/s/+0H7JvrR+XD6UPnB91v4Efow/Pz+6gAdApwE5QcCC4wOUxEKE14VPhdjF9EXVxhBF9wWdRf4FOMPmQyBCqMH3QS1AaL+iv3+/Fv6tfcY94/2GfVe9G302PSq9tn48PgM+P/3GvhR+H/5u/rV+1T9o/5H/2r/dP/H/6f/J//E/xABQwFwAIz/jf4y/ff7bvsC+675ZPhM+Pf3m/aR9b/0C/RC9Uf39PaX9pf4ZPre+sr7a/3B/zIDTgZ5B5UITgvaDdoOiQ9VELgQUxGDEYQQNRDeEIUPVwykCvoJOAhjBtYELwKK/+T91vvP+VL57vh89zH2ZvWk9C70CPRW9MP1tPd6+AT42Pfy+MX6d/xH/Zn9s/47ALMA6wARAjoDYgOwApsB2gCNACwAeP9x/i79GPyX+r34gPjE+TD6k/m1+MH3yPct+e36vvxm/lv/g//J/4UB6gQgCPoJzAolC6YLowzPDcYOfw++DyMP/g0/DQwNfQwzC4gJmge8BS8EqgJAARAA3v4O/QH74PmX+e341ffO9r715fSN9CX00POs9BT2rPbZ9mr3rvjB+vz8Jv53/rv+J//C/2MAlACdALsAUABg/2T+Qv0L/FL7/vqE+v/5vfnO+Ub6uvqK+n36RfvO/CP/nwHdAk4DGASTBEsEVARGBaYG5AeHCFEI/AcsCIAI7gjXCZ4K9QpMC2YLFgtJC60LuArCCHwHpAY5BaYDUwLfAHL/NP56/IL6Svld+F/3NffO98D3MvdL96D3pPfT90X47fgL+jr70vsc/FX8Z/yp/Cv9eP3E/fb9fv2N/BX85vug++L71PxU/ej8kfzk/LT9hf56/hj+a/4L/4r/GgCFAIsAyAAwAZ0BsQJ/BLoFPQazBq4GaQYHBzcIrAjYCE4JFAk5CAIILQjkB1UHwQbNBZIEpgMOA2QC6QGyAQABrv/K/t3+c/+A/6r+0v28/fb98P12/aD81vtL+1/6CPmR+LP59/qo+q35pPlC+oz6rfoM+4T7KPyV/AL8N/u7+9P8ofyW+4D7K/xx/Gj8afyp/LH95/4U/+v+/f+AAf8B8AF1AkADzwM5BHcEbQSZBDcFogVaBTAFqg
UfBu4FfAVBBY4FLAZlBgEGswWbBRAFSQTsA/ID4gMIA1gB4v8v/2v+W/3I/Ar9n/3T/Wv9Pf0R/m//JgAQAMT/q/+X/0P/lf70/ez92/0h/Tz8t/us+637JPt1+oD61/qz+pf6KvvW+yn8JPzV+5z7FPz1/Gv9iP3N/QX++f3Q/RP+RP/AAFEBGwEzARcCdQPPBOIFJQeYCFkJRwkRCQIJYQkACuUJkgg5B2AGCgU0A88BjwDz/qj97PxI/Pj7OPxi/Ar8x/sV/AD9JP4G/6r/TgCSAGcAygCmAfcB4QHCAUwBtAB0AMn/df51/e78R/yT+wP72Pol+1L75vq4+j77ufu1+577lvuN+877Nvxq/Kj8Bf0E/dj87vwL/Tn90f2r/nH/cwB4AQcCiQKgA7cEaAVOBooHWgjWCGMJnglLCfYIpwjzB+EG8QU2BVoEBAOAAUEAQ/9P/kT9X/zm++T7Gfwa/C380vz7/dD+KP+W/y4AfACUALIA4QAqAV0BAAE2AM3/x/9o/6z+Jf79/df9af3R/Jf81/wp/SD90vyK/Gr8Tfxi/MP8Kv0O/Zf8Q/wT/Ob7sPuc+wT8tPxK/WL9fv1p/r7/LwFTAh4D4wPwBLIFwQXEBVoGBAcSB8EGegZZBiYGwgUxBaIEIgR/A20CJgE9AOv/nf8U/8f+Bf9f/3j/bf9S/1f/sf/8/9j/nf+n/5v/QP/K/l7+9/2x/XH9A/19/Cb86Pul+5L7pvux++T7bvzi/Bv9l/1D/sP+Bf8M/9r+3f5R/5n/Wf8X/wr/1v5p/uP9fP2Y/Sv+mv68/gb/bv+3/wMAmACSAbMCeAPUAycEnATVBMYExgTrBP0EEwUCBZQEGQQHBAUElQPZAjsC9gHlAcMBowGRAVkB+QCdADwA8/9QABcBVAHTAFAAIgCy/+7+NP6Z/RH9qvw1/Hv77voF+zj73Ppg+oj6GPuf+yr8tPz7/Bv9LP05/Xf9Gf7m/m7/e/88/wr/Dv8N/9D+n/7O/hn/K/9C/6T/HwCXAB0BZQE6AUkB/AHKAhsDHAMUA/0C2QLKArsCrwKqAn8C5gEDAX0AowAEASkBOwFOASEB9QA0AacBAgJqAqMCVALlARcCcQJTAvEBpQE+AZgA+f+c/2T/W/9e/wD/O/7P/eH9z/1s/Sr9JP3//MD8s/y0/Kj8xvzx/NX8o/zU/F79z/37/RX+Lv4b/gH+Ov69/iv/gf/l/0UAfgDZAGwB8gE3AkkCKALnAcwBEAJfAnMCUgIZArABGQGoAK8A+QArARMBzACJAIIAowCoAJ0AygD9AM8AeQCUAOMA+QDzAOcAsQCTAMEA7ADtAAsBOQEhAfYA8QDlALsAiAA3AMj/a/8p/xP/Bf/S/oT+LP6k/Tv9WP2+/c39if08/QP92vzI/M/8+/w+/Yj9qf22/dn9Jf6i/jH/vP85AH8AyQAcAW8B9AFTAoECagJdAlkCHAK6AYYBYAHjAEoAzv+R/3v/af8j//v+Kf9u/37/mP/i/0kAtgA+AckBMwKBAsMCxQJxAi0CVgKKAmsCGwLXAXAB2gBfAB4A8v8JAGUAgABDAC8AQQA9AE4AeABWABQA9/+o/wz/jv5A/sz9O/3N/Gv8E/z9+xf8FPwl/ID88fxW/d39f/4v/+v/kQDkADEBnAHOAa8BfQFMARQBxwB7AEAAJgAUAO7/z//t/zwAbwBxAJYA5QALAeAAuQCxALgAvwCXADEA5v/n/+P/pv+M/67/0v/b/+P/CABtAOkAMgFdAZ0B1QHgAd0B3gHTAc0BsgFgAe0AqQB8AC0A1/+t/5v/g/9o/zj/7v7B/qX+dP45/hn+Cv7+/fj9+f3r/fX9Mv5q/nz+m/7S/u/+8P4S/1f/m//L//v/KgBVAHsAoAC0AL0AxwDMALYAnwChAM0ABQEmAT4BcAGqAcIBrQF/AU8BGgHmAMIAqwCdAIkAYwAnAPH/0f/T/9n/2//e/97/0f/N/+D/AgAnAEsAXA
BXAF0AfQCeAKQAlgCQAHIAMwD8//D/6P+x/1D/6f6P/k7+Pv5W/nb+jP6f/rP+yf70/kD/m//n/xIABwDh/8X/tv+j/3H/HP/H/pD+ff54/oD+nP7U/hr/Xv+m/yMAxABPAaMBvwGpAZ4BtwHRAcIBnAFtATQB6QCWAEcAIQAqADEACwDR/8n/DwBpAJUAogDEAPwAGQERAfMA2QDWAMwAnABIAAsA4v+7/4z/cf9y/4X/jf99/3H/gv+x/+n/DwAhACkAMAAsACMAFAAWABMAAQDd/6f/bf83/xX/9/7B/qD+cP4i/gb+1/3d/f/9Mv5o/qL+1/4K/1L/qP8YAJgAFAFnAZIBpQG2AcIBywHMAcQBmQFTAQEByQCjAI0AfgB5AHsAfgB7AHcAfACXAKsAqwCZAH0AXwBCACgAEAD6/9T/n/9m/y//BP/n/tf+1f7h/vn+EP8w/1r/k//i/zgAiwDTAAMBIgE8AVMBVQE4AQkBzABxAP7/f/8A/5j+Wf4k/u79wf2x/cD95/0h/mr+1f5J/5n/yf8IAGcAygAaAUIBSgFGATgBGgHuANkA2QDYALkAewBDADkAYgCjANoA/QAfAUEBUwFLAUkBXQFtAVgBFgGzAE8A/P+v/2H/EP/I/oL+Q/4V/gX+G/5V/p7+5/4r/2j/rv8EAGAAsgDuAB4BPgFAASYBAgHeAMAAlgBTAAEAtP94/0b/Ev/l/sz+yv7M/sb+x/7b/gP/Mv9d/4T/sP/V//L/CQAbADYARABHADwAKAARAAIA/v/+/wEA/f/y/+j/5P/w/w8ANABZAHgAlwC7ANwACgE5AVgBXwFOASoBAwHfAMAAlwBpAC8A6v+l/3n/Yv9Z/1f/Uv9G/z//Sv9q/5T/uP/Z//D/9P/t/+3/AwAgAC0AIwAHAOT/yf++/8D/zP/S/8n/sv+b/5n/pv+x/7j/sf+k/5f/kf+W/6L/qP+n/6f/pP+i/6b/s//D/8n/vP+7/8r/5f8AACMASABnAIUArwDWAPsAIAE+AUcBNAEZAQQB8wDcAL8AnAB3AEUAGwD5/9b/vP+p/5T/ev9t/2f/cP96/4D/g/+G/4f/k/+d/6D/of+k/5//lf+M/4r/g/94/2f/Zv9f/13/bf+H/57/ov+//+D/9P8dADAAKQBQAEsATwBLAEsAQgAzABsA/v/t/9n/zv++/7L/qf+i/5z/nf+o/7n/z//p/wEAHgA5AFUAbACEAJgAqACxALoAvAC3AK0ApACMAGwAUQBBADAAGAD//+n/4P/T/87/1P/Y/9v/2v/c/9n/3P/l/+f/7P/t/+v/5v/h/97/2v/P/8X/tf+i/5T/kf+T/5j/lv+V/5T/mf+p/7r/yv/e/+f/4P/X/9b/4f/z/wIACQAEAAAA/v8BAAUADgAWABcAEAALAAsADAAOABEAEwAXABQAEQAPAA4ADwAYABkAFAAZABIACQAIAAgACgAHAAgABQADAAIAAgALABMAHwAmACoAKAAlACMAHQAcABcAEgAEAPP/5P/V/9H/zv/Q/9P/zv/E/7r/uv/E/9D/2v/o/+b/4P/W/9P/1v/f/+v/7//v/+z/7f/v/+7/9f///wkACAAGAAgABgAJAAwADwASABEACwAEAAEAAAADAAYACgAMAA4AEQAXAB8AJwAlACQAJAAbABMADAAIAAUAAAD5/+//8P/0//f/9f/4//j/+/8CAAUADAAUABYAEQALAAUAAQAAAPr/9P/q/97/1P/P/83/zv/Q/9D/zf/J/8f/y//N/9T/2f/b/9v/2v/d/+P/5f/n/+n/7f/u/+//8v/8//7///8FAAQABAAKAA0AEgAXABYAGgAeAB8AHwAnAC0AKQAhABwAFgALAAgABwAHAP7/9//z//P/8P/v//P//v/9//7/AgABAPz/AQAFAAUACAAIAAkADAAQABUAHAAhACUAJgAiAB0AGwAcABgAEQASAAwAAwD1/+//4v/W/9f/3P/d/8//zv
/P/8L/yf/H/7b/yP+5/7z/uf/B/8f/0f/T/9X/2v/d/+r/+P8EAA0AFgAaAB8AJgAqADAANAA2ADYAMQAxADAAMAAzADYANQAwACsAKAApAC4AKwAjABoADQADAPz/9f/q/+j/3P/X/9n/2P/b/97/4v/o/+z/8P/1//z//v////7/+f/w/+//7//z//D/7f/o/+H/3v/j/+b/7P/s/+b/2//V/9P/1f/T/9L/0P/O/8r/yf/Q/+H/8/8BAA0AFAAWABsAHgAeAB0AGwAXABEACQAFAAMABwAGAAYACwAQABoAKAAuADUAPAA6ADYAOgA8AEQAQAA9ADoANgAvACYAJQAkAB0AEQAJAP7/9//x/+r/5v/b/9P/zP/I/8f/xf/A/8H/w//I/8v/0v/a/+P/6P/r/+f/6v/o/+n/5v/i/+D/4v/f/9z/4P/k/+n/7P/s//X/9P/3//3/CAAUABgAIAAbABgAGgAZABsAGAAZABQAFwATAA0ABgAEAAYACAAJAA4ABQD8////9f/3//7/BwAJAAMA/v8AAAYACQAMAAwAEAAPAAYA/P/8/wAABQAJAAcA/P/9//T/6//r/+//+/8BAP//+v/5//v//P/9//P/6//m/+T/3//c/9//4f/j/+z/8//1//P/9//8////CgAQABIADwAFAP7/CQALAAUABAAFAAMA/v/3//z//v/7//z/+v/7//P/8//2/+z/8P/6//n////8/wEAAgABAAQAAgABAAQACQAEAAQA/v/8/wAA/P/6/wQA/v8CAAcACgALAAUAAgAMAAgACQAJAAEA+//u/+z/6f/e/+H/6//u/+b/2P/V/9z/7P/j/+D/2v/O/8//0//b/9//7v/7/wIAAQD0/9r/qf94/3T/eP+GAHMC8gIZAjABVgCD/0H/6v+vAL4AMwBI//n+jf7H/Xr+Bf8+/0MA6f+T/6b/lf/z//b/bwCOAD8AbQAbAMf/6f8DAB8AYQDmALAAwv+zAIAAqf6X/jf/9f/y/ycAXgBDAJ4BNQKxAe8BYwKwAfQAMgBg//T+Nf/n/8X/MP/+/lH+KP3s/Fb9G/6B/k/++/2m/ST+1f7W/vD/kQEXArwBEgEvAdIA6wB4AVwBZAFjAdgA4//8/zEAPwBoADcAzv9k/7n/3P/K/2cA8QD5AC8BSQFxAboBsAGYAUQBZgHBAZcBhAE0AQ0B9gCqAK4ArwDmADUBPwFvAUEB5AATAQABdQHdAU0BDgFRALz/mf8i/67+6P1D/Y78ovsX+3T6jvrz+u36Hfvz+k36HPrl+Z/5QPmH+J/3Rva59DPzGvL38S3y6vNE+WIA/AUaCTcLcg1FEOoTmxaqGNEaKxrwFLcNyQar/zX6JvjO9uH19fUu9Evx9vDL9KL6xACaBzkOqxLsFIEVNRUkFeUU8RLqDr8JYgNC/ND1TfHL7iLur+7L7tTujfDm85j3+PuaAIUDsQSFBDQCcf4C+1z4l/Wl8tXvPO1y6kXnbuQ44yXmHvEgA+wRJxVnFFMY6hkiFusV2RmWG2oZoBFLAqXzpOxO6BPkq+Vv7HLwKvB5733xnfjXBEMRzBngIPYmRScRIm0dyhpkFjUPYAbd+/3wQOiB4orft+Bq5aXqfe9j9fz86AWtDyAZTCC8I9sixx66GXkUXg7WB3MBS/oI8l7qfeRS4Wrhd+NX5XLnn+oB7ljx6/Ui+2T/6wItBX0EcwKcAMT9x/k/9onyAe5/6lTo9+VC5IPmte8HAbITChvNGMcZbR4sHx4dEhzWHHkcXhNIAYfyFu0e60PpA+vQ8Bf3B/s6/Fv+RAZqEREY/BnMG+0aCBXgDh4LewfXAvb8O/Z48ZDwQfFU8lj19fkv/icCUgbvCVQNdhFOFFwTqQ9MCx0GdgCE+yX3s/KK72fuKu5z7jPwYfK080n1O/i2+q775PsY/Ij8Rf2z/Uz95/sa+p34C/d79H/yLPGm7Zbog+f46ejpa+s89Kj6zPuWCHghRT
BeMAoueCvgIxQYjQx0BcUD2wCE9FDjxNn22onf9eah9UkGuRBgFCIVJBcYHLEhDyQpIzUgnhjHCpD8wvRs8kvw4uwm6p/pXuoW69fsk/Jq/NAFvgqmDNANBw7hDOQK4QjXB8wGpgMw/jX4MvOD8M3wR/Pb9c33hPnv+aD3sfX59gj6ev2bAMMBCAE7//r76Pg8+NT4jvmQ+kb6D/eK9NbzmvHU8Gn1rvkX+3AAAgfaB3AJ9xM7IhMoEiR/HmkZqRIhC6cDCv80/1f8h/E66Lzn0+x09Ev+ZQhsEMYUkhQyEqcSgBVrFOwOsgnjArz4s+8o6xLrk+287lHvAvSx+j3++P8RA5oGUAglCAAHcQayB9UHbwNh/j78tPrN+PX3u/gX+zn8Qfrd95n2lPXO9a33rPp+/er9vvzZ/OX9BP/SAHcCPANdA7UBP/4x+2P5hvfP9bn0w/Pk88b0h/Tc9Xz7pQFKCAYSABrqHywpyi6LJ7ocaBjMEo0Ei/Zw8Zvv/ep050/pGe9C9bD5C/2HAmQKEhCXEUoSNxInDpAHhQLG/pr7bvr++Vv35PMA8q7wLe+C8Eb2gfzf/sb+s/+cATkDlwYYDfES7hMwEb0MqQbaAND9hPul97bzkfFg8CXuTOyD7wX3j/xQ/88C9wWNBSIDQgOdBHsCIQBlAS0BW/4p/Wv9g/5LAFwAg/+Y/hz8ivqH+g342vcv/x0EVAAf/7QH7BFQFioYiBrYGu0V0AvmAIz5S/OC69bmRegV7CPwEvVK+u7+fASWC08QYRCnEHYQNwpsA0ECRAAw+lr2R/bb9J3xv/EU9tH5Ffxp/00DRwaNCPcJkApLCsEI0Qa5BJUB1v6p/UL8q/mJ93D2mfWj9eb3uPq6+138Hf+eAtUDygIaA/QFfwdiBk8GtQdXB1gFngS5BKcCZf/n/a38kPrl+J73Afaf9MrzP/PA8hXzj/aQ/EoBWQYKD9AUMBLzDnkPHA7DCa4GEAPS+3fzl+5p7S3tFe5T8jr5VwHBCDgM4gwlD2YQ/wzICcgIJQavAT/9uvjJ9DbzA/Of8T3xtfbI/V4AgQFiAwkENQWYB3IIpQjICskM1gq/BfkB7gDR/539nPxo/LP75vv7+/35X/kN/NH+/P8eABcAlAGnA4YDRAKjApoD3AIwAYMAxQBEAOf9l/uY+kP5Dved9dv0BfTD80H1fveW+H38HQZiDH8KUgqVDpcNkwfCBVIGiQH++b31jPMV8Y7wvPPg+EP8uP2OANkEuwYhBWsENQiVDJUM5gqtCnwJVgYRA///0/wa+iD4DPcY9z75OPwZ/ZD9z/8VAQwCDAezDVMQtQ9iDigLDgb1AoECpgAi/Sj8SfzQ+KD0uPWP+pv9gP7O/6YAd//X/pUBHgVBBqgFWQSyAUT+mfur+vb6H/uG+rb52fix95j3Dfra/VsAwwAyAX4CUQBJ+Tb0ivUR+z8AmQDJ/Zj9i/5j/Jv5n/rX/y0FKQW+/3r5MPaS9vL4Xf0/BNcJzwtkC1gJ1AfQCUYOjRBCDYMHhQOg/zD7svlg+zP9X/0S/N76m/tt/rIBzQMOBUEHvAn9CMwF+wTCBuEGgARLAugBKQOmBHIEIQLn/5r/fv95/ZT7z/xK/4v/Gv5C/Gb56fYa95X5dvw//r79tfu8+oX7tPwt/nsAYQK3AX39qfdK9Gn02PUz9834Mfv8/NH7iPmg+mP/NgSqBvEGMwVzAAr61fX79Ev1YvcD/cgCewSSBMMF4wWGBV0I6ww+DqcLjwhMBnIDiP9n/JX8kf/xAH7/of5Y/9//UQBVAeEBFAHoAD8EHQntCpUKkQqHCdUG8wN/AaMAxALlBbUF1AG8/lH/3AEtA6wCegK2Aj4Bn/0G+ZX15/W492T3x/a9+HH8fP/iAEMCQgRbBHgBiP0T+1D7S/vy+Aj3o/Yt96f4yvnU+mX9cQDTAbgAlP4k/uv+BP/J/pT+Yv2m+5/6/PmA+Yn6F/0E/zX/Hv74/KP8y/
yI/ZL+aP9/AGYBVAHPAYcEzweXB2EE0gJOAjAB1gT/DDoPAglCA2oBv//l/vECrwh0Ct4J6wjBBDr+CPu4/Nb/CwORBsoIEAj/BHoB/f7A/cv9aP5y/gT+T/3C+y/7y/ws/r/+RAFYBWAH2AWEA4wCoABm/UD8Yfxy+/L6RPvA+sH5UvkW+jr8H/6V/pr+gP6K/Ub8sPvv+q35wvmu+778xfvw+oL8nf91AZMBdgHDAOz+i/0c/WT8B/zb/ED96/u8+mH6ivlD+S37a/1Q/igA0AKIAtsBrwj9FKAaIBbUDzILLgX4/uH72ftW/fP97vqF9XPzK/hoAP8HPA7wEqwTLQ+BCGMD2QABAd4BFQAy/PD4O/bU83nzHvdd/d4BSgPbAwUEZANeA2MEaAakCdULeQo9BssBo/7t+0r5+Pj1+rH7V/kQ9lj0uPS69pL5vvxxAHIETwdfB3IE6QD3/mv9aPst+vf5ovms+OP3JviX+KP4tPnK+9v8mfyo+zr66vg6+JT41Pod/q8AQgO7Bb4F2gMVBBcKpRWHHs4aVA8HBzcBR/g28ArxWflP/+39Jfq5+Sv83/8uBmEPqxecGpAXbxCnBxQAt/u3+Zv3x/TH8S3vFO6570n0I/viAnQJ3g0dEPcOYgsxCmEMDw2tCTAEYP+C/D/6VfeO9fv2TvpC/Dj7Bvnm+Ef7xv0//xwAtABQAQAC5AEgACD+Zf0w/TX8kvry+BX4+PeY92v3TfgB+db4Z/gG+Pj2A/Xp84X0qvXI+Ef+egIeBsUKMQxADDsUiCF8JOAYXAuxAZz3eu0Q6s7vy/jE/UL9pPvC/C4AJQRHCpATORsyHJ4W6wwMAwb98/kK98j1gvZD9eDxkvD78qr3UP3LAkUHCAryCbMI2AmbDDQNJQtnCAQGzwO+ANj8YvrI+sz7gfov9+f0Bfad+A760/t4/x4DAgWxBdsFmwSYAcH9Dvqy9uPzBvIs8qb0vvfW+uX9uf/6/2n+0vrl9qfzsPCa73jwcPI89lb60v3sAbYF/gxnHrEvuC7qH3IS0wa4+T3vtevV77D2oPim9EbxxvPM+pkCugtBFzIgpCCmGQMQpwb6/8/8y/pu+Hf2mPTi8XPvXu8f8xb6yQF4B/4JuAp2CyUMUwtZCfsHzAcVB98D4/7f+lb5oPmm+X74hviB+93+KP8y/YP8ev7yAAsC8AKjBPQEcwLa/dn3IvL97ovuOfCl81P4yP0iAdAAMQA9ADH+XPr29RjxOe7a7sLwj/I69mL8pQH3Ba0Q8SJDLokoABwmEmkHP/tu9PLzHva6+Jb3SvEO7Y/wQvjJAIcMuxnzIKwfsRjSDlQFlf9t/S38LPuS+mD4x/OV7/7tku/U9CP87wGhBXYJ3QxCDcAL+QsnDRAMkgjCAyb+zPk9+Nz34PZ89mz4Kvuz+/r6tPw5Ac0EcAWlBD8E/AMvAsL+XPvs+Er2dvKf7r7ty/Bv9Y76QwCIBMMFGQTB/8n5hvQK8Z/vmvDS8pP01/X197L6sP3bBeAY3i01MlwmIhoyD7YA2/Qu8S/zoPc/+qz2rfC27wz0O/pzA9MP/hnaHZYbThQvCp0BEP3f++T80/1j/OL4xvSw8CXuoO9L9br72QDkBWcKbgt0Cb8Ivgl6CjELwwu2CZ4E8v7w+R71PvIJ9Nz4Hvxe/OL7efww/ZH9XP+oA78IRQsMCfoCDvz99erwbe3I7Sbycvf7+2//qgDd/9D+XP3C+gL4mfVx8zHyiPLO8631+Pgj/Cr+YgScFDQnzSs6Ik8YGg/XAUr28/ET8373qPtq+pv1lfN59XX4CP4tCPASDxlUGUUUZQuFAsr8AvuI/EL/PwBD/qj6uval8v3vUvEE9r77fALnCG4LMgo9CUEJOwhxB98IZQpyCeQGbAMD/uP3bvTC9L/2e/iP+rD93f+2/yH/VQD6AmgFQwYsBfACe//Z+ejyOu5a7rfxJvbv+iL/KAJvA50BQv3V+JT1XPOy8XfxV/
Oi9cT3zflt+mb98QqhHQIlhyBLG4MUpghE/XP38fb1+WH9yPxE+IX00fPY9IT4wQBACx8T3BVzEwQNGAXQ/mj8tP3RAGUDdAPtAIb8nvZq8ZLwnvSC+jYAvAV4CVIJ6gZsBV4FLgZzCD4L+QtIChAHBALe+7/3hPdh+YX6svoi/Jb+YP9//tX+UgH7A9gEVQN5AJv9cvqH9WfwOu8/8gr2mPlG/n4D4gZyB78Eqv4P91vwseuS6vPtmfKT9ZL4EPyq/Lj9cQh7GJggzh8GHeEWDQsI/7X4jfdj+Fr6Gfz9+i74Avdt9+r4Vv6OBwIPuBE/EX8NeAZwAGD+Rf+TAbkDewRbA2D/lPhR8kvwk/K19jn8lQOACYEKrAhWBw8GwAT0BYoJdwtvCrwIHAaPAAX7k/nc+mr73Ps3/lYACP+C/Hn8Nv5i/+D/TAA4AGv/a/25+dP1N/QN9Z72vfgK/aQCGwYBBhsDK/3u9Nft7ulV6X/sk/K/+HX9ZQAdAJ7+DwQzEZoaSBsbGUwVJAyJAN35BPkF+kf8/v4p/9L9Mf3w+zD7C/8gBs4K9gt+C+UIBQR0/3n9jf4iAkIFPAXqAmD/1Pmq8xbxu/P0+Lz+EAVkCbwIuQVZBDIE3AO8BBkHigjwB74FGgI6/on8TP1n/t/+nv8WATkCzAEBAMT+ef/3AIAAQf3z+fv4qvhB99b2ifjG+Z35JvrH/HcAQgOmBDkE7P+l96TvJevz6dzrH/DU9DX6jgAFAxsA8wE4D7AcBB5mGF8T5Av7AMT4rfXJ9kH8TgIaA5UAGABaAJv+WP4hAwAKQA63DqoLwwWc/xf8Evwh/gcAFQG6AaEARPyk9pDzZPQs+Hj9pwIhBgwHpgUcA98AfQDxAgoH4QnyCToIOwWsADv91/3WAMsCRwMnA4EB6v2U+uv5lPt3/Qj+ff2t/Ib7yfko+F73RfiE+1r/pgE3A/oECwXtAbb9F/pq9ePupOk+6ETq0O5+9Nb5OP/CBMIGogMxAzkNuxmiGqcSzwxuB//9sfaN9jr6Xv+eBSEJaQeYA8QA8/1c/Nb/iwbRCrMLXAomBl8AZ/xC/Oj/gQQcBmUEKwG0+53zeu0N7vbzQvswA1EKWAz+CLkEXwGK/tP+iwNMCAAJLwedBEIAGPxz/EgBFAZ4CEAJTwc5Aen5RPUk9LX1tPn//hgCcQEs/0f8zPhW9/T4mvsU/qcA5QLaA58CX//g+rL1zPDM7aDt5O4+8Mnz6/nJ/uUBoAWxBtAChQJfC4YTYRIWDRcJtwMr/df6If0JAJcDmQijCosHjwNMAEb8LPr+/FcC0gbOCcgK2AjqBLIBLABh/xT/Nv+2/gD9C/oH9lDzhfR5+FP8uwAUBgcJ3wcFBXACUQAAAC0C5AQrBo4GfQZpBEUA5P2w/x8D1gQtBCkCW/96+yb3DPUy96X7Lv9eAY4ChAEh/ur68PkV+5X9mAAhAisBIP9r/C34cPRr8zvzyPGn8B7yEvXH92T7fgDJBNQGDAaPAR79Kf9TBiIKGgnhCDMJ/wW+AQoBfwMlB48KewrMBQMAl/tI+DX3rvq0AX8IkAxVDQYLRQf7A/8BPgEKAToAOP6Y+wv4bvOC8BjzMfoOATIFwwcACdYGCQI8/qn9RQDXBD0I9QcMBj8FsgP9/wz+WgD6AhEDUgJiAcv+g/tB+iD7Q/wM/Tn+u/+UAJ0ARABU/wH+vf1P/mL9UPtw+pf6FPr8+ND3nPYB9pT2Ifj9+W774fu1+wT8k/zz+2T6efq3+8b63Pna/n4H6AsLC8gJTgl+CFMIxAmCCx0MuwqkBjoA3fmM9Tj0G/dX/e4DMglkDCUM3wj7BDIC7gAAAZgBBAHK/vT7Cfkx9gr1pff9/IQB3wP7BIwE4gGc/ir9Qf6uAYwGLApkCq4IYQexBUgCTf8u/14AZABN/1L+iv3B/FL8UPyS/GL9I//hACsBjwCsAOoAXP+2/Cb7HvuT+9/7qfuz+rL4BPYj9Fr0Lf
ap+AL8if8+AY4AwP6e/AX6Ovi5+Jv64vvI+3f6PvrV/rsGOwzvDjIS3hMmECIKVAbzA/MAd/7V/MD6Gfmj+WP8ywBiBqsLxQ4iDhwKQwXqAAn9n/on+lP6pPpS++T7yftJ/Pf+wgKNBeYGNQb1Aov/A/4M/X/8c/6BAtoF2wYQBhoFMwW6BcAE5gF8/0P/Mv9E/ZD7Efxp/SX+sf7T/lL9Vvtj+0r9HP/BAPIBuAA6/e75yff59vD33Pn4+hb77Prx+e335vZF+D37xv7IAR0DtAFj/jj7uvgo95z3kflb+vf6DwBWCXgQfhEWD5QMPglSBDsAa/+XAZUEiAWPA6cAnP7e/Wz/tANoCCELkAs+CRsET/4S+p738PaA+HX7VP73AN0C+QLMASUBhwEvApcC0QLWAq0CQgKiAVIBqAFLArUC7QIcA2IDzgMBBM8DfQOhAmYAiv11+zP6RvlN+bv6fvyj/d7+vgAJApMBJQAh/63+0P1P/Ib6Avnn97b2B/XV86b0l/dz+zX/jQLEBLkE6wJJAWcAgP+K/hP+9/xo+bL0qvLm8ofybvQn/QkJShAxESQPrQw4Cn8Iywc/BycHCwgyBwsDTv9j/vf9gv1r/6cC4AK+/7L8g/vB++/80P6FAY0EiQYJBwYHXgZbBLIBtv+m/l3+1v6t/+f/M/+F/gr/DQHYA2oGhggJCqoJrAagAhr/WPzZ+mj7AP11/ab8Uvyb/E38Bfw1/Vn/cgGYAyUFwQTmAqwArf16+V/10vGw7lntGe/i8qL3Sv32AjwHqQn5CecHwwRTAo8ALf4n+uz0ue/D6+rp/epL8GD6jQZbEK4VoBaRE4wO9wkjBnwDngMRBVAEXAGo/gf9qfxD/mcBAAWECPIJSgcGAl79Q/qL+DT5vvxXAS0F2QfwCAwIbgYSBZoDSAImAowCzgEZAOf+uf2K+0b6SvxkAHgEdQhJDNMNGgyfCGYEGP/y+QX3Nvbs9Qf2XfcK+in9VADVA+gGogetBUAC/f2S+cj2MvZ69oj2z/af97v4ffoX/ST/9/9QAFQAh/9g/s798v0i/i3+Gv6a/Q79gP3M/t7/QwD7/2/+iPtC+Cr2kfVg9kf5M/5WAwkHsAhNCD4HMgjPDPQSDhZPFM4PWwpBAxP7yvSw8kX08vf++8v+7f+2AOIBOgOHBZAJqA02Dy4Nmwj5Ag/9rvci9E3z2vU4+1kBkAb3CrENqgz+B1MDhgEWATT/uPyv+xH7Yfh59K7yz/SM+U7/HQWZCaQLKQs8COcC3/wM+VP42vi3+BL4avcX96T3JPlC+1b+LwJCBUAGGQUxAjb+ffoM+Db2FfW19nr7CwDoAfIC3QRYBT8DaQEiARsAcP3R+TD1WfBa7i/wCPTR+Y4CkQsfEWQTpxRCFQQUzBA5DAcH7QEN/eb3RPOy8dnzpvcn/F4CCQmNDScPPA5lCiYE6/0J+sT4wflU/BD/oQACAaIAKgC4APYCJgaWCNMIMQb0ALb6hvUT84jzffYX+9b/ZgOiBbwGEQdWB1sHvQb+BSgFfwOmAAT96vgf9a7yyfHR8RPzCfb8+Rz+WAIABscHmAd/BlcEeAC0+6j3ZfRy8f/vc/FH9RT6gf9nBU0K2wwnDckLqgg2BE//MPpj9JruE+sS62fuz/WbAaMNRxR8FQUUMBAoCo0FggQ8BOEBZP6u+tT12PA1713ydPnUAyUPQhajFqkSmQyVBID8Hfiv+Mz7Pv/LAT8C1P8w/NX5RPmV+qb+QgQeCIEImwbSA18AWP3e/Mb+FgEMA2EEggNNANL8kfrQ+fP67v0yAb4CowKoAS//V/uS+F/4Dvp2/BH/IAHqAYABXABM/g38Zfto/D39lv0f/uj96/uY+eX46Pkc/FH/mgJzBGcEbAOYAc/+SfxP+3v7SPxW/cj9DP0F/PX7vfxY/fn9Tf/cAGQBaQCS/pn9jf7NAHgDaAYyCQwLHAsaCWoFagFK/ln8f/sF/Or9ggDOArUEPg
bbBgkGlQTdAycERARMA10Bd/+z/Zb7yPnn+eT7Z/73AIwD/QRFBDACUACE/mf8M/vx+5j9Qf8IAbgCkAMOBHwFqgf6CIEIaAZaA+P/avwO+Xj2sPVE99f5mvt6/KL9x/7B/ln+Ev+PAHUBbwEEARAARf4g/I/6vvnj+br6OPux+mr6f/uo/Gj8P/xz/rYBzwKYAXoAXwAgAKv/6f/wAMQBvQG8AKT/yP9rAeQCVgPIA4EEhQNkAJ79vfwa/Wf+MgGeBBYH7AgyCxkMfwmtBbQDkAJhAEj+o/2l/a78QPtg+tL5uvlz+6H+6gEJBWEIGQvLC1IKpgcVBMT/t/va+Lb2QfUG9Qn2YPef+Fn6Bv2v/4cBFAOrBCgFaASsA8gC/f+g+yf48vY396b4cPvU/pkBbwNlBHkEPQSUBOEEFASTAm0BLAA2/uD8nP1b/zQANgDIAPMByAIhA70DXATDA7cB3P8F/zD+j/xp++z7Wf0//nj+l/5k/9gAvQGfARACAASrBb0EBgIwAHr/J/5s/Pz7Tf0B/1EAXwEaAjQCgAJKAyYD0gHnAOMAwgA7ABwAeQCdAAIA5v6z/Sb9Nf38/BH8N/ue+ob5PPgx+FX5bvpZ++T8av4b/93/KAJLBVYH9wcOCHEHgwWvAn0Az/8xAKoAPQEuAjADxAMXBHMEaASbA8gCnAKIAsIBwwAlAI//iv6V/S39Ov12/Q3+1v4x/w7/Kf+D/4f/Lf8B/9b+QP6o/cz9I/7s/WX9V/32/ZX+y/5I/08APgF5ASsBAgFYAa4BjQH4AEsAyv9X/8P+Qv4g/nj+4v7h/r3+Mv/0/zoA6/+j/3D/J/8S/7j/jQC6AHcAoQAcAUYBPgGzAScCcAGF/9/9FP2O/GH8Zv2u/0ICKgRnBSAGTAYRBs8FfQW8BIUDTgILAdr/NP8+/5H/PACYASQDegN8AmkB5gBHAAb/wv14/er9/P0q/Qf8Z/s/+8f6K/qP+gz8UP3i/Zr+gv9n/3f+Nv5M/2kAkwA3AND/Nv9k/s79lv2l/Sf+vf6s/hz+AP6H/kP/UwDpAUIDwAOCAwkDZgKBAXUAtP+X/x0AqwDjAAcBkwHqAaIBMQExAZgBFQLKAloDdgMuAxcDQAMkA4YCvAEvAc4ALwA6/5D+hf69/tT+5f4P/x3/3P7A/gH/Mv8F/wT/dP/j/wEA9P/S/9D/IwB4AB4AVv/f/vH+4P5k/g7+af4d/7j/SwD7AIIBsAGKATMB4ACnAF8A8/9x/wr/yv6U/k7+Sf55/oj+lP7N/vL+zf7A/lb////8/9r/gABSAZgBugE1Aq8C+gJjA8UDqwNNAxIDhAIqAeH/nP/4/7H/5/6j/v/+Zf8BACoBUAK2ArQCigKnAQsA7/63/kf+Hf1G/Eb8Vvz3+7/7Avxo/N78VP1B/Z78e/wt/ZL9K/01/ZH++f9CAOv/2//A/1f/Gv9W/97/rgB9AZoB5QAUAI3/FP/M/k//zgCAAqsDdQTtBM8ELwStA28DNQP0AvwCxALNAa8AMwAvACwAcQBmAXIC0gLAAtsC7QKaAhoC5wH0ARoCOgIzApwBWwDb/pb9t/yL/E79kv51/2X/mv6s/ez8iPya/Cf99/2p/sP+IP71/Bz8OPzd/HP98f1y/rP+Nv4a/Tj8MfwD/ST+//6A/8v/sP8a/1D+yf2m/cL9QP46/4EAmAFXArUCnAL3AfgAEQDP/1kAZAFnAhgDagOWA3cD2wICAk4BBAEPAU0BtAExApgC4QImA24DyQMjBEoE6AMFA+wB7wA8AAAAMAB+AIkARgDt/5j/M//J/p7+rv6m/mL+2P0Z/UT8jfsf+zD74fsO/VH+K/9//1H/sP6z/aX83/vF+3/8lv03/kr+cP7a/vP+if5U/tj+sf81AFQAZACgAOcAIAEyAT0BfQEBAk4CNgIKAicCTAI1AicCQAIyAtsBgwGSAboBuQHAAcIB0AG0AYcBUQFBAV0BtQECAuUBQwFnAMj/VP
/h/oT+zf60/7UAZAGuAYwB9AANAB//jv6l/l7/LACbALcAdADL/+v+YP42/jD+Rv6V/rf+I/5s/Sb93vwy/PX7pPxT/V39If1M/c79Cf4p/lH+gf7h/hj/8P51/ib+df6O/p7+Rv9eAFsBLAIrA5QD/wIxAs4BdwH0AP4A6AHwAmcDMAO9ApECrgLTAuYCCQNlA6EDIAMHAikBwgAwAJ3/X/94/+T/DwACALH/Yv8X/2D+rf15/cH9V/5B/8v/CgAGABAA+/+b/2X/N//u/oL+8f0t/c78PP37/Xj+rf6s//oAowGBAYoA4/+X/yb/Vv5b/Sb9ov34/Qb+9v0C/qH+Zv9hAEcBRgIKA1MDyAKwAeoAiADHALUAjwBSAA4AHgAFAJEA+gAJAs8CfwPFAxIDOQKOAdsAlQByAAEBSAG9AY4B6QBhAPL/rv+O/zf/bv8H/0X/Hf/8/hX/IP9i/zD/L/8q/0H/cv9R/33/aP9//6D/jP+R/87/sf/b/5z/zP+4/93/yv+v/8H/tP/T/8v/qf/y/5r/CgC1//D/3//l/+X/2f/o//b/8v8EAOn/EwD8//z/9f/7/+7/EAAPABcABQALABUA+v8NAPn/BQABAAYA9P8EAPn/BgDr/w4A5P8TAPv/AAD///7////1/wUA6f8eAO7/BgD9/9r/FQDS/xIA4v/+/wwA6f8XAML/KwDB/wgA6/8BAOT/BADi/wkA6f8HAOX/BwD+/+b/HgDO/xUA7f/n//3/5f8EAO//9v/z//7/8f/5//n/7f8PANv/EwDr/woA6f8PAOP/FADm/woA7v8EAPX/CQDz/xMA4P8aAOX/CQDn/xAA8P8KAPX/DADo/xAA6/8OAPH/DwDq/woA8f/1/wUA5f8SAOD/DQD1/+//EQDn/wQADgDu/xMA+//w/x8A4v8VAO//DgD7//3/AgDp/x4A3v8hAO3/BwD8////AQAIAPL/FwDo/x8A9/8HAA4A7f8hAOX/JQDa/xkA6v8JAPf/DgDk/xUA9P8HABsA5/8eAPH/BwAAAP//BQABAPf//P/5//7/9//+//P//v/5//X//P8JAOX/FADm/wgA/v8DAPH/CgDm/wsA8f/5//7/6f/+/+//BQDg/wcA5v/5//X/AgDu/wAA7P8GAOD/GwDk/wkA/v/2/xQA9/8GABIA/P8IAAQA9f8ZAOX/GgDt/wMA///z/wUA+////wcA9P8MAP///v8BAAYA8P8VAOn/CQD//wEA6/8OAN//EQAAAOf/HADb/xkA7v8JAO7/CQD+/wAACgD2/xYA6v8fAOz/GAD0/wYA7/8IAO3//v/5/+7/AwD2/wYA/f/9/wIABQD6/wQAAAD9/wQABAD5/wUACQDu/xAA7f8NAOn/FgDp/xIA+f/x/xwA5v8UAPn/BAD8/wYAAAD+/woA+v8IAAEABAABAAkA/f8FAAEA/v8CAPv//P8CAPT/AQDz/wUA6/8JAPb/BQD0/xAA6P8OAOj//v/2//r/8f8GAOj/AQDy//b/8/8FAOr/DQDd/w8A9//6/wUA6/8LAPb/9v8DAPP/BAD//+f/DgDm/xMA4P8TAN3/DwDs//n//P/y//r/9P/p/wcA7v8QAOb/EgD4/wcA8/8UANf/KADU/x4A7f8MAPj/AgDz/xEA6/8RAPP/DAAHAAsA7/8eAOH/HQD0/wcABADy/wgA9f8QAO7/DAD0/wkA7v8UAO//BgDv//r/AQD///3////8//3/AgDw/xMA5f8SAPD/FwDu/xUA6v8IAP7/+/8GAPf//P/9//7/8P8TAOX/EwDp//r/CQDk/wwA+v/r/xMA3v8ZAN3/FADa/w0A8v/3////6//w////6P8IAOr/9P8HAOr/CQD6//H/CgDd/xMA5v8BAPX//f/w/wEA+v/z/wAA///t/wUA/f/p/xEA8P/6////7f/9//7/7/8EAO7/9/8CANv/EwDm/wkA9v/w/wQA/P8BAPz/AwAAAP7/FQ
Dr/xEA+P/z/yAA1f8fAO//BgD///j/BAAJAAIA+f8HAOr/EQDt//P/BADl/wgA8f/5/wwA7P8IAAMA/v8DAPn/AwD5/wIA+/8BAAAA8/8MAO3//v8FAPn/AQAGAPT/CADi/x0A1P8VAOz/+v/4//v/7f////X/9v8DAOP/BgDd/wQA4//u//f/7P/x//7/5P8LAO//AQD7//X/BADv//f/BwDg/xcA4v8EAPb/+v/x/wEA7f8BAPz/9f8HAO3/CwD2//v/BQDx/wUA9v/4//r//v/1//D/AgDs/wIA7v8DAOz///8FAOj/DwDq//7//v/3/wAA7P8TAN7/FAD1/wIACAAGAPv/CAD+/+7/GwDi/x4A8P8NAPf//v/+/wIAAQD1/woA4v8QAO///P/+//n////1/wgA8v8NAPD/BQD0//n//f////b/AQD6//n/CwDp/x4A4/8YAO3/BAD9/wAA//8EAAIAAAD0/wkA4f8FAO//8//2//X/8v/1//f/6//5/+z/BADl/wMA9v/4//7/7//8/+v/DwDb/xUA1P8WAOn/CwDq/wYA+f/r/xEA2/8WAOv/+//4//3/+/8MAOP/GQDl/woA7/8EAO//AgD4//P//f8HAOz/DwDn/wMAAgDu/wUA+P/6/w0A8/8MAO//EADq/xwA4/8ZAO3/DQD7/wAACADu/xkA5v8VAPH/DAAFAPH/DwDs/xAAAgDx/w0A8P8DAAAA/v/z/wUA7P8GAPL/9f/8/+//BgDq/wIA8v8AAPn/+P/0/wwA7f8IAPz//P8JAOb/DwDu/w4A9P/5//3//P/8//z/AQD4/wUA9P8IAPf/BAABAPn/CADz/w4A6f8QAOD/BwD1/+///v/x//j/8v8CAOf/DwDw//z//v/r/xAA4P8MAPn/8v8VAN//JQDx/woA/f8OAAQA/v/9/wIA9/8IAPf/CAD5//n/BQD4//j/DgDx/wkAAgD4/wEA7v8HAPH/AQD9//D/BAD3//v/+P8KAPb/DgDs/xAA8/8LAPr/AwAGAPv/CQD+/wYAAAAGAPX/FADm/xYA6v8BAPv/6P8WANz/GQDY/w8A9v/u/xIA5P8bAOP/CQDu/xUA5P8QAOr/BgD2/wIA9f/+//X/BQDi/xUA5/8AAAcA5/8QAPD/AAAIAPX/AgD9////9f8DAPz/9f8NAPD/BgD7//v/CQDr/wwA6P/+////9//3/wIA7/8KAO//AQACAAEA/f/+//j///8CAPH/EgDn/xkA5v8RAO3/CgADAPP/BQD1//7/BAD0/wgA9v/5/wgA7/8QAPT/AQD9//X/BwDy/wUA9/8HAO//DQDv//3/9P8CAPj/+//6/wEAAAD///f/AAD8/wUABQADAAcA8v8ZAOL/FQDv//j/DQDh/w0A8P8CAPj/BADm/xYA4P8OAOz/BQD3/wAADADs/xAA5v8JAPb/AQD+/+3/CADt/xIA7P8EAPv/+/8QAO7/GADk/xkA7/8EAAIA+P8FAAQA8/8HAO7/DgDj/xQA2/8RAO3/+P8DAOv//f/4//j/AQD1//n//v/r/xgA4f8KAOn/EgDm/xMA5/8TAPX/BQD4/wwA8f8OAPH/CQDz/wwA6P8PAOz/BQD8//f/AQD6//r/9v8OAOH/EgDl/wUA///x/wQA8P8NAOj/FgDl/wwA7/8BAP3/9//9//T/9//7//z//v/8//X/AQDp/woA9v/0/wwA3P8RAO7//P8QAOj/BAD7//b/AQD8//T//f8KAOn/EwDn/xAA+P/+/wEA9v8KAOr/FADc/xkA4f8TAOr/CgDv/w0A6P8QAOz/BAAGAPX/CQDy//v/AgDw/wUA8//5//j/AgDy////+P////z/8f8HAOr/EgDi/w8A6v8AAP3/7v8IAPL//P8AAPj/+v8EAOb/DgDo//3/BQDp/x0A1/8aAO7/CgD+//X/FADk/wsAAgDv/xkA4f8VAOb/DgD1/wAA/P/v/wkA8/8CAPv/+v
/6//7/AAD+//3/+P////H/AQD7//L/+f8DAOL/EQDm/wQA9v/2/wQA5P8TAN3/BwDu////+P/5//z/9P/9/+v/DgDo/xEA5f8EAPX/9v8GAOb/DQDr//3/9v/0//P/AwDm/wcA5/8KAOz/AgAAAOb/GADZ/xsA6/8IAPL/AgDr/woA8v///wkA9P8NAPH/DQDv/xAA/f///wsA8f8eAPb/CAD7/wcA+f8IAPf/CgD///j/BADp/wkA7f8BAO3/BQDe/xoA4P8SAPD/AgD5//v/BgDx/xQA4/8VAPn/+P8FAPT/9/8GAPf///8AAOf/FADr/xMA6v8JAPv/AQD8/wMA9v8JAPT/AwD1//z/BwDq/w0A5/8IAPb/7v8IAN//CgDt/wAA8/8FAO7/BAACAPb/AAD5//z/+v8GAOb/CwDm/xIA4P8MAOz/BwAFAOr/DgDv/w4A7v8BAP3/9f8GAOj/AgD3//r/9//9//T//f/x/wIA8v/5/wIA5P8OAO3/BwDx//v//v/6/wAAAgDx//7/9/8LAP3/8/8KAOf/GQDi/xkA5v8QAPH/+v/8//j/EwDp/w4A6/8QAPb/DwD1/woA+P8CAP7/+v8KAPX/EADo/w8A+P8BAAIA/P/6/wYA+f///wcA7P8UAO//CAD7//3/BgD+/wAA+P8KAOX/JQDi/xkA6/8EAAIA8f8GAPb/9P8RAOf/FADw//z/CQD3/wEAAQD5//r/AQD4//3/9/8CAOn/EwDp/w4A6v8TAOr/FgDn/wkA9//9/wAA/P/9/wEA9v8FAPP//f/x/wkA6v8CAPf/8/8EAOz/CQDo/xIA4f8QAPD//f/6/wIA8v8KAPH/AgD+////+P8KAOz/EQDk/w4A9v/7/wAA/P/z/wQA/f/3/xEA6f8WAOj/EgDw//3/AQDx/w4A8//+//X/AwD4////+//1/wsA7P8LAP3//f8GAPb/DgD4////BgD+/wgACADv/x8A9/8IAAsA+f8OAPz/BgD8/wMA/v8BAP7/7/8JAPL/BwD8/wQA+v8GAPP/CAD5/wIA/v/7/wcAAgD6/wYA8/8OAOn/GgDc/yAA6/8NAO//DADp/xAA6v8KAO3/CQDq/w4A7P8IAO7//v/1/wcA5P8TANf/FQDc/w8A6v/6//z/6f8LAOf/AAD3//L/CADx/wcA+v/7//n/8/8RAOL/GgDh/w4A5P8UAOr/CwDq/xEA9//6//3/AwD7/w4A8f8SAPv/DADz/wsAAAD+/wYA6/8UAOj/AQDz/wAA9v8FAOH/DQDm/yAA3f8aAOL/CgAAAPf/BwDx/xAA+P8JAAAA9f8ZAPT/EwD0/wMAAgABAAoA+v8BAAYA+f8HAP7/AQAHAPb/CgDw/xIA6P8UAOj/EADu/wMA+f8EAO3/DADk/x8A2P8UAOb/BQDx/wsA7P8QAOX/EwDr/xUA9/8DAP/////5/wcA8P8GAP7/5f8RANz/CAD2/+3/AQDx//r/9f/9/+b/DADl/wEA7v/8/+3/BwDj//v/8v/3//z/+P/x////7/8DAOv//v/3//v//v/z/wAA9P/8//7/+//9////+f/4/wYA4P8dAN7/DADz//r/DAD3/wMABwD2/xYA7P8LAP//BwD2/wwA+/8HAAQA///9/wwA9P8NAP//CQACAP7/EAD8/wsAAgAEAAYAAgADAAUA/v8LAPn/AwAMAPP/GAD0/wUADwDy/xUA7/8CAP7////8//b//P/s/wYA6/8IAOf/DADo//z/9//h/w8A2/8QAOb/BgDm/wkA4f8YANj/DwD3//f/BADx/w4A7f8QAOL/EwDl/xkA5f8NAOz/BQDy/wcA+v/2/xkA2v8fAO3//v8JAPP/AgD+//b//P8KAOn/GQDk/xsA4/8XAOP/EwDr/wEACQDq/xUA6P8NAPL/BQD1//z//P/2/wQA9v/7//3/+//r/xYA2/8UAPD//f8DAPP/AwDx/wgA7P8KAO7/CADz/wIA8v
8EAAEA+v8IAPL/EgDg/xsA6f8HAPv/9/8IAPj/BQD2/wAA+//9/wgA7f8KAPv/AAAHAPz/AAD+/wMA7/8SAPX/AwACAPr/BAAAAAgA/P8NAPz/CAACABQA+v8JAAYAAQAIAAsA9f8UAPv/AgACAP7/BwDx/w0A6P8XAPD//f8GAO//FQDn/xQA8f8DAAAA8P8MAPT/AAD8/wQA+P////z/AAD1/woA5v8MAOv/AwD1/wMA8f8IAO3/BwD0/wsA9v8AAP7/8v8TAOH/FQDs//z/BwDi/xsA3P8ZAOX/BQD2//7//v////L/BwDy////AgDy/wIA+//5/wYAAQDw/wMAAAD0//7/9//0//v/AADs/xIA5P8GAO//CgDp/xAA8//6/xQA2f8fAN3/FQDr/wAABgDq/w8A7P8FAPj//P/3/wYA7f8GAPj/9f8JAPv/8f8RAOH/CwD9//j/BAD4/wQA/P/6/wcA9P8FAPr//v8JAO7/CwACAPn/CwD8//7/FQDw/xAA8P8QAPP/CgDy/wwA8f8PAPD/DAD0/wMA9v8HAPb/CQDs/wAA9v/8//7/9f/8//T/CADm/wgA7/8PAO7/EwDq/xgA7v8RAAAA/P8PAOL/IgDb/yAA4P8OAPT/AgD5//3/AQD7//7/9f/3//3/7v/9//D/BwDo/wsA4f8IAOr/EQDg/xAA3P8UAOj/BwDr/wAA6/8GAOv//v/+//T/BgDn/xAA6f8MAPr/AAD///r/BQDp/wQA7v8JAO//9/8EAOj/BADx//P//P/7//7/DADy/wsA9P8RAPv/+/8OAOz/GQDp/w0A8v8IAP7/9v8KAPD/+/8JAOz/CgD+/wAAAQDy////9f/3/wcA7v/5//r/9/8HAO7/BQD6//3/BQD0/wQA9/8BAP3/9/////P/DgDm/xMA6/8GAAIA6f8SAOf/DADz//v/BwDu/w4A4v8gAOD/EAD8//P/DwD0/wsA+P8NAPP/EQDw/wsAAAAGAAAA9/8QAOH/JgDP/yYA5f/+//z/8f/9//j/8P8EAOr/EgDe/w0A7f8BAPv/8v/2//7/9v/z//f/9v/4//v/+//v//X/+//9//n//v/1//T/AgD1/wEA/f/x/wkA3/8WANj/IgDY/w4A6P8PAOf/FgDs/wkA/P/w/wMA/f/7/wYA+P/7/wgA+P/+/wgA9/8RAPr//P8NAPr/CgD///P/DQD1/wMABQDu/wcA9P8BAPX/BQD5/wkA9f8DAPr/+v////n/+v8AAPf/+f8EAOf/DgDo/wcA6//8//P/6f8PANb/EgDn/wEA7v8JAPf/BwDu/w8A8/8AAPz/+v/9//7//f8CAPf/DADu/wwA9f/+/wAABgD1/wsA8v8JAAIAAwAIAP3/CgDz/xIA8f8GAPz/AQD7//3/AQD+/wIA8v8EAP3/+v/7//b/AAD+//b/AgD0//r/8v/8/wUA6/8EAPL/8P8HANv/CgDi//r/AgDg/wwA4f8JAOT/DADg/xoA2v8YAO7/EQDv/wYA9f8GAPf/AwD7/wEA/P/4/xEA8v8LAAgA+v8fAOz/GwDq/yIA7P8eAOf/DAAEAAEADADx/xIA+v8QAPj/BwACAPr/CQD+/wIAAgD8//P/DwDq/wYA7/8EAPD/CwD2//L/BwDu//j/AADt////9v/1//P/AADh/wEA6v/8//L/8v/7//j/6f8GAOT/CQDx//n//P/v//7/7f8GAOr/BADy/wcA+f/7/wAAAgD9/w0A7P8SAO7/FQDv/x4A8f8FABcA7v8RAPP/EwD2/xMA7/8QAAAABQAGAP7/BwD//w0A+/8HAP7//v8EAAAA+/8JAPH/FQDp/xIA7/8EAP3/9/8FAPf/9/8PAOP/EwDw/wIA//8FAPH/CADu/wUA8v8IAOv/BQDz//P/9P/5//f/+P/2/+f/CQDg/wcA7v/7//H/+v/u/wQA8P/9//n/8P/+//n/+P8BAO3//P/5//f/+f////X/AQ
Du/wIAAQD5/wkA9f8GAPr///8BAPr/BQD2/wMACQD3//z/AgD5//r/DQDg/w0A9v////X/CgDr/wsA/v/0/wQA9/8IAP3/EADi/yIA4P8cAPL/AgAQAPL/DAD4/wMAAAD7/wMA9v8KAPP/DADw/wwA+P8CAAIA9P8KAP7////9//7/BwD3/wcA7v8OAO3/DADy/wgA7/8TAPD/CAD0/xAA7v8RAO7/BQD2/wAA9f/2/wIA7P8DAOn/+P/1//v/4f8IANr/CQDm//f/8f/w//n/+v/u/wAA7f8FAO//+f/8/+7/CQDu//7//P/1/wgA9P8AAPf/+P/2/wEA6/8HAO3/AADx////+f/6/wcA6v8IAOf/AQD//+D/GQDf/xYA6/8DAPn/AgACAAEA+/8BAPz/CwDz/w8A+P/6/w4A8f8UAPP/FADv/w8A+v8BAP3/BwD8/wQA/v/0/wYA7f8IAPL//v8CAPT/BQD5//b/AADu/w0A7P8TAOv//P8FAOv/BQD1//b//P/w/wYA7v8LAO7/EQDl/xcA5f8OAPb//v8HAAIAAAALAPX///8AAPn/BwD0//3/8P/7/wEA8v/4//r/8/8GAPL/AgD0//3/+P/4//X/AgDw/woA6P8JAOT/DQDi/wwA6//+//b/+v/2//v/9f/7//v/+//1/wYA8v/7/wIA9v8JAOz/CQDk/xAA5v8KAO/////t/wwA3P8SAOj//v/7/+3/+v/3//b//v/6//b/+v////b//v8AAPD/AAD///H/EADl/w4A8v8LAPf/BwD+/wQA+v8IAAAACAD3/w4A3/8fAOL/HgDv//7/AQD1/wMABADr/w0A7P8MAPP/BAAAAAEA/v8EAOv/FwDt/wkA/P8CAP////8AAAAAAwD6/wIAAAD9//3/AgD0/w8A7v8NAPn/AAAGAPX/BwD1//z/AAD9//3/9f/+//L//f/v//3/7P8EAOz/AQD0//v/+//9//z/+//+//n/AgD6/wMA8f8MAOz//v/+//j//v/3//n/7/8JAOD/DQDd/wsA6P8KAPH//f/5//v/CADt/xAA6/8BAPH////z//f/9f/x//b/8v/4/+//BADh/w0A7//9/wQA6v8NAPX/EQDu/wwA+v8CAPn/AwD7/wUA+/8DAOn/GQDx/wcA/v/5/w0A9/8FAPf/BQDw/xYA6P8ZAOn/EQDz//v/EQDf/yAA4P8IAPr/8/8KAOv/DwD1/wEAAQDz/w8A8v8KAPn/AQAJAPr/DQACAPj/EQDz/xMA8v8EAPj/AwDr/wUA8v/4/wYA7v8MAPH/BQDv/wsAAQD//wkA9P8cAPb/BQAAAPz/BwDz/woA9P8MAPT/+P8FAOv/EADm/w0A6P8OAOr/FQDv/wYAAQD1/wAAAgDo/xYA3f8RAPD/9v/+/+z/9v/8//H/BwDm/w4A4f8UAPP/DAD5/wYA9/8PAP///f8EAPb/DwDn/xEA7P/8/wQA7v8FAO//+v/5//P/AgDu/wcA7f8MAOn/CwD4//z/+f/+//v//f/z/wMA8f/+/wYA4/8NAPP//v8YAOb/GADt/xQA8/8OAPv/DgD6/wgA/f/8/wsA+f//////+//7/////P/q/wYA7f8GAO//BADy//j/BADt/xgA7v8IAPP/AQACAAAA+/8CAPL/BQD7/wMA+v8DAPv/AAACAPn/EADr/w4A+f8BAAoA+f8FAAMA9v8NAO7/EgDp/w0A9f/v/w0A6/8EAP7/+/8BAPX/BQD8/woA/f8DAAwA/f8RAPv/AwAWAOj/HgDp/xIA9P8HAAAA+P8KAOv/FADs/xEA8f8QAPf/EAD8/wQAAwD//wgA/P/+/wEA+P8DAPv/9/8EAO/////3//3/+f/1//3//P8BAPT/AAD7//3//f8GAPr/BgAGAO3/CQDv/wEAAwD5////9P8HAPD/CAD2//T/CgDv/wYA/P/y/xMA6f8UAPT/FADu/xQA8P8NAPr/DADz/wgAAw
D7/xAA8P8NAPX/AgADAAMACgD5/wkABAAEAPr/CQDx/xcA8P8OAO7/CQDx/wYA8v/2////+v/9/wEA+v/6/wQA8f/8/wAA6f8XAOD/DgDp/w0A9P8HAP7/+v8FAP3/+v8IAO//BgD3////+v8BAPf/EQD0/w8A+/8MAPv/BwD6/wcA9/8LAPL/CAAAAPT/AgDy/wcA6v8KAPL/BAD8//n/CAD0/wEA/P/8//z//v/8//f//P///+7/CwDx//f/BgDs/w0A5v/8//3/9f8FAPn/6f8PAOD/DQDr//f//P/o/wkA5v/8/+7/+//s//b/9f/l/wcA5f/6/wEA8v/4//3/8v/3/wQA8P8KAPP/BQD5/wkA9P8OAPn/CAD6/wQA/v8BAP3/9P8RAOT/DgDq/w4A7v/6//7/7/8NAOf/AgD6//r//f/0/wgA7v8CAPz/+P8DAP7/6/8MAN//DADy//r////1//b/+P8EAPT/CwDx/wcA+//2/wgA+f8DAAQA6f8WAOL/GwDk/woA9P/4/wQA8P8KAOv//v/+//H/BgD6//r////1/wIAAAADAPn/AwAKAO3/EgDt/wcAAgD2/wIA/P8GAPX/AwD6////AAABAO7/EwDl/xMA7/8IAPH/AAADAOn/CQDz//r//v/0//v/8v/8/+//9f8BAOv//P/1//X/9P/6/+j/CgDi/w8A5/8CAPn/+f/4//3/9v/5/wAA7P8EAPL/+v////L/+v/+//H/9v8IAOX/EQDh/wcA8P8BAPL/AQDq/wkA9P/3/wAA7v8FAPj/8/8MAPL////8/wkA6f8dAOb/FgD2/xQA8v8QAP7/BgALAPb/BQADAPn/BgDo/xIA8f8BAP7/9/8BAAIA+v/7//r/8f8IAOn/DADn/wMA9v/1/wUA8v8LAPX/BgD1/wQA9//5//n//v/+/wUA7/8AAPr/+P8HAPj/+f/9//r/+v////X/AgDt/wwA6v8IAPX/AgD5//7/+P8NAOv/GQDp/xMA9P8GAPD/DADs/w8A8v/9/////P/+////6/8KAPT/AwD9//r//v/7//z/9f/5//j/9f/6//b//P/7//j/AgDy/wkA7v8IAO//BAD4/wYA/f/9////8v/5/wEA9v/6/+//AADx/wMA+f/p/xMA5/8DAAIA8/8BAPv/9f8HAPL/AgD6//j/BwD0/wgA8f8BAPb//f/3/wYA6/8KAPD/BADy/wgA8v8IAPb/AwD///r/CAD4//3/BwD0/wEA///9/wMAAQDx/wUA/f/9/wEA+f8FAPz/+v8AAPz/AAAFAPb/CAD///n/AwD6//7//P/6//z/AgDz//7/8f8EAPP/AwD3//r/+//7//3/AwAAAPv//v8AAAIA/v8HAPT/EQDm/xgA5P8SAO3/BwDt/woA6f8PAPP/CADs/wkA7P8RAOn/BwDx//7//P/8//T/AQDu/wwA5v8NAOr/CAD//+3/FQDi/xUA7P8QAPb/AwD///r//v8FAPX/DQDy/woA8v8DAPH/BgD0/wgA6/8LAPH/BwDt////AgDt/w4A6f8EAP7/9v8BAP///P/+//3/BADx/wMA9P/+//r/9//4//j/8/8CAPP/AwD8//n////x//r//v/9//b/BQDw/xAA7/8GAP7/+/8DAPr//P8DAO3/DwDw/wIAAwD5/wIABgDw/wsA+f8DAP3/AQACAP3/CgD1/wMA+P8BAPj/EADv/xEA7v8LAPz/9/8BAPX/BgD7////AADs/woA8P8GAPj/AwDw/wsA/P8AAAUA7/8GAPj/+f8BAPr/9f8AAPz/+v8IAPP/EgDt/xUA7P8QAO7/BgD1/wUA8//w/wgA7f8NAOn/CQD4//r/AQD3////9v8GAPH/BwD7//3//P8CAPL/DQDo/wsA9/8EAPb/DQD0/wQA+P8DAPz/AADz/wEA///t/wgA6/8HAPX//f/1/wMA/v/q/wUA9P/7/wgA8v/8/wMA7f8JAPL/+f
/+//j/AgD3//n/AAD9//H/BQDx/wQA+f/9//T/DgD2/wgA8//9/wAA8f8XAOD/EwDs/wcAAAD3/woA9f8JAPL/EQDu/wsA9//7/wgA+v/5//z/AQD5//v//f/0/wAA///y/wQA9v8CAPn//v8AAPj/CwD5/woAAQD7/wUA8v8IAPX/CQDw/wAA9v/4//3/9v8DAOn/CADw//j//P/+//n/AADy////+/8BAPz/9v8AAPL/AwD+//v/+//u/w4A6P8HAPf/AAAAAPf//P8IAAAACwD0/w8A9P8YAOz/AQAGAPn/CgD9//v/BQDz/wsA8/8AAAYA3f8iANf/FADw//7/9P8BAPv/7P8KAOv/AwDz//n/9v/x//7/7P/+//T/8v8DAOf/CADs/wQA8f8HAO7/BwD5//b/CgDx/wcA9//7/wEA/v/7/wEA9v8JAPr//v8AAPD/DQDq/xIA7f8DAAYA7/8KAPv/AgAFAPb/CAAHAPb/DQDz/wYABwD1/xAA7/8JAAAA///7/wMA/v/6/woA8P8JAPP/BAD4//j/AADq/wAA9P/l/wYA4v8AAO3/8//8/+3/AQDw//j/7v/5//D/+v/2//D/BADt/wMA8//8/wYA/P/7/wYA8P8DAP//8/8GAPj/BQD8//7////6/wUACQD0/wQA+f8FAP3/BgDq/wwA/P/7/wMA+/8HAPn////2/xIA6f8bAOP/HgDn/wkA7/8GAO7/CADy//n/CADu/wAA+v/t//v//P/1//v/9v/z//f/+P/p/wEA6v/w//7/5f8FAPL/+f8AAPP/9v8HAOj/FgDm/w8A+/8EAAEA+v/9/wAAAQD4/wYA7/8DAPT/DgDx/w8A+P8EAAIA///+/wEABAD3/wEA+P/7/wUA9/////3/+P8IAO//AwD4//n////6//X/CADs//r/+//1////8//6//L////y/wIA7f/+/+z/AQD3//j/+f/u//7//f/q/wwA4P8KAO3/+P/0/+/////q/wYA6//4/wAA6P8PAOr/CwDw/wIA+/8DAP//AgD6/xAA8/8WAPT/EwDw/xMAAwABAAkAAgAAAAgA+f8VAP3///8NAOf/IADq/xQA+v/9/wYA/f8BAAoA9f8GAPr/8/8CAPP////0//z/9v/4//z/8/8BAPD/+//4//X/+//z//3/8//3//T/8v8FAOz/AgDs/wIA8//1/wgA5v8KAOv//f/1//j/6f8CAPD//P/0//f/+//0/wcA5/8IAPL/9//+/+7////y/wIA6f8LAO7/CgD///r/BwAAAPz/CAD8/wwA9v8FAAYABwAJAPj/EQD7/wgA//8KAPr///8IAPf/EADv/w0A8P8KAAEA9P8WAOP/FQD8//r/DQD4//v/DQD0/wsA9P8OAO7/AwD2/woA8P8MAPX//P8NAPT/BgACAAYABwD4/wMA+P8BAAIA8P8LAPH/AwD7//T//v/5//X/CwDy//T/AwDw/wkA8P8FAPj/+/8EAO3/EgDq/xQA4P8fAOj/DwD+//z/AgD8/wEA+/////n/BQDx//b/+v8AAPj/CgDw/w0A6/8MAOz/BwD0//f/CwDr/xoA5P8LAPP/AwD6//j//P/9/wsA9P8BAPr/AQD8/wMA9f8HAPP/DAAAAAMA+/8FAPb//P8LAOb/GgDj/wkA9f/9////AgDv/wkA+f/+//n//P/5/xAA8/8NAPP/FwDp/xUA7P8UAPH/DgDs/woA9v8KAPX/DADs/w0A9//+/wAA/f///wAAAwD8/wAAAQDz/wcA9v/5////9f/9/+n/CADu/wEAAgDZ/yMA4P8PAAMA6f8YAOr/DgD0/wAAAgDw/xIA4f8cAN//FwDr/xEA6/8GAPn/BwAAAPv/AgDx/xYA7/8PAPb/BAAAAP7//P8IAPH/CwDv/wkA7v8GAPr/AAD6//r/8P8aANv/FwDh/wMA/f/6/wYA6P8XAOT/HADx/xQA9f8HAP7/AQACAPn///
8CAO//CQDx/wEA///9//D/EADs/wcA/v/1/wYA/P/6/wIA+P8BAAAA+//2/wIA+P8FAPn/CQDw/wcA9v/9/wQA7v8QAPH/CgD4/wUA+/8FAAMA+P8JAPD/AAABAOr/EQDf/wkA5f8LAPL/BAD1/wIAAwD4/wYA9f8HAP3/+P8HAPn/BQD4////+//7//T/BwDj/xEA5f8KAPT/9f8LAOT/DwD0//z/BQDs/wEABQDx/wgA/P/0/xMA7/8IAPz/AwD//+//CADv/wQA+//6//X////6/wQABgD9/woA+P8LAPz/9/8WAOD/JADq/w0A+P8GAPT/DQDx/xEA+//0/wcA/P8BAAcA6v8UAOj/CQDy/wAA+f/7//j/9f8EAPn/+//9//f//v/5//f/+//y/wsA5P/8//b/+v8AAPj//P8FAPD/CgDv/////P/+//r//v8GAO7/DADu/wEA9P8DAOr/DQDv/wgA6v8VAOL/DwDz/wEABAD1/wcA+/8LAO//CwD4//b/CQD1/woA9/8EAPD/FgDp/xcA6v8MAP//9f/+//7/8/8LAO//DQDx/w4A5v8MAO3/CwDz//n/8f8CAO//DgDk/xMA3f8YAOX/CQD3/wEA/v///wIA+v8LAPz/AAACAAMA9f8YAOb/EADy/wgAAgD1/xIA1/8nANb/IADs////9f8FAOr/CgDh/woA7P/9//T/+v/z//3/+f8DAPD//v/8////BAAFAPn/CAD6/xIAAgD9/wwA8v8NAPn/BAD5/wUA8P8CAPT//f/8//7/8//6//X/BwDl/wYA6v/4//b/+P8BAOP/EwDg/w0A/v/+/wAAAADw/wwA8//8/wIA9v/+//f/AAD6/wQA///v/xoA5f8RAPP/+v8LAPn/9v8KAO//BgABAPD/BgDw/w4A8/8LAPL/BgDw/w0A7v8HAPf//v8AAPL/BwD0/wUA8f8JAO//BwDr/wIA9v/0/wcA5v8MAOH/FwDo/xIA7v8PAPz/AAAJAO//EQDz/wAACAD2/wcA8/8FAPP/AgDy/wYA5/8PAOH/EQDn//7//P/u/wUA9v/2/wYA6P8JAPf/+/8BAPn/+v8MAPD/CQD5/wUA+f/1/wcA8//+/wQA9P8BAP3/+f8HAAQAAQAGAPv/BAD+//X/FQDZ/yUA4f8OAPD/BwDq/xIA5v8UAPP/9v////v//f8EAPL/BwD4//v//f8AAPv/AgD0////AgD8//7/+v/6/wMA8f8AAPb/9/8HAOv/8v8AAPP/AAD+//P/CgDt/wwA5v8EAPP/AwD0//z/CADm/xQA5P8FAPD/BgDn/xQA6P8OAOv/EwDt/wEAAQD1/xEA6/8SAPL/FQDr/wkAAADq/xMA6/8RAO7/CwDr/xYA5v8VAOb/EgD3//j/+//0//7////0/wQA9f8LAOT/CgDo/w8A7f/8/+r/BwDs/w4A5f8TAN//HgDm/wcA/v8CAAEAAgD+/wIABgABAPn/BwD+//X/FgDh/xIA7P8IAPv/+P8IAN3/HgDZ/x0A7v/4//f/BQDo/xAA2v8RAOb/BADy//f/+f/5//3////y//7//v8BAP3/CwDz/w0A+P8OAAcA+P8QAO3/EQDz/w8A7P8OAOf/CwDr/wEA9/////L/9P/5/wEA6v8BAOz/9v/9//P/BwDj/xIA5/8IAAUA+f8GAPz/9P8JAPL//v8AAPj////0/wIA+/8FAP//8v8XAO3/CwD5//X/EADw///////5//z/BwDp/wsA7P8JAPj/AQD4//z/+P8DAPT/AAD6//7//v/4/wIA+f8EAPT/BwDz/wUA6/8EAPT/9f8IAOb/CQDk/xYA5/8TAOz/EAD6////CADu/xIA7/8AAAQA+v8CAPX/AgDz/wIA8/8CAOr/CgDj/xAA5f8AAPr/7f8GAPb/9P8JAOP/EADy/wAA/v/5//z/CQDx/wQA/v8CAPj/9v8DAPX//f8AAPb//f8AAPn/AwAGAAEABQ
D+/wEAAgD0/xQA3v8fAOn/BwD5/wAA8P8PAOX/FwDu//z/+v////v/BAD1/wMA+//6/wEA/P/9/wAA9f8BAAAA/f/+//v/+/8DAPD/AgD0//r/BADu//D/AgDy/wMA+//0/wsA7f8MAOn/AwD1/wMA9f/9/wYA6v8SAOb/BQDx/wMA7v8LAPP/BADz/w8A8f8CAAAA+P8PAO//DwD1/xUA6v8JAP3/7f8OAO3/DQDt/wkA6P8VAOL/FgDh/xMA8//7//X//P/3/wYA8v8FAPz/BQDt/wYA7v8NAPH/+v/s/wgA6v8TAOH/FADf/xoA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA+//4/xEA5v8NAPD/BQD8//n/CADg/x0A3f8aAPT/9f/6/wQA6v8RANn/FADj/wgA7//7//f/+v/9//3/9P/7/wAA+/8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8v8NAOr/DQDm/wgA6v/9//f//f/v//P/9v8BAOr//v/v//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AAADAO//GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BgD0/wMA+v8AAAAA+f8EAPj/BgD0/wkA8/8FAOz/AwD1//T/CADl/wsA4f8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPL/BADp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/wsA8P8FAPz/AwD3//b/AwD0//7/AAD2//7////6/wMABgABAAUA/v8BAAIA8/8UAN3/HwDq/wYA+f8AAPD/EADl/xgA7f/9//n/AAD7/wUA9P8DAPv/+v8BAP3//f8AAPX/AAABAPz////7//v/AwDw/wMA9P/6/wQA7//w/wMA8v8EAPz/9f8MAO3/DQDp/wQA9f8EAPX//f8HAOn/EwDm/wUA8f8EAO//CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9f8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8GAO7/DQDx//v/7P8IAOr/EgDi/xQA4P8bAOn/BAD/////AQABAP7/AgAEAAMA9v8KAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8EAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD6/w8A7P8QAPP/DQDq/w0A5v8IAOv//f/4//3/8P/0//b/AQDq////7//y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr///8AAPn/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8LAOH/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDy/wQA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD9//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO//8P8DAPL/BAD8//X/DADu/w0A6f8DAPb/BAD1//3/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f
/6/+3/CADq/xMA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2f8UAOP/CADv//v/9//6//3//f/0//v/AAD7/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO//CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f
8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f
8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f
8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CA
Dq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/
8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v
8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQ
D0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f
8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v
8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAA///5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8/8DAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+v////v/BQD0/wMA+//6/wEA/P/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDu//D/AwDy/wQA/P/1/wwA7v8NAOn/AwD1/wQA9f/+/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD0/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wcA7v8NAPH/+v/t/wgA6v8SAOL/FADf/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/wkA/P/5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wUA6v8RANr/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPn/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8CAOr////u//L/AQDu/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP//AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v8AAP//+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wIA8/8CAPP/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD3//b/AwD0//7/AAD2//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//r////7/wUA9P8DAPv/+v8BAPz//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P/6/wUA7v/w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9f8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9P8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8HAO7/DQDx//r/7f8IAOr/EgDi/xQA3/8bAOn/BAD/////AQABAP7/AgAEAAMA9v8JAPz/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPz/AgAFAPX/DAD0/xAAAwD5/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AgDq////7v/y/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDw/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr/AAD///n/BAD4/wUA9P
8JAPP/BQDs/wMA9f/0/wgA5f8KAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9v/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/6////+/8FAPT/AwD7//r/AQD8//3/AAD1/wAAAAD8//7/+//7/wMA8P8DAPT/+v8FAO7/8P8DAPL/BAD8//X/DADu/w0A6f8DAPX/BAD1//7/BwDq/xIA5v8FAPH/BADu/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPT/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+3/CADq/xIA4v8UAN//GwDp/wQA/////wEAAQD+/wIABAADAPb/CQD8//n/EQDm/w4A8P8GAPz/+v8IAOD/HADd/xoA9P/1//r/BQDq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+f8PAOz/EADz/w0A6v8OAOb/CADr//3/+P/9//D/9P/2/wIA6v///+7/8v8BAO7/CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA7/8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8FAPT/AgD6/wAAAAD5/wQA+P8FAPT/CQDz/wUA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA8P8AAAUA+v8DAPX/AgDz/wIA8v8EAOn/CwDi/xIA5P8BAPr/7v8GAPb/9f8JAOP/EADx/wEA/v/6//v/CgDw/wUA/P8DAPf/9v8DAPT//v8AAPb//v8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8fAOn/BgD5/wAA8P8QAOX/GADt//3/+f////v/BQD0/wMA+//6/wEA/f/9/wAA9f8AAAAA/P/+//v/+/8DAPD/AwD0//r/BQDv//D/AwDy/wQA/P/1/wwA7f8NAOn/AwD2/wQA9f/9/wcA6v8SAOb/BQDx/wQA7v8LAPP/BQDz/w8A8f8CAAEA+P8QAO//DwD1/xUA6v8JAP3/7f8OAO3/DQDu/wkA6P8VAOL/FwDi/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wYA7v8NAPH/+v/s/wgA6v8SAOL/FADg/xsA6f8EAP////8BAAEA/v8CAAQAAwD2/woA+//5/xEA5v8OAPD/BgD8//r/CADg/xwA3f8aAPT/9f/6/wQA6v8RANn/FADj/wgA7//7//f/+v/9//3/9P/7/wAA/P8CAAUA9f8MAPT/EAADAPr/DwDs/xAA8/8NAOr/DgDm/wgA6//9//j//f/w//T/9v8BAOr//v/v//L/AQDv/wsA4P8VAOb/CgAFAPn/BwD7//X/CQDw/wIA/f/6//3/9v8AAP7/AQACAPD/GQDt/wsA+f/1/xIA7/8CAP3//P/7/wgA6v8JAO//CAD5/wIA9//+//b/BQD0/wIA+v///wAA+f8EAPj/BQD0/wkA8/8FAOz/AwD1//T/CADl/woA4v8WAOb/EwDs/xAA+//+/wkA7v8SAPD/AAAFAPr/AwD1/wMA8v8DAPL/AwDp/wsA4v8SAOT/AQD6/+7/BgD2//X/CQDj/xAA8f8BAP7/+v/7/woA8P8FAPz/AwD4//X/AwD0//7/AAD1//7/AAD5/wQABgABAAUA/v8BAAIA8/8UAN3/HwDp/wYA+f8AAPD/EADl/xgA7f/9//n////7/wQA9P8DAPv/+v8BAP3//f8AAPX/AAAAAPz//v/7//v/AwDw/wMA9P
/6/wQA7//w/wMA8v8EAPz/9f8MAO7/DQDp/wMA9v8EAPX//v8HAOr/EgDm/wUA8f8EAO7/CwDz/wUA8/8PAPH/AgABAPj/EADv/w8A9f8VAOr/CQD9/+3/DgDt/w0A7v8JAOj/FQDi/xcA4v8TAPT//P/1//z/9/8GAPL/BQD8/wUA7f8GAO7/DQDx//r/7P8IAOr/EgDi/xQA4P8bAOn/BAD/////AQABAP7/AgAEAAMA9v8KAPv/+f8RAOb/DgDw/wYA/P/6/wgA4P8cAN3/GgD0//X/+v8FAOr/EQDa/xQA4/8IAO//+//3//r//f/9//T/+/8AAPv/AgAFAPX/DAD0/xAAAwD6/w8A7P8QAPP/DQDq/w4A5v8IAOv//f/4//3/8P/0//b/AQDq//7/7v/z/wEA7v8LAOD/FQDm/woABQD5/wcA+//1/wkA8P8CAP3/+v/9//b/AAD//wEAAgDv/xkA7f8LAPn/9f8SAO//AgD9//z/+/8IAOr/CQDv/wgA+f8CAPf//v/2/wUA9P8CAPr///8AAPn/BAD4/wUA9P8JAPP/BQDs/wMA9f/0/wgA5f8LAOL/FgDm/xMA7P8QAPv//v8JAO7/EgDw/wAABQD6/wMA9f8CAPP/AgDz/wMA6f8LAOL/EgDk/wEA+v/u/wYA9v/1/wkA4/8QAPH/AQD+//r/+/8KAPD/BQD8/wMA9//2/wMA9P/+/wAA9f/+/wAA+f8EAAYAAQAFAP7/AQACAPP/FADd/x8A6f8GAPn/AADw/xAA5f8YAO3//f/5////+/8FAPT/AwD7//r/AQD9//3/AAD1/wAAAQD8//7/+//7/wMA8P8DAPT/+v8EAO//8P8DAPL/BAD8//X/DADt/w0A6f8DAPb/BAD2//3/BwDq/xMA5v8FAPH/BADv/wsA8/8FAPP/DwDx/wIAAQD4/xAA7/8PAPX/FQDq/wkA/f/t/w4A7f8NAO7/CQDo/xUA4v8XAOL/EwD0//z/9f/8//f/BgDy/wUA/P8FAO3/BwDu/w0A8f/6/+z/CADq/xIA4v8UAOD/GwDp/wQA/////wEAAQD+/wIABAADAPb/CgD7//n/EQDm/w4A8P8GAPz/+v8IAOD/HQDd/xoA9P/1//r/BADq/xEA2v8UAOP/CADv//v/9//6//3//f/0//v/AAD8/wIABQD1/wwA9P8QAAMA+v8PAOz/EADz/w0A6v8NAOb/CADr//3/9//9//D/9P/2/wIA6v///+7/8/8BAO//CwDg/xUA5v8KAAUA+f8HAPv/9f8JAPD/AgD9//r//f/2/wAA//8BAAIA8P8ZAO3/CwD5//X/EgDv/wIA/f/8//v/CADq/wkA7/8IAPn/AgD3//7/9v8GAPT/AwD6/wAAAAD5/wQA+f8GAPT/CQDz/wYA7P8DAPX/9P8IAOX/CgDi/xYA5v8TAOz/EAD7//7/CQDu/xIA7/8AAAQA+v8CAPX/AgDy/wMA8v8EAOn/DADi/xIA5P8BAPr/7v8GAPf/9P8JAOL/EQDx/wEA/v/6//v/CgDw/wUA/f8CAPj/9v8DAPT//v8AAPb//f8AAPn/BAAGAAEABQD+/wEAAgDz/xQA3f8gAOn/BgD5/wAA7/8RAOT/GADt//3/+f////v/BAD1/wMA+//6/wAA/f/9/wAA9f8AAAAA/P/+//r/+/8DAPD/AwD0//r/BQDu//D/AwDx/wQA+//1/wsA7v8MAOn/AwD2/wMA9f/+/wYA6/8RAOf/BQDy/wMA7/8LAPT/BAD0/w8A8v8CAAEA+f8PAPD/DwD2/xQA7P8IAP//7f8PAO7/DADv/wcA6f8UAOP/FgDh/xMA9P/8//X//P/3/wYA8v8FAPz/BQDt/wYA7v8NAPH/+v/s/wcA6/8RAOL/FADf/xsA6v8DAAEA/v8DAAEA/v8CAAUAAgD4/wgA/f/4/xMA5P8PAO//BwD8//r/BwDi/xoA3/8ZAPT/9//5/wUA6v
8RANr/FADj/wgA7v/8//b/+//8//7/8//8/////P8BAAcA9P8NAPP/EAAEAPn/EADr/xEA8f8PAOj/DwDl/wgA6//7//n/+v/x//H/+P///+v//P/v//H/AQDu/woA3/8UAOb/CAAFAPj/BwD6//b/CADx/wEA/f/7//z/9/8AAP//AQADAO//GgDt/woA/P/z/xUA7f8DAP3//P/6/wgA6v8JAO//CQD5/wIA+f/8//n/AgD3/wEA+////wAA+f8EAPn/BAD2/wcA9v8DAO7/AQD2//P/CQDk/wwA4P8XAOX/FADr/xEA+P8AAAUA8f8OAPT//P8HAPj/AwD0/wIA8/8CAPP/BADo/wwA4f8SAOP/AQD6/+3/BwD1//b/BgDl/w0A8f8AAP3/+v/6/wkA8f8EAPz/AQD4//X/AwD1//z/AQD1//7////4/wUABAABAAUA/P8DAP//8/8UAN7/HgDq/wYA+v/9//T/CgDq/xMA8f/5//3//P/+/wMA9f8CAPz/+f8DAPn///////b//v8CAPr/AAD5//z/AADy/wAA9//4/wQA7v/y/wEA8/8DAPv/9/8LAO//DADp/wUA9f8DAPf/+v8OAOP/GADh/wsA7v8GAO//CQD1/wYA8P8SAO//AgABAPX/EQDt/w8A8v8VAOf/CwD4//D/DADt/w0A7P8IAOr/EwDj/xYA4v8SAPb/+P/5//n/9/8JAO7/CwDy/w4A4v8QAOX/EQDu//z/7f8FAOv/EwDi/xQA3/8cAOb/CgD4/wcA+/8FAPv/BAABAAYA9P8LAPr/+/8QAOj/DQDv/woA9//+/wUA4f8gANj/HgDw//f/+f8JAOX/FwDX/xUA5P8JAO//+//4//r//P/7//b/+P8CAPj/BAABAPn/BQD5/w0AAgD7/woA7/8NAPX/CgDr/wsA5f8LAOf////1//7/8P/z//X/AwDo/wQA6P/6//r/8/8KAN7/GgDg/w4AAwD3/wkA+v/0/woA7f8GAPj/+v/+//P/BQD5/wMA/v/0/xcA6f8SAPD///8LAPX//f8DAPn/AAAFAOz/DADu/wgA+P8IAO//CQDp/xMA6P8KAPb/AAD9//3////7/wMA9v8JAPH/CwDk/w0A7//7/wIA6v8JAOf/FADp/xAA8f8NAP7/+v8QAOn/GgDn/wgAAQD///3//P/8//b/AgDz////7/8DAOr/CQDs//z//f/u/wUA9//3/woA4v8QAPj/+v8JAPH/BgADAPz//v8EAAAA+P/4/wUA8P///wAA9f////z//P/9/wsA+/8FAP3/+/8FAPH/EgDZ/yIA4f8NAOz/CwDl/xUA3/8ZAOz//v/1/wIA+P8DAPT/BwD3//7/+P8HAPX/BgD1/wAAAgD7/wEA+f/9/wIA+P/9////7v8TAOr/9f8FAPP/BQD///b/DADx/w0A6v8FAPj/BAD2/wQA///7/wQA9v/9///////2/wwA+v8AAP7/CgD2/wMAAgD0/xcA5/8XAPP/EAD4////BQD1//n/DgDt/xEA7v/4/wsA6f8WANz/FADx/wEA7f8AAPD/BwDx/wIA9/8KAOb/CADx/wQA+f/6//P/AwD2/wYA8v8LAOz/EAD0/wMA+P8IAPr/CAD6/wIAAQD7/wUA8v8PAOL/GgDf/w8A6/8KAPD/BwD4//L/CwDo/xkA7v8JAPX/BQD5////8/8CAPv/+v/+//n/+P8BAPf/AwD0//z//v/7/wMA+P/9//7//P8BAAQA+f8FAPb/+P8JAPv//v8AAOz/EgDi/w4A9P8AAP3/7f8HAPP/BgDr/wQA6f8GAPj//f/s/wYA8f8JAPr//v/9//n/AgDx/wMA5v8UAOn/AQD8//H/DQD7/wMA+/8IAP///v8HAPX/BQD4/////f/4//n/AADw/wQA7f8JAPb/+/8KAOv/DQDu/w4A8/8IAPr/AQAJAPT/DgDw/xAA8f8OAOf/EgDl/wsA5/8MAOj/BA
Dn/woA7P8QAOX/FgDr/wsA9f/6/wMA9v/+//X/CADy//3/+//v/wMA6/8GAOj/DQDh/xEA6f8EAPf/+P/8/wMA5P8LAOj/AAD4//X/9/8BAPL/BAD3//v/AgD8/wgA7P8OAPb/BgACAAAA+f8CAPv///8MAPT/DQDv/w0A8v/v/w4A3v8TAO//+//7//v/6v8OAOH/DwDw/wAA+v/9/wkA8v8GAPP/BAD0/wYA7/8GAOv/AgDt/w8A5P8LAPX/+//+//X/9v/9//z/8//6/+n/BADz//z/+P/3//7/9f///+z/BADv//z/BADo/xEA6v8EAAEA9f8GAPj//v8CAP//CQDw/x8A5f8RAAEA+/8KAP3//P8PAOz/EADt/w0A7//9//7////3//j/8/8CAPj/BQDy//z/AwDm/w8A5f8AAPn/6v8LAOb/CwDn/wAA8f////X/+v/0//z/+f8HAPH/BADt/wQA8f/9//3/8/8AAPX//v/4//j/BQDt/xEA5/8RAO//BQD8//H/DADp/xMA4/8PAO7//P/9//j/AAD0//3/9v8GAPT//f/+//j/BgD4/wIA/P8BAPf/AwDr/w4A6P8LAPn/9f/7//j/+P/+//n/8f/8//H/9//1//f/7/8EANz/BQDm/wEA9P/w//T/9v/5//j/9//4/+v/DgDd/xsA2v8UAPH/9P8JAO3/EwDs/wYA9//5/wUA9f8DAPr/9v8MAPX/DAD7//P/GgDm/xQA7P8DAAkA6f8SAPL/AgD6//n/AQD0/wcA8P8IAPz/+P8EAPb/AQADAPX/CwDz/wYA+f/+/wAA7/8JAPX/+//+//H/AAD6//n/9P8CAPH/9v8AAOv/DwDq/wEA/f/1//3//P/p/wYA8//5//b/8P///+//+//w//7/8f8EAOv//P/+/+r/CQDk/wIA9f////L//v/y//n/AgDs/w4A6v8DAPb/+v/0/wkA7v8DAPD/AwD1//X/+v8EAPP/EADv/woA/f8BAP//+f8BAOv/CwDo/wQA7/8AAPL//f/9/+7/FgDf/x8A2f8WAOn/BwD0/wQA+v8KAPr/+v8MAPT/EAD8//7//P/8/wIAAwD0/wAA7f8RAPD/AgD8//v/BADt/wsA9P/4/wMA8P8IAO7/BwDe/xAA4v8MAOT////4//P/9//8/+L/CgDq/wEA///2////+P/+//j//P/5//H/AAD1/wMA7P8JAPv/8v8VAN//DAD2//z/BAAAAAAA/f/5//7/8//9/wQA7v/4//7/7P8LAO///P/9//r//v/9//7/9v8GAPb/AAAGAOv/FwDg/xwA6P8RAPH////+//T/AwD5//r////7//D/BwDy/wQA8v8FAPv/+f8OAO//DAD2/wcA+f/8/wwA7v8aAO3/AwAHAPX/DQD1/wAABwDz/wYA+P/7/woA9P8CAAQA9v8GAPn/BQABAPD/EADp/xAA+P/3/wEA9v/9/wQA+//z/wcA7/8CAP7/+P/+//T/AgD6//v/AAD0/wMA8P8IAOn/DQDw//X////1//r/AwD4/wMA9P8BAPH/EwDn/w0A+P/w/xEA6v8NAPj//f8HAPP/DQDx/w0AAQDz/xUA6f8ZAOz/EgDw/wwA+/8BAP7/BgD9/wgA////////BQD6/wYA/P8BAP7//f8GAPP/CADv/wsA6v8QAOD/DAD4//j/BADz/wIA+v/+/wcA7f8PAPD/CgDx/wIA8/8DAO7/BAD6//X/DQDs//7/9P/5//v/AwD1/wMA9/8MAPf/CQADAP3/EQD0/xEA9v8DAAYABwD6/xQA9/8NAAkA/P8GAA4AAgANAPv/FAD2/x0A9P8LAPv/AwD9/xIA8/8HAPn/AwAAAPb/+v/z/wkA8f8IAO7//f/5/wEA8P8IAPj/+P8PAPP/DgADAO//DwDy/wMA/f/7/wEA8v8IAPf/BwD+/wgA+/8QAPn/BAACAPv/CgD4/wsA5v8XAPL/DwDy/w8A+f
8NAPj/BwADAPn/EgDp/xoA8/8LAPv/DQDz/xsA4f8bAPX/EwD1/xQA9v8MAPb/EAD3/wkA/P/4/wgA8/8AAPz////8//7//P8DAAQA7v8EAPz///8KAPT/AAACAPT/AwD6//j//f8AAPP/BwDv/wMAAgDx/wgA8v8MAO7/EQDm/yIA8f8LAPn/+/8LAO7/GwDi/xwA6/8OAP3/AAAFAAMA+/8GAP3/BwD4/wUA8/8MAPf//v/8///////3/wEA9/8EAP3/+P8GAPP/DQD1/wkA+/8EAAgA//8PAPz/BQAEAPP/CgD5//7/AADz//3/7/8BAO7/BgDk/wMA7P///+j/EADn/wgA6f8AAPj//v////D/AgDy////AAD2/wAA6f8QAOj/BgD4/wEA+f/+//P/EQD1/xEA7/8PAPX/DwDv/wsA9f8EAP3/AwD2/wYA7P8NAOr/CgDy/+z/FgDd/xYA7P8EAPb/BQD///D/DwDw/wgA+//4/wMA8f8FAPL/+v8GAOj/EQDl/wgA9P////b/BwDv/wcA+v/3/wUA+f/6/wQA7/8IAPP/AQD5//X/BQDz/wMA7v8AAPT/AAD8//7/8v8UAOL/FgDw/xAA9v8KAPn/EwDs/xQA7/8HAAIA9/8MAPH//f8GAPn//v/5/wMA9P8LAO7/BAD2/wQA+f/8//7/8v8JAO7/+P/7//b/BADs/wIA9f/+//z/9v///+3/AwDm/woA6v/9////6f8IAPD/+P8JAO//AAD3//X/8v8FAOP/BAD6//T/AwD5//X/CwDv/xwA5f8QAPj/AAAAAP//9P8DAAAA8f8NAPf/AwD3/wEA8P8SAOX/FADo/xAA7//9//b/+//0/wgA7/8EAP7/+/8AAPb/AADw/wgA+v/+//r/+//0/woA5P8MAOr/+v/9//H/AQD1/wEA/f/0/wEA9v/2/wcA7/8GAAQA+v8NAPD/AwD8/wkA7/8MAOr/CgDz/wgA+P8FAAAA/f/+/wUA+P8GAPf/BQDu/wgA8P8CAAAA8P8NAOr/FwDj/xYA8f8FAAAABwD0/xMA7f8HAP7//f8FAPj////5/wIA9P8GAOv/DgDj/xAA6v8OAO///v/5/wcA7P8QAOr/BgD3////9//3//7/9f8CAPf/9f8CAPD/AAD4/wAA7v8JAOL/FQDo/wQA+f/2/wUA+//9//j/+v///////P/7//f/AgDx/wAABgD3/wAA9v8AAPz/BgD6/wMA+P8EAPr/BgD+/////P8DAO7/CAD3//z//f/8//3/+v8AAPr//v8DAO//EADr/woA9P8GAPr/AAD3/wIA//////7/9f8DAPH/AgDz////8//9/+7//f/2//T/+P/5//r/+v8BAO7/BwD6//r/AQDx/wAA9/8CAOr/CgDt/wAA+//3/wkA9v8TAOP/IADm/xYA8f8QAPL/BQAHAP7/CQD5/wkAAgD+/wQABQD3/wEA/////wEA+//8//3/+/8HAO//FwDf/xcA7f8MAPf//v/6//7/BADy/wEA+v/1/wAA7v8IAO//BgD7/+//GADq/xcA8f8TAP7/AgD8////AgD2/wQA8v8FAO3/CwDh/wAA+P/v/wMA8f/w//v/9P/3//z/9P8DAOr/DwDg/xUA5P8QAOX/EgDt/wsA9f8DAPj/+v8IAPX/BQD2/wAA/P/s/wgA9v/8/w8A6P8UAOj/DgDk/xIA6v8JAPD/AAAJAPD/CQDn/xAA6f8MAPH/AQAHAP3/9P8JAPL/BQD8//v/+v////v/AwAEAPb/CQDu/wEAAAD2/wAA+//1/wUA9/8BAP3/7v8WAOf/FQDl/w0A7v8RAOz/EADx/w0A7f8HAPv/AAD9//z/AAD2/wkA8P8IAPn//P8AAPX/AQDy/wcA8v8CAP7/9P8QAOb/FQDm/woA9P/5////8//x/wYA5v8OAO//9f8CAPj//v8JAO3/CwDz////8/8DAO3/BwDs////+P/t/w0A4v
8NAOT/BwDt/wQA8//2//n//f/z//7/AQDu/wgA8f8CAAAABQDr/xEA5f8VAO///P8EAPb////+//j/BgD9//v/AQD6//3/CgD0////BADx/xQA9P8MAPn/AgADAP3/BQD2/wEABQDr/xMA6P8OAPj/CwD3/wQA/f/9/xAA8P8EAAUA8/8HAPf/9/8KAOv/BAD1//z/+f/4/wAA9P8AAO3/CAD0/wAA+v/8/wIA+v8CAPX/AgD4//3/9f/8//b/+v/y/wAA6/8FAOj////4//n/+//3//f/CQDv//z/+//9//j//f8GAPD/EAD2//b/AwDu/woA8P8KAO//BQD4//z//f/8//3/+v/6//n/+f/2/wIA8v8AAP7/+f///wEA9v8EAAMA/v/7//n/CAD1/w0A8P8AAP3/8f8GAPf/BQD3//7////6//j//f/s/wgA8v8CAOr/AwDl/wIA7P/z//b//f/t/wUA8v/5//f/+P/p/wsA2P8VAN3////v//n////1/wEA9P8CAP7/+//7/wAA9f8KAPD/AAD8/wAABgD8/wUAAAAGAP3/BQD6/wUA/f8DAAAA+/8MAPX/BQD4/wYA9v8EAAIA9v8OAPL/FwDr/xMA8P8PAPb/BQACAPL/DgDv/wIACQDx/wQA+v8BAP///v/t/xEA6f8WAO//+/8IAOP/EgDl/wUA6//+//L/+v/z//P/+f/1//b/+P/u//7/9P/7/wYA9P8BAPT/AQDv/wkA9P/+//3/9/8EAPX//f8DAPj/AgD4//7//v/8//7/8P8TAN//FQDo/wgA/P/t/wcA7/8DAPX/8P8GAOb/CQDm/wkA5/8CAPz/9/8EAPP//f/3//n/+v8BAPH/DgDy//v/+v/+/wEA/v////L/CwDr/wwA9/8BAAEA7/8UAOX/HwDj/xwA5/8JAAQA+/8HAPz/8P8OAPH/CAD5//3////v/w4A7v8NAPP/AQAOAN//JQDZ/xoA8v8CAPz/BAAAAPT/CADy/wMA+v/5//n////w/wMA/P/5//z/8v8TAOH/FwDo/w8A+/8BAAMA+f8KAPP/BAAFAPb///8EAPP/AQD4/+v/FADe/xMA6P8FAPn/AwDy/wgA8P8IAPX/AwD3/wMA8v8KAPL/AAABAPL/+v8LAOn/DgDr/wMA+P/8/wAA+f/+/wIA///5/wkA8P8QAO//BQD/////9v8CAP//8P8TAOr/EADu/xAA6/8LAPX/CAD9//f///8CAPP/BADn/wsA///y/wkA7/8IAPv/+P/+//f/9v/9//X////x/wIA8f//////+f8KAPz//v////r/BQDt/woA6f8UAO//BgDv/wUA8v8PAO7/AwDx/wEA9f8CAPD/BgDq/w0A7f8FAP3/+f8JAPL/DAD7/wIABwAAAAUA/v8GAPP/EADr/xYA5v8TAOz/DwDy/wkA6/8GAPb/BQD+//v////6/wEA9P/8//b/9v/7//T/AgDx/wIAAgDt/xcA4/8SAOz/DwDv/xgA8v8OAPj/AAD0/wQAAQDz//z//P/1/w8A6v///wQA+P/6/wsA7P8PAO//BQD8/wMA+v8CAPT/CQD+//7//P/6//7/+P/6////8v8EAPj/8v8EAPb////8//z/+/8IAPH/DAD0//v/AwDz/////P/1/wMA+v/s/wUA8f/+//n/6/8LAOr////x//n//f/7//z/9v8LAOr/DADs/wYA7v8BAPX/AwDy//T/+f/6//3//P/9//b/AgD3//v/DwDx/xEA7v8SAP3/AgAKAPb/GADp/xkA6f8SAPX/AAD9////+/8HAPn/DADn/xUA5v8aAOz/CAD4//r/BQD5//v/AgDx/w4A5f8TAOT/EwD4//X/FgDl/xYA7v8LAPv///8FAPX/BgD9//f/EgDq/xQA6/8IAPL/AwD5/wEA8v8HAO//CADq/wAA/f/u/wcA5/8FAPb/+f/5//7/+//4//7////x//7/9f/+//T//P
/t////7P8BAPX/+P8GAO///v/x//T//v/7//T////x/woA8f/9/wUA7v8NAO///v/+/+3/DADx//7/BAD3//7/CwDp/w4A+f8EAP////8IAPj/FgDr/xEA8f8HAPr/DAD4/wgA9/8GAP//+f/+//n/BgD2/wgA9f/6//z////4////AADu/xMA9P8JAP//9/8BAPz/9v8EAPf/9//9//7/9P8OAOv/FQDv/w0A8f8IAPD/BAD4//r//f/m/w0A6P8KAOn/CADz/wAA9//9//j//P/8//r//f8AAPv////2/wIA/v/6//3/BQD9////CQD6/wUA+/8CAP7////6//3////v/wwA4v8QAPD/AgD2/wUA+v/0/wAA9v/8/wgA9P/3/wEA6/8IAPD/+f/2//z/+P/3//v/8f8MAOH/EADj/wsA7v8AAPb/AgAIAOz/BwDx/wAA9f8IAOn/CwDw/wMA/f/1/wUA+//6/wAA+P////7/9/////z////6//v/AQD6//j////2/wQA9v/9/wAAAAD1/wwA+f8HAAEABQAGAAYACgD3/w8A7v8NAP3//P8FAPP//P/6//n//f/9//b//P/6//X/9/8KAO//CADv////+/8DAPz/9v8GAOz/DgDu/w8A5/8CAPn/9P8CAPX//f/z/wMA7P8QAPT/BgD3/w0A7P8SAOn/BwD+//b/BAD4/wMA+P/9//f//f////v/6/8QAOH/GQDl/xAA4/8QAPb/9f8HAOz/CgD4//b/AADx/wMA7v/5/wgA5P8LAOz/AQD1//f/9/8EAPH/CQD0/wEAAgD8//n/DQDr/w0A8v8CAPj/+f////T/AgDw/wEA8P/2/wgA6P8MAOf/AAD8//z/9/8GAOz/EAD6//X/DwDm/xcA7P8GAAMA/f/0/w0A+//0/xUA5v8VAPX/DAD0/wcA//8AAAYA9P///wIA9P8BAOX/EADq/wQA8//+//T/CQDx/wIA8P/7////9P8HAPD/AAAAAO7/EgDv/xAA9v8NAPD/DgDt/wcA8v8CAP7////3//n/AADy/wsA9//4/wIA8f8FAPX//v/2//b/CADs/wgA8P8MAO//CADz/wwA7f8XAOb/GgDt/wkA8/8DAAAA+P8FAPP/BQD///T/CQDi/w8A7P8HAP7/9f8AAPn////2//3/9P/5/wIA8f8GAPb/AQAFAPT/BgD4/wEABwDu/w0A+f8RAPH/BADu//3//v/x//r/8f/0//z/8f8GAOT/DQDx//j/AwDw/wMA9v/1//z/8P8HAOv/AQD0/wIA+P/7//v/+f/9//n/BQDx/wcA/P/1/wAABwDu/xYA6/8HAAMA9/8HAP3//P8JAO7/EQDu/xYA8v8OAO//GgDr/xcA9v8IAAcA+v/8/woA9f8FAAIA9/8IAPj/AgD6/wcA8v8KAPD/BAAAAPP/BQDx/wcA9P8EAPr/AADu/xAA6/8ZAOr/BQD5//v/AQAAAPv/+P8KAOP/FwDg/xUA6v8NAO3/CAD3/wYA///9//z/+f/9/wMA8P8FAO//AwD3/wMA8P8JAO7/CwDz/wEA/P///wEA+P8DAPb/AgD7/wQA///4/woA7v8MAPb///8FAPL/BgD2//T/+f/5//j/BQDn/xMA4f8QAO7/+//+//j/9//7/+3/BwDu////9v/2/wAA+f8EAO7/CAD2////+//5////+v/3/wcA/P8CAP//+v8OAOz/CQD7/wAAAAD+////CQD4/w8A8/8RAPb/DwD5/woA+P8OAPz/BAAGAPD/GwDs/woA/v/+//7//P/4/wcA+P8AAAIA7v8WAOH/GQDv/wsABAD4/wMA+f/+/wEA9v8EAPj/AgD7//r//f/5//n/BQD1//7/7/8PAO7/BAD3//n/+f8CAO7/CwDo/xIA4f8dAOj/DAD7/wAAAgD6/wEA+v/+/wAA/P///+j/EQDs/xAA9v8CAAEA+f8FAPj//P/9//b/AgACAPf/BQ
Dt/wYA+P/7//T/+//5/wcA8f/9//7/+P////X//v8DAO//EwDq/xcA6/8SAOv/DgDw/wwA///8//j//f////z/AQD7////AAD6//v//P/9/wYA9f8CAAMA///6//7//P/2/w4A6f8CAPj/+/8GAO3/CwDy///////u/wsA9v8CAP3/AAAEAAMAAwD7/wcABAADAP//AQACAPP/FwDo/xoA5/8MAPf/BgD+/wIA/P8DAPb/CQDv/wYA+f/6/wMA8f8OAOr/FADw/wUA+v/2/wEA+/8CAPz/8f8IAPP//f/9/wEA9f8KAOz/BAAEAPr/BgD1/wcA7v8SAPD/CQDw/wQA6f8YAOL/CwDp/wQA9P/7////7/8MAOj/EQDn/xoA7f8FAPv/+/8IAPP/BgDw/wIA+//8/////P8AAPT/BQD0//r/DgDm/xIA7/8HAPX/CgDr/wwA9f/5//3/9v8DAPf/+//3//L/BAD2//D/AgD1//f/BgDv//7/+//2/wAA+P/4//j/+f/6/+//BgDq/wMA6P8CAPv/+v8AAPD/EQDs/xgA2/8bAPT/AwD+//7/CAD0/xQA3/8YAOb/EADv/wwA8P8NAPz//P8IAP7///8FAPH/DgDl/wsA+P/2/wMA+P/1/wkA9v/5/wgA8P8HAO///f/2//v/AADy/wQA6/8AAPz///8DAPn/AgD8/wUA6v8OAOT/CwACAO//CgDs//7/AAD1/wIA9f8EAPb/CQD1/wQA7/8NAOv/AwD6//b/CgDk/xAA5P8UAPX/AAAHAPP/CQABAPD/CgD4//7////y//r/DQDn/xYA5P8PAOL/CgDo//n/8v/r/wcA2/8JAOP/AQDv//v/8//2//z/9P/8//z/9P8QAOT/FQDo/xAA8f8JAPn//f////v//v/9//T//f////r/9v8BAO3/CAD6/wIAAAD1/w0A5f8UAOv/CADx//z/AADz/wYA9P/6/wIA9v8DAPT//v8CAP3/CwD+/wIA/f8CAPj/DAD9//v/BwD2/wYA/P/8/wYA+v8FAPv/BgD4/wcA9v8KAPf//f8IAPf/BQABAO3/EQDs/wkA9v/4/wAA8/8EAO7/BQDw//3/AgD1/wsA6v8KAPH//v////j//P8HAPv/8/8GAPj/AgD8//3/8/8CAPD//v/2//3/9f/0//j/+f/1/wQA8P/w/wcA6v8KAOv/BADo/wkA6/8GAPL////2//f/+//9//v/+//5/wUA6/8IAPT/AQD9//3/+f8SAPP/CwDx/xQA6/8UAPD/AwADAPT/CAACAPf//v/7/wMA+P/7/wQA9f8TAOr/DgD5/wMAAQD9/wsA+P8HAP//AgABAPr/AAAFAPf/BQD5/wAAAgD9//z/BAD0/wYA8/8JAPL/DADn/xMA7//+/wgA5f8IAP3/7/8IAO3////4//b//P/7//n/AQD4//X/DADm/xYA5P8LAPf/CQDt/wsA+P/0/xEA6/8PAPD/CQDs/wgA6/8JAPX/9P/6//n/9f/1//H/AAD9//3/+f/+//3/AgD5////+P/7//v/+v/8//P/BQDx//n/CgDs/xYA6/8IAPf/+/8BAOz/CwDi/xoA5v8NAOv/BAD6/wUA+f/3/wEA8//+//v/8f8GAO7/AgDz/wAA9v8JAO3/CgDx/w0A8f8PAO//DwDw/wcA7/8FAPD/CQDp/wgA7/8GAPT/AQDt/////P/+/wMA9//9/wQA+P8CAPb/+//+//r//f/9//j/BgAEAOv/HQDg/xMA8f8KAPb/FQD4/w0A+P8KAPT/DQAGAPX/BAD+/wAACQD9//X/EgDx/woAAAD5/wYA9/8CAP7/BQD4/wQA7f8RAPP/AgD6//T/CwDl/wYA8P/9//v/+//q/wMA9f/7//z/9//4/wYA8/8FAPj/+f8BAPT//P/6//P/BwDx//L/AgDr/wUA7f/2//7/8P/6/+//+f/3//n//v/u/w0A5P8LAO//AgDz//z/+v
/9//j/9v/z/wMA8f8LAO7/AAD7//f//v8GAPb/CwDx/xAA+f8HAAIA+/8SAOz/GADp/xIA9v/6/wMA9//9/wEA/P8FAOv/DQDl/xgA6v8CAAAA7f8QAOv/CAD5//n/DQDn/xgA4/8WAPr/+P8TAO3/EgD2/woA/f8BAAgA9f8NAPr/AwALAPP/EgDv/w0A8v8GAP///f/7/wEA+f8EAPD/AAD9//P/BgDq/wUA+//4/wAA/P/7////+f8HAO7/AgDy/wQA7P8FAOP/BwDk/wMA8v/2/wQA6//8//D/7v8BAPH/+P/2//T/AwDy//v/AADu/wgA7//7//v/7v8EAPb/8v8IAO//AAAEAOr/CgD2/wIA///4/wkA8f8XAOT/EgDp/wkA8v8LAPb/AgD5//3/AgDy/wAA9P8HAPT/CADy//3/+//+//r//P8HAOj/GADs/xIA+P/5/wEA+//2/woA7v8BAPn////+/wMA/P8MAPv/DAD6/wMA/f8CAPr/BwD1//T/CgDw/wkA7/8HAPf/AgD3//3/+//4/wcA7v8EAP3/+v/9////8/8MAOf/DwDx/wsA7v8UAOz/CwD0/wIAAAD3/wIA8P8JAOf/DADk/wkA8P////P//f/8/+X/CADr//r/BwDr/wAA9P/0//r//v/r/wcA6v8JAPD/+v////v/9/8BAPb/AgD4//3/+P8FAAUA8P8KAOn/DgDl/xgA4v8LAPT/+/8JAOr/EADw/wMA/f/5/wcA9P8FAPH/DwDw/wcA7/8MAPX//f8AAPH/CQD1//7/+/8AAPj/CQD7/wMAAwADAAsABgADAAEACAD1/xAA8/8MAPf/BADy/wEA+//+/wMA8/8CAPv//v/3/wwA9f8GAPT////9/wQA+P8BAPL/AQD6////AADx//r/AADw/wMA9P8EAPH/CQDn/xkA8v8NAPr/BAD+/wgA9f8IAPj/AwAAAP///P/8//n//v/x/wQA7//v/wUA6f8FAO7/AQDr/wIA/f/l/xUA4/8NAPr/8f8FAO//BQDx/wQA/f/7/wIA+v///wEA/P8DAAUA+/8HAAMA+P8NAPz/AwAHAPL/EADx/w0A7/8CAAQA8v8KAOb/EADn/wwA+P/9/wAABQDw/xIA8f8WAPP/DAD8/w0A9v8IAPj/AwAIAO7/FADs////BgD3////+//9//n/AwD5//r/+P8GAPD/BwD2//j/BwDs/wMA8v8BAAEA7v8IAPX//v8FAPb////7//r/+v8BAPr/+P8DAPT/AwD6//z/CAD4/wMA9//+//X/BwDo/wcA///2/wYA+f8BAAAAAAAMAPP/DQD2/wYA/P8HAPT/AQAKAOT/GwDv/wYA+v/7//z/CADw/wcA9/8HAPT/+P/8//v/+P8AAPf//v/+//n/AgD1/wcA6v8PAPP/CQDx/wMA9v8AAPn/+v/5//n/+v/7//f/BQD1/woA7f8IAPL/AwD5//z/AQAKAPr/CQD7//n/CQD6/wAAAgDy/wQA9/8BAAQA7/8SAOj/FADw/woA8v8DAP3/8P8KAPH//P8KAOX/EgDr/wgA+P/9///////4/w0A7f8WAOr/CwD/////AAADAP//+v8PAO7/FgDm/xkA4v8bAOf/GADr/w0A8P8OAO7/DQDw/wYA9/////b/AAD6/////P8CAPL/CgDx/wIABQD0/wEA/P/1/wsA8P8AAAEA9P8OAPH/BAD3//3////9/wAA+v/6//b/9//5/wkA8f/9//H/AAD1/wIA8v/+/+//CADr/wcA9//5/wAA9//t/wUA8/8AAPn//f8BAPf/BgD2/wYA/P8CAPz/AwD5/wUA+v8GAPj/AAAAAPn/CgDw/wMA9P/5//3/9v/8//b/8/8AAOj/CgDi/wMA9f/9//j/BADv/wwA9P8HAPX/BwDz/woA+v/3/wQA+f////P/CAD5/wkA/P/5/wQA///7/wUA9v8FAPP/EADz/wcA+v/9/w0A6v
8RAPf/9v8GAO3/CADy/wEA8v8CAPP/BgDv/wwA7f8JAPz//v8CAPb/AQD7/wIA8f8BAPb/+v/2//L/AQDy/wMA9f/t/xAA8f8GAPr//f8GAPf//v/3/wEA8P8JAOv/DADo/wkA7f/2/wQA5v8JAO7/8v/9/+3/BgDo/wgA7/8AAPj/8/8FAO3/CADo/wgA8/8CAPX//P/8//L/BwDt/wYA7P8DAPL/6v8IAOL/CgD1//T/AQDy//3/8//7//7/9v/8//n/BgD0//7/7v8KAOb/CgDq/wUA+P8BAOr/DADl/w0A6v8IAO7/BgDy/wcA+//8/wgA6f8JAPT///8AAPH/BgD1/wIA+f/7//L/CQDx/wwA6/8NAOf/EADz/wkA+f8BAPP/CAD7//3//P/7/wcA6v8NAOv/BwD1////+v/y/wIA7P8DAPX/+P8AAO//DQDn/w4A7v/+//j/9P8DAO7/+//6/+7//////+n/CQDx/wAADgDn/xAA7f8HAPb/+//3/wgA6f8MAO7/9/8KAOv/BADy//v/8f8FAPL/8P/+/+7/AQDs/wUA6f8CAPH/+v8IAPH/AQDx//z//v/6//f/+v/5//D/BADz//3/+v/0////9v/+/wAA7P8KAPP/9/8PAOf/DgDy//7//P/5/wIA8v8DAPn/7/8GAPn/+P8JAPv//////////f8PAPn/AwAKAPz/BwABAPn/FQDo/xQA7/8KAPL/BgD9//r/CQDj/xsA5v8QAPT/CQD9/woA/P8EAAMA/P8KAPj/AAAAAPf/AQAAAO//CgDt/wMA9v8BAPf//P/3/wEA+f/z/wEA8f8DAO7/DwDt/wwA/f/s/wgA5/8IAPb//f/7/+7/CQDn/w8A7P/+//z/9P8DAPb/9P8LAOj/EQDz/wgA9P8HAPT/AwAAAP7/+v/8/wQA+f8LAPH/CAD4//r/BgD5/w0A8v8HAAEA/f/6/wMA6/8WAOv/BgDx/wAA7f8GAOb/+//2//r/9f8DAPX/+f8DAOv/AgD5/+7/FQDg/wkA7v8HAPj/BQD+//7/BAADAPr/BwD3/wUAAgD+/wIA//8FAAwA//8PAAMAEQACAAsAAAALAP//CwD7/wYAAwD6/wIA+f8EAPP/BQD6/wEAAQD9/wkA+f8CAAEA//8AAAMA+//9/wAA+P/8/wMA+f/4/wQA8/8GAPH/9/8CAPX/BwD6/+n/EgDd/xMA6v/5//3/6v8FAO3/9//y//j/8P/z//r/5P8BAO//9f8HAO/////z/wUA5/8WAOb/EwDy/wYAAQABAAEACAD//wwA+P8KAAEAAQAFAPH/GQDj/xUA7P8OAPr/9f8HAO7/EgDn/wMA+v/6//v/9/8BAPP//v8AAPX/BgD4//L/BwDk/w8A7v8CAP3//f/1////AQAAAAQA/P8BAAcA8v8RAPj/CQACAPD/FgDm/yEA4/8WAO3/BwD+//7/BwDy/wEA/f/4/wcA+f////3/9/8GAP3/BwD3/wkAAwDy/xQA5/8QAPr/+/8CAPz/CQDw/woA8P8FAPr/AQDu/w8A5P8TAO3/CQDu/wAABADp/w0A7f8EAPz/9P8GAOr/CQDp//3/BQDu//z/+v/3//j/+//s/wgA6/8HAO7/AQD3/wIA7/8HAO7/AgD7//P//f/7//b/BADw/////P/6//L/EADk/xYA5P8MAPL/BQD7//7/+P8FAAMA9f8LAO//DwD2/wAACQD7////AgAGAO//GwDn/xkA8f8YAOz/EgD5/wwAAAAAAP3/CADz/wsA5P8UAPL//v8BAPf/AQACAPn//f/6//D/CgDq/wsA6//9//3/8/8HAPP/CwD2/wcA9P8EAPz/8/8BAPf/BQD+//b/+P8EAPD/DADy//3/+f/8//f//v/2/wAA7f8MAOr/DADw/wgA9v////7/CQDu/xkA6v8WAPb/AwD3/wsA8v8PAPb///8CAAMA9/8NAOP/FgDt/w4A+v8AAP///f
/+//n/+P/8//X/+f/7//T/AQDz/wUA7f8LAOv/BgD0//v/AAAAAP/////6//T/+f8AAPn/9//y//3/9P8DAPf/7/8PAPD//f8IAPH/BgD8//j/BgD3/wEA/P/4/wgA9/8FAPf//P/+//b/+v8DAOv/CwDt/wUA8f8KAO//CwDy/wgA+/8AAAIA/f/3/w4A7P8GAPv/+v8GAPz/8/8FAPn/AgD5//3/AgD6//v//v/+//7/BAD2/wQABQDv/wkA9f////7/8f8DAPj/+v/0//b/+f/5//z/+f/1//v/+//6/wcA9/8CAPr/AwACAAIABwD3/xMA5f8cAOT/GQDu/woA7v8PAOv/EAD1/woA7f8IAO3/DgDx//3/+v/3/wAA/P/x/wYA6P8SAOD/DgDs/wIAAwDp/xUA4f8UAOv/DgD1/wMA/P/7//3/AwD2/wsA8P8MAPD/AQDw/wMA9f8FAOn/CQDv/wQA7f/6/wMA6P8OAOX/AQD///H/AgD7//f/AwD0/wsA6v8IAPD/AAD4//r/9f/8//D/BQD2/wAAAgD1/wUA8f////z/BgDz/wwA8v8SAPP/DgD6/wgA/f8FAPz/BgDx/w4A9//9/woA8/8JAAIA8v8LAPf//v////3/AgD6/wUA9P8BAPP////1/wsA8v8HAPH/BAD8//L//v/x/wUA+P/6/wAA5f8NAOj/AwD0/wMA7P8FAPn//f8AAO3/AgDz//3/9//8//D/AAD4//3/AQD3/w8A6/8YAOv/DQD0/wQA9v8HAPL/9f8HAPX/CQDv/woA/f/9/wMA+f8IAPL/EQDu/w0AAAD6/wIA///5/wwA5f8UAO3/EQDt/xMA7/8LAPD/DQD2/wMA+f/+/wUA7f8JAPD/CADy/wQA8v8GAP3/5v8JAO//+/8EAPT/9P8EAOz/AgD5/+r/CADs/wQA9P/3//r/AADr/wYA8v/7//r/+v/y/wsA9f8FAPD/AQD6//L/GADe/xoA5/8KAAEA9/8MAPn/BgD3/wsA9f8HAP3/9v8KAPn//P////j/BQDz/wgA7/8HAP7/8P8TAOX/FgDt/wgA/v/8/woA/v8GAAIA/v////f/AAD8/wAA9f/7//f/9P/+//b/AADq/wsA7P/9//3/9v8IAPP/+//5/wAA/v/8//v//f/1/wYA+P/6//7/7P8NAOT/DQDq/w8A6/8IAOv/FwD0/wwA9v8FAAEACgDv/wQAAQD5/woA+P8BAP7/+v8CAPX/BwD9/+v/EgDq/xAA8P8HAOf/GwDo//z/BQDy/wYA+//z/wIA6f8QAOH/DADw//j/CADo/wkA6P8SAOT/FQDi/w0A8v/8//v/8/8GAO7/+//7//P/AwDv/wMA9/8AAPb/+P/6//z////9//v//P8QAOP/HQDj/x4A7f8IAPv/DgDx/woA+v/3/xQA6P8UAO3/AQADAPj/AAD9////8/8OAO3/EgDo/xEA9P8CAAAA9//6/wMA6v8QAOf/EgDg/xAA8/8GAP//+f8EAPv/+f/2/wMA8v8DAPj/9/8CAO3/AwD9//j//v/1//3/8f8GAOb/DQDr/wwA8P8GAPr//P8MAPP/DwDr/xMA8v8CAAYA8P8BAP3/8f8LAPr/8P8OAOL/CwDz//P/EgDr/wkA7v/+//f//f/s/wAA+f/+//z//f/3//7//P/w/wwA9P8IAPj/BADz/wYA8f/+/wQA5P8KAOz/CAD3////+f////3/+P/7//n/+/8HAPH/CAD3/wIA+f/9//7//v/5//3/9f8BAPn/AgD6//7/AgD5/wYA8/8FAPn/+v/7//3/+P8AAAEA+//w/w8A5f8NAPv/9v////r////5//3//v/z/wQA8v8NAO7/EADo/woA+v8CAAAA9f///wYA6/8ZAOH/DQDv/w4A5/8VAOP/CgDv/wkA7v8FAPL//f8GAO3/AgD+//H/CgD3/wUA8f8IAPH/DQDw/wUA9P8JAO7/CgDz/wMA+f
/5/wUA8f8IAO//BwDs/wIABgD1//r/BwDn/wsA+//0/wYA8P/9/wQA8/8LAPD/CAD2//L/DQDs/wUA+v8AAPr/AgD4/wUAAwD+//f/DAD6/wIAAQD0/w4A6/8QAPD/DADw/woA8f8JAPH/BAD7//7/AwDr/wwA7/8FAPj/8f8OAO//BwD3//3///8GAPX/AwD7//3/BAD+/wEA+v8KAO7/DADx/woA+f8HAPL//v8BAO3/DwDi/wUA9v/8//r/9v/5/wIA9f/6/wEA///1//P/AgD2/wAA+P/t/wUA7/8FAPD/BAD3//z/AAD2/wMA9f8EAPT/EADm/wYAAADw/wYA6/8JAOz/BwD5/+v/GwDq/wsA+/8BAAcA9P8MAO7/DADz/wYA+f8IAPT/CQD0/wAABgD4/wgA9//6/wYA7f8QAOr/EAD4/wQA9f8DAPv/AAABAPH/BwD3/wcA9/8EAPv/+v8DAP3///8BAPP/BgDo/wkA9f/3/wsA8P8FAPz/+v/6//z/AgADAPb/BQD9/wQA/v/7//j/AQD/////BgD6/wMA+v8CAPb/AQD+//n/8v8JAO3/DQD3////+f8GAPH/CgDw/wMA/P/8/wsA5v8TAN3/EADn/wYA9//y/wYA4v8XAO7/BgD5//z/AgD6/wAA+v/7//3/AADy/wIA9v/5/wQA8//6////7v8AAO3/AAD1//v//P/3//r/AAD9//7//f/3/wcA9f8EAPv/+P8EAP//9/8EAP//+v8XAPH/CgD+/wUAAAADAAMA/f8JAPn//v8BAAIA+/8FAPf/BAD6/wAAAAD0/wgA7/8BAP7//v/5//3/+v/8/wsA9////wAA8/8QAPf/BQD4//7//f///woA8v8PAPH/CAD4/wEACQD7/wMA/P8DAAEACgD8/wgA+f8HAPb/DQDr/wsA+f/v/xAA5/8NAPf//v/8//r////7/wcA9v/9/wsA6f8TAO3/CAADAPT/AgD8//L/DQDs/wsA8P8AAPf/9f/4//v/9//+//b/AAD5//3/9f8BAPn/AQDv/wEA6/8JAOv//f/8/+r/DgDn/xEA4P8SAOf/CgDx//r////2//n//f////7/AQD7//3/+//2/wMA+////wEA8f8PAPH/DgDx/xEA7f8QAPj/CgD3/xAA9/8PAAEAAwAJAAgA/f8MAP//BgD8/wQABQD9/wsA9v8CAAMA+f8JAAAABQAAAAEA/f8MAOT/FwDi/xMA7/8DAPb/9P8FAOz/BADz//3/9f/+////+v/6//f//v/y/wYA6v8QAOf/DADx/wYA/P8BAAEA+f8BAPv/+f/7//j/8////+7/7f8GAOf/EQDg/xMA9P/+//z/+v/9///////4/wEABAD1/wMA+P/4/wkA5f8HAPn/+//9//n//v/7//b////2/wkA6v8OAPL/BAD7//r/AwD5//3/AAABAPv/9//u/wwA7P8RAO3//P/9//r/9/8AAO7/DwDp/xUA7P8OAO3/DAD3//j/DQDi/xIA7f8AAAEA+f/8////8/8EAPD/CwD0/wcA/f/5/xAA9f8LAAEA9/8PAPX/AQABAPr/9P8OAOb/FADk/wIA9v/9//z/9v8FAO//AAAFAOb/EQDr/wMAAwDy/wIABQDy/xEA5/8KAO7/BgD1/wMA8//9//v/9v/6//r//v/9//n/+f/1/wkA8v8JAO3//f8BAPT/AwDy//3//P/m/xAA5P8IAOn/9v/5//X/AwDl/xQA4/8KAPf/BAD///r/BQD0/wwA7/8HAO3/BgD0//3//v/8//v/+P8EAOv/DwDx////+P8DAPn//v///+3/FADl/wwA5f8NAO7/CgDo/wMA7P8IAOn//f/1//n//f/z/wAA8/8EAPb/BQD1/wYA8//+/wQA5v8WAOL/CQDr/wcA8f8GAPb//v8CAPD/CgDr/wsA+f8CAPf/BAAFAO//FADg/xgA7P8GAPT/CwD1/wAABAD2/w0A9f
8GAPb/BAACAPH/DAD3/////f8DAPz/DADz/wYABAD8/wMA8v/8//3/8v////P//f/p//r//v/5/wMA8f8BAPz/9f/0/wAA7//+////8P/+//P/9v/+//r/8P8BAOv/BgDx//r//v/0//7/9//8//f/CQDx/wIAAQDy/wsA+v/+/wUA8v8HAPj//P/8/wAA9f8EAO3/AwABAPT/EgDv/xUA5v8PAPT//P/7//P/CgDz/wcA8P8AAAUA7/8IAO7///8BAPj/CQDy/wUA/P/4/w8A7P8WAOv/EAD7/wAA/f///wEA9/////P/CgDq/wgA4/8NAPX/CQD3/wEA/P/6/////P/3/woA7v8OAOr/EgDv////AADz/wgA9f/0/woA8P8KAPj/DwDn/xMA5f8WAPT/BwD6/wAA/v8IAPL/DQD0/wMAAAAAAAMA+P8CAPn/AAAAAPT/BQD2/wIA8P8IAOn/DADr/wEA+P////b////v/wYA/P/w/w8A5f8bANz/DQDr/w4A4/8cAOX/EADr/wIAAwD1/w0A5f8MAOz/CgDr/wwA9P/7//7/7P8UAOr/EQDm/xQA5f8VAOz/DADy//z/CADp/xIA3v8cAOH/DADy//3/CAD0////8/8GAOz/EQDl/xIA5f8VAPH/CwD+//L/EwDo/w4A6/8DAPD/AQDs/wkA6//0/wEA5f8EAPP////v/wwA4P8PAOX/DgDq/wkA+v8BAPn/BgD3/wcA+P8FAP//+P8KAOz/FwDk/xcA5/8WAOX/EADz/wwA8f8NAP7/+/8IAOr/FADo/woA+//5//3/BgDs/xYA5f8TAO//EQDz/wYA+P8EAP7/+v8CAPX/CQDx/wQAAQDy/xEA3v8QAO//AgDw////6/8HAOr/BQDu//v/+v/5//r/+P8KAPL/DQD9/wAAEQD1/w8A+P8JAPT//v8CAPz/8/8GAOT/EADq//7/BQDs/wcA6P8LAOf/CQDu/wQA+v8BAPv//P8BAPz/CwD3////+P8AAPj/+P/8//L/9v/7//D//v/8/+z/DQDi/wsA8v/4/wwA9P/7//7/+P/5//D//v/5//P/DgDp/wgA8/8EAPf/+P8JAPP/EgD2/wYA+/8LAPb/+/8DAO//DgDs//z/AADw/wcA6/8JAPD//v/2//v/AgABAAEAAgD2/wYAAgD4/w0A7f8GAPz/8v8QAOf/DADv/wEA/P/5//z//P/4//3/9P8MAOv/EQDy/w0A9/8IAAMA+v8MAPn/BgD9/woA/P/+/wIA8P8OAO7/CwDw//7/9//5/wYA8P/+//r/9f8HAA==\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 112\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 112_003_0107\n", + "Original Audio: 112_003_0107.wav\n" + ] + }, + { + 
"data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 112_003_0107_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 118\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 118_003_0522\n", + "Original Audio: 118_003_0522.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 118_003_0522_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + 
"text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 118_003_0836\n", + "Original Audio: 118_003_0836.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 118_003_0836_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 118_003_1091\n", + "Original Audio: 118_003_1091.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 118_003_1091_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source 
src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 121\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 121_003_0527\n", + "Original Audio: 121_003_0527.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,UklGRlo3AwBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAATElTVDQAAABJTkZPSUNNVBEAAABQcm9jZXNzZWQgYnkgU29YAABJU0ZUDgAAAExhdmY2MC4xNi4xMDAAZGF0Yfo2AwD///7//v/+////AAD//wAAAAD//wAAAAACAAIAAgACAAMAAgADAAIAAQAAAAAA///+//7//v/////////+/////v/+//////////////8AAP/////+//7///8BAAAAAgAAAAEAAAAAAAEAAQABAAAAAAAAAAAAAQABAAIAAgADAAMAAgABAAAAAQAAAAAAAAAAAAAA/////////v/9//3//f/9//3//f/+//3//v/9//z//f/9//3//f/+//3//v/+////AAACAAIAAQACAAEAAAAAAP////////7///////7//////////f//////AAABAAAAAAD//wAA//8BAAIAAQACAAIAAwAEAAMAAgACAAEAAQABAAEAAQABAAIAAQABAAIAAgACAAIAAgACAAIAAgABAAIAAQAAAAAA//////7//v/+/////v/+//7//////wAAAQAAAAEAAAABAAEAAAAAAP///v/+//3//v////7//v/9//3//v/9//7//v///wAAAQABAAIAAwACAAIAAQABAAEA//8AAP////8AAP///v/9//3//v////7/////////AAD///7//v/+//7//v////7//v/+//z//P/8//z//f/+////AAAAAAAA//8AAAEAAgADAAQABAAEAAMABAAEAAEAAQAAAAAAAQACAAEAAgACAAIAAwACAAIAAQABAAEAAQABAAIAAwACAAQABAADAAIAAgABAAAAAAAAAAAAAAABAAEA//8AAAEAAgABAP//AQABAAAA////////AAABAAAA//////////////7/AAD//////////////v/+//7//v/9//z//f/7//v//P/9//z//f/9//z//P/8//3//v/+//7////////////+/wAA//8AAAAAAQAAAP//AQABAP//AQAAAAAAAAAAAAEAAwACAAIAAwACAAEAAgABAAEAAQAAAAAAAQACAAIAAwACAAEAAgADAAMAAgACAAIAAwAEAAMABAADAAIAAgABAAEAAQABAA
AA/////wAAAAABAAIAAgACAAEAAQACAAEAAAACAAEAAgABAAEAAAD/////AAAAAAEAAQAAAAEAAQACAAAAAAABAAMAAgABAAEAAQABAAAAAAD///////////7//f/9//v/+//8//z//P/9//3//f/+//3//f/+//7/////////AAAAAAEAAAABAAEAAQAAAAAAAAAAAAEA//8AAAAAAAAAAAAAAAACAAIAAgACAAIAAgABAAMAAwACAAEAAQAAAAIAAQABAAIAAgACAAEAAgADAAIAAgABAAEAAQABAAEAAQACAAIAAgACAAIAAQABAAAA/////wAAAAAAAAEAAQAAAAEAAgACAAIAAwADAAMAAwADAAMABAADAAIAAwABAAIAAgAAAP///////wEAAQACAAEAAQABAAAA/////////v//////AAAAAAAA///+//3//P/8//z//f/8//3//f/+//7///8BAAEAAAD//////v/+////AAD////////+//7///////7//////wEAAQAAAAAAAAABAAIAAQACAAIAAgACAAIAAwACAAIAAgADAAIAAgACAAIAAQADAAIAAAACAAEAAQAAAAEAAQABAP/////+/////v////////8AAAAAAQABAP///////////////wAA//8AAAEAAgADAAQAAwAEAAMAAgAEAAQAAwACAAIAAgACAAIAAAAAAP///v/9//z//f/8//z//f/+//z/+//9//z//f/+//3//////wEAAAABAP/////+//3//P/9//3//P/9//7///8AAAAAAAAAAP///v/9//7//v/+/wAAAAAAAAEAAgACAAIAAgABAAAAAAAAAP//AAD+//7///8AAAAAAAABAAAAAAD//////v///////v////7//f////7//f/9///////+/////////////////wAAAQACAAMAAgADAAMABAACAAMAAgACAAIAAgACAAAAAQABAAAAAAAAAP/////+/wAAAQD//wAA//8AAP7//v/+//3//v/+//7//P/9//3//P/8//z//f/9//3//f/+//3///8AAAAAAQAAAAEACwAKAAkABwAFAAcABQAGAAYABQAFAAMAAAAAAP///f/9//////8AAP///v/9//7//v/9////AAAAAP///v/9/////v/9////AAD+/////v//////////////AQAAAAEAAQADAAIAAQACAAIAAgABAAAAAAABAP/////+////AQAAAP///v/9//z//f/8//z//v///wAA/////wAA///+//7/AAD///////8BAAIAAQABAAIAAgABAAMABAAEAAUABQAGAAYABAAEAAQABAADAAMABAAFAAIAAgABAAAAAAABAAEAAgABAAAAAAD///3//f/9////AQAAAAEAAgADAAEAAQAAAP///f///wAAAAAAAAAA/v/+//////8AAAMABQAEAAIAAgAAAAAAAQABAAAA/////wAAAQABAAEAAgACAAEAAQABAAEA/v//////AQAAAP///v/////////+/wIAAAD///7//v/+//3//P/8////AAD+//7////9//z//f/+/////v///wEA//8AAAAAAQAAAAEAAQAAAP///v8AAAEAAAACAAEAAAAAAP7/AAAAAAIAAwACAAMAAwADAAIAAQABAAAAAAAAAP//AAAAAAAAAQACAAEA//////3//f/8//z//f/+//3//P/+//7//v/+//7/AAABAAAAAQACAAMABAADAAQABQAEAAMAAwADAAEAAgABAAEAAQACAAEAAQABAAEAAgACAAAA///+////AAABAAIAAQABAAAAAAAAAP/////+//3//v////7////+//7//f/+//7//v/+//7//P/7//3//v/8//r/+f/6//n/+f/6//r/+//7//7/AAABAAEAAAD+/////////wAAAAAAAAEAAQABAP/////9//
3//////wEAAAAAAAEA/f/+//7//////////v/+//3//v/+//7////+//////8AAAEAAQACAAEAAAABAAEAAwACAAMAAgACAAEAAQAAAAEAAQAAAP///////wEAAgACAAIAAgADAAMAAgABAAAAAAABAAMABAAEAAMABQAGAAYABgAFAAUAAwACAAMAAgACAAAAAAABAAEAAQAAAP7//f/8//z/+v/7//3//f/9//7//v/8//z//f/+/wAAAQACAAEAAAABAP//AAABAAIAAwACAAIAAgACAAAA/v/9//3//v////////////3//f/8//z/+v/8//7////+//z/+f/5//n/+v/8//v//v///wAA/////////v8CAAIABAAFAAQAAwAEAAQABAAGAAUABQAGAAYABAAEAAYABQACAAIAAQACAAMAAgACAAIAAAACAAIAAwADAAQABAAEAAMAAgACAAEA//////3//f/+/wAAAQACAAIAAQABAAEA////////AAABAAEAAgABAAAA///9//7//v//////AAABAAEAAAAAAP7//v/9//3//f/+/wAAAAD///7//f/7//v//P/9//3//P/7//v/+v/7//3//v///wAAAAAAAAAAAAD/////AAD/////AAACAAMAAwADAAIAAwADAAIAAgACAAMAAgABAAEAAQABAAAA/v8AAP///v////7///8BAAEAAAD//wAAAAD//wAAAAAAAAAAAAAAAAEAAQADAAMAAgAEAAQABAADAAMAAgABAAEAAAAAAAAAAAABAAAAAQAAAAAA//8AAAIAAAD//////f/9//3//v///////////wAA///+///////+///////+//7//v/+//3///8AAAIAAwABAAEAAAD///7//v///wAAAQACAAMAAwADAAMABAADAAQAAwADAAQABAAEAAMAAQAAAP///v/9//z//P/7//z//P/8//z//f/9//7//////wAA//////////8AAAAAAgABAAIAAgADAAMAAQACAAIAAgABAAEAAQACAAEAAQABAAAAAAD///3//P/7//z//f/8//7//v////7//v///wAAAQABAAIAAQADAAIAAgACAAIAAgADAAUABQAGAAYABQACAAIAAAABAAIAAgADAAIABAAEAAMAAwACAAIAAQABAAAAAAAAAAAA//////7///////7//f/9//7///8AAP7//v//////AAAAAAEAAgABAAIAAwACAAEAAAAAAP///v/9//3//f/+/////v///wEAAAAAAAEAAQACAAIAAAABAP///v/+//////8AAAAA/v/9//3//P/8//7//////wAAAQADAAIAAgACAAAAAAD//wAAAQABAAIAAgACAAIAAgABAAAA//8AAAEAAgADAAQABAAEAAMAAgABAAAAAQAAAP//AAD//////v/9//3//v/9//7//////wAAAAD///////8AAP//AAACAAEAAgACAAEAAAD+//3/+//6//v//v8AAAIAAwADAAMAAgACAAEAAgABAAIAAwADAAMAAgABAP///v///////f///wAAAQACAAAA///9//3//f/+//3//v///////f/+//7//v///wAAAAD///7///////7//v///////f8AAAEAAQABAAEAAgACAAEAAAD//////v/+//7////+////AAABAAEAAgADAAIAAgABAAIAAwACAAEAAQAAAP/////+//7//f/8//3///////7//v/+////AAAAAAAAAAAAAP//AAD+//3//f/9//z//P/9//3//f/9//3//f/9//7///8AAAEAAgABAAIAAQABAAIAAQADAAMABQAEAAQABAACAAEAAQAAAP/////+////AAAAAAAAAgACAAMAAwACAAIAAAD///3//f/9//7/AAD///7///////7//f/8//3//f///////////wEAAQACAAEAAQABAA
IABAADAAMAAwABAAAAAQACAAEAAAD///7////+/wEAAQACAAQABAAEAAIAAQACAAEAAAAAAAEA//////7////+/wEAAQACAAIAAQAAAAAA//////////8AAAEAAwACAAEAAAD//wAA//////7//v/+//7/AAD/////AAAAAAAAAAACAAMABAAEAAQAAwADAAMAAgABAAAA//////7//v/9//3//v///wAAAQABAAEA///9//v/+v/6//v//P/9//7/AAAAAAIAAAAAAAAA/////wEAAwADAAIAAwACAAEAAQD///////8AAAAA//8AAAAA//8BAAIAAwACAAIAAgABAAEAAgACAAIAAgAAAAEAAwAAAAIAAQD//wAA//8BAAMABAAEAAUABgAGAAYABQADAAMAAgABAAIAAQADAAQABAABAAEAAAD//////f/9//3//f/+//////////////8BAAIAAgACAAMAAgADAAMAAgACAAAAAAD+//7//v////7//v/+//7//v/+//3//v/+//7//v///////v/+//z//P/9//v/+//8//z/+//7//v/+v/6//z//f///////////////////wAAAAD/////AAABAAIAAwACAAIAAwADAAQABAAFAAQAAwACAAMAAgADAAMAAgADAAMAAwACAAIAAwACAAEAAgABAAIAAwACAAMAAwACAAIAAgABAAIAAQABAAEAAgAEAAQABQAFAAQABAADAAEAAgAAAAEAAAD+//////8AAAAAAAAAAP//AAAAAAAAAQACAAIABAADAAMAAwABAAEAAAAAAAIAAgACAAEAAAAAAP///v/+//3//v/+//z//P/8//v//P/7//z/+//8//z/+//9//z//P/8//z/+//7//3//f/+//7//v/+//3//f/9//7//////wEAAQACAAIAAQABAAIAAgAEAAQABQAEAAIAAQAAAP7//v/+//7//v/9//3//f/+//7///8AAAAAAQABAAEAAAD+//////8AAAMAAwACAAIAAgACAAEAAgADAAMAAwAEAAQABAAEAAUABAAEAAUABQADAAMAAgAAAAEAAQABAAEAAgABAAEAAQACAAEAAAD//////v/+//3//v/+/wAAAAAAAP/////+////AAD///7////+/////f/9//z//f/9//3///8BAP///f/8//v/+//6//r/+f/7//v//P/6//r/+v/6//j/+P/5//v/+v/8//z//P/+//7////+//7//////wAAAQABAP///v8AAAAA//8AAAEAAQABAAEAAAACAAMABAAFAAQAAwADAAIAAwAEAAIAAgADAAAAAAD///7////+//3///8AAP//AQAAAAAAAAACAAEAAgACAAIAAgADAAIAAwADAAEAAgACAAIAAwADAAQABAADAAQABAADAAMAAgABAAEAAQABAP///v////7///8BAAEAAAD//wAA/v8AAP///v/9//3//P/8//v//f/8//z/+//7//v/+v/7//z//f/9//v/+//8//3//v////////8AAAAAAAAAAP///////wAAAAAAAAEAAQABAAAA///+//3//f/+////AQABAAIAAAD//////v/+//////8AAAEAAAABAAEAAQACAAAAAAAAAAEAAwAEAAMABQAGAAYABgAHAAgABwAGAAUABwAFAAUABAAEAAIAAwACAAAAAQABAAEAAAAAAAEAAQABAAAAAgADAAMAAwADAAIAAwABAAAA///+/////////////////////v/+//7//////wIAAQACAAAAAAAAAP///v///////f/9//3//v/+////////////AQABAAIAAgACAAIAAAAAAAEA//8AAP//////////AQACAAIAAgADAAUABQAGAAcACAAIAAsAEQARAAwACAAIAAgABwAEAAEA/v////7//f/7//T/+f/+/////P/6//z/CAAaAB
sAEwAGAPr///8EAAEABAAGAPz/8f/m/+r/7f/x//3/CwAZAA8AEQATAA4ADgAeAAYAEgAHAEIABgDV/q//OgBv/8n/FABj/8f/rQDu/xoAiQDY/9L/gAC7AIEAhQApAK//u//7/yEAEAASANX/pf+x/6j/+P/N/53/7//8/+3/EAAqAPD/MQA4APT/FAAnABwACQAsAEsAGwDn/93/2f/m////8//d/+b/4v/i/+P/+/8IAOb/+P8dACQAFgAXAAkABAAVABIAEwAUAA0A+/8ZACcA/v/1/wkA+P/l//v/8f/g/+j/5v/u//H/3f/a/+v/BQAPAA0AGgAgAPX/+/80ADQAFwACABgAAwDl////DwD7/9X/5f/0/+H/5f/w/+z/9P/4//z/AQDz/wAAEQAVAB0ADwAHABUAGgAPAAYADQAEAPX/+v8LAPX/1f/3//T/3v/n//T/8f/m//v//P8CAPz/+/8fABAAAQAPABkA/v/9/ysAFgD5/wcA/P/4/xAAAgD8/w4A1f/b/xMA+v/2/wcA7//m/wEAAQADAAoA9//p//r/DAD7//H/CwD4/+H//v8KAAoA9v/v/wsADAD2/wAAEAD//wAA/v/8/wsACQD5/+7/AQAMAP//9/8CAAMA8v8IAAoA7f8CAP7/9f8JAAsA+P/y/wgADAD6//r/+v///w0A/f/0/wEAAAACAPv///8QAPX/8P8FAAgADAD+//T/AAAVAAkA+v/+//r/AwAHAAYA/P/5/wIA/v8UAAUA7f8IAAYA/v8CAP//BAAAAAQAAgD0/wQABgADAAoA4//o/xAAAAD5//X/6/8DAAsA9P/x/wMABgD5/wUAAQD5/wUACAAFAP//BQD///v/DgAAAPz/CQD6/wIACwD7/wEABQD9//7//P///wsACgD1//P/AgADAAYA/P8KABAA9f/9/wUAAQAJAAcA///z//X/CgALAAIAAwAGAAcA+P/6/woA/v/1//r/AwADAAMACgD7//n/AADy/+7/BAAPAAUASABbAMr/yf/K/8H/3//UABYBS/+z/+P/a/8KAFIAeQDy/2v/qf9gAP//6v8zAHf/mf9CAIoAXAD//+r/ov/R/z8AGgALAB4AFQD7//X/CwDh//X////6/wgA6f94ABUAcP8NAEQAAQATAB0AAwDy/xYAAwDE/zUAIwDP//T/JgAKAO3/9v/X//n/FgAvAAQA7/8pABEA3v/q/zUAMQD///z/CAAmAPL/1P/2/wEAEQAbACIADQDP/8v/9//1/wgADwD+//P/9v/b/+D/CAD6/w8ABwDl/9j/9v8JAPv/CwAJAPP/4/8AAAsAHgAlANX/8P8dAPL/+f8LAAcACAALAN//3f8OAAgA9//o/9r/2v8DAA4A1//w/yMA6/+8/+7/AQD2/wcA6//2//v/2v/8/yoAMQAGAPL/EAALAAkAPgBEABoAHgAoABkAJAA+AD0AMwA5ACAAHQAtABEAIAA3AC0AIQAUAAoADQAjACIAHAAMAO//+/8hABcABQANAAsAAQDz//z/DQAEAPf/8//t//H/AAD6/+n/4//z//3/4v/U//7//v/X/9f/4v/v/+n/4f/n/+v/4f/X/9//7//0/9v/1f/h//L/6f/j//D/6//r/9z/4//y/+j/7f/x/+T/5v/z/+P/3v8BAPz/7P/1/+P/7f/1/+X//f8NAPT/6v/1//T///8NAA0A/////wsABQAGAAwAEgAPAAQACgAKABEADgAFABIABgD//w8ACQAEAAYA/f8FAAcA/f8GAPf/9v8EAP7/AAD8//v/AgD+//z/AQD+/wEABgADAAMABQD//wEADAAIAAQABwABAP7/BgAGAAcAAQD6/wUABwD7//v//f/5//3////6//b/9f/2//v////9//r/+v/0//b/AAD///v/+//2/+3/8/8BAAAA9//1//b/+v/7//v/AAD9//r//f/5//b/+f///wUABAD9//z//v/+//
7//P/6////AgD6//3/AAD+/wAA///2//n/AQABAPv//f/8//v//f/+/wMAAAD//wIAAwD7////BAD7//j//v8EAAIAAQAAAAAAAQACAAsACQD///n//v8HAAkACgADAPr/AgAGAAMABAAEAAEAAQACAAAAAgACAAEA/v/9/wAAAAD+//z/AAD+//n//v////7//v8AAAIAAAD8//v/AAAAAAAAAwABAAAAAgAAAP3/AwAIAAYABAAEAAEAAwAIAAUAAwABAAIAAwACAAAABAAIAAQA/f/9/wAA/f/+/wIAAAD5//z/+//8//z/+/8CAAMA/f/9/wMAAQD8//3//v/8//v//f8AAP7//P/+/wEAAgABAAgADAADAAMABgAFAAYACQAFAAQABQABAP7//P/8//3//P/7//z/AAACAAQABwADAPz/AQAHAAQA//8AAPv/+P/4//v/AQAAAPj/+f/+/wEABgAFAAMAAQD//wAAAgADAAAA/f/9//z//P8BAAMAAQD//wIABwAFAP//AgAJAAcACAAHAAAA//8BAP3/BQAOAAUAAQADAAMABgALAAoAAwD+//v///8DAAAAAQAHAAMA+/8AAAYACQALAAMAAAADAAMABAAIAAkAAgD7////BQACAP//AwACAP///v8AAAAAAgAEAAIA/f/9/wIABgAHAAMAAAD//wEAAQD///3/AAADAAAA/f/6//r//P/9//7//f/5//v/AAAAAAIAAQD+//z/+f/8/wAAAQAAAP7/+/8AAAEAAQACAAAAAgACAAAAAQABAAIAAwD9//v//v8AAAEAAgABAP7//f8BAAMAAQADAAMAAgD9//z//v8BAAMAAQABAP7/+//9/wQABgACAP7//f///wEAAwACAAUABAADAAkACgAGAAcACQAHAAYABQAEAAYABQAEAAYABAABAAAAAgABAPv/+v8DAAgAAgD9//7/AwADAP7/AAAIAAcAAgADAAQAAQD9//3/BAAFAP7/+v/7//3/AAD///7/AAACAAAA///+////AQD/////AQACAP3/+/8AAAgAAQD2//v/BAADAAAA/f/8//z/+f/7/wQAAQD3//f//P8AAP3/+f/7//z////+//z/+/8AAAQA/f/8//v/+P/7//7//f///////P8BAAYABQD7//n/BgAKAP//+P/8////+v/8//7/BAADAAEAAwADAAEA/P8AAAcABQD+//7/BQANAAoABgAGAAAABgD9//7/BwAHAAkA///8/wEA//8FAAQABAAEAAMAAQD//wEAAgADAP//AgD/////BgADAAQA///8//3//v8EAP//+/8GAAkABgD8//n/+//6/wIABAADAAIAAgD+//X/+/8BAP//+v/7/wEAAAD9//7/AwABAP3/+//6/wEABgADAP7//P/8//j/+v/9/wEA/P/7//3//P/9/wAA/f/6////BgAEAPr/+f/6//v//f8BAAAAAgAAAPv/+//9/wMAAgD///7//f/9/wEABAADAAIAAQABAAAAAQAFAAIAAwAGAAMA/v/9//7/BQAIAAQA//8BAAUAAwAAAAMAAwADAAIAAQAEAAEAAgADAAAAAQD///z/AAADAAQABAABAP7/AQAFAAQAAwAEAAMAAAD9//v//v8CAAMAAgD///7/AQAAAP7/AwAFAAMAAgD9//r/AAAFAAIAAAD9//3/AQADAP7//v8CAP//+//7//z//v/9////AAAAAP7/AgAGAAMABAAJAAcAAgACAAEAAQD+//r/+f/4//X/9P/1//j/+v/9/wEAAQAAAAMABQAFAAYABAAFAAcABQAEAAUABAAEAAIA//8AAAIAAQAAAAEA/f/9//z/+v/6//z////+////AwADAAAAAQADAAQAAgACAAMABQACAAAA//8CAAMA/v///wMAAwAAAPj/+v8EAAEA/f8CAAUAAQD+//7//v8EAAUAAQAAAP3//v8AAP3/AAAGAA
IA/f///wMABQAEAAAA/v8DAAIA//8CAAUABAABAPz//f8FAAUAAgD9//z//P8BAAIAAgABAP3/+//9/wMABgAFAAYAAgD//////f/7////AAD//wAAAQD9//r/AQAKAAgABQAKAAwABwADAAQABwAFAP////8DAAEA+//3//f/+v8CAAcABQADAAIAAQD//wEAAQABAAIA+v/3//r/8//x//b//f8EAAkACwAMAAsABQACAAIA/f/8//z/9//y//D/9P/1//r/AAACAAUABgAFAAgACAAIAAYAAQAAAAUABgACAAQABQD+//3/+//7//7//v/8//7////7//r//v8CAAUAAgD//wIABQAJAAoADAAOAAkA/P/3//v////9//j/9v/1//f/9/8DAAwACQAIAAQA//8FAAkACgALAAcAAgD+//z//f///wAA+//+//7/+f///wgABgD//wEAAQAEAAsACgACAAAABgAKAAMA9P/u//j//P/9/wMACQAGAP3/+////wIAAQAFAAoAAwD1//b//P/8//z/+f/2//v/BAAJAAYA///9/wAA/f/7/wcADQAJAAYAAAD2//j//v/7/wEACwACAPr/+//7/wAAAwD//wAA///7/wUAFQAOAAAA8//r//T//v8BAAkABgD8//P/7v/y/wIAEAARAAUA9v/x//n/AQALABIACAD5//b//P8EAAgAAwAAAAMAAAD8/wEAAwADAAIA+//2//r///8JAAwABQADAP3/9P8AABEADQD///7/BwAHAPb/9f8GAAcABQAHAPj/7f/6/wUACwALAAQA/f/0//X/AwAAAPb/+v/+/wIAAgACAAUA/P/3/////P8BAAsABwAGAAYA+P/x/wAADAAIAAEAAQAAAP3/+v/7//v/AAAGAAkABAD2//n/BAD8//r/AgD+////BQAIAAIA+v/6/wMACAAAAP3/AgABAPf/+P/4//n//P8AAAQABgACAP7//f/2//j///8BAAMAAgAAAAMAAAD6//f/+f///wEA/v8AAAYAAwD5//3/BwALAAgABgADAAAA/v8CAAYABwAKAAcA/f/+/wsACgAEAAMAAgACAAAA//8CAAcABQABAAAA/v////3//f8BAAEA/v/8//z/+//6//3/AgD///7//P/7//3//v/+////AAAAAAIA///9//3/+//6//z//f8AAAMAAAAAAAAA///+//7///8AAP7//v8AAAIAAwAFAAUAAwADAAMAAwACAP//+//8//7///////7/AAAAAP///v/+//7//v///////f/+/wAAAQACAAIAAAD//////f8AAAEAAAD+//7//f///wAAAAACAAIAAAABAAQAAgACAAUABAADAAQAAwACAAIAAgAAAP///v8BAAUAAgABAAEA///+/////v////////8AAAIAAgACAAIAAgABAAAAAAAAAAMAAwAEAAMAAwACAAUAAwAAAAEAAQD///7//v////z/+//+/wEAAAD9//3/AAAAAAAA/v/9/wAAAQD///7//////wIAAQD//wAA/v/7//3//v/+//7//v/+/wAAAAD//wAAAAD+//7///8AAAAAAAD///3/AAD///7//v/+/wAA/f/7////AgD9//3/AwAFAAIAAwACAAEABQAGAAIA//8AAAIABAACAAEAAwAAAP////8CAAMAAQABAAAAAwADAAIAAwAEAAIAAQD/////AQACAAAA//8AAAAAAAAAAAMABgAFAAUAAwACAAIAAgAFAAMAAQACAAMAAQABAAIAAgABAP///P/+//////////7/+//8/wAAAQABAAAA//////////8AAAMAAwADAAUAAgD+////AgADAAMAAgABAAEA///9//z/+//8//z/AAACAP///P/8//7//P/7////AwAEAAAA+v/5//3//f/6//z///8GAAcAAwD8//r///8AAPv//f8FAAUA///2//n/AgADAP
//AAACAAMABgAHAAMAAQAEAAAA+f8EAAgA//8AAAcABgAFAAAA+f8EAAsAAQAEAAgAAQD9////BgAHAP7//f8EAAIA//8AAAMAAwD7//n////6//3/BQD6//T/AAABAPn/9//9/xAABwDx//n/CAAMAAkABgAGAAIA+v/6/wUACAAHAAIA+P/4//v/AAAIAAAA+//+//T/+f8DAP//BgABAPn/AQAAAPf/AgAKAAUA/v/3/wIACwD6/+7/AwAOAP//9f/2/wwAEQD2//H/BAAMAP//9f/7/wYABQDx/+j//P8OAAQA/P8CAAcAAwD9//v/BgAJAAkAAADw//T///8JAAoACAANAAoA///7//z/+v/0//b///8BAAcABwAEAAYAAQD8//b/+/8CAPv/+v8DAAoABgAEAP////8IAAQA+f/2//j//f////7//f8BAPn//f8RAAEA9f/5//n/8//y/wgADgAKAAcAAwAHAP7/+f/6//P/+v8BAP3/AQD+//3/BAADAAUABAACAAQA+//4/wQABQD4//b/AQAGAP7//P8QAA0AAwAGAAAA+f/y//b/AgAHAP//AAAJAPj/+f/2//f//P/u//3/CgAGAAAA/f/3/+///P8GAAoABAACABIAAwDs//z/DgAIAO7/7f8KABMAAADq//v/CgD///j/AgASAAoA8f/t/wsAFQAEAPj/9P/9/wsAEQDx/+//DAAGAPb//f8ZABcA8P/c//L/CwAJAAYABQAPAAoA5f/k/wgAFQAMAPn/8/8GAP3/9v8EAP3//v/0//v/DwAGAAsAEQADAO7/8P/+/woAEgAOABMABADl/+D/8f8IAAUAAQAEAP//+P/z//v//P/5//r/+v/9/wcAEQALAP//AAD4//T/AQAIAAoACQAJAAQAAQADAPf/8v8CAP7/+P8EAAcAAAD5//v//v/x/+r/+f8GAAMABQAJAPv/8//2//n/CgALAAUAAgABAAUA+v8BAAoABQABAAEADAADAPv/BgAQAAQA+v8HAAgA/f/y//v//P/y/wUAFAANAAQABQAPAAkA7//x/wQADQASAAUA+/8JAAsA///9//7/BQD9//j/CAAJAAEA+P/1//7/BQD///X/+//9/wIABAD2//3/EQAKAPz/BQAPABIABgACAAEA/f/7/+//8v/3//X/CAANAAAAAwAKAPf/7f/+/wQACgAKAAQA/P/o/+T/+P/+//r///8NAAoA/P/6////AAABAPj/7//8/////P///wYABwAIAAcADgAUABUAEwD///f/BwARAP7/6v/0/wQA+//z//3/JAAqAP3/9v8FAA8ADQD0/9X/5v8BAPb/8v8EABgAFgAWAAkAAQAVAAgACAAPAAIA7P/O/9H/9f8VABoAGQAeABUA8f/r//3/BwAQAP7/7//0/xMAEwACAA8A9f8CAAgA9f8aABUA9v/b/8P/9P8QAAkA8//s/yAAEADy//r/FQALAAUA/v/V//P/GwAHAOb/8v8JAAQA6//o/wgAHQARAAgACAAKAPP/3v/x/xIAHgD9//z/FAAdAAkA2v/j/xYAJAARAAUABAAOAAkA4P/c//3/EwAVAAwABwAZAPb/0P/l//3/GAAAAPr/GgD8/+P/7//t//P/+P8bACAA/f8DAP3/9f/v//n/FgAYABAACgD+/+f/6v8HAAsA8f/0//L//f8YAAoA9//8/wMA+v/w//r/DgAJAPr/8v/0/+z/5v8MACUAJAApABkA9v/b/9v/7//n/9P/y//g/+//6f8UADkANAA/AE4ASgAcAP//AwD5/w4ADgAGAAEAzv/H/9v/2//S/9j/1//B/9X/AgAlADYAQgBEAEEAPAAsADkALAD8/9T/2//h/8v/0v/b/+T/4P/Y/woACADF/63/ov+s/9r/LABlAHUAVABOAGEAJgAgACAA6f/O/8r/4P/z/9z/qf+D/4P/mP/L/wAA6//V//T/FQA3AEQAVQBxAI
kAiABZAGUAZwAcAMX/c/+E/6//lv+0/+j/+v/Z/5X/sP/p/wAAFwA6AHEAdQBbAEgAPgBCAC0AKwAfABIAMQApABEAGQAJAOP/rv+O/53/xf/e/9//7f8AABYAFgAcADAASwBnAEMAMAAIALn/of+T/4z/lf/S/xIACwABAMj/wv/O/87/7P/T/9P/z//A/+//KgBKAGMAOgAtAAQAoP98/1z/x//J/7j/TABGAH0AewBIAGMANwBCAOn/+f/2/+v/fwAsAPD/AwASAMD/ZP+M/27/mf/Y//H/9/8HADQARAAdAOf/FABSAIsAqgC2AKEAdQBwAEIA3f+g/5j/tv8MAFUAjQCVAGQAYwBHAPz/DwAhAA4Ay/+Q/57/qf+u/77/8v8bABgACwDf/6//j/9u/1j/Xf9//4//uv/q/wcAJQAjAAcA//8OABoAGAAsAEIALgArAE0AbQCUALMAqwCcAE4A+f/v//3/GQBHAD0AHwAYAAMA2P/c/+T/sP+v/9X/BgAhAOf/tf+e/4H/ov+N/7n/mf86/8T+rP5j//f/xv/4/oL/xP+F/3D/TP9O/z7/pP8oALkALgFvAH3/qv/6/wIAYP+W/wMBXgFpAY8BAAHWAIcASgCNAGgAqQBlAUoBuAD5AG4BigH2AFEASQBIABAA2f9OAHgA0f9S/1H/dv9K/6j/CgDJ/0P/xP6p/sD+Cf9R/1X/ff/Q/5//fv/d//L/wP+1/+D/CgAVABkAOwCSAKsATgAZABUAEABpAI4ATgAgADsAYQBsALUA4ACUAGkAfgBsACgA9//F/7n/FQBFABwA8//0/zMAdQA4AMT/lP/C//f/y/9T/0L/mf/E/+T////Y/3X/af+J/2H/Qv+E/7X/xP/b/+//HQAoABcAUgB4ACQA7P/7////BwAXACAAMAAWAPb/CgAeAM//hP+f/9j/+/8TACcANABJAGMAbwBFABgAGAAuAEQAXgBHAAUAyP+v/5n/lv+T/4n/rf+b/2L/cv+q/7z/zv/j//L///8KAAcA9f/e/+3/HAA8AE8ASQApACAAHQAHACIAQgA7ADQALgAWABgAPgBVAFsAWwBuAIUAfQBxAGoAaQBvAGgATwBNAFAAMAAbABYAEAAUABMABwABAPj/7P/o/9j/yf/P/9H/1f/Q/6r/kf+g/6L/mP+o/67/s/+4/67/pf+o/6n/m/+m/7T/yv/m/+D/3f/l/+T/1//W/+//CwAEAP3/AAAIABkACgD7/xYAJQAaABQADgD+//f//P8EACIAMQAtACwAKgAgABgADwAAAAcAGwAmAB0ADwAIAAgABAD7//j///8HAAQA/v/0/+r/5v/s/wMA+v/q//P/9f/w/+v/7P/u//L//P8GAA0ACAAEAAsABQAKACQANAA2AC8AKgAkABkAGQAeABoAFAAQAA8ACAAAAAUAEwARAAcABgADAAMACgATAB0AHAAQAAIA9f/h/93/5v/o/+H/3f/T/8//3f/j/+j/7f/t/+3/6//m/9v/1f/T/87/2//v//n/9f/q//X//P/w/97/0P/M/8z/zv/b/+b/3P/L/9T/8/8PABwACgAIAAwA/P/9/wEAFQA6ADgAIQAfAD8ATAAyACQALgA3ADoAKwAZABsAGgAkACUAFQATAA8AAgADAAQA+f/y/9b/yv/3/w4AAQAEABEACQD2//X/CAAjAB8A/f/q//H/8//s//n/DAAeABcABgAOABMABAANACUAIgAeABoAEgAaACMAHAAgACMACwD3//r/+//7/wQA/v/z/+v/4f/X/9v/6f8BAAwADAASACUAHwAAAPr/BQD8/+j/6v/s/+z/5P/W/8r/sf+V/5T/pv+2/77/uP/D/93/3P/R/+f///8WAC4APgA+ADgANQAiABUAHAAfAAcA7f/b/+P/BwAOAAMAEQAZAAQAAQAMACEANwAmABgAMAA6ACsAJAAmADsARAAiAP////8DAPH/3P/h/+7/2/+7/8
T/5P/x/wEAFwAcABwAEgDt/9r/7/////n//v/3/97/3P/m/+3/9v8AAPr/2v+2/8j///8VAA8AJgBPAGsAYQA7AD4AWQA+ABYAJgBEAEYAQAA1ADgAUQBBAA4A+v/4/+z/9/8DAPH/6v/8//v/5v/z/xoAKQAIANn/wv/A/7n/uf/M/9n/xP+o/6P/r/+4/7T/sf+7/8z/yP/D/9v/8v/7/wEABQAKABYAHQAaAB4AJwAhAB4AMAA9AC8AIgApACMADAACAAwABQD3//n/CAAbACYAKQAqACkAIgAOAPn/7P/j/+X/6P/2//n/9v/r/83/xv/S/83/vf+y/7P/tP+3/8T/xv/e/wkAHgArADIALAAuADEAGQAJABIAFQAPAAoAGgAsACgAJAAxAEAARwA6ACkAHAAmAFkAbQBRAF0AfABwAGAAXQB2AJQAYAARAA0AJgA3ADUAAgAFAC8A9v+4/8j/3f/s/9X/uP/Y/9j/rf+s/5r/jv/G/8b/gP9p/33/ff9o/2L/hv+6/5H/Sf91/7b/sf+k/7v/5v8EAPT/1v/x/ykARwBHAD4ASQBfAE4AJgAOABoAQQBEADEARABVADQAIQAqACwAJgAUAA8AIAAOAOz/6P/w/+//+v////n/9v/c/8b/u//G/9v/xv/M//f/AgDl/9//+f8GAAEA+f8BAAsACQAJAAgAAgD9/wwAEgAXACoALQA0ADMAJQAXABMACgAPAD0ATgAxABUACgALAAQA//8cADEAJgAmAC0ACQDi/+7/9f/o/+z/BwARAP//9//m/9H/1f/p/+r/5v/f/+D/5P/W/8L/yP/a/+n/4P/Y//T/AgDm/9z/4v/1/xAACQD0//f/+//z//P/+v8GAAoA+//n/9r/6f////3/AQAiABcA5v/l/wYAGQAjACIAFgAlADgAOAAoACEARQBDAAwAEQA4ADkANAAxADQAUwAtAOj/GQA/AAsA8f/y/wAAIQAhAPz/6P/n/wAA+P+r/6//AwAXANj/tf/f//P/1f/I/+j/DgADAN//5P/7/+3/2f/s////7//o//P//v8GAP7/8//x//L/9f/6//7/EgAaAAkA/f/n/+v/DAALAP3/EAAbAPv/6P/x//T///8SABkADAAIAAUA8P/T/9L/8//9//7/DQAVABIA/P/j/+z/+f/8/wwAFQAQAAsAAwDx/+//8f/9/x4AIAAWABgAEQAGAPr/7v/o//j/BQD//wcAFwAcAAkA+/8CAP3/9v8CABMAFwALAAQA+v/v//v/BgAJABEAFgANAP7/7f/g/+7//v8EAPv/9f8CAP7/7P/2/woACgADAAcAEAAOAAUACQAdACAABQDv//T/+v/w/+3/7P/x//b/6v/i/+D/6P8BAP3/8f/+/wQA/v/9/xQAMgA1ACoALQAqABAA+f/v//P//f/5//D/6f/P/8T/1v/g/+7/DgAfABkADQADAPf/8v8FACUALgAqACUAJQAWAO//4v/1//f/6v/i//T/CQD5/+X/8P/6//L/+f8NABAACwATABIA+//n/+v/9v/6////EwAfAAkA9P/s/+//6f/q/w0ALAAkACYAIwAIAAsACAD7/w0AJAAYAAIA6f/f/9z/zf/2/xwABwDz//v/9P/k/+n/7f/7/wMAAwACAAEAEQAiAB0ADQD+//L/5f/l/wEAGgAyACcA9v/c/+z/4//R/wMAKQA2ADIABQDm/+r/7//w/wUAIQA1ADYAFQD4//z/+f/q//P/FAAjABIA9P/v//n/5//X/+z/DwArABcA7//7/xkA/P/S/+D/+//t/8P/vP/a/wIAFgAWABsAIwD+/8X/xP/Y/+X/+/8OAAoA8P/k//H/7v/q/wsAJgASAPP/AgAeAPr/3f/+/woABgAmADcAMQAqAAwA+v/7/wgAHAAiACYALQARAOH/3//2//3//f8aAC8AGwAFAOr/1f/q//D/4f/3/xEACADt/9n/5P/v/+b/9P8EAA
EAFAAfAAkA8f/3/w4A+v/n/zUAcwA/ABEAFgADAO3/6P8GAD4AUgA7ABMA6v/u/wYA/f/7/xoAIAD9/9j/2P/5/wsADgAOAP//7v/U/8j/4//w/+r/9v8CAPn/2//H/9T/2//Z/+X/8v/5//b/3//P/9D/1v/p//L/+/8IAPr/6P/d/9X/8f8PABEAKAA9AB0A9P/p////FQANABcALgAmAB0AEgAGAAwAEAAZACcAKAAtADEAJAAXAA8ADAAQAAwACQAQABAAAwD///z/6//c/9r/6//7//P/7P/w/+7/9f/5/+3/6f/9/xAADwD9//P/BgAJAPP/6f/0//f/8v/v//X//P/1/+//9P/0//X/CAAUABQAHwAfAB4AIAAXABcAEwAFAPn/+//+//3/9P/0/wMA/v/t/+//AAAHAAUABwANAAkAAwABAPr/8f/w//D/8//y//P//P/3//f/9v/w//T///8CAAgABAD2//z/AAD6/wEACAAMABUADQACAAQA+v/s//H/9v/7/wQACwARABQABwDx/+b/9/8AAPf/AAAWABgABgD5//v/BQADAAEAAAAKAA0A9//o//P/CgAKAAEAAwAPAAoA+P/5/+//4v/t//v/AAAHABEAEQAHAPH/6f/m/+n/+/8SABgACQD+//L/+P/7//z/CQAhABsACQD8/+z/6f/q//v/DQARAAkABwADAPr/8v/u//r/CQARAA0ABwADAAYACAD7//v/EwAaAAIA9f/3//j/+P/1/wAACgADAAgAAwD3//T//f8CABEAFAD+/+7/5//x//7/9f/w/wEAEwANAAYA///9/wAA/v/1/+///v8NAAwA/P/0//D/6//0/xIAJgAgAAoA9P/w/+H/2v/n/xAAKwAbAAcA/v/8/+7/9f8HAA0AEQAQAP3/7v/0//T/6//v/wYAIQAfAA8ACAADAPn/8P/z/wIACAACAAsABQDz//n/BwACAAEADQARAAcA/P/7//n//v/5//f/CwAQAA0AEQAKAAAA9//x/wUADAD//xAAHQAPAPv/5P/o//z/+P8GABsACAAIABcA9v/Z/+b/AgAaAA0ACAAiAA4A2v/p/wEA6//v/w4AKAAeAPn/8/8DAPD/8P8SABYAEgALAPP/4v/k//T/FgATAAYAHQAYAPL/0//b/wUABwDu/wUAEQANAP3/3//u/wUA8v/+/x0ACwDw/+P/+P8MAAAABwAUAP3/+v/r/9z/AwAVABUAGgD///v/BQDk/+n/CQAVABwAEwAJAAIAAAAFAOX/2P8IABQAFwAtAB4AFAAAAND/0v/x//r/FQA9AFIAHADP/9P/7P/v//j/BAATADkAIwD0/+b/2v/w/wMA/f8VACIA9P/8//f/5//s/9f/DQA6ABoABgD4/+n/9P/t/wEAIAAXABkACgDu/+D/4v/6/xEAGgAfAAgA+//4/9v/4P/6/woAIQAWAPb/7f/w/+v/9f8LABkACQD7/wAA8v/z//f/9f////b/9P/8//v/BAARAAwACgACAPH/9P/2//v/CgAUAAkA+v8HABEABAD6/wgADgAKAPv/9f/3//T//v8AAPf/8f/5/woAEwAAAP//CwD5/+r/6f/7/xQAGAAUAAQA4v/e//f/+P/3/w0AHgAWAP7/7f/0//n/7P/l//P/EgAeAA4AAwD0/+j/6f/w/wgAKQArABkAAwDt/+//9v/4/wsAGQAMAPT/6//x/+//9/8GAP///f8MAAoAAQD8//n/9P/v//3/FgATAAMAAAABAAIAAQAHAAgAAgALAAkA9v/z//f/7P/x/wUACgAGAAcABwD4//T/+//6////BQAFAAkADQADAAAAAgD1/+7/9/8CAAMA+P/4/wUAAQD0//b//f/8//v/BgAOAAUA/f////v/+P/6/wEABQD4//H/+//7//f/AQAEAPn/7f/6/wsABwAFABAADgD8/+X/4//z/wIACQAMABUAGgAJAPr//f/5//
f/8P/z/wYABgAHAAsA9f/r//P/8v8NAB4AFgAYAAsAAgAEAOr/6f/2/+n/9v8MABMAFgAKAAMAAgD9/wIA/f8AAAwA/f/9/wUA//8GAAAA+v8UABoADQATAA4A9v/c/9b/5//t//L/AwAHAP3/9v///w4ACQALAB0AHAALAAQAAgDy/+P/5f/0//7//v8CABYAGwADAP3/+v/z//3/DQAPAAsABQDy/9z/4//0/wYAGgAfABgACQD3//f/9v/y/wEA/P/v/wAACgAEAP3/AgAWAAwA/v8UAAwA+f///+//4v/y////+v/1/wAACwD7//H/AwAUABoAEQACAAkAEwD//+r/9v8GAP//8f8BABoAEAD2//f/BwAFAP//CQAPAP//8f/2//j/8f/7/xcAHQAHAP//AQAAAP7/AwAVABYAAQDx/+r/6f/v//P/BQAPAP7/9//4//T///8PABQADQD7//j/9//u//T/AgAFAAQA+P/4/wsADAAGAAcABgD7/+7/+/8OAA8ACwAJAAwABQD2//f/AAAAAAIACgAJAAIAAwAGAPX/5v/3/wgACAAEAAYADgAFAPD/8f8BAAEA+/8BAAcABAABAAUACAD7//P/9//1//3/CwARABEABgD0//H//P8DAAwAEgAPAA0ABQD0/+//+v8EAAUAAwD4/+z/8f/1//j//f8EABQAFgAFAPb/9P/8/wIA+v/9/wkACwAFAAcADAAFAAMAEAAVABAABgADAAIA+v/2/+//4v/y/wAA+P/5/wQACwANAAEA9P/+/wgACAAAAP3/BwD//+X/5//4//3/+P/6/w8AGwAPAAMAAwABAPn/8//1//7/BgAMAAMA9f/t/+r/8P/4/wwAIAAiABMA///5//L/5v/w/wQADwAOAAIA///8//b/+f/9/wYACAD///z//v/8//f/+v8BAPv/+v8AAAgADgAJAAQA/f/2//X/+P/+/wEAAwD9//f/9v/1//j//v/+/wIACQALAA8ACAD///v/8P/z//7/BgASAA8A///2//D/9P/8/wYAEQAVAAwA/v/6//j/9f/9/wEABAALAAsABgAAAPv/9v/t/+z/9////wAA//8DAAAAAAADAAQADgAOAP3/+//8//j/+f8AAAkABgD8//r////+////CwANAP7/8//r/+v/9P8AAA0ADwAKAA4ACgD7//b//f8EAP///P8NABUA9f/i//D/9v/4/wAADgAbAA8A+//1//T/9P/4/wEACQAJAAoADwD9/+z/9//3//n/EAAZABoADAD3//X/7//y/wUABwAQABQA+//4//7/6f/m//n/CQAOAAgAFQANAOD/3v/1//L/+P8PABgACQDz/+//8v/p//f/IgAlAAEA/v8MAAEA6v/l//7/DwAAAAQAHgAaAAcAAQD8//H/6f/y/xAAGAANABAAAgDl/+z/BQAHAPn///8VABAA7P/s/wwABQDm/+7/AAAIAAwACgAZABgA/v/1//f//P8LABYADQAAAPr/8//r/+P/6f8CABMAFQAKAPz/BgAJAAMACwAGAAwAFQACAPX/6v/n//v/BgD7//7/DAAJAP7/+//+/wkACQABAAYABAD3//L/8P/4/wUAAgACAAkACgAIAP//9P/7/wEAAQD7//j/+//8//P/8//9//7/AgAQABUAEAAPAAsABAD6//P/8f/0/wIADwAIAP//AAACAAgADAACAAAAFAAcAAAA5//t//v/8v/n//z/CwABAAEACwD9/+n/7P/6/wMAAwALABEABQD3//3//f/6/wUADAAMAAcABAABAPP/6f/9/wsAEAAKAAYADgAHAPX/9f/8/wAADAAWABsADADz//H/+f/w//D/BQAZABkADAAHAAYA8v/m//D/+P/9/w0AGgAXAAMA7f/d/9r/8/8KABMAIwAwABYA7v/U/9b/6v8CABMAEwATAA0A9//g/9v/9f8HAA0AFAAWAA4AAQD5//j//v/8//
z/BQAUABkACQABAAQA+P/i/+f/9//+/wQACgARAAQA6//v/wMACQARABQADgAUAAoA8v/u//D/9P8CAAAA8//4//P/6f/q/+X/5v/z//T/+/8UABIABQAGAAYABQD///X//f8NAAgA///4//X/+v/x/+f/8f/9//X/7f/z//7//f/2/wEACAAIAA4AFAAQABEAFgAUAA8ADQAVABMADAAHAAgAAgD2//f/+//8//n/9//6//f/8P/0/wIACQAFAAMAAwABAP7/AAADAAkADQACAPr/9//2//n//P/7//7////1//H//P8IAAYAAAABAAoABADv//T/CQALAAEACwAOAP3//f8DAPf//f8XAAsA/P8EAAMA/P/v/+f/AAATAAMA/P8AAP7/AADs/+f/DAAWAAIA+//6//v/9P/m//X//v///w0AAgDm//D/BQD4//D/EQArAB8ABQD7//b/3P/a//n//P///xUAFwAAAPH/AAADAPb/AQARAAoABgADAO//6P/+//7/8v8EABoACwACAA4ACgACAAcADgAHAPv/9P/3/wAA9f/7/wcA//8AAAcA/v/+/w8ADwAIAAMA/v8KAAcA8//5/wYA/P/6//7/+//9/wMABgAAAAEACgAJAPj/8//9/wEAAAD6//7/AgD3//L///8KAAsADwAIAP3//v/5//T/+f8FAAUA/P/2//b/+//4//f//f8KABIADQAHAAQABgAAAPz/+/8BAAgADQARAAoAAwD+//T/8v/8//r/8v///wgAAgD8//f/+v8EAAIADAAWAAsABwAHAO//4P/o//b/BgAEAAAADwANAPr///8NAAwADQARAAsA///x//P////3//H/BQAIAPz//v8CAAIAAwD//wYACAD+/wEA+//1//r//f8KAAkA+/8GABAA9//u/wAACgAMAAkABwAMAAUA9/8AAPr/7v8JABEA+//8/wsAAAD5////AwAKAAQA8//6/wsAAQD6//7/DgASAPv/+P8NAAQA+v8BAAUAFQAPAPL/7f/2/wIABADq/+3/GwAfAAMA9v/3//r////8//3/BwAMABAABADv//b/AwD9////DQAGAPn///8FAAEA+P/0/wIACAAAAP3/AgAMAA0A/v/x//j/AwACAP//AAAAAP3/9f/1/wEABAD/////AwAGAPf/8f8FAA8ADAAKAAEA+//8//r/+P/9/wYADgAIAAIAAwD7//j/AwACAAAABQAFAAYAAgD9//r/+v///wIAAQABAP////8BAP3/+v/6//v//f8CAAQAAgD+/wMACAAAAPj//f8EAAIA/v8AAAMAAQD9//z/+//7//n/+f///wIAAwAHAAIA/P8CAAEA+v/9/wYACwAHAP7//v8CAP//AAAFAAQAAwABAP3/+f/4//j/AAAGAP7//f8FAAMA+//9////+//+/wQAAQD+/wYABQD6//j/+//+/wQABgAGAAQA/v/+////+////wcAAwAMAA8A+P/v//3//v/+/wMABAAEAPr/9f/7//T/8v8EAAwA///0//r/BAD8//v/DQAPAAUAAQD9//b/7//s//n//v/v/+v/BAAXABUAEgAaACAAEQD0/+n/8P/y/+3/8/8FABEADAAEAAMAAgAKAAoA///1//P/9P/i/9H/4P8AABAADgASACYAGQD0/+r///8PAAgA/v8GAAQA8P/e/93/6v8FABkAGAAYABoAEAD3/+r/9v8JAAsACQAOAAAA7P/k/+7/AwAJAA4AHgAbAAQA+//u/+L/9f8RAAoA9P/x//X/9f/s/+3/AgAVABwADwD5/+3/8f/2//j//P8BAAoACAD+/+n/7P8DAAUAAwAMABEA///s/+3/9//7/wgAFwAQAAkACQACAPP/7f8IABoAEQD8//z/BwD8/+z/8P/4//v//v8CAAYABAAHABAACAD6/wEACgAFAAMA+//1//D/8f/6/wUABgAHAAsACQAIAAQAAwAKAB
AACgD8//b/+P/6//7///8CAP7/+//8//v//f8BAAEAAAD8//r///8BAP7/BQAHAAQA///+/wQAAwD5//j/AwAJAAgABgABAPv/9f/x/+3/8f8EABMAEwAMAAEA9f/r/+v//f8IAAkAEgAOAP//9f/o/+//AAADAAcACAD+//v/9//x//T/8v/3/woACAD+/wMACQAHAAYABQAIAAoABgD//wEAAQD7//X/+P/+/wIAAwALAA8ACAAIAAkA+P/x/wIACQACAPz//v/+//T/7//6/wQABAAFAA8ADwADAAEACgANAAYA9f/v//n/9P/3/wMABAAEAAUAAwAAAP//DQAfABQABAADAP7/8v/t//f/CAAKAAAA//8CAP//9//7/woACgAEAAEA+f/v//D/8v/z//j/DwAgABEA//8DAAUA9f/r/wEAFgAQAAQAAgD8/+7/4//o//v////9/wsADgD6/+//8P/x//P//v8TABgADgAQAAsA7f/h//T/AAAHABIAGgAbAAsA8f/q//L/AAAPAA8AEAAYAA0A9//o/+X//f8SAAwAAwALAA4A9//q//T/AwAMAA4ABgD6//b/+f/6//X/AgASAA8AAgD3//T/+P/4//3/BgALAAEA/P8BAPv/8v/5/wIAAAD+/wQABwAEAP//AgAGAAMABAADAPn/+f///wQACAAFAAcACAACAP7//f/9/wUABgACAAgABgACAAYAAwABAAcABgADAAAA/f8HAAoA+v/2//3/+//9/wEAAAACAAIA/f/2//X/+P/8/wQABQAFAAEA/v/8//f/+v/7//r/AgAEAAIAAwD//wAAAwD+/wIABQADAAgABgABAAAAAQACAAMABAAFAAAA/f/9/wAAAQD5//r/AwABAPf/+v/7//7/AgD7//j/+f/4//7//v/8/wUABwD+//r//f////n/+f8BAAUA/P/0//z/BgAFAAIAAgAFAAUA/P/7/wUACAAIAAYA/f/9/wAA/////wQACgAIAP7/+v8AAAMAAgAFAAYABgAEAPv/+P8CAAYABgAHAAUAAAD6//f//P///wQADAAOAAQA//////v//f8DAAQACgAGAP//AQD8//r//////wIAAwD+//r/+v/9/wIAAAAAAAUAAgD7//3//P/9/wYAAQD//wUA/v/4//b/9//9////+//+//7/9//t/+n/9/8DAAcADgAOAAQA/P/z//L/9/8CAA0ACgABAP7/8v/k/+n/+/8HAAwACAACAAEA+//t//X/CQANAAoACQAIAAIA+P/4////AwAFAAgACAAHAAUA/f/6/wMACwALAAoACgAFAP//9v/x//P/+f8BAAAA+f/9/wEA+P/3/wEABwAFAAMABQAKAAgABAAEAAEA/v/4//T/8//7//3//P/+//3//P/+/wMAAwAJAA4ACAD8//P/7v/z//f/+/8FAAkAAgD+/wAAAAAFABIAFQASAA4ACwAIAAAA/P8EABIAFQAOAAQAAAAHAAgA/P/4/wEABAD7//D/4//b/+P/4//a/+T/9P/4//X/8//0//n//P/+/wcACAAJAAUA9v/2/wEA/v8DABIAFQAUABIACgAKAA0ACQAPAAwAAwAAAPz/+f/8//7//f/+/wEAAgADAAAA/v8EAAQA/v/6//7/BAACAPv///8FAAQABQADAP///P/2//H/8v/z//v/CAALAAgABwAFAAIAAgAIAAoABwAEAAYAAgD2//D/+P///wEACgAOAAoAAgD5//j/+f/1//v/BQAGAAMA/v/3//z/AQD//wEABwANAAkAAwD+//3/+//7//7/BQANAA8ABwD9//b/9//+/wMAAgAFAA8ACQD4//T//v/+//r/AwALAAgAAAD8//v//P/+//3///8FAAUA/P/1//j//f//////AAADAAEA+v/4//z//f8EAAYAAQD+//3/AQACAAAAAAAFAP7/9v/7//7///8CAAQABAACAAEABQAIAA
UAAgD//wEABgAEAAEAAwAIAAUA//8AAAoACAD8//L/8//3//3/BQAIAP7/9v/5//j/9f/7/wsADAAAAPf/9P/5/wMAAwABAAgACwD+//z/BwAIAP7//P8IAAsACAAKABMAFAAPAAcAAAAFAAgAAgD5//H/6//u//T/6//r/wIABgD6//j/+P/w/+3/8/8BAAsACgAFAAgAAwAGAB0AKAAkABoACADu/+T/9v8CAPj/9v/+//X/5//r/wIACQAAAAgACwD5//D/8P/2//7///8FAAUA/v8LAAwA+P/9/wkABQAAAPb/9/8IAA0ACwAKAAAA//8GAAMA/f/3//b/AAD4/+j/9f8CAAAA//8HAAoA///4/wMABAD4//3/EAAOAPz//f/+//z/AwAGAAQAAgD4//f/AAD7//j/CgAZAA8A/f/6//7/+/8HABcADwD8//v//P/0//n/AQAHAAkA/v/z//j/+v/3/wAABgAGAAEA8f/u//r/AAAFAAMA+f/6////+v/9/wUACgAIAAgACgABAPj//f////r/+f/9/wEAAgAEAAUAAAD5/wAACAAIAAgABgACAPv/8v/1/wAABwADAAIACAAIAPz/+f/7//z//v/8//7/AwADAAEA/f/+/wUABgAAAAEABgAGAAAA/P/7//3/AQAAAP//+//5/wAAAAD7//r///8EAAAA/P8BAAUAAQACAAMA//8EAAgA///7/wAAAQAAAP//AgADAAIAAAD8//r/BAAGAPz/+/8CAAIA+f/2/wMACAD///7/CAADAPn/+//+//7/AQABAAcACgAAAPv/+//+/wYABgABAAEAAQD///n/9f8BABAACwABAAEA///9//3//P8EAAsABQD+//v//v8CAP3/AgALAAYA//8DAP//+//9//n/+f8CAAIA+v/9/wQABAD6//r/CQAGAP3/AAAAAP3/+//6//v/AQAHAAcABAD+//3/AQAAAAAACQAHAPz//P/8//b/9f/6////AAD8//3/AgAGAAYA///4////AQD4//X/+v/9//7//P/5//7/CgASAAsAAwAAAP7/+f/5//z/+//6//z//f/7//z/BAASABQADgAIAP7/9v/1//f/AAANAA4AAQD5//7/AQD8//7/CQAPAAcA+v/v//T/AgAEAP//AgAGAAgABAD7/wAADQAOAAoAAAD9/wAAAQABAAcABAD4//X/+/8CAAUACAALAAkABwD//+3/8P8IAAwA//8AAAMAAAD2/+z/+f8HAAQABAACAPX/+f/6//T/+P/8////BgAAAPX/+//7//D/+f8DAAIA/f///wcABwAAAAIABwADAAIACQALAP3/+f/+/wAAAgADAP3/+/8BAAEAAwABAPz/AAAPAA4AAQD7//7//P/4//r/9//2//v/AQABAPj//f8NAAkAAQALAAgA/f/4//r/AQD///n///8EAAQABgAAAAIAEgAQAAMAAwD///3/AQACAAYACgAAAPn//f8EAAwACQADAAsACgD4//P//v8EAAEA/f/7////+//z//b/AwALAAYA9v/0//7//P///woADAAFAPj/8//8/wIABQAGAAIAAwADAPr//P8GAAkABQD6//j/AAACAAcACAAAAP7/AQAEAAgABQADAAQA/v/8/wAA/P/6/wEAAgD7//T/9v/9/wAA///+/////P/8//v/AAAGAAYA/f/8/wIAAwADAP///f8EAAUA/P/6////AQACAAEA/f/8/wAA/P/8/wUAAwD+////AAABAAIA/f///wMAAgAAAPj/9/8AAAMA///+//3/AAACAP7/AgAGAAMABAABAAEABAAFAAYABAD/////AwABAAEABwAFAP3///8EAAcABgAAAPv//v8CAAQAAwD+/wIABQABAAIABgAAAP7//P/8/wAAAgD//////f/9/wEABAADAP///f///wAAAwACAAIA/v/7/wAABgABAPv//f8BAAMAAAD+/w
AAAAD9//7/AwADAAEA/v/5//r//v/7//f/+f/6//n/9//4//7/+//3//r/+//9////+//6/wAA/v/2//j/CgAUAP7/8P8AAAkABAACAAIA///4//v/AgAFAAcABAD3//f/AQAEAAcACQAEAAIAAgABAAMABQABAP7//f8AAAUABAD9//r//f8EAAcAAQD6/wAACgAKAAQA////////AwAEAAUAAwD+//r/AAALAAkAAAD8//7/AQADAAIAAQACAAMAAgD7//3/DQAJAPj/+f8AAAMA/f/6//z//v/8//b/9P///wcA+f/q//r/EwALAPD/8/8LAAsA/f/1//f/AgARAAQA8f/1/wQAAAD2/wAACAD7//f/AgACAP7/AwAEAPn/+f8GAAYA9/8AAAoAAAD6//v///8HAAUAAAD1//L/CQAOAPz/+f8BAAIA9v/0/w4AEQD6//P/9P/9/xAAEQD8//L/+f8DAP7/AAAJAP7/8P/7/wgACgAGAPr/9f///wgABwD///3/BAAHAAAAAAD+//3/AAD8//v//f8BAAQA/P/8/wYACQAHAAAA9v/5/wAAAQAIAP3/7f/6/w4AEQAOAAYA/f8FAAwAEAAMAPj/8/8AAAQABgAGAP7/AAAGAAYACQAIAP7/9//6/wIAAwD5//H/8v8CABAABwD2//n/DAARAAcAAQD///3///8BAPv/+/////r/9f///woAAAD3//7/AQD+/wEABgD+//T/9f8CAAwACADz/+v//f8VABMA9P/t/wIACwALAAQA8f/0/wUAAgD2//j/AwACAPL/+P8IAAUA/v/8//v/BAAHAPz//f8GAAcA/v/y/wIAFgAGAPP/+/8IABEABgD3/wAADAAEAPz/AAAHAAgAAQD//wIACQARAAoA/f8AAAoACwAHAAYAAwD6//v/BwAJAP//+v8CAAMA/f///wQA/v/5//z//P/8//v/9v/4/wgACADy/+v///8TAAwA7//m/wEAFgAKAPT/9P8GAAkAAAD+/////P/7//7/AwAKAP7/6//3/w8ADgD4/+v/+P8JAAIA/P/9//z/AQAAAPn/AAAMAAAA9v/7/woACgD4//n/BQABAP3/AgADAAIAAgAAAP//AgAIAAIA/v8HAA4AAgD3/wEADAAFAP7//f8AAAgACAD///z/BgAJAP7/+f///wUABQD///f/+P8CAAYA/v/5//7/AwAAAP//AQD9//z//v8AAP3//f/9/wAAAgAAAP3//P/+//3//P/7//3//f/7//7////+//3//v8AAAAA///+//7/AAAFAAEA/P/9/wIAAQD+//z/+v/+/wIAAgD+//n/+/8DAAUA/v/4//z/BAAGAAAAAAAEAAkABwACAAEABgAJAAMAAAAFAAgAAwD//wIACAAHAP///P8AAAgABgAAAP3/AQAEAAMAAgD///////8AAAQAAwD8//n//v8GAAEA9//2/wEABAD///j//P8AAAAA+//7/wAAAgD8//f///8FAAAA+P/6/wEAAwD9//n/+v8AAP///P/5//7/AQD///7/AgACAP7//f8BAAAA/////wAA//8BAAIA///9////AgABAAEAAQAAAAEAAgADAAIABAAFAAYABQAEAAQABQAFAAMAAAABAAMABAAEAAIAAgADAAMAAgADAAIAAQACAAAAAQACAAEAAAD+/wAA///9//3///8BAAAA//////7//v/+//7//f///////v8AAP7//v/+//////8AAAAAAAADAAMAAQABAAEAAQABAAEAAAD/////AAABAP///v/+//7//f/8//7//v/8//3//f/+//z//f/7//v//v/+//7//v///wAA//8AAP////8AAAAAAQD//////////wEAAAACAAIABAADAAMABAAEAAIABAAFAAUABQAEAAQABQAFAAQABAAEAAQABAADAAIAAwADAAIAAgADAAQABAAEAAUAAwACAAIAAQACAA
EAAQABAAEAAQAAAP//////////AAAAAAAAAAD+//3//f/8//3//P/9//3//f/+/////v/9//7//v/9//7//f/9//7//f//////AAAAAAEAAQABAAIAAQD///7///8BAP///v/9//7//P/9//z//v///wAAAAD//////////////v/+////AQABAAIAAgABAAIAAwAEAAUABAAFAAQABQAEAAQAAwACAAMAAgACAAIAAgABAAIAAQACAAEAAQD///3//v/+//7///////7////+//7//v/9//7///8AAAAA/v//////AAABAAEAAQABAAEAAQABAAEAAQABAAIAAwACAAMAAwABAAEAAQAAAAAAAQAAAP///////wIAAgABAAAAAAAAAP/////9//7//v/+//3//v/+//7//////wAAAAABAAEAAQAAAAAAAAAAAAEAAQABAAIAAQAAAP//AAD//wAAAAAAAP///v/9//z//P/9//z//f8AAAAAAAAAAAAAAQABAAAAAAABAAAAAgABAAAAAAAAAAAAAAD//wAA//8AAP7///////////8BAAEA//8AAP7/////////AAABAAEA///////////+//3//f/9//3//v/9//3//f/+//7/AAABAAAAAQAAAAAAAQAAAAIAAgABAAIAAQAAAAEAAQABAAAAAQABAAEAAQABAAAAAQAAAAAAAAD//wEAAAAAAAEAAQABAAIAAAD9//7//v/9//7//v/9//3///8AAAAAAgABAAAAAQAAAAEAAQAAAAAAAAD//wAAAAAAAAEAAAAAAAAA///+//////8AAAAA/////wAAAQACAAIABAAGAAUABQAEAAQAAgACAAAAAAD//wAA/////wAAAAAAAP7//f/9//v//P/8//z/+v/7//z//P/+//////8BAAIAAgACAAEAAAAAAP//AAAAAP///v/+//3//f/9//7//f/9//z//f/9//3//f/9//3//f/9//3//v/+/wAAAgAAAP////8AAAAAAQABAAEAAAD+//7//v/+/wAA/////wAAAQACAAQABAAEAAIAAwADAAMAAgABAAEAAgABAAEAAwADAAIAAwACAAIAAgADAAEAAAABAAIAAgADAAMAAgACAAEAAgACAAEAAgACAAMAAwADAAQABgAGAAYABQAFAAgACgAJAAcACwAOABEAFAAWABQAFQAZABcAFAAQAAwABwACAPz/9//r/+L/3//T/8f/yv/H/7P/p/+f/57/z//b/2v/U//2/ykAnP+B/+b/GQBEAGUAEgD+/7YAHwGiAFYAuQDkAMIAtQB3ABYAHQBBAPz/l/9l/13/af9p/zr/+/4G/17/hv9Z/1b/mf/V/wcAJAAVACUAbQCUAIIAawBrAHMAbgBnAFQAHwD6/wwAEwDi/7X/pv+s/73/vP+g/5j/wP/s//T/9f8OAC4AVwB8AHsAdQCPAKQAoQCUAHsAYwBbAE8ALQAGAO7/4//d/83/uf+s/6z/tf/E/8f/wv/P/+v/+P/+/wYADwAUABsAHAASAAwACQACAPn/7f/h/93/4f/j/9//3f/o//r/AwADAAoAGQAlACoAKAAoACoAKwAoAB4AEgAIAAMA+v/v/+X/2P/S/9D/zP/I/8b/xv/J/9D/1//g/+b/7f/4/wUADAARABcAHwAiACAAHgAcABsAEwAPAAsABgAAAP7/+//4//r/+f/1//L/9//7//n/+f/8////AwAFAAgACAAJAA0ADgANAAoABwAGAAMAAQD///z/+v/5//j/+f/6//r/+v/8//3////+////AQABAAEAAQABAAIABAAFAAMACAAMAAwACwAPABAAEAAPAA4ADQARABQAFQASABMAFAAXABgAEgAMAA8ADgAEAP//+//z/+7/5//e/9v/3f/a/9D/zf/R/9r/5f/e/87/6v8JAPf/6/8DAA8ACQAZABkA+v8MAD0AJgD7/w0ACADo/w
oACACv/5X/tv+Z/2f/SP8t/yj/Gf/i/pD+f/5r/9v/5P0L/QkAYQGk/r792f/AAOUATwEhAJz/YAIEBL0BkQCLAmgDnQKgAkoCNAGSAUACRQEkAM//bf9n/6P/4P6u/cP9z/4K/zj+4f2H/jv/jv+N/1j/gf9AAMUAmgBJAHAAzADjAN0AvwBGAAIAaQCKAOj/Sv8y/23/lP9O/77+lv4d/4f/Qf8G/0f/e/+5/xwA9/+g/+v/XgBkADwAFAAFAE0AnQBwABwANQCGAKoApACFAG0AlQDQAMgAkwByAHkAkQCFAE4ALAAbAAIAAQD6/73/lf+b/5j/mv+Y/27/X/+a/9T/zf+u/8b/CwA8AEEAJgAhAFQAcABNACcAGgAdAB8A/v/S/8H/tf+w/7D/lv99/5T/tP+7/9L/7f/c/zcA0gBtAPj/8gCQAeMAywA8AUwBmAGyAfgAyABkAXoBoADW/77/vP8U/1P+4f1d/cn82vsf+3f8vf2K+sH3B/xsADr90fmX/BYATgExAv4AkP+wA6EImAYiA0wF/AdhB9AGQQYrBI4DlASyA1sBtv+S/lj+8P7f/Tb7dPo8/FT9SPwe+6D7Lf1h/qf+Tf5b/pH/7gAnAbsAvQAgAXQBwwG8Af8AOQB7ADIB5ACz/w3/bP8JAPz/Jf+P/hD/z/+7/1b/QP8+/33/FgAUAFn/Hv+X//j//f+i/w7/OP8aAFQAuf+F//f/dgDKANQAkwCZADoBrAGQAVgBXwGKAa8BngHIAdsBzAA5AM0BbgJiAGj/owAIAZAA5wAXAGn+mP/5AZ0A3/1P/uH/3/8//x7+p/z+/Iz+8P1a+8f6HPwg/CH7Lftc+3f7F/zZ+/v7Jf7p/kj9KP5oAXwCAwK+AiIEoQVNB9YH4QYsB+0I5whwB9IGUAYrBXcElgPQAU4Aav+o/vv94PyP+xH7afvM+2771Ppx+8z8V/1z/Tz+Wv/x/zIA/wA5AlkC2AFbAvUC2QKuAkMCmQGjAfIB+gBt/1D/t/+q/iD9lPzO+zH7jfuW+sP4uPjB+TX6jfkv+bj6pvvB+gr8Ev+6/qr9SQD+AngDhgTvBUAGxwfFCuIKxAhqCUILbQrCCBUIuQZmBdIETANMAQ4Auv5h/dD8K/yb+pj5b/oj+2v6VPp3+0H8BP0+/uf+xP5R/70AdQEdAZkAigAiAYwBhwDD/uf90/0Q/nT9wvqI+KD5BfvP+fP5w/ld9tz3e/43/l/4RPlz/9gBOAK/AsoBCgSBCogMdQjbB5sLFw09DCELUAngBy0I3AehBYQDmAEqAFQAz/91/T/7gPvT/Fn80Pri+jr86fzH/S/+r/0+/oL/YQA2AWsBPQDK/+MAlwDr/nL9gvxj/Bf8FPpi92b2RveH9+H1bPfO+ZX1IvM7+sv/CPuC9/39pAN5BDgFxgSCBdoLYxHsDSUKmw3OEEwPYQ3pCyQJyAefB1kFVQKS/wP9Tv02/nD6cfW19v36Rfoa99v3MPqy+4n9of0Q/U//bQKkAlABtgGWAvIBygF0AU3/9PwP/ML8yPsH+Jb1Uffq93L3yfjM9e7yQfkC/4j6Uvcf/roD8AOGBYYGsAa6CqYQDBBpDPENNhCrD04OTgxLCacHmQc4BUcB0v4q/Rv8Pvu3+e723vVr+KL5zvf19y76Kfu//D7+JP4F/90A4QG/AV8BNAHOAO8ARQBt/Wr8dv1Z+2H49vdR+Lj2RPYY+vP4L/Pr9ef9o/yf9/j8ZAOpAloE5QdjB7cIpw+CEfIMfA1lEIAP8Q0xDd0KsgeUBjAFJwLs//r8pPod+zL6wvZN9ez2rfh1+Ln3nfic+aH7bv5d/sb9tf/XAT4CggETATkB6wFXAS7+aP0i/7f8KPli+aH5Tvcd9iH6F/tU9Rf17/u2/R36WvwkAg8D+QNkB5II5wiuDQgSGg+lDSQQYBDSDm8N3AsiCfAGqAWxAnH/DP0t+5P6t/jq9Q/18fUc9+v2MfaN91b54/pf/O
38GP5j//D/1wBPAfEA5gD0AIMAe/7D/Rz/mfz/+FT5t/lf96v2t/re+lb1//W8/OD9FPr7/BUD1gNSBDEHFwmLCtAObxGvDn8OyxBjEAMPng3bC9MISQa3BTMDnP70+5X7xPqj9yL1YvXl9S72wvV/9cX3c/nk+Xj77vzk/fv+UwCVAXgBBwGYAdYBAQHd/rD+NgAz/YT57vmY+of4Xfjj+xP6z/Us+AT9Ff3D+jn+5QL9AqgDwQYfCmMLVg2iD58Ozg7JD4UPsQ6oDNIKEwjVBVoFoAFP/Zj8APxY+ZD2V/bK9ur12/WD9kj3kfg9+ZD6Y/wZ/bv9Gf9EAMYAdwBgAC8BYgGn/2r9af4U/4r7m/ih+en5+fdU+tX7FPdP9mf79f1V++b7zgHsApkC2wUMCRkLzwzrDgIPog4AEPMPIQ/0DXQLEgn3BgkG0ANB/yH9qPyo+vX3s/bb9m72+fWz9YT24vhP+SX55Pod/dv96f1V/50AowAZAPj/gwAqACb+s/3E/rb8t/lm+TP6FPnb+LX7Z/r79r344/zn/Uv8N//2AgIDugSyB38KXgwLDgQPYA69D2gQ5w43DlMM4AnPBxAGuQS3AED9p/w5+8v48Pav9q32oPVj9Qb2uPcA+eX48Pme+4T8JP3q/lUADAD3/18AvADaAG3/J/4M/5D+6/tO+rn6gfrW+JL6D/zJ+JH3N/o0/fn85/y5AC0CngKCBaYIygoTDLkNOg6NDvgPcA8tDooNqQsQCa0HwQZXA1b/QP5K/eT6H/k7+HX3Sva39WL2//eV+Bz4Qfnm+t/7s/zN/VP/jf8B/3D/VAB/AJH/Yv4M/kP+WP3B+2/6Y/pO+jv6MPxa+3L4Vflk/K39Jf1l/8EBdAG/AwwHKwn4Cj0MKg3zDRoPVQ9lDsUNnwxXClAISQdzBaEBGP99/mb8sPnY+If4Vvf69UD2r/dH+B74bvgS+iP78/r0+8j9KP5S/dn9bP9a/6L+D/7h/XX+i/0y/Fb72fof+8z6Bvz1/JT6m/mK+/79Uv5u/tcAwQGEAv0EcweuCd8KkwuODLQNfw4MDtQNFg0HC8QJyAiHB5cEmQHQAHn/XP1g+2L6pfks+Jj3gfj/+Of4k/hB+fz65fpI+2/8NP0O/Zn8rP05/vD9d/3D/Kb8wfxh/FT7L/pS+jX6xPr4/E385fn3+V/8Zf6A/uT/VQFzAa4DewYGCWcKoQq4CwANcQ4RDuMMKQ0WDOcJrQi/B3sFXQIYAVkAuf26+1r7r/qW+Tz4Nvha+Zz5n/ne+Yb6y/on+5T8cf07/YT89fwI/sX9Pf3Q/Hn8e/w//Jz7cPqd+fH5DPop+3/8x/pg+Zb68/x5/t7+JgD1AAsChwTMBicJUwqqCh8Muw02DrMNhQ1ZDd8LBgroCB8IagZ5A00BIwBo/u38Cvw0+xT6nPjQ+DH6UfqV+cX5Cvuf+637pPx4/RL9svx4/S/+aP14/Hj8ofxc/Ir7Efu4+h/6CvpI+mn77Pt7+sj5ofqN/BH+jP5C/+H/gQH1AzMGHQjTCM4JlQsMDYgNEw33DKYMTAt9CTIIpQcvBnEDCwGq/4T+pv3P/JX7X/oF+Rj5n/ou+yz6hvnD+sz7B/zT/EP9Mv0Y/ZT9U/7w/XH9af0f/Zn8LPwY/Jz7bfrp+Tj6P/tw/JP72/nU+XH7t/3Y/j3/i/9tAPoCiAWKB2cI7giMCvELhAxfDGkMcQwhC4sJOAgeB7cGYAVkAsP/gf58/l3++PxB+9/52vkt+wz8j/tm+uf6efzp/P/8Qf2d/af9dP2m/Xf9YP2F/fb8JPyF+1z7j/vy+tP5MPnB+Yn7+Ptd+gf5v/mA/GT+nf6e/hf/NAEOBEgGSge8BxgJxArEC9oLogvHC6cLnQoDCccHZge4BqAE9AEsAJH/gf+J/tL8avvT+pv7Zfwg/IH7n/u4/FT9Mv1B/bX9Lf7q/WX9NP1t/av9Pv0i/FD7d/ug+0T7//nK+C75nfoi/F
X7Hvnt+Pv6vv2B/h3+bP63/0cCWASYBUoGbgdlCXAK4wrPChcLvQsNC2sJyAddB0kH+wW+AxkBlv/F/+T/af5B/FX7qfuE/LP8MPwC/Gb8Rf1m/SH9cv3P/Qf+iv3H/OL8FP0n/Zz8SfvZ+nv7wPvp+lr5xvi9+WH7a/yq+uH4+vmA/GT+W/4S/sH+wgBlA7AEawWjBqEIVQpmCmwKDwscDFQMtArZCOQH8gfBBwIGpwN/ASEAFACP/xj+2/xR/I78kfxE/F/8u/w1/Rn9n/zN/GL90P1b/bf8Xvxm/M78j/yo+8v6z/pw+wr7s/ms+PT4mPrq+2n7ivkG+f/6nv1v/on9U/09/0ACxAMOBPgECQcYCbIJcwmkCbsK1AtZC2EJxgewB0kIkwfQBAsCFAFAAQEBL/8z/Qn96/3y/Zn80/vH/N396/0i/cf8bv0s/gH+6fww/HX82fx1/F77ffqO+tP6fvpq+Wb4lfj8+cT7AvwL+s34evqu/QX/lf0O/S//KQKvA88DzwQyBzkJzAm3CfoJGAtKDNEL5gkmCN8HZwiUBz8F1AJ2AfYAcgBX/yv+o/2j/bX97fxF/Pr8vf3V/Sz9ofw8/eT94f0s/Xf8dvxX/N/7Yfu0+k36WvpU+pn5jfhP+K752vvo+4v5SPhS+rf9tP4j/bj8F/9NArgDygOfBPgGNQmRCWUJ1An8CicMiguLCeoHygePCMgHEgWaAqoBnAHnABf/vP3A/Rv+tP1h/Av8Ev3U/aT9kfyE/IH9BP7F/ar8TPzR/Kf8DfwP+5L6yfqz+kb6Rvls+OD4pPpN/Fz7vPh5+G37aP4U/kv8M/1QAKoCTQOjA4cFZAigCVQJXwlZCiYMvAwVCwgJ5QdUCJMIpwYTBGECmAEqAQMAev6u/av99f1E/fL7E/wR/Zv9BP32+4P8xv3+/S/9Y/yl/DD9vvzJ+xr79fpB+yv7Ofoi+cv4RPp1/Dn82fmW+Jn67v2W/vz8rfwc/xkCCAP8Ai8EOAduCRwJZwgBCeoKHAwyCyoJiQeeB1YIFwd+BLwCOQKlAQMAOv5y/b39B/4L/XP7cvsD/eX9Mv08/GP8mv0k/lb9pvyp/Bb90/zc+0T76vrc+vn6Uvpf+dX4w/kl/Mz8nvrf+KH6KP4x/5D95vwr/0UCIgPQAqoDVQbdCPgIKwhPCO4JoQvkCuAIcgclB28HagZ8BJcCiwFTATMAcf5b/a/9gf6v/fP7mfvt/Bb+m/2a/Mv83v1C/q79Iv0r/ZT9c/13/JD7Yvvo+078V/vW+Sn5nfoG/dr8bfr/+O/6O/7+/pL98PxK/3sCUgMeA8oDsgZWCSIJIQgLCOAJdgtDCvEHoAbbBv8GfgVUA7kBFQHBAJv/uf19/Pf8I/6U/Zv7R/vy/Cv+k/2F/AP9MP5H/oL9Fv1X/bz9ef1W/Dr79fqy+z78LfuH+Qz5//o8/Sr8hvki+Rb8y/5m/kL94v3AADQDlAOTA8kEzAfZCVoJAggHCD4KVAteCc4G+AWSBg8G6wP8AfcAgAC9/2L++/xM/Ob8pf3v/L/7zvtg/Xj+vP0y/QT+/f7R/in+O/5+/oj+BP4n/V/8yvtW/Kv8nvsZ+vr5aPyo/Zn7f/lW+nr9uf6X/fL8l/7AAUkDeAPVA88FlAiLCekI0wd8CDMKBQr5B+kFqgXcBacEvwIRASMAhP+1/kz94PvB+7n8Ef35+1r7TfzT/UT+kf2w/a3+R//l/kb+Zv6x/oP+q/3T/CP82vsX/Pj7Jvsl+i37Kf2H/I36OfqX/PH+sv72/UT+ewDoAqwD2gOlBAgHoAiVCD4IYAi6CQ4KgQi7BqkFrwUWBTgDOwG//w//fP5S/er7Pfvr+8P8RfxL+4L7IP0y/uT9m/1u/pH/o/9I/yX/J/9o/wf//P3r/D/8Tvx2/B/87vrN+rz8dP18+wP6lftG/un+MP4K/pb/LQKvAywEgARHBoMIFAlwCNoHDQlqCjoJ8wZKBQYF4gR5A6
EBx/+p/mX+2/0c/Jv6Gvth/Er8H/vm+lb8xP3d/Uv9e/2d/m3/Rv/D/pr+6P4J/3H+Sf1L/B/8S/xc/Lz7ePvh/Kb9D/yg+sb7R/4g/4f+Wv53/9AB1wO8BMUE6gUNCAgJrwgJCMMIBApcCS8HagUIBbkEYAOqAQoAuv6+/QT9+fux+pX63/s6/Bf7qPoB/Kf96f1L/Z/92/5m/z7/Pf9f/0//IP/X/hP+6PxK/E/8uPxz/E38j/2u/bz70fqP/An/Jf8u/oL+z/+2AWIDkwQiBV0GMAgUCcsI8ge0CCAKOAmLBp4EtARSBGMC1gC3/0/+1fxm/OT7gvoX+gf7rfvx+mz6bPv8/Lb9TP2k/cD+LP9F/3v/lv9m/zf/CP8R/rj8z/s9/Cr9j/xS/FL9z/wG+1H7H/67/8n+S/5y/z0BmgJOBNoFkAaKB6EIqQmRCfwIvAlDCrAI0gWhBCAF2QNxAcr/1v6E/Qz8bfuu+qj5rPmW+p764Pkp+sr7Qf07/dn83P32/ij/NP+G/7X/J//i/rT+kv3i+5T7Yf0+/q/9eP2K/Cz7xPtC/r3//f50/kz/DQF5AvADpgXrBh4IrAg6CXUJiAlICpsKTQmMBuAE9QQBBPEBJwDK/lH9CPwx+zb6avle+TT6IvqE+ef5OfvQ/Cn94/x3/V7++/7y/gv/X//r/lv+pv1v/Bf7gvtn/WL+E/6L/Ir6tPqA/dr/k/+b/kL/3wBnApIDcwUyB3IILAmJCQ0KTwryCosLowoaCO4FVgXvBBQDkQDF/nn9OPwT+5X5ePh3+HP5i/l/+I748vmz+0D8Cvyh/IL9GP4p/kL+Y/4R/r79C/2c+8L5dvrF/U8Aiv+7+4v5CPwOAHcBbgD5/74A8wFSA+UE6gZxCK4JKgowCisKigqaC/4LUwpiB5EFPwXcBPICRwBO/jD9bvzr+qv44PcI+Sj6/PjH97f4ovoB/BD8WPwW/bD99/0d/gv+kf0k/af87Prr+LH5c/3XANf/6/pv+OX7bQBDAbb/ev9XAGcB3AL0BDAHjgiHCQgKcQpqCtQKHQw3DDUKYwdfBW4EugOLAmYA1f0H/EX7K/oy+Iz3GPkW+rH4lvel+Mz6PPyu/N38Jv0Q/VP9Av4f/jP9w/tp+r740Phs/McA8/83+rn3x/siAJMA5f/nAM8BnwE3AvkEgQh4CvcKwgrkCkQLKgzpDCwMZAqhCM4GNwQUAsEBfQH5/tP7R/pr+Ur4Kfh3+Q76z/g/+ID55Pq1+6P83v30/c38bfxV/fL9BP0z+w752ffp+SX/NQHx+972g/kP/9D/oP17/rABegI6AegBtwVwCUYLjgsGC6oK5guqDSwNpwoWCWgJYwdRAvP/qgGEAan9Dfq0+PX3nvfR+Pn50fjJ90j5NPuC++T7Wf77/6T+s/wz/YL+bf13+sb4jfnn/NwASP/v+DP3w/ysAEr+8PtV/qQB1QFIAIkB9gVTCTcK3wlACZkJ+QtnDZ4LNwkHCXkItQQLAR4B+gHB/yj8GfrM+GP3BvhD+pn60fij+I36kPvf+5n9rP+b/7H9D/1L/n7+Pfz4+fn5ov0HA5gBT/n49lv+UQN//4f76v1JAu4CSwAeAEUEQghtCaQIFwcXB0IKZAzdCewGdgeZBwUEjwDjAPEBfv/4+wj7KvpG+Lz47vrY+lT5DvrA+7f7xPv6/V8Asv8T/df8h/4a/tj6v/jL+rQAtQSa/3X3IPkuAtoEm/+u/CEAHQS5A+8AYgEUBiQKQAoZB4wEtAZLC0sL9Qa7BAUFhQNaADH/5//h/gv8Xfp5+df3m/cX+qP7RPrC+af7kfxN/MD9SgDeAPP+R/38/IL85/qy+bP71gAMA779xPdt+iUCXQSAAFL+0gA9BB4FVwPeAs8GxAsiDMsHsAR5Bx8MYAvABjgElgOgAXP/3P5F/nj8+voF+s/3bfV/9h/6OPuf+ff5MPyh/Pj8Xf+iAYMBAgAL/wP+3/tN+sD7XQAOBC
wBbPrn+Iz+aANqApP/lv8UAjoE8ANsAiME0glqDaQKcAX2BOsJ3AyeCqEGeAOUAY0AJQAL/7v88/p8+vn4sPW59M/3TPqJ+eD4Pvo5+8P7gv3B/6EAzv/v/o79HPto+cX6TP92A9cBMPub+KT99wLBAub/cv/iASQE8gMlAvYC6wdsDGULYgY1BJgHtwupCzQI/gMEASIAhgAzAM/9OPuh+uL5Lfek9bj3hPr3+mj63frT+2X8o/2A//kASwFgAOr9OPo6+Pr5/P8eBiwDKvmx9sj+BwUKAyn/Fv/zAWQEAgSoAQICmwd+DaAM5AX7AqoHQgyFCy8ImgQOAa7+p/6b/zL+Tfs5+kj5G/bD9Ef4i/uU+qX5kPsr/WT9QP4xALQBEwKfAT7/1voR+OP6kQPcCKEB2/aS93MAUASgAcP+sP4bAU0DHwKk/3MBUQgMDcIJQQPmAtAHDgr9CE0HFATi/wj9wfzt/YH9Hvzu+mH3c/M29Zn6KPxv+qX6dPyO/VP+l/8ZATYCIgPOAsj+d/n89wv9mAbfCYAA7fb8+cICOwVeAg4A0gDCA3oETwFE/0kDxgpuDd0H7QHeAgwH2AjNB8cEUAGO/u/7pvo6+178AP3W+oT1O/JX9d76Ify7+iT74/zu/Qn+i/5YACEDTwTRAMP6//YL+ssEzgv6BMX6uPv0AhwFPQOuAmYE8gbYBsACZf+HAtQKIg8UCuUCOwJTBVsG1AW2BD4CoP5F+n/3yvcx+Rv7BPv19fHwdvLf92H6Yvp7+0b98/1u/Xr9H//KASQEYQIn/Iv20fejAggMQAdt/Ez8zQPaBcADvANeBW8IJQr8BQ0ANgFKCgkR9Qx5BIsCiAV1BowFMgQGAtT/T/zP97L10PYe+kb7lvaJ8FjwaPVc+Gn4uvmO/EL+O/3T+9z8AQAWA6cBAPzf9o33LAJfC+sFUvzI/p8G1wb8A34E6wb5CuMMTgjVAVgCHwu5EcUNHwZsBGcGhQZCBcMDBgJQAI/8QfgP9kP2hfko+zn3//GC8Z71//f092L5ffxf/vr8LPts+2z+PQEI/4T5JvVs+PcDyggtAIH6TwFiB3sFdwP2AxIHjAwyDfQG3wGmBB4N1hB+C/YF7AVBB+8FvANuAt4BAwCB+xb4rfat9xP78/os9tzy5PR7+MP4cvjN+tD9J/6n/IX7Pfsy/RL+NPrf9KT1jP+FBjwA3PhO/ocGFAaWAxMEGAamCrsNowkIBC0FbgwUEfMMPQfGBvgHBAelBJsCYQFEAEn9vPm49zb4Vvv9+y/4DvV+9hH60/rw+Yj7p/54/xr+tvy1+3z81Pxu+QD0FvNV/G0FpACQ9wL7twPIBHADTQTIBeUJow2PCmEE6QPkCu0QgQ3PBqYF4wavBScDRQFNAFT/c/zq+Kz2C/cs+4v9w/lF9VD2A/vW/Of70fzY/2wB/f+n/Qf82vxp/Wv5GfNc8sT75AN2/7X3sfr0AuQEgANWA1cFAQsADxgLOwTSA+QKxRDQDQ4HlQToBIME2wINAD3+0v2P+x34zPVc9uv6x/2Z+pT2yfdE/J3+fP7Q/pIB8gNQAsP+2/zv/Wf+W/o788HwA/pYBCkBNfjn+bsC+gXaBOgDaQXDC+kQaQ2MBWAD1AlEEH0NbgZiAwUD9gG//+b8V/sR+yH5MvYG9Cb0m/jg/Gj7CPjO+Gr8cf7c/k0AGAObBAUDwf+4/K78yf2L+irznO8h+CEDfgE6+WT60wKaBtIG6gYOCIcNIxPZEMAIIAWmCgERVA6fBrMCAQK7ANH9t/mQ9+H3CPfr9Mvyq/J+95z8CPzw+Lr53/1eAMwAygEyBJYFxwPe/zb8t/sn/O/3/+967O30jQC9AOT6g/xUAxcHmQh6CeULoxKzFyQUrQucB28LiQ+VDPIGAwS3Ac39lPjQ80Xz+fUQ9qvzbfFU8R/2n/tX/Zj9VP/kAbkCHQKVAj0FuAbABFEA8foX+IX2HPI46zjov++7+5T+rfnN+j
ECFwjLDNMPghFyFpIb6BmDEvwMEA6xEE4NyQYzA2IA/fuK9i/x1O+O8rLz9vFT8GvxzPbW/H7/sACaAuMEHwaBBTYF1gb5B7sFSADv+UD2avTp7mnnd+Nw5wXzjvo6+UT6lgFMCKsNdBETFOcZ2h+5Hy4ZHRE7De4NUgzKBkMCz/2i+HTzBu7M6+ftTPB98aLynPPk9n/9ywL2BXMIwwniCTwJaggPCLAHRQXqANX6UfTq777rbuUS4dThteeK8ur5b/yeAeYHaA5DFR0Z0hspIKoi+R+ZF0YNewh6B08EhP8R+s/zqe/g7Nnoe+cY7DPzHPeI+Hf6BADoCDcO/Q7ED94QiRAgDlcJBgW9A5AALfp78zTtkehq5ajhZt+O4eHkIvAl/sADhQkDEbYW1RvGHv0f0SLSIYQebhYZBzD+7vug+PbzjPBM7crq0+in5Vjl3+tx9rj+CwN4BQYKJxGwFfsV1RQUFHwShQ3LBd3+HfvN9/3yoO3a56LjFOJ14KPge+NJ5j7zRwWVDuQUYBoNHlQhESIFIXUgvB34Gg8Sgv8V8y3vqeyE6srp6+lV6bLpBOpx6kzwvPsvB78NPBAUE50XAhqhFkoTgBGBDesHaQAB+Ery/O/e7IXoS+Wy5Cjl4OPJ43fmaehd81kJpxiFH/AjtyREI90hCB3oGdAZoxdUDyz+nO3X5IvihuJq5d7qjO6M8RH0EvQs9uH90gjBERQWsxc6GMAX4hOGDb0HOwMoACT8cfU979Xsg+u86HrnO+gF6uTrDuwn6zLqie+jAu4XYyKSJbYmLCWPIPsZfxFLDYsO3w0XBTX15ufA4mXij+MX6cLxEfkI/30BXP/k/sEDJgtdEbgUyxXNFZIT5wt0AcP6V/jy9xr3+fNL8V7wtO7v6x/rd+2B8eL0OPSq8L3uivO9A8AVYB7fIoUlRSMQHV0S8gZEAw0GEQfgATz36+2k6cnmouUw6nfzav2zBoMKsgacA2oE6QUSCGQM6BBAEwwSXgpx/1H3s/Om86f0pPXP92r5gfaU8QfvGu9O8dv0//X6807xNPS0AWkQQxjZHeQj9SPsHfkTsgcHAosCNgKg/iT4dfLn7mHriOjR6X/wd/puBdcKoQk1CMUGdwR9A4gF/QlTD20RdQxgA+b6hPXI8vbxY/SR+V39D/xd95LyOe9q7j3vyu+I75Xv8fbOBi0TsxiPHeohZSEFHKsS0wg4Bb4DUQAp+6b0k/Cd7u/rr+ls7O/yQvuiBI0IHgmwCuIJlQbDBEoFqQePDBAOBQo+BA/9aPdP9L3ySfRT+dz85fuD+dX1wvH77jLtKO0v7a7tHfebCO4UIRvmIKQjhiH9GvwQLwgSBIQB3f3x+MnzDPHt7i/sJesg7gb0AfyKBPgIDwuEDMgKwAYiBK4DcgVmCc8KSwgtBDn/cPr79RrzvvO396r6A/ul+rL4NvU88aHuXe3S7Bvt9fJUAuQQ+BiWH+AjCiI9HDgU4QnDAur+BfvV90L0KfJo8ZrvTO6978rzFfreAqoIYAsWDrYM4Qc7BFsCMwKABEcH7wctBp4CZf+T+832JfXH9pb4efno+fP4cfb+8snveu027JnrmPD2/gUNfhXEHnwlDSRbHywXEAy3Aj/8u/c+9djzwPOt9HrzcfEE8hj12flwAFkF+ggKDRkN5Qh3BXID5AEDAjwD1QSwBXcELgIS/yT7dPhY94v2KfaV9or2H/Vk8j7wJ+947Zfroe/1+5MHZxA8GtojTiXBH7EZPxBHBov9p/Y487nyZPRi9Xb1gPU596H51vsZ/xoC9AU3CT8JTQdCBuYFlQNeAogDwAQtBAQDzwJyARH/pvwA+zv5gfbH9KbzGfKM8ETwo/CG7z7uafIf/OcE9QvhFPEdUiFBH7IZWBJWCg8B+/k99TTyf/Js9Mf1/vYR+l796f9IAcoBSwStBRAEeAK6ApIDZgOUA1EE+QQzBC4DGwM5AqIAmv4Q/Sv7Qvhr9SPzlfGL8J
Lw+++W7w/xYPVd/AQCTwfEDoEW0BnVGPkV8RCUC9kF9P+3+iH3hfbJ9ib3fviJ++/+IgGsAYYBmwI3Auf/Gf9KABkBmAHcAsQDjwQjBR4FmAR/A0UCXAETAIP9yfpV+PT1WvQF8w7ykvGN8SPyIPSa+JX9YQLeBx4NGBDvEN4QIw9ZDFcJ/gWnAjz/1Pwu/Cv8rPwB/m//AQAyAIkAWwAz/4P9Tf2a/t7+zv6DALACdwPHA0IEagR8BGMEBgTjAuIA+P7O/Kn5rvY99ST0rPLW8frxIPP/9cX63P7TAWMFyAiJCjQKZQmfCesJ9ghOB5sFwgP3AvgBW/8k/o3+of5n/vv92/2v/qP+OP1k/XL+Vv6x/mgAswE3AkgD0ASrBXkFNQUuBT0ELAL0/1X9X/qA+Bv35/S38w/1avfF+K35Dfxt/44B1wGAAq0DmANAA6EDtwO1A10EqwQpBGYD7ALGAg4CXQF6ARcBs//D/lH+f/0k/fr9J//3/+gACwI3AzIEJwSZA20DLwOhAnEB9f8R/0v+bP1d/F774vqz+gj7lftG/Aj93f1b/z8Apv9E/4D/LP8R/4H/DwDkAKwBZwGcAEAAt/9a/xH/J/8/AIgBtQFLATMB5QCpAH8ASQD/AEYCngLMAhIDWwP9A1QECQRZA24CNAHK/w3+1PyD/Jb8i/zj/Lb9Vf4N/xwADgGyAZEBDwGtAK//pf43/hj+Kv4O/8f/yP8t/+r+uv4b/vf9Jf6O/gb/xv+I/+j+mf6G/sb+S/8HAPwA0wHzAUkCoQKkAqcC4wJKAp0BHwF6APf/Zv9V/1P/Ff+6/jX/xf/Y/4gAiAHFAWsBcAFIAbEACwBg/6T+bP7k/ob/lP9f/+H/9P83/7z+Av/I/5wAiQDZ/0T/y/5u/tP9+P3G/nP/6v8VAC8AsgD1AIUA6//l/1UApgBSAb0BiAEwAcMApP83/73/5v9LAJYA3QAQAccArQCHAOL/UP+4/vf9If4n/+b/hwAUARoBzgBxADAAZgDgAFgBmAFnAcAA6f/6/iH+lv0s/Yj9Dv6N/oL/PQBPAOf/a/+N/5MAZgGmAQQCCwKDAfsAfwBdAIoAsgBeANP/3P8EAP7/2v90/97+Ov67/c39Nv6+/u7/+AAtAXAB7AGrAQ4B/AA9AcUBOAL0ASoBnABw/7z9nfwD/K78Bv4B/7L/fQBbAKz/3v/U/x4AFgGaAWQBegGSARsBAwFVAGT/OP/f/pD+6/5j/7n//P/Z/4T/If/e/tn+Cf+e/1EAiAF/AjkCmAFUAZAA9f84ACwAcwAQAf0ABwAJ/8b9yfwC/VT9J/4b/xIA7QDpANcAfgAlAC4AEgDS/0gAHgEaAeAAUgDc/27/hf7p/SH+9/5d/9z/iwCpAMIACgGZAE8AzQAiAasBIwLaAZUBTAH0AJsAuP++/5QAKwHCADQA//8j/9n+hf5E/oz+Rf8nALkAYAE9AQcBxgDp/yf/8/4x/y3/kP+g/4b/v/8R/9r9jv0C/jP+S/8BAPv/FgGbAaIARwByAFgA0wCsAVABDQGcAT0BwADt/yz/Gv/q/zoAmf/W/0IAngB7APv/cv8N/+/+8f5g/+H/1gCEAe0ABwC+/6j/AP+w/pv+vv6v/6j/Lf8K/+3+C/8V/0T/wP9/AMUAyAA/AN3/7v+T/wMAbwCDAAABmAFoAfEAmwAWADgAiQDUAN4A8QA3AacB3AEFAT0AcP+n/oj+6f4Q/5D/fADFAJsAtQCgAGb/Ff9e/wf/e/8NAAEAPgAsAE3/1f6V/rL+6f7S/iv/K//y/mL/wv+b/+r/RQBKAN8AagFXARkBagGfAW0BIgEEAfUA2AAuAcIAGwBf/2D+wf2F/f/9LP7V/iYAHwG8AesBkgG2ABsA9v/i/+v/fQBbASABqwBiADf/dv6S/tr96fzV/Qf+/v0J/z//sf99AOYANgGsAa4B0AHoAW8B7gEPAvEA2QDNANb/vf/R/+3+u/7o/hP+0P1Z/k
X+Tv6I/34AWgGvAr8CmwJRAoMB/gCYAM8AygCGAPL/Af+v/iX+k/1s/Uj9Gv2e/fv9vf0l//v/NwCaASkCFwJlAqICvQGsAYwBYAAxAB4AtP9c/3X/7v6n/hL/rP7V/u3+IP5u/uP+Ff89AFIBVwKqA4UDoQL0Ae4A4wBgABkALwB5/2//zv74/a/9e/1f/df9N/5F/q7+2/6W/zAA7QAaAk8CjgLaAmkCIgKvAYQA5P9i/8z+n/4f/gf+Uf6e/nn/1f/c/9L/z/9d/yf/hf/Q/zQBoAL/ArsCkgLxAWkBWgFEAPz/bADz/47/Iv+H/j/+Nv7o/T3+D/7E/RX+Ev4L//P/AQHQAVECzAJnAuUBhwFeADv/Xf/a/nr+g/6w/Tf+8f7B/jT/Hf++/+v/Vv+1/0v/RP+nANYB/AELA0QDtgK7ApoBKgFNAA8ArgA4//v+2P8i/z7/q/+d/rj+Xv6W/Q/+3P1d/3cAsAB2AnMC7wGXAhUC2gCzAF0AIf+Y/57/q/3S/Tj+J/2Z/bP+dP50/30ArP9FAKkA9v/uAJUBJQEBAvgBzQBMARAB/v/IAAMBdAB+AXcBNACtANz/Wv49/mr9KP1w/X/9bf7w/tT/kAHXAXgCWAI8AZcBpwCR/6z/y/64/l3/iv5A/qr+Pf4X/nz+0f4y/+D/UgAzAGkAygDIAHwB2wFqAY8BrAFBAWMB7QA2AD8BGQGeAP4AZgDP/23/aP65/Tv+Dv7z/YL+D//A/50AsAH8AUkC5wIdA5cC4gGPAD3/Kf+S/tH91f14/Vf9Zv3g/Iv9b/4h/wUAjAAAASoBnwG9AVEBoQGAATEBMgGIAFAAZADDAIIBYwEOAToBmABy/+n+xf2D/Un+1/0t/mz/l/+3AOkBmgFGAjICTAKGAkUBEQFLAE//8v6m/lD+S/66/kT+8f0+/R79xf3F/WT+E/+q/zEAiAANAUYBNQFcAQwBowCNACEAdACtAFMA4gAXAZwAYgAQAID/Cf8D/wz/Af9e/yUATwAIAIkAtgDKAHYBkAEYAgACVQEuAQUAcf+N/wP/o/4m/sj9L/5C/qT+T/+D/7j/JQB/AHUADwHzAOIA+ABgAIEAOgAWAOr/iP/Z/4P/qP9nAHwA7wAYAYEA+//z/6r/gf/6//v/7P/e//v/of+s/5EAuQB4AccBSgH5AOoArAAhAOr/G/8w/yj/ev6J/oX+8v5s/wIAbwC9ACkBzwDZALQA5P+x/9n/dv8C//z+m/6w/j7/tv87AIAA0wCPADkAOQCs/5T/hv8e/1v/i/+i/9L/cgDmALEAJAFSAfUA5gDYALsAMADS/13/GP+6/3P/F/83/x7/g/8IAGMAYgApAFMADADI/+H/lP9EAK0ALAAeAHj/L/83/1b/AwAnAHUAVwCz/6T/z//8/+v/IgDP/4L/ef/N/kH/iP+L/+3/TwDbAHIBEgLRAd0BzwH8AHcABgBC/z3/UP/1/nf/Uf+r/4YA4/8BADUAyv/w/0QATgCKANAAegAzADb/qv4S/zMAkwHlAccBBAEBAKH/gf/c/n/+tP47/mf+1f5T/oz++f7T/mL/CAB6ALIB6wFNAlkCqwEaAY4AzgDy/7H/NQDh/8j/MAAVAFH/Kv/p/pf+9P4i/57/GgAOAEEASQASANr/UAAzAXcAuADIATMB/wCLABoA4P8m/3r+rv6e/uX90v6O/nz+Hv/V/iwASABEAEoBbQFiAf0BUAG1/wQAOgAZAHcAggDh/3z/i/9D/0n/ZP+Z/03/g/9VALv/w//j/yYAIwDzAIMCEQHPADQCOwBX/4wBWADA/wYB5v+Y/zz/K/6B/bn9Tf6h/tb/WAC7AAAA0//LAMj/zQCvAYEBCQHzABEB7/7E/6f/6f6y/7T/TP/e/rb/HP/z/rj/n/9h/2H/6v/O/7f/UgHfAGkAHALTAU0AegAGAWP/hADaAG7/KwCQ/2n/Iv+X/lj+cP9IALr/0ABcAKH/GAE7AEsAhwGfAN
kAVQDd/wAADgBd/z//ef/k/pX/D/+U/1n/Ev9qAAv/hv8nAKr/mgCsAFwAnACxAb4APQC+ANj/6/+UAH4Azv/r/zYA6f+d/3P/uP/A/uj+KQBJ/5YABgHcAFcAAABVAdX/swDlALT/qgCQALH/Xv5f//v+KP5HAFv/PP/+/y0Au/8n/8n/M/++//z/kwA7AMD/MgGTAUIAdwDZAa//z//OAKn+w/9MAGb/JwH8/4n/pgCA/4D/mgDR/1D/9wB1/8D/lABt/4sAWwDO/0wAAwBf/9v/+v+Q/8D/GQCX/ycAqwAiAMb/BgBBACf/rwDX/13/qABs/17/hP/N/wMAkgDCALAAoQAGAMj/NgAXACoAzQCrALwAPgBt//L/mf8o//P/OgAuALT/KACw/6n+o/8bAH//DQBhADAA3ACtAGoA7wB6AB8ARgAp/13/kf8d/wUA8v/X/n//I//x/uj/o/4DANUA6f+NABcBdQAlAFsAnv9xAKwA4/8lAd0A1P+lAED/7v5P/2j/+v+Y/63/Zv+cAJMAn/9fAIkAPgCNAL4Ajf/iAKYBgP+sAAAA7f6aAKr/C//D/0T/Dv8nABz/I/9+/xv/FwB4/1IAoACEAHcB3gDjABABNAA0AN0Amv+w/2cAK/8+AD0A+/4sAHf/xP7w/4D/HgB7ALP/CwBJAJMAiQCeAPAAsAC+AP0AFgD+/8z/K//J//T+tP5S/wb/0/5E/8D/aP8cAA0Az/+DAKsA+ACMAPcApQArADABx/9CAHMAu/7D/9b/Bv+x//D/4f6j/+3/S//j/8j/uv9aACABzADjADkBIgBuALf/r/+KAOP/mADf/9H/+//P/hf/Kv8G/zr/b//S/7f/+v8IAA8AYAChAIsAwQB0AYMANgGgANn/owBQ/z//2/5E/gv/M//j/iwAZgDL/z0BGgCi/58Aov9bAFcALQDnAJ4A/AABAAEAPwARABEAKwBDAM7/eACo/0v/HP+H/2//ov+EAM//VgBrAOH/WwB6AMf/nwBUAB0ASwE7AFEAEQE4/yX/T//7/Tf/yP9T/z8AjADx/4kAswCG/9QAoQB3/3sAlf/c/7MA1f8WABEAav/q/1QAKgBDAKkANQARAOP/Ov97/3H/a/8L/4T/9//m/1YAYQD5/wUAGACi/2QATAAIAJoAPgAbAK//Bv9V/0b/Qf/u/50ArwDOAPsAVwBKAM3/Y/+A/5P/yf/k/4UAx/+j/wwAPv8fACAANwDuAIMAiQCbAF4A4P8YAAf/5f4v/+z+FQD4/woAjQBuAFkA4gD2/+P/vAAWAFIAfgBIAJD/i//w/3v/IP8/ADYA6P8FAbMAYwA0ABQAIP8X/4b/kf8GAA8AcgBAAF0AkgAjACYASwBWAIQAVACYAPf/gP/9/x3/cv47/0f/LP9YAE8AYADgAOcAeQDk/2AA9v/6/1IA2P///xsAtf9A/5H/pv+Y/w0AjwBzAF4A/v/R/w8Ahf9z/8n/Zv95/2QAwP8RALkA+v+UAGQA+P+OAAoAHABnAAIAHQC3/1v/hv89/37/IAAJAJMAwAA5AIQAQgC//97/sv8S/wkAagBW/87/rP+I/6n/a/85AD8AzP+9AAYBkwCwAHwA5f+Z/7n/cf+y/7f/BgAtAPL/vgBiACcAWAAgAM//KwDo/1r/8//b/8b/HwDG/5L/OABwAGgAcwB4AHAAcQDC//D/qf8U//L/df+g/+//jP8jADUAGQAeAOj/JQCWAGYAGQBiAOP/3f/C/zj/kf98/woAagApAIoAngBJABoABQDj/8D/tP+p//f/6//B/zIAPgAMAPf/DADg/3EAmAAGAAwApP/K/2z/tf/c/6T/KADk/1EALwAeAB0A3P/a/4b/JADk/xsA1QASAIEAYgDW/ycA0f/n//X/6f/+/9X/9P8xAMz/tv98/zb/IgDo/9z/eQBTAI8AUwANACcADABRAIAATgA3ACIAvf+5/7T/UP9q/0//h//2/wcAWAA5AA
kA2v/U/yMADwASAGAALQAkAEwAgP+f/xYAnP+2//D/nP+0/yMAKwALANr/4/+R/1L/pP/0/xgAWABsAEQAYQCIAKoAYABQADMAyP8JAPr/kP95/3b/aP8v/4H/x/8YAOYANQElAUgBXgH6AMYArgCUAL4AygCRADkA9f+y/5//tf9p/4n/xP/I////1f+S/0P/AP+j/qv+jf5u/s3+pf7M/rv+nv7R/rf+mP6U/sb+/v6Q/zoA5ACdARICkALmAkYD2gMfBE8ErgSzBFYEygMLAy8CRAGfAPL/Tv/5/jb+QP3E/E781Pup+7T7Fvxn/Hn8fPxF/Lz7LPuX+nv5m/lk+3T8tP2kAIQDhgRrBTgGYwURBUIFHgUOBTkGFAcfBo4FpgQYAzwBdQByAO3/GADz//P+rf0S/e77qvrb+1P9qP1D/sr/NwBV/9H/HABrAGoB9QHSAV8BVwEi/3f8rPqK+N71tPRb9mb3FfkP/HX/ogFKA90EsQSLBbYGPwcZBzsI5gkXCRUIBQeoBYADBQKIAWAA+v9f/8T9Evwk++/5iPg/+Tj7WPwh/bn+gP+l/3UA3QA3AaoCLgQTBHUDQgOMAb3+ZvzO+oL4Fvb18rPw0fQy+Pf4nPx0A84G3gYLCc4H3AdZCegJBAkyCfEL7gkMB2gEPgN6ALv9U/5A/UX93fxb+9/4qvgJ+Tv33fn9/f3/igDiAlUEbgLsAiEDkAKSA8oEqwP6AH0Aqf0i+aj2lvTB8Vfta+8q9aH3dfuwAQQISgnRCyQM9wjwCjsMjguPCbgL1QsyB2MEXwHi/oP7Wfvk+nD6OPxb+w35qfci+R/4fvhc/R4BFAODBCQGHQVLBJQEGAPZAmkEWwRBAc/+g/2m+SL2i/SV8rTwle0r7Tzzx/g//NsBIAoQDu8OGhAHDRwMDg1jDBsKXAlRCscGQgIk/vH7hvns98T43/jB+lD7wfkd+En5evpW+tD+/AO3BqcHzAhRCI0FKgXgA3gCBQMfA28Akfwu++P3vPMC8uvwse/M7HHt7/RZ+2j/5AVMDqMRYhIsE/EOHw3yDUwMXQgEB1IH8QLN/fr5Jvjs9bX1VPf/94f6BfwZ+1P5Y/sC/RD96wEKBzoJXAnwCWAIHQU7BGgC2QD9ABEBBP7/+Ur4VvUW8gPxMvHI8KrvHO8D9Ub9bgGfBsMOnhR9FCkUbBA0DNsLwwkMBiIDfgPzALD7ePe99Y71GvUT9135C/y1/TH9LPy8/J7+aP9mA2YIWwpKCmUJ9wdvBHECNwG1/+P/a/+6/Nf42fbM9J7xDfGn8ZfxVPCW8B34kP/9A+cJHhLXFsgVxhSYD6AL0goWCLwDLAGNATr+TPmV9fz0WfXz9Zr4R/sW/jP/o/41/b79kP+cAOcEvAkhCycK2wirBqEC1AC6/7D+K//P/uL7yvfj9cHzcfF58Qfyg/JX8VryhfpzARYFAAt9E60WLBWZEx8OcAopCV4G+QEOAAAAfvzI97j0KvUL9qT30/o1/f7+af9K/uX8Fv4gAL8BUQYGCjwKYAgKBxkFmQGzAB0Atv+W/z/+fvqD9kH1X/O48YfyyvMF9BDycfNy+2QBDQVjC9wTjBYCFbIS3AyNCf0HNAX/AJ//ev+6+0z3zvRX9qD3JPka/H7+0v8W/+n9k/wq/pkA9ALSBn0J7gkFCHcGOQRCAqYB6gAnADH/FP3w+GP1ofM68pvxtvHl8azxl/HI9wb/JQP3CJoRwBbPFbkUOhDTC28J9QZGAxAAf/+R/B34ZfRC9eb29fdG+8T9Lf9V/+P+8/y+/dsA3QLCBa8ITQqoCOkGDwXxAgkCdgF5AAz/e/0y+kH2VvM68orx4PCD8Bzx5/Dr9Gv9eQKuBnsO8xVcFc8TxRGQDYcKJgiDBRYBP/+P/R36BPZP9t/4e/hD+l39uf7s/RP+a/1m/boAowLxA0AG8QheCPcFNAVxBGoDNgIbAVf/Xf3s+jP3QfTy8i/y7fCq7/3v4O+F8+
H7lwGgBZgMLhRCFAsT1BE1DuQLDQrUB/4CdwAV/1/77/YP92n5tPgg+rz8MP2s/Dr95vyC/QIA1QE7A3YFqgf2BhAFPAWTBRsEOQMPAuv/vf1a+473VvSY8z/ywu857iLuGO8J9Nb7hAHFBqINfBJdE9kSThGAD0AOVQyJCOQD7wD5/fP6vvin+HD5ZPqW+4375ftK/Lr8Pv2O/nMA3AGNAwcFbQXuBLkE+QRjBNsD+gJSAav/cf1Y+hL3vPRO8k7whu5R7TbtNvE6+Ov9ugK2CPgOMBEGEisRqRB6EB4PDAyFB2kEDgHg/Yj6s/kn+uP6AvuR+oT7vfsd/Ij8Qv35/rgAfgHRAmsEYgQLBEMETAT7AyID7gHEADf/7PzR+ar2FfTm8ULv8O1A7dXu6/R7+m3+jwObCZkMVA76DoEPvRDkD/gNwAplB8YECAIP/tr8cv3H/Er88vsi/Hf8Bf3H/ED9E/9NABgAngCyAjUDjgLXAkwD6wJkApoB6QA6AOz+RPwv+Sf33/Sx8Vzvx+458Dn0LvhB+8r/hgWoCO8JoAy6D6QPTw7EDrAM8QiMBvIDsQBT/57+Tf12/MX8LP2J/E79Av7I/SL+9f7a/mL/5AAwATwB4AGEAuUBUgH5Aa8BXADu//D+Ufzf+TT4RvZI9JbzhfME9Df3EPs3/cj/9AJDBKgEhga5B+EHgwjhCNMHTgZbBQIEDwPFAnsC9wFIAX8A3/+d/57/7/94/x//iP8p//D+3f+mAO4ARgFPAc8AmQDYAL0AigB9AP7/lf7g/F/7Wfpy+Un45/c6+Cz46fha+2D9qf4dAEIBvQFwAvgCzAPKBLIFTgZtBXgELgSbA7oC6gLTAr4CygLkAR8BfQFFASsA4/8IAN//Zf+i/xkARQAPAMv/kP8JAIkAjwDAAPYAagDr/lz9BfyQ+5P7KPua+q/65vo9+xT8Xv08/3IAQQAnABYAX/9o/yIAAQHUAa0CHQM1ArIB8QH2AYQCcwNzA1QDtAJ1AUQB9ACSAPAAOgEXAcUASwDRACABEgAlALYAXQBPAKIAUgC2/3D/wP6b/Tv9C/2p/NT8uf0v/gH+S/5n/jv+DP9u/4b+G//U/+z+mP4L/7v//v9b/5n/NQAUAJIAXQGfAVYCqALuAaYBWAH4AIgB5wFaAZcB6ALuAroBzgGKAicCqAHGAWoBpwBmAHT/f/7a/uL+nP4+/kD+fv5o/q3+UP88/7f+Rf/k/or+C/+H/nb+e/6C/g//Xv/E/l7+I//S/vP+Hf+c/qX/l/+K/uT//v9W/sT/RwATAAoCzwEUAtcDYQMzA2wDIAMNA6MCiwHpAHIBsgBW/3n/FgCh/zf/z/+l/9j/Mv+H/tT/8v8LAM3/d/5l/27/6v2G/mT/PP9C/p7+IP+H/q/+5f34/aL/qP9I/jT9/v2J/sf9b/5W//P/ygA1AbgBnwJEAyIDDAKtAs0DSwI+AfMAJgEBAdkAZgAeAFUB6ABOAFsA9wDlAU8BEwC5ADEAiP4f/zv/YP4S/nb/jf9y/uH/h/+R/uT+p/7t/gv/4/+M/hz+dv8a/6P+Df0x/msAPP/f/uv/yv+LAPAA2v4RAFsCHwJrAcYAtwG2ATQBRQBXAJcBVQHEAHf/mQANAwcBy//hAGEA7P99/wj/xf/kAKb/Z/4j//T/XwBv/3D/jABDAB4BzgAXACcAUP9A/gf92vx6/Dz8eP0//7f/vwC8/2H/GgK9Adb/JAKLAbn/vAO0AJP+ywFWAZn/b/62/4EAjADl/9wAoAA5/0ECMP9M/ncCXQBB/3EBXQGo/3sAfQCmAEgAEABvAED9/f+LAeX91f+HANL9Af3f/67+jPxEAR0Adf2QATQBG/8qAQ8BHACMAIkAkwD1/0H/aACM/7T+kACiAIv+HP9ZAtYAtP9HAQwB0wCuAUsAzP+YAYQAvf9M/wgAXACM/o3/2wCS/sL/BgEdAAEBwQAbAGX+p/4t/4H/Rv8a/68B2f86/2
4CJQGL/9YAdwCN/3P/N/6hABEAzvzx/uL/xv1c/akAIf/i/SsBUgBK/3wA2AGbARwBEQPqAzoB1QB7Aoz/dv+nAHf+gf4Q/yMAzv5v/ksCIgCZ/ZMB3v6w++EBkv8M/AUDUwKU/iQCVwJCAN0BxAEqAYQAFP+sANn+UPy7/z79Gfq8/yb/BPxwAAkAlP4cAYP/VQCCAuUBSQKGAbUBTwJ3/8b/rgFjABT/2v/e/yEA3gAk/zwArwHsAPP+Qv9sAAsBXgCb/goB2AAKAWMB5P+RAZMBAwDg/63/If8OAIX+bPyo/pD/Mf0A/eP/K/84/oP/S/84ACcBjQBrAJwB7QGHAYb/1wARBG7+m/4sAyj+Zv7+ADT9oP+eAT3+dQAN/zwBXAPk+yAAdQJo/kwBqAHj/wUDUwGP/1sCFQAcAj4ARP3WAn//bPuKAKP+Q/u7/Rf+3f3G/RH/PgHm/uv/fAND/yn/DwJtAH4BfgFE/z0CyAEe/8cAJv/v/tIA4f/D/zr/Rf9xAq4Ayf3vASYCNv02Ac0CZf9s/z8BAAI6/yUBjgE9AHIAhAHe/3j9cAC9/vz86f6b/eD8kv+P/vn8p/+SAZ8AgP3/AGkDbP+rAL4ApABnATYB5gBB/yz/af+VAGb9zf52AWL+f/8Y/83/6gAZAB0AhgLjAMAAOgS2AEsBHAGCAGAAHgEIAs7+bf9qACoAMvzg/gsB+PsI/sf+tf2J/pn/vv96/s3/dwFFARYAMgGVAjEBSQCcAHUBRv9Y/5EAqf43/y//uP4i/k//mP9U//0Av/8cAVcAhgAiA//+PgCyAn8A+ACGAc0AVQAkAyACHf6x/4kCvf8p/H0Aq/8u+xMBDv93+/MBq/5d/AoCq/4q/wgEhv05ASYEVfz7AMYBW/5eAqH/SPxpAhcA2vtlAfb+sv0oAfH+T/+i/5YAOAKs/qv/LAOhAQ3/+AGsAh0ATAL8AHz/wQGxAE//QgHt/Yb+wwBF/Nf++ACb/Zr+hP+t/lkAa/7u/2wCPQBg/wgBxQKGAGj/LQC6AN/+sQBfAXL7n/46Ag39hfyi/6T/GP/JAPYASP4yAHED4v9Y/+kEVgGF/gYDGAIcAG0BzgK3/zT/ogHp/lP+swAd/xj96/5q/5T/H/+d/Q8BiwBo/dwAaQLf/w8BlwLA/83/8QHPAMb84/+XApv9af03/9D/nf51/Gb+PwDm/soAVf+Z/t0D3gHt/hUB0gA2AZ8CyP+p/5wBHwHcAfL+Nv9JBO/+0vyZAjz/6PyGAuj+3PvxAT3/N/yKATkD4v4iAGQD0AC8/qsBfQKB+9b/ugO7+j3+egKo/Sv+QACo/3n+1v5x/4n+RP6xAcT/Rv3aAnwD8f/S//4AsAHKAfb+NwGsAQYAmwMH/5f9PwJ4Afb/Zf7v/uz/OgAh/kP89wBMAMv9hQEfAl7/LQMHAiP/jAKD/y8AwgBp/Yv+IwB5/a3+5//G+tgA0wGi/acAHv7x/2sDrv7K/uwCx/9PAsIBxvxrAR8Cxv///4YAQgCLATX/RP4dAa/9mv4nAV79p/2PAYEBp/76//4CYwFhAbkB/QB+AygDNP98AFMANP7o/4D9r/1bAPj+2/1g/Wv/wAFr/RX9NAIe/wD+3gGWAPD/JAJKAGD+lQGPAur+af4SAwMDUv00/wsCuP9k/ir/p/6u/LQATQBh++b/CAUFAI7+rgNHAoUAHwL5AZz/3wDXATj+5P+gAvL+EvwBAVsC1/zS/PH+0ADx/hz9yf15/wYCXgCz/9v/1QLQAT7/dADP/5sACgGu/0P+rwB6ALr9wf4iAIL/AP00AI8AsP30/8D/BQBPAZEBzwC+AXoDjwKOAQwCJALWAJUAcwA4/7z+/f/J/kv95P5t/zD+RP0m/gL/Vv7N/4n/iv/tAcIAjwBFAYwAUgHfABIAiwErAJH/mgCF/zP/Z/9//mD+4P55/rr+Ov5t/4cAV//X/9b/agFDA+T/Qf/RAlYCkAAcAgkBhQCGAb
8AMv+K/g0CmP5S/aYALf/1/gb/s/9I/1IA0QEb/6j/cgNWATj+nABOASD/c//D/w8ASQBgAEYAK/5P/+QAyv6z/Sz/sABg/1n+of/MAIUAff+I/yUAxwFjAbb/eABrAHoB8gGG/4n/RgFiALb9yv9NALn/3f9k/SMAfACe/1EA+f5/AHwBW/9+/04Chf/B/nkBx/43ADsB0f62AJkA5f9YACT+sf1GAfv+yPyFAO//Tv9F/vD+XwDs/4QBnQAbAPwAxAG8ANUAvAG3/2n/wgBzAEb/NP8//wEA5f8LAK3/df+8ANL/Lf8b/8cAPgFs/8oAqAG6AC4AbwB3APEA3QDw/8n/sP93AUn/vv0mAB7/QP4m/xUAAP/R/9UAqP5JAGgANP/xANIAMwDrABEB6/9pALgAwf9OADT/1v/aAJX9Vv9CAX3/ZP9yAFIAu/4cAAIBxf/j/8oBNgFJ/xwBiQFn/8T/FQGC/2T/FQEC/xj/bwDT/vb+o/92/zn/Ov9fACkAxv/W/w3/w/+jAHEAYv+qALAAYP+rAFcAIQCn/yH/QADk/4r+kv8JAP3/DgCJ/q7/NQBA/xwBlgBX/5MBhwEPAXMBPQGBAYoA8v8bAQABFf9/AHQBav8CACIAP/8VALQAEv8z/noAjQB8/oP+1v8G/17+kv+7/xv/jf8IAFj+yf4RAKD+pf2n/jD/o/6Y/1f/D//bAOb/nf/iAQEBKwGEAsIBhgJpA7MCtwK3AkUC0QLcAQ8BRQEWAT4BAgEKAFH/vv8S/8H+if4J/vz9E/1z/V7+of0P/YP+W/3G/IX+Df2S/Ej+Yf5X/e794f5B/3j/1/+wALAAxQFzAn0B5gI2BLkDYwMJBJwErwM9BNEEaQTTA30CsQEPAloBzv9v/+z+ev5O/mz92/zM/UD9avzd/DL9FP16/Sv9ovz+/Hv9Af7f/O39e/6W/L39m/9O/kb+s/+D/yMABgHxAKAAZgESAzMD0wJIBPcESwR2BAQF2QTYBMAEkwOdAugCVwIgAGb/Of9Q/on9Cv0x/ez8ify+/Fb8x/yw/d/8Qv2K/fL8wP3O/Kb84/6//mD9u/5Y/47+lAAcAOr+5gC0APD/OAFnARsB8AFaAh8D7wMfBFEElgQbBTUFiwQCBCsEBwSwApkBzQDe/+X+AP7G/er8T/yC/DP87/ux/Mz8UvyA/f79jP3c/Vz9/fzx/Fr98f7+/oL9z/4fAKr+qP/uAJMACAFBARkAqwAvArYBywEoAj8DjwQWBDEEfwWGBc0EhgQdBLIDNgMrApAACQCj/4b9tvzw/Nz7Vfue+8P7MvxX/HP86vyX/cb+nf5K/V3+Nv/X/Cr9ov8IAAL/df5T/1b/1f8TAef/L/8lAcMAk//MALABKAKYAugCJASLBV0FHwWsBYYFDAWiBEsDqAJoAtcAXv+v/tL9yvzd+9P6y/o/+9f6+vot/I/83vz3/eX+/v7o/mf/HP/2/oX+lf4XAaYAk/64/5YA2f8fAR0BUv+PAO4Ajv9FAEkBmgG4AtwCIgPNBGUFugQeBV8F3QQ7BOoCewIpAvwAX/+f/gz+uPzd+xr71Pra+ij77fp5+/f8P/14/Rz+kP/P/4z/EQC8/6v/Gf/M/aoAbwK2/iP/ZgE8/5oAsgGD/hQAdAEX/53/tAEqARcCJgOuAiMEGAWJBIMEEAWmBKoDuwIvAoEBvgBE/xL+O/4u/UD7Svuh+9z64fsX/Lb72P23/bD8Mv8kABD/YgAMATgA3gDBADz/jv4+AOAB9P9U/yUBBQB7/9IAY/91/xIBeP/K/uMA6AAKAT8CHQLVAlMDhAONA1cDfgNNA28C0wFoAX8AyP+Y/v/94/3X/C381Pzk/Kz8F/3s/Mr9b/5e/tv+2/+OAH0A9AAFAf8AHgG1AOv/+/+gABMB9wC1/1//EwDd/r3+kP/O/jH/Sv+U/pX/vwATAAsBhQE0AXYCHALdARgDLQNgAlACmAHdALwA2f8N/x7/2f5I/v
L99P0J/hD+Ov4q/rL+Qf9z/6H/agD6AKgAGwFGAQYBGwEjAbEAeQB6ANT/NQAfAC7/Lf8X/3z+rf7h/mn+0f4A/4v+X/7p/hL/Ev+q//3/OgDSABQBEgFDATgBEgH1AD8BIgEPAdMAxQC6AGQAbgBKAGUAEAAcAH8ATQDBAC4B+gD7APkA9gCuAHMAmACOADMA2v++/5f/af8X/+H+1P4S//j+vP71/r3+nP5D/t79EP4h/nL+jP6L/g3/Lv/6/iH/gP+K/9z/PwAgAKcA/gC6AAwBOQEqAW0BmQGrAc4B5gHyAcoB6wEkAqQBmAGXAQsB6wC7AIwAawAeAPf/sP95/0//Jf8u/xX/C/8H/+b+0P6j/lD+Pv5q/l3+jP7e/uz+FP8w/wr/Iv9U/1f/i/91/0T/j/+v/6n/4f8vAF4AzwAWAREBhQHcAdcBtgHVAe4BiwFZAS4BqgBgAHQARQDZ/w0AJQCc/5v/3P+o/53/q/9b/1r/g/9R/zP/Sf8s/zz/af9l/5P/w//E/6L/gv+9/8P/hf+q/9P/ov+b/87/uP8BAFIASACKAOoAEAH6AB0BPAE1ASAB9QDeAKwAfAA6AAQABADj/6v/sP++/6H/kf9w/3T/nP9r/0v/Wv9V/z7/NP8x/1L/dv+V/63/vP8jACQA6/8+AFMAKQBfAE0ALwBKAEcARAAhAF8AiQB6AJUAnwC+ALgAlQB6AHQAbQA8ACIA9//w/9f/pv+w/5z/mv/B/57/kP/L/6X/n/+v/5//pf98/4z/ff9H/4L/aP9g/6T/q/+2/9b//v/p/wsALgD2/w4AKgAXAA4A8f8XADQAIwAnAEEAdABnAGkAbQBbAHIAWQBFADYARQBlACYAKQBeADYAPgBrAFIAUgBnAFUAPgA3ACAABQAlAPX/r//M/8P/o/+V/7v/0//M/wYAHQDy/yIAVAAXAB8ASwAfACYAGwDx/w0ACAD2/wkAEgAAAOz/3f/e/7b/kf+m/5L/g/+P/4X/k/+a/63/1f/D/+//EAAAABYAFAAKAAkACQD4/+f/+f/r/+//AgAQACIAQgBgAEsAagBrAFAARwAwABgA+f/z/83/zP/U/7b/1v/h/9H/7P/3/+z/9//6/9//1P/O/8//vP+z/8r/yf/D/87/0v/U//r/9//1/yAAIQAUACMAIQAXADcAKQAsADwALQBLAEUAPwBgAGoAYABmAGsAUABAAD0AHwAMABIAAgD5/wYA+f/5/wgAEgAOABAAKgAWAPr////l/83/xv+o/6v/rf+U/6D/lv+q/8j/tv/d/wcACgAUABwAJwAxABEAGQA5AAwAEQAkAAgAGwAqACEAPQBTAEcATQBEADMAKQAYAP//6v/z/+T/xv/P/+7/2//B//X/8P++/87/xP+U/5H/kv9u/33/mf+F/5D/of+7/8v/0P/3/xMAGwAiADAANAAxADoAOAApAD0APgAfACkAQgA/AD4AXABfAFUATwBKADoAIgAlABAA+f/9/+n/2P/k/+7/6//v//b/BgD+/+j/8v/s/9v/1f/W/9H/xv/Q/8z/1f/a/+H/+f8DAA0ADgAgACIAHAAmABgAHwAhABMAFAAZACQAJQAfADkARwA6AEAASAA+ADEALwAVAAIADgD3/+P/6//u/+r/7f/w//b/9v/z/+7/3v/h/9j/wv/C/73/uv+7/7v/wv/P/9j/2v/o//L//v8CAAQAFQAZABQAGwAdABoAGwAaABcAHAAcABgAGQAdABwAFwATABAADAAOAAUA+P8CAP7/8v/0//T/+P/0/+//8//3//L/9P/w//D//P/x/+//9f/z//L/8P/u//L/9v/z//3/AQAEABAAEwAWABsAJAAcABMAFgAQAAkAAQAGAAUABQARAA8AFgAjACAAJAAtACcAIAAjABsADQAOAAsAAAD9/wAA///7//3//f////X/8P/3/+b/4//q/9v/2P/c/+D/2f/g//D/6//z/wEABAABAA
0AEAABAAgABAD+//v/+v/8//T/+f/9//j/9/8EAAQA+v8FAAsA/P8CAAYA9f/4//n/9v/7//v/AgADAAEAEAAKAAEADQAHAP3//P/3//H/9P/w/+b/7//2/+//8f/+/wAAAgACAAgADAAGAA4ACAD5/wwAAwDx//7////7//j/+P////r//v8BAPn//f8GAPf/9/8IAPv/AgAKAAkAHAAhACYAMQAyADEALQAlACMAFAAIAAgA+f/x/+z/5f/p/+v/6v/v//X/AAAMAAMABgAPAAkABAD8/+//8P/t/9z/3P/f/97/2v/X/+L/4v/g/+v/7v/p//b/+//y////CQAHABEAGgAgACoALwAxADYALQArACUAGAAWAAgA//////b/9f/4//P/9P/+/wAA//8EAAoABQABAAAA9v/y//H/6f/e/+T/5P/Y/9n/2f/a/9r/2v/g/+L/6//y//X/+////wIACAAFAAkAEAAQABQAFwAaABkAFwAWABEAEQASAA4ADAANAAwACAAEAAEAAAD8//r//////wUACQAKAAgACAAGAAQA///9//3/+v/1//T/7//q/+z/6v/p/+3/6v/t//L/9P/2//z/AgAAAAIABgAFAAcADAAMAA0AEAASAA4ADgATABAADQATABEAEQAWABYAEwAWABwAFwAYABwAFQATABQACwADAAQA+f/0//P/7P/m/+P/4v/a/9P/1P/P/8b/zf/L/8P/0P/R/9D/3P/g/+L/7P/z//n/AAALABMAEwAZAB8AHQAbAB4AIAATABkAHAAMABAAGAAMAAoAFwASAAwAEgAVAA8ACQATAA4AAQAKAAsA/P/9/wAA8f/v//H/6P/i/+f/6v/e/+b/8P/q//P/+v///wEABQAOAAkACgAUAA8ADQAVAAwABwAOAAUAAAAHAAEA+v8DAAEA/P8BAAcABgAGAA8AFQANAA0AGAANAAcAEgAJAAMABgAEAP3/9//7//X/7P/2//L/6//3//f/8////wAA//8GAAkABwAIAAsACQAHAAkACQAEAAQABQD9//r//f/6//j/9//3//j/+v/7//7/AAD//wIABQACAAIABgADAAIABAADAP7/AAACAPr/+f/8//P/8f/3//L/8P/1//X/9v/7/wEABQAKAA8AEQARABgAFwAUABYAEwAOAAwACgAEAP///P/4//H/8v/y/+v/7f/w//D/8v/z//n/+P/8/wEA/P///wUAAwAGAAoABQAIAAcABQAEAP//AgD///j//v////n//v8CAAUACAANABQAFgAXACAAHQAYABwAGAAQABAADwAGAAEA///5//P/8P/s/+n/5v/m/+X/5f/n/+f/6f/t/+7/8f/4//n//P8BAAIABQAHAAYABgAGAAUABwADAAYABQABAAcACAAIAA4ADgAOABQAFAAWABgAFgAVABYAEQARAA8ACgAFAAEA/v/8//b/9P/1//D/8P/w/+//8P/r/+z/7f/r/+z/7v/v//L/8f/0//j/9v/7//3/+v/8/////P/7//7////+/wAABAAFAAgADAAMAAwAEAASAA8AEQAVABAADwAPAAwACwAKAAcABQABAAEA/v/6//n/+f/1//b/9f/z//T/+P/1//X/+f/7//v//P/9//7/////////AQABAP///v///wAA/////wAA//8BAAIAAwAGAAcACAAIAAsADAAKAAwADAAKAAgACgAIAAUAAwABAP///v/9//v/+v/8//v/+v/4//j/+v/6//n/+//9//7//P/8//z/+//9//3///8DAAEAAQAAAAEAAQAAAAIABAAEAAUACAAJAAkACAAIAAgABwAHAAcABQAFAAMA//8BAP///P/7//n/9v/0//D/8f/v/+7/7f/t//D/8f/y//T/+f/8//7/AAAAAAMABwAGAAcACwALAAsACgAKAAgABwAHAAUABQAIAAgABgAGAAgABwAIAAgACgAKAAsACgAHAA
UAAwAAAP7//v/9//z//P/8//v//P/7//r//f/8//v//f/+//3//f///wAAAAD//wIA///+//7//v/8//7//f/9//3//v/8//3////9//7///8AAAEAAgADAAUABgAHAAgACQAIAAYABAADAAAA///9//v/+f/4//f/9v/1//f/9//2//f/9//3//j/+v/6//r//P/+////AAABAAAAAQAFAAYABQAIAAgABgAIAAcABgAGAAYABgAFAAUAAgABAP////8BAAMAAgACAAMAAQD+//7//v/9//7///////7//v/+//z//P/8//v//P/+/wAAAQABAAEAAAABAAEAAQAAAAEAAQACAAMABAAFAAUABwAHAAUABQAFAAIAAQABAAAA/////wAA/////wIABAADAAQAAwAAAP//AAAAAP7///////3//f/9//z//P/8//z//f/+//3//v/+////AAABAAEA/v/+/wIA///+/wAA/v/+/wAA/v/+////AAD+//v///8BAP3/+//+/wEAAAD9//7/AwAGAAEA//8DAAMAAAAAAAIA/f/+/wwA/v/p/wUAFQDy/+7/BQAMAAgA+v8DABcABwD4/wcAAwD0//n/+f/7/wYAAgD//wQAAwD5//v////1//X/BQD1/9j/7P8AAOT/5v8QACUAKgAkAAMACAAhAAAA6v/2/+3/3v/b/9f/AwDP/6oAEQI4AOT9Jf88AaoAAf///pcACQG8/3P/PQBEANf/5f8ZAPP/qP+y//z/JwD0/wAANQAlAAYAJgBYAAEAAQBRAOX/O/+t/38AdQDS/17//v+pAOf/Iv/r/6IALQCh/7n/BQAAAN7/CgBOABQAn//O/zwA4/94/1cAcQEHAbv/Dv/O/zcAm/+k/zgALwDI//3/EgDf/6P/y/81AAEAkv+V/wIAy/+e//r/VgA2AMP/0P8/AHkA9f8RAHUA8/9c/7//sABXAEb/u//WAGcAQ/+s/6sAUACw/5L/ogCiADz/L/+JAAcBo//+/2oAnf+2/0YAmAC4/yL/xgDXAST/EP7WACgBP/8+/xIAfAAqAJL/3f9dAAQAJwBuAEP/tP/+APL/1/4dAJsAYQDU/6r+RgCIAeL/4P5TAO4Apv95/9H/AABLALT/rv91ADYAY/87AM4Acv8ZAI0Axf/Y/9P/AADf/0EAsP+b/70AXgDe/g7/LAGVAB7/o/88ACgAIQCE/9D/RQDt/9L/yv93AD0AWP+T/7QAhwB4/3v/SABpALH/uP+fAM0A2P9P/6IAngFWAEH/kP/LAesBNP/Y/SH/dAFFAcf+s/zI/poCRAFk/Yn9qgC3AioAc/0r/8ABwAHi/8j/FQC5/xQBIwL0/x//ogAkAqYCqf+X/Y3/4AHlAPH9Mf1o/68AXP85/Zf9+AAPAQX/sv4u/0H/qv8TAG0AfQApAK4AkwEaAikAof8GAZEBCAK8AO399v7wAND/h/9I/xr+kf9kAUz/KP7z/Uf/mwEvAeb+F/7pAOIBsgHy/xv+PgCgApECZP+k/ZwAygLnAJ7+jP4mAA0CVQEZ/9/+0v88AfYA0P+C/4H/qwBqAE7/I//v/m4AhgHi/6v9y/6vAaMAgP4b/vn/WAIlAMT8O/60ARwCk//t/RH/mgLfAkP+JvwWABMEqgJA/t36bP63BMsDx/zS+vEAeASqAaL8Nvvi/9QDawK0/kz8Pf7+ApED+v59/FX/MQPOAuP+zvvQ/aACfQJq/239df5RAT8Cp/+U/R3/TgFXAjsAzv5N/0QAHAHiAFMAy/9AAGUBUQEKAMj/SgHgAYcA0P8yAGQB8gDN/3//HACyAHn/1v5F//UAgwBk/sX+2P///zf/gP54/kIAIgE7AAD/av6YANgBHwA2/yUA/QAPAX0A2v9Q/+3/HgHxAMn/X//a/7EAVQCR/hn+0P9EATIA1v6f/pv/bwDO/zH/MAAOAVkAuP9Z/+T/lgCUALEAJwFJAXoANP+i/lH/DwBHAC
kArP8q/0j/Of9n/63/Vf8GANoALADe/kf+jP6v//sAEQHEANMAKgEsAUUAWv93/4AACAGUAEwAVwBRAHkAmABBAHz/ff9pAMwA8/8G/2P/nABPAcEAKQAZAHUAxQA5AH//kv8YAFgAHgCJ/wH/Cv/+/10A7v+u/6b/2//H/1v/PP/U/38AqgBJAAEAEADO/6P/4v8SAD4AMgASAPz/ov+R/+X/QAAcANv/IQBEAOD/Ov8A/zD/8//dAMYATAAFAPn/6f+O/yT/k/+mACwB1gDt/0P/Tf/h/24ApADJAOcA/gDAAOL/7/7g/rf/QQD4/4r/V/9Z/2L/Gf/C/uD+Yf/q/xMArP86/zf/c/91/2T/kv/z/38AmgD5/1P/Uv+x/+3/GQBgAMwAMgFbAQ4B0wAsAa4BEgIdAgoCNwJmAh0CsAGNAY4BzAHOAWkBowAIAK//Pf/C/jP+EP4x/kD+4f2W/XL9Rv19/bv91P2E/U39UP1F/Qb9Zfw2/Fr8dfzW/Kv9rP6M/7gAmQE/AgEDYAMABNsEjwXMBSQGXQYNBg8G3wVWBbkEGwRsAwYDIgKRAMT/Mv9S/mP9y/xe/FT8vPyh/Gz8gfzE/Fr96v3v/fL9b/6W/iz+e/2o/Bb8cvup+gX63vkO+nv6mfvf/N792/7o/xcBZQJrA0wEUgVXBkYHBwhcCDwIRAhpCEcIZgfbBasEtAOHAuIASP8w/pX9Hf0z/Lz7qPuh+/L7Tfyn/A39vv2e/lf/x//R//b/KwAnALn/Av9J/jT92PtM+hX5xPju+En53/kQ+1v8mf2y/rr/cQH6AmAEwQX2BuEHmghhCXcJbwlFCe8IQAjpBkUFfQMFAj0Agv5I/Sb8avvs+q76WfoN+kj6H/tk/BL9uf3n/iQA9wBOAagBzgG/AWEB0QAlANL+Hf1L+4r5Q/jS9+H3EPjz+Hv6Jvyw/fr+dQBWAhUEYwWVBt4H+Aj2CaUKtQqGChcKZgkRCBkG/gM3ApoAy/4a/an7uvrg+T/5Nfka+Sv51/kt+7H8xf3i/jwAqAHCAiUDPgM+AyQDjgJWAc7/Mv6o/Lb6bfil9i/2d/ah9kX3qPiH+p38Y/74/+EB8QO/BUoHowjPCbwKjAvgC5ILDQsYCtAI8AaGBFYCZQBv/l/87Prk+Qr5wfiQ+Lb4FfkE+gr7b/wN/iP/mgDWAc8CggMJBAcEuAMzAyYCwgAs/239b/tL+Qf35fW49ev1APYo92X51Pv2/W7/hAHgA+UF+wYdCF0JbQpRC5oLbgvsCm4KkAn+B6EFMwNZAZ7/Ov0G+7v5Gfmu+I347fh0+Xj6ufv3/Pb9KP+LAOoBHAOlAzoE5AT/BEgEJAPkAVIAbf5X/CP6+/eB9eHzAPSS9Pr0GPaV+E77u/1a/w4BjQO5BT8HcQjUCcAKpAsnDMsLMgt6Cp0JBQiTBecC4wAA/538ifpb+Z34D/gL+Lf4h/l7+rr7MP3G/tP/zwAZAlEDBASDBPMEtAQfBC0D3QEPAOz99Psm+hT4U/WF84PzCPRl9GX19fcm+/L9CgDyAW8EpwYNCA4JCQr/CrULPAwGDG8L8QofCrUIZwb8A/IB7v9//Qz7nfnU+Dj4EfiU+K754PoY/E79pv4fAEkBZAJKA+kDegTqBMEEzAPpAuQBWgBU/jX8UPpR+PD1gfOn8h3zl/Nh9DL2M/lf/EX/gAGtAzwGBghkCW4KAgucC4cM3QzuCxALYgpyCa0HqgT0ATcAVv6Q+zj5GfiH94r3uPdS+Iv5+/p//NX9C/8tAKYB+wKyAyMEmgT2BI8EfgNCAh8BnP+E/WX7cfl090z1LfMY8qry3PMb9fX2GPqn/eMAYQNiBeAH+wk8Cw8MvgwIDVgNaQ2HDHULRgrQCOYGXwSHATn/Hf13+jL43fYw9kX2+vbz90/5/vrW/IL+8/8kAWUCyAN2BMIEDAVGBeEErQM6Aq8A5f6s/Gz6j/iy9qz0qfJN8cPxQPOo9Hb20Pke/s
QBpQSqBvMIcQvRDFANcg20DfEN2w3FDPkKtwmkCK4GuAOzAIn+ePyk+dn2c/Vf9Wj12vVS90z5LfsC/bv+MwC0AfIC7QOtBPsEQgWMBRAFrANeAhIBI//M/Hr6jfj89jf1U/Os8X/xTvM39b72Jvlb/Z0BmQS4Bo0IKQsgDY4NWw1jDbENnw3EDNsKJgkrCHQGmANdAOP9Kfzy+SL3J/X/9Fj1tPXr9sr48PoJ/fr+eQDXAUwDeQRRBX8FbwWtBYsFUQSQAh8BoP99/Rn74fhZ9xP2TfR78jjxCfJy9Gj2IfhE+zcAfQQ7BxUJ8Ap0DcoOXg6mDZUNgg3TDHQLKwlJBzYGTQQ0AQ/+2vs7+v/3VfXt82P0MfUP9ub3TPqt/On+4AAtAlMDcARRBdkFnQVWBW8F3gQKAxYBof/D/Vb7E/k39/L1yvRg8/Px8PAf8hb1k/ea+Sr9awKtBjUJ4Qq/DA8PIRCKD58OIg6UDZYMxgo3CFwGOgUVA8v/wPyg+tv4fvYL9CXzAvTj9AD2Rvjj+kj9aP9SAcwCHwQxBdsFSAYjBsYFhgVqBGACvABI/wT9gPqs+Fb3Kfbp9NXzevPt8rTyIvWN+ML6R/3ZAasGlQlRC7sMmA7fD1wPXA6IDYgMeQsqCsUHNgXtA1cCYf9t/Ev6+PhP9/H0nfNh9Hj1K/Yv+D374/28/4gBMANKBBoFdgWLBVoF5gSCBLgD8wECALX+Cv2n+rH4dfeP9uX1/PRm9Bf03/Pr9Uf5rPsR/jYC6wblCV0LcgwIDjsP1Q7WDQUN2Au6CqgJXwePBLkCUgHG/nn7/vjx9+L2wvRp8w70mfXX9rf4Zvvy/SAA0AFBAx0EmARTBeAFlAXpBKwETASqAnMAuP5N/ZT7o/lY+LH3ZPcs99T2nPZA9g72Qvic+5r9+/9ABJII/AqUDLENVA7EDisOuwwYC3sJVQgvB3EEkwFmADj/mPwM+oD4fvdn9tT0FvQS9Z32DPio+nj9qf/lAXsDMASKBC0FhQU+BbEEVgQqBDIDVAGi/zH+g/zJ+nL5jfj699/35ved9yH3xPar9pH4Bvxi/sQAoASMCP0KdgwiDVINuA0/DbYLAQouCOEG3QVfA1gAAf80/iv8Jfq1+Kn3B/cI9l31KPbL93L5AfzF/qQAggIYBMgE6gTnBPEEEgXoBEsEuQPyAngB2P8M/kX83PrS+Sf5dPhq+Mf49vik+CH4k/cS+GT7mP50AG8Dzgf/CiUMWQztC/gL8QuhCsAIFAe4BfEEPQMAAPT9h/00/DX6/vhi+Ez4+/dD92330vgi+gX8wv7RAG0C7QPhBP8EwgSJBKgEtwQzBLUDLAMdApcAxf7v/Hb7PPqO+f/4lfjS+Df5/Phg+OD32PaJ+AT9q/+IAV0FvQnVCyAMkQsLC10L3ApOCa4H8wUBBU8EpQFT/lD9Of2c+8/50vjY+C/5gvi992r48fl5++v9gQBhAhUEjgXyBVYFwwS4BNoEcAS9AyUDRAKvAL7+qvz1+uj5W/lA+Sb5UPnz+Vf60flB+UT4Z/dr+t7+4gDqAsYGRgpjC+AKpwk6CZ4JFwnfByYGSQSEA2QC+f4q/BL89vvg+hT6zvkY+jv6m/kg+bT56/rz/O//MwIFBOUF4waHBncFzQSsBJQEFwSFA8gCeQHB/579b/sh+of5RPlB+Vf59fmF+oD6B/pv+X34lfeI+pT/DwIJBJgHlgrKCs4J7ghsCNwI6wgtCHAGFgTFAmEBYP7K+777Ufyn+x/7Jftd+0z7kPol+qD66fvy/QcBuQNFBX8G8gY5BrwE3QOuA2kDKQOYAmgB0v/8/fP7BPoS+RD5Yvmt+ab50fkM+oT5WPiK9172Kfen/H4BdgORBZQITQrfCZYIeQdRCNAJEwqPCJMFZANrAiwApfxs+7L8Lv1t/In7Gvs1+wj7qfqc+qT7v/2tADYDpAQDBtAGkwZIBQ0E0gO2A2ED0QKnAe7/EP40/CX60Pi8+F
X59/nw+fD5Lvry+U35ovhc+K/3ffgW/nkD7QQOBoUIzAm6CMEHgAdYCOIJNQp6CB0FQQJVAS0Akf3W/H/+4f5x/Ur8jftN+6P7DvyN/K39PP/4ANkC7wOQBAwF2wTTA/kCeALCAXEBHQHm/0b+z/wx+3z5jPjQ+ID5CPo3+kb6QPrD+Sz5rfhP+Hr3Vvnb/z0EwAS0BdcH4ghqCAkI6Ac4CQULtArLB9cDzwHSAboAjP5s/pz/8v4X/a774Pph+5v8T/2G/U/+mP8qAZECLwPnA3wEVQR2A5kCOALHAZIBEwFs/639TfzU+lr52Pht+Tv6xvp0+v/52fm++Yn5A/le+Jz3rvobASIEagTEBTYISwk8CUgJTgmtCgkMqgrMBuwCnAG1AV4Aav6C/jT/wf29+8f6mPqB+9r8O/1D/QT+K/+YAO0B8QLMAxAEewO3AkoC1AF5AU0BNQBH/pH8MvvN+fX4XvkI+lj6cfoG+uH5LPp2+tj6svrJ+SD4hPqKAQsFGgU8BqEIyAmlCZEJcAnACmMMMAvTBiIC4QDXAcMAnP52/u/+ef10+0f6Zvom/AX+K/5a/ab9sv5AAOoBKAMfBF0EwAPHAhkCtAG0AeoBigC9/bP7hvqz+Tz5t/mE+rH6svrv+TP5YfmB+kj7m/q/+Lj39PytA1IFXgVJB/cJywqwCkMKgQpSDPwM8QlNBIIAzgAwAS7/of0N/r39kfvo+VD5Bfoa/N/9Uf15/G/9Mv8AAVsCiwPvA3QDlwL1AcgBnAHNATgBoP4G/MH6+Plg+an5lPrA+nL67vmo+Rr6EvtZ/Er8I/uK+Vr40f2MBdQG3AUoB2wJ2AkcCsQKNwtzDEkMlwjqArX/5AAUAmMAnv4l/vL8p/rZ+WD6lftg/TL+G/08/Gb9Wv/XAbwDHQQfA/wBzQEpAp4CdALwAXQAUP29+jL6jvr7+mz7hvui+sb5pfnL+bj6M/z0/LX70/nG97L5UQLlB5kHyAbMB/8IjgkWC+0LSwyzDJsK4wRQ/+v+ywH/Aaz/u/07/FD6XPkL+vn6Kfxw/eb86PpL++b93AABA8kDKgNEAWMAOQF5ArsC+gGeAPD91vqx+VX6JPtk+0j7i/o2+S75Kvp/+838Uf17/MP6NPlQ+QcA+Qe0CJcGgAYvCKAJZQuwDDYMyQtsCk4GPQFw/78BKQPPAKH90/vT+lL6xfpf+8/7lvyt/DL7JPv3/SsBVgO7AwUDkQEJAUMCtwMXBKQCtABa/sr7yfqS+4/8VPxd+5z6vvn5+Vn7PfzR/Ov8Xfy4+pX50vgX+7gDhwjzBdEDNAUFCG8KZgz/CzgKowkTCOwDTADuAGQDOwJn/qD7C/tR+9P7I/xR+9n6c/tP+7T6i/zE/8EBVgL2AQ8BowDWAYwD9wPKAuIAR/+u/UL8Zfwh/e78x/vS+rz67/ol/BP93fyI/Pn74PoQ+pf5XPkLAN8H5AZtA60DhQfSCqQMgAwvCj8JCQlcBk0C7gAcAz8DWf/u+0L78PsD/OD78/oZ+r36cvsX+0b8cf92ATgCSAIRAoIB9wGvA4YEVgPpAFn/hf46/av87PyB/Ir7sPrL+hD72fvX/I380ftQ+7/6R/rS+Wz5x/7qBlQHRwMpAwYHGwrWCw0MqAljCLsItwbYAi0BoQKOAhD/9fuC+1T8SvzU+/j6B/pt+p77xfur/GL/7gBsAZ8BwwGCAeEBZgMnBDkDGgHv/1b/Cf5U/Wn9x/yU++X6M/tU+5j7Y/zZ+8P6qvo++pr56fjv+Lf/VgcxBkwClQM5CGULtgxXDDsKlAmLChYIlgMmAs4D4wI6/h77Y/ub/Eb8Jfv8+VH5afru+8z7rvyX/3ABngGbAe8BMgIIA/IDmwOCAnEBkQCH/7b97PwS/Qz8tfod+lf6zfpS+477tPo2+ov6KPqi+Zj4RforAoMHfQWZAmUEnghQC7kMtAt0CR4JggniBpsC4gE/A2ABMf06++L7TPzd+177MPp7+Y
/63PtF/Mv9ZgCIAZcB1AG9AR0CSQPNA38DhAJvAY0AFf9//VH96fx6+4D6Svqn+t/6+/qY+sv5+vni+S75t/iS+Nb+YAdIB4gDpgNRB0EL5w2uDV8KOQkQCuoHbQSqAhoDbgKO/mr7N/t3/Kz8qvtD+or5hfro+1j8QP1z/wsBIwLJAu8BqAH7AsgDdQPIApoBAQAk/+X9pfzh+876Cvou+UH5sfkU+oT6vPmD+Zn59/gv+AX4Cf5VBh0HpQNvA3wHrgpYDAgNTgu1CcwJqQgbBQ8D1gNdA5L/J/wI/Pb8jfyZ+7f6PPro+uv7APz0/L7/fwE2ArcCXgKbAocDsAMLAzECsgH8AHr/n/3J/Gf8V/v/+dH4sPgS+X/5g/nZ+Nr4Wfks+Tb4+/e4/ZEG8wdABBoE0wciC+8MEQ3bCqAJOwqNCMoE3AJ3AzkDiP8a/OL7+fzb/Dz7Ivo3+kD7WvxE/BT9uP++AdgCJQOcArACtQO6A0kCUwHdAOb/bv6u/OL71/sT++D5y/iQ+Or4bPnZ+Vn5Xfn/+af5xPjy99r8YwbaCC0FMATQB1QLEw3DDQoMEwqeCcEIcQXkAqkDMgNF//b70vvf/K38Y/s++vP5sfoa/J78N/3D/4wBagKlA+4DhANQA18D5QIMAj0BHACd/t78LvwV/Dr7Ifrv+Gr4aviM+PL4pfjc+Er5rvhr9xP3B/6LB+wISAUsBAcIQAxqDnAOVwt0CTwK/AicBPoBHwP9Ag3/QPss++H88/xp+835tflK+9j8vfwr/af/6AFZA/cD7wPiAwcEtAMyAmIBHgG5/1f98fqF+iX7A/sA+rT4XviV+B/5Y/ny+F75FPqK+eH3J/fG/VEH2QhpBb0EmAgsDMINvQ1DCzMK9QrnCJ0EVgIwAzgDev/3+877Ef3X/G77Ufog+qX7+fyO/C39pv+mASkDJgQZBAEEXgQKBKwCwAEgAZr/bP0p+0X6W/rf+f74L/j49xr4lfgk+fD4Tvmh+bP4fvdO9zj+iAdUCBkFlQVkCT4MPQ6NDgoMlArqCq4ISASaAgsD4AEQ/m371/vS/ID8//oM+or61Puv/JD8u/0HAHgBFQNeBMMEpwTYBJgEOAODAr8B2P9I/Ub7Evo5+db4VPjl93T3UPfm90n4h/gr+Sv5uvh198b4ggFwCH0HBQWoBjkL+g3pDtMNagsHC+UKEAj1A3gCNgPuALL8zPqk+7P81Pvu+Q75YfqB/A/9y/y4/u4A7gGjA70EdgRoBOUEIQShAg4C/QBD//n8u/p7+Xf4Hfi890L3Mfd792j42vgc+YX5VPkg+dj3TvphAxgINQavBF4HuAu1DZ0OBg3sCksLjwoiBxYEcwP2Ah8Agfwp+z780Pxv+5r5O/n3+vr8JP35/PD+uAD8ASkEWAW3BEYEAgVtBAcDfQKIAYz/2PwZ+wX6xPhS+NH3+vZ+9ib3V/iJ+O34y/kY+Rf4G/do+mMEFQlYBkkEygZYC+MN7g52DAQKkAsWC7AGfwNeA/QCBQCc/Dr79fuH/FH7Yfn7+Lj6uPza/Jf8wv6dABgClAQHBT4EbAT8BPsDsQJnAkoBpP9f/Rr7xPkL+bD4gvcm9oX14fUo94z3//c0+G73nfbv93kArAhkCKoFMQf+C4sOww4tDlEMcgudC9gIpQTrAscC5QDZ/L36zPuu/KP7xvnJ+KT53vtj/Xz9uP7AAAoC3gO8BaQFoQQVBQYFbgNmApQBJgAZ/hb8ivor+Xr4I/hx91D23/Wn9mn3BPjG+Ir48vcM91X6NARyCVAHYwXgB/4L3Q23DtoMjQn1CWcKvwb9AiYCxQHq/sD72/qz+3b83vtI+kr5mPr6/Jb9wv0cANMBswKyBKMFqQRpBGYFdQSCAswB6AA7/zD9ZvsC+k/5+/gb+Ef3uPbc9rv3ofcO92f3EPhk98j2YP0BB0AJTgdCB80JewzuDnkPAwyrCScKQAgjBDkCQgL2AJf99/oA+zX8mPzj+4
n6evpq/Lb9oP3z/mYBjwIiBPQFTAUoBJEERQTvAgoC/gA2/1f9x/tu+ln53/iI+AP4nPeT91z4ufhU+Hv4WfiP9xD2Y/iGAs4JOwghBd0GcAsQDm0PFg6ICrUJ2wl+BkACWwHJAWP/Mvu3+Wz74vyL/Ar7zPnk+oj94/74/k8AWAKXAyEFOQYHBYgD4wO3A0YCIQHd/0n+Vvx4+lj5svhz+Dv4Pvg3+FD4UvmD+VP5Yvnh+O33rfUJ+o4F6wnaBm8F1wc6CwwOLg9zDOsJiQrrCDEEaAESAWIAcf0X+pP5avuy/Nz7WfpJ+mj8uf7M/nT/AgJDA1cEGAbHBUMEIgT2A1cCXwEdAQIAJf6F+4P5NPle+Qn5NPjV9zL4+vhp+Rz5hfk3+Tv4uvZX97kAagl/CJUFcAeJC08Nfw4sDm8LVAriCX4G/wFHAFgAUv6S+kP57/pq/HP8lfuV+gn7l/1i/3D/LAF4A1kEJAZJB8cFPwRaBL0DxQFUAB3/nP15+zb5Dfjs9xj4Nfgo+Br4jvis+Rv6Efrm+ef45/c99xX9XgfBCVMGlwWOCK4LvA0PDrcKOgjgCP0GhAIDAAIAOP9Z/NT52Pkc/Nz9OP0x+5f6vvxF/7L/cQCfAvADcQUEB/cFKARkBPIDugHi/6L+nf1n/IX6c/in9zH4t/ib+J743/id+Xv6DfoD+jf6NPms9kP5LwT3CikJWgbPByILdQ37DsQMowhwCHoIBgSz/3P/UAAg/jT7cPqE+3L9Uf72/CX76ftq/pH/IQBIAh0EBwVGBnEGGQWCBGEEwQKcABH/pf0f/Cz6AvgO95D3efh3+Cj4cvgx+QD6Q/pz+gX6CfkO9/j4SQO+CkoJnwbmB+UKlgxqDewL0QiPB2UGhALF/nn+SP+1/cD6G/o0/Ar+Lv5F/VP8rfyP/oL/CwA3AvgD3wSZBXQFhAT2A9cDzAITAQ3/Nv0T/IT6lvjj94H4PvkZ+cD4l/jd+JD59/ks+pb5MfgW9q760QYHDP4IJAdlCZEMXg6uDq0LaQjGB0oFMwD2/Gr9PP4g/PH5nPoK/dP+5/7A/a78rf3Q/0cAGQFIAzQE1ARvBfcE/QPeA8UDLgKEALb+6Pzb+wX6N/ji90/46Pj8+On4+Phu+Q/6ofrq+lD6Svho9u38bQjeC/UIrgddCZsLPQ03DSAKkgckB5gDN/4F/Jn8+vyY+7X66vsx/n7/OP9s/jL+z/9WASEBAgK/AwcEIwRdBMcDzwJ/Aj4C2gAA/179RvyS+xf6qPh/+Mf4wvir+Nv4QPnv+V76A/pp+ff4iPYY+F8DTQymC5IINwkrC1wM1Q02DIYIigcYBkEAYfqa+QT8dPxX+7H7ff3//oX/Y//Y/r3/JALIAhoCrQLXAvIC+wNGBJgDdALTAcIACf/Y/ez8jfx++7D5nfg0+LL4O/mr+Rf6cvq3+in6+fhr+Nz2X/Yu/wEKHQxsCQgJUQuhDHYNvAygCaoHrwaPAuj7l/gj+un7FPyi/NP9VP7Q/nX/S/8eAGYCCwQGA9ABJgJ2An8DpwQcBK8BHgAUAHn/o/4F/mT93vvc+dP48/eb92L4Ovlq+Rv5hvm1+ZL4e/ec9RD4fQOMDHIM1gkHC/MNWg5XDYoKqgeEB9IFWP/x+K33vPnN+kr72/yl/pb/n/+I/0MAUQJbBFAEcwJKAdMAZAFoA5QElgPXAOn+QP4f/m3+1/27/Cf7Zflo+AX43/jj+Sr6/Pln+bb5mfns+A34vfXv+ZcF2Aw/DDgKqgxsDjsNFgzsCYQIegfXA9z8q/c4+IX6m/td/IX+iACWAAYABQBbAbIDIQVFA14Atf+v/0QA/gG8A1ED2gC8/kT9TP0J/g3+F/0t+8H5ufg/+IH4PPlO+g36S/mg+Pf3GvjE9lr4wwG6CcoK+QosDfgNqQ2sDf4KzgeaB+oFd/+D+Uj4xvqx/Cz8Mf2L/7cAGwE0AXoB+gK2BFUDWwAe/0T/+v8PAfwBuQGsAJ
z/Uv5B/en8R/1C/aT7hvme+Hb4V/jY+JX5evk6+Uf5Bfn29+T2IPudA4IIzgmnCzANwgwfDRENpgp1CDIHQwSb/sX6zfrm+z38AP2//nT/UwAjAg0DagPBAzQDXwHs/13/R/8oADoBfAFuALP/5/4j/ZH8yfzQ/D/8+/pb+cb3G/ck9xD4gflT+sz6U/qO+aT4fPoyAUAGwgefCb0LngvzC2EMCwoiCJAHvwWrAOD7mftY/Y/9oP0E/+D/BAEQA/UDQQTvBLIEVwJV/839rv2w/iAA/QB0AC7//P2u/IH8x/xc/Hb7E/r2+Cz4lfdR97v4rPq3+278rfzx+wj67PzaArgEcASpBrcIhgceCE4I9gbZBu8GVQWnAXr/Zf/q/5X/3P9UAbgBkQJ5A3QDxgNWBKoDegGRACUABv/q/s//HQCp/mT9U/x2+977SfwK/Oj6wfk9+SL5//iH+ZX7Nf2H/YD9gf2L/fr8g/xH/tMAhQFHASEB0QEQA84D4wTkBZEGpQd8BzIFEASVBEwDngFdASkCGgPjAsIC5AIJAg4B6QC3ADkBKgLQAQwB+P+l/p/9ePzf+0P8x/wL/fr8afyw+/369flg+RD6DfuN+4L7vvur/Db9BPyH+gv9WwGjApkBqQCHAEgBFwKDAp8DzwVICA0IcgV2BBUFqwRYBDgERAQ9BdgE+ALoAdMBuAGoASYBzgGEAvoAHP9I/uT9mf2s/dP9bP66/jH+NP3p+7n6KPrL+WD5e/kS+mj6V/rl+lz8l/3G/pX+F/3v/cf+sP3q/b3/2f8WAOkBiwOPBFYFGwd2BgEFPwbaBtIEIATCBT8GfwXuA6wDSgPfAeAB2wG5ADUAyP9U/qj9/v14/qD+3/67/0YAav/P/Wz8V/uA+vT5y/l0+jP7wPo7+vr6j/zu/cL+KP/R/Sj7v/t7/q397ftC/+ACyAGtAZsDRQSpBMAEAgQ/BG8FAQZcBgMGTQY0B7wG6QUuBIwCUAI4AlUBbQCA/wX+Mf1q/r4AHAFFAE0BBALw/yf9kftw+2X7r/ow+t36O/sG+yT7Avs7+wb83/xj/I/6LvlL+ZH6dvxG/hgApQEwAocC8ALYA4YE1gNBA0oETAWDBbMFBgbMB9kIyAbEBFYE9gI+AbEAvACLAdoBGwGkAJkAwAAaAR8BkQCu/2f+Bf0a/CL7q/p8+4v8Qv3t/ND75voU+pL5o/n4+aT6U/sy+1X6U/k6+vH9qAE6AgAC8gNmBOUBkgBWAT4CCwTrBYMGbgaxBhQHCQacBI4EjQRBA9gCiANeA2cCEQLaAfcA3ABCAg0DPAKZAdUAEP9N/SX8afvi++b88PzR+1v6xPlW+lb63vmO+hr7kvuf+5z6Rfq8+6P9P/4+/v3+MADVAPYAEQH9AVoD6gPlA4MEBQXgBLoEAgTdA4UFLAaBBR0FcwRZBAAEyQL0AXwBRgGgAUEB+AB4AT0BGgCi/mj9I/2Y/X/9cf3H/DX7cPpE+ov6B/vh+nr7Mvx7+z/6wfml+iH8OP3k/Wj+D/7u/Sz/OQDNAM8CNAWHBWQD1AHUAmsDwAJrAwIFuAU4BqAF4QNRA8QDSQM7Ah8CFANiA28CLgJ/AiwCvQGaAc0A5f/d/wb/+/yC+9n6yvl6+Or4tPqS+9X7Jvwm/OL78fsj/C/8gvxy/Uz+5P1e/Sn+iv/aAOIBjALXA0cEOQLdAH4BuAHPAXQDxgXLBtIGTgbaBOcD/QOZA14C9gFoAyUEoAJ6AbcB0QGLATQBtwA+AEj/hv3N+9/6ovp9+v/5Cvq8+g/7dPs2/Hf8LPxZ+y/6efr0+zf9M/7P/lP+PP25/Fz9R/8wAeQCcQTeBEkEEgM+AugCggMuBKwG6wfWBRcEaQQ4BOkCkwLlA7gEaQPWAWgBewHGAQUCWQE5AEAAowDN/zf+4/xd/A/8jvqu+Or4Xfq6+sD60/v+/JT8l/vc+078wPui+x78v/x4/T/+DAD3AboCHANlA/8CjwLGAU
wBawIPBMAFEwcmB3IGzgW6BJMCoQDwAPkB+wFEAtwCzQKrAr0CgAJcAq0CYgKmABD/2/1v/Hj7XfuK+6P6qfkh+nf6UPoa+xL8ffzN/Bb9FP2U/JL8C/36/FX9BP5q/nf/tQBtABUATwFFAjQCNwIjA+0D2QPYBMgG/AbBBfAEVgQeA+MB5gG+AvYCoALGAkYDyAKuARAB5gBOATsBbACH/+790vyY/Oj7yvpB+o762Pre+gz7jvu++1L74PoP+8/7Hvxq/Aj+sP/JANIB3QHRAK3/4P7N/pv/GgFxA88FKgfmBnIFPAT4AzUEaQTaBKsFmwUFBAsC/wAoAe0BgAKDAlgCwQF6AA3/V/5k/hD+Ev1c/AD8U/u/+u76Y/tA+wf7Gvv/+nv6KPr5+jT8+fxG/rP/VQAaASEBFgDS/58A3gAqAB4APwEmAvICUATIBb0GOQZ2BUUFMgQwA/ADjgTjA+wCnQJFAk0B3wDzAOkA0QC2ABwAyf6H/Zn9/f1K/R/8zfuH+8f6sPr1+iD7jvoB+vf6cfx1/HL8AP5C/3L/s/+Y/6L+Hv7e/l8AhgGQAnMDngMBBGYElwP5Av0CvQLlArYD3gR9BSgFswSuBAkEjQK8AZ8BswG3AfMAt/8C/0v+Iv1v/Fj8OPy9+1T7hvvv+/f7hPtb+8/7/PsX/A79LP7Y/sj/KABi/5b+q/5q/ycAJQEFAk4CYQKmAl0CJQGgAPsAMgH9AbADugQSBZQF6wV0BZQEHwTKAyYDCgIDAawAdgB9/y7+Zf0t/Rj9AP0P/Qj9c/zj+5L71/qk+jf7h/vW+y79Of+yAHoAQwDqALwAIQCt/7T/4v9U//X+4/+5ABsBeQHXABcAhgBgARICywKHA1QEaAQQBFYEmgQkBI4DvwLBAcMAtP/9/k7+VP16/Cb8IPxV/Kf84/yt/Gz9Kf/d/oz81ftK/bD9MP2K/m4B4gIZA7sD3gOrArYA5/4G/zEAAQBUAMkAZwCRAHMAwf/P/9EA7wFAAiYCtgJpA/sCIAJ0Aq4CpwF4AFsA0QAKAEz+v/z6+h36X/rU+lX8xv28/kb/Kv7M/KP9vf7J/nH/ggC0AbQCWgKdAYwCMgNDAskAwP+9/94AhwHdAD8BIwL4AfkAVQD2APEBsgFCAaABBAL0Aa8BtAGQAd8ApP+c/ij+kP1T/Dr7uvqk+rD6QftO/DP+CAANAJv/S/94/0v/qf5+/34BBAMLBN8E/ASBBDADGgGw//D/8f88AD0CIQNoAsABhgHUAc4BWgEnAeAAswCMACIAyP/q/2YAKQD0/rz95vz3+/D6mvlM+Nn3EPiB+FH6vP2P/8v/8f+I/pv9af9cAd8CLgXVBmkGiwVEBB0CTwEJAkECJwK4AjICkgERAn8CJwP4A4cEVQQjA98BIwHpAPYAOwHDAZMBhwB+/5P9Svt3+rv5JPiC9mj1UfU79nn3dPks/Cb+//4w/wUAgwFhA84EqgT3A+gD2wNBAzoD9gNfBWIFUwNgAZsAHQHqAWUDSgVlBrIG6wWpBMoDTgMEA8UCfALRAaIAYP9+/pb9Mfyt+mf5xvfN9enzFPPX89f0qfb8+H/72v1x/nf/8AEhBCAFXgXuBCYD9AGxAeEABwGgAqUDPQMkAnEBbgFpAvUDhAWJBlMHVAj1BwIHoQYABvQEkAOuAez/sf78/cr8rvqU+f74l/cX9RvzCvPm8732Evr1+xH+df/n/0YAAAHxApAEsgTFA28C1QAT/3r+Jv8ZAHUBTAL9AYIB3AE+A8MEdQb5B7sIrggCCKsHjgevBicFaQN8AZT/nv3/+wX7W/oo+S73FfU/83/y5/Nk95b7Cv/EADABhAEyArcCBQN0A+ADqQKe/5796fyR/Pr87P3S/s7/0QAIATUB8gI8BYkGRAe1B80HqgfSB9kHvAbCBMsCtgCk/j/9IPyd+iT5Z/jD9jT0DfO/86T28/q1/0ME5gUJBUkEmANpA+ADBAQKBEUD0gBM/f
H6VPux/Kf91v4NAK0A2AD/ABICZgT5BnMISwiLBywHtAZIBV0DmgG8/579ePve+fn4CvgT96j1t/NW8tXy2vYr/M0A1wQ0B7gHAgdXBu0FvwW0BS0E9ACy/aL7Xfve+6D8cv4LAHoAZAChAOgB6wPiBWMHIgjbB+gG0AU6BQEF+wPfATX/lPx2+uP4vvfT9qb16PMn8qvwHfFd9pT9hwKQBpIJvgnxB3wGRgbQBfgFrAUMAvP9S/s0+qL7lf0P/7AAYQFHAS4B0gHlA0wGQwhCCZEIHgctBuoFxgVxBPUBWP9k/Kb5Gvgs92n2hvVW86fwu+4N73H15v4bBRYJGQvcCXMHLAWbAxsD6APfA1cA9vt0+KD2T/nv/TEAOgE8AncCKAJ9AiIEXQb8CAELVwrsB3YGfwaUBj4FkQKC/4f8FfpK+Cj3T/dz91n1GvK87lfuKParALoG8AlWCwsKHwb+ApEC7QJKBG0EGgAh+g/2CvXD93P8e/80AIAA0QAYASUCggSkB2sK4AuUCnwHSgYZB1AHxwW9Amf/U/zE+WX4avhC+fL4I/Y58vDuRu8v990BKQihC7cMzQncBRcE3APGA8EEZQQn/6j4UvTn8lj2Yfz4/3gAWwBRAEgAZAHCA3MGdwhpCeUI+QXpAxQFLAYUBWoCa//n/On68/mB+RL5tPhP9rHyve/l7yX4AwMMCVwM8gwPCloGNQQmBAMFXwZbBRwAU/re9ZrzKfYR/GUAIQGs/13+J/+TAeIDAwYMCBMJdgjwBbsDcATrBdUEWwGx/fv6UfmT+M34pflC+bn2ivN/8Njv5PYlAtwIvwsiDQkLpgbfBFAGWgesByoG4gD5+r32cvRP9tT7QgAHAX3/Av7M/vsB8gS8BrgH5wfdBxcG6QN0BMAF3QRUAXX92foN+Xn48Pgx+aP4KvaZ8jfwzu9G9Zv/6gaSCl8MHAvJB3EFDQamBy4IzAbnARn8bPg09hL3kPvj/3kBvQAs/+/+eQHRBNoGwQcRCG8HJAVuAxgEMgXJBCECtv7++4P57fcu+Bf5+Pj69pvz9PDg7z7zb/wrBBYItQpACtMH7gXUBTgHDgiIB50DeP1Q+Q/3OveV+jn+CwAWADv/xP57ACEEBQdrCM4IWQjaBjQFogXKBlYG4AMVAAb9v/rD+On3I/hB+Pn2pPSJ8tfw3vEV+X4B0wWvCO0Jbgh5BqEFTgaNB8YHQwWu/5b6v/fU9vf4L/xl/of/9P4A/lb/EgNxBhoI6QjbCKkH2wU0BWwGMwc9BYQBkf5//HD6EPm6+LH4IPjj9bfyivDi8Db24P3yAm0Gngg7CMAGRgaGB8sIugimBs4Bzfya+YH3RPhW+yT97f1T/tz9d/5iAY0E4QahCPkI8geiBjQGoAa6BlkFLQJG/0/9Lvud+fz4dfiL9/P14fNw8g3yg/Ts+vwA7wQwCOAIVgeyBgcHcgcVCJEH1QPk/oj7qPjm90L63fvg/GP+rP7e/iMBGAQQBqEHWAjmB04HuQZxBskGJwZaA1UAEf70+8v6KPoF+cz3QfYA9JjySvIh8773xP0+AQgEdgb2BsoGHgfYB80I1ggOBm8BMP7u+/P5UPqW+837dPwb/YL9tv/eAvoElAbOB+UHfwdjB3UHwQd0BxUFtQFK/1n9yfvw+gP68/ic95L11vN984LznPQG+fj94wCqA78F4AX5BZgG4QagB/AHMgX4AD/+Jfzh+t/70fzR/IH94v1N/vEAHASYBY8GWAcbB9oGAQcgB4QHAwdQBF0Br/8L/sb8Lfwo+/v5ePhP9tz0pvRY9Mf0Bviq+/X9sADjApcDogSbBd0F0gZMBzgFHgK3/4P9L/yD/Mr8t/wz/WH9Gv66AHQDJQWhBlQH8Qa3Br8G/AanBwYHbgTmAUoA8f4Y/q79z/yu+0X6UPgH9+L2jPZp9hr4T/qw+7L9vP97AGsBgwIOAxIEuASwA+YBLACm/t79X/7z/uv+JP8x/3r/EgHVAi
IEcQX/BZsFlAUIBl8G3AagBggFbgNaAkcBmwA2ADn/+v3D/C/7Cvqh+ez4GPgy+BD54vny+ln8PP3U/Xr+E/8hAFIBlAHtAA8ASv+5/u3+tf9cANYA7gDoAIIBuAI4BJ0FdAZlBtYFeAU6BS4FLAVyBE8DawKKAdIAiwAGACz/hv65/af84/tk+6z6xflD+VD5r/ln+j37xPs1/M78rP3E/oX/pP9P/8T+bf6V/m3/rgCcAQgCNgKDAk8DhwS5BYoGqAb/BSMFtwTABPEE5gRbBHcDowLzAT4BlQAOAJn/BP85/nD95Pxx/Lj7vfoF+uH5CfpE+qn6A/sq+3/7GPzI/IT99/3w/dP9FP7U/gwAdgFnAqkCyQIpA9MDvASkBUYGSwawBRUFBQVNBXYFMAVqBFYDQAJZAcUAhgAuAIX/wv4C/pH9g/1R/cb8BfwH+wf6bPlT+bX5L/pO+lf63PrM+8L8kf0d/mv+xP5S/xAA8wCzATICtgJWAwYE1QSZBR8GfQa0Bq0GhQZOBtQFEAU0BFcDkgISAroBPwGRAMb/Ff+m/lP+Bv61/SX9IPzL+p75Bfnj+AT5avnL+d75DPqi+on7r/zK/V/+gf6W/tf+eP+VAMABjgIQA1cDtwOSBLwF8wbhBxYInwf1BlkGywVqBRgFaARfAz4CSwGzAEoA0P9a//z+h/66/Zf8hfu9+g/6Z/nf+Kr44fhd+d35JPp++iP77fu3/E79rP3o/RD+ev5O/3oAkgE7At0CsgOxBPMFOAchCHAICghHB5sGQwYNBrIFHAUtBAYDKALCAXEB+ABHAEv//v3K/C/86vuU+yP7bvqz+VH5QPl5+f75g/qd+nf6h/rN+kP71/tT/NL8Uv3h/bj+/v9qAZ8CtQOtBIkFWgYdB9QHOQg4CP4HngcyB5gG3AUoBXUEuwPiAhECNwEOANz+9P1K/dP8k/xW/Pj7pvtd+/z6oPo7+uP54PkX+h/63vmn+a/59fmG+j77APzI/Jz9xf5VAAkCZQNPBBUFxwVkBhgH4weACJAIPAjBB1AHGQfPBkoGgAVgBAkDuwGpAMD/7P5N/rL9N/0W/Rj9Dv3r/JT8A/xM+3n6bvl8+BP4Efg9+Gn4fvjP+J35y/ol/Jb9zP5+//7/vgDIAfQC+QO8BFMF3AV2BksHLgjCCN8IiQjsB10H3gYXBv0EqwNJAjMBkwAPAHz/Cf+r/j7+5f2v/XX9H/2N/KP7k/qN+WP4RffG9gP3ofdW+CD5GPol+yn8JP0j/hT/zf9eAAUB3QG8AmcD9wObBE8FLQYyBxUIhghvCPgHVwe/BjAGiwXXBO8D8AIwAq0BIgFcAIT/s/4R/rL9XP3n/F38kPuP+qf5Cvm5+Jf4v/hB+fH5ifrR+uz6Q/vz+9z8pf0n/m7+nP4U/xoAggHTAsEDeQQSBbMFrAbDB3kIfggECHMH/QapBlwG+wVXBT0E8gLRAdYA4P/y/hL+O/1s/Lr7Pfv6+vH6A/sh+zr7R/sd+6n6LPoM+mD64Poz+zP7EPsE+1H7DvwY/Rv+6/7H/9kAAwIiAx0E6wSpBXsGagdHCNwIFAn5CJ4IBgg6B18GZwU7BOkClgFQABT/4P3G/AH8y/sL/HT88Pxi/ZP9fv1D/RD9yvxM/Jb7wPoc+rn5jPmE+Yn52fmP+pn7zPzw/QT/0P+JAG8BZQJfAx4EwARaBfwF4Aa8B0AILQivByYHcgaMBW0EIgPCAUcADf9i/jX+M/4m/i3+Ov5K/nn+tP7Z/rD+Pv6h/eP8C/we+0X6ovk7+T35q/lT+uX6afsr/Cr9Rf5F/xQArQAaAaQBbwJpAzIEjwTBBBgFowVFBsQG3AZkBpAFsgT6A4cDQwPkAl8C1QFVAdUAXADy/4j/Gv+q/j/+6/2d/Tb9vvxW/P37j/se+8T6evpI+lX6wvpm+/f7SvyU/C39Fv4I/9T/XQCWAKYA5QB+ATQCpgK7ArkC7AJeA/8DvARbBZgFiwWGBZ
4FlwVABY4EpgOvArcBxADK/7r+pv3D/D78Efwv/HX8s/ze/Bb9aP2s/br9hf0h/bD8XPxZ/Ln8Ov2H/Z/9vP3u/TP+hf7N/tz+tv6p/vb+kP9BAPIAuQGsAtgDLAVzBl0HuQejB04HygYKBgoF1gN4AigBCQAK/xD+HP1p/Bv8N/yq/En99v2A/uP+PP+e/wkARwAwAMb/Hf9W/nX9sfxJ/BT8yftb+yX7SfuM++L7cPw8/f79vv7S/zABYgIlA7wDagQYBbIFPQaUBnIG3gUlBW8EnwOmAqcBwAADAHn/Lf/4/rT+eP54/sX+Uv8GALIAFAEqAR4B6AByAN7/WP+v/rj9pfyk+7v6GPoI+m/60voa+477R/wL/bf9WP7e/kT/xv+aAIsBLAJmAogCzgI5A78DUgS9BMgEiQQ+BAIEvQNgA/8CngI+AvEBswFmAfgAiwBAAA8A+P/9/w0A+f+3/2H/B/+w/nD+Wv5Q/iL+x/1I/a/8Fvyx+5L7lfuZ+7H7+ftM/Hn8jPyj/NP8Kv3O/bD+h/8qALQAWgEsAiIDJwQYBdQFUAaaBqoGdQYFBmwFqwS7A7kCxgHnAAsAOP+d/lr+b/7R/mj/CACCAMsA+AD+ANUAfwAbAKX/Cf9A/kz9K/z/+hf6m/lp+Uf5NPlc+cT5Svra+nb7Jfzn/Nn9Bf9FAFwBQAIhAx0EIgUaBtoGRgdNBwAHegbPBQsFNwR3A90CbQISAskBjQFOAQ0B4ADfAAgBSAGIAagBjwEsAYgAxv8S/3/+9/1P/X/8mvut+sX5Gfnl+A/5Rvlv+bf5KPqg+hD7oPtR/Pv8oP13/nf/aAApAdIBkQJuA1QEQgVBBiQHwgcQCBkI0gdKB4kGwgUVBW4EsQPhAhYCOwFfALL/Wv8+/z3/Rf9N/0T/Gf/n/sj+n/5P/un9kP0i/YH8wPsH+176wflY+TL5LfkZ+Qv5KPl5+dn5Pvq7+mn7VvyU/R3/swAaAj4DTARzBa0GzgerCDUJUwn+CEkIdQeaBrAFtwTFA+kCBAI3AaQAWwAvABQAFgAxAFQAYwB3AH0AZQAjAND/df8B/3j+3v1K/a38Dvxk+6/66Pk9+d742vga+V75l/nL+S76z/qy+7b8wf3C/rH/lwB/AYUClwOtBK0FigYzB6IH6QcRCCUICQilB/wGKAY3BT4ETgN7AsoBJgGYABwAtf9V/w3/9v4M/yj/K/8G/8b+hf5P/jj+Jf7w/Yv9B/2F/Bb8svtB+6j66/kg+W/4D/gG+EH4rfgw+bX5SfoU+z38v/1y/xMBcwKTA3gESQUtBh8H/AezCDAJTwkOCW4IfAdmBk8FOAQzA0UCaAGyADUA5v+z/5z/if+D/6n/9/9ZALEA2gCzAEwAuv8k/7D+Vv7q/Vr9oPy4+8v6BPps+QL5w/ib+Ib4g/iY+Nj4Yvko+hH7F/wu/UP+aP+kANsBEQM6BEIFMwYTB9AHcAj6CEkJSAnyCDsITwdsBqUF+ARPBIEDggJ7AZEA4/+J/2b/Uf86/xb/8P7t/hX/S/9q/03/3v4//pn9Ef25/IL8P/zZ+077m/rm+Ur5yfhv+D/4H/gj+F34zvh9+Wj6bPty/In9p/7f/1cB8gKFBPQFFQfZB2YI1QgnCXQJowmCCREJVAhJBzAGNQVMBIEDyALqAfEABQAu/5X+X/5h/nD+hf5+/lL+L/4Z/vz95P2w/U/94/x//Cv89vvP+4z7Lfu3+i36wvmO+Xj5gvmY+aX5vfkB+nb6JvsV/BH9B/4J/xEALgF2At0DNgVwBncHPgjYCFcJugn8CREKzwk5CVsISAcgBvgE0wOvAo8BcQBw/5/+BP6h/XH9Wf1F/Tr9N/1C/XP9vv0N/kj+WP49/gP+yf2e/Yf9cP1D/fT8jvwo/Nn7rfuO+2f7JPvR+ob6afqo+jX79Pu8/H79K/7Q/pP/ewCCAZACjQNhBB8F2gWWBmUHIwioCNUIpwgmCGoHmwbBBeQE7gPSAp
sBYQBA/13+r/0d/bn8i/x8/Kb8HP2y/U3+6f5U/3r/jv+r/7b/u//T/9b/nv9U/xf/tf4x/sj9Zv3P/B/8dPu8+jn6G/oJ+vb5R/rE+v76Qvv8++v84P37/g8A6ACkAWICMwMtBBYFvgUtBmcGegaKBoQGWAZbBiwGaQWIBO4DOgNvAuMBVwGqAAkAfP8L//3+I/9F/6H/DwBFAGAAnwDgAO0A8QDcAI8AGgCH/xL/wf5d/vr90P1a/ZD8vfsd+6P6EPq/+Uz52vgh+Qz58/ez+JH6ifqL+hn8KP2L/eH+dADBAeAC4gOOBHUFwwahB5kHdwcMCIIIQAhTB/wGAAdvBpMFyQSFAyECGwEkAOf/DQCC/+z+a//E/6v/8P8/AL4AhwFVAXYAXwBZALb/I//g/kz+nf33/EL8Avy9+/X6ifp3+hL6mvkH+TD4yveG96n3K/lV+dH3aPmP/cD+8v06/z0B8AGPArkD8ATqBdMFGwa+Bz0IYgcHByoHdwfPB2cHmAbiBa0E1wPFA8QC8AAsAM//+v4D/23/Ff/e/n7/cgD+AMkAfgASAa4BTgGeAN7/O//Y/mn+//2q/QD96/uG+6b7M/tf+uL5BPok+r75F/no+Pf4j/hb+FH4v/mx/FD86fm5/G4CCAPkAA8CZwTLBAwFNgYCB9sGIQbJBoMITAgcB9kGgwZUBgcHwga9BDID5wLyAlMCVgDm/l7/Ev+N/QP+SP/P/jH+Cv+AAAQBVQCK/zcADwE2ADT/jP66/Wb9gv05/Xj8nfvt+iP7svtS+4n6Zvqq+uX69/ps+rr5XPn6+AH5DvuY/Wz8hPrZ/QYDuQPVAXEC2QTgBe4FTwbjBu0GXAaBBk0HOweKBrEFpQSgBIEFHwUyA6kBJQEpAdMAYP+P/sb+G/6q/RT/dAAvALX/YQDhAQEDswLIAa4BBgLJAfYA8//h/g3+d/0h/RH9Z/wq+7b6H/st+876Z/oW+iL6V/p0+kn6xvkB+dL4YPk9+ej6A//v/kv77fwFAysFzQJyAbwC2gSRBpYGjQVQBZUFZgbGBswFygVABqYEQAOFBI4FFgSqAXkAhQHWAtYBSgCoAMYAMgBRAZUCWwKuAWIB6QH2AvACZwE5AIr/Ev8k/wv+zfu8+uf65fqh+vv5AvkN+cb5IPoa+vv5yPmy+fr5Gfoj+nr5MPh79xL5zv3k/6H8pvsKAScGDgZ9BB8F9AZ0CCsJuQhkCKQIqQiICF4HzgbuBy0HRAQnA1MElwT2As8Asv8JABEAU/9v/6r/uv6N/sH/0QArAbUAFgAXAKcAzgAsAPn+Zf2r/J783fsC+2X6Mfls+Av5x/n2+cT5fvm++dv6Cvxk/IL8kPzZ/FD9XP0n/f78v/yc/Hr8cvx5/50DxgKu/+kAlQWKCHUIRgdrBtYGmQhoCQUI4AXEBD0FnwTbAvoCmQM2Ak8ADAAAAT4B2f9z/mv+IP91/9r/JQBC/wn/QwAoAS0B7ADGAGQAzv+C/7b/8/8f/y39ofte+9z7X/zQ+6z6zPq8+638Nf0Z/d38v/zS/LL8VfyE+xb6cfhd90T56f3c/yn98vvL/9sEggeNB94GIge0COgJPwntBzUHBAe+BRcDpwFsAo4CjwBa/nP9G/7k/rr+Pf7p/Vn+SQBDAhwCfQF8Au4DWATYA2wDcANYA0ECmQCl/2X/IP/h/bH7TPp2+r36ivqS+qv67fqf+3X8bP1M/qj+0f4H/zD/X/9V/5/+av1E/Nv7h/tN+gD5q/iy+NL3bfkc/wgCtwD9AI4DcAYiCSIKjQmNCCAIaAi+B8kFOAR5BDwEfgGg/zMAswDTABUBvgAjAAoAMwGcAuIChALnAhkEkgMwAjMCnwK7AmEC+ABe/8b+qv6M/p/9wfvT+v/6rvo++qf6BPvN+h37pvvM++r7aPyj/LP7ePqt+Qb56feT9if1ZvPy8yT5VP4b///+2QFpBi8KMQwdDa8NOg5SDvwLUwhzBgAHTQdwBC
4Aif40/53/GP8V/ir+If8xAHgBmwL5AzIGvQe5BnMEjQMxBJUECQTAAhMBO/8k/R/74vl8+Uj5iPhv9/H2ZPfQ+IP6Gvsa+877N/33/j0AuQCeAOP/xf4E/or9Pf0Y/XX8Jfuk+cL4fvhS+Bj41fc5+A39LgWECAcHZwehCs0N7A6zDQ0MDwtzCmMI5gMnANH/RgFqAGz8Bvqg+zf+xf8+AGcAFgEtAgkDFQPeAqEDVAWiBZICzv4z/j3/Zf+M/vv80vuF+x77xPol+0j8KP2g/KP7p/tZ/bX/qABdAC4ARACvAOEASwCR//L+wv2g+2P5FfiL9932fvVX9OnzX/Vn+2MCTQUGB08Krg02D0AOMg28DIYLGAoCB+kBTf50/ZH9Ffyc+cr5EPyj/Wb+2v7P/8QB6wNrBfIFYwaHB2cIPQdaBKgC2AJsAmoArv2Y+/X6tvo++rj5k/lp+jD7MPuD+5z8Vv6t//3/aQAAAW4BxQEAAZX/oP7J/Zv8Avus+QL5Svg99wD2G/Um9cr1vfUx9mr7SQN3BwoJlAtZDnQPAg67C9kJKgh6B+wFvAH0/eL8Gv1U/Mf6YfsL/isAkAHkAiMElwUiB78H7QbQBeEFswYtBqUDWwFPABX/QP1t+076nfq8+1D8DPzc+2D8pvxe/ED8A/0H/70ADQHxAHoAy/9R/2T+Mf1Q/Ov7h/t1+iz5Yfjl91f3tfZt9pX2KPdq99j3R/xgA40H8An0DOoOPg/ADUwLXQmPB2oGCgWtAb7+Bf7f/Uz9OPxd/F7+TQAEAhYE4AUDB3UHCAf6BQEFDQV0BcsEwwJoAIb+U/zu+aD4h/hW+cP6o/vA++b7bfz3/Gv97/27/u//wQAKAYgBiAHBAPr/yv5v/Zv8NPz9+0v7Hfox+UL4Vvfb9qD2+Pb19134OPjU+ogAPQWBCEMMUw98EEYPSAzTCeQH+wU7BKABOP/g/gz/AP5O/O77sv2r/+0AggKSBDMGwgZxBugFZAU/BTsFSQSMAjUBYwDF/mv8+/rR+vz6KPuS+5f88f3Y/u7+eP5W/vX+oP/a/x4AywAtAZEAgf+w/uD9rPyI+8v6K/qr+W35Dvma+Fn4TvhF+D74dvj4+Pf4qvh0+xcBHwVnCOwMOxAKEdkO8QqxB/UEAgOdAdX/Jv/P//H/M/8y/lz+vP+/AJoBWANoBRcHxAchB2gGtgWoBHMDJwKuAJz/Bf8i/q/8a/sE+2P7Bvx3/E/9ZP4g/zT/2v78/nX/Xv84/0H/MP9a/wL/Kv59/dP8D/wY+xj60PkG+vz5rvmv+Rr6ZvqB+o36gfqb+ov6PPpZ/NQAiARbCA4NbRCtESQQkgyXCJ0E5QHF/3H9xfzF/Z7+xf77/g4AEAFHAasBywImBHkFgAbdBvgG4wZEBsoEowLBAHX/Bf7R/Ar8o/vx+3P89vxj/c/9NP76/Vf9DP0v/ZT9xP0S/sP+J/8j/7z+Cv6C/eH88fs++9b6k/pn+hj63vkY+jX6E/oC+j36kfqd+lz6Cfqv+5r/zAICBtsKkA7sDw4PVQx2CXoGLwM6AKz90fzB/Yv+cf/sALUCKwRTBCYEtwTyBLsExgQJBa4FCAbQBYcF1ARKAz4B6v7q/K778/qd+gH7Sfyw/XH+i/50/h7+TP1w/Cv8HvxJ/CD94/1O/uj+MP+3/sf9kPyb+7/6qPnv+OP4M/nt+dz6h/sS/Gb8IvyY+zb7pvp4+k38pP+ZAmsGTQt2DlwPcA5oDOMJOwZ0AtH/4f2E/ZD+lv8IASgDDwXOBV0F2ASvBLIDUQLbAUgC6QKFA2MEAAWvBIQDuAFz/yn99Ppe+e/4gvng+m78rv2b/h7/3v4B/gr9Ifxi+yr7UPv2+yf9Pf4D/1H/JP+F/lX98vvJ+v75uvnj+ab67/sY/Sj+2P6X/tn9Fv31+4/6qPr1/Hf/lwJ3B4ULew34DTYNlQukCPoEAwIu/179xf2p/u//gwIyBd8GVwf+BlAGpgRoAt
AABQD2/5AAmQH5AiQEhgQkBIACFQCq/Tb7UvmN+Jr4RPmA+hr8sf2o/u/+oP73/UT9cfyd+zj7WvsV/Ez9iP6u/1UAMQB+/2v+9vyl+836Uvpr+un6ufvH/Jn9Gf5f/h/+Iv0s/Mv8Mv50/5ACwAZ/CSELLgxUDHULRQmSBq4DuQAh/3n+Iv4h/1MBZgMIBXoGmgdTB9UFWgSgAtwAzP9J/2v/QgAPAa8B7gF7AW4Aqf6a/NL6c/mI+Dr4p/jG+Q37UPy9/QP/vv+U/+b+Vf75/Wz99Pwb/b39eP4D/1j/iv9p/7b+sv2W/MH7O/vn+tz6G/vN+8D8of1O/rT+X/+YAKQBLwNqBcsGkAdKCEcItAf/BgUGuQQfA/4BTwF0AE8APQFCAmIDuQSgBeEFeAW4BLQDUgIkAUoAmf+B/6z/tv/W/5b/8P4y/ln9kfzQ+xz72/r2+kj72ftt/CX9CP6//iP/Sv9f/13/Bf99/kH+Pf4j/hb+Pf5E/hn+2v1q/ez8qvyk/Kj8tPz4/HT9Bf6f/i//ev92/73/VgDRAO4BhgMoBDsEpwTNBIgERgT7A2MDvAK9AuECaQKVAqUDOQSZBEsFjwUzBYwEsANtAvcACQBU/7b+1P42/zP/R/9y/1j/Kv/2/nj+zf2Y/bj9jf1S/Vr9R/07/Xb9r/3S/QX+Lv4K/rD9fv19/WH9WP1x/Yb9xf0D/uz91/36/Rv+E/4H/j7+Xv5H/kr+R/4y/h7+6v0A/m3+Jv9QACIBlwFQAtIC/gJaA7oDHwRnBIwEtQSfBI8E3QQLBRUFQAUvBeoEcQSjA7QCwwHwAF8A/v/9/zwAXQCIAJgAUwAnAAgAq/8p/6j+Tv4H/qz9Sf3D/FT8SPx1/N38Z/24/d/95P3J/cT90v0I/k/+Sf4j/h/+FP4G/vL90f3U/e79E/4P/sr9wv3h/av9SP0F/TX92f23/q//igBvAX0CBwMnA5IDAARCBEsEEgQPBDYELgRCBHcEugQCBQIFugQ2BI8DBQNIAmEB/QD3ANwAqQCZAK8AhgAFAIn/H/+r/kP+3v2J/UT9Hv0R/ez82/wa/YD9zP38/Tr+iP6X/oL+if54/kv+IP75/b79dv1F/Uf9Lv0T/TX9Vf1g/VP9SP1a/UP9Af39/PT8Cf2L/ZL+7P8OAfIB0gJYA6ADBAQqBAIEtgOYA48DXwNHA5MD8QM6BIEE1gRNBXwFNQWeBO8DTwPQAkYCugE8AeoAxABiAM3/bP8p/8P+Yv43/kr+Uf46/iT+CP4M/k7+gP6W/rT+1P7b/p7+Pv7c/UH9jfwC/LT7mvuc+7n79Pso/Hb85/ww/Vb9YP1w/Zn9lv1w/Tb91Pzc/KP95f4gAA8BBwLZAh4DVgO6A9cDxwOlA6QDwQPeAzAEkgTLBB0FmgX7BTQG+AVlBbsE5wMLAzACZQHYAH4ALQDV/4T/R/8c/9P+h/5t/or+rP6Z/mL+J/72/dr90f3U/er9Ff4n/un9h/1N/Rr9sfxb/FD8Z/x3/Ib8wfwN/TT9Pf1D/Vf9gP29/eD9yf2G/Tr99Pyt/Mf8jP29/uv/8QDkAZQC7QI+A5UDzQPfA9kDwwOwA+ADQQS8BEQF3gVwBsMG2waYBggGMQUSBO0CBQJ9AR4B4gDFAJoARgDQ/3P/Iv/3/vr+Dv8U/9/+nv5b/hr+1/2g/Yn9ef1n/S396fyh/Gz8UvxD/Fv8ify5/MH8rPyv/LX8q/yX/Jj8uPzh/Bv9K/0E/bj8fvxL/E387fwi/nL/hQCMAYQCJgNrA8wDKgRLBC8EFQT+A74DtAP5A38ECAWdBS8GhwaWBkEGjAWWBI8DrALmAVwBBQHhAMwAngBaAPf/qf9M//v+uf6H/mz+Mv7n/Y79Xf1T/Vn9dv2K/ZX9bf01/Qf96/zq/O/8Ff0z/Tr9JP0C/eT8rfyO/IH8hPx8/H78m/yc/In8c/yH/Kv8Cv3l/SP/UwBHAQsCpQL9AjgDjgPDA9sDygPXA8
kDrgPMA0UEAAWeBUkG2QYsBxMHlQbPBcAEtwPUAhUChgEsASABAgG2AEAAuv9U//T+s/5l/jT+B/68/Wn9H/0W/Sb9Uv1z/V/9Nv0G/ef8svyD/H78kPyx/LD8ovyR/JT8nvyE/Gn8Wfxz/Jn8q/yc/Hj8cvx7/In84vzF/RX/VwBnAUgC6AI/A2cDpgO8A8MDsQOwA7ADoQPbA1wEQQUaBs4GXAejB48HAQdDBloFTQRKA2gCvwEyAdkAjQAbAKv/Rf/6/qr+cf5V/in+//3V/br9if1W/VH9V/1Q/R394fyk/Fr8JPwc/E78hvyX/H78Uvws/Bn8GPwb/CD8Ivw4/F78g/yh/LT8nPxP/BH8T/wn/VD+ef/HAAkCAQOIA/YDZgRqBE0E/gPQA7MDygMuBMQEtgWfBmIH7wctCAYIbAecBpkFdgR4A5EC4wFNAfYAtQBcAAAAg/8p/8v+f/5E/hP+/v3R/bb9qP2h/Zb9d/1K/fr8pvxb/Bv87vvl+wf8M/xl/IX8gvxb/B786fvB+6X7j/uY+7r72/sA/CL8Ufxl/Kn8R/01/jL/LwA0ATECCQOrA1kExwQFBfsE2wTBBI0EegSKBOsEkQVUBhEHjgfEB6cHKwdgBlwFRgQYAwICJQGMAC0A4v/I/7D/m/9+/1b/Kv/R/l/+4/1y/RT91fzK/OH86/zP/JH8W/ww/Ab83vvN++37Fvwp/Br8DPwU/CH8GPz1+8b7nPtz+1D7PPtB+1X7iPsc/CT9gv7a/wIB9gHCAnEDEgSxBCAFXgVeBVgFTwU1BR0FDgU5BZkFGQaYBuwGDQf6Bq4GKAZtBYsEpQO4As8B+QA0AJn/H/+//n7+V/5i/m/+Uv4X/s79l/1V/QL9wPyM/G/8YPxH/C/8E/wA/Ar8Afz7+wX8A/z9+8/7lPts+zb7I/sX+/f67Prl+gb7Q/uT+0/8Wf2F/rn/5gAmAj4D5gNwBNQEKAV0BZYF2AXwBfcF+wX8BScGUgZ6Bo8GlwadBnMGEgaVBRwFgwS1A+UCNAKQAcMABgCH/y3/3P6M/nP+Yv4p/tf9h/1E/e78kPxL/CH8Afzd+8/73vvv++371vvL+8v7vPuk+5j7mvuJ+2D7Ofsu+xz7/Prd+tH63Pr4+lL7APz0/BT+PP9uAKQBwQLCA5kEPQW4BesF8QXjBdgF7wUKBkIGmAbqBjAHRwc/B/sGcQa7BfAEOgSOA/ACYQLXAUYBpAASAJ//Uv8c//b+1v6s/nf+Lv7c/Xr9Bf2E/A38t/uE+277fvuk+9T7+vsS/CH8GPz0+7v7fPtE+x37D/sZ+yD7KPsi+xj7I/tM+7b7YPwm/Qj+4f7B/7EAlgGSApADgwRxBRkGlQbfBu8G8AbeBuQGCQcjB0sHZAdTBxoHpgYNBk8FaQR+A5wC0wExAasAUQAaAAEAAAD0/97/uv99/yX/sP4s/qX9Fv2N/BD8n/tC+/j60/rN+tv6//ox+2r7k/ul+6n7l/tn+yL71vqW+mf6TPpW+nv6ufoo+8j7lfxv/Ub+Gf/g/6QAZgE7AhMD5wOrBFQF+QWUBicHmQf8B0wIeAh1CEgIGgjGBz0HmgblBTIFXQRxA5sCywENAV8A3f+b/4f/iP+j/8n/4f/b/57/Pv+3/gv+Wf2r/BT8lfsq+9n6ofp8+nf6g/qb+sn6Afs/+3f7m/u0+7P7kvtX+w/70vq7+tP6JPu5+4f8ev1p/lL/PgAcAfMBrwJZA/0DiAQRBYoF+QV4BuYGWwfIByEIbQiFCG0IGAh7B8IG7AUJBSAEMQNbApgB6ABVANj/g/9K/yP/Df/7/vL+6/7W/rT+e/4v/tH9Yv3y/H78Dvyo+0/7B/vO+qb6jPp5+nD6b/p1+or6pvrN+vj6IPtO+4D7u/v/+1L8tPwm/an9P/7o/qL/ZAAsAfIBswJsAxgEtwRFBb8FKQaDBs4GCAcvB0IHPAcfB/AGrAZWBu4FcgXlBEgEogP2AksCpQELAY
MADgCs/1v/Ef/K/n3+Mf7h/ZH9Rv0A/cD8jfxj/EP8JvwO/PP70Pum+3j7Svsc+/f64frZ+uH6Avs5+4X74ftM/Ln8Hv16/dT9Lv6F/uD+Q/+r/x0AoAAyAckBWQLjAmADxwMgBG4EswTrBBMFNQVQBWEFagVmBUsFGQXVBIUEKwTMA2wDDAOoAj8C2wF9ASUB0QB9ACcAz/97/yn/1v57/hn+sf1E/eD8i/xI/Bv8+fvl+9v74fvz+xH8MfxN/GT8dPyF/Jn8tPzV/Pb8HP1I/Xv9vv0O/mv+yf4d/3D/w/8fAHwA1gAtAXsByQEZAl8CmgLDAuIC+QIGAxEDGgMvA0gDWgNqA3oDiwOTA5MDfwNcAy0D+wLGAowCRAL7AbEBSwG/AGoAGgCq/2H/EP/M/on+Xf5C/gH+9/18/nv+Sf4s/qX9Nv2B/Br8xvuv+yj8nfzg/Az9XP0//Sb9B/3s/O78Gv2S/ej9d/4U/7f/SwDbAHIBkQHMAfYB3QHTAbkBxAHYASUCUAJyArYCqQKaAlsCMALtAb4BygE0Ad4AoQEkAqkBhAH9AUcCSgKrAcwAqwD3AKYAyv95/9L/HQDT/zf/MP9f/1T/+P6W/sH+9/7X/nr+dP6F/kz+D/7r/QL+BP78/fX9Hf5o/m7+Z/6O/s7+4/7k/vv+G/9C/2T/g/+u/+X/FgBBAHsAwgD3ABEBLwFnAZEBlwGUAbMB3AHmAdABywHQAboBmAF1AVgBRQE7ATMBLAEsAS8BOwE6AQ4B7wDoALYAWQAKAMP/fv9b/xv/4v4M/0X/Tf9Z/4j/kf+L/2//L/8c/wH/y/6F/nL+af5S/k3+LP5G/nL+if6U/sL+/P4N/yv/O/9Z/3f/k/+o/8j/9f8JAC4AbACeAMAA9wA2AW0BigGbAakBrAGaAXABRgEiAQEB1gCzALEAwQDAAL8AyADUANcAygC5AKEAlgCLAHsAWwA+AC8AFgDp/8D/pP+G/3j/b/9s/3T/g/+P/5T/nv+b/5T/iP9y/1j/L/8I/+P+xP6r/qH+qv69/tv++/4l/0z/cP+H/5L/mv+k/7f/yv/p/w8APgB7ALMA3wD7AA4BHgEjARoBCQEAAfcA5gDWAMgAugCrAJgAgQBwAGsAawBoAGsAdQCDAJUApACoAKAAlQCEAF8AKADy/8n/pv+E/2v/av+D/6H/rf+x/73/xf+z/5H/cv9e/0v/Lv8S///+9f7k/sj+sf6n/qT+pv6r/rv+3/4M/zb/X/+O/8L/7f8OACsASgBsAIgAmQCoALwA0wDkAO8A+QACAQMB/ADtANoAzAC/ALUArgCoAKwAtQC5ALUArwCqAKEAkQB6AGYAWgBQADsAIgASAAoA/P/n/9f/0f/V/9v/3v/i/+3/7//n/9b/wf+r/5P/e/9f/0j/N/8n/xj/C/8E//z++P79/gf/EP8b/zD/TP9q/4T/of/C/+D/+/8RACMANQBGAFEAWQBkAHMAhQCVAKEArAC2ALwAvgC4AK4ApgCeAJYAjACDAHwAdwBsAF4AUwBIADwALgAkABwAFgATAA8ACwAIAAQA///4//H/6//l/+D/2v/W/8//x/++/7H/ov+V/4n/fP9x/2f/Yf9e/1r/Vv9W/1n/Xv9o/3T/hP+X/6z/w//X/+z/AAAUACMAMAA6AEUATwBXAGAAaQB0AIAAjACZAKQArgC2ALkAuACyAKoAngCOAH0AagBXAEMALwAdAAwA+//v/+P/3f/Z/9f/2P/b/+H/6P/u//L/8v/v/+j/2//O/7z/rP+d/4//g/94/27/Zf9c/1b/Uf9N/0z/TP9O/1L/Wf9l/3T/hv+d/7T/zP/i//j/CwAcACwAOwBJAFoAZwB2AIQAkQCeAKgArwCzALgAuAC3ALcAugC8ALwAuwC4ALAApACUAIAAaABRAD0ALAAiAB4AHQAcABsAFwAPAAMA8//k/9f/zP/G/8P/wP+8/7L/oP+K/3P/Wv9F/zn/N/87/0
f/WP9p/3n/hP+I/4n/h/+D/4X/jP+W/6b/uv/T/+n//f8PABwAJwAtADEANgA6AD8ARQBLAFYAYwBuAHkAhACKAIoAhgB/AHQAZwBbAFIATQBKAEkASQBIAEMAOgAuAB8ADwABAPX/7P/n/+X/5v/n/+j/5v/i/9z/1v/Q/8z/yv/F/7//uv+x/6j/nv+T/47/i/+M/47/lP+a/6L/qv+x/7j/v//F/8z/0//Z/+H/6f/x//r/CAAXACgANgBFAFIAWwBjAGYAZgBlAGcAZgBpAGwAcABzAHYAdwB0AG8AaABhAFsAWQBXAFcAVQBVAFEASwA/ADIAIAALAPX/4P/N/8D/tv+x/67/rv+v/6//rv+p/6L/mP+L/33/bf9h/1n/WP9c/2X/cP9//43/nP+r/7f/wf/J/9D/2P/g/+r/8/8AAAwAGQAoADYAQgBNAFgAXwBkAGkAbABuAG4AbQBsAGwAbQBsAGsAaQBoAGcAZQBkAF8AVwBQAEUAOwAvACQAGgATAA0ABAD9//X/7f/k/97/1P/M/8T/wP+8/7f/sP+t/6z/qv+m/6X/pf+i/6H/of+j/6f/qv+z/7z/yP/U/+D/6f/x//b/+//7//r/+v/7//3///8EAAsAEwAeACYAMAA3AD8ARABEAEIAQAA/ADsAOAA5ADkAPABBAEYASwBNAE0ATABGADsALgAgABIABgD7//X/7v/q/+r/6P/o/+b/4//e/9n/0//P/8v/xv/C/8H/vv+8/73/v//B/8T/x//N/9L/1v/a/9//4v/k/+f/6//t/+7/8v/2//n//f8CAAYACQANABMAGAAeACMAKgAzADgAPQBHAFQAWABZAGEAaABjAGEAZQBlAGEAWwBUAFAASABEAEYARgA4ACkAHAAOAP//8P/l/+H/2//T/8z/xv++/7n/qv+i/5v/lv+A/2L/bf9p/2X/Zv83AHMAw/6C/vL/sv/T/k3/yP+G/3b/3f++/1T/nv8hAOb/kP+7//7/5f8UAEoAqACQAroBFv7I/nYBNADp/mUA6AAvALQAwwL7AhAAK//9AMQA9/48/3kAWADl/0sAcwDo/20ANgHtAFsA///g/5r/Hf/V/m//XQDP/43/lgB0AG//RQCBAYb/vP4pAF7/WP7F/4oA2/5m//4A5v59/o4ALwDR/lv/EAF8ADv+Zv/GAQ4Av/5IANMAQv/C/0sAFv9+AFQBef+oAIQB7/9mAPAA9QDr/1sAtQHu/97/kQExAO3/p/+cAIoBIf4e/7UBSf9W/kgA+/8i/2f/fv8BAMj/av/j/3r/4P/t/7n/vf9Q//3/ZQAh/5n/kQCm/xcAnf+h/6b/Rf99AK7/+v6YAEgAz/4xAC0BWP8//40AcgB8/6D/8AAnAL3/jwBBALf/nQDTAEz/jf8OAaoAZf9dABwBQgC3AH0AtP+8AHsBJADx/pkAsQFk/+T+egElAYj/lgBBAbv/yf9gADoAOgBS/8//zP+e/88BhAFz//3/oQFQANH9sf4TAKj+i/2L/Rv+1v9U//79MP4WAF4A1/7t/o7/r/8+AD0A//7H/4oBVAH//8MA7AB+ALoABQAjAFEAqv8BAGgAk//A/7//JABbAZ3/Vv66AJABlP8t/2kA0QFbAbn/mwCCAeMAbwBrAMAAIQAKAFcBegB1/0MAagBV/97+kv83/zX+6/5LAIT/ev6C/14Av/9s/8b/CgBTAAcAgP+h/6j/2f96AAEAef+TALAAqP+b/ykAvgANACL/r/9nAEAAGf9b/z0BQAEqAB8A7wBWAZsABQASAY0BKgDt/+AA5wDc/7T+e//VALD/G/6+/t7/nf8S/7f+Gv9v/xr/6f7Q/p/+ZP6N/jb+vf1Z/bn9Tf7X/XT9T/14/mgBqAMjA/cBmQJNBLQEZwO/AhoEmgb8BgEEpwEZArICKAJAAVwAh/99/3H/5v3o+3z71/wu/kX+P/3Z/Df+rP8t/wj+mP53AJ8BEQEPAO7/JQAVAK
v/Kv5q/Jv7Aftf+bb2rPQU9rj7hgEMA1cBTAFPBHMGVwUYBMQF1gm/DCgLqQbUA2MEhAW+BNsCHwEDADn/cP3w+bv2SfY5+C76iPoK+pD6u/zS/hv/7v7EAHEE2wcQCe8HogaGBr4GLwaeBNgCtQEXAdH/J/1H+vr4d/k2+sP5UPiF9wb4c/gI+J738PfK+Lr5Mvr/+f77iAJbCQkLVwmeCfUL9gwwCzgIWgdcCScKcQZ1AVD/B/+A/kf9Z/sD+kX6lvre+Uz5VPnG+YT7tP4IARMCsgMsBp4HPQeEBm8GGwcYBwwFeQJDAUgABv7G+9D6Mvr3+Hr3OPaM9SH1C/S58s7xFfHB8TH1ufufA9sJRwwYDAINmg5NDs4MFwyPDDYN8gubB1ECTf9m/h796fqp+EH38fby9pb2WPZh91j5e/s//ggBAAP2BKIHSQqTCyQL5QkCCWAIrwbAAxoByv/K/t/8dvpt+Bb3ZfZc9r/2Svfr9434Evkd+SP4X/YV9Tr0/PUt/p0J/w+dDvwLxQxPDt4M4gcQBB4GXQoBCV8CN/3L+zH8i/xa+3T5F/kO+sz6X/ut+xn7cfsN/3gDEwWoBCEFNAf6CHcIjwWNAzQEYwQ2Ar7/m/4q/oT9UPwu+s33dfYn9uT2i/h1+Tb5E/kl+bf3RvTQ8XP04P0mCfwO4Q5+DrAQ2RGeDpsIDAVTBk4IRwZaAZr9aPxA/KH7cfoo+Y74xPir+Sf7uPv3+n77Tv8VBFsGLQY/BggIWAmTB9QDkQHsAUEC0gD0/hf+yf3y/Cf76/g792T2eva09y/5t/kr+S/4c/cJ9uHzVfTK+n0FkA3LD6YP/xDeEmgR1wtBBswEuQVnBIAAVv2a/LX8GvzG+lr52PhA+fD5OPuk/MT8Bf3T/+IDCgb+BTsGdwfoB+cFSQIhAJsAUwFlACn/Fv8k/5n92fqo+Hn3zPZp9r/29Pfx+Jb4gPdY9ur0lvSK99X+xQcHDmUQKRH9EtQTxRAzC6gGywTXA6UBfv5c/DT8dvzj+9z69PnU+Wf6ivt4/HX8zfxf/icB9wNwBTcG1Aa+BuME8QE3ABMAbABZAAwA5f97/0L+Ifwb+sf4oveh9lP2GPdz+B758fj694v2SPYA+K/8aAPfCYwOHhEQE4ETOxHHDOEHpATaAi0BtP5I/Jn75/tS/Ir8Kvw8/M38Wv1q/R/9t/3X/l4AFQKyA5EFsAZ5BuoE6gKZAXMAif/w/vH+ff9W/yn+P/xe+t/4c/di9vr1a/Zg98b3uPfc9vD1tPZH+dr9XQMZCcINvRD5Ej0TMBFzDfwIEAXZAV7/1fyh+g/6z/pr/MH9Wv6V/sj++v43/gr96vzn/Wn/8AC4Ar0E0gWlBbQE2wP7Aj8BpP83/63/nv98/gb9l/uG+nb58/fX9q/2Jfd79473pPf69tj2t/gu/LsAdAUuCgAODhH0Ej8SxQ9pDMMI+wR7AXD+t/tj+qz6tvs7/cL+CAADAWABwgAm/6T9t/xr/Cr9v/7xAA4DWwS8BHIE/APUAiUB8v9u/w//+P2h/IX7UfoW+dj3Bvfu9i33gfdy90X31/Zc9lP34Pmi/fkBlAYAC3gO0BBSETkQJw4bC3sH6APOAP79zfsI+4373PxG/mn/lACJAXsBIwBy/lf9pvxf/Ab93v5KASIDHATWBH8FbwUgBH4CawF5ABn/Q/3K+/D6J/pK+WL4/Pfp98737Pch+DD4gPcl98r46/sc/+cBfQXDCQ4Nwg4PD5gOUw3xCo0H1APPAHf+1vyS/JL93f6w/1QABQEjAUwA2f6v/Q/9YfzQ+zP8xf2o/wABGwJvA7UE9QTvA6cClwFJAHz+zvza+zf7k/oB+o35LPnC+Ij4xfhs+fj5sfkp+ef5y/vF/bj/YALJBeIINgtaDDQMegsWCvMHTgW0AqAACf+f/jP/zP88ALUAbgG1AfQAq/9W/nb9v/zt+9j7+fzg/pMA5gEbAxYE1wTiBO0DcQIHAZ
f/7/2X/JD76/rK+sD6qfpi+j/6ZPq0+iD7Uvu2+wn85/sn/D/91f5NAAgCZwS7BpQIYwlGCcYIvAc4Bk4EmAJWAUEAtf+4/w4AWgCoACIBCwE5ACH/Df4n/WD88vsf/Bf9rf4zAIsBuQK6A0AE6QPkApsBWwD4/oT9YfzK+6L7qfvf+yH8V/ye/AX9R/1S/Wf9lv29/YL9+PzD/IX96P7z/8MAJwIOBFkFjQVkBXcFfAUSBUIESwNqApkBCwH9ABcBAAHZABQBTwHxACgAUv/E/mr+Gf76/Wv+dP9KAJ0A3wAjAUQBFwF9ALz/Ff+6/lv+3f2c/ZH9yP0n/ob+1/75/i3/b/95/0n/IP8d/xX/6v62/p/+wf7i/gD/Yf/e/1wA9QDIAXQCfgJBAg4C7gGAAaYA1P9d/1b/bf+i/wsAcwDIACUBegGQAWEBGgHeAIUA8P9B/9f+1P7i/tf+1P4g/6D/1//A/6T/uP/H/7X/nf+N/7n/FQBNAGYAjgDKAAUBLQEtAdYAaAA8ACgA/v/A/6T/0v8fAEQAIwDl/8L/r/+P/0P/0f58/nv+w/4C/wn/Hf97/w4AiADAANwAEQFUAVcBCQGrAFQA7P+F/zz/Fv8K/x7/Wv+s/+7/EQBDAKAA3AC5AHQAUgBcAGwAUQAOAAkAgwAMASIB9ADlAAABCQHRAF8A7f+9/77/vP+9/9j/9v8GABEA/v+e/w7/sv6v/tb+7v7n/ur+LP+K/67/pP/H/yIAYwBZABkAvv9a/wn/0v6u/qb+2/5M/9n/XQDJABoBTgFeAUMBBgHFAI8AZwBRAEgARQBHAF8AgwClAMIA5QD+APwA5gC/AHwAMgANAAAA+P/9/xQAHAAOAOL/jP83/xD/Cv8a/0//mP/H/+X/CQAgABoAEQATABMAFAAVAOj/ff8P/9D+x/7p/i7/h//v/2oA4QAXAfcAtQCAAFwAMQDq/5f/bf99/5n/l/+L/6j/9P9BAGgAZgBaAE8AMwD6/7v/mv+W/5r/oP+8/+n/DAAUABQAIQAzAD8ASABfAHsAggByAF4ATgAtAPj/z//G/87/zP+6/57/iv+d/97/LQBsAJwA1QARASwBDgHJAIgAZgBgAFMAHgDU/7H/wv/O/7X/mv+0//X/KAAmAPz/y/+j/3//Vf8z/zD/UP+G/77/6//7/+n/0v/R/97/5v/x/xIAPABPAEcAPQAuABEA7f/Z/9f/1v/F/6X/lP+1/wMAZQDMAC0BYQFJAfkAlwA9AOT/mv+F/63/5v/w/8D/fv9T/z3/Lf8b/xr/Pv93/6f/uP+s/4n/YP9I/0L/S/92/8n/IQBOAEsANQAiAAoA7v/h//P/IABTAHEAeQB7AIsAqQDHANYA1QDGAKwAggBWAEoAaQCmAO0ALQFWAUkB+wCFABIAvv+V/5X/sv/a//r/BQD1/87/n/9z/1P/Rf9H/0//Vf9X/1b/Uv9V/2j/gv+V/6L/vv/q/w8AFwAJAPv/9v/t/9j/w/+9/8X/2P/z/xwAUQCIAKsArwCmAJsAiQBmAEgARgBfAH4AiwCGAHIAVAAlAOT/p/+O/5v/vv/h//f/CQAYAB0A///D/4f/W/83/xH/8P7b/t/+//4q/1X/gP+o/8f/2f/m/+n/5v/p//f/BgAOAA0ADQAYAC4ATABzAKkA6QArAWUBfQFwAUUBDAHJAIcAUQAxAC8ARQBmAIAAhQByAFIAKQD+/93/1f/m/wgANABZAGoAVgAfANH/d/8h/9H+kP5d/kT+PP5J/nH+tf4Q/27/wP/2/w4ABwDr/8j/sP+r/7n/2/8KADgAXQB/AKoA4AAaAU0BcQF8AWcBOAH5AMEAmQCGAHsAawBfAFsAVgBDACkAFgAOAAIA4/+x/4//k/+s/7z/vv++/8H/uv+S/03/DP/t/ub+2f62/pf+nf7Q/hH/Qv9j/4//x//v/+//0v+2/7T/zf/2/x4ARQByAKcA2gAEAR0BLAE3AUcBSgEvAQ
IB0wCyAKIAlwCBAGgAVwBMADcAFwDx/9D/sf+O/2r/Vv9T/1z/Z/9z/4D/lf+o/6//pv+X/43/gv9x/1v/Sv9N/2T/iP+v/9f//P8YACcAJQAbABkAKQBGAGsAjgClALEAtgC4ALgAtQC3ALsAswCaAHcAWgBJAEoAUQBUAFIATQBJADwAJAAMAPz/8v/h/8H/mf9y/1n/Tf9M/1P/af+I/6r/yP/b/97/1P/A/6b/hv9m/1H/U/9x/5z/yP/o//3/CgAVAB8AIgAgAB0AJgA8AFkAaQBqAF8AWwBkAG8AaABNACsAFQARABsAJgAuADkATgBmAHIAZQBDAB4ABgD6/+//2P+7/6T/l/+O/4P/ev93/4b/of++/9H/1//W/83/wv+x/5z/j/+P/53/s//R//H/GwBMAHYAkgCZAIwAegBrAGIAWQBRAE0ASABEAD8ANQAuAC8AOwBRAGoAgQCPAJEAiAB7AGkAUwA5AB8ABQDl/8T/o/+C/2n/V/9H/zb/KP8g/yj/Qf9s/5f/vP/b/+z/7P/d/8T/rP+g/6H/q/+9/9L/6f8FACcARwBlAIEAlwClAKkAnwCFAGMAQQAnABAAAQD6/wAAGwA+AF8AegCOAJ8AqgClAI4AcQBYAEQAMgAXAPn/3//K/7b/mf90/1b/Rf8//0L/SP9V/2n/hP+c/6f/qf+p/63/s/+2/7b/uP/D/9f/7f8EAB4APgBlAIsAowCwALQAsACgAIMAXAA0ABMA+//n/9r/2v/o/wAAGQAxAEkAYABxAHoAewB5AG0AWwA9ABYA7P/C/5//hf9w/2D/Wf9d/2j/dP+C/5P/qf/C/9r/6//v/+v/4//b/8//xP+//8f/3f/8/x4APwBgAIAAlACaAJEAfgBiAEUAKQAXAAgA//8BAA0AGgAdABwAHwAkACcAJwAoACwALwAvACwAKwAiAA8A9//i/9D/vP+m/5L/iv+L/4//l/+h/6//uv/J/9b/3f/a/87/wv+8/7T/rf+t/8D/5f8SADcAVwB1AJUArACzAKkAlgCAAGwATQAkAPv/3//Z/+L/8f///wsAHwAwADMAIQAFAPX/9v/9/wIAAwAFAAsACgD7/+D/x/+5/7b/tP+q/5//m/+m/7r/yv/a/+7/BQAVAA8A8v/O/7X/r/+0/7//zf/l/wkALwBMAFYAUgBLAEkASABBAC0ADgD0/+j/6//z//7/CwAfADUASQBQAEoAOwAjAAgA8P/h/9//3//c/9T/z//O/8r/v/+y/6z/tv/O/+j/+f8DAAwAGQAkACcAJAAkACMAGgADAOb/0P/G/8f/yf/O/9//AQAuAEwAUABIAEUASwBQAEYALwAYAAsACQAFAPv/8//5/w8AKQA/AEsAUgBRAEQAJwAJAPL/6v/r/+//7v/s/+j/3//O/7f/o/+a/6X/tv/J/9f/4//y/wEABwACAPn/9f/v/+X/2P/N/8X/wf/D/87/4v/9/xMAIwAvADsARABDADYAIgALAPn/8P/v//L/9f/3//3/CAASABkAHwAiACMAHQAOAP//8//r/+X/5f/s//L/9f/x/+r/4f/X/9L/2v/v/wYAHQA2AFEAYwBlAFwAUgBGAC4ADgDz/+r/5//j/+L/7/8CAAsACAAEAAcABwD///j/+P/9//7/9//v/+f/3//X/9H/1P/h//L/BAAXACgAMQAzAC4AIwATAP7/7P/f/9T/yv/F/8T/xP/E/8T/y//S/97/7f/8/w8AIAAwADoAQgBEAD4ANQAlABQABAD5//P/8v/z//X/9v/y/+3/5//j/+P/6P/w//r/BAANABAADQAGAP//+v/1//X//f8LABkAKAAyADYANwAvACEADQD4/+f/3P/Z/9z/4//t//b//v8BAAEA/////wMACQASABkAHgAfABsAFgAOAAcABAAEAAUACQALAAwABwABAPn/8f/q/+T/4f/j/+n/8f/2//P/7f/n/+X/5f/k/+b/7P/2//
//BgAJAAwADwAUABkAGQAVAA0AAAD3//L/8//3////BgAGAAUAAAD7//X/9P/3//7/BQALAAwACAAGAAUABQADAAMAAgABAP//+//6//f/9f/0//X/9f/4//v/AAAHAA4AEgATABAADAAHAAIA/v/9//7/AQAHAA0AEwAWABYAFAAPAAkAAAD7//j/+f/8////AwAGAAgABwABAP3/+P/1//X/+P/9/wEAAAAAAP7//v8AAAEABAAGAAYABgAEAAAA/v/9//3//P/8//n/9v/2//j/+P/5//f/9//1//P/8P/v//H/9//8/wIACAAKAAkABwAEAAEA/v/7//j/+P/5//v/AQAFAAkACQAIAAIA+//0//D/8v/2//z/BAAIAAoABwAFAAIAAAAAAAIAAgABAP///f/6//z//f8AAAEAAwAFAAcABgAFAAMAAgAFAAYABgAGAAUABQAFAAUAAwAAAP7/+//6//r//f///wEAAwADAAIAAwAEAAcABgAGAAIA///9//n/9//3//r/AAAFAAgACQAGAAIA///9//v//P///wIABQAFAAQAAQAAAP7//f/+//7///////3/+v/5//n/+v/7//v/+//9//7///8AAAAAAAD+//z/+v/4//n/+v/8/wAABgAJAAwADQAJAAcAAwD///n/9v/1//j///8FAAwADgAMAAcAAAD3//L/8f/v//L/9f/5//r/+v/6//v///8CAAMAAwAFAAIA/f/5//X/8f/v//D/9f8AAAsAFgAfACQAIwAeABkAEAAJAAMA+//1//X/+v8BAAwAGAAiACoAJwAfABUACwAMABgAJgAuADIAKwAoACkAMAA9AFAAaQB4AHUAegCPAIkAcwBXADEA+//P/6n/jf92/1j/MP/3/s/+x/7Q/tT+7P4F/xX/Jf9Z/57/6f8nAFcAdwCKAKIA0wADARYBGQEKAfMA0wC2AJ0AfwBBAOn/mP9r/1v/Sf8l/wL/7f7n/vr+IP9Q/3D/ff+I/6f/4f8rAHwAtwDBAKYAiwCOAJYAkwCMAIEAZwA6AA4A/f////n/5v/B/5r/ev9t/4D/qv/W/wUALgA+AC4AKABFAGcAcwB9AI4AnACZAHgATwA7ADQAJQADAOX/1f/C/7v/yP/f//L/BAAQABQADQAWACQAJgAkAAwA4v/J/9j/7v/c/7j/w//e/+j/4//X/9r/7v/u/+D/2P/6/y4AEwDH/6r/4P81AFkAMAAVABQAEwADAOf/5/8cACsA3v+f/7T/AAA4AC8ACQAAAMT/hf+Y/+z/LQA3APL/wf/l/yUAYQB4AHcAoACgAAAAof/r/3YArwAvAGv/Iv9T/+T/awBVAAwA9//d/6z/tv8OAIMAeQDR/zf/Sv/Y/2oAggAHAIv/nv8VADYAAwANAIIAvwB8AB4ADAAFAOD/y/+L/03/Xv9g/xr/Df9X/9b/MQBEAEQAIAD5//n/CQAzAHUAYAAWAMT/oP/s/1IAkAC4AK8AdQAvAAUAaADkAMsATADJ/0//J/9w/+n/OgAyAM7/Vv9T/8H/RACIAFsA8P++/67/pP+//x0AbQAHADD/Dv+M/7T/p/+j/3L/Lf9J/67/DwBZAJwAtACxAL0A/gB2AcABuwF6ATEBVgG6AbQBbAEmATMBagFOAQ4BAAHCAGAA/f+R/0f/K/8l/9b+Iv6a/av95v37/fP97v3D/XH9Hf3u/OT8/Pwe/Rj98vzI/Jz8tvym/e/+wv/J/7f/lgAMAs4CWwP/A/cDVQSbBR0G5wVFBnwGOQbRBTEFwgSsBCsEcgNrAo4ATv8s/5r+kf3B/LT77fpj+sL50PkJ+vP5d/ql+tH53/nj+kz7Bvum+qX69fv7/Y7+Av7Q/vkAbgK1ArACSQOvBN4F9wWpBRkGEQfNB+EHCAdFBgIH6wfvBj8FpQSYBBEEoQLOAKD/Nf+p/kT9R/s2+or6lfp/+aH4zvie+U76GvqY+bb5U/oe+3z7xvqA+t
X72fwA/dL99P6X//r/cgBlAcAC0AOYBCgFSQWzBdYGbwddB0cHjQcdCOAHOAf8BqkG9gWNBbwEzAIVATEAEP+q/Sn87foQ+ib5tfin+IH4X/iz+BH5Cfn9+BX5tfl8+oL68vrF+/j7dPym/cf+0P/mAAkBYwFDA9oEKgXnBPEEGAZTB54HuAe8B4gHCgjNCPIHvwZsBuoFBgXwA5gCdAFYAMH+Yf1I/Ev7yPof+sH4FPhl+Jz4kPhr+Gj4FfnE+Yz50PkK+8v7rPuz+1H8ef3S/qD/5/9tABgBAQKKAwYFXQXlBJEFPQd8CEsJZwmxCLgIogmECRMIBAeEBr0FSQSmApcBIQBf/hv9evvX+V/5D/n59/322vZX9xL4Kvju91T42Pgv+av5h/ox+yT7Wvum/Cj+Xv+EADQBYwFnAtgDnQRoBRMGLAadBo4HMwg4CRoKWwmBCNEI+gi0CLMH5gV1BHYDaAIxAVj/9vyx+/X6bPlM+MX3FPfM9qv2jvYu90H4oviT+Pn4kPm++jz8ivzF++H7x/16/yYA8ABMAd0BmAP7BC4FtQXhBkwHXwfDBxAIKAmECsMJ3gfdB/oIdwgYB2QFOQP/AaEBMACC/UX7W/rP+Xn4JPeb9kD2Kfal9nn2ePbU99T4o/hp+Lj4bfrQ/LP8Mftm/GP//gDnAVACRAKaA70FXAb3BVkGhgd5CIoIhwg3CXUKawumCkUIowf3COsIwwa6A0UB+gA6Adv+U/t4+Rz5qvgw93b1HvWO9bf1g/V09Xj2H/h1+Nr31/ce+Sv8GP6Q/Kv7cf55AckC4AKgAmwDiAXuBgkH6QZZB/gIyQkkCVAJdQp3C8ALtAm8B48IQQmZBxcFXAJkAKEAi//N+/r47Pd195v2zfTP82b0+vQX9Sn1f/W39hD4qvek9qH3KPvZ/Xv8E/u1/VwBAANwAyADXgMHBmYIAgg4B94HcgmqCusJOglqCgMMqgwZC1MIEwjGCdIIiQVSAiUAvv8w/8r7PfgH96v2Ffau9DjzfvPb9Pv0cvT19IX22/fK94P29vae+hL+3v1f/ND9OQFuA/sDuwOxA5IFAQhQCEAHpQerCUkL9godCqgKGQxQDb0M5Qn8B/AI1ggNBvACiQAf/3X+IvyP+KP2DfZV9TP03/Ke8tzznPSE9Mj03fVT9y74mffg9k75RP30/bz85/3TAMICpAM7BIcE2AUkCLYIzAf7B98JWAvXChQK2Qr9C54M2gxmC98IVAiLCIcGvwOHAS3/DP1J++j4yPaL9Wr0nPMN86PyZfOP9MD04vTe9R73CvhJ+Gb3WPho/Ln+BP6Y/rgAFQJXA6gE3QQyBRoH7wcCB/sGmghWCowKzgkACh0L0wsoDNUL7QnhB28HdQb3A/IBhwAx/oX7wfkw+Kn2XfVG9L7zevOj85j0PvWn9Yb2h/cB+I/4N/mk+M35t/2D/6z+KP8fAYcCagNvBMkEbQUPBycHQgbaBsUIuQmJCZIJ1AmnCqwLkAvvCu0JGAjPBtoF/APYAUgA5P32+i/5rPdY9l71O/TF8+TzIfTZ9Kj1SPYi91D46Phv+SP6aPlH+mH+UgAw/6D/uQHJAo0DoASWBM0EXAaHBm4F5gUGCG8JLwnCCDMJJgraCuoKegpiCeoHngYxBWwDvQElALT9sPq4+Ir3UfYm9Ur0B/QL9Gj0UvVG9g73J/gX+ZT5UPpa+xT7ufqA/dMA7ADk/8YAFgLrAhQEeATNA9MEMQYnBYQEBgZ+BwEINwgQCB8IMwngCUsJ4QhPCMsGOQXuA2wCOwHL/+T8Afq9+M73sPbr9Vf1AfUf9bD1S/ZT9+b4Wfre+hr78vsJ/Tf9Ff3O/r4BmQJnAb0BzwOHBAEEWwQwBOkD/QT6BHMDfwNrBUoGyQUQBrIG8gaLB50H8ga0Bh4GlwQOA8wBtQDi/1f+nPu9+Sf5W/hX9w73Ivf69k73Efiq+Kb5KftA/GH8vPz+/eP+r/6C/jIA5AIEA7
MBMgKEA8sDSwMuA9AC4QKQA44CCwHUAWQDsQMbA84CTAMeBKgEVQQyBL0EjwQ3A84BCQGrADAAvf5T/KH6aPok+jj55/gc+QP5K/nt+aT6vPtQ/Vb+nP7y/gEAHAH8AHAACgFOArUDZwP2AZcCyQPxAhYCAwKaAesBOgIIAaH/IQBTASgBxwAxAboBfwIRA+gCEAPYA/oD0wKDAQ0B4wB0AG7/rP1B/LX7efvv+r/6TfvX+9j7Yfw8/f39CQCrAB4ALQHHAaoBBgJJAu4BbAHrAeUCaQIcATIBngGhALv/Uv9w/lP+iv6L/ar8G/0E/kv+mv5I/xEA2AA2AYUBEAK3AhkD0ALtATABHwHfACoAdv/T/kH+7v2d/YX9Ev6X/hz/Tv/J/ykAeQGUApgB9wKaA+IBKgLAAiACSQHQAMsA+v96/9r/lv7S/S7+gv2G/CT8M/wZ/BX80PuW+/H73fyy/TP+Bf+D/0gAVgGLAcsBrQKPA3AD1wIJAxgDqAKdAmsC6QG7AZEBRAEoAacB7wHkAQYCGAI3AvECLwNFApoCnwKSARoBigB8AHL/f/7U/lT9M/yB/DP8Ffzx+5b7mvs1+w/7SPuB+8T74/tl/HX8R/zo/Cj+I//a/7MANAG+AXYCVwPmAxoEpASgBEsENgRMBHIEGgTEA4cDCQP9Aq0CRQIUAnkBXwFQAaoA///y//X/uP8e/3L+Xv4t/oz9Rf1f/Rn9Bf2z/Aj8RvzO/Oj83PzZ/BT9//yz/CH9vf2p/bL9Qf6w/un+cP89ALoAJgHgAYcCzQIQA2UDvgMXBEIEQQT1A80D3gO5A2ED0AJbAtABLAGfACsA1f9v/y3/w/5k/pH+v/68/qH+ov7F/rr+sf6p/oL+ov7l/n/+O/6e/rT+uf61/qX+7v74/ln/ev/3/n//+//Z//L/DAAzADcAagC7ALcA5QAEAdwAEAFKAR4BIAEqAfcAyQC+AP8AnQAiAIwAhQCz/7f/RgDi/xX/aP9aAJf/Gv/0/wQAiv+o/08AMQCG/97/ZQAsAKH/2f+zAFIApP8QAI0ACwDE/zkA8f9Y/5j/+f+d/8X+V//B/8/+u/4C/wL/wv6F/rf+3P7j/vv+F/9e/4f/yP8fAF8AjwDUAO4A3gBNAVABDAEGATQBkAEiAbwASgGLAf0AMAF2AXUBpAFhAVUBhwGKAY0BbwEcAecAfABqAMkAEAAj/9b/3/+j/mr+r/6b/kn+1P29/Tn+1f1u/UP+CP4n/pb+lf7//pT+jP5r/27/M/9G/zr/5f/K/2H///+wAKYAjgCBAIMBDgLpAJEBhgL2AZcB+AEnAgsCOwEjAdABBAGUAEUAJQCgAO7/Jf+X//r/L//O/c/+EgDU/tn9C/6s/5v/N/7P/cv+zQB3/1D9Xv9/AI//I//c/tf/fQAiAKv/z/96AJQA/P9PAFEBdgHwAHgA0QAyAsMB9QDNATcCUwGuAGMBRAIyAQsAcwDWAMoAif/n/nz/LQAh/5z+FP9i/hL/E/9X/v/+rv8t//v9g/81AGX/Nv8//8f/DADo/wn/OP9bAEUAXP/g/x0Ad/91/10AzQC5/3z/3gD2AM3/lP+NAE8BOQFPACMAnADgANAALgBiAHIAsABtAFX/vv80APX/CgCq/1j/Fv+B/8L/Pf8L/17/nf/K/x7/KP9QAN//+v96AIUAcAA9AMEAKQFlAPsAUQH2/+D/HQHFAS0AKv7r/+8BXwDp/pX+zv+cAHn/a/5u/sf/ZgBs/5D+3f5ZANMAr/9g/50AqgE8AU0AwwAeAfkAIQFQAR0B3/+v/1cAkACH/7H+c//e/9n/l/6g/lH/Z/+sAB0AG/8YALcAxQAiACwAvQA7/5IA2ABg/kz+Hv/f/7H/H/6C/az+HP9v/qL9i/2//q7/QP+R/oP+sAAnArEBJgHYATcDcgMQA/MCkALQApYD6QLgAfQA9gA8AY0AYv8q/0T//P70/j7+Vf4t/+/+SP8n/3//rP8o/x
QA1f8z/3L+6P6H/+v9hPwo/br+Nf+D/mb9/PwF/lb/R/5S/av+/ACTAZ3/Ov/QAPoCSQS0A08DGwSABeEFpQQ8A64D5wSNBJYCiABzAOUAGQCP/kP9w/yL/T3+E/3X+7v89v28/YL9uP03/qH+jf7H/af9x/1k/b388fuk/Hb++/9o/0r9Q/0s/87/cv///sr/jQFNAtkB7wBeAXUDqAU6Bu4FpwXfBaoGsgasBTsEswPOA2QDuQE7/+398f2g/Xb8WvsR+5H7MPx3/GX8kfxx/Wj+3/5b/lr9Sf02/iv+hfyX+lH6+Pyx/8v/M/6A/U//5gAOAIr+6f7XAV4EQgTHAh4CPAMRBVcGzQbpBpUHTQizB0IGnQRVAwcD8QLkAS8Akv77/Hb7L/qM+df5VPp9+oz63fqb++j7BPzc/Dr+6f5V/kf9afya+6P6Tvss/pkAGQFBAJH/uP/8/6T/lv9zAT8EfAWBBBsDwQKlAxEFbwarB6wI2wgkCAAHRgVRAxYCQAJ2ApwBuv9O/RT7OPkX+Aj4sviL+f/5Fvpx+t/6JfuO+6L89v1B/mj97/t9+hn6Efyp/w8CbwLgAdMBWgJvAskB7AFABNMGWQfPBdUDMgMABH4F/gbuB2QIKwj7BggFYQIVAGv/8/83AE7/nP10+1j5FPil9yD4MvlW+j37xvvr+5z7dPvN+7H8Uf3+/Gr7Gvni+Nz7rv/EAesB1QGrAskDxQOHAr0CcAURCLoI1gebBgYGoQbAB4UI4gjGCDAIPAdUBR8C5P5a/TH9K/2E/Cn7Wvmc91L2vPUC9vH2qPij+pX8SP0l/J366/nA+tT7SfuI+Zn5of3sArcEzgKPAJAArwKbBJMEaASlBroJxQrsCNYFDQS4BC0HFAlWCb4ISAeSBe0DjAEe/+H95P3y/Uv90fv9+Pz1hfQt9WH3oPkS+9z7yPws/TX8xfo8+uT6APzt+7L5SfjU+oP/TwJaAhgCPAMeBToGWwVKBIsF/wdqCQ4J1wcCBy0HNAjLCFgISgcpBpoF1wSDAgj/qvz8+5f7G/uF+mP5FPgh98/2OfcX+Cv5kPqE/O394v3Y/AH8evv++o75t/ba9ZD5Ov9yAp8CpgLvA6UFQQYPBXEETwYKCS4KhAk/CH4HtAepCPII9geKBkgFmQRvA9kAuv36++n74fsl+yD6HflD+Mn39feM+JL57fpO/Lf9av4H/jP9x/yV/ML7ufm59ov1mPjN/fgApwGhArAEwAZjB8sFXwSvBXgI/gl3CY4IRAgCCSEKiAmdB4gFIQSJA3gC+/+6/F37pvsq++/5uvja94/3vfcJ+Hb4k/lM++X8P/6m/ir+pP1m/QT9vfs8+br1g/QI+G79igBPAbkCsgVDCHEI/QUyBMEFoAjuCWsJswjICAEKCgvJCf4GggRDAwID+gEk//b7KPvF+zz7hfm99+n2R/cs+Nb4WvlW+jH8R/6S/wz/wf1v/ZL9Nf22+y35KPYE9Rb4xvxU/8//lAFvBdkIrAmHBwIGwweJCg4LSAlCCCQJAgseDJkK8gcVBiwFmwRTAj7+zvoG+qH6wvnI9272zvZh+P34MfiO90/4MPrX+5/83vyH/bj+Hv/t/Qr7r/fK9CfzP/Uo+ur9DgA0A/cHaguwCyIJCwcWCMUJbAnIB9cH2QkzDFINjwu4CNoGfgX2AxABAv3P+Uz5KfoT+VD3Vvf7+KX6H/r396n2kvfi+Vj7O/x9/W3/dQE+AZf+Evst+GP1nvLy8hz37ftX/6cCRAcxC44MAQt0CGAIlgltCSsIQQhBCjgMUg2JDDoKTwh9Bj4EBwHo/Iv5Svjb+G34N/e395T59Po++kL4Sfc6+Az6G/v4+739/f/cAcoBuP+5/Aj66vbi8jzxE/Rq+YD9YgCwBLcJ+AyxDMEJOgjYCAUJnQeTBtUHFwoxDL0MJQs7CWEH6gSDAQ/9x/iq9kn3IPix9xr43/mX+6n72/k7+H/4avr5+6b8v/2R/6
YBjAI0AZH+1vs4+Sb1HPEW8Uz1cvoi/jQC/QcfDSYPJg0KCioJIQkVCG8GgwabCCgL6ww4DC0KYQglBu4Cg/6z+U32pvXB9vL2+faJ+Pb6mvwP/GL6l/lH+nD75/uk/J/+QgEmA/UCFAG0/kr8DvkE9PXv1PCn9T76fP0/AuAIWw7iD2INrwpaCiMKHAjTBcUFzgd6ChAMLAt0CREI4gUQAr78Xvdg9JD0XfW59AL1z/dI+wf9IPyj+sj6F/yj/EP83fw1/z4CMwTCA/sBLwAO/kj6S/TZ7wrxFPYN+r38DwKtCYsP4hBFDhUMKAw/C6cHXgRhBLUGign8CgMK4QhJCDsG3wH++7X2CPT/89fzu/Lg89z32ft6/aX83Pum/J39Hv35+4z8Mv9HAsYDMgM8Am0BZv8H+4r02O/a8FD1fvjd+pYADAmdD2cRYg+3DQEOtQxwCFIEdQNsBRgIXgmCCA8IkQg0B90C0Pxu93/0VPPQ8VHwHfIG91/7Ff3Y/Bn9df7X/mL91/tO/Jn+LQGcAqgCsQK4Ag0BFP0Z9/fxjvGX9L32Z/jM/aEGNw5FEU4QWg9KECUPBgqxBPMCUwRTBhYHlAZBB/0IYQh3BAD//fmB9gf0WfEj72zw/vRq+eD7x/wG/hAAxAAY//T8/PzN/n4AQQFAAeABwwK6ATL++vhz9Fjz7fT89bn2R/u7A0ML2A5LD90PqhEAEcoL3gV2A+QDUAS4AxMDXQQ3B+EH2wRPAHv8g/lW9qPyye9i8EH08PcR+tD7d/54AXMC9wAe/xf/MQCZACwA4f/YACACSAFU/of6wvbO9DX1B/be9nv6SAHxBwcM7A1aDz4RKREfDeAHqwR7A48CgwH2ALcBEQSQBYEEbQI8ALb9hPrw9v/zWvOO9R/4t/mN+y7+1QANAoUBkQBtAJQA6/8K/8v+af8mAJf/nP1s+xv5T/an9FX1SPca+sr+sATPCb0M5g3QDr0PTg69CRwFUQKYADL/gP7I/uL/iAF9Ah0CMwEaAMn+ufwr+kL4GPjY+cf7K/29/pgA4wHVAREBXwANANb/N/+l/rH+N/8a/7797fsl+rv3ufRG86/00PcQ/HoBcAd0DAwPgw8cDxYOMAtiBvkB3/6H/Cr74Pqr+1/9mv9xARUCAAKwAQkBsv/J/Y/88vy0/oMAcAFcAq4DLQTkAs0AOf9k/vr9fP0A/UH9Cv4k/iL9kvv6+S/4xvWm8zr0i/eH+zEALAbvC3oPNhAFDz8NoQraBTAABfwU+Rj36/YY+P756fxgAPkCOQSTBFEEmAMnAlgAqP+DANUByQKWA2EEfwReA08BXv81/pT9+fxl/I/8k/1N/s79mvyq+9P6QPnH9vz0KfaS+en8kgCfBZ0KPg0wDY4LaAnEBoYCXP2M+Un3EfZN9r33Evqi/YoBAgQMBbsFDQZZBVgDGQFrAGEBPwJtAtICiAPVAyMDkQEgAGX/Cf+s/if+E/6g/tf+KP4h/Un8Nvuh+df3svat9y76m/z5/+QELQnzCncK/AhxBzgFDQFh/LH5e/jB9/73Sfmw+1b/1QLFBLoFXwZCBhAFCQNZARgBiAG1AREC7QKDA08DfAJwAaMA7P8f/7L+sP7e/gP/sf7T/fT8SPxN+6r5mfeN9tb3JfpO/Ib/zwMYB1cIEggFB6MFQwOa/2f8oPpI+XL4FfkR+9f9ygABA3EEsQVZBpgFuQMFAl8BTwEeAToBHwJPA+UDegN/ArIBJwFGADX/ov6H/qj+nf7y/Sr9u/wI/KX6+fh19+j2zPdJ+Wj7JP9uAx4G0QaKBiUGdAVbAxkA0/3w/BH8VPu2+z/9if/qAaYDywSZBcQFAgV1A+8BZgGGAa8BYQKVA0IEJQTJA08DfAIjAbX/G/9B/zb/vf4t/qj9Nv2k/Ij7JPrd+ID3gfbq9oP40PoF/nYB8gM0BYAFPgWyBFgDMQGL/5z+aP1x/Nj8Vf4GAHwBwQIGBOIEtgSwA3wCuwGXAZ
cBgwH3ASADDgQZBIUD1gJCApEBrQDW/x//ff4O/rX9KP1y/LT74Prl+a/4V/eB9tf2PPhy+mf9lQAOA1IEuwT8BOQEsAPAAWoAuv/L/hD+bP6V/+UALQJ4A4cE6QSyBCMEHAPeASEBEQFwASoC8AJaA3YDYgMdA4sCYAECAGH/Qf/M/ir+r/0+/b38JPxu+7L61fmb+HX3O/cc+NH5Kfy0/g4BuwJqA5ADogM4AxgCBAGEAPj/Vf+O/50A0QHxAjUEegUUBssF+QTJA4ACjQEcAQYBNQGwAUECjwKOAkACsAEKAXQAAACC/+P+c/5L/v39af3Q/Ff8xfvY+rL5fviK9233afg++nD8sv69ALoBmwGZAfEBpQG6AGQAmgB4AHkAWQHBAiMETwVABt4GzwYoBiQFugMrAiMB4gDCAJ0AwQDpAMgAgAA7AOf/T//F/qv+vf6U/mj+ZP40/sn9Wf3U/Cb8bPut+rz53/j3+E76Ofzp/U7/UgBqAAUADAAYAJH/FP94/woAEwCCAN0BXgOYBNwFOQcRCP8HZQdQBroEJQMJAlEBngBLAIkAigD8/2P/I//8/q7+rP7n/vP+5v74/u3+dv4j/kr+P/7Z/WD91/zv++v6xvqR+6P8p/24/pz/lP8I/wL/F/+K/v79KP5r/n/+T//wAHwC0QNsBSIHRgiJCCgIQwesBdoDkwK+AfAAawBSAAMAcP81/yT/wf5Y/lj+g/5f/hX+Gv54/qf+kf6k/r/+fv4M/nz9g/xd++n6W/sZ/OP82P26/uL+ZP4O/uj9Xf2m/Ib86/xS/RP+kf9ZAecCZwQMBnEHLAhTCPsH6QZkBSQERANqAtIBvQGoAQkBRADW/4z/H//k/hn/Vv9Y/3L/lv9S/8/+iP5m/gz+sf19/fr86Pv8+hL74vuT/EH9Pv74/r/+Ef67/WL9zfye/N38//wo/f79Wf+eAN4BfQNmBf8G5Ac9CC0IZwcZBugE6APnAjkCDQLQASMBoQCZAHcAGAAIAFgAVQDS/2L/Kv/B/jX+6f3n/c79sP22/Vn9SPw2+xD7l/sy/Pj82f1U/kX+Cf7D/Rr9Tvzp+9X7v/vI+4L86f1Y/80ApgLbBMgG9Qd+CE8ITQfoBbUEwgPuAocCoAKOAhgC0wH4AeYBXwHfAJMAFwB3/x////61/mL+c/6p/pr+av5s/hj+9fyZ+9f63fp3+3j8a/3G/Zb9Qf3X/DH8Zfvx+uP66foD+6H7/Pyi/jwA7wHNA7gFYQdiCJIIHQhLB1YGcgW1BDEEBgT4A5ED2gIyAqEB5wApALf/kv+F/4L/jv+G/0z/B//t/tj+vv6//r/+Sf41/f/7LPv1+jf7u/sx/GX8Sfz9+6/7XvsO++76CftD+5b7P/yF/RH/iAAYAhMESQYbCFUJ/wnlCRIJ8gfSBrEFqQQEBIsDuwLFAS0B9AC5AH8AdwBvADoABgDf/5X/Mv/+/gP/+f7F/on+H/4z/dL7Z/pO+df4Jfnv+bL6QPup+8z7p/t5+2H7V/tZ+2f7mfsd/CT9k/5EACsCQgRpBk4IgAnVCXAJbwgBB4IFXgSxA2QDQgMPA7ICTwL7AakBWwE2ATwBPAEXAdMAbwDk/2T/Bv+m/hr+iP38/FL8afti+m/56/g5+UT6V/v2+1n8hPxV/AD84/vR+7z76/tl/OP8mP3W/mgA6gFsAyUF2gYjCOcISgkgCUEIGgc7Bp0FAAWDBDEEygM+A9QCiAIaAqABbQFsATQBrgD//yv/OP5x/Qr91vyt/KL8n/xU/Jb7i/qC+ev4Fvnc+bf6UPu1+/j7FPwN/PT71/vZ+/n7FPwy/KT8kP3f/osAkALSBA0H6wgMCkYKswmRCD0HAgb8BCoEqANkAxoDxwKqAqkCbQL3AX8B9QAQAAD/Kv6i/UT9J/11/fn9Xf6Q/n7+5f3A/Fj7CPpB+WP5Qvo4++f7Wvxv/BL8lvs1++f6yvoC+1b7vPuU/Bf+BAAQAjYEcQaBCO
4JZAoFChsJzgeABqAFMAX4BPoEFAXLBPkD7gLlAegADwBv/yX/Kf9E/0L/JP8E//H+9P73/tv+sf54/uf95vyz+1n6Cflw+Nb4i/kP+pP6+/rW+oL6mfrz+lj7E/wZ/df9Of7E/p7/nADRAW8DUgUdB34IRwlkCeII/Qf6Bv0FGAVdBLwDDwNfAugBvQGzAa4BvwHkAekBnAEQAXcA6f9r/x//D/8F/93+oP4f/g79lfsb+qv4WvfC9jj3SPhw+Z36oPsx/HL8p/yg/Ev8G/xD/Gv8nfxk/d3+nQCDAqIEsAY6CDMJpQlQCSkI2gb2BWIF8wTSBPkE/ASOBO8DYQPpAoUCNgLhAXEB7wCAAAsAW/+Z/gD+ov1N/fr8zfyH/P77Zvuw+rv5o/j99xv4c/jh+IH5Fvp9+sP6Uvsm/L78XP0X/qv+Ff+U/5oA1gH0Al8EBQaUB8sIdwmvCTEJHwgDB/cFPQXsBOQE4gR7BOwDgAP6AlICrgEoAaAA5v89/7P+JP7B/bT97f0v/lH+af41/lD97ftn+sL4C/eI9dX0D/XO9Q/3hfjJ+dv64/sp/XH+Xf9FACgBvQEOAmECHwMABOgENwadB7EIdAngCcQJ8wjTB98G8wUKBWkE+QNtA54CzwExAZQAHwATADEALgADAM3/ev/i/kH+0P10/SD9zvxv/NH70fqd+V74BPej9Yj0JPSl9OP1u/ff+c/7Xf2s/ur/DwHxAY8C/wJHA28DjwMEBOcE9wUmB24IcAnwCf8JhwlcCMQGPgUEBCwDwAKRAmgCMQLpAZYBLQGpAEUADQDM/3r/NP/q/mz+zP09/bL8Jvyu+zX7qPr8+Tz5gfio97r2/PWo9QD2/vZ7+Ef64vsy/XX+uf8ZAXcCqgOlBDIFVwVMBUIFagWiBQkGzgagBz8Iegg4CHUHSwYfBREEDQNZAgUC3gGeAS8B2gCaAEMA+f+z/1///P6I/iL+pP0a/dD8sPyY/Ij8afwq/Jn7sPqa+Vj4BffV9QH15vSx9TX3TvmT+5H9Tf/VAEQCngOuBHEF1wXVBZsFWQVUBZAF0wVJBvcGiAfdB/UHnwelBkUFBQT6AhACeAE/ASkB+ACtAGUAGADd/9L/z/+r/2//LP/f/mX+vP0G/V782/t5+x/7oPoK+mv5rvjc9xr3cfb99Q724/ZY+Ej6ffx2/hwAnAERA3sEfwUABkwGZQY5BtIFkwWnBb0F7QVJBn4GnwavBlcGZQUlBAsDIQJdAQUB/AACAfYAyACFADcA/f/f/7P/b/8U/5/+Kf6y/Tv9z/xl/Af8w/uB+xT7ifr7+Uj5ZPiX9/32hfZw9hP3UvgR+nT85/7nALICdQQPBhoHcgdzBzgHowbIBQcFtgSXBKMECgWFBekFTwZSBp8FhwRlA08CTAGkAGMAQwAfABQAHwApADQAQAArANj/V//F/iX+fP3T/E389PuR+0r7Lfvw+oH6Ffq4+T/5o/g7+BH47Pc8+D75ifpT/KL+uwBYAp8D7gRLBvoGDwcCB7EGDAYlBWEECAS3A5IDzgMRBEsEjgSFBPADEgNlAsYBAwGBAFsARAAYAPr/EAAZAP7/AADr/57/M/+x/i7+mv3//JH8Nvzo+937/vsK/On7xful+z37nvoN+or5DfkK+cr55fpw/L/+8gCPAtgD/QQRBpIGaAYfBnUFeQSkA/oCrQK2Ag4DsAMWBEsErQSZBNMD0gLwAS0BXwDW/8f/3P/u/x8AYwBnAD0AUwBdABYAwv92/yb/mf7x/YP9If3c/O/8Ev0I/d38tPyB/P/7W/u7+gr6L/mf+Oj4qvnQ+tr8LP8fAccCSgSxBXMGaQYcBnIFYgRGA1oC3QGVAZYBGgKvAhwDmAPwA6YDrwK1AeMABgB9/3D/xf81AIYA6AA5AVoBkwGkAWwBIQGfABAAg//d/nP+J/7K/Zb9d/06/dv8cvwL/I77Dfub+iT6l/nJ+G74E/n8+Vj7rv30/6oBRQPCBO
EFbgZmBugF2gSKA3QCmwEpAS4BmAFYAiIDywNzBJ0ECQQiAyQCJAFMAPb/KwCXABoBtQFHApcCiQJnAhQCbAHaADsAfv/7/ob+LP4A/sP9lP1f/Qb9o/wb/Hn7xvoZ+pr5/vhd+M/3ofd0+IH5ufoS/Xz/UgH+Ak0EYQXtBcUFWAVuBHcD1AIOAp8BtgEGApoCJgOdAxsETgTqA/wCEQI4AWsAOQBjALQAbwEvAtYCUwNqA3cDKQNaAo0BuADU/wj/U/7G/Uj94fyv/IX8Vvwl/Pz7w/tY++X6ZPqy+ej4G/hc91P3Ufic+TD7sv1xAHUC7gNaBZIG+gaoBucFtQR3A3kCqQFQAYEBEALQAmEDuAMVBBkEcAOOAvABZQEBAUAB2gFlAgoDuAMSBOcDeAMSA1MCTQGQAOX/KP97/uf9e/0I/Zf8VfwA/IL7BPub+j36z/mC+WT5Efmk+F340feW92j4jfkF+2H9GgBtAhYEcQWBBrwGYwarBaYE2ANGA+gC9wJbA+8DdQTJBMQEfQQhBEIDFgJPAb8AjgAVAegBuwJ+AxsEXATnAycDdgKKAYYAo/8T/9T+ff4O/rb9Tf3X/EL8lvvh+kf6Bvrb+Zz5jvmY+Xz5FPl5+NH3R/fc90T5i/qr/OH/3wLnBBgGJge1B2MHkwZaBXIEEwTpA+oD+gNrBBMFRAXABNcDKgNoAksBawAfAG0AKAEJAsgCNQNyA2cDugKdAZ4ACgCu/2P/Vf9y/1r/0/7p/db8xfu5+tb5MvnK+L34EPmO+Qz6ffrS+tX6afq4+av4A/jw+Jj6YPwJ/14CdQUuB9UHgwicCC4IZgcxBtgF6QXRBfIFgwUBBYUEdANiAh0BUAA6AMn/5P+AAAYB8gFlAk0CFQJ5ARYBhwC7/6b/s//c/xIAyv+m/1f/X/4o/br7sPoX+pL5q/kf+qP6Rfu8+x78NPzf+3778fp2+rv5KPlb+nn8XP5ZAI8CQgXJBn4GoQYRB2gHcQefBpMG2gZGBsIFlQRCA3YCOwFTAIb/zf4m/1z/V/99/37/JgCWAF0ATQBDAJUAwQBoAHgApgDSANEAMQCN/97+B/4m/RX8iPuE+4z7u/vg+zn8wfy7/H38ZPxI/Db8zvtw+0r7rPq8+ij86/3E/3YBigPXBW4G7QXJBQAGSwafBdQEOQV+BScFTwQUA1ICYgFPAML/H//t/kr/fP+e/0D/B/+Z/9f/rf+J/+X/2AA7ARkBTgG+AQ0CswEeAdUAigASADr/Sv6w/U79FP3H/HX8i/zf/Bz9+Pyt/IX8Tvy7++L64Pkz+ev55Pv6/a3/bQHIA6MFjgW1BJgEiAVNBrUFCAUlBTkFiQSdAp0Alf8P//L+yv62/jn/wP/2/5b/xf67/nn/PQBqADsA1ADwAVACwQFMAe4B3gLlAlcC6gHoAXEB7f9Q/lH96/ym/CL85Psx/Jn8v/xW/J779Ppl+vb5Xfk2+N33hvkr/DL+8/79/1kCIgQjBGADrAPNBTMHgAYzBaQE+QRzBEcCcAALALQAPwG0ACEAGgAyABoAVP+a/vv+LwCTAfsBggGrAXsCQQNOA+oCngMVBboF1gQIA+oBTAEMAHj+Qf0a/av9ov3t/PH7L/vZ+kr6cfm1+FD4iPiA+ID3EPd8+Hj7Ef7h/mL/+wAFAwcEUwPtApkEuAZMB/IFhgS0BDkFkATeAn8B0gG/AosCUQH1/67/MwBAAMP/WP/W/x0BoQEZAVcASwBtAZ4CKgNuA9cDhQSOBHIDBwLsAGkAEQBL/0D+Nv1O/G/7Svob+Tz42ffg98r3afcZ9+T2w/YT91L4wPpX/fr+r/80ACMBJQKpAk0DuQS1BiwI/wesBowFSgWJBXgFBAWqBJwElwQJBOUCvAEYAQcBJgElAfwA0gCfADMAs/+D/+L/rABtAecBGAICAqsBGAGQAFsAVwAvAKL/z/4N/nb97fxn/Pz7zPvM+8/7uPuU+4T7i/ui+7f7tvuq+7
f77Psw/Gn8kfzG/Bz9hv0I/qj+YP8hANAAcAEXAsYCfwMoBK8EIQV6BbsF6wXqBcsFqgV+BUUF9ASWBFAEGwTVA3gD/QKFAhMCmgEoAb4AaAAtAPP/qf9I/93+fv4w/uT9lP1T/Sj9/vzO/In8Pfz6+7H7Zfsi+/n69voE+xX7L/tc+7j7LPyW/On8H/02/Tr9Nv1F/Y39Af6S/j3/+P/aANwB5QLsA9UElgUtBoIGqQatBpUGfAZcBjkGCwa6BUsFvAQjBI0D9QJgAsgBLQGQAOn/U//g/pv+jP6g/tb+Iv9g/4X/kP95/zn/uP4a/oz9Df2N/Ab8kvtZ+0X7P/tR+4D7yPsW/FL8gvyi/Lz83vz7/P383vy2/L78/vxX/cH9R/4A/8v/hgA3AQUC+ALrA6oENQWkBQgGUAZdBjwG/wW6BW8FAAVsBMgDJAOfAjIC5AG/AasBkQFhARoBzgCFAD4A///G/5j/ZP8j/+f+yP7D/sH+qP58/kr+Df61/Uf91fx2/DP8D/wM/Ar8/fvy+//7JPxM/G78nfzm/Dv9d/2J/Yv9nf3H/fj9K/52/uf+a//p/2gABwHGAYkCNAPIA0sErwTnBOsEzASlBIwEegRhBEEEIwT9A7YDTgPZAm4CBgKQAQ0BkgA3APn/z//E/+r/OgCSAMEAvACPAEcA6P9p/9n+Uv7j/YX9J/3M/IX8Yvxd/GD8aPx7/Jn8wPzi/Pv8FP04/Wf9kv2x/cX90f3R/cT9t/25/db9C/5Z/r7+Ov/D/1AA3gByAQMChQLvAkgDkgPJA+YD9wMKBBYEDwTuA7oDewMxA9wChwI9Av0BwQGLAV4BOgEYAfEAygCmAIAATwAbAPj/6//i/9H/uP+a/2v/Iv/B/lj++/2s/Wr9Mv3//NL8rPyM/HT8ZPxZ/Fz8fvy4/PT8KP1Z/ZT9zv3z/QL+Cf4Y/iz+PP5T/oL+0v40/57/HAC5AGoBEgKsAkID1QNOBJkExATlBAMFDAXqBKsEYgQUBLEDMQOqAkMCAwLPAZcBagFbAVsBSwEiAfUA0ACmAGoAHQDO/4b/Pv/z/qX+XP4W/tf9oP1q/TT9BP3b/Ln8lfxt/En8M/wp/Cn8Lvw+/GH8lvzV/B39eP3i/VD+rv4B/0P/c/+N/5L/kv+a/6//0v8HAFsA2ABtAQICjgIWA5kDAARABGAEcgR3BGQENgT3A7cDdwMwA9sCfgIhAsMBXgHzAJEAPgD7/8f/pP+R/4X/ev9y/3X/gP+G/4L/ef9t/1D/HP/Q/nv+L/7s/a/9fP1j/WT9b/1t/WL9a/2I/an9yf3v/SH+T/50/pj+vv7t/iH/Vv9//5b/qv+5/8L/yP/Y//z/MgCGAOoAWgHKAT4CnQLaAgYDOgNzA40DiQNyA08DJAPyAroCdQI5AhQC3QGUAUQB+QCtAGcAQwAcABkADAD7/+f/wv+p/8L/+AAZApUBe/9m/jX/aP9B/mn9Q/61/sr97vwd/Xv9IP1n/Iz7Hfv4+ob7Zvze/Jr81vs9/N79AP9u/sH9M/7f/sD+jP5f/xwAsAABAWgB+AH3AiAEtATNBJsECgWsBZ4GqAYkBtUFRgUHBW4EvQPzAh0CRAE7AIb/j/+V/63+0f2f/Wf+9/4//r39IP4a/2f/7/7y/m7/pv+s/3r/TP9Y/9z+SP6v/Sj95/xR/HX7ePqs+Y/5vvhe+Ab7MP21+/34k/kB/r0ADwAN/6j//QI7BnEGJgYQBxkJEQuoCskJYAoXC50KOAgyBggGqwUVBPEAGv4e/pD++/we+gz4gvgD+nX62Pkg+dH5wvtn/Uv+lP4a/0MANAHTAX8CsAILAhUBxwD2AHMAH/9K/Rr8cvu5+of5avf3+A39IfuJ9sz2vvqb/sr+Fvwn+1f/AAZpBrACrwThCM8K9guhCnsJQgtgDK0KqwfgBqsGOwRyAogAd/6i/jr9hPkT+Fj5Lvod+Vv4uPjL+RP8OP1i/Bf9SP+FAAABeAHuAR
YCKAL4AQwBuQBpALn+g/3y/Ob7efkS+on/7v7U9772//stAM7/bPzJ+rv9iAQFB98BJQEcB0oKrwrrCdUI3wllC+0KRgi0BpYGPgTSAcUAav/K/p/8ufiC90H5KfoZ+Ef2bfeH+Wb7W/x7+/r7y/6OAJ0ApAB1AT4CjgK9AgsCLAHEAHD/Xf7N/bL8Xvpd+Aj8tACn/Hr2OfhZ/ukAS/5F+4b7ZQBcBg4FeQBvAyEJ0gpgCjkJQQn8CvoLcQrcB0wHdQapAwgC8wCu/2f+M/tH+L34gvrP+dH2Qfa8+MD6jPv9+kv6MvwY/+f/TP+Q/90AlgHOAdMBGAHBAB0Ajv6R/TT9/vsv+e36QgE8/473k/iJ/5MC9f9d/ED8OAA6BhYG7P9OASgI2gngCAMI7geYCcoKCAr9ByUHxQY5BKcCaALaADP/q/zm+cP5B/t/+rP3nfa1+L76nftO+476Kfw3/2MAf//q/jgAlwHhAZYBWgC0/+f/T/8A/r78f/tD+cj6EAFc/+32X/cS/6ECkv+X+5/7FgBQBtoFNv9uAAwIKApeCAgHLQdTCbQKmQlYB6UGdQb+A5QCmQLyABD/Af3l+s/6vPsa+6/4xffU+dr7KPyJ+wL7b/xm/70AtP/w/jMA3QE3AoQBRwDC/x0Ap/84/o38cfvU+fz6cQCb/iH2svbu/kYCPv4J+iT7RQDXBdMEYv4bAE0IJQqmB2kG4gb5CCgKCAn4Bm4GQQb0A/gCbAORASv/cv0o/Ab8IvzS+nP4KfhM+mf7P/tH+4L79Pxt/5IA2P+I/wgBqgLwAkwCBgGNABIBlwCi/rD8ivtk+u/8wwGN/ZH1zvhOAWQCH/3g+RX8aQHhBSEDMv2nAGgIMAnYBv8FjQZrCH4JRggYBlAFhARwAmkCZgK5/6j9l/zi++j7Yvvj+Yj4CfmY+un6/fph+5T75Pwf/ygAqf94/+EAiAL6Ah0CzgDnAH0BzgDm/hH9fvs5+w4AugJh+sz0Nvx5AxcB7vqA+cX9MgSCBkIArfxaBJwK2QhdBngGZwhrCkMK5wcNBs8F+AOuARcCQAFD/jL8UftG+w/7A/pv+Nf3W/mV+qj6+Ppk+zL86f3d/zsAef8RALYBswJyAhgBWgDuAPgAo/8H/u78QPvC/JwCWQC29rD3NQHdA33+G/qk+wYBNwZmBKP94f99CCEKdAeABnEHpgn0CtwJngfbBpgG9wN7ApICEAAd/cT7NPv9+kH69/iH96X3h/nC+QH5l/mp+uL7gv2G/kj+Yf4aAGoBUgHgAFYAfwDYAMX/C/5M/TX8TPu0/w0DM/tK9f38aATvAHb6I/ro/lcE+gVRAFv9QwVDC7MIWgZaB3kJHwsUC1MJkgeaB64GKgS8A+sC3/+n/dD8Sfwx++/5G/lu+CD5QPrm+dn50fqq+4X8y/1v/h/+qv48AMQALwB5/y3/e/8o/1z97ful+7D69PwEAgz+b/VZ+XwDQAPq+535jv0KA3UGiQLP/CMCDwtSCoMG9gYjCf0K1wtECrEHfAeuB40FSgSvAzkByP6f/UX9Pfws+sL4tviS+Rf6Xfly+dr6FPyl/Bn9D/7K/jb/BACAAGUAGgCu/6L/Jv+O/cH7JPsU+vv5W/82AEv2afTV/6cEjP0G+Mr63gDNBYAE4vzK/aYIpgsiBykGIwhgCvcLWwu4CC8HrgdvBo4EGwTvAVX/Tv61/ST9b/tk+R75TvoV+8L59Pju+rT8Dv02/eD9G/8tAOIAFAEvAXAB1gB1ALAA3f/v/W78r/tB+qL8FQK4/NHyYvjcA04CSvmE91P9IwPLBR8BG/sDAqgLgglOBYUGDgmeCjELAQppB5QGtwYvBVEE6QLH/zT+9f2P/U/8A/qS+Cj5k/pK+oT47vgx+3T8ovzm/M79zv4PAB8BEwElAWIB+gBDAT8Bsf/a/Sv94vyX+p385wKi/ULz8vieBOgC0/mB+BL+AwP3Bb4BRPu3AWELnAmkBYQGpggNCt
AK2AkIB9AFCAb0BC4EbgIP/6v9uP05/X77c/m/+OT49flB+of42vhd+3r8dfw8/Yf+8v7P/1sBYAE1AYsB/gA2AYEBu//u/Yv9D/1v+pr75QI+AFP0wfagA9gEsPsP+O/8oAKoBv0DL/zT/98KhAvsBmUGTAg0CiALQAqhB+EFrQWmBB0E0AIR/xf9+/yq/Jf7f/lR+Dz4Wvm0+gb5AfiF+l/8U/zK/Pj9YP4L/woBZwGuADQBNQEYAb4BfwCU/vr9ev1f+zj6HwA3A7v4VvN+/j8G6v92+Gv6UgBrBbYGbf9t/PAGLg2wCK8FRgewCRYLRQsoCTgGkAU9BWUE2wO1ACT9h/z7/FX87/n59933Yfil+cj5UPgZ+bn7y/z+/C7+Hv9Y/+IAiQIWAnABnQG1Ad4BZQGB/w3+a/2l/DL6PvpJAWEBNfUc9FkBaAXu/PT3tftNAaUGHgZu/W3+jQqLDAEHaAZRCMsJ3wqPCiMI0QVXBTkEiANgA7P/cvxU/F78j/uB+ev3x/dK+Ln5mfm7+C767vuX/Lv9Yv8KABoAlgH7AsYCxQJzAuoBbALpAZz/X/7H/VL8RfkB+o0BcwDd89rzJQFYBPL7q/c6+2EAIAbABV/9AP8/CqwLWgdkB84ItwnYCsEKnAi9BuwF5gMaA1YDLgAZ/Sf8u/ti+1b6Ffn59933uvkf+rT5Evvy+wr8KP4AASEBBAAEAdcCtwPuA1AC7QD4ASUCxP9d/ij99vry+B/6pgBAAGb0CvPb/3AE2PzC92/6gf95BYQGxP4z/tAIyQvVB6kHzAgeCcAJCArhCPgGGQWOAkYCrwPEAIX8zfpn+tj6uvoP+RH3aPcD+mz6EPp1+9b7Jvyr/kQBdwGzAJ0BdAPTBBYFBAPqAdoCNwJPAG//mP2L+rH5K/mu+xgBMvvj8Jf30gPVAVr5nvhP/WACnwdSBNr8MQN5DEQK4Ac5CX4JRAnmCRQK0geiBSUE9QFfAlkC4v4l/EX6yPma+v/5V/g49074HPpi+n37efwj/Ff9iQDzAvgByQD2AtsE8gQvBKsClgK8ArUBPwDV/hT9mPp4+XD4pvq1AB/7WPBi90IDKAEa+ib5vvzyAbQHZATY/HMDAAxWCRoINgnVCHgJjgmpCH4HPwa1A4cAXwHzAaP+Cfy5+dz47/nK+bL4NffR9/L5Ofrr+6r9nPxa/WMArwIAAz8ChwKkA1MFhwUKAzUCoAKqARkBV/97/Kr6rfof+W72gPzyABb2avEH/WwDW/4C+nj7b/4XBOYHyAC1/ngJGAw8B/wHvwpjC+MJLAirB+kHRwfIAkT/ywDBAEn+Svu4+C354/mF+QP5xfgy+p36pPpr/Wz/+f6p/iwAewJkA3MDJAOoAsQDlAToAwYDQQHs/xcAOf/a/JL6dvlb+Cv29/keACr6w/G493gB7wDP+7X66PzMAQEHDAR7/yoFRgq/B8kHUwqACmsJGAiOB0cIMQg0BIz/RwDaAW7/LPzo+Xr59PnO+UT5u/iu+f36Dfs+/C7+4/4//xsAlQGpAgMDZQNIA3ADAQSiA0MDogIvAVAAnf9F/pz8x/um+kn4lvYm9wn8af4h94rzfvsyAaj+ovu2/Gn/AAN9BcsCzgLWCCMJIgbjCJILkgpzCPkGQgfpB88G/wJ/AOsBVQGZ/o39gvw0+0H66flh+uL6Nft6+oz6aP3+/p3+L/+FANwBdQKkAmMD/gPUA/IC3wLiAwwD9QDL/yz/lv40/az7J/ox+E33KveB+XX8lPgE9Af40f1Q/of8lPyx/fb/3gOjA/4B/AUqCCsGgwclCjAK6AgFCJoHZgfrB0gG9wKoAm8CPQBD/5n+Cv1j+8n6Pvtw+637KPsr+uL72/3T/Xz+gv8eAPEAkAEuAi4DvwO1AsQBOwOmA4EBZgDm/5v+y/3J/Cj7yfnl+Pb3vPaf+Ov7i/nL9Tr4Vvw//Qn9T/2F/Wf/5wIGA4MCWAZYCMUGwAfOCY4K+wo5Cp
4ISwjkCJgHEgUNBBADPQFFAPz+av2S/Jv70Pq3+vn6Bvu8+l77jfzb/Ev9W/5X/1cASQGQAW0BMAImA9ACRwL8AUYBrgBrAKP/U/5f/Vz86/o/+rL5jviN9533zfkH+/n4O/ig+t785/05/qH+3//uATgDEgO7BLwHAQh7B2QITgkKCvgJzgjoB7kHGwdOBf8DzgOMAjgAo/4i/qf9lfyw+0T7Hvtr+2X7cPul/Jz9X/2i/eb++/+OAIcAFAClAAECzwF1AHMA9QAxAH//Jv86/pD9Lv0K/A37Ivvw+qb5OPnk+Tb6cfsG/XX8+Pvk/X7/k/94ANYBAAJYAmoDEgQNBYcGmwYYBsMGoQeuB0QHzgadBq8G2QURBD8DXQNsAuYA7P8c/2b+2f0i/Xv8fvy9/Cb8tPun/Jj9hP1w/dX9Uf71/oz/ev9G/5v/rv91/8X/7P9j/7v+ov7s/rH+cv7s/Un95f3c/br8dvyl/CH9Ev1d/EL9vv78/qT+q/5W/+H/cwC9AEgAAQGmAQcBzQEoA5IDGgRZBEgE8gSVBX0F8QTBBLsECQR5AwUDDwJkASABgwCV/6z+af5Y/vH95f2q/SH9hv0p/kL+df7t/un+2f5j/xgAJQABAFwANQBcAKgAKgDG/+b/kf+o/p7+8v48/ur9P/6x/bL96P2Z/c39ev11/Q7+Cf5C/u/+DP/T/l//uv/B/93/5//q//j/VwBlAGIA/wBsAcMBkgLEAqcC6AIqA2sDeAMMA0MC4wEnAvwBgwERAX0AdACmAFQA+v8NAEcAYQB7AIwAYgB1APUAJgG+AEYAWgC1AIgAOgAMALz/hf9b/xz/qP5r/kv+0f3A/dv9ov2G/ab9+/1M/jP+Hv5W/oj+1P7+/uH+7f4u/1f/W/9w/5H/X/9Y/5n/b/8s/y//G/8w/0b/Jv9I/7D/9v/0/1gA8AAkAXQB/wFpAqQC0AILAzEDPgNaA2IDCwOsAtgC3AIyAsQB1wGWAfYAkgBsAFEAMADL/yf/Dv+R/4//5v6Q/s7+/f69/mj+Uv5i/m3+Tf4S/hD+T/5O/iH+QP5t/nr+hv53/mz+tf4X/+v+f/6u/h7/Rv9N/1j/ff+y//j/TQB4AH0AjgDMAAEBGwFSATkB+wA+AZcBswF0AREBPQGjAXoB+ADtAEsBPAH2AMoApQD1AC8B1gCVAJEAiwCNAIoAkABsABoA8f8NAFkAVADX/37/jv/f/9z/OP/G/u7+Gv/3/qX+Wf49/oD+tP5h/jb+b/59/pr+6f4L/+r+8f53/8H/sv/j//v/8f9MAJwAcQAsAGQAuQCfAJAAggBWAIYA0gCnAFsAdwC9AKQAfgCnANAAqQB5AJ0AvQCsALoAowBdAH4AxwCfAFsAYQBoAGcAXwAkAA4ASgAkAK//w////8H/gP+M/3P/TP9s/4D/Y/9V/zn/Rf+4/+b/jv9p/7T///8dACYA7v/B/xcAXwAdAOL//f/1/9r/7P/c/6X/m/+m/5j/p//Q/7b/f/+z/wwAFAAHAAsABwAkAHwAqABhAB8ARQCVAKoAaQAfACgAXQBoAEMAGgAnAFMAQAAaADUAYQA7APH/DAA1AAUA5v/u/+3/4v/a/9f/2v/v//T/z//G//n/HgD8/8n/2f8TACQA/f/M/8n/8v/p/63/nP+u/4T/QP9Z/5T/fP9Q/0z/Yv+f/83/vP+b/7z//f8JABEAHgABAPj/MQBoAGcAQwBDAHIAuADfALcAngC7ANsA8QDQAJMAkwCtAJIAVABKAFkASwA8ABYA+f8XABEA0v/C/93/7//P/47/lf/i//X/sf+a/7T/v//k/+7/q/+b/9v/2v+j/6T/tv+r/5j/nf+Q/3r/kP+E/03/WP+m/5T/Pf9O/4v/cv+G/8//wf++/yoAeQCZALwA8ADgACAB/gAtAYkC6gCB//sA4AAlAD0Auv/Z/+b/dP9q/x7/Vv+Z/3X/NP81/6D/0/+V/6v/BAAAAEIAjAAzAD
8ApgDaALIAMwBQAJYAqQCSAFYALAAZABYA5/+v/9b/vf81/xL/Tv9D/wD/Ev8C/+j+MP9o/yv/R/+Z/6b/yf8tAGsASQB8AAABFgH6APcAyAA/Af0AeAFkAdb+c/+KAfMAyP/y/jT/XADV/zz/Qv8f/9X/BwCG/4f/zv/D/6QAxQBBAIYARQCvAlgCff/1/xoAbADxAJH/i/9qAN//2/8f/yr/Tf/4/pX/Ev4o/koA9v7t/eP/FgE3AIL/xv8DAMr+6P8yAcMAHv8k/wgBL/+a/4kAuf9V/xEDtwOu/tb/rgLX/yP/VgHL/rv/dP8u/ioA6P3c/sr/nP5j/xD/Pv/fAFT/BQCWAV//8ACvAQQBTgHTAqcC2AAhAUcB6gA1AO7/bAHO/xf/IAB4/0r/qf8K/+P+fwC7/cP///49/TwB6P9t/YL/LAFMAKMAGgACATABg//o/mP/SwF8/+H9AwG7/1r+gwAUANX+sf8kAK4Azf33/4YBG/6zAFMA7P9fAD8BjgH8/08B/ADT//cABgAjAXT/4f9gAqX+4/8KAYMAs/8G/zABqgDm/pH+3QAQAgf/AAAmAa79lgBIApH9ef9JAY7+Fv+x/1T+YgAh/hD/LwE//YUALQBm/jIAqP8vAA0C8/9f/vH/XQFOARL/mf9YAlABHv4uAeH/x/5EAkEAO/6t/hsBUwAU/pD/IAFnARb/Y/8KAF/+xQEfAAb/SAALADYC/f7j/lMCGgKX/8D+HAIvAvT8f/9HBC3+sv42BCj+Xv6bAxX/3v3IADYC4f4u/AgC7QAh+27/4AKa/f77GwNPAc/69AFjAsX8SgHsAEf/WwEz/yv+IQLSAKf+DAHn/1MAff89AB0BMv6s/hQBtgDi/Zv9hQEfAYv+UgJP/9P+8AMb//z+uwJU/34BgQEF/wMDQABfADgC3/7zAe/+rv2TBAIBRf2T/oMBwgLg/a3+egK8/L/7xgGR/t36Gf9sAmn/BfzE/sYBTQJu/zr+LwRNAdj7nAFJAuj9wgGMAcH/PwE8/wEBt/+U/rUBcgAU/pL9DP49/zf/0v3i/hT/0gDJAUD+JgATA3UBBQA6AHIBRALJAUsA0P/vAHMC/AE+/pH+6wGRASr/K/0F/xsBk/4u/lz/0v5mAEMAtP9/AIEArAIxAZ799QHXA9/+vf7gAQ4B/f6u/ioA+v+7/VIAxwCo+xn8SwB4/8r8aP0WAvEC8f11/T4AlgFQAVUASwHrARwAOwC5APT/zwAyAqwBCwCU/7H/iv9X/2YAUQF6//39Hf9s/4P+Vf+FAA4ARQC9AKcA5/84/1IBjQJvAAf//f/kAC0Awf/RADMAov+OAEz/Ov6s/+gApwATAH8AWwAA/4r+Y/65/hb//f7l/2cANwCEAN3/lv9nAM7/mf9gAPUA6QAwAA8Ai/+x/lv/lv+Q/0EAcQBqAQgBn/8JAFMAAgCS/9L+Uf/RAD4BMgA1/+H/TQEEASkA0QC8AfQB3gAX//r+YQBxAAz//f5BAC8Abv/Z/zEAOwDIAPIA///d/sP+I/97/6T/bv+K/1kAuAC2/xP/u/8cAKD/wv8IAPP/gAB6AAcAXwA2AKb/1v9iADwAqP/l/48AlgAiAG//qP4D/9D/xP9p/9X/yABuAOT+tf7P/y0APQAHADkABAKMAnYAKv8DAF4Az/91/8z/twDYAEIA9f80ADgAOgB5ADgAx/9Q/1L/iv91/8z/rf8p/5f/6v+N/+7/AgGRAXgBlAAUAO3/Lv8z/ib9n/27/wIB8QD/ADMBeQFuANX+pv/bAPkAjAAyAKAAmADE/+r+Bf8MACEA5P+aANwAOAAkAJ7/yf7b/rf+Kf5D/mL/bgB5AI7/UP/b/2UAXwB9AKUBTQKpAYwA/v9R/+T+VP9y//H+Tf9dAIgAOwBuAOEAhgHQAfIALwCRAK4A0f9F/6L/eABRAVEB2QAWAd4ASwDlAFIBywAJAEv/Hv/m/hD+iP3+/Tn/EADi/7j/ZADOAG
0A5P8y/1X+Df5W/vj9a/0v/kT/Lf+g/mT+0v51/4z/Sv8X/7H/7f/1/gX+Xf6Z/xoA5v/p/zABuQKRAuMBegJxAwoDxQGPATkCLwJdAT0BewLHAzYDEwKhAroDpANbAksB5AAmAdsAgP/z/rz/CgBt//H+jv65/sf+PP6c/Q79C/0U/W38DfxC/MT8L/2b/Uj+Pv4N/k/+B/55/Ub95vyR/I78rPze/Ib9dv7n/vz+wP/eAIIB+QFFAqkC4QIOA1cD3wLlAuEDvQO+A38EvAT4BCAE5gNZBU4FtwO+AmIDYwR1A7MBKgJsA9sCpABk/yQA1P8K/pP9n/4i/+n9dPye/Nb8T/x8+9L6Mfu++637cPsi/G39pP1d/Xf9Y/3i/CT8a/t9+m75Rvl0+e35oftF/dT+fADzAQcDpAMGBaIF4ARABMcDbAOgAswBMALnA44FYQV3BBIFFgZDBWgDhQMkBWgFhwSKA4gDvQNsAiACHgPrAp0CogL0AeMAdP8l/oz9Kv21/Mj7JPv3+sj6WPrJ+fb5cPoT+zn7BPsj+xv7Nvuz+zn8Pvyi++D6rvrf+pD6e/oI+/v7sf1v/wgBowIHBN4FzQbuBfEFygUvBNMCegLYA4YEfQQeBt8GSAbfBRAFhQRhBHEE7ATpBIQERASAAzMCjwGdAcIBfgIHA3wCpAErAHD+U/3a+wz7cvvC+9n7H/tJ+un5YPl8+RL6qPpf+zv7sPoc+mP5IPnz+FH5ufq6+6f7ZvuW+zX8VPza/Oz+zAH6A7YE7AUzB+YG2wVgBOQDWwTmA7YDIwTABHUFEAUNBZ8FnAWbBScFjwVUBlAFZgTtA3cDdQMKA5cCUgJvAksCLAFVAKT/Q//U/gf93vvf+4r7HPvq+vb66PqQ+vr5ePk2+fv41/jX+FH4Ffiu+Ab5Pvm/+eP6Xfvk+o77LP1d/lr/PgGxA8QEbwSoBAIG2wb6BcoEBQV3BW4EKAPlAkAEQwY4BoYFSAYzBywH4AXfBGUFxwXRBIUDQwM6A2YCTwF4ASUCCgHY/8z/Rv82/vn8efzY/Hv8x/tL++f61frV+pD6CvrP+ZP5nvhu97D2H/cT+Br4ofjQ+fn5ivq3++X7cPwg/loAzQK7A04EsAaCBzgGlQUIBW0F9wQpA+wDMQSNA3IEVQRpBcoGZQUKBqYGgAUHBhUF7AQxBosElAP1AqsB5AGsAGL/dgCxAJz/Nv5R/Xj98fzc+7X7lvwD/R78HPuG+hf6wfk3+ef47vjD+Hz4yvcn9w/3A/dP90v4h/mt+y7+e/8yAQwDygPABLUFPQYLB48HQAdtBp4F4AQgBDUDtwJZA4wDHANYA9kDiATwBPYE7wWnBlAG7QXABe0FLAVFAykCigGuAHf/1f1h/S3+X/7R/Xn9zP0Q/pT9uvz8+zf8avyr+/b6dvpK+g/6G/l0+Hr4zveb9v31z/Vx9Qn2qvc/+b37Pv5zAIYDegb3B38I6wgiCZoIJgcJBngF6wQ0BMcCtwG3ATwCswK6AvYCSQQzBeEE6ASiBcgG2wb4BcYFmgX4BEwDWwGqAEn/lf28/K/7HfwO/Yz8nfz6/DT9Yv3S/Oj8mf2h/eP89Pt2++/6u/mp+P73Wfem9lb1kPTD9Mn0WvUc95H5pPzO/84CAwblCHQKSguhC8AK/gkVCd4GVAWlBA8DawF9AGoA5gD0AHcBAQMRBBcFDgaLBlYHPQfSBsUGywXYBNIDcAJgAZ3/S/5V/cv7WPsM+w77P/yB/Nz8rf15/Yv9gf0p/WT9of2r/Qf9Cvwo+5H5Sfiw98H28PXh9ET0l/Qk9Mr06PbG+OH8SgEOBGEIwwsGDQAOPA1lDB4L0gd5BkUFLgIYAWgANv8q/zv/GAB4AR8CKQQMBj4GYAfYBxEHtQb5BUUFdATJAu0BBwHw/sL9Ev3u+1T7Oft0+x38bvzM/G79av00/UD9Zf3E/UP+R/57/WP8WPvZ+Z34D/gf9yL2MPUs9K7zifMi9C
P28Phm/FUAWgRsCJgLxw2xDv8NDw2LCwsJPQf4BTcEjwIwARcAWv8I/7n/6gDtATADPAQgBVcGXQY7BsQGNgZwBawElAPLAlUBcf81/uH80fuJ+w/77PpR+8z76Pu9+xr9Ev45/cz9zf5e/un9k/2S/SH9r/vL+s/5hvgF+Ov2s/U/9dX0rvTl9FP2Cvmv+5H+JAL8BYgJ0As2DeANTw0zDF4KLQjnBmkFfQMIArsAxP9Q/y//9/8+AfMBxQKMA00EpQUWBrcFGAYPBvAEzAOxAqwBsABU/zP+aP2D/Dv8NPyl+5H76vvS+/P7pvw2/Yn9xf0g/pD+X/4N/gf+dP1x/KL7sPq5+Rf5Svhj96v2Pfaa9Rn1J/Yd+Cr6AP1aABQErgfmCccL2AwqDFwLOwq0COUHDgepBS4E4wLOAaQA7v9UADUB1wFMApwC1gI2A7wD7AMfBIgEgwT3Ay0DjQLaAcoAsv8K/3b+oP3R/F78Nfy7+277cvuE+wX8XPx7/AT9Yf1o/c39AP7m/Zv93vxU/Ij7nPog+mj5mfgT+FT3nfZw9t/2R/j3+f373v6yAVkE1QZKCEEJDQqVCQEJDgnsCMAIAgjwBuoFdQRyA/cChwKlAgsD1wJzAj0CIgJZAssCMANsA4IDKQOSAu4BLAG7ADEAgf8x/9H+U/7f/Wj9Fv21/En8Mvwz/Dj8cPya/Lz8vPyw/OD88fzb/MP8ifwu/M77hft/+1z75/qk+iX6bvku+Tb5lPmD+r77Nv3v/qUAIAJLAyMEtgQFBUEFzAV3Bt0GGQcnB74GGQbhBe0FKgaEBp8GbAbSBSMFVgSPAz4D+AJ9AtYB7ADq/xD/Zf76/bj9xv0z/nz+p/7v/gf/4f7D/sD+pf5//nz+dv48/qb9EP27/HP8TPwz/Pv7n/s1++f6uPq5+tX69voI+676K/ov+mn6cvoQ+zX8Lv0P/hD/NwArAacB1wH0AScC0gLsA/AEyAV0BuwG+gYbB7YHRgjjCCkJCAmUCHwHawZuBYQEyAP7AgsC8gC5/4r+gv2+/Kj85vxJ/Q7+sP4t/3f/pP/D/6n/n/94/zb/Cf+a/sz9Hv3G/H/8G/zI+6b7Tfu7+pX6uvqf+sb6L/s1+yP7Avuz+nP6Y/q8+mX7E/zD/Ff9BP7v/n3/+P9wALIAaAG8Ai4EEAXGBb4GNgdxBwkIswh1CQIK+wl1CZgInQeUBlsFWQSiA/ACGgLmAIL/Mf5a/Qj9P/3q/Y3+Pv/A/6T/a/9O/yH/EP8k/xr/9/6q/gf+R/22/GL8KfwR/CT8IfzL+2X7MvsR+yH7ePut+7D7fvsM+536H/rX+Sz6pPox+/X7oPxo/UH++f6V/8H/IQAxAWACuwMFBdYFdgbLBiAHyAdxCE0JAwr2CWQJqwj4BwQHBwZ9BQwFUwRnA2ACDQFy/1b+xf2O/eD9Rf6s/v7+6v7H/pf+c/61/uT+//4N/8D+Uv6z/fP8p/yu/MT8u/xl/C/81vs1+/D68Pr4+vP60vrM+o/6Hvra+a75w/k7+un6qvs9/P789f2x/kD/ov8QAOcAGgJ4A74EswVzBtoGHge6B7MInAkvCnkKdwrcCeAI8gcWB0IGZwWWBLMDgAIeAaH/S/58/Tv9jP0I/mn+u/7Y/rb+fv52/s3+Nv9+/3r/QP/k/hP+Tv3w/LX8qvyd/Fn88PtT+836aPor+lX6e/qD+n/6Pfr4+bn5ffmH+d/5lvpN+8r7XfwD/cn9h/4N/5j/PAA/AZECsQPeBP8FwgZuBwUIzgjVCaIKIQsgC5UK4gkKCRIINQd2BrYFpAR9A2IC8wBh/yT+sP3T/Rf+o/4e/zD/L/8g/xn/O/99/7X/2v+9/0L/g/64/T/98/y//KT8QvzE+yb7R/q2+XX5V/lU+XH5uvm++ZL5cPk1+ST5P/mK+Sr60PqE+zz8+vwL/un+df/9/4IAhgHJAtYD8wThBbAGUge9B50IfwkqCroKrwpsCgsKcAm0CL
sH8AYIBrwErQORAiwBvP+I/hP+H/56/vn+Tf+Z/7r/jP9q/5L/3v8RAA4At/8M/zr+Xv2k/Cb82vuD+xP7s/ot+oT5HPkJ+Tr5hPnK+fr5+/nw+cD5ffmX+dP5TfoL+4L7yvsQ/LP84P3S/mn/3/9qAHoBoALIAw0FEAbZBlwHzwenCKYJewr5CgwL2ApgCssJBgkACAoH4AWHBGQDRgIFAbH/ov4o/hP+hv5E/7b/CwAjAL3/Uf8p/0P/X/89/+f+Jf4i/UX8dfvr+sv6svp9+j/65/l2+QL5y/j5+FL5qvn0+Qr66/mW+TD5D/lH+dr5n/pJ++f7jPxP/Tr+B/+k/ykA4gD8ATMDcgSCBUYG4wZiBxsILQlXCn4LHAwHDLMLAgsmCmEJagh/B3wGLgUIBNECqAGyAJn/7v6//uD+a/+w/7b/c/+m/h/+1v27/Rf+Qv5A/v39I/1E/ID78Pre+s76uvqc+iv6mfn4+In4i/jS+Cv5XflZ+U35Lvkb+UD5pPlY+iH7tfso/KT8Yv1R/gD/cf/u/6QA1gEFAxgESwVBBjEH4QeDCLkJ0AqZCw8M8AvVC1wLjArSCegI9geoBiAF3wN+AhQB4P/f/ln+Bv7s/UL+pP7+/hz/0v6X/mb+PP5a/mv+Q/7x/Uf9b/yf+w77rvpc+if66flh+cD4RvgT+Dz4nPgj+Z75EfpR+iX6B/pE+ov6CfuW+wr8lPwL/cD9n/40/6D/+/+eAAACbAO6BCEGVgc5CNIIiQmcCpwLZgzdDLQMMwxYC0oKPgkmCB4HyAUyBO4CtAF7AHn/mf5C/kn+eP7e/j3/iP+I/xT/sP5z/jT+D/7p/ab9Ef0w/E/7f/ru+ZD5L/kF+eL4h/hB+DD4ZPjQ+ET5yfk++nj6lPpf+iP6N/pF+nf67vpf++f7qPyl/br+jP9KABIB/wF1AwkFdAaYB3oIGglICb8JuQpuCwMMPQzWCzsLTQpPCUUIRAeBBmYFIwQoA/QBxQDw/17/Ev/+/j//lf/I/8v/Xf+o/hv+jv0m/Qb94fyC/O77UPuw+iv6/fn7+e/5BvoM+tP5mfmN+Z75w/kB+j36V/pE+hP6u/mZ+dv5J/qV+jn72vts/Ej9lv7K/5YASwHnAcYCCQQwBS4G9QaQB+gHHwjNCLsJagr2CgILowokCmQJjAi4BwQHQgYhBRoEOgMYAgUBHABy/xX/2v7Z/uz+8/7e/lL+s/1d/QX93/zV/Mn8ufxX/OD7h/s0+xX78/rL+vH65vqC+iL63/nC+Y35ZPmV+cD5xfnB+a757vld+sv6a/sY/M78mv19/pb/cAD1AIEBDAIMA0sERwUcBrsGRQe8Bz8IRwlSCgwLeQs8C7MKCAowCWEIkQfZBu4FpwSBA0sC5gC3/9/+Wf76/b392P38/fL91P2I/Vb9Wf1U/Vr9b/13/TX9h/zn+3j7FPvq+tH6ofpl+vb5cPkO+ej48/j++DH5l/ng+QL6F/ou+ov6+fp2+x78yfx2/Sv+I/8+AOgAPwGSATkCWwNtBF0FPQYQB8gHMgjYCO4J4QqUC8kLjQsZC2AKfAl5CHwHkgZTBe8DtgJ3ATwAQ/+r/lP+I/4p/lf+lP6z/n3+If7//Qf+Bf4F/v79xv0t/VL8kvsA+6P6bfoo+vD5q/km+b/4iviQ+Mj4Afly+d/5A/ok+hH6Dfpk+tX6dPs0/O78tf2M/oD/TgCsAPgAhQFtApIDuwSmBV8GGwexB04IZwmcCnELxAudCyoLXwp6CZkIoQevBo0FIATWApoBXQBm/9D+jf5e/lr+mv7R/t7+zf6Q/lf+K/4O/v395P2o/Sn9dfy2+xz7r/pw+lT6OvoL+rP5T/ki+S35VPmD+bv5LPqV+r76u/q/+gr7Z/vO+2j8//yd/V3+L//j/yQAVwDjANABIANvBHoFXQYUB6gHRQg1CV4KMwuZC6ALKAtwCqIJqQinB6gGjQVXBB4D8wG4AJj/7/6Y/mr+e/6s/t
z+Bv8E/8L+bf5I/jj+D/7d/aP9Hv1V/I775fpu+iL62PmZ+Wj5L/ns+MH43fgd+V/5tvki+nL6lPqc+qr6zvoN+2L72ftp/Ab93f3K/n//7f9LAOYA7AE7A54E0AW1BmYH5gdvCEwJWQoTC1kLVgsCC1MKhwnCCOUH7wbgBa8EfANrAlIBOwCH/zb/+v7i/gj/Mv85/xT/zf6D/kn+If73/dH9rv04/YH80Psn+536Mfrl+bH5c/k0+fr4x/jJ+O/4L/mG+dn5HPoq+hP6Jvpa+qL6EfuY+0b8GP0M/gP/zP9iAPEAoAGnAtoD9gTwBbcGSgfAB1EIOAlGCgMLUAs3C8QKDgo+CXoIsAfJBrUFbQQfA/IB0wDc/z//4P6q/qn+2v4L/xT/+/69/nX+Tv4z/hX+8P2Z/fz8OfyV+xv7u/qB+k/6Bfqx+VD56vii+JT4uPjr+DP5ePmG+YL5h/mn+fj5d/oV+8D7cvxf/WP+O//B/xAAmwCaAe0CUQR+BXMGHgdwB9EHiQilCcYKgwvAC4UL7wovClQJggixB8QGqwVkBBMDyAGkANT/W/8V/wH/Jv94/6v/jv82/9L+j/5p/lP+Pv74/XP9ufzq+0L7zfqN+nH6Qfrp+XH57viN+Ez4OPhX+I/41fj2+PP4/Pgd+Wr52vlG+rX6Q/s5/Hf9j/5T/9v/fAB+Ab4CCQQ7BUIGAwdlB8gHjQiyCeIKrAvkC6cLGwtdCnsJpQjfB/kG4QWgBEgD8QHJAP//jf9a/2b/mf/Z/+X/sv9c/xX//f74/vD+0/6K/gn+UP19/Lb7Gvuw+l76Bfqe+SP5mfgd+N733vcG+Dv4X/hd+Db4Gvgz+Kf4W/kL+qr6ePui/Ov9/P7I/38ATgFdApUD0QTqBdgGdAfGBzUIDAkiCg4LngvGC34L2AoICi8JbgiyB94G0QWRBDsD4QGnANL/bf9Z/4D/yv8EAOL/hf8Y/8j+oP6w/un+9P6X/tX93fzj+yb7t/p6+k76IvrI+SH5Yvjc95T3d/eO98X31fer94j3k/fk93T4HvnN+a36+vtl/YD+Nv/A/18AUAGpAj8EswXmBrgHHwh+CDUJRgpRCxAMaQwnDG4LkQqlCcYIBwhhB5EGggVhBBcDqwGOANv/kP+d//7/cwCIAFsA8P9e/wn/Cf8z/z7/E/+r/tb9y/zT+wb7gvo5+gf6vvk7+Zj46vdP9/D20fbh9v32C/cf9zz3gPf894T4I/kC+lr73vwR/tP+Uf/u//kAXQLuA1kFlgahBzcIowhRCT8KIwvACx0M/gtqC7MKwwm1CNoHHwdFBjQFJAT8Ap0BdACy/1v/cv/P/zAARQAWALf/Of/t/vz+Sv+E/23/Bv9N/lf9Yfyd+x77vPpw+iH6mvn2+Fr42Pd891/3dfdy91H3SPdd95T39fd7+AX52fkw+6T8uf14/hr/y//XAEQC1gM4BYEGgQcBCGcIJgkwCiEL0AsgDOsLTwt4CnQJewi4BxgHUQZVBUcEEwPLAbcA/P/H//v/bwDcAOwAxABeANn/hv9p/4D/e/82/7L+xf2t/Kf74/pn+gz6yflj+eX4cvj294b3IPf+9vT2yPbH9s/2+fZD95n3IPi6+PH5oPsG/RX+uf5G/xcAcgFaAxMFewafBzcIgQjsCLkJjAoxC7QLtgsfC0wKXQlCCDgHfwbNBeYE8gPkAqIBhQDf/6T/uv8lALcAEwEiAesAeQAAALv/tP+n/2H/4/4i/jD9Pfx5++z6hvpU+ij61/lj+eD4Zvj/98D3kfdN9yD3D/ck91j3nvcV+LP4v/kz+4f8m/1r/h3/7P8MAaMCKgR7BaYGagfZB0MI9QiwCUYKyQrqCpIK9AksCU8IVweGBtUFAAUXBCUDFgIPAVQAAQAAAE4AzwAwAVoBRAH3AJcAPwAJAM7/Zv/F/uz9//wD/CX7m/pG+v75sflW+dz4YPj795v3SPf+9sf2tfa89gT3ofdx+Eb5ffoz/G
r9DP7v/r3/AwCqABoCfAOVBKwFXQZ2BqQGQgexBy0IKAmYCUYJ4AhfCJ0H4wZ8BjsG9QWqBd4EhQNyArQBugAtAK4AGwERAeoArwBMANn/qf+Z/37/hf/2/o79h/zm++/6/Pmh+Wr5+fhZ+MX3HPdy9jP2yfUC9eP0JfUx9cX1Gve5+ID6DP1lAOUCewRNBlMH1AZHBpAG7QbvBsYGQgZFBTMEqANkA0wDRAQzBQAFyQTzBPEEygQOBZ0F9wUDBp4FygTbAx0DRwJTAf8A+gByANf/Uv+2/lD+EP7H/cj9+/2t/dX8Mvwm/AL8e/tG+2b76fou+pD5yfgR+F33efZ29fT0LvWd9Z32qvju+in9jQB2BF4GbQfwCC4JzAeFBpcGvAbcBd0EkwPOAdcAtQB5ABwB7QKcA1UDmwMvBKAE/gTpBYgGRgY5BpEF/wMCA2QCSAFBAAwA0v/w/kT+JP7X/Yn9z/3m/c79HP7b/Tn9OP1e/QL9OPze+7X7rfp8+bj4y/fZ9tX1h/Ta82P0lfUg93j5cPzh/xUEPAdZCKQJIQuwCpYIbwfDBxQH7AQ3A7cB8f9j/0//Of/AAIQCmwKGAioDDgRwBOME/gUUBoIFUAUWBJgCVQKlAT0Ap/+e/xn/Zv5r/sn+cv5G/s3+y/61/v3+qP5p/p/+W/6Q/dr8o/wU/LD6s/kG+cf3lPY19Rj0VvRT9dT2avkn/AX/YAOHB/gImQk7C50LnwliB/0Gtga7BL0CNQFe/6z+1/6O/l7/fwE9ArgBHwIhA6QD6wPIBF8F5QR2BJQDzAEEAZsASv9m/p7+1/5r/hP+iP4C/7T+lf4S/zb/Yv90/+X+AP8v/07+Zv34/Jf8jvsH+v/4Dfiw9sz0kvMK9Df1uPZK+SP8Gf9rA4sHVAlfCg0MjAzDCmsIDQjjB98F3wNrAoEAZv9v/1D/8f/0ARkDwAIiAzIEdQQ5BLIEDgVMBJoD3wJWASAATf/1/eD8Av17/SP9Gf3Y/d79WP2b/TT+aP7E/h3/6P6m/nj+yf3l/IP8Ofz3+qP51vi69xf2SvRn8/bzFPWl9mj5g/w+AAMFBQjqCOwKngzmCzgKCgkiCWkIqgW1A1ICdQA4AFEASwAYAqMDVgMwA8YDRQRFBFoEUgV6BUsEYgMsAmsARv/S/WH8XvzE/Iv8Mvx4/NL8evw7/Hf94f7G/uH+QP9j/sT9df3U/Er9c/2I/JP7LPro+L/3u/WD9JD04PQf9kf4w/qH/XkB9QU+CEcJnQvtDNALKQolCYkI8QbOBFcDrQGGAMQAlwDSAKwC4AO3AxUE/QQQBc0EQwVRBYUEEQQQAz0B4f+R/hv95PuC+/n7g/sf+/b76fuO+zv8tfwp/az9sf2e/Xn9fv1S/Sb80fsh/Cn7kvpH+jj55/fc9Ur0gPT29IX2yPnb/AcBEwajCHkJEQsMDEsLpwnzCGkJhAh2BjEFaQNYAeMAtgDzAM4CnAQGBTEF4gX4BTgFNQXfBc8FBgVaBDADUgGv/+L9EfyD+6L7L/vH+jn7lPsx+xD7+vso/Tj91fwe/Qz9h/xC/Nv7DvxB/A/7rPnD+K73m/Y49Wn0BfWm9WT2gPgC+5r9zgHwBcIHcQnKC+cLXgpDCdAIIAigBmwFiwT3AjcCbgLwAXQCLwSoBKwE3wW3BnwGogZBB+8G1gUQBQsEPwK9AJf/H/4H/dv8kfyw+5r7CPy6+6P7NPxe/Hn8pvxo/Av84/vb+4j7JPtT+yP72vnS+A/41PZV9Uj0LvTk9Ov1pvf0+QL9hQEABVMGlQjlCoAKdgkLCfEIdwjNBnkFngQEA14CTALiASEDogSbBBQFWQakBnAGzgZzB1kHuQYQBggFogMeAmUA2f4B/p/9AP1L/IH8yvxl/HX83Pzk/P/81/xS/CD8Afym+yn7zvqj+vP5yvj79yr3JfYM9QD0zvNa9Af1qvbS+Jf7/v9oA/sEhgeMCUQJvwhfCGEIbQhKB0wGagWlA9
8CqwJRApYD7ATlBG0FdgaBBmQG+gZlB0MH7gZbBnsFTwTpAmQBtf+u/kL+kf0w/Uz9IP35/Af9Dv0Z/Rr9NP3k/GL8hPxL/Jr7h/tT+3n6pvnL+Mz3pvZJ9Sv02/MO9Kz0PfYu+Az7Jv+hAT4DNAZzB/8GSwdiB34HhwfHBiwGBwWmA1wD3gLoAlgE7wTcBLYFRQbdBfIFjAawBq8GmgZGBskF2ASkAzkCzQD+/zD/QP49/j7+q/2Y/bD9cv1Z/TD9/fzR/J78ivw9/Mn7mfsw+2T6yfkH+QX4Cffc9c/0YfRL9OH0MPYG+B77Q/46ALoCEgV8BeMFSAZiBssGqQYhBnoFZQTQA4MDKgPtA9YEGQWPBSMGHAbbBRsGQgYgBkcGUAYtBs4F8AT0A6MCawGgANX/fv95/zr/8P6V/k/+Af6K/Wn9SP0M/eT8fPz3+4H7Gvuu+hv6v/lW+YD4Yfcp9jb1gPQ39OX0O/Y0+OX6Rv15/+kBXwPBA0oE9QRfBZgFlQVnBc4E6wNiAyIDUAMXBNcEYgXqBUsGHgbYBeoF1QWzBdYFAQYHBpsF5wQABMUCxAEaAaIAcgBhAB4Asv9e/wP/pP5x/jr+AP7F/Xf9Lf2y/Az8f/vY+ij6tfkl+Un4Wfdk9oz13vST9C/1WfYa+KD6BP16/6wBmAInA7kDNgTdBEcFgAVOBYwE0ANOAyUDcgMPBMAEPgW4BecFrwWoBYAFNAU/BV0FcgVXBeoEQgR6A6ACxgFQAR0B9gDcAJYAPQDj/5//gP9a/0D/6/5T/sD9M/2i/Af8ffvz+jr6jfnS+NX30/bf9SH1oPSm9FH1TvbQ9+D5LfyQ/mQAmQGhAnoDHQSUBBsFWgUGBYwERAQEBPcDQASgBPQETAV2BXQFggWBBV0FLwULBQQF+ATHBHYE5QM5A4wC3QGCAXgBVAEQAcYAeQA+AAwAz/+n/2n/DP+x/jL+sv0z/Y/88Ptw++r6O/pk+Xv4c/da9oP1EfUg9br1w/ZS+En6lfzH/ikAKwEmAtkCXQPkA3kEsARhBBsECQQGBDUEoQQXBYMFywXCBY4FTAX8BKIEbARwBHAEXQQ8BNQDMgOMAvcBhgEoAfAAzwCZAFoAGADc/7X/jf9f/yn/5v6S/iz+tP0c/Yr8DfyN+xH7fvqu+cH4o/d69pz1T/Wz9Xf2mPdX+Y372f2U/68AqgFxAvwCewMOBHAEYAQwBCAEGgQ6BKEENAWwBfgF+QXABXwFKgXEBGQEOgRKBEsEJgTiA2kDwwIKAmUBEAH/AO8A3AC8AIUAUQAjAPH/sv9g//z+iv4X/rL9NP2a/Af8kfsq+7H6BPpG+Xf4dvds9sL1rvUh9vn2S/g3+n/8lf4CAAIB6wGfAv0CZgPsAw8ExQOIA3cDhgPTA3YENAW+BQ0GNAYnBt4FfwUMBZgEXARPBDsEEgTEAz0DlgLqAVMB7QDIAMUAuACOAGIALwDo/57/V/8J/8H+df4W/qv9OP20/Bj8efv0+ln6i/mz+L33mva29Wr1sPVt9pX3Zfna+zb+8P8bAfIBpAIjA3kD3wMeBOoDkQNlA2QDnQMZBM8EjQX4BR4GPQYrBswFUwXPBHwEdwRvBEYEBgSOA+gCNAKLARIBuACGAIoAfwBTAD8AIADO/4L/VP8d/9X+i/4u/rH9Ff16/O37XfvF+hr6P/kt+Pv23/Uy9Rf1dvVh9v33Ovqx/NH+bwC2Aa4CTgOxA/4DJgT6A44DPQMYAxYDWAPwA6wEOwWBBbAF6gXtBZsFIgW+BIwEcQRJBA4ErAMZA20C0QFbAQcBtQBsADIA/v/U/73/rP+L/13/I//Q/mr+D/7B/V/95/x1/P/7ZPuZ+qz5jPg79wr2UPUf9WL1J/aO95/5Hvx6/lEAqQGlAkoDtQMjBIEEeAQQBLMDhQN0A6cDMQTgBGoFwgX1BQYG+wXABVEF3wSuBKQEjARrBD4E0wMiA2MCvAElAakATwADAKz/Vf8d/x
f/LP8u/wz/1v6U/kH+6f2f/UT9w/w2/J771frn+f349fe+9r31WfV19eD11fak+BL7e/1+/xsBYgJAA8QDMASQBJwEPgTMA4gDYQNoA88DfgQlBZoF3wXqBcEFhAUuBbQEPQT/A8gDdwMmA+ACegLwAXYBFwHIAIsAaABCAPv/r/9k/xr/2f6r/ov+X/4x/gT+0/2E/RT9pPwu/Iz7qfqq+ZP4UPch9m/1ZvXT9b32TviE+vX8Hv/GAAMC8QKDA9gDGgRGBCkEzQOJA2kDawO6A2AEEwWWBfkFLQYeBtYFegUCBWwEBgTNA5MDPQPvAqoCRALFAVkBIAHnAKUAfwBgACoA1P+I/z//2P52/kj+Mv4F/uf9zv2I/Q79f/zb+/v65fm++IP3QvZX9Rr1ifWB9hb4YvoP/W7/CAEOAsYCLANXA48D3QPhA4gDLgMGA/kCNAPnA8cEcQXgBUIGawYvBrYFLAWZBA4EvQOXA04D0AJAArIBHAGrAIYAiwCHAHYAWwAbAMr/mP93/yb/ov5C/v79vP2V/ZT9iP08/dX8XPyY+4f6ZPk5+Or2zfVN9Xz1O/aZ97P5O/zA/tgANALrAjsDUgNQA00DSQMbA8wCnwKrAt8CWAMhBPEEkQULBnAGlwZ2BiwGvgUzBa0EPASwAw4DaAK+ARQBfwAqAAcAAQAdADkALAD2/7b/bP8V/7L+W/4O/rf9ev1V/TL9A/2z/Dn8kPuy+pz5R/jU9qn1GfVE9UX27vca+qf8PP9/AfQCoAPmA+UDyQOhA3cDPgPJAmICRQJQArYCjwOdBIcFIgalBt0GnwZPBusFXgXMBEYE5QNXA4kCwQHfAP//Yv8t/23/pP/F/+n/wP9z/z7/Tv90/zz/9v6Y/vD9Y/0K/ej8rPxB/Nf7Evvw+af4Pffc9dT0jvQs9az2D/kV/BX/nAGLA3gEdQQdBLoDegMvA/UCxQIeApoBiwGwAWQChwPLBN0FewYQBz0H0QaIBicGhgX2BHkECQQ7AxkCFAH0//z+nP6k/gD/X/+e/8v/tP+J/1z/Iv8I/8v+cP44/uj9df0D/aP8P/yx+xr7bvpP+dn3UfbY9AX0QPSS9ez3TPs9/4MCrgQoBl8GRAX6A0QDAAN+AksCbQLMARUBAgFSAR8CZgP2BF8GOgfMB7oHMge6Bv8FKwWYBEYEnQNuAkEBAACe/rL9iv0J/rj+OP+n/6//aP8z/zD/Yv9I/wz/1v5Q/pn94PyS/Hb83vsz+5v6kvkg+FX21fTz89vzKfV49+D6TP96Aw0GbwfoB70GxAQ7A8QCjAIfAjECIAL2AGAAwwBAAXsCDwTsBTwHjQe7B7cH4wYZBscF9wRpBKcDlQKAAa//Uf6D/cD8BP3j/a/+c/9p/13/Pf/9/lL/8/7y/gv/O/6R/cn8nvyC/J37UPvG+qf5a/iy9kX1x/MH84v08/bz+QT+JANdB2YIbAgSCEoGvAPPAQUCLAKWAegBZgFOADEAcgCeAQoDpATkBsEHGwhjCFQHmAYZBusELASJAyYDQQLa/2T+e/02/Er8C/1O/qv/mf+//5b/hv7K/t3+ov4p/9b+hv4A/qb8o/xq/IL7Xvtp+mL5SPhe9tn0UPMS80j1TPgd/BwBzQaNCswK3gkDCGoF4AIOAUMBqAG3AS0C/ACS/7v/KgCZATIDEQW1B2QINQgQCMAGAAaQBXsEKAR7A40CaQGz/tX8Dvxs+yf8Ov17/gwAGgDE/2r/eP5s/l3+Hf5a/jb+CP6l/Z78NvwB/Fv7uPrG+bH4V/dQ9VLzavJa8372YPqS/pcEwQr0DKALmgkNB/oDygDh/0QB6QFsAroC2wAx/zb/PQAnAosDCAa+CPwIZQhjBxYGvAXyBCgEJARaA88CGQHR/SP8J/sJ+6T8CP7Q/1IBtQDP/1X+2/xo/Vv+2f4S///+t/54/Zz79fot+/z6n/oo+gr5MPck9XrzWvKm8uL1rPoa/xwESQpFDpANYQomB3
wElgGV/zEAlQFRAsECawFB/8P+2v8SAtADXwXcB0MJhQgQB3oF4wQOBZEEDgRSA3ICOQF+/o/7b/qv+in8Bf5N/5cAEgE4AOP+Xf3k/G7+S/+7/oH+SP6z/Xz8BftH+wf8fvve+nb5Yffa9TP0wPJc8mP01/lU/0YDxQh9DgsQTw2QCNgEtQJoALr/qQAgAXwC0QJUAI7+9f5hAQoE9gSYBr4IEAkyCCoGdwSwBDAFJQUMBFwCXgFj/y/80/kR+d/6sP1O/00AwgCiAPf/Df50/LL8qf04/sf9Jf0w/dn84/sz+7j6jfpb+jj5Q/cI9T3z+PEg8cfyG/hz/tYDpgl8D28Rew4zCYgEmgGo/4r/AQHMAasC/wKjAOT9e/0xAAsE9AU4B+MIJQkvCEAGPwQiBCUFHwbPBWkDHQFX/4v85vnF+Dv6FP66AE4BSQGlAAUA0v72/N78J/73/uX+0v0C/eH8gPwe/Jz78Pp8+pr5sfdt9YfzY/L48e3yBPcw/RQDrAlGEGgSng+ICmwFEQLP/0X/VgFzAlECHgLG/zT96/wr/z0D4AUYB38ILgjOBk8FmQOeA9oE+wVBBgUECgHO/tX7R/lI+GH58vzy/8EAuAACAEr/o/5U/Rj9av4+/9r+sf2V/Cr8+PsU/E/81/sc+xb6M/j79d3zg/Ie8oHyg/U9+wQB8AaiDeMRfxF8DfIH1QNAAbz/5gA7AgcCEgLAADj+Av3a/YkBXgUiB20IdAh2B44G5QQPBMAE2QXVBqIFgAKv//P8kfon+fr4WfvC/osAzgAWAHj/Nv8Z/kH9lf09/rL+5/11/OD7vfsF/D78qvvz+g36mPix9mj0SPIP8ZfxA/XX+sgA5gU0CykQqRGDDtcJOAaFA+wBUQEuAfMASwDk/xT/p/0W/uMASgTuBugHvAcKBwQGUwXJBIQESgVCBsMFEAM1/4H8Mfss+sT54fqC/fn/mgDZ/zz/S/9q/zj/yf5d/iT+lf2S/M37afuf+zb8KPws+9L5a/jK9tz0BPOy8QHyS/Wj+hAAswSrCTsP6BCHDa0JlgYOBE8CLgFyARIBjv8o/0v+N/1U/v8AXATwBs4H8gcfBxEGIQYGBkwFVgXGBUsFFgNP/6P83Ps5+y/7nvuQ/Mz+GgDx/4H/IP/G/wcABP9F/nr9l/wY/Jr7dPuP+7X7zPv/+kD5tPdl9pH05fIg8ibz2Pbx+2YBlgbSCj4OPw8EDfAJSAfiBBcDDwJhAU0Ar/68/dT9WP69/w0CZASDBhIIVwgnB9QFoQW6BU0FtQSABDsEhwKy/xD9dvs6+zv7SPtN/MT9Ev98/wX/FP9G/+n+cf70/bj9Vv1u/MH7hftL+wP7ofrg+ej42Pc59hD0Z/KV8o315/mi/YMBgwYVC3gNfA1gDP0KjggJBikEUQIzATkA1/4a/hf+Tv8UATUCeQRsB3kIpwfwBcwEvAR2BHwE9QTZBAcEGwKZ/8j9gvzR+5v7+/tz/Tr+C/4U/kb+w/4U/8z+yP6v/hz+Xv1H/Hz7Mvvl+m/6xPlA+Zn4GPcV9TDzPfOR9Uz4gPtA/5YDpwd/CZwKaQvxCgkKFQivBcwDEwIIAb//d/6S/mv/5ADhApMEeAX0BUUGqQXLBKUEGwWpBWoF3AQkBP0C8AF5AOn+2/3g/Ab8avs0+7f7evz5/Fj94v2y/mL/Nv+Z/iT+ZP1n/H37m/r8+Vj5sPgl+P32jfV89NX0APcz+Ub7IP5MAUkETge7CqwM7woQCEsGYQR7AroBZALfAuUBswAEAIgApwIPBWQGtQb6BqQG4gRSA24DjAROBRsFlgTgA6sCEgFQ/z3+8v1t/Uj8kvv5+2P8NPz9+6783v1X/lz+Rf4R/qz93fwc/JD7B/ux+jL6Zfm2+O33RPdl91T4nvmo+hD8X/5XAKcBUwNCBbAGygYaBnkFvwR+BH8E4QMgAwwDVAPHAvoBtQIgBI0ESQQ1BHkEYwRNBH8EcgTPBCgFfARIA3
ACGgI+AdT/xv4H/iL9avxL/HH8vfwc/T39E/0d/cv9cf5R/sn9YP3I/LH7kvoz+pP6s/ox+nX5z/io+CP5/fnC+o/7F/2h/mD/FwCoAUoDzAOiA4ADmQMHBBEEFwSGBPAE8gTiAxoDNgOpA04EZATtA/0DIASdA2cDCwQvBcgFfQUJBVoESANFAhsBIACc/9n+r/3v/MP8/vw9/YH9Ef4R/sn9uP2D/Sn92fxp/NL7XfsF+5z6cPpe+jH61/mX+fj5k/oe+8n7iPxC/TP+9f7G/5UA1gD5ADoB1gGyAmMDoAMNBIwE0ARbBU0FHwU4BfkEMAQ8A5MD5wOIA98DiQSOBJAE3ATyBOkEaASoA5QCTwGIAKH/Zf4i/s7+m/6u/XP9rf25/Z39ov0x/an8avyS+8z6z/pH+2b7Pvsq+x775vp2+m76Bfui++375fv++xT9Gv5c/rL+bP/9/zgAawCHAA4BOAKnAi8CfQJ9A0wEswQOBSAFCAUXBT8EAwPdAhYE7QRzBHcEOQWLBfAEmwSYBOQDCQN3Ae//AwA0AFn/wP4z/9P/dP/X/hv/gf9M/3/+m/3w/Jf8dvzu+5v73fvz+2r7nPqc+gj7QPtt+5n7DPyH/Lv8IP21/Xv+GP80/33/tf9x/2f/JgDoAC4BZAFiAYoBCAJSAnYCtwKpA8wDjQJXAuMCXAMDBLgEFgWCBScGsAWJBOYDmQPkAgcCQwFcAP7/KAAZALH/sP/w/6P/gP+S/17/+v7D/pP+uv0k/ff8jPxS/GX8I/yI+1T7pPtv+5T7xPwV/Q792P2d/fD8l/32/Ub+yv7Z/p3+ff5o/hP/2v/K/8sAlAGZAaQBDQKVAiYCHgJOApwBlQFgAt4CPANmA8IDuASmBKMDRANYA/wCJAInAhcCmgH+AYwCNAJ5AXABCgHpABsBqwCwAHIAG/+U/uz+4v3m/b39Ufyj/L/8Q/xC/Vj9Kf2v/Qv8jf0p/+79J/8q/wn+O/08/WP+tv1+/TT/eP6x/D7/2gDs/lUAnQECAUIBwP8BAIUB7QC4AdYB5wDKASkCSgGSAekCBgEOANYAhf+8AHkCDwL2AQ0DsAIKAgQDogI3A4gC8gCtAJ7/SQBYAHz/nf8KAFr/fv7L/i/+5f7U/sn9O/4i/uP+yv7M/nD/pwB9/9D9CAB0/tv9gwB0/gX9I/9N/nj+7v3K/WAAFf43/pT/Tv9S/7b+WQAz/+H/XAOGAZgAmgJDA5wAuwC6ASb/qgBiAW7/SP9vAWAC2wB8APwC0QJDAEEDCAJf/0ACKgKnAFD/6v/UAXz+gv5mApj/4f0TAbT/d/6YACkAav+O/sAAXQA4/CEAFQLR/dH9kP8j/yj9nf4nAA39H/6RAET/xP6AADwBbv9q/9f/w/1L/4f/7P7bAOn+NQDDAP7+CgDW/jL/3v5x/vkAMQDv/2YBHQETAZIBzQE9Az0CbQBKA3ECwf5UAskASPwlAhoALv0RAWz+rP8xATT+FAGpAGr+ygH6/7X+cAIHAg4BMwE8ASoBSf4E/4z/1vwH/wr/Tv0ZAB8ACP4qACoAuP7AAMH/X/4kACgAfv9mAeX/bwFYA2X9iwAsA4H9QADaAHr8wP9GAKT+S//c/oQAtAA//zUBPALf/10AVgBeAGkAj/9VARX/K/8AAJj9hwDnAA0A+f9b/+IAav/p/+cB6f83AakCj/4m/1wCYf9d/pv/rf+tACL+ov/sANz+9gBG/2v+CwD//gIA4v9k/l0BlQCr/zICiQBhAfUBOv9PAPr/y/7wAMb/BP5qALcBYP7A/tYBzf9bADYCzgDm/4f/xAA+AOH+u/8QALP/pv9i/xf/r/8qAHn/T/6s/+z/aP8H/zkA8/8e/tUBQgA5/9oBpv+x/z4Aqv9Q/9L/AQBr/pYAkP+E/icB6f3v/+MAL/3y/woCTP8YAPYB1P8IAjcASQB+AC/+w/8p/ywAJQHw/zcAUQJvAFMB0gKN/0QCcANr/pYBjAKR/S
UBLQID/rUCuAB5/I0Div8f/moCo/3y/uj/Kvyn/4T/V/2eAOr/W/+c/gX+QAGX/lv+KgEr/e79aQFi/5f+PwBw/7v9rf/nAFP/3f8qAKgAKAGV/zUAEAJBAEUDEQDc/GQC8v8TAAIB6AAwAjACkwGrAusAawGjBLUAd/+TAeYAm//c/1cAq/7j/akAJv4q/E3/wACd/Q/9rv9R/oj+VwDh/eX+6AFk/gsATQG3/pcAmwAE/q3/x/+6/dn+uP4pAHP+5vzvAMP+6P2/AK7/av6RAboB+P6BAdkA1wAyArgBy/5IADYCIAHWAfX/gAOrAIv/XwPQAFYBLwLHAh4B2wB/BAACEgBhAzUCBf8PAKkAe/5I/8wAm/4Q/rAA2P7w/Hf/lv7K/V7/uP2S/eH/k/60/o8Ak/60/lj/9P1tAF//Tv1MAKn+2f3+/nr91f7k/pb+QP5r/cP/J//N/moA0f/CAKEAngCKAYIAQwLsAML/xwNbAZgApAJ7AV8BvQHTAQoCXQJqAUICrAB5ATkC+/5gAdgAav74AJ//YP6ZAVP/2P9gAeL9XQBfAAT+FgGS/9j+NABl/pn/2P8y/sX9fP+B/lr+AQD+/br/nAA1/q//rP/+/u8A6P24/vL/tP2C/87/zP4c/zEAeP81/9r/3ADSAID/TQFMArT/zgFWAk7/xQI4A9cAmgJjAoQCNgKNALUDLgEI/rYBeABz/kb/pwBT/9r9OwGoALv9c/+xAG////4ZALb/l/6ZAJ//Hf5Z/y7/mP4d/az/bf+F/Ob+Yv93/rv+v/9q/0/+ef8LABr/hP8oAAABcv/8/S4B4AC2/Yb/wgBD/qT/WADw/l//OQCoALn/l/+fAoIC9/+kAokC2wEtA00B+wGWAjUA8AEtAmj/WgEJApEAWgCBAIABpwGv/w8AEAJQ/8z+yQHBAHv/Tv/Q/8z/Mf45AFQAZP0T//3+7fxE/4b/4v1n/uz+ev47/rr+9/5F/wn/aACD/9j9VgDlAM/+2f75/yD/z/8BAAz/LP+s/5wA4f6P/hsBKQGmAOn/0v8vAdEAEAAzAKEANAC9AHgAUv+3AWMBZgAYAvEAOgHmAZ0AWgIwAqn/jAG4ASoALgHcAMD/kQBJAb7+Ev+CAEz/6f4W/sj+i/8w/tP+Rf9G/mf/T/+T/9n/Vv83APX+4/4BAG//w/7H/xwAef5r/z0Aqf+q/xcAdAA5//z/uwFSADr/GgFqAc7/XwBWAK8ADQHU/9n/AAAyAHUAaAD1/2IApgDg/4YAbQD1/2AAlQCxANT//v+JAOP/IwH1AP3+DgCxAbcASgA1AGf/KQALAKz/AP8C//UAtQC2/rH/cwBR/8r/gP8E/4b/i/85/yD/Y//W/5T/j/+X/83/hwDF/47/jgHcAHH/DgHsADX/6P//AMv+J/+iACP///6u//j/RAAvAO//GwG/AK7/pwFsAFf/fQGOAK7/hQAWAFD/wQCcAIf/GQABAMn/7v5G/3QAUP+t/oX/E/+s/hoAPgBY/xQA3wC8AAoAEQCjAI8Anf/a//j/Kf8TALn/JP8c/+T+ogD//wD/XgGrAGL/ZQHUAI3/EAHbAGgAtgCR/18A6gB4/yMA9QC2/4EAVwF3/wEAcwFzAAQAQwBMADMAc/9FAAQB5/8RAOAAcv+x/+MAy/9b/2T/yP/W/hP+Y/9i/17+K//Q/7n+nf+BAO/+s/6KAGIA2v4d/4X/Wv9s/5H/fv9e/yUAhwBH/2j/BgH2APb/LgBvAHoAIAGoAPL/aQAxAeYATwC/APcA6AAZATMBEwFkAcMBawHtAMoApAF5AXoA9gARAUMAJQBpADAA2//k/5H/jP7h/tH+6P3Z/d/9Ef6V/cn9I/46/vj+7P5F/tX9Uf51/kr9z/wz/S39pfwE/U/+nP6H/2YA//8+AakCfAJXAx0EFARpBAIEPwS+BG8EogQMBA0DfQNiA30CbwKJAl8C0QEbARMBqgAdAOD/Of/E/sH+VP7U/f
z99v20/Y39bf2D/V79yPxU/An8iPvq+mv6LPqy+kX6p/kc+9r7Ifz0/Jb9B//u/1oARAFCAjQDbQNnA/wDGgVuBQEFCwVGBSIFcAV1BWQEVwSyBBoE6AKHAnoCqwFNAccAPAA/AAsAof/K/+v/3//y/43/fv+O/y7/+v7N/h3+gP3y/Pb7R/vh+pj5gPg0+N/41Pjl96/4vPkk+1z8Vv0y/5MAEwJyA/YD7ARABqwGTQZSBpMGIgbFBeoFEQX7A/YDsQMmA7cCVQKGAd8AMAGWALj/IgA7AAgAUwAvABsAXQBQABIA7P8BAJT/A/+1/hP+Qv1L/GH7afr0+Jf3mfa69qH3Nfeq9lD4wPo//M/9yP+OASYDKwTXBLwFqAbSBjgG+gXkBdcF3gXLBW8FwQQKBJ4DaAOSAhwCoAFJAPr/NQCl/4j/IQBCAEYAwADjALcABgEKAaIAkABpAKn/xv4h/jL98vu5+mb53vda9nH1CPbl9sT2gfdH+TL7tv3L//sAmAKNBKAFlQU5BgkHlAZWBt8FLgUcBV8FSAVCBJQDSQPvAoYCIwK7ASEB8gC3AHcAnAC5AM4AjwDEABIBkQCHAMcAqgBfAAcAfP/Q/lD+T/33+876afkQ+HD2xfRE9c/27fZ592f5nvuI/VX/vgCxAegDKQWnBL4EiAXwBUgFwQSCBGsE3wT7BGoEMQRqBC8EmQNPA14D4gInAvMBwwGgAWgB0wB1AJEArABbACYAvADNADoA5f9Z/8X+Rf5C/b77hvpd+eL3Sfat9Ib0EvYA98r3ifkA/DL+Pv+WAAUC4gOEBQ0FmgToBEQF0gSlA4QDoAO2A6gDPwNrAwcEXgTSA2gDpwOJA+UCZgJyAqoCdwKnATUBYwF3ARABnQDGAOsAiACe/+P+df6y/Zv86PqT+bH4Kfdj9fPz4/TT9l33tvhR+5D9ff6o/04BkwIqBAcFOARdAyEE1wPXAvAC+wIaAysDXAM/A5MDnASqBC8EQwSZBBMEPQNMAzoDzwKaAhQCtwGbAU4B0wCqANIAigD//23/Df+L/mn9cPxY++z5vvhM96/1tfS49Qj3qPey+Tr8vv1G/p3/iQHzAlIElASNAxkDTAOKAvEBagKwAmACOgKPAusCyAOiBGIERASTBDUEcwMkA1cDSgN4As0BmQGVAaoBYAH0AN0AyQACAFT/Mf/u/lr+Wf0b/Nj6vPlk+PT2uvUk9Wn29vcV+Rz7aP2B/u/+twBZAmQDWAQrBFMDZQIAAmkBOQEgAoECXwJTAtQC1AOABD4FVgURBR4FIwRSA2UDfgMjA10CuwEnATsBawEjAUIBNAHMAB0Amf+Y/zj/hP7H/dT8ffs4+u74Y/fL9S31FfZg91v5WfvV/Mr9ZP74/04BnQLNA74D7QK6ATUB2gBkAaoCmwKWAuEC8wKEA4QEGAUlBTMFpATXA1IDZQO2A2MD3wIDAloBPgEmAUEBUAEUAYIA0/9B//3+zf4X/i39LPzb+mb5Dvj09pb1YfXv9lv4M/pW/Kv9Lv4F/5YAcQF0AkgDEQMXAgABjgB6AFQBSgK8AtkC6gJiA0sEdgUKBkIGBgYdBTUEzQPWA8cDqQMAA9oBDwG4ALkA8AArAQsBbQD5/73/bf8y/77+5P1+/B/7xvkh+BP3//WV9dH2lvh/+hP8S/3a/XT+5f+wAI4BxgLPAp8BZQABAFkAlQHFAjoDRAMfAzUDhQNFBC0FnQVfBVoEZAPeAt4CHQMqA/wCFQJSAfoAyQAtAaIBwAFNAc0AUgCU/xr/kv7d/Zr8KfvH+Tb4EPf29Tb21/ee+Uj7HvwF/XL9Hv5j/0YA8QHLAl8CAwG7/8P/YQAQAhQDjgPyA7ED1AM+BHIFWQaHBikG7AQ7BK4DcQOCAzADwQKsAb8AbwBtALgA6gAEAbAAVwAXAI7/QP/4/hr+yPxb+5v5LPgB9+n1S/bL91j5lPqh+1/8fPx1/bD+s/9lAUUC0AG5APv/6f/GAF
gCSQP6AxwEAwQlBIEEowVhBpoGIwYDBfcDQAMdAxAD3AJGAnYBtQBHAGYAtQAnAWEBQQHsADMAhP/t/m7+yf2T/A37RPmR9w72fPVg9gv43vkk+w/8Yvx//FD9YP71/6QBbAK7AWIA7//9/+oAoAIIBOUEOAUhBekEegWFBjEHPgeMBokFRQRDA9YCrgKOAhoCQQF1AA8ADABkAPIAYwFdAeAA7//0/lf+4v03/Ub85vo/+bL3H/YP9nD3Ffm0+t77gfwi/Cr85/zs/cH/DQFdAY4Am/9A/5T/KQG/AjcEMgVfBVQFKQW+BX0GzQbBBjAGMgXxA0gD4QKYAnkCDQKLAboAPgAyAFQA7gBBAVAB0gDc//3+HP56/ar8m/tQ+qj43fbd9XX20vdv+cj6yfsE/Ln7IvzY/Fj+AgDjAOAABgC6/7z/VgDhAYEDvQSEBe4FrAXoBYwG8wZWBzsH2wbeBZAEqwMBA5ICPgLxAXEB0gBfAEEAgwDtAD0B/gBQAFf/Qf56/bf8xvuP+gT5OPe69c/1FPfC+Ef6j/tg/Az8UPwH/Sz+zv/IANkAzf9y/yj/O/9xANMBpwPMBIMFoAWvBV0G3wY6BzoHZQfPBlIFHARJA5QCSwJCArIBQQHjAHQAZwC9ADsBTAG8ANP/q/6V/ZP8fPtm+gP5TPfg9dz1FffT+K/6Hvzo/OP84/xB/U/+vP+kALcAy/8i/7X+0/7Q/zoBMwObBC4FYQVjBcYFUAZ6BuEGQgfNBosFOgRSA7YCfAI+AgcChwHwAHkAKACAAPAABwFeAGz/U/4X/TD8Efv3+aj49fZK9qz26Pfv+bH7Iv2V/Zr90P0n/mL/PAC2AHAAjf/u/mX+3/6U/0MBKgMNBMMEzgQtBYYF2wWwBh8HQgd9BmgFWwRWAwADZQLrAZoB4gAmAKL/xP8JAFoAOgB+/4H+JP0x/Cj7FPov+ZP3p/b69vH3gPlN+1j9k/7E/tT+H//v/6MADAHIANn/KP97/hD+bP7+/xMCLwP4A3oEjQTMBFEFQAYJB3wHDAexBZoE1gM4A6MCZgI5AlkBXQDK/8z/AgArABEAYf+W/lf95/ve+h36Pvns99P3y/i7+TP7C/3e/pD/3P/2/8//qQArAQgBBwAk/5T+ff0Y/bH9u/8fARMCIQNzAyIEbgT2BJMFXwa7BqUFvwQRBHsDigLRAbIBDQFUAJn/I/8r/37/4/9l/8T+/f2N/Df7OfqN+Zz4KfgE+SD6Svu0/Er+mf8wALAAvAD6AKIBMwFIACD/d/4t/rv9RP52/8YAbAH/AdsCOgMdBPIEdAXhBckFNQUjBMgDlwPsAnkC4QFDAWEAy/+a/5L/0P++/17/b/7F/SP9G/xq+5b6c/mj+Ef5j/rE+6z9qf+xAOsAWAGJAZgBQgJQAl0BaQDU//H+Q/4S/+P/dgBjAQoCkALiAvID8gRaBe8FwQX2BOMDXAPHAtoBlAHVANz/+P6V/kv+Bv6u/rb+Vv7I/QX9Qvw8+7f6Ivpa+Rn57/ny+tn73P23/64AYAHOAbMBkgHcAWoBiQAaAKL/EP8P/5D/SwBEASECrQIBA3YDDgR5BNAEMAUSBU8EhAPWAvIBNgGwANL/Af9u/vr9uP3i/UH+Sv4N/qD9W/0b/Yz83vs++8H6dfoR++D7p/xZ/rv/AAASAAwB5QHaARICIAKWAbUAyP9g/4n/SgDMADEBOQIcA6wDAQSpBCgFFgXVBIQEJARZA/YCiQKWAbQA6v8t/3v+U/5v/pn+p/54/hn+WP3y/NX8jvxW/Bz85vtU+w/7m/sY/CT9i/5R/33/1P+LAJEA9wB1AbAB3gE8AZ0AkgA3AVIBbAFLAuoC6QLjAn4DrwPWA+4D+AP6A5wDPwODAu8BTgGBAIX/7/7p/oL+Vf5Q/hT+l/3u/ID8SPwz/Pn7yfva+7v7V/so+5L7wvvb+xX9Zv7Q/rj/BwEZAb4AUwFAAs0BXgH2AeIBfAFAAbMBCQIBAk
4CwAJ/A+ADcAS8BMME5gQ/BJEDOQP/AskBkwBBAJH/0P5S/nb+PP63/YX93/yG/H78TfzN+737Nvz7+777Nvzi/Jf8M/z4/OX9hv4O/yUASAGEAUkBZwGhAdgBTwIQAvABXwJSAskB1QHTAnIDpwPGAyMEcgQcBAIEFQQIBL8DGAMDAhgB1gA9AHb/Qv89/3j+dv0X/Zz8O/xn/Gn8+vvN++H7iftV+8f7ffxX/IL7jfsw/KL8Gv2V/nwAqgH7AXIBSAGqAeoB3AEeArQCaAKuAU4BtAFsAs8C/QJHA9MDegMaA8QDUwT6AwQDlgL6ATYB5ACXADcArv8X//j9/fxn/BH8Mfws/Bv85/vs+8/7ZvvV++f8Kf07/BT84fwa/Rj9iv5cAXAC5wHuAYgBuwDeALkBJwL4AvMDEwOmAaoBegKLAg8D8QTgBRQFRgR1BDwEkAOJA14DmwL0AXMBaADI/wIAi/9F/kj9hfyL+z37qvuC+xf7JPvC+gb6svp6/Kv8gPuS+y38zfur+4X9HQChAVUC4QEAAf8ATwHYAKcAiQKdA3AC3gGjAuYCYgJbA6oEXgX1BYYF0AQ+BCcEaQNMAmcCUQJmATcAOgBxAHz/nf4V/gz9Zfsd+377wfoz+p/6C/tO+j367vv5/Ev8Lvsw+xH8mvyQ/QYAUgIXA28CBAE+AJIAVQHyAQEDHwQoBAMDOgLgAsADjgQ2BekFAQZKBcME6AOPA9cDpgOjArgBcgHKABoA2f/Z/0X/Bv69/FP7lvqC+lj6Ivo1+in6PflI+ej6r/sY+yn7i/wS/QP9hP7hADoCWAI9AicBAAByABcBhAFqAhgEfARKAxEDZQO9AzwEDgWlBYoFTQXlBF0ExgMGBO4DkAKvAVcBkAB1/1n/nv/g/pP9PPw7+5n6Rfr/+bn5APoY+nL5mvnu+k77j/pY+2D9z/2U/YT/FgI7AtMBQAKdATgAGwAHAR8BEAI9BNYE4AOFA+0DegNbA3kEgQXJBXkFQAXbBFQE+wMoAx4CiQELAdb/2v4h/yL/5f2R/NP76fr1+a35rPmt+TL6dvq0+X75J/qW+r36lfut/cr/+gCuAYgCFgODAj0BlgDMAP4A+gB0AWICKwN1A4kD8QN8BDwFnQWEBcwF6gVyBQAFPgVbBVcEDgMCAqoAG/9e/mn+VP4W/qX9r/xZ+5z6cvoY+qX5V/kC+az4O/lC+lf6l/pw/Nz9pP2v/sUBwQMXA70CSwOSARL/6P4OAIUAagFLA/4DZQNEA6kDVAPuA/QFugaDBmwGMAbxBMkD9gNfA9sBAwFAAFr+5/x7/eb9If2K/Dv8F/uE+cv4hvgi+Er4Dfml+QL6VvoC+4X8af7t/18BxQKrA24DWgKoAZcA0f8PABEAdwB3AV0C0QJGA9QDCgR6BHgFuwb/BpkGhwYhBm8FfAQDBNsDrwJAAdz/h/6P/Rv9C/3+/C39l/x7+8L6GPot+cv3l/c1+Kr46/lE+7H8gv76/48ADAFTAkUDZwMFA38CiQHk/9/+1/5M/0sAEwKwAxoEYQTIBGoE8APWBDMG+AWTBQUGiwXfA5ACUALJAXoAgP/C/o/9rPxt/BT8qfty+7v6fvkZ+eb47fdo9+730PhD+WT6sfzB/lUA3gFcA9wDFQRYBKcDNwK/AIr/I/7O/dD+CABkAfUCYgSJBGYEpgS3BIkFTwZiBlgGEAZUBfMDmwIUAuMBFQH1/yn/Rf5V/Zb8Qvww/Pf7hvt++or5FPld+GD3N/dX+MX58fqr/H7/TQKoAy4EOQWhBbsEcAMPAuMAr/+y/gT+Cf7K/8MBagIsA8wEUwWBBEAEfQWRBvAFpwXHBc4EOQPxAVoB0wBDAF3/Mf5w/dX8Bfx9+8z7LPyl+5D65fk2+Yz3TvZK9gT33vj4+pL9PQEUBEkFzwVIBoYF9AP0Ao4BCwAv/4H+n/2+/WX/BwEpAlwDNAX8BVMFHQVVBdkFhQUMBSAFVARUA/gBowD0/3
v/iP4K/cD88vxj/Lf78fuD/Pb75/rj+TP5Gvh49on13fUn+LD6S/1AAfkEcgcWCLMHEgepBakDewH+/xT/R/6F/W79m/5bAE0ChAP7BJgGbQZ8Bc0EfQXwBRYF7QTFBNQDxgERAIf/8P5j/r/9if1Y/eP8a/wI/En8SPy1+6r61vkW+S/3WvWS9Nb1nfip+3IA8wTGB6gJcgmqB8YFNgRFAgkAz/5r/in9PfxI/an+jQCjAnQEdAY+B90GzgVWBesFaAVxBCMElAMsAmMAaf8O/27+qv0x/eT8n/yJ/KP8zvwJ/bP8T/vP+b34e/cF9pP0qPRR92z68v2hAv4G7QkwCrUIIAcIBa8CrgAc/1P+kv1y/Gj8LP2u/lwBfQMzBSUH1AfzBqYFOAWCBQUFUwQiBHsD8AFdACT/MP6v/Rf9kfxu/Gj8qvzV/Df9tP0z/db7Zvoy+cz3R/bk9MT0Uvew+mH+GwNfByoKkgonCcoGXQTRAi0BcP95/iP+OP1I/Ab99f55AeED2wV3BwUIGgeSBdMEBQVeBdkEGgSiAzwCVgCr/vn93f3r/Ev8Uvw9/FP8mPxK/dj93P05/bv7KfqS+An3tPWi9HH1Tfja+9r/vwMZBxcJxwiBB5cFZgMHApUAB//1/V/9BP29/EX9a/9OAlwEEQacB+gH6AbVBVoFXgVfBacE4wPvAokBFwCR/ib+Gf5a/cj8sPy1/Kb8Bv3Z/XH+Uf6W/TT8Nfof+Dj2jvSG8031QPkX/UABNQUdCAAJ5gcsBnYEXgNNAugAo/+q/oX9BvzZ+z79qf+kAgYFAQc/CNMHXgZaBVEFngViBbwEQgQRAxQBZ/9m/hr+uv0r/eT8u/zH/P38iv1A/o/+Of4J/XP7lPmX9/r1efQw9J72PPrE/VgB/wS4B8gHjwZ6BV0ELwPaAXkADf8C/tX82fsn/MX9EwCLAi8FPQczCO0HKgdpBpsFwAXhBSMFNwTbAisBQP/p/bb9nf2V/Zr9nP2f/bn9If5K/n7+bv54/fn7Mfp9+Mj2APUK9Gn1b/iw+1f/EQM1BicH8gUCBXkEZQNUAqwBCwGa/9v94fxI/Nb8DP/yAX4EfgYGCOIHmAa6BVkFCQVLBWUFNwSzAksBrv/t/Yn9XP6I/j/+U/55/uP9o/0U/kD+Lv67/ZT81Po3+bf32/Vn9Cf1yvd9+sv9jQFsBL0FgQXIBPMDNwMCA64CygHTAJL//f3O/MP8K/5wACYDfAUqB+sHRwc0BoAFeAWGBWgFcwXSBCoDTAHC/7f+WP55/tn+6f6q/oT+BP6M/Xr9e/14/SL9H/yD+tv4M/dl9Vr0tPV9+Aj7H/5lAV4DpQM4Ay4D6QLNAjMD/gIrAiMBl/8G/m39Dv6L/7EBPQTGBWkGgwbUBSQF2wRSBZgFWwVHBVoEeALHALT/+f7I/jv/jv9T/wf/o/7a/W39jv23/Yv9Nv2H/AX7QfnU92v2OvXN9eb3V/od/db/2wF6AiwCGAIPAmkCSwORAwMDCAKmADf/UP63/h0A7wEiBHwFBwYWBocF8ASuBDkFyAW1BXUFtAQ+A7cBiwDx//D/CgAGAMr/lv8z/4H+LP4u/jP+9v2D/dv8xvt2+ij5rPdO9jb2Vvf5+C371v0EAOYA0gDSAAMBSgEAApwCmAIqAkIBAAA9/1P/FQBaAQEDbgQlBUQF1QQqBLcD2gMyBFUEVwQDBCwDJQJVAb0AdgBeAFgASQAzACgAtP88/xH/2v6Y/jb+v/0G/eD7pfpP+cr3p/ax9q/3G/lC+3n92P45/zz/dv+5/3gAggEbAjgCBgJfAacAdwDeAK4BwAI9BD0FmgWmBRYFeQQSBCcEbQSTBJsELARNA2ICnQEQAd4AuACpALUAzgDSAK4AiwBAANz/Sf+y/h/+Wf1H/AP70/l7+Cb3nfYq9174/Pm/+wn9e/1Q/T/9Yv0C/vX+1/+MAPoAIwENATUBlgFQAmsDuQTJBUoGdgYKBmgF+gTsBB
sFKAX7BGgElwOpAtoBNgHkAKoAhwDDAB4BjQHJAcsBawHMAC0Aa/+q/u39E/3m+6L6Zfkd+CT33vZI9yr4jPnA+kD7OfsY+x37e/t9/LL9zP62/00AfQCmACkB+gEwA70EXAaLBy0ILAiZB+UGZAYwBggGwwUqBT8EJgMSAkwB2AC/ANAAGQF8Ac0B9gHWAWoBvgAaAIT/9/5p/sf94vy5+4/6YvlD+Gf3Gvdi9yf4Ofn6+Sr66vmU+YH59vkI+0X8df1e/gf/pf9/AL8BUgNMBTcH1AjiCUcK/AktCV4IowcuB9MGaAamBaEEkwORAvMBoAGLAWEBRwEmAeYAsgB0AC0Avv9f//b+hv4O/oj96fw4/Iz7ufrM+cz4/PeV98T3Xvjl+Pz4lvgA+K33+/f7+HH6FPyo/f3+OQB5AdkCTATaBV0HhghUCaYJkAkQCYAI/weVB1kHHAesBtoF2QS0A6gC4AFLAcUAPADF/1f/G/8G///+4P6k/ln+Ff77/eH9rv1P/bn85Pvm+sj5hPho98/2zfYy97v3BPjp98b3Hvgo+dj65fzV/kIAOAH0AZ4CdQOUBOgFQgeSCKgJQQpsCjkKxwlBCeEIfwjoBwAHuQUvBKUCewGfAAcAjf8Z/5/+Tf5Q/or+4f4k/zv/Dv/P/on+JP6X/db85fu4+oj5UPgw92n2S/bR9rn3sPhD+Xj5h/nh+Zv6u/sX/Vn+Wv8bAOEAxgEJA5UESQbvB0sJVArSCuUKkgoLCmoJvwgTCDIHHwbFBF0D/gHcAAYAXP/X/mj+P/5a/rz+MP91/2v/EP+G/uH9Wv3q/Ir8EPx7+8b6/vlI+az4S/hK+Mb4gPkh+lr6F/qe+Wr50fnW+k783f03/0IALAEhAkkDrwRBBtIHMAk4CsAKwApZCrAJ8whFCKMH5gbZBX0EAgOlAZgA5P97/y7/5f6g/lz+Jv4J/gX+//3f/ar9Yf0h/f78+fzs/Ln8YvzX+zL7c/qw+QH5rPjO+Df5rfnS+bf5jfnE+YH6uPs8/bj++f/mAMQBqgLXA0gF2AZECE8JAAo1Cg4Klwn2CDwIgQfIBuoF5AS3A5cClwHQACYAd/+2/vX9af0g/Tv9k/0G/l3+ff5l/in+C/4W/kD+Qv73/VH9cvyZ+9j6P/rQ+br58flN+or6c/oc+tf5Ffrj+i38of0D/zQAQAFIAlYDhgS+BesG3Qd/CNcI5QjACGAI0QckB3UGywUOBScE/wLCAZgAsP8K/5D+L/7Y/aj9of3N/RL+Xv6P/pD+Zv4p/g7+Hv5S/mv+PP64/fn8M/xx+8L6H/ql+XD5efmq+c357fkt+sf6xPsG/VH+b/9SAAMBsQF9AoADqQTTBdkGlgcNCEIIRAgbCMcHSAemBuUF+wT0A9ACtAHAAAQAd//x/l/+yv1g/UX9g/0C/pD+Bv9J/2P/Yv9p/4L/oP+b/zf/b/5e/VT8hPvy+o76Pvoh+kP6lfrh+vn67vr5+lv7Ivw4/V/+av9HAAYBzwGyArgDxAS/BYgGGgd6B6IHogdqBwcHggbuBVIFnQTDA7MChwFnAHz/0v5j/hr+8v36/Tj+qv4w/6z/9f///9v/o/94/1r/Mv/i/l7+vP0X/Yj8Bvx8++H6Rvre+cH56fkj+kj6XvqK+gD7xvvC/Mb9sf59/0MAKQE8AnUDqQS5BZAGKQeQB8EHyAeJBw0HTAZqBYMEqQPkAiICcAHLAE8A7P+h/2X/Q/9G/23/q//g//r/8P/W/7X/ov+W/4n/W//0/lz+pP0I/Yj8HvyY++/6PvrD+az54Pkw+lf6Yfpo+rT6Yfth/Ij9l/59/zkAAQHsAQ4DRARmBUwG5AY/B1sHTgcRB74GWQbsBXQF2gQbBDkDWgKPAekAWADO/1H/8/7Q/un+Kv9w/6b/vv/I/8v/0//W/8//p/9P/8H+Cv5U/a/8KvyU++T6G/pt+RT5F/lb+Zb5w/nt+VH6BvsE/CH9Jv4M/9H/sACwAe
ICFAQqBQkGrAYuB4oH2QfrB8wHWAetBtsF8AQMBB0DTgKMAfgAbQDp/2//DP/w/hL/ev/g/ykAMgAJAM//pP+m/7D/rv9j/8/++f0Y/WH83fuC+w/7gfrg+Xb5Wflt+Yr5g/mF+bj5XfpW+3n8if1q/jD/9//2ACICeQO4BMwFlwYfB3sHqAfDB64HfgccB5kG6gUQBR4EGQM3AnUB6wBzAAgApP9O/y//O/+C/8//FwA4ADgAIgD8/9X/nv9c/+3+Yf6+/SP9kvz/+1X7ffqa+eT4l/iv+AD5R/lk+Xn5u/ld+lX7h/y3/cn+xP/DAOABEANFBE8FJgbEBj4HnQfgB/sHzgdZB6MG1QUKBVIEsAMVA4AC5gFLAbIALADI/5z/rv/p/zEAXQBdACsA3/+U/1//QP8g/+L+bf7N/Rr9dvzp+2X7yPoH+jz5m/hX+Gz4s/jv+Az5KfmB+UD6ZvvL/Cz+Yv9uAG8BiQK8A/QEBAbVBmIHxAcUCFkIkAiXCF4I1gcQByMGJgU0BFgDmgLqAUgBpgANAIr/L/8M/xT/N/9c/3H/bf9c/0T/Kv8E/8T+Wv7K/SL9f/zx+3L77/pU+pn53/hN+An4Gfhc+Kr46vgu+aH5Z/qA+8L8/P0T/wsACQEyAogD8wREBlEHEAiKCN8IGwlDCUcJDAmECLQHtAahBZEEjwOcArMB1gAFAFb/2P6X/o3+n/63/sL+xP7M/uP+Af8S//7+vP5R/s/9Rv3D/Er83/t2+wL7e/rg+Ur53Pi2+NX4HPlo+af57/lt+kj7ffzk/UD/cAB4AXkClQPNBAUGDgfSB08IngjbCAkJGwnwCHcItwfMBs4F0QTXA9oC1wHSAOH/H/+b/lf+QP5D/kn+Uf5h/nz+n/67/r/+p/53/jj+9v2w/WD9/fyF/P37cvvw+nf6Cfqn+Wb5Uflx+bb5D/pu+t/6evtR/Fz9ev6F/28ARAEjAiwDXgSXBawGewcBCEwIdwiMCIAIPgi5B/IGAwYDBQMEAgP5AeoA5f8D/1X+4f2k/Zf9rv3g/SL+af6x/vX+Kv9J/0D/DP+7/lz+Cv7I/Zn9bf0z/ef8hvwT/I77CvuQ+j36HPoo+lf6lfrs+mj7KPwl/U/+ff+SAIsBcwJmA1sETAURBqUGAgc9B2sHiQeHB0cHyQYOBjwFYQSPA70C4wEDASMAYP/B/k7+9/23/Y79iP2w/QX+f/4F/4T/6P8pAEMAOAAOAMX/Yf/n/mL+3P1b/eP8a/zs+2L74fp7+kv6U/p/+rz6+PpG+737dfxg/Vz+Tv8mAPkA0gHEArcDlgRFBcEFIQZpBqcGvgajBkUGswUEBUkEjgPCAuMB8gAMAEr/w/57/mD+Zv6B/rf+Af9g/8n/KAB5ALAAzQDMAK8AeQAsAND/Zv/0/n3+AP5+/fb8ZfzL+yf7kPoa+t/54PkR+lj6qfoP+5z7XPw8/R7+6f6i/2MATgFjAoADdQQaBXkFsAXhBRcGPwY6BvQFcgXTBDkEqQMhA4wC5wE8AZ0AGwC6/3P/Sv9G/3H/0/9aAOQATwGMAZkBggFXARcBvwBJAML/N/+2/kL+1P1e/dv8Svyw+xH7dPrp+X75Pvkp+UD5ffnp+Yn6XftL/EL9Kv4E/+D/yADAAbECjgNEBNsEWwXSBTgGewaIBlQG8AVvBeoEXATJAxoDYQKqARABpwBYAB4A3P+l/4T/mv/n/1sAzAAYATUBKAEHAdcAngBIANL/PP+X/vH9Wf3Q/FD81vtY+9r6X/rx+Y75RPkN+fz4Fvlv+QP6x/qq+5j8lP2d/sP/8wAkAi4DCwSzBEEFywVSBtEGIQc8BxUHwAZQBtMFQwWZBNMDAQM9ApYBGAGzAF8AFQDo/+b/DgBTAJQAuwDGAMEAuQCvAJUAXQABAJD/Hf+y/kX+wP0Y/VT8lfv0+nn6Evqq+Tr52fin+LD48vhd+ef5ivpK+yf8G/0d/if/OQBTAWoCbANPBAoFpwUsBp
wG+AY1B0gHNQf/BqwGOwagBd0E9gMCAyECYQHFAEMA1/+K/3H/m//+/34A7AAvAUEBLAECAcQAbgD5/27/4P5g/vj9lv0p/aD8//tW+7D6D/p1+eH4Z/ga+AL4Ifht+OT4g/lP+kD7Tvxs/Yz+sv/bAAoCLgM/BC4F/gWrBjUHkwfAB8QHpQdzBysH0QZSBqkF3wQBBCsDYQKrAf4AYgDe/4T/Y/90/7P/BgBgALMA/wA2AUoBKwHKADgAiv/i/kb+rf0G/Uf8gfvG+ij6nvkZ+ZX4HvjS98f3+/dX+M34XfkP+vD69/sQ/Sb+NP9EAGQBjQKwA70EpwV3Bi0HyAc7CH4IhwhZCAQIigfwBjAGUwVfBGgDgwK5ARUBjAAkAN//w//R//7/OwByAJ0ArwCzAJwAZQAJAIn/+P5r/vb9kP0s/bv8OPy0+zL7s/os+oz54fg8+Mf3i/eL98H3G/iu+H35j/rI+xH9UP6D/7IA4gEZAzkEQwUbBtAGZgfaBzIIWwhjCEAIBwitBzQHkAbGBecE+QMaAzwCcAGsAAIAhP83/yn/Rv+U//b/XwC6APYABgHiAJEAEgB7/9D+IP5y/cr8MPyj+yv7wPpW+uP5XfnY+GD4E/j29wD4MviD+An5xvm8+tH78fwT/jz/fQDGARMDOQQ3BQoGwgZsB/gHYgiQCIsIVQgBCJUHDQdnBpwFvwTbAwwDTwKlAQkBewAOAMT/qv+1/9f/AQArAE8AaABwAFUAGQC6/0H/uf4r/qP9GP2Q/AL8dvvw+nH6/PmF+Qr5iPgT+ML3rffU9zH4uvhq+UX6SPts/Jv90P4EADoBbwKTA5oEeQUvBssGWQfWB0EIigiqCJ4IZQgBCHQHvwbqBf4EDQQgAz0CZAGYAOb/Zv8i/yH/VP+i//b/PABtAIYAfgBRAPv/gf/p/kf+p/0O/Xj85/tc+976c/oY+sD5X/nw+If4Ofgb+DL4efji+G/5JPr/+vf7/vwJ/hb/MABZAYsCrgOyBJcFYwYhB9AHYAjGCPgI/AjZCIwIGQh1B6cGvAXEBMsD2ALxAR0BZwDb/4H/Uv9K/13/gf+x/9z/9//1/9L/kv8+/9r+bP72/YD9EP2u/Fb8Afyn+0j74Ppx+vz5g/kH+ZX4P/gW+CT4aPjh+In5Xvpc+3r8qv3i/hgAQwFiAmkDWgQwBe8FmwYyB7IHFwhbCH8IgwhlCB8IqAcBBzAGQgVCBEEDQwJOAW4Asf8i/8r+p/6y/uX+NP+T//L/PwBuAHMASwD6/4n///5r/tb9TP3P/F/89/uW+zf72Pp4+hj6s/lR+fv4v/ir+MX4C/l++Rn62Pq3+6/8vf3b/ggAPQFzAp4DsASfBWgGCweJB+cHJQhFCEsIMwj/B6gHLAeJBsEF3QTmA+cC6wH+ACwAhf8P/8r+s/7G/vf+Ov+D/7//5//w/9v/r/9q/xX/tf5R/uz9if0k/b38Uvzc+1/71/pN+sD5OfnB+GX4L/gp+Fb4tvhF+fn5zvq3+6/8sv3A/tX/7wAJAhsDIAQQBegFoAY9B7cHEghSCHMIeAhfCCEIuwclB2MGeAVvBFcDQQJBAWkAxf9g/zz/UP+R/+7/VACuAO0ABQHzALQAUwDX/0r/u/41/rz9Vv37/K38YfwO/LL7QvvE+jf6oPkN+Yb4Hvjh99z3EfiB+CP58PnZ+tn76Pz+/R7/QgBnAYgCmgOZBH0FQwbvBn4H8gdJCIMInAiPCF8IAgh5B8oG8AX8BPMD4wLgAfUANACl/03/LP81/2f/rv/6/0MAdACFAHAANADY/2L/3f5T/s79UP3V/GP88PuB+xb7rfpI+t35cvkE+Zz4SPgS+Az4Ofib+DH56PnB+q/7qvy9/dj+AQAwAVcCeAN+BG8FSgYJB7YHRAizCAAJHgkXCeEIggj/B1QHjgauBb4EywPVAvEBIgFwAOT/ev85/x//KP9S/47/zf8HACcAJwACALr/VP/W/kr+uv0q/Z
/8F/yX+xv7qPo6+tT5bvkN+az4WPgW+PD3+/c4+Kj4S/kQ+vT66fvj/O/9A/8lAFEBfgKrA8UEywW4BoMHMwi6CBsJVgliCUcJAAmRCAMIRgdsBnYFagRhA1oCbgGhAPb/d/8i//f+8/4P/0b/i//L/wEAGgAUAOn/nP82/7n+M/6r/ST9p/wx/MP7YPv++p76PfrU+Wj5+PiO+Dj4/ffv9xb4dvgN+dH5uPqz+738zv3d/u7/AQEOAhoDHQQWBQQG3ganB1QI4whQCY8Jogl/CSUJlQjVB+oG3wXABJoDfAJyAYsA0P9G//D+0P7e/hb/af/L/ygAcQCWAJAAYQAMAJr/F/+I/vf9av3m/Gz8APyg+0f78vqY+jX6wvlB+bn4PPjW95v3oPfl92/4Mfki+i/7Tfxw/ZX+tv/SAOQB7gLpA9MErQV1BioHxQdMCLcIBAksCS0JAwmjCA0IRQdQBjsFEwTtAtoB5AAaAHz/FP/c/tP+9f45/5L/7/9FAH8AlQCBAD8A2f9W/8T+MP6d/RT9lfwb/Kf7NvvH+lj66/mC+SH5w/ht+Cf4+vfx9xr4fPgW+eH51Prf+/f8Ev4u/0oAYQF0AoADewRhBS0G4AZ5B/oHaAi/CPkIDwn5CK4IKghuB4cGgwVxBGADYwKAAbwAHACi/1b/Nf8//3D/uv8JAEwAcQBxAEcA+f+V/yj/uf5P/uf9ef0C/YH8+ft0+/36mPpH+v75tflk+Qf5p/hW+Cr4NviE+BT52Pm8+qr7lfxz/Uz+KP8MAPwA9QHuAtsDuQSFBT4G5waABwUIcwjACOQI2QiZCCcIhAe6BtIF1ATLA8cC0gH4AEgAx/95/13/av+X/9f/HQBaAIIAiABqACcAxf9M/8v+R/7J/VH93fxp/PX7hvsc+8H6dPo0+vf5sPlg+QT5pvhY+DX4Ufi0+FX5K/oi+yD8Gv0M/vj+5f/ZANIByAKyA4kERQXrBYUGGQerBzgIuQgbCUgJMQnUCDMIWwdcBkoFMgQcAxMCHgFGAJT/FP/O/sL+7P4//6X/CABUAH8AfgBUAAkAoP8m/6L+GP6R/Q/9mPwt/ND7gftC+w774Pqu+m76HPq2+UH5zvhy+Eb4XfjD+HT5Yvp1+5b8sv29/rn/pwCMAWwCSAMgBPAEuAV0BigH0gdrCPEIWwmcCasJfAkKCVUIaAdOBh0F4wO0Ap0BqgDj/0r/4v6r/qT+wv4B/07/n//h/wQAAgDW/4H/DP+G/v79fv0L/ab8Tvz8+6T7Sfvv+pn6R/r/+br5dvkn+dD4f/hI+D34c/j3+L/5uPrF+9X81f2//pn/cgBNATUCJQMXBAAF2QWeBk4H6gd7CPwIZQmoCb0JlwkuCYYIrQexBpwFfgRjA1cCYQGMAN//X/8P//D+AP8w/3j/xf8HADMAPgAiAOP/hP8O/4r+AP5y/eP8VfzI+0H7w/pY+gP6xfmb+X/5aPlK+Sj5B/n0+Pv4K/mP+SP62vqm+3n8S/0b/vP+1P/FAMEBwAK3A6AEdgU8BvIGnQc+CM8IRAmRCaoJggkWCWsIjQeLBnYFWwRIA0QCUgF5AL//Kv+//oL+cf6I/r3+A/9K/4T/pf+k/4D/O//d/m7++v2B/Qn9kvwb/Kb7N/vV+oX6Tfos+iH6HPoZ+gv68vnS+b35vfnl+Tv6u/pc+w38xvx+/Tb++P7N/7cAtQHAAswDzwTABZ4GaQciCMQISQmpCdYJxAlxCeAIGwgyBzQGLgUpBCcDLwJJAXcAxf86/97+sf6r/sb+8/4l/1D/aP9u/17/O/8H/8P+cP4T/q79Qv3U/Gf8Afyi+1D7A/vB+of6TPoR+tj5nvlq+T35Jvkv+Vz5tvk8+uT6oPtl/Cv97v21/ov/dAB1AYgCnQOqBKMFhAZOB/4HnAgnCZcJ3AnnCbYJQQmFCJQHgwZhBUAEJgMjAjgBZACt/xj/rf5s/ln+a/6c/tz+G/9L/2D/W/86/wb/wf51/i
L+yv1t/Q39rPxR/Pf7qPte+xb7yvp/+jT65/me+WD5N/kf+ST5S/mh+SD6xfqI+1v8Lf34/b3+hf9YAEABOQI8Az4EOAUeBu8GsgdjCP4IcQm/CdIJqAlBCaII1QfmBtsFvASUA20CVwFhAJj/A/+l/nX+b/6K/rz+/P4//3n/ov+v/5z/av8d/7/+V/7v/Yv9LP3S/Hr8IPzE+2f7Cfuv+mH6H/rp+b35mvmA+W35avl++bL5D/qV+kD7Bvzd/Lf9kP5n/0QALAEhAiMDJQQeBQUGzAZ2BwcIfwjkCDIJaAlyCUQJ1wgrCEIHMQYLBeADvQKwAb4A7v9D/8L+bv5F/kn+cP6w/v3+Q/95/5D/h/9d/xn/xf5j/v79m/03/dH8avwI/KP7RPvt+qL6X/op+gD63/nC+aj5lfmP+Z35zPko+rj6dPtR/D/9LP4O/+P/swCGAWECRQMpBAQFzQV9BhQHlwcLCHIIyggOCTMJKQnnCGoIswfMBsIFpASCA2kCZgGCAMb/Mf/J/or+b/5z/pL+w/77/jH/V/9j/1D/Hf/O/mv+Af6V/S79zfxw/BT8tvtY+/r6ovpU+hn69Pnn+e35APoV+if6MvpA+mX6rfoq+9f7qPyJ/WL+LP/k/5kAVAEfAvcCzQOTBEIF1gVUBswGQwe3ByIIeQisCLAIeAgKCGkHnwa4Bb8EwQPAAsgB4QARAGD/1P5x/jz+NP5R/or+z/4R/z//Vv9S/zP///68/mr+Dv6r/UL91Pxt/BP8zPuW+3P7XftG+y/7Ffv9+ub61/rS+tj66/oR+0/7rPsr/Mr8ff06/vL+ov9GAOcAhQEmAscCZAP6A4wEFwWgBSgGrQYsB5gH6wcaCBMI1gdeB7AG2QXqBO8D9AIBAhsBSwCY/wn/pv5w/mX+ev6q/uP+G/9E/1T/R/8c/9f+gv4o/tH9g/0//f38uvx3/DT8+vvN+677mfuI+3X7Wfs4+xf7+frn+uf6/vox+4b7/fuQ/DL92P11/gT/iP8KAJUAKAHFAWgCDAOvA1ME+gSiBUgG5QZxB94HJAg6CBsIxQc8B4gGtQXKBNED1QLhAf4AMgCH/wL/pv5w/l7+aP6H/rL+3v7//hD/EP/8/tf+p/5t/i7+6/2k/Vr9DP29/HH8LPzw+737lPt1+1/7R/sx+yD7FvsU+yH7RPt/+9X7RfzJ/Fv97/1//gz/mP8pAMEAXwEEAqgCTAPtA4oEKAXKBWcG/gaGB/UHOQhHCBwItQcTB0YGWwVhBF8DYAJtAZIAz/8u/7L+Yv48/j7+Y/6b/tn+EP8z/z7/MP8P/97+pP5l/iP+4v2g/Vz9FP3N/Ij8QvwB/Mf7lftp+0H7IvsH++/63frW+tz69vom+3n75vtn/PX8if0X/qP+L//E/2IAEQHPAZQCXAMiBOUEnQVLBu8GgQf8B1IIfwh2CDEIswcGBzUGSQVUBF0DbgKNAb0ABwBw//v+rP6H/oX+o/7S/gb/NP9U/2L/Xf9L/y3/B//Z/qL+X/4T/rv9XP39/KP8TvwF/Mb7kPte+zD7B/vg+sD6pfqa+qL6vPru+jr7n/sW/J38LP3A/Vf+8P6R/zkA5wCdAVQCDgPGA3wEKwXUBXYGCAeHB+UHHggnCPkHlgcCB0QGZwV4BIEDjwKoAdoAKACZ/zH/8/7e/uv+Fv9R/47/wP/e/+P/0P+o/2//MP/r/qP+Wv4O/sP9dv0o/d78l/xU/Bb83vuq+3r7Svsb++76x/qp+pz6pPrG+gr7cPvx+4f8Jv3F/Vz+7f57/w0AogA/AeUBkAJCA/gDswRtBSEGxAZRB70HAggRCOcHhgftBigGSQVYBGEDbwKLAcMAHQCd/0L/DP/1/vr+E/84/2P/if+h/63/qP+O/2v/N//4/rH+X/4M/rn9Y/0U/cj8gPxC/Ab8zPuY+1/7LPv9+tb6vfqx+q76xPrr+ir7jfsJ/KH8Sf3w/ZT+N//M/2cAAAGeAUMC6QKMAz
kE5ASUBUIG3wZxB+MHKghBCCQIywdBB34GoAWmBKkDsALMAQEBVQDP/2X/Hv/1/uL+6P4I/x//Uv9s/3b/gP9n/0f/I//j/qb+Y/4H/rz9Zf0K/br8XPwD/Lf7Xvsa+9r6mvpz+lD6Nvo5+j/6XPqZ+tv6TPvU+2D8C/2o/Tn+1P5U/+P/fwAbAdgBlgJbAywE8QSxBXIGFQe3BywIgAioCJEIRQjDBw8HQQZWBVoEeAN4AqgB5gA0ALv/XP8K/wz/7f4D/0P/Pv+N/5j/n/+2/5f/gv9q/yD/6P6V/ib+zP1V/eD8bPz1+377H/vB+nT6P/oL+vD57Pni+Qj6Lvpj+rv6D/uT+xT8pvxO/eD9gP4h/8L/cwAkAeEBoQJdAycE5ASbBVAG7gaBB+8HQAhkCFkIGAibB/YGKAZDBUoEVQNrAo4B0QA0ALX/Xf8r/xr/FP8z/2z/n//T//z/EAASAAwA6v+z/2T/F/+4/j3+yf1m/b78Z/w3/HX7D/vb+ob6Rvr/+cT5mfl0+Yj5oPmx+Rv62Ppe+8T7q/y8/Y/+Jv+7/4UALAHuAc8CQAPaA7UEMQW9BX0GMQegB7IH4QcACNYHngcCBw8GUwWLBJMDvQLjAf8ARgDF/3n/U/9N/1//Uf94/+v/IwBJAH0AawBCADAA//+c/z7/0v4y/qr9R/2i/AX8qPsv+8H6ePom+tj5mPmA+XH5XPl8+ZD5qfno+VX6dvtd/D/8pfws/q3/XQCoAEAB+gHHAs0DLgRRBCoFmAW5Ba8GnAfEB5gHqQf7B+kHoQf9BqoF1QRiBFQDYQKoAbwAEADG/7b/tP+t/8//1P/8/6YA0ACYAMcAtQBjADkA3P9c/8v+Nf6d/fn8mPwF/B77zPqo+i362/mt+X/5W/lW+Xv5h/mW+c/58fk7+pT6aPvW/B39w/zF/bD/+gArAUsB/AG0AroDpgSNBMEElAWaBeAFDQd1BxkHzQYMB1IHCQfBBtMFaATwA24DYgLGARoBIwCn/8P/9//5/xAAOAAtAIwANAEQAc0A4ABvAPz/y/9Q/5j+3/1t/dz8Pvzx+037ovqV+nv6IPrj+cv5n/mA+br56vnc+Qf6SfqM+r/6qPtj/dr9Wf0d/isAvQH7AQYCmwJOAy0EFgUZBRQF2gXOBcAF3QaEBysHegZ3BsYGbAYrBk0FrgMUA8gC8AFeAeIA//9i/5z/9P/o//P/KQATAD4A7wDtAHgAbQAUAIH/M//Z/ib+XP3w/ID89/uv+yv7gfpy+pf6Ovr1+Rr6DPrk+Q36TvpY+lD6jvqJ+o36Q/wO/qT9V/39/hkBDwI+Ap0CEAOqA7AERgUKBZMFBwYrBYQFzAbxBmAG8QU0Bk8GJwbvBbAEcwMlA30CogF2AQoBIQDG/woATQBcAHcAdwA1AJkALQHdAK0AmwDo/0z//f60/hn+Sf2q/AL8sPuT++n6Vvpd+k36BfoR+jr6Gfru+Qv6PfpH+lf6i/o5+rv6Mv1V/o/93f2z/64BeQKOAvICWAMsBDwFTAU3BfsFvgUDBeIF5Ab7BmoG5wXoBckF1QU8BaUDzgJoAo0BKgERAV8AxP+1/+H/GgBTAKYAhgBLAOwAHwGrAK0ASQBw//X+kP4E/lj9zvwy/Hv7WvtJ+6X6SPpm+iX63/n/+Qj64Pm5+d35F/oa+mf6bPor+hn8Yv4Q/rn9E/8zAY8CpQIFA54DMQQ6BZUFVwUJBmQGbQVqBW4G7wacBuIFzwWwBYoFhAUyBP8CyQIeAogBhAFCAbIAVAA/AFcAnQADARQBewCUADwB5gCKAGAAo//d/kz+//2Q/fr8hvzD+zr7RPv6+lr6PvpR+hL6EvpX+mj6O/oo+mP6Wfpl+rL6U/pv+xv+g/7H/Yr+qABcAoECzAJlA7QDpgRpBfkELgULBkcFrQSVBV0GVgagBYIFZgXwBC8FbQTcAnoCLAJdAQsB7ACUAE8AKQAeADgAqQAnAaEAOwDkAOIAaABGAL
n/5v4s/rH9Uf3H/Iv8EPw/+0T7Zvv7+sn6y/qh+nT6n/rd+rP6gPqp+qH6jPrV+pf6h/sU/pf+4f2t/roAZgKXAuECgAOmA4sEdgUkBVsFKwZtBa0EeQVVBkUGggWEBZ4F/wQxBaoEEAOuAm0CjAEoAR4BDgHLAFsAUACfAAsBZgHvAIAA9ADgAFQA+/9n/8T++f1Q/Rv9ufxi/Mz7BfsH+yL7z/qP+nz6Wvo5+l36hvpr+i36JfoD+uT55vke+hD88P10/YH9Sv9LAW8CpAI8A8kDEAT+BF8FMQUABhkG+wQ5BTIGawb3BVoFkAVkBfoEBAXOA80C1AL3AR8BKgFBASUBuABcAIkA2QA/AWAB3ADvACABiQBNAP7/Pv9r/mf97Pzb/Ir8Cvxa+yD7Y/so+9766/qx+nj6hvp5+n76Xvox+jn6D/oG+gT6Hvuj/Tv+q/24/ogANwLkAhoDwwMKBMQEwgWQBdoFnAa9BSYF4QVvBpoG2AVcBZsFKQUPBXQEBQO7AlkCUwHaAOsAJAHmAFMARgCYAOIAMgHvAJIA5QCrACQA4/9Q/6T+rP3Z/L38jvwa/Ij7DfsE+wn71vrL+tr6q/p7+mj6YvpE+hf6CvrR+Zz5fvla+qD8pv2A/U/+6f+xAZsCEgPOAwAEXAQ6BWYFmAU/BscFBQVOBfwFbwbiBVwFagUIBQMFpwRjA/gCuQLHATYBQQGGAXMB1ACtAPsAIQFwAVoB8QANAdMASQAbAKj/5/7g/e/8zPyb/Cr82vtq+077ZvtG+0X7UPs3+/r6lvqP+oX6MvoX+vL5sfmC+XX6w/zA/Yz9aP4CAIwBkwIuA7AD5ANFBBAFIgUwBfAFYgWLBAMFbQWsBXcFFQX3BHQEjAR2BD4DzQKvArUBOAGJAY4BQQHcAKoA4QAGAWkBewH7ABkB4QAkAA8Azf/9/u79E/3S/Ir8IPzW+1n7Dfs5+yX7F/tZ+zz7/vrb+r/6sPqD+mP6Ovq5+Wn5evqQ/KX9sf1t/jIA+gHyAoIDEgRQBK0EZQVxBXQF6AVVBXQEowQ+BYMFFQWxBKEERAQxBPoD4gJZAi4CQgHEAAMBHQH4AJsAlwDWAOoASQFzAS8BOwEKAXYANgDW/wP/Af4t/e38rvxA/BT8vft++377VPtr+437a/tc+zT7Dfve+ov6bfpB+sn5Uvkr+nH8iv1j/Q3+q/+EAYECDgP5A0EEkgRaBUYFaQUSBlEFTQR1BO4EWgXuBGgEnAQmBP4D+wPjApYCZwJLAf0APwFUAUoB1QC3AAQBEQGIAbkBLQEzAeMAFQD+/6T/1/7r/e78vfyD/PH75fuS+0j7ivtv+3n7y/ur+4D7Wvst+wz7wfpz+kn65Ply+Xn60/zm/ar9Tf4lAPwB4gKDAxQEHwSmBF8FKAVSBeYFOgUuBCgE3wQdBX0ERQQzBLcD7QPFA4UCKwI1AlgB5AAnAVsBWQEMAf8AAwEDAaIBjAHWAN4AmQDo/4z/EP9e/k/9Y/w4/Pn7qPu6+2P7H/t3+3v7evup+4/7Z/ss+yT7K/vp+r76X/r5+bz5sfom/R3+5f3Y/ncAZgKOA+IDmwTRBBoF7AWsBaIFRQZxBXoEfQS7BDsFugT4AyUExQO+A8IDhAJKAkoCNAEHATcBQQGKARUB7QAyASIBvQG+AfoAGwHDAPP/tP8e/33+uv22/Ij8V/zh++f7gfs++4X7Wvtn+4z7ZPuB+1j7Ifsa+9D6ufqu+iP6lfnQ+m/9Rf7c/aT+fQBlAloD1AOCBKcECgXPBW4FcAUsBjYF/QPtAzwEqAQNBGsDjQMHAw4DEwPaAa8BuQHIAKcAyADRACYBzgC6AAkB/ACyAfQBJAEkAf0AKwDS/1X/sv7//Qb9y/yh/Az8Kfz5+3D7rvu4+6/76vuw+7T7t/tD+xv75Pqj+sz6UPrd+cX7Mf6J/l3+V/9NAQkDfAMABNYEAAWGBfIFTQWEBRYGDAUGBPUDSQSQBOsDeAOPA/
8CxgKTAqsBmgGeAegAsgC5AOUAagEmAbYA1ADlAEgBaAHRALAAjADE/yT/kf4B/ob9wvw5/AX8xfvK+8b7h/t1+4D7pPvE+7v77vv5+3f7M/sm+wT7Dfu0+q36mfys/un+8/4zAO8BHAOWAzMEyQTsBFkFnwUTBRwFwgVkBXQEGQQ4BDQE/wPXA5UDIwOmAvsBeQFEAQ8BDAHgAEEARADoAAIBtACtAO4ACAHFAH4AXQAXAKb/BP8u/mf97/yz/Gv8Afy/+7f7p/uP+3z7lvvk++P7xvsU/Cb89PsM/Cb8HvwZ/Bb8svzl/eX+mP88AOgAjQHSAQECnwI8A5gD1QPUA7MDtQPjAzoEegSqBOIE8wTjBK8EOwS6AzUDpwI6AsEBMwG9AG4ATwBQAEcARwBaAHMAlADCAN8AzgC1AJ4AXwD8/5r/Ov/G/ln+Gf6+/VP9F/31/MH8cfw3/Bj85vvL+9v7yfuU+2T7MfsT+xn7FvtA+837f/w6/eX9bv7l/ib/j/9lACoB0QGUAjMDuANBBMUEUAW1BSAGqQbxBv8G0gZMBqUF5QQpBJID8wJGApkB6wBWANb/ef9m/2D/T/9q/6v/5P/4//X/7//G/3v/T/8s/9D+aP4U/qn9OP3v/LL8Zfwa/On7t/to+zX7H/vT+oD6Y/pU+kn6bfra+pL7avw1/en9i/4T/5n/QAAIAdYBkAJPAxgEsQQhBaMFFwZuBtMGMgdgB0IH7QaGBuIFCQVZBLwDAQNCAp0B/wBSAMP/h/94/3n/jv+f/8X/8/8PAB0AAwDX/7L/c/8r/9P+Zf4L/pb9//yp/Iv8WfwS/Mj7ePsi+9T6qvqG+kj6HfoN+vf5FPqq+nv7RvwN/bL9Nf62/lT/MAASAb8BfAJAA+8DmQQiBZ0FCwZTBqYGBwcjB/EGjQb8BUIFbwS0AxgDcgLJASUBfADz/5z/cP90/5L/xf8EADoAcwCkAJ4AbQAzAO7/nv83/8H+Uf7t/ZH9NP3N/Hj8QfwL/NH7m/tn+zb7BPvZ+rn6pfqe+p/65Pqg+5v8l/1s/gb/cf+8/xcAuQB9ASYCvgJaA+oDYwTdBFIFoQXcBTYGiAaBBiwGwAUqBV4EmgP3Al4CuQESAXUA6v96/z7/NP9D/2z/rP8JAG0AqgDPAOkAxgB5ADEA5P9+/wb/lP4l/qH9L/3Y/Gv8BPzA+4L7WftD+yP7AfvQ+pH6bfpY+nj6EPvg+7X8iv06/r3+J/+Q/zAA8QCWAUwCDQOgAxwEkgT/BG0FxAUjBoUGoAZ8BjkGvQUZBWQEqQP8Aj0CgwH4AG8A9/+z/5D/lv+0/97/KwB6ALMA6AD+AOgAswBUAOb/f/8R/6v+Sf7f/XX9Bf2X/DD81PuJ+0r7Evvt+tH6p/pu+jj6EPop+rX6mfuU/Hv9KP6p/hH/f/8wAPQAkAE5AugChAMcBJYEAgVZBY0F6QVaBo8GlgZgBuwFUQWRBOADNQNhApQB6gBGAMX/Z/80/yf/H/88/53//P9DAIEAmQB4ADIA4P+W/zr/yf5l/gb+j/0U/an8NfzB+1f7CPvQ+qX6gvpm+jv6Cvrn+fv5dvpI+0f8Tv0p/sr+Tf/c/58AcwE9AgQDuwNVBOEETQWpBfAFKQZ2BsgG9wb3BroGRwaoBeQEKQRuA58CzAEFAUoAp/8n/9/+wP6z/sr+C/9i/8D/EABIAEsAFADM/4H/Lf/W/nf+C/6O/Qb9jfwa/KT7Ofvi+pz6afpD+iv6//m6+Yf5j/kD+ub69fsB/d79eP4L/7f/jgB+AUEC9wK4A2QEDQWcBQIGSgZrBpUGzAbhBtMGmQYxBq8FBgVJBKAD4QIUAloBqQAFAHb/Dv/R/p3+jf67/gX/Yf/H/wgADwDj/57/VP8C/7P+cP4p/tj9c/0C/Zb8Hvyb+zD73/qR+lD6Jfry+ab5ZPmE+Q764vrq++z8w/11/iD/8P/jAM4BmAJiAzME9QSXBSUGkga+BtQGAAcqBy
UH9QaiBhoGZQW0BAgEUQOHAr8BCQFXALf/Qv/0/rD+hv6P/sr+Df9P/47/nv9r/yT/3v6T/kH++/3E/XD9Av2c/ED80Ptd+/b6rfpk+iD66vmt+WP5NPll+QD61Pqw+4z8T/36/bP+qv/CANIBxwLCA7YEgwUsBrkGIwdbB4kHvwfgB8UHeQcSB4gG0wUVBVMEgAOTArQB+QBSALv/T/8M/9v+x/7o/iL/VP+E/6v/rP9x/yP/1f55/hL+wv19/ST9w/x4/C38wvtM+/P6qfpX+gr6yPl8+SX5GvmF+TD62/qU+1H88/yR/W7+gv+KAHwBigKsA6QEcAUiBrsGGwdqB8EHCQgPCN0HkgctB54G9AVGBYcErAPKAgICQgGBANb/Vf/4/rr+p/67/uL+Cf8t/z//LP/x/qP+T/76/ar9Vf31/Hz8AfyK+x77vfpw+i367fmu+XP5NPkP+Tn5vvlz+jP78vua/Cr9yf2o/qv/qQClAboCzQPGBKkFfAYcB3sHyAcgCGIIaAhHCP8HggfNBhcGbAW3BO8DLgN0ArQB/ABfAOT/fP84/yH/KP85/0r/S/8v//n+vf6D/kX+9v2U/R/9mfwT/JX7IPus+j/61Plu+RH5zfic+Ir4t/gq+cf5dfor+9P7Z/wB/c/9zP7c//EAGwI4Ay4EBQXRBX0G8gZRB6sH9AcHCO0HrwdDB6kGCAZ2Bd0EKgRzA8MCEAJkAdwAfwA6AAkA+v8GAAwAAwDp/7T/Wf/s/of+Lv7R/XT9GP22/Dn8uPtC+9D6Vfrd+XH5C/ms+G74dvjD+Eb56Pmd+lD7/vu0/In9fP58/34AgwGFAncDVQQgBdkFawbeBjcHbwdwB0QH/gajBjAGtQVABb4EIARxA8cCJgKQARcBzgCiAIUAhACiAL8AwwC4AJ0AYQAKALP/Xf/s/mL+0f05/Yz83vtK+9D6WPrn+YH5Hfm4+Hn4j/jo+Fn50vlW+tX6Tfvk+7z8vf3B/sr/9AAmAkMDRgQ0BfIFcwbfBkQHfgdlBw8HnAYVBoMFBAWQBAIETQOSAucBQgGtAEQADADu/+7/GQBdAJUAvgDeAOIAvQCGAFcAGAC//1L/4P5O/qH9/fxh/KD7uPri+Uf52vix+N74Qfmm+R/6xvpn+8/7LPzK/JT9bP5p/6oA6AH0AtcDtwRYBaEF0AUrBnMGYgYcBs0FXQWyBBUEjAPmAhgCfQEfAc8AbgAhAPX/1f/L/9P/8/8OAEUAowAAARQB3gCQACUAif/q/mP+yf0G/TL8MPvt+cn4E/ik96f3zvjF+hb8dfws/bb+CQClAGEBjwKsA3UEBgUKBU0EfANPA4QDTwP3AkQD3QOvA7MC3gGKAV0BXQF8AWEBLgFJAV8B+wCEAIAAvQC0AJ8AugC3AFkAzP87/6j+Iv7i/eX9rf0G/Xn8PfzL+x37z/ro+rn6MPrX+aX5Evox/Lz+bf9N/38AjAK6A7gDzQOXBGoF3wXOBecEwAP7ArsCxwJ6AiQClAL5AlECIwGCAK8AzQCxAOMABAHkAPMA0ABiAEwAlgDIAKoAYwA0AAYAjf/j/jv+v/1q/R79q/wA/Ev75frE+nH65vmZ+Yb5VvkN+RX5mPqr/dD/5/8bAMoBvAOeBNAEVAUEBpkGGAexBlYFSQTqA8QDVgPWAg0DhAMIA8oB8gAFAVABJAHhAL8AqgDdAOcAPACy//f/VgDr/y7/OP+o/1v/fv7N/YP9cv0p/X38mvvE+nj6hfoK+g35Y/h7+KT4SPgd+U78M/+H/yj/fADKAhYEXQS/BGUFFwawBngGRAU6BBEEYwR5BD4EXgTEBF8ELwN2AqkCAwPYAmgC5gFBAf0AFAGBALL/zf8zALP/t/6F/vX+wv7l/Uv9OP1O/fP8Gvw4+436PPo++uD59Pg1+Cr4K/jQ9wL5evz8/r3+lP57AK0CZAOSA0gECAVmBb0FoQV/BF8DWAPiA/sD0AMVBGMEqANtAiICuwIeAw
UDsQJSAvYB0QHFATQBsgALAUsBfgCF/3D/sf8a/wb+fP2J/VP9h/yJ+6n6/fmu+Xf5v/jH94T31PcG+F75ufxX/yL/3P4FAa0DPwTMA1wEdQXGBW4F/wQuBBYDrQI+A6gDhwOTA8oDLQMBAtQBsQIpA74CLQLlAbsBggEvAaoAawDDAM4A8f8b/zD/kv8b/x3+3/1J/hT++vzd+0373/o/+pH5ovif9zX3RPcD+JH64P0+/9f+g//AAYEDIAS8BKgFNwb0Bf4EAwSOA4wDogO5A6UDVAMBA+ECkQIrAk4CugK3AjsCxQGJATcBqgAaAOP/OgA8AF7/lP54/oH+Vv4q/h/+7f1W/Wz8dfuy+jr6l/mn+OX3dPcW9wr3L/lv/SwA2f90/04BvgPBBCoFAwaZBhoG/AQpBKMDSQM4A3wDigNaA5sDCwTYAwUDvgKZA0gEvwPVAnACWwKqAZUAEwBjAJkAzf+Z/hT+Lv4t/u79t/1+/fj8A/zL+vT5n/lD+Vb4IPcz9qX1HvYV+Wf9hP/U/uP+cAGzAzgE1ARABrsGiQVDBAYECgSVAwsDIgOiA+sDEgQjBMsDIAMnAxoElwTzAyIDwgJPAmQBrwCpANgAfwCO/6v+Wv52/nL+P/4Z/s79Af3K+5768fl2+bH4ePdP9oH17/Qt9k36Rv67/pj9CP87ApgDnwO/BH4GbAaRBGMDqgP5Az0DiAIVA/AD+wOvA5oDbQMtA7UDpARxBGMD5gL3AoACegELAVMBZwGhAJr/Iv8p/xz/Af8f/+3+2P13/HH7r/rf+dT4v/e69pf1xPRV9tn6cv4J/rj8tP4wAjUD7AJcBGIG3AVIAw8C+QKxA84C6QGiAuQD/ANRAwIDSAPHA4oELgXLBNUDTgP8AkoCdQE3AVQB5gDb/+7+xf7i/pT+UP6A/n7+jP09/Hn7+PpB+hz5xPev9sH10/WP+MD8nf5//Wr9GwAxAjwC2wL4BMAFzQO1AacBhgJhAnoBsgEEA6QDPAP8AnIDCARyBOoEAgVjBMMDfwP/AhcCaAFFATEBggB3//r+A/8B/+v+//4a/1j+y/y5+2f7GPsA+mL4Pvc+9of1UPec+5P+4f3q/Bn/3QEwAvkBwAPJBdEE+AEZAV0C5ALNAXoBDAMXBFwDnALzAsEDPQSRBLIE/gMQA9ICwwIYAjIB5AAOAacAnP8q/2z/XP8V/1P/4f90/7X9UPzq+4n7n/pC+TX4VveM9gH4Evwu//j+9/2I//YBSQICAmYDBQUYBFMBHgAnAbkBjAAOAO4BdgODAlQB9AFHA6oDpQMjBC8EWgOYAk8CVAILAk8B8gDSAB8AP//j/vz+EP/5/gr/w/50/f77J/uV+sD5jvh397P2U/dX+tT9B/9X/t/+MQG/AvsCsQNMBbkFxQPFAbUBfwIwAisBmgEsA28DQAKTAUUCYAPTA/MD9QNlA3cCsAF0AYUBAgExAOf/z/9q/97+p/7l/hf/Ff8H/3H+R/0h/Av7DPoJ+cz3pPZ39r74qPzR/k7+Df4GADsCAgPAA3IFSwaaBCwCgwEzAj8CGAHjAGkCSAN4An8BwgH7As0DKQRRBMcD/AJxAjUCLwLLAf0AUgDf/6b/d/8g/9/+wP6t/qL++f2c/GT7c/p7+VX4Mvcw9rX1sPfr++H+8P6X/i8ALQIcAx8E1AXBBkwFwgKeAQMCHgIXAacA8wHfAvAB3QD+ACcCVQPlAxwErwPsApQCcgKEAosC0gHWAEwAIAAUAMX/WP8w/xX/zv4s/iT9SPx8+1L6/Piw93j2zPWO9+v7h//o/wL/3//RASQDagQ8BvsG+QT6AbsAEAEqAUIA3P8PAd0BEgE8AIsA1wEeA+EDOASPA2MC8QEVAoACnAKJAUAAxP/Q/+v/rv9T/zP/8/5//t/93vzf++f6sfmF+FD39fWs9Vv4VP2mAIoACgBtASsDUwTzBfAH4QfZBN8BLgFwAdQAiv+q/1wBxwGeABoAwwD3Af
cCuAMnBF8DKALnATECiAJLAu4AyP+U/53/oP9o/xb//P6z/mT+wP1v/E77U/oi+Sb44PZ59VP2jvpk/+IAEgDfANQCHwSVBZwHrAjTBm8D2AHbAXwBRABt/1gAVQF6AHf/p/+uACkCQwO3A0wDJwK8Af4BPwKUAuUBKAA6/1b/gP9H/6v+cf6B/iL+ov3o/OX79vrK+bj48/eQ9hH2Dvkw/kYB0gBeACgCCARjBUkH2Aj7B5sEGAL6AcsBFgBx/hb/8QCWAK7+Y/6j/xABTgJJA2UDBwLfAIYBhQKWArYBUwDI/+v/l/8c/6f+S/5M/h7+t/3x/Kf76Ppk+lT5Y/hK94v2v/jq/VUC+AL1AQEDGwWOBi8IwgmkCTMHLQS9AjECCAGA/zf/jQDaAOv+vf2o/jYAkwFjAmECegFbAF0AEwFWARIBxf9l/kD+Qv7k/aL9cP2Y/X/9oPyr+7z6HPqr+bv4Afhj91/25/dD/aEC/wOnAiQDxQX2B4QJuwqjClYIPgXtA9IDggIVAHL/VgFAAmkAnP7p/pMASAIpA9wCjAFAACQA0gARAWIA2f65/cH9vP30/DP8GfyK/KT81fus+pv55/iS+OL36fYj9pX1OvdP/OMBFQQ3A28D8QVvCEQKkwt+C4YJzAYjBR4EZAKRAHYAFgKiAs8AZP/5/48BbAN4BPoDkQJ2AY4BugHQAOT/Nf9k/qb91PxB/B/8+ftG/H/8e/vy+cn4Y/hE+B73pfUE9bb0z/Uq+vf/0wI1AnUCLAXGB2UJyQpjCzEKnwdxBTAEuwIaAeMAUwL7Ak0Bmf/4/6IBiwOxBE8ENwOKAnACSAKnARcBmACb/27+dP27/Hb8dvyW/FL8+fpm+Yv4K/i493T2FvXM9Kv0LPXP+Hz+6wFVAhcDawV6BwEJmAo1CygKBQgFBooEqQLEAKwAVgIJA38BKACrABACzgM9BVkFYgRLA9QClwLFAe0AeQC8/2z+CP0j/PL7F/w+/AT8//qx+cb4Ofix95H2XfUd9ej01vSk9yL9/wB9AdYBDQQvBm4HHQnSCqEKOQjSBcAEcANrAREB1wLLAz8CjADZAGMCFwRbBaQFAwXpA/kCnQJXAtUBOAE6AMv+hP3C/Gn8Z/yk/HT8RvvD+bn4Evhb90n2aPU59cP0bfT29mb8iwBXAacBqAMCBqIHDgkyCt0J1gfMBXMEmAK1ALkAMQKzAn8BWwCKAM4BtAMkBTUFgATNA2kDEANYAvIBNAK4AfX/XP7E/Zj9g/2j/YT9ZPyV+lH5zfgt+Or2t/V/9UP1uvSp9sr7OgBmAaQBRgMSBVAGHAjzCdsJrAdgBcID4gEuAA8APwHwAQkBsf+w/1EBeAPjBE0FDQVABHcDDwO1AnYCSgJvAcL/QP55/SX9R/2y/Xf9Ufze+rr57/hU+Kj37Pai9k32dfVd9tj6AAD9AdoB5gLXBFYG/gd5CWAJ8geCBvAEjAKUAEoACwGCAfEAsv8G/wcACQKpA3AEMgR2AxADqAIaAuIBLwICAksAgv7Y/YT9lv0J/tX9x/yK+5P6qfmi+Nb3d/dc9xT3DfbV9UX5F/8yAo8BZAGkAyoGvQfhCCgJNgg/B0YG9gNkAXkACwH4AcIB+P/N/t3/rQF1AqoCMgNtAwIDXQKVATEBnQHFAbgAFf/3/aj95/0//iH+ef2p/ND7ofpM+aX4jPhK+Of3Wve89ub3MPzuAHsC6AGyAmYFOQggCfQHGQdmB+EG0wScAg0BNgB7ACYBjgA0/yb/qgBiAtoCIgIHAsICswLNAUcBdgGuAQoBp/+S/j/+Y/5+/lr+5/0o/Zj8BfzV+sb5Jfl3+EX4Vfif91332vlF/goBCAH8AMICVAXvBjQHKQchB2UG7gR1A3oCyQFZAYUBiAGMAJv/EwCGAXUCYwIaAvoBuAFbARkBKwFjAfoA4v/v/of+if63/tL+tP4p/m39wfzS+7r67Pl3+Vv5PfmL+Kr38fdX+vX9eQAMAS
sBdgKlBDcGsQa5BrwGnAbHBR0ErQJAApEC/wLOAuIBJgE0AbkBCgL1Ad8B7wHYAUABYgAaAHYAmQAoAFz/xv68/tb+xv6S/jf+xP01/W38dfuf+j76MPoR+qf5KvkG+ef5QvwC/0wAaAA1ASkDIgUwBgwG7QTpA9ED4gOHA+oC/wGFAQ4CcALWARwBVwFYAgoDwwK6AecA2ADbAJ0AcQApALD/bP93/3b/DP+i/sr+Jv/+/mH+wP0a/Vn8vPtb+xz7Afva+qP6g/ps+vH61vwz/zcA9/8OABIBFAJwApgCKQPLA7YDMgP5AuICswL+AtQDTATWA/ICfAKjArICSwIFAhYCyAH8AHMAZgBdAA4Auv/Q/+r/Q/9R/jL+xf4M/8L+XP4D/qb9TP0M/fL83/zg/E396v0c/uX92P1g/jn/qv9+/zf/S/9+/3H/O/8f/1r/3P81AEMAPAA5ADgAYQC3ABwBhgH2AVMCiQKlArgC3wIjA2kDgANjAwkDiQIIApEBRAEoARQB0gB1ABsA5//c/8H/gP88/yP/D//0/s/+nP5k/jX+Ff7P/X39QP0v/Sj9Fv0B/e38I/15/bn9zv3w/RX+N/5l/oz+yv4g/4v/y/8LAFUAxABVAfABdgKgArwC4gL9AukC3QIDAxsDGwP0AssCoQKgAt0CAAPOAlkC/wGvAV0BAQGxAHQAIgCd/xP/p/4+/uP9rf2j/Yf9XP0b/fr86fzb/OD80Pyh/GH8Y/yY/Nn8Nf25/S/+mv4Y/7j/XQDNAAAB/wAOARoBJgFrAcQB9gHRAasBnAGsAfUBUgKTAogCQQIUAiICPAJIAkkCPgLuAYgBJwHPAIcAPAAcAA0AFgD//83/mf9X/x//7f7H/nf+LP4I/gD+Af4G/gP+Ff5Z/q7++/43/3z/kv9+/0P/H/8s/zX/M/8t/0//Vv9R/3f/1f9FAIwAtADOAO0ACQEcAUUBcgFxAU8BJwEGAdsAtgCmALQAwwCxAH8AXABQAFwAZgBLABAA1f/B/7n/vP/E/8X/vf+x/8D/9f8zAEMAMQAbAPv/yv+w/7j/uv+h/3n/Wv9Q/1//iP/B//D/BwAVAB4ABwDc/8T/xP+y/4v/Zf9X/2L/ev+X/6r/tv/O/wEALQAyACMAIgAnAAYA0P+z/8P/6P8HABAA/f/g/9v/+f8bADoAVQBlAFIALAAlAEsAhQC3AN0A8QDqAM4AwgDeABIBMwE8ATwBMAEdAf0A0QCfAH0AXwAZAKP/Kf/m/tX+0f7L/uD+CP8r/1f/kv/I/93/4//k/9X/tf+g/6D/j/9h/zf/Mv88/0v/a/+F/2r/I//0/vD+6P68/ov+fP6A/n7+fv6p/g7/lf8jAJoA6QAhAVcBjgG4AcoB1wHlAdIBkAFMATwBWgFyAW8BZgFpAX4BoQGuAYUBOgEJAfYA1QCYAGgASAAXAND/rv+9/8//xP+f/2f/Dv+5/pb+i/5T/uX9hP1U/T79Nv1h/bb9/f0k/kv+i/7a/in/fP+8/9H/wP+0/8T/4f/4/xUAQAByAJsAwADzADcBfgGzAc0B2QHrAQECCwIOAhYCKAI6Aj4CLwIMAtoBqQF9ATwB0wBXAOf/i/8y/9X+iv5j/lT+S/5G/kz+Yv5+/qD+v/7N/sv+zv7f/u7+6f7e/uX+//4Q/wj/+v78/hT/M/9O/2f/hv+p/8r/6f8QAE8AnQDkAB8BUAF5AZwBvwHiAfMB6QHVAcMBlwFBAdwAkQBiADMA+f/O/7n/pv+Z/6b/1v8OAC0AMQAmABIA+f/v//z////U/4L/O/8i/yf/Jv8N//T+8f77/gn/H/8+/1D/VP9b/37/s//f/wEAJwBQAHAAiQCvAOIAAwEDAewA1AC3AJMAcgBTACcA7v++/6n/sf+9/8X/1f/x/w8AHwAqAD4ATQBPAEQAMwAbAPn/0/+2/63/sP+x/6b/nP+j/7X/v/+//8P/0//f/+T/6v/2////AQAHABYAJwA2AE
YAVQBVAEQAOQA6ADoALQAhABYA///Y/7j/s//A/9T/6v8GACEAMwA/AEgAVABdAFsATgA3ABgA7//H/6v/qP+z/7b/sv+y/7X/t//D/9r/8f/7//j/+P8BABMAKABEAF8AbwBzAHMAdQB3AG4AVQA1ABQA8P/V/8L/tP+m/5f/j/+U/6T/wf/e//j/CgAYACYANABCAEYARAA7AC0AGQAHAPP/4v/Z/9T/zv/I/8L/wP/A/8T/0P/j//j/CQAVACEAKAAqAC0AMwA5AEEARQBDADgAKQAZAA0ABQD+//X/6//f/9P/y//O/9n/7f8CABUAIgAnACMAHQAaABcAEwAMAAQA9P/g/9D/wv+9/73/v//C/8H/vf+3/7X/u//I/9f/6P/6/wgADwAWACAALwA7AEsAWQBhAGAAVwBOAEQAOwAyACoAIAANAPX/2//K/8b/0//o////EAATABEAEAAUACEALAAvACQADADv/9f/y//J/8r/y//I/8D/u/+8/8X/y//O/87/0P/V/93/6f/0/wIACQAQABgAJAAzAD4AQQA8AC4AHgARAAsACQAIAAIA+P/w/+z/9v8IAB4ALwA3ADcANAAzADUAPAA+ADgAKAASAPz/6v/g/9n/0//M/8f/wf+9/77/xP/J/8z/zv/V/+D/7P/3////AgAAAP//BAAQAB4AJwAkABgACQD+//r//v8FAAcA/v/z/+3/8/8AABEAIQAnACMAHgAZABwAIQAmACUAHQAKAPb/5P/c/9n/1v/P/8T/uf+0/7T/vv/I/83/0//a/+L/6v/z//z/AgABAP//AAAGAA8AGAAZABMACwADAAUADQAYAB4AGwATAAwADgAeADUARgBLAEsASQA/ADkALgAiABUABwD4/+j/2P/M/8X/u/+y/6P/if9u/2L/c/+y//L/CgAQABYAJwBkAN4ApAEcAtYBgAE3AQgB+QDCAGQA/f+r/5j/bf8h/9r+lP6I/or+eP5e/jb+Nf5L/ob+M/+r/wUAiQDmAEcBfgGRAbkBrgGtAcgBhwEaAb4AggBpADwA+P+2/3X/P/8c//7+0v6w/qv+tP6x/qP+mf6q/sn+9/4u/1f/e/+T/5X/nf/G//D/AQAAAAcAMABYAGwAgQCJAJ4AugDNANkAvACUAIEAdgBgAC4AGABAAH4AiwBGAA4ASACqALMAUgAKAC8AVgBCACsAIwARAAcAPgCJAI4AVgAqACAA9/+u/73/EQASAJb/If9I/9f/GADv/+D/NwCQAJIAYQAyABIAEQAMANj/e/8g/yH/tv9cAJQApAAaAacBhAHmAIMAfgBmAMj/AP+s/sP+4/57/vv9vf5bAFsBuAHAAYsBUAGxAMf/Jf+x/qz+PP9V/4H+C/7p/gkAKgDZ/xcAuwDvAHAA7//E/3//Jv88/2D/DP8Q//b/1ADhAFYAIQDaAEYBcwCq/8r/3P9t/wP/0/7i/hb/Sf+8/0cAqQBwAUICFQJLASMBjgEwAZD/Kv5Z/oH/yP/Y/mr+Kv8wAKUAkAB5APIAlAFtAXkApv+u/yMAJAAd/+T91P3P/j7/6v7g/uH/twFTAgQBRQACAXcBMwGsAGEAWQACAIr/Iv+P/nr+aP9gAIMAWQBJAWQCsgFPANj/of8e/5r+dv7r/lP/3/9EASoCXAGgAGQB1AGmAFX/Av8W//j+m/6Y/hb/GP/b/mb/CQABAOL//f8oAE4AYgCPALwAVwC4/97/VAAeAIL/V/93/1D/Nf99/8L/IwD6AFMBiQB//2f/PgCdAO//b/8gACMBLQEwAIL/HgA1AXIBcQD9/pD+Tv9j/7L+2/7x/yEAC/9W/vL+KAALAdQBaALfAQQBbgAo/3v9W/yx/Br+xP4g/4UAygH4AZEBPAEHAbIATAC5//P+lP4W/3D/zP6U/tn/hQHqARYBPQGhAk4DeAIUAaEAzAD3/1b+Y/2t/XT+fP+HAOMA4gA3AQ0CnALQAVYAvP+1/wf/nP0t/T
j+bP+7/0H/bP+jAHQBCgGLAKsAygAZANn+Jf62/Vj98v1q/3gA1QB1AZkCFgMmAgABnACYAOL/hf7h/e39w/2Q/eT9Rf9KAbECfQNCAzoC9gGlARcAa/54/Uz99f2F/qj+Yv/VAJMCTAP6AVcA7f/9/5//Wf4s/Yv9zv2E/fb9jf4N/2AAxwFpAloCfAGMAF//mf3l/BH+u/89AcUBigGxAYsB/ADkAOgAvACpACQA7/6k/VD9SP7I/9UAUQGoAbwBnwFQAa4ANAD1/+n/QgAPAB//u/51/j/+Kf82AN0AXgEAATAAVP/1/Tr9p/2j/rf/5wAQAosCUQK+AVQBIAFZATQBqv87/jr+vv67/in+Cf5E/2sAZwGiAuAB5P/f/0wBUgEeAGT/5f/SAFIA4f6K/qH/ywB2AW4BnQBo/3/+YP4f/qz92f18/qP/rACsAKEA4QC5ABIBxAFSASoAhf8A/7P+Iv9e/2T/of9UAIsBRQFE/6/+5P+3AOQAKQGsAUkB4f9O/8f/bf+Y/u/+t/9gAGoAJQCfANEAhgCAAD8AqP8j/3P+J/6M/uD+XP9qADQBLgG+AB4AIQBRAGr/P//N/1T/iv4z/jb+Cf94AMcBiwKnAl4CZQEzAGL/0/6V/gj/cP/8/gP/6v+eADwB/QGpAvsCKgLbAIT//v2p/Sz+ff5K/1MAaABIAPcA3QHlAV8BPQHWANf/av6v/cz9AP6R/kj/RQDMAG8AbwAEAd8AWQArALD/i/5P/VP9UP4O/yf/0P9fARACygFMAcYALwCn/8P/9v/d/oX9G/55/xEAIwAjALoAmwEfAiUCjgEEARkBEwHwAO4AZQDl/yoAoADXAKUAcQCXAAIBIgF7AN3/3f+x/+z+Wv6f/lH/j//M/0QAbgDQAO8AbgC9/8z+H/6j/Tj9Af2u/Lb8rP2y/kH/+/+4AGQAef9a/5D/Kv9Q/uj9J/5o/sL+ev9GAGIBkwJGA64CeADM/u/+rf/+/x0AlgBgAfABXAK9AscC8QIXBJoFeAVpA5wB4AAoAFn/Pv9TAMMBuwKTAqsBuACR/xD/Dv+F/h7+j/4g/yj/ZP4S/hb/5f8EAJT/6v5i/m79Nfy2+8b7/fvx/CH+z/6q/sH9df38/Tj+Qv6r/i7/Sf92/9f/JwAKANH/XQA2AYABTAEZARcBsQGWAV8BRgJBA0gEpAQZBFQDrQJrAsYCDQP5AuUCIAMJA+4BZgCg//v/gQCYAIIATQGOAbsAgf+K/h3+6/1m/ub+f/69/Wn9IP0P/RX9cf0B/jb+0f03/aj8KvwV/Jr8pP1p/o/+Qf61/Tf9Gv0A/RP9pv1G/ov+5f7y/1cBAALyAW4CrgNABJgDYQJjAe8AhADvABsCLgMnBAwFOAaPBmEF/APgAiMChAHmABoBPwHdANEAZgEfAu4BagE8AQIBaAD5/13/Nv7P/OH7HPyg/E79LP4G/43/vP9G/3v+kP2z/D/8O/yY/I78SPxE/F38ffyz/Pv8Q/2Q/SX+rv7+/o3+gf04/X3+dgCdAfQBawKlA2gEeQSxA30COQL6Ad8BEQIFAkECrQJ5A0YENgTbA20DNwNuA0UDywI3AjMB5ABSAeMA/v8+/3v/vgC3AY4BVQA3/7/+jP6E/n7+P/4n/rD9IP2f/Cr8fPwm/fL9eP5h/hz+T/0L/G/7sfuF/CP9tvwF/BH8H/wk/LD8kP1I/m3+t/7v/nr+JP79/jEB6wOSBeQFuQWqBL8DUANDA2oDTQOUA7cDOQOIAoECWwN1BFgFEgZeBcwDpgIoATAA7f9iAIwBpQGbAOv/UP9Z/8z/IwBMABMAQwC4/zT+JvzC+hX7Dfzr/MH9QP5D/vn9TP3a/Fn8T/z0/DX90fxv/LL7r/oi+tj5xvo9/Gb9Uv7E/rb+hv7a/pf/oQCFATAC7wNjBfcEDQREAxMD5wMSBH0D9wJJAvwCqwMHA8AC+AJABBsGhAaxBSsE/gKzAnUCcgHV/zP/Xf/d/w
kAUP8F/5X/lwCPAVkBHQDC/qX95vxl/Av8MPzA/IT9Jf4e/rr9af03/Vf9l/1W/a/8uPsj++j6x/qA+tr5KPqN+0T9df4k/4z/PP+5/rz+qv/qABACIwMMBKYE3gNLA2QDHQN3AxgE9gQHBUADQgIWAucCQwQLBLkEggUXBU0EtwKMAR4BTwEWAgAC+wAmAO3/ewC4AEYAMQBhADMAWP+y/XH8Pvzl/HT+X/8f/+f9sPzr/Er9gv1s/in/V/9e/mr83/oR+kb6a/uh/Pn8V/ws+0f6Y/pc+xz9/f7o/3sAZgAE/3z+w/9rAWwDVwU9BnkGjwR2AmYCLwJdAvsCZwNnBDIEowP4A7oDZATtBC8FmgVkBGMDyALSAV0BCgEEATkBcgHhAYMBxgDb/53/MgCa/5P+3v23/S/+Ov7I/W39bP0X/gj+9fxk/JD8F/0L/dz74frO+uH6GvtT+yj7/PrR+hj66/jl9wb5+vvL/nsAUAAAAFUAHwG8AbwB/AJMBZQGIgbpA8sCMwOSA5IE/gTfBaAGigZHBlMEVQNMBJwFkQYJBbADvwM8AxwD4QHQAHQBvAHyARoBOf/2/of/lv9N/03+Df5A/mj9dPzU+/r7/Pz+/Tj+qv3Y/CT85fuQ+zX7+/vS/KT8wvtl+q/5FPr/+t/7LvwE/L37ePvu+hX6LPqO/K3/QwF/AVEByAGYAkkDpAQHBnkGOwaWBeUEHQTfAwkFKAY8BlUFFwTXA6YDdQMoBHMEywQuBJoChgJxApsCJAPWApQCPAGR/+D+gv7b/jD/Nv8B/1/+vP3+/K38F/20/T3+Dv4q/RD8dPsB/PH8Wv2O/Xj9y/wI/C37v/o0+5779vv3+6z61fii9933nvmB+2r9Mf8tAGoBQAKkAqsDWgTbBeYGFgZzBXMDmgKlAxUDnwPaA20D0gSSBE4EwARGBCAFIwVtBJMEyAOAA1oDdwJ+AhQCRQHNAAoAy/+u/57/SP9e/g/+cf0l/an9Iv0X/Yj9Ov0G/Xb8Zfz9/Bv9A/2r/LD8uvwR/Mb7sPu9+zP80PvC+hD62Pk7+kP6xfnv+XD6XPtE/NL8EP/nATsD7QOSAyMDOAQuBdAFrgaSBqYFEgVMBK4DNwTmBJoF/gU0BZAEKAQDBJ4E2AQvBT0FPwRuA40C6wEpAukBNwGwAAEAe//G/hn+XP6q/pr+RP6A/fT8dPxG/ND8Df31/AP9xfxH/BP8EfyZ/Fb9L/2g/NP7EvtA+3D7Jvvu+pT6ePq++nD6UPrA+jz7Hfxl/C/9LABpAkwDVQSQBNkEfwXHBZ0G+QZyBlQGEgVaA98CrAKKA5MEtQQQBTQEXAMKBA8ELgR4BPIDiQO8Ap0BwgBjALQAnQDv/y3/bv4s/l7+jv6Z/mf+7P1d/ej8rfyz/O78Kv04/XX9Of16/Jb83/wI/Xr9L/2H/Cn88vv/+/X7AfwP/Nr70fvI+9r72fuz+/f7VvyI/Lj8+P1mADECMgOpA9YDpgQ/BdcFyAYIBzUHpQZOBRYFEAXwBCoFDwUaBasE1APQAxEEcgSyBBoEdAMTA78CcALhAYkBUwG+ACsAlv8M/5T+B/7D/bX9lv0e/Yb8UvxC/FD8ZvyF/M38x/yZ/GX8bfys/JT8kvyM/Gr8Wfzq++b7T/yd/Ar9/fzn/Or8xPzP/L/84vwJ/fz8LP1V/c799/5BAH0BxQLBAzsEigQHBcIFFwZuBqUGEAbRBZMFGQUOBc8E3wTWBEcEKATjA5QDagPzAp4CQQLSAXUB+gCeACQAif9D/wP/kP5X/jL+3/2p/aj9k/1L/Tz9EP2i/Hn8OPws/IX8nfy2/Lz81vwY/Sb9Zf2u/dz9Ef4W/jL+Sf4y/j/+Of4p/jb+Vv52/oX+lP6M/nb+nP6j/qb+X/8bAJgAegHyAQoCZQKdAhEDmAP8A2wEbgRuBIgEawStBA4FRgWaBZ4FdwVSBQAFuQRlBMEDAAMgAjQBaQDM/1n/2/5//jj+2P2h/X
v9bv2L/Yb9kf2Y/Xb9av1Q/TD9JP0e/Tj9Zf11/Vf9If0R/SX9Qv1p/W79Wf1i/Xj9k/3Y/SH+Vf5v/ln+Qf4z/lf+ev5d/kn+G/7z/fv9Mv61/hH/av+f/6D/GgCLAPcAogEmAqoC/QJlAxkEugR0BQAGRAayBgIHCgcWB/gGugY2Bj0FPgRHA4sC8QEeAWgAof/L/jj+vf11/VT9O/0z/Tb9Nf0u/T79cv2k/dL97v3l/en98P3c/cf9pv1//Vb9Jv0N/Rr9SP2A/ZX9mf2s/c398v0k/lz+iP6n/q3+pf6Y/pn+pf6S/nf+Nv4x/oz+6P5n/3D/Uf+c//7/1QCoAT0CvAL/Al8D9AO+BJ0FVAanBr4G3AbpBhwHIQfQBkMGZQV4BHQDeQJ8AXsAov+9/vH9Qf2q/HT8fPyd/Mf84Pz5/Dr9af2Q/cn93v34/eX9tf2s/aP9s/2o/ZT9g/11/Yz9lP29/dn96/0I/hj+OP5g/pH+tP7F/rT+nv6p/r7+z/7G/pf+P/7r/ar95v2T/jf/qP+O/3H/zv9/AIQBaQLdAhwDNgODA0sEHAXSBToGTAaMBswGAgcnB9oGSQZoBUAERQNjApUBxAC8/8b+6v1C/QD9/fw6/Xb9hf2l/dr9Nf6b/sP+xP6d/mj+T/4h/vz9x/1+/Vv9RP1C/U/9Uv1y/Z79uf3R/c/94/0F/gj+DP75/f79HP4g/j7+Wf5r/n3+V/4S/rP9V/2e/Wf+Iv+A/zj/Lf+8/4EAgQE7AqsCBAM2A9YD6ATjBYYGmQZzBqgGAQduB7MHWQd9BhcFoAOfAtcBHwE8AEL/fP7p/ZT9lf3H/fv9CP7//ST+dv7A/tP+p/5t/jH+Bv7w/eH96/3a/bT9lv1z/Wv9W/1S/V39X/1m/XD9jf20/cX9yv3a/Rb+WP5y/m/+U/4+/ij++/2x/V39/fzz/J39gP5X/5L/a/+5/00AMgEDAloChQJ3AqkCkwPZBAEGqAayBtoGVAe5BxsI5QcbB/EFPATdAgkCeAEAASYAKf9u/uz9yv32/R7+Pf4s/g3+M/57/rv+3P7P/qr+g/5S/j3+Pv4e/tL9cf0o/Rb9E/0B/fT87fz7/Ar9I/1E/WL9cP1p/Xr9nP3L/Qn+H/4Z/uz9q/1h/QT9tPy5/Iv9nP6G/77/bv+c/zAAKwEoAqQC5gL8AkgDLAReBX4GLAdaB18HqAcHCHIIaAiWB0oGlwQ2A2UC3AFhAZAAlv+3/hX+4f0P/kX+R/4P/tL9Bv6J/gL/K//s/o3+Lv76/QL+D/4I/r/9Qv3l/Kz8m/yi/K/8pvyY/Jz8yPwN/VD9dP1Z/WT9m/2w/dn94/2s/Xn9Qf31/Ln8rvwq/Rz+3/6S/8b/m/8VANoAygGzAvkCAAMYA4ADhwSwBZcGMgc6ByAHWwewBx0IBQhHByMGlgRbA5MC5gFdAZwAnv/E/iD+2/3h/f79C/7p/dL9LP6T/ur+Mf8E/5X+Pf4I/ub90/2i/TX9x/yE/Hf8e/yw/GX91P1u/eL81Pz0/PD87/zI/Fr8/vvw+/X7B/zV+2f7K/vV+sD7rP40ADoAmwGtAscCcAN1BN0FjwYHBm4FoQTuBGcGfgYKBhIGkwUZBfQEfAV+Bi4GAQW/A2gCFwIZAlwB5QDaACsA3/6R/lX/OP+D/i/+0P3f/Uj+O/4x/jb+0v31/F386vyr/an9Kf2W/EX8EPzr+xT8Kvwi/P37jfu2+4j86vzP/Ln8zfzE/JL8q/zJ/Nj80/xf/MT7tft1/uACmgN1At4DfwX5BR8GrQYgCNsH4QWCBIID7ANDBdwEzgOmA/AD7QN2Az4EkgXcBEQDRwK3AZcBagH3AG4AzP/W/iH+jf7y/mf+7v2B/QP9Ef1o/dL9CP69/TP9wfzy/If92P3w/YH9x/yK/Ij8qPzw/NX8mPxw/Dv8fPwp/Y/9Zv0B/ZL8V/yd/MT8lPyH/In8yfvm+nT+ZQXrBhoEggRaB8gIZAdSBo
AIvwgmBYkCogEJAmADUANBAp8B5gHOAukCIwOPBBAFCgSRAnYBuwFLAuEBFAHM/0D+2P1M/hv+RP3p/Ar9qPwT/Ij89P3V/lz+bf0w/YX9zv34/Qv+u/0g/Zn8QPxI/Kn86/yz/BX8vPtP/Bz9Ev1z/Ob7r/ug+3X7ivvT+x/85ftM+zf/NgfGCdsGuQXkB7EKfQmhBqwHPghnBRwCa/9C/zoBQQI/AiABSwC7AU8DygMfBMIElwWYBL4BVABFAWQCtwF//1f9rfyW/dT9ifwC/OP8hv3r/Pr72/zn/lj/K/4k/S397v1k/lj+7/1i/SX98/xr/Pb7LPzq/Ov84vtp+yX8tvxB/JT7qPsE/N/7jPvb+5L8wvsE/AoDFAsxC04HaQZfCSoLIwhTBuAHAge/A34AUv5S/kD/9gCCApMBeQDlAW8EkAW2BJkE6AVEBVICaADyABACiQES/0j8XPsX/Iv8Q/zT+4T8Kv5X/hP9uPzr/Tz/+f66/SL9Mv13/Yr9Af1D/Bb8l/z9/IT8FvzT/HT97PxH/CX8EPy8+4v7pPvH+/P7PfzA/Nz7sPqXAFcLRA8kC+MGNAidC40JgQVsBfQFawRJAaH9qvvM+13+QQIrA0oBFAHWA3IGwAUNBAcFXwa+BLkBbgD9AD4Brv8C/S37Ffsz/DD9Dv3j/LL9m/7B/ir+vv15/pX/qP+O/gn9Zfz//GD9yPwZ/Fj8c/3U/QH9cPyw/B39Fv1h/EX7ZvpF+pz61Pqo+uD6YPsl+1b+aAfXDlUP2AvBCecKEwrgBfgDsgSeBJsCP/9t/KD6hfos/iED1gTjAyUEXwbzBnoEBQOoBDkGTAUMA1cB6v/E/az7F/sb/Aj9Sf2a/UD9Qfwv/GT99P6l/z3/qf41/kz9Jvzc+6L8h/3E/T39tvy1/Nf8/fxF/Tn90fy//ML8LPyl+iL5T/lO+sT6FPt++3j7QvwpApYLtxC9DxoNogwbDKEHEgOXAi8EmAQTAkj+O/sM+en5u/69A6oFdAUNBhsHwQU6A9wCmgTrBdkElAKrAGv+3PsU+k36HfxW/Zr9t/3H/XT9Ef3H/Vv/VgAyAHr/Yv7P/E378frn+/f8W/1y/a/9wf0t/dT8Mf1R/en8V/zB+9b6P/kU+HD4fvk4+lL6f/pU/nIGcw2NEIEQGA/KDVgKSgUpAuwAzgCwAKP/Qv49/OL6hfwoAGQDMgVSBokHuAdwBr0EkAMmA4sC0wFQAQ8A8/2D+1j6Yfut/B39if0T/lj+Av5T/W39cP4E/63+Mf7M/eH8qvtX+y78GP1f/Z79Qv5u/tH9Kv2x/C/8hPvo+rL6Qvoj+UX4Zvhp+df50/lp/UYF0wziEGsRpRA8D0EL5AXnAd7/Xf89/xv/A/8b/nr95v7IASsE4wRlBe4GCQiQB+4FRgQrAwYCngBL/1z+t/0E/a381fzz/AD9Gf1s/a/9Z/1G/YX9qf3L/a39M/24/Cn8m/uY+x789PzL/fr9uf15/Sz91Pwk/Ej7vPot+jv5Kfi+9wn4tfhb+fv52PxdAiIIuQxAD04QTxDUDWkJrwTUANv+Df7i/Wb+7P6B/6sAYALdA3ME9wRFBq4H9AesBvQEuQODAgABhP+0/pX+Y/4i/ij+Lv4N/pX9K/0//T/9/fzF/K78t/xr/Ab8KfyJ/Mj85/wX/VD9M/3w/Pn8Sv11/S39qfwq/Jn70fro+Rz5pfiB+Nb4jvmR+j79zgGOBjoKPwymDZ4OlA32Cu0HCQWiAnEA6v5p/mr+7f4wAOABSgNpBMUFDwdWB1IG0wSbA1IC2gD6/wEAcwCRABwAv/+l/zL/af6q/Rf9hvzi+5f7t/uZ+xr75Po5+6X7D/ym/GP96P3D/U79I/0i/QT98/wq/Wf9Qv3L/Gn8PfzI+xf70voD+1D7ifuR/Cr/LgKiBNoGBgmTCokKPQm6ByIGfwQIA/gBjAF5AbMBeAJdA94D8QPyAycEKQSdA9sCXALrASkBWQATAH
kA5gD9AAMB8QBaAB3/1P02/QT9rfxV/FT8n/y6/H78M/z7+//7Xfza/Er9kP2l/cb94/3S/a/9pv3O/fz9y/2G/Yb9r/3e/Qr+Vf6e/rj+2f45/wkALgFEAnQDngRiBbMFnQVfBQEFbATRAzwDvwKAAlcCZQLDAg4DCAPPAqgCdgLtATsBwACRAGcACADL////RAA/ACAAGADO/zH/nP4c/q39gv2p/QD+Hv7o/a39jf2J/Vf97/zp/D79W/1x/Yv9qP3t/Rb+Ff4q/ln+nv7A/rL+wP7P/tn+Fv+U/wIANABqAPMASQFOAU0BXAGEAbEBzgHRAesBLgKAAp4CfwIpAgcCFgLyAZcBcgG2AewBzwGBATgB6gC7ALMAuACMAH8AsgCvAFkACADe/87/yP94/+7+mP6i/pz+gP6S/qL+hv6e/ur+AP/b/tD+5/7G/nv+SP5H/nb+rf6u/qj+0P4r/4n/zv/z////DAA7AJEA2wAMASYBHgHhAIYAWQBrAIAAjwCiAMAAzQDDANoAIQFpAYIBagE6AR0B4QB6ADEARgB/AHkAOwD5/9v/4P/2//T/+f8WACsAAwCu/3T/a/95/3P/R//6/sD+rv7B/u/+Rv+0/xYAbACVAHEALAANAP7/vP9L//D+wP6u/qL+m/6l/sn+Dv9h/6n/zv/n/xQATABqAHUAigCtALMAkwBpAEMAMQBCAHgAqgC7AMEA3wD+APwA2wC+ALMAmgBSAPX/t/+q/7X/vP/D/9j/BQBXAL0AFwFGAUsBLgHpAIQAHgDh/9H/uf94/zb/If82/0//bP+i/+v/GgAdAAwA+P/X/6D/Wv8T/97+xv7O/t/+6P77/iP/Sf9m/5T/4v8lADsAOgBCAEkAOgApADQAVABtAHAAYQBSAFgAhgDJAPUA8ADbANsA1ACUADMA+f/6//n/0P+m/6j/zv/5/xcAMwBTAIYAywAIARUB9wDSAKYASwDQ/3X/X/9t/23/aP9w/43/tP/W//D/CgAxAE4AMQDN/1v/HP8J/+/+yP7G/vT+Fv8L/wb/OP+L/8n/6f8DABYADwD3//H/CQAqAEkAaAB6AHEAYgBsAI0AoQCWAIUAiACPAHgAUAA6AD8AOgALAMX/j/9//4//qf+//+P/KAB5AK4AvADCANoA9wDxAMAAegA0APT/vv+W/4n/ov/i/yYAQQApABEAKgBkAIsAgQBLAPf/iP8c/9z+1v7s/v/+Df8R///+5P7z/jv/gv+c/53/qP+2/7v/1P8eAHQAogC3AMwAygCkAIIAjgCtALcAsgDAANEAwgCgAIsAhgB0AFAAKQAEAN//w//C/93/+/8SACwARwBTAEwAQgBFAE0ASQAsAPv/yv+w/7P/x//h////GwArACUAEAABAAYAGAAdAPz/vv98/0z/Mf8r/zL/Qf9W/2P/Xv9P/0v/Wv93/5H/qv+//8n/z//l/wkALgBGAFEAUwBMADUAFwAGAA0AJAA6AEYARwBKAE8AVwBYAE0ANQAYAP3/6v/t/wkAMQBOAFgAVABQAFMAVABRAEwAQQArAAsA8//r/+//8f/y//r/CQAbAC4ANwAvACEAGAATAAgA9P/g/8n/qP+F/3H/df+C/5H/nP+g/53/mv+e/6z/wf/c//v/GAAwAEUAVABXAFIATgBLAEAAKgAbABsAIAAkACoAOQBIAE4ASwBFADQAGQD///P/9f/4/wAAGAA5AEoAQwA2ACoAHgAIAPH/3P/H/7L/p/+i/6D/of+x/8j/2//l//L/BwAUABEAAgDv/9r/xf+z/5//kP+H/4T/gv+B/4v/oP+3/8P/wf+//8T/0v/m////HQA/AFgAWwBKADoAOAA1ACkAHgAqAD8ARQA7ADYARABVAF0AYQBpAGkAXABBACgAGQAbAC8ARQBWAGIAbgBxAGAASgA7ADEAHwAHAPP/4v/P/8L/wf/Q/+b/AAAYACQAIAAVAA0A/P/i/8f/tP+p/5v/j/+N/5
L/l/+Y/5j/mf+c/6P/qv+v/7H/s/+y/7X/yf/u/xgAMwBBAEcARgAzABoADAALAA0ACAABAAEACQAVACgANgA+AEIAQgA4ACcAGAAaACQAKAAnAC0ANgA5ADIALQAnABwADgAGAAgABwD9//b/9f/w/+X/4P/p//v/AAD6//P/8v/s/9//1f/V/9b/0//N/8v/zf/O/9H/0//V/9P/0v/Y/+H/5//o/+z/+f8FAA4AEwAgAC0AMQApAB0AGgAaABkAFQASABQAGQAeACQAKgAzADkAOgA0AC0AKwAsAC4ALwAuACwAJwAeABMABgD7//X/8//z//n/AgAFAP7/8f/p/+H/1f/P/9f/4f/l/93/1//Y/9b/zv/H/8j/zv/W/93/5P/n/+b/3//Y/9j/3P/i/+n/8P/x/+v/6//3/wcAEQAVABgAFQAPAAgAAwAAAP//AwALABAAEwAZACsAOwA9ADoAOwA8ADsAMwAtACQAGwAXABUAFAAOAAcAAwAEAAEA/P8AAAYACAADAP3/+//7//f/8P/s/+3/8P/w/+//7v/y//T/8f/t/+n/6P/o/+f/5P/k/+f/6f/r/+3/7//z//f/+/8BAAQABAADAAgACgAKAAgACAALAAsABwAFAAkADAAKAAsAEQAWABcAGAAcACEAIgAcABoAGQAXABQADgAJAAUABAABAP7/+v/4//j/+P/z//P/9P/3//X/8P/u/+z/7P/t/+7/7f/u//H/8v/u/+j/6f/o/+L/2//b/9//3v/b/9//6P/s/+3/8f/2//j/9v/3//v/AQAAAP7/AAAEAAYAAgACAAgADgAPAAwADgAUABYAFgAXABoAGwAbAB4AIAAcABUAEQAQAA8ACwAKAAoACQAFAAMABQAGAAYABAAGAAIA/f/7//3////+//3/+//5//n/+P/3//v/AgAFAAAA+f/1//T/7//q/+v/8v/4//f/9f/2//r//v8CAAQAAQD+//z/+//2//L/9v/8/wAA+//5//3/BQAKAAsACgAKAAoACgAJAAcABwAKAAsACwALAAwACwAJAAIAAAABAAEAAQACAAIAAQD+//3///////v/+v/7//z//f/+//7/+//4//X/9P/2//f/+v/8//7/+//4//X/9P/z//P/8f/z//b/+P/6//7/AgAEAAYABgAHAAUA///6//j/+P/3//v///8EAAYABgAHAAkACQAIAAoACAAJAAoACgAJAAYABAAIAAsACQAHAAcACwALAAkABwAGAAYABAAEAAMAAAD///7//v/+////AQAEAAMAAgABAAAA/f/8//z//v///////v/8//z/+v/4//f/9//5//v//P/+/wAAAwACAAMAAQAAAAAA//////v/+f/6//v//P/+////AAACAAMAAgADAAQABQAFAAUABgAHAAUABQAEAAMAAgACAAMAAwACAAIAAwADAAIAAQADAAMAAwADAAMAAgACAAAAAAABAP//AAD+//7//f/8//r/+f/7//7///8AAAAA/v/8//v/+//6//z//f/+/wAAAQABAAIAAgACAAMAAwADAAEAAQABAAEAAQABAAEAAQACAAMAAgACAAIAAgACAAMABAADAAIAAAAAAAAA//8AAAEAAwACAAIAAQACAAMAAwABAAAAAAAAAP//AAABAAIAAwABAAAA///+//7//f///wEAAgACAAIAAgADAAQABAADAAEA///9//3//f/8//7//P/7//3//f/9//7//v8BAAEABAABAAAAAAD+/wAA//8CAAIABAADAAEAAQAAAP////8BAAEAAQACAAEAAQABAAEAAgACAAAAAAD//wAA/v/8//v/+v/7//7/AgAFAAQAAgAAAP7//P/7//r/+v/5//v//f/7//7/AwAFAAIA//8EAAoACAACAP7/+f/1//H/8P/w//P/9v/5//v//P///wUADAANAA0ACgADAPv/+P/4//j//v8BAAIAAwABAA
EA///+/wIABQAJABEADgAIAAYACQAGAP//9//x/+f/3v/g/+j//v8NABQAIwAqACAAEwD4/+j/4//U/8D/1f/F/9D/UgHtAREA3v4e/8T/rQAJASYAI/8o/9H/TgAzAIn/nP52/n//WABpAAQAkP/J/3IAEAEuAcoANwAkAJIAwwCbANn/NP+J//7/+P/1/+L/kv9u/7z/8//+/wEAcgCPAVAB7/+R/9r/6P8CANL/YP84/1X/kP+V/1P/ZP97/8T/RQAsAPoAyQDU/6cAMQBk/3IAnABWAMoAUQDd/woA5/8SAAsAoP+I/0z/kv+y/3X/Wv/+/j7/9/8gAOX/BgAhACgAtQDXAHwAnwAtAOv/TwAbAOL/+P8Q/8D+CQDpAHwBDgEn/2n+nP9uAPAAqQBL/3T/6wD1AI8AAQAC/6H/NgBcAHwA6/+z/7T/ZQAWAlgCBAFhAHcARQCQAFcA0/5r/vL+Lf8s/+n+W/4D/mD+Jf9n/zT/av9Z/0r/KwChAHMAUABsAAABYgGQASMBSgB8APAAlwBaAKwAvv+p/oP/IwA9/yb+i/6L/2z/LP+e/y4ABgH9ATsCSwGCAIoARgG4ASAB5wDDAAgBOAGQAeIAG//w/sH/vv8p/yv/pP4M/hj+0P5d/5X+w/1Q/tH+C/+V/1b/aP/+/7AA6gCSANYAjQFiAQEBNgHLAJ8AmQAGAIz/Rf+S//L/eP/n/oz+RP7D/nn/Mf/y/k3/ov/9/zYABwH6AB4A2wCRAa4BtAGpADkABwEfAeYAWACS/w8ACQHbAYYBWADf/43/tf9mAA4APf/b/nP+D//H//7/2f90/iD+sP8XAL7/EgB7/5f++f5HAA0BzACbACwBCQG/ADoBygBLAAUB2gDO/yAAOQCF/wD/Cf8X/4X+2f4F/6T+K/8f/3b+6/4IAH4AyQAEAdIA5QBMAQ8CwAH7APwApgA/AF0AhgDO/9/+8P5T/4z/s/9C/4X+vv6u/77/kP+y/5H/pf8nAJgAXgAmAGIArAABAekAjwBAAEkAoQAJAID/v/9j/zz/dv+9/77/Ff9J/8n/JgB/ANr/hP8qALgAfQACAPP/UgDgAMgA+//u/1IAMwBDAA4AmP+D/6X/BABiADsACgC6/5X/RgCtAF0Auf+O/8T/u//L/4j/JP9Z//P/SwD4/+D/TgClAHIAJABzAJYAUwAKAJP/nf8iADoA2f9n/5P/bACWAA8A/v/+/2z/Nf/N/9//t/8bAAUAc//D/8oAMAHEAP3/xf9+APsAmQBw/3r+Uf+xAHAAjf94/5H/BwA4ALn/uv/k/9z/vf+y/+z/IgAFAP3/7f8UAJQAfgABADcAfgDE/zD/nv8sACMA4v9a/y//GwBfAKL/cf9MAOsAngARAPT/ZQBlABIAoP+Y/3kAsQA3AOL/4v8cAEEAVwBsAI0AXAAeAA8AFQBfAOn/Q/9y/4L/xf8ZAJ7/Av9q/+n/kf98/9L/1v+D/0L/Jf97/wcAy/8L/1r/QQCQAAoAfP+0/6v/wf8zABIAkv/o/3EASwD+AMEBUwG8APAArAH7ARUC9QFFATQBEQKnAg4COwFSAZwBaAHfAH0AAgCK/z7/7P4Z/zr/rv4W/uL90/27/e39+v2Z/ZD9rP2C/Tf9HP3n/GX8XPyz/MT8gfxt/Hr8evxC/aH+Gf+j//AAUgJtAw0DjQN3BdgF6wVEBlAGTwaMBtcGpAUdBdEFwgXnBHwDbAJtAQ0B/ACR/9L9Ev3S/DT85/vj+3/7V/uO+837C/yZ/L38UfxY/Gr8iPy6/NH8y/yl+436zPqM+2r83/zV/AL9a/4jABoBeQKTA9sDxQRZBugGCQfWBzIIjggeCA8HEwf2BiUGhAQtA9oCHQL5AGT//f2S/Rz9QvyC+1L7h/uP+1b7dfsf/Lz8D/00/Yn94P0e/gX+K/03/L37Jfwn/Jn6aflV+uz7mvxt/ff9s/5FAcADTwR7BMQF8QbHB1sI6gdqB+AHVAiSB5
AGiwVIBJsD4gKOAd3/af53/XT8lfv6+m76GPpJ+h37w/t5/FP9sf1Z/l//6P/c/87/of+p/pf9kPzC+oD5Ovr0+nb6Uvp3+kz7+v1ZAFMBawJVBDcGUwdmCC0JbgmyCWcJ7AjVCIEIuQc2Bk8ExgKFAUYA3P56/fL7oPrf+Rn54fhO+b/5Dvp8+m/76/x4/pD/NQDOAIcBvQFeAY4ACP9c/Z77cvky90v30PnQ+n763fpa/Nf+QAFlA3MEyAVkCIAJfAmDCXIJzQm0Cf8I1gfoBu4GqgUYA9AAF/8a/vz8Vful+bj4h/go+Of3fvi8+eH6yvv3/In+RQC7AYkCwALsAvYCYAIGARb/yvyX+qT40PWf9a354fvg+zX8HP3S/ngB1wRZBn8HzgkKCiMJNglYCeAINAiuB2MGLgXjBKEDNAHx/mz9GPyb+sT5Evky+C34Kvgt+LD5i/vI/N/9V/9CAbgC/gOeBCEE2gOFAxgCGQCW/ZH6OPh79YT0c/gD/Lj8oPzT/HD+GwEsBWUHUwd9CZcKPQm6CFAI0wemB4AHEQaxAw0DWQImACX+G/xm+rX5dvnd+A34kvgW+en4CfrB+0z9+f6LAPQBAgOVBNIFQwXMBGQE+wIWAdv+1fu2+MD2I/Tz8//4p/w1/e/8ef2q/6UCzAZiCFIIbArZCnEJPQhmB5sHYAe1BhAFigKhAecAzv5u/G/6hvkC+VP4GPjp93/4YPms+e/6+fwd/9YA0gEWA2EEqQVZBpsFvATpA9AC/wA7/jf7kfjY9vbzB/P/9xD8Mf2b/b79Xv80AowGdwhGCNIKmQv6CfAIegefBp0GtgYwBV4CFQHe/8H97PtQ+mD58Pi8+ID4Lvgj+Tr6nPr3+9H9kf9GAa0CDAQgBXQGMgdDBmQFVQRdAnkAav6u+8P4zfYv9CXy3PV9+s37Iv1F/t7+uwDOBD4HfQc+Cs8LNwocCfYHkQZGBs8GkAXEAmMBIgDX/f37yfrZ+WP5g/ny+BH46/h7+kD7efw1/l3/ogBZApEDUgTQBQMHcAaLBeAEVQN/AcP/MP1K+j/4bPZ284DyNvZQ+dX6Mv1v/oz/GgIqBXwGhAdVCgoL5QkxCXoHAAYGBiMG4ATiAsIBSgA+/ub8rfvS+tn6yvoD+kv58fnr+rP7Tv28/oH/rAAXAvICsgMIBc0FewU3BTQEGQJuAPL+rfyG+tz4Efew9IbzkfXx95P56fuP/cD+5ACoA24FhAbLCPkJogl7CUsIvAZOBnQGiwXkAyUDOwKPADn/Hv4V/br8o/yl+5/6xfpU+577jfzm/Zj+eP+hAE4BLgK1A9oEqwQfBHYDBQKeAFb/m/3w+8T6RPlk97L1z/RN9ub4svrp+yz91P5WALICkgQvBRUHOggJCNEHLweeBioGkgY+Bg0FuASCA+8BAgEnAFv/u/44/gH9Afz2++775/uE/Fr9t/1A/gz/uv/AADgC9gJ9AiMCrwGfANv/Sf9P/vj8+vu/+tX4CPjl9wT4l/kr+/z7ifx5/cr+BQAsArgDeAS0BWMGgQYLBugFCAZPBiAHhQZ4BaMErgMFAwMCTQFoAHv/tP5R/X/8R/xb/IX8q/wZ/Xb9KP4T/9H/aAAQAYQBPQEUASQBEgHRADgAKf+7/fH8WfyI+zr7IvuT+n/6evvb+wz8q/y2/SP/RgBPAXEB7wE/A8MDWwQjBb4F6wXbBdcFSwXxBNYETgRcA1wChAGuAAcAVf+F/hf+/P3K/XX9lP3//V/+uP4H/0H/Xf/s/zMA+P8wAD4A7/+a/yT/x/6D/q7+fP5J/dz86vx4/CT8OvxT/H78Zv2b/cz9hP7M/pr/BACCAHQBlgFAAtICmgI9A9ADuQPxA+sDtwOUA1ADEgNBAusB+AEpAQcBNgGsAG4AcgD3/03/tv/k/1z/cf9NAB0AD/95/+T/qf9N/9r//P+v/u/+8/7u/bH9Iv59/ef8vP3e/AT9Cf06/er9p/01/9f9GP6a/1
L+2f8UAEsA1gDD/6oBkwEcAdkCOQI3AgYD/AKHAvwBtAJvAoEBmQJHAnIBXQE+AYEBlQBeAYYB+/+JABcAsv/Q/4T/8/89/z//jP9x/oT+xv9I/r79fv9J/n39+v5R/s79QP70/Wb+3P1t/qn+j/61/gf/9P9x/hkAsgD0/3kBrgBMAUIBHgGyAVgB1wEIArIBfAE9AXsAswDWABUAJgEuAT4AHQDf/48Axf9rAHwA7/+CADn/EQE3AJb+bQHx/hP/FwFX/rP/XgDI/av/Mf4M/n8Azvxg/6T/ff3M/0L/JP9//2n/XQCKAAIBaAHhAaMBmAGGAYcAmgHTABwB6QBVAGcAXQDJ/0sA6gBc/1MABQBZ/yoAwv85/6j/GP+M/t7/C/9v/+T/nv5VAJv+mP6X//H+Kv/O/hf/Nf/X/rf+Xf/O/vH+nv9uANP/AAAIAXYA2gAAAhABwwGIAeYBKALQAPwCDgGLAQYCgQDsAQ8A2gAuAX//YgFV/xQAbwDP/nAA5P7D/5n/GP8mAJv+sv+n/lv+k/8Z/hIAUv7I/qT/Df5t/3r+q/4U/77+0/6k/s3+9v4U/6z/2f/r//z/lQAOAf4AHAFTAekAUAH3AG4BdgFzAL8BqAC7AE0BrQAgAZkAygB4ACMA/wCf/6kABAHk/0EAxv9sAMH/O/+s//f+ef+B/w//Xf+y/hH/Cv/R/sf/G/84/9z/1/5M/wz/D/9Q/2D/KAALABYASQCQADAA5wC4ADwADAEqAFwAfABJALYAvv93ALcAUwBPAM8ArgAhAJwB5f+cAHoBm//mAHcACAAaALT/+f/D/8/+gf/r/4H+mP9s/8D+kf+Z/8n/Vf/E/xUAOf+M/xoAYP8x/40AIv+l/7AAV/+dANv/agB8AK//hACR/xgAo/9YABkAuf8cAXb/2QDlAF4AoAGvAD8BFQEEAakAuABuAMv/HQBj/2z/wv46/wv/9P4m/6f+t/9U/4P/GQDM/93/BABKAOL/FwA3ABEABwAgAFcAcv+pACkAvv+FAML/hwCm/+X/sgCc//n/MwAvAGwAHwC0AB8AeAC+AIwAkgCUALEAgwCuAHgAsQCg/x4A5f9o/3X/of/+/iv/Wv+y/jb/Zv9v/+//EgBB/0oAtP/l/9T/MgAqAOr/RQAeAOr/3P+QACn/cAC0/2H/RQDd/r7/k/9f/wIA0P85AHUAQADnAJwAHgG7ADoBNQFTAKABNQD8AHwA7/9mAM3/EgAz/5X/av/x/l3/W//l/sn/AwBr/y8ABAARAFEAAgA6AFQAQQCl/3EA1P+z/08Apf/D/7T/6P8L/2MADf9y/30AcP6ZAN//qv8eASQAvACiAJsASwFmAN0A+wDBAAEAywA4AOn/EgCu/97/Of88ALv+KACI/yD/GwBL/+//vv/D/x0A1//j/87/l/8uAP3+UQAh/8f/gv8Q//f/df42APj+g/+Y/77/DACq/8QAKwCGAAkBHwG+ACMBNwHOAFEB2wCsAD4BbgBdAKQA5v8tAPH/NQCC/6b/8P8X/wEAEv/z//n/Jf9WANr/kf8oALf/9v/a/7f/DgBr/yEANP+p/7L/G/+6/8v+sf+T/0P/EQBG//P/SwBS/60ANQAxAH8BmAC3ABcBcADhAPYARAAgAV8AUwDOAIb/KQBnAKL/vv9VAIz/tf/J/yP/NgCE/6v/5P/E/5P/iv/N/zj/x/8a/5L/Sf8X/4n/u/5S/wP/ZP+C/9z+DABg/7//fwCE/5QAQQC1AK0AegAiAbcAwQDtANYAaAD6AKIAfAC+AOcAMgDNAO8A8v8lATkAxQCAAFAAMgFo/9IAKAD1/3YAdf8gAEL/DQDv/oX/af9u/uf/Vf52/6r+0f6c/4n+x/8L/3X/pv+0/97/n/9ZAN3/VgAFAFEAbQDj/6cAAgBKACcAZADy/08AdADu/5YAJgBmAE8AtgBAAE0ApgBjAD4AVwBBAPn/IgDo/9P/y/9C/6X/vP/A/oD/b/8C/0
r/q/8B/1P/LAAl/7f/GgCP/yQA5f8sANz/OQCNAK7/ugDN/30ASAAeAPcAkf9IATwALQDfACUAKAHK/0oBmABdAN4AZQDwAOL/jgEGAJsAAwGB/yIBgf9OACEAc/9lACX/w/9i/5D/I/+q/0n/gv8BAOD+fQBK/8H/BgCb/wAAKP9mAE//v/+q/wj/dwBd/uj/1v+u/jwAY/+V/xAAQ/8xABEAkf9tABoAagAeAK0AlgBJAAoBrQCMAKoABwFCAHUAyQDb/y4ANQDO/6r//v9c/63/+/8U/wAAyf/g/+//rv/CAGH/TwA0ANH/MQCL/1sAcf+t/4//0v9i/2r/AgAw/6f/GACH/8z/PADi/2AAPQBsALYAeQAFAcoA+gAnARABEgH4AFABVAAeAcQA5P+5AKT/NgBu/6j/KQCF/kMA0P8T/0MAIACo/y0AWwDs/xcAHgDq/47/7/+B//3+i/82/53+G/8b/5r+6P4w//v+3f6J/4r/TP/e/xQAEgDn/xEBNQAmAJ0BCwDqAPkAnwCrAKMAGwGQ/zEBSgC2/9IApP+XAGv/fgAcAHP//QB5/+gABACTALcAiv+NAW//2AARAOH/1wDk/qEA6/7h/6//x/7n/8/+h/8c/wH/MP+Z/wH/Tf8HABb/JADm/7D/dQBuAAwAfwDhACcAvACEAGEA1gAYAKYAWwA4AHUAHgBFAAEAlADE/0kAnAB8/9EAAQD1/5YAvP/7AFX/JACUAAj/gAB8/8f/mf/E/5f/vP7I/23/xv56/1v/3f6N/w3/1v8x/67/UwAW/6oA2P9vAHsANwBUAYn/KQGzAIn/lgH2/1gA5AD4/3YAOwBrAP//IACRAPT/+/9CAC4Aov/k/7AAEP8sACsAJf9MAHj/NgA9//T/4/91/04A8/65ABr/2f8xAFX/hQDu/qkAbf8vAN//cf+eAC7/1gAr/5sAqf/G/8QAJP9HAdD+5AAsAN//HQEB/4UBcP+BAH0A+f8FAcn+SwHD/+P/egBN/1gAXf90AHD/7v89AFD/TQDt/1YAWf9WADgAo/9aANT/LACD/0wA0f+h/7j/zf+Q/4D/BABi/8j/r/+q/5T/+//W/0r/EgDy/+n/XwABAFYAaQB9AEAARgBdAHQA/v9lAMMAr/98AGUAYv8/ACcAlP8VAO//KwC9/2MANQCX/z0ANwDD/yAASQCn/z0A7f/u/yEAdf/q/woAWv8mAIv/5P8AAF7/HgC5/yEAZ/8aABEAmP9gANz/CgA2AEoALgBZAE4ApAALALsAYwBAALUAu/8MAYH/dwCEAEP/9gCo/zEAWACr/7MAsf9yAOT/7f9+AKL/JwCZ/z4Aj/8LAID/wP87AM/+awCH/3H/CABI/wsAUf/8/3j/gf8aAPr+YAA7/8b/RABC/ykADwDz/xIAIAACAHIA7/8GAFwAIQAMAMj/igCw/+b/WQDZ//L///9BANb/PADx/2MAEQDg/9EAnf9dAHcA3P80AE4AGwCk/7kAev/7/2AAVv83APT/sP8IACgAYP+eAND/zv93AN3/egCX/9IA2/8DAMsAX//VALX/OAB+AIT/dAAsABEA5v92ABgAxf+qALX/8f+aAEj/rQCa/6X/xgDp/mgArf+M/wUArv+z/2D/PgBZ/+z/lv/K//r/fP9zABH/VwAYAB//qACB/7b/NADh/5L/KQDT/8z/VgBv/1EA7//q/z0A0f8eACQABgD9/zUALwCs/50A3v/r/3UAqv9tAKb/XwDh/8//ZQCn/yUAv/9HAJ//AQBaADT/ogCz/+7/HwC2/4UApv9yAND/VgBRAOT/wQDa/60AVQAaAMgAOwB9AF4AfQCLAB4AiACQACkAVgCZAMz/ogArAPz/owCw/6wA5//c/7sAsf9LABkA7v9DAMj/XABe/xMA3/9X/wEAHP+l/0r/Mf9B/9b+Uf8P/7b+Jf/R/gH/1P7p/kj/s/45/zT/J/9V/4//kv96/9b/2f8WAMv/YQBFAB
MA1gBEALwAkwCDAPcAlwDsAJQAFgG1AIIAEwFQAAEBcgCGAMMAHgCUAEUATwBDACkASQD9/y0AIQAEADkAIAABADkAIgDL/5cAvv/c/5gAav8iAAUAhv87AH7/oP8sAEj/y//Q/3n/8P+U/7r/5v+C/yEAu/+n/28Adf8ZAD8AgP9bABIAsP9hAPP/z/96AMT/7P+QAKP/KgBiAJL/dgAfAM//eQD8/yQAWAD5/2IAOAAuAGAAGgB5AAIAEACWAKf/HABOAJ7/+f8CAND/qf///7j/yP/x/47//f+Z/7H/4v+J/7f/xv+O/4j/n/+C/8j/dP+c/9P/cv/A/5//vP/C/5//0P+U/6D/vf+j/8H/vv/x/9X/6v9BAAoANgBqAIgATABmANkAYgB5AOYAfACfAM4ApQCiALsApgCxANkAbAC/ANMAPwBqAL8AIwARAIAADwDq/wUAHQDJ/6X/CQCr/37/tf+U/3D/XP9z/1H/G/9J/0H/Bf83/z3/KP88/1L/V/9c/33/b/+j/4T/j//S/63/sP/O/wMA3P/X/0wAOAApAHYAhwCEAJMAzgDZANEAAAEfAfYACwEuARYBEQEwATsBIwFEAVQBUgFkAW0BdgF0AWkBVQEzARoB1QCgAHoANADz/8T/g/9C/yL/4/7E/rL+af5R/h3+9f3j/ar90/2i/Y/9j/1l/WP9Rv1c/UP9Nv01/Rv9Jv1E/Yb93v08/or+9/46/3z/CwBQANMAWgGwASICeQLdAi8DpQP/A2AEsQTGBOAEugSHBEAE8wOjAzQD6wJxAvUBmQEOAbUAXQAcAPL/uP+k/3//Vv9E/zP/MP8f/y7/L/8L/wr/5v7E/qj+g/5u/kf+Hv4D/tH9pv2L/Xn9Xv1Y/XD9UP1j/Wv9X/1p/Y/9pv2u/fX96/38/SD+Gv5n/qj+8P5l/4r/y//p/xMAUQCQABQBcwHnAUgCqwLwAmoD1wNTBNEEEQVUBTgFBgXABFoE6QOOAwwDdgLnAT0BmQAKAJ3/WP8l/xX/EP8B//n+9P79/un+B/8J/wb/EP/p/u/+uf6r/qL+eP5+/mH+Uf4q/gT+7f26/aH9iv2G/Xb9fP2W/aH97v0m/nX+1f4T/1j/a/9+/5f/kv+d/7H/mv+D/1H/Kv8M/wf/Qf9+/6z/4v8AABMAWgCfACYBnQEQAn0CvgL6AksDtQMSBHwExQTZBLwEfgQCBI8D/gJqAtQBJQFzALz/Kv+Q/j3+Ef7//RH+Of5W/ov+w/7z/jz/W/+I/5L/kP+P/4D/d/9Z/0X/B//R/qH+Yv49/hD+9/3J/a79iv1b/UX9HP0g/Rz9Lv1f/Wz9qf3B/ef98f31/Qj+Gv6j/gj/lf/r/9v/8f/z/2AADQHJAYIC3gIvA1QDnwMwBLsEgwUEBmEGbgYhBr8FFAWQBOYDSgOyAtIBCQEUADn/j/4Z/vD96v3//fj9AP7u/Q3+Tv6e/gn/Qv9i/2L/Uv9H/1X/Uv9O/zD/8P61/nH+OP4f/v/9/P30/fL9/v36/R7+MP5q/qz+9/5Q/5H/yf/y/wwAJwBBAEoATAAvAAUAwP+R/0z/EP/c/n/+Vv4x/j/+fP64/vb+Ev9A/2v/2P9tAPwApQEFAmACpQICA2kD5wNQBI0EuQSXBG8EJgTHA14D3QJQAqQBAgFiALf/UP/Q/o7+cP5X/nn+lf7I/ub+Iv9J/4H/2v8JAEAAZABGAEUAJwARABwA/P/z/6f/Z/8C/7H+ff4//iH+6/22/XT9Pv0F/ez85/ze/OX86fza/Nv80vzg/B79bP3C/TT+kf7d/i7/cv/t/4cAKgHSAXMC5gJEA4wD9QN4BPsEdQXDBd4FuAVWBeQEgwT9A3gD0gIOAjkBWQCD//3+n/5e/mX+aP6K/rH+x/4D/0H/av+n/7v/0P/E/5z/iv9X/zv/Av/X/qb+bv5G/hL+E/7y/f/9CP4a/kP+V/6S/rT+/P4v/1v/lv+o/8z/4f/q/wIAAQD///
T/4//A/5r/a/85/xf/5/7Y/r7+rf6Z/of+jf6c/tT+DP9G/2//fv+b/8z/LACuADgBuQEEAlMCgwLTAkQDowMcBD0EQAQLBKYDTAPXAoECEwKlARUBdQDY/zz/0/58/mP+WP5Z/nD+eP6r/sz+Hv94/8n/LwBQAJoArgC6ANUAwQDUALgAkgBqABwAx/9r/xv/4P60/nz+Sf4B/qP9X/0h/SD9Gv0W/RP97vzi/MH83fzg/Ov88Pzw/ET9jf0Q/of+0P4G/zX/h/8nANUAewEXAmsCxwIhA6ADTwTjBFcFkQWeBXQFSQXvBJcEJwRuA88C7AEeAVgAn/8v/9v+rf6X/oX+df6H/qX+7v5N/5//1v/p/83/p/94/1v/QP8n//r+s/57/jf+I/4Q/hL+Ev4V/iX+Lv5P/mr+jP64/uv+K/+B/9H/IwBnAJwAyQD2AAoBFAECAc0AmQBLAA8Ax/+O/0z/FP/m/rn+rP6c/qL+nv6h/pv+nf6z/sv+AP8p/1j/gf+r/+D/JAB5AMsAKQFxAbIB/AE2AoICygIKAzIDSAM0AwYD4wKZAmkCLgLdAY8BJAHAAFQABQC0/33/Yf9E/0P/SP9Z/3L/k//A//z/SgCUAOAAHQEmARkB9gDCAJ0AcgBJABQAyP9f//j+oP5U/in+EP76/dv9r/10/Uv9I/0F/ff8+/z2/PL87vza/Nb8vvzO/Pb8Rv2g/Qr+jP7V/iz/cf/W/2cA8wCgAUACzwI7A5gDDwSGBAYFdwXDBewFxAV4BQgFhwT2A0gDnQLRAQoBNQCE//n+mv54/nH+lv7A/vD+LP9p/7r/DwBWAJsApwCZAFIA//+c/0P/+/6w/oT+Ov4C/rn9hv1l/Vb9c/2X/dT9DP4+/oD+tv4Q/27/3v9bALoAJQFgAZ8BuQHHAdEBtwGjAWsBNAHsAJcASwDx/7H/cP9H/zL/IP8j/xf/J/8s/0f/Xf91/4f/g/+C/2P/Xv8+/z7/Mv8x/zf/Nf9J/03/bP9//6H/yP/y/ygAWQCSAMYA9gArAVYBhwGyAdMB+AEFAhUCFAIQAv0B5AHAAZABXwEjAfIAxQCfAIAAawBWAE8AUABoAIkAwQDxABgBLQEhARUB9ADhAMAAqgB8AD0A+P+T/0H/4/6b/lP+Df7A/V/9B/2U/Dn86fuv+5r7mfuo+8H75/sN/Ez8rPwh/bT9VP7f/mP/0f8zAKIAHAGgATUCxQI7A6QD9wM+BIIEvgTpBAAF7gSyBFkE1QNBA50C9wFRAbIAIQCY/zP/3P6s/pb+nP64/uz+Mf98/8v/CwA6AE8AUQA2ABEA3f+g/1n/Af+q/kf+7/2b/Vf9LP0M/Qb9CP0V/Tb9Vv2R/c39If6A/ub+Yf/L/0kAsAAZAW0BrgHjAfoBDQIHAv0B4AHAAZYBaQE7AQkB3ACtAIYAVAAsAPj/zv+c/3L/VP8s/yT/D/8W/xf/IP80/zv/VP9b/23/cf9w/23/X/9U/z3/Lf8e/xb/Gv8o/0L/aP+W/8b/AgA5AHoAuQDzADMBXQGSAbQB1QH0AQYCHwIeAiICDQLyAcoBkQFXAQUBvABeABEAxP9+/1X/Mv8t/zn/V/+K/8H//P88AGgAgACOAI0AgwBxAEMAGgDk/5z/Zf8Y/+T+qP5r/jj+8/2w/WH9D/26/HT8L/wB/N37u/uz+6P7svvL+x782fyP/Yf+b/+9/xkAwQCtAdUCngPVA+0D2wMABIYE6QQbBcYEcwR7BDIEwANkA/oClgIqAlcBhwD7/4b/fv+M/1//4f7h/qf/JABhAIEAnwDxAFkBRwHgAKsARgD7/7H/4/7n/Uv9M/0F/aL8Rvwc/EX8hfyg/LP8E/1y/dX9V/6e/t7+av8MAGMAsQD7AE0BtQHkAd4B2gHiAbAB0AFhAkACWgHbAEcByQG7ARIBmQDeADIBTwEdAcAAuwArAXkBHAG+AMYAyABrAP3/3v/X/5T//f6e/qn+uf6J/hv+/f08/k
D+AP7i/Qn+I/7u/aP9t/0A/gX+6v3b/ev9af7h/uv+CP+E//7/XgC9AAkBewHJAdYBBgJVArEC7QLkAqQCvwIMA+4CuwKKAlwCOgL7Aa4BhwFZAe8AoABjAEYASwAGAL//vP+6/7L/yv/A/77/5//7/xAAHQBOAGgAEgAfAJUAegAIAM//tf+p/2T/D/8D/9/+nv6B/nL+RP4w/jv+EP7j/eT9u/1n/VX9Qv0J/dv8z/zm/Nn81vym/Mz8+v2k/sj+a/97/xsADwKcAk8CsQINA/gDcgQLBH0EmgQYBGIEhAREBBYEYQPRAlgCwwGWAagAdP9C//7+dv5b/mr+dP58/uv+gP++/yoArwDPANEAGAF7AWQB0wCBAFQAxf9V/wD/Zf7g/Wb9C/3f/L/8x/za/Nf8/PxZ/bj9Ev55/vX+ZP/H/zYAnQABAUcBeQGiAcQB2AG7Aa4BkgFTARABHwGpAVYBLwBLAHsBjAGnAF0A4wBZARMBzgAEASkBEAHfANIA+wALAaUAKABBAJsAEAAv/0r/av/a/mf+W/42/t/9xv3N/ZD9a/2a/ZD9Vv0h/Tr9df0m/QP9XP1m/Uz9gf3X/Sv+Yf6f/vv+S//P/04AlQAHAWYB0QFZApECDwObA4cDswMkBEEEUQQ3BDYEPQTOA5wDfAMDA6sCTALMAWgBAgGyAFUA5v/P/6j/Sf9M/1P/I/8i/0//af9F/yT/g/+2/1X/Wf+U/0P/6P77/gX/nP47/iz+If77/dr9vv22/bP9xP3w/dz92P3//f/9IP5W/kv+Jf4e/jn+Kf7n/fH9Lf7+/ef99f3D/Wv+z/9TAK3/Lf+1APkC6AIDAngCUQMJBCsECQSKBIoEKgQbBAYEdQRJBK8C8gE0AtsB5wC+/x7/xf4e/tj9w/1u/ZL97P3y/T3+Nf8eADcAOAD9AMEB1QG2AaMBeAEyAcoAVgDr/3P/5f4//tj91/2U/Rn9/vwb/Sj9Ov15/bL97/1m/tH+QP/J/ygApAAFAWQBQwJiAsMBLQIGA98CMQLaAfwB2QFZAf0AsgBpADMA9P+//8L/8P+9/1f/j//2/83/jf+f/6X/kv+c/7//k/9U/5L/sv9M/zP/Yf8p/6/+ev5+/kD+xf2M/Yr9Z/1H/Ur9Yv12/bf9/P0c/o7+G/9N/6b/PADBACABeQEVAoYCnwLrAlgDhQOUA44DiQN5A04DIAPQAn0CUALwAWkBEwHdAIoAJADw/+n/yv+c/6z/4v8SAD8AbwCTANcAOgFaAXIBTwEOAVwBVwHiAIkACgDg/6b/Bv+r/kH+yf2Y/Tr9Bv3w/JL8ZPxd/Ib8tvx7/Gr8ovy9/Nj80Pzd/CX9Yv18/VX9JP3A/en+fP93/yz/e/8bAWQCTQIfAooCZAMaBFAEogQCBeME3wTsBNgEDAW0BJ8D3QKiAmACiwFgAKz/R/+4/jL+6v3n/RL+Ef7w/VT+Sf8CAAYA8v9rABEBPgELAeQArABRAOX/d/8Z/7r+M/6K/RX9Bf35/LT8evxy/KP86/wV/VT93/1g/ub+cv/7/68AUQE1At8ChwLeAuYD6AOKAzcD+ALyAnoC/gGSAd0AxACSAOD/5/8HAKn/YP9i/7z/ov9G/4X/d/9A/5H/lf9K/1L/nP+r/y3/Gf9w/z//xf5v/kj+Kf7a/YH9FP3l/Bv96/yl/Ob8Qv11/bP9VP4S/3D/9v/RAF4B/QHBAhsDXQPKAzAEXwQuBCgEGQS1A4YDNAOdAjgC0gFGAacANAD8/33/Bv/l/sD+n/65/vT+KP9q/9P/TgCtAAMBdAHAAcsBwAG0Ac4BrAEiAZQALAD7/6T/+/5a/uD9jf0u/bz8XPwh/Pj7v/ux+8f77vsl/DX8X/yl/PD8Tf2B/c39MP5x/mP+U/5g/9QAGgFqAA8AOAHWAkUDBAPRAjEDDgR1BGgEpwQYBT8FxgQ8BGYEsAREBFkDSgKbAW4B6gDX/9/+Yf4h/pL9D/0q/XL9bv1W/a
79af4l/6r/1f8OAIsAAQEwAfUAwgCYADIAw/9K/9j+hP4Y/qn9av1V/Vn9VP1I/XX9x/0B/lD+of5Q/0QAWQB4AFkBDgKuAuEC3gI+A0sDYgNdA8MCpgKEAtcBSwG3AFQA+/9M/+/+kP4j/iL+5/2z/dT9+f0S/vv9Lv6j/rH+vv75/h7/Ov9O/2z/XP81/0j/N//8/vf+6/69/qT+uf7S/sv+Dv+M/6P/xv9NAMsAOwGkAQ8CZgKhAg0DTwM6A1YDZgMkA9AChgI2AroBVQEPAYMA///I/5H/WP9C/0n/Rf85/3L/u//F//X/UwB0AHwAlQCrALAAnACSAIIAPwAAAM3/mP92/0v/Ef/m/tP+0/7N/sr+4v70/gD/Hf8z/03/Zf9s/3f/d/9z/2v/U/9B/y//Ef/6/u7+1/62/qL+mP6k/rP+uP7F/sb+yv7q/hj/Wf+K/5H/oP/K/w0AVgCOAMYA/wA5AX0BvAH7AUsCngLPAtIC3ALgAsoCrAJxAicC2AF3AQ8BlgA9ABIA3v+i/2//Sv86/zr/UP90/4f/jf+U/5f/o/+7/8L/p/+P/4H/aP9G/yz/FP/1/tb+vP6j/pb+mv6g/qH+s/7a/v/+G/9B/3P/p//P/+3/EAAuAEwAYQBlAGwAdwCGAJAAlgCeAKUAswDDAMwA0QDYANwA3ADZANEAygDGAMIArwCbAJkAlwCKAHUAZgBbAFIAQgAhAAsAAADn/8H/nf+O/4n/cf9V/0X/Qf9J/0L/Lv8t/zr/RP89/zX/Qv9N/1H/WP9f/2r/d/+C/4P/gv+Q/57/n/+n/7H/t//C/9X/8P8HABoANQBSAG8AiwClALgAyQDaAOAA4QDhANgAywC3AKEAjgB0AFsASgAxABkABgD3/+r/3f/S/8r/wf+//7z/t/+5/7v/uv+4/7n/u/+8/7z/uf+1/7X/s/+v/63/sP+0/7j/vv/M/9r/6f/8/xAAJQA4AE8AZAB0AIAAkACdAKQAqQCsAK4AsQCzALAAqgClAJ8AmQCPAH8AbwBdAEwAMwAXAPv/4f/I/63/k/97/2r/Wf9L/0D/PP8+/z//Qv9J/1D/Wv9h/2r/cf92/3n/fP+A/4L/gv+F/4r/jv+U/5//qf+1/8X/1f/m//f/CwAcACoAOABHAFQAXgBqAHMAegB9AIEAgwCBAH4AfQB4AHUAcgBuAG4AcABxAHMAdgB6AH0AfAB8AHQAbABmAFcARQA0ACEADAD1/93/zP+4/63/p/+g/57/oP+s/7H/t//A/8n/0P/S/9P/2f/b/9j/1P/U/9r/1//W/9b/1//X/9P/3P/e/9X/1f/W/9z/3f/b/97/3v/k//L/+P/5/wYAEgAXACYANQA9AE4AXABlAF8AWgCPAJ4AcwBuAHEAbgBhAEYAPwA1ACcAHgACAO3/7v/i/9D/wP+1/7b/sP+u/7P/rf+t/7v/vP+4/8P/0f/P/8//3v/Z/83/0f/X/9X/0P/N/87/0P/R/9b/2P/i//L/9//+/wcAEQAYAB4AJQAnACEAHwAeABsAGgAbAB8AIAAiACYAKgAuADQAMwA6ADoAMgA2ADMALwAuACcAKAAjABoAHAAXABAAEwAMAAsACAABAAEA9//0//H/6//k/93/2P/U/9X/1f/T/9T/2f/Z/9b/2P/e/+L/5P/o/+b/6f/q/+3/7f/u//n//v/9/wMACgATABwAJwAyADgAQQBNAE4AUgBNAFYAWgBVAFEAQABCAEQAOgAzAEEARQBCAEkAQwAwADMALwAGAPX/0v/A/8v/zf/l/+j/EQCwAL8AUgCGAIsAAwCK//z+Vf71/aj9Lf3X/PD8LP0k/Tn9gP3X/RH+Uf6+/iT/ff/6/5MA/QBVAeUBXwKIAssCAAO8AmACYwI+AuQBlgFQASkB9gDCAJ8AawB9ALoAogBxAGcAeAB7AIAAlwCLAIUAiABRAA8A9P8AAAoA9v/9/wAA0P+u/7r/1P/q/+b/zP+z/5
L/cf9U/yT/Fv8l/xP/1/6D/lj+Wf5R/k7+N/4A/tn90P3D/bP9xv3Q/b/9yP28/YH9Nf0H/fr8xfzs/SYBPwPyAk4CWQIVAwkEyASJBdUFzAXtBQUFHANlAhwDjwP/AsABUgAG/1X+f/59/un95f1b/jP+5P0r/sf+ov+xAEUB3gA0AIUAhQHaAbQBqAE8AXcA7v+Q/xX/qv6v/sD+LP50/UL9Yf2h/RH+qv4p/3f/2f88AJ4AEgE+ASgBCQH8AAAB5ADBAJoAJgB6/9f+dP4z/tH9bf0I/YD8L/w6/Fb8Yvxp/F38QvxD/If8vPyI/A78zfs//QgC+gdrCigJtQedB1sICgkFCXoIlgc2BvEDQQCo/OT7eP1u/pj9wvse+qr5sfo5/Cz97P07/6sAcwG9AUwCVQOGBDAFQwT6AWwAogAnAacAS//P/db8oPzQ/PH8M/0A/jj/EgAmAB8A0QAZAikDpANaA1ECdAEyAcgA//9s/w3/q/4b/lX9yvzT/Fb93/3l/X39Lf1U/cP9//3Q/aL9nf2U/YD9Fv1w/OT7l/uz+wX88v3sAuEHZAlyCDsHpQYDB5QHggfSBnsFvAN5AU3+tvtj+4/8mP3P/T79ifx8/F/90P7W/5MAqgGlAi8DSQP6AqwCagLUAbMARP9J/ir+IP6H/a/8I/w3/LD8mf41AoAD8AE/AT0BYwE7AksCewHiAIIAAwDA/kj9Fv0+/ob/h/9p/tf95v3//Vn+R/6u/c79Yv5B/qr9UP1T/Zn9gP2c/HP7m/p5+v754/ma/68J2A7UDSgLpwjnBy0IXgdBBgkFeQPnAUH+8viQ9mT4Bfz7/tX/SP+h/nn+fv/MAFoBPQKgA9wE1QT/AnEB+wDbAFIAyP4D/Ub8zPxL/VP9cP25/c7+JAD3AD4BygCjAFcB7QHpATwBRwCw/y//r/5d/qf9+fzu/Ab94vzU/Cz9x/1O/oL+nf7H/jb/z/8FAPX/pv8B/1H+e/1q/FX7lfol+r/5Sflf+Mr6uAS8Dl0RFxBsDSgKtAgWB1AEmgLXAWwB9/9m+5X23vX3+Dz9RwAVAckAqAAYAesBMwLgAe8BxgKcAwIDEAH9/xkAtP9z/qn8aPsE/Kb9/P63/8P/wf8LAEwAnADKAM8AfwEwApgBUwB3/yb/Nv+L/67/I/8e/lL97/yW/Eb8a/ws/RD+j/6Z/of+rv7d/gH/Ff/0/o3+yf0G/Rb8q/qh+Vv5VPnA+Cj7RQX+DxUTFxIzD3gKQAeMBJsBogB3AGMApP8C/OL36/ZK+Xr96QBEAsEC9wK2AngCyAEYALT+GP/aAMgBYAE5AQwBbP9G/eH7dfvL/Hn/hAEOAk0BFwBb/y7/uP/bAJcBHAJ6AnEB3P9L/z//cv/7/xUAXv80/hT9afw6/Hb8Hf0E/uf+U/8W/9n+Hf9Q/wL/pP5B/or9r/zE+6z6pPn7+Pb4wfhY+DD9Lwm+EsUUGhOWDhQJOAVBAVT+Mf7Q/pD/1f5h+5f4wfh++6j/iAKfA78EbwXHBFgDCAF9/lz9Zv4AAUwCwAHiAaEBdP82/cT7hPs+/dj/tQH2Ab0AoP88/9n+Af/a/7gA0gE/AiIB1v8q/+L+7/7Y/nH+tf3b/HD8Qfwl/I/8df2Y/tD/ngDEAKgAogCDAOD/zP65/cr8Hfy3+w/7Efox+a34yfin+M36iwRWEEgVcBVjEu8LaAa0AS/90/t5/KT93v72/Qf8Rvuj+5L9OQAwAk0EIwbRBYADcQDZ/Wn8ifyb/uAA0AHhAsoDVQIl/2D8i/vr/Bv/8wDlAWcBmQBKAMX/QP9A/7j/5AAWAv4B9wD1//L+Iv7H/Yz9ev2E/Yv9fP0+/Rv9aP0a/gn/8/9wAEYA1/9S/3b+jf0I/RX9ff16/YP8IfuT+V/4CPhw+Ib9CglgEhYVhxQsEGsJxgNf/p36Yvrv+0j+lP99/hH9Rfwh/K/9MgCkAiEFUgblBMIBrf7C/Ar8yfwj/0kB1QKYBJ
oEpAHq/ZP7fvsH/ab+7P+5AM0A5AC/AJr/7f+9An0E4wSdBMsBp/4S/WP7sPqi+wb9W/6C/vb9s/1U/cb9Mf/p/xcAVgDp/7v+tv1i/Zr9F/7r/rD/Tf8p/v38+vqE+Dn3Yvc3+Mj7NAV1D/ETXRSCESYLCAWz/yj74Pkp+179af+a/7P+E/61/Yn+iABFAuwDRQWxBGUCDQBz/mn9GP0+/jEAzwFTAwgEVQIh/4j8j/sF/Or8Gv65//oAvAHmAdEAbP/r/mX/xwAeAh8CTAGWALT/lP7M/Zj94f1A/lX+Of7f/aP9Fv6+/jf/wf8dAC4ARQBdAE8AKADx/5T/Gf+A/rL9wPzT+xX7q/px+mb6qfos+wb8F/40A2EKFA8rENsOuQpuBbwAd/zy+cT59/oe/eD+eP/R/wIANQD6AKAB+gFzAnoC/gFKAU8Ad//r/vb+IACIAY0CSwPTAvUAEP/q/XP9Zv2r/XX+cf8NAFoAPQDh/5b/Lf/x/ywCUQNTAxYDvQEIAE3+Zfx5+0/79Ptx/UT+7v7S//f/8//8/6n/Qf/2/iT/a/9f/33/Yv+5/iz+0f2B/Uj9Av2V/O77fPuj+xD8qfwq/iACMgj5DI8OfA3SCboEJgBu/Bf6CvrU+4z+CQEZAu4BVwGfAEkAbwCTALcA7wAQAfQAgwAAAK3/0v/bADsCGgNDA3cC1ADo/kT9l/yz/CL9Of6n/54AHgE7AdwAMwDA/5//jP+n/wAATQBLAN//f/9n/yf/zv6r/n/+Yf60/g3/D/8k/1r/d/+g/8P/zf/a/9H/2v8GAAAA0P+A//L+fP5B/hH+Dv7o/Vb9C/0i/Sj9Wf14/W39A/56/xQCvAWpCNcJkAmnB4IENAEt/h78vvux/E3+9f/xAF8BkQGiAdoB5wGeAYkBngGYAXIBCgF9AAkA3f8PAEMAcgCuAIcA6v8v/2r+8P38/Tz+wv6E//X/MQA/AM3/WP8j/x7/fP+8/8j/3/9oALUCUgWBBQQEVQGL/cL6d/lX+cL6J/0rAKMCCgM7AggBzf+I/9T/zf91/wf/v/4j/iX9m/yJ/NL80f29/pz+uf1v/NP65Plc+jH8dADIBuALDg5YDakJlATQ/wf8Ffow+v/71/4LAQQCZgIqApUBRAEvASsBGgH8AP8AtAAPAMj/6f9wAI4BjwLJAmYCYwHk/2/+Vf3X/PX8ef1x/qj/hADNAKgAOgDi/77/hP9K/1r/mf/j/yAAIwAHAOX/t/+K/4H/nf+h/4b/dv9a/yz/JP9L/6H/KwDJAEkBcwFHAcsAHQCN/xz/vv6E/lH+Mv4e/gD+Df4o/kX+u/5Q/7r/8//P/4v/bP89/zX/VP9J/3//7f9RACUBKgLMAkUDbwMpA8ECIQJvAewAhABHABAA5v8GACkAYQDKAPMA2gCYAB4A6v8YAFIAtQD8AOEAtwB2AAYA0P/Z/9b/wP+R/1r/N/8v/0X/df+w//z/ZgDPAP8AGwElAfsAOAHUAbwBBwE0AEP/w/6P/h7+2f3v/WT+IP9c/w3/zP64/sD+qv4s/pj9hf3r/Sz+2v1Q/fD8u/zC/NH8bPzL+yz75PoL/Dz/NgRtCVIMVgxICkcGzgFP/uX7VPvE/BL/RQFcAnUCjAJlAswBSwG1ADMAcAANAWUBPAHcAMIA1AAAAXkBxgGQARkBLQDi/tj9P/00/dL9wv7k/90AGgG1AOj/6P4j/rP9mf0A/rD+Yv/5/1AAdwCDAEIA3P9w/9P+X/5I/jP+Wv7e/lv/4/+AAAcBhQHRAdQBvwGLATUB5QC0AJ0AngCNADUAwf9v/1v/nv8WAIIA9gBvAasBuAGpAXkBVwFIATIBCQF7AJL/5f6E/mv+x/4j/zn/Uv9n/0f/9f5//hP+4v3z/T/+gP6d/rP+tv6l/pL+ef5y/pT+yv4E/zj/cP+o/+b/JgBMAFQAYwBnAGQAbwBrAH4A3gBQAbQBCAIrAigC7QF7ARkB0wDAAAIBTAGFAc
EB3AG2AUoBzQCBAFkAaQC1APkAQgF1AWsBbwF6AW4BawFJARoBAAG1AEwA2v9n/0D/Of8V//z+3/6j/mD+F/7f/dD93f35/Qb+5/23/Yf9RP0C/dT8o/x0/Hb8tfwT/WT9iP1j/fP8UvwV/CD9uP+EA30H9gk8CsAI3AVZAkT/Nf3H/OP9rP9sAXcCxALQAngC2AFMAa4AVQCFANcAHwE2ASUBZwHkAV4CzAKGAnsBOwDF/m/9qPxY/Kf8pP3O/sb/UQBeACQAvP8///r+Af9G/8D/BwDd/7H/tP+x/7b/pP87/6r+BP55/VX9cP3o/ef+1P+WAGUB2QHtAe0B3wG5ASMBPwCQ/+r+h/6g/q3+0v5c/wIAnQDsAPIALAF2AYkBmQFlAQUB+QDrALgAtwC5ANkAPQFCAfUAqwAMAFn/yv4s/gD+kP6G/58AZwGaAXQB/ABAAHj/r/4f/gn+I/4g/gf+9v0u/qb+J/+T/7f/qP+2/7n/ov+a/4//lv/J//H/BQANAAcAEgAZAPT/xv+U/2L/Xv+D/6//5P8ZAEYAgACvAKcAbwAzABsAPwB+AKMAtgDPAOYA9AAHARwBNwFXAV8BRAEdAQcBCwEtAV8BbwFBAeUAZQDi/5f/gP+a/+D/MwB4AJwAcwAaANj/uv/Z/zkAlQDWAPkA4QCVABsAi/8c/+b+6/4N/xb/E/8b/wv/6v7X/uT+J/9+/7f/rP9c/wL/tf5X/vz9t/2e/an9kv1d/UD9U/2u/RT+NP5E/nf+xP4B/wz/WP9lAAwCrwOmBM0EXAR1AzkC0QCr/1b/rv9JANcABQEyAaoBLQK3Ag0D8QK7AmEC4gGWAVQBJAEuAQUBoQAaAE//qf5T/hr+G/5C/nz+C//V/4MA8QDrAJwAQQC1/yP/g/7Z/Y/9hP2f/eH9Fv6B/hv/hv/I/8P/ef9F/y//LP9V/6f/NADRAAwBJAEaAbMAXgAlANn/nf+Z/+f/NwBuAMAA6wDOAM4A5QDWANEA5QDxAP4A+QDpANwAywC8AH8AKgDx/8X/rv/V/zkAfACBAIAAXwAeAP3/CQAsAEcAhQDyAOgAVgC//yL/qf6O/p/+yf7r/vf+Hf8k/yP/df/O/yEAZgA8ANb/bv/v/ov+ef6Q/qv+z/4E/z//V/9c/4D/gv9f/3v/mP+L/7b/+P8tAFYATwAsAOv/j/9f/1P/T/97/9//kACLAVcClAJeAv0BjQElAdcAjwBzALoAJwFqAXMBcAGWAb8BsQGHAUIBzgBhAPf/iv9o/6z/NQDSADwBaAFNAdYALAB0/7P+I/7b/cb95/07/r3+Z/8CAHcAwADcANMApABpAD4AEgDr/73/Uv/P/o7+pP7q/hP/Cf/d/rj+zP4N/2L/yP80AJwA5QDZAHMA4/9V/9P+a/45/jz+Xf6n/vH+AP/8/hj/Tv+f/+n/AADa/5b/SP8O/yT/s/+WAJABUgKZAmkC7AFQAbsAUgAtAEUAdwCkAM4AFQF6AesBMQIiAuMBkwE8Ae4AkAAsAOH/u//V/wkAIgAuABsA6//K/6L/jP+3/wIATwB7AGMANgD+/73/hP8u/+r+5f7X/qz+f/5s/rn+Sv+//w0ANgBAAD4AEwC7/3X/Vv9T/2T/YP9a/3//pP+1/8P/wf/K/+f/6v/o//j/CwA1AGsAgQCJAHEAOwAcAAUAEABNAIMAqwDFAJ8AWQAcAN3/tv+r/6//7v9eAMEABgEMAdgAlwBPABoAHAArAA8Avf81/6z+Uv4v/kn+pf40/83/OgBUADcAFwAFAPb/2P+k/2//T/9A/0r/dv+0/+z/BQD2/9//1//p/woAJQA4AEoAXgBqAGgAXQBdAG0AhwCTAIUAZQBBACUAHAAqAE4AhwDEAO0A8QDOAJAASAAGAN3/1//s/xAAPwB3ALgA4QDXAJ4AUgAOAOD/tv+F/0//Mf8//23/rf/6/1AAqQDoAPIAyACDAD4AEADy/9H/p/+G/4z/0P88AJ
0AxQCtAG0AJgDc/3v/C/+u/pX+w/4M/0H/T/8//yb/C//z/uH+2f7j/gT/MP9W/3P/jv+3/+f/+v/V/4D/Hf/f/sL+n/51/mv+3f70/2MBhQLwAqQC8AEgAWgA4f+2/woApgAsAVEBLgEGASMBbQGVAWUBBQG6AJ4AnwCKAHYAjQDoAE8BfAFLAdkAQQCm/xv/uv6x/gD/fv/l/wAA1P+f/37/ef9z/1f/Lv8L//v+D/8x/0r/d/++/w4AKwDy/3f/+/64/rT+3P4q/4v//P9gAJQAmACRAKUA3wAKAe4AqwBeACUAAwDR/5b/d/93/7H/BQA3AFoAegCbAMwA2QC5AJoAbwA/ABUA1/+t/6n/r//K/+b/+P8VACAACwD8//T/+f/8/9f/oP+E/4v/qv+2/5b/ff+c/+j/LgA8AAsAyv+i/5f/o/+//+D/BAAWAAUA3P+5/77/9v9DAH4AmQCSAHYAXABCAC8AJgAbABIAEAATABEABgD0//D//P8XADYAUgByAJMApQCjAIUAXQA6AB4ABADn/87/zv/r/xYAPgBRAE4AQAAqAA0A6P/I/7f/tf+s/5X/hf+K/6//6f8PABgABQDh/8P/p/+L/4T/lP+x/9f/5//f/9D/zP/l/wwAJQApACgALgA/AEgAPQAiAAcA+P/0//L/8//0//3/FAAuAEMAUwBeAGoAaABJAAYAuf+A/3b/lv/F/+f/9//+//7/8v/l/+X/BgA8AF0ATQAVAMr/lv+F/4v/ov+3/8//4P/e/9L/y//X//v/HQAvADQALgAmABkABgD3//X/9v/6//3/AAALABkAHQAeABQAGAApADUAMgAgABMAJgBGAFwAYgBdAGAAYABDAB0ACAATADoAVgBPAD0AOQBEAEgALAAKAAsAKQBHAEUAJgAOAAoACQD7/9//1P/b/9v/vv+E/z7/Df/x/vr+Ef8w/1H/nP/9//v//v////////8AAP7/AwD8/wQA/f/8/wMA9/8EAPn/AAAAAAAAAQAAAAIA/v8FAPz/BQD//wIABQD5/wYA/////wUA+/8CAAAAAAABAP//AgACAP//AwABAAIAAAAEAP7/BQD//wMA/v8FAPv/CAD7/wkA/P8EAAMA/P8IAPr/BQD+/wEA/v8EAPv/BQD+//7/BAD3/wgA9/8EAPn/AAD7//7/AAD8/wIA+/8BAAAA+v8GAPj/BQD7/wIAAAABAP//AgD+/wUA+/8LAPf/CQD3/wgA9/8KAPf/BwD3/wkA9P8NAPH/BwD8//3/AQD+//3/BgD7/wgA9/8IAPr/BwD7/wIA//8AAAAA/////wEA/f8DAPz/AQD9//7/AAD5/wAA+f8BAPv//v/+//7/AQD/////AAABAAIA/v8DAP//AwD+/wIA//8CAAAAAgD9/wMA/////wgA+P8KAP3/AwADAAAABAADAP7/AwAAAP7/AgAAAAEAAwD//wQAAgAEAAMABAAEAAMAAwD//wIA/////wAA//8BAP3/AAAAAP7/BAD9/wMA/////wIA/P8CAP7//v8BAP7///8AAP7/AgD4/wQA/P8BAAEA/P8HAPj/CAD5/wMA///+//////////3/AwD6/wYA9v8JAPv/AAACAP3/AwD//wEA/v8FAPn/CAD8/wMA/P8GAPv/BwD7/wUA/f8BAP3/BQD7/wYA+v8DAP//BAD9/wUA+v8DAP7/AgABAAIA//8AAAMA+/8IAP3/AwD9/wUA/f8IAPv/BAD+/wEABAD7/wYA+/8EAP7/BQD8/wcA/f8DAAQA/f8IAP7/AAAEAPz/AgD/////AAABAP7/AwD+/wEAAQD+/wUA+/8EAPv/BAD7/wUA/P8DAAAA/v8EAP////8CAP7/AAADAP3/BQD7/wMA/P8DAP7/AQABAP7/AwABAAAABAD+/wIAAAD//wMA/f8BAPz/AgD9//7/AAD7/wMA//8BAAEAAAACAAEAAQADAAAAAgABAP//BQD8/w
QA/f8CAP3/BQD9/wMA/v8DAPv/CQD4/wYA+v8BAAAA/v///wAA/P8DAPz/BgD5/wMA+v8EAPn/BAD5/wIA/f8BAPv/AwD5/wAAAQD7/wQA/f8CAAAAAAAAAAIA/f8HAPj/CgD3/wkA/P8AAAUA/P8FAP3/AwAAAP//AwD9/wcA9v8LAPP/CQD9//3/BgD7/wAABAD6/wUAAAD8/wYA+v8CAAEA/f8IAPj/CAD7/wQABAD+/wYA/v8DAAIAAwD+/wcA+/8HAP3/BAAAAAEAAwD8/wIAAQD8/wQA/P///////v8BAP3/AAD9/wIA/f8BAP///P8BAPz/AQD+/wEA/P8DAP3//////wAA//8EAPv/BwD7/wYA+/8EAPv/AwD8/wAAAAD8/wIA/P////7//P/7////+f8FAPj/AgD9//7/AQD8/wIA/v////3/BAD7/wAAAQD8/wMA/f8DAP7/AwD8/wUA/P8EAP////8CAP7/AgD+/wEA//8AAAIAAAAAAAIAAAD//wQA/v8BAP7/AgD9/wIA+v8GAPn/CQD3/wgA+/8DAP7/AwD9/wMA//8BAAAAAwACAAIAAgABAAMAAQACAAAAAQAEAPv/BgD+/wEABgD7/wMA///9/wcA+P8IAP3/AQAFAPv/BQD4/wgA9P8HAPr///8BAP3/AAAAAP3/BQD8/wQA/v8DAAAAAAACAPv/BgD4/wUA+f8DAPz/AAD+//7/AQD8/wAA/f///wAA/P/9//v////9//7//v8AAPv/AwD9//3/BAD6/wUA+f8GAPj/BgD5/wQA/v8BAP7/AQAAAP3/BAD9/wEA//8CAP7/AgABAP//AgADAAEA/v8FAPr/AwD+/wAA/v8EAPn/BgD7/wMA//8AAAEAAwABAAAABAD+/wYA/f8GAP//AgABAAEAAgADAP//AQABAP7/AAACAP3///8BAP7/AgACAPz/BgD4/wcA+f8DAP7//v8AAPv/AgD//wAA///+//7/AQD//wEA//8BAAAA//8CAP//AwD/////BAD7/wQA/v8CAAAABAD8/wUA+/8DAAAA/P8CAPv/AwD///3/BAD6/wQA/f8AAAAA//////7//v/7/wMA9/8EAPb/BQD5/wMA///7/wcA+v8EAP7/+/8HAPn/BwD7/wQAAAAFAPz/CAD6/woA+v8GAPv/BAD4/wMA/f/8/wQA+v8EAP3/AAD+/wAA/v8DAPv/AgD8////AAACAPz/BAAAAP3/BAD//wMAAAABAAMA//8EAAIA/v8FAAAA//8FAPz/CAD6/wYA/v8CAAIA/P8CAP7/AQD+//7//v/9/wIA+/8BAP3/AwD8/wMA/f8BAAEAAAACAP//AAACAPz/AAD///7/AAD+/wAAAgD9/wQA/P8GAP3/AgD8/wMA//8BAAIA+/8GAPn/BQD7/wIA+v8BAP7//f8EAPr/BQD6/wIA/f8CAAAA/v8DAPz/BgD9/wUA/f8DAP//AAAEAPv/BwD9/wEAAAD+/wMA/v8CAP7/AQD///////8AAAAA/v8EAPn/BAD8/wAAAgD9/wQA+/8EAP3/AwAAAAMAAwD//wMA/P8EAP7/AAAEAPz/BQABAAEAAwAAAPz/CgD6/wUA//8BAAEA/v8CAP3/BAD3/wkA9/8GAPn/AwD//wEA/////wUA/v8CAP7/AwD+/wMA/P8FAPr/CAD4/wkA+P8DAP7//v8DAPv/AwD+////AgD8/wIAAQD9/wUA/f8BAAIA/f8DAPz/AgD8/wMA+f8FAPn/BQD8/wIA/v///wMA/f8DAAEA//8FAPv/BwD9/wIABAD9/wQA/f8CAP///f8EAPz/AgD8/wUA/f8DAP7/BAD8/wkA+P8IAPz/AwAAAP7/AgD+/wUA+v8IAPn/BwD9/wEAAgD8/wEA+/8CAP3/AQAAAPz/BgD8/wMAAQD8/wQAAAD//wMA/f8CAAEAAAAEAP7/BAD//wEAAAABAAAA/v8CAPv/BAD///
//AgD/////BQD7/wcA+P8FAPv/BAD8/wMA+/8GAPn/AwD9/wAAAgD8/wAAAgD+/wIAAwD9/wUA/v8DAAIA//8EAP3/AwD+/wEAAgD6/wYA+f8DAP3/AgD9/wIA/P8CAP//AAACAPv/BwD7/wYA/P8EAP7/AwAAAAEAAQD+/wQA+v8HAPn/BgD+//3/BgD6/wQA/v8AAAIA/P8EAPr/BgD5/wMA/v/+/wMA+f8DAP3/AgD5/wMA+/8BAP7////7/wEA/P8BAAEA/P8DAP3/AAD//wMA/f8EAP7/AwD//wEAAwD9/wcA+f8HAPv/BgD8/wQA/P8DAP3/BAD8/wUA+/8GAPr/BAD6/wUA/f8DAP3//f8BAP7/BAD6/wQA+v8GAPr/AwD//wAAAQD//wIA//8EAP3/AQAFAPj/CgD4/wcA+////wEA/f8FAPv/AgD9/wIA//8CAPv/BQD6/wQA/v8AAAIA/f8FAPz/BAD8/wEAAQD9/wIA///+/wAAAgD7/wUA/v8CAAAAAAAAAAEABAD8/wcA/P8CAAQA+v8HAPn/AAABAP3/AgAAAP7/AQD9/wMA//8EAP3/AwAAAP7/BQD5/wUA+v8DAP3/AQD9/wEA/////wAA/P8AAP7//v8AAP//AAD///7/AQAAAP7/AQD//wAA/v8BAP3/AwD6/wMA/P8BAP3/AQD8/wIAAAD9/wUA+v8FAAAA/v8JAPj/CAD8/wQAAAAAAAEAAAD//wEAAQADAPz/AgABAP7/CAD6/wYA+v8GAPv/CQD4/woA+P8FAP3/AAADAPz/BQD8/wIAAAAAAP//AAD+/wEAAAD+/wIA//8BAAIAAQD8/wcA+/8GAP3/BAD/////AwD7/wkA9/8IAPj/AwD//wAAAAADAPz/AQABAP3/BQD9/wEA/f8DAPz/AwD///3/BwD5/wcA+/8HAP7/AgD//wAA/v8EAPz/AgAAAP//BAD+/wQA/f8BAAIA+/8EAPz/AgD9/wAA/P8BAPz/AgD9//////8BAP7/AAD/////AAD+/wMA/P8EAP3/AQABAPz/BAD9/wIA+/8DAPz/BAD9/wAAAAD+/wQA/v8CAP3/BQD8/wIAAgAAAAUA+v8GAPv/BwD6/wYA+/8EAP7//v8CAPv/BQD6/wUA/f8FAP3/AAAEAP7/BAACAP3/CAD8/wUAAAD//wQA/v8EAP//AQD//wIAAAAFAPz/AwD+/wQA/v8DAP3///8EAPj/CAD2/wYA+f8GAPn/AwD9////AAD9//7/AAD//wAA/f8FAPn/CAD6/wMAAAD//wIAAAAAAAIAAAD/////AQABAAIA/f8CAP////8CAPr/BQD8/wIA/v8CAP3/BQD4/wYA+P8FAP3////+/wAA/f8AAP3/AQD/////AwD6/wUA/f8AAAIA/f8AAAQA/P8GAPv/CAD6/wgA+P8IAPr/BAD///7/AwD+//7/AwD8/wQA+v8FAPv/BAD+/wAAAgD//wMA/v8CAPr/CAD6/wgA+f8FAPr/BgD7/wQA/P8FAP//AgAEAP3/BAAEAPz/BAABAAMAAgACAAIAAQAEAAAAAgAEAP3/BgD+//7/AwD7/wQA/f8CAP3/AwD6/wMA/f8CAP3/AQD+/wAA/v8AAP7/AgD5/wQA+/8EAPz/AQD8/wYA+/8DAPz/AgD+////AAD///z/AgD4/wYA+v8CAPv/AQD+//7/AAD///3/AgD6/wAA/f///wAA/f8BAP7///8CAPv/CQD3/wYA/v8AAAYA9/8KAPn/CAD+////BgD8/wAAAgD7/wQA/v8CAAAAAQABAAMAAgADAAAABAABAAMAAQABAAMA/f8CAP3/AwD//wIA//8AAAIA/v8FAPz/AgADAPz/CgD3/wYA/f8AAAEAAgD6/wgA+P8EAP///v8BAP3/AwD8/wIA/P8EAP3/AgD/////BQD9/wQA//8CAP//AQACAP//AQAAAAEA//8CAP3/AwD+/wEA//////
//AAD+/wUA+P8HAPj/BQD8////AgD8/wEA/v/8/wQA+P8GAPj/BAD7/wIA+/8EAPr/BAD8/wIA+P8HAPX/CQD4/wMA+v8EAPr/BAD8////AQD7/wMA+/8CAP7/AQABAP3/AwD+/wMAAAD//wQAAAABAAQA//8CAAIAAgD+/wQA+/8HAPz/BQD+/wQA//8CAAIAAAACAAEAAQABAAQA/v8HAP3/BwD//wYA//8HAP7/BgADAAAABwD//wQABAD+/wQAAQAAAAQA/v8AAAUA+/8GAP///P8IAPT/CgD4/wcA+/8AAAMA/f8BAAAA/P8DAAAA/P8GAPn/BQD7/wIA+/8EAPz/AgD/////AAD8/wYA9f8KAPf/BgD7/wIA/f8BAPz///8AAPr/BAD4/wQA+f8GAPn/AwD//wAAAwD9/wIA//8DAAAAAAADAAEAAgACAAAABAD+/wcAAAACAAMA+/8KAPv/CQD7/wQAAgD9/wgA/P8HAPv/BwD7/wUA/P8CAAIA/P8DAPv/AAADAPf/BgD5/wMA/P////7/AQD+/wIA/v8CAP7/AgD9/wQA/v8BAAEA//8FAP7/BQD9/wMAAAD//wQAAQADAP//BAD//wMAAgD//wMAAAAFAP7/BwD9/wYA/f8FAP7/BAACAP3/CQD5/wsA+v8HAPv/CAD4/wkA+v8GAP//AgAAAAIAAgABAAEABAD+/wUA/v8EAPz/BQD8/wIAAwD6/wMAAQD8/wcA9/8HAPv/AgACAP3/BAD8/wMAAQD+/wQA/P8CAP7/AAACAPv/BQD8/wIAAQD+/wEA/v8BAP///v8BAPv/AQAAAP//AQAAAAEAAwD+/wUA+/8IAPr/BwD7/wIAAgD9/wMA+/8CAP//AAD//wAA/v8GAPr/BgD7/wIAAAD+/wIA/v8DAPz/BAD5/wUA/f8EAAAA//8FAP3/BAD/////BQD8/wYA9/8IAPX/CwD3/wgA+v8HAPv/BwD8/wAAAgD+/wQAAAD//wAAAQD//wAA///+////AgD9/wQA+/8EAP7//v8GAP3/AwACAP//AQAEAP//BAD+/wIAAAABAAAAAgABAP7///8CAAAA/v8EAP3/AwAAAP////8EAPz/AwAAAP3/AwD//wEA//8BAPz/BQD8/wIA/////wAAAAAAAP7/AgD9/wMA/P8GAPr/BAD+/wIAAQAAAP//AgD+////AQD+/wEA/f8AAP3/AgD7/wIAAQD6/wMA+/8CAPz/AwD6/wgA+f8DAAMA+f8JAPf/BwD6/wEAAAD8/wEA/v////7/AQD7/wQA/P8FAPv/BgD7/wMAAQD7/wQA/P8BAAIA/P8EAPr/AwD+/wIABAAAAAAABAAAAAEAAQD+/wIAAgD9/wYA+v8GAPj/BgD4/wUA/v8CAAEA/f8CAAEAAAD//wAAAwD7/wYA+v8EAAAA/P8EAP3/AgD//wUA+P8IAPr/AwD//wAA//8AAAAAAAD//wEA/v8AAP///////wIA/f8DAPv/BQAAAP//BAD9/wUA+/8HAPf/CgD3/wgA/P8CAP7/BQD9/wUA+/8GAP3/AQABAAEA/v8DAP//AwD+//7//////////v/+//7///////7/AgD7/wIA+v8FAPr/AQD///n/BAD6/wIA///7/wUA/P/+/wEA/f8BAP3////+//3/AQD8/wMA+/8FAPj/BgD9/wYAAQACAP//AQAGAPr/CwD4/wkA+/8DAP3/BAD9/wQA//8BAP//AQAAAAMA/v8DAP3/BAD9/wUA/v8BAP7/AgD7/wMA+/8BAP7/AAD+/wAA/////wMA/v8CAP///f8DAPz/AQADAPj/CAD3/wUA/P8BAPz/AgD9/wAA/f/+/wEA+v8GAPn/BAD9/wIAAgABAAIAAgACAAIAAgD+/wcA/v8EAAAAAAABAAIA/////wIAAQD//wMA//8CAAEA/v8DAP//AAADAPz/BAD7/wYA+f8HAPv/AwD9/wIA/P8CAP
z/AwD//wAA/f8DAPv/BAD7/wEAAAD8/wMA/P8DAPv/AAD///3/AQD///3///8AAPr/AAD+//7/AgD+/wAAAAAAAAAAAQADAAAAAQAFAPz/BgAAAP7/BgD8/wcA/v8DAAIAAAAEAP//BgD+/wIAAgD+/wQAAAAEAP//BAD8/wIAAAD//wQA+P8HAPn/BgD7/wEAAAD//wEA/f8AAAEA/////////f8FAPv/AQD///7/AQD+/////f8AAP3//v8DAPz/AAABAPv/BQD9//3/AwD6/wIAAQD9/wIA/v8BAP//AgD+/wIA//8AAAAAAQAAAAIA//8BAAIAAAADAAIAAgAEAP//BQD//wQAAAABAAYAAQABAAUA/v8CAAIA/f8EAP//AgD+/wUA+/8GAP7/BgD9/wUAAAACAAAAAwD8/wYA+f8JAPf/CgD1/wcA/P8AAAEA/////wAA/f8AAPv/BgD4/wgA+f8BAAEA+v8DAPz/AwD9/wEA/f8AAP//+/8DAPn/AgD9/wAA/f8DAPz/BQD6/wQA/v8AAAYA+f8GAP3/AQACAPv/BQD5/wUA/f8AAP7/AgD+/wMA+/8DAAAAAQD9/wUA+v8GAP7/AAD//wAA+/8FAPn/CAD4/wQA/P8CAAAAAQD9/wQA+/8IAP3//v8EAPr/CwD5/wcA/P8HAAEAAQAEAPz/BQAAAAIAAQAAAAQA/v8FAPr/CQD5/wgA9/8IAPf/BwD5/wUA+/8AAAAA/v8CAP///v8BAP7//v8BAP7/AAABAPv/BAD9/wAAAwD7/wIA///+/wAA/f8DAPn/BAD6/wIA+/8CAPv/BAD4/wMA+v8BAPv/AQD6/wYA+////wIA+P8FAPr/AAD///3///8AAPz/////////AQD8/wIA/f8CAP7/AAD+/wEAAQAAAAIAAAACAP//AgABAP//AwD//wEAAQABAAAAAQAAAAIA/v///wQA+/8GAPz/AQAAAP//AQD//wEAAgD9/wMA/v8CAP3/AAD9/wEAAQAAAP//BAD+/wEABQD8/wUAAgD9/wYA+/8FAP//AgABAAAA/v8EAPv/BQD8/wIA+/8DAP3/AgACAPn/BgD4/wMAAAD8/wIA/P8CAAAA//8AAP3///8AAPz/AgD////////9/wMA/P8EAP3/AAD+/wEA/v8CAP7/AQD+/wAA/v8BAAAA/v8AAPn/AwD7/wAA/f/8/wIA+f8EAPj/BQD3/wUA+/8DAP//AQD//wIA/v8BAAAAAAAAAP3/AQD//wIA/f8EAPz/AwD+/wAAAQD9/wEA/v8CAP3/AwD9////BAD9/wMAAAD9/wEAAQD9/wMA/v8BAP7/BAD5/wgA+v8GAP7/AQAAAAMAAQD//wUA/P8EAAAAAQAEAP7/BgD9/wUA+/8CAP7/AQAAAP//AAABAPn/CAD6/wQAAAD+/wQA/f8CAP//AAAEAPz/BAD9/wMAAQD+/wUA+v8FAP3/AQD+/wAAAQD+/wEAAAD//wEA/f8AAAIA/v8BAP7//v8AAP///v/+//v/AAD+////AQD7/wIA/////wEA//8DAP7/AgD9/wIA/v8CAP7/AQD+/wAA/v8AAPz/AQD8/wMA+v8EAPr/BQD4/wUA+/8EAP3/AgD9/wAA//8AAAQA+/8EAPr/AwD7/wUA/P8AAAAA/f8CAAEA//8DAPz/BwD9/wMAAwD7/wkA+f8GAAAA/v8FAPv/BQD+/wAA/////wIA/f8FAPv/AwD//wIAAQABAAEAAAADAAAA//8BAP3/AgAAAPz/AwD7/wMA/f8BAPv/BgD5/wUA/P8EAAAABQD//wIAAAADAP7/BQACAP//BwD7/wYA+/8IAPz/AQACAP7/AwABAPz/BQD9/wYA+v8HAPn/CgD2/woA9v8HAPj/BQD1/wQA+f8BAP7//P8BAP3/AQD7/wMA/f8BAAMA/P8FAPz/BQD8/wUA/v///wMA/f8DAAAA/P8EAP7///8CAPb/BQD6/w
AA///8/wMA+/8EAPz/AgADAPv/BwD8/wUA//8BAAAAAQD//wMA/f8BAAEAAQABAAEAAQD+/wUA//8CAAQAAAAEAAEABQABAAMA//8AAAMAAQAAAP///P8CAP3/BAD9////AAAAAP7/BgD6/wcA//8BAAQA/f8IAPv/BgD+/wUA//8CAAIA/f8DAAAA/v8EAPz/BQD7/wUAAQABAAMA/f8EAPz/BQD9/wIA//8CAP////8GAPf/CQD2/wgA+P8EAPr/CAD4/wkA+/8DAAAAAgD+/wYA//8DAAIAAQABAAEAAAABAAAAAgABAAMAAAADAP//AwAAAAMAAAACAP//AAADAP7/AAD///7/AgD+//7/AwD8/wQA/P8DAP////8DAP//AQABAAAA//8AAAEA/v8FAP7/AAACAP////8EAP3/AQD/////AAD9/wMA+/8CAPz/BAD7/wYA+f8EAPz/AwD8/wQA/v8DAP////8EAP3/BQD+/wAABQD7/wYA/v///wUA+v8JAP3/AgABAP7/BAAEAP//BQD9/wYA//8DAAAA//8CAAEA//8BAP//AwD9/wMA/P8FAP7/AQD//wMA//8AAAIAAAAEAPz/BQD//wEAAwD8/wMA/v8CAP7/AAD+/////v8CAPr/BwD4/wYA/P8DAP//AQABAAAA//8CAP//AwD//wAAAwD9/wQA/P8IAPr/BAD+/wQA/v8FAPz/BwD9/wYA/v8FAAIA//8KAPn/CwD7/wYA//8CAAEAAwABAAMA//8BAP7/AQAAAP//AAD//wIA///+/wAA/P8GAPn/BQD7/wEAAQD+/wEA/f8DAPz/AAD+//7/AwD9/wIA/f8BAPz/BAD8/wIAAwD7/wcA/P8FAP7/AQAAAP3/AAD////////+/////v8AAP3/BAD8/wIA/v8AAAQA/P8EAP7/BAAAAAIAAAACAAEA//8DAAEAAQADAP//AwD+/wIAAQABAAMA/v8GAPz/CAD7/wcA/f8DAAAA//8EAPz/BAD8/wQA/f8CAPr/AwD8///////9/wAA/f8EAPv/BAD8/wUA/P8EAPr/AwD8/wMA/f8BAAEA/f8GAPv/BQD6/wQA/v8AAAEA//8AAAEA//8BAP7/AgACAPv/CAD2/wkA9/8FAPv/AgD9//////8AAP//AAABAP3/AgD+/wAAAwD+/wAAAAD//wEAAAD6/wQA+/8CAAAA+v8EAPv/AwD8/wAA/v8CAP3/AgD8/wAA/f8AAAAA/v////z/AAD9/wEA/f8BAPz////9//7/AAD9//z////9////AQD+/wMA///+/wEA/P8FAPz/AwD+/wAAAAABAP3/AQD//wMA/P8EAP7///8EAP7/AQAEAP3/BgD+/wQAAAABAAAA/v8DAPz/AwD6/wMA/v///wEA/P8GAPn/BgD5/wYA/f8CAAAA+/8HAPn/BwD3/wYA9v8HAPn////+//z//v/+//z////9/////P8AAPn/AgD6/wMA/P///wEA+/8EAPv/AAD///7/AQD+/wAA/f/+/wIA/f8EAPv/AgD/////AwD8/wYA+P8EAPz/AgD//wAA/f8AAPr/BgD1/wQA+/8AAP//AQD7/wMA/f8CAAEA/P8EAAAA/v8EAPr/BwD9/wMAAQD+/wQA/f8CAP3/AwD8/wAA/f/9//3/AAD5/wQA/P8AAAAA/P8BAP//AgABAAAAAAD+/wEA/f8DAP3/AQD9/wAA//8CAAEA/P8GAPr/BQD7/wUA/P8FAP3/AQAAAP7/AQABAAIA/v8BAP7/AQABAP//AAD+/wAAAQD//wEA//8BAAAA//8EAP7/AQD+/wEAAAABAP3/AAD///3/AgD8/wAAAAD8/wEA/v/9/wQA+f8GAPr/BAD8/wEAAgD8/wIAAAD9/wQA+/8BAAMA/v8BAP3/AAD//wIA+v8EAPv/AgABAPv/BQD6/wcA/P8DAAIAAgD//wMA+/8GAPv/BQD9/wAABAD7/w
YA/P8EAAEAAAABAAEABAD9/wMA/v8AAAQA/f8FAP7/AgD9/wcA+v8EAP3//f8EAP///P8FAPr/AgABAP7/AwD8/wQA/f8FAPv/AwD//wIA/////wIA+v8GAPv/BQD8/wMA//8BAP3/AQACAP7/BQD6/wcA+/8FAPv/BgD8/wYA//8BAAIA//8DAAEA/v8DAP//AwADAP3/BQD8/wcA/f8CAAUA/P8GAP////8FAP7/AwD+/wEAAAAAAP//BAD6/wgA+v8FAPr/BwD4/wsA9/8HAPr/BQD+/wIAAAAAAAEAAQAAAAEA/f8CAP3/AgD9/wMA+v8DAPz//v8DAPn/BQD8/wAAAAD9/wQA/f8EAP3/AQABAP7/AQD//wQA/P8FAP7/AAADAP3/AgACAP3/BQD6/wIAAQD8/wcA9/8HAP3/AgABAAEAAwD//wQA/f8EAAAAAAAFAP//AQADAP//BAD+/wQA/v8CAP//BAAAAAUA/f8DAAIA/v8CAAEA/f8DAP//AQD//wMA/f8BAAIA/P8HAPv/BgD+/wMAAgD+/wIAAQABAAAAAQD+/wMA/////wEA/v/////////+/////P8EAPz/BQD7/wQAAAABAAMAAAADAAAA//8EAP7/BAD///3/BQD7/wYA+v8CAAMA+v8IAPv/BAAAAPz/BwD8/wYA+f8GAPz/BwD9/wEAAwD6/wUA/P8AAAQA+f8EAPv/AQACAPn/BgD4/wcA/P8CAAAA//8AAP//AQD+/wEA///+/wIAAAACAP3/BAD7/wUA/f8DAPv/BgD7/wYA/P8BAAMAAAAEAP//BAD8/wYA/P8DAP////8BAP////8AAAEA/f8AAAIA//8FAP3/AQAAAAEAAgACAP//BgD+/wMA/f8EAP3/BgD9/wMA//8CAP///v8EAPj/CgD2/wkA+P8HAP3/AwD//wMA/v8BAAAAAAAAAAIA/f8CAP////8AAP7/AgD+/wEA//8AAAMA/v8BAAAAAQD//wAA/v8DAP//AQD//wAAAwD6/wUA/P8CAP//AQD//wEA//8BAAEA//8CAAIA/v8CAAEAAAADAP3/BQD//wMA/v///wQA+P8IAPj/BAD+/wIA/f8FAPz/BgD+/wAAAwD+/wUA/f8GAP//AgD//wEA//8AAAEA+/8EAP3/AgD9/wQA//8CAP7/BAD5/wgA/P8DAP7/AwD9/wcA9/8HAPj/BgD7/wEA/v///wEA/v8DAPz/BAD+/wEAAgD//wEAAAD///3/AgAAAP7/BQD6/wYA/f8EAPz/BgD6/wgA/v8AAAAAAAD//wIAAAAAAAIA/v8FAP3/AwD+/wMA/v8DAP////8AAAEA/f8DAP7///8HAPj/CgD6/wIAAgD+/wIA/f8FAP3/AwD//wAA//8AAP3/AQD+//7/AAD+//7/AAD+////AAD///3/AwD8/wEA/f8BAP7/AAD+/wUA/f8DAP3/BAAAAAIA//8EAP3/BgD+/wIAAwD9/wUA/v8GAPz/BAD//wAABAD9/wUA/f8AAAAAAAACAPv/AgD8/wQA/v8CAP7/AwD9/wQAAQD//wIA//8BAP7///8AAP7/AAD///3//v//////AQD8/wEA/v8EAPz/BAD8/wMA/v8AAP//AQD//wIA/v8BAP7/AQD+/wAAAAABAP///f8CAPv/BgD7/wIA/f8BAAAAAAACAAEA//8EAAAABAD//wIAAAADAP7/AAAEAPr/BwD3/wgA+/8FAPz/AgAAAAIA//8EAPv/BQD8/wMA//8AAAIA/P8EAPv/AwAAAP3/AgAAAAAAAAABAPz/BgD4/wgA+/8CAAEA/f8FAP7/AQACAP//AAACAP3/AQD///7/AAAAAPz/AAABAP3/BQD9////BAD6/wcA+P8FAPv///8AAAAA/v8BAPz/AgD/////AQD9/wQA+v8GAPz/BgD8/wQA/v8EAP//AQD+/wUA/P8GAP7/AwAAAAEA//8BAAAAAwD+/w
UA+/8GAP//AQACAPr/CQD4/wgA+P8GAPz/AgD9/wAA/f8CAPz//v8BAP7/AQD+/wAA/f8FAPr/BgD7/wMA/v8BAP7///8BAP3///8CAPj/CQD5/wIAAgD8/wYA/f8BAP//AAACAP7/AwD8/wEAAAABAPz/BwD4/wkA9f8LAPb/CQD6/wEA///+/wIA//8AAP7/AQAAAAIA/v8AAAIA/P8GAPr/BAD+/wMAAAABAP//AQD+/wIA/v8CAAAAAQD/////AQD//////v8BAPv/AgD8/wIA/f///wEA/v8CAPz/AgD9/wUA+v8EAPv/AgACAP3/BAD//wIAAAABAAAAAgAAAAMA/f8EAPz/BAD//wAA//8CAP//AAADAPr/BgD4/wYA/f8AAAEA/P8IAPf/CAD6/wUA+/8EAP3/AwD9////AAD+/wEA/v8AAAAA/v8AAP7//f/+/wEA+v8DAPr/AwD7/wMA+v8DAP3/AwD8/wIA//8CAAAA/v8FAPv/CgD5/wMA///+////AAD9/wAABQD5/wgA+P8FAAAA/f8BAP3/AQD9/wEA/v///wAA//8DAPz/BQD5/wgA/P8FAP//AgAEAP7/BAD8/wcA+P8JAPv/BgACAAEA//8HAPv/CAD8/wEAAgD+/wQA/////wIA/P8AAP7//v8BAP7//////wAA/f8CAPr/AgD+//7//f8BAPv/AQD6/wcA+f8GAPj/BAD8/wMA/P8FAPv/BQD4/wgA+v8FAPr/BAD8/wMA+v8DAP///P8EAPv/AgD8/wAA/P8DAP3/AAABAPr/BAD6/wMA/v8AAAAAAAD///7/BAD7/wMA//8BAAIA/v/+/wUA/P8IAPz/AQAEAP7/AQAFAPz/BgD//wUAAAAFAAEA/v8HAPz/AQAEAPr/BwD8/wIAAQD9/wcA+f8GAPz/BAD+//z/BwD1/wYA+v///wMA9/8KAPP/CgD5/wAAAwD7/wYA/f8BAPz/AQAAAP3/AgD6/wMA/f8AAP3//v////z/AQD6/wAA/f/9//7//P////r/AwD5/wAAAAD8////AgD8/wMA/v8CAP3/BAD8/wYA/f8IAP3/BQD+/wQA//8GAPz/AwD//wMA/f8FAP//BAD//wIAAgABAP//AQD///7/AwD6/wgA+f8GAP3/BAD+/wYA+/8FAPz/AgAAAPz/AgD///3/AwD8/wQA/v8DAPr/BAD6/wQA/v////7///8CAP//AQABAP//AgD+/wAAAAAAAAEAAQAAAAMA/f8FAP3/BgD9/wcA+P8IAP3/AwD8/wMA/P8CAP///P8IAPj/CAD7/wEABQD4/wcA/f8AAAAAAgD6/wYA+v8AAAIA+v8DAPv/AQAAAP3////+////AAD///7/AQAAAP3/AwD//wEAAAD6/wQA/P8CAP7//P8FAPn/BgD6////AQD9/wAAAQD//wMA/v8FAP3/BAAAAAIAAQACAAAABgD//wAABQD8/wUA/v8DAAIA/v8CAAEAAAAHAPv/BgD9/wUA//8BAAIA/f8BAP7/AgACAP7/AQD+//7/AQD9/wAAAAAAAPv/BAD5/wQA+////wMA/f8CAAAAAAACAP////8CAP7/BQD6/wMA/f8CAP7/AwABAAEAAwABAAAABAAAAAEAAQAAAAAABAD8/woA+f8LAPr/BwD//wIAAQD9/wQA/v8BAAEAAAD//wIA/v8EAP//AgD//wMA+v8GAPv/BAD///7/AgD+/wMA/f8DAP3/AQD//wAA/v8DAP3/AQD/////AQD//wQA+/8EAP3/AwAAAP3/AgAAAAIA/////wIAAAADAPz/BAD5/wcA+f8FAP7/AQACAP7/AAADAP7/BQD6/wkA+P8JAPz/AgD//wEA/v8EAAAAAQADAPv/CAD8/wcA/P8HAPv/CgD7/wkA+/8FAP7/AQABAAEAAQD//wMA/f8AAAIA///9/wUA+v8EAAAAAAAAAAMA/f8BAAAAAAABAP7/AQAAAA
EAAwAAAAAABAD6/wQA/v8AAAIA/f8CAP7/AgADAP7/AwD7/wUA+/8FAPz/BAD7/wQAAAADAP7/AAD9/wIA/v8DAAAAAgD+/wAAAgD//wAAAwD8/wYA+/8GAPv/BgD/////AgD7/wgA+f8DAP///f8EAPv/BgD6/wMA/f8AAAIAAAABAAAAAQAAAAMA/v8EAPv/BwD2/wwA9P8IAPf/AwD7/wAA/f8CAPv/AwD6/wUA/P8BAP//AgD//wEA/v8AAAMA+v8HAPv/AwD/////AQD//wEA//8CAP7/AwD+/wIAAQAEAP//AQACAAEABAAAAAIAAQAEAP7/BgD6/wMAAQD9/wQA/f8CAAIA//8DAAEAAAABAAMAAAD//wAAAgD//wAA/v/+/wAAAAD+////AQD6/wYA+v8EAP3/AAACAAAAAwACAP//AgAAAAAABAD+////AAD9////AAD8/wIA/f8AAAMA/P8EAP7///8CAPv/AwAAAP//AQD9/wUA/f8EAP7/AwD9/wQA+/8AAAEA+/8EAPn/BQD5/wYA9v8GAPz/AAD//wEAAAAAAAEA//8CAP//AgD9/wQA/v8AAAAAAAACAPv/BAD6/wQA/v8CAP3/BQD9/wMA/////wIAAQACAAAAAAAAAP//AQACAP3/AgD+/wIA//8CAAEAAwAAAAEAAwADAAAAAwAAAAIAAwAAAAAABQD+/wIA//8FAPz/BQD+////BAD+/wMAAAACAP//AQACAP//BQD+/wQA/f8CAAEAAQD//wIA/f8FAPr/AwD6/wIA/P8CAPz//v8BAP7/AAD/////AgD9/wUA+v8CAP7//P8DAPn/BAD9////AQD8/wIA/P8CAPz/AgD9//v/BAD5/wIA/f//////AAD+/wMAAQAAAAMA/v8CAAEA/////wQA/P8EAP3/AQD//wMA//8FAP3/BAD//wYA/////wQA//8CAP//AwD8/wUA+/8GAPv/BAD9/wQA//8CAPz/AwD8/wQA/P8EAP3/AQAEAP7/AwAAAAEAAQAAAAEA/v8DAP3/BAD8/wIAAAAAAP//AwD+/wIA//8AAP3/BgD5/wUA+////wMA+/8CAPz/AAD//wEA/P8DAPz/AwD9/wMA/f8HAPz/AwACAAAABAACAP//AgAEAPv/BQD8/wAA/v8CAP3/AQD///////8BAPv/BQD8/wEA/////wQA/f8EAP////8CAP//AwD7/wMA+v///wAA+/8CAPv////+//7/AAD+/wEA/P8DAPz/AAD+//3/AgD7/wMA/f8AAAAA/f8BAP//BAD6/wQA/f8AAAIA/f8CAP3//v8CAP7/AAAAAP7/AQAEAPv/BgD8/wEAAwD///3/BgD4/wkA+P8FAP///v8IAPr/BgAAAAAAAwD///7/AwAAAP//BAD4/wcA/P8DAP3/AAACAPz/BAD6/wMA/f//////+/8EAPn/AwD8/wEAAAD///3/AwD6/wQA/f/+/wEA/P8CAPv/AwD7/wEA/v8DAPz/BQD6/wYA/f8DAAEA//8CAAIA/v8CAAAA/f8CAPz/AQD/////AAD+////AAAAAAEAAAABAAEA/P8CAAEAAAAAAAIA+/8EAP///f8GAP//AQABAP7/AwD8/wEAAAD9/wQA/f/+/wIA/v///wMA/P8AAP///v8AAAAA/f/9/wEA+P8EAPj/BAD4/wIA/f///wAAAAD//wAA/v8BAP7/AAD///3/BQD5/wUA/f8DAAAAAQAAAAMA//8DAP//AAABAP7/BQD8/wMA/////wYA+/8DAP3/AQD+/wIA/v8CAP7///8BAP//AgD8/wUA+P8KAPX/DQD2/wkA/P8CAAEAAQABAP//AgD9/wUA+f8EAPr/AQD///7//v/+/wEA/f8AAP3/AAD+/wUA+f8EAPz/AgAAAAEA//8CAPv/AgD+/wAAAwD4/wcA+/8CAAAA/f8DAP3/AQD//wAA//8CAAEAAAACAP//AgD///
///v8DAPz/AgD+////AQD7/wQA+P8EAP7//////////v8AAP3/AAD6/wIA/f/+/wAA/P8BAPz///////v/AgD7/wIA+f8CAPn/AQD7/wQA+v8EAP3///8FAPf/BgD7/wcA+v8EAPz/AQAEAPr/BwD7/wAABQD5/wcA/P8CAAAAAAACAP3/AwD8/wIA/v8AAAIA+/8FAP3///8CAPz/AgD///z/AwD8////AQD6/wUA+/8BAP///v8DAP7/AAACAPz/BAD9/wEAAgD+/wEA//8AAAEA+/8DAPz/BAAAAPr/AQD8/wIA/v///////v8BAP7//P8EAPn/BgD6/wUA+v8DAP//AAABAP//AAABAPz/BQD6/wcA/f8AAAIA+/8EAP3/AQD6/wQA+f8FAP3/AAACAPz/AAD//wIA/f8DAPz/AAD+/wAA+/8EAPr/AwD8/wEA/f////3/AQD+/wAA/f8AAP3/AAD7/wQA/P8DAPz/BAD+/wQA//8BAAQA/v8FAPv/BQD//wEAAQAAAAEA//8EAPz/AAABAP3/AQAFAPf/CQD6/wQAAAAFAPr/BwD7/wUA/f8EAPz/AgD+/wEAAgD6/wUA/P8EAP////8CAAAAAwD9/wMA//8BAP//AQD//wIA/v8DAP3/BAD8/wQA//8CAP//AwD8/wQA/P8FAP3/AQAAAP7/AQD//wAA/v8CAPz/AwD+////AAABAP3/BAD7/wEA/P8CAP3/AQD9/wEAAAADAP7/BAD7/wYA+/8FAPz/BAAAAAAAAwD//wMA/v8CAAAAAwD//wMA//8CAP3/BAD+/wIA//8AAAQA//8DAP//BAD//wQAAAADAAQAAAACAP//AwAAAAQAAQABAP//AgD+/wUA/v8CAAEAAQABAAEAAgD//wEAAQABAAEA/v/9/wQA/P8EAP3/AAD+/wMA+v8FAPr/BQD9/wMA/f8EAPz///8CAPr/BQD9/wAA//8CAPr/CAD6/wUA/f8CAAEAAAAGAPz/CAD9/wYAAAACAAMAAgADAAAABAD+/wUAAQAFAAAABAD//wYA/v8FAP7/BAACAP//BgD+/wMAAQAFAP3/BwD5/wkA+v8GAP7/AgABAPv/CAD3/wgA+P8HAPz/BAD5/wUA+f8JAPr/BgD6/wcA+v8HAPj/BgD3/wcA+P8FAPv/AAAAAAMA+/8HAP3/BQABAAEABAAAAAAAAgD//wMAAQD+/wUAAAAEAAAAAwABAAAABQD8/wgA+v8JAPn/CAD//wEABAD+/wcA//8JAPr/CQD9/wMA/v8AAAEA/f8CAP3/AQD9/wIA//8AAAAA/v8BAPz/CQD3/wgA+P8EAAAA/P8EAPf/BgD7/wIAAAD9/wAAAwD9/wMA/f8EAPz/BQD//wIABQABAAMAAwADAAQAAAAHAP3/BwACAAEACAD//wgA+/8JAP7/AgAHAPz/BwD//wEABgD8/wcA/P8GAP//AQD//wMA//8DAP7/AwD//wAAAAD8/wcA+P8FAPv/AgADAPv/BgD4/wcA/P8EAP//AwD+/wQA/P8DAP3/AgAAAAAA//8BAPv/BQD6/wQA+f8FAPv/AAABAPv/BAD9/wMA/f8CAP7/AgD9/wEA/f8DAPz/BQD7/wUA/v8AAAQA/f8DAP3/BQD8/wQAAAAAAAIAAAACAAIAAgACAAIAAgD//wEAAwD6/wgA/P/+/wEA+/8CAAEA+//+//7/AAD//////P8DAPv/AQD+//z/BgD8/wIA/v8BAAQA/P8HAPv/BAAAAP7/BwD7/wcA/v/9//v/AAD5/wUA+/8BAAIA/P8DAP3/AgD7/wEA+v8CAPn/AgD9/wMA//8BAAIA/v8EAP//AgABAAIAAgD9/wYA+f8KAPf/CQD2/wkA+/8DAP7/AwD//wUA/P8GAAAAAQADAP//BAD//wUA/f8CAAMA/P8IAP//AQACAAMA/f8JAPf/CQD4/wcA+/8EAP7/AQAAAP7/AgD8/wEA///7/w
IA+P8BAPv/AAD///7//P8EAPv/AAAAAPv/BQD7////AgAAAAEAAAD//wMA//8FAP///v8FAPr/BQD6/wYA+v8DAP3/AAABAPz//////////v8AAP3/BQD9/wQA/v8AAAEAAQD+/wMA/P8DAP7/AAD9/wQA+v8HAPn/AAD///7////8//v//f/+//7//P8CAPf/BwD5/wUA/f8CAAAAAQAAAAAAAgAAAAIA/f8EAP3/BQD8/wIAAQD+/wcA+v8IAP3/BQABAAMAAQAEAPz/CAD8/wIA/////wIABQD8/wgA/P8IAAAABwACAAMABAD8/wYA+/8DAP3/AQD///7/AQD+////BAD8/wYA+v8FAP3///8BAP3/AAD//wAA/v8AAP7/AAD8/wAAAAD9/wMA/f8DAP//AAABAPv/BwD2/wUA/f/9/wEA///9/wMA/P8AAAUA+P8IAPr/BAD//wAAAQABAP3/BgD7/wUA+f8KAPf/CgD6/wQA/////wAAAAAAAAIA/f8CAP//AQACAP//AQD9/wIAAQD+/wgA+f8GAP3/AAADAAEAAgD9/wMAAAACAAMA/f8DAP//AQABAAEA//8AAAMAAAACAAIAAAACAAIAAgADAAEAAAABAAEA/v8BAP7/AQD//wMA/P8EAP3/AwD//wIA/v8BAP//AQD9/wQA/P8EAP7/AQABAAEA/v8CAP7/AQABAP//AwD9/wAAAAABAP7/AwD+////BQD+/wQAAAABAP7/BQD8/wIA//8AAP7///////7//v////7/AwD//wIA//8DAAAAAgADAP7/BAAAAP//BgD7/wIAAQD9/wIAAgD+/wEAAgD8/wUA//8BAAAA/f8BAPv/CAD1/wkA9v8GAPr/CAD3/wUA+v8BAP7/AAD5/wQA+v8EAPz////8//////8BAP///v8GAPn/BwD8/wMA//8CAP7/AgAAAAMA/f8CAAAAAgAAAAIA/f8FAPz/BQD9/wQA/P8DAPz/AgAAAP//AAACAPv/BgD7/wMAAAD+/wIA/v8CAPz/BQD+/wMA/f8DAAEAAwABAAMA/v8IAPv/BwD//wMAAQABAAEAAgABAAEAAQD+/wIA/f8EAPz/AQD8/wAA/v8DAPn/BQD5/wUA+v8EAPv/AgD7/wAA/v8BAP3/AQD+/wEA/P8DAPv/BQD9/wEAAgACAPz/BwD4/wUA/P/+/wIA/f8AAP3/AAD8/////f/6//7//f/9/wIA+v8AAP///v8AAP3/BAD4/wUA/P8AAP///f8DAP7/AQD+/wMA/f8EAP7/AQACAP////8DAP7/AgD8/wUA+/8FAP7/AQACAP//AgD//wIAAAAAAP7/AwD7/wIA/f8BAAIA/P8GAPn/CAD6/wIAAgD9/wQA/P8FAPz/BQAEAPz/CQD8/wQAAgABAAEAAAADAPz/BgD+/wIAAwD9/wMA/v8BAAAAAQD+/wUA/v8DAAAA///+/wIA+f8CAAAA+v8FAPr/AgD+/wEA//8EAPz/BAD+/wMAAAD+/wIA/v8AAP///P8AAP//AQD6/wMA/P8AAP///P8AAAEA+f8CAPf/AQD7/wAA/P8DAPr/AgD8/wEA///+/wQA9/8JAPf/BQD5/wQA//8BAPz/BQD5/wUA/f8CAP7/AQD//wIA/f8GAPv/BQACAP//AgABAP7/AAD//wAAAAABAP7///8AAAAAAQABAP//BQD7/woA+P8MAPj/CAD//wIAAwD+/wQAAAAEAP3/BQD8/wIA/f8GAPf/BgD6/wQAAAD//wEAAgD7/wMA/f///wIA+v8CAPz/AAACAPv/BQD5/wIA//8AAP//AwD7/wQA/v8BAAQA+/8FAPv/BwD7/wMA/f8EAAAAAwD+/wEA//8DAPz/AAD///7/AQD+/wIA/P8EAPj/CAD5/wYA+P8FAPr/AQD6/wEA+/////z////9/wIA/P8BAAAAAAABAPz/AgD+/wIA//8BAAAAAwADAP3/BwD7/w
gA/v8AAAQA+f8AAAAA/P8BAP7//P8FAPz/AQD9/wEA/v8DAPr/BAD6/wIA//8AAP//AgABAP3/BQD9/wMAAgD+/wgA+/8FAAEA//8FAAEA/P8JAPn/CAD9/wAABQD9/wQA+/8DAP3/AgD8/wEA+/8AAP///P8CAPz/BQD5/wUA+v8GAPv/CAD5/wYA+/8FAPz/AAD///3/AQD+/wEAAAAAAAEA/v8GAPr/BQD9////AwD8/wcA+P8IAPj/BQD6/wMA+/8AAP7//v8CAP/////+/wAA//8BAP//AgD9/wEAAwD+/wYA+/8DAAAAAQABAP//AQACAP////8CAP3/BAD7/wUA/P8CAP7//v8BAAAAAAD9/wQA+v8CAP7/AAABAAMA+f8GAPv/BgD9/wYA/v8EAAAA//8CAPz/BgD8/wcA+/8GAP//BgD8/wIAAgABAP//BQD7/wYA+/8EAPz/BAD2/wsA9v8HAPj/AwD+/wIA/v8CAAAAAgD//wAABAD7/wQA/v8AAAMA/v8BAAAAAQD8/wMA+/8BAAIA/P8GAPf/BQD8/wIAAgD8/wYA/P8DAP7/AgD//wAA///9/wMA+v8EAPn/BgD4/wkA9v8HAPz/AQACAP7/BQD9/wUA/P8GAPv/CAD9/wAABAD6/wYA+P8FAP7/AQD8/wYA/P8CAAEAAAABAAMA/v8CAAAAAAADAPn/CAD6/wQAAQD9/wQA//8AAAEAAgD7/wIA+/8BAP///f8EAPz/BAD8/wQA/v8BAAIA//8AAAMA+v8HAPz/BAACAP7/AwAAAAAAAgAAAP///v8BAPz/BQD9/wAAAgD8/wQAAQD+/wQA+v8FAPn/CAD4/wYA+/8CAP3/AQD+/wIA/v/9/wIA//8CAP//AgAAAAIAAQABAAIAAQAAAAEAAQD+/wIAAAD9/wIA/v/9/wIAAAD9/wQA+P8IAPj/CAD8////BAD+/wIAAgD+/wMAAQAAAAEAAQAAAAAAAAAAAAAAAQD//wAAAQD//wIA/f8CAAAA/v8CAP7///8AAP7/AAD//////v/////////9/wEA/P8AAPz/AQD+//z/AAD+/wEA//8AAP3/AgD9/wUA/P8DAAAAAQACAP7/BAD9/wYA/P8DAAAA//8DAAAA/f8EAPz/AwD//wIA/v8CAP7/AAD9/wMA/f8GAPj/AgD8/wMA///+/wEA/P8GAPj/BwD4/wgA+v8FAP7/AgABAAAAAAACAP7/BgD6/wcA9/8EAP3/AQAAAAAA/f8CAP3/AgAAAP//AAD+/wIA/v8CAAAA/v8FAPz/BAD8/wAABAD5/wcA+P8FAPr/BgD6/wMAAQD//wIA//8AAAQA/v8DAAAAAAAEAP////8CAPv/AgAAAPz/AwAAAPv/CAD1/wkA/P8DAP//AgD//wEAAQD9/wAA/v8BAP//AAD+/wAA/v8BAP///f8AAP3//v8BAP3/AQD///3/AwD9/wEA/f8EAPn/BgD5/wYA+v8BAP3/AQD9/wMA+P8HAPf/CQD3/wgA+f8GAAAA//8GAPr/CAD8/wUA/f8EAP3/BAD7/wUA//8BAAAA//8BAAIAAQAAAAMA/P8DAAAAAQACAAEA/v8CAP3/AwD//wAAAQD+/wMA//8AAP//AQD8/wQA+/8DAP//AAABAAEAAgD+/wMA/f8GAPz/BgD8/wMA/v8CAAAA//8BAP7/AAAAAP//AgD+/wIA+/8GAPv/BAD///////8BAP7/AAADAPv/AwAAAAEAAQD//wYA+f8JAPn/AQABAAEA/f8FAPv/BQD//wQA/P8FAPv/AwD8/wMA/f///wAA/P8AAP7//f8EAPj/BQD8/wIA///8/wMA/f8CAP///////wQA/P8DAPz/AwD+/wEA/f8BAP3/BAD7/wQA/v/+/wQA/v8CAP//AQACAPz/BwD9/wUA/v8BAAAAAQAAAAAAAgD9/wMA+v8GAPf/CAD7/wEAAgAAAP//AwAAAAAABgD5/w
oA/f8EAAEA//8CAAMA/v8DAAAAAQD+/wYA/P8HAPn/BgD+/wMAAQD+/wEA/f8EAPn/CQD0/wcA+f8DAP///v8AAP3///8AAPz/BAD4/wcA9/8KAPf/BgD9/wIA//8BAP//AQADAP3/AwD///3/BAD9/wUA/f8BAAAA/f8EAPv/AgAAAP3/AwD//wAAAgD6/wQA+/8CAAEA+f8GAPb/BwD3/wQA/P8CAP//AAD+/wIA//8AAAEA/P8EAP//AQAAAAIAAQACAP7/AwD+/wMA/v8AAAAAAgD8/wMA/f8AAAEA/v8BAP7/AwD9/wQA/f8EAP7/BAD6/wQA/f8GAPz/AwD7/wUA/f8BAP//AQAAAAMAAAAEAAAA//8GAP3/BQD+/wUAAAADAAQA/f8HAPz/CQD8/wUA//8AAAEA/v8BAP7/AQABAPz/AwD8/wEA/v8BAP7/AAAAAPz/AwD7/wMA+/8CAPz/AQD//wEA/f8AAAIA/v8BAP7/AAAAAP3/AwD7///////8/wAAAwD4/wQA+f8EAPv/AQAAAPv/BAD5/wEA/f/+/wAA///9/wQA+f8GAPv/BQD9/wEAAQD//wQA/f8DAP//AwAAAP//BwD5/wUA+/8CAP//AQAAAP//BgD6/wsA+/8HAP//AwACAAIAAgACAAAAAgD5/wkA+f8GAPz/AwD+/wQA/P8FAP3/AQACAAAABAD9/wIA/f8DAAAA/v8CAP7/AAABAP///////wAA/v8CAPz/AwD8/wUA+v8GAPz/BQD//wEAAQABAP//BAD9/wQA/P8GAPz/AgD//wAAAgD///7/AgD+/wAA/v8AAAEAAAD///7///8BAP3/AgD8/wMA+/8AAP7//v8CAPv/AgD7/wEA/v8AAP7/AAD+////AAD7/wMA/P8AAP7/AAD9/wIA/P8AAAAA///+//3/AwD7/wQA/P8CAAEA/f8GAPj/CgD5/wcA//8BAAIAAQABAAQA//8AAAEAAAACAAIA/f8HAPz/BwD7/wUAAAABAAQA/P8HAP7/AwAEAP//BgAAAAUAAAAGAP7/BwACAAEABgAAAAIABQD+/wUA//8EAP7/AQACAAAAAgD//wIA/f8GAPf/BgD8/wMA/v///wIA/////wEA/f8BAAAA//8BAAAA/f8CAP3///8AAP////8DAPv/AQD//////v8CAP3/AwD8/wAAAQD9/wEA+v8DAPn/BAD5/wIA+/8DAP3///8CAP//AAADAP3/AgACAP//AQAEAP7/BgD+/wMAAQADAAAABwD9/wQA//8DAAIAAwD//wMAAQAAAAQA/v8HAPv/CAD6/wQA/v///wQA/f8AAAAA+/8DAPz///8BAP7//P8CAPn/BwD6/wQA///+/wEAAgD8/wcA+f8GAP3/AwABAAMAAAD//wQA/v8CAAIA//8GAP//AgADAPz/CgD4/wkA/P8HAP3/BQACAAAABAD//wAABwD6/wkA/P8GAAAAAgACAP3/CAD3/woA+f8HAP7/AwD+/wUA/f8HAP3/AwAEAPz/BwD8/wMA//8BAAAAAQD///7/AwD+/wIA/v8CAPv/BgD8/wQA/f8CAP//AwD9/wMA/v8AAAIA+/8EAPz/AgACAPz/BQD6/wcA9/8IAPn/BAD6/wMA+v8FAPv/BQD7/wcA/P8EAAEA/f8HAPv/BAABAPv/CQD3/wcA+f8DAP7/AQD///7/AQADAP7/AAACAPv/BgD6/wUA/f8BAP//AAD+/wIA/v8DAP//AgABAAEAAwD6/wgA+/8EAAIA+f8FAPr/BQD9/wIAAAACAP//BQD6/wUA/f8BAAQA/P8FAPr/BgD7/wIA/v///wAA/v8CAP7/AgD//wAAAAABAAIA//8FAP7/BAD+/wMAAgD//wMA//8AAAIA//8FAPv/AgD//wAAAgD//wIA//8BAAAA/v8FAPn/CAD7/wIA/f8DAAAA//8BAPz/BQD6/wcA+v8DAP7/AAAAAP7/BAD6/w
YA/P8BAAMA+v8GAPz/BwD7/wEAAQD+/wIA/v/+/wIA+/8EAPr/BAD5/wQA/v/+/wAA+/8DAP3///8AAAEA/f8FAPz/BAD8/wEAAQD8/wQA+f8DAP3///////3/AgD8/wMA+/8FAP3/AwD/////AgD9/wAAAAD//wIA/v8AAP7///8CAAEABAD9/wQAAQADAAEA/f8DAP3/BQD//wIA/v8CAPn/BwD6/wIAAQD+/wIAAAAAAAEAAQD9/wMA//8BAP////8DAP3/AQAAAP7/AQACAAAAAAAAAP//AQABAP3/AwD8/wIAAAAAAP//////////AQD9/wIA/v8DAPz/AgABAAEABAD8/wUA+v8JAPf/CQD3/wgA+/8EAP7/AwD+/wUA+v8HAPz/AgAAAAIA+/8HAPz/BQD7/wIA+v8FAPr/AQD8///////+/wAA///+/wAA/f///////v8AAPz//v8AAP7/AAD8/wIA///+/wAAAAD7/wQA9/8HAPf/BAD7/wEA/////wAA/f8FAAAABAAAAAEAAgAAAAQA//8DAAIA/f8FAPv/BQD+/wIAAAACAP7/BAD8/wYA+/8GAPv/BQD8/wUA/////wIA/P8DAPv/AQD///3/AgD9////AgD8/wQA/v8DAPr/BgD4/wYA+/8EAPv/AwD9////AQD+////AAD+/////f8BAP3//f8EAPr/BAD+/wIA//8FAP//BAACAP//BgD8/wUAAAAEAP//AgD//wMA/v8BAP//AwD//wMA/v8EAP7/AAADAPz/BgD8/wEAAQD7/wkA9P8MAPf/BgD8/wAA/////wAA/v8DAP7//f8FAPb/CwD1/wYA+/8CAP3/AQABAPj/BgD5/wIA///+/////v////3//f////7/AgD9/wIA/v8BAP7/BgD8/wgA+v8IAP//AQADAP3/BwD8/wYA/v8EAAEAAwAAAAIAAwD//wQAAQD+/wMAAQAEAAAAAgD+/wAAAwD+/wIA/f///wIA/P8FAPv/AQABAP3/AQD/////AwD7/wMA+v8GAPv/AwD9////AAD///7//v8AAPv/AgD///3/AgD7/wUA/P8CAPz/AgD8/wAAAgD8/wYA+P8FAP3/AwD9/wIA//8CAP3/AwD9/wUA//8AAAIA//8GAP3/CQD8/wYA/v8FAP//BQD+/wYAAQACAAQA//8BAAIA/v8EAP7/AwD9/wQA//8BAAMAAAADAAAAAwD//wIAAgD8/wYA+f8IAPj/BwD6/wIAAQD9/wIA/v8AAP7/AQD9/////v8AAAEAAAD9/wAA/v//////AQD9/wEAAAD8/wIA+////////P8CAP3///8AAP//AwD7/wMA//8AAAUA+/8CAAIA/f8CAAAA/v/+/wIA/f8DAPz///8CAAAA/////wEAAQD//wEA//8AAAQA/P8BAP//+/8FAPr/BAD///z/AwD9/wIAAAD//wIA/f8GAPv/BQD7/wYA/P8GAPz/BQADAAAABQD+/wMA//8EAP//BAD//wMA/v8HAPj/CgD5/wcA+f8GAPj/BwD4/wYA+v8BAP///v8DAP3///8DAPv/AQD+////AgD///z/AwD9/wIAAAD9/wEAAAD+/////f8EAPj/BwD1/wUA/P///wEA+v8DAPn/BQD3/wIA/f/+/wEA///9/wMA+f8BAP3///////7//v/+/wEA+f8FAPv/AQD/////AAD///////8BAP3/BAAAAP//BQD6/wgA/P8EAP7/AQACAAEA/f8GAPz/AwACAPz/BAD6/wgA+P8JAPn/BAD8/wQA/f8BAAIA//8AAAEA/////wIA/P/+/wQA+v8HAP7/AQD//wUA/f8HAPr/CQD6/wcA/P8DAAIA//8DAP////8DAP3/AQABAPz/AgD9/wEA//8DAPv/AwD7/wAAAgD7/wMA/f8AAAAAAQD+///////9/wIA+/8DAP7//////wAA/v8CAP///v8AAP////8DAP
v/AwD+//7/AQAAAP3/BAD5//7/AQD9//3/AQD4/wMA+/8CAPr/AwD4/wMA/f8CAP//AwD9/wEAAQD+/wIAAAD8/wQA+/8EAP7//v8EAP3/AwD+////AAAAAP7///8DAPr/BgD6/wIAAgD//wAAAgD9/wIA/v8AAAEA//8AAAAAAQD+/wAAAgD//wUA/P8BAAUA/P8FAAAA/v8HAPn/CQD9/wMAAQACAAEA/f8DAPz/AwD+////AwD7/wMA/f8DAP//AAABAAEAAAABAP//AAACAAAAAAABAP//AgAAAAMA+v8GAPv/AgAAAP3/AwD8/wMA/f8EAPz///8DAPz/BAD8////AAAAAP7//P8BAPj/BQD7//3/BQD2/wkA9/8IAPj/BgD+/wEAAgD8/wIA/v8CAP7/AQD//////v////7/AAD8/wUA9/8HAPj/BAD9/wAA/v8CAP//AQD+//3/AwD+/wUA+v8DAPz/AQD+/wIA/f///wIA+v8GAPz/BAD/////BQD+/wYA+/8EAAMA+/8JAPn/BgD//wAAAQAAAP//AAAAAAAA/v8EAP3///8GAPn/CQD8/wIAAgABAAEA/////wAAAAAAAP/////+/wIA/f8CAP7//v8CAP3/BAD+/wMABAD//wIA//8DAAAAAwADAP7/BgD8/wYA/f8FAP7//v8GAPv/BgD9/wMA/f8EAP////8GAPr/BgD9/wAAAAD+/wEA/P8AAPf/BAD7///////9/wAA///7/wQA/f8AAAMA/P8FAP3/BAD9/wQA/v8AAAMA/v8BAAAA//8AAAMA+v8EAPj/AQD+//3/AAD+/////v8CAP3/AgACAPv/CAD6/wgA/P8BAAMA/v8BAAIA/P8DAP7/BgD6/wkA+P8GAP//AwAAAAQAAQADAAIABQAAAAIAAQD+/wcA+/8DAP7//f8BAP////8DAPr/BQD6/wUA/v8BAAMA//8FAP//AQADAAAAAwAAAAMAAAACAAIA/P8EAP//AAACAP3/AwD+/wIABQD8/wYA/v8BAP//AgD//wEAAgD8/wQA/f8FAPz/AAABAPz/AwD8////BQD8/wQA/v8DAAAAAgD//wMAAgADAP//BAD+/wQA/v8DAP3/BQD+/wYA//8AAAMAAQABAAIA//8DAAEA//8AAAMA/P8CAP3/AgD9/wEAAAD9/wQA+/8FAPz/BAD+/wAABQD7/wUA/P8CAP//AQACAP3/BgD6/wUA/f8CAAEA/P8DAP3/AAD///7/AgD+//7/AQD+/wQA/P8DAPv/AgD//wAAAgABAAAA/v8EAP3/BQD+/wIAAAACAP//AwD9/wQA/v8EAAEA//8DAP//AgADAAIAAgABAAQA/v8FAP///v8GAPv/BQD8/wMA//8CAP3/AgAAAAAAAQD//wMA//8AAAIA//8FAPz/BAACAP7/AwD+/wEA//8DAP7//v8AAP7/AAD+/wAAAAD+/wIA/////wQA+/8GAPz/BQD6/wcA/P8DAAAA//8DAP//AAAEAPz/BAD8/wcA+/8HAP7/AQADAAEAAwACAAIAAwADAAAABQD+/wcA/f8DAAEABAD//wUA/f8DAP3/AQD//wEA/v8BAAAA////////AgD8/wEA///+/wMA/f8BAP3/AgD9/wIA+f8FAPr/BQD8/wEA/v///wIA/P8FAPr/BQD//wIA//8FAPv/BgD6/wMA/P8BAP7////+///////+/wEA/f8EAPv/BAD8/wUA/P8FAPz/BwD7/wgA+v8IAPz/AQADAP//BQD+/wUA+/8HAPv/BQD//wUA+/8LAPb/DQD5/wYAAAD+/wUA/P8FAPr/BgD6/wcA+v8DAPn/BgD3/wUA+f8CAP3//v8EAPr/BQD8/wQA/P8EAPr/BgD4/wYA+v8DAAEA/v8EAP3/AQAAAP3/BgD5/wYA/P8CAP//AgD+/wEAAAABAP//AgD+/wAAAAD///3/AwD5/wUA+/8CAP7///8BAP
7/AwD7/wQA/f8EAPz/AgD+/wAAAgD6/wIA/f8BAP/////8/wEAAQD8/wIA/f8AAAAAAAD9/wEA/P8BAP7/AAD+//3/AAD8/wEAAAD8/wEA/P/+/////v/+//7/+v8DAPr/BQD8/wMA/v8BAP7/AAAAAP//AwD7/wUA/P8CAP7//v8EAP3/AgD//wAAAgD+/wEA+f8CAPz/BgD8/wUA//8BAAAAAAD8/wAA/v/3/wUA9f8DAPv/AwAAAAAAAwD5/woA+/8HAP3/AQADAP//AQD///7/AAD+//7//v/9////+/8DAPj/BwD4/wYA+/8AAAAA+/8EAPz/BQD7/wQA+/8EAP7/AwD+/wMAAAAAAAAA///+/wEAAAD+/wMA+/8GAPr/BAD7/wMA/f/8/wEA9/8DAPn//f/9//v/AAD5//7//v/7/wIA+v8DAPf/BwD7/wUA/f8CAP//BQD8/woA+v8GAP3/BAD+/wIA//8CAPj/BwD5/wMA9//9//3//f/7/wAA+/8CAAAA/////wEA//8EAP//AAD///3/AwD9/////v/+/wAAAAD+///////8/wAA+P//////+f8GAPf/AAACAPr/BgD7/wYA/v8CAP//AgAAAAAAAQD+/wEAAgD///7/BAD+/wEABAD//wUA//8DAAEAAgAFAP7/AQAAAP7/AAD+/wEA/v8DAP7/AgAAAAYA/P8LAP3/CAD+////BQD7/wEAAAD7/wUA+f8CAAIA+/8FAPz/AAADAPn/BwD0/wkA9f8GAPn/BAD6/wUA+/8EAPn/CgD2/wgA+f8GAPv/BgD9//7/BQD8/wEA/v8CAPz/BgD5/wgA+f8HAPr/BAD///3/CAD5/wkA+/8DAAEA/f8GAPv/AwD6/wkA9v8LAPn/AgADAPn/CQD3/wcA/f8DAPz/BQD8/wgA+/8BAP///f8FAP7/BgD6/wgA+/8DAP7/AgAEAPz/BQD//wQAAQAAAP//BAD//wYAAAABAAAAAwAEAAEAAgABAAMABAACAAUAAgABAAMA//8DAAIA//8CAAIA/v8GAP3/BQD8/wQA/////wUA+f8FAP3/AgABAP3/BgD5/wkA+/8DAP//AQAAAAMA/f8FAPz/BQD+//////8BAP7/AwD8/wYA+/8FAPz/AAAAAP7///8AAP7//P8CAPr/BQD6////AgD9/wUA/f8BAAYA/P8HAAAAAQACAAMA/f8IAPv/AwABAPz/AAAEAP3/AwAAAPz/BwD//wIABQD4/wcA/P8DAP//AAABAAEAAAAEAAAA/v8BAP7/AQD+//7/AgD+/wUA+/8EAPr/AwD6/wUA/f8CAAAAAgD8/wkA+/8AAAYA+f8JAP3/BgD//wAABwD6/wkA/f8BAAUA+f8JAPr/BwD6/wMA/P8AAP7/AAD8/wIA+f8GAPr/BwD6/wUA/v8DAAIA//8FAP//AwACAP//CQD7/wYAAgD+/wkA+/8EAAQA/f8FAAMA/v8GAP3/BAABAAAA/v8DAAAAAQAAAPv/AwD7/wMA/P////7////+//7////9//z////+////AQD8/wMA/P8BAPv/BAD7/wUA+f8IAPr/BwD8/wUA9/8JAPX/BgD9//7///8AAPz/AQD6/wAA+/8BAAEA/v/9/wIA/P8DAP3/+/8GAPj/AgD///3//v8AAP//AQABAAMA+/8IAPf/DQD6/wYAAgAAAAEAAQAAAP//BQD9/wIABAD9/wUA+f8JAPf/CQD7/wIA/v8DAP///v8CAP//AgABAP3/AgAAAAAAAQAAAP//AAD+/wMAAAABAAQA//8EAAUA/P8JAPz/BQD+/wEA/v8HAPv/CAD8/wIAAgD8/wIAAQD8/wQAAQAAAAQA/f8BAAAA///9/wUA+f8EAPz/AgABAPz/CAD8/wQA//8AAAIAAQD8/wQA+v8EAPz/AAD/////AQD///7/AAABAPz/BQD6/wcA/P/+//3/+v8BAPn/AgD5/wMA+f8BAA
IA/f8DAPv/BAD5/wkA9/8HAPn/AwACAP/////+//////8CAPv/AwD6/wQA///+/wgA+P8IAAAAAgADAP7/AwD6/wAAAAD+/wQA/f/+/wUA+/8HAPz/BAACAAMAAwD//wYA/f8GAP//BAACAAAABAACAAUA/f8EAP7/AQACAAAA+/8EAPr/BAD+/wIAAAAEAPj/CwD2/wcA+////wAA/P8DAP3/BAD7/wMA+f8CAPz/AwD8////AAD8/wMA///+/wAA//8BAP7///8AAAEA/v8DAP7//v8CAAIA/v8BAPv/BAD//wIA//8CAP////8FAP7/AgD//wEA/v8DAPj/BQD2/wcA9/8DAP3/AQD8/wIA/////wQA9v8CAAMA+v8LAPT/CgD//wUAAQAAAAUAAAAEAPv/AQAAAPf/BAD3/wIA+f8AAPz/AwD7//////8BAP7/AgD9//z/BAD5/wcA+v8DAAMA+f8KAPf/CwD2/woA+/8FAAAAAQAAAAAABAD6/wsA9f8KAPv/AgAHAPv/CAD4/wsA+/8EAP3/AQD8/wQA+f8DAP3//////wIA/P8FAPz/AgACAAMA/f8FAPz/AwD7/wIA/P8DAPv/AwD+/wAAAwD7/wgA+f8FAP3/AAADAP3/BQD2/wwA9P8JAPn/AAABAPv/AQABAP3/BAD7/wAA///+////AQD9////AAD//wQAAgD+/wMA//8AAAIA/f8CAAEA+f8CAP///v8CAPr/BAD+//v/BgD1/woA+P8CAAIA/v8BAP//AAAEAPz/BwD3/wkA/f8DAAUA/v8KAPr/BwD9/wAABQD9/wUAAgD9/wwA+v8JAPr/AwAEAAAA//8EAPr/CQD1/wgA9/8EAPj/AAABAPz/AQD7/wIA/f8EAPz/BQD/////AQAAAAIA+v8GAPj/AwACAPn/BwD7/wAAAgD5/wgA+P8HAPz//v8EAPr/AwABAPz/BgD+//3/BQD9/wUA+P8GAPf/BwD6/wEA+f8EAPv/AwD///v/BAD9/wUA/v8DAP7/BAD//wMA/f8GAAIA/v8GAPr/BQD7/wEAAQD+/wEABAD8/wMA/f8EAPz/BgD4/wQA/P8BAAIA9/8HAPv/AwAAAP7/AwD9/wQA/v8AAP7//f/+/wQA+/8BAAMA/f8HAPz/BAAAAAIAAQACAP7/BAD9/wMA/v8EAAEAAgAAAAAA//8BAAAAAQD7/wQA9/8MAPb/BQAAAPz/BQD//wAABAD5/wEA/v8BAPz/AgD9/wEA/P8AAPr/AgD7////+v8DAPv/AgAAAP////8CAP//AwABAAAAAAADAPz/BAACAPv/CQD3/wIAAAD8/wEA/P/+/wEAAAABAAEA+/8HAPz/BAD+//3/BAD8/wMA/f8DAPv/BgD6/wUA/f8CAAAAAQACAAIAAAAEAP7/AwD//wIA/P8FAPv/BQAAAAIA//8AAAAAAQAAAP3//v8CAPz/AwD6/wAA/P/8/wEA+v8CAPz/AAD9/wAAAAD+//7/AgD//////v8AAP//AgD6/wYA+/8FAPz/AgD9/wEA/f8DAAAAAAD+/wEAAAD///7//v8CAP///f/+/////P8EAPn/AgD7/wAA/f///wEA/P8EAPv/AwD8/wUA+v8GAP///v8EAPz/AwD+//v/BgD7/wcA+v///wQA+/8IAP3/AQACAP3/AwABAAAABQD+/wkA/f8GAP7/AgAEAP3/AAACAPv/BgD8/wIA//8CAAMA//8BAP//BAD//wEAAQD7/wUA+f8EAPv////+//7//v8AAAAA/v8CAP3/AwD//wAA/f8IAPT/CgD2/wQA/P/9//3////8//7//v/5/wEA+P////r//f/7//r/AQD6/wAA/f/+/////f8AAAEA/f8EAPn/BQD8/wEAAwD+/wUA/f8BAAEA//8GAP3/AQACAP7/BwD8/woA/v8DAAQA//8GAPz/BAD9/wEA//8CAAAA//8CAP//BwD9/wkA+P8FAA
EA/v8GAPj/BgD9/wAA/f8EAPv/BgD7/wQA+/8EAPn/BgD7//3/AwD6/wYA/P8FAP3/AwABAPz/BAAAAAAABQAAAAEAAAAEAPz/CwD3/wkA+v8BAAQA/v///wMA+v8DAAAA+v8JAPr/BQD7/wIAAAAAAP//AAD//wMAAgD5/wkA/P/+/wQA+P8DAP//+/8DAP3///8AAAAAAgD9////AAAAAAAA/////////P////3//f8AAPv////+/wEA///8//z/AgD6/wYA+P8HAPv/BAABAP//AwABAP//BAD9/wMAAwABAP//BQD9/wAABgD8/wwA8/8MAPv/AwAHAP7/CAD7/wYA/v8EAAAA/v8BAP7/AwD/////+/8EAPr/BAD9/wAAAAD//wMA+/8GAPn/BgD9/wYA/f8EAP//BQD//wIAAQACAP7/BAD9/wUA/f8CAP//AwADAAEAAAAEAP3/BQABAPv/AwD9/wQA/v8AAAQA+v8JAPr/BAAAAP7/BAD3/wwA9v8JAPv/AQACAP//AgACAP//BQD+/wMA+P8IAPv/BQD///7/AgD9/wQA/P8AAAEA/P8CAP////8BAP3//f8BAP7/AAD///7/AAD9////AAD+/wIA/P8EAP3/AgABAP3/BwD4/wgA+/8DAAEA//8EAP//AAADAPz/BQD9/wAAAgD9/wIA//8BAPz/BQD4/wcA+/8EAP3/AQACAAAAAgD///7/AQABAAMA/v8BAP///v8FAPr/BgD7/wQAAQAAAAUA/f8EAAIA//8CAAMAAAAEAAEABAAAAAMAAAAEAAEAAQAAAAMA/P8CAP7///8DAPz/AgD+/wAA/f8CAPz/AwD9////AQD8/wEA///8/wIA/f///wEA/v8AAP3/BgD5/wYA+v8CAP////8AAP//+v8DAPn/BQD7/wIA+v8BAP7//v8BAP///f8BAPv/AAD8/wEA/v8AAP3/AQD+/wIA/v8EAPv/BQD9/wEABgD4/wgA/P8CAAQA+/8KAPj/AwD+////BAD7/wQA//8DAAAAAwABAAMAAgAAAAUA//8FAP3/BQD+//7/AgABAP//AwD9/wEAAgAAAAMA/P8DAAAAAQAEAP3/AQAAAP7/AwD/////AAABAP7/AgD9/wAAAQD9/wEA///+/wQA/f8AAAAAAgD//wUA/f8DAP7/BAD8/wgA+P8IAPr/BgD8/wMA/v8CAP7/AQD//wEA/f8BAP3/BAD8/wIA///7/wYA+P8HAPj/BQD7////AQD7/wMA/P8BAP3/////////AAD//////v////3/AAACAPj/BgD4/wQA/f////7/AAD//////v8AAP3/AwD9/wMA/v8AAAMA/v8CAAIA/v8GAP3/BQD//wMAAgAAAAIA/v8CAAIAAAABAAMA//8EAP7/AwACAP//BQD8/wYAAAACAAMAAgACAAQAAwABAAUA//8HAAEABAADAAIAAQAGAP3/BgD9/wUA//8BAAIA/v8FAPv/BgD8/wUA+P8FAPz/BgD7/wAAAgD//wAA///+/wMA/P8DAP3/AgD///3/AwD5/wUA/f///wMA+v8DAP7///8AAP7/AQAAAPz/AwD8/wMA/P/8/wMA9/8HAPf/BQD4/wQA/f8AAAEA//8CAP//AgD9/wYA+/8HAPv/CgD6/wcA/f8FAP//BQAAAAQA//8BAAQA/v8IAPr/CAD9/wMAAAACAAQA/v8DAP//AgD7/wYA+/8GAPz//f8BAPz/BAD6/wMA+v8BAP///P8EAPz/AQACAPv/BQAAAPz/BQD7/wQAAQAAAAAABQD9/wYA+v8IAPn/CAD+/wIABAD//wQA/f8FAAAAAQADAAIA/v8IAP7/AgADAAEA//8FAAAA//8IAPv/CAD+/wQA/P8IAPb/DQD2/wkA/P8EAAAAAgD//wcA+/8IAP3/BAD//wUA+f8HAPv/BAD//wAA/P8FAPz/AgAAAP//AQD9/wQA/f8EAP3/AQACAP
//AgD///3/BAD8/wUA+v8EAP3/AwD/////AwD7/wQA/P8BAP7////+/wIA/v8AAAIAAAADAP//AwD9/wYA/P8FAP7//v8GAPr/BAD8/wAAAQD+/wMA+f8IAPr/BQD+/wIA/v///wEA//8DAPz/BQD4/wcA+f8FAP//AgD//wQA/v8DAP7/AwD//wIAAAD9/wIA/P8EAP7/AQAAAAEAAgAAAAIA/P8DAP7/BQD8/wYA9/8LAPb/BwD5/wIA/v8BAAAA//8BAP7/AwD8/wcA+v8JAPv/BQAAAAEAAgACAP7/BAD//wEAAAACAAAA//8BAP3/BQD8/wEAAwD9/wUA+v8FAP3/AwD+/wEA//8BAP//AwD8/wMA/P8DAP7/AgD/////AQD+/wIA+/8HAPj/BwD7/wMA/////wIA//8EAPz/BAD7/wUA+/8CAP7/AAD9/wAAAAD+//7/AAAAAP7/AQD7////AwD4/woA9/8FAP3/AwD//wEA//8AAP3/BAD3/wgA+f8BAP///P8AAAEA/f8CAAAA//8BAAIA/P8EAP3///8CAP7/AQD//wAA///+/wIAAQACAAQA+/8IAP3/BQD7/wUA/P8HAPv/BgD6/wYA+P8FAPz/AQABAP//AAABAP7/BAD+/wAAAQD+/wIAAAD9/wUA+/8DAP7/AAAAAAIA//8AAAAAAQD+/wMA+/8FAPv/AwD/////AwD7/wEA/v8AAAEA/v8BAP7/AQAAAAMA/v8EAP//AgD9/wUA+/8GAPv/BAD8/wUA/f8EAP//AQD//wQA/P8DAAAAAQD//wEAAQABAP///v8AAP3/AgD6/wIA+/8CAPz/AQD+/wAA/f///////f8CAPz//f///////f8EAPb/CQD5/wEA/f8CAP3////+//7//f8CAPv/AwD9/wAA/v8BAAIAAQAEAAAAAQADAP7/BgD9/wYA/f8EAP7/AQACAPz/BwD6/wgA+v8EAP//AQAAAAIA//8CAP7/AgACAP7/AgD8/wEA/f8AAP7/AAD+/wAA///+/wIA/v8EAP3/AgD+//7/AgD8/wYA+P8IAPX/CQD3/wUA+/8DAPr/BAD5/wIA///6/wUA+/8EAPr/CgD4/wcAAAACAAMAAgD//wMAAgAAAAQA//8CAAAAAgD9/wQA/P8EAP//AgD//wQA/P8EAP3/BAD8/wYA+f8GAPv/AwAAAP//AQAAAP7/AgD9////AAD//wEA///+/wEA/f8CAPz/AQAAAP7///8BAP3/AAD//////f8BAP///f8CAPn/AgD7/wAA/v8BAP7/AAACAP7/AQAAAAIAAwAAAAMAAAAAAAQA/v8FAP7/BQD8/wkA/P8GAP7/BAADAP7/BQD9/wMAAQABAAQA//8DAP3/AwD+/wEAAgD6/wQA/P8CAP7/AwD6/wcA+P8EAP3/AgAAAP3/AQD8/wQA/f8BAPz/AwD8/wIA/f/8/wMA+P8GAPv/AAAAAP7/AAACAP7//f8CAPz/AQAAAAAA//8AAP//AAAEAPr/BQD+////AwD9/wMAAQD//wEAAgABAAEABAACAAEABQD8/wgA/f8EAAIAAQAFAAEAAwAAAAAAAgAAAAAABgD5/wYA/f8DAAAABQD9/wYA/f8FAP//AgABAPz/CAD3/wkA+v8CAAAA/f8EAPv/BQD5/wcA9/8FAPv//v8DAP3/AAABAPz/AgD8/wEA/P8FAPr/AwD///v/BQD3/wMA/v/6/wUA+v8CAP3/AwD/////AAD//wMAAAD//wEAAAABAP///v8EAPf/CQD5/wMA/f8BAP7/BAD7/wIAAQD+/wMA/v8BAAAAAgD+/wEA/f8AAP7/AgD9/wIA/P8DAP7/AQD//wAAAgD+/wMA//8AAAEA/v8EAAAAAAAEAP//BwD+/wQA/v8DAAEAAgABAAEAAgD//wYA+f8JAPv/BAD8/wMA/f///wAAAAD9/wIA+v8FAPv/AwD+////AQD7/wIA/v8CAP
3///8DAPn/CAD6/wMA/f8BAP7////9/wQA+v8BAP3//f8BAP///P8CAPv///////3//f8AAP3/AAACAPr/AwD7//7/AgD6/wMA+v8AAP///v/+/////////wAA////////AQD+/wAA//8CAAAAAgAAAAEAAQAAAAEAAQD//wQA/f8CAAIA/f8GAPz/BAD8////BgD6/wgA+f8DAP7/AgABAPv/CAD4/wkA+v8DAP3/AQD+////AQD9/wQA//8BAAEAAAADAAAAAwD//wIAAQD//wUA/f8CAAIA/v8CAAEA+/8GAPr/BQD6/wAAAQD//wMA/P/+/wMA+v8FAPn/AgD//wAA//8BAP3/AQD6/wUA+f8EAPz/AQD///3/AgD+/wAAAgD7/wIA/v8AAAMA+f8GAPv/AgD///7/AgD8/wMA9v8HAPf/AgD9//v/AwD6/wIA+v8CAPr/BAD7/wIAAAABAAAAAAD+/wIA/v8FAPn/AwD8/wMA/v8BAAIA+/8HAPj/CAD4/wMA///9/wUA+f8GAPv/AgAAAAEA//8EAPv/AwD9/wAAAgD//wEA/P8EAPz/BAD+/wIAAAABAAEAAQACAP7/BwD6/wgA+f8JAP3/BAABAAEAAQD/////AQD//wAA//8BAP///v8AAAEAAQABAP//AQD//wMA/v8BAAEA//8DAP7/AQACAP3/BwD5/wUA/P8DAPv/BAD9/wAAAQD/////AwD5/wgA+f8EAP///f8BAP///f8AAP3/+/8CAPz///8DAPj/BQD7/wQA/v8AAAIA/////wMA+/8DAP//AQD9/wIA/P8BAP7//v8AAPr/CAD2/wcA+f8CAP7/AAD//wEAAAD///7///8CAP3/BgD5/wQA/f//////AAACAPr/BQD5/wcA+/8FAPz/AwACAAEAAAADAP7/BQD/////BgD5/wsA9v8JAPr/AwD9/wEAAAD//wMA/P8CAAEA//8EAP//AgAAAAIAAQD//wAA/v8BAAEA/f8BAPz/AwD+//////8BAP3/AgAAAP7/BwD+/wUA/P8GAP3/BAAAAAQAAAAEAP//AgABAAEAAgD9/wQA/v8DAAEA/f8DAAAA//8EAP7/AAAFAPn/BwD5/wUA+v8BAPz//f/+/////f///////P8DAPr/AgD///7/BgD4/wkA+P8JAPr/BQD+/wEAAAAAAAAAAQAAAP7/AgD9/wIA+v/+/wEA+/8CAPv/AgD9/wEAAAD//wMA/f8EAP//BAD+/wEAAgD/////BgD3/wkA+f8HAP7/AgAAAAAAAgADAP7/BgD//wYA//8HAP//AgADAP3/AwADAPz/AwD6/wIA/v8CAP///f8DAPz/AgACAP3/BwD8/wQAAgD//wQAAQD//wUA/v8FAP//AgD///7/BQD9/wIA//8AAAAAAwACAP//BAD//wEAAAABAP//AwD9/wIAAAAAAAMA+/8EAPv/BAD6/wMA/f8EAPv/CAD6/wcA/P8EAP//BQD//wQA//8DAAEAAAACAP7/AgACAP7/BwD9/wQAAAACAAAAAwD//wIAAQD//wIAAQD9////AgD9/wEAAAD9/wEAAQD9/wMA///+/wYA+f8JAPn/CAD4/wYA+/8FAP//AAADAP3/AgAAAP//BQD6/wIAAAD8/wMA/P8CAP///P8EAPv/BgD7/wMA+/8EAPz/AwD//wMA/v8CAAAA//8FAPz/BAD//wEAAQABAP//AgD+/wYA/f8FAPz/BAABAAMAAQABAAMAAwAAAAIA//8CAAIA/v8CAP7/AwD//wEA//8AAAEAAAABAP7/BQD8/wQA/v8EAP7/AwD//wUA/P8FAPv/BQD8/wQA/f///////////wAA/P8GAPf/CAD6/wQA//8BAAAAAgD///7/BQD8/wUA/f8DAP3/BgD4/wwA9f8HAP//AgAAAAEAAQADAAAAAwABAAQAAgABAAYA/f8IAP3/BgD+/wMAAAAEAAEAAwD9/w
QA+v8IAPj/BgD6/wUA/P8FAPj/BwD4/wUA+/8DAP7//v8DAP3/AgD9/wAAAAD9/wIA+/8EAP3/AAABAPv/BQD6/wUA+/8FAP7/AwAAAAEAAAD//wMA+/8CAPv/AQAAAP3/AQD7/wEA//8AAAMA+P8HAPz/AwD//wEA//8GAPz/BQAAAAAABAD7/wYAAAACAAEAAQD//wMA//8DAAAAAQABAAMAAAAEAP3/BwD9/wEAAgD9/wYA+/8EAPz/BAD+//3/AQD+//7/AQD5/wMA/f///wIA/f8CAP//AgD///7/BAD5/wUA/f///wIA/v8CAAEAAQD8/wQA+/8FAP3/AAACAP3/BAD9/wIA/f8DAAAA//8CAP3/AgD8/wMA/P8CAP3//f8DAPz/AwD9/wAAAQD+/wEA/v8DAP7/AwD6/wcA9/8KAPH/DAD0/wgA+v8AAP7/AAD//////v8AAP7/AgD9////AAD7/wMA/f8AAP3/AAD8////AQD7/wQA+v8AAPz///////3//v/9//3/AAAAAAEA//8BAP3/BAD5/wYA/P8DAP///v8CAAAA/v8AAAAAAAACAP3/BAD7/wYA/P8EAAAAAgABAAIAAAAFAPv/BwD4/wUA/f8AAP///////wEA/P8DAP7/AQAAAP3/BAD8/wQA///8/wYA+v8EAP3//v/+/wEA+/8CAPj/AgD6/wEA+v8AAP7//f////v/AAD7/wAA/v/+/wAA/f8AAAEA+/8CAPv/AAADAPv/AgD8//3/AgABAP7/AAD+/wAAAwD9/wEAAQD8/wIA/v8AAAEA/f///////f8AAP3/+/8CAP7//f8EAPn/BAD9/wIAAAD9/wQA/v8DAP3/AgD//wMA//8EAP3/AgABAPz/BQD7/wIA/f/8/wEA+v8BAPr/AgD9/wEA/f8AAP3/BAD7/wgA+f8HAPn/AwD+/wAAAQD7/wMA/f///wMA/v8BAAIA/P8DAP7/AQABAAAAAAAAAAAAAAD+/wQA/v8BAAEA///+/wMA/f8BAP7/AAAAAAAAAQD/////AwD9/wYA/f8BAP3/BAD7/wUA/P8BAP7//f8BAP3/AAAAAP3/AAD+////AAD+/wEA/////wAA//8CAP7///8CAP3/AwD7/wUA/f8DAP3///8AAAEA/P8BAP///v8DAPz/AQAAAP//AwD+/wQA//8DAAIA/f8DAP///v8FAPv/BAD//wAAAQABAAAABAD9/wQA//8EAP3/BQD8/wAABAD9/wMAAwD9/wIAAAAAAAAAAAD9/wMA/v8AAP//AQD+/wIA/f8EAPz/AwAAAP//AgD8/wQA//8BAP//AAD9/wIAAAAAAAEA//8CAP3/AgD8/wUA///+/wQA/v8BAAAA//8CAAAABAD//wIAAAACAAEAAQAAAAAAAwD+/wUA/v8CAAIA//8DAAAABQD8/wcA/P8EAAEAAAAEAPr/BgD7/wQA//8BAP7/BAD+/wAAAAABAP//BAD8/wIA//8DAP7/AgAAAP//BAD8/wcA+P8GAPr/BAD8/wMA/P8CAPr/BAD6/wMA/f/+/wIA+/8DAP3/AgAAAAEA/v8BAAAAAAAAAP//AgD+/wUA/v8AAAIA//8CAAAAAgD8/wUA/P8BAAEA/P8GAPn/BwD9/wEABQD9/wMAAwD9/wUA/v8CAAMAAgD//wMAAQABAAIAAAABAAIA/v8FAP//BQD//wIAAQABAP7/AwD/////AgD+/wEAAgAAAPz/BQD7/wcA+/8HAPz/BgD9/wQA/P8IAPn/BwD8/wAABgD4/wgA+f8EAPz///8CAPn/AwD8/wEAAgD8/wQA/f8FAP7/BgD8/wgA+v8GAP//AAADAP7/AgAAAP7/AwD+////BAD8/wEABgD5/wcA+f8IAPv/BQD7/wUA/f8GAPv/BQD9/wMA/P8DAPz/BAD8/wEA/P8EAPv/AwD6/wQA/v8BAP//AgD9/wMA/P8CAP7/AAD//wAA//8BAA
IA/v8BAAEA/P8HAPn/BQD9/wIAAAABAP//AwD//wYA/v8DAAAAAAACAP//AgD6/wkA9v8HAPr/BAD9////AwD9/wYA/v///wMA/f8GAP//AQAFAP7/AwD+/wMA/f8FAP//AgD//wIA/v8AAAAA/v8CAP//AAAAAAAAAgD//wMA//8CAP3/BAD8/wMAAAD//wEA///+/wMA/P8EAPv/BAD9/wQA/v8AAAMA/f8CAAEA+/8EAP7/AAADAP7/AAABAP3/AwD+/wAAAgD8/wMAAAD9/wcA+v8DAAAAAgAAAAEA//8CAAEAAAACAAEA/v8FAPz/AgD+//7/AgD///7/BAD6/wgA+v8HAP3/AQACAP//BAD+/wUAAAACAAAA/v8DAP3/AwD7/wIA//8BAPz/CQD4/wkA+P8FAP7/AgAAAAIA/f8FAP3/AgD/////AAD///7/AQD+/wAA///+/wQA/f8EAPz/AwAAAAAAAgAAAPz/AgD9/wIAAAABAP//AwD9/wMA//8DAP3/BgD9/wIAAAD9/wUA/P8EAP////8DAAEAAAABAP////8FAP3/AwD+//7/AwD9/wEAAQD+/wQA/f8HAPn/BgD8/wMAAAD+/wMA//8BAAEA///+/wEA/P8EAPr/AwD4/wcA9/8GAPn/AgD+/wEA+/8EAPv/AwD7/wIA/f8BAP7/BAD//wAAAQAAAAIAAQAAAAQA/P8GAP7/BAD//wMA/v8GAP7/AgD//wIAAgAAAP//AgABAP7/AQAAAP//AgD6/wQA+/8HAPv/AwD/////BgD9/wIAAAAAAAEA///9/wQA+f8DAPz/AgD6/wEA/f8DAPn/BgD4/wkA+f8FAP3/AQAAAP////8CAP3/BQD6/wYA+f8GAPr/AwD9/wUA+v8DAP3//v8GAPj/BAD+////AgD+/wMAAAAAAAIAAwABAAMA/v8CAAIA/v8CAAEA/v8BAP//AQD//wEAAQD9/wQAAAAAAAIA/f8CAP//AwD8/wQA/P8DAP7/AAAAAAAAAQD8/wUA/P8EAP////8AAP7/AwD//wEA//8BAP7/BgD5/woA+P8EAP///v8EAPv/AQD8/wQA+/8AAAAA//8DAP////8BAP//AQD8/wMA/f/9/wIA/P8CAP7//v8CAP7//v8EAPr/BgD6/wMAAAACAP//AwD9/wUA/v8BAAAAAgD//wQA/f8HAPv/BgD7/wQA/v8CAAAAAwD//wAABAD+/wAAAgD9/wQA///9/wQA+/8GAPf/BwD1/wkA+P8CAP7//f8EAPr/BgD5/wQA/////wIA/f8AAAMA/f/+/wIA+v8FAPv/AAAAAAAA//8CAPz/BwD7/wQA+/8EAP//AQD+/wEA/v8CAP////8BAAIA/f8AAAMA+v8IAPr/AgD+////AAABAP///v8CAP7/BAD8/wIA//8BAP7/AgD9/wYA+/8GAPz/BAD9/wIA/v8BAAEA/v8GAPn/BAD///7/AQD+/////v///wAA/v8AAP3/AgD+/wIA/v/+/wIA/////wEA/P8DAP7/AgAAAAAAAwD+/wMAAAACAP//AwD+/wQA/f8DAP7/AgD9/wQA/v8AAAQA+P8HAPn/AwACAPr/BgD5/wkA+P8HAPv/AgAAAP3/BgD7/wMA+/8BAP7/AQD+/wIA/f///wIA+f8EAPf/BQD5/wMA+v8CAP3///8BAPv/AwAAAPz/BQD6/wcA/P8DAP3/BQAAAAEA///9/wIA/f8AAP7//v8FAP3/AQD//wEAAgD8/wAA/v8BAP3/AgD7/wIA/////wMA/f8DAP3/BAD+/wQAAAACAAIAAQABAP//AwD8/wYA//8CAAMAAAADAAEAAwD+/wMA/v8DAP//AQACAPr/CQD2/wQA/P/+/wIA/P8BAP7/AAD9/wMA+f8DAPz///////3/AQD8//3/BQD5/wcA+v8BAP7/AgD9/wIAAQD9/wIA/f8DAP//AQD7/wMA/v///wIA+v8DAP
7/AAD9/wAA/P8BAP7/AgD7/wMA+/8BAAAA/f8AAAAAAQD//wEA/f8BAAIA+v8IAPv/AwD///7/BQD9/wQA//8CAAEAAgD9/wcA+/8IAPv/CwD6/wkA/f8DAAQA/P8FAPz/AgADAP3/AwD+////BgD7/wQA/P8EAP//+/8GAPf/BQD6////AQD8/wQA+v8DAPv/BAD8/wMAAAD+/wMA+v8DAP7////+/wAA//8AAPz/AAD8/wAA/v/9////+v8AAP3//P8CAPX/AwD7/wAA/P8BAPv/AgD9/wIA/////wIA/v8CAAAAAQACAAMAAAAFAPz/BwD9/wYA/v8AAAMA/v8CAAIAAQADAP7/BQD+/wQA/////wIA/P8BAAEA//8CAP7/AwD//wMAAAABAAAAAQD//wAA/f///wMA/P8BAAAA/f8GAPz//v8FAPj/BAD///3/AgD+//7/BAD9/wUA/P8DAP3/AgD9/wQA/P8FAP//AQAAAAIA/v8GAP3/BgD8/wMA//8CAP7/AwD7/wIAAAD6/woA+P8GAP3/AQADAPz/AwD/////AwD9////AQD///v/BgD2/wYA+/8AAP7/AQD6/wQA+/8BAAAA+/8FAPz/AwD9/wQA+/8FAPr/AgD+/////v8BAP////8AAAAA/P8CAP3/AQD//wIA/v8CAAIAAQACAP//AgACAAAABAABAAIAAAAEAP3/BQD8/wUAAgD+/wIAAAACAAMAAQD//wQAAAACAAAAAAD//wEA/v8CAAAAAgD8/wEA/f8CAP3/AAD+/wIA/f/////////+////AAAAAAAAAQAAAAEAAAD//wIA//8BAP///v8DAP3/AgD//wIABAD8/wsA9v8NAPj/CAD7/wUA/v8BAAYA/P8GAAEA//8IAPv/BQD+/wAAAwD9/wMA/v8AAAUA+v8GAP3/AwAAAAEAAQD9/wIA/v8DAPz/BQD6/wUA/v8AAP//AwD6/wcA+P8FAP3/AQD+/wEA//8AAAIA/v8BAP//AgD+/wQA+/8CAAAAAwD8/wYA+f8JAPz/AAABAPz/BgD4/wcA+v8FAAAA//8BAAAAAgD+/wUA/f8CAAMA/v8DAPz/AgABAP//BQD+/wIAAAACAAAABQD9/wYA/v8FAAAABQD//wMA/f8CAAIA//8DAP//AQD/////AQABAP3/AgD///7/BgD7/wMAAgD9/wMA/f8EAPz/BAD8/wQA/v8FAP7/AgADAPr/BAD9/wUA+/8DAPz/AgAEAP////8EAPn/CQD3/wcA+v8GAPv/BQD+/wMA/////wAA//8BAP//AwACAP3/AgD9/wQA//8CAP7/AgD9/wcA+v8IAPv/AwD+/wAAAgAAAP3/AwD7/wUA+f8IAPj/BQD7/wMA/P8HAPz/AwD//wIAAQACAP////8CAP3/BQD5/wUA+P8CAPz////9/wMA+f8EAPz/AAD//wIA/P8GAPv/AgAAAP3/BQD7/wMAAQD9/wQA/P8DAP3/BAD8/wMA//8AAAQA+/8HAP3/BQD+/wIAAwAAAAQA//8CAAQA/v8FAPz/AAADAP//AAAAAAAAAwD//wMAAAAAAAMAAAAEAPr/AwAAAP//AwD8//z/BQD4/wYA+v8EAPn/BgD5/wYA+f8GAP7/AQADAP//AgAEAPv/BgD9/wMA/P8CAPz/AQD+//3/AQD9/wEAAgD9/wQA/P8BAAEA/f8BAAAA/v8DAP3/AwD//wIAAAD//wMA/P8DAPz/AAD//wAA/P8DAPz/AgD6/wMA/P8DAPz/BAD8/wIAAgD8/wYA/P8DAP7/AwD8/wQA/f8DAP///v//////AQD//wEAAAABAAAA//8DAPz/BwD7/wQAAQD//wAAAwD5/wkA+P8GAPz/AwD//wAAAwABAAMA//8FAP//BQD+/wYA+/8JAPz/AwACAAAAAAAEAP7/AwD//wIA//8BAAMA//8BAAIAAAAAAAMA/f8GAP//AgD//wAAAwD//w
AAAgD8/wgA9v8GAPn/AgD+//3/AgD6/wMA/f8AAP3/BQD6/wYA/P8AAAAA/v////3/AAD9/wMA+/8DAPz/AAD+/wEA/v8AAP3//P8CAP3////8/wAAAAD+/wEAAQD+/wgA+/8DAAAAAQAAAAAAAgD+/wAAAAABAP//AwD+/wMAAwD+/wUAAAABAAMA/f8IAPj/CQD5/wYA/P8EAP3/AgAAAP//BQD8/wQA/P8CAP7/AQD//wAAAgD9/wYA/P8EAAEAAAAAAAAAAwD8/wQA/v8AAAEA/v8BAAIA/v8CAP7/AwD//wAA/f8DAP7/AgD7/wEA//8AAP3//////wEA/v///wAA/f8GAPn/BgD9/wQA//8BAAIABAAAAAMA//8BAAUA/P8DAP3/AQD8/wMA/f8AAAEA/f8AAAAA/f8EAPz/AwD6/wUAAAAAAAUA+f8GAPv/BgD+//7////9//7/AAD9//3/AQD7/wEA/P8AAAAA//8AAP7/AAD///3///8AAPz/AwD8/wAAAQD8/wAAAwD9/wIA/P8CAAAAAAAAAP3/AgD6/wYA+v8EAPz/AgAAAAEAAQAAAAAAAAADAP7/AAACAPv/BgD7/wMAAAD+/wYA/f8FAP//AgAAAAMA/f8DAP7/AwD//wAA//8CAP7/AQD//wAAAAD+/wAA/v8CAPr/AgD5/wUA+f8DAPv/AgD//////v8DAPn/BgD5/wMA/v/9/wIA+/8DAPn/AgD//wIA/f8CAP3/BAD9/wUA/v8DAAAAAAABAAMA+/8EAPv/AgD9/wEA/f8AAAIA+f8HAPj/CAD7/wUA/f/+/wUA+/8FAPz/AwD/////AwD6/woA+v8HAPv/BQD8/wIA/P8EAPv/BAD9//3/BAD9/wAAAAAAAP3/AQD+//3/BAD7//7/AAD5/wIA/P/+//7//v///////v8CAP3/BAD7/wAAAgD5/wgA+P8EAPz/AQABAAAAAQABAAAAAwD+/wQA/v8CAP7/AAAEAP3/AwD+/wEAAQABAAAA/v8CAPr/BwD7/wQA/P///wQA/f8CAP3/AQAAAAEA//8DAP//AQABAAIA/v8EAP7/AwD9/wQA/P8AAAEA+v8DAP3////9/wAA/v8AAP7/AAD7/wYA+/8BAAAA/f8DAAAA/v8EAPv/AwD6/wcA9v8JAPn/AgAAAP3/AwD/////AAAAAP7/AwD8/wQA//8CAAAAAQABAP///v8BAP7/AgD8/wUA+f8FAPn/BAD6/wMA/P8DAPv/AwD8//3/AQD8///////9//7///8AAPz/AAD9/////f8AAPr/BAD3/wQA+P8AAP7//f8DAPv/BAD8/wUA9/8IAPf/CQD8////AgD9/wUA/f8BAAAA//8CAP//AgD8/wYA+/8DAAEA/f8BAAAA///+/wAAAQD+/wQA/P/+/wQA+v8GAPr/AgD9////AAD9/wIA+/8DAP3/AAD//wEAAAD//wIA/f8BAAAAAQD//wEA//8AAAAA///9/wMA+/8FAP7//f////3/AgD+//3/AwD7/wMA+/8AAP7/AgD8/wIA/v8AAAAA/v8DAP3/AwD///////8DAPv/BwD7/wMA/v8AAAAA//8AAPv/AwD6/wQA/v8AAAIA/f/9/wQA/P8CAP/////+/wEA+v8DAPr/BAD7/wMA+////////v8BAPv/BAD4/wcA9/8BAP7///8DAPv/BAD8/wUA/v8EAP3/BgAAAP7/BAD9/wQAAAD+/wQAAAD+/wYA9/8GAP///P8FAPz/AgACAP7/AAAEAAAAAAADAP3/BAD+/wEAAgD9/wEA//8BAP7/AgD7/wgA+v8EAAAA/v8FAP7/AQABAAEA/P8FAPv/BgD7/wUA/P8EAP3/AwABAP7/BQD8/wIAAAD+/wQA/f8CAP7/AAD+/wMA+/8FAPj/BwD7/wIA/f8CAP3/BgD3/wYA+v///wMA+f8DAP7//f8HAPr/BwD7/wUA/P8EAPz/BAABAA
AAAQAAAAMA//8DAP3/BAD//wQA/f8DAAEA/f8EAP7/AAACAP//BAD+/wUA/P8IAPr/CQD8/wgA/P8KAPf/CgD8/wMAAwD//wQAAAAAAP//AwAAAAAABAD//wIAAAACAAAAAQACAP//AQD///7/BAD6/wcA+f8EAPz/AgD8/wMA/f8BAAAAAAAAAAEA/v///wAA/////wAA/////wMA+/8EAP7/AQABAAAAAQACAAIAAAAGAP3/CAD9/wUA//8GAP//BAD+/wUA//8GAAEAAQAEAAIAAwABAAAABAAAAAQA//8DAAEAAQAFAP7/BAAAAP7/BgD+/wEAAwD9/wMA/f8DAP7/AgD8/wUA+/8IAPb/CAD2/woA/P8CAAAAAQD+/wQA+v8EAPr/AwD8/wIA/P8BAP7/AwD+/wUA/v8DAAMA//8JAPr/BAD//wEAAwABAP7/BAAAAAYA/v8FAP7/BAACAP3/BgD9/wcA/P8EAAEAAAAFAP7/BgABAAUA//8EAAAAAQAAAP7/AwD6/wQA+/8FAPr/AgD//wAAAAD//wEA+/8IAPn/BQD9////AwD7/wMA+f8DAP3/AQACAPn/BQD9/wMA/v8BAAAAAAABAAIAAQAFAAAABQD//wcAAgACAAUA/f8IAAEABAADAAIABQABAAIAAQAEAP//CAD9/wIABQD+/wUAAAD//wYA/v8CAP//AgAAAAEAAgD9/wYA+/8BAP//AgD9/wEA/f8BAAQA+/8EAPv/AwD//wIAAAADAP7/AwD9/wEAAQD//wIA/f8CAP///v8CAPz/AQD9/wEA///8/wMA+/8CAAAA/v8DAP7/AQD+////AQD+/wEAAAD+/wIAAAAAAAEAAgD+/wMA/f8GAPr/BwD9/wEAAgABAAIAAwAAAAIAAwABAAMA/f8EAPr/CAD7/wEA/f/+/wAAAQD6/wIA/P///wAA/f///wEA/P8BAP3///8CAP3/AwD+/wEAAwD9/wMAAAABAAEA//8DAP7/CAD8//7/+//9/wAA/v8AAAAA/v8DAPv/BQD7/wEA+/////7//v/8/wIA//8AAAYA+v8FAP//AAAFAP3/BQD//wAAAgD//wIAAgD8/wMA/v8DAP3/AwD//wMAAQAAAAEABQD8/wkA+v8HAP3/BQD//wIAAAABAAEAAwACAAEAAwD+/wUA/f8CAAAA//8DAP7/AQAAAP7/AwD9////AQD8/wEA+v8AAPv/AAD7/wIA/f/+/wEA/P8BAP///v8AAP////8CAP7/AwD+/wIAAAABAAQA///+/wQA/f8CAP7/AAAAAP7/AgD+///////8/wMA+f8FAPn/BAD+/wEABAD7/wUA/f8DAP7/AQD//wEAAQD6/wcA9/8GAP7//v8BAPr/AwD7//z//v/6/wIA+P8CAPr/AgD9/wAAAQD9/wYA/P8CAAIA/f8EAP//AQABAAAA//8DAPz/BQD/////BAD9/wYA//8FAP//BAACAAEAAgABAAAAAgD8/wQA//8DAAIAAQADAAMAAwAFAAMABAABAAIA/v8EAPr/BAD+////AAD///////8CAP//AgD+/wEAAAD+/wAA/v8AAP7/AwD5/wYA+v8BAP3//v8CAPz/BAD8/wMA/v8DAPv/AwD///z/BAD4/wUA+v8EAPz/AAD//wAAAwD7/wQA+/8FAPz/BAD8/wUA+/8HAPr/BQD5/wsA9/8KAPj/BgD9/wEA/v8CAP7/BAD8/wEAAwD8/wcA/P8AAAEA/v8DAP//BAD9/wMAAAD//wIAAgAAAP//BQD8/wcA/f8BAAIA/v8FAPz/BAD9/wIAAgAAAAIAAgAAAAIAAwABAAMAAgD+/wQA/f8BAP//AAABAP3/BAD8/wUA/P8FAPv/BgD7/wUA+v8EAPz/AwD/////AwD+/wEAAwD7/wUA/P8CAAEA//8DAPz/BQD4/wgA+P8GAP////8CAAAAAwAAAAIA//8BAAAAAAD//wEA/P8BAP
7////9//////8BAAIA//8BAAEAAgD//wUA/v8EAAAA/v8HAPr/BQD+////AgD//wMA/f8DAP7/AAAGAPn/CAD3/wMA/v8AAP//AQD6/wQA/P8FAPz///////3/AQD9//3/AgD6/wQA+v8BAPz/AAD9/wIA/P8DAAAAAAAAAAIA/v8CAAEA/f8GAPv/BwD7/wIAAQAAAAMA/////wUA+f8KAPj/CAD4/wcA+f8DAAEA+/8HAPr/AgAAAP//AgD//wEA/v8CAP7/AQABAP//AwD9/wUA//8DAAIAAAADAAIAAAAFAP7/BgD8/wYA/v8DAAIA/v8EAPz/AwD9/wMA/f8BAPv/AQD9/wMA/P///wEA+/8FAPr/AwD9//z/AQD+////AQD9/wAAAAD9////AgD+/wMA/v8CAAAAAwD8/wYA9/8GAPn/BQD6/wIA/P8AAP7//f/9//3/+v////3/AQD6/wIA/P8CAPz/AAAAAP7///8BAP3///8AAP//AAAAAAEA/v8DAP3/AwAAAAAAAQD//wAAAgD+/wAAAQD+/wMA//8BAAAAAgD//wQA+/8IAPj/BQD+////AAD+/wEA/v8EAPz/BQD9/wEA//8CAP//AQAAAP//AwAAAAUA/v8GAP7/AwAEAP7/BQD8/wQA/v8DAAIA/v8FAPz/AwAAAP7/BQD5/wcA/P8GAP7/AgD+////AQD7/wAA///+////AgD6/wUA/P8CAAIA/f8EAP7/BAD9/wMA/f8BAPz/AwD6/wMA/f/9/wMA+P8GAPv////+/wAA/f8BAPn//f////z/AAD8/wEA+/8DAPv/AQD+/wAAAQD8/wMA+v8EAPz/AQD//wIA/P8EAPz/AAADAP3/AgD9/wMAAAD//wMA/f8EAAMA/v8EAP3/AgD+////AQD+/wMA+/8DAP3/AwD//wAAAgAAAAQA/v8EAAIAAAADAAEAAgABAAEAAgADAAEA//8CAAAA//8BAAAA/f8BAP7/AQACAP7/AgAAAP7/AAAAAP/////+//7//////wAAAAD///7//f8DAP3/AwD9/wIA//8CAP3/BgD7/wUA+/8HAPr/BAD+/wEAAgACAAAAAAD//wEAAAD8/wIA/P8DAPz/AQD//wAA/////wAAAAD//////f////3//v/9//3//v////z/AQD///3/BgD4/wgA+P8DAAAA/v8EAPz/BAABAAMA//8DAP//BQAAAP//AwD6/wIA+/8BAP7//v8BAP3/AgD+/wAA///9/wQA/P8BAP3//v8CAAAA//8BAAEA/v8EAP7/BAD9/wcA/v8CAAMA//8FAP3/BwD6/wcA/v8CAAEAAAABAAEAAgD8/wMA/f8BAPz/AwD4/wQA+v8CAPz/AgD+/wAAAAD//wAAAgD//wMA///+/wYA+P8CAAAA+/8EAPr/BAD9/wQA/P8CAAMA/v8DAPr/AwD//wAABgD2/woA9f8HAPv/AAD9/////P8EAPv/AwD9//////8AAAAAAQD+/wEA//8EAPz/CQD3/wsA9v8KAPj/BwD//wAAAgD8/wIAAAAAAAEA/f8DAPz/AgD+////AgD8/wMA/v////////8BAP7/BQD4/wgA+v8FAP7/BQD//wQA/v8BAAAA//8CAP//BAD+/wUA//8EAAAA/v8FAP//AgAAAAIA/v8BAAEA/P8GAPb/CAD7/wAA/////wEAAAABAP//AQAEAPv/BQD//wAAAgD+/wAAAQACAPz/BwD4/wYA+f8CAP7/AgD+/wEA/P8DAPz/AwD+/wIAAQABAP7/AAADAP7/AQD+//7/AQD9/wAA/v8AAP//AgD8/wIAAAD+/wQA/f8FAP//AAADAP//AgACAP//AgABAP3/AwD8/wEAAgD8/wEAAwD8/wUA/P8HAPn/CwD2/woA+f8HAPz/AAABAAAAAAACAP//AgD//wEAAQAAAP///v/9/wEA/v/+/wMA/f8CAAAA//8DAP7/AQACAP
3/BgD4/wgA/P8DAAMA/v8DAAEA/f8FAPz/BQD6/wUA+P8HAPz/AAAFAPn/BwD9/wAABQD5/wMA/v8BAP7/BAD4/wcA+v8AAAIA/f8CAPz/AAABAAEAAAACAAEA//8DAAEAAQACAP//BAD8/wQA+/8GAPr/BQD6/wAAAQD+/wIA/f8AAP//AQAAAAAAAAAAAAEAAQACAP3/BQD9/wUA/v8CAP3/AwAAAP//AQD+/wEAAAACAP3/AgD+/wEAAgD7/wYA+f8EAPz/AQAAAP3/AgD6/wMA/f8BAPr/AwD7/wEA/P8CAPn/AgD7/wIAAAD8/wMA/f8AAAEA//8BAAAAAwD9/wcA+P8KAPr/BwD8/wMA//8BAAEAAgD8/wMA/v8BAP//BAD5/woA9v8GAPn/AwAAAAIA/P////////8CAP3/AAD//wEA/f8CAAAA/v8DAP3/BAD//wMA/f8DAAAA//8FAPv/AwD///z/BAD7/wUA+/8DAPz/AgD//wEA//8AAP////8AAAMA/f8CAAAAAAACAPz/AgAAAP7/AQD+/wAAAAAAAP7/AQABAAEAAAABAP//AwAAAAAABQD9/wUA/P8CAAIA+/8DAP3//v8DAP7///8EAPf/CgD4/wgA+v8HAP3///8GAPX/CQD4/wQA/f8CAPr/BgD5/wQA/v/8/wIA+v8EAPn/BQD7/wEAAAD9/wQA+v8FAPz/AgD8/wMA/f8DAPr/AgD9/wAAAAD9/wAA/v8DAPz/BAD8/wQAAQD+/wYA/f8DAAEAAAABAAEAAQD//wEA/v8EAAAA//8AAAEAAQACAAEA/v8DAP7/AwAAAAEAAgD//wAA//8CAP7/AAACAP7/AwD+//7/AwD9/wAAAQD+/wEA/v8CAP//BAD//wEA//8BAAIA/v8IAPj/BgD9////CAD1/woA9/8FAP///P8HAPr/BQD7/wQA/v8AAAIA/v8BAP3/AQD//wIA/f8DAP3/BAD//wMAAAABAP7/AgD+/wEAAAD//wEAAAABAAEABAD8/wMA/v8AAAAA//8AAP///f////7//v8BAP7//v8AAP7/AwD7/wMA/P8BAAEA/f8DAP3/AgD/////AQAAAP//AQD8/wIA/P8HAPj/BQD9//7/BgD9/wAAAQD//wIA//8CAAIAAQD//wIA/v8FAP3/AQAAAAEA//8AAP3/AQD+/wIA//8BAAIA/v8CAAEAAgABAAAABQD+/wUA/v8EAP7/BQD9/wQA//8BAAEAAAACAAIA/v8CAAAAAQABAAAAAAD9/wMA+/8GAPn/AwD5/wcA+v8CAP7//P8DAPz//v8CAPv/AwD9/wIA//8BAP7/AwD8/wYA+/8DAAMA+v8JAPf/BgD8/wUA/f8EAPv/BAD8/wMA/f8AAAAA//8AAAEA//8BAPv/AwD9/wAAAQD5/wYA+P8EAPn/BAD7/wMA/v8BAPz/BwD4/wcA+/8AAAIA//8DAP//AQACAAAAAQABAP//AQABAPv/BgD+//3/BQD5/wQA/v8BAPz/BQD7/wQA/f8DAAAAAgD+/wAA//8BAAMA/v8AAAIA+/8HAPj/BgD9/wEABQD8/wkA+P8IAP//AgAAAAQAAAADAAQA//8GAP7/AwACAAMA/v8FAP3/AAACAPr/BQD8/wMA/v8BAPv/AwD8/wIA///+/wEA/v/////////+/wEA/P8BAAAA/f8CAP3/AgABAPz/BAD7/wIA/v8BAP3//v////3/AAAAAPz/AQD8/wIA+/8DAPv/AwD6/wUA9f8GAPn/AwD8/wEAAAD8/wUA+/8EAP///v8DAP//BAD7/wYA+v8LAPf/CAD9/wMA/v8AAPz/BQD9/wMA//8BAAIAAgADAAEAAwABAAMAAQACAAEABAD7/wQA+/8FAP//AAABAP7/BAD8/wgA+P8FAAAA//8IAPf/BwD7/wIAAgD9////BAD7/wUA/P8BAP7/AAABAP7/AQD9/wEAAgD+/wEA//8DAA
AAAgD//wIAAAABAAEAAQD+/wUA+v8HAPr/BQD7/wUA/P8DAPz/AAAAAP//AwD7/wUA+P8FAPz/AQD+/wAA/v8BAPr/AwD7/wMA+/8DAPn/AwD9////AAAAAPz/AwD7//7/AQD///3/AwD4/wUA+/8BAP7///8BAPz/AAD+/wEA/v8DAPz/BAD9/wMA//8CAAAAAQAFAPv/CAD8/wUAAQABAP//AgD9/wcA+/8FAAEA/v8IAPn/BgADAPv/CQD6/wYAAQAAAAUA//8FAAIAAwACAAUA/f8JAP//BwD//wYA/f8IAP//AQAEAP7/AwAAAAEAAgD+/wMA//8BAAEA/P8BAAAAAQAAAPz/BQD+//7/BAD4/wcA+/8BAAIA/P8EAPn/BQD4/wcA+v8DAP7///8AAP//AAD9/wEA//8BAP3/AgD8/wEA///7/wMA9v8IAPf/AwD7/wAAAAAAAP//AgD+/wIA//8BAAMA/v8CAAIAAQADAAEAAAAEAP//BQAAAAQA/v8EAAEAAAAGAPz/BgD//wIAAQAAAAYA/f8FAPz/AwD+/wEAAwD6/wcA9v8GAPv/AQD+//7/AAD9//////8AAP7/AgD+/wEAAAABAP7/AwD9/wQA//8CAAAABAD+/wQA/v8CAAAAAQADAAAABAD//wQA/v8EAP//AwADAP//AwABAAUA/f8GAP3/BAAAAAQA/P8JAPz/BQADAP7/AQAEAPr/CQD6/wUAAQAAAAAABQD7/wsA+P8JAP3/AgADAAAA//8CAP3/AwACAPr/BgD7/wIAAQD8/wYA+f8GAPz/BAD9/wIA//8DAP7/AgD/////AAAAAP//AQD//wAAAQD//wEAAAD9/wMA/f8AAP///f8AAAAAAQD+/wMA//8CAAIAAAAAAAQA/f8DAP////8DAP7/AAD+/wEA/P8EAP3///8CAP//AAACAAAA/P8FAPj/CQD6/wQA/P8BAAAA/v8CAAAAAAAEAP3/BQD+/wEAAQD//wQA/v8AAP///f8EAPz/BQD8/wcA+f8JAPn/BAD/////BAD9/wMA/v8BAAEA/P8BAP///f8EAP7///8CAPv/BgD8/wUA/f8EAAAAAgABAAEAAgABAAEA/v8FAPz/BAD//wEAAAD+/wMA/f8DAP7/AwD//wEAAAD+/wUA+v8GAP3/AAAAAAAAAgD9/wMA+v8HAPn/BgD8/wEAAAD+/wIA+/8HAPr/BAD+////AwD8/wQA/v8DAAAA/f8EAPz/AgD9/wMA/P///wMA9/8JAPX/BgD+//3/AgD6/wMA/P8BAAEA/v8AAAEAAAADAPv/BAD8/wEAAQD5/wcA9/8FAPv/AAD/////AQD+/wIA/v8CAAAA//8BAP7///8BAP7/AgD9/wIA/f8BAP7/BAABAAIAAgAAAAMAAQD+/wMA/f8HAPn/CQD4/wUA+/8CAP3/AwD8/wUA/P8DAP7/AwD+/wEAAAAAAAAAAAAAAAAAAQD9/wIA//8AAAIA//8AAAAAAQD//wIA+/8FAPv/AwAAAP3/BAD7/wAAAAD+/wEAAQD8/wQA+/8EAAAAAwD9/wYA+/8EAAAA/f8EAP3/AgABAP//AAAEAP3/BAD9/wQA/v8CAP3/BgD4/woA+P8IAPv/AAD///7///8AAPv/AgD9///////+/wEA/f8AAP3//////wAA+f8CAP7//f8EAPf/BgD9//7/AQD+////AAD5/wcA9P8IAPj/AwD8/wUA9/8IAPr/CQD9/wUA//8BAAMA//8EAP//AwD//wIA/v8EAPv/BwD7/wcA+/8DAAAAAQAAAAEAAAABAP//AgADAPv/BgD4/wUA+/8BAP3/AAD///3/AwD7/wQA/P8EAP///v8DAPn/BwD4/wgA9/8HAPj/BAD9/wEA/P8CAPv/BAD4/wUA+f8AAAIA+v8HAPr/AwACAAAABQAAAAEABAABAAEAAwD+/wUAAgD+/wMA/v8BAAEAAAAAAAIA/f8HAP
r/CAD5/wYA+/8GAPz/AgAAAP3/BQD8////BgD5/wUA/P8AAP7/AAD//wIA///+/wEA+/8FAP3//f8EAPn/AwAAAP7//v8AAPz/AgD8/wIA/P8AAP7//f/9////AAD+/wEAAAD//wAAAAACAAEAAwD//wUA/v8DAAIA/v8HAPv/BgAAAAEAAwACAP//BgD+/wQA/v8EAP3/BgD9/wcA/f8DAP//AAACAP3/BAD6/wQA/f8AAAEA/v8BAP//AAD+/wEA//8BAP7//v8AAAIA/P8EAPn/BQD6/wQA+v8BAP//+v8EAPv/BAD7/wEAAAD9/wcA9P8JAPb/BgD8/wEAAAD//wAAAAAAAAAAAQD+/wIA//8AAAQA/P8FAPz/BAACAAAAAwADAAEABAD+/wUA/v8HAPz/BwADAP7/BwD9/wIAAgAAAP//AwABAPv/CAD7/wQAAQADAPz/CgD5/wgA/P8EAP3/BAD9/wIAAAD//wEA/v8CAP7/AQD9/wIA/v//////+v8HAPf/CAD6/wEA/f8BAPz/AgD+////AQD///3/AgD5/wMA/P/9/wMA+/8CAP3/AgD//wEA/f8DAP7/BwD4/wYA/P8EAP3/AwD7/wAAAQAAAP7/AAD+/wEAAgD8/wIAAQD+/wIA/v8CAAAAAQAAAP3/AQD7/wQA+/8GAPj/BAD+//3/BwD4/wcA/P8BAAIAAQD9/wQA/f8CAAUA+v8JAP3/BgABAAAAAwD+/wUA/f8HAPz/BQD+/wMAAAACAP//AgD9/wMA/P8CAP3/AgD8/wEA/v8BAP////8AAP//AgD4/wYA+f8HAPv///8AAAAA//8EAPr/AgD///7/AAD+/wAA/v////3/AQD6/wUA+//+/wEA+v8DAPv////9/wAA/v8EAPj/BgD3/wIAAAD6/wUA9/8CAP7//v///////v8AAP//AAD9/wQA+v8EAPz/AQACAP//AgAAAAIA/v8EAP//AQABAAAAAgD+/wUA+v8IAPv/BQD6/wMA//8CAAAAAAAAAP7/AwD9/wIAAQD//wIA/v8CAP7/AQD9////AQD//wIAAAABAAAAAgACAAAAAQAFAPn/DAD1/wgAAAD+/wUA/v///wMA/P8FAPv/BAD4/wYA+/8EAP///P8DAPz///8DAPn/BAD9/wAAAgD8/wIA/f/+/wMA+v8BAAAA/f8DAPz/AgD9/wMA/P8CAP7///8CAPz/AgD+/wMA+v8DAP3/AgD+/wAA+f8DAPv/AAD9//7//f///////f8AAPv/AQD//wAAAQAAAAAAAAABAP7/AgAAAP3/AwD6/wUA/f8BAAEA/f8DAP7/AgD8/wIA+/8GAPz/AAD//wEA/f8IAPf/CAD7/wIA//8AAP7/AwD9/wEAAAAAAP7/AwD9/wQAAQD9/wUA/f8GAPz/BgD9/wMAAAACAAMA//8DAAEAAgD+/wAAAQD8/wUA+v8FAP3///8BAP7/BAD//wEAAAAAAP//BAD8/wQA/v8BAAAAAQAAAAEAAgD8/wQA/f8AAAEA/f8EAPz/AQD//wEAAQD9/wEA//8AAAEA/f8BAPv/BAD6/wAA+/8AAP3/AQD///z/AwD7/wUA/P8CAAEA//8CAP3/AgD9/wQA/f8AAAAAAAD8/wIA+/8CAPv/BQD3/wkA9P8KAPT/CQD4/wUA/v8AAP///v///wMAAQD8/wUA9/8HAPj/BQD///z/AwD7/wMAAQD+/wQA+/8IAPz/BgD9/wMAAQAAAAIAAQAAAAIA/v8CAAAA/v8AAAEA/f8DAP7/AQD//wMA/v8EAP//AgAAAAMA//8BAP7///8CAP7///8BAPr/BwD5/wIAAAD8/wQA+/8FAP7/AwACAAIAAAACAP//AwABAAQA//8FAP3/BQD8/wcA/f8AAAMA+/8JAPn/BwD6/wYA/f8BAAMA/v8EAPz/AwD8/wIA/v/9/wAA+f8AAP7//f8BAPv/AwD6/wIA/f8CAP7/AwD9/w
MAAAAAAAEAAAABAP//AgD9/wQA///+/wIA///+/wMA9v8GAPj/AwD7/wEA/f8CAP3/AgD+/wQA/P8FAP//AQAEAPv/BQAAAP3/BgD6/wIAAgAAAAAABAD8/wQAAAABAAIAAwAAAAYA/v8HAAIA//8EAPz/BgD+/wMA/P////////8DAPz/AgD8/wIAAQAAAAAAAgACAAAAAwD//wQAAAADAP7/BgD8/wcA/f8BAAAAAQD//wQA+v8GAPz/AwAFAPz/AwACAAAA/v8DAP3/BAD/////AQD//wUA+v8EAPz/AQD///3/AwD+/wMA/v8DAAAAAQACAP7/BQACAP//BwD6/wgA/P8DAAAAAAABAAIAAgACAAEA//8EAAAAAwD//wIA//8DAP//AgD8/wEA//8BAP3/AgD9/wEAAAD+/wQA/f///wUA+v8IAPz/AgD+/wIA/f8EAP//AQAAAAIA/v8CAP7/AwD8/wQA/P8AAP///v8CAP3///8BAP7/AwD8/wIA/f8CAPz/BAD9/wcA+f8FAP//AAAEAP3/AwACAP3/BQD9/wIAAAAAAAMAAQD//wQA/P8HAPz/CQD8/wcA//8CAAEAAAABAAMA/P8HAPb/CwD5/wMAAAD+/wQA/f8CAP3/BgD9/wEAAgD+/wYA+/8GAAAA/v8GAPr/AwAAAP7/BAD7/wEA/P8DAPr/BQD8/wEA//8CAP3/BAD//wEA//8EAPj/CwD5/wQA//8AAAIAAAAAAAIA//8AAAQA/P8IAPn/CAD+/wEABQD9/wgA//8CAAYA/P8KAPr/CAD+/wEABAABAAAABQD8/wYA+v8FAPr/AwAAAP//AgD9//7/AwD7/wUA+f8FAPr/BQD7/wUA+v8EAPz/AAD+////AAAAAP7/AQD+/wEA/f8CAP7/AAADAAAA/v8FAP7/AwD9/wQA+f8GAPf/BQD7/wIA/v/8/wAA//8BAAEA/f8AAAIA//8CAP//AAAFAP3/BQD//wAABgD3/wsA+/8EAAIAAAABAAEA//8DAAEAAgD+/wYA/f8HAP3/BQD//wAAAwD9/wUA/v8AAAAAAQD///7//////////v/8/wIA+/8CAP////8BAP7/BAD9/wEA/v8BAP3/AwD7/wQA//8AAAIA//8BAP7///8EAPr/CAD4/wQAAQD9/wUA+v8FAP7/AgD//wAAAAD9/wIA/v/+/wAA/f8BAAAA/f8BAAAA/v8DAPz/AgAAAAEA/v8AAAIA+v8IAPX/BgD5/wUA+/8CAPr/BAD9////AQD7/wUA+v8DAPz/AQD8/wEA/v///////P8BAPz/AQD+////AAD5/wUA+P8BAAAA9/8FAPn/AQD//wIA/v8DAPz/AwD7/wIAAQD//wIA/v///wIA/v///wEAAAAAAAEA/v8BAAEAAAABAAIA//8FAP//AgADAP////8CAPz/AgABAPn/BgD5/wUA+v8DAAAA//8AAP7/AwD9/wUA+/8CAP//AAABAPz/AwD5/wMA+/8AAPz/AAD6/////f/+/wAA+//+//7//f/+//7//f8BAP7////+/wEA/f8AAPz/AgD///7/AgD5/wIA//8BAP7/AQD9/wIAAQD8/wMA/////wAA/v8AAAIA+v8FAPj/AgD+//v////+/wAA/f8CAPv/AgD//wIA/v8BAAAAAQAAAAAAAAADAP7/AwD//wIAAAAAAAAA/f8EAPv/AgD6/wAA+/8BAPr/AQAAAPv/BQD4/wQA/v8BAAIA//8CAPz/AgD+/wAAAQD9//7/AgD8/wYA/f8AAAEAAAAAAP//AgD+/wMA/////wEA///+/wYA/f8BAAAAAAD//wMA/P8CAP7/AAD//wIA///+/wQA+/8GAP3/AwD+/wAAAAD//wIA/P8DAPr/AwD7/wMA/P8CAPv/AAD//wAA/v8BAPz/AQAAAP7/AAADAPz/AAAAAP//AwD9/wAAAgD//wEA/v/+/wIA/f8AAP7/AQD9/w
MA/P8CAP7/AwD//wMAAAADAAAAAAABAP7/AgABAP7/AwD+/wIA//8BAAMA/v8FAPz/BQAAAAAAAQD//wIA/v8EAP3/BgD9/wAAAwD+/wAAAQD7/wQA///9/wIA///+/wMA/P8FAPz/AwD9/wUA+v8GAPr/BwD6/wUA/f/8/wUA/f8DAP//AAAAAAEA/f8BAAIA/v8EAPz/BQD8/wMA//8BAAIAAQABAAEAAQABAAMA/f8EAP7/AQAFAPz/BgD///7/BwD7/wYAAQD+/wcA+/8FAP//AwAAAAAA//8CAP7/BAD9/wIA/v8EAPz/BAD9/wAABAD8/wMA//8AAAMA/f8DAP7/BAD+/wQA/f/+/wMA+/8FAPv/AgD8/wAA///9/wAAAAD8/wQA+v8DAP3/AgD+/wYA+f8FAPz/AwD9/wMA/////wUA/P8CAAIA/P8GAPv/BQD9/wIA/v8AAAAA//8CAP//AAABAAAABAABAP7/BgD8/wYA/f8DAAIAAQADAP7/BAAAAAEAAQABAAAAAAADAAEAAwAAAAMA//8DAPz/BQD+/wEAAQD/////BQD8/wMA/////wIAAQADAP//AwD+/wMA//8EAPv/CQD1/woA+v8CAAIA/f///wEA+v8HAPb/BAD6/wUA/P8EAPz/BAD+/wYA/P8IAPv/BwD6/wgA+/8GAPz/AgD//wEAAQD///7/BAD8/wIAAwD+/wIA//8CAP7/BQD6/wYA/f8FAP3/AwD+/wEA/////wEA/v8BAP///f8BAP///f8CAP3/AwD9/wMA/f8CAAAA/f8CAP3/AgD+/wAAAAABAAAA//8CAP//AgD/////AAACAP7/BAD8/wQA/v8DAAIAAQABAAAAAgD+/wQA/P8AAAMA/f8BAP7/AAAAAAAAAAD//wYA+/8FAP3/AQAEAAAAAAAHAPz/AwD+/wMA//8EAPz/BQD9/wQA/f8AAAEA/f8EAPz/AwD+/wIAAgD+/wQA/v8CAP//AgD8/wQA/f8EAP3/AAD/////AgD9/wMA+/8HAPr/BgD7/wUA/f8DAP7/AAD+/wQA/f8DAP7/AAABAP////8AAAAA//8DAPz/AwD9/wMAAAAAAAEAAwD9/wMA//8BAAMA/v8DAAAAAAACAP////8AAP3/BAD7/wQA/v8AAAEAAQABAAMA/v8AAAQA/f8GAP//BAD+/wMA+/8GAPv/AwD8/wEA//8BAP3/BgD9/wEAAgD9/wMA//8CAP//AQACAP3/BwD3/wYA+/8CAP7/AAD+/wEA/f8CAP7/BAD7/wYA+v8GAP7/AQABAPz/AAABAP//AQABAP7/BAD+/wEAAgD+/wQA//8EAPv/BwD4/wcA+/8FAP7/AgD//wQA//8AAAEA//8CAAIA//8AAPz/BQD5/wkA9/8HAP3/AgACAP7/AwD+/wMA/v8AAAQA+/8GAP3/AAABAP7//f8DAPz//v8CAPv/AgD9/wAA/f8BAAAA+/8EAPz/AgD6/wUA+f8GAPn/BwD+////AwAAAP//BQD9/wQAAAAAAAMAAQAAAAIAAQAAAAQA/v8AAAQA/v8CAAAAAAACAP///v8CAP//AAD///z/BAD9/wUA/P8DAP7/AwABAAAAAAACAP///v8BAP3/AQD+///////8/wAAAAD+/////////wMA/f8CAP////8BAP7/AgD9/wMA/v8BAAEA/P8EAPv/AwD+/wIA/f8AAP////8CAPv/BwD3/wYA/P8BAAQA//8AAAMAAAAEAAAAAQABAAEAAQD+/wQA/f8BAAAAAAD//wQA+/8FAP3/AwD//wMA/f8CAP////8EAPz/AgD+/wIA/v8BAP//AAD+/wQA/v8BAAIA+/8EAP3/AgACAPz/BAD7/wcA+/8GAPz/BAD+/wAAAQD9/wMA/P8BAP7////9/wMA/P8HAPr/AwABAPr/CQD2/wYA/P/7/wYA+P8GAPr/AQD+/wMA+f8GAPv/AwD9/wIA/v8GAP
v/BAD//wIAAgD+////BQD9/wUA/v8DAAEA//8EAPr/BgD+/wEABQD7/wUAAAD//wIA//8AAAIA/v8AAAAA//8BAP3/AQD9/wAA/v/+/wAAAQD9/wAAAAD+/wIA///+/wUA+f8HAPn/BgD4/wUA+/8CAP7//f8DAP3/AwD9/wEAAgD//wEA/v8BAAIA/v8BAP////8BAAEA/f8EAP3/AwD7/wcA9/8LAPX/CAD5/wMA/f8BAAEA/v/+/wUA/P8DAP///v8DAP3/AgD+/wMAAgD7/wkA9/8HAP3/AAABAP7/AwABAP3/AQAAAP3/AwD7/wAAAAD8/wEA///+/wAAAAD/////AQD9/wIAAAD8/wQA+f8IAPj/BgD/////BgD7/wQAAAACAP7/BgD5/wkA+f8FAP3/AwD9/wQA/f8CAAEA+/8GAPj/BgD+//7/AQD//wUA+f8HAPr/BAAAAPz/BQD7/wQA+/8CAP3/AAD+/wQA+/8DAPv/AQD8//7/AgD5/wQA+f8DAPv/AgD+//7/AgD8/wMA//8AAAEA//8BAAAAAgADAP7/AgD7/wMA/P8CAPv/AgAAAAIA/v8BAP7/AgD+//////8AAPz/AwD6/wQA/P8EAPv/BgD7/wQA/v8DAAEAAAAEAAAAAgACAPz/BQD+/wAABQD8/wkA/v8AAAQAAAADAP//AAD//wUA/P8FAPv/BAD8/wAA+/8CAP7///8AAP7//v8AAP///v/+/wEA+/8BAP7//P8BAPv/BQD7/wQA+v8CAP///v8DAP3/AwD8/wIA/v8EAP3/AAD//wAA////////AAD+/wAA///+//7/AAD9/wEA/v/+/wMA+/8BAP7/AAAAAP7/AwD8/wUA+v8EAP3/AAACAP//BAD6/wQA//8BAAQA/f8EAP//AgABAAAAAgADAP7/CQD8/wcAAAD//wQA//8EAPv/BQD8/wQA/v8DAP7/AgABAPz/BQD+/wEA/P8DAPv///8BAPj/BwD5/wMA/P8AAAAA//8BAP7/BAD8/wQA+/8CAP7////+/wIA+/8EAPj/AwD7/wEA/P/+//3//f////3//P////v//f8AAPz///////7//P8EAPz/AwD/////AQAAAP//BQD+/wUAAAACAAEAAgABAAMA//8BAAEAAQAAAAAABgD9/wYA+/8HAP7/AgD///////8BAP7/AgAAAP//AwABAP7/BwD6/wYA/v8AAAAA/f8BAP//AQD9/wIA/v8DAP7/AQD+//////8CAP3///8CAPv/BgD9/wIAAAABAPz/BQD7/wQA/v8CAAIA/v8EAP//AQACAAEABAD7/wYA+/8HAPn/BQD8////AwD6/wUAAAD//wAAAQABAP7/AwD9/wEAAAACAPr/BgD8//3/AwD6/wIA///9/wEA/v/8/wMA+v8FAPn/AwD9/wIA/v8BAAAA//8AAP3/AwD5/wgA9P8KAPj/AgD///7//////////////wIAAAABAAEAAgAAAAAABQD+/wMAAgACAAEAAQADAP7/BAD9/wQAAgD+/wMA/v8EAAIAAAADAP//AwABAP7/BAD7/wQA+/8FAP7/AgD+//////8AAP7///8AAAEA/P8BAPv/AwD6/wMA/f8CAP//AQAAAAEA//8BAAIA/P8FAPz/AQACAPr/BwD6/wkA+/8FAAMA/f8IAPv/BQD+/wIAAQACAP//BQD8/wsA+P8MAPr/AwADAPn/CQD5/wcA+v8GAPz/AgACAP7/BAD+/wMA/////wAAAAAAAAEA/v8CAP7/AgD//wEA//8AAP//AAD+/wIA/f8BAAEA/f8AAAEAAAACAP7/AAAAAAMA/P8EAPz/BgD6/wYA/P8GAPz/\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + 
"text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 121_003_0527_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 121_003_0791\n", + "Original Audio: 121_003_0791.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 121_003_0791_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 121_003_2994\n", + "Original Audio: 121_003_2994.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source 
src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 121_003_2994_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 126\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 126_003_1107\n", + "Original Audio: 126_003_1107.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 126_003_1107_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + 
"output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 126_003_2205\n", + "Original Audio: 126_003_2205.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,UklGRoxgBgBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAATElTVDQAAABJTkZPSUNNVBEAAABQcm9jZXNzZWQgYnkgU29YAABJU0ZUDgAAAExhdmY2MC4xNi4xMDAAZGF0YSxgBgAAAAAAAAD//////////wEAAAABAAEAAAAAAAEAAQAAAAEAAAAAAAAAAAAAAAEAAgAAAAEAAAABAAAAAAAAAAAAAQACAAEAAgABAAEAAQACAAIAAgADAAMAAQADAAMAAgABAAEAAQABAAAAAQACAAEAAAAAAAIAAQAAAAAAAAD/////AAAAAP////8AAAAA/////wAAAAAAAP//AAAAAP//AAAAAAAAAAAAAAAAAQAAAAAAAQAAAP//AAAAAP//AAABAP//AAABAP////8AAP/////+//////8AAP////////////8AAAEAAAAAAAAAAQABAAAAAQAAAAAAAAD//wAAAQD//wAAAAAAAAEAAAABAP/////+/////////////v///////v/+/////v////3//////////v////7///8AAP7/AAABAP//AQAAAAEAAAAAAAEAAQABAAIAAgACAAIAAQADAAEAAQACAAMAAgACAAIAAgABAAMAAgACAAEAAQAAAAAAAAAAAAEAAAABAAEAAQAAAAEAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAP///////////v/+//7///////7//v/+/////v/9///////9//////////////8AAP//////////AAD/////AAABAAAAAQAAAAAAAAAAAAEAAAABAAEAAQABAAIAAQAAAAAAAAABAAAAAAABAAAAAQAAAAAAAQAAAAAAAAABAAAAAQAAAAEAAAAAAAAAAQAAAAAA//8AAP7/AAAAAP/////+//7///////////8AAP///v8AAAAAAAD/////////////AAD/////AAD/////AAAAAP//AAABAAEA//8CAAAAAgACAAEAAQABAAEAAAABAAEAAQABAAEAAAAAAAEAAQAAAAEAAQABAAEAAAAAAAAAAAABAP//AQAAAAAAAQAAAAEAAAAAAAAAAQAAAAAAAAD//wAAAQD/////AQACAAAA//8AAP////8AAP//AQAAAAAAAAAAAAAAAQAAAP//AAABAP//AAAAAP//AQAAAAEAAAAAAAAAAAAAAAAAAAAAAAIAAQAAAAIAAAABAAAAAQAAAAAA//8AAAAAAQAAAAEAAQABAAAA//8BAP//AAAAAAEAAAAAAAAAAQAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAQAAAAEA//8AAAAAAQABAAEAAQACAAEAAQABAAIAAgABAAIAAQABAAIAAgACAAEAAAABAAEAAAABAAEAAAABAAEAAQAAAAAAAAABAAAAAAABAAEAAgD//////////wAAAQAAAAAAAQAAAAAAAAABAAAAAAABAAAAAQAAAAEAAAAAAAAAAAAAAAAAAQABAAEAAAABAAAAAAABAAAAAQAAAAEAAAABAAIAAQAAAAEAAQAAAP//AAAAAP//AAD//////////wAAAAABAAAAAAAAAP//AAABAAEAAAA
AAAAAAAD//wAAAQD//wAA//8BAP//AAD//wAAAAD///////////7/////////AAD////////+////AAAAAAAAAAAAAAAAAQAAAAEAAQABAAEAAQABAAAAAgAAAAEAAgABAAIAAAAAAAAAAAAAAAAAAAAAAAAA//8AAAEAAQABAP//AAD//wAAAAABAAIAAQAAAAAAAAABAAAAAQAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAQAAAP////8AAP//AAD/////AAAAAAAAAAAAAAAA//8AAAAAAQAAAAAAAAABAAAA//8AAAAAAAAAAAAA/////wAAAAD/////AAD//////////////////wAA////////AAAAAAAAAQABAAAAAQAAAAAAAAD//wAAAAAAAAEAAAD//wAAAAD//wAA//8AAP//AAABAP//AAAAAAAAAAABAAEAAAAAAAEAAQAAAAEAAQACAAEAAgAAAAEAAAABAAEAAQAAAAAAAAABAAAAAAAAAAAAAAAAAAAA///+/////////////////wAAAgACAAEAAQABAAIAAQABAAEAAAAAAAAAAAAAAP////////7//////wAA//////3//P/+//////////7//P/8//j/8v/x//H/7f/y//3/AAAIAAkACgAQABAADwARABUAEgANABMAFAAOAA8ABAABAAUAAgAEAAkADgAHAAcADgAIAAMA/f/u//f//v/1//P/9P/7//n/9f/8/wwACQD6//z/AAABAOz/2v/f/9//6v/w//T//f8MAAYA//8WABEA9//m/9P/3P/m/9X/3//1//n/7v/4/xUAEQAcACkABADu/+b/zv/T/+P/6P8FAB0AKwA+AEIAFQDz//v/8P/1//3/BwAZAAkAEQAvADYALQAiAB8AKgAfAPr/7P8CAC8ANgArACkAIgApAAwA+v8BAPP/4f/F/6n/mv+u/57/pv/y/w8AHwAsAC8AEwDz/+3/y//J/8X/wv/l/xcAQABHAFsAZgBUAFYASgAKAOD/6f/u//T/CgAyAEEATgBlAFMAVgBIACUAAAD4/x4ACQD6//v/+f8BAPD/8P/x/w8AIwAIAAQA4//b/8j/tv/C/7//zf++/87/zP/G/7v/oP+v/7D/tv+5/83/2f/L/87/xv/D/77/yf/d/+3/6//C/7b/xv/r/wAA9P/m/+n/BgAHAAgAAwDz//v//P/7/wcAEwAVABgAIgAaABcAJwArACoALAAuADAAPgBEADYALAAqADIAMAApACsARQBbAEsAOwA6AD0APABBAEUAOwA0ACYAFwAfABsACwAeACgAHAAZABQACAACAAwABwD7//v/+//t/8//3f/x/+//9//0//H/6f/b/9L/0P/U/87/0f/T/8n/zv/Q/9j/4//n/+r/5v/n/+r/5v/p/+P/4f/d/9T/4P/x//z/AAADAAoACgAHAAEA+//5//7/AQACAAIABAALAAkA///7//z/AgAGAAcAAQD8//n/+f/9//3////+//r/9f/z//z/AgAHAAcABAAFAAYAAwD+//7/AQAEAAYACQAKAAsABwAHAAwACQAKAAwACwAJAAgADAARABUAEQAPABIAEwATABIAEwAQAAwACAAGAAIAAAAAAAUACgAKAAkACAAJAAgABwAFAAMACAAHAAcAAwACAAEAAgAFAAQAAgABAAMAAwD///v//f/8//n/+v8BAAEA/P/+/wEABAAAAPv/+v/6//r/+f/4//X/8P/v/+//8P/z//P/9f/1//L/7//u/+7/7v/t/+3/7v/z//H/8P/w//H/9v/5//j/9//4//b/9f/2//f/+v/+/wAA//8AAAAAAwAHAAcAAwACAAQABQAGAAYABAALABEADwAOABAAEgAQAAwACAAGAAgACgAKAAsADAAMAA0ACwAJAAYABgAFAAUABAADAAUABQAAAP7/AAAAAAAA/v///wEABQAEAAIAAQD+////AQD
///z//v///wEAAAAAAP////////3//v////3//P///////v/+//3//P/+//z//P/9//z//P/8//v/+//9//z//P/9//7//P/9//3//f/9//z/+//9//7//P/+//7//////wAA///+//3//f/+///////+//7///8AAP//AAD/////////////AQAAAAAAAAD//////v/+//7//////wEAAQAAAAEAAAD///7//v/+//3//f/+//7///8AAAEAAQABAAEAAgACAAIAAQAAAAEAAQACAAEAAAADAAMAAwAFAAMAAwACAAIAAgABAAAAAQACAAIAAgABAP//AQAAAAEAAAAAAAEAAAABAAIAAgADAAIAAQAAAAEAAQABAAEAAQACAAEAAQABAAAAAAAAAAIAAAAAAP//AAAAAAEAAAAAAAEAAAD/////AAAAAAAAAAABAAAA/v/9//3//f/8//7//v/9//3//v///////v/9/////////wAAAAAAAP///////wAAAgACAAIAAQABAAEAAQABAP//AAAAAP//AQACAAQABQAGAAQABAADAAMAAwADAAUABQADAAQABAAEAAQAAwAEAAMAAwADAAMAAwABAAIAAQACAAEAAAABAAEAAgAAAAEAAgABAAIAAgABAAIAAQACAAEAAAABAAEAAAAAAP//AAD//wAAAQAAAAEAAQACAAEAAQABAAEAAgABAAEAAgACAAIAAgACAAMAAgACAAIAAgADAAIAAQAAAAAA//8AAAAAAQABAAIAAQABAAAAAQACAAIAAwACAAMAAwACAAEAAgABAAEAAQACAAIAAQACAAMAAgACAAEAAAD////////+/////////////////////v/+///////+//3//v/9//7//f/9//3//v///wAA//8AAAEA//////7/AAD//wAAAAD+////AQAAAAAAAQABAAAAAQACAAIAAgABAAIAAQABAAEAAQAAAAAAAAD//wEAAQABAAIAAgABAAAAAgABAAAAAQABAAAAAAAAAAIAAgABAAEAAgABAAEAAAAAAAAAAAD//wAA/////wAA///////////+//7//v/9//7//v/+//7//v/9//7//v/+//7//f/9//7//v/+//7//f/+//3//v/9//3//f/9//3//v/////////9//7//v/+//7////+/////f/+//7///////7///8AAAAA//8AAP7//v////////////7////+//7///8BAAAAAAAAAAAA/////////v8AAP////////7//v////////8AAAAA/f////7//v/+//7////+//7//f/9//z//v/+//3//f/9//3//P/+//3//f/+//7//v/+/////v////7//v/+/////////wAAAAAAAAEAAQAAAAAA//8AAAAA//8AAAEAAgACAAIAAgABAAIAAQAAAAAAAAAAAAAAAQABAAEAAQAAAAAA//8AAAAAAAAAAAAA///////////+/////////////v///wAA/f/+//3//P/+//3//f/9//3//f/8//r//P/9//v/+//8//z/+//7//3//f/+//7////+//7///8AAP////8AAAEAAAAAAP//AAAAAAEAAQADAAMAAgACAAIAAwACAAIAAgACAAMAAwADAAUABAADAAQAAwADAAMAAwADAAIAAgABAAEAAgADAAEAAAAAAP////8BAAEAAQACAAEAAAAAAP/////+/wAA//8AAAAA/v///wAA///////////+/wAA///9//7///////7/////////AAAAAAEAAAAAAAEAAQABAAEAAgABAAEAAQABAAAAAgACAAEAAQABAAIAAQABAAEAAQACAAEAAgABAAIAAgABAAEAAQACAAEAAQABAAIAAgADAAIAAgACAAIAAwACAAIAAgABAAAAAQAAAAAAAAD//wAA//8AAAAAAQABAAAAAgABAAAAAQA
AAP////8AAAEAAAAAAAEAAQAAAAAAAAABAAEAAQAAAAEAAQABAAIAAgABAAMAAgABAAEAAgACAAEAAQACAAMAAgACAAIAAgACAAEAAAABAAAAAQAAAP////8AAAAA/////wAA//8AAAEAAQAAAAEAAAABAAAAAQABAAAAAQACAAIAAQABAAEAAwABAAEAAQABAAAAAAABAAEAAQACAAAAAQAAAAAAAQABAAEAAgACAAEAAwACAAIAAwADAAMAAgACAAIAAQABAAIAAQACAAEAAwACAAAAAQACAAIAAwABAAIAAgABAAEAAgADAAIAAgADAAMAAgADAAMAAgACAAMAAgADAAIAAAACAAEAAAAAAAAAAAAAAP//AAAAAAAAAQABAAAA//8AAAAAAAD//wAAAAAAAP////8AAP////8AAP///v////7/AAD//wAAAAAAAP//AAAAAP//AQABAAAAAAD//wAA///+//////8AAAAAAAAAAAAA//8BAAEAAAAAAAAAAAAAAAEAAQABAAEAAQABAAEAAAAAAAEAAAD//wAAAAABAAIAAgACAAEAAQABAAAAAQAAAAAAAQABAAEAAQABAAIAAQABAAEAAQD///////8AAAAAAAAAAAAAAAD+/////v///wAAAQAAAP//AQD///7//v///wAAAAAAAP//AAD///7//v/+////AAD//wAAAAABAAAA/v///wAAAAAAAP7//v8AAP3//v///wAAAgACAAAA/f/8//r/+////wEA//8DAAMAAAD+//z//P/9/wEAAQD8//7///////7/+//+/wEABQAEAAAAAAD+//7//P/8//3/AQADAAIAAAABAP7//f/9////AAD//wEAAQAAAP7/AAACAAMAAQAAAP7//v/+//3//f8AAAEAAgABAP/////9//7//v/8//z//v/+//7/AAABAAEAAAD//wAAAAD//wAAAgABAAAAAAABAAAAAgACAAEAAAD9//7//v///////v8AAAAA/v/9//7//f/8//7//v/8//z//f/+//3//f/8//3//P/+/wAAAQAAAP///v/+/////v///wAAAQABAAIAAQAAAAAAAQAAAP////8AAAAAAAABAAEAAQAAAP//AAAAAAAAAAABAAAAAAABAAAA/////////////wAA//////7//v/+/////////wEAAgAAAAAA//8AAAAAAQABAAIAAQACAAEA//////7//v8AAAAAAAD+/wAAAAD///7//////wAAAQD//wAAAAD///7/AAD///7///8AAAAAAAAAAP////8BAAEAAAAAAP////8AAAAAAQACAAMAAgACAAIAAQAAAAAAAQACAAIAAgABAAIAAQAAAAAAAQACAAEAAAAAAAAA/////wAA/////wAA//8AAP7/////////AAABAAEAAAAAAAEAAAABAAEAAQAAAAEAAAAAAAIAAQABAP//AAAAAP//AAAAAAAAAAAAAAAA//8AAAEAAAABAP//AAABAP//AQABAAEAAQABAAIAAgADAAQAAgABAAEAAAD//wEAAgABAAQAAwADAAMAAwADAAEAAQACAAEAAQD/////AAACAAAAAgACAAIABAABAP/////+/////P/+/wAAAAAAAP//AQABAAAAAAD//wAA//8AAAAA///+/wAAAQD/////AAAAAP//AAACAAIA///+//3//v8BAP//AAD+/wAAAQACAAAAAAABAAAA//8AAAMAAwABAP///////wIAAwAFAAUAAAAAAAIAAgABAAEAAQACAAEABgAGAAIAAAADAAMAAQAAAAEAAwD///z///////7////+//z/+//8////AQACAAAA///+////AQD+//v///8AAAAABAAGAAUABQAEAAAAAQAAAP7/AgAFAAEAAgAHAAcACQAKAAgACAAGAAQABgAHAAMAAQAEAAQAAwAGAAYABQAGAAYAAQAAAAAA/f/
2//f/+//+//7////+//3/AgAFAAMA+v/3//n//f/+/wEAAQAAAAkACgAEAAMABAD6//T/9f/9/wEA/P/1//P/8v/3/wAA///8//f/8P/4/wQACAALAP3/8//9/wMACgAOABEAEgAFAPr/BAALAAsABAD3//H/+/8BAAIA/v///wYAAwD8/wAAAwD//wUACQACAPn//P8HAAsABgAEAAEA/f8BAAQAAwADAAQABAABAP3/AQD9//f//P/+/wAA//8CAAQABAD+/wMABQD9//7/BAD+//3/BAAAAPz/AQACAP7/AQACAAIAAwACAP///f///wEA/f///wAAAQAEAAUAAwAAAPz//P///wQABgAAAP3//v8AAAAAAQACAAcABAD9/wIABAD+//3/AQAAAAAAAAAAAAMABQD///7/AAD+////AgABAP//+//8/wEAAAD+/wMABQACAAAA/v/8//z//v8AAP7//f////3/+//+/wIA///9//7//f/7//3//v/+//7//v/9//7/AQADAP//+v/7//z//P8BAAMA///+/wAAAQD///7//v/+////AAABAAEAAQABAAEA/////wEAAAD//wIAAwABAAEAAwACAAIAAgABAAEAAQABAAEAAAACAAIAAgACAAIAAgAAAAAA//8AAAEAAQD//wAA/////wAA/v/+//7/AAAAAP///v8AAAEAAAD+//7//P/+/////f/9//3//f/8//v/+f/7//v/+f/7//v//P/5//f/+v/6//r/+//4//n/+f/3//r////9//j/9//5//z/+//5//r//v/9//j/+//9//v/+//9//7//f/9//z//v8AAP7///8AAAMAAQD/////AAAAAP//AAD/////AAAAAAAAAAD+//7//v/9//7////+//7/AAABAAEAAwABAP///v/9//3//f/7//3/AQADAAUAAQD9//7//f/9//3///8AAP7//v///wAAAQAAAP7//P/9//7/AAAAAP3//v/+////AAAAAAEAAAAAAP7///8AAAAA/////wAA/v/9/wAAAAABAAEA//8BAAIAAgABAP///////wAAAgABAAIAAgAAAAEAAwABAAIA/////wAAAAABAP///////////v8AAP////8BAP7////+//z//f/+//7/////////AAD//////v8AAP///v/+//////8AAAIAAgABAAAAAAAAAAAA///+/////v/+/wAAAQAAAAAA///+/wAAAQAAAAAAAQACAAEAAAABAAEAAAAAAAAAAQACAAMAAgAAAAAAAAABAP////8BAAEAAAAAAAAAAAD/////AAAAAAAA//8AAAAAAQAAAAEAAQABAAEAAAAAAAEAAAD//wAAAQABAP//AAABAAAAAAABAAIAAgAAAAEAAQABAAEAAAAAAP////8AAAIAAQACAAIAAQACAAEAAQABAAIAAAAAAAAAAAABAAEAAQACAAEAAQACAAEAAgABAAEAAgACAAIAAgADAAIAAgACAAIAAgABAAMAAwACAAEAAAABAAAAAgABAAEAAwABAAEAAAABAAAAAAAAAAEAAQD+////AAAAAAAAAQAAAAAAAQABAAEAAQABAAAAAQABAAEAAgAAAAEAAgABAAAAAQAAAAAAAAD//wAAAAAAAP//AAD//wAA//8BAAIAAAABAAAAAAABAAAAAAAAAAAA//8AAAAAAQABAAEAAQAAAAIAAQABAAEAAQAAAAAAAQABAAAAAQACAAIAAgACAAIAAQAAAAIAAgACAAIAAQACAAEAAQAAAAAAAAAAAAAAAQACAAAA//8BAAEAAQABAAEAAAABAAAAAQAAAAAAAgACAAEAAgAAAAEAAgAAAAEAAQADAAIAAwACAAIAAQABAAEAAQAAAAAAAAABAAAAAgADAAEAAQABAAEAAgABAAAAAQACAAAAAQACAAIAAQACAAEAAQAAAAAAAAAAAP//AAAAAAAAAAA
AAAEA//8AAAAAAAAAAP//AAAAAP////8AAAEAAAD//wAAAQAAAAAAAAD/////AAD/////AAD+//7////+/////v///wAA/////wAA/////wAAAAACAP//AQAAAAAAAAD/////AQAAAAAAAQABAAEAAAAAAAIAAQABAAEA//8AAAEAAQABAAAAAQABAAEAAQABAAIAAgABAAEAAAABAAAAAgACAAAAAQACAAEAAAABAAIAAQAAAAAAAQAAAAAAAAABAAEAAQAAAAEAAgABAP////8AAAAA//8AAP//AAD///////8AAAEAAAAAAP/////+/wAA//8AAAEAAAAAAP////8AAAAAAAD//////////////////wAA////////AAAAAAAA/////wAAAAD//wAAAAD//wAAAAABAAAAAAAAAAAA//////////8AAAAAAAD//wAAAAD/////AAD/////AAD/////AAD+//7/AAAAAP///v/+/////////////////wAAAAD//wAAAAD//wAAAQD//wAA/////wEA//8AAAEA//8AAAEA//8AAAEAAQABAAAAAQABAAAAAQD//wEA//8AAAAAAAABAAAAAQAAAAEAAQD/////AAAAAP///////wAAAQABAAAAAQACAAEAAAABAAAAAAABAAAAAAABAAEAAQAAAAAAAAAAAAAA//8AAP/////+/////v///////////wAAAAABAAAAAAABAAAAAAAAAAAAAAD//wAAAAAAAAAA//8AAP//AAABAAAAAAD//////v/+////AAAAAP////////////////////////////8BAAAAAAD//wAA//8AAP////////////////////8AAP//AAAAAAAAAAAAAP//AAD/////AAAAAAEAAAABAAAAAQACAAEAAgABAAAAAQAAAAAAAQAAAAEAAQAAAP//AAABAAIAAAABAAAAAQABAAIAAAABAAEAAQAAAAEAAQAAAAEAAgABAAEAAQAAAAEAAQAAAAAAAAAAAAAA/////////////wAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAQAAAAAAAAD///////8AAP///////wAAAAD//wAAAAAAAAEA/v8AAP/////+//7/AAAAAAAAAAD//wAAAAAAAAAAAAD/////////////AAAAAP////8AAP/////+/////////wAAAAAAAAAAAQAAAAIAAQABAAEAAQABAAEAAAABAAEAAAAAAAEAAQACAAIAAwADAAEAAgABAAEAAAAAAP////8AAAAA/////wEAAQAAAAAAAQAAAAAAAAABAAEAAAACAAAAAQABAAIAAgAAAAEAAgABAAEA/////wAAAAD//wEAAAD/////AAAAAAAAAAAAAAAAAAABAAAAAQABAAAAAAAAAAEAAAAAAAEAAQAAAP//AAABAAAAAAD//wAAAAABAAAAAAAAAAAAAAAAAAEAAQAAAAAAAAAAAAAAAAAAAAAAAQABAAAAAAD//wAAAAAAAAAAAAAAAAAAAAD/////////////AAAAAP///v///wAA//8AAP//AAAAAAAAAAAAAP///v///wAA//8AAAAA//8AAP/////+/wAAAQABAAAAAQAAAAEAAAAAAAAAAQABAAEAAgACAAEAAgABAAIAAwADAAIAAgACAAIAAgABAAEAAgACAAAAAQABAAAA//8AAAAAAQAAAAEAAQAAAAIAAQABAAAAAQABAAAAAQABAAAAAQABAAAA/////wEAAAD//////v///////v///////v///wEAAAAAAAEAAQABAAEAAQAAAP//AQAAAP//////////AAAAAAAA//////////8AAAAA/////wAAAAAAAAAAAAABAAEAAAAAAAAAAQAAAAAAAQAAAP////8AAAAAAAD//wAAAAD//wAAAAAAAAEAAAAAAP////8AAAAAAAD/////AAA
AAAEAAQABAAEAAgABAAIAAQAAAAIAAQACAAEAAQABAAEAAQABAAEAAAABAAEAAAABAAIAAQACAAAAAQACAAAAAgACAAEAAQABAAIAAQABAAEAAQABAAAAAQACAAEAAAAAAAAA//8AAP///////wEAAQAAAAAAAQABAAAAAAAAAAAAAAD//wAAAAAAAAAAAAAAAAAAAQAAAAAA//8AAAAAAAAAAP////////7////+///////+///////+//3//v///////v///wAA//8AAAAAAAAAAAAAAAD///7/AAAAAAAAAQABAAAAAQAAAAAA/v//////AAABAAAAAQABAAEAAgACAAEAAAACAAAAAAABAAEAAQABAAEAAgAAAP//AAAAAAAAAQAAAAEAAQAAAAAAAAAAAAEAAQAAAAEAAgABAAEAAQAAAAAAAQABAP//AAD/////AAD/////AAAAAAAA/////wEA//////7//////wAAAAD//wAAAAAAAAAA//8AAAAA/v//////AAAAAAAAAQABAP/////+//7//////wAAAAABAAEAAQABAAAAAQAAAAAAAQAAAAEAAgABAAEAAAAAAAAA///+//7///////7//v//////AAD+//7/////////AAD//wAAAAAAAAAAAAABAAAAAAD+////AAD///7///8AAAAAAQABAP/////+////AAD/////AQABAAAAAQABAAEAAgACAAEAAwABAAIAAgABAAAAAQAAAAAAAAAAAAAAAQABAAEA//8BAAAAAAD/////AAD/////////////AAAAAAAAAAD//wAA////////AAD//////v/+/wAA////////AAD//wAA/////////v///wAA//8AAAEAAAAAAP//AQAAAAAAAAABAAIAAAABAAIAAAAAAAAAAAABAAAAAAAAAAAAAAABAAEAAQAAAAAAAAD//wEAAQAAAAAAAQABAAAAAAACAAIAAQABAAIAAQABAAEAAQAAAAEAAQAAAP//AAD//wAAAAAAAAEA/////wAAAAD///7/AAD///7/////////AAAAAAEAAAABAAEAAQABAAAAAAAAAAAAAAABAAEAAQABAAEAAAABAAAA//8AAAAAAQAAAAAAAQABAAAAAQABAAEAAAABAAEAAAABAP////8AAAEAAAD//wAAAAD//wEAAQABAAAAAAAAAAAAAQABAAAAAAABAAEAAQACAAEAAgACAAEAAgABAAEAAQABAAAAAgACAAAAAQACAAIAAAAAAAAAAAABAAEAAAD//wEA//8AAAAA/////wEA////////AAD/////AAD//////////wAAAQABAAAAAAAAAAAA////////////////AAD//////////wEAAAD/////AAAAAAAAAAAAAP//AAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAA///+/wAAAQACAAIAAAAAAAEAAAABAAAAAAAAAP//AAABAAEAAAAAAAEAAgACAAIAAQD//wEAAgAAAAAAAQAAAAIAAQABAAEAAQAAAAEAAAD//wEAAQAAAP//AAAAAAEAAAAAAAAAAAD/////AAD//wAA//////7//v8AAAEAAAD/////AQABAAEAAAAAAAAAAAAAAAAAAQACAAEAAAD//wAA//8AAAAAAAD///////8AAP////////7//v//////AAD////////+/////v///////v/+/////v///wAA/////wAA////////AAD+//7///8AAAAAAAAAAAAAAAAAAAEAAQABAAAA//8AAP//AAAAAAAAAAABAAEAAAAAAAAA/v///////////////v8AAAEA/////wAAAQAAAAAAAAAAAAAAAAAAAAAAAQACAAIAAQAAAAAAAQABAP////////7/AAAAAAAA/v////7///8AAP////8AAP///v////7/AAABAAAAAAD//wAA//8AAAAAAAA
BAAAAAQACAAAAAAABAAAAAQACAAIA/////wAAAAAAAP////8AAAEA//////////8AAP///v/+////AQD//////v/+/////f/+/////////////v/9/////v/+//7//v////7///8AAP//////////AAABAAEAAAABAAAAAQABAAAAAQACAAEAAAD///////8AAP//AQACAAIAAAABAAEAAAAAAAAA/////wEAAQAAAAAAAAAAAAEAAAABAAIAAQABAAIA//8AAAAAAAD/////AAAAAAEAAQD///7/AQAAAP//AAD+//7//v///wAA/v/+//7//////wAAAAD+////AAAAAP7/AAABAAEAAAD+/wAAAgAEAAUAAgAAAP7/AQADAAEA//8AAAEAAgAEAAIA///+//7/AAACAAQABAAAAP///f8AAAEAAgADAP////8AAAAA/v/+//7//////wEAAQD+//3//////wEA///9/wAAAwADAAMA///6//j//v8BAAIA//8AAAEAAgACAP///f/7//7/AQAAAAEAAgADAP///f/+/wAABAAEAAAA/P///wMAAQD///7//v8AAAUABQADAAAA//8EAAEA/P/6////BAAHAAYAAgD8//f/AAAKAAoAAwD+/wEABgADAP7////+////AwAJAAgAAgD+//z//P/+/wUABwAAAPr//f8BAAIAAAD9////AwAHAAQA+v/3//7/AwD//wIABQACAAAA/////wEAAQD9////AwACAP//AQADAAAA/v8AAAQAAwD9//z///8AAAAAAQABAAAA//8AAAQAAwACAAUACAAHAAAA/P/8////BQAFAAAABQAMAAkAAQD8//f/9////wQA/v/9/wEAAgD+//3/+P/z//b/AAAFAAAA//8JAAgA/v8CAAkABwAGAAUA/f/+/wUADAANAAQA+P/8/wQA/f/x/+v/7f/s//H//f8CAAIAAgAFAAMA+P/1//n/+f/8/wgADgALAAgACQAFAP7//f/6//n/AAALAA0ADAAMAAwABgACAP//+//1//r/AgADAAgACQAEAAMACgAMAAcAAwAEAP3/9v/+/wMAAwAEAAsACgADAAMA///z//P/9//2//b/+v///wUACAAEAP3///8HAAgABAD9//X/9/8EAA8ACwAEAP7//v8DAAcAAQD5//b/+f8AAAIA///5//X/+P////7//v8DAP7//P8EAAYAAAACAAgABAD9/wQACgADAP3///8DAAcABAD///r/9P/1//n///8AAPz/AQANAA4ABwAFAAYABQD//wAA/v/5//r/AgD///f/+P/+/wAA/f/9/wAA+v/1//b//P8DAAQAAAD8/wEABgAGAP//+//6//T/8f/7/wEAAgD///v/AAAIAAkADQAOAAcAAgAEAAYAAwABAAkACQADAAAA/v/+//z//P8AAP7/+v8AAAQABAAEAAYACgAGAAUACAD///D/8/8EAAsABQD8//j/+P/7//3/+v/4//j/+f8BAAsAEAAMAAkABwABAPr//P8BAPv/8//+/wsABwD+//n/8//p/+v/9P/y/+f/6f/1//f/7//y//P/7//6/wkABgD8//v//v8BAAkAGQAeABYAEAAXABsAEAD7/+//8v/8/////v8AAP7/+//5//b/7v/w//3/BwAGAAQAAwANABYADQAJABIADwADAAYACAAEAAEAAwD8/+//+f8SABEA9//j/+j/+/8KAA4ABgD9//3//f/+//3/7v/p//b/9//5/wMA/P/4////AQAFAAYA//8CAAQACwAUAAgA+P/w/+j/5v/w//3/9//x//r/9/8DAB0ADgD//wwADQANAAwAAgD///3/BAARAAoA/f/9////8v/r//n//f/8/wMACAAMAAYA9//2//z/+v/+/wUAAgD5//7/BAD2/+//+v/8//z/AQABAPz//v8BAAEAAgAEAAgACgAGAAAA+P/2//j/+//
//wIAAwD+//b/+P/9//3///8EAAsACwADAAEABgAEAP///P/7//v//v/6//f//v/9//j/AAAGAAUAAQD7////BAD///z/CQAPAAsADgALAAUABgAGAAAABQAHAP3//f8DAAEAAgD///7//P/9/wAA+v/1//X/8P/w//j//v8AAPv//v8BAP//+v/2//j/AAAKAAgACAADAP//BQAGAAEABAAFAP//AgAIAAMA9//9/wUA+v/2//j//f////3//f/4//j/AgD+//P/+/8DAPj/+/8RABAAAwAFAAkACgAMAAQAAQAOABEABgAFAAoAAQD1//P//f8AAPn/9v//////9v/3/wEABAAAAP///v/8//7/CQAIAPz/+/8FAAUABQAHAP7/9//5////AQACAPr/+/8EAAQACAADAPX/9/8LAAwACAAIAP//9v/8/wgACgD6//H/8//0//v/+/8DAAkABgAIABMAFQAIAPr/BQANAP//BQALAAoADwATAAcA9v/9/wYA+P/x//T/7//y//L/6v/8/xIAAwD2/wMADwD+//b/BAAHAAAAAgAFABUAHgAWAAAA8P8LABAA7P/h//z/AgD7//3/7v/k//b//f/u//P/FgARAPT//f8DANX/uP/h/xEAHQAYABEAAgAuAEQAGAD//wkAFQAbABMADAD2/+b//v8AAOr//f8HAAcA6P+3/9f/2P/3/xQA9f/v/yAAUgAxAAYA/v8aACwABwDx/+P/9v8EACgAJAAIAO//LgATAB4Apv9H/h0BugHL/r7/6ACR/1v/xAC9/1T/1ADK/zH/iADw/5j/XgD5/wYAbADl/77/IwDz/+r/wP9+/4gAwACi/5T/SQAcANP/QgD6/8X/NwAKAOD/NQAnAPb/9f/s/+3/BwDy/9f/QwAqAOD/FgALAPv/OAASAMH/HwDh/4j/KQAwALj/xv8eABsA+//x/8v/uP8XADkA1v/t/y8AGgD4/xAAFwARAC0AJQAIAOb/AwD9/9j/4f/M/wAAEwC8/9P/LQAAAM3/MwBBAPv/NwAuAOr/MgA3AMH/7P9FAAcABQAJANj/8f/k/8P/0v8TAPf/xP8FABQA8v/o/wwAHgAVABUADQAfABwA8//8//r/4f8EAGQAQADb/+f/DgA6ADEA+v/t/yQAPQDI/8j/EgDm/9H/yv/H/+H//f/T/8H/1v/6/x4A7P/a//D/DgAoACoAPwA9APz/2v8pADMA2v/9/wkA6v8NAAcA6P/k/+H/CQAVAOj/8//4/+3/6f8RABEA5P/s/+P/BgAxAPj/AQAxANj/+f9OACoA6f8GACcA+v/7//j/0f/V//n/HQBOAAkA5P8UAOf/yP/l/ycA7P+2/xoAEAAEABIA4f/5/zMAFQDp/wgAJAD3/93/HQBBAAIA/f8hAMb/zv9DABwA9v8FAMb/2/8PAPn/6v/9/+j/0v8MAAcA1v/O/+L/JQArAPb/CwDu//7/RAAQAPz/GADw//f/HgAOAPv/6/8AAPv/8P8rACAA4P/a/+b/AgAVABAACgD+/9v/xv8+ADUA6v82ACkA5/8NAEMA+f+9//b/OQDr/7L/6P8SACYA6v/n/xgAFwD4//n/6/+5/9r/6//E/+b/KQAPAAQAFwDG/7n/NAA1AOH/IQBXACQAPQAzALr/tf9KABsA2v90ADgAvv/C/7T/6v87AOT/zP8MAKf/y/89ABMA4f8LAB8AUABbAOn/vf+f/73/XgBFALf//v9FACEA8P/H/9j/8f8GAEsAXgAUAPb/CAACAAwAJAACAAAACQDW/9f/AAAFAPv/4f/Z//j/HwAqAP3/xv/w//X/CQBKAO//2P/1//D/QgApANP/GgALAJj//P9GALH/vv8NAO//DAAaAO7/6P8IAP7/IAANALz/wv8iAH8AIwDF/y4ARgAEACcAQAAHAMn/0f8FAB8AHAAOANn/4//2//H/CgDy//T/8v/I/+//KQATAO//DACY/8v+uP+
wAPb/Vf+9/3gAywBsAGb/yf/9ALUA9P/r/0YAkABPAML/FACHAB8Avf/k/wEABwCY/yr/i//R/3P/T/+t/xIAQQAeABsAQACZAGMAyP8FAFQAUQD+/+r/dgBhALn/4/+AACgAs/+o/4z/8P8UAMD/1f8TABcA9f8BAFcAdACGAFYABQBbAH0A5f+l/0sALwDb/9T/o//d/9n/yf/W/uD+wADe/1H+vf+kAMD/g/8UAGcAhACGABAAJACyAJUA///o/1kApABRAMr/LgCzAFsAf/9D/xAAjP/I/g8A3P8V/+H/u/9m/ycAcgCx//n/qABKAFkAfwA/ADoACgDv/0wApQA6AM/////f/+f/0/9z/6H/rf+M/4v/s/8aABUAEABFAP//CwBgAHkAZwA/AEwA9//r/2oA4v92/zcANwCX/7D/4v/j/wAA1v/O/0IAYQDS/8n/ZwBwACcA9v8UAEcAVgAVAMf/8P8dAAUAxv/c/93/qf/z/wgA0f/6/9v/zv8OAPr/8v8UAEgAMAArAHQARAA0AFUARQALAMn/3P/8/xAAxf9b/5H/2f/6/xgAAQD1//H/DAAbACQATAATANP/0P8JADUACADp//v/FAAPAO7/2f8LABoA2P/R/xwA7f+i/xMARQCk/2X/xv/w//X/9//L//j/JwDg//3/SQBiAGsAUgAjABgARwA1ANH/+/8bAMj/FgBMAOz/9v/x/6r/6/88ACYAyv/J//T/8/8EAPj/0//z//b/3v8IAAQABgD1/77/y//Q/+//QABaAGkAjwB0AEoAhAB7ABoANwBIAND/2f8KALD/uv/e/0D/KP/M/83/zf8OAKn/lv8IAAQAJAByADcA8f80AIkAjgCBAEEA4f/4/ykABwDU/8v/8//G/2X/xf8oAOb/5f/M/8f/NgA6AKH/0P8lAL3/1f8UAAgAHABOAD8ATACEAEoASgCPAGgAEgABACYAGADO/5r/nP+2/7//lP9B/3L/yf+L/5//AgBFADoAHgA5AA0AUACTAFsAJwAjANH/0v9bAB8Azv9r/0L/2P/4/47/lP8LAAUA2/8TAL//PwDkABMAGQCFAI0AeABRAB4A4//i/7j/5P/i/3n/h/+d/4f/f/+d/9n/BgDz/+j/PwBJACwAQwBRADcAIQA2ACAACAAhAAIA9v8jAAIAIQBXAC0AIQAXAAUABAACAD8ALgDm//H//P/d/77/0f/K/9b/BADY//v/VQAQAAYA///3/0gALADg/xUATgDx/53/8P8wAOr/qf9k/5n/9/8EAN3/if+m/9X/1f/u/wAADgD4//3/JgBQAGoAVAArABAA5/+3//P/OwBJACkA1/+H/9r/CwC4/83/EAAgAMD/hv8EACQAw/8AAAMAnP8DAF0AOgB6AIkAMQAiACAABAA7AEkAvv9Y/6b/kv9H/7v/rv9b/1b/Uf+P/wwA8//N/1cAnwCLAJIArgDRANUAdQA2AGcAUAC3/83/AwBu/0f/jf94/23/p/+5//n/DQDc/xkARABNAHAAXgAqAEEAggBUAB0AMwAWANX/hv+u/1MA8v9F/4T/iv+F/+7/FAAKACQAKwDZ/6r/6P9fAH8A+//8/08AKAA1AGgAYwBwAFQA5v/5/4sAWwD8/9T/hP/C/5P/Sv/0/+j/T/96/67/1P8RAMX/qv8VABwACQAnAEUApAClAP3//P9tAEcARQAdAJz/zP/R/5b/0/+h/3H/mf98/3n/yf8DAC8ASwADAML/CwCZAOIArQAMAPT/agCJAGMARwA3AAgAt/9t/57/CgDu/6b/m/+J/4z/qf/l/w8ALAAsAN3/7/9oAMQAbQA5AGUAJQBOAK4AjgBlACgA1P+z/6f/mP+E/4r/cf9n/4j/mP/T/xoAVgBSAAUA8v9DALkAwwBmAEIATQCcAOYAmwAoABoA6f9G/0D/rP+p/37/Jf8H/2b/cf9a/4v/AAA8AAAA1P8vAHoAcQCQAEkAIQBsAGsAbgC
bAFUA5P/r/93/nf+m/7P/4/8QAMP/rP+u/6n/3//y/xcAUAAsAO7/QQB5AEoAXgAzAAMAHAAPACkAagAnAM//2f/q/9v/uf+b/7T/zv9X/2X/6f/S/wcAJQDO/xcAUAA6AI0AsgBxAFYAaQBQAEUAWwArABgA9v/f//H/kP9v/5b/tf+w/z//PP/A/wQA9P8VAEUAHAAUADkAbgC2AJ4AhgCNAGgAbABBAAIAJQAhAN7/sv+R/7T/tv9a/zL/Yf/B/7f/pv/n/wgA+//z/08AnAC1AKwAdABaAG0AawBDACcAHgAjAP3/o/+O/7P/q//S/+v/yv/S/87/3v8LAPf/0f8KADsAiwCXAAAA+f9MAP3/tP+7/87/IQAGAJr/tP8DAPf/1P++/7D/8P8FAO7/LgBXAB8AAQA0AKEAxQA2AO7/SQBiACMA+P8QAA0A0//V/wMA9f/u/+7/pv+Q/6X/cf9U/4r/rf/c/xsATABPAAcA4P8dAHYAYwBJAIQAagD5/97/MwBTACMAPwBaAA8A7//+/7T/nv8DAA0A4f/R/87/GgBBAP7/6f8LANX/qP8eAGIALAAVABUATQBWAPH/uP/S/ycAfwB/ACMA1//T/8T/q//m/00AeAAcALr/nP9l/6P/AgAAAB8ACACL/4L///8YANL/0v/l//v/LQBWAH0AJgC+/+H/IAAdAOn/4f9IAJUAFgCp/8v/DAA7AAEAx//r/yoARwDz/7v/4v/S/7H/+v9jAJcAkADT/z//bP+Z/x4AsgCUABUAlv9b/4z/8/8+AIkAOwCb/7//6f/m/wwALABlAF4A+f/e/z8AXgAsAAAA0/8xAFsAwP+6//D/wf+W/3P/rf/9/97/mv+L/8f/8v8aAGIATQD3/+f/JgBbAKMA7ACuADIAyf/B/ykAMgAHAP//0//n/+b/fv+t//T/of+h/8P/8v83ABAALgBuACsA/P8vAEYAGQAjADQARgBwAEsAMQAaANL/uv+l/6D/4f/h/6H/pf+n/43/wP8AAPr/9/8YAOX/oP/I/xoAigCcADcAxP+f//r/RgB0AEkAAwD//5D/LP+C/y0ATQAbAP3/7f8TADQAcACQAHwAEwCY/7D/6/85AHAAKADn/wQA5/+K/3f/iP/J/x0A5f/d////y//P/6z/of9gAM8AWgA4AFIAKgAvAD4ARACGAHIAsP96/9T/yP+t/6H/qv/D/3v/Hf9+/x8A/P+f/7z/JQBJAC0AIwAwAFEAMgAyAG8AkgCyAK8AUwDo/wQASwBVAA4ArP+//7n/nf/L/9T/q/9x/37/AAAaALv/3f88AEkARwAyACEAkACiACAA3P++//T/XwAXAL3/DQAHAIv/Vv+d//v/9f/R/9X/3v+7/5H/xP87AHgAbAB9AFMA5v/U//n/5v/h/0MAVwAMAOj/xf/J/wUA3v++/yIAGgCS/27/6v8uADAA+/+J/9v/MwDw/7z/3/87AF4AFwCt/9b/XgBMAAgADwD//9L/l//Z/00AOwAdACQASgBAAN//oP/F/yMATwApAPn/9f8OAND/lP/z/xkA5v/T/9L/AwArAN7/3v9GAA4A8/8pABsAQgAMAHj/nf/2/wsAHwD5/8n/DABDAAUA3f/a/9X/+P8KABUAIAApAFsAlQB6AAEA7/9KAG8AdQBFAPb/BADZ/2f/pv87AGMAIgCV/yz/Y/97/2f/1P8aABIAUgA4ALb/1f9BAIAATADE/+3/UgAcAAsARAA4AP//lf+F/9L/4v/4//T/gf9b/7j/zP/D//z/EABVAJQAbQBiAEkAIAAlAEIAJgD6/yYAbQCGAFQAzf+D/6//sP++/zMAJQDL/8n/n/+u/wIAKAB7AHUA0//V/yMA5P+4/67/sP8hAFcAMAAmAOL/pv/M/8j/m//m/00AOAAEAPL/7//6/9H/nf8OAJgAbgBJAHcAigC9AF8AY/96//n/GgCHAFIArf/4/9f/Bf9a//n/BwD6/6H/3/+
AAPr/g//D/9T/AgAkACEAcwC7AHcAAADu/w4ADgAeAGoAZQAbAAkAn/+M/yMAMgD0/+7/1P/d/8n/m//k/+//qf/R/x0ATQAiAJr/zv94ADoAx/9oANkAegAIAIr/ov/9/83/p//b//P/7v+c/zP/pP8lAAwA/f8LABgACwD+/zkAZgA4ACgASgB4AHwAWgBJADcAGQAHAAcAIQBHABwA2/99/3X/xf+Y/5X/LgCpAGQAnf9z/yAAdAArAMD/i//z/x8Abv9V/1wAtwAMAJf/u/9FAEYAkP+N/zEAMQD5//r/zf/N/00ARQCv//z/eACGAJcA/f+h/04AQACq/+3/SgAbAPH/pP+k/zIA6P9z//T/VQAtACkAUwBYACQAqv9//+f/IQBAAFgAdwA/AKv/mv+x/9//JAA+AFkAVgDP/4D/x/+l/5r////d//D/ggBdAOn/2//j/+//3f/d/5wA5gD3/4//3v/j/+n/+f8XAGYAWgAKAOT/qv+E//b/SQD2/4v/mf8+AI4A//+l//T/3f9b/1b/7/+AAGMAv/+Z/xsAPgBKAJwAiwCYANEASAB0/x7/bf87AGsA6/8BAHoATQDE/4X/rv8UAP3/qf/P/wIA/P/M/63/yP/F/77/BQCmAA4BnQAEANf/s/9//9H/YQB7AHUARgCz/4X/i/9Z//n/agAlAE4AbgASAK3/sf/g/4z/Mf/D/5MAywA4ALL/MgCGAC8ABwA3ADQAHwAEAJj/lP8ZAP3/3/8nAOD/5f8NABoAbgAgADj/YP8bABQAQQCDAE0AKgDz/+L/RgDs/0P/3P9XAKf/fv8RAPD/5P92ACMA2f8YANn/EQDX/y//1f97ADIAUQCTAO7/qP/2/xcAOABEAFIAcwAwAI//U/+E/+D/NwAgAK//Sf9y//H/7/9n/6n/0wAdAW8AMABsADsAff8j/xsAFwF/AKH/1P9aADEAsv+7/2QAewAMAN//av8H/4X/CgD7/wYAKQAhALL/O/+k/54ArQBBAEcAVgA1AIT/Lv/K/1cAWQAkAKv/2f9lAMz/fv86ALQAggAiAKL/7/+nAC4A/v+GAGAAFgBGAJn/Qv9kAIMAlv9+/8D/pv+7/2L/qv/MAHsAyv/v/4D/Sf9MACsAwP+8AKcAn//b/xsAoP9o/zz/HQAsAUcAMv+2/0kAxv9Z/8L/ogDWABsAmv/t/wIAwf/G/zgAvwChAF8AIQAxAFYA3P95/5n/6/9cAEAAjv+L/+f/5f+L/53/bgC1AAEAkf/r/9r/gf9d/8r/dwBKAAAABgAGABEA//+2//f/jgCaAFwAbgBKAMP/cP+m/18AOwAUAHUASwCq/17/CwBKAMf/vf8/AAMAJP9T/x4ASQD0/7f/0P8TAGwASQDb/w8AWAA5AN3/wf8gAE8ABwDH/77/8/9BAEAAxf+C/+//SwAvABgAAQDV/6X/o/8DAFYATgAxAAoA1//i/wAAGgAnAEkAQQAGAP//7f+l/3j/qv+U/6X/CgBaAL4APwDl/0gAtf8v/x8A9QCLAN7/of/O/+n/fv+n/3IAXgDj/97/9P/p/+T/tf+v/y8AogCkABsA6/82AC0A6P+0//j/QAAHAOf/9f+n/+f/bQAjABIATwAhAP7/+P+u/9n/7/+2/+7/vv+P/zYAkgAlAAEAXgBHAMP/k/+X/8T/AQDL/xoAywCSAO7/sP+c//H/vQCOAOD/UAA5ACf/KP9j/3T/gACpANn/EQDy/23/4//W/8r/1AB+ADH/kf+gAD8Ad/9+/y0AvwDl/6f/egA3AN//JwD//wUAeAAyANz/7P/z/yIA+/9x/9j/TADY/wIABABl/9f/jAA1ANr/DQBKAF0A+f+S/woAJQCI/+3/lAA/AAAANQD3/5P/mf/r/wkAuv+h/wQAJAD//3MAxACNAD8AJAAyADsAQwD8/7H/AQAxAH3/Hf/w/4YAEwAGAF8AMADk/2n/U//+/y0A/f91AOEAXADe/9L/WP9
c/zgAaAAnAPH/hP+L/+j/pv+F/wcAYgCBAFUAxP96/4D/lP8tALoAkQBUAGEANQDl//T/FwBMAC4A0v9CAEgAUP8W/4n/ff+B/xQAWwB7AFMAqf+m/8z/ov/W/zEAkQCrAEsAjP9V/9f/v//M/9MAQAFmAEb/vP7m/iP/Wv/u/6oAxwB+AIgAGACm/0gA3gDbAL0AmAAiAKD/p//E/5v/kf/v/28AdAAIACcAUQCZ/yf/mP/t/+j/EgAyAD4AUwDs/8n/dADjAOIAiADt/7P/vf9F/0j/5v+g/7H/+v+e/9f/PwD7/w4ArwDCAMYAYgB0/4H/y//z/1sAhgCSAIoAIABu/1H/af9c/5r/of/a/wIAbv8r/9H/FQCp/7X/DgBbAIIAMADv/04ATAAIAFcApgCWAEIAJgD9/+T/yf9c/83/NQCa/1n/IwCQANv/df+2/0UAGgCC/x0AdQD3/yYAhQD4/9D/oQC/AGkAKgDQ/9D/i/8F/7H/zgB0ALD/9v/z/zX/Ef+F/+z/dQB5APj/8P/K/7X/bwC0AGUAyACrAOf/yv/O/2D/T/+z/83/4P+s/23/0f8vABAAHACWAKsAVQBEAFoAigAzAIr/z/8NAKL/kv8MAFUAOgA5AC4A3f98/5v/tP9G/1L/6v95AJMAJgDh//z/5v/3/4kAqwB/AH4AIwDd/7n/TP9X/+L/UwCtAH0Axf+b/4P//v4y/7L/OQD6AMwAGQCR/zr/vf9WAF0A2QBdAeUAPgDx/5b/qP8IANb/xf/S/3j/a/+Q/37/t/9LAIUAQwDy/73/rP+S/7r/BwAdAHsAzQBxABYASgBKAOn/AwAwAAYADwAvACgAsv9g/9n/YAAQAJD/HQBbAGT/6v6x/24ATgA5AGYAWAAhAAgA/f+b/zb/pP8JAJX/oP9wAJ4A8v+5/0QAPwCv/7X/CAAMAN7/0v8YADUAy//3/5kAYQBLAGsA2f+w/xkAyf9O/6b/BAAZABAA0f8FAEsA0v+D/yUAiQA0ACQACQDL/8j/mP+O/wwAUwAyAFYAPADD/9b/0f+M/w8AXAAxAHQA9v9H/8L/9/+O/1oAfwEWAW8ACQCf/7H/hf+O/34AAAFKANL/+P+J/0j/j//s/4gAgQD5/wsADACj/5z/zP/c/xkATgC3APoAXAAFABUAiv83/+T/ggAqAJT/jv/o/6r/0f4B/0EArABHADYAQwAWAM3/9f80AOb/7f9nAGgABgAaAJgAcQCf/23/+v/K/7r+4P6r/4b/cf+5/xkAeQCXAKcApABkABsASgA/AKX/lv8PABgAr//I/08ATgD3/wQAHQCG/xP/jv8ZADoAHgBEAIEASgAYAAYAEwBbAGAAPwByAHQA///p/wAA+v8iAAcApP9U/z7/Lf9J/3X/dP86AMoAcwCWANMArwCAABYA8P8kAAcAvP/3/3wAZAABANj/GwBVAKr/Tf/e/+z/df+B//j/8f+5/8X/2f8MAPL/+P+HAMAAgQAeAPf/5v/p/1IAWwBAAEAAyv+I/5z/of/t/zgADwDj/8D/kf95/3H/7P+SAJ0AWgApAD8AZgAtAMb/5f8uAB4ARwAEAFv/Q/+G/77/2v/+/0kARwCp/1L/nv+9/5//0f9rAMIAcQAVAPD/yv/+/1MAMQBGAIAADwDK/63/J/9h/1kAaAATAHIAlgDa/y7/af8rAJIAMwDl/0gANgCh/8j/CgAiAHoAoABjADEAPwBIADEAGQAJAOX/k/9k/6H/AAAgANn///9iAO//bP/M/ycA7f89AIQAXwBLANT/7P9vAGQAWwBmAOz/c/+T/7X/kf/G/3YAiAAiAAkAtf+D/5z/zv82AFYAAQCs/5L/fv+k//3/3f/k/3sAVgB3///+Ff9w/9//CwBiANAAkwBUACkAzv8MAHMAFQArAKEAFAB+/5L/6P8WAPb/+/98AIEApf9b/6X/kv+f/x4ATACEAK0AZgA6ABsANwA+AOP/7P8zAPD/xf/
N/1z/Ef+Q/+L/9P9hAGcAeACDABMAOQBfAAMAMQCiAF4A/v/y/5X/RP+d/xEAYQAqAMb/JAD3/0b/TP93/43/HQDAALEANQDz//j/5P+B/3D/GQCIAIkAcwACAML/s/96/5r/IACjAJcASgAKAJL/Gf8R/6j/RgC2ANoAlwBBANP/c/9X/6f/OACTAOkA6QAvALz/rP+M/6//CwBaAFMAEQDi/87/1f+B/0n/DwCdAFAAMwBfACsAz/+x/4r/tf9BAGwAMgDv/6P/h/98/yn/Tf8nAKkAnwB/AD0A7f/q/+f/BwCCALEAfAAfAMH/eP9b/4D/ev/Z/7wAtgDr/5r/kv9n/2T/tv9aAL0AYQAMAB4AyP+C/63/8f+RABIB4wCIADUApP88/zD/hf8lAHgAOQAuADoAvP+C/5P/tf8QAHcAdQAzADAADwDJ/6P/rf8MAI4AfABFAHcAPQCU/3T/1v8ZAD0AGgAPACQAvf9W/2L/qf/s/xwAVgCBAFkA7P+N/2v/cf+S//f/OAAXAPH/zv+i/2v/Yv/j/1AAPQBcAIgAGQCQ/6L/6v8AAEgArQB0ADoAbwAeAKX/0v9TALUAiQAyACwA9P96/0X/q/8dAAwA6/8EABMA5P+1/+D/CQBGALYAeQDp/x4AhgBBAPT/GAAyACIAEgDs/8v/x/+3/7b/rv/b/3oAewD8/zoAaAAJANj/3P/v//z//f/7//b/5f/d/xMA9/+c/9z/OwAIAM3/AwADALn/pP92/47/7v/D/7b/7v/W//r/GQDT/+z/JQAEACYAQwDE/6n//v/n/8T/1//r/w8AFQAuAGsARgD5/+//2P+s/8D/6v8eAHMAfgBHAFAAOwAMADsAVgA3AB4A9v98/1f/vv/F/7D/9P8BAOj/5//F/+7/NgA9AEMATwBDAC8AOAAVAPH/FgAXANj/6f8jABcA/f/3/+7/zf/E/9j/9v8uAEQAOAA+ABEAtv+g/7T/5/9DAFUATQB/AEoArv9n/2v/iP/i/xkAKgCTAJMA9P/B/+L//f8jAAMAAAA+APj/ov+h/27/nP9EAGEAQQBiAGIAQwASAND/0//Q/77/9f/1/7H/v//m/+P/6v/M/7z/JQBmADUATABaAAEAFQA/APT/AAArAOb/4P/0/8T/+/8qAOb///8cAM3/x/+5/5f/GABpAA8A7/8QAAAA8//a/8T/BwAsAA0AGgAaAPH/7//3//7/HQA2AEUAPgD1/8T/5P/o/+P/HwBkAHAAPgDq/6D/hf+3//z/+P/4/zEAHQDY/7r/ov/Y/yMAIQA5AHQAXQAbAPP/7/8DAPr/7v8UABAA2P/I/9T/2P/Z/xYAZABQAC4ANgD+/8j/y//L//7/IwASAD8AKgCT/5P///8EAPX/NwCUAH4A7f+z/+3/3/+4//r/IQD2//3/IAAFAOv/6P/d/+v/5f/A/+P/AADQ//b/NQAXABkAPQA3AF8AkwBLAOj/yf/K/7//r//i/zwAQgAqABkA4v++/8v/7P9RAJIAVgBGAEsA1v95/3n/ef+g//3/MgBJADYA4P+y/7D/r//l/zcAVgBlAG0A+/9X/03/vP/8/xYAegDPAJgAIgC2/2H/Uv+K/+X/UQCBAGIAHAC1/4X/qf/I/wEAZwCXAHcAGwC4/6b/yf8AAFMAdABOADoADACQ/1//qP/u/zAAgACVAGIABwCe/2f/dP+t/xkAgQCCAGYAVwDx/6H/1P8UAEcAgQCCAFEACQC7/5n/j/+P/9v/NwA4ABIA9v+4/4v/j/+j/+v/ZACmAHAAGQDy/9H/rv+v//L/WgB5ADEAy/+V/4v/p//o/xgARQCFAHgACgCn/47/mv+p/8f/DgBfAGEAPAAtAA0A3v/W//7/UACaAJsAVgDt/4L/Sv8y/0b/vf80AG4AigBFANX/qP+T/4//9f9mAJAAkQAxANz/0v+i/4f/7f+GAMUAqABbAAIAqv9V/0v/kf/w/zMAKgAKAPT/yf+
j/7X/8/9PAJcAcQATAO3/2v/I/8//8P8zAGAATABAAD4ACQDg/83/v/8BAEsAHQDb/+X//P/0/8z/yv8dADgAFAAxADcA7P/V/+P/zv/Z/wMAIgA+ABwA0v+9/8n/1P/y/x4ARgBNABYA5v/o/+7/+P8IAAIAAgAEAPL/2//b//T/FQAEANT/1v/1/wIABQDs/97/CgAPANv/3/8UADYASgBAADgASAAeANz/4P/2//n/9f/n//H/CAD8/+3/9v/6/+3/5P/y/xoANgA0ABsA/v/9/wIA7v/w/x0ARwBHABsA5//L/77/2v8SACEAJwBHACsAx/9+/3f/kv/A/+3/HwBRAEcADQDn/8X/q//G//T/IQBVAEoA/P/Q/77/vf/i/+r/6/8kAF8AeQBxADYAAQDw/+H/+f8mADoAVABjAEgAFwDU/6H/sP/C/9z/KwA4AOL/of+R/4b/iP+p/+v/KgBJAF0ASQAQAP//HQAvAD8ARwA1ABsA/f/h/93/4//q//D/5P/o/woAEgAGABgANgBAADAAIwAmAAkA5v/w/+3/y//H/+P/AgAGAO7/5v/j/8//5P8IAAcA//8CAAEACAAGAP3/FAA2ADQAGgASABsADADt/wYARABHAB4AAADp/8L/j/+D/6r/0v/5/ykAKgAGAAcAFwD//+z/FABQAFAAGAAHACQAIQAMAAcA9P/j/+r/3//J/8b/vv+u/6n/vf/W/9P/x//j/wsAHwA1AD4AKAAeACYAGwD//wMAKAA/ACYAAADw//D/7f/x/wwAJQAtACkAHQABAOz/7//x/+v/7//x//H/9v/3//P/AgAWABgAIQAxAC0AGAD9/+D/2P/r////BgADAP3/BQABAPP///8eADgATgBLAC4ADADt/9T/0////zUAKgDm/8f/z//P/9v//f8QABIADgAJAPD/yv/B/9X/8f8gADoAIgASACQAHgD0/+b/FgBHAEYAMwAqAA4A2v+Y/27/h//A/+X/9P/+/wgADQAGAPr/CwBBAGsAagBJACMABQDb/67/tP/s/w0AEAAOAAEA8f/b/73/sv/B/9f/5v/l//L/EwAYAAkABAAKACMAOAA7AD0ALgAGAOP/yP+2/8T/6f8fAE4ARgAgAAkA7P/S/9D/3f/9/yEAJgAYAAMA2v/B/9P/8/8TACwAMQAsABoAAAADABYAKQBGAE0AMwAhAAsA6v/n//7/DwAXABQADwABAOL/0f/T/9X/2//1/xkALQAfAPj/3P/L/8D/3/8nAFMATQA1ABcA8//Z/8n/xv/h/xYAHwDw/9P/1//T/9X/+v8qADsAOQAxAAkAz//F/+b/AQAXACUAJAALAOn/5f/y//X///8cACYAEwDq/7v/qf+9/9L/4//8/xEAHQAfAA8A+v/w//D/CQAtADcAOQAwAPz/zf/N/9j/4P/y/wIAFAAfABoAHgAkAAsA/P8MAA8ACwAQAAIA9P/5//H/8f/+//n/+v/+/+z/7f8AAPj/BQAgABcACgD7/+P/6//2/+z/AwAkACQAJAAEAMv/xf/Y//D/IgA5ADAAMgAaAO//4//T/9f/BAAgACEAKAAbAAQA8//Q/8r/8P/1/+r//f/5/+r/6P/c/+P/CgAWACUAPAAqACAAJgAUAAQABwALABsAJQAbAA4A+v/i/83/0f/X/9X/3//y//n/+f/z/+H/5f/4/wwAJwA0ACkAGgARAAMA9P/r/+n/9f8AAPP/6v/x/+3/9P/3/+//9v8AAAEAEQATAAUABgD+/+7/8//0/+3/CwARAAMAFwARAAgADQDy/+D/6f/h/9//7P/3/wUAFgANAPT/7f/m/+z/AwAWACgAMQAsABcA/v/z//v/BAAFAAcAGAAZAAoA/////w4AEgD7//b/AwD7/+7/5//k//H/+P/y//j////3//T/9//7/wIADgARAA4ADQAJAPz/9//6//7/BQAJAAEAAQABAPr/AAD///b/8v/2/////v/
+////+v////7/+f8EAAkABQAMABAACQADAPr/8v/3//j/8f/y//X/9f/5////BAAEAAMABAABAP7/+f/y//j/AAD9/wMAEgARAAcA/v/+/wEAAAD9////BQAGAAEAAwAAAPD/6P/x//v///8BAAkADgAKAAIA+v/5//z///8HAAwADQAHAP7/9//2//T/+P8AAAwAEAANAAgA///2//D/7v/y//f///8JAA4ADAADAP/////7//z/AgAFAAYAAgD//wEAAAD8//v//f/9//j/9v/1//X/9f/2//v//v8BAP//AAAAAPv/+//6//z/+v/7/wAA//8AAAAAAQD///v/+v/6//r/9v/4//z//P8AAAEABgAHAAoADwAVABcAGgAcABwAFQASAAwABwAEAAEAAwADAAMAAQD+//7/+v/6//r/+f/6//r//P/5//n/+v/9/wAAAAAEAAoACwAIAAkACwAJAAUAAQAAAAEAAAAAAAAA/f/8//v//P/6//z//P/7//r/9v/1//f/+P/7////BQAHAAUAAQAAAP7//P/7//z/AAACAAAA/P/6//n/+P/4//r/AAADAAIAAQACAAAAAAAAAAEABgAGAAIA///+/////f8BAAUACAAJAAkABwAHAAgABwAHAAcACQAIAAUAAwAHAAcABwAJAAgACgALAAkACQAKAAwADQAKAAsACwAKAAwACgAIAAgACAAHAAgACgAMAAsABwAGAAUABAAFAAYABwAIAAkACAAGAAUABQAFAAMABAAEAAQAAgAEAAQAAgACAAAAAAD/////AAD+//3//v/8//v/+//5//n/+//9//7//P/6//n/+v/7//3//f/9//z//f/+//3//P/9//3//f/7//3//v/8//z//f/9//7//f/+////AQABAP//AAAAAAEAAAACAAMAAQACAAEAAgADAAMABQADAAMABAACAAIAAgADAAMAAwAEAAMABAADAAMABAADAAMABAADAAMABQADAAQAAwADAAQABAAFAAQAAwADAAIAAgADAAIABAADAAMAAgACAAIAAQADAAMAAQACAAMAAgADAAIAAQABAAEAAQADAAMAAgABAAAAAAABAAIAAQACAAEAAQABAAEAAQD//wAAAQAAAAAA//////3//f////7////9//7//v/9//z//P/7//3/+//8//7//P/9//7/////////AAD/////AAAAAP///f////////8AAAAA/v///////////////v8BAP//AAAAAAAAAQAAAAEAAQAAAAIAAgABAAEAAAACAAEAAgACAAIAAgABAAIAAAABAAIAAQABAAEAAQAAAAAAAAAAAAAA/////wAAAQAAAP////8AAAEA//8AAP///v////3//f////7//v////3//v/+//7////+//7/////////AAAAAAAA//8AAP7//v/+//7//v/////////+//////8AAAAAAAD+/wAAAAD//wAA//////////8AAP////8AAAAAAQABAAAA/v////7///////7////+//7//v/+/////f/+//7//v/+/wAA//////7//////wAAAQAAAAAAAAD//wAA///+//7//v/+//7////+///////9//3////////////////////9//3//v/+//7////////////+/wAA///+/wAA/////wAAAQAAAP////////////////////////7/AAD+//3//v/+//7//f/+//7//v/9//7////+//7//v/+/////v///////v/9//3////9//3//////////////////v/+//7////+////AQD/////////////AAABAAAAAQABAAAAAAABAAAAAAABAAAAAAAAAP////8AAAAA////////AAAAAAAA//8AAP7///8AAP///////wAA///+//////////7////////////+/////v/+//7//v8AAP7//v/
+//7/AAAAAP///v/+//3//v/+//7/////////AAD////////+////AAAAAAAAAAD+//7////+//////8AAP/////+/wAAAAAAAAEAAQAAAAEAAAAAAAAAAAD//wEAAAABAAAAAAABAP////8AAP//////////AQD///////8AAAAA/v////7///8AAP///////wAAAAAAAAEAAAABAAIAAQAAAAAAAAD//wAAAAAAAAEAAQABAAIAAQABAAEAAQABAAEAAQABAAEAAgABAAAA//8BAAEAAQAAAAAAAQAAAAEAAQACAAAAAQABAAAA/////wAAAQABAAAA//8AAAAAAAABAAEAAAABAAEAAAABAAIAAQACAAEAAQABAAEAAQABAAIAAAAAAP////8AAP//AAAAAAAAAAAAAP///v///wAA//////7/////////AAAAAAEAAQACAAAAAQABAAEAAAAAAAEAAAABAAEAAAAAAAEAAAAAAAAAAgABAAEAAQAAAAAAAAACAAEAAAAAAAEAAAABAAAAAQAAAAEAAAABAAIAAQACAAEAAQABAAIAAgADAAIAAwADAAMAAwADAAMAAgABAAIAAQACAAIAAgACAAMAAwACAAMAAwABAAEAAQABAAAAAQAAAAAA//8BAAAAAQAAAAAAAAAAAAAA////////AAACAAIAAgABAAAAAQAAAAAA//8AAAEA//8AAAIAAgABAAIA//8AAAAAAAAAAP//AAD//wEAAAABAAMAAgAAAAAAAgAAAAEAAAAAAAEAAAAAAAAA//8AAP////8AAAAAAAAAAAEAAgABAAEAAgABAAEAAgABAAMAAQABAAIAAQAAAAAAAAAAAAAAAQAAAP///////wEAAAAAAAEAAQAAAP//AAD//wAAAQAAAP//AQAAAP//AAD/////AAABAAEAAQABAAEAAQACAAEAAQABAAEAAQABAAIAAQADAAEAAAACAAIAAQAAAAAAAgABAAAAAQAAAAAAAQACAAAAAAABAAEAAAAAAAAA//8BAP//AAD//////v///wAAAAD//wAAAAAAAP////8AAAAA//8AAAAAAAD//wAAAAAAAP//AAAAAP//AAAAAAAA/v/+/////////////v/+//////8AAP7//v/+/////v/+//7//v/+/wAA/////wAA/////wAAAAAAAAAA//8AAAAA/////wAAAQAAAP//AAABAAIA/////////f/9/wAA/v/+/wAAAAD9//3/AAAAAAEAAAD//wEAAAACAAAAAgAAAPn////8//f/+f/+/wAAAAAGAA4ABwAFAAgABQAJAAYABQAAAAMACwD//wEABwADAAUACAD9//b/7P/m//X/9f/3/wEAAwD+/woAAQDz//b/9//4//f/9//9//7/8//9/wIAAgD1/x8A/v/KADkCpwAL/5//3P+g/7T/3v/M/83/l/9Q/1X/fv+1/8H/5f8PAPT/7v/h/83/BgBBAD0AbQCiAK4AygCYAA4Axf/6////3f/v//v//f/5/wQABgDt/9L/uv/A/6n/zv8FABYAKwDf/9L/0P/W/wIAvf+9/+H/4P/1/+L/9/8YAOT/rf/K/9b/zv/h/9L/+/8kACoAFQADABIARwA4APf/RQBMABoAZQBKABcAjQBZACcAgQBpADQAJADX/2L/Zf9R/3j/nP+c/4//dv+l/8H/2P/f/w0AMgBLADoAVgBOAHwAMAHZAIkA4gDxAL0AcQCVAIUAaP9V/4r/M/9W/1v/Qv8I/8D/a//2/rv/pv+f/8n/MAAfAE8AhAA/AIYAbQApAHoAOgD1/z0AIwDd/+X/GACc/2T/pP/0/6z/tv+AAI//q//fAN3/zf/7/+P/PADA/yIA1f8AABkAsP/l/7H/+v/l/+v/BQDJ/zQAKgC9/ygAIgAbAEkAgwA/AGMAuQDjABMC3gDn/7j/Bv+u/+X/RP+l/z0AWv9u//L/yv8jAKr/b//
i/7L/lP/7/rX/+f9g/+3/TgBuAFoALgGAAOX/UwAcAEAA6v8bAEgAYACoAGQAcwBSAA4Ag/9M/5T/S/9e/9T/EwCs/4v/FwDC//P/IwCw/0sANAAgALgAKwCx/7EA8v9T/5oAyP+j/14Awv8JACgAwv8JANb/DwAcALT/+/+z/wYAJQAMAA8B+QAEAXUAvP/R/5r/5P9L/wz/rP+5/tn/EgKCAFkAdAC//6cAtf/C/zsBlQDG/+L/zP9V/23/cP+s/6f/yP5h/1f/v/6//1P/sP+pALH/lgC1AVUBPwBcASIB2P+1AWwAWgDmAFH/nv8dALb/WP6Y/rH/T/5v/QsAEwA8/6oAoQAZAL//iAC7ALf/DgCvARcBEQB9ASoBFABhAOj/mf/e/s/+nP/m/mL/HABHAAQAPgDKAJr/AQDDAN3/OgC0AJcA/f/B/2wAZgDF/57/rwApAIb/EgB3/1T/jv8SADsAm/+KADIAd/81AP3/5/9/AGYA4/+YAIwABwCQAGoA0v+y/4v/Ev+j/4H/Ov/2/4H/5v/C/6b/iQCDAAcAgQDDAFj/nwCGAJL/AAGoAH0A0wAeAKv/HAAWAHf/ef8GAOj/uv8rALD/0f/d/7X/uv9Q/1wAVwAWAEEAQwB7APn/U/8MABYAZv9MAMz/Tf+l/zAA6P+D/6oAPwBs/5EARQDQ/+QAcwAXALn/awB+ACL/HgAhAHn/yv8PABf/sf9sAAL/AQBAAOL/pf8jANUAHACOAJ0A4//d/1cA+f+1/wUBVwC8/+gA3v/Y/0cASwCV/6f/IQHV/3r/3/8cAM7/7v4FAAgALf/W/0cAjf9YACQB3P8tAMoAQgDE/4D/lAA+AJj/8f+2AGMAh/+WAEQAwf/G/x4AFADt/mX/mQDA/s3+KgExACn/QwCKAMb/GgDr/8b/jv/G/zsAm/+A/1QASQBr/+3/nQCFACYAZwCGAH//7f+WAJr/IQCnAND/MQDt//D/2gAYAD3/8/+7/3v+0P9FAMX/GAAbAF8Auv90/3YAiADj/zQArgD5/0H/xgAOALj+2gBeAHz/6ABpADUA5v8s/6n/BADo/8L/LADr/4cAngB0/wEAOgCo//j/8v9+/yUAKABd/0kAvwDq/wgADgCc//P/RAATAHgATgAcAMn/f/8S/0//1P8z/xUAjgCEACoAr/+AAFkAhv8IALwANADn/3wAwP+M/x0AnP+q/xsAUgAuAL3/x/+DAN3/k/8+ACwAmAB/AA0AFQC2/63/y/+A/yYApgAyAMn/p//R/4f/d//2/8z/4f88AOn/MQBnAMX/lv/N/3AAbwAeAJYAGgDR/+v/dP/P/yEAGQAtACEAGABDAEIAov8vAKQA1f/z/x4Asv85/6P/kf80/8T/Mf/N/v7+k//g/5//1QCrASsBNgFRAakAfACZAHAAbQAIAO//5f9g/y3/Sv+D/2j/YP+X/53/lv+T/ycAEADO/40AsADc/6//bABzAHwAMgD0/yAAiP/6/0gAl/9EAKcAWf8f/xkATv+W/7kALgCDAH8AAQALAPv/NABoAGkAQgA6AO7/Mv8d/0//i//j/zMAYgALAKr/h/8u/1r/PACPAJkApwDDAF8AxP8NAOT/0v8iAJH/m/+D/xD//v40/3L/l//h/8b/OgAUANf/PgByACQBdwH2ANgAmwFNAeUAWAEsAVkBVQGxAD0AGAAvALj/Sf8//yv/9f6k/qv+2/7z/vf+l/6G/pf+ZP6t/kz/wv9Q/1b/3/8Q/wz/Zv+j/vH+hv9P/yT/Kv/f/4EAAQBBALUBagJfAuECrAJmAm0DmgK5AWICuAJfAkYBtwCTAC0AYP+M/r/+cP+F/9r+g/4q//X+cP5Q/i/+qP6+/hX+s/1s/vD+ev7z/TL+B/5j/U39B/2Q/Q7+A/5N/l3+7P77/1AAKgEWA/0DdQSSBIoECgUhBakELgTDBHwEDwQ/BNACAAKkASgAJf+j/t79Yf2j/XD98PxI/fj8kPz
z/HH9x/2W/cb9QP6Q/p3+4v6c/2j/4P5i/jX9avy4+/T6cvtb/Pz84P3t/j8ALAF9AuUD3AQABywIGAgmCDEIbgilB5kGawbpBYcEzgLPAFn+1vxz+7v5G/k4+ZP51vlJ+uj6LPwb/Xn9av7A/lH/5/9P/6v/7AAPAcoAOwDs/qP9Kft5+DL41fjm+e779/zX/b3/pwCtATEEKwcDCtALtAy7DeQNpQs4CvYJbwhlBxIGDgOAAKD+gPtk+NP2lvZt9uH1pPYL+Pv4T/qm+938t/6wADMB7wA2ASUBDQHtAL4ApgCD/5H9cvtU+Q33YvUw9uj3+/kK/av+JQD+AoQEyAVhCQYNmQ/rEQwSmBDbD5gNBgoqCPsGrgS8ATz+Q/o792/0DfLB8D3wr/JF9c31L/fi+e/86f7ZACcDPAVsBtYFjwRpA8gCIwJWANX9/fuj+UT2K/Pu8aHzyfaz+XH7o/2DAsoFOwYECXkNfRFNFAEVBRRBE2ASlA5FCp4HagU6AnP94Pij9Rnz+u9J7Srtj++F8RHy6vRS+Zb8Bv9wAR4EIwdxCAkHYwb+BcEEJQMXAK79nPwj+vP1IPNh8qbygPSL9lX5q/1QAWAEkQcRCi0OOhP6FJsVrBZ/FfwSig8vC3gIXAXWAJr82feR8wfx7+2p607snO3P703yMPTP97D8SwAIA/wFmwgACsAJIQhaBiEFcQO4AG798PrM+Oz0DvG/74vxiPQN97T5yP3zAlUGWwgJDC8RwRUxGOkX3ReIF4QTOw5DCtAGngPw/o74bPR58ejtPeuX6Rfqc+327xnxLPWp+qj+0AHBBPgHlwpDC/UJHAioBpQEbAFZ/cH5Jvfo8/jwkO/c72jy1PV8+GT8FwIvBnkJBw40Eg0WYRnLGrcaehhoFTESrgwQB6oC8v35+Nnze+9v7Jzqs+ka6c3qd+5Y8mb2SPn6/KgClwa0CA0LYwxUDPQKnweMA/8Arf6Q+Zn0tfG07ijtv+2m77b0SfoV/rkCegduCycQKRSlFsMZEhyQGwAZmhX2EaoNuge2Afn8bPcr8inuLOrf5x3oFOky6mTtjvJN9zH6yf3OAx4IAQroC+UMLAxZCgsHiAIF/2P8Pvhi85rvze027QPuA/FK9az6+gAiBlMJKw7rEtcVsxgKG38cbRwRGUIUjRCPCwEGWgDP+Tn0g/DP7I3oauYc57vo/OqH7lPzQvhQ/PEALAX2B/IK6QwpDEIK2gctBAQAmvyG+Zf1GPGP7lHu+O2u7+P0Cfpv/xwG7AryDRcR8hT0F2UZeBr9Gs4ZSxafEQ8MWQZeAez8p/fK8Zvu2+xL6UrnoOhf6+juVPKz9oL7Mv9CA80GrQfsCeQM4grxBr8DMQDy/Mn47PN78bXwcu9x74fxifQu+hMBQwUbCSsPTxMyFYcXVxjKGDQZGhcVEy8PuwtDB+4Bufzt96XzA/Dx7PDqfuqQ6w/tRe9v84X3d/o//psBmAMFBp0HjggsCPwF/QJj/l/6YPct82zvnO9D8szzWvVY+bz+yQPlB9ILFBA+FDIYgRnXF+IWexaUE5QORwq2B2YEIQAx/G33YvMy8dzue+zA7FHvr/F887j1wPkm/RX++/91AvADcAXuBWEEIQK9/0P88/iS9K7veO8V8f/xt/VG+hr+9gOvCagLdA5JE9MVyxelGPEWSxZbFEgPjgr2BdgBSP+5/En5hfYk9JHxp+/P7ibwI/Kb9ML44fsz/RL/aQBvAJEBiwKqAuMCWwF2/tP6i/ZO9M7xqu107nfzyfWu+L3+VgOMCHYOyhAPE5AWpRjQGQ0YyhQsExkQ/AlGBF0A+/x5+uX3b/Un9FPz8PG08EXxwPN+97v6rfwI/1IB4gHkAQwBLwD3ALkB/gBj/k/6ovZe8zLv9Oui627uP/Mq+O780wJZCWgOShKMFSIYPxt/HKcaYBgQFSsQ/gobBdn+nvrO9+n0LfON8sjxQPHF8Q3zOfVr+FT7rf6SAa8C1QP
8A1YDrALNAP7+tP6w/XT66/Y38x/wpu2y6uDpBu6s81/4h/8QB04NeROxFuoXnBo5HXYdnhu5GIUUeQ9pCEAAo/pM9g7zkvFf8NbvJPAm8Svy5fI49rT7if9TAi8FJQb+BToFIQPrAIX/O/84/xP9Uvhi9DvxQu1q6tvoh+nT7p/1QvqpAEoKhxHvFUQZkxvPHUQfdB6zG0YXBhI6DfEFIP1S96zzuvDm7pPude8B8DHwHvLe9Er3kPsvAQcFCwclCIkHEQUNAuP/iv76/eX9jPyV+PTz/e847BjpsOe46i/x7fZb/IoEOA0kE0YX7xnSG4EeRSBcHswZVhWgEMwJbwHX+bP0nPG272zuJO5k7+PwafFe8pz1kfoW/3ECwAU3CEkIpQYxBGwBF/+W/az9bf2J+6b4/fTn70frFOl16Avr7PAW+H3+Owb6DZMTjhiyGk4cKx8JIAkfhhwtFh8OPQgMATr4hvK27w3voe4C7gzvSPDa8N/yMPby+W7/cgTbBgsIigfkBXkEQwG+/fn8fP02/Tn8AfnC9C3xU+xz6fvo2em88Hv5ef48BTgOnhTGGDwbVBz2Hrsgoh5vG+8Veg5fCOEAovdw8dnuGe6c7UXt4u6I8DrxevPF9tv54f4qBLAGAQilB+sF2AQAAlP+9P3//ff8wvyi+rH2WvOP7hPrJuqo6RHtKvb7/c0CEgvyEgwX7xmdG4Mdoh/jHk0cQRguEWoJQQNd+/Hy6e+474LugO2g7r/w3vFA8i70P/jR/PgATgT2BaAFCwUMBRkD4/4H/pkAtABf/nT8o/mm9Xzxmuwd6/zqw+rj8XT7QgB7BngQ2BWdF6gathz5HWIeVh3eGSgUAQ2MBeH+Hfdm8XzwK/Av7ifu0u8o8NjwkPIk9ZD5HP4gAW4DcATxAzwEXgSfAV0ArgKdA34B8/6S/Pr4HPTW7pbrPeqY6pnt/vW3/vwCyArJE1UXXRjsGvAcIR2kHG0aKhaoDyEIoAGV+071tPG98bvw3+6m8M3yKvLy8bzzHfe6+j/9vP9oAo4DKQMrA4wCsgAFAVgDxQOfAQ7/Yvws+FTy9uwa64zryesj8Ez5lQDcBR4N/BOTF2wZAxtPHCAddBuiF7oTdg5sB38AEvtQ9jHzu/Eg8ODvKfHy8BHwDfEO82j1a/iW+7D+0wGmAwUEKwRPBIAEUgU6BpcFDQOd/1z75vWU757rjuoo6kDsDfJL+k0BIAf+DY4UvRg7GvEaqBxLHhscvBa/Ee4MOQY9//D5Cfaa85bx7O8O8EXwQu7V7LbutfEN9LL2UfrN/mcC0AOMBMsFKQfCBwEIMAjrBhwEpwAv/F722PBf7drrSOxn7xn1aPz0AqcHKgysELkTCBWLFWsWdBffFsUTTw/GCmEGSwJK/tX6oPgc9+31SfXy8zHxMe9777Lwl/Gn8kj1lvkT/dH+TQBRAnAEigZgCCIJVwjcBh4FygFk/f/48/UZ9TH1ufY7+of+rwGoBLkHcglwCkoKxwnGCm8MHQymCjcJwweUBrwEGwKUAGQATf/5/TP9kPtu+DH1/vLA8efwifAj8iH1F/ge+9H9sP+CAQAElQb1B7oHNQfIBggFOwIR/yf8Ofqg+c76W/w8/pMB3ARABjAGNAZcBdADKAQWBREFWgXcBfEFbwWJA/ABbwJrAjsBmAGuAvQARf0f+iT3QPQW8kDxhfJE9VD4EPtQ/bj+q/+BASUDsANKBD0FAwaZBZ8DPAFz/yT+Sv20/d3+fgAtA38FLQVTA6UC8QGkAPX/BwDqABEC1gIPA8MChAIjAzgEWgS6A3cEHAXqAqP/zfyT+WX2bfTA83b0cfYF+d37LP48/4IAcQIGA/ICvAN7BJoEaASTA54Bvf/Z/g3/j/+j//wA0AL6AoABav+Y/Wz8HfyC/Mr93//fAc8CHQNfA84DiQR/BQIGkgZxByIGngJM/2P8Bvmb9nr1bfV/9/35g/vX/Aj+CP9ZAIQB8gHJAk8EFwW
nBF8DygGBAKr/Wf/X/80AHAKlA3oEfQMQAVn+kPtU+dX3afde+L767v2eADMCrwJSA/wEeQbYBt0HtgniCc8HYgQbADT8lfkA+Pn3pPn1+9P9a/6e/b38fvw7/LX8jf4XAYQDFAUoBd4DkALpAasBtwHzAicFpwbCBvcEOgFw/JP3oPOa8TPx8vHY8wb3Lvsy/x4CpQQHCKMLOw43D1APVQ8+DmMKtQTT/2L8o/mq9yH3BPgL+Rf5hfhL+Dn4Nvhy+UT8xf8eAyAGkwioCQgJuAfqBtgG/AYLB3wG+ATAAuX+Q/mV86vvru247B/s9uzF8Ef26fteAcwGBwxdEFsTjxQHFEcT3BFqDkMJ4QMb/336Dfca9Vj0gPTQ9G31LfZZ9kb2d/cD++f/SATLB+EKXA3uDSgMgAmfB90G5AXLAzwBzP5d+z/2uvBr7OrpAumF6ZfrkPBJ+CcAlQboC24QEBQgFvAVJxUGFb0TBxDHClkEe/0F+CX0rvH58MzxufPc9Q33f/a89X73y/uUAP0Ejgk4DqIRghHMDcoJegdmBaAC3v+I/ev6t/bV8EHri+en5c/l3Och7ZT2MgGoCbwPfBR8FyYYhxYyFG0TChMgEbsMdwbQ/0v5pPOF7zbuu++Q8p71/vcN+cb4k/jA+nD/WgRjCPQMmBHCEvMPdgtbB3sEUAHz/Z37Afo+97jyKu036PvkAuR/5c3povJH/oYJ6xHMF/oaDxuSGJ4UBhLsEO4O/wrGBf7/RfqU9MTv/+0G8InzW/et+mP8nfzT+6T7eP26AGkEKwkfDiwQhw7ICs8GXQPX/6z8hvsy/Ir7vfcm8g7tjujh5YXlrOYh7SX56QS4Df4UUhpSHGkawxXpEUMQXQ6jCjsGPgHS+7r2H/JE7ynwnfNr91z7mP5wALkAov8R/ywA+AEuBLwH4Qo8C2kJOAZvAvf+G/zW+l37Afyc+nT3QvNF7gTqROeU547rs/SVAMgJmxE3GEobRxqJFVsQiQ6XDSwKgQWXAR3+JfqP9eHxYfIU9sH5e/3IAEsCSAKTAbgANABJAAUBUQPbBc0FugPQAWMAmf4s/N76GPz1/UD96Pk99kDy9e2R6qLo8umi8XX9hwffDoYUXRiPGVQWNxDFDFYMfAp9Bv4BFf4Y+yT4AvWC9IH3aftU/w0DsQQBBG4C1wC//8v+E/5t/2cCQgNlAcj/Qv90/jP9svwx/psAGgGG/r/6L/Zp8GfrX+gB6ODsSPdNAoQK4hDFFTEY0RbbEW0NDAwiCygIIQSbAHr9OvoK95D1+/Za+sT+DgNNBfYElQMRAr3/Av3M+6n8l/4cAJQAeQDp/7j+s/0S/n3/FwFKAsIBb/4T+avy1ewG6dzmlehz8SL99gXhDKETpBiKGasVkBBYDtwMXwlsBRoC3/6d+0T49vUw9ov4O/zjAGwEhgUyBeoDhgEF/kz7Kftj/Pv8Ef0A/rD/HQDa/kP+zf+gARoCsQErALX8FffB8PXrouj+5sjrZ/YEAHAHXg9nFlIZOBcBEsQOdw1xCu8GIQWJAwEB0v1v+kT4B/iW+SP9CgGdAloDCwRcAhL+HPqB+S/7E/yH/Kf+rwGKAvAAZ/9B/7H/0P+m/3r+ivsg9yHydO2K6Z/nUuvg9Av/QgfsD3IXVxr5GIwU3Q/sDIEJowXGA+AB0/63/Df7mPmt+TL7wf2SAeEDugNZAy8CRP96/Jb6Qvmo+fn7Bf6o//YAHAH5AP0ASwCd/0//m/3i+r33jfJ17Y7qfuiS6r3zNf6MBowQ0RiFG0kbLBddEWMNHwjsAnMB3P+T/Pj6MPrx+Fv5Kfvw/TQCTwUrBoYGNQXEAWD+fPvG+Iz3Z/hM+tD8tP74/mb/lQCTAOX/af++/XX7y/jc88LuCuw76qDqbPGo+7EEtw1bFvwbXx06Gp8USA+oCeYDnwDy/mv8dvo4+iH6Bfog++X9BAKPBRQHhwf2Bm4EBQG1/Cj4LfbC9rr3yfhX+hz8gP4
+AFIAGQCU/4L9CfsL+DbzsO6b7F7rmey48xr9igV5DkEX5RwrHr0bBxfNEb0KwgLP/cz6Bvid9vn2N/gB+mP8vP+hA3cG/AfmCJYICQZ+AjYAVP1x+Mf1DfYf9mX24/eP+pL99v6f//cA/f9g/O753/a78evuhO7+7fHxufrUAjsLDxRkGm0d2hvxFtYRdQvlA6f+kfvA+O32Efc3+CT53fp3/uYCsQbvCLYJvgkICO4D6/+5/FP4OvWL9QL2T/Yb+Gb67vzL/h3/zP+2//j8RPqJ96/y3u9/757uRPFU+cECtguQE/EZaB57HsEZPBOWDNoE//3a+d721fQ59Sj3TvmO+3L+JQPpB8cJuQkSCiIJUwUqAM/7SvlF9m7ztvNP9N70g/de+nT8WP4DAAsBAQAg/Dn4DvVq8T7vtO6b8IH4xAJhCh0SSBk6HbEehRuOFHsOOQj7AMH71/fO9MT0CvZX91j66P3WAQQHawq2CnoKHgq4B3ECN/zU9+/1LvSS8Svxa/PW9Ur49frz/OT+ngBsADH+s/oc9qjyCvHF7pDvo/fRAboJShLiGaAdEx/VG0QV7w76B34Bmfza9yT0Q/Tz9R334Plg/vcC/gaACT0K1QmjCK8GjgIn/ZL5Qve29BrySfBg8V30Lfd8+rH94/+XAe0AZv16+RL1nPHU78buzvLq+wEE3gsXFQ8bbR3EHR4aTBSFDtgHjQFa/BD3OfS19Fv1CPf7+qX/bQSbBykISwgHCFAGDAS2AM38nfqe+HL1FvPw8eTx1fOD9v340PymAC0CUwFF/gb6pfXB8eLutO1u8WL6RwPeCjkTzRr2HgAfGBsLFj4RyQqmA4D9H/gG9bD0zvS39YD45vxLAgsG3QZsB0YIkwdWBakBU/2D+q/43/WF8vfwd/JA9Zj3vvna/LsAiALiANX9S/rq9QXypu4I7S3xefmwAV4KZBNsGlEfLSDkG4AXKhN4DFUFY/5r+O700PNV8wf0C/dt+2sANgSOBT0GWgdIB4sF6gJEAPv9Kft+973z//Cc8EryrvTV9178TQFLBKwDygCb/dr4UvPL7iPs9e7J9jD+5AW2D0AYnR30HvwbIhjfFBcPngckAdD77/dj9VvzjPJX9E743fxvAJEClgRFB/cHdwWFAr0At/5X+9r3l/V99M7zKPQd9rT42fvu/7QCHQIKAL79fPmz83zu6+tq7m31/vy0BAEPFBkSH14g4h2TGQMWmBA8CLkB0fz/+Ln2SPQj82X1/Phy/FP/tgDkAo0FYgWsAyICXgAY/yf9P/mN9ULzffJW82L0cPZN+4YA5ALkASL/V/wM+Dbyse1V7KTw9fiWAKAI8xHkGa0e+h56G9cXlhR7D/oINAKf/Cb5Vvbh8xXz0/QU+e39ygD+ARUDgQTTBEMClP/g/2r/dPyY+Zv2S/QN8wPyZvPq9sz6RgCXBAwElwG1/gT5yPKE7enq0u/x99X+TgfaEJsYKh3pHNcZbhdKFG0P+wlWBDL/fPtP+HL18PPk9JX4hvxn/sf/KQKxAyIDLgGh/9//qf/Z/Kv5H/eD9PTy2PLr8+n2uPvTAGEEVgTQAbX+lPkJ88HtP+yy8PT3e/61BicQOhceG28bthnvF+cUSRAWC6kFrgDo++73WPXJ8+f0yfhB/D3+2v+ZAd0CGwLz/xb/y/9j/0f90fpj+A/2uPNS8lzzVPaq+vX/twNjBAcDvP8j+sfzce777DfxFfhV/vMFLQ+JFkUathmHF78WXxQBD2EKIwZSATz9Ivnz9cT0FPXu97z7iv1Z/zACjQMaA8ABQgD1/3//l/wf+V72x/Nh8mDyh/MI9yf8zQDeAw4EOwI//1D6bfQs8D7wdfSK+V3/KQeiDkAUMBcFF9sVthQsElUO8gmWBY8Bfv0A+ob3MvY59wD6SPzN/Vr/JQEmApsBmABkAHMAXv81/Uz6FPdg9Gby/fHG8zH3SPzFAZUEtwTzAqn+Tfl28/XuOfAd9RH6OwG
GCQoQBxVtFgkVQxSoEtIPPg0aCoIG1QJ4/pn6p/eT9XD2ZfmJ+6T9CADzAQ0DKQKoAH8AEwCg/rr8iflq9h30ffIx8vTyzvUL+///dQIBA4EBO/7i+XP0xPEy9Cn4Df1CBDIL0RAZFC8UmhOJEkcQXw5bDIoJywZzA5//f/yA+dn33vgR+k37Pv1t/gsAnAH1ACcAjwDOAP7/Lv1G+UL2sPNQ8W/wvfFv9W76f/4/AdoB/v8i/eL4tPRd9E/3k/uMASsIPQ51EhsTHRIrEQoPhAwGCzAKXwmCB2oEEgG+/YH6k/hP+OH4lfru/Kv+UACeAZ0BQwHaACkAD/+E/C35OfZe84XxfPHj8lH2uPpg/ioBsAGz/0386/eK9ZL21vjR/DIDoAnWDoIRaREZEWkQ9w3pC0ELpQqgCX4HaARQAQL+D/tV+V34iPj7+Zr7cf15/7gAdgHaAZkBJAGW/7X8Yvnp9VnzM/In8urza/dC+33+vv8d/7T9dPqQ9oT1TveH+jr/ygS9Cp4PHBHQEBMRYhBIDp0Mwwv8ClAJbwZZA0oAHf3g+mH5VPh9+Kf5Nvsl/fb+cgD7ARgDSwNzAhkApPwK+YL1mPJl8ejx4vOC94D7N/4j/6P+rPyM+Sn3Jvfn+Er8eQEIB9UL4g7mDz0QGxCODssMJQzhC7gKxwhYBmwDRgCf/Yb7evla+Kr4nfmk+vT7qP2O/xoBCwKiAigCMQB3/R/6s/ZX9KHzZvRQ9iX5NPwD/jr+a/0d+wP4mfai9+75Uv3PASkHOwyHDpcOJQ91D/4NbAzPC5ILxQrcCHcGygO4AE7+NfzN+aH4rPgJ+dj57fpV/E/+/P8GAaABRAHZ/239d/rM9+z1RfUP9qz37fkQ/NL8nfyG+xj5Efcg92j48vr4/p0Dlgh3DLwNQQ4rD98OhQ2KDFAMGQzeCvoIDgduBIIBRP/v/J36RPm0+PH4jPkb+oL7d/3E/q7/MwDK/2T+zvsR+T73Bfav9Qb3SPmx+2393/23/Wz81/kg+DX4evnz+zb/OgONBx4K7wrtCwoNEA1ZDFUMMA1WDRgMTwo2CI4FugLr/0f9RfvZ+UT5h/mW+ej5UfvB/If90f2//Zv9ivxN+qD4wfcq95/3Nflz+8z9G/+c/4z/zP1P+wX61/mP+k/82P5QAn8F3AZ8B6wIigl8CY4JlQr4C50MQAxQC94JngfxBGcC5f9k/Y770/qN+vv5oPkv+hT7aftB+0T7RPuG+kn5V/gj+Jf4kvlj+6/9Mv/t/3UA/f8W/hT8HvtJ+1D8uf2h/0YCXQTYBOsEmgU2Bj4GqQZGCA8KxgrhCrQKogm+B5YFjwPDAeH/Xf7X/Wz9hfza+6P7Y/u9+tP5LPmg+M73G/cA95b3yviQ+sj8yP4jABIBkwErAaD/4v0u/Wb95P2u/gIAsAHJAtUC2wJUA2wDXANABMIFBQf2B5wI4giVCJgHUgYLBYoD+wHtAGIAuf/q/lj+4P0f/QL8zfrY+df4iPfD9hL31/fq+NX6J/3S/tT/vgBCAXsAuf5+/RL9Ev2Y/ZT+6v84AdgB7QHQAY0BcQGMAQUCZAMoBUwG/QamBw4I7AcXB/MF7gS7A5sCBwJdAYIA6P8h/yb+E/2D+xz6Tvk++G33tveW+NX5ZvvM/Pv9/f6w/xAAs//J/ij+8/3m/Rf+wP6x/1AAvABZAXIB7ADQADsBqgEzAicDggSuBW4GIgeUB2IHvQasBYAEpgPAAtIBVAHJAN7/6/7j/av8Tfv++T358/jV+Ej5VPpw+1P8+/yz/V/+eP4+/gj+tf2m/c/9xf0X/v3+sf9VAEkBvwFyATEBPgE5ASABggGjAs0DwQTvBfgGbAeKBzoHdAabBb0E4wP1AtUBzgDM/4P+Tv1Q/E77fPoS+vH54PkK+rX6W/vD+2X8DP14/db94v3k/Uj+bP5E/kb+RP6w/sf/iQDiAEoBXwEsAekAkQCZABYBxgHyAkQEHQXDBWgGtwZ
3BrIFAwWMBI8DUAKPAdQAvf/F/hL+c/3I/Dz8G/z1+3z7OPsw+yf7TPuK+/H7lfwy/fP9w/4K/wH//f63/o3+Ef/j/4sAIAHDARACsQEkAbUAKwDu/34AgAGBAnQDfASLBR0G9AWWBTQFdwR6A3YCkAHoAGAA1P9f//H+e/71/Uv9h/yq+8f6T/pE+mX62/qW+3/8u/2u/vv+MP8m/5H+KP5V/tr+gP9UAGIBIALwAVQB8ACBAM//if9JAHoBcgKgAwYF4wUYBusFbgWpBLYD0gImArkBiAFmATgBCwGJAH7/Wv4x/cf7ZfqH+U35h/n5+aT6wvsg/Un+Kf/F/9z/j/85/+z+xv4Q//b/KgEKAmUCPAKGAaYAzf/f/nb+BP8eAHkB3AIEBBAFuwWKBdIEBwRXA+QCjQJfAoYCjQI5AroBvgAy/6X9S/wT+xD6b/mB+Sb61vqY+8b8PP5l/87/t/+s/3j/1/5V/nb+K/9HAG4BNAJzAhwCQgEmAOj+4/2O/Rb+Zf/wAFICxwMLBYUFUwW0BPwDpAOOA2oDUQNVAz0DtQKKAQgAfv72/Iv7cfq8+XD5p/l1+oT7gPyV/b3+hf/X/+//wf9J/+r+4v4O/6P/zgDvAYgCxgJ4AjEBff9I/nf96/xb/d/+mAAgAoEDiAT+BOMEhARdBGYEQAT8A+IDwAMeA/MBqgBy/wD+iPyS+/r6evpd+s36jvts/FD9M/4A/5f/8P8CANX/j/9R/zz/dP/b/3cAWwERAh4CiwF9AEH/NP5W/dv8Qv11/s3/CQEmAhcDxgM4BIUEswS2BLIErARRBKMD2gLpAdoAtf9v/mb9pvz9+677qfu++yj8yfxG/bv9Uf4J/5H/p/+g/7L/wf/U/9L/3f9gABsBVQEGAYoAyf+w/qb9GP3w/B39+P1v/8wA0AHVAusDtwQCBRAFOQU5BcMEGQRiA5kCuAHAANr/HP9g/rf9VP36/H/8TPx6/Lr8/vxn/Qn+uf45/6D/CgBzALgAvQC4AMsA0QDEAKcAUgDQ/xD/Bv4G/Vn88/vy+3z8if3o/k0AvwFDA28EIAWaBbMFSAWWBOMDLwNrAsEBWwHzAFUAuv8l/3z+zv0q/an8aPxu/LX8Kv29/X7+Wf8QAJgAAQE7AToBHAHwAKMATgAWANP/O/9Q/mf9lfzF+z77M/ua+4/8CP60/2EB6wJJBGcF4wXUBYQF8gRPBKcD+QKPAlYCAAKNAeoAIAA+/0H+Tf19/O/71/sz/L/8e/1s/m//WQDvAEIBfQFyARsBrAAiAJf/P//t/nf+4f0z/YL8wPsP+9P6G/vp+2n9N//qAKsCSQRMBakFogVtBf4EWQTuA8QDiwNeAyYDjQK0AawAXP8O/vb8Ify6+9P7WPwk/Qb+8f7g/50AGgFoAXkBTgHmAEsAsP8Y/4X+Pv4R/pT98Pxv/Ob7Pvvg+i379vv5/Gb+LQDYAScDIwTcBEAFPQURBf4E3gS3BLAEjgQkBHcDeAIuAbb/Sv4w/W/8/fsD/Hf8LP0W/gD/zv+OAAABDAEBAdwAewALAJb/Ff+j/iz+vf1k/er8ZvwK/Lf7gPuf+xf86fwB/i7/bwC1AbMCYAPoA2gE1AQJBSkFUwVJBfIEfwTMA6wCYAEwACf/OP59/T79Yf2f/QL+kP44/9b/LwBcAJMAnwB8AFgADQCf/zH/vf4+/p/9//yR/Cz81Pu5+7D7vPsf/Mj8hf1X/k//dQCCAU8CFQPZA3YEAAVnBYwFggVQBfEEPwQpAwYCGAEoAEP/r/56/ob+rf7c/iD/Wv93/5b/ov+G/2//bv9s/0v/+/6p/mb+8/1R/b38UfwO/Oz78vsr/Hn81/xU/df9VP7w/r//oQBuASkC9AK5A0oEyQQ6BXIFawUxBa8E3gPVAuQBLQF9AO7/uf+v/6T/nP+E/1//Nv/7/r/+kv5p/mP+dP50/mv+V/41/hv+6P2e/Xf9U/0g/Qn9Dv0f/Ub9hP3d/T7+m/4
o/9r/bgD3AIkBDAJ/Au8CVwO8Aw0ELwQkBNYDQgOXAv0BgAElAeYAxwDEAMEAogBiAA4Auv9e/+7+jf5P/jb+Nf5K/mf+c/6A/qD+o/5z/jf+Dv7i/Z39V/1F/W79tf0Y/pP+Ff+c/x4AewChALoA9QBKAZ4B8QFcAuACRgNdAzED3AJ1AhUCtgFbARYB6gDRALAAYwARAOL/vv+B/yb/zf6c/oP+c/51/o7+zf4i/1H/TP8y/w3/1/6F/iH+2v3I/ev9Nv6N/vf+fv8CAFgAeQB8AHoAgQCVAL0ACgGDAQMCYAKGAnwCVwIgAtABeAEqAfMAzwCtAIkAbwBfAFAAMgD2/6z/cP8+/xP/BP8Y/zL/UP91/4D/ZP80//P+l/44/vf9zf29/eH9N/6b/gP/bv/C//b/GAAnACkAQwCAAMcAFwF0AccB/AEWAg0C2wGbAVcB/QCfAF4AOgAtAEEAYwB3AHkAawA9APf/uP+U/5D/qP/R//j/GAAjAAgAwv9q/xL/xv6H/lv+TP5f/oP+ov7D/un+Ff9B/2X/e/+S/77/8/8vAHsA1wAvAXgBoAGkAZIBeQFOARIB3gCzAI0AdQBzAHgAdQBsAFcALAD//+f/5P/2/xYARgBvAIIAeABPABEAx/9+/z//Ef/w/tf+x/7A/rj+q/6m/rP+yv7j/gb/Of92/7f/BABdALcACwFOAXcBcgFJAREB3ACpAIEAbgBuAHcAfgB7AGYARAAcAPP/0P++/8v/+f8/AH8ArgDFAMEAmwBUAAAAtP99/1b/OP8h/wn/7v7Q/rD+k/6H/pb+uv7n/hf/Uv+Z/+b/NACHANwAJAFOAUgBHgHkAKgAcQBNAEcAYACAAJYAkgB3AFUAMAAKAOn/2v/s/xwAVwCQAMAA6gD/APEAvwB2ACwA5f+j/2X/MP8O//f+3/7D/qz+o/6j/qr+uv7Z/gv/Uv+n/wEAXQC0APQAEAEJAesAvQCKAF0AQgBAAE4AYgBuAHAAZgBUADcAHAAMABEAKQBIAGoAkAC3ANUA4QDaAMAAmwBhABYAvv9t/yr/9v7M/rP+qf6q/rf+yv7j/v7+IP9L/33/sf/x/zsAgwC8ANsA4wDXALYAhgBYADIAGgAQABEAGAAdACIAJQAiABsAHwA4AFsAfwCiAMQA3gDqAOcA2QDBAJ4AawAoANj/h/88//n+xP6j/pj+ov65/tf++/4k/0//d/+a/77/5/8ZAE4AgQCtANAA4gDbALsAjABYACcA+v/a/8r/zf/f//L/BwAgAEIAaACKAKkAxADaAOcA5gDcAM4AvACfAHMANwDv/57/Tf8C/8X+nf6O/pX+sf7X/gH/Lf9V/3v/m/+9/+D/CgA3AGQAjgCvAMUAywC9AJkAZwAtAPv/2f/M/87/4f8EAC8AXQCFAKoAywDnAPUA8gDkANQAwwCuAJIAbwBGABUA1/+O/0X/A//O/qn+mf6h/r3+6P4Y/0j/cv+Y/7j/0//s/wUAIwBFAGYAggCVAJsAjwB0AEwAHQD0/9j/zP/R/+j/CgA3AGUAjgCzAM8A4ADnAOQA2ADGAK8AkwB0AE0AIADv/7b/ef88/wH/0f6s/pf+lf6l/sb+9f4t/2r/ov/S//j/FQApADsATQBdAG4AdgB0AGUASQAnAAcA7//l/+r/AQAhAEQAagCMAK4AzwDsAAMBEgEUAQYB6ADCAJkAcABMACwADgDs/8f/mP9l/zD/AP/Y/r7+tv6+/tn+A/82/27/o//Q//j/FwAtADwARwBJAEQANwAiAAYA6v/V/8n/xv/S/+f/AgAdAD0AXwCEAKsA0QDwAAQBCAH+AOMAwACXAG0ARgAhAP//3P+5/5H/aP8//xj/9/7e/tb+3v73/h7/UP+J/8X///80AFwAdQB8AHUAYQBGACsAEgAAAPP/6v/p/+z/9f8CABQAKQBDAGMAgwClAMcA4gDzAPgA6wDSAK8AiQBhADoAFADs/8L/lv9s/0L/Hf/
+/ub+1P7L/s3+2v71/iD/Vv+S/9L/CAAyAEkATQBCAC8AGAAEAPb/8v/0//v/AgAMABYAIwAzAEgAYgCAAKEAvwDdAPQAAwEFAQAB8ADZALsAlwBtAD4ADQDa/6f/ev9T/zb/IP8T/wz/D/8W/yf/Q/9s/5v/z/8CACsARABLAEIAKgAOAPP/3//U/9D/0f/T/9b/2f/e/+v/AAAfAEUAbgCUALAAxADKAMkAwgC4AKoAmQCCAGUAQAATAOD/qv95/03/K/8U/wn/B/8O/xv/MP9L/23/lf/D/+7/FQAxAEAAQQA4ACkAGwAQAAkACAALAA0ADQAMAAwADwAdADMAUAByAJIArQDAAMYAxQC8ALAAogCTAIMAbQBUADMADQDk/7j/jf9l/0T/K/8b/xX/Gf8m/zz/Wf97/5//w//g//j/BgAHAAIA+P/w/+v/6//x//f/+v/5//T/7//t//X/CAArAFIAfQCjAL8AzgDRAM4AxQC/ALgArwCjAI4AdABQACgA/v/S/6f/gf9e/0H/Kv8h/yT/NP9R/3j/oP/E/+H/9f8BAAMABAABAAIAAgAGAAoACAACAPj/6P/Y/9D/z//d//f/FwA5AFgAbwB/AIgAjwCVAJoAnACYAIwAdgBWADIADADp/8r/sf+b/4j/cv9d/0r/Pv8//0//a/+Q/7P/0v/m/+z/6//l/+L/6P/0/wYAGQAoACoAJAATAAIA9v/y//f/CwAkAEAAWQBsAHgAfwCHAJAAmgCkAKgApACUAHoAWgA4ABoAAADs/9r/xf+t/5H/eP9k/1r/Xf9u/4f/ov+5/8b/yv/F/73/uP++/83/4//+/xQAHgAeABIAAQDz/+r/7f/8/xIAKQA+AE0AVwBeAGYAcAB9AIoAkQCRAIYAcQBUADYAGwAIAPv/8//o/9n/w/+o/47/fP91/3r/jP+l/7z/zf/T/9D/x/+//77/x//X/+z/AgAQABYAEwAKAAIA+//8/wMAEAAgACwAOABAAEMARwBMAFIAWwBkAGgAZgBaAEgANAAhABIADAAKAAcA/v/v/9X/uP+Z/4T/ev98/4r/m/+q/7D/sf+t/6j/pv+r/7n/y//f//L/AAALABIAGAAdACMAKgAzADsAQQBFAEYASwBQAFYAXQBlAGkAaABkAFoASwA8ADEAJwAhAB4AGgAUAAcA9f/g/8v/tv+o/6D/n/+k/6z/sv+2/7X/r/+q/6b/q/+2/8n/3f/u//r///8DAAcADwAZACoAOgBCAEMAOgAtACEAGgAbACEAKQAzADkAOwA5ADAAKQAiABsAGAAaABsAGwAbABgAEAACAPP/5P/V/8z/w//A/8D/w//G/8f/yf/K/8r/zP/Q/9X/3v/p//b/AgAOABoAJQAuADMAMwAwACoAIgAaABQAEwATABYAGQAdAB4AGwAVAA8ABgAAAP7//v8BAAUACgAMAAoABQD7//D/5f/a/9L/zP/J/8j/yv/J/8n/yv/M/87/0v/V/9r/5P/w////EAAkADYARgBQAFQAUQBIAD0ALwAjABgAEwAPAA4AEgAVABYAFQAUAA8ACAAEAAIABAAKABMAGQAaABcADQD+/+7/3//S/8r/xf/C/8H/vv+9/7v/u/++/8P/yv/R/9v/5P/z/wUAGAAtAEAAUABYAFcATAA8ACgAFQAIAAIABQAOABoAJAAqACkAJQAfABgAGAAcACYAMAA4AD4AOgAwACIAEAD8/+v/3f/T/8n/w/+9/7f/tP+w/7D/sf+1/7v/w//M/9b/4v/y/wMAFgAoADcAQQBBADgAJwATAP//7P/j/+D/4//q/+//8//z//H/8P/x//b/AQAQAB4ALgA4AD4APgA4AC0AHgAQAAQA+v/w/+n/4//c/9T/zf/H/8P/w//H/83/2v/r//7/EwAnADgARABMAEoAQwA4ACcAFgAHAPz/9f/0//n/AAAJABMAFwAbABwAHQAdACIAKQAyADoAQQBCAD4AMwA
kABIAAADv/+D/1f/M/8X/vv+4/7H/qv+l/6P/pP+s/7v/zv/j//r/EAAjADIAPAA+AD0ANAAoABcABgD1/+j/4f/g/+P/6v/z//n//v8CAAYADQAVACEALgA5AEAAQQA/ADcAKwAfABQACAAAAPb/6//g/9L/xP+1/6z/pv+p/7L/wP/U/+r//v8QACAAKgAxADYANgA1ADAAJAATAP7/7f/Z/83/yP/K/9X/5f/0/wIAEAAZABwAIgArACsALgAzADUAMQAsACwAJAAdABYABgD8//r/5P/G/77/uf+g/5z/sf+9/8v/6P/v/+3/AgALAAwAKQA6ACcAHQARAPL/3f/S/8n/xf/E/7j/uP/M/97/7/8LACEAMQA6AD0ARwBUAFkAWgBkAHEAfACPAJoAkwB9AFAAHQD8/93/tv+Y/4T/a/9b/2v/hv+e/7P/xv/O/9r/3//i//L/BgAWACYAPQA9ACUAFgAKAPj/9P/0/+j/9/8eAB4AOQBHACkADwASAC0AOgBQAFYAQQBSAFcAPgBNAEYAFADs/9n/0//b/9f/2//R/7j/nP+Y/9n/8v/X/9X/3v/W/9z/6v8RACgAHwASAAMA9f/G/7j/y/+6/5z/mP+j/8b/4P/g/+X/8f/t/8n/x//l/wMAIwA6AEsAYQB6AIMAYwBLAB0A+v/g/8D/7v/W/5v/of+Z/5D/c/9d/5H/sf/j/wcA5v8fADAAIgAuAA4AFAAuAD8AUwBYAH0AjwA3AEAAVgBKAFkAFQAsAC0A5f9DAHQAjwClAJkAnAAoAAUAEwAwAD8AXQCyAE0AnP9w/6P/fv90/6//vP+c/6j/wf/e//b/BQBCADsA7v/E/yQAKADD/4//uP+n/zr/Zv+k/w8ADgDR/wMAw//B/xgAKQD//7f/HgChAJUAkgCLAKkAkwDh/5n/fP89/zH/XP/Q/9v/8/8YAKj/X/+f/9r/uv/F/8v/KgD1AKoA1f+B/0v/J/+p/yoA5f+b/37/Hv+y/o7+CP+x/+T/FADcAFQBBQHnAKwAaQCVAAkBFgEoAWEBhwHvAb8BVAH3AAsB0wApAAsA+P/s/+7/BQBNAFMAEwDq/4D/IP/7/vb+rv4W/p79ZP0n/Vf9r/2s/cD9g/1t/V/9nv0b/lH+/v7Z/98AJgLjAowDQgSnBMUESgQDBMEDfAOdA14DTgNKA8wCNgKNAeUA6v8H/3j+2f2A/Z39Mv5u/jv+U/6E/pP++/0+/ej8pPxo/Br8Afz2+wn8J/yU/I/8T/x6/JH8Uv1X/sP/jwFmA9kEwwXIBjAH8wYfBwwHjAZDBs0FUQWJBJoDbQInARkAy/6b/Zb81vu6+8b7l/ur+2j8Qv1x/aX9WP4S/3r/QP/s/rL+Zv4N/nj94/xM/Mv7afvt+pj6y/pz+wb85/yt/rcAfwI/BAgGngetCDAJcgmYCWgJMwk0CWcIAgcgBiAFYgNeAXX/7P3a/Hf7yvmn+Yr6SPoQ+qr6CPth+/b7f/y+/Fb9Xv4T/y7/+v7M/gL/4/56/TT8bfuC+vT5ZvrJ+mL7nf0CAHwBOQOyBY4H6wi5CeoJPgqzCtYKwAqqCicKVAlYCF0GcgMTAe7+kvws+mv4h/dX94T3aPfB97P4f/nq+Xb6N/sj/Fz9mf59/zMA6wAuAaUA4/8A/6/9DPyP+rX58fmB+s76j/xS/5QBkAO5BWoHnwjWCaQKlwq6ClkLpwvdC04LQwpLCXAHLwTzAGz+zPs3+Yj3b/Z69Yj1DfZQ9pb2ZPeX+LH5lPps+9f8c/6A/0QAEgFLAfQAvAAkAKv+sPwn+2T6Lvof+nn6sPzw/6MCsgQiB7QJJgvXC3IMwQyYDMcM5gxoDGsLUwpZCXIH6wNDAMP99fo/92T0KvOh8pDyOvMM9C71x/bV+F76Q/uB/Bj+zf+uAEEBKAK6AiECVgHlANT/l/3F+wL7Evql+Xn6r/wz/9YBggQpB64JbAteDPgMQg0bDVENPA1LDPoK6Qm
NCAUGkALp/sf7aPj99Gry7fCG8NPwyvEa8/n0VPeJ+Uv77/xt/v7/cwErAvUCegNxA5wCuAHsAE7/6/z6+hL6pvlT+Zv5xfuv/sIBVwTjBmwJkgtLDRIOCg4WDroOnA5+DbgLLwpoCMoFugFY/RP6HPfU87zwCO/Y7rrvtfDQ8cfzmfaX+QX8jv0W/20BgwOQBPQEWAW8BVYFugPDAQgApv0X+8H5S/l++MP4/fqZ/bT/CAIpBQQIIwqtCykNUQ4JD5EPxw/5DkYNVgv8CKEFUQFU/cL5afYQ81bw4O5v7sjux+918Xfz7PWQ+Db7if3V/zYCLASUBYIGPgcuB1EGoQTPAtgA+f2/+if58vhZ+Br4iPlz/An/FAGbA7MGFQn2Co4MxA25DpwPTBDWD14ONwz9Cd4GdQKK/Un53/XH8hPwI+637ajuFvBv8WHz2PU++J/66fwB/yEBhQN9BbMGhQe7By8H1wWbA+wALP4F+5H46feQ9wH3avi8+2b+UQANAz4GlwiqCsoMcQ5+D1MQDRETEX0PCA24CoMHqAKo/af53fU18oPvDe5m7dftOe8P8fHy/fSq97f6/vy//mkBQAQrBqgHCgmzCSMJpgeDBcACi//j+334lfbm9Vr1GPaV+IH7Xf7CAdEEKAfdCXoMTw7JD0oRDxJXEp0RYw9wDCsJsQST/8T6DvY38sjvIe607NHsXe438Dvyf/TR9ov5gfyU/o0ABgN4BZIHYwk6CgEKSAmGB3QE/ABy/YP55PYG9jb11PQf96/6jv1ZAHEDhwaKCekLPg3pDp0QYRGVEUkRWw+5DB0KBAaUAL37cPcd8/Xv5e2H7KrsCe6q79DxOvSd9pH5jfyB/ocAcQMDBokHHwmJCvAKSwqCCJYFNAKi/lb6k/a89Cf0tPPs9Ar4cfuh/iQC8wX9CFwLrA3uDz4RyBEkEiYSvxD7DQYLmgccAwz+UPnS9NXwKe5z7HHrCuw67tLwF/OQ9dT4T/wF//8AFwOWBbgHHAlsCgMLrQq8CeIHbARfAKT8o/gp9ZPzA/PH8hT1iPh8+wb/oQMoB/oJGA06D54Q+hGeEuERSxGsD70MggnFBdAAEPzv907ztu/V7VjsmutJ7YLvifF09PD33PqT/QgAEQKBBLEG6Qc4CesKRwtjCikJ0gbsAiX/HvuB9sbzUvO08jLzmPYl+j/9kgGqBUQIMgvvDWUPexBRESsRChFsENwNxgroB6MDdP4A+l71HvGr7gHthOtw7Fvv1fEB9PX2ffqs/ef/RwH6AkoFFQdKCMkJkApICskJNAhZBPr/Ovzd96fzBfIg8m3y0vTR+GD8NADaBGAI4AqADY0P4hDmESkSuhFsEfEP4gytCfIFrgBV+wH3evJ77n7sxOu/66ftgvAQ8yL2oPlX/Jn+2ACRAkgEMQadB/sIxgo6C2sKWAk8BywD7/6S+oH18fFA8Vfxb/Fd9Af5Lf04AbIF3Qi0C+8O0hBbETES3xJyEpoRTA/GC40IwQTw/o35dfVS8Szu0uz16z3sDu/28e/zo/Yg+uv8E//xALcCEQUzB6QINAqvC6ML9QrCCckGTgIN/uT5BvUf8dfvhvBh8Zzz5Pe+/OsA3QSJCJ8LFA7uD1QRIRIQEsIRSxEmD8cLKghTBD3/7Pm89S3yEO+Q7QHu1+4N8HvynfVF+CT6C/yW/uoANQKZAwAGMwirCdoKTwvFCkcJrAaeAsL9A/mQ9Bnxlu/27xnxG/QP+aP9cQHdBfkJSgxhDjEQDxGpEfkRHBG8DwkO9QonBxkDQP5c+a31V/Jq747uHe/Q73bxVfTd9ib5mftU/cX+ogBKAswDsQWAB2MJBAtdC2cK9wguBtAB5vwV+DnzT++F7TDuFvCe8qD3nP1vAl8GhQqSDZ4PGRHsEekRqREvEZ8PXw3qCScGZgL1/fL4D/Uj8urvu+4B7wjwiPFI9DT3ovmW+5r9xv+tARkDnwSQBsIIWgoHC18LGAt6CXwGigI
x/gT5I/Rg8D3t2uud7RrxL/RA+R0AmAWdCXcNABC/EdQSghLPESsRsw95DcoLfAjpA4oAGf3198fzb/GU73Puo+6q72jxh/Q692H5C/y0/pEAuAKIBFMF8wZPCRkK6QlJCqoJkQddBFgADfsm9qbxJu487CztUvDE88z5XQAjBhYLLQ9VEVASlBLNEdEQpw/XDTsMQQvGCCQFuAEJ/iH59PTi8Tfvhu3d7XrvJfHb8233ofr9/Cj/2wBOAqQDvgT7BWwH6QgcCtkKIQoICFgFtwFu/GL2tPHx7YPrL+xd7znzbfjl/xcGJAoXDt8QbBHaEaoRYxCPD+8Onw1EDGYKkgYPA53/T/ol9Sryee8x7RLtPO6x72Hy3fVL+OH6s/1w/9sA1AIrBJUF7getCXAKZwu5C/cJLwc/A4L+pPmn9OrvCO257IruM/KQ9hj8pAInCLcLqg5lEA0R2xG8EWAQcA83DxAOuAt9CLUECgHF/G33B/NQ8OHtPuys7Arun+928kj2NvmO+0v+tQBjAv4DbgX5Br0IDQr0CmsLtQrCCA8GKQIA/bP3VPOA71Ttbe1H8GH03fhV/7QFFwpNDQYQ+xAREeAQQxBwD7cOyA2UDNgKeAesA7b/s/o/9UrxN+6u673q7Ov97azwn/SU+Ln7Wv7dAI8C2wMABR4GewcmCUMK2AoYC7MJAQdkAxz/Xfks9BjwLu3d7FzvcvMR+L7+OAXuCZcNGRB8EIQQYxAyDygOxQ0IDekL4AoHCAwEOgB6+5r1LPEc7mrrgurS6+ft/vBi9RX5C/w9/6MB9QIPBAcFOAYKCKkJlAqkC0wMQgvLCEoFlACh+9r2OvK27gjt/+0S8nD2RfotAMwGjApGDCkO7g4YD3APAg/3DbwNng0/DMoJ6gWbAev9Ovlz86Hv8O1K7I3rau0y8MzyjvaX+nf96f9RAhcEpAXEBqcHSQneCvEK2goiCyUJfgXjAfH9pPgP9JbwzO2K7bvw3vRS+Mf9NATZCJgLqw17DiQP0A8nDxQO5A2WDXgM4QpUB+sCeP81+y31wfBm7nXspev07CLv//El9iv6af0GAFECRgS/BYAGSQfvCFYK2QpACysLhgnnBrEDYf93+qv1xvGv7h/tOu408pn2Y/onABAGbwlNCx0N6w0tDo0OiQ4JDsoNag3/C4YJRAUMARr9Fvj48ufvS+4F7XLtwe/l8XH0sPgt/FD+WgDnAvkEbwaAB90I0wrmC6ULXwujCuMHeASCAPT76fYU8wvwsu3C7dPw5vRC+EH9HANvBxsKiAyrDZYOlw+TDwcPyA47DqUMdAp4BsYB3v2i+W300fAa7/ztz+097wnxRfMy9w77j/29/7sC9QQpBmAHgQikCYIKpAoBCjwJnAfZBDwBSv3h+OD0ePEl7gftV+868yv2sfoiATMGugnmDJQOXA+qEPIQ0Q+oDuQNfQyGCv8GdwIq/5P7evZD8j/wku5k7UnuLfAn8rz1KPpz/QkAwAJABdkG4QcyCLwIzAnZCQMJiQhkB88EqgH5/Zv5C/Vq8T/uzezf7qTyM/Zu+zYCAwfMCnYOBhAbEDsRexEFEPQO4Q30C9IJ1gZcAp7+9voq9kDyU/BM7jHtne4q8Nfxi/XY+dT8zP++AvgEpwbgBzEIqAiHCWsJwwgmCOkGqgQAAlP+H/oS9ojyHO8n7Uzu7/G69d/5TQBHBkEKew0pEL4QYhEXEvcQkA+lDnYM2wlhB+sCg/5f+1D3rPJZ8C7v++1U7v3vofGY9KH41vvT/iACAQXtBkUIwAjHCGYJbwkyCBcH8wX7AxABwP3x+f31C/I57hTt6u6m8dn01frTAdUGKgtlDwASsBJxE7YTVhI8EPkNoQsYCUUFSwGY/Wr5CvV88U7vbu3C7PPthO928Zv1+vkJ/T8A0gNpBsQH0gh8CasJmglnCdMIwAfsBRUEoAGx/Yf5yPVJ8mHukuyx7b3wufRs+ff/OgbcCpoOnRGQEukSFhN
REnIQXw4mDIEJcQbPAWD9wPkH9uDxCO+w7Tft3e1v73vxzPQB+XT8pP/wAt8FygcdCcQJ4wnUCbgJ6AhgB3sFeQPnAOz8jPiE9Lvw3OyA62DthfBV9KH6dgIiCI0MFBENFHIU1hT9FFQTsxDODj0MrwiLBM7/3ft199fyae+77UbsCuzv7TTwifJh9u36FP7pAPkDrgZhCGkJBwq1CosK1QllCS8IYwWNAlAAg/z49/rzoPBw7bnr5+xv8ED0AvlOAO8GJAvhDrsSSxStFOoU8RMBEtUPuAzpCCUFcgDM+9j37fMW8AruRe3z7BPuUPCK8rP15Pli/U0AfQNYBkUILwo7CyYLFwsdCzAKYQgKBnQD0gA6/bP4G/Sa8Ents+o864DuR/I595v+3AW+ClAPnBMmFYIV0hW2FHASdhAlDREJiAX7ANf7wPes80zvJO2M7CDsbu1Q8OLyM/bh+tz+WgE2BFAHWQldCqAKpgrXCpwKVQmkB4IF2AL8/1/8kvfW8k/v/evD6Szr9e6Q8mn46gB6BzMMUhGyFJEVpBaRFpUU4RLSEOkM/AgIBcj/8vp+9ubxVe6J7FnrlusS7tzwVPMh9777J/8JAh8FdQf8CJIKPQv8CpUKJAqOCRkIHgXvAXX/tPuT9uPxOe7L6izp9OrF7r/yv/gsAYUIVw1eEW0VQBfFF4wXLBYMFMMRFg5ZCVYE1/7F+f70ZfDe7HLr1ept6/LttfBQ8373IPyF/1UCWgXyB8QJ3Ar9CtkKMgoFCeMHEQYnA3AAxf3d+Xv1fvHE7afqzukE7SXxlvRY+5YEIgsyD5ET3BbeF50YRxjMFT0TthDoDJEHtQEz/Iz34/JK7mfrbepo6sjr3u668Wj0D/ky/hMBRAOsBpoJ9AqAC30LDwsJCq0IfAd1BRYCdf9j/Xf5X/RV8Hvtt+rt6bHsNPGi9Z77nwReCx8PiRPTFrUXbhjoGJQWpRNLESgNlgeUAdD7+PZt8vXtU+uX6oLqw+uv7ovxSfQF+Qj+TQFhBB8I0Ao0DJsM9wsJC4kJdgetBZADhgAQ/mP7L/fA8qnvKexx6aHqme6F8g/3Vf/BBzsNSxK3FicYTxmGGkUZchZ7E3APqwpkBWL+ZPgt9Kfvf+v96aLpqelD7OzviPIo9nb7AQCpAxEHnwmQC0YN7wyFC+cKXgnWBkcFVwO4/zL9vvpc9v/xqO6i61fpYuq97qrzqfhl/9EHOw5CEvQVIBg9GVQajhmFFmITpw94CrAEFP689zbzXe/G69rpcung6RvsOO/x8Rj2e/v8/yIEMggIC1AN+Q4yDk4M7wrWCD4GHAR1AWP+e/yK+ef0//AF7snqoejQ6R3uvfNZ+ekAkwnZD4wU1hhhGm8avxoLGvAWZhIoDtoIsAJJ/N31/PBO7S/q6egi6Q7q1ews8dH0g/iu/eYC1QYNCpAM/Q3JDuwNfwsRCWoGxAPoAX//kfyT+nT40/Te8L/tXOp06Ezr8/DW9bb77ARTDVYSwxaoGX4ZAhqMGtgXDBSpENQLDQbf/834HfM572jrQul06bPpReuf73PzD/bK+nwAigRdCNMLXA0/DoMOqwwUCsAH0ASaAjgBg/7M+5z6+ve+82jwX+0q6rjozuqQ8FH3Sf1BBR8OUxNGFisZshkkGd4YOBfxEyQQiQt+Ben+6PcF8k7umesB6k/qces87fzw4PRf91n76ACbBX8JaQz/DfsOkg6pCzwIHAWzAcb/0f4X/Nz5vvmR92/zWfBp7RnqBuqL7pf0WfqsAe4KnRFDFWMY7hkxGfkYFRmUFmESaw6UCesCCvtg9PvvUOye6Z/pHev461Tuk/LI9VD43vyQAigHqgqEDS8PWA8BDhMLLgcsAx4A1P6P/QH7cvkZ+aL2ofI/77jsE+s9677u6/V2/UIDygoXEpcVhxfaGLcYihi0GBwWxBEiDlkINQFQ+pPztu4V7LzqrepI7LHtaO/w8mL2v/i7/GMCZAeCC4MObQ9
YDzkOSAqLBdcB+/5+/dD8yfos+Qz5ovZo8k3vIe3D65vsVfAw95L+YwSNC4kSMRX9FmEZNRlQGI4Ylha2ESIN7AYs/0z4FvKb7YPrcerE6uvsWe6Y7ybzEffy+av+rQSSCZwNLRAsEBAPfgz/B9oDTAD5/Av8L/zo+Sb4M/jK9SDy1u8l7hjt6u2X8j/6/gAfBtQM2RIlFZwW7RenF54XqReMFMMPzwofBNb88vWV8Mftg+wm7Cnt7e607yPxcfSq9/P6OwD1BdEK5g5pEDcPQA0TCqgFJgJE/7n8ZfyU/Kr64vjI9yX1YfKN8LPutO7T8I3zvfnXAdoFFQo2EbIUSxWeF44YPhfuFgAVOQ8SCbwCY/ut9XDxAO4p7SDueO4976nwqPEX9IT4m/wYAQYHGgz/DnEP8g2OC+IHfAOXAIn+bPwr/Gb8Nfrp9/j2qfQL8hPxxPAT8UzyjvU//CwCXwXrClAR4BM5FvkYbhgDF9cWgROqDIsG+/9j+Zf0JvEo78HunO6y7qfvPvBo8Vn1Mvpo/ssDtgnNDTwPVw7/Cw0JuQXbAikBPv/y/MD8ffyV+TH3MvZa9BPzEPPR8jnzo/Qf9q36KQHqA5kHHg96E8sUqRdSGMAVcBTzEWULwQXyAPf63vYm9E/xVfCC8Jbvs+8i8Zny+vVd+6v/qQNlCL4LvAwYDI8KjAjvBUkDRAHl/ov8i/t9+uz3M/b29QH11vOo88Pz4PP+80D2mvw0AjYFIAsYEt0UhBa4GIwXnBR3E/APXgn1A9D+efll9U3yH/Bh7+vuE++l8P7x/PPG+CT+5AGqBRcK/QwbDZsLqAmqB/UEEwI3ANj9CPsT+vz4//WK9EX19/Ru9Av1gvXa9S32WPhv/nsDDAYIDG8SXxRVFi0YkRUBE1QSKQ7mBy8DH/5N+Qr2EvMi8aXwKPBc8JfxcPLg9Pr5qP7qAYYF9whUC9ILMAo4CLUGqAQuAtj/2PxN+nn58fck9Sr0n/TN9D71z/UF9u31j/b2+lIBTgRFB0UOcRMJFfEWRBcYFbsTARLlDMIGwQFY/TH5G/XQ8X7wK/Cj7/jvRvG48gf2ifsoACsDggYBCngLkwrYCI8HVQYrBHsBJP85/HT5XfjS9m70I/SZ9Rn2WfYH96r39/ds+M37XwGhBJUHzg2mEgUUgRX+FYoTtBFEEKwLrQX/ANr88vgS9ebxGvFE8Rrx6/FK83v0vPe2/LsAnwNKBukIuwrdCbYHhAbjBNICpgFX/2T7NfmO+Jz2z/Tb9Lr1xvZk95/3BfiZ98n3VPz+AZEEQQjVDrASMhTkFfoUZRJkEXMPxQpYBYwA2fwi+dT0tvEr8ZfxpvFy8gL0kvUI+V/+9QGnA3IGDQk9CUEIxwYcBagENQRHAbv9EPsu+Xj4R/d+9TP2D/hG+EP4pfiV+DL4pPh8+1MAsgOoBiYM6RDXEuATJhPDELMPQA4rCtEF3gHZ/cr6C/f68qXx0PHi8YfzTfVv9kr6Yv8QArYDLwWUBpcIqAhHBmsEtQOFAs8AL/6p+gr5aPmk+Eb3Wfd/+F/5yvmv+R34rPZ19wH7jAChBBMH+gsnEe0SNxOFEg4QSQ98D5kL7QXyAeP9ofnj9ZryaPEP8gnzU/TX9V73wvqF/wEC3QLkBKUGGwcFB6kFYwM8Ap0Bqf8H/Y36jPlK+vP5kPip+ET5Ivl0+SD6Q/lE+Mb45fqv/woEogVvCcYO7hC2EaURnA/lDioPpguIBkYDyv/7+8r4NPV28zL0sfQf9WT23fcO+9P/sAF4ASwDQgVWBl4G2AN1AdAB7wGR/6H8Z/qq+cv6evoy+Eb4JPpa+jD6sPoc+jj5nPnD+u39nAIbBhUJSgy8DgsQ8A/ZDtINtAzdCiUIWgTg/wH84/is9lL1DPR99FD3IPmH+Zv79f56AdgCBAM+A8kE2QXjBIACLAA2/yr/E/6I+xD6/fqY+9z6TvpO+r/6pfuk/PX7rPmN+Ir56PxXAQgEkAY
UCoMMRg1EDTANDw0bDYoM3QkaBiAC7P3Y+hv5vffS9jv3Q/jf+M75yPuL/eX+EwGQAoUCDgNcAwUC2wBNAHb/X/5G/dn72foq+1L7R/v7+0b8DPz7+y38Fvzh+uP5H/rK+6P/UwMlBYoH+wm2Cn8LIgzuC90MoAzTCbUHFAXpAC/+ZPwW+tX4dfhE+Kj4ifmw+z/+TP69/mQBEQJ/AeEBWgFfAHsABgBQ/hT9Yfw//AP9efze+zH99P1F/XT8CPzT+6j7UftU+uH5ZvxHAV8EwAQaBnwIlQmiCVIJBQqyC9UL8QllBskBwP5//in+EfyD+pD67fp1+1j8Wvx5/OL+JAHSAC0AGgAlAFUA2v+q/sX9fv1N/Sv9Kv0S/ef9O/+8/kL9Z/xJ/LD8Cv35+z/6LPpz+yv+yQGNAiYDngaXBxEHIgkpCtMJ0wrpCZMG5QP+AQEBogCX/oL8F/x6+1b7kPye/U7+2f6T/qr+gv9D/4f+3v7o/p3+mv7s/ST9k/1S/gv+Av7b/jr/KP8t/vn7YPvR/FT9PPyf++v7lvyF/gsBPgLZApQE8gYABzgF/AUkCOwIIgkqBysDAAIjAywCtf8p/oz9Y/5h/zj+hf1d/o/+rP5o/sv8ZPwO/tz+E/7G/eP93P1P/rb+8v6K/wwA1f/K/kP9Mfx//Gj9Hf0r/OT72Pvr+zf9T/+4AMIBaQJ+ApEDZgVTBvsGdQhsCVMIcQbQBBIDWwJCAhUBev/k/kP/wP5O/R/9Xf0o/VT+0v6Y/ML7xv17/v79Z/5y/qr+vP/G/0n/g/+I/1v/bf4F/G/6iPsC/Tn9Kv3U/FL8B/1x/qb+8P43AcIDcgQxBH4DLQNRBpYK/AljB1MGOgRQA64EwgMZARcAt/+b/rf9cP1S/ej9Bf5u/GD76/si/VT+sv5I/jz+OP/l/zL/df+UAPb/of5I/YH77vrp+9b8J/3R/Mr7evsc/Z7++v6N/zYAEwG4ArIDpgOiBB4GOwdSCYgJlwakBRwG/wQABH8C2v/i/mH/1P7K/Qv9zPxM/pX+pvvO+i79uv5H/yX/MP5j/qj/AwBx/wf/Dv86/3L+KPwZ+nj6Z/yp/dv92/zi+xv95f4I/67++f5WACYClQLbAZ4CggWcB+IH9wc/B5kGmgeUBm4DnQIFA0UCBQHo/uv8Xv1o/r390vy7+5P6t/uV/Z39mf2j/mP/4f83APr/IgAFALD+Xv06/H/7f/s4+4P75fxJ/cz8P/00/cj8Rf5m/2//qwCgAh8EgAOcAUoD5gfuCrIKOghSBpkFYgXYBBADbQHtAIAAMP+g/Zf8L/yD/Tv+gPuz+WX7wP1M/5j/E/9m/wwAHADy/+L/JACp/6b9Fftg+Z351vvI/Zf9kfzy+9D7Sf0x/wb/Vv6t/zkBNAG1AbgDxAUQBkYGWQjiCHkHbwcqBz8FWAMoAlwBsgASAED/fv5w/Qf9P/63/Y/7rPvT/D79kf30/aD+pP+cAPQAOADx/nD+rP64/W/74/l2+mj8jv19/WP8+vpI/Bb/f/9+/lP+c/84AWoCFAJ2AjMF7QakB0QIWgdJB+0HnwbsA0gCUQL7AZUBAgAJ/cD8Mf7N/vn9/vvM+878K/2I/eL9oP7l/wQB9gDz/3f/Sf8s/4P+Tvyk+p/6Jvtq/Jz9V/1K/O/7vPzc/bb+2v65/tn/mAGOAusC6gJTA98FhQj1B5sGJgbcBdYFFwT/AeYBFgJJAlkBuv6T/QL/3QCP/+D7kfos/D/+fv4x/Qj9cv62AL4B+/98/lj/ZwD+/un7PvrS+q789v1t/fv7Q/uY/Hr+bP7R/T3+AP8PACUBgQHGAeQC1gR+BooHTghUB7gF9QV5BuoESQJ4AZEBDAGyABX/b/1U/ln/mP5p/BD7XPxh/sD+o/2f/az/WQEKAa3//f5//+X/tv7n+w36Cfsq/d79SfxO+rX6Jv2G/hz+rP2k/Qb/owGsAWwA1QLRBf0FrwakBu0F5AcCCOoFXQR
ZAvYBLAJAAYkA7P9t/z//X/+g/pP9mv0Y/T79Jf4E/qn+fv+U/zMAWwCB/2z/3//q/v/8Zfut+q37Jv3+/Aj8F/vy+tn8Qv6g/UT9zf12/2cBxwG2AbECwQSjBgUIughaB/MFyAbMBgMEAwLTARQBmwAbAB3/0/4j/oj+Jv/B/Ej7C/21/qf+pv2j/bH/sgGVAVYAZf9J/ygAyf/m/J760fq7+0b8Cvwz+6D7J/1V/qz+KP7Q/dH+fQBCAU8BnQEPA80FfAfhBoEGwwbOBp8HsQZrA6cBAAExASwByf7n/VD/dP9A/m39ef2M/Yn9gP2T/XT+bP9wAPoAzv9M/z0AYgCh///9uvu/+vj6Afzv/KT7v/lF+p38Mv4S/hn9F/3N/qkApQE1Ag8DfwSRBgQJPAnUB2AH9waUBuIE9gKuAs4Amf8FAF/+sf3v/uH+Sv2F/Fz9ff1F/bL9a/6j/xgAUQA5AVMB0wBFAMr/BP/3/P76fPrY+tT7T/wT+9358fof/TL+lv5+/or+MwC8AWcBXQF6A+cFJQcGCMYHFAccB9kGxAbLBDIBUQCsABIAXf/X/j7+Ov1h/br+tf6V/YL9f/7H/sb+y//WALMAQADx/3z/LP/Y/iX+oPzD+or6KPwU/W77Xvn9+Wr8hv7o/nv9iP0RAMABIgLvAfUBhQRaBycIygjtCO8HBAfwBW0EZQOhAhwBjP8Z/jr95P0O/pj9rv0r/e78j/32/Yz+Lf9Y/0z/fv8uAIIAAABz//b+Qv5M/ef7YPsb/EH8H/vn+br5vfqG/A7+Sf5C/jv/zQAlAkkCDQL9A0MHFQmcCeEIQwc+B40H5AWbA9gBSgGbAI/+U/3a/U7+Vf2z/Iz90v2o/QL+Lf68/rr/jgCvAAAA2/9vAD8AIP/t/Tz9mPx8+yP7dPuj+mH5gvmT+qT71fxA/lj/9P86AMYAqwJKBLcENgY5CIcJegqCCZ8HJQcHBvADzwIzAVH/Df9J/nT86vuD/G/98v0l/Tn9iv4M/33/qv8C/xj/AwCEACgAef8B/0n+Iv0b/Bz8fPxt+1n5S/gO+W76WPtN/Gj9aP7r/0QB+QHrAhME9wV2CDEJ9wiLCfIInAcuB/8FVwShAocASv+W/mb9S/xU/Fb85fuq/O79C/+z/iP9l/4MAZ0BUwHg/y3/FACKAL7/7v13/C38uPzF/N36o/hk+Jr5C/vH+8L72vyG/pv/bQGRAp4CawR+BoYHvQiSCYoJpQhpB7AG7AXeBDMDYgBy/i3+7P3M/J77zvuW/BL9cP1e/fP9Mv9W/33/dgDnALIAVADU/5L/7v+8///9Ivz/+/v8Pvyi+Vv43Pgf+rf7pfwU/eb9Jv+RAIQCFwRvBDEG0Ai8CGgIIAluCMEHbQfRBSAErALeAEX/fv0//JD8Rfzf+u36f/ym/eD9xf1F/q3/ngCsAG8ADwBVAAsBawC//sv9d/10/Qj+LP2T+jr5gPlw+jb7+vov++f8oP6E/3oAFQLYA54FCgfKB3sIPgl3CTkIsQZjBvwFngRVArL/w/64/r79avwq+7z69ftR/Sr9w/yi/cT+X/8eALAAaQBBAN8AGgFbAFX/U/6I/Xn93P1L/fr6Nvm4+bH6V/v7+1j8Uvx0/MP+CAJTAwwEAAUSBj4I2gmeCScI1gZfB+cHZgaHA3EAW/+q/6r+HP1c+/n5zvrH/ID9d/wa/Jz9Dv9gAEABiAAKAKsAWAEHAfH/Fv86/pj98/0E/mj8GPpg+W76rPuL/G38U/ss+7v9RgEuApoBCQOaBdwH5ghcCEoImwgSCFMHMQbGBIQDwgGv/wb+L/2A/FL71fpw+5L7vPvC/Yr/Xv5S/WD/CAJsArMAOP9m/2IA+ADq/3L9ifwp/iX/+/yq+dr47frg/KT8Z/um+pP73v4sAeIAtgHrA48FUwdhCFoIsAhpCIUHdgexBoQErgJpASgAy/4R/Zv7cPua+2H7GPyL/eT9Z/0J/sb/pgBsAOz/u/+
sAGMBwQB5/0j+AP56/rr+w/17+x76hvqd+/X83vy8+vP5/vyZAOsAs//SAGkEawfyB0gHZAeNCEwJtwgzBxIF5AO2Ax8Clv+y/YL8A/zx++P7XPt9+zb9Hf7C/eH9uP4xANsARwBIAMMA+gDSAOv/n/71/dH+Mv/B/ED6V/pS+xz8bvw9+236W/zE/pL/yf9tAHgCAwU2BoEGtQZHBzsIdgieBw8GqAQkBKcDCwKE/2v9gPw7/BL81vtZ+2T7xfzx/R7+j/4u/+r/ugD4APwAzQCYAJ8AIQAN/1L+kf5y/sf8lfuI+6f7Z/xA/Jb6R/sH/uf+cP4R/x4BeQPkBC4FnwXLBtAHLQhZB+4FhgVOBWcEtQJ6AD7/Zf73/Bz8uvuS+2L8RP0E/ZL8gP0x/wEA2v/S/2kAMQGaAVEBQAAw/xz/nP+A/979sfs6+0T8If3v/Dj7Ufp1/Pf+OP+O/i3/SwGDA64EkQR7BPQFwAf3B3AGzQTfBJMF3ASHAu//7/72/uH9QfzX+378Lv1i/eX8tPzg/TP/mv+V/33/IQBoAYcBhQC+/7j/IwAcADL/bv0L/JX8ff3g/LT7H/vE+1n9Kv4B/lD+n/94AecCZwOrA0wEkgW8BmgGRAWxBMgEEgVsBIMCnQCj/xr/Kv5C/Qj9D/0w/Xb9Ov0M/ff96/4e/zH/a/8LAKgA0wCDANb/z/9QAFoAxv9I/ur8UP0f/pz9VPyG+9P7/vzc/RH+NP66/hsAIAJ2A6oD1QPjBAwGKAZnBcYE4AToBOkDQwLDAAwAzv///h3+tv2y/Rf+CP6S/Zf92/1a/g//Zf86/0D/KAD9AMAANgA6ALoAEAF1ABv/Ef7K/aP9Rf2b/Jn7OvsG/Ab9Xv1k/Rb+xP+AAWcCfAK0ArwDBwWeBfsEFAQiBLIEhgQiA2MBbgA4AC0A4v/y/uD9yf1u/ob+G/7M/cL9V/4N/zn/O/9n/w0AvAC2AKAA5gDtAGoAx/9d/7H+tv0V/c38YPzw++T7GPx9/D79Fv4c/1UAIAGiAXwCdAMjBIkEjARoBJ8EuAQXBDUDcQLtAakBNgFsAIT/4/69/q7+aP7N/T39gP0S/kb+g/7W/iv/1P9pAG0AjQA0AZ4BNAF6AO//Ov87/oD9FP2m/Dj8Bvwv/HX8yPxv/U3+GP/o/+cAvgEfAqECmgNpBLYEvARuBAsECAT1A3cDxwLqAR4BpAAiAHn/6P6C/vr9lf29/bj9cP3e/Z7+3P77/mX/AgCBANgAJwE/AfsAVwBy/63+C/6O/R/9evz7+wj8ePzx/C39af0A/v/+DgCVANcAhAGYArADRQQxBPoDBQRtBL0EcgS7A/YCTwJ7AYwA/P86/z3+2f3j/bX9Pf0x/cP9Iv5v/hL/cv+U/y8A7ABAAUwBOAH/AIwA1/8z/7v+JP50/eX8fPxs/NT8Cf3Z/BL93f3P/q3/BgA1ABYBawJ3A9wDsQOwA1wEEQUDBUIEcgP2AqECIQIdAaX/n/6m/uD+Zv5x/QD9d/0r/oX+cv40/n/+n//FABEBnQB0AAUBYgHsACIALv9C/hT+P/68/dH8XPxk/MH8Qf2X/c39HP6l/pb/owA8AZQBUgJKA/IDVASbBJ0EfwSABE8EwQPxAs4BoADs/5r/Kf9P/mf9Ef2A/f39zP1g/Yj9T/5R/xUAMgABAEUA6QBmAX8BFQFoAN3/lP9V/7r+/v15/RH9AP0o/ST9Dv35/DH99f3T/m3/2/9sAGMBnQKmA+MDtQMPBKgEvQRjBKcDuQIsAu0BQwEUAPP+Zf52/of+Ev50/T/9kf0+/tH+5P7G/ib/9f+cANUAywDfAC8BXgEWAXkA6v+O/x//hP7+/av9gP1W/Rb98fwv/Zf94f1c/ir/8//DAMUBgALrAowDHQQ2BCcE/gOeA2ADIgNwAn4BqwAUAL3/RP90/sj9lP2v/en9Cv74/f/9c/4U/3v/xP8oAIoA0gAbAWUBegE1AZ4A6/9n/xX/s/4
R/lf9yPy+/A79Cf2Z/Jv8dP2n/pj/BABPADQBmAKCA6kDiwOiA/EDLgT4A2gD3AJ6AiYCmQGvALv/Iv/B/oP+fv5l/tr9Wv1z/fr9af6e/uj+lP9mAOkALgFiAYIBkAF5AQwBagDP/yr/ev70/YH9Bv2q/Iz8nvzJ/A39kP07/sf+cv9qADMBkQH4AZUCSgPrAxYEvAOFA8kD7QN4A4sCkQHoAIcA8P/8/h7+wv3D/br9bf0q/WL9Cf7D/mH/+/+eADUBmwHdAe4B2wG0AV0BmQCt/wP/oP5N/sH9Cv2g/Mn8/PzW/Iv8yvzR/fr+Zv91/wgAMQFaAgcDNQNMA9EDeASuBFME0ANzAysDewJeAUYAZv+5/ir+iP3q/L78/fxF/XX91/2N/mP/GACtABgBYQGxAQcCHALTAVIBvQArAI3/5/5h/u39ZP3f/Hr8NPwn/E/8pfwX/bD9af42/xoABAHPAX4CTQMkBK4EwASuBK4EsgRqBKQDfAJeAYkA3/8H/+z9Ef3Z/Af9LP1C/Xj9Bv7Q/pD/GwCeACQBhAGoAb8B3wHXAXkBvwD9/4f/Rf/H/uf99fx6/IH8mfxg/Pn75fuE/KD9kv4C/2j/YwDNAQMDpgPnAzAEswQyBU8F3wQhBF8DrQLeAd4A0P/c/hv+oP1Y/Sf9Hv1U/cj9T/7U/mT/CgCbAPkAPQF3AaEBnwFrAfkAdwD4/2X/qf7v/Un9yPyE/H/8dPxH/Eb8rvxT/d39Yv4m/1EAiAGDAhgDiQMXBNkETwU1BdsEowRYBKkDrwKWAYsAp/8C/1b+qf0v/TP9Zf2M/cf9SP7X/kz/zv9bALwA3gD7ABUBIgEHAaUA6f8o/7f+bf72/U/9v/x5/In8rvyh/H78sfxg/Tv++/6w/5cApgGrAmoD+QN3BOQEFgUJBdwEpgQ2BFYDOgI7AYAAz/8B/yb+m/17/ZL9kv2H/bD9Nf7k/nT/xf8DAG0A8QBKAVABFgG6AFIAvf8P/2P+7P2F/ST95vzg/OL8yPy5/Nv8TP33/cD+af8bAP8ADwL4AqIDEwRqBLYE5wTxBLUETwTAAwQDDwIqAVMAcP93/sr9iP2L/YL9XP1O/ZT9Lv7I/jr/lv8kALAADgEcARMB/ADGAEYAkP/r/oH+Tv79/YP9BP3a/Of89/zr/P/8a/0g/uv+pP9qAEMBNwIGA7oDOASbBMcExASrBIsESwSxA9ACygHhAP7/J/9D/pv9Sv1N/U79OP0+/aL9Uv7q/lb/s/9HANAAGgETAQQB7ACeAPf/L/+f/lH+BP6A/RD98/ws/UX9Lf0U/Vn99/2+/m7/CwDCAKEBmQJYA9kDJwSEBNIE+gTWBIYEDgReA2sCWgF2AKj/4/4J/nP9K/0t/ST9Jf1j/fj9m/4H/2X/3v+KAPwALgEaAQIBwgBFAHr/u/5H/gj+rP0j/cz86PxO/W79RP0o/Zv9W/4N/4P/NwBRAZMCaQPBA/sDcQT+BCMF1QRUBA4EswPzAqkBdwCg/xb/Xv6H/fn86/wm/Uj9a/20/U7+5f5j/8L/TADXAD0BTwE7AQ4BsgAQAEP/p/5I/gT+j/0c/eH8Cf02/T79Lf1g/fH9sP5b//v/zQDRAdgCkgMJBF4EswTcBMYEiwRPBPEDMQMOAuUA/v9D/3f+mv0G/ez8Hf08/Ur9iP0f/t3+dP/M/yUArQBFAZ0BjQFDAe4AiwDg/wX/Nv6x/VD9+Pyr/KX81vwC/Qn9Hf2Z/WP+PP/a/4wAhAHDAsYDTQRzBJQEzwTqBLgEOgS4Ay0DgAKDAWoAXf+B/sv9RP3+/Pz8Jf1I/XT9xP1b/gf/of8WAIoACAFtAYwBZwEqAdIAOwBc/3r+0v1n/Qr9r/x7/I/80PwF/R/9W/3z/dX+rf9tADoBNwIyA+kDUQSLBMYE5wTQBHEEDASlAxIDHQL8AP7/Pv+V/t39RP3u/Pj8IP1M/Xb91P1s/h3/tv86ALYAIAFjAXEBWwEcAacA6P8
H/zv+tv1X/QD9qfyV/Mv8Hv1E/U39gv0k/hb/8f+fAEsBQwI/A/8DRwRbBGsElASABCsEwQNNA7QC4gHxAPj/Jv9q/r/9N/0Q/SD9Sf1z/bv9Mf7a/n7/+f9lANgAQwF/AX8BPAHWAEoAk/+4/v39cP0M/cP8pPy8/Pn8KP02/Wf96f22/n//LwDgANgB5gK1AxMEOARhBJkErARuBAEEjAMWA3ACjwGJAKD/2f40/qH9Rf0X/Rf9L/1x/eL9df4Q/5L/FQCYABcBWAFpAUUBAwGRAPX/Pv+Q/gX+j/0q/eD81/z2/Cf9Rf1p/bv9YP43//r/oQA8AfEBnwIqA3UDrQP9A10EfwRGBNMDWAPmAj0CUgFBAGD/tP4e/oX9Df3o/BP9Xf2S/eT9cP4x/+T/bgDFABQBWQFzAUUB4wB4AP//ef/V/jz+rf1J/Qb9/vwd/U39Z/13/az9IP7R/oP/NwDyANoBtgJrA80DFARTBI4EmARgBAgEmwMZA0cCSQEwAEH/X/6c/fb8sfy//AD9Nv1s/dn9iv5k/w0AjQDvAGkBwwHaAY0BKwG/AEcApf/z/mT+9P2U/S39/fz9/BT9C/0W/WD9AP6//nT/IgDoANYBuwJoA8wDJwSRBPkEEAXSBFsE3AMyA0gCJAEOADD/dv7D/Rn9tvyi/M78+/w+/aj9Uf4J/67/JwCPAPYAPwFSAR8BxwBSANj/TP/N/lr+/f2k/WD9Pf0z/Sn9Gf0y/Yf9Hf69/mH/EQDyAOUBwQJnA+UDWwTABPsE5wSpBEMExwP/Av4B4ADq/xr/Wf6p/SD98Pz2/Bn9MP11/fb9u/5x//f/XQDPAEwBlQGXAV8BEgG2AEIAtP8t/7j+TP7Y/X79Vf1H/Sj9/vz2/Eb92v2C/h3/x/+iAKcBlwJDA7sDLQStBAMF+gScBCsEtgMPAx0C9wDa/+v+K/6E/QP9vvyw/M78EP18/f/9of5Q//v/hgD5AEkBdAF6AVwBIQHPAHAA/P95/+z+Xf7J/V39Lv0g/Qj93vzb/Cv9wf1l/v/+sv+kAK0BjAIpA6gDMAS3BAMF8ASjBEEEyQMOAw8C+wAHAC3/U/58/fX83PwH/Sn9Nv1i/eX9sf5v//T/UgDAAC8BcAFlASsB7QC3AGsA+/+A/wj/jP4J/p79Vv0m/e/8vfy4/Aj9nv1C/uf+lv9wAFgBMwLcAmwDAgSWBPsEDwXiBIUEBwRYA38ChgGMAIv/mv7P/VL9Hv0U/RP9IP1f/dz9ef4Q/5X/CgB3AMYA+wALAREBBAHbAI8AOgDp/5T/Jf+i/i/+6v3D/Yr9Pf0I/Sr9i/0C/nr+EP/M/60AgwE9AuACiwMpBJUExAS+BIsEIwSIA7ICygHZAOz//f4r/pv9Xf1P/VH9Vf14/dv9Zf7m/k7/rf/0/zoAfwCqALUAvQCbAEsACwDj/53/Mv+6/kT+9v3L/YX9L/0N/T39j/3m/T3+wv6N/4IAZgElAtQCgAMfBIkEwwTHBKMEPASeA9MC/AEcATQAR/95/vH9ov18/Wv9ev2s/QT+Xv6t/gH/af/W/zEAZAB5AIoAmACVAIAAZQBJACMAyv9K/8v+d/45/ur9h/02/SD9Rv2R/eT9Zf4e//f/xAB/AS0C4QKRAxoEagSBBGsEFQSLA9ACEgJPAY8Auv/k/jj+xf2E/Wf9b/2P/dL9G/5n/rv+IP99/8r/CABGAIQAsQDIANAA1gDUALMAVwDm/3b/F//C/l/+9/2g/Wj9UP1d/Zb9EP6y/lv/BAC+AIcBUQL7AnsD3gMnBDgE/gOKA/8CbALHAfoAFgBB/5v+Iv7C/X/9aP2H/cD99/0o/nD+1v5B/43/u//u/zgAgwCuALQArQCzALgAowBkAAQAmf8t/8X+Xf74/aL9a/1n/ZX97P1e/uf+jP9MABMBxwFjAu4CbAPOA/oD5wOhAzUDswIVAmgBtAAOAHf/9P6H/jX+Dv4L/iP+Pf5Y/nr+p/7e/hr/V/+
Z/9L/9/8IABsARwCFALkAywDCAKAAYgD5/3r/B/+4/nT+Jv7S/Z/9q/3r/Uj+s/40/8j/XgDfAFYB0QFNAq8C3QLTAqMCXgIBApEBIwHCAGgAAQCK/yb/7/7q/vr+Bf8L/xf/Kv80/zH/M/9I/23/j/+s/9D/BQBCAHkApQDLAOsA9QDVAIwAKgDP/3n/G/+x/lL+HP4T/ij+S/6F/uH+Xf/X/0YArwAeAYsB3QEFAgsC/gHlAbgBfgE9AfwAvAB3ADQA+//U/7f/of+K/3D/Sf8c//H+0/7K/sb+z/7o/iD/av/E/xoAdgDRACIBVgFiAVQBJgHqAI8ALADC/2b/F//X/qX+hv59/ob+qv7f/in/c/+6//L/JABZAJUAzgD4AAwBEQEUARcBFAEHAfkA8QDlAMcAjwBPABkA7/+//33/NP/1/s7+vf7A/tL+/v5C/5T/5f8wAHUArADNANMAxwCrAIcAVwArAAQA4v+2/4D/U/88/0H/Tf9T/1H/W/9w/4n/lv+f/63/yP/p/wAAFgA8AHYAtwDtABEBKAE4AUIBPQEpAQ0B5ACpAFkABAC7/4b/Xv9A/yz/Lv9G/2z/mv/P/wQAMABKAEsARAA5ADAAJAAaAA8ACgADAPb/5//b/9X/zv/G/7f/pv+Y/4//hv+B/3v/fv+K/5//vP/g/wsAOgBsAJ0AxADcAOQA4gDXAL0AjwBTABkA6//F/57/d/9X/0j/Sf9R/2H/ef+g/83/8P8BAA0AHQAtAC8AIgASABEAIAAxADMAKgAgABwAGAAJAPX/5v/k/+H/z/+r/4z/hf+Z/7X/zv/j//7/JgBWAIMAqwDRAO4A9gDkAL0AjgBkADwAFQDx/9j/x/+6/7H/p/+j/6n/tf/C/8n/y//K/8v/0P/V/9r/3//j/+v/9P8AABIAIwAzAD4APgAzAB0AAwDo/9T/v/+j/4b/cv9w/4H/n//F//L/KwBgAIsArgDNAOUA/AABAe4AzQCiAHQASgApAAcA8v/Y/6j/ff9d/0j/R/9V/1z/Yv9v/3n/fv+S/6X/t//b/wQAFAAfADAANQBJAF4AWwBHADgAEADl/8r/lP9t/2H/Vv9M/1D/YP94/7L//v9GAH0ApwDWAAABGgEZAREB+wDgALwAjgBkADkAHAAHANn/yf+k/3v/af9x/1f/if+L/wwA2wEVAFn+CADE/xX/LwAxAG3/MQBmAI3/3/9QAAAAOgBKAM7/uP/J/2n/Uv+F/77/wf9eAJIBiQD6//IA2wBdABQAQABuAFEATAD5/93/+P8NAP3/uP/c/6D/av+//9j/ov+6//7/zv+u/6D/uv/t/7v/aP97/7v/rP+3/8r/+f8aAD8AqwCmAIcAogBsAD4AUQAoADQAGwDk/wMAx/9d/1z/9P+i/37/9ACbAKH/lQCAAKT/vf8gABEAtv/m/3n/eP///+3/eQBMAM//tgC8ALX/VgD7AJAABwHTAFL/Gv+0/1v/gv/x/4L/x/8BAGD/s//u/wkAlQApAEQACQDK/zwAp/++/3gA4//A/68AiP92//AArP9y/1gAc/+P//T/yf8cAO//MQB8AC4AWADK/5T/jQDt/6X/gwBeAP3/af/w/wIAMf+KAM3/E/87ACwAs//T/68A8/8bAMYA6//X/wYBmACN/7kAPQAD/zcAmAAu/6j/owBU/zX/lQCU/7D++QDMADP/bgCBADH/igDlAFn/zAA5AAz/ZAD7/5H/LwAoAGYACwDQ//sAdACO/20Ao//Y/vf/ff8m/5b/r/++/1r/fAB6ADH/PgEdASf/RwBvAWEACADuAE8AGQCAAA8AUP/S/+z/qv8R/yj+IQCu/wz+JAAvAKH/FQC+/04AOwBJAPH/iAAeAe3/vQCWAJYACgFYAA0Ayf9vAK//Wf+bAFUAR/85/3r/Ev9K/ycAZwDM/5b/OP8E/6v/sP9r/+r/YwCgAG4AQwDQAHMAqwDZAGcA8gDYADkAaQCRAKn/gP8
UAIP/+v5J/wYA+P+x/oz/gQAAAH0AYAAuALIAEAGeAH8AwAA2AJr/mf9H/w3/iv8w/0H/R/+c/hb/Hf/W/g3/vP5e/+7/Tv8X//n/OQCL/7f/eQCOALYAqQGaASkBywEuAtABHgLaAvMCvgKSAuIBcQE2AbsAnQAtAFj/bP9j/yf+4f2H/k3+BP6Y/jn+1P1+/t/9Cf0i/RL91vw4/Jr7I/sa+7v7Rfym/Ln9jv82AZcCrAOoBA0GMAf0BycILgiACNEHvwZNBdMD7gJ6AZz/UP4f/cb7v/rY+ST51vjc+dP6GPuq/OD+FwAmATICwQKmA9oEIgWLBJIExgPKAff/cP2Q+2f6vvdH9ob1+/PC9WL4qfjV+tz+hgHLA78GtAgHCrEMSw6DDTANmgyjCnEIXgVZAnwAXv7M+6z52Pc39jz1+/QA9dD1r/dn+gX9Bv/tAPYCvQTWBdYGAAfrBmIHmgYCBOAB5/+9/Oj5Tff982fyQPKH8Enxefas+VT7pwCuBbMHuAtxD2QPdxE2FMkS0xAVD2ALdAeHA+/+V/s9+Qv3RfTo8nrysvE98pzzuvSj9038DACbAkUFcAedCLQIOQjMBzYHiwaJBWoDowAX/hv71/e/9KHyJfF28IDxsvG483T6Af9MAR8HogzNDvoQuhI1EmIS1RK6D3kLUQjxA1j/gPsy9+D06vT481/zC/Rk9H31SffG+Pv6lP6nAngFsQbPByQIbgdbBsoEYwNbAmwB8f8t/mz86fpd+Qz3TvUB9UD0dvMN9Sn28/cH/7cE7QUeCgcQWBFBEQQS2hCUD/MOaAx+BxUD7/8q/PD3OfSf8m7z/vMW9Lj1jPc8+UT7wfwp/rkATQTSBuUGUQanBgsG6APSAen/ev5N/jD+Rv1s/BX8jvs9+mX4Rve99kb2BfcH+CL62wC1BgMItwsaEF4QcRAUEPwMwwucCy0IrwMDAEL8Lvlz9vfybvIc9Wz2Y/f++Yb8Y/7b/7YAPAGfAh0FowY0BuQEPgT9Arv/Fv3x+3n7XvyL/aP97v2k/jX+dfy9+ov5DPlZ+DX41Pjv+Tv/fQXTBsUIOQ4QELwO5w2fC4wJsQhXBe0AGv6c+//4efas8zPzPvaR+Ir5JPxB/yQBvAIqA8gC7gNWBWUFAAWhAy4C5wBY/u77zfrj+q77OPw3/cn+2f/8/7P++/zD+5H6HPmP+A35IPpq/hgF6QcICdMN8hD5DqsMxQrdB54FIgNF/038RvqY93n1u/MY89L1mPmP++L9FgKzBe8GEgZmBfUFpAUmBOoCVAHA//3+0vyz+Z34Vflx+or7hfyT/mIBoQK4AXEAef+2/eH7kfmg+Ar50vlU/joELgYSCF0Nzw4UDKIKswghBq8EwwG//rv9yPvc+Dr3WfaH9qj5yfy5/soBAgWfBvoFAATkAi0C2wC1/0r+0vyY/DD8n/p4+Uz5qPrw/FL+2/+CAsUEgAXoA0oBJv+0/LP5GPeK9gb3EvnH/64FWwalCeQO8w2FCmcIEQbGBHkDZgC6/vv9Z/tU+YT4Ovel+PL8Of8FAWgEKAbgBaoEKQL3/3f+tvyu+2P7IPtw+/z7Dvzl+1v8Nf60AOAC0gSpBvkHkgcKBYcBZ/7A+s33pvbW9dz19/bt+s4AvQOsBI0IkQzeC+EJQAkuCJkGzgQuAjT/4vzW+jT5IviC9/D5K/4IABoBYQMHBUAEZgGT/gH9D/zJ+qj6wvvQ/Mn+kgAUANv/oQElAwUEQAQyBHoFjAXaAvv/r/xP+ff3O/cb9dP1rPh8+fn83wJyBIEFigq+DBML0AlpCKAHfAafAqL/0v7x/JL6g/nS+IX5Df0ZAKYArAFNAy4D1gBg/Qn7MvrA+cb5C/tW/VkA+AI9BI0E3gTjBUgGogQZA2UDTgNxAWT+wPph+Gz3efVp9Cb1qvYf+PL5gf5WA1gFXgiuDZsObAy3DLkL0QcgBbcCz/+1/Qn7svil+P743PkE/UL/dACcAk0DCgI
uAH790frp+TT69Pr//KL/fgINBdoFVgXyBGMERQO8AaQAuAAcAKX91/uu+vf3mfak9zb4Tfgs+h37Y/ykAbAFmAaLCcINVA4vDUMLdQfmBGQDuv+s/Fn7bPqf+tL6UvrO+93+XgBpAb4BTAFQAaH/mfv1+OT4Afqo+yH9if8fBJwH2Ac4B2gGwATWArgAM/5C/GD75voe+uv4Z/iv+cn6b/ud/PH9vP4R/4cBSQVMBq0GaQr9C+kIUgcDBv0CegFG/4D84PwW/Vj7uPsR/dn9awBgAh0CgAJoAjsAW/2i+cj22fbg9w/5GvydAGkFWAmjCkgKOwnUBn4Dv/8P/Iv6+vq0+mr67/p5+yT87vzS/GL+JwCP/1P/pv7j/uEBfALvAPkD4QYWBj8GmwUsBAUFOASbAQQBz/+I/qj/HP9b/VT/ggCo/s79C/xS+V74ZvYY9Zf39vli/e8DwwjcC1kPlQ+QDJ0JOQVe//D60/cN9iP2rPaC9y/60vye/hIAHAGjApQCSgDU/Zn9Ev9B/0T+KwALBF4FmQX1BSgGCQe+BsEEuwPBAuEAgv/k/Rn8JPw7/A77bfqO+VX4Cvhq9/D2Q/kC/SwBjgZoC6IOGxHpEE4NPQgpAhL8o/fu84fxkvJS9Vr4Sfyg/4YBzAOsBBsEkAKY/xv8ifo8/AH9vPti/QYDwgVuBbUGsAiTCX0JBQhlBqoE1QE4/5D8d/lz+G/5I/l4+Ln4svke+wz7svqt/VoB+QPbBzULxwyyDpwOTArWBGX/WPqm9TLxlO9f8s72v/qK/qkCRgbzB/kG5wRiA+r/YPyf+cn2L/dq+Un5nvvsASsFfgczC+AM9Q1vDgQLWQejBLL/Rfv4+A324vSP9mj3iPi2+pX8kf7N/xIA1gFYBA0GFQjQCZoKAQs+CYgEaP+P+ub1UvLd8EHyC/al+k7/owOwBukHoAdoBY0CVQCX/O34Xfai9Y/3T/kh+uv+tAWZCDwLNA5RD38PKg1DCF4EPAAg+/H3KfYr9bD27Pg8+nr8Zv6C/48AdgBZABwCIgNbA0kF9QaPB7QHAQW1AHT9ivmN9evzafQ+9zT8ZADMA1EH6Ah2B04FNwIT/yH8G/jW9BHzJfSO9/b5nPwiBE0L2A21D1sRqhH+D6YKnQTuAF38S/cF9S30TPQ991j6Dfwr/jsAZwFqAYgAZgDZADIBdwJMBDkFygWBBe4CqP9+/GL5aPcf92n47Ps9AOkCJgVMBzgHWgXPAif/e/zo+c71ovKc8mv1PPl7/B4BRgkWD6gQeRE9EYAPvQv5Bd4A+/wx+ar20vWV9bf2Xfmd+xj9m/7c/4QAUACT/1//nP9HAO8BnQNfBIkFDwbrAwoBo/4c/Ar6gfnM+lz95f/TAZUDfQQ5BLMCiACV/lT9+fpx9zf2JfZN9hn5GP1QAG8GeAznDRMP2w+yDVEKeAWy/9P8xPp691H2Q/eh+Oz6svxn/fb+LABwADkA0/4u/pj/UgDFAAkD8gQTBnEGTASDAoEBfP4Y/OT7t/tX/Nv9tf5WAFQCIgO7AnIB1v+z/tD7LPiz9qv1yPbC+pD9pAB2B7ALYAz6DWcN+wpnCOQD//9o/lD7qPgG+SL5jPlz+yL8evzd/bz+9v6y/nP+T/+AAOcASQKwBCAGWgbtBQwFaQPAAM399Pup+vX5p/oS/Jb9/f+2AjME8QPpAtkBAABr/Dj4i/aN9fT0rfd8+yT/wwSsCSMM/A3BDRkMOQoJBhMCOQCy/W37Ovp2+QL6evoC+r76/vuT/Jb9qP5X/2sArAFEA+AEiQVFBg8HAAbJA+oBhf/U/Kf6JflI+bH6LPyN/rkBCQSZBe0FFAQMAnP/C/xe+Jf1VfR+8yv11vmO/Z4AkQaBC64NBA7+DKsMDwuaBhUDGQH//c76JfjK9qj2x/bY94/6F/3N/t0AVALJApQC2gKZA/YD+QMpBKkEYgTJArIAFf9F/Xv7HvuX+wz8qf2ZAMkCdQMPA3ACigF
M/or67feN9TH0ofOP9SX7uP/gAuQIvw0RD04Pag14C6MJ+wT9AEL/Qfxe+fr3BfcG98L3zPhy+1j+cf8mAQsDJAP4AkoD1QKLArgCmQI8AzAD4gFSAYgA1/5r/WH84Ptc/En9zf4JATYCUAIRAv8AC/4Z+5z46vWC9JX0oPcK/TABHwU4C0YP1A+yDu4LrwnMBnkB6f2R/Dv6i/gy+E74+/j/+XL7pf1d/3YALAIcAxgDOAM6Az8CyQESApcBDAG2AHEAdgC2/07+9/0K/p798/0t/3QAsAFEAggCEgFl//z8nvrn9+f1x/U59kj5xv55AvEFfQtbDqoNGgxaCdQGEwSX/9r8ofy4+4r6cPrA+j/77PvS/ET+uf/TAOYBfwIYAkcB9wB1AKf/eP/O/6oAOgEuAXoB8gFvAVAAz/9V/zX/wv/J/8L/eQBzAAX/tP1G/JX68/ic99X39fhJ+4b/6gLdBZ4JEgusCsAJ9Qa7BHADVgBo/ob+zP0i/f385fxy/cH9BP4B/2H/Uf+T/3H/Af90/iX+n/40/6v/igDjAQoDUgN1AwgElwPAAVMAiP/r/oT+Hv5L/in/Xf+A/r/96Px/+2X6V/mV+XH6J/tc/roB9QIwBfAHZgi8B38GwwWVBRgDmACpAFIA0f6D/W/9Fv6R/Qb9nv3r/WL9Av0h/U39E/1//cH+xv8RAYwC5wNaBQ8GCQbPBZgEqwJ6ADX+2fx1/DT8OPwv/ZD+xf7b/Vj9r/zM+836QfrY+iT8R/6GAEwCdASEBsQGmAYxBvMECASeAgsBzwCSAKD/8f4M/xz/Cf5M/ab9Y/1P/OP7lvxi/Zb9av5gAA0CdgOIBEMFPQY4BrQEQQMJAtH/H/2q+/D7hfzR/Nb9/v+3AfcAc/94/8D+2/zC+0X7QPze/ET88v1QAGsAPAEYA9YDRAQVBCAE4gQIBJ4CxgJnAmYAlP7b/Vf9Wvy6+xf8ufz1/Dn9Qf4a/5f/CAFRAs0CwwN1BFEEGgQYA9QBGwGF/7n9C/0l/bP9Nf6+/lYA/AGTAW8AOQDS/1/+Ov2z/OL8AP2q+9j7hv0v/Xj9FwDhATsDyATHBTIH+wbHBCIErAMnAW7+Jv3f/Fn8UfuL+wD9yP3e/YX+lf9bABgBmgH5Ab0CGQOkAqgChwK+ATkBgwCj/1z/C/8L/5L/kP/E/2UA/P8e/+z+xv4I/oH9j/2U/Tb9dfxF/Oj86fwp/UP/cwHlAn0E+gXTBnMG7gTbA60CaABB/jX9+/yh/DP85vw0/qf+sv5C/wgAjADuAAoBKgGvAeABcAFhAX0BfwGhAQoBcADIAJQA+f8QABsAAgATAI3/yP6E/jj+t/1M/XH94P32/ZD9S/2s/Qn+BP7n/sEA/wHVAssDVwQfBHQDhgKRAYwAU/9+/k7+/v2e/f/9kf7m/gr/Kv/X/6MA1gAFAUABSgEvAXsA/f9RAIEA3QCKAcQBQQKWAhYCygEaAfH/bv9v/v38tvzl/Az9kP1Z/pL/hABVAO//BAB6/zz+qf06/s3+F//J/xoBKwIrAicCoQKGAscBRwHKAGUAz/8s/z7/Wv8N/xT/f//k/x0ACQAIABMA2/9P/yP/tf83AN4A2QF6AiMDaQOgAg4CaAH+/xz/Rv4Z/fP8aP3O/Z/+Yf82ABgB5ADg/1//xf5B/dv7y/vr/LH9+v2F/9wBjAJ8AiQDkQMpA0MChwGKAeIApP+d/8L/aP9F/1f/t/8CAIL/W/+r/0X/k/5h/t7+vP9+AHYBtQLMAzcEuwMrA4kC9ABj/2P+Vv3m/Pb8Nf0b/uj+RP/h/xoAcf+h/vn9KP0t/Mn7pfzd/ZP+yf/NAQEDFQN0A/ID2gMlA2QCNAKNATwA0f++/x7/CP9J/17/k/9a/z//nP9E/4f+g/4r/87/HgAAAb4CxwOxA3QDaAPNAiMBif/L/hn+O/3F/B/90f0d/m3+Lv+7/5L/CP+k/mb+d/2H/Bf9Rv6c/hv/2wBgAo4CkQJ
BA5MD/AI+AgYCzwHpAAYA3P/J/5P/ev9//7D/rf9d/0r/N//J/mD+d/4H/3f/DgBQAXwC6gIDAxAD/wI+AugACAB6/43+u/2C/bT95f0o/pv+Lv+K/4P/X/8a/6n+5/0z/V39Iv5a/u7+nADSAe0BJAK5Au0CUgKpAZ4BbgGsACcAMQAfAPr/zf+v/7T/ev8E/+L+6f6e/lr+iP4H/6b/dgCnAcwCUgODA4YD8QLnAdEAuP/u/kz+wf3i/VH+f/7s/kz/W/91/yn/VP7w/cP9Bv27/Lj9n/72/vn/agEqAjwCkgIUAwkDkQKJAngCzAEUAZgAKADG/3b/Hv///uj+r/6G/ov+ef58/qP+8P6Z/3IATwFNAvwCJQM0A9gC3gHMANn/Jf+3/nH+rf4z/5T/zv/E/5T/kv8T/yH+h/0t/ab8Afzw+/38C/5i/oT/TAH6AfgBuQKaA5sDOQM7AyUDSgIPATAAzf9e/8X+iP7D/t/+pP6U/gP/Vv8k/xP/xv+5AD4B0QHIAi0DzQJdAs4B4QDZ/w//5P73/gz/f/8JABkA8v+0/zP/r/4X/oD9B/3D/Iz8Lfwx/Dj9Bv5o/t//dQHzAYUCoQM+BEMEHATpAzMD7wGoAJj/q/4G/s797v01/nX+zP5E/8X/7f/j/ycApgAGAT4BxgFkAmIC9AGhAREBSQCu/23/jv+8/wkAeQChAG8ALACd/9/+Pf6Z/fT8ZfxJ/Dj8+PuP/PP9fv7h/mQAxwFLAt8C3wOcBJoEDgR9A3sC9wCO/4/+G/4D/vv9U/4S/2f/cf/C/x0ADQDG/9r/TgCKAKkAKwGSAUoB5gCvAD0Auf+w/wQAXQDDAD0BZgEMAX8A5v/x/vH9c/0k/Zr8a/za/NT8pPxW/Uj+g/4L/24ArgFHAv4CQQTzBJ8EDARvAzYCugBX/3f+Xf5j/mn+4v5r/37/hf+m/7v/h/9e/7n/KgBZANMAjwG6AVkBBgH/ALYATgBtAMQACgFTATsBzABJAGX/Vv5n/ev8+PzZ/K78Kf2q/X39N/2S/Uv+Z/6g/isAtQE4AgYDdQQKBXMEnQMEAwYCVwD6/rH+uP5w/n3+C/9//2//OP9i/3r/Lf8+/7T/FAB7ABABrQHwAboBkgF3ARABxQDXAA4BHAHuALgAWgCg/+L+L/6V/Yb9yf3S/fH9Uf5s/vT9Tv1j/ef9vf3f/Yv/DwGRAYsC7QNhBOIDTwPbAukBdgB9/07/Nf/t/u7+U/9t/xT/5f4A/xH/6v7g/nb/QwDEAE4B+gFCAgkCsQGSAVoByQCDALUAxwCHAFQAJwCs/9z+Qf4h/iP+Gf4u/mn+jf5y/sf9/vz9/GT9Tv22/Xf/LgH7AfYCZATvBGYEuAMZA/oBnACp/0L/C//Z/s7+0/7N/qD+bv57/sX+3P4E/8D/kwAqAZ0BHgJvAhwCdgE0Ad4AUQA7AFYAbwB3AEIA8v+I/+b+f/5a/jz+Ov41/ir+BP57/cv8XPxn/NP8Pf0x/iAAyQGhArwD1wTbBCwEcQOpAqYBegDA/67/mP9g/3X/qv+M/yP/2P73/iT/+/4W/9//rwApAYwBBgJCAuQBWwEdAcoAdwB3AIEAmQCvAHcABQB7/+7+bf7z/d79+f3T/cb9yf2A/fL8Z/yX/GL91f2v/oAAGgLlAoQDIwQvBG0DcwK7AfwALQCf/4f/vv/X/7D/qP+p/1//9P7S/vv+Ef8Y/5D/WAD3AGsBzAHzAdMBkwE+AekAuQCfAJgAywDNAGMA5P9j/9T+R/7c/e/9Qv5O/lT+cv5G/rf9Bf3e/IH9Kf62/vr/jAFKAnIC4AIeA3QClAE5AfIAZAAJADgAuADGAHsAYQBLAMn/BP+w/s/+3v65/v/+zP+TAPAASQHYAQ4CtAFXAS4B+wC5AJQAtwDBAGUA+/+z/03/yv6B/pf+tf6X/l7+If7I/Uj9tfyT/F39R/7E/ur/oAFiAnoC6wJIA+YCOwLaAagBLwGkAKcA8wD
HAD0A7P+z/wv/Q/4F/if+O/5N/s3+yf+kABkBrgEwAgICiQFBAfsApwBwAHUArQCsAF4AHADS/0r/yf5//mf+ZP5F/iD+8/2L/Rz9tPx5/B/9QP7h/rr/TgFoApoCxAIWA/wCVQK0AYABRQG3AF4AjQCUACEAzP+2/1//2P6i/tL+9v7v/k7/KADZACUBfwHhAc4BSgEBAfcAyQCuAOUAKQElAc8AaQAFAGX/0P6s/qr+fP5O/ib+7f2F/Rb92fy8/AT9A/7v/nn/mQAAAlsCJgKAAssCPQKDAWQBaAHqAFkAbQCgAD4Axf/S/9P/U//f/vn+L/8Q/wf/hv8zAIQAsAAXAX0BYwEYAQ0BIwEYAQkBEAEJAdkAfgANAJf/Lf/S/pn+gf5u/lr+Ov78/cL9qv2C/Wb93f2w/hX/gv+SAFgBVwGLAR4CKQK2AY8BpQFSAc0AtAC6AGkALgA1AA8AoP9R/zX/GP8F/xn/U/+3/ygAcAChAN0AAQHnANEA/wApASgBPAFCARUB3gCGABIAr/9N/xb///7J/q3+sv51/in+E/7u/Yv9U/3L/V/+bP7n/iEAwADMAGkBNwJVAgoC+gEKAqkBFwHZALgAYgAcAAIA1v+V/1X/Fv8A/xb/B/8j/7L/JABJAJoACQE2ASwBPAFkAXABXwEyAesAtQBaAMj/gP9d/w7/6v7w/uX+6f7j/rv+hv47/u79jv07/Xj9+P01/u/+HADHAEIBFwKiAqcCgwJWAu8BOAGZAEQACADg//T/AADw/8v/j/9D/yb/Kf8q/1b/wf8vAHQA0gBGAXMBZwGHAYwBVAEhAQIBywB7AAQAp/+E/zf/5P7t/gj/9v7y/ur+zP51/vf9df0H/fb8ef38/WX+Z/+fAFgB5QGdAvECvwJjAgkCgQHXAEcADQAfADUAOQA+ACoA0f92/0L/H//9/vf+M/+4/y8AewDuAFcBawFtAYQBWQEFAd4AxACMAEIA8P+1/4H/I//3/hj/F//v/un+9f7I/jz+sP1R/fX8/PyM/Qf+u/4RACwBxwFtAvcC4wJoAvEBhwHsAFkAIgA3AE4AZgCCAG8AFwCs/2D/Iv8A/+H+8f5p/+7/NgCtADABVAFaAWMBWwExAe8AqwCAADoA/v/t/8//hv9X/3D/if9u/2X/jP9u///+ev4J/mn9q/ym/Hf99/15/ur/QgHGASACngKyAi4CcQEoAQUBpABoAKAA3wDJAHoAKwDt/3z/6/6r/uL++v76/l3/CwB6ALUAAwFiAXcBOgEdARIB2ACFAFAAFQD1/+X/vf+N/5n/xf+9/5j/n/+x/2T/2v5c/vn9R/2Q/L38kf0B/on+4/8aAYEBwQFBAmYCAAKGAXcBbQEGAbkA1gDZAIUAIwDT/57/S//y/uX+Lf9V/23/wv9MAKcAygD7ADMBNQECAcQAowCwAJYATwBOAIoAZwAFAPb/KgAlAMv/o//X/8n/If+n/oz+If5V/cv87Pxl/bf9MP5b/3kA7wA9Ab4BCgLjAZoBkgGVAT4B5QDJAK4AZwAMAMT/wf+2/2//Uv+E/7H/rf+o//H/agB/AHAAwAAgAQ0BzADRAAgB5gByAGcApABjAPz/CAAjAOb/jf90/33/Tf/V/n/+Kf6f/f78r/zn/HH98P2a/sz/zQBJAbUBUgKAAkgCCgLuAa8BQwHrALkAiwBQABkA2P/L/8L/jf9Q/1n/a/9F/zf/sP9NAGwAfgD2AFUBIAHgAPAACQHEAGUAdQCgAEYAxv/G/8z/i/9F/zj/S/9H//7+wf6K/gT+SP3H/NX8Tv26/Sf+Tv+YACkBbwEeAoMCOQLWAcMBpAEwAcUArgDFAKUAYwAwAC0AAQCi/17/ef9v/yP/Lv/T/1oAYwCTAAkBLAHeAMMA3gDbAJ4AegCWAKIAUgAJAAkA8P/B/7b/wP/K/+T/3/+x/1n/tP7R/eH8WPyK/BP9e/1f/rf/qAAeAa4BJgIZAs0BmgGLAUQB4QC
qAK0AkgBjACsAAgDe/43/Pf9J/2z/Qv84/6X/OABtAH4AzQAVAegAuwDRANgApgCAAJEAogB6AEkAUAA+ABQADwAHAO3//f/h/2v/6P5f/qX9w/xI/Kr8Uv2Z/Xn+AADlACgBsgE3AjMC5AG3AboBjAE0AfQA7gDTAIcAGQDZ/6//W/8C//7+Kv8X/wT/ZP/t/wsALQCaAPwADwEPARkBJAEJAcwAtwCvAG0ACgDa/8H/uP+m/4f/lv+r/1X/zP5j/tn9HP1x/G/8E/29/Tn+S/+bAFkBmQEAAmUCTgLwAcwB7wHBAWkBKgEKAcUAZgD2/8X/pP9g/yP/Lf9Q/0H/Q/+n/zIAVQByAMMA/wD1AP0A9gDyAOMAjgBIAEkAGACj/4r/o/+1/6n/tP/g//X/if8G/7f+JP5D/Zn8nvwO/ZL9AP70/hYAsAD7AJEBDwL9AeMBAQIVArkBZQE9AQUBoABRABsA4v+d/1b/S/9W/0D/Kv9o/9b/KwBdAKYA/AAXAfwA6wDFAIkAXQA/ADUAUQBhAFQAWABdAFMARgArAAwACADw/6T/Qf+4/hb+W/25/JL87/xI/bb9rP6y/1oA5wCcARcCQQI4AikCBQK3AUIB5QCuAF0A+f+o/33/W/8m//P+Ev88/zb/YP/h/0gAcgCrAP8AGwHyALcAmQCXAIgAlwDpAP0AtACbAI4ATgAOAOj/wv+s/6v/kP9J/+r+dv7S/S/9zfzk/Df9ev1D/pf/bwDcAJsBKwJYAlcCNAIYAtYBRQHUALUAhwAaALj/pv+G/yH/5f4S/zj/LP9A/8D/MgBJAIEA6wANAe4A3wDpAOUA0wDuABYB+gCwAGIACACz/3v/Xv9N/1T/bf9V///+rv43/oL93/yk/O/8bP3J/af+BADzAFIB5gGEApYCSgIhAigC5gFUAd4A1ACkAB0AxP/G/4z/Iv8A/0f/jf+G/6r/OwCmAJsAowDaAPIAxgClAMcA8ADEAKMAxAC8AE4A5/+5/4f/QP8r/1H/df9s/y3/4v5l/pn9xPxw/Kj8Fv1x/T3+n/+hAB8BuQFvAqkCcQJBAk0CHwKgAVEBNQHxAHIA6/+Z/13//v7a/hb/a/+D/53/CwCOAJIAhwDVAOcAoQCJAKIAlgCGAHsAdABfADMA4v+h/47/jv+Y/8z/DwA3ACIAyP9Y/6v+uP3P/IX8zPw5/Yr9bP7C/5QA/QCmAUwCVwIvAjACXQISAokBTwE5Ab0AGwCv/27/H/+4/rz+GP9E/yr/bf/7/1wAZwCpAA0BJAHrAOMA+ADNAIkAYgBrAGUAHgC7/7T/sf+D/3j/uv/f/83/nv99/y//Zf6P/Q799Pw3/Zb96P3g/gcAigD4AMYBUgJeAkkCWgKBAiUCogF7AUsBrQAIAJL/SP/m/n/+hP7X/vL++v5f/wIAdwCbANwARAFpATYBHQEaAeEAjgBVAEcARQAZAMj/oP+E/1v/Rf9k/4v/k/9y/0r/9v5N/pz9K/0k/Wf9m/3b/bj+mf8VAKYAgAEvAnsCgwKeAr8CcgL3AZ4BVgHdACoAfv8u//b+nf58/q3+0v7j/jv/yP8wAHIAxwASAT8BUgFQAT8BJQEGAdEAkgB7AFcAAgDI/6P/fv9l/1L/Vv+E/4X/XP8O/5v+Ev6A/UP9mP3t/QX+mf53/w4AfAAXAaUB8gHmAcsB5gHUAXQBLwEgAeEAcgD8/7P/iv9N/xz/N/9k/3T/l//j/0oAhwCLAKcA1wDMALIAuQDJAM8ArQCXALIApABNABIAAADw/9z/yf/d/xQACQDC/4v/If92/q39LP0z/W39a/22/Y3+Sf+q/w8AuQA8AUwBNwF6AaEBYQELAfEA2ABxAOH/mf+O/17/Qf9y/8j/5//p/xoAiwDLALMAvAAjAVUBDwHlACQBUAEfAeYA3gDAAFMA5P/F/7//kf9m/3H/rv+7/1//Af/R/kv+Zf3j/Ar9V/1a/bv90v7D/xUAeABCAdoB8gH
nAS4CUALkAWsBUgExAaoAAACq/5j/Sv/1/h//hv+b/4b/zf9tALAAiQDCAD8BWQEzATABPQEYAbYAewCJAFsA4v+k/5r/eP88/yf/Yf+O/1//Kf8S/6j+zv0V/fX8L/1L/Wn9GP4Y/8f/OADfAJcBAwIbAjQCewJ8AiAC2wG2AVIBwgAtAMn/f/8h//X+K/9e/2P/fv/O/zoAbgB3AK8A/QACAeEA1QDZANYAqgB+AH0AUQDO/3//Zv9A/xz/I/9S/47/j/9f/zX/yv4k/ob9Sv10/a39xP1S/kj/5v82AMUAcAHSAecB9gExAi8C6gGmAXgBKwG4ACsAvP+K/0//Ef8V/0//W/9c/6X/PgCkAK4AzQAjATIB7gDOANsAyQB4AEEAXQBUAND/hv+a/57/av9Q/4b/3P/U/5P/iP9M/53+yf1+/bn98P3R/Tz+M//L//7/dQA0AawB0AHjAT4CWgIGAqIBbwErAa0ABACN/2n/Gf+3/rD+6P7u/vP+Uv/y/1UAbwCrABABPwEgAfAAAAEuAQEBqQC4ANAAbQD2/9T/0/+W/0b/RP+I/4n/Pf/z/rL+SP6T/QH9J/2Z/aH91v24/pT/8P9IAP0AuAEAAv0BPAKUAncC7QGFAWoBGAFPAKr/gP9O/9/+ov7T/hX/HP82/7n/SQCAAJMA5gBEAUIBBgH8ACcBEQHFAK0AvgCEAAwAvf+n/5n/av9U/43/yP+o/2P/MP/l/k7+mv1N/YT9sP2v/R3+9/6X/+7/awAJAYoBvwHNAQgCRQIQAqABZAFFAfYAbgD//97/t/9O/xz/Vf+I/3r/h//r/1YAeACGAOEATgFVASABPgFxATUBwwCWAIgARwDh/53/o/+l/2P/Mv9T/0L/1v5v/jn+8f1y/Qv9OP2X/Zr93v3C/or/7f9fAPcAowEbAjMCSwKNAnUCAAKQAT4B5gBfAMj/b/9N/wn/x/7O/vH+Bv8k/2n/0P8yAGAAnAAQAVUBPwE4AV4BZgE8AQIB1QCzAIMAMwALABUACADe/8X/sP9y/wr/j/4k/rn9Qf0N/UP9ef2a/RX+zv5b/9T/ZQD9AJAB9AEZAkkCbwI7AtsBjQFAAdoAXgDw/7z/lP9N/yL/Iv8m/yX/MP9d/7r/EgBQAKQA/wApASwBMwFDAUMBGQHeALsAqABzADQAHwAdAPv/wf+K/1H/Af+K/iD+4P2T/Ub9VP2b/c/9IP66/l//6P9UAMkAcAEBAjACQgJuAmcCEgKlAUEB+QClACAAuP+X/2r/I////gH/If9M/3P/yf9EAIUAowDcAPgA2QC2AKgAswCwAHsAVgBpAFgAIAATABYA8//E/6b/g/8+/93+h/5V/g7+pv1//bP91f3Y/ST+vf5T/8P/KQDGAIYB8gECAiICQwIaAsMBcgE3AQkBsQBFAAYA3f+a/1//TP9P/1P/Yf+L/9P/FQA+AGwAkgCNAHkAeACEAIsAjQCbALYAwgCyALAAxgDEAJIAYAA7AAIApP8///D+t/5p/gH+xf3P/ef97f0P/mn+0v4h/3b/8f9xAMIA6gAOASgBFwH7AAQBCwHkALoAswCkAG4AOQApACkABADQ/87/9/8OAA0AFgAkABoA///q//T/BQAPACcAWwCSALsA5wAYATwBRwE7AR0B8gCvAFwAEwDf/5z/Pf/r/sT+ov5z/lv+dP6Z/p7+rf7v/kb/d/+N/67/0//a/9H/1//x/wgAHQA+AF8AdACCAJQAnACOAHcAbABmAFMAOAAiAA0A6/++/5b/fP9z/3n/jP+l/8n/BwBXAJ8A1QAPAUcBXQFNAS4BEwHyAMAAgwBKABYA5/+2/4b/Xf9I/0n/UP9Z/3H/nP/M/+X/4v/S/7z/mv9q/0D/Mv8z/zT/Pf9X/3z/nv+9/+D/AwAgADsAXQB7AIgAhAB6AGcARAAZAPb/2v/E/7n/xf/w/ysAaQCqAOkAGwEwASsBEQHrALwAgABDABEA6f/D/57/hf9
6/3b/bv9q/3T/h/+a/6b/sf+7/7//tP+k/5n/l/+d/6n/uP/G/9P/4f/y//r//P8CABYALAA2ADwASABPADwAEgDo/8f/o/98/2r/d/+U/7b/6v8uAG8AoADHAO8ABgEFAfgA6wDUAKYAcwBFABkA4P+r/4j/d/9t/27/gf+e/7v/0//i/+r/6P/Z/8X/t/+4/7//yv/Z//H/AQAAAPb/8P/s/9//2P/m//3/DQAWABwAFQD4/83/o/+B/2L/Tv9U/3T/mv/F//n/MgBhAIMAngC9ANYA4QDgAN4A1AC6AJEAawBNAC8AEQAAAAUAEAAUABsALAAyACIACQDy/9b/sf+Q/4v/lv+Z/5v/rP/B/8L/s/+u/7b/vf/B/9X/+P8aAC0ANAAyAB8A9P+//4v/Xv8+/yv/Lv9J/3D/nP/M//7/LgBPAGYAewCOAJIAiQCAAHEAVwA4AB8AEAAFAAMAFAAvAEsAYwB+AJIAkwCGAHUAZABHACQACQD7/+3/2//Q/9D/0f/M/8v/1f/l//L/AAAUACgALwAqABsABQDk/7n/j/9u/1X/RP9A/0r/Xf9z/4z/qf/I/+L/9/8MAB0AKgAwADAALAAjABkADwALAAwAFAAhADcATwBnAHkAhwCMAIoAfQBuAFwARQAwABsADgABAPX/6v/j/97/2f/Y/93/6f/6/wwAHgAwADgAOgAxACIADADy/9X/t/+g/5H/iP+H/5P/qf/B/9b/6v8AAAoAAgD3//D/4//M/7b/rf+t/6f/pv+1/9D/5//+/yAASABpAHwAjACbAJ0AjwB9AGoAUgA5ACMAEAD9/+v/3v/V/83/y//O/9r/6//6/w0AIwAzADQALgAmABcA/P/e/8v/x//F/8X/0v/u/wQADAANABUAFwAFAO7/4//b/8X/q/+c/5X/jP+B/4P/mv+x/8z/7f8ZAEEAWgBsAHwAhAB6AG0AZABdAE8AQgA7ADUAJgASAAUA///2/+3/8/8FABUAHwAqADgAOgArABgACgD2/9z/yf/E/8v/0f/c//L/CgAXABoAHwAiAB4AEwAHAP//9P/f/8X/sP+d/4j/eP93/3//jf+f/7v/2//1/wMAEQAfAB8AIAAeACIAHwAaABcAEAAAAPX/7//g/9D/1v/b//f/HQAtADsASgBTAEgAPAAmABMA/v/c/+L/3//p/xAAAACgAPQAoAC+AJMAdAB1AFcAUQBRACcA8f/b/6r/av9F/zP/Xv+P/5z/0P/U/5P/gv+X/33/lP+4/8P/9f8iACcADwABAPD/0v/b/+b/1v/f//7/FwAjAEoAdQCBAIQAmQCUAIcAkgBwACsA+f/z/93/x//d/+j/+v8hABYABgAOAAQA6v/W/+v/7//y/xQAAADY/9v/0/++/6j/pf/I//f/GAAVABEACwDP/67/wP+7/9r/HAA9ABgAJwBWAD4AHwANACoAKwAEAAcABAD9//X/6/8FAAAA0v/G/9//uv+Q/5P/h/9v/1//eP+S/8f/DwAqAFwAiACKAIwAawBYAIUAcwBAAHEAigBNAEIAPgAJANj/7P8LAO//DAAgAB4AQADy/7f/0f/j/wIA9P8UAAkA2//m/5r/VP91/6H/rv/+/2AAAgDC/0UA6v+G/8v/yv/b/+3/vf/N/+3/zv///yEA9//u/xoAHgASAB4ATADAAJAA7f8nAC8Aq//p/w0A9P9bAI0AcQCGAIMAKgBGAGwADwAkAD4A/P8KAPz///9FACIACAACAAMAvP+Q/5L/Nf9X/7v/sP+I/7T/yf/p/yMABADa/7b/dv+H/6H/rP/m/xYAHAAUABsAzP/x/wEAV/86/23/NP97/xwA/v8HAFkAJgD8/zYASgCHAKMAjACrALMAtwDMAOcA7AD/ANEAUwBZAGQANgB4AH0AMwD8/+H/6P/w/8r/lf+b/7f/m/+o/xwAYgCIAIAAkAB9AP7/zP90//j+CP8Y/yX/eP+1/3r/Pv+J/5H/If8
A/wP//v7l/p/+hf6y/uL+0v4l/8f/9//4/1MAqADCAAYBYQGGAYABpwHzAR8CAwLGAWUBBAHAAIUATQBBAG4AVQAvAC4AEgDX/8D/nP9d/5P/3/+0/8j/KQAzAHwAvQC1AL4AeQBcAGYAHwDj/83/xv+p/4D/Uv8i/zf/9f5O/ln+c/7Q/Y394v3B/Z/9DP49/oj+/f4n/33/xP/i/1oA+gBUAbMBAAI3Aj0CMwI9AgoCzwGvAZsBUQHlALYApgAEAGf/L//r/sb+sv7F/g//R/9C/5D/5P/z/zQAlwDgAAcBLwFdAY4BrwHnAQMCAwL5AbYBUwESAbkAFAC//3T/AP/K/q/+lP5X/u79pv1u/Wf9PP0C/Sr9Gv0A/Vv9y/3r/Sj+vv5j/6T/HQDdACwBZgGyAQwCUwKWArwCswKpAncCBgKgATABwQCYAGkAIwDU/63/jP9C/x7/B//B/rr+A/9A/4f/zf9JAL0A9QBVAcAB4AH5AS0CLgIsAjoCGALdAcgBdwERAdcAZADO/yz/jf4c/tb9j/1O/f38qvyg/LL8hfxx/I/8iPyt/B79Z/2k/TD+uP5H/xMAtgA9AQ4CsALyAmwD0QPJA8IDwgNkAxcDvQLZATUBxwD1/0X/4/5P/s79rf2l/ar94v0R/j/+df6c/gP/cv/q/5YAKAG+AVMCrAIJA1oDWwNqA4EDOwPRAq0CfwLcAVIB9wBGAH//4f5U/uX9Yf3Z/KD8Yfzp+7r75fur+4D7xvvr+zP8kPzi/KD9T/6t/nb/fgBbAUcCIwO4Az4EqAS0BKYEZQTWA1ADsgLvAVMBtQAUAKL/Mv+1/kn++f2o/XX9UP0a/Tb9lP3T/Tf+3/6I/yUA1QCRASQCwAJWA64D6wP/AwQE+wO2A0wD3AJ/AgwCbQHSACsAif/V/in+v/02/a38ffw3/PH71/vQ+wL8MPxc/Jf80vwg/VP9iv3y/WX+5/5w/x0A4QCOATUC4QJrA6gDvQPzA9IDYwMgA64C+gFaAcoAHgCW/yr/m/41/gD+sP2N/Zb9fP1+/c79MP6Q/iP/xf9dAPwAsgFZAtICTQOyA+IDAQQWBP0DwwN8Aw8DmQISAlsBswAjAID/7v6R/h/+nf1p/SP9zfyt/H/8Zfx//Ib8mfzh/BL9Jf1u/av9wf3y/Tv+mv7x/l3/CQCaAPwAnwFQArECAwNsA5MDbAMrA9YCUgLAASoBiQALAI7/AP+n/mH+Av7E/cD9p/2Q/dP9I/54/g7/uf9vACgB2AGKAicDkAPZAyQEPwQfBPcDwwNrA98CRwLMAT0BlAAVAKf/LP/A/ln+Av7N/Xj9PP0y/RL9/Pwo/WT9kf3R/Rb+Pf5E/jT+HP4J/sz9kP1v/Wj9Yv2c/Tj+w/5f/zYA+gCRASwCvgIVA0MDYwNDA+8CawLtAVcBsAA1AMD/T//w/r7+nP6T/rD+0/4F/0L/c//E/y8AkAD1AIoB/AFEArQCFANKA2oDhAOBA1IDAQOoAk0CzgFBAdAAUgC//zn/2/6D/iT+9P3h/bf9nP22/b79uv30/SP+HP45/jf+3/2C/Sv9w/xX/BH8//s0/HH81/yk/Xn+Vf9CADwBHQLCAk4DxwP/A9QDqgNzA8wCKgK0AREBZQDi/4H/EP/Q/rr+pv6q/rL+6f4e/zv/af/L/0YAlQAHAawBQgKxAh4DlQPHA7gDuQOhAyoDmQJFAs4BGQGSACMAo/87/+j+if5A/j3+Kf4T/jD+Y/6S/sT+AP8a/yL/JP/6/pP+G/6f/Q79Yvzo+4v7M/so+2f7vPs1/Av98/3G/r3/wACYAUoC8wJ4A64DywPMA4UDKQPSAlkCzAFtARYBpQBWABcAv/95/1T/Mf8D/wr/O/9c/5X//f9sAM0AQQG/AQ0CTQKZAsQCsgKZAowCUAL2AbUBdwEaAdgAtQBrACQABQDb/4z/Vf85/wH/3f7Y/sn+rv6v/rH+h/5a/jH+5/2D/SH9rfwP/IT7Lvv
H+nP6fPrU+jr77vv7/Bb+O/96AK4BmwJlAxEEgwSdBIYEWQTzA2ID1gJOArIBNAHTAG0AEADO/5X/bP9O/yj/FP8p/z//Wv+p/xIAYwDBAEIBqgHqAUACrALiAvMCFQMkA+YClQJdAgACdwEbAdAAXQDv/8D/hv8j/+/+zf6T/ln+VP5H/in+I/4i/gL+1v21/Yj9Rv30/Jz8Nvzs+477TPtI+1v7c/vT+4r8G/3G/c3+wf9vADkBJQKyAgEDfQPNA8cDswO/A58DQwMDA8wCZgLnAYUBNQG2AEoACADC/37/TP9Q/1D/Y/+Z/+3/UgCxACcBlQH1AUwCfgKsArwCqAKOAnMCSgL0AcABhQEaAb4AdAAbAKj/Xv8i/7n+a/5D/gb+uv2L/XL9K/36/PD82/y9/MD83/zt/Pv8K/1T/XD9dP2M/ab9lf2L/aT92v3r/SX+qP4n/5j/RgAZAbABMQLhAngDqAPFA/gD8QOGAzID5QJbArwBUAH0AHIADgDh/7//nP+e/8n//P8wAH0A0QAkAXEBvgEEAkMCcQKOAqwCswKcAnsCTQIDAqsBUwHyAIwAMADQ/2r/E/+5/k/+7f2c/UD94Pyh/Hf8Tfw5/FL8fvyv/P78Z/3N/S/+mv79/kD/Zf93/2f/LP/U/n3+HP6y/Wb9Sf1I/Wz91v11/iP/8//oAM4BkAJIA98DKgRFBD8ECwSjAy0DtAI1ArABQgH1ALkAhAB0AIoAnwC2AOoAMAFbAXwBrgHVAdEBxwHKAbkBigFtAWEBPAEOAfgA5AC2AIgAYwAqANv/h/8p/7v+Qf7P/WX9Av2r/HT8VPxJ/F38lfze/Dv9sv0z/qf+GP+L/9j/AwAaABgA4P+Q/zv/0/5c/vz9uv2G/XX9kv3Z/Tj+u/5a/wYArwBTAeUBYAK7AvECCQMLA/cCywKhAoACXQI8AjYCPwJCAkQCWAJmAlgCQQIxAg8C1AGbAXABOQH2AMgApgB6AE0ALgASAOn/w/+n/4T/Xv8//yX///7c/sD+mv5x/lH+NP4G/uP9zv22/Zz9m/2k/a79yP32/SX+Vv6T/s3++v4f/z3/R/9D/zT/Hf/5/tj+vv6s/qn+vf7j/h3/bP/E/yUAhQDoAEABhwHJAQQCKgJEAmQCfgKMApwCtALCAsICwgK9AqYCfgJUAiQC5AGlAWwBNAEBAdkAuwCpAJ8AmQCbAJsAlACCAGgAQAAFAMP/gP8y/+n+sP6C/lz+TP5U/l3+cv6W/rP+xP7X/uD+0f64/p/+ev5K/ib+C/7y/ej99P0N/jD+Y/6i/uD+G/9P/3v/mf+m/6X/m/+M/3r/bP9v/3//mv/K/wwAXACxABEBdwHVASoCegLBAvICEgMlAygDEQPuAsEChQI8AvIBrAFiAR8B6wDCAKAAjQCHAIQAhQCGAIEAcgBbADcABgDM/4//S/8L/9b+q/6M/n/+g/6Q/qj+x/7l/gD/Ff8f/xv/Df/1/tP+rf6H/mT+S/49/j/+Uf5y/qD+3P4b/1j/kf/C/+T/+P/+//f/5//S/8D/s/+u/7b/y//w/x8AWwCdAOAAJgFqAagB3gEOAjUCVAJpAncCewJ0AmICSgInAv0BzgGdAWsBNwEIAdsAsgCNAGsASwArAAwA7P/M/6n/h/9m/0X/Kf8S/wD/8/7u/u/+9P79/gn/Ev8a/xz/Gf8Q//3+4/7C/qH+fP5a/j7+K/4h/iX+Mv5M/m/+mv7M/v/+NP9n/5X/vv/i//7/FwAsAEIAVwBuAIkArADQAPQAHQFDAWABdgGDAYkBgwF2AWUBVAFAAS0BJAElASsBOQFPAWYBewGLAZYBlgGHAWwBRgEVAd0AnwBjACgA8f/C/5//g/9y/2j/Zf9m/2f/af9p/2P/WP9H/zL/F//6/tz+wP6m/pD+f/51/nT+eP6E/pf+r/7I/uH+/f4T/yb/NP9B/0v/Uv9Z/2X/dv+K/6f/y//0/yMAVgCIALcA4gAHASQBOgF
HAUoBQQEzAR4BAwHkAMoAtQCkAJ4ApgC2AMoA5wAHASIBNwFKAVMBSwE5AR8B+ADHAJIAYAArAPv/1v+9/6v/o/+m/67/tP+4/77/uP+n/5D/cv9L/x7/8/7K/qD+e/5l/lb+Sv5N/lz+a/59/pv+vf7X/vP+Ff8u/0L/Wv9w/33/jf+j/7T/wv/d//r/EwA1AF0AgwCnAM4A9QAUASoBQgFPAU4BRgE6ASQBBwHrANAAswCaAIoAgAB8AIEAjwCiALkA0ADqAAABCgENAQUB8QDOAKQAeQBGABEA5//G/6j/lf+R/5b/l/+e/6n/qv+h/5T/g/9k/z//Gf/2/s7+r/6b/oz+hf6K/pj+qf69/tn+7P79/g3/Gf8f/yL/J/8v/zX/QP9S/2f/gv+j/8r/8/8cAEgAdACfAMUA6QAHASABMAE6AT8BOgEvASABDAH1AN4AyAC0AKEAlwCRAIwAjACRAJMAlgCYAJkAlgCOAIYAfABxAGQAWABJADoAKgAaAAUA8v/a/8P/qv+T/33/Z/9T/z7/Mf8g/xP/B////vj+8P7x/vL+8/76/gX/EP8b/yz/Pf9I/1b/Yv9t/3P/fP+I/5H/nP+u/8X/3f/2/xgAOABVAHIAjQCiAK0AvADIAMsA0ADWAN0A3QDlAO4A8QD1APoA/gD0AOoA4QDLALUAmwCJAHEAWwBTAEgAQgBAAEEARQA+AEEAOAApABgA///o/8L/p/+N/3D/Xf9J/0P/Of81/zv/OP8+/0H/SP9O/0z/V/9V/1L/Tv9H/0P/Mf8s/yb/HP8d/xr/JP8o/zP/Sv9W/3P/jf+t/9D/6/8bADkAWAB6AJcAswC3AMwA2ADUANMA0ADVAMwAygDWANgA2QDcAOcA5QDZANYAywC5AKMAmQCGAG0AYgBZAE0AQgBDAEgAPgA+AEIAQQAvACEAHwADAOf/1v/E/6j/i/+E/3L/X/9a/1j/Wf9T/13/Zv9j/2r/cf9x/2//Z/9m/1T/RP9A/zH/Lv8u/0P/W/9h/3z/mP+h/6P/rP/E/8T/yf/u/x0AJgA3AHEAgQByAG0AcwBuAEwAWAB1AHQAfwCZALcArACpALUArgCmAI8AlQCgAIAAjACbAJYAigCKAJ4AgwBxAGQAWAAuAPb/9f/e/7v/tv+9/7v/kf+d/43/cv9s/13/X/9z/6X/pf+f/+L/5v/h/8f/dQB8ARIBEQFXAYQAA/+r/bn87fui+//7tfzk/Tz/WQBHAdwB/gGmAWAB4wAuAOr/MgB0AMQAbAGzAZgBmQFUAW4AkP8U/4/+6/30/Yr+7P6v/90ArwESAmsChQIHApEBMwGlAGsAtwD7AAABUQFwAf4AfQDv/yr/ff5W/lf+g/4M/7b/JABrAJQAVQC4/1f/J//l/gD/Wf/D/xwAZABmAA8Amv9A/9j+gP6L/rH+Af9m/8v/+v8AABwABgDR/8X/q/+b/8T/+/8eADUAbQB2AF8AXABEABoAFQAaAAgADwAqAE8AXgBuAIMAagBbAGAAPwBKAG4AfACjAMUA2wDKALYApACTAHMATwBWAGIAXQBXADgADgDr/9r/wv+m/8P/8/8PAAgAAADu/87/kf9P/0v/W/9O/0n/if+W/2D/bP9w/zn/Ef8Q/yX/Kv9P/4n/sv/q//v/HAB2AJsAlQCuAMYAfgATAO//sP9w/4D/rP/D/+n/RAB0AH4AqQDIALkAfABjAIEAYAAnAFYAswDiAOgA8wDsAJMAMADa/5P/Zf9Q/2//vv8eAFwAegCXAKIAYgALAKH/Jv+q/l7+UP5P/mj+qP4N/3L/tf+z/7f/m/9J/yH/Y/++/zgA9QCsATcCnQKtAmQCFwLAAUcBAAEyAVkBVwFRARgBmAAUAGv/of4U/tz9zP0F/l3+o/4t/9v/ZgDNAHIBGgJcAkEC+gGLAQABZgDs/3b/DP++/m3+Bv5j/bD86vtV+wr7t/op+o/55vhz+BT55voM/en/QAR5CO8KYAw
oDUgMYwq5CLcGUgSZAlcBuv/n/U/82/rW+RT5f/iz+Kb5qvrJ+0b9oP7H/00B3QLQA6UEdgW3BSoFHgTGAkMBGwCL/4L/l/+R/+3/RAC8/8z+QP76/fX9Df/pADACQANbBN0EagQ0A4wBjv/D/Sv8yPq4+dn47PeP9z/3sfa39Y/1APig+2H/vgPACCYMzg11DkUNiwowCOAFWwMpAef+/fyK+/n5Y/gN+Fz4wfgX+uv7gf1M//YAFAJtAxcFGgaOBgIHIwe/Br0FzAOqAQ0Ajf43/X78Tvyt/Nn9+f5s/+r/fwDHAAABSQGWAQgCnQLDAkECoAHMAHb/qv3d+7v6+/nf+O/3TPcQ9yP3O/fX9i32h/hg/ewBLAbJClkOSxBXEAsOPArlBiYEIwHN/nP87fpq+sD52/j0+OL59PpZ/LX91/4EAP8ALgGrAZICqQOXBBYFVwWGBTMFbAMwAWT/AP5N/Wv9sP2D/j8A+AHGAn4C5wFeAfAAkQCJAO4AhwFxAgADgQJyAXEA2P60/NL6hPlO+Dj3P/aP9aP14/XZ9oz3Ifpa/7sEhAlaDe0PQhAtD1cM4gebA6gA8/3O+wf6evjz96n38vfR+Kj60Px9/7QBwwI+A/wCXAJfAbgAjwBdASoCyAKGAxcE8AO3AkUBvv8Y/wL/cf9tAL4BJgM5BBYEnQI7ARgAg/9+//n/nAB5ASQCiAHS/7r9kfuj+az4h/iH+LX4nPia+KH4P/gY+JT4KPyhAQgHMQyEEJkSoxG1DgcKggTB/zr8WvnU97P2PPZi9sD2L/iC+mf9agCEA3gFJAaYBYUDCwFP/2r+/f3Q/mIA2QHpAusCQwIPAY3/iv50/vz+RgDnAWUDgQTqBG0EMwMlAjoBtQAEAXgBtwHgAYABfgAw/4f9y/uf+gL6o/kP+dn4EPi59/b39vdN+Kz4bfwsAt8HwAxrEHoSrBGxDqIJ2APC/iH7afjj9h/2bvZC9x/44fk3/Pz+sQE9BG8FvQW+BEQCc/8B/Qr8F/wk/e3+JwEjA+EDSwM6AvIAxP9N/2P/1gDGAlQEVAVmBWMEhQKVAD7/Mf+v/6MAbAEiAjAC9ABa/1r9iPtf+u35FfrD+Rz5bPgp96j2afYz93n4P/w9A+AJXA9FE8cUDxM9D+YJCATI/iL7Xvh39lD1kvTW9O31SPjO+/b/5wMCB1cIXgf5BBoB+fwN+iX5NvoS/MP+owEJBAgFbQQRA+oBAAHkABoBjwG+AqIDFgSWA0ICrwBN/73+Ev/1/yEB2gFyAoUCYQGa/6v99Pup+tL5sPlD+V/4kffK9hD3IPdJ+L75jfwJA+8I9Q0EEu8TJBOhD1EKUATQ/hf7Mfgz9or1PvXy9Rv3Lvlo/E8ASAR7B/MIUggLBgsCX/2/+ej3Y/ge+vr8VgAXA6QEkQSiA1MCpAGNAZsBkAGqAtIDQQQJBMkCfQHR/5P+JP7f/sr/oACBAdsBWgEfAAL/m/1W/Mj7r/sr+y762fjb9yT3ZPfT92n5UfuF/n8EvAnFDfcPbxHNDxkMFgd5Ae/8zfl59wj2Ovba9gD5KfsD/jMB4ATmB0oJpgiaBcQBUv36+Bf2x/XO9xL7l/7eAW0EywXABfMEAQTzAgwCswGMAQwCMwIhApQBSQD9/hX++f2P/gsAjAELA5YDCQNdAV7/xv0t/DT7g/pR+uL5p/h+94n2BffE9135Vfso/sUDDgkmDVYPaRD6D2kNKAiWAhX+lvoP+CH2IvbJ9gH5qfum/vkBUwVWCNQJ7ghiBf8A4Psb98nzw/IW9fH4kf0xAskFEAiACAMI4Aa8Ba8ERQMiAtEBbAHZAD8ATP99/vz9E/4j/uf+VQDJAfkCwwLDAZYAXf/n/Uz88Po1+s754vjD97b2mPb391/58/p9/N8AxwYdCxgOnA9MEF0OswnzA9/+mfp6+OH2ofa29xb6If1y/2sC3wRsB8wIQQhFBeUARfyf9wH0NvK586j3YPyPAQYGNgn
VCk4KhwijBgAEqAEDAAf/Tf+P/87/dv+f/uj9cv1i/QX+N//rAOwCEARhBGgDvwEaAMT95ft7+nH51fjj94v3S/cA+Eb5jvpm/Hn+NAM9CEkL5wwiDnsNzQniBEIAqvyt+Yj4Gvgg+R77Pv1M/0IBbgObBaYHzgdQBvwCuv4Q+oP17vK+8nn1PfqI/+YEXwkEDEwMrArsB5QF4gJtANz+B/6I/oT+Nv6H/Wz95P1x/uP+5ACWAy8F9AW/BKwDvQGm/5b9M/uJ+dH4MfgF90D2Y/Zw99H4mPo1/Af+JgGxBW4IVAopDC0N7Qv3B5ADi/8d/G753feD93r5Efz6/m4BsQPVBSwHYAe1BacCcf44+vT12/L38UD0cPlQ/wQF6glWDcMOhg0hChgG9wF4/mH8/fre+gT8T/1J/pv+aP+FAFEBWwLQA54EFQYyBhwEmgHd/n/9k/uI+XT4N/hh+FD4zPd0+FT5u/re/Gr+vv/PAZsF6Ad1CTcKvwq4CbMG2ALm/hD8Gfov+Tn5q/qL/Oz+DQHfAiUEAAUtBcUDxADv/Fr5UvbN9Hn0Ifds/CECkAdNC80NOQ5dDK0IkQTMADr9Dvsz+n/6rvtD/Yz+q//LAB0CLQMSBfIGOwdvBlgEoAIiAE/9Q/uT+Yn4zveK92n3qPdk+Ar6bfsg/TT+Ef+z/2UBIwTkBe8HCQk/CtoI/AV6Arj+mvtY+dj4Cfo8/Eb+2QCbAjAEpwSMBIUDggF2/tD61ved9SD1PfYh+tD/KQVECdALDA0uDDMJ8gQFAe79Ffu0+Vf5jfrz/F3/hAHPAu0DxwT1BFYFUAWdBNsDjQJxAU//JP2c+8D5XfiT95f3fPj8+OP5rvum/Oz9qv6K/vf/VQMvBlwIcQljCggKFAfWA4X/YvyM+qP5xfkq+zT9fP8nASYCZAOgAzQD5QG0/4v8dvm39pT10/UQ+OP8eQKTB8YKogyvDIoKzwakAuL+nPsd+tz5jvqA/LL+4AA3AlwDPwSaBPkEeAVKBTsE7AKIAQwA7P3z+0P6Hfne+Ev5JPoH+1L8n/2K/pv+Ff7W/ab9x/yk/B7/DQJuBJoGRAjVCMQH1gSSAe3+kP1M/S/9av0N/if/2P9IAE0AjgDAAA4ACf5p+3D5jvh4+E35m/yTAW0G2gmTC9UL+wmxBnUCbf7R+zr69/mK+vf7L/5lAPgBZANeBCkFhQVKBecE3gPPAv0BsAC+/sv8GfsA+sz4Yfgt+Xv6Sfx5/lwAnQElAXD/Cf7k+yP5qvls/d0AZwSfB40K0wpYCOEEGAHM/gD+uv1y/dT9dv4H/3P+iP2Y/Tv+k/72/f78P/z/+6X7qfsi/ScAQwQdB64IVwmICFMGcwK6/g/8tvpy+/78Gv4OAOsBmAMnBL4DXwQyBKYDDANjAvQBgwHTAK//iP3l+xn7z/lA+bn5lvv1/d//DwILA6sCWAGE/l378fer9SD1Svgt/UkCUQf3Cm0MzgoDCG0EmAG9/wf/lf6c/nT+tv05/Nf6h/r/+k38Vf2d/v/+Mf9E/8j+f//4AEkD/AWVB1YIRQfzBJACJv8r/IL6j/pA/P/9eQArA98E8AWkBRsEfwLxAK//Wf9//xcA3AClALn/Av6j/Nj7Yvv8+339dP9aAWICdQJ+AXb/+/xj+vH3nvZJ9hn3qvtJAGQE8wcRCi0LogkHB00E/QGWAHn/Lf4s/df76PoB+lf5yflW+7D9if/sAEYBWgGcARQBiACGAO0B5wNwBHkEegSrA0ACoQDN/rz9r/0P/9IA4gEqA/oDzwNZAnwAO/+6/j//WQAoAUsBRwGCAEL/xv0r/bP9Tf5R/yEAuAAhAegA+P/k/qb95Pxn+3z5qfj+95b3jve4+pD/wwMHB8cJSQs1CkYI1wUQA7QAi/8R/jD8QPoZ+YX4Afjo+Pj6rf2pAAgDmARtBQQF/AOFAvcAegCsACkB4QEXAi8CowHYADIAkP+s/4cAcAHuARoCCALbARYBIgB
e/yb/h/+tANMBEAJxAgMDpQIDAd7+Xf08/Ov6wfpe+7n8sP59AMgBvgFbAEn+mfp891L1+vPy9z39CQI1BsMJcQzaCwEJyQXlArwAnf8u/UD7q/mO+OD3Rfdq+En7Sv8fA6sFjAaoBkIFcQLz/tb8NP38/s8A4gEiA0oERgRZA88BsQC1AFUBcAEmAWgBYgFoAVMAAP+N/on+cv80AFUB2QIjBLsEegOjAOP9z/tg+rj5W/oY/W8AbANFBUgFIATFAYf+Ofoz9vXyDvKp8bjylPhR/1QGGgtfDgoQuw6fC+gGmgGE/Zv6zvfj9bj07PX496L67f1vAecEIAfFB5UGQARgAfL+qfyl+7P8i/+QAj4EfgXiBSIFmQPCAdv/vf67/nn/FwBJAKIAhgDY/1D/dP/X/94AbwJ1A0gDLgLXAKr+GfyP+1r8DP4JAFgCwQR4BTEFxwNVAWP+f/uG+NL17vOC82303vUR98P7vwIKCFULzAy0DesL6geXAgD+/frt+TT5bvi7+Cf68PvW/Df+YAB3A5YFTQayBG8CxQD7/vf8n/tC/fQAZQQzBg4HzgZ7BQIDbAAl/hv9Cf74/l3/Rv/S/4MA7P83/yz/z/84AUMCpQJaAh0C0wJ5AjcBpwCQAEcBFwFVALT/uv5g/jj9iPvs+uH6FfvO+sz5p/mm+ZH5Nfmw+wQAAwQ7B9YIXgmjB4UFkgLg/8b9Sf0X/ZD8lfuX+rv63vqB/HL+EgHrA9UFnAUmBHwCsAAc/7r9XP4aAGACLQSrBNEE7AOwAr8BNAAi/1f/cf9j/+P+XP5z/k/+rP60/1YBoAPVBfcGjgZxBTYE5gEf//j8pPuO+9f7z/wV/i//KwCdAOcACgHFAD8AvP7N/L36F/iW9ffz7/L/8mn4Nf/DBEwJpAzZDpANgwr1BVcBNP69+6n4/fWx9Gj12fZm+O77JQHtBtEK6Av5CnYIUAVHAdH8VPqo+l79eACBAkgEWgU4BYYDGQHv/u78XvxD/HP8oP2g/x4CvgPtBCMGjQZFBpQFBgR3Adn+Z/3S+0D6N/or+0r96v8oAl0EUQWIBbsEKwJq/138/fm19/D1PvUh9b/0G/Uz9dX2Df70BMQKIw5kEPUQFA2KBnD/LPoE92/17/NT9GL2Evpc/WcAPgQPCAILswvBCS8GcgLo/t/7NPny+NP7KwAuBEkGrgbzBaYD6AAT/iv8WPwY/lAAmQI1BB4F6QQaA5IBSgCx/+n/swBgAVsBVQFLAZsAVf/a/if/4/+RAJoApgAJANX+/v0B/T784vuK+277DfuC+r352/gW+E/42fd4+u8A/wXICXILVAwLCjQFzf/3+uX3j/em+On57fvT/aQAaAINBFQFmwZcB4YGPwQiATj+pfsP+pD5UPtx/70DvwYOCLkHOgYAA8f/Zf0G/C/8q/1//1QB4gIRBI4ECQTKA7kDsgN8A/4C8gEIAH/+iv2F/Nr7JfzB/QYAPQELAhoCgwGGAIL+kfwf+676cvr++Z75dfmu+d/5f/lO+s36Hv/aBCIHpAg2CLoH8ATRAN/8qvpF+nX7dfwb/Qb/RgGIA4UDbgOKA2kEKgQ+AjAAm/7x/YD9Kv1H/ZD/vAIgBWkFagSfA24CLgBC/pj9Lv43ADsC6gNeBa4GhQdzBsgDUQE7/zH9gftz+nf68Puc/nUBTAP4A8QEmAR9AgEALv4U/eT7KfsI+7z66Pr7+nH7Yfyc/Tr/SADl/+f+w/yM+kb4YfUL+Cf90AHoBRwIFQlxB5cDgf8o/C37YP06/w4BMwLfArACZQG8AMEAvgGYAuYC+AERABf+3Pz8+937Bv4jAu4FWgg2CYsIpwagA9UAnP67/an+IABoARMCJgJmAXj/ZP1s/L78oP60AUQELwbFBgAGkANh/7v7ePn5+Nv5oPui/Vj/HwA2APH+Vv3C/LD8Tv3d/W7+rf6D/nH9Dvw3+jz54/eR+CD9KQBrAvADAAUCBSIDCgFeAOUADwP
TBO8ECAQuAtoADv5X+4r60PvU/Xn/lQBbAT8CtwK6AlEC0QK0BJAGzgazBacEdQNFAaH+5/z8/GL+NgBlAa0BQwFFAdMASv+H/kr/LAE1A9UEdwXqBEED2QDt/GT5D/hY+M/5I/sh/Un/HQAAAPH+Qv6B/mD/kABPAfAB1gHIAHL+fftf+eL3Gfd590z46fhG+SX8KgA6AqADIgVtB6kIJQhLB0gGAgX5Ao3/RPzM+dj4i/lf+rT8xAClBDYHDgj8B3UHAgZmBJ0CRgHsAGYBbQEuAIT/6f9VAOj/iv8FAPQAlgGpAVQBzgCMANEA0ABGAMAAhgHEARoB5P+q/h/99fsj+1L6Ifoo+1z8fP1R/jT/mQA9AQ0BYACS/7b+1/0z/J760/m5+az51fdI+a39rwAYAhMDmwRkBbME+gLjAYMBTwKIAq4BLwBB//L+1/0M/Sr9kf50ABEC8AJ6A/sDmwSBBGEDcwLSAnsDnwI1AWMAHwD6/8L/sf+TAB8ChwNDBC0EogP3AtsBjf+S/cD8T/2F/lD/+v9tAOQAHAA8/mv8fvtw++/7kvyA/TT/tQCBAeYA8/8K/8X9mvy4+7H7ofwH/ub+MP8j/xf/zv4k/vr9wv2f/dH9Df4+/nX+7P6J/wQApABxARQCrwKYAjgClwGvAO7/R/9S/58AkgKHBN0FdQZIBisFpAOWAfr/yv4u/kj+qf5y/68AAwISA8wDSAT4BFcFIgWwAwcCKgC5/cr7FPpV+Xf5U/qz+w79ff6h/28AVgG6AaIBxQEoASoAgv4x/Aj64fes9gn2Efcj+p79DQGABOYGQAiKCBEHKwXOAqIAUP7M+9X5M/mc+Rv6Kvs+/VYAtgJFBGEFBQa/BYUE2wLCAEP/MP+l/+3/pwDGAasCFwPgAj8CogHeAEoAPgAhACMApQDeABkBqQF5AiwDaQOhA5oDLQMQAhQA8/0L/IH6IPqA+qr7yv3F/6gBwgIaA/sC7AGfAEf/If5F/W/8/Pta+5L6q/pC+/H72vz+/Uj/YQDnACcBzgDu/zv/X/6u/Xb9lP3g/fX9IP6Z/gf/Mf+t/9IACAIKA6cDrwM0A8wCRQKyASoB3AAxAXoBvQFMAh0DlQOdAyIDhQK/Ae4APABR//D+fv+nAFsBlAHuAVoCigJ2AjcC1QFhAdgAuv8c/sj82vt++5z7gPwv/vb/QgHeAQoCuQH9AEEAhf/F/iL+TP1f/Gj70PoF+4z7wvyr/oIABwL+AhoDbgJLAeP/1P7a/RH9Dv1e/a395/2t/un/uQBZAQoCdQJ5AvMBSQGkAJQAywDBAAsBhQHDAVsBogADANj/7v80AMQAqQGvAk0DdwMZA3MCCwKyASsBvACHALEAiwDw/0P/r/5s/lX+Tv5//l//cQAeAV8BPQHkAEQAM//8/er8OPwP/BT8R/ya/Ev9Qf7E/vj+Gv9T/6H/zP/R/77/pP96/xv/vv6M/u3+nv/6/yYAWwCZAKQAMAC3/5z/sf8dAK0AZgFIAgcDhQNgA7sCDAIgARYAXv8D/yP/fP/i/3kAQQExAhwDyQNBBFkEBQSVA58CcQF9ALX/Av9e/vz9yf3p/Yv+b/9gADEBygEsAh4CxgEmASUA+/7w/Tn9ofxQ/JP8Qf0f/sD+Rv+5/wgAKgDx/2L/2v6J/jb+//3W/Qz+a/6n/sX+zP7j/v/+NP9a/43/xf/9/+D/kf+T/+L/WQD/ANkBhwL6AjID4AIgAmwB1QA9AKr/dP+t/9L/xv8BAIoA7wAmAZkBTALRAvsC3wKsAkMCywE9AYAArP8i/xr/C//4/hX/dP/O//X/HABGAFoAOAD9/7P/Vv8u/xn/7/7Y/v3+TP9B/9n+hP5Q/lD+XP5l/oz+zf5A/63/sv+f/57/nv+O/2X/Ov8L/+T+1v7H/p/+Ff/u/6oAHwGIARQCHwKuASUBywCqAJgAgAB5AJgA9gA1AUkBdQHaAUICSwICAsYBrgFtAe4AYgATAAcAFgA
WAAkAQAD9ALMB4gGtAUsB1QAMACP/b/4H/gf+Tv6v/gj/af/R//v/+f8kAGkAcwAkALj/QP+v/jr++/3Y/e/9X/6//sf+p/6T/m3+Sf5u/tf+Ov+T/97/4v+c/zn/Bv/7/iT/pP9gAAkBYQFqASYBqgAtAPn/7f/8/0AAngD6ABgBIgFSAaIBCAJcAnYCUwLXARABKABM/+j+9/5d/wgA0gCoAUoCfgI7AqQB6gBXANL/U//6/tb+5P72/vD+B/9D/4n/2P///yMAOwA0AB4A3f+M/1z/QP8c/9n+kv6W/sL+3/7X/un+Mv+F/5j/ev9z/3z/ev84//f+7P4g/2f/n//1/08AqQDGAKIAXAAjABgA+f/N/8b/AAA0ADkARQB4AOcAdQEWAp0C4gL8AsICFwIcAUgAwv9V//7+9/5L/7T/DABbAKoAAQFZAZMBkQFgASoB1QApAEb/gf4H/sT9v/0U/qb+PP/K/0gAnQCzAJoAWADm/2H/6f6K/jX+CP4C/gn+I/5Z/pn+2f44/7z/JABFAEcATgA9AOn/d/8r/y3/Uv9n/3j/n//b//v/AQAZAFYAnADJAPIACQH0AMIAmACRAI8AogDNAAoBLQElAQUBzgCTAFwAUgBvAKMA3QAPATYBQAEvAQkB6wDXAL0AkwBbABcAwP9g/xX//P4Y/0r/jf/b/zMAeACNAHEAJwDK/17/9v6l/oP+lP6//vL+F/84/0L/Q/83/yL/Ff8n/2f/qv/a/+v/+v/x/8b/iv9u/33/f/92/3T/mf+2/8P/1v8RAGEArwD0ACYBTQE8Af8AkwAyAOX/qP+n/9n/LwB9AL4A5QDsANIAtQCgAIwAfgBuAGEAUQBCAEEAZACPAKcArwCzAKMAVgDm/4P/P/8d/w//Gf9Q/6P/7f8RABcAKQAsABkA+f+//3b/Ov8E/8T+ef5I/kX+UP5x/rb+Kf/C/10AzAAMARUB5gBzAOH/ef8p/+j+zP7i/gv/Jv8+/3z/6f97AA8BfAHcARYC7AFWAZ0ADQCh/17/W/+d/wsAbwCnAL0A0gD3ABUBLwFZAXEBXwEvAecAnABZADAAGwAAAN7/vf+X/23/Tf9O/33/tf/n/ykAaAB/AGQANgAIAMv/g/9C/xH/+/7x/tf+x/7e/gD/Ff8f/0D/dv+l/9X//f8lAEwAPwABALb/gf9r/1n/Vf9y/5v/wf/G/8L/5P8SADwAXgB+AJsAogCVAIgAcgBrAGkAWwBJADkANAAqABoAJgBBAFgAdgCXALYAwwDHANQA0QC4AKAAhwBxAFcAMgAWAPz/7P/w//P/BwAxAFkAZgBYADwAIADm/5T/Tf8u/zv/Tv9f/3H/dv96/3j/af9N/0P/W/9x/2//d/+N/4n/cP9x/6L/2//7/xEAIQAkABcA+/++/4X/fv+X/6z/wv/w/zEAcACdAMAA5ADtALkAYQAjAP//0P+1/9H/BgAyAFUAdACCAHMAawCIAMcA6AC2AGcATQBAAAIAxP/I/wcAQQBYAF4AUwA5ACcAGgANABkATgCFAIIANQDl/7H/e/9G/0j/iv/L/+D/1//A/53/gv+F/5b/oP+r/77/wP+p/5H/i/+j/8j/0f+r/4n/jv+f/5v/rf/l//3/4//m/xkANQAzAEkAcQBpADMAAwDl/9j/1v/l/xcAQwA2AB4AKQBLAIkAzgD5ACoBPwHmAFEA6f+q/47/p//t/0QAeAB3AFcAKgADAAYAMABHADcAGwDp/6P/jP+2/9v//P87AEgA8P+J/07/L/88/4X/2f8PACMAEgDj/7v/pf+h/7v/0f/V/+b/4P+U/1j/cv+f/8P/+f8TAPT/AAAuACIAEgAwAB4A9/8YACEA7f/d/9z/5f8ZABgA+f8RACkAVgDAAP8A+ADIAHEAIQDr/8r/xv+//+z/WwB3AFYAWABIADEANgBLAH8AigBNAC0AJgAFAOX/3v/1/w8AKwBlAFUAAwD1/+j/pP+A/2X/Tv9
6/5b/iv+m/8j/w/+5/7D/u//D/67/wP/O/5X/jf/A/7//qv+o/7b/4P/w/+b/7v/3//j/6//D/83/AQAIABwAXABuAGIAXgBJAEUAPwA5AFUARQANAAsABwD5/xoAJABKALAAzgCqAHUAFgDy//7/8P8gAFcAVQBpAGsARAA2ACAADQAZAOv/rf+R/1T/Jv9S/7//LABeAFoAbQCNAHYATAAyABwA/f/N/33/Of8s/y//P/9m/4z/yf8IAPj/3/8VAEgASQAiAMv/j/+C/1f/Pf9h/7P/IwBoADkA+P/7//3/0v/T/xcAOgAvAC8ARgBCAB4ANgCRAJMAUABfAGEAAgDQ/+j//v8TABYALABxAH0AWABdAGMAaAB4AFQAHgDx/7n/qP+2/8P/5P/v/9z/3//o//j/IQBLAFMAOwAbAO7/r/+G/4X/lv/S/yEAIwDT/5P/l/+b/5f/1P8iADcAPgAyAAcA8v/G/3r/fP+0/8H/wP/c//7/DgAJABkAOgAaAM3/uv/h/wgALgBNAE4ARgAzAP7/zv/K/+L/AwA1AHAAmACHACgA2v/Z/97/4f8KAC0ARwBpAHUAdgBXACIAFwD+/9X/7//w/7X/pf+i/57/xP/c/+v/OACYAMsApABPACMA6/92/0L/a/9y/zz/Tf/S/woAvf+z//7/LgBfAH8AYgA7APX/tv+0/5X/dP+i/8P/4/82AEYAGAD+/+3//P8eABoAJgA9AC8AMwA0AA8ACgAWAPL/5/8WAC8AFwD2//n/CwANADEAXwA/ABEAJAA7ACYABAAEACMAIgDq/8v/0//d/wEANgAxABsALQAfAM//iP+m/wIAJQAyAFgARAD//8D/ev9n/47/o//F/+3/9v8TACsACAD7/ysAZgB3AEUA/v/P/5v/a/9F/y7/Yf/C/+X/1P/2/zcAOQAOAAEAIAA5ABYA1f/Q/+n/8P8IAB4AMQB+AKUAYQBAAFIAGADL/77/xf/I/8v/0v8IAEAANAAqADUAMAAsACMAHQA7AEcAJAD9/9n/5P8PAAIABABPAHUAYQAfALn/fv+D/5v/yP/6/xwAHQDr/8H/y//p/wMAMgCQAMYAcQABANj/qP+G/8H/+//7/+v/0v+8/5T/Z/+O/83/4v88AIQANQD7/wkA+/8JAB4AGABNAFgAAADs//L/rv+J/4f/jf/V/wQA9/8WADwANABMAGcARQA1AE4ASAAgAOr/wP/Z/+//yf/F/+7/CQAXABUAKgBZADIA8/8LAAsAy//D/+T/+f8LAP7/4f8AADUALwAYACoANgAiAOz/vf+x/5b/XP9m/83/FgAgACEAMwBCAC4A///h/9//7P/y/+b/+P8VAP3/FAB4AIgAYQBpACEAk/94/5f/iv+u//P/BQAqAFkAOAAaACoAIAAVACEAMQBNACMAt/+q/9T/z//e/wQAEwAQAPb//P8qAAQA2/8pAFMASQBRAB8A+v8YABAADAAwAEMATQAhAM3/4v8iAAAA0f+6/6v/yP/d/8L/mf+j/+P/DQATAC4AGQDN/+f/EgDS/73//P8dAA8A/f8LACcAAgDS/xAAUgArAPf/8v/4/9f/x//x/+//y/8AAFEAQwAaABoAFgD//+3/FABJAD8AGAD4/+D/sP+Q/6L/sP/W/zMAagBDAAgAAgAgABkAxv/B/ykAQAAcACcAOQAsAAMAx//p/z0ACwDN/wEAHQDX/53/zP8RAAIA+v8mAD4AQABPADkA9P/K/73/tf+w/6//3f8YABYAEQA8ACgA2f/Y//3/8P/r/w8AOABAAB0ACAAMAPL/zf/2/0gATQAkACIAFADQ/53/rP/l/w0A8f/w/xYA7P/H//v/BwDj/w8AOwAlAC8ASwAkAN3/yP/z/xIADABAAJAAXADh/8n/8f/i/8n/6P8AAOr/0//I/9b/7f/m/+D//f8sAEUADQDL//j/DwDF/9P/LgBNAFwAQgADAPX/u/9o/4n/1/8QAEcAJQD
p/w4AJgDy/+7/SQB7AFcAPgA4AOD/i/+h/9v/6v/w/yMANwANANb/wv/A/9X/AgArAGcAfwBLAPr/yv/Z//j/8f8IAEYALgDv//P/2P+e/6f/zP/8/0AAQgAEAA8AQwAWANH/3f8rADUA+/8BAB8A/f+3/7r/9/8HAOD/3f8aABMA3v/d/x8AWwAMALn/4f8JAOT/2v8IABQAKQApAA0ADgDh/8//6f/Y//7/UwBGABwAPQAJALn/4f/f/8X/2/8DADAAKwD7/97/+f8KAPX/6/8QAFoATwAXACwASwAqAPL/7/8MADcAPgD9//H/GADy/8v//f8ZABAAHwARAAwAHQDh/5b/mf+i/6z/0//r/wYADwDc/7T/o/+X/8T/HQBMAFEAVABQAFYAMwAOAEgASQAJACcAPADi/5r/mv+D/47/kv+P//b/OgAYAAwAHQD8/+H/AwA2AEEALAA8AEEAGQAAAPP/3v/5/y4ABwDc/zQARADi//P/NQAWALT/l/8cAGEA5f+s/wsANgDz/9X/7P8MADEANgA4ADMAFwD8/8X/tf/3/w8AGwBuAHEADADZ/9n/qf9r/6n/GwBAAE0AVwAjAMb/sf/a/8v/zf8IADsALwDH/6H/xv+k/5H/3f8NAOr/4/8zAGAA9/+y/yEAYAAgABUARABTAEEAGQDu/9X/6v8cAA0A7P8mAFQALgDh/8D///8rABkA7f/J/+v/1/+D/67//P8oADoAIQA1AEoAFACz/4n/2f8bACMATgBkAEUALAARAOP/4f8AABMAGQD2/7P/pv+0/5T/1P9LAGkAbQBqAFYAPQAXAOX/uP/i/zMA/v+u/9H/FAACAKX/xv84ADQAFQDr/8n/1v+1/7D/EgA2ACMAfwCWAPf/rP/H/63/0f/g/6L/7v85APH/s//W/ywAVAAlADUAnACMACkAAAAAANj/mP+t/wQAIADd/7n/+v/9/7H/ov+w//T/SQAnAAcA9P/T/+X/x/+E/8r/ZgBNABEAbAB/ACgA1P/O/xQA//8DAIgAlgAJALv/6f8RAPz/v//X/2cAOwCl/8j/LQAVALb/w/81AEsA5v/K/wgAEADb/6//uf/t//b/4v/5//3/0//j/x4AEwDx//7/GwAMAOf/2f/1/y4AEgAPAEYANQAwAEUAMgA8ADIA0v/C//r/FgAXAAwACwA1AGkAKwD5//z//P/z/97/8v/8/wYADwAyAD4A/f/S/7L/vP/v//z/+f/y/8n/sP/3/+n/ov/Z/zkAQwALAAcAAAAdAP7/f/+5/1EAPQDn/y0AgwAlAMr/1f/w//P/1f/D/+T/MQAFAKv/xf/u/+7/wP/M/wkAOgBQACQA9v/G/7z/p/+8/1sARgAwAMYArQAFAJP/s/++/7L/6/8WAFEAcQBcAOD/S/9y/+z/5//y/1oAqACcAAsAz/8KAPv/CwBqAK8AZwAdABQAqP9T/zf/cP/q/xoACQAiAHoAFgCs/+3/7//i/wEAFAArAEYAOAAHANv/6v/W/5D/4f8hAAEA+v/q//H/FQAiAMn/tf8AADwAdABFAPH/vP/d/+3/6/8kAAUA1f85AKsAPgDY/wsADACJ/zz/vP8kACYAPwB5AEsACwBUADoAwP+F/4D/of8JABQAqv/m/zQA/v+s/woAcABuAFsAIgARAOL/tf+h//f/fwB/AHIAbQBFAAEApP+l/8b/wf+s/6H/3v8GAOz/4P/Y/8z//P8UACsAXgCDAF8A///E/+f/8P/e/+r/5P8nAEkATQD4/8L/FgDF/3r/wf/+/+v/5v8EABkAUQAHAI3/9/9vAG4AZgDu/7r/BgDU/3r/k//b/xYAMwA/AG0AVQDb/wIASADY/6P/VQDIADEADAAgAMr/n/+Q/47/3v8lAPr/PgBjANf/fP9//8b/EAD4/wYAVwCrAGAApP/D/0gAegAUAMn/5/8EACIA8v8AAEQADADX/87/KQC1AHEA9//Y/7b/Yv9X/5b/z/8
rABMAGgBzADgADgAOADYA/v9+/7T/+v8OAOv/CADl/53/FgBJABsABABFAIAAHADN/53/sf8rACkABQAHAFEAegATAL//qf/W////6v/1/zYAQwAvAAoAvP+d/6f/x//z/xkARwBCAD8AGADL/77/zv/b/9v/RQBiAOX/4P+f/23/4/8wAB0Axf/9/48AbADz/6f/5P8uANT//v9zAE8ACAAHAFsAZgARAIL/r/9MAEEAxv+Y/0UAQwC6/2H/kv8bAMX/dv8hANwAXwCl/xwArQA/APj/wP+r/w8ATgCdAIkAKADk/9z/yv+O/67/BwBJAFQABACh/5T/p//K/xIAKgA7ABYABACXAOEAbQDH/6v/AgDr/6n/ov/D/6j/dv+n/4T/d//R/zEAcQAGAHX/lf8nAF4AMgATAPj///8TAAoAXQDNAIcAFABLAFwAxf9s/17/af+w/+X/wf+F/+r/xwDhAMz/Pf/u/2IATQCaANYAlQAuAKT/i//p/7n/c//8/xkAAgClAJsApv9I/4n/of/w/3AAXABvAB0BRwFRAH3/tf8PAP//4/8DAPr/6v+//5D/hv85/zH/zP8CAPT/MQBUACsAm/9H/2//5f9DADIAXwDNAKcAUQAhAL7/mP/d/7n/l/8vAHYAVAAqAPP/mP9A/3z/MQDAAHoAGwBHAC0A4//R/7//9f9FAOT/tv9XALAABQBb/6v/6f+m/4r/4v9/AEMAnv/7/3EARQDg/9n/bgCPACYAJwCQAF4Aqv9y/+z/4f/q/gX/TwC2AMX/qP9FACIArv9n/4X/CQAnAKr/8v9gADMAPgB6AJEACAAp/67/lgB1ANH/nv94AH0AqP8K/2X/aQBHAI3/f/8iALIAWgC4/97/KgBrAEwA+P+NAKcAWgBnAB8AlP8L/5L/agAYALD/1/8zAGUA5v+0//v/EQD2/7n/0v8WAEkASABWAGsAo/8q//n/VQAIAAoA//8DABQAaf8E/7v/JgC//woAcABJANsA0QDJ/3P/z//Y/3H/Kf9s/zQACgB+/7T/vf/V//n/EQBDAE8AFgC4/63/0P8vAF4AHADq/zgAvACKAPv/7P/5/+H/DACf/2j/aQDyAHwAtP/O/0kAHABNAC0An//8/y0AxP9b/23//P8cAEIANwABAIMA8gCZAAsABQBeAHMAMQCz/2n/0f8tAAQAYf9c/xIAlwCtABcAFgAcADkApwAnAO7/6v/8//7/vv/S/8z/CAAnAHf/Qv+w/7X/2f+S/yn/u/+OAHIAYP9m/+7/RgBwABIA4v/8/0MAw/9T/5P/AAD4/33/sv8GAFIAhwBsAF8AMACIAGwAwP8eAHMATgAbAKb/jf/y/6n/dP+k/6v/1v/6/0EAOgDR/1H/b/87AHoARQCGAAgB7gBMAML/yP8yAIEAHQDd//L/yf/U/4b/FP9+/2UAeAD4/9f/MABXAAcA5f/H/xMAywCFAKP/4f8OAHL/hf8QAFgAUQBeAF4AUwA6ANb/4//5/3X/jP8RACEABQDm/y4ACwCn/6X/av+z/yYAagCRAFIAGwBc/yD/1P9SAGcAPgCbAO8AOQCp/7L/i//Z/yMA8v+l/9z/iAAKAML/FACq/5D/nP/9/1UAFwD9/x4AhgAuAJP/gP+Y/woANACs/7L/3v+T/8n/5P+r/4n/MADsANUAhAApADEACQDO/9P/5v8NAOL/5P/z/w4ANABUABgAp//w/zcAKgC//9P/XACIAGMAEwAyAAQAev+C/4z/ZP/u/1MALgALAN7/EwBEAGUAqQDnAJQAQwBiAI7/w/4L/5n/MgCXAHMAQwAzAI3/KP+//xEABwBZAHkABwAqADUAa/9Y/+3/YgBOAO//qP/e/xQAyv+A/07/xP9AAJIAlQAgAOz/6P+w/07/Tv+I/93/UgDiAKIAAADa/zb/Sv/p/xUATwB2ALQAVwBW/y7/m//D/9X/gwArAcQABwBl/2z/r/8XAGoATQBmAJwA0AB
GAIP/Kf94//f/LAAuAD0AgABoADIAAAAnAOD/Yf9//4n/v//R//P/VwAAAKP/7v9DAFUAZQDPACABsQDz/37/dP/C/9z//f8YAAkARwAUAI7/f/+W/67/DgBRACcAFQAxAOv/zv84ADoAKgA9ANf/2f8zAMv/X/+Q/+f/r/8s/3//KwDv/1T/sP8vAOT/0P9tALgAXAA6ACYAGQDw/9n/7f+A/5L/UACdAO7/mv88AIUAGACC/4H/CABHAMn/ef8OABkAiP+U/9r/WgCcAE8A8v9zAAgBHgBB/1D/nP+0/1AA3gCeANEAoAD//+D/1f+l/8n/iQA0AND/HgDi//z/HwAuALcAIAGHAEUAegD1/9n/gf+9/uD+xf9eAKYAyQA0AAsAYQD8/yf/FP/C/2UAqwB6AEYAuv+j/7//iP9+/4//gAC5AAQAdP9e/2f/4P5u/6oA/gDKAG0AJQDJ/0b/YP+///v/VgC2AMAASADD/3P/ff8TADcAj/+M/x4AJgCO/xP/mP9SAHMAhAB6AA8Ag/+r/z0A3/+q/yEAgACdAF8AKADs//3/y/+N/+7/VQBtAOb/q//w/wwAmf9z/wgAXACeAI4AcQAFAG3/pP8bADQAxf+//wYA/P/p/6T/kP/3/28AlwB3AGoAfQA7AA8A5P+F/5r/k/+c/xcARQA0ACMAQQBqAGAAWwBcAGcAJgC3/5P/a/9P/1b/n/8PAHcAnwCdAIgA1f+s//L/jP9a/8z/BQCw/6X/QABxAB0A/f9RAAUBwwDc/5b/s/+C/yb/Y/+9/wQAXwCdAIsAHQAdABYAwf+6/7H/tf+2/wYAgQAyAO7/RQCFAGUA7//w/ycAAQDf/3L/Qf9J/13/x/8TABoAOADRAMYACQCM/3//5v8SAAsAPgB8AFUAAwDV/83/AADU/+v/ogCxACMAx/92/z7/i//3/wcALABxADkAFwDY/2P/sf8WAEgASgA6AEIANwAVANz/zP/1/zcACwCj/7X/IQBRAC0AOwBcABQA5P/t//n/HwAvAGYAjAAgAKX/xP/9/9//4P89AJcAcgDj/3T/fP+x/9z/0//x/40AvgA3AIP/OP9M/6D/8//y/wgACQDo/5L/OP+G/x0AlwCbAFwAPQAgAKz/RP99/7j/DQBbAD8A/v/h/+n/t/+x/wkAUQBcAE0ASwA9AEoAIADd/7v/Xv+N/wQA0P/T/1sAbABTAHYAdQBgADkAKQBEABkAof+U/7b/o/96/3r/DABwAHoAkQBwACAA8f/G/77/3v+1/8r/BwAHAAkAGQBFADIAJwAhAAMAFADY/5P/nf/j/zUAGADp//b/5//V/8T/3v8ZAOL/BABrAFEA7v+P/+P/GwDz/w8AMQBhAE0A9P/B/+P/BQDe/8f/3/9kAJMA7v9d/5v/FAAJABcAGAAsAGwALAC//6v/xP/R/wIANQBTAD0A3v9//7v/RABLADUAUwBgAFoAAgAt/wf/qf8RAC0ARgBmAIQAiwAAAHT/m/8FAA4A+v84ADIA0f+D/5T/sf+r/+D/TwCSAHgAUwAXAK3/pf/V/9r/7f8HAGMAkgAbALj/7v9FAB4AuP+4////CgDB/7H/DQBSACQA0f/n/zcAOgD0/8H/5P/X/4r/qv8QAFoAjwBvABkAJgBLAM//Tv9K/7T/NAA1AA8A8P8LAF0AUADU/63/LgBgACoAGwAIABYACwCq/4r/rv8SAFsAWQAxANn/wP+//7b/qP+q/yoAyADJAE0ABgD6/87/q/+4/9j///8pACAAxP+A/4v/9f98AGgAOwB3AGQAt/9f/4j/mP+1//D/IAAmAAQAAAAvAD4AIwAhABoACwALAPf/tP+i/wcAFgDi/wQALgArABIA/v8lAFgACwDH/9r/2v8EAC0AIAAzADsAAgDE/9H/5//b/77/4P8wAAUAwf/g/wwAHABIAGMARgAvAA8A7v/j/9n/9f8LANb/ov+x//X//P/j//j/EAAbACIA+f+
o/7H/4f/O/7r/6v8dACYAJgArACQABQAVAC0A/v/k/xIARQArANP/xP/I/7//6v8OAE8AjABMANr/qf+r/6L/jf+l//j/UQBzAGkAQgATABYAEgACAOr/6v82ABUAuv/I/+3//v/i/+f/OABlAEkAJgAbAPT/4P/t/+7/+v/3//7/AAAAABcAIQAUAPj/2f/T/+n/2v/L/+f/9f8DAO7/vP/v/zsAXQBtAF4ASwATAJr/Wf+G/93/EwAaACgAUgBHAOr/q/++/xwAcABJACoAJgDy/7n/pf/Y/xwAKAALAPf/1f+a/5r/qv/O/0QAnwCrAIYATADr/5//2/8JANr/zP/b/9f/0f+6/57/9f9kAHEAhQBgABIADwAOAO7/xP+//83/rv+g/+z/UQBiABgAz//V/wUAAQDx/ysAXABBAAsAyf+Q/4f/tf8QAGYAegBRADgAHAC0/3z/0P8HAAcAFwABAPH/+f/n//z/ZQCeAGsAHgDY/8D/uv/H//X/AwATAA4A1P/J/+T/4/8OAGgAcQAwAOD/m/+f/7v/yv8SAFwAXQBBAC0AEwDk/+X/FQAUAP7/+f/h/7r/rv/C/+v/DwAZACgAPwBLAEwAKgAGAPj/7//W/7b/qf/O/ywAfABxAD4AHwAGAOr/v/+W/7//EAAYAOn/zv/T/8n/wP/d/y0AgACSAFQA7/+f/33/hv+z/w4AYgCCAHsATgALAOH/2P/b//L/CwAgADIABwDR/9T/2//e//f/CQAWACsAFADP/6v/wv/i/xQAUQBLABQA+/8DAAEA+f8DACoARwAxAPT/t/+X/5f/q//Y/xEAQAA4AAMA5v/x//r/DwBDAGIAZABTACMA4f/J/8n/tf+4/+7/IwA1ABMA0P+v/8T/8P8aACMAGgA6AGYARwD3/9P/3v/g/97/8P8JABoAIwAfAAoA9f8BAPj/1f/z/z8ARQAGANr/x/+0/6j/sv/k/z4AiQCOAFgAJQAPAPP/xv+4/9b//f8RABEAAQD4/wEA9v/W/9z/BwAfAB8AJQAiAA0A8P/Q/7v/1P8RADoAPQA5AC4ABAC//5f/qf/Q/+r/+f8KACMAKwAbAAwABgAJABYAHAATAAEA5f/h/wQAFAD0/9T/3f/2//z/BQAXABMADwAIAOX/x//H/9D/4f8KACwAMwA2ADoAIQAEAAQABgD8////AAD2//b/7f/Y/+P/+v/1/wAALgA2ABEA7v/b/8//2//4//3/9v8LACUAJwAiABsAIQAoAAsA2v/L/+T/8//n//j/LQA+ABoA9f/l//L/CQAdADQANAAcAAgABQD5/9T/yP8BADIAIAD7/+r/1v+5/67/x//t/wcAJQA4ADQAHQD9//P/DQAoADQANQAmAAkA3//C/8j/3f/u//X/+P8BAAgACAALABQAGAAWAAwABgD7/+b/4P/x//D/2f/S/+n/EAAoACcAIwAjAB8ADwDz/+X/6v/y//X/5//N/8H/yP/c/+//BgAqADkAKgAqADoANAAUAPP/6P/m/9n/1//g/+3/+v8EAAsAGwAYAAEA9/8AAAQA/v///w0ADQD7//r/FgArACgAEAD0/97/0f/O/97///8bACkAGAD4/9z/0P/b//H/BAAXACcAHQAIAPT/8P8FAA8AEAAbACEAHQALAOf/2//l//r/GQAmACcAKAAeAAsA7v/U/9H/1//i//b/DQAWABIADwANAAQA8//o//L/CAAYACUAIAAJAPH/4f/f/+z/+/8NABgADQD5/+j/3f/a/9//8/8OACQAKgAdAAwABgD//+r/1v/U/+n/AAD///b/AQANAAUA7v/Y/+f/DAAiAC0ALgAlABUA+f/d/9v/5//6/w0AGAAeABwAAwDo/93/4P/o/+v/7P/8/woACAD7/+z/6//3/wEABwAXAC0ANwAvABcAAwD3/+f/2v/c//P/FAAiABgACAD6/+n/2f/N/9j/+v8YACkAKQAhABcACAD3/+//9v8
DAA0ACgD7/+X/0//Q/93/8P8CABUAJAAkABwADAD///j/9//8/wMACAADAPv/9f/4/wAAAQD///7/AwAHAAYAAwAJABAAEwALAPn/7v/r/+n/6P/p//T/AwAPABUAHgArADAAJgANAPj/7f/n/+f/7P/0//z////5//D/6//s//X/+v/6//j/9//4/wIABwADAAEAAQAJAAsABwAGAA0AFgASAP//7P/q//P/+P/4//j//v8FAAMA/P/6/wIADAANAAUA+//5//n/8v/n/+r/+P8IAA8ACAABAP//AQAAAAAAAwAMABMADwAHAAMAAwAEAAcADAAOAAkA+//0//b//f8CAAMAAwADAAUAAgD8//n/+/8DAAQA+//u/+v/8//+/wYADQATABYAEgAGAPz/+f/7/wEABgAKAAoAAwD7//P/8f/1//j/+f/7/wAAAwACAP7//v8FAAoADAAIAAIA//8DAAUAAgD///3//P/6//j/9v/7////AgAEAAgABwAGAAIAAQACAAQAAgD9//v/+//9//7/+////wMABgAJAAkABwADAP7/+v/5//r//f8AAAMABQADAP///P/6//r/AQAHAAkACgAIAAYAAwAAAAAAAQABAAAA///7//b/9P/2//r///8FAAwADgANAAsABgACAP7/+//4//n/+//8//7/AQAAAAEA/v/8//3///8BAAMABQAIAAoACAACAP///v/+//z/+//8/wIABAABAAAA//8BAAEAAAACAAQAAwABAP7//f/8//z//P///wIABAADAAEAAAAAAP7//f/+/wEAAwADAAQABAADAAEA/v///wEAAAAAAAAA//8AAAAAAwADAAMABQAHAAYAAwAAAAEAAgABAAAAAAAAAAAA/v/9//7/AAABAAIAAgACAAIAAgD///7//v///wIAAgABAAEAAQAAAP3/+//7//3///8AAAAAAAD9//7///8BAAQABgAGAAcABAABAPz/+v/3//j/+v///wEAAQABAAEA///+//7///8DAAMABAACAP///P/4//n//P8BAAUABwADAAIA///9//z//v8BAAUABgAHAAUABAACAAAAAAD+///////+//3//P/9////AgAEAAYABwAGAAMAAAD9//v/+//7//z//f8AAP///////wAAAAABAAEAAQD//wAAAAD///7//v8AAP7/AAABAAEAAQACAAIAAQAAAAEAAAAAAP///v8AAP7//v//////AAAAAAAA/////wEA//8BAAAAAAABAAEAAwACAAIAAgAAAP///v/+//7//v/+/wEAAQAAAAEAAAABAAAAAQAAAAAAAQAAAP///v/+/////////wAAAAD///7//v/+/////v///wAAAQABAAEA//8AAP//AAAAAAAA///+//3//f/9//7//v//////AAD/////AAD//////f/9//7///8AAAAAAQAAAAAA////////AAAAAP//AAD///7//P/7//3//v8AAAAAAQAAAAAA/v/8//z//f/9////AAAAAAEAAQAAAAEAAAAAAAEAAAAAAP///v//////AAAAAAAAAQABAAEAAgADAAEAAAAAAP/////+//7///8AAP//AAAAAP///v8AAAAA////////AAABAAEAAAAAAAEAAAABAAAAAAAAAAAA/////////v///////v8AAAAAAAD//////v/+/////v8AAP//AAABAAAAAQAAAAIAAwADAAMAAQADAAEA//////7////+//7////+//////////7//f/9//7//f/8//3//v/+/wAA///+//7//P/+//////8AAP/////+//7//f/+/wAAAAAAAAAA//////7//v////////8AAP////8AAAAAAQD//wAA/////wAA//8AAAAAAAACAAEAAAAAAAAAAAAAAAAAAAABAAIAAQAAAAAAAQABAAEAAgABAAAAAQACAAAAAAABAP//AAA
CAAEAAQACAAEAAwABAAEAAAABAAAAAAAAAP///////wAA/v////////8AAAAAAAAAAAAAAAD+////AAAAAP//AAAAAAEAAAABAAAAAQAAAAEAAQABAAEAAgABAAIAAAABAAEAAQADAAIAAQAAAP//AAAAAP7///8AAAAAAAAAAAAA//8AAP////8BAAEAAAD//wAA///+/wAAAQAAAAEAAQABAAIAAQACAAIAAgABAAEAAQAAAAEAAAABAAEAAAACAAEAAQABAAEAAQABAAAAAQACAAEAAgABAAIAAgABAAEAAQACAAEAAQACAAEAAAAAAAEAAgABAAAAAQABAAAA//8AAAAA//8AAAAAAAAAAAAAAQACAAIAAgABAAIAAAABAAAAAAABAAEAAAABAAEAAgABAAAAAAAAAAEAAAABAAAAAAAAAAAAAAD//wAAAQACAAEAAQAAAAAA//8AAAAAAAABAAEAAgACAAEAAQABAAAAAAAAAAEAAAACAAEAAgACAAEAAAABAAIAAgACAAIAAgACAAEAAQAAAAEAAQABAAEAAQABAAAA////////AAABAAAAAAD//wAAAAD/////AAAAAAAAAQD/////AAD//wAAAQABAAAAAQABAAIAAQABAAAAAAAAAAEAAQAAAAAAAAD//wAA/v8AAAAA//8BAAAAAQD//wAAAAAAAAAAAAAAAP////////7/AAD+//////8AAP//AAD//wAAAAAAAAIAAAAAAAEAAAAAAP//AAAAAAAAAAAAAAAAAQABAAEAAQAAAAEAAQABAAAAAQABAAEAAAABAAAAAQAAAAEAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD//wEAAQAAAAAA//8BAAAAAAAAAP//AQAAAAEAAQD//wAA//8AAP//AQAAAAAAAQD//wAAAAAAAAEAAAD//////////////v///////v/+//7////+//7/////////////////AAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAD/////AAAAAAAA/////wEAAAAAAAEAAAAAAP//AQAAAAAAAAAAAAAA//8AAAAA//8AAAEAAAAAAAAAAAAAAAEAAAABAAEAAAABAAAAAAABAAAAAAABAAAAAQAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAQABAAEAAQAAAAEAAQABAAEAAAABAAAAAAABAAEAAQABAAAAAAAAAP//AAABAAAA/v8BAAAA////////AAD//wAAAAD///////8AAAAAAAD//////////wAA/////wEAAAABAAAAAAABAP//AAAAAAAAAAAAAAAAAAAAAAEAAQACAAEAAQABAAAAAQABAAEAAQABAAEAAQABAAAAAQAAAAEAAAABAAEAAQAAAAAAAQABAAIAAQABAAAAAQABAAEAAQAAAAAAAQAAAAEAAQAAAAEAAQAAAAAAAAAAAAEAAAD//wAA//8AAP//////////AAAAAAAA//8AAAAA////////AAAAAAAAAQABAAEAAQABAAAAAQABAAEAAQACAAEAAQABAAEAAQABAAAAAQAAAAAAAAAAAAAA/v////////8AAP///v8AAAAAAAAAAP////8AAP/////+//////8AAAAAAAD//wAAAAAAAP//AAAAAAAAAAAAAAEAAQAAAAEAAQAAAAAAAAAAAAAAAAAAAAEAAQAAAAAAAgD//wAAAAAAAAEAAgABAAEAAQABAAAAAQAAAAAAAQAAAAAAAAD///////////////8AAP///////////v8AAP//AAD//wAAAAD//////////wAAAAAAAAAA////////AAD//wAA//////7//////////v///wAA//8AAAEAAQAAAAAAAAD//wAAAAAAAAAAAQAAAAAAAAAAAAAAAAABAAEAAQABAAEAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAD
//wAA///+/////////wAAAAAAAP//AAD///7//v/+//7//v/+//7//v/+//7////+////AAD///7//////wAAAAAAAP//////////AAAAAP//AAD/////AAAAAAAAAQABAAAAAAAAAAAAAAAAAAEA/////wEAAAAAAAAA/////wAAAAABAP//AAAAAAAAAQAAAAAAAQAAAAAAAAAAAAEAAAAAAAAAAAD/////AAAAAP///v///wAA//////7//v/9/////v/9//7//v////7//f////3///////7//v////////8AAP////////////////////8AAAAA//8AAAAAAAAAAAAA//8AAAEAAQACAAIAAQABAAAAAAABAAAAAAABAAEAAQABAAAAAAAAAAAAAAAAAP///////////v/+//7//v////7//v/+/////v/+//7////////////+////AAABAAAA//8AAP////////7/AQABAP///////wEAAQAAAP//AAABAAAAAAD//////////wAA//8BAAIAAAD//wEAAgAAAAIAAgACAAEA/////wAA///+//7//P/+//3//P/9/////v/9//3///////z//P/9//z//v/9//v/+P/5//j/8v/x/+//8P/v/+v/6//s/+7/7P/p//D/8v/s/+r/8P/0/+7/8//6//X/+v8FAAIABQD+/wkAAQAKAA0AEAAQAVoB/gDHAbACKQODAgACrwG0AHwAGQB7/wX/vv6l/jf+mP1O/Zj9n/19/Vj9nv0S/rv9Fv7v/lr/KQDfAHIBnwGwAVoCXwLRAVIB+ADyAMUAlgC3AMkAsQCRAGMACgC7/1z/Gv8//xX/6/46/7D/sv8d/0D/VP/T/rz+3/4C/73+0f45/0D/U/+O/w4ANAAeAFEAlwATATMB7gDCAG8ANgBbAIcAlQCZAPwAIwGWAFcAbgByAFIALAAvACsAzv9y/5r/w//N/9X/8v8KAOz/UgCvAHEAOwAjACwA9/+d/5j/yP/1/93/+f8eAOv/0P++/77/g/9U/7v/8P///ycAWACAAFcAewDgAAMBBQH3ANMAgQAUAPb/IgD7/+H/IgBXAH8AWAAzACsA6v+6/4n/gf+p/6H/2/8uADIASwAvANT/dP86/07/Gf/w/kf/mf+a/2b/Xv9u/2j/b/+s/xMASQBbAFkACQCZ/3b/jv+X/5f/2P88AEkAQAAQAMb/x//I/wUAMgAGABQARQA/AAQA2v/W/6n/Yf+V/8T/vP/C/6r/q/99/2v/pv/H//j/HABFALUACgEbAT0BSAEhAdwAiwBgACUAGwA/ACoARABYAIcA/wAjAS0BNAE3AQwBoAB4ADYA3v8FAAkAxf+z/+T/KAAUAMH/Uf/0/rz+bf4v/lT+eP5x/qb+xP7M/uH+//4k/+7+y/68/rn+IP9q/8b/DQAvAJUA8ABQAbMB+AFeApoCrwKtAmACUwJGAiMCEAKsAYcBhwFHARkBygCPAEQAyv9z/9T+Wf4u/g/+Hv4t/nj+Ev+V/9n/6P/H/5n/PP++/j/+x/3c/RP+Yv6V/o3+5v7r/pf+J/6//W797fzv/Dv9bv3z/cD+o/+EAHoBUQL0AnUD/gM8BCYEKgQPBLUDMAPLAsUCjQL6AX8BIgGtAO//XP/c/i7+1v32/Rv+E/5J/rv+Pv+q/wwAiQDTACoBXwE+ARMBlABBAA4Asf96/zH/Uf9l/xb///6Q/iv+1/1k/fr8Sfzw++H7KPyp/Cb9CP7d/rn/iABgAVgCSwMeBIwE4gT6BPUE7wSWBDoEkQPTAiUCLwFkAGv/6/6x/gX+kv1C/UL9Cv3T/BP9BP01/dT9lP44/+P/+wAfAvECkgM5BJEEpgRiBNYDHwNHAqAB3ADj/+/+Dv5V/Zv8+/tk+8P6aPoC+tn57vkj+kb6k/pU+x78RP3G/lUAvgFNA9IE1QWiBkQHdwc5B7EGBAbCBIMDmgKPAboA2/9
H/+n+Uf63/Rv9o/wp/LT7jfuW+8r7d/xz/V/+Tf9mAFsBEgLmAsADNgRNBHYEhARGBP8DmwMlA30CqAHSAMv/vP7A/cj82fvC+sX59PhO+Az4HviH+P741vnx+jL8qv1J/xkBpwJ+BEoGmQeVCBoJYQkVCXsIsAdzBlwFGwTbAqwBeABe/yX+Mv1S/Jz7Q/vn+tr6+vol+3/77vu3/GX9Lf46/zkAWwF9AoEDfgQyBaAF2gW4BUAFlAS3A5MCHgHL/6L+9/yT+2z6OflG+Hv3PfcH9xj3rPcw+Ez5p/oz/BT+DQBqAocEjwZUCIQJdwrkCu4KQwoPCbcHEQZiBKQCNwHs/6T+m/3Q/DH8gPsW+9v6pvqD+pH61voe+7b7hfxe/Vb+Y/+/AOgBzgKgA3EEHAVrBbwFxgVOBacE4QPAAi0BsP9o/uX8kvtn+nT5mPji9733wfcj+Mj4uvkB+zX8sf0y/8kAYALtA5sFBAdKCAMJXgkrCXsIlAdjBjcFuQNOAiMB6v/p/u39RP3U/F78S/ws/D38P/xO/JH8tvwN/Vj9zf1g/hH/DQADASMCTwNVBB8FiQWzBXgF6wQbBAEDvQFXAOb+f/1A/DX7QPqT+SD53PjS+OX4SPnI+Yz6jPu0/AL+Vf+zAA8CUQNnBFEFDAaXBsQGvgaCBvIFJgVHBGMDYQJ4AcoALgCS///+uP5y/h3+/P3c/dL9i/1o/Vb9Jv0n/UD9pv0i/tn+2//dAO4B4wK7A1UEqATFBHkE6wMRAxAC9gDH/6T+h/2E/Ln7K/vb+rn6sfrw+lD7zvtK/OH8ov1Q/uP+YP/i/1gA0QBZAdwBWgLbAlkDuAPmA/ID3gOkA0cDwwJJAucBhwE6AeQAfwAgAL//Y//W/mT+Jv7Y/Zr9ef1//X79l/3//Yj+Ff/f/9MArQFsAvkCcwONA2kDOAO9AiUCZQGwAOb/A/9g/tn9fP1O/Vb9kP2+/Qf+Vv6J/qv+tP6i/m/+Lf77/dD9s/3Z/Rv+hP4U/7r/eQApAe0BowIqA5UDzAPmA9oDoQNdA/sChQIBAmMBygAaAG7/1f46/rL9PP33/N785Pwk/Yn9Ev66/mn/IwDJAGgB5wE0AlkCTAIcAtkBigE8AfEAuwCXAHkAaABfAGIAUwAwAAIAsf9H/8P+Pf7A/UX96fy8/Kv8wfwD/WP94v1m/hn/y/9zABwBqQEiAm4CoALDArwCqQKKAlkCEQKoAT0BwQA0AK//N//D/mP+Hf70/d792f3y/SP+Xf6x/gj/cf/d/z0AoADfABgBMwE7AUMBNwE+AUABTgFnAXoBlQGfAZsBgQFJAf4AkQAWAI7/BP+J/h7+2f27/cj9A/5g/t7+aP/1/3gA5wA/AXcBjQGDAVsBGQHEAGYADAC5/4D/WP9B/z3/Of8+/zL/I/8P//T+4f7T/tn+9P4f/2T/tf8QAGkAtQDxAAwBDwH0AMMAiABIAA8A3f/A/7j/u//T//H/FwAyAEIAQwAvAAsA2P+n/33/Xv9O/1r/ef+r/+z/OQCYAPQAVgGtAfIBIwIqAh0C4QGLAR0BmgAiAKH/Ov/f/p/+ev5m/nD+gP6j/sr++v4x/2T/pP/h/x8AUwCBAKsAvgDSAM0AxACxAIUAXQAbAN//lf9G/wn/wv6X/nP+ZP50/oP+tv7l/hz/T/9t/5r/qf/G/+P/CQBJAIQA6ABFAasBDwJcAqkCwQLIAqkCYgIBAngB7ABGAKX/Ef+O/jL+7P3Z/eH9B/5E/of+3P4g/2j/rf/q/ykAXgCaANEAAAEyAVgBfgGYAacBqgGSAWYBIQHIAF4A6P92/wb/o/5a/iH+Av73/QD+FP4w/lT+fv6o/uP+LP98/9P/MwCXAO0AOwGEAbkB2QHgAdkBuAF6ATIB2wB7AA8Aqf9C/9r+gP41/gj+8v3+/S/+e/7e/k3/vv8sAIkA4AAdAUoBagF9AYsBjQGMAYkBhgGDAXgBcAFZAT4BEwHVAIgAJAC
4/0L/0f5u/hr+6f3U/eH9B/5C/pX+6P5D/5r/6v85AHwAuQDpAA8BKAE0ATUBKAELAeQAsAB1ACsA2v+C/yb/0/6D/kn+If4S/iX+TP6S/uf+Sf+v/w4AZgCqANwAAQEYASgBLQEzATQBMAEsAR8BDQHxANMArwCGAFsALQD+/9H/p/9+/1r/Qf8v/yz/M/9E/13/gP+t/9z/DwBHAH4AtQDlABABLgFAAUMBLwETAeEApQBhABQAyP95/zf/9v6+/pL+aP5R/j/+Pf5K/mf+mf7U/iX/eP/P/yEAYgCXALQAxwDDALgAqQCTAIUAbgBkAFYASQBEADkANQAsACIAFQAAAO3/zv+z/5f/g/97/4D/n//I/wcAUACeAOsAKgFhAX0BhwF8AVsBLQHvAKoAYAAZANn/n/90/1D/OP8m/xn/Ef8I/wb/Bv8N/x3/NP9T/3j/nv/G/+v/DgAnAD8AUABYAF4AXQBaAFMATABFADsAMwAlABUAAwDs/9b/wP+t/53/kP+J/4P/gP9//4H/if+U/6n/xf/p/xQARQB3AKYAzwDqAPoA9wDlAL8AjQBRABEA0f+Y/2z/Uv9L/1f/dv+j/9n/EQBFAHAAjACbAJoAjQB1AFoAOgAdAAUA9P/s/+r/8//8/wYACwAIAP//7//a/8T/r/+c/5H/if+J/4z/lf+g/6v/uP/F/9D/2v/l/+///f8KABwALgBCAFYAYQBqAGcAXABEACQA+v/N/6H/eP9Z/0T/Pf9C/1b/c/+Y/8L/7/8cAEIAZwCAAJYAogCrAK0ArACoAJ8AlACFAHQAXgBGACsAEwD6/+P/z/+//7P/rf+u/7T/v//P/+T//P8UACwARABXAGYAcQB1AHQAcABoAFwATAA8ACgAFwAEAPL/3//N/7n/of+J/23/U/84/yH/Ef8J/wv/GP80/1b/hP+2/+r/HABJAG8AiQCXAJkAkQCAAGkATgA0ABkABAD0/+j/4v/i/+T/5v/m/+P/3P/T/8z/yP/L/9b/6/8LADMAYgCQALoA3ADwAPcA7wDdAMIAoAB+AF0APgAjAAoA9P/c/8P/qf+N/3D/U/84/yL/E/8O/xP/JP9D/2v/m//R/wgAOwBqAJEArQC+AMQAvgCsAI8AaAA7AAsA2f+p/3//YP9K/0D/Qf9M/1//d/+Q/6r/w//d//T/CQAbAC8AQQBUAGkAewCOAKAAsAC8AMQAyQDGALsAqQCNAGsARQAZAO3/w/+d/37/aP9Z/1X/WP9j/3P/if+i/73/3P/9/xwAPQBdAHoAlACnALIAtACsAJgAewBVACsA///Q/6X/fv9d/0j/Ov8z/zX/Pv9M/1v/bf+C/5X/qf+9/9H/6f8BABsAOABTAG0AhACVAKMApgCiAJkAigB1AF4AQgAnAA0A9f/e/8r/uv+w/6r/p/+q/7H/vP/I/9n/7/8GACAAPQBZAHQAjQChAK4AswCuAKEAjABsAEgAIAD5/9X/t/+g/4//iP+H/4r/kv+e/6v/uf/H/9b/4//x//v/BAAKAA4ADgANAAsACQAIAAcACQAJAAkABQD///b/6f/Y/8X/s/+g/5H/h/+C/4b/kv+j/7n/0//x/w0AKAA/AFQAZABvAHgAfwCBAIEAfwB4AG0AXwBMADYAHQAEAOv/1f/B/7T/rP+s/7L/v//S/+n/AAAYAC8AQgBQAFgAXABbAFQASgA9ADAAIgAWAA0AAgD6//L/6//g/9T/yP+6/6z/nf+Q/4f/gv+A/4T/jP+Z/6r/vf/U/+v/AgAcADIASABbAGcAbwBxAGkAXgBLADQAGgD//+P/yv+z/6D/kv+M/4z/kf+d/7H/yf/l/wMAIgBBAFsAdQCHAJQAmwCbAJgAjwCAAG4AWwBEAC0AFgD+/+n/1v/E/7T/p/+a/5D/iv+G/4P/hv+N/5j/p/+8/9P/7f8HACQAPQBVAGgAdgB8AHwAeABtAF0ASQAxABkA///k/8r/tP+h/5D/g/9
+/3v/fv+F/5H/oP+z/8n/4f/4/xIAKgBBAFUAZwB2AIAAhgCJAIUAfQByAGMATgA2AB0AAgDn/83/tf+h/5L/hv+C/4H/hf+Q/53/r//E/9n/7/8HABwAMQBDAFQAYQBrAHMAdgB0AG4AYgBUAD8AKAAPAPP/1/+9/6f/lP+G/33/ev99/4X/kv+h/7X/yv/e//P/BwAZACkAOABFAE8AWABdAF4AXQBZAFEARQA3ACcAFQACAO7/3f/M/7//tf+t/6v/rP+v/7X/v//M/9n/6f/6/wwAHgAuAD4ASgBVAFwAYwBjAGIAXABVAEkAOwArABkABwDy/+D/zv+9/7H/p/+g/6D/o/+q/7b/w//U/+X/9/8HABYAJAArADEAMwAyAC4AKAAhABoAEwANAAgAAwACAP///P/8//r/+P/2//H/7f/n/+L/3f/b/9n/2v/d/+L/6f/y//z/BQAPABkAIQAmACkAKgAnACIAGwASAAgA/P/x/+f/4P/Z/9X/1f/W/93/5f/v//z/BwASAB4AJwAsAC8AMAAsACUAHAASAAgA///3//D/6v/o/+b/5v/m/+n/7P/u//D/8v/1//f/+f/8////AAADAAYABgAHAAgACQAIAAYABQADAAAA///+//v/+f/6//r//P/+/wIACAANABMAGQAfACIAJQApACkAKAAoACcAJQAjACEAHAAYABAABwD8//D/5P/X/8r/v/+2/7H/rf+v/7T/u//G/9X/4//y/wIAEQAbACcAMAA2ADoAPAA9ADwAOQA0AC4AJgAdABQACAD9/+7/4P/T/8f/vf+3/7P/s/+2/77/xv/R/9//7v/9/wsAGQAnADMAPgBGAEsATgBPAEwARgA/ADMAJgAXAAgA9v/l/9X/xf+3/67/pv+j/6L/p/+u/7n/x//Y/+n//P8QACIAMwBBAE4AVwBbAF0AWwBYAE4AQwA2ACgAFgAEAPL/4P/R/8P/t/+v/6r/p/+p/63/tv+//83/3f/v////EwAnADkASABVAF8AZwBpAGkAZQBcAFIARAAyACAADQD6/+X/0f+//6//ov+Z/5P/kv+U/53/p/+2/8b/2v/w/wQAGwAtAD4ATABVAFsAXgBcAFcATgBBADEAIAANAPj/5P/R/8D/sf+j/5v/lv+V/5j/oP+r/7r/zP/i//j/DgAmADsATQBeAG0AdwB7AH4AewB0AGsAXgBNADsAKAATAP7/6P/U/8P/sv+l/5r/lP+R/5P/l/+g/6z/u//M/93/8P8EABQAJAAxADsARABIAEgARgBBADgALgAhABUACAD6/+z/4P/V/8v/wv+7/7n/uf+7/8H/yv/U/+H/8P8BABEAIQAxAD8ATABUAFsAXgBcAFgAUQBHADkAKwAbAAoA+v/s/9//0f/H/8D/vf+6/7v/vv/D/8v/1f/h/+z/+f8GABIAHQApADAANQA6ADsAOQA2AC8AKAAfABUACAD8/+7/4v/Y/8//xf+//7r/t/+4/7r/vv/E/8z/1f/g/+n/9v8AAAwAFQAfACcALgAyADUANwA1ADEALQAnACAAFwANAAUA/v/3//H/7f/p/+v/7P/x//f//v8GAA4AFwAeACYAKgAtAC8ALgAsACcAIgAaABAABwD+//T/7P/j/97/2P/V/9X/1f/X/9r/3//l/+v/8P/0//r//v8CAAUABgAGAAgACAAJAAgACAAHAAQABQAAAP3/+//4//P/7//s/+n/5f/j/+L/4v/j/+b/6v/v//T//P8CAAoADwAVABkAGwAcAB0AHwAdABsAFwARAAsABQADAPv/9//2/+7/7f/v/+//8P/v//L/+/8AAAIACwAQABYAGwAfACEAJQAlACgAKwAmAB4AFgARAAcA/v/x/+j/4//Z/9j/1v/O/9H/3v/i/9z/9P/+/xsAHABaAGkARQEJA84B5f9k//v9e/zv/NP+NgAxAfEBMQHG/+v+KP63/Yf+OgBTAasBwwFpAU8AaP8
c/xn/qP+eAHoBwwGHAZoAR/8u/r796v3O/lcAbgHdAc4BGQHL/4r+E/4l/tL+JQAwAWoCCgOqAbH/M/6U/cX98v0l/z8BGwKsATcBoABb/53+nP7x/s//DAGmAVgBJgG0ANv/D//a/i//fP9LABAB5gCIAAwAiP8H/8T+OP/F/y8AnwDVALoAhwATAHH/RP9x/8L/JQCaAOgAqABRAOv/hP9e/3f/mP/n/1IANQD//+X/l/9L/1//tf/t/wgAcwC/AHQA7P+9/83/oP+u/xcAcgCHAJUAKgDe/8n/k/+7/+X/RgB4AHEAQADp/9z/2v+2/7H/4v8ZAB4AHAARAOb/1f97/5f/zP+0/ywArwC7AFgARAA9AM3/of/H/yUAZQCNAMMA/AA0ATUBugBCAGMAkwDeAWUC4ABv/xT+P/ze+if7MP1t/5AANQHTAK3/ev4W/rD+2P+YATgDiQOGAm0BZgCW///+SP8jAAcBhQFCAT0Asf5f/bD82Pyy/ez+bgDUAc0B9wA/AMn/Sf8G/2z/qADdAeQBSAL4AakAzP82/7H+rv5t/ywAlAB6AGYARgB///r+Lf9l//v/pwD7AMoAfABXAAEA/P/e/ywAggBiAC0A///y/woATgAyANb/IwBHAIQA9AHfAh4DlQFs/wz+b/wK/Hv9hP8sAQQCmAEkAJX+J/41/gf/JwAXAdUBdgEeAAb/6P4B/yr/s//hANQBCQJ/AYIClQKuAJQANgDB/lr9Kf3T/Wf+0P5o/5f/5/4e/qb98/2g/mQAMAKoAlIC5AAd/zT+1/1K/h8A+QGaA2EEDgPbAIn+u/yV+9j7Rv05/6MBhwJuAp4BJABp/87+W/4g/2UAKgFtAe8B4wGyABQAo//b/u/+jv8jAHsACwEJAs0BsAA1ANb/Lv8q//L/ZgDBAH4BdgEHAb4AAgBQ//v+p/4u/9D/NACcAJ8AhQCm/+P+0f5K//D/uQBpAbsBhgFNAA3/UP49/s7+q/9aAIIAvADJAKwAuABSAAQA2/+r/8H/MwCOACMAs/8B/1D+iP6s/pr+7/44AMkAwADGAKMAqABrAHcARgDz/7n/6P9b/63+N/81/53+pv5F/wEAHABKAJ8Ayv9F/zX/cf+t/xQAJQGcAZ8BawH7ABYAYv9D/zv/f/+0/wcA6/9m/1P//f6b/qX+Kf8RACkB9gHGAecAFwDK/6X/Vv9y/5cAvwFbASMB0gC2/1j/5P7f/gj/rv/tACIB5wDAAFEAPACpAPv/OwBoAdwBNgJ5AZYAq/9g/sb9qf1P/vj/fQHWAd4BygHeAAYAMf9P/jz+yP6q/10ALgBKAMoABwCs/tX9yf3X/mAAfgGnASwBngDl/3T+1P1U/xsB1wFwAdoAQgA2/wv+uf2j/jUAcAGeAVoBnABo/67+ev4Z/n3+dP8+AGoAVgBVAOv/lv8Z/07/tv8bAJAAmwCaAPT/Jv/i/tv+zv6V/9MAFgG3ABgAwv/y/67/OP8DAOcA4QB2AdUAxP/f/x4AZwA3ADMAnAAWAX8Alf+X/2wAlAHeAaoBqwHfAHL/Kv8S/w7/QQCIAcUBkAEBAacAHwAq/zH/qf/n/5f/qP8E//D9Uv4b/77/q//r/kH/9P8b/0X+Bf5E/ob+S/5P/pn+Iv9o/zT/w/6Q/rn+uv4g/5v/HwCuAUIDBAQBBEUDSgP9A3MDwgLMApsCKAK5Ad8APADa/27/2/+1ADQB8wATABT/fv7a/aX9NP6y/o//jQAXAPz+VP3q+4L8+vw5/fj9Av9U/+D9s/vi+jL7T/tO+5r7FP2F/uj+/v7Y/7oBgQSbBx0JPglnCfsIrwY5BNcC+QEQAooCrgIIAtcAUv/y/Sv9HP0P/rz/VQHzAdcB8AC1/1b+Nv0L/Wf9fP7P/6//UP7G/GP7S/pW+Vr5zvqK/Dr9yfzC+w/67Phs+SL7Jv7iAa0EowbWB0oHLAewCKgJfgmkCcEI+QVaA/8A1v5K/oD+7f7u/2D/7/2
N/Tf9+/ws/vD/3QGvAn4BYwAeAGv/5f4u/0n/dP9H/7X9XfuY+Tv5JvrJ+g77Gfx3/VH92Pt5+oT5Ivk7+hj9xgCwA+UFqwdlB6AGWwerCdELZAxoC5oJ/QaXAh7+rvt7++X8if/4AND/nv7x/U/8yvo3+5j9KgDoAQMDFAPeAWsALP/g/Qv9Qf12/gT/jf2J+5v6R/oW+qT6gfx4/pL/jf8Q/n77mPjx9pv4fP0jArAFsAjQCScJyQeNB/oI2wqKC9QJEAeOA2//wPt9+cP5Rfy0/7YBkAEOAH7+HP0p+xn6lvuX/sMAfQE1Af4AtACM/4X+kv6V/2AAm//S/ff7sPpg+mP6evuG/bP+GP/j/pr9U/tQ+Hb2wvgO/ycEXQcSCscKvwptChgJDAkfC8ILHQoVBr0A6vyt+an28Paq+i3/SgJfAvUAyv+K/SX7TvqE+hr9mgBAAjECJQF2AHoAS//8/fr+uwB5AYgAXf61/NP76vqu+if7/ftk/XD+iv24+o/38vZF+lv/WwQvCSgN7Q1mCx0I2gZ0Br0GFgjSB2kFhwH9/Of46/bb9z78awH5AzwE7AL1/4D8ZPkc+Fv5+PtL/9YBbwIQAsMBzgANADUAxgBHAQ4B1/9F/mj83Po++nr6Ofv++/P81Pz7+mb3Y/Q79tn8bANIB4gKfw3HDcgK1wc8B/IHsgj3B4AF4AGp/er5aPjN+Eb7DAAzBMwFWASuAVv/Zvyl+PD2EPmb/MD/gQG6AXcCtAJlAYIAOwCxAJcB5wAK/9r9uPwY+xf6Q/r8+rn7wPuC+p337/OA9FX6iQHqB6AMehCGEB4MdgfdBJYEeQXPBgwHgwW3AWf8UviN9xr68f02AjYGKQcLBJT/8Pr49rP1Xfe/+vH+WALLA8gD+QHg/0j/x/9zAPwAYQEnAf7/p/0U+y753fjS+ZL6bfp3+W73K/Tq8675/gEfCBkN7RKSFAsQyQmWBbsE1AQEBJYDGgNDAB/8/feq9e74Lf9KA5AFEQeMBiwDU/2J9z/1mfaH+WP8o/4CAdsCjwLkALf/GQDQAZoCzgEWAZIACf+3/A76Nvgu+Vz7j/vK+Sf4nfZk9rP5S//bBbwMzhEBE6cP4Am/BMUB0AB3AXYDfwRnAyQAQ/uV9zH4Lf0qAlsEIQZWB7IENf799xH1U/Xd9+36v/3FAOMCCANRAYr/8/84AocDJQP9AtYCHQEn/qn6ZPfY9mr4qPmF+N/2HvbK9iH7jQGsBzQMARGxE6sQPgqPBCECDAHeAMABfgLiAZj+LfqM94T4M/3TAvUFCAfZByEGCABf+eb10vXn9+f6A/0s/pb/jAAnADn/NABVA9AFvgXABMsDPAE8/SL5tfXD9KL2TPk8+pz5KvjP9+b6ov8GBYQLnhLkFY4SfgthBDoAEf7B/Pf9dAEsBM0CHP6A+WP4wfuXAEYERAeWCcIIDQPO+iv1gPPe9En4EPy9/1kCigKHADD+RP4tAf4DHAWsBYUFZAP+/o/5DvXy8uLzDPe++RH7ofsq+4r8FgCsBOEJZw6AEg4T9Q3kBeb/AP1e+4P7OP6nAlQEfAGY/cn7u/zl/00DrwUMCHMIUQRT/bn2b/N486H1nPkG/jMBRwLdAWUAvf/XAZEE+QVqBnwGyAUMAsL7nPYy8+XxQPT492P6Afwn/NL8hf+ZAtYGaQuMDxYR8w2xB8IBLv5v/KP8wP5bAnUEeAID/1T8PPt4/aQBwASYBi4H3ARC/0D4cPOZ8mr0wvjy/R4BtgJqA7gCcAEHAqwE+gZ+B1gG5AQ3AuH8JPeX8n/wTfIv92r7uf0c/+P+OP+XAC8D+waeC6cPHQ9lCugDyf6t+zz6JvysAAYFGAY9A2P/ufwl/Bz+zQF9BRMI6gdDA1P8JvY/8iHx6/MN+vb/vQM3BVMFcgRPA3gD3ARxBgUHHQZUA33+4PjP8/jv4e7/8Yr4IP4DAakBRwGkARQCnAMDBosJDg2cDA8HxwDU/Vv8GPx
a/okCVwZ+Bp4CJ/7c+2/8O/+wAoEFqAdqB+0BvPnt8wPyBPOP9oP8YQKfBUUG/gVWBYQEEAVjBtkG3QVgA23/e/o/9QnxGe8n8Fn1wvzPAcYDigMSAikB9ABRAXUCVQamCswJLwTp//b+T/5k/q8A7AQXCB4HzAJD/sv7VPwE/wECpgQmBi4Fqv+q94ry5/EW9Mv4Wv+1BPMHJAlGCFEGBgUXBYQFxASkAor/m/uT95Pz9/CQ8DDzGPkTAMkEXwbdBKsAaP3Q/KP9jv9aBGMJkwlPBY4BDgD2/5UBOwRGB+sIRAdhAkb91/o7+8D9ZwF4BJ8FGgQm/y/4MvOe8kL1ZPpsAI8FHwkeCuMIdAZ6BNUDoQNbAtX/vPzJ+SH30/Ow8dHyoPbm+3EBcAXFBpoEJf+2+lf6F/zO/p4D4wd7CPUFDAOVARsCzAThB/UJXQqWB5MB2PvL+Pj4ffz9AN0DhASvAnX94fbk8tDymfYO/dMDrwjPCsQK9QfQA30BXQE9AdX/EP7q+075EfYJ8/PyD/b0+kEAKQXdByAHZAIW+8P2d/fn+iT/lgQ4CU8JAwY7AxUCWgOSBzgLMwzdCpgGyf8E+an1N/ff+yoBbwQxBB8Bzfur9ZLyPfQk+en/zwaTCrMKPwm5BdwBLQAgAFYA7v9W/mH7Xfi29Wf0yvUT+Tn92QEQBpwHiAXY/3P5//ax+OX7LP/XA5MIIwnQBf0CLgPeBTUJJAspC/oJ9gWH/qL3BvWy9tT6Vf/OAR4CxwDk/NX30PX99y/8GAEsBnIJXgkEB7YDAAGk/z7/E//v/nf+XvyZ+bv3jvce+Vf7xf1eATYFzQXWArb9rvhz9135EPu2/acD/gdgB3UFlARUBdUHfAo7C7wKVQl0BNr8BPdS9UT30Po9/lUAlwCV/4b8FPnQ+Gb7CP8JA9MG/weKBnUECQJjAMz/0f/b/33/Nv66+3753fiw+Sf7G/2M/3wCDgRmAq7+tfrB+AH5LPpX/Ob/0QNeBd8EYARWBewHdArvCxEMLQslCJwBvfob91n2vPel+r/9l//i/6v+Hvx6+lf75v3JALUDWQazBs0ESAIXAJL/MACXAM4AKQH9/wL9Wfo9+cP5HvtQ/dX/KwK/Ao4A7/wY+Rj34PcV+nD87P/5A0wFoQTKBNYF3QeVCiYMHwwXC70HeAFv+yT4Hfca+M36e/3y/ob/+f4O/QP8Cv0M/wsB2QJjBJYEZQPWAXMAYQBRAWQBywByAG3/7vzX+nP6z/oO/CL+j/9AAIEALf9+/Bf6/fhw+YL7v/0//1AB2QLWAnED5QWnCAELzgyBDOkJYQa+Ab38L/of+uH6ZfxJ/of+cP2r/M37aPu4/Kj+/P8PAS4CUwJ0AQIBNgFTAqEDxgPPAmEBZ/+J/HH6Xvq1+8n9/P9RAZ8BOQE2AJP+m/y0+zD8Fv2T/U79Jv2V/Rz+LP9WARUEBgcHCRsJtQfvBdQDjAEqAKL/5P+sAMMAd/8Z/rb9E/3u+777Mvxv/Nz8nP1M/gP/DQCbAUYDEAQpBKQDXQJ+AJD+d/3Y/Tz/ngDMASMCQgHQ/43+hP1q/Jn7qvs6/Nn7mvrj+SP6SPtv/U0ApAMaBxIJ1Ai6B8EGhQUXBGwDUwNnA2EDiQKuALz+h/2g/On7ivtC+wH7//qX+438wf20/ycCDATZBKoEwQOFAsIA6/4s/sL+AAAzAQYCAAJLAU8AHP/0/Rj9YPzJ+2r7ivor+XP4tviG+VH7VP6lAZgEgwYOB/AG4waUBvAF3wXzBYwF2gRtAy0BLf8D/vv8F/y6+2X75/rV+p773Pxs/noAhgLFAxQEvQPNAmQBuf9S/gn+Ef9+ALABjQLIAg0CzgCH/1P+lP1f/Sj9gPxy++X5i/gw+I34J/om/TwAkgJOBGIFhgVvBasF4QUYBjgGFAZWBfwDTgJuAAz/MP6C/eP8X/wE/Bv84fzG/b3+MQCXARACyAFCAZcAmv92/sv9Vv7U/2sB6gI
YBFwEmwM0AmIAyv7p/Tv9ZfyS+5X6SvkN+Hr3Evj9+cn8lP8AAukDJQV1BT4FYAXhBUgGFgaMBeEE3ANHAo8AXf/W/qn+Xf7t/cP9Nv7B/vX+WP9KAN8AggDm/17/of65/QH9xfyx/ZP/dwEOA2sEGgWbBDEDZAEYAGz/zP7t/Qn94/sN+gL4vvYK9+X4ZfvC/REAHAJHA3EDrAOyBA0GxganBlIGkQUZBBkCdAC6/7D/wP+R/2T/Sv9E/y3//v4b/8H/FwB0/5f+DP5c/Vn8q/vo+3r9yf/oAa4DPAXaBSEFxgNFAhEBegAeAGb/Xv7k/Iv66vc59kf22/ck+pn8Hv8dAQACPQLSAiYErQWtBhQHCgcdBkEEQALqAIAAzgAsATkBIwHyAI8A3v8t/+f+Cf/P/gr+XP29/PX7Ofv0+pH7cf3H/8sBswNxBSkGtQWnBFEDJwI/AYQAs//B/kn9E/un+PP2qPas93L5Zftz/VH/ZQDjALwB8AIOBCYFCQYxBoMFVgQEAycC8wEmAmsCogKsAlcCqQHZADMAlv/m/ij+af2q/On7Mfu4+vb6JvwE/uH/gQEdA2oEtwQ5BKoDMwOyAgwCNAFkAKr/XP5Q/Gn6UPn6+Db57Pn4+kb8bf0a/r3+2v8aASsCNwMnBIwEPQSMAwcDIgOmAyUEaQR6BDcEgQN1AloBdACv/8/+sP2d/Mj7E/t1+kz6AvuO/Gr+CABvAdQC3AMcBNUDnQOBA0sD2AIVAiEBFwC5/vP8h/sQ+yz7UPuG+/37bfyc/Kr8D/0c/oH/lwBiARkCYgItAiECeQINAwAEEwWnBa0FQgVfBEsDMAIHAQgAQf8y/vD8APxU+7X6kvo6+3H8Df67/xcBHALHAvECzQK2ArECogKNAlECxgHaAIP/B/4N/bz8lPxe/F/8evxl/CH89vso/O/8Gv4Z/9P/gAD5ABABRwEIAigDWARuBSwGYAYKBjsFKgQZAyoCSgFSAAf/if1C/Gf73frS+nz7iPyv/bv+fv8WANQAhAHtAVwC2AIeAxsD2wJHAnkBdQBT/4T+Nv78/ZT9MP2+/Cj8mftb+4f7EPzT/LX9ff7+/lf/sv9NAGwB7gJHBE8FFwZ1BkwGtwXoBA0EWQOpAqoBPQDD/oX9Zvx++0T7vft1/Cr9x/1F/tX+ov9yACoB8wG1AiADKQPuAn4C5wEnAUYAkf8p/8b+Rv7J/Tb9ePzK+3D7efvY+278Cv2g/Sz+kf7b/nL/igDuATcDPgT4BGwFlQVgBeIEYQQaBLIDwQJkAQ8A2v68/cf8NPwv/K/8Ov1o/XX90f1q/v7+rP+fALkBkwLZAqoCbgI7AtUBQQHRAJkATAC5//r+LP5P/Wr8u/t2+5D71Pso/H/89/yl/Wf+FP/n/x0BZAJnAzoE7gRIBT4FBwWzBE8E9QNsA3UCXQF3AHb/Qv5R/eH8xvzu/DH9QP1D/ZL9EP6I/kP/VABOAeIBOwJjAkMCAgLYAbwBoAGSAUwBiACG/5T+ff1G/HL7PvtO+2n7rPsE/Fj82Pye/Yb+rf8gAXoCWQP+A6EE9AToBOYEEAUGBZgE6gP5ArQBcABZ/1H+f/02/UH9Pf0+/W39l/2a/dn9f/5P/yUADQHBAQEC7gHAAYgBeQGpAc4BvQFkAaMAh/9Q/jv9XPzL+6v7uvuw+6P7v/vx+1T8G/0+/pX/9gBFAlsDFQScBP8EIAVGBZIFkwUIBSYEAQOMAQIAxf7W/Tz9JP1h/Xb9Yv2E/bH9yP0y/vj+w/94ABYBdAGMAX4BhwG8AQYCcgLHAqgCDAIDAbL/cf5x/Zb8DPzS+zP7g/py+qf63fpr+7P8Wf75/5UBDQMgBMkEXgXvBTgGbQacBkQGQwUEBJgCygAi/x/+WP2Y/Ef8Qfz9+8r7JPyW/P781f3k/q7/hgCUATsCVwJ7AtwCOQOOA60DOQMoAsoAA//J/Bb7Rvov+Tj4xfiR+RH55Pho+lT8RP7VAFQDMAX
zBmIIjQhJCOAIawnGCJcHcAbZBNICswCN/sL8sPvY+vf5cPmo+X/6aPsK/K78q/3r/h4AGAH5AeUCcwNMAwkDLwOqAxcE7AMJA8AB9P+h/Vv7vvn1+F34NPeU9uL3b/lx+dL5Yfye/woCIARhBnoI5wlACn0JnwiqCLYIQQcmBbkDOwIFANL9LfwS+6n6dfr/+dP5wPpB/D39xv2Q/q//2wDJAWcC/QJqAyIDiQJ+AuQCOgM5A4IC9QAi/zb9UPsB+pv5cvmX+DL3Avcc+VX7q/s9/Ff/ewKSA+IEkAeMCSsKxQktCOoGHAebBncEtQJ8AYn/TP2c+/j6d/vK+9r6Hvq/+v77Pf1u/mP/PwBhAQICvAHWAe0CZgOuAkQCngLDAqoCXgJqAQcAUv4g/DX6dvmD+Y75ifh79wH5hfu5+9X75/5YAvIDcgUoB14IXAmFCTYIMgdtB8oGoASMAiIB5/9r/o/8Wvuo+9r7APuU+t/6pvsz/aT+Fv+N/8cAxwEvAiMCEAKNArwCPAIOAmICeAIbAmEB/P+X/nn9Dvz2+qH6e/r++fn4Wfih+RH8B/3T/DX+KwGCA6AE3QV7B1cIAQguB7YGaAbVBZgEewKTAL3///6G/Y78bfwH/E779fri+nj7c/0m/3L/9f8uAdoBCgJsAqkC3gIIA9MC0QIuAywDVALpAGP/YP6n/Un8NvsD+0T6G/l2+Eb4efkW/CT9ePyz/fsARANUBLMFMwcFCAkIWAfEBucGoAbrBFcCiAABABj/Rv1S/C/8Jfuy+Tj55PlS+xr9mv5m/xQABwHuAboCdwMjBGwEBgSMA6YDrgPmAokB/v/g/in+8Pxs+8T6vPoB+uP4MPj595T5r/zw/RX95P0vAecD0gS/BdsHYgkTCesHHQc0B/QG7wRHAqAAw/+6/gj9b/up+vr5Ofkb+av55frn/Ab/HQDXADIC4gLrAuMDKAUQBRUEewMtA4kCjgF3AEf/Xf7A/fD8yvvL+oj6Vvoy+Xz4x/hf+T77yP0i/nD9Pf+LAn0EpQVoB+4IUwncCNwH+QaTBn4FHQO6AJ3/zP4A/Tb7Zfrj+UL5SvkU+oP7bv3u/gwAbAFsArMCGQM1BEcFSQU6BAEDXALrAT4BVQBl/8r+tP4M/mD8P/sf+8r6xvlK+bX5gfl6+eb7av4G/rL9dADbA7AFuwbKB/YIqQm3CMEG3QVhBdYDwAG3/xn+Qf39+/H5RvkM+rf6Jfu2+8z8uf53ACQB2gEKA9kDLAROBGAENARJA/UBQgEXAcEAUADe/37/z/5U/RX8l/vg+gv6tfmK+Tb5uPiN+cL87/7Y/Sr+RwLhBdMGXQefCKIJcAmqBwsG0QXyBHUC1//1/f38c/wX+8v5Tvr2+uD6hvvQ/Oj9AP84ADIBPQIzA6YD4QPWA7MDHAPOATsBzQETAmcBiQDq/0z/+/1I/OH7AvwQ+9v5RPkR+Xr4X/gF++H+IP9F/a//iwSEBsAGCQi4CU0K+QhSBkEFpwUlBAwBif4r/cX8R/zw+n36g/tu+6T6EftS/PL9ff+6AAMC5AJBA88DDQSmA8UDngN9AhgCVALOAbgAxv8g/13+4/yr+7H7P/sf+uL5qflf+Yn5xvns+6T/CABC/pIA/QRlBooG4gcwCSAJswcIBkcFmQSYAgoAR/52/Qz9Mfwi+8r6+frg+t36yvsp/T7+nf9kAaMCBANkA/4DQgQbBNoDngMXA2QCzwH6ALb/t/5Y/sX9wPwn/N37K/uN+mz6MPot+iL6m/lU+zX/BwBT/gUANQQHBigGOQe+CH0JlQhzBgMFPASjAsYAZP/x/cj8FfwY+5P67fq6+ob6svv1/ML9t/8WAgsDKgNuAxcE/QQSBSsEegMmAzIC0gDJ/yX/wv5k/qX9j/zy++z7tftZ+1f7EPtn+vL51/lQ+7D+CQBr/kL/OwNOBZQFRAciCZgJ3AjQBhYFwgRlA9wAQP/K/Sj8mfsI+zb6ZPqJ+mb6cPv
a/Nj92/9XApkDCgR8BBcFbQXmBD4EEgTwAtUAsf9q/9n+L/6m/RH9g/wX/B78cPxI/OP7Svtv+i/6P/qF+/z+hABW/qz+9wKPBTwGowcDCVsJcghyBgEFQwTWAisB7P5V/Lr7R/xZ+zb6aPqq+tn60Pv//CT+AgCBAvgD7QNaBHkFmQWpBFMEFARDAhkATf8+/7r+Gv6h/dP86fuf++v74fuB+5H7G/sv+uv5Xvp6/Pf/4AD5/jQAPgTxBY4GgAizCRUJgwexBecEYwQ0AuX/Xf6H/KD71vsr+0L6nvrd+uj6JPyc/Yv+GwCHAjIEigSFBK4E6wTbBHQEbQOEAfz/dP/7/gP+bf15/eP8vvte++D7XPxs/Er8vftd+0L79/qR/IwA+AFX/4L/3APtBZoFBQf9CMoIOwdaBdYDYANkAlYANf5//AD8Q/yU+4L62Ppu+0D7rfvG/CP+UQCuAsgDKwT9BGsFLwXtBLQE0QPBAfH/g/8F/9D9Nv0t/Z/89Pud+9P7qvwJ/Yb8vPtA+yD7GPu5/HUAtwEl/1v/WgPuBOAECwfzCGQI7wZJBTME8wNzAhIAb/4J/Tr8Svyf+6769/r3+n/6ePvf/OP94/+sAjUEdwTLBEsFrAVlBbgErwOzAe3/cf8O/wn+Q/3w/Hn8B/wC/Dz8iPzt/DX9b/wF+/P6pvus/MX/vAE1/0H+TAKqBJUEtwYNCawIAwd6BYkEYwTuAjkAuv51/Rn8FfzI+8n6Bfs3+8362Ps8/QT+NgAMAw4EDgR4BLkE7wQoBXMEDwN8Aev/Gv/J/j3+zf2P/fb8WPwg/D782fxK/eH8z/vW+r/6CPsx/FL/IwE4/63+EQKgBJAFpgdrCRQJ0Af9BZUEPQSeAjAA5P5v/fD71vtf+6f6Xvtt+576e/tA/U3+9f+OAuMD/gM2BF4EpAQeBakEEQM6AdH/bv9T/3n+1P20/SD9BPxw++H72vxp/ef8hftQ+nD62fr7+3L/xQHA/4/+OwJ4BTsG8weKCbQIKAfiBX4EsQNaAjsAuf4F/av76/sI/Hv7gftn+xr7B/wD/Xj9ev+fAvEDnQO1AyQEyQROBa4EIgOAAWkACwCO/57+EP7k/Sj96vtD+7z7sPwT/Zb8cfuO+qX6uPro++//SQL7/3z/gAObBXgFeAdACaoIXgdFBZIDiQNSAtz/k/4f/dH7W/wm/OL6YvsV/BX7AftA/Gb9sP9eAiEDMgMVBI0EggS3BMgEQQSWAs4AGgDJ/wb///1Q/bD8Bfyb+5v7Tvwj/Sb98/uB+nD6PPuQ/O3/WQJoAKP/QgPwBIEECgdICVoILgeVBXIDQAPHAk0Avf7M/WD8Hfzm+/v6YPuY+1r6pfpJ/Ez9Nf86AqYDawO4A3UExAQ1BU4F+QMqAj0BegAs/3P+XP7V/dH83fua+yT8qfwC/U/9sPxv+0r7Rfsv+yr+AQIyAfD+CgHIAwAEyAWPCDcI5AZJBn4E2gKfAsgBUADS/s38xPsL/Nj7hPue+yn7LftA/OD8Ev49AZMDnAORA9QDCQQQBZoFBwRCAsoB/ABZ/4L+t/6v/rX9N/y2+1f8rfwJ/YP94fxL+1/6Yvql+r38bAAyARD/9v8mAzsErwVjCK0Iagf/Bq8F0wNxA4cCmAAW/2T9CPwH/OX7d/ux+2r7A/vG+4j8rf2VANACIQOjA+cDdANBBFoFlgQNAyMCRwFCAHv/EP/q/iT+pfz4+0L8d/wl/e/9Pf1P+8755fmQ+qn7y/4YAbj/a/9EAqIDawR7B8AIPAfsBnEG8AOoAoMCOwH4/4n+pvw4/J38UPzz+7r7ffun+8/7e/zb/moBJAJiAiEDEwNBA4cE4QS+A9oCRQL8AB8A9P9j/7H+Ov5P/Ub8K/zy/ML9Z/3q+4T6KPqP+hL7Mv1iAOcA6/8dAZ4CRQP8BUUI5wYzBhoHFAVgAqUCmAIxAWAAuf7M/PP8U/1p/O77BPx9+xj7pfv3/G7/xQEAAs8BjAI
RA9QDvwQqBCMDHwNjAowArP/a/5L/xf68/aT8hPw3/cn92/0h/dr7sfqB+i77YvzA/pAA6P9b/5sAvQFHA44F5QVDBUsGAgYIAx8CaQP8AtoB9gA5/3v++/4N/pX8v/yp/FH7DPsc/FT9bf9TAQoBewCiARQDWwPuAsYC8wJuAuEAr//i/2EABwDx/rT9Rv0G/o/+Pv7e/Vb9N/x0+3/79/vU/R8A5v+X/gz/JgBfAUEDGwTzAwEFcQUtA8wBIgPtA1YDeALtANT/LwDA/yX+wP3S/ab81vsh/KH8M/4lAB4AYf8cABsBhgH8ASwCPQJQAiIBl//M/8MA/QCwALf/qv4i/xwA5f+0/xAAQf/R/W79i/2d/YD+of9W/zD+5P2G/mv/fQAVAT8B4AHeAXIAKgDIAZ0CfAKGAugBPQGMAWMBhwBcAAkA4v5l/l/+IP7P/vD/mv/A/iD/pP9f/3//HgD+/4f/aP8V//f+9f/8APIAjQCjABUBrQH7AcEBjwFYAcgAYgAfAI//Vf/a/9j/cv5U/Z795f3L/Qz+PP4T/gr+MP5s/hn/BgCNABYBzQHYAbABDwIsArsBawE5AeIA5AAfAdMAawCVAI8AAwC0/7n/sv+T//n+Ef4G/pP+s/7R/iX/L/+z/7YA+AAtAS0CcwILAl4CrQJyAocCTAKIATwBuwB7/8H+nf4R/o/9Qv1w/JH7oPtE/Mf8JP2H/Sv+B/+c/87/PAAGAWYBKgEiAX4B6AGgAigDqAIsAm4CRQKVAWcBewFGAacASP8M/gb+Lf77/Sz+S/4//uj+jf+0/2wAYQGeAc4BRgJ8AqkCxgJVAt4BjQGpAJr/Pv9G/xf/lf7S/fD8Vfxt/Pj8Yf3Y/an+SP9o/2H/hf8GAIcAcgA9AHkAsQDsAG4BgAE0AYsB3AFVASEBmwGSAfYAPgA8/3f+Vv40/gn+Rf6J/r/+IP9c/63/bQAEATsBogEsAmQCLAKzAXwBfwEEATcAw/+A/1v/Sv/J/iD+7v33/TX+tP77/lv/MQCWAE4APwBnAGcAggCYAHQAfwCpAIEAUwBuAI0ArADRANAA5wAZAcwAKwDR/2f/yv6K/nX+Dv7p/VD+i/5u/o/+/f6J/zIAtwAjAeIBdwJBAg4CYAIxAmsB+QCyAFIAIwCt/7r+Tf5u/lP+VP64/gf/Zf/k//D/5P82ACoAxP/s/zAA+v8EADYA5//c/10AZgBAAL4AKQEaASgBKQHbAJcAKwBw//7+zv5+/nj+w/6a/i7+Kf5r/tv+l/8vAJUAKwF1AT0BZgHKAYIBBQHeAIoALAA4AAgAff90/5r/W/9q//f/WAC6ACMB5ABvAFcAJQDr/wUA6/+E/2H/JP+3/vb+mP++/8D/DAA9AGAAuwDwANIAoQBEALf/Y/9l/4H/pP+e/yP/hP6C/gT/df/b/0wAjwCOAHIAgQDBANcArwB3ACcA0v+6/7//lP98/73/5//b/xAAjQADAXQBqgFgAdUAcwBWAGcAXAAKAJ7/Lv/K/sD+E/9O/17/lv/O/77/wv8pAIMAhAByAE4A5f+R/6j/8v8jAAIAff8G/xH/Zf+3/w4AXAB6AF0AKQAFAAIAJgA8AAYAs/92/07/Pf9X/4z/sf/J/+L/CQB1ABYBdgGAAVAB8QC7ANMAxAB+AFAA+v9g/wz/H/8q/x3/P/97/5H/lv/D/wsAWQCQAIAAUQBGAEQAWQCTAHUA8v+v/6P/ev+Y/+b/BABHAJEAVQAJABYADAAQAEQA9f9f/0n/Ov/d/tv+I/8z/0v/fv+i/ykAAAE/AfUA4gADARMBJAEAAasAiQBZAM3/af9u/1//Pv9a/1X/Ff9E/8n/7P/w/zYAPQD3//H/JgBWAGkAIQC6/7b/0//W/w4AUgBXAIUAvwB5ADUAiwDPAKoAgwAuAI3/Qf9C/wf/4f4B/+H+sP7w/lz/yf9aAK0AiwB9AK0A1QAFARgByAB0AGgAQgD0/8r/vv/C/9D/qf9
0/6j/AwATABcALAAMAOX/9/8HAPT/3f/A/4j/X/9k/33/nv/U/xAAQQBsAHgAaACGANQA7wC4AFwAAQDF/5//a/89/yz/HP8Y/zj/dP++/xMAQAA7ADYAWgCKAI0AcgBpAFgALgAXAPv/x//S/w4ADwAKAEQAZgBlAIQAnAB6AFIAUABbAEMA/v/J/8P/rf98/2H/Wv9e/4r/tP+9/9f/CAAtAE4AdACKAIIAaQBSADIABADp/83/kv90/47/qv/E/9z/1P/V//r/BwATAC0AFADp/+L/zf+0/8r/z/+y/77/3f/a//H/IwA8AFUAeAB9AG0AeQCTAJQAdwBXADgAEwD7/9//pv94/3H/cv9v/2z/d/+3/woAMAA3AEMARQBHAFAAPQA0AEoAJgDZ/8X/2v/v/xAAFgDx//f/IQAjABYAGQAJAPr/8//O/6T/ov+m/5z/nP+g/57/uP/n//b/BgA5AFkAXQB9AJMAiwCdALMAoQCKAHgANwD0/9L/pf9//3v/gf+Q/7D/vP/A/9//5v/M/9D/3//j//P/8v/N/8j/6v/+/xIAOABFAEUAWwBiAGIAcQBoAEIAQQBJACMA/f/q/8H/o/+r/6T/jv+e/63/nP+i/9D/+P8XADkAPgA+AG0AnQCgAJYAfABBABkABgDX/7L/v//M/8P/yP/D/7b/yv/h/9H/v//O/+X/4f/S/83/3v/z//r/8//8/yQASABMAE4AWABVAE8AUABIADYAKgANAOT/1//g/9f/wf+t/5L/ef97/43/ov+4/8r/1v/p/wUAGwBFAHcAgABfAEwAQAArACcAJQASABQAJwAXAP//DQAhABwADAD+//v/9//Z/8H/zP/X/8j/t/+0/8T/9/8jACEAHgAuAC4ALwBLAFsATQA6ACMADAAVACwAIgAEAOj/v/+h/6f/uP+4/7j/sf+r/8b/6f8FADgAXgBPAD8APwAoABUAJAAuAC0ANQAvABUAGQA0ADkALwAoAB4AAwDj/8z/zf/P/7n/kv9w/3D/l//A/8v/xf/F/8j/zf/o/wwAHQAmAC8AKAAnAEEAWwBbAEcAJQD6/+b/6P/g/9H/yP/G/8T/0f/q/xQAOwBBACsAFgAMAAMA/P/9/w4AKgAxABgACgAkAEsAXwBkAGcAZABQACwACwACAPz/4f+y/4n/iP+r/77/rv+i/5z/kv+m/9D/5v/y/woAFwAbADgAWQBpAHMAZwA7AB8AIAAfAAoA8v/i/+D/4v/e/9v/8v8GAPf/4v/c/8v/wf/H/7j/sf/a/+z/1P/o/yEAQABfAH8AcwBmAHMAaQBUAFoATwAdAPX/5P/i/+3/5P+3/5T/jP+K/47/kP+a/7j/1//n/wAALQBTAGQAYwBYAFQAWABPADcAIQAYAA4A9//h/9//7P8AAAsA///u/+L/z/+3/6b/ov+u/7X/qP+o/8b/5f8AABwAKAAqADoASgBZAGwAagBGACYAHgAiACwAJgAMAO7/zv+0/6j/pv+t/73/vv+7/87/7v8HABoAIgAlADMAOgApABYAEAAJAP7/9v/1/wAADAAYACUAKwAnABsABADt/+H/3P/S/83/w/+t/6T/uf/a/+//9v8CAB8AMQA0AEYAXgBeAEoAPwBFAFMAUgA5ABcAAwD5/+P/wv+9/8//yv+w/7D/yP/k////CgD9//r/BAD+/+T/1v/c/9n/z//X/+v//f8KABwAKwAxADkAOwAuABoAFAAUAAIA5//U/8f/xf/c//b/8v/p//f/AwAHABkAKwApACIAKQAlACMAOgBBACUAEAAMAP3/7f/l/93/5P/o/9T/1v/v/wsAGAAiACcACAD9//X/1P/Q/7//qf+s/6H/q//Q/9z/3//y/xwAMQArACYAIgAzADgAGQAEAOb/4/8BAAIA9//k/9j/3//X/9f/8v/8/9T/0//+//j/VwEUAfP+wv+OAOP///8cAPr/QwAcAB8A9P/UACoCGwGk/yL/f/9
JALcAagDo/zv/zf6Q/pr+8v41/8j/1f/v/tv+vv80AEQATgDAAOUAFQG+AZYBDwMjBVcD5wDI/7X+pv63/lj+pv7c/Qv9bPzH/KoAtQBf/mb/qf8e/9X/WgAZANAANAE1AFT/RgDQARQC3AKrAggBcAAFAJL/6v///7kA1gCQAIAAv/8MAH0AFgAF/zL+hP2z/a3+2/7c/j3/Gf8J/wz/YP7R/uT//ABKAcgA4gBHAfgB9wEqAUsB3gEpAdD/Iv94//X/FwCR//z9a/37/Sj+Sv6z/jz/Qf9s/xEAnf8/AIcBogHZAf8AggD/AOQAIAEEAfEA4gCQ/xn/Nv97/58A1AAaAMv/0f/+/6EAjQFmAYIAegCcAFsAWQDFAB8A0v9sACr/Xf4f/53/JQBCANv/s/+2/+b/DgBmAKYAbQDO/3f/tP89AEwBOwFPABkAcACKABAA0P/I/5P/Nf/0/s7+5/4y/0j/4/6c/kH/qv9h/3T/+v9jABQAsP8AADUApwBWAQ0BmgD9ANAACwAEAD8A3v9r/2v/Pv/i/vD+YP+w/0IAYAD9/2IAVwAIABsAHABaAJ4AjAAcAO3/FwAKANj/BAD7/2r/Rf86/z3/tv/d/5n/uf9BAHgApAAIAaUAPgBaAEMAxf8i/0H/qf/M/xAAHAC8/5//p/+B/2v/zv9RAP//sv85ANwA8QBtACgALABAAL0A4ACwALQA/wCAAYkBXgFJAe4AvwDSAJgAPAAxAL7/y/54/vf+FABlALH/XP/D/4YAfgAmAGAArACPAIAASgDk/zgAcwDn/13/NP8b/9j+cP4P/gT+K/7c/WP9HP1i/QL+JP5f/mX+ff7D/uL+O//B/00A/P/0/9MAVQHsAWoCIQKSAfABGQOGA+cC0gHJATMDMARZA8QBTwHDAfQBbQFqAOP/GwArAMP/7P7r/sr/BgDo/2n/Tf///77/nv4y/u/+wP91/zD+gv2c/gMA6P+M/nL9RP2I/bT9C/1i/Hz8l/wj/K774/tJ/AT9zP1I/lj+l/7A/zYA6QDDAjgDCAMUBGMEhQQcBkcHPwdCBuQEswPcAr8CdwKaASgApv6n/YH9gP6x/lP+aP7Z/m3/2f+WAEEB1AH7AbUBwgE4ArYCtQJEArYBAAFAAIX/9P5D/qT9yv09/T/8cvyn/C78EPwc/Gj76Pok+yn7afsR/ID8S/1D/pj+8/4lAKgBlAKSA7IDKgNMAx0DWgNXBC0F1QQlA4EBOQENAWkAUQB+/5v+nv7U/fn8oP2Q/h7/i//Z/6oApwEmAo4ChAOzBI4FCAXEA2oD8gNGBC0DbAESAID/JP9D/pj9fP0Q/V/85fuj+8T7/Pu++6b6xPnz+TX6OvqF+qT7yfwC/c38Zv0BACECjQIfAwEEDgXJBQAFTATpBbwGeQUnBDQCwwABAWkAO/5W/ZH+mP4N/Qj8evxR/pv/Fv9N/nz/2QFsAyIDTQI2A7sELwXWBAIE0QNqBJYD3AHjALYAfwBQ/5f9wfx3/Qb+GP3R+3j7xPsP/NX7CfvK+gz7RvuT+zL8s/wP/Wb+vf8hAIAA9ACSAhsEGANkAzQFKwRzAngD1wSdBIsDmwFbAGYAMgCt/7z+1f2e/Uj9V/3w/UL+c/7V/hL/Cf+8/wsB2gEDAmcCfgPdA2MDZAPKAzAEIQRJA8YB6wDYAAsAPf+X/qH9S/1j/fD8DPx9+x773vo++0P7rPpz+mz6p/oU/PX9kf6A/vv+Lf+Q/5gBuwMYBPYDZgQ5BKcDiwRuBjsG5wTFBNUDHALpAZoBaACJ/y3+AP0g/T79CP08/c/8V/zd/L397/4+AEcBCgLHAogD+wM/BM4EqwXfBeMEjgOEAtoBXAGqAG7/3v3I/Eb8QfyE/DH8+fqT+QP5aPkI+ij6Evr7+S76z/q7+z/8uPzp/rcArACVAN0BywOZBfAGSwbjBR4HVgc8BvgFCQfYBukEqwIPAaQAeACx/5v9fPuu+tL6VPsR++D6qvu
6/Ib9Ff52/9gBxgOABMMEVQUdBpEGRwa+BaAFJQXGAxwCGAFBABv/Qf7V/O76Gfpm+pL6vfnG+Jf4Cvk4+av4n/g0+Wr6RPtH+8z71Pwq/vD/jwGJAuwDhAXlBtEHcQdbB9YIXgk3CO8G7wXUBGIDwwHx/6H+pP0r/PL5s/ha+T/6wfoq+4z7FPx7/W7/7gAvAv4DiQW/BZgFWwbKB0kIkQd2BhsFEwTRAykDoQEZALn+7Pws+xv6kfnG+Xb6PPqs+JH36vcY+bj56viH+En5O/pm+xz8cfw3/Y3/6gEFA4EEWAaVB8kHwQdSCBUJ7wjRB3IGPwWUBMMDfgHs/sr9YP1I/HH6yPj4+If6Ofv9+vb61fum/Z3/VgD6AAoDvgT7BHoFuwb1B7kINwjaBhkGGQbjBY0EegJMAJH+e/1O/PT6vflO+WT5s/nf+XD5E/n++PT43Pi6+OX4r/nb+oz7wvuM/HX+aABTAmcE2AXOBpQHewhSCAsI0AhYCMkGoQWWBD0D3AFLAG7+4/yR+036eflD+ez5evpS+sD6vfvc/Df+oP+kAIEBpwLmA1kFbAbvBlQHowfPB6MH2QbqBTEFYASkAmgAxP7d/Ub9RPzl+uz5Afq++uj6SPpW+fH4rPiJ9472iPcM+iT7Dfu8+n/7Nf5jAW0E7gV4Bq4HZAhMCBwJlwprCt4IEAcsBRME8wJ2ARgA2P39+hD5jfhF+Bz4v/gC+fP4o/lC+1D9Bf9VADQBxQHaAo8EUQa0B0QI6AelB9sHbQf2BqoGXQWoA/0B+v+e/mj+vP3u+zL6SPmx+fT6JPuF+Qr4y/dQ94T2XvYZ+Pv5pfmu+Yv78v14AGgDqAWqBg4ImgmpCoQLEAwvDPwKjQhLBrkEygN6AiEAM/2d+lj5jPiD99X2C/f394v45Pgv+of8bP4l/wMA3wDDAYsDhAUJB14HXQfGB4MH8AbUBqIGsAV/BCUDiQFwAMX/q/7z/Fz7Wfqi+Vj5T/pN+1z6KPiE9qX2L/cx90/4XPr1+pv6MPyD/hwBzQSBB3sIIwnTCWcKbAv9C1ILiAn5BucEzQNdAkMAY/5k/Nr5JPjA99D3JvjI+Eb5Ovn1+cn7AP41AIEBBwIaApYCJAQpBrsH+wdHB5gGTwY9BuMF+wTgA6ICPAHR/6j+Iv7S/Rn9TfwH+0/5SfkJ+y/8Rfsi+Ub3V/bk9W72o/g3+5r7lvrj++D+pAFJBD8HqAmmCsoKrgouC/wL5AuhCuYHCwVZA6wBaP8q/b/7HPoP+OD2o/Y198X4l/rv+o/6yPtd/rIAvgEdAiADJAS0BFoFHAbRBjUHzAbKBdAE3wMBA2MChgFTAAv/zv1g/bz9tv3h/Cz8cPu6+if7HfzM+/D57Pc99iv1cPXQ9gX53vp++6L8C//kAVsEOwf2CW4LHQw3DE4MNgxkC+kJ0gdZBSMDGQHL/nL80PoZ+Tb3l/bG9pH3q/jW+Uj7pfzj/Sj/EAGxAnQD9gN8BGkFWQa2BokG/QVhBZ8EgQNvArEBTAGiAKf/wv5s/lT+Zv7M/o7+0v3q/PX7zftY/On7u/kw93/1LPTj86D1r/is+u76VPwn/9wBiwSYB3sKVQz4DJEMvAtEC+QK8gmtB40EDQLp/9D9FPx2+uz4SfdX9rn2Pvdg+K76gvxD/TX+QgCWAs4DGQSPBJ4FDAbIBcsFsgViBZgESgP7Af0AdwBYAP//OP9p/kj+o/7d/i7/bP8s/2T+RP1Q/Nj73/uW+z75+PV885PydPQY+MX6jPvM/A//2gDEA7oHAgs4DYwNuAz0C5IL6grkCewHmASAAfD+WPx7+lD5R/it9tf0sPQG9hb4l/oG/Vf+KP+oAOMCeAVxBmMGpgaQBmIGQgbFBesEGQQEAysBU//H/m//1P82/0T+3/1c/nD/mwBiAVcBYgAK/8D9tPyR/Gr86/rz92P0ovGI8fX0o/gb+oH7Ov65AHcCJgUfCe8MRw99D8oNaAsnCrMJWgh
9BfABvf4P/JD5jPfT9pb24fVS9Z71/fau+QT9qf8eAVMC8ANmBXUGMAeiB6AH8Ab9BegEwQOzAlQB1f95/n79cP0m/mn+8f03/oz/AAH8AU8CcQKqAlkCJQFM/x39ePuW+mH54fY080Lw7e8n89j34PrE/BP/3AFcBHcHUwvnDkERLhEPD0sM/AklCCAGfwNAAAn98Pkg93f1C/V69Rz2MvYL9lL3Xfr3/T8BWQNfBD4F+AWQBtYHbAhbB/wFxQRBA+EB7QDF/6X+y/1M/SP9SP1q/l4AXQH1ADsBpwLNA4AEwgSxA0IBw/6E/Fn6nvie9yj20PIh74Ltqe+L9TH7W/7MAPUCHgVfCKMMehBZE+oTMRHuDIoIvAX/BKMDHQCp+4D3m/Rm88nz/fRT9v72G/cb+Hf6ZP79AhUGUAdSB3YGYQZZB2YIJwhNBssDSAGM/4H+fv6z/tL9w/xE/Lj8Xf4gAV8DewNEA3ADmAM6BMcENQSHAgkAiPzV+J72Efbc9ffzQvAj7Xvs+PAT+U3/VgItBBkGdQghDGIQ+BMIFmkUUQ8tCRoE1AFQAbj/0/tV9wT0GfL78cTzVPZ4+Dj5Yvma+oP9IAL1Bp8JGglVB38GlAbqBsUG+gVtBGcBCv5B/Jv8Iv5L/9D+Iv07/D/9KQDcAw8GvwVCBDADQQMoBAIFjgQuAl/+cvqd9ib0PfTy9Fnzne+j7HvsmfHe+a4AjgWlCLsJugqnDXoRhBTpFb8T/Q2kBpEA/v0e/ab7gPmN9nHzMPJg86T15ff9+Xf7afzM/YgARgSXB6UJzwkcCEYGKQW1BG8EjgPmAUL/s/yM+1X8M/5u/3H/xf7v/hEBxwNjBdUFyAVWBWwExAMOA0ACRwGd/9D8G/nX9avzcPLt8cvxsfCg7kvu1fIK+04CFweXCq0MwA2bDwUSyxPGE0QROAxRBXv+rvqb+YT4cvdB9q708fOo9H32FPmx+9j9Gv8WAL8BDQTRBt8IEwl2B5wFGQTbAsEBfACQ/5b+b/3M/Fb9zf6SAMsBugGXAaUCewSABqIHfgZqBPwCXAI1AskBPACV/eL6RfiY9czyJvGO8Y7y2vEm8OTvHvNH++0DEAk9DNIO3A9WEGoRCRKaEYMPxAo4BKb9S/jj9ar19/Xn9WP1SvUv9gT4ovpr/Zn/AQH7AU0DxwRDBmEHZwcVBvQDEAIBAUYAf//h/s3+1/6+/iz/aQBNAqIDEgQDBNkD9gN5BEgFHgXXA1UC+QAuAOX/M/9T/UL7YPn09t/z4/Hb8eLy5fOg883yHPRi+r8CFAmJDW0QWhF/EHMPwQ7JDSsM+giEA2n8J/Yt81TzGvX+9hP4qvhR+db66vzo/toAaQJGAxEDFAMABLAEuAQHBGsCWAAD/8X++v5U/6D/JgD2AHEBNgK9A8cE/wTMBAYENwP3AmIDpAM7A/gB6v+c/qD+9/5t/vv8L/v5+Kj2kPRH88rzbvU39vb1TvU59tX7UwQHCwcPExE1EPsN7Az7C7AKMwm5Ber/l/mg9LXy7vNd9rX44/pI/Ej9/v1l/rX/hQGrAtwCcQKvAYkBDAIIAooBBQHHACYBEgEPAJ7/XwC0AU8DuARhBV0FiAROA2QC9wEuAuUCTANjAnEA8f6Z/lv/HACT/9v9j/v4+IT28vRY9Av1TvbJ9nf20PXA9dP5XwKoCRkOvxCKEOYNgAseCh4JrgfNBLIAgvsn9krzqPPF9d34S/wc/wsBjQHdAHIAHABrAEsBDQG3/xv/1P5F/tD+5//iABQChQIuAvsBnwFlAswEtgb4Bg4GNAQ2Al4BBAGLAJoA6gAeAccAFf+Q/Sv+dgAKAiEBWf6m+735/veA9tH1kPaS98b3/fbJ9Sj2A/svAwoK3w1KD7sO3gtHCfEHrgbcBF4CHv+6+hX3hPVp9hv5jvz5/2IC+ALzAXkAU//h/rH+Dv4e/X78sPzN/S3/egDLAXED0gT+BBsE2QLYAkEEeAXWBTkFtQO
xASsAPP97/lL+Nf85AeQBPQD8/uT/5wFNAxsD2ADS/YH7mflt9/z0CPSC9QL3gvZS9fH0gvfb/jMHmAx/DxoQlA17Cm8I/AUmBDsDeAFV/mf6EvcL9vj3fPtu/68CPwRNBJYCqf8X/fL7E/we/KT7tfu3/Nn93/6SAKQCgwTIBe0F/gQDBC8EEAWVBUgFWwS8Am4Aa/5l/WH9eP6OAA0ClwFeAHIAsAH7AnQDkgJCAGn9uPru94P1GvTi9JT2CPel9n/2tfZC+R0APwe1CxIP8Q+EDXYKhgeZBHkCggFJAK7+JPx3+dX42vko/JL/4QKcBF4EtgHm/XT6VfjM99T41Pov/Y3/pgBMAQIDmQXtB8wIvgerBQUELwPKAosCyAEAAfH/Kf6r/CX83/wD/0YCJgSPA4gCygIbBKgEogNgAU/+PftM+Mb1gvOn8vLzefUS9u71tPU692b9fwV9C4sPwhGNEHcN+Am9BX8CqwDy/3D/5v0O+9v4Wvhx+W78rABoBGIGhQXkAQX9B/gr9Z31XPg9/C8AnAIzAzMD2AMMBVwGbgffBy0HHAWXAoIAhP9//2v/6/5o/rD9qf0p/zkB1wKnAw4EqQRDBfcEfQMdAeT91vpQ+Mf1SvO28cHxv/K/82D04PRe92T+LAfiDZgR6hJzEXMO2QpABloCgf9i/q/95ftl+d33GvjW+YT93QGRBeQHawfCAxv+V/jq9Hz0S/YB+pT+BAKMA8YDvAPCBHUGtAdCCHgHkAXMA9YBz/+7/lX+I/5l/mT+4f0h/xUB4wJwBI4ERwQKBBQESwPDART/Nfzp+fX2H/Tt8e7wafGk8q7z1PT89pD8bARcC44QAxRIE1UP8AvuBxcEIwHv/lz9m/t1+QP4v/dO+If7kQCaBCkHgQfuBE8A7fo29kH0U/Wq+H/9zgFZBC8F5wS2BFgFMQY3B4IHCgaeA30BcP/C/VH9a/2v/SH+yv4AADoCGgSUBGYEVwQuBAQEzwOWAjQAQ/2e+Y31HPLu79zvIfGY8gf0PPXi9qn8cgWDDJ4RHBW2FK4QFwwJB8gCxP9O/dT7R/oV+EL3KfjF+ej8fQGSBSQIlwhIBu8BFPxe9gD01fRn97f7kQDxAzQFWwXMBFwE5QQ6BocHEQfsBJgCDgB4/XH87fw3/dn9Wv+1AEEC5wNwBGgExwTNBKMELgTlAq8AvP3b+XD1n/EG7+ruc/Ax8sXzFvUu9/38QAUmDGoRnxROFIQQVgvvBcMBKv8a/Zz7OvrP+H/4Wvk++4n+fwIjBuMIRgmVBvcB+vtN9k3zFPNk9fn5Of9dA+0FmAbFBe0E+AQBBssGggbdBLACcgAj/pX88fsg/PL8pP4GARUDugSSBWUFCwXMBEQEmgMTA5EBcP5X+rX1MvFG7jjt5u2c733xvfPt+HIBhAnjD5AUAxa/E1IPfQlnBPwAi/7X/Mj6Afhx9vf2hfjt++sApwXgCBAKxwiLBW4Ahfoq9j70t/Sz9wD8k/8yAtADSQTaA2gD9gOiBR0HXAe5Bc8CaAD2/gT+Z/1e/bP9XP+PAX4C6QKYA3sE8ASyBNID3QLOARgAkv23+cv0xPCs7oztJ+0x7jbwj/T5/GwGww1jE+UW5RWVEb0LlAUzAe/+Jf4X/YT6iveC9lP3c/m1/SgDDQiXCpIKZwjlAwX+pfhD9Tb01vVR+RX9LQArAigDigN0A3IDjwQMBtgGAwetBQsDzAD7/nz9lPxz/Cj9C/+mAY4DZgSSBJsEVwS4AxIDfAKrAfz/+PyT+Dfz5e6+7MjrK+zA7cnxfvkAA5ALTRLJFt4WUxP9DR8IVgNPAK/+s/2T+yb4IPYR9oH3TvtOAVEH5Qo3C/UICgUCANT6XffP9Xz2ePmz/LH+wP9PANgAyQFZAkUDZQV0ByoIRAe8BN0Byf9j/oL9X/3a/QP/VQHJAgYDZAO8A+4D/gPQAyUDVQKlANr9Ivpg9TPxZO4a7JfqUOtd7ir1//4lCCUQhRa
/GJUWhBGFCq0EeAEZ/yP91vsZ+gL4dPbH9ej3gf1fBAwKdwwlC34HlQIa/SL4QvVR9UT4Efx8/sT/KgANAEgAIwFHAg0EYwY+CKMI7Qb9AywByP4I/ZH8Cf0o/gMAmgFwAkgD8gPZA6kDqwN7A68CxwBI/ij7Gfdq8zvwwewL6mDpn+yo9E/+aQdPEA4X9Bi9FmcR2AroBYICBgA//uP7jPkl+FT2UvWR95T8BAMDCdQLogv8CIYDhP3I+ML1uPWB+Mr7sf6xAB4BTQD5/pf+VwBMA+kFFQgzCZYIMQaAApD++/uK+338E/6CAHoC+QLpAsICUgL4AaMCNgMPA9UBkv8Q/Hj3P/O372zs4ukh6WnsvfQx/h4HPRCgFkYYHBb7EHoLMQfAAzkBCv/f+7v4jPam9Lb0sffE/MsCPwh5C/ULQQnmA3T+H/qY92r3BPkt+3H9lP+MADQARf/n/icAoAJXBYEHtAisCPcGjwND/+L76vow/Fb+bwBHAncDZwNRAtwAKAAwAcgCTgNZAjgAVPye9/Xymu7A6+Pp6epl8L73E/+sB3AQUxUXFl4UDBHuDNIILwUGAtH+aPtY+EH1NfNa9FT4cP1tArgGhQk+CjYIJgTg/5b89/qv+vf6X/tY/KH9I/7W/S3+T/+ZAHoCBgVQB5sIewjQBu0DgACB/en76fsm/Yn/2wElA1oDewLFANf/LQCjAC4BSAHy/8b8XPgF83zuSevt6SbtivNM+o0B1gnuD8MSfBMLEpgPtQyHCZIGrAPa/xr8l/iX9G/ytvP49n377wDiBe4IcQnUB+0EiAG4/nb9nPzv+5f8jP1i/Xf87/u2/KX+VQBuApQFJAh2CUIJyQYzAx0Arf1T/Nr7YPyd/jcBmAL5Am4CDAEuAIf/+f5Y/0v/9/3i+8n3iPLo7ajq2esT8TX3Sv7YBq4NsxABEVMP8g3/DEQLnwkGCG8F/gF+/Z/3M/MV8n7zsfYS+xcA9wTXB00IEwfzBCkDEALTAEX/f/5f/s390fua+XX5BfsC/ZH/9QKNBkAJ7Ak/CCwFGgL5/7X+uv1X/Vf+CgAdAUEBFAH5AHUAoP/K/j7+yv0S/f37hvnZ9eHxnu5+7iXx8fTW+tUCcgm1DLEN+AwzDEQL7AmPCZwJzwjnBkQDLf2b94H0C/M586D1WfrG/3cDBgXQBbQFDgXOBAsE4QJpAu4BLAAR/SL6K/mp+Vz6HPyg/7ID7QaTCHII6wbOBNIC1gAb/57+hv+iAM8AcgBpALMAPQDY/tL9dv0N/T78Y/vG+WX3Y/Qq8ZTwI/Kn9CL54v8HBssJyQvWC1MLfQp1CSEJMQkiCd0I5AaWAuv91vlc9ujzTvN99af5hP3mAN4DYwXgBd4FRAWPBEcELwReA/AAq/01+5L5jfhR+fv7ff/WAmgF8wY8B7kGhwX8AyoC/wDgAMUAqACEALMAlQCE//H9v/yb+wP6Svkc+aP4jPdd9WT02PV59/f4QPzQALgEAgeTBwoIaAiuB0MHpwe7BzgInwg6B8EEwQFV/vD6GPdA9HL07/Wc97L6ff7LAZAE4QU+BlEGvwVXBcQE4gKfAAH/+/yv+oH5TfrF/Fv/UgErA3oEvQSBBFgEaARbBA4E1QNyAysCuABz/+z9mvy/+7X6HPkJ+Jz3YPdm9lH1lvYB+e76Qf21AM0DhwUYBtkF5wXaBWwF1QWxBjAHIAjZCPUH8gWoAxMBwf0A+nL3r/aB9uX2Bvlc/Cv/bgF2AwMFvAWIBQoFegTjA/YCqwEwAEr+gvyK+y37Mvts/JH+IwDXAJIBZwMpBXYFbQUdBk0G9wTsApsAAv7i+0j6HvgK9vb0A/X29Fb0jPWw+Kv7Av4IAR8EwgUfBnsF2QQfBD4DUwNEBGwFogbdBycIqAerBi8FwQId/xv8Dvoo+K723Pay+Cn7Yf0Y/zkBMAM8BI0EuAQcBUQFzwTNA64CUQHJ/zT+jPxK++v6Oful+5z87/4+ApEErQX
rBhMI2gdYBoEEeAL+/yL9MPoU92v03vI+8rLxa/JL9Z/43vuS/30DKwY2ByoH3gbsBeEDgQIeAngCqwNIBZQGeAcGCLgHTwY7A/7/5P2h+yv59vea+CP6g/uS/FP+kACbAQkCqgJRA/EDigQOBUAFEAUdBOUCKwHE/gP9Gvz1+q75qfl7+z/+8AC2A7EGjgi4CAsIhQYYBDIBMP79+lv3pvMB8UDvvu7u8Mn0wvg//SsCyAWJB6AHbAebB1cG6wONAiUCywErAkUDYwSQBX4GtQaUBf4CpwDG/jv85Pl8+V36KfvT+8P8Y/6J/5//DABNATsCpwK4A+AEdwWzBYgF4QSDA4YBZf+A/WH7n/lY+df5E/u5/VgBsQRPBw4Jlwm9CBsGxAJi/4r7cPfl8/7wAu8O77nwufMg+FX9jgJiBgYIlwi0CH0HTwWTA8AChAJkArcC2gOwBKoE3QTSBH8D6gGUAAL/JP3Z+9f7V/xQ/KL8+f3D/pb+pP5o/00AygCDAUQD3QRZBcsFRwa6BUUEgwKsAGr+u/vo+WP5dPlD+oX8r//YArgF4gf7CKAIsQb+A3EANvwf+Gj0B/Hp7ujuX/Br88j3ufyxAYwF0gfxCNoIagerBWYEdAPuAvgCnwN2BJoEUQQhBEgDrAFkAIH/bv5U/cH8t/yt/JX8R/2r/nn/sv8bAI8ARQCT//z/egHdAvMDTAVMBg4G1gQ0A1oBBv/D/IP71vpo+uT6fvyT/u0AagONBewGEgc/BnsEUgF8/cH5z/VB8kHw2+8I8c/zqvcd/H4ACATFBmMIfwjPB8kGTwXwA1gDcQPnAzoEYgSJBBQErgIGAdT/Bv9Q/qz9VP0O/ab8hvz8/Nr96f43AH4B+wGIASQBTAF4AbABbwLAA8sE2wQoBDsDqAGA//z9Qv3J/K/8Lf3+/fL+CwCEAR4DKgS4BMcEkQMjAUD+9/pn9270nvIJ8q7yWfTy9k369P1yAW0EbgZyB9QHigeVBnsFwwSGBGYEKATqA4EDngJqAUQAPP+P/lv+dv6M/kL+o/0X/c386PyQ/dv+YgCKAWICEQMgA7oCnQLcAigDKQPZAoMC5gFoAMj+9/2m/Zr9Ef4I/y4AEgGRAfQBFQLmAdABfwFxABL/jP1o+xH5HPe89VD1t/Wr9mv4fvqF/Nn+SgExA5wEywV5BtsG/QbLBo4GMwaYBeAEmAPZAV0AE/8W/rD9rv0a/p/+sP6u/p3+Fv7Y/Vz+JP8OAB4BRQJVA68DagNiA3sDOgO5Aj0CuwHfAJ7/mv4x/iD+aP4r/0wAbAE/AtcCGwOoAr4B1QC1/23+Uf1C/Bz72Plv+FD36Pb79nz3jvgP+rn7gf04/7EAGwKRA+EE+gXKBlEHkwdLB4UGcQXxAx0CZQAJ/x/+pP2V/cz9Cv5N/ov+if5W/mv+zf46/7D/cgC5AeICcAPfA1oEWwTIA+oC/AH/ANL/r/4F/t79Cv6u/tT/LwFwAm0D/APvA0ADEAKKAOj+Q/3H+7P6B/qH+f/4nfi0+PH4Ifmr+Y76i/u1/P/9K/9oAMwBUgPyBFAGXQcyCGYIzgexBhAF/gLjACH/9/1A/db89PyE/RL+f/7M/h7/of/y/wMAQgC8AF4BCQJ8AvkCnQPRA3cD2QIAAhIBGAAk/5T+cf6T/if/NgB9AdEC9AOaBJ4E6AOKAtwADf8r/W77HPo9+Zf4GfgD+ID4O/nK+Uj6Afv3+/f8wP1u/nn/8gB5AsMD7wRLBn0H5we1BzYHJwZrBFoCdAAC/9P92vx1/KH8G/20/TX+rf5d/wYATwBqANAAlQE7AnkCqQL7AiYD4gJGApAB2wAkAHv/Bf/Q/gD/j/9FABMBIAJMAyMEWwTlA+QCcwGA/1T9jftT+nz57viY+Ij4xfgl+Zj5OfoD++H7vPyI/Wv+c/+fAO0BOQOGBMQFpQYbB1oHQweZBlYFwgNJAtEAMP/B/ev8t/zi/DL9rv1t/jr/0v85AKQASwE
CAmwCkgKyArkCgwIIAm4B7gB8ABMAxv+Z/6f/8f9LALQATgEXAvACggOlA3kD5AK6ARcAN/6A/Cz7JPqA+T35Cvn1+CH5Yfms+Rr6xfq/+8n8wf3k/iEARgF4Ap4DhQRGBd4FNQZCBtwFIQVcBF0DBAKZAFD/Wv7D/Wv9Vf2b/Rr+pv4e/4v/JgABAckBVQK6AvQC5gJ5AsUBBwFhANj/gf9o/4n/4f9cANgAVgHkAX4CGgN/A4IDMwOQAo0BRADM/mD9OvxE+4H6Ffrd+a/5oPmn+cr5Gvp9+h/7FfwU/R3+V/+bAMsByQKEAzwE4AQpBS8FEgXMBGcEtgO5As8B9QAUAE7/qf5P/lT+Y/5n/p7+Bf+V/0AA4gCNATYCjQKPAkwCyQE9AbAAFACk/3r/h//J/zEAyACSAUACvwJEA8ED8QOuA/4CEwL2AJX/IP7s/BX8mfte+zX7Dfvs+tL6wfq7+sn6BfuI+zj8BP36/fn+2P+gAGEBEgKmAgoDVgOnA9ADsgNqA/4CagKpAcgACQCC/w7/sP52/m/+rv4Z/47/IQDSAHcB3wH1AdsBrAE+AY0A5f91/0T/R/90/+P/lABXAQ0CuAJSA9cDNwRIBPcDVwOPAqkBngB4/3z+zP1S/ff8uvyU/HT8QfwB/Nv72/sQ/Hz8+Px4/Qz+mf4M/3H/2v9RAL4ABAFAAYcBrAGqAaYBlwFpAQoBhwAaAMH/Wf/9/s7+yv70/kP/sv9AANYAWwGzAb0BlQFVAeMANwB3/9P+c/5M/lL+q/5e/0EALgEHAssCfAP8AzEEGQS9AzEDhAK4AesARQDT/33/Jv/g/rT+hv40/sf9e/10/aT95/1A/sb+X//M//X///8QACIAIgAcADcAYQBzAHMAbQBeAD8A+v+U/y//xP5K/tT9ef1Z/YP93P1Q/uL+if8rAJkAugCmAHgAKACn/wz/if45/hD+Df5N/un+xP+wAJEBZAIhA64D+QMFBN0DigMHA14CtQEqAcYAgABLACcADgDk/5v/R/8F/+T+4v70/iz/k/8KAHEAwAABATEBPAEjARIBGgEaAfQAvgCSAG0AKwDL/27/Iv/V/m7+8f2H/Vf9Yv2b/QD+jv4u/7v/EQA3ADgABQCW//r+Vf7H/Vn9Dv31/B79hf0n/vD+yv+kAHIBHgKPArYCogJxAjIC6AGbAVsBLQEMAe8A0QC0AJEAaABCABsA/P/2/w0APgB/AMUACAFIAX0BpQG7AcMByQHGAa0BgQFfAUMBEQHIAHUAHwDC/1D/1v5o/hX+6f3q/Qz+Uf6x/iD/ff+w/8P/w/+k/1P/2P5K/r79OP3G/Ib8hfzE/Dn92f2V/ln/EgCsABwBXAFvAVcBHQHhALgAngCOAIEAegB2AGQARQArACMAJgAoACcANgBeAIsAuQD5AEUBjwHEAeMB+AEHAv4B4gHKAbUBngF9AUgBDgHUAIMAJADO/5D/eP99/5n/1v8lAGkAjwCYAIkAagA5AP3/vv94/yP/uP5J/tv9fv1D/Tb9W/2r/R3+o/4l/5X/7P8xAFsAZQBPADUAHwADAN//u/+d/3r/Tf8e/wH/B/8m/1f/l//b/xgAQwBiAIMAqQDZAAgBNAFcAXkBiQGOAY8BiQF6AVwBNAEIAdMAjQA+APP/uf+d/6f/5/9UANEAOgF4AYQBXwEVAbwAbQAuAPj/v/9+/zT/4/6S/kv+Hv4X/jP+bf6+/hj/Z/+g/8H/1f/i/+v/+v8NABkAEADo/6P/S//t/pb+W/5G/lv+kP7Y/ir/f//L/wcAMABMAGMAegCTAKwAxgDgAO4A7wDnANsAzgC7AJ4AegBQABgA2f+n/5X/s/8GAIMAGwGuAR4CUQI/AvQBiwEcAbgAbgBDACYABwDj/7n/j/9o/03/Tf9j/4P/nv+u/7r/vP+q/5n/nf/A//r/MgBRAFkAQADl/1j/w/5R/hv+E/4o/mb+xv4V/0b/av+K/7f/4v/z//3/HQA
2AEAARAA8ADQALAAUAPT/3v/I/63/if9d/zf/Lf89/23/yv9PAOMAYQGwAcgBsAFuAQ0BqQBjAEIANwAuACgAJwAfABQACQARAC0AVgB2AIIAgABwAFoAPAAhACQASQCAAKkAvgC5AIUAGQCJ//v+lv5Z/kT+ZP6w/v/+Ov9n/5b/w//g//H/BwAdACAADQD5/+T/zP+m/3T/TP81/yj/GP8J/wv/F/8k/zL/Vf+e//7/XgCzAPcAHwEhAfAAogBWABoA7f/U/9b/8/8aADMAPgBKAFwAeACbAMUA8AALAQwB8ADJAKcAlwCeAL0A6gAQARoB/AC9AGEA9f+Q/0H/GP8U/zD/Xv+Q/73/3f/s//P//v8SACkAOAA7AC8AEADf/6L/aP83/wz/6/7S/sf+xf7N/tv+8v4R/zv/bv+r/+r/IABCAEgAMgACAMX/iv9g/03/Tv9k/4f/sv/c/wEAIwBNAHYAmwC8ANEA2wDYAMYAtACrALMAzgDzABYBLQEpAQIBwgBzACoA8v/V/9D/4P/4/w8AHAAjACsAOABNAG0AlwDEAOUA7wDbAKsAYwAKAKz/V/8V/+v+2P7b/vH+EP8z/1n/fP+g/8T/5v/+/wcA/v/h/7L/ev9G/yD/CP///gT/FP8p/0H/W/98/6P/y//z/xgAOQBUAGsAfwCOAJgAowCtALgAxADQANQAyQCrAHwARgAXAPn/7//8/xAAJgAzADYANQA2AEMAYQCOAMUA9wAZASIBCgHQAHwAHgDB/3b/RP8t/y3/P/9e/4P/r//d/w4AOABaAGsAYwBEAAwAx/9+/z//Ef/2/vD+9v4J/yH/PP9X/3L/iv+b/6j/s/+6/8T/0f/k//n/DwAlADUARABQAFgAVwBRAEIALQAZAAcAAAAEABIAJAAzADoAOAAwACsAMABFAHEAqwDpAB4BQAFFASoB7ACbAEYA+v/C/6D/mf+o/8H/4P/9/yAARgBvAJQArgC3AKoAgAA8AO7/of9g/zL/GP8V/yX/Pv9d/3r/k/+m/7D/r/+p/6L/nv+f/6b/tP/F/9j/4//r/+//7P/l/9z/z//B/7H/p/+d/5z/pv+3/8P/0P/h/+j/5v/u//3/EgA1AGMAlQDBAOMA7wDfALMAegA8AP//0//D/8n/3f/9/yQARQBnAJEAvADhAP8ADwEBAdQAjQA6AOj/nf9o/1f/XP9v/5H/vf/b/+//AQAHAPf/8f/3/+r/0v/V/+H/zP+y/7j/vf+v/6r/rv+z/6b/ov+U/4T/b/9r/5L/t/+1/ykA6QCMAFUAAAHHAML/6/8uAGj/Kf9x/zX/yv70/vj+//5N/5n/8P+IAOYAFgGMAdQB0AHgASAC+QGvAZsBZQGbAP7/qv/7/lP+Ov5Y/lj+qv41/7//EgBpAL0A9ADpAOoAEgHuAK8ApgB+APP/wv+o/y//+/47/yn/Af9a/5j/hP+e/+f/7//s/wUADAD2/+r/yP+c/4P/U/8w/0z/Wv9E/3X/tv+o/6D/zf/b/8v/2P8HADQAQQBfAJsAqgCPAJUAuQCsAIUAmAC1AHEALAArAP3/sP+i/7r/u//m/x8APABkAJQAlwCCAIUAeABkAEwASwBBACYAHAAWAAEA7//r//z/EQAGAHYAQwEqAdEANQEaASUA4f/3/1X/3f7Z/ob+F/4S/g/+LP6X/t/+Ov/d/zYANwCKAMEAjgB6ALgAqABkAKMA1QBkAAoAJwD0/3T/VP+H/3z/aP+i/9H/1v/Z/87/x//g/8n/1P8rAE4APwB8AKAAiACbAIgASwBnAHwANQBGAG0APAAyAGQAQAA1AIYAfQA7AFQAZQAEANT/3/+q/3//vf/d/83/AQArAAoAEQAqAO//0v/u/9f/qP+i/4D/av9q/0L/O/+E/57/mv/z/zcALwBFAIUAegBNAEsAUAAbANX/vf+W/0v/KP8t/xj/K/9e/4T/yv8TAC4AZgCmAKUAsQDMALEAkACaAGMAEgA
OAPT/o/+V/5z/hf+3/+D/5/82AHkAcwCpAM4ApQDKAPEAqACJAKgAWwABAO3/tP9y/3v/cv9k/43/rP+p/77/0v/V/+n/CgAnAEEAhwCwAJIAkwC1AF8A9v/t/8X/Zv9J/0v/J/8y/0D/Jv9G/3n/av+D/9n/3//s/z0AWgBTAHYAlACQAKUArAB9AFcAUwAmAL//i/+G/33/cf+Q/77/7/8uADsAMwBTAFsAEAAIACwA6P/B//f/7//H/+z/BgAWAEQATgBXAJIAlwBcAFkAUAABAA0AfABSAEIA8gAUAZ0AfgBsAL3/L//3/qL+V/5J/o7+sv7J/vX+ff/6/wcAPwCPALcAkQCIAIwAdwBwAGgAdwBzAEUAJgBHABcArf+m/5f/K//v/jH/Tf8t/4L/6v/o//n/QwBkAFgAeABzADoARgBhAEYAcwC7ADgAzv8xAC4Arf8DAI8APgAkAD8AJgBKAGUAWQB+AKgAYwBnAKUAbwBTAGcAWABHAAIAiP90/1P/Cv///v7+Lv9w/5r/CQCZAGUAWQACAbEA9v8wACkAgv+c/5H/EP+b/wcAlf/k/6kANgCh/9P/n/+k/xsA4P+m//D/1v/Z/zUAMABfAC0AgP+Z/6f/Gv/f/n//nv9s/ywAMgD4/4EAkQACACwAhgBRABwAJADJAJ0AmgDMAGcASwA/AB8Ak/+g//H/3//S/ykAWgA7AEIABgD6/wQADwByADcAlP/7/zQAgv/A//T/TP/z/7kAOQAFAEoAxP96/8j/H//U/i7/m//L/1IAHgG5AFYAYwDX/z7/MgB7AOP/JQA2AEEAUgCs/w7/WP8Y/7v+wP9/ALP/7/8wAXUA+/8UAZ4AFADjANEAPgCFAAQAzv5v/0oAEQDG/yoAFgCW/3r/fP9JACUAOAAMATsBfAHeALH/ov8TADf/Wf8xAMD/wP8YAM3/GP+P/6z/MP8NAI0AAAAjAL4AAgCv/3v/TP9nAGIARACfAPwAtABL/+f+Vv9//y//mP9TACYAEQAoAD0AR/8F/7v/hf9KAM4A3QADAdoA3P/k/jr/HAAbAKz+IwA7Af7/fgAzAJX+Rv96ALr+D/8kAbAACQGvAdkAegAmACX/Vf/G/zz/VACfAMv/7//9/5gA+wAPAK7+ef9OAHT/Rv/3/3oAqQDaANT/sP8GAb8A8f9mAGQAzv8XALz/Bv8V/7b+ev+rACcA+f8JALv/tv+N/3H/IwDZANAAlQBnAEwAxf/N/z8AKABmAP3/bf+s/0T/rf/tAEQAXf/JAH0A6f7Z//j/1v8yAAsAAABsAMAAEAC7//P/FQBf///+3P8dAAMAlgDTAMb/vP/8/4z/LQD+//j/fQCBAAwAHABRAIz/oQCxAE7/m/9KADYAyf8fAGz/8/5T/3P+6f6CAIQAGwAjAXIBXQCn/9n/bgCf/5L/3wCwAPL/gQA+ABb/9v8SACb/YgBLAVQAs/9CALv/8P5Q/7P/5/++/yYAeACAAF4AIQDX/zP/3P6u/vr+Gf+D/1sAuQC4AGAAmwDvAAAA9P/VAJUAAgCQAC0ASP8/ACQAxf/H/9P/8f8wAO//yf6m/xUACADTAOMAPgGVAbIAYP+G//T/g//b/44AhADT/ygAXAAcABUA8P4R/xH/YP/B/yn/LQDYAPcArACxABUA9f6V/3f/hf8RAC4AiAB/ADMAUf9W/53/Tv9j/7H/QgCy/2P/5P9q/wz/nQB9AWkARQHiAYwATQAdAM3+xf5TAJP/sv6L/8b/oP8Q/5j/rv83//H/QwBVAAYBnAFgATsCcAK6AIsAyQARAK//rv94AIEAhv/d/zQAYf+6/50Amv8RABYBo/+g/2AAMv+D/rP/jv8k/wEAxf86AHMAQf9r//P/6/6H/in/WP6z/iIAOf92/4oA0P8R/zz/0P9S/1f/kP8n/2D/hf92/+/+L/9XAEUAAQB/AIYBSAGFAIwBEwGRAPEBIAIjAfIBIQPKAXABqQGOADYAmAB
7ADoARAB6ANoAnAAKAEQA4v+r/4gADgCf/1MAigAiABYAkP8m/37/k/+k/1j/Kv97/77+f/0E/mf+gv0x/kf/v/4M/yD/7P24/cr94Py1/H39Af4G/0f/AP8lAEYAsf+CAIcASAD1AcwCwwFHAhYDwwLlAtoCpgLdAuECWgJAAiICFwGFABUASv8Z/77+T/5v/+f/6f5G/9v/0P8XAEsABwBzAK0BCgLUAeIBsgEpAS8BegHnAIsAsQDiALEAvf9e/7z+Pv7O/SL9Q/1R/Wb9IP2v/BX8wvul+0/7sPvf+7P8F/6W/hf/M/8D/7j/jAClAGoBgAK0AowD4QMoA4EDCARIA8YCWwMYA14C0gETATIAkv8Z/2z+ef6g/kr+L/7W/g//df7U/qb/4/9OAD8BXwIYA6kDRQRZBAEEvwOKAykDfAMBA/IBSwK8AUMAhf8T/zH+4fyF/L38wPwg/GD7tvu7+zH7mfph+mD6c/rP+hT7CvzI/K39/v6c/3kABgEvAdEBRAJiAt8CmgO6AxYE1gSDBDMEsgRWBKICWQGsAND/KP/4/X791v1F/Q79C/0H/ZX90P3b/bX+5v/p//EA4QLLAgYDWQTGBOQEjAWiBXkFfQWzBDwErgNjAn0B/QBmAJr/yv5//cf8PPwU+x37GftT+ln67vpP+kD5r/lK+jr6MfvI/Ab9u/2f/zkAfwB8AQUCMAKUAhkDWAOtAzME2wT0BFcEQwSwA3sCrQGdAGX/1v4h/pn8ifwb/U/8TfwY/ez8pvxu/e/9M/5I/4EAnQGnAr4DtAQrBXoFrAWaBZMFxQXwBWkF0wSOBBwENgMAAkYBHwDK/iH+Nf3c+/X6IPvE+jH6hfpp+l36c/oq+gj6Gvp/+l77Lvyk/Pv9sP+hAIsBFgIAAqUCPgMSA1EDBASIBCcFnwUABVcEvAN/AkAB3P9v/mj9mvwQ/Lb7RfsJ+4D7rvtp+zj8mf1a/ub+5f/cAFwBRQKsA40EcAWmBj0HOAeSB3MHQAYCBvIFdQQBA6QCFgLjAAIA6f6H/ZX85/u9+rb5D/qI+l36qvrq+nP6W/pS+rj5D/p0+338Xf0N/9QAfwHMAbECJwPTAi0D6APzAyQExgTQBIAECwQKA+MBBgHW/4/+y/3t/Cv8ivv5+tv66/oS+6T7ZPwF/b/9q/61/+4AyQGVAikEZgUdBvEGoQf3B+gHZQeFBrQF/QQ9BGkDYQKTAegAzv/W/s/9iPya+876AfqP+aL5BfrE+h/75foc+0j79Prg+o37g/yu/Tn/xACgAfMBnwLKAh8CTwIfAzADnAOxBL0ETwRZBI0D3QHlACEAOf+Z/t796fwE/JD7Hfus+qr6T/sa/Kr8rP2b/gP/FABmAfwBxQJZBLwFlQaZBwcI4geaBwgHCAbIBCUEoAPwAl0CsQHSAJv/iv5M/cH7rfol+hf6Gvp6+ub67voT+/j6pvpI+jj6C/uQ/On9pv6k/5gA1gD7ACUBLwGwAf4C4QMuBKMEygSKBAIEYwOwAvwBbQHOAAgAOP8o/qH8yPtt+5j6Rvrp+un75/ze/WL+vv5r/1AAKwG/AeAC1QSFBlsHmAdxBzgH8wYFBtcEXgQtBAYEkANtAkwBUAAK/6f9Yfw2+8f69Pq6+mb6jvrJ+hT7Efuw+pP6zfps+0b8B/2w/aH+xP9nAFsAJgCOAH8BQALcAocDMwRtBHoERQR0A/EC2gKNAscBuQDB/wf/B/7A/O37jfuS+wT8ZfzS/Hv98/0g/m/+Ef/0/w0BPQLBA/EEjwU7Bk8G1AWpBZUFIwXaBK4EHgSvAxID5AGcAHL/rP4X/k/9pvxb/Pn7jfta+xP7Ivtg+zT7RPue+5z7u/tr/Pr8Z/0s/tj+Qv+1/w0AYADvALMBYQLeAlkDsQPEA9QDpQMqA9UCZAJ5AYQArf/t/kz+iP3q/NH82/zh/A79Pf2n/TT+jP7r/pT/hwCWAa0CrgN6BFYF5AXGBWwFJAXzBMcEbQT
aA3UDAgNTApkBtwDK///+aP7X/RT9efw2/BP86Pu9+7H73/sb/CD8F/zU+7z7VvzF/Ab9kf0E/oH+Gv8+/w7/Q/8SABYBzAFIAtUCPgNDA+UCXwIQAr4BUwEGAZ8AIACw/x3/ev4Y/tX91f0Y/k/+l/4M/4j/1f8mALsAfAEvAvUC5AOOBOUEAwXsBKoEMwTcA6YDbgMyA8cCWgL1AU0BWwC0/0T/jv7l/bv9mv0m/eX84Pyj/Hf8gfxn/Cn86/u8+777B/xC/Fv8sfw4/YP9e/2P/R7+8f7e/8oAcAHwAWkCfgJFAhsCzwGzAcIBjQEkAbYAYQDv/zT/of5//mL+gv71/hX/Jf/C/yMAFgB6ADQBBwLvAqsDMASkBMAEnwRGBLcDkAOqA6IDmwNoA8QCNAK6AboAwP9D/+n+qf5p/hX+2f3T/b/9av04/Tz9Tf1D/fz8pfxI/AL8/fve+6z79vuJ/NP8EP1m/d79o/5p/xcArQBIAfsBSwIfAhMCMwIbAvwB1gFtAQkBxgBKAKD/J/8V/x//Cv/+/h3/Yv+9//T/CwCAAFkBEgKoAlIDvgPyAzsEPgTaA6EDqwO8A7oDbAPOAk8C+QFWAWwAxf+e/4X/MP+//mv+Sv4b/r39d/1d/Z799f28/S79kPzO+zn76/rJ+hP7rvtC/L78/fwp/Zn9J/62/lv/FQDpAJ4B3QHDAawBwQG0AXkBbwF3AU8BLwHoAEEApP9M/x3/DP8m/1L/nP8pAKcAugDoAJYBRgK3AjUDrgMGBEYERQQUBO8D3QO8A3wDJQPUAlkCugFKAeEATAD+//X/kf8T/+T+tf53/k7+Ef4C/jn+KP6x/RP9aPzQ+1D7HvtL+337zftS/ID8aPyL/NP8LP3N/ab+fP8tALUAAAEMASsBYgFqAX8B1AH1AbwBYwHrAFwA3f9u/x3/D/85/3L/r//s/xIAQQC1ADsBpgFIAgkDlgP+A0YEQAQABPMD+wOnA2YDdAMqA4UCIALbAWMB9QCdACwAxP97/0v/CP/F/sX+vv6P/nH+Lv63/Tz9y/x8/Dr85/uq+8z79/vW+6/7uvvS+/D7Ofy4/G39MP75/pf/+/9+AAUBIAFBAbAB5QHeAeMBwgFlAfgAcwDy/6H/f/+G/6v/6/8qAF8AoQDhAAoBTQHeAYAC+AJfA7wDAAQ3BCsExgOVA5gDXwMVA/ICrQIwAsoBgwEUAX0AMgAVAN//pv+A/2//X/8u/wj/6P6G/i/+Gv7d/V794/xg/Ob7iftB+//61Pr7+kn7b/ul+wz8hvwc/b39cf4d/7L/LgCJANMAFQE9AWcBmAGkAZkBhQE8AdcAkwBgAEsAXQBvAI4AuwDlABUBRQF5Ad8BZALMAicDhgPEA94D8wPbA4UDPQMvAxEDtwJZAiAC7gGbAUEB+QDHAJ0AgwBoABUA5f8GAMT/NP8Q/xP/5P6T/jj++f13/YL80/uR+y37wfql+tL6E/s9+2T7kfvM+1n8CP2V/Un+FP+h//P/RQC8ACEBTgGKAdMB8wHsAb0BWwH4ALAAigCSAJ8AsQDeAP8AGAEyAU8BlAH0AVkCywJBA5IDlwNxA1kDZgNFA/gC5QLyAtQChwIxAgMCywFRAREB6wB8AGEAjABzAE4ABAC6/6j/Yf8W/9P+f/50/uj9yPw2/Nr7Sfvm+rr60/rn+tH63/r4+iL7pftc/CD9xP1D/gL/x/83AF0AjgADAXcBnAGjAaYBqQGxAV8B+gDgAMgAzgAFASMBKwE2AV4BmwHJAesBKQKkAv8CBgMSAyoDLQMoAyADJQMXA/MC3QKhAkgCKQL1AXsBHAEXASkBEQHTANQAFAHLABMAu//d/6//Fv/v/p/+ov37/LH8LvyF+zn7QfsM+6H6bPqY+sP69fpE+8H7bvwH/Y39IP6u/jL/w/8zAIcA1wAbAUoBbgF8AYIBiwGeAa8BnwGGAYABdQFGASABPwGGAacBxgH6AT8CigKfApkCzAI
IAwsD8QILAzADwQJnApoCggL3AdoBLwJjAs8BGAGCAQECbQGlANIAMQGKAI//1/8KAC//Wv4R/q798PyN/FH8/vu3+3n7ufp0+qL6hvp8+hH7h/vH+2X83/xy/ej9Sf6v/kH/vP///zMAvwA5ASsBPQGSAZkBaQGIAZsBswGxAXUBSQFQAbEB1gGaAdsBTQKFApECmQKpAs4C1AKFAtIC9AKXAl8CZALKAn4CFQLYArICpQGrAcUBZgIIAjgB4gHHAZYANQBdAA8ARwBd/5/+Vf7X/fb9Ff1W/OH73fuz+6/6QPqu+nX7I/vw+r379fsJ/Fb8//yQ/Xz9D/7K/pr+yP6R////fQC0AKkAAgFZAYgBLQEtAa8BmwFrAVEBgQH6AUwCWAKeAkoCQwLPAnkCpAJ6AuMBpgIIA0cChwKYAlECLQJgAp4CuAHeAVgCrgJaAkkBAQJsAnUBFAERARwBBQFpAAsASABa/7r+c/77/Zz9lfxE/VP9EPxY+9v7Jvx2+9f6SfuU/Bj8zPsl/Cj86fzR/FD8cP0o/qz9Mv6N/4//OP9g/yIAlgD5/2wAegGOARoBFQF8ASIC+AErAsQChgKsAqoCZAJ/AtoCTgIvAoMCMgKbAlMC7AGKAiACAgKTAhwCMgJjAu4CKwLVAZEC9QF7AgsCRwGwAFEB9QENAML/qgDY/7H/CP9a/c79Lf4g/rH8zvta/bX8G/vp+y/8t/sY/Ln8Zfve+gX9Dv1I/PL7ev3W/df8eP5l/vr9ov5l/nL/BwB//+j/EwEfAX4A3AALAbUBlQFUATICKALIARECVAI2ApMB0gFOAq0BjAItA9wAygGKA4UBLQL5AqkCOQMpAjsCagNFAo0CdwLPAV4DEwIQARcBJgEQArIA6//5//D+P//2/jr+8f6h/Vf87P1K/R78F/0d/W786Pzk/Ob7P/3M/Hj7Zvyo/e/9QP0s/R/9C/6s/rL9gP0R/5//Gf80/4L/lwAR/3r+8QFOALz+PAH7AZAA8v/GADkBfAG1AJYBRgEEAWECYgGyAfwBtQGYAlgCAQPzAiUCNQNgAxwDeQPvAt0BpgK6AlgCBQIeAn0CVQFvALUAGAHP/1H/RP8IABf/zP03/0j+ZP0k/jD+2P0U/c/8Uf2z/Lv8Tv0z/fn8Kf3p/Rv9yv0R/yH9J/2X/4b+gv0m/0v+jP5KADb+ev5XAeX+S/78AM7/g//D/+j/igDf/5cAcwCaAJgAqwDUAeAAZwHTAagBUQJGAmYCYwLTAgwD4wIPA24C0QJ5A7cCNQH4AgoDvQBtAjYCWADoAKcCRwCq//sAIgAAANf+jf+n/zv+Pf/R/gb+lf7j/eX8Of60/qj8of1T/ln+I/3e/jL/tfte/7L+M/w+/7b/rv3c/Y7/Zv4c/jwAmP6x/QYA9//L/kb+z/9UAM/+kP/G/77/HQATAJoAhQA8ATwAegFyAh3/IQIFBGAAgAIrBFUBuAIABKsBPQO7A+EAPQO2AnAAqwLLAhoBDQJDAqkAcgGfAXsAbgBHAJP/kP87AB4AZv6r/43/Uf6Q/vv9dP6W/rf+Yv5Y/iL+XP0f/mj/1f76/Sn/of9y/S7+xf9+/sX9gP7I/7X+a/25AMv/p/z8/8n/Vf6N/5f//v4y/wEAfv8P/3v/XQCVAAoBlwHiAPoAYAGFATQBhgFvAiwCLAJFAvkC6gF3AkIDvQDLARgDFQH0Aa0DdgCQAG0D5gC1/1ECTQCE/84BfwCv//z/lgD2/ur+XgCT/nf/mP8M/sj/av9r/jD/9P67/nb+MP/e/nn+6P6c/o3+6P5i/+j9QP7W/jb+QP+f/8z+Bv7j/vH+Ov5W/1L/UP5u/3r/pv8qAHD/mQBaACMAkgFBAVIAbQD8AVcC3gCYAeYC7AClAAwCKQFeAogCHAFRAvYB6ADHAVACsQCWAFwCUgGOAQkCeADqAAsBSgB+ABMAUgBmAMD+3ACkAX3/Kv/K/qf+vv7J/9v/ef8oACv/wf1
V/+T+Pv3V/qj+Cv/Y//j9Bf/+/hb9Gf9//pT9e//Q/mf+QQBd/7/+LgAe/t797P/w/w0AYwBPAE4ARwCD/zkA8AEPAJb/5gGSAAsBCwO3ANAACwJ2AEEB5gGzALUBSgIlAWwBQwJ4AS0AGAFkAcYAGAKaATMAqAEvAar/fQEDAQ//KAC5AAIAqv8eAHoAXP+V/54Aef9z/pT/U/9s/QP/cACA/rT+Vf9A/ab+PQAX/kr+af8W//v+9f4w/qD+DP8P/1IA3f8t/wv/P/7a/qT+0f1p/8D/4f5//+b/r/+P/xgAYgBuAH0BtAFZAbQBnAEQAs0BhwHfAqYCMgIrApQBBAGWAKAAiAC6/wUAWAABAD0AEAB2AIAAu//Q/6cA4P/D/0oB6gCdAK4BIwF9ACsB1f9c/zEAEP/t/lX/hf5R/+H/7P6V/rz+AP4C/of/4/5d/g3/7P5Q/vP9pf6Z/kL+cP8UAMX+qf4IAA7/0P4AALf+oP75/5f/X/9bANIAJAAsAIkACwDLAIcB5QBwARcCewFrAaoB0QF1ApkBgABcAQ0BjQCBARcBUQDDAB4BoAA6AAQA8v+NABEAGAD3ADkADQCGAH0APwCJALoAeQDDAEUA9v/x/7L+W/5r/h3+LP7g/Xb+p/4o/j7+NP7Z/WT+ff8A//f+GADC/8b+gf9q/9L+wf/Y/7z/cQCx/yP/0//k/nL+/P7a/l3/8f+X/2P/mP/n/xYArgDGAeIB9gGpAm0CDwLkAcEBAAJcAdwANAEVAYcA4wAqAZEAiwChAHYAZwBcANQArgFTAf4AngEJARUASAAiAFX/av+g/0D/af8//2L+DP7Y/Yf9zv1u/mj+Xv6s/oD+Zv41/pX+T/9o/6v/s/+R/5D/af9m/6H/Ef/f/m//3/6E/tP+3/63/j7/m/9z/yoA/gDDAQQC0gGTAjYDrwJMAm4CRQJLAooCoAHkAHEB/wCTAAQBWQB1AEQBogAkAIEApgCFAN0AzwDtAHsBlgAIAPr/7/6R/of+zv1U/Xb9fv3P/TH9ifyO/Q39Y/z3/PD7T/sR/CP8N/yh/NL9tP6W/9gAWwE5Ak0DSASqBAEEpANyBAUEEQPKAnABvACHAH7/WP9h/xn//P/K/9j+LP9m/3r/CgC+ABkBQQLzAtwB1wGXAigC9QHqAYoBHAJBAswAyP8S/+D9MP6X/tT9Hf6q/vj+Mf/z/i3/fv+J/07/cv9R/yr+b/0s/H36VPnm+Pj5c/vu/Hn+9v/ZAXgD6gNNBB4GCAf8BTUGqAWQAnMBygBe/tn9Wf6Q/WP9of2g/A/8pfyt/Nf8P/6U/2gAjgHpAYkBIAKLAsYCTQTvBJwEFwQmA8gCxwG3AJgAhwCpAGgASf9b/rj9uf1e/s/+qv+wAN4AXAAy/9r9aPxf+5T6/Pib+Ff5S/kD+s/7Ev1K/ogAdAOIBe0GcQimCEQHCwb+BAUDEQGWANUAlwB4/yv+sP3U/OP7KPy3/JT9+f6u/2D/if+qADMBKQHBAVIC7AIBAwsCJgHDAFcBDAI7AgACcgFiARoBAgCb/1kAswAFAf4Aj/8E/8r+FP4u/hj+Zf7p/on9jvu3+d73QPcw96L3jfjN+on+ugAHAlQEJQYFB4gI0gluCcMIqgiqBqQCZADq/tf8rvyz/Jv8H/7q/pv+cP41/nb9fP1d/qP+hf/7ALABmwEGAWIAIgBqALgAPwHNAUoCsQIfAmoB3AAAAB4A1AClALMAfAG5AWUBRwFNAJD+4f2f/W39Af7r/Tn9ivxb+gD4Mvdi9vP18/fu+u/9LQEPAzkE/gXFBpgHGwlwCYQJsQksCA4F+QEi/3f9Kf3x/Br9Of7j/lP+bP1L/O771vxA/bL9Kv9cAEIBCQLnAVABKwFMAVgBcQHbAXwCxAJKAnoBqgC1ACcBAAEKAVQB2gFHAvABHgG0/4L+gf3y+0/7Z/vN+1r8KPzx+sb52fiQ9wD3K/cE+SL8g/7zAOcDWgaWCFIKlgp
rCnwKGAotCO8FVgR9AaP+xfzr+lv6QftX/GP9cf4J//P+pv5t/hr+Tv5A/6UAuAENAj4CDwL2ANH/EP9h/hL/zgDNATMCIAOtAw4DkQI1AgcCkALeAtcBrgDZ/zr+k/wm++P5Gfpe++v77vsL/AH7GflH92/1PPXs97z79/4MAqUFKQkFCwYLhQp7CooKXAmIBuMDtAGS/639Qfv9+QH7rfza/V7+Cf8ZAN7/xf5W/oD+v/9OAfEB4AGWAZ4B4gCI/9H+JP7b/an+VP/T/xABSQKrA7ME1gTmBLsEFwTuAqkBsQBw/yf+D/1R+5L5JfmQ+Qf6Y/qx+j37pvog+WH3AveQ+eH7C/5kAugGswmUCz4Ldgk6Cf4IpgYeBOgCeAH3/6T+8/s7+vj7eP2b/aT+tf+PAHgB8f+S/SD+b//P/ywATADfACgClQEl/5n97PwA/Xr9Gv5KACIDIQU0Bu4F0QRkBBwELANwAj8CqAHJAD3/ePz4+eD4oPiw+Mj5PfvB+y77i/ls9/r1zPa6+en8lwALBR4IlAmdCgAKvAibCA4IMQb3AwMC6/8O/sT8dvt0+0n93P4+AB0BSAGtAbcAJf8L/pj91P7P/2X/zv+oAPT/O//0/g3+Iv7V/gb/jf+JAKABwgL7A4IEZAS/BPIEiwQVBCQDHAIDATX/Bf20+ob4VPdw97/3TPh2+Qn6tPlf+S75LPq5/Pj+3gH2BSAJ8wo6Cx4KIwn1B88FoQOrAeL/8v7P/ez7R/vP+4T8Pv7//wsBbAJqA5oC8gDC/+3+lf5Z/ln+s/7Y/ib/wv7N/Zz9y/0P/qz+s/8AAbcCTAQIBcMFgwYrBlAFmwSUA1sCPQGx/7H9wfuM+Wf3D/YO9hH3nPg4+s76lvpB+in6rPo3/Cz/1wJwBtYJjwuRC0sLsgkyB6UF7QNkAar/Q/5P/Nf6WPqZ+vr7QP4vAOABfgPGA/wC3gGOAN//mv9Z/w//av73/UT9cPxd/Df8ivwG/mX/cgDwAcMDcgViBiIHBQeFBdUE+gMCAsgAvP8x/iX9ivv2+AX3jvYT99j3CPl2+mD7pPuJ+8f7nfwP/gEBpgSCB84JTAvMCmEJBwjvBdUDYgIFASb/Mv2C+0P65PmJ+nH8//5AAQsD0QObA/cCjgGHALkAfAD0/yEAhf82/if99vtB+5z72/yS/j8AzgFrA4MEoQS2BBUFDgU+BXcFUQR4AtgAyv5f/DT6dfja9+/39/dq+Bz5yPlt+kT62flp+/H9eP9hAWQE5AaXCG0JLAnBCGsIfQfVBbMDkQHQ/wL+tPvi+Y/5R/rD+539b/98AV0DsgMWA2ACjQHEATcCeAHdAJAAWf+s/e37hvpP+gz7NPzd/eD/QwJxBB8F6QQSBU0FiwXRBTgFCwTmAgYBE/7j+kr4D/f29vb2Kfe+9yv5C/sV+/H5RfuO/i4AZAEVBLIGLQmyCmIJkAcTB68GrgXdAwsCDAGb/8/87/lO+Gz4c/ob/QP/BQGLA+IETgSnAk8BswHNAnwCkgEmAQAAIP4Q/Jn5hPj/+Sb8+f3C/3sBOgNiBEIEzwNBBEUFNgZmBkwF1QNrAuP/Wvx9+e73lvfM94b3g/dE+Pf4dPlL+Ur5Q/wtAJgBFgNxBtYIqwnrCb0IWweNB1cH2ARlAgoBPP8a/Qb70vih+KT6bfzP/Yn/UQEYA1EEuwOxAkMDBgSOA3wC0ACJ/57+k/wx+mj5TfrS+2L97v5+ACgC0wMpBJED+gPDBBsFcAXxBJYDcgL2AE3+K/st+Yb4Jvjy9+T35PcY+GX4B/n1+JP5yP2QAZQCLQVXCDUJEQoPCgwIiwfvBygG5QMgAhQAjf7N/ED68vih+R77nPzE/S3/UAHgAmYDdwPnAhADIQSVA8EB6QATAFD+kvzY+t75rfom/JP9bP9hAUgDqgSXBNcD5wM/BEoEcAT4A84CqQEgAIf9pPom+Q35Gvkm+Vb5ovlb+bv4i/jx9/L4Jf2NACUCXAV
5CDIJ6wkICncIHQhICKEGQAQCArH/ov2a+0b5Ofjc+Ff6BPw8/Zz+xQBYAvcCQwOMA1IEBAVyBAgDBwKQAGz+gvzI+uH5lvrY+0j9VP9XAW4DGwU3Be4E7gR5BCkE5APwAv4BKgF1/+v8l/oc+ej4cvnx+VT6vfp7+m35wfjW9/33Wvut/l4A6wPuB2sJRArRCvwJdwmhCfwHCAWwAqEAKP5c+734jfcd+H755vrt+579BwDBAdQCeAPiAzwFJgbnBFMDYgLuAL3+iPzN+ib6tfqr++L8hv6FAMsCcASVBJUEAwXTBEsEhwNrAoABdwDI/uX8TPvl+lT7Lfvj+j37JfvR+Tz4J/fK9oT4/Psj/v7/9wMdB0EIxgnWCv0KzQuEC58IpAVgA18AMf28+vj4rvhm+bj53/mA+t37h/0J/6oAWAIrBD0GxwaCBSIE0gIXARn/U/1d/D78cPzf/IL9Y/7P/34BkAITA7IDXQSgBAwECAMVAhEB6P/N/qL9t/wR/ar9VP3v/Gb8U/vo+en3dfYz9u/2PPnG+5z9rgB9BKEGSwhYCnULPwytDBcLdQhKBsEDjwDm/bz7+vl6+T35X/gS+Mz4+Pl++5v9pv/KAZAEIQbxBYAF8ATUA1QC7wDM/+7+tv6n/oX+j/4m/1AA6gDbABoBiAGJASQBowA3AP3//v/n/4T/Rv/H/2IAJgBW/1f+Df1p+xf54/aY9TD1UvaC+GP67fzMAOUDIgaECCQKegvsDIQM4gq0CeIHbAUfAy0AfP1L/Nj6uviI9xT3RvdK+EH5fPqP/Nv+xwArAkMDUAQKBSwF7wRgBHADyAJdAq0BRAFbAVkBOgGmAHn/oP7w/fz8dfyk/O/8pf33/s7/PwBUAUYCmQL+ApICFwGo/6f9vfps+Kb2iPVU9sz30vgz+4f+lQCVAkEFxAYdCDsKvQolClMKqwnWB2MGbwTZAQMAPP60+9P5GflH+JH3BvjR+Fr5wvqN/Mn9bf9YAXUCtwP8BOcEwQR5BWUFFAWTBVkFhQQjBNkClQDf/hr9Y/vm+rL6jfq6+xL9fv2K/vn/rACfAaMCcgLIARYBiP9i/WD70/kI+Vf5MPrW+uv7zP1L/1oAqAGoApcD9wTeBcsF3wUqBtEFVwXgBKMDpgIOAk0Ag/7H/cD80fvh+4v7BfuS+zn8Tfzl/P/94v7Z/9sAaAH8AQgDwgPZAxUEgwRXBL4D8wJ5AQcAf//I/rz91f2s/kX/AQDTAC8BngEeAt8BTAHZAOP/v/65/T78GPui+m36gPrd+n776vyo/rz/nQCPAU0CsgKvAjkCFAKaAvMCsgJvAiwCqQHsAL7/yv6C/nj+dP5v/j/+Qv6E/mr+Mv6C/kD/IwDwAGoBsgHnARsCGQKgAWgBvwG3ASgBdQCW/5T+HP7n/Y39Jf6m//AACAI4A/IDcwQEBaYEhAO9AswBSQDJ/lP99fs7+wn7uPrB+rD7+Pzb/av+vP/IAJcBFwJNAk4CcAKjApsCTAL2AY8B4QAAAK3+ev3n/Hz8PPwp/Bb8cfwU/VL9sP2X/pb/nACUASUCdgLmAjkDGwPDAmYC/wF2AaQAX/8Q/m79Gf23/NX8pv37/oMAzwHuAh4ECAWSBZsF/ARCBKUDdwKxAFD/Xv5X/ZD8Hvyv+8/7l/we/Zb9jv57/ygA3wBIAWcB7wGuAv4CMQNEA+UCaAK3AVcA+f5G/q79D/2o/Db8+fs8/Gr8Zvz5/CH+K/8HAK4AAwF3AfgBAQLLAcsB0gGGAdAAuf+G/q39Lf2k/Fr86/wL/iv/PgBEAT4CQwMcBGAETgRWBEMEmQOCAnkBhgCY/93+QP6q/Yz93/0f/k3+of4I/33/4P8DADwAzQB5ARcChwKgAp8CnAIVAh4BbwD9/4v/L/+v/gz+0f28/WX9Vv27/T7+3P53/8j/4P8BACYAGQDz//f/DwAEAM3/Sv+I/v/9rv1B/R39lP06/u7+wf9vAAgBuAFLArAC/gI
uA0kDFgNnArMBFQEqAFf/8v6S/mD+l/7V/hb/a/+j/9r/CQAhAF4AwgAoAakBFwIsAigCEwKvASQBzQCTAEkACgDQ/2z/C//a/qr+kv7Z/k3/xv9CAHkAcgB5AGIACwDN/7z/qv+i/4H/Dv+N/jr+1v1t/Vn9if3R/Uv+0v4q/5r/NQC0ACYBmwH1AUACVQLwAWwB9QA6AID/Ff+p/lj+af5//n3+wP4X/03/nv/7/00AvAAzAYoB0QEFAhkCCwLMAWwBLAEIAcgAaQAbAO3/xf+P/2f/a/+s/xsAfACzANwA/gD9ANEAgABDAEIARQAoAO3/kP9N/zH/4P5z/lb+Zf5g/mH+YP5S/oX+9P5E/57/NwC3AAYBTAFdAUsBQwH9AHgAGwDL/2X/EP+0/ln+TP5t/m/+gf7Y/kv/uP8TAFAAhQDRACkBXgFrAWgBXAE3Ae0AiQAxAAUA8v/X/83/3P/4/zgAbQBnAHUAswDUANYA0ACzAKsAvACcAG8AaQB/AJwAmABeACsA/P+b/zr//P66/qL+wf7K/tX+Fv9d/5T/2v8zAIgAywDwAPgA0ACJADgAyv9R//z+v/6d/qH+kf57/qX+2/4E/07/oP8AAH0A0ADhAPAA9QDNAJsAaAA6AC8AMwAaAO7/5f/3//3/CgAFAPb/IQBBAA0A4P/n/93/FQBpACUAFgC6ABoBGgFFAW8BSAEdAeQAZgASAPD/nf86/1j/b/8I/yD/d/+P/+//dwCrANYAEQHlAKcAdQD0/27/L//8/rH+hv6H/ob+lf6x/tX+K/+g/w0AZgCiAL8AzwC+AHUANwAXAAQAEQAWAPf/5v8PACYA8v/U/9//zv+8/6X/YP8p/zr/Of8p/2r/zP80AJkA3QAbAVsBegFrAUsBIwH2ALgAcwBcAAEAf/+a/9z/tf+8/yMAUQCGAOEA6ADPANIApgA6AOv/uf+I/3P/SP8I//j++P7k/ur+E/9o//D/SQBaAHoAmgCHAFEAAQDO/+X/8P+7/6f/9P8UAOT/sf9w/zf/Sf9g/0//Nv8U/w//+v7b/iX/bP+x/0MAmACxAAgBPgEPAf8A8ACtAIoAYQAxABQA+f/k/87/1v8BABAAOAC/AP4A3QAGARQBtQBjAFEAFgC1/6L/kf9q/1H/Pv86/yL/Lf+C/8j/FQCSANUA5QARAR0B0QCYAJwAYwAzACQA6P+y/6L/eP84/zz/R/8J//3+If8T/yH/Vv9l/1D/hP/J//j/UwCaAJoAogDUAHYAFABOAOP/c/+L/37/UP9a/6X/mv/R/1sAgACzADgBJgEEAWIBJQGQAKkAWgDw/1IA9P9B/3H/q/8f/wH/i/+M/17/s/87AEsAegD6AI4AgQBQAfQANgCuAL8AFAAAANr/uf+r/zr/0f4v/5X/2/61/mP/Nf9M/8T/m/+7/xwAbADCAFMAqAAvAUoAWwCcALL/rf/p/+L+sf5d/y3/5v7T/7//PP+NAPMA7f9aAIYBxwAKAJgAlAD7/2wATgBo/+//FAAw/wP/rv8KANH/v/8ZABsA4v/gAKQACwACATsBrwCgAOMAQgCEAFkArf/d/wwAAQB5/2H/pf9s/yX/jv9F/yL/DAAkAKD/OAC9AKUAuQDGAIMAogC0AP//gP/x/xMAx/7I/sH/N/9n/vv/CwDs/pEABgFO/5//RgGNAFP/qP+NABIA5v+C//7+oAAqAAL/YQAsACD/Zf+vADAAFP84AZwAZv8kAa0Aqv+HAFMB4QCK/54ASwGX/9f/AgFiAPr+av+g/+n+pv8SAKz+ef+xAO//OQALAG0AKgEqAKgARgE3AAYAZgB2AJv/Df9Q/5v/wf9y/+H/CgBi/4v/BAC0/9H/GgDe/3j/Tf+V/0wANwAt/2b/XQARAHH/BADQ//P/lADn/33/fv/q/z4AZv/B/3AAv//1/1UAuf+g/7MA+QD0/3T/UwCzAGUAfAA/ACIAoQB5AIX/iv8UAKoApwAUADcAywA
EAUAA+v+lANgAMADo/9L/gv93/2//jP81/2j/GADp/8f/RgDLAHkAbwCLAAMA/P8JAMT/qf/W/73/yv82AB0AWf9T/w8A2/+N//H/XwBCAKf/mf8WADIAwP/i/6gAFwCg/xkAIgBp/+X+z/8LAB//W/9WAD8Aev+r/woAqv8IAG4AXACsANwA6QDpALIAVACdAGkAx//Q/9n/k/9B/33/PP8m/7L/t/+B/6L/dwD8AFgAKgCjALMAiwB0AKMAgACNAGIAxf8CAAAAe/+Q/5v/kP/S/5L/cv/v/6b/bf/L/wcA+f+m/1YAfACU/5r/EgAIAMr/MAA/AAUAbgBRAL7/gP+K/6b/R/8H/4f/tf+n/9//6f8IAGEAngCDAGAAhQBmACwAMQDU/6b/u/9//0D/U/+E/yP/Zf/V/4P/6v+TAJQAbwD+AG0BOAGiAcsBrQEqAnwCOwLIAeAB3gGHAWABJAG8AIQARwC0/z//Lf/u/qD+uv6t/tv+NP8i/zT/Uv9P/3///v7D/gr/av4g/i/+lP0F/RD9tfzL+6L7UfvB+vv6iPri+kj9oP5P/8QBogQvBgEHxgffBxgIhghlBxsGCAaBBdoDNAKmACr/JP7d/D/7T/ok+hP6OPqB+hD7rPyI/s//5wA2AtIDMgVOBc8E5wQZBagEnQPbAjoCqQEgAYf/Lv7r/Sn98/tv+177Qvv3++381PzX/Ln97f1Q/dH8HvzV+/n7NPxT/cj+5P/UAesDsAQ/BcQFxwXpBd4F3AROBDIE5AJqAWcAFf/w/R79IPx/+zP7Avs3+4D7p/tl/Aj+T/+5/wYBuAKMA10EvgQ7BAkESgTDA6kCXQKaAowCVAK1AegAyQAUAQUAkv7d/jn/HP+i/83/vv87ABkAM//l/c38Avzr+t75pvjt9wD4kffR9/75y/sV/d//GgNABTIHogjoCGEJSQnLB5gGkwUcBNgCKAEh/7X9bvz/+uz5Rfnx+Jr5evr7+rf8G/9zAOYBzAOIBMAEKQUGBWEE5QOPAykD2QJLArcBjQEbAfj/K//S/v/9dv3I/S7+of5g/y8ApQDMANQAkwDu/zr/mf53/Qn84vrP+ZT4q/ft9mT3Bvo2/H/9oACKBKMG8gdFCbQJ7Qn+Cd8IfQe/BgMFugJQAcz+HPxO+/352/db95n3s/eU+J35nvqi/NL+mQCQAhEECgU7BuMGIAYkBe4EfQQMA4sBxgBIAGX/2/64/pb+lv5l/tL+mP98/wMAlwEeAi4CeQJfArsB2gCy/xP+Uvx3+pv40/Yh9brz0fML9rv41PqA/rEDBwgNC4gMNQ0ZDnYNNAtmCRUHpAQPAwAB6f1y+0r6t/gG9/D1b/Va9tb39/je+ov9LQBqAnIEFAayBigHJwcEBogEMQMaAgEBNQDj/5//g/+u/6n/c/9X/z3/KP85/0//lv9nAEUBpQFzATkB8AAzAPr+Qf19+yX6j/ha9tz0RPQY9M/1yPkJ/eD/KQWaClINqQ5cDwkPHA4HDOII8wV9A/UAt/47/Ej5ivfv9vz1EfVw9dL26vgx++b8KP86Ak0EoAXwBjQHyAauBpIFUQPUAd8Aif+N/i/+0/38/bj+7f71/n//3P89ALoA/QB/ASkCxAJPA0sDyQJPAnQB7f+y/X77ePl494/1afOF8tbybvP99f75p/1bAikIMAyKDnQQlRB8D+gN3wqiB2IFqAJ+/2v9Sfvg+IX3b/aP9er1E/eZ+MH65vzw/qkBoQNqBHgFdQZvBs4F0QRUAy8CHgGd/3z+3v1//br9H/5q/gL/1v9pAOUAQwFwATAC/AI5A4UDvgOKA+kCeQHF/+P9r/uP+VL3JvWP86PyZPKE8przOven+07/TARdCn8O8BCrEmwStBBoDuMKMQewA/P/Sv0j+zH4LPaa9Qf13/TT9Vj3P/nl+1z+uwBiAygFegaGB64HPwdfBpQEngJaAeH/KP4z/Sf9pf0V/oD+b/9PAOwAYwGPAZoBOwI
bA1QDTgN4A2kDewL1AFL/cP16+3L5WPec9SP06vJs8oXyRfPA9bb54P1WAvAHCw1UEIMSFhPaEe0P6wzuCCsFVgHk/Uf7g/gj9kL15/Sj9Gz12/aR+E/7zf2E/88BdgQbBrgG/gYpBzIHRwYoBFQCWgEUALX+6/1y/cz9rP4I/9D/2gAkAa4BTwLkAaIBNQJjAlgCegLiAR0BgAD3/vX8TPty+Z73APb+877y3/IT8xP01PdA/Pv/VQUQC5MOABFXEpER2A8TDfsINgUHAmb+Rvs0+Rj3pvV+9W/1y/V292X5Vvu//QUA8QEsBDQGLQdtB1IHMAdzBlEEFwIVASAAiv5i/S/9Uf2g/SL+mP5l/5IAegErApcCnQIrA9MDjQMuAw4DSQJTASAA4P3I+4765Pjy9p/1a/TT8xf02fOF9B34UPy7/04EtQn4DdsQERJbEXUPtwzzCNME8AAe/TH6tvg+97z1jPVM9ln3kPjA+a/7f/63ADwCNgQoBg4HTAcXB3UGigXNA8IBmQBY/5j96vz8/M78T/12/mD/hgDYAdYCoAMZBFAEqgTCBEgEsAMMA/0BigD//in9HPtj+QP4bfbN9OHzrfO3887zDPWo+Pz8iwAaBXoKQw6QEI8RlhCVDsYLwge7AyMAsfxB+tD4RPcj9k/2KfcV+C35oPq8/CP/0wAzAgwEoAViBqQGlAY/BkAFZQO4AUEAVv7o/Gv8Jvxw/Gv9X/6z/3oBnwIlA9gDVwSVBM4EXATCA90DPAOUAV8AFP/v/Pn6c/mE99z13/TW80jzbPPQ8yf2VPrX/acBPQdtDLEPfxHeEQwRzg77CrQGrwLL/nL79vgn9wn2pfUM9hr3M/hM+VP74f2g/woB+ALRBBoGqgaPBlgG7AWIBGMCoQA5/6n9gPwN/OX7XPyb/dj+6v9LAegCKgTMBA4FUgV8BR4FbASyA44CJwEIAIn+bvy0+oD5Kvi19nX1pfSL9J/0gPQM9tT5V/2mAI8FcwreDVwQABG+D+YNyAp2BpACqf7++gb52veO9jn2L/e/+CX6D/ts/J7+ZQByAa0CMASSBXcGfAYBBpYFkgSrArYA2f4P/QX8dPsY+5P75Px1/iAAvgFEA7EEowXWBZcFWAXbBBYEPQPbATcAU/82/u778Pkj+UX49Pby9Xv1cfWX9Zr19/Zl+pL9iABrBV8KNA1zD74Q+A8dDhIL2gYTAxn/7Pq2+If3EfYI9nf3Avmj+v77hv2v/+gANQFXArADVgTMBP0E9gTQBOQDMwJbAG7+x/yl+8r6ovqR+079Uf9jAVYDGAV0BgMHywYtBkMFQARsA0UCtACh/wD/sv3O+2T6b/lI+Bz3M/aL9Vf1dPWY9Qf3a/qo/XcA7gTaCfoM8w7kDz4Psw3cCpUGggLa/ij7lPhR9zf2DvbM9/P5Wvvt/O7+xwABAj4CawKUA5QEjgRkBHMEXgTBAxAC8/9j/gb9zvs++y77//vu/fv/1AGtA0YFfwYEB2wGcgV/BCkDqQFhAAD/4/2B/cn8UPtd+gr6Qvko+DL3dfZM9pP2mPaV9836O/7QALkEWgk/DOQNzw56DhgNXApqBr4CJf9m+//40vfS9tX2Svgv+tf7M/36/jkBNgIMAp8CowMOBB0EtwOVA/8DPgNeAe//hf70/O77I/vz+iL8+v39/0oCSwQiBuEHIQjgBtgF5wQJAwgBef8r/mz9/vw+/HH78Pqw+lH6W/kx+Hz3MffS9i32uva6+T/91v9fAyEIoguFDZsOLA6GDDUKsgaoAvT+afvx+Pj3D/fH9nz4zvp0/Pv9vv+RAZoCgQJqAtgCHAMiAz4DPQNLAzEDOQLTAFn/r/2S/Ab8mftD/BL+7P/9AVUE+gUWB6AH4gaxBWUEXQJoABP/kf2D/Ir8V/yk+6P72ftZ+1f6GPng9w73OPY39fv1+fgH/B7/tQNqCM8LLw4YD3MO9Qw/CnIGqwLm/qH7tPl/+KD33vdD+Q37hPy
M/fD+mwBZAXoB7gF0Av0CgwNeAwIDKwPHAlkB3/+b/mL9pfxD/FL8bf0S/8IA1gKxBMoF1gY+B1oGNgX3AyMCmgBL/7399Pz2/KH8RfxG/Dr8Ivyi+4L6X/mM+K33x/ZU9ov3ZPpG/W8AqQSpCKsLuQ3vDQYNvQvMCMYERgGi/XT6Dvkd+Gn3Z/h6+lH8zP35/h8ARgF7Af0ABQGtAUYCcAJ3Ag4DpQMjAxUC6QCG/4D+t/26/MT8FP6a/10BHAOLBC4G9gbzBeIEEAQVAuX/h/5J/Xn8jvzH/A79fv3a/RL+fv3t+0n6vPgI96L12/TL9dP4Afwu//oD1QjZC8sNdg6CDd8L/gi/BBwBBf7p+jT5svgr+NX48fqS/Iv9zP4IALkAwwB3AJEAJAHZAXMCvAIDA4YDXAMXAnQABv8M/mj9vPzP/D3+8v+OAWkDxQSpBXsGJwbXBLcDRAKCAEz/GP4f/Wb92f2+/QP+af48/qr9nvwd+4D58ffJ9hD29vWx97X6jP0pAccFggkUDLcN1Q3VDIkKvAbrAm//qPsA+Rv4t/cO+Oz5Pfwg/sf/FAHTAfYBWgF/AA8A7v///04A4QCjAVcCjgIZAhcBKwBr/43+Af5G/iz/pwBCAo4DCAVCBkUGZgVDBJ0CswDD/hL9C/zk+2P8OP33/cH+tf/j/+j+Zf2I+z35O/ef9YL0Z/VG+Dn7wv7fA4UIjQvIDZoOqg2oCzgI9gNTANX8yvmu+I/4nPgg+mv8+/1e/40A2wDeAJcAv/8+/0H/Uv/X/88AsAFuAugC0gIzAi8BDgAq/4b+VP7R/vD/cQEGA1UESgW/BW8FiAQqAzMBOv/u/dv8Gvxa/DH9Ff5I/z8AcgAeACL/Vf0b+8P41PZe9YT01/UL+Sf8+f9GBa0JZww9DmkO+wyICpMGLAKl/i37YvjP9w/4kfjE+oD9S//dAPkBEALZAf8AZv+P/lf+Hf6q/sn/mwCfAaYCpQIlAoIBlwD8/6//O/+P//UANQIuAxoEqQTCBCcEwgIvAZf/6v3b/I38iPwn/Yz+6P/aAHkBhQHYAFP/sfwE+rH3aPXd80T0hfaM+VL9MgJhBxILaQ3UDm8OPQz4CNMEmAAO/Sb6l/iR+Bb5vPps/Vr/kwDOAf8BGQH+/3v+IP2F/Gn8Qv0C/2QAwAGmA4UE2gMCAyUC6gDQ/wv/5f7C//UA6wEdAxMENwQABD4DggGR/wD+4fxx/GH84/yO/ocAxAFaAnEC4wFuAMP9kfqy92X1zfMc80H0aPdg+8j/AQWoCdEMHQ/JDxoOVAvcB4oDgf8T/ED5QPi6+Hf5F/uR/Yb/1wCsAWYBfQBo/+39n/w9/Ij8vv23/1IBvwJyBDsFqwSjA1MC/QAJACX/ov5O/5EApAHCAqsDyANeA4cC8QDr/lf9k/xo/J38b/0I//MAMAJgAjMCZQEN/+T77vgF9qfzdvIB88v1r/nN/WcDWwkADTUPqBCyD8gMNQm0BDQAp/ym+Rj4YPgC+UH64vwp/0AAFgFjAbYAi/8S/sr8Wfxv/Dv9Fv8cAccCaARzBVAFcwRMA+YBYwAh/5n+7P6X/2oAhAGrAkcDLAObAqMBBABc/oH9Af2i/C79ov5DAKUBSQJ8AlwCyADW/fj69vfW9LXy0vHU8jX2Tvr1/jsF3wpPDuwQ5xHmD6wMxAi2A+X+A/sF+Bj3pPdh+FP6c/2r/9AApAGZAZUAMv+3/Yf8GPyA/Pj9JAAPAqUDVQU9BqMFVgT3AlgB1//V/lP+lP6G/5YAmQF9ArwChQIcAvgASf9E/ub9n/3R/cL+JACQAW8ChgIpAgQBuP7O+/P4A/aL80PyLPL98+v3WvwUAdsG4QvpDscQ5BCpDocLuAfTAiT+nPot+Cv3afds+H36Qf1k/6YAcgFuAWQA/v6i/X38QPwO/Wz+TwCQAoYE4QVfBuQFwQQfAz4BoP9p/tf9Nf4T/w0ALAE4AuIC2AL1AdoA4f/H/tz9uf0
q/h3/jgC2AToCjAJGAqMADP4J+9D34PS58k7xlvGq9EX55P1eA58JZw4rEWYSZhF1DsAK7gVtANX7h/iQ9j726faD+Gj7Vf41AHABCQJ5ARIAX/7K/Mj7m/tg/BX+YwDnAksF/gaABx8HFAYzBMwBlP8j/oH9Zf3R/ez+KgBJAToCcgIPAnkBfwBT/5v+M/5L/iP/IgD/ANUBOwKGAdP/SP1B+u320/N38YfwM/Id9rj6OgBKB3sNbhHaExwUjxGbDboIsgLp/Gb4bPVk9PD0WvY5+fr8pv84AWICSALXACz/gv0U/MT7e/wC/lwA8gIQBZQGHgdsBgsFSAMAAaX+T/0K/Uf9Bv5g/+EASQJBA1EDswLRAaIAgP+8/h7+FP4g/3IATgEUAooCJgKvAD7+Hfvk97z0OvLl8Anxp/OE+Ob9cwPYCXoP5BIxFFMTWBA8DBQH+gCM+6v3GPVD9D/1Iff2+Wr9CgCVAYACNgLKABL/ZP0s/Az8yPwx/m8AHgNBBYYGBAdwBuUEEQP7AJ3+G/3g/EL9A/4y/30A9gEdAxEDSgKzAdMAf/+R/l/+mf5X/2IATgHVAcoBDAFG/3f8H/nd9dvyzPAA8HLxtfUr+68AlgecDsgS+RTzFdoTRw83Ch8Etv3E+Bv1B/No8/b0Y/dF+8n+ngA+AjQDDwL8/1v+0/zW+xX8Tv1c//UBbgR5BrIHaActBpMETgKi/8j94PyH/On8GP6I//gAIwKqAqECQAJhAUsAd//r/tD+XP8jALsASAGDAfEAX//9/P75x/as83bxUvAO8bX0Dvp4/xIGdQ2KEj8VihYjFQ0RHAwZBmP/y/mS9bvyS/J/82n1t/iY/ED/PgHJAqkCXQEmALj+ef1Q/d/9E/80AVUDvwT8BYEGrQUiBIACmQDz/uf9Wv2a/Yb+ef92AJIBFQL/AdQBdQGnANr/bv9D/2T/2P9YALMA0wB5AEb/S/2s+sb36PSF8tPw6fDV84P4l/3jAzYL6BCdFOYWfBYUE40O2QjzAXn7ZPao8gfxh/Fl87P22/pe/h4BdgMoBB8DtwEeACr+K/1c/fz9cv/WAaMD6gRkBt8GsgUWBFMCXQDr/tT9+/wf/R7+Hf88AFUB3gEvAosCJgIyAXsA1f8k/9j+u/5m/l3+b/7F/WT81PqQ+Of1r/Nk8gbzdvXW+Af+UwXrC+cQURXDF/sW/hNZD/0IigGd+t/06PAe707vh/FR9ZH5pv07AY8DcwRXBIkDAgJ7AJ7/hv/1/w4BZQJfAwgEXQS0Ay4CpAA+/xP+fv2H/fv9Jv+SAKwBlwJbA4IDLgONAqUBxAD0/xT/s/7p/uX+0f4L//r+Ev7u/FD79fhj9jz0p/IT8tLztPdm/A0C7AgeD3ETLhbOFmMUVxDfChgEa/1T93vyCPDn7/XwwPM1+JP8EAAaAxEFRwVnBCADjAELAD3/N//n/zYBkgKjA0oELAQmA/gBoADb/rD9kv3H/U/+lv/JAMABtAIeA8QCdgL2AdsA9/9b/7D+pf4O/xv/Hf9U/8j+bP0G/AH6WfcQ9WfzbPLW81T3T/ttAFUHdw2qEQUV/BVDFCQRZAwQBnD/ZflH9CXxT/C/8AnzAPfK+jH+5AEtBGwEMASrA0UCxQAhAAQAXQA9ATYCHAO/A2MDZwJ4ATgAhv6z/bT9qv1R/t7/DQHuAe0CiAOiA0MDOgIKAXEAi/99/nL+pf4q/jn+nP7R/ST8u/oH+Yv2HfSb8mnzPvZg+ZH9ZwQfC3QPKBMIFlUVRhKyDogJ7gKY/D73R/Mv8ZPwoPGC9PP3I/vS/vcBKgN9AwcEpANfAsYB0AGeAc8BnAIDA98CnAIvAhUBof9z/vv95f3t/XT+4f9VAS0C5QKDA3wD8AJeApcBmACV/6/+PP4Y/tL9mv3W/dv9J/0t/Db7xPnV9+T15PT89SH4a/pc/lAEVwkWDfAQRRObEqkQAg50CfUDoP6W+bX1WvMj8ofyaPTD9rX5FP2
K/y0BvQK3A5kDZANvA1cDCQP6AhMDtQL3ATcBfgBX/yv+k/2V/dv9gv61//sABgL1Aq8DywNKA4ICrgHoAAAABf9Q/iT+Pf5T/lz+cv57/hD+IP3M+0P6k/jS9jX1N/VH9+/5A/3xAYsHvwsZD6wRIxLKEIgOrQqwBZYAsvuM9/T0dfMJ80/0uvY7+d/7rP7HAPEBugJLAzgD6gLzAjYDXANoAy0DyQJMAlEB0v+4/gn+a/1f/Sz+Iv85AKQBzQJYA1wDGgOFAoYBLgA6/5f+xf1p/RD+u/7G/hf/i/8q//79nvz9+g35OPfB9cv1x/d9+n79FAI3B78Khg3gD/4PGg73C8oIUAQSAGr8G/kE9w72yvXA9ov4IfoN/HP+4P9lAEoBKQIuAkECvQL0AuMC6AKaAvsBNgEyACz/mv5U/m/+J/8qADkBigKyAxQE6QNWA2kCLwHP/4v+3v1c/eD8Lv0p/sP+Lf/+/2gADACZ/wL/0v1v/DL7Lfqb+V75i/ku+9f9CQAqAkgF5Af7CL8JLgoRCRgHIwVxAj//rvyp+gH5ivgs+TH60fsQ/vv/ZAGEAtcCWgKqAaEAWf+j/j7+sv2z/Xv+C/9x/1sAWQHSATsC+QKeA+MD9gMcBAcEbQOHAp0BbgAG/8z9tPy6+077i/sH/Or8Of5//2cAEwFaAQwBRQAd/839wfwd/Jj74Ptl/Un/9QAZA2sF1gZzB6MH+gZNBTID3wB//mb83Pr++QH6zvoN/Kb9nv9mAWYCAANKA8cCswGvAHz/O/5//Q39tfzg/FX9xP2C/nn/QQAdAScCHwMKBNoETgV5BTUFUwQzA94BCwAs/rb8Vvtn+m767vqH+878i/65/40AZgGcAeEA9P8b/0P+gf0e/Zv99v5UAMwBxANIBdoFQgYrBtYE9gImAR///PxN+z763vkp+g/7dvw8/g0ApgH/AgcEWQTpAz0DTgLaAE7/Gf77/Cr84PvN+wL84fz9/Rz/qwA5AksDVQRiBbAFgQUnBVcEGQPFAVwA3/5w/R78Xvsw+xL7N/tM/KT9oP7O/zYB4gHeAc0BbAF+AGr/pP4+/k7+wf6x/xgBlwLrAxoFsgVgBYMEPQM5Acv+tPwM+8X5UfnZ+f76nvzH/gYBxgILBOwEJAV8BFID9QF9APj+pf26/FT8C/zb+z78CP3C/a3+7P8lAWMCiQNnBP8ENwXSBBsEEwONAdf/Q/63/IT7Cfsc+6T7tPwm/pn/+QAtAtoCvQITAioB/v+k/pL9EP0g/dn9JP+yAFMC2APdBEUFEQUkBIMCnAC1/uv8bfuL+mP62PrM+zj9//62AAcCEAPbA/4DggPjAgcCyQCu/+D+/P0k/bT8hPyO/Ob8jv2N/tb/JAFqAp0DdQTZBOQEbQRWA/YBfgDW/j/9KfyT+2v72/vt/Ef+sv8FARYCzQINA7QC8wECAeL/4f5D/uf92v13/nv/fwCNAZsCNQNaAz0DpwKQAWYARv8Z/jL9tvx0/JH8Nf0U/gn/GgAQAdoBiwLYAq4CZgLkAf8AFAA8/z3+U/3R/Hj8Vfyx/G/9Z/6r/xABTgJbAzAEiQRcBM8D4AKTASIAvv5x/Wb80/vM+zj8+PwF/lX/pACsAXQC/wIdA8ECIwJPAUMAPf9+/gT+0f0X/sH+mv+RAIkBNgJ/AnwCGgJYAWYAZv98/tn9gP1k/af9Nv7t/tH/uABUAcYBLQI/Au4BhAH1ADMAhP/2/kf+lv0p/ez84vwz/dL9qP7F/xMBUQJbAxIETQQjBJMDewIIAYf/CP63/PT7vvvq+5v80v1C/6oA3wG/Aj0DWAMHA1wCagFkAIn/5v5k/i7+av7e/nT/PgD5AFwBlwGtAV4BugARAHH/1/5g/jL+Sv6N/uz+fv80AMcAMgGjAe8B1wGoAXsBBQFXANn/af/D/in+2f2Z/WD9c/3d/YP+Vv9FAEUBSQIDA0wDTAPrAvsB0ACs/2X+JP1s/Ef8dPwC/f39MP9
iAHQBMQKjAswCjgIAAl8BoADR/zX/0/6Q/pD+8v50//D/eAD7AEMBXAFEAe4AcQD6/4P/B/+q/oH+f/6d/vL+av/s/3MABQF2AawBuQGjAVIB2gBSAMT/Of/J/nb+N/4o/lT+oP4M/6r/SgDUAGMB6AEeAhUC4AFlAbEA9v8u/2D+zP18/W79r/03/uH+s/+ZAFwB9AFdAm0CKgK6AQ8BNwBz/9L+Tf4X/j/+m/4k/9X/hAAVAX8BqwGXAVEB0QAtAJL/BP+F/kj+Uv6B/uX+hP8pALsARQGlAcQBwQGYATIBuABGAMn/U/8C/8D+lf6e/sr+B/9l/9D/NwCvACQBbAGOAYkBQgHNAEUAnP/l/lv+BP7e/QP+af7q/pH/UQD/AIsB8gEQAukBkwEMAVkApP8F/4H+M/4u/mL+xP5P//H/hwAAAVMBbAFIAfQAfwDv/2D/6/6W/nX+mP7q/lz/6/93AOwARQFvAV0BJAHXAHgAHADN/4v/Zv9g/2f/fP+g/8X/8P8mAFQAfACpAMoA0wDRALMAbAAVALT/Qf/Y/pT+cP53/rn+KP+y/1MA9AB6AdsBAQLkAYsBAAFNAJL/6f5h/hP+DP4//qf+PP/g/3kA/gBcAYEBdAE5Ac8ASQDB/0D/2P6c/o/+sP75/mP/2f9JAKYA5gABAfYAzgCSAEgAAADI/6H/j/+V/6v/y//x/xgAOwBfAIAAmwC3AMoAzQC5AJIAUwD+/5//Rf/1/r/+qf65/vD+Tf/G/04A1wBNAZ8BwQGpAVYB0QAuAIL/4/5o/iL+Gv5O/rj+Rv/k/38AAQFfAY4BhQFIAeMAXgDN/0T/1P6P/n3+n/7q/lT/yP80AJIAzwDmANwAvQCDADsA+v++/4j/ZP9Z/1r/av+Q/8T/AABDAH4AsQDcAO8A4AC9AIcANADU/3//M//4/tn+1v74/kT/pv8WAJgAEQFvAacBrAF3ARoBogAOAHr/BP+p/m7+e/66/hP/iv8VAJIA8wAyAT4BJgHwAJgAHACk/0f/BP/g/t3+A/9J/6r/BQBeAKgA0QDpAPgA5QCrAGsAGgDR/6r/kP96/ycA0gAHAM3+2f4M/2r+mv4aAGABiwFtASUBZgBL/3r+Hv4c/p3+b/85AKMADQGSAbcBQwH0AAIB7gC7AIoAIAB0/wX/qv4r/v/9jv5V/+b/YwDzAGQBVQHyAJcAXAAjAPr/3f/Y/7j/kv8g/4b+af6W/vL+qP/VALQCKQRDAwICOgGO/479mfwK/Qv+AP/d/44AjAA4AJP/CP/t/kr/IQDoAGIB1wFQAt8BsgDH/13/7/7I/g//S/+Z/yEAbwAdAO//OACBAIkAkAC+ALsARwCg/xH/rv6E/qb+GP+5/54AWAFtASUB/ADJAEQA5v/T/+H/7f/8/+D/eP8J/9z+4P7n/kX/AQDHAFUBvQHiAaQBDgFdAMf/XP8d//v+CP83/2v/if+e/6f/0P8rAIAAowDXAAEBtQA5AN//kP86/xD//f4O/2P/0//5/x4AiQDnAOIAuAC6ALgAjQAxANf/jf8s/7P+jf7Y/j7/rP8zALYACwE0AUcBKAHeAIAAMADZ/2z/Ev/9/g//Dv8p/13/oP/o/zIAcACtAOkACAH6AL4ApACPAE0A5/+g/3H/M//p/rX+yP4Z/5b/+/9sAP0ATQERAaUAaAAgAKb/P/9f/7r/8v8hAFsAaQAoANb/q/+3/83/DQBuAL0AzwClAEEAt/8z/9f+pf6s/hv/yP95APYAVgGCAVEB6ACGAB8AoP8p/wX/Gf8A/wP/O/+H/8n/JQCaAOIAGAFKAUMBBAHIAIMANgDz/+f/0/+x/5T/c/9d/zP/Ev8q/2j/qf/6/0YASQAJALr/Of/b/hH/bP+t/zIA1gAGAQ4BPgEoAeUA5wDiAL8ApgBgANH/OP/H/iT+wf3Z/Q7+e/45/xUAxgA9AXsBfQGsAfgBkgEIAdgAwQAqAD3/0/6k/nH+VP5y/uj+f/8
BAFMAmgDjAOgAyADVAOwA9QD9AN4AhwAIAI3/F/+i/nH+u/5E/7D/FAB6AK4AqQCbAG8AQQA0AC4AJwAuADsAAwCd/2P/Uf9f/4L/pP/k/zsAZwBEAAsA3v+z/5H/if+e/87/AwAiAD8AZACCAHcATAAfAP7/7f/P/5D/b/+C/3P/Xf+O/9//IwB1AMgAAwERAfkAwgB9AFMALADt/6n/oP+7/7D/qv+p/7D/qf+H/4H/r//5/yMAVgCYALMArwCUAGkAMQDe/4T/R/9C/0z/bf/E/x0ATABeAHoAjgB+AFkAPAA/ACMA4f+p/4v/d/9f/1z/g//J/w4ARAByAIwAgwBlAEUAGgDn/8D/rP+c/5P/lv+l/8H/7v8gAEwAdwCXAJIAZAA7ACgA+/+q/4z/t//A/6T/xP8CAPr/yP/C/83/uv+4//P/PQBuAJEAmgB/AFAAHADn/7n/ov+2/+f/BQAYAEUAYABNAEgAVAAwAOb/wf+k/17/If8q/1z/lP/c/zQAlQDeAOkAxgCdAG4ALgDr/7v/o/+S/3v/W/9j/5T/rf+4//r/TgBfAF0AiACqAKsAqwCmAHgANQD+/83/mP9l/0z/P/8v/yn/Ov9Z/4n/2v9EAJ8AzwD1ABUBAgHKAI0APADi/5X/Uv8l/yf/RP93/8D/AwAwAFcAcwBxAF4ATQA4ACAABwDd/7z/1v8LABQACwArAD8AIQAQAB0AEwD5/9//zv/N/8r/s/+u/8P/0//c/9//4P/0/x4AMQAeABYAGwAJAC8A9QDyAW4CMwJuAYIArP/K/tn9WP16/QH+lf7w/iD/U/+K/6X/r/+V/0f/yv7o/Tn9Xv5AAa0DkATIBOQERAS2Ar0ArP4W/d78if3W/d/9fP5f//H/lABnAb8BkwGfAcoBSwE5AG7/7/5i/k3+FP/8/2QAqAD8AOgAUAC9/17/Bf/2/ov/XQDVAPsABQG9AAwAa/8Y/xn/m/94ABEBMQEjAcwA8//i/iP+0P3p/Zv+tv+pAEYBrAG9AU4BgwCx/xT/2f4W/6H/IABdAGUAVAAtANz/f/9H/1j/tP8oAHMAZgArAPH/p/8+//7+Cv9W/8n/SgCwAM8AvACTAFkAAgC6/7r/8f8lAE0AfACLAHIAWABHACQAFQA4AGUAdwB8AGkAIQDA/2j/H//k/tf+FP9+/+r/SQCdANcA3QCwAHQANADq/6f/gf9z/23/bf9z/37/kv+3/+P/EgBFAIoA2gAWARcB1AB1ABQAp/9E/yT/V/+q//f/TgCaAJ4AXwAaANb/c/8T//P+Hv99//7/kAAPAVQBQQHiAGUA5v9t/xP/9/4J/zL/gv/0/1kAlQC1ALAAfgBFAAkAtP97/4f/mP+J/57/9v9HAGYAdgCEAG0APwAUAN7/p/+i/9L/BgA3AHcAnAB+AEUACACs/0f/Jv9c/6b/5P8uAHkAmACBAEYAAADB/7D/0f8AADQAcACFAFAA7/+O/yT/yP6u/tz+Kv+P/xYAkwDdAP4AEQEZARIB+QDHAHgAFgDI/5P/W/8V//T+Fv9Z/6v/AAA2AEAATwBdACUAwP+H/5D/p//P/xYAYwCWALIAsgCDADUA8P/C/5z/ff92/4P/mP/D/w4AUgBzAI4AqACTAEcAAADk/9T/vv/B//D/LABXAGEATAAaAOH/rP+C/2v/fP+w/wAATQB1AHEAWgBGACkA/P/O/6r/jf96/3r/lv/P/xUAVgCCAIkAdQBfAEAA9P98/xb//f4t/3z/1P81AJUA4AAJAfgAoQAzANf/mv9x/1n/Xf93/5j/u//9/0QAcACcAM4A7wDkAK0ATwDG/0f/AP/u/gv/V/+6/w4ATQB+AIsAdQBUADsAJAAQAOr/wf+x/6f/pf+s/8z/CQBFAFgAhgCbAMsBPwQJBdQEHAPK/wr+H/xp+rz6bfsB/U7/qQDvAKYAHgCu/9j/5P+I/3X/K/+G/vr9Ov2f/MT8wf0X/x8ApABJAaQDrwY
5CJIH1ATUAUn/3/w0+5H6ivvN/SgAlAFwAdUAnwABAdsBKAIJAtEBEAHP/+793fuw+sj6ePx2/7YCYAWZBkQGhASSAVP+nPtT+u/67/yF//IBlgMzBKcD/wECAHH+w/1G/p///ADKAfABYwFKAOf+jv0e/ef9f/9DAZUCWgNEAy8CbwBd/tP8R/zH/G7+UADAAZ8ChwKRARMAhP5+/Rr9ZP1R/pj/kADaAJwA5/9H/+H+u/4l/+f/9QDKAeIBPQE6AGH/tP5e/mL+4v7e/9cAtQEVAvYBiAHbADwAj/86/zb/MP9U/xr/zf6W/vT9jv1C/b38gPyL/HH8b/xc/Ij8OQD1BQcK4AyvDB4KsQb5ALP72PgH+IX5tvwQAKABMgJ/AhEC9AGWARsBngHCARkBpABv/0T9Efwf/JX9oQCGA80FNQdeBoIDvP+8+7v49Pcm+er7CQAtBMIGdAfvBbACtf+D/YH8Qf2a/sr/7wAVAbT/0v3l+wf79Pu2/f3/HgLLA0UELgNYAXj+T/zs+3n8Yf44AC0B8gFtAbT/6/1C/Kv7nvxb/o0A5gJXBL8EOARFAiIAe/7//Of8qf0//ir/sf/k/ykACACk/1X/cv+w/0EA2gC/ALYA1ADNAAYBPAFZAcoBEQLaAbABiAFHAUgBRAEeATwBMwG6AD8At/9m/6H/BwBlAJ8AcQDa/y3/tf6C/n3+of7m/in/9P4L/tr84vsw+xf7AvsO+177MvuC+rf6g/5xBQAOWxNKEz8QMApQAwz8H/YM9FX1nPl3/fr/rwAAAJoAeAF2AnIDZgRCBZQEuAHo/Lj4VvZk9pD5Vf4EBPkIPwukCqUGhACw+nz2M/WS9iL6BP/GA1EHEgiqBuwD0AD8/l3+CP8nALcAZQBh/xD+YPw2+5H7KP6jAeAEKAefB/oGawSNANH80/ln+Kf4V/qR/K3/YAJtA4YDAAIrABn/+f1p/Zj9H/4Q/4H/qv4o/fz7t/to/ML9T/8cAbwCjwMwA/ABmwCn/x3/qf6M/l7/oQAVAgoDFQMqA8sCmgKOAuYBgAEpAaAAAQDz/qX9yvyW/Ef9sP4hADwBHgKiAqkCIgI5AWEA3P9///f+Pf4a/Sv81vvz+1P87fwz/vP/jgHNAtMCEwLiAJ//K//4/rv/qwE5Bk0K8QkWB0ACCf+7/eL7A/y+/UUA5QLHA4EC5P8l/rD9cv6p/y4AAQHGAT8Bq//a/YP87PyI/u3/nwF7AjICTgEJ/2T8hPrK+Tn6mftc/c3+WgARAU0Anv4N/KL5Q/jJ9wD5X/0uAkcHVwxvDtsO0gv7Bu0C2v7r/L/8Jv2g/dr93/1p/Rb9Q/3P/h8BhQI0A3sCTAA9/cD5avcW92P5p/2CAscG5Qj0CI8GqgGJ/KD49faQ9wn60/2hATUF/gYwBwkGjwOLAnwCXQLrAuECTAKfAZH/p/2s/CL83vyz/skAuwLlA+gDpgJWAOL97vsH+/z6Q/zw/nQBHwMIBFUDkQEq/x38DfoF+db4kfls+g/7ePvi+2X8If2J/fT+/QNzCPQKfQuICR4IjwRfANz9F/yb/Hr+sgCXAYgB6gEMAjwC0AEmAtMDsgQUBDkC0v/x/Mv6wvke+mP8aP+XAlYEHgSUAogA6/5e/XP8Pvz//BH/6wAUAk0CtwEdAakAZwA9AIEAiQFTAk0CTAFV/0f9gfvM+ib7+vtB/bj+0/9cABwAFv+1/W78kfsL+9H6l/oV/Gj+CgA4AjcEzAadCOQHQQa0Aw8BlP5c/KL6OPnU+Xr7iv1c/8wAfAOcBTgGIQVzA44BF/+v/WT8mvyJ/r0A7QPrBdcGHQdVBrcEIgK8/1z9//u7+/r7Lf2z/poAwAKzBOQFpQYBB2sF6AJ5/5L7Bfky92n3cvkM/Aj/gwEQAwEDFwKhAPH+z/2y/Bj81PtE+7D68/nj+B/4Wvih+dT7pv4ZAuwEvgb7B30IcwjCBtEEdQPGAQAA8v3S/BD81ftH/CX9Vv9
6AR4EGwaeBgsGSQRmArv/j/0w/N77K/22/hwBXAObBIEFaAV8BG0CBgBV/jX9Z/0h/kb/mgCzAScDMwRDBFwDVAICAUP/pP0a/Ez7QPvR+yL9jv7c//4A0gHXATsBAwDB/Xj7avlf+Df49feL+O35fvs7/YP+dv+4AH8B/gFUApQC7AIUA/kCSAKRAXsArf9x/xf/Lf9Q/wsA0gD8AHIBBwK3AvIC4wLOAncCEgK1Ae0BFQIAAkECoQLUApcCDQKVAQYBFwAb/27+jf5w/5cApAEvAnwCdQLFAQ0BZQD6/wMAPACjAAIBHQHfAFAAY/8j/g39ZvxI/Pf84/2a/tT+Uf52/W/8OftX+hP6Y/p1+078p/yR/Cf87fxK/vv/QgJKBCoGvgYEBkAEvAF7/+D9c/16/SP+Kv8iANoA9gBIAaQBHwK9AmID8QP8A/4DewOTAoYBvAD9AFMBbwFbATsBFwHLAHUABADF/8D/VABgASECjwLCAtECVAKqAc8A2f9m/wb/O/9z/yz/0v5f/jD+8P26/Y39i/3z/SD+Iv6d/cP8Kvx4+9D6GPrI+Rb6n/pK+6z7U/xU/Xj+CgBuAaMClgMxBGQEoAOZAncB0QCOAPH/4P/k/zgA0ABJAd8BEwI0AtEBPAGKANj/KADIABsCqgPZBKoFfAXcBIwD6gFOABL/0/7Y/mj/NgAkARICyQJJAywD3wI7AtoBmgG3AP7/YP8B/zH/nP9jAG4BEQLyAQgB+f5I/Pv5VPib9+D39PiS+mH8rf1S/sT+4f4U/4L/lf+x/6D/8v7x/ZH8Jfs2+uv5WPrP+879wv+JAagCUAO+Aw8ERwRbBBQEdQPdAjMCgwH4AIUAaABwAKIA/gCDAU8CSwOuBPAFlQZ2Br4FuwSEA2QCVAHTAOQARQHqAd8BXAGUAKX/3v7//W39Pv23/Y3+Sf/W/9j/r/9L/6v+4P0B/Wz8K/xK/F/8g/zx/H79Iv5//q7+7f5E/6//6v8aAE4AZABGAMT/Ev9F/nX9yPwr/Nf7oPuz+zX81fzF/fz+awD2AUUDFgRvBGcEyQPLApEBOQAI/xv+nf2R/Qr+2/7s/zwBcQKZA4gECAVRBW8FbAUKBVsEowMBA3gCxQEIAacAsQDzAFABVAEdARUB5gC8ADQAbP8T/+v+A/8L/wX/Cv8n/1f/LP/j/gz+Af0j/C37zfqw+vn6uvuR/K/9xP7A/2wAyQDvAMUAWwCo//f+U/7i/bD9i/2l/eL9cf5b/zMARQFdAkcD6QMCBOEDYwOTAooBkQDX/zP/7/6z/sv+R/8NADMBIALgAmYDoQNwA7ECqgGFAKf/Hv/v/hj/hf80ANwAWAGYAb8BxQGWAT4BwgBeAPz/lv8j/57+S/4l/jj+gv7m/lv/tP/R/7b/eP8k/9/+sv58/kz+EP7E/W79Af2t/K/8CP1//Qb+gf7w/n7/1P/n/8H/af8z/xz/Ff8q/2D/uf8yAL4AJQF7AcQB/wE9Aj0CBQKpAU8BJwE+AYIB0AFKAtQCZgPZA8kDZgPKAh0CfwHZAF8ANwB9APQAYgGpAbcBtAF0AfkAXwDm/7r/mf9r//D+YP7e/Uz90vxT/DD8lfxi/WL+GP+f/+v/JgA+ABQA9f/F/6b/Yv/r/mP+zv1s/U/9of0n/tH+mf9IAMoA6ADEAHIAHQDA/2f/Iv/P/qz+of6w/sf+zf7t/jX/qf8ZAI4ACwGOASoClQLIAtgCugKdAm0CIALOAZIBdQFmAU8BDQHCAIoAWwBXAGcAnQANAYwB/AE7AkYCIALaAWQBwgAMACr/Mf4z/WT8CPw0/Nr8yv3l/uz/tAAOAeQASABz/5v+3v1K/cv8j/yk/Pf8hv0n/vD+yv+gAD0BcQE5AYQAqv/e/lL+Ov55/hv/5v/AAHsB3gEFAtMBhQEvAcsAhQBIAEoAggDuAGUBpAHVAcsBugGZAU0BGQHWALoArQCkALwAwwDwABQBSwGGAY8BjAFMAfYAhQD
1/2r/3v6N/m/+l/7y/lf/xf8WAE0AcABwAGQAWQBMADgA8f+D//j+eP4f/uL91v3g/RH+TP57/qT+ov6r/qX+pv63/rH+yf7e/gr/Rv9w/5z/pP+v/6v/kP91/03/YP+c/wMAgADoAGIBwQEXAlECYgJ9Am0CSgLnAToBjgDZ/2//SP9u/wAAsQCIAScChQK4Ap4CZgLmAUMBngAFAJz/P//x/rT+mf60/uD+Cv81/3X/1/89AHwAhABtAEsAGwC4/yf/lv4w/g/+Bf4C/g3+L/50/rL+3v75/ij/i//z/0QAUQAvAAEAwf+F/zv//f7m/uj+BP8Q/xX/Mv92/+//cgDqAEUBegGQAXMBNQH0AM4A0wDpAPQA5AC0AIEARwAQAPn/FABxAOcAWwG2AfUBJQI2AigC8gGiAUgB3QBWALP/Cv+E/kH+Of5e/q7+Jf/I/3YADAFjAX4BbwEuAbUA8/8V/1D+v/1x/Uv9UP2A/dr9P/6K/rb+1P4M/1L/mv/A/8D/sf+M/1r/Dv++/p7+vv4o/6D/AAA8AFgAawBhAFgAZwCkABkBgwHQAdcBrAFrAQ0BrQBBAPn/3//l////DAAkAEwAhwDCAOEA6QDjAOoA7ADfAMAAoACVAIwAcwAzAN3/lP9j/07/O/86/1b/kv/h/xUALgA6AEgAXABdAEQAFgDr/7//hf8v/8f+b/42/iT+Kf5O/o/+8/5q/8v/DwAkACEAAwDH/3n/G//b/rf+uP7W/v/+QP99/77/7P8JADAAYACuAPMAJwE+ASwBEAHlALoAlACAAIYAmgC7AMsA2wDxAAgBIAEfAQQB4gDHALQAnwCHAHQAcwBmAD4ABADI/7L/vf/Q/+H/9/8NACYAJADr/5//XP85/zb/N/8u/zP/S/9d/1r/Mf/z/tn+5f4B/xz/Kv8+/2v/oP+7/8X/yv/b//f/8P+9/3n/Pf8Z/wr/Bv8h/3P/5v9lANIAFwFOAXABZQEqAbwAMwC7/1r/Df/1/hj/ff8gAMEAQgGlAdoB6QHXAY0BJwHJAHgAQwAmAAsABwAnAE0AdwCTAJ8AtgDHAMAApQBwADIACADd/6j/fv9Y/0n/UP9T/1X/XP9o/3v/jf+H/3L/WP84/x7/CP/3/vr+Ev87/2j/g/+C/3D/WP9C/zf/Mf8z/0L/Wf90/5b/uP/m/ygAaQChAMYAyQC5AJsAdQBMAC8AJAAzAFcAfQClANAA9AATASEBGQEEAeYAvwCPAFwALwAYABwAOABmAJMAuQDMAMMApAB1AD4ADwDr/9L/x//O/+P/BgAoADgANwAfAPj/xP+I/1L/Mv8s/zn/Tv9f/23/f/+P/5T/kv+R/5T/lP+G/2n/TP89/zf/Ov89/0z/bf+Z/8D/1//i/+T/3v/N/7f/qv+w/8j/6v8MACsARwBaAF8AWgBOAEQAQABAAEMAUwBuAJUAxADwABUBLgE8ATgBJQEAAdMAqgCJAHUAZwBeAFwAXABZAFMAQgAtAB0ADwADAPH/1/+3/5f/fv9t/2n/eP+X/8D/5v///wQA9f/Z/7L/hf9V/yn/Av/s/un++P4X/0L/d/+u/+T/EAAwAEEARAA4ABwA8f/E/5//i/+F/47/n/+7/9//AAAeADgAUABeAGAAWQBOAEUARwBSAGEAcQB7AHoAbgBYAD4ALAAoADQASgBoAIQAmQClAKwArACxALwAxwDJALkAlgBfABwA2/+o/47/kv+w/9v/BgAkADAAKAARAPD/zP+r/43/cv9Z/0P/MP8h/xb/E/8Z/yf/P/9c/3r/nf/A/+D/+v8IAA0ADgAMAAIA9P/l/9v/1//V/9D/zP/O/9f/5P/1/w4ANQBlAJcAvQDPAM4AwQCpAIgAYAA7ACEAFQAUABkAIQAvAEUAXABpAGoAYgBcAF0AYwBvAHgAfwB7AGYAQAAMANv/vP+0/8P/4f8CABwAKQAmABcA///u/+f/7P/4/wIAAgDw/8z/mf9k/zf/Hf8
f/zr/af+e/9D/8v////z/7P/W/73/p/+Z/5H/jv+N/4v/jf+Q/5f/pf+9/97/CQA1AFsAdQB+AHcAYgBFACcAFAANABMAIwA7AFMAZABuAHMAcwBzAHkAiACaAKkAsACsAJkAegBYADgAHwANAAMA+P/r/9z/z//L/8//3//3/xYALQA4ADIAHQD8/9f/t/+h/5T/j/+U/5r/nv+g/6P/qv+3/8v/6v8MACgAOAA2ACAA/P/P/6X/hP9z/3X/hv+d/7P/xv/T/+H/8f8FABoALAA5AD8AOwAuABoACAD6//f/9//1//L/7v/q/+r/8/8BABYAMABOAGcAeAB+AH0AdwBwAGkAYQBYAE0APwAvABwABgD0/+3/8v8CAB0AQQBkAIMAjwCBAFsAIgDj/6n/fP9c/1H/VP9k/3n/jf+k/7v/1P/v/wIADwAPAAIA6v/J/6T/hf92/3b/iP+m/87/9f8VACkALgAnABgACgABAAAACQAaACsANQA0ACsAGgAIAPn/7v/t//j/BwAbADAARQBZAG8AgACJAIkAegBdADkAEADp/87/wv/C/8v/1//h/+n/9P8DABgAMABIAF8AbgBuAF0APwAbAPX/1v/A/7b/uP/F/9n/7P/2//b/7P/c/87/xv/G/8//3//t//L/5//R/7T/lv+B/3v/gv+S/6P/rP+s/6b/ov+o/7//5/8ZAFEAfQCNAHwATwAYAOj/yP+8/8X/5f8XAEkAcQCBAH8AdwBvAGcAXABPAEUAPAA0ACgAGAAPABIAIwA7AFIAYABhAFYAOQALANj/rf+V/5b/pf+7/9T/5v/z//b/7P/f/9X/0//Y/9v/3P/c/9z/2//a/9r/2//h/+3/9/8BAAMAAAD4/+j/1P/D/7j/uP/G/9j/7/8HABkAIwAcAAkA8f/X/8f/vv+8/8n/1//q//r/BQAOABkAKQA5AEUATABLAEAAMAAfABIAEQAbAC8AQQBQAFEASgA7AC0AJAAjACgAMgA9AEEAPQAuAB8AEQAKAAoADgASABEACAD4/+L/zP+9/7z/x//Z/+z/+f/6//D/2v/D/6//o/+k/67/vv/L/9P/1f/R/8z/y//Q/97/8f8FABQAGgASAAIA6v/W/8r/yP/T/+X/+/8OABgAHAAYABEACgAHAAwAEwAdACYAKQAnAB4AEwAIAP3/9v/0//f/+v/+/wEABQAKAA8AFQAeACoAOQBCAEEANQAgAAYA6//U/8f/yv/Y/+//CAAdAC0APABGAEcARQA7AC0AGgADAO3/1f/F/8D/xv/V/+r/AAAXACQAJwAcAAgA7//U/7r/p/+e/6D/r//F/+D/+v8MABMADwAEAPP/6P/k/+f/8/8BAA4AFwAZABoAHgAkAC8AOAA/ADoAKgAVAPr/5f/Y/9r/6v8CABsAMwBCAEYAQAA0ACUAFgAIAPr/7P/f/9P/y//K/9D/2//u////DwAaAB8AHQAVAAsAAgD3/+//5v/d/9b/0v/S/9f/4f/z/wUAFwAmACwALAAmABYABADy/+L/2P/Y/9//6//4/wIABwAFAPz/7//j/9v/2f/d/+b/8P/6/wEABgAIAAcABgAGAAUABQAFAAIAAAAAAAAAAwANABgAJgAzAD0AQAA9ADQAJwAZAAkA///4//j//P8DAAkAEAAUABUAFQAVABYAFQAVABMADAAGAP3/8v/o/+L/3//g/+T/6f/u//L/8//w/+3/6v/p/+v/7//1//r///////z/9//v/+v/6v/r/+3/8//4//3/AQAEAAgACwAQABQAFgAWABMADQAFAPz/9f/u/+7/8//4////CAAOABIAFQAUABEADQAHAAMA/f/8//r//P8AAAUACQAKAAoACQAFAAMAAwAGAAoADQAPAA0ACQAEAP//+P/2//b/9v/3//r/+v/9////AQAGAAgACwAMAAoABQD+//j/8//x//H/8f/z//X/9//6//z///8AAAEAAgAAAP3/+//
6//n/+v/9/wEABgAKAA4ADgAOAA4ADQAKAAkACAAGAAYABwAHAAYACAAIAAoACgAKAAkACAAIAAYAAwAAAP///P/6//v//f8BAAUACgARABQAFAARAAsAAgD3/+7/5//j/+L/6P/w//j/AAAIAAsADgAOAAwACgAEAP7/+//2//P/8f/y//D/8P/x//P/+f/9/wMACAAMAA0ACwAJAAUA///6//f/9P/1//b/+f/9/wAAAgABAP////8AAAIABgAKAA8AEAAQAA0ADAAJAAYAAwABAAAA/v8AAAQADAATABYAHQAcABcAFAAOAA0ACwAIAAkAAwD8//H/5f/d/9n/3f/m//L//P8EAAQABQD9//T/6//j/+X/5v/k/+L/3f/a/9T/y//H/8f/yP/U/+X/9P///wsAEgAWAB4AIAAjACIAIAAfABIACwADAPb/9P/y//H/8P/+/xMAFgAaACoALAAvACEAKgAgAHUA5wDMANQAsgCCAIAAiwCOAIQAhACTAHMAXQBVACkAJwAMAAYAFABSAJAAfgCNAHcA9v+S/07/Av/i/tD+rf5d/gn+1f20/cL95P0S/nb+1/4r/53/EgB4ALgA7gAUASEBRgGLAcwB0gGaAUQBzwA+AKX/LP/T/oP+YP59/qf+vf7n/j7/f//P/0kAxwBDAawBFQJWAkgC4QF5ASEBswBkAC4ABwDk/8D/tv+f/4P/dP+T/83/6v8nAHkAvwDRANMAwgCVAEAA5/+I///+gv4V/qX9Fv1g/ML7Uvsy+zX7Pfsx+/H6hvzM/xgDnQUGCI4KxgvkC7AKIggLBX8CtgD4/qn8oPrn+Rn6HfpT+v765/ta/Wn/WQE/AgEDmgMXBDkE3AN0A9QCJAJcAYkAOf/A/QD9y/yq/OP8qv3a/iUAdQFAAoYCbwLRATIBawC//yH/4/7M/or+BP4M/dX7Nfo4+Gz2KPXD9B71PPdf/K0BvAYjC+cOfBBID08NgglVBWkBqf5M/PH5Y/iN98L3+PfF+N76QP2P/8oBoANTBP8D5gMpA2cC9wE0ApUCfAJSAloBWwCg/hT9O/zT+4b8H/7YACkD+gRFBnEGwwU9BMMCVwFyAD4AbwDvAP8ApgC//0j+o/w1+8b6Lftj/Cr+BwCLASEC/wHtAEf/c/3m+wj7/vkL+WP4S/h++Eb4XfseAF0E6AfSCnkNxgwFC/oHHARKAH79S/zB+qz5NvmW+h386Pwu/pf/lgBIASACsQJQAl4C0AI2A5MDfwPaA9ADaAM/AsEAK/9p/Yr8Pvxw/Ar9Vf7j//kAoQGbAW8BYwFfAdIBYQLFAtUC0gJcAowACv6l+9j5h/jl92z4qPmA+4T9GP87/1r+Wv2Q/G37bPxTASgGwgkCDD0NxwsVCOoD+v7s+rT4VPh9+Mf4BfkA+pv7A/2v/gsBlgPOBZMHrQcqBqQD4wAU/tD7Fvu5+4H9kv+qAVMD7QOqA+8CEwJAAU4BOwIZA7YDHQToA+ICdAHc/1/+of0Y/iD/sP/P/93/DP9s/S38pPu2+9n8Of+2AawDtgTFBE4DgABi/Qb6Dvcf9Q/0h/TM9FH3z/23A9gH+gk7DJ0MFgsVCfAF6QJ9AOr+pPzn+f32sPVp9sb3l/ls/Ln/4AKZBQ8H5gZGBh8GGQVLBLADOQOLAqgBgwB6/s78mvtS+9376Pyk/s8AnQJsA3wD/QKfAT8BpQHMAZsCIgQQBWcEUANZAUL+Dvuc+BD3TfYj90n5H/za/tEA+AFDAfv+H/yZ+e33vPqXAGwFNgmSC6gMxQlNBT4Alfs3+fL4XvrX+yP9AP4x//L/k//w/iD/aP/4/2UADQAP/7n+7v50/iP/wwApA3QF5wZ5B4wGzwSHAjwAbf5G/bf9U//7AEcCHAP3Av8BLQFbAGv/ff/JAJ8B+AHgAe4AFP8H/b37zPrW+k78F//yAf8DAAW4BOwCg/+O+3b3XvTU8jzzuPOY9/b+7gQqCWEL2Qx3C00J6gb
1A5MBKAAk/+f8lvr295T2nvZ493P5TPzs/oMBygMRBcUEtgQ5BZ0EWASPBPAEQwQgA3UBV/8k/VD7AfuR+0P9lf8oAqYDcgPrArABq/9//uv+ZwChAowFnweCB18G2QMgAC38/Pj29i72Ovd9+XH8yP6aAHYBLwGU/x/9QPpS+Er3pfn2/zEF9AiZCvALtgmMBe0BRf4y/Lr7Mf1l/cv8APy3++j7H/y6/EL+ZABBAo8DogNFAhkBEQCg/hn+C/90AWwDOgWxBXAEmAKAANn+Zv0e/Un+bgC5AnUE+gQiBJgC6QBk/37+b/5Z/30AVgHJAYABTwC5/kn9MPwO/Bv9+f4AAZUCTgPHAokAKf2++LL0jvJX8n/zHfl2AR4IzAyRD5IQpA1WCYcEEwAr/er7IPt4+RL49/bJ9jH3jPgX+97+fgJuBWgHcQfzBU0EtwKtAD7/d/99AAUBiwGlAd0Ayv/1/o/+rf50/wQB5QLpA8YDCQM5AnMAzP52/i/+2P6FAGcCSAOAA3EDwgEl/9n85fpw+WP5qPrz/If/igFcAuUBDADy/FX5ivYP9cf39v1qA9gHFQsWDXQLwgeOA1T/Lv0N/Xz95fxU/NX7Vfsd+z77+fve/YMAgQKXA0ADzAHeAIEAUP+E/qT/uAEaA/8DDwS8AlkBOgBc/6X+8f4eAPoBhQPvA7sDzAJkAT8Auf/z/qv+Yv9HAFIAhAC6ABkANP9I/oP9oPwi/Wn+j//BALsB+gFzAIT9+Pl39j/0NPMw9G75/P+8Bd4KbA5RD3oNsQq5Bl8CaP9w/VD7avlH+Gz3SPfz93P5yPvV/m0BhgOnBIQE9gOGA58C5QB7AB4BtQEZAmgCzAHIADAAiv+s/nH+ff/7AIkCFwPEAlcC5AH8AEoA0v9g/8z/+wC1AYYBrAGVAUwAHv4Q/Ef6ifkg+qD78v1gAIUCRwOSAmMAI/2m+eL2SvWy9kf7/f+lBKMIJgsfC0wJwwarA4kBnwDg/8n+mv0Q/EX65PiR+Bn5v/o//fH//gEOAy4DvQJfAjwBtgCGAfcC9QP2BJsFlQQ+A14BQP9Q/Xf81Pzu/bj/VAGVAjADiALIAWABiQAYAIsAdwGKAYQBegFSAJT+8/x9+1v6b/qf+2j9Wf9FAfwBQgE3/zL81fgU9tH0FPYU+2oASwXsCeIMUA1OC4sI1wTEARwAwf4U/Tv7/vnA+Lj3kveV+BH7eP7iAUkEvAXRBd8EtwOMASb/Df5x/i//cAACAr0COQNsA8sCfAGIAGgArgARASEBBAEDAeQAgwB4AF0AAQBSAAoBTwEGARQBIwEsAJb+2Pwb+0b6ZvqU+2X9V/85AcgBTgEp/xT8Q/kw9232a/gP/WgBugU9CUcL5goLCYsGeAOJAW4AxP/M/kf9Qvtx+Sr40veU+Hf6KP0QAK8CUATBBCIEMAM9Am0BKQGnARICmQJNA04DigJyAVAAoP+H/6z/UQAJAXABtAGOAa8Avf/x/zkAAgCMACoBcwF1AWcBxwBW//39+vwT/H37//sa/ZL+AADGAKMANf/V/N75JPeF9aD17fjA/T8CvwZ3Ck4M0AvbCRwHagRaAmwASv5T/FD6pPjQ90z3sPeq+Zf8zP/aAtAEhQV2BccEWwORAfj/O/9q/zkAcAFFAp0C1QKJAoYBeQC5/4n/HQD7AK0BRwJoAusBagHaAOv/X/+a/w8AuQCWASkC2wGXAMD+tPwr+176ffr1+w/+EAA1AQIBOP84/GL5P/cJ9vb2yfqE/9cDeQefCfoJ+ghEB1EF1gPrAvcBIAGn/9/8FfrL90r2IPab9xP6Rv2tAHkDJQV2Bf0EBQQBA+sBgwGAAfcB/wKmA6UD7QKxAWcAbv+q/oj+IP8PAAcB8QH4AYcBMwHxAI4AXgByAHoATwHlAbUBzgBr/8L9E/zv+pT6Uvv0/Pn+eQD7AC8AOf5/+/r4PveO9of4zvxBARwFCAieCYUJXAhZBiMEswKiAewADQA
7/tb7Kfra+AL4WfjC+eD7fv7rAFcCHQNOA8wCHAJ1AfMALgEDAkIDKAS6BMwECASwAhoBqf9H/tL9Tv5D/zEAuwDiAN4AuQB+AGgAsAAqAQ8CJQOPA/sChQGh/5D94fvD+oP6ZftB/S//TACMAJz/dv0K+6D4Bvek9pH4Q/z6/18DEAYdCIQIXQjvB78GXwVNBFQDrgEl/zX8yfkc+C73ZffL+Fz6wvyp/6gBmQItA2EDGwPtAqkCoAI2AyYE9ASKBQAFlAMxApoAv/5b/d38JP1I/lD/4f9XAMIA3QD1ABcBHwGkAYMCMgM5A3wCJgGR/9P9FPwC+8f6jfsv/b7+pf+x/wr/hP2E+2b53Pf197T56fxSAC4DowWrB2sIzQfNBqQFVgSPA78CCAHh/rT89Pq1+c74efhf+TL7G/3g/mMATwG7AdQBrgGFAa4BYALAAxcF5AVlBi0GJQVhA1IBR//M/Sz9Xv0c/rH+MP+8/yQA0v+A/+L/ewBbAbEC3AM+BOIDvwIIAfD+Jf0d/Oz7fPxe/Yj+VP8t//T9D/zT+cr3AvdR91z5gvzg//sCggUxB3wHawclB3QGUAWSBI8DwwG+/6j9wvtQ+o75WvnJ+eL6PPyT/ej+xv9QAJQAyQAkAbsBsgIbBHwFnwZJByQHDQZCBDQCCgBd/l39Nf1e/Zz97/2E/un+jf52/gz/1f/cAEQCfgMrBHwEQgT0AvwAIf/n/Sr96/wu/cT9H/7Q/d/8Rvth+dv3YvfR98T55fwEALwCRwUIB1MHNwfNBgQGMwWeBK0DRAK+APv+Zv0J/M367fm7+Sb69frd+7b8i/1B/sj+YP8wADIB0gLiBK4G3AeRCKwI2QcZBrEDZgGM/yz+ZP0J/cD8ufw9/XT9Gv0Z/dD91f4YAJcB/gILBKoEuwTxA1sCowBo/6b+OP4T/h/+F/7E/bb8/voO+Zb3BfdB99P4Yvsj/q8AggOkBU4GxgYGB5kG4QVjBVcEBQPfAVEAxP6m/YH8bfvx+r/6xPo7+977cvws/bH9Mv4//3MAnwFLAzMFjQZyBxoI4gfBBlcFwwMkArsAp/+e/r79NP3s/JL8Gvz/+4/8iv2U/s3/HgE8AgEDgwNmA7oCBAKjAS8BqgBiACYAtf8c/x3+ifzb+nn5b/i69/z3LvkG+/L8SP+OAQoDJAQ0Bb8FdgVDBd4EJARiA7wC5gHyAOj/wP6x/dH8Afxm+/z60fri+hj7fvtQ/MT9oP/PAeQDqwXmBrAH3Ac/BxYG3QSvA4wCpwG6AKz/4f5E/jf9Ify4++P7Tvwa/Qz++f4HAAYBjQGmAYQBZAGjAQMCEAIZAnECjwIpAkkB1//w/Uz87fqN+Vv49Peu+Cv6zfty/S//rQDvAfkCRgO9AoUCoQKBAk4CdwJ4AlwCWQLrAQYBBgAG//f9Kf0//Gz7RfvI+3L8jv0T/4sA/gGFA5AE2gTbBKYEOQSXA/cCSwK/AU4BAgGHAKz/0f5O/gD+uf2z/dj9Of7P/nv/4f///ycAfADVABkBdgHmAV4CxwLiAmMCgAFeAAr/jP0Z/OD6Jfrq+S36Avsa/B/9Jv5J//j/JgAfAB4ADQBFAMkAVwHnAZkCIgM0AwwDgwKXAYMAk/99/n/9//wK/Xf9Ov4S/9b/uQCAAf4BMgJCAisCMwJLAjkCDwLxAdoBtgFvAdIAKQC//33/Nv8D//j+Mf+e////KgA6AEMAWgCQALkA3QA2AcYBNAJQAgwCeAGpALT/jf5O/TT8i/ti+6j7Hvym/Gb9Sf7C/qv+gv43/t392f1c/sj+g//BAPkBnwIBAxUDigLRAQ8BNQA1/8X+wf7r/iv/sP80AJwAAwFOAXkBeQFxAWsBbgEnAfoA+gDpAMAA2QDWAIcAYgBjAEAAGgArAEYAfADEAO4A3gDBAJUAcgBnAHUAkADWAE0BqQHhAdUBfwHjAD0Aav92/p399/yW/JP80PwW/WX9sf3v/Qz+7P19/Tr9XP2
3/Uj+Of9OAEkBOgLnAgYDkwL9AVQBkQDC/0L/CP8U/4j/EgB9AMgAFQEvAScB8ACvAJwAnwCgAKsAtQCZAKQAsQCPAGEAYwBqAIkA0AD5ACEBZQGjAZsBgwFBAd8AjwBtAE0AOwBwANEAOgGJAaYBeAEwAcQAGQBR/6H+Df66/bj9vP29/fP9JP79/Z39B/1g/BP8Nfx2/A79Ff4u/0kASQHGAcsBrwFkAd0AUADc/5P/qP8NAHsA2QA2AXcBkwGnAXsBDQHHALkAewA3ABwA9P/f/wsAKgALABEARwBwAJoA2QAMAUYBpAHcAdEBpwFyASkB/gDmAMUA2wAtAYYBywEGAu4BlAExAacA4/8k/6L+NP4N/in+Rv5R/nr+mP5r/hf+jP3q/Gj8TPxu/Mv8U/0g/hL/0f8/AGcAYAANAMz/k/9b/zf/jv8nALMAMgGgAekB9wEFAt8BlgFGAScB8ACaADIA4f+n/27/TP9A/1X/dv+8/wQARACSAAwBhwHhASACRQI5Ag0C2gGmAXoBdQGcAdkBDwIsAiQC2gFcAbQA6v8U/3v+Mf4z/nj+2/4p/2r/gf9B/6L+3v0w/bz8fPxy/Jn87PyQ/VD+xP7f/gn/DP/b/qr+ef4z/ln++P6G/wwAsQBSAcYBNQJgAlcCSQJDAhQCvQFKAcMAZwApAOD/fv9O/zL/LP9L/2L/Z/+a/xwAfQDRAA4BNAFSAY0BvwHNAQACUQKtAvsCOQMqA+4CmAIYAlUBfgC+/xz/tf6C/n3+mP7j/iz/Tv9O/yT/yv5a/u/9iv1H/Uf9aP2j/cz9Bf41/lT+L/4R/gj+1P3B/fT9YP6n/lP/GgDGAFsBDwJ/Ao8CsQK5Ap0CQALlAVsB4ABqAO//eP8U/9H+r/7I/tn++v5O/8b/CgBGAIcAlACRAMkAEQE7AawBYALsAkkDrgPNA44DTgPxAjECWAHAADYAqP9K/xL/6P7a/uH+zf6T/kr+Af7e/cv9sv2k/dD9CP4v/kb+S/77/ZT9gv2S/Vj9F/1K/YH9rP0Q/qX+7f54/4AAcAHwAXoCGQNVA18DQwPdAjoCzAFjAdQANACl/y3/1/6V/l3+X/6m/vv+Uf+g/7z/vv/0/zkAYgDKAJQBZQIhA+cDXARXBB4E2ANBA3oCzQFOAdcAgABCAP3/oP9U/y//7P6M/j/+I/4C/vL9+/0U/jz+Y/5+/mL+J/7F/XH9Cf2U/Gf8kfy6/MD8QP2//Rn+lf54/xsAvQDOAb4CMgN2A9sDsQNMA94CgwLbAUoBygA4AIH/zv5R/ub9r/2a/er9NP55/rv+Hv9Q/3L/3P9uACMBDQItAwQEqAQtBU4F3gQoBHQDjgK8AT0B5gBvABcA5P94/+r+Xv7f/Vv9Jv0v/Wf9tP0a/pr+FP9G/x//wP4p/or9Bf15/AH8Hvyl/P38Tv3m/Vb+qv5a/zYA0wCuAcICkAPuAy8EBwSQAxgDkALeAScBmwDx/2H/rf7//Vr9B/3a/P78WP2n/Q7+qf5b/8P/VgARAeQBpgKZA2UE1gQXBToFAgVRBI8DtQLmATABxwBhAAMAt/9t//z+aP7i/WX9Lf0s/XH91v13/hn/of/u/+H/ev+5/uL9+vxB/HP7K/un+2f8pfwd/RD+m/7w/sr/7ACOAZ8CCwTkBN4E3wSwBPcD/gIkAkQBUACH/7/+/f0u/Xz8+vvj++v7Jfyr/Gr9Gf71/gkA5gCvAcwC+QOrBEYF9gVMBhQGpAX0BOoDywLRAekAGAB+/xL/qv49/tT9Z/0I/dv81vzt/Ef98v2q/l//FQB4AFoAyf8N/xT+Cf34+4X7//vV/GT9+P3p/l3/jP///7oAHgETAqIDxgQeBU4FTwV5BEwD8wGjAEb/Sv5a/Zj85ftW+wr78foL+337k/yN/Yv+w/8WAfcB4QLlA6EEQAX4BXQGdAZUBsYF3gSuA1cC1wCn/+b+RP7q/c/9vf2j/Y/9UP0J/Qv9K/14/S/+Cv/
C/5gAegGIAQsBMgAI/5L9gvxz+8H6jfvw/Jz98P0O/5L/wv9FAAUBiAHPAncEYgXRBcUFLgUnBBMDRAGG/2T+dv1c/Kb7DPt1+kn6g/ri+sv7N/2H/vH/fQG1AnEDVAQmBcIFNwazBuYGzQZbBmgF/gMrAk4Ap/5j/Yf8P/xR/Ij84/xX/ZP9hv2l/fj9WP7h/rv/qwBoARQCRgLZAcYAYf/K/UL86/og+t/6O/wo/cb9M/8jAEEArgCMAUkCTQMYBSgGhwY/BnsFAwRsAooAxP6l/Zj8mvu/+iP6WPky+W756vkh+zD9K//SAI0CygOGBCQFuAUKBoEG9QYRB+4GcgZBBZkDxgHK//n9q/zB+2b7ufsf/H38BP1s/VP9nv0t/pP+PP+JAJsBKgLCAgIDagJgARYAav62/Hn7d/rn+cz6LPwp/bX91P6L/0IANAE8AkwD4QSIBvEG8wYzBvUEOQPTAfT/Uv4c/fn73Pr8+TX5lvjx+Jv5xfqu/B3/2gCCAv4D8QR1BQ4GbwaaBgkHNAfoBicG/gQrA0wBbf+Q/R/8Sfve+gn7ofv9+1j8Gf23/Q/+p/51/y8ACwHkAWgCvgLhAmgChgFYAK7+7Px9+xn6Cfni+bT7Bv3j/WT/ngBdAVUCFgPSAzEFuQYCB9cGMwa9BAgDegF0/4j9Uvwk+/r5Gfkt+MX3h/iA+Zr60Px0/2gBPwPxBLYFEga1BgUH/wYSB/AGewatBUIEYwLVADL/b/1v/MT7Ivst++H74Pvz+938pP3//ar+hv8LAMUAdAHbAQkCOgLlAV8BSgCs/v38lfsK+uX45fnI+1D9QP4DAD8BDALYApgDMwSIBU8HrwdwB3UGIQVFA4MBHf9C/RH83Pq1+bz4x/cq9+f30Pgj+mX8e//lARIEvgWYBuYGXAexB40HhwdkBw4H/gVQBBECIAA3/oH8W/vF+nT6t/pp+6n73vuW/Kn9e/5c/z4AGwG1ARQCJQLtAaQBMgGeAJ7/WP67/Db7x/mQ+AL5Efv1/P392//CAa4CVQMxBMgE8wWqBwYIaAd5BhgFHANVASD/HP3i+9T6bflE+FT3vvZi96n4QvqB/H3//AEaBOYF+QZiB9cHPwgoCPwHswf6BqkF/AP0AfH/N/6S/F37xvqc+r/6R/uz+7n7Pvxl/Zn+gf9sADcBvgEDAvgBkAE0AQwBpQDE/3H+5/wU+5z5M/g8+Cr6rfwN/oT/hwG3AncDOAQeBScGAwjNCGMIIgeABW0DhAF0/1r9E/wT+/35rPh+94/28/Yf+ML54/vE/ogB8APOBdQGagfvB4wIrwiwCE0IlgdDBlkE4AGE/6L9HPwv+876uvq0+gP7KftP+9P77fwo/nb/lQBhAfMBJQIgAr4BYwHnAIMAov9d/sz8Cftl+Rv4r/jM+uf8B/7W/+0BDQOlA0sEFQVbBiUIyghLCPMGNwVSA0MBtv6f/Hr7ePpU+Rz44vZb9h73ZPgQ+oD8d/8rAnsECQbgBncHPQj3CDoJ+giECMsHUAYMBHsBEv8f/fX7PfuU+jn62Pp5+137Mvvk+wL9M/6F/6EAcAEHAkwCBwJ0AegArABhAHz/DP56/Nn6Pvnv91L4ZfqN/Mz9jv+5ASwDHwT+BAAGUQfZCD8Jrgg5B0gFTQNcAev+xvxj+xn6q/ho91f2//Xo9lr4YPri/J7/5gEsBLsFowaNB4gIYQnDCbsJ5AjZBxMGtAMeAev+Fv3Y+0v74/rJ+uD6DPvo+iz76vsm/af+PQBnAQUCOwIIAssBQQH4ALUAawBj/+39I/ww+nf4Gffv91D6qvwA/ioAUAKRA2wECwXcBVgHKwmKCSUJmQeYBX8DPwE8/v/7z/qR+W/4SPcy9sT16/Zk+Fn6xvyA/ywCsgQ2BgIH2QeTCEcJzwm/CdIIsgcKBr0DDwGc/sL8t/su+876mfq3+un69foi+9b7E/15/uv/BAG2AfEBCgL4AZUBQQEPAYgAJf9r/Y374Pl
Q+FD3kPg0+xz9If4HAN8B+gILBCgFMga/B14JqgnnCN0GiATLAg8Bbv5z/Cv7rPko+Nf2qfV19fn27PhV+/39ngDcAvUEHAbABqwHrAiqCS4K+AnsCJoHrwVuA+EAhf7F/P/7rfsz+7P6lfrt+iD7g/tF/IT9zv4pAC8BigGOAa4ByQF2AQsBmADO/03+ffzC+kv5HPiM+MP65/z3/Tz/LQGSAp8DhAR6BbIGPwj/CLQINgfVBPECfwFj///8svt2+uz4avcJ9kL19/XD9wb67/yn/+IB5QM5BZ0FTAZ2B4QIgAkUCqsJjwjoBogEIgLf/979pfxN/NP7SfsF++36Dvtw+x38Ff1Q/lH/VgALASQB8gA1AU4BIAHuAJ4Auf8v/lz8rfqE+Xb46/g/+5b9Zf6S/2IBkgJsA5kE5wVMB9QIQgmcCNoGfASHAi8BQv9M/Qn8kvq4+Pv2hPXq9Nr12veJ+qr9VQB5AmkEfQXkBbQG1QfACKQJIwqgCUkIeQYmBNYBkP+0/YH87vtM+8v6i/pq+qv6VftL/Fz9tf7F/9cAfgGWAU8BZwFpAU4BIAGXAHX/1/02/If6I/kO+DX55vv4/W/+Wf8LAWwCZQM7BL8FiwclCUoJTwhIBhYEfQIKAQv/Bv2w+yD6V/hw9gr15vRJ9lz4/frs/XEAkAIoBDYF+QUKBwYIGQkIChsKYwkDCA0GzgPLAaf/2/2v/Lf7Efuk+lT6O/rM+qP7j/y6/dH+0f+VABYBNwFlAcUB8QETAucBPQH0/z7+cfyO+iD59veM+N/67PyG/TP+vv8BAWUC8gO5BZYHiAnzCS0JXQcABTgD0gEjADP+s/yd+nz4hfbO9DL0gfXZ95H6cv3h/x8CBQQZBeQFPgdpCH0JbAp8Co8JHQhoBl0EWQIkAFX+B/3f+/L6PPqP+YD5TfpT+1X8sv3q/qH/YQDWAP0AUAEDAowCtwJfAnMBv//I/cf7E/q2+HH4QfqB/GD9UP1c/q///wBWAhoEIwZ0CLUJRgnWB2sFpAOmAnoBWP/7/YD8LfqT90v11fMc9B32jvhj+9b9CAAPArgDogTKBXEH5whHCvUKdAozCcIH9AX0AwMCDwDC/p/9SPwg+2H61fnS+bX6f/tm/If9hv4u/8z/QwC9AJYBOQJ2AnsCLAIvAZz/vf3V+3L6a/l4+Sf71fwZ/U79uv4NAFkB8ALfBMAGhAgxCZwIKQcjBfADYgMbAv7/bv5p/Lb5C/f49OnzrvS39i/54Pvt/bH/pwFEAzYE0QW5BzQJVAqaCqMJQAjiBl4F5QMnAkEA5v6i/fj7i/ql+X35APrd+pH7jvyj/Wr+TP8BAGwA/gAGAo4CqQJ0At4B2QA7/2X9h/s0+gX5Y/k/+6783/yF/QP/MQCEARYD/wTJBm0IxggsCL0G+ATpAzADqAHH/1z+M/yW+Un3pvXv9M31nffH+RT82/19/2sBDwNnBCIG3QcBCfQJKApjCT4IDQeyBTQEZAJwAAb/hP3/+9P6MPro+Tb6xvo++wf86fy9/Yf+Tv/b/7oAtAFSApcCjwIiAi8B1v8C/lH8HvsC+u35hfvm/P38g/3w/jsAWAG7AkgEAQawB0MI8wfVBioF9AM7A7IBw/9E/kX86fny9172fPU+9un35PkE/LH9X/9jAfYCLgTiBWEHdwiWCeUJCgkXCCYH/gWtBNkC/gCO/xv+WfwH+y368/lo+vD6O/vs+8X8Kf28/Vn+IP8yAHoBHAI6AhoCnAGEAAH/XP0M/CX7tvrc+2X9vf1b/Rz+Pv9qAM8BdQMNBbUGyQd1B1wGkwSTA1wD6gJEAb//Df6n+yX5IvcY9jz2ufd2+Xf71/wA/qb/aAGOAhUEPgbWB+8IdgkhCSwIUQdnBqgFiASxAvoAqP/R/fr7D/u6+uv6ePvS+9r7NPyN/Aj9wv1n/jT/gQCfAcoB3wHoAZoBzwCP/wn+wfy0+5r69fp9/Fr9K/3B/QL/8v8VAaQCYgT
4BW0HvAccB7AFXQS2AyoDzAEnAL7+pfwz+uz3afYC9vr2o/h6+hP8a/0p/+AAJwKiA60FaQfBCKMJfQm2COEHGgcbBpcEnQIlAef/GP5a/Ff7rfqB+u36//rb+kL79fug/Hz9I/7p/h0AEAFgAcsBCAKZAc4A2P9//jL9Cfz/+sf7Xv3f/bz9oP6f/1gAdQHCAgMEoQXkBtcGTQbFBKADaAPFAhgByv95/kn8PPpy+Bn3DfdV+I35EvtI/Gj9PP/xABoCxQPcBScHPggLCdMIJgjKByMHIgawBNUCTwHg/0H+5fz/+yf75/oe+//62PpK++H7evxN/fT9rP6f/4IANQHCAd4BmgH6AOz/W/4P/dv7efug/N/9B/7z/bL+e/9WAD8BvgJRBPsF8Ab1Bu8FVwS6A24DdgLVALb/If4K/Lj51Pfu9k33jPj6+Xz7lvwI/rT/9AAbAu4DzAU6B1UIvAhlCNcHZwfpBhoGhQS3AkYBpP/w/an8nvvw+vf6Ivvy+uL6JfuK+1f8NP3p/eH+2v+PAP8AQQFOARABggBu/0H+Lf3w+zz8BP7y/nr+g/6C/1cAEgErAnwD+QQTBjUG4wWLBEMDEQP0AqMBEwD4/iX91Prx+LT3g/d2+I/58foq/O38Iv7T/wEBSQJvBDkGPwcICCwI1weWB0gHzAYKBlIEdQIhAY//9P3t/D38qvt7+zn72/rO+vf6Z/t8/Gz9AP7b/q7/OQDDAEkBhwE7AYIAiP+o/nn9gPzM/cj/AwBa/3v/JADUAJUBeQK0AxgFuwWlBeEEGwMmAp4CVgLhAI//Hv4H/Of5G/gy9833AflI+tX78vyn/Q//egB3ARsDFwV6BlMHyAeZBz4H4QZRBtEFBwVGA5kBPACQ/kn9tPwu/KT7dvtr+1D7N/tG+9X73fx8/QX+4f6a/x8A0gAvASQBuwAGADv/X/48/Xr8v/2D/8L/Lf+W/18ASgEUAtwC3AMVBeQFzgULBZID+wIEA1ICvwCf/x7+GPxc+vP4K/iE+LH51Pru+5z8iP0Q/00ALAHbApsElQVhBvIG9wbQBrQGhQYJBqAEygJwASkAt/7q/U39nPxf/CL8n/s4+zn7dfsk/Nr8Yf0W/uP+gP8HAHwAxQDrAIsA4v8X/1L+VP2Q/Vj/aQD+/9H/bAD3AIgBXwJoA0QEQwWKBUEFLQQDA88C1gLaAYsARv9S/TP7fPmW+Gn4SflU+n77e/wa/Q/+nP/2AFACGQReBeoFYQbRBrgGkgZxBjoGlAUvBDkCZADO/qD9Gv26/C/85Pu9+0P7xPqI+sH6hPt2/Ef9JP60/vv+Rf+3/wcA+/+a/wj/Xv5d/aH89P3y/z4Az/8KAMsAWgFLAmkDQwQ+BdkFlgXKBGEDyAIQA8ECqQFWALX+Ofz2+an4Tvjy+Cn6MPtp/GP9Gv4g/3YA4wGnA7MF1gYoBz0HRwcvBzAH/waoBtMFFQT8ASsAtv7J/XH9/Pxx/BP81vtc+/v65fqV+8r8eP3g/VH+q/7r/mb/8P8YAMz/HP8X/if9U/xb/Ob9Tf+P/0r/av8IAMsA3gH+AkAERwV/BQsFBgTyAqQCvwJAAi0BzP/F/UD7WPlW+Az4v/jt+fz6FvwR/fD9Gf+bAGICfARTBhwHTQeDB5sHiwdvBycHwgaZBXYDUQGj/2j+x/18/e78YPwG/KD7D/vE+gH72vsZ/db9Kv5n/q7+Lf/n/1QAegA1AGT/O/4f/R387fuJ/fX+D//u/oL/DgCpAKQB6QJCBGgFvAVGBY8EoQMwAwIDYQJOARwACP5z+5f5l/hd+Av5APrg+vD72vzF/T3/zgBOAlwEEwaxBvwGXQd0B4sHvAe+B1AH/wX3A+oBKADa/m7+MP5k/a38Pfx/+6/6kvoK+/f7Af2e/fb9VP6y/j7/BwCUANEAcwBy/z3+Sf2S/An9uv6G/xr/AP97/93/dwCHAe4CYAQ0BfsEQgQ1A1oCcAKeAuoBzQBX/+z8hvr
/+Fj4j/hd+Tv6Kfv5+3n8d/0Y/8MAtwLRBOIFHAZ+BuoGLwdhB2gHTAevBhQFBwMvAbT/4P6N/u/9DP19/Pb7O/vB+sH6Jfvz+8D8R/3A/Uj+zP5v/wsAhgCgAA8ACP8y/qT9f/3C/vr/zf9m/9f/gAADAcgB1gIiBFgFnQUYBUAEJwPLAgIDmwKeAWwAcv4d/ED6JPnR+Cf5tflV+lz7UfwG/S7+kv83AVoD2wRmBb0FcAYqB5IHrAeQBz4HbQaYBIIC/QDR/0L/0/7C/Yb81Pt2+/L6ivqo+kP7QPy0/NT8Vv3L/V7+Zf85AGgALACR/7D+Av53/Tb9gf4DAOD/aP/D/yMAhAB1AbcCAQQKBR8FiAT1AwwDhgLLAoUCigFpAHn+B/w/+lj5Fvmd+Tf6gPpJ+zD8yfzn/Wf/DAEzA+cEagWRBUgG/wZQB6cHsQdLB2oGfwRtAv0A6P9o/xb/Gf7E/N77L/tn+i/6mfoy+/77XPxj/Nn8gf1X/q3/0QA3AQQBUgBB/1L+tf3r/Z//DgG2APj/5f/a/xYADwFtAswD3ATQBPYDLwNrAhQCgQKJAuQB4gDq/mz8qvrz+d75cfo6+437EPzG/CX98v1P//YA5gI6BIUEvwSHBUMGgwb2BkgHAAcuBnwEmgI7AWoAMQDB/6D+m/3G/M/7zfp2+sb6Ivuz+wz8E/xS/Mr8rf38/hEAuQCWAM7/3v5C/g3+N/5y//oA+wBAAB0AMwBPABABhgLFA3oEXQSNA98CUgLQAfgBDQJ4AV8AnP6H/PT6Qfos+rn6f/vK+/37qfxb/Tf+X//YAIsCrgMQBIAESgXiBTEGwAZPB/EGygVBBJoCXgHdAL4AIgAE/xv+Cv3P+wL7r/q3+v/6ifvk+737xPtg/HH9zP7x/6sAqAAKAHD///6Q/oL+s/9hAZcBngAaAEEAZgDhADUCbgMWBC0EsAPmAiMCsAHoAT0CEwIbAWP/fP3r+xH75/po+z78r/yd/Nr8Q/3R/c3+RAD0AfgCQAO5A4IEPwXcBWQG7QbdBhQG9gSfA1YCkwFGAcIAzP/f/sH9Tfxn+/L6mvq0+ub6/foh+1f70vuj/Ov9Nf8YAHIABABV/73+JP4U/mn/TQGUAXIABABRAFcAjQDHARADqAO0AyoDUgKCAR8BkgELAtgB9ABr/9H9Yfxz+0z73vuj/Oz8x/zF/Bv92/3S/v//eQFmArwCSQMZBMwEYAUVBvUGFgcfBgkFFgQtA3sCHQKYAaUAlv9F/r387vt/+xz7Tft5+zL72/oA+6L7gvzQ/T3/KwBrAAsAlP8O/4P+Nv6//qwA9gEtASgAPgBrAGUAUgGhAkMDuQOWA58CsgEpARwBaQGIASIBEACE/u/8p/tQ+9L7tfwf/az8hPw5/dr9PP4t/6oA0QFnAukCWQPiA8sErgWFBuEGMwZeBa8EyQP0AmYCsAHcADkAPf+e/Un8mftA+2L7mPtj+yf7MfuS+0n8WP27/rr/QwBhAAAAG/8u/u79T/62/2gBbAFIANP/BwBnAOgAkAFQAlkDwwOvAlYB1ADEABwBoQGaAeIAf/8T/uX8Rvxk/Pb8a/0q/af8Jv3e/er9V/6d//IAugEkAmMC4wL2AwUFqwUsBgQGWQXdBDwEewMRA5QCzwEDAQAAkv5E/Zf8G/zW+9r7ovsw+9r6+/qn+8L8Hv49/9f/EgDf/27/5v6b/rr+Y//zAPUBSQFCACQAeQCkAAgB5AHDAksDngJKAdwA2QC0APcAYAFGAaEAZf/T/cb85fxt/dX9yP0j/fT8lP0D/jv+4P70/xQBvAHUAegBmgKxA6kEawXABXUFAQWGBOoDcAMaA44CtwHMAMD/e/5x/f78ofxT/Pj7Y/v7+s76D/vk+yf9Uv74/mD/s/9y/+3+gv6F/uD+I/8YADAB9AABAJ//uP/a/1MAeQFzArcCEgIKAcwA4ADFAGABNgI7Ao4BbgAp/1P+O/5r/qD+qf4E/k39c/2
+/db9Vf5Y/0EAsgDiACEB1wHdAqADWQQWBUAFDQXHBEQErwNeA/kCQgKmAcYAW/9V/tv9Ov2G/BD8rvtg+y77QvvY++f84v1q/s/+Df/T/o/+Tv5Y/q7+9/4JACsB3QC8/0H/jv/B/yUAWwFBAmQCywEEAfAA9gAbAd8BjAKKAugB6wDj//z+vP74/j//EP8B/iz9P/1u/Yn9/f3k/rH/+v8bAGMAHAEjAhYD+QPABAIFygRnBA0E1gONA/8CcALyAREB0f/a/lv+s/3d/Dv8x/uK+3T7rPtZ/Cz90/1k/s7+5f6//rb+rf6U/nH+WP46/38AUADw/nj+B/9B/2f/WwA8AZABgAECAb4A5ABvAU0CHgMiA6YCOAKRAW8A0//1/wgAsv+z/oj9GP1b/ZP97f1b/pv+qf4E/4f/+f/FAAACKAMSBGsEMAQQBCQEHQTNA3YDSAP9AjICHgESAHb/6P7r/Rz9zvyV/GP8VfyD/PP8h/0d/kH+Uv5y/mL+Uf4v/un9xP02/kn/mf+V/vf9ZP7m/u3+P/85APwANQEVAewAKgGwAXoCgQPiA4EDGgO3AgsCOwHoANYAUQBa/2T+5P3q/eP9nv2V/fX9Tf5m/n7+1P6c/80A2wFwAgIDfwOpA9wD0ANhAz0DTAMaA78CRQJxAYAA3//5/hP+Ff4h/qH9Zv2F/ab97P0y/lb+gv7I/rj+Y/4X/sH9hf2M/fr9f/5A/oD9af2m/ZP9uv2Y/p3/NgCCAJoAvAA5AQICEAMdBH8ETgT/A4MDyQJKAioC0gEXAVkAof8O/8z+Y/4T/k7+fv5D/gT+Pf7d/n7/GwC0AG0BDwIwAksCZAIqAjICgQKFAlYCSwI0AokBnwDU/zj/Rf+H/1v/Nv82/07/af9Y/1j/k//0/wIAmv8S/4/+M/7s/Uz97/xR/XD9wvwr/Dr8ZvyM/Nr8RP3w/fX+k//X/3gAZQFaAhwDdgOHA8ADEwTXAx8DfQL0Ab8BvAFSAc0AtwC2ACsAef82/yn/Xf/T/+X/8/9qAMYA1wDOAOUA/QDpAMQAjACNAMkAugCOAEMAw/9v/2L/dv+O/7r/DgBEAHUAyQDtABQBZAGUAZ8BhgFHAdAAOQCp//3+Vf7A/Sn98Pzd/Fb8oPtS+2v7aftg+9r7qfyB/V/+/P5n/woA1QCGARAChQLtAi8DFwOXAjcCYAKMAn4CYQIUArgBcwEVAbwArwDdAOAAtQCqAKAAqwDnAP8A3ACyAJ0AdgAjAPT/8P/t/+z/xf+D/1r/Vf90/7X/AQA+AIEAxQDZAP0ATgF8AXgBYgEmAcoAbwD//07/nP4i/r/9Wv3r/IP8H/y3+0n7CPtL+9z7UfzF/HX9Qf7N/jL/xv90ADAB7wFfAmoCTQJKAlUCTQJaAoYCmwJdAvEBpwF1ATYBIwE3ATIBHgEWAQoB/gAXAUQBQQH/ALsAkQB9AE8AFQAGAAcA4f+l/4j/lP/A/wMASwB6AKwA4AAFASMBRQFYAXIBfAEzAb4AbgArALj/QP/v/p/+Mf7E/VH9z/xD/Ln7S/st+2j72fs6/F78Zfyf/Cf9zP1Z/sD+Ff9v/8r/AwAxAIwANwEEAqACyAKVAloCRQJOAmkCkQKsApgCSQL8AfUBGAIlAhIC6AGoAWABEQHDAJcAhQCBAKIAygCeADsAGQBKAIgAuADKALkAtgDTAPAAAgEZASYBMwEhAakA/f+Y/3P/O//p/oz+K/7Y/WH9qfwS/NL7rvuI+4H7ifux+xL8ffzT/F/9Lv7c/iv/T/+S//n/WACeAOkATwG6Ae8B7wHQAaQBjQGkAbABiAFdATkB9QC0ALkA8gAxAVMBOAH3ANYAygCzAK8A1QAFATIBQwETAdYA3AAPAU0BfwGAAWEBXgF4AY0BnAGhAZ8BkgFZAeQAcwA0ACUAKgAJAJn/C/+c/jf+zv2M/Wv9K/3M/H38Svwv/DH8V/yi/AL9Vf2D/ZX9hv1x/Yr91f0v/oP+3v5
I/6D/w//D/+b/RQCuAPQAJQFNAVoBRAE5AV4BnQHNAfIBDgIHAtkBpgGdAb4B5AHxAeoB7AHnAcMBoQGbAaoBwAHIAbQBkQGGAY4BjQFzAUgBGAHVAG4A8/+g/5b/oP+P/1H/6/56/hT+vv18/Vf9Pv0b/fL8vPx9/GH8ivzv/Gn90P0f/l7+hv6S/rH+D/+Z/yEAngAQAWEBbwFEASMBNQFfAX0BigF7AT4B1wB1AEUARABMAFQAZgBuAFoANwAmAD0AbACqAOgACQH5ANIAwQDRAOsABgEnAUsBVQFMAVABagGEAYsBhgF+AWkBPAEQAQUBIwFCATIB7QCaAFUADACr/zz/4P6b/kr+2/1m/RD94/zJ/Kv8f/xL/BX85vu6+537pfvZ+yn8e/zS/Df9q/0i/pv+IP+z/z8AtQAJAUUBdwGqAd0BFQJQAo0CuAK+AqcClAKbArYCyQLOAssCwQKpAnUCKwLrAdAB1AHZAcgBowF4AU8BLAEbAR4BKgEpARgB9wDQALcAtAC5AMIAzQDRALgAcQAUAMP/i/9e/zD//f7E/oH+Pf4C/sr9if1F/RX9+/zi/L/8nvyf/Mn8B/1L/ZL95f1E/qL+9f42/2v/k/+2/9f/9P8QACsAPAA7ACcADAAFABwARwB4AKsA5AAZATcBPwFKAXQBtQHyARUCKgI5AjcCIAIJAggCGgIoAiYCHgIfAiQCGgL8Ad0B2QHpAewB0QGrAZcBggFHAecAiABEAA0Ay/9+/zf/9f6t/ln+CP7H/Z39fv1c/Tr9HP0J/QH9Av0T/T79ef2u/dH97f0G/hr+Jf4x/kn+Zf5+/pP+rP7D/tv+AP86/4j/4P87AJcA7gA7AXoBqQHSAfcBFQIrAj0CSwJTAlICRQI0AiQCKAI/AlsCbwKAApcCsAK+AsIC0ALyAhEDEQPwAsICjwJLAvABiwE5AfwAvwBrAAgAqv9b/w7/v/6A/lv+Q/4a/t39pv2L/Xr9XP1B/Tz9UP1c/Uz9MP0j/SX9If0N/f38Cv0z/WP9j/25/ev9Jf5m/rH+C/91/97/NgB1AKYAzgDyABgBRAFxAZYBqgGwAbEBtQG+Ac0B4wH5AQoCDwITAiYCQwJkAoQCpwLFAtICxAKrApgCiwJzAkICCALaAbcBiAFKAQwB4QC+AI8ASwAJANz/wP+e/2v/Nf8I/+T+uf6E/k3+I/4D/t/9sv1//Un9G/33/OL82fzY/N385Pzu/Pn8Dv0x/Wf9pv3r/TH+dv6v/tf+/f4w/23/qv/b/wUAMABWAHEAiACoANUADAFEAXYBoQHLAfwBOwKDAs8CHgNqA6EDugO8A7kDtQOnA4oDZQM8AwwDygJ9AjQC9wG3AWsBGQHTAJsAZwAtAPT/xP+d/3H/N//0/rn+jv5p/jz+Bv7R/af9e/1H/Rn9/vz4/PX88Pzy/P78Dv0c/TP9XP2S/cX98f0U/jL+Uv5v/or+pP7D/uj+C/8o/0T/Z/+T/8X/+/8xAGIAkgDEAPsAPgGLAdsBIQJbAocCogKqAqoCtALFAsgCswKUAngCWwI7AiUCHQIPAugBuQGnAasBoAF8AV4BVgFMASoBAAHRAJsAigD9ANEBIQIsAYH/eP5f/j3+Yf1q/DX8e/w6/Ef7lvrW+oz75/vv+zj8wvz9/L78ifzH/ED9mP3X/Tz+0f5b/5X/cf8Y/9L+1v4k/5L/9v9gAPkAqQEcAkUCZwKfAuMCPgOgA78DkwN4A24DHgOCAgUC5QHiAbkBcQFFAUsBPwH1ALkA5gBSAZgBugHzAT8CWwI6AioCJQIHAtIBswGYAVUB/ACMAAUAa//U/kv+vP0g/YD8Gfz6+8L7Ofur+lL68vlp+Qf50fio+P745fnJ+in77vt4/e7+v/9fAKUBHwNZBBQFkgUJBk4GWQb5BW8F2AQuBJwDnAIVATn/5v0S/fv7+vp8+vb6j/vr+zb80fze/a7+gP+hABACmAMnBd8G7weACP0IRAn
sCO0HLweDBuYFAQWzA0wCCgH4/3X+Iv1c/Mv7Bvtj+j/6JvoL+jf6ufpV++H7k/xZ/RH+kf6t/n3++P11/bP89/tl+/r6Vfq6+T766voy+0D7WPy3/Yv+Wf81AHYBsAIXBOwEOQWcBQ0GUwYlBrwFIwWFBPgDzQJKAR4Aiv8K/0v+If5p/gX/bP+i/+L/NQC9ACQBrQFTAv0CuAOABBcFLQU7BX8FXgWxBPEDhgMHA0UCYgFfAHT/2v5N/oX95vyh/HX8R/xH/DX8Hvxz/BL9U/1I/Yv9//0k/sL9IP1s/O/7g/vW+u/5PPmV+VH68Pop+8j7J/00/h3/mf+YALABqAKCAyEE5QQ2BZIFxwWXBeMESgQVBEYDuAFeAHb/6/4t/q79zP2L/p7/FgB5AAMB0AFlAgQDswMlBK0EaAX/BfcFlgVwBRkFYwSIA5sC5wEvAaEAvP/i/k3+u/0T/YL8bvwr/Af8Cvwk/B78Ifyg/AX9Yf27/ff9Ev4C/gP+oP0H/WL8jvvL+lL6BvoW+dj4oPmb+vn6nfuD/d3+FwBpAdEC8wP/BFkGDwdnB84HRQhsCBMIRQdKBoYFbQR9AooARP94/kb9ffyF/MP8Kf2k/ZD+SP8HAPwA1QGnAngDQwQQBQEGtQbZBtgG6QZ1BlkFQgQyA+UBxgDk//X+8v1d/eb8FvyM+zn7x/px+mH6V/ot+oL6Hvt7+9n7iPwj/W39lf11/QT9ffww/Ff7Ufq3+SD6w/oG+1j78/th/XP+Y/8iAGsB3gLVA/IE0gWQBg4H2gdrCBsImQdSBwwH+gVFBLQCZwFPAP/++P2F/Yz9xP34/Wv+3f5k/w4A+ADJAWQCNANRBCIFVwVtBaoFsgVKBbUECgRIA10ChgGVAJv/r/7b/Tf9dvzc+037y/pk+ij6CvoL+kn6ovoN+377+ftU/M78Tv14/VD9IP3+/Kr8OfyT+zv7iPsF/Cj8Xfx4/aP+eP9UAG4BcgI0AycEHQW4BeYFHgaJBuwGrQYMBq0FggXVBIUDTwKDAZ4Ahv/g/sf+JP9x/9D/SgDCADwBqAEdAngCygJOA/QDOgQlBAYELQQYBIQDxAIvApgBswDK//f+LP5V/cL8Qfy7+1P7E/sI+wX75Pqo+qb68/oX+yL7c/v5+2j8wPwK/SH9FP0X/Q39ofz9+177Sftw+3/7g/sg/Hj9lv54/3QA3QEfAxwEFgXJBUIGkQYDB1MHLQfcBqUGsAY3BggF0AMAA0IC1QCC/7r+cP5Z/mv+z/4u/+n/ugB8AfYBTALXAmgD6APXA6QDswPnA9kDWwMZA7sCJgJLAVoAbv9b/nH9hfzI+y77vfpo+iP6Hvok+jn6Y/qv+gT7Lftj+7P7LPy6/D39ov3b/Rr+Kv68/ff8HPyD+yT76/oH+1v7bvyo/QP/VwCoAfoC3gPrBGgFqQXABRgGYwZZBogGygYoBz0HDQeABsIF5QStAygC0QDE/x3/w/7K/un+If+n/ykAjQCUALoA3AARAQwB2gDUAPEAMwE2AXABrQGsAW4BJQHLAO3/+f5L/qL9+fxr/BT80/uX+3r7IfvO+q/6l/pj+j76avqw+hv7n/sS/HT8+fx1/Xf9Q/0n/d78h/xs/L/8/vx1/aL+3P/7AOkBKAM3BO0EawWDBY8FowXLBbsFrwXXBfQFCgYkBgcGjgX5BGkEcQNCAjsBbwDd/4T/jP+t/wUAkQDvACkBTQFvAVUBDQHVAH8ALAARACQAKAAlAFUAegBdAAcAqv8Z/23+wP0U/W/82PuC+z77HfsH+//6E/s6+177WvuA+9T7PPyZ/AX9av2//Rj+R/4t/vT90f2K/Uz9Nf1O/VX9r/1r/if/2/+iAJoBYAIRA5QD/QNnBOIETAWOBfoFZQazBtkG0QaMBgIGgwXTBNIDygL6AWgBsgAiAMf/tv/K//T/GwAhAEMAXQBgADAAAADS/73/wv+3/5X/k/+5/7//mv9z/0P/3f59/hH+hf3
b/F/8Cfyq+237U/tp+5b76/sr/Hb83/xN/Z/95P1L/of+sf7G/sr+nv5l/k7+Pf4c/hH+M/55/sj+Gv+X/xwAvgBKAdUBSAKtAgUDRAOKA8QDGwR1BNUEFAU2BV0FbgVBBdgEZQQNBJgDBgN1AhMC7QHBAZMBZwFYAUQBDgHCAF8A7P98/xb/uv5Z/hr+Cf4G/hH+Ff4x/kr+Vv5E/hr+CP72/df9j/1i/Vf9QP0Y/e/82vzO/NT86vz5/A39Sf2P/eD9Jf5l/qD+1/7//vr+9f78/gH/Ev9A/4D/wf8SAIIA2wAnAV4BiwGlAbIBugGjAa0B3QEpAmECmgIAA1wDoQO9A8sDwQOoA4cDTgMDA7cCkAJ7AnQCVQI4Ai0CHALeAWYB/ACVACUAm/8z/+3+tf6L/nH+ff58/or+jf6K/mX+Mf4I/tT9j/1B/SL9G/0X/QP9/vwR/Sv9Mf0v/UT9c/2l/c/9DP5k/r7+Bf9E/4H/t//V/+T/4v/z/wwAJABAAGoAswD2ADsBdgGhAboBvgHIAbYBowGmAccBAAItAnECvQIKA0UDUANOAzQDEAPBAlYC9gGWAU4BAQHUALUAoACbAIAAXQAfAOj/qv9m/yz/Av/l/s7+vv61/rP+uf7G/s7+2P7R/r7+kP5V/hf+0v2e/Wz9WP1I/Uv9VP1j/Xv9jv2x/c39+f0p/mD+oP7e/ib/af+t/+//LgBqAJkAyADuAAsBJQE4AUsBWgFqAXUBewGFAYcBiAGFAYoBmwGyAdAB+gEzAnECrQLdAgkDIwMqAxMD4wKbAkIC5QGDASUBzQB/AEEABQDO/5b/W/8d/9/+of5b/h/+6v3I/bP9qf26/c79+f0h/k/+bf6A/pP+lv6U/oT+ef5x/nH+dP53/oT+kf6o/rf+w/7K/tj+6/76/gn/Gv89/2f/m//N/wkATgCVANcACgE2AU8BZAFmAVwBRQEwAR0BCwEAAfQA/gALASkBRgFsAZkBxwH2ARoCOAJLAlcCUwI9AhoC8gHEAY8BUgEZAeIArQB7AEIAEwDg/6//dv83///+vf6C/kT+EP7r/cz9wf27/cb93v30/Qz+G/4w/jz+Qv5F/kb+VP5h/nj+lf7A/vj+L/9m/5r/zP/3/xUAKgA2AD8ASABSAGUAegCWALkA3gAHASUBNwE+ATsBKgEHAd0AswCUAHwAbABoAHYAlwDBAOkAEQE7AWUBhQGWAZkBlQGMAXgBWQE0AREB7wDNAKwAigBsAE8ANAAaAP3/4P/C/6H/hP9m/0P/IP/+/tz+vP6c/n7+Z/5X/kn+Pf41/i7+KP4j/h/+HP4e/iT+M/5J/mb+jP67/vH+KP9h/5z/1P8EAC0AUgB0AJIArgDNAPIAFwE9AWMBiAGlAbQBtQGrAZIBaQE4AQYB2ACpAIUAbgBgAF4AZwB5AI0AogC3AM4A3ADfANsA3ADcAM0AvQCxAKoAmgCFAG8AWQBAACMABgDl/8H/nf9//2T/Pv8W//z+6P7S/rv+rv6s/q3+rP6t/rT+tf6x/qr+oP6T/oj+gf6C/oj+kv6t/tX+Av8t/13/j/+6/97//P8WACoAQwBhAIQAqQDVAAoBPwFsAZEBrgHAAcIBtAGcAX0BWgE3ARQB8wDXAMUAuACwAKYAnACUAI4AhAB1AGEATwBBADEAHAAFAPP/6P/e/9X/zP/G/8f/y//L/8r/yP/H/8T/u/+o/4//dP9Y/zb/D//s/tP+wv60/qb+oP6h/qT+o/6f/pz+mv6e/qn+tv7L/uz+G/9R/4j/wf/6/y4AWQB3AIoAlQCfAKYAqwC0AMUA3wD6ABgBNAFNAWEBawFtAWcBXAFMAToBKQEbAQ8BBAH8APcA7gDhANEAvACkAIkAbQBPADMAGQABAPD/4P/S/8f/vP+x/6T/lf+E/3T/Zv9X/0z/Rf9B/zz/Ov84/zX/Mv8s/yb/H/8Y/xT/EP8Q/xH/FP8X/xn/G/8a/xb/Ev8P/xD/GP8
o/0D/YP+I/7b/4v8PADUAVgBvAIYAmQCqAMEA2ADyAA0BKAFAAVYBZAFrAW0BZwFdAU8BPgErARYBAgHvANwAygC5AKcAlACBAGwAVgBAACkAEwD8/+b/1f/H/7r/tP+t/6v/p/+f/5b/h/91/1//Sv81/yH/D/8F//7+/P7//gX/D/8a/yT/L/86/0T/TP9U/1z/Yv9l/2n/bv9v/3P/d/9+/4j/lP+n/7v/0//t/woAJQBBAFsAdACLAJ8AsgDGANgA6AD3AAYBEwEeASkBMgE4AT0BPQE6ATIBJgEVAQAB6QDRALkAoQCKAHMAXABDACwAEgD2/93/w/+s/5f/hf92/27/af9n/2j/af9o/2n/Z/9j/17/Wf9V/07/Sv9I/0j/Sv9M/1P/W/9i/2n/cP91/3j/ev96/3r/e/97/37/hP+P/57/sf/E/9v/8f8FABkAKgA4AEUATwBYAGIAagBzAHwAhwCSAJwApgCwALsAwwDMANYA3gDnAPAA9gD8AP0A/QD3AO8A4gDUAMEArACUAHgAWwA8ABwA+v/c/73/of+K/3f/Z/9b/1L/Tf9I/0P/P/88/zn/OP82/zn/PP9D/0r/U/9e/2n/c/99/4b/jf+U/5z/ov+m/6z/sf+3/7z/wv/G/8r/zv/R/9f/3P/i/+j/7//2/wEADAAWACMAMABAAE8AXgBwAIEAkwCkALYAxgDWAOIA7AD0APcA9gDuAOMA1ADDALEAnwCPAIIAdgBuAGgAYABXAEoAPQAtABkABADv/9r/yP+5/6v/oP+W/4z/f/9y/2P/UP88/yv/HP8S/w3/Dv8U/yD/L/9C/1X/aP96/4v/mv+n/7b/w//Q/97/6//4/wMADwAVABkAGwAYABUADwAKAAUABAAFAAoAEgAcACYAMQA8AEQASgBQAFYAXABjAG0AeQCHAJcApgCzALkAvQC7ALIApQCXAIoAfgB2AG4AawBoAGIAWQBLADkAIQAHAOv/z/+0/53/jf+B/3v/ef94/3r/e/99/3z/ef90/3H/bf9r/2r/bv9y/3j/f/+H/4z/kv+W/5r/nv+i/6f/rv+2/8b/1f/m//b/BQAVACIALAA0ADwAQABGAEoATwBVAFoAYABkAGgAagBsAGsAaABoAGcAZQBmAGcAaQBrAG4AbgBsAGgAYgBcAFMATABFAD8AOgA3ADQALwAtACcAIQAYAA4AAQDz/+T/1f/G/7j/rf+j/5v/lP+S/43/if+H/4b/g/+C/4H/g/+E/4b/iv+R/5f/nv+m/7H/uf/D/8r/0v/a/9//5//u//X//f8FAAoADwAUABUAGQAaABsAHQAcAB8AIAAjACYAKgAyADkAPwBIAFEAWQBeAGIAZgBnAGQAYABdAFYATgBGAD0ANQAuACgAJAAgAB0AGwAZABYAEgAPAAwABwADAPz/9//w/+r/4v/d/9j/0v/O/8n/xP/C/7//vP+5/7n/uP+1/7X/tv+5/7r/vf/D/8f/zf/V/9v/4v/p//D/9//9/wIACAAMABAAFAAWABcAGAAYABgAFwAWABQAEgAQAA4ADgANAA0ADgAQABEAFAAXABsAHQAeACAAIgAiACIAIgAhACAAIAAeAB0AGgAaABgAFwAWABMAEQAQAA4ACwAHAAQA///6//X/8v/s/+j/5v/i/+D/3//e/93/3v/e/93/3f/c/9z/2//b/9n/2f/a/9v/3f/g/+D/4v/l/+n/7P/w//T/+f/9/wIACAANABIAFQAYABoAHQAdAB0AGwAbABkAGAAWABUAFgAUABMAEwATABIAEQARAA8ADgANAAsACgAIAAcABwAGAAYABAAHAAYABQAGAAYABwAHAAgABwAIAAcABgAFAAUAAwACAAEA///+//v/+//5//b/9f/z//H/7//v/+3/7P/r/+v/6//p/+r/6//s/+3/7v/w//H/8//0//b/+f/7//z///8AAAMABAAGAAcABwAIAAgACgAKAAkACQA
JAAkACgAJAAoADQANAA4ADwAQABAAEAAQABAADgAOAAwACgAJAAcABwAEAAIAAQD///7//f/8//v/+//6//v/+//8//3//f/8//3//v/9//7//f/+//z//P/9//3//P/9//3//P/7//r/+f/5//n/+P/4//j/+P/4//n/+v/8//z//v///wEAAQAEAAQABgAGAAgACgAKAAsADAAMAAwADQAMAAsACgAKAAoACQAHAAgACAAIAAcABwAHAAcABQAGAAUABAAEAAMAAwACAAIAAgAAAP/////+//z//f/8//z/+v/5//j/+f/4//f/9v/4//n/+P/7//r/+//8//v//P/8//z//P/9//3//f/9//3//f/9//7//f/9//3//f/9//3//f/9////AAABAAIAAwADAAQABgAFAAcACAAIAAgACQAJAAkACAAIAAgABgAGAAYABgAGAAQABAAEAAQABAAEAAQAAwACAAEAAAD///7//f/8//3//P/7//v//P/8//v//P/8//z//f/9//3//f/+//7//v/+/wAA//////7////+//7//v/+//3//v/9//7//v/+//7//v/9//3//f/9/////////wEAAQABAAIAAgADAAMAAwAEAAQABAADAAMAAwACAAIAAwACAAEAAgACAAMAAwAEAAMAAwAEAAUABQAGAAUABQAFAAMABAACAAAAAAD9//3//P/7//n/+f/5//n/+v/5//n/+v/7//v/+//7//v/+//8//3//v/+////////////AAAAAP///v8AAAAA///+//7///////7//v/9//3//f/+/////v////////8AAAEAAQABAAIAAwADAAIAAgADAAIAAwADAAMAAgADAAIAAgADAAIAAwACAAEAAgACAAIAAQABAAAAAQABAAAAAAD/////AAD///7//v8AAP7//v/+//7//f/9//7//f/8//v/+//8//v/+//7//v/+v/7//z/+//8//3//f///wAAAAAAAAAAAAABAAAAAQABAAEAAAAAAAEAAgABAAEAAQABAAIAAgADAAMAAwADAAMABAAEAAQABQAFAAUABQAFAAUABQAFAAQABAAFAAUABQAEAAQABQAEAAQABAADAAQAAwADAAAA/////////f/8//v/+//7//n/+f/5//n/+f/4//f/+P/4//f/9v/3//f/9//4//n/+P/5//n/+v/5//r/+v/6//n/+f/6//r/+v/6//z/AAACAAIABgAGAAgACwAKAAsADAASABYAFwAbAB4AHAAgACMAIAAgACMAJQAgAB8AHwAfABwAJQAhABsAHgAbABsAHQAcAB0AFAAaAAkADQADABMAAgHoAAcA///4/+H/6//z/xAAZwDQAHcApv9V/2D/ZP95/3v/nv81ACYAh/8A/6z+zP4N/1j/pf/d/yMAKACt/1X/X/+E/8n/HABZALsADgEDAbYATQAVABgAIQAaADEAggDAAKMAbAD//2T/Rv95/5v/wv/5/0UAXwDaAC0BrgBaACUA3f+w/87/TADCAPcA8QCTAP7/QP+Y/oT+Av+x/0gAkwBnANj/Jf+3/qv+KP8IAOoAlgG6ATsBYABv/9z+9/6k/3cABgE3AcsAHwBh/7X+j/7R/ln/1f8HAPX/sf9+/2D/P/9L/4v/vf/E/7z/0v/l/+P/IgBaAF8AWgBAACEAGABKAIIAtADvAAAB9gDGAIIAWwBgAH4AjACfAJUAYgAvAP//2v/a//z/CgAQAP3/6P/z/93/v//E/xQAaABOAB4A/P/i/+n/GABOAFwARADx/4T/fP+3//P/LwAGAGn/2f64/gL/sf9nAIkAHACL/wb/3/5h/0AA8AAgAZgAu/9k/5j/CACtAA0BswD0/1P/Dv8t/7T/XwCaACsAPP9w/mb+H/8lAPIAEwF9AI7/3f7g/pb/ggAhAToB0gAxAM3/3v8nAG4AhABMADgAZwCTALQApwBwACoAAAD
k/8r/6v8dADgAPgAWAN//6v8YAPz/1f+u/2v/iP/Z/xQAPAAkALr/T/8c/yj/o/9WAKIAewARAIT/PP9d/6//LAClAKcAZAAhAOH/z/8FAEwAbwBHANj/jv+//zIAgACQAGoADwC1/5P/xv9GAKAAoQBeABsAEwAJAOP/yf/I/9T/4//l/83/uv+n/3v/Y/+O/8L/6v/7//b/DgAxACsABwD8/xUALABTAGgAVwBaAE4AOgA2ABEA7////zQAWABOACMA2f+n/6T/sv/u/xsA///C/3n/WP+d//P//P/C/2H/Jf9x/wUAXQBHAM//bf+L/+f/HQAyAEMARgBQAGIAawBwAGoAYABuAHoAVQAoADAAWwBqAEwAFwDH/3v/Z/+k//r/DgDK/3X/Qv8o/z7/jP/i//3/4v/W/+D/0P+5/83/DwBLAE8AMAAeABAACgAWABUA8//b/9f/yf+6/9H/AgAeAAUA2P/a/xgAYgB8AEsA+P/s/1oA1gDoAKgAiACuANUAwQCTAI0AvQDuAOMApABiADgANQBUAGkAWAAqAPH/vP+X/5P/s//G/5j/S/81/17/eP9S/yP/KP9N/1n/O//7/q7+mv7y/lz/M/91/ur9Jf67/v7+sv4r/uL9Sf42/9L/kv8B/yv/MwAlAVUBFAEdAbwBswKBA5sD5gIZAikC9wKEAyQDZAL4AeIBxQFmAbUA7P+i/9b/BwC0/wD/o/4B/3L/aP8e/+j+AP9p/+7/SAB7AIIAYQBCABkA8f/6/yIAFACx/wb/SP7S/bT9k/3+/CH8VftY+zv8s/z2+x/7i/vP/MX9BP5a/jT/WQB4AWICrAK8Am0DkwR4BXwF7ASWBJAEKgSkA3EDFgMbArQApv9p/2L/rP60/Sj9Iv14/cX9vf2T/cH9k/7T/6UA4ABGARwC9AKvAyMEIAQPBBUEFwQ6BDcEdgNAAhkBQwDq/1n/+P21/Bb8Svsz+ln57/j9+CD5/PgS+Z/5OPoD+/f7vPy3/TX/pwCmAUsC7gIPBDAFmAVrBUcFTwVGBfcEbgS3A5gChQHHACwAfv+7/tv9Q/05/Tj9G/32/A39xP3g/nX/pv9WAFMBNAL8ArQDMASRBNME7AT4BOEEzASfBOcDowLXAaoBOwH//4r+m/0J/Uv8Ovtv+iH6xvnr+GL44/h3+Vb5OvkC+kX7YvwI/Xr9W/7Y/5YBBgOrA4ADzAMCBRYGMAaxBRIFrQSSBA4E9wK/Ac4AKACy/+j+s/3b/Kf8zPzs/On81fz6/G/9J/49/1kA9gBGARICCAOZA+oDRAS8BEgFkAUyBYQE6AOcA24D2QLGAdkAQwCi/9r+vP2W/Nf7ZPv/+qb64vnY+Ov43fks+s75RPqS+6X8HP2g/Qv/uwD4AbICYAPXA38EkgU3BtwFFgW2BI0EEgThAsQBDgGGAI//SP5v/eH8ffwN/NX7GvxW/GD8mPw4/UT+O//q/8oAoAFnAhUDngN7BEMFfwVMBSwFIQXRBGEE+ANSA1QCZgHGACIA9v7k/Sz9R/xP+6/6SPoE+lf5jPgN+QX68/nD+a/6HfyK/bX+Zv9RAK4BpQK+Ay4FzAXLBeQFwQWnBcAFcQWVBEgD2gELAaUAh//b/bH8dPxD/L/7LPsB+3z7BPyT/DT9yv11/mb/lwDnAbACGgOUAwwErgQJBQEFygTFBLUEKQRuA+ACagLaAQgBBABc/6n+4v01/YX8vvsO+3P6Lfon+uL58fl/+sH69fr6+xn9of1Q/p3/GQFfAucCTgNQBD8FjwWyBaYFcAU7BeEELgQHA/4BRgFQADj/fv7k/VD9qPz++5H7GvzQ/Kj8bPzv/CD+Yv/d/8v/hwCOAVwC5AJDA4UDpgO2A5wDpgOTA0cD5AJ+AvcBhgFDAZEAi/+X/nn+jf6y/TP8J/sP+0P7Cvva+Vf5RPoX+9j60/oL/H79m/5v/zsAUAGjAqEDPAS0BCwFsgXPBXwFEgXXBLoEBQS/Ap4BzQA
eAGv/YP6m/XX9Kf2E/Dj8pPwy/X39gv3S/ZL+g/8rALcAIgGYAVgCEgNlA2EDWwMyA0cDYwNKAwIDnQIrAqgBWgH1ADsAMv+A/h7+gv2O/Kn73/oj+tb5Wfkq+bT57/mf+Ur6yvvz/Ab+OP9OAEsBkAKJA1UE0gQ+BdkFKwb4BYEFMwXXBEAEIAMfAikBPABl/4P+0v1w/RH9ZPxx/Oj8G/1L/br9Hv7I/rH/HwCyAI8BWAKvAtEC/QKAA/0DpwMSA94C9wIGA8QC/wEqAcoAuwBgAGv/X/7O/Wr90vwJ/Dz7gPob+rz5UPmb+Tn6W/p3+ob76/zs/fv+KwD2AMYB7gLZAzEEVwTgBF0FVgUMBbAENAS2A/wC4gHtAFcA0/8U/0r+7f3I/WX9K/2G/er9Kf6M/tz+O/8CAMEAHQFcAcQBQwLCAvsC9QLtAvYC+ALDAmICCQIVAg8ClAHKAFUANwDH/x3/m/4I/jj9vvxD/Fj7oPqS+lj68Pkd+ob60fpz+4L8cf02/kD/UgAQAaoBXQL/AoUD1QP3AxAEJgRCBAQEQwNeAgACswEEAVQA0P8o/7H+pv50/jX+Pf53/p/+8/5o/7P//v9NAJ8A+ABcAdUBPgJWAksCfgKqAqwCkQJqAjYCOgJsAkkC0QFYAf0AjgBBAAQAbv+E/v/9v/0f/Uf8o/tP+xz7+voO+0L7V/uZ+1f8L/3D/VX+5f5n/wkAvgBBAWIBggEEAocCmwKnAp0CUAIoAh8CwAEtAd8AiAAZAKv/WP8w/yv/Df/o/hX/VP+L/6f/vv/r/10AwAAEAW0B2AEiAksCcwKcAuACDQMXAwsD/wIAAwMDywJPAs0BfgFSAdsADQBg/9P+Df5t/Qr9i/z0+4z7TvtL+3H7c/t7+937l/xH/bH97v1f/g3/tP8cAD0ATACPABcBmgHGAbUBtwHLAbQBgAFeARgBrQBXACUA9P/g/8D/gf9h/4P/rP/J/+j/+/8oAGQAtwAIAWMBtAERAmAChgK8AhYDTgMWA/ECEgNMA0YD/gJ1Ag4C/gHLAR4BYADw/2z/wv4g/q79U/0b/d78kfxY/En8Xfxj/Hv80fxA/Vr9Sf18/df9JP5N/nr+s/77/lX/sf/2/zkArwD4APYA9QAYAQ8B4gDAALMAqgBxAC4A+P/u//j/CwAFAPv/CAA5AIkAwwDtADwBvAEYAloCmgLnAjMDbAODA30DdwN6A30DWgMRA8cCfAIPArABYAHMAP7/Zv8U/67+Of7B/Wb9P/1B/Sj90Px//GL8hPys/Lj8p/yo/NT8BP0u/Vf9of0T/oz+5/5W/97/MACDAAoBgQGEAU4BGgEEAQEBzgBZANT/pv+d/4D/NP8a/zH/W/+I/7j/5/84ANAATQGtAQQChALkAh4DOQN2A8MDyQOuA5ADkQOGA2sD9wJ8AjsC6gFDAY4AIwC4/1b/+f6o/kr+Gf71/bD9cf1H/TT98/yn/HD8gvy0/K/8d/xa/Lj8Nf19/Y797P2h/j//qv8SAKAACgFcAX4BZQEuASQBGQG9AFgAIwACAK7/V/8r/zj/T/9i/4n/0P83AJIA3QAdAY8BGAJ/Ap0CuQL0Ai4DSwNCAzMDPQNNAxcDtwKAAlwCAAKJASMByQBzADcA7v+Y/1z/T/9G/xn/0v6F/kP+6/19/Q79wPyF/Ff8HPzr++L7A/w2/Ij8AP12/eb9X/4L/67/OwCjAAABRwFrAWcBOQEEAb4AhABGABcA1f+M/1D/RP9f/3P/kv+y/+j/FABiALMA9QAdAVQBwAEoAmsCeAKbAtQCDwMTAxADKgNEAzED/wLYAoQCFgKuAX0BVgEfAdUAegAlAOL/vf+A/w//dv74/aH9OP2Y/PT7tfu/+8b7nPuE+6/7J/yt/CP9pf0+/uP+Zv/j/1YA2AAqAUkBMAEyAT0BCwGvAFEAJQD4/9T/gf9M/0b/ef+N/2v/Uv9w/8z/CAA+AHoA8wBbAZ8B0gEgAo4C6AI
LA/cCDAMqAyIDAAP2AuECsgJxAgsCnQFNASEB/QDTAIQALQDp/47/Gf+9/nv+Ev6L/QH9mfxX/B389vv8+y78Tvxo/Lr8Nv2l/RH+kf4K/4D/9P9GAH0AsADZAOUA5wDEAIgAXAA6AAcAz/+v/4H/Z/9T/0b/QP9U/1r/V/+L/9//MwB5ANkANQGiAQACSwKJAuACHQMkAyYDLANAA0cDRgMWA+ICpQJiAhQCxQGFAVEBGwGrADEAyv93/xP/qv4v/qr9Q/3j/H38HPwB/A78MfxL/FL8W/yQ/Pv8cP3n/Vz+1/4+/6H/BQByAMYAAAEhASwBKgEgAQEBuQBtAEMAIgDT/2r/Gv/1/vD+8P7w/gD/L/97/9L/PACnABIBgAECAmsCswLtAiYDUANkA4ADkgOKA1sDMAMKA90CkAI5AuMBhAEYAa8AVADs/3j//v6k/k/+8v2C/SD90vyR/Fj8Nfw3/Ez8bfyM/L78Cf1p/cT9G/6Q/h7/of8AAFIApwD5ADsBWgFYAUABHwHnAIsAMADq/7P/Xf/8/rn+rv63/rT+uP7k/j7/mf/x/1IA2ABWAbsBCwJmAr0C+wIgAy8DRANQA1kDPwMdA+YCvwKHAjgCxAFLAfYAqQBTANj/dv8n//L+ov5T/gf+zP2Q/Uz9Ff3q/NX8zfza/OL87fwE/Tz9hv3e/Uf+yf5C/6L/7P9PAL8AKgFfAWgBYAFKAScB3gCTAEcAEADD/3X/J//4/ub+8P4I/yX/Vf+U/+//SwCoAP8AXAG6AQwCRwJvAqACzALcAtQCxgK7AqUCjQJtAkYCEALJAWQB9wCdAFYADACz/2b/Kf/5/rv+av4L/sH9jP1S/QT9xvys/Kf8pPyi/LP87PxL/bX9Gv6G/hH/oP8OAGcAyAA0AZABuwGyAYoBaQE5AekAgQApAOX/n/9X/xH/7/7l/vL+Av8l/1f/mP/j/0MAvAAsAYcBxQECAj0ChwLEAuwC8wL3AgADBwP/AuMCtgJ8AjsC5gGEARYBuABZAAMAqf9V/wT/tf5p/iL+4/2f/VL9+Pyt/Hz8cfxx/HT8gPyo/Pb8YP3T/Tb+o/4l/7j/QQCwAAcBVAGdAcUBvQGJAU4BEQHWAIUAMgDh/5X/Vf8h//7+7P4A/xj/Nf9o/7v/DwBZAK0ABQFhAbAB7gEpAm0CpQK9AswC0wLrAgED8QK/ApwCcAIWArYBSgHdAH4AQQDa/2j/Cv/B/nT+K/7b/Wz9EP29/HL8Qvw6/DP8YvyT/LL8+vx//en9Uf75/p3/GACSAM4AjAAIAdkB6gHrAQcC1gGAAUIBCQG/AK8AowAcALP/hP8Y/7v+2P70/v3+Qv+T/5r/pP8JADsAnwBvAa0B7gGNArcCtgLwAiEDLgNfA1gDBQO9Aj4CgQHwAKwAYQDo/4P/Jv+I/tz9Tf3c/G/8HPzN+4b7PvtI+6j77PtZ/DD9L/7V/m//LwDUAIwBYQLwAjIDhAPSA7YDVgMlA/oCkALpATUBbACn//7+VP65/Xr9ov2h/XP9df2g/cb9Af56/v7+gv8ZAJIA4QBCAdEBdAL/Ai8DFAMfAyoD4QKhArYCwQJ7AiIClgHTAE4AGgDQ/2j/Nf/r/k3+q/0j/Zz8I/y9+1D71Pp1+oX6GPus+z78Tv1o/hX/6v8HAdsBpwKeAyYEOQRzBLYEngRuBEgE6wNDA3ACdQFcAGr/uv4Y/pn9av1A/QP96fz0/B/9e/3s/Xf+Df+H/wcAlwBAAQ4ClALKAjADcQM1Ax4DOAMMA9ICsAIwAoQBIAG9ADgA1f+K/xD/g/4S/q39Of3r/L78SfyL+/b6vvqj+sL6bvtE/MP8f/23/pn/UQCcAc4CcAMPBIoEfgR3BMgE3AS3BKwEUQR2A4YCiwFfAHf/3f4W/nv9Rf3x/Jj8ufz9/Bv9ev34/UD+pv5P/+L/dwBFAfwBaAK6AukC4gLKAscCtAJ7AkICCAKjAS0B6gCrAFwAJQDw/2f/q/4
T/qb9Mv2x/Db8mPvR+k36Evqj+cL5DvtC/Kj8if39/ur/zQAiAjcDHwQbBVcF4gTIBAsFDAXaBLoEbwS5A78ClwFYAGX/xP4K/lL9CP3o/LH8x/we/Wz94f18/uf+S//r/5wAPwHFATYC1QJeA0ID3ALiAgED1QKVAksC1QFvAQgBYgD1/+7/4f+j/zz/hf7V/Xf9Ff1n/M/7Yvus+tr5UfkW+VX5dvrO+5D8Xv3k/jsA+AASAm8DUwQCBV0F4QR0BK8EwQRtBEUEIASPA6gCmgFcADD/iP4P/ln95vz7/Pf83fwq/aP9Cf6y/mv/1v9AANsAegEcAq0CFQNrA6gDlQNeAzcDAAOmAjYCuQFNAREBzgCOAGUAKAC5/0L/tP4i/q/9Rv29/AH8LPs8+oD5RPk0+Rj50flW+0782/w8/v//HgEgAjoD9gOXBCwF8ARwBIwEsQRGBNEDewP9AmUCjwFcADD/c/7H/QH9dfx7/NT8M/2K/eL9U/7y/p3/KgDDAIMBRALSAiQDQAN7A7wDtANvAzUDAgOxAjICpwFVASwBCwHkALgAegA4AM7/HP9o/tn9QP2T/Oj7NPt3+rz5PPkp+Sj5ZPmH+vH7zPz8/c7/AwHNAeICtwMyBL0EuQRLBEEEKQSqA04DGQPKAnECyAG4AJ7/kf6M/bL8PfxA/Jb8+vxm/dj9R/7W/n3/LQDeAKQBXgLvAigDewMNBDkE7APEA44DKgP+ApwC7AGcAYgB+QByAFkAUwA9ABwAl/+6/vD9Q/2P/Nj7Ufvl+mH6svk3+ST5Vvke+qn7J/0X/l//4gC/AVcCSwMTBIME1wSBBNYDnQOBA/ICwwLiAokC7wEwAef/hv61/fX8SPwY/E78m/wK/W39xv1g/jD/CADCAHoBJQK3AgEDVgPQAx0EKwQVBK8DJgMAA68CAQK2Ab0BVgHlALcAdABNAGUAJQBd/6b+Cf4w/UD8k/sT+5/6Evp7+VD5efm/+er6uPzh/eb+ogDKARsCwgJ9A+ADVgR5BOkDdAM3A8ACUQI6AjUCFAKYAakAd/9H/jf9a/wT/B38bPzl/GT9t/0n/uH+n/9LABcBzAFKArYCHQNyA7kD+QMOBM4DWQMCA64CTAINAgYC2gFuARwB7gCWAD0AJADm/z7/eP6m/af8vvsd+6D6SPob+uv57vkX+oX6xPtw/ZH+vv9kAVMCjgIUA4sDkQPnAzQEzgNRAykDtQI5AjACDQKSAQ4BMwDI/oH9lPzN+2f7kvsA/I/8K/22/Ub+CP/p/8kAmgFWAvoCfgO8A84D0QPVA/0D/gNrA8sCiwIjApkBeQF7AUsBWwF0AQABWwAEAIj/tf7y/VX9rfwW/HT7wPpN+hP6/vlI+qj6EPtM/Mn9m/6R/xcB9AFVAg0DewNqA6cD4gORA1ADOwP1ArUClAIZAn0B6QANAMf+hv1i/Ib7Ofss+1P7Ffwr/ej9r/6Y/0IA7QDeAYQC3wJoA+cDAwTdA9ID2gOaAxMDsgJaAtkBjQGDAUEBCQElAR8BrgAnALX/Kf9o/rP9Gv16/PX7mfs2+9v6uvrA+v76SfvQ+/n8Mf7m/vr/UAHLAQgCpgLaAtsCUgNuAzEDWgNwAwoD1QLGAm4C6gE8ASYAyv54/T38Uvvj+vz6mPui/Jj9Pf7l/rD/RQDBAJMBdwIhA6kD5AOrA3QDcgNuA0ED8QKwAnMC7gFEAQwBHQEEAfIA5ABxAM//U/+5/hH+nP05/dT8hfwV/J77d/tr+1v7h/u3++H7pvyx/Uv+E/9YACwBegHtAS4CTQLmAn4DfwOYA+cDyQNeAwIDhwL/AYIBmgAh/5/9bfyC++76xfpV+5j8uf1S/v3+zv9pAAoB0gGBAiwD3wMmBOwDjANdA1UDFwO/AqsCkgINAoEBJAHDAIMAiwBvAA8AuP9Q/4v+s/0q/dT8kvxN/Bv8//vi+6L7lvvM+9T76/vC/ND9YP4e/zIA0QAhAcMBKAJcAh0DwwO
hA6AD7gOyA1QDNAPHAiMCkgF3ANH+b/1u/KT7NftF+8v7yPzT/XX+8/7L/54ALgHXAZgCMAOwA9oDlANXA08DUgNGAxUDzwKnAi8CbgERARcBAQHmAM8AXQCP/7f+7/0q/cL8yvzM/J78hvxN/N37n/uj+5/7t/sM/H78//x//fj9rv6e/2AA+ACnAVAC0wJWA7wD8wMqBGAEVAQBBHoDyALxAegAkf8p/hH9PvyN+zr7i/tV/Fj9Qv4L/9X/oABBAcoBYgL7AnkDyQPgA8ADlAOSA6sDfwM1A+kCagLQAX0BPQHqANYAwQBEAH7/uP4D/n/9MP32/ND8xvzA/JT8Pvz9++z75PvY+8j71PtU/Az9bv3m/ej+uP8qAOMAqAEgAtICowPmA/0DUgRdBO4DaQO+AugBHgEmAM/+l/3M/Bn8jfuY+x/87/wE/tv+XP8cAOQAOQG/AZsCIQOJAwkEGATdA+AD6wPZA8oDowNRA9gCTALAASwBvgCNADgAof/z/i/+gv0J/ar8hvyr/M782fyz/FL8DfwF/AX8Gvw4/GL85vyA/cL9J/7y/rX/WwAFAZIBHALfAn4DoQO9AxMEFgSfAxEDZQKOAbkAn/9N/mb90/w6/Pf7Nvyx/ID9a/77/nX/LgDXAF4BBAKXAhYDuwMZBPkD/gM2BC8EJgQeBLEDIwO/AjACbwH0ALsAVADM/03/nf7k/Yf9PP3f/N/8Cf32/K/8V/wT/PP7+vst/Eb8Jvxp/PL8Iv1V/Rn+Cv/J/2oA6QBpARECuQIpA34D5AMWBNwDUwOCApwBCAFZADz/Xf7W/SL9kvx7/JP8Av3P/YD+Df+l/zkAzwBrAeIBhQJaA+8DOARoBGgEagSKBHEEKQTgA3YD4wIxAn0B/QChAEEAxv8y/4D+1/1T/fD8pfyM/KT8lvxN/Av85/vX+wz8aPxx/I38MP1//Xv99/2N/vP+zP+UAM4AYwFQAqQCyQJNA4wDgwOGAwMDBQJfAdQA3//f/iz+l/0l/fb86fwH/YL9I/6e/hD/mv87AOcAdwEGAq8CSwO8AwoEKwRaBKAEmARIBAAEoAMXA50CDQJsAQUBswAIAGb//f51/uv9kf0o/b78hPxY/Cb8//sG/Bz8QPx1/Hv8hPwM/WH9Zv0b/tb+4P5R/ygAZADCALABRwKXAk8DrgNgAz8DCANDAqUBOwFkAHr/wf73/Sr9yfzU/AH9U/3k/Wb+xv5Y/+n/TwD6ANYBUQLPApUD8gMBBH0E0QSWBH4EWwS+AyQD3AJwAu8BpgEzAXQA1/9E/4X+/v2j/SP9qfxa/AL8rPuX+5z7pvsD/Hz8rPzf/Ef9m/3M/R3+hv7Z/lT///9tAMoAYQHyAVwCrwL4Aj0DSwMFA44C+wFDAYAAxv/n/h/+xv13/Rn9Ef01/YT9E/50/tX+nP9XANMAdQEEAn4CPAPZAwkEUwTDBNEEgQQpBMIDVQMQA8YCVgLfAW0B7gBDAIP/6v5y/vb9Vv2a/A78p/tM+yX7IPtP++P7dfyk/L78Lv2p/ef9OP6g/gH/ov9BAIMA5gCTAQcCVgLBAvgCDwNJA/8CNgK4ATQBVACQ/+H+Lf7O/ZL9M/0Y/Wb91P1A/qr+Jv/H/2EA3ABmAecBZgIGA24DoQPiAxAEGQQuBAoEpgNiAywDvAI4At0BhQE4AeYAPwB+//P+Sv5z/br8L/zc+6z7gfta+4T77fs5/Hj8yvwp/Y/98P0n/nH+4f5E/6b/CgCFABEBqQEzAn4CxAL4AvsC0wJkAuEBZgHTAA0AMP+G/v/9f/0//SH9HP2Z/Q/+Rv7Y/ov/CgCcADYBmwETAtgCZwO7AygEbgR+BH0EWAQRBNMDmQM/A7sCPgLdAWMB0gA8AID/wP4O/jr9X/zV+4H7O/sk+y77Vfu0+y78nPwM/Yb93P0c/mb+sP4S/5b/AgBlANgAJgFvAeQBOgJ3AtEC9QKqAkgCywEHAXAA6P8j/6X+XP7n/Zn9eP1
t/bH9Ev5x/vv+kv8bAKsACwFdAeQBTwK1Aj8DlwPxA1wETgQgBBcE0gOCA1kDAAOjAnUC9AEpAY4A2v/s/hv+VP2M/AD8p/tN+w/7N/uA+7L7Fvyh/BP9fv3i/RP+Sv6k/vH+JP+A/w8AiQD1AFEBmgHhATkCcQJhAksCLQLEATABnwDv/2b/Ff+b/iz+/f3Y/cb97/0f/l7+Cf+m/+r/YwABAWcB5gGIAtICPwPHA/MD9wMKBBkE6wPEA5QDIQPEAocCDwKFAQUBbgC2//P+JP41/Z38R/zI+4r7qPvQ+xz8dfzB/CL9mf3l/d79D/5w/p7+4P4S/zX/oP8AAG4AvQAgAaQB0gEHAg8C3gHQAZQBJwGzADwA6P+B/xD/n/5H/jv+Nf4s/ln+rv4q/5L/y/8jAI8AIAGvAf4BeQIXA3ADoAPAA9wD/wMQBOADcwNLAx0DnAIsApYB+ACHAOD/8/4X/oj9Bv17/Cr87/vn+0z8mPy8/Az9j/3g/f79Gf4q/m/+y/7z/vT+Df9S/4z/tf/6/3IA+gBeAYsBZQFBAVYBKgHOALEAhQBHAA4Agv/k/rz+xf6J/o7+w/7m/jv/gf98/8L/ZADhAEIBuwE7Ap4CCQM8Az0DaQO/A8sDigN0A0gD7AKuAjECggEfAa8A7v8v/5b+Bf6X/Vv99PzB/OX8Cv03/Wf9wP0S/kL+af5o/j7+aP6L/nP+ov7U/v/+Q/9i/33/1f8xAH8ArQDPAOkA3gC8AHYARQA8AA8AwP9+/zr/C//5/un+1v4G/1v/hP+f/83/GACFAP8AQwGEAfsBXAKHArQC6QIhA1kDUQMXA+4CygKLAkcCCwLGAXgBDwGHAPz/YP/p/o7+KP4B/tP9pP3i/Rn+Av48/nP+b/6q/rv+gv6O/sb+jP55/pn+k/6y/tf+8/75/kn/lf+s/9X/8v/j/+L/3/+Z/4H/jP+G/33/XP8u/yX/Iv8I/xH/Nf+J/+r/JABPAIQAvQD0ABgBUQG7ARICZAKzApUClQLRApYClgKuAl4CVgJWArUBZAFDAaAAsgBPAJL/w/9G/+3+Jf+m/qT+ZP8V//v+mv9T/2H/wv8n/+3+c/88/xX/Bv/V/gP/9f7X/uj+4v7p/kT/Mv/u/tn+yP6p/qL+hv48/q3+rf5h/vX+yv6i/lD/Cv8U/5//0/86ABEAWACzAIMAKQEUAfYAiwFeAakByQHRAcsBfQGvAdwBfgFsAdoBqgGVAS0BDwFbAboAYACVAOT/EQAbAPn/PwCH/+r/CAANAEIAkQAxAK//kACw/4H/owDI//v/5f/v/v//V//r/ub/8f4v/9//pv5N/pH+Mf7Q/Q7+R/7L/ZL9Ef7q/Q7+aP7+/aX+q/65/iD/D//Z/6j/zP9wAPX/lQDlAIkAHQH/ABEBbgEVARkBWAEkATcBewEZAVgBCAFGAV4B3QA0Aa4AzACjAKwAVQCOAOsAKAB8AFkAfACrAM4A8wCdAIkAwwD1ABQAbwCrAOf/IgFZAH//cACz/5n/9/9D/0//Qf+0/pb+Kv6I/kP+2P0f/sv9JP4O/vL9kf49/mX+gv6Z/gf/rP6M/23/9v4sAFf/JP+qAOn////VADUAygCMAFIAHAGQAAABQwFAAA0BRAEeAC8BYQFhANUAOAGaAKEATgGrAM0AbQE8AaEAagGoAZkAYwGhAewApwBEAcQAGwAZAYEAlQA6AQcAFQB6APv/cv/E/5z/1P6P/1n/a/7+/rP++P3l/rP+Hv6I/mP+7P2X/uf+zv2R/pz/W/66/u7//v5N/9//Wv9Z/8f/+f9M/7z/aACq/zgAygDU/1MAogDc/6YAzgD0/4wAvAD+/3MAIQFeAIAAfgGoAMkAkgHOABMBGgI6Aa4AwgEkAb0AVwE8AQQB+QCgATIB6wCAAb0AzADEAFwAiABIAHgAZP9v/63/vP6Q/+/+bP48/7/+Yv6//pr+XP6D/v7+qf6+/qj/8f7o/oX/Jv83/67/xf7
e/sD/R/+t/3H/I/+x/w0A1P+G/93/j/+4/zAAy/+u/4MASQDp/6sAigCDAOYABQHeAPMATQHOAJYASgG4AGQAcQHNABwAMgGFAaYABgGuAdUA9QDsAR8BjgBmAQEBjgCKAJIAaAC0/ykA6v8Y/67/Zv+T/lz/Qv/8/hr/Kf9H/+v+gv+r/1L/Gf8U/yH/NP97/+j+0P6D/2r/7f5E/6j/J/88/z7/L/9M/1r/yP86/37/LwBp/8H/IQCJ/1wAZgAxAHwAx//NAK4AGgDsAMT/lgAsASUA0gCeALQAXAH+AA4BZAFDAYcBZQFnAaUBUwH0AMMADAFSAFIA1ADG/yoAAAAMAOr/Ov8OAF3/4P8QAKb/KwCx/0j/+v97/+/+uv8a/2//e/9d/yr/8f5h//H+YP97/zH/Vf8i/wv/LP8H/2D/KP/4/l//uP+z/3L/3P9h/7H///9E/wsAOgCw/0AAUgD4/zQAaQAxAHwAdgB5AMwAigAMATEBlQFjAawAPAENAZAAogASAa0AbwABAVAAXwDSAHgAagAFACgAjABWAEwAcgBuAFYA+f9RAJMAAAA/AEoA2f/I/5v/w/+//y7/ef8y/+7+H/+t/sb+PP8r/7v+QP88/8T+cv+r/+n+Lf/M/6L/vf/H/57/uv8TAJr/Ov+t/5v/Q//D/5L/IP/E/wEA2f8SAGUAPACgANMAcABaAMUAxgBJANYAxQB3ACgBGwGmAN8ADgHGAMMAsABfAPUADgGuANcArwCaAIIAiQA+AC8A0gBZADIAcwAYAAUAHAD8/6z/qf/i/43/NP+C/zr/IP9X/9/+2/41/zL/kf+n/zf/qP+S/1H/zf+1/4f/2f+f/0r/df9N/3T/if84/4r/bP+N/9//S/++/93/rv8NAPj/AABIADgAFACFAHwAlAD0AAkB5AC9ANAAjgBuAI8AdwAZAGoAkAAKAE8AswBfAGcA0ABxAFgAhAByAHMAZACXAFMAXAC2ADUAOQBXAPb/4f8HAMn/g/8WAM7/X//G/6H/Sv9w/3j/Bv87/4//g/99/4b/T/8k/4//fv+T/xEA9P/5/7//Lv8a/wn/5f71/v7+K/9L/1//j/9K/9X/RwA9AOwAwgCYAPwA3QAjAQwBwwD8AOAA0wBcABwAUwDw/+D/uP+W/yoA+P/O/zsAWwCRAPkAHAHjADkByQE/AboAGAGQAGEArgD1/4H/Wv9y/wH/oP4L/9j+j/9QANT/fQDdAJ0A0QB0ABIAyf/h/8z/Hv8t/yb/xP4L/87+f/79/jj/Sf8u/0z/mv8XAJMAAADl/zAA4/8eAB4Akv+i/+L/i/8F/zj/NP/x/mz/UP9v/1YA6QA+AXoBwQEHAuwBXQHAAFIAeABeAOz/rP9K/33/m//X/tT+ef+f/4kAkwFeAZcBFwKEAcIAyQCTADcAEgFPATkA8//z/yT/qv6e/jH+dP55/3f/TP8oANIAqwCsANYAmwDsAF0BsQAIAPj/Tv/I/nr+nv1k/Zj9Lf62/tr+Z/8DAEoANQAbAPL/LgDVALcAfgCbAIcAUgBo/7f+jP5T/rP+tf6m/h//Vf+6/+H/0f9JAPQAYwEiAfIAQgH7AM4AMAE7AbsBPALhAfoBHwK0AW0BMgENAa8AdwAcAH//GgCOAI8AFAEuAVYBWAG4AP3/pv8PAAYABwBFAIf/Uf9y/2z+E/57/qX+AP8M/3r+zv2l/cT9qv36/fz+mv/q/8H/b/7E/e390f2u/cr9Af4B/gv++/2+/TH+ev+cAGUBJgI2AlECzAK/ArMC2AJiA4QDsAJ/AREB/wCkACMBFgE5ATsCPgJqAcYAewCFAOsADwHKANQAQQHMAD4ADgCu/yoA2QCCABcAWQDx/8z+0P36/Jz8Qv0j/kL+3/7+/1MA1P9A/8v+Hf6w/f/80vtm+6H7RPxl/Xf+iP8dAW8ClAK5AhUDSQM9A/UCdAKjAckAtf8c/43+qP77/4cAfwCXAEgA8v++/wz/r/6
+/68ACwHXAUYCQgJjAi0CugHZAUICVwLvAWAB4QAxAJH/3/4s/kv+5f4e/zf/d/8qAOUAMwEzAZIA9f+K/2D+/vxA/MH7+Pqf+tH6kPuC/U7/fgDTASUDgwM0A9wCHAL8ATACoAHMAO3/A//H/qH+6v1R/iT/af/c/6b/Ff8q/0z/Q//D/2wA5gDbAT4CMwJyAgIC4AEeAqgBawFiAdQAbAArAK3/lP/0/+3/2/89AGwArQD3AOIAMAEyAYMAev+6/T78Pvs3+v35Ufq/+sv7a/0n/6MAKwJuA8sD6QMUBPcDJATOA+gCZQJOAZP/qv5Y/p79OP7h/zwAPQAAAK7+N/5H/vv9av4XAKwBlgInA4IC3wEcAZD/gf4P/g7+5P70/74ApgEGAu0BgAG7ADUAOwDCAP0AxgA+AHj/w/4v/tn9Cf7C/i//wP6O/TT8I/sU+pP5/fmH+9z9VgDKAtgEdQYuBzEH9QXGA7wCrgEaAJz/S/8O/4D/bP8D/3n/VQBVAEYAOQDk/xkA1/9G/xj/Pv+b/+r/QACqACEBBQGuAB8AZP9u/5D/sf9JAOoApwEmAhACfgHjAE8AYf/D/nz+Yv5k/n3+rv6c/on+J/4q/fn7zvoW+sX5ivpp/Iz+1gDIAhUEFQXABV0F2AR0BGQDOwL0AC7/CP4e/nD++f5IAL8BCAPBA4EDgAIxASYAsf6C/Tj9gP10/qT/fADlAMMBNQKmAQ4BQABp/0D/Vf9c/9j/1ADDAQQC5wGlASwBywA+ACX/Rf7A/RX9/vwV/ff85fyS/Hf7CPo1+ef48vmm/H//bgIcBeQGbAemBlsFqgM4AtAAv/+N/mr9mP1P/gb/4/8gAWcCnwNlBBMEJAP8ATEAqf5m/Vb8iPzX/Wj/lQDYAUsC+wF3AWoAUP97/o/+/v7l/4YB/wK6A9EDdgP6AYUApv+B/vL99v2V/Xn9oP2J/RP9g/zh+/76XfoF+k36qPvt/ZUAQwM2BQ8GhQZtBg0FXgPTAX0ALv8O/k79rPy3/IH9Hv+HAC4CwwOIBLwEmAOtAb3/T/5b/Tb9vf2A/hQAMAFRATEB1ABmAL7/s/82AIMA0wBJAdABHAKFAtMCYgKoAcEAbP88/mP9sPx4/P78b/2o/Zb9ufyy+3X75/rD+of8Nf6p//EBawS6BXgGwQb4BZgEmQKTAPL+hP1e/PL7TvxL/az+aQAxAmYD3QNbBOsD8QGDABL/2/1e/SL9ef18/uz/uABGAWcB3wDw/9L+LP5l/tj/fgGCAnADTwRpBHsDEgKVAAn/yv24/CX8HPyF/GP97/0i/nz9jvzj+9v6sfqR/Mr+8//bAXIEFQb5BvoGFga3BPAC9gAY/7X9cPzf+/37e/zj/bj/lwEbAxIEngTqBKkDkwF5/2D9n/yu/PX8s/0V/20AeAHdAVwBfwC6/x//u/5m/9YAKQLXAioDVAOzApwBbgBe/5v+//2J/Tv93fxy/HD8ffzx+2z7u/vt+xj8Iv4JAPoACgJsA7UE8wT+BJgEBQRDAwgCkAAP/9D9//zr/PH8pv1K/9cA2QGOAosDSgSgA0EC6QBg/4T+AP5k/WH9Gv5D/8z/9P8wAHMAwgCuAKYA8QCDAW0CuQIaAtQBrwEtATYADP+e/kf+xP17/X79cf1Z/RD9R/w++9D6APtz+xf9S/8VAbECXQRuBYwFKgWKBJ8DewLrAFn/Wf5t/cv8nPwY/S3+mf8YAVQCJgOsA94DDAPJAd0A7P9G/77+QP6T/iv/jP9y/yf/Hv94/9L/xf92AHwBbQJgA14DmgLxAaYByAC5/xP/vP58/tX9XP1p/bz9s/0l/T/8FfvP+sD6Afv9/CT/JQGpApAEDQZTBmgGZwVPBKMCaQCQ/ib98Ptk+wb87/w8/un/aAGxAosDFgS8A34CLAEgAAUAzf9u/0v/R/9V/9T+NP7x/Qf+Xv72/rL/5QCgAr8DKwQkBGoDTAILAZH/Z/72/eT9uf18/Y39xv3
A/Rr9lvwC/Mz7WPv2+yL+7v/EAVUDGAUKBiAGcgVjBBEDQAEKAOr+uv0B/Qb9P/2F/UT+9v7y//sAiQFqAscCkwITAjkB3AClAFcACADi/8L/PP9P/oH9Ov34/Pr8h/3d/sUAzgJcBBYFXgWqBB0DSQGQ/13+bP3O/P38S/0b/c/8e/w2/Iz7N/ty+/v7mf5GAQYDTwSSBVYGgAU7BN0CtwFjAHT/9f6H/vz9uP3z/fT9LP6p/ov/pACiAZsC8gK7AmkC/gFaAbAAsQCpAH8ALwB8/wL/VP6V/Sb96fwe/ST+tv96ATwDigRFBSQFGQQtAi4Azf6S/Vb8qPt4+0P77Pqw+mr6nvqs+vn7Ov+HAVkDawVhB4oH6AXcA0YC1gAf/zH+HP50/s3+jP/r/+X/uv9T/+b+Uv5V/lv/ZABqAY4CpwMnBMwDdAPRAqwBMAAb/0z+MP0k/Mr7AfyY/Jb9yP45AM4BeAO+BFMFYAXgBOADOQImAFT+9Pyv+2b6Z/kr+WH5evka+rP6w/wIAIcCeAQmBr4H+geJBgME1gEVAG3+HP2k/A39+/0t/6z/uf+u/67/lv8w//3+ff9pAGQBaAL7AksDngNcA3kCGwH9/0X/Zf5X/UP89vt+/BH9xv36/ncADwJzAzIE0QQdBbEEiwMKAocAD/+3/RX8qPqB+Sr5+PgW+Zb5QvuI/ukA2QLhBPsGyAfwBlYFhgOsAY7/8v0B/XD8iPx+/Zj+Yf/P/1AAowBGAKb/dP/r/4kATwFUAh8D5QNGBBAEHwOIAScAzf5S/fT7SvtQ+9371vwU/uH/kQHKArsDcQS7BFcExAPEAm0B5v+L/lr90/tN+nH5A/nl+AP54Pma/BL/RQHEAxUGigdTB0oG5gT4ArQA3f5n/Z/8evzx/LH9cv5h/ygArwDMAMMA8gDkAMQAJgGzAQgCdwLsAhsDAQNXAlAB6v+Y/mv9avzq++v7ffxG/Xr+DQCTAcwClAMeBCcElQPOAswBsQBj/xX+Ef0v/G774Pog+uf5//nU+qv8c/6uACMDkAX3Bh8HZAYQBVQDEgHg/jL9Rvzm+xv8xPyk/c7+4P+aACwBjgHDAfwBGwIhAjcCQAJkAmgCOgL+AX8BsQDm/8L+nf3T/Ir8xPz7/KD9i/7e/zQBegK4A4AErQRnBMADUAKVALr++Pxu+0D69vnh+RL6lPqS+079iP7Y/3QBNQOnBC4FMgW7BP4DqQLIAB3/E/5I/dP8n/zo/Nr9wP6c/0sABwG9ASoCUQJrApUCewJLAgEC2QG2AUcB5gBrANb/O/9f/qL9dv2y/df9J/7u/tb/9wAbAiMDwwP+A/cDRgMGAk0A1P5c/dL7n/oA+un5RvoL+wL8cP2r/tf/6gDbAckCPQNTA1QDWAMCA/UBrwCu/8f+l/2W/HL87fyh/X/+vv8bAT4CDwN1A4kDWgPpAlUCmQH9AJQARQD5/9r/qP88/9X+fP5A/hT+Fv5+/j//HAA5AX0CpANnBIkEIAQoA6EBzf8r/sL8ifvX+q/6xPox++/7yvzj/dP+tf+aAG8BQgKFAm4CgAJ7Aj4CswHqAFsArv+//tf9OP00/ZP9GP7t/icAkQHBAmoDowOqA1QDnAK8AdcAJwCo/2D/O/8U//3+Dv8d//z+uf6V/t7+W//d/7oAAgIuA9gD+gPCA/cCiAERAKL+Y/1a/Mn7m/t/+6P7BPxu/Ov8z/3B/qf/egBeAS0CZwI+AkUCNQLlAUYBmAAIAGb/5P49/tP9tP0h/uP+nf+gANAB2wJKA2QDUAPiAjYCVQF5ALD/HP/X/pH+Vf54/tT+6f7i/kj/AACiACIB4wHEAlUDgwMtA6AC0QGgAHP/QP5b/dv8h/x2/Jv8yfz3/Bz9Lf12/eL9bf4V/8b/rgBwAcwBEgJSAmkCGQJuAQEBogAWAHT/4P69/tP+6P47/+n/1gCiASoClQLwAgkD4QJqArEB7wAgAFv/nv4P/tn9x/29/fP9l/5
l/zsASwGJApwDKAQlBKkDxgKCASMAu/6M/dr8ivx+/Ij81/xM/aP9r/2R/Zf9u/3Q/fn9cP4X/6v/MwDFAEYBmwHMAd4BzQG8AXUB+gCNAFAAGwC+/33/m//h/yMAYgDIAFUB4QFEAnMCcAIoAroB/QAkAGv/x/4q/sb9xv0o/tf+2f8UAUkCWwMPBCkEzwMaAxQC6gC1/6n+x/0j/eH87PwQ/VT9pv32/Qf+5P25/ab9sv3J/fH9N/6w/kL/xv8UAF8A2gBsAaoBngGhAbwBjgH5AEYA1/+h/1//OP9n//P/qQBeARECqAL7AvcChgLaAQ0BHQAx/27+7v3H/er9av5V/4oAzAHWApsDDwQOBJQD0QLUAbUAnv+X/rv9Cv2P/H/8wPwZ/Xv93v08/mf+T/42/hn+7P3e/QH+Uv7A/kL/+//jAMABYgLEAvQC6gJyApEBjwCv//v+Vv7s/Q3+s/6C/08APgFIAg8DVQMxA9UCRAJUATcAOP9z/t39g/2X/Sv+Bv8XAEEBTwItA7EDxQN1A94CEgIeARYAE/8Z/kH9oPxG/E38ovwX/Y/9Cv5g/oX+a/4k/vX99v0T/kv+t/56/3EAYgE8AuICYAN7AwsDMwIpASEAGv8s/qP9pP0o/un+0P/pAAUC5QJeA3gDTAPHAgUCKgE+AF7/mP4l/g/+Mf6b/kr/KAATAdcBhgIUA0YDIwO2AiACTgFMAFL/b/6d/eb8hPx7/Jz8vvz//HT90f3n/e79Af4z/ln+e/7X/nH/PgAJAb4BdAL3AhEDvQIaAlcBaABX/3D++v3m/R3+m/53/4IAcgFDAuECNANCAwMDfwLzAToBawCz/xz/yf6P/ov+w/4g/77/gwBEAQIClwLaAvICxwI8AnkBhQB2/1v+S/1+/PP7vvvV+xr8hvz9/Ef9ev2y/eb9SP61/iP/y/+CAEAB/AGQAhQDNAPrAm8CngGcAIr/nv4j/vX9F/6n/nT/aQBUARcCxwI2A1gDSQPsAl4CvAEGAWIAwP8+/wH/2v7C/sj+C/+a/1AAEAHgAZ4CDQMjA9UCQgJ8AXIAXf9Z/lj9ivz2+6v7qPuz++T7LPxx/M38Of3N/ZH+dv9mAEoBCAKgAhgDNgP1AloChwGkAJH/gv62/Uj9Z/3K/YT+iv+tAMcBeALuAjsDQQP0AnAC4gFdAeQAZQDw/6T/fP9s/0P/Lv9b/6f/LAC8AIABXwLtAjIDDgPAAjUCWwFVAED/Sv5i/Yr86/uY+477k/uf+877Lfyw/CL92v3g/gIADAHpAcoCXQOIA1YDxwL9AeEArv+d/rD9G/3s/Fb9MP4z/20ApgG7AlgDfgN/Az8DswLnAToBzgBcAN3/jf+I/53/ev9W/3f/o//d/xwArAB2ARUChwLIAtcCmgL+ARsBIAAS/xH+Fv02/Jr7Sfsp+w/7D/tS++v7pfyR/cz+UwDNAfACvQNKBHwEIgRBAw0CzABz/yD++/xh/HP8//z6/Ub/ywA8AkgD0gPvA6cDCgMwAjkBegD3/7r/n/+p/+n/HgAmAPP/yv+1/57/jv/D/1oAHgG8ATQCqgLnAsoCIwJBAUQAMP/5/af8ivvB+jT6vPmU+dT5m/qy+wD9mP6CAGEC6QP2BHIFfgXjBK0D+wE9ALL+WP00/J/77/vo/DT+pf9lARsDUgS3BIAE+AMWA+cBrADO/1v/Rf9d/5X//P9lAJsAfwAyAPP/uf+C/4L/1v+QAFYB/QGRAvICEQO3AvMB6gC5/3T+JP3e+876GvqQ+Uj5TfnD+ez6c/wr/iYANgIdBFcFwAWnBRQF+gM8AlEAy/6J/Yv87fsc/BX9SP6j/x0BtgLoA2kEWQTZAwwD8QHFAOD/b/9g/53/+/9zAMIA0gCXABQAff/2/sv+A/+R/24AjgGtAmcDrgOUAx0DPAL3AH7/Ff68/G37RPpy+Q75+/g3+b/53/pm/An+0P+0AZED6gSQBaoFKwUdBJMCtgAU/7X9s/w
v/Dj8BP02/qj/LAGNAq0DNwQwBLMDwwLHAfAAPQDJ/7j/GwCYAOgACgH6AKgA9/8N/0P+6f0j/sb+u/81AesCMgTfBAMFcwRGA7cBzP/T/VX8TvtC+oX5VPl5+bj5+vm1+g38jv0h/9IAlAI9BC4FRwX0BFAEGQNTAZL/Vv5z/bn8bvzf/N799/46AKUBtQJwA9kDswMLAyACZgHoAFwAAwAfAJ4A+QDMAF4ABwCj/7b+r/2I/Sj+MP9VAMcBugM8BbEFWQV2BDMDXQE4/1r92/vh+kf63PnA+dv5N/qo+tv6kfv0/Jn+TwDnAZMDMAXlBWkFhQSEAykCKwBD/jf92Pye/K78hv3q/mYAqAGbAlkD0wOSA7gCxgH5AHEAOABHAJUABwF9AZ0B/QDx/w7/MP48/aH88Pwd/rX/cQFEAwoFNwZQBmMF4AP7AcX/fv24+7/6V/oy+k36nvry+kb7Xvt/+z/8e/3p/nMANwINBEIFhgUPBSME4wIuAVf/9f1K/QL9CP2S/Y/+4v84ATYC1QIcAzAD3QL4ASEBxwDVANYApwDaAF8BkgH0AO3/L/+S/rn93Pz3/Dn+5/+SASgDygTrBQcGHAV4A7wB4v/l/Rj8z/pU+jD6HPom+jj6fPqx+uv6y/te/Vz/kAHAA5kFsQa0BswFOwQxAhQAMf7n/Gr8fvwr/U7+bv9/AJIBXQKdAnYCWQIlAp4BEgHeACUBigGUAYsBuwG9AQEBrP93/pX9y/wj/C/8VP0z/zoBNAMBBU0GswYQBncEYgJcAF7+l/xE+7P6rPqc+on6avpY+nn6VPqe+un74f0zADUCRQQGBqcGJwapBOQCMgFc/939Av32/Iz9Sf4l/wIA9QDXAUICTwImAiUCKwLOAUgBLQGDAaoBZgEZATwBHgE+AAX/Hv6g/Sr90fxS/fP+DQEIA5EE3AWABiEGsQSqApYAkP7N/Fr7cvo8+lz6fPp9+kT6ZfqA+sv6zvuF/ej/RAJsBAcGwAZABuAE/gLzAA7/rP0T/TH9u/1+/oH/dQAtAYsB5gH+Ad0BsQF+AWkBRAFKAZEB9gEFAuUB3QF7AWQAFv/9/fL8C/y6+2D81f3O//oBKwQoBjYHDQf1BSAE6AFg/wb9b/t9+h36Ovp/+rb6mPqU+rL6i/pA+/r8Of9jAXcDhwW6BoYGJwVCA1gBrv8V/jn9YP0P/tD+nP9EANQAAgFGAasBlQGeAcoB/wHTAVoBZAGpAY0BUQEJATEB7ADI/4v+lf0X/Y/8bvxu/WT/nwG8A40FtAa3BrMFBwTTAXf/b/0o/Hn7Bfvk+hH7WPss+6v6bfqP+tX6+Pvu/ScAbwJiBAAGSQZsBdkDFgKnABT/1f3F/Wn+8v4g/4j/PgCJAK8A7AB/AfEBRQKAAncCPwLEAaIBhAEYAcMAvAC5ABEA4v7q/Un9xvxC/H38Dv44AFsCTQQqBn4HaAcFBrEDHQG5/l38evqs+fv5sPoy+6D7A/wv/BH8h/to+2z8+/2f/4oBXAPKBGoFzwSaA0sCIQEWADT/v/7V/vr+Ff8M/y7/yv+zAI4BCQKmAkMDQgN9An0BuQB4AAYAqv8BANUAMQHIABcALP8j/tn8Ifyz/F3+qQA3A4oFRAekB5sGhwS7AfT+dPzD+gL6B/rQ+s37lvzx/KD8UfzI+9n6C/ug/IX+XwCUAvkEKgbUBXsE9ALUAUQAhP7c/V/+8/7l/ub+gv8zAK8AFwGzAW4CCAMrA6gC3gE5AdkAXADg/9D/OgCzAF8Ah/+b/sH9B/09/Er85v1yAAUDMwUoByMIdAdZBWUCgf8J/SH7/fn5+ez6BPzU/FP9Wv3//Bv8EfuM+kb79fz6/i0BUQM6BfcFRgWpAzUCFgHV/7v+Uf6w/kv/Z/9N/57/WwACAUABmAE/AvcCAANLApsBTwEcAXAA3/8SAKgAzgAiADv/gv7F/ef8UfzW/LH+BwFGA0sF9wabB5YGVwSKAe7+vfwI+2D61fr
y++v8Xf2d/Sv9LPwl+0b6S/pP+yv9a/+bAagD6QQDBUYEEAMEAjUBUwDB/57/w/+L/9b+aP6a/i7/6f/hAEQCngMlBOYDFAMMAvkAGgBb/y7/wf+IAJoA8/9h/5L+nP3L/ML8D/5cALICtgRKBkwHsQaTBA8CZP85/W37g/rE+qz7uPx3/dL9y/3t/M37zvpY+jj7u/yv/ugANgPIBBsFggSDA0ICIwEwAIr/f/+s/9b/jP8//yr/P/91/w8A5gD0AcICKwPoAlwC5wE/AaUAKABMAIcAbwDs/y//Tv5g/ab8mPxO/cD+GwGmA68F6gb7BsgFiQOvAP39FvxI+0P79vs5/Uz+jP4O/lH9O/zh+tr5u/kj+wP9//41AWgDFAUkBUUEEQNNApIBYwDV/wkARgD1/xD/xf7//iz/if88AJ8BswIqA/0CqwIsAjgBfAAIAPf/NQB2AIUANABR/yr+Pv2w/K38kf2D/zYCegTnBc4GtQZGBZYCxf+3/Ub8lfuk+5r8B/7N/vX+uP7t/Z38Avso+sL5Afp4+1/9d/9LAc8C1wNPBAQEiAMxA8wC8QH4AFMAU/8I/hz9J/33/Sr/qwCvAjEE4wR6BIMDeALoAMD/FP8Q/63/VQDAALUAGAAF/7z9w/x9/Dz9Dv+6AXEEcgagB0oHewWkAoD/FP14++D6NftT/Oj94v44/w7/Yf5G/Yb7JvqC+aP54/p8/In+/AAqA60E/wSLBDEEpQOLAiMBGADG/zH/C/46/Zn9rP6W/7UAfwIfBLIEJgT1AroBfwBP/7f+CP/t/9oAPQEWAWYANP/M/a/8W/wM/cr+YwFwBK0GpAdRB44F0QJp/3/83Pp6+k/7uPyB/hsA2wCeAIn/8f0F/G/6JPl6+ID5SPsp/XX/FwJ6BIkFmwVlBbUEjgOhAbf/tv7k/eH8O/wH/dL+RQCQAVcD0gTuBOMDsQJiAcf/hP5B/uH+2//aAE4BfAH4AKD/If4q/Q39cf3//l4B1gPdBeYG0wZnBdAC5v9e/a376voz+7P8eP4uAC4BSgGDAPz+5PyW+gT5Efhk+MP5uvtG/jsB0wOQBUsGVgbcBYYEmAJ0AMr+dv1o/Kn7Fvy1/bb/lAFwAx4FrgXuBDEDWgGj/2n+1P0k/m3/ywB6AZIBMAERAHf+DP2a/Bf9eP6cACsDeQXdBuUGpwV0A3IAi/19+6v69/oE/O39GgCMAccB/gD1/z/+sfti+f/3z/ea+AD6U/yF/9ECRAVMBqQGfQY9BTQD7AAN/9z93vw3/GH8sP27/44BQgPSBJsFHAWWA9QB//94/r792f3V/iIAJwHJAdIBTQEFAHP+fP15/R3+W/9XAZkDXwXrBWIF9QPmAZT/U/3a+4n7J/x6/SX/2wANAkcCsAEDAKf9Ovsg+Yr3xPaZ9+f5w/zG/64CDAWSBqAGcwXqAyUCQABh/hX9rPzD/Hb9rf5iAFwC2AO3BOIEDQSkAtsAKv8V/p39IP5U/4gAZAHVAfMBSQHR/4r+Bv5O/kX/3wDWAq0EvwV9BTUEWQIFAIj9qPsy+/j7Gv3K/ucAmQIYAz0CrAB3/tL7Q/mX9xj3yfeb+Tj8Gv/2ATwEggXWBTAFEwRLAm4Az/53/dP85vyp/cb+QwAZAp8DXgSgBDcEFwNrAdH/q/70/fH9qv7b/84AWgF3Af4AEQDu/jH+Qv76/icAjgEiA4IE2QQWBOACdQGq/7L9gfy8/Iz9jP4CAMkBIQMSA/UBiACN/iH8zvlL+PD3Tvi6+dT7Lv6GAHICvwMyBOsD9gLWAbAAh/+C/hX+gv4S/47/pwAjAjUDnwOHA2MDqAIZAYv/t/5l/hv+Qf5U/6YAQQE9AS0BFwF8AJv/Zf8cADEBBQKxAncDxgP+AncB8v+j/oj9qfy5/NP9Uv/GAPEBswKoAqIB5//d/eL7UPpH+d34SPmY+l/8Qf4YAM8B4QIjA+UCKQJOAU4Aef8S/9/+Ef+F/yoAJQEcAhsDsgO
8A04DVQJYATcAKf+u/rf+MP/d/08A3QAsAQ0BwwApAPj/FgBXAOgApQFSAsACkAL4ASYB9P/y/iz+9/1D/u/+BAAfAfIBVwIbAkMB+v97/uz8gPuB+hn6DvpD+hv7XfzO/Q//IAARAYwBigE5AbAAOQDr/6r/kP+6/3sAPQHOAWICHwOFAw0DXgK5AdkAr/+z/mX+h/7E/nj/XAAqAbUB3QHNAX0BLQH3AOkAIQGLAfkBJgL4AY8B1gDJ/9H+TP5w/gT/3v8TARwCqQKAAsABrgBe/w/+1vzX+yH71fq9+sr6WPtM/Hb9iP5//3kAGAEZAbsATADk/3L/Mf9d/+z/vAB5ASkC2wI7AyYDsAIPAmQBeACM/wz/2v7O/gf/pP9zAAcBWgGLAaIBfwE1AQ0BGQFQAXQBXgE7ARwBpQDn/1j/Qf9Z/3P/BgD0ALsBGgIsAhACewFaABf/8P32/B/8kftt+4H77fuU/FD99/2b/k3/nf+L/1n/Pv8X/9D+vv4P/67/UAD7AMgBmAIpA14DUwMNA3ACewFoAH3/3P6A/nv+8v7X/9MAgAHeAQAC2gFWAbwAeQCMALkA4gAdAU4BNwHLAE0A7P+w/7P/CgC2AG0BBgJtAosCJAIxAQ8AB//8/fT8Vvw7/Ef8Tvyb/Cz9tP0N/l/+m/6f/mT+Ef7W/cP91f3+/X7+W/9fAFIBPgIhA6kDxQOFA/cCGQITAR8AdP8W/wf/R//a/5oALgGFAa0BpgFQAdYAkACLAKEAxAD3AAsB7ACWADYA5f+2/8f/KgDZAKsBWgK8As4CgQLIAbcAkP+R/sr9S/0s/WT9z/1A/o3+mv5r/h7+vP1L/d/8kvxc/Dz8P/x9/P78yP3T/hIATAFmAk8D6AMDBJwD5AIAAgsBMAC9/7r/BQB7APMARQFjAVIBDAGSACQAAAAPACMAQwCXAPgACAHHAIgAeABwAGQAkAAZAcgBTAKDAn8CSQLIAf8AHQBq/wL/yv6u/sT+C/9P/1//QP8K/7/+Rf6j/fn8YPzH+zL70PrT+jz7/PsN/W7+9f9QAVYC9gIzAwkDgALKAScBuACZAMkANQG7AScCXAJAAtQBNQGXAA8Ar/94/3n/t/8EADYARQBJAEUAMQAjAE0AvABGAc0BNAJ3AnECFQKDAegAaQASAOv/9v8kAFsAkgC1AL0ApAB1ADAAvv8T/zr+Pf0m/An7H/ql+aH5E/r5+k381f1E/2IAIQF1AWcBFgG1AHwAiwDoAIABPwL8AoIDqQN0A/ACJwJAAXMA1f9k/yX/Iv8+/0j/SP9g/4r/qP/c/1IA9QCLAf0BVwJ9AkgCywE8Ab8AZAA8AE0AkwD1AFQBiwGMAWsBPwEFAbUAYQAYALr/Dv8g/hn9D/wU+1H6CvpV+g378/vl/Mr9eP7C/rX+l/6d/tX+Sf8VADABWQJaAw8EcwR6BCAEhQPKAgoCYgHKAEcA7v+n/2D/Cv/K/rH+sf7T/jT/3/+SAEABsgH0AfUBvAFQAbsAlgCBAJoA5QBiAeoBEQI3AiEC7AGIATEBBgHNAKUAWgANAJj/+P5J/oz9+fyG/DP85/u7+7j7pPt2+1T7YPua+wX8sfyp/cv+DQAzAS4C/QKcA/0DDwT5A7YDUwPMAjsCpgEFAW8A6P9y//7+tf6j/r7+A/9e/8//NgB/AJcAhABxAGMAZwCKAN0AUQHCARoCTwJgAkICAwK9AYIBWQE2AR4BDQH0AMAAgABMACUACwD6//P/2P9//93+8P3Q/KH7ifrK+YL5u/lN+h77Gfwa/Qn+2P6a/1oAGAHGAWUC9QJhA6ADpgNzAxsDpAIlAqIBLgHXAIkANgDi/4z/Of/z/r7+pv6h/rL+4/42/7L/RQDlAIwBGAJwApACigJuAjUC6wGiAWQBNQEXAQQBBwEkAU4BcgGGAYoBhAFZAfoAcwDC//v+HP43/WT8u/s6+9/6r/qs+tj6IPuD+wr8vvyX/X/+ZP85AOwAdAH
PAQwCOwJpApcCvwLXAt0CxAJ/AgUCZQG1AP3/S/+1/lb+Mv46/mX+sf4b/4r/9f9ZAL0AHQFrAaQByAHYAcwBpwF1AUYBKgEoAUgBhQHWATACfwKxAroClgJLAuABXgHSAEsA0P9W/9/+Y/7g/VD9tvwj/Kz7Xfs1+z37d/vR+zj8o/wS/Y79Ev6q/lb/GADhAJwBNAKbAtcC5gLNApACRgLvAY0BGwGgACcAtv9R/wD/1f7L/uP+Ev9M/4n/wP/o/wQAEwAjAEAAaAChAOoAPQGXAekBKwJfAogCoAKuArUCswKpApACXQINAp8BFAF+AOT/Vv/n/pT+Xf5D/i/+Ev7e/YX9EP2N/A78uvun+9b7TPz0/Kb9Vf7r/lv/tv8FAE0AowD3ADsBgQG0AcUBywG4AYwBXwEiAeUAtgB5ADMA8/+l/1D///60/on+g/6Q/rn+9f45/5v/AQBjANcAVAHSAUgCngLiAhwDLAMaA/MCugKQAmwCNQIVAgECzQGIASIBigDv/1H/tP5C/uz9v/3b/f/9C/4u/lb+UP4X/rb9bP1Y/Tv9BP0I/Un9iP2l/ab9uf0V/pv+Gv+v/2MAKQHrAWcCgAKAAnICNgLWAVMB1QCaAGQAAgCd/zj/6f6w/lX+Gf5G/pf+9v5h/7v/QgDpAFkBxwFFAqgCIQORA8YD/QMXBNcDgQMXA4kCFAKbAQwBsQBjAO//ev/9/o/+ev6T/qn+z/79/kn/xf8IAO//yP+O/yP/f/6l/ez8j/w0/MP7qfvv+0z8m/zF/Pr8h/09/tL+a/8mAPYA0wFjAn8CiwKYAoACPgK3ARwBwABbAMH/I/+W/kL+J/4H/vz9Vv74/q7/VQDjAIMBKwKZAuICIgNiA7YD6QPoA+4D4AOLAwoDZgLKAVABwQAqAMb/g/9J/x//+/71/g3/LP9s/8z/JQB4AMAA2ACsAEoAwP/q/tD9wfwC/Jf7M/vA+s36mft2/Oz8O/2m/WX+Of+X/8P/NwDVAG4BwwGkAY4BuQHAAYIBEwGiAHoAWADS/y//zv6y/q/+i/58/uX+nP9NAOEAcAEWAsMCNANnA4UDpgPCA64DVwPxAq4CaQLuAWABDQH8APwA0ACSAH0AfQBmADIA7//K/9f/yv+u/7z/3f8EACEAAwDc/73/Zf/7/oj+1f0S/V38vfuM+zb7VPox+g/7ovvk+1b8zPyy/db+Xv+W//7/mQBiAckBrAHiAVkCkQKZAkcCuwGJAUYBnwDy/37/gf/I/6//gP/u/50AMgF6AYEBvwFXAtICEwMpAzEDhgPaA5UDFwPSAoACIAKhAfQAnACtAI4APgDs/63/1v8IAJn/Jv9y/x0AkgCSAFgAcgDMAKcA+P8k/3/+Mv7I/cb8q/vk+ov6gvqd+Uj4tvhT+iL7gPsK/PP8rf4FAB4APQADAR4CHAMaA6oCHQPNA8EDDQP8AVgBUQHYAOD/PP8a/3v/5f+s/4D/LwD5AD0BTAFuAeUBxAJ5A7EDzgMLBFYERASRA9QCcgIZAssBcwHpALoA+gDoAIAAGwDe/wUAHwDP/7P/BQBHAEQA5f9W/yD/7P43/of9EP2c/Fb88vs3+676a/pt+n76s/kV+U364vtS/IT8Bv3+/Wj/JAAYAFcAOgGKAmcDGgPcAowDCwSrA7UCrQFbAWYB1wAIALj/9/9oAHMA7//l/60AYQGEAV0BlQFZAhcDRgMjA0sDxwM1BBQEhQNbA44DYwO6AvwBbgFWAVgBqADr/93/8f/J/3b/GP8L/0b/U/88/xj/5v7Y/pL+BP6b/SH9jvw1/Mv7R/sF+336u/kA+rH6D/oI+ab5Wft1/Kz8rvx3/Sj/mQAiARcBXwHUAmgEWwSFA3kDAQRUBNYDtAL0AdEBqwEvAVsAvv/h/yYA9P/L/zAAvgAfAVwBnQEqAtcCVgPFAzYElATmBO4EkwQGBGIDywJYArIB/gCuAJQAZwAWAKj/VP8O/7T+hv5X/h/+fv7
A/l/+Uv4p/rr9sf1S/Xz8+vvk++T7h/vX+v75mPmV+i370Pm4+PL5OPwp/cj86vxX/k8AjwGnAaMBXgLpAykF4QQFBP8DkwSrBLYDWQKDATIBBgFtAIv/Sv+5/yoAPQA7AKgARgGRAQwCqQIoA8UDSgSQBPwESAX0BGYE2gNrAyMDqwLcAWgBRQEJAcAAIAAm/5H+dP4U/pL9KP00/bP9Gv7v/Xz9Y/2X/Yv9/Pxn/Cn8Hvz8+8T7Qvvg+m36Rfr1+qf6Kfkc+Qj7nfz0/Ob8jf06/9cAUAFwARYCKAOCBCkFzQStBBAF/gSDBKMDewLPAWABsQAeAMn/iP+M/8D/BwBlAOMAWwEDAuYCowNjBOAEEQWEBfUFxwUvBYIEGQTIAxcDNgJaAZwAEgC5/xf/Mv6Y/YP9pP13/ST9K/3D/UL+Wf4N/qn9xP0M/r/9N/36/Kv8S/zY+1j71/pS+lj51PjG+RL68/iZ+Br6GPw6/U39ff0F/04B1gJBA0EDtQM5BWcGEQYDBVYEYQRyBKUDMgL2AGwAiwCPABMAkv+U/3gAkwHmAc0BQAIFA7IDOwR0BH0EuwQoBWgFPwWgBBQEtgMdA2ACwgEUAWgA6/+M/zb/tv4q/tf9of1C/fz88vz4/Pz8Jf0o/ST9WP1V/f38vvx3/AH8nftK+wn7sfrr+T352fmp+uz59PjF+aX7+Pxz/ab9qv57AN8BdQLbAnIDRgQ7BacFcQU7BewEgAQWBG8DiAKZAfcA+ADvALUAWQBJAAEBkQGPAbIBLgKpAlsDNQSyBMUEHgWIBbMFZQV9BNMDkAMgA2UCgAGDAPH/sv9E/1f+Wv3x/BT9/fx9/Gz8z/w8/Xf9aP0o/T39cv0x/dv8nfwR/HT7Qvvj+m/63vn3+PD47/kj+j35IfmR+nj8n/3N/dD9Mv+nARwDWgNeAyMExAXNBmsGdQXUBKAEbgTGA68CnAEvAS8BLgEiAbsAcwDhAIgB1AEUAlsCtgI2A+4DwQQlBeMEywROBawFWAVfBHQD2wKnAiUCzQCN//D+tf6J/tD9+fzT/AL9D/2//JL8t/wG/VD9PP39/AH9B/36/L788Psb+536kvpQ+p/5xPiK+Kv5ZPq3+R75Efru+0/9xP0Z/gr/4gDOAq4D+QNTBDEFCQYsBuYFhgXjBDME1gN/A8wC0AFJAS4BDQHpANAAEAFZAYYB9wFmAtICTQPBA3gEGQVUBX0FmwXVBc4FKAU7BEsDuAIhAh0B3P/U/mr+KP6x/R/9iPxj/Kr8t/yM/IX81vwu/Uz9Of3g/Ln8xPx+/MT71fpc+mr6PPqS+bD4WPgg+d/5qfk6+c/5gfv+/J39HP5H/+MAPgJgAy4ErQRWBQQGUQYzBuUFYQVuBIUDEwOtAukB6wCKANIAAAEbAR8BRAHcAWYCrgIQA8EDjATxBB0FWAXLBVAGCgZFBcUEMgSQA9kC4wHWAM3/FP+t/j/+vf0q/Z38Xfx8/AP9Cf2F/Hf83fxv/Yz98fw//O373Pu0++/6ufnr+N74OvnL+Az4Vvg3+cH5F/rj+rX7rPwt/nD/HgAkAVwChwO2BD8FcQWkBQgGHgbJBS8FGwQtA/ICsALjAe8AagCeABQBYgFTAT8BzAHsApkDjQOYAzsEQQX2BbwFOwV8Bf4FwAW5BNADAwNfAuIB7gDS/zH/0v5T/tP9a/0o/dT8qfy5/N38/Pzg/NX89vwf/RD9oPzv+1f7FPvh+in6Gfmj+MX47Ph6+D34vfhY+eP5Tvrm+un7Nf2A/sb/wADJAfECJgQoBZkF1wUGBi0GMQb9BWQFWQRcA/YChALPAQYBjwCuAPwARAF3AckBWALhAjoD0gOkBE8FdAWHBcoFAwZDBu4F+QT/A2cD9wI3AkQBXgCL//j+i/4V/qL9FP2n/Fv8YPy3/Mr8r/ya/KT89/wv/dL8BvyI+4P7Qvua+qX56fjm+Pr4iPjw9zf4OvnE+cL5F/pE+/r8cf5D/8r/ywC
aAhcEyAQ7BYwFEwabBoMGEAaqBfME6wMdA8ACUQK9AR4BrgDiAGAB0QHUAf0BvwJhA9MDSwStBHEF+QXlBbQFmAXlBbYFyQTEA9gCeAIUAgcBEQBa/7X+K/62/WL91fxo/Df8Gfx8/L78evww/BP8VPyj/GH8w/tC+0f7R/uY+rj5Wvlx+Wj5y/gL+Fj4RfnZ+fD5S/pY+5T8t/3//kEAhQHaAqkDmwTRBZMGwgaWBoIGowZ/BqwFjQS0A2MDBgMrAmEBFgFeAXABQwGgASICjAIXA3QD7QO1BF8F2gXVBd4FAgbsBcMFBQX2A1YDuALZAbwApP/0/kb+sP0R/WP8Bvzm+8z7qfuz+xL8Qvwx/CH8KPxx/F/88fum+5b7bPvU+gv6tPnG+YT5yvga+HD4Zfmk+XD50Pnz+qj8J/4E/+H/IAH0AnIESgUTBqEGFgdfBy8H6Aa4BikGTQVPBKYDPgO6AhUCTwEAASYBSgFpAXwB0AGcAjwDuANaBBMFxAUFBu8F1QXaBd0FMgUTBEQDmgIAAjQB4P+p/vT9lf0N/Tv8tfuM+2v7avtt+2X7h/ux++T7+fv1++j70vv/+zH83ftL+8X6a/pT+jL6w/kY+dH4Lvlp+Xb55/mM+m/7m/zI/Qb/dQADAk0DKgQoBTIGBAeXB6UHgAdNBxAH3AY7BlYFiQTEA04D1gIPAocBWAFqAXsBfQHLATICwwJuA+ADVQTQBB8FTQVPBTEF5wRTBLADAgNCAnIBewBu/4b++f2L/d/8IvyQ+1n7Z/tg+0b7IvsQ+1b7ovu1+7P7pvvJ++j7w/uK+0r7Ifv9+qv6Tvr5+br5g/mR+dP53/kX+rP6mPut/Mf9E/9bAHkBywL1A+wE5wW9BlEHhgesB9UHzgekBwsHSwa5Bf8EPgRrA6cCRQL8AbcBXAEUAU8B6gGCArUCygIwA8YDVgSfBHwEPQTyA78DcgOnAtcBMgGMAM///v4r/o79If2b/PD7g/tK+xT7/frc+r76u/rR+vb6AvsD+0L7hPu2+8n7qPuk+7/7sftb+/z64vqz+lH6XPqi+u/6cPvZ+1T8Sv2e/gQA8wDSAQkDFQQqBTEGzwZoB8sH9Qf5B7wHxQebB9QGAAYcBWsE9wNJA5EC0wFxAYIBTAExAYEB6AFQAowCrwLiAi8DgQNlA+sCnQJgAucBUwHEAC8Ap/8k/2/+ov0p/fn8pPwT/H37D/vl+uj6xfp9+mj6i/qc+qT6yPom+4/7zfvd+8j74vsV/B78Evzo+5n7Y/tL+3P72/tD/In88Pyr/W/+av+UAG0BQAIrA/AD2ASsBVUG6AZAB6gH/AfdB6oHZgfwBoEG6wUgBVsEwgNaA+4ChgIqAtkBugG1AacBoQGrAZ8BbAE5ARoB/gDgAKgAPwDX/6//k/9W/x7/0P5R/uH9pf1s/R/9p/wz/Mv7d/tg+zX74vqv+qL6mvqp+uX6FvtG+5z72fvp+xH8RPxW/Gn8nPy9/MT86PwB/TT94v2F/sn+H/+S/zAABQHFAUkCxwJ6AzwEqwQnBeUFWwawBuwGwgazBtYGqgZEBrkFTAXxBE4EuQNAA+ACsAJKAsUBZwEgAQUBtgBDAAIAzv+2/4v/V/9Y/2P/V/8j/7L+iv6m/q3+b/7f/Xv9Yf1U/Rf9gvwO/Pz71vuN+yz7DPs6+0z7d/t9+3D72vsc/DX8kvyx/Ov8Pv1M/YL9tf25/f/9VP6H/uX+Q/+Y//3/UAC3ABsBXAEKAqkC2gJjAwAEVwS+BCsFdQWsBdwF5AXDBbMFtwWeBTgFygR4BA0EmAMpA8YCWwL3AbQBKQF9AC8ABADJ/3j/I//Q/q7+yv7B/ov+i/6B/l/+M/77/R3+Ef63/YD9O/3V/Kn8ffwv/BX8Jfzn+7v70Pvy+zH8Sfxh/Ev8nvwm/Rr9R/2q/f79Qf6Y/tD+x/4j/3P/iP+w//D/TwBTAGsA6gAOAUUB3QEQAkAChALwAoADmwP1A3cEfgT
MBBsF9QT1BB8FNAW+BEUENwTFA0MD7gJ7AgMCeQE9AdcAcgByAO7/a/9B//X+yv6f/nb+h/5e/lr+c/44/gD+AP7q/Yz9ef1T/RT9I/3z/LD8q/yf/Kb8bPxH/JL8pPzJ/PD81fwU/a39qP3//WX+bP68/gn/dv81/6L/OAANAFoAhgCzAL8A/wBxAXcBmQHEAZcBbAHNAYQCYwJWAgwDPAOZAyUEPQQYBDEEQQQ4BBgE9gMgBM0DiQMfA1wC+QGiAREB2wDEAEUACQDF/3n/Wf/r/pP+uv5p/gv+G/4c/jb+O/4k/u396v2f/Wz9ZP03/XD9G/3U/B399fwM/SH9pPzA/PH82PxI/W39uP1G/kD+nP69/tH+Xv+X//j/NgBHAIMA7wAkAQIBWQFUAVcBqQGuAcoBgQGlAekB6wEcAr4B9gGUApQCJQNqA2MDmgNtA2gDZwN7A28DzQJ/Am4CJQLwAXEBPwElAaIApQA/AG//Tv8f/wn/8f6b/qn+jv5r/mX+Kf5b/kD+iP2k/Yf9ff3t/Zn9rf2O/Uv9fP1e/aT93v1l/XD9G/7O/cX9Xf6j/tz+wP78/lr/if+b/6v/CgAtAB0AlgDzAMIAXAGvAWoBkQFLAT4BPwFFAZ8BYwFqAVcBmQErAhYCkgK8AkMCZQKLAmgChAKQAp0CoQKeAkoC7wHYAU8BPgESAYsAuABNAPv/DgCy/wcAtv8O/+v+mP5m/hT+Iv5O/gD+D/4d/u795/3Y/eP9e/1S/YX9lf10/ZX99f0D/lL+Rv46/s/+xP7D/hT/G/9U/4P/yP+9/wIAWACFAIYAWAB+AKAAAQEGAWcBEAEMAZYB2AAfAXEBXAGjATIBQAG8AZwBxwHpAdUBJwItAhsCKAKyARMC/QGJAZwBMQFHAfkAtQDtAI4AVwCbAG0AHABNAPz/qP+1/zT/Zf93/wL/6f6E/o/+gP4q/hz+GP7V/eT99v3C/Qv+wf3O/RD+eP3c/R/+K/5I/k3+9f7W/hz/mf9c/6v/qf+V/woASQB+ALAArADFAPIABwEqAQMB/wBzASYBPgGDAQsBjAG6AYsBugF+AZABfQEaAZsBlAEKAUsBEQE2AVkBVAFrAa4AYwB8AHUAwQB3AE4A1QBtAFUAgwAyAHMAXADu/yUAUf82/27/sP46/xH/Ev+2/vr9f/58/lH+Wv5D/tz96f0W/u/9Df71/UP+Kf4k/nT+W/6s/pX+RP/3/v7+UQDD/97/EABLAKsARQDDAP0A4QDzADIBVgEzAVsBPAF3ATQBHQFJAZAAMAGIAV8BtgExAR0BjgFJATsBdgH0ABQB/wB6AO0ApgCuAOEAcAAuAD8AQwAqAEYALwBMAOj/5/8kAGf/H/+1/3T//v4r/03/+f6G/r/+tf6A/n/+Yf4s/oP+ov6N/o3+Vf53/qr+vv6b/pv+tf4G/yT/z/47/5z/fv+3/5H/kP/7/xcAOgAvAIYA+QC5AO4AGgELAXIBnAEaAXcBywFCAWUBgwGbAYQBZQEUAe4ARwFUAVgBbAHgAK8AyAA2AJsAsgByAH4ABQAgAEcAZP/f/xIALf8uALT/Nv+2/wz/Vf9W/xz/mP+b/rL+eP9h/g//EP+Q/vT+e/7A/r7+jv7F/o7+v/6y/nT+0f6v/vX+wv71/lb/sv4f/4T/Lf+I/6b/Zv8nACUA8P+OAJEAmQA1ATUBBgFDAU4BLAFeAXkBywFuAVoB/wEhAWEBnwH8AHoBfwFGAboBBgEOAXgBkgDcAL4AngCYAAgAAAAxAAEA1f/4/2//hv+q/w//Nv9p/1P/mv+B/z//e/9z//X+X/9M//v+mv/n/sv+Yv/H/vX+L/+c/kP/6f5v/kr/qf6L/hL/I/9C/07/Pf8Z/wv/Zf++/1r/FQDw/4b/+P/B/10ATwCIAMcAhgD+AJsAsABIAQUBQgFIAScBPwHvAGABcwEeAYcBMAFyADcBBQGdABoBggDlAIwAiAAAAen/hwCKAK3/EgD
r/8f/x/+q/5P/cP9v/1r/Df8E/13/Jv8n/0r/TP9b/wT/Yf9y/zT/Wf8O/3n/iP8R/2z/MP89/8H/6f5J/2v/3v5j/1j/wv93/13/2f9I/4f/BwDz/34AKQD4/2UA9f91ADsAPQAGAXEAwQAgAWsAvQBYAfIAyQAxAYIBLQHcAGAB2ADrAIIBsgDYAK8AEAEsATEA8ACLAEYAgAAbAJkADgBfAGIA2f/u/5f/mf9J/3P/Fv8K/3r/Kv/0/mD/yf6i/lX/ov6O/5H/J/+q/wj/Yf9H/87+Zf9s/1P/f/8q/6f/x/9P/9H/hP9F/8L/k//B/+D/EgBzAOH/IQADAAYAGADB/6AATwD0/4QAkQBqAHEAigAFAfQAtAAPAcEAwwAGAdgAJgHaALQAdwFlALwAfwGGAKUApQCAAI8AdAB1AFEAYgCDAOX/8P/e/7H/mv+s/8D/Kf+o/27/T/9w/73+If9b/7j+GP9x//v+Rf8y/xf/N/9u/4L/eP+D/3T/cf9k/4X/f//B/7P/1f8r/33/MgCl////FgCv/+//cgArABEANgCTABsAVwCEACYAdwCXAIwAZAC7ANYAigCeAAwB3gCeALYAVwGkAOcADQFDANMAqQCiAMgA2ACwAKAAWQBgACIANwB7AOn/AAASAOr/3/99/3v/1v9o/4X/bP+V/7D/R/91//j+D/9B/zn/7f7k/lD/3v5H/2v/eP5j/7f/3P6Y/7X/tP+R/4P/AQBv/7H/SgCY/7H/KQDe//b/LAA3AOf/QgCgAKv/LgDVAPP/sQAmAYAAlQC+ANIAXgCPAK8AawD+AJYAUAAAAYAAjgARATwAoAD/AD0AZwCCAFgAkgBeAC8AOAAaAIwABACa/xQA8v8bABEA1v/b/4H/of99/z//h/9t/8T/GP86/2H/2P6c/23/GP/h/lr/KP/o/tT/EP+G//L/2/7Z/4z/Rv9FAMD/r/8IAB4AGwDZ/6L/ZAAiAPb/gwBOAGwAQABWAHwAeQCPAEwAUQDcAK4A5ABJADsAPwE3AHQA/wAwAIEA1ABPAHkAiwAhAA8AqACFAEsAygCEACMAmABBAA8AKwDF/wQAvf9QAAEAjf/K/3//mP/g/6v/Vf+q/5f/xP9P/xT/av9y/1X/W/+l/xn/af+G//r+wv+L/2X/bv9M/wsAwP8AAMr/Zf8XAJL/HgBRAM3/fQAjAKf/9f9bAA4AFABLAIgAOADz/7oASgBGALIAfQBaAEIAmwAEASsAawDSAEAARQBHAKEAgwCcABIAGACuAPP/IwB/APX/3f9XAD4A//+CAB8AZv9FAA4Am/82AOf/gf/2/wgAD/+S/wcAhf+T/+n/g/9P/9f/R/96/9v/y/9X/8D/x//1/o7/8/9h/0f/CgCC/w8AHQCS/xkA/f8cAAMAcABtAML/XwD6/wAAWwABAIgAFQAZAEkAdABMADsAmABDAI0AcwA+AKIAQwBnAHIADwCyACMAIQC3APr/FABvANP/4P8mAD4AFgCc/1cALQCf/1cAvv/o//X/0v8QAE7/UQDy/yL/5f8qAKj/bv8GAJr/tf8KAF7/qv+m/zj/sP91/2v/xf+D/8b/av8y/+P/ff9b/8v/2P/w/wAAHgC6/6j/cABGAC8AsgBsAHUAdgAmAHEAOABZAIIA/v9oAFYAKQCPAEoAIQBkAAoAwv9xAFoAKwBzAG0ASwBWACgAEACiAGIAcwB6APL/LwBNACMAGQAtAPH/3f/Y/3n/cf+Z/6b/1f+E/2T/8f9v/6//3//N/+3/fv8wAJH/nv9bAF//kP8WAGn/KP/d//H/Jf+9/1EAKP+H/1IAVP+K/2YAJQAfACwAoQBrANb/RwDv/6T/2P+2/yYA9v8cAFQAHAAWANv/sAB/AB0AegCbAEUAnQCFAPL/hwBlAHUAmwCiAFIAVgAqALz/FgASABEAJAD8/04ATwDI/8f/bf8g//T/OwCl/xYAJACh/5L/uf+l/3X/1//X/87///8
RAOL/wf/d/x8A9f+S//n/0v+j/9X/zP/D/9v/gP9T/8X/Zf+5/w0AzP/V/xkA0P/M/0gADABaAEkAUABaAOP/8P/o/wIAoQBWADkAzQBDAAQAmACOAC4ARQB7AJUAuQD0ALEAUQBtABcApP/w/y0AjP+K//X/Rf9t/9//m/+H/9//8/+//ykARQA+AIIAhwBgADoA7v/9/77/0//o/67/pf+P/3X/F/8o//P+3v7Y/kf/cP9V/wIA6v8sAHgAdQDuAPoA1QDSAN8AxwCsAIUAQgADAND/ef9F/23/P/+L/3r/VP+B/3P/1v/B/8z/bgC6APIANQEDAVMBcAHzAEABIgHtAPkAgwByADwAMgALAGD/SP90/2j/Uf+n/27/Fv9l/2D/dP+B/1P/m/+S/2H/iP9y/z//5/5v/gv+Fv6v/SP9Wv2W/Qv+rf4l//L/FAG/AYICMwONA/8DLgQdBG4DKQPOAuEBiQEBAWsAGQDU/0f/+P4U/9b+xv43/2r/hP86AH0AkQAMAVcBHwHnAAMBNACs/4X/e/7i/Yj9Gf3a/Hb8NPyo+0H7RfuP+kj6OPsD/Ob8vf7ZAMYC7QRyBkEHLwjaCIAIxAcGB8sFRgRYAkIAdP5T/Uf83vqy+gv79fqB+//7efxv/aj+t/+CAAoCrANEBJ8EhwTgA2wD9AFiAEn/W/45/gH++/1v/vf+Fv+//sL+s/7h/u/+j/5T/ycA4v/H/2j/b/6N/ab8Xfx4/J78kf0e/6kA9QEbA4oENwVjBcUFWAXLBAAEMgPpAfP/Gf/P/W38Dfyj+8H7U/zP/FT9vv1e/sD+9P7O/8MAxQHNAlMDegNNA34ChAFVAGv/IP8P/5H/+v/CAP4BPQKJApcCWAL/ATYB4AD//zT/2v4y/nj9hfxm+3r6Rfn0+ET59/nL+0v9Qv90AbIDfgVWBkoHsgceBxAGkAQtA4wBx/+A/vb8QPwo/PH7L/wB/d79mP4x/4n/XQASAYwBrAEuAmMDsAPHA3YDpAIiApEAyf7y/Xf9t/0o/tb+8v8RAZgBbgEqARMBrAAkAGD/0/73/pn+9P3E/MP7pfq7+Iz36Pfu+En6nPzN/44C8AREB2QIGgkMCRsI6wZFBXIDlAG0/8v9Lfx4+wn7C/v5++/8Nf6n/4MAOQHpAWICHAMWA1YDvwNCAwIDGAKFACn/yf0m/AP7tfo0+2H8H/4JAP0BoQNkBNcEtATxA7sCrAGnAGD/Vf5a/SD8a/oz+Sr40fb79rj4tfq0/Kb/cgOJBpEI9gmICm4KXwn1BmQEHwIZAPL9W/u8+S75Fvka+Xz5Zfux/VX/sADLAdQC8wNOBBoEcwTOBBQFfgT6AswBQABI/kf8tvpr+vf6MPyD/SX/ZQHRArsD9APNAx4EngM+AigBHAAI/8X9Ivzi+lT5fPhl90T2sPfp+dr7v/3/AOoEUQfrCIsJmgkrCQMHiAQtAub/aP7h/AP7OPqq+hP7b/uR/IX+KQA8AVACAQOoA40ExQQgBJ4DwgM7A64BLQAV/wP+nPww+336svqa+wr9wf7EAPIC5wT5BfUFnAUNBVwDEgEp/2D90fvU+jP6g/nK+L74h/j89+r45/r9/CH/rAEZBRIIhwlICkUKhgmvB/oEVgLO/w7+VPxw+sP5YPoo+7r76vzT/qMAmQFFAt8CEAMyA8QCkAJyAkoCUAKpAREB8P+2/mT9sfvk+sP6jvst/fv+hQHhA44FxwaoBgQGJwUuAxMBSv+j/U38Nftu+uT5BPlM+NX3I/dy91H5WPs5/YMAoASyB4QJ7AqUC3YK+wcgBVACW//9/DP75vmy+WT6xPv3/G7+RADIAesCJAN4A+oDrwPcAikCCwKzAbwAsP/e/gn+2fye+9H6svqQ+7/8o/76AGADuAX/Bn8HJQc3BpgE6wF//6D9I/wT+2D6aPq1+t/6f/ow+nL6lvlz+Rb7evwF/qsAMQS+BlgI1wldCmYJDgesBIsCtf8a/aH78vr
H+lL7kfwQ/p7/IgGNAikD2QKfAkcCWwFvAD8ALwAZAFoAEQDB/33/m/5n/X78f/wI/d39X//LAT0ExgXaBhIH6gVvBGEC2/9X/Zj7A/uA+kr6z/qk+7D7VftF+0H6GPnO+df6pPvD/eUBzAXPB4UJGQt6C20JOQbJAw0BQ/40/B37IvvK+zb9lv7i/ykBRwJlAmYB5ABhAJv/5v6m/j3/k/89ABMBBgHaACkAfv+B/lv9kf1V/nf/2gBoAkcEUAV5BeIElAMbAlsAX/69/Cz8RPxs/Lv8lv0v/k3+tf0o/BX7jvno9/L3Ffnw+pD9pgGuBfEH9glCCykLfQmIBhQEegGR/u78M/z6+5r87v0//0gAUgGbASYBRAAW/zL+UP2P/Ar99/2y/hgAXwHzARkCCgLbAV8B1gDHAFYBxgExAiIDsQN8A+8CsgEsAJD+1/zZ+6v7Avz3/Pr9Af/5/97/sv4K/UP7v/jD9uT2uvdB+XT8DQGVBewIfAvwDL0Mvwq4B18EwwAC/lv8TPtK+4b8J/6H/1gA8wBOAYsAEf/9/Rz9ZfyV/ID91P6KAF4CxAMuBMoDJQMnAosALP/Z/uT+Qv8iAG0BfQKxApUCyQFwADb/Bf44/QL90P3p/qL/TgCNAD4Az/4+/LD5KPfz9E70XfXb91n7SwDBBTgKhQ0xDzkPgQ0FCsQFwgFL/sj7cfo/+hz7rvwg/h3/wf8RAIj/Nv5E/ZX8LfxV/HX9hv92ASoDywTpBecFwQRSA5cBov8S/h/97Px2/YL+vv/KAHoB3gEAAnsBswBFAPv/EQBDAHEAngBwALr/oP6K/Mn5yPcz9oj0VPS99vT5QP2iAaMGqArvDJMNEQ1HCyAIjQQdAUb+t/zl+3z7MPx2/ab+SP9m/5X/gf9t/g/9i/xr/Hv8Uf2y/o0AtQJZBH4FBQa4BewEUgMXAZ3/1P4T/tb9Y/5B/wsAkACuAOoA3gBcAEEAHwBCAMgA4ACGADQAyP8o/gH86vn396/2cPVO9Ur3OPr7/IQA3wT4B9QJmwpGCgUJogboA5wBsv9U/tb92/0e/gf/5f8tAHEAHQCG/9n+ov2r/En8Mfzo/F3+/v8aAlUEoAXsBfIF7AT1AgEBJP8M/kT9z/xe/TP+JP8TAJ4ABgFaAXMBQwElAV4BvAHdAYcBLgHLAG//g/0K/Mb6bPlt+Cv4GfiK+CH63ftK/Qf/pgH3AyIFAwYLB9cH2QctB20GowVbBOcCIQFf/yj+//yQ+4f6iPra+gL7JvsD/LP9+v75/7IBggOpBE8FowWOBfUErwMfApUAGf8c/m797PwO/fb9Hf8BAN4AGAInA3YDfwN4AxEDNAI4AUMAL/8s/jX9f/zF+9L6W/rV+bT43/cU+KL4wfgt+hr92f9tAjIFKQhrCkQLZQsUC7wJZwfKBNsBl/4A/LD5tffp9h33APhX+U37l/2q/zUBtQIMBJ4E8wRABRgFpQQLBB4DBwKiAED/Nf4T/UT8R/zN/JD9+P7sAJoCBQQsBcQFoAWtBHQD8gH6/1f+Lv0l/G77Z/va+9r7XvsW+4H64fhk90T3UvfN9xr64f31ARgGcgohDk4QrxCyDyYN6gj6A/b+G/oy9urzU/Mv9EX2lPk//TMAlQJZBMYEPgSMA8cC9wGNAdIBLwJDAi0C5AH2AEj/xP2S/M37vfuV/G7+wwAxAy4FkgYAB6IGZAURA70AHP/d/eT8svwW/bL9dv7N/qH+7/2W/O76z/hT9sv04fR19Rb3xvqj/68EPgkjDUcQmhFuEKINsglvBC7/lfqJ9on0kPTA9Q34Pvus/pYBDwNYA1IDcQK5AIT/Fv8e/+f/7AApAlIDlQNOA4wCHwGy/5n+kv1B/Q3+Ev9ZAM0B5wK9AxAEsgMfA1MCIwFYAM//WP9g/6D/7f/7/6X/6f66/SH8CPoU+BP2V/R78//zp/UQ+A38KgFXBpwKGA6AEBIRrw/BDAgJuARPAJT84Pk/+Mj3g/j
p+Wr7Of3c/tP/NwBbACgAfv8C//D+U/8jAD0BmALSA4EEvwRNBAADVwG//1b+SP0I/a396v5rAN4BFwOpA5cDHAMRAhABNQCP/27/p/8mAJkAgwAWAIX/H/7S+3T5QPc89YHzPPLZ8lf1X/g//D8BogZYC6IOcBAmEVgQng35CecFfwGZ/WH6wfdQ9iT2y/Y5+Ob53fsU/pD/YQAQAWgBRQEtAS4BtAGIAvUCcAP5A8wDKAOMAqoBzAA1AOH/7P8wAGcAwAAJAc4AmgCEABkAtv+w/wMAfwADAcgBngLyAqcCGAL1APT+xPyb+ob44Pao9S71ufWm9tf3M/ry/Gn/OwJdBfwH9gloCxkMCgzYCsMIMAYKA5L/efzK+dD3I/ds93r4ZPqS/J/+PgDpAOAAXwBI/zn+5P0n/mT/kgH8A3oGvwjwCdYJlggRBtcCOP+o+zP58/et9974WPsk/hoB+AP9BRoHVge/BoYFtAOuAQUAh/4J/Rj8ePvm+pH6GPqW+WH5Dvna+EX5hPlX+nb8gP5bAAEDEgZVCOUJtQrOCgYK3AcABR8C2P7E+775O/h99/f3LPms+l786f1C/wkALABOAG8AQQCSALwB+QJFBKQFyQYhB3YG5gS0AjIAq/15+zX6Ivou+zb9p/9XAuwEuwaIBzsHBQZTBEoCFwBg/oH9Af0B/W/9zf0x/kf+3/1o/f38ZvwN/PD71vse/Gj8ify+/Jn9g/4W/z4A8AGXA24EFwXtBfQF/wSVAyMCggC3/ir9QPz1+wr8j/xi/SX+r/79/t/+Wv7j/Wj9Nf21/cz+RAAOAiYE/gX9BhYHmAZlBXQDTAGC/2D+0/34/bf+vf/eAMoBHwLUAUoBugAGAFz/Pv/T/28ABAGwAe0BhQFyAAz/mv01/Cz7Hvsp/Mf9q/+MAQwDuAPeAr8Asv2Z+vX3mPWQ9LH1jPg2/LIAbwWJCbwMDg5lDUsL7we7A0X/L/si+Kb2PPa+9nT4ifpo/Bj+UP8DAHkA7ACgAZwC5gOCBUoHlQj/CKMIdAdnBZcChf/z/Av7/vnv+cX6ZvxP/moAGgLoAkUDIAMlAqIAVP9L/qH9mP0l/kf/xQAxAo8DfgS9BHIEdAPUAbP/UP2T+tb3nPUC9LjybvIn9Ar3P/p7/vUD3wiTDFYPtxBaEBYOCwowBbn/FvqI9VjybfCJ8P/yX/ZO+tb+2gLXBZIH2AdUBxEGagQ4A5ACKgJ9An8D9gP0A78D5QKBAb//1P2i/P37q/ss/Fj9kP7s/04BDwJ9AswClQI+AgICuQHDAfQBMQJ5AqYCWwLRAQMBm/8u/uD8gPtw+g/6N/rP+q37xvwE/tz+CP8U/1b+Sv0W/fz8wvxM/Qf/xAAbAkcDRgTJBCkExAKOAdj/CP44/QL9Df3L/Xv/LgFCAt4CCwO0AqcBHAAU/1D+M/5C/7oASwI8BDgGLAe1BnoFwgNCAS7+afsG+oT51PmA+wb+4ACYA7IF+gYgBzsGmgRdAgUACP7Z/HD8vPye/cz+HAAxAZ8BpAErAWcAh/9e/nP91Px6/En86/t9+//6XPo3+fL3Ofhh+cT6zvw5ADwEQwdgCaAKtApPCUkGswIl/6L7OPkY+Dz4Z/nf+8H+NwE5A2oEoAQMBM0CPgErAL3/OABPAbECIAShBWQG0wVOBH0CeQBk/qv8z/sz/Ef9wv59ADACTAPyA9cD8ALVAc8A8/+C/5b/+//bAOMBSQI7AtMBsAA7/4D9nvsQ+h350Pgd+er5IPvE/Gr+if9mAPMABwG4AOz/fv6K/V79Df2s/Nn84f2n/v7+cv9JAEwBIgLcAqgDHQRhBHQE7APSAq8B4wDC/5b+xP1I/S79jv3x/Zj+mv/1AGwCcAP7A3gECAWqBKADywI3AoMB4AB2AGoAogDBALEAiwA0ANH/j/8h/8X+9v58/+7/fgD+AD0BLQHmACwALP8v/kv9vPw6/PX7SfwE/br9Xf4s/8z/HAA+APr/c/+
y/rX9ifxW+/L5OPlz+QH60fqZ/Bv/TAFhAz4FwAaRB7wHOAckBpwE3gI6AdX/i/7J/bP9zf0d/mH+r/7k/hX/Df8P/4P/VwBkAbMCKQSqBRgH1gfOBxAHzgXeA2cBzP5p/Kr6s/lg+fH5gvua/br/wQGAA5UE1QRIBFkD3wEAADn+5fze+yr7H/uW+0r8F/3Y/ZH+KP9F/1X/Sv8Q/9D+wP7Y/sr+of5m/vj9Ov32+xz79vrc+jP7lfz1/mUB0gNlBn0InQmeCXkIXgZDA9//8fyH+un4hPiu+bD7BP6vADgDCAUhBmgG2wXSBNID9gIsArYBtQE2Aq8CqAJrAi8CfAE4AOr+4v0Y/cD86fx6/U/+Qf8tANIABAHDAEYAqP/0/kX+uf1t/aj9Hf5Y/nD+j/6X/jv+vv1m/WD9lf34/Y3+Wf8FAJ4A3AB2AJD/TP7d/AX73fmd+e358vr9/I7/9QEtBCYGXAekB0cHIQadBJ0CswA//zH+nf2W/V3+Mf/v/wYB7AFTApcCqwJ4AjMCJgIOAgsCVAKhAv0CKAMKA+0CcAKPAZ8Amf+D/pX93vxs/F78pvwy/f396P7P/54AFAFQAWEBFAGJAAMAWf+v/if+q/0z/db8rvyD/HP8l/z8/Hr9/v2n/mz/DQByAKAAhAADAD7/df6q/eH8T/wv/F785/zS/QX/PwCHAbACSwONA5kDbQMUA60CeQKhAgkDYgPIA0YEVwTnAzIDCgJ2AOf+ov2c/Aj8Rvw7/cH+dgBZAlEE3wWpBswGUgYZBUoDMwEW/yP9lPuo+mf6fvoL+x38Zv1v/l3/SgAFAWcBSAH7AJwAFwBP/3v+zf06/bv8bfxW/In8BP2c/VL+Bf+m/////f+Y/9H+rP1f/Ib7K/so+8f7df2j/8oB2APNBU4HFwj1B/kGoAXhA9cB1P9J/kH95/wm/cT9rP7T/98ApgE6AmQCawJ5AoMCdQKnAg0DYgOnA9gDsQM9A48CiAEsAK7+R/0Q/CT7pfrE+oP7ovwR/sj/YQGyApoDBQTIA/ICwAEqAGT+vvxv+3r67vkN+rv6mvu2/P79Kv8FAIQA0QDgALUAUwD8/7X/Yf8m/wb/6P7d/vb+B/8T/y3/c//H/ywAkQAhAcYBKQJbAnoCdwJAAgUC1AGzAZ0BsQHeAQsCGAISAgwC3wGIARgBuwB4AE4AQABWAI0A5gBVAZ0BugG8AaoBUQG3AAIAX//X/mn+Ev7u/QX+Ov5//rD+4v4b/1D/Vv8//xL/6f6n/lr+FP7S/a79rf3F/cz99/1N/rP+7P4l/1P/dv94/zz/8v6s/nD+R/5Q/pb+Mf8XACwBMgJQA0YE7QQwBQ4FiQSpA4oCUgETAPf+Q/4N/k/+xv63//MAOgI2A/YDfwSnBGIEzQMGAyUCPAFZAJL/2/5m/h/+Af7w/Rr+fP7v/l3/0/9jAOQAIQEnARwB3ABZAKr/EP9y/t79Z/0q/Rf9EP0m/WT9pf3Q/fr9H/5A/kj+Tf5M/k7+T/5L/kj+P/46/jn+QP5h/qT+9v5o/wIAuwBnAfkBhALyAi8DJgPwAqUCQgLOAVIB4ACOAF4ASABGAGcAtAAfAY4B/wGDAgUDawOcA6ADbQP9Ak8CbwF2AIb/vv4z/vr9If6r/nr/ZgBOAQwCcgJsAv8BRwFZAFD/Xv6x/U39LP1O/aT9Ef5r/qH+uv6x/n7+Of4E/vb9+/0V/lL+tf4U/1X/fv+j/6//jf9Q/xf/6P6x/oH+bP51/or+vv4b/5j/JADJAIYBQgLaAjwDdwOEA0IDrgL3AUIBlQDs/2r/NP9U/6n/JQDEAIEBPALOAiIDMAMEA6QCDwJWAaAAFAC6/4b/ff+i/+D/FAAtACkABwDD/2D/+/6k/lj+G/7+/QL+GP4v/lP+gP6j/q7+o/6O/lv+//2H/Rr9vvxn/DH8Xfzt/LP9pf7a/zsBdgJcA/ADNAQGBGUDcAJdAUYAP/9v/vP91f0G/on+SP8
nAPsAsgE9AokCkAJWAvYBiwErAeAAvQDEAPsATgGnAe4BGQInAhQC3AGAAR4ByACKAFkAMgATAAsADQD6/8z/n/+H/3T/YP9T/2T/iP+u/8v/3P/W/7D/ff8t/7f+K/68/Wr9Nf0c/Tv9l/0M/nn+zP4V/zf/Mv8E/8/+n/6G/or+pv7h/jj/m//v/zEAWQBgADwA/P+5/4f/Y/9j/6P/KQDQAIUBRwL8An4DtwOpA1gDyQIJAjgBcADI/0n/Df8P/07/t/81ALUAKAGCAasBpgGFAVIBDQHJAI4AZwBAABkA8//I/43/OP/o/qf+cf4//i/+TP6L/tL+H/97/93/KwBLAFIAQQAaAMf/Uv/c/nP+Gf7L/Z39oP3U/SP+hv71/nD/5v9AAHgAnACwALAAoQCLAHUAZQBdAFQAQgAqABwAFgAUABUAMQBpALYACAFVAZcByAHcAcgBlwFZASMB/QDrAPkAKgFxAbQB2QHhAbwBYQHOAB4Aav/F/jz+5f3X/RH+hv4f/9H/fgAQAWkBggFYAfQAXwCu/+7+Nf6Q/Q/9vfyU/Jz81vxC/cj9Xv77/o//CgBdAIIAfwBcAB4Azv96/y3/9/7Y/tH+7f4s/4b/6/9OAKwA7wALAQQB6QDBAI8AaABdAHQAoQDaAB4BZgGdAbkBuAGkAXwBPwH1AK8AfABaAEgAUAB0AKwA6QAZAT8BUwFRATIB+wC2AHMANAD4/8b/oP+M/4L/df9k/1L/Qv8t/w//7f7Q/sH+uv64/sP+2/4A/yT/O/9J/1b/Yf9m/2P/Y/9w/4D/hf96/2f/Tv8t/wX/3v7I/tP+/f5C/57/CQB/AOwAPQFrAXQBXwExAe4AoQBgADYAIwAmAD4AZwCXAMIA4ADrAOQA0AC0AJYAhQCKAKUA1AALAUABZwFzAVgBGgG7AEsA2v90/yf///4A/yT/XP+f/9//DQAdABEA6/+3/3j/PP8L/+v+3P7d/uX+7/77/gT/BP8B/wD/BP8T/yz/UP9+/7H/3/8CAA8ABgDr/8H/iv9S/yr/F/8h/0j/i//l/0gAqAD2ACoBPQExAQUByQCHAE8AKgAhADkAbwC6AAwBWAGRAa0BpQF8ATgB5gCQAEcAEwD7/wAAHgBOAIQAuwDkAPwA+gDeAKoAYAAHAKz/V/8T/+b+1P7a/vf+I/9U/4L/pv+7/77/sf+Z/3v/X/9I/zz/Ov8//0z/V/9j/2f/Y/9W/0T/Kf8P//n+6v7q/vz+If9a/6D/7/87AH0AsADOANUAyACtAIgAZABEAC8AKAAwAEYAZwCPALgA3gD6AAwBEgEKAfcA3gDEAKoAlgCLAIoAkwChAK8AuAC8ALYAowCCAFgAKAD3/8n/n/+B/2//av9t/3j/if+c/67/vP/G/83/zf/H/7r/qP+R/3f/Xf9F/yz/G/8N/wX/Bv8N/xv/Lf9E/13/d/+P/6f/vP/N/9//8f8BABEAIwAyAD0AQwBBADkAKgAXAAMA8f/m/+b/8v8MADYAaQCkAOAAGAFHAWgBeQF5AWUBRAEUAd4ApQBqADQABQDf/8b/uP+5/8X/3v8EAC8AXACHAKoAvgC+AKcAegA5AOr/lv9H/wb/2/7M/tn+/v4z/3H/q//Y//L/9v/h/7j/hP9L/xj/8f7f/ub+A/81/3f/v/8DADcAVgBcAEgAHgDo/7D/fv9f/1P/YP+F/7n/9/82AG4AmgC5AMgAzADIALwAsACiAJMAgwBxAF4ASAA0ACMAGgAdACwASABtAJQAugDYAOUA4gDPAK0AgwBTACgABgDu/+T/4f/j/+j/6v/m/9r/yP+x/5z/i/+D/4P/jf+d/63/u//F/8b/v/+z/6X/l/+O/4b/hP+D/4H/ev9w/2P/Uv9E/z7/Q/9V/3f/qP/h/x0AVAB/AJgAmwCOAHAASgAmAAsA/v8AABAAKwBMAGsAgwCSAJMAiwB8AGoAWwBQAE4AVgBlAHgAjACeAKQAnwCMAG4ARQA
XAOv/xP+n/5r/nP+u/87/9v8jAEsAaQB7AH8AcgBXADEABADU/6b/ff9a/0D/L/8p/yv/NP9C/1X/av99/5D/ov+v/7z/x//R/93/6v/3/wIABwADAPn/5P/F/6f/i/93/3D/ff+b/8f///88AHcAqgDPAOMA5ADTALMAiABaAC0ABwDw/+j/8P8IACkAUAB6AKAAwgDbAOsA8ADrANoAuwCTAGIAKADw/7v/kP92/2r/cf+I/6n/z//z/w8AHAAaAAkA7P/F/53/eP9c/0v/R/9S/2f/hf+p/8z/6v8EABAAEQAEAOz/zP+q/4n/cv9k/2b/df+R/7P/2f/+/x4ANQBDAEkASgBDADsAMQAqACIAGgAVAA4ACQAHAAgAEAAeADMATABqAIQAmwCrALEAqwCZAIAAYABAACIABwD4//H/8//+/w4AHwAwAD4ARgBJAEMAOAApABkACAD5/+z/4v/e/9n/2P/T/8v/wv+x/6H/iv91/2L/VP9Q/1L/Xv9w/4b/nf+y/8f/1P/d/+H/4//i/+D/3//f/9//4P/h/+D/3v/f/+H/6P/1/wgAIwA/AF0AeACNAJYAlwCMAHYAWwA7AB4AAADm/9b/z//S/+X/AQAoAFUAgQClALoAvQCrAIYAVAAdAOr/xv+y/7H/wv/f/wIAIwA8AEwATgBHADYAHwAEAOr/0v/D/73/vP++/8T/yf/R/9X/1P/V/9f/2P/W/8r/vP+v/6P/kv+D/3v/f/+H/43/lv+m/77/3P/2/woAHAAyADwANQAnABkAEwAQAAkA//8BAAkABgD4/+v/6v/6/xEAJQBFAHMAnwC3ALUAngB9AFoALQD8/97/3f/3/x4AQQBfAH8AkwCMAGYAMAD5/9L/sP+J/2//ef+Y/7j/0P/l/wcAJgAsABYA+f/j/87/s/+V/4j/mv+2/8L/yf/X//D/AAD3/+n/6v/z/+j/zv++/8P/0P/U/9L/3f/0/wwAEwAQABQAIwAoABQA8v/d/9f/0f/N/+D/DAA5AFMAZAB1AH0AcQBdAE0AQQAxAB4ADwAKABAAHgAyAEgAVwBfAFwATQA1ABkACAAFAAUAAQD7//j//v8JABYAJgA9AFIAUAAtAPX/vP+E/1P/Nf87/13/g/+l/8r/6v/0/+v/6P/z//f/5f/W/93/5v/d/87/2f/8/xgAHwAdACUAKgAaAPj/2f/P/9D/0f/S/97/AgA1AF4AbwB2AIMAigBsAC0A+v/s/+f/zf+3/8//CwA1ADgAMAA6AEQAKgD3/+D/+f8WABQACAAUAC0AMQAnACoAQABFACoA/f/Q/5//df9p/3//qP/b/xEAPABUAFcAUgBMADwAJQARAPv/3v/G/7n/tP+y/7T/wP/U/9n/zv/M/9X/2//Y/9///P8cACIAFgAQAA0A+P/V/77/vP/J/+L/AAAfAEAAagCCAHEATAA8ADgAFQDh/9L/7P/7/+7/8v8cAEQARgA+AEYASQAxAAwA5//M/8T/yP/L/9X/+/84AF0AVABGAEwARAAQAN//4P/6/wAA9P/0//v/9P/f/83/y//Y/+7///8BAP//+P/n/8X/qf+s/8n/7P8QADoAXgBiADwABQDc/7z/lf92/4P/qv/B/8H/yf/o/////v/1//r/AAD3/+P/zP+9/8P/3P/1////GQBPAHAAVQAsAC0AQgAsAAEABwA5AFYASgA6AEYAZABqAFEAPgBJAFcAPwAKAPf/GQAwACEAHAAuADMADgDZ/8L/yP/N/9n/9f8PACMALgAiAAwA///3//P/6v/T/8X/xf+3/57/lP+b/7X/y//N/9n/9f8IAAsACQAAAAAABQD1/9r/1P/i//b/+P/q//L/CAD//+f/4v/p/+3/7v/2/w8AJgAyAEoAZQBgAFAASQA5ACcAGgAIAPj/8f/w//z/9f/e//H/HgAdAAAA//8YACcAFAABABsAPAAzACIAGAAHAAUACAD0/9j/yv/J/8b/pP+
U/8L/8//0/+v/8/////T/zv+8/9L/5//o/+T/5//1//j/0f+m/6r/v/+8/7L/zP8TAE4AVgBVAG0AcQBJABgA+f/3//7/+P/p/+z/CAAjADAAPABdAIAAeQBQAC8AIAAOAO//3f/h/+7/+v8AABIANgBTAFQAOQAbAAMA4f+u/4z/nP/G/+P/+P8cAEUAVwBBABwAAgDw/83/pP+V/6L/uP/H/9z/AwAuADwALgAZAAgA8v/I/5z/kP+l/7P/tv/U/xYAVQBqAGEAWwBMABoA2/+6/77/zv/Y/+X/+v8IAAgABAAFAAwAGQAeABQABwD///n/8f/1/w8ALwA/ADsANgAwABoA+P/f/+H/9P8EAP7/8v/2//n/7v/c/+H/BgArACoAFgANAA0ACwD8//X/AwAUAB0AGQADAPX////+/+X/xv++/9z/7//r//7/KQA6ACwAEAD6//H/4//S/9P/1//k/wYAHQAcACEALwA1ACkAEAAEAAUA9f/i/9//1P/F/9H/7f/7//P/9f8RAB0ABQDz//7/FwAaABkAKAAqABsAEwABAOP/2P/a/+z/9v/f/+T/BQD8/+7/AwATACcAKgAQABoAKgAZABoAFgACABMADgDb/8v/4f8IACcAHAAdAEIAMAD4/9v/z//O/8//zf/i//n/AAAYACsAJQAsADEAJQAKANj/t/+6/7//zf/j/+z/AgAXAAgA+f8DABkAIwD9/8r/yP/m//D/8f8OAEUAZQBUACoADQD8/+H/wf+t/7H/zv/z/w8AIwA5AEAAMAAWAP7/+v/3/+b/4//y//b/+f8NAB0AHAANAAAABQAEAPP/8v8EAAkA/v/x/97/3//y/wcAHQAuAEMAYABbACoA/v/t/+P/w/+N/33/tP/p//L/6v/y/xwAMgAQAAAAJAA/ADAADgAAABUAIwANAAEACQABAPT/8P/m/97/3P/Y/9z/4f/f//n/KAAtAB8AHwAUAAsAEwAYACEAIwAMAAMA+v/h/+D/8v/z//j/CQAIAAEABQALAAsACQAUACEAEgDz/+z/+P/t/9f/4P/0//n/+P/p/9L/0f/a/9r/0//S/+//DwAMAAoAJAArABwADAACAAgACwD8//r/+v/l/9f/1P/e/wIAGgAfAC4AJgAKAPr/7f/l//b/BgARAAYA6f/z/xYAFAABAAcAHgAnABcACgABAPj//v8NABMAEAACAAcACwD0/+j/8//3//n/+v/9/w0ADwACAAAABQAFAA0ACwABAPz/8f/l/+L/4f/v/wQADAARABAAAQD8/wEA/f/3//H/8v///woAEQAVAA0ABgD///X/9v/7//T/7f/k/9b/4P/2//3/DAAPAAYAEAAIAPP/CwAdAAUA/P/7//T//v8EAAUAHQAhAA8ACgABAO//8//0/+n/7v/+/xAAFwARAA4AEAAGAPv/AQAIAAUA+f/u/+3/6//p//H/+f8DAAsABQAGAA4AFQAYAAgA7P/c/9D/1P/p//H/9v/2//P/CgAgACAAMAA8AB8ABgD2//L//v/8//L/9v/r/+L/8v/8/wMACgD9//H/9f/+/woAHgAyADMAFQD5//D/9v8HAA0AEAAdABAA5P/K/8n/1v/x/wQAAwAHABgAFwASABYAEAAOAB0ADQDw/wEACgAPACMABADe//P/8//l//7/7//W/+P/0//I/+3/AgASADoAQQA6AEwAOQAPAAAA5v/E/7n/rf+y/8//6P8GACMAIwAnADEAGwD4/9b/xP/Z//T/DAAuADIAIQAWAPX/3v/h/+b//f8GAPn/CQAPAPv/BAAWACIAMAAaAAsAGQAGAOj/5v/f/9j/7f8CABYAIwAZABAADgD2/+f/AQAPAAEA+f/6//f/+//2/+P/5f/1/wIAIQAkABIAHQAoABsAGQAWAA8AEwAJAPL/8P/t//D//f/7//j//f/3/+7/7f/k/+T/6v/i/+n/AwAQABoAHQAXABgADgD4//n//f/
3//X/7f/p//r/+v/4/xIAHQATAAMA6P/m////AwABABEAEAALAAkA+v/7/woACAADAAIAEAAoACAABwD///D/5P/m/+f//f8XABMACAD4/+r/+f8CAPz/DQAaAAIA6f/h/+f/7v/o//L/EwAaAAcA+P/s/+r/+P/0//P/CgAaABgACwD+/wwAFwABAP7/CwD+//T/8v/0/wMA+//v/wkAGAAIAP//7v/p/wMABgD2//7/CQAQABoAFQAUABoACQD+/w0AFQAWABwAJgAqABsABAD3//T/9f/z//D/7f/Z/8P/z//r//3/CAAWAC4AOgAkAAAA9f/7//r/9P/4/wgAEwAXAAsA+f/z/+z/5v/1/wQABgAHAO7/zv/S/9//6v/9/wAA+/8NAAoA8v/1/wYADgAQAPz/8f8RAA8A6v/q//f//v8PABUAFQAeAAcA7P/2//v/CAAmACQADAD9/+7/7////w8AHwAdAAoACAAEAPD/7v/s/+f/7f/f/9z/AgAVABEAFAAKAAUACgD5//T/DgAZABEAAwD7/woAGwAcABUACQD7/+7/4//g/+b/7f/x//H/7v/y//v/BwAWABkAEQACAPL/7//3//b/9v/7/wQACgAJAPz///8NAAsAAgAAAAcADwD+/9v/1P/r/wMAEQALAAQADwATAAAA/P8PABgADgD+/wEACAD3/+z/AAAGAPL/6P/z/wMABQD8/wMAEAAMAAcACAD///r/AwAFAPn/+P8JABgAEAACAP//9f/j//D/AgAGAAsABwD9/wQABQADAAUAAAD6//b/5//o//r/AwAGAAAA7//u////CQAVABsAFAAOAAIA7v/0/w0AGwAYAPz/3v/c/+D/1P/i/xQAMgAaAOr/3f/y//f/7/8AABkAEwD6/wAAJAAwABUA9v/0/wEA+f/r/+7//P8QABkA+//T/9r//f8VAA8A+//8/xUAGgATABkACQDr/+j/9f8AAA8ADQD3/+7/8P/6/w0AEgAOABkAFADy/+T/+P8SABcAAgDq/+T/4P/Z/+P/AQARAAcA+v///wYAAgDt/+D/7f/2//f/DQAqAC4AJgAWAAUABwAKAA0AFwAXAA4ABgD6/+L/0f/U/+L/7f/v//X/BQAMAAIA8v/2/wQADQASABUAGQAcABkADgD+//X/+/8BAPr/+P8OABkAAQDi/+b//f8FAAIAEQAbAAEA6v/t//X/BQAOAAoABwDw/97/8f/n/8r/3P/w//j/BwACAAMADQD///j/7f/Q//b/QQBPACwADQANABcAAwDr////EgD5/93/5f/1//f/6//e/+n/8f/l////PABRAD0ADQDc/+D//P/6/woALgAwABcA7f/X/wcANgALAOv/CQALAPL/4f/o/y0AVwAfAP7/BgD+////8//V/+f/AADg/9L/5v8BAB4ADQDa/9//6v/J/8L/4/8MADEAMAAPAAsAJQA8ADwAKgAXAPj/wP+Q/5b/2f8YACYAMQA8ABgA6v/a/+X/AwAVAAoA+//q/+X/EAA4AC4AFAD+/+X/0f/T//T/EgANAPL/4P/s/wIAEwAqADwAKAD3/9n/3v8FACkAKgAfABkACwD0/+H/0f/Q/+j/AAAOABIAEwAWAA4A6f/l/x0AOQAGAMH/r//R/+f/7f8VACwADgD2//z/DAAVABEAGwAjAAwA/f/3/+r/5f/u/wsAJQAeAAwABQAVABQA4P/B/9D/2v/q//3/EAAyACwAFwA4AEYAGAD2/9//3f/7/wQABgASAPv/4v/t/+7/BwBFAFsAMQDs/9D/2f+//7H/5f8NAP//8v8KADUAIwDc/8z/2P/l/xQALQAfACcAFQDh/9L/z//l/xYAIwAfACYAEADz//j///8BAAQA9f/I/7n/6f8QAAIA6//b/+T/DAASABcAUQBdABAAzf/C/9//9v/Z/9//KAAtAOL/2/8SACwAHAD3/+7/BQD7//b/IgA/AEUAPAAQAPv/EQA
UAPz/5//t/wIA6/+2/7f/9f8oACgABgACAB0AGQD5//7/JQAlAOz/vv/f/xkAEAD0/wkAHwANAP3/+v8BAAAA/P8DAPn/3v/p/w0AFAAHAAAA9v/f/9n/8/8JAAUA9f/2/wQABgD9/wkAJwAuABMA7v/e/+f/+v8VADIAHwDh/8X/5/8SAA4A9//5//P/3v/k//P/9/8CAAoACQAJAAIA//8OAAgA8P/3/w4AGwAjABgABAAFAP7/6//5/xMAEQD8/+X/2v/j/+b/5P/0//7/9v/v//X/FAAmAAQA+/8fACMAFAAdADEAMgD+/8X/1f/5//H/8f8BAPX/zf/B/+7/GwAXAAgAEwApACEA/v/8/yAAJwANAAYAFQAlACIACQD2/+L/vf+t/9P/AgATAAcA6v/l//r/EwA0AFcAUQAZAOX/4v8BAAUA7f/0/xkAEgDt/+3/AAD2/9j/0f/k/+3/8/8cAEMAJwDs/9//7f/0/wMALwBVAC0A2//M/+7/9f/g/+D//P/t/5r/d/+//wsAHQApAEAASwA6ACAAMABDACkAEQD8/8r/pf+u/9T/CAAhAA4A+v/u/+r/9v8CABYAOwBAACUADQD///r/9P/w//z//P/S/7X/yf/o/wIAHAAmABAA4//V/wcALwAlADUAWgA2AOf/6v8yAEgAFQACACgAFQCv/4j/1f8LAO7/z//c/wIADAD9/w0AGAD6/+n/8f8BACkANwAiABYA/P/m//7/DgAJAAsAAADw/+v/z/+9/+T/+f/o/+X/7P8KACwAGwD9/w0AFgALAAwAGwA4AEQAHwD7//z/BAAEAPn/9//9//T/6//l/93/6f/7/wcACQD1/+L/+v8RABIAKAA6ACUABQDh/+L/AgADAAIADQD3/+L/4//j//X////w//T/9P/l//L/AQAFAA0A+//v/wQACgACAAEABAAGAPv/9f/s/+b//v8SAA0ABwD0//H//P/s/wEAMQAaAPj/BgAYABsABwD2/wcAEAARACAAHAAIAP7/+f/y/+3/BgAfABUABQD0/+L/7v/+/wgAIgAhAAUA/P/8//v//P/p/+L/8P/1//r/DQAaABUADQALABEADQD1/+//BQARAAUA7//h/+X/9f/z/+H/8P8PAAgA7v/f/+n/DQATAAYAFgAmACEADgD5//b/9f/n/+z//f/+//P/8v/7/wgADgAPAAYA//8DABMAIQAbABAABADc/73/3P8bADAAFgAOABEA+f/b/9v/7v/0//f/DwAjABYAAAD0//L/7v/h/+r/BwALAPr/8v/3/wYA/P/q/w4ARgA8AAkA8v/p/9X/3f/1/wcAHQAeAAAA9f/w/+n///8IAPj//v/5/+L/9f8QABMAIQA5AD4ALQAEANj/0//d/9f/6P8IAAQA/v8cADMANAAoAAYA5P/f//v/HQAUAOj/1v/l/+H/0//n/wgABADk/9T/8v8kADMAJQAtAEAALAD4/93/9f8fAC8AGwD9/+b/2f/h/+T/2P/Q/9L/2f/c/+T/AQAhABoAAwD5////HgA1ACcAHQAbAPP/vP+3/97/CAAdACcAGADj/8f/7v8kACMAEwAsADkA+f/b/wwALQAgAAkA/v/9/+z/4/8BAPr/yf/Q//P/+P8AAP3/7f/m/9f/2v/+/xYAKAA/ADkAHQABAPD/9/8FAA4AGQAQAPb/6v/1/yYARwAhAAIA9v/B/5X/pv/w/0IARAATAAgA/P/x/xkAMgAmABEA6//e/+j/1f/P/9T/zf/0/xEA6v/T/9j/6v8VAAsA3P/i//P//P8rADoADQDo/9L/4/8xAFIAOgA+AEkALgAGAOT/8v8nADsAKgARAN3/qP/B/woAOwA6AA4A3P/c//f/CgAXAAQA4//t//7/6v/p//v/9f/1//3/7v/n/+f/4f8FADUAJgAQACEAHAATABsA/P/j/wIADQD+/w4ACQD1/w8AFQAPACIA+v/C/+H/8v/Y/+P/7v/
s/wQAAgDy/wAABwAGAAYA+//v/93/x//q/yoAMgAIAOH/3P///yIALAAqAP3/tP+x/+j/CQAqAEMAGADp//v/CwDx//3/NABVAEAACQD5/yQAKwAeAEkAUgARAAMAJAAlAA4A6P+5/6//tf/M/xEAKgACAO//1P+r/7//6f8OADYAHgDl/93/3P/X/+f/6P/h/+X/4f/u/xEABgDM/7n/8f8pACwAOABNAD8AJAAeABkAHwAbAO7/wv/B/+X/BQDr/8T/y//E/7X/z//3/xsAHwDv/8z/2//m/wAAOgBZAFEAKwAGABQAPQBJAEcAQQAnAPX/0f/p/yYATwBJABwA6f/N/9z/FgA0AAkA6//6//D/6v8XACsAGgAhAC0AJwAKAOb/BwAzAAkA3f/i/83/xf/p//b/8//q/8f/o/+K/6n/CgA5ABkA+//Z/8L/5f8PACoAKQD3/8//2P/4/zUAWAAwAPn/5P/c/+T///8mAFAAQgDx/8L/0P/T/9n/AAAZAA4A3/+k/6b/zf/m/xEAOQArACUAQgBBACYADgD2//P/AwAAAAEAHwAlAPT/1f/a/+7/BwD6/+r/HQA6AP7/zf/A/8T/BwA4ACUAMgA4AAwAKQBMAAcA8f82AFQARgAgAOn/2f/N/7P/5P8hAOv/p/+9/9b/2P/w//n/AwAsACgADAABAOT/7v8jAAMA2f8DAAIA3//z/wUABgAIAPP/AAAqABQAAgAuAC0AFAA4AD8AHgApADgAIgD1/7X/tP8EABwA9f/r/8z/h/+A/7//9P/1/9T/zP/p/9//v//c/woACAAHABkADQD7/xcAOgA6ABwAAgASADQAMQA4AGAAQQDk/8T/3/8CAB4A+//F/8v/1P/T//7/GAAZAB8A+P/W//b/DgAHAAcA8f/N/6b/l//i/z0ALADu/9j/7v/2/8j/z/88AG8AKQDt/+/////7//b/EAA5ADoAFwABAP7/BgAUABYAFgAvAEUALwAgADMAKQD9//L/CQASAAcACAAdAB4A+P/m//z/EQALAAIA9//k//H/JwAnAOb/xv/r/x4ALQAEAOb/5/+2/4L/sv/3/wIA7//G/7P/3P/t/9n/7P8MAAcA+P/r//P/CADj/7D/5P80ACUA7P/Z/+T/9P/w/+//CAD4/7T/pP/f/xoAPAA+ACMAHAAoAB8AJQBLAFkAQQAeABAAHgApABsAFgAZAP//4v/k/wcAMgBAABYA5//e//f/BAAMADYAUwAYAMH/vP8IAFEAQwD1/87/0v/M/+T/GAAgAPb/sP95/7P/GgAxACUAFQDz/+n/2//H/wUASQAoANj/lP98/6H/2f8gAFIAEwCu/8D/GwBnAIcAXgAZAPX/8/8XAD8AJwAGAPf/2P/J/+b/GAA/ACsA7//H/7z/4v8kADQALwAoAPT/yf/b/woAKgARAM3/1P8GAPv/AQA2ACcA6P/T/97/+P8VACUARwBUAA4AzP/X/+7/CgAwACEAAAD7//r/EAApAAUA5f/x//L/6//m/8//v//O/+n/+/8CAAAA9v///yAALgAcAPT/zf/r/y0AEwDU/9b/8f8OACUAHQAZAP//z//u/zQAKgAEAAsAFQANAAsACQAGABUAJgAcAAcA/f///wwA/f/z/xMACQDT/83/0//H/+n/IwAzABoA6P/f/wcABAD//yQADADd/+//8/8FAEgAOgDm/7r/tf/f/xwAGwAUABwA5v+9/wIAPgAkABcAMAASAOD/1f/i/yAAVQAjAPf/CgAXACMAHADy//3/9/+n/5//5/8AAAQAEQD//9j/uf+3/+//IgAiADEASgAfAO//9P8AABYALAAXABAALAAnAA4AAQDf/7L/rP/j/zwAXgBAABAA2P/B/9P/4/8IADUAPAAVAMv/q//4/z4AIgDx/+3/DQAtACsAJwAhAAwA6f+8/7r/7/8LAPL/2v/j//j/9f/z/xgAMwANANv/7f8gACQAHgApABoAAAD
m/8n/3f8EAP//9//5//j/BwAHAOj/2P/c/wQAJgAEAO3/HwBDADcABwDN/9X/8//6/xwAOAAfAOv/mP+B/+r/SwBIABwA8v/i//H/9/8ZAE0AMwDt/+P/DQArACQAEQAXACIACQDk/+j/EQAaAOz/yv/E/8P/4f8QACAAFgD5/9//5//x//j/KgBWAEIAFQDq/8f/wv/q/xoAGwDy/8n/vv/r/zMASAAjAOr/x//4/zYAJgASABUA+v/o/+b/6P8ZAD0ADgDe/9z/1v/O/9j/6v8EABQA8P/S/wQAPQA5ABYA8f/z/zIATAAgABQALQAnAPr/vP+w//r/PQA2AAwA6//b/9r/5f8NAB4A7v/S/wIAHQADAO//AwAuACgA5P/M//z/FwAMAP///v/3/9T/s//V/xYAJgAJAPD/+P8YAB4ACQD7/+//6P///x8AKAASAO3/4P/d/8P/tP/g/yMAPQATAOH/8v8PAPr/7/8AAPn/4f/x/yYAUQBOABgA7P/v//T/+f8UAP3/yf/h/wAA2//T//T/AgAUAA8ACQBKAHAAPgAwADIAAgDu//z/CQAZAPz/yv/V/+n/1v/l/w4A9//L/8//6//4//v/BgAoAEIAFwDr/wwAIgANACIAJADz/9r/4/8CACIA+v/R/+b/4f/l/xwADwDl//b/9f/e//T/KgBHABsA1//5/zsACgDX/wwAJwAHAAgAAwDi/+D/4//Z/+7/CQAsAE8AJADV/9H/7f/y/wcAGAACAOz/7P/c/8n/5/8fACwACADV/8n/AgAoAA4A5//B/8r/JABeAD8AJAAeABgADQDp/9v/DgAzACAA/v/t//L//f8TACoACAC8/6b/xf/u/xkANwAmANf/o//j/zUAOAAhAAIA5P/y/wAA/v8HAAMABgAJAN3/4/81AD4ABgD2/+n/2f/p/woAKAAqAP7/2f/S/8f/5f8lACAA+P/2/+7/5/8SADYAOwA0AAsA2//q/x4AMwAqAPX/nf+B/7j/BAA9ADEA7P/g//7/+v8SAEcAPwAUAPT/3v/5/y8AOAAkABAA/f8FAA8A/f/2/wgA7/+5/6z/wP/y/zkAMgDk/8j/+P8tAEoAQAAQANv/yv/m/woABgD2/xIAKQDs/5X/oP/p/xYAKgAOANX/2P/s/+X/FAA7ABIA+P/+/xEAQAA6APL/3//1//P//f8MAPn/+P8VABgACwD//+v/6f/u/+b/+/8oABwA+f/5//j/9P/3//H//P8QABEADwAFAN7/1P///yEAKwAgAPv/5v/x//T/6v/t//7/HQA0ABwA8v/v/wEA9//l//D/BgADAO3/6P/x//b/+f/v/+j/9P8FABUAGAAHABEAQQBDAAcA5//3/xQAJAAPAOn/6/8AAPz/8//y/+n/3f/U/9D/8v8kACYADwD+/+P/4P8MACcAJQAnABwAAwD1/+7/+/8gABsA8//X/9L//f83ABwA3P/n/xIADgDn/8z//P9NADAA2v/Z//7/DAAgACYADADl/7j/v/8IACsACADk/9H/1/8CACYALAAkABAAAADu/+X//v8RAAYAAAD4/9//1f/v/xcAHwD8/+r/BwAWAAsACAAJAAUADQANAPv/+P8LAAUA5P/Q/97/7v/k/97//v8aAA0A6P/d/wMAOABOADkABQDX/+D/DAAnABsA8v/j/wQAEQD4//T/BgAJAAEA8v/n//L/AgAJAAYABwAdACAA+v/g/+n/BQAdAB8AFwAAAOH/9P8kAB0A+f/7/xIACgDr/+L/9v/3/+H/6v8HAAUA7f/4/xEABQDu//b/EAAYAAsAEAAeAAMA4//5/yMALwAlAAsA6v/n//3/DQAGAO//6f8DAAkA8v/s//3/CQD9/+j/9v8VAAoA5//d//D/+//z//n/GQAzACYA6//N/+f//f8PACgAJAAjACkABgDu////+P/t//r/5//M/9T/8P8DAAEA6P/n/wgAEAACAAgAEAAJAAUAGwA
1ADkAJQAJAPP/5P/u/xMAJQAGAN//1//i//T/BAD+//n/BgAKAAkACAAGAA4ACwD1//v/FgANAOX/wP/F//7/HwACAN3/0//t/xsAHQD+/wIAFgAZAB0AHAAdACwAIQD0/+v/BgARAP//2f/D/+L/BQD3/+H//v8oACEA9v/e//j/JwAmAAEA+v8HAPv/+v8LAAwACQD9/+P/6/8LABcAFgABAOn/AAAcABIAAAD5//b/9v/z//3/EgATAP//8f/1/wEADwAVAAUA9/8AABAAEwALAPb/8v////P/8v8IAAEA6v/w//b//v8CAOf/4/8EAAoAAwAKAAMA/P8DAAYAFgAfAAIA7P/5//7/9//6//b/7v/3/wEAAgD4//D//P8KAAYA//8HABAAAgDs//P/AgD9//z/BAAFAAgABAD3//b//P8EAA8ACQD///z/9f/7/wkACwAEAAkAEQAKAAAABAAHAPz/8f/v//T/9//+/wwADgD1//D/DwAdABAACgAJAP7/9P/0//z/BwAGAOr/3v/s//n/BQAOAPf/6f///wcAAwAJAAIA+P8AAAIA/f8KAA4A///8////+v8AAAkACAAIAAgA//8AAAQA//8FAAoA+f/w//3/BAAAAPz/9P/y//r/AgAKAA8ABQAAAA8ACQD7/wYAEgAMAAcABQD8//v/AAD+//v/9//r/+b/6P/p//T/AgAAAPv/AgAJAAgAAwD//wYADwARAAsABAD9//7//f/6//r/9v/w//L/9P/w/+7/8v/+/wYA///5/wEADAAKAP//+f8CAA0ADAAJAAcABAD8//r/BAAIAAUAAQABAPv/+P/4//n//f8EAAYADQAMAAAA//8LAA0ACAAEAPz/+/8GAAwABQD5//L//f8NAAwAAQD7//f/9//9/wIAAQD6//X/9////wUABAAAAP3/+v/7/wEABwAGAAQA///8/wAABQABAPn/9v/6/wgADgAIAP//AAACAAcADQAFAPj/+P/8/wAAAwD///v//P/6//r/CgAOAP7/9P/4/wUAEAARAAgABgAEAAMADQANAPr/7f/u//3/DgANAPn/8f/z//b/9//z/+7/9v/8//3/BAANAAgAAAD//wAABAAIAAUAAwADAP///f/8//b/9f/6////AAD///v/+//+/wIABAAFAAIAAAADAAcABwADAP7/AAAFAAMA/f/+/wIA///7//3/BQAHAAQAAAAEAAkABAD+////AQACAAEA///8//3///8DAAUAAgAAAAEA//8DAAUABgADAAQAAgABAAEA/P/5//7////6//v//v/+//z//v///wEAAwAEAAUAAwAAAAQABwABAPv//f8AAAAA/v/8//z//P/6//3/AwAEAAAA/////wEAAwADAAEA///+//3///8CAAIAAQAAAP7/AAABAAEAAAD/////AAD/////AAADAAEA/v/+////AAD//wEAAQADAAEAAAABAAAA//8BAAAA/v///wAAAAAAAAEAAwACAAAA/v8BAAQABwADAAAA//8CAAUAAgD8//z//////wAA/v/9//3//v/8//////8AAAIABAADAAEAAgACAAEAAwAGAAQAAgD///z/AQADAAAA/P/+/////v/7//f/+v8BAAAA+//7//7/AAAAAPz/+/8BAAQAAAD//wEAAwACAP////8DAAcAAwD+/////////////v///wIAAgABAAIAAQAAAAIAAwAEAAYAAgACAAEAAAAAAAIAAQABAAIAAwACAAMAAwABAP7//f///////v///wIAAgABAAAAAgACAAEAAAD+//7//f////7///////7//v/+////AAAAAAEAAAABAAEAAAACAAIAAQAAAP7//v8AAP/////+//7//////wIAAAD//////////wAAAQABAAIA//8AAAMAAgABAAAAAAACAAEAAgACAAEAAQAAAAAAAAD//wAAAAAAAAAAAQABAAAAAAA
BAAAA/////wAAAAD//////v8BAAAAAAAAAAAAAAABAAEAAAABAAEAAAD//wAA//8AAAAA/////////////////v8AAAAAAAD//wEAAAAAAAEAAgAAAAAAAQACAAIAAgAAAP//AAABAAAAAQABAAEAAAAAAAAAAQABAAIAAwACAAAAAAACAAIAAQAAAAEAAAD/////AQABAP///v//////AQACAP////8AAAIAAQABAAAA///+//7///8AAAAA/f/9/////////wAA/f/+////AAD///7//v///wEAAAD//wAAAQAAAP////8AAAIAAQD+//7/AQABAP7//v8BAAEAAQD/////AgACAAAAAQACAAIAAwADAAAAAQABAAEAAAAAAAAAAQAAAAAAAAABAP///v8AAAIAAgABAAEAAQABAAMAAwADAAEAAAAAAAIAAgD+//z//f///wAA//////7//v///wEAAQD//wAAAAABAAAA///+/////v//////AAD+//z//v///wEA////////AAABAAEAAQABAAEAAQACAAEAAAAAAP////////7/////////AAD//wAA/////wEAAQABAAAAAQACAAMAAgACAAIAAwADAAEAAAAAAAAA//////7//v/+/wAAAQAAAP///f/+/wAA//8AAP//AAACAAEAAgABAAAAAAAAAAAAAAD+///////+//7///////7//////wEA//////////8AAAAAAAD///3//v/+/////v8AAAAAAQABAAAAAQAAAAAAAQABAAEAAAABAAEAAQABAAAAAAD/////AAAAAP7///8AAP//////////AAAAAAAAAAABAAAAAAAAAP//AQAAAP//AAAAAP//AAD////////+//7//v////7//f/9//7////+//7//v/+//7//v///wAA//8AAP//AAD///7///8AAP/////+////AAAAAP//AAAAAP//AAD//wAAAAAAAAAAAgACAAIAAQAAAAEAAgABAP//AAABAAIAAgADAAIAAQABAAEAAAD//////v/9//3///////7////+/////////////v/+/////v///////////////v/+////////////AAD+/wAA/v8AAAAAAAAAAAEAAAAAAAIAAQABAAEAAAABAAAAAAD//////////////////wAAAAD+////AAD///////8AAP//AQD/////////////////////AQAAAAAAAAAAAP///v///////v/+//////////////8AAAAA/////wAAAAAAAAAAAAD///////////7///8AAAAAAAAAAAAAAQAAAAEA//8AAAIA////////AAAAAAEAAQABAAEAAQAAAAEAAQABAAIAAgAAAAIAAgABAAIAAgACAAEAAgACAAIAAQACAAEAAQACAAIAAgAAAAAAAAAAAP7///////7/AAD//////v////////////7//v/+////AAD///////8AAP7///8AAAEAAQABAAEAAgABAAAAAQACAAAAAgACAAIAAgABAAMAAgACAAIAAgACAAEAAQACAAEAAQACAAEAAQABAAAAAQABAAEAAQABAAAAAAAAAP//AAAAAP///////wAA//8AAAAAAAABAAEAAAAAAAEAAQAAAAAAAAAAAAEAAQAAAAIAAQABAAAAAAAAAAEAAAAAAAEAAAAAAAEAAQABAAAA//8BAAAAAAAAAAAAAAAAAAEAAQACAAIAAgACAAEAAgACAAIAAQACAAIAAgADAAMAAgACAAIAAgABAAIAAgABAAIAAQACAAEAAQABAAEAAAAAAAEAAQAAAAEAAQABAAEAAQABAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAQABAAAAAQAAAAEAAQACAAIAAQACAAEAAgABAAIAAgABAAEAAAACAAEAAQAAAAEAAQAAAAAA//8AAP//AQAAAAIAAQD
//wEAAAD//////////wAAAQAAAAEAAQABAAAAAQABAAEAAQAAAAEAAgACAAEAAQABAAAAAAABAAEAAQAAAAEAAAAAAAAAAQACAAEAAQABAAAAAAAAAAAAAQABAAAAAAABAAIAAQD//wEAAAABAAAAAAAAAAAAAAAAAAAAAQABAAAAAAAAAP//AQABAP////8AAP///////wAA/////wAAAAD/////AAD//wAAAAAAAP//AAAAAAAA////////////////AAABAAAAAAABAAEAAQACAAIAAQABAAAAAQABAP//AAABAAAAAQABAAAAAQAAAAAAAAD///////8AAAAAAQABAAAAAAABAAAAAQAAAAAAAQAAAAAAAQABAAAAAAAAAAEAAAAAAAEAAAD//wAA//8AAAAA/////wAA//////7///8AAP7///8AAP/////////////+//7/////////AAAAAAAAAAAAAAAAAAD+////AAAAAP///////////////////v/+//7//v/+//7//f/+/wAA//8AAP//AAAAAP/////+////AAAAAAAAAAABAAAAAQABAAEAAAABAP///v8AAP//AAD//////v////7//////////////wAAAAAAAAAAAAABAAEAAAABAAAA//8AAAEAAQAAAAAAAAAAAP/////+/wEA/v/9///////9//7//v/+/////v///////v///wAA/////wAAAAD/////AAAAAP///////wAAAAD//wAAAAD//////v///wAA//8AAP7///8BAP//AAAAAP//AAD//wEAAAABAAEAAAAAAP7/AAABAP///v/+/wAAAAAAAAAA///+//3///////3//f/9//3//P/+//7//f/9//z//v/+//7//v/+//7///8AAAAAAAABAP//AQABAAAAAAABAAAA//8AAAEAAQABAAAAAQAAAAAAAAD/////AAAAAAAAAAAAAP////8AAAEAAgABAP//AAABAAEAAQABAAAAAQABAAAAAAAAAAAA/////wAAAAD///3///8BAAAA////////AAD///7//v////7//v////7///////7//v/+///////+///////+//7///8AAP7//f///wAAAAD+//3///8BAAEA///9////AQABAP///f/+/wEAAgAAAP7//f///wEAAQAAAP////8BAAAAAAACAP///f///wMABAABAPv//v8EAAUA///8/wAAAwABAAAA//8AAAAA//8BAAIAAQD8//7/AQAEAAIA/f/9/wIABgADAPz//P8CAAQAAQD9//7/AQACAAQAAgD9//v/AAADAAMA///8//z//v8EAAYA/v/1//r/CAAIAPj/9P///wYAAgD7//z///8BAAEAAgAAAP3/AAACAP//AQAEAAIA+//9/wYACAABAPz//v8EAAcABAD+//z/AgAIAAQA+//9/wQABQAAAP3/AAABAAIAAAD///7//f/+/wEAAQD///z//f8AAAIAAAD8//v/AQADAP//+////wMAAwAAAAAAAQACAAEAAgAEAAIAAwACAAYABwAEAAEA//8FAAoABgD9//3/BQAJAAUA/f/9/wIAAQD//wAA/v/5//n//f8CAP3/8f/w//n////5/+z/6f/w//j/8//n/9//4//s/+v/4f/W/9T/2f/f/9//1f/H/8n/2//q/93/yv/O/+7/BgD8/+7/+/8jADoAOAA3AFAAcgCBAIMAkQCtAL0AwADEANUA3wDXAMoAyADLALoAmACBAHcAZQBBABkAAADp/8r/rP+T/3f/Vv89/y3/KP8P/+X+1P7f/uD+u/6b/p/+r/6p/pr+l/6g/qn+qv7g/i//K/8H/2n/CgBFAFIAhwDhAGQB2wH2ARICXAJ7AqMC7QLSAnUCWgJFAgQCygFhAcwAbgAxANr/fP8C/5L+dP5s/i7+9v3m/ej9Dv5A/lP+av6k/ur+P/+
M/7f/4P8sAHQAqgDeAPoA/gAeAVQBXwFHAT4BQgE2AR8BCQHpAMUAjQBnAFAAIgDe/6L/gf9Z/zP/8v68/qP+lv50/lH+Qv4t/jv+Sv5W/lr+b/6U/sz+Cv8o/1f/lP/j/y0AbQCbANQAJwFqAZoBvQHoARYCPAJHAkwCUAJHAjsCIALzAcYBngFeARMB0gCXAEsA5P+K/1L/Dv+o/kT+9f29/YL9Ov3w/MP8q/yl/H78gfz2/Dv9Av09/Vz+Fv8e/1b/VAB9AQsCNgKpAn8DFQRsBH4EfgScBLQEggQNBKkDPAPGAhYCewH6AF8Anv/+/qv+Rv7H/Ur9Gf0K/fv84fzg/AX9QP2K/b/9BP5d/sT+Fv9y/8r/HQB9AMUAAAE/AZEBrAG1AeEBFwIRAt8B3QHrAdsBlwFeASkB+gDHAHwAIgDA/4j/T/8D/6P+X/41/v/9zf2X/YP9Wv0//UH9Uf1F/S79Q/13/bf91f3r/Rf+ef7x/kr/kv/p/1gA4QCCAewBKQKDAv0CbAO7A+wD7AP8AyIEMAQKBMIDgAM8AwADpgIsAqABNgHZAFcA1f9v/xH/pv5G/vn9uv11/Sz9+fzZ/L/8kvx2/F78T/xa/ID8YvxA/Pj8mP05/Tr9k/5l/zj/1v8IAcoBWgIdA7QDZwQZBVgFiwXiBfoFpwVLBeAEdATZA9UC0wEwAZAAkv+P/rj9Kf2+/B78efs6+zT7E/sL+z77kvvy+1T8uvxN/fr9kv4U/5z/KADIAGoB3gEwApECAANCA2YDmgPdA8ADYANhA7wDsQP1AlgCXQKGAhkCLgF7ACcA7/9Z/2/+jP35/JH8Avxk+8D6Ovq++cj5GfrP+Ur5rvnB+jb7gfsa/PH89v0a//T/mwChAZYCYgMgBNAESQWKBcMFHgaBBkAGxAWXBZQFPwW2BD4EmQPqAoACRAKKAZ8AEQDD/17/5v5y/uD9jP2D/XP9GP3O/ND87/wM/Q79Lv1S/Xj9u/0M/hz+Gv47/lX+sf6x/hz+YP5u/wj/Fv77/gEAq/+N/1kA4wBcAfoBQgKpAlsDxgPEAwEEMgQaBK0DRAP9AqcC8AHoAEUA2/9a/2X+fP32/Mz8bfy9+2X7X/u5++v78/s1/Pv8tf0g/s3+ev8dAMAAoQFNAq8CJwPJA28EtgTVBNIE6ATyBOEEjAToA3MDOgPeAhsCRQGRAPf/KP8//mb9lvyf++/6I/pE+VX5Tvn590z32Pid+fn4PfmI+qn7zvwK/rj+tf8eAXsCXQPuA4MEKwWmBbcF6QXwBXgF5QTTBNkENgQ/A5cCfQI2Ao4B4gB8AFwANADj/53/wf+7/2//Wf93/3T/cv9n/zL/OP+E/4b/Rv8z/2L/lf+Y/2j/c//A/83/mP9n/4H/b/8x/8j+pP45/iL+mv4n/u389/xs/kf+hv3j/ef+VP+1/2YA3wCOAfQBYgL8AmcDKgMPA0wDCgO6AlkCmQHKALIAewCA/5T+FP7J/Vz9Fv3Q/ID8Vvyt/FX9mP2a/f79/f7R/0gAiQDrALMBzwJfAzwDjQNKBLsEuwSxBGcEGAQJBMQDFgM5AqQBGQFDAF3/uf7k/bj8/Ptq+8T6rfps+gz5Xvif+Un6gvlv+a767PvS/J79Z/6s/yEBIALuAucDtAQuBY4FyQW9BXUF/gR+BOkDWQOuAswB7ABjAPP/Pf+5/l3+CP7U/er9+/0C/kv+nv4Q/5f/4f/x/2AALAGZAa8BwwHjARkCbQKMAjYC3gHjAQ8C3AE8AcoAiAABAIn/QP9w/kT92fy3/Gn8V/zS+676zfot/Fv8tvsz/Gz9Pv4P/+7/rQB9AWQCLQP8A6kE2QThBMQEhwSBBDwE5wLWAf0BzAFnADr/wf5W/uT9Uf23/Fb8W/yE/Lf84Pwt/bv9MP6+/nz/KQBhAMMAtgGIAr4CtQIcA7MDKQQqBNMDfwNeA0YD2QITAiYBaQDb/yf/HP4F/Sn8X/u4+tj6xfpz+Vz4Yfn
D+pT6VPoi+3T8yv0X/8H/egAAAm0DeARSBdIF/gWBBrUGMAbpBYoFZQQzA/kCiwIjAa3/8/7K/jX+Pv1a/Cj8Y/yI/Gz8aPzi/JT9RP7P/mj/2/+CAGgBNwKPAs8CWwOhA+MDKgRLBLEDLgNjA1YDdgJhAeAAKwBS/3T+Vv0y/GT7sPph+or6wfln+KX4HvqF+nn6Mvs5/EL9y/48APkA/wGFAwAF4gVhBr0GEAcwB90GbgbfBf8EwgPhAloCdwEiAPX+P/6P/RL9evye+x77ffvv+8z75vt9/D392f1s/iT/4v92APgA7wHPAicDQQNyA/wDcwSHBA4EmwNaAw8DlgKvAXkAQv98/on9bPw9+9X5Xfkp+uf5yPc69974Dvpb+rT6avvQ/AT/dwA9AUYCrQM2BV4GIAdnB74HzgdzBwkHtQbpBS4E/gJ+AucBlQAg/xH+Nv3U/ID84Pvn+tL6avvg+zX8Zfy8/Hb9r/6c/zcAqQBkAYwCogMkBCIEagTlBFwFjwVVBaME8QOTA+4C/QHNAIn/WP4u/Qf8xfo1+VH4+fj8+AD3AvaG98H4//jJ+dr6svuA/eD/BAGjAf0C3gR3BoIH3AcFCKMIvwgxCJUH6wabBTsEfAOhAqMBcgD7/rH9Tf0a/Sf8FPuP+vH6ivuz+3/7v/uQ/H/9bP4L/3j/KAA3ATwC/QJVA2oDywN8BNwE0QShBPwDMgPRAnUCaQHR/23+Vv15/Dj7mfkC+bP5lvnV9/n2GviY+XT66/py+8n8Hv/2ALwBsAI5BL8F8gbNBwsI+wc1CA8IKgcwBqwFsATaAlsBnAAZAP/+VP3Q+0/7n/tg+2L6p/kV+i379/s6/Ev88fxf/uj/xgAyAfoBKwOEBI4FxwWhBf4FywYHB8sGVgZXBToEiAPLAkEBcP+6/Q/8mvow+ZT3wfYu99r2G/Vp9OX1e/di+Dz5Xvrt+w3+KwDCAUUDqwQrBqYH2AiBCbQJoQk5CZII3gdQBycGSwS/AtsBAwHm/53+8PyM+z37Vvut+sP5uPks+q/6VfsH/Gb83fz3/XH/ogA3Ae4BHgMvBKEE4AROBaIFuQW4BXIFngSJA68CwAFVANP+Xf3R+x/6jPji95746vgv98j1//Yn+WP6NPsN/BT9UP8AAlsDCgRJBQQHUggQCUMJWwlrCegI6gegBn0FjwReA4gBn/9n/p396fyM+6P5tPgh+X/59Pib+BH5Bfog+wv8xPyD/an+OgDXAdYCTwNTBOQF0wYGB0MHiQdqB1oHbAfQBkoFkwOTAnMBl/9r/aL7zfmJ92D2zPYf93z1svMQ9BP2HfhJ+Q36+vou/TIAhgLAA7AENgYPCJ0J/gnlCRoKDgocCfQHHgf8BaAEHQOMARoAFv8c/t38jft4+g/6QvpY+t/5uPmD+qD7MPyP/Ef9OP5H/5QAxAFBAqUCuwMuBc8FogV9BY0FtAW9BS4F0AN0Ao4BYgCe/qD8C/uj+cD3Wfa99qT3fva99BP1SPen+WP7hPxs/Xb/RAKcBDUGlge8CJ4Jmgo7Cy4LqwodChQJdQfmBZEERgOzAeT/6P1k/JP7qPpe+Uf47ff29zj4Xvho+Bn5o/r4+3f8CP02/ub/tgEbA68DDQQcBaMGvQf+B8kHXwcXBxIHywaaBeYDXgLFACr/kv23+0n5MPd19q32mPZg9VD0qfRT9gX4e/kZ+9X8uv6+AOECyASRBhIIBgl1CaQJ0gnaCZ8JtAgGByYF1gMWAyUCnwCy/gj98ftX+9P6G/pj+UD5yvlI+nv6Lfuh/Nn9Xv6s/nT/1wCYAr8D2wPeA8kENgbKBlwGzgW7Bb4FPwUzBBgDKgLuAAn/Cv2L+0P67vhn9wr2ifX99V72H/b59YP2Ofi3+vP8bP4GADQCLATYBY4HSglZCr0KyAp/CuwJSwn4CBEI/wVcA6QBCAFXAN/+1vwk+yr6zflJ+Yv4Qvi7+E35ifn3+Sr70/zm/Ub+vP73/54BFwP
rA0IE0QTdBakGggYhBlMGsAYrBtcEgQOtAgwCCwFx/4v9+vsP+0/6Lvlj+I74JPke+aL4XPj9+Lv6u/wj/gP/9//1AP0BTwOjBGYFsAUIBusFCAVGBHIEqASpA78BIQCR/3//PP9+/m39hfxK/Iz8mvy8/D39of3M/Vv+Yf9CAOYANgEtAWABUAJdA58DNQP5An4DRARaBJMD2wLUAhADxwLvARoBeQACAI//4P7c/Sb9GP39/Fn8qvuF+8r7JfxE/Dn8UPy5/FX9G/7x/or/8P9pAPgAWQG1AS4CbAIyAvYBJQJzAkYCeAGIABoAJwAZAKT/Av93/iP+Gv43/kD+O/4y/hv+Lv6g/jD/mf/D/5//gP8WAEUBDgIEAtUBJgLfAooDyQOXA0wDTAN4A18D2QI8AtcBgQHuACwAnf9V/xb/xP5V/tT9mv0E/oj+dv4W/hX+if4q/+3/bgBoABoAAgAfAFMAhgByAPP/PP/Y/vL+If/M/hz+rv2Y/aH9sP3G/a/9l/3L/T/+p/7Y/uz+/f5A/6f/8//+//z/DwAcAE4A2QB6Aa4BqwEAAqQCKgNvA5UDoQN/A0IDOANRAyMDewLmAZQBIQGNABwAy/9c/xH/wf4+/u39Lv6V/qT+jv6L/r7+Qv/+/3kAfAA2AAMAJQCEAMAAmgA/AOT/uv+z/5H/OP/k/q/+ev5e/lf+I/67/bT9Hf52/nv+iP63/sb+zf7x/hv/KP8h//v++f6F/zAAKwC6/7n/ZgAmAY4BnwGcAaoBwAH8AU4CdQJNAiYCFwLjAa8BwwHtAeMBlAE3AfYAvwCEAGMAfQDRABUBBgHsAOwA7ACHAMn/SP8p/0r/R//W/j7+vP2S/e/9mf4V/8/+Gf7C/Qf+n/4d//3+nv6p/in/qv/T/9D/uv/D/7//ZP/6/sL+jf6M/v3+mv/O/7v/EQCGANUAKQFnAZUBzwHkAekB1gGFASsB6wDIAGYAtv8X/wb/Jf82/4H/l/9o/4D/wv8SAKwAAQEaARcBMgH2AZwCWgL7AR8CBwKsARoBNQCt/63/sv82/zH+kv20/TX+5v40/+L+nv7e/jT/Wv+L/+f/SgB1AHEAjgCcAHIAAwCl/7v/0/+7/3L/wP4d/hH+f/77/jv/hP+R/1v/cv+X/6H/9P9VAI0AgAB8AN8AUQFTAb8AoAApAUkB8QCqAIgAowAGAVMBhwF+AZYAEgCqAIYBlAGcAEIAOQD9/5z/Rf8v/7L+Nv7q/R3+ef4o/kX94PyJ/db+6P+o/yf/XP8zAAYBkgH8ARkCEgLyAZwBPAFEAYkBlwE5AbEAHQCX/yD/0v4g/4H/bP87/+/+l/59/or+tP5R/9r/u/+J/4b/5f9sAG8AIwBcANQAyQCJAHIAcwC4ABMBDgH0AAMB6wCVADgA/f/4/wAA4f9G/9L+bP4e/lf+Rv7H/Yj9pv31/a3+ZP/+//f/pv8hACsB8gEnAkECYAK1AvUC9gKlAkIC7QG9AfgBygEuAWIAef+t/kf+I/4R/m7+1/7X/ov+W/5P/mf+YP4y/nT+I//X/yoAIQBEANgAdgHrAR4CDwIKAuwBtwFhAfQAzACiAFwAEAD//7r/9v5g/tL9Vv3p/HD8Jvz6+7H7kvuz+2n8jf2r/kD/+P4C//r/eQFtAtYCQQPlA3gEnASOBEwEPQQ2BPoDiwMHA1kCRgExADf/of5n/mf+nP6V/jv+5f3V/Z/9fP2k/QL+qP5N//D/TQCsABEBeAHbASwCXQJTAk0CJwIQAssBYQHaADkAtf9R/yX/6/5z/q/9wPzA+6z66/mc+ZX5nPkW+mL7zfzb/XP+Rv9cAL4BKAMoBN8EkQVFBnAGdAZNBhQGyQVCBXsEogORAg0Br/+R/o/9rfwd/ML7r/su/N38/Pyr/Lv8RP3d/Zn+Q//i/9kAKQIeA4gD8wNiBJYETQTGA2ADKwPhAi0CQQGYAEoA4/80/xf+yfzX+xT7Pfpe+ar46/d394j3Mvh
U+bv6XvzH/TP/hQASAqcDJwVnBjwHAAhNCF8IHAjEByYHNQYsBfkDhwJ/AKL+HP3++xj7ZPok+iP6Yvqi+jD7sPsm/L38mf3I/gMAUAF1AqIDsgTCBZMG+Qa/BvkFOwWxBEoElgOnAqEB0QADAAf/9v3Y/MD7fvpV+Xz43PdD96X2UvbG9hv4w/ku+0n8hv19/78ByAM/BVQGbQeGCIEJ6QnaCW0J8AhJCCYHmQXGA7sBZP83/VT70/mJ+In3Sfeo92X4/Phr+S76ZfvG/An+O/+XABgCrAMnBVcGdwd7CAEJ6ghhCF0HFga+BGwDIwLcAJz/Zf5e/Vz8KPu5+V/4P/d69hn20PWT9eD1Pvc6+TP78Pyl/pcAfwIUBEwFxwZ0CNMJkQqtCqoKfgowClcJ+wczBg8EpQEO/7j8hvqY+Pf2+/Xp9VX22vY+9/z3S/na+jL8Mf1V/tT/0wHVA68FLQdGCBwJiwmnCR4JGwi1BjwF2gOIAjIBxP95/kn9MfwY+/H5wfil99b2UPYE9h32yvY++Bb6yftG/fz+AQH9AqME8QUkB0oIWAkaCqYKtgpNCpAJsQiZB/8F/wOgAXH/dP2v+8H5x/dT9r31KvbT9oX3HPgY+Vv6pvv//FT+6/9/ASsDugRWBq8HcAi+CKwIbQidB2IG0gQrA6IBJwC4/iX9vftx+kH5M/hF93X2vvU+9Qb1cfXH9uD4Dfss/VP/qQHpA64FvQZmBzwIRAlVCgQLPwvrCmIKugnQCGUHbgUOA4sASf4c/AX6Wfh19xL3D/dl9wf40/jD+bH6h/uD/MP9KP+9AGwC/gNUBcEGKwj+CP8IQwguB+4FrAQ8A8EBZgA7/xf+5Py7+4T6Tvkt+Dv3Vfag9UP1bvU79t331/nl+xL+cwCyAl0EkQVKBvIGjAcJCEAIiwj8CEAJIwmHCG8HyQXZA7cBlv+B/Yf7y/mq+ED4Pfhm+On4yfnP+sL7o/yB/UP+If8oAIoBAANSBF4FagZ/ByUIJAh/B4QGOQXsA5gCQAH7//D+If5D/SD8v/p++YX4hPdQ9lX1AfVT9VX2A/ji+ff7cf4CARUDqQTjBbQGSgevB9QH3QclCHUIewg5CKIHiQYEBUYDRQEH/7n8h/rr+DD4CPgW+HH4NvlM+oX7ivw9/dv9of6W/+cAcwLtA0YFlgamBy4IPwjSB/cGzQVnBMUCNwH4/+X+9P03/V38U/tV+nX5Svjv9uf1Z/Wu9c32kfhX+mX8Af/SASsEnQVzBhsH2gcmCBAI8AfYB7AHmgeZByAHFAaIBKUChgA5/r/7c/np9yj3//ZU9wb4+fg4+pf7zPy3/Xn+Jf/l/9oAEwKAA/cESgZXBx0IcQgLCOgGVwWUA+UBZAAL/+n9Lv3J/GH83vsw+3D6tPnd+O/3Efe79iL3Rvjo+eL7cP5MAc8DawV5BjYHlAeXB1MH9AaIBlEGWgaIBnsG7AX0BLIDEQLg/1P94vrr+Kz3QfeL9yD4/fhe+vL7Qf0u/v3+zv+XAEUB7wHwAlUEtwXiBsYHLAjiBwoHzAU5BGcCbwCY/jn9Sfyp+zP7tfof+qn5Tvmq+Mv35/aD9h/3nvhi+kb80f66AUMEKAaEB2IItgiSCAQIVwe/Bj4GEwYfBtsFHAVQBGIDygGy/3P9Nfsf+bj3MPdc9wH4Efml+nH84P29/oj/MwCTAPcAtgG6AsQD7AQ1Bl0H8wfBBwMH1wUnBDACMQBn/u789fts+x/72fqU+mX6Hvpr+WX4n/c09273bvgJ+sz78f2zAGcDqwU2BwYISAhVCN0H9gYlBnQF2QSuBOIEpAQRBEIDHQJ+AJX+XfwU+nv4jvdf9+D3/vhy+kP8Ev5Y/00AHQGtAdkBHgKQAkwDXQSBBXkGMQeOBzkHZgYEBSYDJgFB/3v9Avwr+6X6Q/r0+bT5c/kA+VL4dfft9iL3Nvje+dD7cP5mAUEEvwaSCH0JpQmJCdgIxgehBocFwQRFBNADOQP
RAhACnwAA/1L9T/s5+a/38fb/9sr3Gfn5+in9Dv+QALMBUAJzAokCtgLyAo4DegRvBVsGCwdQBwEHFQZ7BJECrwCg/q38Oftq+gD66fn6+dj5jfkN+Yj4xvcc9yP3Ovjm+cD7j/7mAfoEbwdmCXwKlwo3ChoJngcmBuEE+gOPA0gDxgJXArgBagCU/on8VPox+Jf20/X99Q/30PgX+5v9w/+TARkD5wPWA54DogOmA98DaQQjBQEG0QYMB58GqgUPBBACAwDK/aj7W/rJ+XT5cvnJ+Qf6Gfr4+Xr5q/jv99r3wPhT+i384P49AlQF7Qc2Cq8L7QtlCx4KVgh4BssEfwO/AkYCwgGLAS4BHQB6/q38j/pJ+Jb2sPWs9Xr2Ovi0+m392P8JAvED8QQGBcgEjgRRBFgElAT/BIwFFAY9BvMF9wQ1A0IBOP/2/MD6RPlu+Ab4CvhE+Jj43fjp+Jf4Kfgm+M74B/qc+xT+WwGxBLwHWgpcDDoNHA36CyoK+gedBaQDOAInATcA4//A/yL/Av6N/Mf6zPgW99v1dfXo9VD32PkI/ej/RgKBBCEGuQaYBjwGtQU8BQ8FNwWZBdwF8QXCBQUFXgM5AQr/sPw6+jr4Mfe+9qv2+Pax9z/4Vvgc+PL3TPgf+UL65fuy/hgCiQXjCNcL1A3LDskOjg15C8YIwQUaAxsBZv8x/uD91P15/eL86vtu+rL4JPcM9r/1O/aZ9yD6Y/2EADYDrgVnBwkI2QdPB3wGeQWnBFAEiwTNBPgEBgWaBFADigGK//z8cvp6+FT3qPaQ9v/24/fB+A/5Hvnz+PL4Tfkw+kz7Uv2FAPkDXQd4Cg4NuA6BD8YOtQwbCu4GdAOMAIX+2vzn++D7CPzw+5H70/q6+Yv4bffB9vL27fe9+Yj8qf93AgYFRQdpCFgIpweuBrAFpQThA6oDHwSTBMIEsgTxA50C3wCn/ur7cfm+96j2LfYw9sP2vPeS+Pz4/vgB+Uz5Avri+mz8R/+qAhsGewmfDOEOKBAnEJUODAzLCDkF1gES/9f8cfsP+x77L/sa+8r6GPop+Uj4pvd/9yf4x/kk/Ln+YAEhBHoG1AcTCLoHBAcRBgkFMQTGA7YDDgRtBGQEswOWAicBEv+Q/CD6UPgc92v2Pfah9nb3Kfiz+Mn4rfj2+OX59vpk/A7/aALzBU8JZQzEDjsQVRD2Dr4MgAmvBSACXf/n/DX7tPq1+sH6p/qB+gX6Q/lS+Lj3x/dH+Jz57vui/g0BgQPZBSoHcAciB4sGuwXSBPwDlAOmA8AD9AMRBH0DTwIPAXP/OP0C+1f5U/i+92T3kfc1+J/4rfif+Kv4CPnZ+az6TfxP/6ECzwUeCUcMXQ6ND2UP0w1yCzkIbATsADH+1fuC+lj6f/q1+h37avs6+8L6VfpF+pb6NfuD/KT+4AC9AmUEyQU+BrIF0ATVA7UCuAFUAY8BIQLTAokDHQT5AxMD2wFDABD+x/s4+jj5ffgf+HL4/vgf+cr4bPh8+Lb47viD+bL74/77AUgFAgk/DHgO1Q+WD/wNgQsqCFoEvgCC/Rr7GvrJ+cH5S/r++mz7kvtb+xD7M/u0+2n83v3r/9UBiwMUBecF0QU0BRkExwKuAdEAegDsAKYBXQJSA/cDtwP5AtgBBQDB/ar7C/rp+DD43Pc2+Jn4qPhq+Fz4sfhM+e757Pp2/aIAtgPsBm4KWA09DxwQYA96Da8KIwcmA1//NvwE+tz4Mfgd+Lr4fvkN+oL69/qS+4r8rv0J/7UAYQL1A0kF7QWyBQwFHQS7AkYBTAD0/y8AyQCvAc4CuwMBBLMD8QJoAXX/k/3u+4z6nvkb+SP5iPmO+TL5u/hk+HT43fgv+VT6Kf1+AJAD/QafCmkNIA+PD3oOYwyFCd0F/QFq/of77PlC+bv4yPiI+TP6lfr7+k/7y/vb/CT+oP9yAToDngSpBfcFVQVJBP0CagESAFz/Sf///zgBjwLVA9QE/wR8BFoDXQE
I/+/8EPtp+Yj4NPhP+Jr4ufh3+Bj4NPi7+D/5ufm/+0T/uwLHBRwJegzyDhIQdw/ODVML4QfnAw8AiPz9+fv4j/hG+K34l/le+uP6Fvtd+2f83f0v/+QA/AKuBNwFfQYTBs0EOwNyAbf/j/7+/V3+3/+IAfYCgwSsBawF5ARBA9UAc/5S/Ez6zfgD+Mb3OPh9+DT40Pfz94T4Mfm0+Sz7ff4WAisFYgjYC3cOBxACEHoOIAwJCTIFKwFt/U76tvge+I/3iveV+If5DPqh+k77WvwO/rz/egHFA70F8gahBysHZgV0A1sB9f4k/WL8kPzg/cv/twHJA3gF4AVpBWIETwLT/7f9vPvl+eH4j/iy+Nj4h/gL+M33y/cp+OP47Pl0/CwAnAPhBoAKkg2IDysQCw/pDDEKkwaIAuD+nftt+ZD4BfiY9/z34fiK+Qn6jfqQ+0z9Uf9hAdEDNAb0B8cIXAi8BogE8gEd/738Xfs6+2v8ev6lACcDfAWlBowGtQXUAzgBrv5O/Eb62/g3+DP4ZfgE+Ir3UvdB94z3N/ga+Vv7S/8QA2sGGgqNDdsP7RAeEOgNMgu7B5UDqv8s/IH5WPi89yn3TPcw+Bb55/mG+mX7Sv2w/7IBrwPiBYwHYQgGCD0GuQNjAe7+ivwZ+wH7PfyV/g8BLANsBQwHKQcnBpYESALt/+X9DPyk+rL5Fvnq+Hv4F/fX9S71+PS79VX3VvkE/SIChgYBCkoNsA+/EGgQLg4MC/gHnATzANT9Oft9+er4ifjU98L3fvgb+a75i/oy/Nb+zwH3A+UFxgelCBYISAaBA6kAUP4Z/LH6qfrt+zr+HgGOA5cFPwemB6oGEgURA9QA/P5I/cz7y/rm+fD4Avhe9nj0X/NE81f0ZvYf+Yb9SQPbB0MLcQ5UEHwQmQ9UDTIKOAckBCUBo/4z/FD6o/nD+Er3vPY89673YPjf+Uf8sv84A9wF+weWCccJnAgjBokCG/+M/Ff6/fhI+dz6V/1EAK8ClwRMBtoGJgYnBe0DfgJLAQQAmv5a/cz7BPol+LH1FPNz8VXx6vJ79a/46/1GBPUIGQz4DmwQGxDBDlkMhAnrBkEEyAGs/w394/rf+V/4MfZT9eL1o/bT9w/6g/2kAUgF5wfKCYQKsQntBxEFDQGQ/Y/7HvoA+VD5PvvM/ez/fwE2A+oEigUgBeMEqQT3AzkDkAJbAYf/Sf3i+i/43PTu8RHwUe+98Gj0HviL/NkCRghCC08NkQ6sDv4NHwwNCqcIyAZxBKECHQDQ/FT6PPi99frz6PM79Vj3wPk6/d0B6AVyCEEK0wq3Ca4H7AQ7AZ/9QfsH+nf5gPm7+vz8RP/jAGoC+wMcBWkFZwWEBWQFowR+A1ECewDK/dP6Kfha9ZPypPDc77TwvvPU96P7UwDDBa4JsQvyDIQNbg3EDDsLmAkcCCAGrQPiAGH9J/rq9+v1UPQt9KP1EvgE+xz+hwEZBegHQQlGCV8I7QbjBPABqP5K/Aj7IvqY+Qb6dvtj/V3/EQHZAsEELga3BuYG0AYZBq8EsQJnAMD90vro93X1A/M08bHwYPGy84P3Yfte/00ETwinCkUMKg00Df0M2AsjCmUIAwYoA0UAvPwv+Qf3h/WG9PT04vZK+SD8Iv9NAi8FeQcRCbgJ7ghFB9gFgAPV/5j87fpf+e/30vcV+fP6Pv23/34CMAX6BgoIoQgzCL4GMAUTA1cAlv33+nf4cvY79EvyH/EQ8TPzpfad+ZT9hwMYCKEK4gxjDrQOcg63DFQKMghhBfsB5/5s+wL4Y/Zf9Uz0rvTp9n35Lvz5/iwCnwUiCJYJogo+CkAIgQZaBIgAoPxQ+rr4Z/eU9k733fmM/Lj+igHRBAsHWAgDCZsIfQcaBhwExQEK/zP88fn49yb1W/In8Y7wMvGS9Hz4/fuPAZIH1Aq8DIQOHw8OD7QN/QoVCeAGEQOD/1n8bvjC9eD0+PPj8+711/jn+7T+MgF7BLcHKQn
WCV0KSQl6B74FcQJJ/l/7Ivkd9//19/XQ90T7N/7lAIUEmAf7CGcJ2QibBy0GAQRCAev+pfxF+lL4BPYZ8z/xcfBE8LryJ/f++rv/QAbKCrQMXw5HD+4O6A2rC2QJjQdiBKkAo/0L+pj2KPVU9MzzH/XB97X6xv2EALYDcAeeCZAKfAvkCrAIvQazAw3/d/sQ+ab2N/V19Tj3UPp2/WgA+AMMB3MI8QipCJUHIAb9A1wB9v6o/Pz5jvf09PTx0+/b7hbwOPTq+Ib9/wNjCokNGQ8SEDoP3Q05DIMJQAcmBRYC+P7b+wX4Y/Vl9Bjz7fJO9Wb4evvi/jYC3QViCV0LYgwoDRoMVwlgBoUCvP2c+bH2s/Sx8yL0dvYV+m79lgBZBHgHIwn9CQsKEAmwB/gFpAPtAPL9Fftn+D31sfF17wjuse3L8Br2rvoIAG0HtgyxDr4PLBCeDxgOdAtACWsHGgQtAMf8wfj/9EXzUvL08ZTz7/a5+hn+9gCiBLwIFgv8Cw8N8wyACl8H8ANz//j6jvdD9Rj0B/Td9Wj5w/zR/68DFQezCJQJ1gkXCQoIZAYYBPMBaP85/CD51/Uy8nTv4u307LnuTfQy+u/+JQWIC48OOg9/DwYPSg7YDJwK/Ai/BvkCCf+2+q71j/KJ8efw6PE49Tj5V/3OAIYD7AYgClcLAAyTDCkLzAjjBTYBivwi+cH1gvNd87/05/cw/Nv/hQNTB0gJrwl8CWwIMQfUBZwDjwGb/7X8Vfkq9m3yFO8r7Trs5u5M9Tv73gDzB4INeQ8GEGAPhg2PDIALlwn/B84FeQLG/gn69/Ru8sjxZ/E18wf33/rL/iACSQTDBmcJqQpMC64LmQrDCPYFBwEN/In4YvUk8w/zuvQz+JH8EgA6A5EGhAgcCUYJwQgrCHsHcgXyAoQAC/2w+LX0X/F47vbssOzW7gz1p/tWAM0FkAs3DmwOPg5sDQsN2gxlC+EJiAd7Ay//Dfo29NXwePDg8JryffYI+zf/IQIFBE8GegjmCUgLXwz5C1gKuAdPA679e/j69PfyEfI78+X2lPuo/w4DCAY6CH4J1QlXCcEIQQjqBmAEIQGE/YP5Q/Wq8ezuYu1O7VvuRvLD+ED+tgL0BxcMew0KDmoOkA7+DlIOygyfCmwGMgHA+6n1yvAd75rvWvHC9Ar5Vv26AIwCYwThBhYJ9gpLDJMM2wuzCX4F7P97+kH2oPNh8uHy5PU8+mD+1wF+BMAGmAhLCeMIsgjrCPkHjgWLAj//kvui98TzrfCr7hDune7873j0PPseAOkD3Qh/DEQN9Q17Dk4OuQ4ZDvMLqgjaA4X+MfnH81PwcfA58m70+veq+2j+kgDLARMDsgWXCLcKJgx9DBkLswdcAlz85Pf49Ejz5/PE9oL6p/52AmgEggUgB+4HXgcBB0IHTQcUBloDQQAN/Qj5HvWF8uTwNvAN8efx0vNz+cn+fgEABTgJFgvTC+MM4AxEDRUOuAz0CR4GSAGU/I/3FfPY8XLzMfWw9y77ev0Y/8AAggGxAl0FKgg3ChMLQQplCDAFs/9g+oz3zPVK9UL3KPqS/UwBhANaBEwFKwZBBjgGJAYcBuwFBgS2AGz9GvrH9hf0bPKK8bDxh/Lr8zL4tv0YAUsEWwhpCqkKqQtBDGwMWg3ZDL0KoQfiAvT9N/l09DfyXfMV9SL3o/o3/UH+iP+qANkBNQTzBmIJIAvmCsoI/wWkAQv8cfgd93f2YvcC+pP8Jv+3AfcCpwP1BDoGGAeVBzUHPwb3BD0Cov6C+6L4vfaq9VT0Q/Nn877zGvQ79+H7xf7fAXwGYwnwCUcL6Qw0DbwNng2OC2MIJQQY/+r5nvXG82r0yPWM93360/xG/br9if5r/4gBhARJB0UJIwrFCZUHUQNn/lz7/vkH+QH6hvzY/kkB/AL6ArECigMmBB4EegRmBMgDnALg/6f8Mvo7+AL3gPZe9q32jPfW97X32Pnb/DT+4v8NA70FNgd
TCYILjgyKDaINhQvCB4kDf/9T+/r38PbA9zb5lvqy++f7cvua+zP8Nf3X/5sDTQZnBwEINAdJBDIBfP6X/Pz7afwD/tv/OwHdAusDXwMHA9ADngO7AuICmwJKAaH/QP2v+vj4GPgJ+Ib4Lfm5+UX60/kX+Rf7h/3y/az/cQOmBaUG5wiICggLLAyyC90I4gWXAq/+dPt8+Sj5rvpO/Mr8A/1q/C77A/st+8r7s/5tAo4EqQXgBXAENQK3/4n9Vf0c/ur+ywCgAjEDtwMHBCUD3gKoA4oDzgKFAr0BFwBA/gr8+fkL+Qf5gfkk+sT6B/vS+v/5t/h4+af7kfw5/hMCKQW1BuIIZQqACiwL0gogCJkFegNeAJP9Qvz6+5/8rv33/UP9RPxX+7/6mPpL+9n9wwA6AjsDjgNmArYAQf9U/p3++v9qAaoC0wOoBAIFlATWA9QDBwS7AwEDvwEvAFz+9ftT+ab3Q/eA9y/4GPla+Yz52flr+SX6U/x9/eD+HwLDBA4GOwhQCuIKrgvTC7kJLAfOBLYBn/6u/AP8Vfzw/AX91/xd/DX7efqk+kD7Hv3q/54BIgKRAlkC6QCI/9X+yf7p/zIB0QF8AgsDRwOmA+ADBATkBOUFhwUmBKwCxwCa/lX8PfpA+TL5lvkK+gj6HfpS+u752vgo+ET55/rj+9j98wB+A2oFcweNCI0IJgkzCYQHBQbTBPUCJgElALr/bf+b/73/5v7z/Wr9sPwG/DD85vxo/Rr+nf4W/oj9Y/3//A39DP5d/7UACQI8A5MEFwYhB8EHbAh4CFQHvgXNA2EBUP98/a/7pvpO+kv6Nvqu+Wn5pfln+ZL4+vcp+ED5yvpE/GH+YQHRA2MFQQYiBhwGVQaSBXUELATGA5ACxAGPAR4BMgEGAhUCXwE/AekAlf94/q398vwU/YT9LP2w/H788vtf+yb7J/sh/Ov9w///AWMEMAafB4oIGQgsB4QGHAWMA68CNwGB/9D+Mf5S/cj8TPwM/BT8qfv4+sX61/qk+h/70vxz/tj/mgGOAl0C4AEZAQcAUv8S/+L+0f7f/ib/oP/n/4oA5AH2AoYDEQRIBMwD/AIAAgwBjQAlAHH/9v6v/sH9yvxV/If7F/vt+1T95/7sAAcDSAToBE4F0wToAzoDywJ0Av0BVAHYAMoAVwCT/zj/2f40/t79w/12/Vn9v/1l/gX/t/+NAB4BNQHvAGEAmv+n/t79WP3N/Cv8yPsh/MH8L/0P/qb/5wCKAVMC0AKdAmcCTAIgAk4CYQKYAcgASQA//wT+fP1I/T/91P2w/pH/ogCAAecBDgILAgAC3wGPAYMBugFTAbwAlAAzAMf/vv+t/5z/uf93//L+3f7j/vb+pP9gAPQA7QG9ArACYAIlApMBzQAgAE//ef7w/Vn9y/zB/OX8M/0W/tP+F/+//00ACADj/0cArgAPAW0BawEeAcwAHQBF/+T+z/7s/mb/0v8qALYA+QDbAPUACQHtAPcA7gDPALYAIQBj/yT/6/6R/qb+AP9G/6H/3//F/7v/9f8sAIIAGgGqATYCwgLmArwCjQIUAnoBDQFlAIL/8P5t/tL9n/3S/R3+iP7j/hb/WP81/67+m/7h/hL/cP+x/4b/e/93/xv/6v41/53/EQCIAN8AQwGEAW8BaAF3AUwBOAFhAUgB2QBPAJv/3v5B/t399/1Y/rf+Nf9//1X/QP9L/0j/ov9IAM8ASgGvAdABwgGxAbUBowF9AXMBSgHWAFgA4v+Q/4//sP/t/zUAVABNAAsAfP/v/qv+v/75/jr/df9u/z//EP+8/qz+F/+V/xsAowDtABgBLAEyAV4BdwF0AagBywGNASYBowD0/0v/s/5K/kT+Xv5l/p7+vf5+/lf+YP54/tT+Yv/t/3UA2wD+AO0A1QDAAKUAiABvAHIAlwCOAFoAZACfAMYAzQC8AK0AiwAnAKL/PP8L/yH/af+N/47/v//n/4r/Hv9B/4z/sP/p/zEAZwC
FAH0AkwDXAOwAFgF9AY0BWwE9AdIAHgCa/zT/9/7u/uD+7v4U/+L+rv7E/sP+9/6g/xMASQCxAPUA1wCQAFIANAAPAMX/kf+R/5//o/+m/9b/KQBlAIEAbQAvAAMAxf9c/y3/PP9d/6L/zv+8/8n/9v/4/+3/BAA8AHwAlwCQAJkArQC4ANQA7gDcAN4ACwH7ALoAmgB0ADEA5f+V/3r/kf+M/5H/xP/b/9r/6f/i/+T/KgB5AIkAkAC8AKwASwAOAOX/kP9d/2z/cf95/57/wf/d//T/+//e/57/bf9L/wf/0v7a/uH+5/4d/0v/bP+s/87/0f8bAGcAXwBxALYAuQCYAJ0AsgDZAAYBAgHxAO4AywCKAEUA+f/I/6b/gf+A/4D/Yv9j/2v/Qf81/27/sP8GAHgAwQDlAPsA1wB6AD0AKgAHAOr/+/8LAAoAFQAtAEIAYQCMAK0AlABQACMA3/9Y///+8/7P/rr+5v73/vf+KP9P/1L/a/+e/9H/9P/7/woAJQAyAFsAuQAAASIBVwFyATIB0wCCACkA6v/H/5//o//B/5//Yv85/xf/IP9j/7b/NADTADMBOgEWAc4AcgAkAPf/4P/f/+///f/x/9X/1/8KAEYAeQDBAO4A1QCvAG8A3/90/2X/OP8O/1D/j/+W/8D/5f/G/7H/qf+B/33/kv+B/37/qf/f/ykAcACQALUA2gDBAHEALQACAOH/xP+3/7z/vP+0/5T/Wf83/0r/bP+d//T/UQCLAIsAWgAeAOH/qv+g/7n/1P8LAEAARQA9AEUASwBhAJAAtwDIAMAAlgBEANj/hP9X/0H/Xf+h/9j/DwBRAGsAXABDACYA/v/N/53/j/+l/9D/GwBhAHoAiwCUAGUAJQARABIAHgA4AD0ANAAtAAEAuf+N/3L/Wv9r/5P/rP/Y/xMAFwDw/+n/2v+n/5L/oP+p/77/3f/v//7/HgBOAHoAlwC6ANwAzQCeAHQALADL/5X/dP9P/1j/hv+s/9//HwA/AEsAUQA0ABEA/P/g/9z/BAArAFUAiACUAHoAaABOAB0ABgARAB4ANABDAD8ANgAqAAAA0v/K/8//zP/R/9v/4//h/8n/nP91/17/Tf9N/1j/Z/9+/6P/x//h//3/MABvAKQAxwDVAMgAnQBnAB8AzP+R/4P/fv96/4X/ov+4/77/uv+4/7b/vP/J/8n/zf/z/x8ANwBZAHoAcwBsAHMATgApACoAJwAsAEYAUgBWAFwASwAyADcARABOAFUATABRAF0AOgD5/83/rf+G/2v/ZP9w/3v/gf+U/6n/w//o/xMARwB6ALEAygCkAHkAWwAWANT/vf+t/7j/6v/x/9//AQAAAND/uP+j/53/rP+1/6j/xP8BABoAKAA6ADkAOwBOAE4A+/9kAI4Al/8BAGMA3f/w//v/uP+7/8v/p//Y//z/CwAOAM8AjgFbAK7+Sv9IAR4A/P4GAIH/CP9G/yL/G//0/zEAhv8/AK0ANQBmAC8AxP/y/ygAmv+u/1UCCgOnAI8AqgCc/wQA8f9a/7X/3v8Z//j+zf9b/0r/6/9h/4X/7v/O/6v/y//c/7X/7/8QANn/UgC3ABcA4P/e/9n/tP+h/xIAAgD//7EAcQA2AJAAiAAbAAQAbQDq/wMAQQCx/0AAZgAoAC0AIAAhABD/S/+//zr/o////9n/OwC1AJ0AXQB6AP8AbgBHAI0AIABBAC8AAAD9/8b/zv8X/xr/m//z/k//iv8M/5v/BwCf/87/mwBNAML/3QDmACUA+wDuABoA/v9aAHwA7/8tAJIAAQAwAHEA9//I/2cAbQF/AMf/1wBYADIAwgDq/6T/OP+9/Uj9L/5L/tr9gf5n/oH++f5c/j3/DQAbADcAeABNAQcB/gAAAjoCogFYAX0BIQGrAA8BrAABAPr/2f9q/1z/hv/F/0//gP6E/4n/Nf+b/zIAcP8G/0YAbP/u/+f/Kf90AIcAcABNABgAGQAGAFEAgwB
wAKYA9QAfAcsAGwCdAIMAnv9///j/bf9w/yoA3/8oACQAr/9+/93//v8jAG8AVgDe//P/hgA0ABMANwCrAEcAxv9xANj/BwC6AJ7/Uf+r/yT/UP///3P/gf9r/87/AgCr/gb/iP+S/17/gf+o/wUADgFCABcA1QBtADYA1QCwAE8AcQFBAVUAowADATwA5P8cAJ3/zf8CAO7/Dv+4/m3/e//H/n3/kP/D/v7/6P9f/9v/DgB6/yr/1P/N/2H/IAC7ALb/IwD8AOP/eQAhAS4AvgDpAD0AlAA8AHMABgFEAIIAEgBR/+n/fv+V//r/yf/o/6D/RgDw/7H/1ABhADIAxwBiAHsAVgFiAO//QQDI/yb/bv8mADH/E/+1/0b/yf6G/wsA7/6E/5MAsP+s/x8AMAAIACcAOgBr/87/DQFjAJz/UwBdAMT/AQA1ALD/IACNADYAIwDHAAQBVQCYAIcAEgBpANn/wf/t/9P/3f+j/xcAGQASAOv/pv/s/5f/xf92AO7/TwCGAN//KwBdADcAPf+h/5EAUf+2/gEAs/+X/o3//f/e/sf+xv88//L+gf96/13/aP8FAJT/bP/nALwANQASAfUArwBJAe0AHQDaAKUB+AC1AHQBYQEFAR8BfwB+AJ0AIwAcAJv/NgAtALj/3f8W/4z/o/8R/+n+of/t/2b/q//r/ygAiwBOAOP/GADq/6H/WP/E/4//zP7B/kr+J/42/ir+Jf5P/jf+rP4y/z7/hv/6/5gANwFUAboB7QL3AvkCaAP/AgsCOAJIAiQB4gCvAB0Aav8f/wL/Mv5G/jr+Wv26/WP+ef65/ij/Z/88////RAGZAJIAmgFtAUEBxQHEAZUBBALfAUcB0QDrAOAAiwDZ/1H/q/9R/6v+fP67/X/9if02/az9Rf0N/f796P2z/er+q/8e//n/CgGDAEkBjwIdAkUCBwOcAn4C0AJEAr8BxwFIAaQACQBs/zP/hP7y/eT9tv2J/dv9ZP5w/on+Wv9R/2X/igAHAXgB0gFMAlICiwLBArAC8gJiAn0C6gEHATQBlgDl//f/Nv+O/qn+pf1n/SX9D/wY/Eb8c/xI/B/8i/wM/V39PP5g/5n/cgC/AfQBDALMAiEDYAP/AxoEmQOgA5QDkgIxAhACJwF+AMr///60/oP+E/5O/VT9j/2A/bv9mv3T/YP+G//I/5EAaQGgAYcB7QFdAqQCsgKgAooCEgL1AfoBEwG5AH8AbP/p/jH+9/3E/WD8F/y6+637k/yI/Df8UPwM/cT9Zf5A/9r/9ABKAtMB7gGEA7sD6ANPBGkEUQQGBP0DLwNPArYB7QAgACP/mv4q/ur9dv2E/Gf8e/yH/Lr85Pxu/ST+4P7g/0QA5wAiAjcCKQKrAgQD/wJKA0MD9AL5AmkCeQEkAfwAEwCf/9v++/3G/dj8+fu5+5z7svsz/P37YPvy+9b8Y/0N/t3+av9bAIwB7AFUAkQDRwS4BPMEPQU4BUMFxQS4AwsDOgJvAbQAZv+l/nP+vf3S/Iz8Qfz3+4n8xPx1/B39Sv4N/+H/igD0AHoBKwJqAo4CPwOaA4wDUAOLAksCYwIHAm8BKABR/8v+EP5Q/Yn89/sb+0X6OPpR+977QPuY+0/8NfxD/Tr/2v+NAOIBGQJVAtoDNgV+BfEFQAbjBVMFEgWGBGwDwwLRAa8Adv+a/jP+dv3V/Dj8JvvY+m77rPse/BP9v/0q/hj/LQA6AW4CPAMnA2UD5gMvBJYERQSiAzUDzwIVAg8BUgCp/2H+Lf1p/FT7u/pz+qP5h/m1+nP7xPr6+uX7K/yF/RL/if+AAEMC3QKuAiwE+gVABoMGlQbZBboFngWeBFUDcAK4ATsA5f5Z/pj9F/2H/KP7SftV+9P7Mfxb/BD9SP5H/xUAFgHHAYYCSwOTA7YDXASPBBUE7QOQAyMDtQLVAc0ABQA+/2L+mf3M/Aj8QPuC+vf5JPlY+Xz7BPzh+ib7//t5/Cb+xv8bAAkBbwL
pAgcDnwRoBrYGjAY1BqYFmgW9Ba4E8wK+Aa8ArP+V/sv9Lf02/OL7pfvZ+h77N/xY/Jr8ev32/dX+sgCnAcgBvgKIA6sDNATzBNwEsATOBEQESwPlAmsCYwFrAAT/rv3S/Ob7Evs4+nn5gPgi+Nj5SPuv+kj6KPvq+xr9NP+mADsBlAJ/A0MDgQSFBmcHdQfvBgwGvgUDBl0FpwMeAsgAYP8//oP9Df2F/O/7UvtE+oL6D/yd/MP8b/1Q/jn/pQAbAokCEQMZBNQDlwO5BDUFuwQcBI4DDQOJAvgB/gDD//X+FP64/If79vqg+gL6wvjY95z54ftn+4L6Afvm+5D9m/9sABwBsgKOA5gDhgQpBnQHrge0BpwFvQVOBmMFegO9ASQA7P4a/kv95vyo/Cv8bfuN+tT6ffwZ/ZH8Tv2d/o7/FQFUAmUCHANHBC8E2wOIBEQF1AT9A5MDOgPgAlwCPwEMAB3/cP7A/ar8yvtU++P6QfoP+UD4uPnN+1n7S/oQ+x/8HP3T/vv/rAAiAg8DaQN7BFMGcQcvB6QGQgY1Bj0GiwXuAxoCqQC4/67+xv1g/bn84PsQ+9P6T/tS/A79Ev2Q/Z/+9f+DAZICAQObA0wEggSkBEkFjAUMBXAEuAPaAh4ClgG4AIP/l/7Q/fL8H/xY+x77uPrL+QH5dPjE+Rf8Cfyp+r76Avwv/bX+RQD4ABICnwPxA0UEJwaQBzsHSgbVBcwFxwUTBVUDRwHU/zz/Kf60/Ir8pfy2++P6wPpJ+3X8l/0g/hj+OP+GAbUC4wKhA5gE6gT6BAgF4AQJBSkF+AN/AvMBmAHtAAAA1f7t/Z79NP0N/EP7e/tC+0H6OvkL+Zz6UfzB+2D6pfr8+0L9xv7q/50AKQJrA6EDjwR2BiwHYAb1BdwFvwXaBb8EKgI4AM//bv8S/tL8Ovyt+1b7X/ty+7/7evxe/SD+vv7S/5IB2gI8A54DZQQsBYAFYQUpBdUEewT5A9ECfgH1AO8AKwCK/pb9kf01/Zj8H/zH+3/7EPsn+hr50/n4+/f7O/rz+bv6wvuw/Wf/8/+vABgCPwNCBJcFdwaOBikGwgXmBSgGTQVQA44BkgDV/+3+f/0g/LP7lPuJ+7n7hfun+9P8Nv4u/+n/IQGWAmMDwwNhBCsFiAVNBdcENwQtBDkEzAIHAYkAlADy/9r+Hv7A/Zr9if33/HX8sPyk/HL7NPo6+jb7EvzE+4/69vmZ+hb8o/2+/p//fAB6AdgCIAQPBZEFqgV1BaIFcwb/BTAECQNXAm4BpwBT/7z9IP3W/P77pPtT/I/8TPzH/O79OP9eAPIA+ACzAUMDCAQDBHIE0QRsBNUDngNaA30C4AGWAckABQCd/xf/qv6U/hj+HP2l/Pn8rPye+7/6HPqB+sz7dPuh+UD5b/qz+wf9O/7+/hEAiAF/Ag4DSwSDBboFrwXpBcAFJwV2BM8D5gLMAa0AEf/N/ZH9Qv2f/GH8RPxZ/Pr8v/2F/r//igCIANAAxwHvAtEDDgTRA5sDlgNHA7YCzwLMAqABbAAPAAQA2/+K//L+Bf6E/bT91v2b/R39PfwG+3z6jftx/Cb7ePly+Wj6x/ta/UD+mv4//0kAcQGfApADKQSiBM8E4AQtBSwFhQTXAzEDLQIFAdb/2/6H/mb+Df7Q/bn9rP3i/Yb+Sv+n/5H/gv/q//QAHQK9ArMCiwLqAkID9wK0AqUCJQJ9ARsBvwCJAKkARwBR/wL/Pf/7/oL+Sv7Z/UT9y/y+++v6kPuV+5j5KvjM+CP6avud/CL9eP3f/pgAWgE4AtkDnQRKBI4EgwU8BmEGpgU1BBUDpwKnAQoASP84/7n+Av5v/V794P1p/n/+VP51/u/+fP8/AB8BwQFGApICwAIMA0sD+QKDAogCZQKPASUBhAGgARMBdgAdAN7/y/+D/5/+H/5Z/uf9lvwW+zP6WPoH+jb4/fag9w/5cfqw+5r8pf0e/40AngHVAi0E/QR7BTUGTAc
QCLgHRQa5BJ8DMAOAAtUAG/9X/ur9q/25/W393/zy/If94P1p/k7/yv/E/3MAxQHcAmUDZgMYAzoDzgMNBF0DawIHAv8B8AHIAZgBOQG6ADkAuf9l/2n/Mf+s/uX92Pxs+835Bvkf+Q348vWB9Tr3RvnA+hr8L/0y/qD/IgFtAukDcAV0BuIGhwd3CFEIwQZCBXMEygO5AuoA0v7L/c/9hf3S/Gv8a/xL/HP8iv3//pL/KP8s/4sAUAJ/A9wDjANxAz4ECAWqBGIDYgJnAoACYgJiAjECgQG1AFUAcACHAD0Aff96/g3+CP4l/Sr75Piv9+L3lvdR9i32u/eM+RH7jfyz/Zn+8/+GAe4ClQQUBrAG5gY6B5oHgwdvBsYEiwPHAtoBVQDo/kf+8P04/Xr8LfxY/OD8Wf2L/fj9EP/C/xAA/QBLAiMDwAMvBDwEMgRbBC4EWAPYAswCXgL5AS8CUQL1AUwB6wD0APAAkQDt/yz/q/4f/hX9kPvf+Xr42/f29833/Pb79q34wfpl/In9Q/5M/wUByQLeA4wEhgV7BgoHQgcIBzoGBgXrA0MDcAI+Aer/Yf4c/af8gvww/BH8//vu+6X8Hf4G/0P/0//5ACQCHwOuA9UDNwTBBPAEfASRA9UCsQKqArcCugIpAlcBIAHHAUACpgFuAIr/NP9L/+H+pP03/Bj7BvrB+AX4ZviG+ID3RvcD+XT74fxz/Rb+WP/nAIcC1QPWBMMFgQbVBnAGyQVQBdgE7gMBAw8C7gB0/wz+Hv2Z/Bj8rPt1+4n7VPyk/db+mf9FAOcAswHKAvIDWgQ4BD0ETwQxBPIDdgPnApUCTAIPAi0CxQI7A/wC9QEeAScBVQGMAEH/Sf6W/fH8OPxO+zT6APkE+Nn3Jvgg+Cb47/gU+lj7Jf0O/wsAiwCwAV0DnwRABYsFlAWfBcgFrQX1BMsDwwLhAegArv83/gP9V/wO/OT79/tq/OP8hv2+/un/VACSAFcBJAKMAs0CDwNDA4oDugMtA28CjAI0A4cDbwNTA3EDkANhAwwD0AJAAhQB9v99/1v/EP89/sD8RPt2+lb6HfoT+ef3svcD+Bz4sfgj+or7mvz6/Wf/eACeAQADxQPlA20EewUXBrIFFAWZBOwDAQM/AjIBuf9e/mz91fyx/Or8D/1W/a39Q/4z/zAAawBHAI0AJAF/AZwBBAKHAu0CRgOBA2MDWAOGA90DFwQEBLgDZwMoA9ECZgIJApsBwACw/9/+cv4k/rX9w/xS+2P6Q/rT+ff4tPjy+L34nfiY+Wb7/vwO/hP/KwACAbABggIXA3ED7wNcBDQEBgQxBPkD6QK/AfsAVgBw/2v+n/1l/an9y/3G/Qz+kv7f/kz/x//I/1f/gP9iABwBZwHMAToCYgLHApcDBgTSA7IDswO6AwgEdAQvBGwDyQJpAgcCkAHFAJ//hf66/Sn9mvz8+yj7Yfrm+bj5pvm4+bf5nPn5+TT7r/zS/cT+mf8lAJAAHQGsAR0CbgK5AhcDfQOPAy8DkQIVAtABagGvAOD/Uf/i/ov+Yf5K/iL+Gv4u/iH+Sv7q/mn/Wv9m/+//uQCUAVoCqgK8Am4DfQT2BKgEZQRiBHcEeQRqBDkE3wNgA5gC9AFvAWoA0/6d/fb8cPz4+2r7qfpO+rL64vpu+kn6svrK+pj6IPuA/Nr9nv7b/s/+CP/f/7MA5wD+AK0BjgIXA0sDMwPSApgCewIjApsB+QD8/wD/kf6E/mD+9P1d/QX9R/3V/SD+Kv5k/u3+ov9YADUBIgL9ArcDeAQ/BdAF+QXeBdQF8AXhBWkFyAROBOED+QKPAf7/tP61/fP8TPzh+8L71fvP++H7QvyG/Cn8mfuV+9P73vvW+y/8u/xo/ez9D/7M/cn9WP4v//j/uwCrAXMC5QIgA28DgAMrA5cCJQKpAfwAMABo/6X+AP6N/f78aPw3/Ib83/xI/ff92f6j/2UAWAGNAtID3gSzBUoGwAY4B4sHRwe
bBgEGbgWuBNUD1AKOAV0AV/87/hf9ePxU/F38fPy+/Bn9ev3d/f390f2Q/X39W/0A/YD8IfwO/Dn8d/yt/N788Pwg/b79tv6g/4cAkAE5AmcCowIEA+ECcAIyAukBIgEbADr/aP6s/QL9hfwp/C38s/xp/ef9ZP5M/2YAPQH0Ae8C9gPPBJwFZgbMBu4G4gaZBvoFUwWyBOMD6wIYApcBCQFAAC3/R/7A/ar9tv3U/Rf+jv4U/2n/lf+b/4//M/+O/tH9Nf2O/M/7KfvO+sn6DPtl+5v7zftT/CH97v3P/rr/fgAXAa4B/wEAAgQC8gFvAbAAOADX/0r/xv5w/iL+1f3L/en9Gf57/kr/KgDQAH4BbgJLA8UDQgTlBGMFlQXUBfIFpQUOBY4EDwRYA50CKALcAXoBCAGeABkAi/9F/0r/Wv9x/9T/TQCUAKEArgBzAOD/GP9J/mn9ovwI/Fv7jvr/+f35VvrU+jb7ePuv+yn85/zC/YT+Nv/h/3kA7AA4AWoBdgF/AXQBVgEeAd0AegDj/0r/6v7L/rL+of68/h//tv9vAAkBTAFuAdgBoAJqAwAEagTDBN8EzgS3BJYELQSsA1cDCAOzAosCcAIFApcBWwEMAYYAQwBHAGAAjgC5AI8AGACh/yv/vv5I/r39Cv1L/HH7p/ol+gL6Dvoc+kH6YvqY+v76mvsb/Jr8ff2U/m3/EADMAFEBowH6AVACTAIMArABLAGPAPr/b//R/l7+RP6L/vP+T/+3/0cA3wBYAcwBXgL9Ap4DNASxBAEFOQVUBToF8ASiBFEEAAS/A44DawNHA+oCFQILATIAsv9o/0P/Q/90/6b/rv+V/4H/Rv/T/kr+sv0S/ZP8S/y9+wT7kPpu+hf6tPmj+eD5QPrM+nL7Dvzn/Ob9uv5U/xYA0QBOAakBAwIyAhICqAHzAD8Av/9h//3+sf6V/rr+Gv95/7r/DACRABgBtAGBAlkDBQSdBDcFtQUVBkMGHwagBRsFtwReBPADcAPPAhQCXQGkANb/E/+f/qD+AP+K/xsAogAUATkBGAHJAGAA2P9K/7T+9/0Z/RL86frD+Qv5qvhc+CP4LviA+AX53fnX+t373fz3/fn+2v+nAF0B6gEoAkYCNQIIApoBGgGWAC0Axv9b/xb/A/8u/2b/z/81AL8AZAEyAuUCgQMfBKgEIAV8Bc4FwAVyBQcFoQQcBJwDIgObAhwCvAFxAQ8BswBjADUATgDCAEsBsAHhAdgBoAFTAfYAYQCn/+H+If5W/W38Rvv++e/4PfjW96/32/cw+Jb4Jvno+bn6kPuQ/JX9gv5m/z8A7gBwAecBJAIVAvYB6AGtASwBrgBQAP//r/99/3X/pf/9/1oApQD+AIABFAKeAhMDigMGBGEEnwTJBOgE4ASuBFoEAgTEA40DYAM7Ax0D2gJ6Ag0CtQGMAY4BhgFhAUcBPQEWAcoAeQAdAJv/Bf9w/s79B/0g/CX7LPpv+dv4Vfjb97z32vch+Ib4Jvnp+cf6zPvY/On95P7W/4wAPgHiAXMCvwLdAtICngJJArsBDgFKAK3/MP/y/u7+LP+L/xcAxgB7ATsC6AJpA8QDQwTgBFQFhwWJBW8FLAXiBI0EJgS9A2UDDwOpAlEC7QF5AQMBywC4AL4AvgDIAOgAGAE+ATUB9wCDAOf/If9E/kX9KPzx+s/50/gS+IH3Mfcj91732veE+FH5PvpC+0n8Y/2H/p//fQAsAb8BRAKhAsoCtwJvAvYBUgGJALf/Fv+v/ov+pv4M/5r/SQAPAdkBiwIjA64DKQSbBP0EQwVrBYgFkAVfBfsEhgQKBIkDFwO/AnACGgLMAY4BXAFBAUcBawGHAaYB1AEEAhoC/wGrAR8BbwCn/7X+g/0k/ND6mvmT+L/3Jve79ov2s/Yv9+P3pfh2+V/6cfuT/LD9tP6f/2IAAwGVAR0CfAKbAn0CNwLWAVkBvwAcAJ3/Vf9M/3D/w/8lAJcAEgGuAU8C6QJ
dA7UDCQRpBMwEAAUCBdYEqwRiBA0EqQNbAykDCgPzAtYCwAKfAoACVgI6AjICQwJNAjkCHAIBAtEBYQGyANj/8P4B/gj98/vC+q35zfgY+Hr3GPcJ9zv3lPcD+JH4TflB+kT7Ofwl/Sz+Pv8uAOoAjAEYAnkCmwKAAjoC2wFlAckAGAB7/yT/+v7z/h//n/9UABwB0wFwAvICZgPNAxwEWgSLBL0E1wTmBNsEuARzBDQECQTwA9sDvAOQA1ADGQPiAqMCVAIRAuEBwwG3Ab0BuwGNASABfwC7/9n+2v3J/LL7uPre+R/5cfjm94/3bvd896/3Fvib+EX5BPru+u379/z6/fX+4P+wAGIB5gFCAmcCWAIOAqUBHwGGAOH/Tf/n/r3+0v4k/6r/SwDtAIABDAKWAh0DiAPeAywEhgTlBCgFRwVEBTQFEAXZBI8ETAQSBNcDkQM5A+oCngJiAhgC4gHOAe0BFAIkAhsC8wGqARoBTABK/0D+LP0n/DT7Y/qe+dz4JviX90f3MvdR95/3Gvi1+Hj5avqC+5n8mv2K/oL/dgA6AbIB9gEcAigCCAKyATQBqAAjALL/WP8t/0T/lv8SAKAAOgHNAV4C1wI2A4AD0QMqBIQEyQT0BA8FEwX+BL0EXwQABLcDigNtA08DKQMBA9wCuwKZAnwCbAJkAl4CTwI8AhQCvwEmAVQAYv9i/mP9Yfxy+5b61fkd+Xv47veH9073SPd599j3XPj1+Lf5oPqu+8D8xf2t/oT/QgDhAFcBqgHjAQEC+gHDAWkB/ACWAEAACgD0/xUAYgDXAFkB1wFCApkC4wIfA1oDlAPXAxcEWgSKBKQEoASOBHQEWQRABCUEDwT9A+0DxQOcA2oDNgP9AsICkAJyAmICQQL9AZYBFgFtAKP/wP7b/QL9Ovxp+5H6tfnq+DX4n/cy9/b2+PYx95z3Lfjs+Nr54frr+/L8/f0H/wEA2ACHARQCegKsAqACYgIBAocBAAGCACMA5f/c/wEARwCWAOoARgGbAewBQwKlAgkDeQPrA1MEpATfBAUFFAUTBQQF5AS5BJAEZwQxBOkDogNfAxoD1AKaAnYCZAJXAjoCBwK4AUwBugACAC7/XP6N/b783vv6+hn6RfmC+NX3R/ft9uH2Evdv9+/3pPiK+Y/6lvud/KT9of6H/0cA8QCDAfgBMAIwAgMCwgFlAfMAhAAuAAQA8/8FADcAmQAOAYMB4QE5ApEC8gJXA7cDFgRrBLME1QTdBNME0QTNBMMEogR8BFkEOAQMBMwDkANXAyoD8AK6Ao4CdwJeAiwC1gFgAdcAOQCF/7n+7f0p/Wr8n/vP+gH6R/mc+P/3fvcu9yH3WffG91r4E/nu+dv6yPuq/IX9Xv41//z/pwAsAYsByQHiAc8BlAFHAfwAuQB9AFQARwBnAKkA8wA+AYYB5AFJAqsC+wJHA5sD8wM3BFsEdgSUBLkEyQTNBL4EtwSjBH0EPQT2A7gDfgNEAwIDzwKuApwCegJCAvYBmgEkAZQA5f80/4f+2P0e/VH8gvuz+un5Hflh+MT3Xvcs9yz3XPfC91b4DfnQ+aD6hPty/Gj9W/5D/xsA1wBoAdABDAIiAgwC3AGcAVUBDgHPAKIAlQCsANUADAFSAaYBAQJXAqgC/AJNA5YD0gMFBDMEYQSIBKUEtgS/BLcEmgRrBDUE/QPEA4oDVQMpAwYD5AK9ApICWwIaAsMBVAHRAEIAqP8C/1D+lf3X/BX8VfuV+t75Nfmm+Df47ffQ9+P3I/iK+A35qPlX+hb73/ut/Hr9Qf79/qH/JwCNANgAAwENAf0A3wC+AJ8AiwCFAI4ArADZABIBWAGqAQICYQK9AhIDXQOeA9UDAQQlBEUEYwSABJgEoASZBIUEYQQxBPYDsgN1Az8DDwPkAroCkwJsAj4CAwK8AWkBCwGlADEAsf8r/5/+D/52/dj8NfyW+wP7evr/+ZP5QPkF+ef44vj6+DH5iPn7+YH6Fvu
1+1f89vyM/Rr+n/4U/3z/0P8VAE4AegCfAMAA5QARAUUBfgG8Af0BQAKCAsYCBQM+A28DkgOtA78DzgPgA/QDCQQaBCQEIgQVBPoD1QOsA4QDXwNAAyMDCQPtAtACrwKKAmECLwL1Aa4BWgH6AI4AGgCh/yL/nv4U/oL98Pxd/M77S/vT+mr6F/rZ+bT5qPm4+eD5HPpm+rn6Dvth+7L7BfxZ/LH8Dv1x/dX9N/6R/uH+Lf9y/7n/AwBTAKgAAAFdAboBFgJvAsICDgNOA4MDrgPMA+QD+AMLBB8EMQRABEoESwRDBDIEGgT8A9wDuwOZA3YDVQMxAwwD5gK3An4COwLtAZcBPAHbAHgAEACk/zT/vv5B/sP9R/3P/F/8/fuv+3T7Tvs6+zT7PPtO+2n7ifuw+9r7Bvwz/F78g/yi/L/82/z4/Bj9O/1j/Y39u/3w/S3+dv7K/i7/m/8QAIoABQF6AeUBRQKTAtICAwMrA0wDawOLA6sDyAPjA/gDBgQPBBAEDQQGBPwD7wPeA8kDsQOSA3ADRQMUA9kClwJOAvsBpAFFAd4AdQAHAJf/J/+6/k7+5/2G/S393/ye/Gz8Sfw3/DX8PfxQ/Gn8g/yb/LD8vvzI/M780fzY/OL88fwE/Rj9Mf1K/Wf9if20/er9K/56/tP+M/+Z//7/XwC4AAkBUQGKAbwB4wEDAiECPAJXAnQCkwKyAtIC8wIWAzgDWgN5A5UDqwO5A7wDtQOjA4kDZQM8Aw4D2wKjAmYCIgLaAYoBMwHWAHcAFgC1/1T/+P6d/kj++P2v/XL9P/0c/QP99/zz/PL88vzx/O786Pzi/N/83/zn/PT8CP0c/TL9SP1i/X79n/3H/ff9Lf5p/qj+6f4q/2v/rf/r/yYAXwCTAMYA8gAdAUgBdAGiAdQBCwJFAn4CtwLtAh4DSQNvA40DpAOzA7sDvAO1A6YDjwNwA0kDGAPfAp4CVgIHArUBXwEFAa4AVQD//6v/W/8T/9H+mv5t/kj+K/4V/gT+9P3m/dn9zv3B/bb9rP2g/ZL9hf12/Wr9Xf1U/U79Tv1R/Vv9a/2A/Zv9u/3f/Qr+O/5w/qn+4/4c/1T/hv+2/+H/CwA0AGEAkQDHAAQBRQGMAdUBHgJkAqcC5QIcA0oDcAOKA5sDogOdA48DdwNVAyoD+QLBAoQCRAICAr8BfAE6AfkAugB7AD0AAwDK/5j/bP9I/yr/E////uv+1P68/pv+ef5S/in+A/7h/cH9qv2W/Yb9e/12/XH9cf12/YH9kP2m/cL94v0G/iz+VP58/qX+zf71/hv/QP9j/4X/qv/L//D/GABHAHoAtQD3ADoBggHJAQwCSQJ7AqECvALKAtACzwLMAsUCvQK1AqkCmgKFAmsCTAIpAgQC3AGxAYMBUQEeAegAsAB6AEUAFADp/8L/nf97/1v/N/8R/+j+vv6S/mj+Qv4g/gL+6f3Z/cv9wP25/bL9r/2s/av9r/22/cD90P3i/fr9Gf48/mP+i/62/t7+BP8m/0L/Xv94/5P/sv/Y/wUANwBwAKoA5QAgAVUBhAGuAdIB8AEEAhYCIgIsAjECNQI1AjMCMAIrAiMCFgIGAvIB2wG9AZsBdgFOASIB+ADRAKsAiQBqAE4ANQAaAPz/3f+7/5T/af89/xP/6v7D/p/+gv5n/k/+Ov4n/hX+Bf73/ev94f3c/dr93v3m/fb9C/4n/kT+Y/6B/p7+t/7Q/uj+Af8f/0T/bP+c/9D/BgA+AHgAsADkABEBPAFgAX8BmAGsAbsBxAHKAcsBygHHAcUBwQG6AbEBpwGXAYUBcAFYAUABKQEQAfkA4QDPALsAqACUAIEAawBUADsAHwACAOT/xf+k/4P/Yf9C/yH/Av/k/sr+sP6Y/n7+Zv5N/jT+HP4K/vz9+P35/QX+Fv4r/kP+Wv5v/oT+l/6p/rz+0/7u/hD/Nv9h/5H/xP/4/ysAXACJALEA1ADzAA4BJAE3AUoBWQFoAXUBgAG
IAYsBiwGKAYIBeQFuAWEBVAFGATsBLwEkARoBDwEEAfcA6QDYAMQArwCXAHwAXgA8ABsA+f/V/7T/lv94/2D/SP8z/x7/Cf/y/tv+xP6v/pv+jP6C/nv+ev59/oL+iP6M/pD+kv6S/pL+lf6Z/qT+sf7F/t7+/P4e/0L/av+T/7v/4v8IAC0ATwBwAI4AqgDEANsA8gADARIBIQEsATMBOQE8AT0BPgE+ATsBOgE1ATMBLwEqASMBHAEUAQcB+QDpANgAxACtAJQAeQBeAEMAJwAOAPj/4f/O/7n/qP+X/4f/dv9l/1P/Qf8x/yL/E/8H//z+9P7s/uX+3v7W/s3+xf68/rX+sf6v/rL+uv7H/tn+7/4G/yD/PP9Z/3f/lf+0/9L/8P8PAC4ATQBpAIUAnwC1AMwA3gDsAPgAAQEIAQwBEAEQARABEQEQAQ8BEAERAQ8BDQEJAQMB/AD0AOkA3gDRAMIAsgCgAI0AeQBjAEwANQAfAAYA7v/W/73/qP+U/4P/c/9j/1f/TP9B/zf/Lf8j/xf/Cv/7/ur+3P7O/sP+vP66/r3+x/7U/uj+//4Y/zP/Tv9p/4L/m/+0/8r/4f/4/w4AIgA3AEoAXABtAH4AiwCXAKMAqwCzALoAwwDLANQA3wDsAPkABgERAR0BJQEoASgBJQEeARQBBwH4AOcA1gDGALUApQCTAH4AagBWAEEAKgATAPv/5f/L/7T/n/+L/3n/af9c/1H/R/86/zH/J/8b/xD/B/8A//z++/7+/gL/B/8Q/xn/JP8x/z3/Sv9Y/2L/a/92/3//if+W/6b/uP/M/+P/+v8PACYAOwBOAF8AbwB/AI8AoACzAMMA0ADbAOgA9AD7AAMBBgEEAQYBBQEAAfkA8ADoANgAxwDBALsAqwCbAIsAegBsAF4ASgAwAB0ADADz/93/x/+p/5L/hP93/2X/VP9L/0P/OP80/zD/JP8V/w7/Df8G/wb/Ef8X/yL/MP85/0P/Sv9O/13/bP9p/3D/hv+Q/6D/u//Q/+X/+f8XAD4AZQCaAPoAYAF8AaQBpgEqAaMBAQL1/07+UP+bABsATv+//z8Ajf/F/lj/egDuAO4APwHWARwC2gHwATQCIQMNBMkChwFeAbQAMgAk/xD+v/2K/a796Pwx/a7+3/5t/nv+K/+i/w3/Uv9sAJoAxAAVAekB+AF0AQMCXwFiALYBiAGk/93+FP6F/dD8Zfw+/Bf8YPwX/A38bPzm/HD+SP9A/+oAQAIMAkkCyQJ3A4YD+QJtA5gDoQIQAiQCPwHu/3f/k/8R/7P9vP3v/j//Rv4P/kj/pQBRADP/dwDNAbEBWAGIAbUB0AHXAbcAyQBVAdsAFAHDANr/NwA6ADP/Ff9M/xb/YP+X/2//CwBwAN3/u//J/7H/Zf9m/8f/vv+T/5j/9/+G/9H+KP9d/w//Ff+a//b/4v/W/8X/xf+r/2L/nf/x/7b/gv9vALkAyv8BAHIA8/+Q/7//DAAqAPj/z/9bAIQAwP+x////CgBCAEQALABeANQA3wC8AAcB8wCbAIEAoQDPAJkAfgDXAPwAZwAeAFsAWAAyAM3/Y/+U/wQAqP/1/uX+RP9V/y3/Wv+F/8z/MQAeAML/dP/S/xkAJ/91/u7+u/9B/5T+b/8CAKn/Yf9p/77/+//Z//v/IAAcADcAGwAaABYA//8BALf/kP+j/53/+v9oAI8AjgBgAHwA+ADrAK8A5AAHAR4B7QDGAO8AzAByAEoAwABDAeEAlwDuABQB+AByAFIAvADUAGoA7v8qAH4AJQCh/2z/bv+v/2//+/5M/4//jf/W/+3/gv9T/1//9v4x/j/+Y/7f/ar9k/3L/Qn+pv2G/fn9Gf4a/in+iv6K//7/CQDcAJwBewHUAcAC7gLLAksDTwOyAm8CDgKaAYoBMgGRAO0A/wATALj/IAAmAIz/of8FAMz/GQCcAJUAvQDJAPYA/wBKADsAtgBWALz/kf+b/1P/rP5C/tb9xP1
0/Y78U/wF/In7Zvsb+yH7QPtJ+1v8Uf3r/X3/WgH8AkAEtAURB0YHPAcMB/oFUAXqBIUDPgJ8ATEAuP6f/X381PuJ+9H6SPrd+sn7O/z2/Fj+3/86AV0CNQPvA4EE1wSjBCIE9QOLAxYDywJWAigC/QFXAWQAd/8f/5/+sf10/U395PwI/Xn9Uf2k/Hv8qfwG/BT7BPuF+zn85vxg/b3+9QA6Aq4CoAOtBEsFogVwBcwEfwQhBHYCuQBdALX/if7M/dz8Lfwb/KL7Fvs3+777efx9/dr+HQCbASUDoQOGA+sDDASTA0wDTAM0AwYDLgMvA/gCWALaAcABOwGBAGsAkgAfADr/eP4u/i/9wPsv+y77xfoy+qD5CPkG+TT5ivmy+kL8fv1c/6gBygLLAwIGcweQB+gHRggLCNYGGAWyA00CUAB4/o798Pwf/KP7gPtZ+0H7lvtN/KT8Z/0u/8oAqQFlAmUDCgTpA28DOQMIA9wCnAJRArEC0QI4ApgBWgEuAWkAvf9c//b+Bv/l/jr+Jv7Z/dP8Wvw0/NL7c/tp+1L7xfoJ+v/5ePvL/Av9cP6YAasD/wP+BK4GjwfXB/MHNQeUBhwGiwSBAucAj/9e/kb9zvum+sX6/voo+iL6j/u2/OD8lf2L/+8AiQEyAhYD/gM8BMYDsgOrAwkDmgKnAoQCoAEIAQABkAC2/3b/lv84/+v+Mf+k/6T/6/7z/az9S/0H/Nv6kPoP+iz5Qvmw+Sz6SPuo/BH+nf+eAdMDWQWyBjkIDQkPCegINgjuBp4FtQOEASIAhP5z/Lj7Sfsr+sz57PnT+Z/6Hvwf/YH+dwD5ATsDPgRvBLMEGAV+BHgD9AJ2AoUBBwHeAEMALwCKAB4AU/8//5n/Wv/g/vb+P/+g/5v/fv61/ZD9/vzq+8z6Kvrv+W/5Pfmt+Yj62Pt//b7/vAFkA6oF+Qf0CGYJGQr+CeYIdweeBZoDzgHF/8j9IfyN+oT5SfnJ+Ez4Eflw+oL7Wv3s/50BogIuBG4FjgVcBf0EtQRDBCsDwAHBACAAlf9h/8P+QP7B/lb/I/8A/0T/jv+z/97/zP/A/y8A+v84/43+1P3X/LP7R/rZ+L73QPjC+Tr69Pq7/c8AQgJ+A30FnwfoCD8JIQkmCYUI6AYeBQIDrQD8/pP9Y/u0+Wf5ivlY+RT5ufmS+1399/5KAXcDewRMBVIGLgYLBTAEugPrAkUBEgAVAMf/xP5//iz/ff8B/5/+EP9W/xH/Sf8GAFwAgwDkAAcBhQDo/6n/4/4y/Yz7jvpy+cn3KveA+JD53/ns+0D/jAFyA/0FOgjOCZAKWQq/CcIIvAaYBOwCcwDh/aP8bvuS+az4oPh4+MX4gvnS+lD95f9QAfoCGQUeBkgGOQZoBWUEBgQVA1gBWgA+ALL/7f5x/ln+6f76/lT+jP6F/8T/6P+JAPYAGQFCAVIB8QCXAA8A6f5Y/Zz73Plf+N322/W49kf4PPk6+/f+WQJ9BKgGVwkkC04L5gp6CisJ+AbCBLQCYQAG/jf8A/uu+Sr4oPck+HX4ffiv+S/8nf66AEgDXAVGBhAHpgfXBjgFOQR/Ax8CgQCq/4X/D/8Z/vb9vP7p/n3+sv5b/33/kP9NAB0BTAGLAUICqQIQAukAGQAE/7/8KPpm+LH25vR89L31Efes+DD8YQBiA+MFtwgBCxIM/wtxC7IKKwm+BnsEjAL5/1z9u/v6+cn3m/aQ9sv29vbY9xj62fwQ/6ABjgRGBvIGnwe1B8oGogWeBL8D2gKtAakAQAB7/4z+ZP5G/s/9i/2e/QH+e/76/pr/cwBLAe0BngITA5cC0wHrABX/z/xg+rn3j/VI9BT0WPUU9/b4V/ynAL0D7wW3CAML+gt4DFIMOgvnCSsIxQVKA64A1/2G+5z5KPdt9Xb1xPX+9Uz3dvn7+5/+6QA0A2sFnwYoB7AHeweaBu0FYgVyBFUDdAKOAVkAHf8X/j79s/w4/PT7SPwJ/dr9x/7
l/wwBRAJIA4EDMgPWAgwCnwB2/qT7vfh/9tL0i/PH8/L1YPgQ+8/+UwJGBQAI1glGC58MjgyyC+wKGgmMBmUEtQGQ/kj8LfrF9yj2PvXA9G71l/a+9w76Ff1b/7ABoASKBkgHEQisCIkI6wf9BlIGAgbBBKsCHAHF/9j9Gfzo+vn5x/l3+j/7Avxd/VD/fAEeA94DiARYBSQFsAP8Af//Xf2q+kn4APbw8/DyF/R59hP47vkG/nQC3gSwBjwJaAuiDNkMDgwmC/MJsAc+BboCc/+0/DP7S/nD9mH1QvVx9cX1bvbt95P6aP0yAEYDlQX/BsMIFArGCSAJ/QiaCJgHwgVAA0IBdP+8/C/62vgp+Ff4lvm8+tj71f0aAAgCrgOgBCUF1AWlBfsDBgI9ACr+FfzC+QP3KfXB9O70xvVa91z5b/wfAMMC2ASTB/4JgQuKDJoMlAtDCloI3AVlA7cACv46/IX6I/gW9in17PTD9Pj0Vfbj+Kz7d/6sAdsEMQfRCCAK3wrDChoKOQnoB+8FsQN/ARj/rfz2+iH61/kM+sv6/vtR/X/+2v9yAZUCNAOoA8QDaAOgAmUB7v9h/qP82foG+VH3Vfb89fr1Dvcu+U/7kf2IAKMDTwZNCJIJlwphCxcL1QllCLAGugQHAyUBqP6A/Ab7ffns98z2GPYj9hv3UPjQ+VL8N//NAWEEhQaFBxUIpwiCCGUH+wWuBGQD8AFnABn/Iv5o/Sn9gP3s/S3+tv6i/zYAWAC8AGYBoQF4AUkBIgHDAOD/u/7W/dj8a/s2+nb51/iA+MD4p/k7+x/9Cv81AW4DLwWLBnoHsAdwB8EGfAUrBC4D6QFxAFf/ZP53/bX80Pvv+pj6gPp/+gv7CPw4/ev+1ABHAkoDEARVBOsDPAOnAv8BRwHuAPwAHAEzAVEBjAHkAQwCHwJaAnYCTQI1AjIC7gFoAdIAUwDU/xn/N/50/az8yvsp+8z6WPoe+qf6aPvZ+5P8+P16/9QAVALCA7AEIgU8BfkEGwSPAvsA6//S/on9wvxt/Cr8SPza/Iz9Sv75/pz/YgAAAUgBsQFGApACowKxApICCwIwAT8AVf9t/sH9hP19/ZH9Hv48/2EAVQFHAkwDOwTTBNgEowSDBD0EwAMpAzkCIAFxAML/lf5L/Tr8WPvU+nv6JPpt+p77Iv24/mIA2QE7A5sEaQVlBQIFbQSaA3ECtwCh/sD8RPst+lz5ivgk+LP4sfmj+uL7Zv3b/l8AwgGFAgIDqwMZBAUEnwMCA3MC/wEXAe3/VP8X/8D+oP6q/sj+bf9hABwB6AHdApcDNASXBGgEBQTKA2sD3QIwAlIBhwDd//P+3/34/CH8Xfv3+uT6B/uq+/P8oP5EAIsBqQLeA9IEHgX1BKQEKARKA+sBRwCr/jb9A/wl+2f62/nm+ZD6Xvso/CP9Zv6b/2YA3wBWAbcBwgGjAYMBIgF1AOz/ov8t/4f+Lf5L/n7+ff6h/kL/DgCxAGoBXAIbA20DqgPyAwAEtgNHA+sChQLlASwBjwDr/0f/0v5Q/qj9R/1j/bP9Bv6B/lz/bQA8AckBdQIlA3oDmgOoA0oDWQJaAYQAbv/+/c78OvzX+1T7E/tv+w38pPxS/Q3+mf4K/5T/FwA+ACIAEQD8/5//E/+Y/iX+zv3L/fj9G/5h/vX+rf9pAA8BnAE8AvkCegOOA2kDRwMuA/ECaALTAW0BCwGOAB0Au/9X/w//5P65/pb+l/7c/mr/AgB7ABUBzQFOApgC0wLsAuACtQJcAugBZAGyAAUAgP/V/hD+oP18/Vj9Rv2F/Qn+l/4Y/6H/GgBNAFEATQAVAIz/5/5Y/sb9LP2s/Gb8Vfx6/OH8ev0S/pX+Kv/p/5QA/wBjAeQBTwJ1AmICKALkAZABGAGoAFUA8P+d/47/ff9U/2f/p//P/wAAXADJADIBigHsAWcCvwLgAg8DRgMwA/ACyQJ1ArgB3QAtAIv/xf7//Yr9cf1u/Xz9z/1
j/gn/vv95AA8BeAG4AcQBjwEMAUkAg//K/vj9K/2k/Fn8O/xa/K78NP3i/Y3+L//U/1kAsQAPAWABbwFWAS0B5wCaAE8A7P+G/z7/B//Y/qr+gf6B/sP+Fv9i/8z/VQDTAE4ByQEsAoAC0wIdA04DWAM0A/QClAL8ATIBZgCc/+f+b/44/iD+N/6a/jL/0v91ACgB0QFXAroC9wLvAo8C7gFBAZEAu//D/uP9Pf3G/Gz8PPxG/IX8+/yl/VD+2v5b/+H/TgCOAKUApQCgAHgALQDx/7X/V/8H/+n+wP6J/m3+cf6V/sz+Bv9m/+v/VQC3AEABtAH9AVcCogK1AqsCjAI7AtABPgF8AK7/8v5V/gH+6v3n/T7+Af/A/1cACgHRAXgC+gJMA2sDbQM5A7QCFQJWAWwAqf8b/3P+2/2Z/Yn9jv24/QL+b/7v/mX/2v9AAGgAcACRAJoAVADr/5b/UP/x/n7+MP4O/vf97P33/Q/+O/6V/gj/dP/T/zQAmQDpABkBQgF1AaIBtAGeAWgBFAGiACgAsv8t/7L+af5X/mr+mP7r/nr/MwDtAJIBKAKzAiADZANvAzkD1gJVAqoB5AAlAIf/C/+y/oD+c/6B/qn+6/5F/7z/PwC5ABIBQwFWAU8BIAHJAGUABACT/xr/uP5l/hD+1P3K/d798P0O/lf+wf4y/6D/EABxAKoAxADSAMEAhgBHAB0A6f+Z/0b/DP/W/pf+Zv5O/kf+VP6E/uT+aP///5wAQQHWAUMChAKlAqECdgIlArgBNwGmAB4AsP9d/yL/CP8T/zr/bv+x/wwAfwD6AG4B1gEeAjICFgLSAWwB6QBfAOP/ev8e/83+lf59/nj+gv6Z/r/+9v44/3//xf8QAFoAowDVAN4AwACKAEAA3P9h/+/+n/5p/jz+JP4o/kT+cf6r/ur+K/96/+D/SwClAPAAPAF/AZ8BlgFwATQB4gCBABkAq/9C//b+0P7C/sb+8f5G/7H/IQCaABoBlgEBAlECegJzAkcCBwKmARwBhAAGAJ//Q//0/r3+qv63/tj+DP9F/4b/3v83AH8ArQDLAO0A/QDxAMkAggAuAMv/Zv///pP+SP4y/kj+af5+/rP+E/9t/6X/xf///0QAbAB/AI4ApQCsAJUAdQBEAAEAu/94/zf/7P6o/ob+fv6D/pr+3v5S/9j/WgDcAF8B0wEkAkgCSAIrAvkBrQFMAeIAfQAgAM7/j/9k/0r/Qf9Z/4j/wf/3/zYAggDJAPYADgEdASMBGAH0ALgAYgD//5//SP/6/r7+qf68/ur+H/9R/4b/wf/3/xYAEgD+//X/9P/m/8b/rv+u/7j/uP+u/6P/mv+F/13/J//x/sb+sP68/ur+NP+N/+b/PwCSANgACwErATUBKgEPAeQAqgBhABYA1P+m/4v/gP+E/5b/r//K/+n/EwBLAIcAvADpAAsBHgEhARMB/gDlAMAAiwBIAAcA1v+4/6z/q/+4/9P/9/8aADUARgBOAEcANQAZAPv/4P/G/7X/sP+6/8n/1f/V/9H/zP+1/4b/QP/4/sv+x/7m/hj/VP+T/9H/CgA2AFcAcQCFAI8AggBYABEAt/9d/xj/8/7m/uf+8v4J/y3/Wf+J/8D/AABIAI4AyADwAAsBEwEKAQMBBQEGAfsA3QC3AJwAkwCcAKYAqQCrAMAA5AAGARQBEwEGAfUA3gC1AHEAIwDo/8//yv/F/7P/nf+Q/4z/kP+I/3D/Uv85/zP/QP9Y/3H/h/+W/6j/wv/S/7z/jf9d/yr/4P5w/tn9Rf3U/Hv8QPwf/Bb8XPwg/RH+yf5P/9L/cQBBATUCEwO8AyoEWARxBH4ESAThA6oDnwNtAwoDdwKkAbYA7f9Y//X+uv6c/qH+vf6s/l3+Ef7y/RD+eP4V/6P/9v85AKAAGgGiASQCgAKqAsUCywKkAkgCzwFyAUcBBwF9ANz/Lf9W/pb9JP2t/PX7Jftr+tX5VvkK+VD5O/oy+/L7zPz
R/dL+6/8oAW8CtAPNBIcF7wUYBu8FnwVWBfgEbAS+A+AC0QGvAIv/d/6c/fr8hfxS/FT8bvyc/M/89vxB/eL92f7l/9AAhAEoAuYCuwN6BP0ETAWBBaEFlwVVBdoEKARKA3sC3gElAQ8A3P7w/VL91fxR/LL7I/vJ+o/6Zfo/+vj5oPmM+dD5Ovq3+l77Ufx1/Wb+D//Q/70AoQGcArIDfgTsBCQFIQURBREF4gSPBGAE/gMsAz8CPQEEAPn+Rf6d/Rn90vyT/H38ovyy/ND8Wf0d/u3+3v/QAKYBhwJTA/cDrgRmBc0F6gXTBXoF8ARJBKEDAgMiAuMAqv/I/iX+mP0R/ZP8Dfx/+xj7/voF+wP7Nfuj+/P75/u4+8L7EfyA/Dv9Vf4n/0f/Xf/r/5UACgF8ASMCCQPEA+IDywPfA7cDXANpA5IDMANVAk8BVQB+/4f+cv3N/ID8Hfzl+wn8PPyI/BL90P3V/u//+AAvAnIDVAQUBekFhgbbBgEH1wZoBs8F/AQeBEIDEwKtAIr/uf7v/Rr9QPx9+wz77/r5+iD7WPuI+9r7dfz3/P381/zk/P38Kv3H/ZH+4/7Z/vP+Pf+Z/wgAkgBoAY4CdgPeAw0E+AOUA10DbQNRA+UCLwIUAdX/r/6B/Y38Jfz8++77LPxi/Fz8lvxT/Vr+iP++AOAB+AIJBPYEuAVCBooGrgbABpQGBQYTBRYEagO7Aq8BiQCN/7P+6v0l/Wz84fuB+1r7kfvY+9L74/tW/OD8V/2o/ZX9Wv03/Qz9Jv3i/XL+O/4Q/mv+3P5k/0gAMAH4AasCAwMeA2ADewM/A0EDWwPUAuwBDQHZ/4v+sP0K/Wf8BPyz+3/7vfsW/Gr8Vv2u/qr/kgDLAd4CtwO5BL0FlwZGB5EHZgcNB3sGowX0BHgExQPKAowBEgDA/tf9Cf1E/Mb7dfsw+zL7ZfuG+9H7a/wF/aj9Zf6Y/i/+8f3d/ZD9m/1Y/tj+r/57/n7+mP4D/7r/fgBZARQCOgIzAmoCXgIKAjUCngJvAsEB1QCz/73+IP6m/VD9Ff26/Hz8mvzL/AL9nv2s/ur/HwEMAq4CTAP4A6AEggWOBiAH6QZMBo8FugThAyQDqQJNAm8B5/9u/mf9gPzo+/X7Mfwv/DH8b/zC/BX9f/0u/gv/rP++/2b/8/58/vj9wv1K/t7+Y/6W/Zb91P3N/Sv+Av/Q/4oAFQFrAc8B6AGaAeYB0ALxAioCfwHoABwAZf/J/h7+jP0T/dP8IP17/XL9q/2N/n//LwD4AN8BpgJYAykEIgUMBnUGVQYiBuQFMgVMBMcDUQONAqcBZwDq/vH9af3v/NP8yvxR/C/8ufwb/Vb96P16/gH/uP8IAJv/C/+S/gj+qv2Z/Zn9cf0Q/aT8o/wB/U39ov1n/mv/MAC8AEABnAHIASQCzQJTA04DzQITAmYBxgADADf/lf7p/Tb98/wZ/Rb9Df2K/Wj+QP8GAMwArAGhAncDSgRaBTQGTgYKBt4FgAXEBAAEVwOPApQBowDR/9n+r/3G/IL8ofyr/KH84Pxk/dz9W/4V/7f/9v8fAFsAWgD3/zf/RP59/Sz9Pv0y/Zb8yPut+y38k/z2/L79qf5w/zYA8wCAAeQBYwIgA7kDmgP1AmAC5QEwAWAAwf8c/0H+o/2e/bn9kP2Q/Rn+yv5z/zsA/gCjAU8CEwMlBFQFuAVtBVoFSwXABCkEpQP9AnUC5gHzAOD/xf6p/UL9jv2W/VD9Zf23/RH+nP48/6v/3v/c/8//3P+a/9b+E/5n/XL8v/vT++j7fvs6+0T7R/u2+9L8D/43/1UAGgGpAV8CBgOgA1UEhAToA0gD2gIRAv4AIgCC/+3+R/6r/U/9G/3s/Bz90/2H/vT+d/9XAEQBAgLeAvwD5ARkBbYF0wWEBfIEdQQ5BAwEeAOQArwBuQBw/4T+NP4U/tX9rf2//d/99v0p/pb+Df89/zr/VP+c/5D/1P7e/Qb9QvzQ+/H7J/z
P+0P7Lft1++j7iPxn/Zb+4f/iAIcBIwKdAuMCfANCBF0E2QMZAzUCawHUADIAgP/k/jv+uv2u/Z79WP1Z/c79Yf4P/wMA+QDoAfMC4AOWBCMFgwXLBQYG5gUdBSQEgQPfAiUCjQGqAHT/iv4E/n79Ff0D/Sn9lv02/pD+uP78/ir/jf9VAL8ANQBF/4L+zP0S/an8qfyX/Bv8b/sa+xL7P/vp+yf9m/6p/ykAoQB2AVUCAQO3A0cEMwS+A0cDowLIAQgBegDp/xz/Gv5m/UX9QP0q/U79if3A/YX+3/8tARACoAJMA2gEaAXGBd4F1wVhBbwETATZAzoDfgKuAeoAFwAn/3D+Gv74/fb9J/5e/mz+f/7t/o3/6P/6/xsAQQALAFb/dv6x/dX8y/sh+xT73Pok+qf5vvkF+pr6ufsc/XL+lP90AF0BagI8A/MDzwQyBdoEcAQCBEMDdwK/AeYA/f8c/03+1P2o/W/9Xf2f/fP9jf6O/2sA9ACeAXECOwP1A4UEuwSlBF8E/gPVA6wDCwNYAgECfwGLAKb/Kf/3/gD/Ov9d/zT/Dv9G/9b/XgCCAFMALwAwAPr/R/9X/pD9vvy++/H6jPo3+p35AvkL+a35aPpH+4385P3m/uH/HAE+AgMDpwNPBMIEuARyBEkE9wM3A0sCZwFxAGf/if4O/sn9Zf0H/RT9S/2B/TP+Xv9XABcB7gG5AncDQQTlBD0FRgXyBK8EnwQoBFIDygJaAoEBsABCAOX/hf9c/07/R/83/w7/Ff94/6T/Uf8f/zX/Ef+t/if+SP07/FD7mvpo+oH69Pks+VT59flo+lb7uPzK/eL+RABTAR0C+QKiA0UE/AQABXMEQgQTBF0DoALhAbAAbv+J/vH9pP1g/fL80fwZ/WX9BP4z/0IA+ADJAbECdAMcBKcE/wQhBfoErASFBEEEqAMxA9AC8wH5AI0ATQDQ/3D/XP9V/0b/QP8+/1//lf+o/7z/1/+f/zL/z/4m/jr9Wfxy+576Svo5+s35Qvkg+WL5GPpP+5X8v/3X/qP/WQBsAaECgAMdBIsEmAR1BFUE+QNXA5oCtAG7ANv/Cv9l/vj9of1i/Vz9gP3u/eH+CADdAGoB3gFvAkQDHwSxBNcEnAQ+BAoE8QOmAyoDwAJWArQBEgGoAEUA3//R/wkADwDI/4b/hv+9//7/JQAtAPf/hf8f/6v+0/22/Kz75vqB+lf69PlI+cz49vjH+f36Hfz0/MX9tv62/9IA9wHoApUD/wM2BGIEfQRMBNoDPANcAkkBUACc/xL/f/7V/VH9Lv1o/dr9f/5K/yAA0QBaAeABkAJkAy8EsgS1BEgE5gPqAw8E7QN9A98CKQKXAUsBEwHUAJ4AZQAkAAcADQAUACUARwBEAA8AxP9q/yb/3f4J/rj8uvsr+436HfoT+qb5qPhD+Ar5Xfpl+9v7RvxM/aT+yf/yAA8ClwLtArsDfASNBDsE6AOeA0ADiAKTAdkAPQCR/xX/pf4F/sf9Lf61/jX/r//5/0kA5wDHAcYCgwOOA1MDaAObA7cD6wPzA3cD2AKRAnwCUwINArgBZwEkAfgA5wDdAMIAjwBkAFEAQAAXANH/Z//U/jT+cf1z/J37I/uK+tD5hfle+db4iPgm+Rv6yPpn+z78Sf1h/mD/aQB8AUIC0AKRA0EEawQ9BPkDgwPlAkoCrQEDAT4AbP+5/j/+7f3j/UH+vv4V/2n/8v+iAEEBygF0AjIDsAPkA/0D+wP6AykERgQEBJADGAOiAlwCSQIeApgB7ACAAH8ApQCMADcA+P/x/wAA+P+f//7+bf7//Vr9jPzo+zn7XPrW+b35Z/nb+N74iflM+uj6d/s4/ED9UP5F/1UAXAH4AXQCNwPbA/IDrQNLA+cCkgIwApkB3AABADf/wf6Y/on+pv7i/g7/U//b/40ALQGkAQ4CiwL/AmEDwQPcA6sDowO/A6sDeQM4A9ACgwKAAmMC/AF8ASEBLAFvAVgB0wB
rAFoAbAB+AFUAtf/X/jr+5f1k/ZH8vfvs+g/6p/m++W35rPil+IT5bfoZ+6L7Jvz4/Bz+N/9HADABnQEFAu4CmAN9AzED+AKYAkgCHAKSAaEAuv8R/8X+2f7j/sj+6/5E/6j/NADEADEBvAFfAsYCCQNhA5sDpgPGA+8D2gOAAyoDJQNIA/oCZAIrAhoCywGSAYEBHQGNAGgAjAB6ACAAtv9x/zP/wv5J/tD9BP0s/Lb7Rft6+sn5aPkI+fP4g/kj+nn61Ppm+2X8tP2q/lD/KgABAa4BiQJAA0UDDQMOAwEDzAJnApMBwAA5AJn/Fv/m/p7+X/6a/u/+IP+T/0EA1gBtARICoAIhA4EDswP6A0gETAQjBPIDogNcA3ADgAMSA3YCLQIeAvkBtgFQAdcAhgCFAJQAgABGAPr/rP9g//r+dP7f/S79Y/yP+8L6NfoG+rz5Ivn5+Jv5VfrR+lj7+vvA/L79rv6O/4kAQAGcASUCswK/ApUCkwJsAvkBcQHFABIAmv9A/+f+q/6V/qT+4v4x/5j/PADmAFkB2QGKAicDjQPOAwQEQgReBD4EGwTwA5ADTQNBA/0CkQJNAvcBlAF5AWsBDAGmAGgATgCBALkAfgATAM7/eP8P/5j+5f0Z/WD8jvvX+oH6FPp6+W/59PlS+pr6JPvc+7z8tP1v/hv/CQDiAGwB5gE1AjsCUAJ0AkECrgEdAbUAXADp/2f//f6s/oD+w/5Q/4//h//Y/5UAaAE+Ar4CuALJAmcDIQRuBCgEhQM2A3wDpgNdAwADnAJGAmsCsAJTAp4BRQFAAUcBMwH0AMYA0gDiANcAswAyAGb/wf4z/oH9wvzm++j6Pvrh+YT5jvkL+k36gvpC+yT8x/yO/Vv+7P6n/6EAaAHdAQ0CFQI6AkgCAAKpAVkBywAcAJn/P/8J/+D+nP56/sX+OP+k/yIAnAAqAewBdwKuAh4DwwMSBBwEAQS7A5MDjQNHA+wCygKZAlYCTAIwAs8BfQFbAVIBSAEGAaUAoADmAA4B8gCUABAAk/8K/2b+1P0b/QH8MfsQ+9L6NfoL+oX69/o7+777e/wY/Z39Xf5a/x0AewDmAJQBCQIAAucB6wGtAS0B1ACPAO7/LP/h/u3+sf5J/lX+yP4e/1n/3v+fADABmAE+AgcDcwOcA+EDHAQKBOED0gOwA2QDAgO8AqkCfQIKArcBkQFTAQwB5gC1AH4AdwCjANgA1gCqAJQAhQD//w3/Qv7Z/VD9afyh+yn7l/of+nf6I/st+wn7n/ul/Hj9+P16/lP/UADmAEMBsgHYAakBugHoAY4B4gB9AEcA0P81/9H+ov5k/iz+Tv63/vr+Pf/n/7cAKgF7ASkC+gJvA6UD6AP8A8kDrQO/A6wDQgOqAk0CWAJTAucBdwE2AeUAnACdALQAnQCDAJcAywADAQABogAlAKD/+P5I/qf93/wa/JX7B/tr+l763/pM+2/7j/sH/Ab9//11/uz+sf9gAOEAWgF8AUwBOQFSAVgBFwFnAML/n/+G/yL/yv6P/kr+Sv6j/gv/Yv+2/zEA9ACTAdgBZgJTA9MDuAOrA8QDzwPDA3wD7AJbAhUCGgIpAs4BEwGdALUA0ACYAEwALAA6AH0A5wApAQkBnwBOABQAlv/N/g3+Uf2R/B/80vtG+8z62Pot+5T7CPxh/L/8c/1B/vT+vv9aAKAA8ABcAXQBTwE3AREBtABCAN7/m/9k/wT/mf5u/mT+Xf59/rj+AP9d/8z/WAAaAdoBVwLXApcDFwQMBNEDtgO3A5MDPwPiAnAC3wGFAY0BegH0AGIAHwABAO3/GgBlAGcAOQBqAN4A7QCFAAsAhf/j/kL+qP0c/aT8GvyG+0/7hPu6+9b7EPxt/PP8nv1L/gD/zf9hALAAHAGIAYsBWQExAfMApQBBAMT/fv9c/+r+df5w/oz+hv6K/q3+AP98//T/kwBhAdkBCQKhApkDPwRCBOQDrAPJA8YDaAP1AnQC0gF
rAXEBZgHfACkA1P/J/7T/tP/w/y0ARQBnAKgAywCEAAUAtP9e/53+uP0s/fH8qvwn/Jb7XPuD+9f7PPyU/NL8Ov36/d7+uP9qANcAMwGbAdQBxgGXAUAB1AB/ACQAr/9g/yD/w/6Z/qv+nf6P/rr+B/98/wIAWwDEAIYBUQLYAjcDigPFA+0D/APaA3cD7QKCAlICAQJQAboAngCDABcAsP96/0j/Pf+X/w0ASgBdAHoAtADjALUAKACQ//v+SP66/W79Af1b/Nz7rvuy++T7Efwj/Hj8Rf0Z/rL+Pv/h/5kAXAHvAQsCywGlAb8BvAFQAaEAGADV/5L/K//c/sD+rP6o/tj+Ef82/3r/AgCjADMBuAFKAuICaQPZAyEEHwTwA8MDdAPcAjsC1AF/ASEBxQBWAMb/S/8f/yL/G/8d/1L/uP8aAF0AkgCxAK8AeQADAHT/2f4t/nr97fyP/ET8Efzw+837wfvg+0H84PyQ/T7+9/65/1wA5gBpAc4B9QH4AfUB1wFqAcEAOQDu/6P/Nv/g/sr+3v7y/u3+7f4f/4j/EwCvADwBqwEfAsICaAPMA+oD2QO7A6UDhgMjA4AC3wF/AUYB/QCFAP//nf9v/07/Of9J/3n/y/9BALgA8wAGAQ4BCgHRAF4Auv8D/1r+zf1J/az8Dvyl+477l/ui+6X7wvsJ/JD8VP05/vP+f/8PAKwAKAFpAZEBowGOAUkB/gDOAKsAWwDo/5P/d/96/4D/jf+h/8X/BAByAPUAWAGRAd8BXQLlAlEDmAOvA4QDLwPRAoUCPALnAYIBGgGvAFkAJgD8/6z/Uv86/3//9/9kAKYA1gAKATcBQAEQAasALwC0/yz/lv7x/UL9lvwG/J37TvsO++76CftS+6b7EPy4/Iz9WP4N/7v/VgDJAC4BoAHyAe4BtQGbAaIBeQEAAYEANAD6/8L/qv+8/83/zf/t/0EAogD7AGQB3wFLAqYCDANjA4MDbANAAw4DywJ8Ai8C3QF9ASMB4ACmAGUAJQDx/9z/8f88AKEA9QAkATkBMQELAdUAlQA8ALL/Av9I/pb94Pwt/JX7JfvK+nz6Vfpi+qf6E/ui+0j8D/30/d/+p/83AKMAGgGfAfIB9wHPAbcBogFoAfwAjgA7APn/uP+M/4j/nf/U/yEAcwCvAPMAXwHpAWgC0QI0A4wDwgPEA50DWAMEA6wCYgIXAsEBbwE8ARsB4QCEAC8AIABWAKAAzQDcAOwADgEtAR8BzABIAMr/df8l/5r+vf3D/PL7Yfv0+ov6Lfr4+Qn6XfrZ+mf7//up/G/9T/4y//b/iQD/AGUBtQHjAe4B6gHLAYUBJQHPAIQAKgDH/4H/cP99/5n/3P9IALwAFwFvAdsBTwKoAuUCIgNaA34DfQNZAxYDwAJsAicC7wG2AXsBSwEyARkB9wDXANcA7AAFARABFgEaARMBAQHoALwAYADh/2D/7f5h/rD96fwf/FX7n/oV+tP5w/nY+Rj6h/oT+6H7LfzN/I39W/4l/9v/dQD2AG4B3QEuAj4CDwLBAYEBQwHiAGcAAADF/6//sf/I//r/RwCmAAkBaAG2AfkBRwKvAgkDQgNaA2UDYANFAwwDxQKCAkECBQLcAcEBmQFvAVABRgFLAVgBUQE7AR8BBwH4AO8A0ACOACoAtv9C/8v+Of55/af83Psj+4/6K/rv+dD51PkS+o76I/um+yP8qvxE/e/9q/5k/wgAlQAQAYMB1wH2AeEBuwF+AR4BqQBBAPb/u/+U/5n/0f8jAG4AsQD3AEMBmAH4AVkCpgLcAgwDRAN1A4gDcwNIAxQD4gKyAn8CSgIcAv4B7AHcAcEBmgF3AWQBXAFUAUUBNgEiAQABwgBeAN3/S/+0/hP+Yf2d/Nn7Lfun+j766vm2+bP56PlH+rX6I/uQ+xP8uPxy/Sn+0f5w/xQAtgBAAaUB5gEHAgMCzgF2AQoBpABJAAIA2v/h/xsAdADOABYBUAGBAbUB5AE
JAigCUwKNAs0C/AIZAygDLQMZA+8CvAKUAn0CaAJRAj0COAI9Aj4CIgLtAbUBmQGaAaEBiwFMAfQAkwAjAJb/7v5J/sD9SP3P/Ez8vfsj+4z6E/rQ+br5wfnl+S76kPr7+mr77vuD/CL90/2c/nL/NQDYAGAByAH/AQcC/QHfAZUBLAHVALIAtAC6AMoA9AAwAWQBjAGoAawBoQGiAc0BFQJgAqMC6wI2A2gDeAN4A3QDZAM+AwkD2AKyApcChAJyAlQCJQL1AdEBuQGVAV4BIwHwALkAYADf/03/xf5S/ur9iP0l/bn8R/zM+0j7uvo2+tz5v/nW+Qb6Sfqg+g77j/se/LT8U/0B/sL+hf86AM0AQAGeAeEB/gHyAc8BpQF1AUkBKgEdARoBHwEtATwBPAEpARoBIgE9AWMBmQHuAWAC2AJKA6wD6wMDBAEE9gPXA54DYgM7AyMD/ALEApICdAJVAh0CzgF4ASoB4QCaAFQABgCy/17/Df+w/j/+xv1f/Qf9nfwU/Hz76/pe+tz5fflb+XP5s/kS+of6Avt8+/z7j/ww/dT9gv5I/xYAywBXAcYBJAJqAoICbwI8AgACzAGrAZ8BmAGAAV4BUwFhAWwBXgFMAVABbwGhAe0BVgLCAiQDhgPqAy8EPgQqBA8E6wOsA2gDPAMlAwEDygKVAmMCJgLXAX0BGwGyAFYAGQDp/6r/Vf/5/qX+VP7+/Zz9Mf3C/Ff87/t/+/z6cPrw+Yn5SvlD+XT5yfkq+o/68fpV+8D7RPzi/Jv9cP5d/08AJwHXAWMC0AIRAygDGwP6AsICgQI+AgQC0AGqAZcBkgGSAY8BmAGzAdsBDgJRAqgCEAN/A+wDPwRtBHIEYAQ/BAUEswNaAxYD6QLHAp0CZQIYAr8BZgEOAbYAYAAjAP3/4P+s/2D/B/+r/kP+yf1D/cL8VvwG/Mr7jPtF+/L6nfpC+t/5fflD+Uz5mfkM+ob6//p8+wX8nfw7/eL9lf5e/zIA+wCkAS4CqAIRA1wDeANqAz4DCAPJAoQCOwL5AdUB0gHsARYCRgJ3Aq8C5gIbA04DfwOsA80D5gP6AwoECgTxA8YDkwNiAy8D9gK2AnECKgLnAaABUQH+ALUAdgA8APr/q/9B/7r+If6N/Rf9uPxm/Bv84Puu+4b7XPsm++P6nvpX+gr6tvlt+VD5ePnc+Vb61PpR+9b7Yfzt/Hr9Fv7F/oj/TgAJAbgBaAIXA68DGwRPBE8EJATVA3QDFQPMAqYCngKzAtUC+QIbAzkDRwNFAz0DPQNLA2YDhgOlA8ED0QPHA6UDeANJAxcD1wKJAjQC4wGYAVABAQGxAGMAGgDE/2b/CP+3/mv+F/61/U797/yh/F78Jfz2+9X7vvul+4b7YvtA+yD79vq1+l36APq3+Z/5w/ka+pL6H/ux+zv8vPw4/cD9Yv4k//3/4ADEAaYCgANOBPsEfgXKBeAFxAWFBTMF1wR4BBsEyQOLA2oDYQNgA1wDTwM3AxcD8gLJAqoCngKtAs8C8gICA/0C3wKlAlYC6gFtAfgAmABNABEAzP93/xr/tP5N/uz9lv1V/S39E/38/OD8v/yf/Hn8SvwZ/Oj7v/ue+4P7a/tS+zD7AvvL+or6Pvr8+d/5Bvpx+gv7uvt1/C390v1i/un+dP8YAOQAzwHEArUDlwRoBRoGlAbIBskGogZUBt4FUgXKBFwECwTTA7IDpAOYA4kDcwNLAxID0wKmApMCiQJ4AmQCWQJNAi0C7AGOARwBrABCAOD/iP83//b+yv6h/mv+H/7H/Xv9Sv0q/Q/97vzL/Kf8hfxk/Dv8BvzJ+5H7Z/tH+zP7JPsb+xv7Ifsm+yD7Bvvd+sT60voh+677Zfwo/e39qP5W//P/ggAMAaEBUgIfA/cDyQSJBSwGqwb+Bh4HDwfWBn4GEgafBSoFvARgBBsE6gPCA5sDaAMiA8sCbQIQArUBYwEgAfIA3gDZANUAvgCKAEEA7v+f/13/Lf8
P//3+6f7A/nr+Hf64/Vr9Ef3e/Lz8ovyN/Hj8W/w1/Ab81vuu+5b7k/ue+7n73/sT/Ej8d/yQ/I/8dvxO/Bz87/vZ+/T7Vvz5/Mn9pv54/zIA0QBXAdABSwLaAoQDPwT/BLYFWwbrBmMHsgfPB7AHWwfaBj4GlAXoBEcEuQM8A8wCYAL6AZoBQQHyAKwAbgA7ABgABAADAA8AGQAPAOr/q/9c/w7/zf6Z/m/+Rf4Z/uX9qP1k/Rz9zvyF/Ez8K/wl/Dv8YvyQ/Lz82Pzc/Mn8qPyI/H38jfyx/OL8FP0//Vj9Xf1Q/TD9Cf3g/MT8yvwI/ZT9Yf5U/0oAJAHPAUoCpALyAkMDrQM3BNMEcwUPBpcG/wZBB1UHMQfeBmMGxAUSBVcEnwP0AmMC7gGYAVkBKwEDAdYAmgBLAPT/ov9i/zr/Kf8n/yn/G//1/rX+Y/4L/r39gv1W/Tv9KP0Q/ev8wvyY/Hb8afxt/IT8rfzi/A79L/1A/UP9Qv1B/UL9Uf1z/ab94/0f/lX+f/6d/qP+iP5h/kj+Qv5M/mn+kf7Z/mH/IADsAKcBNgKLAr4C5wILAzoDigP4A24E7wRmBcAFBQZFBmkGZwY4Bs8FIwVRBHIDngLuAWsBDQHFAI0AYQAvAOf/hv8L/47+J/7q/dX93/0B/if+Pv4z/v79rv1Z/RP95/zZ/OX8/fwY/Sn9K/0i/RP9AP3w/PH8Ef1M/Zj94/0n/mD+jf6n/rL+uv7M/u7+F/87/1P/X/9h/1X/Nf8M/+z+5P7y/gv/LP9t/+P/kQBTAQIChALPAvYCCQMWAykDVQOdA/cDTQSYBN4EIwVlBYUFZAX/BGIEpwPbAg4CVwHBAFEACgDa/7T/kP9h/xz/tv5G/uH9mP15/YD9n/2//dT92f3E/Z39dv1V/T/9O/1O/XD9m/3L/fv9Hv4w/jn+Of44/kr+ef67/gj/Wf+a/8L/3P/n/9v/wf+d/3X/Tv8d/9/+pP5e/vr9if0i/dD8mPx5/I/8KP1T/vj/CwJ6BPEGFQmlClYLQAuBCi4JXAdbBVADWQHA/4z+yf10/X39rf39/UD+Sf5O/mL+nP77/p3/aABTAToC5AI0AwMDUgItAdD/Wf4H/Rz8sPu8+yL8w/xj/fX9VP5n/jn+8P2P/TH9Gf1C/Z39J/7m/qL/RQDAAPcA9ADAAGIAzP8r/5n+J/7g/dv9EP5U/qv+Cf9e/5//6f8zAH0AwQARAXoB1AEKAi0CTQIpAtcBYwERAVEBRwJhA08EZAU9BlYGfwUgBIQCLAEEAOv+Yv6A/gr/nP9LANEAPAF2ATcBpQDY//H+Dv6//c39Ef6V/kH/tf+5/2H/e/51/XT8vft3+9b7t/zi/U3/ZwAOARsBrwDD/6L+lv0E/ST9oP14/qT/AgHwAVkCOQK4AQUBMwCI/zb/Wf+n/xoAgwCwAIcAHQCb//z+Yf75/fL9Jv6Y/jz/1/9uAOEA7gDJAMMAkwD8ALUClwS2BfoGNwjnBz4GDwS2AZL/Jf46/db8Lf39/Rn/4P8UACkAfABmAOf/qf9a/wb/9v4c/wb///4a//b+m/79/Vb9u/yE/NT8xv38/lMADQJmA8EDPQNIArAAuP7q/Hf75vpM+2f81v2P/0ABeQIEA7YCBQJYAbAAJAD3/yMAWQCRAIIABgBt/+L+cv4z/lP+0v7O//MA1QFJAisCigGRAGL/Mf5l/UL9rv1B/iD//ABoAw0F3wVmBgsGhAR9AmwAkf6A/T39Zf3e/Wj+9P6A/7z/lf+8/yAAKAAlAFEARwAQABcAFgDj/7P/iP88/7f+E/6a/VX9Mf2R/a7+EABZAZsCbANgA6MCZwG8/w/+8fxz/Kj8pv0q/8YAQgJEA4oDGAMFAr0Ar//3/rD+Hv/F/zsAlgCrAD4Amf86//v+/v5U/wAAwgA5AUQB/AA+APr+5P0m/Y38Yfze/Fz9Kf4TADUCZQNEBEcFYwVRBM8CcAFEAGf/wv5v/jD+9/0
l/mL+Tv5t/kX/9/9DAIgAqgBxAPv/b//p/rb+ov7B/h7/Y/98/8P/MwB/AOoAoAF8AhoDSgMmA6YCmwE7AOT+r/3u/AP9uf2r/uH/WwGzAnQDZQPMAvAB3wCw/7v+K/4C/jr+mf7J/qX+mf62/q7+nP71/qz/hQBcAcUBsQFCAY0Aff9h/m/9+vxJ/dv9K/40/rn+r/9/ANwAXAE3ApUCPwKVAQUBUQDM/4v/Kv/W/vH+c/+V/4z/wf8xAI0AowCvAJEAVADn/7v/w//G/wYAeADhAOoA5ADdAOsAIwFyAf4BewKoAnMCEgJGARAAA/9G/t/94P1k/kb/UQA5AdEBIQLzATgBUgB7/6X+Hf4p/pb+Jv/J/0wAigBcAL3/Cf+Z/kz+EP5F/sr+J/9X/5b/ef/E/in+5/25/bb9KP6s/vP+Hv95/97/EABDAMUALgEBAbAAfgBnAEMAUQB8AJoAygABARABmQA9AFcAjQCAAGMAlwCpAH0ARgBcAKcA5QAmAVoBbgFIAVIBjAGdAZQBzAEgAhYCwAFkAf0AYACm//r+c/4W/gH+V/7r/o3/RgD3ACMBywBSAMb/Cv99/nL+tf4k/7//YQC8AJMA9v8p/2/+7v3P/Qz+bf7k/kf/ef9c/9/+Gf5v/RD9+fxQ/eH9av4H//b/qwDxACYBewGvAZIBRQHyANgAzAC2AI0AZgBlAJMAmAA7APj/KgCeAOAAEAFmAcsB9QH3AQgCFQL9Ac8BpAFLAcgAXQBMAGsAlQDgAFgBowGDAUEBBwGpAAcAef8P/6D+PP47/ov+yf7z/jv/W//4/nb+Rf4z/hj+Yv4Y/9b/cwD8ADQB6QBKAIT/uv7v/W/9bP3S/T/+tv41/2b/Mf/I/mX+Gf73/en9BP5I/rD+Sf/3/3AAwAA7AZEBgAFJAUwBeAG3AdwBzQG2AZcBSwHRAGAACQD4/ykAXAB8ANIAYAHeATACeQLJAusCsAIqAo0BzgAoANv/x//O/0oAIwG5AdMBnAExAYQAmP+I/qz9MP0N/Vj99v2U/hH/jP/C/4D/Gf/g/sH+qv7Z/lL/1/8pAEcAKgC6/wf/Nf6O/Uf9dP0F/uD+yf+BAAABGwG2APX/N/+P/gb+vP2u/cL99P1e/s7+Jf+J/zwA9wBvAboBFAJ1Aq4CuAJ5Av0BdwEgAb4ATAARAFAAxQAYAWMBxQEmAjcCKAIPAgQC6AHQAZkBLwGoACwA0f99/33/3f+BAAsBdwGpAYUBAgEcAA//Fv5w/fj83Pw0/cr9W/7X/jD/Gv/k/sb+uf6V/r/+VP/y/2MAnwCwAGsA6v9B/5D+9P2j/dD9Sv7G/jT/xf87AEcABAC+/4X/Uf8w/xX/8/7a/uP+7f7i/vT+bf8jALwAHgGLAR8CqgIBAwYD1AJ4AgcCcwHLAEEADAA5AIoA7wBVAcoB/wHvAb8BlwFvATcB/wCxAGsAKgAHAPn/IgCKAP4AQwE3AfoAgQDy/z7/hv7y/b/9z/3o/Qz+R/6e/tH+2P6x/pL+lf64/un+G/9s/9z/PwBNADIABgDP/4X/PP/7/tn+/v5c/6r/sf+r/7D/jv86//z+AP8//6T/7v8LAAAA0P9+/wf/qP6//kr/1P9XAAABsAFDApcClQJQAgsCrgFCAeQAswC3AOIAJgFnAagBxQHCAZcBWwElAQ0B+QDGAI0AUAADAK3/hv+B/6j/9P9VAJEAmABoABIArv89//n+1P6y/pL+qv7S/tH+vf6k/nv+Rf46/ln+l/71/n3/GgCQANsA+wDtAKoAUADw/4D/Jf/3/u7+9P4I/yL/Pv9Y/1n/T/9L/1X/Zv95/3z/av9L/wv/tv6D/rL+Q/8GAMsAdgEJAn0CywLHAooCRAISAtQBfgEoAeIAuQCkAJoAigCQAKwAzQDhAP0AMAFOAUABFQHfAJMARAD9/8//x//0/ysAPgA0ACAA9f+l/0n/AP/a/tb+8f4a/0L/W/9W/y//5v6a/mD+Vv5
y/q3+E/+g/y8AlwDcAAQB/QC3AEUAvv9D//X+2P7K/sT+2f4D/xn/Ev8Q/yb/R/9e/2v/X/87/wz/0v6C/lP+gf4A/5j/QQAEAbYBMwJ+ApYChAJeAisC5QGbAWcBOwH7ALcAoQCoAJ8AjACQAKsAywDhAOEA1QDQAMQAlwBQABUA/P/5//7/DAAsAEUAOwAMAMb/dv8r//b+6P77/h3/Qf9X/0z/H//e/pr+b/59/rj+Dv+C/w8AkQDrABgBFQHeAH8ACwCL/w7/rP53/mn+dv6b/sr+/f4v/1j/dP+M/6D/nP9w/yj/4P6e/lX+Hv4x/qr+Zf8qAOAAggEOAnIClgKCAl8CRQIcAt4BmAFiATYBBwHZALMAmwCQAIsAigCQAKEAvwDZAOAA0AC1AJYAcwBRADQAKAAxADwAOgApAA8A6v+//43/Xv9E/0b/Tv9L/zv/H//2/sP+lP5z/nn+q/73/lP/u/8iAHgAsQDDALUAiwBFAN//cP8P/8j+mf6B/oH+kv6q/sH+1/7z/hT/LP80/y3/Ev/j/qT+bP5c/pP+D/+t/1YA+gCCAdUB8wHyAesB4gHMAakBiQF3AWsBVAE2ASIBEgHuAK8AbABCADYAQABYAHwAqQDLAMoArwCKAHEAZwBnAHMAigCiALAApQCCAFQAJgDv/7P/ef9F/xr/7/7C/qD+kP6S/p3+t/7p/in/aP+h/9f/EgBEAF8AYgBPACwA8f+b/0H/+/7M/qn+lf6Y/rH+yv7a/uP+8/4E/wb/8P7Q/rf+qP6g/qf+5f5k//v/egDaACUBVQFwAYIBmQG/AecB9gHtAd4BywGYAU4BCgHeAL8AlQBrAFYAXgB1AI0AnwC5ANUA6QDnANQAwQCyAKcAkwB6AGUAUwA8ABwA/P/h/8//uv+m/4//bv9H/yH/AP/l/s7+wP7I/uD++v4Q/yj/Tf9//6z/zv/y/xgAJwAJAMP/cP8o/+r+rP55/mH+Zf54/oz+nv6x/sf+zf7E/rX+q/6k/qn+2f5M/+//iAACAWQBoQGwAZoBfAGGAbgB4QHqAfcBDwIUAuQBjAE7AfwAugBxADsAKgBIAHoAoQDEAO4ABAEHAf4A6gDTAMIAwADCAMQAtwC2AKcAcgAqAOP/m/9f/zr/EP/5/vn+Af/5/vD+5/7r/vX++/4F/wj/Dv8g/0T/WP90/5//wv/W/9z/wv9//z///v7K/qL+k/6S/qP+rf6p/rX+v/7B/rL+pf6b/qL+tP7m/l3/CQChAAoBYgGaAbMBqAGWAaMB2QEDAhsCOQJSAlECHQLBAV8BCQGnAEsAIAAkAEUAbQCVALkA5wAHAQsB/gDtAN8A3QDaAMYAvwDEAL0AlwBcABEAw/95/zP/A//t/ur+5v7x/gH/C/8M/wD//f4G/wf/AP8N/yr/Tv9o/3j/jv+n/7j/rv+P/2D/Iv/p/rb+lf6M/pD+k/6S/on+g/6H/oz+lf6h/rL+y/75/lb/6v+IAAEBZAGpAcgBxgGqAZMBrAHPAeEBBgIxAkcCQgIgAuIBpgFbAfYApQBwAE8ARABHAFgAgACnAL8A1ADiAN8A3QDlAOQA5wDxAPIA6gDNAJMAQQDf/33/Lf/t/sL+s/65/tL+9v4Q/x//J/8Z//f+2f7N/t7+BP8p/0v/df+c/7j/yf/F/6n/df8q/9T+mP6A/nz+g/6L/oX+h/6Q/on+hP6S/pj+lv6r/tj+RP/1/50AIwGeAecB+QHuAckBtAHOAeAB3AH1ARQCIQIaAvEBsgFyASMBzgCYAIEAeQB9AIMAhgCXAKcAqwC1AMEAvgC6ALwAuwDFANAAyAC5AJkAVAAAAK7/XP8e/+f+q/6M/oz+kv6p/sX+1f7n/uX+xP6t/rf+0/4B/zj/Y/+H/63/xv/J/8z/wv+a/1f/GP/n/tH+zf7H/rv+rP6e/ob+a/5i/nb+jP6k/sv+Ff+R/ycAswA1AawB9QEOAggC+gECAhcCDwIJAiICOQI
1AiAC/wHNAZUBRwHqAKMAewBVADYALQA/AFgAcQCIAJ8AtADDAM8AzwDhAPIA7gDYALsAiQBEAO7/iP8n/9b+jf5R/jX+P/5o/pH+rP7E/tj+1v7G/r3+y/7x/hr/N/9V/3z/mP+e/5P/fP9Z/zD/+P7K/r3+xf7W/t/+3/7U/sb+oP5v/lb+Uv5i/nv+u/4o/8n/ZADqAGABvwH+AQ0CFAIYAjQCOwI6AjkCQgJDAikCCALWAaUBUQH6AK4AhABnAFkAYAB1AJoAqQC6AMIA2wDgAOAA2gDZAOQA4wDcAL4ApQByACsA0P92/yL/1P6V/lj+QP5D/lr+bP6F/p/+s/7C/sH+yf7j/gj/Hv8y/1H/cP+A/3f/Yf9G/yf/8/7B/qT+ov6q/rP+v/7L/s7+sf6E/l/+UP5K/l3+m/4E/4n/HACxADgBtwEPAj8CWgJuAnECZQJeAlQCUQJPAkECIgL2Ab4BagEUAcQAggBTADYAMgBHAHAAmgC+ANsA7gDvANoAyADCAL8AvwDFAMUAtwCcAGIAFQC8/1r/7/6V/lb+Mf4m/jH+T/5y/pj+rf6r/qP+ov6q/rj+1f75/iH/SP9f/17/T/85/xT/5f65/p3+l/6j/rX+w/7N/s/+vv6W/mn+T/5T/nf+wP4q/67/RQDdAF0BxwEdAlYCdQKAAoACdQJnAlcCRQIvAhgC/QHXAaEBZQEtAfkAzgC2AKwArwC9AM8A4ADwAP4AAAHzAN8AxAClAIwAegBpAFoARgAqAP//zv+Y/17/H//o/rf+i/5x/mb+Zv5z/ob+k/6U/pL+i/6H/on+l/6s/sT+3v7z/gL/B/8G//n+4/7L/rb+qP6k/q3+uv7F/sr+xf61/qT+nP6o/s7+Ef9x/+P/YwDkAFsBwQESAkoCaQJ3AnUCbAJfAlACQgIuAhYC9wHPAaEBcAE+ARMB8gDcANAAzwDWAOMA8AD6AP4A/ADxAOAAxwCnAIYAZQBFACUACQDv/9b/u/+Y/3H/Qf8P/9j+pP56/l3+UP5Q/l3+a/50/nX+bP5f/lH+Sv5M/ln+bP6E/pv+sv7D/sz+1v7c/uP+7f78/hD/Jv88/07/Wf9h/2b/bP96/5f/xv8FAFAApQD5AEoBkAHEAeYB8gHxAeQB0QHAAbcBtQG8AcQBygHOAcoBwQGxAZwBgwFrAVIBOgEjAREBAgH1AOUAzgC3AJoAdwBNAB8A8P/C/5n/dv9b/0z/Q/88/zL/Iv8I/+X+u/6T/m/+WP5O/k7+U/5X/lX+Sv45/if+Gf4W/iH+Of5h/pL+xv73/iT/Sf9l/33/lP+o/8H/3/8BACYASQBnAH0AiwCTAJkApAC2ANAA9AAeAUUBYwFyAXIBZAFNATEBGgENAQgBEQEjAToBUQFiAW4BcwFxAWgBWgFIATIBGAH5ANcAsgCMAGUAPgAYAPL/zP+o/4T/ZP9G/yn/Ef/7/ur+3P7T/s7+zv7N/sj+wP6y/qH+kP6C/nj+dP53/nv+gf6H/on+kP6c/qz+yP7v/hz/U/+O/8j//f8sAFIAcACJAJ8AtQDQAOwACwEnAT8BUAFYAV0BXAFZAVoBXQFjAWUBYAFRATgBFAHoALwAlAB4AGYAYQBqAHgAhwCUAJcAlQCJAHgAZwBTAEAALwAbAAMA6v/R/7b/nf+H/3L/X/9P/z//LP8a/wf/9f7j/tL+yP7E/sT+yf7T/tv+4f7k/uL+3/7f/uL+5/7x/v3+DP8a/yf/Nf9B/1P/af+F/6r/1v8IADsAbgCfAMkA7QANASgBQAFWAWwBgAGRAZ0BpQGmAaEBmAGJAXsBbQFbAUgBLwERAekAuwCMAF4ANQAUAP//9//0//n//P/9//n/7P/a/8X/r/+b/43/gP90/2n/XP9M/z3/Lv8i/xv/GP8a/xz/Hv8e/xj/EP8G//r+8v7x/vb+A/8V/yz/Qf9U/2H/Zv9q/2v/bv91/4H/lP+s/8b/3P/u//r/BAAPAB0ALgB
HAGgAjQCwAM8A6wABARABGQEgASkBMwE+AUYBSwFOAUkBPQEsARcBBQH2AOcA1wDFALIAmAB0AEkAHADy/8//tP+j/5r/mP+X/5L/hv91/2D/Tf84/yf/G/8V/xP/E/8S/xT/Gv8k/y3/OP9B/07/WP9f/2f/bP9v/3H/cv9y/3T/eP+D/5L/pP+5/87/4f/w//3/BwAOABcAIgAwAD8ATABaAGMAawBzAH0AiACVAKUAuQDKANkA5ADrAO8A7gDvAO4A7gDtAOwA6ADgANEAwgCwAJsAhwB1AGMAUwBCAC4AGAD9/+L/xP+n/43/dP9h/1L/RP84/y3/I/8Y/xD/Cv8K/w3/E/8d/yj/M/89/0X/TP9U/2D/bP98/43/n/+v/8H/0P/d/+f/8P/3//3/AQAJABIAGwAlADAAOgBDAEcATABRAFkAYgBvAH8AkACgAKwAswC2ALYAtACyALIAswC3ALwAwADAALwAtQCqAJoAhwB1AGEAUAA9ACsAGgAJAPn/6v/b/8//wv+5/67/pf+b/4//hP93/2n/Xf9S/0j/Qf89/zz/PP8+/z//Q/9I/07/Vv9h/2v/df+B/43/mP+l/7P/xP/V/+f//P8OACQAOQBMAFwAaQBxAHYAdwB2AHYAdAB0AHcAegB9AIAAgACAAH0AegB5AHkAewCAAIIAhgCGAIMAfgB3AGwAYQBYAE0ARgBBADwANgAyACsAIAAQAP//6//W/8H/sP+g/5b/kP+J/4P/fv92/2z/ZP9d/1b/Uf9P/07/T/9P/0//UP9T/1n/Yv9v/37/kf+j/7f/yf/a/+r/+P8FABEAHQAnADIAPQBHAFIAXgBqAHYAhQCRAJwApACqAKsArACpAKYAogCfAJ8AnACbAJgAkgCJAH8AdABnAFwAVABOAEgARABAADoAMQAoABsADQD///D/5f/b/9H/yv/E/77/t/+v/6f/nP+P/4T/e/9y/23/av9p/2r/af9r/2v/a/9s/2z/bf9w/3L/d/9//4f/k/+g/7D/wf/T/+f/+f8MABsAKgA5AEYAUgBeAGsAdwCBAIkAkACWAJwAogCmAKwArwCzALQAswCuAKoAoACWAIkAgQB1AG0AZgBhAFwAVgBRAEkAQAA3AC0AIwAbABQADgAHAAAA+P/v/+T/2P/L/7//s/+m/57/l/+P/4j/g/99/3f/cf9r/2f/Y/9h/2H/Y/9n/2r/cP93/33/g/+K/5H/mv+k/63/t//E/9H/3f/s//z/DQAeADAAQgBUAGMAcgB/AIkAkgCZAKEAowCnAKsArACtAKwArACrAKcApgCiAJsAlACMAIMAeABsAGAAUwBGADoALQAgABUACQD9//T/6P/f/9b/z//H/8D/uf+1/7D/rP+n/6X/of+b/5b/kf+M/4n/hP+B/37/ev94/3b/df90/3T/dP92/3r/gP+G/47/l/+h/6r/t//E/8//3P/o//b/BAAQAB0AKgA1AEEATQBYAGIAagBzAHoAgQCJAI0AjwCTAJUAlgCWAJYAlQCUAJAAjACJAIMAewBzAGkAXgBUAEcAOQAuACAAEwAIAPv/7//l/9r/z//H/7//uf+z/63/qP+k/6H/nv+c/5r/mf+X/5f/lv+X/5j/l/+Y/5r/m/+d/5//of+i/6b/qP+r/63/sf+1/7b/u//B/8b/zf/U/9v/4v/q//T//f8HABAAGwAmADEAPABIAFAAWQBhAGkAcAB2AHsAfwCCAIQAhQCHAIcAhQCDAH8AewB2AG8AZgBfAFYATwBGAD0AMwAqAB8AFgAKAAAA9f/s/+P/2v/S/8v/w/+9/7b/sP+q/6T/of+d/5n/mP+Z/5j/mv+d/6H/ov+k/6b/qv+u/7D/tf+7/8P/x//Q/9j/3f/i/+b/6f/u//D/8//4//z/AgAHAA0AEQAUABgAGgAcACEAJQAqADAAOAA9AEMASABLAEwATABMAE0ATQBMAE0ATABLAEcAQgA+ADkANAA
tACgAIgAdABcAEgAMAAUA/v/3//D/6f/k/93/2P/U/9D/zP/J/8b/wv+//73/vf+7/7r/u/+7/7v/vP+9/7//wf/E/8f/y//Q/9L/1v/b/+D/5P/q/+//9P/6//7/AwAIAAwAEAAUABcAHAAeACEAIwAlACgAKQAqACsALQAuAC0ALgAtACwALAArACoAKQAnACYAJQAjACAAHgAcABkAGAAVABEADwANAAoABwAEAAAA/f/6//f/8//v/+7/6v/o/+X/4v/g/97/3f/c/97/3f/e/93/3P/Z/9v/2v/a/9//3//g/+H/4f/k/+T/5v/r/+n/7P/u//L/9P/1//X/+v/8//3/AwAJAAkADQAOABAADgAOABoAIQAnACoALQAzADgANgA/AEMAQABAAEkAPwBEAD8ARQA6AFYAIABNACsArQAwAg8BIADN/8L+ov1h/sb/PgAyAS8BEADZ/n/+Of6e/t///QBoAT0BogBw/7v+w/5J/wwARAHfAWwBfABV/1D+z/03/in/RQAiAVsB8wAxACX/eP6B/vn+uf93AAEBAQGSAAQAmf9G/0z/xf89AKgAvwCIADUA3v+//8//CABMAIEAhgBrADYAGAAQAAsAJgBVAHcAewBxAGEANAD9//P/8P8WAD4AQgBAAC4ACgDX/9X/8P/+/xoAEgDw/9b/oP+k/7j/0f/1/wQADAD0/+L/v/+j/6X/pv+z/9f/5//V/7L/pP+h/5j/zP/q//v/AADk/8n/s//P/+f/DgA2ADoAJAAJANn/vv/i//v/JwBYAHwAYgA/ABMA1P/b//j/BwAqAGIASQARAB4ABQDx/xkAEgAlADUAOgATABUAUABWAFMAPQBmACwA5P/8/yUAMAAwAFYAMAAVAAEA3f/A/8P/4P+y/63/2v/2/wEAAQD5/8n/wv/q/9L/uP/0/xEA3P/X/xIA9P8oAK8BXgLLAbQBkwCP/mf9Xf3g/ar+2/8wALX/5/4h/vr9L/5Y/6MAMwFpARwBegAAABQAkQBRASoCsAJfAlsBVQBU/3X+Uf78/pP/9/9pAGsAy/8a/+r+zP7P/oP/NQBdAIcArABjAC4AMQAoAC0AVgBdAAoAAwAKAOr/3P8fAFsAGQAgAPT/qv+g/7b/yf/S/y8AJQA0AEYA9//C/7z/pP+U/xMALQAkACsA9P+g/4//sP/u/0oAXwB8AFYABgCd/4r/lv+k/xQAcQCYAI8AYwAoAMv/pv+9//T/JAA2AGAAGAATAN7/uP/o/8v/9v82AL4ApQCIAG8A/P/S/4z/0/8CACUANQATAA4Auv9p/zz/Zv+n/w4AfgCPAE4AVAD9/5b/5/8wAFsAXQB3ACQApv99/0f/Lv9z/8z/7v8PACUA5v+B/2P/oP+j/33/EgBPACYAVQCNAIwAUQBvAB4A3P/T/5P/ef9S/4L/jv/M/9f/xf/j/6v/x/8PAIUArgDXAOwAewBoAFYAaQCGAKsAuwBzAAwAOP+x/sX+AP9V/+n/hQBUAO7/8/+g//D/cwBqAXoCNAIKAokBPgBR/zn/Ev9G/8z/u/9h/xb/uP5Z/pD+xv4P/7v/KwAUABwAXwAkADIAtgASAUwB1AEEAlIBCwGMAML/b/9O/zX/+f4N/8/+mv60/pv+zf4H/xb/Sv+Z/8f/AABTAIIAdwB+ALwA6wC/AMoADQHbAJgApAC/AJEAcAAxAKX/8f62/sn+6P6T/zEAPQDf/9b/uv95/6r/MACrAN8A/QDnAKsAiAAMALr/QgCpAJ0ANQABAMr/Ov8U/3L///8uAEcAbAD//6//mv95/5L/+v94AFEAZgCKADsAsv9H/6P/2//y/xgAHADI/2P/bv8+/3L/AgBPAGMAYQBCAL7/Vv+I/+L/LgCIAMkA0wCAAB4A7P/v/wIASABlAE0AHgDI/4T/WP93/+b/ZgB6ACAA/f/j/4P/f//i/xgA7f/8/xAA3f/P/yoAGwCq/wAAcwAwACoArwChAIYAmQB
jABkAAgC1/5P/x//F/8v/9P8BANH/3//z/8z/tv/l/9T/o//I/7r/vv///87/t/81ACQA1P8OACIA4v/7/z8ASwBUAJsAkwBfAHgAMQD1/+T/nv8y/xX/Dv8G/0T/av/O/yQAOQA1AA8A5v8JAJcA7QBrAcUBWQGiAPj/oP+C/9H/cwDLAJ4AJgDC/zT/2v4j/3//+P+PAI8AFwCt/0//Av85/8L/QwCvAKEAWQAWAL7/lP/I//T//P9eAFUA1P+i/4v/b/9y/zUAngBxAJAAZQDB/2P/pv+b/7T/8v/N/5r/b/9n/3D/uP8lAC8AGQD5/7L/rP/H/wQAeACsAKoA2QCsAFAAdACQAGAAKgA5AA0A2f8DAAEA6v8FAEIAHgAhAGEAdABsAE0ATAAfAAIAGQAuADgAcgCAAPT/h/9k/yL/Df9l/8j/3v/4/+//iP9e/yz/Kf90/7n/0//E/+3/dP8Y/y//Iv9A/4b/1P+6/83/v/9r/3L/tf8SAFIAjgC0ABIBigFrAXgBxwG6AY4BaAEoAd8A2gDIAHIALwDv/8z/3f8BADEAXABzAAwAkf+K/9P/QQCoAPMA3gCTAEUA2P9t/x//Mv+o/5f/Sf/s/lP+rP1p/VT9Nf3D/RH+7f2a/UD96vxS/Vv+Af9n/xMAxgAXAVEBswFHAu8CoQPlA58DFwPGAoMCWwJBAigC/wFcAWUAVP+//nP+Xv6x/h3/af98/2D/If8w/6//TgDdAHIB2QHuAccBeQEeAeAAxQBcANz/df/+/kv+e/2t/Br89fu6+1r7avvl+2H8rvzr/Gf9fP4aAFMBLwJ4A7EECgVQBX4FdAWaBW8FzwSuA6MCgwFsAEf///0B/XD82Psw+xv7FPtS+//70fyS/dL+VwBcASAC3QJ6A/oDYARtBJQEogQ8BIwDswJ/AWsAfP9y/q79Ov2a/ML7SvvP+kj6Ffq2+rj7zPzK/ZD+if+dAHoB9gH0AlkEhAUBBtsFhAUXBXUETgMeAnMBHQFUAN3+gf2A/LX74/pl+n36+fqk+1D8Ef30/Rb/LwBBATcCFwP5A6oE8wT/BCIFCAWtBCMETQNaAl0BNwDd/qT9t/z9+477V/v5+nX6Nfro+bP5fvr9+xD9Mv7J/9QApgGoAswDzAS4BTYGeAZnBmEFRgQjA+wB2ABJAK7/7f4F/tz86vsn+7L6l/ov+xz8QP1h/lj/DADsADwCPQMIBM0EgAW8BZ8F9ARTBOYDJQMeAiUBLQAu/0f+Ff3w+xz7wfrE+gj7//rR+sT69vp0+yv83/yU/f3+1wD9ATcC3gIMBOgERgWUBccFzgU/BesDOALQAAsAuf8g/xv+TP23/A/8MfvF+iT7MfxG/XH+mf+tAJcBGgKvAnsDTgTeBDwFCwWmBDEEYwNUAl0BjADo/07/av5q/Xr8z/tI+x77OvuO+3j71/rf+tD7tvze/NX9wP9CAWECDwOWA1IEmgUYBuEFvgVJBaQEVgNwAdP/Lv9s/ov94vzp+xX7BPvE+lj64vox/G39s/4LAPkABgL5ArsDUATpBDkFhAWCBbwEHQSbA5MCSgGEALP/0/4N/jX9avwL/K37Yvtk+3D79Pqc+m37tvyX/fX9Cf+vAA0CXwIPA2gEZgXfBWAGTgZrBckE5wNbArsA9P/3/tj90PzY+7L6BPqm+Xf5BPr2+l386P1W/30AEgIbA5MDfwSlBUYGcQY9BnIFxATtA5ICIgEdAD//c/6i/Zj8u/tE+wT7v/qt+q76qfp3+q762/sn/Sz+j/98AagCIwPBA7YEgQU4Bq4GxQZ5BnoFIgRuAuAAfv+Y/ov9bfym+9T6wPnt+Ff5NfpW+6r8bP4KAGEBRQL8AvMDvwTABT4GHAacBU0FkQRyA3ACZgGTAMD/qv53/cf8W/y9+x77R/tR+/n6sPqA+kL6C/sE/Qj+h/7Z/7wBqwJcA3oESgXgBYIGpgYOBlMFeQR0AwYCSgDm/vf9tfxf+5r67Pl
a+Wz57fmO+qn7Q/3//qEAIQJzA3oE4wQvBdYFQwYPBp8FMwWIBJYDKwKRAHX/xf79/VD9zvwe/Kr7kftb++n6+PpU+xz7FvtL/Ar+Lf5g/hMA0wGRAiEDXwQPBcEFHwYJBnQF0ARIBAcDUAG6/+f+y/0s/Bz7wPoV+lz5WPkl+lv7mfz0/Y7/UgGnAswDdgTBBFAFGQYnBm4F7ASCBOcD5wKRAQcAKv9//rT9Gv3R/Hz8RPxr/En8tPsa+2z7avv7+r/7zf03/lj+8v9hAf8B5gJuBHwFlgaHBj0GzgXtBJIDZwI5Ae7/Rv/u/ff7i/or+on59/gN+ST67Pte/Vb+zf+DAcwCJQQQBWMF2AVXBu8FUAWoBNgDDAMuAu4Aqf/Z/jP+vP1H/T/9X/1K/R/9XP0H/Rv89Pvu+xD7JftN/RP+mP13/joA9ABtAdgCVwSsBVEGgAbsBckEGQRxAzYCwwAEAPP+PP18+yj6V/nZ+On41/lz+8T8A/6I/xIBbAKlA7YEnQWDBtIGSwaoBd4E/gMcAw4CugB7/7L+B/5w/ez8Jv14/Vv9Uf2a/cb9Af07/BX8ovvJ+pr7gf2x/Yv9//6VAA8BBgKXA54EtQWjBnUGlAXmBP0D9QKlARcAP/9l/nj8l/qq+R/5w/gZ+Xz6jPxU/p7/KgG0Av0D7ATJBYQGwwZiBsMFDQXHA4sCjAFZAB7/iP4w/nn9Cf1b/dP9Gf40/m3+tP6V/rr9C/3b/L37kPpP+w79r/yN/Jf+bwDrAKsBNwOQBBYGrAaCBugFLgVLBA4D+gAs/8v+xf14+7L5V/n++Hz4jPhB+tn8yP48ACkCxAOpBLwFkwbiBs4Gkgb8BQIFRgOjAaEAVv/S/UH9Rv0M/R/9l/0t/s3+Uv+S/8f/3v+d/8/+nP1h/D/71fqk+zP8HPyB/XL/1P9iADQCowO8BCUGcwbvBW0FTwTVAisBsv+8/sj98/uB+gb6+/hA+Nn4hvqf/O7+zwCKAm0ESQXzBewGEQd8BpcGSAZ3BJwCPAG0/y3+Wv0M/RD9Of3G/Zr+Lf+a/2wA+wDkAO8A9gBAAJP+Ev2l++j5C/lb+pb7rfv4/Lj+gf9nAAoCRQPIBFAGuQZfBm4F/QOxAmkBzv+7/vb90vwx+575mvjN+K/53Po8/fD/1gFDA+QE1wVNBtIG3gbABjoGhgRMAsEASf/T/Ur9Lv30/Dz97v1I/sj+8P8dAZgB3wFDAlACUQGH/+n9rfxv+2356/eg+FD6q/pq++L9pf97AP0BlAObBD0Gmwd6B34GiwV3BK4CJABl/iT+K/3T+nP5evkr+Rj5Q/qJ/D//AAIgBIgFeQYYB3QHugalBcsEjAOJAdD/cv5a/Q79y/yZ/Hz9u/4a/7L/9wD1AWoC7gIIA4oCkQFUAPf+Q/2O+076y/gI98f3Hvq/+kv7/P2q/yEA3gHHAxwF7QYgCLsH5AYoBSEDmQHp/3T+sf13/Pn6PPqX+VD5Nfqs++P9OQHHA+sE9wXTBo8GpQXeBNsDKgNhAsAATv8O/iD9Jf2f/d796/6FAFkBvAEfAmMCvwLxArsCYQI0AcX/ef61/HP6Ofmj+GT3Svfl+Cf6Y/o//Lr+3P8LAUADPAWYBsAHgwfFBsEFBAQpAoYAxP6z/VP9BPz7+g77G/uS+wf9b/47ALYCGASBBAIF9wSFBFkElQOFAuwBAQGa/7f+Pv4c/uj+xv8hAOUAywHVAc4BMwKnAq8CQgJ7AY4AQv+h/XP8Tfs/+oT5vfgy9+L2tPgq+uT6Jf36/x0BagIgBFMFdQbYBwkI/QZ2Ba8DDgINADf+rP3m/VH9rfyY/ET8J/wO/S7+kf+zAY0DCATlA+ADAgSqA7ECJALKAecAnf+6/hn+ev6C/wQATgBLAXYChAItAkQCwgLFAjICfgG9AIb/9P3C/IT7jPoz+qH56PeI9lf3E/n6+VD7hf42AToCSgPbBDIGEgeQBygHEgahBNEC5AAX/yP+TP6
n/hz+Jf2u/Hn8b/wm/Wb+MQAfAlMDdQNlA4kDCAOHAhkCogEBAQMA1P5W/q3+bf+3AP0BkAL1AlAD/AJ1Al8CoAJJAl0BMwAe/xP+yfyr+wb77/qK+p35jfhj98z2J/iv+kn8RP6OAUsDHwN7AzsEuQToBXQGgAVwBAADBwHJ/1j/Y/9YAL4A0f/u/kr+Yf1K/UL+yf9AAekBAgL5AWUBSADg/9D/uP/h/0wAWgByABkB7wEhA18E8ATQBHwEgwMvAlgBtwAcAG//mP7M/e/8yvsY+wT7+voB+zj73frE+bz42vjG+tL8Lv4LACACfgLbAdIBIgIXA2cE+gTQBKgEJAQRAwMCdQHGAWYCIAL9APP/rv4S/UH8Xfyp/Nn9Sv9f/7/+hf6X/sP+a/+LAEgCvwNCBFAEKgQJBGcE8wSYBNYDQgMKAgYAaf6n/Ub9L/1U/Yn9iv1b/XD9kv1t/V79hf0e/fr78/os+gX6yvq/+1f8I/1S/tX+XP9nAMwBgAN+BdQG/Ab4BqsGHAYtBV0EygMvA9UBq/+c/av7BvoB+QD5s/nM+tX78vxs/p3/lAAIArgD1gTwBd8GAwfABngGmwVUBHIDigI8Acf/Y/6E/XX9VP00/c79av5u/ov+lv4z/mD+wP5p/vP9t/1V/Un88vqY+VH5Tfp/+zb8/fyj/pH/r/8jADkCEAVFB1oIhwhSCPcGDAUoA8sBNgEBAcr/gv1v+9D5bvjN94L4SvqP/Cz+hv/ZABUC/QIWBIEFYAYRBycHXgayBFwDOwIZAWUA8v/h/4z/AP9s/nz+g/5d/kf+E/4c/k3+Nv4k/rD+Vf/z/y0ADwCY/9D+Wf3u+3z6Z/kJ+uT6uPpG+3/92P7L/58BsANaBa8GRAcXB4oGVAUfBNECYwFmAOT/rv4k/Qf8D/tK+ub5Nfp4+5D9N/98AKkB5AIFBLoElAQLBGMEhQTgAw4DMwNiA+UCCQLCAO7/0P+s/7X+Df4O/gf+u/1K/Zn9Mv8TARUCrwJCA6gDGQNuAW7/BP5N/NT5X/e79fz1vvZT98n4p/t9/c/+5ACSAm0E4waeCJcIiAgBCNYGVgWRA2wCxwFuAFH+Df2x++j5yfj5+On5RPu+/Ob95v6O/3YALAEsAo8D1AQQBfkESQXrBEoEewNLA+sCCQKkAIf/9v5g/ir+K/4C/xAA2gAMAV4B1wFMApECcQIjAoMBPwA5/ir8A/pH+ML2UfVG9EH1TPdk+Kr5QPwR/5sAfwLuBI0H3wlWC4ILngpRCXYHZgVUA8UBuQBj//j8Vfpt+C33NvYb9kf3Rvlv+y79xv4XAKgBYwP4BBwGHgcKCDMIxgfkBgAG/QQIBNUCWgHi/9z+Hf5f/XX93v34/SP+JP8DAHoADQF6AckBAAKmAUAA5v4C/iX9DPw2+8r6OvoM+ar3yvYu99T4ovql/CL/mgHxAucDHAVaBggIMwlYCY0IWAeBBUoDVgGM/4P+nf0E/Nn58vd49tz1cfbj99b5n/zh/xQCPgNYBHEGNghGCbsJ6AlTCXQHGAWOAu0ALAD0/wr/0P35/GD86vuQ+3r8fP5sAE0B1gFCAikC2AGqAb0B9QHkARQBuf8W/tT87/tX+8/6YfrR+bP4i/d690f5k/tt/Wv/XAFvAvUCbwNrBCsGyAcXCJUHdgZbBBcCLQDp/lP+HP4h/U77YvkG+L33e/g++rH8SP86AaYCjQPvA0EE3gSZBc0FnAVRBaoETAMaAv0BUgJLAi8C7AFtAeIAVwD7/6//ev+X/8b/aP9e/rX9b/09/Vj9vf2m/l7/+f90AKUApwDfADwBAgEKAMD+Iv0k+wv5wfdW+HL5hvqm+0H9jf51/98AVAJEBBgGaQdaB3EGDwVxAwgCsQDt/4//N//h/R78OfrI+BT4g/gC+uX70f1p/xABGAICAyQE4AWIB7wIgQlqCVgIYQbABH0DUQITAUoAhP9U/v78zPsF+8/6Tfv9+8v8TP3Z/Zn+g/+
vAFsCUwSxBaUG2wZRBlkFIASBAq8AEf8F/UL6L/d79FHyEfGk8dvzgPb5+ID7//1bANkChAU/CMoKmAzXDDYLkAjsBY8DTgEu/3T9/fsm+q73hfWY9AD1nfb5+Iz7RP7xACADkwTlBaEHdgmhCvYKzgoACosI0QY0BXUDjgHK/xH+Qfyk+p/5Oflf+db5rvra+zP97/7aAMQCmARgBqsHPAg4CI0HYwanBLcCpABi/vb7u/nv94/2lvW99Cv0IvSe9A71mvay+fX8V/+fAZ4DaAQ3BSkG+QagB4YIMAhyBvMDLwHr/jX9FPyc+xH8qvuR+n35yfjr+Jf6Vv0MADYD8AW5B1gIcQh8CLQIeAhuB2gG3ATCAq4AXf8s/qT9qv2k/WH9H/0Y/Rj9dv31/TL/jwCRATMC4gI3AyQDnwNRBLoEcQTzAwoD6gGpAEz/NP48/R38wfp9+d33kPbZ9YH15PTr9D71f/V/9776yf2M/wUC6wPIBKsF3AZXCHQJTQrICYAI7wUDA9wAS//Z/SD9r/wP+4n5K/hL96v36fma/LH/6QLFBAMGsAb6BhEH/AeNCKAIjwiYB+cF4wMSAjsAFv8K/gf9Tfyz+x37L/sR/OX8Gv6J/wkBQwKQA1cEdgReBEEETwT3A+YC/ABQ/6P99/t5+pb5LfnX+JH4SviH+AL5e/nm+fv5Evr/+Qj6oftG/gIAoAAMAg4DqgP1BFwGpweDCH4IuAYzBAkBqP7//bP9ff3F/c39Z/wH+y76LfqS+23+lwFOBPIFKgZLBmQGYQahBuAHlAjeBzUG1QN5Aav/hf6k/ZT94P0A/hH+1/3F/cD+QwDDABsBDQKOAjUC5gGJAeAAfwAPAFP/kv7V/Q793fzS/Hr8Xvx//IX8rfz4/M78tvzV/J78BfxL+7T60/kh+R/4VPjf+gH+t/+SAEkCcAO9BDEGsAfnCMUJbwm4B3YFUQITAAr/NP4P/bv8Ofzo+gD6kfns+a375v4IAu0EMwfCCPMJfAorCqsJhQmFCMAGiwT+AYT/v/1//J37cvtn+0j7ZfuZ+8P7nPw9/sT/8wDZAX8CzQLEAioCcAEoAeQAYQBy/1D+Iv2a/F/8Ivwt/Hv8wfxv/Mf7APuP+in6Bvo8+jH6JvpX+pX6SvpQ+4n+YALgBPMF2gYwBwwHeAZOBlsGbQYxBggFzwLD/4r9mvyM/M38pP2R/hb/F/+y/oX+W/+kAVQEuAYSCKAIZAhuB/kFpwQ7BNMDBAOzAW4A5/5k/WH8D/xm/PT8sf0A/hv+Ev5n/vD+bf+7/xwAhADm//7+V/4Q/hn+x/45/xv/B//o/r3+Vv4M/gH+Vv4V/oX9A/1N/Ij7Ofum+/z7APyw+8b7D/wT/BL8lvw9/p0ApwKzA5wEkAUbBk8GdQa/BhIH3wbVBYgE3QLaAHX/4f4j/qr9MP7R/hz/f/8VAMIA7wFcA+UEiQZ3BzEHMwbnBAQDLAH2/0D/pP4J/qz9Tv2+/EL8evzp/DP95v0x/y4AZAA8APP/vv+y/9v/8f+j/wf/sv54/nr9M/zl+5P8SP2X/aD9Yf37/Jr8efxv/If8/fza/YL+Pv6I/TD9KP0S/Sr9Vf0K/vn/KgJIA8YDmATCBesGXAdQB6wHFghPB4AFTwPrABv/V/4Y/sb9pf3X/TH+X/5p/vz+eQCUAm4ErgVRBj4GkwWeBIIDYAKyATEBYABK/zn+df0Q/Qr9Sv3L/f39xf16/YD9tf3F/dT9Av4k/gP+Dv4K/qr9fP0d/gn/hf+Y/2j/O/8n/9X+C/5a/Tn9Qv0Y/cH8Uvwh/I/8Nf2Q/fj9sv5w//r/YwB3AIMAFAHgAZ0CigNmBMMEKwVTBa8E7AODA+UCTQIGAoABIAELAZ4A/P/+/zwApACKASMCOgJZAjACLwF0AIIACQHKATsC2gE2AaMAdf9j/g3+Kv5Z/pL+UP7U/bj9i/2P/Qn+of4F/6b/zP9H/7z+R/7f/bn9l/00/Uj9kf2
X/W79af15/Qf+x/68/iH+wf2A/dH8Cfxh+677Fv2L/jP/sv9KAMYAgAEZApoCnAMiBV8GSwd7B7IGFQYCBq0F1gQVBFIDkAKPAdv//P3S/D38wfu/+xP8sPzB/RD/EADhALYBbwJQAzAE8QSaBTwGHgYYBZADtgGz/+f9ivx7++T6rvqD+lr6UPpf+ub60vu+/Nz9M/8KAGwA8AAnAeUAlABNAND/Qv+e/uv9fv1M/Uf9mf0S/iL+BP4c/kD+U/6k/kX/2/9SAKIA3ADHAGsATQDEAFMBoQEUAoQClAJsAnwCpQLUAhgDhwPaA4oDtwIFApEB5QBAAOz/z/+o/6b/w//o/xYAhgA8Ab8B5wHYAdcBqgFLAdgAgAAjAJX/7v4o/kT9Vfy9+277UPuC+x/82vxd/dr9ev4n/7T/ZAAWAWcBWQEuAeYAXQDZ/3v/cP9v/0X/9v67/ob+hf7w/mj/1f8oADMA0P9Y/+r+5P5h/w0A0QCOAd8B5QFDAq0C6AI3A6gDBAQ6BN4D3QLcAQ4BcAAkAOX/Rf/V/sr+t/6x/vD+Mv+n/4EAGAE/AUEBKAEZAUsBPQHXAJoAagDz/1j/sv4w/jr+bP4y/rn9Xv35/L78tvzH/Cf96v2o/in/gP9b/zj/d/+8/7n/x//S/7n/tf9t//r+y/4A/07/2/9WAHUApQDzABYBJgFlAZYBzAHbAX4B4wBuACwAIwBKAD0APwCCALoAqgCaALcACgGbAfkBBQLQAXkBFQHTAIYAGwDz//3/BADy/+3/8f8VAD8ASwBeAGUAVgBbAI8AuADbAAcBAgHQAI0ASgAPAO//s/9Y//v+df7a/XT9V/1G/Vn9g/3E/Rj+W/6H/sb+MP+Y/xMAkADsACIBPAETAasASwAMABoAYwCUAI8AigBzAD4AHwAsAGgAyQASARwBBAGvACwA1f/P//L/TQC8ANcAqgB5AF8AWQBmAHEAtAADAc4AQADW/3T/Gv8k/1X/ef+t/87/0//j/9H/sv/U/wMACgAaABsA7//h/+H/pP9e/17/gv+h/4v/NP/t/uX+1/6//sv+Bf9f/57/kf9e/27/0v9UAMMACAEMAdUAiQA8APf/2P/1/zQAVQA4AO//uv+w/9v/JABnAK8A8gANAekAsACFAI4AuwDCAKEAgQBSAA8ABAAkAEQAZACRAKQAlgB3AFUATQBPAEMAKwAeAPP/o/9j/zD/5P60/un+Xv/X/1AAugDzABABBwHTALEAwQDlABEBDgGjAB0Ay/9u/wn/1v7a/g7/Sf8s/8r+kv6P/rT++P42/3v/8f9lAJsAwQDdAOgA+wARAQgB6QDOAKYAeAA8AOD/i/9j/1P/UP9n/4T/rP/p/xYAFgARABQAGQAmACAA9P/P/73/k/9T/w7/w/6k/rH+pv6I/oz+rP7m/jD/Wf+B/9//SACJALIAwgDJANwA5wDbANAAuQCZAIkAbwA1AAwAFgA8AGcAlgC+ANMA1QDbAOkA5gDPALgAqACaAIcAdgBsAGoAZABXAD4ADgDl/9n/0f++/8b/8v8ZADUAVwBsAG0AbgB3AIkAogCoAJkAfQBKABAA8P/e/8L/s/+e/1r/7v56/iT+Df4o/kH+Tf5U/lT+Vf5X/mn+uv5G/9P/OgB0AIUAjACYAKQArwDCAMAAmABbABIA0P+9/9v/EABZAKwA7wAdAS4BIgEkAUUBXQFlAWcBQwHkAGsA+/+l/3L/Wf9h/4X/mP+K/3//kf+6//v/RQCEAMIA+gASAQEBzwCRAFoAJwDo/67/iP9i/zv/Ff/x/tT+zv7u/ib/V/96/57/s/+x/5z/eP8+//7+3f7g/ur+7P72/hT/MP9D/1n/dP+R/7z/+P8fACcAMwBaAHsAcgBVAEYAUABnAIIAmACwANMA/gAkATMBOwFYAX0BjgGGAW4BTwEyARkB9gDOAKwAkgB/AGYAOgAPAPn/5//R/7//t//C/9f/3P/P/87/4f/1/wIABAA
EAAwACQDt/8D/mP98/2j/WP89/x7/Bf/i/sD+uP7E/tX+7v4D/xD/F/8S//z+7P7y/g7/Nv9c/3T/iv+c/6H/l/+N/4z/nv/C//H/HgBMAIQAwQACAT4BcwGiAcABxgHAAbYBngGDAW4BXQFKATEBCQHdAK4AfgBUADsAKwAiABoACwD1/+P/3P/b/9z/4//w/wYAGQAdAAwA6v++/43/W/8r/wz/Cv8a/yz/Mf8i/wn/8f7k/t7+1P7E/q3+jP5o/kb+L/43/l/+m/7W/gj/Mv9Y/37/of+//97/+f8RACUANQBLAHIAnwDFAOcACQExAV0BiAGqAcwB7QECAgAC4wG6AZYBcwFHARsB/wD5AAcBIQFAAWIBhgGbAZ0BkgGCAW4BTgEcAd8AngBXAAQAtP90/0n/JP/8/tj+yv7R/tr+3f7l/vP+/f71/tr+wv7C/sb+t/6U/mv+R/4n/v39z/25/cX93/32/RL+Qv6K/tz+J/9v/77/DwBOAHMAfgB/AHkAcQBoAGMAaAB1AIcAnQCtALcAwQDQAOIA8gAFASEBSwF3AZoBrwG5Ab0BvQG0AaUBlgGFAW0BRAERAeQAxACvAJ0AhwBuAFkARQAqAA0A8//e/8z/tf+U/3X/X/9P/0D/LP8a/w7/B//1/tn+wP6s/pv+iP52/mz+cf6B/pH+nf6q/rz+1f7w/gr/Jv9G/2f/hv+e/7L/y//u/xUAOABQAFwAYgBnAGsAagBuAHsAkQCtAMcA4QAEATABYwGOAaUBqgGiAZYBhgF2AW4BdwGHAYUBZAExAQMB6ADZAM8AwQC2AK4AnQB6AEsAIgASABMAEgD//9//vP+Z/23/O/8N/+7+4f7V/sD+pf6U/pb+qf7B/tf+6/77/v3+7f7O/q/+nv6c/p/+nv6c/p3+q/7C/t7+Af8x/2v/pf/V//j/GAA8AGoAnwDQAPUADQEaARsBDAH0ANwAzQDKAMwAzwDNANEA3gDzAAoBIQE7AVgBcgGCAYYBfwF0AWIBSAEkAf0A3ADBAKoAlwCEAG0AVQA4ABkAAgDv/93/y/+1/5j/dv9P/yv/Df/6/ur+1v7A/qj+kP54/mX+XP5e/mr+eP6G/pL+nv6r/rj+yv7h/gX/L/9Y/33/nP+4/9T/9P8YAEAAZwCDAJQAmgCdAJ8ApQCvAL8A0QDhAPIAAQESASMBMQE8AUIBRQFHAU8BVQFXAVEBRAEuAREB9ADcAMsAvgC0AKwAogCWAIcAeABnAFUAQgAoAAgA4f+3/4//af9I/yn/Dv/6/u3+4/7d/t7+4v7r/vX++/78/vf+8P7n/t3+1v7X/t/+6P7r/uv+6f7l/uD+4/7s/vv+Ev8w/0//cf+U/7v/5P8RAEAAbgCbAMQA5AD7AAkBDgEMAQ0BDgEWASEBKwEvAS0BJAEYAQ8BBgEDAQUBBwEGAf4A8ADiANUAyQDBALYApQCOAHEAUwA5AB8ACgD1/9//x/+w/5v/jP+F/4D/fP9z/2P/Tf81/yH/Ff8P/xD/Ev8U/w//Bf/8/vP+8P7y/vX++P72/vL+8P71/gn/J/9P/3j/mv+x/8P/1P/p/wQAIAA4AEYARgBAADsAQABUAHYAngDAANcA4gDoAO0A+gAMASMBMwE7ATUBIwEIAe0A1AC/ALQArACpAKcApQCeAJcAjwCHAIEAfABxAF4AQAAWAOr/uv+R/27/Uf86/yb/Ev/9/uz+4f7d/t7+5v7y/vz+Bf8O/xn/J/83/0v/YP9v/3n/ef90/2v/Y/9c/1r/XP9j/27/e/+J/53/sv/M/+r/CQAoAEQAXgBxAIQAlACkALUAxwDXAOEA5gDoAOoA6wDuAPIA+QD7APoA9wDxAO0A6QDnAOMA2QDKALUAnACDAGwAWQBHADUAIQALAPT/3f/K/7r/r/+m/53/kP+C/3P/ZP9Z/1D/S/9K/0v/Sv9I/0L/Ov8y/y3/Kf8p/yz/Mf80/zX/Nv87/0T/Uf9j/3T/hv+
U/6H/rf+8/8z/3v/t//z/CQARABsAKQA8AFMAbQCGAJ4ArwC8AMgAzgDTANYA1wDZANoA2ADXANIAygDAALUAqwCfAJUAigB/AHIAYwBSAEAALwAhABgAEAAKAAQA/v/1/+r/3//T/8j/vf+x/6T/lP+F/3H/XP9J/zf/Kf8i/x//I/8q/zP/Pf9H/1X/Zf93/47/pP+5/8n/0v/T/9P/1P/U/9f/3P/h/+T/5P/k/+P/5//y/wIAFgAsAD8AUABgAGsAegCLAJwArAC5AL8AwQC+ALgAtACvAK4ArwCtAKkAoQCZAI0AggB8AHYAcwBxAG0AYwBUAEAAJgAPAPr/5v/X/8r/uv+q/5j/h/95/3P/cf9y/3b/d/92/3P/bP9p/2f/Zv9n/2f/af9r/27/df98/4X/jP+S/5n/oP+p/7T/wf/O/9b/2//c/97/4f/n//L///8OAB0AKQAyAD8ATQBdAHAAhACVAKIAqwCsAKsAqQCoAKgApgCiAJsAkwCKAIIAfgB+AH4AgAB8AHYAbQBgAFMARQA4ACwAHwAQAAAA8P/h/9b/zv/J/8X/v/+4/67/oP+U/4j/ff90/2r/X/9T/0n/P/81/y//Lf8u/zT/PP9I/1b/Z/95/43/of+2/8v/4P/y/wEACwATABYAFQAWABgAHgAoADEAPQBIAFMAXABnAHQAgwCTAKIArQC0ALUAsQCqAKUAoACeAJwAlwCPAIIAcQBhAFYATgBMAEsATABNAEYAPgAzACkAIAAXAA4ABAD3/+j/1//G/7X/p/+c/5P/kP+Q/5L/lf+W/5T/kf+K/4T/fv97/3n/eP96/3v/fP97/3z/fv+B/4b/kf+g/7D/v//O/9v/5f/s//T//f8IABUAIQApAC8AMAAvAC0ALQAvADQAPQBGAFAAWABgAGMAagBuAHEAdQB4AHcAdgBzAG8AaQBgAFcATAA+ADEAJgAdABYAEgANAAgAAQD4/+//6f/n/+f/6f/s/+7/7//p/+H/2P/P/8j/xf/F/8b/xf/A/7r/s/+t/6n/qf+v/7X/u//A/8P/xP/E/8n/z//V/9z/4v/o/+//9f/7/wIACAANABEAFQAaAB8AJgAqAC8AMgA0ADEALgAtAC0AKwAsACoAKAAkABsAEgAMAAcAAgAEAAQABgAHAAkACgANABMAGQAiACkALgAwADAALQAqACcAIgAdABYADQADAPf/7P/j/9v/1f/R/83/yP/D/8D/vf+8/8D/xf/L/9D/0f/S/87/yv/I/8n/yf/L/8//0v/T/9T/1f/Z/+H/6//0////CQAPABQAFwAaAB8AIgAlACYAJAAhAB4AHAAbABoAGQAZABkAFgAXABgAGgAbAB0AIAAfAB4AHwAgACEAIgAhACAAHAAWABIADQAKAAcABQAAAPr/9P/v/+z/6v/s/+z/7f/r/+j/5f/j/+T/5v/q/+3/7P/t/+j/4//i/+H/5P/o/+v/7P/s/+j/5f/k/+P/5f/p/+z/7P/s/+v/6P/p/+v/7//z//n//f8AAAIABAAGAAcACwANAA8AEQATABUAGAAbABwAHgAeAB4AHQAcAB0AHQAeAB0AHAAbABgAFgAWABUAFQASAA8ADAAJAAUAAwACAAAA/v/7//j/9P/y//H/8f/y//L/8v/y//H/8P/w//H/8//z//P/8f/x/+//7f/r/+v/6f/n/+f/6P/r/+z/7v/u/+7/8P/x//X/9//6//z//v/9////AAACAAQACAALAA4ADwARABEAEAAPAA8ADwAPAA8ADwANAAoABwAFAAQABQAEAAUABwAIAAgACAAJAAoACwANAAwADAAMAAkABgAFAAIA//8AAP/////+//7//P/7//v/+v/5//f/9f/0//P/8//y//L/8f/x//H/8v/1//f/+P/6//v/+//5//z//v///wAAAgAFAAUABAABAP7/AAAAAAIABgAKAAwACQAFAAQABQAGAAgACQANAAwABQA
DAAQACAAJAAoADAAPAA0ACgAKAA0AEAARABAADQAMAAkAAwADAAcACgAHAAIAAQABAP///P/7/wEAAwD+//v/9//0//X/9P/z//f//v/9//j/8v/y//b/9v/x//j////6//X/9f/1//T/9v/0//b/+P/2//H/7f/u//D/8v/y//b//v/+//X/9v8AAAMABAAIAAkADQANAAYABQATAB8AHAAaABoAHAAgABIABwAVAB4AFgAQABkAIQAeAA4ABAAOABkAGwAdACMAJwAaAAEA+v8HABQAFwATABcAGAD//+P/4v/x/////P/v//z/BwD3/+r/7f/+/wwAAADz////CwAFAPL/6P/u/+r/1f/L/9f/8v8CAPn/8f/4//n/6P/e//X/GAAmAB4AHAAgABUA+//s//T////+//z/AAAFAAEA/v/7/wEAEAAdAB0AHAAdABoADgABAAMADgARABAAEgASAAwABwALABAAEAARABcAGAAPAAgADAANAAYA/f/6//j/9f/2//z//v/+//v/+P/y/+z/8P/5//7//P/4//v/+//5//j/AwASABcAEwANAAYA+v/w//H/9//3//T/9//7//f/7//s//D/9f/5//n/+/8FAA4ADAAGAAUACwALAAIAAQACAPn/9P/0//n//v/9//z/BgAOABQAFwATAA0ADwAHAP7/BwAQABMAEQADAPj/7v/j/+v/BgATABIAEgAQAAoA/v/4/wYAEQADAO//6P/u//D/8P/8/xEAGwARAAkADwAVAAkA9f/w//r////1/+7/9f/1/+b/4P/w/wUABQD5//D/7f/o/+7/BQAaACMAJAAhAAsA8P/q//X//P/+/xIAJwAaAPX/4f/j/+b/6/8EACIAJgAVAAMA8v/l/+X/8//9/wIAEQAeAAsA6f/i//P/9v/t/wEAMwBDACAAAwAEAP//6f/s/xQAOwA4ABEA8f/Y/8P/vv/M/+P/AgAPAPv/4//c/+X/7//0/wgAKwAyABwAEgARAAEA5v/a/+X/8f/p/+z//////+r/5v/1/wIACgAYAB8AHAAPAAMAAAD6//P/+P/+//j/+P8LABwAHAAXABgAEwADAPb/+//9//f///8HAPn/7v/1//T/6//y/wkAFQALAP7/AwABAOv/7f8EAAgAAQAMABIABwD//wIADAAPAAcACAAPAAsA9//w//D/7v/w//L/+P8PABkA/v/q/+//7//k/9n/5P8DAAoA7f/o//z/+//p/+7/CQAfACEAFwALAAAA6P/V/+b//v8QACwANAAiABUADwAIAP//AAAWABsABQD2/+T/1v/h//b/+//v//D/CQAFAOP/5v8BAAEA+P8EAB4AIwAJAAMAEAAPAAUA///4//H/7v/z/+//5P/3/xIACgDw/+T/8v/9/+j/5/8OABAA8//2/wYABgD7//n/EQAkABAAAgACAPf/+v8PABQAGwAqAA8A5v/a/9j/1v/e/+7/FwA1AAwA4//w//f/9v8QACQAKwAkAPT/zv/L/8L/2v8fADgAMwA9ABcA1//P/+f//v8SAAgACQAlAA8A3v/3/yEAFQAVACQAFgD8/+P/0//w/wMA3v/d/wEA+P/q////BQALACQAIAAHAAgAFgAWAPr/3//m/+v/0v/F/+r/GwAeAP3/BgAbAPf/0v/d/wEAKgAhAPX/BQAZAOP/zP/7/yYAPwAxAP7/+/8PANv/tP/r/xgACQD4/+b/4v/4/+f/y//6/zUAMAAVAAoAEQAaAAYA8f8BAA8ABwD+//L/6////xgACQD9/xEADwDu/+v/DQAiABUAAAARACIA7P/C//b/KwAWAPz/AQAPAAMA0v/D//X/EwAGAP3//f8GABAA7f/R/wYANAAfAAgAAwAAAPL/xP+r/+H/GgAgACcAKgAgAB8ABgDo//j/DAD7/97/yP/R//j//P/y/x0ATQBHACAAAAD///7/4v/l/wkA///e/+H/8////w8AGwA
kADAAIwD//9v/zP/j/wkABQD4/xcAKQAJAPf/AwD///L/8f8GABkABADk/+n/8//w//b/9//5/wkAEAAZADwAQAAMAOr/8//4/+v/3f/u/xwAKQD7/9H/zv/V/+v/FQArADIAMAABAMn/xv/c//H/CwAjADgAQQAcAOr/4//1/wEAGgAjAPr/3v/o/9//1v/s/xEAMwA0ACMAKAAeAPH/5v/p/8n/xP/q/wYACAD7//n/GQAxACYAHwAUAAcABwDt/9b/6f/p/8b/y//8/x4AEwD9/w0AIwAQAAUAFwANAPf//f8AAPn///8GAAIA+v8FABsACgDc/9z//////+b/5P8BABAAAADu//n/DQAVABYAJABAADUA/P/e//D/8v/m//X/FgAbAAAA7P/0//f/8/8DABgABgD+/x0AHwD5/+X/+v8MAP7/9/8jADcAAQDv/wQA6//Y/+v/9//7//X/6f/m/9P/w//t/xQABwAIABsAHgAjACoAHwASAPr/3//q//z/8v/m//j/DgD//+r/7v/o/+n/FAAjAAYA/v/o/7r/w//v/wgAIQAxADMANQAsABcACQD//wgAGAALAP//8v/L/7r/x//I/93/BgAbADEAJwDw/+f/CAAAAP//LABCADEAHgASAAkACAD//+j/5v/+//r/3v/V/9v/4v/q/+3/AgAsADUAJAAoABoA6//c/+n/+v8UABkA///6/wEA6v/o/wUAAgAFAB4ACwDk/8r/rv/N/woA+P/1/zQAMAD8/wIACwD2//L/9P/2/wUAAQD1//z/8f/g/wMAJQAJAOj/AgAkABYAAQAZAC8ACQDF/7T/1P/z/wgAGQAXAAoA7v/M/93/CQATACwATAAzAAUA6v/X/8n/vv/r/z8ALADn/wUAEQDM/9P/HAA6AEEASgBGAA0Alv9x/8j/+P/r/xQAPAAMAMr/xP/l/wgAGgA1AGkAawAEAKT/rf/F/8T/5f8dACsAGAALAP7/3f/F/+b/JQA7ACoAIAAZAPj/3P/r/x0APQAuABsAGwDy/63/nP+9/9//8//2//X/AwAaAC0ALwAZABcALgAtABAADQAhAAYA1P/g/yIAMgD7/+H/AQD7/7//pf+8/9L/3v/m/+//BwAmADQALwAYAAkAFgAEAOb/CwAqAPv/4/8NABYA9//n/+b/4P/T/9j/7P/m/+r/GwAuABMADwABAND/zf/8/xIAFgAUAAIA9//x/+T/7P8CABcANAA5ACYAJAAOAOH/2f/v//X/+/8WADAALwAPAPX/9f/r/97/+v8lACcADQD9/+7/2P/a/+//AwAqAEIAJQACAP7/9f/V/7z/6P8vABcA3//s//D/yv/K/+L//f8mAC8AHQAbAP7/zP/M/9b/8v89AEsADADt/+P/yv/S/+//FgA5ABIA1//p//H/zP/l/xAACAAeADwAIwAMAAgABwAlADEALQBSAFIAEAD8/xEADwAFAAQAJABQACcA1//K/8v/sv/D//j/BwD9/wsAKwAsABQAIAA1ABAA8P8JACsAIADz/+r/DQD8/8D/1v8FAOv/2v/0/+L/vP+t/5b/ov/U/+P/9P8jABsA9P8BAAUA7P/y/wAA/P8EAPb/1//b/9b/vv/p/yAAEgAGABIACgABAA4AIwA1ACYAEQAsADUAFAAMABEACgARABwAHAAFAND/u//n//7/9f/3//H/7/8LABoAKABOAFUALgAUAB4AIgAAAPT/HgA3ADQAOAAsAP7/0//K/+z/AwDw/+f/5v/I/6n/n/+s/97/DgAiAEoAYAA1AAcA/f/2/woANAAxABgAFgASAAQA9P/l//T/AADi/9//GAAlANz/rP/P//7/CAAfAFkAeABYAB8A9v/a/77/v//n/wMADQATAPn/zP/G/9//6//s//f/FwAyABEA4P/1/yYAGQD6/yAAXwBnAEgAKQACAO3/7P/y//b/4v/L/9X/3v/2/yUAEwDf/93/4//o/wgAJgA
8ADwAGAAQABcA8//x/yUAJQAFAP7/AAACANj/n//a/yEA8//o/x4AJAAPAO//4v8iABkA1v/z/w0A/v8MAAAA8//7/97/3/8QAAEA3P/4/w4AAAASACcAFgAGAPv/+v8YACAAEAAlAB8A5P/Z//z/BQAHABwAFwAJAAkA+P/h/9v/1f/p/xoAFwAMACkAJAAAAOj/0v/n/xsAKwA0AE4AMADq/8n/0P/W/9T/4P8FABEA6//D/8r/3P/V/9z/EwBOAHgAigBeABoA7P/E/7D/yP/y/y4AWwA7APz/1v+s/4b/kP/c/0IAZwAyAA0ACQDg/73/3v8cAD4APwBFAEIA8/+R/5H/xf/X//j/QgBnADgA4f+1/73/uv+5/wIAWABJACEAHAAEAPr/KgA9ADQAMQAIAOv/0f+V/7b/GwAFAND//f8LAN7/zP/h/xIAJgALAB0AOAAGAOD/8f/q/+D/AAAZABYACgD0/97/1f/e//n/EwAfACYAEQDs/+L/3//Y/wAANwBBACwADgD4/wEABAD4//v/+f/1/wkACQD2/wAACAD9//r/7//f/+z//v8OACIAEADy//P/5v/j/xQAEgDe/+//FAAPAAUA/P/5/wkACAAIABEAAQD7/xcABwDc/+j/CQAFAPH/6f/u/+v/5f///zEAPQAcAPD/zv+9/8P/2v8RAEsANwDx/9X/y/+1/8r//P8XAC0AUwBXABcAzP/E/93/yP/A/wEAOQAgAOz/6f/9/+n/3/8BAAkACwBIAFwAEADe/9j/uP+m/9P/FwAxAAUA4P/w/+3/z//Z/+//DAA+AE8AOQAgAAUACQAOAOX/3v/6/9z/sv/A/9f/5//u////MQAsAND/sP/1/ywAMwBAAEQAIADh/8L/yP/B/8P/GQB/AHIAGgDs/8D/cf9o/8n/GQARAPL/+//+/8P/m//d/y0ARQBlAIMAXAAFAK7/d/+L/87/CQA9AGEAUgAhAOP/rf+r/83/7/8yAGQAPAAIAPz/1f+z/7v/w//n/ywAPAAuAC4ABQDV/9//9v8KADMASwAwAPb/zP/U/9T/wP/5/08AQgAZAB4AIQACALv/lP/X////+P8/AGAA///a//b/0/+9/+n/HABEADIA+v/f/7r/lf/Z/0AAVgA7ACYADwDt/8D/oP/A//n/GAA+AGgARADk/7f/xP/X/+v/EQBAADQA8P/R/9b/wv/J/w4AQgBDABkA7//x/+//3////yEAIgA0AEYAMAAJAOL/1//q/+f/7v8lACgA6P/Y//b/9P/k//z/GwAQAPT/6v/t//D/8/8GACQALAAfABIA/f/r/wEADgD2//7/HAAIAOr/3//H/87//f8OABMAHQAYABAABADy//D/4P/N/wsAWgA6AO3/6v8LAP7/3//d/wMAOwBIABIAzf+v/8P/4P/y/x4AagCAAD8A+P/o//P/5f/W/wIAPgBBACEACgDy/8b/n//D/yMAQQAgACgAPwANAMD/qv/X//r/AAAgAEIAJwDz/9L/1v/7/yQAKAAHAN7/0v/d/+D/4//b/8T/y//w/wgAGwASAOr//P8WAN3/wf/r//j/6v/k//H/KAArAN//2/8AAOH/2v8NAC4ASABCAAkABQAWAP7/EQA1AEEAXwBFAN7/wv/J/6v/zv8BAB8AdQCFACYAFgAjANP/qP/O/wYAQgA6AA8AIwAlAOX/z//2/xgALwBIAEIA6f+U/7j/6f/K/9r/LABcAGsASgD2/8T/rv+U/63/5/8rAGcAUQAuAD4AIwDm/87/2P8FAA4Arf+b/wUAGwDn/+3/JABeAEsA+v/4/xAAyf+b/8L/0v/l/xsAMwBHAFEAKwAAANv/zf8PACcA0/+//93/vf/M/xsAQgA7ACoAKAAvAA0A7f8IAAQA8f8lAEQAJQAOAAUAEQAKAL3/n//q////y//N/+n/9//+//b/EgBGADYACwAIAPr/8/8wAEkAEADm/+//DgATAOj/1//
7/+z/vf/f/wsA8//p/wsAGgAKABEAJQASAPj/EwAjAAAA+P/9/9n/xv/r/xcAMQAiABEAIwD7/7v/4/8CANX//v9JADkAIwAbAPv/9//4/+z/DAA3ACAA8v/p/+T/0//l//T//f9DAHYAOgD5/+r/1f/E/8D/z/8QACwA/f8EAEUANwD7/xQASQAaANH/6/8MANr/xf8FACIA+//a//H/DwD0/8r/zP8CADUAFADP/+D/BwDo/9b/8/8NACAAFwDo/83/5f/0/9f/1v8cAG8AZADs/6r/3v/y/8n/1/8gAGEAbwBEAEgAXQAlAOP/1/8CAEMAOQDy/9j/5P/l/9f/0f/4/yAAMwBuAHsAHgDT/9P/8v8GABIAPgB5AE4A5/+7/7H/tv+u/5r/yv8BANz/vv8BABkA7v8LAFcAYABCACEA3f+l/5b/rv/t//n/3P/0/wkA9//x/9z/xP/W/wEADgD+/wEACQD5//X/9v/+/yoAGQDC/8L/EwAyACQAEAD7//7///8TAEcAQwAVAC0ASgACAKr/pf/W/xkAOgBIAIcAoABUAAcABADe/4L/sP8uAFYAYwBpACAA0f++/5r/mv/p/w0ANQBGAPz/yv/W/77/jP/L/zgAdgCFADUABAAFAKn/Sf+T/x0ANQApACsAJADz/5z/j//B//f/FQAkAEQAPQD4/8L/uP+v/83/GgBgAJAAdQAzAAYAxf+n/8T/xP/R/xQAHwAGAAAA2//T/9v/uv/H/w4ARQBvAHkAQQAGAOP/yv/N/8v/1/8lAEkAHwAHAOP/rf+g/6r/4v9RAHAASABdAEsA1v+K/5X/zv8sAGUATwBGADEAuf9h/43/yv/4/yoAQQBUAFYAEADb/97/5P8JABMA4P/t/wQAvf+R/8T/HABXAEMAKwBYAGQAIwDi/9L/6v/5/+L/0/8AADYAIwDz/+L/7v8AAOL/0v8WAB4A3f/z/ysAMwAgAPz/IAB0AFIA4/+V/23/eP+W/67/8/8xADsAQwA8AB8AFQD5/8D/yv8rAFgACgDL/wAAIwDu//j/PgA7AAgA3f+z/7D/rf+S/8//NwBHAD4ALgDu/+7/KgAXAOv/7v/8/w8ALgAeAOn/2v/9/xgADQAIACQAGgDs/+D/3//Z/+H/5f8UAFQAHADf/x8AJQDO/7//1//i////IQBKAEsA9v/Q//7/9v/m/ykAXwBAAPn/zf/Y/7//ev+8/1YAZQAbABEACQDN/7z/7f8QACoAYAB7AE4ABADT/8z/y//R/wMAQABAAAcAzP+p/4r/jf/W/ysAVgBWADMAEgAGANj/tP/w/zoAVABiAD8ACQD5/8v/k//A/wYAHAArABUA3f/P/8H/of+9/wQANwBNAEEAGQAFAPX/1v/M/+P/FwBXAGYAOAAVAO3/pP+A/5n/zP/y//7/DgAZAAEA6v/3/xEAIQAkAEUAawA6AOz/4f/m//L/HQAqACcAJgDv/8T/7v8BAN3/z//W/+3/EQAHAOr/GgBeAEYABQABABkA/v/S/+P/HgA0AC4AJQANAOn/1v/L/7z/yf/q/9//uf+//wMAPAAbAOH/9v9HAIgAdwAUAOX/IwA6AAAA5P/2/wcABwDx/wYAHgDD/2r/mf/a/+//DgAcABoAFwAPACIAKQAYADMAOAD//+L/zf+h/7H/5f/3/xIASABDAO//x//s/+z/0f8DAFAAVQA6ADoAKQDr/6f/ov/W/wMAGwA0AC4ACwD3//n/DQARAPH/+P8oABYA4v/g//n/6P+9/9b/MgBcADwAFAD6//b/4v+l/5D/y/8PADMAQQA7ACgAFgAPAAkAAQD6/+v/0v/Q/9v/4v/9/y0AVABfADgAAwDt/9//z//W//b/BwD7//n/AgDh/7H/qP/T/x8ASwA3ADAAOwANANH/1v8CABgAMgBgAFsA/v+l/5n/t//L/+T/HABMAD4ADwD7//L/0v/K/wEAQwBkAGAANADx/8j/xf/J/8D/1v8
WAEQAQAAfAO//xP+7/+f/IwAwABkAEgAMAPP/6v/w/+z/6P/4/xsAJwAGAOL/0//W//3/IAATAAsAFQAHAO7/5//x/wIAAQD9/xQAFAD1/+P/3v/9/0EAWQAtAPj/x/+x/7X/vP/t/0gAewBtAD8A9P+4/6P/nv/E/wsALgA/AD4AAwDO/8b/0P8MAFQAZABVACUA4P+9/63/tf/u/yIAPQBSACwA2/+1/7z/z//j//v/JwBSAEAAFgAdACIA8P/T/+f//f8TABwACADw/9z/1f/4//z/yf/J/wUAIgAbAAMA7v8BAAoAAgAmAEUAKgANAPr/5P/J/7D/w/8RAEMAPAAgAP3/4v/M/7v/0/8DABwAKwAyAB8ABwDw/+f/BAAkACYAIQAKAN//0//S/73/zP8CACsAOwAtABYAEAD8/+7/EgAjAA0ACAAGAPL/3v+5/6j/0P/j/+v/JAA3AA0AAAANABcADwDt//v/LAAgAPz/9f/c/9j/+f8DAAoAHAAFAAIAJAANAN3/1v/i/wMAKAAaAPP/5f/t/wsAHwAKAAcAKAAyACQABQDQ/7j/1f/t//r/EgAbABEAAQDu/+P/5v/x/w8AQABVAEkAFwDW/87/8//8//P/CwAkABAA4f+//8X/5f/6/xYALwA2ACkAAgDR/8f/6f/2/+7///8PAAkA//8BAAYADAAMABUAMgAoAO7/1//V/8D/1P8OABUA6f/f/wsALgAVAPL//f8SABEAGgAVAOn/1P/w//7/8//y/wQAGgAFANT/1/8JABcAGQA5AEcAOAAcAPv/7f/Y/63/tP/x/xcAFQD7/+b/AwAkAA8A/P8IABMAKAA0ABkA/P/n/9n/7/8CAOz/3f/e/+D/+/8VAAsADgAqACQAGAAvADAAEAAAAN//r/+p/8D/4/8aAC4AHAAqADQABgDc/97///8pADMAHgAMAOL/rP+0/+T/BgAwAFQASwAiAOP/uP/N//P//v8GABsAMAAWAM//r//c/w0AFAAaADIAKwD//9z/zf/H/9D/4P///ykAPAAxABMA8v/9/yMAGQD9//j/BAAdAB4A///1//b/5f/m/+v/4v/v//j/7P/v/+3/7/8VACsAHgA1AFIAPQARAOD/zP/e/9L/tv/b/wwACADy/+j/+/8iABYA8v8TADwAKgAWAAcA9f/0/+r/3/8AAAsA4f/e//j/AwAVABUAAwAQABUA8//g/+T/7v8AAAcABAAZABgA6f/T/+b//f8ZAC4AKwAfAAYA7P/t/+//5//8/xgAEgD///D/7f8CABAAFQAnADIAHgD+/9r/xv/S/+D/7v8JABkAEgD9/+j/8v8PABYAGQAlAB8ACwDv/9P/0f/j//3/GgAqACgAGwAFAPP/7P/i/93/6/8AABEADwD5/+v/7f/4/xMAIgAaABcAIAAbAPP/xv/F/+T/9P/1/wkAJAAlAA4A/v8IABMABQDu/+v/9P/t/93/3P/v//3//P8AAA4AFQARAA8AFwAeABEAAAD8/wEABgAQABIAAgDt/97/1v/M/8n/3/8LACkAKAAhAB8AFAADAP//BQAJAA0ADwAFAOb/xv/B/9b/6v/4/xYAMQAwACEAFgAKAPf/6v/z/wUACQD9//r/AgAAAPX/8f/2//X/8f/0/wcAGgAaAAwAAgD+//r/9v/z//X/AgAWACEAFAD6/+7/+f/8/+r/4//5/woAAQD4////BgD+//v/CwAeAB8AFAAQABAACwD6/+T/2P/i//n/BAD8//L//P8NABMACgAJABUAHQAaABEACQD2/+P/3f/n//X//v8JABYAFgADAOr/2//c/+T/9f8NACAAIwASAAEA/P/+//j/8//8/w0ADQD6/+T/2//c/+b/+v8TACQAJgAdAA0A+v/t/+X/5//v//z/AADz/+T/6f8DABwAJwAtADEAKgARAPH/4f/i/+n/8//+/wYAAwDz/+b/7f8DABQAEgAMABEAHAAUAAEA+/8
GAAsACAALABEABQDw/+3/+P/2/+z/8/8JABkAHQAeABcAAADm/9j/zv/G/9D/8P8PAB0AIgAeABAABQAMABsAFwAIAAEA/v/p/8z/wf/R/+T/8v8JACQAMQApAB4AFQAGAO7/4P/q/wEADAAJAAcADgAOAPj/2//c//r/EAATABgAJQAhAAIA6v/o/+P/2f/i/wEAFgAUAAwAEQATAAsAAwABAPz/9v/3//7/BwALAAkA+f/o/+n//f8EAAEABQAVABcABADx//D/+f/9/wQADwAPAPv/6f/n/+//9f/8/wsAHQAkABoABwD3/+z/5f/k/+7/+f8DAAcADQAXAB4AHAAUAAsAAQD8//z/+//4//f/+v/9/wAAAgABAPz/+P/6////AQD8//v//f8CAAcADQAQABAACQACAP7//v/6//H/7P/y/wIACgAIAAIA/P/0/+3/7v/5/wQADAAUABwAHAAOAPj/6//q//X/AwAKAAkAAQD0/+r/4v/g/+b/9f8LACEALAApABkAAwD0/+//9f/7/wEABgALABAADwAKAAAA9//2//3/BwAOAA4ABwD+//j/9f/z//D/8//9/w0AGQAeAB4AGQAMAP3/8f/s/+7/8f/6//7/AAD+//v/+v/8/wAABQAJAAsACQACAAAA/v/+//7//f/8//z/+f/5//v//v8BAAUABQAEAAEA//8BAAQABAAEAAAA/P/2/+7/7v/y//v/BwAPABQAEwAPAAUA/v/6//v//v8BAAIAAwACAP3/9//2//r/AAAEAAYABAACAP///P/+/wQACwAOAA4ADAAGAP//9//x//D/8f/3//7/AwAIAAkACQAGAAMABQAIAAYAAQD///7//P/3//f//v8EAAUAAQAAAAMAAwAAAAAABQAEAP7/9f/0//j//P/+/wQADQAOAAkAAAD8//z//v///wIABQAFAAAA+f/0//P/9v/7/wEABwAKAAoABwAAAPz/+//7//z//v8EAAgABwACAPv/9//0//L/9////wgADQAMAAgAAgD+//r/+f/8/wAAAQD///r/9//2//j//P8DAAoADwAQAAsABQD///z/+P/3//v//v8AAP//AAACAAMAAgACAAIAAgACAAAA//8AAAEAAQAAAAAAAQABAAEA/v/+//7//f/7//z/AAADAAMAAgACAP///P/7//r//f8AAAIABQAGAAgABgAEAAMAAgADAP///P/+//z/+P/3//j/+//8//3/AAAFAAYABQADAAIAAwAAAP////8BAAEA/v/8//r//P/9////AAACAAQAAgD///7//v/9//3//f/9/////////wAAAgACAAEAAAD///3//P/9////AQACAAEAAAAAAP7//f/9/////v/+/wAA/////wAA//8AAAAAAAD//////////wAAAQABAAAAAAAAAP//AAABAAAAAAAAAAAA/v////////8AAAEAAgACAAMAAQABAAEA////////////////AAD//wAAAAABAAEAAAAAAP/////+/wAAAAAAAAAAAAAAAP///////wAAAQAAAAAA/////////v/+////AAD////////+//////8BAAAAAAAAAAAA/v/+/////v8AAAAAAQABAAAAAAD//wAAAAABAAIAAQABAP////////////8AAAAAAAAAAP//////////AAD//wAA/////wAA//////7//f/9//7/AAAAAAAAAQABAAAA//////////8AAAEAAgACAAIAAQABAP///////wAAAQABAAEAAQABAAAA//8AAAAAAQABAP//AAD//////v///wAAAAABAAIAAQAAAP7//v/+/wAA/v///////////wAA///+/wEAAAD//wAA//8AAP////8AAAEAAQD+/wEAAAAAAAEAAAAAAP///////wAAAAAAAAAAAQABAP//AQAAAAAAAAAAAAAAAAAAAAEAAQABAAEAAAABAAAAAQA
BAAEAAQAAAAAAAAD//wAAAQABAAEAAgABAAAAAgABAAEAAQAAAAEAAQABAAAAAAD/////AgAAAAEAAAAAAAEA/////wAAAAAAAAAA/////wAA//8AAAEAAQABAAEAAAABAAAAAAABAAIAAQABAAIAAQAAAAEAAAAAAAAA//8BAAIAAQABAAAAAAAAAAAAAQD//////////wAAAAAAAAAAAgACAAEAAQABAAAAAAAAAAEAAAABAAEAAQABAAAAAQAAAAAAAAABAAEA////////AAAAAP///////////////wAAAAD/////AAAAAAAAAAAAAAAAAQABAAAAAAAAAAEAAQAAAAEAAQABAAAAAAACAAMAAgABAAEAAgABAAEAAAABAAEAAAAAAAEAAQACAAEAAQABAAEAAgABAAEAAAABAAIA//8BAAEAAAACAAMAAgACAAIAAgADAAIAAgABAAEAAQAAAAEAAQABAAEAAgABAAAAAQAAAAAAAQD//wAAAAD/////AAD//wAAAAD//wEAAQAAAP//AAD//wAAAAAAAP//AAABAAAAAAABAAAAAAAAAAAAAAD//wAAAAD/////AAABAAAAAAAAAAEAAAAAAAAA//8AAAAAAAAAAAAAAQAAAAEAAAAAAAEAAQAAAAAAAAAAAAAAAAABAAIAAgABAAIAAgACAAMAAgABAAIAAQABAAEAAQABAAEAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAABAAEAAAAAAAAAAAAAAAEAAQAAAAEAAQABAAEAAAABAAAAAQABAAAAAQAAAAEAAQABAAEAAQACAAEAAQABAAAA//8AAP//AAAAAP//AAAAAP//AAD/////AAAAAAAAAQAAAAAAAQD//wAAAAABAAAAAAAAAAAAAAD//wAAAAD//wAAAQAAAAEAAAAAAAAAAAAAAP////8AAP//AAAAAAAAAAD//////////wAA/////wAAAAAAAAAAAAD//wIAAQABAAAAAQABAAAAAAABAAEAAQABAAIAAgACAAEAAQABAAEAAgABAAIAAgACAAEAAQABAAAAAQAAAP//AAAAAP//AAAAAP///////////////wAA////////AAAAAAAAAAD////////+/////v///////////////////wAA///+/wAAAAD/////AAAAAAAA//////////8AAAEAAAABAAEAAQAAAP////8AAAAAAAD/////AQABAAEAAAD//wAAAAACAAIAAAABAAAAAgABAAAAAAAAAAIAAgACAAEAAQABAP//AAACAAEA//8AAP//AAD////////+////AAD//wAA/////wAA//8AAAAA////////AAD///7/AAAAAP7//v8AAP//AAAAAAAAAQAAAAAAAQAAAP////////////8AAP7//v//////AAD/////AAD+//7//////////////////v//////AAD+/wAA/////wAAAAD+//7//v////////8AAAAAAAAAAP//AAAAAP////8AAP///////wAAAAAAAAAAAQAAAP//AAAAAAAAAAAAAAEAAQABAAEAAAAAAAEAAAAAAP////8AAP///v///wAA////////AAABAAEAAQD//wAA//////////////7/AAD+/////v/+///////+////AAAAAP//AQAAAAAA/////////v////7//v/+//7///8AAP///////////v////////8AAP//AAAAAAAAAAD//wAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAEAAAD//////v/9//3//v/////////////////////////9//7//v/+//3///////7/////////AAAAAAAAAQAAAP//AAABAAAAAAAAAP////8AAP//AAD//////v///////v///wAA/////wAA/////wAAAAAAAAAAAAABAAEAAAABAAEAAAAAAAEAAAA
AAAAAAQABAAAAAAAAAAAAAQABAAEA/////wEAAAAAAAAAAAAAAP//AAAAAAAAAAAAAAAA/v/+//7//f/9//3//v/+////AAACAAEAAQACAAAAAAAAAAAAAwABAAAAAAAAAP///v///////////////f/9////AAADAAUABQAEAAIAAAABAAEAAAABAAEAAQAAAP3/+//7//v/+f/5//v//P/7//3///8BAAQABwAFAAkADQALAAoABwACAPn/8//y//b/+f///wgABAADAAEA+//8//v//f8GAAYABgAIAAEABAAJAAMAAgACAPr/9v/z/+7/9v/9//v/9//0/wMAHwArACIAHQAUAAIA5//V/9j/8P8BAAYADgAeACkAIQAFAPr//f/v/+T/4P/l//P/9f/w//n/DAAZABcADgAOABkAJAAnACcAJgAXAPX/4P/g//f/FQATAP3/7v/V/7j/qf+z/+b/FQAZABcAGAATAAsA/P8EAC4ASQA/ACYADAD6/+z/1P/E/8f/zv/Z/9//3v///y0ALgAhACAAJQAlAAwA+f8EABkAHwAMAPD/3v/a/9T/xv/Y/w0ALgAsABQAAQABAPj/5//1/wUADwAcAA4ABQANAAQA+P/h/9f/6P/n//D////5/xIAEAD4//f/+f8pAEoANgBFADYAAADo/9H/2f/s/wMAKwAQANf/yf/F/8j/0v/0/zYAWABtAGQARwAmAOj/w/+r/5z/zP/0//r/HAApAA4ABQAGAAYAFwAfACoAHAANAB8A6v+//wAAJwARAAsAGAAcAOX/hP9z/53/t//P/+v/RACWAHUAdQCDADoAKgAiAMv/p/+w/6//wv+U/5v/9//a/8X/7v8DACQAJAAaAC4AJQAwAEoANQAuAE4ATwAsADAABQCs/3z/W/9U/33/wf8HAAoAIwB5AJ8AgQA6AAEA2P/a/xAAYwCTALkAdwCS/1z/lf+I/5r/rv8MADEA5f/9//r/+f9TAE4A/P8RAHkAcgDy/8D/6f/B/37/qP///yIAdwCcAHsAXwASAM3/L/8E/3n/z/8RACYARAASAMX/m/+A/7//DwAuADYAnwAOAbQAWwA5AA8Ayf+K/8r/yf+//9P/1v/G/07/l//v/3b/yf+IAO4AvQBBAJkApgApAPX/6//W/6P/2P/R/3v/hf/X/9P/dv+6/+n/yf8pAEsAVgCmAGwAIgAAALz/l//p/+AAOwF4ABEAAQB0/zH/Uf9W/0n/nv/2/8j/NADhAHwACQACAML/zP/m/+T/RgCLAMcApAAbABcAxf9F/zb/dP+0/6n/FAA9AO7/h/+A////cgDdAAABVwHuAOb/l//8/vT+JwB0AF4ApACBAGcA8f8Z/yv/cP8E/wr/av8KAKYAlABOACUAZwA8AD0AZgDA/xIB2AFpACwAjf/Q/kX+cf3q/eH++v/PACsBngFCAbEALAB+/zYA9gD9AP0AigACAEj/UP7s/WP+GP/C//D/OAB4AEoAoQD4AJ0ARgCYAGkAxAA/AcMAkgCy/6f+Ff5m/S39w/01/wkB4gH0AUUCPQJ+ATkAcv8TAFgARABOAHj/PP+K/qj9Kf68/nP/AwBbAJIACwHkAbIB5gDtAHUAc/+M/7//vf/0/2j/zP5E/jH+2f+UAIsAKQFAAUQBuwApAOr/qP/R/3z/Nf+z/3oAeQASAOP/xf+w/0j/Wv/h/38AjgGsAW4AuP/c/zD/4v0S/ur+zv9vAJAAAwFbAewA3QDaABoADQDh/3D/Gv+h/rL+aP+W/8gAfgF+AJIAHwAeAA8Aof7J/lf/f/9PAGgAAgEXAsQBrQASAMD/Rf8j/xb/Rv/G/0UAGgBy/9L+CP/t//v/2AA5AvsBFAGq/zD/rf9j/9P/6P/8/zsAgf8N//H+DP+U/7n/8/4Q/8r/tgCJAeEBOQKgAdcAVAAi//L+e/9V/6L/l/9m/6r+Jv7j/u7+of+3AdgCBgJAAeoB4gD//hr/Wf+
o/3z/Lf5I/jb/dP8mAOn/CgAVAcEAcwCvAGoBSgLKArIAxP5+/7z+6/2Z/SL+rv/X/8r/IAB1AJsAmAApAJQA4QGyAUkBygBKAPr/4v44/pn+LP+G//n/w/96/0cAPwCw/ur9ZP/cACQBdAFYAXECJgKDABsA6/4S/wH/Yf7m/kr/tgA5AdH/Vf/i/xYAYP+s/qv+CgAbAXwAawBXAWcCLAEBABUBBgFbAEMAHP+9/Yr9jP0H/hv+k/6jACIByQCrAfsBvwL8AtsAtgBGALP+tf77/gQAMADw/wn/zf3w/Cv9A/8c//b/VwJ6An4CgQLwAGQBQAGIAL4Ao/+C/3n+y/zf/CP+6/45/4EAfADlAH4BHAF2Ad8ADQH2ADD/f/+HAA0AGQCFAD0Ay/8R/9j+Iv8a/zwAVAFLAGD/Ov+m/9T/kf8wADYBbAFtAOf/1f/7/kP/DQGHAeIASAB2AAMASP1w+8v87v6k/zUAegExA50D0QJaAngBEAEmAez/5/2b/Lz8Sv1q/ez9MP8cAZACTQI2AvwCxwLlAdsAkP9h/pz9g/2r/Rr+oP8tAQwB3P8fAKcAdf+H/1QA5P/8/2YAXgCn/1D/dAH0AeUAtAAZAP7/3v4M/jr/WgCI/yr/Zv8e/2oAmABeAAYBIgHcANj/CP90/7AAYgB8/8P/gP+d/9//fv+8/zcAbQD8/7T+lP6///D/o/+W/9MAmQLgAp4CtgHpAG0ACv8y/fX8Uv5c/gD/FwDw/0UAgwBlAK8AxQCiAfwCawEeALwAFQCi/vr9DP9mAND/sv5e/0j/Nf/W////mv9O/8cAyQCS/+z/bgHDAXkBYQEAAXIAoP/J/tn9Kf7R/o7+5/2P/sL/vABLARIBXAHsAboCMAIEAeH/KP/5/wz/iv3f/hsA/v/G/0//XADLAEoAgAB//zD/KAC9/+H+nf5k/+wAXQE6AecAsgA8ACwALQBq/8b/dQBpACsAcP+o/j7/l/8g/8j+Pv8jAEz/D/9rAEYBagJYA7YCIgJEAJz+AP/m/Tb9I/68/l3/Pf/5/lkA2QCqAFQB0QG0AfEAGwCL/2L/Q/+d/3v/7/5J/zoA9wASAQYAGv+u/zz/rf3n/Q0AbAEJAsYCZAPZAkkAtP6+/Y/8vf0SAOsAzADZANIA+P9U/nT+CQCCACwBYgHjAAgBGAAs/yL/0/6V/54AcAD//4v/Gv+7/oH+8P7F/4EA6QAHAewAwQDUAOYAsgAoAYcAdf/4/+T/5v/7/w7/QP5G/qb+V/8m/+f+0/8cAOv/5QCDAp8DUwR7AzwCa/8K/FH7ePvK/Dz/CgERA0ADMgBP/zH/sP78/8QBlwKqAUgAX/4n/cb8Mv2m/5cBAQLYAZcAuv9N/9j+mf8HAEoAhQCL/7v+9P4F/9r/CwEyAesBJAKSAUUBWP/d/d39JP06/n7/SwDRATQCMQI6AVwA4QDfAXICowG1/wP+iP1X/X785vxY/5MBcgJrAhIDHwPfAdoAnP+1/TL9q/2m/KP8CP6D//cAhAHdAZwCpAJHAj0C1gAd/3n+nv1Z/Jb7Y/xp/u3/agBCAbwBbAGZAIL+eP0W/4cBQwPNA2EDRAKqADUAxv+I/+3/dgBNAAH+z/wp/bj9qf52AKwC4QPlA1IDOgHD/hT+Of4k/5z/EgDcAOoAxwCcALX/xf5x/lX97vwo/Yv97/4SALAAYAFdAQ8BWgHmAZoCRAIyAdD/KP4C/Gn6dPo8+9L8Ov9qAoMFpgfgB4UGVQQ7ASj+N/zH+1v8av6lANMBqwJEAiABx/+R/nP/GQG5AGP/GP5Q/Sb9yPzX/WoBrwRcBRME0QE3/xL8lfla+ar69PwDAMQCXQO7AtgBVQBW/+r+jf4+/tn9SP0m/QL+if8ZAZ8DQwb/BwMIPQbRA+0AuP5t/aP85vwJ/j//rQAFAAX/J/9p/2YAGwFYASgBFgEXAHv+Pf2G/DX9n/6Q/yAAKAC5/4v/lf6d/WH9s/2C/sz+IP+
c/7r/8f9xAA4AFAD8AFMB1wHZAiwDKQJIAf8AzAF6At4CDwRWBEwD1wFHAGv+Lf3z/Mz9Af+J/13/0P5W/rz+CQAQAfoBpQK+AroBef9X/Qr8wPuT/HX+ZQA4AUEBXQD1/lb9rvtc+4D71fuP/J/9P/94AVoDpQR5BYwFrQVSBccDwgH5AHMAvP5g/df8FP2f/uz/XAEjAz8DrAJQAhwBWv+P/mP+av5d/mT+2v6D/yIAUgDKAIwA5P+W/3v+HP0W/Oj7xvxH/qD/1wAxAQ8BwAFAAgEBz/6m/Yn8Yvwl/R3+r//qAGsCcAQkBkwH6AeTB9kFDAOFAAP/aP5R/YP9L/+4/9L/vP9C/2j+b/18/cD+FwCUALYAcAB0/+j+SP/b/5MAAAH9AG8AiP69/LH7Yfsy/AH+tf97ACkAbP9K/4P+i/1v/QL+5f+gAUgCyAKKA2cE0AQnBVUFfgUtBQIEtwEJ/+b80fty/HD+twAdAsICNgLiAKT+ovx2/Kj9v/7n/7cAl/9U/vX98f6XAMgBuwJWA9sBpP4e/KT6oPpc/Of+OwHSAfkAiP83/Yf73/rb+qD75/3EAZcE7gQABdwF+AXWBBYE2QO6A+YBMv80/oD91fy2/DX+JwAhATQBeAGqAgsCuf9Q/rv9Kv28/Nr8//2y/88AyAE7Au4B+AENAn8BkwDj/9z+sv1a/Jv7HfyM/C/9x/0Q/jH+Nf6V/S39vv/TA2YGpAY/B5oHYQVuAuwANQHSAJf/mf7u/mv/0/5I/n/+KQD9Aa0C5QLzAuUB2f/i/I/6f/rV+3f9Rv9EAeQC4wLqADP/z/7Y/gT/xf+JADkAjf6H/Dr7DfsG/IX9yf52/17/z/0h+/v6/v1oAeADpwYyClMLAQnrBLcCtQHs/xP/+P+kAIf/5P21/Lr88P2R/ysBvAIWA7MBCf9Z/Kj7MPyL/R8AZAKTA7QD0gEc/4f95/w4/Yj+PwANAYoAXf8i/tz8ZftW+0v8T/xf/Pj7c/ok+77+KwNgBigJEQx3DAEKbQasAov/7/2A/ff9Y/5I/gv+kv1c/WH+jv+9AO0C8wNaAkn/HP2C/Kv8Ov3C/nsBUQOaAy4Cnf+H/Wj8jfyW/Yj/bAHgAdoARv+j/SX84Prf+nv7z/vy+9z7JP70AMkD2AbLCc8LCAvuCFQGVgO5/9n8W/t++3j8rv2t/q7/vwB4AfgBkQGPAXkB+wAQ/5j8ZvvD+wv9JP4EAI0BYAIWAoIAtP4K/U78+Py0/nYAoAGgAVsAYP6Q/G/6Rvlc+d35I/vN+7P+xwL/BboHXQnMC0ML4QgNBlcDqv8T/DH6VPod/Kb9I/+wAK8BYgIEAzYCPgHCABsAef6E/Ff8Hf1Y/jb/VwDVAU8ClgFv/1P9JfzU+4H8gP4PAR4D8wP0AmMBkP+7/Xb7zfhf93H3NvfK+L39fQNzCEgMYw/gDuULFwgIBCMASfyj+oj6M/uz+4L83v0E/94A1gIfBD0EJQMtAYr+5PoG+MP4d/uJ/sIB+wPUBMwDMAFs/sX8jfw//e7+LwDGAPcANwBY/4D+Y/55/kn9J/zl+ir6nPlR+gb/qwSLCAsKvgsSDCAJ8QSoARIANf4W/QH9Rf1v/Yr92f15/goAHQJsAwADkAFNAFz+uvpB+C/5kPxX/0IBWgNZBPgDqgE2/zj+R/4m/8v/SwBBAF7/Ff5//JD7vfsm/GH85Ptp+4X8W/wg/XEBHwcYCocKmQs9CrUGFgIJ/+H9nP3B/nb/mf8F/1r+Qv1e/Bn+JgEtA6YDRQMhAvn+Dfvl+Pn51vxW/1cBIgOnA00C4//Z/cT9f//JAToDjgMGA3wBV//A/M36Nvo+++j7JPzI/DX9d/0l/Bb+1wIgBgQHWghFCt4HrANQAAAAvwBgAPv/yP+k/yj+UPwj+wb85f79ACsC8wKDAnAAEf0/+rL5gfv8/U0A+QKWBJgEOQPxAJD/4/+9AA4BuABZALf/fP6Q/Mf6ifqG+8L8i/2
Z/pb/HQCJ/uj9hwB/A04E/ASZBzMHrwSiApAC+AKEAg4CRAGbAHX+mPsb+Rr5/fvB/oIAcgLfA+MCwP8J/Gr6Evs+/aH/vALWBHwEQwMWATj/sf6q/5QARAGOARABXv+Q/G/6wfl8+tr7Qf0b/+H/dP+f/RH8Kf6PAekDgwVCCMMIBAdUBYEDQQNhA6MC/QAZ/+X8d/qy+Nn4Hfw2AN8ChAQABYUDBwDo+yT5Svmx+9L+igHKA1EF3wRFA0kBXwDtAI4BRQETABv/qf24+9z5Wfmq+qr8Dv/nAB8CAAEd/5f80fpS/XcAcQJaBDsH7QdYBrgEMQP1AvMCLgKDAMX+bP3P+zr6XPpx/ccApAKbA18DDAEl/dj5Sfg4+WL8WwBCA90FTAegBncE6AFkAaMB2gBy/47+9P2z+/n4EfiH+dX7H/7sAL0CzALiAGH+VPu9+vz9lQD3AcgEXwhGCOIFewQnBGMEcgOoAWkA8f7r/Mr6tvnD+hX+2QABAsYCZwLN/7D72fjD+DX7ff4TAn4FxweoB4YFHgOHAQABEAAO/3v+r/22+zv5Svgx+Xz79f3qAJkDwQSkA5sAMP6f+tX4WvtB/3YBwgMYBx0H5QUcBSoFpAVwBZgE9QIqAAr9z/oT+Wz5SvwZ/zgASwDY/47+dfyp+jb7J/6zAYQEFgbpBtYGvAXrAwYCfwDz/iP9Kfsq+QP4PPje+Tv8dP7KAGQD/gShBEYCvf+K/dL5k/h3+4T+fP8HAs8FoAZrBlYGVAf0BygHhAXWAnr/gvxK+kf4Yvjb+jz9XP6z/jv/Nf+B/WL8l/1mADADKQVkBlYHtwbhAxsBbv9a/sj8Dvt0+tf6z/oZ+978+P6pAEsC7wOKBJkDXwFI/vv7Kvkc9xr5yvy3/vsAggW/BzIImAjtCDQJxgiYBlEDLQDU/Nz5EvcV9sb3Wfrl+zX9Hv92AO//yv6j/z4CpwQuBkwHwge2Bt8DrQD7/f37kfp3+Uf5IfpO+5P8Of7y/5EBWQNyBL8DyAIdAQX+AvtQ+KL3u/l0/AH/IQPFBgAI1QgkCTEJXQlmCGMG3wP+/0H7uPfK9af1pvdX+pP8Jf+1AGQALv+R/lsAwALBBOUGsAiZCM0FIwKk/gv8TvoU+U750Pon/Mj8Q/3Y/oEApAEAA0UEGQVbBNwBYf2I+TL2MPRf9pf6Vf49AjUHUglNCdkJhApbCmQJZQfbA37/pvqg9nf0YfT29uD6rP2q/5YB1QEaAFr/nwDDAtsEJQb+BlkHSwVnAZn+Gf0j/P36OPre+v77dPzR/J/+xACwAhEEoQQkBU4EvQC1+2X4aPU+9D33Jfs3/pEC1AfyCPIIkQpfC1ALTwrsB14E8P9D+lz1cfMB9Lr2lfqy/SUAqwH/AFj/5/57ANEC/ATEBsUHdge0BA8Be/4a/d77hPqr+t/71Pzl/Pv8t/7WADcCQQNxBAUF3gOBAAj8DvmU9hT1Wffw+8X+wgFaBvQH5wdLCX8KOwr2CTwIjgSEALT7n/fl9Q325vcm+6n9af+KAHf/hv3A/TD/JwEHBFIGYgf6BsUEUgHs/sb9yfwF/OD7ZPyE/NX7wfvK/aQAwQIkBNEEswS8Aqj+avqc+KX3jPaD+Nb8b/8BAWIEpgYbB3kIAAorClgKeAm6BRwBMf0M+uz3nPcd+e370P3//d79H/2u++P72/2gAPsDygbwB34HHgZ4AxABzf+2/pv95Pyq/A/8Pvts+/H87v6CACICKwPyArABnP/3/H77i/qv+H357fyM/t/+zgGABOwEVgY3CPQIugmbCdQGPAORAOz9dftc+sz6+fs7/ID7Mfv0+gv6Q/pY/ET/jALYBWwHngeWBwEGhQM2AhQB4/4q/Wz8m/u++tv6K/w9/qz/ogC1Ae8BSQESAGb+av3i/GD7Hvs2/Vr+B/7s/4ACKwMyBPkFCgfnB2sIewafAwUCXABd/i391PwG/Qn9mvvJ+VD5yvh++FD6Q/1qAMIDzAU
3Bv4G+AbxBK8DdQPZAcn/lv5R/Qn8tPv/+8z8Kf5j/wgAIgBZAGQAuP/D/lL+of2U/PL8eP0o/Uf+twC2AUACoAQ4BhMGbgZaBtQEVQNfAgsB9P+E/+v+Kf4v/bT7UPo7+cD4cPkg+6T9UwBxAsQD6gSRBeEEWwR4BLkD4gFyAJL/Nf5N/Yz9Gf5+/gv/av91/+7/dwBHAAMA2f/3/pb9xPyE/DT8kfzy/XP/TwB6ARUDrQP5Az4ElgOhAowCcwKJAf8A+QBpADz/5/2y/Lf7Bvvr+o37Cf3x/mUARgFPAiwD+gKyAuQCcgJgAY4A1//y/nv+gf65/k3/cQBDAUYBngF3Ai0CNAHjAGAAP/8a/i/9W/xb/J/8z/yd/RD/DgA4AJsAGgEHAYwAqgA/AZIBsAHnAfMBewGMAED/4v1R/YL98v37/nwAqgEgAlgCfwJIAtgBhAEUARMA5/4L/nb9T/3C/av+5P9WAVcCrQLrAhADlQLCARIBZQCe/8L+uf3O/G/8ePyo/Er9zP4WAEsANwBeAPb/7P6X/kv/TgAHAbABAQKRAbYAlP+m/n3+M/8sAEYBZALjAs0CYQLiAWIB5wB5AP7/cf+G/oz9F/0x/b390v5DAK0BrQLiAosCQQLFAfcATADB/2H/7P7g/fX8Jv1y/VT9A/5w/2UAmwBlAN//RP+O/v39Q/5F/zsA3wD5ALUAZgDC//H+NP+iAOMBCAMcBGcE4wP6ArMBdgCz/yH/pP4j/mv91fzT/Eb9AP5d/xUBPgKCAnUCOwKAAcQAdQBLALf/KP+3/vn9Vv1n/az95v3h/i4AxwDYAMMAJwBU/8z+a/6P/ob/bAC9AM0AvwB9ABkAt/8iAG4BnAKJA14EcQSbA44CQAHh/xD/q/4N/oz9Vf0m/Rz9hf1M/k//fwBFAZEBoAF0AfoAngCPAG4A7/9u/0T/8P5v/pz+Lf9h/+T/qACpAGIAfwAeAFz/H//d/mf+mf4K/xP/Hv9k/5f/tf/I/zcARAFAAuQCdwOZAwMDPQIzAfj/O//o/nD+OP5e/lj+Uv6q/hX/cv/+/3cAqwDPANIAegA1ADgALQDm/6//mP9j/xf/Q//Q//j/JAC3AMgAJADI/2v/sv5J/hn+6f1I/uT+Bf9C/+n/ZACjAAkBuQGcAjwDVgNVAyYDVgJKAW8Amf8R//v+wv6O/uz+Wv9M/0n/g/+v/9L/5f/W/wgAVwA+APr/GABkAGAADgDT/9r/4v/l/+7/5P/3/zMA3P8X/97+z/4y/q/9q/2Q/ZT9DP6F/gD/1/+6AFEB5QGcAjUDfQOFA34DPgOGApQB0wAwAJD/Lf8K/wn/MP9w/3P/W/9Z/0H/Lv9c/2b/T/+Y/8z/kv+N/+//EAABACMAWACJAMcA9AADASwBQgHvAFwA2v8t/w7+I/3X/MT8u/wH/Zf9Lv7M/lj/3f+mAI4BGAJnAskCAQPPAmMC7QGSAU0B2QBnAEYANgDw/73/n/+A/2b/L//o/vb+I/8W/zH/fP+d/7//+P8SACMAQwBkAJwA6QAFAfIA8QAFAeMAigA4ALj/y/7u/XL9Ev3X/P38U/25/VP+6f5d//X/wwB2AQYCmwISAy4DAAOxAlIC9AGKARIBsgBiAAgA0v/E/6P/cv9A/wD/1f7R/tD+7v4y/2b/iP+b/4z/iv/E/xAAcQDvAD4BQAEUAd0AoABWAAYAzP9q/6z+Bf7F/Zr9cP2D/dD9Of6b/s/+CP+r/3IAFAHXAacCHgNBAy4D0gJvAiYCuwFJAQIBswBMAOb/df8H/8T+kv5w/nz+n/7H/uz+Af8Y/zT/Q/9p/7//GgB/AN8A/QDpANcAtwCMAIcAtADCAHwA+v9k/8r+Rf7z/en9BP77/b39m/3C/Rb+lf5e/3wAegH+AT0CeAKJAnQCcQJ4AnkCaAL8AU8BxAA2AIz/DP/T/rP+qv6n/rz+6v4G/xD/N/98/8//HABqANQAPQFZASoB+wDjAMUAlwC
KAKcAmAA1AJv/B/+a/jb+zv2y/dj9vv1//Y792/00/rn+gf9mADcByAEaAlcClgK0ArMCwgLMAosCCQJpAcQAJgCG/wH/vv6H/kD+NP5D/jL+Uv6d/sX+D/+H/83/HwCpAPUABAEYAQoB5gDRAMQAywDUAJgAMACz/wf/XP4A/uH91v3F/Zr9jf3W/Sj+dv4x/y8A+AChASkCZAKZAtgC3gLSAuYCxQJjAvIBaQHRAEUAvP9J//f+uP6O/lb+/v3M/c39y/3v/XX+Hv+x/y0ApAAOAUUBTAFNAVwBawFrAUgBCQGwACoAd/+7/hL+pv2H/Yv9e/11/Zv94/07/rv+b/85APwAlwH0ATACXwJ2AogCpwK2Ap8CbgIWAqQBLwGnAA8Ak/8z/9v+k/5a/jP+IP4O/hX+a/77/oj///9qANYALgFKATUBLAE1AS0BBAHHAIgAQwDP/yn/iP4F/pn9TP01/UT9aP2j/QH+iv4Y/6P/QwDkAEkBhwHRAQMCDgIuAlgCWAI+AhwC1QGDATkBxAAuAMD/bP8F/5P+Pf4R/vb91v3j/Uv+6/5y/9X/PQCnAOwAIAFZAYQBsQHSAb8BpAGTATUBggDb/0//u/4l/rr9if1+/XT9cP2p/Rj+e/7Y/mL/CgChACMBhAHGAQsCSQJgAoQCxQLeAqMCOAK7ASABagC5/y//wf5g/gn+xf2s/c79Gf6F/g3/j//v/zsAdQCxAAQBRwFsAYABeQFSASMB4AB9ABkAp/8J/1/+4P2W/Xn9Zv1O/Vb9mv3x/Uz+3v6i/2QAFQGhAQMCXAKwAucCGQNPA1UDGAOxAiwCjAHdACcAfP/s/nP+Bf6y/Zb9sf3p/TH+mP4f/5n/3v8GAD8AlgDpACABTAFxAXMBOgHcAHsALADs/6D/Pf/R/nH+F/7D/Xz9Uf1O/Wz9m/3n/WH+//60/28AIAHGAWIC3gI2A4gDygPQA40DEANrArYB9QAgAE7/tf5Z/hv+6P3Y/Qb+Uf5+/p7+5f5F/4z/xf8cAJcA+QAbARkBJQE3ASIB7wDYANwAtAA8AKb/If+j/iL+xP2h/Z39lf2Q/bH9B/6H/iP/3/+4AIYBKwKpAg4DVQNwA2kDRAP0AmYCtwEGAVAAj//b/mD+Hf71/eP99v0h/kr+bv6o/v/+Vv+Z/+L/PQCTAMYA1QDfAOcA6QDpAP4AIAEmAfgApQA2AJz/6v5a/hD+7v2+/YT9bP2I/cD9Df6Q/lL/MQD5AKIBMQKiAuwCDAMMA/QCsgJBArcBKQGZAAQAe/8O/73+gv5c/kr+TP5g/on+t/7v/jj/kv/n/zQAgwDRAA4BQAFmAYIBkgGJAWkBNgH2AJUABABN/4/+8P11/ST9+fzs/P/8Ov2a/RL+of5W/y8ABQHAAVMCwgILAy8DLgMOA9MCgAIJAnMB3QBYANb/U//w/r/+nP5g/jL+OP5i/pT+3v5O/8f/KgCFAOIAOQGGAboBygHCAbUBjwE/AdcAaQDf/y//cf7D/Tv97/zQ/Lz8t/zm/EX9sP01/vH+2v+2AGcB+gGBAuoCIgM7A0gDPwMJA6MCHgKNAf8AawDW/1L/8v6j/kn++v3Y/dX92v35/Uj+uP4u/5z/BABrANAAGQE3AU0BeAGdAZQBcQFGAfcAagDB/xv/fv73/Zj9W/04/Tj9U/1//dH9U/7y/pf/QQDrAIEBCAJ9AtQCCAMaAw0D6gKxAmAC/QGaASUBhQDT/zz/s/4z/tP9rP2x/df9Ef5Q/qD+Df93/8b/KgCyACQBYAGBAZMBiAFhATwBHAHzAKYALACO/+n+T/7S/Yf9df2K/Zz9q/3R/Rr+gf7+/qH/ZwAoAb8BJgKAAsUC5ALgAtECtwKBAicCrQEdAYMA2v8r/5f+MP7x/c39zf3y/Sv+Yf6m/gj/ev/e/z4AoQD+AEcBdAGFAYkBhQFlASoB3QCCAA0AjP8G/4P+EP7G/Zn9cv1R/VD9ev3O/Tv+xP52/z8A+AC
OARMCjALlAgsDEAMHA+0CrwJDArwBMgGcAPH/TP/W/or+S/4f/hv+Pv5z/p/+zf4O/1z/pf/x/1IAwgAWAT0BSAFOAUgBJgH5ANUArQBmAPj/cv/q/mL+6f2L/VP9Nv0z/VP9mv0F/on+Kf/q/7YAcAENAocC4QIWAyMDEwPuAqwCSQLIATIBkQDs/1X/2P58/j7+Hv4f/jz+Zv6O/r3+/P5M/5v/5P9EALkAJQFfAWoBYwFdAUgBJAH4AM8AmwBIANH/Qf+s/if+xP2E/Vv9SP1Z/Y/96v1T/tL+aP8WAMUAYgHnAVoCvQIAAxQD8AKtAlwC+wF6AegAWwDf/23/A/+q/nH+VP5O/lj+bf6S/sj+D/9b/6z/AABaAKkA6gAZATcBOgEoAQ8B9wDVAJ0AUQD6/4r/+v5k/u79pv13/V39a/2r/fv9Sv6o/i//1f+EACwB0AFoAtkCEgMeAxAD6gKhAjgCvQE6Aa8AHACX/y7/3/6g/nX+YP5e/mz+j/7K/hv/cf/G/xoAawCzAOIA+AD6APAA4AC/AJYAawA+AP//oP8l/6D+Jv7H/Yn9c/2E/bj9AP5Y/r3+Mf+t/zYAzABnAfQBYwK5Au4C/gLkAq4CZAIMAp0BGQGNAAUAif8Z/7z+gf5k/lb+VP5j/oT+q/7V/g3/W/+z/woAZADCAA0BOQFHAUYBOAEXAeMArgB+ADwA4f95/xn/wf5r/iL+8/3k/eP97v0T/lb+tf4k/6b/OQDRAFoByQEbAlICaAJeAjoCAwLAAWsBCwGjAEEA5/+U/0n/Dv/j/sr+wP7G/tz+//4l/1H/g//A/w0AYgCxAO8AGAEnARcB9ADMAKgAgABNABMAyv9v/wn/q/5e/iz+Ff4Z/jn+b/60/gb/a//c/1AAvwAlAYEBygH2AQ0CFwIOAvABvwGCATsB4wB/ABgAuv9o/yH/7/7X/tX+3f7n/vr+GP87/2X/mv/Z/xwAVgCDAKUAtACtAJQAdQBUADAACwDl/7X/cv8g/8j+d/42/hP+Gv5A/nn+t/7+/k3/pf8FAG4A2wA9AYcBtgHUAegB9QH8AfwB7AG8AWoBBAGYAC8A1v+W/2//U/87/yX/EP/+/vL+8/4J/zD/Zf+n/+//OQB9ALQA2wDuAPAA4ADDAKIAfQBRABkA1P+D/yz/1/6U/nD+b/6G/qv+1/4F/zT/Zf+j//H/QQCKAMkA+AAVASUBOwFYAWwBbAFTAR4B0wB4ACYA8P/T/8H/s/+r/53/hf9m/1j/Wv9m/3j/oP/a/xkAVgCSAMcA4ADZALkAkQBjADYAEgD+/+v/yv+c/2z/Pv8U//j+9/4H/xz/N/9U/3b/mv/C//H/HgA+AFEAUwBOAEcASQBYAG0AfgCDAHcAXAA5ABgA///u/9z/yv+2/6D/kf+O/5j/p/+5/87/4P/w/wQAHgA7AFcAbAB6AIEAgAB8AHQAaQBYAD4AIgAJAPb/7P/w//z/DQAgAC4AOQA/AEQARQBDADoALAAcAA8ABQAFAAUABwACAPD/1P+z/5X/gv98/3//jP+a/6j/s/+//8//4//4/wsAGAAhACEAIgAjACUAKAApACIAEQD7/+T/0f/C/7r/t/+z/6r/nP+N/4j/kP+l/8n/9/8kAE0AagB+AIkAiwCFAH0AdABqAF0ATwBEADkALQAhABQABQDx/9v/xP+w/6D/l/+Z/6X/uv/U/+//BQASABMACQD5/+r/4P/b/9z/3v/d/9j/zf/A/7n/t/+7/8P/yP/J/8b/v/+5/7n/wf/P/+H/9f8GABUAJQA4AFIAbACIAKMAtQDBAMIAvACyAKgAmwCMAH0AaABPADEAEADw/9X/wv+5/7f/uP+6/7f/sv+t/6X/oP+b/5P/iv+A/3T/b/9y/3v/iv+d/6v/tP+2/7b/sv+r/6X/of+e/6D/p/+3/9D/8f8WAD4AZACCAJ4AswDFAM4A0ADIALMAlgB4AFgAOwAdAAEA4//C/6P/if9
3/23/a/9v/3P/dv96/3//hf+P/5n/ov+n/6r/rP+x/7j/w//S/9//6P/u//H/8P/s/+b/3//W/8v/wf+5/7f/uv/H/9r/8/8QADEAUQBwAIkAmgCjAKIAmQCKAHoAawBgAFcASgA6ACYADwD7/+v/4v/e/9v/2P/P/8P/tf+q/6D/m/+a/5z/ov+o/7T/xP/W/+j//P8PACAALQA2ADoANAArABwABgDu/9n/x/+9/7f/uf/D/9D/3//y/wgAGwAqADMAMwAsACAAFAARABQAGgAkACkAKwAnACEAGwAYABYAFAAOAAIA8v/g/9L/yv/K/9L/2//j/+j/5v/j/9//4P/l//H//v8LABMAEwAOAAEA+f/w/+r/6P/l/+P/4P/d/93/4f/q//j/BwAWAB4AIwAfABkAFAASABQAGgAgACUAJAAeABgAFQAWAB4ALAA6AEUARgA/ADIAJAAYABAADAAJAAUA+v/u/+H/2f/Y/93/5f/w//f/9//z/+z/6f/p/+7/9f/5//v/9P/r/+H/2P/V/9z/5//x//r//v/+//z//P/+/wUADQATABcAGQAYABUAEwARABEAEgAVABkAHwAlACgAKAApACkAKAAoACcAJAAbABAABQD7//b/+f///woAEgAVABIACAD///b/7//q/+n/5//k/9z/1//T/9H/1f/b/+L/5f/o/+7/9v8BAA0AGQAjACgAKgAnACIAHQAWABEADAAIAAYABgAJAA0AEQAWABcAGAAZABkAGgAaABgAFwATABEAEQAUABkAHgAkACQAIgAeABcAEQAOAAsACQAJAAkACAAHAAQAAQD9//r/9v/z//L/8//3//z/BQANABUAGgAcAB0AGQAVABIAEAARABMAFgAZABkAGAAVABMAEwATABQAFAAUABIADwAKAAYABAAFAAcADAARABYAGgAfACIAJAAmACUAJQAjACAAHgAcAB0AHAAeABwAGQAVABAACgAFAAAA/f///wEABAAKAAwAEAASABEAEAAPAA8AEQARABIAEgAQAA4ACwAJAAgABwAJAAsACgAGAAIAAAD+//7/AgAHAAsADQAQAA8ADQAMAA0ADwARABEAEAAQAA4ADQANAA4ADgAOAA8ADgAKAAcABAACAAAAAAABAAIAAwADAAMAAQAAAAAAAQD/////AQACAAIAAgACAAIAAQACAAIAAgADAAIAAwACAAIAAwAEAAcABwAGAAYABgAFAAMABAAEAAUABQAFAAUABAACAAIAAgADAAMABQAFAAYABQAEAAQAAwAEAAQABQAGAAUABAAEAAQABAAFAAQAAwACAAIAAQD+//7//f/7//v/+//8//z//P/7//r/+f/3//j/+P/5//v/+//9/////v/+/wAAAAABAAAAAQD////////+/wAA/////////f/8//v/+v/7//v//v/+////AgADAAUABgAIAAcABwAGAAUAAgABAAAAAAAAAAAAAAD//////f/7//j/+P/3//j/+f/8//7/AAACAAEAAAD+/////v///wAAAQABAAIAAQABAP7//v/8//v/+v/4//f/9v/2//X/9v/2//b/9v/3//j/+f/7//z//////wAAAQABAAIAAgADAAMABAAEAAQAAwABAP///v/7//j/9//2//b/9v/1//f/+P/2//b/9//4//j/+P/6//v/+//9//7//v/9//3/+//6//f/9v/z//P/8v/w//H/8P/w//H/8f/y//L/9v/3//n/+//7//z//f/8//z//f//////AAABAAAA///8//n/9v/1//T/9P/0//T/9P/1//X/9P/z//X/9f/2//f/+f/7//z//v///////v/+//3//P/6//n/+P/3//b/9v/3//f/+P/2//f/+f/6//3//v////7//v/+//3//P/8//z/+//9//z//P/8//r/+v/5//r/+v/6//z//f/9//z//P/7//r/+v/4//j/+f/5//v//P/
9//3//v////3//P/9//z/+//8//z//f/8//z//P/8//z//P/8//3//v//////AQACAAIAAQAAAAEAAAD/////////////AQAAAP//AAACAAEAAAAAAAAAAQD//wAAAAD///////////7//v/9//7//f/+/////v////7//v/9//z//f/8//v/+//7//z//P/8//z//v//////AgACAAMABAAEAAIAAgACAAEAAQAAAAAAAAAAAP7///////7/AAABAAEAAQAAAP7//v/9//7/AAAAAAEAAgACAAIAAwACAAMAAwADAAQABAADAAMAAwACAAEAAQABAAEAAQAAAAAAAQABAAEAAQABAAEAAAAAAAAAAAAAAAEAAgACAAEAAQD///7//P/8//r/+P/8//3//v///wAA/v/9//z//P///wAAAQAIAAsADAAQAA8ADgANAA8ADQAKAAYABAAEAAIAAgD8//r/+v/1//b/8v/t/+f/5//o/+f/EAAhABEADwAQAA8ABwAJAAwAFAALAAwAIAAaAB0AIgAuACAALgBvAGMANwAiAAgA6//e/9//BAAWAOX/y//G/6//jf98/37/j/+a/6r/yP/i//v/AQAQACoATwBoAFsAVwBrAGQASAA7ADsARAA8ABgABgAFAPf/0f+0/6f/ov+R/4b/jf+X/6f/qv+1/8D/u//A/83/4v/z/woAJAA5AEQARgBCACwAIwAcABIAJwBDAEoAUABKADkANgAoAAsAAQAaABQAAQAUADYARAAeAAwAKQA0ABYA9f8FACMAKQAdABgAJgD8/9X/2//9/xMAEQBEACoACgAcAM7/wf8JAAoA9P8XABEAx//q/8z/gv8VAPv/2P9XAAYAtP+n/5T/kv+h//z/PACgAMIARgCDAIIA7P8gAHQAMQA8AEcAFgA6ANf/g//s/9f/of/c/73/s//Y/4D/bv/P/6D/6v93ADkAZQCQAPn/mf+1/5n/Of9l/+7/NwBOADEACwAOANT/dv+r/xkA3//I/yAAMQBaAE0AJQBEACoA/v8UAO//AQBcAOj/ov8bABgA+P8vABwA+f8NAP7/qP8WAPz/Bf/A/3oAOABOAG0AcwBKAAgAEQAwADsAFwAQAEAAcwB4APn/zv8WAMj/sf+y/3z/0f8WABMAQACXACsAz//X/6H/3//C/73/dgBgACUAQAAGAAQAMwANAOj/SQAgAIL/n/+I/53/EAC5/zEAmADY/4//rP/j/8D/Rv+R/1MA8f+R/zUAYAAKAL7/rP8DAAoAv//4/4YAYwBKAIEAJgBxAGIA/P9UAFwALwDn/7P/tP/3/8L/c/8IAE0Avv/i/zAAzf+u/5v/dv+6/9D/jP/n/ykAJABFAE0AdgBjAOv/xf/o/wAAGwAoADIAVgBMAOP/7/8qAL7/mP/c/ywAUQDn//n/3P9r/5f/pP/u/0gAHgADADgAVgDi/8L/EQDc/wsAUQAnAA0ADAAeAN3/u/8CABUAJAAXAB4AKAAJAPT/BgAYANT/4/8RAEUAOwDV//b/MwCZ/2X/4v/S/9n/HwAYAHUAlgDI/83/EQC+/+n/LAAxADMAGgAbACgALAD5/6n/zf/M/53/4//8/9r/yP+a/5r/4v/n/+P/RAAMAMH/HwD+/8z/UwAyAFYApQBEAFYAOADZ/7z/qv+0/6//q//1/zAAWgBLAC8AAQDZ/9v/yv8qAGEALQDl/+v/LQAWAOv/9v8ZAA8A9P/p/+X/sv+M/5v/v//i/9f/4/8qACQA7/9MAEEA3v/r/9n/7P8nADgAfQCXAE0APQA1AAIAwP93/4j/zf+R/23/zv8YABMA6f/n/+n/zf/S/8T//P8nAMv/zf/3/93/tP/b/yoAOwAAAOX/MwA1AP//JgB9AJcAcQBKAIgArgBpAC8ARAB4ABQAEQBlAEIA7v+3/9H/vP9r/0f/jP+4/7P/3/83ADwAFQDh//P/OQANAC8ATQA+ADgACgAMAMz/tv/
L/5b/gf9l/2v/PP8S/2f/hf/A/8X/Sf+i/87/fv+p/xoAXwBZAGQAZwCfAK0AugAAARAB8gDCALgAhwAIALT/yP8MAAcANACFAEsAUABBAA8AGgDj/+L/HAAhADUAfACaAI8AmABuAF4APQDp/+//zP9z/63/vf+8/xcA7f/G/67/PP/E/sH+zv68/sX+0P7E/pv+U/4n/pr+3P7L/hT/Tv9T/4r/jP8LALQAmwCsABsBcAF/AXMBjgHAAYcBdgGQAaIBwAFlAeYA0QCWAEsAVQAnADAAmwCbAIkAuwBWABUABwDs/xEA0f/Q/1IAswCNAH0AvgCWAA0A4v8IAOP/of9d/zP/VP8n/yL/YP8B//n+yP5y/nv+NP6u/XP9nf2b/QX+aP5P/kL+ZP5//q/+IP+p/1YA2gBQAdUBDwIjAl8CSgIsAjECIAI0AhQCpwE3AfwApQAxAAwA6P+c/1//DP/p/vr+9P5K/3b/sv82AH4A6gA5AVYBlgHUAewBCAIZAiICUAJXAj4CBAKzAW8BAgGGABcAjf/Y/mT+H/7Z/cX9n/1a/Tv9H/3w/Mr8pfyG/En8Wvxw/In86/xa/dr9aP4E/8f/WgDjAGMBrAEKAh8CQgJMAmQCSQIjAjICLAIPAqABdwEYAZIAHgC2/2X/Mf/7/jz/mP+a/9v/FQCcAMwA4AAnAXEBwQGuAc8BIwJqApACfQKMAsUCcAITAv0B1AFXAbgAWgAGAHb/zf6q/mb+GP7Y/bn9qP1B/eP8xvzz/Pr80Py+/KT8r/zn/P78Df1d/d39d/7p/nr/QQCSAOoAMAFhAWQBjAGyAeABAwLrAUoCKQLYAX4BIwH6AGYA3/+J/wb/m/6Q/tj+Gf88/5T/+v9JAKcABwFWAYYB0QEQAoACrQLZAjEDMwNUAycDPQNEA74CcQItAp0BHwF1ALv/TP/R/lv+F/6u/Uz9IP3g/Mv8sfzB/Lv8j/yN/J38ovyD/Lb82Pzq/HP9CP6m/j//sP8QAHMA+QBKAUEBmgHzAbwBvAHkAQQCtgFjAUsBBgGhAB0AmP85//z+l/6C/sb+6P7b/jT/x/8sAD4AmgA9AYkB/wGHAg4DeQOXA58D1APTA58DZgMsAxwDoQIGAq0B+QA+AMj/U/8D/7D+Kf7W/cn9nP1x/Y/9h/0r/dP8+vwv/UX9KP0s/T399/wP/RD9Iv1p/Zf9//2d/hL/qv9wAOcAGgEqAVIBRwEbARUBPAFEAUYBSwEtARUBqgA5AM//Yv8z//v+z/7E/rb+OP/F/2sAKQGMAQACLQJgAo0CwwILA00DhwO7A/kDFQTFA3oDIgOPAiACcAHZADsAo/86/9L+hP5R/hv+8P3E/c79t/2H/ZD9dv18/Xv9zv2m/SH9Nf3n/Jb8ffxP/Jn8/Pw//QH++v6H/wgAkwDKALAA7QBZAXUBSAE7AYwBfwF5AXABfwFJAacAIQCc/x7/j/46/l7+lv6q/lr/NQDLAPoAeQE8AmkCbQKZAvgCMwNjA5ID4AMRBM4D0wO/Ax4DTAK2AVEBigDJ/0r/TP8d/6n+m/6T/nH+F/6c/Zn9z/3D/df9zv2Q/Tv9GP3z/HT8QPx5/GX8kPzM/CH9vf05/qv+Mf/c/2EA+AA3AT8BgwGfAcEB/wHYAd8BxAF/AQ8BbwAvAJn/6/7F/qT+h/5w/rj+H/+B/wQAhwA0AXYB+AFSAqQCEwMPAzoDhQOQA5QDmQPVA5ED4gKTAjICogFGAe8AdAD8/4D/Mf/m/qT+av5n/kz+C/7P/en92/1w/Tf9TP1n/fv8xPyh/IP8Uvwh/Hf8lPy//FX9+P1V/rz+nv9UAKcA2QAhAYkBsAGRAZEBoQGsAYwBUgEgAb4AVQAAALj/R/8O/yj/N/9g/4b/9/9nALcAFwFyAQICYgKZAu0CKgNUA4EDogPZA94DyAPGA38DAAOMAgwCeAHZAGEA3v96/z7/3v6O/lj+Wv41/g3+J/5N/mr+Uv7u/dH9Q/3
j/If88fu2+0f7R/tB+4H7Dvyb/Hn9//2X/nr/FwDKAB4BawGnAcUBHwJgAogCdAJKAiECuAEuAY4A1v9B/77+mv6W/qz++f4//6f/GgCCABwBggH4AWUC0AI4A2wDuwMEBA4E8QPuA80DigMhA8kChAIuAuIBbgHXAF0AzP89/+H+p/6H/oD+lv6Z/oL+of61/mv+Bv6o/V793PxA/Kv7I/va+r364/ri+nL7Q/yv/Hr9Mv7s/qL/OwC7ADoByQFGAqsCEgMhAwkD7ALEAi4CXgETAVAAv/8V/4T+jv5N/mb+0/4k/8j/QwDHAFsBzgFLApQCFQNhA2EDqwPqAxUEGgQgBD8E/gOYA/YClgI+AqEBPAHCACsAqf9T/8T+af53/mD+af51/l3+cf4y/qP9J/2p/Fz8/vuS+0H72PrD+uD6APs3++j7pPwt/f792v6Q/yEAywAYAVIBrQH5AUIChgKRApYClQI9AtIBPAGlAOv/PP/g/ov+Zv6Y/gD/fv/d/3IAGQGJAb0B+wE2AloCiQLJAjADnwMSBIkEzgTLBIkEGQSTA/QCXwLUAV4B3wBNAL7/e/8s/67+Zf6R/ov+Uf6E/pL+Wf4z/sP9Uf0g/YH8vfsU+436BPoO+mb6wfpv+yb87fxm/SX+gf7L/qD/TQDrADIB+wHVAvUCAgPmAs4CggL8AXQB0AAtAEn/y/6Q/jz+Yf7S/lT/k/8RAOYAVAFUAXoB/gE5AmoC3gJcA+kDRgSXBN8E0wSJBDgE5wOPA94CUAL7AT0BgQD3/2f/Av/Z/n7+aP6K/lz+Hv4j/vf9rf2D/Tr9Ev2g/NP7IfvE+kX6APoi+rL6RfvZ+4X80fyA/XX+Cf96/0sACgG9AUECjQK5AvoCHgPIAlICCAK0ARQBWgCU/+7+nP5y/mz+mP7t/pH/PQDMAC0BgQHbAVACKQIOAqECHwNyA7sDJwSJBMsErARbBBUEpQMzA8ICbALGARgB+ABnALP/Uv8U/9X+bv4q/kr+Vf4e/s79if3q/A/8vPuu+077yPrN+sb6xvq8+qP6Cfu5+zH8uvyv/XL+WP/6/zUAtQBZAecBQgJxApYC5QLYAlcCCwKWAQ8BjgDZ/1j/+P7j/hX/QP98/9D/RAC7ANsA+gA8AYIB8QFrAtwCXQMGBJsE8AQUBSMFJgXxBLgEcwS/AxYDeAK6Ac8A5P+A/1v/BP83/gf+Lv6g/Vn9jv27/WX9yfzW/N78YvwF/PD71Pt8+zH7efuh+6X7CPyR/N78WP0u/sD+gP/0/2EA6wBHAYkB2wE8AjICHAIlAhECrQEvAakAPQCe/yb/Kv86/1n/eP/f/00AcgCpAK4A7ABcAXsB1gGrAlkDuANLBNoELQUpBTAFUAX4BEgEwANDA4sC2wEBAZMATgBb/5H+fv5G/rX90/1b/iD+qf3r/dj9H/2t/AX9M/1t/Nv7XPz6+/D6KPuT+4P7U/u/+9P8mv3G/UD+PP8MAF0AtQAwAaIBBgI1AjsCKQImAh8CxQEuAZgANwAPALj/bv+F/+b/VAChAKkAvAAXATEBCgFgAScCmAK2AjEDzAMRBBAEaAS0BJYEjQRZBFkE3QMdA9QCNAJTAZEAvv9g/0b/p/5m/sT+RP7s/T/+tv1j/a793v2S/RH9+vzp/Ar8Nftt+3/7BPsK++z7Pvz4+2L8EP2Q/d79Z/5U/wwAngAHAR4BNgGEAZ4BXgFZAZABpAE+AawAjwClAFwAFABHALcA2gDLAAABWAFPAQgBTgG6AdYB+gFsAhkDagN8AwIEjgSZBD0EDQQyBDAE1gM6A7wCWwKzAc8APADn/1b/BP8V/wn/wf5b/jz+Ov7h/aP9Uv0S/Rf9m/wH/Nf7v/tV+zP7bPtj+577J/wr/C38Av3W/f79XP50/yQAMQBIAKYAywCjANoAUQGSAWcBXQF0ASYBuQB1AHoAaABrAJEA1AAMAfIA/gAOASEBOgF1AdkBPwKpAvIChAPTA7YDGwR
vBCwEKAS9BFkEjwOIAxQDEwJQAbsADwAOAOX/L/8S/zb/CP+G/j/+Iv7f/YP9u/2//cj8xvwC/Zj8f/sd+4X7WvsX++P66fuO/GT8w/yN/fL94/2G/hv/Y/+m/9X/GQBKAHIAmADVAMcAxgD2APoAxACuACkBMwHeAOYAZgGBAUUBWQGpAQECsgHKAWkCWgJHAnEC1wIpAxgDYgMiBBkEjQOpA7MDZQPmAmkCRwJKAk0BvgDAACEACgD0/8L/Sf8Q/1T/if7Y/Vj+tf06/cv9GP1C/En8a/yR++H6Gftu+zz7Ufsm/F38z/xS/aj9Af5L/qn+t/7//kz/x/87AEIAfwDcAPQAqQBRAG0A3ADYAN4AIQFZAX0BCgENAYwBYgEnAWAB5gEJAucBNAJMAkQC0AJDA0EDXAMEBE0EDgQJBLMDbwMNA9ICbQKhAeoBgwHoAJMAKQAnAGX/dv/b/gX+bP4T/un9Xv2s/QT+kvzd/AH9Dvxf+5X7QPxI+477lPyf/LT8tfwI/bP94f3l/Qb+i/67/vT9mf4x/xT/ZP/V/20AGABkALQAbwCJAGsAqgDQABMBUwFLAZYBygGgAYIBxgFPAkoC9AGCAgcDrALEAoED2AOAA34D5gO/A3EDTgOGAxIDGgI/AvoBMQFrAUYB6wB8ADgAEACU/+r+P/6a/pL+Vf49/qD95P2X/U78v/xY/NP7U/xs/IT8mPzb/JL8zfwy/f/8NP30/fr9qf1X/V/9Ev4l/l3+vP4i/3X/Qf9M/33/IgBIABUA8gBlAf8AEAGYAcIBvQEMAl4CXAI4AlQCjgIXAscBOQKaAmAC3QKWA0ADSwOIA30DzAJdAmQCoQIkApQB1QHPAacB0AD3AAUBIACo/5D/2P9w/6H+b/8u/9v9/f2G/Vv9WPxr/D39kPxb/Cb8VP3V/DH8wf2j/cD9+v2A/aP96fxY/UD+QP1m/eD9uf6t/vT9K/9o/0L/Wv+6/10AUgCLANYALQFlAVcB6AH4AeEBgQI3At8BGgJDAjwCSQKFAosCyALdAjIDjAMyA1ED8ALZAskCUwJkAhkCDgK6AdoBiQEjAU4BGQCH/3H/qv8YAPX+qP55/5b+0/0c/oT9/fwc/cz9G/2P/K/9HP3U/Ff9YP0r/aD9J/6N/YX9R/19/Yf9Tv0Z/uz9Dv6W/oX+d/7J/lr/Pv84/8D/0f/R/7kA5QCjAE0BYQGYAckBaQHXAfUB/gHdAaYBAgKeAiYD3QIIA78DpQPfAoICCwNUA00CiwLNAvwBZAKjAQ8BWgEkAd8AKQATAHEA1v8GADsAZf85/+z+q/5C/qD+g/5t/aH9Wv1F/RD91/zS/Tb9LP3a/dD9lv0P/a/9vf2//cT90/38/cP9ZP5P/j/+pP7L/i//Ef95/wYAMgC3AJIAPQAaAXkBjgDnAMIBaAHwAEAByAGNATAB1AFLAlECtgIwA2YD5QK4AhIDMQPuAqAChgKtAk4CtwHRAYUBngFAAdsAEwFkACwA0gCRAEP/Vf8XADb/bv4E/4/++P2//bz9mv0Y/eb9k/0d/fv9Hv6B/Q3+Qv51/eb9E/4F/mP+rv54/vb94v2a/r3+LP7t/mv/A/8g/3L/1/8fAOD/TACqALUARAFoAVkB9AAfAVwBHAHMAdcBYwHrAUsCUQKLAgsDIAOSAsMC4AJJApcC7AJDAq8B/gGlAQABUgG4AJ8A9gC9AB0B//8v/0AAef/E/sL/s//v/qv+nv4+/nv+Uf76/br+0v13/ab+TP49/g/+iP3M/Rf+Yv5f/rj+1/6a/qL+RP5o/g7/PP8u/w7/o/8GAJ3/cP9F/77/SwCgALYAfQC9AKcAhADWAP0A8QBaAeUB3gF+AQECqAJDAi0CpQLqArkC4AIRA4oCigGxARsC3AD0AEEBDgFLAUsADgCGAF8AKACF/03/3f9D/6r+S/8g/3v+Yf54/mb+Qf5F/lj+FP4R/s7+2P6H/hH/Pv8G/wv/Df9u/yT/p/4
l/7b/FgC+/43/t/8d/wD/g//A/+b/yP+8/7r/Pv9Y/7z/sf8HACoABQA2AG4AygDLAMEARwHqASEC8gEmAmoCogJvAvgBGgKpAf8ALwENAZkAaQAXAPv/1P+I/8T/xf+E/57/SP8A/0P/UP9W/2D/T/+K/8//bP9H/7//yf+u/9H/YQD9AMkAnACZADIA8/+o/zj/Ov9f/yH/A//l/oT+fP6U/mj+X/7F/uP+s/79/jn/Lv9b/6X/PQD7AEMBfgG1AXkBTQElATYBRAEfAZ0BlAELAeUAvQBfABoAMwBeAFcA0P8y/wL/Gf8Z/+b+Iv9m/wL/x/6p/vD+Nf8R/zP/bv/T/+T/JACoAN4AGQFJAX8BmwHTAQgCigFzAY8BJAElAfAAWwDu/2X/4v6q/qz+Tf7a/e39/P3n/QL+Vv6t/s7++v6Q/wkAaQAhAS0BFAF7AcQBDAIRAjECGgJ8ARgBnwAfAOn/hP/a/nf+Yf4i/tf9tv2y/cP91v0F/jH+R/5B/nv+jf5V/sb+Q/9s/57/LgASAZ8B0gFSArsCsALwAkoDbwOxA40DUQM+A7ECEwINASsAyP/9/nv+Nv6t/SP94vy5/Mz8pf1K/of+Q//4//z/VQAdAXwByAEwAr8CXgOIAzMDugJOAq0BOwEpAcAAQwDg/xD/OP6V/Qz9uvxL/PD79/sz/FX8WvyH/Oz8Kf03/Wj9uP3c/dL9HP6I/vj+fP9kADwBtwGqAjMDlANNBG4EtQTEBHcEOQSrAxADVwJ3AYoAAwBT/y/+e/2N/MT72vvi+zz8Kf1s/ub/tQA4AQ8ClwK9AhIDsAP6Ay0EsASaBMQD+QJ4AtAB1wBBAPb/aP/+/oP+5/1z/Vf92vwr/HT8tPyZ/Of8NP0u/RP96/y0/GD8Uvxn/DD8uvy5/TD+qv5g/38AtgFyAnUDegQbBaMFowVdBcMEEgSDA54CZQFKAJP/xv7D/bj86Ptz+zj79frl+s37aP3e/mQABgJIAxMErwTVBMoESQWtBYUFXwUZBTkETgNPAiMBLQCY/xX/kv4//jH+RP4G/rD9tP3e/bz9rP3J/b79wv3h/Z/9Kv2n/OL7hfs2+8H6ffpp+u37kv1s/sX/KwGmAp4DpAScBf8F2gYoB0AG3QSRAwACBgDB/sv95Py5/Av83foJ+mH5Q/nu+VD7J/2p/ycC9QNZBQgGPwZEBjsGHgbKBYwFcwX8BOoDegI6AUMASv/E/pb+3/5c/4j/fP8q/+z+EP88/2n/vP8ZACgAE//j/WD9y/wE/FX75vpq+tv5iPml+VD6wvuj/QYAawIJBJ0FjgbiBlcHKwfUBmgGbQVyA9oAiv5m/O/6GPp8+Zv58fme+Xz5x/ku+mL7a/3M/88CVQUCB2IIYgiGB6wGhgWPBBAELwM5Ap8BqQCH/8H+Ov5b/tr+h/9NACEB7wHyAdgB2AHEAewBkQEEAYcAgP/1/VD8P/t7+vv5u/lW+Un5b/l4+bj5Kfu6/UoAAANzBV8HRghvCDEIowcKB7YFiQQPA7MAGv5++zv52Pdx94j3MPh8+Wz6y/px+2r8H/5uAN8CSAVQB9YILAlLCOEGLQV7AxkCCAEeAH//Qv+w/hD+EP4//jD/nACuAYECWgMFBOQDgAMWA5cCPQKpAXIA6f6R/RH8nPqk+UH5jvkM+vT57Pli+uv63ftu/TYAJwOMBQkIbgnwCAAIsAb3BKwDXgLQAGL/pf0B+5n48/aH9ib3nPjI+n78Hv56/4f/iP9tALgBewMyBVUG2gaBBtAEpQLaAHr/7f7A/tD+Ov/E//r/NACHAOkAwwGLAhsD0gOxBMsEEQQOAzECnQGEAC7/Of6k/dz8uPuf+on5U/kV+uH6Y/v9+xj99f3a/S/++P9rAnwEagZkCBkJYAiKBiUEtwEIAAf/L/7t/Jn7gPoh+Qb4v/f2+P/6IP0Y/5UAkwHSAZIBwAH+AdUC/QN+BIQEbwNGAswAy/5s/dr8hv3m/hkABgH
bAYkCkQJmAkUCtQKlA+cDLgQxBEMDCQLMAJr/k/7P/VH9tfxB/Jz7y/qi+rX6Ovu1/Cb+q/4q/0H/7P7E/p//PgG/AlMEvwUtBkAFEgSbAtcAn//M/jP+q/2n/NH7MfvG+tn6kfvj/Dj+f/+YAAwBFgFhAYYBRQGZAYUCqQJcAuEB+ADa/xD/3P7J/vf+gf9aAE8BNwIIA5QDHAT+AzUDsgI6ApsBJwHXAF4A1f8V/wf+2vz3+437UPue+5H88v0h/7P/SQBTALz/KP86/or9Tf2U/cf+NwBZATUCOAPVA30DtgJIAvMBlAHNALD/q/6x/fj8+fuY+wf85PyQ/Rr+l/7t/lX/pv8pABIBUgIMA2gDYAO+AuMBwQCc/w3/vv7l/pn/hwB1AXACTwOKA2sDzQIwAmsBowBdAA8Ax/+H/9j+QP62/dX8cfze/Hb9EP7Z/uz/vQATAVQBBwFWAEX/F/5u/K36X/lr+Sb7svwz/r8A0QOJBfgFwQVTBS0FYwSyAg8Br//6/U787/rE+a75yvrJ+0v8Fv2E/pD/QgCHAd4CLQTWBN8E/wSMBIMDRAL5ANX/Zf9//zD/V/8RAOAAtQHHAccBAAKkAckAUwA4AP7/CQAHAMz/ev8V/9D+oP6X/qD+QP82ALgAEgE0AcAA0f+5/gn9PPvH+Yj4afc/90X5UPx7/3oCOwXxB2sJBwkRCHQGbQSmAkgAl/2/+0n6sPg/+Dr49Pjz+m785P3S/4UBjgKhAyYFAQZbBjkGowUABcMD2AE6AHr/g/6X/ZT96f0Q/2MADQHYAVwCBgLbAV4BLQAnAJAAhgAnAZsBEwHdAIEAef8O/+v+5f6D//L/7v8MAI7/EP6n/EX7ivlL+H33BffQ9636Nf42AT0EcQcLCtIKrwl1B2YFOQMtAH79d/t5+Tj4m/cS93X3jflF/L/+FgEhA6sEcwURBfgEyAUNBu8FPgURBLEClwAi/nL8FPwj/Fn9Pf///zsBmgLSAuUCFAPVAr0CkgLJAZUBtAF+AZcBTQFpAMf/+/7Y/SP95fwk/R/+0f4k/6f/tP/2/sr96vsw+kr5oPg1+Nn5pv3eAGIDEAb7B64IkgceBdgC5gDs/tj8G/t0+fT4HPlL+Yn6kfwk/1QBngJmA7sDbgPXAuMCfwOcA7QDuwMYA9QBLQCl/qT9gv2n/ZX+cwDTAfsCvQO1A4IDQwPEAhkCpwEmAQUBJQF5AEAAeQAEAH3/7f4P/pX93v1W/v7+HwDUAFUBfgGGAD3/c/2G+9f5xfhx+Ej4Y/mG/Ov/+wETBAwGeQZxBZgDvAGkAL3/Wv5r/aj8JPwR/LT7t/u0/GP+AwAWAXkBfgF+ARQBxACTAYoCZAN3BMUE/AOPAt4AO/+C/kz+Qf8WAYoCcANeAxEDogLXAc0AlwDnAPoAnAGGAeEAwwAAANX+eP6r/pz+6f7I/54AZwGtAY0BogFSAS0AQv76+xP6/fes9mv2e/YH+V/9uACcAswE9wZQB+MFvgPLAhUC0gB1/4n9evxU/D/7QvqM+qn7h/1o/5v/mP+1ANUAhABdAdECaATgBS4GlwV/BHsCJwCN/vj9Cf45/wwBPQKRAmECzgHsAGUAOwC1AEcBdwHEAbUBNgGyADIAy/9LALQA5AB/AZ0BlgGfATYBjQAZAEH/4f0L/Or5CfjS9o32d/bP9977EgCEAtsE/QZ2B+8GfQXPA/ECkgHY/3z+dvz1+pj62/lL+Uj6BvzU/S//z/+7ALIBIALxAjkEeQXWBhYHrgUcBGcC6//C/Z/8X/xQ/Qj/ggC8AV4CXQL3AS4BzADlAA0BRQEFAnICSgIeAqEBKQHnAIgAKgAsAD4AWACjAIMAQwAdAIX/SP4d/QT8YfoX+T/4RfgC+en5L/w4/84B1QNqBc8FJQUXBKUCcAFKAFv/wv6t/W78tPvz+kj6V/pk+yD9j/7F/+AAtgGIAn0DJwTxBP8F7gUKBZEDZAGX/zP+5/yu/Bn+j//
vACECOwImAtYB1wCGAAUBawEiAjYDlgNIA+gCtAF0AOj/o/+T/8f/CwBwAIgAuv8e/wz/v/5m/jL+df15/Cv7tvkW+Qn5lvkq/Hj/HwFwAugD8AMJAxACHQEwAWMBtgBbAMz/e/43/cP7jfrM+s37wvzj/ej+QAB0Ab0BbgKrA4wE+ASmBK0DzwKBAZD/jv5J/kb+Mv9XAEwBRgItAkkB2QCZAJYAMQGpASMC9QKtAqEBIQF6AOn/NQBFAA4ARgBaAEoAOADb/wIA1gC+AMT/3v6S/cv7+vl5+GD46vhT+Ub7D/55/zYAaQH3AT8C1wI4A+cDIQReA3UCEAEB/xn9mvtd+g/6s/pO+zD8Wv2u/i0AZAGsAkIEmQW8Be0EKgRaAx8CnAB0/xr/w/96AGUAiADMAI0AqACQAAkAfQAHAegATAGZAWUBFQIwAjcBSgGGARIB3QC7AKYAMQEcAXUAigBKAG7/oP5U/an72/oj+j/5Svlq+SP6Mvyk/Zn+HQAPAbYB0wI7A4IDhwR7BN4DDgPuABX/k/1Z+9352vn4+Zb6BPxJ/er+sAARApcDEAXBBb4FDgX7A/ECogEqAGT/Vv+t/4kAIQFSAXEBKwG+AJ8ALwDF/1YAqgC9ACsBUgG8AYICbwIxAq0CkgIiAt8BMAHRAJQAvP8w/8f+8/1b/Y/8b/sf+/T6cfqb+t76vftK/bf97/1I/y0AVAABAZQBsALoA5AD8AJ2AiUBjP/O/ef7X/vS+9H7OPwt/TX+xP8DAbcB5QIkBKkEbwSMA4YC4AELAe7/l/8hAM0AewHMAewBPwLcAeAAaABQACEAVwCqAOcAdQGmAc8BVAI8AiwChgIwAm8BBgGtAC0Aj//m/tv+Bv/D/nX+E/6X/Uj92Pxg/Cr8Bvxf/LH8Kvwb/Ov8Vv3c/dr+CwDTARoDIAMsAx8DQwIkAb3/e/4w/tn9Qv01/VX92/3r/oj/HwBaAVYCtwKQAugBmwGNASIBGwGUAQsCfwLMAgoDJgOpAvMBiQH2AGIADwDa/wsAbgC0AA4BgwHKAe8BEgLzAcQBrAFuAQMBnwAqAMP/k/9J///+1/6Y/kH+vf0o/af8DPyV+3/7Ovvy+kD7yPtJ/Bb9Jv5+/xYBHwKcAu0C2wJfApQBawBn/wb/kP4e/hz+KP6C/jL/h//I/4AAFQFlAYEBGAH7AG0BdQFOAcYBcAL9AnUDiwN3A0YDowLpAUcBgQAfACMAIwBfAMQADwGPAfEB2QHTAb8BhQGrAbYBXwE0AdoAJgC3/0v/uP5q/jr+Ev4A/oz9zvxA/L/7TPsA+936LvvB+yX8pvyM/bX+5P+zABoBaAGWAYoBFgEiAHL/X/83//P++f5E/87/TQBvAJ4AAgEwARwB6gCtAKgAyQC7ALUALQH+AaQCFQN9A9oD1QM6A4ICDAKTARIB1QDtAEcBowHJAeAB4QHJAcwBwgGMAX8BgQEmAY8A+f98/z3/9P6Q/nv+jf5t/h/+j/37/LH8UfzR+5H7h/u6++L70fsm/AD9uv1Y/gf/o/89AK8AuwB3ACMA8P/m/73/mP/Q/1AAsACwAJ4AyADNAHkAJgDW/6X/zf8AAB4AdAARAdcBpwJBA78DKwQwBM4DSgPMAmAC7gGXAZgBsAHFAfcB8QG3AbQBuQGXAXcBXAFDASoB1wBNAND/bf8G/5b+PP4H/tn9lP0t/a38OPz0+8f7nvuf+9L7+/sb/Gj83/xt/fn9ff4d/8r/RQCKAKEAiwCFAIEAPgARADsAeQCLAGoASgBPACYAyP+a/47/jf+5//D/PgDWAIYBJQLGAmID7gM+BBgEuQNnAwQDcwLkAawBtAGWAX0BnwGvAaQBkAFaASoBJwEwASoB9QC1AJIAVgDt/37/C/+y/lT+s/0I/X/8DPzC+5f7bPtu+6z7+/sw/GT80vxt/QL+gf7n/lT/2f8oACcAHAAiACMABgDK/8P/EQBWAFoAQwA8ADgACgCr/1H/PP9
1/77/+f9cAP4AwwFzAtkCRwPoAz4EIwTrA54DNQOsAgcCrgGoAYkBYwFjAWcBawFfASYB+wAOATYBQAEdAfUA4QCtACgAgf/0/nL+x/3z/ED82fuZ+1b7OPto+8L7JvyF/Nj8Sf3U/Tb+ff7X/i//iP/P/9L/xv/M/7L/dv80/w7/QP+s/+n/CAAzAEwAPQAKALf/i/+z//P/LgB0AL8ANgHGASMCcwIEA6MDAgQIBMwDlANiA/oCeQI6Ai4CIgL7Aa0BYgEwAe0AoAB2AIUAyAAFAfMAvACmAIIAFwBs/6b+Av5n/ZX81vtn+0j7c/uy+/j7c/wN/X39u/0B/nD+5/4o/0b/iv/e//j/zv+K/1z/MP/1/sH+sP7k/kP/iP+h/7r/5v/0/+T/2//t/y4AegCjAM8ACQFHAZUB3gEuAqQCDAM+A1UDTQMcA/IC0AKyAp0CeAJJAh4C0wFYAdUAZgAhAA0AEwAzAGQAiQCXAIYAQgDR/z7/hv7O/Sb9k/w8/B78GvxM/KL87vxY/cj9C/5Z/r3+FP9r/6j/zv8fAFwAOgD5/6v/Tv8I/77+ff6F/rD+3P4O/yv/S/+L/7r/2v8MAEcAhwDWABoBUAGMAckBCQJPAocCtQLqAhEDFgMDA+QCuwKXAnACPQIUAt0BgQEeAbAAQQD8/8v/uv/x/y4AUQBmAEkA/v+K/9z+Mf6j/R79w/yV/Ir8sfz0/ED9qP0T/on+Ef9x/7j/DABKAGcAcQBeAFIAQwDr/3P/Dv+i/kH++/3A/dX9Lv5z/rP+//5C/6j/AAAyAJ8ALAGEAdABAAIcAlkCfAJ/AqsCzgLJAroCiQJSAkACHQLuAdoBxQGwAYcBKwHSAJUAVwAdAOf/0//0/wsA9P/F/4b/Sv/g/kH+xf1l/Sn9Kf0c/TP9qP0S/nP+7v5G/8X/YQCeAM4AGwE6AVMBOAG4AFsAEAB9/+3+Z/7v/c/9rv1n/XH9tP0H/pH+Cf9o//7/fgC5AAEBPQF3AeQBNgJsAsAC6QLjAtMCmgJbAikC1QGVAYIBYwFIASYB8QDjAM4AkwBpADwAGwANAMj/gP95/2f/Rv8X/6b+Pv4K/q39U/1M/Wj9tv02/n3+0P5p/9z/PgC1APUASwG2AbMBnwGeAU0B7QB0AK//If/J/kT+4/20/Yz9o/3O/dT9Bv59/vn+df/f/ygAgADtADEBcgHRASwCkgLsAgEDDAMMA8kCfwI6AtQBhAE+AecAugCXAEgABgDe/8z/1f/F/5P/Vf8N/9T+j/4t/vj9x/1+/X/9e/1l/cX9I/5V/u3+bv/M/4oA9wAsAcoBHQI/ApsCZAIGAhECowENAbAAyv8C/8D+Jv7A/dH9k/2S/e793/0C/oX+vP44/+P/EQBkAO0AJAGPAfoB5gEoApoCmAK2As0CfQJ1AmUC2AF3ASsBqQBLAOD/a/9Z/z//+/7e/rr+k/5v/hn+pP0y/fn83vym/LP83/zq/Hr9Tv7i/qz/SwCgAGoBBAI2AqYCwQK6AiQDAgOWApMCDAJRAewADABC/xT/hf71/dr9nv3G/Vz+cf6F/ub+IP+e/y0ALQBMAMEAIQHKAXICbQJMAm0CdQKhAsICOgLBAc4BvwG1AZwBAwGKAEMAqP8+/w3/p/5m/h/+iv0q/f38efzj+5b7U/s9+5f7zvsH/Pr8CP7+/jgACAGIAWEC7wJHA7cDpQN/A5EDQAPYAogCBAJ2AbYAxf8b/5T+G/7d/a/9rv0B/nv+Dv+G/5v/r/8fAL0AHgEMAe4AJwGYARICUQJiAokCiQJmAmUCUQIuAgkClQEvAUcBZQEmAbIAJgCs/1n/5P47/rD9Lv2o/DT8h/vy+sv6YPpF+vn6HPsj+2L8wf3g/ikA5ACcAfMCzQMEBCoEHwRLBHoE3AMCA4UC9AFAAVYAKv93/jv+yv1k/T79Pv3C/Wj+sv5T/0MAhQCTABYBowEnAlsC9QHFARECYQKsApsCSwJEAiQC7wE
SAhIC1QGOARABzgDmAMcAbwDv/zD/tP55/tv9/fxU/JT7uvpZ+jT6+PlK+tT68fqN+9T8Av5T/48AUgE9AiIDmgMUBEkEJQQtBO0DcQMhA3sCmAG5AIj/f/7s/Wn9QP1T/T/9h/01/sH+Uf+///b/hQA3AYEBvwEEAiICbQKsApECfAJ+AmACagJ6AjACvQGcAeQBGgLDAUMBLwFZAVAB4AANADj/6P6r/vb9Ef1T/ID7qfoY+rD5aPnB+ZX65voc+1H8N/7b/wEBoAEmAiUDQAS1BGkE3QOmA7MDcwPCAggCSwFTAFL/bP59/cf8b/w2/F/8Gv3z/aH+T//b/3YAJAGPAbIBxQHcAU0CvQLLAi4DfAMIA8oC8gI4A7cDLQO+AXIB5wEMAuwBBQESADYAeAATAFj/Ov54/Tn9tfwG/Bj7Efrn+f35vvlg+g37GvtS/En+Xv9qALsBrwLNA3cEFQQABFUEHQR2A6YC3gGcAUEBLwAB/y7+zf11/cz8Rvxa/N78hf0w/tn+i/8dAI4ANAHwARoCugGZAdwBMAI+AnECEAP+AkgCUwLiAmIDkQNtAjsBugGtAt8CDwKHABAAkAAmABb/9/2x/A786Psi+wX6l/li+Un5UPph+4X7fvxy/sj/2gAbAkQDeAQsBeQEiwSNBGkE6APDAnsB/wCnALr/zv7X/RD98Pym/Cn8Vfzd/In9ZP4Q/7r/RQCzAFMBrgHIASUCWAJJAlECUgJCAk8ClAJYAoMBVAH0AawCyALBAdUASQFaAukCUQIdAcMA2ABPAFP/B/5e/EX7ofqR+ez42/g3+E/48vkI++f7x/1q/78AkgIUBB4FAgYzBtEFNgXEBJkE0QNSAh0BWQDS/yr/3v3F/EP8zvu0+/X7HvyV/Gn9a/7M/8wA3QDgAJMBkgLRAkUC4AHWARwCpgKKAsMB0wGhAm0CTAHhAHcBWgKZApYBvgByAXkCjQK4AYYA6//a/0f/2/0H/H/6fvmg+A/41/e89434gPro++D8l/6qAJ4CVQRfBTgG/QbuBnIGtgXBBBAEJQOOAToAdv8H/3H+PP0T/H37Qfu8+5z84Pww/ST+G/8fADkBuwH8AWYCfAJxArUCxgKNAi4CrgF7AaABiQFlAUcBDQEtAWMBOwEpAWcBlwHQAbABQwEGAZcAuf/L/ob9DPwt+1X66PjU94X3m/fC+Gf6Efvd+zL+0ACUAt0DFwVYBj8HagfxBhwGSAVuBFADFwLoAKz/xf4N/h/9gvzx+yz7QPs2/Lr8Qf0e/tn+1P/6ALcBKgJiAkwCfgLCAu8CHQOtAv8B6wH6AdABnQECAb0AKwEWAaQAzgBVAcUB5wF/AToBHQGrANn/lP7c/IT7wvq7+TL40/Zm9mr3V/m++pv70/zo/s4BdwS7BWoGMwekB7sHYQdLBgsFpwPeAbcAHwAo/yr+Iv0C/M37CPz9+w78JPy//DD+Uf/+/5wAGQHPAU4CRgI0Ag0CFAJ4AlACzQGbAaQB4AHpAXAB+QDBAOwAmwG+ASIB5QA8AYUBygHEASoBRQBM/wv+k/xg+0H6m/i/9pH1qPWo9+35fPrf+hT9KgBKA8sFwgYlByYIBAnFCKQHJwaVBAwDogFFABb/+v3T/OL7ZPss+1z75/td/Mj8wf05/14A1gACAbUB6gJnA8oCWQJZAooC5wKvAuQBXwHtAPsAwAFoAQ8Aof9zALYBGAL6AFoAVAFbApIC8QG6AB4A7P+c/q78I/uJ+e73ffZD9ZP1ofcj+Xf5jPpr/RcB9QOXBfIGOQguCdEJcQklCL0G3wTDAj4B4/+T/hL9Zvuu+pP6XPq5+gf7dPso/Zr+Ov90AH4BCwITA54DawMbA70C6AJvA9wCtAEoAVwBiAEEAUAADABeAK0AjwBIANEAnAHIAbQB4QEAAv4BigF2ACT/zP2W/Cn77fiq9nn1nPVK93r4Mvge+fH7QP+3ArQEcQVoB3gJOQoICpkIGwdHBooEagK
gALL+Yv1f/Bj7Qvq7+cb5p/p++3r8xv26/vr/aQFPAg0DiQOzA9wDkgMOAzgDQAOpAtgB0QA/AKAA8AB3AI//FP/X/+oANwE/AVMBYwHWAVoCWwLiAQkB2f9V/tj8wPtb+g/4FPaj9bX2yfcR+On4OPvM/XUAPAMtBe8GwQifCeMJ0AmaCAsHdwVyA4UBkP+G/SD8F/sV+lP5C/ma+dX61PvK/Cz+6v+NAU4CogJ4A3EEaQS2AzEDawPEA/4CdgG8AOcA4gBAAFr/KP+w/w0A+/8DALAArgEWAgACIQJ1AogCDQIRAcX/Lf6g/DT7f/lb96X15/Vs99H3vPde+WH8b//DAYUDtAUlCNgJWgrJCfEIVgguB/QEiQK8ACL/SP2S+wr6y/iA+O74YPkP+mL7MP0T/4cAcgFqArAD3gQkBWQEuAPHA+EDbANQAvcAeAA5AIb/M/9l/7H/1f+F/5X/oACjATACFAKvAeUBhAKxAu4BQABi/gT9FPzQ+qb4S/Z09ej2TPgJ+Hz4/vrA/T0AuwIwBY4HXQlCCkcKuAk7CYYIsgYoBKkBtv9A/o38gvra+Mf31Peg+DT5G/q5+7v97/9aAQICMAOsBKkFpQWoBAQEPAT+A/QCjAEMAHv/vv9W/3r+//1h/u3/xAAPACoAiQG2AiMDtQI6AiwC9wFuAQcAzv0I/I76FPmB98j18vXt90n49PfO+Zz8O//PARQEZgZmCHYJYAqOCqQJtwheB3QFtgN/AQj/Kv1W+4z5I/hM94v3Xvgl+Wv6LPyI/vsAJwKwAgsEigUgBqEF2QTHBLMEtQNUAiEBKwCI/+f+Tv49/nT+l/4F/9f/lgA6AbIB/gGEAvkC/QKkAuQBzQCW/+/9SvyP+qf4FvdV9lD3ufjd91L3I/pv/QUASQK2A9gFxQgxCncK+gnzCDgIMwduBYgDRwHo/vL89fpB+ev3OfdA98X3t/h2+nH8bv5qAOsBcgP4BMYF3wWlBWcFXQW+BEwD7gHdAPD/VP/j/mv+Fv4V/mr+Of8rAKoAJQF5AaEBSQIhAwADDwLtAAQApP92/nj8S/qE+Pb2CPeZ+Y76x/cQ99b6nP64AXADhwNCBa8IjQqgCkkJpwcHB00GuQTnApkAEf4W/Hv6SvlK+JP3Gvf09+T5f/vQ/OP+zgAWAuADHgVGBUUFdwWlBVEFtQP4AXQB5wC4/8n+Df4P/n/+iP7c/qr/PgDtAJMB0gFaAsMCxwJ2AgwCcAFsADL/4f2J/B77e/n695f2CfdQ+tf7Qfl6+Dr88AA6BJkEVARyBmoJWwpNCYAHYgb+BR0FYgMVATr/k/1++wb6Sflt+N33HviJ+VX7fPzo/UkAKwJcAwgEIgT7BNUFbQWBBDEDzgHLAbIBLADk/lL+Vv7n/i3/Qv+x/ygAlAAaAbQBPAI4ArQBeQFXARMBgQBB/wz+VP2M/Kv7hPoH+Tz4tvdi+dn9iv53+lf65f9OBQIHFQXZAx4G8ggdCcYGFwQYA+gCdAJYATz/zfxf+w/7C/s3+kH5d/mM+qD8eP75/u//IAIvAycDUAObAx0EwwNAAmgBbQHQAFQAZQAJAM7/CACFAIcB4QEpAfwAmQEtAk0C/gHKAUsBKAC5/8v/Vf82/qL8wPvP+7H7Y/tC+6z6Kvrb+RH7uv/7AoD/9Puv/+4F7wdQBT8CmgKOBdoGvgRxAdX/HQD8AO8A4v56/DX88PwD/dT8V/zx+5H8a/5VANEAPwB4ADIB0wFyAsUBTgDn/zQA2QBYAaEAHgAJATQCwgLNAs4CJAPvAksCGwL9AbYBdQGPAE3/qv6K/oL+u/0G/C77aPte+5v7QfyW/Br9NP7n/jT/qv+sAMYCaASmAhcAlAFOBOMDtgEyAAIAFQFfAb7/3v1g/eH+oQDJ/8P9J/7U/zoA///4/8T/sP/n/ygAegADANb+lv5e/7n/ZP+Z/gj+3P6GAI4BgQE/AdgBigMmBYAFUwQZAzIDfgMYA1oCNQH
4/zX/qP5t/hH++vyf+8v68Pp4+437xPu8/PX9/v6X/+n/sQCvAVMCUQOZA8kBdABCAfYBdgH8/3T+lP6e/7D/w/7W/SX+3f9FAewAc/94/zQB/AF/AccAgv/B/lX/xf99/1j+2fy1/Nv96v5e/7b+Gf5T/4oBOQOvAx4DCQM9BJ8FJgbYBGECiAG7AgYDQAEN/x7+hf6T/m39MPzO++b7Vfzn/CP9Ev0S/g4AGwFdAewBeQIsA98DkwMRA3ACUgGDAMD/lP7Y/eH86fst/IH8bfzP/IP9Pf5Z/4kAhQHWAe0BnQJ6A7ADAgO7AZsAZQDXAHYA2f6A/X39C/4o/hP+Cf5W/sz+pv9bAIIAKAC4/8//jv9//u/70fx6AzIDjPg2990BAAbyAfb9b/2/ASwHAAebAt8A8gMdBxsH4wS4ArICZAOfAkUBx//i/I77pPxm/dz7n/h1+Pz7vv2F/Ab8zvxo/6oCqwNQAzkDvwPcBTUHJAV/Ao4BwgGSAez/Of2t+8b7bPyB/Nz70fpF+lD8GP9b/7P+cv/OAO4CRwQWA+4BvwIQBCwEgQKKACsAwgDHADf/df07/cL9xv1S/YH8Yfxt/Tn+hP6G/pr+nP85AS8CKAL6ATECSwJZAqQCFwLEAFAASgC2/4D/vP+F/yz/Q//Y/8UAZAETAZEAsABRAbgBKgEQAEL/Sf+W/wv/4P1M/Y79QP7e/vj+2f7Q/jP/RwBgAVgBlQBGAPYA6AGxAW0Arv/w/8MA9gCE/z/+fP7u/uf+B/8L/6b+l/6d/64A4wCPAHoAZQF6AkcCqQFdAsoCmAHpANUBRgHB/kj+3//P/3f+Ff7+/Y/+JwAdAL3+kP+4ASkCagHyAB8BwgG8AcIA9/+S//f+sP7A/lT+Z/0K/YH9Of7D/kT/+v9XACcAaACcAQAC9ABQAMYAVQA0/6D/sQAOAML+n/4r/5//tf9w/+P+6/6y/48AyABzAAcASADVANIAlwBcAE8AHgC8//f/lgDT/3n+Y/4U/07/9v6E/kX+1/42APEAPwADAC4BrAIDAwkCfgGMAjoDKALKAEEAeACcAN7/k/4S/pv+If+x/gH+7/2B/mL/+v/n/9T/AQA0ANsAWwEPAYcAAgCh/xgAlQA/AIj/6f6f/jL/RgBgAEP/wf6S/4AABAGYAJP/t//VACwB9ACiABcA/f9zAJgA9/8J/6T+LP+O//n+Nf6q/tz/QgDK/3n/AgAiAbEB8wAhAGcAVwFPASMAnf9WAKcAr/+i/q3+iP/i/yH/SP6K/o3/nwAkAZEA6/++AG4CFAPfATcAKQBWAZkBNwCf/i/+qv4D/97+SP7z/TH+nP5K/w0A+P+g/0UAHwFMARwBEwElAQ8BwgCdAKMASACP/1L/Zv8W/5v+Jv7o/S7+pf7Y/pv+TP67/rn/agCZAD4AAQAIAToCFwLdAJEA9wE0AssA2QCVAaMAIQB4AIUApwBtANP/tf9uAFABywCV/4j/CQAZAAb/k/1G/X39yvy2+1z6mfsBADf+YPfJ+poDEwI5/Y/+wgJTBZQFFASaAvQDsAZPBrAD6gIXAyADQgIBAS0ADf9j/T79R/4C/oL8hfqT+4L+Rv4i/PH7S/1p/6kAvv9f/38AqAE8Ak0CfAEpAcsBsQKGAlEBMAGLAZsBawF3AbABKgFoAPsA5wEXAfT/zv/5/9r/SP9W/hb+lf6N/sr9Zf3f/Tz+o/6K/g/+if6V/7P/cf8YAJMAgQBZAHoApgB2ABcAFgAPALD/iv/J/ycA/v+//7f/0P/z/yMA9/+E/4j/1v8VAKj/E/+M/2wAeAAOALj/4f+lAOYANwCV//v/owB8APn/CwCBAOgACQGXAMYAkQGGAc8AoQDuALIAAwBl/xX//P4T/7T+/v1J/i7//f5e/kH/lACHAPX/mwCzAb0B/ACsABoBjwF1AbUAFgAPAC4AIgBjACIA8P7r/r0AGAFQ/wb/PgBfABcAcADC/yf/OABwAED/bv+
JAOj/4f5Y/y8A9P8b/7L+Cv+6//H/zP+Q/7L/SQANAUIBAwEaAasBAAJwAQQBBwETASgA+/4T/77/w/4o/eb9J/+I/u397P52/3z/WQAQAeMAJAHFAYYBKgE3ASsBBQGnAN//f/+V/3L/zv5K/t3+f//e/iT+0/7i/+P/Rf8W/y3/lf89ANT/4f5k/64AzABMABgAQQDoAEIBvwAyABYAUwDnADgBwgAfAGMAJAFDARAByQBRAF0A0gCTABcAmv9G/8H/EgBQ/x//uf92/23/JwBRACcAawDKADYB3gEtAo8BOAHvAS8CXgFzAOX//f/v/wf/Ev5g/cn8avz6+9D6xPsK/sL77ver+/gANf6o+5b+ZAGwAdQBDQI7AiQE9QXaBKAD8QR0BaEE8wOjA4oC0QAuAHIAcQD//mP9mvz1/Tf/dv30+7T8gP3q/UD+wf3f/Z7+fP9WAHAA2v8nAF0BBAJLAuEB3gF/AusC0QKGApECJQKSAQQCWQIOATgAaQBVABgAqf/I/qf+T/8n/6L+LP46/o/+kv4f/o796f1S/uL9XP2p/eX9pf3y/T7+bf4M/2D/XP/C/0kAogC+ALQA5wAFAUABXwENAaoAFwFwAfkAtQDTADcBTAHsAGoAbADtAM8AzP+U/1YAjQBcADUA9/8+AA0BCgHbAN0ArQDMABoBBwFXAMD/8f9RAPP/Zv9+/7b/s//O/7j/vf9XAGIAhv8c/yIAiwBN/6D+6f7g/oj+QP6k/Vb9e/05/WT8tvzf/iX/9PvZ+3gAcQGS/tT+BAG2AXoCRgOIAjMCKQQfBeoDYANZA+gCLAMeA0QBAgAcAOH/Nv+8/i3+ev1//Y3++/6u/UX9j/5a/17/Yv+T/yoA8wBIARsBvgArAYEBNAFqAZkBxgCLABICaQISAesAKQI2AkgBqwFTAoABwgAKAckAQwAAACH/T/6d/sX+3f34/P38MP0a/Tn93fwj/H78df1g/br83Px3/a39GP5r/jn+Vv4V/8//o/9r/4z/BwCtAIUA5/9TAEIBdQFoAYoBogHoAeMCBAPzAWEBGwK4AssBdQDn/0EATQDw/6n/Af+I/nH/XACt/3j/RwD3AF0B4AHjAacBaQJbAxcDPQJBAk0CYQJDAnMBxgDKANcAVADf/4X/Wv8N/wP/BP+v/mv+Wv6O/pz+f/54/pj+o/6v/mX+4v0T/eL8XP6K/nj7s/nQ/Oz+lfwu+4T9nP95/67/+wClAQYCUQPNA5sDJgScBPwDeAMGBNkDVQIrAVkBiwHkAIH/mv41/6D/ov6Z/fb9iP6N/jb+LP7P/kH/7P78/tr/1P89/6j/7ABWAbgAjAAhAcoBVwJ/Au0BdwEdAhYDzwKrARgBgwEOArABYwBn/6z/RwC9/1L+vP3t/eT9cP3n/Kb8g/xc/HT8w/z1/Pr8m/yg/L79qv4q/l/9K/6f/+H/EP+1/oT/gQCrAAQAPf+2/8MAwwC8/0r/SgC2AMb/6/9IAS4BmABbAZoC7QLeAlMD3QNnBMYE6ASfBJsEDwXvBAUEXgORAyID0wHfAK4AVABL/1z+oP2t/fX9V/1N/HX8pP0Q/nT9gP2f/jj/RP+L/0MAfwDPAIUBpgFqAXUBzQGCAaoAfQBVAEn/Sf6S/SD9+fwD/cb72vlm+iX89vvP+pP79/xJ/Uj+p/+E/6j/bwGJAm4CiQLvArMC2wKBAykD8gFMAfMBSAKoAaIAFwBhAMwAYQB0/1z/8P/w/33/z/9EAPX/vP+iAAYBKAAcADYBxwFZAU0B0QH4AesBQQJxAvEBnQElAnsCkAGPALEAEgGYAAoAt/8h/x3/cP/5/i7+5/3D/Zr9t/2G/df8lfzf/PT8+fws/Zz8w/td/I/93fw5+2f7ifwn/df8g/vd+vf8YP9V/hv9xf5LAaMCkgKcArkDZAWkBrYGHwaiBncHYQcRB4cGoQXxBPoEwgS9A5QCCALfAUwBjQDV/0D/M/9O/93+JP4R/sr+Hv9
G/t79tf5R/x7/Zf+s/0j/bv8WAIIAIQCB/7j/iQCUABUA4/9nAMEAJgC3/zIAigCB/5P+b/5C/nf9iPwT/EH7b/o++l36mPni+Nj50/o++1j7wfsO/a7+0P8sADAAEwECA9IDXQPfAkUDQQRdBJADtwKLAvAC9QJ/AuUBlwGRAcgBQwI1ApgBiAGlAnsDIwNfAkICDQOxA0ADBQKRAS4CjQLJAb0AaQBZAIQAZQDX/zb/Cf+e/93/Rv9d/lD+D/9J/8D+LP4o/mH+j/51/tf9Gf0V/Xr9Av1e/JX7QfuR+7T7G/sL+k36I/va+wr8uPtU/Pr9Sv93/6X/1gAbArcCgwM7BCsEgwRmBdcFUQWuBIgEaAScBF0EUAODArUCDQNcAmoBWQHbAd8BgwGBAY8BqgG4AYABBQH/AJsBngH0AJYAyADiAOQAVwCz/9v/bwBkAJX/q/9FABwAf/+H/+z/g//p/gf/Tv+g/tv90P2r/fX8B/xw+8T6nPpm+lL5Zvib+Fn5PPkO+Rn5J/rA+2H8g/xB/RP/fwDzABoBBgIXA6MDBwTVA8sD8QNDBG0E6AMmAzYD+wPQAxEDpQLYAnEDygN3AwQDKgPHA0IE8gNaAwcD9QJAAwcD6QHGALsASAHiAL3/A/9Z/+X/5/9V/8r++v7i/50A7/8j/6b/egCCANz/Qf/W/q3+m/4J/ub8yvts+x/7tfo1+i35dPgm+RH6WPnx+O/54vqj+478U/29/d/+YwBGAU4BtQHSAjUDdwOiA2ADOQOSA7UDGQPNAu0CEAO+ArcC7QKcAqYCFgNBA/ICMANsA1MDdANkA1MDNQMRA7UCfwJ0Ai8CnQE8AUIB4wCHAHMAYgA0AAsAJQAUANj/3//6/+H/d/8R/+T+w/5j/pb9z/yM/Eb8m/vl+jr6tfmj+cH5VPlo+DD4EfmM+UX5Jvm6+Z36ZPs2/O/8p/2V/t//2gBiAQgClAIRA8EDCQScA4oDIwRLBOsDswPJA9oD4gMMBO0DnQOGA8MD6wOdA1EDPANMA24DXgPbAokCpAKaAksCxQGAAaEBdQEeARUB6ADmAAEB4QCwAMIA+wDMAJsAcABPACsA5P+W/97+YP45/n/9f/z8+3X7fPq0+S754Phy+Mj3gvfE9xf4Gfhh+Bb54vm5+pL7i/yo/Z/+nP+wAIoBaAL1Ak8D7QNwBHAEFAQ0BGAEGQS2A7QDnwMkAzIDWQMhA9wC8wI7AyMDCAM8A1kDXANvA2kDZQNyAygDtgKyArkCRQKTAXUBhgFZASoBuwCOAKEAogB+AEIAAQC//8b/xf9O/4/+Mv4X/qL95vwY/Jn7M/uZ+uX5Svn4+J74Vvgi+O73Hvhs+K/4D/m/+Xb6IvsK/BH9/v2l/mD/UQBNAc8B8wF7Aj4DwwO6A80DPwS3BNsErQS4BPUEFAXRBMgEqwRdBE8EZwRUBNgDtQPZA9IDjQNJA/gCsgLNAqYCKALWAc0B6AHeAbgBcgFiAZsBsAFhAe0A8AABAc8AbgD7/6f/cv8t/5P+2P07/bD8Mvxx+4L65/lo+cL4Rfgq+Nn3fPd499v3I/gz+Lf4Nvnx+fn66Puk/GT91v7t/5YAZQEqAvUCigMCBDcEgQTABMoEwATLBMsEqQSOBGEEfgRxBBME8wMyBEMEEgQXBCYEVgRjBB0E3APVA+YDbQPsArsCsQKDAvIBmQGJAZABRAG+AHgAewCEABIAmv9k/2H/Uf/G/kf+D/7m/Xr94fxt/Oj7bvsa+5P68/lw+Uf5BPmh+Gj4Tvhi+Ib40Pj2+GP5LPr8+pX7Qfxe/VX+Gf/r/80AkgEUArwCeAPeAxgEYwTXBBsFJQVUBWEFZgWFBZsFjwVPBWUFgwVqBUAFJwUaBe0E6ASdBDEE9AO1A3ADEQPGAmcCJAIfAu0BlAFRAVABMwHfALQAfABCABQAxf9e//D+wf5e/pb9BP2v/DX8e/vL+kj68vmf+Sj5lPhh+IP4cPgg+AL4a/i
j+P74dPm/+XT6dPtf/Nz8rf3B/qj/ZQABAcoBQwLUAnQDwAPOAxIEjASQBHAEmwTxBM0ErQTqBBYFAwXvBCoFJAUhBUEFMwUHBd8E/ATIBF8EJAT5A7gDOQP4ArsCVgLaAZ0BeAEAAagAcgBNAOb/mP+T/0X/7f6j/mr+EP6v/Vz93/xM/OL7tfv6+kD6APrT+Ub5g/iD+Fz4Cvjm9wP4J/gq+N74VfnN+Yz6iPtT/BX9av4v//X/1ADYAaIC6wKjAywEcQS/BBgFLAUnBXwFkQWHBYgFkAWhBYQFogWNBUkFQAVTBTYFygS6BJsETQQEBNkDnQMSA9kCrwJ6Ag0CnQF1AVMBRAG3AEwAJQAkABgAZv/i/rz+3f58/qr9J/3t/Mn8NPyj+xf7mPpZ+ir6wfkL+bH4vPjb+Jv4Jfgj+KD4IPkn+Tj55PnD+o77JPzE/LT9l/6H/0IA+wCRARYC0AJdA98D4QMyBL0EDwU+BSAFcgW5BdcFtQXDBQIG1wXEBawFugXBBWYFGQUWBR4FxAQxBNwD3AOcA/oCigJJAv8BvgFQAc8AhwBjAAwAlv9X/xv/t/5n/kn+8/11/TP99/yZ/DD8xvtX+/b6n/oo+rD5XfkY+aP4Xfh4+D344Pcj+LP41vj4+LP5efo5+xX88vzP/bj+3P/AAGcBJwLuApID5ANhBMsE6gQmBUQFaAV6BZAFjwVsBXkFdgWUBWoFKgU1BTMFMgX4BMYEqQSABG0EKgTeA5sDYgMhA7ICcQJCAsEBQAEiAfEAeQALALb/nf91//r+l/5c/jf+AP6S/fz8oPyN/CT8cvvr+p76QvrS+V/59fiI+DP4R/hC+Mn3e/cF+K/4yfjj+Gz5dPqC+0v80vyk/f3+OADqAFMBNgJSA+oDRwTPBDYFfAXoBVIGWgYxBlAGqwasBkQGNgYxBgcG+gXOBXAFMwU0BQYFtQRJBAME3AOLAyoDqQJRAhYCxAFcAewAmQByAEMA2v+X/3v/Uf8P/97+sP5B/hH+7/1q/eb8i/xI/Mv7Gvu4+mP65vl4+Rj5ofhO+E34GPi897b3K/iU+J/4EPne+cj6lvtU/Df9Mf5X/z4A9gCqAYcCVQPEAyAEkwQQBTsFTAVyBasFwgWpBbIFrgWoBcQFxQWOBXAFkAWcBXgFRQUVBfcE3wSwBEEEwQOCA1sD2AI2AucBlQEfAa8AdwAiAK3/cP9W/yv/v/6H/mT+EP7b/aX9Qv2x/Fr8FPym+yv7f/r2+Zf5Z/nu+Bf4zvcF+P/3Y/dW9+v3YPjL+DD5w/m1+vP74/yb/Wv+gf/TAKcBSQLiAoYDVQTxBFcFWwWXBfUFJAZJBhYG8wUABgMGwAWNBaMFagUwBf0E+wT+BLEEhAQuBP4D6gOmAz8DsAJ8AmICCQKoAVcB/wDjAO8AogAxAPP/+v/d/4H/Lv+//mH+Ov76/VP9nvw4/Oz7a/vK+jr6k/kl+fv4d/jg95X3s/e992X3c/cE+JD4BPlj+Q36MPtX/CH9zP3E/vf/GwHcAVgC7QKxA3ME0gQBBSoFTgW0BdUF1gXJBZ0FvQW6BakFmAVsBT0FJAUtBQQF2QSiBFwESAQtBOIDeAM8A/0CrwJsAgICtgFuARgB3AC3AGUA6P/G/7L/cf8b/7T+Z/4f/vv9rP0N/Vj8//v8+3b7jvq8+Vz5U/nq+Pz3S/d/96L3bvdS91H3w/eX+Gr5z/lj+nf7wvwV/t3+lP+iANIBzAKBA+kDJwS4BE0FlgV8BVUFYAWFBY4FVAVCBRkFzgTIBNsEygSrBJMEXQRBBGsEeQRSBAAEvQOzA5QDZAMzA90CZwI2AkcCCwKjAVMBKAERAd4AhgAsAPj/r/9B/9z+ef4O/ov97/xO/L/7OvuZ+uf5P/nC+Fr4v/dL9zr3PPcd9yb3h/fR90H4FvkJ+tr6h/t7/LP95P7T/6oAkQFZAicDtwP6AzsEpgT8BPwE6ATcBPkEDQX4BNgEwwS6BJIEdwR
ZBEIEQgRRBEYE/QPbA/EDCgThA4wDXAMsAwcD5AK2Al4CGgIpAhMCsgFGASsBIQHGAF4ADADG/23/Bf+Z/h/+vP1d/dn8NvyO+yn72vpS+sn5U/kL+dn4m/hs+GD4ePiq+AX5SPmE+SP6CPvI+zn8yfzG/dn+g//h/4EARAHcAU0CpALdAgwDTAOZA7sDsgPLA/gDFgQnBFMEhASRBLQE5wQNBRoFLwVlBXsFZwVGBTkFJQXvBLYEUgTWA3wDRQPtAlQC2AF4ASUBzQBfAPj/e/8j/9/+n/4//r/9ef02/fn8tPxa/BX80/un+3/7Yfs3+wz7C/sH+//64vrM+s766Pru+rT6vvr4+j77fvuK+8L7S/z0/Ez9lP0g/sP+Yf/e/1AA2gCOASYClgIEA3ADBQSEBLcEvQT1BF8FlgWiBYcFdAWRBbsFswVsBSMF9gTqBMQEawQBBK0DfQNOAwQDkgI1Av8BygF7AR8BxgCAAEMA7f+X/0T/7v6H/ib+3P2A/Rf9svx4/ET89Pu4+5z7g/tU+z/7Q/s9+zH7J/sv+zv7S/ti+2v7gvu4+/37J/xR/LD8Hv1y/cH9M/7A/i7/j/8KAJIACgF9Af4BaQK5AhIDgQPcAwEEGgRSBI8EqwSnBJ8EogS7BM4EtQR8BFgEWgRSBCcE8APDA6EDggNiAzQD+gLCAo8CUwL9AZgBQgHzAIsADwCi/0T/4v5x/gD+nP1C/e/8n/xW/A/81vuq+477gfty+1r7R/tP+2P7aPtk+3L7kPus+8n7+Psy/GX8pfz//Fj9of34/W/+6P5I/5n/BgCNAAUBXAGpAf4BWgK4AgEDKQNHA3cDsgPdA/ID8AP4Ax8ESQRMBDAEMARMBGQETgQhBAwECgQDBMsDiANHAxMD3wKXAkIC1wF6AR4ByQB1AA4Amf8e/9D+kv46/sL9Uv0j/fn8v/xv/Dj8JPwa/BX8+vvp++77Dfwn/B/8H/w5/G/8jPyQ/Kn81/wX/Un9dv2e/cv9Cv5U/qH+xv7p/ij/eP/G/wYATACJANEAKwGLAegBKAJtAr4CFgNhA5kDywP5AykESQRgBHQEhQSHBHsEcgRgBEkEKAQABMkDfAMzA+4CrQJIAs4BXgH/AK4ASADW/2b/C//N/on+Pf7t/bD9h/1i/UH9Fv3w/Nb8yfzG/LP8ovya/KL8sfyq/Kf8tPzc/Pv8AP0B/Rz9WP2S/a/9u/3Y/R3+av6a/rH+1f4Z/2L/mf/D//P/NQCDAM4ADAFGAY0B4gEyAnICqQLhAhwDWQOHA6UDugPUA/ADAAT8A+oD1wPGA7ADhwNKAwQDwAKBAjgC4gGDASkB1gCGADUA4f+S/0v/Dv/R/pX+Y/49/hX+7f3E/aj9lv2E/Wv9Tv02/Sf9I/0b/Qj97/zn/Pb8Cf0M/Qj9Ev01/Wb9jf2h/bH91f0S/lH+dv6H/qf+5P4x/27/mv/C//z/UwCuAO0AFwFSAagB+QE0Al8ChwKyAukCGQMsAyoDOANYA2YDVwNGAz4DMgMhAwQDzgKUAm8CTAIFAq8BbQE5AfkAsQBuACkA6/++/5L/UP8P/+v+z/6h/mr+Rv4p/gb+5/3P/bD9jf15/XL9Y/1L/T39Of06/T/9R/1K/U/9Zf2L/aj9uP3Q/fv9Lv5c/oP+qP7W/hv/X/+M/67/6f83AHgAowDLAPkAMAFtAZoBsQHHAfEBGwI2AkgCVgJfAmwCggKIAnkCcgJzAmYCUgJBAisCCgLtAdMBqwF/AVwBNwEGAdYArAB9AE0AKQACAMf/lP91/1b/J//x/sT+n/6B/mT+OP4F/ub92f3J/a79j/15/XD9cP1w/WT9WP1e/W79g/2W/aX9uv3f/RT+SP5y/pv+1v4c/2T/oP/V/woASgCTAMsA8AARATwBZQGIAaIBswHBAdEB6AH7AQICBAIIAg4CFQIYAhICBgL6AfYB7QHZAcEBrQGYAX0BWgE7AR4B/ADXALMAiwBlAEUAJgD
9/8//sv+d/37/VP8w/xf//P7e/sH+n/55/mD+Uv48/hf+AP77/fT96P3j/d/92v3d/ez99v30/f39F/40/kv+Y/6C/qz+3v4V/0T/bf+l/+v/KgBVAHwArwDnABUBMgFGAWMBhgGjAa8BswHBAdQB4gHnAeIB4gHnAekB3AHLAbwBqwGWAX8BZAFFAScBCwHuANAAtACYAHgAWgBAACcACQDq/87/t/+e/4X/aP9O/zr/Kf8Q//T+3P7N/r3+o/6J/nX+av5e/kv+O/41/jX+N/43/jv+R/5a/nH+h/6g/sL+6/4S/zf/Yf+Q/7v/5P8PADYAWwCAAKIAvQDUAO4ABwEZASoBPQFLAVYBZAF0AX0BfwGHAZEBlQGTAZIBkAGKAYMBewFtAVwBTgE9ASkBEQH9AOUAywCvAJYAegBbADwAIAABAOL/xP+p/4z/b/9W/z//J/8P//r+5/7V/sP+sv6k/pb+i/6C/nz+dv52/nb+fP6D/o3+m/6r/r3+0/7s/gb/I/9B/2L/gv+j/8T/5f8GACcARwBmAIUApADDAN8A+gAVAS4BRAFXAWUBcQF5AYABgwGDAYABfAF4AXQBbAFlAVwBUgFIATsBLQEdAQsB9gDdAMYAqwCPAHIAVgA6AB4AAwDq/9H/uv+k/4//fP9q/1b/R/83/yj/Gf8K//r+7f7j/tj+zv7F/r7+uf66/rn+u/6//sX+0P7d/uv+/P4R/yf/Qf9a/3X/kv+u/8v/6P8FAB8AOgBUAHAAigCiALoA0gDqAAABFwEpAT0BTgFeAWkBcwF6AX4BfQF4AXABaAFfAVIBPQEtAR4BDgH6AOIAygCzAJ0AhABqAE8ANgAaAAEA5//Q/7f/mP9//27/Yf9P/zz/Lf8l/yT/Fv8F//X+8P7z/vD+6P7e/t/+5P7o/uX+4P7m/vX+CP8R/yL/NP9O/2H/cP+G/6X/yv/3/yIANwBaAGgAcwCIAJwAtgDKAM8AyQDdAOYA/AD/AO4A+QAMASQBNAFAAT4BWQFmAVoBWwFLAUIBPwEiAfUAzACPAFkAOAAQAPT/4//G/7b/r/+V/4L/ev9i/0//RP8w/yn/Mv8m/xj/GP8L//3+7f7X/uH+/v4G//7+AP8D/xP/Kf82/z3/Sf9U/2H/aP9o/33/qf/O/87/uf+v/87/+/8fADgAOQAxAEoAfwChAK8AtgDOAPYACwENARkBJQElASkBHAEAAfYA/wAPAQoB2QCwAKYAkgB6AGIAPAAcAPX/sv9y/zX/+f7g/rT+S/7x/bj9hv1h/Tv9Wf0j/hb/hv+w/wkAyQDlAc4CZwP/A1EEVQRUBBkEzQO7A2UDnQKjAWMATf+o/tL91/wl/F/7y/rl+ij7cfsL/Jn8Hv3P/Xv+e//YAOUBlAIlA4kDIgTGBMYEbgQyBN8DWANtAhUB7P8A//b9t/xl+y/6pflj+V/4A/dT9lX2xPZ/+LL6Dvwn/Tf/CQKHBF8GJQhqCiQM9QwlDa0MjwuaCnIJMQeJBO4Bj/8w/Ur6ZPd49erzrvKY8h/z8vOr9QX4pfkx+6H9+gAUBLoF4QaACAkKAwuiC4QLsgqSCWgIpgZjBIcC0gCn/hv86/kD+Rz5DPk4+Jr37vcY+VX7Jv0n/mb/xwDfAbECDwN0A74D8AIVAUL/4f2w/KH7kPow+un5Z/ma+dP6bPxD/hQAlAEIA6QEsgZzCGAJVQkjCckI/AcPB4MFZgMIAYn+VPyS+gH5ZfhT+Kr3FPcF+Ln5lfte/ZP+HgAFAt4DjAUBB6sHSwi3CBAIIgdMBvwEsAMxAs3/+P2X/Ez7ffqJ+ST4wvcw+Jb4aflG+in7ivx5/eb99f4OAJAAzgCfAKf/pv52/tH/rgGHAQoASQCnAaICfgMaBHQEnATCBMUEUgSIA30DJANkAYP/b/7X/R79FPwJ+w36MPnA+Sj7LPzM/Jz9FP/UAPkB7QKGBLEFYwbcBqoGrAZAB1kHowYUBUQDgALiAaEAbv9
d/kz9s/wN/E37L/sx+037d/sr+1j7ifyW/TP+o/6X/r7+Q/8i/5H+4v3B/K373fv4/N/9s/3q/UH/OQA0AbACSwSXBYEGsgbWBoIG5QXTBfcE/QI9AVn/AP1o++f5nfi79xX3qvdv+c36A/w8/qYAjwI+BMoFWwfpCLEJzwmDCekIiAgjCIoGIgQ2AnkAvv4d/Uj7GfrL+Xn5Q/lW+cb5HPu0/F/9zv3V/vX/4wBkAT0BPgFiAeMA2f+G/jH99fuP+0T8pfyL+1v7DP1w/nj/fQCbAWcD9gSHBQgGxgUtBWAFzgQbA/oBngCi/tz8pvoS+XD4YPc996b4j/lc+i/8RP5fAIICSAQeBtgH0Qi5CYoKWgoMCugJyggFB5MF9wNJArgAo/7a/O37Rvvj+sD6Lfrw+a/6avv0+6/8Pf3U/XH+i/7j/of/jv8u/7z+nv12/Cr9Q/9wANH/Ov/Z/ygBdAKJA4oEEwXpBIoEQwR5AwYDkAIuAVb/eP3I+9/68vk5+Dn3hPYw9n735fnn+5D97P7wAMMD0AUvB6EIuwn9CfkJdgnJCOUH1wZ/BVsDGAFu/zH+Dv3C+7P6Sfo8+pH6Tfst/Mv8r/0C/wMA7wClARkChwJRAqUBUQGlAHH/Y/7o/BL7t/ol/AL9XPz6+1b9Lv/cAJEC6wPnBNcFTAYnBvIFagW+BHIDggG0/+r9xPsg+kz4nvZh9jz2g/Z1+Jb6yPuf/S4AzgLuBEQGugdrCeEJdwlvCf4IFAgVByUFywJ4AUAAxf5C/av76vpJ+537o/sW/LT8Lf0A/v/+2v+pAPEAIwGRATUBxQD7AE4A1f72/R79+/uE/Hr+7f7u/QP+F//cANECDQRhBQAGpgU4Bp0G0QVpBR0ErAEAAFT+JPye+oH4BPYp9Rb1TvXO9uL40vqq/Mb+eQEJBKoF0QaPCGwJHgn0CNoIMQhkB/IFVANCAToAD/+7/VH8LfsC+yf7O/v++0T9SP4r/2D/Vf83AB0BeQHNAQoBfP+N/jr+tf1W/Z389PoK+5r9Uf9B//z/jwGKA2EFGAbmBnUI/AiJCFIHeQUWBaUEEQKN/8D9IPsa+Vf36/Se86nzIPSd9az3fvn5+xT/6QFoBEsGjQdKCZIKLQqjCY8JyAiaB6sF6QL4AKX//v1x/Nr6h/m7+WX6WvrO+gr8f/1W/28AdgDnAKgB8QExAngBof9m/rb95Pxu/Kz7Vfop+7v9vf49/x4BwQJ5BI0GfwdhCF4JHglnCPIGzQRIBIkDXwBx/cv7EvqL+M72JvWC9Fj0D/Wa9wf6nfsv/gcBRwPDBd4HvwiDCa4JAglvCI8HSgYgBfsCTgDB/oP99vsN+5T6HPr1+fD5gvoo/O79Sv+jAF4BawGlAQUCGQLRAWYARv5h/Nr6LPod+uX4bvgT+4P99f1o/zsCtAT0BpAIIwp5C4IL3QoMChIISAYIBaoCsP8E/aX6tvj19tT0uPNu8zzztvQ1+Cf78fyt/uYAOQSkB58JhQpjCkoJ9Aj+CLcHwQWjAwUBnv7l/Nj7Tfu5+g76/fk2+q76Ovx9/igAIgGQAb0BXwLuAnkCQQFD/9r8jvtI+i74D/fm9pH3Ovok/Gj8h/4HAgQFAgjxCQULfwy5DLYLjQp+CHAG9QRHAjz/J/3p+sD4LPdC9S30s/Q49Tj25Ph4+3v9k/+GATMEOAelCDMJkAkJCaQIVAg5B6gFlgMHAQL/q/2p/LD7vfoZ+v/5q/ql+3j8q/1S/6gAfwHlAQYCFALAAcAA2/46/En6SvnB9/D1UPXK9hL6V/ze/M/+lQI3Bh4J/wpPDFsNZw1oDHUK+gfwBeADHQH+/fX6tfiE95D2UvU29Br0vPWO+D373vxH/hEBcQR3BooHjQhXCbkJKQnyB94GnAUXBFcC5f+T/WT8FPzI+9b6x/kd+q37QP2Q/n//IAAmAY0CcANoA3gCawEsAAT+U/sB+aT3mPbS9e325/hg+Qj7wf6oAfED0ga
bCe8LCQ2tDNQLFgoSCGoGvwMPACz9vPpo+K32SPVg9IP0OfWo9gn5rvtU/qMATwJuBMIG+wfiCEcJaQiEBz4HYwb6BBID6AD5/mX9M/yY+1D7L/tR+5D7TPzB/aX/EwHBATwCQQNXBI0EbAPDARkA7v2u+xX5APYK9FX0YPbh97/3+/jV/O8AcQSbBxUKVAwjDrIO9w08DEQKFgjKBPkAvv3h+j/4r/W+8wPzyPI185v1A/lJ+5z8C/+6AnIF4Ab7B6oI7wgrCaEIQQfaBa4EHQPvAND+Vf0V/CX73PrC+iL7Cfzr/Cz+/v9aAT0CTwOKBCgF3gQhBNgCdwDV/YP74fhS9mH1r/Z29x/2J/aF+fD9HgJkBcYHJgo3DOQN5Q5wDlkNaAuNB5EDmQDT/dD6D/dk84fx5vDk8IzyWPXL98z5ZvxGACMEhAZQCNUJGwoxCqkKAwo8CGkGawRQAkIAGf4//Pb6+Plw+Un5evmx+o38NP67/wgBLwKuA9METwV+Bb0EkAL3/7f9hvtk+Bf2Xfcu+bT3CPab9yb7Z/8LA74FNAg+Ct4Lyg1hDsUN7gxiCn0GFwMOACr9Svpq9jzz2fGB8UfyhvS59mP4cfqo/XYBcgTTBuEIhQkKCYsJdgrfCc4HZAUWAw8B3v6s/Ef7OPr5+FH4WPjr+Kb6ovwP/nP/+ADAAmIE+gTfBHQEEgM8AWv/Iv2z+vD5+vqq+pb4Fvgo+hL90//dAfkDRQbmB2MJCAvZCw0MOQvoCIUGPQSPAaP+9fom9/z0APTR82L0MfWH9pj46Pqo/XcABAODBVIH7AcFCLcIrAmaCeEHgwWFA9ABi/9N/ez7q/pL+YX4evge+Yf6SPzW/e3+6P/LARcE/gRkBAkDYQHu/7H+2/wK/OH8bvzc+Xb4qfmt/AEAWgGpAQcDJAUmB4oIughCCagJgggMB98FVwR/AqT/7fsk+W/38vZ+9533Gvd59xH5avvG/c3/ngFWA+AEHwYBB+wH3Ai3CGsH0gVfBN0CNwFV/0P9Nfux+eP4n/jH+JD5zPqq+1z8/v12AEoCfgKBAREB5gC6/8P9bv0b/xv/DPxJ+oT7nv05AAsC2QGxAXgDvAXfBpkG2QZVB44GgwU+BVUEtwL1AMH+Wfxd+uz5qfrL+nb5nPh7+S77n/xB/mf/+v9AATADwATiBbgGNQc7B8YGZwa4BTgEhAILAUX/Uf3J+736D/qF+RX5fPmc+tf70vxx/eT90P53/2D/rv64/uQAWwKTAHP+ff7l/+EBDQNpA6UDdANOA3oDLANKA4oDrgJ+AcEAbQA9AEH/3f39/ET8HvwL/V/+zP4u/hH+7P6T/8j/fv8b/1L/xf9HAOsAuwG2AhUDAAPyAykFUAWFBFADawL8AXgBUQBS/jD8F/vL+ln6Svq9+n36y/nv+aX6a/tc/Ff+KAGvAej/SwDWAiwEyARdBV4FmgUVBuIFHwUfBEIDVgKkABr/p/5R/gj9Zftz+rP6qvt9/Ob8ov0D/3gArwGHAg0DbANzA7oClwHOAMIAvQAWAEv/6P7E/sn+Zv+MAHABAAJyAqcC2QLsAlwCVAH//4T+Wv1U/Hv7rvq7+Y34qveB9/X4o/zz/8T/wP5RAOsCfAU0B0YH8gYrBzYHRQdqBv8EKgRcApf/G/6Q/a/8cfuN+R34LPiR+Y37Vf0h/sH+CQDyAe4DDAXvBGoEWwQsBGgDjALeAaMARP93/gj+T/2w/LT8Lf3P/cr+TwDIAaACtwJ2Ag0ClwH4APT/5v2S+9v5ePg29zj32vnP/L387vsl/mkB2QMjBsAHQgiMCHQILggdCNQHwAYtBCsBnf8V/839a/vT+Bf30Pbb94j5Qfs+/Mv8yf7IAYwDawROBWoFCwUjBXgFZAWGBDIDgAGB/5j+pP6o/fz75/p++in7ivyr/br+f/8GACsB0QFlAewAMgD2/rf9FPxu+h77nf35/Zj7pPrw/J0AQwPAA2IDOwQWBqAHyQf
eBqcGuAbOBf4DDgKXAFj/IP2W+gL5Vfhy+Ij5Dvu0+077KfxW/zICHgM0A18DuwO5BO0FTAasBdME1wOZAlwBYgBa/wD+cPx8+3n7wvvX++/7Wfwo/Tv+0P6q/hv+5f2T/fr8Qfy1/CP/IwETAM/+zP8eAZsCJgQiBH8DrgNuA04DYgM8A1ED5AK5AYcBugESAQoAev75/GX8KfxW/Kj9c/6p/dT8lf2g/98ASwDG/08ADwEbAkUDlAPGA24EvAQTBPcCjQJbAvYAWP+v/iX+m/04/Xb8qftM+3H75/t6+zb6gvnU+YD6Mvyp/n3/uv4SAIsDvQUMBj0GAwdiB/YGJga7BNICoAF8ABb/M/78/fP9MP2Z+xr7/vsg/Rf+j/43/6kArAEoAuoCKgOHAk0BIADZ/x8AzP9D/zP/uv9iABIBugFGAmwCCQJRAQEBWwHeAZUB5f8c/rn9Sv7v/UT8gfpn+Zr4QPhB+KL5xPxE/lr9cv70ARIFQgfXB5IHNgh7CJAHlAYOBTsDPQEU/yj+Lv5S/dP7GvrT+Fb52fo8/Kr93v5l//7/jAEkBAIGpQXTAx0CgwG0ARYB1v9w/0v/BP8Y/xr/df+iAAcBUQCc/7r/CQEgAkoBtf8h/xr/f/7d/A77Cfpc+Qr4Wvh3+0r9JPxm/Hj/zQKpBZwHQwiVCCwJJwkWCEUG9QSRA+YAbf7F/Zb9XfyZ+iP5tPhP+ZD6J/wC/kj/0P+vAFkC6gPQBAEFCQSNAk8C/wJwAvkA5P+g/zEAbQBs/8T+Ef9O/0r/wf6I/jb/j//4/pb+fP58/iP+vPzg+gf6//l7+l38s/2w/Dr8wv4jAugEiAbyBlYHOAiYCBYIvAZJBdQDLQFy/pv94/1H/X/7g/nE+GL5+vr5/H/+Hf92/2YALQLgA38ENAQIA5QBPgEHAisCKwEJAHb/qf+DAD0BcgE/Ac4AdwBeAEUAHgCW/47+bv14/DH8//sk+6X5dPjs96v4yvub/tX9qPzE/kcCnAUGCPgIQAlhCTIJPQlXCB0G7ANHAU7+Ovxk+/P64vli+Mf3Ofis+VL8Uv/oANwAZwGMA7QFUgalBe4D/AEgAWMBVAHW/wr+rP1s/ir/1f95ALoAyQAAAXIB9QEaAoIBXADK/lj9fPy3+5H6HfnM97X2xPdL+xD9rvut+0/+ogHtBI4HFgkwCsgKjAr1CTQJdAiUBjMDgf/X/DT7+/lz+AP3xPVK9bT2zvmt/WEA1ABsAdwDGwZ9B6QHZAbLBCwDogE9AeUAvf96/lH93fyy/RT/KAB5AB4APwDiAIQBwQFzAVYAbP5B/Cv7p/rk+Vj5SvkE+jb7m/tI/Ir+TgCCAVgDbwWuB4cJbwldCJ0Hawc8B6UF1AJtALr+8vzS+gv5ePhc+AD4+PeY+bv8Of8PAN0AhwKDBMEF2QV/BVMEXgKkAdkBFgH5/wb/AP7D/Zf+mv8iAC8AhgAYATMBEQEpAeQAd/8C/cv6hvkm+TT5kfi/+Nz6JvxL/Dn+zADEAtkE+QW1BnkI2wmeCTYIiQbmBQ0FvQJvAJ3+n/zQ+pP57fgc+dX5hPon+/j8zv+WAXUCcAMpBHoEXwT/A/AD9QKTANT+Bv7U/Uv+BP7v/BL9gv76/xMB6wGDArMCgwJfAkYCnAG7/9r88fk/+L73w/fk+D/6pvnf+Fz7w/8sA+YEuQXCBpEIVgoVC1kKwggrBz0F5wLxAIv/f/1B+lX3Wfbl9jz4wPkQ+9H86P74AIUDCwZXBz8HDgbzBMMEVQTyArcA/v3t+0P7Qvs5+3H7KvzN/IP9e/8oArMDyQOJA0YD5QLBAb3/Uv03+0f5FvjS+Bf68Plc+Xr60vzR/5QCRwRzBS8HgwgPCVkJ3AitBzsGRgTQAez/Uv4C/Dv5j/dZ9/j3AfmH+p/8e/7h/ywC7wRwBs0GzgZvBucFMgVFBBYDHgGb/sH8+Pt1+1n7zfsX/AH8FP07//MAwgEvApECegKYAZYAW/9
w/VH7H/oE+1D8XPtj+uv7+/2U/yAB2wLBBOgFPgbTBswGgAaOBjcF0wIrAfD/c/5n/P/56/gI+V/5Qvrt++L9S/9LAF8C5AQPBgUGJgVMBF0E6wNIArwAl/+W/ov9Ufye+wb8BP3q/UT+f/64/zkBzgEFAkwCmgHd//r9y/wK/BX7cPvZ/Dn8hvqa+57+RAH2AuIDqQTTBS0HPwgGCIgGNAUsBOYCTAFi/2P9c/s0+Z73n/et+Pj5iPt+/X7/RQFgA9oFlwfyBw8HyQUYBXoE7wKkACH+Wvy9+xP73Pkv+aT5Vvtn/WT+TP8iAaECNwO6A88DzQL4AAj/dP24+3T78Pww/H/5Dvro/JD+OgBUAtsDKwVsBjMHqweAB8kGbAVfA9MBxwD9/pD8Fvr19zz3/Pcp+bP6Ff2O/y8B3QJoBYAHjQjoCEwI+Qa/BXYEzAKHAN390vsz+qH4GviZ+Nj4mfk3+zz9o/+aAXwCdANKBLcDQALCAL//z/58/q7+Ef1J+n/6o/zj/fv+1P+OAEYC6AOvBFoFfAVnBQkFuQOJAuoBlwB6/kH8ZfpN+QL5l/kE+7X8Q/7f/y8CpATbBV8GHAd2BxgHKQbSBJED8AH4/2P+o/zC+i/6Cvo3+W75Cfup/Pn9Rf95ANEBsgKnAt8BhQBE/0T+4v6DAB3/dPt4+yT+of+tAAYB6QASAsADbASSBGwEbQTiA1MCJgHwADUAy/3H+iv5Nvmf+Vv6V/td/Pj9WABLA9UF0AYTBzoH6AaRBlIGYQW1Aw4Bkf7M/RL9Cvuq+WP5UfnV+Qj7bPyp/ev+NgDUAc0CqAJ/AXUA3P/x/x0BCQE5/kL8X/0L/zEA2QBhAYsBbgEhAvMDygSEBMoDXwJ2AXsBOgHA//38U/oy+d74/PgD+qD73vwJ/igA+AIdBc8GBgjoBwIHWAazBU4FGARXAQH/uP1C/N/6Nvqi+Uv5YflG+nb7VPyR/ZH/sQDnAPsA9ACEABsA8QEFBLgB7f11/vsASwJdAskBygE0AjYCSwJMAl8C/AJvAqgAof/1/4MASP9T/GL6Ifo3+mP6Fvtg/MT9Lf8sAS4DEAUwBx4IaQfHBmkGBQZmBXMDnQCn/nz9MPzg+iX6Kvoe+gH6OvoA+1L8A/6r/kj+/f2u/pD//f+aAQMDVAEu/1sAHALvAosDtwOTA3YD3wJrAmECQgImAg4Bff8T/2f/9/7I/Rf8xvqK+uL6CvtK+3z8Xf7K/8sA/QGIA6kFVQd/B+IGYgZHBkwGPgXOAkIAcP6N/bb8iPvD+oz6Q/oo+sX6pfuG/LL8Yvxu/Eb94f2B/3YCDQOXAPX/TgL8A48ErARPBMQD0wPJA+QCVAF1ADMAof/f/nT+fv7o/dP8IfyU+6L7ovzC/Fj8UP0U/5IAZAFuAeoBJwM7BHcF6AUXBYcEVQQhBKIDAgLc/77+PP4O/rT9oPy4+wn8u/yt/OX7dvtc+zD7dvt4/FH/YAFx/wr+LAFkBKYF3wXqBMYEywX3BVgF5wPWAZYASP/t/fz9//2Z/PT6jPoa+5j77fuP/If9if5T/2YALAIOAxoDNAPNAooC5AP3BCQEvQIFAvIBXwH1/7r+Zf4y/hj+0v0//WX9Pv4v/hD9d/yK/JD8hvuI+7j++gCJ/j/8Sv61AUgEmwVGBW0EBQU1Bl4GQQUGBPACQAGM/wD/Fv///dH7LPro+W76APtV+z/8tP2g/tH/2AEVA0UDowPZA6UDrAMtBJQE5QOpAmUCQgKmANH+Zf4i/kb95fwA/Rb9Wv00/UH8zftK/Mv8QfzH/LD/xwBO/kf9c/8xAgsEBwSGA18EiwWWBb4EzwOrAwID/wCA/zT/HP9g/nf8cPoK+hP78fv2+xn87vwo/ggA9gGKApEC/AJcA7wDJgQkBPYDVQNdAtYB1AFKAWcAW/9e/s/9k/3f/RH+qv3X/HH8cPwi/a38EPxr/rAACv9e/bP+ZgD3AecC9QInA4sDowNBBIsE8gN
lA4wCDQFGAI0ANgCL/qD8S/v0+nb7xvvd+wT8Kvwv/fH+OwB8AWQCPgJpAnoDOATEBNYE6wODAscBugGpAcoAlP+F/jz92/zG/ef9efzz+/D7/Ptq/AT+nQBYARb/ZP6kADEC9gLjAs8BigGDAqECygE5Ac8BLgLzAN//LwAMAQgByP+s/m3+Y/5s/iP+mv3S/e/9y/3m/bH9R/7u/zwAtv9XAGwBqwJvAwwDaQKHAh8DZwPgAQQAxv/l//D+l/2Z/GT8ZfwC/FT7IftW/gQC2QCJ/mQAxgIRBOkE5QOQApsCrwLnAXgAPP+f/7f/ef6j/RL+wP6y/kL+Rf4e/zAAXgD1/2IAHwEWAdgAKQBj/5b/y/9Y/wz/9v5u/1EARgDR/zoA6AB/AQQCogHkAJoALAAt/0v+gv0A/Rn94/yU/Dr+iAA/ABv/LQCiAqYEWQVWBFIDZQOGA/0CmgGe/x3+Yf0T/U39N/0Z/A37S/uU/EX+rv9OALEAaQE9AgYDnwNtA7sC6QH9AKcA4wBfACj/Sv63/fz9AP9s/+P+0f51/wQAcgDKAFkA8f5V/oj+dP66/R3+QwDpAA//8P5+ARUD9wOcBFYE1gPzA+IDLgPnAaEAuP84/sP8Zvyn/BT8D/up+gb7Jfzl/Vf/SwAgAXwB/AG6As0CugLoAkQCHgGkAA0BXwFuAMj+Ff5y/vv+Av+i/rP+u/40/kL+Bf8y/6T+/v3E/dH98/5XAbgBvf+3/4UByQICBKYEPQTBA34DowNCA8EB3AAgAHz+n/2l/Rr9Gvz++sT6qPul/Ij9xv74/7kAQQFzApoDFgPrAX4BdgGKAZkBGwFUALT/nf/B/z7/tv44/6//hv+p/k/9v/x2/bD9O/2t/Db9xP9+AYoA9v+OAUADRwReBB4ETwSPBP0DBwN4AkMCgAECAMT+Qf4Q/l39WPzI+6D74fvA/Jj9i/6m/08AnwAYAfYBGgNNAyYCAAGOAGcAbABcAOf/XP8T/yv/V/9b/2n/1v8TAHr/rP44/ir+Mf4l/kr9u/yR/qMAXQACAAgB7AEIA+4DugNqAzIDpgJTAuYBTAHCAKj/UP7c/UD+SP6e/dP8Xvzx/FP+I/+2/5cA1wDcAD8BigG4AYwB6ABnAJL/rP71/mn/Ef+5/pz+t/5t/x8AsgADAXIAof+u/xQAIABb/6D+cP8sAD7/+v6sAPwBIAK5AcMBlQJBA9kC6AHfAHgA0ABkADz/w/7Z/iz+UP08/Qb+6/4L/37+GP/xABcCWwIkAp8BggGxAVoBzQANAAn/D/4O/c/8v/1n/iX+4f3V/Vv+EQCBATgBRwArAIYAcwBaAIIA5AD5AEsAVP+2/28B5QLHAlYB+gAYAqUC5gHnAO//VP8I/4f+/P3A/ZL9F/2k/Mr86v19/18AkQAOARwCQwMLBAkEfgPlAikCawG2AK//Rf4q/Zf8RvwR/Bb8cPzV/AX9Q/1V/ur/mABUAIUAFAEiAboBqwIfAvYAXAFTAq8ClwIfArwBqgGLAZQBggF4AK3/vv9F/0n+LP7K/Wn8rvsY/Nz8sf02/pX+p//TAPUBPAPhA9MD9QMhBN0DKwM5AlkBAQBQ/ln9OP3F/Df8NvxB/Cn8n/yU/T7+9P71/8gAzwDtABACZQOCA6wC1wGjAYECRwPFApUB3QB0AE4ALAD1/8//Xv+L/j7+fP6U/kz+V/2f/Pj8oP0N/q7+R//f/5oANQHmAbsCaAOIA/4CbAJ3AmYChAEYANb+L/7F/QD9I/y/++/7WPyy/B39vv1u/pH/RAEzAgsCNAImA/sDWgQSBFUDsgJYAjsCugGOALX/e//o/kr+OP5Z/lv+Kf7Z/e39mv4i/w7/2f4D/2n/4P80AGEAowDUAP8AZgG6AdABoQE4AQcBCAGpACgAz/9k//r+ev7f/XT9hf3E/b/9v/05/sL+m/8uAeEBegGxAV4ClAKqApACDgJZAZgAQAD9/2T/wf5b/iX+Sv6h/u3+Kv9
F/2z/vv9OAKcAqgDUAAwB2gCpAGgA+//r/xcAFQC7/1n/nP9HACQAf/89/03/TP8o/x//C//k/hf/iP9M/8v+cP++AAkBnADrANIB1gKsA2UDYgIOAmICawKGAfD/If8S/3H+j/0G/Y/8ffwP/Z/9If7l/sT/gwAWAV8BlQEBAmYCWQLgAUoB9ADEADsAd//k/pr+pv71/uL+Sv4F/oL+IP9C/wf/4P4V/0z/av/e/zsAPQCrAEgBZgGtAT0CqgIEA9QCIgLrAfUBkAHhAMn/p/42/gf+g/3+/J38YfyP/Fv9VP4M/6H/YAAZAZ0BGAK7AiID2gIzAq8BfwFJAZcAcP/A/pj+Yv4I/tT9wf3L/fn9Jf5T/qb+Vv/Z/8j/cf/U/xYB1gFLAbgALgEvAggDGgO9AqMCtwKOAkQCpQHFAA0AOv8x/oP9f/1t/dT8T/y0/Iv9K/6m/jn/2f+DAEgB0gEUAlICagIgAvEB1AF/AdEA9f9Y/1H/Yv/V/gT+sv3o/Qv+Pf6D/pv+sf4n/57/vv8cANkAFwGXAH8AEwG8ARgCHQL9AfMB3AHEAZsBKQGgABEAaP/Y/pf+Wv7n/WP9Ov2g/XX+DP8R/y//0v+ZADIBaAEsAQABOAF4AUEBvgByAD0Au/8+/yT/L/8k/+7+lf6l/jH/cv9b/3//v//y/1MAvQDLAJcAmwDvAP0A1ADyABUB7ADZAP0AEgECAb0ATwD6/9z/s/9y/yf/8/7Y/tP+4f4K/0D/kv/s/w0AMQB5AKwAyQDxAM0AjQB+AFUA8f+5/6b/df8e/9P+t/7A/gj/Zf9r/y//Uv/I/yEAWACpAM0AtQDDAP4AGQEMAfIA0QCyAJMAggBtAEwAKQD+/7n/mf+X/4z/c/9N/yL/G/81/z3/Pf9B/2X/q/8AACQAMwBsAMIA2ACdAGUAeQC5AI0A0P8c/+/+Iv83/9b+df6F/s7+Ef9O/5b/8/9TALcAGgFMAY0B9AELAtUBswGlAaMBfwH+AFoA3f+t/6n/a//t/qb+qP64/sT+0f7c/vL+Rf+p/83/3v8TAFwArgDpAPQA6gDnAOIA6wDyANsAfgDp/3f/VP9H/xn/tv47/vz9IP6H/vn+P/9x/77/FABwAP4AnQHkAc0BsQG5Ac4B2AGmAQoBZwA0ADgACgCq/zP/yv6V/qH+zf7d/tn+6v4B/yX/Vf9r/4z/x//c/8r/1P8fAGUAbwB0AJsAzwDxANMApQCwALQAggApAMr/hv9I/wT/BP82/0L/Pf80/2D/4f9TAGEAUAB5AOoATQFnAUgB/wC8AJ4AjwBpADMA6v+G/yL/JP9b/0v/Kf9E/4D/2f8qAE8AdQB1AEgALwAiAB4AMwAIAKT/hP+3/wYAPQAyAPf/5/8uAJIAsACNAGkAXgBOADUAIQANAP//7/+z/2b/bv+z/9j/wP+r/8X/9/8hAD4ATgAsAAAABgATAOj/tv+S/3D/S/8s/0f/jf+w/7n/1P/9/08AnQCzAKwAmABwAE8ANAANAOH/uv+g/3f/Sf8+/1z/j/+6/9X/+f8jAEUAbwCfAMUA0wC4AJoAmgCOAGQAOgAWAPT/2v+8/7f/y//Z/8b/rf+///T/IwAtACIAFQAOAPT/0f/N/9X/xP+f/4f/pP/0/z0AUgBMAFEAbgCXALUAtQCkAIcAWAAXAOb/5P/t/7T/Tf8c/0H/cf9z/2X/dv+b/8r/DQBYAIAAhACBAIIAfQB9AIAAZgAbAM//sP+1/7z/tf+0/7P/sP+4/9n/+//5/9r/xP+8/63/nP+e/7T/zP/b/+D/7P8NADAAPgA+AE0AXABhAGcAcQBdADUAFgD6/9b/vf/L/9T/rP91/3P/nv/E/8v/yf/i/xYASwByAI4AngCmAKYApACKAF4AQwBAAB8A4v/E/9v/7v/V/6v/m/+z/8z/wv+d/4z/mv+z/8b/0P/b/+//EgA3AEwATABLAGAAeQB1AFcAPgA0ACsAFgACAPH/0/+
1/6v/tP+5/7X/pP+M/4b/ov/H/9D/xP/Q/wMAPABPAE4AUwBcAFQAOgAwADoAQAAnAP7/8/8WADoAMwAKAPD/+P///+r/w/+i/5j/pP+m/5X/jv+w/9//9v/7/xYAQgBfAFYAQgBDAFgAVwA1AAoA8v/x//j/7//a/83/0f/W/9H/1//w/wAA9v/v/wQAGQAWAAoACQADAPr/AAAGAPL/3//u/wkAEwAeADMAPwA+AEYAWABjAFsAOQAKAO3/+P8CANz/nP+B/5r/v//K/8j/1P/q//b/+v8PACsANAAfAAAA9v/+/woA///r/+T/7f/z//L/9P8MACQAGgD+//v/GwA3ACgA9//O/8b/0v/L/63/lP+X/6z/vf/O/+r/DQAuAEYAWQBqAHgAdgBfAEkATABaAEsAFQDk/9f/5v/3//D/0v+1/7X/zf/c/+H/7f/4//X/7P/3/w4AFQAPAAcA//8EABUAHwAdACgAOwA5ADIAPQBSAE4AJwD7/97/zP/D/7n/nP95/2//e/+P/63/0v/o/+n/+P8cAD0ARgBGAEcAQAA1ADUAPAA1ACwAJAASAAEABwAKAPP/1//S/93/6f/v/+v/2v/Q/97/+P8FAP//9v/z////GgAwADEAMgBFAF8AaQBhAFIARwA8ACsAFgD//+f/1P/C/7H/sP+9/8j/wv+//8z/5P/6/wwAFgARAAkADAAYACUAJwAVAPr/7//7/woABwD0/9r/0//g/+n/5f/g/9r/zP/B/7//yf/S/8v/sf+d/6r/0P/s/+7/7f/7/xUANABSAGUAbABnAGYAZQBnAGAASQAqABUADAD//+H/y//X//D/+v/t/+X/9v8dADkAOwAmABsAKwA3ACcADwAKAA8ACQD8//r/BQAVABwAEQAGAAsAHAAbAAEA5f/b/9z/0P+4/6L/n/+p/7L/tf+5/8n/4f/0////DgAnADwAPwA5ADoARgBMAEIALgAdABAAAwD3//D/6v/h/9T/zf/Q/9f/2P/Y/93/6P/u/+T/4P/x/w4AFwAHAPn/CQAsAEIAQgA6AEEAVABjAF8AUwBKAEMAMgAWAP7/9P/s/9r/u/+m/6z/w//O/8H/tf++/97/+/8EAP3//P8OACAAKAAoACkAKQAfAA8ACwAOAAsA9//Z/8r/yv/O/8j/vf+3/7j/uP+6/8n/5f/4//f/7//6/x0APQBCADYAMwBAAE4ATQBEAD8APgA2ACMAEwAOABAACQDz/97/2v/i/+n/4//Z/9//8f/7//r/+P/+/wwAFQATAAsABQAKABMAGAAZABUAFAAXABoAHwAjAB8AEwAHAP3/9v/w/+b/2v/P/8v/yP/N/9b/5P/t//f///8HABMAIAAoACcAHQAVABEADwANAAMA+v/z//H/8v/1//r//v8AAP7//P8CAA8AEQAHAPf/9P/+/wUA/v/q/+D/6P/2//7/+//2//7/CQATAB0AKAArACUAGwAYAB4AIwAYAAAA6P/g/+T/5//h/9T/zP/T/+H/7P/1//z///8AAAMABQANAA4ABwD6//L/8//6//v/+f/2//n//P8AAAYACwAOAA4ACAD7//n//v////H/3//T/9X/4P/j/+L/6f/4/wYAEgAbACsAPABFAEMAQAA7ADMAJQASAP3/8P/l/9n/zP/H/8v/2P/h/+f/6v/1/wIADwATABAADQALABAAEgASAA4ACwAIAAkACwALAAwACwAGAAIAAwAEAAMAAAD+/////f/2/+3/6v/u/+7/4//X/9P/2v/o//D/9P/6/wMAEAAbACEAKgAuACYAEwAEAAQABAD4/9//yv/I/9H/1//R/8z/0f/f/+v/7//0////CwAVABoAHQAjACMAHgAaABsAGwATAAUA/P/4//n/9P/s/+f/4//h/+D/3//f/9z/2P/V/9P/1v/V/9b/2f/c/+D/5v/v//j//v8FAAsADwASABIAEgAPAAoABgD///f/9P/0//L/6P/
g/+L/5//s/+n/5P/o//T///8AAP7/AAAFAAwADgALAAwACgAGAAQABgAMAAoAAQD7//z/AQAEAAIA+v/5//v//f/8//z//P/7//v/+v///wMABQAEAAUADAAWABoAGwAaABwAIwAmAB8AGAATABEADAAEAPr/8f/v/+z/6f/o/+b/5f/l/+b/6v/r/+z/7P/t//H/9P/0//X/+f/8//7///8AAAMABQAFAAQABAAHAAcABgAEAAMABQADAP///f/7//r/+P/1//X/9f/1//X/9f/2//r///8CAAUACQAMAA0ADwAQABAADgALAAcABgAFAAQAAgD///z//P/8//3//v/9//3//v///wEAAwADAAMABgAJAAsADAAOABAAEAARABAADgAMAAoACAAFAAMAAQD///7//P/7//v/+P/4//j/9//3//f/+P/4//n/+//7//z//v8AAAAAAgADAAMABAAFAAcABgAHAAgABgAFAAUABAAEAAMAAQAAAP7//v////7///8BAAAAAQAEAAUABwAHAAcABwAIAAgACAAHAAYABAAEAAQABAAEAAIAAwACAAIAAwADAAMAAgACAAEAAgACAAIAAQABAAIAAwADAAMAAwADAAMABQAGAAYABAAEAAQABAADAAQAAwACAAIAAQABAAIAAwABAAIAAwACAAYABQAEAAUABwAIAAgACQAIAAcABwAHAAYABwAGAAYABgAFAAcACAAHAAUABQAFAAUABQAGAAMAAgADAAUABQADAAMAAwAEAAQABQAEAAQABQAFAAUABgAGAAUABQADAAMAAwADAAMAAQABAAEAAgACAAIAAQABAAEAAQABAAEAAwACAAIAAgACAAIAAQACAAIAAwAEAAMAAgAEAAUABAAFAAQABAAFAAUABQAFAAUABQAFAAYABAAEAAQABAAEAAQABAAEAAUABQAGAAYABgAFAAUABQAGAAUABQAEAAIAAAABAAIAAQAAAAEAAAAAAAAA///////////9//7//v////////8AAAEAAQACAAIAAwAEAAQABAAGAAYABwAFAAMAAwAEAAIAAQAAAP7//v/+//7//////wAAAQAEAAUABwAIAAgACAAHAAkACQAIAAkACAAGAAYABwAFAAMAAQABAAAAAAABAAEAAAAAAP7//////wAA//////////8BAAMABQAFAAYABgAGAAYABgAGAAQAAwAAAAAA//////3//f/7//r/+//6//r/+v/6//v/+//8//3//////wAAAQABAAEAAwAEAAUABQAFAAUABQAEAAMAAQABAAAA/v///wAA/v/+//v/+//9//3//f/5//j/+f/2//X/+f/9//v/AAAHAAgABwAGAAQABgALAAoAAAAAAPr/7P/v/+f/6v/y/0oAZgBdAGQAcQBZAOn/zv+l/3j/Nv85/1T/Ov9Y/4j/z//n/0IAigCQAIoAiwCRAEkAHwD+//b/4//c/+r/3v/i/9v/4//K/8v/4v/x//T//f8lAC0AIwATABAAAgDq/+T/4v/g/+v/DQAhACIAOABXAEwAMgA0ADEAFADw/9z/xf+n/6D/m/+c/6f/xP/i//f/DAAbACQAJgAeABYADAAFAPH/4f/c/9n/2P/T/9j/3//u//j/9//y//H/9P/x/+f/3f/Z/9j/3P/W/9j/3P/s//T/9P8BAAUADAD+//r/8v/q/+b/1P/W/9b/4//l/+r/8v/3/wsACwAQAAoAEAARAAAA7v/Y/9X/wf+1/7T/vf/W/+X//f8MAB4AKAAkACgAFAAEAPD/4f/T/7n/uP+y/7z/wP/N/+f/7//9/wEACAADAP3//f/s/9//2//g/87/yf/j//P/9v8EACgAKAAgACAAHQAJAO7/8//o/9j/0P/j/9z/xv/a/+T/4f/c//f/BwD9/xEAIAAeABMAJAA2ABsADgAiAC8ADAD3/xYAJwAYAAUAJwBEADUAIAAhABoA6v/A/5//gv9
P/0b/Wv9g/2z/ov/y/xIAMQBdAI4AiQB5AHEAWQA1AA8ABADv/+H/6f8BAA0ABQAHAA4A///c/8j/yf/K/8f/4v8HACkAOQBUAGwAXwBRADoALgD//+T/3f/d/+D/4P8HAAsADwD+//b/2/+w/6f/o/++/9L/EQBXAI8AtADIANQAqwByADAA+f+5/3z/X/9X/2H/Y/97/6L/vv/c/+r/CQAkADAAOwA+AFoAVwBTAFMAUABOACYAGgAKAP7/7P/V/+H/2//W/8L/uf+7/6D/kf+G/5X/n/+y/+r/IwBnAJsA1gD2AO0A1gCYAFkA//+u/3T/Pv80/y//Y/+M/7L/8P8dAEgALwAlABwACgDu/9D/AQAlAEsAcACfALUAjgBwAEIAJQD8/+X/EgAzAEgAOgA7ABgAwf90/xj///76/hT/Uf+s/zEAmwACATgBWwFeARwBtQBCAOX/ef87/0L/bf+z//r/TgBqADkA4/+F/yv/wP6o/un+Q//Q/4AALAFnAW8BYQENAXoAvf9S/y3/Kf85/3v//P9MAHoAdgA4AOz/m/9b/xD/GP9l/8j/SACnAAYBJAEfAdMAXADj/1f/Ff/8/iD/Zf/H/zgAiQC9AJEAPQAOAPH/xv+T/8L/NwCIAKcAwQD0AOoAigAQALL/av8O/+3+JP9h/5T/8f+FANEAtwB2AEAAAQB///v+4f5P/9b/WwDkADsBVwFDAeYAJwCe/2P/NP8V/zf/j//j/zAAIwAPAC8AOQAMAOD/uf9v/33/gv8x/2v/RQAcAaMB6AHJAWsBwQB7/5T+Tf47/n/+9f6J/zgA3gDwAHwAVQBoAEYA9P/U/9X/vf/e/woAGAA6AHYArQDDAIYAAwD5/yIA9f+5/6H/ef8i/xH/Ff8L/zb/s/95AOcACAEuAUgBFQFcAI//Gf/9/l3/vP/u/z0AzAD+AGAAzv8o/7L+Dv8z/wD/pv+9AOgAugDhAOgA4QC9AG4AjgD5AFwAa/8q/9/+pf6a/uv+rP+eAFcBiQGnAeUArP84//z+9f5N/9T/IgBsANwAqwBKAPL/xv9ZANAAQwCq/wsAKwCE/yr/z/6S/v3+Y/9u/4P/z//y/wsA2/95/8P/HwD+/wIAVgCEAKMA+QD8AJMAOQCt//b+YP4b/lf+Dv8YAGQBigJ7Ar8B3wHUAYgAgv+I//f/HADM/5//KgCPADcAjQB6AbQBnQGdAXcBDAGAAA8AlP9j/1X/fv/X/5r/hf8BAHoAYQAVAAgAaf84/vz8QPy5/MH91P4DAIYBxwKMAlYBqv/k/b/8JvzG+9f7pfxM/Yn92v2w/Yz9SP5Z/3kAggJQBf4GfQdwB/YF4QM3AtEAwP/O/uT92f1S/xYAR/8h/73/bABdARcCDQIOAn0CNgKbAd8AtP+Y/24AbACq/4b/a/+8/hr+U/3y/Ir9H/6b/g4AwwGcAhAD7wIHAh4B9f8u/s38YfxS/KL83Px//EX8dPy5/Lv8Bv1B/qz/LwC6/2j/pP/i/xQA3wDzAk4F5wbUB84HXQZ8A1oAwf2d+xz6nvlb+sH7U/2r/qj/6ABKAlsD1AMmBKEEqAQdBFcD0gJlAtEBcgEDAXgADgCU/+/+QP7m/QD+ov6q/8EA/AGEAvwBLgFaAE3/GP5z/X/95f1r/rv+u/43/i39Nfxk+1b6zvmj+tf7k/zG/Rj/v/8vAHMAwgBWAdIBNgOQBUcGAwV9BEsECwLV/oj8V/wb/bz8c/zX/U7/Uv9d/0MAOAGlAZQBzgG5AmMDbAMvA6gCGgIYAmgCcgInAhACQwI1AnIBPQBr/+r+uP7N/vf+Ov90/83/6/+T/zn/dP87AHMApP/I/oL+S/57/Z78U/xk/H/8LPyV+/r6WPqm+tP71fzk/dD/vAIhBSgGFgYCBtoFRQR8AkkBsQAtAJn+c/wf+x372/qK+rT7J/4dAaQClwLdAqsDQgOZAdsA/QBhAckByAG2AacBkAGRAb0B2QEJAhkD7QN
bAxACDAGjAGMAgQD4AIoBDQKqAZEAO/+2/dj83Pxl/R/+1/4U/4n+l/1l/FT7nPoh+iL68vry+5j8Vv20/iAAhwFuA+4E9gR+BLEEiQQ9A7cBvQAEACj/yP3Y/Aj95fwY/Ar85/xN/R/9eP3U/mMALwG3AYMC2ALUApAC0gFLAasBzALUA30EOwSvA4EDkgIkAYsAugDWAP0AHwEUAWYBgQEEAeUA2gCHAB8Acf96/h3+Gf79/I/76vrl+jb7t/s3/Nz8wf08/nX+Lf+v/7D/HgAsAUAC0gLBAjkCMgL0AtoDwAPmAmwCxgE9ACb+lvyj+zz7Cfvv+mz7BPxw/N38o/0K/9YAbwKUA4IEFQW5BKoDtQJIAiACBAIQAvcB0AGeATsBrwBXAL0AlwFjApwC0AJOAzcDhQKaAaUAlv95/oj94Pzq/DD9/fyK/Eb8cvxf/ND7efsO/ED9zv08/oX/8ACwARkC7gJRBDMF+wQwBHgDugKWAQMAaf5h/cv8UPwU/E78vfxL/YL9R/0+/cn9Lf7R/SH+5v/gAaICZAJ4AhADegMMA34CyAJoA7UDSANfApABLQHSAFcAggBnAX4CRQMnA74CwQKfAqoBmQASAJH/mf7R/Ev7Bfs3+xP70voR+1/7Kvu2+pX6j/tv/av/jgHfAnkE8QVcBvUFswW5BZoFHAXBAxgClgB1/mb8JPuP+hH7BPz1+2j7tfsq/Dj8Wfz4/N7+GwEVAkcCmAK2Al8CVQKGAoUCzALnAqMCiwJ9AggCeQFFASIBVgFnAYIAEwCdAPUAEwEgASABewEDAsgBDAFxAEP/2P3u/D/83vuK+8z6GfrB+ZD5u/ka+zz9y/+kAoEEXgUiBn8GEAZ1BfQEawQ+BKkDywFIAFD/1P1j/KD70Ptr/J38PvzO+2T7bfrk+V76x/tD/pIA/wHEAlADtAOXA3cDsQN5BIwFlwV/BEADIAK5AFj/t/6r/gL/g//Z//j/tv91/8b/RADkAN4BdAJ4AgcCywC4/pv8Vvvu+tL6bvob+lb6VPoY+qv6V/zC/gcBvgK8A0MEsAQTBWcFewWyBYwGEwdZBtkEfwNcAjoBAgCc/qb91fw3+wD5LfdD9k/2/fYa+Dr6Q/3u/+YBoQNGBYYGDAfsBiEH2QdeB6kFLATlAo8BRADq/r39Xv13/RL9o/zz/Cj+wP9dAEcAAwEZAlMCngH9AI0Atf9A/pj8wvsa/JP8NfxQ++v6KPtZ+9D7k/zh/aL/7wBFAWQBHQICAwAECwXOBcYGhAdeB5QGegVsBAoDFAHL/tn8Hfv1+Br3FvbL9W32m/ch+UL7yP0sACsC7APQBXsHUAhkCDkIKAiyB38GFAURBB0DgQGl/1n+k/0N/Yf8GPwC/BP8RfyT/CH9Mf4e/4v/0f8nAFUADgDT/y4AhgAUAO7+2P3U/Mj7YPve+5/8Uv23/ab94v2d/sb/TgHmAosEGQYKBzMHoAbtBWUF2AQFBI8CzQAi/5j97/sT+t74vPjP+Pf4iPm1+l/8sf0C/9kAnAIsBDUF1QV1Bv0GYQceB1cGsAXnBK8DFQL6AFAAVv+g/vr9O/3c/Lv8s/yh/Jv85vwz/W39c/1R/ZX9Of76/oL/n/94//v+cf7y/Xr9Qv00/T39M/1R/ZP9yf12/qv/NwHoAigE1wQiBUQFEgWoBEgEpgOPAggBxf+9/kL96/tZ+1r7n/vc+zj8Fv1a/lL/NwAfAc8BrwJrA/ID1QTZBWIGbgYpBqYFxwTjA+MC2gFOAdgAIwAe/yL+Y/3G/Gb8LPwx/Jj8wfzY/B/9hv3y/U3+/P6v/8f/Pv+z/ln+q/3A/Az8wvvC+9H7Ivx6/Dj99f6zAMUByALTAzkEHAQJBKcDXAN9AzwDZwJqAWMAQ/9D/nf9GP1V/eb9Iv7u/Qr+nv76/ln/FQDnALgBjwJCA5wDBgSrBC4FTAVDBT4FDAVpBG0DZALCAVwBjACm/+r+WP6//ff8Uvw9/JP85Pw
x/Zr98P0//qv+7v7i/pL+SP6v/bj8LPz0+5T7dfu2+/f7HvyZ/In9kv67/8oApwE4AmMCbwKAApoC4ALxAuACzgJZAn4BqAAJAHn/If8E/+r+2v74/hf/RP+N/woApAABAUABkAHzAYUCIQOQA/4DWAR9BFsELgQdBNcDYgPeAj8CpgEQAVkAef/C/lv+xv0H/Z78qPzt/B39F/0l/Y799/3r/ar9w/3w/bL9Qf3K/Hb8z/xs/T/90vxP/UT+rP7W/j3/zP97AAcBHAE9Ae0BgwJuAjkCLwLeAT4BkQD9/67/uP+J/xX//f4y/2L/lf/C/9v/MAC/APMAFAHeAbkCQAOsAwAEIwQ1BC0E/QP7Ax4ECwR8A4sCswEcAXcA+P+j/0X/1v4//lv9qPy2/MT8efzL/IX9k/07/Rz99vyb/KX8vvyu/Pr8Kv0D/d782vwr/aX9DP6u/nL/FQBuALIABgGSAT8CdAJhAoQCewL1AUIBrQBBAPP/qf8m/77+q/6i/tH+Vv+3//L/XgDDACMBzwGCAiID4QM7BDsEoQT6BMQEggRFBOADjwMJAxYCdwFUARMBnAD3/0f/vP4r/pH9Yv2D/Y39gv2X/bj9qP1d/RL9yvyR/HP8ePyW/IT8UPxU/H38q/z1/Ez92/2u/kj/mv8fAKsABQFvAcAB2wEIAv0BdgHeAI4ASgDl/4T/Qv82/z3/I/8M/zL/eP+7/wEAegD5AHABAgKpAi4DiQPuA00EgQSfBMgE+gQEBacEBgRwAw0DnQL1AUABrgAgAF//kP7i/Zz9nP1w/Rn9I/1u/Yz9nv2k/ZT9fP0r/X78NvxQ/BL8svsH/JD8tfz//Kf9Vv4D/7H/FABtAN0AGQH3AOkA+wADAfwA5wClAEwANAAnAM7/Z/9o/2H/Rf9X/4z/q//l/xwAOwCiADIBkgH1AcsCkQPmAxwEcwSWBOIEZQVQBb8EcAQfBEcDdgLmAU4BsAAgAG3/3f6j/mr+//2u/cv9Kf5Z/mf+fv5f/vv9c/3T/Ej8F/wY/Af8Afww/Fb8fPwD/b/9Qf7B/gz/5v74/k3/af+x/zMAYACUAOwA8QDLAOMA5QCtAFoAFwDQ/2b/Kf8k/xP/Mv9u/2T/r/92ACEBuAGGAjgDtAMOBEwElQQSBYIFaAXiBFoE2gMwA44CFALDAYIBKwGSAOv/kv9k/xf/0v6v/nz+b/6i/sL+tv6d/iX+cf0N/QP96Py4/KP8mPyR/KH8q/ye/Of8a/17/UP9Zf2p/fP9dP74/l7/3P9YAIAAmwD2AEkBNAHzALYAXAAIANf/qf+H/7r/0f+b/6v/JgCoACsBoAHsAXICKQOWA8oDQAS6BNwEngQnBKsDcgNVA/0CrwKsAo4CAAJlAQMB3QCzADcAhf8R/8j+Zf4d/h3+YP6P/nH+Q/5X/oT+dP4H/pL9T/33/Jr8SPzs+977I/wP/Kz7kPv1+5b8Dv2I/Ub+AP+O/+b/FQCiAGsBfwEJAccAmgBPABAA8v8lAHEAbQBbAHMAjwDJAPEA7gArAZoB6wENAmUCAgOQA8QDwwO/A+ADJgQvBBAE9APiA5UDAwNUAuwBkgEPAVwAqf9W/y//9v7c/g//TP90/1v/Hv8E//r+Af/N/v79hv1+/aX8vPu3+4j7ZPt3+wz75fp2+/P7IvyJ/Dj92/0j/ln+4P5///j/QgBPAGIAsADNAKkArQDwAA4B1ACOAIcAtAD0APkA0gARAXkBmQHMAUICtgIaA10DPgM/A6oD7QP/A+cD0gO8A1ED5gKTAjsC8gGeAR0BkgBkAKMAOgDW/zoARgBaAEAAMwCNAFgA8P9h/5j+Vv7X/ez8s/x//A/8+Pu3+zv7E/sS+yD7R/t8+877LvxP/JT8MP2M/R/+m/68/hb/dP9z/5j/FwBcAG4AsgAFARABFwFGAVwBUgGGAZ8BkQHmAU0CYAKtAhYDCAPRAhYDmQMkAxoD2QNMA7QC8gLKAk0CFQIYAoABJgG
NAT8BIwExAfYAhgFtAQUBhQE0AcIAwwAWAMn/vP92/z7/Bv+P/tT9tf0D/Uv8dPwG/Mj77/uv+377ifvC++H70Ptp/OH8p/w9/Y79Of3L/V7+hP7I/lD/jf+j//H/RQBgAGcAxgAQAfsA0AB4Af8BvAEdAnUCfgK4AhsDKAPdAjIDCQM1AhICVAKdAacBggLRAbcBbgLcAdUBlwJDAi8CsAJjAhgCCgLBARoCSQHoAI8BHAGYAD8AggAXAKr/8/5L/nv+wf3c/NH89/w4/IX7Afxt/Ir7IPwb/YX8bfz+/PT8bfw5/WX9Qv3a/ab9bf6J/nr+nf8C/yD/DQCN/0L/JwB3AGsA5QDTACABlQGLAcgB9QHHAV0CWQJrAVkCSwJAAQcDHgL7ABQDUgJ7AZECvQJkAuICJAOyAsECFAOrApUCdQI2ArQC2QHsAUsCEwH0AJEBPACg/x8A2v6a/k/+pf3C/QH9xvxe/Wr8pvym/aT80fyN/Sz9uvyq/U/96vwe/gT9gP1n/hX9DP7z/tH9yf73/jz+Rf+X/vX+ev9r/67/2f/2/8v/0wBHAJkAiQEMAeYAZwGBAeUA0gHsATQB/wGOAdwBMgLFATcDRQJiApIDzgJwArsDvQMgAoYDUQPMAZ4CrAKHASkCyQGfADQBdwDV/w8AVf+n/in/k/54/cv+Ff67/Or+J/57/Hz+Lv5n/Gv+dP5Q/MT+mf6Q/PX+Uf4W/fv+d/5n/Xr+WP7F/WH/X/7z/QYAtP4x/ikAfv+7/nEAVABf/3oALgF9/0cAYAHI/58AfgFqAAkBKgIcAT0CoAKIAQ0DlwKJAmADxAIpAxsDYwK6As4CfgK3AkICJAIIAuwAswGdAZ//8QAzAYj+S/+UAJH+7/7c/2T+Hv/L/tL97P7C/m/90v7D/iP9s/47/uz9wP5M/k3+jP4z/nf+rP7w/a/+gv7t/Yr+C/5x/sT+9P06/wL/Yv6P/+7+E/+I/zH/AACH/6n/kgCq/3oA3gDh/4wBXQGfAEYCNwLpAfABlgKaAuEB8ALGAnQC8gJyAsACagJEAqECtAH/AbIB/wAzAcgAhgAvABEAUgAFAKD/JgDF/0n/6P8M/wz/wf/f/ib/UP+r/hL/4P6w/t7+bv6s/rH+Df6t/rT+MP5p/tD+7f0J/gn/gP3M/rP+lf2t/1f+pP4BAPn9N/88AJf+VP8pAAX/a/+JADH/7P8sAen/uQCDAUwA6QEsAuMAowL4ARoCqwKkAdwC7gFYAd4CrQGQAU4CPgE6AeQBGAGVAE4BPQHZACgA4wDcADMA/P+DAGgAMv+KAJT/bP85AEP/pf97/wL/gP/d/lb/V/9A/iz/7f4f/k3+o/5e/uP9NP4D/7L9Vf5Y/zb+t/5J/kD/+/73/bn/xP7Z/pH/7f7z/kb/EwDt/nn/fACw/yAADgGDAHYAIwLNAMAAXgK/AIEBXAJoAZ4BpQFgAQcChwGXAAICQAHTALABYAHeAFABpQF6ABcBVgGFAPEA1QBbAD0AmgB2AM//VgATAKH/5P/4/8v/+/63/6X/Wv5X/6j+OP5w/7b+Hv4q/9D+dv5n/zz+/f5j/yr+C//Y/m3+0v7v/jX+k/5b//39Sv+0/y3+lv/v/yP/ev8fAN//+v+zAMIApwB+ADEBnAENADMBJQIdADYBpQHXACUBPAEgAc8AdgGqANIAtQGVAI4BMgHFAOwBkwAsAR8CJgC3AKUBBQCNAJQAEwBrAKL/6v9AAB//rP8kAIL+nP+h/9/+hv9g/1T/bf92/6z+QP81/2f+2P7U/lX+FP9v/jL+kv+o/ez9uP/t/Sr+5P9r/pf+u//5/iX/HQDN/0cAaACs/zcBUgCNALQBvf/7AB4B2f+dAW8AYQCEAZ4AoQBtAG8BtwB8ALYB2gDSAIoBLQE5AUAB7QC+AUcA3ACdAZL/ogBXAcr/qv+IACEAmf+k/8MAp/8d/9IAl/+j/m4ABgBi/h4Ag/8S/4L/3f7U/zb+xf4
UAPf9Vv7J//n9O/6w//z9pP7r/pj+1v6c/iP/TP8h/2T/AgB//2T/zgCc/0n/ZAHt/3f/1AB5AHX/NgHzAIL/egGgAHcAGwGfAFkBhgD9ABsCugC6AAcCIwF9AEkBjAExAfn/iQHlAG3/kAEDAaX/vwCGAMX/jQCy/+r/mQAMAGD/MwC//6H/PwDf/tP/LwBo/o//PQDl/b//Vf/8/eP/s/5l/kz/ev4P/8P+rv67/47+MP9d/2P/Cf8p/xMAFf+X/zoAJP9i/14AHP/E/yIAWP/BAMH/cv8bAf3/IQA4AUEAUgCpAXsAHAA7AsoA1v8TAq4APQAKAhMAWQGXAaL/cQHtAKD/UwGNALf/igHN/1UARgH5/0MAgwA4AP//cQAJAP3/eQCt/7b/KgCB/sH/sv8b/s7/Sf/4/pv/W/4m/3L/Xv6B/wz/6v6s//z+m/9h//L++f8o/9X+AgCC/+r+0f9M/0r/yf91/5f/cv+dANb/B/8uAUgA2f/tAJ0AwQBbACYB2QCEAEgBAwE9AA0BewHs/zAB4ACHAJwAMQAwAakAFgDKAMYAbACPAFQAEwFgAKEAhQDh/4QBLAB3/ygBMgBW/38A7v9d/+H/vP94/1X/fv+n/+b+oP+Q/33+4P9A/4b/Vf/i/sT/1P5W/7f/CP/5/gsAbv5w/lIAsP7i/vL/UP77/tL/JP/I/1P/ZQDv/xP/4AAuAOf/vgFzAPH/dwFKAAcB4QD9/8QBHwBdAEwBhf+SAAoBUQANAKYA8ADg/6QAdgGz/68ArAEKAKkAMgEzAIUA3gDp/3MAgwD4/z4ADQDI/8//LAC0/53/DQCD/6L/t/+y/7j/7P6w//L/Iv8d/7H/S/8V/6z/+/6b/zz/Cf8fABb/x/7c/6r/yP66/wsAJ/+8/wAAMP9z/68A6/+A/7EAUwCz/yUAGAEmAHr/+ADKAKz/UADDAOr/MQAYAScA8P9mAdwAMwAIAcUAkAAhAYwA8AAwAEsAkwFu/yoAMQHE/yoAewBMADwA0/9pAAwAKP+dAGkAT/+FAO7/PP9oANn/JP+W/7z/uf+S/yr/Bf8EAH//qP50/9n+mP+0/5/+mf8JAC7/hv+B//z+RAAMADX/9f/P/6P/NgC+/+3/zv/z/+T/CwBWALz/NABFACUA7/9UALAANgDY/6YArQA6AKYASQB4ABoBnAAlALIAZQBTAGEALQDfAGsAMgBrANsAcwCK/90AogCs/1IAhwCp/3wACQEy/3f/CwGo/z//yACh/w7/hQCv/w7/LQA6//D/uv/2/mQAaf8I/7oAO/+Z/oYATf/8/g8Ak/+D/9r/W//a/9L/Hv8vAEYA4v4mAKEA6P4FAJUAPv/x//oAif/i/2IA1f//AEIAyf/TACMAAgCEAKUAqwA1AFQATQA5ALIARgBUALcA6/9BAIUAEACZALoACgApAGEA9/8XAMIAbwCL////UADn/4//IQBUAJ//3f9W/2X/bACO/3L/CAD5/p3/HwA9/wsAPf8b/9//pP+p/xv/XAAnAOT+tv/G/6n/pgC9/zH/pwCv/57/UwCe/9f/9f8CAOv/q//X/wsAjQCh/y//hQBJABIAVQAVADIAoQCzABMAJgD6AI4AWgDsACIANADzAH8ADwBKAIUAcQAsAA0AOwAqAMn/LgCCAFr/+/+VAKP/of/U/z0AQQADAH3/u/9cAK//gf/V//f/LACT/6H/KwAO/2H/KwCL/2X/hP/I/w8Atv9b/xcA8P/f/+7/cf9hAFMAVv/N//L/rf9nANj/bP8uAJn/7P9/AGn/sf9/AAsA+f8EAOT/ogAXAM3/2wAQAAcAzwDG/8b/7wCeAEYAOQAOABEAOQBcAHgAKQAaAIoA1v/E/54AOwCa/0wAJwC2/wIAQQA8AOn/2P/0//H/HQAKAGL/5v88AAgAu/8GADMApv/P/8L/3f8NAE8A3P/d/x4Al//y/+b/QQASAEP/2f8bAG3/H//T//j/Yv9t/6X/k/+
f/67/af+u/1cAof9X/8P/hf9//7P/Vv9T/4D/+P4u/w3/Kv+g/1z/Tf+O/zgAZgCSAKYAeAANAcYBfgFDAUkCUwIVAj8C5AEVAiICCgLhAU8BLAETAbUAagBBAA8AxP/I/1P/9v71/kT/V/+R/v7+U//9/iD/Ov9X/47/zv/b/3f/7f8IAF3/8/+S/xz/v/8j/3P+uv4//qD9Xf1W/fz9z/1C/Wz9Cv4r/mj+OP8Z/6r/SAEdAegAIgJPAosCOgMAAwkDjANKA+0CjgLhAfoBaAFXAP3/sv8n/+3+lv7Y/df96v2r/d79yP31/df+KP8Y/6D/hwD4ACMBQgHoAV0CeAJ5AjEChAJ6Ag0C/AGKAWABawGvADkARgDP/2n/Rv84/yX/xP6R/kz+V/58/uv9sf28/Un9QP1e/Tv9pPxs/J39oP1+/NH86P1r/oj+ov4a/9j/qgBGAUIBsgHSAigDHQP8AlIDugNyA2MDkgIMAlwC2gFUAbIADgBkADgAQ/9v/4r/OP+O/2n/l//m/wcAtgCRAJIAMwE9AR8BVgGGAVcBSQE5AREB3wDMAHoADQDY/6b/u/84/8r+pf58/nT+Av7H/Wv9gv2r/fP8w/zd/BX91/2H/ZP8XP1n/m3+4v4R/yH/YgDoAK8AJwHLAZsCqAI8AnIC4AINA6MC7QGBAWsBPQHHAML/7f4a/x//df70/fP9Fv4t/kH+Uv75/rj/DgBtAMYAsgGAAtUCAwNPA7gDywPlA7cDJAP7AhoDdAK4AXkB5AAgANn/Vv/U/rL+WP4L/uD90/3L/Yv9i/1u/RL9Q/1T/QP9sfx7/NH8LP0E/eP8Ff2v/T7+gv6A/iv/7f9fAPAAMgHFAQYCMAKRApUC8AIaA4kCXAJaAhUC1wE3AbUAiABWADYAtP9c/3H/fP+F/4L/hP/j/zsAWwCiABEBkwHpAfAB+QFKAmcCpQKjAkECBgL0Af8BogEWAZ8AVAA/AOX/Qf8M/wr/xv59/qb+ef76/fP98v2z/Uj9P/3V/GX8wvwP/Xf8s/s//Cb9U/1w/aD99v0b/wwAKgAlAN8ApQHlAVsCcQKUAsUCZQLtAbMBsAGFAbwA+f/E/6z/tP9N/7T+4P5e/5P/jv+w/0UABAGCARMCUQKgAooD2APtA/ID0AP9A+IDSQPKAowCOAKNAaUASwAfAKD/Vf/E/kr+bv6N/kL+8P0M/g/+yv2z/Y79Zf2a/U39bPxc/Bf91Pwf/Df8qvwr/W39pv30/Yz+fP///+v/egBqAbsB9AEgAkYCnQKhAkMC+gHFAeMBhwGMADEAKQD1/8H/av8c/2H/sf+r/8P/CwB0AOUAPwGcASECpwKwAsQCJQOGA9YDdQMnAw4D3QLbAmYC4AFqAQkBoAAXAN3/pP9Y/wr/sv5g/pf+iv72/b/9ff1x/Vz9rvwB/NL7+vtC/KP7pPot+yb8gvx7/KT8bv2b/mL/a//G//cAuAHgATUCNAKjAi8DvgJYAhQC+QHtASQBZQA6ABUA1P95/+z+AP95/3z/kf/K/xsAwgBAAY0B+QEmApUCJwM4AzwDXgNoA08DOwMXA+ACkQI1AtgBcwEQAaoAWwARAL//Xf8B/yH/Fv/J/rD+d/4V/uv94P2F/eT8WvxF/EX81PtF+zv72vtQ/Cb8F/y6/Oz91v7e/v3+uf/NAHoBnwHoAQsCXgK3AnoCAgLuAdcBVQGsACQAKQApALP/Tf8w/23//P/b/7D/JgChADcBgAGQARkCowLMAg0DQgNWA0oDIAMSAxED2gJnAv8B3gG4AU4B5wCNAGoATwANAMP/df9l/1n/JP/k/n3+Iv7g/Y/9Ef1w/Nr7mvtw+/b6lPqi+vT6VPus++T7Z/yE/WL+w/5U/zgAAQFtAeEBWQJ1AoMCsAKHAiQC4gGgATgBjQAeAAIAtP+O/03//f5a/5j/oP8QAEsAggAfAZ4BBwJaAp8CDANFA1YDlwOMA1YDaAM8AwMDwgJiAjIC2gG
PATEBkgBmAFQA7/+n/2H/MP85/x7/vf5W/gn+zP1r/ev8Zfyr+0/7TvsP+7r6oPrw+m/74PtD/OT8nP1D/hb/pv9JAPAATAHZAT8CWwJkAiwCJwIcApQBCwGcAE0A3/9V/zD/Mv8d/zT/Wf9u/8b/WQDHAAIBeAEJAmQCuQL1AjIDZQNXAykD+wL3AhkD8gKDAi8CGQINAt8BcQH/APsA/QDQAJIAKgAVAE0AIADI/4P/I//v/pr+7f1i/bf8/fuT+xn7sfqa+oL6efqz+gr7YvvZ+138z/xg/Q3+w/5b/8//aADcAB8BfwG2AbkBowGDAWgBKwHpAMgApwB4AGYAhQCdALcA1wAJAU0BcgGUAc8BBwJJAn8ClQKjAqMCowKRAnECRwJBAlkCKQIMAiECBQIAAvEBtwGSAXMBaQFfATsBMgH+AL4AlQAwAM7/Y//G/g/+Vf21/PL7J/uj+lL6Hvr0+e35MPp++tX6WPvQ+1H89/yX/UP+6P6K/yoApAAWAXsBuAHWAcEBogGaAXkBHAG2AJwAngCPAIEAfQCkAPYALgFWAYMBqgEAAmQCcwKAArYC1wL6AvQC0ALJAtMC3ALFAqkCrgKpApoCfAJNAhYC4gHLAZ4BZgFGARUB1ACBABgAu/8p/2D+e/2p/Pv7IPs7+rX5fflM+TT5TPmg+RP6lvol+8P7Y/w2/RL+5P6w/1IA8QCAAd0BGgJIAkYCFQLXAaoBdwEhAaIATQAnACEAMwAJAPH/OQCpAP4AIgE4AY8BAAJdAn0CdQKdAt8C/ALpAtgC8gItA0kDQgMxA0QDZwNMAwwD0AKsAo8CTAIBAsgBjgFMAdMAHwB//9D+Cv4w/Sj8PPuc+hH6q/lo+VL5hPm7+fb5V/rb+nb7LPzT/JP9cf5B/+j/cADoAFEBlgGfAZABcgFXAQ8BsQBoAC8A+P+//5X/mv++/9//EABHAJ4ACwF0AdgBNgKdAgwDUwNwA3sDhwOaA5cDegNTA0ADTgNGAyQDEwMGA/ECsgJOAv4B0wGsAXEBHAHlALYAUQDN/zL/nf71/RX9LPxz+/L6rvpw+lL6bvqZ+tj6EPtU+8f7YfwG/a39UP4K/7n/MwCPAM0AFAFEATQBBgHnAMwAsABDALb/dv9X/0z/L//6/gz/Zv+k/97/EgBiAOwAYAHCAT0CrQIcA2wDfQOXA7ADtQOwA5MDiQOpA6sDkwNhAyED9wK/AnICMwIFAuUBuQFrASUByQBNALj/Df9Y/qT9yPz1+1z7+frI+qf6hvqc+tr6EPti+7j7LPzU/Ib9Kv7f/nj/BAB0ALMA3wDtAOsA5wC9AHcATgAVANr/f/8i//z+Bv8J/wL/F/9t/+H/LwCAAOYAdAH9AW0C0AI3A4oDvAOzA6MDmwOHA2YDNQMUAw0D9gLCAnwCLAL7AcIBkAFuAUcBMgEsAQgByQBlAOj/ff/n/i7+c/3N/Ff8BvzG+7j7yfvc+/L7Bfw8/JP87Pxf/fX9qP5M/7D/8v83AGIAbgBJACUAEQDl/6//Yv8Y/+j+vv6V/oP+fP6b/sX+7v4u/3f/3/9SAL4AQwHXAWIC4AIyA3gDtgPFA7YDlgNwA10DPAMJA90CqgJ1AjUC4wGaAWsBSQE6ASgBHgEcAQQBygBrAPH/Yf+8/gv+av3o/JP8Wfw8/Dv8Tvxf/Gj8fvy2/BH9gf35/Yv+Lf+6/ycAZwCLAKYAmwBvADcA8/+5/3f/I//X/qT+jP6I/oP+jf6k/sf+CP9I/4T/0f87ALsARwG7ASwCpAIFAzsDVwNpA28DWwMqAwAD6ALVApsCPwLqAacBXQEcAeAAvgC4ALEAqgCkAIsATADq/3f/AP99/ur9WP34/Nb83vzw/AX9Hf1H/X79vv0C/kH+oP4f/67/IABtAKEA2wDuANcAlwBLAP3/mP8h/7f+af4f/uz9vP24/dn9C/46/nD+uv4V/3v/4v9bAN0AaQHvAXMC4gI3A2MDcQN2A3YDZQN
CAxID5wK8AnUCGAKzAUsB5gCcAGEAQAAqAB4AFwADANP/kP9H/9r+Vf7J/WH9Lv0i/R/9R/2F/cT98/0O/jX+gf7W/iX/k/8NAH8A1wAPATYBWQFOAQ4BywB7AB0ArP8o/7X+Z/4d/uj93/3t/SD+YP6X/uL+Qf+U//D/TwC3ADkBsAEQAmsCwQIEAzMDOwMuAxwDCQPvAsgCgQIeAsABbAERAaIAOgDm/7D/fv9R/yv/EP/s/rr+eP4U/pj9MP33/O78Bf0s/Xz93f0p/m/+xf4a/2//xf8rALAAGwFaAYsBuAHKAbcBgAFEAQUBowAoAKj/MP+1/kP+4v2x/a79v/3a/Qb+Sf6m/hL/c//n/2UA7wB1AfEBWAK6AgcDOwNkA3gDbwNdA0QDIQPsApUCKQK3AT8BugBHAN7/fP8W/7j+b/47/v79uv2A/T396/yW/HX8h/y1/O78TP2+/Sb+fP7o/m3/9f9uAO0AeAHqATMCXgKHApUCewI6AvgBpQE6AbUAOQC8/zP/pf45/v/92/3K/cj95/0X/kz+iP7q/m7/CACfACgBqQEdAnsCuQLtAh0DRQNYA1YDRQMjA+UChgIaAqgBNQG7AD4Azv9j//P+fv4O/qX9RP3o/JL8RvwO/PL79/se/F/8uvwj/ZL9//1x/u7+ev8KAKIAQgHfAWICtgLnAgADAAPZApYCPgLdAWMB2QBNAM3/V//v/qT+df5Z/jn+J/4k/kL+bP6v/gn/e/8BAIwAGQGbARYCcgLDAgMDRANwA4wDjAN5A0sDBQOpAkEC0wFYAd8AXgDf/07/wf4q/pz9Cf18/Pf7hPst+//6BPsw+4n79ft8/An9m/0o/rj+Tf/n/4UAKgHSAW4C8AJMA4cDmgOIA08D9gKCAvgBYQHFADEArP8+/+f+rP6H/m/+Xv5V/lT+Xf56/rP+Df+E/xcAsQBGAcABJQJzArcC8wInA1IDawNzA2ADMQPhAoACGAKuAUEBywBNAMX/Nf+h/gP+X/23/BL8dfv5+qP6hfqf+uP6S/vS+3H8Ef2s/T/+2/57/x4AvQBeAQMCmQIaA3YDswPBA6EDTQPbAlECtQEMAWAAxv9C/+P+qf6X/pL+j/6I/pD+pP7D/ub+H/99//v/gwAEAYMB+wFhAq0C7wIyA20DhgOBA2wDTAMQA8ACXQL3AY8BKgG8AD4Aqv8C/0/+iv2x/Mf76Po2+tv51/kj+p76Pvvu+6P8SP3f/Xn+K//5/9EApQFiAgQDhAPiAxUEJwQSBNwDegPqAjACVgFsAIn/1P5U/hL+8f3x/QX+Jf4u/h3+Ev42/oz++v55/xkA1QB6AegBJgJWAoQCsALWAgEDLANEAzoDCwPDAmsC/wGEAQYBmAA1AMf/Nf+J/tb9Hv1W/Iv7+vrM+tb6wfqU+s36wfvz/Kr93/1Z/n3/wgB0AbUBHgLiApoD9wMSBBcECwTbA5EDFwNjAoUBxAAyALD/A/9I/sf9tf3n/fX9w/2c/eP9df7s/hH/P//B/3gA+wA/AXMBwQEhAncCogKhAqsCzALkArsCbAIXAtkBeQHsAHUAMgDO/wL/Gv5z/QL9Vvxk+8n6A/ta+/L6bfoM+5b8rP3S/dv9wP5oALEB9AHfAV0CUQMXBFAEDQTHA+wDJgTOA+4CGgKGAfcATQCW//T+hf5Y/kX+If74/ff9Fv4t/kX+cv7G/jn/uP8uAJwABAFbAaoB8AEXAhMCBgIkAl8CdAI+Au8BxwGrAVgB0gBhAPH/Tv93/rX9Jf1+/Ib7wfrd+i37s/ol+vz6t/y9/dL9FP4p/6UApwHeAQoCywLFA0IERgQXBPoD+APcA08DkAIVArUBGAFpAPz/jP/r/mj+Yv6H/nT+Nf4w/nP+tv7k/hr/aP+r/+n/OwCvAPQAAQEzAZ0BtgFyAY8BKgJtAuABUgFrAbYBiAH6AGMAxf83/97+c/6T/Wb8W/sF+2L7Vfs4+n/5l/p2/Gb9Y/2h/ev+5AA
ZAioCVAJNAzUEcwRVBCkEGQQEBJEDzAJbAj0CzAHsAEEA7/+X/zH/8v7M/p7+e/6S/u3+KP8o/0H/o/8HAEcAkQDWANUArADEABABLQHxAMAA5ABUAbgBtAFSAQkBFAEoAQcBkgDX/yD/tv5z/tv9sPxv+xL7kfty+xj6WPmX+nj8F/3M/DH93P7hAO0B8wE4AnYDrAToBIsEWQRpBEUElwO3AjwC8AEpAR8AuP/a/57/3P5e/qH+IP8J/4L+df4b/8T/9f/5/zMAtwA5AVgBFAEEAWIBnwFGAcsAzwAyAWUBPgERARsBPgElAdQAjgBYAOX/L/+N/h3+h/19/Jj7k/vH+/j6xvng+U/7kPzf/N/8q/2Q/2cB+gHlAYwC2gO4BMoEkwR3BGcE/gM0A30CDQJhAT8AQ/8K/1j/SP9o/nb9rP3p/pP/4/4Y/pn+BADjALMAUQC7AKwBDgKXATABdAHPAaMBFwHNABUBhgFbAbkAjQD8ACsBuAD6/1//Cf/H/jz+Yf2B/PH7+PsP/Gr7ifqk+qj7mPwF/UP92f0Z/4UAiQE1AgAD3AOGBPYEKwUeBbgEEARFA6wCOAKPAWQAUP/6/h7/7v4//pH9b/0H/qn+ov5F/of+af9XAMIApQCTADIBBgI0AtYBjQFuAVoBagFjAR4B1wDPANgA3QDVALMAcAAcAKH/Ef+a/hz+Zv2a/P772vsI/Kr72/rX+rf7dfzw/HL9+f3e/i0ANAHVAb4C1wN7BKQE6QRmBZcFDgXeA8ACUwIRAgIBgP+i/oH+X/7w/Yb9b/2s/f79L/5A/pj+aP8oAGEAfAD2ALgBTwJFAr4BhgHmARsCpQH5AJoAmgC+ALAAawA1ACIAAADC/3n/Jv+l/uv9J/2b/G38c/we/Er71fpz+5L8Lv02/V/9Lv6C/8YAbgG3AWcCjgOPBPYE8AQJBVYFMgVfBGQDwAJRAngBEwDw/of+VP74/X/9If0g/Zb9Fv48/kL+0/73/8UA2wDlAHABPwKzAlcCvQG7AQUCuAEBAZUAnQCpAFYAx/+F/8n//v95/5L+MP5R/hj+Pf1l/DL8ffxi/H77zPpV+4f8L/0f/U79V/66/6EABAF/AX8CmAMVBDIEmQQbBTIF1wQ2BJYDQwPzAvYBiwCq/3D/Ev84/mX9J/1o/Z39iv17/dT9nP56/wQAWADoALABOAJVAksCbAKsAnwCuAEkATMBUAHmACsAnf+L/+P/7/84/1v+Ev4J/rD9Cf10/EH8W/w9/Nj7yvs3/K78JP22/Tj+zf6s/3oA+gCtAa0CbgPEAxAEgATTBN0EnQQhBJUDLwPBAgACHAFoANX/Mv9z/s/9bP0w/RT9IP05/WP95v2+/oT/GwCWAPMAawEZApcCmwJsAkwCMQL3AZEBEAGRAEUABgB9//r+wv6G/jD+5P1+/RH9xfyQ/JT8i/wh/Aj8hvzz/EP9tf09/tf+hP8nAOIApwFaAvwCLwM5A6oDNwRlBDoEzQNcAxoDtwIaAocB9wBeAK3/5v5V/jL+EP6v/V79Sv2B/eH9Of7H/rH/aQCkAAMBtQFqAvEC2AJYAggCFAIRAqkBGAFSANv/y/9v/+z+ov6x/nf+3v2S/Zv9gv0w/TH9E/3i/Bj9Yv1r/Wj9PP7t/sv+SP81AJcA6QCYARcCXwLBAg4DRwNDAzgDSgMKA3gCDwLcAW8B8gBeAKP/Nf8C/8r+WP7w/dn96P0l/lD+Uv56/hL/mv/w/5QAEwE5AcABkAJmAssBmAGcAcIBIQFkAOj/Wf9R/2L/6f6k/pz+k/66/v79Jf7q/i7+MP5s/q/9//2t/qv+yv7G/kX/GwAHAAkAeQCJAB8B1AFhAVkB8AE7AmECYwJdAg8C3gHTAbUBaAHVAHsA+P+O/47/T//p/pj+Tf4e/gz+Wv6p/pX+6P5b/6T/IwC7ANEA4ABbAWwBcQEUAbMAXAFKAR8A0P+9/5//JACL/93+TP/k/rP+GP8d/w7/wP7
5/vj+rP5V/57/Q/9n/2r/TP/K/97/0v8zADkAZABbADQAVwGWAe4AuAHeAekAUAE5AqEB9wBGAQgBUwBDAEUA8P96/w3/1f7K/k/+XP7o/qr+nv78/if/Xf81AFQAJADHAO0APgCQAIgB1wBwABwB0ADz/wMAZQCBAA0A0P4z/3YAVf87/vb/CQAo/r7/rQDV/sL/xwB0/w8AIwCl/t7/uACV/7z/zf/F/2kADQASAPMA6ABBAEYAuADrABIBSQGLACYAsAAyAPj/lwCD/1r/lQCS/jb+wP+n/uz+o/+h/gX/hP9Y/00AAwBn/3oAZQEiALb/pwHGAIj/LwGnABz/egDoAN//p/8yAPD//v5LAEQBYv9u/3wBPwD9/n4AfQAOAPf/7f8/AYz/yP7TAM//fv7N/2YAIP91/3wAv/+w/4EAjABJAF0ATQGDAGD/hQAEAXX/pP8BAUj/Ef/EAKX/x/7Z/+3/af8M/27/0/+A/20AOgD9/mEAUwGf/+//BwH5/7X/DwCLANb/lP56ALwAtf5u/wcBt////nABgQDW/p0AxADL/8oAhQGYAED/iQB7AYT/3f+XAFv/RP+w/wL/TP/8/yUAn//a/gcAWQD7/4wANgCfACgALgBfAfT/mf+8AGQAh/9r//b/mP+I/kgAZQCn/UP/7v+d/o0AnACD/+j/nACBAJ7/egAwAOL/TQDp/24AcQAi/yP/0gDi/1P/xADa/3z/hQB1ADEA1f8qAKYAlACmAFYAgQGrARUAvf8DAMIABQBM/7n/nv8O//b/CwAQ/jUAjADQ/uwAYQDZ/qsA6QAsAHcBbwB0/8EA3v+F/x4Bvf/z/SsAVAAW/o//RQAj/oX/1ADm/r7/8wBp/5v/eQAKACEAyP/q/4UAFQAVANT/+/5x/xIA+f+G/2v/MABxACIA/v9/AHgANABcAFwASQBXAIgBwwCb/4sAmgBWABMAnv8s/7z/kACr/4v+6f98AWf/ZP8SAaX/5f/VAYgAy/7dAAcB7/42AIAB9//+/i4A5f/F/zEAof8qAHH/Pv9WAHL/Wf/1ANj/t/5kAJoA1f6t/iYASgBx/6b/xP/w/kj/3f+c/7v/7P9A/4j/nQC1ACQA+/8UADkA3ABkAE0AJwEgAIv/VQHcAO7+IQAKAUL/RwAPAd3+yP82AGP/jwBfAJr/LwDsALr/h/8fAT8AJwC2ANz/ewAVAW3/w//LAFL/gv+oAPf/Xf9ZAGEAV/8/AFgA3f49/zsAIgA8//b/KwAh//D/tf/F/vr+4v84AJP/iP9m/+b/kwB//3//nwCkAHIACgCYAJ4AFABkAAcAMgB8AMP/vv+gADL/PP/TAH7/Nf+QAPP/z/7k/2gA5f8+AHEAJACWAA8BGACw/2MBdQB+/+kALwGWAFL/9/9MANT/5v+n/5v/IwCcAI//wP9QACz/7P8tAJb/KgAMAJH/7f77/8f/8P4VAC8A9v47/+kA3P9//7QAu/+//hsAVAH2/w8AvwCH/xEAhQAnAHH/7P7KAG0A2v4AAOQAHf/v/gEBIABa/zEAOwDp/zEACQEZAUYAqf/PABkBkv/DAAYBZ/9AAH0AXP+h/7kAsv/B/jsA//+o/6EAAwBC/xkAwwDm/or/BQHL/5sAtABo/4b/nQCa/xr/mgAAACgAKgDR/8z/FQDUADL/7/5sAOf/f/+0AKwAdf/X/6n/Zf8iAKr/ZP8hAAwA6v81AE3/Vf8eAC4A6v8/ACgALv89ALIAd//x/1wBdABg/+0ATAFY/wH/bwCgAKH/Gv/B/zoAYv+A/14Aev8//0sAhAAVALr/yACGAE7/sADPAFT/PgDFAAsAYgBHAMf/TgANAMD/ZQAZALn/UQADAC//IgBpAAQA4f80AIAA6P+6//X/QACh/0H/JgCMAD7/YP/fAM//4P5gAAoApP8nAef/jv7EAIkBdv/K/3YA3f8tACgARAA6AHX/F/+k//D/b/8DABUA8f7I/4IAtf+w/wQA+f8
ZALEA7v+d/+QAVgBz/xcAJAHU/yD/7QBWAGb/ggD3AFX/fv/bAPH/xP+xAGMAHgBXACz/4P8vAdP/6/4UAJkAhf/N/43/P//T/ykA4f+l/3wAgf+V/6gA7P/3/8H/vQCyAOT+hgCdAa//Uv6HAHwB8P6i/mIArwB5/h7/FwEX/yv/7wDL/13/9QDm/yf/NAFlADj/tQDlAKX/8v8gACwAjgDA/xb/3QD4AEv+EACSAX3/y/+NANb/3f8oANT/tP8dAEMAqgAp//L+BQG1/4r/kgAbAC//9/+OABj/jACUAEn/agBDAPr/0wB7AGv/ugDbACL/KwD2ALj/l/5JACsBd/7y/mcAWP9N////PQCO/8f/8/+4/1sAeAAwAM7/NQCPAD8AiP8gAM4Auv9z/1AADQAS/zEACgE+//n+AwGDAN3+qP8wAbMAHv/M/6kA8/8DABoAHADq/ysA2v9Q/0AArv/h////FgAyALT/NQAcAG4A2f/+/1IAz/9pALMAg/8L/9gAPQAE/7n/JADL/xr/u/8qACQAqv/Z/1MAIgDr/9v/AAEnAAMAaQA5AIcA2P+z/xwAGgCB/1wAw/+d/1EAov+f/1z/dwBrAOf+4P+MABQA1v8ZAHgAqf+//8YAbQBA/xkAsAAPAFn/s//MAOH/IwAzAMT/lQBVAFb/3//uAOn/1v9lAMb/hP8yAGgAXv/e/xMAr/8JAIX/Yf+EABkAP//t/xAAXADb/7n/2gDFAM3+y/+2AZT/9P6PAEoBiP/T/oIAoADA/hT/UwH5/33+0P/DABoAnv+r/6b/VACBAMH/nf+DAJoA5P+k/93/ZAA1AKr/5P/OAPD/Vf+8AFgAmP+e/9sAegDY/l0AlAGu/57+0ACYAMb/uv91/0oAHwCK/2X/YwCAAHn/ff+x/6YApgAi/+7/sgDw/zAAXABV/zgA0ADi/gIAdQFT/xT/CgHr/+L+eACcAGL/Q/92AFsAVP8OADoAvf/z/9T/GAA2AKH/AQCFABcAS/8HAM4ABP/A/2gBaf9I/+0Auv+C/4sA6//z/9QA9f9b/6wALgAj/1QAtAAo/4D/aQBeAMz/B/83AMYAYf9k/xABkAAM/wwAzAADANv/UgBIAOv/XQBHAOL/m/81ACEApP8vAO7/m//C/2sA4v9R/wYAogCj/y//eAB+AI7/AgDDAMr/q/+HABsA7v/x/6H/YgDHAKz/af8cATIAbf4yAFoBWP8Q/8wAzQBh/7H+uwAuAAH/3v+t/7wAMwDp/n3/gwBoANb+yf/mAGAAhP/n/9MAJQCk/7H/HgG1ALT+0//mAQAAk/4rAOoAhwAj/1b/YgACALD/QgBjADEAB/+F/3EB7//f/jsAeQEgAB3/WQBeADsAjv94//cAvQDz/lL/gAF0AKz+Hf+xAHAB0v5C/lABYQHO/rL+VwC2AM3/Ev9F/8wA6QAE/zP/XACEAOj/kf/T/6EAXgCc/5sAOQBp/2UA4AACAH//OgCoAAQAdP/2/5AADwC8/3L/tP9qAOX/cP+Y/yUA9v+Z/3IALQBC/7X/4wAHAXX/Vv/qAJEBBwB9/lIA9QFDAM3+ZP81AS4B8v7X/poA6wBq/+3+8f9tAIz/Kv9ZAHEAjP8v/7n/8wDG/6b+hwAYAab/Qf95AM0At/+B/1IAsAAOAGX/lwB0AWf/nv67AOkA5/4S/ywB0QAt/7n/+v/U/6b/of+9/+P/sgCb/4X/swBhALX/gf8MAI0ARADI/y0ArgB/AI3/pv+EAAgAiP/x/2cAvf97/yAAJADX/3L/wP8tANX/r/9gAEoAVf+w/4AA4v+b/7AAQwC6/wIAjwCwAGX/1/9kAFMAcgBQ/67/hwAWAOL/CwBdAFcApf+e/0gAAgCl/5n/TgB0AFf/nv98AC4AUf/k/4sARADV/53/eQDEAJ7/h/81AQABav/w/yMBWgCy/ygAZgDr/3L/uv/w////qf8SAA0BtABd/+b+nf8KAJb/Rv8WAM0Au/9
x/uX+FQB1/0L+qv8NARYANP/d/3IA+v+W////qwDSAI0AiACXANsAzv8n/14ABgBh/5f//P9PAOf/LP9z/1UAwP8g//b/7AAxAFT/oQD0AOD/p/9HAM0A+P/E/48AvgBtAJj/i/85APL/Vv+5/3wAQAD6/7r///9ZAMj/g//O/4EADQC8/1MASwDm/7f/wv8BAAQAyf9PAI0ABQDb/xkAdwBZAKL/LgDhABgA+v/NAHkAm//w/7IAGgAi/57/aABOAHj/c/9WAC0AnP9s/57/IgAeAMz/+P+NACsAv/9RAFMAp/+v/1sAXADM/3f/KAB4AGr/5P5j/5n/Bv/n/oX/fv9E/1//kP+D/03/iP+d/7L/nf9o/w0AdgCR/xL/tv/S/xj/IP+d/4H/q/8FAD4AUgAWAEQAEwExAdMAngEjAncBsAGCAhwCWAHCAY0CAgIYAYABLwKMAU4AEgCAACEAUP8f/5r/yP89/yz/g/8U/7v+2/5c/u791f2Y/ZD9mP1g/fj8l/yW/F38tvvA+538Sv3w/dn+8f+JAb8C+wJjA28EHwWaBQYGFgbeBa0FUwUuBAIDLQI1AVIAqf+k/mv9//xt/ED74PoV+7D7I/x9/HD9i/41/33/ZwCqAR0CNQI7AzAE1wOjA0YECwSsAq8BbAGrAFX/tP2w/KT87fuP+qH5bfkC+eX4mPmP+n37M/zc/UwAIwLhAt0D7gVBBxQH2wYyB9YGNwYFBgoFoQOkAlwB8P9J/lD8QPvz+vn52vhl+U76kvr2+qv70vzz/c3+EQBwAS4CwwLeA5MEdQRKBLQEfwUQBTsEpwMpA7gCfwFwAMH/Mf+7/lP+I/69/R/9sPw8/L766fkf+iD6xfrl+0D9Uf5c/6sA0QFLAq4C6wPCBL0EgQSSBNIElwSnA+UCqALyAWwB2QBC/7f9UP38/KT76/qp+7/8+fxD/SL+Df+1//z/hAADAW0B8wGAAsgC0gIKAz0DMAOcAvUBiAH0ALYA+v/4/pj+TP6f/aj84fu4+sT5H/mb+C35evqp+/38Ev8MAYwCsgNcBBUFKgbzBtYGsgaEBgEGlwXiBOADcAIxAXAARv+B/cn72/pv+iD6Hfq0+gP8Yf2E/tD/xgAXAWQBqgHRAfwBEwI5ApYCxgJaAuwBpwEiAYQA2v88/9L+m/6I/kr+DP77/TX+QP7r/cT9pP1G/XX8KPzx+4f7OfyI/d3+7v8cAW0CigM7BC4EQwSqBLkEcAQgBKEDbwM2Az8ChwEkAVgAGf9K/of9gPxC/HT8WPyd/KH9nf6X/18A1QCOARECNAJAAk8CHwLpATQCSALvAcoBGALwAR4BXQD8/33/WP5x/fL8Xfy8+xL7aPrD+QP5N/i5+JD65fsN/U7/DAK7A6wErAVHBs4GIAccBx0HvAbhBT0FkATMAgoB2P9O/oz8LftB+pf5NPkj+d/5M/to/Nb9jv9XAfUCxwM/BNIEBwWdBBcEngMrA78CUgIeAoIBagCA/xr/3/4g/lP9Gf1Q/YX9of19/df9R/5r/sr+pv5D/iD+AP6v/Tj9hvxj/E79Mf7u/sX/qQC+AcQCBgOZAsYCQwNMAy8D/gKxApUCcQITAvMBiAHWAD8Aqv8s/7n+1P38/DH9qf3l/WT+Fv+E//D/uwAPAbkAQQBIAPYAVwFGAYYBiQI7A0cDDQOqAi0CNgFqAKH/lv6w/X/9e/2R/Fr7ePrw+e/4Vfh0+Rn7Wfzy/VgAXQK1A4kE9gTLBWwGZAZNBqAGbQaEBeEEygNPAsQAM/8D/uT81vta+0n7Cfv6+qD7wvwl/iH/xP/gAPABNALsAZMBIgG8AIcAaACdAPwAiQEYAlcCNAKmAQ8BUACM/+z+Rv4e/jz+TP6v/if/Pf8m/+3+fv7u/Rb9JPyG+x/77fqU+9b8Lv5x/6QAvQGZAjUDFQPyApkD5wPXAzQEgwSnBLoEiwQSBGIDQgIKARQAK/8M/vf8P/zC+6f7x/sS/Kn8Vv3
3/b/+af+l/+D/XADhAF8B7AHbAvQDlQTKBLcEPAQ9A9gBogDF/8n+t/1G/XD9/Pzb+/76b/qL+Yv4hPe096n5sPtW/VT/WwHDAlEEPgVfBacFUwZGBwQI+AdBB8gGFQasBAgDWwHC/7z+Bf4u/Vr8fvuy+rf6YfvV+z783fyw/bv+QP8k/yn/YP+5/18AaQFsAncDuQSvBbcFFwUyBAUDtAEtAO/+Qv7W/cf9Av4d/jX+/P1Z/bD88vv6+hP6e/lU+Vj5Zvmy+t/8gv6t/zQBngKIA2EE2QTyBHgFNgZ9BsIGxQZkBggGYwVuBGcD/wFJAMH+jP16/C772/lR+Wv5tflE+j77Vfxc/Zj+sP9KAIkA9gDVAfICDAQXBfEFjgbSBnQGcQX+A1ACuQBt/0j+W/0C/TL9HP2m/Bv8cfuR+oD5g/ju96D39Pe8+SP8Bv6c/1gB6wJFBDYFlwUkBhwHuQfrBwAIRgcfBjQFQQRmA4MCNAEPAEH/7v0r/Fr66Pha+IT49fjo+Wr73/xA/m7/HQBdALcAkwHPAuAD3wQZBvAG9wZvBmQFugPlAWQAWP+4/kz+Cf4h/ir+nf2f/J37oPql+ez4ePhS+Jr4Gvmx+UD7vv3d/zgBdgLTAw4FuQXABf4F2wZ/B2QHMQfOBtgFxATbA+gC4wGXABv/7P2W/LP6XfkT+eD4q/hw+Tj7wfyu/X3+Q/+r/9X/TwBXAX8CtQNHBYwG8gaWBqUFWAQoA/YBrgDD/3L/YP8P/7T+UP5t/Wj82Ptk+9z6bvoP+u35/vnk+Vf6KvxG/r7/LQG2AsgDWgSTBL8ESAWrBXAFXgVwBcgEuQMUA8sCbALHAbsAtv+2/jD9cvtc+hL6Qfoi+4f82P3B/nD/3f8JACYAMQBzAFgByAICBMAEDwXNBBEEIgMdAiUBagACABYALQCc/7r+KP6k/QT9p/yG/D78APzt+9r7jPsM+4/6nvon/Jn+TQDlAJABbgLiAsYCUgItAvoCDwRtBG4EPQS/A1kDQAMcA9ICPQJAAUEAUf/j/Uj8kvvY+3L8Mv35/W7+pP6x/nL+L/5S/uX+CgCiAd8CdQP+AygEhwPKAmQCFQK7AW0BKQHlAGsApf/v/oT+Lv7c/aT9g/0k/Xn88fty++v6lPqn+vL7Fv5o/6X/7P+IAPEA8AC4AAkBOAJCA8UDKQSHBKsErATzBDsFNwWtBFgDlgEKAKX+gP3l/Nf8S/3F/ef9Z/21/Hz8Xfwk/Jf85/1a/9AABgLEAlIDZAPQAnoC0wInAx0D2wKhAmoCCwJdAYUAHgDv/4r/1P7H/cX8+ftN+4766/m/+fn5tfpF/Lv9C/6//br9S/4+/+7/GADPAKgCWwT9BCMFdgUhBvQGRgetBqoFcgTSAg0BaP8//qz9O/2v/FH8SPwP/Dn7o/r5+rX7g/x1/X/+vv8bAQgCngJRA/UDVwTHBCUFPAXuBE4ErgMlA4MC2QEcARYAE/9G/mL9B/yD+pj5OvnR+FL4N/gY+b36EvyB/KX8U/1m/oj/agAEAT0CBAQlBaoFQwb2BpwHNwhECHsHbQYJBf0CBwHU/x//T/54/en8V/x7+2v6ovmr+Wn6VvtC/FL9jP60/5wAPAHUAb4CoAMEBEMElQSlBIwEdgQvBKQDEAOAArsBnQBR/wz+Bf1Y/Kf7tvrq+ZH5T/ky+dj5LPsl/E/8W/zr/OD9zv6J/1kA4QHmAyAFLwU3Bf8F8wZZBxAHSwZfBUoEswL2AOb/W//O/lX++v2F/cz8s/vE+sn6dvsD/F785fzT/ff+2f93ACQBBgL7ArkDAgQbBE4EYwQ+BAgEwgNaA7ECwgHDAMD/pf6n/eb8L/xl+6L69/l5+UH5pPnS+vD7H/z9+2X8HP28/Wv+Qv+EAFkCxgMgBFIEPQVwBioHSgfVBiwGiwVkBOMC8gF+AdUADAB2/7/+uv2n/Mz7qfsq/G78PPxj/DX98/0//nD++P41AJ0BNwJGAr4CjwM
ABPkDywONA1cDFQOSAswB6QA0AMH/Tf+K/nj9YvyG++z6f/pc+sn6bPuN+zT7BvtQ++z7sPyL/ar+RgC0AT8CeQI+A4sE2QWUBpUGVgYpBqMFjQSXAzkD8gJIAmIBeQBq/y/+LP2f/Gn8SfwH/LT7xfs//LP8Ef2t/ZP+lP9tAPIAbgExAv4CgwPdAw4E6wOTAz8D6wKCAv0BawHOAA4AB/+5/Y783Ptg++X65vpb+0r7fPrZ+fH5r/q3+3v8Nv2x/n4AjgEXAusCRwTRBdMG9AbCBqIGRAaZBf0EkQQYBEwDMwIFAcf/hv5y/br8Rfzb+2D7//oJ+4r7Nvzc/Jj9fv5o/yEApwA7AQ8C6AJdA30DmQOoA48DcgNIA/YCngI2AngBawBJ/xf+8/wY/G377Prm+jT7C/tH+qr5vflm+l37WvxE/WP+tP+kAEcBTwLGAx8FIQbOBvUGogYaBocFKAUEBZAElAODAoABVQAz/0r+gv3z/ID85ftb+0D7cfvH+1P8Bv3G/Yv+P//g/6kAtgG2AksDigOqA54DawNLA0QDIwPTAmQCzgH3ANz/rv6q/en8Vvy++0X7R/tf+8/6B/rs+Yj6g/t3/Bv97/1g/5sAHAGpAcQCJARVBdcFqQVsBXEFZwUGBXwE+QNhA34CXQE+AFX/pv4U/nv9/Pyd/Dn8EPxp/PX8b/3X/Tz+0P59////gwBDAfwBbQKlArUCugLCArMCtgKuAksC2gE8AWQAj/+//t/9Hv2t/GT8O/xa/Gz89ftr+zb7dvsP/Of8kf0f/hX/4v9GAOsADwJeA4QEFAUdBRIF/gS6BGQEOAQIBJcD1gL6ASoBWgCN/93+Tf7S/T/9pvxe/JH8Df1r/Zr93f1S/sP+RP8JAPsAuwH7AdABmAG7ARUCSAJ0ArICpQIYAkgBiwDt/2X/5P5K/rv9Uf3Y/Gn8g/zM/GP8lftK+6X7h/xq/a/9/v0A//v/aQDRAJMBrwLcA3cEZQRABGkElwRjBAcEwQNXA7QC6QH3ADsA4f9//+b+Rv66/WD9XP2B/Zr9tf3r/Sz+a/7K/nD/PADsAEkBTQE3AUYBeQG3AQcCXwJnAu4BSwHIAF4ABgCy/yr/gP7z/Wr99/z9/Cf9v/wB/Jn7tPtB/PX8Yf3C/ZL+bP/Q/ygABwFSApIDXwSTBHIEaQR5BG8EYARWBBQEbQN8AnEBhgDp/4D/CP92/tr9PP3O/MT8+vwx/Wr9uf0x/sf+R/+z/zYA1ABVAXUBdAGuARQChQLJArUCYgIGAqYBFQFuAMr/Iv+c/h/+if3f/Fn8Uvxq/An8Vvvz+mD7Yfw7/YP9of1S/nL/XgDyAJsBzwIuBOUE0QR8BH4EwwTXBJEEFgSUA9UCwgHMAE4AEQCT/8H++/1u/Qf9o/xv/K/8Q/3D/fj9PP7b/q3/VgC2ABEBeQHNARUCagLdAkEDPQPRAkQC2wFhAaEA9f95//n+PP5C/Wj8AvxL/K38O/xX+wH7ePtI/Pf8dv3z/dD+3v93ANsApgHdAg8EyQQEBekE0gTcBKYEOgTYA2sDtAKvAbAA6v98/zz/sP7V/Q/9n/x9/I/8wPz1/Fj98v1x/sn+TP8kAAQBfwGoAd0BTAK7AvQCIgNEAxYDcgKGAbsAXgBDANf/7P7t/T79y/wy/HP7Ufsa/J/8+vsh+1T7d/yV/RL+Q/4O/3gAagGVAfoBJQN2BBQF8gSVBHoEggQZBGcDIwMdA3kCMgEbAKD/dv8a/0b+d/0r/RD9z/yq/P78rv04/mr+rP5m/0oAzwADAWABAAJ8ApQChgKuAvYCzQIeAnYBDwGsACwAkv/p/lH+qv3G/Br8Dvx3/OP8zPxD/A38fPwc/ZT9HP7d/rf/fAD8AFwBCQIKA/MDcASJBGMENAQaBNQDTwPaAo0CHAJPAWQAsv9T/w7/kP7b/Uz9EP0H/Qz9If1g/dX9XP65/v3+e/9MABEBZQFuAagBFwJOAj4CLAIqAgYCmgH
0AGEAGQDa/1T/m/74/YX9Fv2K/CD8VPwA/Tz9tPw3/HP8Mv34/YH+2f5u/4IAiwHyAQ4CkwKKA1QEcwQMBLoDzAPeA4ID8gKBAgoCYQG1ACAAj/8d/7r+Jv6B/S79Of1p/Zn9wP35/Wv+B/+W////TACXAAkBlwHrAeMB5gFFAqYCfAK+ARMB+AAvAekA5P/p/oL+I/5e/ab8Y/xs/Kz84fya/DX8N/yR/Bn9xv1k/u7+tv+pAEoBsQFRAjwDFQR0BGYESgRQBEoE9QN5AxMDvQIoAjgBSwCj/zD/0f5L/of97PzR/PP89Pz3/CX9iv0g/rP+Nv/c/5AAIQGZAeIB4QHcASwCngLYAqwCIgKKARYBoAAKAGr/6P6B/gH+Qf1h/Kb7RPuM+2385vyL/ID8Yv00/lb+u/4OAJYBjgLUAskCBAOEA+ED9QP9AwoEywNbA9oCGwJVAccAQQCy/0H/Bf+l/i3++/3o/fL9A/7j/RD+of4l/3n/sf8iAMoAUAGOAZgBvgHYAdgB9gH6AUYCwwJAAioBkQAQADz/Yf7E/UL9qvyL++/5XvmH+Vv5aPo//J/8dfyE/dj+rP8rAQEDNASzBcUGYAYcBncGgwZABr8FjQRHA14C0gDi/s79EP1B/CH8/Ptv+6P7a/yZ/MH8vP3T/qv/jwBIAcQBZALVAhEDjwO6A5YDuQNXAwMCwABCAA0AwP+T//v+FP6h/S39u/yU/If8vfz8/Of8Hvwb+7767fpI/Gz+J//k/m//2QDiASsC6AIrBGMFLQbfBQ8FXwQFBDcE+APrAuYBKgFKAPr+wf33/Hj8cfx9/HH80/xz/e/9af7//p3/KQDKAHkB7gE+AnsCrwIPA04DHAPDAnAC6wEtAXgA3f9o/z3/Cf9y/sH9R/0X/Qn90/xb/Pb74fuW+876/vnd+Tr7e/3P/iX/6f9OAW8CFQOtA6oEGQYZB/QGFAb2BB0EwwM8A0UCVgF3AIH/Vv74/AD8yvsA/Fv8yPxG/f794P6L/wYAdgDuAMEBlQLHApUCjwKuArgCpAJOAtEBhQEpAXAAk//9/sv+2v4W//P+Qf7G/Zv9bv05/ev8n/xq/PD7A/sb+gX6fPvJ/Sn/jv9bAIABKQKpAkIDMgTfBRAHxAa7BYcEzgOsAxIDGAJ/Ad0Ax/9Z/sP8t/u/+yf8bfza/H79Vf5c//f/BgBAANcAnAFMAnYCQgJFAnMCdAIkArABVQEvARQBmQCR/5z+Z/61/vz+CP+4/lL+F/7H/Sf9evwl/Nf7QfuS+vL5c/p5/Bv+v/7F/2kBZAK9AmkDJgQDBRoGdQbhBeUEIgTeA1MDVwKaASABLgDL/qj9ifzI+xn8sPwm/b79cP5F//D/RwCkADQBnwHHAfcB0wGxAQ0CKALwAcQBewEQAZQABwC0/43/K/+p/mP+aP6i/un+wf79/Vb99Pxr/Hf7Rfrt+UX69/qc/A7+1P4RALcB2gKfA4IEFwX1BbYGKAaKBdsEnQMjA+4CGgJ9AfUAwP9D/u382fvT+178ffzy/Nz9uf6I/8r/wf9jAIIBEQLVAYUBcwHPAT4COQIAAtIB0AGgAaIAlP8f//z+1f6j/mP+N/5I/oj+v/5m/l39ofxY/HX7Tfre+TX6MPu9/Nv9gf7O/9cBpAN6BL8EaQVCBnwG9wUKBTgE0QNuA3ACiAEGAWkArf+N/hT9L/wC/BX8ePxR/Sf++v7b/0sArgBVAeQBQgJZAiwCCwL5AQYCNgJVAgkCfAHyACYAUP/T/m/+Hf7V/Xn9SP1A/Sz9Df3d/Jv8Jvx/+8z6V/o6+qb6N/wH/gj/KAAJApgDqwTABYAGzgYsBwkHAQbABIcDvQIRAhABYAAfAFH/Qv6h/R79yfyY/Ar8J/xd/Yf+cv82ANsAiwEHAvgB2AH1AVECAAM1A5UCUwJoAiICpgHaACYA8f+a/8r+5/1E/Rb9NP0V/aD8SPwn/DH8G/xL+4j6sPoa+zH77fuA/ev+SQC
GAY0CxgMGBQkGVQYABgIGrAWdBG0DVwK8AVwBAAFfALX/E/86/rz9Yf3K/MH88PwX/Qf+OP+1/yIA8wB9Ae0BGAIvArwCNwNSA0ID9QKZAnsCVQKeAcEACgAv/3D+ZP2Q/Jn8T/zH+637kPt++6P7jvtC+0T7lvvX+2z8Sf02/rb/VgFzAl0DZgSWBY4GtgZXBp8FtwTPA5wCfQHbALcAmgDo/zH/uv5b/hr+lP1E/Yb92v0h/lP+tv6F/zUAxAA0AZUBBQJfAsEC1AK4AtECyAJ9AkQC9QGaASABZABf/1L+gf32/Iz8Bvy++9z7Cvzt+3f7Ofto++j7gvzp/I79YP4o//b/mAC6AQ8D5wOFBNoEUgUwBSEEdgMSA7ECJQJxASwB6gCPAPz/+P5+/n/+iP40/pf9uf0f/kv+ef7X/q//igA2AYsBYAF5Ad8B9gHUAQECpAL6AqcC+AEXAVsAvf/p/uj9EP2i/Jb8b/wQ/O/7Ovyl/OT83/zC/An9nv08/qP+3f7j/k3/XQASAdQBPgNpBAYF3QT8A1ID4QKaAkoCIAIuAi0CJQJlAV0AFQAUACgA+f+f/5j/Mf91/ur90f1m/hj/d/+n/ygAzADVAK8AqAD1AMsBLALcAagBigE3AYgAlf/P/nP+Cf50/QT9z/zd/PX88fwR/Yf99f0L/gP+rP1e/dv9Xf55/ur+rf9gAPUAUwGKAf0ByAI5A0IDCgNyAi4CKwKvAYEBuwHAAeQBwwFZATUB/wCSADkAtv89/zX/Uf8i//z++P7o/hz/Qf+A/0YA3gArAXABaQFXAVwBOgH1AMAAQQB6/9z+UP4Z/jX+Ff7l/f/9Pf5s/mP+NP4r/mL+V/7m/Yb9kf0j/qb+5/5U/8r/EQBfAKcAyQAbAWYBLAEmAUUBMgFvAa4B2gFcAuECDgMUA+8CUgKcAR8BVgCp/1r/BP8O/y3/B//q/s/+6/5d/7z/EQB9ANQA3wCsAJIAYwBQAGgAVQAhAPX/2/97/+X+uP7F/r7+9/4c/yv/If+w/jz+Bv4N/hT+8f0K/h/+Vv4W/4P/kv/B/9j/CAByANAA9gDXABABGwFMAPP/hAAvAckBJQIYAicCNQJ9AW8AHABIAG4AfwAhAHj/RP9x/1v/Bf/7/mT/5P8CANb/3P9CAHoA7/+e/7j/qP/R/6//uP8rAOj/EgBUAM3/5f92AFIAHwDh/3b/kf/4/kH+xf4l/0//yP+o/6X//f8hAEQA5f9g/6v/7f9V/yb/h/9W/2b/vv95/9//iwCWAF0ARwB+AD0AAQATAPf/aQDKAHIAAQARAJEAOgAvAN8AdgCZAE4BBAC6/8kAWwBSAGMA+f8JALL/kf+s/wT/1f5p/6z/2P87AAcAy/86AMv/pv8bAP//iwDPAEAAQwCQAKEAYgA8APoAMgHkAPEADQAx/2v/bv/I/oD+PP8r/5v+V//+/o/+m/+C/zb/4/81ADUACAD+/1AARwCFANEASwDCAHsBmgCgAOQARQBDAPL/b//O/9j/rP8RAFP/ov6F/13/Sf/h/0D/yf8mAJf/JQBZANb/NgCkAAcA2/8gAUMBjwDUASUCMAFbATIBYgCXANMAowCJAA4ANP9K/5//xf5B/5r/nf78/jn/Of+R/73+5f7P/wH/dv9UAIj/HgB2AOz/KAABAC0AIwD5/2IA/P83AJMA8/8wAKYADAB3/3v/vP9S/7L+i//q/2f/EQA9ABv/hv/+AMsA/f9sANkA8/83/5z/iAAhARQBGgGtAIMAKwF8AJn/ygBGAbz/SQBkAHL/VgCSAJkAYACBAN8AZAD7/0AAbQCp/+v/GwBv/2H/z/+f/3b/iv88/+P/EgCb/zEAEQCi/xwBFQCv/ooASABP/7n/iv8u/3P/Rf8L/57/mgA6AGH/VwDw/3b/xv9Z/3f/uf/y/0gAZQADAKf/YABjAOv+4P9yAJv/QwD4/0T/3P/z/2H/7P9PABQBoQH7ACQAnwCOAf4ASAG
UAEsAjgHvALz/LwB+AK7/KP8u/w3/GP8uAMD/d/8HAKr/Mv+B/17/c/9qAO7/PABNAV4ASgBRAfX/r/8gAfIA8wBoAeT/WP9m/3H+1v5J//z+8P+YAL3+G/+A/9f+y/+L/3P/JAAi/4L/dwAn/1EArABw/woA+v+7ADkBbQDVACQBQwB9/wAB1AAZ/9sASgH2/vj/jAFh/4z/IQHN/x//SACU/1X/SwDs/8z/rv+PAKv/0P4aARsBzP9SAXwBmgBcAQwB5ACaAA0BWgHZ/7X/OABw/z7/3v4s/qb/r/5//QkAIf8x/ef/tgA0/vz+EQEk/3L+AQFj/y//vwHU//D/dAFLAEoAbwAx/7P/eQC+/+T/UwC4/1j/LgAI/yX/5gCx/64AiQDo/lUB8f8W/2sBqf/4/ykBef9hAKsBBAFDAAgBuQEs/+3/hQKTAL7/oQHCAIH/dQCW/+L+1/8aAPD/6v90/2AAoABp/iYARgFf/lj/qgC+/pX/hwAr/zwAZ/9o/ogA1v/6/jAAbQDg/9L/BgBX/y3/IQBm/+D+gAB1ADP/iwA7AND+EgDV/9j+GP9CAK8AQP/J//0AS//0/9kAq/+FABQBRAH+AKUANAHlANr/PADYAML/FACdAB//dQBfAaf/4/+t/93+o/9AANj/zP9OANQAzf8G/8D/DgBKANYAwgBTALIALAC1/8n/AwBdAJr/1//X/9f+lf+q/6r+qf9EAK3/DgDz/93/y/+0/0gAlf+0/2cAIP/V/3sAyv5cAM8AYP/XAIIA2P9/APL/2//f/w0AoAC9/zEAdwC4/34AXADC/+3/UQAbAO//q//b/w4A3P+Y/5b/QgDT/xAAiwA8AFgAXgDN/+r/8P/P//YA6ACn/z0A9QDr/zYAGQA4/2sAlwAe/3X/awC0/4P/GQC0/1v/yf/c/2T/2P+7AFwAo/+C/8X/8P/S/7b/3P8gAIEAWwDQ/ywA7f8w/w0AJgCA/yIAUADKADgALv+gAMAAbP9AAGYARv9Q/wsA7P+//qn/8/+o/nb/OABy/2YA8gCaAP0ALgCQAEoB7QAEAAkBjAHB/78A9AB7/2YARAD//rv/b/9y/xAA4v8kAHr/Kv+A/5D/ef8PADMA7/8jACQAbADx/37/KgCzANv/OgDSAPf/rABfAFX/CAAwALv/8f87AI4Aq/+w/oP/Zv/X/hn/xv8XAAH/OP/y/w7/If/f/8b/5//9AIkAGQBPAXgARwAFAZIA2gBIAI3/XwD6/xQATgCn/7UAuv9+/08AUP8ZACAATP+8/13/yP8GALr+owDvAGL/zQDPANP/gQGNARr/dQA8ARb/gP9MAcX/ef/oALn/YP/6/9b/W/8TAIb/7P4NAG7/3P68/9X/7v9aAHb/cf9SAE0AYAA4AJYA4QBe/w0A3AB8/y0AegHZ/5//6gDD/1//FgDP/9f/AwC0/w4AYv8Z/9f/j/9GAEYASf8mACYA8/8tAEIAsgA7AFYAiQDU/0AABQENAB0AmQAJAAwANAAeAJ3/KwBkACr/z/81AAP/9v80ANv+dv+VAAAAf//w/yYA3/8nAMb/k/8kAZwAHgCRAFsA4f8pABsA/f/f/+X/aQAo/5D/WQA5/3//SwD6/q3/TgD8/uP/HABR/wAADQCm//r/AwARAF0AmABSANP/2ADdALf/swCdAAEAegAQAOH/oQAdAAIA+/+V/9X/Xv92/9f/jP/L/3MAyP9r/+//NwBAAEEA8f84AHkABgBXACQA7P/FAEwAjP+GAFMAq/+IADcAff/t/47/Uv+U/9b/bgDw/y//zP/8/33/2f9VAMz/lv83ANH/sf86ABsAWQBOACoAdAAVAEcAcQDL/+L/YAAFAE3/vf8vALn/9/82AK//BwAZACIAYwC3/3QAfAAPAG8A7f8LABsAzf9tAE0AjP9+ACsAlv+BAAQA0/9xAG0AxP8fAEMAwv8LALj/RAAuAG7/XADM/zf/jQC2//f+5f9t/xwAKQD
7/vz/+f8z/yMAFwCj/24AdQC+/6z/AgDK/7j/VAAFANP/QABTAAYAkf9DADYAnv8+ADYA2P+SAD0A8/89ADYA7/+b/1AACQDB/1EA/P+F/xkAHgAbAEEAKgBlAFQATgAjABkAMQBAADEADAAfABUAEAArAOH/rP8jAN3/U/+7/7r/ef/U/yUA9/8oAEcAJwDj/xIAegALAA4AIwDa//b/BgC3/5v/KgBQAJz/8P8SAJj/QAA8AHf/3v9GAMj///8fANL/CwDe/wIA1/+4/zoA4f/t/0QAwf8ZADgAp/+OAEsA9/9dANv/NgCHABEAy/8rAGkApf/3/4IASv8EAEwAKf9IADcAYv9VAC0Ax/92ACUAGQA3ABIABAC0//f/RwC//7z/OABQ/6H/QAC6/wEA9f/S/zEAyP/3/z8Aq/9zACUAgv+eAAsAx/+FAMb/BAAmAMD/IQD+/8n/OgD//9z/JgDq/wIAGgAhAOP/BwAEAMP/JwA/AM7/4P8kAMn/5/9SAJP/uP/w/3L/CgAMANj/9//1/zgAJADF/2IABwDv/8IABwDl/4gAKQAbAEUAIwAAANj/EwC7/9z/HwCF/wEADABd/2UACAB8/5IACwDn/58A9//K/0QAt//d/1MA2P8UAGAA3v8hABgAsv9hAAMAZ/8HAP7/2P8ZAEsA7P/e/xUApf+i//D/BAD2/9//DAAcALT/KABEAPb/ZQA5ABgAUwCq/xEAnACA/xAASQCn/zUArf+J/z8Azv+u/yYAqf+9/xgAqP8GAPL/8v9FAC8AAQATAFwACAAYAD4AHADn/0EACgC2/yMA2P8UAEoA7v+9/wIAMwCb//D/LAC//zkAKACq//3/2P+c//T/AABMAA4A1v86AN//GwA1APj/KQAwAP3/CABDANX/PQAzAOz/igAKAKv/MQDA/6L/7/+E/97/wP+h/w0AEAD3/7r/7f8NAOH/NgAoACYAOQBAAEYACQA0ACEAEwBBAAsAJgAzAK7/DAASAND/1f+4/+j/8P/e/+v/KAD5/9z/z//a/0UA3v+m/zoAAgDu/2YA9P8BAEMA5f82AEUA9v9EAD4ALQDn/7b/VwDi/8j/WwCf/7b/MgB+/6L/AQCm/4T/4v/d/2r/HABsALb/NAB0ADYAWQCy/xMAQQDP/3UA9/8KAJYAsP8EADcAZ//0/xsA0v9IAOD/1P/e/9v/4f+m/xkAIwDS/ygAVAAHABUAHQAuAAQALgBuABQAfQBgANf/VgDy/+P/EwDc/zsA7v+r/+v/tf+u//H/m//s/+T/gP+8/9f/yf8RAC8A7P8PACIAGAD0//T/6f8FAFUAEwAoACQA+v9WAOz/1f8LANL/4f8qAMv/b/8YAAUAZP8pADMAkf9tAPD/sP+tAM//7P+SAIj/IwBuAL3/QgBOACQANgAaABkADQDK/+f/+v/G/ywAXwD7/0sAMwCk/83/rP+u/7v/HAAyAJn//f8TAKf/GAAYAAYAgwD8/xUAWQDS//7/PgDQ/yIANAAPAEkAz//Z/wkAuf+2/8f/lf8ZAP//yf/+/wgABADH/8f/WAAbAMH/PwDi//L/KgDx/9j/PgBOACkAXwAIAN3/QAAaAMj/DQAUAPD/FwDI/6T/QACZ/4f/TAD9/9j/DQDu//v/3//i/z8ABAABABUABQCx//f/JADK/1QAeAAfAEgA5//Z/x0A3v8fAPf/1v83ABIA/P82APT/MwD2/7H/WgDr/8L/VADb/9D/FwDF/xUAFgATAEsA9f/3/9X/uP8GAPT/9P9FAIgA7//v/zcArv84AE0A5f8vANz/qf8IAKn/3/80AJz/AgATAGn/s/8CAKL/w/8UAM//EABaAOL/EACDAPX/8v9AAO7/JABWAOj/+P/X/+j/9//w/y0A0f/1/yUA5v/W/yIAEQBWADUACgBEADcAKAAdACYAQgBXAP3/MAAvABwAXwBdAP3/agBUANT/IAAQAAkALAAmAND/2//3/9X/vv/
m/9L/w//i/47/3/8EAI//FgAIALX/IwDX/7D/HQDw//D/BQAaAML/pP/m/0L/6v8aALn/UgDc/5j/JgB0/6//WQDe/0kAEwC4/zgAqv+u/5QA1//h/40A6f/a/xkAzP/N/xIAEQDz/zQATQAUAFIAKwDl/4AAlQATAE4AqAALAOT/ewD2//b/egAXAN3/4P/+/wUAy//u/ysA2P8XAC4A2/8YAFYAawAQAA0AYQBCAJr/+//rAO7/wP97AJj/sf+FAKb/vv9yAIz/0//Q/4v//P+q/ycAKACh/04ADQBd/z8A2v+m/1oA9/8JABcA0P85AAwAyP8nALv/BAAwAIz/9P/M/3r/9//A/7v/5P+4//n/GwARABsAFwAiADgAGAAXAHIATQDn/zwAYADQ/ycAFQCq/1gAIwDY/zkAuv+l/x4AzP+f/wwADQDS/yUAOgDs/wwANADq//z/NwANALr/LwDf/1D/TABwAKj/NABxAHr/5v9tAB0A4f9NAEgAvf/z/zsAvP8PAKAAuf+9/z0A3/+T/9j/IQDH/7b/GQCo/+P/aQCY/wcAsQC9/xQAhwAKAOT/GAAEALn/lgBdAFn/EQBSAGP/wv9eALD/qP+NAAwAUP9rAGQAP/8cAMwA0/8OAM0A2P96/5kARABn/3IAoACb/wkAPgBl/7X/RQCn/5b/OwAfAHX/qv8bALX/5v9tAL3/rv87AMf/t/8EAMv/6/9OAM3/hv/u/9L/w/8zABMA9P9BAOj/xP8SAFQAKAACAIIALQDB/y4ALgDJ/ycAdgDf/7z/IwAsAPb/NgAhAOH/OQAlAAAAQgB6AFQA5//+/08AGgAvAFYADQBRAFQA2P/4/x4A6v/y/zsA4v/C/xAAvf+e//r/6P/N/xcACADK//L/FgCP/7r/JQCj/6v/u/9M/2T/kP9M/wr/Bf8k/+v+zP4Y/17+Hf7j/n3+Zv54/6T/UP/n/wgAPgATASsBlgEoAlcCfAKNApQCdQKfAtUCjAInAiIC8QFMAfsApAA+AAsA3v+c/2X/af9E/x//Iv86/1D/nv/k/9//OQCXAFAAZwDFAHoAgQB1AMv/hv9A/7j+Lv7X/YL93/xq/AT8Qvu2+nn73fuy+9v86f0x/uL+iP8OAA4BDwLfApoDLgSOBH4EMATvA1EDGQMDAyYCoQEkAe3/Df9T/ln9Of12/Uv9Sf25/QT+M/6t/i3/sv+KAIQB4AEuAu8CGwP+AlYDHAPYAisDwgImAh0CwQEMAaYARgDb/8D/sv9U/xb/MP9L/zf/Nf9f/1r/nP+//4b/qf/H/3r/M//8/r7+hP5U/tP9X/0j/Yr8GPzD+2X7HvsI+/36Xvsx/ML8Vf1Z/ln/7f9kACsBSgJMA/8DZQS7BOYEvQSEBDME2wPQA3ADzQIwAmMBsgATAIz/Vv8o/zv/jP9r/2//v//e/ygAhgDZAEwBkwHtAQQC9AESAg4C9gHJAYgBQQHRAEoABgCi/yv/Fv/j/of+lv5D/gP+GP7U/Yn9bf1k/f38nvxq/Ff8u/w1/XT9vv1K/tf+/f5C/7D/DAChABUBOAE9ATsBbgFQAT8BdAFQATgB3wB1AB0ApP9p/xb/yf7Q/uX+7v4m/4f/zv8MAHoA3gAOAboBOgJEAsECIwMtA18DdgM4AyoDCQOfAlICEwLVAXUBHgHbAIcAWwBCAPr/y//i/93/tf+K/3X/Vf9Z/03/G/8f/w3/6P6V/kT+Av7N/av9Y/1M/Vj97fyX/Iv8PfwJ/Ab87PvY+zj8hvyQ/Pz8rP0B/mP+If/P/2UALgHcASkCvQJgA38DwgMeBCcEJQQBBKcDTgPiAn4CNAK4AWIBFQGjAH0AbQAoAD8AbgBaAIAAvADjABUBewHNAR4CbgKAAncCggJ3AkICIgLtAWoBAAGpAO3/kP9V/57+NP4A/nL9+vy4/Dn8g/sj+936iPqZ+tz68vpF+//7RfyH/D/90P1Q/if/tP/r/34A2gAGAU0BlwH
CAcABpQFQAQ4B2QCOADsAJAArAP//CwA7AFEApwAYAUIBkwEYAkcCbQLZAiUDJQNLA3UDZwNwA2QDAwOpAoYCPALfAbwBfQE8AT8BHQHQANMA1wDRANAAwgC8ALMAgQA3AP7/wP91/xH/o/4N/l/9uvz/+2f7/vp4+hD6zPly+Vf5U/lm+eH5U/rI+nz7F/zA/LP9pv55/3sAcAEiAscCTwOqA/oDJQQNBNkDkQNSA+0CZQL8AZABLAHqALQAjgCpAMEAvADhABYBZAHkATsCfAIMA3ADlgPHA98D7QPuA9kDnwNBAxYD0AJLAgkCxQFPAf8AjQDv/3X//f6S/jH+5v2z/W39Lv34/J/8Rfz4+4X7DPuq+mL6Nvoz+l76nvrj+jv7tvsq/LL8Pv2z/Sv+tP4l/6L/OADBAEIBmAHiARECJAInAggC0gGwAXsBOQEbAQYBAAEWAU4BhQHJARQCUgKFAsIC/QIiA1YDjQOaA5gDpgOuA7wDtAOWA3MDQwMZA9sCkwKDAmACCALMAYsBHwHoAJsAIADh/5f/If++/lD+x/1E/a78+/sz+476+flF+dT4kvhd+H/4vvjw+GT5Cfqv+l37Jfz//Nf9tv6J/zcAAgHOAUUCqgITAzkDSgNLAxoD2AKoAkYCxAFxATEB8gDMAMUAwADgABYBPgFwAdoBOgKSAggDZwPIAzQEdASjBOYEBAUFBfEE0QSXBFQEGgS+A1ADAQN/AtoBeQHwAEoA6v96/9v+gf4i/o/9Hv3L/Ef8t/ta+9z6OPrC+Wz56fiw+Mb42/gw+cH5Mfqn+kX7wPs//NH8ff0Z/rP+S//L/0oAzgA4AZ4BAQIkAjICJAIAAtwBtAF6AWsBegGRAccBHQJ7AuECNgNdA4gDtAPYA/AD8gPtA/ID3gPBA64DqwO3A7EDfQNRAzQDDwPqAtACxgK2ApQCXwIeAt0BoQFQAe8AegD4/2H/sf7//VH9lvzZ+x77Rfp9+dv4RPjO95D3dfeL98/3Kfix+F75Hvrl+q77ffxp/Un+I//6/7AARwHHASoCeAK2AtsC3QK9AosCQwLzAbQBjAFxAWkBdgGPAbwBBQJUAqQCDQNnA7oDHwRxBLcEEgVdBY0FsAWrBZoFhAVZBQgFoARCBNgDTAPGAkQCvwFdAeoAbwANAJP/Ev+m/jv+6P2i/TH9wfxN/LP7GvuC+uf5c/kM+Z/4PvgT+DH4cvi3+BD5gPkX+rH6Jfuh+0r8Av2k/UD+5f6i/1wA6QBOAbkBFAI3AkECPAI0AjYCJQIEAg0CPAJnAo0CxAIGA0sDjAO3A+0DNARlBH8EogTGBOEE+QQIBRMFHQUKBdkErgSLBFcEDQTWA6ADVAMFA68CTAL/AaMBFgGOABYAfP/d/kb+lP3v/Fn8lPu9+gH6TPmk+Bj4k/ch9+X22/bo9h/3kvch+Mn4kPlZ+iD7/vva/Kb9fP5U/xMAyABvAekBSAKYArcCtQK3AqIChQJqAkgCHwIOAhECJAJDAnYCtwL7Ak0DlgPaAzIEiATMBBUFYwW0BfkFIAZCBlEGMwb1BZgFMwXWBGAE1ANgA/ACcwLkAUMBwABHALj/N/+8/kb+4f1d/dX8avzr+1P7w/o4+rj5Pfm3+D344feV9233a/eO99b3J/iM+Ar5mvk1+uT6mvtj/C799v3F/p7/bgAlAb4BNQKWAt0CEgMtAzgDMwMjAxADCAMPAykDUAN1A5cDugPqAx0EVQSPBM4EBQU2BWEFkwXFBfIFBgYEBu4FxwWOBU4FCgXEBHgEGASwAzwDvgIxApEB5gA3AIT/0/4g/nb91fw0/JH77vpM+rf5Lfmr+DD4w/dl9xj35PbT9vX2Pvei9xT4mfgy+eD5m/pZ+yL86vyx/W3+Lv/r/6gATgHTAToChQK8AtUC4QLgAtoCywK8AqwCqALAAugCGANPA44D1AMnBIAE3AQ+BaQF9wU3BmMGiganBrUGoQZ1BjkG6wWIBR0FsgQ
/BMQDPwOuAhUCfQHhAEYAs/8m/5v+F/6Z/Rz9n/wb/I37+/pn+tX5SvnM+F74+/ei91T3HPf09uT26vYW92T31/dm+AX5t/lp+in78Pva/Mz91f7S/8gAowFXAu4CWAOvA9cD8gPwA/YD7wPuA+kD4gPiA+ID9wMPBEkEgQTEBPoENQVoBaIF3wUUBkkGcAaKBo0GgQZXBhgGxQVfBe8EfQQMBJcDGwOTAvcBUAGbAOj/Mv+K/uj9Uf3B/DH8p/sj+6v6Lvq++U356/iM+D/49/fD95n3dvdf9073Xfd898P3GviP+BP5pvk9+t/6nftt/FP9Nv4f////0ACKASICrAIYA3kDvAP+AzkEdwSsBNAE9QQJBSsFPAVhBX8FqwXTBfcFGgYzBloGbwaOBpEGlgaIBnMGSAYDBrQFUAXxBHsEDQSXAykDrwIoApgBAAFqAMv/L/+V/gf+dv3m/FL8w/s7+7P6Lvqo+S/5v/hX+PD3lfdN9xj36vbC9qf2qvbL9gX3VffI92b4IPnp+bb6lPuA/Hn9cP5h/1IAOgENArsCSQO/AyMEeAS1BOkEEAU3BUwFWAVbBVsFZwVwBYkFnwXIBesFEwYzBlUGeAaWBrUGxAbOBrwGnAZiBh0GwgVWBdkEUQTBAyMDggLeAT8BnADx/0H/kf7m/T39o/wZ/KH7NfvP+mv6DPq1+V/5DfnA+ID4R/gR+Nj3pfd69033Iff99vz2IPdt99r3bPgc+dn5nPpi+0D8Nv08/kT/SQBFATACBAO+A2QE9wR7BecFPAZ+BrQG1gbgBtcGvwalBocGawZVBkwGSwZIBkcGRwZRBloGXQZRBjkGGQblBaUFVAX7BJQEIgSiAxoDiwL+AW8B3QBIAKj/AP9T/qz9Cv1v/N77VfvX+l365vl3+Rn5xPh6+Dn4B/je97T3jPdr9073Nfcf9xf3Nvd/9+f3afgE+bX5dPoz+/L7xvyv/ab+m/+NAIABcAJNAxMExARnBfMFYAawBvAGIgc+BzwHJwcLB/MG3AbDBrEGqgaqBqUGlgaCBnYGawZVBjAG/wXFBX0FJgXBBFUE5wNuA+kCYALYAU8BwQAvAJn/Bf9u/tT9O/2q/Cb8qfs1+8f6YPoE+rD5X/kT+cf4gPg9+P33w/eQ92j3Svc29yz3KvdA93X3zvdI+N34iPlI+hv79fvh/OD98f4KAB8BKwIqAx0E+gS+BWgG9AZdB6cH0QfkB+MHzwelB20HLgfwBrkGjgZvBlkGSAY0BiEGDQb7BecFzQWmBXMFMQXjBIYEIASyAzwDvwI2AqwBGQGEAOr/TP+o/gX+Yv3C/Cv8m/sX+536LvrI+W75I/nk+LH4h/hi+EP4JPgJ+PL34ffW99D3zvfS9+L3BvhI+Kf4Jvm8+Wf6Ifvr+8f8s/2y/r//1gDuAf0CAgT0BMoFggYcB5kH+Ac7CGYIdQhoCEAIAgi6B24HJQfjBqsGdwZJBh8G8wXKBaIFdwVIBRMF1wSTBEgE8gOUAy4DwQJLAs8BTQHKAEUAvf80/6j+Gv6M/f38a/zZ+0n7wvpE+tD5a/kT+cz4kPhd+C74Cvjx9+T33ffe9+X38/cL+CH4RPhv+Lj4FvmT+SP6yPqD+0n8H/34/eL+0f/SANABzgLDA6sEhAVCBuoGbwfdByYIVAhkCF8IRwgcCOQHnAdUBwsHygaJBk0GEwbcBa8FfAVJBRIF2ASWBEYE7QOKAyoDxQJhAvMBfwECAYAA/P92/+7+Zv7i/Vv90/xJ/MT7R/vQ+mX6APql+VX5EvnY+Kn4hfhn+FT4SPhE+D/4PPg++Ez4Z/iK+LH43/go+Y/5Gfq6+mr7Kfz1/M39rP6S/4EAggGHAoIDaQQvBdsFhgY2B9MHSQiGCI4IhQh+CHMITwgVCMUHbwcUB8cGfgY7BvkFsgVvBTEF/AS2BGkECgSiAzMD0QJuAg8CrwFBAb4AKgCZ/xP/qf5I/tr9TP2x/Bj8m/sz+9r6cvo
K+qz5XPkX+eH4vPin+KL4mfiV+I34lfih+L/44vgF+SH5M/lL+Yb5//mW+iX7oPsy/Pr86P3N/pX/YQBQAVUCSQMkBPgEzQWUBjkHvgcwCIcIrwivCJ4IhwhTCPIHfAcPB7MGTwbiBXkFKgXlBKAEWQQbBNsDlANXAywD/QKqAkIC6AGmAV4B9gB6AAkAo/8w/6j+If6o/TL9sPwc/I37Efum+jb6w/lk+Rz57Pi/+Jz4jfiU+KX4rPi9+N74C/kw+U75gfnL+QP6Ifpl+gT7sPsM/FX8DP0m/gT/kf8lAC8BSQIeA8oDqwS7BX0GBAd2BxsIoAjcCL4IjwiHCHEIHAiGBxUHswZFBrcFNQXQBIUEQwTzA6kDewNcAyID1AKNAmwCSwL6AXUBEgHjAKAAFABx///+qf42/ov99/yZ/Dn8ofsF+7X6gPos+r35bPlF+TT5L/kd+Q75Dvkz+V75e/mZ+bn54Pny+Rb6Svp/+pX6vfpF++b7TfyW/E/9Xf4w/9f/lACRAXECRwMlBAwF+AWwBloHxQc/CKgI8QjhCJYIcgg9CNkHMQeyBjoGqgUgBbIEQwTAA4EDYgMmA9wCswKZAmACNQIaAuIBiQEtAfcAtABSAM//WP/z/nf+9f1l/eL8W/zn+2b72Ppv+ij63fl2+UL5Ofk3+Sn5H/lC+X35vfnk+Qn6OPph+pj6xfrr+hb7OftC+2X7Hvzc/Aj9DP3T/Sn/9P9KANoA6AHpAqUDYAQmBfkFuwZjB9IHMginCOoItwhTCDEIGgiUB8EGGgarBTAFnQQcBKcDRwMFA9sClAJLAjUCIwLYAX0BegGJAToBnQBIAEsAJgCM/+L+gv5H/tz9Of2c/Bv8uPtB+8P6TPoE+s/5h/lF+T35aflz+V75bfnC+RT6Q/pL+mX6o/r3+jf7Tfti+4D7t/sp/Pb8Z/1Y/ar95f4aAIgAygCYAb4CkwNMBBEFzwV4Bi4HvwcRCFkIlwiOCDEI5QfLB2oHggbABWcF5QQkBJkDNwPGAmQCOwIpAvwB0QHIAZ0BYAFtAYUBPAG5AHMAaABFAMv/L/+9/mT++v1w/eD8T/zJ+1L76/qO+kj6DPq++ZP5tPnb+cH5u/nv+Tb6bfqo+u/6PPt6+5L7sfsD/Fj8b/xd/Fb8ivwu/fX9F/7f/XT+4P/VAAUBZAFoAo8DVQTdBJEFiQZLB6EH4AddCMQIpAgjCKgHcwdDB5MGaQWjBGUE9wMwA5ECKALPAakBkgFNAREBGwEjAQEB5gDqANYAggApAAgA//+g/+/+b/42/tv9UP2//Cn8ovtV+w/7lfog+vj57fnD+Zn5sPnr+RD6Hvph+uD6SPtz+6X7/vtA/GP8jvy5/M/82fzV/Pv8pP1g/lT+Hf77/nsALwEqAZQBswLVA4cEBAWbBWAGEgeDB8EHDAhKCCkIqQcsBxIH1wbfBbYEPwT/A0cDiQIlAroBSwEtASUB/wDhANEArACgAL8AygCOADIA8P/v/+j/b//M/nv+Q/7J/U/93/xS/NH7fvsx+9D6cfo3+h76DfoH+hT6Pvpp+o76z/pI+6X7xPv0+zb8bvyr/OH86/wB/S79Lf07/eT9wP7N/nz+Hf+dAJcBpgHHAbsCAwTPBCgFmQVcBhwHjAfBB+wHAQjiB4cHCwexBlkGfQVcBL8DdwPjAkYC0QEyAcMA5wAPAcMAewBsAHQAmwC1AH4ALwANAO7/y/+V/yH/kP45/v39nP0v/az8FPzB+6b7TfvD+oP6fvpl+kz6Vfpx+pn63voM+0b7rvsH/DT8bPyw/M388Pwc/T39Xv1l/S79W/1l/gn/ff5c/rP/DwFnAXYBAAIbAy4EsgQZBc4FjwYDB2AHyAcRCAkItgdDB/8G4wYuBuMEAwTCA1MDjQLhAUwB0AClALwAqABMACMAOwBWAG0AdABRAB8ADQABAOr/qf8z/83+jf5V/uv9Xf3Q/GX8FfzH+277APuu+pT6kvqO+pf6rvr
Q+g/7X/um+wL8afyu/Pf8Wf2i/cb97/0g/kL+UP5P/i3+Jf7F/nb/Ov/b/q3/CAGXAY0B4gH9AiIEmwTlBKcFkgYSB1IHogf8BwQIsgczB7sGfwb2BcUEsANHA+ACHwJkAcYARwD+/+3/0P+D/0n/VP9f/1z/gP+F/1j/Qv9F/17/Vf/7/p7+e/5X/h7+zP08/bv8lPx9/An8h/tb+zz7Bfv/+in7K/sn+2370/s2/JX8y/wa/bv9PP5w/o/+tP4U/1X/Rv9e/3L/Sf8X/xn/w/9dAOn/oP95AJkBHgJBApoCiQOeBD8FiQUQBuYGWQdjB4sHsAd8BwYHRwapBWEFuAR8A2wC+wGTAd4ALwCy/1j/Kf/3/qv+of68/rH+lf6m/tP+6/7v/tn+xf7F/sD+Y/78/eH9tf1Q/QP9sfxK/Cz8J/zp+5/7hPuS+7P7yvvZ+wD8YvzH/PL8NP21/f/9Av4k/lv+kf66/pT+Z/6X/t7+Rf+d/1//YP9RADUBbAGTAS0CGwPLAysEwgSBBQ8GZwa0BgAHLAcUB8cGUgbTBWUFwwTQA+4CSwKrAQMBiAARAIn/Qf87/yb/JP9E/yb/Dv9U/6P/qf+J/3b/hv+c/3D/If/c/rD+bP4J/q39Vv33/K78Xfz9++T71/t9+0/7lPvG+8v77/sx/Hj80vw0/X79z/0o/m3+uv4V/z//UP+B/6L/sv/A/67/mv+b/5T/0/9GADcA+P9tAGABBAJcArICRQMfBPUEigXdBS8GgQa4BsEGowZdBuUFOAWBBAIEaANwAnQByQBJAMn/Qf+r/j/+H/4l/hL+DP4v/jj+N/5u/sz+AP/u/rv+sP7V/s7+g/4z/vz90v2e/UH96vzX/Mj8fvw9/D78TvxG/Fj8evyN/Nr8Rv18/cT9Of6Y/vP+Uv+V/9H/DgAkABkAGAAiAAIAzP+W/03/a//a/6z/TP++/3kA3QBVAfEBgwIyA/EDjAQlBccFJQZBBmMGfgZeBg0GgAW3BBkEjwOnAqUB5AAjAG//AP+I/v/9r/2C/WD9c/2V/ZT9n/3b/Tb+ev6g/sD+3f72/gf/9f7I/qL+a/47/hv+4/2a/XL9UP0V/ev85Pzi/NL8yfzj/C39g/3A/ev9S/7V/jX/Wf+U//n/RgBmAIUAxwAQASsBEAEJAS0BLAHyAKcAZwBZAFMAHAANAF8ApgDAAAABewEDAnICugLwAk4DqgPTA9MD7AMABOIDowNoAzMD2QJPArUBTAHmAFgAvv87/9z+kf47/t39nP2G/XX9Sv0q/UL9Z/11/Xz9lv3A/fD9Ff4l/jD+OP5L/lv+YP5d/nD+g/5u/lz+i/7O/tL+pv6R/s/+M/9e/0f/Sv+S/+H/+P/c/9T/6f/q/9X/v/+Z/2z/dP+p/7z/wv/t/ykAfAACAX8B6gF2Au4CMwOeA0UEsASvBHsEbAR9BG4EAARQA7cCSAK/AQsBawDh/1r/0P5S/hP+Df7t/ZT9ff3b/TP+Pv5D/mz+o/7o/hv/IP8W/xD/9P7h/vz+B//R/ob+a/6K/p7+cP4y/jX+UP5F/iT+Dv4Y/kb+eP6P/qb+1v4W/1r/i/+t/97/DwAXACEAVgB9AHAAaQCKAK4AwADBAMcA6AAMASIBMAFTAYUBswHUAe4BFgJDAmMCZAJfAmMCWwI1Av4BxAF+ATUB8wCqAF0AIgDq/6z/hv9z/13/Rv81/xj/Cf8f/yz/H/8Y/yH/K/9D/0X/Hv8D/wT/6v68/qP+g/5e/lH+S/4//kX+Tv5L/mD+ev6J/qP+zv7v/hb/Uf+E/7b/8P8hAEcAcACMAJ0AsQC/ALIAowCeAJIAgwBpAEUALgAsABkAAQAPAC0ASgB1ALoAAgFOAaMB+QFQApcCzQLvAg8DIwMaAwMD5wK9AmkCCgK1AWoBBwGYAB8Atf9u/yz/4v6Y/mX+PP4q/h/+Bf70/ff9BP4U/i/+Of4//lr+iP6y/sv+3v7v/g7/KP87/0T/R/9
E/0D/PP85/zn/Lf8j/yf/MP83/z7/VP9r/4T/oP/B/9j/6/8EACkASABRAFEAZACFAJkAnwCoALUAwgDKANYA7gACAQgBDAEkAUwBZwFwAYABnwG3Ab0BugG6AbcBoQGAAWQBTAEmAekAsACUAIEAVgAaAO7/4//i/8//r/+c/5//n/+U/4f/ff9t/1f/Rv8//zn/JP8D//T+AP8N/wT/8P7w/gb/G/8c/xT/G/8v/0H/P/86/0n/a/+F/4j/iP+o/9n/+//+/wgAKgBMAFcAVQBaAGcAbABjAFgAXABoAGkAXwBlAIcAqAC7AMwA7wAfAUkBYgF7AZUBpwGyAbcBuAGpAZABdgFbAToBEwHjALQAhwBdAC8AAgDZ/7X/j/9n/0T/Lf8X//3+5P7T/sv+xv7A/rf+sv6y/rv+wf7A/rz+xf7Y/ub+6f7t/v7+Fv8l/y//Pf9T/2v/fP+O/6n/yP/i//f/EAAuAEcAVwBlAHYAhwCQAJYAmwChAKcAqACjAJwAlgCQAIkAgQB3AHIAcwB7AIgAlQCnAMAA3gD9ABkBMAFEAVMBXgFhAVwBUwFBASsBEQH0ANgAuACVAHQAVwA5ABkA+//h/8n/q/+M/3D/Wf8//yL/CP/3/uj+2v7R/s7+0v7X/t7+6v75/gn/F/8n/z//Vf9k/3T/jf+r/8P/0v/l/wAAHQArADQARQBdAHAAdQCAAJcArwC4AL4AzwDlAO8A5gDdANwA3ADJAKcAiQB5AGoASQArABsAFgARAAYABAAOABsAJQArADYAQQBJAEwATABMAEkAQQA1ACsAHgANAP3/7f/e/8r/sv+f/5H/fP9j/03/Pf8t/xr/Av/x/un+4v7b/tb+1f7c/uX+8f4C/xT/Kf88/1L/bf+K/6b/u//R/+v/BgAfADMARQBXAG0AgwCYAKwAvADMANwA6wD5AAABAwEAAfwA+gDzAOYA1wDMAMMAvAC1ALAArACrAKsArACrAKoApgCeAJgAjwCCAHAAXwBOADsAJQAMAPb/5P/R/7z/qf+c/5H/g/92/27/bv9r/2b/Yv9j/2f/ZP9e/1f/U/9M/0L/Nf8o/x7/F/8S/w//Dv8S/x3/K/86/0r/XP9w/4f/nv+w/8P/1v/t/wEAEQAgADQASABZAGYAdQCEAJUAoQCqALIAuQC9AL4AuwC2AK0AogCVAIYAdABlAFgASgA7ADEALQArACcAIwAjACcAKQAmACEAHgAcABQACAD7/+//3v/L/7n/qP+X/4T/cv9l/1v/Uf9I/0T/Q/9A/z//RP9G/0f/R/9K/1P/Wv9d/2H/bP97/4b/j/+a/67/wP/P/9z/7/8FABYAJAAxAEQAVwBkAGwAdwCIAJUAoACoALUAxgDUAN0A5QDyAP4AAgECAQIBAAH6AOsA2gDLALkAoACBAGcAUQA8ACAACAD7//L/5P/X/9H/z//N/8X/vv+9/7v/sv+p/6H/nP+U/4v/gP96/3T/a/9i/1z/V/9R/0n/Q/89/zn/NP8v/yz/KP8n/yb/J/8s/zH/Ov9G/1T/Y/91/4r/oP+2/83/4//7/xMAKAA9AFIAZQB0AIIAkQCdAKcAsAC6AMMAygDRANYA3ADgAN8A4ADeANoAzwDEALoArACaAIYAdQBmAFIAPQAsAB8AEAD9/+7/4//a/9D/w/+8/7j/tP+s/6b/pf+l/6D/nP+a/5z/nP+Z/5j/nv+l/6f/p/+u/7X/t/+1/7T/tv+1/63/pv+i/57/mP+R/4//kv+X/5v/o/+x/8H/0v/g//T/CQAfADEAQQBWAGkAeACEAJEAngCmAK0AsgC2ALgAuQC4ALYAswCwAKoAogCZAI4AgQByAGIAUwBDADIAIgATAAUA+P/r/97/0//H/73/s/+r/6P/nf+Y/5H/jf+I/4H/ev90/2//af9k/2D/Xv9e/17/Yv9l/2v/cv96/4P/jP+W/5//qf+w/7r/wf/H/83/0//Z/97/4//q//H/9v/
7/wMADAAVAB0AKAAzAD8ASwBXAGEAbAB3AIEAiQCTAJsAoQCnAK4AtQC5AL0AwgDDAMMAwgC9ALgArwClAJcAiQB4AGQAUQA9ACcAFAABAO//3//Q/8X/uv+y/6v/pf+h/57/nf+c/5z/m/+c/5v/m/+c/5z/m/+b/5//oP+g/6T/qP+r/6//sP+0/7b/t/+6/73/wv/E/8j/zv/U/9r/4f/q//L/+v8FABEAHQApADQAPgBIAFAAVwBeAGMAZwBpAGsAbgBvAGwAaABlAGIAYABdAF4AXQBbAFcAUgBOAEwASgBHAEMAOwAxACoAJAAcABMABgD8//L/5f/h/9n/zf/F/7//vv++/7j/uv/A/8P/zv/X/+X/5f/m/+z/+f/r/xUAtQCVAOT/yv+Z/1v/X/9b/5j/lP93/1z/Uf+6//P/DwBFAE8AKQD9/wcAGQAYAEsAPAARACEADgAHAB4AawD8AGUBMAG6AKgAmQBbAEwANwACAN7/m/8n/8z+q/7L/vn+D/9L/3//0/8xAAkAJAC5AO8A9QAIAf8ABAE1AV0BLQHqALYAYQD5/47/Sf8n/wj/Df8Q//f+/P4m/0b/bf+r/8D/3P8OAEYAZABmAMcAGQHtAIoAUwA+APn/4//V/47/bv9D/xn/AP/1/jH/gP/g/y0ALwAuAEUAVgB3AKwAvACuAJwAbQA3AAAA6v/j/7r/p/+L/2z/gv+Y/8f/6P8HAFkAbACAAKYAngC8AMoA1gDjALcAxAC0AIEAdgArAPf/6v+8/5n/av9i/2r/Of9M/4P/ff+L/7r/7P8IABIAHgATACIAGQDc/8X/x//J/7D/iv9t/0n/U/9p/3f/kv+n/6z/mv+Z/5z/p//g/wEADQAZAA0AAADa/67/lv+J/6D/1v/a/5//X/9M/0f/Bf8F/2b/tP8CAB0AIAAnADsAtQAaAZYBUwK1Au4C4gKHAikCxQGqAYkBKwHcAIEAEACG/zT/Pf9p/7f/8f8LABkAJAA8AEkAVgBsAGMARAAZALb/SP8p/zP/Lv8z/zT/Hv8V/wX/9v7h/sr+0/6z/pz+e/46/hz+7/3W/aj9Xv0J/WT8qvtT+6n8OP8bAtMEAgZqBmYF1wMyA0kCmgL9ArIC1QH+/pn8Afur+jD84/1OAPcBXwL2AYwA1P+Q/+v/0QB3AecBggHsAA8A+/6W/pj+Pf/1/1AAoACmALYA3wAXAYQB9AFEAj0CuAH9AE4A8f/6/w8AHwAzAAEAi/8c/8j+rv7X/iv/bf9e/zP/E/8i/0v/ZP+T/57/k/+H/1//Mf/8/gH/JP8j///+1P7N/t3+/v4q/13/i/+R/33/VP+B/+D/KAC8AAgBEQEJAY4APQD4/9b/MABTAIQAlwBlAHcAdgCfAPUASwGzAaMBWgEVAaIAWAAxABsAMgAoABIAAwDL/8P/4v8bAG8AkwCeAJEAgABSAAUAxf93/2H/W/9d/2H/KP8l/zP/W/+S/87/JQArAEsAagArAAYA8v/t/+D/pv+h/7r/uP/e/+v/xP94/w//wf5T/v39zf2I/XD9Rv06/W39Cf6I//IAKAIfA3EDaAOrAhkCwgFHAVABKgHtAIUAyv+P/4r/z/91ABoBvwEiAgACewHnAH8AUgA/AFYAgACAAFgA7/91/wH/u/73/kj/lP/3/yIAUQB7AI0AuQC1AJoAawDq/3n/Jv/i/tX+tv6o/p/+mf7J/tr+If++/z8AfgBEAOb/ev+9/lL+R/5S/lr+Lf5i/pH+iP7V/kD/8P+GAKoAsgBFAI3/9v6H/nz+hv7i/uH/rgBZAc4B+gEXAsYBjgF0AUEBRQE1AUoBHQHUANgAzwAmAVsBrwEkAgUC8gGjARMBhwABAOD/8/8LAFsAwADVALMAVgDQ/1P/0v7G/hn/fP/7/2AAjwBnACQA0/+P/4P/Uf8n//f+wP66/r7+1f4d/13/o//J/73/tP95/17/OP/+/vX+n/55/pL+qv74/if/gP+
2/6v/kv87//7+uv5v/mX+f/6k/oz/4gDHAZQC1wK9AnkCngEUAekArQCDAGUAbAA9AAAAKQCNAB0BggHRAS0CAQJyAbYA8P9t/zP/Zv8JANQAbgGkAYIBBAFuAP3/lf9m/0z/E/8E//b+9P4e/2X/3P9RAIwAaAAUAK3/P/8D/8v+z/7k/tP+z/65/tD+Hv+K/9z/FgAAAKL/cv8r/yP/Uv9i/3r/dP9i/0f/Mv8//5L/9v8/AHEAngDCAKQAsgDPAN8A/QDqAMIAWADi/6D/dP9e/z//RP9s/3//pf8BAHUA/QB8AQMCdgJ4AkIC8QGAARUBrQByAF4AVQBZAHAAlACuANIA9AALARUB8wDbAIYAEACw/0r/Kf8X/yH/Qf9C/zb/B//7/uz+zf6+/pf+jP6m/gX/Xf+q/+3/4//j/5H/Ef+1/j7+AP7s/fb9Bf7e/dH9uP2e/Zr9y/1u/40BnQO8BUYGDwZ1BOcBfQBB/y//1P9TABcB5QBuAOX/ev/M/zsAMwEgAmwCLgIJAa3/bf6D/Vj99P1n/wEBKwK/AloCWwENANv+gf6v/lD/EwDDACYBywA1AJX/Uv9Y/3n/9P+JAOAAvQCOAEgA3/9k/wv/Lv8h/yr/K//8/ub+Zf5G/mb+bf7w/mT/0/8OAPP/5v+G/xL/0/7S/hL/M/97/8v/4v8QACoAbACsAKMAsgDMALMAoQChAJwAxQDOAAABNQEwAUYBJwEHAdUAmgCbAHwAeQB1AF8AXQA6AFgAgACQANoA+wD7ALYAUQAeALz/gf+B/5b/tP+C/4P/kv9r/2z/gv/P/+7/0f+2/3P/Hf/B/qr+1f4O/2b/tP/y//f/x/+t/4n/av9f/3T/zP8bAEoAWwBXAEcAHwD7//H/AADq/8X/k/9D/x3/CP8e/2X/s/8YAG8A0gAyAV8BPAHtAMcAfAAkAOv/9v8/AEAATQB2AJgAxADbACQBTAEwAe0AkgBbAAYA2f/e//L/KwArAEsAXgBSAEEA2P+R/1P/CP/v/tr+Cf9X/43/8/9MAI4AnABbAAwAhf/7/o/+Vv5T/oH+7f5c/8X/BwAiADIAKQAkACUARABEAP//o/82/+v+sf6y/hz/d/+n/6D/k/+P/2P/bP/L/1YAwgAGATcBDwGjAAQAhv9b/1//2f+GAC0BsQHXAdgBngGKAbEBywH6Af8B7gGVAecAdgAmAP///v8nAKIA1wDFAJQARADS/0T/9v7z/iT/W/+r/wQAJgAYAL3/W//3/pr+hP6a/vD+Rf9c/1z/L//1/rr+p/7U/gv/N/9C/yH/8/6x/l/+E/7T/bL9gv1y/VP+kP/NAB4C/AJ4A+oC4gFTAYMA9//J//3/cQBKACwAAACp/4//lP9GACMBrgEYAgICkAGsAJ//Hv8A/33/UAAnAeQB0QFDAXcAn/9E/0n/1P+yAGsB7gHwAWsBpwDL/zT/8/4H/5r/IQB+ALcAnQBYAMr/XP9F/z7/OP8y/zD/7/6a/nf+hP6v/uj+P/+N/6n/ff9c/0X/HP8Y/zD/V/99/6D/wf/t//7/BQARAPf/8//9//b/9/8IADgAYwCFAKwAxgCpAGMAJgAGAAwAMACEAPYASAFwAVsBMwHxAJcAWwBLAE4AOgA5ACMAAADX/6n/uv++/+j/OABlAIIAYQA5AAMAsf+J/5P/rv+7/77/tv+X/03/Gf8Y/zL/hf/u/2IAtwClAGYA+f9s/xv/+P4Q/1v/m//P/97/vP+X/4f/pv/6/0kAigCkAIAALACs/1f/Of9B/4H/0P82AHEAawBJABYA6//X/+3/MQCIALwAyACwAIsAYwBNAG8AqgDxABcBDAHiAJcAPQDr/8j/2f8cAHwA1QAWARYB1QBkAOn/ev8i//P+7v4D/yD/Pf9o/6f/5P8fAEUAVgA6AOr/l/9D/wr/7v7z/if/V/9u/3j/g/+f/7P/zP8DABsAEQDd/5//b/84/yD/Ov9p/6L/0f/3/x8AMQB
FAGcAiQCzAMkAxwCzAH4ARQAPAPP///8bAEgAbACLAJwAoACrALgAxgDIAMEAsgCSAHoAXQBFADUAHgAcAAwABAAcADAAWQB4AJcAsQCYAHkAUgAsAAkA1/+9/5n/XP8j/+z+5P72/iX/gv/Y/yIASgBAABcAtv9C/9b+ff5l/on+5v5i/9D/GwA2ACIA/v/c/8P/uf+x/6r/pP+c/6b/wf/p/x8AVQCWANMA/QAXAQUB0gCDACkA9v/m/xEAZQCyAOoA6ACzAGAABQDR/9X/BABPAJQAvAC7AI4AUwAaAPf/6v/1/xcAOABSAFMANgAKAM7/pf+i/7T/7f8aADwAPAAEANP/iP9X/z3/Kf85/zn/O/88/zH/SP9j/5b/3P8JADEAHgDp/6f/Vf8z/yn/Uf+e/+L/LwBOAFIAPwARAPT/3f/k/wsANgBrAIYAkQCSAIEAfwB4AHoAcwBMACkA/v/n//P/HgB4ANMAHAFJAUABFAG0AEoA+v+4/6T/rf/W/wIABgAFAPv/4//X/93//f8dAB8AHgAVAO//z/+7/77/yP/I/9H/yf+l/3n/T/9F/0f/W/+L/73/3f/f/9P/xf+s/5j/pP/E/+b/+f8CAPj/0P+e/4b/i/+i/8H/8f8XACEAJgAyAEYAWABuAIwAkQCBAG4AUwA9ACsAKwBDAE4AWQB0AIUAjgB8AG8AWwAxABoAHQA3AFUAZgB0AHUA/f/7//7/////////AAD+/wMA/P8EAP3//P8DAPf/BAD5/wAAAAAAAAEAAAACAP7/BQD8/wUA//8CAAUA+f8GAP////8FAPv/AgAAAAAAAQD//wIAAgD//wMAAQACAAAABAD+/wUA//8DAP7/BQD7/wgA+/8JAPz/BAADAPz/CAD6/wUA/v8BAP7/BAD7/wUA/v/+/wQA9/8IAPf/BAD5/wAA+//+/wAA/P8CAPv/AQAAAPr/BgD4/wUA+/8CAAAAAQD//wIA/v8FAPv/CwD3/wkA9/8IAPf/CgD3/wcA9/8JAPT/DQDx/wcA/P/9/wEA/v/9/wYA+/8IAPf/CAD6/wcA+/8CAP//AAAAAP////8BAP3/AwD8/wEA/f/+/wAA+f8AAPn/AQD7//7//v/+/wEA/////wAAAQACAP7/AwD//wMA/v8CAP//AgAAAAIA/f8DAP////8IAPj/CgD9/wMAAwAAAAQAAwD+/wMAAAD+/wIAAAABAAMA//8EAAIABAADAAQABAADAAMA//8CAP////8AAP//AQD9/wAAAAD+/wQA/f8DAP////8CAPz/AgD+//7/AQD+////AAD+/wIA+P8EAPz/AQABAPz/BwD4/wgA+f8DAP///v/////////9/wMA+v8GAPb/CQD7/wAAAgD9/wMA//8BAP7/BQD5/wgA/P8DAPz/BgD7/wcA+/8FAP3/AQD9/wUA+/8GAPr/AwD//wQA/f8FAPr/AwD+/wIAAQACAP//AAADAPv/CAD9/wMA/f8FAP3/CAD7/wQA/v8BAAQA+/8GAPv/BAD+/wUA/P8HAP3/AwAEAP3/CAD+/wAABAD8/wIA/////wAAAQD+/wMA/v8BAAEA/v8FAPv/BAD7/wQA+/8FAPz/AwAAAP7/BAD/////AgD+/wAAAwD9/wUA+/8DAPz/AwD+/wEAAQD+/wMAAQAAAAQA/v8CAAAA//8DAP3/AQD8/wIA/f/+/wAA+/8DAP//AQABAAAAAgABAAEAAwAAAAIAAQD//wUA/P8EAP3/AgD9/wUA/f8DAP7/AwD7/wkA+P8GAPr/AQAAAP7///8AAPz/AwD8/wYA+f8DAPr/BAD5/wQA+f8CAP3/AQD7/wMA+f8AAAEA+/8EAP3/AgAAAAAAAAACAP3/BwD4/woA9/8JAPz/AAAFAPz/BQD9/wMAAAD//wMA/f8HAPb/CwDz/wkA/f/9/wYA+/8AAAQA+v8FAAAA/P8GAPr/AgABAP3/CAD4/wgA+/8EAAQA/v8
GAP7/AwACAAMA/v8HAPv/BwD9/wQAAAABAAMA/P8CAAEA/P8EAPz///////7/AQD9/wAA/f8CAP3/AQD///z/AQD8/wEA/v8BAPz/AwD9//////8AAP//BAD7/wcA+/8GAPv/BAD7/wMA/P8AAAAA/P8CAPz////+//z/+/////n/BQD4/wIA/f/+/wEA/P8CAP7////9/wQA+/8AAAEA/P8DAP3/AwD+/wMA/P8FAPz/BAD/////AgD+/wIA/v8BAP//AAACAAAAAAACAAAA//8EAP7/AQD+/wIA/f8CAPr/BgD5/wkA9/8IAPv/AwD+/wMA/f8DAP//AQAAAAMAAgACAAIAAQADAAEAAgAAAAEABAD7/wYA/v8BAAYA+/8DAP///f8HAPj/CAD9/wEABQD7/wUA+P8IAPT/BwD6////AQD9/wAAAAD9/wUA/P8EAP7/AwAAAAAAAgD7/wYA+P8FAPn/AwD8/wAA/v/+/wEA/P8AAP3///8AAPz//f/7/////f/+//7/AAD7/wMA/f/9/wQA+v8FAPn/BgD4/wYA+f8EAP7/AQD+/wEAAAD9/wQA/f8BAP//AgD+/wIAAQD//wIAAwABAP7/BQD6/wMA/v8AAP7/BAD5/wYA+/8DAP//AAABAAMAAQAAAAQA/v8GAP3/BgD//wIAAQABAAIAAwD//wEAAQD+/wAAAgD9////AQD+/wIAAgD8/wYA+P8HAPn/AwD+//7/AAD7/wIA//8AAP///v/+/wEA//8BAP//AQAAAP//AgD//wMA/////wQA+/8EAP7/AgAAAAQA/P8FAPv/AwAAAPz/AgD7/wMA///9/wQA+v8EAP3/AAAAAP/////+//7/+/8DAPf/BAD2/wUA+f8DAP//+/8HAPr/BAD+//v/BwD5/wcA+/8EAAAABQD8/wgA+v8KAPr/BgD7/wQA+P8DAP3//P8EAPr/BAD9/wAA/v8AAP7/AwD7/wIA/P///wAAAgD8/wQAAAD9/wQA//8DAAAAAQADAP//BAACAP7/BQAAAP//BQD8/wgA+v8GAP7/AgACAPz/AgD+/wEA/v/+//7//f8CAPv/AQD9/wMA/P8DAP3/AQABAAAAAgD//wAAAgD8/wAA///+/wAA/v8AAAIA/f8EAPz/BgD9/wIA/P8DAP//AQACAPv/BgD5/wUA+/8CAPr/AQD+//3/BAD6/wUA+v8CAP3/AgAAAP7/AwD8/wYA/f8FAP3/AwD//wAABAD7/wcA/f8BAAAA/v8DAP7/AgD+/wEA////////AAAAAP7/BAD5/wQA/P8AAAIA/f8EAPv/BAD9/wMAAAADAAMA//8DAPz/BAD+/wAABAD8/wUAAQABAAMAAAD8/woA+v8FAP//AQABAP7/AgD9/wQA9/8JAPf/BgD5/wMA//8BAP////8FAP7/AgD+/wMA/v8DAPz/BQD6/wgA+P8JAPj/AwD+//7/AwD7/wMA/v///wIA/P8CAAEA/f8FAP3/AQACAP3/AwD8/wIA/P8DAPn/BQD5/wUA/P8CAP7///8DAP3/AwABAP//BQD7/wcA/f8CAAQA/f8EAP3/AgD///3/BAD8/wIA/P8FAP3/AwD+/wQA/P8JAPj/CAD8/wMAAAD+/wIA/v8FAPr/CAD5/wcA/f8BAAIA/P8BAPv/AgD9/wEAAAD8/wYA/P8DAAEA/P8EAAAA//8DAP3/AgABAAAABAD+/wQA//8BAAAAAQAAAP7/AgD7/wQA/////wIA/////wUA+/8HAPj/BQD7/wQA/P8DAPv/BgD5/wMA/f8AAAIA/P8AAAIA/v8CAAMA/f8FAP7/AwACAP//BAD9/wMA/v8BAAIA+v8GAPn/AwD9/wIA/f8CAPz/AgD//wAAAgD7/wcA+/8GAPz/BAD+/wMAAAABAAEA/v8EAPr/BwD5/wYA/v/9/wYA+v8EAP7/AAACAPz/BAD6/wYA+f8DAP7//v8DAPn/AwD9/wIA+f8
DAPv/AQD+////+/8BAPz/AQABAPz/AwD9/wAA//8DAP3/BAD+/wMA//8BAAMA/f8HAPn/BwD7/wYA/P8EAPz/AwD9/wQA/P8FAPv/BgD6/wQA+v8FAP3/AwD9//3/AQD+/wQA+v8EAPr/BgD6/wMA//8AAAEA//8CAP//BAD9/wEABQD4/woA+P8HAPv///8BAP3/BQD7/wIA/f8CAP//AgD7/wUA+v8EAP7/AAACAP3/BQD8/wQA/P8BAAEA/f8CAP///v8AAAIA+/8FAP7/AgAAAAAAAAABAAQA/P8HAPz/AgAEAPr/BwD5/wAAAQD9/wIAAAD+/wEA/f8DAP//BAD9/wMAAAD+/wUA+f8FAPr/AwD9/wEA/f8BAP////8AAPz/AAD+//7/AAD//wAA///+/wEAAAD+/wEA//8AAP7/AQD9/wMA+v8DAPz/AQD9/wEA/P8CAAAA/f8FAPr/BQAAAP7/CQD4/wgA/P8EAAAAAAABAAAA//8BAAEAAwD8/wIAAQD+/wgA+v8GAPr/BgD7/wkA+P8KAPj/BQD9/wAAAwD8/wUA/P8CAAAAAAD//wAA/v8BAAAA/v8CAP//AQACAAEA/P8HAPv/BgD9/wQA/////wMA+/8JAPf/CAD4/wMA//8AAAAAAwD8/wEAAQD9/wUA/f8BAP3/AwD8/wMA///9/wcA+f8HAPv/BwD+/wIA//8AAP7/BAD8/wIAAAD//wQA/v8EAP3/AQACAPv/BAD8/wIA/f8AAPz/AQD8/wIA/f//////AQD+/wAA/////wAA/v8DAPz/BAD9/wEAAQD8/wQA/f8CAPv/AwD8/wQA/f8AAAAA/v8EAP7/AgD9/wUA/P8CAAIAAAAFAPr/BgD7/wcA+v8GAPv/BAD+//7/AgD7/wUA+v8FAP3/BQD9/wAABAD+/wQAAgD9/wgA/P8FAAAA//8EAP7/BAD//wEA//8CAAAABQD8/wMA/v8EAP7/AwD9////BAD4/wgA9v8GAPn/BgD5/wMA/f///wAA/f/+/wAA//8AAP3/BQD5/wgA+v8DAAAA//8CAAAAAAACAAAA/////wEAAQACAP3/AgD/////AgD6/wUA/P8CAP7/AgD9/wUA+P8GAPj/BQD9/////v8AAP3/AAD9/wEA/////wMA+v8FAP3/AAACAP3/AAAEAPz/BgD7/wgA+v8IAPj/CAD6/wQA///+/wMA/v/+/wMA/P8EAPr/BQD7/wQA/v8AAAIA//8DAP7/AgD6/wgA+v8IAPn/BQD6/wYA+/8EAPz/BQD//wIABAD9/wQABAD8/wQAAQADAAIAAgACAAEABAAAAAIABAD9/wYA/v/+/wMA+/8EAP3/AgD9/wMA+v8DAP3/AgD9/wEA/v8AAP7/AAD+/wIA+f8EAPv/BAD8/wEA/P8GAPv/AwD8/wIA/v///wAA///8/wIA+P8GAPr/AgD7/wEA/v/+/wAA///9/wIA+v8AAP3///8AAP3/AQD+////AgD7/wkA9/8GAP7/AAAGAPf/CgD5/wgA/v///wYA/P8AAAIA+/8EAP7/AgAAAAEAAQADAAIAAwAAAAQAAQADAAEAAQADAP3/AgD9/wMA//8CAP//AAACAP7/BQD8/wIAAwD8/woA9/8GAP3/AAABAAIA+v8IAPj/BAD///7/AQD9/wMA/P8CAPz/BAD9/wIA/////wUA/f8EAP//AgD//wEAAgD//wEAAAABAP//AgD9/wMA/v8BAP///////wAA/v8FAPj/BwD4/wUA/P///wIA/P8BAP7//P8EAPj/BgD4/wQA+/8CAPv/BAD6/wQA/P8CAPj/BwD1/wkA+P8DAPr/BAD6/wQA/P///wEA+/8DAPv/AgD+/wEAAQD9/wMA/v8DAAAA//8EAAAAAQAEAP//AgACAAIA/v8EAPv/BwD8/wUA/v8EAP//AgACAAAAAgABAAEAAQAEAP7/BwD9/wcA//8GAP//BwD+/wYAAwA
AAAcA//8EAAQA/v8EAAEAAAAEAP7/AAAFAPv/BgD///z/CAD0/woA+P8HAPv/AAADAP3/AQAAAPz/AwAAAPz/BgD5/wUA+/8CAPv/BAD8/wIA/////wAA/P8GAPX/CgD3/wYA+/8CAP3/AQD8////AAD6/wQA+P8EAPn/BgD5/wMA//8AAAMA/f8CAP//AwAAAAAAAwABAAIAAgAAAAQA/v8HAAAAAgADAPv/CgD7/wkA+/8EAAIA/f8IAPz/BwD7/wcA+/8FAPz/AgACAPz/AwD7/wAAAwD3/wYA+f8DAPz////+/wEA/v8CAP7/AgD+/wIA/f8EAP7/AQABAP//BQD+/wUA/f8DAAAA//8EAAEAAwD//wQA//8DAAIA//8DAAAABQD+/wcA/f8GAP3/BQD+/wQAAgD9/wkA+f8LAPr/BwD7/wgA+P8JAPr/BgD//wIAAAACAAIAAQABAAQA/v8FAP7/BAD8/wUA/P8CAAMA+v8DAAEA/P8HAPf/BwD7/wIAAgD9/wQA/P8DAAEA/v8EAPz/AgD+/wAAAgD7/wUA/P8CAAEA/v8BAP7/AQD///7/AQD7/wEAAAD//wEAAAABAAMA/v8FAPv/CAD6/wcA+/8CAAIA/f8DAPv/AgD//wAA//8AAP7/BgD6/wYA+/8CAAAA/v8CAP7/AwD8/wQA+f8FAP3/BAAAAP//BQD9/wQA/////wUA/P8GAPf/CAD1/wsA9/8IAPr/BwD7/wcA/P8AAAIA/v8EAAAA//8AAAEA//8AAP///v///wIA/f8EAPv/BAD+//7/BgD9/wMAAgD//wEABAD//wQA/v8CAAAAAQAAAAIAAQD+////AgAAAP7/BAD9/wMAAAD/////BAD8/wMAAAD9/wMA//8BAP//AQD8/wUA/P8CAP////8AAAAAAAD+/wIA/f8DAPz/BgD6/wQA/v8CAAEAAAD//wIA/v///wEA/v8BAP3/AAD9/wIA+/8CAAEA+v8DAPv/AgD8/wMA+v8IAPn/AwADAPn/CQD3/wcA+v8BAAAA/P8BAP7////+/wEA+/8EAPz/BQD7/wYA+/8DAAEA+/8EAPz/AQACAPz/BAD6/wMA/v8CAAQAAAAAAAQAAAABAAEA/v8CAAIA/f8GAPr/BgD4/wYA+P8FAP7/AgABAP3/AgABAAAA//8AAAMA+/8GAPr/BAAAAPz/BAD9/wIA//8FAPj/CAD6/wMA//8AAP//AAAAAAAA//8BAP7/AAD///////8CAP3/AwD7/wUAAAD//wQA/f8FAPv/BwD3/woA9/8IAPz/AgD+/wUA/f8FAPv/BgD9/wEAAQABAP7/AwD//wMA/v/+//////////7//v/+///////+/wIA+/8CAPr/BQD6/wEA///5/wQA+v8CAP//+/8FAPz//v8BAP3/AQD9/////v/9/wEA/P8DAPv/BQD4/wYA/f8GAAEAAgD//wEABgD6/wsA+P8JAPv/AwD9/wQA/f8EAP//AQD//wEAAAADAP7/AwD9/wQA/f8FAP7/AQD+/wIA+/8DAPv/AQD+/wAA/v8AAP////8DAP7/AgD///3/AwD8/wEAAwD4/wgA9/8FAPz/AQD8/wIA/f8AAP3//v8BAPr/BgD5/wQA/f8CAAIAAQACAAIAAgACAAIA/v8HAP7/BAAAAAAAAQACAP////8CAAEA//8DAP//AgABAP7/AwD//wAAAwD8/wQA+/8GAPn/BwD7/wMA/f8CAPz/AgD8/wMA//8AAP3/AwD7/wQA+/8BAAAA/P8DAPz/AwD7/wAA///9/wEA///9////AAD6/wAA/v/+/wIA/v8AAAAAAAAAAAEAAwAAAAEABQD8/wYAAAD+/wYA/P8HAP7/AwACAAAABAD//wYA/v8CAAIA/v8EAAAABAD//wQA/P8CAAAA//8EAPj/BwD5/wYA+/8BAAAA//8BAP3/AAABAP////////3/BQD7/wEA///+/wEA/v/
///3/AAD9//7/AwD8/wAAAQD7/wUA/f/9/wMA+v8CAAEA/f8CAP7/AQD//wIA/v8CAP//AAAAAAEAAAACAP//AQACAAAAAwACAAIABAD//wUA//8EAAAAAQAGAAEAAQAFAP7/AgACAP3/BAD//wIA/v8FAPv/BgD+/wYA/f8FAAAAAgAAAAMA/P8GAPn/CQD3/woA9f8HAPz/AAABAP////8AAP3/AAD7/wYA+P8IAPn/AQABAPr/AwD8/wMA/f8BAP3/AAD///v/AwD5/wIA/f8AAP3/AwD8/wUA+v8EAP7/AAAGAPn/BgD9/wEAAgD7/wUA+f8FAP3/AAD+/wIA/v8DAPv/AwAAAAEA/f8FAPr/BgD+/wAA//8AAPv/BQD5/wgA+P8EAPz/AgAAAAEA/f8EAPv/CAD9//7/BAD6/wsA+f8HAPz/BwABAAEABAD8/wUAAAACAAEAAAAEAP7/BQD6/wkA+f8IAPf/CAD3/wcA+f8FAPv/AAAAAP7/AgD///7/AQD+//7/AQD+/wAAAQD7/wQA/f8AAAMA+/8CAP///v8AAP3/AwD5/wQA+v8CAPv/AgD7/wQA+P8DAPr/AQD7/wEA+v8GAPv///8CAPj/BQD6/wAA///9////AAD8/////////wEA/P8CAP3/AgD+/wAA/v8BAAEAAAACAAAAAgD//wIAAQD//wMA//8BAAEAAQAAAAEAAAACAP7///8EAPv/BgD8/wEAAAD//wEA//8BAAIA/f8DAP7/AgD9/wAA/f8BAAEAAAD//wQA/v8BAAUA/P8FAAIA/f8GAPv/BQD//wIAAQAAAP7/BAD7/wUA/P8CAPv/AwD9/wIAAgD5/wYA+P8DAAAA/P8CAPz/AgAAAP//AAD9////AAD8/wIA/////////f8DAPz/BAD9/wAA/v8BAP7/AgD+/wEA/v8AAP7/AQAAAP7/AAD5/wMA+/8AAP3//P8CAPn/BAD4/wUA9/8FAPv/AwD//wEA//8CAP7/AQAAAAAAAAD9/wEA//8CAP3/BAD8/wMA/v8AAAEA/f8BAP7/AgD9/wMA/f///wQA/f8DAAAA/f8BAAEA/f8DAP7/AQD+/wQA+f8IAPr/BgD+/wEAAAADAAEA//8FAPz/BAAAAAEABAD+/wYA/f8FAPv/AgD+/wEAAAD//wAAAQD5/wgA+v8EAAAA/v8EAP3/AgD//wAABAD8/wQA/f8DAAEA/v8FAPr/BQD9/wEA/v8AAAEA/v8BAAAA//8BAP3/AAACAP7/AQD+//7/AAD///7//v/7/wAA/v///wEA+/8CAP////8BAP//AwD+/wIA/f8CAP7/AgD+/wEA/v8AAP7/AAD8/wEA/P8DAPr/BAD6/wUA+P8FAPv/BAD9/wIA/f8AAP//AAAEAPv/BAD6/wMA+/8FAPz/AAAAAP3/AgABAP//AwD8/wcA/f8DAAMA+/8JAPn/BgAAAP7/BQD7/wUA/v8AAP////8CAP3/BQD7/wMA//8CAAEAAQABAAAAAwAAAP//AQD9/wIAAAD8/wMA+/8DAP3/AQD7/wYA+f8FAPz/BAAAAAUA//8CAAAAAwD+/wUAAgD//wcA+/8GAPv/CAD8/wEAAgD+/wMAAQD8/wUA/f8GAPr/BwD5/woA9v8KAPb/BwD4/wUA9f8EAPn/AQD+//z/AQD9/wEA+/8DAP3/AQADAPz/BQD8/wUA/P8FAP7///8DAP3/AwAAAPz/BAD+////AgD2/wUA+v8AAP///P8DAPv/BAD8/wIAAwD7/wcA/P8FAP//AQAAAAEA//8DAP3/AQABAAEAAQABAAEA/v8FAP//AgAEAAAABAABAAUAAQADAP//AAADAAEAAAD///z/AgD9/wQA/f///wAAAAD+/wYA+v8HAP//AQAEAP3/CAD7/wYA/v8FAP//AgACAP3/AwAAAP7/BAD8/wUA+/8FAAEAAQADAP3/BAD8/wUA/f8CAP//AgD/////BgD
3/wkA9v8IAPj/BAD6/wgA+P8JAPv/AwAAAAIA/v8GAP//AwACAAEAAQABAAAAAQAAAAIAAQADAAAAAwD//wMAAAADAAAAAgD//wAAAwD+/wAA///+/wIA/v/+/wMA/P8EAPz/AwD/////AwD//wEAAQAAAP//AAABAP7/BQD+/wAAAgD/////BAD9/wEA/////wAA/f8DAPv/AgD8/wQA+/8GAPn/BAD8/wMA/P8EAP7/AwD/////BAD9/wUA/v8AAAUA+/8GAP7///8FAPr/CQD9/wIAAQD+/wQABAD//wUA/f8GAP//AwAAAP//AgABAP//AQD//wMA/f8DAPz/BQD+/wEA//8DAP//AAACAAAABAD8/wUA//8BAAMA/P8DAP7/AgD+/wAA/v////7/AgD6/wcA+P8GAPz/AwD//wEAAQAAAP//AgD//wMA//8AAAMA/f8EAPz/CAD6/wQA/v8EAP7/BQD8/wcA/f8GAP7/BQACAP//CgD5/wsA+/8GAP//AgABAAMAAQADAP//AQD+/wEAAAD//wAA//8CAP///v8AAPz/BgD5/wUA+/8BAAEA/v8BAP3/AwD8/wAA/v/+/wMA/f8CAP3/AQD8/wQA/P8CAAMA+/8HAPz/BQD+/wEAAAD9/wAA/////////v////7/AAD9/wQA/P8CAP7/AAAEAPz/BAD+/wQAAAACAAAAAgABAP//AwABAAEAAwD//wMA/v8CAAEAAQADAP7/BgD8/wgA+/8HAP3/AwAAAP//BAD8/wQA/P8EAP3/AgD6/wMA/P///////f8AAP3/BAD7/wQA/P8FAPz/BAD6/wMA/P8DAP3/AQABAP3/BgD7/wUA+v8EAP7/AAABAP//AAABAP//AQD+/wIAAgD7/wgA9v8JAPf/BQD7/wIA/f//////AAD//wAAAQD9/wIA/v8AAAMA/v8AAAAA//8BAAAA+v8EAPv/AgAAAPr/BAD7/wMA/P8AAP7/AgD9/wIA/P8AAP3/AAAAAP7////8/wAA/f8BAP3/AQD8/////f/+/wAA/f/8/////f///wEA/v8DAP///v8BAPz/BQD8/wMA/v8AAAAAAQD9/wEA//8DAPz/BAD+////BAD+/wEABAD9/wYA/v8EAAAAAQAAAP7/AwD8/wMA+v8DAP7///8BAPz/BgD5/wYA+f8GAP3/AgAAAPv/BwD5/wcA9/8GAPb/BwD5/////v/8//7//v/8/////f////z/AAD5/wIA+v8DAPz///8BAPv/BAD7/wAA///+/wEA/v8AAP3//v8CAP3/BAD7/wIA/////wMA/P8GAPj/BAD8/wIA//8AAP3/AAD6/wYA9f8EAPv/AAD//wEA+/8DAP3/AgABAPz/BAAAAP7/BAD6/wcA/f8DAAEA/v8EAP3/AgD9/wMA/P8AAP3//f/9/wAA+f8EAPz/AAAAAPz/AQD//wIAAQAAAAAA/v8BAP3/AwD9/wEA/f8AAP//AgABAPz/BgD6/wUA+/8FAPz/BQD9/wEAAAD+/wEAAQACAP7/AQD+/wEAAQD//wAA/v8AAAEA//8BAP//AQAAAP//BAD+/wEA/v8BAAAAAQD9/wAA///9/wIA/P8AAAAA/P8BAP7//f8EAPn/BgD6/wQA/P8BAAIA/P8CAAAA/f8EAPv/AQADAP7/AQD9/wAA//8CAPr/BAD7/wIAAQD7/wUA+v8HAPz/AwACAAIA//8DAPv/BgD7/wUA/f8AAAQA+/8GAPz/BAABAAAAAQABAAQA/f8DAP7/AAAEAP3/BQD+/wIA/f8HAPr/BAD9//3/BAD///z/BQD6/wIAAQD+/wMA/P8EAP3/BQD7/wMA//8CAP////8CAPr/BgD7/wUA/P8DAP//AQD9/wEAAgD+/wUA+v8HAPv/BQD7/wYA/P8GAP//AQACAP//AwABAP7/AwD//wMAAwD9/wUA/P8HAP3/AgAFAPz/BgD/////BQD+/wMA/v8
BAAAAAAD//wQA+v8IAPr/BQD6/wcA+P8LAPf/BwD6/wUA/v8CAAAAAAABAAEAAAABAP3/AgD9/wIA/f8DAPr/AwD8//7/AwD5/wUA/P8AAAAA/f8EAP3/BAD9/wEAAQD+/wEA//8EAPz/BQD+/wAAAwD9/wIAAgD9/wUA+v8CAAEA/P8HAPf/BwD9/wIAAQABAAMA//8EAP3/BAAAAAAABQD//wEAAwD//wQA/v8EAP7/AgD//wQAAAAFAP3/AwACAP7/AgABAP3/AwD//wEA//8DAP3/AQACAPz/BwD7/wYA/v8DAAIA/v8CAAEAAQAAAAEA/v8DAP////8BAP7//////////v////z/BAD8/wUA+/8EAAAAAQADAAAAAwAAAP//BAD+/wQA///9/wUA+/8GAPr/AgADAPr/CAD7/wQAAAD8/wcA/P8GAPn/BgD8/wcA/f8BAAMA+v8FAPz/AAAEAPn/BAD7/wEAAgD5/wYA+P8HAPz/AgAAAP//AAD//wEA/v8BAP///v8CAAAAAgD9/wQA+/8FAP3/AwD7/wYA+/8GAPz/AQADAAAABAD//wQA/P8GAPz/AwD/////AQD/////AAABAP3/AAACAP//BQD9/wEAAAABAAIAAgD//wYA/v8DAP3/BAD9/wYA/f8DAP//AgD///7/BAD4/woA9v8JAPj/BwD9/wMA//8DAP7/AQAAAAAAAAACAP3/AgD/////AAD+/wIA/v8BAP//AAADAP7/AQAAAAEA//8AAP7/AwD//wEA//8AAAMA+v8FAPz/AgD//wEA//8BAP//AQABAP//AgACAP7/AgABAAAAAwD9/wUA//8DAP7///8EAPj/CAD4/wQA/v8CAP3/BQD8/wYA/v8AAAMA/v8FAP3/BgD//wIA//8BAP//AAABAPv/BAD9/wIA/f8EAP//AgD+/wQA+f8IAPz/AwD+/wMA/f8HAPf/BwD4/wYA+/8BAP7///8BAP7/AwD8/wQA/v8BAAIA//8BAAAA///9/wIAAAD+/wUA+v8GAP3/BAD8/wYA+v8IAP7/AAAAAAAA//8CAAAAAAACAP7/BQD9/wMA/v8DAP7/AwD/////AAABAP3/AwD+////BwD4/woA+v8CAAIA/v8CAP3/BQD9/wMA//8AAP//AAD9/wEA/v/+/wAA/v/+/wAA/v///wAA///9/wMA/P8BAP3/AQD+/wAA/v8FAP3/AwD9/wQAAAACAP//BAD9/wYA/v8CAAMA/f8FAP7/BgD8/wQA//8AAAQA/f8FAP3/AAAAAAAAAgD7/wIA/P8EAP7/AgD+/wMA/f8EAAEA//8CAP//AQD+////AAD+/wAA///9//7//////wEA/P8BAP7/BAD8/wQA/P8DAP7/AAD//wEA//8CAP7/AQD+/wEA/v8AAAAAAQD///3/AgD7/wYA+/8CAP3/AQAAAAAAAgABAP//BAAAAAQA//8CAAAAAwD+/wAABAD6/wcA9/8IAPv/BQD8/wIAAAACAP//BAD7/wUA/P8DAP//AAACAPz/BAD7/wMAAAD9/wIAAAAAAAAAAQD8/wYA+P8IAPv/AgABAP3/BQD+/wEAAgD//wAAAgD9/wEA///+/wAAAAD8/wAAAQD9/wUA/f///wQA+v8HAPj/BQD7////AAAAAP7/AQD8/wIA/////wEA/f8EAPr/BgD8/wYA/P8EAP7/BAD//wEA/v8FAPz/BgD+/wMAAAABAP//AQAAAAMA/v8FAPv/BgD//wEAAgD6/wkA+P8IAPj/BgD8/wIA/f8AAP3/AgD8//7/AQD+/wEA/v8AAP3/BQD6/wYA+/8DAP7/AQD+////AQD9////AgD4/wkA+f8CAAIA/P8GAP3/AQD//wAAAgD+/wMA/P8BAAAAAQD8/wcA+P8JAPX/CwD2/wkA+v8BAP///v8CAP//AAD+/wEAAAACAP7/AAACAPz/BgD6/wQA/v8DAAAAAQD//wEA/v8
CAP7/AgAAAAEA/////wEA//////7/AQD7/wIA/P8CAP3///8BAP7/AgD8/wIA/f8FAPr/BAD7/wIAAgD9/wQA//8CAAAAAQAAAAIAAAADAP3/BAD8/wQA//8AAP//AgD//wAAAwD6/wYA+P8GAP3/AAABAPz/CAD3/wgA+v8FAPv/BAD9/wMA/f///wAA/v8BAP7/AAAAAP7/AAD+//3//v8BAPr/AwD6/wMA+/8DAPr/AwD9/wMA/P8CAP//AgAAAP7/BQD7/woA+f8DAP///v///wAA/f8AAAUA+f8IAPj/BQAAAP3/AQD9/wEA/f8BAP7///8AAP//AwD8/wUA+f8IAPz/BQD//wIABAD+/wQA/P8HAPj/CQD7/wYAAgABAP//BwD7/wgA/P8BAAIA/v8EAP////8CAPz/AAD+//7/AQD+//////8AAP3/AgD6/wIA/v/+//3/AQD7/wEA+v8HAPn/BgD4/wQA/P8DAPz/BQD7/wUA+P8IAPr/BQD6/wQA/P8DAPr/AwD///z/BAD7/wIA/P8AAPz/AwD9/wAAAQD6/wQA+v8DAP7/AAAAAAAA///+/wQA+/8DAP//AQACAP7//v8FAPz/CAD8/wEABAD+/wEABQD8/wYA//8FAAAABQABAP7/BwD8/wEABAD6/wcA/P8CAAEA/f8HAPn/BgD8/wQA/v/8/wcA9f8GAPr///8DAPf/CgDz/woA+f8AAAMA+/8GAP3/AQD8/wEAAAD9/wIA+v8DAP3/AAD9//7////8/wEA+v8AAP3//f/+//z////6/wMA+f8AAAAA/P///wIA/P8DAP7/AgD9/wQA/P8GAP3/CAD9/wUA/v8EAP//BgD8/wMA//8DAP3/BQD//wQA//8CAAIAAQD//wEA///+/wMA+v8IAPn/BgD9/wQA/v8GAPv/BQD8/wIAAAD8/wIA///9/wMA/P8EAP7/AwD6/wQA+v8EAP7////+////AgD//wEAAQD//wIA/v8AAAAAAAABAAEAAAADAP3/BQD9/wYA/f8HAPj/CAD9/wMA/P8DAPz/AgD///z/CAD4/wgA+/8BAAUA+P8HAP3/AAAAAAIA+v8GAPr/AAACAPr/AwD7/wEAAAD9/////v///wAA///+/wEAAAD9/wMA//8BAAAA+v8EAPz/AgD+//z/BQD5/wYA+v///wEA/f8AAAEA//8DAP7/BQD9/wQAAAACAAEAAgAAAAYA//8AAAUA/P8FAP7/AwACAP7/AgABAAAABwD7/wYA/f8FAP//AQACAP3/AQD+/wIAAgD+/wEA/v/+/wEA/f8AAAAAAAD7/wQA+f8EAPv///8DAP3/AgAAAAAAAgD/////AgD+/wUA+v8DAP3/AgD+/wMAAQABAAMAAQAAAAQAAAABAAEAAAAAAAQA/P8KAPn/CwD6/wcA//8CAAEA/f8EAP7/AQABAAAA//8CAP7/BAD//wIA//8DAPr/BgD7/wQA///+/wIA/v8DAP3/AwD9/wEA//8AAP7/AwD9/wEA/////wEA//8EAPv/BAD9/wMAAAD9/wIAAAACAP////8CAAAAAwD8/wQA+f8HAPn/BQD+/wEAAgD+/wAAAwD+/wUA+v8JAPj/CQD8/wIA//8BAP7/BAAAAAEAAwD7/wgA/P8HAPz/BwD7/woA+/8JAPv/BQD+/wEAAQABAAEA//8DAP3/AAACAP///f8FAPr/BAAAAAAAAAADAP3/AQAAAAAAAQD+/wEAAAABAAMAAAAAAAQA+v8EAP7/AAACAP3/AgD+/wIAAwD+/wMA+/8FAPv/BQD8/wQA+/8EAAAAAwD+/wAA/f8CAP7/AwAAAAIA/v8AAAIA//8AAAMA/P8GAPv/BgD7/wYA/////wIA+/8IAPn/AwD///3/BAD7/wYA+v8DAP3/AAACAAAAAQAAAAEAAAADAP7/BAD7/wcA9v8MAPT/CAD3/wMA+/8AAP3/AgD7/wMA+v8FAPz/AQD
//wIA//8BAP7/AAADAPr/BwD7/wMA/////wEA//8BAP//AgD+/wMA/v8CAAEABAD//wEAAgABAAQAAAACAAEABAD+/wYA+v8DAAEA/f8EAP3/AgACAP//AwABAAAAAQADAAAA//8AAAIA//8AAP7//v8AAAAA/v///wEA+v8GAPr/BAD9/wAAAgAAAAMAAgD//wIAAAAAAAQA/v///wAA/f///wAA/P8CAP3/AAADAPz/BAD+////AgD7/wMAAAD//wEA/f8FAP3/BAD+/wMA/f8EAPv/AAABAPv/BAD5/wUA+f8GAPb/BgD8/wAA//8BAAAAAAABAP//AgD//wIA/f8EAP7/AAAAAAAAAgD7/wQA+v8EAP7/AgD9/wUA/f8DAP////8CAAEAAgAAAAAAAAD//wEAAgD9/wIA/v8CAP//AgABAAMAAAABAAMAAwAAAAMAAAACAAMAAAAAAAUA/v8CAP//BQD8/wUA/v///wQA/v8DAAAAAgD//wEAAgD//wUA/v8EAP3/AgABAAEA//8CAP3/BQD6/wMA+v8CAPz/AgD8//7/AQD+/wAA/////wIA/f8FAPr/AgD+//z/AwD5/wQA/f///wEA/P8CAPz/AgD8/wIA/f/7/wQA+f8CAP3//////wAA/v8DAAEAAAADAP7/AgABAP////8EAPz/BAD9/wEA//8DAP//BQD9/wQA//8GAP////8EAP//AgD//wMA/P8FAPv/BgD7/wQA/f8EAP//AgD8/wMA/P8EAPz/BAD9/wEABAD+/wMAAAABAAEAAAABAP7/AwD9/wQA/P8CAAAAAAD//wMA/v8CAP//AAD9/wYA+f8FAPv///8DAPv/AgD8/wAA//8BAPz/AwD8/wMA/f8DAP3/BwD8/wMAAgAAAAQAAgD//wIABAD7/wUA/P8AAP7/AgD9/wEA////////AQD7/wUA/P8BAP////8EAP3/BAD/////AgD//wMA+/8DAPr///8AAPv/AgD7/////v/+/wAA/v8BAPz/AwD8/wAA/v/9/wIA+/8DAP3/AAAAAP3/AQD//wQA+v8EAP3/AAACAP3/AgD9//7/AgD+/wAAAAD+/wEABAD7/wYA/P8BAAMA///9/wYA+P8JAPj/BQD///7/CAD6/wYAAAAAAAMA///+/wMAAAD//wQA+P8HAPz/AwD9/wAAAgD8/wQA+v8DAP3///////v/BAD5/wMA/P8BAAAA///9/wMA+v8EAP3//v8BAPz/AgD7/wMA+/8BAP7/AwD8/wUA+v8GAP3/AwABAP//AgACAP7/AgAAAP3/AgD8/wEA/////wAA/v///wAAAAABAAAAAQABAPz/AgABAAAAAAACAPv/BAD///3/BgD//wEAAQD+/wMA/P8BAAAA/f8EAP3//v8CAP7///8DAPz/AAD///7/AAAAAP3//f8BAPj/BAD4/wQA+P8CAP3///8AAAAA//8AAP7/AQD+/wAA///9/wUA+f8FAP3/AwAAAAEAAAADAP//AwD//wAAAQD+/wUA/P8DAP////8GAPv/AwD9/wEA/v8CAP7/AgD+////AQD//wIA/P8FAPj/CgD1/w0A9v8JAPz/AgABAAEAAQD//wIA/f8FAPn/BAD6/wEA///+//7//v8BAP3/AAD9/wAA/v8FAPn/BAD8/wIAAAABAP//AgD7/wIA/v8AAAMA+P8HAPv/AgAAAP3/AwD9/wEA//8AAP//AgABAAAAAgD//wIA//////7/AwD8/wIA/v///wEA+/8EAPj/BAD+//////////7/AAD9/wAA+v8CAP3//v8AAPz/AQD8///////7/wIA+/8CAPn/AgD5/wEA+/8EAPr/BAD9////BQD3/wYA+/8HAPr/BAD8/wEABAD6/wcA+/8AAAUA+f8HAPz/AgAAAAAAAgD9/wMA/P8CAP7/AAACAPv/BQD9////AgD8/wIA///8/wMA/P///wEA+v8FAPv/AQD
///7/AwD+/wAAAgD8/wQA/f8BAAIA/v8BAP//AAABAPv/AwD8/wQAAAD6/wEA/P8CAP7///////7/AQD+//z/BAD5/wYA+v8FAPr/AwD//wAAAQD//wAAAQD8/wUA+v8HAP3/AAACAPv/BAD9/wEA+v8EAPn/BQD9/wAAAgD8/wAA//8CAP3/AwD8/wAA/v8AAPv/BAD6/wMA/P8BAP3////9/wEA/v8AAP3/AAD9/wAA+/8EAPz/AwD8/wQA/v8EAP//AQAEAP7/BQD7/wUA//8BAAEAAAABAP//BAD8/wAAAQD9/wEABQD3/wkA+v8EAAAABQD6/wcA+/8FAP3/BAD8/wIA/v8BAAIA+v8FAPz/BAD/////AgAAAAMA/f8DAP//AQD//wEA//8CAP7/AwD9/wQA/P8EAP//AgD//wMA/P8EAPz/BQD9/wEAAAD+/wEA//8AAP7/AgD8/wMA/v///wAAAQD9/wQA+/8BAPz/AgD9/wEA/f8BAAAAAwD+/wQA+/8GAPv/BQD8/wQAAAAAAAMA//8DAP7/AgAAAAMA//8DAP//AgD9/wQA/v8CAP//AAAEAP//AwD//wQA//8EAAAAAwAEAAAAAgD//wMAAAAEAAEAAQD//wIA/v8FAP7/AgABAAEAAQABAAIA//8BAAEAAQABAP7//f8EAPz/BAD9/wAA/v8DAPr/BQD6/wUA/f8DAP3/BAD8////AgD6/wUA/f8AAP//AgD6/wgA+v8FAP3/AgABAAAABgD8/wgA/f8GAAAAAgADAAIAAwAAAAQA/v8FAAEABQAAAAQA//8GAP7/BQD+/wQAAgD//wYA/v8DAAEABQD9/wcA+f8JAPr/BgD+/wIAAQD7/wgA9/8IAPj/BwD8/wQA+f8FAPn/CQD6/wYA+v8HAPr/BwD4/wYA9/8HAPj/BQD7/wAAAAADAPv/BwD9/wUAAQABAAQAAAAAAAIA//8DAAEA/v8FAAAABAAAAAMAAQAAAAUA/P8IAPr/CQD5/wgA//8BAAQA/v8HAP//CQD6/wkA/f8DAP7/AAABAP3/AgD9/wEA/f8CAP//AAAAAP7/AQD8/wkA9/8IAPj/BAAAAPz/BAD3/wYA+/8CAAAA/f8AAAMA/f8DAP3/BAD8/wUA//8CAAUAAQADAAMAAwAEAAAABwD9/wcAAgABAAgA//8IAPv/CQD+/wIABwD8/wcA//8BAAYA/P8HAPz/BgD//wEA//8DAP//AwD+/wMA//8AAAAA/P8HAPj/BQD7/wIAAwD7/wYA+P8HAPz/BAD//wMA/v8EAPz/AwD9/wIAAAAAAP//AQD7/wUA+v8EAPn/BQD7/wAAAQD7/wQA/f8DAP3/AgD+/wIA/f8BAP3/AwD8/wUA+/8FAP7/AAAEAP3/AwD9/wUA/P8EAAAAAAACAAAAAgACAAIAAgACAAIA//8BAAMA+v8IAPz//v8BAPv/AgABAPv//v/+/wAA//////z/AwD7/wEA/v/8/wYA/P8CAP7/AQAEAPz/BwD7/wQAAAD+/wcA+/8HAP7//f/7/wAA+f8FAPv/AQACAPz/AwD9/wIA+/8BAPr/AgD5/wIA/f8DAP//AQACAP7/BAD//wIAAQACAAIA/f8GAPn/CgD3/wkA9v8JAPv/AwD+/wMA//8FAPz/BgAAAAEAAwD//wQA//8FAP3/AgADAPz/CAD//wEAAgADAP3/CQD3/wkA+P8HAPv/BAD+/wEAAAD+/wIA/P8BAP//+/8CAPj/AQD7/wAA///+//z/BAD7/wAAAAD7/wUA+////wIAAAABAAAA//8DAP//BQD///7/BQD6/wUA+v8GAPr/AwD9/wAAAQD8//////////7/AAD9/wUA/f8EAP7/AAABAAEA/v8DAPz/AwD+/wAA/f8EAPr/BwD5/wAA///+/////P/7//3//v/+//z/AgD3/wcA+f8FAP3/AgAAAAEAAAAAAAIAAAACAP3/BAD9/wUA/P8
CAAEA/v8HAPr/CAD9/wUAAQADAAEABAD8/wgA/P8CAP////8CAAUA/P8IAPz/CAAAAAcAAgADAAQA/P8GAPv/AwD9/wEA///+/wEA/v///wQA/P8GAPr/BQD9////AQD9/wAA//8AAP7/AAD+/wAA/P8AAAAA/f8DAP3/AwD//wAAAQD7/wcA9v8FAP3//f8BAP///f8DAPz/AAAFAPj/CAD6/wQA//8AAAEAAQD9/wYA+/8FAPn/CgD3/woA+v8EAP////8AAAAAAAACAP3/AgD//wEAAgD//wEA/f8CAAEA/v8IAPn/BgD9/wAAAwABAAIA/f8DAAAAAgADAP3/AwD//wEAAQABAP//AAADAAAAAgACAAAAAgACAAIAAwABAAAAAQABAP7/AQD+/wEA//8DAPz/BAD9/wMA//8CAP7/AQD//wEA/f8EAPz/BAD+/wEAAQABAP7/AgD+/wEAAQD//wMA/f8AAAAAAQD+/wMA/v///wUA/v8EAAAAAQD+/wUA/P8CAP//AAD+///////+//7////+/wMA//8CAP//AwAAAAIAAwD+/wQAAAD//wYA+/8CAAEA/f8CAAIA/v8BAAIA/P8FAP//AQAAAP3/AQD7/wgA9f8JAPb/BgD6/wgA9/8FAPr/AQD+/wAA+f8EAPr/BAD8/////P//////AQD///7/BgD5/wcA/P8DAP//AgD+/wIAAAADAP3/AgAAAAIAAAACAP3/BQD8/wUA/f8EAPz/AwD8/wIAAAD//wAAAgD7/wYA+/8DAAAA/v8CAP7/AgD8/wUA/v8DAP3/AwABAAMAAQADAP7/CAD7/wcA//8DAAEAAQABAAIAAQABAAEA/v8CAP3/BAD8/wEA/P8AAP7/AwD5/wUA+f8FAPr/BAD7/wIA+/8AAP7/AQD9/wEA/v8BAPz/AwD7/wUA/f8BAAIAAgD8/wcA+P8FAPz//v8CAP3/AAD9/wAA/P////3/+v/+//3//f8CAPr/AAD///7/AAD9/wQA+P8FAPz/AAD///3/AwD+/wEA/v8DAP3/BAD+/wEAAgD/////AwD+/wIA/P8FAPv/BQD+/wEAAgD//wIA//8CAAAAAAD+/wMA+/8CAP3/AQACAPz/BgD5/wgA+v8CAAIA/f8EAPz/BQD8/wUABAD8/wkA/P8EAAIAAQABAAAAAwD8/wYA/v8CAAMA/f8DAP7/AQAAAAEA/v8FAP7/AwAAAP///v8CAPn/AgAAAPr/BQD6/wIA/v8BAP//BAD8/wQA/v8DAAAA/v8CAP7/AAD///z/AAD//wEA+v8DAPz/AAD///z/AAABAPn/AgD3/wEA+/8AAPz/AwD6/wIA/P8BAP///v8EAPf/CQD3/wUA+f8EAP//AQD8/wUA+f8FAP3/AgD+/wEA//8CAP3/BgD7/wUAAgD//wIAAQD+/wAA//8AAAAAAQD+////AAAAAAEAAQD//wUA+/8KAPj/DAD4/wgA//8CAAMA/v8EAAAABAD9/wUA/P8CAP3/BgD3/wYA+v8EAAAA//8BAAIA+/8DAP3///8CAPr/AgD8/wAAAgD7/wUA+f8CAP//AAD//wMA+/8EAP7/AQAEAPv/BQD7/wcA+/8DAP3/BAAAAAMA/v8BAP//AwD8/wAA///+/wEA/v8CAPz/BAD4/wgA+f8GAPj/BQD6/wEA+v8BAPv////8/////f8CAPz/AQAAAAAAAQD8/wIA/v8CAP//AQAAAAMAAwD9/wcA+/8IAP7/AAAEAPn/AAAAAPz/AQD+//z/BQD8/wEA/f8BAP7/AwD6/wQA+v8CAP//AAD//wIAAQD9/wUA/f8DAAIA/v8IAPv/BQABAP//BQABAPz/CQD5/wgA/f8AAAUA/f8EAPv/AwD9/wIA/P8BAPv/AAD///z/AgD8/wUA+f8FAPr/BgD7/wgA+f8GAPv/BQD8/wAA///9/wEA/v8BAAAAAAABAP7/BgD6/wUA/f///wMA/P8
HAPj/CAD4/wUA+v8DAPv/AAD+//7/AgD//////v8AAP//AQD//wIA/f8BAAMA/v8GAPv/AwAAAAEAAQD//wEAAgD/////AgD9/wQA+/8FAPz/AgD+//7/AQAAAAAA/f8EAPr/AgD+/wAAAQADAPn/BgD7/wYA/f8GAP7/BAAAAP//AgD8/wYA/P8HAPv/BgD//wYA/P8CAAIAAQD//wUA+/8GAPv/BAD8/wQA9v8LAPb/BwD4/wMA/v8CAP7/AgAAAAIA//8AAAQA+/8EAP7/AAADAP7/AQAAAAEA/P8DAPv/AQACAPz/BgD3/wUA/P8CAAIA/P8GAPz/AwD+/wIA//8AAP///f8DAPr/BAD5/wYA+P8JAPb/BwD8/wEAAgD+/wUA/f8FAPz/BgD7/wgA/f8AAAQA+v8GAPj/BQD+/wEA/P8GAPz/AgABAAAAAQADAP7/AgAAAAAAAwD5/wgA+v8EAAEA/f8EAP//AAABAAIA+/8CAPv/AQD///3/BAD8/wQA/P8EAP7/AQACAP//AAADAPr/BwD8/wQAAgD+/wMAAAAAAAIAAAD///7/AQD8/wUA/f8AAAIA/P8EAAEA/v8EAPr/BQD5/wgA+P8GAPv/AgD9/wEA/v8CAP7//f8CAP//AgD//wIAAAACAAEAAQACAAEAAAABAAEA/v8CAAAA/f8CAP7//f8CAAAA/f8EAPj/CAD4/wgA/P///wQA/v8CAAIA/v8DAAEAAAABAAEAAAAAAAAAAAAAAAEA//8AAAEA//8CAP3/AgAAAP7/AgD+////AAD+/wAA//////7//////////f8BAPz/AAD8/wEA/v/8/wAA/v8BAP//AAD9/wIA/f8FAPz/AwAAAAEAAgD+/wQA/f8GAPz/AwAAAP//AwAAAP3/BAD8/wMA//8CAP7/AgD+/wAA/f8DAP3/BgD4/wIA/P8DAP///v8BAPz/BgD4/wcA+P8IAPr/BQD+/wIAAQAAAAAAAgD+/wYA+v8HAPf/BAD9/wEAAAAAAP3/AgD9/wIAAAD//wAA/v8CAP7/AgAAAP7/BQD8/wQA/P8AAAQA+f8HAPj/BQD6/wYA+v8DAAEA//8CAP//AAAEAP7/AwAAAAAABAD/////AgD7/wIAAAD8/wMAAAD7/wgA9f8JAPz/AwD//wIA//8BAAEA/f8AAP7/AQD//wAA/v8AAP7/AQD///3/AAD9//7/AQD9/wEA///9/wMA/f8BAP3/BAD5/wYA+f8GAPr/AQD9/wEA/f8DAPj/BwD3/wkA9/8IAPn/BgAAAP//BgD6/wgA/P8FAP3/BAD9/wQA+/8FAP//AQAAAP//AQACAAEAAAADAPz/AwAAAAEAAgABAP7/AgD9/wMA//8AAAEA/v8DAP//AAD//wEA/P8EAPv/AwD//wAAAQABAAIA/v8DAP3/BgD8/wYA/P8DAP7/AgAAAP//AQD+/wAAAAD//wIA/v8CAPv/BgD7/wQA////////AQD+/wAAAwD7/wMAAAABAAEA//8GAPn/CQD5/wEAAQABAP3/BQD7/wUA//8EAPz/BQD7/wMA/P8DAP3///8AAPz/AAD+//3/BAD4/wUA/P8CAP///P8DAP3/AgD///////8EAPz/AwD8/wMA/v8BAP3/AQD9/wQA+/8EAP7//v8EAP7/AgD//wEAAgD8/wcA/f8FAP7/AQAAAAEAAAAAAAIA/f8DAPr/BgD3/wgA+/8BAAIAAAD//wMAAAAAAAYA+f8KAP3/BAABAP//AgADAP7/AwAAAAEA/v8GAPz/BwD5/wYA/v8DAAEA/v8BAP3/BAD5/wkA9P8HAPn/AwD///7/AAD9////AAD8/wQA+P8HAPf/CgD3/wYA/f8CAP//AQD//wEAAwD9/wMA///9/wQA/f8FAP3/AQAAAP3/BAD7/wIAAAD9/wMA//8AAAIA+v8EAPv/AgABAPn/BgD2/wcA9/8EAPz/AgD//wAA/v8CAP//AAA
BAPz/BAD//wEAAAACAAEAAgD+/wMA/v8DAP7/AAAAAAIA/P8DAP3/AAABAP7/AQD+/wMA/f8EAP3/BAD+/wQA+v8EAP3/BgD8/wMA+/8FAP3/AQD//wEAAAADAAAABAAAAP//BgD9/wUA/v8FAAAAAwAEAP3/BwD8/wkA/P8FAP//AAABAP7/AQD+/wEAAQD8/wMA/P8BAP7/AQD+/wAAAAD8/wMA+/8DAPv/AgD8/wEA//8BAP3/AAACAP7/AQD+/wAAAAD9/wMA+////////P8AAAMA+P8EAPn/BAD7/wEAAAD7/wQA+f8BAP3//v8AAP///f8EAPn/BgD7/wUA/f8BAAEA//8EAP3/AwD//wMAAAD//wcA+f8FAPv/AgD//wEAAAD//wYA+v8LAPv/BwD//wMAAgACAAIAAgAAAAIA+f8JAPn/BgD8/wMA/v8EAPz/BQD9/wEAAgAAAAQA/f8CAP3/AwAAAP7/AgD+/wAAAQD///////8AAP7/AgD8/wMA/P8FAPr/BgD8/wUA//8BAAEAAQD//wQA/f8EAPz/BgD8/wIA//8AAAIA///+/wIA/v8AAP7/AAABAAAA///+////AQD9/wIA/P8DAPv/AAD+//7/AgD7/wIA+/8BAP7/AAD+/wAA/v///wAA+/8DAPz/AAD+/wAA/f8CAPz/AAAAAP///v/9/wMA+/8EAPz/AgABAP3/BgD4/woA+f8HAP//AQACAAEAAQAEAP//AAABAAAAAgACAP3/BwD8/wcA+/8FAAAAAQAEAPz/BwD+/wMABAD//wYAAAAFAAAABgD+/wcAAgABAAYAAAACAAUA/v8FAP//BAD+/wEAAgAAAAIA//8CAP3/BgD3/wYA/P8DAP7///8CAP////8BAP3/AQAAAP//AQAAAP3/AgD9////AAD/////AwD7/wEA//////7/AgD9/wMA/P8AAAEA/f8BAPr/AwD5/wQA+f8CAPv/AwD9////AgD//wAAAwD9/wIAAgD//wEABAD+/wYA/v8DAAEAAwAAAAcA/f8EAP//AwACAAMA//8DAAEAAAAEAP7/BwD7/wgA+v8EAP7///8EAP3/AAAAAPv/AwD8////AQD+//z/AgD5/wcA+v8EAP///v8BAAIA/P8HAPn/BgD9/wMAAQADAAAA//8EAP7/AgACAP//BgD//wIAAwD8/woA+P8JAPz/BwD9/wUAAgAAAAQA//8AAAcA+v8JAPz/BgAAAAIAAgD9/wgA9/8KAPn/BwD+/wMA/v8FAP3/BwD9/wMABAD8/wcA/P8DAP//AQAAAAEA///+/wMA/v8CAP7/AgD7/wYA/P8EAP3/AgD//wMA/f8DAP7/AAACAPv/BAD8/wIAAgD8/wUA+v8HAPf/CAD5/wQA+v8DAPr/BQD7/wUA+/8HAPz/BAABAP3/BwD7/wQAAQD7/wkA9/8HAPn/AwD+/wEA///+/wEAAwD+/wAAAgD7/wYA+v8FAP3/AQD//wAA/v8CAP7/AwD//wIAAQABAAMA+v8IAPv/BAACAPn/BQD6/wUA/f8CAAAAAgD//wUA+v8FAP3/AQAEAPz/BQD6/wYA+/8CAP7///8AAP7/AgD+/wIA//8AAAAAAQACAP//BQD+/wQA/v8DAAIA//8DAP//AAACAP//BQD7/wIA//8AAAIA//8CAP//AQAAAP7/BQD5/wgA+/8CAP3/AwAAAP//AQD8/wUA+v8HAPr/AwD+/wAAAAD+/wQA+v8GAPz/AQADAPr/BgD8/wcA+/8BAAEA/v8CAP7//v8CAPv/BAD6/wQA+f8EAP7//v8AAPv/AwD9////AAABAP3/BQD8/wQA/P8BAAEA/P8EAPn/AwD9///////9/wIA/P8DAPv/BQD9/wMA/////wIA/f8AAAAA//8CAP7/AAD+////AgABAAQA/f8EAAEAAwABAP3/AwD9/wUA//8CAP7/AgD5/wcA+v8CAAEA/v8CAAAAAAA
BAAEA/f8DAP//AQD/////AwD9/wEAAAD+/wEAAgAAAAAAAAD//wEAAQD9/wMA/P8CAAAAAAD//////////wEA/f8CAP7/AwD8/wIAAQABAAQA/P8FAPr/CQD3/wkA9/8IAPv/BAD+/wMA/v8FAPr/BwD8/wIAAAACAPv/BwD8/wUA+/8CAPr/BQD6/wEA/P///////v8AAP///v8AAP3///////7/AAD8//7/AAD+/wAA/P8CAP///v8AAAAA+/8EAPf/BwD3/wQA+/8BAP////8AAP3/BQAAAAQAAAABAAIAAAAEAP//AwACAP3/BQD7/wUA/v8CAAAAAgD+/wQA/P8GAPv/BgD7/wUA/P8FAP////8CAPz/AwD7/wEA///9/wIA/f///wIA/P8EAP7/AwD6/wYA+P8GAPv/BAD7/wMA/f///wEA/v///wAA/v////3/AQD9//3/BAD6/wQA/v8CAP//BQD//wQAAgD//wYA/P8FAAAABAD//wIA//8DAP7/AQD//wMA//8DAP7/BAD+/wAAAwD8/wYA/P8BAAEA+/8JAPT/DAD3/wYA/P8AAP////8AAP7/AwD+//3/BQD2/wsA9f8GAPv/AgD9/wEAAQD4/wYA+f8CAP///v////7////9//3////+/wIA/f8CAP7/AQD+/wYA/P8IAPr/CAD//wEAAwD9/wcA/P8GAP7/BAABAAMAAAACAAMA//8EAAEA/v8DAAEABAAAAAIA/v8AAAMA/v8CAP3///8CAPz/BQD7/wEAAQD9/wEA/////wMA+/8DAPr/BgD7/wMA/f///wAA///+//7/AAD7/wIA///9/wIA+/8FAPz/AgD8/wIA/P8AAAIA/P8GAPj/BQD9/wMA/f8CAP//AgD9/wMA/f8FAP//AAACAP//BgD9/wkA/P8GAP7/BQD//wUA/v8GAAEAAgAEAP//AQACAP7/BAD+/wMA/f8EAP//AQADAAAAAwAAAAMA//8CAAIA/P8GAPn/CAD4/wcA+v8CAAEA/f8CAP7/AAD+/wEA/f////7/AAABAAAA/f8AAP7//////wEA/f8BAAAA/P8CAPv///////z/AgD9////AAD//wMA+/8DAP//AAAFAPv/AgACAP3/AgAAAP7//v8CAP3/AwD8////AgAAAP////8BAAEA//8BAP//AAAEAPz/AQD///v/BQD6/wQA///8/wMA/f8CAAAA//8CAP3/BgD7/wUA+/8GAPz/BgD8/wUAAwAAAAUA/v8DAP//BAD//wQA//8DAP7/BwD4/woA+f8HAPn/BgD4/wcA+P8GAPr/AQD///7/AwD9////AwD7/wEA/v///wIA///8/wMA/f8CAAAA/f8BAAAA/v////3/BAD4/wcA9f8FAPz///8BAPr/AwD5/wUA9/8CAP3//v8BAP///f8DAPn/AQD9///////+//7//v8BAPn/BQD7/wEA/////wAA////////AQD9/wQAAAD//wUA+v8IAPz/BAD+/wEAAgABAP3/BgD8/wMAAgD8/wQA+v8IAPj/CQD5/wQA/P8EAP3/AQACAP//AAABAP////8CAPz//v8EAPr/BwD+/wEA//8FAP3/BwD6/wkA+v8HAPz/AwACAP//AwD/////AwD9/wEAAQD8/wIA/f8BAP//AwD7/wMA+/8AAAIA+/8DAP3/AAAAAAEA/v///////f8CAPv/AwD+//////8AAP7/AgD///7/AAD/////AwD7/wMA/v/+/wEAAAD9/wQA+f/+/wEA/f/9/wEA+P8DAPv/AgD6/wMA+P8DAP3/AgD//wMA/f8BAAEA/v8CAAAA/P8EAPv/BAD+//7/BAD9/wMA/v///wAAAAD+////AwD6/wYA+v8CAAIA//8AAAIA/f8CAP7/AAABAP//AAAAAAEA/v8AAAIA//8FAPz/AQAFAPz/BQAAAP7/BwD5/wkA/f8DAAEAAgABAP3/AwD8/wMA/v/
//wMA+/8DAP3/AwD//wAAAQABAAAAAQD//wAAAgAAAAAAAQD//wIAAAADAPr/BgD7/wIAAAD9/wMA/P8DAP3/BAD8////AwD8/wQA/P///wAAAAD+//z/AQD4/wUA+//9/wUA9v8JAPf/CAD4/wYA/v8BAAIA/P8CAP7/AgD+/wEA//////7////+/wAA/P8FAPf/BwD4/wQA/f8AAP7/AgD//wEA/v/9/wMA/v8FAPr/AwD8/wEA/v8CAP3///8CAPr/BgD8/wQA/////wUA/v8GAPv/BAADAPv/CQD5/wYA//8AAAEAAAD//wAAAAAAAP7/BAD9////BgD5/wkA/P8CAAIAAQABAP////8AAAAAAAD//////v8CAP3/AgD+//7/AgD9/wQA/v8DAAQA//8CAP//AwAAAAMAAwD+/wYA/P8GAP3/BQD+//7/BgD7/wYA/f8DAP3/BAD/////BgD6/wYA/f8AAAAA/v8BAPz/AAD3/wQA+////////f8AAP//+/8EAP3/AAADAPz/BQD9/wQA/f8EAP7/AAADAP7/AQAAAP//AAADAPr/BAD4/wEA/v/9/wAA/v////7/AgD9/wIAAgD7/wgA+v8IAPz/AQADAP7/AQACAPz/AwD+/wYA+v8JAPj/BgD//wMAAAAEAAEAAwACAAUAAAACAAEA/v8HAPv/AwD+//3/AQD/////AwD6/wUA+v8FAP7/AQADAP//BQD//wEAAwAAAAMAAAADAAAAAgACAPz/BAD//wAAAgD9/wMA/v8CAAUA/P8GAP7/AQD//wIA//8BAAIA/P8EAP3/BQD8/wAAAQD8/wMA/P///wUA/P8EAP7/AwAAAAIA//8DAAIAAwD//wQA/v8EAP7/AwD9/wUA/v8GAP//AAADAAEAAQACAP//AwABAP//AAADAPz/AgD9/wIA/f8BAAAA/f8EAPv/BQD8/wQA/v8AAAUA+/8FAPz/AgD//wEAAgD9/wYA+v8FAP3/AgABAPz/AwD9/wAA///+/wIA/v/+/wEA/v8EAPz/AwD7/wIA//8AAAIAAQAAAP7/BAD9/wUA/v8CAAAAAgD//wMA/f8EAP7/BAABAP//AwD//wIAAwACAAIAAQAEAP7/BQD///7/BgD7/wUA/P8DAP//AgD9/wIAAAAAAAEA//8DAP//AAACAP//BQD8/wQAAgD+/wMA/v8BAP//AwD+//7/AAD+/wAA/v8AAAAA/v8CAP////8EAPv/BgD8/wUA+v8HAPz/AwAAAP//AwD//wAABAD8/wQA/P8HAPv/BwD+/wEAAwABAAMAAgACAAMAAwAAAAUA/v8HAP3/AwABAAQA//8FAP3/AwD9/wEA//8BAP7/AQAAAP///////wIA/P8BAP///v8DAP3/AQD9/wIA/f8CAPn/BQD6/wUA/P8BAP7///8CAPz/BQD6/wUA//8CAP//BQD7/wYA+v8DAPz/AQD+/////v///////v8BAP3/BAD7/wQA/P8FAPz/BQD8/wcA+/8IAPr/CAD8/wEAAwD//wUA/v8FAPv/BwD7/wUA//8FAPv/CwD2/w0A+f8GAAAA/v8FAPz/BQD6/wYA+v8HAPr/AwD5/wYA9/8FAPn/AgD9//7/BAD6/wUA/P8EAPz/BAD6/wYA+P8GAPr/AwABAP7/BAD9/wEAAAD9/wYA+f8GAPz/AgD//wIA/v8BAAAAAQD//wIA/v8AAAAA///9/wMA+f8FAPv/AgD+////AQD+/wMA+/8EAP3/BAD8/wIA/v8AAAIA+v8CAP3/AQD//////P8BAAEA/P8CAP3/AAAAAAAA/f8BAPz/AQD+/wAA/v/9/wAA/P8BAAAA/P8BAPz//v////7//v/+//r/AwD6/wUA/P8DAP7/AQD+/wAAAAD//wMA+/8FAPz/AgD+//7/BAD9/wIA//8AAAIA/v8BAPn/AgD8/wYA/P8FAP//AQAAAAAA/P8AAP7/9/8FAPX/AwD
7/wMAAAAAAAMA+f8KAPv/BwD9/wEAAwD//wEA///+/wAA/v/+//7//f////v/AwD4/wcA+P8GAPv/AAAAAPv/BAD8/wUA+/8EAPv/BAD+/wMA/v8DAAAAAAAAAP///v8BAAAA/v8DAPv/BgD6/wQA+/8DAP3//P8BAPf/AwD5//3//f/7/wAA+f/+//7/+/8CAPr/AwD3/wcA+/8FAP3/AgD//wUA/P8KAPr/BgD9/wQA/v8CAP//AgD4/wcA+f8DAPf//f/9//3/+/8AAPv/AgAAAP////8BAP//BAD//wAA///9/wMA/f////7//v8AAAAA/v///////P8AAPj///////n/BgD3/wAAAgD6/wYA+/8GAP7/AgD//wIAAAAAAAEA/v8BAAIA///+/wQA/v8BAAQA//8FAP//AwABAAIABQD+/wEAAAD+/wAA/v8BAP7/AwD+/wIAAAAGAPz/CwD9/wgA/v///wUA+/8BAAAA+/8FAPn/AgACAPv/BQD8/wAAAwD5/wcA9P8JAPX/BgD5/wQA+v8FAPv/BAD5/woA9v8IAPn/BgD7/wYA/f/+/wUA/P8BAP7/AgD8/wYA+f8IAPn/BwD6/wQA///9/wgA+f8JAPv/AwABAP3/BgD7/wMA+v8JAPb/CwD5/wIAAwD5/wkA9/8HAP3/AwD8/wUA/P8IAPv/AQD///3/BQD+/wYA+v8IAPv/AwD+/wIABAD8/wUA//8EAAEAAAD//wQA//8GAAAAAQAAAAMABAABAAIAAQADAAQAAgAFAAIAAQADAP//AwACAP//AgACAP7/BgD9/wUA/P8EAP////8FAPn/BQD9/wIAAQD9/wYA+f8JAPv/AwD//wEAAAADAP3/BQD8/wUA/v//////AQD+/wMA/P8GAPv/BQD8/wAAAAD+////AAD+//z/AgD6/wUA+v///wIA/f8FAP3/AQAGAPz/BwAAAAEAAgADAP3/CAD7/wMAAQD8/wAABAD9/wMAAAD8/wcA//8CAAUA+P8HAPz/AwD//wAAAQABAAAABAAAAP7/AQD+/wEA/v/+/wIA/v8FAPv/BAD6/wMA+v8FAP3/AgAAAAIA/P8JAPv/AAAGAPn/CQD9/wYA//8AAAcA+v8JAP3/AQAFAPn/CQD6/wcA+v8DAPz/AAD+/wAA/P8CAPn/BgD6/wcA+v8FAP7/AwACAP//BQD//wMAAgD//wkA+/8GAAIA/v8JAPv/BAAEAP3/BQADAP7/BgD9/wQAAQAAAP7/AwAAAAEAAAD7/wMA+/8DAPz////+/////v/+/////f/8/////v///wEA/P8DAPz/AQD7/wQA+/8FAPn/CAD6/wcA/P8FAPf/CQD1/wYA/f/+////AAD8/wEA+v8AAPv/AQABAP7//f8CAPz/AwD9//v/BgD4/wIA///9//7/AAD//wEAAQADAPv/CAD3/w0A+v8GAAIAAAABAAEAAAD//wUA/f8CAAQA/f8FAPn/CQD3/wkA+/8CAP7/AwD///7/AgD//wIAAQD9/wIAAAAAAAEAAAD//wAA/v8DAAAAAQAEAP//BAAFAPz/CQD8/wUA/v8BAP7/BwD7/wgA/P8CAAIA/P8CAAEA/P8EAAEAAAAEAP3/AQAAAP///f8FAPn/BAD8/wIAAQD8/wgA/P8EAP//AAACAAEA/P8EAPr/BAD8/wAA/////wEA///+/wAAAQD8/wUA+v8HAPz//v/9//r/AQD5/wIA+f8DAPn/AQACAP3/AwD7/wQA+f8JAPf/BwD5/wMAAgD//////v//////AgD7/wMA+v8EAP///v8IAPj/CAAAAAIAAwD+/wMA+v8AAAAA/v8EAP3//v8FAPv/BwD8/wQAAgADAAMA//8GAP3/BgD//wQAAgAAAAQAAgAFAP3/BAD+/wEAAgAAAPv/BAD6/wQA/v8CAAAABAD4/wsA9v8HAPv///8AAPz/AwD9/wQA+/8DAPn/AgD8/wMA/P/
//wAA/P8DAP///v8AAP//AQD+////AAABAP7/AwD+//7/AgACAP7/AQD7/wQA//8CAP//AgD/////BQD+/wIA//8BAP7/AwD4/wUA9v8HAPf/AwD9/wEA/P8CAP////8EAPb/AgADAPr/CwD0/woA//8FAAEAAAAFAAAABAD7/wEAAAD3/wQA9/8CAPn/AAD8/wMA+///////AQD+/wIA/f/8/wQA+f8HAPr/AwADAPn/CgD3/wsA9v8KAPv/BQAAAAEAAAAAAAQA+v8LAPX/CgD7/wIABwD7/wgA+P8LAPv/BAD9/wEA/P8EAPn/AwD9//////8CAPz/BQD8/wIAAgADAP3/BQD8/wMA+/8CAPz/AwD7/wMA/v8AAAMA+/8IAPn/BQD9/wAAAwD9/wUA9v8MAPT/CQD5/wAAAQD7/wEAAQD9/wQA+/8AAP///v///wEA/f///wAA//8EAAIA/v8DAP//AAACAP3/AgABAPn/AgD///7/AgD6/wQA/v/7/wYA9f8KAPj/AgACAP7/AQD//wAABAD8/wcA9/8JAP3/AwAFAP7/CgD6/wcA/f8AAAUA/f8FAAIA/f8MAPr/CQD6/wMABAAAAP//BAD6/wkA9f8IAPf/BAD4/wAAAQD8/wEA+/8CAP3/BAD8/wUA/////wEAAAACAPr/BgD4/wMAAgD5/wcA+/8AAAIA+f8IAPj/BwD8//7/BAD6/wMAAQD8/wYA/v/9/wUA/f8FAPj/BgD3/wcA+v8BAPn/BAD7/wMA///7/wQA/f8FAP7/AwD+/wQA//8DAP3/BgACAP7/BgD6/wUA+/8BAAEA/v8BAAQA/P8DAP3/BAD8/wYA+P8EAPz/AQACAPf/BwD7/wMAAAD+/wMA/f8EAP7/AAD+//3//v8EAPv/AQADAP3/BwD8/wQAAAACAAEAAgD+/wQA/f8DAP7/BAABAAIAAAAAAP//AQAAAAEA+/8EAPf/DAD2/wUAAAD8/wUA//8AAAQA+f8BAP7/AQD8/wIA/f8BAPz/AAD6/wIA+/////r/AwD7/wIAAAD/////AgD//wMAAQAAAAAAAwD8/wQAAgD7/wkA9/8CAAAA/P8BAPz//v8BAAAAAQABAPv/BwD8/wQA/v/9/wQA/P8DAP3/AwD7/wYA+v8FAP3/AgAAAAEAAgACAAAABAD+/wMA//8CAPz/BQD7/wUAAAACAP//AAAAAAEAAAD9//7/AgD8/wMA+v8AAPz//P8BAPr/AgD8/wAA/f8AAAAA/v/+/wIA//////7/AAD//wIA+v8GAPv/BQD8/wIA/f8BAP3/AwAAAAAA/v8BAAAA///+//7/AgD///3//v////z/BAD5/wIA+/8AAP3///8BAPz/BAD7/wMA/P8FAPr/BgD///7/BAD8/wMA/v/7/wYA+/8HAPr///8EAPv/CAD9/wEAAgD9/wMAAQAAAAUA/v8JAP3/BgD+/wIABAD9/wAAAgD7/wYA/P8CAP//AgADAP//AQD//wQA//8BAAEA+/8FAPn/BAD7/////v/+//7/AAAAAP7/AgD9/wMA//8AAP3/CAD0/woA9v8EAPz//f/9/////P/+//7/+f8BAPj////6//3/+//6/wEA+v8AAP3//v////3/AAABAP3/BAD5/wUA/P8BAAMA/v8FAP3/AQABAP//BgD9/wEAAgD+/wcA/P8KAP7/AwAEAP//BgD8/wQA/f8BAP//AgAAAP//AgD//wcA/f8JAPj/BQABAP7/BgD4/wYA/f8AAP3/BAD7/wYA+/8EAPv/BAD5/wYA+//9/wMA+v8GAPz/BQD9/wMAAQD8/wQAAAAAAAUAAAABAAAABAD8/wsA9/8JAPr/AQAEAP7///8DAPr/AwAAAPr/CQD6/wUA+/8CAAAAAAD//wAA//8DAAIA+f8JAPz//v8EAPj/AwD///v/AwD9////AAAAAAIA/f///wAAAAAAAP////////z////9//3/AAD
7/////v8BAP///P/8/wIA+v8GAPj/BwD7/wQAAQD//wMAAQD//wQA/f8DAAMAAQD//wUA/f8AAAYA/P8MAPP/DAD7/wMABwD+/wgA+/8GAP7/BAAAAP7/AQD+/wMA//////v/BAD6/wQA/f8AAAAA//8DAPv/BgD5/wYA/f8GAP3/BAD//wUA//8CAAEAAgD+/wQA/f8FAP3/AgD//wMAAwABAAAABAD9/wUAAQD7/wMA/f8EAP7/AAAEAPr/CQD6/wQAAAD+/wQA9/8MAPb/CQD7/wEAAgD//wIAAgD//wUA/v8DAPj/CAD7/wUA///+/wIA/f8EAPz/AAABAPz/AgD/////AQD9//3/AQD+/wAA///+/wAA/f///wAA/v8CAPz/BAD9/wIAAQD9/wcA+P8IAPv/AwABAP//BAD//wAAAwD8/wUA/f8AAAIA/f8CAP//AQD8/wUA+P8HAPv/BAD9/wEAAgAAAAIA///+/wEAAQADAP7/AQD///7/BQD6/wYA+/8EAAEAAAAFAP3/BAACAP//AgADAAAABAABAAQAAAADAAAABAABAAEAAAADAPz/AgD+////AwD8/wIA/v8AAP3/AgD8/wMA/f///wEA/P8BAP///P8CAP3///8BAP7/AAD9/wYA+f8GAPr/AgD/////AAD///r/AwD5/wUA+/8CAPr/AQD+//7/AQD///3/AQD7/wAA/P8BAP7/AAD9/wEA/v8CAP7/BAD7/wUA/f8BAAYA+P8IAPz/AgAEAPv/CgD4/wMA/v///wQA+/8EAP//AwAAAAMAAQADAAIAAAAFAP//BQD9/wUA/v/+/wIAAQD//wMA/f8BAAIAAAADAPz/AwAAAAEABAD9/wEAAAD+/wMA/////wAAAQD+/wIA/f8AAAEA/f8BAP///v8EAP3/AAAAAAIA//8FAP3/AwD+/wQA/P8IAPj/CAD6/wYA/P8DAP7/AgD+/wEA//8BAP3/AQD9/wQA/P8CAP//+/8GAPj/BwD4/wUA+////wEA+/8DAPz/AQD9/////////wAA//////7////9/wAAAgD4/wYA+P8EAP3////+/wAA//////7/AAD9/wMA/f8DAP7/AAADAP7/AgACAP7/BgD9/wUA//8DAAIAAAACAP7/AgACAAAAAQADAP//BAD+/wMAAgD//wUA/P8GAAAAAgADAAIAAgAEAAMAAQAFAP//BwABAAQAAwACAAEABgD9/wYA/f8FAP//AQACAP7/BQD7/wYA/P8FAPj/BQD8/wYA+/8AAAIA//8AAP///v8DAPz/AwD9/wIA///9/wMA+f8FAP3///8DAPr/AwD+////AAD+/wEAAAD8/wMA/P8DAPz//P8DAPf/BwD3/wUA+P8EAP3/AAABAP//AgD//wIA/f8GAPv/BwD7/woA+v8HAP3/BQD//wUAAAAEAP//AQAEAP7/CAD6/wgA/f8DAAAAAgAEAP7/AwD//wIA+/8GAPv/BgD8//3/AQD8/wQA+v8DAPr/AQD///z/BAD8/wEAAgD7/wUAAAD8/wUA+/8EAAEAAAAAAAUA/f8GAPr/CAD5/wgA/v8CAAQA//8EAP3/BQAAAAEAAwACAP7/CAD+/wIAAwABAP//BQAAAP//CAD7/wgA/v8EAPz/CAD2/w0A9v8JAPz/BAAAAAIA//8HAPv/CAD9/wQA//8FAPn/BwD7/wQA//8AAPz/BQD8/wIAAAD//wEA/f8EAP3/BAD9/wEAAgD//wIA///9/wQA/P8FAPr/BAD9/wMA/////wMA+/8EAPz/AQD+/////v8CAP7/AAACAAAAAwD//wMA/f8GAPz/BQD+//7/BgD6/wQA/P8AAAEA/v8DAPn/CAD6/wUA/v8CAP7///8BAP//AwD8/wUA+P8HAPn/BQD//wIA//8EAP7/AwD+/wMA//8CAAAA/f8CAPz/BAD+/wEAAAABAAIAAAACAPz/AwD+/wUA/P8GAPf/CwD
2/wcA+f8CAP7/AQAAAP//AQD+/wMA/P8HAPr/CQD7/wUAAAABAAIAAgD+/wQA//8BAAAAAgAAAP//AQD9/wUA/P8BAAMA/f8FAPr/BQD9/wMA/v8BAP//AQD//wMA/P8DAPz/AwD+/wIA/////wEA/v8CAPv/BwD4/wcA+/8DAP////8CAP//BAD8/wQA+/8FAPv/AgD+/wAA/f8AAAAA/v/+/wAAAAD+/wEA+////wMA+P8KAPf/BQD9/wMA//8BAP//AAD9/wQA9/8IAPn/AQD///z/AAABAP3/AgAAAP//AQACAPz/BAD9////AgD+/wEA//8AAP///v8CAAEAAgAEAPv/CAD9/wUA+/8FAPz/BwD7/wYA+v8GAPj/BQD8/wEAAQD//wAAAQD+/wQA/v8AAAEA/v8CAAAA/f8FAPv/AwD+/wAAAAACAP//AAAAAAEA/v8DAPv/BQD7/wMA/////wMA+/8BAP7/AAABAP7/AQD+/wEAAAADAP7/BAD//wIA/f8FAPv/BgD7/wQA/P8FAP3/BAD//wEA//8EAPz/AwAAAAEA//8BAAEAAQD///7/AAD9/wIA+v8CAPv/AgD8/wEA/v8AAP3///////3/AgD8//3///////3/BAD2/wkA+f8BAP3/AgD9/////v/+//3/AgD7/wMA/f8AAP7/AQACAAEABAAAAAEAAwD+/wYA/f8GAP3/BAD+/wEAAgD8/wcA+v8IAPr/BAD//wEAAAACAP//AgD+/wIAAgD+/wIA/P8BAP3/AAD+/wAA/v8AAP///v8CAP7/BAD9/wIA/v/+/wIA/P8GAPj/CAD1/wkA9/8FAPv/AwD6/wQA+f8CAP//+v8FAPv/BAD6/woA+P8HAAAAAgADAAIA//8DAAIAAAAEAP//AgAAAAIA/f8EAPz/BAD//wIA//8EAPz/BAD9/wQA/P8GAPn/BgD7/wMAAAD//wEAAAD+/wIA/f///wAA//8BAP///v8BAP3/AgD8/wEAAAD+////AQD9/wAA//////3/AQD///3/AgD5/wIA+/8AAP7/AQD+/wAAAgD+/wEAAAACAAMAAAADAAAAAAAEAP7/BQD+/wUA/P8JAPz/BgD+/wQAAwD+/wUA/f8DAAEAAQAEAP//AwD9/wMA/v8BAAIA+v8EAPz/AgD+/wMA+v8HAPj/BAD9/wIAAAD9/wEA/P8EAP3/AQD8/wMA/P8CAP3//P8DAPj/BgD7/wAAAAD+/wAAAgD+//3/AgD8/wEAAAAAAP//AAD//wAABAD6/wUA/v///wMA/f8DAAEA//8BAAIAAQABAAQAAgABAAUA/P8IAP3/BAACAAEABQABAAMAAAAAAAIAAAAAAAYA+f8GAP3/AwAAAAUA/f8GAP3/BQD//wIAAQD8/wgA9/8JAPr/AgAAAP3/BAD7/wUA+f8HAPf/BQD7//7/AwD9/wAAAQD8/wIA/P8BAPz/BQD6/wMA///7/wUA9/8DAP7/+v8FAPr/AgD9/wMA/////wAA//8DAAAA//8BAAAAAQD///7/BAD3/wkA+f8DAP3/AQD+/wQA+/8CAAEA/v8DAP7/AQAAAAIA/v8BAP3/AAD+/wIA/f8CAPz/AwD+/wEA//8AAAIA/v8DAP//AAABAP7/BAAAAAAABAD//wcA/v8EAP7/AwABAAIAAQABAAIA//8GAPn/CQD7/wQA/P8DAP3///8AAAAA/f8CAPr/BQD7/wMA/v///wEA+/8CAP7/AgD9////AwD5/wgA+v8DAP3/AQD+/////f8EAPr/AQD9//3/AQD///z/AgD7///////9//3/AAD9/wAAAgD6/wMA+//+/wIA+v8DAPr/AAD///7//v////////8AAP///////wEA/v8AAP//AgAAAAIAAAABAAEAAAABAAEA//8EAP3/AgACAP3/BgD8/wQA/P///wYA+v8IAPn/AwD+/wIAAQD7/wgA+P8JAPr/AwD9/wEA/v/
//wEA/f8EAP//AQABAAAAAwAAAAMA//8CAAEA//8FAP3/AgACAP7/AgABAPv/BgD6/wUA+v8AAAEA//8DAPz//v8DAPr/BQD5/wIA//8AAP//AQD9/wEA+v8FAPn/BAD8/wEA///9/wIA/v8AAAIA+/8CAP7/AAADAPn/BgD7/wIA///+/wIA/P8DAPb/BwD3/wIA/f/7/wMA+v8CAPr/AgD6/wQA+/8CAAAAAQAAAAAA/v8CAP7/BQD5/wMA/P8DAP7/AQACAPv/BwD4/wgA+P8DAP///f8FAPn/BgD7/wIAAAABAP//BAD7/wMA/f8AAAIA//8BAPz/BAD8/wQA/v8CAAAAAQABAAEAAgD+/wcA+v8IAPn/CQD9/wQAAQABAAEA/////wEA//8AAP//AQD///7/AAABAAEAAQD//wEA//8DAP7/AQABAP//AwD+/wEAAgD9/wcA+f8FAPz/AwD7/wQA/f8AAAEA/////wMA+f8IAPn/BAD///3/AQD///3/AAD9//v/AgD8////AwD4/wUA+/8EAP7/AAACAP////8DAPv/AwD//wEA/f8CAPz/AQD+//7/AAD6/wgA9v8HAPn/AgD+/wAA//8BAAAA///+////AgD9/wYA+f8EAP3//////wAAAgD6/wUA+f8HAPv/BQD8/wMAAgABAAAAAwD+/wUA/////wYA+f8LAPb/CQD6/wMA/f8BAAAA//8DAPz/AgABAP//BAD//wIAAAACAAEA//8AAP7/AQABAP3/AQD8/wMA/v//////AQD9/wIAAAD+/wcA/v8FAPz/BgD9/wQAAAAEAAAABAD//wIAAQABAAIA/f8EAP7/AwABAP3/AwAAAP//BAD+/wAABQD5/wcA+f8FAPr/AQD8//3//v////3///////z/AwD6/wIA///+/wYA+P8JAPj/CQD6/wUA/v8BAAAAAAAAAAEAAAD+/wIA/f8CAPr//v8BAPv/AgD7/wIA/f8BAAAA//8DAP3/BAD//wQA/v8BAAIA/////wYA9/8JAPn/BwD+/wIAAAAAAAIAAwD+/wYA//8GAP//BwD//wIAAwD9/wMAAwD8/wMA+v8CAP7/AgD///3/AwD8/wIAAgD9/wcA/P8EAAIA//8EAAEA//8FAP7/BQD//wIA///+/wUA/f8CAP//AAAAAAMAAgD//wQA//8BAAAAAQD//wMA/f8CAAAAAAADAPv/BAD7/wQA+v8DAP3/BAD7/wgA+v8HAPz/BAD//wUA//8EAP//AwABAAAAAgD+/wIAAgD+/wcA/f8EAAAAAgAAAAMA//8CAAEA//8CAAEA/f///wIA/f8BAAAA/f8BAAEA/f8DAP///v8GAPn/CQD5/wgA+P8GAPv/BQD//wAAAwD9/wIAAAD//wUA+v8CAAAA/P8DAPz/AgD///z/BAD7/wYA+/8DAPv/BAD8/wMA//8DAP7/AgAAAP//BQD8/wQA//8BAAEAAQD//wIA/v8GAP3/BQD8/wQAAQADAAEAAQADAAMAAAACAP//AgACAP7/AgD+/wMA//8BAP//AAABAAAAAQD+/wUA/P8EAP7/BAD+/wMA//8FAPz/BQD7/wUA/P8EAP3///////////8AAPz/BgD3/wgA+v8EAP//AQAAAAIA///+/wUA/P8FAP3/AwD9/wYA+P8MAPX/BwD//wIAAAABAAEAAwAAAAMAAQAEAAIAAQAGAP3/CAD9/wYA/v8DAAAABAABAAMA/f8EAPr/CAD4/wYA+v8FAPz/BQD4/wcA+P8FAPv/AwD+//7/AwD9/wIA/f8AAAAA/f8CAPv/BAD9/wAAAQD7/wUA+v8FAPv/BQD+/wMAAAABAAAA//8DAPv/AgD7/wEAAAD9/wEA+/8BAP//AAADAPj/BwD8/wMA//8BAP//BgD8/wUAAAAAAAQA+/8GAAAAAgABAAEA//8DAP//AwAAAAEAAQADAAAABAD9/wcA/f8BAAIA/f8
GAPv/BAD8/wQA/v/9/wEA/v/+/wEA+f8DAP3///8CAP3/AgD//wIA///+/wQA+f8FAP3///8CAP7/AgABAAEA/P8EAPv/BQD9/wAAAgD9/wQA/f8CAP3/AwAAAP//AgD9/wIA/P8DAPz/AgD9//3/AwD8/wMA/f8AAAEA/v8BAP7/AwD+/wMA+v8HAPf/CgDx/wwA9P8IAPr/AAD+/wAA//////7/AAD+/wIA/f///wAA+/8DAP3/AAD9/wAA/P///wEA+/8EAPr/AAD8///////9//7//f/9/wAAAAABAP//AQD9/wQA+f8GAPz/AwD///7/AgAAAP7/AAAAAAAAAgD9/wQA+/8GAPz/BAAAAAIAAQACAAAABQD7/wcA+P8FAP3/AAD///////8BAPz/AwD+/wEAAAD9/wQA/P8EAP///P8GAPr/BAD9//7//v8BAPv/AgD4/wIA+v8BAPr/AAD+//3////7/wAA+/8AAP7//v8AAP3/AAABAPv/AgD7/wAAAwD7/wIA/P/9/wIAAQD+/wAA/v8AAAMA/f8BAAEA/P8CAP7/AAABAP3///////3/AAD9//v/AgD+//3/BAD5/wQA/f8CAAAA/f8EAP7/AwD9/wIA//8DAP//BAD9/wIAAQD8/wUA+/8CAP3//P8BAPr/AQD6/wIA/f8BAP3/AAD9/wQA+/8IAPn/BwD5/wMA/v8AAAEA+/8DAP3///8DAP7/AQACAPz/AwD+/wEAAQAAAAAAAAAAAAAA/v8EAP7/AQABAP///v8DAP3/AQD+/wAAAAAAAAEA/////wMA/f8GAP3/AQD9/wQA+/8FAPz/AQD+//3/AQD9/wAAAAD9/wAA/v///wAA/v8BAP////8AAP//AgD+////AgD9/wMA+/8FAP3/AwD9////AAABAPz/AQD///7/AwD8/wEAAAD//wMA/v8EAP//AwACAP3/AwD///7/BQD7/wQA//8AAAEAAQAAAAQA/f8EAP//BAD9/wUA/P8AAAQA/f8DAAMA/f8CAAAAAAAAAAAA/f8DAP7/AAD//wEA/v8CAP3/BAD8/wMAAAD//wIA/P8EAP//AQD//wAA/f8CAAAAAAABAP//AgD9/wIA/P8FAP///v8EAP7/AQAAAP//AgAAAAQA//8CAAAAAgABAAEAAAAAAAMA/v8FAP7/AgACAP//AwAAAAUA/P8HAPz/BAABAAAABAD6/wYA+/8EAP//AQD+/wQA/v8AAAAAAQD//wQA/P8CAP//AwD+/wIAAAD//wQA/P8HAPj/BgD6/wQA/P8DAPz/AgD6/wQA+v8DAP3//v8CAPv/AwD9/wIAAAABAP7/AQAAAAAAAAD//wIA/v8FAP7/AAACAP//AgAAAAIA/P8FAPz/AQABAPz/BgD5/wcA/f8BAAUA/f8DAAMA/f8FAP7/AgADAAIA//8DAAEAAQACAAAAAQACAP7/BQD//wUA//8CAAEAAQD+/wMA/////wIA/v8BAAIAAAD8/wUA+/8HAPv/BwD8/wYA/f8EAPz/CAD5/wcA/P8AAAYA+P8IAPn/BAD8////AgD5/wMA/P8BAAIA/P8EAP3/BQD+/wYA/P8IAPr/BgD//wAAAwD+/wIAAAD+/wMA/v///wQA/P8BAAYA+f8HAPn/CAD7/wUA+/8FAP3/BgD7/wUA/f8DAPz/AwD8/wQA/P8BAPz/BAD7/wMA+v8EAP7/AQD//wIA/f8DAPz/AgD+/wAA//8AAP//AQACAP7/AQABAPz/BwD5/wUA/f8CAAAAAQD//wMA//8GAP7/AwAAAAAAAgD//wIA+v8JAPb/BwD6/wQA/f///wMA/f8GAP7///8DAP3/BgD//wEABQD+/wMA/v8DAP3/BQD//wIA//8CAP7/AAAAAP7/AgD//wAAAAAAAAIA//8DAP//AgD9/wQA/P8DAAAA//8BAP///v8DAPz/BAD7/wQA/f8EAP7/AAADAP3/AgABAPv/BAD
+/wAAAwD+/wAAAQD9/wMA/v8AAAIA/P8DAAAA/f8HAPr/AwAAAAIAAAABAP//AgABAAAAAgABAP7/BQD8/wIA/v/+/wIA///+/wQA+v8IAPr/BwD9/wEAAgD//wQA/v8FAAAAAgAAAP7/AwD9/wMA+/8CAP//AQD8/wkA+P8JAPj/BQD+/wIAAAACAP3/BQD9/wIA/////wAA///+/wEA/v8AAP///v8EAP3/BAD8/wMAAAAAAAIAAAD8/wIA/f8CAAAAAQD//wMA/f8DAP//AwD9/wYA/f8CAAAA/f8FAPz/BAD/////AwABAAAAAQD/////BQD9/wMA/v/+/wMA/f8BAAEA/v8EAP3/BwD5/wYA/P8DAAAA/v8DAP//AQABAP///v8BAPz/BAD6/wMA+P8HAPf/BgD5/wIA/v8BAPv/BAD7/wMA+/8CAP3/AQD+/wQA//8AAAEAAAACAAEAAAAEAPz/BgD+/wQA//8DAP7/BgD+/wIA//8CAAIAAAD//wIAAQD+/wEAAAD//wIA+v8EAPv/BwD7/wMA/////wYA/f8CAAAAAAABAP///f8EAPn/AwD8/wIA+v8BAP3/AwD5/wYA+P8JAPn/BQD9/wEAAAD/////AgD9/wUA+v8GAPn/BgD6/wMA/f8FAPr/AwD9//7/BgD4/wQA/v///wIA/v8DAAAAAAACAAMAAQADAP7/AgACAP7/AgABAP7/AQD//wEA//8BAAEA/f8EAAAAAAACAP3/AgD//wMA/P8EAPz/AwD+/wAAAAAAAAEA/P8FAPz/BAD/////AAD+/wMA//8BAP//AQD+/wYA+f8KAPj/BAD///7/BAD7/wEA/P8EAPv/AAAAAP//AwD/////AQD//wEA/P8DAP3//f8CAPz/AgD+//7/AgD+//7/BAD6/wYA+v8DAAAAAgD//wMA/f8FAP7/AQAAAAIA//8EAP3/BwD7/wYA+/8EAP7/AgAAAAMA//8AAAQA/v8AAAIA/f8EAP///f8EAPv/BgD3/wcA9f8JAPj/AgD+//3/BAD6/wYA+f8EAP////8CAP3/AAADAP3//v8CAPr/BQD7/wAAAAAAAP//AgD8/wcA+/8EAPv/BAD//wEA/v8BAP7/AgD/////AQACAP3/AAADAPr/CAD6/wIA/v///wAAAQD///7/AgD+/wQA/P8CAP//AQD+/wIA/f8GAPv/BgD8/wQA/f8CAP7/AQABAP7/BgD5/wQA///+/wEA/v////7///8AAP7/AAD9/wIA/v8CAP7//v8CAP////8BAPz/AwD+/wIAAAAAAAMA/v8DAAAAAgD//wMA/v8EAP3/AwD+/wIA/f8EAP7/AAAEAPj/BwD5/wMAAgD6/wYA+f8JAPj/BwD7/wIAAAD9/wYA+/8DAPv/AQD+/wEA/v8CAP3///8CAPn/BAD3/wUA+f8DAPr/AgD9////AQD7/wMAAAD8/wUA+v8HAPz/AwD9/wUAAAABAP///f8CAP3/AAD+//7/BQD9/wEA//8BAAIA/P8AAP7/AQD9/wIA+/8CAP////8DAP3/AwD9/wQA/v8EAAAAAgACAAEAAQD//wMA/P8GAP//AgADAAAAAwABAAMA/v8DAP7/AwD//wEAAgD6/wkA9v8EAPz//v8CAPz/AQD+/wAA/f8DAPn/AwD8///////9/wEA/P/9/wUA+f8HAPr/AQD+/wIA/f8CAAEA/f8CAP3/AwD//wEA+/8DAP7///8CAPr/AwD+/wAA/f8AAPz/AQD+/wIA+/8DAPv/AQAAAP3/AAAAAAEA//8BAP3/AQACAPr/CAD7/wMA///+/wUA/f8EAP//AgABAAIA/f8HAPv/CAD7/wsA+v8JAP3/AwAEAPz/BQD8/wIAAwD9/wMA/v///wYA+/8EAPz/BAD///v/BgD3/wUA+v///wEA/P8EAPr/AwD7/wQA/P8DAAAA/v8DAPr/AwD+/////v8AAP//AAD8/wAA/P8
AAP7//f////r/AAD9//z/AgD1/wMA+/8AAPz/AQD7/wIA/f8CAP////8CAP7/AgAAAAEAAgADAAAABQD8/wcA/f8GAP7/AAADAP7/AgACAAEAAwD+/wUA/v8EAP////8CAPz/AQABAP//AgD+/wMA//8DAAAAAQAAAAEA//8AAP3///8DAPz/AQAAAP3/BgD8//7/BQD4/wQA///9/wIA/v/+/wQA/f8FAPz/AwD9/wIA/f8EAPz/BQD//wEAAAACAP7/BgD9/wYA/P8DAP//AgD+/wMA+/8CAAAA+v8KAPj/BgD9/wEAAwD8/wMA/////wMA/f///wEA///7/wYA9v8GAPv/AAD+/wEA+v8EAPv/AQAAAPv/BQD8/wMA/f8EAPv/BQD6/wIA/v////7/AQD/////AAAAAPz/AgD9/wEA//8CAP7/AgACAAEAAgD//wIAAgAAAAQAAQACAAAABAD9/wUA/P8FAAIA/v8CAAAAAgADAAEA//8EAAAAAgAAAAAA//8BAP7/AgAAAAIA/P8BAP3/AgD9/wAA/v8CAP3//////////v///wAAAAAAAAEAAAABAAAA//8CAP//AQD///7/AwD9/wIA//8CAAQA/P8LAPb/DQD4/wgA+/8FAP7/AQAGAPz/BgABAP//CAD7/wUA/v8AAAMA/f8DAP7/AAAFAPr/BgD9/wMAAAABAAEA/f8CAP7/AwD8/wUA+v8FAP7/AAD//wMA+v8HAPj/BQD9/wEA/v8BAP//AAACAP7/AQD//wIA/v8EAPv/AgAAAAMA/P8GAPn/CQD8/wAAAQD8/wYA+P8HAPr/BQAAAP//AQAAAAIA/v8FAP3/AgADAP7/AwD8/wIAAQD//wUA/v8CAAAAAgAAAAUA/f8GAP7/BQAAAAUA//8DAP3/AgACAP//AwD//wEA/////wEAAQD9/wIA///+/wYA+/8DAAIA/f8DAP3/BAD8/wQA/P8EAP7/BQD+/wIAAwD6/wQA/f8FAPv/AwD8/wIABAD/////BAD5/wkA9/8HAPr/BgD7/wUA/v8DAP////8AAP//AQD//wMAAgD9/wIA/f8EAP//AgD+/wIA/f8HAPr/CAD7/wMA/v8AAAIAAAD9/wMA+/8FAPn/CAD4/wUA+/8DAPz/BwD8/wMA//8CAAEAAgD/////AgD9/wUA+f8FAPj/AgD8/////f8DAPn/BAD8/wAA//8CAPz/BgD7/wIAAAD9/wUA+/8DAAEA/f8EAPz/AwD9/wQA/P8DAP//AAAEAPv/BwD9/wUA/v8CAAMAAAAEAP//AgAEAP7/BQD8/wAAAwD//wAAAAAAAAMA//8DAAAAAAADAAAABAD6/wMAAAD//wMA/P/8/wUA+P8GAPr/BAD5/wYA+f8GAPn/BgD+/wEAAwD//wIABAD7/wYA/f8DAPz/AgD8/wEA/v/9/wEA/f8BAAIA/f8EAPz/AQABAP3/AQAAAP7/AwD9/wMA//8CAAAA//8DAPz/AwD8/wAA//8AAPz/AwD8/wIA+v8DAPz/AwD8/wQA/P8CAAIA/P8GAPz/AwD+/wMA/P8EAP3/AwD///7//////wEA//8BAAAAAQAAAP//AwD8/wcA+/8EAAEA//8AAAMA+f8JAPj/BgD8/wMA//8AAAMAAQADAP//BQD//wUA/v8GAPv/CQD8/wMAAgAAAAAABAD+/wMA//8CAP//AQADAP//AQACAAAAAAADAP3/BgD//wIA//8AAAMA//8AAAIA/P8IAPb/BgD5/wIA/v/9/wIA+v8DAP3/AAD9/wUA+v8GAPz/AAAAAP7////9/wAA/f8DAPv/AwD8/wAA/v8BAP7/AAD9//z/AgD9/////P8AAAAA/v8BAAEA/v8IAPv/AwAAAAEAAAAAAAIA/v8AAAAAAQD//wMA/v8DAAMA/v8FAAAAAQADAP3/CAD4/wkA+f8GAPz/BAD9/wIAAAD//wUA/P8EAPz/AgD+/wEA//8
AAAIA/f8GAPz/BAABAAAAAAAAAAMA/P8EAP7/AAABAP7/AQACAP7/AgD+/wMA//8AAP3/AwD+/wIA+/8BAP//AAD9//////8BAP7///8AAP3/BgD5/wYA/f8EAP//AQACAAQAAAADAP//AQAFAPz/AwD9/wEA/P8DAP3/AAABAP3/AAAAAP3/BAD8/wMA+v8FAAAAAAAFAPn/BgD7/wYA/v/+/////f/+/wAA/f/9/wEA+/8BAPz/AAAAAP//AAD+/wAA///9////AAD8/wMA/P8AAAEA/P8AAAMA/f8CAPz/AgAAAAAAAAD9/wIA+v8GAPr/BAD8/wIAAAABAAEAAAAAAAAAAwD+/wAAAgD7/wYA+/8DAAAA/v8GAP3/BQD//wIAAAADAP3/AwD+/wMA//8AAP//AgD+/wEA//8AAAAA/v8AAP7/AgD6/wIA+f8FAPn/AwD7/wIA//////7/AwD5/wYA+f8DAP7//f8CAPv/AwD5/wIA//8CAP3/AgD9/wQA/f8FAP7/AwAAAAAAAQADAPv/BAD7/wIA/f8BAP3/AAACAPn/BwD4/wgA+/8FAP3//v8FAPv/BQD8/wMA/////wMA+v8KAPr/BwD7/wUA/P8CAPz/BAD7/wQA/f/9/wQA/f8AAAAAAAD9/wEA/v/9/wQA+//+/wAA+f8CAPz//v/+//7///////7/AgD9/wQA+/8AAAIA+f8IAPj/BAD8/wEAAQAAAAEAAQAAAAMA/v8EAP7/AgD+/wAABAD9/wMA/v8BAAEAAQAAAP7/AgD6/wcA+/8EAPz///8EAP3/AgD9/wEAAAABAP//AwD//wEAAQACAP7/BAD+/wMA/f8EAPz/AAABAPr/AwD9/////f8AAP7/AAD+/wAA+/8GAPv/AQAAAP3/AwAAAP7/BAD7/wMA+v8HAPb/CQD5/wIAAAD9/wMA/////wAAAAD+/wMA/P8EAP//AgAAAAEAAQD///7/AQD+/wIA/P8FAPn/BQD5/wQA+v8DAPz/AwD7/wMA/P/9/wEA/P///////f/+////AAD8/wAA/f////3/AAD6/wQA9/8EAPj/AAD+//3/AwD7/wQA/P8FAPf/CAD3/wkA/P///wIA/f8FAP3/AQAAAP//AgD//wIA/P8GAPv/AwABAP3/AQAAAP///v8AAAEA/v8EAPz//v8EAPr/BgD6/wIA/f///wAA/f8CAPv/AwD9/wAA//8BAAAA//8CAP3/AQAAAAEA//8BAP//AAAAAP///f8DAPv/BQD+//3////9/wIA/v/9/wMA+/8DAPv/AAD+/wIA/P8CAP7/AAAAAP7/AwD9/wMA////////AwD7/wcA+/8DAP7/AAAAAP//AAD7/wMA+v8EAP7/AAACAP3//f8EAPz/AgD//////v8BAPr/AwD6/wQA+/8DAPv///////7/AQD7/wQA+P8HAPf/AQD+////AwD7/wQA/P8FAP7/BAD9/wYAAAD+/wQA/f8EAAAA/v8EAAAA/v8GAPf/BgD///z/BQD8/wIAAgD+/wAABAAAAAAAAwD9/wQA/v8BAAIA/f8BAP//AQD+/wIA+/8IAPr/BAAAAP7/BQD+/wEAAQABAPz/BQD7/wYA+/8FAPz/BAD9/wMAAQD+/wUA/P8CAAAA/v8EAP3/AgD+/wAA/v8DAPv/BQD4/wcA+/8CAP3/AgD9/wYA9/8GAPr///8DAPn/AwD+//3/BwD6/wcA+/8FAPz/BAD8/wQAAQAAAAEAAAADAP//AwD9/wQA//8EAP3/AwABAP3/BAD+/wAAAgD//wQA/v8FAPz/CAD6/wkA/P8IAPz/CgD3/woA/P8DAAMA//8EAAAAAAD//wMAAAAAAAQA//8CAAAAAgAAAAEAAgD//wEA///+/wQA+v8HAPn/BAD8/wIA/P8DAP3/AQAAAAAAAAABAP7///8AAP////8AAP////8DAPv/BAD+/wEAAQAAAAEAAgACAAAABgD
9/wgA/f8FAP//BgD//wQA/v8FAP//BgABAAEABAACAAMAAQAAAAQAAAAEAP//AwABAAEABQD+/wQAAAD+/wYA/v8BAAMA/f8DAP3/AwD+/wIA/P8FAPv/CAD2/wgA9v8KAPz/AgAAAAEA/v8EAPr/BAD6/wMA/P8CAPz/AQD+/wMA/v8FAP7/AwADAP//CQD6/wQA//8BAAMAAQD+/wQAAAAGAP7/BQD+/wQAAgD9/wYA/f8HAPz/BAABAAAABQD+/wYAAQAFAP//BAAAAAEAAAD+/wMA+v8EAPv/BQD6/wIA//8AAAAA//8BAPv/CAD5/wUA/f///wMA+/8DAPn/AwD9/wEAAgD5/wUA/f8DAP7/AQAAAAAAAQACAAEABQAAAAUA//8HAAIAAgAFAP3/CAABAAQAAwACAAUAAQACAAEABAD//wgA/f8CAAUA/v8FAAAA//8GAP7/AgD//wIAAAABAAIA/f8GAPv/AQD//wIA/f8BAP3/AQAEAPv/BAD7/wMA//8CAAAAAwD+/wMA/f8BAAEA//8CAP3/AgD///7/AgD8/wEA/f8BAP///P8DAPv/AgAAAP7/AwD+/wEA/v///wEA/v8BAAAA/v8CAAAAAAABAAIA/v8DAP3/BgD6/wcA/f8BAAIAAQACAAMAAAACAAMAAQADAP3/BAD6/wgA+/8BAP3//v8AAAEA+v8CAPz///8AAP3///8BAPz/AQD9////AgD9/wMA/v8BAAMA/f8DAAAAAQABAP//AwD+/wgA/P/+//v//f8AAP7/AAAAAP7/AwD7/wUA+/8BAPv////+//7//P8CAP//AAAGAPr/BQD//wAABQD9/wUA//8AAAIA//8CAAIA/P8DAP7/AwD9/wMA//8DAAEAAAABAAUA/P8JAPr/BwD9/wUA//8CAAAAAQABAAMAAgABAAMA/v8FAP3/AgAAAP//AwD+/wEAAAD+/wMA/f///wEA/P8BAPr/AAD7/wAA+/8CAP3//v8BAPz/AQD///7/AAD/////AgD+/wMA/v8CAAAAAQAEAP///v8EAP3/AgD+/wAAAAD+/wIA/v///////P8DAPn/BQD5/wQA/v8BAAQA+/8FAP3/AwD+/wEA//8BAAEA+v8HAPf/BgD+//7/AQD6/wMA+//8//7/+v8CAPj/AgD6/wIA/f8AAAEA/f8GAPz/AgACAP3/BAD//wEAAQAAAP//AwD8/wUA/////wQA/f8GAP//BQD//wQAAgABAAIAAQAAAAIA/P8EAP//AwACAAEAAwADAAMABQADAAQAAQACAP7/BAD6/wQA/v///wAA////////AgD//wIA/v8BAAAA/v8AAP7/AAD+/wMA+f8GAPr/AQD9//7/AgD8/wQA/P8DAP7/AwD7/wMA///8/wQA+P8FAPr/BAD8/wAA//8AAAMA+/8EAPv/BQD8/wQA/P8FAPv/BwD6/wUA+f8LAPf/CgD4/wYA/f8BAP7/AgD+/wQA/P8BAAMA/P8HAPz/AAABAP7/AwD//wQA/f8DAAAA//8CAAIAAAD//wUA/P8HAP3/AQACAP7/BQD8/wQA/f8CAAIAAAACAAIAAAACAAMAAQADAAIA/v8EAP3/AQD//wAAAQD9/wQA/P8FAPz/BQD7/wYA+/8FAPr/BAD8/wMA/////wMA/v8BAAMA+/8FAPz/AgABAP//AwD8/wUA+P8IAPj/BgD/////AgAAAAMAAAACAP//AQAAAAAA//8BAPz/AQD+/////f//////AQACAP//AQABAAIA//8FAP7/BAAAAP7/BwD6/wUA/v///wIA//8DAP3/AwD+/wAABgD5/wgA9/8DAP7/AAD//wEA+v8EAPz/BQD8///////9/wEA/f/9/wIA+v8EAPr/AQD8/wAA/f8CAPz/AwAAAAAAAAACAP7/AgABAP3/BgD7/wcA+/8CAAEAAAADAP////8FAPn/CgD4/wgA+P8HAPn/AwABAPv/BwD
6/wIAAAD//wIA//8BAP7/AgD+/wEAAQD//wMA/f8FAP//AwACAAAAAwACAAAABQD+/wYA/P8GAP7/AwACAP7/BAD8/wMA/f8DAP3/AQD7/wEA/f8DAPz///8BAPv/BQD6/wMA/f/8/wEA/v///wEA/f8AAAAA/f///wIA/v8DAP7/AgAAAAMA/P8GAPf/BgD5/wUA+v8CAPz/AAD+//3//f/9//r////9/wEA+v8CAPz/AgD8/wAAAAD+////AQD9////AAD//wAAAAABAP7/AwD9/wMAAAAAAAEA//8AAAIA/v8AAAEA/v8DAP//AQAAAAIA//8EAPv/CAD4/wUA/v///wAA/v8BAP7/BAD8/wUA/f8BAP//AgD//wEAAAD//wMAAAAFAP7/BgD+/wMABAD+/wUA/P8EAP7/AwACAP7/BQD8/wMAAAD+/wUA+f8HAPz/BgD+/wIA/v///wEA+/8AAP///v///wIA+v8FAPz/AgACAP3/BAD+/wQA/f8DAP3/AQD8/wMA+v8DAP3//f8DAPj/BgD7/////v8AAP3/AQD5//3////8/wAA/P8BAPv/AwD7/wEA/v8AAAEA/P8DAPr/BAD8/wEA//8CAPz/BAD8/wAAAwD9/wIA/f8DAAAA//8DAP3/BAADAP7/BAD9/wIA/v///wEA/v8DAPv/AwD9/wMA//8AAAIAAAAEAP7/BAACAAAAAwABAAIAAQABAAIAAwABAP//AgAAAP//AQAAAP3/AQD+/wEAAgD+/wIAAAD+/wAAAAD//////v/+//////8AAAAA///+//3/AwD9/wMA/f8CAP//AgD9/wYA+/8FAPv/BwD6/wQA/v8BAAIAAgAAAAAA//8BAAAA/P8CAPz/AwD8/wEA//8AAP////8AAAAA//////3////9//7//f/9//7////8/wEA///9/wYA+P8IAPj/AwAAAP7/BAD8/wQAAQADAP//AwD//wUAAAD//wMA+v8CAPv/AQD+//7/AQD9/wIA/v8AAP///f8EAPz/AQD9//7/AgAAAP//AQABAP7/BAD+/wQA/f8HAP7/AgADAP//BQD9/wcA+v8HAP7/AgABAAAAAQABAAIA/P8DAP3/AQD8/wMA+P8EAPr/AgD8/wIA/v8AAAAA//8AAAIA//8DAP///v8GAPj/AgAAAPv/BAD6/wQA/f8EAPz/AgADAP7/AwD6/wMA//8AAAYA9v8KAPX/BwD7/wAA/f////z/BAD7/wMA/f//////AAAAAAEA/v8BAP//BAD8/wkA9/8LAPb/CgD4/wcA//8AAAIA/P8CAAAAAAABAP3/AwD8/wIA/v///wIA/P8DAP7/////////AQD+/wUA+P8IAPr/BQD+/wUA//8EAP7/AQAAAP//AgD//wQA/v8FAP//BAAAAP7/BQD//wIAAAACAP7/AQABAPz/BgD2/wgA+/8AAP////8BAAAAAQD//wEABAD7/wUA//8AAAIA/v8AAAEAAgD8/wcA+P8GAPn/AgD+/wIA/v8BAPz/AwD8/wMA/v8CAAEAAQD+/wAAAwD+/wEA/v/+/wEA/f8AAP7/AAD//wIA/P8CAAAA/v8EAP3/BQD//wAAAwD//wIAAgD//wIAAQD9/wMA/P8BAAIA/P8BAAMA/P8FAPz/BwD5/wsA9v8KAPn/BwD8/wAAAQAAAAAAAgD//wIA//8BAAEAAAD///7//f8BAP7//v8DAP3/AgAAAP//AwD+/wEAAgD9/wYA+P8IAPz/AwADAP7/AwABAP3/BQD8/wUA+v8FAPj/BwD8/wAABQD5/wcA/f8AAAUA+f8DAP7/AQD+/wQA+P8HAPr/AAACAP3/AgD8/wAAAQABAAAAAgABAP//AwABAAEAAgD//wQA/P8EAPv/BgD6/wUA+v8AAAEA/v8CAP3/AAD//wEAAAAAAAAAAAABAAEAAgD9/wUA/f8FAP7/AgD9/wMAAAD//wEA/v8BAAAAAgD
9/wIA/v8BAAIA+/8GAPn/BAD8/wEAAAD9/wIA+v8DAP3/AQD6/wMA+/8BAPz/AgD5/wIA+/8CAAAA/P8DAP3/AAABAP//AQAAAAMA/f8HAPj/CgD6/wcA/P8DAP//AQABAAIA/P8DAP7/AQD//wQA+f8KAPb/BgD5/wMAAAACAPz/////////AgD9/wAA//8BAP3/AgAAAP7/AwD9/wQA//8DAP3/AwAAAP//BQD7/wMA///8/wQA+/8FAPv/AwD8/wIA//8BAP//AAD/////AAADAP3/AgAAAAAAAgD8/wIAAAD+/wEA/v8AAAAAAAD+/wEAAQABAAAAAQD//wMAAAAAAAUA/f8FAPz/AgACAPv/AwD9//7/AwD+////BAD3/woA+P8IAPr/BwD9////BgD1/wkA+P8EAP3/AgD6/wYA+f8EAP7//P8CAPr/BAD5/wUA+/8BAAAA/f8EAPr/BQD8/wIA/P8DAP3/AwD6/wIA/f8AAAAA/f8AAP7/AwD8/wQA/P8EAAEA/v8GAP3/AwABAAAAAQABAAEA//8BAP7/BAAAAP//AAABAAEAAgABAP7/AwD+/wMAAAABAAIA//8AAP//AgD+/wAAAgD+/wMA/v/+/wMA/f8AAAEA/v8BAP7/AgD//wQA//8BAP//AQACAP7/CAD4/wYA/f///wgA9f8KAPf/BQD///z/BwD6/wUA+/8EAP7/AAACAP7/AQD9/wEA//8CAP3/AwD9/wQA//8DAAAAAQD+/wIA/v8BAAAA//8BAAAAAQABAAQA/P8DAP7/AAAAAP//AAD///3////+//7/AQD+//7/AAD+/wMA+/8DAPz/AQABAP3/AwD9/wIA/////wEAAAD//wEA/P8CAPz/BwD4/wUA/f/+/wYA/f8AAAEA//8CAP//AgACAAEA//8CAP7/BQD9/wEAAAABAP//AAD9/wEA/v8CAP//AQACAP7/AgABAAIAAQAAAAUA/v8FAP7/BAD+/wUA/f8EAP//AQABAAAAAgACAP7/AgAAAAEAAQAAAAAA/f8DAPv/BgD5/wMA+f8HAPr/AgD+//z/AwD8//7/AgD7/wMA/f8CAP//AQD+/wMA/P8GAPv/AwADAPr/CQD3/wYA/P8FAP3/BAD7/wQA/P8DAP3/AAAAAP//AAABAP//AQD7/wMA/f8AAAEA+f8GAPj/BAD5/wQA+/8DAP7/AQD8/wcA+P8HAPv/AAACAP//AwD//wEAAgAAAAEAAQD//wEAAQD7/wYA/v/9/wUA+f8EAP7/AQD8/wUA+/8EAP3/AwAAAAIA/v8AAP//AQADAP7/AAACAPv/BwD4/wYA/f8BAAUA/P8JAPj/CAD//wIAAAAEAAAAAwAEAP//BgD+/wMAAgADAP7/BQD9/wAAAgD6/wUA/P8DAP7/AQD7/wMA/P8CAP///v8BAP7//////////v8BAPz/AQAAAP3/AgD9/wIAAQD8/wQA+/8CAP7/AQD9//7////9/wAAAAD8/wEA/P8CAPv/AwD7/wMA+v8FAPX/BgD5/wMA/P8BAAAA/P8FAPv/BAD///7/AwD//wQA+/8GAPr/CwD3/wgA/f8DAP7/AAD8/wUA/f8DAP//AQACAAIAAwABAAMAAQADAAEAAgABAAQA+/8EAPv/BQD//wAAAQD+/wQA/P8IAPj/BQAAAP//CAD3/wcA+/8CAAIA/f///wQA+/8FAPz/AQD+/wAAAQD+/wEA/f8BAAIA/v8BAP//AwAAAAIA//8CAAAAAQABAAEA/v8FAPr/BwD6/wUA+/8FAPz/AwD8/wAAAAD//wMA+/8FAPj/BQD8/wEA/v8AAP7/AQD6/wMA+/8DAPv/AwD5/wMA/f///wAAAAD8/wMA+//+/wEA///9/wMA+P8FAPv/AQD+////AQD8/wAA/v8BAP7/AwD8/wQA/f8DAP//AgAAAAEABQD7/wgA/P8FAAEAAQD//wIA/f8HAPv/BQABAP7/CAD
5/wYAAwD7/wkA+v8GAAEAAAAFAP//BQACAAMAAgAFAP3/CQD//wcA//8GAP3/CAD//wEABAD+/wMAAAABAAIA/v8DAP//AQABAPz/AQAAAAEAAAD8/wUA/v/+/wQA+P8HAPv/AQACAPz/BAD5/wUA+P8HAPr/AwD+////AAD//wAA/f8BAP//AQD9/wIA/P8BAP//+/8DAPb/CAD3/wMA+/8AAAAAAAD//wIA/v8CAP//AQADAP7/AgACAAEAAwABAAAABAD//wUAAAAEAP7/BAABAAAABgD8/wYA//8CAAEAAAAGAP3/BQD8/wMA/v8BAAMA+v8HAPb/BgD7/wEA/v/+/wAA/f//////AAD+/wIA/v8BAAAAAQD+/wMA/f8EAP//AgAAAAQA/v8EAP7/AgAAAAEAAwAAAAQA//8EAP7/BAD//wMAAwD//wMAAQAFAP3/BgD9/wQAAAAEAPz/CQD8/wUAAwD+/wEABAD6/wkA+v8FAAEAAAAAAAUA+/8LAPj/CQD9/wIAAwAAAP//AgD9/wMAAgD6/wYA+/8CAAEA/P8GAPn/BgD8/wQA/f8CAP//AwD+/wIA/////wAAAAD//wEA//8AAAEA//8BAAAA/f8DAP3/AAD///3/AAAAAAEA/v8DAP//AgACAAAAAAAEAP3/AwD/////AwD+/wAA/v8BAPz/BAD9////AgD//wAAAgAAAPz/BQD4/wkA+v8EAPz/AQAAAP7/AgAAAAAABAD9/wUA/v8BAAEA//8EAP7/AAD///3/BAD8/wUA/P8HAPn/CQD5/wQA/////wQA/f8DAP7/AQABAPz/AQD///3/BAD+////AgD7/wYA/P8FAP3/BAAAAAIAAQABAAIAAQABAP7/BQD8/wQA//8BAAAA/v8DAP3/AwD+/wMA//8BAAAA/v8FAPr/BgD9/wAAAAAAAAIA/f8DAPr/BwD5/wYA/P8BAAAA/v8CAPv/BwD6/wQA/v///wMA/P8EAP7/AwAAAP3/BAD8/wIA/f8DAPz///8DAPf/CQD1/wYA/v/9/wIA+v8DAPz/AQABAP7/AAABAAAAAwD7/wQA/P8BAAEA+f8HAPf/BQD7/wAA/////wEA/v8CAP7/AgAAAP//AQD+////AQD+/wIA/f8CAP3/AQD+/wQAAQACAAIAAAADAAEA/v8DAP3/BwD5/wkA+P8FAPv/AgD9/wMA/P8FAPz/AwD+/wMA/v8BAAAAAAAAAAAAAAAAAAEA/f8CAP//AAACAP//AAAAAAEA//8CAPv/BQD7/wMAAAD9/wQA+/8AAAAA/v8BAAEA/P8EAPv/BAAAAAMA/f8GAPv/BAAAAP3/BAD9/wIAAQD//wAABAD9/wQA/f8EAP7/AgD9/wYA+P8KAPj/CAD7/wAA///+////AAD7/wIA/f///////v8BAP3/AAD9//////8AAPn/AgD+//3/BAD3/wYA/f/+/wEA/v///wAA+f8HAPT/CAD4/wMA/P8FAPf/CAD6/wkA/f8FAP//AQADAP//BAD//wMA//8CAP7/BAD7/wcA+/8HAPv/AwAAAAEAAAABAAAAAQD//wIAAwD7/wYA+P8FAPv/AQD9/wAA///9/wMA+/8EAPz/BAD///7/AwD5/wcA+P8IAPf/BwD4/wQA/f8BAPz/AgD7/wQA+P8FAPn/AAACAPr/BwD6/wMAAgAAAAUAAAABAAQAAQABAAMA/v8FAAIA/v8DAP7/AQABAAAAAAACAP3/BwD6/wgA+f8GAPv/BgD8/wIAAAD9/wUA/P///wYA+f8FAPz/AAD+/wAA//8CAP///v8BAPv/BQD9//3/BAD5/wMAAAD+//7/AAD8/wIA/P8CAPz/AAD+//3//f///wAA/v8BAAAA//8AAAAAAgABAAMA//8FAP7/AwACAP7/BwD7/wYAAAABAAMAAgD//wYA/v8EAP7/BAD9/wYA/f8HAP3/AwD//wAAAgD9/wQA+v8EAP3/AAA
BAP7/AQD//wAA/v8BAP//AQD+//7/AAACAPz/BAD5/wUA+v8EAPr/AQD///r/BAD7/wQA+/8BAAAA/f8HAPT/CQD2/wYA/P8BAAAA//8AAAAAAAAAAAEA/v8CAP//AAAEAPz/BQD8/wQAAgAAAAMAAwABAAQA/v8FAP7/BwD8/wcAAwD+/wcA/f8CAAIAAAD//wMAAQD7/wgA+/8EAAEAAwD8/woA+f8IAPz/BAD9/wQA/f8CAAAA//8BAP7/AgD+/wEA/f8CAP7///////r/BwD3/wgA+v8BAP3/AQD8/wIA/v///wEA///9/wIA+f8DAPz//f8DAPv/AgD9/wIA//8BAP3/AwD+/wcA+P8GAPz/BAD9/wMA+/8AAAEAAAD+/wAA/v8BAAIA/P8CAAEA/v8CAP7/AgAAAAEAAAD9/wEA+/8EAPv/BgD4/wQA/v/9/wcA+P8HAPz/AQACAAEA/f8EAP3/AgAFAPr/CQD9/wYAAQAAAAMA/v8FAP3/BwD8/wUA/v8DAAAAAgD//wIA/f8DAPz/AgD9/wIA/P8BAP7/AQD/////AAD//wIA+P8GAPn/BwD7////AAAAAP//BAD6/wIA///+/wAA/v8AAP7////9/wEA+v8FAPv//v8BAPr/AwD7/////f8AAP7/BAD4/wYA9/8CAAAA+v8FAPf/AgD+//7///////7/AAD//wAA/f8EAPr/BAD8/wEAAgD//wIAAAACAP7/BAD//wEAAQAAAAIA/v8FAPr/CAD7/wUA+v8DAP//AgAAAAAAAAD+/wMA/f8CAAEA//8CAP7/AgD+/wEA/f///wEA//8CAAAAAQAAAAIAAgAAAAEABQD5/wwA9f8IAAAA/v8FAP7///8DAPz/BQD7/wQA+P8GAPv/BAD///z/AwD8////AwD5/wQA/f8AAAIA/P8CAP3//v8DAPr/AQAAAP3/AwD8/wIA/f8DAPz/AgD+////AgD8/wIA/v8DAPr/AwD9/wIA/v8AAPn/AwD7/wAA/f/+//3///////3/AAD7/wEA//8AAAEAAAAAAAAAAQD+/wIAAAD9/wMA+v8FAP3/AQABAP3/AwD+/wIA/P8CAPv/BgD8/wAA//8BAP3/CAD3/wgA+/8CAP//AAD+/wMA/f8BAAAAAAD+/wMA/f8EAAEA/f8FAP3/BgD8/wYA/f8DAAAAAgADAP//AwABAAIA/v8AAAEA/P8FAPr/BQD9////AQD+/wQA//8BAAAAAAD//wQA/P8EAP7/AQAAAAEAAAABAAIA/P8EAP3/AAABAP3/BAD8/wEA//8BAAEA/f8BAP//AAABAP3/AQD7/wQA+v8AAPv/AAD9/wEA///8/wMA+/8FAPz/AgABAP//AgD9/wIA/f8EAP3/AAAAAAAA/P8CAPv/AgD7/wUA9/8JAPT/CgD0/wkA+P8FAP7/AAD///7///8DAAEA/P8FAPf/BwD4/wUA///8/wMA+/8DAAEA/v8EAPv/CAD8/wYA/f8DAAEAAAACAAEAAAACAP7/AgAAAP7/AAABAP3/AwD+/wEA//8DAP7/BAD//wIAAAADAP//AQD+////AgD+////AQD6/wcA+f8CAAAA/P8EAPv/BQD+/wMAAgACAAAAAgD//wMAAQAEAP//BQD9/wUA/P8HAP3/AAADAPv/CQD5/wcA+v8GAP3/AQADAP7/BAD8/wMA/P8CAP7//f8AAPn/AAD+//3/AQD7/wMA+v8CAP3/AgD+/wMA/f8DAAAAAAABAAAAAQD//wIA/f8EAP///v8CAP///v8DAPb/BgD4/wMA+/8BAP3/AgD9/wIA/v8EAPz/BQD//wEABAD7/wUAAAD9/wYA+v8CAAIAAAAAAAQA/P8EAAAAAQACAAMAAAAGAP7/BwACAP//BAD8/wYA/v8DAPz/////////AwD8/wIA/P8CAAEAAAAAAAIAAgAAAAMA//8EAAAAAwD+/wYA/P8HAP3/AQAAAAEA//8
EAPr/BgD8/wMABQD8/wMAAgAAAP7/AwD9/wQA/////wEA//8FAPr/BAD8/wEA///9/wMA/v8DAP7/AwAAAAEAAgD+/wUAAgD//wcA+v8IAPz/AwAAAAAAAQACAAIAAgABAP//BAAAAAMA//8CAP//AwD//wIA/P8BAP//AQD9/wIA/f8BAAAA/v8EAP3///8FAPr/CAD8/wIA/v8CAP3/BAD//wEAAAACAP7/AgD+/wMA/P8EAPz/AAD///7/AgD9////AQD+/wMA/P8CAP3/AgD8/wQA/f8HAPn/BQD//wAABAD9/wMAAgD9/wUA/f8CAAAAAAADAAEA//8EAPz/BwD8/wkA/P8HAP//AgABAAAAAQADAPz/BwD2/wsA+f8DAAAA/v8EAP3/AgD9/wYA/f8BAAIA/v8GAPv/BgAAAP7/BgD6/wMAAAD+/wQA+/8BAPz/AwD6/wUA/P8BAP//AgD9/wQA//8BAP//BAD4/wsA+f8EAP//AAACAAAAAAACAP//AAAEAPz/CAD5/wgA/v8BAAUA/f8IAP//AgAGAPz/CgD6/wgA/v8BAAQAAQAAAAUA/P8GAPr/BQD6/wMAAAD//wIA/f/+/wMA+/8FAPn/BQD6/wUA+/8FAPr/BAD8/wAA/v///wAAAAD+/wEA/v8BAP3/AgD+/wAAAwAAAP7/BQD+/wMA/f8EAPn/BgD3/wUA+/8CAP7//P8AAP//AQABAP3/AAACAP//AgD//wAABQD9/wUA//8AAAYA9/8LAPv/BAACAAAAAQABAP//AwABAAIA/v8GAP3/BwD9/wUA//8AAAMA/f8FAP7/AAAAAAEA///+//////////7//P8CAPv/AgD/////AQD+/wQA/f8BAP7/AQD9/wMA+/8EAP//AAACAP//AQD+////BAD6/wgA+P8EAAEA/f8FAPr/BQD+/wIA//8AAAAA/f8CAP7//v8AAP3/AQAAAP3/AQAAAP7/AwD8/wIAAAABAP7/AAACAPr/CAD1/wYA+f8FAPv/AgD6/wQA/f///wEA+/8FAPr/AwD8/wEA/P8BAP7///////z/AQD8/wEA/v///wAA+f8FAPj/AQAAAPf/BQD5/wEA//8CAP7/AwD8/wMA+/8CAAEA//8CAP7///8CAP7///8BAAAAAAABAP7/AQABAAAAAQACAP//BQD//wIAAwD/////AgD8/wIAAQD5/wYA+f8FAPr/AwAAAP//AAD+/wMA/f8FAPv/AgD//wAAAQD8/wMA+f8DAPv/AAD8/wAA+v////3//v8AAPv//v/+//3//v/+//3/AQD+/////v8BAP3/AAD8/wIA///+/wIA+f8CAP//AQD+/wEA/f8CAAEA/P8DAP////8AAP7/AAACAPr/BQD4/wIA/v/7/////v8AAP3/AgD7/wIA//8CAP7/AQAAAAEAAAAAAAAAAwD+/wMA//8CAAAAAAAAAP3/BAD7/wIA+v8AAPv/AQD6/wEAAAD7/wUA+P8EAP7/AQACAP//AgD8/wIA/v8AAAEA/f/+/wIA/P8GAP3/AAABAAAAAAD//wIA/v8DAP////8BAP///v8GAP3/AQAAAAAA//8DAPz/AgD+/wAA//8CAP///v8EAPv/BgD9/wMA/v8AAAAA//8CAPz/AwD6/wMA+/8DAPz/AgD7/wAA//8AAP7/AQD8/wEAAAD+/wAAAwD8/wAAAAD//wMA/f8AAAIA//8BAP7//v8CAP3/AAD+/wEA/f8DAPz/AgD+/wMA//8DAAAAAwAAAAAAAQD+/wIAAQD+/wMA/v8CAP//AQADAP7/BQD8/wUAAAAAAAEA//8CAP7/BAD9/wYA/f8AAAMA/v8AAAEA+/8EAP///f8CAP///v8DAPz/BQD8/wMA/f8FAPr/BgD6/wcA+v8FAP3//P8FAP3/AwD//wAAAAABAP3/AQACAP7/BAD8/wUA/P8DAP//AQACAAEAAQABAAEAAQADAP3/BAD
+/wEABQD8/wYA///+/wcA+/8GAAEA/v8HAPv/BQD//wMAAAAAAP//AgD+/wQA/f8CAP7/BAD8/wQA/f8AAAQA/P8DAP//AAADAP3/AwD+/wQA/v8EAP3//v8DAPv/BQD7/wIA/P8AAP///f8AAAAA/P8EAPr/AwD9/wIA/v8GAPn/BQD8/wMA/f8DAP////8FAPz/AgACAPz/BgD7/wUA/f8CAP7/AAAAAP//AgD//wAAAQAAAAQAAQD+/wYA/P8GAP3/AwACAAEAAwD+/wQAAAABAAEAAQAAAAAAAwABAAMAAAADAP//AwD8/wUA/v8BAAEA/////wUA/P8DAP////8CAAEAAwD//wMA/v8DAP//BAD7/wkA9f8KAPr/AgACAP3///8BAPr/BwD2/wQA+v8FAPz/BAD8/wQA/v8GAPz/CAD7/wcA+v8IAPv/BgD8/wIA//8BAAEA///+/wQA/P8CAAMA/v8CAP//AgD+/wUA+v8GAP3/BQD9/wMA/v8BAP////8BAP7/AQD///3/AQD///3/AgD9/wMA/f8DAP3/AgAAAP3/AgD9/wIA/v8AAAAAAQAAAP//AgD//wIA/////wAAAgD+/wQA/P8EAP7/AwACAAEAAQAAAAIA/v8EAPz/AAADAP3/AQD+/wAAAAAAAAAA//8GAPv/BQD9/wEABAAAAAAABwD8/wMA/v8DAP//BAD8/wUA/f8EAP3/AAABAP3/BAD8/wMA/v8CAAIA/v8EAP7/AgD//wIA/P8EAP3/BAD9/wAA/////wIA/f8DAPv/BwD6/wYA+/8FAP3/AwD+/wAA/v8EAP3/AwD+/wAAAQD/////AAAAAP//AwD8/wMA/f8DAAAAAAABAAMA/f8DAP//AQADAP7/AwAAAAAAAgD/////AAD9/wQA+/8EAP7/AAABAAEAAQADAP7/AAAEAP3/BgD//wQA/v8DAPv/BgD7/wMA/P8BAP//AQD9/wYA/f8BAAIA/f8DAP//AgD//wEAAgD9/wcA9/8GAPv/AgD+/wAA/v8BAP3/AgD+/wQA+/8GAPr/BgD+/wEAAQD8/wAAAQD//wEAAQD+/wQA/v8BAAIA/v8EAP//BAD7/wcA+P8HAPv/BQD+/wIA//8EAP//AAABAP//AgACAP//AAD8/wUA+f8JAPf/BwD9/wIAAgD+/wMA/v8DAP7/AAAEAPv/BgD9/wAAAQD+//3/AwD8//7/AgD7/wIA/f8AAP3/AQAAAPv/BAD8/wIA+v8FAPn/BgD5/wcA/v///wMAAAD//wUA/f8EAAAAAAADAAEAAAACAAEAAAAEAP7/AAAEAP7/AgAAAAAAAgD///7/AgD//wAA///8/wQA/f8FAPz/AwD+/wMAAQAAAAAAAgD///7/AQD9/wEA/v///////P8AAAAA/v////////8DAP3/AgD/////AQD+/wIA/f8DAP7/AQABAPz/BAD7/wMA/v8CAP3/AAD/////AgD7/wcA9/8GAPz/AQAEAP//AAADAAAABAAAAAEAAQABAAEA/v8EAP3/AQAAAAAA//8EAPv/BQD9/wMA//8DAP3/AgD/////BAD8/wIA/v8CAP7/AQD//wAA/v8EAP7/AQACAPv/BAD9/wIAAgD8/wQA+/8HAPv/BgD8/wQA/v8AAAEA/f8DAPz/AQD+/////f8DAPz/BwD6/wMAAQD6/wkA9v8GAPz/+/8GAPj/BgD6/wEA/v8DAPn/BgD7/wMA/f8CAP7/BgD7/wQA//8CAAIA/v///wUA/f8FAP7/AwABAP//BAD6/wYA/v8BAAUA+/8FAAAA//8CAP//AAACAP7/AAAAAP//AQD9/wEA/f8AAP7//v8AAAEA/f8AAAAA/v8CAP///v8FAPn/BwD5/wYA+P8FAPv/AgD+//3/AwD9/wMA/f8BAAIA//8BAP7/AQACAP7/AQD/////AQABAP3/BAD9/wMA+/8HAPf/CwD1/wgA+f8DAP3/AQA
BAP7//v8FAPz/AwD///7/AwD9/wIA/v8DAAIA+/8JAPf/BwD9/wAAAQD+/wMAAQD9/wEAAAD9/wMA+/8AAAAA/P8BAP///v8AAAAA/////wEA/f8CAAAA/P8EAPn/CAD4/wYA/////wYA+/8EAAAAAgD+/wYA+f8JAPn/BQD9/wMA/f8EAP3/AgABAPv/BgD4/wYA/v/+/wEA//8FAPn/BwD6/wQAAAD8/wUA+/8EAPv/AgD9/wAA/v8EAPv/AwD7/wEA/P/+/wIA+f8EAPn/AwD7/wIA/v/+/wIA/P8DAP//AAABAP//AQAAAAIAAwD+/wIA+/8DAPz/AgD7/wIAAAACAP7/AQD+/wIA/v//////AAD8/wMA+v8EAPz/BAD7/wYA+/8EAP7/AwABAAAABAAAAAIAAgD8/wUA/v8AAAUA/P8JAP7/AAAEAAAAAwD//wAA//8FAPz/BQD7/wQA/P8AAPv/AgD+////AAD+//7/AAD///7//v8BAPv/AQD+//z/AQD7/wUA+/8EAPr/AgD///7/AwD9/wMA/P8CAP7/BAD9/wAA//8AAP///////wAA/v8AAP///v/+/wAA/f8BAP7//v8DAPv/AQD+/wAAAAD+/wMA/P8FAPr/BAD9/wAAAgD//wQA+v8EAP//AQAEAP3/BAD//wIAAQAAAAIAAwD+/wkA/P8HAAAA//8EAP//BAD7/wUA/P8EAP7/AwD+/wIAAQD8/wUA/v8BAPz/AwD7////AQD4/wcA+f8DAPz/AAAAAP//AQD+/wQA/P8EAPv/AgD+/////v8CAPv/BAD4/wMA+/8BAPz//v/9//3////9//z////7//3/AAD8///////+//z/BAD8/wMA/////wEAAAD//wUA/v8FAAAAAgABAAIAAQADAP//AQABAAEAAAAAAAYA/f8GAPv/BwD+/wIA////////AQD+/wIAAAD//wMAAQD+/wcA+v8GAP7/AAAAAP3/AQD//wEA/f8CAP7/AwD+/wEA/v//////AgD9////AgD7/wYA/f8CAAAAAQD8/wUA+/8EAP7/AgACAP7/BAD//wEAAgABAAQA+/8GAPv/BwD5/wUA/P///wMA+v8FAAAA//8AAAEAAQD+/wMA/f8BAAAAAgD6/wYA/P/9/wMA+v8CAP///f8BAP7//P8DAPr/BQD5/wMA/f8CAP7/AQAAAP//AAD9/wMA+f8IAPT/CgD4/wIA///+//////////////8CAAAAAQABAAIAAAAAAAUA/v8DAAIAAgABAAEAAwD+/wQA/f8EAAIA/v8DAP7/BAACAAAAAwD//wMAAQD+/wQA+/8EAPv/BQD+/wIA/v//////AAD+////AAABAPz/AQD7/wMA+v8DAP3/AgD//wEAAAABAP//AQACAPz/BQD8/wEAAgD6/wcA+v8JAPv/BQADAP3/CAD7/wUA/v8CAAEAAgD//wUA/P8LAPj/DAD6/wMAAwD5/wkA+f8HAPr/BgD8/wIAAgD+/wQA/v8DAP////8AAAAAAAABAP7/AgD+/wIA//8BAP//AAD//wAA/v8CAP3/AQABAP3/AAABAAAAAgD+/wAAAAADAPz/BAD8/wYA+v8GAPz/BgD8/w==\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 126_003_2205_500000step.wav\n" + ] + }, 
+ { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 126_003_2432\n", + "Original Audio: 126_003_2432.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 126_003_2432_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source 
src=\"data:audio/x-wav;base64,UklGRiRXAwBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAAZGF0YQBXAwD7//v/+P/6//f////9//j//P8AAAMA+v8CAAMABQACAAIABAAFAAQAAwABAAMABQAFAAUA/f8DAAUAAAAPAAcAAwADAP3/+//9//3/BAABAPr/+f/4//n/9f/5//X/9v/1/wEA+P/7/wAA/v8BAAAA/P/5//b/9f/y//L/7v/3//v/9P/4//j/8//4//j/9v/7//j/+//o/+///P/7/+3/5f/z/+v/7v/w//X/+v/6//n/+v/z//f/9//9/wAAAQABAAEAAwAGAAkAAgD//wQAAwADAAYADAAHAAMABAAHAA4AFgARAAgABgACAP///f/3//T/9P/4//3/+f8EAAEA//8AAPr/+f////7/AAAFAAEAAgAIAAgAAQD7//z/BAADAAcAAQACAP3/+v/7/wIA/P/5//n/AAD4//b/9//v/wAA+v/9//n/8//5/wAA9//1//7/+f/0//T/AAD///r/AwAIAAoADAAKAA0ADQAUABQADgAJAAMACgAKABUABwAEAA4AAwAFAAYAAQAPAAcA//8BAP//+f/8////9v/2//r/8f/y//L/7P/1//b/+//6/wEAAwD///T//P/5//z/9v8AAPr/+v/6////+P/5/wEAAAD+//7/BQD//wAAAwD7//v/AgD3/wUABAADAPr/AgD+/wgA+//9////AQAIAPP////3//7/CAAFAAkABgACAAcAAwAHAAwADwADAP7/BQADAA8A/f/2////+P/1//X/8v/5//L/9f/5//7//P8DAAgAAQD7//3//v/6//z/+v8AAAEA/v/3//v//P/2//r/AQD4//n/+f/+//n/+f/2//D/9f/5//n//f8EAAUABAAGAAUAAgD+/wIAAQD6//X//f/u/wAA//8AAAMA+/8AAPv/AgD9/wEABQASAP7/AQACAAIADAAMAA4ACQABAAcABQD7//X/+P/z//P/9//1//X/AAAAAPz/9v8AAP3/AwD8//b/+v/2//j///8BAPf/7//q/+f/5f/n/+j/5//s//H/8//0//b/9//1//7/+f/4//v/BwAFAAoADAANABUADAAGAAEA/P/4/wcA+//2//P/7//0//H/+f/5//3////0//f/9//3//f///8BAAAA+f/9//j/+P/0//T/+//6/wMA+/8EAAkABAAFAAQABAAAAAYAAQADAPP/+P/2//T/9f/q//L/9//1////AAD//wYAAADx//v//v/8//X/9v/t/+b/7//t//P/+f/+//v/AAACAPX/AwAHAAEA/v/1//3/+//z//b/+P/1//T//v/7//f////7//n/AQD7//v/+f/1//j/9f/y/+7/9f/1/+v/9f/u//D/8v/1//X/7P/v//n/8f/z//n/8//w//P///8AAAYAAgD//wEABgD//wQA+/8BAAwAAQAAAAEA9f/8//j/+v/8////8P/2//r//P/+/wcA/f/3//L/BQD7//P/+P/w/wIA8f/x//P/8P/u////7v/7//X/+P/y//j//////wMABQALAPz/+f8BAAsABwAIAAIA+/8EAAEA+//+//3/9//2//P/9f/7//X/9f/r//P/+v/6//P/8v/0//f/+P/5//v/+//+/wcABgAEAAcAAwAGAAQAAAD9/wEA+f/6//7/+/8FAP//+P/7//v/+P/7//j/9//6//3/+v/8//j/8//3//b/+//z//f//P/1//f//P/8//P/9P/3//P/+//n/+3/+P/7//z/+P////n/+f8CAP3/AwAGAAgAEQAHAAIAAQD//wEA/P/0//n/9v/0/wMA/v8BAP//AAD6//b////5/////v/8//b////8//v/+//5//n/9f/0//P/8P/8//n/9v/x//j/+v/x//P/8f/y//L/7v/t//X/7P/x//D/7v
/1//n/+f/4/wMA//8EAAUACgAHAAYADwAMAAkACwAIAAwABgAEAAwADQAPABUAGAAcACYAKAAkACMAGgASABEAEwAOAAcACQARAAYAAgD9/wEA/P/+//z/AgD///j//P/9//z/8/8CAAkA/v8FAAYAAQAQACIAJQAkAB0AFgAIAO3/s/+F/1n/QP93/3gAhgE5AUEApv9j/0H/o//rANsBhgKNAmcBNwCU/4f/hv/4/xkAMf9L/ub9gf1q/Rn+N/5C/ob+8f2+/TH+mv4l/5T//v8OAOn/IgC8ABAB9gB+AdIB/QFsAmgBVwCjADsB2AGZAaUAVACYAPAAogF5ArkC5ALsAqIBUwGvAvQDfgRbA6MBdwBB/zr/BwCT/77+g/0K/HX7H/tf+2b8Bf1i/dv9qv0c/Xv9+P2V/qT/qgAyAT8BlQC8/9H/RwC2ANYAsADm/2n/pf55/hr/jf9T/+L+uP6U/uX+av/z/4UACQFMAVgBKgH3AAABPgFPASsB7QBjAN//kP91/5r/CQB5AHQADgCV/1f/Xv+j/wgAXQBRAOv/n/9L/yv/VP+h/9n/AwANAOv/xP+p/9//SgCYAK8AcgAIALn/tf+9/97/JABcAIoAowCrAKYAlgCKAIgAlAClAIUAJADh/9v/8/8CAO//1v+x/5L/gf+C/4j/ef+E/4L/lP/E////JQA0AEUATQBQAFEAZABzAFYAKgAZAAQA6v/b/6j/nP+m/7H/vf+u/5r/s//a/woALABLAFgAOgDx/6T/jP+s/+3/IwBeAGUAKgABAA4AHQACANn/zP+1/5f/h/+V/9P/+P83AE8AQgBBADgAMwA4ADsANgAeAPP/yv+t/5b/g/+V/53/qf+t/5j/j/+g/9j/AwAUACAAGQAjADcANgBYAFkAQQAkACEAJgAjACMADwACAP3/DQALAPz/CwAQABAAAgD1/+//7f/w//b/AwAFAA0ACwAVABEAAwAVAAoAFQArABgA/v/r/9j/2//g/+X/7P/u//H/5//Q/9L/9/8TABwAEAAGAAYA+v/1/woAEQAQAAEA+//r/+P/6P/b/+L/4v/l/9n/1v/t//7/+v/0/wQA+v/u//L/+f8CAAQADQAKAP//9//2//T/9v8KAA0AAADv/+r/5v/i//H/9P/1/wAA+/8DAAYA+P8KAAkA///3//H/8//3//P/8f///wAA9P/0//r/9P/1//n////9//7///8AAAQABgAKABQADAANAAcACwAIAAwABQAVABwAEAAMABUAEQAGAAUADgAHAP///v/v////+f8FAAYABgADAAEAAAD6//X/AAALAP7//f/r/+X/7P/m/+//+f/6/wQA/f8BAP7/9f/v/+z/6//m//T/8//k/+T/4f/m/+X/3v/o//T/+f/5//z/9//9//3//v8FAAkACwATABEADAASAA8ADQAGAA0AEgAHAAcADAAJAAsACwAOAAkACAAJAAMACwAKAAIAAgAHAAAA//8CAAUABAAAAAUABQAAAPb/+v/u/wEAAwAEAAcAAgAAAPb//f/3//f/+v8MAAMAAQD///v/AgABAAAA/v/5/wAAAwACAAEA/v/1//X/9//z/+7/+/////z//P8DAAoADQAGAPz//P/z//P/+//+//j/+f/8//v/8//0//j/9v/5//z//f8DAAgABwAAAAYA/f/9//j/9//3/wQABQAEAAoACwAFAP7/+v8BABAABwAHAAUA//8BAPn/AwACAP//+//4//n/9v/1/+7/9P/2//r/+f8CAAQAAwAAAPv/AQD//wcAAwAEAAoABQADAAMAAAD4//v///8CAPn/AgALAAwADAADAAMABgADAAsACgD//wYAAAD5//3//P/7//v//v/7//X/+v/1//z/AgAIAAMABgAGAPn/AAAFAP7/9//5//7/AwD8//3/BQAFAAAACwAJAAUAAQD5//j/+//3//r//f/7//v/+//8//v/BQAGAPz///
/+/wIA/v/+//b/8//6//n/9P/1//b//P/4//X//v/9////AAACAP3/+f/1//n/9f/0//v/AQD+//n/+P8AAP7//P/4//v/9P8CAAEA///8/wAA/f/3//H/+v/4//H/9v/0//z/7P/u//H/8//4//3//P8BAPz/9v/1//j/+P8BAP3//f////z//f/7/wQA/f8BAAUA+f/3//7/9//6//r//v8BAPH/+P8AAP//+//+//z//f8AAP3//P/7//v/+//4//3/AQD6//3/AQD8//3/+P/5//z/+v/2//X/+v/2//D/+P/+//n/7f/z//j/8v/2//P/8f/t//T/9f/x//j/8f/v/+//8f/w/+7/7f/o/+z/8//x/+//9f/z//H/9v/3//b/9f/4//7//f/4//b/9v/7//T//P/5//j////1//H/9f/2//f/9f/w//7/AQAFABIADwAFAP//CgAEAP7////8//3//v////f//f8AAPj/8P/2//r/8P/u//H/7//2//v/BwAAAPz//P/2//L/8f/7//X/8//7//b/7P/s//P/9v/t/+z/+P/9/wMA/f/9//r/8//4//j/9//2//v/+//t//H/8f/1//z/7P/y//H/9f/1//v/AQD///n//P/0//r//f/8//r/9v/4/+7/8v/5//f/+P/3//f/AwAHAP3/AwD//wAA///+/wcAAgAFAAQAAgACAAYABgD///3/+P8AAAQA/f/5//f/9f/z//z/+v/8//z/+////wUABQD4//n/AAD//wQA+f/2//j/8P/y//j/6//w//7//P/u//r/+v/0/wAA/P/6//r/+f////3/BQAEAAIA/f/9/wUA+v/+//3//f/7//j/AAADAP7/AAACAPj//f8CAP//BQAMAAUA+v/5/wkABgAGAAUACwAOAAcAAgACAP3/AgAAAAcABAABAPv/BAD///3//f////v//f/4//3/+f/7/wAA+P8AAPP/9//3//T/8//+//H/+f/7//f/8//z//z//P/3//j//v/5//P/9//8//3//v/4//b//v////r/AQD///n//P/5//7/AAD7//7/+//9/wIAAwD6/wQAAAACAAMACwAKAAgABQAJABAACgAFAPn//f8LAAIA9f/+/woA/f/1/w0AEAD3//v/FQD5//3/+v/v/wgABwALAAQA+f///wYAAgD7//b/9P/+/+//8f/1/+b/8v/2/+T/5f/2//3/4v/4/w0ABgAKAAoACAD+/wEABwD3//X/AAAIABMA8v/p/wAAAwD2/97/6P/y/wQAFwAKAPj/AgAKAAgA5P/t/xAA8//v/x0A9P+//xMANQAHAOb/8v/5/+T///8HAAsAFgD7/wkAAgAcABYA4/8QAAwA9f8RAAYA3f/6/yUA/P/S//L/9//t//v/6v/4/xwABQDx//z/8f8hABwA7P8IAP3/9v8RAPD/7/8MAPv//f/o//D/+P/+/wMA/P/5//v///8DAAUADgAWABIABwDy//f/BAAGAAUABgD+/wEADAALAAUAAQAKAAcABwAAAPz/BAAAAAMAAQACAAcA/f/4/wEABgD7//f/AQD4//D/8//5//X/8v/1/+z/7v8HAPr/6P/1//X//f///+L/7/8LAPv/BAATAO3/6P8QABkAEQAWAAkADwAaAAMA+//8/wAA9//8/yEA9f/N/9j/9P/6/xAAKAD1/9z///8NAAgA+v/2/yoAHgAGABsA6//c/xIA6//I/9X/2P/Q/9H/5P/J//v/DwDm/xMAEADw/wgAFwAkACYAHgD8/+r/7v/c/87/0//a/8r/kf+6/87/lP+t/9j/uP/I/+j/2f8JAHIAlACGAJ0AiQCYAC0ANACTAIgAOgBGAIcA+v/A/8X/3P9CABUA1v8cAEEAPgBcAEgARgBaAFsAfQBMAFkAigANAAUAYwAlABEAHAD0/93/4f/g/+H/7v/1/+f/nf9x/4L/qv92/0z/af88/wr/Rv
+F/xj/C/8UAM3/9/7T/w8Aff+8/+b/IQAYAOX/KQASAPD/DQDU/9n/JABLAAYAuP/d//D/vv/E//v/+P/w//X/3/+7/9r/HAAKAP//AQAuAEYAJwBGAGEAXQBYAFIAXwCGAJIAXwAyAFIAQgAHAAQAGwANAPz/DwASAAkA3v/k/97/2f/u//P/5v8CAAIA5v8LAPL/8P8WAAsA9v8DACcAMgAqAD0AUQBTAEQARgBDADgARAAoAPj/6f/S/67/k/+F/4P/YP9y/4L/ef+M/5H/n/+z/8X/6f8BAPH/DwA5ADcAMQA2AEwATAAvAEMAPwAjADYANgA7ACgAJQAiAPr/5v/b/9X/2P/P/8f/vf+w/6n/kv+P/6r/vf+9/77/vf+7/8j/3P/p//b/CgAOAA4ABQAFAA4ADAAYABsAFAARAAAA4P/T/9//6P/n/+b/1f/O/9f/2f/V/+L/6P/U/8j/3P/d/+P/6f/d//H/+//c/+D/BwAaADMAQAAvACoANwA/AEIARABAADsAJQAaAAYA6P8CAAwADAA2AA0A//8cADEAPQBLAF4AOgBcAMMAwwB7AFMARgA/ADkA/f/9/+b/s//l/3//IP9h/6v/I/8s/8j/jv/s/3gAawCPAGMAHQBFAB8A///n/4r/Dv/4/kT/8f7L/kj/g/8N/5/+K/+F/6b/3/8SAAIADQBGADMAHgBmANkA1QCeADkATgD//+H/CwDn/7f//P84AAMALwATACoAdAAtAP3/KgAsACcAZQCFAF8AUABcAEYANwCVAMkAUgBQAJEALAAaAEEAAADo//f/u/+D/zz/Cv81/xP/4/5N/5j/Tf9g/2H/O/+N//r/QgBGADMAiQCHAAsAaQDYAH8AbQBqAEcAWgBTAD8AGQAKADwAFwDR/xQAbAAtAO//DAAeAOD/sP+p/3//n/+o/63/tv/O/wgAxP/J/+H/8f9CAEgANgBTAEMAEAAnAEAALQArAAoAwf/v/wAAyP/2/wQAv/+c/5n/sP/P/6f/xf/b/9f/+P8JABgASwBOADoATABhAMwA2gBhAGoAhQBOAAkA3f/s/9r/6P8CAAwA3f+u/+z/0f+x/woAKgD5//b//v/+/+P/4v/+//b/+//6/9T/uP+//9v/AAC8/3b/uv/D/6j/7v8sAD4AZQBlACwATgBqACoARwBSADIA/v/Q/87/uf/Q/+T/wP/B//T/2P+h/7r/zP/x/zUAPwAdACkAOwAzAC0AJwA2AGIARgAJAB4AJAD3/9r/xf+o/5j/xf/T/5r/s//G/4D/jf/a//3/JgBLAHcAjwB+AHsAcQBcAFUAVwA6AOT/7v8bANv/r/+t/63/s/+5/8//+//c/5z/mP+l/8n//v8PACoARwBPADwAKQA0ADgAGAAKACAADwD0/+n/0//E/8b/4f/q/+P/+/8GAPv/7//5/w8AIQA5ADUAPgBGADcAKgAuABQA9v/x/+7/4//S/8P/x//G/6P/mP+r/8j/y/+5/9j/BADp/8P/yv/g/+7/CAAKABoADQDY/9f/6f/A/8T/8P/6//X/8f/1/wUAEgDX/wYANQAmAA8AGQAXAAYABAAOABcAEgAgABcAEAATAPb/5/8JABYA+//l/9j/xP/V/97/z//a/9r/4f/6/+r/6v8HAPv/3//U/7r/wv/w/+L/uf+4/8X/0//0//7/AwALAAQADQA5AEoAFwAhAD4AGwAVACsAOABIADoACAAKAA0Av//C/zMATAAzAPz/qf+W/73/1P/m/+f/wP++/9H/tP+7/xAAMwAyAAcA4f/g/+P/0//1/1QAYABZACEA0f/a//L/6f8lAFoAQQATAOz/6//S/7P/4v8zAF0AYAAqAKj/lP/K//3/LgAzACsAEgDC/5z/tf/N//L/8//l/+L/wP+x/9D/v/+d/8r/6f+g/47/vf+z/6n/pP+z/7b/m/+B/4f/t//d//f/BAD5/wMAKQA0ACQAHgA9AEwASgBfAFgANQ
AuACIA+//t/wwAEQAEABMA/P/c/wUAGwAKADYAJQAEACoAOQAuACMAIQAYAAUAGwA3AB4AJwA+AAIA7//8/9f/4P8AANT/uf/c/+T/0f/t/wEA+/8fADwAIgAQABMADgD8/wQAAADe//7/KQAJAOz//P8DAAUA7P/g//n/6//Y/9//zv+//8n/2P/P/83/yf+y/7v/xf/e/+r/FABMAEMA+v/e/wYAFwAWACcANAAkACIAKAAmACUAHAAfAAwA+/8vAE0AJgDz/8//x//M/8//6v8bADUAGQD8//j/+P/r/9z/9/8VAAoA9f/m/+v/8f/p/wQAJgAdAO3/0P/Z/+X/5v/u/wkAAQDn/+v/8f/q////HQAnABIAFgAgACMABADf/9P/8f8uAEsALQAkABcAFwAQAPv/DwAaACEAtv98/5//9//9/+T/1f/X/+n/4f+d/33/4/9QAGYANAABAPX/9//U/9b/OwCRAGcA9//C/+D/4v+6/8b/EwBbAHAAOgAJAAYA+f/U/9X/IQCAAIoAZQB0AJIAdwA7ACgALQAvACcAAADm/9n/wf+j/5f/ov+i/7b/z//E/9z/CwDx/67/sf/p/xcALgAiACUAWQBoAD4AHAAeAHYAsgBsAEYAWQAtAMn/UP8h/4P/0f/B/4H/e//Y/wQAxP+X/9r/MgBXAA8A8P9fAJsATgASAB4AMQA7AAIA0//O/3j/Rv9V/03/Zv/E/wUAEwAYACkAJAAZACsAZACVAHkAagCKAHoAPQDq/6j/yf8XABIA1P/R/+v/1v+n/6T/1f/x/9f/t//6/00APwATABEAKAAzAEIAYwCHAGYAQgBOAC8A+//u/w8AQAAoAK3/j//h/+b/vf/q/0AALQDY/7n/p/+R/6b/6P/m/7H/xv/1//n/8f/m/wgANAAxAA4A/v8cADkAYwBqAEsAOAD7/7T/vf8ZAEwAUQAuAOP/qv+W/7z/AgAqAD4AOgA6ACkA5P+0/9v/LABCAP3/tv+o/7r/vv/A/+3/BADv/93/3//c/9b/7P/w/8L/hv+e//T/BADX/+T/BgAVAAsA+v8EADcAVQA2AB4ANgBCADEAIgAkACgADADr//v/NgApAPj/+f8TAPX/of+e/wQARQAKAKr/pP/W//P//P/5//z/+f/k/7v/tP/i/yMASgBHAAQAw//v/zAAGgDq/+D/8v8LANT/jf+q/+X/0v+o/6P/wv/y/xwA+P/F/9z/KQAZAMv/9f+DAMYAUACN/0n/ZP+i/9X/0f8oADEABACw/5j/zP8TAC0AJgApABEAEAAUACIAIgAtADgAIwDw/+X/HABgAFwAOwA/ADMABAD8/xMAIAAtAD0ALADw/5b/ev+9//j/BQARACIAHADv/8n/0f/h/7X/bf9r/5b/if9i/0f/dP/A/4L/FP8a/6D/IwA0APX/pv+G/5H/gv9q/6z/xv+O/7r/GAA6AEUASwCBAKgAhgCLAJEAZQBxAHEAQwBCAIwA1wCqADEAAgAJAMn/k//9/2AAGQCP/1f/V/9O/1z/zf9CACYAmf9v/8j/8v/P/9n/LABtAF4AIgDO/6P/9P9CABEAqf+L/73/s/8S/4j+E/+s/0X/wv4N/67/0v98/2b/5P9qAJcAuwDPANQAsgBNAAgAIQBiAJwAiAAWALD/eP83/zr/nP8VAFoAGgC3/6b/0P/n/9z/GACOAOQAvwBlABcA4P8dAG8AQAA1AHIATgDy/+L/DgBqANUAqABFADcARQBGAEAA8v94/yD///4h/4r/IwA8AKz/GP+6/pf+B//L/0sAogC5ACwAQf+5/g7/3P9nAIMAnwCIAOX/QP9o/woAYQBAABAAAgDf/3v/Ov9T/3j/kP/Y//H/sf+7/0YAXQALAM3/p//A/xQAVwBxAGEAOABVAI4AjgCIALcAEAERAZkAaADKAOgAYQDh/8f/xv/k/0UAiQAtAKr/d//B/z8AawAzAA0A/f/H/53/nf/E/yMAag
BJAB0AJQANAPj/EADy/57/Yf91/7v/6v/o/5X/R/83/0j/i/8JAEIALQDL/1D/Sv+7/0IApQCKACcA9v8VABsAEABSAPQA/gAhAHP/sv9BAHoAQgACAMT/ff+N/6r/DABAAD4Azv9g/3b/x/8bAFcAUwAXABwAXgCZAHkAMQArACAA6f8OAHYAfQDP/wr//P6S/x0AgwCbAF0AMQD3/2v/Xv8TAMwAEwGuAKX/Gf8t/0P/2//DAL0AGwCV/+3+qP7T/kr/JgClAEsAjf8s/0b/pf8oAJ0AmwArAPr/FgAgAOz/lv/v/40ARQC5/6//zP/N/3b/OP/W/8QAOAECAUwAz//1/0IAWgCpADgBwQGQAYIA+f8uABgA4P/t/9H/yv8OAA4ALAAWAGL/Tv/r/wsACQBGAFUATgAVANT/CgA9APv/9f+BAO8AbwB0/23/PgBvAMP/hP/9/zEAzv9w/1X/g/+m/47/mv9H/7b+9f7F/1EAFwCA/57/ZgC4AFkATACxAKoANgApALQAzAAnAJj/cv9R/yT/9P71/gL/9P58/0YAJADD/x0AtAC/AF8ATwC6ABABzABKAGUAhwDq/5//9v/e/57/5P8IANX/vP+C/4X/LgCCAD0ATgBrAFsAjgCGACEAOQCZALQAqgB1ADEANgBeABIAhf+C/9f/xv93/1b/NP8G/wX/K/9q/9v/AQDZ/+7/+//U/xsAwAAYAfgAwADDANUAUACm/wYAmADx/x7/K/9j/3//yP8EAD8AcwAWAIb/wP+GAM0AaQAaADsAbAAyAND/5f9JAP//CP92/r/+df8bACYAgv/r/hj/v/8qAGMA4wBxAYUB2QAZAPP/LwBOAGkAlABnAO3/nf9c/13/z//r/6L/y/9eAGAA1P/O/30A3QA9AFv/MP///8UAagCt//D/XQC7/8f+7P7k/3QAGgCo/6L/kv9L/x3/Z/9sAPcAlQDi/3T/1/+NAMgABgCc/x4AwAAAANv+Ov8VADYAyP9S/6P/uwBAASEBRwFJAbgADABr/3f/gABPAccAjP+Q/lX+o/7N/g7/BgAIAfAAu//U/iX/6f8KAOz/WADtALoAPwBqAN8ABwEWAfcAsQCwALgAhAAzAMz/rv+G/yj/yP6F/sr+E/8S/1b/iP9v/2P/qP9vABUB+gCeAGQAOgAJAN//CgBmAIEAgQB9ADMAzP/G//T/AQAbAF4AMADL/yX/+v4FAHYAnf8i/4//GgBlAC0AUABrAZ0BVQDK/zQAVAAbAK//lP9g/x/+If0k/uL/cAAeAPv/DADY/3D/YP8+ALEBkAK/AZP/4P7s/2kAMQA+AIMAggDu/7b+6v3O/hoAdwA9ALP/Tv8I/5H+tP4GAEgBVgGaAOH/tf/V/wIAxADIAYMBdgDN/2n/cf+4/3EAPwGXADT/7f78/sL+Xv/HAKAB7gCS/9z+8v4k/37/LwA8AA4AJQDs/8n/0P/f/2sAtgBjABUA1f++/6z/qv8sAI8A4P/c/l7+t/7W/34AqABCAXoBowDV/8P/GQBzAKsARgE/Av8BcgBy//3+nP7z/sP/ZACBANj/O/8o/9/+ZP65/vv/+wAAAYQAvf8d/+r+4/6H/0sA0f81/6b/3f+j/6f/x/9YAMUAFQB6/wQAhgBgAFYAYABcAEQA8P+y/x4AsgBYAI3/MP9r/1r/ef76/eD+CAAVAHH/Z/++/73/GQCYAHMAaAC7AL4AaQD+/+L/PABcAKX/7/5N/8X//f6f/VP9R/5L/2X/Sv/E/zMA/v+t/9T/8wCcAmEDaALpAIkA6wCRANf/PQAkAq0D8AH5/UH8fP0v//7/1P+9/8r/XP85/rr9tv59AIkBMgFKAKj/rv9KACMBXQEGAaQAKgBd//v+sP+gAHQA6//H/2X/Lf/q/4EAbwCFAJYAcACnAHUA4v8TAI4AmAAcAEn/Cv99/+b/r/8H/6H+Xv50/vD+4/6I/ov+W////2n/qv6p/kD/AgBPAB4ASw
DVANkAbgArAIQAbwDz/wYADgASAEAA8f+3/4L/Lv+6/wQAPf+d/ob+h/6J/h3/ZQDrAOb/of8oAKD/Iv8GAAgB3QAQAML/LgCvAAcAVv8wABcB3ADFABEBKwHsADYAxf8WAK0AFgHjAGAANgAGAFb/8v7P/0QBoAFAADP/+//FADYAIP/F/3sBVgEHAKr/7f8mADcAwwAXAV4AWP/w/m7/kP8//9v/dQAdALP/1f9WALgAkABIAFkABgAZ/37/sQBuAN7/u/99/xv/vf6b/04AVADyALAAvv+F/0j/4f8JAbwAegB1Ac4Av/4o/lj+Fv/h/0//7P5P//D+Vv6W/qD+yv5M/53/+/+h//D+h/8qAOT/XwBJAU0BtABmAOUARAHwAM8AXgGLAYQAgf/I/64A3gATALf/5P9t/7P+hf7r/ub/mQBaAML/n//a/yoAlwDkAPEAaAHPAVMBUAEKAtQBEAEeAXEBmwBk/wr/Gf8+/xf/2P5E/1H/kv7z/hYAuf8J/7L/cQAmANP/ZwBpATcBJQASANYAXQF+AQYBpQC8AIAA5P8PAO4A1gDh/+D/kgAFAAb+h/1E/9//Mv/7/vr+bf+p/9j+f/6I/1oAwf+H/o3+4f95ADMA7v/E/1IA9wCtABAASQDiAC8B2wDq/5b/MgA+AK3/OP/O/yAB/ABa/9P+sP9ZAKUAZwCbAJYAvf/s/tv+gv+PABwBuQAxAO3/JACLAKQAsgDsAL8ANgBR/1L/lwCCAJD+1/3C/iv/sv7u/icACQHlAMT/2f60/xgBqACu/yUABwFbARwBQgA//4n/HQFGAbn/A/8lAPYB0gFbAPv/YQBBAOv/5v+4/4v/Iv98/ov+of8EAPT+Bf/U/0r//P50/5D/+f+lAFgAW/8+/97/ZgBQATAC6wFcATYBIQHyAAMB7ADLANMADQDO/iX+dv6Z/3QAxP82/xIAGQAv//r/VAEcAXgA4P87/1f+zP3K/o8AywB8/5v+2/0Z/Vf9p/5MAGgBKQEJAaYAdv5O/vYAEQNNA9wBjQALAFH/Ef6N/Tj/6ABBATQAHv5x/er9MP5m/ykBxgG0ASUB3f+t/5kAQgGrAcwBSQHdAFYAf/8z/wr/TACDASQAE/8o/9j+i/5t/tf/DgLrAYkAOP9J/hD/gQACAET/ZwDgAGUAkgA+AHX/r/+AACIBvwCFAO0BvAH3/2IAqwBf/3r+VP1M/dj+//71/s3/kv/1/jz+zP3Y/kQAHwHGATUCyQEoAcEA0v9e//3/1ABsASMBUgBq/93+cP9G/0j+HP+wAGoBHgH+/vD94f4U/z//DQBJAHoAnQBLAKwAfAA6/xYAZAFQADcApQEFAWX/Gv8F/wr/sv/i/5n/pgCYAZoA7f9AAIgAFwDw/gv+ZP5C/8D/wv+d/8v+pf3I/tD/Rv+1/7EAdQEUAbT/Pf8+/4v/iAB1AVkBwP9p/pz+lP8tAAUA4v+eAIsAgf9C/5f/HAAsAVoBggCXACgBEgF4AFUAVwFsAWEA4QCXAboAmf/I/kD/3wAPARYA2f9xAJ4AGwCZ/yoAHwFPAeMAawDp/8//oADOAEIAGgCZ/47+hP7h/z8AmP6a/RX+tv5S/7b/lP+W/+D/0/9h/w3/+v6u/78AmgCA/8T+Ef9j/wD/1f+uAE8A1f+R/1cAKQDL/tz+HADQAD8Ae//Z/44AcACTAGMA1//o/8//QwDm/9f+GQDBAJ3/tf9z/w3/2v+v/2D/WQDeAAsB7QD3/wH/2f5A/2b/of8WANwAdwDY/5kAyQCVAHMAwf8D/3z/lwBvAS0CvwBJ/vb+aQBOALoARQGfAR0BSv+h/pT/LgAeACIAdAD7AAYARv6R/qD/EQBCAFcA/QD1ABwAGQDF/7r/igClAG4ATf83/l/+SP4L/9z/Rv9z/zQAUgBuAMwA7ADlAOsA0AAGAc4B9QFDAccA+//k/kH+pf6B/6r/9v9rAJAA8v8F/63/XgBFADcA+v+KAOL/5P
2v/sv/O/9o/3T/7f76/mX/Vf/I/+L/Z/+6/xYAvv8O/2b/0f/X/zkALQAdAD8Amv9O/zQA1gCDAGoAWQGUAVwAFgDMAJAA1P+w//H/5v+c/wf/rf5s//H/6v+HAKIAgf/j/sb/RgHQATEBKgHmATYBiP9w/yMAQgA4AN//a/+M/5L/xP7o/moA6AAoACUAtwDUAJoARwAlAFkAqACKACgAuwBAAaAA9/9N/4j+3/5HAPMAKABb/zz/8f6J/uP+DwBeAZIByAAzAI7/tP7H/sb/0wBPAXcA9P4Q/2MAVwB//zcAngF1AeP/Cv8MADEBowCv/7b/z/8Z/zr+jv7G/wkAl//o/6EAMQDL/hT+8P4OADQAMQBNANT/cP9n/6f/ZQD7AOQA2ADTAOz/Ff9c/zwA5P8a/xT/QP/B/wsAVv9g/xEA3P9TAHsBQwH4AIIBswB6/2r/2f9rAGwAH/+t/pn/jf8I/2j/EwDOACUBawD2/3UA9gD0AGUARgDeALMA4f/i/4YARwF6AaUADgCmACEB2P9V/rz+c//b/87/lP6x/gEAYv+K/l3/TwDaANEAygBoAQEBk/9G//f/LgANAPX/fv/o/lL/LwAfAM7/NAB/ANT/Mv/l/+EA6wCW/9H/6AFlAdj/zf/r/xoAMACf/6v/BACJ/3r+yf2V/sr/6P8y/2z/KwDX/47+yP4TARYCpAG1AeYBQAHt/xX/i//z/zwA2P/t/QH+jv8c/2n+bv/ZAFQBDQE/AKf/qwA+AQABXwGWAWIBbQBe/4j+u/7//3kArP8b/zH/If9n/+r/2v///l3/EwDR/4QAagAFAAoBMQHo//L+sv5B/tf+xADnACcAjwC0//n+f/+5/wgAbAD9AOwAzf8v/3L/EwAfAV0BfwDCAKAAsf+c/3r/9v+KAPP/rv8FAB0A2v/t/3AAbgAlAHcALAHVAJj/Rv9T/wj/2v/h/xf/jACeAY8A+P/A/67/3/+d/+f/NADC//r/igDk/y7/yf90ADsA6//H/77/of/H/8L/iv86AIEA1v8E/9z+7/9XAW4BWwCT/zD/Cf85/+H/3wALAfX/Cf8r/9z/iwC2ACIADgA9AEn/p/5o/xUAJgD2/8z/pv9q/3r/7f8bAFUA7wCiAK3/n/8dAE0AlwB6AMv/GwA7AUABv/+U/kn/RQBZ/3D+mv9TAIf/PP9u/xAAJgGnATQBGwEsAVoAHf+B/jH/VwCUAO3/Xf/5/ykBCQGj/zf/n/+v/3AAugBnAHUAPQCg/4r/x/9R/8H+S/9V/7X+Tv/U/9L/0/9s/z3/o//G/xoAywCTANP/+/9wAHAAhAAXAdoBCwI6ASsABABqAFUAt/9A/y7/Yf99/0P/D/+1/gj/VwBcACX/yv47/2kAgwGaAToBzACSAEQAWACPABUA7P8FAGL/Jf9k/+P+9P63/y8AmQAlAGX/u/8rAAoAAgC2/5z/2/9K/xT/zf+DAGIAn/+H/0QAFQBY/+P/NwFfAeL/Cv9e/33/D//l/uv/wgB4AGgAdQB8/xP/uP8KABwA9P9XADoBkQBb/4f/0f95/3T/xf/4/wIAeP8X/9X/cgBaAEcAAwEkAl4C6AE5AZwAWQCYADoBDAGuADkA7P70/cb9NP4O/1r/LP9X/5f+oP13/i3/df8RAOYAqQHQATUB2P98/4P/3v+nAHoAtwDLAFQAHgAg/wH/uAD5ADcARQDO/yT/o//O/xr/Dv8L/2f/bQAFAKP+tP6S/6n/Sf/O/xABKwFcAMgAdwEYAbUA7v98/2wAHQFxASkB4P/F/8//zv7U/h4AJAH7AGEA2f+q/1b/Ff7d/UH/eADEAFIAvP8p/1z+dP4c/3T/sgD8ARECtQEgABT/UP8x/6f/lwC9AHcA6v+C//P/y/9K/xUAqwBHABQAbgB/ALD/I/9S/6//y/+D/zL/s/+GAEUA+P+JAF8BgwGtAKD/Pv+Q/77/qf8sABgA+/5k/87/Ef9o/1kA/gAHAfn/Av
/1/kn/lf8NALsAygBNAAwAHgA9AGwARACaAFoBMwGcAB4Ax//u/9z/xv+WAPoAdgCkANYALgAs/3b+Uv/fANIA5P/P/jf+5f5S/0T/7v/WAKEA+f+K/7L/UADCAMkA6wDqAI4ApABjANP/4v+E/3b+yP6bAGcBJwCf/uv9L/4Q/5n/9v+BALwArAA7AL3/eP+h/0kAlQAQAI//6/8sAFj/9/6K/20A4QBPAIwA5QD7/0j/Pf9t/5T/qP/z/40AcgA1AIYAdgBVAFwAlwAmAPH+Av/Z//7/DQD//8H/i/8v/xL/a//u/5EALwFJARcB5QCFAHv/oP73/rn/yv/r/+IAZgHRABYArP8v//r+kf99AHEBIwGo/7b/TgCS/1H/BQCaAJsAKAAuAJMAEgD6/gz/FQCwAFEA3/+YAB4BbQD5/18A7wAfAbYAPgDQ/1n/E/8H/wr/cf64/cb9RP4a/2v/9/5l/4AA6gABAWMBdAHwAEQA/f+BACEB7gB+AG8ATAAoALL/AP/J/v/+sP93ADEAhP/n/14A2v+X/67/5/+LACgAJ//E/0YAt//K/7H/bf/e/4sAmQBzAEYA+f8WACwAyv9L/1v/Zf9c//z/YQAhANz/qf/M/3wADwHfAN8AewE5ARwA0v8dAAIAwP+U/57/tf95/9T+Zf76/pz/1f+OAMAAtf9+/2MA9QDfAIAA1ACyARcBjv/d/50A6P8W/8r+1P4//2r/B/9E/w8AJwAXAHAAlwDdAC4B3ABGAPn/GgBWADgAZACyAE8Aj//l/o7+Ov95AMwAEQCm/9T/rP8w/0H/GAAgAVIByQB9ABAAB/+h/kr/tf+d/5//of/J/wcA5f/3/84AiwEbAQsA+P/YAAcBRAAOAGYAYgDU/y7/If+Y/5D/OP9v/ysAOQBF/5H+BP/h/0kAXgBQACUAUgBHAO//CgB+AKEATQAAAGr/kv63/sb/bv+d/hH/rf8YABUAPP9//80A4AB+AP0AUwFoAXQBigB9/3X/3P/t/83/dP+G/wUAof/T/tr+Wv8cAMcAmgA0AHkAxQBnALv/4v+nAPgAwwCSAM8AIgGkAOj/4P9eAMoAEwDW/ir/0v8RABsA+/7u/hYA1v9f/43/+v8FAF7/2f+VANP/V/8BAJcAmwBKAPf/z/+j/+v/wwBRATAB8AC1AAQAk//Q/6X/yf/Q/3T/pv9F//X+vP/X/y7/AQB6ALL/z//+/7f/rv/m/5sAwQCq/8//kgDu//n+BP8EAFoAw/9UAJsBkAHfANL/1P4S/08A1gDT/8v/rgAhAM3+ov6z//4AXAElAAT/0P+iADkAsv+L/wgAPwCq/x//l/+LAJcA+f+q/6D/Vv9W//3/qgA6ANv/9v9k//X+gP74/vcAHAJBAfT/Mf/y/jL/jf/R/zoAHwE9AU4Asf/f/8f/JP/w/+sAoQBeAOn/n/8yAGkAQgCIACgAf/+j/5H/iv8SAB0AEwA5ANz/xv8oADAALgCFAN0ANgFGAVoAf/8w/7j+G/+P/13/TgACARQAh/99/6f/XwDpABcBxAAiAOP/7f9r/9P+pv8dAU4B0QATAB//3f4P//f+Av+l/+L/e/8c//X+x/8vAWIBtgB2ADIApf+J/9T/WgCvABMAjf/l/7n/CP8h/8L/kgCyAFv/hf46/7T/VP9u/2sAKwEaAXwAq/80/8f/kgBAAAEA6wCnAbMA7/4e/r3+aADXAbMBUADx/o7+EP+8/tv9s/4kAC8A6P+C/zH/NgDcATkC3AH7AfMBtAC4/sX9m/4aAHgA4P8mAMUAfQCF/6v+zf7L/zoBugHzABMAvf98/zj/Jf86/+f/FgE8Af//dv+l/2v/9/7x/rn/nABvAB4ANQBU/0z+Bv9QAHQARwDiAIABNAG+/3v+MP/OAEwB5wB6APT/M/+5/tr+XP/H/ywAmwA6ABz/jP7g/hUACgFBAV8ByADO/1T/eP+g/4z/5P9zAOn/+P4v/0T/Jf+//30A/g
CQAKv/ff/h/0UAdABjAI8AkgDQ/0P//P4u/5j/bv+1/5IAaABD/9H+Wv8CABcA9/+kADkBWwD7/vP+xf84ALAAawFLAXgAvf8f/yP/sf9NAOMAxAD9/8L/qP8K/5j+9P4cAOcAWQCJ//L/bADV/0z/MQCFAZoBvwA8AEEAOwA3AIwAtAAHAV4BwACb/4j+If7j/l7/E/95/7j/TP+A/57/Tv9//wcATwD9AGEBoAAMAHH/Tf+T/0L/lf/r//f/hABWAMX/WACbAPL/fv+G/8X/fAATAWoAP/+T/iP/HQAKAIf/hP94/+j+RP5P/nD/ZwC+AJABDQIuAUoAof8z/wgAPgHGAZ0B6wAYAE3/wf7E/rX/6gBDAasAGADj/23/c/4q/oX/IgFNAXEAwf80/5/+BP+l/4f/HgBsAW0BKwDt/uT+s//X/9X/gwDmAF4An/9k/9f/SgB+ABsBdwGvAJ//af+e/5D/qf8nAIAAXAD2/8v/tP+T/4n////JAAsBaAC4/1z/D//m/jH/yv9mAHMA5/8UAGAA0f/7/+YADAGfABIAn/+P/4P/VP+B/zkAZwDO/8P/YgBkAPH/0/9aABIBLQGvAEgACgDs/67/V/99/xIALAAjACwAx/84/wn/hf9uAL0AiQBKANP/Yv8m/3f/EwCQAFIABgAdAPH/l/93/4f/2v9mAI4AjwCEACoA5f+n/xr/U/+tAIgB2AC9/0n/OP8q//r+5P7G/wgBSwGgAPz/cf8d/0L/jv/b/04AWQD3/8f/wf+e/67/2f/5/8QASAFyAJT/Tv9A/0T/j/90AI4B1QEqAW0AAwDe//n/gQDSAFcAZQCZAJ7/h/6N/k7/uv92/3L/IgCUAEUAAwB7AE0BcwHqAFkAxf8l//r+4f7g/uX/VQGLAagAj/+g/kT+pf5W/14A9QDHAPkAqwBA/+z+4P+LAKcAcgC+AEYBRwCt/sP+mP/f/7b/kv81AL0A/f8u/8P/6ABCAcsAfwBPANb/Zv91/7z/UP/E/hz/if9u/+P+ff4A/9X/PQC/AEYBGwFCAHT/bf8nAMIAFgFFAREBigAjAKj/Kv9D/8X/PQBFALD/pv9fAFQAef+m/2QAUQDd/0T//P6n//j/pf8kAF0Ar/+f/0YAawAxAEwAmQD5ANAAAwCI/7L/v/+9/2MAAwGFAKz/e/+o/+7/eQDRANkA6QCNANT/ov/E/6P/nP/G/63/eP9i/wD/O/5Q/mf/cAAeAf4AAwDp/5MAawDl/xYA9wC2AQ0Bmf/h/9MA/v+3/nj+1v5a/23/IP/A/88AigBU/xb/7P+uALsAoADlAAYBhwDV/4z/BwB1APv/Q/8K/y3/3v+kADMA+f4C//X/3v8d/1T/dQCFAXEBWgD3/y8Ap/9M/+j/jADWAMEACQCr/xUAKgDK/77/GgBjABkAj/+1/4MAtQAEAFT/gv88AEEAl/+0/3YApAAoAML/h/+L/7b/tf+4//T/KwDh/wf/sv4d/5z/uv/R/0AAxgDLAHX/W/5R//gAmwB8/yUALgH4ALz/bP6y/vj/GQDe/8AAmwGNAdkAn/8J/5T/GQAzAIMAvQCmAB0A/v4f/qr+NABHARoBNwCx/6D/Gf+I/jj/kwDUAG0AjACoAIAA6gCPAdEBkgECAYIAwf/6/j7/kf9+/9D/dv+4/qz+fP6H/kf/qf/e/xoAlv9B/7X/yP+f/yQAswCzADIAbf8y//r/8gBiAZcB2QGCAYkAuP+q/0IADwHCAOD/8//U/6z+uf3Q/Q//yQAVAS0AQwBWAGf/X/+BAJAB+AFBAU4AQv90/Z384/2I/0sAhACeAJ4AUgCu/2f/LQCPAbACfAJ+AC3/fv90//n+Rv/5/6oAtgCY/3j+l/4//7T/HQAoAD0AewDy/0//nP8WAFkAYwDe/6v/9f/m/xUArwApAA//jv5x/ur+m/9dAFoBWwEZADr//f7//qX/6wAEAvQB0wBq/3L+BP6V/tb/UgBnALIAow
AaAH//pv/NAKsBuQFWAV0Ahf9l/7//tQCfAUgBEwCN/mj9vP2c/kf/ygBlAkoC8wDJ/0//Zv96/+z/cAFzApEBHwDM/tj9RP50/0EA2wApAe0AJgDV/s39Q/7z/ycBaAE7AUMAS//s/ob+3P7m/08AdQBkAFz/gf7X/mn/5f+JALYATADb/0P/4f6i/8UAJwHhACoAi/+H/5T/O/9x/40ApgFqAXX/zP1D/rj/igCUANAAOQG7ANT/GP+6/oP/1QA+AfkAYACY/4D/pf87/2///QDOAWIAef64/QH+zf5L/xoAbQFKAbb/6v7R/hr/OgBaAYYBLgHTAMoAkgCL/8n+lv9jAfwBgwD9/sb+W/+m/zT/Of8YAMEAMgBr/2P/FwDHAJkA///r/2IA2AAvAdgAFgC3/5T/Zv/I/6oA9wAJAPb+kv5o/rf+KABwAVEBhQDZ/5P/v/9K/+3+bwBCAiICzwB2/3n+//0f/jH/ywCLARABQwCO/33+v/0k/ov/+wB5AQoBUgCf/+7+q/4W/7v/iADzAHIA/P9JAA0AfP/L/wUAs/9V//L+Uf96ACMBVgFkAbQAlv+I/vf9UP5U/zkAhQBuAIoAaABo/4D+7P4jAMgAigCRANoAagB5/wT/Vf8FAI0AGAF6ATYBagC//5X/p/++//T/6v+t/+D//P9m/8/+BP8IANwArgAxAH0A0wBEAJf/EAAJAR4BhwA8ADwAMwAMACUADACA/0b/W/8U/6P+eP4C/+D/+v+S/8b/dQAyAcgBzwFQAXcAN/9u/uz+sP/Z//3/YAAsAPD+//00/mb/DgEiAuoBbgG4AGb///7A/1sA+wCfAeYALv8V/kb+Vv8QAPz/HwBbAL3/0f6E/lX/zQAKAnsCyQH3/1j+AP5h/jL/YwApARkBkQC///z+Cf/X//AA5wHeAVkAO/98/5j/Df8q/ykA5QB2ABf/Yf4x/y8AlgDgAO0AJwAH/5v+zf5U/8wAJAJtAZj/7P5P/8T/FACFAFIB8wFiAef/x/5f/lL+8P7r/2MAwwD8ACMA8P64/pz/rAAGAagAlQDRAKgAKAC6/6j/9P8MALf/aP9j/8j/lwDrAF4A2//1/2QAjABfAIoA9gChALv/T/9p/87/9f+r/+b/ngCKABkArf8o/zX/nv8NAJ0AfgDL/0f//v40/+T/UgDEAO0ADQAr/wf/Gf9j/5L/jf/g/zwARwAfACAAUQCsAI4AZQC1APkAsQAaAMz/EgCWAI4ACwCQ/2r/Wv8t/3b/eAA9Af4Al//6/ZL9bP6g/7cAUAGOAYkBsQBM/4T+uP63/90AQQH7AMkAHADU/kn+t/4W/z3/tP+FAEUBMwFJANX/GQBTALEANAHuAGAAowDwAIgA7v9Y/+P+2v6a/jb+9f4hAIQAogBcAIX/Cv8Q/3z/hwBvAaQBnQECAeb/W/+b//T/KgADALb/ff85/zD/wP9QAFcAPAAAAMv/KwCpABYBXgGxAMv/t/+H/y3/Rv91/7v/y/9b/zv/Vf8E/03/kgBmAdkAFgAyAE4A5v/D/1EA6QC/AB0ACwAYAHv/9f72/vH+4v5R/xwAUAAAAMn/4v8tAFoAiwDSALcAaQA/AOX/hf+a/7j/rf+f/6v/DQBOALb/KP+Q/xsAWgB8AAwAg/+Z/+3/EgAgAAUAHwBjAPD/if9CAOYAyQDdALgA9/98/6r/+v/j/63/2f88AFUAHADj/+//CwAMABgAMQAqACQAKgAmACIAWgCQACsAqf+D/1z/HP8X/z7/ZP90/1r/V/9Z/wP/8f7c/+UATQFyATUBiQAWAPX//f9BAIYAugDtAE0AFP/z/l//Bf+6/hj/4/+wALsAEwAUAI8AWQDt//f/WgC6AJkAEQDT/9b/h/81/2T/9P9fAJkApgBbAK//BP/u/mj/1f9ZACEBlgEXAcz/2/4a/woA5gBjATcBPgD//kP+Qv7w/gwA/ABnATMBRABb/zf/jv/T/yUAuQAqAckAc/
9Y/mb+G/+b/9v/NACmAHsAtv81/2f/4f9dAIMAWQAyABoASgB0ANv/Sf9v/8b/z//H/8//kv+W/5b/1v9LAJoAZQA0ABoA1/8hAL4AqABDAFwAXAAQAKb/XP/L/5kAoAAOAKz/gv9B/+z+v/4r/yMA7wAOAZgAAgDq/yUANQA8AGEAbQBeACgAxf+g/9f/DwAnAEUAXwBTAAEAnf+i/wMAFgDt/woAFAD9/y4ANQDt/+b/FAAqAA4Aqv+Y/wUAEQCJ/yL/H/8u/0//yP+mACsBtADc/3j/af+O/zUAdQEzAnwB8//c/or+rf77/uP/YgEZAlwByf9a/tb9af6A/7oAfQEKAff/Dv95/mb+UP8XAZgCwwJ5AQsAVf8X/yv/1f+yAAEBjQB5/2T+IP63/qT/cwDHAM8AywB3ANH/c/+s/ysAYwAxAM7/r//U/7v/c/9V/5j///8AALP/s//2/wYA9v/S/7b/yf8CADAANQArAE4AgQA7ALP/jv/T/zwAfgB0ADsACADA/4L/iv+5/ygAvgDDADUA8P/q/7n/kf/Q/00AcQAOAKj/lf+u/9b/IABuAJMAcwAzAPD/lP98/87/KQBQAEoAMwAVAPn/s/+H/9H/WwCKADMAvP+d/6//oP99/6T/QQCfAGkAQAA/ACIA/v/0/wAAAQDm/+L/7f+z/3L/rP8rADcA2P+c/7v/BQAaACUAXACAAFYA5f9g/1n/2f9PAIgAkABpABUAmf9P/5f/KwB6AGIAGQD2/+P/jf9I/4f/HgB5AFAABADh/8r/jv9w/+//uwDzAFwAp/+F/7P/yv/g/zYAigBiANX/gP+V/8z/8/8hAFIAWgARAJb/XP9///D/dwC1AI8ATgAMAMH/j/+9/ysAjwBnANf/cf9r/6H/2/8CAD8AcwCPAFQA+//S/4//s/+4/6b/sf8OAFQAVwAxAB4ALwAVAMr/l//R/yoAVgA6APv/yv+o/2f/O/+Q/18A8wDTAFQA8P++/6n/t//l/0YAuAC/AEIApP85/wj/Cv9U/+D/WACJAHoARAAEAOr/AQA+AIgAngBnAAQAs/+M/4v/lP+5//X/JwAuAAcA8f8RAC8AFgDk/8T/yv/m/+f/4v8PAEQATwA3AAoAAAA2AHUAnwCuAJcAOwCq/xv/5P75/jn/lf/z/zcARAACAJ7/g//B/y4AnQDjAOoArAA7ALj/df+I/9r/RAB1ADwArv8x//7+EP9Y/8T/AgAEAAIA///1//3/QgDIADsBHAGHABIA9P/b/6L/kv/Z/xwA+/9y/+n+0f4z/6T/6v8tAHcAlgBjAAsA4f/s/wQAJQA9AB8A6f/p/wEA0/+N/6b//v8UAOz//P9KAG8AWQBEAEQAMAAKAOv/5f/u//T/8f/y/+L/sv9//0r/OP9X/6L/EQCYAOEAuwBFANH/mP+C/6j/GQC+AEEBZQEBATYAfv85/23/t//5/18AugCrACYAov+Q/8D/2P/C/9v/JwA9AAUAyv/C/9j/zP+Q/1j/Yv+x//b/6/+e/17/Zf+k/8v/yv/f/xYAOAAzABIA/P/2//T/7//Q/6n/uP/2/yoANQAlABoAGwAkABgA/f/2/w0AJgAiAAYA4f/U/9D/1/8AAE8AhQBTAOH/m/+s/+T/BQD9/9z/wf+z/7T/yv/i/xsAQwBDACQA+v/v/wMAGwAlADIAOwAxAPf/uf+v/+X/CAD4/9//2v/F/5j/bv+A/9f/MgA0ABcA/f/6//j/8f/j/+j/HwBCADMAKgAEAOL/8v/E/8v/yf/k/+n/3//c/+z//f/2//7/+f/+/xAAOABFADsAMQAjAAAA1//L/+7/EwAkACIADQDx/+r/7//v/+r/9f/7//n/7v/u//L/6//g/+T/+/8FAAIAAQACAAAA/v/3/+3/5v/d/+T/AAAdACwAFgD4/+D/1//b/+r/AAAOABoADAD3/+3/7//v/+n/5//u//r/CgAJAPz/AQAOABgAFAAEAAAA+P
/n/9r/4f/v//L/AgAVABsAFAAGAAEABAANABEADAD9//H/7P/u//H/9//9//v/BwAOAAoABAD+/wYADgAFAA0AEgAVABEADwAOAAsABAD8//7/+/8JABEAFQAGAPD/6P/u//z/CQALABMAGgAcAAIA9v/+//z/AAD4//j/CgANAP3/8//w//j/9////wcAEAAQAAYABAD7/wEABgD///v///8FAAEA/v////3/AAD+//z/AQD//wMABQAHAAUABwD///P/8v/5/wMAEgAWAA8AAAD3//L/7P/2/wsAIgAaAAIA5//T/9L/2//w/wwAKQAtABkA+v/i/97/7f8DABcAJAAaAP7/7f/r//X/BwAgAC4AHgD//+X/3f/o////EAAYABAAAQD0/+f/8P8JABUAFAAHAPT/7f/p//j/DwAWAA0A/v/v/+f/7//8/wsAHQAdABMA/f/q/+T/6v/4//7/BwAEAPv/+v/7/wcAEAAbABsADwD7/+z/6f/j/+X/9f8HABcADQALAAIA/P/9/wUAFAAVABAAAQD4/+//7//2/wEAAwAKAAgAAgD8/wAABgAOAAYABQAEAAAA8P/4//n//P8BAAkABADs/+7/6//m/wkADwD8/xIA9P/4/+f/7//2/wYAAAD4//H/5P/l/+z//P8AAAcA/P/4//j/8//6/wUACAAPAAcA///0/+n/8v/+/wIABAAIAAUA+P/x//L/9//4//v/AQAHAAUA+P/t/+j/8v/0//j///8AAAUACgAGAAEAAwD8//P/+f/7//r/7f/3/wYA//////b/8v/s/+3/8f/y////+//7//j/+f/2//f/+v/3//3/AgAFAAAA///7//v///8NABAACgAIAAUA/f/x//H/+f/7//H//f/9//3/9v/w//T/9v/1/wQABAADAAgA/v/4//n/7//5////AAD///7/+//z/+7/6f/1//n//v/7//3/9v/4/wAA/v/+////AgD6//T/+v/1//X/+P/3//L/7f/y//b/8v/3//z/+P/4//L/8f/8/wIAAAD8/wAA9//1//j/8//3//T/8v/y//T/+v/x//T/+v/+/wQA/v8AAPv//f/4//r/+f/6//7/AQD7//r/+v/4//b/9//9//3/AAD///7/+v/3//f/9v/5//j//f/5//3//f/8//7/8//1//X/9v/9/wQAAgABAP7/9P/6//3//P/5//n/+//1//b/8v/2//r//v///wIABQD5//n/AQD6//f/9//2//j/9//4/////P/8/wMAAwD6//v/+v/3//v/9P/0//b/8v/z//H/9f/0//j//P/9/wQABAAHAAMA+v/y/+//9f/5//r/+v/8//r/9f/2//X/+/8AAAIAAQACAAcA///+//X/9v/4//r//f/+//7/AgAAAPz/9v/y/+7/+f/0//L/8/////z////1//X/9f/4/wAA+/8CAPb/8//+//n/9v8HAPP/+v/5//n/+f/8//r/+v/1//T/+v/3//j/+P8DAPz/+f/9//7/AAD///j//f/7//j/9//x//b/+//5//r/+v/4//T//P///wIAAgAFAAUAAQD///n/9P/z//v/+/8CAAAA+f///wEA+//6/////f/3//n/AAADAPb/9v/+//7/AwD8//z/9v/z//P/7v/1/+//9P/1//b/8//w//T/7f/u//P/9v/1//r//P/6/wAABgACAP3/AAAHAAUAAwD+/wIAAwD2//T/8P/x//P/8P/2//n/9v8DAAEA/P8CAP3/+v8AAPr/AQAEAAQA/f/2//b/8v/y/+z/9P/0//r/9v/5//f///8HAAQAAwAFAAIA///7/wMAAAAAAAIA/v/7//P/+P/9//n//P8BAP//AQD5//n/BAAKAAcAAgAHAP3//P////j/+P/3//T/8v/1//r/8v/0//f/+/8EAP7/AwAAAAIAAQABAP////8BAAMA+f/9//v/+f/4//b//f/8////Aw
ADAAMAAwAEAAEAAAD//wAA+/8BAAQAAwADAPz/AAD8//r/+v/+//7/AAAAAPn//P/+//////8AAAMA//////r//f/6//v/+v/9/////P/+/wMAAQD7//z//P/8//j/+v/9//7//f8CAAcA///9//r/+P/5//n/+/////r/AQAAAAAAAQAFAAYAAgAEAAIABgAEAAMA/f/7//3//v/2//T/AQAHAAMAAwAJAAQA+//6/////v////3////0//P/9f/2//j//f8DAA4ADQAJAAMA/f/4//7/9P/v//L/+//3/wMA/v8AAAQAAgD+/wEABQD4//n/AgD+//n/AQDw//j/8P/1/wEACAAIAAgAAAD0//P/7f/z/wMAEAAKAPj/5f/d/+H/9/8dAEEASwA7AA4A2P+//77/1//6/x4AMwArAA8A7P/l//T/BgAaACIAEwD4/+L/1v/g//T/FAAdABoACgDr/9z/4P8CAC0ARgBHAC8ABADT/77/xf/o//z/8//k/8//wf+7/6//xv/y/xoAOABKAFgAZABSACkA7v+9/77/6f8nAFwAcgBTAPv/bf/+/gf/Z/8LALAADwEBAZIAAwCR/1//pP9CALwAzgB0AOL/Yf8O/wP/R//W/3IArABUANz/hf9k/1//k//6/2AAxADzANUAlwBzAD0A6/+I/0P/N/87/0r/bP+j/8r/vv/L/w8AQAB7AOcAQQEfAXMAov8X//7+KP+S/1sAMgFeAXkAFf8n/vn9R/7u/uL/AgHdAe0BRgGWAE0AOgAqAAYAGgBKABMAfv/3/tn+KP+i/8r/0P8xANEAIAHdAFkAJgD2/2L/5/4y/0QAQwGEASEBNwBF/8b+Tf4S/tD+JwAmARYBMAC1/47/A//W/qP/AAEPAmsCFwIMAaz/5P6k/h3+EP4x/6QANgHSABkAWf/m/s/+0/5H/04AhwFDArUBeQDG/4L/M/9M/4z/mv9oAEkBcwC2/mT+Qf90/zj/7/8JAVQBRAEwAdMATABbANQANwAq/9L+3//rAHP/SP3j/bj/XP/T/cj+NgJJBOwCMwB+/8L/+v5Y/8oBSwPMArkAsv3c+037Wvt4/Ib+8AHBBdgG9AKb/k3/mQGBANX9Pf5CAQ8CoP8Q/Rf8wPzh/eT9dv5VAfEElgYIBRwBP/1A+0P6avpj/K0A9wXTCLcFef/E+4/70/31/zgCCwRdBIEBl/w5+db4Ff0tArgDywPgAwQCJP6D+6D8awBMA+MCdABi/fj6xvrs/YMDuQfIBwEEg/+1/Dr8b/45ASwDdQJp/h76aPdF+O39JgWYCFcHKAVzAsH97/kT+00A/QV+CCcFhP5v+u764vuX+3b84/5wAtQCFf58+k/9egKpBMME0AReBmQGHwNh/nv6nfnu+uT7lPw+/vT/WAF/Aer/x/2b/GT97gBUBegF7AI4AeMAvv7/+4D8PwGQBvMGsAGb+9T5MvwQ/lv+MQCUAqQCEQBv/NT6hfzn/0ACWgNcA5QCGwJtASMAff58/sMB8wLE/Rn54flJ/YH+K/2J/uUDOgeoBLwAiAAzAhMCzf8X/TP/owOXAs79yfoE+8/7q/uw/vcEYwiuBg4B3/sX+/78L/+VASMEHAbkA979Ovtp/Br+1QBsBHkG4gLN+xv5lvoa/G3/LgSwBUkDx/+q/Gf6nPq7/8YFkQaaA3ABPQCa/Kj48fq2AVEFtQPXAFj/Lf9B/yj+pv3J/7UCoAMdAa78cftT/34CwgGhAJMBMQIPABL+gf1v/WH+sf/7AOwBDwHI/7b/bP/w/vX8v/vR/jgCigPlAnYBQQGiAT8BEAEZAHv+VP8QAP/9J/wO/uv/QP68/VYAvQJdAtYAvgBhAAD/TQCxAm8Bk/+R/8gAagG+/i7+1AHIAeP9R/qC+5oBzQNEAHf95v4BAmsC3QB+ArYFYwQT/3P80v5Q/0X8Yvs9/o8B5gGjAHL/7v4hAEYARP4w/r8BfQWNA9r9b/wf/mn+rv
3Z/Bf9v/8oA5IGTQltBxUC/fwW+WX3kvcS+94BjgamA7L+ZP3p/QgAZAG8ASAE1QbaAkn7WPmA/qsCpP9r/CAAeQV6A638Z/mk/XQDIgI8/b38JgEwA1b/hP37At4IWQbQ/rb6mftt/q7/0v67/38CvQHY/Ab5WPmZ/ogE5ATjAZIC0wb4BSf/k/y3AIcE+AJ3/QD5C/pHAbcFSgLl/bH8Av54++j0GvZIAdYJdwmEBqYFuga7BQABDP0w/Sr+9/v49+/36PvU/okALgPLBTUFRAJU/2b+9v3r++b7q/8nA74AJ/tv+hL/fAT2BlgFsAMYBYUEff+h+Zz21vaH+Wv9qwBHA3MFoAbsBJgA4PyT/Hj9xv1L/p3/EgJtA9oBPv96/4wBaANbApb/Ff9S/yX+cfts+n7+YgMiBGMCDQEiAnsD/AG0/tP9X/4M/Jz5mPtn/wwCIQVYB9cFVQGh/UX8g/xj/tUBUAOHAA3/OAAXAbQA3f/JAd0BdP6K/S/9bvtu/JD/RQHzAZUBfwC9//n+KAGMBAYFrwKCAET/pPqw9Wf4IgC2A9kB8QClAhEDMQLzAGz/ff/BAKkBWf9D+1b8GQBB/0f+IgFiBE0EwAHcARIDeQEi/pD73Poy/E39zPwh/U3/YwLhAQcAqAK3BuUHOQSl/8T9H/up9ln0J/Y2+/0ChwufD8cLFgXI/tD4KvaO96T9vAQ1B54EBQD/+wn6gPsz/pICxQYJCeMIsgGV+WX4W/mj+OX6iQK8CKQH5AKL/yL9BPui+7L/bQLPAQcCLgIsAND/1v+m/bD8yv/zBGkG2AL0/3v+dftv+Nv21feK/esFWAwzDHEGwgFQ/2b7HvVw8jf5nwT4C1cN1gqBBQj+VPcY9Zv1T/gc/40HEgo3BWcAdP1D+Xv27PkGADAGNguSDBUK1wMp+xX1HvPr9IL6P/78/58DnAXHAz8AvP1M/isAuQH8AjUEpAOqArQBUv/9/pj+P/uH+dD5If3+AjoEfQJoAUP/dft69or0ZvpeBe4NUhAPDtwJeQRp/tb4ivSZ9Lr6pgSxCb4FGAEV/eX5w/ff9Or1HPszAfMG4giXBc0EFwgDCLICdv5c/1AB0AC9/gH7FPaS85LzwPTM+agETw9dE6MPjwd3/6H4Fvai+DL+TwQ1CHoJXwfuAA/8wP37AA4D8wItAr4Bg/2z9w/0c/Pg9tL96wT/CIUIxQV1AwYB9f1z/YwADgTeBfsC9fqj8Q/tNu6n8aL1c/obAcEIUAxkCp8FMgFGAIv/M/9DAZED7gWlBjYEwADQ/N359voa/3sBEwDd/7QDVwhGCcwG7wXQCPoIWgM5/r78RP7wABkDJQLQ/nP91fvl9jXzGvZr+yL9IP2k/Pn6DPm79KfxZ/Iv9M72lfteAcMEAwU/A50AGf5z/PP8dwLSCjoSyBW+FcMS3wmX//r6CP2rBMkOuhY+F5cPaQaF/DPzKe9G8d/4nQF4CBwNnQxJB/UAovyU+kj4n/eP+IL47/eT9gL0GvFF71/tZutB67XrmOxa7abtZ+5E7WbuyPi2B04W7B6IHewc/hs2EjsIiwUYCeoM2wzcDcQOiAt9BucA6v1S/Cf8zf2w/P/86/7kACQELwYvCc0NHw8oC+cEp/669hfvfer05wnmV+eS7o72mPrm/GH8N/lO9oPxxuxZ6nbpDeki6dHp7+/DAUoWbyQOLlgz/zIWLToh6BT5CygHSgX2/5H4jPIg7vbri+p87j742AGvCRkOrBB0EfwM2AjLB8MEFAPAALz6SvYr9Mv0a/S98NDtyOwC7eDsr+wZ8hH8kgP9BdoDhP2P833pZuHg24HbuOLN89ALfyBwLYs2LzkINAcroSBWF0ARtA2LB7T9GPJ76m/l4d+c4qHr5PXfA9oPoRLkD8oLSgaF/Sz1HvW++ur/vQL3ATb/dv2W+XbzV/IO9d/4CPwq+i72N/ME8dburOsI7PXw5/O59LP10PQP9G/5uga3FKgeVC
jkLjUtoSXtG5wUnQ6RCBsF2QDA+Rfzj+4M62/noeeo7W/2if9MCAMO7g1WC14H4P/N9ufxm/O/+L39sgEEBjQJvAfiAjf/d/te+Oj45Pk2+Zr3e/Tv8RTwVe4y74jwf/IH9mn3svkuAYsLiBbaHxUmRStfLwIt6CPmGUMQvQUU+0rwBehW5V3mpOkL7VTwyPbW/R0A6gGVBCAFJgVcAn/96/sb/WT9C/7q/6UBSAOsA80C/f/H/db8uf24/kf84vm19nzwT+vM6VjqkO5z82H20vcg+YYAPwowEMEV8xy8I7IkxB7MF5IRkQs9Bk8Cgv99/A/4FPGQ6SHlw+Qx6aPxh/yNB50OLg86C6AGCwHj+tf48vnD+h79NwFkA+kDhwS2Bb0GrgTQAokDSwQAAmr8VfbN8SPt2ekZ6urq++vW7XnyoPtjB88RBhh4GcUZ4xnvF9kVnxZiGcUXJRObDVoEtPeo6dXeINx438jlqPGZAN8LXhE9EVsLXgMG/o/7JPuP/Dz/PgLWBHoF7QOWAlsCzQLLApoD4wTgAwcBrvpH8mzstOdb483in+Yb65vtZ/GO+2YH4w5LFA4ZyxogGTwV8hK7FIgWlBcpFxESrAiz/pzzwObU3fHcgOT872v7XAbcDlwSCRAOCjMDTP6e+8f6Uvtz+wX7A/uH+237M/s5/DQA/wQnB2QHaAW+AKH6J/Tm7zjuM+3L7R7vFe6Q7VryX/weBj4L4A6ZEtMUeRQkFWQaYSD9IVEfCBlmDxAE2PdD7lHpxeZ559nrK/Ep+WoAPAOcBcsGMAQW/xz7a/pu/FH+nf+xAo8F0wQ1AhL/2fvi++MAHQfUCCYF7P+++TjxcOme5brlO+dl6HHqQ/CF+CQAnwapDNERBBdIHTQjiicMKlQqdCXvG08QUANC9jzsjeU04cPgeOR163Xz4voTAT0FVwclB5kE4QF9/6f81Po2+1f8KP6iAGoCrgMwBEYD3QJgA3gCv/9O+xD2IvAa6srlPuPs4V7iFOce8LP6lQUVEP4XpBs6G8gZnRsqIKMiUCIsH9AXjg3PAFTyGOdt4jbj/ubh7Cz0gftCARUEngJDACAAWADOAKIBYgE3ADUAZwD9/5gAmgL1A3AFqAcsCGoG4AP2/1H5L/Jd65LlUONa5D/mFeq78CT5UQLgCfgPoBb+G4IeAR+IHh8g4iAWHKUUNg0wBNb6N/ND7LXnyOa65+jq1vCY9oz7NgBRA9gExQT0A6oD7wP0AoT/K/xh/Jn+lP9jAJ0Ac//B/jH+7fyN/Kn7Q/gI9K7vP+xn67XsZe657o3wyPgMBL4LpxHUFxAa7hh/GOkX5BcTGk4b/Ri9FMgNQgNN+MTumOb/4F3gxeXu79v4rf2UAKsDPAYRB/oHCgpoCg4ItwWbAlD/q/2P/Kr7K/uy+RP5s/qb/Hj8NPrp9531+fHP7rbtOu2R7XnvwvJk9wX8eAHZCuETZhfTGLYbTh3mHPMb0hmpF5kVYBCcB579NvOF6i7krOEJ5ZnsUvVr/DAA7wHwBHcHJwizCSQLcAo0CJYFvQLY/4D9/frw+Ej4j/dg9+T4e/qK+hD5IfbC8hTxx/Cw74fuee/O8oj39/12BHgJCw+mFLYXRBrBHkMi6SFxH/QbShWMCy4Bj/ap7Kfl5+J95OvpUfHB93z8BwBzAzMGXgc4CDwJqQi6BW0CMf/v++f5NfiF9nv2mvd7+U/8cf1Z+yH4Z/VT8pTvZe9G8G3wq/D88mX30Px+AqUHcgzLECYU2BZSGjMexCDQIT4hXB5IGGQOQANy+b7w/elZ5nTm6ekf7+fzD/hB/Pn/AAMLBpEIbAmRCI8GjARYAk//UPxZ+mb4g/Yj9yX6Q/xi/IX7I/nj9DbxOfCc8J/w2u/58IH1rfqM/8wEkAmoDPcNwA5sEdAWQhyUH+YgvSCjHfcVpAqv/1/2Se+/6rzp0ex/8t33PPqB+4P8r/4GATgD7gXLB2YGggIg/2T8HPrP98/2c/
eu+Nb6Vf1j/V37Ufms9yP20fTN9Gr2jPeA9qz0DfX+93P73/6XA7EIFwzsDQIQsBNJGAAcwB3sHVAcAhcVDsID8PkN8i/tluuh7R3ymvaF+SP74vux/O7+aAGrAncDGwSiA4oBj/4K+7b3O/XV81H0S/fX+xX/5/9V/2n9LvpR95X2Xfcn+C/3+PVZ9zX6G/yg/vsCxQY+CakLyA1TEJoVChsMHZgdEh4jGxQUWQxYBCb8Sfaj8wTz8PPZ9Lb0XfUo9yf4ZPm9/M7/fAAZAVECjwI4Ae7+eft89yX0uPJZ9A74bfsz/Nr6x/jF9hX0WvI79Er4E/vU+1j9mf+RAAUBVQKbA7oDRwTHBvYJWA07Ec4U4xe4GhUbfBd6ES4LwgRk/iH5nvZm9mT1ifJ98FzwV/FT80D2bfka/P3+BQI+BFgGageZBTACHv/3+/n4ofgo+uf6YvpA+W34bPdF9oz2Kvmh+0r8dfy7/bz+Tv7j/ZL+Bf9j/6wB0gWmCvAP4BShGPYbqx2NG28WKRFiDDAG6/4V+cb1CfMG76Dq+ej66k/uC/Kd9vf6Sv5OATAE7gVdBxkImwbRA1kBUf9G/ur+S//K/bL7nfn19uz0h/WA96b4GvkE+in7/vsm/DT8G/1Z/uX+AABgA4MInw0bEuIVCBkgG2sa2BXdDx8LBgejAXL8lPm/99H0lfBS7Qnts+5J8LbyOPdf+yn9m/6MAVIEkQVzBXAEGQO/AaX/iv0l/dT9KP1P++H5/fg9+CT42Pej9mf1dfV+9qf35Pjm+in91/55AD0DwAbeCnAPbBM6FtEY6hr4GRsWmREiDewHgAKU/jP8qvka9lryve8N777vkPFi9bb6Ef+qAXEDKAVpBt0GHgYjBTEFRgV2AxUBlgC4APr+uPzu+qr4d/Yr9VzzEfE58DTxZfLs8jjz7PTL9zb6A/3SAZ8HlwzhECEVrxj+Gl8bDhkfFUARTQwOBpgAS/1q+nn1wO9d7Ojrm+zi7U/x7fbd/EYB/wPkBd4H/QgGCNcFqQQSBJoCAwE0AE4AGgBY/qz7R/m79wr2evNj8OXuGfDz8ZryxvOR9mT5xvvu/uwChwe1DD4RRRSIF7caShsiGYcW8RN4EL0LtQabAYv9Gvp+9dXwhu7J7YnsE+0l8T72N/qW/Y4ACwPGBG8FLQW7BBUF1wWdBbcEpgROBJgCDABA/Zn5HvZF87zvCOwR67nsee4z8GLzm/bw+ID8hQFwBo8LFxFMFdIXiRrqG6EaRxh9Fb0RcQ38CLEERQGj/jb74Pb786fyB/Fs7+jvlfLF9bL4x/qX/AH/kgHoAnEDcQQ+Bl8HHQePBhsGwgQOAqT+T/v09+f0AfLG7jPslev662rsKe7N8YL1LPnQ/fsC3gd4DJcQ8xPYFjQZChlvFhIUKRI7D5cL2geyBI8CJwA9/GH45vXS89Hx/vCB8a3yFfTG9c/3c/o+/YT/4wHBBAcHowhSCs4LNwx5C4MJrwbuAj3+bfnk9WDy5O3X6sbqg+tM7Mzu1/JG9lL5a/0kAjYHcQzyEJYUHRjgGpIazhcSFasSdQ98C/gHDgbvBE8CHP6A+o73BPQD8R7wJPAK8MLwtPLK9B/3B/rz/Nr/GwOyBa8HNgq6DD0NyAunCRsHeQML/2z6efav8l/u3+qV6bnpF+or7Frwy/R7+Kv8wQHsBqwLyw+pE1cXoBnCGP0V7BN4Et4PYAzMCeEIiweSA1z+p/q1957zMfCH7/Xv1e/f71/xHvRs96D6hP3zABIFOQjcCeMLbg7dDtIMeQpXCOgEZgDo+8L3lvPr71PtkeuT6iHrTO0/8Dfzgfap+tH/kQXQCg8PQRMhFxkZQRhtFigVAhTWERoP2wz1ChkI/QJU/QH5RPUn8ZjunO4c70vvSvBz8mn18/gR/LL+awI6B1UKmwtDDScPYw+0DecKfQfqAyQAV/vv9Rjx3O3W6zPqEOnL6TDsXe5N8I7zP/hy/b8CzgeQDJYRfh
XIFg8WrxXMFckUxhK8EI8PaQ2gCHcCNP3r+FD05e9y7YjtYu7U7p/vK/JH9hL6q/wy/8YCbwaFCOoJOgz9Ds8PWw4PDIkJOAY9Anr9+/fs8mLv2ezo6g/qsuon7N3t6e/h8kz3R/3FA+gIKg3gEakVwhZAFkoW3xaxFvsUixI0EIwN3Ag9Ahj82Pf/82Tv6uuL69js4e267rvwyvQ7+Sf8Q/6mATsGkAnPCvELXg51EPgPXA2ZCtgHJgQY/3H54vOc7xHtDusJ6cXoMOqk633tDPH69cL7vAGXBjYKQQ5GErYUuhXDFiwYfxgyF2AUZRGTDkwK8QNd/WT4EPRl76DrpOrK6z7tVe4L8IjzMfjQ+wb+7ABxBVEJcQtsDUwQaBI5EtEPUgxxCAYEfP7399zxme1M61/pM+eH5hfobuqv7Pnvy/SB+ikAqAQaCEYMzRCoE9gUZhZ3GJ0Z+hiyFvMTaBFbDeQG4f+H+jL2JvFz7HPqFuth7HbtUu/X8mX3YPtB/mYBnAVQCWQLHA3oD3gSwBLtEIgOqQuHBxwCLfwA9ufwnu0s6/LoD+g76UnrTe3W73zzKfiU/bgCZwYfCqsONRLiE/gUeRawF+YXmBYPFJwRkQ6UCQsDDv0z+Abzyu3T6mPq6up6677soe/S8wL4VvuS/nYCgQbNCWUMVA8IErkS6RA4DlILxQclA7H9pfc68rbujux96vPoGOlZ6svruO2F8Pr04frw//4CcwZqC/QPlhJBFBgWGBgrGVMYkRYUFfcSVg7iBwIC8fyD99Xx0e0x7Czsb+zZ7KjuJvKw9a/4KPyfAJoElweUCj0OZBGlEroR4w/IDawKXgbOAQD9l/dd8pLuAOw+6krpuugn6ajqJe2G8AP1JfqV/mECrAakC6sPfxIPFYsXHhlsGVwYzRbuFLYRcAxmBvYAyPtE9o7x8+4Q7pLtT+1b7mLxB/XW94T6M/4vAowFtwhUDIQPQRFZEVIQlA42DM4IYAR6/2v6LPWk8HXtDusO6fvnL+iO6ejrHu8X87j3QfwwAAkEkQjWDNEPARJzFLQWxxeAF9QW7xXzE+sPMwpVBP3+3/n49ATx1+727Z7tA+747wbzAvbO+Pv7fP8QA1MGhwnpDKYPphA1EBEP/QzWCR8GxwHI/J/3WfMz8M3t1Osh6nzpLeqD60rtV/Do9Jb5Vf2UAJEEGQm7DCwPlhFWFGgW3BaTFkgWmxUdE34OFwk2BJj/nfry9dXyMfHf79XuJO8z8Q70l/YR+Uj8BgBeA0IGTQk8DN0N9Q0bDeULNAqxBzkEBgCw+9z3yPQy8srv2O217I3s7OzS7aLvqvJd9rL5e/wqALoEvAiQCzQOPBFDFHQWlBc1GFgY3Bb+EhYOgwkRBcn/P/ov9v/zr/Ic8fLvUPA+8nD0SPaP+OT7bv9LAvIEBAiyChIMKwyIC2wK7AiKBgYDzv7e+pz3/PTs8h/xYe8K7nftU+2t7TbvxvF99OH2xvmK/YkBKQV0CNgLmQ/mEjkVthbpF0cYzxaJE3YPBQvrBVYAZvv295X1L/P/8DDwMPH68q70dPYX+YL8zP98AkkFLggwCtcKrQopChUJLgdSBPoA5f1c+y75N/d09aXzK/I28YvwWfAU8YPyHPSY9ZD3Z/q6/csAngPOBqAKOw4XEV4ThRUOFyoXfxXFEl8PTQsvBvoAFP2B+iP4jfXW87vzdPQb9Zn1CvcD+uz8S/93ATwECQfjCKUJ5QnjCecI2wYPBDYBnv4//PX55/cQ9nz0CvO+8efw5fCh8cLydvPV88/0CPf5+fP89P8oA00GNgn0C9QOlBFjE5YTTRIoEH8NMgqSBmAD0gCc/lr8U/oy+dD4p/h1+NT4JPoY/Af+3f/WAQkEGAaaB3kIxAiJCMEHcgasBKoCqwC4/q/8fPqP+P32hPVq9MbzhPOT86HzePM186Hz//TY9s349PpH/dX/rgK0BZYI7AocDEYMyAvhCmYJkwfZBWgEEw
OkAUEASv+t/if+ov0//SL9Q/24/aD+DQDRAXYDzgQDBikH9wduCJQIcQgTCJYH6QYGBsAELANrAYL/mP3Q+xT6a/jC9iv13fMa8yDz2fPu9Bb2X/cL+QT7H/06/wMBHwJ5AlUCEAKzATwBzACaALkACAFKAYYBvAHpAfMBrgFWATIBPQF+AbsB7AEgAmkCzQJJA8UDTwS+BAUFNQWTBSwG4QZbB38HgwctB2QG+gQnAykBEP/a/Jr6qPhH94f2OPZT9tT2wPfo+Ab6+/rA+yv8LPy9+yb7mfok+sr5iPl2+bz5WPo2+xj8FP0l/jD/LAAIAdYBnQJhAw8EkwTwBDEFLwW8BBcElwNxA38DpwP8A8AEwwW1Bm4HCAiyCC4JJgltCBwHigXlAzkClQA//zT+if0U/dn86fxb/RL+wv5T/5r/j/8j/2P+Wv01/BD7APou+Zr4S/hC+I74Kvn4+RD7h/xH/i0A7gFEAyAEbgRJBNADGwNbArYBLAGbABIAtP+y//z/jQBeAWsCgQNnBMYEngTtAwID2wGXAF7/Lf4D/Qb8gPt5+7X7QPzh/F79Dv5h/pr+jf5k/h3+3v2Q/Sn9y/xR/Pn7y/vJ++X7Hfx6/Bf96v3U/r//uQCyAZoCagMzBNcEPwVQBRgFtQRKBNMDXQPdAm8CHQLqAd8B6AEQAkgCgAKoAsQCzQK7AnkCFQKuATYBqgASAIr/Ff+c/iL+y/2t/dn9KP6B/tT+Lf9+/8P/6P/2/w0AKgA3AAIAnv84/9f+Wf7u/a79s/30/V3+4v53/ygA3wCHAfwBOQJUAmICUwIqAu8BrgFjAQkBmQAsANP/pf+X/5H/kv+E/1f/Cf+h/in+u/1n/TX9Cf31/Ob83vzI/Kz8kPxz/G/8l/z6/Hz9If7c/pT/LwCSAMAA1ADJAJAANgDS/37/Pf8I/93+zP73/lr/6/+ZAGUBOgICA4cDxQPqAwMEFwT7A7kDYgPrAkcCXQFmAKv/Wv9x/9P/YAAZAd4BgQLvAhMDHAMXA+cCkQIDAlgBlADB/+n+OP7K/bn9/P10/hT/zv+bAFUBvQG1AYgBOQHVAD8Acv+2/jH+/v3k/dz9G/7B/oH/EABqALYAEwFAAR8BqAAuALL/Bf8b/g/9KPyD+wT7pvqW+ur6lPtQ/AP9vP2V/n//UADiAFMBvwEIAgoCvAFcASIB7wCfAFMAGgAMAPv/rf9n/17/sP8EAAoACwAYADcANAD0/7f/p//T/wQAJwArAFcAtwAFATgBdgEwAgIDSAMvA2UDLwPIAq4C1QFgAQgBLADZ/2D/hP4p/gP++vyT+wv7FAC7Cc4LUAHP+qgERQ8QCkH91v0ZBfkE+/w99C33Mv3b/Fb6Rfgk+Hv8nwAg/0z8+fyDAUsEzAFL/IH7TAD9Aev++/tA/SIAW/80/i79Wv1c/yMAGABCADYBNwL2A/AErATMAk4CxwN6AnAAjQCoAYcCQwEM/vz8Pv9CAaP/V/0M/pgAfAExACj+8/59AswDHAFU/p7/HwKRAi8Aiv51ABcD0AJt/6b9H/9DAX0ANv3P+7b8SP6Z/nT8hPvl/qwBbQAk/ZL7D/5fAEj/k/wh/b8AsgHD/j38vv1tAYkC2/83/vv+zABhAsYBWgHzAokEEAXoA0ACqAHTArcDVQHZ/4z/PwClAB3+DP2F/tMA9AGOAC//Pv+2AHoCKwJA/+X8lP4CAqEBDP7G+hn9TQMbA+P8YfnC/VQDzwC5/Nn97gEaBp0Ce/oW+2gC8wVfAYT5Z/tiAiAEGQKv+cr6oQUrB38BR/pw/PsEUQTK/sb94gAWA80AeP3I/ZX/LgBu/RH6x/3kAIP9Lf6e/8MD6QL1+3r+IgAbAyYAJ/1WAqoBmQARAF/86wEFAMj4lgSyBo8CcwAU/Zz/dwJXAan4i/UVAq4Ec/cu9C79yAMFBOsA3fu+/ncJ9gon+vP43gnPDfgFDv6FAdcEYQpXAKMBAQPL/DD7Vffg/jH4KP
2e+2r1AwDYAfP11PZcBpAEfAJT+rkCegtD/lEBYwWQA0gAaQGKBgID0/TC+AsMKQm79ybvwgJQD28CZPHy+EEK9Ado/TrxaP5vDSX3MPWPAZ4JmAak9vb8BQXFBpoDCvzM/sMI/AcfAKbzj/ujD+wCk/kg8u///xAT+cnweP0rCLkIR/bE9vEHqAiS/x73O/11Cav9z/jU/esCe/6++iL+7v73A7z7Z/v+BfgH8v7f+E4DMQhSALf8qv+yBC8Dw/+7AKT/4Pww+Vf85P9gAsQAPvu5ALwG/AKU+qb7sweSBW8CMvzc+B4Ddvu7Arn93fwgCWn9IgOpAIACoARH/6MHmgKF/i//YPxKAa8EPf/v+Y/7RwGNA1z+CABFATgHMANL/2f+FPur/zL+SAQx/wf4zv1pANf6CvpFAOH/HAKsBvL8rfo9AikDkQMIAcAApvxh/ksD4fzL+8T6A//4A2cBx/9j+Tj+AAXVAVEDVv18/LkBbwYlA376Vv33BPQFqv7o+iL+SAZc+xb8Ngbe/Cv6XPsMAMIHOAFS9L/8zAwFEPr7PPFfBSgM1wL19qP32Qc3CEj/B/cV+PQHigKm94AA7wUHAV0Axf/VBDYEcP8AAGwE5Aab/VD3dPguA/MAEQFh/l75lACX/vX+tPmk/xYCOwLqB4L+qf9MAtkAPgbwAyf+wvrN/ZMBQQF7+Tz8tAKcBPD/9Pae/l8C9/tL+DcAlv8I/bP5ofyNBrYA8gEE/j4Fbw1D/SL67ALACYwF5fnO+ikEQAkJBMv6qvtQBL4J2v0B9bH8tQJ5Bd4AG/yu/e7+7APG/7r8mAH7+2T+Ov93+xP6/PgY/csCWgXy+FT8LwZLAyIHOAM7AloHswM8BdcCu/vE/6AGUQXR/kD6X/Xi+u4APvqI95/5Pf+1/FX7XPlq+UoDpQc4Bcv64QCjCNMFSgKDAE0ElAVvBET85Pzg/Lj+WAKjAIUBkf5FAPsBLANGCE7+c/qCBesFT/y39oT67v7CBPb8HvY0/yUDMgTHAUEAHALa/+wBoARmCH0B+fhDAGwFJwH4+mv7qgQrBDT76vQB/K4FsQDu9z/7igj/BMf7YfYJAQgH+vuJ++n/fwQ+/zD8xAEdBbcEefqr+LYEQAfN/8j4sgB+CrsBK/6x+xcBKgPW/j8Am/wxA64Bjvuq/iz9SQCx+oP2mv6rAcn85vkQAy4A5P9x/lUA0QUkAyL+zv0TB8MEmP/KABEFAwgEAPD7MgABBA0Ew/5l/XD/kwLp/yz77P4DAk8EGP+9/ikCFwEwAF/8ngNrBe3+Kv8r/hcCYwNb+635pgGQCNoAkfaf/AoDaf81/nsBXwN2AN/8YPwR/8sBUP+b/v/+GgCUABD5pf5VBEr7s/6F/Gn5w/9t/D3+m/03/e8BAAGh/6D+yQGJA0kEtgEiAtEEgQHFAMz/6wNCAs0BWQIGAYb/6Ptj/wP+If+P/SX+PwOVAOz6HvkJAfEDCgDe/3UANwTnBFL/8vwTAYQF3AM+AGgA6QJLASL8Gf8pBbwAWv6l/v4BYwVT/iz7fv7rBN//e/iy/M4A3v/5+Yz66P4oAY7+YvyIAaEFpAEY/XMAGAX/BMAAxf66AZYEAQJj/Ub+iQEvBKb/RPmz/aX+4ft1/Fb71f8kADD+Jf/b/gMAa/3u/nABIAPeAin+Cf9xAPwAo/+G/igBVQCm/wABI/+G/ZD+OQLNAuH/AwDv/k3/G/5B/j0BPABe/9X+0ACgAPv+WQBdAtUDEQFWAZkAYAAAAyUDdQXlAgkBjQMpA64Bv/9HA0MDxwNwBKYATf+M/+4CiwGU//f/rP7c/5MA+fyB/A38Vv/PAIb7fPtS+qb70/yG+3P6DflM+2b7Dftv+bX5efvn+0X8r/yt/LT63PyuAAMASQARAboBegRzA+YBvgPXBTsIWQmLCOMIyQf9CloIxgY2C2oKBAtJB5MH8wbfA+8FYwVgBHUF8gNHAWz/CP2j/VH8o/mx+oj5sP
bz9sr0lPPj9Dn2vPe19Y30p/Kk7y7yWvM687X2d/U89LPw4e4F9MX1Mfh0+1D9WPwT/zgDHgKdBqcLXRJ4Fj4S/BFoEQgWghcgFeUWrBYvFWoO1grMCe0IpgjqBWcEQQB+/Zn9rvvM+V38AgI9A2sAUv6x/rUAxQKMA1EFngdmBnIBVv2j/Rb93P1Y/Yr5Vfds9Hvu4+de4pzg4uBW3xLffN+L4WPiSuVe67TwvfYR++IB+ArCDUcPcBQWGNkbAhsTGiYcYRuXGMkTcxG2DtIJ4gZvBCQCBgCp/J757vcp9jn1sPUR9k/5mvvb+vL8dQCzBJoHkgcFDS0TQhT8FNgT3RUtFVUU7BK8EDgOyAcyAgP9vfnT8unpSOWq4t/gJ91u11rWGtss3EnX6tk/4OHjbefM61X1S/z4AjILYRB6FSoY5xoZILkjmCQUJEUjwiF9HEkWbxVlFK4NMwl9BeP/xPfX8PLyzvF27XXt5e9a8kry6fFX+R0C4wGfAdwIBBEeFFsUwhT5GGAZ1he6FjAT4w8JC2UFxABS+mvx/ern5svgrtiO1N3Uj9TA0LbNVNGm2PbcF+DT5iLwY/kJ/4IFSA+9FaEbIyGsJiAqTioHKwcsEivCJJMdchojFwcOEAdGAZv6/vVT8JvqVueA6VXqd+qB6tvrxe+38yb4C/xfAiwIpwznEKAUYhbhFo0X1RgDGRAWOhJxDXkIgAK7/MP1QfAb7IfnNOPx24bWaNTk1UnXWdgY3fThQeh47n/zsvhi/4QHQBDUFsEash15IUkmgyacJaon8CerJAMg/BmHFN0OkQbVALr7/fg89cft0ep56B7nN+f35wPrsO1+8pf2oPlB/R0Ciwa5CbkNsQ7ZD4gSURMIES0O7w2TC2kF9P+p+6T2sfAi7GzoI+Vh4qvf9ODP4abjCOfN6BvthPG59Lb6PwDUBGwJwA2pEUATjBRgF+gZvxilFwYYthdiFv8SLRCYDocNjAnzAtEAqwEK/sf5gfmd+dr4q/fg+L/6KPse/fb9vf5QApgC6wAUAW0BXAHqAVYCkP9y/f/92P1g+uP1h/LI8Z/x6uyf56bm/Oe053vnpuha69zuJPLH9P/2Kfu5/28FpAoiDnQRkxT2FdUV8RScFTcXTxZiFMMRuxDnDWMJ9gddB/oGuwVXA24COgHRAMcACgIgBNsEzAYjByUIwAcFB2AIognICWsHZQVpA8MAOf/x/Xf7kfjN9uj18fG07r/r1Ogn6EPni+VF4o7i1OMB43njPOW16QfvNvIb9Xj6TQCrBKIJ7A6TE6EUNBY0GO8XARdTFZ8T4BDXDW4Kkwi9BqcEAgPIALT/kv7q/iv/W/5kAKwC0gPyBKoFhwibCx0NfA4SDpcORRCKD08O5Q0gDdoK2AhhBlQC+/1q+7L40/Nl8KTtROqK5UHiY+CZ3aDcj9s629HcrN2x3/jjcegp7lf0vfpaAakG4Qu0EMwUTxgPGxQerh9yHrkcnRs9GXsVuBFsDqgLXgfRA8AB+/6U/Ej6Eflk+MX3BPg9+lz9p/16/mUDfQifCsELiA9jEwoUaRTaFDUUNhNuEacPZQ3RCTQFhQE6/k75cvNh7pvqEuXI4DneYNtz2Y7Xc9f12BXZtdpY39Xj2een7Lzz2vqLAY4Igg6OE04YnRtNHnEg2CDlHxoeQRzFGJwTCA/4CrsG4AG6/Tr6Q/eo9KTyZ/Ld8qXzNvQg9vn4NPzNAHMFBwnVDOwQkxSQF7gYMxlFGX8ZchhYFJoQ1Q45DDoIWgTCAG38jvc48+3u4+m/5d/hZd692uXWutV51vHWfNee2j7gkuWF6o7xg/gHANoHZQ5zFHcYChyoH0Ah+iD9Ho4cFRu8F7YSQw6qCW4FrwEG/YX5v/fP9nP1V/RI9Cv0mPUh+ET6nvxgAHAERweXCSINuRE9FfQWSRjZGbIZuBhfFzYVGBJ6Dj0K1AVNArr9q/lp9S7wdezU5x7k6+GR3oPasteb17TXit
cb2XLdTuKh5pbrHfJo+Jn/Tge/DJ8RKhYqGkMd5R58HpUdxRxyGgYX/BPbDwcLWQZ7AW/97vom+bH25fTC9ET0TvMM9Pz1ffi6+mT9VwDRA5MHJwvYDssQtRJMFQIXKhdnFlUVBxRUEkgPrgvaCOsFsQLO/nH6XPYD8uLt0emm5eXh8t7I22rY8teO2bzaN9z03zDlrerL8FP3kP40BaELFxKdFiMa+R2VIDAhxCCWH54dJBv4F90T9A7MCR8F0wCi/Ib4dPbZ9GLzFPJc8e7x8fE68+r0//Yg+s395AFHBbcIbwzAD9wRABPnE88VQRYbFRgUhRK9EGQOYAsmB6oCiv6V+aX0y+/t6rDmO+K03ZnZwNZ+1C7TxNQK2J7bceDh5iLtRvP3+joD6grNEUUXfBzpIFskoCYmJwEm/CMoIRwdCxmLFJMOvQiMA3/9h/iW9PXwy+2466zrhesV7PHt9e/A8pP1UPnd/bEBEAYkCocNhRABE30VuBf8GBsZkBiXF8EV5RPiEIoMLgjnA1f/G/ol9V3wtOvn5ujhid2i2vXX9tQu1BvX8to43jjiQeiw7pL0UPzUBHILKxGwF6EcPB+UIbAj9CNNItcfDB3GGXIVYBBFC3wGdwFX/D74nfTq8GLuH+4m7pvtae1w7oDwFvJv9LP3c/ua/84DZgi7DB8QxRN5F+sZ0BqrGq4a4RkrGDMVgBHdDQgJvgOn/lr5IfQf78Pp6+Sx4Obcgtpr2CbWSdSp1EHYo9xw4LPkU+r48FX4HgBMByQOihSDGWwdISG0I7MknSTvItsfnhyVGVUVvA/1CbEENwAt/Fz4uvSu8avvZu6G7Q7tYu1A7qfvIPLN9fj5cv6XA2MIiAy8EO8UpBgnG0kcqxxiHE4bOxmeFpET4g+ZC+IGCQLu/Lj3gfIh7cLnBOMB35/bLNkD17rUBtQj1l3Zotw24dDmIezm8Yb5GgKVCRcQ1RVYGscd+SBSIxckfiM/IVwelhslGDMTpA14CF8Dpf6x+r/2ffKN71vuge077N/r6uyt7vzwE/Sf98P7EgHoBcQJIw5EE7YXvBp1HGsd1h3QHbgc2xltFhATOQ9gCoYEvf61+aj0l+7251Li8N2r2bnVptO20krRo9An09HXjNzU4RPoa+4c9WD9bAbbDdUTqBlFHhEhZyNHJaAldSQ/InkfYRxNGPcSLg2mB1kCnv2J+WX1nfGG79buAO4+7bbt0u6h8AP0YPgy/F8A5gVqC8cP3xNFGAMcpx74H1Ag8x/nHqMcwBgyFKwPwAoSBSb/bPnf8z7uUui14uXdwtlC1snT5NLC0q/SYdMF1kfaP98t5dbrY/IU+Z0AQAjaDvIUSRrxHSMgAyLuIlYisiAvHgQbRxfJEo8NGAjcAuX9cPnM9WzyXfB/7/nuXu5s7oTvGvEy82v2ofr+/s8DGwnbDeYRMRa3GkUeLCDVILQgAyBXHokblxctE7wOnAmzA839Nvi78iftUee14QzdX9ky1gnUctM309LSs9Mr11Lc3uHi5yjuevQ9+8YCjQqREZYXShxKHxIhTiKaIpchmR+wHDkZYhWtEGcLFgYWAXz8kfh39ajynfDQ79Hvpu++78HwjPI59fn4B/3rALkFiwvFEKEUgRi1HNIfICEOIVUgMB85HdgZNhWAEPwLxwbFAO/6jPUN8Pnp1eOQ3jbaSNYM03DRLdH+0FbR5dOm2DPe6+N+6gTy1PlfAXkIoA9TFowbtB5xIDQhNyE8ID4eUhvwF6gU2RDuC7oGFgKm/SX5D/Wy8RLvp+097UXtze0J783wLvN49tH6kf8zBI0JPg/jE+4XRhwbIEoi9yKpIoohlB+JHF0YmBO7Dl0JZwNT/Zb3JfKX7IfmkuCs2/LXz9SV0n7RuNBw0G/SJNfm3N/ikOkw8f74lAAeCEsPuRUcGzEfhyGuImMjCCMcIUIeDBuNF+ATWw+9CYgE5v8V+4r26fKo7wrtIewi7CfswOxM7tbwmv
Qs+fn9EgN+CAwOExNsF4kbVB/YIbEiMCLNIMoe7RvwF1UTpg5xCY8D0/2g+Ljzsu5W6Znj890h2VHVZ9L30MXQhtDD0EjU1dpc4VnnGe449Tj8pwM0C8MRahcOHBgfjiB5IfIhRSGgHxkdshnPFaURbAytBnEBl/zn977zLPBM7ffr0esm7A/t6+438RD08/eJ/BcBDAaJC4gQqxTYGNkc2R9gIaghwiBnH4cdgRqJFlIS3A2vCBMDxP0f+ePz1O0/5wfhnttN1xPUvNGE0EzQ9NBz03zYE96o44/qM/IN+Q0AjgclDtsTbRmGHfEfkiFnIi4iziCLHsYbjxhHFAcP9QngBGT/L/oR9pTyS+8G7Sbsset669vsR+948UH0ivhp/T4CqAcjDdoR/BUfGr8dJSBtIf4hgSG+H0QdMxpSFq0RbgyfBokAv/o39YfvL+nX4ifdT9g81ETRyM9Wz0jPTdDU05HZvt/65f/sovQn/JUDyQpyEXcXYRyYH9QhRyOBI5gitiDTHSka/BU3Ef0LhQYVAf77P/f68mfvbuyO6gDqLur06tjsnO+e8iH2hPqA/9IEaAqRDxIUXxicHBEgWSJPIzMjSSKyICweqRqFFscRQgxDBnMA0/oM9fPuoOiD4gXdTNij1K3S09EO0dTQkdJh1kvbH+Gn53buxvW6/aIFpAwZE8MYKx04IEIiRiMrIxQitR9kHJYYHhTNDgMJRgOV/WP45/Mv8CPt+er76QbqpOrO67TtafDx8/D3SfxtAVEH1QxgEacVLxo+HhchqSInI88isyG7H6wcyBhlFI8PEQolBG3+5vj58nTs7OUU4ObaYNbo0hrRmtCb0CbRbdPX11zdPuNf6QLwO/f1/nYGPg1rE+IYKx0HIPgh7SLqIsMhOx+1G5oX0RIwDfgG4AB1+872r/IN7ybseeoA6oDqV+us7NXuHvIk9lb6zf7JA1wJxA56E8kXNhx1IJAjDiVOJfIk6yO/IRoeehl8FB8PFQmTAmL8pPYH8S/rY+Uv4L3bQ9jf1cPUs9Q+1UfWY9j325DgmOXG6mzwwPZU/b8Drwl2D6YUrhhQGwAdCh5RHoUdXhsfGHcUQhAoC9gFXwCh+0/3W/Pk70HtresW61jr/Oso7fPulfHo9Mf4Nf1EAtcHdw2mEnEXARwbIB4jxyRTJSIlMyQ3Iu0epxq9FUkQMAq6A3n9ovfm8fLrL+b54FzcpthC1mvVwtV/1q3X8Nmi3UniJ+f+62DxcffP/eYDeAnCDqETnRdWGhwcTR0FHocdkxu9GIkV0BEnDbsHGAIH/Yf4SfRT8BPt/Oon6jjql+pr6zftF/Co82L3gPtzACQGowtVEK0ULBmbHdogkyJRI70juyOWIuofKBzuFz0Tkw1ABxsBTfuC9RfvrugB4yneN9pt1yLWBtaD1pnX5dmu3Tni1OZf63HwJ/b1+4gBvwbmC8kQtBRSFzcZwRrEG5YbAxqJF70UQxG5DF0H5wHe/BP4gvN272Xsc+qd6ZfpHOpc613tF/B/8z/3cvtLAIUFuAqpD2oUEhk4HWAgaCKXI1okTiQQI4YgJx06GYwU9w7mCOAC9fzR9mbwROre5B3gKdyb2cHY9dim2STbwN1U4YTls+nG7WjyiffK/PMB/AbjCy0QbxPGFccXPRmtGeQYLRfSFOERWA5HCswFKAFz/K/3LfNp75Pslepc6fboV+mJ6mnsBu+A8oz29fqd/zUEtAglDaMR8xWrGckcfB+JIbMi7CJAIvwgCx84HEsYbBPvDeAHNgE1+nrzSu2K5/fhWd1k2kLZV9nR2ZHaUNz239nkLukE7a/xU/cO/aMBqQXXCg4QgRMeFhAZCRsKG6EZKBe3FEcS0w7TCrQGyAFQ/PH38fP07xftUetb6Vrn+OZ16A3q5+sM74nyOPbH+rf/bgRSCW0OTRMCGEkcmh8SItEjjCSuJO4jOyIPINwbPhXCDlsJQgJn+pD0iO986crjCeB+3ZjbZt
va3LHd4d294InmUOsG7i7xB/c7/sIC2gR7CWcQGhSUFAQW6BeeF5UV7xOgEZMNDQqwB8oDZ/5c+n/3pvPA7xvuTO0L7KPraexd7dvuf/Hu9Eb4BvuW/ucCLwa4CaQOXBKhFJMXfhqdG/8bjhxMHNgaHRl4F8gUVhDlCkgGQwLC/AX3PPM576zqDeig5pTlEeYJ6Fzplul16uDtLPQS+Dz4v/lh/b8AcQMmBlkHcAiQC1INbwvLCpMLkgomCf8GyAOqAHD/Nv5i+qX2PfXf9O7zlvEv7wPvWfAc8SHxdfKa9rX6JPwH/dz/gARICAEKTQzwEO0SgBHHEvsVkxZGFawVyBb9FTsUgRJ6EMcNIwq3Bj4EzP+u+XL3k/b28grw7+6a7k/uJe227Pvu4fGz8xD2rffy+H79nwFtAWYBHQPjBJwGkARP/5z+8QFZAkH+hfut/gIBa/3X+TX83/2J+ir45fgU+U/4Ovnk+Wj6/Pps/Ab/UQFxAg0DwwQRB0MJPgqZCowKLArKCm8LkAoRCkALEQzcCX0HZwcHBoYEzwNZAm0BlgA2/639Y/wm/NL88Pul+WD5TftE/O76Yfpr/ZsAwv+2/skB4ATiAuH/VgK5BH8AKvyW+/v4NvPA8S7zy+8R63/q9OsW6+Hrk/H/9Qj3TfnN/e8CiAcYCx0N1A5hEDIQ6Q/fEIsR6w13CXcIHghABs4DhQLdAMf+Z/5O/mf9cvwS/G38NPy2/N/+tAASAdIAJQIHBpkI2QcMCDILDA79DPcKZAsmCywJyAlKCgYGcQL0An4Caf6H+2f6uPZX8b7uS+5065vlHuGn4ZPjouRo5iDp5+pQ7rL15fw8AlAHyQv+DNcN4xFvFfEUgBMTEuwPaA4TDYALuQkMB6UDbAGBAF3/vf0V+7j4rfc19/f2hPaf9Pj0+vhz+sj5SfzbAfUFmgjMC74PmRORFPITxRSgFAcSaQ9mDtcMjwgEBt8F+wLH/nj86/pk90/y8e8J71vqWORR5Ork1N1J2SThDepH6NfluewK9HD2D/9xCrcMew43FOYYCBtkHa4erBs9FvIRhhDxDn8KrgPL/Rf8IvtN90D1dvaa9c7ynvGF89r18vWw9cL1yPXG9zT+TwO6AjsEgwr8DsAPaxGgFXAXmBVgFLgUGBX8EzoS6g/yDLsIMQNG/sv5bPSK7obpzeRd4Ebd49oO2I3UatRT3Snoz+nb6OHvf/unBJkKtBGlGrofwiB1IQUk/yX7I+0eqxiXE9QNvgf8A0j/c/qk9mfxJe5979jwLO9h7FztDfHY85X0MPX59vz6SgCeBKgFSQYpCmkN2Q+ZEeAUVReDFrkT2xKnFf4XoBXsDScIiAWSAVP75vRS7nzm4d/U3Pfat9Yq0kDRFdLw0ZTXdech9PryWPPWAL8OdxXvG3gjmyaTJtQl2SQYI/UfxxpdEqgJJQSaAUT+UPix8lLxAvJb8RzwB/BS8vn0y/RC9T35d/xV/U3/VAIaBVgJZQ2VDkYNDg6wEk4VwhJVENgQZBA4DyoPmw1JCSkF1AJKAFr6wfFK69LnpeN130rbMtaK1Rjactsj2S/grfGJ/Sj/WwFXCXITOByEIdIiPiSuJsIk/B0hGKwUVRDqCIj/EPnh9tr0HPFM7iPtqey57m3y1PQs9gb4lfpO/Sj/6gGyB74KHQZ2A9oJpw7cC6kInQmDCzoLCgr0CRkLyAujCpEJwAd2BKgCKwH8+xH2T/Il7n7p4eS94c/f3d7f3trfIuHK4l3rQ/pWAqoDswqKEesSMhYtG94dnh3sHcYa1RGcCmgHowSQ/Sf2YfaF+MD0NfIt9a33tPhU/PABFwVyBsgHKgmeCLYG5gVTCHwJgAR8/6b/3wGZ/3P9qAAtBEEEXwNABIMFNgWmBKUFMgUdATH8gvhb9M/vM+3Z6zjplOa/5vvo8upN66ftJPkzBm8KAA4vFlgbrRhEFM0VnBmvFMkMrQosB4kAM/vf9XbxrfHd8RDwmP
Ac9KT4P/0aAWwEwQgsDd8P2A+zDqUPnxCBDWUHPgNGA2EC/PyI9r/09fYF9zn24feX+oH8Bf2h+wL62fpj+/z4Tfdd9TbxfO+97UPo7eU+7Ff3z/7b/r39DgM3DXkSJRI+FXAZXhgBF+oVbw+ECIsHBgceAK75CPeB9s/0eO/b637u4/Pt9l34QPyLAwoIFwr1CxsNDw87E6EUxhA7DHMKAAouBlMCZwKeA54BpvsH9bXzpvao+Lr4Avo9/Pz6l/bE8Irtvu6W8H3wp+6i7v7wQvTS+oQAkwFpBBEJdQmCDQwasCDWGuURxQ2oDvMNGQWy/2UE3AIX+W30X/NX9Hn4NPai8Ajyr/VL+Jr8pf6i/wYDNAYBCKMHygUlB18KowoNCyAKAgXyAloF7gXMAUD64vZz+hP8j/aL8ePwme9N7kzuRfHO9tv4dvWE8kf2hP7IBaYKlQp8BmMFiwcGCQwLUg4oDM4GvQVaBh0GcQTEAkIEegV5BZUHvgXpAFMCNgTtAiID3QQ9BZ8Ax/oA+pL9H/8b/GD8b/9W/Qr6A/tO/gkCWgP3AEr/vf4Y/uf9BPs79sX0TPbj9CHwK+7u8Ir0xvNO8tr3LgA3BMwFeQlGDuoR7BOTESkLIgnhClUILgGt+9z5/Pcm9un13/dN+0j+fQIKBuQFLAiuDHcMTArMCi4MNAtMB9gE/QWXBGEBYwJoAywBNQC1AE//bPwg+8j7NvyC+kP4iPdl9fbwZu4t7RDs8e8T9hX1ye+974n01voI/mgAwwqFFYQVig4ICioJJwgnB1MFfgF9/8cAIwH2+373qvrR/cn7yvnz+S78Lf/f//wAuQVrC14OLw57CwUKWgtuC8EIJQYlBQ0EuADs/Iv9VABHAVECcAPYA1oF6gS1AW0AKv+f+8b4+POA67jlkeAf1hXS49t86X/2FwDnBWsN7xFhEskX2R2rHsAh/iF0F0ILNf/88pvuouxu6lbvfPTn9/T+3AMKBNcF9ggyCr8JrAYzB24IBgLd+1L/egEvAGEBe/93/kL/UgCpA1UHjwdyCEkJFQg+B4QGqAXJBJgEIATYAEj52vMA8M/nOOJE5Mbj6dy410fXoN5p7x//HQqIF3MgmiENIl8eghjgFxIXmBAuCJ3/UfY07Lnio9684nzrtPQo/gAHEg7FEo8VmhdLF9YThhEwENAIIP7l9+70CfKm8OPySfcA/K0AQQMoBUcHkgkwDtcRmRFdD48OMQx2BJb7P/SW7s3rcehv40LgP98M4Dzfitp225/qov3bCRwUTSCnKlYtYCiWIAIaCBVFD3wIlgBX+a7zWewc49fdZuE963D1KP6aB2oRRxYcFS4TxBIHE+8TIhKIC4QDfPte887sEur07Pb1TgGmCHgMpg23Cw4ItARRA9YDwgZ6CI4GDgAV993tfec65J3igOQJ6I3qduls4njbPeBt7fT6DAmEF48nkTGkLTMjcxsOF+sRIQ2vB/oAcfrp8CflcdzU2xjkNPBB+oEDPg0cFGkVARP8D5sPnhBVEOAOzgqsBJH+7fgm9HryufU2/S4FtwvVDiMQ7Q8rC6wC7vr/+Gj68vzV/Zz6R/Wt8WDsAeXH4d7i1uZl6n/oc+RZ6F7wUfUQ/CwH9xUII2IoaicNJbciJB2EFpEP2AeaAFb36euc4s3efeHZ6dTyW/tBBeQNmxHhEVkPDgyCCw4MOQsjCQoGMgLa/pT87/uZ/ZcAtgNQCKQMwA4dDv0MQwquBCYAjvui9471yvLR7tDtc+417Y7qFOcT5JzjAuNP32nfSufl8/EA5Qp4FYkimShrJEkf+xsXF5oRYwx+BvsAZ/qM8P3mwOBN39DkFvDw+10HHhPTGlEaBhXiDgQJlwHx+pH6X/yL/JD8Nv3n/YsAjAI9Bf4JEA/hFFgYaRZiEUgMUQb6/zD5NPP98WfzGvKt7QXpVObY5HDixuBi5O3pfuol51vmuO8iAV0PjhmSJVcumy2uJtwcvBNpDbIHNg
I6/er2J/BD6eTgmNxO4rjvWf8sDC4W7h+tI5sd4hTDDewFgP4q+Rf1cfJV8b7xb/T9+HX+BAViDA4T7RZ2GDoYExYNEpcLegTn/Dv2LfG066/mkeIH4MLgtuPm5NXkTuV95nnmfOK74jPwjwMYEoIfIi5mNoczuCgJHTUTmwgD/8761fdG8qTrPuSY3dPb7uBp7Cz7nAlLGEIlsSmQJc4fqxnhEHYHvf+J+Sr00O5C6/rrtfAW+BkBxAnKEMcVHheGFOAQZw2nCRoGwwHk+1D3dfNJ7KXkxOAv4KXhouJA4c/hguSv4XTdVeQ89ZsFHBUzJnU0jDmoMzgqSyGHFSwIgP/C+mz0Oez5437cF9gG2bzgaO7a/fcMfRvQJOgltiI4HqkYYBLIC0MG6wFq/Hb1w/BF8E7y1vXA+64DCAzuEXMTshK1EUkO1wfvAmYANf5u/Mf4E/J17LHojeNr36rdkt0T4Fvgwtsq3LHmc/MaAJsPVyDMLb0y0S5SKIIgEhV0DL0H4wDv95HtjeIn2s7Vvtdl4nrwlf6WDr8bEiDAHkEcrBhRE0ENYAn6Bj4CpPtq9zb3J/kV+zr94ADPBUkKLw3HDqMQrxCpDAwIkQMf/uz5h/cZ9ery3/Ao7dDoOuQy38nd997F3Lnabt+X6Yf1vgFBEHchkix4LJApjicrIS8YcBEpDKYFg/xh8PrjG9ph1XjYTeMI8fv/IRB0Gs4aYBgOF5UTGg4UCVMHGgchBMf+sfv1+pf8kP67/+gDcwrXDlcPKQ9ADyINoQjzA1b/gftE+YL3zfVx8xbwCu7p60PnB+Sb49viEuDi25PaFOLC7xP84AlkHBwqeC71LusqnyJRGngStQq6AsD4qe0T5G/bv9UE2PXh3+7S/LkLWhcrHMEboxnZFYMOZAbNAW3/5Pt0+Bn3dfn9/UsB6ANsCMkOIxPoE+sSEBH+DccJMASX/iH7KPlx90r1VvKO8OjvAe7R6+np8OaR4w3ef9X90fXYEuYG9vEIrBuhKVcwOjEcL4QqYyRyHe8V4QyHADryaOSu2FbTkdU33fDo/vYjBdAQYheDGiMdnxzVF7kS/A0oB+r/wfmk9aj0HvYs+Ev7EgE+B+IKaAytDQUOsQyaCVcFvQJfAnwBZ//d/AD6kfa48YLsjugP5QbiJ97Z1hnQWNBp1s7f/e4EA6AVNCNsLY40TTZyM6AtXiYFHq8ROwJc84Llvtr61fnWc9zg5YbxhfwnBV0MaxNPGGEZbhnXGC8Vtg7LBksAgPy7+eD3DPhk+4YBkQakCJsKRwyKC84I3QUPBH4DwgOPAwkC6/9n/e752vWw8ent9+rM5qbe/tS0zvXM286R1gHmK/l+ChEZGSYOLzYy3TFTMBwswCLZFeoHSvl869vglNsl3LTgMOif8RT60gDlBywOZxHEE4MV+BRpErUNEgh7AzkATv7//GL8gf65AicGIQknDFMOTw7zCx8JHAZLA8YBKgEwAMT+wvwC+hT32vNG8ZTwH++I6I7fDNkO1NbQh9NY3gLuYP3iCi0YPiMAKaUs7S6yLH4mkB20EK0BnPNP6CbhWt/K4uXq8fMN+5ABBQeKCjkMmgyWDIkMfwkUBewB+v/M/nP9xfwF/n4BfwUKCSMMCBBqEvcQEQ6xCl4GmgHl/ff7oPvm+lv5m/i69wz2y/R39B3zEO+S6CLiAt3G2C7YH90t5pvxiP5YCzQWcR6LJA4o7yeRI24bOBEOBgH7FfK67Z3tMvAc9cz7/QFZBiEJ9wnOCCYHpgV8A4kAqP33++L7P/3Y/jMA0QIwBkAJuAt+DnsRABN+EpQQmQ0SCccE8QAQ/Zn55feI93H2gPWI9bX1cPVR9BLw4ekr5cHhB97h3IbhtOle8qj7owQXDNYTcxpLHQwexRyZFx0QfgjkAHX6sPbR9b33Ovv4/ssCkwWnBsMGtQWnA08B5/75+wT6xPk8+ur7k/7tAEkDswY1Cp4MjQ7SEKASIBJ8D2IMRg
lYBeMAWf1A+w36L/nz+Nz4Ufh09631BPG66krlJOCy2mzXoNnn313nNfDT+jkF3g6CF+oc3x4NH0ocbhZPEG8KswSyAA3/hv7q/gMA8gBGAZIB5AHgAY0BZQHMAFP/kv63/k3+IP49/zgAKwECBLoHjwp3DckQahKGESMPWwvsBj4DTQCx/S78hPzh/cH+cf4R/t39ovv69YLv8elS5Ereodlp2N/ahODy5xzwrflBBDIN1ROVGXYdmB19Gz4Y2RK6DPIHXwSeAUsA1f/F/jP9avyD+5f5b/gz+Yf6vvsi/SH/JAHnAcABmwHvAY0DaQaJCTYNwhHwFKEUpRE8DXMH5wA2/KP6ivrO+r/7U/1M/rj9BPyn+u/4OvXc75Xq1eWz4LTcsttG3UThPug58Sb7vAVbDwUXDx3gIDAhuh4UGtoTmA0PCCkD3/+d/hv+WP01/HP7wfqH+f34CPr8+2D+5gCSAoQDUASPBGQDNQL4AlAF6wftCiAO1hAVEgURlQ6sC74HbAMAASMAi/7e/PL7IfoA9230JvKb7xfteer052LlYOKD4I/hvuRc6cXvo/eOALAJERHuFU0ZRRuZGgAXThJ/DakIfQSBAV7/V/5N/vX9+Py8/Bz9pvwX/P/8Rv7s/m//lACJAYgB5wBcALAAPQLPBL8H/AqbDsAR3xKwEbkPog0GCyEIggURA9b/cfsJ96byRe4p62/poedt5Y/juuLU4t7jk+Z167bxGfif/nkFWwumD6wS4RTiFQoVYRLpDjYLmgeoBL0CmwGjARADHATmA00DrgIeAbH+pfxy+0f6OPl++Wf6YPus/Aj+Pv9rAWgEjQfsCqgOGBJRFCIVHRWCExAQywxUCg4H6wJ1/hn52vL27OvnjeQr4//h1N+t3l7gFOQ/6CDtl/Pj+v8BGgjeDKkQTxOAFD4UtBLtD3QMcQhtBLMBQAHxAdICYAS1BiEIiQeLBfoC0P85/MP4NvYv9an1U/bb9qD4qPsj/gsAwwK+Bu0KMg7oEIsTPRWlFegULBOAEAINFQnbBOv/yPqg9TXwKOsb57DjQOAo3CzZFtre3Y7hOOYc7jv3Af+lBWcLCRAgFLoWKRY5FLESQRAgDG0IagYGBlcG3gYPCDUKYQtrCi8IgwVeAqj+e/q39mP0RfOC8kjy6/Oc90X7MP77AUAHVAwoEEsTIxYGGDMYNxb2ErMPewxQCDgDOf5l+gL3zfJ37vXrm+rT50rj/N9D34HfzN+j4Svmcu0L9uz90QT0C2gS5RWGFgkWWBQNEdoMEglhBkUFYgWSBWQGlQh1CvcJ4AekBdEC0f43+m32F/R58vTwsPCL8tj1XvkV/XwBvAY6DMwQFhSgFhAY0BfsFQ4TsQ/0C5wHygIj/pP6a/fd89jwoe8U75PsdOhq5QLk3uLV4Yfi7OWD6yby1PiY/x0HVQ4QEzYVkhbtFgUVKhFVDbYKNQn0By8HvQcxCXMJAAgUBkcEwwEC/sb5RvYE9D3yjPAS8LfxwPTV90D7CADsBYsLExDeEx0X3Rh0GCoWIRMSEHEMpQfkAlf/mvyK+YT2O/SN8qPwr+2h6RrmVORm407iGuIq5HTogu2O8lr4x/9FB8AMpBBRFB4XbxdTFd4SMRG6D0kNxQoQCmEKjAkUB5YE+AL1ADf9svhv9b/zefIK8evwAfM39hf50/vM/xsFNwpNDtYRExXiFr4WEBWrEtkPagxMCEkEIwGl/pT7FPid9XP0w/J978jr4+iQ5iDk4eED4Sni5eS36KTtjfNT+msB4wdjDTgS7xW6F7QXIBdJFrcUeBKQEFYP3w0LC4sHfQTXAbz+6vos91r0XfKv8Ijv7+/a8Ur0w/YE+o7+6AMxCdANvRHjFIAWURaGFNQRNA++DM4JwAY9BEgC/f9F/d76QvmO96702/AV7c3p3eYg5LrhueDE4U/k3ef17Ljzq/q8AD4GuwtZEF8T6hS0FRgWtRVIFKkSlRGMEGIOUAtMCKMFiQ
J+/ir6pPYB9J/xUe8y7tDuZ/Bc8hT1CPkL/lwDHAgRDHgP6RH5Eq0SahGkD7cNsQuVCYUHowWMAzQBCv8d/ez6Dvi39E3xF+5i65rozeUm5Brk6OSj5v3p8O5h9Lv5Jv+QBFcJFg3iD9QRTxNtFN8UpBRDFN4TtRJ9EL4N+QrdBwsE8P9S/B/5MvaD853x2PBP8Zfyd/Q09wz7Rv8xA+wGoQptDfYOkg9LD1gOzAzdCvoIfAcqBpcE1gJLAQoAm/7C/JT6Sfij9YTyDO/J6yDpNuce5kfmo+fk6bHsGPA29AP58/1UAgoGXwlYDMkOuBBLEowTPhQDFCQTGBKwEG0OVgv6B7sEYwHa/Uf6b/eO9WX0uvPr8zf1p/e3+vP9NQExBHgGzwd2CKcIYQiQB1YGIQVkBBMEuQNQA0wDsQPNA2QDgAJlAez/2/30+p33TvRe8d/uC+2+6z7rf+tG7IHtsO+q8u/1YvkI/a0ALARvBzoK0QzQDlQQOxGhEX0RlxDzDu4MogrPB9cE4wFw/4f9Afyd+sb50vmh+tf7W/0p//oAQQLsAjkDQgPiAhQCIAGIAHAAtAD+AGsBRgJ6A78E3wXXBmgHPgdXBsoEugI4AIn9xPoM+Hv1L/Mj8ZLvue597qTuUO+l8IDymfTh9nH5Xfxo/0gC+QTBB28KigzyDbwOAA+jDp0NFgxKCn8IpgbIBBIDywHfAEYA/f8KAFEAfABMAND/Mv+C/p79nvy/+yX74PrS+gP7lfui/BH+vv+RAXQDOQWgBn4HwweAB7YGZAWDA0IB7v7F/NT6E/lp98b1WvRY89ny9PKK83D0dPV49ov30vhg+kn8if7kAAYDpATBBXUG6AY4B3EHwgclCGsISgjmB3oHIQfWBokGXgZMBhkGdAVeBBwDvgFHAMn+hP2R/A/81vvR+x382fwF/kn/qgAdAmwDfgQOBRkFvgQgBDkDHQLoAOD/Cf9i/tz9iP1k/W79ov3M/fD96f2k/Rj9Vfx2+6f6+vl3+Q75vviF+G34jvgB+dP5/vpY/ML9KP9/ALMBpgKFA2gENgXVBS8GHAalBeoE2gN/AjUBLQBu//P+v/7P/jb/7f+5AJoBhwJpAxQEfgSwBI4EKARxA2kCLQHw/8r+3/1Q/Sb9ef0k/hL/IAAuARsCywJFA3EDVQPtAmkCrAGgAGP/AP6J/Bj77/k2+dr47/hU+fL5sfpW+/z7q/yI/Xn+if+OAGoB9AEdAukBVAGsAAMAcv8c///++P4Y/2D/t/9CAAUB8wHmAswDkAQJBSUF0AQLBPsC6gHgAAAAUv/z/rL+nf7G/iT/k//+/5MARAHzAZ8CRAPiA4YEywTxBKoEGQQhAw0CzgCj/9P+J/7M/av9xP3T/eP99v0R/jn+Qf5L/lT+VP4u/ur9oP1B/cf8Pfy4+0z7Avvz+kj76Pva/AT+S/+GAJEBYALWAvkCrgIsAnEBlwDD//v+Uv7O/Xv9Sv0o/SL9Kf1A/XP90P2C/lz/XQB3AaYCugOYBDYFaAVTBd0EOwRvA5kC6wFbAfgAxAC9ALwAvgDGANMA0AC3AJkAhQBwAFAAJAD0/6r/aP8s/+/+qv6N/or+of7W/jf/nv8MAIAA2QDtANAAmwBcACAA5/+z/2z/Cv+k/i/+pf0Q/cr8q/yc/Mj8NP21/VP+Cf++/4UAZwETArwCXwO6A9sD9AORAwoDqwL/ASwBIgGCAG//XP9A/jX9G/2R/Pv7afsm++36xPre+ln7o/yi/vABnQbyCkwPNxRjFMwP5gzCBgz71PNU8QHuvfD4/T8HNQ3aFl4YQxGiCSQAufK36G3mbeR75izv1PXN+gMADgU6Amz/6gC/+if3pPnc/P3+TwbBDnAQ4BIOE4sOKwdW/hn3i/C/7YnuhPFC+CT9IAFUBZcGrwNg/ir8Z/is9fL3avvt/zcE4glwDsEPcw9rDVIK7gT+/2/8fvo2+vz7dABWA2EF+AcUCF8FBwIW/3b7bf
iB+Jb59Pnw+xn/WgBSAG8Ar/8r/sX8RfyQ/ZX/qAF8BK8G8AZ/BpQFjQIe/xX9+/vl+hr8p/7l/1YBjQJTAuQAsP+Y/U77bfta+936qfys/gkAxQFPA7gDDAOyArwBqACh/83+Sf81AfgCDASiBQMGfgSdAsn/pPx0+oL5ifoX/MD+0wHOAx8E5QK6ANv9v/rA9+v2zvcK+sX9CwK7BcoGygdpB/YDUwHm/Zb6oPi7+HH5Nvvq/6oCCwU3CI0IdQa3BPMDbwEeAGAAwADvAi0EVgQMAwQCQwBR+xT5wvdG9Vj2cvnP+179iv8WAoACCgAa/H/5zvfz9oP29/bR+NT6Gv5hASEBeP4M/1n/Tvzk+0f9NQDFBoMNnhK/Fo8Z6Rj8FSgRQgknAuX90/yt+y38XABnA+gFSwd9BGQAbf2a+JXz/fBq8C3zYPhu/XEBXAXiCB4IHgXiAfj88fjL9lD1LfOI88H1sPZX90D2CvWP9Hfy8+4s7hnx1PMN+Pv+2QWDC5MQ+hMcFZEUeBNgE9oSCBFOEMcQPxA+DwcOVwuLB1oEAwFf/JX42ffa+KX5yvtq/hoAWwKcA/gC/AEGAOD94Pzg+/H42PZR+Pn38/Sm9JH0KfJY8bvwwe5x7tTuHO/n75ru7Owt7rPuUu6y8N317/32BmcOHxfdH00klyefKXQmpSA7HMsWzg4+B3YCAP8J+5n3n/Vo9FHyQ/CD7g3tLewf7oDyQfZH+sf/ngUuChwNEQ9rEHcQ8A/pD9kOLgwVC18KmAfWAzQAWvzc94rzT+807CnrUOsP7bPve/Gk87j2/PY19CHzBPPu8APxHfZr/GYD2Q3wGfwi5ScXKworPyVTG4QQ2QYl/QvzLexW6dLlFuOe5KDlUOTV5Yfqp+3I8JP3IADDBzUOZxOtF0QaxBuwHGob9BcPFioXgBT2DhQMrAnyBEf/7viA84PxR/Bv7rju2fC68kz16fbv9GDy7PAQ7Fbket0l2KrVANd/21LkSPP+BIAVIyVMMeM2vzg6OAkyPCcxHNsRTQjT/tf0Ze266OzkieI/4eDgw+Jv53Ht6vI8+Y0A4AfKD6sUchc0HH8fGx9JHUMauhUMEbgLMQUU/yf73fem9HPzj/KT8u/2PPtI/N/+2gHx/537xfV/7NzidduT05vN08wszhfV4+MK8gD/aBHpIhIvlTl0P+RATkCFOiAxTiYBGaQKRv018UPmDt6N2ULY5dik2qfeFuTw6Nfv4Pew/3cHOg/iFzkf2CNoJjUnzCNqHOMTxwmo/vf1wO8N7AXtr+8Y8z75V/5PAEwCQQLH/4P+4/td9nDy8e6p6dbkL9812NXTStAwzn3Sltom5xv7ZQ9bH3gw+UC5SVVKdEZHQJw2KigEF0MGLPee6QbgHtpd1HfTpNeY28LfsuRx67X0B/6gBuUOtxZRHjwjbCR/Io0d0RiqExsKpwBk/BD6Evic+Jr6EP6qA3sIuwqJCusJ/gjbBcv/Tvf770jrq+Wj3nHZJ9ch1srUbtM01K/YKOGH7WX8qwtIG1orZTroQ5REFED5Olwx1SD7D5YBIPOz5+fgQ9xl2tLawN0C41/oj+yS8Vj5PAEBBl8LfhJOFi8Ykhm9Fz4TMQ6UCBgDU/5H++n7n/8HBFsI8w1EE9IUDhQ4EnENlAYcAPz4VPBt6DDiJt2H2XLWkNOr0irT7NLz03fYJd+b6MP22wVbE7siWDIhPLJAN0HtPBY1fCpDHeMPWAS4+WLxjuzd6BHmIuZm6Hbq1+vl7XvwAvM49cP2uvgB/AP/3wFBBeMHYwrfDF0PqRH7EksVhhkyHSsf8h8pIM8egxl0Ec8I2v4r9Kbqn+LS3G3ZJdgx2ezazNs63L7cy9sU2XXXFdkT3LfhH+0v/AgMDByDK0E5f0KKRfFDnj4rNfYohBwwEAoEiPn08SDrmeVa4rbfj97R3mffpuFp5RrplO0S9Dr7swFLCDgPqhWeGise1CBDIh8iFyFFH6sbjR
YEEeQKwgRW/nH4f/QN8XTuUu0M7bLtxO1t7G7qM+Yu4Gjap9T4zrfKcssV0s/aMuaN9ngIfxmtKdQ2tT//RItFYkECOhUwYyTGGLwM4P/F9Cvrg+LE2ynXNtSU05jVpNl033/moe7H91oAFghJD3QVbRoTHi4g5iCGIDofjB3lGkkXGxPnDrIKRAbqAR/+6PpK+Ej2OPSt8mry4vEu8P3tX+u356HiPd3b17TSFM9Qz7HT/9kb417xVALzEQ0hQTDBPORDFEYzRJ8+VzXVKHAaews7/fzvmuXm3iva+ddJ2WPcB+CQ4xjoBO568+b3fP2sBPkKIBAzFU8Zrxu/HGEckBpMF2MT2Q/NDGAJ7gVIBOID1wLeABL/Df7L++T3bPNT7xPrmebT49vhrN4r3Onbgtsa2nXZ+9v34brpdPIU/lkMPRruJkkx+TckOyg6ijSyK5AgPRQRCPP80/Mo7SjpmeiY6lvtHPHa9Xn6xf0f/z3//v7F/b37i/pV+kz64Pog/SkAfAIOBSIJrw2EEXIUkhdTGtsaixn6FnISywyQBqj/qfgP8tnsIOnH5WLiEuDv3uLdltxw27Hbwd3U4D7k3ejN72z5dwPWCycUOx3lJIspTCsSK70o5iOYHd0Wpg9KCfEEjgHj/ij9VPyI+xr6ufcv9bfybu9I7E/q4+jv52bpK+2u8ej2gv2eBeYNoxTdGr0gdiRyJawkXiIgHhsY5hAcCQgBPflA82jvMex06TjoyOfR5k/lY+Ml4Une29u82oLaadvF3rbkMOzE8+77tAVzDwsYgR+0JR8qcCzQLFcrpCdiIu8cPhesEK4JXwPW/R74OvIY7SPpGeYf5IzjmuQA55rqWe+W9Az6Z/+KBMIJhA54EhYWBBqAHTQfVh+uHvEcQRkXFB4OegfCAFf6cfR/767rzeim5ufkH+NC4c3fst683Xjc69vi3Vnh2+R76SDw9PcoAKAI3hFFG8Ej/yrcL6QxnjB0LR8onyDNF+MOwAZd/6P44vJG7nbrjemy51LnFunm69ruZvLm9lr7Dv/AAlAG1AjKClANBhCLEawSnBQNFrAV/hO0EdcOOgsjB/0CO//y+1b5PvcB9Tnyne847erpf+VU4X3eYNxD2vbYBtro3Yrjx+kr8ZH6EwV2D8EYgiAXJgApOimsJmkhfxpbE6YMWQbwACH9YPv2+pL6OPo3+kT6JPr8+fb50fn++fP6/fsj/LT7pfsk/GD8p/yB/hQCawZsC+4QERbUGd4bGBwDGtMVcxARC8oFewCG+y73N/Ng73rrzeds5GvhRt/J3dfcAd0d38Hi2eYD6+fv1vV4/BEDJwm+DuITDhiTGmQbExveGTYYSxYBFOcRjRBoD/kNXAwYChoHzgNRAH/8jfgX9avyFfHO7xrvTO8j8HjxbfNT9rX6NQA8BjcM2xHRFhwaGhuAGYgVHhAmCmUENf/t+sD3jvXs803ylPBY77XuFO4o7QTs++rt6ZrozuYD5Sbk8+R754XrHvFK+M4AUwnYECMX8xtKHyAhRyEUIEweXxw8GpcXRBSUEJkMLwhDAz/+ovn39RHzrPCz7oLtJe1U7Rfufe/X8WT1B/pW/98EgAqpD3sTuRUBFpkUvxHeDYQJggU7Av3/wv7N/bT8P/uH+Wn3ovRA8cztMuqN5qzjA+Ip4QbhDeLl5CzpVO679Lf8YwVTDeoTMBm8HDse+h2jHJ8aNRgdFqYUaxO4EY0PBQ3CCaMFHAH//Hr5h/Y89J7yUfHp763u2+2c7XHu0/D49Kj6fwHdCBEQKBZLGvIbvRrhFmARRAscBUn/WPqy9jz0p/KS8dPwh/B98DvwCu/x7N3qlemq6CDnieVg5ffm0una7Tnzyfk8AdcInA8WFUQZKByiHXcdZByHG74aahmfF5EVzxK1DkMJEwO5/O32bPJy76ztSO197q3wJvOp9WT4KvsY/lUB+QTGCLAMOxDZElEUUBSMEjcPDgvTBtMCSf9X/NT5fv
di9cvz0vJ48o7yYvIO8bzuaexy6jzoZOW94trh3uMw6APu6fT5/HkFgg20FGYaIB4OIFQgux7QG5QYcRVlEkQP9guWCJEFtgKa/338Yfpq+ej4IPg093721/Vb9TD1mPX/9sP5lv3gAScGKQq3DUoQWBHcEDwP/AxMCm4HtgQZApj/+/wv+j73KfRm8VDvfO1K6//owufV5/nnLedd5gHnVulr7PDvhfSJ+k0BoQclDfwR1xU4GAoZ5hg6GD4X+RWRFNoS1BCfDgYM0gg4BccB9f6p/KL64/is99T2Lfa79bv1NfZN93j57fxOATEGLQu8DykT+hRAFeETExFjDYAJ5AUnAh/+Lfqy9oXzqPCk7uLttu0X7UnsAexL7F7siOtH6qbpc+q47PHv6fPE+EP+vQOaCNoMYhAfEwAVLxbQFhIXzxbyFS0UgxEiDicKgwWNAAz8qvhi9gP1RvQ29ET17/b/+Az7Rf3K/5MCagUyCOsKaA15D7oQ+BAzEJ4OfgwPCnsHCQV0Anj/BfyC+Cf1NfLh70nu9+wX6wbpyudV53XmBOV85Nblyujc7BLySPj3/mMF8QpwD84S3hRbFbwU3BMwE5ISuhHjEEYQhw/+DXgLTQgWBdEBhv6J+xv5SfdA9vb1WvY391H40fkG/AX/bQIaBtcJYw1AEPoRqRJJEtUQTQ5HC2MIpwWEAq3+jPqN9ojyv+7m6x3qkOi35m/lsOX05kTom+nF69juY/KF9kD7XgBVBaEJ3wwZD40QDhF/EHoPvg6JDp4OxA73DvEOVg66DBgKnga+AvP+b/s5+KP18vMq80bzXPQN9vP3QvqB/X4BqgXVCRsO/xG3FAYW0RVTFMIRTA4jCocF2QAS/Dj3bfI87ubqr+ia5yTny+a/5njnpuhu6eXpC+tg7X3wC/Qo+Df9vwLgBy0Maw+sEfMSIBOAErQRNhHGECwQcQ+0DqoN5QtQCR8GtgJY/y/8afku97n1NvXB9Sv3HvlS+6P9EwDrAjAGogkVDaEQ0RP2FboWOBZdFEURYg1GCbkEof8U+l30De9x6grnVuXS5HvkP+Tw5H3mMOi76TDr/Oyt7yXzD/eO+68A6wXRCv4ORRJtFFUVARUNFPgSyRFGEJ4OzgzLCm0IowWJAj//Evxk+YX3hfZP9s32zvda+S374vw1/rH/xgFXBCYHLQqNDdMQWBOpFLwUpBOiEckOJwv9BlkCGf1H91nx2OtR5+LjgOG634ne9t5G4TvkKOeg6jbvFfSi+A792wG2BuoKJA69EIgSdxNkE8AS/xFbEckQERAjDxUOlQxLCgYHKQOF/8v7q/iT9qH1fvX49Rv3+fhQ+2j9Gf+3AAsD3AWECOUKmw1UED4SIBNlEwUTyxGoD8YM7QjwAxb+7vfQ8TLs0udH5bPju+GV3xTf0+Cp44nm8Olm7sjzsvlX/1kE5QjzDKoP7hCVEYsRvRCgD4IOhA0gDQQNywwwDBMLMAmMBmUDy/8T/On4uPY99VP0U/Qj9cT2Pvkl/N7+jwHOBFUIhQsjDkgQIRJLE0gTNBKAED4OfgucCLcFXwJs/i36zvVI8Tvt3ulc5m7iZ99n3v3eR+Cw4tjmTuxn8qP4m/5FBG0JaA28D88QMBH9EEgQcQ/rDv8OMg9LDw4PRA63DIEKlwcLBD4A6/we+pL3mPWL9G30HPWR9pX4yvo9/QoAKgN7BuwJMg37DwUSJBNIE4cSEBEQD3YMSAmwBcsBbP3Q+F/0hPBA7VLqc+fz5G/jweJd4nriw+PP5jbrePA59nH8DgMaCcsNJxE/EzMUNRSGE4YS1BFyEQYRNhAXD8sNEgyvCYYGDwO6/678uPnv9rD0MPOS8uDy+vOV9ZX3BvoS/b4AwQSrCDYMFA/nEHcR/BCkD8QNfAvOCPUFLgM8ANH8Avk09cHxxO7z6wvpc+bE5Enkk+RS5Tfni+r47uzzBflW/v0DWwmwDdcQGBOPFCAVxhQLFGoT4BL9EcQQdA
8ZDh8MQQnSBXgCZf9L/C35avZm9BTzSvJJ8kLz+/QW94z5wvzNACIFGAlbDPoO5BDHEVcR5g/7DdALXgmMBmMDIgCa/Lj42PSt8Trv+uy76qfoJeeU5rPmbuf96NLri++08yL4AP0QArQGeApYDbcPwBH9EmkTiRO6E7MTShNPEr0Q4Q4oDOAIOwWyAWT+XPuD+PT1APTF8oryOPOn9Hj2gfgG+yD+VAFbBEkHJgqDDNcNCw6vDQUN3gsECr0HWgXjAvX/hvzb+Ez1H/Jn7/ns4ep66bXoWeis6O/pKewu75jyY/az+vz+7AJ/BqMJlAxsD8sRehPMFAYWrRZyFoUVJhQYElAPzQv2Bz4EqwBE/RT6KPeI9JHyfvFQ8UPyOfR79vT4Ifx2/0YCvQQYByMJqwpHCyILxQobCvAIZQejBeQD7gGK/6r8+vmg9zr1xPJi8Gnu/+zm61Pr7Ouv7fzvc/Jr9ST5Df2UAGgDrwURCFEKuAulDPkNkQ+XEPwQ8BCNEJcP0w1JC6IINgaJA18AFP0L+mP3RPXe8yPzafOg9P71iffM+XH88P5+ASwEgwZACEgJdwkgCXcIUgfSBWAEBQNnAaD/6f1F/MH6N/lQ9zX1DvRz80fyaPGu8ZzykPPZ9M72mfni/A0AwQJhBSEIhgoODBgN/w2GDt0Ocg5mDa0MzwsFCgUIfAaNBF4CXgAj/hn8efr++Lj3J/cV90H3y/fH+DT6zvvM/eP/twG+A7oFrwYeB1MHoQZrBQgESQKIACz/xP04/Az79/na+Ez43vdQ9wz38vZ69sn1t/UT9qX26ve0+Zf7lv16/08B+AKUBJMGNgg6CR8KFgvAC0ULfgqaCfkHLAZYBJMC1gCy/0P/QP6a/br9OP1L/d79pv2g/Sn+b/5c/jH/BACPAMcBJAOmA7kDfwSIBNMDIwOEAvsAwP9i/zX94/uW+076Tvok+l35b/nI+S75VfjC+bj61PpJ/GH9MP5H/7D/bQDBAdgCBASYBGQFggayBr8GrQa/BYgEQARAAwkCyAGHAVEBRQHZAH//L/+j/rH9d/1S/Z79af7Q/lH/LwB3AI8AoQDGAIgAfQBHAPf/gf/d/pv+MP4l/l3+7v1L/Uz9yfwO/Oz71Ps/+5n70fzM/JT9cv81AHoACQFWAh4CFQE0Am4CagE1AuECRgI+Al8CDALMAQICIwK9AmYCIAIfAk0BxgBXANT/gv6e/pT+Sf7P/t7+Zv/uALABOQHVAbkC/QGxAUYC4ADEAEQAh/9K/8X9u/6a/aT6q/zT/Pj5CPxk/nv9Kf34/qb+Sf0Y/4v+6/2JADIAT/9xAEgAuP8gAcIB9QAyAngDmgPmA+wDegQ0A9MB7gNYAuQA/wMMAjAAfgNmAs3/eAGvAmH/8f3EAB39FftD/rf8tfs0/fT8gf5o/pX+KwFG/1oAIAGOABQBRQAVALT/1/4j/hn+Pv73/Wr9uP8X/7H98f+4/nD9d/9j/nP8Nf/l/g79kv9s/p3+fgGY/t3+FwJQ/+P+fwADAOEB8AN6BPkDyQUaBjsDnAZ+BBAC1QYgAhYAmAOT/tj/sP+++1IAj/tx+yH/nfkP+s77mPqA+2n9EfyK/sz/IP4JAlQDrwATAisDvgAXAkoCugMHAnMBQwSd/w3/SADL/qP9TP3r/r39NfwZ/5D+6P2w/w4Atf/m/o4BogGaAF0CXwPwA6cEJQXABjwGpgOKBcgDhwFjBDEBGP8fAe/+Ev/b/jv9hf31/L/8avsP/D3+j/vK+xD9BPw6/GX8XP37/dX98P3//oz/Wf9OAFcBMwAJAQQCVQHtAJEBdAIh/4YAygD+/IX95vsL+637jPt8/Cb99fyI/W7+fP3//YkAjQHdAdgDmQXjBrkHzAaDB9EIVwXrBRQHWQKUA8AC6f/0AGv/AgAQ/sD8awBZ/Q39Xf/++z3+xf8o/Yn+GP+S/d/+2v+h/oP/+wAvAWgAhAGlA1IB+QFIBC0BzAB/AV0Auv
0s/kwAAPwi/EX/mfsV+3n+sPwS/UH+YP4u/Rr+/wEx/osAmQIMATwCVwLMAb8BrAOyAnYDLQQcA5YD6AMCAlwBQQBw/lf/f/vm+7j8QPuv+7n7K/2H+6b79f5j/Jf9PgEl/8L/LwIWApoAiATSA4oEZgkcA2IDGQh9ASgAzALZ/qb++/4Q/jz/P/+M/jb+3/4g/2D/H/7X/Wn/Ev55/mAABv8RAN4AGf/qAGoBiAAQAtYCmAKgAigEGAN9AT8DvQHmAK4ASP4A/5P9xfsf/R38J/v8/G38ufoV/Gz8RvtS/EL9Rfwr/5EASf9iAyYErQOqBL8DnwIeApMCBwLDAcsBTAK9AXb/wP75/sL90/2K/Yn9N//p/iH+qv71/uz9vABSALb/5AGXAvMBcgFzBCQChQGoA7MA3wCyAfv+bv9P/sX+zP8t/Gb/swDr/LD+KwA7/dD/2QGi/uv9sgCiAV3+vf+iAcv+DADvAXEAQ/5V/yIC3v+6/aUAJQKnAOgBuv86/ykD8v8EAj0Cl/+dAsD/of/h/zX/aAHY/wAAIAIt/xP/9gABAOL+Bv80AsL/Cf4EARcAOP7VAKAAjf0Y/oj+rv33/Pf97v7S/j0AQgHzAMkAugAi/zr8KP26/dL8U/21/PP8Tv3e+2r7ef2D/vgAVwNbAy4F0AavBHkCQQOBAoX+/fw2AOb+Vv65Aer/2f9GAY8BQgERADcCtQNyBLIEtQRDBGQBl//p/Xz9Mv43/hv/c/6a/R3/If6s/Rb/pf2Y/a/9hP2J/Ej8Gv9k/gv90P0D/9T9cv5aA/sC3gEMB4cHnwMOBWwCJv5l/Sz9N/6v/n8AdAFJAKkACABnAG//mP5wAUf/4P+uAusBQwGJAIwBJQA6/uf9ZP8XAjv+3v4/Ah/+Gf5fAMz/fwEBAlsDngLd/tIAwf9D/CL9uv5R/7X+NQATAdj/D/85/wcAsf+KAPT+7v1GAI/92PxM/k39kf1n/kn/T/8A/8D/ewHaAGoAGgHn/zMA5//l/tAA0wAlAQMCuQBZAeAA5v9/ANsArgF4AQgAEwGbAOj+e//A/kT98vy6/Y7+1v5N/5QAl/9bALQBvgDtAdECnAFK/x3/CgC+/p396f19/rn+4/66APABwgLuA8UDAQMIArsBYwAuAOb/i/9VAIn/Zv8W/Qz8NPsh+cj60Pv3/J0A6gHlA7MEUwOgA5ID8QKuApQDOATbA6sCawGh/+L+nf6P/kf/NwBqAJsAIAIHAXgAxv+z/xkBNQBYAAQB0/8t/ur99vxS+4H8KP6h/+b/i/8EAPT+2fyj+rH5gfrb+rP6vfoB+uP5a/mS+H35OPw6/7kBPgTqBbQGpAZoBzwHoAYpCEoHIQdHBiUFxQZwBBQEGgSEAroEbQQPBBcFCQUWBS8E+gL0ARMBxv4L/in9T/qG+RP5afdt9jT2+PY5+DH2FfNX8Ezu0ey76VjnhenH7kny7faE/IMBcggxDa0OihHlFuga8Bo0Gzwa6xd8FN8OuApcBvUD5wJjAs8CDAM+BD4EDQUyBTMEvgUkBYcFfgb4BLECn/7W+sz13/Ir8Y7tWOyC7tvvfe868aHz1/Tw8y/zKPA96rXmleKm25famORR8rP9GAZOEpkevCOYItog2CSII8Mepx0OGR4S7woIA6/4TvFk8SzyN/LE8xH6XwHtBI8F4QbZCOUKWgztCgcKcwlKB2UBFv18+2L5h/iQ9u32Fvm99wn37vdZ+Mb56vh3+Qf7Pvk++Az27PJy8S3vAuy06ILnwOvH89L6GAKSDAQXhx0yHWcbGBxOGxQa2BU2D44MIgiQAPL4s/G48d70GfR69Ln5LAAHAhACcwSfBe4GcAisCPwHswgbC/MI4AT/AMT9//uH9jfylPGX8Z/zhPQM9fj2wPo5/1cARwDBAEj/ifz99z7yVO2Z6uno6+e86nPuqvSq/+wIPQ3rFCceoSI+IkshlyF2HUYWLw3IA6D7Y/S57T3p5ejC6tTrDe
9C9HD4lv0kAoMHzA3+EzcX8RchGs8YSxM7DYwHKwD8+FP0n+4N6vTpz+t57Rnx//Uy+6IAlQLmAfICuANE/4j4dPZA82XviO5Y70LzoPmd/3cBEAaMDRoSIBPHEh4UXxaiE7ENTwnEAy3/KPx0+E/1PfWz9s/49Pfi9sH53/xO/cT+KwX4CdoOMhSkFEgTKBOBERIMLwYpAhH+A/ll9Yrvu+h76H3qKevj7nX1BP2VAscCNQLfANT8NPhZ8xbuqelg61TvJ/AH8737rwRACG8MChXrGEgYHhsiHA0YHxXXEaYJNgH//OX5V/Vw8hfzEvbL9g72sPnP/JAALAWlB20MtRBqE20V7RQYFHIR+wl0BEAAx/cT8FzsXewW6yro8unr7NPvbfRZ90T8iAIhBdoE6AFk/cz2Lu/C6UHnv+ji7yr3+fyLBfoN1RMrF1wX+RbaGPMXWBMvD2EM+wdRAQ/9cvrE98v2qvgx+vj7YP6+/y8BFQLvAwEHtQy3ETATsxQ8EzAPkgkSAYn69vW68f7tu+sP7ADsXe5V9P33P/u6/+EFFgkzBvEEUQES+tjxIed13jTaPd3o45vq1/a+BdESdhrNHo8iZyLlH08bHxbdD74Hsf9+9rftk+gY5xzplu3/82H8AwMHCc8OGBCRES4TeRRUF7wXIxZeEZILjQU4/I70IfC/7DvsNe397i7zx/Yj+pf+KgLkBGsHKAiXCHcGWf/e993uWeZt30fX6dRm2hXlT/P2/ugLEBqSIR0lJSZXJRckLh/HFuQL9wA798XrqeNo4DfiL+rf8gb6sQKyC20QFBEBEgwUFhYvGCQYqxZAExINtwOM+MPxX+5v6wrrkOzg78Pzavah+RP9mQQ4DaoS5BcdGAAVbw5sAdnz8OWn3NnYVtNGz4XSXd+q7of5SwXpE+kiSSxjLnMteinKIY8YWQum/DrxQ+kK4+TcJ9xm4szrkPK1+cUCewrGEBYU+BX7GEYbuxyQHAoXExEjCqH/XvWH7MjnG+f65mnoPets7/D1j/pl/yIIRhGyGPUapxrMGJEQ/wTv9mPnddqgz9THA8PrxOzSCufO+tsNrx/7L0A62DgJMjssciQDGiwNnAAF95jsq+Et2jHX9tpM5Pnth/eUBYQSLhgkGiUbZhzeG7kZuxnoF+0SNgz3Aen3de4U6KzmeuYW6ifxg/id/fT/qwOKCIAMIxF0FEwV/RMKDksEX/bK5WzZFNFTyB7C2Maa1/np3fhWCdkbPClYL/EvmCx/J1sfQRY2C2f9mvPN6z3jWd1W3MDj5O359Xn/hAlHEqsXlRdUFxkYrhYhFWQTZxDgC/UFHP/U9tLvI+3a7Pru+PL5913+GgPHBQUHQwolDiAPEBDNDxQNqwgqACDzIObx2tDRtMybyEPHYM8V4Uz1GgW1E8EkKzGWM4ovmygDI54caA88AeP2j+0t5kbg7tuA30voLPCg+OkCEA0dFckZnxzZHH8biRppGO0SlwnBAcL7M/V/7hnpJel07djwDPYx/TgCjQdGDmgR2RD6EVMTixHbDbwImQKV/PH0H+qD3krVq809x6jDKMl427PzjQioGA4q8zeFOlE1MiyxIZkXRQ3wADD18+yT5qjhfd3v3K7iaesG9Zz+qAi7Eg0a3R0PIGoenxveGOoToQweA8n6u/Jg68Pl2uIb5WbrHPMw/JoEAwx+EpIWvBm3GnkbUxnoET4LigL39mXsXeBu1UjOKMgUw6HDwM0I4FTzBAQbFlwovzRROD41fi+3JvcZGAz1/qry6OnA4l3cN9u+34fmEO+6+OgEaxBmF6gcgR8RIRAg3xr3FUUR/Aes/U31yO1r6CTmu+Zt6gfyQ/p+AR0H+QwzEw0YMBtkHK0a+BQADmIDCfds7P/hYdhjz9XJrcY/xFTIhNjv7j4DoRXjJ3Y2tDnQNJkvKCjlHQwS9AX1+4vx8eYZ3nTZwNpc4THrjPWLAZUNjBbMGqAd8SDmIf0ftRz/FscOuA
RI+qzw+OYM4yrkeOY365jynPzxA4YJZhGwGKUdtCACIJca6xDABbD75O+743TZTtJdzMHEnb87wDzJvdo/78oCXhgPLsc58jtwOnYzLilZHHEOMwM1+Lfs9OLI3Ybbp9yr4RLqQvVWAIcLyRQTG0IftyOaJkUkex5EGRQSTgbY+0/y2urc5ZbjV+Yi68XxMfl4ABEIOA8BFqQc/B9sIF0e+hUiCQn8Fu/C4VTWN9BGzOzFIsCFws/O0t2m7XoDJRzcLLI0XjcDNsgxRimHHmET8QYj+i7tQODW1/XXad035DXsQ/biAH0JJw7zEBIYMiD4JH0m1yT0H1gWRgmn+/DwLOpV5/PnlutV8db3df30AE8GxA38FaUdVyE2IxYishkbDkMBrfHZ4zrZTdAXyULCSL2Ivx3LOtpf6rH/EhZYJSQt8y94MRgxHywOI3sYvQ3VANXx6ePY2pbX+9nP4CDpJvFk+rED4Qn4DbMT5BzPI58lECSkIFca/g0z/z70lu3h6NPmz+eI7Nnz0vk0/5kFgw2qFOoYdxwVH4UdnRe8Dq8Divhh7A/gg9aqzmXIa8JjvrHCSdCc4X/z8AUtGDQnei50MeUz0DPTML0psR7gEOsAy+5d3fLTZNQA2cHgPOqC9L3+4AY8DG8RMxprJCgqZSpgKJAhoxZQCC75lO1Q6JzmVeZg6SbvnPY//X0DBgpnEjsafx53H18emhmnEKEFSfun8Vzm39yo1YjO7MaJvxC+DMZD1FXkwfdpDp0elihRMTE0djNfM90vIiesG04NnPvk55zXos9NzzbU4dyB57TzVf8vBzMOjBfqINoowSwSLAMo0R63EUECl/Rv6z7m8+Mp5UbqIfF3+G3+dgSDDAEVjhq8HWkf1B7dGaYQDAXy+ZrwyeWn2hTScMtow+K7m7xPxlTU+eVU+xQRgyFSLFwzLzcnOXw3NzKzKSIdnQyQ+TrmatcM0TPSBNcT3lboJvTq/vsHPhFEHOklhSwWL2As3STIGHEKmPwT8Uzp0uUh5EHm5evW8Sb4uP4nByYQwxdqHMceAiDOHqgYpw4VBCH6Pe+x4hrXj85xyMDAqbuMwQPN9ti36EL9bBG/HiopZjNAOGM4WDdxMe0l9RjqB8fy5OGM2JbUk9Xw2dLg7OqB9cv9IgYUEUgdNiaZK98sOSmxId0Vwgea+svx5+yh6Wvp6exc8bL27/svAWcJYxEKF68awByHHeoafxOjCa3/YPY67Fbh0tfmzsDGqL/+vKbD/s7L2vDqDf88ECwbzCQ7Ltwz4TXkMygtMCMNFmcEpPGF46PcD9yB3qTib+sA9m78jwFZCOIQSxm4HiMjNiVfIfkYrA4VA5T4wfI48BrvyfCf9fn5BP1zAOYENAvEEIQV7hlSHFwcNRmVEqIJWv8h9p/tX+Sq2/zSKMqYwdO+McZBz9LYAupD/v0NfRdYH0YowC1DLsUtIiouIgwY+wgv+LvrfeVm4yjkcOeZ7u71xfqy/i0EEwz+EvkXGhx+HUcaiBNeCYz/jveT80rx6fAe9WD6Yf4SAR0EpwetDNEQmRSWF9oYZBhqFE4NYAU1/bD0Ju0c5gjgCdqP0lPLCMlLzeHRM9jG5Vj2igI8CsYS7BrYIFUl9SZCJjskNx6LEtkEQfuD9S7wMe9n8lP2XPvc/uD/LQEWBXsJogqCDH4OawwzCGYCH/ya+Hn4v/mE+3D/qQWLCb0KOgx8DmgRbhMoFW8VSRSZEscONwi8AE/7Rfb37kTo6OXt4uvbQ9az1NXUUdWe2Rjh4Onu8gn65gCkCDcRLBngHvkiNiTVISAbuhFNCVIDGP/N+6v6F/xm/i/+pvwF/bf+9v/LAIgDSAQPAtH/IPzg95325/if+y3/1QRQCn4M8QyZDp0QrRLlFOkW+RcqFwsU9w5ACLABIfzV9kzy5u9T7iHq7eMq3sPYgNRr0h/T/NVK24fhM+bJ7Bn2DACuCSETEhzcIcYkrCOFHzcbCRdoExoQig
4lDnEMYwhiA/3+lvz8+oP4Rfjg+MH4kvdm9cT0LPZ5+ez99AKTCSIQuxO3FZYWEheQF5cVmxM6EyISoA6KCeYEXP8O+QD1LPKN727uXuw86K/iJNw+2GXWk9XD1z7cLuAF4wjoV+9g9kr/HwurFfQc+iF8JJgjoSDNHpUdFht3GToYgRRZDncH6QDz+174BfTH8A/wVu/e6wrobudB6TbtofOc+msCjgunEiAWNRiDGgkcnhvyGYMXtRRNEcEL3QRM/3f7mfdC9GPzYfPv8GbsoucG46bebtud24jdz96r3+rglONw5xbuk/ctArEMjBUyHG8ffCCFIEQfdx7AHgEefhwaGlIVrg4/BxsBRvwA+IT1nPMP8eruxuwN6uLoY+xZ8gv4tP4eBp8LGQ/tEXQThhR7FVEWzBbwFfkTRBLpDywMIAlTB/EEwgHy/5j9LviY8WrrA+Vi3mna8dfS1JPT9tTz1ljZQt4A52nxi/r7AmkL9hHaFWMYwBnaGlEctBwNHH8aXRfUEnENOgneBsYEmgPnAqgAKv3/+cf25fIc8d3yqvUc+L/7cQA1Az8E1wWaCPwK+wwHEIYSTxPfEwUUExKgD2oO9ww9CocHYwQQ/2T3re6L5gXfltk912nVZ9Ma0wDUTNXO2I7gGutF9UX+YgaHDD0QchIPFJIVYxgZHFsedR7AHVAcGhnGFVIUBRMkEZIP9QziBz4Bmfpt9ETvE+wk66Drh+2f8MfzYffS+7cAZgV7CgEPCxJ2FBIW7hWRFDYTCRKcD/IL9wgoBgkCNv3595fxper95Jfgbd3U27na7ti11zjZ0N0b5LDqNfFp9xb97gHJBUsJDQ1bEZYVVBn+G4IdHB6QHaUb/hlWGSIYixU4EuAN5gf3AG35gfE068TnbebN5obpX+1n8cb2dP17A6sIiQ1zEdsTHxXWFD0T+hGwEJEOxwxeC+YItQXNAgz/S/po9aLwduwS6ZblqeHd3RTbytnb2SnbVt4n44nn9OoM76XzJvjt/fwFZA68FcYcwCIQJvMnpChcJ/8kcyKHHvIYzRJfDNUEYv2E9wHztu8v7pXtzO2P7yzyIPUY+Wn9ZQGZBYcJJQwTDnAPlQ9xD3gPdA78DKsMrQx4C5sJkwd5BeEC1f4Y+Xrz8e7/6frjzN4c2w/YytUG1ADTKdW22krgTOXv7DX2E/65BS4P8BdKHsgj+iiLKygreilrJqYhPxypFigQaQmhAzX+1vjH9DjzwfIR8gbyMvNg9A31ePYx+df7Qv49AUsEogZkCDUJxwlJC2QNMQ8cEY0TWhV0FQQUHRG+DNoG6//U+P3xX+un5AHestgb1VjSjtDy0O/TBNn+3nPls+zI9Pz80gRWDL8T5BkqHgoh4yJEI3kiCCECHzIc9BgCFTIQSAvyBp4Cif6p+xv6Wvgi9o30kfOg8h7yB/Ns9X34rvvZ/rcBIAQ9BiQIfwrlDbAR3RRsF1cZ7xmTGJcVtBETDU0HdADb+Pzw2+j14BTaw9Rl0ZPQCtIR1SzZdt5G5P/p2O/s9aj7uwBDBXMJAw0OEPgS1hXSGLYbHB64HxIg2h4GHLgXoxKFDYwIAAQBAMb85fkD90f0TvI48TfxX/Kq9PX3uPsJ/+UBpQQ/B8MJggx6D4oSFxWNFsIW6RVKFB0SUg/BC0cHxAHG+r/yHeuX5PvepNrW2HrZmdrO20Le7+GA5fro2Ozl8L/0g/gN/Hz/lQOuCO8NGhNVGFod8CCcIrEiUSFVHuQZoBRGD+IJegQq/yf61PV88grwvu4M79nwl/O/9iH6pP3DAHUDIgYHCaUL1w10D5UQORF5EXgRehF7EfwQYQ9+DGoIVgNZ/b/2i/BT6zbn1eMh4WPflt5D3nPeh98u4T/jgOVA6Kjrye+S9C76qgDFB+gOqhXWGxYhsSRVJuQltSMqIHIbghX6Do8IjAIB/Sn4UvTM8X/wIfDK8Gjym/RB92r69P1cAXAEGAckCW4KNAuAC6sLJw
zWDGgNtQ3NDVYN2AurCcsGMQMX/+v63fb68jnvwOtU6CPlreLf4DDf/93T3WHex9+t4hvnouxF8zH7kQPKC8MTAxuwIHkkZiY+JvwjayDfG2oWoBA+C3wGTALD/i38EvpG+D73Fvf29i/3b/gS+pD7OP3+/nEAkgGEAjsDGwR5BSMH4AjNCuEMaw6oDs4NxAu6CCEFNQE8/Qn6kffI9B3x7uxw6NDjxd8I3dbbI9xw3VXfN+KH5urrSPLA+Z4CIwz8FP0b7iDsI3ckiCJQH8Qb/RfTE2UP9woaB9kD8AB1/hD94PzT/BP8/frr+bT4rPdJ96b3Rvn1+6r+1wAIAzoFKQc/CdsLgQ7OECwS6BG5DzcM+Ad8A1n/4vuS+Kj1D/O47xXrbuao4prfsN2Z3cLej+AC4+rlX+nu7Q305fpIAmMKTxKaGNgcQh+7H7MelBzSGbgWgBMAEBEM1gcTBC0B7P4l/T/8Afzc+0n7N/rm+Oz3ufd0+Cz6LP3jAGkESgelCaELSw3ODhYQvhDAEA0QOw4FC/kGhwIK/oj57PRN8FPsUemI5o7jJ+G/3zDfpN864Xfjqub66sLvoPRN+rQAFAeGDekTShkMHSwf5x88H2YdihriFvUSBw/6CuAGAAPW/5D9rfvo+bP4E/ho95H2CfYK9tf2d/j3+kr+XAKCBvsJsgyLDmoPqg+ND1AP+w5IDsAMIgpkBpIBBfxs9h3xz+vY5vHiB+DU3XrcJdy43JreqOFe5eHptO/M9XT7LQEMB3UMBBG8FM8XWRo4HMccIxzRGu0YTBbjEiUPkAsrCNUE4gFG/7P8J/qa90f1rPMS8+byNPPY9BP49fvk/+0DQwidDAMQKhJrE74TNBP3EU0Q4g27CjwHhwNE/4T6XfUR8C/rvua94mTgkt9X3/3fb+FM4/HlB+rg7kn0yfpnAU8Hqgz9EAEU3BW8Fo8XMRj+FxQX1RUlFNYR7g7wC/8I2wVFA34BFP9w/Cf6Bfd881jxzPAp8Xvy3vQ2+N/7cv+BA2gHaAqEDQ4QKRFTEuESKBIKEfgObQsyBzMCxPxv96DxJ+xx5/biQd/a3BLcM9xr3IjebuIk5mXqf+9/9db8ogQjC4oQpBUPGVoaLRt6Gx4bzhqYGbUWphNUEVAOhQq9BxgGRQRVAnkAB/7t+k34yPVB81Hy/PLu8031LPjQ+zz/3AJHB3sLlw7NEBoSABIJESYQQg4UCyMIfwSs/8T62/Vt8CnrRedy4+XeW9z93BXen9/z4vfmduvm8Hn2PfyRA2oLfBHVFTYZ3BqHGhsatRmQGAUXtBRZETAONAybCa4FnwNlAm//dP0H/KT5Gfgq9/z05/Fp8Rzzg/SK9tz50P2vAV0FbwgPC+ANLxCAEJsPRg/tDbsKVQcpA739NPnU9GzvUOsq6JrkVeHQ3hTfH+JJ5G/mleoK76bzgfjV/ZcFxA17E2EXARqUG78bKBsMG/oaQBqDF6MSeA7bCwgJJgYsA0UAsP1z+g33i/SL8+zyFvKo8b/xFPPQ9e74a/wnAdsFVgl4DKYP8RE5E6oTwBIMEYUOjQpTBTL/CfmG8+vtYOjc417fPNqa1rfVdNfx28XhjufH7ZD0Bfqd/t4Fxg87F2AbuR5KIAYgDh9UHb4cHR21GogVfRB4DBsJCwbOAjEAz/6C/IH4VfUk80jx7O8Z7sPs++3U8AH01/eR/G4BAQZTCucNExHBFDIXchdjF0sWEBOzDpwJRwM7/En1HO6R5gLg39ks1A3Sn9OL1f3Y2d9T5y3urfTg+2YDtgsvFPQYkxoyHfge1x0VHf0cXhwVG4UYEhT8DzANQwlMBMwAY/7j+9f5r/fU9CrygfAh79rtqO7T8bD0VvcR+5z+HAJFBswKQg/kEo0VFRdSF2kWhRTTEQEO+wjbAn773vL+6RXictqE03TPPM/O0QLW39xt5B7rJvNl+1ECLQuKFAsaDR4AIkcjLyHuHpUeHhx0FwAU8g+CCn0GGA
LH+0z4dffM9Tn1Dvbx9Xz1/PSB8wfz3/Qi94r5A/zA/d7/KQOeBiwKUw5HEXMS5hPlFKwTdRI5EQ0OnwllBJz8AfNE6r/gR9cE1AbV1tQm16jdi+Iq6B/xb/kxAwsPFBd8GrYdqiD4IKQfAh9tHsUbfRYED5wHpwI///36FfiJ9/P1yfPE8j/y9PPb9gb4Mvh4+DH6jf0iAOYBuwS7B8kI3whhCWcKawxKD0EQiQ+AD8ANvwn0Bg4E6/6a+fLyOenE3xnYXtJv0ebTS9Y13EvkXOkh8Sv+IgmaEkUdiiNvJbYnaSh4JfMiNiJ1HrUW3Q1RBuH/EfkS80rw0e7S7O3rRexA7aPvfPEe86r2ZPo0/l8CCQW/Bh8J8wrrC1cNJw7ADekNjQ54DpwOfg7GDEQKUgc0Air7DfRi7DzjftlO0SHOvM+c0N/SYdxY5ULrmPYDBBsPXhtQJbYojiu/LkctyiiiJb8j1h3TE6oKtQIz+y725/GR7Orqbu2+7avsBfBh9bv4Gvsz/LL9agK7Bk0HLgc1CYYLYQw8C/EKqw1eD5YPPQ/RDfcNBAzpBS8DKQHi+efzk+3F3o7R5c6UzzPPrdE32EHgBeed7tj4KgQeEgki7CkrKjoseiwaKAEkUSGwHRQYCA5fAVz3hvHD7ozrVejq5wLpV+oX7cXw3PXL+rj9sf+jAdsEmQj2CegJUwsJDT0NEwxQCz8M5AxHDKkLMguhCTAGZQH7+7r1Pe9+6bniA9qQ0xLTG9X61jrcfeaF8Zf6TgO8DREa1CT+Kz4vLS/pLkssHiUYHxwbAhUkC9r/Hvea8bPr2uXs4i3jIuaE6GDqdu+u9fX6nv70AEYGVQywDpwOCg54DkUPNw7sC0wKLArFCYEHpAXDBCkDAgHm/Xj3se/+6ank/twT1nzUotaQ2HHZxt3i6bv3cAAOCgsYyyOdK9YvIi8uLqUu4CowIloaPBXVDuUE3PmN8tLvjeyv5YLhkuN+5jDpge1B8mr3kfxt/+AA5gRxDAYSFRPiEg0T1BFhDkMK2AgSCloJIAaCAg//SfuP98/z5+7f6IzjCd4i1uLPHdBA1EHZ2t2Q4wXv3vtOBXcQ9xwYKFsymTWJMqIxnTBoKoUhmBmAE8QLCf/a8qDsR+o45xDjKeJf5bjo7upJ7hP1tf1sA6oFSweVCoUPhRO9FBcVIhZCFa4PLApVCWkJdwflBZYEHAEN/En3p/PC79/qpebJ4fPYldDDz2rUIdcX2uXkkvNr/RQFiBC5HN0nzDHnNbU0KTRyMbkopR7JGPkTpgqH/pHzkevS5EDe79lj2vjdjeFr5c/s9vQf+y4BIgePDCMSWRazF8cX9RZbFYUSAg+ZDHgL9AlRB+QCuP6w+8b3xvMa8A3s1Obr4Z7cA9TgzkLT8tca2eDeIesD9/3+OgflEv8f2CtFMskyBzTENG4vGiceIBQZMxL5Cfb9f/MP7cLmpt8o22/aKdy03/XkauqJ8Bb5P/8mA3sJlhBzFeMXLxj5GIEZIha4EZgPrA9iDt4JbwVLApH+7fnz9KfuwucH5Ozg6dik0BzOs9Pq3GLgLeSw8Gz9DAZZDxkZPyWxMEk0STI7MTYxmi3nJTodBRY6D/4FmvoV8NHpi+YC4erbMNz33rHhMOWB6YHvk/c5/ncDZwqIEWMWCRj1F5oYOhiRFWMS0Q+SDU4KCwZ8AWX8zvfy8kPsOOW933ncvdhm0d7MHNOM3FzggOT37bb4jgPGDq0X+SB5K3cwyi7ALL8s2CrwJEQdjxYpEfwISP6h9ifyGO0t6LPkNeIy4jzl1uk77ZXvovTQ+90ATQUcDd4UoRc/F0sXgRd+Fo8UWxIhD5gKRAZQAhL+mflQ9MLueejk4Nbbt9mR1NvOGNJa3QfloeaQ7KP57QRZDJ0WXSLuKCksOi0xKTYmJCcmJF0cXRUwEI4JZgDd99/zzfHB7GnnQ+XP5MDllegS7ezx+/W/+vH/lgMQCOoPYxVJFaIUsB
W4FXcTYxDgDe0K+QZ+AwkACvyA99zxTuzd5iDgl9uQ2WjUVdGI2mrlPOWv5wT3pgS+CgsUICElKVgrRiv3Jywk4SK1ICkaYRLnDIsHtP/g9jvx7O6L7GDoaOWc5bnmtOjq7H7wx/PN+SP/bgHLBZ4NBxNKFDIU6hQJFWoTjxGvDwsNbgltBboBo/3u9wHy6uwH5xnga9xT21jWx9Hf11LiXOZi6of1YwFECtET7R6fJ6Ur4C51MLIq9iMLI9oeIxZqEXMMagNC/K/2lfBa7Onps+ec5uLmWeho63bvrPQr+SP9ywDzA30JnhCwEtgRXxL+Ek8RGg9PDmUMRwpkB4gClf0T+h72q/GW6lfio93o2gPXAtPS1aveUuO05cfuHfmRAcIMyhfyHggmOiz7LEkpuSYtJksi9hsLFzURkwg7AIL5/vJo78btyegJ5M3jxuRQ5lPqb++H9IH5Pf1PAXYHvg0eEkUUVhXLFTAUDxFTDmsMJgv0CNkExv/K+vX1hvCM6kXlceGW3kXawNQM1K/bGuUx6sHv9fiAAXMJwhJ3GZ4f4Sj4LOUo/CbIJ7gkxR+FG08W2g9yCIX/XfY88RHvF+sZ5s3jd+TK5MzlPerG8Pb2tvx3AX4FIgtNETwV4xYsGJEYLBbSEXgNjgkoBkcCJP7W+eDz8+5S6/TlMeLT4JbdhtmH2FbcdePp6R/vqvZD/zYGKQ3DEyYamiE7J28oDCfyJTwkMyCHG8gXPxRUD1UIxf+k+Kvy9usn5jTjI+I84Rfh4uLF5pPryPEH+Rr/rAUnDtkT+RVMGCwaRRq8GAwVlhDUC2oG3gAy+qbzQO/S6lbm/eKl3mLcKN0r3BjdruQN7Ivvh/Sp/HQEIgt0El8aaCCLI2Yk2yJ8Id8hyCBWHhMd9RiWEkkO5QYn/An2/vE166/l4+Ht3sXfQOOq5WPpBPBN9uf7PwITCHINyxTUG7Md+huEGrQXUhNID8kJ4QPW/7D6gPMS7QTocuNt4MTetduU2TPdoOMZ57zpz+6b9Sv+5gYBDf4SQButIYwjMyNaIfYfmSCMH8AcuhplFwwRgArLBIj+Y/i288vumufZ4GvdRN0c30fiKOfL7cf0LPt+AZgIoA93FbIaPx1PG/0YVRjvFfUPtwjXA3H/v/kW9K3t8Ocw5UviUd0c2iHaG9+b5Qzpe+vx8LL4oP7hA7EJ8RGKGv8gFSMJIGwdyh3RHFEaQBrAGRsXnhLcCwQE0P36+Lvz4e375lrh0N+Y4LrhmOQW6W7uC/QN+r8B3Qi3DpEV4BoKHWYe2h2qGlYXQRMUDSEHwQLw/fX44vJE6yznk+Uu4MPYRddr3Jfh7+WW65Tw1PWN/DcB0QTxC/gVDR6VICUgriAlIT4gjh5iHNMaMBnAFToQ1AhJAR37K/Sw68PkFeEq3yHd8Nxj3/PiHejF7wr4RACeCJoPXhRMGOoaDBunGuoZ/hZVE3QPaQlfBC4B2vxW9wLyEO3m51HhZNp21wHam9654djkkuo/8dn1HfoWAUYJkxE3GfIdBSBpIdoheyB3H/MeIB1XGwsZMhPxC1IEsPpG8ufsWOhL5Hrie+J14mbjJuYn6dftzPXN/bYDHArhEGgVMhjuGWwZexdbFk4UiRCQDSEL+AgFB5kCuPs+9b/ux+Y+32/bc9xI3sbfQ+Gs4eLkc+wh88f5bwTADisV2RpaHk4efh8oISIhRCH4ICIeDBqrFA4M0QGk+bvzQO7I6e3nouee5mrm8ebB55DqA+8M9IL6pwDRBBwLvxFnFDoVBxaPFgYVkhIMERcP5Qw8C/sIpAVDAcX7w/V17qXmSeEG38Xcl9i010fbR96W4lXr1/Pj+gYENg2aE/gZaiDjI8Yk3CThJFMjIB8dGk4WbRFoCWkBpPqj8uHqKudu5iLl7ePB5ZHoNekc6r/t7/IY+Yz/8QUZDd8T5BcOGhEbqxm4F1AXXhb6E9ASmxEjDigJWAIT+6z1de/15pnhgt+J3KDY/t
Vs1hPamOCI6a3zqP2MB2oQ7BbmGs0ciB2vHukeYhy5GaYXiBXqEykS/A2gCB4Dcv1q90vwLeqY57fm8uQt5IblL+iQ62XvsfJj9kj8KgOyCKoMCRJpGf8eviHAI2okkCKxHnYYyRCaCf4DK/+3+Ojw0+vd6WXkY9nnzwHNe87O0S3WrNxw5wD1vv+uBLYGoAqdECQVKRfKGCAc0B/hIJMedxrLFkYWiBdQFSgQZw1mCzoEa/mX7wznduAK3srdsN3035/k4ull7a3tefD++G0BAQjxEE8bKiQ6Ki4uAjD2LSEqeCbHHloRCwTZ+dfwdOd53bHX1ti82kPaUtsg3nvgQ+Vb7E3y4PjVAQAJpAuCC5gJuQZzBA0CuwAYA1UI3g1tEhkVxBarGHUY/xSjEM8NvgufCTUHKgQOATT+afmI8ULpZOIb3V7aktpj3c/kbfDD+7IEhg3rFpkfvSUTKLsoXincJysiVhoSE5cM2AS8+h3wredT4cDas9NazmXMA8+K1t3fzuhJ80sANAzSEf8RpxG8EP0MGQj+A3UBxAKoB5gM9Q5PD/ENgAqHBPH6gvHq7svyhvdX/OMCyworEW0SzA46CecDz//n/Cb5hvY2+f/+yQIuBLkF2wepCB0IYQe0Bi0IZgygD/4PSg8uDswKBAS3+qvw2ufX4KPaddX/1AfbleTk75X8CwoDFxciXynnKh8oTiVZIqMbIBLPCZcCrPqu8XHoQuA42iTW49Op0rbSm9ct4lLt2vVq/vwIuxEPFu0Ywxs/HMUbkhxwGiUTbQtHBLT6ue9g5f3dG9zS3l/lXfBv/RgKrhYzIf4mNym+KEIkEBzBEhEJvP6B9nzw2enD45rhS+D53Unep+He5m7wJ/1pCkoZaSfrMsE7bD5kOdMwtyQzE0v/B+2O3W/RFMjLwIG9Q7/4w1LK8NOS3zHsAvwYDfkaJib4MAQ7JEEOQcM83zbdLkgjshbPCib+zvKI6+Xlat8O28zZyNn82n3dCuLi6N/uufOr+Tz/2AHYA0UHKQkUCTAJkgqvC5kMHA8KEisTcxKjEH4NJQiMAOH4QPJs7FrogOeV6eztIfNI+PP9QQNNBmoIRQoOC3cLCQ1CD6cQVBLYFF0WtBQ/EW0NcAhDASn5PfFT6YjiUt703D/dMt9b44/p0O8/9IX3pPsHAFEDowaoC44R7xeoHosjQCVHJekj8R8VGQAQRAeGAFb6bPM97rTskuyn7Izt1e5u73Dvou4l7HTot+R04XfeEtwf23Ldt+Nn7Bf3fQQCEw8g6ywEOyZG5UmZSi5MoEoDQrE0AicnGUMJ6Pg662ff9tIEybvDdb1qtSGzGriCvl/G9NQH6Qn96g4cH3YrsDG4MlcwBiu3IoEa0BTBD/4JXAd9CNMIPAh0CZgKiQi7BQIEFAH7+9r2DPLX7IjoPeZV5WTl5ube6bXty/Bh8r/z1PVY9wD5xPwgAXoE9geiCzQNIAzgCkcKqAefArL+uPye+m75/vqb/fP/kANaCPUKLgtLDEoNvgpsBioDGQDz/Mv7o/yy/UX/VwKGBRYHEAddBk8FegN4ADL9bvuI+lD6IvxT/6AB2QNkBroG2wQiA2sB8v7h/G776PnX+MH4KvlD+Q/5IPnk+ZD6Pfps+qj7WPz9/Bn/MAFoAkoF3ghRCXUIRQrxDG8NIw5wEd8UhBb7F2AZ9RcxE0MNOwbO/InyM+qK4+jcHtih1r3WmtZC2Gfd1OMV6rzxr/puAiUJeBB7F1Yaihm8GD4YNhW+EVERgBIyE1UUIxaTFpwVKBTHERUNXwYr/5f3Xe5s5B3dZ9kb18jW39rh4UDoTO8N+HD+kwHUBXkL9w1BDiQROBa3GHsYeBikGOwWEBShEd4OeQsoCYoImgeqBTMEVAOdAF77y/VP8HXpheKo3SnalNdC11HZ79wb4kPpkPG4+Y0BlwnYEZ0Y7xyMHz8hHiEMH6kcAxp7FhYTQRAGDR8J5QXDA1kB7/2P+jj43f
X/8j3wPe0J6jjoKOj659/nCeot7qLycvcD/YoCtge5DLoQxBKcE7cTfxKRD4sLtAYCAmT+cvuR+Mb2e/dI+h79bf+tAegDgAUQBj4G0AWoBIgDvAIzAZ7/cf+SALUBfgJ6A0AEjQSDBKwDiQGQ/pv7pfgd9WDx8+0j62foluXF46PjGuXc6GLvh/f0/9UIihIYHMUjYClELRkuciqtIyAc3BPpCrsCh/yq93f0PvRO9Ur16vSd9Uv2YvWB8w3xhu1i6pPpEurR6qPtWPLk9kz8bgPACXcOYhOqF4AZ/RgbFyATXQ2ZBzsBL/l98WHsfOjg5CzjauMW5PLlXurb74L1Hv3eBakMVBJ5GIMciR14HigfYB1hGiUY2BSSDx4KJQUHACn7i/dm9EfxTO9k7j7tqesD6y3rGes+69rs6+/H82z4T/3JAbMFOwn7CwAOeQ9dEJ0QVhC+D+EO3Q1rDBgKBAffA8UAb/1p+l74cveU91r4gvnp+uD7ovy//Wf+Vv7n/uH/zv9o/woA1QAvAUgCcAQdBuMGeQdaB8IFTwN9ANv8zfjN9bTzkPH47xzw0/Dl8Ezx8vKc9CH2o/hK+3v9jQB4BAIHggjRCrwMkgzxCykM2gsAC4gLGA0wDl4PthCzENcO4Au3B/oB6PpX84js7uaF4jLfEN0v3ajfQePO53TuPPbj/egFxQ2LE8wX7xv/HekcSBscGl4XSxMCEEcNvwp+CSEJ3Ad3BnsFcwP4/2f80vjD9HrxdO+u7XbsMe2O7wnyevTy9gz52fpX/OL8E/2W/sYA5wFDAhcDmQMHAzgCmQGPACEA7gGaBI0G6AjaDKgQoxJbE9gSkBAcDTUJFwTL/Sb40/Mp8H3tRewu7MLtu/A+8+P0Rffd+T/74fuV/M38+PzZ/XP+z/1N/Z39nv0r/fj9iADuA1YHmQpPDVcPlRAPEZcQmQ7GDDAL3giRBYcC6f8u/ff6pfnF+Oj3/fe5+Cz5vfkc+0/8k/yk/LT8X/wg/Nr7RPsP+5T7HvzT/F3+zQCQAzQGWwioCQAKCgqrCR8ICQZeBNsClAESAUYBngEnAu0COgPQAlQCowEFANT9wvu2+Zf3yPWb85jwK+477TftlO2J7jzwEfOy9ur5bvwa/48BNgIMAQX/rvwf+0v7n/xb/5IEowqpD/ATPRiJHKsgqSMnJZklLSSxIBEciBZtD80GkP079HTrTuQ2353brtna2YHbAt4S4jXnsuy/8k/5qP8KBbcJug26EHYSphNEFNgT9RJsEu0RjBAuDn8L8giVBSoBLf21+sL4/vZE9gj2TPWm9M306/SM9CD1ovZ792740Ppa/Vf/KAI7BWcHewluC/YLaQtUCj8IUwVfApP/hvxi+Wv2qfMT8aLuSOxV6r3oOOdu5lrnU+kP7DDwXfUb+s3+gwR2CsIPYRVvGzIgHiMBJaIl1SNTIJAcChghEngMzAeyAkr9/fim9TzyMO8q7YDrxOkL6Y3pL+rD6l3s5e478WXzbvYQ+rb9aQEUBXAIsQtnDisQmxEpE/8TDxQpFLMTuxFED/IMkglsBUICx//s/F/63vgG92r0p/Kp8Yjv1OyV60nrSOum7KrvzfIF9nD6R/8nA9YGWgsuDyMRfxKpE+4ShBAfDpULTQguBd8CqwBz/iD9aPw/+7z5wPgX+NX2I/U49DP0yfOJ8230qvU79n73vflG+zT8Bf4EAMwAbAG5AukDrgTCBfEGhAfdB3UIvQilCJkITQhwB3AGYwXtA14CxgHBAaIB6QG6AvICdwIJAgcB+f4Z/cP7Nvqu+HT3AfeY9nz25PbB9xj5AvsT/Rr/kAHHA1cF2AZTCOkIJQnkCTEKawnECFoIKgfkBaoFZAWlBIgEzwQWBAsD6QKWAkcBPgCa/xv+NPwd+9P54/en9mT2qvVz9PbzvPPh8vfx9fEJ8vvx3/LD9Fz2qfdm+RD76PuK/Mr9G/9UACUC0QQ4B/cIqgprDKINGQ6qDsUPiR
C0ECYRGRJ7EjwSgBKgEksRUw+EDZkKOAbKAZ39nPg687HuneqX5m/jp+Fs4L3fIeBF4XviR+Q/59Hqee7F8pz3QPzvAC0GNgscD+YSDxdXGlEcrR6QISEjQyOnI9YjRSISIKgeVhwsGE4UpxBtCi8CPPvG9Jbsy+SO3zDa6NMH0H7OE8w1yvvLFs9J0WXVr9wJ5DvrOfQL/pAGug6eF3QfUyVpKsguDTExMQcwZS1QKeokaiBzG7QWgxLPDaAIsQOm/iD54/ML74/p8eMT31XaLdb60xDTodIe1IzX79oT34LlsuyL8yf8EAdIEfsZVSN6LHIytTVGOLU4tzV6MTQtWydKIPQZGhRaDX0GlQCZ+gX0Ge6c6ZzlyeEr38DdvtwX3B/cJN3e3vjgeuPP5q7qte4f81z4tP1MAoEGrwrHDWwP/BByEpkS5xHVEVYRLA/pDIULDAkWBWgBsP1h+PXybu8g7F7oUuZf5hrm6uXX5w7r/+0z8kf4w/11AkEICg7AEWIUKBfuGNwYThhSF48VpxMUEiUQuA5tDvINPw1yDYoNDAx6CpsJJQcHA+f/ff3f+VD27vTV85Hxa/Bk8RzyXvIx9AP3xfge+iX8c/0u/Sv9gv1u/Gb62vjL9hfzfu8K7VjqkOdd5nvme+ZA53PqU+5S8h74Zf94BrwN4hTpGv0fsSNcJSgmcCawJNkhNyAXHqIZiRWwEgkOJAg4BOsAqPvk9m/0PvHN7DrqeOnI54TmdOfl6Lvp5Os972/xfvM79+T6Bf0NAIIEnQeCCR0NvRD4EasSphS+FCwSNhF2EWcP+gyTDesN2wtiC94MQAtFBzoFPwOl/X333fOc78zooeJD3o7YGNJezsvMrcogyr3N/dKn13zecugE8t/6qAU9EXIabCKIKggx2DT6NsY3rzYENDkwjCtfJuAgFxtzFf4PKAoNBDj+DfiS8XHrCOa04CXcUtnK1xfXuNcS2rPdbOJv6G7vmvZz/hoHTw+vFmQeOSWgKSEtNzC8MA8vVy1kKskkcx5ZGJcQBAjBAAP6oPJV7InnquL83dnandit1ubVTtYU14DYs9qq3Kvee+FS5PLm1Oqh733ztffJ/XwD8QdHDtkV9xqsH0kmFitQLKgt4S6ALPknKCSYH+4YaBLKDDcGIP9k+VT0Se8d60joJeat5PjjuuMx5Kblrecs6o/tw/Hz9Tr6Pv9CBMAIVQ1hEucWPxoGHaIfCiG0IMkfnB7uG/IXUxQLEesMxwi6BdgCOv90+773IfPH7YLoieON3mDaitdy1SLU+9PW1CrWP9gS23feKuIU5qrqBfCd9Vv71QG/CE8PuhUUHMEhSiZRKpktJC8jL2IuiCwAKUQk2R6fGOARNAuWBDT+k/iU8wvvzOt36R7nhuV/5bHlt+X75jDpXOta7oPyqvYW+50AHQbhCgwQLRXtGAMcQB8zIbchKCL+IeQfDB3rGW4Vmw+qCUUDgPwq9l/w+Opy5rfiit973U/c5tq/2RTaG9rc2braXNwW3ZneKuKd5XLoPe1m84r4XP7xBRUNWBO7GsYhLSZ6KdIszS3BK3kp3ia4IasbHRckEpoLbwbbAvv94Pj09fby7e6W7LTrxulo6GjpSupo6mfs3+9V8nf1kvoN/70CIwjSDcQR4RVnGjcd1R5uIHUglh5hHGAZyRQBEOwKCQWF/576UfVC8IPsF+mu5SzjjuH93wnf5t613h7eRN4q33TfPd/0367hWOPg5WjqlO939C/7uAPNCu4QwxgfIHMkVyhvLFMtmytsKiQo3SIIHfgXEhK+C2cGwQF+/Sr6hfes9XL0N/N48uXyPfMm83D0evaE9+T40Ptb/koAgAOZB6gKoQ1kEXsURRbTF/0YaBigFjwUuRD7CxEHAQKi/In37fJ17tnqSujT5fXjuuPo48Dji+Td5THmaeZa56LnxOaj5lvnieeP52nptey877Xz2vlNAKUFEQxlE4wYBBweIKki+CHYIPsfzR
wWGKEUJxGLDJwI/QUYA4kAUf+d/pX9GP1Z/Tb9hfxS/Fz83Pvh+/r88P3d/lABgwQZB9AJgA20EKEStRTkFmEXoRYcFoIU4BDqDAMJ5AM+/jL5DvSv7mrqt+bt4jXgC99B3kPe49+74Uvjx+Um6Mrokek0687r7Otg7QHv6u9E8gf2FvmM/PoBxAf1DOMSiRi3HFUgISPUIzMj8iE6H1gbWRcJEx8OpAnbBfMBGv5A+x/5FPed9W713fVe9qD37fkO/B7+DQFSBP0GtgnpDLQP4BEAFO0VDBeYF+sXhBcFFiUUCBIMD0sLYAcLAwP+vPhl86ft+ufQ4lTek9oQ2C/XbNfE2FLbY95+4Y/kN+df6X7qa+ur65jr0Out7OjtLfB59P358P+iBnIOVBWjGo0fTCNTJBgkviOMIZgdSxreFtgRKg25CbUFggHy/jP9LfvI+X/5l/m0+W763PtN/bX+pQBYAxgG5Qg/DPEP/xKSFRAYzxlpGocaRhoHGfYWqxQHEpEOuAqjBhACCP3m98jyZe0z6Kzjrt+V3NTaEdr02dnaeNzd3YPfROFD4vriYeQx5WPlBOeI6WDrjO738+74o/1eBAwLlA9FFFsZSxyrHXUfZiBBHwgeQh39GqIX4RT8ERcOBQpABl0CY/7v+pr4yvYq9eD0gfZF+Pz5+/yNAGMDjgaHCuAN0hAuFBMXzxhGGkEbRRuVGlwZQxe8FC8SIA+zC5kIFwWvAKj8x/gZ9FXvmOvv5xPk+eCw3mrcPtqQ2FvXNNYk1ejUytXl1iHYEduv38rj0ufh7c30f/pzAHAHEQ0qEfoVfxqsHHwe8yCRIgMjfiNXIxUiHiAzHWcZ8hSHD4YJCARj/p34DPQ58ffu2e3g7nvxyPQM+VP+sgOoCG0NNxLmFfkXghkBG0EbWxqZGRIZxRc8Fl0VXxRXEugPbA3OCbAEGf+a+X3zK+2c5/Xi396c23nZCNiY1m7V39R91OXT1NPD1D3WAtjf2v/eb+OZ6Bfv6fVD/AoDQwqJEKUVbBqHHhshtyIpJJsk5CPqIm4hhx6KGkIWLBH2CqMEvP4X+Wf0b/HR707vi/Ar81P2APpl/nEC9QWnCU8NNhASE4IWnxk3HOAeSyHBIsEjIST0InAgUx0dGVcTPQ0fB4IAHvrM9K/vn+rX5tTjl+Br3aLaTNfa05vQLc1JyrrIbcg2ye/LddAP1g7dO+X27ML0Yfw0Ax4JYQ6bEikWuRkXHfkfnSLjJE8m6CYQJkUj9B5ZGUUSdAr4AqP77PRA8Ibt1euJ6/3sRO/78XT1z/gK/Gn/ywJ3BrwKYQ8qFIkZ8h7GI94nwiptLKcsTStaKEYkUB+5GcIT8Q12CL4C9/zh9wrzeu0p6NXjHt8R2ivWu9LtzurLfcq1yUrJVMq9zNXPedM32OHd6ONC6mbxOPm0ADwH7A2pFNIZsx12ISIk/yQwJRoloCOzIHcdiRmsFCcPWgmcA8T9OviV8yLwKe3+6oTqUeu97E/vMPPm98n8BQKjB20N4xLeF54c4CA7JKcmbCh9Ka4pEinTJ70lzCLXHjUa4xSXDr8HAQGl+hH0Eu4p6aTkSuBj3NTY9NTS0CXNMcqTx0bFzcR+xhzJ7MxL0yvbjOKt6k30CP3wA0MKCxBUFIsXHhpGHK4eAyEII8gk1yXIJRQkviCJG38Uggz7A7b7/fOe7U7pQ+fJ5rLnaOoQ7hDyIfa2+tn+jQJCBoIK4Q5EEzAYnx0/IxYo4yuSLu4vQS8BLZIpjCRUHjUYXRIVDPMF6ADm/NP4n/Qs8GrrMuZk4C/agtNLzXnIdMWPw6TC58N5xwrMBNHr1nLdHeRM657yafnY/y4GZQwyEkEXeBteH/8iayUVJsMllyT9IQMedBkgFPQN1gflAUL8+faH8jnvP+2A7I7s1e1A8CfzOPbv+f79vgGUBT8KJA++E7gYLh4xIyInXioVLZ4uVC6GLOQpHiYkIS4b8RS2DowInwIW/ef38f
JS7vfpXuUu4ALbn9bm0uHOFsu7yP3HLsgoyfnKz83q0WvX9d0Q5cvs2/QU/Q0F8AurEdoWTxvBHgIhjyIvIxIj/iHlH+sc7Rg1FLEOsAimArX8dvcq8/Tv9u1D7eLtSO9K8eDz1/by+SP9QQCKA0EHcQsQEOAU+BkTH7QjhScpKnErjCtrKiso3iTVIFsccxdFEskMVwc5AgP96fec8t/tLekZ5MLeWtmG1MbPbsv9x1fGh8ZjyOXLd9C/1SLc4uMn7I/zB/pCACsGbAtpD8gSQRZSGigeWiGBIx8khiN8IUIefRlQE9MMPgZIAKz6zfUB8oDvle5p7g3vBfBo8Rvz5fTj9in5G/ye/7MDiAhMDn4UnBoWIPgk0ShZK/sr5CqtKJwl1CEvHSAYWBP9DgoL1wZBAq79Rfl39Hbugued4IzaWNXN0DrNUMsayz3MPc4Y0d/UTdnv3XniTecw7AbxrPVg+lv/WARJCfMNRhI5FoEZ5hs2HVMdVxxqGsUXSxT7D0wL3wY1A1UA6f3L+w76tPiA93P2ivUF9R31LPZo+JL7bP+TAx0I0wxxEXwVzRiSG80dih+3IBMhqiCsH1UelRxEGjsXjRN1D0kL9wZjArH9P/lL9cvxlO6Q69HohOa05Ezj5+FQ4Ije2tzA23Pb59vO3OLdO98t4fDjQ+cQ61HvAfTH+Bn9pwB7AxkGuQgUC7QMbA2SDZUNoQ1/DRoNhAzrC5ILcAtTC+oKMgpeCbAIBQgqBzQGewVWBakFIQaCBtwGhwelCC0K3gvaDUkQABOOFVcXSxiAGBEY4RYMFbsSQxDLDVEL2AiMBmkEKgKY/6P8bvkz9v/y/e9X7TLrbOmX54zlV+NC4UvfdN0H3GDbudu93Bbez98K4hLlBOm07Yvyz/Y2+tv8+/7SAEwCRANeBLYFagciCYgKtQsODXUOkQ8bEDoQXBCLEJAQEBAxDysOPA1YDG0Lewq7CYIJ2AmQCocL5gzSDuwQwxIYFCMV/xWRFoUW0hW/FHITDBJyEKIOrAy5CsYIwQZ9BLoBbv7Z+nf3fvSQ8Vju6eq45wzlqeIl4GHd29r92L3XBtfb1tPXRNr/3Xni++Z66z3wTvX6+XX9xv9jAd0CFwQMBfkFDAdwCPsJggvPDMgNRg45DqUNqwxmCwEKtwinB/sGpgaOBsMGMAfZB4UIMAnyCd8KIQy7DcQPQxIRFcwXGBrGG88cQB0MHRscphqNGOYVsBIuD30LrAfhAz4A/fwC+hH3KvSD8RDvgey16cbm2+Ma4aXevNyH2x7bbttZ3OXd4N8V4nLkE+cM6k/ti/C489b27vnW/Dn/BwFaAm0DYQQRBWEFZQVKBS0FCAXlBNcE3gT1BEUFvgVcBvMGbQftB30IIQncCb4K5wtIDcEOaBBSEoYUtBZoGG0Z7BkXGgYauhkpGY4YARhfF3AWFRVKE+gQwQ3XCZoFkAHZ/VD64vbL80rxL+8i7SHrWOn559zmpOUe5IviSOG44MHgPuE44pbjRuUb5/ToJ+vX7e7wGfTe9lj5fvsU/R3+0P5v/+n/SQBeAEsANAAbACIAbADjAMUA2AC6AX0CTAPJBG8GyAd1CecKxwqyCnEM5Q0WD1IRHRNxFO4VYBeHGJ4bHiAzIckhWSM5IGsX7hEAEyASvg/LD74OEwgZ/b/x5PDZ+KD4qe8r7Y3zEPNr6Url6egJ6vfoxOj95r7jlt6v2WLZot1M5XDs/e7I7U3tsu7+7yjzBfhM/EH+lvz5+Lj3Dfo8+4772fyhAP4CUwCx/7sHLxCQDxMNAQ4EEVkODwiNCIQLbAviCM4HGwp8CdQFzQXICQYOeg6PDu8RlhPFEV8T7BYaGOwUsw8+D28OCwr2BvYHLgq+B6gDiQIfAj8AJvyE+Lj3w/Uh8ozwTvD17pftqOwj7Tzule9V8lLy+/Dj8hL0sPUC92b4gPpz+W32HvRr9Qj4AfgD9dLzz/TA9ZT1xfXc+CX6ifm4+Zr6Dv
uK/Mj+UgApAUsAPAE2BG4HoAmTCbULcg3cDOIN6g/9ERgROg8uELoRAxGTDiEOCg4pDIcKjwqSC4cLUwnuBuAFZQfJB3AGmAVvBZkFRwJ3AM4BRwO/A6kBXQBUAM3/tP4u/Pv50Pi99lL13fMq87nx4e5i7RfsBOwN7hvwafFu8KzvPPAa8UT0lPe2+PP3Bvjg+pf8q/zX/dH+1f5M/C76Avwk/pT+b/0H/RD9mPwZ/ev+HQCFAXkDWAPuAksEGwb2B5EJ3gpkDEoOow/UDwQRZRPRFMUWzRhnGYoa0hnAF88Y+RmmF8YSzQ0DCjUFzwHHATr/wfvP+FD1VvNQ8bzvcu8j78zv+vA68Erume0u7lvwHvFH8XTx/u/V7xDvs+6r8CLxYvGW8d/wpPDt70rwIvNv9Y731Pkn+5D6MPg7+tH/ywJ/BNQH7glOCT8L7BLpFy8XrBmdHJMYZhSDFvQYOxeiFK4TpREXDg4L/wf0BaUF3gJT/8P+q/+E/079AP2l/x4D9AUGB/0HFAl8B1gF2wT3BDsEogLmAJf9TPsK+1/5jPWj8a7tXehr4ynh098l3zPgTOCr3fPcdeIp61LwnPCB8mz27/jm/IoEswqyC04MbA4BEAgR8BHLEqQTghJUDyIPSREPDy0KVQjgB78FQANIAtr/Ifue+PX5V/zv/FD+EQKHBCUENgaUDQUTKBP0EzsXdhi/F0EY1xhCF+MTLBEADzQKTwKp+jb1i+9r6cLkCuHF2kbTj9K92RjfSt193C/iw+gf7tz1V/5KAp4EbwpTEEkUbxh6G/IaOBlSGTwaJRngFiQUTA+ACS4GbwRCABj6rvQC8NPrQen059fmJ+fo6CHqWOvw7nb06fjI/HsB5QXKCQAOaRIWFlUYRBlpGWYZ4BiZF04VUxEJDJMG2QE8/Sb20+vk5Z7qI/AQ6xHiu+HZ5qnrDfJJ+OX63/xEA2YLdRCKEwQXehh+GMQaKR4WH8oc0RhEFW8S1w/ZCzsGawB7+sP0EvF07n/qRubx43TkBOWk43DigOSn6AztJ/Lt9nH6x/3sAtEIBQ2cDmsOSQ5HDuEORxAyD54JlwNTAVn/5/fA8CjzSPfi8c/r8+588tnx7vY+/y4AnwAoBqwLHBCCFUUasxuQG9scXCC9IUYf9RqMF8gWCBaYEv0MEAjnAy3/uPl+9HLvXOvC6fDpMem55kTljOWP52Hrr++W8o30nfd2/LYC9gcYCmMJEQgsCBEIlAY6BN7/9vlx9rfzJewD5UznNe0d61Llc+em7ebvz/HM96v7lfxHAkQKJQ4KERoWtBlJGyMenSE1I88hyB7aHIccDRyLGYoVaBFsDaMJtQUOAar73Paj88bxXvCU7gTt7uwk7hrw+PIm9z/74/38ACgGnArwCysMUgzRCq4IEAfLA6f9zvc39GPuPeVp4jXp+elx36TbzOK65+7pWu9b86H0lvpwAwoHnwhODoQT1BQ/Fj4ahh5pHgQa3hbwFvIWrhTeD8UKAQiQBlkDHP5t+Qz22vNW80bzRfGu7invnfHP84X2aPrK/TYAHgQlCoEQoRNWE6ATwBMjEmgRVBDFCDL+hfvk+U7uc+V86crrIeWz4Dri1eVB6E3rVPAh8vTyz/hl//AC8wYXDEkQfBJNFt8bnhyhGA8W3hZFF28U/g9ADKgJpQYxAub8efjs9FDxGO4w7Jjq1ecM50rpS+r96jTwtPbF+cT98QQxC3gP1xI1FVcVahTtEwoTsw2bBtAF6QKP84/q9/N99VHnu+Ho52fpkuv98pPyEPFU+o0DbAT1BOAK7hAvEucSRhdRHFccvxi1F3AY2hhcFx0TwwuJB1wIPgbg/U72hPNd8c3v6+1u6nzmLub36OnquOtd7ujzXfcJ+ZP+WQYnCfMIFgsEC9oKzg4gDoUFlf9e/7/7dfBs5yPtofOU6mHgUuWx7hjw1PPJ93z1+flTBg0LRQhWDCQU6hUuFvUaPh9nHTEY2hVKF7AXsx
VnEbUL4QiiCLYGJwHs+dT09fLL8tPwdOyo6WPq3OxL7irv+vEp98D6s/woAlwIkAqGClAM9AygC3MLWQluAob87PxU+F3qjuJU6jbvleSw2xXfGOe97b7y1vIL85f8oAjtC6kKQg9gF3kZ1hhJHeIiqyEEHJgYsBcWGAAXbhGdB5ICdwT6Aw/98PRt8G7v6/A18dnuPOw17Y3w4/Nn9wP7//5RAUgDbgiED8MSxxAUD9wNhg17DhgMswJs+Tn42/Yg7ang4d+M5rvkptx927Th6uYg7gb0IPTD+FoEnQpZC7sRzhhXGdQY8xvKIIchBhxYFPkQ2RKhEsMMGAMK/lz+Jv59+oL0pu/a7DPvHPGR7tbs7e9+9I72afiw+4cBYAYqCMoKzQ+oE7ATrBNQEmIPKw6sDNQEEftr+nz4seqR3QThkeks5kXbqNbP3n7qm/C679fvxvhxBvUO5w4KEqMbph8JHqAgHSg9KPQenBaQFVkZBhcGDakAWvsI/h3/Pve86x3nWemN7Jzq4Oas5f7pj/DL8m/zv/f/ANgGTgipCpsPTBWFFtkVHxSBEm8RWg7GBYL7kvp8+4Dv0t2P3J7mruZe3FrYFN4V5cftZ/Jg8Vr3/wXZDRoMRhFHHSIijR/5HlAkCyifJMEbJBbtFg4XyRDHBGD+ov9m/p72iu8X7bfskeyN6gbow+hH7SrxUvLX82L3Wv7+BEUIdQoHDjESexNyFJoTihD1Dc4LRQU8/En5UPZl68LdINs34UniTNof1KzZUOQN7KjuK/GO+GIENg0jDkERHxqcH3IeXB7MIkEloCGWGoAVshZzFq8PPwVJ/+L/ZgA+/K3zjO978fPzMfHt7TPuFvE99jD6P/qi+vIBZQrjDDMLHQ5DFDwV1hNAE/8QOQ3ZCpEFw/th94P3re673drZVuPD5CDb59SH2ADgQepa8K7uRvItAZQNxg2ZDxgYaxwHG7QcHiINI1QeBxlmFDgS3RKRDiUDRfs4/O/8x/c78pPv3u6r74vwT+8y7zrzv/aE99n52f/uBREIwQgEDAAR8hQUFjsU+BG5EgISAg1tBi0AiPpP9qXwy+QW20jekee448XWbdS53pjrLvEC8aLyn/sMCMIOkRD8EuoXyRrXGl8c1R9KIKIa2xL0D7kR8xCWCFP/8PuG+4r6M/ji9KTxRvHy8qrzqfMo9Z/3FPkL+hX89/+tBOYHNAhDCd0NrRFvEpAQ3Q1LDKELAAlNAzX9z/gh9lruUuJ74BTqm+rq3hLZWd+E62n0xfUi8zT3wAMWDtcQ1w8GEk8VeRYbGSQc7hn1ErMOaw9gEB8OcgeAALf9af4i/lr7tPfb9G70NvbY9+f25vWN9oP4xfog/XL/2QAzA3MGGAq4DNQNoA/PECgQLQ7mDAELOQbCAOD8ffmT89zryOSR43XoXulf4SnbcuFW7J3xq/FS88b4SP8wBscLvQ2yDfsOMxGEFIkXSxbDEFsM+wxCDxUNXwfkA8IC2AEhAosBqP1G+rb6q/uc+on6PvsT+uX4xfr5/ff/9gEXA+wDagdBDE8ORw3NDP4N3A8cDtwJNAdGAz/+t/xP+Sft1uUC6/rtbucO4eXgauXO7H7wsu7n74r4/f+8AKMBdgevC50LLQ0BEBURvRFfET0PCQ6jDlUO9QqMB40GlQZdA0wB3wCK/sH7YPuK+3b5Bvko+hf69Pg0+8D+sgBKARQDKgZCCVsMeQ34DCMNMw5pDcoK5QaDAg//KPwz9+bwOOwt6xzrmOfb4ujiWeer6RjqWezC7+Pzbvnk/Xb/cQI/B58JbQu4D2QShxC8DwISShPJEdgOHAwgCswIjAeKBTQCqv/0/aH7iPoa+yb6nfdT99H5PvwF/nH/wwBlAzoHEAoHC3YMFg+GEMYPFg+lDoAMwwjgBOIA4vzJ+Kfy2uxA69Trb+l85NzhkeP15x/rduvT7LXxUffT+3kA6AN7BZEIpw28EYUTuhPyElkSIBNTE7MQZw
w9CXEHcwWIAlYArf3G+Y33Tfjt+Gf3CPYc9rP3Bfsg/rL/HAGyA1gH5QoRDd4Ngg5iD34QaxCYDjgMQwlJBSACvf9o+nDyI+0H7Hjq7eeM5sTj2+Af5B/qdut16yHwS/VU+HP+ugV1B4MHpAshEOkRExNyE+IQnw6uD2cPzguDCGQGTwO9AL4A9/+Z/HH53/je+cT6RPv4+kX73f19AfMD5AXsBxkK5At2DRQPJBCQEMsPLg7gDLgLcwgmBHwBpv4I+U7yR+1Z69zq1uf34tvgEOLW4/Hlougv7OXvGfOp97X+VAWTCAgKJg2CEagUahV5FMASzxBTDyANSAqsB2YEqQBM/j3+x/2K+w/65/mR+tD7If2p/Xf+7AAbA4gEtgaRCYAKwgokDLkNew4DDxwP0g1RDCwLJwl5BdcBbf8B/M71tu8W7G3qXegJ5SDireDp4InibeXA6Kvs/PAB9dv5NwEcCMgK6AuwDxIUJxVXFLUTahLBD9cMFgr7BsYEvwI+/7f7GPto/O77tvmf+bf6lPtU/YX/6v+NANADJwbJBgYIiAphCxsM8g1eDq8NbA4pD/sM0QpdCQsGDwG3/Y37vvYT73fpyecV54vlnuIM4Krhg+aC6TXrUvAL+ET9DwAnBRgMXRChEeASPBQdFV0VeRP7D5ANawtbB38D3AHS/9H8MvsZ+3b6ufnM+vT7x/sD/Gn90f4kABMCqwORBOIFGAjTCTkLjgyIDeUNFw6ZDgwOrwswCNQE0QHA/a34TPRO8JzqBeX34lDjBONr4ZDgGuO96EvuI/On+En+IAORB10M4BDnE2YUrRMNFIkURhJ6DswLuwkUBlgCZgBT/+v9KPyM+vj53/oZ/H38WPyy/Ob9nv9OAZQCmQOvBGIGZAgBCuUK0QsfDeAN2g1tDU0M3gnfBvMD4f/z+nD2+/F07ETnReQX437iE+Iw4i7k1+i37uzzoviO/QAD3QjwDecQwRJkFB8VzhRhFM4Sdg/ZC/kINgaYA7YAV/0b+9v6fPq++Vj6PvtE+2n8d/97AXwBCQKeA9IEKAYvCBAJtggtCb8KtAsZDC0MKgscCQoHBwULArT9DfnR9Kfw/euU59HkseMd4zPjteQb6Pvsi/K59+f80gJdCF8MlQ/ZEi4VcBWZFPkT6xIVEEwM7QhcBb0Arvxs+in5bPer9RX12fXJ9y76Bfw7/a7+FAHvAykGhgdjCMoISAmgCk0MsQwUDIELHwsrCqEIpAa7A1f/fvpf9qHyJ+5K6SLluuJI4gDjO+Qz5njpj+5F9Tn87gHDBq4LbRBmFJ0XOxnMGL0XBxdEFRoShw5/CioF7v9i/Mv5uva889vxmvHT8i71j/c5+Uz7Ff8xA20GNQnOC6IMjQzGDVsPrA7bDBQMRAvXCRAI4wWCAsD+vPob9gDyvO636sDlZeNz5JXlK+Xq5Snpge2g8sb42f5IBAkJow2DEtAWxBiMGPMXoxe6FjwU/g9rC2wHMwP9/XT5O/bq8q3vne7L72Hx2vL+9Nr3dvswAOoEAQg7CvUMfA/IEFoR+hBdD3oN0AugCQYHCwQ8AAT8Ufhf9Bvw1+wc6kjmROOf4yrmFOgH6Y/qpO699BL6R/5SA+MIKA2BENETbhatF4YXmhY0FiwW5hMmD6QKNQcbA1j+F/pK9ubyCfGN8ALxm/Ic9Zb3bfpd/q8CFQY4CdgM6g9cEXASUhPdEiMRiQ+BDUwKfgZBAj39ZviL9GrwoOvK50rliuKC4AjhXeO95Sno2etM8Zf32vw9AQUGHAv7DooR+RNuFkgXRhb4FFUURRM0EWMODAu2BtYBh/33+cX2NPQo8rTwhPDu8UD0fveb+33/nQLXBW4JAA3KD0cR7hGbEpQS8BCCDlMMRwnSBO3/LPs/9lbxe+wl6PvkqOJ54HvfMuGq5M3n1upY7wf1rfpDALIFsAoQD4ASohQtFkkXzBahFLYSYBGID8EMmgn9BTkCpP5H+5b4Cvcp9azyN/
JU9JT2dPga+33+yAEbBY8I7AvCDtIQ/BGaEq4SHRLZEMYO6gvmCCsFHQDw+kv2yPCT6szl6eJM4NHdJ9203ybkYegC7Fnw0vUy/JQCeAjeDTESuxQ/FpAXNhgdF1UUPBH+Dl8NQwsfCI4E2gBx/eL6EvkJ97v0YvMY9FX21Pjn+vH8Yv+WAkwGiwn4Cy8OJRAXEU4RzBGiEcgPLg0DC3gIHgUKAcD7fPUl8IjrgeYw4n3fntxh2rPbduA65c/on+zO8Un5UgGHBwIM2BBkFbEXcBiWGMIXgRVeEv4OQwybCasFIAH+/cn7rvk9+FH36fXl9On1Yvig+nH85f3V/5kDJAixCtcLmQ3BDzMRHRKcElgSOBF5D50NCgyHCTEFPQB1+yT2nPBv61PmauFm3VfaldlA3ETgBOMg5hnsMvQ7/GcD3QnzD0sVbBniGwYd2hzoGrsXvRTqEf0N3gjxAwAAhfwn+aH2IfUC9BbzhPOL9fP31/lf+zL9PABNBIYHJgn3CqQNmQ9WED8R6xFaESoQfQ+3DkUNoAp8Bo4BjPzD9hXwvemS5MzfFNse17nVYddj2n/dq+Hs58rvLPiSAFQI9A6XFNUYBhzdHbMd1hu5GbQXzxS4ENIL7QYiAvz92fqX+ML2UvXh9A72Avh3+Sr6fPtC/m8BlQMpBRwHUQkMC0YMWg29Ds4Pqg+pDncORA9CDy8NbgnrBHYAq/vK9TPv7+jt4sPc7NdB1kfXX9nj29Tf2uW47fP14f2uBeIMrhJOFyAbcx3cHfgcjhvSGUIXfhPMDu4JDQVfAB/8+Phr94n2iPV79Tn3LvnK+Sv6Ofxm/8UBFgOuBFIHKgorDFgN/Q62EPkQARAgEJoRhxL7EAoNhQhyBGP/IPm/8hvsNOQk3CbXIdVj1I/UzNV82IvduuSL7Cn1kv7MBpsMqhHLFpsa9RvqG3ob7BqqGd0WeBLPDUgJ0gS2AIX93PrO+OD38/d6+Lz4YPgR+E/5qPtp/Wz+SQAAAz0FFgdICQ4Mhg7YDzQQkRFDFDMWqRWCE+YQfA2GCEECH/u+8yPsaOS+3YXZA9ca1XrUNNba2T3fKuZO7t727f6LBYIL0hHsFq0ZCBt5HE0dWRy7GRMWAxKjDeEI8QMSAFL94/rP+Ej4AvmT+XD5WvlP+g/8DP0h/Qr+YgCxAjgE5AWACFkLGQ29DV8PuBKcFegVixQCE/8QfA18CIgCDvx59MfryONR3uja/Ndt1RvUo9Rr1zDcseJq6kXyJ/nK/wIHzw20EgkW/hheG1EcwBv6GbEX6BQPERsMoAduBMkBF/87/Tf8Xfs5+uT41Pev9zf4gfjH+DP6tPwv/9wBAQUCCDAKZAu6DEoPsRLzFLQVzBUmFS4TGRAbDEQHTwHh+QzxHenP4/zfAdxz2IHWjNY/2J/bseCq51TvFPYB/K4CuQlKDyUTjBa/GaQbFxxqG9AZYhcUFBIQQwwyCWcGZAOCAJT+yvyI+iT4P/a09I3zsfJz8qLzcPbR+Wz9kAHfBVIJ+wuwDosRyhOTFckW+hbrFdYTuBDJDDoI7ALH/CP2gO/E6YHlD+Kw3rTbkdmu2GrZKdyg4Hnm+exT80/55f/WBuIMeBFXFRcZ7BsBHZQcJhvxGCQWshLODmILYgglBdABGv9r/EH5LfbU8+jxTPAg7yLvyfDh85335fv2ABsGlgqiDvMRPxQLFqgXVRjjF2gWwRP9D3ALWAY5ATr83fbI8KnqleUY4t7fo93H2tTY8djk2pTeb+RD68fxAviP/kAF3wviEZAWIxpdHVwf0x+WHxwfSR3iGd4VxhGBDeQI3APh/q/6BPfj8vDueeyb63/r/OuL7anwN/W/+nkA5wXUCi4PzxJxFe0WpxfHF1IXLxZYFLkRQA4UCjMFp/9n+ob11+866Ybj1N+83UbcZ9qR2CLZO9zn3wzk5uli8LT2nv1ABTcM7xGyFjEa+hyUH/wgqCCpHyoeARtqFosRdgzEBqAAqv
qA9Tjxme146tzoM+mX6lvsDu9D84T4BP5/A8wIsA2DET0URha9F1QYJBhhF0sW5RTiEuQPFAzsB2ED0v119+rwvOqC5XvhM96l28vZrNi92Iba392P4lDox+5v9Xv88AM7C+sR4RfsHBohHiTXJRwmHSWnInce2Bh6EuQLCAUE/mn36fGP7fHpVOdS5iXnJ+nT623vGPQ1+Sv+1gKuB7AM9hCxE0kVTBbsFtQWLxZVFWQUGBPqEPENkQp9Bm0BW/vS9H/u0ujm47LfNtyv2d/Xk9Zk1jHY+ttb4aTnXe5L9bb8iQQuDJYTWxpEIPEkFih2KS8pWCf0I/UeXhjXEAoJXwH8+T7zt+296eDmB+WF5LLlZ+ga7GHwQvWI+uv/0AQiCTANGxFTFKoWGhjwGE4ZSBn4GFoYche9FZYS9A1QCDMC3vtg9d7usegj43vevdrV1+PVK9Xj1S7Y4du64Hjm/Owk9L/7kwOQC24T4RpsIb0mkCpqLAgsfykOJREf2BfND5UHpv8n+GzxyOuS58bkUONm4/rk1Odz623vt/Nd+G39ngLSB4YMexBgE4sVLhdiGB4ZPRnLGP8XvBbgFFYS6g6nCnAFoP9x+VPzVe2Z54Tift5w2/3YD9fK1djVpdcm2xPgL+br7OvzAfuIAnoKfBITGqQgwSUuKXkqgymXJmUiOR0AF+MPYggeAVn6a/Rw7xDsMOpk6WTpNOrc6/PtTvD/8l/2hvpX/yMEhAiNDCcQRxOwFYYX3hhtGUYZThijFlwUoxF3DrsKQQb8AA/78PRX75vqsuZu46zgV9683LjbktvU3JbfpuNt6I3t1vJt+Kj+NAWpC9QRFBdFGzoe4R9YIAwg4x5zHN8YRhQjD9IJQAWKATf+O/tt+Er17PEZ7wDt/esL7P3sl+4G8TX04PcY/LAAWAXaCbUNFBG4E+MVpBdnGDsYJhcfFToSdA7dCTcFYQCA+wT38fIG7x7rwec75QHjIuHx3pbb1thJ2FXaxd0v443p1e6K9Kj6lwBhCHARiBkOIZIosC3rLvEsyCrhKBglxB/WGcUT6gpNAe73ve7E6GbleuJd4Avh8eFz4izlh+sQ85L6ugImCuwPChTrFpMYEBriHAsfsh48HYoaHRWiDWsHPAQ3AYf9Jfs0+T336vRr8BDrCueT4zjgs9v51TXS8NCK0JfR/dbj3/Tmhe0D9ykBWAveFxwmNzJXO/NBwkKrPWc3VzCDJwsexBNrB0f4F+pG32vWVNFb0D7SDdYu267gr+bV7tD4cgMEDRUWjh26IGMhJSGiH/wd6BtIGcUVyRDWC4wGmgFP/xr+nPvA+PH15fHb7CPomOTV4U3fqNzi2aPWGtQd09TUTNun5M/uAfqTBZQQ+xkJItorkjVdO4k9sju6NPsorhudDhoC9vaA7N7kNOGc39jeeuJq68PzJvp4AKMGvQyUEm4WpRehF60WFRFpCLEBnfyq94P06fSv9Qb1bfft/A4BZQR8CdQNCA6vCuAEQ/5S+Yb0Nu3H5sbhzNqs02zQrdGH107jiPBo/KcJhxezIOMj5ycPLHcpwyH7GK8PUATZ9yTuI+cc4jffKt/h4uLoz/Dx+5gJmRVpHtAldiqsKEwkSSDPGKoPKgjcABr34e3z6ETnB+aj51XuhvesAFEJYhAmFoQaCxzxGwQagRTHCj0CrPkK7Z7h39zH2ebUsNJA0ljTZtjE4WDtJPvOCyYaSCKOJv0mJiQUH84YVhIRC8UDA/lm68Dg5tmp1p/YyOCH6q/zPP8DCpoQdxYxHm0kgSb2JQUlhCEyGUkQ5gi0Aar6aPRd8LLt0+uD7GnvqfMr+8EDnQqcEA8W4RniGWoY4RagE2MOnwfy/yL3re655xvfqNfT1RPTbM6D0JbW3dlz4djxTgNuET8ezyl+L5gu/ynvITgZfBC/BEH3OOrH3tfTCc2SzsHVad9b6fj09ACTCPoNURSPG5EhxSQUJcMjryB/GRUQYQn0BPT/g/
t9+E73Z/dO9w35x/6tBYYLQxGrFQoYBxkSGZcYBRZkEx4RMgvXATH4Fe+r5tPfNdt62YDYCtc61rnWmtg628DeBeaK9ZEJPhrDJUUxHjusNz8qYx8ZFvEIgvup7Yvgg9epzZXErcM0zEHZv+aX84wCeRIDG9cekiNzJ0sosSSZIA4fMxqGEPMHyf+I+Jr1LPN88Hby5Pg0/Wr+GgQRD8gVOBiyHW4j7CPbH9QaQRXnDKQC8vmC9Ofvkuna4/TfBdyR2B7Xfdi92yXgtuYx7U7w1vHb9HD6yQMTEEgeNitYMCwslyG/EqMCiPSV6KLf5dg400rPV8yizTfX8OSr8tQEcBiyJGgqKS5KL84qiiNxHjkboBRxDcsHlgCv9+Lvjux063LsBvLr+tYCTQnoEfUaayDzJIMpUSqIJvwfhRfdDA0CQ/gW8CDsrunQ5/DoOeqU6irq1Onf6qnpeei76qXqy+iU6kbpHeOj5nP5Gw3pGPwiqyvvJ3AWLAcN/NTwh+s57cXq3OV+41fd7Ne02KviKvRQBTMSTh7fJoYnHSPuHdsadRgKFKAOhgnGADz1Leuo46ffi+K561H45wM1D2wb0CP3KPwsxS+FL/EpSh/VFOgLPACh84zrqumv6v3r1+zq7r/xFPId8cTuG+2g7iPwJPCb7mHr3ugf52vkKOaZ8owGDhmTI4gniyVRHFwN9v149XfyVvGo7Q3nIuJb3rPaotl84mzzbwN/EH0bISPKI88fIxn1EXUMGAc9AF33qvD07KDqgup77mP3SwOCDtoWdxwGIRknJynxJpYlUCSmHuQR/wbo//L4DvQl8mvzJPRq8gvwmu3a6pTr7vDi9Jf4Uv2m/df6lvXL7Tzmed/l3Cvci9wz5kn3agYHDwgWmR3lG0MVlhKHD/MMJQlNAF70Hej73o7aOdrx4f/xZwCECkERuBI+EecNbQsHC6YJfQh0Bvb/2vhm8/nxiPVR+toBPQ1MF68bRBwdHe4dBx2jGt4XfBbjE5IMjAI9+6X2I/Uy+Iz9jwROCOEGqwND/Yb0Lu796/rtN/JK9oT3lvPM68DgU9S9y9/J28wI06DffvVcDUcchiRcK8kr4CSwGqER3gzyCEYBtvXm6ZPfidd/09nXNebm95wGsxDHFqEYjxRBERIQwA7FEDERXwyiBL38gvZT8ufx3Pb7/6QKpxJ6FTEWWhdLGEYZwBvEH0oiZh8AFzUN4gKU+Azz3vJh9Vr4Efvt+WT1z/CF7GTrvO1B80f6df5S/rf5QfHO5TnYe82kyU/Kvsyt1Vnrigb+GhEoKzJnNqEuHh+AEf4IjAIn+qbvKOWT3anYRtTz1T3jKPeCClUaoSR0JzUmRiDtFpgODAtECdYCWv2t+Yn1BPMc9HL4OgCSCksVwxs0Hc8eER+8HP8XnRPnEfYN/Qb9/4z6G/aa9Vb62/9RBdIHngc9A3D7fPUP8NnsaOz57dvwLfBR7bbpMeTu33LbwNfZ2O7aAd1x5eX28QlQGBAj7CyVLi4iZhJsBob8dvST7VvpBuiE6C7oUOd86xXzrv/kDV0XcR5bId8ekRhmEeYLtgkkCPoF7QR0AVD9cfrc+Mf5gP4CBzUP5hX5GfocXBx0F/YS2hCoDy4NdwsVCqMHrwLN/zkBqwMhBfAD/wG3/eH2/e5s6Obl8eWf6LDrN+3h7TbsC+jG44/fFtxM3bPe4N315N75Zg45GhIk3SrEKW8fQhEGBRP8zvW279fqoOdY59XoQOj96mPz2gHXD90Ylh7/IAEiehxOFYsQ+AsXCPECW/1z+Bv0jfEf88/2S/wCBS8NUhM6GB8cVRz9G+0bGBrOFXoPYAv1BmABev33/VYAQAJCA1AB7/65+Y3xS+ua5inl2edm6tHrPe3D7YrsmerI6Grl7OJi4j7g+t2S4EjwnQN7DSEXxCNBJ8kdnRT+DhAHt/+/+nz3lvM77+/r2+hq6ffuxvpKCNISkxuSIiUl1h/gGPkQvAhbAL74Rv
Ww8nnxI/Ob+An/7ASVDMITuRlfHfsdPR0cG4wYHBTWDagHjwIV/f/4Nfh0+4gBIwblCDsIYAVd/pL0Fuvl4/ThLeOf5Ujo8ept6wnqbuey5B3iKuI+5AnkW+J15c308wWZEP8akSQ8J5shhhsYFHUKaAUp/xL4GfIg7lXsWeiF5/fsu/arAHYKfxKBFwgduR/gHSkZahSzDRAHPgFs+YPzgvFF8w326fcW/cUEIw10EyQYBR77IaskoSFAGWQPmgZX/972ZfKX89f42v1p/s38R/pY9H3tiufn4//kgOnU7Jvtme7R7nvunerW5QHkY+Jv4BncKtr04dXzyAWLE58gdisdMBErbiEnGRMQ1wXb/Bb12+3U6HDlvuIH5cTsfvptCpoVpx21I8UmpiMjHZoWSw82Bzr/Cfjw8dvtnezE8Oz2DvxMBHgPZRkUHjUfQSBfIAgdrBb9D0QJXgPC/UX50vYJ90f72/4W/4f8kfny9uLw6Olk50bp6Oyz7tbvIPD47Q3sNOcC4QndBtoS103Sd9Sx5K77uAyyGAkoXDF9MGcriiFQGFEQ7AdE/0XzbOgI4erbE9l/25nmffkgDEcZwiJ3JxAnZSPLHoAZJxMQDbMHWAKG+fTvlOuL67Ls4u6F9cT/GA2xGH4gISRQJaQmIiPKHBwVAQ5UCCECEP1f+W33/fTX8fPsIOen4xnjH+Z27KfzxPkU/rf+Hfu59JXtduUM3ZDVQdBqy8HI49Mq6fn8CA6AHrUtFTPAMK8qCSKHGXYQRQmL/n/vF+Tg27PVXdRC3YjvnwNTFM8fEChLK/EoCSJMGWUUHxGiDFMGSP6x9t/wIuyP6GTqbPI+/o0K9xW4HrAjuyStIcIcfRfSEx0ShxGqEHwODQunBn0AwPdu7uPkIt4P3F3bDt344x7u9vWo+kP/nAMHBFMA2fiH7RDgQdIUxBy6pr7D0LrlXvh1C0odqyeYKeckTSBOH+Ib9BT/C3X/2vFs5dfZCtSd2DXkb/QwBYURoxr/HlgeTxozFB0SDRMoE3ATehIUDzQJyAG3+i71pfIK91X+3wcnEGQVLxZRE7oRKxCQEGQSuBcFHesezhyMF6wPpgQw+LLs9eLK3D/bdNxB4cnnRu+u9Y38UAWaCOgDyPyd8+TmataQxubAdMgt17LlG/MS/z4KdRPIFHMRZBA0FNcb3hzpFKsO0AYV+TrtbOWE5Dvsx/dEATUFigdPCI8HoAOC/9YC4QpgEqYXsBrBG+8aghYGDh0FLwD1/ywBAwEdAdEA1P+P/nP8pfw4AAcKGRj+IxErKi1nKwckPxWaBGD2AezD4wTcedhq2fzdneN36DTvnfc8/q7/L/0N+/D2I+393u3TctI22M/fF+ZM7sr46ALYCokNaQ7VExUb3xxWF+YQwArzASD65vN470by+viR/fr/o//K/z8Bxf8g/XT9RAIICcgNNhJHFq0ZZRz6HBca4hPaDq0K0AUr/4P5C/eR+G78Sf56AKQG2BFmG50fYyE2JPEkWBsiDecAgPb97Rzn7+EW4p3nCO128Bzx4fOQ+TP9OfxX+YP3SPIR5njWb8520F/V8dvF4ybsi/ZqAMAFcQgPEFIc5CS4JbIg2RgfDywC6vZZ8IDtKu+q8r7zWfMu9GP1qPdd+hj/WQcDD3YU9hfmGQIaqhgnFdER0Q+NDMQHoQSvBLACgvw39gb12fcG/YsDmgxpGOsh+if1KLEkChzYEmoLXgFA9uruyepi6Erm/OP25NTnLOoF7JfvofNB9j366PuH+SrzS+4d7jvsVee05B/pHvDe8Y/uCvAB+Q4AawNGCe4Q+xaIGaYZtxc/EmIO/goAB1z/cvbD8lvvgewQ60/uOfbc/KUAPgRFCTANmRL2FzUdKSN7JHYjxx/aFnIMdgNA/qD6ovh8+VD8av6LAPECdAlKD+QRPRMLEUMNigdOAn/9b/t++eD4+vdW9Jbxy+2d697sevBP927/nQQ7Btf/evVF7P/iYN
2X2xHfv+Fp3fvaI97k40XoofEZA1wVGiQkLQcwPyrEHgoUFAo8/jPzGe5v6tLmyeM04G3iQOmu8kr9EAa1D5sXjx08IfciYyTnJE0lFCNuHMcSwgoJAz75QfDO6v3tJPNz90z+OAdhEKgWtRtNHmEetByZGXQV4Q3JA1L6DfIA6pjhT95X467qBPFX9T/7ZQLNBkMIiwo1DzsRUA9ZBtP3Qecq2F3PBMgGwsDEjNFt3zzoE+7Y9+QEEw6XEw4XaRxzIXMhyR0DF2IS1Q3QBiz/GvdC8VPt3+q46H7nD+w+9OL7GgO5CtkVHSERKMUrDizvKVAk7RlDDSoB6ve28XHuAu3y7frxrfaM+87+9wJ4CTgRAhdPF3MVuREkC4ICSfn781/0aPcM+/b+QQKhBBAEbAFG/T/5bPdT9W/yxuvu46bdENiF0VTJN8cqz1Tg2PLHAa4NMxdUHpQfwRxOGPsWThoKHlAcSBMpCsYAl/W+6IPd99zE4+nrY/Jc9z7+bgUHDhIWXxzdIvonICq7JSwbYQ+xBUf+E/gA8xzwhPCS8FHwAfL49jcA0gvUFvMdayGQIGEbExQ9DGoGcwMCAfn9W/k788vti+kO57bniOwx9Uf+fwM5BvoHbgd/Agr4L+6r6ufoRuFh1rLRJ9c74cvnVewL9PgAZw1+Ep0TtBT7GF8dRB11FzsPNglKAYb2W+ul5AfmBuvD8K715fkKAEEGEwxSEZQTjBbxGloc/xiEEqULJgXq/S32Iu/F6unruO949oD/FggiEEQXVBzMHTEdVBu/GPwTQQ3hBJD9h/fX8IXqw+ZE5qDmXOjx6t/ulPRS+6IDlQvpETAVfBQKEbQKSASV/zP9Nfyb+QT3NvTf8ADtd+c+5B3lhugx7iP1jP3UBLUJGwylCyMJoQW5AZD9e/m19Vv0nfVC95n44/tQAfEGhQvODu0RcRUOGf8ZTRdnEm0M3QSu+37z5O697vfwW/Qn+fj+GARvB+wI+wnPC3EN0w4/DykO+At7CH8D0/0y+dD1r/SG9sX4ePvU/zAFJgnfCSEIlwXMA9UCVgHD/9H+Jf7e/Dv6S/b+8Onstuq56cjqF+0m8NDz3/ZI+Zf7P/4JAc0C5QKbAI/8Ivna9q30KPP18s30M/jx+uD8AAD5BN4Kbw/AEjIVVxaYFdUSlw58ClkHZwQNArP/zP10/GH8LP3U/bT/8gLhBr0Kdg1VDg8Ocg3uC5IJhwdsBegCYQFtACT/0/3s/AL8tvs9/fn+JgBLATcCcgEW/7X7ufeW9HzyCfFL8K3wyPGg8hzz/PNG9Un3cvro/cgBQQVQB7AHdgZBBMIBSQDK/xz/4f3h/BX8+frV+Yn4LfgS+k/94gDKA1sFigUVBb0EdAPdAVUBvwGiAfn/jf0d/D38hvzT/Jv+twJVBzgLHQ6pD8oQThEQERUQ9w7mDcYM/grwB70EDwOPAlEBqP8r/7L/yv/B/TH66fYz9APysfBN8MzwCPIE8wv09vSN9Uj3wfof/2ACKQRHBYgFogQaA/sBzQFRAS0ACv+N/ev6Vvfi8zzx7+/x73Hwc/Gn8+T2i/pO/U7+nP6w/8EB9AO4BTAHhAhrCe0JGwrCCckJIQqnCuYKOQvBDC0PHBFKEZ8Quw+IDncMsgkjB5gF2ARRBMkDJANeAogBowA5/zL8n/i194D5o/u9/UoA0gFkARn+gflQ97T3WfgD+ij/wQW2CSkJQAYBBOgBYP3C9ibzBfTU89rtYOOU2rrWztXQ1TTYVN8E6pzznfdI98H5cAT8E0EhjCdBJ5Yk8SBoGXUOhgYiBSIHgAc4A2r8T/fl8znwju9Q9oEBlgoqD8wRyRQnFi8Tzw3CC3wOBhCSDOkGpgNZAu3+Yvn09ar3Qvzn/9cBLAT9BwIM4Q1nDB8J4QXzA6UCvQAD/6D+Uf5x+8b1Z++76gToteaZ5p3nceiA5lngJdnc1ZfXHd1w5NXsvPVs/cQCZwY6DDAWeyEHK8cwXT
LOL9wotR1zEVQHm/+J+L7wOemJ4z7gZt+K4R7n1O/r+fkDHA35FLcbciG6JYEnPSZjIpwcSxXtDIcE5/0v+rH4CfjR99L4Nvsr/tMAGAPtBSAIuQjLBhADOv+I+6z3PvMR7znsjurL6KPm++QW5GHjreGP3VTYMNch3rPqA/f3/5EGKA2vEhQVehaRGlchcyYwJaEdBBMiCLz9MvS47dDrKO1Q7iXt0Oo96hHsNO868w/5KwGNCb0PNRPmFREZohyQH4chuiLAIr8gkxs8FD0NFQiEBHwBrv5k/A37Nfon+pD7vv6QAm0FxQa8BqMF8QKp/jL6Uffk9Wj0TvFx7H7mOeCk2rbWitOLz5DNDdMY4dLwrPsQA/QMERpnJB8pciubLqsvHSm6GmYJ6fnh68LdG9JgzRHQoNWg2lffBueb8X38uwa9EWYeyylcMLEwxyz4JhQgRBgzEJ0JjwSEAMX8OPlS9sL1rviu/v0F4AxRE8AYehxCHYsbhhgDFXIQgwoYA937yPVQ8JXrYeif53bo4ekb62vrWeoh6OHkNeBA2ePR9s9j2JDog/dYASgJWBMcHkIkdiWYJZ0n7CYQHrkNYvs97Ire7tCaxp7EnMuY1ZLdpOPO7Pr5xQf0E0QfNStrNfI5KjbtLGEiDhjRDYsD5/rn9bX0gvUm9oH2avgh/lwHARI1HPwkBixjL+0t9yeuH2UXUA9BB2z+i/VB7SLmQ+AK3ILaZdwm4YjmE+r/6krq7edb4zDcEdfk2qToCPoqB6wPphYhHVIgBB62GegWXhXUD5cDh/Mh5GzXU8z0xGPELMy82XXmBPEY+6cGTxLvG3Uk3yumMbsyPC5ZJKgXTwuWALX3IvDl60ftzvKH+DT8kABuCK4SohwwJSstwzNUNjMzLivGIBoW7Qpw/9H0aOyf5jjikN/V3t/fe+K75s3rau8V8F3unOs75k/dsdWl2Broc/vyCi4VEx6kJDIlISCvGcIVSBFnCIr6rOnr2N/Jtr4xujW/N83t3mHwcv+xDV8ZqCD+JLYoMS2xLSwpaSADFTAHtviU7QvnMOWb5wPvfPkzA+YJcA/WFpEffyi7L8E0FjflNM4tdCJ9FaEIm/zp8hbrIuVU4a3eRN5j4VznVO449IL4gfrW+Uj28PD06ajfyNQ30XbaIu2Y//EN7RomJiQqWyUkHaoVSRD2CXL/AfFg4STS4cRivYa94sZp2GPtiQDaDxMdHSacKswrPCveKXcljx2pEYgDGvT65nnfOtzI3ZPljvP8Aj0PJxiCHwAn9y1XM6k2uDf/NOcsxR/SEPoCRfdB7gbn4OET39HdZNxx3TDj9ezy9+f/KATUBYkEIv699EDrR+Gp1+fUhd327pUBuRCeHBom2SkAJfAblBJ6CdQArfgP7g3iP9a9y2/FxMUFzyHgffXFCWEZ2iN1KawqsSdlIsMcuhbQD8QGGPwE8Ujn5uC13pbh5uvI+x8Lohe9IOMmfyqALNMtUy3RKq4lqh1uEhoEcPb061LlAeK74cbkhehb7NPvfvLY9ED4UvvI/Nb7WfZs7l/mZ95c1evQ7diP7nAIhxvaKPkytjX7Lp8jEBf6CTj+PfSU6V/dStJ5yjnGZMfqzwLgo/UTC+4chCkjMVs04zJ1LAcjyxmpD3MDj/Tm5mndrNgh2WLeTurN+0kOFBxkJaEqeiyGLGQqSyd0IsgcVBUXC7b8Le625Bbfj97U4h7qrvBg9W35E/qC+Nr2dPXa89Tx+O3x5njgv9pY1AjVNOdeBV8gHjG7OM878zh8LXEdpA0FAJP0HOr422vMJ8J6vlHBBsqz2pPxygrnII0uejYfOb83SzEXJh0aiw6TAxr1N+ZG22/W39cL3XjoQPpdDtUe4CgwLWsvty5PKhMjXhp8E8IMJQSD9x3sOeQM36PdUd/w5AfstvKx93P5+fd69sr3yfdx81Lto+Yl4Yvd79bM0T/fvv/iHxA1a0AgRUJCujfFJ5oTJwLT9Qzrlt
6NziXBpblhuOe+M89F50kCTxy1MGU99UHfQF46Zy6cH7sPEQGy82PlHths0fTTbdzT6M75zw0oIO4sNjOeM68wFSyVJnIddBJQCBH+bvP653veiteV18rg3uqN8vP4df0c/+D7P/fG9er0qvIh7qvo8+Ie3UjWadBH2jD4/xsLNq5CEEnWR3s9sCuKE33+rO9z5tHbks3lwci53LkTwv/QAuZuAOsb0TC2PbFC0kD7OFos2hzACwb99+8p44fYatLa0/vacOeo90EMXCBrLb8zXDYTNnwvpSTQGBoOSgPo9yHsM+I/3B3a79sJ4RPq/vIu+5D/nv1r+ir5GPmb9WvxM+3o5+nkcuB+2D7X3ek7CqMmJTjnQjdJAkYVOusnPhFG/PvsFOGp0uDDKrq2tve4bMF204fsUgdNISQ1qkCRRJ9DtDy9LiQdkwpy+mfrxd0o1BnRrNUm4BbwbQNCF+gmTTE7NxA36zBsJzIcChByA6b3v+vm31PY9NRn1g/eXejg8iX8hAG7Ac3/v/2J+kD34fLr7ZzpnOVo4UTan9bc4vUC+iVSO+ZFI0tjSH06YCeJEjD8B+sL4TrXaMoTv6a3OLaEvXbMMeI7/sQb4TN3QUpGbUQJPTwxwh/ADPH8DfHW5A/ZodOl01faHudN+dcNlCHnMtI8BD6ANz4t5h+nEO4AN/LR5fjc7tZI057UOty/6cD0vPxVBOcIRggVBJQAsvn974focOO238jdT9p+1P7c5PnSHHE1b0I4Sg1Ls0PHMWoafASh8P3i3dhTy5+7CbOKtKC7TcjW3EP35hTVL1JCAUlHRypBVDc/KVUWSgOQ9JzoWduO0fbQgdgi5nD55Q4TIjcwHjiROVUzHimIHskSggbe+VnuFeMe193O7st60EncnOxE+14FVwzBDScIwgGW/Fn0++376Rjlf+AJ3RHb/uAR+MQUvylLOtdFMEclPpMvUhwaBhHycuJQ2AHNrsEcuh23/bucx+rau/TqDqMm8TmGReRHxkRQPcYujhxvCa/4Beyt3mfU49B81iDhIPIlCOMbQSxaN6A8BzlOMKgkMRcjB4X2yufJ27nS3MqNyMbNpdtZ68b3gwKfCe8LhwriBIX+e/v+9KDu0erE5cDdRtWl24fyvg8XJoQ0GELyRkNBDzTaH7wJxfY66Wbc980awDW2Z7N/tnvBTNWQ7r8L+SYCO6NG2kqNSNdBqDa2I0cPVv6y7nffkdP4zrnRx9zW7uADVRYgJ001xz0HPrA3dC88I5MT5AA37ZrbG83iwie/WsRo0SXiCfQ5Ap0IlA1cD84LVwUH/ZX3u/Gr7CrpleD23I3pZABTEKscqy04OSs8mTacKigaJgcg+eXqY9i6x7G9ALgStbS7wMrN3dn2ShH+JqA2v0CkRNBBpDkZLHsb0gpr+Yvov9xj1vjTitdc4u7zcAZWFxYoaTXUO0s76TZ0LR8f8w1u/ZjsLdv2zlbIZchKzTzYhOeP8z39UwVsCL0HOAWAAZf+dvel8njwrOYp3/3krvdLCccXOihaNEE5MTcuMKIjhxNaB2D7jukn1xHJI75CtdCzaL2KzR3i8PuuFI4knDE8PCw/aTtNM0ArHiCIELj/Q/HK5YDcPdnS2h/ivvAAAtAR+SG3L0w25zclNWwswB39C0L8u+7P33/Tus36zl/TJtn84WXqZPXe/q0BUAE0APn+1/ie8qXxSfAl7bXtJvnRCLUVXyDlKD8upS4dLcInsB33ERAGHPjZ5ofW18Vmue+2d7sgxejUoOoWAbQSXiLdLrgzMTRZMmotTSRcGKEN7QKr9s/ss+YR45XjgOgA8c/9XQyrGU0jMCnWKy8oaB9TFPQGlPzs9FLt/eds5PjiAOTw5pDpQO0x8HnyQfSb8z3yXO4q627ro+ol6QPr2/XyBvYW5iOXLFUzgjd3OEwzAiqLHxAUjAeE9s3kq9TRxja+LrzLv8fJdNkA6tP4iwlQGYwj9ynmLF0uhCylJsIeAh
XBCz8DAfmN7aDkuOI05BvnUPBR/PcI4RRQHIYgrh+dG+YWQRAJCnwDzP1Y+dzyCu336Hvk5eE94SbiDOM44jTjPORB4z7kUekH7QbvQve8BdoTaB7qJvEtqjOCOPI4TzQZLPsiWBq+DNH7yurB277NNMNCwHfBo8jr1BvjC/Fn/tELnxY8Hboi9SkELowsGyrCJ+0hmBYYCI/6Ue545UHhi9+S4YvpAPUa/rUD4QjBDZIPRhCsEY8Q5xD+Es0Q+wusBMX7f/IC6Jbg5duS2KvYR9na2l3b5tuS4Pnj4ejR9TAFIBMTIPMp/DD+NJg4/TmdNbYw2CoMIcETPAPr8ULhCNTpyk/FXcWAyqTRKdrZ5FHwGfyYByQUICD7KDYwEzRhNPAxpSv2IXcUOQZm+Q7r1d/z2fLX9Nr84KDn2O3s8+H6xACcBdkMwRQdGysfoyACHaIUwwxsA5T2sOsm5QbfXNmi1zrWFdNB1LfZ8N755HbzZAWrEBAZtyKULCcxgzN7OFQ6tDe3M14rJBwXCjb6buqG2kjRks1XyXLHZMrDzrPU9N5K7nb/KA98HcoqEDTjOI48HDzQNFkqdR2mDIz6O+hz2IjPCM3WzafPodNW2Vjf1OYt8P76sAdtGMInqy8XMoUvLykyH7AS1gaQ/ab2XO7p5GbbR9Ny0OPP/M7t0/PfOO16+F4DtA2/Fk0hYyvYMYQ4ED4rP7861C+KIcUR3AK59D3mgdrj0gTN1soXzZbQ8tbt4ULvnPxNCqQXWiNuLeI04TXUMdwqkh+wEGYAfO7/3TfRRMnzxaDDnMbgzerUJd8v7NL6OwurGmApvDQhO/09dTxQNlQsDCGBFYYI3/qc7pfiPNcP0ibRx87Lzx/Zr+L/5zju7fbzASQLnxMVHnwk1SdlKlkowiJNHMUVsA50Bn3/k/q79UvwAO3G6iXpiOvC8M/2iv72CEARMxYRGysdcxurFksP0gTc+FztFuGT1sDOaMk1yMnJHs4M1SHduOgT9rgCMhAbHesmSC6oM/U0UDJML1AsqCXWG+QSOwni/BDxD+nn5ADimuL55Avls+WX58DoOepZ79n2l/2XAvQIsg1fDn4QBhO7E9gTbBRLFQUTLA6LClMGEQMdArYB8gIHA4QBsgLMBOEDsAN8BAIDoQAX/ij5vPGW7OnoKuS64Jre79wo3pjggeI350Luh/Vx/AUEnw0zFo0ddCSJKE0pLSmbJ8shFBpaEyYM7AMD/Yv34fJ57pTr6+rv6Yvox+il6knsXu3r79zyN/Ve+SX+xgJaCGYNKhHSE8oVSxfCF78Y6xgAGAEYVRZeEZULgQd4A2b/NP3V+oj4tvcL9hHxluuD5yLk6+Ld4YfgS+G74QzhmOEo4yvmUOyN9Nn7+QEICjUSQRg5Hj8kICilKT4qzCdXIpgc0RbdD9UIMQOG/hv6XvXd8E3td+qJ6LPnxueM6Dfpuukn6qzqMuw370TzPPmz/10DLQf6Cx4PKBH9EpQVoBdQGAIZOxh6Fu8VJxUYFJsTKhMsEWQMngZgAOD5jfPe7Ufq/OfP5efi3d8G3hbef+Bm47Tmj+tf8V72KPqg/loD+Qe3DU0UYBniHOkf9CD5HqYb9RjWFSoSqQ7tCuAGNwIn/RD60vct9Vf0bfQE8/fwwO507MDqT+tk7tHwaPOX9w77Uf0SAEwD8QbFCjwOtBA2ETUQbA6oDMoKeQngCbgKkQphCaQGZgL3/Qn6RPYd81PxefCk77vuEe2T64TrDOxL7SPwYfRt+aX9PwChAhMFggfdCdELAQ7gD04R2xFeEQ4RjBC4DzQPPQ6+C0gJQAceBDkBh/9D/o78zvrU+CP2QfPC8JXvnu+J8GHylvS19vz4lftG/ocBhwVpCW4MAg+CEGsQGg/qDAgLegmXBx8F7gFD/mT6hvZD8yfxxvAp8U7x2vG48ivzU/PR82T0RPWc9+H6Df6iAYoFSAk/DI8OYhDZEPEPtg73DCcLqAlcCOcHHg
j7B1MHZAbeBCsDiwJyAv4BZwJqA8UCKAC1/M34c/Tg8EfvLe/r72nxOvPW9CH38PnO/MwA6AXFCqcOaRFXEssRPBAODpALBQkpBskCuP7T+Tr1n/Ec7wTu9+2X7WXtAO4j7qDt8u3470TzCvcm+7D+XQE2BNMGfAgjCm0MLQ4DD5UOowwFCooHbQUZBBEEHwWUBqAHqQfOBgQGwAW8BfAFuwaoB4kHygXeAiv/NPud9//0OfNB8l7yGfPw8y71W/c0+oP9CQE/BNIGzAgyCoULqwxGDd4NUA50DccKcgfbA97/Lvzp+Tj4F/Y09Hfyxe917azstuxD7q/xjPUi+Tb9FQGlA30FuQfXCRgLxAt7C58JogZQA7D/SPwo+p75APoR+7T8Xv6L/+MArwLLBFgHMgqPDKINWA2qC9gIRgWIAYb+k/w4+7/5hfiT9772sfbN99D5f/yR/24CPgQFBeIFAAfOB28IAgnZCLcHkAXiAYv91/kp94D1XvRc82jyrvEQ8ajwX/Gb87z3Cf0cAm8GxQmlCx8M6wtzC5AKpglhCPEFbQKe/sv6QvfE9KPzvPPc9G/2z/dR+VD7rf0wAAgDLgYjCYMLhwwwDAgLEAmABhIE8AH9/yv+pPxe+6D64/of/LL9d/9eAdYCzgPMBCgG4QfYCXwLNQyZC4UJGgbPAV/9fPnl9mz1bPR+87XyM/Ll8QPyH/Ou9a/5Nf5bArYFFwg7CS8JnggxCPwHwAf8BoYFYwOtAJT9p/pf+Pb2lfYF95L3RPgu+R/6H/up/LX+9AAuAz0FmQbIBsQFHwQ2AmkASv9r/sT9Of0M/Q79CP0Q/fD9dv8QAYwCuwOcBEoFHAYHB+MHYwhsCMoHWwb1A88ApP1I+8356vg2+Lb3m/fR9zX4APmf+nH95gAGBDEGSwdvB5kG+QQtA9IBQQE0AesAvv8E/kr8r/pV+XL4Xfgx+az6LfwW/X/9BP7c/vf/SgGHAqUDqAQuBaIEKANQAdP/6P5c/gT+4/0Y/nL+p/6Y/q/+Yv95AF4B4wFNArUCMAO/AzcEXgRjBEQEqQNiAp8AwP46/WT8UPyD/MD8Kf3C/Yb+av+WAEQCTgQwBkQHVwdZBsUEFgMtAWX/Tv7w/cH9Sf2J/JP7yfp2+k76LfqH+lr7O/zC/O/8Gf2K/UH+If8HAOIAlgHrAZgBwQDK/yX/5f4X/6P/XAASAacB+AHhAZ4BfwF8AVYB/gCjAFcAOAA/AFwAZQBvAGUAHQB4/1/+Kf0//M/75ftP/PX81f37/j8AkAHpAl0EuwW+Bj0HGgdoBnMFYwQ4Aw0CDwE7AHj/j/6s/f/8t/zP/B39hP38/XT+sf6O/if+xv2W/Z394v0w/lv+Zv5M/hT+5/0D/p3+hP+SAIEBMgKAAnMCKQLRAWUB8QB5AAMApf+L/8T/RQD+ALsBDALVAT0BZABc/1P+ZP3B/Jr8zPwt/cD9q/7m/0wBnQKHAwkEFAS4AwEDLwJzAQgB3wDbAOMA5wDFAFAAd/+R/uj9vv0Y/rr+Xf/E//P/8//n//D/6//H/6f/lv+J/0X/qf7a/Tz9A/1A/QL+Gv+CAPAB8gJIAwwDiALvATkBhwDy/8H/5P8hABEA1P/f/yEAJACS/8D+Hv7d/bT9Kv16/E/85vzU/aj+cP+KAPkBQQPqA/YDyAOOA1QDzwIcAqEBbwFlAQkBagC7/xb/Wv6H/en81PxF/fj9rv5t/zUA2gAuATcBCgGoAAsATf+T/gf+m/1P/TD9Y/3o/Zz+ev9qAEkBCgKPAusCIAMhA/ACjAIgArgBRQH9AMkAgwAUAGz/vv4b/oL9FP3Y/LX8pfwE/Yn9DP6X/gn/cf+5/wQAVADCADMBwQFyAiUDMgTxBPwFvwRBBrQM4QyrBzMCdPs19OnzRvwHA+QD8gFZ/Tr9NQA1AeQBWQQJBMD99PrP+Gn3Qvg8+dz5bvUK+HD5HPaK+jD7SwCVDAsPDQ0+CoEJLwtHC3ILpA
g2ByAIOwTSAGEALwBJAOv7OPnQ+335MPhH+DP0jPST9lP1kfXg9+b4Tvs9AAsFHgdLB6gIawufDjcM3Ac8CFwJ4wW1AdcAOf60+Xr4Q/dk9Fb1+PWN9Wz4Kv2U/1L/5wG8BIkFGgaVBWEFewTiAt4BWQF/ARIAzv7s/nD9LPsq/MH9bv5A/pH+fv7H/WT/DgE3AjED+APpAq4C+AMoBGgDxwOlA24B4f9T/6D8lfnU+rj68Pnb+bH6yvvO/DoAEgG4AR0EkwQYBcUFCAbYBtgGawWfAwQCy/+k/EL71/6nAjQDUACv/Fj9ZgASAecAiAI8AqP/zvwN/SX+9f+vAoD+UPwCAJj+RP0G/43+T/5yAAACaf9R/en9+f2M/rX/4v4q/30CYQNzAlkCWgONBI4ErQQQAx8AWwA2ABP/uP3e+3b6kvh19zL1ZPTs9ov4Q/mk+iz8zv/7AYACDwT8BaMI0wdpBzQHLgVHBQgFoAPLAvIAJ/3s+Db4ePlE+Dv7JgBRAPIA1gGEAa8DIQeKCP8FagXWBN3/LP9mAN39mPzs/On7Sfu6+nH8Bv9vAI8DewNiAsoEKgVyBVMHWgcjBt4D/wBe/az77vr2+e75k/pq/IP8tPta+zb8UP/0AS0CtQBFAB0BaAC3/iD/wv9d/8X+pf1H/tv+7f8iARoA6wBUAq0C3AF7APX/PQDn/8X+8vzy+9n8M/1I/Vz8cv75ASIBbgDtALYAhQHUAU0BUQFZAIwA2QCQ/5b/ggDSAXUBMwDm/7T/vABCAHL/ev4o//QA8f5C/6gARgABALAAbgLNAc4B8QDX/14BIgGZANj+SPwW/AD9if4PAGwAXADD/539YPxI/n0ASAIiAp3/8P9GAbsBKwEdAB//4P1I/o39df1G/wn+KP1W/hP/8f6x/l4AiQAg/y//fv9V/1cA0QH4AFIAjf+G/mr/bADdANAAuQAeAJL/ZQC+AQgCXwJTAqwACv9e/Q794f6+/6H+0fxv+zT8sv2m/pL/jwBPAicCLQDuAAgDpAQBBmUFVAP0AWcC7AGH/4r+If8f/6//Zf9M/VP9KP9FANL/9P88ANX/Xv/N/WH84vx8/nb+DP17/Ib8Mf2p/oP/NACSAegCQgIDAQACTgNNBN8E8AO9AikBvv+6/pz9Kv6a/oz9JPyN+mf6Q/wr/fD96P4BAPMBbgIkAt4B7wLpBA4FngOUAjYDvQMlA0ICzAGWAX0AA//R/Rb+7P84AKv/Kf8X/24AWADN/+b/+v8xAXkCnALqAeABEgILATgAgQCFAcIBlACk/7b+lP4M/1n/7v5h/fD7nPrp+ub7dvoE+oH67vmr+W/4lfe9+Dn7uvwh/BX83fyC/Zr++f1L+2T7Fv4M/4X+Tv4b/R/9X/++AMEA4QGuBEsGgwf2CIAKsQ14EHcP6QxTDcgOTQ/dDkANvAt8ChQK0Qe8BOEEZgRZAgQB0/+E/oD93vsh+sz5vvml+EH35fX79R72avZw99714PR39Ur1D/bS9zP46/dp+FL4pfez94z3D/ay9EP0N/NA8jrzfvOY8kz0CvfO+Gj8zwB2BQsKcw29D4sRkhQ6F5oYOhgEFq8ToRKXEXcP1QsPCGgGbwQAAooAWf/v/br9Pf7g/Nv7f/xJ/Rz+6f80AjwCAQNBBgcI+wdHCD8JfgmXCYEJzgczBc8CCQFA/2L85fgu9TnyO/BO70nvRe+b7gTun+1w61fq5upT6ojpg+h05TDk9ual6f/rz++99LH5uf42BRYMdhMFG3sdTBzOHkQi1R++HLUblhecEW8OiAvXBUUBV//k/EP5wfbD9EP0EfY291/3n/le/cwAtgTsB/MJYgzJD24SXBQKFmoWpxbaFasTYBEiEL4OIwxaCHID4v3e+D/1FPEL7S7pzeVZ4m3fg9633q7feOCt4LPiAubu5gbpFu3O7VfugPN3+ff8LwANBaII+wi6DLoTBxhyGlAbDRtbGg0aehkQF+wTUhCgCxoHxQ
Iu/Uv46/S88mrwQu1x6x3s6O0q8UP2k/kW/BQBFgc6CzQPpRRSGDMZ7RnFG9EdGB/uHRYbWBjxFd8UxRIBDTQGtQC4+/D23vG47PvoY+Uj4U3ehd0r3h3gSuFU4ebhbuM65sfpaeuz6/fsxO0x8GP2+v14BHQHfAhFCpcOARWrGj4egR4vG9UW1hUkFkkSbQy8B3MDxf7k+V/2q/SL8mLwVe6Q7DDtNfC083L1vfVy+WsBmwgGDVwQoRXPG3cfsCLoJUEmtCU/JjUk3R7LGmsY6xKXCmYDX/5C+RryouoG5TPhz94E3WTa7NeG2Mbcj+CC4Tvie+V36qzu7vAf8uDzIPae9xf4ZPrl/xcFTge6COoJ9go8DwYVwhZ2FcAVsBZiFNIQog+pDuUK+gWVAdD9B/vD+PH2pvMr7t/rn+7Y73nvcvJO9oL4wPyCBBALVg8cFFgZuRyzH90jkyagJhElBiMbIBkcoxddEgMMLQVK/k73i/Hd7JPoZOQN4H7doNz83JLeP+DN4dDjHOaT6Zzuq/Fm8przZvV09pj3U/hL9/j0xPQx+hoCIwYMBqoHsAsJD3UUYhvMHNcaHxp4GDwV1xLhEEYNyQfTAGf7h/gk9kHz0/CP7tzrRewy8GryRvMn9438jQAKBuIM3hFBFjIbbB+2IbIiTSOyIk0gDh7fGlAV4Q/JCogFfADw+8/21/Bo7OzpX+fH5ADkv+Mq4yTje+Rn5jPotelE63Ht4e7h8L/yfvNB9F/1HPXx9Dv1L/S19pv9lQC9/+8ACwR7CQQQDBXXGAQb6BrmGY0YHhZrFGQSkw0hBxkCj/4w+o31j/Ks8Ibuke4W8OnwxfJp9h360v03A8IIlgwsD9ASTBfYGrscAh0BHHcb/xsoG8UX0xOlEBoNoAiQA8H+cPqy9sbyzO0x6A7lWuVs5bnj4+Id42rjF+Wd56fqce4W8S3x//Cv8V7zKfWy9A3yefA+82X6hQCpAQcCrAWxC5oSrhjdG30dEx9aH/odRBwqGvwWdxJ+C64EfAAk/WP46PL27XPqiOlx6lDrJ+sl7bvyM/ib/GUCfwiGDIsQdBWlGP8ZSRz/HdAbxhhmGMwXfxT1D8ULdAhXBVgBG/3x+HH1zPEX7Rjplubd5MDjj+Kg4Y7iz+Nj5DzmTuml7Nvu7+5O8L7ylfLx8vDz2PF+8WL4ewAVAnL/agIDC8QRoReiHSMgzCDVIn8jMiBfHTccHBilDw8ItgLt/Uz5LfPc7XHqCemX6nPsS+vM6yTxKPcU+2v+iwIrBuwKrBDaFGEXZBlGGgkaPxnqGIoZ3Rc7E5MP2wzcCIYFFwMk/9/62PaE8eXrt+g+54/kd+I84WLfbd844d7ituX26GTpjOr97hbz8PTk9P70kvZX92v4N/6VBXcHOQeICnEPzROUGacemB/QH18gGyDUHYoZCxYyEm8L/QMl/9j65/Qp79/q5efR5uTn3ug26efrP/Hk9ev4+v2zBG4JIQ2JERsVNhdeGQMbVRr6GDkZDxl5FqoS0A/aDdYKLQZZAfH8+Pcf8y/uZeib4+rgEt983YLcx9uT3GHeRuDU42foFOzZ7nfyPPaz+Kj6gfzv+7z6f/0RBaALkAu1CfQM4BGAF+IeUSG6IMEiUiSjIVke8hzSGdsTQwy2BLP/S/x09n/uXOfm49DkGedH58DlVuf77T717fl2/tED5AivDVQSCBYlGPMavR2zHJQaLRudG/wY6xRKEekNhQpzBtMAvfqd9XTwCesx5hTilN6621LaiNqT2/3cE99t4SLk6ehH7uTxYvUC+H/47/nm+0T70vq8/4EG3Ah0B74HfwyvEtcXPxzFHoofrCAXIb0fxx2NG58YFxMwCwYF7AA5+5TzJez85pnlCeef6ETntuXN6arxMPfh+sH+FwKEBrAM9RGYFMAWGhrJHJ0cqhu1HC8dwBkJFXYR2A2ZCW0FRgBk+sv0Oe4s6PHkHOLF3mHdI9wg2oDaXN7u4gLnYe
q76xDuFvTL+iP9nPuX+wn9If1I/zgFeAjEBogGrApZD+oTTRqiH6YgVCDeIT0jEiK9Hu8awhW1DkgIwAOG/gn2ae0b6S3mZ+OQ5FzmF+X65ZzrgvGM9Yz6OQC5A1oHBA6iFOYWOBg0G3AcfxtYG7wbhxnYFekSxA+WCukEvwC9/OH2n+/a6XvmIuNC3wfdktwl3HbbSdwO3+fhh+UK6kntZPC89A/43vkQ/Lv9jPzh+54BNwplDEIKnQuLD0MTchksIFkhXx8UID4ivCFQHzsdRRnFEYQKdAZ4Awn+x/Ub7jTpMuZZ5qPogOfC5FbnAO6z8xT51f4xAp0E+Ap2EwcYkxgHGm8czRyPHEQdsxt0F/QTNxG4DecIawP5/tv5EPJu6/Xn9uSy4S/eDduG2ZXZetsj3zji4ON75tLqMvC39iD8l/wP/Lj+mf/8/UkCmQr3CsAGWAdsDKwQjxVMGyEdBBtFGqUdOB7kGQIYLhf3EOsJQgfMBBz/pvcH8Rvss+ga6IzpE+hi5VHnoewL8hv3pvtl/pkA4wa7D4kTIxPRFMsXbBlPG40cMho+F3AWpBXKEVQMEAihBBEAPPkO8q3sFOnn5aLijN/t3Ebb9tpy3NPfP+S359/oW+sW8qr4/vrg+yD+6v4A/5sD+AqXDRoLzQodD5ESIBaUG9UcUhrlGqgd/B1UG3gXRxTYECUMwQgwBjAAhPhr8zPwEO3H6rvpUeg559PojuwY8Cv0qvla/db+OAM2C8MQPhIcEzcVKRcxGaoaZhkXFgoUZhPgENQMPghCBLIArPuM9FHuMeoD54PkhOHq3UTbWdsk3p7hHuRs5VDnaevl8AP20/jS+Nr5lvwa/Zf+PgXvCWsH3gV7CcgOzhSrGbYaixkUGgUdYR+AHsIadxblE9gQ0QtyCI4Fc/7h9kLydO8J7mjthutT6Nnmd+rC8Cj0Sfaw+Tv9GAH7BgoNUhBsEeAS1RTiFokYPxguFnkTyhHyEFgPjwu2Br8Ckf97+1r1o+9f7Bzqz+dz5ULibd+G33vhCOS25hfoIelp7DXx5fSk9zL6DvuW+vn7nwEvCAcJLgWIBb0LXhJDF6sYKxfvF90b9h5zHtkaGRc8FeASKw6KCcIFeABe+rv12vE57ijs5OpR6Ezmdejr7FrvcvHO9XL6Gv59ApgHTAssDt8QOxJ3Ek8TXxQCFB4SHRA5D3wOAwx/CFAFGwJF/k/6C/cw8//u6us06cnmnOQN4zPjtOM6487khujd6sntP/HY8n/11vhW+QD6Vv5HA4AFKgXrBdEKERHZFLAW6xc0GZEbCh8KIHEdtRpMGXkX5xP5DuQK9wbqAI37zPdc8xbw6u4i7GLo/ecs6/jtg+/t8Ur1+vgS/ZoBWwVZCGQLSg6ID58P5hByEgASaxAyD+sNIwwbCrMHygR6AT/++/oo9+fzmPHu7unr9eiv5m3m5eZ55qTmKucr6MPrzu8l8bHx/fOR9pT3UfmH/RcCgQT4BK8FUwiEDcASURV8FXQVlBeFG6UdExw8GX4WyBPZEWEPKwpoBEwA+vzQ+RP35fSg8uDvB+5B7nHva/Bq8XvyevNQ9dP47fzs/yICbwTWBiYJOwu3DPsNxA5fDpINYwxlC0cLcQqGB+4D9gAv/n/73fjy9f/yN/C57fTrP+u26mnqveqo64vt6u/f8fzzEfcd+WX5APtO/+QC2wMoBB4FwgZjCXwMFQ6eDWYNHA/UEaUSZhF6EPQPYw6QDFoLKAmcBXIC8P+b/bb8QPxA+gP4QvfO99T41/lP+vD5zfkr+3P91P5e/zoAUgH/AaMCKwQMBvoGxgY3BqIFZAXEBdkFfgQoAk8ALP/L/e37LPqq+Ar3rPX49NT05fQP9RD1R/U39kH3Gfjx+KH5Hvpt+hb7KPyi/F/8g/wC/Vf95f0c/1AAuwD6ACAC7wP/BQMILgkbCRkJLAp0C38LJAplCMQGngVABTQFXQSmAmQB+wASAZkBFAJoASwAHA
BPAQECawGvAIYAagAkAAsAAwA6AA0BnAFKASkBEQI1AzcDAQI1AY4B6wE9AZj/9f12/e79NP5r/VH8Dvye/EL9i/17/UH9+fzG/I38CPzG+w78kfsn+jX5TPm++dj5fPmm+Wr6Hfvv+wn92f3G/ogAMAIBA2ID/QOIBJsELARZA1ECtAG8AbIB8ABgAMAAdwFWAnUDGgTRAxwEtgU2B18HAgfaBlwGnAUlBdEEMQR7A8cC7wEWAbkAIgFNARUAQf5j/X/9d/3Y/NH7avor+Sf5JPqe+hj6fvne+Vb7Jv0//or+2v5p/8r/DACEAL0AMQBR/7b+nv65/r3+fv5d/sj+y//AAGAB4wGfAqcDdgTCBNwE/QTNBCEETQM+AuUAof+6/vn9JP2G/DL8VPww/Yz+ev+n/wwAkAGuAwgFPgX8BAsFaAW3BcAFfQUfBdwEfgQGBKQDpgOsA0oDYwKFAfMAjQAVAFf/UP4N/b/7F/se+2D7SPs0+qX5Pfqo+3n8SPye+0771fu8/P78Hvwh+/n6kvv9+/z7zPvK+zn8Q/1//n//QgA/AX4ChAM6BO8EiwWeBTIFhgSeA78CBwJRASwA2f70/bH93f07/rn+2f7j/nL/sgADAqsCrgKbAvACbAPYAwEEqwMhAxcDigPuA/IDtwNwA2sDjQOUAz0DlQLGARgBlgD1/xz/E/4k/aP8qfzF/HD8Hvxs/Ar9If3E/Lb8O/2s/YL99vx9/FL8pvw8/WD9DP0t/Qj+Nv9EAB8BvwFdAhoDqAPdA8ADcgPmAg4CAAHs/w//Z/6n/cH8Pfz9+xz8jfw1/cD9Hv7G/ur/OgFJAgUDhwPSAzQE+AScBWAFngQbBPYDAwQUBMAD9QIyAvIB6wGrAS8B3ACCAMr/3v5W/ib+2f1W/bH8Ivzb+yj86/w8/dT8Ufxi/CD98/0h/qL9Bf3S/CX9lv2f/Un99/we/b79oP5e/+P/YgAMAcoBXgKyAqsCNwJ2AcMARwD0/1n/Pv4m/Z78t/wD/Rv98Pzw/Hj9dP6N/1EA3gB4ARsCpAIoA8gDUwR5BC0ExAO7AyAEXwQaBJADWQNxA4wDdgNiA1IDHgPCAlEC4QFuAdYAFgBE/3z+8f2i/Vv9/fyM/Dj8GPxd/NL8I/0t/Sr9Wf3M/Vb+r/6o/m7+Wf6n/i3/of/N//b/OQCaABsBmgHrAewBsAFRAfwAqQA2AIr/vv4E/oT9Pv0L/dr8ufzY/D/9xP1S/uT+hP8hAK8AMwHEAVACpQKkAn4CiwK/AsoCbwLuAZQBhgGLAWwBGgHNAJ0AnQCmALUApAB0ABoAz/+z/7z/r/9P/+T+kv5T/l/+S/7u/dH9mf2t/b/9xP2y/cX99P0q/kL+IP4e/lP+yP4h/0f/V/+b/xQAjgDNANAAwQDJANYA1QClAE0A5/+i/4X/h/9//1P/Rv+H/wAAdgDUACcBfAHCAfUBGgI/AkgCMAL3AagBggF1AVEBAgGtAIEAgACDAGoAQAAxAEUAagCJAKwAyQDgAOkA7wADAS4BQwEYAcUAhgCAAHcANwDG/1z/Kv8S//v+v/52/ln+YP5o/lH+PP5Y/oX+mv6i/r/+9/42/2D/dv+F/5z/rP+t/6T/if9Z/wP/pv5l/kz+O/4c/v/9Af4v/oX+3/4+/63/IgCKANgAGgFoAbcB6gHwAdYBxwG5AaMBeQFEAQ0B3gCzAIYAWAA5AB8AEwAHAAUA+f/m/9v/6v8KABMABQD9/wwAJQAuACIACgD8//7/9//Q/6D/ff+A/43/g/9v/2T/bv95/4j/lf+d/7H/yf/k//b/AwANABUAFgAUAAkA6//F/6T/hv9r/1n/Tf9I/0T/Uv94/5P/tP/c/w8ARwByAJkAvgDgAPMA+AD+AAIBAAHyAOUA1gDPAM8AzgDCALEAowCgAJkAgQBlAE8ANQAXAPb/3P/J/77/t/+l/5j/lf+m/7z/xv/D/7j/tf+6/8n/2P/j/+r/6P/l/+P/4/
/x//v/7//b/9D/xP+0/6L/kP+B/3r/cf9o/1v/U/9g/4P/nP+T/4b/if+a/6//vP+//8L/wf/U/+r/AwAWAB0AKgBKAF8AcQBsAF0AZAB7AH4AXwBCADgAUQBjAFUAOwAsACwAOAA0ACYAEwAJAP///P/z//T/7//v/+b/9P/8//z/+f8DAAgA//8HAP7/5//2//7/9v8RAP7/CQAKABcAEgAgACUAIwAYAPL/4v/N/8H/sf+m/5X/jP+K/4z/mP+w/8v/2//X/83/2f/x//7/8//Z/8r/zP/c/+f/7f/t//f/CQASACAAKQA2ADsARwBJAEkAUgBTAFkAVwBUAE0ASgBJAEcAQAA1ACAAEAAGAPv/8v/k/9j/0//W/+T/8f/5/wgAEAAhACcAIAARAA4ADgANAAYA7//s//P/+v/1//T/AgAMAA4A/v/l/9f/1f/V/8n/tv+j/5b/lP+X/6D/rv+9/7D/nv+S/6T/tP+y/5n/kP+g/7P/xP/M/9D/7f8MABoADgAdAEMAUwBGADEAMgBCAFYAUgBQAFEAVABbAFoAYABmAGIAVABAADcAMwArACIAFAALAAQAAgAIABgAIAAhABgAFAAYABMAEAANAAEA8//n/9D/xv/D/8D/uP+t/7T/uv+6/67/nf+h/63/sv+d/3z/cf+S/7P/uf+x/7v/2//w/wUADAAJAA8AJwAYAPT/4P/c/+z/5P/S/8X/zv/W/97/2f/E/8T/1P/b/9X/1f/e/9//2v/W/8r/2//6/xcALwAwAC8AMwA4ADIATgB0AHIAbQB+AIwAiQBwAH8AjgCEAEkBVQOKBKYDKAFPAJ8EBgl3BC/+dwEYB0EDvvzM/tX/uP1A/Y38vfxe+zj7vf28/OT6UvvW/FD+1vtm+r39JgEOAsz+RPxy/7UEKwWNAHP+hADQAgkBjvzQ/Pn/pgFdAHL96v00AKT/m/7Q/6b/yf6d/3QANwCgANQBfQGT/0P+sP3L/Ab9u/27/Ov7fPxD/LP8I/4U/4cAAQJSAtICdwNQA5UD6ANiBGIE0QMMBN0DMgNsA4MDoQTpBZMFQQKC/xYDdgMOAAAAl//D/dr98f2F/Xv8nPrb+uj74vwZ/r7+g/3y/bUAuwBhAMoBFQIuAkECJgLrAtMCuAE6AXABpQEDAZX/5/+yAMb/cv7/+3j7j/0z/aT6bfk1/ED+lfz++u36hv2a/7f+s/7R/3MAvgHKAeYBOASJBdYDRgKUAgADJwTQA5gBGQBKAI8AOv/7/dr+s/5C/Nv7YfwH/Sf+4f1P/GD8Lv7C/ff9GwBw/+b+tgBGAv8BrgGJAr8DVgT4Aq8BwwLRBP8DUwBw/xgBOAIwAZT9l/zZ/dj/1gBH/Zn6o/xe/yL/xfug+Qf8qv5q/Q78dv1RALgBbAD7/yUBAATbBW4DbAF2A50FMwRUAtwChQM6AiUBuAB//0H+Wf9uAPL+9v0a/lP/HgDC/2j+3v0d/4//QP+T/RX+1wC/ADX/tf+9AScCNABp/9AAZQIUAvX/VwA0AaQADwL7Acj+8v2k/yH/x/zj+7j8Gv5q/jT80/qu/AX+nv0V/uL9Qv7k/+b/VP8wAJEB2gH4ASgBYQBFAcEBvQBp/y7/5P9CAPz+M/5g/lb/9wDv/67+tv8yAeABBwFgAPcANgE4ADIAmwHGARgBIwEOASEBigEaAboAqgABAQMBQADfACcCBgIFAQMBbQJ7Av4A7wBNAbAACQC/AAIAUv4fAJEAFv/j/g3/n/8V//X9M/6v/2H/nf1Y/j7/Zv5p/uj+e/5//q///P9R/97+S/9fACYAeP61/m4AuAATADH/G/9h/5v/NwD7/6L/Zv9O/10A4ADy/3D/QwA2AV8AA/+M/18BUgGY/7j/YABRAHoA5gBSAQ8BZAAWAXABgADXAHABwwCH/4L/CwBBACwACwA2ADIAlP9H/3X/wP++/+3+rv4V////+gCfAAgAWQCIAA0AtgAjAWn/Gv8+AIEARA
B+/3z+4/5U/7P+8/0B/Y79Sv9h/oz8S/0R/vb9vP0h/sn+0v5Q/8T//f93AK0BTwOfA+8CKwRdBmIGRQbjBgUHeQfECC8JPwfUBeEGtQg4CCkGRARUA0cDSQMwA67/9PtK/cT+fvxS+Xj46vi8+MH3U/au9ef1Lvaq9rP1j/QA9br0gPQ49I3zHfQn9QT20Pbx9Wj05ve5/Jf8uPth/Gn/DwZZCaUFCAXHCuUPqhF+EV4QPxBuEzUX4hajEk8P4g4QEd0QTwxDB30DOgLYAj0Aovno9TP2gPV+80DzffN/8472ufrn+5H8bv8oArMCWwJNBF0IkAluB8AFWAXWBcMFwAOx/7n6fvej9q70Q+/n6gzq1Oj15tbl2eRW4y7jMel48W3z0fHn9Jf8bwTCClMPCRQTGJgbniAtJOwkbCXSJGchuh3EGpwYIBWWDeYElv9x/DD41/IR7Z/nqeQw5RTmMOUL5Yfox+2F8RX1WvpMAHUG0wwGEs4UUBdZHH8h0CEVH0UeLh7MGoUVuhEBDlEHdf/++KDxQemG4o3c3tXPz1rLvsljyenGPsZ5yrjOdNPC3cPpLfC69DD/NA2GGMshWituMZIz0jfVPVU/LD1GOvg0ky3LJrIgCxk4EGMGBfvv73HoLuQG4CfaP9RP0FbPN9Pv2UHdP95+5KrvWPoQAy4LhhOiHBIlhiopLawuyzD+MjIxnilkIcsbyhUkDiEFA/of74XmCN5g1bXOrMgzwiO/wb9cvwPAMsVPyZzNaNpi6mP2PP7pBTUTDyPgLfQ2BT99QF1A30LyQ2hBVzsjMkIoyx0kE/IKUwFY9NPor94g1dbQqc+ozO7J8cgBy3LQJ9fD3gDoxvAz+VMEjA+/GGEjcS5aNE42xDhhO+47Qjr9NM4rXyIfGpARbghA/ZLxYejW3GrQsMu8yE/Alropukm5ErsAwu3HhssF0e3a3OjU910EjA9dG7UkYy7AOTJATEJPRYBF1UBVPJ44MzK0J9YbahDiBOH5tPAb6J/dGdNFzCfJWchfyfbKtcxi0PjWb9+b6T/0Kv4HCM0SwR15J5UvCzbyOos+fz/gPCk4dTP5LeclwhugEJ0EQfln8FzoZ98k1SnLC8RHwPu+9L5fvoq+f8HDx8/Q3df+3ULo6vLT/WwPdR7MIyMqyDO6OhBB0UW6RJ5Bcz5NOPUxaSsrIJwT2QcC+l3t0eWo3rbTlMkyxOzB2cHNxa7K5c3j0+vb8ONT74j8PQeYEeQc2yYZL+g14DokPw5Byz1BORk1OC9wJ/EeaRTsB5/8EPNA6d7ezNaz0JPJycJtv72+tb5zv1vC5cZ1zePWU+D16Gbxm/dOAJ4RpiPiLkc02DY/Oko/NkNYRY9DVjwNNf4txCOBGYkOWQCp8wLpJN5q1l/QDMnwwlLAYcHHxu3Nr9TF3EPl5ey99g4C+AycGPUiaSohMAU0CTevO+U+QTyWNugwWyo1In4ZsA8PBBD4ce5p5j7dItXVz83KG8amxHPFksVmxgnKM8/N1aXeIuhs8CP3/v3CBiQQGxkYJGIv9zUtOBg5MTk9OSc5eTdhM5oquR6YFMsL6QBQ9Y3q/t9516bQgspNxsfE0cX+yV7PdtT32rTjae1f92EA5wirEgUcVyPIKYUucTFkNB83PDg+N/EyISySJQQeshQSDJIDLPqB8Z/oW9761c7Qr8xsykTKMsm7xz/Jys3c0vfX0N6D5jvtBPRT/IQEUwtxE6seninHLyEzqzYIOk46/jfDNts0yy3pIz0Z3AwGASL3Du7D5czcntKKzKDLQ8oQyfbLCdJM2X3hPukr8Vf6XwSJDwEavyDqJfMsRDImM9QyVjKEMCkudirPIwIcABT6CoIBr/m28m3qU+EJ2dbS7M+jzh3NAc0rz1DS6tZG3M/fnOPC6tXyVPlD/2cEqgiPDYwRAhUeGcAcgSIPK0Iuryq0J7ElJyIpHbcWbxCXCk8DR/qC8Xrp3+Gd21vX1tRO1czYf9
vz3PPhXemD75j3+ACuB1sPOBjpHL8gzyYbKv8qSCxOK70nwCTZICcbYBVjDqEGngBd+U3wtOnh5I7gDt7r2sbXy9fY1+XWzdhD3FHgXugp8NbzYveW+kv81//0A1IGUwp6D20RjQ/ZDvwRUxajGD0aZRw3Htwe5h1xGjUV7hAWDTkI4QHC+DPw4eoP5Y3ge9/H3SzeJuPS5mLph+9J9tj84AT6C08SMRksH7MjhCbaJrMmpiYAJNkeLhleFLoQjgzOBLr7VfUo8L/q0ubb4oveYtxx3NXd6N9n4Irg7OPT6EjsAPDW9Ab5Cf2VAfUDRASTBGgEjASEBdgFZAUaB8wIVwhdC2wPIA6nDIcQVxL8D5kOSgysCDkHNAPT+8r2/vG07FjqW+gj5mzo2O2m8uT36PsN/skCfQlDD4EVNBtCHjYhUCRbI+cg2R9AHNYXTBb/EqgMcwh5BBn9MfZD8CfqXOY75G3hBOFB40PkY+Ud6Kbq5e0E89n3PfyZADMDCQYVCfwImgYuBJwBzf8S//P9bfyn+nb42fZw9Tz0Y/PH9Hf7wwI1BUkIZw4HEbwQwBDtD+kOJQ6nDDwLrQiTAoj9uvrE9j7zZfTy9kT3yfja/FEB8wXjCbsLMQ3hDwUSERQpFzcYyxanFeET4g7UCIwDVf80++X1GPFX7UXqVOeK5YXk9uS26Cnu6fLG97n8UgDoA/YHkAnWCEAIYAcgBTIC8f7Q+2X56/Wp8Zrvn+6Y7FHrE+s46pjpIOpy7YLz5fjP/akGXBC6FUAZoR3uH+cf4CBGIpUg9hwYGlkWhA9LBrH97fcZ9PrwXPCN8aby5fSf91D5qvuK/msBxgYGDB0OwA9rEiYT0hAQDesJ9gekBc4CzwDf/rv76vgq9w705/BX8TH0ufYH+L74SPrL+3j7Gfq1+bL66/uA+1/6gvk/95v03vM882vxnPAh8Q3yh/Ia8pTx0/Eb8oXy6PSZ+QYBrwmQEqQbjyPhJuwnzSkcKEki8R4vHYMXnw5qBhv+1fNg6gDkaN8p3Q7gP+Ym7Jfxhvb2+k4AnwV1CaUNTRMZGJoa+RpJGSoWXxJ7DRoHBgHp+1n3HvRN8t3v/OwA7LrtsfA38wD2hPooAMsDjwOlApQCwgCT/mL+hfwM+JX1zPRr8gjvaOvb6Mvp4Os+7Ojtc/G688D1iPlU/SwCCgneD6cXHiDaI0EluigxKWMkJiD1HeQZzxKmCrcCCfvU8lfrv+bb5GHkiuZY7DnyRvYk+2sBaAaiCS4MeA56EPgQrA9qDiENPApmBn8CF/7c+cX2cPWy9EPzRPLl84r3O/ly+Tb7uf04/6AAYwFjAfQB/QGEACcA0/7v+uD39vWG8v/u0ezd6ijqOurp6Ujr5O1n7wzy9vUQ+CH6QP4fA4cJZxFqGC0foyRuJl8mtyVjI78f7RpLFNoM4AUs/kf2OvAS673lfONd5EPmienI7vP1E/6DBOQIBA1mD8YP+hC3EpISRhEUEAMOigoCBoIBz/7R/PH4ZPUG9NvyuPDx7g/u1O4Z8aPzMPdX+0T+jgCLAloDrgICAcD/bv/g/Vz7wPm0+PL2rfQb8nPv4O1u7RjuF/An8lj0h/iq/a4BeAVOCXYNoRRNHb8h7CIvI/ghzR//HKUXpxHNDe8IUAEr+qrzk+zB56XluOTr5eno3+wK8wH6Hv8WBGMKXw8UEkQT0hL2EbkREREUD2IMbQk+BoYCqv5U+kj1zPGX8OnuCewy6iPqe+uC7fbun/A/84313/f4+k39J/8HAvYEAgeBCKkIQgcRBvwDlv/n+6D5ifaU9CL0KfPd8l/zkPOY9M/2gPgq+xoADAXgCJYNZBOcGHccIx7dHeMcERqXFPEOGQroBF4ASf1h+v73p/bH9fP1tfcn+nP9zAHfBbMI9gpJDbcO8A5aDnsNTQxtCf4ELwHB/bH5H/aP80bxeu+j7izu3u3U7eftI+4f7iXu5e6S8LvyXPWO+K77wf7yATkEbg
XCBvoHkghTCcsJXQldCUsJVQcSBdcDjQEG/rz6afig99H3MveR9qj3DPmk+f35Jfsh/Vv/wwHsBKEHLAlwC0wObQ/RDh8Odg3jDKAMBAz7ClwKzAnbCPQHDwdgBccDvwI4AUz/Df5G/Uv8E/sU+mj6+/tG/eD9BP84ADsAmf8U/5n+w/0f/G76Tfgr9bTxBu887TjsnOuB6xPt1e928kn17fg//KH/QQSfCG4LxQ39D74R5xJOEiUQbA61DI8JrwWyAWv9tPmW9g/0AvN08pDxWfKV9NX18/aA+bD8y/89A5AGcQkrDDcOjw8DEZ0RmhBRD1sOigwTCvAH5gXrA18CWAGyAPb/4f5f/i7+P/1W/F/8pfwB/dr9bP5q/nX+sP6V/g7+cf0J/af8zfvO+vr5GPkn+JL3d/dx9/v2R/Y/9sT22/bW9mn3lfjb+SH7a/xr/R7+MP+RAKEBpwIRBIwF7gYMCHQIiwirCLYIsAj7CDAJighvBzMGcwRsAsIAhP+w/gr+T/3V/PL8z/w8/M77s/vI+338JP5YAIACiARgBrYHeQjECI4I+AdiB9MGCgYHBfUDyQJeAQcAIP9U/mj9Dv1g/aX9Qv2C/Bj8JvzN+w/74Poo+xH7tfqJ+kH6mfmu+Mr3nPcg+Lz4ffmq+vD7Pf21/tP/jgBaAe0BAgIrApQC1QItA+cDkQTeBB8FcwWZBU0FqgTXA7sCdAFcAL3/ff9w/6H//v9xAKwAmQB1AKcABQF2AUwChwPSBAkG9gYwB6oGkAVIBAcD0wGPACL/6f0Z/Wr8ofv0+oz6ivrq+on7N/zR/Hv9d/6m/4AAsQB5ABEATf/2/Sj8RvqU+Ez3sPaH9pT26PbU90z54fog/Br9N/6r/y4BjALsA1cFhgZJB7sHyAdWB3kGqAUbBc8EuwTNBOcE3gSgBAoEDwP7ASkBpgBhADoAIAD4/8D/gf9V/xz/6v4W/87/0gDWAcQCvQN9BL4EcwTuAxID1gFZAOb+o/1//Iz71fpc+mv6kvrA+m379Puv/Kb98/4GALQAIwGAAXEByQDR/4v+IP2L+wX6rPis9yf3Pvfr99z4vPmd+pn7qPyb/W3+P/8rACoBRAKDA8AEdwWYBbIF/wUNBsEF0QVrBg8HbQe1B44HrQY+BcwDjwJaATYASf/C/mL+7f2K/Tz9FP00/ZX9Wf58/9YAIAJNAzwEiAQkBDYDDALRAHz/Mf5A/cP8nvyS/K/88fwk/V79wf0o/nL+xf5Z/wEAbABaAO3/Wf95/l/9Nfwt+276GvpH+tX6l/tx/Hn9lP6D/xMAaAClAMsA3QDNAJ4AZAAfAMz/U//S/mP+Lf5F/rH+Y/9rAO0BvAN9BcEGewe4B5QHFwdfBqEF/ARNBIYDpgKrAZIAgf/F/nD+S/5B/nn+4P5N/6//7f/3/7//Vv/T/kn+4P28/dH9M/73/uf/nQADAWoB9QGFAvECUQO6A/8D+gN7A2YCxADL/sT89fpS+ej37fZ+9pL2C/fb9/j4RPqV+938J/5S/0QADwHcAaoCagPlA/IDjwPjAgUCFgEsAFL/zP65/hv/pv8KAE4AkADYAAsBSQHFAZUCmAOQBEwFsQW6BX4FCQWLBOkDFANOAp0BEgGKAPj/Vv+d/tH9//w//Jr7Ivv8+kH73vue/Ej97/2q/mr/IgDCAFAB0AFPAs0CMwN1A34DTQPXAiQCHAHQ/3P+P/10/AX8xvuj+6/72fv7+wL8+/v5+xX8XPzX/Jj9jP6V/6QAsAGGAu8C/ALdAsYCuwKvAqMCowKzAsYCugJ0Av0BggEzASYBYAHYAXYCGgOuAwwECQScA/sCTQLLAVoBBAGeAEEA+f+X//X+D/5L/cD8TPxC/Fz8hvwT/WH9uv3c/ez9yf2k/Yf9k/3O/Sf+sf5Q/wUAlgD4AAsB5QCUACcAqf8f/5j+Kv7l/bz9k/1p/Uz9XP2K/dj9Pv7P/pf/jACWAYcCNgNyA0wD4gJYArwBFw
F2APL/lv9Q/xr/0v51/iz+IP5h/uX+sv+3AO4BOgNhBDYFogWyBXgFEgV5BLkD7AI5AqwBMgHNAGUAGADl/9b/0f/D/7z/qv+E/z3/zf49/pz9Cf2K/Cr88vvy+zr8w/x//V7+Uv8yANsARQF6AXoBRQH1AHMA1f8R/0T+gP3I/Dr83vvF+wH8h/xR/TX+Pf9IAEYBHgLMAkoDiAOcA4cDWgMBA4IC4wE7AYoAzv8g/6H+W/5A/k/+kv78/oD/EACpADABlQHmAR4CMgIbAvEBwwGFATYB4QCOAD4A+v/O/7z/wP/e/wgAOgBlAHoAYgAJAIX/8v5g/tX9af03/Sv9Rf2F/d39M/5y/qr+6P4p/1//m//Z/xcAOAA8ACEA5/+V/0L/A//O/rj+vv7n/in/eP/c/z8AowD5ADoBbgGPAacBtAHBAcEBswGTAWwBPgEOAesA1gDKALsAtwDBAMcAyQDHAM0A4QD4ABMBKwE+AUMBOQEfAfEAuwCBAEkAGQD7//H/9v8MACcAQgBNAEEAJADx/6j/XP8W/9T+kv5f/jD+//3Y/bn9sv25/dX9Bf5I/p/+9f5D/4D/qf+1/5v/bf87/w7/6/7d/vH+If9s/8H/LQCIAOQAKQFdAXgBhQGRAYoBdwFJARkB1gCDAC8A5/+q/4H/cf9q/2f/dv+Q/73/7P8nAG4AwAADAUIBZwGFAaYBtQGrAX4BTQEFAZ0AaAAvAPP/CQADACgAMwBDADoAJQDw/6n/ZP8S/9f+nf5v/kP+J/4R/gr+Ef4k/kz+k/7x/lL/r////zkAVQBUADwACgDO/5//iv+B/37/gv+R/6f/uv/Z/wIAMwBwAK4A4AADARsBFgEDAeUAwACbAIIAcwBeAFMASgA0ABMA9v/a/8n/zP/a//v/KABiAJoAvgDeAPwAIAE9AT4BIQHxALoAgQBJAB0AFwA4AGIAfgB6AG8AZgBHAAwA3/++/4j/Pv8A/9f+pv5m/i7+Gv4b/iT+TP58/qr+5/5H/4z/g/9g/2//j/93/yr/8P7h/vH++f4b/2D/2/+EADUBogGHAR4B5QAQAfwAdQAoAGIAjQBIAPf/6f8gAF0AbwCPAMQA6QABAQYBvgA7ABgAPgArAAoAOwCMAMcAAgEgAesAmQBfACoA8v/N/8b/0P/w/wMA9P/5/zMAZwBXADMALgAnAAQAwP9W/wD/4/66/n7+l/7d/g//WP/n/zoAMwAwACgA8//D/6n/ef9Q/y//Ff/9/uz+8v4r/6P/JABlADUA5v/S/+z/9f8FADMATAA5APb/iv81/zv/ev+3//f/KQBDAFgAcQCnAAQBZgGQAXsBWwFNAUcBOgEpATQBRQEgAaQAIADo//3/DwDq/8z/uP+e/6T/3v/2/9b/6f8gABcAzv+E/1D/Of8u/xf/Ef8O/8f+iv60/u/+6P7k/mP/VAAKAR0BzAClAKQAZQDp/17/I/8U///+yv62/r3++/5D/1T/a/+h/9//6v/6//r/7f/5//v/5//s/xUANQBEAGIAmgCsALEABgFZAVQBIwFAAV8BcgFZAR0B6QC3AFwA3/+L/4H/SP/6/u7+7f5M/2b/gv+v/zMAiQCtAMQA5QAiARkB6wCvAIoAbQBLABoA7//F/4z/S/83/zr/MP8a/yb/Wf9x/1b/Zf+v/+D/yP+R/4X/gv9T/y//W/+Y/5//tv///yYALQD0/8X/4f/y/9r/u/+Z/2b/Tf+L//H/TwBjAEMArAA6AVEBBgG9AMUABQHWAFEASQCHAH8AWABMABgA4//H/9L/yP95/0D/iv8GAN7/iP/K/zQAIgDi/+7/IAAlAB8APwBAABkABgATAEwAVAAIAP7/IwAhAOf/tv+4/87/zf+6/9L/DgA7ACUA+f8KAAYAxP9q/0b/M/8J//D+KP+N/6X/nP+9//b/8f/Z//T/GgATAP//DgAoADEAPABdAIsAqACdAI0AgwBlAD0ALAA9ADAABgDx/+v/2f
/N/83/xv/O/+b//P8BAOv/5//w/wUAJAAmACEAJQApACcAHQAOAAUAAQADAA4AGgAXAA0A/f/s/9v/1//r//H/3f/k//T/8P/f/9z/9/8GAAsAEwAaAA0A/f8CAAgA/v/y/wMAFgASAPn/1//F/73/vP/O/9r/2//e/+X/8v/3//z/EAAtAEUAWABYAE0AQgA0ABoA+//r/+L/2P/C/67/sP/B/83/3//9/x0AMQA7AD4AMQAUAPr/5f/J/8L/y//X/+f/7//z/+r/2v/c/+T/6v/5/x4AQwBUAFcATAA1ABgAAADm/9X/y//I/8b/wf++/8H/wf/O/9j/4v/l/+v/9v8IAA8AEgAbABkAEwAMAAcAAgD+//z/DQAiADAAMgA0ADEAKQAXABIAEAAWABgAHQAAAOX/1P/E/63/lv+q/8L/0/8SACkAKABYAEQATwA8ADUAIwAUAPj/2f/I/7b/uf/D/9T/2f/o//H/9//9/wAABwAOABIAGwAXABAACgD///P/6v/l/+L/5P/k/+f/8/8DABgAIwAgACQAJAAeAAsA8v/a/9H/xv+1/7X/uP/H/93/8f8GACAALgA0AEEAUAA/ABIADQAVAAwAAQDr/+j/4f/S/9b/1//o//D/9v/7/wMADwAYABsAGgAnACsAJwAhABkACgD4/+f/8//n/9v/6f/t//D/8P/5//3/+P/8/wgA///+//3/9//0//L/9P/1/+3/7v/8//r/8P/0/+z/6P/j/+X/3f/m//X/9//3/wIAHAAmACMAGgAfABcAEgASAAsACwAMABAACQD7//v/8P/o/+L/2v/b/+T/7//q//P/AwAAAPT/8v/5//b/8f/v/+3/9f/9/wUAEQAPABEAGwATAAwA+v/4//f/7f/x//P//P8CAP7/AwAFAAIA/v8DAP3/+v/7//v/9//3//j/9v/v/+3/8//p/+L/4v/s//L/9v/8//7/BAAFAAcAAAAFAAgACwAQAA8AFAAMAAcACgALAAUAAAD6//L/8//5/wAAAQAEAAgACAAJAAUABAD///X/8P/u/+3/5f/k/+f/5v/o/+z/9v8DAAkADQAOAAUAAQABAPz/+/8CAAEA+P/6//r/9//3//f/BQALABMADwAJAP//8//r/+L/4f/k/+//+f8DAA4AFwAVAA8ACwD7/+j/5v/W/+L/6//y//v/CwAcABwAGQALAAMA9//r/+X/5//r//7/CwAPAAsABwABAPj/3v/W/+D/8f/2/wgABAD+/wEAAAD6//L/BAACAPv/GgAaAA0AJQD//wcA+P/x/+z/5f/g/9//4//e/+v/7P/u//T//f/9//v/9f/7/wYABQD+/wgACgAIAAUA/f/+/wMAAAACAAcACgALABAADAACAPz//P/6//j/9v/8/wgACwAOAAsAAgABAPz//f/2//r////2//X/+v8DAAYABAD2//7/BQAJAAsA+P/7//3/9P/5//f/+P/w//T/+P/r/93/5P/t/+b/4//n//P/+v/5//v/AgAGAAUABQAAAP3/AwALAA0ACAANAAsAAAD0/+v/5v/j/+T/6//z//f/EgAgABsAHQAfACEAGQALAAYAAwAFAPb/8v/2//P/8f/z//3/8//4////CAAGAA0AGAAZABYADQAFAAEA/P8AAPP/8f/6//f/+v/2//L/9P/x//H/7//u//D/8v/7/wIABAD8/wEACgAEAAYAAgD8//3/+f/1/+n/8f/4//P/8P/s//X//f/0//j/+v/7//z/AAABAP7/BAALAAYAAwACAAAA9f/y//3///8DAAIACQAOAAgAAQD7//b/7//y//H/+P///wUADQAKAAwABgACAAIABAACAAAAAAD//wIABgAGAAUAAwAAAP3/+P/y//P/8//v/+//8v/z/+z/7f/v/+7/7f/p/+v/7f/u//P/+f/0//X//P/4//b/+v/4//X//f/9//v/AAD//wEA+v/4//H/7P/r/+z/8v
/2//z//v8DAAgACQAJAA4ACwAEAP7/9P/t/+//5f/w//3//v/+/wUAEAAIAAMA/f////3/+P/5//r/9//6//v//v/7//n/+v/7/+n/5v/m/+3/6v/0/+//7v/y//v/AQABAA4ACQD7/w8ADgD9/w8A8v/2//H/8//r/+7/7f/0//H/8v/8//r//f8CAAwABAD+//r//P/8//f/8v/0//D/6v/p/+z/8v/3//z/BAAGAAoABQAAAPr/9v/3//T/9P/z//T/9v/3//j/+v/8////BQABAAAAAgABAAEA+P/x//D/8f/t/+X/2v/f/+j/7v/2//b/+/8CAP//+f/u/+n/4P/h/9f/0v/S/9r/5//q//T/+P8AAA0AEQAQAAkABQAFAAIA+//2//X/+f/9//7/BAADAAMABAABAP//+P/7/wEAAwAEABEAFgATABQACQAEAAIA8P/q/+n/7v/r//D/+v/2//n//f8GAAMAAgABAAcABQADAAMA/v/8//b/8f/z//L/9//5//z/AgAFAAMAAQD+//r/9//z//T/7//u/+7/7//3//j/9f/1//v/9v/x//D/7//v/+v/7v/v//L/9//3//7//v8DAAoACAAGAAUABAAFAAYAAwAEAAMABQAHAAUAAgAEAPv/9v/3//P/+/8AAAUACAALAAwADAANAAsACQABAAYAAwD//wEAAwAJAAAA+f/5//v/9//t/+b/5v/o//D/9f/6/wEACAAQAA8ACQACAPr/7v/n/+H/4f/q//L/AwAKAAcADQAXABEABwACAAMA///0//L/9f/y//3/BAABAAEABAALAA4AAwAGAAYACAALABAAEAAHAAYACAADAPT/9f/7/wAA/f8HAA8ADwASABAACgAHAAIA+f/7//X/9v///wEAAwARABEAEQAMAAYAAgD9//T/9P/9/wgAAQD3/wAAEQAIAPX/9f8IAA8A/P/u//r/AQAIAA0AEgAIAP7/AQAOAAMA9//1//7/BwAYABUABwAXACsABADX/8j/6/8BAOf/4P/t/+T/1f/B/97/6f/U/+z/DQDr/wkAKgAYADMAXACSACkADwBnAO//DABAABAANgC0/4j/jv9I/1X/6v+SAIH+r/8yBWsH1QAZ+4IBHQJA/3AD5AI5AQIBvv7O+/j+CACp+2j+4/45+WH67gFQBZoJ5g9HBV4AEQoQAXD7gAGBAfgBTAD2+z34w/f3+5j9gPs9/bj8zfgx+qL+EwAjAZgJ5gSs/woEWAVNBdYCTAfOApT7SwAa/Db2ef1a/K738/+5/Tb7JAO//bD86AXkBX4EIwQYBcgFwwThBccDqgENAlAB0f9o/jIA0AAv/5QBmQKQABIBTQB6/jL/Hv79+0D7QvzD+ob6Vv7s/yoA2ADcAIf/u/8gAFIBPwMJBHUEhAPZA6gD9P+E/w3/afzE/AX7sflU/Dv9RP2h/w8Axv68/qj/1v82AIgBEwCVAJoBSACMANH/8v9LAWP/l/7A/mz+q/6O/sf9v/1l/6b/2AA1Ai0CuwEXATECZAOGARQCGwIW/0j/CQH6/qz77/4B/9D95f7p/2H/P/8fAycBPP9WAk0CkAHOAR0CAAPoAkoDcAPlAgcBvf8nAOwArwBFAYwCnf7K/af/N/2k/Cj+k/9A/z/9sv0s/cL+4ABo/5P+GQFbANv8Z//y/y3/xQFQAwgBkv6OAPr9e/0b/xT+Kv/LAEAAkP06/XT/sv6P/GH+Cf6f+/X72/4R/1L+vAGD/+P/pwFAALYBYgOOBLoDQgRMBPID5QPyAa8AGADcAfcA6P5uAEMAGP99AMv/E/8jABv/9fz5+07+Mfxx+ob+oP1r/Aj+n/y6/XMB2gF3AdwAQwGJ/9z+hAFF/sb8S/5A/Ub8r/ql++/7dfzI/n7/CwFaA1EGBAhoCiMM8wsxDGIMnwv7C6ALVQmiCDsH1wVsBTYCm/8d/jX7Ufow+Sj3hPZW9sz1gfU39vf2PfnL+Sb4/P
iH+DP37/gX+NT2xfcy98f1yfR09BH0efMt8UPwe+6X7ObuUe7E72f6yQVXDtgSghK7FRUb9RuUHYQiCCawJ7ImfCHdGPUQGg4kCa8CIAAY+2H2A/TZ7ZrnCeel6Gjpces87w7zdvZI/BsDzwYJDKURKxPnFc8WPBTwE3ITChDbC5MINwYmA9P8avd380Ltm+kp54bk9OOp4dnedNxW2VXZcNxU2uvXqtyO4x3vufy+AoYJgxQsHI4gpSJVJiIsBDB1MGgpiB+nGvwT/gmMAdD4n/FH7p7ned3P2hjbZ9qX3GjfSuTg64nzPPmg/7MISRHkF5kfPSeVKhwreypHKfgk0x52G4Maohc/E90MBQZfADX7afj99sT16/O78kDuFeht5TLjBuJq5T7oyebE5b/mnuXb4hPlI+ja6OHoAuec5Jfjo+g687H8EwNaCzcTpBdfG3ogTCo2Nvo7oDZCMNMrjiFnGGcUwQ0iBYT9y/BJ4iXZ99Qb1oTXTdiZ29nezN+84hbqGfJw/jcMghO/FgUctiJvJq4mSCjWLAQtqyjmI7QdYxjQFI4OpgZL/vT53ffh8g7tAekc6J/lNeI74QLjyuX46mftb+nf6Pjqoutn7THvRe9f7rXtx+zw5cXkTvGs+yABhAZNB3UEGAp8E30Vkh2BKNErtSfAHHkT/Q5/DOEKogdW/+z4XfI96N3f9tmR3VbkR+Qp5QnprOzE88j9cgeYDm0ZdiWgKSkquiqiLD8veS8dK3Ykux7KF0kPxQa3/wX9UPpU9fLsduWp5lTobOZJ6H3tPPB6763tre1I8OL2ffvM/Wr+Lv28/bz87fkm+CH5h/YF7+DoYuOn4RDkiOX25wzrd+8g8zr3GP8GCrIX1x78IdcnhSZxIOchjCDbGiwa4RWACtYA8PXQ7GnrA+v76GzoY+i/5gDp/e069ZL9iQVwDZER8xPzFj4aGhwXHbEe/x7nG4gWIxAACuQF7wLs/XP5d/fj9PXxwO9p74XxoPFq8o3zr/JT9ST4a/py/d3+SP8jAW8Ajv8tAoMCoACi/iL7ivRH7bbozeYB5OTig+RK51bnTufL61/vF/WOAHgKmREXGO0cpSPqJDchmiOSIxQcaxePFMoL+QEO/t75KfJq71Tumuwj7n3vcvF+9Kv3R/wfAZsHgQ59EYgUBxhsGA4YBhgKGbcYMhRyDysNGAoQBrkC/QErAoQAmPzh9zH0v/Gk8nP1v/Wm9bj56PnG9T/23ftWAHEB7gCBAOX9d/pK+Lzwh+u471P1pPDi4SfZmNon3dnc0tvO33/rTvTY9Wv2M/oyBXwT5xuAHCQfsSO6I3Ue2Rm0GdAavhk3E8QKRgRjAU8A4fsU94D3JPlx+VD24/P1+E//5QK5BEwEkgRqCBsNXQ4bDMMOUBQDFVcSBxFWEqcSLRHJDfAImAScAHn8Ofga9C7x6u9L7rzqW+jT6YbuTvFH9Pv4XPte/m4CAgTdBKAHKwncB2oEMADW+n33TvaE9KDyfOxi53nndObd4p7h9+fT7evr4+zw8z73k/m4AqQMrRFeFeUbtSBgHpkbnBwNH7odzRdcFHoS7g1BCBoEfAKWAC0AJAK4AcL+Cv0sAHkCmP/L/RYB4QIVAu8BjgC4AYgFhwbkBR8GEwPEAKwB4/7U+5X9HP5b/Hn7cfkG+Pb5j/3w/hj/l//h/4D+yPqX+WP8Yv2Z+iz3Y/Rl8hT0ifWX8N3r1ey08KHv4+Y24jXm/egE6KDl5uSB6yH06fby93z+bwjfErsaZBt0HzsqNS/dL/suAin3IzEj2xtmDzYJfQVQATz9/fh48y7vVPAl8Bvu6+6z8cb1vfmm/CMBFgjEDVYQBBM3FOsR0BJ7FL4PUQsQC0EHPP96+yn73PhU9yX4SPbK83/z6vKI8S7ygfUi+Nn54ff79X35gPvq+L32IfcR9S7veev56RnmruCJ3tbcAdmX2lTeQd8W5KPuvvlGAwMLmRLyHWgoQy53MXEzVj
W3MqIs+SdPIYIZYRVFD3QDcPkI9UfyzOzf5yXoNerZ6mLq6+u68cj4egAmBoMIYwtkD9sSuRTgFKIUmBf+FrkRGg2FCCAGygXwAfv9S/78/hP9zPc68wbxVfF78wb0n/AO7q3ul+096tLoXe1y8gLy3e2m6rXqIOce5O3mIufQ5HblWumg5triwe1x/Q8G/RDuHpEmQyoqLN0rvirtK7Qt/ixDKU0fPBahDwgEb/mG9jz0iu/W7VLu1+xl6SPq/+7i8qj4ev8mA7EG7gvBDzIRPRLmFTkbQx1PGewSjBHgEZ4OagofCFcFRgMdAlr80PRp8RDyLvDK7Jbq6+cF5yTnVOQr4cHh0+TH55zouebY5Prm8unI6jrs7+7870Puw+2Z7ffsBfEr9678fQYLEGYUnhsJJRAqkS1zM181BTDVLBcpVx/nF/cTPwscAZL4lfDW6mjlDePD5V/ozepp7l3xgfSv93f/kwjBDG0RqReVGoYYFRdqGWocyx0PHRcZpBNbELsMJwcJAkz+7v2p+0D0Le7g6oXqtOnV5ifm2+Rj4gzgV9wf2hHceuGf5G/k7uZO6grrcOxn7ljwqfJ19JH1sPMF8ir1nvw/Aw8JLhJeHGgjmydvLI4t0i3FMmw0li4wJ5YiOh2KFdgM8QNd/fv3t/HW7E7oLeSF5tPpbui16BLuWPXY+HP8AAI4BsgL3hGLE/wUIxovHqYftB0NHUQfkh6nGXgS4g7LC2cFV/9h9yjvCOtU52vgA9g802zTK9N1zs/JScw61JDZdtoW3Enhn+jq79rz8PNt94L+FAPpA8cApv+8BTAM8QpzCooRMRn5HY8grCGkIdUjKSbKI6IhCSFJHcQZuhfbDqoGsQUqAQz3qPJp83DwAe8I8Ojuh/A89Tn2WPeB/H8CHAevCmwNOQ4PEXsW2xguGt8chRxLHMAbABbuDkYMVwkSAu77mfZn793oiuOT3ebYGddg1qDVE9RM0lPVwtzh4Qzl3OsH9PL3afrL/e7/aQJwBBMDRQHRAH//afyf/fAAMgAYAP4D0wZVBxwKOw6xEQAV7BYaFy0X9hZXFrEWuxWzEqYR7hF8DxcM0AraCYQIdgcZBQACAgHPAC3+B/06/5AByAEvAfYBIgPZBE4GDgc7CHAIWgd/BxAG0AESAA8BWP9s+bj0vPHi7YPpAeed5grmpuax6DfpHOkR6zHw+/UM+LH3cPnk/K79dPxq+zf7gPsy+hn4tvW286fzS/XB9c721/lM+zb8o/7SAAUCIwXvCQAMvQyaDx8REBCdEF8ShBPmEwcUzRQWFhYWbhXRFbgV1hNLEtARQRDGDucNFw2nDBELNghgBhwFpwKeASYDXAOwALf+Qf7p+8P4iPZd9Qz1+PON8O/rLepq6TTnxub06NHqs+zQ7rbv5PC885j2yvjZ+WH5jvmY+o/6ePlc+cj6aPw//S/9+Py9/eP//gHxAtADbgTsA7QDLAToA3cC1QKRBKEERgSsA80CrgKqAgcDHQV8B/EIDgvmDfMOqA5sD44Q/RDuEAMRlxHQEAcPZQ5QDoUM/gmeCdQJwwfQBMkDpQMaAiT/Kf26/EP8XfoY+Mb2GvX78tDxofDi7jXv9vCM8W7xYvFp8WPytPPv89XzsfMg89XyLvOd8s3xOPN39V72Zfdd+eL66vta/ioB4gHJAdYCBQQyBHYEOgWFBuEHQgezBe8FQAXyAlAD8gV/BxwJAwwQDm4O/g56EIcS8xPeE+oTHhTMEfwNBgxGDDQM4QpLCSAIVQcJBo0EpgOfAhIBGABA/9r97/tK+n74nfYP9eLz3PIu8dbv+e/+8MLwae907/7wY/KB8l/yG/KB8XfxcvJv9En1e/Qz9b33O/m7+b/6jP3GAGYBIwHaAgQDswDo/2wAgAC9/2T/wf9+AB0A4v4rAIUBcQD/ABgF/wcyCSwKVQyEDzcR3RBPDroPOhSoE6EQMA/CDo0P/Q5tDGAJdAlgCwEJJQdUBq8F9w
WdBCQDjQHcAf8CjQCO/Rb9gPoh9hb2r/Rk7F/kWecg8afwsObC5YLu4/LD7OTmfe1/9bX0xfF69Fv7UP1y/KP/jAKlAgj/efxnAIIDEQKC/0z+vf1A/M75ifku/Jn/YwJnAuMBqwO+BBcGVQllDNQNrA1TDW4MjQt5C1EN/Q7HDoUNVwxMC0EK8gncCfsKEwwXC1EIPgZWBTUErQKpAfsBowErAKj+bP7e/XD8VPzW/Lv7sve59Av1JvZd9ITvne6671vuCeuv6Jzrlu+h8A/xBPLs83v2UPdk+Pn6n/6sASACOgLJAu4DvASbA4gAKv99AJAAY/5x/Dz9r/9NAGb+o/7aAeQDcgPoA24GKwkdCsIKvQxuDuMOJA9mEGQQXg/ODT4NgA0uDEAKLgnOCHgH+QUOBKoBOwC3/mf94fxj/B78vfsY+z37hvth/GP8JvoZ9yL2TfjW+ab3B/SE84j1DfYW9KnyKPRA92T3V/VR9ab2avfg98/4c/lE+oz6gvu2/4cD9QLIALEAegJmA3MBmv8CARsD+wIZAvcCgAOiAuQC2AScB1QI3AfYCdUNqQ+XDJkJ0wpZDT4NNgvICasJaAr2CBoFXgLJAeYBNwFT/jT66vf89m32ZvaS9kj2ifYt9232X/Ug9qj4v/nM+XH7V/ut+if9gP6p/Rf+8///AV8C9P/7/bH/iAKUAn8AaAA1AlAC/gALANcA+gEAATX+Pfxd/RP/D/7O+4b7+PxL/uD9wP26/y0BXwB6/jj/wACgAVMDkwVaBmMFcgaoByEIqAm5CugKDwtKClcHMAOcAcsBbP4t+V/2EvWq8qbuzutE7lXzOfUQ9Nj16PxPArIDmwXeCYEN4A7RD80QfBGpEFEOJQ22DLkJ1AOe/s386fsv+fr2HveE9wz2YvMj8lTzgvSN9GH2d/pa/jX/cf68AFgFJwi8BwwIpQsXDokLSgfOBb8FbgMt/jr58vbW9JPw0uug6dHpsOke6W/pJ+py65btPe+98Oj0tPyABUIM1A/OEnYXThuRG04cwCFfKCMq1CajIs4fcxvYE4MMLghgBZIAefjo71fp1OOy3jzc7tyQ3qvfx+Be44DncOz68hj8eAbHDvUTIhjuHPog3iIVJFMm6ScWJv4f1BdcEJoJGwLk+j/17O8F6abgANn2077QCs5dzJbMvs11znHOr84s0ZHWXt9s7F77mgeuD0kXHyHGKy40XzpUQP5FvUfsQh86xDEgKvsgMxboC4ADcvyg9A/s+OTR3x7cidrp2k3cMt4n4fblaOzD8vj3aP3TBD0NQxRaGUweKCRlKVAr/imsJz8l6iC5GYARhAosBLT84/NV6zzkCN7w12PSUM6sy//JeMnYyUbK/MrxzJzPUdK01u/fwu0E+xcDZwgHERIdoCf3LmM23D+lR19JdUXXPhA3Mi5BJEMayxBUB1b9D/Pk6Ivfvtdo0grQudDv0yDYXNyD4ZnoPPEz+jsDyw17GUQjSSmYLZwyhzceOkI5zjXlMEYqDCETFrsLTANt/I31Oe3v4/DaF9OBzG/HnsSVxM7G+siLyGvG7cU7yHzMl9I33LjqiPpoBWELnRJlHZQokDHYOLI/WUUMRkhA8DZwLfAkyBxRE70Ix/4L9S3q997m1b/Qkc9e0bzUh9gC3IXfGuSu6gPzj/xGB7QSQR2UJUkrWS+YM8k4ST1NP0s+HDpPMzQqdh8CFFsJXwCl+NjwTOiX34HXmtDdyqbG6MSjxQXHyscxyJXIx8nMzNTQv9W43+Pvqf8yCRcPwBdIIxYtzTNgOtVA+EMkQRQ4hCuKH6QWqg5CBHX4HO7F5Q3dctMKza/L2c2n0prYM92+4Dfl+exO9r79HgVcEL8c/iStKfUtODP/OEc9pD7sPRM7gDUBLt0kFhqoDzkHaP8q99DuKeda4ezcFtgw0zrQvc9a0MHQX9Caz0XP7s9u0S3UBNr45cT2mQS3C5MS6Rw0Jt4s9zNeOx9ApkBYOmstlx
56EfkGif2G84jplOFW2vPQLsj1xCPHUcwO1FfcS+Lj5uDrx/KK+l8BlwkrFfEfSiaKKhYv/zMRObI8Dz36Oh836zDEKC0flBQXC1gEMP689vLubej04xfg+NoU1tPTW9QP1WTUsdLP0CXPH8+j0TzVrdyo7PH+oQmiD6UYMCPaK8AzXztYQoVGdkS+Op8r3BvBDpIE3fr18Hfnet711UjNpMVOw+jHhM/r1oPdHuNh51Hri/E8+XAAhgi7Em8b/SAfJt0rKzLuODs95jwKOR8zgiwZJWQczBO8DDIGZP4o9ofuFejA4zbhNd6f2pTYdtd+1YzT29Ju0avPq9Al05nVmdz47En/8QmcD+AYNyQdLd41Az8YRpxIc0RzON4maxXzCND/AvdK7/Xnit8m1jrMbcMEwXHG9s7r1lPe2OT66M3savN1+yEDRwwDFhEdjyEuJc8odi1VM103OTdTM2wuMiksIooZ6RFPDH8GCv8x987wc+un5mnjPuCq22nYSdgF2MHVp9M10ofR2NMY2DLbvOEr8XYCTAy/EZQahCX7Lfk0zju/QcVExkH5Nv4mxxfrCygCS/iZ793oyuGX2YbQzMj2xhXM69Lh2DLfouS353TqnO+79hD/cwi8EbQYJB2/IGIleyt3MY41tjZeNMYukyebH84WkA5/CHUDF/3x9VTveulB5ITfmdun2ZrY4Nem2KrYEdbb0y/UTNZd2VXc2OFh7xP/xAefDaoXxyJlLEQ1ajsOQFVFeUYCPdAtkyC6FX0KQP7D853qYuEY2cjQysjWxcjJwNA+1sbabuF/53Xq/e8L+G3+bQY6EBAXJRsHH6MkbivDMDQ06TWENPEvVCopJIocABWOD3wJOQEG+VHyXevx4ibbI9Y71I3Tu9NJ1DbUAdMu0hvS59FC1SrcBOKW6nH5MATQBgcOFxvtJAIuxTloQ3VIqUkxQ9g0fyZNG+QQ7gXT+03ypOj/3ZfRHccNw1vFjcto0l/YTt7q44XoJ+5A9rT/cQhGELQWshpWHvgiEynuL0g2njncOPM0QS8VKMogNBoeE+kM0gVP/TL1Mu1Y5CDeB9vU10LVc9NB0iLTt9Qj06HSD9aw2Abd6OSd6Nnqd/O6/Q8FowoYE1chXS26Mk84ljuyOo86uTMBJaQZRRCLBNT41OyF48Pdpddh0ZPNeM2n0ezWe9q43tvj5Oo18e/3XgDfBucNiBYJHpkjlyg7LswyxTUON2o1ADJ1LX8o8CEqGeESRQ6HB77/CvgK8a7paODA1/TT6tMx1BnVoNdY2lfc4d3B33zi0eWP6/vxNPNO8Y31q/z//ZAAUA25HAknBC90NNgzWDAbLVElLBgaDkgIev9a82/qk+U04OfaV9eb1YjWDNpr3S/g7uP057PssfJe+f3/fwgwElgZTB9XJWkqZy4FMkM2pzjkNskyzy32JvkeshYID8cITgJq+hXy6Oru5XrhiNtc2CDbjt9J4QDj4eXc5x7qvOuQ61Ds2+4r8Arv9O2o73jzQfj8/DkDAA2zGPwhISSOIlwiUCGhG7ATlw1XCbkDWPtL8kbqv+Qi4mDfFt2E3wjjTuPo4Tfiz+QG6ZTwNvukBCIMAhS4GsodIyC8JRAr7CwHMA0yRi6gJ/0hDh6CGVYVWBHNDAgHjADg+dTy/O497rbs6ulZ6fXpz+ps7Aju9+/Q8vj0CfSF8Unvs+007F/qAuo06zfsAPC69w7+PgJoCewRGRRDE/QUyBOaD3kO9g1TCeMEyQJy/Qj13O4I69bnteYe5y3nlugm7Jzszuof7iL2Wvz1ALQIdQ+UEeUUuxn5GuUcJyROKTcoiiYSJr8iNR1GGT4WJBH+C3YH6QCr+WX2ivd79w711PRW9WP0DPQ69Ej1APhB+RL3lfMC8ETsf+n450Ln0+aX5uvmdeew6CzsiPLO+qMCTQnoDlIPXQxdDkoRFg9nDWENRQkwAvX8ivlN9abzzfRm8iDvNO8y7o/s+e1r8JvyGf
d8/Kj/0AOtCREN3g4DFOwZdB3+H3kjQCbKJG8g5BuVF0UTKhATDcsJ2QXfAz0B//uS+TD8cf4b/kz+PP6B/qj+Ef6X/R/9A/0N/Cr3v/Dh637neuTr4/3gu9uR3APf2Nxt353pLvNx+mYBTwaRCc4OphNoFHoUxhSyEfILIQduBP0BnP7i+tv3/fTG8G/t1exh7aXv3fOh9pL4yvy4ACgDtwa4CyMPARIkFjYZCxkqGbga6Bn0F78WTRUME2QR0Q8mDHoJUAnaB74E1AEWASoCjQMEBYUGagdCB4UHcQWCAFb8MPnK8z7sruTO3M/VHtIG0KrM3czA07LbEOFk6Cjy3vnEAQELhRBlE5cYmhu8GCkWlBWdE/QQmQ2nCEkEAQFq/ef5EPdu86vuJuxR693qze3S8/n3/vvyAdUF6Qc4DGESNBcdGkUbOxp1GNEWlBOxEKQQ7xC0D3sOJQ1+CjgIkwcdCOcIDgvgDcEORw+aDx4Nuwk9B5QDtv2T9pLvjukh5CnfE9vF1rrRJM0xysTKhs9C1rLdqeTG6SbvU/bU/JkCIQpsEp8ZVh7tH14geiCfHsEbnxnpFdQP3wmMA337g/Wh8v/uRu2/7zXxofB88hX3F/x0AdsG7QpqDQMPiQ8hDgYMogsGDN8K6AmjCj0LUAt2DSYRURONFW8ZkBx6HVQdhBxNGWcVbRIUDUwGuQIJAPj5afOd7qXpM+TK3kDY39FSzyvPQM0qzG3PItNg1H/Vx9c+3L/jSOwt9V4AxAsZFFsa5x8/JDQnryjGKK8nNyQ4Hb4TMAucBFP+ePi89NTyi/Ht7yjuQe6l8Hf0Tvmi/m8CRwMuA0EDLwOLA44FSAgiC40Ozg+rDkMQOxW9GNwbciBDJAYmkCU0Is8dMRopF80U4xDsCgwFYwHs/Af12O3H6UDlVt/52YfU+s/hznzP180KzLzN388J0WXUuNmz4e/sSPZS+zkA+weuDxEUmRg4H5wknidWKNkk1R9yHDQYiBJkDVAIwwKF/Vz3svEe8MfwzfBB8kv0VvWO9mP3hvfb91r5rvsl/koCcQj8DVcRShR6FhUXYRkOHm4idyWqJ/Yo5yfLIxsfzhr4FqsUHhILDV8GkP8898rtAebe4Ojc6di71c3TIdJ20KLPn9AV0gXTI9VD2HvbDeGH6Dju3/GY9cn5dv3EASMHyA3JFBAapxy6Gx0ZChfhFhcYExgDF04VURFWCysE/Pzn9731D/UF9E/zUvO48kXyBPNt9Nr3uf6NBSAK+A3WECES1xNEFgMY6xq8H18jEiWWJpQnSidGJnokMSFSHCMWvg51Blb+yff98SHs4uXw33Hd3NwI2+LYodmL3evfB+C835DfL+FM5Cbmv+eN6i7sBuyn64brCeyp7XHwufN49g77aQIFCbMPyxg3IMoiayLpIMAdSRn+FNERvw2jB8gBDPyf9pfzKvMx89zzzfXH9zT57fpS/Vr/zgC2AyUJBw7fEWsXsBziH6Yi6SNjI/UiOiJUIO8dhRtbGBUUag9QCwMIGQUJAi7+dfiz8bbrneeg5KviRuK54U7gBeAU4SPhBuE34vDiGOKW4R3ikOIv4m/iEeQs5tTpQ+9J9Pj5XQHFBpkJqgwpD28QjRN2GIkb8xtaG38ZRRZ9EhcPNQzkCNYFCQOV/hv67PgH+jn7a/xR/ggA2AAGA10HGguyDVUQ+RBnED0ROxKIEyYV/BYiGQgZyBdQGI0YxhdIF8sWNBS9D9oLaAcsAi3+DfvN9qjyN+/z6kLmzOIJ4d3f0N6K3hfeK90Y3Y7doN0v3eXdy+Ab5BbnI+qi643tx/Hi9OX1rPji/igFcwlJDM4NxA/oEj0UDxIfEJkQBhDWDQwMAAryB5AHTQckBhEGtQdPCfEJIgr2CZsJBAqfCo4KzwrCC68MLA2PDWsNGA1oDf4N/g1gDgAQJBLeEysUZRPvEqUSdBFRD3IMnAjrBJ8BTf2p+Ej1dvIr72LseOpW6NHmF+
fe5uvkH+Rl5Q7mBOZL5zrpX+sz7sHwM/Im9MH3UPuk/MP7HPp3+VP5g/fu9Gf0GPXZ81Dxwe9u73vw//Ki9hD7SgBjBq4L0Q4PEgQXkhyRILIiMCQaJE8iHx/gGkcWrxI3EIYN2wqQCcQJGQodCucJVgk7CS4KdArcCVMJhwgzBgUC7v1M+0z5eveW9jb2lfVP9XX1f/X09V330vif+Un6mfvg/P78hPyG+375AvdB9YbzVPFC8GTwHPAl72fu5u0a7rrv7PFK8zL0UvVO9sn2IfdG93T3A/iz95P2fvZu+Nn6rfwI/6UCBgeeC/wPsBMDF2walRyHHOcb/BvrGxsb2RkKGP4V+BOVEVcOYwtxCZ4H2QR9AWn+MPvg99/05PEK71ftDu007WbtLe6V72LxsPPV9pT6v/6JA/IHawpHC/ELywx6DWwNEQwACTAFZgHX/Hf3ofK77rTqnOZo4znhiuC14aTjf+UH6KjrsO+W81X3HfuZ/toBgQQsBi8HeAg1CrcLrgwSDfIMnAzzDCAO3g+uEdwSEBMqEtQQCRCSD9oO3g1pDE8KuwjNB5wGlQVOBUAFfgWJBqEHEggICLkHwQZEBbsDGwLZ//P8x/kG9iDy1e6062LozOXm5FvlS+aI5yDpwOqz7BHvFfFF8sXzzfbi+uL+RwJdBe4HmQksC8YMXg1cDcANig3WC6kJogdSBesCZgBu/dT5VfYC9G7yCfE78N/wxfLy9Ff36vmL/Fz/vQIABooIxgrcDJMOWhAAEvgSuxNhFBkU2hLCEScRsRDeD2gOqQzSCogIEwb4A6oCYgKBAkECMwFx/5791/sw+UT2z/SK9NPzr/JP8Xrvt+3q7MTsWOwy7KbsJu2q7Zfuge8T8Mzw1/G58kb06/bs+Yz80P7uALwCbQQgBksHBAgnCXMKBQvTCjkK6Aj4BhgFawOmAQAAsv5t/TT8yfsH/E/8Fv0D/0UBOQNJBWwHUAnSChsMZA3bDqwQDRLaESgRchGhEnwUOBaMFikWNhUyEcIKJQZnBJECRv9Z+7H2nfGb7ejqX+id57HpQux57WPub/Cl8vX0AvgI/LgARQbhC+YOHQ4LDO8JuwYbA+MAmP+C/Q77zvg79pHzCPJo8Xbwg++K73zvF+7C7OzsUO5E8OLynPX093P62v3eAW0FbAgrCw0Nig0ODUMMUQv8CTwIfQYnBTgE8QMXBK4DvwJ5ArwCEQLSAJkAJQHdAXoDzQXZB6EJewtrDA4MqQsGDFoMSAwkDL0LrQopCZEHowXwAzUDBwORAs8B/AD7/6T+0fzj+lv5Bfis9oD1tPRd9Cn03/NQ847yJ/KH8knz9/MC9Zj2y/dq+B35T/oD/BH+PAD4AWADFwUWB6MIdgkACpYKAgsAC+wKVQv3C/wLPAueCVkHLAWQAxACaQAV/xr+ifxA+l/4Zfce94D3b/hq+XD62vtv/Xv+Dv/s/2wBGAONBP4FWwdUCLkI0ghtCL4HSwcYB4UGTgWiA5ABOP/r/On6J/mb9zP24vSp89jyxvJr8zH07vTV9fT2L/hx+R77TP2A/xYB7AFXArkCGwNeA48D1ANHBLsE7wSxBFYEBwSjAwsDTgKXAfcAPAA8/yf+Zf07/Yv9Kf4d/zIADAFyAX8BkwEYAjsDzARvBs0HxQhLCUYJiQg6BwUGUwUHBcoEUASQA8MCxgFgANb+2v2h/df9//3e/XL95vwz/Ej7hfpw+vf6xPuT/F/9N/5D/18AOwGYAa8BlQFHAbIAGQCu/4n/df8Q/zH+M/2M/Db84vuk+3P7WPus++v7IPxE/J78QP3z/YH+AP9//+L/FwAUAP7//P9CANAAUwGGAYUBbAFTAVcBlAHqAT0CTgLoASIBQACi/57/JQDrAKcBQwKwAv8CNANgA34DkgOGAwkDPwKDAQ4BbgCd/9H+Mv6+/XL9N/3X/Ir8h/xa/Lv7W/t+++f7g/xI/fv91f7o/64A6wAMAUABDgGbADQA2/
99/3f/gv8X/4f+Ov60/fT8yfxH/eb9jP4T/w7/0v74/kb/dv8hAJIBKAOaBO8F4AZcB5YHageqBuoFkAUoBXkExAMVA08CbgFWANz+NP39+zP7m/op+jP6gfrF+hv7oPty/Jj9J//AANIBTwKhAqoCNwLUAfYBOQJUAoICmAIpAm8BxgDW/43+ef3S/Dz8sPtj+yD7q/pF+if6IvpW+g/7G/wM/eD9xv6h/3QAdQGAAi8DfQPQAxoECATUAxEEmQSeBM4DgwIoAR4AgP8a//D+S//3/yoApv8H/4j+Ef63/ZL9pP0L/t3+qv8AABIAPQCKAMUA3ADMALcAvQDFAMMAuADpAFwBlgE2AXAArP/+/nr+Yv6r/hX/hv/Z/+P/sv+j/9D/VQAfAeIBWgJiAhICcAGgAMH/8/5s/i7+/P2+/bX91v0I/lP+x/5M/+H/ewDkAO0AsQB7AF0AQAA9AHkA7QBzAeEBKgJHAkICPQIqAvUBugGXAXkBPgEMAeUAqwBaAAIAqv9M///+0f7X/vf+GP80/1r/kP/I////OQBxAKMAzgDfANIAvgC2ALQAoQB9AEsAGgDd/5v/Zf9R/1L/U/81/+7+i/40/vP92v3m/Rf+PP5P/kn+N/4Z/gb+K/6M/v3+sv91AAUBpgHYAfYB3AGnAWYBMwEDAdMAkgA1ANf/gf9J/xn/B/8X/zL/Q/9P/2//q//z/0QAjwDhAC0BZgGTAdABOALLAl4DzQMGBBME5gN3A8wC/gElAUIAT/9Q/mv9vfxW/Bz8AfwD/Bz8Qfxn/JD81fxG/e79uf6a/4IAWAH9AWIChgJxAkUCBAKrAT8B2wCVAGoATAAoAP//3v/L/7j/lf9z/1r/SP8+/y//If8k/yj/HP8E//j+Ef9O/73/TQDrAH8B4AH5AdkBsgGhAa0BwwHKAbQBbQHjADQAhf8X//b+BP83/2H/k/+s/7j/xv/x/0IAmwDqABUBHgH8AK8ARADV/23/Dv+r/kr+7P2a/Wb9Wv2A/c/9N/6a/t7+DP8q/zv/Wf+U/+H/LgByAJgAngCSAIcAjgCmAMoA6AD8AP8A8wDjAN0A2gDVANMAuwB3AAMAeP/p/mH+/v24/Z79uP3o/ST+Z/6r/vL+Pf+J/+v/agD8AIsBCwJhAn8CYQIbAsQBZAEWAdMAkwBNAAQAuv9m/yP/+f7j/uj+9f4E/w//Iv8//3H/tf8BAE0AhACqAL0AwACwAKoArACoAJMAawAuAOD/iP86/wT/5f7j/vX+Ef82/2L/jv/C/wIAUgCmAO0AIAE9AUYBNwEaAQMB8ADYAL0AowBxACYA5f+5/5P/bP9L/z//Nf8n/xf/GP8m/y7/Mf8y/yb/FP/8/vT+CP8u/2n/sP8JAFYAoQDVAAIBGgEzAUkBTgFBASgBAgG/AG4AFwDW/6T/df9Q/zn/If8G/+n+1f7M/tn+8P4V/z//cv+t/+z/GQBGAEsAQQBHAE8ARwAtADQARwA7AFEAOQDz/+z/pv+c/5X/uP/U/+n/4v+0/3//Q/8z/0r/j//t/10AugDsAP4A9ADiAN0A6gAMATABPwEiAd4AgwAeAL//gf9k/2D/Z/9m/0T/Dv/b/rr+sv7I/vz+Pv+K/8v/CgBHAIUAvgDyAB8BNwE6AS8BEgHyANMArAB/AEsAFQDc/6n/h/99/33/iP+K/4f/hv9+/3r/f/+R/6X/tv+7/8D/w//I/9f/+/8tAGgAmQCzALAAmwCAAF8ARAA/AEUAOwAYANj/jf9B/wv/8v77/jX/dP+u/9D/4P/p//b/DQAzAGkAmwCwALEAlgBxAD8AFwD5/+j/2f+6/5v/c/9Q/zT/K/8t/0f/dP+o/9n/+/8ZACcAJgApAC8ANQA+AEwAUQBJADoAJQAMAPX/8P/1/wgAEAAZACEAIQAcAAgA8f/r/+//6P/g/9r/1v/O/7z/rf+f/6j/vv/S/+r/AwAjADkAPgBDAEwAVgBbAF8AXABcAFQARw
A3ACUADgD5/+D/0P/F/7b/rP+u/7j/t/+z/7j/vv/J/9P/5P/7/xUAJAArAC4AKgAiABMACAACAPv/8v/f/9n/0//J/8P/xf/G/8n/1P/X/+L/8//9//z//P8MABsAHQAiADUATABWAEoAPAA0AB8AAwDr/9j/0P/E/7f/of+Y/53/m/+V/6f/xP/j/wAADwAdACwALgAfAA0AEQAPABQAGgAgAC0ANQA0AC8AKgAnACAAEgAIAAkAAgD//+n/4P/e/9r/0P/G/8D/w//M/8z/0P/Z/+H/7v/v/+r/5f/k/+j/5v/o//T/BQAPABgAHQApACkAJQAOAPj///8BAPf/6P/u/+v/2P/k/9r/xv/S/7f/u/+1/8T/vf/O/9f/5f/8/wcAHgApADcASQBcAFYAVQBWAE0ANwAVAP7/8v/h/9n/2//s/wUAHAAnADEANAA+ADUAKwAcABAAFQAUAAUA+P/8//z/9f/Z/8//3//v//j//v8WAD0AYgBvAHcAiACfAKAAjAB+AIIAhAB5AFwATAA9AC8AFADz/97/wf+T/3r/YP8m/9f+kv5R/vD9h/0n/cH8cfwf/M/7t/ve++378ftV/Fr9f/4b/5P/TQEGBA0GJwfiCGYLNQ0gDcELUwquCJQFLAE//bv6jfgo9gH1z/W/9+X54PsF/ogAJQJyAhsDkgSbBdIF+QUIBqUFSgSHAkYBfwDt/ur84vtM+0r6c/rM/DgA+gMhCCgMFQ/nDxQOqQquBmgBovoU9Lvuauq85wznm+iE7MTx5/bj+z4ApQLmAt8BeP/S+/j3ifRW8TfvHu/T8BP0q/i0/QMDMwiWDMkP4xG5E6kVshamFosW2haQFpQUgRFDDgIKwQNZ/WD4Z/TO8U7x3POG+AL9ZAFSBnIKrQw5DYANqQ3+C80IkwU0ApX+D/uP9xT0xfCl7I3nDOI33MPXA9Vc1HPXWN+/6mH3hwRuEnUhsi5aNt04/zjJNT8shRx1DJv/lPJq5NHZodZT2FXa693i5YHwDvrt/+YFAg1EECkQbhCIEZAR5A+HDrYOnAxGCIMFuQKF/VT4wPWU9PTyuvG49Pn73AJVCZoRFBo0Hw0gAB5ZGcER1gfH/W30sOvB5O/gOd/93SPeNOCC4ubjveSA5szpee298Yv3vP6KBp0OVxb8HO4hYCX2Je4hsxrlEWUHs/vJ7zbmvOAs3lLex+FQ6CzxPvpiAjMKDBFuFUMXqhfaFiUVOBIDD+4L3AgUBjsDUQHE/4H+rv7c/34BsQPDBesHwwoSDUYOcQ9ZEEMQUQ7sCgcIRwSf/tH4lfNO717rAOgJ5oblbOYi6DTq2ey18MH0Z/fc+LX5SPnQ9gbztvD68ELywPTe+mQEtA69FzcfKicKLZErgiRmGzwPeAAZ8Rzk/dv81xbY3Nyz5avwlPskBk0PDhTBFb0WIxTjDSIJ5waaBEACGAGPAcwBGwDA/nL/TADxACkDHQftCToLNQ0mDx8OaArUBhcEnQCz/Av7w/wwAD0CEgTtBg0IlAUMARP8tPZt8QrtD+oV6BTnZ+e16Ovpqut+7qvxKvQz9oL5lP2TAAIEeAn4Ds4S3xUMGAIYcRVIEIEJ2QFt+e3wPOmN41Pg4t/44oTpqfJw/YYIExPvG8ghxSNKIlwe8xd7D1UHzQC7+6/3kPVx9m75x/yYAOUExgiSCzQNPQ7vDqAOKg5FDrsN4wzHDLQLFAneBoMEwgDT/ML54fcd9tXzw/Jn8h/xN+9r7fjra+pW6Lfnz+iW6aXrNO8D8tvzL/W69pH4RPlt+lz++QKiB6sMfxGqFUkXMRbpE64PhwgMAan6hPTo7pjrFey27kLy1vcl/zUGiwvzDnMRYRMWEwcR7w7+DP4KJghEBFMBfP8r/Vj6j/jw+Zz9pwDCBGEL0xFkFj0ZGhrXGCAVQQ9vCN0A2vkr9THxfO277BTux+4Y703wp/LG883yQPMD9ar1IfXr9AH1FPMv73vrUemE6L7oRu
sc8U/5bQLFC+kTrxrNH2ghVSDlHDQXIhB/B5X+p/aR79XqEejY5vLopu2K83H6PgJKCsMQKxRjFgYYyRaYE2QQag0gC7wHTwTGAj0BcP+4/Qf9K/7//ysC8gVxC/IQWxWbGGIanRktFfkN6wWA/eH0g+0U6DPke+Kl4h/j3uRz6O7s6PBf9Cz4TPuc/Fj85/tW+sz2ZPJU7mnqA+dm5qDop+yg8+b9WwnGEnca1iGOJV4kFiFiHc0WpwxZA9z7xvMv7NrnJedB6NbqVvAz+GD/rwTXCWsNuw3BDJwLagp4CX4IsQj9CXMKfAqECjQKsQkYCQkJVQq1CxwN/w42ELQP2A2XCxMImgJM/ST6Ivhe9vv1o/el+eP5e/gw91L1hfHQ7C7pXucJ5xPoaOr/7fry5Pfe+uj85/0o/R76Z/X78QTwfe6177P09PunA/EKkBK9GKYbuxz7G2IY0hKsC7cD0vvp9AXw6Owu7PLu6/Ni+Rn/7QTWCtgOAhBCEQETpxKqEIkOkgx1CmUHwQRhBCcFMwdhCoENxxDdE70VMhV9EnAPAQvfBGv/SPsm+E71f/Pv8k3ywPF78fjwavD670LwAvHQ8XryzPIz8yPz/PE88Sbx+fDF8QLy//Au8M/uQewQ6TXmt+aj6+/xG/ndAYcMThfuHaUh9SMuJH0hKRvRE00MxgOY+hXzbO7g6g3ppuqi7tvyFfjI/goGgQxzESkWNBqmHAYdnRvwGb8XYxSBECENLgqfB/UFxQQsBCMFRgbUBjgHRwdUB7wGkQW3A70Bm//N+yv3W/NM8CXtL+tX69bsHe/h8RH1UPfz+Pn5evkN+On1SfJf7drnX+LV3cTZP9dG2WvgYurE9N7/1gv2FYscxh+rINcfRB28GMwS6gwPCOACQP0n+S33TPZO9rv4tPwqAM0CMwWnBi4HGwc+B+8HugmTCykNZQ9WER0TtxNUE4ASehAPDn8LzwjeBvIEiAOVA0MDjQKHArUCuAJmAq8BkQB7/iH82/lB9uXyCvG072Dvtu9T8Drxp/Ea8kPyxvFd8e3vie1N6j3mJuI83nXb5NqL3a/jNOwB9t0AYwsIFN4aeR+fIWAh7B5lG94WWRFnC3AFBwAh+3L3Hvab9sH4Ef2yAQAF+Qf0CccJFwmUCBMIYwixCRsMFA8+EVITQhX7FJgT6RGCDyYNVgtyCZ0Hwwa2BRIDcQCw/8n/m//0/y4BBQJhAXH/7fya+Yz1Y/HV7V7rlOl76Ozni+fB597oA+rk6s3rmOxN7B/qW+fu5MTiz+Hk4hfn1O1Q9Q/9MQWcDcgUyhmWHT8g3CBgHzEc+hcmE5UNfQf/AU/+t/yr/CL9qv5lAQgDRAO+AxkERwQmBTAGkgcsCgcN1A7ZD8gQxhAlDwoN1wsCC2oKhQpDC/kLOQxhDP8L0QpACWUH6gTNAVL+nPqx9uPyU+927KzqQ+pK6mjp8ej76VDrl+sw7NrtIO+S7zHvWu2p6oroG+Y/4l3gZeJV5rfqf+/b9d79aQVtC/wQTxcpHfcfAiHcIVIhXx7eGXAVYxFPDfkJmAcOBbACugAO/kX6tPbp9JX0f/XO+Cr+UASnCj8QHRQtFpgWfhXfEuAPCw5rDc4M/gsUDOUMDw13DOELtAv3CsMIRQUEAXL8QPd68ZPsi+nO5zDne+cU6APpYuq+62/sI+2X7mjvjO757ALrcOiP5eXiO+Fe4SnjyeYD7AXy+fcV/s4Eigq/Dq0SJBazGEAaUBsgHOobBhvIGc8XkxWOEwgRag3xCC4Esf4I+T30//DM73zxi/Wn+t4AfgePDUgSFBVdFrEW2RXyE5YR2g+JDvIMpgsRC2QKWwl2CK4HwQZsBYkDUgGE/in74fdB9ffymPDx7gHuc+zE6XXnwOZ45mPmnOdv6Zbqkuoj6gjplub542fiPuJB4w/mwupF8JH1u/q/ABgHdgy3EMMUfxgQG2scIB2UHS0dlxvGGSQYABZZE/AQUA5iCroFNw
HO/Mf4KfaX9db2YfkS/VABSQWCCNwKKwz5DIsN5Q1MDjgPghCGEfMR1hEeEegPsA5fDW8LBgl5BoYD2/8G/IT4NvWL8oXwsu4J7VPrL+nf5gnlouPq4h3jIORY5WnmNedy503n3Oae5fzkV+Yq6dXsrfEh91f8BQJ9B+MLrA99E8kWQRkSGycchxwDHKMagBjkFQYT+Q/lDPwJEgf4A7gAnv05+zX6NPrJ+ov8kv8+AgAEpAUXB7oH4AduCAgKJwwoDj4QcRLyE5QU5BStFFYTGRHJDlgMNQl4BV0B/Pyn+Cj09e+j7NTpR+cX5W/jUeLF4bThWuKP4wflquYO6I/ozejA6cLq6epY66HtdPE+9Vj4ePs4/wsDTgauCa0NWRFWFGYXCxpvGwEcsxs/GksYRhbYExwRXA5kCyIIigRcAEn8bfma9yv2qvVf9gT4Svrs/F7/kgEFBI4GyAguC/0N1BAOE10U+BRUFUQVfRRNExYS6BBfDxUN/wkuBmoBsftd9UXvHurx5XHind/53bjddt7D383hjOSM5yfqLeyu7eXuh+8t7yfuwe1X7m3vyfCJ8mf0jfal+WH9IwFkBXAKhA8GFIIXqBnLGuIaxxm7F8IVIRQgEh4Q8w2QC7AIKQUnAUr9RPow+ND2O/Y392P5//vJ/r0BvwTeB9MKcA3cD3oSqRSzFfIV9BVeFf8TiRJDEc8PCg6/C9wIFAWJAKb7yPYW8vHtj+rC52LlcuNn4lXiEOOo5AjnounZ67Xth+8L8YjxA/Ec8ErvR+5s7WXtCO6U7pLvZ/LE9tH6wf7nA3sJ8Q3GEUsVxhffGCQZxhiWFx8W3BRfE8wQeA1pCiEH5gKZ/qf74Plz+A34cPl3+0f9VP/4AbkEJQc4CXQL/Q07EKERpxKHE/kT2hM9E2wSQxGiD5wNIQsgCJ0ExwDe/PL4cvXM8gTxgu8B7u7sDewX61PqYOrM6mPrUOx+7ZPuj++M8HPxyfGO8S7xMvF78UjxevDf7+vvhvDJ8QH0O/eM+7QAwAVJClwOoRHXE0sVnhbBF4AY1xi+GMgXthVQEvANVgkEBQEB3f0p/LT7O/yI/SP/DQFiA64FmAdsCTwL3AwhDvYORg9RD1sPTQ/qDlIO4Q13DY4MIws5CaEGRAOf/zr8U/nb9tf0A/PM8GzusezH61vraOsx7D3tOu6N7/HwmPGy8YrxOvEG8fvwvvCB8Fbw5O947+bv7PBC8jb0z/bK+bf8Q/+SAVkEqwf4CmwOXxJYFrgZNhxVHdQciRqCFl8RFAwzBwMDvf/v/bT9iv7N/4IBfQOIBXgHTwkVC7EMzQ1ZDnIOTQ70DUUNiwwODKwLWwvkCk0KgAlCCHwGUwQVAsv/RP1h+gX3fPM28GjtOetF6nLqV+vH7JLuSPCU8Uryg/JQ8gvyz/F/8QnxQfAG787tAu3d7FPteO4O8A3yivRf9zb6/fz0/2MDgAf5Cz8QCBSrF2AaxxugG/EZNhfXE8YPTQtaB2IEgQJVAdQA7wDCATAD7QS9BsgI1gp2DH4N+A3iDUsNQAwCC9wJ7ggECE0H0QZhBp0FgQRSAz8CNwEKAJD+u/x3+q33tPQT8jDwBe9p7lXuse5H7wfwtfAn8YrxC/K38krzlvNo87jynfEr8GvuyOyB68fqwOqJ6zjtwO/k8ln2N/qM/k4DQQgbDaQRrxXdGLYaIRtYGogY2BWsEnUPkwwdCkMIGweLBksGTgZsBpsG2wYVB0AHZgeVB8sH/Qf2B7YHbgcoB9IGSwa2BQ4FQwRNA0wCVgGYADkACQDV/4X/7v73/Z78GPuY+T/42/Zx9SH0APP58R7xkfBy8JXw9vB98cvxvvFX8XPwF++j7SnseuoP6XzooehJ6Z7q2uzr77HzK/gR/WMC7gcaDYERIRWyF9sYkBhMF5YVsRO4Eb4PCg4IDVwMogvxCmYK2QkLCfAH7gY3BpQF6QRvBGAEoATkBPIE8QQlBVoFWgUkBQ0F8w
TEBJEEkgTGBBQFVAUuBX0EegMsAoIAk/7P/Dr7p/nu9yr2cPQD8wjybPHR8GjwWfA98Mrv+O4E7jHtTOwQ66zpqeg+6GHoMOnB6intS/Cz84X3//vbAHwFrgmgDT8R/hN3Fc4VmxUYFVgUYhOAEvERkxHzEP4P/Q7oDXMMkgpkCGgG3AR8AyUCLgHWANAA4QAmAZkBQgILA9oDfAQUBXYFqAW0BcQF+QUmBh8GyAVOBZwEegPRAbr/ov2x++z5Ivh09gj1AfRG88HyQPLn8bjxgvEA8T/wWu947oLtaOwn60Lq+ek36hXryOwd77rxafQh99j50Pzy/88C9wUfCScMtQ7HEGQSkhOHFEsV/RWZFjIXVRftFgQWkRR4EuEP4AzMCfUGdwRwAgoBRwDq/9r/KQDDAI0BaAI3A8wDSgR4BGMEIwTWA4UDWANOA0oDRAM1AxUDrgLZAZ0A7f4B/Sr7ePnY93n2bvV89IHzovLm8R/xN/AQ77rtfux263Tqdunr6A/p1ek/61rt+O/r8uD1t/iR+3b+LgGnAyEGpAgWC08NOg/6ELQSWxSjFZcWcxcuGIsYKhjwFhQVnRKCD+sLWggNBT4CAgBM/kH9Dv2E/VH+af+nAN4B6QLJA30E7AQUBckEKwSHAx4D0AKCAh8CswFHAdEACwAB/839l/xb+zH6EPn/9xj3QvZP9VH0V/Mw8sTwMO+27YbsnOvc6kPq/ulD6hXrj+yk7hPxnPMI9nb47/pq/b//2gErBM4GgQkVDIgOAxGDE6EVDRftF4AYwhhyGGcXxhXDE2cRjQ5aC04IjwUjAwkBcv+J/lT+m/4Y/8H/kwCDAWQCIgPTA1wEngRxBNgDAAM/AoUBxgAPAIn/R/8s/+L+ZP7K/T39nPzd+wb7T/rZ+U75WPgC93v1tPOv8YTve+346wfraeoD6uzpYOpv6yPtTu+18Tv0jPaK+FD68ftg/cj+agBpAs0Edwc3ChkNPBBHE8IVhheeGC0ZUxnaGJAXkRUJEy0QFg3pCbwGzQM8ATf/5P07/Sb9pP13/kv/GADXAHAB2QEYAjUCHQLuAa4BaQEmAdwAnAB5AGgASwD3/23/ov60/Z/8gPug+v75g/kE+VX4a/c29q/01vLt8A3viO1r7LPrOev76jTrIuz27ZHwi/OL9nn5I/w6/g0AggFlApIDwwQsBqQHTQlMC4UNvQ+4ETgTIRTSFEMVNxWXFFUTiBFiD/AMUwqsB1AFbgMaAjABqgCPANAARQHLAUQCowLbAtACagLMAREBVQCF/6f+tf3f/Fj8Gvw+/Iz84/wY/QH9mPzV+9/6+/li+Sj5DfnC+Ar48/ay9VP0AvPp8Q/xc/AD8KzvZ+907wDwLvEX8431Nfjg+mz9pP9dAaICdAP9A3YE7QSJBWMGlQcVCZ4K+Qv0DKYNIg5lDl8OAg46DSoM5ApgCcQHNgb5BBcEogOHA6gDAwSMBCUFnwX4BRAG3wVcBZAElAOJAosBgQB6/4r+u/0m/eT8zvzG/K38d/wT/KX7I/uk+lP6UPqP+s/66Pqb+v75GPn598X2sPXS9C70w/OE82DzgvMD9Pb0UPbt95D5FPt0/LX9wP6C/wkAfwAPAboBoQK7AyQF2gZzCLQJjQoQC1QLdQtkCxcLmgoDCkkJdAiqB+AGRQbsBdwF+gU2BoMG0AYYBykHEQe0Bg8GHAXnA5kCUAEsAC3/X/6l/RT9xPyL/FX8E/y++0v7w/o++qT5D/my+Iz4pvja+BD5Jvkd+eX4cvjz94b3H/e59mH2Hfb09RH2dfYy9zH4YfmJ+pL7h/xT/fT9bv7C/gH/Xf/Z/4cAagF8ArUD2wTVBZEGHweoBzQIqAgBCUEJWQlMCSEJ9wjJCLwIyAjnCAwJIAkoCRwJ/Qi6CGgI5wc7B1sGYAVpBHMDpgIHAoEBIwHQAHIAEQCd//r+Kf4//UD8RftT+mP5h/jR90H31vam9qH2ufbe9gP3G/ck9y33Gv
cN9wj3F/c895P3L/gI+f356vrZ+7v8Wv37/YD+yP5d/7f/TgDnAJMBNQLAAiMDXwODA4IDngPIA/YDHgREBF8EcwSKBLsEAQVgBdMFTQavBu0GFAc0B00HUwdLBywH4QZwBuoFaQXuBJAEUQQZBN4DegPhAhYCQQFUAGT/fP6a/dP8EvxG+3z6zPk0+a74ZPg0+A/4+ffs9+D33ffc98331PcG+GL41Pho+TX6K/sr/Aj9uf04/q3+E/9s/7j/BgBpANwATwGyARMCawKzAuYCBgMaAy8DSwNsA44DrgPEA9UD7QMVBFMEpQQUBXsFxgXcBcgFrAWNBXYFZgVPBTcFFAXyBLoEjQRnBEQEDAStAx8DUQJiAVYAS/9S/n/90PxL/M77Rvuz+iT6p/kx+eP4tfib+JH4k/iX+IX4XvhH+F/4pPgP+az5ePpi+0P8DP2t/RL+R/5q/pD+xf4O/2L/yP80AJoA7wA4AYYB0AElAogC3QIyA24DmAOpA6kDngOlA8YDAwRVBK4EGgWDBdkFEAYgBhgGEgbyBcMFjQVMBRcF1QSYBFQEJAQIBOcDoQMeA2kClgG1AL7/2f4P/mP9wvwV/GD7svoG+lv5vfgx+LL3TvcH9+v2A/dG96r3N/gA+eX5yfrA+8n8xP2S/ir/hP+m/63/hv8v/8X+dv48/j3+lP4r/9b/jABwAVAC5gJaA8oDNwRrBJIEwgTMBLMEAwVWBVYFegWhBagFyQXaBbwF3AWEBYQF2QUsBcIE4wTpAxUDogNnA6wC1gJ7AigBm//AAvcNwhQiCmz+OgFtBdz+KfqI/8T5sfW09wDwN++I9Rjwn+th8ufyG/AA9Dz1bfPF+B4Awf+l/AMAVwEc/k4ARAFg/+EADP+B/qb9F/gg+m/6dPlM/ar+if/2AqcDIQOIBOgCzwVXCP4H4QjUBl0GXQc3BqYFTgUIBTcDEAIRBXoAuP34BrEIkQCZAEgNRQYt/10QBgnDAKkN7whG/+wFWgKW++b/Dv2q++AAGPwoARwGePdFApoFYPoMAQYFIfsG/PsFDfDC7Sv+EPJj7ZX1vPIN81/4SfTXAXYJ3/ajA1EQRvhA+UgIyv+Z+LP96wLd9hnrFPcmAdb4Vv1lDD4GSPxtB8kRYwTjCKQXmA4SB2wLlg2vA3YAzgV6/db7jP5++vL+jfBi8D4E6v7O/OQJBgqK/W8B3gq1BM4DSxJKD/EIzg2RAPoBEQn2/r8A5wbMAmj5XPce+0r5VvXP+yH9OPSt9Ln0DvSO7kLzJvoC8aP1/fx+9X77KwOJ+u0DOAXG/sMBdv/a/pP8JP2+AB38Gfgw/Hv8M/UR9FX5z/z5+qr9uQZSAp8CRwsICW0D2gn3D+EFpf4XCRMSvAcc/rEK6wxt+XMCTwoXAEwESQ7dCa8B0gfVB20C6v8hBNoIKwFL/t4JVf5Y9BL8hP0m+tX32f5mANr6sPjwAgL9D/R5AHUHFwLu+ioBDwbK+bj3sQEP/Ez26f0LAcL2ofTr/nT8l/b990H8rP+B+qj2gv0/+h33PAIH/wX37f1hANf75fua/BIDxga3/7AEwQneAz8HMQtmB4kGHgnpCWQGhASzBLoA//zy+jL6ef1dAckBhf+zAeoGsAMm/44FoQV0BbURfglH/+cHkwVx+Pb3q/1Q/rD9av13+0H4ZvVO93//sfnG9oAFZgMh9F/27fs9+Gv8rAEuATcEcQfQBTn8mvb4/pABNvxy/gAB4PfG8iP4f/aQ9kwCeAk+Amj/DwTdAykA6wBQBy0JAAguBQwCOQHl/Yj+ewU0Ax39uwRqAfD9KggCBhsGgRNfDwgEZww0Cmj+bf9UAr8F+AS/AZ8EVQF1+av62P279/z0iPyf/Of3Yvpz+X/37PyZ/I36pvz//P8BzAMN/Aj/wwVW/H787ASqAPr87AKJAdD4t/iI+j/6f/cL90P6X/mc+G/57vcH+aD8HwKsBIUAvQGhASb+Ev+j/uQDpAd4BT0FXg
JdACICeQIzAloFXgbDBQ4GlAJ2ANICdgd/CV4GiggMCewGYgZPACoDRQn7BmgCAgEhAhv+k/nQ/NT9d/oJ+9j9iQCY/NX5y//mAPf6ZPxtAj0E8QL0/+H9L/qj9k71afR39vr4LvqB+xb8kP/d//7/DwJB/yf/dAC7/p7+PQKcAFD+4QCG/vT7Qvy7+XH5Wf8iAmsBpAIoCNgIEAEgAMr+1Pz6AP0BbQKyBdYEhwHsAIr9D/ir/M4EiAPpAacHAwtyCF4FcQXRB8UEcgTPCN4HVQWH/6n/eQES+rD3yP0T/8P/WACo/Ov78/0M/x77+/c9+tf+3gD2/Pf6Zv7m/of6gfmG9zr5cPzf+4L8Vfz9/sQAJgHa/mP+bQPsAU3+I/6T/cj9hgC5/rX6i//qBJECyAG8AggHOgjYA2sELAXaA+IDWATkACAAPwMCAUL/MQEE/5IAngl8BQcCowUuAgEA6P58+dT6pgAm/fL/ZAUUATQCxwXzANL+FQSkA54ASALmAOP9XP3A+8H6Tfys/Gf7VPzA/MH8p/0l+9j8FP4O+6P6MvjO+j7+Nf0c/24Bwv9Z/pcAvv55+k38gwGAALz9mgC0/4r/jv9u/dj/MwPrAC7/qwCpAFQC7AExAtwESAVmBcwDjgHPAhcEcQJFAXoBQwLlBHwEJAL+APAACQOS/479cf0N/ksBXAJO/2z9OQB6AK3/lP6g/iUB7AJDALD8Pf8qATn+K/9ZAjYCOAFNAWICIAFyAREDOAImAWsBRgIlAQT/bP5r/gL9wvys+0f4JPqY/FP8/P+iBCUG1AQ9A/YBwP1Y+lH6J/h496P7kfua+GD5ivsk/Jb7LvzC/UT+av3z/un+bf1a/tf+S//p/Ur+kgGTAhYDjQV0BZkGwwpfC5QK0wpeCyULaQj3A4cDMwaIBbQB7wIwBQAFcQUYA10BIASnBk8GEgfpBSMFSgRx/535avVo8ofv6e7X7Q3u1/Ju9pD12/Z9+CH4PPgB9hL0aPMc8z/yzvAZ8m7yyfKO9TD3oPmD/KkAXQe5CzYOUBHHFIsYZRriGnUapBoMGx8Xlw/6Ci8I8wMgA1sBdwAnBTwHbAWLA6ACNgGyAqoCSQDFAZUEWwVnAAb+hQDX/1v92Prt9zX4jPZz71rsM+277AzuBfGg8+TzNfFI727rQujA5TPkoOd+6yLvK/X5/XkD+AaMDUATmxV8FbAWMxiRF0sU+RBvDEkElf6G+bHzv/JW9P33kQCbBbgHwg14ESARrRG2El8UvhdTFUAQ5hD/DbsGwwGw/0EBgAA4/iT/w/72/Yr90PlY9hX1mPL88MHxBPOJ81r0FPPs75Du0us36lHl4+Dp5wjwwPLP+XoIyg9XD3QQMxC4DkwHq/+yAeABKP7N/uEB/AAl/kz81fzI/zj+Xf8KCFoOOxCPEkAYnRoCFwUTORJKEesLFAhQB8IG8AOZALb/6v5D/5n+/P2OAPb/yv64ASQC7v5c+1v51Pf98p3sOedJ4+3g69tc2Q3eUuBf4lXr8fOd+TYAIgguEJgUgxSUFfUX/xVEEDkMigw8COb/LP2S/Gb5LPXF9kH8vv9bAbMF1QuLDgIQHRFVE5EVWBWZFbIVeRR8EOoLOAgCBG0BHv6J+/j6evnZ9574m/rn+u76Qvx+/dj7wvlP9hXveOdm4ObbUtnp1krXOttd4iPpEe9z9kz9+gBwBpYOLhF8EpUYTR5mHhIasRWFEgsNmQOa+7f3B/eX9//5Yv9rBLQK3Q9lEDUSqxKyD04ODg0gDVgP/w5XELUSVRH0DhQNSApJBUABS/2K+hf5V/eV9P3zAfhg+ov5cfeK98f4dfQo7ern/eN+39PXwdLu0xbXTNsk4UXpBvRd/YoF7Aw+EyQc4iANIqIk5CIBHt0W2A2kBp3/Vvl69KzxPvJE9O/2svojAPgFkQraDLEPQRNeE9YQfQ92E5QTRQ8pD/IOkQ0HCo4GxQd2Bg0BnP47/bj8g/pL9/v46P
qk+cj43fit+Ir3MPRs8jPwtOso5U7dcNcH0hrNVcsy0DLZ/+TP8xABVQ6kG5YlXCvELmUtQCiHIyccjRFqB4AA+/kg9DDw1u3z7gnyPfZ1+9wAsAVhCGIKVAuOCdcH6gg5C/sL/AtFDhYSaxPLEpUT3BVAFTcSiw/ZDqkN7wfzAtz/7vqu9Fzwoe3L6tTpjOk/6XboIeew5cfik9+A3BXc797c4EvkP+uv8m76/QBRB/oOxhW3HDUf5B43ITcg1xt8FjYPFwnAA7P8bfXK7/zt7u3K7drvVvEX9Gz6B/3M/MMARwYvCiENgxBIFcsYohruHJAdGh3hHOkZExYwEisNDQoaBkYAwfzY+av0Qu6467LskuvU6gftl+3M6yrpmOVd4SXd9Nu52+Ta4tzP4Q/pfvED+LsAOA5kGDgehyOAJ6spbicAI/AdTRamDc4C0/j+8QftA+vQ6uDrLO+e8r/1aPgs+v/9zQFQA2wFrwjtC6EOfRAGE9wVrhdYGMAXNRdTF98WOBVSEzkR2A7CDG8InQKx/oj6wvQ57yzr6+e44w3gR94/3D/at9nn2WbaL9tD3gzkWOp68AT35P10BWgMvxI7GSof5SPeJr4nNyYLIh4bChN2CRoA6viw80Tw4O317NPtOO7h7bXvV/KA9IX2Ofnf/FsADQR5CI0MCRAzE74Vlxe0GW0d9yCQIRkhMCG0HrsYshHICvYCvfq/88zth+lE5p/ja+LI4f3gJ+DC313fg90c3EHcRtwU3bDfrOSf68Ly0vo4BG0O1hdjH64l8isCL8gtByo/JNMcjRJQCLj/7viH9AnxfO707U7u9O2a7avtsO2v7tXwM/Mq9lL7AwL+BisLThBHFeYYLBziHhUh6yOaJfUk8CPHIfEcYhcPEC4Hc/+k+F3yB+x75xjlUOLx3xne6dud2QzY7NcK1xzXgtpb3qPh8uXz6/vyA/rdALkIYBKiG8QiGyg9K4MrXylDJMEcaxWgDkwHswDh/Ab7+/gR9ijyXu4H7Gfq/+gW6RDrCO6J8a31Bvqr/lgEOwoSDzUU+hndHg4jwyZDKUwpJCdUI5sdWhWBDE0FS/7097X0o/M78nfwVu676+voXOYf45rfb96f3gbdp9o12sHcseDn5KDqnPIL/IcEqwueE08aYh1CHnYd8hp2F/YTSRABDRgL5wnIB3oFWQSdAjn/m/t9+Ij1rPLm713uP+5K7pLuivCz85n26/k9/0wFOgpyD7MVZhvHH+oiQiUPJp8kkSFYHWUYLRMbDfQGBAK//aH5Z/a/85fw9ez26Mfk4eDo3RPbDNgG1yjY4tnJ3KzhcOaO6g/w7PYl/akCewjoDSMRMhNeFsgZ0xq0Gr4bvhzuG3caGxlyFpESSA5KCDsAaPgA8lbseufh47ni5uPM5ZToP+118ub38f3nA4sK4xGxF88bdh/0IfUhAyEdIbkg8h0jGrMXhxXvEU0OdArQBS4B2/sx9abuiui44SzbZdbO0rvQWtHI02nW89p24fDmCuul78Lzkfa3+b79PAIrB/kLoBAYFmQbnB62IGsiXSJIINMcKRiBEhEM/QRt/Qr2R+/a6fzlAOQW5Ojl5eh37JjwdPVf+uD+rwMGCd4NHxITFpUZkhySHzQiwCNCJPQj8iFYHkoaAxbYEDILqgU3ABv7L/aU8Urt/uhL5b3iH+Du3CnbYtsd3FPcEN1k3xPjZ+ZA6bjso/F79wb9qgE7B40O5hSIGBsbzx1hH+QeixwXGVoWfhMNDoAHyAKv/cL2I/Ef7q3rIOlY50jnuelx7W/wovMX+OH8gQGIBo8LiBC3FV4ash38H54hYiIWImIg8hxGGC0T/w0DCfEEmgGM/uT7qvkL9+rzvPDP7VTqh+bS40DiWOAp3urcm9yV3DvdPd8Y477oju9F9vX8WwSFCx8RghVCGSwc1h2PHq8e+B3cGz0YVRPzDeMH6gDZ+cjzge7t6XjmmeQt5GHlFuhI7K7xdfdV/UYDMg
lKDgoS/RRIF50YNxlqGWQZvRgiFzgVyhNsEiMRNhBHDxEOZAxxCrAHbAPX/Rb41PGh6vLjUd+0263Ys9fF2I7ZKdoF3SniXeeG7H/yE/kp/4AEmwm4DkETYxYAGE4ZbBpDGqUYaRa6ExgQYQvuBTcAyvrO9XTx7+1p6xDqvekU6s3r0+6J8nz28/pIAPwFjgrXDdEQVBOnFA4VDxW7FBUUQxNmEqURYRFUEcYQrQ9qDhkNEAsDCDkEDgAy+2v1Ye8Y6srkOt8m23HZ0dia2CbZeNv233zl1OoK8aP4YQBlB54NwxLzFj0awRstGwoamhj8FcoS/A+pDDIIjgNj/7P7zfhF9vjzT/KG8dXwkPAu8c3ymfTp9nX6XP6XAXoEugc6CogL6QzMDk8QPRGaErkU1RaOGN8ZZRorGiMZahaWEfULWwb1/y/4efDa6SLkrt4c2mnXU9fn2JzaGN0q4nroee2Z8cr2j/yYAbMFXwnTDP4PtRE6EmwTphWyFhsWrRW8FVQUnBDIC3sGFAHM+4n2+/Hh7iLtGOzm67jsue7f8Vz1fvjV+7r/KgOEBXEHMQn+CjINcA81EfIS7xT2FrwYyRndGWEZQRjFFckR7wwPB/n/Tfjx8NHpLOPw3Unaytcw12/YX9rg3OHguOXA6hvw0PVE+5cA1gVrCosOnhKtFZIXMxm3GgIbIhqBGCkWCBOOD38L6wZgAkv+m/pQ97D0+vJa8nvynvLb8tTzpPXK9wr6efwe/94BjAQhB94J1wwuELYT0hY3GWQbSR0bHmAdUBsTGJoT4Q0nB9z/pfhp8WTqHuQ434nbBNlh1/TWFdiB2hHdwN8741fnZes+74/zIvnP/zoGNQu8D8MUAhnuG9cd3R6VHq8c/xg/FDwP5wlHBK/+6PkH9hzzIPEx8EHwMPFi8o3zKvVv9/n5kvwh//UBIQWOCAkMfw/REuYVeBh+Gjocxx3IHrAeRx1uGl0WjhFmDNIGygCs+pv0z+4f6e/j/99e3THbTdmV2DvZFdrp2rfcIODH5ADqMO/i9Jv7sAK8CAwOVRPuFwwbvhxoHfocEBvLF6UTIg9pClkF6/+/+lL2mPKR77PtEe0u7Y3txO5K8Zr05fdC+wL/JwM8BzALWw/rEyEYdBsSHgcgNSFhIYUgoB4DHMYY1BRLEMYLcwfbAgX+fPmj9TbyF+8X7E3pbuYn42jf2Nvv2MbWW9VE1fbWetrQ37bmTu4f9ib+FQZCDY8TpxhCHIAeGB/CHfEagReyEy8PDAq/BPr/xvur9y7zPu+47OvqMOmY6KDppOse7n7xDPZL+7kA6QUHC0EQAxX+GCgcqB5pIEshUyHcIEYgaR/LHW0boRiKFdYRVA2oCE4EIACS+5X2x/EY7TPoHOOG3rna7Ndr1ufV7tVm13DaC95t4sfot/Bf+PH+JAUIC2MQ+BRZGDsachshHD8b3RjnFTwSqg1hCNkCeP1u+E7zE+7S6Qjn9eSe49TjB+aS6eDtsPLZ97f9HQS6CUwOpRI0F1IbZB6UIFUiqyMEJFwjSCIKIVofvBw+GQkVuxClDCsI0AJN/Uz4gPOB7sfpoeUs4mrfTt2t21TbJtwf3VfexuBk5GTos+xH8SL2d/v7AKkFZwlHDTARKxTdFeMWIxeEFicV6hKSD6kLMAfhAfX7YPaL8YDtHeqN56flaOWF5pvovOsR8Er1S/ok/y4EqAnxDi0TfRbmGScdDSAjIqIjeiRrJCoj7CBuHrYbjxhmFIoPcQpzBW8Ahvu49kTyI+5g6jPnAOW348jin+Ge4B7gKOC84KzhuuIb5Z3pT+8N9Yb6p/+4BO4JZQ6eEU4UrxYIGN0XsRZSFNgQ3gydCBQEz//Q+yD3ZvK77pzrseiH5oPldeVp5q3oNOzT8E72rPu+AGkG9QxOE4YY5hwJId4kvydAKbEpUSkSKLslZyJqHvAZ1RQqD1sJ4QO1/q75CPXe8O7sc+lt5qXjJuGJ38reqt7N3tHezd
7a35Tis+bu64jxmvZN+1wAPgVsCVgNpxC9EhoUWhXQFT0VMxREEucO8grcBlICPv0M+HnyHO2w6BvlOuLA4LLg1uHh5MLpCO9b9Ij6gwF2CKQOHBRpGZkeECM7JkQooykFKg8pACdaJG4h9h2tGZAURA/sCWgEm/4R+T70BvAg7LTo/uUH5JDiduGd4GjgDOEt4hDjBuSM5ZXoMu0m8qz2IvvH//YD/Ac2DCMQKBNpFcQWvBbfFUkUNxHsDKEIYQSI/1f6dPWn8Djsg+jR5RrkfeMD5Izl3+dO6w3wQfVu+t3/5QUwDB0SWRcVHN8gQyViKOwpdipLKiEp3ya7I+IfNhuDFS4PIQm4A13+6/jJ85Pve+zm6THnBuXl4yHjWuL94bXhPOH74Hbh+eLh5QXqS+4+8uX2JvzjAN4E9ggCDUMQVxKJE9YTdxN1EocQqQ21CgEIvQStAJv8vvh49BjwFuy76CrmkOQ65FvlFeit6+zvE/Uy++sB1giND+8V5RtEIXYlMiiUKfkpWCm9JyMl9SFLHiMaWhViEGoLdwZzATL8Rvfz8jXv/+s26bzmCeW145Ti2+Hp4dLi+OMC5ZXmX+n67MnwJfRB9636af66AcAEuwd1CocMHg5eD4YQPRHHEE4PYw1MC6oIDwWzAPD7B/cI8n/trenP5u3kB+R95I/mB+pG7hvzefhy/sIEAgv4EGsWaxu8H0Mj9CXDJ5QoSigEJ/okMSKPHigaXxUjEJYKzwTf/gb5ofPb7vTqDugb5tDkI+QZ5K7kleVj5svmA+eG58zoyeoY7XnvDPL89ED4vvvN/0MEjQgZDAcPSRHCEnATaRN4Et4Q6Q5SDNwIuwRSAKX7sva98UztmumH5jfkUOMY5C/mKen17MzxuPcw/nsExQpBEakXVx0bIuElwSiEKhAreioWKdwmgCPsHnMZehNADRMHMAG++9D2e/Lv7nHs2uql6bLoGejO53Tn9uZm5vbl1+Xz5XPmIujl6ujt3PBn9H740vwzASYFmQjXC64OpBDMEaAS6hJwEk0RjQ8WDQkKOAZ6AUr8Kvf68cvsQujV5JjineH+4aLjrOYY62zwbvY//aIECAwBE1MZ3h7KI8wnVipLK/cq2yn6JxslNiGyHAQY9hIaDf4GNQHz+/n2J/LM7ZTqqOhg5yrmPuXT5LLkqeTm5ILlZuac5xDp+erw7ZvxGvVH+NL7xf+0A1cHoQrEDbIQ4RIkFNMUMRXNFE4T9RArDtoKjwYBAdr6/PTH79PqN+aw4ubg+eBQ4qHkOOg+7RXzJvlo/x8GRA0rFDcaPh+TI0In8ylLK0crPCpLKHMlgSGjHHMXwhFqC70EUf6F+Fjzfe4n6vrmP+VZ5MXjo+MX5NDkoOVm5j3nOOg46Rzqeuse7oTxwfS49xf7Rv+5A/sHtAsPD04SYhRLFVEV8hT4EwAS4Q4cC+gG+QFM/Dv2qPDy6/XngeT14c/gVOEN4+fl6+kU7xj1ivs2AgYJ3w9tFk0cXCF+JeAobivPLMoslytkKSwm9iHFHM8WeRAWCqwDff3r9wXzlu6u6qrn3OUS5aXkTORL5LjkMeWF5dzlVeZn55vp8OzE8Hn0A/i5+97/FwQQCOILyw9gE6wVXhYCFiUVyhOIEW8O/wqEB3YDVf6g+BDzLu7Z6Q3mOOOx4XfhHeKV4yTm6emk7hP0Dvq0ANkHAw/RFRMcnyFUJgsqoyz9LcctEywCKQQlLyCmGnQU4g1QB8wAfPpq9PPuR+qC5qLj1OED4Q/haOHV4aLi9+Or5ZTnj+l068/tC/Hm9J742Pva/hACmwX4CPoL6g7UER8UJRXvFPkTqBLFEA8O1gqHBzgEQABv+0D2c/FW7cLp/+Zi5SnlDuaI57Xp2uwF8Qb2v/sWAucI1g99FnEciCGoJeQoHis0LBEsgSq7J5AjYB57GPkRMwtrBOn9xvc98mXtSukE5qTjROLf4SziuuI849rjtOSi5bXmIOgZ6qbs+O
/j89734vsDAPIDxQecCyYPMhJ1FMEV1xXvFGETNhF6DlML9QdXBGUAG/yI9wrz4O4k6/vn1+Xm5DHlgua76PrrN/BI9dj61gBFB80N9BNeGfIdkCFNJAcmliYuJvQk5iLsHxwcfxf1EfcL0AW2/zX6dPUh8ULtSOoT6IDmveXo5YXmVee96HHqu+sJ7V/uc+/Z8Z72g/uk/n0BZwTBBiwJowsZDdsOtxEaE7MRhQ9vDT0KtQYeBK0Btv9s/rr7Iffq8ijwZe2J6lXpOeoY7KTuSfHY8p/0Ofis++/+kAOYCBMNvxGAFc0XHhqSHH4dtx1EHkYd4homGNsUgBDVC4UH3ALk/g38C/jo80zxdO7w69zq4OpZ657sSu4j7u7t8+0r7eTsCe478ZL1rvl4/N7+eAHWA8YGWQroDeoRDRTTE/sSuw8CDFkJ3AWuAl4AYP3V+Wb2avIT73rskOqx6RPq0OyK7rLvDvRC91z6cv56ASgFtwkxDrYRyhV5GT0cCh2nHRMe8BsEGhAYnhW8Ei4R3Q7tCeYFBQNw/4b6ZffI9vf0R/OV8pLw1O/U8G3ywfJ180b1kvKv7pvsAOnz5ZLobe+980D1zfl8/isAnAA7BF8KwA49EXERMBB1DsALZwdXAx4B1f8c/Dj3CvON8J7wHvC07zfwcvN+9tX1pffa/LABvQWGCNALQw9+EVkT4RMsFPkUVBSoEqsOQgrRB/EFQwTCAmcCMAReBFcDSwSMBAYFfwXuA8sB9//1/uD9mPp493z2aPWr8kPvSetk5zjlZuLg38Lh++hz72P1yv7jB6gOjBVyGsIdaSEEIhEg2RugFn4PYQUk/MDyR+mM4uHclNjP14PaGt6z4BTm7O0q9c38DwXHDXsWqhwGIGwh7CBAH8IbZhfBEhAOzgkwBcEAT/3m+w394f5qAY0F2AkRDZIO+w4mDlkMlAlUBpsBifv89enwYeoA4w7fstwk2SrXENnL2j/ax99C7QD64AVjF9cn7S/rNVw5XDaoMcMs7yKNFf4HGPmV6OHYOc0PxoTDNMVbypLRytoH5v7wnvyNCeIVcR+5JM0nLyoRJ2cfCBl9EzYNJwhkA+X9/vuG+yL46/WM+aD/ywIzBRcKcw8SEvYRNhGWD/QLqAXZ/dX1rOx24/zbXtTHzOzI2cl9ymHJ+tHO50b9Uw8oJCs3NkTLTOxO6kkvRUNAqDNwIqoRqf7c6IPW5sdVvKe5AL3mwJ/J8tiG5xTzoADKDwkclSNJKMsrZiseJo4d/RPOC10E8/zN9vXyWfCK7u7uY/Fy9eb7ggPDCYMQYRaDGN0ZcBqWFpwO4wbv/S7we+HR1ETI47vNtPSzWbRgt77E9tyt+LcSJikjPuhPEVYEVBxTAFHAR4U4MSbuEBn6sePGzIi9eLx9w8nMbdjz5wb6BgndEYAYyR57IxMj2BvLFAgQ2Add/IHzdu/Q7mzw2fFu9Kf6+QCjBLMHkQyYERcVyxamFw0X8xRsEDoJnAO7/o/3tu+05hXbTc/Xw3u4NLFFsYa1r7oyyDzii/6IFXYpKD2US1BPCE5dTKlIQUBkMSQdrwZM8KfZJcbIvHfAHs4S3/bw+wQOGagnCS7uLr0uOi3hJKEWTgoUAHP1gOm74dLf6OKX6gXwJPehArwLDhGTFt8b8R9JIKUcCBieEQIJ7P8i+Wz2v/S67g/omuFc1y7MwcAttcexi7ePvV3GldxY+/YSYCPyNBRD4kW/QMo8LzkxMZIkVRK8+zXnvdSCwdK2fLsry1PgxfceD3ElazdeP6g+hDs5NakpTBpjCbn69+5E49/ah9kE4MvrtPUW/ocKMxZIG2kdSR9YIf0gWBvtEpcLewUT/xD4q/JS8Ovt5OiQ4V7XIM2WxMG9Jb0XwfDFXNK97P0L1yIKMQA+WUQSP042PCxWIl8aAg+u/JPmstSKx1S79rZSwHTUCOxrAjUXuiuwPDJE+0HfO9A1Kyr7Fm0EqvNx5ZPbvtO50oXbG+
qi+AgEoxFtHkQjcyQeJoYkbh+uGGQPpwcVAsP8+vYk8RTvb+z45WLeYtVfzArH1sbvx2DIoNJT7FMHexpqKrg6PEYURY489jPSKcwcCA0f+ZfkNtR1xDi2pLGTubrLPOGs9b0LriLnNOY8Lz1+PHw58S4tHcsKs/sP7WfeJNTk0t7ZMORQ7vL48QbJEqgYvBo2HYke8RoCFt4P9gmNB2kFIwGi/Z/7QPeo7snkFdqxzxTJwsQUwT7C/9Af7ZcINx1IMaJD6UlMRTw//TexL0wlrRRc/zvpw9M+wXi0krGJuxnOG+LM9J4GzxfbJmQvLDNuNkY2qS/NI6AVEQQV89vmAt5E2xLf2ubH8Nr52QMgDPkNZQ5VELgPpwwFCZ4FYwR+A+8BMQLMAs8AI/tk8UPkWNUPytrDDb58vYzNuevVBbQW7imVPNFDt0LsPhI8FzuyM1Ug4wcZ8fHc/sp4vOS4McS91EnlXPR+ALEOVx2vJA8m/ClqLOommBwsEKIE9/uj8kLs1Ono61b0nPoM/UYC5wjrCxIMvQkeCHgFsf2W9WXxX/FE9e35zP1MAGf+Tvgv7e/c7s8YzBPLlcjGzunjWfyjC84WhiYJNl09WDz6OXI4PzJWJO8QqvqU5xbZMco4wp7Is9kh7dP8NgkPFs0h6CRqH4sbvhu2GCYPsASw/iv7gPiy9uP23fylBhQLRAwSDwISjRPgEGoN+ArxBqX/tfU57Mfme+d66hTtQPHK9Hnzi+2k47LZetUF2Enb2dtY5QH6XwoPEmIckimCLqUu5S4uK54jyxoaDuf8oetA3NzQhcuEzQnY/ugl+wILdRcgITAnrycAJEMfAhgXDf0C/vqq8obtZe7R8lf6GAP2C0YSkxZSGzQdaxrgFjsUsw4qBkj7BfBL6X3leOIv4dXhouQN5t7j1OD33jPe5uEU5SniQufz+zoNRRVMIQguPzNaMG0omSDEGRsQcAWi+fzryuAc1ozLxMczziHePvThBwQXrCWTL1svoCrLJFAbWBAoB2z+O/WJ7bnqBew/7mn14QDFChMUmxwxILIe8RlRFAEQrgrdAaH65/Vc8WTs/OS53fXZAtks2R7Zmtis3bjm7ejN5cjsXf+jEPUdoCwQO6JAPTtfMJUjmxRHCOf/UPXP6CTfXNXGyoPGwssu2n/tEwB0Eo4jbCytLLQpHyWWHfgTKQtwBa/+x/XX7+TtQu8j80b51ABPCWoQdRRrFUIUiRJ+D6sIQAHg/MX50vbR8YHq1uRc4J7aGNX10O/S3NuU4SThjOd1+MkHFhESHIQsVjkMPHc4aTHwJNwXpQxo/oTuceGR15rO2Mj2ySbVPed0+CsHPRaSItom0yb5JH4hlRsQFDIN5QVp/I/0NPG375nxy/e//gIG3A0LE6MTbRDqDG4KHgboACT+hP0k+wr2fe6m5qniJt9r2I7S0dJM2E7cP9zB4hP2+wkVFgUi7DCPOtw6OjWSLOghDxg2DjwAF+9b4YfYCND1yrPQPuFg9EIDbg5uGWsidCPCHQsa1hkcFtINZQYsAKT57PR98q/yFvc6/8AFqAlLDooSwxTkEmwPuA2fCioEF/yt9Z3x0+2i6BHkrOFH34DZxtAAzZXSq9kq3YDmW/q5DrIbvySwLWA0ZzRiLhAniB7DFLAKK/0j7OnehtagzjTL19GG4bf0SQUIEggfIirWLK8oeyRhIdkZbQ1uAZL4wO826Azm1OhO8Fr7iAU8DNARnhc/G6AZQxZjFMQQxQo8A4L6ofL/7KPnXeMi4lff7diw0sTOQc8Y1IvZrOLj9QkMExrRI4AvBjpCPb83my4QJW8Ziwqm+PblDNh0z2nIccbOzWXcD+/xAE4OqxsDKm0x5i9tLDoqwyO0FkkIOfzu8eDoguNu44foGPEv+u0C4guwE5oZDBtgGDEWbROiDfsGFAHD+l31EvCV6Xbjp9wL1X3Pss1/zyPTKtdS32/v5gDhDNwZVSxMOzVAUT7POL0upC
BUEJL9i+mP2YrPzcd2xE7Lqdrj61n7/AjAFu8j7CouK2Qr3iuZJiAcvRCFBGv37OsK5L3ggOKT6Yf0b/8kCbYRXBa8FRYUzxLeD0sMIQcZApz+UvnA8QXryuUi4bHdg9ni1hjZpthP0z/X4ecI9/cDdxUQJtYx+jnGPHA4WC+BJF0ZUgpU9y7mNNmWzgTJPMxo2Mzo1vZBBJIT6R+fJYYpKCyjKuwjjBpEELcE0/hf7hDo6+Xl53jubfa4/f8HxBJtGHEY+RbnFA0PGAduALj7jPdN8zPtueWG35XaYNYV09jVTt8d5jjpZPDU+1UF5wspFEEfYij7LF4u8Cr8Ie8WUAvV/N7sgOBP2HzSq9EX2MLkHvREAngP9xyzJcUngygZJ9QgNRdhDT0Ef/pk8KDopuU06HbvS/c6/hIHVxE7F0EX8hRgFIkTgA6yBrr+WPlm9DrtE+W23sLc+N3c3aHavdsY5DHpsujH7qb9KQtLFhghnSmVMHEyFS2qJJcabw+6BcX5cuuz4NTY9tGfz0HVROMy8yUBBxGtIU0rky0ELdcolSBoFY8K1ABT98ftr+a+5Hjnre0m9UL9Swa5D4IV2BbzFgUXTRS0DHEDuPuX9TTvzOfO4dffV+A14JfeMNwv3hHjQOR95hfykgOhETMcQCZALWcvyi2hKdoiuRnqDpsDjPZp5xnae9Eyz9LSl9qk5xL3CQUhE4of1yVWKowujyz8JBobqBAtBXr4Ve1J5iLkZ+dr7VDzIvv/A5MLUxALE/sUkBVDExsMlQL8+5j2ve+V6krni+Up5HTga96u4NrjSOLb4AXq3fYdAc8M/BasHmYnUi15LCooXSQpHyUTjQOb9grrduCG2X7XbNuN45zsfPUP/yEJ+RHpGLUfTSQMI0AgDBwJExIHhfuy86Lv0u0A73/ytvYM/CAByARTBzILYBDJEF0MIwgMAwn7XfNR7qvqn+cv5sPlGeSc4mzi5OHF4WfmbfLNAJAKIhBrFlQbUh32IMokLSWnIsMdwBNvBBv1Huqn42XhPuIF5yXuVfUM/AQCBwjdD68YiB7oH7McEBlRFMcK1P7+9mjzVvD37X3tEvFX95/+bQUcDBoSwBQgFOEQ9AqRBSoBYPvx883q8uOZ34Pbv9vU4P3k3uWu5+TtQ/iaA7EL/hBQFYMXfBZWFQsXJhlSGjEYDxH3B+b9JvNt6l7nm+mD7b7xDPWc+On9agMFCN0OKRe/G0wc6hmLFSkOowQd/FD2FPJm7pXtnO7e8K/1FfzTAfcHMg6RETUSmBD6DWcLIwd/AEL4RO785TPhP9/B4FXjuOOx4dzh1eiA9CsArQuhFakaSR1EH9seOB2NGssXlROiCmD/8vRC7DXnwuce7IHxw/YK+y7/fAJlBoUMiBNTGV4czBt8FgwP6wY9/xL6Bvdt9Lzx1O+e7lruEvHt9t/8oQNbCiYOaA8QDqcMXgsqBtT/gvrp833tReh45M7ji+Sb467hS+TL7i78lAWlCfwLaxHHGTIfgiCJIU4hQhzbEqwGhPlx7sjnCufD6h3wkPYP/ZYA5gLhBdMJtg70E4wZXx3/GeIPEQdL/zv4GPSX8LbuCu9Z8CrzHfVW9q37IQMvCJ8LyQ6LD3gN/wkQBUz++fWP7r7okuOG4H/iqumK70bwD/Jr+NL8RP7DA0kKkwx0DrgSbRe7F18WNhbaEroLNgJt937ts+fe6MvuWfW6+xYD2Al+DPIKJguRD6oTWxUpFWoQcAfj/6n7o/jv9ZLzDvPw8+3z6POH9kH83gLrCA4OCxH7EeIRyg7VBw4AN/lh8jbr9+Q44Jrd6N2m4G7lf+wr+GUJwxXPFmAW+hYQFbMTtBOnEwcSwAvYAlL9/fmP9QrzUfMz9Un4Evtr/oYBfAWgCUUMMwsPCacJqArSCTgHvQSuAsIBEf0S+Ev3ePfD9rv0L/Uc+EH7CQAnBAwGWQlfCxMKdQbrAiwC//+V+ELxVuyD5+Tl+OR841Dl6O
lm8oj9fwhGEZwXZRptFncRVBAcEgwUaw+7BFv8Q/mu+Gv3qfUW9gT7SAEsA0ECgwOVB54I7gLy/l0DAgdSBaMDVwKRARsDcQO3ACj+KwCJBDQDx/wn+DX5nPop+UD5qPvg/1cEeQTRAeQARgLKA8f/KPWa65fqPu4A8ALu5O3Z8938WwSJA3ECfwtHFrIWEg84DLoRiBPCCrgBXgA5A6gAqPmF93755fuq/skBIgU4BgoEOgIeARYBMwMeA/z/DPyi+0v+2v69/YX+RAEPBEcFKATTAgUBbQDGAPj/Yv7W/Gf8oPub+sf5fPmg+sH6r/j89FvwKO3b67/uC/b4+if6bfxWB0IToBUsEJARRhbXD7kFRAT/BXoCnPph9er1CfSy8cX5OQPSB9gK6wwpEHsPSQvsCRUHPAG5ABwC5fte9Lv09vZF8gjuu/RkACgGdAb0CYYLIweOBWQEkAJ9/x/7OvvO++j88P4/+0r0GvBK7Rvp8eeN75L4zvgX9d76KQcfDWoOPQ6sC/YN4hQYFssO5QTh/23+SPmx8nnxhPbi+2v+nP+lAj4ItgpXC4YM7g3HEBAUrBItCk8DIgKS/JvvheeK6Y3vGfLK8XT3jQPpCfEHCgYNCDUMNw2QC30Gkf2M97v30vgO9xrzvfBG86v11vXz9cD0SPI68/T4Jf5yAekFpgujD8oOKA6tE0QVZw0wBicEiQMw/Onxd/H39hf3jPMs9kX+uAY+DecOMA5CDksQCBEdCwIAjvoD/Tn+g/jl8Czw5/JO9NLx9PL5/TcJWQyPCBAGdgbyB5oI5QKK+qb4SPgi9uLz+vKV91n6ifZ083jzx/fEAiUL+AejAlYEHgfuBawEtwQtBigFcQKgAYj8EPmZANsFP/5d+goEigoxBtUCpQYlCigHuAPDAygFAQfTCJ0HuwHh/UP/d/wP9SDzGPa9+Rj5yfcl/koFfgaRBuQFgAAv/esAYQN/+yDzhfQd99D06PH79cH+UQCp+pL5BAB6CaUNEAdJ//cB3wpPC1oCdPxj/nIEZgaqAQX+HAFJBX4D2v2H+R77WAJEBM/+//xbARUHiQZWAXQAsQHe/+f90wEdBw8FSP4W+eL1hfWj+LP7d/rI+SIAowYFBj8Agv5KBPMFAf9A+1H+gwMkAsX5xfZ2+JT3z/YI+P/52/++BhkH6QLoArkIYA/JDsoHmwdiDCgKKATBAMP/kvzG9g70C/I+8lD2qvdG99P5mAAaBkEGvQdpDMgPUhEUELkNawmKA3cB3/ti8SvrxOds6LPr3e2v80j4MvkcAJYH3QnTDbYSFxRnDcIDCQJLBAgB0vn49qr2Wva/94r3b/gt/ZEBEwSkBdMHIwxVES0TWA/qCaEIaQc9BI4DsP4p9rfzbvRM96z4mfTB8dz0KPtV/88BBwUwCh0RIRSUDrQGaANxAxgBJPqJ8iLvje4c7bjoDOQ+6qH27vqM+TL7WQH2CZ4OlA2ODJANYw7VCuUECQGbAEgE3QU7/2T4svlSADAD7AELBJ4H1wk+C4YLIgueCPoEXwPy/xH7tPui/e/6iPXv8aXyKPVx+AL9+f5E/wUEvgZcBAQGQwmLB6ADLf/J/Iz6XfZv9JTy6u+t7BnqKu7m9mr8SP9PA1EIYgunDOAMqwyHDg0Qwg4gDGkLNApDBnwC5v7A/DD8BPsA/JUAMAbhCC0IBQZOBLkEWgXQAkv/8P2e/av8x/j39NjzhfN687zzO/QU9rP7fgE6AucAVAHrAcwAqv1Y/G/+XP3H9271A/Zh9Anz3/Fu8nP3GPuO+53+kwR8CdoMJg/REQ0VghXwE+ARtw1rCuwHvwTvAsUAp/5N/L/5DPrH/MsBjwYBCFgIBAiqBVADuQOFBGsCuf2p+Kv31/nF+Pf0w/J18kT0VffT9472lPdo+DX5+f1KAWb/jPxD+qH5APpD+jH7MPvX+eL4T/hu+Lr7ugJLCPcIwQm0Dj0UkhTEENoOgw9zDvgKqQVy/4D8Z/
xN+2v6Gfth/egAmAMSBY4G9gkYDpAOQQt0CE8FiQHLAMn/u/vQ+C33Zvbi9gT03e/P8PbyrPPH9Kjzvu++7kDyUPSW8v/w6PSk+5P+lfxn+Tn8PwTaCHcJMQq0Cr0NOxPdFAkTwRDbDUoK1QVbA7oDDgNLAL79M/wR+uj4nPkV+zL9aQAwBfYIlwo+DZgQNhDLDAQKoggpCNcFuQBi/MT5z/e19mH0IPHl787u++147grw9/I782ntcuV75RHtGvKJ9JD5hwBsBZAGwwffC+URERgWGjsY7xdXFyYT6g27CLIDOv7o+Nb3Nvm89oj0RPhb/JL8lvzY/x0FsgcVCP4Kug7cD6sPbg81Dk4MtAkbBx8ELf/2+gT4UPQj8sfyqPQq9YTycu/t7fvr+eix59fooOcM4uzgjOp19eD4d/p3Ae4JyxFjGnwfDyEwIkQh4RvlFXER/g0mCRsCOft19vLzLvJ88EzvXPB98+z4/fxtAOQGQw79ET8SXRTNFxoZxRbOEdELnAZ1AQT9Hvry9WHytPEw8sDzF/aJ9xT34PMr8KrtJ+o358nmluWN4bzf9eZP88/6PvyU/7oIqhRvHsUkXiduJCIh2x4rGMMOjQdeAlL8RfUJ71br7Oyf7vbste0l8cL1S/2qBZQMpxJMFhMWzxWsGBgcWxvtFGgMTgYAAq78UvcR9JnxbO9v8JfyZ/IC8BXtgevC6vnpp+lH6efmueIf4mLqSvYC/h8EwAkeDCAQbxjiIKwlTyWHIUsc/hR0DBEGGwEb+2z1hfFE77XtB+3O7f7vJPR++Zv/gwiBEUMVcRR6EvMRjxO+FM0SUA/oC7cHCgN3/s/6C/m89wv3Affs9Qr0xvEi7nvpnOWl5E3m2Oa947be0d+O7aH8IwFFAXwGLw/dFdkaNB90IhAk1SG9F2QKiwTaAgT/dfwj+RX1p/bC9qfy4/LD9MP3Ff8xBMAFyAtnE2AVdBKyDcUKUgyVDqQM/wjzBSUD4//O+v/2efYb+DL6kvkP91P0OfBw6iblWeLZ4hLnKeiY4KTcG+fc9W/+5wNnCQoPBxaqHWcj6SR0IwAg2BeWC/UBxv9H/mD5A/QT8Lbw6vEu8Y/y9fX5+Zj/NQVGCJ8MahTkGYEYqxPXELQQaQ7gCaoG2gQ8A2P/dPpc9hD03vZ2+z/7xvf69ILyFe6R56/hTuAG43jinN3n3mPrUPtUBvQJ4gz8FOscIiGsJUAoySUwILYVzgY7/H74ivUf8ojuYuy07BDsTOsd78H3kQDaB2cPDRWOF7sa4B0PHFwV/w5cCw8GQwDX/Af7P/kc+Hv3hPYj90357/tX/KT6pPmz+Zf0VOz94z3deNtD2pzZQ+HX8Ej+kARsB6QNohXRGv0fcCbYKNwmYR/GD8YAivlj9k3zs+/E6zLqd+py6X7rAfQa/ywIpQ66EoQUIRaRGOIZ0xe2E4UP7AmJANv4Hfci+Mv5APxj/ij+Fvy1+hj7rf34/or9CPpV8gjnft2n17zS6c6204PmUf6BC+4NKxEqF2AbTyBMKQowGzB+KQQazQRT9dTvjO4z7fDqbOp368fpj+ae6EL03QPKD20Y4B5bIGwdgxpsFxEU0xGzDtsHcv/Z+ML0NPOo9KP5kP0E/rT8O/vY+lP6Kvo5+1r4tO704pTZ8tQB04fQt9UL6jUB6g2rET4TCBZXGxchRyZnK+ssbiZGFU3/2vHu7anr2+mC6dfqi+yF68fpPO6v90IC5gxAF88efSAcIMseIhrnEQMLcAd+BP7/E/uQ+Fb4Qfqk/Hv9Tv3S/X7+hf2Y+gP2ovIN8NHoxd4G2WXWUNLz0lzhOfeMBhINXhQdHhAhAR8wIZgnmCvHJ2sbeQv1/Q/zBesH5hDkN+eW67frUuqz7JnyRPu7A5ELHRUWG9cc3h/wIFEbCxUsEOkJTwOW/on7kPnG+cL5nfiW+Hn7YP8aAMH9SPsi+u320u5z5I3b5NXK0WzOVtRV6Hf+7AuCFW0f/i
NZIo4hVSScJkslVB7tERUEYPhn753puObs5UvosuwY7k7sw+4e957+iATwDN4VYh20IkwjTB2jFWgQGgzfB7YCjf1O/J77Gvqa+mr4HvjG/DD92/k7+E/4hvjS9LDpr92Z16LVTtPZ0HPYtu2CAzkRrxrmIGki8yGBImAiZSFGH78ajhL+BEP3sexu5uji9OLV50HrW+wX8GT22/qP/hQGohIhHTchAiQvJCwe3BR0C6gDuQAY/+v89vwg/Oz47PWW9Nr25fyt/2r9+fvv+ij1X+s64Vrck95736DarNoZ6UT7uwWlDXMajiXKJ5glICWhJDMhHBwWFOcHofsg8d/mo95o2/zeT+bO7AXyufjb/UMCigiGD7YXGiCzJ1wqKyaVG3wQ2wfq//74mPRd84P0xfYh9jbyl/Fj9nb7Qf6o/9QA6QFR/Q/yLOiT4UbcRNZb0V/YeOsK+14GGhbwIU4joyI6JHIm7SbyImUbAxGrAwP1F+i53LHURdR32obiGOlk8PL4CADeBdwOGxvXJQcsgC7JLE8kaRk7DycFcPte9QbyXe527gXxhfB579zySPo9/+//XQABA1QDOftF7rTkLOGM3qHZLtdg3/rv/v3KCEAWWiFPJVcoqiy4LbQr6SZRHmsRy/+Y7ivh4tYx0WvTK9us4Xzm4+zF9fD9UgVbEZMhri3gM0A33zJMJtsXHQoe/s31evCQ7CLrM+tn6/PqKOvh7673Qv43AtME0waPBKD5zuxd5iLj8dya1T7YqenB/LcFiwzYGF4iUSTCJH4n/Cv0LgEqHRpHBnb2F+of3kjVc9Nd2AXguuPV5Qru8fjKAV8NYxu0JsQtCzIKMoErgCAWFcEKggAO99rvI+vZ6Fbpgeod6mLqI+/j9U76AP46AusD8P/19SXrEuQM3InSvdH337DzJgGQCqgWEyFdI3kiyST/KOArwSrhIRUSsgH38+rl0dgr06fVQ9s84CXiSeUt7l75+APZERIiKy6WNUY5UjVwKmwe+hIzCH7+HvdS8UHsO+mI6EPn2OZY65f0H/zi//EBKwOKAWz56O7G5irgRNoM2eTgZe+p+0oEAhCeG4UfYyDxJA4rCi4mLSAn3Bn9CCj40ejT24PTEtNE2Ovdo+Es5fnq+fKU++YGOhaoJFUu2DTMNg8vHyL8FWcJUvyR8w/vteq36IHpz+nD6RzsyfGc+f4AHgU+B9oGrQGY9/XriOHI1pTOdNHP4BDxnfzZCpcaxyLiJDgnPisiL4swbSwPItQTgARb9XPl1tfu0I3RGdcS3CHeKOMX7S336AE/EL0eBCvkNH45RDZSLAshfRbuCW38uvKs7CzoMuZ35pLlZeVT7IH3J/5UAaoF4gc/BZj8R+634krd49Wc0AvZd+nC93sFlRPZHdcjEynxLTYw/y9pLm8oQBqCBwL3iumM3EHTs9Gf1avZBtwE4FnnoO9r+fcIHRsfKR40xTxXPIMyMiaxGeoLgf/89jbxp+yd6EHm7+Tx4wTnlPCf+9wCMAeNCTUHvP668Yjla96l19TQL9R84+zyhP08CZkWdx5LInIo9i4IM7M1vDOzJwsVbwKe8UjiNtb50CDTP9eU2UvbxN+j5/XxxP9eELEfmSsvNcE5+DX6K5ghnBbWCHz7SfJz7ITohucP6Jfo2+oh8cv4Vv73ARsFCwdtA4f4WOtj4TXX58tDynLY2OuK+voI1BliJI8mFylKLd8v5DHJMespxhpaCbj4RelM3HbV69U02lXdMd5I4Ljlge3d914HPBlqJ90xFDhCNw0v4yK0FtILDwEu+Dnz1O7i6/HrdOrC6F/sOPWm/YQCsATtBOEAEfed6kbgVNri0xHRWNx77Uv2Uv5rDagYnRvIILsoli3RMZIzfy0+HxQP2f8g8eji5dhQ1//a4tzz2zjdq+Ga5x/wZP3FDW0d3yrTNfQ4EzHyJa4ceBIXBp388/cy9ZPzVvFj7a3pDusD8uv4Av4IA/EHZgeI/i/x5u
Qi24nRKssM0a3hJ/DV+vsJFhkaHiwfkSOaKMIsSzABLzUnrhzgD6j/Ku5r4L7Zc9lD26Dac9pg3knjxeeE8UQBWxBaHsMsDjYENRkuBibMHPwRJAc5/yP6T/ZN8xnwHuxz6z7xN/jg+p38AgFrAkz8QfKv6PHguNpL1cHUB92s6KzyUP7dCzgVZhqrIJYnnysCLu8tticuHkwTSAXr9pvsOeac4l3hRd8p3dfd/t/+4hXqZvZ9BAISox4SKU0s6Cj9I/kdbxXiDOkGJwLF/Yf6Y/YP8bDuGPFO9fj5p/5XAS0BYvzM8i/okt/E17PRu9JJ3FboJfLk+70GeA8oFUEavSBnJ10s/S6GLRAnCh2zENUCB/cs7wHpheRG4cndbNvK23Xd6OKC77r+9QveF0oj7ClBKX4kFB+3GPER0wtVBaH/1Pye+uf1HfKz88/4oP2rAIwCmAJr/8r2tep84bfa49PY0RbZaOSu7dj1CP8zB/4MPBMiHAQllSssL4Quqyh0Hw0VtAnM/7r4lPMa7nvnm+GI3cva1NkK3UzmuvOPALALYxZ8HsIh8iDWHcwZBhbhEZYMlgfMAycA9PoA9of0ePcu+w38yvsN/OH5WPKz51Xeetj+08PR39ew5OHuBfVw/M0D9Ac6DWkW3B9+J7ItCTD3KwUkEBq9DtIFRwAA/Of3p/Kl643kPN+U273bZeIK7qj6uwWqDqAWmxwqHXMa+xcMFq0TLxCSCwMH0gM2ART9JvqA+0//tgFBAqABAf9O+envGeah3kLY2tMc1nneVed77rz18f3MBCkKxBG6Gq8hqCcTKzUphSNWHSEWxA1XB5ICC/2r9h7wfej54MbbM9ox3ZXlOfBd+TMCjArfEKATQhMpEikSEhItEFsNyAolCTsHMATnAP//hgLABIsElgNIAr39tvUt7N/i2Nu31rPVf9sz5APrRfCx9b/6Hf/XBCUOTxndIvIoHSq0Jlchhhp6EhAMlQjZBR8Bs/nV8PLoLuOM3xbfH+TM7cb2m/28BAMLjA0iDZwMHA1ZDksPtA5bDQgNXgxTCRsFrAOnBQUIpAhaB+wEtABR+XzvTeav3hDZgNa01wrdWuT16cru5/Qw+v7+PgZcD+4Xex8+JAIlqSMcIeIbARasEhIQVQv8BDL+7/aZ71rphuW55eDpYO9L9MH5GgD3A4cEkQT1BdQHjAkRC4kLuQuRDGgMfgpCCV0K0AvdC/MKqAiCBAP+9/Qs69Xjv94N2j3XRNkU34/k8uim7dDyFfgF/lUFkQ3jFXwcFSDXIdkhHh/oGu8WJxTEENULagUl/mX3T/Hy6/zolOmO7K7wNvVq+VP81f1x/pj/sAEIBI8GQQm/C5ANWg5GDRUMIQ3aDm8OkQyuCg8IsQMA/W/0zOxS57rhbdyL2wjfaOI25Srpbewc7wP0E/vSAjwLAhTjG48hICQuI/ofJh3tGugXRxSmD0wJuAES+jPyZeyU6mbr7O1q8aH1N/kJ+8H7Zfx3/U3/WAJdBV8HggnKCzMM+wq9CkAMiw2mDWsNNQ0OC6IFRP6O93rxUurt4mfe0N063ybgkuFd5TDqdO458+354QG4CicTVhlTHYYgjSGLH5Qc6hkNFy4TIQ6XB5UAkfmm8kztXuvL68/sfu8G9Jf3ovgx+c76F/2q/wICVQThBjgJSgr9CRcK5guPDuMP0Q+7DysPoAuqBMX8vPUQ8KzqGeVN4aTgfOFz4kfkz+c77B/x8Pb9/bYFVw3YE24YQxxnH5Afah1/G+0ZxRblEeMLkwVf/xP5W/PZ7zrvnu+o8O/y4/WO9yn4Vfkl+0v9pf/2AS4ElgZHCJAIiwjZCSwMSQ6cD1cQjhDsDlkKngO0/Jv2N/CL6Ynkm+LH4gPjC+Po493lqOgI7ZTzoPv5A3ULsxG9FowaXxx+HAocmhs+Gl8XWBPuDWsHcAAJ+m31AfOg8VzxrfLV9Eb2cfaU9rD3qvnl+979x/8pAqUEIwaZBoUHsw
mTDLgOMRCvETwSiQ+ICWwCK/xg9q3vOelM5Tjk+OMV41jiTeMs5pzqafCe977/nQclDhcT7xb4GfMb3xwkHYocnRrOFmkRuQrTA7/98/h29UfzWfJa8gXzuPNA9Mf0BfYs+Nj6Yv2A/3kBrQOHBWgGIAcxCYgMew/bEDsRSBG0Dy8LZQSF/Zj3FPLR7Ajpi+f55sHlWuT04z7lYOgn7XLzT/vVA2wLVxHeFV8ZpBvaHHQdsx3VHOMZqRTWDWUG4v4y+GDz9PAq8C3w7vBx8uTzZPR89Kb1r/hS/Pv+7ABlAyMGXgfnBo8GRQg9C10NPA6FDt8N2QqWBUv/3/iP8v7sRunL53nn2+YY5jvmWOdQ6ePsaPKP+X4B4wgZD1YUrhiIG9McaB0uHakbhhj0EwoObAenAFr6hfXU8sLxbPEa8rTznvXA9lP3o/hB+xb+8P8KAW0CTARwBa0FeAa9CL4LxQ1CDi4Oeg3oCiwGqAB2+572rfEk7Tjqh+jx5tzl8+V058DqXe+f9BP7DQPTCrEQGxUTGTcctB3mHUIdshvrGFcUaQ12BeD9Gveu8Tbukux57Nbtye858QjyavMU9oT5IP2aAIkD9QX6B6IIugf8BtoHZglTCtkKLwukCp8H2gFq+9b1nPD46vfmeOYB6AHpJukj6mjsN/Dr9PT5/wBXCu0SiRjjG4geTyAmIBQe4RqLF/ETKg+BCMUAhvl/87vu0uvl6pvr/u3c8YL16/dY+qj96QCeA8IF5gZ8Bx4ILggkB6kF8gRSBWoFTgQVAxYCfP+W+0/34vJm7hrqBufW5nfo7uhq6XXsivFQ95z9+gM0C4AUch00Iq0jTCTQIyohuxxeFywSKQ22Bmv+rPUO7n7oQOVh5HLlYuiK7Z7zPfgu+zf+xgE8BX8IbQuIDQcPxw+oDmkLLgd/A8UBDAFB/1T95PvA+KLznO546Q7kEOCW3nbheOg57jHxOvY8/cMCZQjKD58YkCPeLR4yHzAmLPAmDh+lFYMM1wQX/wb5KPBz5rrelNnK127aLuDH53LxpfsnBNwJfg2YELkThRV1FdIUjBP7EFUNhQgJAr/7xvda9DTwsexG6ojnBeQl4AzdhNt/2/PeYOfv8Zj6bwHBCEkQUBYuGxAh+igOMfI1hzTHLdEkMhp/DYAAqPbT8HnsKefj4JXbqNiZ2LHbL+MA70T7GwZcDw4Whhk0GiQZ1Rc6F6wW9BTqEEkLaQU2/kT1Fu4661Tr1esu63zptec45UjiIOCv3tnemORl8FH9wga9DZ4V0xzzICcjPiZoK3cxETO4LJohJxYsCpv7Au1S41vfx92Q3B3baNt73zvmUu6t+BMFSREzGwQhcSK9IDUdBhiwEXoLJwZzAIL5UvP57mTr0+jZ6PfrpPDm8wj0f/Lk78/qzeWA42bhJOAB5uHylf7eBUQNbRckIUsnXipPLZYxYDNdLgQj0RSrBp740uhv2oPTI9TR1n/Zn92z5Wjxbfw+BRIQiB0cKLYsxSvdJpQfVhcoDNL/0vfU83vvOOqc59jopusD7jHwuvQ//H4CngOsAAj7G/Mn7JTmtd6T11vb7uqD+tQCSgpHF5gkiCviLNstojEPNKIuhiCvDmj+JvBo4VzUTs4s0dPYQ+AI5wXwyfriBPYN0Ba4HswkZCg2JlMfDBerDvkF1/zU9Gzwqu1+6ZnoLuyi74Tzw/nJAFkGpQg8BygCBvmV7Zrjgtxh1e3Nuc7l3JDut/ovBrEWXCdKMWc1YDeuOR06uTJ/I6kROgGB8rDia9N5y9/N6NWc3YvkyO7I+9AGmg1EFJUdHCZwKTAnZiFmGgoTEAmY/dr1dPOU8Ijsbeta7UPx0vSQ99H7OgE5Bc8E7v8291fsLOPF2pDRsMlVyYbX+++mBBUToyRZN3s/EDy4NUgxJy7MKJUcWQtt+47tw961zizFIcjj0m3ffOpK9WcCxw45FpMamR+SJBwnGiblIN0Y7BAxCcX/OfZ78Qvw++
wW6z/tDPJe9gf7NQHSBA4F1ALZ/QL2JOsL30rW29HWzSvLMtEb45z6Bw+fHlguljzZQTU++DYwLpQl7xvfDQX8H+tl3tLSRMlNx+fN+dqq6pH4iANQDg4YAB4sIcgkiibuJvgl5B4+FZ0MegN1+nnzZu/M7L7qaet/7Rjwd/Pi+H3//gTfB6sGQQIv+uHtIOGi2GzUBdNg1KDdnvCOBC8S9RwdKf8ynjVRMmAufym0IZcVjAVD9YLnp9w60ijMbtDO2+bnVfK/+g0EDg0EEsYTnRe4H+AkUSTsIIAbAxQvCysAT/YA8yPyHe+27Ozts/AU8zr0A/dh/csDzgeaBrsBZfoT8bDnjd502GbTrNKy3w32pQUyD6UdvyoKL1csTCUyILsepxvIEtAFCPu88RDmeNnR0tLVGN+86Xjx0Pp2BlMPJBSLFqgZOh44IcogGB7qGWkUGAy/Aer34vJS8XLtKOyT72XzEvZA+S/92f/YAA4BjQCJ/A32yO6L50Pjw99J2hXXe92j7nUBoQwGFuckxC0nLR8qdyO7HC0Z8hO9CEL9rfR56obgCNrV2djgPOs79O38jgZyDRESrBR8FzIa1huMHLgZ0hSuDnwIYQIk/db4mfWn86Dxv/HD8YTxn/T4+en+kwHMAaUAov3F9lHu6udQ5M7jveIA4ejlnvGo/J4DuQrzFvUgpyJ4IgUj5iHnHSUWlwqm/s7z+Ofy3b/Yddui5P/tive6AboJNQ/iEYMTWRbcGFwb8Ry8GbcRIApeAoT6uPUY8+DyoPPT9ET2mPYg9fb2DP25ADgDGQVdBCcByvvN9Cbu8uml54LloOJT4gvo5fBI+JAApw0vHCYl5ycyKS8qiiYNHoYTOAdr+1jwr+Qo3D7cUOPL63v1Q/9zCKwPUhL7EgEVcReyGFAZehaPEJcJFgF2+dr0f/KG8lH0uPVh+Zj86/wj/tz/VgLSBSEH2wQuAUP8XvXv7MHjKN9L3a3Xg9N73Mns3fhIBIgRXR59JZAluSMwIw8g+hk+E3AL4wJP+TDuiuMZ3gbgT+ap7db2twA3CVoNwQ8/Eo0U2hZoFpsUJhFCC/UCW/vC9lT0S/Tp9Gf3iPvx/Qr+Ff9kAgAHKQt/DNoLCQjMAHr3U+3l4x3eWt1+3RTdeOAY6vn0mPzBBJ0RFx04IbsisCWAJr8iqRxxFSQMAgJQ9nvqKeN94a7kaeqj8jb+fQirDccO8w5GEAwRxg8tDngNOgpaBFH+hPlz9sD1dfZY92z7dAE2BTcGEwg/C/wMSwxgCeAF6gA3+WbvXeYT31XZP9b10oTRitmo6Fz1Ov9JDdEcYCR0JfglDyevJfUguBqwE6gKOP5C8tjoOeEK36niPulX8iP9igYsDDYOUQ/LEFcQMg8rDuYMmAkDBFr+1/rm+EH3Nfcd+Dn7f/8wAu0D4wcoDSQP8gz5BwcCqvn67qflLd/A25nav9nk2bLeFOn89Nj/Agt4F+8fPCKhI8kkZSRZIY8bQxX+DHQA4/I16H/hfOAr5IzqSPT1/VcGcgu7DFEOuhB0EXgPaA2CC7gHHgE++174q/e+96L45fs1AKYDWAbjCDQK6Qq9DFENDwo3BFn9nfQ16snfxdcC1pDXpNiC24PjyO7u+TEFkRGaHlwndCpEK9kq3SaLHiYU+wrJAaz2SOwG5n/kNeU+6JvuRPdy/6YFkwkXDDoNyw23DQIMFwpzB/oCpf18+X33MPjX+Q38WAArBKoFyAbmBpQGmgejCM4HngR3/0H4l+5I5F7cqtcp13jZK9yi4TPrPPVB/jMIVBMvHYgiCCRnJSAmySJGGzMTxAu+Abr1+uyO6Lrn9Oll7ij1Lf1aA3oGJAgFCqwLUgsDC68KCAncBSYBr/zV+x39vv0//0ICZAZOCdkJdAk9CsEKgwmVBwoEGP+W+XDzae3Q57jhxd273HbbmtsR4OHokPRHADcLeBcLIjwltiOWIk4gRxt3FGcN0Aas/u30De1o6VPqp+
xF7yX1jf3fAzcGIwb3BW4G2QUlBEUD/gNlA1gAv/1W/Qz/KgIaBWoHNgq/DEoNBAtOCH4HTQbWA3gB5v1p+Ur0/+yS5SzgCtwt25Ddi9/04k/rW/Zn/yIImROIHYAgxiCQIjAi1h0SFz8PnQeOAA/4fPCO7eXugfKg9iP7LALmB1sIKwYrBPUCfQE7/yX+Cv8M/wj+PP3C/REA8wLJBKUHTwzbDkgOLAz4Cf4HtASvAAj+Gfxu+Sn1Qu806abjrd7s3N3dZ99R4snoBPL/+gsEkw4DGS4eZR7AH4YgsRxcF3wSJgxiBO/8kvWR70vu3vAQ9Z36/AFrCHoKVgiyBMUBMv8z/RP9Y/6v//3/bf/8/6kBOQM6BRgIBwtFDAQMHgtnCZ0HzAWEA+kBkwAu/jb7/fb68O3qBuUp4PLebOCw4Ibhkuci8Z362ATfD3YYKR2lHhgeBxxnFyoRtgs0Bij/MfmK9ezyY/PM99L7mv93Be8K8gu9CLMEjgDu+yP4vPY894H4/fo5/gMByATMCfgMuw1zDvgP5A8LDe8JKwixBasBWv2c+eb1jvFN7ebpSObl4t3gUeCB4UfjuuWm7LT3EwHJCYoUOBt7G/oagxp6F8gSwQ2xCEMEfQAt/M747Phc/F4AMQQmCHILNQwFCcYC2vyG+E/0YPE88hn10/di+7H/bQMUCGoNYhFoE8gU9xSrEfgM0Aj+BCgBLf5a/EX7nPmD9kryVu0j6KTixN+j34rfguHJ6DHy8PldAmwMYhOIFdgVehW+E5URyw6XCh4HWgVRAiT+Cv3X/wQDDgYQCcALrQxMCVUCm/q+89zuGOxW64vtcPJ996j7agBfBg4M3g+vESsT+hM1Eh0OpwkUBlIDQwCC/cb88vyL+6T4ivUs8nXtoOgH5dnh0t7J39PlF+3M9Bn+RwZoCw4PCxFAEQQSlBOxExUSiBBtDtkJMwWwA3oEPwZuCFoK7ArnCTUG+f/H+T71z/Fk77XvcPM5+Cr8xv8VBMoInAwVDhgOWQ5MDssLewc0BIwCywAN/w7/agCGAEX+rPp39t7xBu266HLmheVf5OnjdOWx6JvtR/Sb+zsCmQdADFkQ7hL6E90TyRI6ETkPPwyWCXcI3QhqCbsJxgmfCbIHZQKN+0z2QvII72XufvCv85v24Pn7/X0C7AZmCnoMrQ71EAMQEAzmCDIHPwXpAlwBNgJKBM8EDwOtAEL+2/rh9XvxoO4h607mXuKb4ArhZeSz6lDy2fnCAG0GWwrrDO8OmBBgEcIR+BE4ESAPcw1ZDc4NOw3UDJMMEQrABHj+DPgu8izumOyR7avw5vRL+Yv9jAGMBQgJ5QrAC0gMrgufCYIHxQUiBOsCFgO9BMoG3AfRB+AG+gS2AWf9ZPge8xvt9+Wp393cLt0k35PjYerI8On1DPurAEUGogsjEFYTwxU4F4YWrhT7E4wUhxTOE8gS1hBsDGkFm/0O93vymu+C7tXvBvNh9oz4Rfp6/W0BpwOLBOoFmgfUB1YGbgSCA0MD/wL3AsoEhgieC4oMxAzKC8gIOgSu/qj3Zu9656rguNsR2XTZ6NyA4q7o0e4n9a77gwIQCXoOlBIJFicY1xdEFuIVRBYGFssUeBP8Ef0OZgnCAvr8Sfip9LXyy/LT9Nn3L/rj+4j+pAF+A5YDjAOpBO8E3QK3AB0AbADXAOQBBQSiB7sLDQ4fDpINwQvPBsD/1fgv8ZjoEuJi3p3bf9pR3HTfmOK752Tvwfc5AIsILw9yEykWkBfiFlUVUBWFFlQWOBV6FPsSjw+2ChMFfP80+3T4UPbY9Pn0QvZq90X4r/nm+yX+rP8RAZsCFwNFAi8BIwEBAosDpgVzCOELjQ/rES8SzRAiDngJRAO//KP1Ee115M3dKtm21kfXLdpZ3gvk7OoN8o351QFQCbUOdBLsFIoVnRQ7FJ8UOhWfFYkVSxQyEq8ObAngA5z/Uvyn+W34wfg1+Qr5G/nO+fD6w/xj/sb+Hv
9CAFkAnP9XAJcC4wThBhUJNQzcD8QSDxQMFOgSvQ9TCpgD4Pv38rzp2uH82yTYO9bx1tXZCt5h4xzq3/FD+lgCqAhUDS0RhxPRE4ATYhTYFesWABdVFhMVZxKFDW4H6gHv/Zz6ovdG9t32C/iK+PD4uvpQ/Z3+Mv7p/br+jP/b/tn9yv41ASoDLgVsCAENnRFrFBoV8xTHE6APpgh3ASL6LvFO6Gbhqtt310bWpNcD2mze9OQn7Lrz/PtnA/kIwA0KEn0UYRWxFtcYJRoeGkkZFxgVFmMSvAyuBqcBiv0h+h34dveY9/f3RfjN+Ln5xvqU+zb8N/1U/sH+i/5N/0QBUAP0BD8HvQqeDnMRAxOVEx8TEREFDZAHPgKL/EL0mOpF4/ndsdjs1OLU2taO2WHeouXe7T32P/7fBNkK4hAtFeQWSxj7GtgcRBxIGm4Y0hXEEBkKxwRnAV3+PPtf+XL51fr9+y/82fw0/2IBIQFi/wL/p//m/iX97vxM/vv/ZgFgAxgHWgyuEM0SThTMFa4U4Q/hCQEFkf9l9+HtQObe4Ancl9cQ1TzVedcY21zgRuet72z4YACqB9MOxxR2GMoafB2zH5IfBx20GQMWChGbClsE1P8c/Wr7Yfor+uv6KPwf/aH9Vf6P/wgAG//w/SX9ePwD/IX8D/6NAE4DHAaoCVQOpRJ0FR8X3Rd8Fm8SrQygBhEALvhY74vnYeHG26XWWtNu0mrTCtbs2nHi2+vz9M38SwSkC6MRlhVwGAIbJB2UHWEb/hfJFKQQ2wptBUQCCwEoAHf/1v9ZAcoCzAK5AVUBnwGQAGH9g/qI+Wz51/i8+HL6w/1CAYQEbwi6DfUSpBYNGbIachpcFyQSzwtgBKb7S/Ke6XTih9wj15vSOdCv0IPToNgP4HfpQvP6+3oDPgreD8sThhbeGGQaeBqpGAgWPhP2D+ELygfjBHoD0QJ9Au4CIwQ+BU0FJgSwAnoBo/91/CD5Ivdg9v71bfZz+NT7lv+LA/8HeA0RE30XexpJHB4ctBlpFYMPCwhF/8n1Cu3F5WHff9nc1F7S39FL0+7WBt3J5OLsbPRF+zACnwjBDZ0R1BQFF6cXixaFFBsSmQ/ADAIKMghjB7oGDwbyBaYGKgehBkcF9QPKArQAIv1d+R336fWg9Cv0sPUP+UL9igHfBQ8L3xDbFUQZaBvwG0AahRaYEaULcgQv/Onza+z+5fzfkdrA1ufUnNQ41iLaFeDe5lDtIvP7+K7+uAMmCCwMzw+YEgYURBQcFD4U0BNxEhIRphBEEOoORg1gDJ8LVwo+CMkFuQMaAv3/Nv2i+h35JvhH95H3tPl+/FD/eAIfBoMKBg9OEloU0BU/FsMUZRHbDHsHTQHw+o/0Wu4S6Bbi4dzr2J7WbNaP2LLcoOFV5sLqgO9O9Pj4df0XAsgGmQrEDOkN8w7ID/IP1A/eD4EQLRFfESkRERHaEMIPwA20C3QJgQbpAhn/s/s++U/35/Uk9ir41fqP/Z0AawTmCEENwRBuE0sV6xXBFI0S/w+MDJQHCgLN/ND3ZvKi7NnmreGe3QDbItqI23be0uET5eboNO2O8d31hfqK/z4EBgjSCgcN1Q7bD/wPQhD/EJ8RuRHBESUSRRJVEWEPLA0XCxkJZAYdAygA+P3Z++f50fjg+LT52fpc/Bn/zwKXBtQJegztDh4RKBLlEd0QLg9CDEMIyQNb/8769vXe8Afs6ucB5UvjgOJ44gbjvuOr5AnmBegc6tfswPA59T75xfwsAHIDpgbGCZAMHA+WEcYTORVoFnAXtRewFskUmRI7EDYNvgk+BhoDdAAb/vT7hfoP+h36Q/oA+7H8+/5ZAb0DNwbNCEoLNw1xDgEPmQ4bDcQK+QfUBEUBEv22+Jr0GfEr7snr3uks6MbmxOUd5QDljuXe5o/oler07LXv5/Jj9t35hP2GAcQF8gn7DeMRkhV7GHkaYhtgG5oa5Rg0FtwSOA+YC/8HoATSAZL/Xv
1L+9L5NPlk+SL6Ufvv/Nj+oQAQAnYDIgXcBiYICAmICbIJZgl/CAAHPgVhA10BA/9p/Jj5g/Yw88/vqOz96fLneeZs5fLk7ORb5XjmWuj26mXuh/IY99/70ADQBcUKXQ9VE3kW0hhjGhAbxBq0GRoYFRadE9gQ1A2xCmoHAwS5AOD9h/vL+bv4Zvin+C35vflW+iv7RfyI/er+aAAJAowDqgRfBe8FkwY+B8QHBAjaB/8GKgV4Ah//YvtQ9xfz6O4i6/PnguXy41zjwOP45PHmpOnc7LDwGPXq+eH+4QOUCL8MLhDZEuUUjRbiF9sYRhkaGUkYwhZDFAERYw2hCRUG7wI+AP79Jvya+kj5RfjK9+H3Z/hX+Zj68vs0/V7+d/+3ADMC7wPMBXsHuAhlCXkJFAk7COgG4AQmAqX+cfqy9efwbuyh6Mrl9OMh4zrjHuTU5U7oq+uy71/0RPns/XUCWgbICZ0MHw9wEbETtRVEFzQYQRiaF0IWURTbESoPbAyRCXgGNAP2/wH9i/rD+JL3B/f39iD3Z/fa97T4Jfob/Gv+1AA9A2IFGAd1CIUJaAoICzILsgp3CXQHrARBAXz9kPmC9XzxuO1M6oHnnOXM5DXlxeZZ6ZzsX/BW9Ev4NvwZAM0DQAdDCp8MTQ5VD+IPFhAbEPIPmw8IDywOAA21C1EKzQg3B30FlwN5AR3/pfxk+rb40vei9yX4Kvlz+sr7L/2v/mcAWAJbBFAG+QcnCa8JqQk9CX8Idgc0Bp0EjQIQADb9F/r29gX0ePFd7+HtEO3k7Hbtp+5j8I3yEvW493v6M/3I/y8CRgT5BUQHPgjZCCkJQgkgCd0IjggnCLAHMAepBg4GaQWrBNkD9AINAjIBcgD6/7D/kv+b/8X//f9cAOYApwGSAnsDVwThBCcFJQXMBDYEawOEAmsBIgC6/kn94/u1+sX5D/mf+HH4cvim+B/5w/mC+jn77/uY/BP9av2r/e79VP7l/qn/lwCxAcUCoQMnBFUEOATNAysDaQKkAd0ADwBM/5D+8P2A/Uz9bP3c/Yv+Mv/C/zMAagBgADUADgANADgAewDQAB0BdQHJAQQCOgJ4ArYCtAJgArwB4wDa/8T+1/0p/c78pvye/Mv8MP3N/Z3+nv+uAK8BawLMAsgCbQLjAUEBqwBPAEwApAA0Ae8BrwJfA+wDMQQ9BBQEvgM6A4oCqAGxALL/2f5n/nD+7v6y/3wAEwFaASIBegCB/3T+hv20/Af8c/sG+8D6lfqV+sf6Qfvo+638bf0Y/pr+6/4F/9/+nf5f/h3+7/0D/nP+HP8JAB4BJwIyA9MDIAQRBKkDwwKTATsAAf8f/qL9wf1n/oP/1gA8AnEDWATsBCoFLQXzBJMEJgSjAw4DXAKpAQMBfQAeAPj/6//8/wYA+P+5/z3/mP7t/XH9Gv3w/P/8Ov19/b/99/0f/jH+KP4V/v/94/3S/c39w/2h/Wn9Tv1L/Vf9ff3B/RL+S/5U/kP+L/4e/j3+d/6r/tf+8/7p/sP+r/7C/hP/rP9pADUBXwJ1AwIEegQZBdAEXgSxBdoHfAnUCOcGKQbzBusHYwcOB+8HXwdmBCkCSwHJAJb/h/7M/R78zvuM+qT3GvdK97j41Ptc/c3+UgBkATgDCgT3A2kDEQJWAkkBI/8R/+T9HP10/ST+Hf+V/cP7WPtW+pr6Rvq8+Br5Fvn49zb34Pd7+eD52PoR/Dv8ef6jAEwAAwEvBBEGoAYkCOsHxAZBBxoHYQXQBF0E1QIjAvYB/gCc/3L/gv+7/sT+Yf6z/eD9Iv7A/k8AnwH0ApAE4AWsBtsGywj7CcUJ6wmJCBgGvwQ+A5ABSACa/qv9gfse+jv6RPq2+h37oPvf+bj3x/hc9971M/iW+LL3xfeM+Ob4Yvle+nP6R/pL+/H7Y/wk/qb/bgF7A9EE9gXUBnYHMAbcB5MPFBSXFHURQgyRDE4KxwMHATr/KP41+5v17/Rk9d30/fae9qT1wP
kO/Mb81P+jAiEGHwl4C1sK5wgDCsEH0gcKCAsGuAU0BNYCdP9p+5L7PPoZ+I73SfUo8zDy2PN79N7zHfXC9PrzrPPs82f1GPnf+5n9VwDhATAFRAflBygKuAvrDdAOxg6qDzoPMQ9UDmoL3wkNB9cBpf2C+gX5hPcN9oX2b/Zp90z5pfme+rH94AHCAmgDiAVzA0oDBgS0AJz/Kf8k/MT59vZ79d71kvWy9nD3kfjk+pH8Of9zAtMD1wTuBa4GBAdbB3AHmwXmBukIawjsCWoJhAcPB1sF2AQ2BYEEtwREBHcCZAA3/rn8dfyV/I37/Po9+vn5KPuB+8z99ADMArwDXwMEA7QCtQKsAmkByf/w/oX9D/sq+gb6jfhS9wn1tPAk7WjqOugu6PfoB+mT6qnsu+357w71OfwEAsQIcA75EIkVKRpeHOAdIiEsI8AgHh/IHTIaIxejEw4QvAriBF//VfkM9T3xYe4t7IXqVerc6WDrl/BF9Un6gf/IApUFDgibCt8N4BCFE08UIhIGDyAKbgWeAAT7gfdr89buR+yG6H/lXuWJ5ejlY+Z15izmZucu6rDsOvCP9dD5J/7lATMEHwjQDYgT0RfKGm8aYxn2GG0XhxURE9QRhw/mCocHnQNq/vj8RP3R+jv5WfhF91D3n/jX+9r/QQQJCP8HYQgNCx0NtQ9sEeUQwQ2KCoUHVQJL/nz8IPlA9obzH+6C6lro2uWP5IrkdOUO5sfkPOSz5cboVO8a94T65/oeAKgGzgpeEIkVihk2HakcQhcWFA4UzhOCEnAQZgzEBlIDRf8k+r35XPr2+Rn6UPkc+Ij4Wvqq/O8AZwUUCMEJBAsQDE4O2w/BDocM3go1CvcGCAJx/8H7QvYC8sfs++Yc5C7j6uGn4XDgJeCz43zmJOnJ7t72J/5IAjIHNg67FGkbkB8ZH1gekR8xHsoaqRqCGPYSiQ4ZCZ4Bm/uD9+vzMvH27rfrPuk+6drq3+xH7yLye/Wg+uj/2gMCCGEN8BB+EvgTORTVE+UUBRVJEAoLvQcPAh38p/kl9AvtqOv554De5Nh62VvaQ9wM3ofcZt+L54vtZvOT/joJLxBJGAgdjR/5J/gvzzG/MQ0xny6FKZIhaRoxFWwR5Amk/9D3pPDl6srmUOTj4ijiU+KO4k3jguUP6+bw1fRE+mT/8wOECf4OrRQGGb4bvxy6G3QaVhh4FHsQFgosAoL7wvOL623jNdzC1vrROs89zAfKgM1d0oTVW9vs5HXsOfMm/0QKChMmHTwkCSngL0c0QzaeNic0XjJcLzsqRSa2IOsXiBHQDM4Ck/ka9aPvP+lU5Ergedud2mbc7Nz13+bkfuoz8an3iP1FBEYLxRHaF1EbAh3oHlggph9dG20WZxBfCPUBwvvg8Z3ne+H12aXQVc0AzUvLEs7V0sTTF9h439/kl+yu+IsDEQptE2Qd/SJNKuAvyDIfN3M3UzUMMlkr+icrJtwgUBg9DhMIQAEo+FfyCu0l6VHnzeIa3qrcVd4S4gjkeOm78V33z/wcAYkE+gq1E1UZEhuSHBIfnRvVEz0OKAtSCK0A5/RO6zPjKdx92bDTys2n0NnT39K701PYhOGj7Kz0E/uHAi8OZhmrHgsk0CkvLnEzozQJLwQsrCtDJ/YephVkDQgGtP8G+Qzw/ega6MzlF+AM3Q3dnt815U3pe+pw71H4JwAtBcgKAhKTGIwfZiMlImMj5ydNKCwj4hs5FUsOHwZO/OvvQ+ac39PXXs9lxnnAN8F3w0vFzsk60G7abuUE8OD8WAhYFaQjRi2rNKM75T/SQspCxj6NOSc0Ui3dIwUY6QpV/7r1Qey64knanNSx0e/NWcqiy1HRYtc03dDj0+pt8hT8hQZJDtIV8R45JogqPC08LywxSzGOLhYpBSJBGZEP2AV2+pTub+NM2QnPvMQcvue5jLaft426l71FxHzQJOHG72L9Mw3LHBssZjjCQJ9JvlC7U6xSK00rRjY+Bj
UeKnIbyg1wAmb1e+hf3PfR2Ms8xzTEQsONwn3Gic1G0ubXPuE17aH6nAQWDBgWIh+1JtoslzA4Mwg25TYTMmgq9SVBISAYJA0PAo73y+uU39zTuceQv7O8w7hpswyyZrXvu33CA8r81zXoGfi1BwgVDSP4MQc+tkcyUJFUCFYrVeJPtUfhPww3xysgHhcQRQQu9vDn3twp1AvONsrYyPjFGsL8xCbLEdCY15vhVe2a93kAlAk0EvAcUyjqLQUyATbpNFIz3jEaLuAp8CKqGPgNjAIU+ADuJOJF15LQ2cqFwBe4/LZItSyzSLcKvMLAV8xO2rnk3O9g/5AQJh8VKyc3R0EhSlJRhlKBUNxOFU07SGs+YjORKTodRg/rAdvzA+ea3RPV+sxlxv3C+8RYxo/H28yF03Lb7uYT8bL4FAS6EEEZ4B4LJukt6TIZNQk2bzIpLs8s4yb1HHwVjw4CBD34keww47DbHdLMyfXB/rkWtwG1SbARsRG6R8WYzT/UHN2f65f+9gt3FvAnmTlXRBFL6k/uUadSfVNzUD9HiD0/NpwrCBvjC0UBMfb758TZ8s+qyazGXsbgw2HCVcf3zZbT8drM5NruNvuYCEMQERiiI+UugTQ1Nsg4jjlENzozti6FKTAiwxlFEpIHbfnd7VrjL9jVzbXFeb32tOCwza3Bqluuk7e+wUfJONGX37LvbgBqEWsfWDC9QQZKUU3fUA9UD1atVGJPsEUZO+0yWCZ0FLYFdvld7CHfD9KXyHPDIcEawNq9375QxvnN+tNG2wjm//LK/kgKKxXsH+QqvDOwNwY5yTsSP00+TjnrMxMtACTtGX4OBwNd+OXsEOFa1PXI3sHquwC3/bKGsGix/7Rmu5vCFstF2OXnlPUHA/ISAiOoMC47b0RvS8VPyFHhULFN0Ed+QC84JS25H6wR3QPo9vfp691+1RHOXccTw+TBiMI7w9DH6M+317jgOup29Mr/ywlPFHUd9yNRLCEzbzX8NQs2STbpMpIspie6IJwY0g/wBG36NfGm6bngrNXczQ7JhMMDvhO6JrkevCrDTspAzU7UyOOy8Yb8bwlWF60kDjH/OYU+vkMASaBJRkZZQeg75jRbLO0hFBXkCbEAf/VT6Tnggtle0/LNvcjwxuTIXcufzqTS7dih4e3p1vE4+u8EIhBmF94dqiVAKxsvtzFHMaEvAy+WLUQnCh8nGFsPUAUO/YDzH+mv4NrYNtEEysbDosA9wSDEssdfzNbTr9q64r3uBPu6Be0REB8kKesv9zVaPLQ/iEA8QLA9EDlQMwgs7SNvGrEQigeF/SzzGOlA4QrbJtXK0GHO3c3tzlrRB9Wd2bvfRuce8Fv5EwDzBW4OyRaJGzcfCiMFJg0niSWXI3whJh9KHDEXuhAbCdsB5foC8tzoAOKB3GjWK9GYzVLLgcsvz8fTRtiw3/roKvFt+W4E/g9FGYYgsyb/K+wvJTIdNKk0pDE1LHQmRSBGGJ8QCgoiAkb55fHH62jlTOCL3eHbudo42tLav9zE3ybk7Olo8Cz2xftfApgJ2g6jEsYXphy4HrUf1iAtIJIePR2tGvkWWhMeD2QJQQOq/XL3dPBU6u7kbODS3aXbudia1qvW29iR3IngGOV660/yf/gp/0AHRA9UFs0bmR/aIoYlCieMJ9AmQyTkIJ8dmxgPEoMMTQfIAD/6EvW/8NXspulo5xHl/eN95VfnLuiS6ibv5fM0+ND7tP9RBO4I8AwhEAoTKRZAGOIYRBmaGXcYXhadFMURkg2PCaoFiAB0+qP1sfGo7P3n3uQ14aTcddpe26HcUt3Q3lni1+Zu6yXxMPgf/zEFywpCELwU8Re0Gj0dCR9FH6YeCh4FHJUYShV7ESEMPwZ5ASr9GvhL80DwHu4N7MPqr+pM6zvsje2o76LyePYY+2r/8AKrBisLXQ/MEpYV2hd6GRQanxk1GCUWoxO7EBwNLQlEBckA6ftx91DzU+/F68DogOVL4mjgBu
Dd4F/i9OMZ5rrp5O0p8vX25/uhADUFNgnADJkQSBTQFh8YxBjoGCwY4haQFZUTYhCMDNsI7QS1ADb9nfoE+Ab1x/KP8STwve/88GfyM/OI9HT36foa/mcBPAUKCeILWw7FEJASPxR2FVoVdRSEEygSvA/cDA8KAQdZA0X/yvrP9WfxOu4n64/ng+T94uXi8OLw4pDkMuiI66PtyPBk9bL5L/2pAHMEoQdxCg8NNA+DEOIQZRHAEQoR+g4jDaQLuAg/BW4CYADK/Y76Mfjo9Ur06fOR8yf0V/Vl9m/4/Pq6/Ij+RwEzBW8IMAoLDMgNpA9gEK0QFRKmEkYSWhHdDzMPAw2YCo0KrwjQBLUB0QCR/jT3A/IV8jfyVfBW6rHlpOjh6R/n4OY066zxYvQ29sD5V/xs/Sv++wCuBDcGJQYaCK8ITgfkBmIHigiNB4EH0ghWB3YDNgLyA/0CIgHcAHgAKf92/BT66/pF+9v5M/lN+qT6B/oy+6X9KgGgAi0FCQkcCrwJTgrpDBoOngxsC0ILvgmTBn0DyALBAhsAWf3H/Qf+fPn/9Pf1J/q0+LTynfFe9CX24vLI8aT3LPqE+C367PyT/Q//ZwHhBE0FMwX3B5UJTQjPBecGQQldCAwDLQCQAh0D5f5V/Ob9f/2R+hH32ffw+SX4Tver+NX4f/hw+f/7lf7+/tz+MgGcA+YDGgU4ByEJygi0Bz4JPwupCiwJBAlvB0sEdgL3Al4DtgDy+2b5Uvlp+Y33KPej+Uj5R/V38nH3+fzQ/Wn+iP8tAfoC4gWiCFcIjAeyCOgJXgh0BaoGswh8Bi0CVQA8AjYCN/9+/kT+0/tB+dj3ifjE+Tf5lvjl+H/55vhl+LL6Ovwq/P/7Bfy9/Yr/ef+T/hf/ZgBy/+L9QP5H/6wAHQFJALj+XP5P/2X/N/+A/uP9P/2y+/f5Ufs8/gMAcgGqAdEBUARpBm0HfQmnCkAMzgzfC5QMVwyODOkN3gtpCVUH6gT8BMYD6gBW/2j+Fv72/FH6HfgU+KT4n/iT+DL4dvby9c/2dvWX9Fb1LPXH9ED0ffRU9qP2G/YN+Ib5a/l5+Zr6Rvp899f4I/w//F/8V/+OAK38cPy4BGALOgyjDqgQKQ5eDnMT1RaqFdMS7xFnEMEN+Qu7CoYJ3wXL/w78g/q4+S35JPcc9mL2W/dL+i/9wf4J/9b+xgAnA+0DZQRtBZkGJQauBbcGVwZhAwMBdf/N+6P4ZPgJ99vyY/BG79vrcelN6BXmYOT55EjqU+/P62LnOu3V9RT54f1qBRwIwgYYCrESrxgbGs8aBhzHG+AalhsnHfkbvRbBEBgN/gnlBUoC5f7m+sv2qvNv8vryjvK28SDzW/RN9TP53P1P/13/RwK3B8ILpAwqDa4Oig5mDDYL2wqCB64BMf31+Zz0Ue5P69znWN+F2qDhuOi544LcJ+E669zv3vSZ/rIE3wWaCUsRORhuHE8fmCB4Hwse6x7hIG0f0Rk8FFwQdAzeB+0DMQBd+1322vG37TfrPuv86gLpvucm6ePs9e9J8hD23vrT/zoEPAdhChAPDhMQFDoTyhJSEuEPtwxUCnUFa/0T+Dr0jOpr4lTnyO335XjZ0NnL457pOex08ub3R/q0AE4KchCqFCIbxCD4IechCyWtKLMm5x9GGwgadRdpElsMQwYkASX8ZvZi8VHtAOmO5Jnht+Dr4aXj4OTi5mbqke8S9lL8AAASA30IbQ5QEuQUrxZuFhYVhBSLEkUMywVIAbH4sO1o7dX0Y/CK4T7b7d/z5Xbr6+6m7nvwGvmuAkEHOQspE0ca5hsAHggk/ijlKNEltiJdIFQfTh22F94PkQrOB8YC1vr+8zXvT+up51DjNd8+3aXeZeBx4HXiSeh37v/yPfeD/BEEWgt9DjgQehJxExwU4BNuDrUGmwMMAsj53+0r63Xwxu6B5Xvhb+On5E/o9O0v72/wbfgiAcIE8QgUEQQYoho2Hc4iYSdmJ80kHC
PJIu4h0B+7G40VXBAWDR4IVQA8+Y70RfDO6z3oyuVs5AXkQOSu5CznT+2986n2APn2/g4G1wqRDVYPahALEG8PlQ4xCV4Bzf+d/WDuAeOw6rHwkOV62pDc9OL55bro1Oxk8Of1g/6/A0MGcw2aFs0Z7xljH0UnDSlHJJ0fAh8AIPoddxhkEhEODQvHBhUAKvkj9RXyde1C6KPkcORU5u/mM+aR5/TsePQF+gL9bgISCuAOJBIkFskXBBanFasULA0iBq4GVgI18mPnYutQ8Wzrrdwn10ze1uKW493m7Oiu60b1aP1A/oQDQRADFeETLhuNJb8nziNnIU4hcSHiIHwdfBb3D/4N6gokAlv4fPM/8KfqGeU34QffCd8a4N/fjuFC6PjvLfQO9739XAfVDosRHRRtGMAXvBc8G90U4QrRDNEKHPgm7GnxpvWf7mvh4tgx35joJ+UW5EHrne+U9nz+uP6qA5EQchWzE/AYjSPuKFwmQCGlIHsjICQSHoAW9xJ4ELsLCwTi+3H2cfOT7eDlPOJa4aPff98C4XbhveRM7C3xU/PE+nwDfAYmClIQvxRlFtUULBO9EZEL4wcVCKX5rOi07zv7m+4v37jePt9e4/3rFek85CXtp/ky/v3+dAN5DmUWRRbnGBQjySk9JwkiXiBYIxsmFSPpGUoR9w/kEDkKEP2C80fwV+3s6Kfk2t4O2hPcH+A14PDibOr67l3xuPaC/5cIWA3EDt8RwBOqFEsYwhXhCzcI3Aec+z/ujvA39svtE+AU3JbgSeZW6AnoJOda6/j3RwDR/r0BxQwBFHsW6ho/IjUo/SfEIw8ivSO0JdoibxsKFE8Qaw/kCogAN/ci84Hwqezx5xXjtt974RHl2eQf5TDrJvIE9j77vwJgCC4MaBDPE5UU0RMeFKAS/goUBTsF/vxv7Wrr1/Ln7CLg9NzI3MTeK+fo6L7jIOh49Hb84v+sAlEJqhMxGIEY1B/EKEAo4SH/HpEgmSNTIt4Z+Q/zDIAOPQrV/mb0MfCW7rDri+es4s7eVeBU5bnm6OYR7bT0jPc6+x0EpAxiENsSARbTFVEWkRqHF4wLawXXBgIA0u9+6CLvMO4l3qnUM9ts4WHg7uJl5OLkNfMQAqn/F//2DPsZ6BzdHKQjby0MLd0mQibPJgolwCLAHHMSAg1uDZcIU/sy8XPuUOtW5dvgQd7/2ljbiOCi4+3i7OZV8W740/pCAY4KQhBvFbIaqBnRF18bpBobEW8K1grwA7jyHuo078zsy99Y2aLZ09nL3hjkSuGs4mXw//vF/ej/cwpUGDgdyxwiJFgwwjEtKhgovys6LJAmuR6ZFg4RQQ+DCtz9m/Lw7lDrBeQU3wrd0Nh217rcQuC/3xfkA+759Db5gQAbCVwPxhT5GS4a4hhJG5caAxHVBzcHqAIY8iXmfulf6R7cHtQa2LvZLdl338DhZ+Dl7IT8/fz9/NAJ2BfcHcoetyMmLsoxmi3bKz8siisqKTskWxu8Eu4Qgw6/A2z2PPCL7ZPm7d9O3gPddtqi2zzgIuLN5F3vFfi9+VD/jgq8ETsWyBuZHHYa4Bu5HAEWUgygCEUEwPRj5xbqpOvI3nPUvNPn03PY69/C3uHceecj9vX8Gv5gBWYTFBtFHSYkLS4iMRst7SmDK2ItJSrZIbkX8hGFEZYNawDz8gPug+206DvgMto32OnaLuC+4R3gheQI8Eb5t/y9AMYJehJjF4EbHB11GygcQR12FVUJ4QV/BVH4Hucv5eroFOKX2JLU+9LA1p7gJeO13jfl5/XJ/yIB7QU/EZYcbSG0I9AqIjJlMqwtRCnZJ88o/SUMGwUOfgmCCyMGrPZU66Xpl+ce4sXewNwx2nzbpOBd4hfkbe7n+CX6u/z5B+8RrhZsGeoYrReZGaIb+hYHDJYFQQSQ+k/r6ecZ617lcdtN1rrVtNvf5OfkIuFL6CH4RQMVBRYIQRMYHrAhgCUZLdAx9y/JKvsnYCiBJ1ohex
Z9DM0IQwhEACTynOkp6OblXeBI3DraOdqE3jrjaeOD5SnwvfqP/uEBfAl8Ed4W3RrqGi4YuBiuGVIS3wffBEICMffs6TXlzuVU4vfa+NZC1rfY5+Hp5SXjXutp+6EB/QI6CqsVMSB2I8gkgCsdMdEvjCvyJwkmQCV+IaEXbQxpCDgIMgGj9Bntdepn5XrgH+A638bcPN/E4xLkUuZk8a76F/zX/l8G9Q34E4cYORc0FOIVGRZODwoIAwWN/+f0Uusj6q/pUeLB2zraSdtG4C/mUuaV6OzyaP2XApEG/A7zGO4cIR/FJrItryxzJ40lWiapJDcfrRiwEFgLQgniA9/5jPPN8JnrmuVx5Cnl/+AT4Fflheh66HjtMfSq+Eb+nAMDCAQN7RGvFBkTxxGAFKYR2AfBAgICv/pk78Dpwuko543hp9tr2xrib+WG5HDnSfC1+kwBMgIZCNUUNBz8HMEeCyXMKyMrWiTuIUAi0x7kGCASjQs2CJYF/v6/9jXzYvIT7e/mwObE6B/mVOR86PLrH+1h8Pv1l/pm/xEEQQaNCoERgRNGENMOow72DF0I5wIuADv7GPBH6U3trOyg4wrfruC842jonevn63nzYf//AuYDJw31FzkbgRylHzokDCllJwUhlR+DIAgc2BNgDtEKVAdaAmr8WfeN9P3w8epl6JPqIOo35B/keupI7UDtKu+c9E35gPxr/+UBXgebDWMOhArTCuYOugtIA1wBuQH1+R/xmO+N787rKef84iLkcek265XshfCB+EICLQNlBEQPJxgBGoUZFR0/IyAldyHqG20a2Bo3F2UQ3AlqBgAFfgDD+074j/Zg9LbvnO7W797v8+0K7rXxIfQ79Jj26PoX/k8BrAPDBH8HswviDJ8LnAoLClMHFQNzAfD/5vgh8NvtLfCA7LDkuONx5Gflgeis6V7u2Pbm+zr9xgHaCswSLBUwFbkZ/h5OIFscERmSGsMYRBLDDWsMsQgyBKz/bPxJ/Dv7lfbf8hL1Zvdr9bvx6PNY+Hn3IfeG+nf9lf7m/pP/4AJDB8MHggSiA8QIWQoZBPwAAQM8AS75gvTF9N7zlPDv7Inp4uic6/vpzujV73/2YPdy+Jj8dgTDDKsNAA3lEecXoxm4FUsTwxQvEyUPZgt6CXEHygK7/y/+8/2m/iH8j/gW9z77Pv2X+Fn5C/37/GD8F/0GAQAB0P+PAj0C2ANSBygG/wTQBqQHkgV6AtEEswS5/Qb7pPtv+/f3/vTX85PwvfE581rxcvUg+lj6ZfnP+1YDSwhWBn4E/wd5DG8N5QhpBTYH1Qb+ApcAHgBa/5v9lvpr+mH8uP31+yj4kvrl/wj/L/tE/BEAlACQ/lcAbgJ5ATMCYQEEAjwGigXiAR8BUQQpBn4CfQE4A9MBV/7z+579S/6J+nv5J/j89n/5oPqQ+Wr6qf9aAZ7/nANFCHkI/QdBBwAKVQzICakGQgOvAsUCjgBa/DL50Pl++ID2fPhp+SD5OPhV+AX9B/9D/br9HgBvA48DwgF5ArQEMQVaBJgDHwPbBCEFVQMJA0UDxwFEAEAA2v8K/cL6u/qk+S/5aPjw9a31KvbG9X73Mfr/+6j9fP7IADcGbwpsCrAJCgspD1QRtQ7KC3wJKwrpCEUE2gF5/z797PsI+TL4SfoL+sL2u/ab+nH8T/sJ+3T9JP/X/un/iwFjAVgBFwFrAf0DfgUwA0cBZgLXA/gCNwDk/xf//fvz+e/4Nvn09331wfRQ9D/1//WH9rH3K/k3/IH9if72AnMGnQZsB60JigxpDVYLzwnQCQwK8AeVBS8DbAHN//79BP0z/Hb8HPxC+4v7IP0I/mH+Pf/AAHoBdAG3AsMD9gSXBLIDigQHBYAFkATHAkwDZQOyAHz+3/6X///7yffq+PH5/Pda9uH1yfVw9qf3y/iP+Sr8g/93/20AeAQEB0YHXwdMCAIKywlKCEMHpAVZBPYCbAG4/yP9iPtR/Pr6t/lS+hz6Vv
ov+/39D/8i/lMAzgOgBIgEgwYFCEAI4wfJBxcIBAgVCEcGIwRMBPEDbAEa/qj+vv1d+Rb3yPZK9jn0bvP18inzT/TB9b/2A/jN+/v+r//RAB4FjQhPCHQIAgrdCx8MawlWB4gGQAX8AoAAS/1T+qP5ifia9ur1A/b09qH26Pa6+Sb7WvtP/RgA7wEoA70EHwfBCNEJiwrSCjULngsgC9QJ1wi1B2EF3ALZAXUAQv38+Wr5CfnM9q31mvVP9bf1vvb093/5+vt7/tn/bQGRBLQHkAgNCTcK+gsSDGcKcglCCJsGhQQmAtT/cv13+3/6qvgG95T27PVP9aD1WfdC+Cj4a/kJ/PL9v/5SAIwCyQO9BNYF0AblB2oInwe1BrAGWQaGBIMC7QG5AE3+U/s4+v75r/jZ9nT1bvUa9mv3g/ci+Aj7Gf4y/3gA+gL7BeEHsgipCQYLTwzECzsK6QhECAEH4AQtAtr/4v7r/QX8OfpU+RL5afgu+P34N/lN+Zv6q/zX/Tj+jP+AAQUDuQNJBBkFBAaPBowFzQRPBQwFmgKkAO0AiACb/fz60PqH+lj5Sfh09/z2zvfJ+MD4Avn5+l79KP6a/gcB3QOtBFUF6QawCE8J9ggkCGIHGwdbBrEEfAKpANb/YP/n/ZP87vtg+xP7Y/sY/Bn8CfxD/fT+CABmALEBPQMDBF0EkARrBVwGfAZOBSoEGgQ6BK8CRwCP//j/3f01+qv5FfuZ+p34mvdh98j3E/nO+aH5zPrU/In9/P3h/7UBmwJOA60D4gRLBhQGFQVqBAoElgMCA6IBEwD0/h/+S/2Z/E/8Yvzz+zf7uvtc/Ab9zv1N/kr//f/GAEQCigMIBHkEDwVHBfYFvAWXBQgFWATOA7oB3AEIAikAkPwU+UP7OP/p/OD22/bu+jL6Pvk4+/L75f5P/0j+iQI4BXkETQU9BtMGzQbCBqcHBAX1AjsD8AFCAP/+3/2S/Uj7Z/mE+jH6Bvnz+Qv7C/st+7L6S/3H/l3+UQEhAScBJASOBNEE8QbDB+YGBwY5BrIGiQRDA+4CdwHcAEf82/ku/vj82/g1+GX4DPmq+RH6ePno/DH+3vtn/z4BKwCmAtsEDAQHBdQGFAbwBIkFRAaOA7cBQgPrA1gBCP+m/wb/5vuN+zj8Efty/Ev5DvgI/fr/0P7m+g//pwKTAMn+zQPcBWYB9AKCBsgGUAPPAXkEjAOr/UQAqAH3+S77Gfy/+rn6ZvjD+1v48vZr/vP85fwU/z8ChQFPACkFKwVLAtQG9grEAr0BWQpNBnX/kAKvA+0Bu/7p/vABXf1l+zD/9P01+of9gADs/YP7yf69ABH8s/7tARz/xv9oBMMD2/z9A6gKD//O/20HLwTAAuv9PQDhBOL8P/fE/Kr+LfUv+yX+gvIo+yoDlvsL+o8CUQRP/Or/gQQ/AHACxAK/AKoCdgLQAWkCSQHY/3/+AQCIAZL8Pv3u/63/zwHu/rL+AwOkAaz+fAHz/ycCgALW+yYB2QRF/9z9AwPGCNEA4f8nCa8DqAH2ACACDgGc/x4CZv5A+R/5JgMi/F71Vf1p+dj7rvqf+AYC5PfcAHQLsvnpARkIVgE9BVYEdgQ2Bx8EWgSYAHv/HQT3AMT/vACU/LP7s/xB+ib6DP1s+v/6XfxJ/F/7T/j3AiUC5vyK/5ACvwXmA2QDfwYeBUUI7wU1/FcDOwWf/2X6NQDTAnX7pPpP/A/92Ptp+5r2Of+iAaL3BQGiBmL8CfypBIIHdfxGAhkKkP+1A7wF1P3uA9gK/gGM+ZkAswbh+jP2lgSkBlH0nPnsBx76yffv92MAvwQH+En47/+Y/8f1zgPFClj8MgIlDNAJNfvhAtERzQbY/63/2ARkASD5UgLjAJj2b/vf+aD6rvEQ8S7/egAs+bT4PAD2AukISP6r/ecMFA36BWECtARGD2MC/vleCCYKcgRw9XkBGQPD9H78QQIq/dn2evnY+z304Pnz/9UDUwDT81X8xg
v/BCX50AStEuMKCv/OAGoHpgb2BMkDmAAc/sP3bPxn+lz3tPcZ+rsAQPaf9E72XvkFAbf+M/xI/dYFQQD2+CoCQwhzCcMG3gdJCEwDfwAgBIoKHgj8AGD+OgXcAGvz8/fEBIQB0feN92f+wv2N9977m/89AaUBNQJ0BD3/Q/6OBwMJeAHkAkwF2P2n/+AI4/56/ccJdwPQ+Vj8V/6l/r0ASv/FBO4CW/R69//97Pl98yz4HARw/g/7d/kX+q8D2wKk/7UJiQ7mA0kAYQLHBrkGBv8pBeAHW/yT9mH6zAGv/cP5x/50AGwBq/oB9q4AzgDD+2cC9AV+/dr2GwB1BqoEz/8+/4kIXgcy/E3/eArpB4oB0/7v/+ICQv3t9o/+sgXL+xP4EgABAIz7wvha+yUB9gDz9+D4agK5BMkCPgFwBvoKGwipA4MD2gMABYAJngLe/Sz/2wD+/cb32/jN+5r7AvdY82v2+ABZAo380vwt/ScASQFdAYgCigOZB2oGHgJzAVQBdwMgCZYJ/QMgARADOgYXAKD33PZxAlIElfPw9AH++fe78qn3ev3A/BP5rf1PCFYDf/vL/q4L7hA/BasAggVOCbEHPf1E+h8CPwmHB/8AV/v/+tcDqf+Z9c/3If9+ASf+Dv4D+kL7FQEnAPP8Yv1x/nED2wNe/WD8MgAABVQDsgLSAOwDHghLA7kB2gN9BCsEtwKW/KT5m/eG+Ib7Ffqz+wr8HPkJ+iL8vvrU+tAAKwOxAtoFxARBAasDuAeiBMEC0AZGB8oEMAL4AXgAN/0D/BD+iv9K+5v6Wf4PAN/9C/z8/ukAXAL2Bb4E8AAFAgEAAf2s/iD8K/0jAzsDtf0o/CkCJwTaAo8BHQIjAgYAjgC//3L/nP6L/+X/vvxZ/Rz/YP4M/bv+rP4n+4n8x/+v/Zv5ffs9/8gBdQEGAGYEgQbGA0UCkgGeABABBwJ+A6IFagMl/+H9lPx7+2b8hf/7AWQCWgLW/i782fyA/TX9Hv24AMEATP09/AD+ngB2/7X+QwLkBM8AHAHYBOMAvQATAzUBEv/CAJn+V/z8/dP+EgC+/lb+y//iADUALf/f/xwBGgI6AMYAjwKmAh4CBgEHAXj/cf5iAFgATACr/7v+wgD8AI7/4f0SABME7wDQ/rP+8v78/wr/zP4c/2j/ygCTA9sACv5IAN4ApwApApQDQgNrAMr62PuL/Bv4u/vOADcCZwKH/Tb75gBxAqT9N/+pBFYG2QBz+0cAiwTZ/x3+1P4hAQECv/7E/kUAZgEsANYBOQLM/yj+r/yQ/e/+0/0++pn8SADZ/6f9Iv0UAVEFywUHBA0DJQTNBDwCiv8x/XP8ef/d/7X9dP1L/n8AAQLFAnAC6wEXAh0C4QLz/3H8tP2tARcB8/zW+zD+0v7h+x37ivub/YsA0wLRBPwF4gRqA8UFmQSRAIwAcAHZ/wr8n/mk+MP3Zvle+uX6i/t4+y/8of62/qf9kAKUBa0FowMKAXQC+QJtAngB/wByA6MCT/+t/sn9jv0w/mD/yQIKBLECDAOLA/EDcwOMA74GJgeNAuX/QP/i/nr+Tvvb/QoCl/90/pX/eACWAH0A4wKoBaQC4P6P/+z/oP5v+6z75f5r/vf8OPuG/KT9IvzO/GP+k/5N/ej9XP9b/i/7jfr7+9H72Pr++e/5T/wr/qn7R/vG/b/+WP+zAJ8BhwE9Au0CPQMhBPMDDwaSB0EF+gRIBR0FnATrBA8GwgYsCfwKtwvTCdAH8wf1Bi4GOAQrA7kBtf+K/nL6S/m1+vv8bf/FAP4Aw/+6AFsALP+8/CP7hfzT/Cb75/dH97j2Y/WR833xn/MY9cbyLfOJ9nX48veN95H7Sv5M/Ov7i/wT/bT8ifqv/EEAIwEsA3AGNwkVCvUKFgyMD7kRvQ7hDm0RuRDlC8YHsgU7A1MCbf/O/S7/a/+b/xr/jv7K/0IDOQUBBr4HNQmGC0ANbwvYByAH1gfKBegAGv0R+yn4lv
Wv8xLzU/Wd96b4OPre+pj6w/mv+AD5z/bw9Jb0hfIJ71jrwOlb6Bzp4+pe66Tsn/Ea+P36cP5jBEYK7w1BD18QORGBEWoQJg1FCtwIZAfDBA0DIQIhApEDjAWSB5IITglUC7sMcwrtCO4IkwmpCjUJ5AZqCAgL7woZCu0JWgvaDEIL6AdMBmoFtgN6AMn9qvre+CT4/fXN8o7w2/GN8xPzX/DH7kLvUPBA76jtcO6k7xbvMO0d6wrrVut16vfrku4B8lP2VfykBJkJ2Au+ENIVChjKFqEUbhQ+FEsQgwl5BJsBkP4m+2T6Jfn09+L6iv3R/qf/agIoB4ALuAy3DN4QJxTpEgIQZQ95ENAPqA0YC+YIJQgjBy8FhQPhAowCKAN4BIQCD/8p/Y78IvoN94z0S/Lb8SLwN+4O7d/rHut16jDp9OdP6MHoBOt97Ynu+fDq9Fb4E/nF+0kBzQTJB5kK8g1iEVMQygyVCmcI/AQtAsT/9P4gAHz+fPyr/On7m/pC+lP8CwDVAQkDCgZkCHMLAQ5/DVAO1BFIFeYVYhb9F7kYABkgFhYR0A7ZDSAMpQhOBToEUAN9AZz+PvwW+TP2SPTP8FTtMetP65ns0+wd7Z7twuyb6zHqUeiw56voQekF6m/r/ey68Uv4GP2hAM4EDQlUDdgOogwMDcMNQQzACfQFDwNqAWgAZv5N/I/7ofuI/IX9uv4t/6v/dwELAgQC8QJ9BIIGswmNDOEOiRL4FdMX/BdZFpoVFxdQF/EVlxT8E0UTbg+7CfMEiwGd/2X8dfhu9VnzcPLg8G3uqOub60Ltfe2k7Vnuou/n77ruTO3q6rbpnuhH5yfnLedv6TrvnfZi/AwB7QcNEF4TShKrEmwU6BKYDvkJmAV8Aoz+WfnX9X30t/NC9Pj1dvgz+sb78P3l/kP/IwBhApMFfwgSCxgPWhXZGXUauxrUHKgdPBzOGkkZURhKF4oUGxHqDWUJygQdApz9IfiK9E7y6++o7cbsau0A7wXw2/A685z0QfSo9Mv02fTi8zTyyfAv7h3r7uiK54vmxuZW6WvvKfdK/JEB6Qe6C9EOQRDeEE8T0xL8DpYM4wkvBWgA5vzc+PL1O/VF9Qb3EPgT+Gf54fq3+vX7MP+IAvoFcAnrDqAVOxooHUghjySJJJ0iiSHfIQke7BZLERkNNAfw/sv4HvVu8rXu8OyN72Twk+4H8BPzgvTK9Nv19Pl+/cD9Kv1k/QP93vmr9G3wpewn6EDkS+E73k7cgt1v4avnLO7+9DD+Egb6CWENexJqF1Qa5xngGM8YjRVhD5MJngWJAOD67vaO87nwI++L7unuUvBl8sb2M/2oAlIH8w3CFvIddyFUI/kk9SQcIiYdCBiQFD0RNgz0BmgDggAq/RT7zvmx+GP4xvn/++b8wfwH/TT+Wf4p/GX6X/p4+hD5h/cX9yT2NvPq7zTth+lo5eniGOGr3Vra/9l03Zzj8ugm7qj2kwBuBxkMqhHFFpsahh2gHusdSRxMGLcSUg0WBn3+Hvr09730b/Go8AXyQPKa8S7zaPfF/JoBowUXC/EQuhQYF0gZmBkvGBIXZhUnE9oQBRAKEBIP7QwiChwI+wV+Arf+Zv1b/mr+L/2z/H389vqi+K71afOQ8/LziPOl88L0y/Rg80HyevC/7SvrKegr5GHfw9v52hzcst004V3opfAV9939swYAD4UVOxvJIG4kLiV8I4UgtBwOF60Q8griBeoAXP0z+7j4dPUN893yIfNg88b1WPoF//MCHAZYCNkJZgrDCuoK7gqkDMcPHBOyFEkVGBZqFfcSFg8rCx4JqAiaB3cGzAYDBwoF1QGn/pH6MfWn8Dvt8uq66ZzpwurE66HrzOs07Kzr7Ooo6oXpKejz5Z3kWOVH5+DpBu0n8dv2w/v4/jED/AibDt8SIBZKGV4bIRviGAgXPRU9EmwP9g3zDG4KrgeaBVkCy/6z+5756/hX+HH46PlP++L7UPx0/Xb+Q/
8UAbsD1gY+Cu8N6RDnEi8UWxQeFJ8T0BIwEjkSxRHjD+0Mogk4BVv/MPlA83XtvegD5dDiFuLc4ezif+Vq6AjrH+6g8ZL0z/Z++PH4+/iP+UD5OPcr9vH2oPZM9W/03/Q695359fpp/bcBAwYiCRoMlQ+/EpQUqRXxFmcXpBYhFpsUzRDrDHYKXAc0AwsAUP4R/QL8CPvg+V35KfqS+5v8of3+//IClQSnBe0HbQr7C9oM1g06D6gQCxLiEsYSyBGJD84L9wYgAff65fWh8UTtuelH6N/n+Ofs6C3rNO4q8fPzN/de+sD7T/yY/Tf+Bf0b+xL5+faQ9Ofx9O6N7M/rBu2c7/PxM/Mi9Y/4gftu/RAATANFBpYJ0QyNDg8QBBMdFXsViRUcFpcV9BMDEowPTAzgCHoFJAJS/0X93/zl/c7+TP+tAG4CnANcBEYFEQbABqEHzQd+B48HVAc8BkgFnAS0A/oCUAI+AVEA5v8c/4L9Mvyp+x77TPrx+UX64vqx+5n83Pwj/LH6Hfma93v1VvJJ71Lthush6QXnDOaw5RHm3eeR6kftFfDP8rj0AvbT99r6cv6+ASIF1ghXDOAPahNlFvoY7ht2HpYe4RwZG/QYeRVKEcINpgqXBz4F5QPRAnoB4gDsAFIAqv9KAIkBEgI5AmUCQgK2AUgBEgH8ABsBVAHhAdEC4ANxBO0EOAYXB/8G8gZuByMHIwV1AvD/Vv1D+iX3N/TP8QTwGu4k7NHq1unl6MfnxeaH5jHnNuhJ6anqMeyM7W/uje5S7gXv0fAP8+r1B/oX/6IDQweoCjcOnRHBFPoXNxvHHRAfXh/QHkwd3RotGNYVnRMqEZ0OOAxwCUYGfgNUAV3/9v20/SL+eP63/mj/MQBAAL3/9f/mAJIBJQIuA8IERQb3BjYH5AerCM0IUwjeB1EH9AV5AzAAS/xX+IH0ovC57PPp+uif6CPoQeia6cnr/e1f7/fvC/G08v3yE/Eu72ruKO1K607qcOoF6zPs6u7t8p73T/05BAMLixCUFaYauR43IdsiDCRGJBAjtSDUHfwa9BdWFGAQqAxfCfkFmgKU/+389voQ+vf5Hvp/+l77Gvxv/Az9KP5N/6sA2gIbBUcGdgaTBucGEAftBrIGjwZCBj4FUgP0AIb+JPyE+dX2afRl8ofwsu4f7U3sBuz261TsFO2p7fftEe657XHsh+p46JzmNOVR5BnkwOSB5mjpf+3d8jj5bwCGCB4QuBZjHP4gVySmJu0nEyg1J4clGSO+H9obBxhiFLwQXw1VClAHJgQuAaz+x/xm+3j6u/nI+Oz3evdV9433lvia+rr8pP7NAHYD3wWCB7kI0wl8Cm4K3Am3COkGhASzAZn+RPs8+LT1XvPX8IDuyuyM60fqP+nr6Djp0+lm6uTqEetv6hvpdueD5X7jbOLi4pnkI+dk6jXuUfIh9//8uQOwCmsREhhFHhQjOyaRKHIqIytFKmwo8CXcIm8f7BtCGGkUZBAqDHAHOQJA/Vb5hfZC9KLy0PFA8anwYvBr8L/w2/ER9CH3uPqa/qICpAZgCpMNDhAHEngTBxRYE6kR6w5aC/gGEAIe/aj42vSg8Y/ucuur6KvmUeVH5MTjBeSJ5MnknOQm5I/jD+P34lXjX+Rn5oXpWu028fr0OvmL/pcEwQqkEHkWMRw3IREl8SddKmAsOi10LJsqWih8JcMhWh2dGI8T4A1JBz0AxPmC9BrwS+yT6WTofehN6ZnqauwV76byvvYV+1X/fQOAByYLLQ7KEBYT4hT/FWIW1RVEFKYRJQ6oCUkEd/6P+OHyte0d6UnlSuII4KfeXd4B31bgBOLO41vlguZK58XnKOhp6PfoRuqK7G7vDPKJ9C34Yf1bA2sJfw8cFuccnyKCJiop3iteLkUvAi6wK08pgiaUInQdIBgWE78NMgfQ//74k/Mr7xDrvuc15qjmLOgH6o/snvAt9iP8kQEEBgQKzQ3dEMcS3RO/FF8VZR
VGFCUS0g96DZUKmQbqAQz9Kvgf8yjuoukE5irjl+Bj3vDcqtyF3f/exOCR4mDk6eUc5wror+ht6eDqQe3B78nxz/MC9337JAEJB0MNaRTiG0EinSZsKQ8seS5SL/ctRStnKDklDSHAGzkWERHjC7cFpv4k+DfzYu/a6/jo0eeF6FHqduxW78DzevlO/0IEWwgiDHQP5BEqE6UT2RN3E0IS9g8dDUQKVwcKBBUA6vv59z30ifAf7UHq2ee05c3jQeI14brg2uB64ZPiqOOn5Lbl0+be55bok+mc69ruF/J79Jj28/nl/nQE8wmLD/oV1hyoIq0mmSmULD0vMjDvLp8sICrwJloijxyOFr8Qmgp6A/X7hfWz8Kzsxujw5Vnlrubh6Fbrju5g8xr5bf7LAq0GiArjDRoQIxGkEegRgREhEOgNawsVCXsGPANe/3z71Pcs9Gzw7ez16XPnPOUc40rhC+Bv32jfoN8W4LTghuGV4ofjZOSJ5c3nTOs879HyHfYU+jT/3QQyCpEPhRX0G8Eh/iX3KKkrKC5GL1ouHSzOKVMnlSNEHoAYNxPuDckHwgAR+tn04fAg7WHp+ebo5nvol+oj7dXwHfYK/BoBNwVWCbINJhHiEj4THRPdEvgRMxDrDZ4LYwm6Bm0Duv9U/Bv5ofX/8X7uTetx6MHl6OI74EfeUd0n3Tfdgt1O3rvfIeHp4dri/OSt6Bnt2/Dr88T3IP0mA74Iyw1UE7UZBSDEJMwnYyoELZku9S11K2IokCVZIr0dFBi1EvINygisAkn89PYM88XvWexi6SroHOn86ubsZu9X86z4GP57AjsGSgqCDnkRehJwEkMS6xGPEDEOqwtjCdoGXgNr/8779/g09hfzEPCD7UXrHunq5rbkr+JL4cLg6OAl4ZvhbuKC4yDkROQ+5d3nlOsm73Ty7PVD+rP/nQVSCw8R7RbKHBQiUCZWKTgrEiz8K5Aq3ieUJAshAR01GAkT8A0GCecDiP53+af17vJ28BDuduxB7Ejt+O4g8Tj0PfiR/LUAYgT0B1YLKA7xD8UQ+BCgEL0PQw6nDNYKgwiJBQ8Cfv71+oP3/POc8LjtF+tz6PPlxOPp4Xjgkd8u33zfLuAC4cnha+LY4rTj3uVY6UbtN/E19Xz5i/5KBFoKdRBcFvQb/iBmJc0oKyugLAgtJizgKZUmyCLHHi0a7xSED2MKVwUhABD7rfZa86nwGu4C7EDr4es07ejuVfHY9AT5Gv3hAJwEbQi/C/4NJA+fD8sPcQ9nDuYMVQu6CY4HlAQLAan9c/pd9xf04PA47uzrlukf58Lk2uKr4RLhwOD74MrhBeNB5BblwOUI53fpsuwb8H/zGfd8+wUBPAdYDUITJhnzHhMkCSixKnYsdS0cLQIrcSdEI/IebhpIFcMPfQrYBUUBoPyB+Dr10fLa8CLv3u3J7ejunvBz8on0Y/fP+jn+QgEPBOgGYwkAC7oL9wsYDPsLQQvaCSMIdwaGBNYBe/42+0r4jfV68l3v9ewe61DpHufs5FHjduIG4pnhoOFQ4pLj8eQg5obnr+nh7LrwrfSS+KT8VwHbBqAMCxIZFwYc4SAPJQYozingKmYr2yrFKEolICHLHEkYPBPzDQYJlQRrAHj8Kfmx9gj1wfOq8vzxKPI788X0c/ZD+ID6DP2a//0BSQSFBlMIfwkRCjEKHAq+Ce8Iugc5Bk0E2gHi/pb7T/gp9VTy2e+87QjsdOrV6CXnoeWM5PDjp+OP49bjTeTm5K/l9ubb6JDr9+7V8uL2NPsRAFoF1QovEC0VjRmuHSkh2CONJWEmhCbvJWAkwyFVHoEadxZAEu0N1gkrBtICtv/9/BP72/kf+Zb4Yvim+HP5lvrZ+y79nP4yAKIB3QINBDcFMwatBpkGLgaUBbMEYgO7AQgAW/6B/E/6y/cn9bTyb/Bp7s3skeuH6mXpUeh35/zm3uYZ56nnieij6cjqKuyv7Wzvn/F29Mv3XfsJ/+QCSQ
fvC3cQhxQnGJUbph74ICQiYiIoIoshMCDWHbgaQRfDEx0QZgzUCKkF8wJ9AFr+sfyz+xb7vfqo+g772fvT/Mz9u/6q/5oAhgFeAlYDPwTDBM4EfgT/A1gDWQLmAD3/rP0y/Gz6T/gV9gj0DvIk8Erut+yD62nqQukp6F/n8ObR5uTmIuek54notekC653sle4X8SH0gvcG+73+uALTBrYKIg5SEWIUGxc8GbIauxtzHL0cYRxoGxYaeRiSFlUU/RHOD84N1wvUCfUHXwYiBRAELgNvAvcBuwGXAWoBMwEWAQQB5QDGAMQAAAFTAYYBUQHjAF0A6P9K/1P+OP0F/NP6bvnb9zT2ofQY83Hx2O9j7hvt7uvO6r7p6Oho6Bzo5OfG5/znm+iL6aHq+uvG7frvhPJT9Yb4FfzV/38D5gbqCbsMWA+LEUMTnBTAFbwWehfrF/oXxBdsF6sWrhW3FMQTyBKtEXYQBg92De0LTwqxCEYHHQYtBWYE2gM+A6YCTQIbAvIBAQK9AUQB6ADd/w7/gv71/Hv7Vfr7+ID3CPb59J303vTx867yq/FD8a/xv/BX8P/vW+6g7fLsjuti63nrzuri6kHrB+wr7Qvu0O+V8afz1/Z2+aP8LgAUA8AFTQhdCmYLcwxtDXEO+w6vD0MQCxDTECMROhGFEcMRtBGuEV4SThL0EawR0hDrD8IOjQ1jDBkL7wlLCEQH/gXRBDYEVgOdAvoBuQH/AEcA0f/X/pn9cPwt+4z5MfjE9ln15/Pc8gTyv/At8D/veO4h7ubtPO7f7ert/+4C75Tvh/AA8GXwu/D38LzxM/K080L1//V5+Vn8iv1nAccDnQV3B0sJawoPClILNAwPDbsMSQ6wDqkN8A9tEJcQjRFwEscRzg/xEDwQsw7OD80P5w32DMQNUQr8CVwK4QccCEQGkQYHBB0DwgPn/qcBi/9l/KD99viW+t345vcr+Mb0+Pc885Xym/KH7oXvFO3z7mXuwuuV7/nubO3Y8ObxIu/b83/z8fAX9aDyafKr86D1/PSy9jv57/kb/t785wJSA5wDvwcNB5wKdQdwC6sKYgtXDoIIKwyHDOIMHAyPD1cQjA2dEogPcg6TETcQ6QxYEeYQeAuZD5YLsQreCtAGKQYuB4sDogGdAsL7YwMs/7v95wGp+I/+tPsf+bj43/k+9uL1/fhf8Ef1qfG37p/xhfCU7vbtI/DV7gnuNe5C80XxXvXB9InzQ/p/7yz7u/gs9v8AbPYB/Gj/7PoTAxcEygFYDTUDQgZJEdACkAlPEGsHLAzBDpUHJAs9DU8MwwrSCzcOTwwpCtkN8AuXC0ULRw5TDpMHMxB/CNcDGQwOBTYDoQsC/pYAJAcv9+AFoAG0+NYF0fg3+XwDZvON+pj+0+1k9hr4V+s38wbzyuua7Zjuiuwp6PPvRvCG6Bn0HvW28kr3RvaR+iT1cPlAADjwC/01/u336gHf+aP/fQLJAw8GvgWwBHQKnguUA7MNowp1CJARegR4CQISfwOvEXcQUwUVFYcOswdVEaMPrA18E5gPFwzdDGgICw1dCMQC3AzQANv9CweL/fP9ngZwAu/9UQBB/Rb+sf29/vX/d/uO+mn3kPKy8GbzBvas7Xr0NPWf6ALurPEk7Wfx8/d79/b1+PRY+cLycfj5BDz04/s0/Fn2Sfib9az9PvvW/mcCjP+j/nb/VAWQBesF8AzICIgF/wqzBjYGNQowDjMJDAd9C5gHtQa7C5sNVgcMEVELXAP6DvsFCQcJDXMEEAmQBKL+0AFo/cIB1/+uAjQAcvshAfz63f4AA5r+o/6I/Fv+mvzk95b7gfs/+tn34PYY8xvx3PRI8FX1nPaq8bb12Pf09lD4p/69/C/8Uf47+8/+mv4D/ikET/3h/XsDWfzw/1UBPv2eATQDXP85BJwE6AZcCTsFiwjZBOoGoAsOBTkI2Ql/A58JaAeLBBcLXAjVCZEHZwNuBxwEDgD1B84DngHBBkAAFv
/D/qIB7gTe/7oAnADq+5z/YQE9/ecAEgI1/oz+fvqn+b36MvgC/BX3TPRR9xDxcfEu83j0dvYC94z2PPLZ9dD3x/hT/On6p/7E+5b6rAAB/NYAcAZVAZcDmAIFAoACbwIqBgoFSwV7BQsEFATbAgoGgQVjBTwHQwTRBccEoAQkBLIE1gWdAtID/AAdA2QCLQGkBg4D0QXSBNoDJAeEARkGKQVJA7IE0wFZAmH/ogRDAUP+pgLL/YH8LPvh+Ab4gPe1+Jj2evc6+Ov4nPlo+M37PvxK/B4BWwFVAg4FMQXnBPkD2wNYBS4FXQbDBNcCsgWmAugAtwAx/6b+ff5E/CT6Nvue+HH5Lvg3+KL4//V49gf2lfW291H4O/ST9ZHzBvF18gPy6/L78l71XvND84f3Jflu/TkB2AaqC5kPEBeoGuIe3SWiJfUldya7IJMglByCFPESLQxZBJb+ZvUX8M3r9ud26B7nuOf+6UHrOO918yf5kf+KBywO1g8uE4QV5hYbGvIWvhazFo4QWAw9Bir/b/rA9obvVeoH5jXgud8O3ADchd1p3Gjc29ke2VjXt9ak2XzeBekR8zH7SwYzDlcSfxk6I2YqojDZOe88NDc2M9As8yLnHSgWVg2WBjL9AvMw6jPilt5g3sbfb9/33Tvjc+ml7oD3fQIlCdAPqxUqGDIb1R5xIhAmoCcOJUwgdhxmGGkRewsiB8wBL/mo9BvrguPJ4RPf893H3Hfcwdqm23vdEN7q4Hjkt+R+5KDiid9t3UTjquzy8mb6FQBnBwUNuRKNGvwgUixHNpg11TKrLHQiAB5xFsQOrQqpAuT6X/Bx5YrhNd7G3ffileEc4h7nZ+w79mwBMgwWF2IgeyZ7KXosjDDkMTQzbTKbLQom2x1mFVoOCAqHAlr8PfVT7Cvkit7g3A7cD93h3N3bbNlJ2TTcW9xP37PhdeEf4b7dINwu3JripvDo9nL62wNhC1gQqxhKH1YkmjCNOCA0mi48Ju4cJBbrCuYDX/0d82HqgeD41SbRsNZu3KfdPeCq5mftSvVcAHINxxtKJpsrLi7vLigwnjK1NFMzji9OK1AjJxoIE8gM4AjMBG7/GPhv7gzn1uLE3rLfCOKO4MzfWN163D/d190M4gHjWuIm4yTfKN0h3xPjGu6O+8sCHAUzDjYW4Rg0ISwqvzACNa8yliiJGhgObAiU/jbzOeyK4HDXGdD4xzTGMMyF1hbfweKi7LX4gAQgFCch6inUMCQ4XTouOlY5ijklOpQ0dyymJN4clxeZELMIcAS8/Qf4fvIY6rzlqeVO5/Hlg+I74n7j8eQp5jTn0ubX53TnUuQF4+3fxN6e35/j8PFH/P7/KAjQDikXUR+TJoYwXTWuNrMxtyM/FL4IlwDe9BzoNd1B0sHJnMFqvN29JMVm0Zfbc+I676H+nQwEGzMnojBaN7Q7tTtgN+80uzSmMTYqSCDvFlEQCwyzBlYBzf5x/WH6JfVx8B3vdfFu8sPwXu6t7ebtwuyc6Bzm/eag5XrjxN/u3KPbENwC3aneaer0/sINexKlGmUmvi4bN1FA5UQlRG4++TGdHZALpQCS85rkrNVeyAy+f7eXtbS15rvByvvWe94T6/T76gykHJ8p2zPRN+44ADoyNeov4iwlJz0faxOwCRkE5f///9z+O/9PAH3/Dv62+ZH5ufwX/iv+Gvte9k7zGvAQ7LbnKeSV5QTgK9my10HSrNCF0pTWq+GP8TYG4xi4HpIk1zPNPdFBoUjfSUtDBTpDKGgTGQDG8EHjxNEywjm4SLH6rX2wlrbwwtnRPt8g7lD+kRBxIbAwuzvpP6M9mjthN9cueCh1ITcYhA1jBGv9p/ig+JP7LP1o/2ABTQAg/e380f/RAGcBhP/S+yD39PDR7FrpPOes5ZTdTdck18XXDtfA1HLYx91f5TT9DhTVHqUqFTe1P3VCD0d+Tg9MfkMnNyQgIgfz8WvhlNFZwe+4bLRqr+KugLNWvHbMed
7r7tb/xBBCITovnzpMQKg/9z2EOXEuCyJyGU0T3QhC/b/2avQy9fD1SPf5+an9+wIABJEApgB1A6UD1wCf/cf5OPei9iPw7uNF3knc2NkS2DXUQNIX0KzSSdil2evocAWYG2EnSC/HODRB40r+UYNRRE0CRIozxBpYAL/qu9rbzSy//7LJrP2rArCytorBMdOa5335AAkzGaMoTTh1RVFINUO6Ox81kCwBHzgSWwiwAiP86PEV7A/rne/596r8x//NAiUFswYXBPEB6gF1BAwGggCw9pTsM+c14lzbf9cM09DOHdFM0FrLpc932V7hG+xgBQYfsSq/NXBAvEatSwlRIFTnTbJC/zMiHIEBcOos2JvHiblzsJWrMauur4W3PMNQ1h/snQBuFCMlhzSxQABJa0uIRQs9HTN0Jm4Y1AoaAQH70PPn7ZzpCunc7djzXfqCANoG/gvvDdwLWgtQC5kHFALJ+8X0QuzK5RPdHNT4z3rMI8jKxtDMatPU1fHbweVJ7dz5qRJRKQE0ID+gSk1OLk3cSstFHjv0LRAcRwRJ64HWO8bAuEOvvqpbrOayjbwDySbaz++OB8kbpCxFOnRDAEqlTRtLJEAwM9MnvRizBsP4WPHU7evqyOjA5wDqkvGm/CYHkw4VFN8XfBeEE5sPpgvnBlkBavlB71/kw9pP1JPPe836zrHQLdIK1jjZjNpj4IznUO0c+tQT2SrXMwI6hUFIRvREbUL8PWkzziXME7j8JuS/0TXFWrqtszexFLQ5vp/J2NRN57D9EBFPIh8x4TrZQJtFn0aUP8sx4SWlG6wMGf2S88vxYfJR8Srvpe9V9MD9VgnoD9ESQBbOFjkTdQx0BocE5v/495XuEueS4RTcsNnv1Q3SmNLO1EvYSd2b4mTmredX7HPx2faACTAhvy9ENSc74UKxQK04PDJeJo4YCQz8+crjIdKwxtS8sbbOtu27zsVm0nPf1u2eAfcWoye+NDs/aUToQ0pCoTzvLnwgCBVHCLT7V/Ig7wrwFPHm8xH3W/v5AbALBBUBGE4X4RSyELkKUQTeAND8KPWO7jPpU+Ol3jHdvN5c3/7eVeCK4fPgy+FJ5gfqtevs8Bz2L/hWAtkWtyT3Jqwu7ThhNsEtsSViG2EPbQSG9qDiptEFyZDDmb9uwAvHNNIq3//sRftiC/wc7Su3Nso96z/JPl08VDRBJjAXlwvOArT45/BC8InzZfae+O/65/5TBWQMjBFhE+MQPAxsB9ABK/2T+yH7IvmQ9XTwjev/6DHo0OdH5/nl+eOL5I/kv+EO4vDk5OnC8LvzTfRg/S0TDSiALQItRjHqLx8nFh8UFYQG+/k977jdPc1oxt/Ec8TtxvHQtd6k7Eb7RAr7GocrrDWpO3BAnD9BOQkyuikkG6QLj//59nbwKO+U8zT53fwW/sUA5QIvBo8MKhKsEtIQMgqYARD7Evl9+0n75/mr9if0sPJS7g7r7Okz6drq8OlS5mzlAOTk4pfkW+Xa6JXyhPUS8+EBcRiuIg0o6S2sLBciaBhaEpoHoPqH8K7mQ9iFy2/HbMZ+ycPTE+PZ8z0CTw93HawrFjV1OoI9bzrXMR8oRx7FEqkGR/6K+fv0Q/J49l3+/gLsBfwJFAxnDMwOhxFCD0MJtQUFAwD+ovhl9tD2XvYA9rv3qPfT9H3zd/JK7jnqruuQ77rua+pW6DnnJ+Ss4m/louev6rz38wrRGOEecCGEIzoiKxxnFeQOKQf4/b3wuuEB1pfPRs9P02/an+TV8u0Azgw9Gd4l9S8vMT0tmSqIJLYcJBgdEooI/gFhAMP9s/nA+okCTQe5BRcE0gTIBUcFzga/CcEJvgrECocHXANKACMBa/81/BH8PfoF9szuw+jX5/Pl5+Op46XjwuOG4lbjX+XH5Rbn9unz7DXt9+9+AA4VIx5HIR4irx5gF+IOTwoaBpQBXP1X9cDpnOL94l7mF+2U9pwAaQmkEKcSCBT1GgohlCImIIMbSR
XODiUL2wikBkAJiQ3TDFwI1AS2BO4DdQCy/20BygAL/yP/UAHMAiIFZAm/C70M0wytClYGxv9v+zv5ZPT37j/pq+NF4O3b9tY41wLc3t8Q4svjceQL5GHmter/63XuwvxzEgEdBxuHGYwZSBUyDeoJGg3TDwgPQQkz/6jzeO5F7unsHvDD9on9FACc/ZX+6AP2C2ES9BeLHsYgOx8OHU0aKhciFR0UoQ+iBYv90/g885LtNuxK8Vj3Gf0yBJYLKBIPGDkbEx2FHWYanhSkDMQBSPfQ7/TpDeVx3j/Y5NNW0hLRQtBM1RTfneYO6k3qKumA6WfrdO7u7VLvcf68ED8W2hbIGocgkiQcJQ8k7SJ0ITcawQsl/KTuI+Ze4x7hVuFp50nwXfdK/OEDYA5hGlMjBimgLU8v2Sz6Jfoc1xLGCUkCR/ng7l3m2eKi4cvioOpR938EGhEhGxAhZyTiJS4mFyRSH2sYBxAZBVT3jOuo4abZVtSTzyDLzcabxC7HDczM0RbaCORn7D3yNPU+88TwL/DI8OXybPhuAnUJAAyGEksdcyV6KQMs+SsRJrMb5g3jASz4Ve9n6jPm2OOL5jHtQPV+/QQKsBjgIvYmjSbeIwggYBtFFwwUDhDpCsIDpPrb8Xntp+168J/2Mv+fB/MNcBToGw8ieSfULLUuwSmiH0cUywf/+f3upud64PDYxtJQzKrEor7Wvb3CKsow0xrd5OVL61nuCPB18KDylPUR+Ab6kPog/dEE4g6tFtYa1B51Ijoioh3IGH4WjxMdDsIG8v49+Djypezj6s3vXvikAOkH+A1TEzcV4BTcFLAULhT/EusSpBMKEt0MFAisBBgCRAHXAlkGNAioCeIMIxCFEQITtBYYGqIaUhhXFDsPaQiwALL5xvHY6PDgtNiUzyjIYcTgxXDLVtFd1kDb09+j41vmUOlv7uf0ifnM+1/8Mfz8/OX9x/5AAesF8gplDs0RbBd2HcUhNCNgIHsaWhRuDJgDi/7j+6v5vPga+UX5j/j/92X5mP7qBWEMnxJmGBIc5xx3HJIdgCCvIg8hfh2JGWETOAyjBa4Abv8WAOMAQwL8AosFcAsdEG8RwBJRFLYRDAmz+xjuOeIL2PrQGc59zGPKZ8t+zfvNhM/Q1ifj+u6V9N31TfdG+G/3ePbD9ib5S/37/uD7hPSv8MH04fzjBTYRkB/5K7sx/y+OKmwmkSLfGZAOQQSf+sPwD+fF4U3hGOUG7fn2Sf/GB1oTqB4gJ1YtRzLrNdc1uC9CJhYdOxW4DZgF2vx19/X1EvR78hP0KPkuAKUHEQ4DE0gVIhTKDlcFdPmO7YPkSN3T1vLR6NBW0aHQLdEW0xvXRt315GDuYPbj+qX8+vwD+lr0ye857V/s9euk7Cvu0u8Z86X6fgaJFN8jYjJnPDs/sDubM9QoXh1WERYGufuQ8ETmsN2q1+DXEt/a7C3+vw6hHaAqDTOHN6M6Hj2LPb86BDV7K+sdZw6C/znykOj6423kc+c87KvynflgAPYGIg4DEpcR1w2zByoAl/Wd65njGdwK1YTP/cxKzH/OatNz2njiveoy8h/32/cJ90/29vLu7VPqD+op6sXnJOZN5pflaOV87FL9BRJdJRE2f0OzSGZFZD2SMeYj/hWmCUoAlfcI7cDj/92a3AXgVeh79JUE4RXbI9QuwTatO3M+uD09Ocoy/CpYIGcSlgSE+NPvwOof6K/n5+jq7Kzyovdw/GIEfw1gEvAQqwrJASL3iuw046/b6NaE1S3WRdcw2f/bC+Bt5CfocOzw73/xpPHd79LsYeqh6Fjnxebl5gvnaecu6RbrOe/X+DIIQxkhKU44TURpSE9EyTw6MuMkpRf7CZj9HPRy66Xjtd4U3pDineqz9DsC9xOpJc8ytzuGQctCoT8SOAUvMiUEGPUJY/0H8/7r9Oen5Bjjl+PJ5mTtcfN7+SoCJAvtDmwMNgYy/+L2sOwj5FjfkdzZ2oDbB92n3SXdZd
514eTjX+Vj6Nrr6+357hju9Oyx6kzoSefN5bLlXuct6Vfr0+3r9M8ChBUEJpAydz4iRaZEuT7RNegsliLiFbcJ0P/A9Knqj+QZ4dXgz+Rf77P/vg+VHRorwTYPPg9A9jyUN7Iw4yb1GlEOVgLW92fv5+fL4S3fVeCT5F3pW++f96T/9gPBBM0D0QCx/G73tfEP7WPqdOjL5Z/io+Av4Abf390u3srf6eE14/TkB+dz54Tn0+dj58vmhucU6v/rle1Q8WL1jPnh/uQHCxR0ILosLDYAPOU9IDuFNbcuVSbBHH4TRQo3AED2OO8c67nogemj72P6NgZSElMe9CeJLmAyFDIJLv8o5iJWGr4P6gSN+0by6ek75FDh8uAJ40znrewC85L4Jvz6/fj9yPwJ+v31uPLC72js1ulo6OLm6eTO437jaOJ64H7foN+/32PgT+E74nziCeRY5wPqDOx87inySfS/9uz8XgQ4CnYOsRSaHAMj8SdZLTUyoDR+NUo0tjBFK08kcxw8EwcKbQLd+1j2G/L58Ozzu/mZAbkKfxSdHcIjACb3JHMhXBycFnMQhQmZAgr9XvjR803vCOyW687s7+3Q77vzPfcS+F/3ZfZb9MzwKO2q6r3p1ulT6pzqcuq26rfq9+iX5orkiOJ84FvedNzt2tXaa9wv4BDmr+yC8h73IPyMAEYDXwZTCuwMEg16DiMS+BWXGbAdOyPRKN4sbi/zMDgxyC/mK+wkZRyyE+sJq/9w9yPzjfJk9bP6tACgB9UObxR6F48YPRhbFlETGA+0CW4EYf+x+Rj0a/Dp7lnuW+5y8BHzKPRx9J/0OvQ182DytvEp8TXwuu+k75XuB+2r60Tq3eiX5zHmCOSY4Fjd59rB2UfaNd485Wft+vSk+hP/+wIhBgcI3gnsDPkOWQ6ODVQPRBFNEmYVWBoEHyEj2ifZLG4wPTKaMZQtoCbHHv0VUAsNASP55vOq8dXyAveq/PACCAm/DRkQzRAWELQNngr6BtcC9/7K+5z4tPXs87rzxvTe9Yj22PbU9g72A/Ue84Hw+u1T65LoReaY5BPkq+RM5eXlYedD6SDqnelO6GzmZuTD4mbi8+P+52/t1fLv98X8KgHXBJIHiAn+C+EPohIdEzkTGRR+FFEUYhWzFxMaFx1CIb0lninmLKAukC3WKQwkSBxIEtkH/v4j+dT1JvVL9zX7p//eA2sHnglCCssJCgnQB0AFlgE2/v/6dffs8wHyv/ED8hnzdvUk+I36n/zS/WX9Hvsb90DyVu0X6e3l8ePU4l/iB+Kp4VLh2uBA4IrgheJD5j/rkvD99RX7Ef81AVUB7f/a/UX8iPtz/HL/nQMIB4YJpwynEBMU6RYRGuMclh4BIPohRySOJqYo2ymLKecngiTkHtEXChEbC84FbAEz/hT8ovqx+Tj5UvnH+Vn6TvuS/E39Xv0x/cz82/sa+zj7yvtW/MT8c/w0+775gfjl9gv12/NA83TyWfHU8OTw4/CA8H7vCO5t7Lvqe+js5Z3jI+K94aniYeUo6qPw9veo/lEDOwW1BOQCVAGSAC0BlAMQB/IJ1Au1DYQQvBPvFooaxh0rINkhzSIOI6cidSFWH88cIhr4FiUTGA+xCw0JMwdcBhAGuwXZBF8DLwFb/sz6Efce9EPyePGW8XPyp/P89OH2sflc/S0B3ANsBOoCHwDm/HD5MfZX88bw7u6y7dvsr+xd7Zbuy++T8Ojw/fCC8DzvLe2U6xTrbOtT7FHuDPL09iz81ABEBKgGQwjyCJ4IXAh4CCYI+QbmBcwFwAavCFILQQ5nEZsUthfKGqIdzR/zINsgax/FHMUYlhMyDtUJoAYzBMgCJALgAfgBSgIgAsoAVv6A+8z4VPbx86Xx6O9N78DvM/Gs8wz3g/pB/e7+0f9KABUADf9x/XL7FvlX9p3zWPG2763uRu6B7j/vX/DY8VDzgfRL9fP1RvY+9h32NPao9tD3UvmD+kX71/s+/OL8QP
7DAH8DWwXYBewFswbZB70IhAnhCpEMqw6REacUjBe3Glsd/B6xH5Qf4B13GskV8RB5DEMIhAQpAWP+W/wf+xn60/hm92H28fWw9cL1Y/Y695n3X/dr9yP4Bvlf+V75lvku+tr6iPvi+wb8IPya+yD6VPiw9uP0CPPh8djxbfIi86rz3fMG9Jn0p/X79lb4bvkh+kz6NPrY+ST5hvih+G357PoJ/S//fADxAOsAPwFnAtYDKAVNBuQHrQkiC7oM/g5kEUET/hS5FvcXmxhUGBkXYhWrE/ERfA+HDMgJKgcjBNcAo/2Z+vP3+PUP9QP1evV59rv32vi0+aL6UfsY+9n5Gvh99nL1X/Ug9jj3SPiS+Qj7DPyB/Jf8jvwy/H77P/rP+GT3OvZ59Wr1cfZ0+Or6SP1t/2MBCQO8A+ECxwA3/uv7LvoN+Wb4R/iy+Hf5uPqc/Oj+SwFxAzIFwwaNCHIKKgybDbwOdw8GENkQkBGyEWQRuxCjD1wODg1qC5sJ7QceBskDVAE2/0b9ufvo+rP63fpg+xD8A/x++2f7e/vk+s75CfnT+Lj4m/i7+Ov4CPkx+Xj5CPr3+sT72fty+9j6Cvrb+J/3/fbQ9n/2Tvab9pL37fhe+vH73f3O//cACAFcAHX/Y/4A/Y/7ZPqI+dj43/hN+jn9pwCQAysG5wihCxEOExDXESoTVxNfEhURTRD7D3UPpQ74DYwNxAxICw4JnAYYBFoBlP44/JH6b/nN+Lr4YfnR+tL85f6dAN4BbgKIAnICFgJRATsAF/+8/Rr8f/ok+WD4P/ha+Fb4nPhI+fr5dvqn+kz6IPkv9+r0uPIu8U7w8O8O8OHwYfIk9Af2F/hj+rv80P5oAGMBwQGRATUBMAHyAXQDRgXiBksITgqMDOsOChGQEm8TvxMEE4cR3Q9FDoYMWwo/CKcGsQUVBXkEwwMbA2ECHgFG/1/9d/ua+bf3UvbR9UH2c/cy+Ub7ff2q/5ABVAP2BEMG3gZ/BhYFtALR/9X8FPrI99T1JfTV8vbxY/Eu8WPx9fHE8sfzufQh9Rr1y/RP9NfzovPJ84H0wvU/99T4svot/QAA9ALbBbIIkQslDuEPshAVEWoR4RGMEksTERSyFOUUVBQqE+cRpRA4D5INbgtzCOIE+gAw/R36Fvj29on2qPb39kT3j/cA+O74Pfqr+wH9KP5D/zEAFAHUAXMC3gIgAy8D9QKoAg8C3gAA/8n8ffpl+Hz2wPTy8hnxjO9r7sHt1+2g7qzvgPDp8AXxKPGn8WLyK/PG8zv03/Tw9aT3X/oY/n0CRAftCw8QexNTFsMYhRpSGykbChoiGMQVGBNUENgNxQv2CUAIwQZOBbID4wHr//L9EvxK+rD4ePe09mj2rfaM9+L4fPpJ/Dz+OwBIAloEXgZMCNEJpwrSClQKQgmrB6EFPwN8AH/9ePq293X1DPRl8xvzwvIy8onxEPEh8aXxS/Lt8lLzK/Ob8vHxg/GJ8QLyo/Ig86Dze/Te9fz33Ppx/owCtwaxCmUO6BFWFYoYOhsBHcwdxR3SHNsaIxi3FLsQogzACBYFvQHx/pr8k/rr+LP3A/f09mH35vdC+Jj48vhH+Y75JPoV+0z81v2q/64BwgPBBVkHcAgcCY0JkQkXCScImwaZBDYCk//X/Dv61Pfc9XL0ivMS8+/y/vIp827z/POZ9C/1k/Wb9SP1VPRL8x3y4vD776rvF/Bz8ZXzQPY4+X/8DADhAy0IgQxTEOsT5haRGZIb/xzYHSMerx1XHBQa1hYpEy0PAgvFBscC+/53+3H4CPZX9GjzL/N98w70t/R29Yr27fdJ+Xr6qPvj/B7+Vf+fABMCkQPUBKgFBgY8BmYGhAaoBrsGiAbZBcAEGwMpASn/Wv20+xn6gfj19qT1hPSj8xXz7vIz87nzKPRM9B70pPMK82TyyfFg8RrxF/Gu8SXzmfWz+AX8Pv+KAuwFPgmCDKkPrxJ4FbMXWxmpGsUbgxx+HFEbCh
ntFS0SAg62CbIFEAKX/kz7Xvj19Sr0+/I/8ufx/PFQ8rPyMPPM86j00/V293358Pu5/p8BQQRGBroHpAguCXAJiwlvCSMJdQgyB4gFyANEAvEAtv+T/nf9Wfwl+935mfiU99P2L/bE9XX1MfXn9J30NfS/817z9vKa8lHyCfLX8TPydvNY9XL3mvkr/FH/3wJ2BuEJew0aERMUMxbGF0cZnhpvGzgbAhoqGMIVphICD2cLAgjABGIByP1E+i33xfTu8oTxlfAq8CnwbPDn8MHxL/Mv9YT37fli/M7+BgHgAmgEuwXSBpMH/QcgCOsHRAdFBgQFsQNiAhoB/P9G/+n+k/4Z/oz9Df2g/C38qvs7+/n60/p/+t/5FPkk+Cj3Q/aN9SD1JfVf9V31F/XX9Bz19vUv93P4vvmA+9n9dAAGA7EFowjlCxMPthEGFE0WbBi1GdoZ/hiDF5oVLxMnELsMUwnnBWYC6v7B+zb5P/eq9SH0rvKl8Qfx1PD78H3xXfKs81H1Hvca+XT7Iv7JAPwCoQTGBa4GVAezB8IHnAdGB7gG0gWWBD4DMgKFASwB+wDUAIgA8P8S/w7+AP1S/N/7Nvuj+qf5lvh392n2k/UX9fX0CPUj9fv0sfSD9LH0LPXa9az2tPcR+df6If3m/ycD0waGCuwN2RBQE3cVLBckGCQYKBeAFWkTAxF8DtsLHwk9BisD9//Y/Cv6Ifib9mf1V/Ri84Xyw/Em8fzwl/EE8xD1bvfU+R/8Uv5NAP4BgwMEBWEGbwcKCD0IKwjoB4UHCAdnBq0F9gQ7BI4D6wJAAo0B0ADw//P+2f3F/Nf7G/tn+q/5EPl7+PT3i/cp97f2R/bD9R71dfQC9PrzVPTb9Gn1IPYw97H4ofoc/TIAugNGB3UKGQ1cD0YR0RLRE0oUMRSNE1oSrhC0DqAMbAoICIgFBQOoAHr+fvyt+vb4Z/cH9sb0sfPp8ojygvL+8gf0Y/UI9734fvpM/B7+yf9XAdkCPwRxBVAG2wYwB2QHfwdqBycHwQY1BmoFbgRKAyECCQH///j+5f3d/OH7Afs/+p75JvnS+Ij4KvjP94P3SvcG96P2Kva/9Wv1RfVA9V31rPVS9nT3A/n6+lr9BwDbAp8FNwijCuYM6Q6TEL8RcBK7EpcS/REAEcsPbQ7hDDMLZwl3B2cFTAMqAQH/3vzg+h/5lfc49hP1PPTF87PzCfS99LL10PYB+ED5dvqs+/T8Uf6w//YAHAIjAwMEtwRJBakF4AX4BeYFpgVGBcQEFgRMA2QCawFgAEj/Ov5C/W38u/sj+6H6JPqn+R75mvgg+J/3GfeI9gD2dPUS9dn02fQK9Yn1cva191j5R/t7/c7/HAJgBJcGvAi+CqYMVw60D80QlRECEhMS3BFEET4Q1Q4hDTcLMAkZBw4FCAMTAUL/kv3y+2f6H/kT+CD3d/bt9WD1M/X/9Bz1W/Xt9bT2m/eX+LH59vo5/KT9Ff+GAN0BEwMNBNEEZgXGBQUGJgYwBiQG9wWeBQ4FPQQ8Ax0C+wDc/8b+zf3l/P77FPsu+ln5oPgW+LH3ZvcZ9+L2tfaV9o32r/Yc99P30vgP+o77Qv0O//IA4ALXBMgGpQhcCuALJw0kDtIOMw9TDy4PvA79DQoN2QuDCioJ0QeHBkQF/wOuAk8B3/9m/vL8kPtL+iD5GPg294f2FPbi9fT1QPa99lr3E/jZ+Kr5kPqI+438oP2w/rD/ngB9AVICGwPaA3YE6wQlBRoF3AReBLgD7QIYAioBQQBj/47+0f0p/Z/8Ifyn+yL7jvr6+V754viF+FL4Rvhd+Kb4HvnO+bX63/s6/bD+OADFAU8DyAQ3Bo8H1gjyCc0KYwu6C+EL4Au1C2ML9AphCqoJ0wjoB/EG8AXoBNMDpwJzATkAF/8B/gD9Jvx5+/X6f/oa+sf5kflr+Vv5bfmT+dP5JPqD+vD6Zfvv+5T8QP32/bH+a/8cAMAAXAHpAWQCwQL4AvUCwQ
JfAuQBYAHRAFAA1f9N/77+HP52/cL8GPx7++j6Zfr1+Z35YflC+UT5efne+W/6Ovsz/Fb9kP7m/08BrAL6AyEFIwb+BqoHGwhbCH8IfQhgCBIIpQcnB5QG+wVUBbMEGwSAA+MCSAKdAe8ARgCt/yz/vv5w/jX+BP7Z/a79jP1c/Sb98fyy/Ib8WPw7/Cj8L/w+/Gb8pvzy/Fv9xf0z/qH+/v5P/53/0//7/w4ACwD2/9L/qP9+/1T/MP8U//T+y/6q/oP+XP5G/kD+Tf5s/pn+z/4O/1b/mf/Y/xUAXwCdAOkAMgF3Ac8BDQJFAnICnQKsAroCwALJAtUC0wLdAtACtQKAAkEC8wGhAUwBAgHEAI4AXwA7ABMA5/+z/3z/Ov/x/q7+fP5Q/jz+M/45/j/+Of4t/iP+Cf7n/c39yf3N/cT9v/2//cz90v3d/QL+If5S/pb+4f4p/2P/oP/R/+j/7v/9/wIABAALAA0ABQD8//r/9//0//L/AwAKAEUAvwBwATkCagMkBBsEpAQdBvMHTwlCCToIDwdlBaIDyQINA+0CHwN+AsoAAf9X/VT8Ifvj+qv6Sfpo+jj7i/tS+xX8Y/xw/Gn8Pvws/Lr7YfyB/bn84Pw3/mv+av0G/5QCUwNqA3cEQwSpA6sE7wXNB+gI6wj/CFsGQwRmApP+qP3x++f5Ufpg+Qr3n/Tk89Xy7/G88xD2Yvk7/f39WP6c/5YCWQUaBicJYArECHwJUggrBpgGHwVUA/4BJQCK/x7+x/zK/Jv8SvzQ+6P7WfuV+w3+YP/N/qX/JQH+AT4CNwJKAo0CewOsAzsDFALjAWYCAgJdAbEA6wARAOr+4P4+/xL/0/+PADP/av6H/yEAWv9xAMoBZAEZAcwB0wCb/7X/Av9h/fP8pvxV+4D6tvnI+L73Xfey94H3gPd++Bn5Lfq5+0f96v7M/+QBGgYACDUJOgsAD/ERLBGFECANSws9CecFiwId/tP8t/7g/vH7lvxX/H/7WvzY/NP+6f7dARYDDAFhA2YE7gVCB4MHsggTBzUIlge7AOb8h/oI+C/0n+9O7vvsRuzg7JLtDewR76n0h/Yb+Or6mQFuBpUJNw6aD48RpRSXE5USkBHqEUYR1AtMCK8E1QBu/Y34HfT98HfxWPIe8avyHfb++C/7B/33/iL+v/2h/ez5wfax9RDzffBi8FrxwfS7+HH+UwhREdIYZh3yIIwlAyZYI5UeiRmEFH8NgQU4/vP3ZvPb7g7qP+hf55Pnnuhp6j/u7fI2+Ir7IP08/0UBLgNsBJ4FSQc4CIIHqgToABb+dfvc+Yn56PoG/pwBJAVaB4IJSQtMDGINlg76DQ4MDgnNBAb/qvkV9UvxDPDj79nwB/LH9Cj33Piz++7+rwJEBo8Knw2VD6kQPRBZDikKIwYKAhP+evqL9ofzXfCd7hTukOy57aLwFvUm+8T/BgSIBpYIrgnjByoHLQaIBZoFSQNuAcIAAQJCBN8EVgXeBtEITwmIBBv9w/fL9JvzufIT83X0PPi++vX7Kf54ARIGdArfD0wS7xG6EGUM+wbp/3X43vUJ9LLyKfPL9Cf5Xf21AiQH/ApvD9gRVRMUEkwP+Qy/CR4EQ/3v9pPxHu8P7Y7sj+4y8mP4X/zV/sT/Wv/b/wT/k/30+6f4BPP/7JDnV+P65MrtFfWU/d0IbA9MFL8aJx/CHqQfuiF2HmQUfQqRAE30leuH6M7nr+jG7qf2Rfxr/x0CkAS3BXUJlwuoCqoJzQe3BbQBof0J+XP25fZo9tv2evkm/vwCZQeOCnsM7w7ID9sOkA0eDDMLywk5B1ED5v7U+pT11/Fv8LfwP/NL9y37Af7mAFEDKwQlA7kDJAUoBkoGMwYVBXEBuP56/P35mPdf92f5KfoE+p75LPga92v2FfVc9TP4Yvw+AMsC6QOwA1ED/AE//7z9Rf6w/+v/WP/L/rD8xPrP+ab57fqk/QUC0gSUBNIDqAMoBF4EWQXhBvYHJw
g4BzoFigHD/rP+7P7W/kcA/AG4Al0DvQNhAs4BWQOTBWQGxQVCBbYDggG9ALD/2/4SAIIAZQAkAK3/8/7t/Z3+cP/d/30AwQD2/7n86viP9YnywvGz84P25Pjh+oD8kfx+/Pn9Yf95AWEFkQltDP8LVAnWBBz/D/om9TbzofR/+hADfwdWCbIJ5wkkC+oK8QnaCIIJsAoOCZEElv5X+dT07fBW7YvqH+zn8fv3LP0tA98KyhC5E1UUqhG/DQILaghdA8X8AffI8RPteOqe6g7vuPgKBQkPzRR9FwQWJRAvB+f9BvdW8wbySvBA7Z3ra+xb7jXwwfLu9yP/6AbFDAAQexCcDkAKEAOX+xL3+Pbd+Pr73P7RADMDWAVpBd0E7AS9BTAHJQaUAmj9WPfG8pDwG/BO9Cr8uANDChoO0g9nEOQPEw4hCmQGvgNrAdP+cvv8+Jv3Q/g5+zr+vgI9CvkQQhPHECELRwR2/W/43vXg9aj3I/uy/QH9j/yc/Bn9vv6jAVgGtwlXCxULdQaPAEn7Cffs9ILzbPRZ9z/5tPrQ+pH5b/m0+nL8k/0J//oA5AA1/4b8V/gF9XvzLPOI84T1CPiK+c/6M/xQ/c79+gDQB4IPtBYaG/obthmSFJsOCQcXAEf7p/i0+Ub7jP0E/6n+0v6D/qz+uP+kAPICRwb3CIoJtAZJBDgCNAAq/1b+8P4K/+n9Hvx++dz2wfUX9u729fhN/BcAzAKXA5MECAYNB1wHMQb1BEMEpgIFAPf7VffR9Jr0z/RL9oj5ev0pACUArv++/kr/3gA3AQ8Bxf/M/2n+Xfrj9mf1gfiX/Ff+7P4//xMBPQLQAIb+nP4bAtUFJgkyDMwN2A9sEoMSRxBdDdAK0QUEAMf8mflD+Mf4RvrO/G3+YP8nAOkAGwNdBacH5AnfCqUKPQjdA3z+Lvly9MHwzO5j7/Dxo/X5+Nf6//yX/0ACQQV8BxEKrAsbCtYFiv8S+cXz/u9U7iPvUPGp9FL4Yvuk/Aj9BP77/jwAdwK4BHwFPAWiA6QAQPx19yH0JfJe8mz0pPed+7b+7v8AARgF9QuOEt4X8xsrHYwbUhdNDxkFif1p+lP4Vvf8+EX77fwW/zkBTgIWBG4HpwrCC5YKhAjYBcABg/0g+tf3cffK+GD6SvvB+4r7U/vf+7X8f/3w/lMAqgDY/zP/u/5o/Sb8wPu1+7z7Pvzr/FT9ev04/uv+ov5m/jD/uv/q/qv9YfyF+p74kvf89hL35veK+TL88v61AScEEwXdBIkELwPL/kD6ZvmD+1T+7AE0BVIHLgoADTENrApVCWQJOQgmB0MGpAT5A54E7gQbBI0CbAJyA+wCPQE6AHX/z/4I/x//ff5z/e38e/yu+s/4lfiX+N34U/rZ+yr+CAF5AzIErgLRAID+EvxZ+jL5A/nb+Rj8zv2r/cn8dful+hP73PzR/4MDUge+CXkJCgeVA+r/dfwC+eb1dPP68gH0b/Qm9fH2X/kh/Ln+MgFBA9YD7AP/AikBzAFBBgwMrg/uESoTqxJ/ELcNFwoXBkgEBQMxAGD8JPk19+T2UvkO/sYDPAriD+kSxxEcDhQJdQKz/Bb4fPUH9GTz2PM98y7znfML9Hr1kff4+uH+CwPHBtsI1Aj2BhoEdwBi/PP4D/ft9mP4BPrl+iL7kvvB+9H7K/3S/8YCoQVxB3kHwgWkArH/YP1A+776lfsM/Sn+Dv9o/1T+GP3R+6j7Afu3+sv6Yfrj+AX38/Xt9hP9TgZwDuYSrxUvGNAXgBRqEBoNBgtpCb4GFQLG/Gb4RvWQ8yXzgvUR+qL+jwLfA10DBQKJASMBof8j/2H/SQCNAPT/9f6M/X782/vP+kz6qPpp+0r8y/x1/UX+iv+mAOgALQFUAXEBmwE7AncD2gNcA5gCJgHS/nP8TPvq+uX6r/u8/C39//wB/RP9Rv1B/gEAdgEsAmMCAAH3/Xf63ffi9Zf1fPn7/xEG6Ql7C9gLogvNCwsMaw
xpDTMN7woGB8sBpfvY9vP16/fM+/kAHgbGCGYIWgZNA6L/Vv2z/b7+3f6P/tv9OftK98/z8PF18WLyIvXM+BL8n/42AKwArwB8AWIDZAXvBt4HwAeABeYBG/7F+pP4L/j6+dv8FQBHA5QFjgZnBmkF5wNqAoQB1gC1/3P+sf0l/Wv8U/xs/fH+xADRAggEpwM+AmMAV/2/+Tr39PVi9kf58P4bBHAHWApEDJoMwArnCNkGSQSqAv0BBwFm/9b+mf6I/VT8+Pux/Mz9AgCzApQEWwUSBeQDGQFs/en5Kve/9bP1rfbw93v5S/vW/OL9OP6x/pr/mwC3AWUC1wKdArsB+QAaAH//b/8HAPkAtQECAtYBcQEEAT4B4wELA0cEGwVkBRMEdAE//v/65vh/+Iz5ift//QUAjwK1BOEF0gX+BXUGrgdoCNoH3wXDAp3/N/zq+Mr21/Yw+av8+P8YAoACbwH2/0z+CP1x/F79Sf8aAVcCLgL+ACv/iP2w/Ef8u/zc/Qj/uP9N//39wvv++Sn5avn2+h/9NgBFA8UFCweRBhQFWgMOAoEANf+O/f38Yf2o/Qf/WwBmAk0ERQZ9BzIHkgZRBQgEfQKvAJb/YP6R/Q/9p/yv/E399v5PANYByAIbA3kC8QDB/+T93/xN/Fb8Gf2f/bP+MP4R/fr7AfyW/Sf/bgFJA+AEkAVqBIgCvP/n/Jz7Efss/NX99v9GAsYCQwKMAKf+Nf1D/Bb8OfsC++z7Ev2U/qn/KQGLAtQDuwRnBDADBwE8/3v9QPxl+xH7QPxk/nAAaAF0Aa4AAQDS//r/wABlAt0EGwf3B04HhAW5A5cCNAKMAjwDKQRsBLQDSwLp/+b91PxL/Ln8m/2a/6sBWgPEA/YCWAH8/2z/D/+l/oP+L/7X/Uj9A/27/B78gfz3/B/+H/+4/+n/5v/5AAUCawLWAVEBFQB5/yj+gP1r/Sr+UP/N/67/pf6c/fL81PyS/IX8ZP0y/rT/cQBkAQsCiALNAokCOAKRAc8AnQD6/9D/fP/B/43/BgDp/0kAKwCcAHkAoABkAFsA/P/H/wAAjP8FANf/GwARAMz/OgCi/00AsP8iAN3/9P8fAPP/7f8iAN3/RQDl/xcAFQAMABIA8v/x/ysA6P83ANP/OADZ/xcA9f/m/zAAw/8oAM//CAAIANP/GgC9/x8A2f/5/+7/9P/d/wQA8f/8//H/7/8FAOb/BwD//9L/KgDQ/xkA7P8TAPH/EAABAPT/CwD4/+T/GwDj/xUABQD9//f/FgDx//f/EADL/ysA2P8OAAIA4f8gAOP/+f/Y/wsAxv8PAN//BADq/wEA1P8hANX/JgDg/xIAAgD//xYA6/8HAAoA5f8BAPz//P8GAPP/+//8//X/8P8BANv/FgDF/xoA0/8MANn/BQDR/xMAz/8VAML/IgDR/x8A2P8aANH/JwDN/xIA3f8VAOn/CgD1/xIA8f8OAPz/CAAHABQA4P8qANb/FAAAAOb/IQDT/xcA8f8BAAYA8v///w0ACQD9/xgAzf9BANf/GgD2/xAAAgD3/wUA6/8lANv/JgDq/woAAAAGAAQABgAJAAIAEQAJABwA9/8rAOX/JwAAABEA8P8PAOn/FQDl/xYA5/8GAAIA+v8oAOT/FAD8//D/GwDb/yUA4v8EAPH/+f/x/wUA8v/t/wYA7P////f/EADY/zEA1f8MAA0A+//+/wsA5/8KAAAA6v8LAOP/9f////T/7P/w//f/4f8AAPP/8//5/+T/AQDS/xkA6f/2/wMA+P///xcA7v8oAPj/BAATAOP/OADG/zgA2v8KAAAA+P8BAAgA+P8RAAUA9f8iAOH/IAD3//b/GADk/wcACQDv//f/AADs/wMADgDi/x0A4f8OAPP/EwDZ/yIA7/8QAAwA+v8jAOD/QQDe/ywA+P/9//v/CQDh/w4A9v/w/wMA//8DABIA9f8MAAcA/v8PAOv/HADj/yMA5f8LAA4A4v8VAOr///
/6//z/BAADAAAA8f8XAO//DwD9/wUA9P8WAO7/EQD7/wEADgD0/xkA7v8gAPL/CgAEAPv/BAAAAOz/DQDp//r/9P8EAN7/DADt/xEA4/8jAMz/IQDY//n/+f/n//3/AgDc//7/8P/x/+7/EQDQ/ywAuP8pAOn/BQD5//n/+f8GAOf/GwDg/wwAAwDY/yAAzf8pAMr/KQC//x4A4//+//r/6P/5//b/2f8NAOX/GQDc/yIA7/8IAPP/GgDJ/zoAvP8yAOn/EwD2/w0A7f8nANz/JwDx/xsAFQAZAOj/NADY/zEA+/8FAA8A5v8WAO3/GQDj/xMA6P8QANX/KADc/wQA6P/o/wEAAQDy/wgA6f8OAOv/BgABAOn/FADv/x8A7f8aAOv/CQD0/wwA9/8DAOv/BwD1/+//GQDV/xkA3v/q/xwAuf8pAN3/6P8RAMf/JQDH/xgAxf8XAOD/AwDt/+7/6f/5/+7/BgDu/+7/GADc/xYA/v/u/xEA0/8YAOL//v/8//X/8f8IAPT/9//+/wYA3/8QAPz/4v8aAPD/9P8KANj/CQDy//f/+f/z/+T/EADG/x4A4v8IAPf/6/8LAPn/EQDy/xMA+/8IABgA9f8KAAMA6f8tAMn/KQDo/w8A8/8DAPb/HAD4//j/CQDb/xkA4P/t//z/5P8JAOj/BgD5/wEA+f8VAPL/DQD1/wUA/P8AAPv/DQD1//z/EgDm/w4AAQADAAcAEgD0/xQA1P8xAMH/JgDi//v/8P/8/+L//P/z/+j/CgDH/xgAtv8YAMP/7//v/+f/4/8LAMv/GwDm/wQA+v/u/w0A3f8AAP7/1v8iANL/CwDt//v/7v8BAOr/CAD3/wIAAwDv/w8A/f/1/w4A6P8NAPD/9//v/wUA6v/k/wYA1/8MAN//BwDi//3/DADi/xQA7P/7/wkA9P8KAOP/KwDM/ykA7/8WAP//HwDr/x4A9//1/x0A4/8kAPb/CQD8//3/AQAHAP7/+f8FAOP/BgD3/+v/BgDs/wEA6P8RAOH/GgDc/xAA7v/v/woA8P8CAAUA8P8EABAA5f80ANj/KgDm/woAAgD9/wQABQAGAAQA6P8UAMr/DwDe/+//7//s//D/6v/2/+T/8v/k/wwA0v8QAO///v/3/+v////e/xwAy/8bAMb/GwDq/woA5f8QAOv/9v8QANX/JwDd/wYA8v8CAAAAEQDd/yQA3v8HAPb/9v/1//r/+v/l/wAABQDk/xcA1f8MAPr/8f/7/wEA8P8bAO3/FwDi/yAA3/8qANr/JQDm/xUA/f8BAA8A6P8nAOL/IADr/x8AAAD+/wwA6f8kAPj//f8EAPv//f8FAPr/7f8MANz/EgDi//X//v/e/xEA4v/5//n/+P8CAO//9/8JAPL/BQABAPr/CwDf/xIA5v8YAO7/+P/7//3/+//9/wUA8v8QAOj/FgDw/xEA9/8EAAIA9v8SAOP/FADX/wUA9v/n//z/6f/0//P/+//h/xMA7P/6/wEA3P8jAMb/IADl/wEAFADZ/z0A5P8YAP//EwALAAQA9v8IAPT/CwD1/wsA+f/w/xMA5P8DAA0A6f8TAP//8P8PANb/FADq//j/CwDb/xEA8P/9//j/EAD2/xsA3P8mAOX/IADz/xAABAABAA4A//8RAPz/EgDu/yMA2v8kAN7/BAD//9L/NAC8/y8Axf8WAPP/7f8YANf/MgDO/xsA4v8mANn/HgDg/xQA7/8PAOv/BwDx/w4A1f8iANn/DQAHAN//HQDj/wUADADu/wgA/v/9/+//BQD9/+7/GADn/wQABwDq/xwA2P8YANb/AwD+/+//+//8/+7/DwDp//7/DwD8/wMAAADs/w8A/P/y/yAA2v8oAOT/EwDt/w8ACADv/wYA9P/2/xIA4f8TAPD/7P8ZANn/IgDq/wMA/f/t/xMA5v8PAPT/DwDn/xoA4/8HAOX/DwD3//P/AgD8/woA+f/6//z/AwAIAA0ABgARAO3/LQ
DU/x4A6v/6/wwA3f8MAOz/AwD1/wQA3f8eANP/FADi/wkA9f8AABoA2P8jANX/DgAAAPX/EQDa/xAA6v8fAOT/CwD3/wUAEgD1/xsA4/8pAOH/EgD///f/EQD///L/EgDf/yIAz/8eANX/FADs//H/AgDr//L/AADs/wsA5//8//j/5v8gANP/EwDd/x0A1/8iANn/JgDp/wwA+v8SAPD/DgD1/wkA+f8HAOT/FADq/wQA/v/w/wQA9v/5//D/EwDY/xMA5P8AAAcA5v8LAOP/HwDU/yoA1v8RAPD////9//X/+P/3/+3//v/+//n/AQDu/wQA4P8QAPb/7f8UAM3/FwDx//f/FwDp////AwDx//7/BADm/wYABgDm/xcA3/8bAO7/BQD+//b/EQDi/xwA0v8lANX/IQDe/xgA5f8WAOT/FwDl/wwAAgD9/wUA9f/1/wUA8f////3/7//2/wgA6v8GAPH/BwDy//P/BgDi/xUA3P8OAOb//v/0//X/+P/7/+//BgD1//j/AwDg/w8A6f/y/xcA1/8zAMf/IwDu/xMAAADz/yUA0/8gAPn/9P8hANX/KgDS/yAA7P8CAAEA4v8TAOj/CwD1//f/+v/+//v/CgDo/wcA7v/y////+//m/wEA/v/X/x8A1P8PAO7/9v8KANr/HwDN/xEA6/8BAPr/+f/7//n/9f/v/xIA4/8dANf/CgDy//X/CgDh/xAA5//+//L/9v/s/wkA3P8LANv/FgDe/wcAAwDZ/yYAyP8sAOL/EgDq/woA3v8YAOP/EwAAAAEACwDs/xoA5P8dAPn/BQAKAPb/IQD+/wQAAAAEAP7/BgD//wQADgDn/xQA2P8RAOT/BgDj/wkA0v8iANj/HgDl/wwA8v/9/w8A4f8nANL/IwD7//H/EADn////BwDx/w8A7f/s/xcA5f8dAN7/EQD3/wgA8/8HAPj/AwD///b//v/u/xoA2P8aAN7/CAD//9//FADQ/xAA6f8DAOv/FgDb/xUAAAD2/wQA9v8AAPj/DgDX/xcA2P8eANT/DwDr/wgADgDc/xoA5v8ZAOP/BAD+//L/CQDc/wYA9v/5//T/+f/6//P/9v/8//H/9P8MAND/GwDj/w8A5/8DAPn/+v8IAP7/9P/+//D/GAD7/+//EgDb/yUA2f8qANb/JADi/wgA6/8FABcA4/8XAOH/IADz/xYA/P8FAAMAAAAEAPr/EgDv/yEA2P8fAO7/CwD+/wQA8P8SAPL/AwAQANz/KADg/xYA8v8EAAUAAAABAO//FgDV/zYA1f8jAOH/CwD//+//CgDu//H/GgDe/x4A6//7/xIA9f/7/wUA+f/v/xEA5P8GAO3/AwDd/x4A2/8WAOD/GQDh/x0A3f8LAPf/8f8LAO3/BQD9/+j/EgDg/wcA4P8SANj/DQDp//D/BgDe/xUA0/8oAMz/HgDo//7/+/8FAOj/FQDn/wgA/f8BAPD/EQDf/x0A0/8QAPX/9P8DAPf/8P///wYA5/8oANj/KADc/yEA5/8CAAMA7P8cAO3/AQDv/w0A8P8KAPf/+f8QAPD/DQAJAPr/DwD5/xgA+v///xAAAgAIABkA4P8xAPr/BQAWAPn/EAD8/woA+//+/wMA/P/6/+f/CwDn/w8A8/8LAPP/AADv/wQA+v/6//v/9v8JAAMA6v8SANz/IADM/ysAw/8rAOX/EADf/w8A2f8WANz/CwDj/wgA4f8OAOX//f/0/+r/+v///9X/HwC6/yYAwv8YAOf/6v8IAMf/KADJ/wkA9P/j/xEA7v/7/xIA2P8JAOD/GADX/yIA0/8WAMv/IADg/xMA2/8XAAIA3f8dAOb/DwAWAOb/JQD3/xwA7f8aAAgA8v8iANL/LgDb/wIA8/8BAPT/CwDO/yIA1P85AND/JQDh/xUA+//9/wkA8/8XAP//CgAPAOr/NwDq/y0A6P8YAAQACQAbAPP/GQAFAAAAFQAEAAkAHQDb/zYAzv85AMz/KQ
DX/ycA4f8HAPj/AgDf/xIA0P8wALr/HADX//3/7f8IANn/HgDJ/x4A1v8iAOH/DQDs//7/6v8FAOH/AAD5/8z/HgC3/xQA2v/2/+r/9//n//P//v/Q/xgAyP8QANj//P/i/wQA2f/0//H/7P8BAPj/6P8HAOn//f/7/+//BQD4/wYA/P8DAPv/+v8XAPr/CQALAPX/CwANANf/OgDQ/yIA8//+/x8A//8FABsA7P8zAOP/FQALAAAAAQARAAEAEgAAAA4A+f8YAPr/EAANABUABAAMABQAEAAMABEACgASAAsABAAWAPz/GgD3/wEAHgDr/ywA8P8EAB4A6P8rANz/BwAEAPb/BwDn/wAA3v8MAOD/DgDW/xQA2//8/+3/y/8gAL3/HwDR/wkA2/8KANP/IQDH/xkA8//2/wYA7f8cAOD/IQDN/yEA3f8iAOT/BQDx/wUA7v8PAPf/9/8rAMv/MgDn/wcADAD3/wQA/v/5/+//HQDY/yoA2v8fAN3/HgDW/x8A2v8OAAEA8P8XAOD/FgDp/xIA5v8IAPD/9/8MAOv/AAD3//v/4v8pAMH/JQDs//f/FgDg/xgA5v8cAOT/GwDq/xgA+P8FAP3/CAARAPn/GwDr/ykA0f8zAOL/EgD+////CwD+/xAA8/8SAPP/DAALAO//EAAAAAgACAAIAAIABQAJAOz/HgDz/w4AAgD+/woAAgATAPr/HwD0/x0A/P8sAPL/FwACAA8ABgAZAOj/IwD5/wUA/P8KAPv/9f8TANj/JgDn/wAAAgD1/xYA4f8nAOL/DwACAOP/GQDu//3/AQD+//v/8/8EAPP/+P8LANb/EgDj/wEA8v8AAOv/DADg/xMA5P8XAPP//f8IAN3/LQDG/yoA5f/2/xEA0v80AMP/MADc/wEA/v/9//r/DQDi/xgA5/8GAAIA7P8LAPb/+v8OAPz/8v8AAAgA8P/5/wEA4P8MAPz/4P8tAMr/HQDe/yAA2v8gAPD/AAAZANj/IQDd/xkA7f/+/w8A4v8UAO3/AQD7//z/6/8WANz/DgD7/+3/EQADAN3/KgDM/xoAAAD2/w4A6/8fAOL/EwD4//f/EwDk/xgA+//y/wwABwDy/xcA9P8JABIA9f8JAPP/FwDn/xUA5v8TAO//EwDp/xcA8P/+//z/AwD4/wsA6v/3//v/9f/5//r/7P/2/w4Azv8YANr/IwDe/x4A4f8eAOn/GAABAPX/HADJ/zsAxf8yAM//GQDn/wkA9P/9/wQA9/8BAO//8P///+L/+//s/wsA3f8XAMn/GQDY/yMAy/8iAMT/IgDf/wsA5P/9/+D/CQDn//v/AwDw/wcA4f8YAN7/HQDu/xEA9v8CAAkA3/8MAOH/EgDq/+n/EQDV/wwA6P/p//3//f/9/xwA4f8gAOv/JAD3//n/GADl/yMA4/8YAOb/HgDs/wgA///9/+r/HQDb/xcA+f8HAAAA6//+/+z/9P8HAOn/8f/w//j/BgDq////AQDw/xEA7v8FAPL/AQD///L////s/xUA5f8SAOr/BAAIAOL/GQDf/xMA8v/3/xEA5f8WANX/MgDQ/xoA+v/t/xsA6v8SAPX/FADt/xwA6f8RAAgACAAJAPD/GwDY/zcAvP84ANf//v/7/+f/AwDs/+//BQDh/yAA0P8SAOX/BwD2//T/6P8GAO//7//8/+f/BwDo/w4A2P/+//L/AgD5//P//f/g/xIA4f8XAOT/CwDz/+n/GADJ/0EAuv8kANP/IQDh/x4A8P8IAP3/6/8EAP//+/8FAP7/7/8YAOj/DQAKAPL/JADy/wQAEAD+/wkABwDp/xUA7/8CAAkA4/8MAO7/AgDs/wwA8/8PAPL/AgD5//n//f/1//r//P/+/+7/EQDN/ycA0f8VANf/BgDi/+v/BwDO/xcA3P8GAN3/GwDr/xUA4/8aAPT/CgDx/woA9v8BAAIABwDv/xkA6f8OAPf////8/xUA6/8WAOz/EAAIAAMAEg
ACAA8A8P8bAPL/DgD8/wYA9/8KAPj/CQD//+3/DgD4//3/9v/y/wMAAgDs/wwA7f/7/+f/CQD6//X/+//4/+P/CwDL/xEA1v/x/xIAxP8eAND/CQDd/xQA1f8sAMn/JwDu/x8A6P8SAOn/FgDw/xAA9P8GAPf/9v8cAOr/DgATAPT/LwDh/yYA4v8xAOb/KADg/xMAAAAQAAYA/P8SAP3/GgDw/xIAAgD5/w8AAQAHAAUA+//u/xoA5v8GAOf/DwDf/x4A8f/s/w4A5f/8//7/7P/3//r/6P/6//X/1/8KAM//DQDj/+///v/t/+b/CwDX/xMA6f8AAPj/8f/4/+7/CgDo/wYA7/8NAPv//P8JAPv/BAAWAOv/FwDv/xcA8P8yAOr/CQAhAOv/GgDt/yEA7v8pAOL/HAD9/w4ADAABAAkABAAXAP7/CwABAAQAAQATAOv/IgDm/ycA3/8cAOn/DgD4/////f///+z/IgDL/yYA5f8HAAUAAgDz/wUA7f8DAPD/CgDm////+//Z/wMA5P8BAOr/+P/V/w4A0P8GAOn/9v/l//r/5P8CAO//8v/8/+r/8////+3/CgDg//v/7v/+/+7/AgD3//n/7f8CAAMA+/8JAPf/CgD0/wYA/P8BAAMA+v8CABUA9v/8/wcA+f/9/xYA0v8SAPn/AgDw/xsA2/8ZAAAA7/8OAPf/DQAAABoA2P82AM7/MADt/wMAIADl/xsA+f///wgA+v8EAPX/EQDq/xcA6/8PAPr/AgAFAPD/FAD6/wcA+P8EAA0A7/8TAN//GQDk/wwA9f8HAOf/IQDf/xUA5/8dAOH/HwDh/wcA9//4//f/6P8LANv/DADY//X/9P/y/9n/BADM/w4Az//+/9//8//t//v/4f8EAOT/CwDm//f/+//p/w4A5f8EAPT/AAADAPf//P/7/+z/+//+/+f/DgDh/wMA5f8GAPL/+v8HAOP/CwDY/wYA9P/X/yQAy/8lAN7/CADw/wsA/P8KAPb///8CAA4A8v8XAPb/9/8cAO3/IQDu/yUA5P8jAPL/BAAHAAMAEAD3/wwA6/8SAN//GQDl/wUAAQDx/wsA+v/z//n/8f8TAOL/KQDa//v/FQDY/xEA7P/x//3/6P8JAOn/DAD0/w8A4/8eANv/GwDt/wAADAAFAAYAEAD0//z/+/8FAPv//v/u/+7/9v8GAOb/9P/4//D/BQDx//v/9P/4//f/8f/v/wIA7v8NANn/EQDQ/xsA0v8OAOP/AADp/wAA8f/2//b/9f/+//n/8/8CAPf/8/8GAPX/CADo/w0A0f8eAM3/HgDa/wAA5P8MANL/EADh//v/8//z/+L/AwDp/////f/p//3//v/z///////q////BADl/yQAy/8lAOT/FwDx/w0AAwADAAMA//8RAPz//v8UAMX/NgDL/zEA5P/3/wMA7f8IAP7/4P8UAOL/EgDv/wQACgD//wUABwDk/yYA7P8JAAgAAQACAAcA9f8UAP3//f8FAAIABAD7/wcA8v8XAPD/EQD+/wUACwD1/w4A8f8BAP3/AgD8/+///f/r//v/5//9/9z/DADc/woA5//9//z//P////f/AwD0/woA+P8DAPD/EADk//7/+v/8//r/8//3/9z/GgDJ/xcAyf8LAOf/BgD0//X/+P/9/wwA6v8WAOf//f/x//j/9v/n//z/3f/4/+b/8//r/wAA0v8SAPH/9v8PANj/HADu/yMA3P8aAPn/AwD3/w0A7v8WAO//DwDT/zIA5v8IAAcA8f8fAO7/DwDz/xAA5f8vANr/LwDc/yQA6f8CABsAzP8/AMr/EgD1/+7/FQDZ/yMA5P8TAPf/8f8aAOb/HQDs/wgACwABAA0ADADy/xcA8v8fAOb/CwD1//3/4f8LAOD/+/8GAOf/CwDx/wAA6v8SAAAABgAIAPb/LAD2/wIACgDz/xQA6v8RAPP/HQDn////BQDm/xsA2f8WAOH/GwDd/yYA6v
8JAA4A7/8FAAkA4P8tAMj/JQDj/wEA+f/p/+v//v/p/xAA1v8bAM3/IQDs/xkA9f8PAPr/DwAPAPX/FwDs/yQA0P8sANz/AwAEAPD/DADd/woA5f/8//n/5v8NANz/FwDb/xIA+v/z//n/BQDx/wkA4f8UAN7/CgAKAND/JADj/woAJgDW/y0A5/8eAPD/FgD8/x0A8f8WAPf/AAAWAPf//v8MAOz/DgDx/wQA3/8OAOf/CADu/wUA7//y/wMA7/8lAOn/DgDv/wMACwD//wEAAwDx/wkA//8DAAIABAD//wIABAD7/xsA6v8QAP//AQAXAPj/CQAIAPP/GwDi/yYA1/8bAO//6P8YAOL/BQAHAPb/BgDz/wkA//8TAP3/BQAbAPf/IAD3/wcAJADi/ysA5v8ZAPT/DwAIAPT/GwDh/yIA7P8YAPb/GAD8/xwAAQAIAA0AAAAVAP3///8JAPj/BgAEAOj/FQDo/wEA/v/6/wMA8v8GAAMACAD8/wYABwAEAA0AEwAEABQAGwDj/yIA6f8OABMA9f8OAOn/HgDm/xYA8//2/xIA7f8MAP7/7/8nANn/KwDz/x8A8v8aAPH/FgD7/xUA7f8TAAAAAQAQAPX/CQD5//r/EQAAABYA9v8QAAwACAD0/xQA5/8uAOb/GADl/w0A9P/7//v/5P8KAPb//P8GAPf//v8EAOz/+f8EANz/KgDK/xwA2v8dAO//CwAGAPX/EQD9//b/FADh/xcA6/8DAPr//f/9/xcA6v8jAOz/IQDs/xQA7P8TAO//DADn/wgA+v/s//j/6/8AAOH/AADs//z/+P/x/wgA7f/5//7/8//9//b/+P/z//T/AgDb/xEA6f/r/wwA3P8RANb/9f/7/+z/CwDy/9f/HgDG/xgA3//s/wAA3v8GAOH/7//p//b/4f/t//L/zv8PANL/+/////D/8v/+/+r/9f8HAOr/EwDv/wkA9/8WAOb/IwDy/wwA/P8IAP3/AwD7/+//HADW/w0A5/8PAOv/8P/+/+f/FwDU/wgA8f8AAPP/8/8IAOr/AgD8//f/AAAIANb/GQDF/xIA8//s/wUA8P/v//j/BgDv/w4A9P///wYA6P8QAPf/AwAFAN3/IwDO/y8Azf8WAO7/7P8OAOP/FwDe/wIA9v/u/xEA8//+//n//P/+/w4A/v/8/wcAFQDm/xgA6f8HAA8A7P8LAPb/DgD1/wEA+f8HAPf/EQDY/y4Ay/8rAOL/FgDn/wAADADX/xUA7f/2/wMA7v/6/+7/+P/u/+r/CgDd//n/9v/q//L/8//f/w4A0f8dANX/CQDy/wAA8f8AAPP/+/8CAOT/CQDm/wEA9//z/+3/AwDl/+//GADJ/yIA1f8BAPP/+v/x////5v8IAPf/7v8BAOv/BQD6/+n/HgDm/wMA+/8VAN3/NADT/y8A7P8sAOb/IAAAAAsAGgDt/xEABAD5/w0A2P8nAOX/CAAAAOz/EQD4//z/+P/0/+X/FQDS/x4Az/8LAO3/8f8PAOX/HADs/xMA8f8CAAEA7f8BAPv/AAAPAOX/AQD3//f/EgDr//7/9f/+//j/AADv/wYA5f8VAOT/CwDz/wkA+P/4/wAACQDz/xwA6P8YAPP/DwDm/xkA5P8cAO3/+/8AAAUA+f8MANT/GQDt/wwAAADx/wcA+f///+7/9v/y//X/+P/x//v/9//6/wQA6/8TAOL/FgDm/wIA+f8HAAUA9f8EAOH/+P8FAOr//P/b/woA5v///wMAyP80ANH/BQAKAOb/DAD5/+//EgDj/xEA8P/9/wcA8f8UAOT/DgDp/wQA7/8QANz/GQDn/wYA9P8NAPD/EQDx/w0AAgD4/xEA9//9/xEA5v8NAPr/BQAAAAsA5v8PAP//9/8IAPT/DQD9//X/AAAAAAAADgDs/xUA+/8AAAAA+/8AAPr/+f/5/wkA6f8CAOT/DADu/wgA9P/9//r////7/wsABAD7/wEAAAALAPr/GA
Dp/yUA1f8rANP/IgDp/wwA5v8RAOL/GgDt/w0A6P8NAOT/HADb/w8A5/////z/+f/u/wMA4v8bANL/FwDb/xEAAgDd/yoAyf8vANn/HwDx/wcAAgD3//r/DQDr/xwA4P8gAN7/DQDk/wUA9/8EAOT/EQDp/xEA4v8DAAIA5/8WAOD/CgD2/wAA9P8NAPL/AgD8/wsA5/8MAOr/BQDx//n/9f/1/+//BwDv/wgA/f/3/wIA6//7//n/CQDo/w8A6f8cAOb/EQD1/wMAAQD9//n/CADm/xcA6f8EAAQA+v8FAA4A5f8WAPT/CQD7/wIACAD4/xQA8P8BAPv/+v/6/xsA5v8jAN//GAD7//L/CADt/w4A+/8AAAcA3/8ZAN//GADt/wsA6/8OAAgA9v8XANv/EwDx//X/AQD7/+n/BgD5//X/GADe/ywA2f8sANn/JADg/w0A9f8FAO7/7P8HAO3/FADe/xUA8//7/wYA9f8CAPL/DADr/w8A+f/7//v/BwDo/xoA0f8YAPD/CgDx/xMA7v8MAOv/CgD6//7/8//+/wMA3v8VANb/FADs//7/8P8LAPn/4f8NAOn/BQAGAPv/8f8NAOT/DAD5/+z/BwD3/wIA/f/w/wcA/v/u/wcA7f8FAP7/+f/0/xcA9/8QAOz///8DAOb/MgDJ/ycA3/8QAAIA8v8WAO3/EQDt/x8A4P8ZAOz/BAAGAP7/9P/8/wcA9//3////7f8CAAAA7f8CAPX/AgD3///////7/w0AAAAHAAwA9f8KAO7/CgDx/xEA5v8HAOz/8P8HAOf/EADS/xMA4v/6//b/AAD3//3/7//5//r/BwD4//L/CgDg/xAA/f/1////3/8fANf/DQD2/wAABAD2/wAABAAWAAcA+/8UAPL/LwDh/wcAEAD5/xUA+v/8/w4A7f8WAO///P8TAMP/PQC9/yUA5v8AAOz/BgD9/9r/GwDY/xQA4/8CAOf/8v/7/+D/AgDo//H/AADg/wkA4/8JAOf/EgDi/wsA/P/u/xgA5/8QAPL///8JAPj/BQD///X/HQDq/woAAgDm/x4A1f8vANf/DwANAN7/HwDw/wgADgDy/wsAEwDv/xQA9v8DABIA8v8bAOv/CQAKAPX/AgADAPj/AwACAPD/CwDs/woA8P/2/wAA2/8IAOP/2/8DANf/+f/i/+7/9v/o//v/5//1/+H/8v/n//P/9//d/xEA2/8JAO7/9f8TAPz/+f8SAOH/EQD7//L/DwD1/xEAAAD6/wcA/f8KABwA6P8VAPT/EgACAAcA6P8XAP7/AwADAAMADAD+/wEA9/8fAOP/MADZ/y8A3/8MAOv/DQDd/xMA6f/0/w4A4f///+//6//q/wAA4/8AAOb/8P/l//f/2P///9r/4v/6/9X/CgDj//3/8f/7/+r/EgDU/ykA1/8dAPz/CAAEAP//8/8SAPf/AAAJAOr/CQDu/xoA8/8YAPf/DQAHAAcA//8HAAoA/P8DAPv/9P8RAPL/BAD1//7/BQDw//v/+P/0//n/BgDe/xkA2P/3//v/7f8CAOr/AADj/wYA5P8PANz/BQDV/w8A6v/6//b/3f8FAPX/4P8SAND/BwDt/+r/9v/e/wQA3P8PAN3/8/8BANz/GADZ/x0A2/8MAPT/AwAEAAIA9f8eAOn/KgDq/yUA5/8dABYA/P8cAAUAAwAdAPX/LQAFAAYAIADl/zUA6f8mAAEAAQANAAUAAgAhAOv/EgD9/+v/DgDr////9v/3//X/+f/w//7/8//z//P/+P/x//T/6//7/+3/7P/y/+X/CwDf/wMA4P8DAO7/6/8PANn/DQDn//T/9//t/+P/AADt//n/7//y//f/8f8NANb/EQDl//T////h/wAA7f8CANv/FwDb/xUAAQDy/xQA+P8AAAgA+/8YAOr/EAAGAA4AEQDv/yMA9v8SAP//DgABAPr/FwDw/yUA4f8kANv/IAD7//L/LADJ/y4A9f/4/xsA7v
8BAA8A8v8SAOz/IQDW/xUA5f8XAOv/EAD5//T/HwDn/xkAAQAQABUA8/8KAP3/+/8OAOn/FQD0////BwDm/woA7v8BAAoA+P/n/woA8f8LAOn/EwDn/wgA+//t/xMA6P8ZAND/NQDX/x4A/P8AAAIABAD7/wsA7/8KAP//6//0//n/AAD8/wgA8v8VANz/HADW/xsA4////wwA4/8zAM7/GADm/xEA7f8AAPL/AgAUAO//AQD5/wIA///+/wAAAAD2/xIABgALAPb/EQDv/wAAFQDW/zEA0P8UAO//+P8GAAAA7v8EAAUA8P8AAPj/9P8YAPb/DwD1/ycA2P8tANz/KgDn/x4A3v8XAPD/GgDt/xgA2f8iAOX/EADx/wQA/P8JAP//BAD7/w0A6f8aAOj/BgD2//3//P/c/xkA3f8TAP3/yf89AM3/GQAQANb/MADb/xYA8/8BAAkA5P8fANP/KwDQ/yAA4/8hANr/CwD2/woABgD2/wYA6P8oAOb/IADo/x0A7/8SAPT/FwDp/xoA2/8jANj/EwD7//n/AADy/+3/KQDJ/yMA1/8EAP7/+v8MAOD/JQDW/zMA5/8mAO//DQABAAQAAAD8//z/BADo/w0A6f8HAPv/AgDl/yAA4P8PAPz/8/8JAPz/9/8HAPH/AwD8//n/8v8AAPL/BQD3/wwA6P8EAPn/9v8NAOT/FgDv/xEA+/8FAAMAAwAMAPX/DADp/wUA/v/k/xYA0f8IAOL/BQD6//3/+f8CAAUA+f8OAPL/DwD8//j/DwD5/wkA8f8JAO7/CADo/xAA0f8kANH/HADq//X/FQDW/yEA7P8DAA8A4P8OAAEA+P8IAAIA7/8gAOr/DgD2/xEA8//z/wYA6/8BAPr//v/o/wgA7f8QAAgA+/8TAPP/EwD6//D/IQDU/zQA4v8QAPX/CwDm/xsA4v8jAPL/8f8JAAQA/P8SANz/IQDh/w4A7f8BAPn//f/w//L/CQDz//3//f/w/wMA8f/3//b/7/8TAM3/BgDn/wMA/P/3/wEABwDw/wkA6//8//3/+//6//3/DADj/xoA3f8PAOT/EgDa/x8A4v8aANz/LADU/xoA8P8LAAoA8P8XAPP/HQDj/xkA9f/1/xAA8f8YAO//DQDo/yMA4P8qANr/GAD+/+7/BQD3//D/DQDz/wcA+v8MAN3/EgDf/xoA4//+/+H/CgDe/yEAzf8kAMb/KwDR/xkA6f8MAP3/AQAAAP//DAAEAPv/DAD8//v/HADf/xQA8v8CABUA6P8oALz/RgDD/zUA5/8GAO//EADj/wwA3/8JAO//8v/+//T/9f/9//n/CgDp//7/+v8EAAYACQD0/w4A/f8WAAsA8v8jANr/JQDj/xgA7f8MAOL/DgDp//3/AQD1//L/9//s/xMA0/8IAOb/6v/+//D/BwDT/yAA0/8UAAEA+v8LAPr/8v8KAPX/9f8RAOP/DwDm/w8A9f8HAAkA3v87AMv/LwDb/w0AAwABAPT/DQDx/wMACADl/xEA4/8bAOr/FgDo/wkA6/8TAOn/CwD5//z/CADw/w0A8f8PAOj/GQDl/xEA3P8IAPL/7P8TANL/FQDR/yUA2v8iAOL/HgD5/wEAFgDk/yAA6P8KAAUA+v8IAOv/DQDk/wsA5P8PAND/IADG/ycA0P8AAPz/4v8SAPL/7v8VANP/GwDt/wIAAQD7//7/DwDx/w4A8v8WAOv/+/8FAPH//P8HAPX/9/8KAOv/FAAJAP//DgD8/wMABQDn/yYAxv84ANX/EQDw/wgA4v8cANP/LQDh//b////9//v/DQDm/xEA9v/9//v/BwD6/wgA7//9/w0A9f8DAPn/8/8MAOX/BwDr//j/CQDZ//L/9f/1////9//4/wgA6P8PANb/BwDu/wAA8//3/xEA1f8mAM3/FQDh/xMA1/8oANz/HwDg/yYA6P8HAAQA+v8bAOn/IgDq/ywA3f8bAPv/6/8YAOb/HwDi/xcA3f
8kANn/KQDS/x4A9f/x/wAA7P/9/wAA+f/+//z/DADa/xIA2v8aAOD////Y/w8A2P8jAM3/JADE/zcAzf8dAOr/EgD6/wkA+v8IAAYABgD3/w8A9//4/xsA1P8bAOT/BgAEAPL/FQDG/zkAxP80AOf/+f/2/w0A5P8UANP/GgDg/wMA9v/y/wEA9v/+/wUA7P8DAPv/BwD+/xYA6/8XAPf/EwAWAOr/KQDW/ycA4f8hANv/FwDV/xcA2v8BAPH//v/k//T/6/8IANz/+//o/+j/BADp/wsA0v8gAN3/DQAKAPH/EwD1//X/CADv//7/BwDu/wUA6/8KAPv/BwAGAOb/MgDY/yQA5f8CABMA7f8IAPf/BQDy/xQA2/8VAOX/DgD8/wIA9f/+//H/DgDt/wYA/v/8/wcA9f8LAPb/EQDr/xQA8P8JAOP/BgDw//D/DwDX/w0A1/8gANr/IgDe/yEA8f8CABIA4f8jAN7/DgD+/wEA///t/woA4/8LAOb/BwDY/xQAzv8hAM//AwD1/+P/DAD1/+b/HQDJ/yUA5P8GAP7/+P8BAAkA9f8EAP3/DADw//b/BQDx//v/BQD0//r/BgD0/wgAFAD4/xMA+/8CAAgA5/8jAM//LwDf/wcA+v/9/+j/GQDP/zMA2f/9//j/AgD5/wsA6/8KAPv//P/+/wMA+/8HAO7/BAAHAPf/BAD6//X/DADj/wsA5f8CAP//4f/v//n/9f8CAPX/+P8LAOn/DwDc/wMA8/////T/+v8NANz/IQDT/xMA5P8PAOT/GQDu/xIA6/8jAOr/CgACAP3/GgDt/x0A7/8qAN3/GQD4//D/DgDt/xUA5f8RANv/IQDU/yoAyv8hAO//+P/1//f/8f8MAPL///8EAAIA6P8GAOX/EwDn//v/2f8TANT/KADH/yQAxv8xANP/EwDv/wsA+/8GAPb/DAD+/wwA7f8XAPH///8UANr/FwDq/wQABgD0/xUAy/81AMz/LADy//L//v8IAOr/EQDX/xoA3P8MAO3/+v/7//n//P8GAOn/BAD5/wIAAwAOAO7/EwD1/xQAEgDt/yUA1/8lAOH/IADX/xoA0f8XANf////y//v/4//w/+n/CADc//j/6v/m/wcA5f8PAM3/JQDZ/xAACADy/xIA9v/z/wsA6f8DAAAA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AAAWAOv/DAD1/wcA8/8TAOD/EgDp/w4A+/8HAPL/BADu/xMA6v8LAPz//v8KAPT/DwD0/xQA6v8WAO//CgDj/wYA8f/u/xAA0/8PANT/IgDZ/yMA3/8gAPT/AAAUAOL/IwDg/w0AAAAAAAAA6/8LAOL/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9P/o/xwAyf8mAOP/CAD9//r/AAAMAPP/BwD7/w0A7//2/wYA8P/8/wUA9P/7/wYA9P8IABUA+P8UAPz/AwAJAOb/JADN/zAA3v8GAPr//P/o/xkAzv8zANj//f/3/wMA+P8MAOr/CwD7//z//v8EAPz/BwDv/wMACQD2/wYA+v/1/wwA5P8LAOb/AgAAAOL/7//6//b/AgD3//j/DADp/xAA3f8DAPP/AAD1//v/DQDc/yEA0/8UAOT/EQDk/xsA7v8TAOz/JADs/woABAD9/xwA7v8eAPD/KgDe/xkA+P/w/w8A7v8WAOX/EgDc/yIA1f8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9v/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8WAMv/NQDM/y0A8v/y//7/CADq/xEA1/8aANz/DADt//r/+//6//z/BwDq/wQA+f8DAAMADwDu/xQA9f8VABIA7f8lANf/JQDh/yEA1/8bANL/FwDX/wAA8v/8/+T/8f/p/wgA3P/4/+r/5v8IAOX/DwDN/yUA2f
8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDu/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPP/EwDg/xIA6f8OAPv/BwDy/wQA7v8TAOv/CwD9//7/CgD0/w8A9P8UAOn/FgDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8iAN//IADz/wEAFADi/yMA4P8NAP//AAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/v/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//8//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9//4/wwA6f8QAN3/AwDz/wAA9f/7/w0A3P8hANP/FADk/xEA5P8bAO7/EwDs/yQA6/8KAAQA/f8cAO7/HgDv/yoA3v8ZAPn/8P8PAO3/FgDl/xIA3P8iANX/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8nAMj/JADG/zIA0v8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xQA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA4P8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f
/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v
8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQ
Dz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/Ew
D7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P
/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P
8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v
/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FADf/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/
8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wkA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA3/8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IwDf/yAA8/8BABQA4v8jAOD/DQAAAAAAAADr/wsA4f8MAOX/BwDX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yYA4/8HAP3/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BgD0//v/BgD0/wkAFQD4/xMA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8LAPv//f/+/wQA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPb/+P8MAOn/EADd/wMA8/8AAPT/+/8NANz/IQDT/xQA4/8RAOT/GwDu/xIA7P8kAOz/CgAEAP3/GwDu/x4A7/8qAN7/GQD4//D/DwDt/xYA5f8SANz/IgDW/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADo//v/2v8SANX/KADI/yQAxv8yANP/FADv/wwA/P8HAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABgD1/xUAy/81AMz/LQDy//L//v8JAOn/EQDX/xoA3P8MAO3/+v/7//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xQAEgDt/yYA1/8lAOH/IADY/xoA0v8XANf/AADy//z/4//x/+r/CADc//j/6v/m/wcA5f8OAM7/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAEA8/8BAO//BgAAAAIACwDi/zUA2P8lAOf/AQAWAOv/DAD1/wcA8v8UAN//EgDp/w4A/P8GAPP/AwDu/xMA6/8LAPz//v8JAPT/DwD0/xQA6f8XAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yMA3/8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA6/8LAOH/DADl/wcA1/8UAM3/IQDP/wIA9v/j/wwA9f/n/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA9P/7/wYA9P8JABUA+P8TAPv/AwAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P8MAOr/CwD7//3//v8EAPz/BwDv/wMACQD3/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD0//v/DQDc/yEA0/8UAOP/EQDk/xsA7v8SAOz/JADs/woABAD9/xsA7v8eAO//KgDe/xkA+P/w/w8A7f8WAOX/EgDc/yIA1v8rAMv/IQDw//j/9v/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDT/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NQDM/y0A8v/y//7/CQDp/xEA1/8aANz/DADt//r/+//5//z/BgDq/wQA+f8DAAMADwDu/xQA9f8UABIA7f8mANf/JQDh/yAA2P8aANL/FwDX/wAA8v/8/+P/8f/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwABAPP/AQDv/wYAAAACAAsA4v81ANj/JQDn/wEAFgDr/wwA9f8HAPL/FA
Df/xIA6f8OAPz/BgDz/wMA7v8TAOv/CwD8//7/CQD0/w8A9P8UAOn/FwDv/woA4/8GAPH/7v8QANP/DwDV/yEA2f8jAN//IADz/wEAFADi/yMA4P8NAAAAAAAAAOv/CwDh/wwA5f8HANf/FADN/yEAz/8CAPb/4/8MAPX/5/8cAMn/JgDj/wcA/f/5/wAADADz/wYA/P8NAO//9v8GAPH/+/8GAPT/+/8GAPT/CQAVAPj/EwD7/wMACQDm/yQAzf8wAN7/BwD6//z/6P8ZAM7/MwDZ//3/+P8DAPj/DADq/wsA+//9//7/BAD8/wcA7/8DAAkA9/8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9v/4/wwA6f8QAN3/AwDz/wAA9P/7/w0A3P8hANP/FADj/xEA5P8bAO7/EgDs/yQA7P8KAAQA/f8bAO7/HgDv/yoA3v8ZAPj/8P8PAO3/FgDl/xIA3P8iANb/KwDL/yEA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FQDL/zUAzP8tAPL/8v/+/wgA6f8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FAASAO3/JgDX/yUA4f8gANj/GgDS/xcA1/8AAPL//P/j//H/6v8IANz/+P/q/+b/BwDl/w4Azv8lANn/EAAIAPL/EgD2//P/CwDq/wMAAQDz/wEA7/8GAAAAAgALAOL/NQDY/yUA5/8BABYA6/8MAPX/BwDy/xQA4P8SAOn/DgD8/wYA8/8DAO7/EwDr/wsA/P/+/wkA9P8PAPT/FADp/xcA7/8KAOP/BgDx/+7/EADT/w8A1f8hANn/IgDf/yAA8/8BABQA4v8jAOD/DQD//wAAAADr/wsA4f8MAOX/CADX/xQAzf8hAM//AgD2/+P/DAD1/+f/HADJ/yUA4/8HAP7/+f8AAAwA8/8GAPz/DQDv//b/BgDx//v/BQD0//v/BgD0/wkAFQD4/xQA+/8DAAkA5v8kAM3/MADe/wcA+v/8/+j/GQDO/zMA2f/9//j/AwD4/wwA6v8MAPv//P/+/wMA/P8HAO//AwAJAPf/BQD6//X/DADk/wsA5v8CAAAA4v/v//r/9f8DAPf/+P8MAOn/EADd/wMA8/8AAPX/+/8NANz/IQDT/xQA5P8RAOT/GwDu/xMA7P8kAOv/CgAEAP3/HADu/x4A7/8qAN7/GQD5//D/DwDt/xYA5f8SANz/IgDV/ysAy/8hAPD/+P/2//f/8v8MAPP/AAAEAAMA6P8HAOX/FADn//v/2v8SANX/JwDI/yQAxv8yANL/FADv/wwA/P8GAPf/DAD//wwA7v8XAPH///8VANr/GADq/wUABQD1/xUAy/81AMz/LQDy//L//v8JAOr/EQDX/xoA3P8MAO3/+v/8//n//P8GAOr/BAD5/wMAAwAPAO7/FAD1/xUAEgDt/yUA1/8lAOH/IADY/xsA0v8XANf/AADy//z/4//x/+n/CQDc//j/6v/l/wgA5f8PAM3/JQDZ/xAACADy/xIA9v/z/wsA6v8DAAAA8/8CAO7/BgAAAAIACwDi/zUA2P8lAOb/AQAWAOv/DAD1/wcA8v8UAOD/EgDp/w0A/P8GAPP/AwDu/xMA6v8LAPz//v8JAPX/DwD0/xQA6f8WAO//CgDj/wYA8f/u/xAA0/8PANX/IQDZ/yIA4P8gAPP/AQAUAOL/IwDg/w0AAAAAAAAA7P8LAOL/DADl/wgA1/8UAM3/IQDP/wIA9v/j/w0A9P/o/xwAyf8mAOP/BwD9//n/AAAMAPP/BgD8/w0A7//2/wYA8f/7/wYA8//7/wYA9P8JABUA+P8TAPz/AgAJAOb/JADN/zAA3v8HAPr//P/o/xkAzv8zANn//f/4/wMA+P
8MAOr/DAD7//3//v8EAPz/BwDv/wIACQD2/wUA+v/1/wwA5P8LAOb/AgAAAOL/7//6//X/AwD2//j/DADp/xAA3f8DAPP/AAD1//v/DQDc/yEA0/8UAOT/EQDk/xsA7v8TAOz/JADr/woABAD9/xwA7v8eAO//KgDe/xkA+f/w/w8A7f8WAOX/EgDc/yIA1f8rAMv/IQDw//j/9f/3//L/DADz/wAABAADAOj/BwDl/xQA6P/7/9r/EgDV/ygAyP8kAMb/MgDS/xQA7/8MAPz/BwD3/wwA//8MAO7/FwDx////FQDa/xgA6v8FAAYA9f8VAMv/NgDM/y0A8v/y//7/CADq/xEA1/8aANz/DADt//r/+//6//z/BwDp/wUA+f8DAAMADwDv/xQA9f8UABIA7f8lANf/JQDh/yAA1/8bANL/FwDX/wAA8v/8/+T/8P/q/wgA3P/4/+r/5v8HAOX/DgDO/yUA2f8QAAgA8v8SAPb/8/8LAOr/AwAAAPP/AQDu/wYAAAACAAsA4v81ANj/JQDm/wEAFgDr/wwA9f8HAPP/EwDg/xIA6f8OAPz/BgDy/wMA7v8TAOr/CwD9//7/CgD0/w8A9P8UAOr/FgDv/woA4/8FAPH/7v8QANP/DwDV/yEA2f8jAN//IQDz/wEAFADi/yMA4P8NAP//AAAAAOv/CwDi/wwA5f8HANf/FADO/yAAz/8CAPb/4/8MAPX/5/8cAMn/JQDj/wcA/v/5/wAADADz/wcA+/8OAO//9v8GAPH/+/8GAPP/+/8FAPT/CAAVAPj/EwD8/wIACQDm/yMAzf8vAN7/BgD6//z/6P8ZAM7/MwDY//3/9/8DAPj/DADr/wsA+//9//7/BAD8/wcA7/8CAAkA9v8FAPr/9f8MAOT/CwDm/wIAAADi/+//+v/1/wMA9//4/wwA6f8QAN3/AwD0/wAA9f/7/w0A3P8hANP/FADk/xEA4/8bAO3/EwDs/yQA6/8KAAQA/P8cAO7/HgDv/yoA3v8ZAPn/8P8PAO7/FQDl/xIA3P8iANX/KwDL/yIA8P/4//b/9//y/wwA8/8AAAQAAwDo/wcA5f8UAOj/+//a/xIA1f8oAMj/JADG/zIA0/8UAO//DAD8/wcA9/8MAP//DADu/xcA8f///xUA2v8YAOr/BQAGAPX/FgDL/zYAzP8tAPL/8v/+/wgA6v8RANf/GgDc/wwA7f/6//v/+f/8/wYA6v8EAPn/AwADAA8A7v8UAPX/FQASAO3/JQDX/yUA4f8hANf/GwDS/xcA1/8AAPL//P/k//H/6v8IANz/+P/q/+b/BwDl/w4Azv8kANn/EAAJAPL/EgD2//P/CwDq/wMAAADz/wEA7v8GAAAAAwALAOL/NQDY/yUA5v8BABYA6/8MAPX/BwDy/xQA3/8TAOn/DgD8/wYA8/8DAO7/EwDq/wsA/P/+/woA9f8PAPT/FQDp/xcA7v8KAOP/BgDx/+7/EADT/w8A1f8hANn/IgDg/yAA9P8AABQA4v8jAOD/DQD//wAAAADr/wsA4f8MAOX/CADW/xUAzP8iAM//AgD2/+P/DAD1/+f/HQDI/ycA4v8JAP3/+v8AAAwA9P8GAP3/DQDw//b/BwDx//z/BQD0//r/BwD0/wkAFQD4/xQA/P8DAAkA5v8kAM3/LwDe/wcA+f/9/+b/GwDM/zUA1//+//f/AgD5/wwA6v8LAPv//f/9/wQA+/8HAO7/AgAIAPf/BQD6//T/DADj/wsA5v8CAAEA4f/v//r/9f8DAPb/+f8LAOr/EADd/wMA8/8BAPP//f8LAN//HwDW/xIA5v8PAOb/GgDv/xIA7f8jAOz/CwACAP//GgDw/x4A8P8qAN//GQD6/+//DwDu/xYA5v8RANz/IQDW/yoAy/8hAPD/+P/1//f/8v8LAPL/AAAEAAIA6P8FAOb/EgDp//r/2v8SANX/JwDI/yQAxv8xANT/EwDw/wsA/P8IAPb/DQD//wwA7v
8WAPL//v8XANf/GwDo/wgAAwD4/xMAzv80AM3/LgDy//P//v8JAOr/EQDX/xoA3P8NAOz//f/4//3/+f8IAOj/BgD4/wQAAgAPAO7/FAD2/xMAFADq/ygA1P8nAN7/IwDU/xwAz/8XANf//f/1//b/5v/s/+z/BADe//T/7P/j/wgA4/8OAMv/JADZ/w8ACADx/xIA9P/1/woA7P8BAAEA8/8BAPD/BQABAAMADADi/zYA2f8jAOv//f8aAOj/DgD1/wcA8/8TAOH/EgDp/xAA+v8HAPP/AgDy/w4A8f8HAAAA/f8KAPT/DwD0/xIA7P8UAPL/BwDm/wMA8//r/xIA0f8SANL/IwDX/yQA3f8iAPD/BAAPAOb/HADm/wcAAwD9/wIA6f8NAOH/CwDm/wcA1/8UAMz/IQDO/wMA9f/i/w0A8f/r/xcAzv8hAOT/CAD6//v//f8MAPL/BgD6/w0A7//1/wQA8//5/wcA8f/6/wQA8/8JABIA9/8TAPj/BgAEAOf/IgDO/y4A3v8FAPz/9v/v/xAA1f8tANz/+f/7//3//P8IAO7/BgD///f/AQAAAPv/BwDt/wIACgD2/wUA+//2/woA6P8IAOv/AAADAOD/9P/2//r/AwD3//v/DADq/xAA3v8EAPX//v/4//X/FwDT/ygAzf8bAN3/FQDh/xkA7/8UAOb/JwDo/wgABgD2/yAA5/8gAOn/KwDX/x0A7v/0/wkA7/8SAOT/EQDc/yAA0/8qAMz/HwDz//H/+v/0/+//EQDs/wcA+f8NANj/GADU/x4A4f/9/9z/EQDU/ywAyP8mAMj/NQDR/xsA7P8TAPj/CgD3/w8A//8OAO//GADx/wQAEQDg/xQA7P8IAAEA+f8VAMf/PgDE/zEA8P/x//7/DQDf/x8Ayv8iANj/DgDt//X////0///////x//r/AQD5/wkACQD0/w8A9f8YAAkA9P8fANj/JADj/xkA3v8TANT/HADN/wsA5v8IAN3/9v/k/xAA1v8CAOX/7P8DAOn/EQDK/y4Az/8WAAgA7f8XAPD/8/8NAOP/CwD8//D/CgDm/xIA+v8DAAoA5P83AM7/NQDP/xoAAwD6////AQD+//3/DQDe/xgA5f8NAPr/CwDk/xYA0/8tAM//GgDx/wMAAQD//wIA+/8RAOr/GADo/xcA1P8XAOf/+f8LANn/EADZ/yEA3v8eAOv/GQD8//n/IgDa/zAA1P8XAP7/AwD8//b/AADr/woA6P8DAN//CgDb/xQA3v/7//3/4f8PAPT/7f8dAMf/JADw//b/FQDk/xYA+f8HAPj/CAAGAO//9/8JAOj/+/8DAO//+//+//f/+v8cAO7/EwD4//n/DQDk/yEAxf81ANP/EgDq/w4A2f8lAMf/NgDY/wUA7/8NAPD/DQDr/w8A9/8CAPT/EAD1/wwA8v8DAAkA+P8JAPj/+P8NAOn/CwDz//T/FgDb//T/BQDx/wcA///3/xIA6/8XANf/FgDq/xIA7f8NAAEA+P8SAOz/BwD5/wcA8f8bAPb/BAD9/xoA6f8RAPj//f8gAOD/HwDw/yAA7v8HAP//+v/0/xMA8P8LAPj/6v8YAOD/IwDM/x0A9f/1//T/8v/z/wYA9//z/wcAAADm/wYA6f8SAOz/BADa/xoA3P8eANz/HADb/yIA5f8SAOv/GADy/xgA7f8VAPr//v8IAPL/EwDe/yEA1v8WAOX/DADz/wkA9f/t/xEA6f8eAPn/+v8BAP7/BgDy//z/+f8CAPj/AAD9//X/DQDw/w8A7v/9/wMA+v8KAPX////7/wEA/P8LAPb/BwDy//H/EwD2/wAAAQDc/yQAzv8VAPf/+v/8/+X/BQD4/wEA6P/9/+f/BAD+//v/4f8NAOn/FwD0/wQA+//3/wYA5v8PAMz/LgDS/woA8v/u/xcA9/8IAO7/FgD5/wgA/v/5//3//f/8//z/9v/0/wcA5/8KAOj/EAD5//j/GgDc/yEA4/
8iAOr/GAD4/wYAHQDh/ywA2v8pAOP/HADc/xoA3v8LAN3/FwDV/wwA2f8UAOX/HgDW/ykA2f8fAOj///8CAPT/+f/9/wIA+P/x/wYA4f8GAOf/AgDl/xMA0v8bAN7/DQDt//3/9f8PAND/GADY/wQA8f/4/+n/DADm/wwA9f/3/wYA+v8QAOD/GgDu/w4ABAAJAPL/BgD6/wEAGgDt/xkA5v8YAO3/4v8hAMj/JQDm//3//f/3/+b/FQDS/x0A6f/8/wIA9v8VAOX/EwDg/xYA4P8QAOb/AgDu//b/7/8NAN//BgD8/+7/BwDr//T/+f/7//D/7//k//r/+//1//b/9P/7//X/9//v//v/8f/3/w8A1v8mANv/CAAJAOv/EQDy/wMAAgAMAAkA8P80ANj/HQAGAP//EQAIAPT/IwDe/x8A6f8UAOv//P8DAAAA8//z//X/9v8GAAAA6/8CAPr/4f8TANn//v/7/93/EgDZ/xgA1P8NAN3/EADf/woA3f8JAO7/EgDo/wsA3f8QANz/DADs//3/9P/5//f/+f/z/wkA4f8gANL/KQDg/wkABgDj/xwA3P8gANL/IADj//z/AADy/wcA6v8DAPD/CQD1//T/CgDt/xQA6/8QAPb/CADz/wMA4f8aANH/JQDh/wEA6v/9//P/+f/7/+H/+P/u/+P//v/f/+//+f/J/woAyv8QAN3/6//p/+j/+v/w/+3//P/S/yEAv/83ALb/KQDp//H/EQDg/ygA2v8SAPD/9/8KAPL//////+n/GgDw/wwAAwDg/zUAzP8qANb/DAAKAOH/EwD1//7/9v/6//7/9f8JAPP/BQAGAPb/DAD1/woAAgD9/xQA8v8NAAIA+f8OAOL/FgDt//3/+//t//v/+//y/+///v/p//H/+//i/xIA4P8CAPf/8P/3//r/0/8PAOf/+v/g//r/6P/4/+r/7P/+/+T/CwDV/wMA8P/p/wUA2f////L/AQDl/wMA4v8BAPz/5P8SAOH/AwDw//z/5/8UAN//CgDi/woA7f/t//f/EQDc/y0A1v8aAP//+/8OAOj/DwDV/xsA1v8MAOP/BADn/wQA/f/l/y0AyP9AAL//JQDl/w8A9f8MAPb/HgD2//v/FAD0/x0AAgD0/wYA8v8MAAQA6v8CAOH/HwDi/wYA8/8AAAQA4P8XAOD/CwD4//7/+f/4//r/3/8NANn/DgDa//v/+//k//T////H/xkA2v8EAAsA6/8FAPj/AwD6/wAA8f/5//v////+/+X/HwDn/wQAEQDi//7/DgDl/xkA9v8IAPz/+//+/+n/BgADAOz/7P8GAOL/EQDn//7/+v8CAPL/AAAEAOf/FQDk/wwA/f/q/xwA2f8dAOj/FADr/wAA+f/z/wQA/f/q/xUA5//1/woA5v8LAPP/AAAEAO7/HwDg/yMA6f8YAO//CAAOAO//JwDq/wYAEQDy/xEA+//5/xIA7/8QAPf/+f8WAPH/CAAMAO//GADq/x0A8P8CABAA4P8mAO7//P8HAOr/BgALAO//+v8DAOz/BgD+//L//v/y//7////u/wcA7P8EAOL/EgDS/yMA3P/y////7f8GAPr/AQD+//P/BQDn/ycAzf8lAOP//P8UANn/IADt/wkA///2/xAA7/8UAP7/7v8gAN7/JgDk/x0A5P8dAO3/DQD0/xAAAgAHAAQA/P8CAAcA+P8PAPz/AAAOAOr/JQDZ/yEA4v8aAOH/GwDX/xIA9f/7/wMA7v8PAOv/DgAHAOf/IgDe/xwA5/////z/+v/v/wUA/v/v/xkA5f/7//T/+//y/xMA5f8TAO3/HADw/xAADQD5/yYA4f8oAOf/EAAIAAgA+v8jAO7/IgABAAsA//8nAPj/IAD0/yQA+f8kAP7/CQADAPv/BgAgAOz/EQD0/w4A/v/0//v/7f8SAOr/DwDo//z//P///+r/FADt/wAAEQD0/xEACwDm/xIA9P/8/wEA/f/z//f/BA
D8/woA+f8UAPP/JgDw/wkADADv/yMA6f8XAOD/HAD+/xMA9f8hAPT/IQDz/xsAAAADACQA3f88AOj/IgD3/yEA7P84AMj/OQDv/ykA8f8jAPb/GAD3/xcA/v8LAAoA8P8aAOn/CQD+//7/CAD9//v/FgD8//T/CQD4/xgAAgAGAPb/EQDq/w8AAQDv/wkABwDs/xgA5v8MAAcA9P8IAPf/EgDq/xwA2v88AO3/FQD1//r/FADq/ygA2v8mAO7/DQAIAP7/DAALAPn/EAD8/xAA7v8UAOn/GQDx//3///8EAP7/8f8DAPb/BAD8//X/CgDu/xYA7v8MAP7/CQAJAAYAEQD//woAAAD4/wYA/v/8/wEA8f/7/+L/CwDb/xQAzf8LANv/BQDO/yYAzf8TANT/AwDs/wIA+P/g/w4A0/8OAPP/8P8GAND/LADG/x4A4v8XAOz/DQDm/ycA9P8iAOb/JgDv/yEA6/8OAP//AQAIAP//+v8LAOL/HQDY/xYA7P/e/zAAvf8vANz/DwDs/xAA/f/p/xwA6/8MAAEA7v8MAOb/DgDo//3/CwDb/xsA3f8LAPT/+//2/wwA6v8LAPX/+f8EAP7/9P8MAOD/FwDi/woA9v/r/xUA4P8IAOj/+f/y//n/CADs//r/EwDS/y4A2P8hAPP/DgD6/xkA4f8iAOb/CAACAPX/EADx//D/FQDq/wgA8P8FAPb/CADt/wAA+f8CAPj/+//5//D/EADg/+//+//w/wkA3f8GAO3/AQD5/+r/AgDh//3/2v8OANv/9f///9b/DQDn/+v/GADi/wAA9P/o//P//f/Y/wUA9//y/wgA9P/z/xkA3v89AMr/JQDu/woAAQD9//b//f8KAO3/EgD0/wgA8f8NANz/LADN/ysA1f8bAOn//P/x//v/5/8VAOb/AgAEAPD/CADw//7/7P8LAP3/BwDv/wAA7v8SAN7/CgDi//f/9//2//D/AgDz/wQA8f/5/wEA5P8aAOH/EgAEAAIADQD4//L/DwADAPH/EQDf/xMA6v8UAO7/DgD6/wYA8/8ZAOH/GADo/xMA2/8TAN//DgD4//T/CQDu/xwA3/8aAPT/CAABABgA5/8rAN7/DAAAAPr/DwDq/wgA6v8NAOX/EQDW/xgA0f8bAN7/EwDo//3//f8BAPH/DwDo/wEA+f/4//b/6/8EAOf/DQDn//3/+f/x/wAA8f8QANj/HQDM/yMA4P8MAPT/9P8JAAMA9/8CAOr/EgD//wEA+f/8/wQA9v/9/xIAAAADAPz/AQAFAAcACAAJAPL/FwDy/w8ACgD5/wQACgDh/xkA6v8FAPn/BgDy/wcA7v8VAOv/EgDi/xYA7v8CAPb/BQD8//3/8////wYA9f8JAOD/CgDv//n/+P/v//r/8f/l//3/6f/1/+n/AgDk/w0A8//t/wkA/v/z/wsA5v/+//7/+f/n/wwA7P/1/wwA4P8kAOv/JgDa/y4A5f8hAPT/HwDl/xkABQARAAwA+/8VAAIABgAGABQA7P8OAP7/BgACAAAA/v/+////CgDm/zYAu/89ANb/GwD+//L/CwDu/x8A3/8LAP3/5v8VANn/EADw/wMAAgDk/ykA4P8oAO7/HAARAPX/BwD9/wYA9v8IAO7/EQDd/x8Ax/8NAOv/9v////T/4P8DAO3/+P/8//P/BgDc/yYAxP8qANH/JwDH/y0A2v8eAPT/BgD3//j/FQDx/wYA///8/wIA4P8SAPb///8aAOX/HADg/xwAz/8rANP/FwDq/wAAGADi/xMA1/8mAND/HgDi/wgAEgDw//v/AwD1/wQA+P////f/AQD6/wkACwDu/xgA4/8FAAQA9P8HAPr/8P8KAOz/CgD7/+f/JADV/ykAzf8gANv/JADe/x8A6P8dAN//CwAAAPr/CwD0/wsA6v8aAOz/EgD5/wEAAgD0/wkA5P8XAO3/BwAEAO3/GwDd/yUA2f8VAOn/AAD8//P/5f8NANz/GA
Dr/+X/DQDz/wIAEQDb/xoA5/8IAOP/CADl/wcA5v/6//b/4/8RANj/FQDY/wUA6P8EAOz/9P/w//7/7f/5/wkA4v8RAOL/DwD2/xcA2v8eAN//HgDt//3/CgD3//7/AQD0/xIA/P/3/woA8f8LAAsA8f8IAAAA+f8ZAPf/FAD4/wYACQD0/xYA5v8OAAIA4v8gANz/GgDv/x0A5v8QAPP/AwAUAOX/AgAMAOb/EQDj//n/AADw//L/+P/z//X/9v8BAOX/BQDd/xAA8f/3/wIA8/8NAPH/CgDo/xEA7P8FAOn/AgDt//z/5v8GANv/EQDY/wEA8v///+//AADm/xgA6//3/wEA+v/5//z/FgDg/x8A/f/m/xIA3/8UAO//DADp/w8A8f8KAPH/DgDv/wsA7//7//z/7P8PAOT/BgAFAO//DQD7//j/DQD+/xIA6P8NAPn/BAAKAPb/9P8EAOD/GQDr/w4A9f/3/w0A7//6//j/7P8FAPf/+v/l/wUA2////+b/6//y/wAA5P8GAO7/9f/z/+3/3P8OAL3/JgC4/wIA2//z//r/6/8BAOn/BwDx//3/7/8DAO3/EgDZ/wwA6P8RAP//+v8MAPj/EQD2/woA7v8SAPP/CwD8//n/FADw/wcA9f8NAPL//f8UAOT/JADg/zIA2f8jAOv/GQD1/w4A/f/7/wgA+v/y/ycA2P8XAO7/EAD3/wcA5f8ZAOv/IAD0/+//IwDB/zoAxf8aANX/EgDa/w4A5P/3//n/8v/7//L/8P/5//n/+f8UAOv/CwDp/w0A3P8dAOH/CwD1//3//P/7//L/EwDy/wMA/P/6/wUA9f8HAOH/KgDF/yQA2v8SAPj/3P8NAOL/AwDu/+H/AgDX/wkAz/8PAND/AgDw//j/+//r//v/4v8BAOH/FgDU/xwA7P/p/wYA9v8LAO7/GADW/ycA1f8bAO//EwD3//T/GgDa/zUAzP83AND/FwABAP3/CQD+/+X/GADl/xIA8v/+//z/7f8OAPD/FADv/wUAIADU/zcAzf8jAP7///8FAAMACgDv/xQA6/8FAP//7f8CAPr/8f8AAAMA9v/4//T/IADP/y0A1/8dAP7/BwAIAPn/GQDy/w4ACgD2/wQACAD3//b/BgDU/zAAvv8wANT/EwDz/w8A7f8VAOn/FgDt/w8A9/////j/CAD5//f/CQDi//f/HQDG/yMA1f8DAP3/5/8OAOf/DQD1/wwA5P8eANn/IwDf/wcACQD4//L/BQABAOf/JADR/ywA1/8lANn/EwDy/wgAAwDt/wUA/P/2//z/3/8OAAUA6f8XANb/IQDn/wUA8v/8/+j/BwDu////7v8CAOz/+P8TAOn/HQD6/wAACwDt/xkA0v8lANT/KQDg/xcA3P8PAOz/GADn/wcA6//5//r/AADp/w4A2f8XAOn/BwD8//z/EADm/yYA5P8jAPD/HgDz/w4ACgDr/yEA2/8tANL/KADd/yQA6f8XANr/DwD2/wwABQDw/wwA/f8DAO//+//y//P/AQDk/xIA3f8OAP7/5v8pAMz/IQDi/xYA5/8nAOj/IQDy/wQA6/8RAAoA7P/+/wIA8/8cAO3/7f8fAO//+P8dANf/IgDo/wUABQD8/wEA/v/z/w8A9/8DAPP/AAD2//X/8/8GAOb/DwDq//H/BwD0////+f8CAPT/GADh/xkA8P/2/w8A3v8QAO3//v/+//3/3/8LAO3/8v8CANX/GwDc//3/5f/4////+//4//H/EgDo/xEA5P8LAOL/BQDu/wcA5//2/+n/BQDw/wsA7P8HAPn/AQD2/xkA9P8RAO7/FwAGAPn/JADg/zkA0/81ANz/HwD7//r/CAD3/wQACgD+/xEA4v8kANj/MwDY/xwA6f8AAAUA9v/8/wAA7P8VANz/GgDY/yAA9P/w/ygA0f8uAOT/EwD///7/CwDz/wkAAAD2/yAA2/8sANf/EwDr/wIAAAD8/+7/DADm/xIA2v
8HAPj/5/8LAN3/BADx//n/7P8MAOv/+//6/wQA4/8EAOX/BgDj/wAA3//9/+H/AwDz/+7/EgDb/wUA4f/z//n/+//t//n/7P8TAOL/AAAEAN//HgDa/wQA+f/j/xMA5f8BAAIA+f/8/xsA1/8iAPH/EwD//wIAFQDz/y8A4P8bAPP/BQD//xsA8P8dAOn/FQADAPH/BgDz/xEA8P8SAO3//P////r//f/8/wQA6P8fAPH/DwAEAO7/AwD+/+j/EgDv/+v/BQD1//T/HADV/zAA4f8eAOT/FwDh/wwA+f/w/wIA1v8RAOP/DQDf/w4A6v8AAPT//P/x//3/+f/5//z/AAD1/wQA7v8FAP3/7v8FAAIAAwD5/xEA9f8OAPT/BAACAP3/+f/7/wMA4P8hAMb/IgDp/wMA7/8TAO7/9/////L/AQAKAPT/8f8BAOT/BgDz/+r/+f/6/+//+//z/+z/FgDQ/xsA1f8RAOn//f/4/wEAEwDj/woA6P8BAPL/EADf/w0A7P8BAAQA5P8QAPT/9/8KAOv/BAD6//P/AgD4/wIA8P8BAAEA+P/y//3/+P8EAPT//P8BAAcA7v8aAPX/EwAHAAgAGAAEAB8A8v8cAO7/DAAOAPf/CQD5//D/AgD0////+P/7//L/+//w/+7/FgDj/wwA5//7//z/CgDu//7/CQDa/yEA3P8iANT/CQDz/+7/CwDs/wAA6P8PAOD/GQD6//7/AwANAOj/HADZ/xQA+v/1/wgA7f8LAPH//f/x//b/BQDs/+n/FQDM/y8Az/8jAMj/JQDs//T/EADf/xUA9v/0/wIA7P8EAOn/8v8VAM//GQDe/wYA7//x//r/AQDw/wsA8f8HAP//BQDy/xsA4f8aAO7/BAD6//P/BwDs/wYA6v8BAOv/7f8aAM7/HgDa//n/CgDu/wAAAgDp/xgA/f/s/x4A1/8pAOL/DAALAP3/7v8XAP//7f8lANj/KQDv/x0A6P8RAAUA//8TAOv/CAAFAO7/BwDR/yYA1v8RAOb/BADq/xYA4f8NAN3/+v8EAOb/FADh/wUA/f/m/ycA3f8kAPH/FgDw/xEA6f8NAOr/CwD9/wUA9//z/wQA7f8aAOv//f/+/+//BgD6//T/+//u/xQA5v8LAPH/EgDt/wkA/P8JAPj/GgDp/ysA4/8WAOr/EgD///v/BwDv/wsAAADv/w4A0P8dAN7/EAAAAO3/BwDy/wAA9P/8/+7/9/8EAOn/EADt/w0A///6/wgA+v8EAAoA5f8WAPn/GwDn/wsA2v8DAPj/6P/3/93/+P/1/+j/DwDG/ycA3f/6/wMA5v8IAPX/6v/8/+X/DwDf/wYA7f8BAP//9P8AAPf/+v/9/w4A5f8bAPH//f8BABQA6v8eAPH/BgATAPP/DgAEAPX/GgDf/yMA6/8eAPr/DgDx/ysA5P8lAPX/CgAPAPn/+f8OAPT/BgAJAOr/FwDq/w0A8P8NAOz/EQDl/wwAAADt/woA7P8MAPL/DQDx/xMA2P8tANX/NADb/wsA+P/2/wcA///8//r/CQDY/ykAzP8qAOD/FwDl/xMA8v8UAAEA/f8CAPf///8OAN7/FwDg/w8A9/8DAO//DgDn/xQA7/8GAPv/AQAGAPn/AAAAAPr/BAAFAAMA8f8XAOP/EAD5//b/DwDj/xUA5P/3/+3/9//3/wIA2P8dAM3/GQDi//f/+v/2/+z//f/b/wsA4P8BAOr/9v/2//n/BADd/xIA6f8CAPX/9P8CAO///P8FAAMABAAAAPz/EwDn/xEA9f8PAPz/BQAGAA4A/P8fAPH/JwDt/yYA+f8ZAPT/HgAAAAcAFADj/zkA4P8SAP3/AwD5/wQA7/8PAPj/+P8QANv/KwDL/y8A4v8bAAMA+/8DAPv//v8CAPT/BQAAAPr/AwDz/wEA8f/+/wkA8f8BAN//KADe/wsA9f/u/wIAAADg/xoA1P8jAMv/NQDY/xUA/P8AAAcA9P8FAPz/8v
8NAPP////b/x8A3/8eAO3/DgD9//3/BgD3////9//6//v/EwDn/xEA2/8VAOz/AADe/wUA6/8WAN7/BgDy//v//f/m/w8A9P/y/xsA4P8nAN7/IADf/xoA6f8TAAsA9v/2/wQA9v8KAPv/BwDz/xEA7/8AAP7/9v8YAOj/EAD//wwA7/8HAPj/9P8VAOH/AAD4//b/DADk/xIA6P///wAA3/8VAPL/BAD///3/EAAAABMA7v8XAAUADAD9/wcABQDr/y8A0v85AM//IADv/xMA+v8IAAAAAQD6/wsA6P8KAPn/8v8NAOD/HwDb/yAA7P8FAPv/8/8AAPv/BgD4/+//BgD1//P/BwD+//n/CADq/wIADwD4/w0A6/8XAOT/GgDx/wYA7f8HANz/KQDO/xYA1P8TAOL/BwDy//T/CwDi/xcA3P8sAOT/BgD7//f/DgDs/wgA6P8DAPv/+f8DAPb/BgDr/w4A6v/8/xEA3v8YAO3/BQD0/xQA3v8aAOv//f/7//L/CADx/wAA7f/u/wgA8v/s/wUA8f/1/w8A5/8DAPj/8f8KAPH/9f/z//n/9//p/w4A1v8RANr/AAAEAPD/DADj/x8A4f8uAMX/LADz/wQAAQACAA4A7f8rAMX/LwDV/x4A5v8eAOX/EwAKAOv/IQD2/wEADQDn/x4A0/8YAO//9/8AAPr/7v8PAPX/7P8eANn/FwDh//n/+f/w/wkA5v8FAOT/+/8DAPv/CAD7//3/BgD+/+b/FADZ/xYAAwDn/xQA3/8HAPr/+f8DAPH/CwDz/xUA7/8QAOX/GQDo/wAAAQD1/w4A3f8ZANr/IQD0/wIAEQDo/xgA/v/s/xMA9f8GAPn/+P/q/ywAzv8wAND/HgDI/xkA2P/z/+3/1f8UAL//FgDI/woA4v///+b/8P/7/+v/AgDw//j/FQDY/yYA0/8rAN3/IwDl/w8A9f8EAPv////t/wIA+/8DAOr/CwDm/woAAwD9/w4A5v8iAM//JgDl/w8A5/8MAPf/+/8KAPD//P8EAPr/+f////L/EQD7/xAABQAIAPn/DADs/x8A+P8BAAYA+f8JAP3//v8IAAAABgD9/xMA8P8YAO//GQDz////FQDq/xgA/f/n/xkA6P8HAPz/7f8HAOb/DQDk/wQA8v/1/wUA+/8FAOj/EQDh/w4A8v8DAPP/FQD+/+T/GADv/w0A9f8GAOb/EwDf/wgA5/8QAOD/AwDl/wMA7f8LAOb/5v8NANr/GgDV/w0A1P8UANj/EwDh/wYA7P/4//L/AAD8//P//f8GAOL/EQDt/wMABQD9//v/IQDu/xkA8f8dAOn/HgDs/wcABgDv/w0AAQD5//D/AwADAPH/BQDz/wIAEADu/w8A+P8LAAUA/v8YAPT/EQAGAAMACAD5/wIACgD1/wkA/P8CAAUAAgD3/w0A7/8QAOv/EwDt/xAA6P8TAPD/9v8TANb/CgAEAN7/EwDd/wEA8//3//P/AgDx/wYA+f/p/xoA0v8rANP/FAD7/w4A5P8aAO7//v8RAOv/FADr/w0A4/8MAOX/BQD8/+P/AwDt//P/8f/n/wgA9v8DAPr/9f8GAAAA/v/7//v/8/8AAPr/+f/1/wQA7v/y/x4A1v81ANH/IQDn/wYA+//l/xYA0f8uANP/JQDO/xYA8f8PAPf/8v8FAOX/BQD5/+X/EgDb/wsA6/8DAPL/EgDl/w4A8/8PAPH/EgDs/xYA6P8TAN//EQDh/xcA1v8QAOb/BgD7//X/7P/1/wQA+/8RAOX/CAAHAPP/BwDq/wMA9f8DAO//CgDq/xEABADi/zEA0f8YAPD/CgDz/yAA9v8TAPj/DgDs/xcADQDy/wUABAD+/xMAAQDn/ysA4/8ZAAIA9/8OAPj/BAABAA4A9P8MAOf/HgDv/wkA9f/8/wgA4v8IAOz/AQD6//n/4f8LAO///f/8//b/9f8NAO7/DQD0//X/DADj/w8A6//8/wcA8P/r/wkA5v
////P/4v8NAOP/9P/s/+3/AADt/wIA4f8TAN3/DQDp/wAA7v/2//b////s//b/6f8BAPD/DgDf/woA8f/9//3/CgD5/wsA9P8SAAEAAgATAPL/JgDe/y0A3P8hAPf/9f8LAPD/BAAAAP//AwDo/xAA2/8kANr/CgD3/+X/GgDd/w4A8v/1/xQA3f8fAN7/GwD//+7/KADf/ycA8P8TAAIABQAPAPb/EQABAAUAGADr/yMA5/8RAPT///8QAPH/BAD9//f/DgDi/wwA8v/0/wUA5P8EAPb/+f/3/wYA7v8AAPf/DQDi/wgA4/8PANz/DQDU/wcA3v8DAPP/6v8RANr////p/+H/CQDo//X/7v/y/wUA6f/7//7/5f8RAOL/+f/4/+b/AwD0/+j/EADl/wQACgDc/xkA7f8MAP//9f8VAOz/JwDa/xoA5v8IAPP/FQDu/xEA6/8HAAUA6/8EAO//CQD1/wsA6f////v/9//+//T/DwDa/yUA5f8ZAPf/8P8DAPz/6P8XAOH////4/wAA/P8NAPL/GgD4/xYA9/8LAPn/BwD4/w0A7//z/wwA7P8PAOf/DQD0//7/+P/7//n/8/8JAOf/CQD8//P/AgD9/+//FADT/yEA5P8YAOP/HADj/xUA6P8LAPr/9v8FAOn/DwDV/xsAz/8RAOb//v/r/wQA8v/a/wwA3//6/woA4v8BAOz/8v/w/wYA1f8YANX/FQDn//X/BwDz//r//P/5/wMA+P/7//j/CwAKAO//CQDk/xMA2f8sAM//GQDo//3/EADb/yIA5f8EAAMA9v8LAO7/DADo/xoA6v8KAOj/GwDo/wAA/P/u/wsA9v/5/wAA/P/4/xMA9f8OAAEADQAQAA8ABwACABEA8P8YAPD/FgDr/xUA3v8JAPj///8BAPP//f/6/wQA6f8gAOj/DgDs/wEA/P8NAPD/BgDw//7/+f8FAPb/+P/v/w0A4f8OAOr/CwDq/xcA2P8oAPj/CQAHAP7/BAAKAPX/CAD9/wAAAgACAPP/BgDv/wAA5/8IAOH/6P8GANr/DgDd/wcA2v8NAPP/3P8hANL/FwD4/+n/DQDl/woA7P8JAPv///8FAP7/AgACAAQAAwAOAPr/EgAIAPr/FgAAAAoACwDy/x8A7P8cAOT/DAAKAOz/FQDa/xkA4v8OAAIA9/8DABIA3/8uAOH/LQDm/x8A9f8eAO3/EAD3/wYADwDj/ygA4v/9/xEA7v8HAPT//v/8//v/BwDm/wMABQDr/wsA7v/6/wcA5//+/+//BAADAOf/DQDy/wIABwDx/wUA9v/7//b/DQDw//7/BQDv/wsA9//8/xIA9f8GAPX////x/wsA3/8KAAYA7f8SAPD/BwAGAPz/GQDq/xQA+P8FAAAABwD1//z/FQDX/ysA5f8QAPP/AgD2/xIA5f8SAPD/EADv//P//v/1//v//P/5//3//f/4/wYA7f8MAOD/FgDw/w8A6/8CAPz/9v8CAPL/+v/5//r/+f/8/wIAAgADAPP/CgD1/wYA9P8BAAUAEQAAAAUACADu/xEA/v/7/wsA6/8JAPX/BAAKAOX/IwDb/yEA7/8LAPH/BQD8/+j/FADk////DwDW/xsA5f8HAP3/9/8AAAQA7f8fAN//HwDo/wkABAD///7/DAD4//3/EADy/xgA4f8jANT/LADZ/yQA5P8QAO//FgDm/xUA6v8LAPP/AwDn/wsA+P/8/wMA+P/2/woA8P8FAAgA9f8EAPb/+v8SAOb/DwD3//7/DgD3//v////2/wUA/v/+//z/9v/y//P///8EAPf/8f/0////8P8BAOz/BQDe/xYA3v8IAP7/7f8FAPb/5v8IAPH/9/8BAPr/BQD1/wQA//8BAAQA+/8BAAQA/v/7/wMAAgD9//z/BAD0/xEA7v8AAPL/+f/9//L//P/w//P//f/j/wgA4//4/wEA8P8AAP3/7/8MAPb/BwD1/wgA7/8SAPT/+/8AAAEA8/
////r/CgD8/wYA9f8BAAgA8P8KAPX////1/w8A9P8FAPj//P8WAOL/HAD2//H/EQDj/xYA6/8KAOr/DgDr/xEA5v8VAOX/EgD2/wMAAADv/w0A7P8TAOD/CQDy//X/+P/o/wYA7f8DAPf/4f8bAO//AgADAPL/GQDn/wgA7/8KAOT/FgDe/xcA3v8OAOj/+P8CAOD/DgDr/+//+//p/wYA6v8EAO7/AwD0//X/AwDp/w0A3v8JAPH/AAD0//n/+//q/w0A5P8IAOf/AwDv/9v/FwDJ/xwA5P/8//f/9P/2/+//+v/6//b/9f/9/wUA8//5/+z/CQDi/wkA5v///wEA8v/s/wYA4f8MAOX/BQDo/woA5v8UAOz/CQABAOj/DQDw/wQAAgDw/wgA9P8FAPv//P/r/xMA6P8dAOD/FQDj/xYA8/8PAPb/BgDz/wsA/P8BAP3/AAASAN3/HQDh/xAA9P8DAP3/6f8NAN//DwDq//v/BADi/x0A2/8VAPH/+P/6//f//f/y//T////l/wAAAQDg/wwA7v8AABcA4P8UAO7/BAD3//f//v8BAPH/AgD4/+r/EgDm/wMA+P/w//f////y/+n/+v/r//3/5v8EAN3/DQDa/wsA+//3////6//+//7//P/y//z/9P/u/wcA7v////r/7f8GAOz/AgACAOb/EQDs//z/EQDq/wsA9f/8//7/+f8FAOz/CwDy/+3/CQD2//b/CQD8//3/AAD9//7/FAD3/wcACgAAAAoAAQD9/xYA6v8XAPH/CQDz/wcAAwD0/xYA1f8uANv/HgDx/xAA//8PAAEABAALAPj/FgDz/wQAAAD4////CADg/xsA4/8CAPz/+P/+//P/+f8AAPn/8P////D/AQDw/xIA6f8PAAQA3v8VANb/FQDx/wAA9v/p/xMA3P8YAOP/AAD///D/AgD2/+//FADb/xwA6/8QAO//DQDv/wkA+v8HAPD/AgACAPz/CQD2/wIA/f/0/w8A+P8SAPT/CgAGAAEA8v8NAOH/JQDh/w0A6P8FAOn/AwDn//D/+f/4//T/BQDx//z/AQDl/wYA8f/x/xcA0/8QAOH/EADw/wgA///7/woA+//9/wkA8/8KAP7//v8IAPj/DQAQAP7/GwAAACAA/P8aAPf/GgD8/xAA+/8JAAYA/P/+//v/BgDx/wYA+v8FAP//AAAIAP7///8KAPn/BQABAP3/+v8DAPj/9/8JAPb/+P8GAPP/BwDy/+//CwDt/xAA+f/f/x4A0v8XAOv/7P8GAOD/BgDo//D/9f/x/+7/7//3/+D/AQDq//T/CAD1//j/+//+/+j/GADm/xcA8v8IAAIABQD+/wsAAwAGAPv/CwD8/wEABQDn/yMA1P8ZAOP/EwDy/+//BQDo/xQA3f8EAPL/AADw//3//P/1//r/AwDw/wkA+v/q/wsA1v8SAO3//P/7////6v8BAP7/AwD9/wUA8v8ZAN3/IADu/xMA/v/z/xkA3v80AM//LgDe/xAA/v8DAAoA9f8AAAMA9/8QAPv/AQACAPj/CgAAABMA6/8bAAEA8v8YAOb/EAACAPj/AwD//wUA9/8HAO//BwD2/wcA3/8iAND/IgDh/w8A6v/7/w8A1/8bAOD/DAD3//r/+f/2//z/8v/z/woA5//8//v/7P/9/+//6v8IAN//EADm/wMA9/8CAOz/DQDn/woA9//0//7/+f/5//3/9v/z/wcA7P/z/xUA1/8eANv/CAD1//7////2//z/BQACAPP/CQDx/w0A/v/v/yEA7f8JAP7/EQDq/ygA4v8kAPD/JADo/x0A+/8PAA8A9f8PAAEA/f8LAN7/JADr/wYAAwDy/wwA/f8AAPn//P/p/xMA4P8WAN3/CwDu//z/BgDz/w4A/P8EAPz//f8GAOf/CgDy/wgAAwDv//r//v/y/wwA8//6//b/+//2/wEA8P8FAOX/DwDn/w4A8f8KAPb///8AAAkA9P8aAOz/Gw
D1/wgA+f8KAPb/EgD1/wIA//8MAPT/FQDa/xoA8/8JAAoA8v8MAPz/+v/+//P//f/y//r/9P/+//T/AwD5//n/BQDu/wcA8//8//7/AgACAP7//f/v//H/DgDo/wUA3f8LAO3/AAACANn/JgDj/wEADwDr/w4A/P/4/w4A7/8KAPT/+/8SAOf/GgDe/xEA6P8CAO//CwDi/w8A6/8GAO//EQDq/xIA9/8DAA0A8f8aAO//BgALAPD/DwDy/w4A/f8GAO//BQACAPr//v/3/wUA/P/x/wEA9/8BAAIA9P8CAAMA8P8EAPP////y//v/8/8IAOH/AQDl/wUA7/8CAPf/8v8GAOv/CgD8/wcA+f/+/wMACAD4/xYA5v8lANT/JgDX/yAA6v8FAO//BgDw/w4A9P8JAOr/DgDp/xkA5P8OAO3///8DAPf/8v8IAOb/GQDa/xMA4/8QAP7/5P8oANH/JwDh/xcA+f8EAAAA/f/9/wwA8f8ZAOX/HADj/w0A6/8BAPz////m/xMA3f8YANr/BQD6/+v/CgDj///////u//7/AQDv/wQA8v8OAOP/BgDr/wcA6/8CAOX/AwDq/wYA9v/5/w0A7/8CAPX/7/8FAP//8v8CAPP/DQDz/wQA///2/woA8//8//z/7v8EAPv/7v8RAOj/CgAGAOj/EgDv/w0A9/8AAAcA9v8SAOz/BADy//7/9v8OAOz/EQDj/xIA8P/6//f/8f8KAPD/BgDu//X/AADv////8f8LANr/HADk/xYA8P/x/wAA9f/0/wUA7f/2//7/9/8CAAMA9/8VAPD/GwDv/w0A+//9/wAAAwD1//P/BgD1/woA6P8VAPD/CgD0/wQA///0/xEA6f8LAAQA9P8EAAUA6v8cANL/HwDm/xQA5/8ZAOX/EgDl/xQA7v8DAPf/8v8KANv/FADY/xEA5v8DAOv/BgD3/9f/DwDf//7/BgDp//n/+P/s//r////d/xAA4f8KAPL/8f8KAPL/+f/9//n/AwD0/wMA7v8RAAMA9v8FAOf/DwDg/yoAz/8eAOX/BAANAOL/HADu/wQA/v8BAAEA+f8HAO7/CgD8//n/+P8GAPf/9P8HAOf/CgD6//D/BAD3//z/DwDt/w8A+f8PAAcADgAAAAEADADt/xQA8v8LAPL/BgDn/wIA9//8////8P/7//n//f/w/xQA7v8HAPH//P8EAAIA/P/9//z/+P8FAPz//P/6//H/DwDk/wsA8P8NAOn/FQDe/yEAAgD+/w8A9v8KAAcA9/8HAPr/BQD9/woA9f8GAPL/AwDt/wsA6P/p/wsA3v8SAOH/BQDf/xIA8//g/yQA0/8eAPX/8P8NAOD/FQDl/wsA9//8/wgA8/8CAPz/BQD8/w0A7v8SAP7/+/8KAPv/CQACAPD/FgDt/w8A8//9/w8A6P8XANv/FADq/wYACgDy/wEAGgDZ/y4A4v8mAPD/GgD4/xgA9f8JAAUA9/8YAOP/JgDg/woAAgD5/wYA9P/+//z/+v8DAPP/+f8IAPD/AAD9//D/CgDs//T/+//7/wgA5v8IAPf/AgAEAPb/AAD9//L//v8BAPn/9/8MAOP/EgD0//n/EAD2//3/+v/7//L/BwDe/wwA+v/9//j/BQDy/xEA9f8XAOv/DAD+//z/BAACAPb/+/8QAOH/GwD0/wMA/f/5//n/CgDp/xcA6/8RAPT/8/8BAPX/+/8BAPb/BAD4/woA9v8BAP7/6/8ZAO7/FADu//3/AwD4//r/+//7//D//v/8//T/BwD9/wUA8/8NAPP/CADw/wIACwACAAwA9/8TAOT/GQD0/wAACQDr/wgA8v8FAAsA7P8WAOf/GwD0/wkA+f/9/wMA5f8UAOD/BQAEAOP/BwDu/wAA9f/5//j/+f/4/w0A5v8XAOj/BwACAP7/AwACAAIA9f8RAPL/FwDo/xcA2v8jAOH/GwDr/wEA+/8GAPH/CQDz/////v/5//D/BQD3//z/Aw
D0//j/BwDy/wEAAQD3//v/AADq/xcA3/8RAOz/DQDy/w4A6/8JAPf/9v8UAOP/FADq//T//f/4/wsA9f/y//r/+//3/wEA5/8OANj/GwDg/wcABgDq/w4A8//s/wwA9f/0/w0A7v8LAPb///8BAAEA///4/woA+/8DAP//+/8KAPn/AgAAAAcA/v8EAPf/AwDw/xAA6/8QAOf/BADx//z/8P/7/+X/DADo/wEA/P/s/woA+P/9//n/AgDu/xEA7v8GAPP/DADl/w4A6/8UAPH/CQDz//z/BwDq/w0A7v/8//3//f8HAPH/BAD3/w0A7v8RAPb/9f8FAOn/EgDn/wcA6v8BAPb/AAD0/wYA7/8LAPn/BQD+//D/FQDj/yUA1v8VAPj/8f8JAN//GQDh/xEA8P/p/x4A7f8JAP//+/8SAPD/DADr/wwA7f8RAOz/EADn/xAA6f8BAP7/7v8IAPX/7f8EAOH/DwDo/wcA8P/8//r/8f8IAOT/EADb/w4A6/8IAOv/CADx//T/BgDt/wcA7P8BAPD/4/8MANn/DwD2//H/AgDy//r/8P/7////8/8AAPv/AQAEAO//+/8BAO7/BQDx/wQAAAACAOH/FQDc/xQA6P8CAOr/CQDs/w4A+P///wkA7P8IAPr/+f8KAOz/DwDr/w0A9//4//L/DADq/xkA4f8TAOb/EgD4/woA/v8BAPr/CgD5/wkA+f8EAAwA4/8WAOn/BwD6////+//x/wIA7P8IAOj/AgD+/+3/FADj/xIA+P/2////+f/+//7/5/8QAN3/CgD7/+j/CwDw/wcAFADo/xIA9v8GAPv/AAD9/wUA+//9//7/8f8MAPH//P/7//T/9/8AAPP/6v/9/+z/AQDo/wcA4/8HAOj/BAAAAPz/+P/3//T/BwD7//j//v/w//z/+v8CAPP/BgDu/wYA9P8DAAcA7/8LAPX/AwAHAP3/AAD////////7/wcA7/8KAPn/5/8RAPP/+P8MAPX/AgD5//7///8PAPf/BAAIAP7/CQD8////EgDp/xMA7f8EAPf//f8CAPP/DgDg/xgA6P8PAPb/DgDz/xoA8v8MAAEA/v8PAPb/AAD///v/9v8PANn/GwDm//r/BADx/wMA8//5/wQA8/8AAPH/BgDy/wIACQDz/wgAEADZ/yEA1P8TAP3//P/7/+j/GADf/xkA5P8DAPz/+v/4/wAA6/8UAOX/DgD7/wMA+P8QAO3/DQD0/xEA6v8KAPr/AAACAPj//v/5//L/DADy/xEA7/8JAP//BQD0/woA5f8hAOr/DwDt//7/8//0////3/8BAPL/8f8IAO3/+v/6//P/+P/7/+n/EADi/wcA6P8FAPX/AwABAPX/BgD8//j/CgDo/wsA9f/6//3/+////xIA7f8jAPT/HAD3/xYA/P8KAAYAAQAGAAIABgD2/wMA9P8IAOv/CADx/wYA/v/8/wgA+f///wcA+/8FAPv//v/+////+//5/wUAAwDz/w0A9P8LAO3//f8AAPX/FAD0/+b/GQDR/xwA4v/5//P/+P/u/wAA5f/7//P/7//r//3/3v///+//7/8EAPz/8f/9//v/6f8UAPL/BQD7/wgA9v8PAPf/AwAOAPT/CgD4/wsA8v8KAOr/DgDu/wMA8f8DAPT/9P/9/+3/CwDm//v/AAD5//H/DADq/wsA8P8IAPb/BwADAOv/EADb/xAA9f/5/wAA+P/0//f/BwD3/wIAAwDp/xgA2v8ZAOj/FADy//n/BQDu/xcA6f8RAOr/AQABAP3/AgD5/+7/EgDg/x8A4P8VAOf/CwD0/wwABgD0/xYA+v8BAAwA+P8BAA8A8f8RAPf/DgDy/wsA8f8JAPr/AwDk/xQA5v8PAPH/BQDp/xIA6/////r/+/8CAPn/+P/7//T//v/0/+7/DADh/wQA7//7/+//AQDb/xUA2v8OAO7//P8BAPv/7/8LAOz/AQD+/+v/CgDv/wUA6v8IAOX/Cw
Ds/+3/HADU/xcA4f8DAPj////z/wUA9P8DAAMA7P8NAOz/DwD2//T/FQDx/wkA9/8QAPH/FwDt/xYA+P8cAO7/FAAAAAcAEgDx/xMA9v8GAAAA6P8cAOf/DgD2/wEA/v8BAAMA6v8PANT/IgDc/xQA4v8PAOT/CgD6/wUA+P8OAPX/DADx/wwA5/8KAPf/AwAGAPL/9v8FAO7/EgDz//r/9v/8//7/+v/y//n/9/8EAPD/AwDy/xEA7f8EAP3/CgD9/w0A+/8QAP//CAD3/wsA//8HAAMA+P8KAAcA+v8HAOz/DQABAP7/EADu/wwA/f/6//z//f/u//v/+v/q/wwA4v8VAOf/AwD4//j////z//X//P8CAAAA9//1//L/7v8OAOL/AADj/wAA+f/y/wcA3/8YAPP/+/8NAO//BgAIAPH/DwDz/wgA+P8BAAQA+/8IAOz/CQDt/wQA8f8IAO3/CQDu/wQA8/8NAPD/CgD5/wMABAD5/w8A8f8EAAcA7/8PAOz/FAD4/wYA9P8GAAEA///+//r/BgD9//b/+v8BAPT/BgDy//z////w//f/+P/6/+j/BgDp/wEA7P/y//P//v/o/w8A6v////f/+v/7/wkAAAD8//r/CQD9/wwAAAD+/w0A7/8VAOn/FgD2/wIA+/8CAPv/CgAAAAIA9P8CAPL/FQDf/w4A6f/+////8f/2//3/7f8IAOT/BADz////BQDf/x4A2/8SAPb//P8LAO//DADv/wIAAgD0/w8A6v8SAOL/CwDp/wAA9/8DAOT/DgDh/w4A5v8CAPT/9//5/+7//P/x//n/9v/z/wMA6P8KAPX/+v/8//X/BQDq/wwA6P8KAO7/BQAEAPX/DQD8//b/CwDp/wUADgDm/xQA6v8YAPL/CgD8/wYAAAD7////AwDz/woA9v/1/xMA6f8PAP7/8P8LAPf/AgD///f/CQDy/woA7v8FAO3//f/3/wcA9P8BAPH/BQDy//n/9//y/wYA8v8FAPP/9//9//L/AAD4//v/+//2/wgA+v/8//b/9//4//P/+//1/+///P/z//f/BgDr/xAA7v8JAPX/CQDy/wQA9v/6//j/8f8EAPf/BQDv/wsA///6/wcA/P8BAPz/BgD5/wYABAD4/woA9P8CAPn/+v/5////+v/9/wMA7v8HAOv/AwD4//b/9f/7//7/7P8IAOf/CQD6/wAA9/8NAPH/AAD5//3//v8JAPz/7/8MAOH/EQDr//z/8//+//P//f/3//X/BQDm/wkA6/8GAO//AwD2/wMAAwD7////+//0/wcAAAD4/wMA+P/9/wcA9v8IAPf/AwD3/wIA/v/+//7/+v/7/wMA+P/+//X/AQDv/wAA8v/7//b/9v/7//v/7v8GAO//BAD///3/DQD3/xYA6/8HAPP/AgD///7/9P///+z//f8CAPD/BADw//X/+//o/wIA8v////j/8P8FAO//DgDy/wUA+//0/w8A7v8SAPD/+f8KAOr/EwDr/wgA9/////z/BwD//wQAAgAFAPj/DAD2/wcA+v8GAPz/BwAAAAIA+v8GAAEA//8KAOP/HgDp/xQA+f8GAO//FwDs/wwA9/8GAP///v/5////+P/9//n/8v8FAOr/AgDu//7/9f/4/+7/AwDr/wUA9f/+/wEAAgDw/wwA8f8DAP7/9f8LAO7/CQDp/woA5f8CAPL/7/8RANn/CwDq//z/9f8AAPL/CADv/wQA+v/3/wYA8f8MAPX/+f8PAPL/DAD4/wsA+f8MAPn/DAAAAA8A/v8IAAMABgAPAPL/DQD8/wYA+//y/w0A9f8LAPT/BQD8/wMAAQDv/woA2f8cAOL/EgDn/xAA5/8NAPn/CQD5/w4A+f8HAPX/BgDx/wEA/f/9/wEA+P/0//3/9/8HAPX/+f/y//n/BAD1//D/+v/s/w4A6v/7//j////6//L/AwDx/wsA+/8AAAQA+f8IAPj//f8EAPf/DQD1/wQAAg
AAAP//9f8DAAAAAQARAPP/CQABAP7/AQACAPD//v/+//P/CADt/w8A7/8IAPz//f/9//n//f/9/wYA+/////T/+//q/w0A5v8EAOX/BgDx/////f/u/w0A9f8EAAEA+P8BAAUA+f8HAPH/AwD7//n/BAD4//7/9v/4//j/+v/x/wUA7P8GAPD//P/2/wcA8/8FAPb/AAD9/wIA/P////j/CADv/wkA7/8LAAIA/f/5//3/CQDu/wsA6/8TAPX/AAD7//n/CgD3////AgD1/wQA8f8GAPL/9v8BAPD/CADn//v/9v/6//P/DgDp/wQA8v/8//j/CwD1/wAA8P8FAPf/BAAAAPz////4/wEA8/8LAPP/AAD7//r/CQD5/wkABADy/woA7/8YAOD/EwDm/wAA/v/1//X/+//v/wgA5P8EAO7/BAAEAOH/HADU/xsA7f8EAAYA8f8LAO7///8DAPP/EgDn/xQA5f8DAPL/9v8FAO//+f/9//T/AwDu//7/+P/3//3/8f/4//r/9//4//b//P/s/wQA8v/5//f/8f8AAOL/DADd/woA4f8GAPj/+P8DAPX/9v/6//T/8f8UAOP/DADs/xEA9v8IAPv/BQD//wMA/f8IAPX/DgD3////FQDr/yIA7f8QAPn/DwD9/wcA9f8NAPb/BQD2//v/9v/v//v/+//y/wEA5f8IAOn//P/v//L/CADp/wwA6P/0//3/6f8AAPL/9P/6//H/CQDw//j/8v/x//j/6v/7/+3/8v/x//r/8f8DAO//BQD8//z/+/8AAP3//f/5////8f/x////8f8GAOf/EQD3/wgA/P8LAP7/AgALAAMACwAMAP//DQAFAP3/BwDx/wgA+f8JAAEADgD4/wkA9v8KAAMAAwDx/wUAAgDv//z/9f/5/wwA8f8AAAIA9//z//T//P/r/wkA7f/v//v/5P8KAOX/9P/0/+3/AADt//n/9P///+f/AADx//7/9v/5//n///8IAPP/BADu/wYA8v8NAPD/AgD2//3/DwD1/w0A/P8DAAMAAAAIAPr/CADz/wkA+/8AAPr/+v8FAPD/CQDq/w8A8P8DAAAA9f8BAAcA9f8JAPf/DAAIAAIABQD7/wIA9v/7/wIABADp/wgA4P8GAP7/8v8IAOb/BADy//v/+P/6/wMA7f8DAPP/AAALAO3/DADn/wgA9v8HAPX//v/r/wsA5/8JAO3/EADr/wsA9P8QAAwAAQAHAP3/DgAGAAQA/v8JAAQABwAMAAMAEwD1/xcA9f8UAAIA8/8SAPL/EgD2/wIA7P8cAOj/BgD6//3/+/8IAOT/DADb/xUA3f8LAOv//f/7//D////u/w0A5v8SAOL/EwDy/wIA+v8CAPX/CQDh/xUA5f8KAO//BQACAOz/BwDs/wMA+f/1/xEA6v/+/xQA2v8lAN7/IADs/xMA7/8XAOv/DQD2/wgA///2/xIA5f8PAAAA+/8JAPn/CAD3/wMAAQAHAPj/DAD4/wcABAD+/wAAEADm/xkA7/8fAOn/FAAFAAYADAD8/wMACQDs/wIABQD2//v//v/x//7/+v/0/wQA+v/y//z/+f/q/wYA6f8DAPn/9v8KAPT///8EAAMACQD0/wgA+f8NAOj/FwDe/wkABADs/wgA+v/6/wcA6f8NAO//AQAUAO3/EAD3//j/AgDl/wEA8v/8/woA5v8DAPj/AQD0//L/EADw/xUA9v8DAAkA///8//r/BwD1/wgA8f8HAPj/BgAEAPX/FgDr/wkA9/8AABAA9f8PAP7/BAAAAAoA+v8NAPH/CQDv/wAAAAD5//f/+//5/wEA8v/6//n/7P8FANz/FQDY/w4A7/8HAPD/CgD8//j/CAD2/w4A9f8PAP3//f8OAPD/EwD2/xUA9f8JAPb/CAANAPn/BQAAAP7/DwA=\" type=\"audio/x-wav\" />\n", + " Your browser does not support the 
audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 132\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 132_003_1366\n", + "Original Audio: 132_003_1366.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 132_003_1366_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 132_003_2657\n", + "Original Audio: 132_003_2657.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + 
"output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 132_003_2657_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 137\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 137_003_1351\n", + "Original Audio: 137_003_1351.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 137_003_1351_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 137_003_1614\n", + "Original Audio: 
137_003_1614.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 137_003_1614_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 147\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 147_003_1675\n", + "Original Audio: 147_003_1675.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source 
src=\"data:audio/x-wav;base64,UklGRrQzAwBXQVZFZm10IBAAAAABAAEAgD4AAAB9AAACABAATElTVDQAAABJTkZPSUNNVBEAAABQcm9jZXNzZWQgYnkgU29YAABJU0ZUDgAAAExhdmY2MC4xNi4xMDAAZGF0YVQzAwABAAAAAAABAAAAAAABAAAAAAD//////////////v/+/////v/9//7//P/9//3//P/8//7//f/9//z//f/7//z//f/8//3//f/9//z//v/+//7//////////////////////wAAAgABAAAAAQABAAEAAAABAAEAAgABAAAAAQABAAIAAgACAAIAAgACAAMAAwADAAMABAAFAAQABAAEAAQABQAFAAYABAAFAAUABAAEAAQABAAEAAQABAAEAAQAAwAEAAMAAwACAAMAAwABAAEAAAD//wAAAAD//wAAAAD+//7///////7//v//////////////AQABAAEAAQABAAAA//8AAP/////+////AAD//wAAAAAAAAAA//8AAP//AAAAAAAAAAD//wAA/////wAA/////////f/+//7//v/9///////////////+/////v//////AAAAAAAAAQAAAAAAAAABAAAAAQABAAEAAQABAAEAAgABAAIAAgACAAEAAQAAAAEAAgACAAMAAgACAAMAAQABAAEAAgACAAEAAgABAAIAAgACAAQABAAEAAMAAwADAAMABAADAAMABAACAAMAAgACAAMAAwACAAEAAgABAAEAAQABAAEAAQABAAMAAQABAAIAAAAAAAEAAQAAAP//AAD/////AAD//////v8AAAAA//8BAAEAAAABAAEAAgABAAEAAQABAAEAAQABAAAAAAD//////////wAA////////////////AAD/////AAABAAAAAQAAAAEAAAAAAAAA//////7////+//////8AAAAA/////////////wAAAAD///////8AAP/////+//7//v/+//7////////////////////+/////f///////////wEAAQAAAAEAAgACAAIAAQACAAIAAgACAAEAAwACAAEAAgABAAEAAgABAAEAAAAAAAAAAQAAAAEAAQD//wAA///+////AAABAAAA//////////8AAP////8AAP//AAAAAAAAAAD//////v/+//7//v///////v/9///////+//7//v/9//3//f/9//3//f/+//7///////////8AAP////8AAAEAAQACAAIAAQABAAIAAgACAAIAAwACAAIAAgADAAEAAQABAAAAAQAAAAAA///////////+//7//v/+//7//v/+//3//v/9//z//f/9//7//P/7//z//f/9//3/+//8//7//v/+//7///8AAAAAAAD///7/////////AAD//wAA///+/////v////7///8AAP////8AAP///////wAAAAAAAAAAAAABAAEAAAAAAAAAAAAAAAAAAAAAAAEAAAD//wAAAQACAAEAAgABAAIAAQABAAEAAAAAAAEAAQACAAEAAgACAAIAAQAAAAAAAAAAAAAAAAABAAAA//8AAAEAAAAAAAEAAQABAAAAAgABAAAAAAAAAAEAAQABAAIAAgABAAIAAQABAAIAAQABAAIAAQAAAAEAAgABAAAAAAD//wAAAAD+//3////+//7//P/9//7//f/9//3//v/+//7//v/+//3//v/9//7//v/9/////////wAA/////////////wAAAAD//wEAAQABAAEAAAACAAAA//8AAAAA//8AAP//AAAAAAAA/v/+//7//v///////v////////8AAP////8BAAAAAQABAAAAAwACAAIAAQABAAEAAQABAAAAAQABAAEAAQACAAIAAgAAAAEAAAAAAAAAAAD///7//v////7///8AAP//////////AAD//wAAAAD/////AA
AAAAEAAgAAAAAAAAD///7//f/8//3//f/6//v//P/8//3//f/8//v/+//6//r/+//6//r/+v/7//v/+v/5//n/+P/2//b/9//3//X/9v/7//z//P/6//n/+//5//n/+P/5//v/+//4//f/9//1//X/8v/z//P/9P/v/+3/7f/r/+v/5v/n/+f/5P/h/+L/6//o/+D/3P/d/+H/2f/X/9v/2v/Y/9T/2v/W/9f/3//p/+z/9P/4/wAA/v8AAPD/8P/n/+7/4v/9/+f/8P8EAK3/pQJcBHADWAPAApwCTQIUAgYCvwGBAUkBDQH7AO8AsgCoAI8ALQD5/9b/qP+L/3r/kf+E/2v/Nv8K/w3/+v4W/zf/Ff/3/t/+1P7D/qr+r/6G/mv+df6d/mz+6/3i/jH/kP6R/lb+rv3p/cn+ov4t/1X/tP6C/pj+PP9F/zH/iP+Z/2b/XP+t/8T/iv/A/8T/mf8xAJYAjAB6AHYAVQC1ANkA6wC0Ab8BbwHtAOwACAFYAYwBqAFJAuEBAAHRABQBxQC5ANUAgACzAOYAFwHyANsAHAEpAYsBEwGjANkA8gDvAJIAiQCYAL0AqQCqANEAkgC3ALIAUQAIALr/rf+o/4j/l/8OAEIA3v+//+P/2f+//5T/Z/9V/zz/Nf9M/0D/IP8R/yr/Tv9K/yX/6f7R/tH+xP6//s/+4/4L/1z/jv+d/7L/7//N/7L/0/+x/9j/xf+6/8v/y/8HAOr/AQAuACkAHgAPADYATwBuAIIAjgCjAI4AkQCOAIgAkQCNAKQAxQDRALwAzgDiAMcAuwCRAFkAMAAUAB4A/P/U/wUAMQAyACEAHQAmADEANQA2AEUAUAAnAOT/6f/t/+H/CQAyAEwASQArACkAMQBRAGYAXwB7AIoAcgBpAHIAcgBUADAAGgAPACAAJwAQAPb/7f8HACoANQA5ACkAGQAaACkAJAAeAAIA2P/K/6f/vv/j/9n/0v+9/7n/s/+1/+H/7/8IACkAFQD8/wwAGwAjABcA////////DQAUACIALAAdABoAEwAQAPb/0f/S/+j//P/0/+H/1v/N/67/rP+u/7T/v/+p/8P/5P/a/8P/tv+2/6b/pP+3/8P/x//G/8H/vP/K/+v/+f/2/+//6v8AABoAHgAiACoAIgAXABYAGwAnADEAIQAMAAAA+P/n/+v/BAAeABwAAAD+/wYAAQABAAcA+f/8//n/7P/8/+//3//w/+v//f8FAPz//f/x/wcAFQAWAAwA/v8aACIAFAAYAB0ACQACAAkADAAXACIAFQAKABEAEgAXABIAGQAfAAwA9v/y//f/7v/v/+z/4//h/+D/3v/b/+f/6v/j/+7/7//p/+7/7P/g/+P/5v/o/+j/5f/j/+j/7v/3//7//P/5//T///8FAAIAAgAAAAYACgABAAQACQAHAAgADwAPAAoAAgD9//7//P/7/+//8f/w/+f/7f/z//T/8P/0/+//6P/5/wkACAANAAkA/f/8//X/9P/2//b/+v/7//z/AgD9//X/9//7//z/+/8AAAYAAwD8//7/AwAAAPz/9//3//b/8P/y//X/9f/0//n//f/6//f/8v/0//r//P/9//n/8f/t//D/8v/w//T///////v//P8AAAIA///9/wAACAAHAP///v/+/wAABQAGAAAA/f8AAAEAAwADAAEACAAHAAMAAwABAAEA//8AAAIA/P/+//v/+f/5//T/8//0//j/+//4//T/8//v/+3/7f/w//L/8f/0//X/8//z//L/8//7//7///8DAAEA/P/8//r/+P/5//r/+P/6//7//v8CAAcAAQD//wAA/v8AAP7//P8AAAEAAAD7//n/9P/x//P/9f/4//b/9P/1//f/9f/3//n//f/9//z///8AAAIA///6//n/9//6//3/+//+/wEAAgACAAIAAwAAAAEAAQAAAP3///////7///8AAAIA///6//r/+v/3//r//f////z/+/
/6//f/9//4//j/+P/1//b/9v/3//r//P/+//v//P/9//v/+f/7//z/+v/6//z/+//7//3//v///wAA///+/////f/7//z//v/+//z/+f/7//3//P/9////AAD//////f/6//v/+P/5//n/+v/7//7/AQABAP///v/7//r/+//8/wAAAAAAAP/////+//3/+//8//3//v/+//3/+v/7//3/+//4//v//P/7//7//v/9//z//P/7//j/+P/4//n/+v/5//r/+v/4//n//P/8//7//f/8//z/+f/5//n/+//8//3//f/+//3/+//6//3////9//z//f/9//7//v/+//3//v/8//v//P/8//r//f/+////AAAAAP3/+//7//v//P/9///////+//7///////3//v/9//z/+//8//z//v8AAP7//f/+//7//P/9//v//f/9//3//v/8//z//P/9//7//v/+//v/+v/+///////+//z/+//7//z//////wEA/v/9//v/+//8//7//v/+//3//f/9//3///////7//P/7//v//P/7//v/+//+//7/+//7//3//v/+//7//f/7//z//P/8//7//f/7//3//f/7//v//P/8//3//f/9//v//P/7//z//f/8//v/+//8//3/+//6//r/+//6//z//f/7//v//P/9//7//v////7////+////AAAAAP7///8AAAAA/f/+////AQACAAEAAAAAAP//AQD//wAA///+////AQD//////v/9//7//v///////f///wAA///+//7/AAD///3//v/9//3//f///wAA///+//7//v8AAP///////wAA/v/9//3//v/+//7//v/9//7//f/+//7//P/9//v//f/+/////f/9//v/+//7//v/+//8//z//P/9//z//v////7//v/9//z//f/9//3//v/+///////+/wAAAAABAAEAAAD////////+//7//f/9//3//f/9//3//P/9//7//f/+//7//v/+//3//f/9//z//f/9//3//f/+//7//v/+//7//v////////8AAAEAAAABAAAAAQAAAP///v/+/wAA/////wAA/////wAAAAABAAAAAAAAAP7//v/+//3//f/9//3//v/+////AAD//////////////v/+//////////7//v////////8AAP//AAD///7/////////AAAAAP7////+/////v/8//7//f/8//3//P/+//7//f/+//7//f/9//3//P/8//z//P/9//3//v/+//7////8//3//f/9//7//f/9//3//f/9//7//f/+//3//v///////v////////8AAAAAAQD///7////+//7//v////3//f/9//7//f/9//7///////7//v/9//3////8//7////9//7//f/+/////v/+//7////+//7//////wEAAAAAAP7//v/+/////v///wAA/////////v/+//7//v/+///////+/////v/+/////v/+//7/////////AAD+//7//f/+//7//v/9//////8AAP////////7//v/9//3//v////7////+//3//f/+//3//f/9//7//v/+///////+//7//v////7//v////7//f/+//7//v/+//7//v///////v/+///////9//3//v/+/wAA///+//7/AAD+/////////////v////3//f/+//7//P/8//7//f///////////////v/+//////////7////+//7//////wAA/v///////v///////v/+//////////7//v/9//7//f/+//3//P/8//3//f/9/////v/+//7//v/+//7////+///////+/wAAAAAAAAAA/v/+//7///////////////7///8AAAAAAQABAAAA////////AAD+//7//v////7//v/+//3//f/9//7///
/+//7////+/////////wAA/////wAA/////wAA//8AAP///v//////////////AAAAAP///////wAA//8BAAAAAAAAAP///////////v/+//7//v/+//3//v/+/////////wAA///+//////8AAAAA/////wAA//8AAAEAAAABAAAAAQAAAP7/AAD///////8AAP//AAAAAP/////+/////v/+//7//v/+//7//v////7//v/+//7//v/+///////+/////f/+/////f/+////////////AAAAAP////8AAP//AAAAAP//AQABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAD//wEA//8AAAAAAQAAAAAAAQABAAEAAQD//wAAAAAAAAAA//8AAAAAAAABAAAAAAAAAP///v///////v8AAP////////7/AAD///7////+///////+////AAAAAAAAAAAAAAAA/////wAAAAD/////AAAAAAAAAAABAAAA////////AAD//wEAAAABAAAAAAABAAAAAQAAAAAAAAABAAEAAgABAAAAAAD/////AAAAAAAAAAD//wAAAQAAAAAAAAABAAAAAAD//wEAAAAAAP//AAD///7/AAD///////8AAP7////+////////////AAD//wAA//////7///8AAAAA////////AAAAAAAAAAAAAAAAAAAAAAEAAQAAAP////8AAAAAAAABAAAAAAD//wEAAQABAAAAAQABAAAAAQAAAAAAAQAAAAAAAAD//wAA/////wAAAAD/////AAAAAAAAAAAAAAAAAAD//wAA///+//7//f/9//////8AAP/////+//7//v8AAAAAAQABAAAA/////////////wAAAAD//wAA/v/+//////8AAAAA//8AAAAAAQAAAAAAAAABAAAAAAD//////////wAAAAABAAAAAQD/////AAD///7//v/+//3//f/9//3////9//7//v/+//3//v/+/////////////////////v////7//v////7//v/9//3///////7//v8AAP////////////////7//v/+/wAA//8AAP//AAD/////AAAAAP//AAAAAAAAAAAAAP//AQAAAP//AAABAAAAAAABAAAA//8AAAAA//8AAP7//v/+//7//////////v///////v/+//7//v/9//3//v/9//3//P/9//3//v/9//3//f/+//7//f////7////+////////////////////AAD///7//v/+///////+//7/////////AAD//wAA/////wEAAAABAAAA/////////v///wAA//8AAAAAAAAAAAEAAAABAP//AAD///////8AAAEAAAAAAAAAAAD///////////7//v/9//7//v/+/////f/8//3//v/9//7//v/+/////////////v/+//7///////7///8AAAAAAQABAAEAAAAAAAAAAQAAAP//AAD///7///////7//////wAA//8AAP////8AAAAA/////////////////////wAA//8AAAAAAAAAAAAAAAD//wAAAQABAAEAAAABAAEAAQD//////////wAA/////wAA//8AAP///v///////v/////////8//7//////wAAAAAAAAAAAAAAAAAA//8AAAEA//8AAAEAAgABAAIAAQACAAMAAwABAAEAAAAAAAAAAQD//wEAAAD/////AAAAAAAAAAD//wAA//8AAAAAAAD//wAAAAD//////v///wAAAAD//wAAAAABAP7//v8AAAEA//8AAAEA/////wEAAAD///////8AAAAAAAABAAAA//8AAP///v/+////AAD//wAAAQD//wEAAQABAAAAAAABAP//AAABAAAAAQABAAAAAQABAAIAAwACAAMAAwACAAEAAwACAAIAAQABAAEAAAABAAAA//8AAAEA//
///wAA/v///wEAAAD//wAA//8AAAAA/////////v////7/////////AQD///7//////////v///wAA/////wAA//8AAAEA//8AAAAAAAD///7/AAAAAAAAAAD//wAA//8AAAAA/v8BAP//AAACAAAAAAABAAAAAAAAAAAA/////wEAAAAAAAAA//8CAP////8DAAEA//8CAAIA/v////////8AAAAA////////AAAAAAEAAQAAAP///////////v8AAP/////+//7/AAD+///////+///////+/wAAAAD//////////wAAAAABAAEAAQABAP//AAAAAAEAAAAAAAIAAAD//wAA//8AAAEAAgAAAP////8BAAEAAQACAAEAAQD///3/AAABAAAAAAD///3//v/+//7/AAD///7//v//////AQD/////AAD+//7//v///////v/+////AAD///7/AAD////////+//7///8AAAAA/v/8///////+//7//v8AAAAA//////////8AAP3//f/8//v//v8BAAEAAAAAAP///v///////////wAA/f/8/wEAAwABAP7/+//+/wIAAgACAAIAAQD+/wAAAAAAAAIAAQD9//z//P/+/wUABgAAAPz//v8AAAAA/v8BAAEAAAD+//7/AQADAAEA//8AAPz/+/8AAAEAAgAFAAEA/f///wEAAwADAP3//f//////AAAEAAIAAAABAP3//f8AAP3/AQADAP3/+v/6//z/AQABAAIAAgAAAAAA/f/7//7//v/8//7////9//3/BAAEAP/////+//7//f///wAA/v8CAAAA/v/9//7/AAABAAMA///8//v/+v/+////AwABAPz//v/9/wEABAADAAQABAD+//3//v///wEAAgD///v//f/+//z//v/+//3/AgD///7/AAAAAAYAAwD//wEA/f/7//r/+//9//3///////7/AQD//wEAAgACAAQA///8//z/AgAEAAIAAAAAAAAAAAABAAMAAQAEAAEA/v//////AgABAAAAAAAAAAAA/v8BAAIAAQACAAAA/v8AAAEAAAACAAEA//////7///8AAAAA//8AAAAAAAD//wAAAQABAAAAAAD/////AAD//wAAAAD//wAA//8AAP////////7//v/+/////v/9/////v/+//7//v////////////7//v///wAAAAABAAEAAAABAAAAAQABAAAAAQABAAEAAgABAAEAAQABAAIAAgACAAAAAQABAAAAAAAAAAAAAQAAAAAAAAAAAAAAAQACAAEA////////AAAAAP////////7/AAABAAAA//8AAAEAAAACAAIAAgACAAEAAQAAAAEAAQAAAAEAAAAAAAIAAgADAAQABAACAAEAAAABAAAAAAABAAAA//8AAAAAAQABAAAA/v/+//7////+////AAAAAAEAAQACAAEAAAAAAAAAAAD//wAAAAAAAAAAAQACAAIAAgACAAIAAgAAAP//AAABAAEAAQABAP7////+//3////9//3//f/9//3//f8AAP///////wAAAAAAAAEAAAAAAAEAAQABAAEA//8BAAEAAgABAP//AQADAAIAAwADAAIAAgACAAMAAwAEAAMAAgACAAEAAAABAAEAAQAAAAEAAAAAAP//AAABAAAA/////wAA//8AAAAA///+//3//v/+////AAD//////v//////AAACAAIAAgABAAIAAgABAAAA/////wAAAAAAAAAAAQABAAEAAQACAAAAAAABAAEAAAD//////////wAA/////wAAAQABAAAAAAD///7//////wIA//8BAP////8AAP//AAAAAAEAAAAAAAAAAAAAAAAAAQAAAAEAAAD//wAA//////7//v/+/wAA/////wAA//////7//v/+/////////////v8AAAAA//////////8AAAAAAAABAAAAAAAAAAEAAg
ABAAIAAQD//wAAAAD//wAAAAABAAEAAgACAAEAAAABAAEAAQAAAAAA/v/+/wAAAQAAAP//AAD//////v///wAA/v/+////////////AAAAAAAAAAAAAP//AAD+/wAAAAD////////+///////////////+//7/AAAAAAAAAAD//wEA//8AAAAA//8AAP////8AAAAAAQAAAAAAAQD//wAAAQAAAAEAAQABAAEAAQAAAAEAAgACAAAAAQABAAIAAQABAP///v/+//7//////wAAAAD//wEA/v/9//3//f/8//3//v/+/////v/+//7//v////7//v///wAAAAAAAAIAAAACAAEAAQABAAEA/v/+//7////+/////////wEAAQABAAAA//8AAP//AAD///7//v///wEA//8AAAEAAAD//wAAAAAAAAEAAAD//wAAAAD/////AAD//////v///wEAAAABAAEAAAABAAEA/////wAA/v8AAP7//v8AAP//AAAAAP//AAD//wAAAAAAAAAAAAABAAAA//8AAAAAAAAAAAAAAQABAAEAAQAAAP7///////7//v/+////AAABAAEAAQABAAAAAQAAAAAAAQAAAAEAAQABAAIAAgABAAEAAgAAAAAAAQAAAAEAAAAAAAAAAQAAAAIAAQACAAEAAgAAAP///v////////8AAAEAAQAAAP///v/+/////v/9//7//v/9/////f////7//f/+/////v////////8BAAEAAQAAAAMAAwADAAQAAgACAAIAAgADAAIAAgACAAIAAQAAAP7//v/+//z/+v/7//z/+f/5//z/+//6//z//f/8//3//f/+////AAD//wEAAgAAAAEAAwACAAMAAwAEAAMAAQADAAMAAQACAAIAAgABAAMABAD//wEAAQAAAP7/AAACAAEA/v///wAA/////wEAAAD///7/AgABAAAAAQADAAAAAAD///3////8/wQADAACAP//AQD+/wMABwD//wIAAgAAAP///P///////v8BAAMABAAEAP7//f/9//3/+//6//3/AwABAAAAAAD7//3//P8AAAAAAgACAPz//f/7////AgAAAAIAAQACAP7//P8AAAAA/v8BAAAA/f8AAAIA//8AAAEAAQD8////AQD///7/+/8AAAAA+//9/wAA+P8EAAcA9P8BAAIA9f8EAAQA9/8AAAEA/f///wAAAAD9/wUABQD8/wAAAQD//wEAAgAAAP7/AAABAPz///8EAPz/AAAGAP///f8CAAAA+v8HAAUA9v///wcA+//7/wAA+v/+/wEA/f8BAAcAAwD9////BwADAP//BAACAAAAAAAAAAAA/v8BAAQAAgADAAIAAwAEAAIABAADAAEAAwD///3//v/8//r/+f/7//3//f/9/wAAAQAAAP7//v/9//z//f////3//f8AAAAA/v/9/wEAAQD//wMAAwD//wIAAQD+/wEAAQAAAAEAAAAAAP3/+v/8//7//P/7//v//P/9//z//f8AAP///v8AAAEAAQD//wAAAQACAAQABAAFAAQAAwADAAQAAwADAAQABAAGAAMAAAACAAEAAAD///7//v/+//z//f/7//r//f/+//3//v/+//7//v/9//3//v/8//v//f////7//v8AAAAAAAAAAAMAAgADAAUABQAEAAQAAgACAAMAAwABAAEAAAAAAP/////+//z//v/+//z//f/8//z//f8AAAAA//8BAAEAAQABAAEAAwAEAAMABQAFAAQAAwACAAQAAwD//wEAAAACAAEAAQAAAAAAAAAAAAAA///+//7////9//7//v/9//7//f/9//z//f////7////9//7//////wAA//8BAAEAAAD//wEAAgABAAIAAgACAAIABAADAAQAAgABAAIAAgABAAAAAQACAAAAAQABAAEAAQD///7////9//7//v////7//P/+//7//f///////v
/+//3///8BAP//AQABAAEAAgABAAEAAwACAAEAAAAAAAEAAAD///////8AAAAAAQACAAIAAAAAAAAAAQABAP/////5//j//P/3//X/9v/4//z/AwAEAAYACwALAA0ADQANABAAFAAUAA4ADgAUABQADgAPABAACgD///n//v8AAP3/8P/j/+v/6f/j/+n/6v/c/+D/3v/X/+P/5//r//X/BQAIABIAFQAdAN//b/+L//b/GwATAN//rf/9/00AQABFAF4AZAB2AHkASgAhACAANgA0ACEAHAAlAB4ACQAOABAA7//W/9f/4v/w/+f/yf/L/9T/1//n//D/5f/V/9P/2f/e/+H/6//1/wcAGgAdAB0AJQAsACgAIgAcAA4ACwANAAYABgACAPP/6f/r//T/9P/s/+z/7//0//v/8P/k/+f/5//w//H/5P/j/+f/7//3//n//f8AAAMACAAHAAIA/v8IACIAHAAEAA4AJAAvAC0AMAAyADAAOgA/ACQADgALAAUACQADAPH/7v/9//z/7P/y/+//4v/j/+j/7//n/9v/4v/3/+7/6P/x/+n/6//u//T/8v/n/+v/+//9//7/DgAaACEAJQAiADIADQDL/9//EAAjACAAHAAJAAwAGgAdADEAOQAwACQAJAASAO7/7//u/9z/4P/m/97/0P/Z//P/8//l/+v/+P8EAAoABgD7/wIADQARABUADwAJAAYA+v/u/+7/9P/0//b/AQAPABAADgARABUAFwATAAQA9P/1//b//P/+//L/8//9//3/9//2/+//6f/m/+X/7P/y/+//8P/w//z/BwAIAA8AEgAQABUAHAAZABMAFwAWABcAEwAJABAAGAANAAwACAADAAEA///6//n//P/2//7/AgD6//r/9v/5//z/+f/+//7/9v/4//f/8P/v//b///8BAAAAAwAFAAYABQAJAAwABwAGAAkACQAGAAQAAwADAP///f///////f/8//v/9//0//b//P/+/wEA/v/4//v//v/9//z///8DAAUAAwACAAAA//8EAAcABAACAAAA/f/8//z//v8CAAQAAwADAAUACAAJAAkAAwD+//7//v8AAAMAAAD7//r///8CAAIAAQADAAUABQADAAIAAgAEAAMABAAAAPz//v8BAAEA/f/9//7//P/8//n/+P/2//b/+/8AAAIAAwAHAAkACAABAP3/AAACAAEA/////wIAAgADAAEAAwAEAAAAAwD///z/+P/7/wAA/P/9//z//P/8//z//f/8//v///8DAAMAAwAGAAcAAwABAAIA/v/2//b//P8CAAMABAAFAAUABwALAAkABwAEAAAA/v/+/wAA///+/wAA/f/6//r/+//7//v//f8AAP7//f8CAAcABwACAAEABAAIAAcAAwAAAP7/AgABAPv/+//9//3//v/+//3//P/8//n/9//6//r//P///wMAAgAAAAEAAQAAAAIAAAD///7//P/6//z//f/8////AgADAAMABgAGAAQAAwABAAMABAAAAAEAAwABAAIAAwADAAIA///9//z/+//6//r/+v/8/////v8AAAMAAgADAAMAAQABAAAAAQACAAAA//8BAAEA//8AAP7//f/+//v/+//5//v///8BAAAAAAADAAgABgADAAIAAgAAAAAA///7//n/+v/9//r/+f/7//7/AQAAAP//AQACAAEA//8BAAIAAAD//wEAAQD///3//v/+//z//f////3/+//+/wMAAwACAAIA/////wIABQAFAAUABAAEAAgACQAFAP///v/8//b/9P/4//n/9v/3//j/+v/7//3///8AAAAA/v/8/wEACAAGAAEAAgADAAAAAgD///n/9//3//n/+//7/wAAAQACAAUABQAEAAcABwABAAIAAQD8//n/+//7//v/+P/2//T/9f/6//z/+f/4//7/AAAAAAYACAAIAAgABgAFAAMABgAGAAUAAwAEAAQAAQ
ADAAQABAAAAP7///8DAAEAAAAAAP7/AgAGAAYABQACAPz/+//5//r/+//6//f/+f/6//v//f/+//7/AAACAAMAAwADAAMAAwAAAAMAAwABAP3/+P/3//v/+v/5//n/+v/9/wAAAgACAAEAAwACAP7//v/+//3//v//////AQACAAIAAgD//wAAAgD///v/9//3//j/+f/9/wAABAAFAAUABgAHAAcABgAEAAMAAQADAAYABQAAAAAAAAACAAIAAAABAP//+//5//v//v8AAAIAAgABAP7/AAADAAIA/v/7//v//f////3//P/8//3/AAABAAIAAwAFAAkACgAHAAYABQAFAAQAAgADAAYAAwABAPz/9//1//X/+P/7//f/9f/6/////v/6//b/+v8AAAQABgAEAP7///8BAAMAAwACAAQABQAAAAAABAAHAAoACAAJAAgABQADAAQABQAIAAUA+v/3//f//P////3/+v/4//j//P8CAAQAAwD///3//v8BAAMABgAAAPz///8AAAEAAwAFAAYAAwD+////AwAIAAYAAgABAAEAAwAGAAQA///7//n/+f/6//v/+v/2//T/8//5/wEABAAEAAMAAAD//wAA/v8BAAIAAQAAAAAAAAABAAUACgAOAAsABQAAAAIACAAKAAcAAwD///v//f8CAAQAAwD//////v8BAAgACQAIAAUAAgAAAP///v///////f/6//3/AgAGAAgACAAHAAMA/f/5//v///////r/+f/5//v/AgAJAAsACAAEAAIA+//y//T/AQAHAP//+P/5//7/AgAGAAkABAD4/+//8v///wcABAD5//D/9v8KABUAEQAFAP//AQD9//7/CwAOAP7/8//3////AwAHAAwACAD3/+7/+/8NABUAGQAHAOz/6v8BABMAEQAFAP7/+f/y//P/AQAOAA8ABQDy/+f/7f/6/wsAFAAEAPD/7v/7/woAEwAMAAAA/P/9//r//f8EAAcAAQD//wEA/P/3/wEACAABAPr///8AAP3//P8BAAMABgACAP//AgADAP7//f8AAP7//f8DAAUA/v/7/wIAAAD7/wIABgD+////BgAEAAAAAAD//wgAEwANAAIA///2//b/AgAHAAMABAABAPf/9P/6/wIACgAFAPn/+P///wIABAADAP7/9//4/wEABgD8//f/AAAFAP3/9f/1//3/CAAKAAAA+P/7/wEABQADAPv/+v///////f/7//z//v////7/AQADAP///v8CAAMAAgACAAAAAwACAAIAAwAFAAUAAAABAAEA//8AAAMAAQABAAIA/////wMAAwAAAAIAAgAAAAAAAAD//wAAAQD///z//v8BAP//AgACAP3//f////7//v8BAAIA///+//z//f8DAAMA//8BAAAA/v8BAAIAAwADAAEA//8BAP3//P8AAAIA/v///wEA//8CAAMAAAACAAIAAAD///3//f8AAAIABQADAAAA/v/+////AAAAAP7//P/+/wAAAgABAAEAAgADAAAA/v/9/wAAAwABAP3/+//9/wMABwAHAAIAAQACAAEA/v/9/wIAAwD+//v/+//9/wAAAgABAP3//P/+/wEAAQD8//r//P8BAAQAAwACAAQABQAFAAEA/v/9/wAAAAD4//T/+P/9/wIAAgACAAQABwAIAAcABQAHAAoACgAFAP//9//2//v/AQABAP///P/8/wAABQAIAAgAAwABAP///P/3//v/BQAMAAcA/f/7/wMADAAOAAUA+//2//n//P/7//v/AQAEAAEA+v/7/wUADgANAP//9v/3//z/AAAAAAEA/f/6/wAACwAJAP7/+f/+/wIA///4//b/+v///////P/7////BwAIAAIABAAEAAEAAQD9/wEACQAMAA0ACQAFAAEAAgAFAAEA/P/6//r//P8AAP7//f8EAAkADQAFAPr//P/9//r/AQACAP3/AwADAP3/BAAJAAYABQ
D7//L/9P/3/wAAAwD6//n/AAD+/wIACAABAAMAAwD+//r//P8CAAcABgAFAAQAAwAGAAsABAD9/wEA+//2//f/+v/9/wAAAgADAP7//f8AAP////8FAAMA/f/6/wAABwAKAAQAAwADAAIAAQD+//7/AAAAAAEAAAD9//3/AgAEAP//+v/6////AAAAAAAAAwADAP3//f8BAAIAAwABAP7//P/8////AgD+//v/9v/z//n//f8BAAUAAwD9//v//f8DAAUABgAKAAUA/f/8/wIAAgD+//v/+f/5/wAA/P/8/wMAAwAFAAUA+f/1//P/8//7/wEABQALAAoADgAWABgAHAAZABEAAwD3//X/+/8CAAkABQABAAQAAgAGAAkABAD9//b/7P/v//f/+//+/wEAAAABAAMAAQD+//7//f/3//b/+/8AAAQACAAIAAcABwABAP7///8AAP//+P/z//X/9v/4//n///8GAAYAAAD4//r//f/7//7/AAD//wYABwAAAAMABwADAAMABAAAAP7/AAD/////BAABAPr/+f/4//n//P/8/wEAAgADAAUAAwAAAAEABgACAPz///8EAAcABgADAAcADQAOAA4ADAAAAPf/8v/y//n//P/9/wEA/v/4//n/AgABAPf/7v/o/+f/6P/r/+7/9P/4//v/AgALAA8ADAAHAAEA+f/5////BAAFAAYABQAIAA8AFAAVABAACQAJAAgABgACAPz/+v/6//3//v8AAAQACAAIAAIA+//5//3/AgAFAAMA/f/+/wIAAwD//wAABQACAPn/9P/3//v//P/+//3/+f/4//r//P/6//j/9v/5//7/AQAFAAkACwAHAAUACgAKAAYAAQD7//T/8f/x//T/9f/0//n/+//7/////v/9/wAAAQD5//L/9P/4//3//f/8//7/AwABAAIAAQAAAAMACQAHAAYACwALAA0AEgAQABAAEgAMAAYABgD///r/+//+////+//5//r/+v/9//z//v8AAPj/+P/5//n//v/9//z//f/6//v/AAAFAAkACgAGAAYABwALAAkABwAHAAAA+v/8//v/+f///////f/7//n/9//3//n/+f/6//n/9v/y//D/8//9//7/+//7//j/9f/5//7///8DAAMA/v/+/wIABAALABAACgAGAAcABQACAAUACAAJAAoACwANABIAEgAPABAADAAKAAoABAACAPz/9P/z//L/9f/7//7////6//T/9f/2//n//P/8//v/+f/7/wAAAQAAAPz/+P/6//z//P/7//v//P/7//n/+/8AAAQACQAIAAMA/f/9/wEAAQAAAP7/+P/7////AgABAPz/+/8AAAUABQACAPv/+f/6//3/AQD///z/+v/2//L/+P/+/wIACAAJAAMAAgAFAAYACgAOAAsACAAJAAUABQAIAAYAAwABAAEABAAIAAUAAwACAAEAAwADAAEAAQABAAEA///7//r/+P/5//z//f/6//n/+f/9//z/+v/5//3/AgADAAMAAAD//wAAAgACAAEAAQAAAPz/9//z//X/+P/7//v//f8AAAEAAAABAAEAAgADAAMA///8//v/+//+//7//v8AAAEAAQAEAAUABgACAAAA//////////8AAAEAAQAAAAAAAAABAAAA//8AAAEAAAD+////AQADAAUABQADAAEAAgAHAAMAAQD//wAAAQABAAEA///+//7/AQABAAIAAwADAAAA+v/5//3///8AAAIAAAD9//r/+P/6//3/AAABAAIAAgAAAAEAAQD///3//f/8//v/+//7//3//f/+/wMACAAKAAoACAAFAAgACgAKAAcAAAD9//z//f///////P/6//f/+f/7//3/AQAEAAIAAgAAAAAAAwABAAAA/v/9//7//v/6//n/9//0//T/8//1//r///8DAP//+v/+/wgADwARAAcA+//1//T/+P/5//z//f8BAAEA//8AAAMACA
AIAAUABQADAAEA/P/9/wMADQAPAAsAAQD4//X/+P/9/wMABwAEAP///v///wYACwALAAgABAACAP///v///wEAAgACAAIAAQACAAMABQAGAAgACwAKAAYA///4//X/+P/8//v//P/9/////f/4//b/+P/8//z/+P/3//j/+//+/////v///wEABAACAAEAAAD+//r/+f/6//3//f/+//7//f/9////AQAEAAcABwAEAAIAAQACAAMAAwAEAAUACgAKAAgAAwD//wAAAQACAAAA///9//7//v8AAAUACAAIAAQAAQD//wAAAAABAAIAAQACAAEAAQABAAEAAgACAAMAAgD///7//v///wAAAAD/////AAACAAAA/v/8//z//f/7//r/+////wIABQAFAAMA///8//r//f8CAAUAAwD///r/+f/6//7/AgAFAAgABgADAAAA/v/6//n/+P/8/wMACQAKAAgABQAEAAUACQAPAA4ABgD8//T/8v/0//r/+v/5//z/BQARABwAIAAdABQACwAKAA0ADgAKAAAA9P/v//X/AQAJAAYA/v/3//b/+f/9//v/9P/v/+3/+P8KABgAHAAUAAgA///9////AwACAPn/8f/r/+v/8P/8/wcACwAIAAEA+//4//r/AAACAP//+f/1//j/AAAJAAgACQAGAAcACAAGAAIA/v/+//7///////3//P/9/wAAAgACAAMABAAFAAgABwADAP//AAD+/wAAAAABAAIACAAKAAYA///8//3/AQAFAAkACwALAAYAAQD+/wAAAwAFAAUAAQD9//j/9v/1//r///////7//v/+//7///8AAP//AAABAAEAAwAFAAcABwAFAAIA///+/wAAAgABAAIA///8//3//P/+/wAAAQACAAIAAQD///7//f/9//3//f//////AwAFAAYABQAEAAUABwAFAAUAAgABAAIABQAHAAcABAAAAP3//f/8//v/+f/3//j/+v/+/wEAAgABAAIAAwABAP///P/7//r/+f/6//z///8DAAQABAAEAAIAAQADAAQABAAAAP7//f/+//7//f/9///////+//3////+//7/AAABAAAAAAAAAAEAAgAEAAIAAgAFAAQAAgABAAMAAgADAAIABAAFAAMAAwABAAAAAQABAAAAAQAAAP///P/7//3//P/8//3//v/+//7//v///wAA/v/+//////8AAAAA/v///wEAAQABAAIAAgACAAUABQADAAEA+//6//z//P////////////7//v//////AwAGAAMA///6//b/9v/9/wQABgADAP3//P///wYADAALAAYA/f/1//L/+P/+/wMAAgD9//r///8JABMAFQALAPv/7f/r//X/BQATABEABQD3/+//9v8EABAAEgAJAPr/8P/s//H/+v8BAAgACQAIAAQAAgABAPz/+P/2//b/+v/+//7/+v/4//n//f8DAAoACgAHAAIA/f/5//n/+f/9////AAABAAEAAQACAAEAAQACAAIAAgAAAPz/+v/7//7/AQACAAAAAAABAAIAAgD///7///8BAAIAAQAAAAIAAwACAAAA/v///wAABAAEAAMAAQAAAAAAAQADAAcACgALAAYAAAD8//j/+P/3//j/+//8//3//P/9/wAABAAIAAYAAQD9//3//v8AAP//AgADAAYABwAHAAgABgADAP//+//4//j/+P/3//b/9P/z//f/+v/+/wAAAQABAAEA/P/5//n//P8BAAgACgAHAAQAAgADAAQAAwACAP7/+v/2//T/9P/3//3/AwAEAAIAAwAFAAgACgAHAAIA///9////AQABAAEAAgABAAAA/////wEAAgD///v/+f/7//7//f////7//v8BAAQAAwADAAIAAQADAAQAAwABAAEA/f/8//3///8CAAQAAwABAP7//P8AAAAAAgABAAEAAQD//wAAAwAFAAUABQAEAAIAAgD+//z/9v
/y//L/9v/6//3////+//3//P/9/wEABQAHAAYAAQD9//z//v8CAAkACQAGAAEA//8AAAMAAgABAP7///8AAAAAAQABAAEA/v/6//b/9//8/wAAAwAEAAMACAALAAsACgAGAAQAAgACAAIAAgABAAEA/f/8//3//v8DAAUABwAEAP3/+P/2//b/+f/5//r/+//+//7//f/+//7/AQABAP3/+P/2//j/+P/0//b/+v8CAAgADAAKAAkACAAHAAcABgAEAAAA/v/8//z//f/6//n/+P/4//v//f/+//7////+//7///8CAAkADQAOAAoABQAEAAgACQAIAAMA/f/6//z//v8CAAQABgAGAAMAAAD+/wIABAAAAPv/9v/1//j///8GAAsACAAGAAQABQAEAAUAAwD8//f/9f/5//v//P/8//7////+//7/AQAEAAMAAAAAAAEAAQAEAAcABwAGAAQAAgD///3//P/+//7//P/8//3//v8AAAAAAwAHAAgABwAHAAQAAAAAAAIAAwACAP///v/+/////v/9//z//f/+////AAAAAP///v////////8AAAEAAQABAAEAAgABAAAA///8//z///8BAAAA/v/7//r/+//7//n/+f/4//r//P/8//z//v8CAAYABwAGAAQABAAFAAQABAADAAEA///+////BQALAA8ADgAKAAUAAgD///z/+v/4//v//P///wQACwASABgAFwARAAsABAD///z/+P/3//f/9//4//v/AQAKABAAEQAJAP7/+P/3//3/AwAHAAQA///8//r//P/9/wIABgAFAAAA+f/2//X/+f/7//z/+v/7/wAAAwAFAAYABwAHAAYAAQD9//n/+v/6//f/9P/0//f//v8CAAMAAAD//wAAAAAAAAEA//////7///8FAAcACwAMAAoABgAFAAMAAgADAAMABQAEAAQABAACAAMABQAFAAMAAQD9//v//P/7//r/+P/6//3/AgACAP///f/8//7/AAACAAMABQAFAAQAAQD//wEABAADAAAA/f/5//r/+v/7//v//v8EAAoACgAEAP7/+P/5//v/+//6//r//P/9/////f8AAAQABAADAP///P///wMABAABAP//AAAGAAsADAALAAgABwAEAAMAAAAAAP/////////////+//7//P/7//z//P/+//7//P/6//v/+v/9/wIABQADAAIAAQD//wAABQAJAAcABAAAAPr/+f/8//7//P/8//r/+P/3//v/AAAGAAsACwAEAP7//v/9//3//f/9//v/+v/8//z//P8AAAQAAwABAAAA///9//7///8BAAMABAAEAAEA/f/7//v/+//+//7//v8BAAUACgANAAwACQAIAAUAAgD9//n/+v8AAAQA/v/2//D/8v/4//z//P/9//3//P/5//f/+f8AAAUABwAGAAEA/P/5//r//f8BAAMAAgAAAP/////9//v/9//2//n//P/+/wAAAQACAAUABgAKAAwADwALAAYA/f/0//L/8v/z//X/9f/2//b/+f/8////AgAHAAkACwAMAAsABgABAP//AQAIAA4AEAAMAAgABAACAP/////+//3/+f/z/+//7v/y//b/+P/5//z/AAABAP7/+P/0//f//P/8//r/+P/7//3//P/3//T/9P/2//P/7//y//n/AwAKAA0ACgAGAAIAAgAEAAMABQAFAAIA/P/y//D/9f/9/wcACQAGAPv/8f/t/+7/8//3//v/+f/x/+b/3f/j//b/CwAWABgAGAAWAA8ABwAEAAoAFQAUAAUA9//z//f/9//0//T/9v/2/+3/4//i//D/BAAVABoAFQAMAAcABgAIAAYAAgD6//P/7//q/+r/7//7/wYAAwD4/+3/6P/x//r////8//j/+v8EAA8AFgAYABgAEgACAPL/6v/w//v/AwAFAAUABAACAP3//f8DAA8AGgAdABcADQAEAPv/9f/x/+//7v
/q/+b/5v/t//n/BAALAAwACQADAAIABwARABMADgAGAAEA///9//j/9v/3//X/8P/p/+b/7P/4/wMABgAHAAgACgAKAAUAAAAAAAMABQD9//L/7//y//j/+v/4//v/AgAHAAoABwADAAQABwAIAAYA/v/6//v/+v///wgADwAUABAABgD7//L/7//3/wAACwAQAAwABwAEAAMABgAKAAgAAQD5/+//6f/s//D/+v8GABAAFQAUAA4ABwADAAEAAAACAAUABQAGAAcABwAIAAoABgAAAPf/8P/x//j/AAAFAAYAAgD//wIACQALAAoABAD8//n/9f/y//H/8//2//r//f/8//z//////wAAAwAHAA0AEQANAAgAAgD+////AQAFAAcABQAAAP3/+//7//v/+v/5//f/+P/7////BAAJAA0ADQALAAYAAQD8//v//v/+////AAAAAAEAAgACAAMABQAHAAQAAgACAAMABQAGAAUAAgABAAEA/////wAAAAD///7///8EAAcACwALAAgAAQD8//z/AAAGAAcABAD///f/8v/z//j///8FAAoADAAIAAQAAQAAAAAAAQABAPz/+P/2//j//v8JABAAEwARAA8ABwAAAAEAAwAAAPr/9f/x//T/AAAOABIACgD///j/9f/5/wAABQAHAAgACQAOABAADAAFAP//+v/4//b/9v/2//X/+P///wYADQAOAAgAAgD9//f/9v/5/wAABAAIAAcABgAFAAUABgAEAAEA/P/8//v/+//6//v//P///wQABwAHAAUAAQD/////AAAAAAAAAgAFAAcABgAEAAEA/v/9////AQABAAMAAgABAAEAAgADAAYACQAJAAUAAwABAAEAAAAAAAIAAAD+//z/+v/7////AQACAAMAAgADAAMAAwADAAMAAAAAAAMAAgAAAP///f/7//z///8BAAYACQAIAAQAAQAAAAAA///+//3/+//4//n//P/9//z//v8AAP////8CAAQAAgAAAAIAAQAAAAAAAwAEAAQABgAIAAcAAQD+//z/+f/4//n//P/+/wAAAwAJAAcAAgACAAUABgAFAAoADAAFAPz/+P/+/wEA//////z/8v/s//H/AAAJAA8AFAASAAgA/v/8//3//f8CAAcABQD+//j/+v8AAAUACAAHAAgAAwD///z/+//5//7/BgAMAAkAAQD6//P/7f/w//r/AgADAP//+//7//z//v8FAA4AEwARAAoAAQD+//7//v/9//r/9f/y//T/+P/7//7/AwAGAAMAAQAGAAoADQAOAA4ACwAHAAQAAAD///z//v////r/9P/x//L/9f/3//v/AQAEAAQAAwADAAUACQANAAsABwAHAAcAAwAAAP//AgABAP7//f/7//n/+v/8//7//f/+/wAAAQD///7//v8AAAAAAwAIAAgABQACAP7//P/6//z/AAABAAAA/v/9///////9/////v/8//v//P/+/wEABQAHAAgACAAGAAQAAwABAAAAAAD//wAAAgADAAMAAgACAP7/+//5//j/+P/5//3/AgAGAAgABwAGAAIAAAABAAMAAwD///v/+f/6//r//v8BAAUABQAEAAIA//8BAAUACwANAAsABAD7//n/+////wEAAQADAAIA/v/5//X/+P/9/wIAAgD+//v/+////wQABAADAAAA/v/8//v/+//8//v//v///wIAAgADAAQABAACAP3/+v/6//z///8DAAQAAgAAAP3/+//8//7///8BAAIAAgAEAAUABQAEAAIA///9//r/+P/4//j//P8AAAIABgAHAAYABAABAP3/+//8//7/AAACAAMABAAEAAMAAAD9//v/+v/5//r/+/8AAAUABwAHAAcACAAIAAUAAgD+//n/9//3//n//v8DAAYACAAGAAIA/f/6//j/+f/6//v//P/8//3//v8AAAMABQADAAAA/v/9/wAABAAGAAUABgAFAAQAAQ
D9//v/+//6//r/+f/3//j//f8CAAIAAgACAAEAAQAAAP3/+v/4//j/+f/7//7/AQAEAAYABQADAAEAAAD///7//f/6//n//P8CAAQAAgAAAPz/+//8/wAABAAHAAgACQAFAAAA/f/7//z//v/9//z/+v/3//n//f8CAAYACAAJAAYAAwD9//r/+v/7//3/AAD///3///8BAAEAAwADAAEAAAD9//n/9//3//r///8FAAYABQABAAEAAAAAAP3//f/+/wAAAwAFAAQAAQD+//z/+//9//z/+//7//v//P///wMACAALAAwABwADAP3//v/9//z/+f/4//n/+v/9//7///////7//v/9//v//f8BAAQABAAEAAUABAAFAAQAAQAAAP7//f/8//n/+f/8/wMACgAMAA0ACQADAPv/9v/0//f/+//9//z/+//7//7/AwAHAAoABwADAP3/9//0//X/+P8AAAQACAAJAAoACgAIAAQA/f/6//n/9//4//r/AgAKABEAEQAOAAoABwAFAP//+f/0//T/+P/+/wAABQAMAA8ADgAEAPn/8//0//f/+////wEABgAHAAUABAAEAAUAAwD///j/9f/1//j//v8EAAcACAAKAAkACAAGAAIAAAAAAAMABAAEAAIAAAD+//3//P/8//z//P/7//v//f8AAAMABQAFAAIAAAD+//z/+//7//r/+v/6//r//f8BAAMABQAHAAUABQAEAAMAAgAEAAQABAAFAAUAAwABAAAA//8AAP7//v/+//3//v8BAAAAAwAGAAcABgAEAAIAAAAAAP///v/+//7//v///wAABAAIAAoACwAJAAIA/v/7//v//P/+/wEAAwAFAAQAAwAAAAAAAQACAAMABAAEAAIAAgACAAQABgAHAAYABgACAP7/+v/4//n/+//+/wEAAgADAAQAAQABAP//AAADAAQABQAFAAQABQAEAAMAAwACAAAA+//4//j/9//5//z//v8BAAIAAgAEAAQAAwAFAAQABgAEAAEAAwABAP/////6//r/+v/6//v//f/+////AAD+//3/+v/+//7/AQAEAAQABAADAAIAAQADAAMAAwD+//z/+f/8/wAAAQAEAAcABwAHAAcABQAEAAEA///9//v/+//9//7//f8AAAEAAQACAAEAAQAAAAEAAgAEAAQABQAEAAQAAwABAAIAAQAAAP3//P/6//r//P///wEAAgADAAIAAgACAAIAAQABAP///v//////AQABAAEAAQAAAP////8AAAAAAAD+//////8BAAMABAADAAEAAAAAAAAAAQAAAP7//v/+//3////////////+//3//v///wEAAwACAAIAAQABAAAAAAAAAAAA//////7//P/8//z//v/+//7//v/+//////8BAAEAAQD//////f/+//3///8AAP////8AAAAA//8AAP//AQACAAMAAQD+//r/+f/6//v//P/9//7//v8AAAIAAgACAAAAAAAAAP//AAACAAMABAAFAAMAAQD+/////v/+//////8BAAIAAwAFAAQAAwABAP/////9//z/+//+/wEABAABAPz/+f/4//r//P/+////AgAEAAUABQABAP///f/8//n/9//3//z/AQAJAA4ADgALAAMA+//1//H/8P/0//n///8EAAkADwAPAA0ABgD9//T/7v/t//H/9v8AAAYACwANAA0ACQACAPv/9//1//f//P8AAAMABAAFAAYABAADAP//+f/0//X/+v8BAAkADgAPAAoABAAAAPv/+P/3//j/+////wIABgADAAAA+//3//b/+P/6//z//v8BAAQACAAKAAkABAD8//T/8P/z//z/BAAKAAoABQD+//z/+//8/////f/6//j//P8CAAsAEwAUABAABgD7//L/7//y//n/AAAGAAgABQD///z/+f/6//7/AwAIAAkACAAGAAIA/f/5//f/9f/1//n/+/8CAAoAEA
AVABcAEgALAAEA9P/s/+f/6P/w//r/BgANABAADwAKAAQA/P/1/+//7//0//v/BAAKAAoABgACAPv/+P/3//r//v8AAAIAAgABAP///f/9////AQACAAEAAgABAP//AQADAAUACAAJAAcABAACAP/////9//r/+P/4//f/+P/5//r//P///wIABgAOABEAEgAQAAkAAQD5//X/9v/5//7/AgAFAAUAAgD///z/+f/5//r/+//9//z/AAADAAgADQAOAA0ACAAAAPv/+f/6//z/AQADAAUABQAGAAYABQAEAAAA/P/4//b/9v/5//z///8BAAAAAAAAAAAA/v/9////AAADAAQABgAGAAUAAwACAAAA/P/8//v//f/+/wAAAwAEAAQAAgAAAP///f/8//z//f/9/wEAAgAEAAUAAwACAAAA//8AAP//AQABAAAA/v/+//3//f/+////AQABAAEAAgABAAEAAAD///7//P/9/////v8AAAEAAgAEAAQABAADAAAA///9//3//P/+////AQAEAAQABQAEAAMAAgACAAMABQADAAIAAQD/////AQACAAQABAAFAAUAAgAEAAUAAwADAAMAAAD+//z/+//+//7//f/+//7/AAADAAYACAAIAAcABAAAAPz/+v/5//v//f///wAAAQABAAAAAAD+//v/9//2//X/9//6/wEABwALAA4ADQAIAAIA+//3//T/9f/1//b/+v/9/wAAAQACAAEAAAD+//v/+v/6//7/BQAIAAsACwAJAAYAAwADAAQABAAEAAMAAQD///7//v/9//7//v8AAAMABgAIAAsADAAKAAoABwAEAAAA/f/9//v//P///wEAAwADAAIAAAD9//r/9v/0//b/+/8DAAoADwANAAgAAgD8//j/9//5//r/+v/6//r/+v/9/wAAAAAAAAEAAAD9//3//v///wEAAgABAP///P/6//n/+v/8/wEABgAJAAwADQALAAgABAABAP3//f/9/wEAAwAEAAUAAwABAPz/+f/2//T/8v/0//n/AgAKABEAFQAVABAACAAAAPj/9f/1//b/+//9/wAAAAAAAP///f/8//n/+P/3//j//v8FAA0AFgAYABQADQACAPf/8P/t/+//9P/6//7/AQABAAIAAgADAAIAAAD9//v/+/8AAAYACwAPAA4ACgAEAP7/+f/4//f/+P/4//n/+P/5//v//P/+/wEAAQABAAAAAAAAAAIABwAJAAoABwACAP7//f8AAAQABwAJAAYA/f/3//T/9v/7/wAAAgACAP///v/9//z//f/9//v/9//2//b/+f/+/wQACQAKAAgABAAAAP3/+//6//v//f/9/wAABAAGAAsADAAJAAUA///5//b/9v/5//z/AQADAAUABAAFAAQAAwACAAEAAAD+//7///8CAAUABQAEAAAA/f/4//f/+f/8////AQACAAQABAAEAAQAAwACAP///f/8//z//f/+////AAAAAAEAAQACAAEAAQAAAP7//f/9//3//P/+/wAAAAD///3//f/9//3//v///wIAAgADAAQABQAEAAQAAwACAP7//v/+//7///8AAAIABAAEAAMAAwABAAAA/v/+//////8AAAEAAAD/////AAABAAEAAgACAAEA//8AAAAAAQABAP///v/9//z//f/+/wAAAQADAAEAAgABAAMABAAFAAUABAACAAAA/v8AAAAA///+//3//P/8//3//f/+/wAAAAABAAIABAADAAQABAADAAMAAAD9//z/+//7//3///////7//f/9//7//v8BAAIAAwACAAEAAAABAAQABgAKAAoACQAGAAMAAQD//////f/6//n/+P/4//v//v8BAAQABgAFAAMAAQD+//v/+f/5//n//P/+/wEAAwAFAAUABQADAAIA///+//7//v8AAAIABQAHAAgABwAGAAIAAAD9//v/+P/4//r/+//8/wAAAgACAAMAAw
ADAAIAAQACAAEAAQABAAMAAQABAAAA///+//7//v/+/wAA//8AAAIABAAEAAYABwAIAAcAAwAAAP7//P/8//3//f////7//v////7/////////AAD//wAA///+/////////wAA//8AAP/////+/////v/9//3//f/+/wEAAwADAAYABQAEAAQAAwABAAAAAAD//wAAAQACAAIAAgD//////v/+//z//f/8//z//v/+/wAAAQADAAQABQAFAAUAAgAAAP3//f/7//v//f/9/////////wAAAQACAAIAAwACAAEAAQAAAAAAAQABAAAAAAD+//z/+v/7//z//f8AAAIAAgADAAUAAwABAAAA/v/9//7//f/+//3///8AAAAAAQABAAAAAQD///7///8AAAEAAwADAAMAAgACAAEAAgAAAAEA///9//z//P/8//3///8CAAMAAwADAAIAAgABAAAAAAAAAP///f/8//z/+//8//3//////////f/9//7/AAACAAQABAAEAAUAAwACAAIAAQAAAP///P/7//z/+//8//z//f///wAAAgACAAMAAwADAAMAAQD//////v/+//7//v/+/////v/9//3//P/8//7//v8AAAEAAgAEAAYABwAGAAUABAABAP7//P/8//3///8BAAMABAACAAMAAQABAAEAAAAAAP/////+/wIAAwAEAAUAAgD+//z/+v/6//3//f///wAAAAAAAAAAAQACAAMAAwACAP7//P/6//r/+v/7//z//P/6//r/+v/8//7/AQADAAQAAwADAAIAAAABAAIAAQACAAEAAAABAAEAAgADAAIAAgAAAP////8AAAAAAQABAAEAAQABAAEAAQADAAIAAAD+//3//f/+/wEAAwAEAAcABwAFAAQAAgACAAIAAQD///z//f/+////AAABAAIAAgACAAAA///+//7//f/9//7//f/+////AQADAAQAAwAAAP///P/7//z//f///wAAAQABAAIAAwADAAQABAADAAEAAAAAAAEAAAD///7//P/7//z//P/+/wEAAQABAAIAAQABAAMABQAEAAMAAQAAAP///v////7//v/+//z//P/+/wAAAgAFAAcABwAFAAIAAQAAAP7//f/8//3//f/8//3///8AAP///v/9//7//f/+//7/AAACAAQABQAEAAQAAgAAAP///v/+//3//f/9//7/AAACAAEAAgAAAP7//P/7//3/AAADAAUABQAGAAUAAQABAAAA/v/9//3////+/wEABAAFAAUABgAFAAMAAgD//////f/+//7//////wAAAgACAAMAAQAAAP//AAABAAEAAQABAAIAAQABAAAAAAACAAEAAQAAAP///f/+//////8AAP/////9//z//f8AAAAAAQADAAMAAgD///7//f/9//7//f/+//7//v/+////AAACAAQABAAEAAMAAwAEAAMABAADAAEAAQD///z/+//6//v//P/+////AAABAAEAAQABAP//AAABAAEA/////////v/+//7//v/9//3//P/6//z//f8AAAMAAgABAAMAAwACAAEA//8AAP///v/8//z//v8AAAAA/v/9//z//f///wEAAgAEAAYABwAFAAEA///+////AAABAAEAAQABAAIAAAACAAIAAQACAAEAAQACAAUABwAJAAkABwAEAAIAAAAAAP///f/8//z/+//9//7/AQADAAQAAwACAP///v/9//7////+/wAAAAD+//7//f/7//n/+P/3//f/+f/8/wAAAwAFAAcABwAFAAEA///+//7//v/+//7//f/9//////8AAAEAAwAAAP7//f/7//7//v8BAAIABAAFAAMAAgABAAEAAQAAAP/////9//z//P/9/wAAAwAGAAcABwAJAAgABwAHAAUAAwAAAP///f/9//7/AAABAAEA/v/8//n/9v/0//b/+v/9/wMABwAJAAsACwAKAAgAAw
AAAPz/+P/1//b/+f/7//3/AAABAAEAAAD///7///8BAAIAAwAGAAYABgAHAAcABgAEAAAA/v/8//v/+//8//7/AAADAAMAAQABAAAAAAAAAAIAAgABAAIAAgACAAMAAwACAAEAAQAAAP7///8AAAAAAQACAAAAAAABAAEAAgABAAEAAQABAAEAAAD//////v/+//3//f/+//7//f/+/////v8AAAAA/////////v/+/////////wIAAwAEAAQABQAEAAMAAgACAAIAAgABAAEAAAABAAEAAgABAAIAAAD//////v//////AAACAAEAAgACAAIAAgABAAEAAQABAAEAAQAAAAEAAQABAAEAAQABAAEAAQAAAAEAAQAAAP///v8BAAAAAAAAAAIAAQABAAAA///9//7///8AAP//AAACAAMABAADAAEA///9//z//f/9//v//f///wEAAgAEAAUABwAHAAQAAgD///7//f/9//3/AQADAAMABgAJAAoACgAHAAUAAwAAAAAA///+/wAAAAD////////+//7//P/9//3//f/+//z//v///wIAAgACAAQABAAEAAIAAgABAP7//f/8//z//f/9//7/AAAAAAMAAgACAAEAAQAAAAEAAQABAAEAAAAAAP/////+//7//v/+//7//////wAAAQADAAIAAQABAAAAAAAAAAAAAAABAAIABAAFAAcABgAGAAUABAADAAIAAQAAAP7//v8AAP//AQACAAEAAQAAAP///P/+//3//v/+//7/AAD//////////wAAAAD+//7/AAAAAAAAAQADAAIAAQAAAP//////////AAAAAAAAAQAAAAAAAAAAAAEAAAD//wAAAAAAAP///v/+//////8AAAAAAQACAAIAAgACAAEAAQAAAP//AAD//wAAAgACAAMAAgABAAIAAQABAAAAAAD+/wAAAAAAAAAAAQACAAMAAQAAAAAA//////////8AAP7//v/8//3//f/9//3//P/8//v/+v/7//v//P/8//3//v/+/wAAAAADAAUABgAGAAUABAACAAMAAgD//////v/9//v//P/7//7//////wEAAQABAAEAAgACAAIAAQABAAEA/////////////wEAAgAAAAAAAAABAAEAAQABAAAA//////7/AAABAAEAAQABAAIAAAD//////v/9//3/+//7//z//f/+//7////+/////v/9//3//f/9//z//f/+/////v///////////wAAAAAAAAAAAQABAAEAAQD//wAAAAAAAAEAAAABAAEAAQABAAAAAgAAAAAA//////7////+//////8AAAEAAAAAAAAAAAD+/////v/+///////+//7///////////8AAAAAAAD//wAA/v/////////+//3//v/+//3//v/+//7/////////AAD//////v////7//v/+//3//v/9//3//f/9////AAD/////AAAAAP//AAAAAAAAAAAAAAAAAAACAAEAAQABAAEAAgABAAAAAQD//wAA//////////////////8AAAAA/v/+//7//v////7//v////3//v/+//7/AAD///7///8AAP//AQAAAAEAAAD//wAAAAD////////+//7//////////v///////v///////////wAAAQACAAEAAAABAAAAAQABAAAAAQABAAEAAQAAAAEAAwACAAIAAgABAAEAAQACAAEAAQABAAIAAQAAAP7////+/////v/9//7///////7//////wAAAAAAAP////////7//v///wAAAAAAAAAAAAD///7///////7//////wEA///////////+/wAAAAAAAP//AAABAAEAAQADAAIAAwADAAEA////////AAD+////AAD///////8AAAEAAAABAAIAAQABAAIAAQAAAAEAAAD//wAAAAAAAP//AQAAAAAAAAAAAP///f/+//7//v
8AAP//AAD//wAA//8AAP///v////7//v///wAAAAABAAEAAgADAAEAAQACAAEAAwACAAIAAgABAAIAAQAAAAAA//8AAAAAAAAAAAAAAQABAAIAAQAAAAAAAAABAAEAAQABAAIAAQABAAEAAAAAAP///v////7///8AAAAAAQACAAIABAADAAQABAADAAIAAQD///7//f/9//3////+////AQABAAIAAQABAP////8AAP///v//////////////AQD+//7//v////7//v8AAAAAAAABAAEAAgABAAEAAgABAAAAAQAAAAAAAAD//wAAAQABAAAAAAAAAAAA/v////7/////////AAABAAIAAgADAAEAAgABAAIAAgAAAAAAAAABAAIAAgACAAIAAgABAAEAAAD/////AAAAAAAAAAABAAIAAgABAAEA/////////v////7/////////AQAAAAEAAQAAAP//AAAAAAAAAAABAAAAAAD+////AQAAAAEAAAAAAAEAAQABAAEAAQACAAMAAgACAAEAAwACAAEAAAABAAEAAQAAAAAAAQABAAAAAQAAAP/////+//3///8AAP//AAABAAIAAQACAAEAAQABAAIAAAABAAEAAQAAAAAAAAD//wAAAAD//wAAAAABAAEAAAD/////AAAAAP//AAD//wAAAAAAAAAAAAAAAAEAAAABAAAAAAABAAIAAgACAAMAAgACAAEAAQACAAEAAAAAAAAAAQAAAAEAAAAAAAEAAAAAAAAA//8AAP///////wEAAgAAAAEAAQABAAEAAAAAAAEAAgABAAIAAgACAAIAAgACAAMAAQAAAAEAAAAAAAAAAQABAAEAAQAAAAAA/////wAAAAAAAAAAAAD///////////7//v///////////wEAAAABAAEAAAABAAIAAgACAAEAAQACAAEAAgACAAMAAwADAAQABAADAAIAAgACAAMAAgABAAIAAgACAAIAAgACAAIAAgABAAEAAQACAAEAAAAAAAAAAQABAAAAAAABAAEAAAABAAEAAQABAAAAAQACAAIAAwAEAAMAAgACAAEAAAAAAP7/AAAAAAAAAQAAAAIAAgABAAIAAAAAAAEA//8AAP////8AAAAAAQAAAAEAAQABAAAAAQAAAAEA////////AAAAAAAAAAAAAAEAAQD//wAA/////////v///wAA//8AAP7////+/wAA/////////////wAA//8AAAAAAAAAAAEAAQAAAAAAAAAAAP///v/+//7/AAAAAAEAAgAEAAQABQAEAAMAAQAAAP7//f/+//3///8AAAIABAAFAAQABAAEAAIAAAAAAAAA///+/wAA//8AAAEAAwABAAIAAgABAAEAAAAAAP///////wIAAQABAAIAAgADAAMAAQABAP//AAD//////v/9//7//v/+//3//f/9//7//v////7////+///////+//7//f/9//7//v/+/////////wAA//////7////+//////8AAAEAAQABAAAAAQAAAAAAAAD//wAA///+/////v///wAA//8AAAAA///+/wAA/v////7///8AAAAAAAAAAAEAAQABAAEAAAAAAAAA/////wAAAAD+///////+////AAD//////////wAA//8AAP7/AAAAAP7/AAAAAP///v////7//f/8//z/+//8//z//f/9/////v8AAAAAAAAAAAAA//8AAAAAAQABAAEAAgABAAEAAwADAAIAAgACAAEAAAAAAAEAAQABAAAAAAACAAEAAgACAAIAAgABAAEAAAAAAP7////+//7///////7//////wAAAAD////////+/////////////////////////wAA/v/9//3//v/9//7////+//7////////////+//3////+//3///////7/////////AAD//wEAAAAAAAAAAQABAAEAAQADAAIAAQACAAEAAg
ACAAIAAQABAAEAAgACAAIAAgADAAMAAwADAAEAAQABAAAAAAD+////////////AAD///7//f///////////wAAAAAAAAEAAQABAAAA//8AAAAAAAAAAAAA//////7//v/9//z//f/8//z//v/+/////v///wAA//8AAAAAAAAAAP//AAABAP////8AAP//AAAAAAEAAAABAAEAAgADAAIAAwAEAAMABAAEAAQABAADAAMAAgAEAAIAAwADAAIAAgABAAAAAAAAAAAAAAABAAIAAAAAAAEAAAD//wEA////////AAABAAAAAQAAAP//AAD//////v/+//7//v////7//v8AAAAAAAACAAIAAgACAAEAAgACAAAAAQABAAAA//8AAAEA//8AAP//AAAAAP//AAD//////v8AAAAA/////wAAAAABAAEAAQABAAEAAQD//wEAAQAAAP//AAD///7/AAAAAAEAAQAAAAEAAQAAAP//AAD///7//v8AAAAA//8AAAAA///+///////+/////////////v//////AAD//////////wAAAAAAAAEAAwABAAIAAQAAAAEAAgACAAIAAgACAAIAAwAEAAIAAgACAAIAAwABAAMAAwADAAMAAgABAAIAAQABAAEAAQABAAIAAgACAAIAAAD//wAA/v////7////+//3///8AAAAAAAAAAAAAAAD//wAAAAAAAP//AAAAAAAAAAD///////////7//v////7////9/////////////f//////AAD//wAAAQABAAEAAQABAAAAAAD/////AAAAAAEAAAABAAEAAQACAAIAAgABAAAAAQABAAEAAgACAAIAAgABAAEAAQAAAAAAAAAAAAAAAAD///7//////////v//////AAAAAAEAAAABAAEAAAD+//////8AAP7///8AAAAAAAABAAEAAAAAAP7///8AAAAAAAD/////AAABAAIAAwACAAIAAQACAAIAAQABAAEAAQADAAEAAwACAAEAAQACAAEAAgABAAEAAgACAAIAAwAEAAMAAgABAAEAAQABAAAAAAACAAEAAAD//////////wAA//////7//v/9//3////+/////f/+//3//v/+/////v////////////3//f/9//7//v8AAAAAAQABAAEAAAD//wAAAAD///7///////////8AAAIAAQAAAAAA///+/wAAAAD/////AAD///7/AAAAAP//AAABAAIAAgACAAEAAQACAAEAAgACAAIAAgACAAIAAwADAAQABQAEAAIAAQABAAAAAAAAAAAA//8AAAAAAgAAAAEAAgABAAEAAQABAAEA///////////+/////v////7//v/+//7//v/+/////////wAAAAD///////////////8AAAAAAAABAAEAAQAAAP////////7//v/9//3//f/9//7//f/+//3//f/9//7/AAAAAP////8AAP///////////v/9//3//v/9//7////+/wAAAAAAAP//AAD+////AAAAAP////8BAAAAAAAAAP/////+//7//f/9//z//P/9//3//P/8//7//v/9/////////wAAAAAAAAAAAQABAAIAAgACAAEAAgADAAIAAgACAAAAAgACAAEAAQABAAEAAAAAAAEAAgACAAIAAQACAAIAAQABAAAAAAAAAP7/////////AAD//////////////v///////v/+/////////wAAAAD//wAA//8AAAEAAAD//wAA///+//3//f/9//3//P/8//z//f/8//z//f/9//3//P/8//z/+//9//z//f/9//7//f/+//7//v/9//7//v/+//////8AAAEAAQABAAIAAQABAAEAAQABAAEAAQABAAEAAQABAAAAAQAAAAAA/////wAAAAD/////AAABAAAA////////AAABAAAAAQACAAEAAQAAAAAAAAD///7///
//////AAD//wAAAAAAAAAAAAAAAAEAAAABAAIAAgACAAIAAgACAAIAAgABAAEAAQABAAEAAAAAAP//AQD/////AAD//wEAAgAAAAEAAQABAAIAAgABAAEAAQAAAAAA//8BAAEA/////wAAAAAAAAIAAQAAAAAA///+//7//v/9//7//v///wAAAAAAAAAAAAAAAP7//v/8//v/+//7//v/+//7//3///8BAAIAAQACAAEAAgADAAIAAAABAAIAAgABAAIABAADAAMAAwAAAAAAAAAAAP/////+////AAAAAAEAAAABAAEAAgACAAEAAgABAAAAAQABAAIAAQABAP////8AAP///f/+//////8AAAEAAwADAAMAAgADAAIAAQABAAEAAgACAAIAAQADAAMAAwACAAEA////////AAD//wAAAQACAAIAAQADAAMAAwADAAQAAgACAAIAAQAAAAEAAQACAAEAAQABAAEAAAD/////AAABAP//AgACAAMABAAEAAQAAgABAP/////+//z//f///wAAAQABAAEA///9//v/+f/3//T/9P/0//X/9v/5//7/AQAEAAcACAAJAAkACAAHAAQABAACAP///v/+////AAABAP//AAAAAP///f/8//v/+v/5//n/+P/5//z///8AAAIABgAJAAsACQAIAAcABQADAAAA/v/8//v//f/8//3//v8CAAQABgAGAAYACAAIAAgABgAFAAUABQAEAAIAAQACAAEAAQAAAP/////+//z/+//6//r/+v/8//3//v8AAAEAAgADAAIAAwAEAAYABAADAAIAAgADAAEA/f/+/wAA/v/+/wEAAAAEAA0ADQAMAAsADQARAA0ABQAAAP7//v/6//X/9f/y//X//f/4//L/6//k/93/3f/d/9v/2//d/+L/4P/l//b/BwANABkAKgAeABYAHgAaAAsABwACAPz/BgD+//7/CgAEAAMAGwAQACwADQFfAWMAjP/1/5IAIgAx/wX/xf/V/+j+o/5q//f/jf9E/+v/oQCFAPP//v+EAI0AHQAAAFYAqQCJAFQAXQCAAF8ALgAJANz/8v/X/4H/ev8SAA4A6f/t/xEAYgDm/5z/IgCHALr/R/8BAFMA7f98/9T/lABjAKX/x/+MAEoAsv/f/xkABwDV/6v/qv/V/8b/3//x/8z/zP8UAFcA5//2/0MAkgBvAG7//f/pAJUAk/9+/68AzQDy/3D/0P+1ACIAJf+A/ywAGQBi/3b/BAAYAKb/d//S/ysAUgAjANH/6f+NAG8Amf/h/zUAQQBJAKv/yP8gAEkAQAC1/7n/ngCvALz/g/9OANwAKACR//j/kgAqAK//zf8wAGgAfv+O//X/+P8nAPP/lP9u/ywAAgCw/0H/YP+hACYASv9N/0UArwAUAIf/6v/vALEA9P+D/4MAKQHG/57/QAACACMAuf9s/wIACACy//L/2f8UAHUA3/+w/wgAmABwAOr/j/9QAI8Abv+z/ywAEABTAOD/jf8pAC4AJgAtAJr/5f82AL//9//i/63/CADd/wEAYgAaAA0AIQBGAGgAEgDi/xcA6f+z/97/MQDD/7P/DgALAO7/cf8oAHcAKQACAOX/8P8TAD4Auv+K/zMAPgDb/5v/xf9BABUA7//W/wYA+P8SAAIAzf/+//X/+v8vADUAyf9PAFYA8//I/+T/kQAnADv/7P/bAL3/gf/6/54A1wC3/2b/yf/XAPH/B//m/z4AZwBg/53/HQDS/xMA9v99ADQAxv+8/zUAQQDi/14ACQBrAOr/Sf8vACwACAD8//H/9//t/9//pf9eAK4AtP+w/8z/YgB4ACr/X/95AJ8ATv8N/5MA9QBT/zD/tQC0AEsAOv91/yIBrAAy/0b/tQC0AGH/BP8TAAMB6P/y/j0AzAAQAET/l//cAHMAn//1/7gAjQB0/8L/igCqANT/tf4bAIYAvv8c/yT/ygCfACj/H/98ANQAwf+K/5cA3A
AFAEz/6v/iAFkAq/8AAGMAzf+K/5n/Uf9VACoA///Y/yb/SACwABsAn/9eAJAAKgAPAJL/EgBXACQAJADN/97/mf+C/6H/vf9XAN//W/8AAEgAJQDc/9r/VwDQAH4Ajf9LADsAPwDP/+j/zQDG/+z/m/+x/wkAvP/K/7X/OQADAHz/EAB1AEIAof8HAA8BoQDN/7j/eQDAAPj/3v8rAAwA6P9t/9H/w/+P/7j/Yf8sAAAARP9o/9n/IgA3AP3/BgDVAB8AIAA+AFQAjwAQAN0AFABs/0v//P9mAHH/gf+m/zkAAgBi/+D/BQC0/z4AUgBNALj/pf8eACEAbQDL//T/YwBNANj/nf8TAFMAOgDp/+f/HwDE/4X/IgCJAAMATf96AJ8AwP+U/4j/0ADJAPD/1/8JAHAAOAB5AE8AHgDWAKr/WP8zAPb/Qf8t/+z/sf+3/x3/XP+cACcABwBUAG8A1v+E/wgAPgBNAN3/1f/3//v/OQAuAIUAdABCACUAmv/N/8X/sf9EACAAuv+4/6T/Y//M/2QAlQAWADX/BABvAFv/cf+JALoA4v/H/5UAOgB0/2r/hwAoASgAVf/p/5kAn//P/38AkAAkAOP/IwAlAAoAOP/R/9YAQQCZ/9z/AgBW/63/XgBTACsAu/+f/wQAAABI/6v/rgAnAJ7/2f/P/8T/5f9OALAA4f/C/x8Aqf9X/+L/5ABWAJv/dv8wAB8AgP9XAI8AfgAmAPb/OwAKAFn/vf/LADgAuv+E//v/MAAW/73/8QCDAIX/1f+wACwAj//W/zgAzABoAK//LwB8AMP/g/+eAPEATABz/5j/8f+P/1//r/9pAAAAwP8kAAgAi/9z/wwAcQCQAPn/yf9IAIH/tv85ABQAdwAYAMn/s//X/5f/zf9xAGUAUAC1/13/hv9V/5j/EwA2APn/hv/C/9b/zP8zAKgAqQCEAEsAuv+2/0AAXABwAIMAdAALANb/9v8hAIgAKAAtAHYAWADn/5//3v8WAB8AOQAjANj/t/+E/9v/IgASANb/Vf+T/+n/kf9h/yX/cv+7/67/C//O/iP/5P5X/6f/uf9g/wz/SP+7/4T/Qv9//wMAdwC+/5P/zf9rAPoA1QA/AVoB4ADKADYBjQGoAZYBVwGGAZoBUQH1ACMBpQGYATsBAgH1APMApgB+AKQANQBfAF8Ahf8y/7H/xv8H/x7/T//X/vH+g/5O/rD+kP5x/rD+yf7S/aX99/0A/qr9Ov52/hz+WP6//R7+l/4J/k/+Xf8O/5r+DP+4/87/xf9ZAB4B7gHPAZsBNQJsAiICTAIPA/kCBgIhAqMCgwLJAcEBYQLsAQIBpgDWAJsASADAAFABFgEtABQAcwDuAMgA1QAIAdwAOQHAALoA2QD/ABAB2QDxACoAYP9u/+n+Pf5M/sv9Jv07/Q/9afwJ/Of7rPu++xX80vt9+4n7oPvQ+2/89Py3/U7+Yf6Q/gH/vf9bAEoBrAFeAqcCVAKDAmAC1gLiAvQCwAPAAyQDdgIlAgsCSwEyAcQBdgFAATMBGwGcAF4AogAUAcMBgAHTASkCAQKfAboBvAJzAngChAJDAtoBMgFRASsBrwBSAIkARQAG/zf+8f3m/WT98Pz8/FX8pvv9+sL68vry+gv77foa++L6TPpY+g37oPta/KD9m/2f/XX+q/5T/zMA0ADRAW0CpwK0AvACRQMIA4sDOgQkBBkEEwRWA/UCCQNcAuoBLQI5AosBNwEjAfoA+wAYAVsBlwHMAZUBlwHiAZgB6wHOAp0CZgJ2Ah4C5AGNASABNwFhAXgAxf91//f+RP7H/dP9xP2Y/ej8o/xf/Ov7nfuF+6T7yfv0++n7rfsY+8T6DPtZ+8v7rPx2/df9kP3j/T7+Wv4s/+L/hQA3AdAB4gG0AQMCVgL8AsUD8QMkBEcE6wN5AzIDIwPmAtoCGgPwAqACFAL9AfQBIAJgAlgC0wL7Au8CIgMHA+cC+gIFAxoDDgOMAt4Brw
FhAbkATwA0AI3/AP9l/rb9WP0p/f38qvyh/Av87fvQ+4v7jPtd+1z7YPts+zX7Cvvs+rn61voW+3L78vtc/L78KP2h/Zf96f3b/mb/IwA2AfABEQJ7AvACJgPHA00ErATWBLMEPQT7A/oDTgP9AikDBwP0AukC1gLxAsACvwIpA2EDOwM+A5cDoQPMA2YDVAOxA2ADXAMgA28CtAFfAQ8BegDu/xf/o/5B/oD9Vv0t/dr8wPyx/Hj86vt8+0P7XPs1+yf7XPsR+5/6OPrP+Z/53Ply+vz6S/uQ+7f7Efyn/F39KP4y/+P/mACYAbkB6wFlAkQDBgRRBLYEwQS8BLUEWQTqA8kDxwPWA5QDVANoA08DBAMQA6IDsAODA6wDAQTnA8oDMwRXBFYEHATuA/QDgQPBAoMCdAKaASMB7wDo/wn/mf4T/ob9Lf3d/KX8hvxV/AT85/uj++36vvqu+kj6Hfo4+vT5afnk+D74ZfjX+If5MfrC+nj7ifvx+3P8Rf1T/gP/jgDNARECPgJ8AuACVgM2BOIERgWgBcgFpQUOBXMEagSeBIQEdASEBEIE5APFA3oDegO+A9oD8QMJBEgEEAQMBIQEVgSNBJ4E3QNpA00DrgLgAb0BAgEyAMf/Hf9c/s79n/1C/Rn9cvzO+7H7FPtk+kD6f/oc+tn5n/ko+Qn5zfiQ+E/4Hfjo+Hn5Xvn0+Uz6z/pz+yv8zfx+/X3+CP8hAPkARAH8AQkDtQP+A64E3wTVBF4FmgWdBVoFNAX/BK0EqwRGBHAEigSRBMoEYAQsBPQD/gMtBBwEWQTQBMwEfQS7BGkEjwOYA6YDPgPsApICwAHrACcA9P5F/hr+lv0s/SD9Qvy6+1H7OvpF+kv61fkW+mL6avm8+Lb4JvhG+OX4+PiB+Oj4TvnX+GD5TfrV+qX7H/wK/Nn8sP34/Vz/xAA1AecBKgOCA4kDaQQZBXAFEQa7BrcGgwYDBtAF0gVaBZ8FsAWKBZYFIgWcBDMEFgQjBCgESQQsBFEEtQRSBDwEfwTnA18DkwOFA0ICtAGyATsAkv9R/x/+cv1v/ff8rPyU/Fv79fqI+hz6A/q6+SD6cvlR+Sz6//iY+JP5jfgz+Kn4afis+HX5APo0+mz6sfoQ+3/7nPxw/df9D/8dADQAlgBIAfwB6gLiA5IEagXnBYUFGwaQBhUGVwbWBuAG/wbSBngG/gVeBQYGGwY/BZ0FsgUlBQUFfwRoBM4EQgRVBD4EgAP7AjkCMQKsAVsATgBSAJj+A/61/W78R/zK/Nn70fpk+6v6L/oc+tX5ovkk+gP6w/mP+mX5lfhf+bD5K/lq+bb54Pni+SL6VPow+nH6Fvun+/r7+fwm/eP99f5t/3EAIAHeAe4CIwTFBNYE7gVSBr0GKgcSB5oHLQcrB0UH0QZwBnQGxQYkBgoGDQbgBcMF8gRbBYoF5ATCBMwEDgQZA0gD1AJUAuIBTwGAAB7/d/4q/tv9Dv3H/GP8F/xX+2v6Dfvc+X75Y/qo+nD6U/rB+qX5B/oT+m35cvp++h76wvkQ+if6jvlN+ov65vnj+lj7bPrv+/f8V/y0/Yj+Nv6A/6IAuwApAlMDTAMyBCUFeQUaBrwGJQcVByAHvQe0B0MHPgeCB2QHHAd1B8IGggb/BhwG7gXvBZAF4QTTBHMEHgPOA7ACiwEmAswAb/9s/6L+Ev3G/RX9EPyJ/Cz73voU+zj6E/o3+mD6Bvpq+oz6Bfo++v35A/pA+r/6F/sA+yL7FvsC+m77YPsi+hb8i/sK+zf8jPz0+1T9v/04/RP/4f64/qsA8wDjAHECqQKTAu4DkwRiBHUFwQVhBbAGvAabBkIH8AYPB18HTgezB2wHYgd2B1AGZwb/BaQFbwWLBHQEbwOmAl0CjAFKAFsADQBt/o/+Pv7O/Lr8gfzV++r7pvt4+sb67/pK+qr6r/oO+tX5H/rK+f/5wvmw+lL63Pqd+zL6t/uB+2z7d/zM+3/8h/0v/Tf9Df
5t/gb+Sv8q/xP/pACJAIUAlQHFAY0BnAK9AmQDgQO3A8YEDQSUBDMFqQRGBVcFnwWZBooG7gVtBiIGCAXzBY4F+gSKBacEuQQBBLECmgNRAuYBOgLWAHoA5v9y/zX+aP6t/Qf9o/0m/JT83vuU+577k/qL+wv71/qz+jr6kfqs+XH6LPp4+gz7ifry+uv6nfsC+2z8uvxb/FH9Cv4L/ur9q/+r/pr/YADh/xkBBQGdAbIBFwKkAkwC/wI7A24D7AO2A1EESwRiBF0EewTyBE8ECAUsBdQEygTNBCMEaARUBFEEmAS8A/UDHQPjAksCLQLdAeUASQEDARQA2P87/3H+av7W/Vv9bv3//Fn87Pw2/Kn7Lvyw+z37Lvy/+pr6/vtf+mL7Kvtp+1b7HfvT+/b6zfw3/Ib8ev3F/ZD9m/0r/x7+WP9z/3b/uwC2AN4AngHcAeEBpQK4AvkCQwPSA+0CGwTCAyQD5QRuA5cEqQTPAzQEQgTnA/AD5QQfAyAEvQPjAk4EwAKIAxoDqQLkAsQB7AEzAUkBOgDTAMT/Qf/8/+b9K/4h/o79Wv1v/Z/85fxW/MX7Rfz5+6T7uPuq+9D73/t++8n7v/vF+wb8c/zU++X8w/zH/PP9Cv0d/rP+pf26/nj/Pv/j/38ANgCcAJYBSgHfATACngLLApAC3AIWAwIESwOLA2cEJQMQBGcDBgPyAy8D3ANsA08D5AJNA8AC1gKCA8ICvwMPAnUCCAOZATIC1gEoAaoBbgBCABwAaP9h/9D+9v5G/kH++/1H/cX8fP3M/Fz8jP3U+3H8Bv2G+4T8uvsJ/OP8xfvG/I/89ftR/ZP82fyQ/fr9Tf7j/Tr/Dv7w/lv/Y/9BAAwAOwGJAGwBUAFHAXEDwAH5AmEDHQLeA8oC8AIXBGwDtwJrA2cDMQOrAgsDmQKpAvYCBwIcA18CYgKeAe8BNAJ0AWsCegGiAjYDLwFLAR0AOAC9ACkAz/86/y//mv3f/bH9uv1N/WD9uP1L/dP9t/xr/cP8vfxY/kr8Of2F/Qz8Nf2y/IX9rf1W/gn+g/3K/fT8AP7p/cv+r/86/5D/fv89/xUAKQBSAQACpgH7AVsClAG/AUACPAGIA34DeQLaAmwDdALQApMDPQLsA94ClgK4AuYBwgKsAh4C0gE8AqsBMgJ9Ad4AfwEfAZkBFACc/7AARQCu/0H/kP9B/w8Au//7/UT/7v0E/oj9Zf0n/gn98P1g/W/+qf2U/bf90PyF/Rb+mf1o/dH9kvx1/Bz9r/zJ/S3/Tv+9/0L+J//p/+3/YgADAPIARgE3AVcAaABJARUCLwLcAa0C8gE2AlQCewEMAuICSwPNAmgC/AEqAjsBWAHoAbABpAIJAgEBswBtAIsBUQGXASID/AB/AZUAYP8bAVb/qAChAIT/0P9y/mn+zv2G/ij+oP5R/w7+m/5M/fz9Pf22/Ez+wv1q/lL9Vv4P/j/90/5m/Xb+8v7m/sT+M/7H/hf+Jv9c/pf/UP/K/g8Be/4W/0AA9v9OAQABLwGqAd0AXgG5AXcBZAIvA3YDjQJxAZoBQAHdAcMCYQE0AhED4QFvASsB0gG9AUEBIALMAVEBFwGT/1r/Lv8w/+7/bf9C/1X/r/57/iv/d/5A/iX/kP7e/cz9+/0Y/qz9HP6X/lj+u/5d/hv+FP/C/67/Av8N/2z/jv40/0P/d/8mAdUALQGcAEUAFwHu/zgA0QBrAEIBxwDT/+H/7v8/ABMA5/8hAOz/gP8d/hP9W/0D/oT+E/6C/aP9UP5c/uD9I/76/r//8v+j/wf/af7n/hn/z/+JAdACbwPXA/ADLgXrBTEFGAdiBzMH3AZsBXAE2gLrAkQC+QHhAcoAIgCL/o/9wfwi/Cn9U/1m/Bj96fw2/K/8u/yz/FH9s/0A/ZT7xfr5+ur5GPq9+gv7xvqf+SX5e/gt+FH41fiB+Ur7cv0v/3ABBARQBsMIUAv+DBEOCQ+mD7MOiwwICuEG7wMSAd/+vf
y3++v6QvmK+GP3Zfeo97H44vqt/JP/TgIgBF0FvAahB14ITgmzCP8HgAbMBOcCoACY/0D9fvtC+mX4APej9GPzA/Sh9KP05PNI82jzMfRj9/v8zQKjCOoMNw/MD2YOwQ5tD5MPEBC+DXkJdgL2+nf0RPA97w/vdvGN8h7yPfKK8QnzRfZA+zkBxgWyCTQLYgysDMQNmRAZEXMRZA/OCx0IawRxAmwAkP8H/8D8fPmE9rH0bfTm9Qv31vfy9z34QPh/9iD2t/ae+l0AjwXWCkEN8Q43D+INhwyVC9cLgwosCMEDDf1+9/Xx7O3n7LvtLPDH8gv1N/cB+ZP6R/1BARIFOAkLDbAOvg67Db8L4AncCAsHhgTXAd/9nvqB+KX3bvh/+Wv6QPsM/KT89f3V/0QCWQTuA90Byvwt+Cb3ivYj94L3Yvog/34AFwIqBCsHKQvhDSkQThH3D6oMwAirA8r++/ox+AH2x/MI86Hy7/KR9Dr32vvm/yYDUAZDCKQKLwyGDGgM2AtgCkgHkAOAANn9dPup+ZD4YPiF+Fv55vkK+wP9t/83AocDxAPIAr0BMP/D+wL43vNZ8ZHwCPGO8Tz0dvm3/jUD0QbSDC0RJRPIFS0WmhRgECQKVwNP/Cr2DfK472juoO8Q8aTyhvX9+Pb+8gToCdUNtA9KEUYRuA/fDcALdQquB8sCzv1D+cr1G/TZ8071Zfc1+bz8SP/6AIED2gUxCKMI3QcpBhcCcf0o+Un1vvHY7djrausY7CvxnPgEAPYGNAw/EFYShxNoE4gSsBFWDgYJawFP+MjvQuoq6PPplO4b84f3QfuJ/roB2wQ+CZEOrxJRFV4VIRL9DIcHEQOz/z38q/l890T1cvTt9Bf3Zfom/+gDnwdxCtwLIgwmCxkJgQZ1A8z/+vp+9o/y9+5w63jnB+dB6SDvSvnYAnsKUQ+lEgYVkRVPFpsVvxO8EPMKZQLf9pTsdeUv4/bliutE8uX39fvs/n8BVwRkCTMQNBUNF30VwRDiCWUCBPzK+Jv3lvbh9W70OvPG8rb0yfkGAN4GwQwZEEgQ0g6NDDMJhgVSAvj/Ofyw92Hzsu5I6wzppOfZ6GvtpPb/At0MMxQcGHoY/xcGF8IVERTcEHIMEAbI+43vluQh3mXdveIu61L0mPwLAhsGyQjJCk8OXxJqFaUWABPlDN0FRv5m+U32sPQ59LnzfvLB8eLycva7/OQD9ArjD3QRDBGxDucKuAZsA2kAt/yK+MPzfe817Grq2+rY653sPvCX9yUCxgyVEwUXRRd/FQYUzRI1ELgMJAndA4X73/CJ5hbgMuAx5uTvdfpPA0IKDA5dDwMQbRBgEpAU3BMKEXALSAIj+Zjyq+4l7e/tZe+f8ETyFPUt+roB7QlhESUViRUwFfARQQwcBwcC6fwq+EnzxO7U6k7oy+ht6/Xug/KV9MT3rvujAiwMOxL5FX4W1xTkEmcP4wvKCIQFbwEY/Kz1+e3552/mnull8Qb74QNnCpUNkA7uDmUOag34DKsLKAloBEP+sviC89rwjvDW8YL0rfcI/CABDAfIDCARUxOUEn8QLg5eC8EG7gC2+oX0tO+w61bpz+lu7O7xnvii/VUAHv+J+zr68/tCAXoIvQxxDuINbAqwBt8DlgKyAxAFhgTXAFD6UPLY7Lrs2PDd+AYBWQeYC5MM1Qv+CXUHvAV4BVkFnwNjAIH7Ffel9JXzdvXk+O38JwEHBS0JjwzCDtMP+A+eDocMTwk3BA3+Z/dZ8grvee1d7vTxyvfO/mYFHgm5CMIEFP6V9ifyRvFq9FT7CgAyAh0DzwHJAisGBArfDrURcxB/C/8C6Ph/8STuj+/a9IT6yP4hAeQBQgGbAJoAoQGTBBEHhQfVBb8BEP1j+gP66ftV/3IC1QTBBS8GIwbtBR0HdwhTCWEIZQVuAZz8G/jB9aD1nfeR+0UA7QTgBzQIRgZQARr62/Kb6wTnN+a156vtyfXB/HsDTwkxD0YWWh
wzHRQbYxiIEjQKOf+l9HrtE+l36H3rbe9887f3M/t4/6gE8QizDNcOfg4BDeMJcwWKAeT+qv3j/db97/zm+w37CfwS/z4CggV7B2sGNAQaAfr9ufxg/M797wD0A8MG+Af0BngEqgAM/Gn3GPOD7p/of+SX4zzlC+zt9ckAGwzlFdodMyHQH/Mckhj9Ex0OgQUI/PbxMulc44Xhz+Mv6bjwjPmUAs0Jcg7PED8RrRDXD5oNTwnvA7n+pvrx+IL44vcC+Kv4T/oa/UEAlwP4BQgHnQcsB2AF9QM6A1gD0wQsBuMGhQaOBLAAzvtm9+jySO+t7Dzqv+dI5fLlLumW7vj3XwPKDr0Y8B8hI4cijh95Gt0TOgxMA7X6pfJH67HlteKP47Xmieys9IL9bwZmDX8SKBV7FFoSng7OCRUGugKJAJf/EP74+4D5Zfcl9rv2Ovmb/LX/uQGaAygFSgZeB1wIoglCC6oM0QyMChMFvv1v9hjwIuzl6hDrdetO64nppujR6jHuffTq/mkKyxW0HtshpSCVHB8Wxw/tCuEFrwC5+n7zdexC5gzjleMs6GPw9PrGBbENExLCEwgTixDCDbcKdwjYBtIEaAKd/kr65PYt9ZD0pPQs9mX4aPun/8MD2we4C/UNuw8DEdkQew8cDHIG5f8O+fXyue5X7Ezs5+2g7ybw8O/L7ovuPPHI9KH6XQInCpMRnRZgGGwXXBW3EmkQeA0oCC4CHvw19ZTuHOkQ5rnm8upd8gT63wBPBkQK0Q0tD64OYA1IDGgLeQkTB7oDa//J++D4L/YW9L7yz/Pl9zb8yABQBfgI1wzND38RtxHtD5IMhQhQBBH/D/pj9jb0f/PT8oHyRPLI8Fvv5O2w7DvupvAO9Tv8vgLxCdAQmhR0F5wY6hbsFEIRygvPBY/+q/ci8nLt/+mM6F/pA+3P8lX5QACABlkLrQ/rEVIRARAHDpwL3QhbBUcBQv2w+Tv2yPMQ8irxoPIA9iv7aAEeB8wMCxEcE6gTMBIFEAoNawl6BS8B9PwD+H3zqu8h7Gzqkems6DHpwekd7MfxY/fr/UUFJwwPExgZdxyaHKsa/BWlEF4LhgSk/dD2r+9V6jnmu+Pr5Iro1O+x+ZQDQwwfEvcVOBeGFVcSdQ6yCpwGIwK8/c74zfQN8YXtFuxC7ALvZvQY/O0ERQxcEpoWIRh7F6QVvhNJESwN4wdYAZn51/EL6yDmQONd42Dm0uoU72jyNfQq9zz8/gD1Bo8NKBNhFwUZAhdMFOgQ/AtoCO4DeP7p+BjzTu5D6u7neOhe7ALzFPvHAzQLfhATFHIVERTLDyMKhwU2Aor/Bv33+br2GvS48SbwPvDd8fj1ofyZBNELaxE2FaEW4RaRFbYSZQ97CpcEbv4K9zLvm+jn5Jjk4ecT7ZfyyPfv+vz80vwd/BP+cgBJBFcJsgwUD74PSw2/CkcIQQUXBGUDdQFz/j364PTu8A3vAe9/8+H5tADpBw0MnQ3dDA0KPwe4BLAD4gP5A7cDMQJm/2L7Lffc87TxQPLh9BL5MP+hBaYL2A8kEvsSRRKIEZwPVwzsB10BSvqT84jtgult6BXqE+5J87P3U/oT+3n6QPm2+UL8vf/yBBQKwQzMDWkMXAnqB/oGcwcaCe8IhgbXAU/7bvQi8B/vKvHB9e36Xv8xAgEDSQMhBOEFvQiMC5kNkw27C4sIOQSl/8b65fb588nxJvH78Wr0XPmMALsHlg3+EREUJBQNE2IQHw3hCL4Dn/5K+K3xkuzJ6ffpPux77xDzWPU19lz2DvZr93D7XgHZCK8OvhCAEHAODgzICnIJaggLCNcFaQFI+5H0lO/q7ZjvUPP890X8LABQA5EFyAekCYkLtQxcDE0LmwgYBWgCJwCl/YD6Iffz84/xFvFB8+n3B/84B5YNLxE/EvEQVw9LDvwMyAu9CewFeAD9+KHwl+rl52DoMOvG7hbyBvQE9YT1i/bd+Yb/YwexD0oU0BT4EU4MTg
cJBMsBTQGUAZkASv5N+m71n/Ie80D2E/v7/20DggUjBlcFFwQhAzkDsANUBHwEUgO/ArwCIANOAyYCPACT/fn6MPmV+M/5Ff2aAZQErQW3BV0FGwdHCn8N+w+PDxMMOgay/fX0we7B6wjsj+0Y7wrw/O9/8GHyJPUn+m4BfQmbEMITVBIeDhwJNQWZAyUDHQRSBbsE4AKp/vf5NPdN9pn3+flP/P39Nv6R/Wz8kfsd/Bb+XAG2BJYHJAoMDOsMFQ2QCzAIuwMd/r/4Y/Ti8d3xhvSD+J37s/7IAZoFOAu0EDUVohd8FvsRHgrX//L1yO6t6lHpa+nd6bDqFeuB7Drv6POa+0IENw1fE+kUzRJlDioK1gcdB5QHYwlsCQkHZALl+7z21/OO8xj1VfcN+SP5FvlX+dH52vso/y8DPQcpCg4MQA1FDj8PTA81DdwI1QLI+2n1xfBn7hPvnPLG9kn6mP3WANEFWwyUEoUXShk1Fz0RCQgK/m71j+987E3rw+op6svpCequ68/uF/QI++MCjwqDD18RCRAVDe4KawrRChMMOg1EDAkJdQNC/dz3q/QL9Ij0EfZx99j3AfjP9wD4b/lZ/IIAigQ9CHcLBQ6UECESKBJGEAAMPgZV/7T4h/OK8IHwN/J39Lb2HPmQ/IYBdgdzDWMS6hRGFBQQbwnAAf36aPbz8tPw0e6y7Dbrpupy64DtvfG/99r+ewUVChwMIQy/C+ALlQxXDakOiA9oDkQLagbMADX8YfkH+Lf3tvc699f1W/Qu8+ryF/Sz9pz6Jf8KBPgIjg2FET0UJBXGE2kQTAvtBCP/Avpf9r305vPP8wX0PfXg91z8VAKCCOMNqBBREB8NBQhtArr9xPnm9tz0hvKG8G7uDO3+7Pnu6/Ld9+P9bgMWB4kIkQgzCGMIOwl0CtoLYAzZCwgKCAedAxwB3P/l/pH+Cv71+1D5p/YB9FXyJ/Ke81T26vlm/g0DqQcMDFsPJxH8EBkPrAt+B7QDMwD1/aT8fPts+nL5W/lo+jv9PwFxBWkIWQkLCGgEOwCG/KD5YPc99q31CPW59HX0yPSP9Yf3rfoR/oYBVARJBREEUQKqAAsAqwArAokEOAYQBz0HWgYzBcIEqATjBEIFXAREAlf///sY+cf2ivWM9X32zfjF+5L+qwGCBBEH9whfCaEIZwYoBGwC1gCqANwA+wDmAFcADQA7AKkBFgRxBqYHEAeaBNEAv/yR+aL3tPY390/4Ufkh+nb6x/qY+9D8J/4X/1r/TP9z/hf90PsR+5b79fwc/3MBLwOLBMAFkwZ/BiQGAQbeBVMF9AMNAvP/0f3s+2n6Vfm7+Df5ufqN/K7+6QDvArAEkgWFBdoEjAOkAhIC4wF7AtAC8ALQAkoCtwF/AQQCCQPSA+cD0QKGAMj9Ovut+RD5mfki+2L9Of/4/7AA6AC9AMQA7QDxAI8AHP8Q/fv6Rfli+Hr4qvk6+xD9iv4GAKEBoQKEA4YEYgXcBaoFrgQ9A4IBHgDc/mL9PPzo+gv6PPrR+vT7Xv0f/8gAkQEBAgQC6AG1AtIDRwWpBmwH4gdMBxoGvQR+A90CeQKHAQoACf7b+2D6f/l3+V360/v7/R4AtgEbA9wDaATYBMQEHQTnApsBBwAG/sr7MfpZ+f74UfkZ+gX7vvsV/aj+3v8RASMCQANRBOcEvgT8A9gCkQHb/xn+jPxI+7X6evp++sH6Vfsy/Nz8Lv1r/Xn9Gv58//sA/gL7BIcGlge6B0oHkwbaBWEF6QQHBHgCcQBm/rH8pPt7++b77PyE/lEAygHcAqIDOQTIBBAFDQWHBIkDRgK1APX+jf2k/DD8afzf/Ar9Jf12/Zb9sv0W/nD+5v5Q/4b/p/+5/00A1ABFAZUBKgGtABoAlP9K/8b+LP6H/Tf84fqk+ZL4jvhk+ez68Pwj/zMB9gJKBD8FIgbkBjMH9QbhBfsD3wHY/1T+bP0C/Rn9t/3G/vD/AwEOAvcCeAPLA6MD/g
IXAjgByQDeAE8BxQGqAnMDFAQkBIgD3QIAAiAB4P+D/gr9svuu+uT5tflJ+p/7Rf3//nIAbAFgAgUDdAOXAxkDVQIHAR7/1Pxs+nv4iveJ9wD48vg++pH71Pwe/nL/1wBVAnkDGgQXBJ4DAwOYAmYCQwI1AkMCcwKhAtYCggL9AewA4/+k/iz9VPyP+6v7Qfz4/QQAbAIjBT8HLwkXCl0K+AkLCXQHKgVjAiP/OvwU+jT4dffN9xD5DPvC/Nz+iQAsAgMEOgUwBmsGJgb4BBsDBAFh/jr8Uvom+YX4EfgW+AX4Wfj9+Hj5k/r5+zz9Yf48/2IALAFTAkED9wOSBKoE3wT/A0MDgQI7AUQAwv5Q/dr7bfrv+bT5rfqT/NT+lAFLBLYGcQjKCW8Kugo0CpsIkAYXBGcAg/2D+3r5dflm+cH6Lvx2/ST/fwC5Ah8E6AX+BmUHBAfQBU8EMgIMAGP+yfwx/BX7Qvo0+kP5k/l8+Q36Cfun+w/8Xv0U/mj+ov86AAcBhwEmAqwBHwJdArIBBgKGAN3/R/6F/GH71/nl+YH5UvtA/B/+mgAnArYERwY8CMwIWQm+CCcH4AU7A3YAZP6u/H37I/sM+5378fx8/rb/NgHaAo0EpwYqB6IHFQiGBqcFBAS3AWcAw/7X/Rj8WfsE+176pPqg+rT7+/u//GL9nv0R/nT+7v6h/zYAdQA9AbcA+wHHAVABPgESABv/mfzV+575sfho+HP4bPon+/H99//hAUQEAgaeB9QH7QgnB58FfQRPAc7/y/wV/AX7Jfp2+tH6Wfz1/Pf/dgHGAzEGYQelCF0INgjFB/YFDARNA44Awv9j/tz8PPxy+y78kPsX/Un9yf0u//T+eP+v/3D/IwAPAVkAggHiAT0BsgF1AaMAi/9X/yH92/vC+3j51Pkx+XP5Mfu2+2n+xv+yAZ0D0wSBBbYFuQVXBKoDtQG9/4j+x/xl+9D6TfpP+in8YvyT/iMAuQFFBLYDDQfuBW0G1gatBIEFfgJLAgUB9P8//3L+AP8i/iP+w/6B/xn/+v/X/9L/Z/+X/qIAo/9KAEIBxwCeABwB1gB1/9EB/P5b/7n/hvzY/Zb72/tB/J37Yf2//eP+6P+7AG4B2gEiAlMCYwIkAv0B9ADg/w3/2v4i/bX85fyg/Hb9g/3z/vP+jABGAWkCagNeAtEEwgIAA2MDtwC8AS4AUABiAJD+E/9L/+b+VP+LAIIA1ADrABwBOwF5ALUAzwAeAUoAEwAKAVn/n//VAF3/HgBw/63+kP8u/b78f/5c/b/9Xf9m/lX/iADD/3wAowFUAfsBCQIDAvMBjgA5AGb/yv4v/s/97f11/cD9Vf51/rL/1wAjAR4C1QJqA6QC+gJHAz8BAQGZAOP/RP8+/nf+3P2w/cz+uf4f/1MAxwDjAF8BvAHIAWQB/QCHARYAov9fADf/S/8n/63+Jv+n/vP9nv6A/XP96v0G/VX+0P1u/a7+0P6c/18AXQA+AcYBngGGAmkC3wFxApsBvABeAKX/M/97/2P/D/+G//L/aQAuAaABQgIpA9YCqgLbAR4BrADb/6r/Qf+V/lz+k/6C/sD+MgDiAMgA/AE6AvABNAL5AScCqAEAAcsAU/9r/0X/vv4i/7D+6v5U/vb9Dv3a+wr8nvtR+xX7zvr3+rX6evu0/AT9uv5AAH8B5QIHA+8DcARsBBMF6gRSBKYDNQMBAuEAHQE/AD0A+QAmAP3/HABf/1D/QP8O/4j/J/8E/xH/9f7k/gn/Wv90/5YA9QBzAYwCmwI7A6YDUQN4AzgDAAMGAxUC2AGUAZYACgCt/0j/bP6v/l/+H/1l/Wb8APz6+wf7d/u2+mH6hfrY+QP6aPoq+mv6F/s2/AP/NwDDAOYBYwLsAvEDbgTtBAYG2QXJBUkEBAN2A7oCUwJgApsBHwDP/xz/f/2w/RD+Gv62/Zb9Av6E/rL/GQEFAocCuAPGA50DGwQtBLsEzQRCBYEEeQNBA18C9w
HDAekBSQFyAEH/ev7T/Dr85fxo+5v7qfvU+n36P/q++rn6Nvoj+mL6o/nH+bv60vlZ+yf9cf7W/4//UQHbAt4DyQUiB38HbggbCAEGqQTMAhQC1wBD/8L+NP2r+8z6I/q6+bP6Z/tc/Ez9Xv7x/xEBmAKwA+cF4AYjB/IHSgdtB4oHmQaGBTEF6QQNBGIDsAG0AEQAev9A/9j9of2K/ZD86/vl+ur62vqZ+0/8N/wW/Qn+gf5r/ij+pP0j/bT7Ofo5+Sz4Nfhq+MD5Q/ti/KT+jgDFAmcFJgeeCOcJgQmpCPQGFASdAuYAwv5L/b77Hvqc+Xz5yPlf+vj63fwa/kz/HwGaAroESQZoB4wIHgllCVgJ4AhhCLkHYwapBSsEKgM/AnwAhf+h/fj8rvzl+wP8BPwL/EP8Zvxf/Or8Hf5u/2AA3ADgAPYAAQHR/w3//v3p+0P6CvgO9ln1VfRz9Df2yveN+jr93/9sA3UFJAd9CO4IuQnnCPAGeAQIAc/+Jv25+tv53/m1+RL6wfk++o37Lf3N/90BcAPxBdIHQwmFCgsLMgwMDAMLCAq6ByUGhwQaAiYASv5i/bT8MfxB/Pz7pvzk/eD+BQAKAb4BJAIPAqIBsAHJAKsADAH+/6X/Jf+8/uD9Ev3m/DP7mPku+KH1qPN88yn01vTT9cP36/pA/af/KQNaBc0GbwgiCeIH1wUoBAkCP/8i/aX7Mvrr+On4CPpu+gf8vv7JAMMCowTcBncIeQkeC9ALGguJCrQJcwjKBicENQLIAMr+df14/Oj7OPx9/XT/zQAHAjkDMARwBGEEYwToA7oCKAHX/yb+N/1Q/XH93/2M/kj/k/9d/8L/qP9R/vr82frO9w70UPGi8FnwAPFo8sXzqPX1+F/9jQG0BP4GiQhZCXEJ8Qi3B70FfwMeAaP++/tu+uj5S/pm+9/8qv7+/yEBwALRBMQGsAh2CqwLigvdCuYJVwgqB4kF2gMeAhkA4P65/Xz92P1i/m0A/wGPAvcCywLKAqkCegIOAj4BkgDA/3b/J/91/+oAyAFPArcCOwL9ALP/cf6L/FP6mfdL9JHwN+w46m7qBOtl7R/wp/I39i37cwGmBxoMrg4kEFEQtQ+6DZYKvgZwAtv9nfj289Pw4+8v8bjzMvdH+wf/MwN2B48Ldg+rEgsVZRUIFGoRzg3SCfYFWgII/8T7Uvlz99T1Nvb19xL7yv7cAfEEcQfTCPkJQwoxCTMIuAa5BM0CzABW/zT++vwy/Lv7R/sG+936kfoA+jX5TvgL98f1ZvRg8krwye777nLw0PKy9fD3+vpZ/5sE/QklDuYP4Q4zDNsI4QVdArL+5vqK9sXyae/q7WTvG/Pe+C8AOAeKDK8QwxP0FcsXVhhvF70U9Q8bCucD7v0x+ef1TvTg81T0k/VC94/6hv92BWUKvQ3ED54Pug6MDSsL5gfnA4r/TPuZ9yj1OvSI9Gb1HvdA+eP6iPwk/mz/mQBhAUkBgwBw/l37cvi69SDzGvHe71DvaO8O8PzwEvIV9VT6UQAVBuUJAQvsCgoK0wjPBzYGngOz/7X6iPY+9L7zKPYw+2UBsAc1DVERDhSEFREW2xXUE0UQtAswBoYAr/s1+Gr2dPat9w76Hf0wAOYDXgdACvkMzQ6xDtcMeAmFBEv/tfqF90v1lvOy8onyfPN59bD4M/3xAdkFvgjNCRgJmge5BeED7gD9/Gf4rPOX7w/tE+xb66rqu+md6b/qN+6/8pf39/zrATQHYAvTDgkRiRAuD/0Mfgl/BVsAUPsC953zgPKt8+j2OvxDA1YKbBCmFCMXKxiyFz4WPRP7DjAKOQXMALD9l/tk+oL6m/us/SsAFAJZA6kDdAN5AyIDZQKxAMf9/vrd+Az48/ho+kr8Yv7R/3IB6gLdAzoF8wXCBdwEPwJd/jr6hPbW803yqfGG8cvxfvKp86H0QfX99Uv2SPZ69iP3B/jB+C/5kPrW/QcC4AYaC2MMIQtTCREH6g
SxArz/p/0g/AH7ivs1/QgAKgV9C+kRThfGGe8ZURg7FWcR+wxwCAwELACg/PH5Pvho91T4G/pj/Nz+swAPAtkCSQP9AlUCxwGQAN7+Nf3/+5P79fuv/Nr9Ef8T/73+M/4J/Zv8gvyE/Er8E/u7+ab4Z/gr+Yn6LPxu/eP9UP2x+2/5PPfK9CLxsO346x/rbuwJ727xa/Z2/eoEuQx1EjcVKRVUEvsOMAtMBqoBCf4D+yv57/h8+mH+JgQDDDwUdxrjHTEefRypGOoSDA1QB8YB4Pxw+OL0tfJL8vLzG/cU+6b+xwH7AwUFiQXiBM4DfAK9APr+wvwx+lf4SPeA99T4o/na+sD72vtW/IP8G/25/kYAbAEOAqMBwAC//8/+KP6+/ef89/ph+Kr0NfF77hbsRunG54foH+qv7p7zGfni/7gGEQ4dFEQYtRnjF7ETTA6rCG0DLP9O/PT60vpn+3797wCwBUIMZBPWGa8dKh46Gw0W2A8jCYoDuP5A+tP2ffT78sfy4vN49j/6Af4fAdYCuQKqASoAuP7t/eH85ftM+kb4VvfC9lz3WPkk+9T8L/7k/tT/kADZAdYDBwVrBesEQwOTAMT9Evuo+Pr2lvVi9BLzY/G975juGu6J7tvvmvKE9p36lf4VAtEE6Ae4C7wOIhFHEegOfwsQB8sDOQL1AHcAbABKAHkBKAPpBcEKeRDEFvYbMB7nHBwZfhOSDLEFN//6+Sv2E/M58UrwA/Bk8R301vc9/Kn/aAHyAQ4ByP/P/q39tvyZ+2v6P/no94P2qfVP9jr4l/ph/eP/UgHhAiYEEwVzBq4GZAZ0BdwC7/+G/P74ffYe9dL0/vT99Fv0i/PC8m3y2/I39Bb36fkH/J396/3f/sYBXgWhCfoMJw6wDdsL0AmTCAII8QfLCCEJLAm0CfcJhwskDmYRmxQlFpgVRBIWDaoGGQBI+uX1wfMU853zxPTc9WP3ivn2+7r+IwEwAtgB+P/d/GX5Dfbl82TzZvPl84j0gvR49bf3j/oD/+sD2QfzCnQMSgzHC8oKCglmB/wEHAJE/5P7//fT9GbybPEY8rPzV/XX9kT34vai9nv2w/ZT+JH6Afzl/Fz9Df4mABMDHAf1CzgPBRF/EQ0QGg4GDBAKmAhbBxgGIQW8BIsEqAVWCLgLKg8NEa0QUA40CrAFfwHQ/TH7I/mw9432KPXm8zbzMPO/8+z0yvXV9U31rfQY9XD2EvgU+hr8EP69/8UAUAHaAWYC7AKEBDEGXgYWBgIGYgX+BOQEQwQ9BKQDJQKbAGL91PlW9xL2a/Y+9zf4wvig+Dj4Lfiz+En50Pkm+m/6GvqZ+Sr6Ufsa/akAhwWrCjoPvRFNEgYRXw5qDCgLvQqiCsoJzwhVB9QFeQVvBkEIjgouDPsLHgpnBh0ChP6X+7D5Efg09jP0G/JZ8KDv++/s8Fbyz/Ne9TD3FPlM+4H9H/+aAGIBQgESAW8A2//D/4f/uf9MAVsDUQVBB4gIUAnyCaQKBguXCmwIuwRgAGb7Vvf29MbzfPO380j0bvXa9iz4yvk1+yf8hvwn/Nn7Wvtu+on6//p3+/L9qAHABY0KIA4RENIQCRBYD/YOKQ6LDTAMwglzB+8EvAKXATIBwQGnArMDyQT2BDoELANsAeX+M/xb+eb23/S58kPxJfDh7lzu9u6F8AbzSfb2+b790ACEA6wF4QagB8UHpgdsB7UG+wXxBa8FwwQcBOYDZwSfBdYGcAemBn4EqQGz/mv8+fpL+i36yfkO+Xn4WPiM+Cf5BfqM+sD6Evq3+KP3efZY9Vv1ifZ5+JD7W//mA6cITgxsD9cRghOTFIYURhPLEHMNwglQBtsCrP+g/Ub80vtO/E39yf4lABIBvAHwAY4BqADs/o78Ffpf98r0tfKD8Kfu0O3Y7aTvGvPT93P9vgLtBgQKwws7DNcLngrGCJcG1gQ1A/sBOAH8ABQCxwPaBdMHLwmYCdEIIwdoBBUB7P0c+374Yf
bC9Nfz4fN19KH1Sffz+ET6E/vl+tX5t/jq96X3B/if+OD5Yfzr/7YECwrLDisSLRStFNETNBKHD10MwwicBL4A7vwL+pz4i/hm+hz9nQAeBIMGHwiBCKEHkwWvAjf/Wvsr99fyO+9N7JHqwepr7ILvNPS3+ab/YQXhCS0N+g5HDy0OhQsZCIgEogF+AHkA4gANAkEDZAQCBkcHJQjOCFAIwwY5BHEAFPwg+N303/J/8gHzqvTu9kH5Mftg/Aj9kfy1+5T6APmQ9wX2h/Sg87zzmvWA+UL/BAYcDMIQthOHFPcTshKXEOkNswpJB74DOQAz/Tr7W/ro+gv9xP+OAqkExwU0BocFEQQfAn//v/zt+VP3SPWS82TyKPKc8t3zK/Zd+VT9RwGtBEgH5wi0Cb4JIgkWCMYG1gWfBZYFUAXzBHEErAN5A48DsQMnBPED3gItAbz+FPz8+aD4CfhQ+AT5/fkF+1P7//p/+qD5Ufgr9wL20/QR9Izz3fOs9Af2r/g7/FwA8QQsCX8Mzg6UDywPCw5FDKYKBQk2B3oFqwNEAiwBZQAtABYAjAB6AUEC1AIaA9MCFgIZAaT/9v17/EL7d/oP+qv5H/m++JD41vje+XP7Y/1b/wgBhgK7A04EfgRIBBsERAS6BI8FWAaIBlMGAgaXBZcFdwULBYsECQPpAL3+Zfwr+mP4N/fe9mL38/ek+Hv58flf+tT6PfsW+7P6ifoh+t/5s/lG+QH5//gj+Tj6IPxL/ucAZgNsBR0HqAgfCjwL8Qs8DAQMsAsNC80JIAjdBXADhgEjAD7/6f7g/uP+7v7s/uP+z/7w/k3/kf/g/yIANwDn/yL/JP4D/Q/8ZPsf+y/7e/sw/Af93f2l/lj/dwD9AawDuAW6B+kIjQluCWQIEQcPBdQCZwC3/WL7Y/nw9wH3pvbc9un3gflD+1H96f76/2AA9//s/l39zvtc+j35ffi69wf3hvYL9uj1r/ZS+ML6vP2eAEADwgU+CHYKEAwvDaINrw2rDQYNcwsVCWcGwAPVAbEA///1/wcAOgCPAMcADwFPAbAB5QHgAcwBgwHmAOX/uf5O/ff78Ppa+ir6GPqP+pP7xfze/dv+o/9vAHMBlQLOA7IENAWFBVsFmgRXA+MBmgBf/1z+ef26/Er83/uG+zv7K/uK+0b8LP3X/Rr+Qv4n/rT9KP1T/K77avtA++36HPoK+ff3Eve39gn3RPi6+tD9BgESBIAGwAirCu0LiwxXDKoL1gq3CRgIKQYyBMoCPgIpAnwCvAL6AmsDsQO2AyADFAL4AAwAa//9/pz+Gv6k/SD9pvxI/Oz7wPu++/D7Nfya/OT81vzD/LL8/vy//dv+PACEAbkClgMMBCoE5ANVA5ICtQGmAJH/g/5p/W/8gvvX+rj6Xfuc/AH+U/8cAFIAKwCy/xn/Ov4d/dr7gvol+dH38PZd9jH26val+FP7bf6oAZAE2QahCLcJKgoHCqUJUQkWCcAI/QczB2IGuwWWBYwFcAU6BeoEcATQA+ECYAG+/2P+f/1S/ZP92v0q/lT+Sf4j/tP9Xf3u/KL8ZPw2/PL7dvvT+in61vke+gz7gvw3/vX/kgH8Av8DnwTNBHcE5wMTAwUC7gDK/5r+jv2x/CL8Ovyv/HL9Rf7K/jr/dv9r/yj/a/5R/UP8RfuU+hX6kPk9+Qj5+fhr+Wz6HPx0/qUAgQLnA8wE1gWgBh8HdAenB0IIJgnwCUEK9wk2CYQIywfPBrkFTgQcAyAC/gDk/6P+sP1X/Y79Dv6K/u/+Lv9V/wr/bf6R/ZT8svvM+h36mflD+RP5/PgN+Yn50frE/Cv/fgFuA/8E8wViBi8GbQVsBFEDSQIpAfX/sv6n/QX9rPzQ/E/9Gv4d//v/lgC8AHMA1v/d/rf9fPxf+3T6uPkf+an4hfhs+Nv44fl2+8/95f+7AQgD/QMuBSgGCweTByUI8AjwCcYKxApKCi8JOQhvB1IGMQW3A3oCewGBAHr/V/
5w/eT8Ff17/QP+h/6l/sb+U/6a/bH8hvuh+sL5Qfkh+T/5lPns+WL6PfvC/LP+xACGAsIDoQQHBQ0FswQCBEUDjALjAVQBtgAmAMT/hP+C/8j/JACAAKgAWQDD//v+Fv4i/fb7tvqZ+fL4wPjr+EL5r/k7+tP6tvvD/Av+Y/90AGwBKgIMA/UD4QS1BX8GsQfuCFYKLAtRC/0KIQpWCSAIvwYuBYYDRgICAdb/if5Q/Yf8U/yz/C/90/0u/lv+V/7R/Tz9WvyH+9H6J/re+cn5+fk++oz6+frr+0z94v6SAOABHQMlBNYEOQUnBdUEbATmAzsDjgLFASMBigDs/2n/4/6P/kX+7f13/fz8i/wp/M77Iftq+r35efm3+TP6m/qj+pr6n/ov+x38O/1X/mH/fwCtAQ0DKgRVBXEGqwc1CXoKjAv6C9YLQQtIChYJlwcKBlUE2AKIAU0ATf8v/nX9/fzb/CP9QP14/Y39d/07/cX8K/x8+/j6Z/o1+lr6tPpb+7n7KfzM/L/9K/94ALABvQKqA3wEAgUhBd0EbATVAzIDdAKaAd8AQgDV/2T/8/6G/hr+4P2e/Uj92PxQ/LX7GPtW+nL5wPhK+HP4yPgd+VH5cflV+q37V/3L/tz/3wAKApcD1QTcBasGlAflCCEKOgugC6gLcQvHCv4JhgjtBjwFiAM1ArAAbP85/lD9z/xv/HH8Vfxr/IL8nvy8/I78gPwa/NH7ivtK+5L74ft6/ND89/xT/cv9tv6M/0UA2QCfAawClAM0BDEECATXA7oDewPWAhsCcAEMAaUACgAe/xL+MP1W/Jv7u/rg+S/5kPgW+Ir3MvdF99n3t/h8+Sr6Avt3/E7+BAAoAccBcwKAA74ElgUDBmYGRwevCBQK2QrgCsYK2gr1CpQKYwmZB8UFYgQoA88BSgDe/tz9S/38/If85ftq+yP7C/v3+t36tfqW+sf6DvuH+0H8Jf37/Xf+zv7k/hr/if/4/08AkgBFATQCNAPnAwMEywOMA4IDOwOMApsBlgDO/zz/r/7g/fX8LvyA+9H6Hfpe+a34HfiO9xL3qvbQ9l73N/gb+eP5MPvx/Ej/TgGpAnsDBQQyBV0GWgexB8sHdgh9CesKnAuWCz4L/gofC6YKmgm6B6wFDQSbAkMBdP/u/df8Pvwl/LT7Ift3+jn6VfpE+lr6L/pH+t/6tPuo/FX9Jv7I/jT/nf+4/9b/EAB3AN0ALwG7AVAC7wKHA8EDrwNgA/4CgALGAQUBIQBJ/6T+BP5m/YP8ivt8+mn5mPi799f25vU19e/0EPXW9dP2Cvi0+dL7Tv68AAYDowS+Bd0GzQeYCAIJUAloCYwJJgqFCuQKEgtKC14L+gpmCvYITwejBQYElwLmAHr/B/4P/aX8MPy5+9j6Hfp/+Rf5+/ih+Hv4mvhi+bf6IPxw/S7+2v6S/3AAMwGBAYoBbwGvAS8CowLhAuEC9wItA2EDOQOJApIBnwDx/2P/v/7//Tb9dfzB++n62vmo+If3k/at9eX0QPTu8y70/PRi9kr4qPpi/QcAcAJuBDAG7wdrCYoKDgsoCzkLeQvLC7sLaQsLC/YKRgtgC8MKVQl+B9YFhwReAw4CkwAx/yf+lf0X/WP8cPtV+kv5cvjB9yX3pvZ69tD2u/dU+Tj79/xd/nD/bABvAYwCUQOOA5ADkgPUAyYEYgQ4BKgDLgO1AkUCeQFiACX/2P39/ET8r/se+4j62vn1+B34Avfl9QH1IPSN8xjzQ/IN8snz9vaP+oD+swHkA0YGTQgFCtoLhQ29DpkPGBCDDz4OhQzwCmYKXgplChYKGgmOBwYG4ATDA9sCJQJVAaEAz/+j/mH9NPwB+7b5Zvju9hz1fvOZ8ovyj/Nr9Yz3o/nU+w/+BwDSAWQD2ATeBWcGrgZ9BhQGwAV6BQAFYwS4A8ICagH7/8L+kP18/Hn7XPpD+Uf4mvfw9hn2PvV39NXzSfPS8l7yZPKs80X2wfn+/Ff/fA
GlA1kGnQk5DMANNw5JDlIOZg5fDvcNuQ1VDS8NEw0oDAcL1Qn+CGIItAemBuAEIgN0AQUA1/5n/eD7BPob+F/2yvST83LyzPHE8Vjyk/P79K72i/jY+nf9sv9tAZgCqgO5BJUFKwZMBhAGtwVmBQcFiATrA3sDLwPdAjAC5ABS/539G/zX+pL5FPhF9pL05vJ28ZzwSvBu8Jvw7vCH8rP1gfkX/SQA2AKKBW8IXgteDRUO+A2PDWENXg13DVkNHA0zDWYNig0eDXwMWQx6DJoMHQySChMIbQVPA1oBiv+C/fL6efgG9tXzB/KY8JrvQ+/e78DwDPJa86D00faO+Wj82/7mAIYCwwPEBDkFYgV1BXsFjAViBfcEkwRGBBUE3gN5A9MCHgJVAS8AxP7i/KD6dfhH9iX0NvLJ8PLvWO8A70LvBvE69O/3e/tn/gABlgO1BugJ+AuqDEcMtQuMC9ULPQxSDE8MZAyZDOwMBg0eDXoN8w12DmIOYA3EC/UJMggxBuwDHgHq/aT6R/eS9EXyYPAk72rupu5Z703wdvHZ8t/0LPe5+Qn8BP7b/ykBSAJUAzEEpgSrBHcE3ANCA9ECegJ5An0ChQKgAlwCyQE4AYkAbP/M/ZX7KfkJ93D1HfSu8pHxy/Gd87r1j/c4+Q77nv3hAKcEZAe6CPsIcAgUCBMI2AhWCZUJEwpQCp8KyQpHC+8LzgwlDgcPRQ/IDgcOVQ1ZDDELUAlbBtQC9f5b+xT4K/XM8tjw5u+V77/vPvA68Qfz3fST9uj38vhE+q/7Gf1G/gr/ov8TAHsAqwCoAKQApwC7ALwA5gAGASQBPwEYAdEAcQDL/8b+j/1H/Cf7G/qK+BP38PYc+NT5B/uL+1P8pv3O/38CewQFBbEEZgQbBBkESAR+BDMFRgaOB3MI3wiBCVsKpAtADXUOyA6kDm0OJg4IDp4NYgwpChoHhAPo/8P8AvqT93n1LfSp85fzFPTb9Ob1ufZs9zj4oPgl+aL5IPp9+rP6Rvt0+1H7GPv7+iD7W/v3+z38V/zh/Mv9wf5J/8v/+f+v/yv/i/4F/ln9YPx6+8j7Jv1u/gz/+P4b//z/pgHKA0gFigXBBKUDtwIpAlECpwLqAlUDxwNZBP4E/gV7B/UIVgozC3cLqgvUCx8MHgzeC4sLeArwCNkGrgSHAhQAvP1r++v5HvnE+Oz4I/md+fD5NPpt+jb6Avq8+aX5gPns+If4O/hp+PT4Yvl/+RX5xvhW+AD4//ft9+735PdA+N74T/lN+tz7b/0D/vP9i/7x/xcCCQQABQQF4ASJBbEGtgcRCHAH8wUvBMwCIQLjAQUCewLyAnoDAwTJBNIFLwfTCOoJZwphCi8KNQo5ChIKPgkGCJcG0QQjA3YBKQBQ/8n+lv4v/s/9yP1D/gf/fv+A/7z+kv2P/K77AvuQ+jb6vfkc+Wv4vPcv9+726vbg9pz2BPZ89Qj1i/Q19OzzivMa82Xzk/Qd9j33RPik+tH9CAHKA50FwAbUB2AJ1wpPC5cKZAkpCNIGpgWQBGYDqQLDAmYDIQSnBBQF3gVRB1MJ9QqaC6gLdwtnC1UL8grQCbkHVgXiAqsA6f6X/a38P/yQ/En9Ov5q/7QAsQFGAsYC3QI9AhMBw/9Q/u386fs/+8n6Bfoz+YP47fej93H3K/eM9sD1mPQn8w/yCPEh8Izvwu8L8Wzyo/OY9YT4kvuc/kYBKAPJBF4GGQiVCQgKwQkFCf8HCAcpBiwFSwQNBDkE0gRvBZsF8gXYBqIIwApuDIYNxw2MDSQNtwz0C4EKewiUBUMCSv8x/Q78UPsl+237Nvy1/Wr/8gD2AdwCigN9A+UC3AGuAFX/Ff4a/SL8SPtc+o352/g9+Nn3dvc499f29fXc9JXzQPL08AXwvO/P7w3wofBP8gb1LPht+979uP/fAWYEEQfOCB4JUggcB0kGyQVXBXoEtgOiA/YD4QS6BScGxwayBysJ6ApyDKYNhA4nD1cPCw
8MDrUMJAuWCHsFRQI6/wH9qfs5+8D7BP2f/kcArwHtAg4EmQR5BMMDbwK1ACH/wv2U/Nb7RfvC+g/6PPmQ+Lb35PZh9vP1R/VC9O3ySfEX8PXvZPCO8GPwAvGy8gf1//fo+gz9s/5/AJICaQSxBToGzgXBBOUDoAN+A2gDkAPDAwoEawTCBDcFSgbaB08JmArYCyUNbA6bD04QHhAdD6INvAvbCH4FbgLu/zP+Jv3s/F39nf5mAA0CoQPcBOQFYgY2BpgFMgRxAqEA5P4y/ar7lfqq+b74tvfg9ib2i/Vg9S71yvQd9AvzQvJS8r7ynfK88QHxtfGz81r2/fh3+k37qfy9/vMAvgL9A0wEogNsAsMB4QFZAkYD/wNCBEgESwSTBE0FtAZICKEJjAotCwQMIQ1+Dl4PRw9DDrEMxgpZCOAFdANbAez/8P6D/u/+NAC0AUgDogSPBS8GZQaWBi8G2AQEAwkBZv8B/sr8UPug+UX4NPeI9rn1z/Tr8//yk/Ij8o/xYvHz8dXyQPNt8//zYfU09wj5UvrV+nH7mfwZ/mP/WgC7AIMAQwAaAG0ABgHmARUD7ANmBHMEcATBBLYFRgetCNMJbQrgCuULPA2ADucObw4ZDScLOgkOB+8E6AKLARAB0gAhAe4BGgNWBJgFqgYnB00HCgdxBjsFpQMcArMAw//5/t/9P/yA+tz4evd49pD1rPRo80jymPHs8LDwLvEQ8qzy4vJB81D0/PUF+AT6H/uZ+zD8af0f/4MADwFkAC//Jv7W/Vb+Bv8ZAC4BZAKVAxEEegQkBYYGRwiLCQQKrAm7CbcKMAx5DcENEg1ECyUJgwe1BbUDwAF/ACAAQwBIAfkC+QTjBpEIpQnvCbwJ/gjjB1YGRwTtAdH/cP6m/dL8m/tx+lz5avip99726PWu9KPzy/Lj8dzwKPCH8E3x3/Es8qHyzvO79Q747/kw+yD8qf3e/6sBiQJHAkgBMwCf/5v/zv8qALsAwgHeAnYD1wN4BOIF5AeHCVkKdgqDCjMLdAyEDYwNcAx5ChkIHAZaBJYCvwAn/5/+AP83ADkCrQTzBrIICAq3CtEKXQpfCeEHswU0Ax0Bpf+k/sn9t/xr+z/6WfmC+LP33fYJ9jn1U/RK8+HxZvDR7zHwi/Bj8GjwOfHF8sL0oPYi+I35fvtG/s8ADQIeAn8B3wCnAMsA6ADMAOgASgH8AcQCgwOaBO4FrQdYCWgKGgujC3wMVQ3zDd0N1QxKCxkJ1QabBIUCnADl/i7+Vv5T//QAEwN1BXIHIgkcCjoKtQmzCHYHyQXzAz0CvwCM/4D+oP3K/BL8lfvo+hH6SPm4+BD42PZt9dXzNfLs8EvwIfB+787u8O5X8KfywvRf9pL3E/l9+27+BQEvAiUCnwEJAcAAdAA5ADUATgC5AEMB7wHSAh0E0gWYB28JCwtZDHwNcg4jDxsPeA5rDdsLmgnLBg0ErgHj/7j+Qf6y/gMA1gGdA3IFTgfSCHwJ7QiUB9gFRAQKA/AB6gDO/xT/n/4v/sL9Bf1V/MH7MPuD+or5cvgm9/H15vTI8zryTfDs7lnuQe6A7m/vHPEl8131hvfW+VD89v59AecCMgPQAjQCggG1APX/Kf+z/uP+uP8EAVgCkAPYBGgGlQgnC28NQA8xEHsQahDtDxcPhw1WCzUIwAT1Afr/3P4k/lf+bv/9AAED5QSZBtMHgAheCAUHXgXyA+AC2QG4AOv/Rf/q/qT+CP4J/QL8s/uO+w77LPrW+MP36/Y39ij1YfMw8ffuC+5C7p/uYO407uzvxvIA9jX5wvuh/S//bgFuA0IEOASBA1oCngAb/1H+Iv4R/4oAogFMAjID9gRnB3cKmw2hD2oQvhAdET4RkRA4DyQNSwoYB9cDGQFd/9D+xP7F/kf/VgAWAhoEmAVfBn8GRwbpBUgFVgRoA8kCQwKuAeoAxf+w/rL9uPzi+w77Qvp8+fn4ePjF9/n2/vXH9GHzQvJP8RrwMe
8r7zPvAe+J72PxA/Sb9mf5t/sB/Rr+EQCTAgEEeQQBBMgCrwHyAPUARgHxAcwCvANWBT0HNwkIC/4MFg9nEDsRhhHsEKIP8g08DDQK9ge1BVsDVwHg/x3/y/6i/hH/z//PABAC/QJzA7YDSwS5BIcE9ANBA8cCFgIxATAA8P7Q/eD8LfyR+w37lfr6+V75e/h993n2uvVy9f/0RfRd83Dyi/He8I/wVfAR8A3wIPH+8gT1QPc++T/7ef3s/5QCkwSGBZ8FQwWYBBkEUwQCBQMGMgeNCA0KXQvLDFEOUw8AEEcQ+Q8SD9wNuwxvC1EKRgkjCKQGgwRyAoQAV//I/v39N/3v/Ij9uv4nAEYBwAHzAVgC1gKOArQB1QAkAG//mf4u/gj++v2o/dX8r/tr+pL50/gg+Hv3wvYu9tr1C/ZG9gz2O/Xv81ry2fAA8Dfvhe6F7srvKPLG9I/3CPq1/Ib/awJOBfAGwgf0B8cHugcCCP4IPwptC1MM9gxlDVMNVw3IDTQOkg6bDv8NEw1ODAsMzgsoCx8KawgsBnMDCgEC/yr9MfyP+yD7Cvto+y38x/xu/dX97/0X/mD+4v4y/53/GwBJAFoAUwBSAKP/Sv6a/M/6fvl++A344Pf09xv4BPg6+GH4Ofie92H2n/SF8t7wmu+87mTuIu7m7sHwgfP99gn6sfxS/zgCmQXbCEoLtAw8DRAN4wwzDW0NbQ1JDeUMkAxIDD0MkgwNDd0NlQ66Dm0O1Q02DasMGwwWC0AJzwb+A/MAQP4G/C/6hvgK9wL2RPUp9bP19vaM+CD65Ptu/Rr/vwDpAZECjwJMAncBBwCd/kb92Psy+hf5UPgB+E74o/j0+B75iPn9+RH69Plt+S34m/bJ9N7yq/DX76nvne6w7vTvzfK29q/6gP7UAUMFDwl8DNgO5w9IEAwQXw/gDuwNMg0QDVYNhg0ODY8MfAzxDNcN5Q4qD7YOSQ7GDRQN9QtcChMIJwXRARb+Z/o+9+L0F/O58f7wEPEh8uzzRvba+BT7+vyR/ikAYgH0AekBOQFUAG7/df4p/en7tPqQ+eb4zvh0+Sz66/oB/PP8m/3j/c39J/0e/LT6jPjn9Y/ykO/q7bHtXO527qbvt/Kd9hn7Nv8HA4sGygkFDcAPZhEEEgASbhFyEBoPnQ3DDI0MSwwCDOgLGgzKDN0NQw9VEIUQBxC5DtMMgArDBwgFSQIf/1b7aPdJ9D/y7/AW8OXvN/DU8EDyOvRq9r/4Gvt5/fL+xP+hABUB9ABFAD7/LP4p/Xb8Q/xA/Fr8wfxx/UH+MP/n/wMAx/9S/8X+CP7I/EH7cfl092T1YvPK8bbwZ/BQ8MbwS/G68lv2DvtJAB4EqgZlCUgMew/6EU0TpxOaEtkQ4g6CDRENYw1qDuIO3w56DjYOow4fD0YPPQ41DKkJCweYBEkCYgBt/kb8yPnS9sLzV/H073rvN+/x7kXvRvAS8m/02fY4+Wv78fy4/Qf+l/6L/xAAIgDO/5j/6/+TAEIBzgEzAjUCHgK0AUYBEAHQAH8AqP+r/rD9Xvyk+r74xPbC9C3z8fHT8EDwB/HJ8sfzk/VK+Uj9xQFXBZYHAQpmDBQPVRF7EgkT1BIrEkgRfxDcD5UP0A+wDxEP9A3nDCUMMAs8Cu4IFgf4BLECaAAw/iv8p/pX+a33tfVB8ybxCvBc78TuLu5U7lnvCvHr8qD0ZvY7+JL65Pyk/iAApQEZA+MDMQSeBJwFlwYDB80GFQabBTgFwQTSAzcCrABJ/yr+FP3S+1/61viV93X2pfVG9R/1zvQj9OvzuvQb9lr3t/jx+j3+PALIBUgINgpbDNsO7BBpEkcTNhN5EmQRWxApD4oOzQ7KDvcNFQzdCc8HFwbqBHADlAGI/2X9Z/uZ+Zn42vcD9+X1+/PJ8Y/vGe467aXsouwx7cLuwPDw8nD1Y/jL++3+hAGYAzcFjQbbBwMJlQnoCecJnQlGCZYIHAh2BzIGaQQZAiMAAv
+b/h3+C/2R+/H51Phe+Ij40fiU+Mn31/ZP9qn29vdW+df5tvmj+RT6kPxsAekFOwh2CKkIZwoiDa4QlxPyFIcU5BJAEfkPiA/0DrkNnwtTCJ8EDQF//kz9lvyl+yP6BPjD9Qj0P/Ns8+nzCvRU89vxafDV75bwB/Je80z0H/Vs9jb4cPrW/B7/LAHaAlYEwQVRByUJ4QoIDH8MmwzIDEoNSA3JCy0JSAbgA0MC4wAX/6b8M/pK+DT3HveR9zT4pPgL+Qb5VPhU+Fr55vrx+/H7JPu7+UT5pvlE+pX8nwCPBFIGpAbvB7IKqA6XEgUVUhUXFIYSdhAUDswLaAkIB1cExQCS/AT58vYz9t31WfXk9Jb0f/Sl9EL1fvb99zj5Sfke+F72MfWy9an2+vbV9jb31/j7+kv9of/rAYcERQd1CakKUguDDBMOAg/gDuwN7QwXDBMLQQmLBgcE9wHv/4391vqW+E/37vbm9uX2BPeV97z48/lX+jb6kfpA+/n7//tm+8n6Wvr4+iD7wfpB/N7/HgRhBgIH8we5CeYM1Q8VEZcQyg7uDMoKawj3BXsDrAHb/5P9AfsZ+aL4sfiT+PL3FPe59rr2uvay9tf2hPd++Bv5Jvnr+GP5AvvL/L/96f1K/ob/GgF5ArkD1gT7BV4HqAiOCSwKEgviC/cLnQsKC4sKCwosCVgHawTRAQ8Abv5+/EL6PfjW9mH2sfY/99v3lviL+V/6rvqd+r/6Qvvc+wf8sPs5+w77m/vj+2770Psa/nQB1wPeBDAFzQVsB44JMgvCC3sLfgoYCUwHEwUHA74B1wBb/379w/tB+kr56viw+G34qvhM+a75qPn++dr6Xfxf/sD///+C/6D/WwD7ADwBEwH0AB8BzAG4Ao8DbwRuBXIGKgerByoIaAheCDMI1wckB3UG0QWSBLQC/QDj/7P+Nf2j+xH6Bvml+Lf4xfjm+Ez5nvnd+ej54/n0+Xv6QPu1++/7+Psw/Ir8w/yc/Gb8D/2F/vn/8gBDAYYBQwJ2A/sETAZRB2EHtgYgBj8FcQTaA4gD0QKuARoBMQDn/uD9XP0o/SX9oP2V/QP9Gf3O/aX+qf/UAIwBkgHyAaECwAKVAj0C1AFtAU4BZwHsAGkAbADqAJ8BUQIeA0YDAwMRA1EDwwNSBOEEagTmAroB8QAlADL/M/7A/CP7bvq9+Qr5Cfn6+fT6S/v0+1f8t/yh/Zz+8v5o/uv9Gf3f+7760fkK+Wr4a/jv+Lz5D/vB/Cn+QP+gAKgC6gTBBssH7gfEB8gH6AeRB8kGAAZ2BQMFsAPLASsAfv/3/74ArAHYAbYBUAL/Ao8D3gNPBEAEZwO0An0Bqv8z/mj9n/yl+2D7p/vi+5f8iP3n/Uv+gv+8AAoBCQFEAVEBmQFmAoECrQEbASYBvwDZ/0T/qf4i/gb+Ev62/Zv9of6i/9P/sv+r/6v/kv9a/3H+5vzL+y77VPoN+er3V/f69gD3fPf796f4EPoN/EX91v31/q0AygLNBBkG4QVpBXoGrAcHCNMH0Af0B18IUgnxCIYHwQYRB34HSQfVBoYFJwSpAw0D0AGwAEgAzv/O/m/9ovsh+gL6bvoP+oj5ePnV+XL6N/ul+5n7VPxW/aH9SP0o/en9Bv9JAMkAlQCkAH0BnwLeArYCmAICA4gDmANyA/0CGwOMA5MD3AKsAfoAIAAF/+b9lPxc+2b6vvnT+Lz3SfcN97n2vvbR9ur2kPf4+GX6PvtX/Lj9Ev8FAUwD5QSnBYsGvweACF4JgQopC2kLzgv3CwoLTQp9CnkKxQmzCG8HqgVQBIkDHAJZAAP/6f1j/Ob6t/kt+An3tfZq9r31dfUQ9t325vcb+c/5Hfq6+sX7sfx2/Xb+TP/t/7gAqgGvArwDFgVGBuMGIAcfB9EGQQb+BccFKgVfBLsDAQMIAmEB5ABDAKz//P6R/Z/7aPq6+er4Lfid98P26fX69X/2z/Yt9/j3y/jF+SX7WvyV/Vr/2g
H/A0sFSQYSB20IBwpBC50LMgscCy0LgAuVCwULLQr4COcHugZVBa4DyAEqAEn+Xvzh+q75mfiQ99n2/PUR9dT04PTs9Ej1MvY19/r35fiq+Xv6Bvz6/ZD/PACgABEB2AFQA6IEjgVABvQGZwd0B64HzgfeBwUI1QclBzAGigUIBW4ExAOYAhoBpv9L/sz8Nfvt+dH4B/iW9zj3zPbG9oH3efh0+SX6ivrP+pb7Cv1q/oj/ZQBxAZoC7gNCBUMGMAc5CHAJDwpPCpYKAwuqC1UMlwy4C4EKaAkqCLMGFgU1A9IA3/5N/bz7d/rB+T75bPjM9zb3tPa49jv3vvfz9zr4mPg/+V/6pPvA/LD9mv5b/xcA1QB2AVACegODBCcFrQVZBtwGQQevB8wHggfyBhwG7QSnA8MC0gGvAI//bv5i/V78x/ts+wX7ivrq+WP56vjT+Bn5Tflh+Wn5hPmK+er5xPp7+/r7aPzL/AL90/1h/7kApwFwAjgD6wP1BDkGAQdQB7AH2QecB3gHVQcjB/8G8wZ0Bm8FtwQkBJoDFQOJAsEBnADI//H+M/7o/c79v/2G/U/9Ef0b/Yz98/04/kT+Q/50/t3+fv/4/5gAWgEBAugC5wO/BEsF1gVNBnEGoAbdBt8GmAYxBpEFkgTOA4AD3wLOAZAAOf/9/RD9U/xT+076ovlH+RD5zfh0+NL3NffV9pf2dvZL9n328PZ09zr4WvnF+if8jP2m/jT/4f/0AAMC3AKqA14E5QS0BcEGWAeZB7UHQQduBogFvgTLA/UCRQJGAZAAGgDM/4n/b/95/y7/Bv+W/tT9SP0r/Wb9hP3B/b/9lP3e/VL+rv7w/lL/if+r/zMAqQAYAdUBrAI2A4kD6QP4A9QD4QPHA2YDFAPYAngCAgLNAbkBqQG6AaMBUQHVAHkAVAAeAOn/kP8S/1z+tv1l/SD95vyr/Fn80vua+wD8bfyr/M780/ys/N/8gv3T/bX9n/2V/XX9ov0W/lz+fP7T/vX+mP5h/pD++/5g/8z/6v/R/0MACgGxAfoBPAJVAiwCIQLtAasBngH/AUECDALJAXoBUAE8ASUBwAD+/1D/zf6A/k7+TP58/pz+uf7G/rL+jP58/ob+R/76/dz93f0H/kj+qP75/o//TQC9ANcAvQDLAO0AQQGBAW0BVAE2AS8BEAH+APAArABUAJr/qf7B/RD9kPz1+2H7svof+uP5w/ml+WL5SPk3+TP5Xfl9+dn5cfpC+wD8lfxW/SD+Mf9JACwB8AGnAowDNwTIBD4FhgXgBSQGTQYuBhEGAQauBToFjQTWAzMDuQJGApUB5wA8AKT/M//H/mb+FP7n/Zb9Cf2c/Fr8XfyC/Iv8TfwD/CH8c/zp/H39G/6u/iz/vf8+ANUAoQFzAhsDfwO4A9AD3QPuA+oDvANsAwUDgALjATYBpQBKAAYAvf9K/7r+QP4R/hb++f2k/Sz9pfw8/Av8/Pv1+xX8X/yL/Kb84vxH/b39Rv7F/iD/qP9uACgBuQFGAtMCVQPqA2QEfAReBDME1wNSA8cCMAKQAR4B2QCHADsA9f+e/0r/9/6a/jT+5/2W/R79yfyQ/G/8e/y1/Or88vwY/UD9W/2u/Tr+z/46/67/KwCsAHUBYgIpA7MDKQSKBLMEzgTQBLQEkwSDBG0EPAQSBO4DzgOlA2AD8gJ1AhoCxgF/ASsBygBgAPT/q/9e/zT/Fv/1/tv+t/6v/rn++/5K/4v/2f8YAHYA3wBNAZMBtQHdAekB7AHSAawBggFbATMB0wBZAOP/i/9n/2f/ef98/3j/bP9G/zT/Kf8m/xj/2/53/vf9qf2L/YX9lf2Q/ZT9p/3W/Qr+Nf6R/gD/ev/s/1MAwQAzAdcBdgL1Ak8DhgOiA44DfgNyA1IDJAP4AtQCpAKVApUCgAJrAnsCjwJ1Al4CTAIoAg8CEALyAagBewFZARABzQCVAEAA1f+M/1b/Hf8N/yj/Qv9Z/4//xf/p/xoAUg
BeAEQAMAAPAOf/1v/N/5n/Sv8I/8T+mv6W/pj+gP5e/kz+Ov46/lz+lv7k/jn/gv+Y/4z/hv+X/77/2P/V/7j/kv+K/6T/0/8KAEwAmgDYAP8AFwEtAVEBgwG0Ac0BzAHCAbMBpwGVAXMBNwHsAK8AkACRAJUAlwCiAMcABwFJAXUBfgF2AXIBYAE8AQkB1wCmAHUARgAEALf/bv8z///+2f7N/sP+rv6S/ob+kv6t/sP+tP56/jr+Ef7x/cv9nv1i/Sj9AP3y/Pn8Hv1i/bH9Av5U/p3+4P4v/4j/3v8qAGEAewCEAI8AnACoALQAuQCwAJwAggBrAGwAhwCrAMIAzADTAO0AFQE0ATsBLQEUAewAugB8ADEA6f+1/4X/Uf8m/w//DP8m/1b/hf+x/+f/HgBKAHQAngC7AMYAtwCSAF4ALQAGANj/mv9K/+v+h/42/gP+8v32/f/9//31/fX9Df45/l/+Z/5P/ir+Ef4N/hv+K/44/kT+Tf5V/mH+ef6f/tX+Ff9N/3b/nv/Z/zAAlwD5ADcBUQFWAVoBXAFbAU8BMgEEAdIAngBxAFgATQBEADIAHwAPAAsAGQArAC8AIgAWAAcA9v/l/8//s/+Q/2n/Of8O//z+CP8a/yb/Jv8q/0L/c/+l/8L/zf/P/9L/1f/T/8v/x//M/8r/p/9o/yz/EP8Z/zH/Mv8d/wn/Ev80/1n/dv+I/5r/rv+9/77/wf/U/+3/+P/v/9z/2v/1/x0ANwA8AEYAYwCWANEAAwErAVkBgAGUAZEBeQFZAToBFgHlAKQAZAAnAPL/y/+y/6b/nv+V/4//mf+z/9f/8f/4//T/9P///woABwDv/87/sf+e/4v/c/9W/0P/RP9R/2X/dP+F/6H/zP/1/xIAJAAyAEIATgBEACAA7v/D/6X/iP9h/zL/Ev8L/xn/KP8u/zj/VP+D/7n/7P8WAEEAeQCxAN0A9AD4APgAAgEVASQBJwEbAQ0BBgEPASMBOQFKAVYBXwFjAV0BTAEyAQ0B3gCmAGEAGADR/5X/Y/85/xX/8f7M/qv+lf6b/rX+4P4R/z7/Yv+G/6//3f8QAD4AYQBuAGsAYQBYAFgAZAB2AIkAmQCqAL0A0gDiAOgA4wDTAL0AogCCAF8APgAkABIA+P/M/5H/Wv8w/xT/Af/y/u3+9v4S/zj/a/+j/+L/JQBkAJsAxQDoAAEBGAEiASQBIwEnATYBRQFKAT8BKAERAQEB8gDmANYAwACkAIIAYABDACcACgDh/6r/Z/8k/+v+vv6U/mv+Sf4z/iz+M/5J/nT+rv7s/h7/Q/9n/5H/x//+/y4ATABjAIUAtADYAO4A/gD8AP0AAQH2AOkA3gDYANkA1QDNAMYAwgDKAMkAqgByADQAAADZ/7X/kP90/2D/YP9+/5D/kf+P/5r/s//U/+r/DQBAAGMAigCqALQAvwDKANUA4ADcANAAygDCALIAngCjAKoAqgC4AJUAQQFKATMA0/8y/xb/Kf/E/rn+iv6T/lP+6f0d/hn+Y/6u/uD+aP+N/6//qf+T/6T/nP/K/xUATwCOALMAsACYAJkAiwBxAKAAuACXALMAigBYAIwAtgDVAMkAmwBpAFoARgCKAJsASwCcAFwAFABaAMb/9v/4/+v/IgCy/w8AJwALAGEADQBQAE4A5f+DAOX/VQCDAPv/WwBAABAAYQABABgA7f93/9L/Gf/n/6P/iP9oADf/5P8xAPT+CQC+/lv/jv9e/s7/if5f/zn/yf6r/7f+av+U/wf/HAB5//H/7f+I/7AAcP+XANMASgAyAYsAxQA8AXwAKQHuAIMALwFNAIQAcABBAJUAmQBLAHYAogBRAEcAXwATAE0AAwDW/3IAK/9nAB0Aef9SAHn/BQB5/yD/WQBg/4P/vgCH/ycAegAzAFoAagCFACwAPgA5APH/r/86AOf/NP9S/2P/0f5R/+T+0P4m/3b+U/+b/kz+ff/d/p7+vP+7/uD+jP82/2j/Gf
/D/3D/oP+XAJf/UgCfARQA9AAhAaEAZQGnAIsB9QDIADMBcQBBAYwAagAIAfz/9wArALz/qQBe/yIA0f87/6n/MQCk/6//lwBN/7r/BQBw/yQA0f/i/yYAOACQ/5z/PACw/lkA3v95/58Akf88ADoAXwBcALUAQADN//cArv/h/5gAVv+L/7v/AP/j/pT/k/60/uv+ef7Y//v+Uf4tANL+rv7e/x//x//U/r3/0v+o/l8Ao/+W/1YAMQC4ABoA0wDiAPn/sgHDAGMAhQFVAOgBvgCBAJIBeABfAUcA4gAuAXz/FgHV/7b/VwGd/pcAFgCo/pcAKP+J/8r/df80AHH/cADa/2j/NAFO/14AKAAH/x0B5/7V/5sAQ/4YAeb/QP8+Aer/ZwBJAD4A1wALAHcAgADR/2wAof+O/5v/HP8r/2L/tv+Q/u//Uf9T/mcAwf4S/3z/Sf+nAAH/CQDeAI7+mQAcAJ/+ewD1/3EA6v+2/40BFP8FADYC9/77AC8BggAIAjoAJgHfAOX/YgEEAEv/JwFrAK7/cQBZAKT/9/79/2sAHP8yAP3/PP+3//H/jv8e/0EAw/4l/4IAjv/nAKv/GQBPAdT/sQDi/7sAgACD/zYAGgDrAFv/QAAcAQn/twCmANz/xQCV/6MA+v88/0cB//6S/qgAov45/8b/cP+Y/9z+dQCY//r/nf/L/00Bgf+kAJkAGP8DAWEAXgAiAID/wQATAF8AUf+0/x8A7P+YANP/Yv/LALQAGP9iAMcAIgB7AO7/Yv8oAbgATP9MAKb/DwChAJn/pf/R/t3/UQCe/rv/6f5k/7MA8P6R/5n/FgANAGj/jwBIALIAhwBk/34AbACLAD8Afv73AMkAUP9EATkAJADyALAATABd/9wAQwBZ/8v/hP9+AD8AOABxABD/agCCAAf/7/9W/7L/aAC//x0Atf/r/6MA/f9cAMH/kQDW/5z+dwDY/5z/ZP88/3UAM/+d/1YByP/WATYBH/+CALn/3QAHAbn+ZwCJAO/+UwAcAO3+Lf+1//7+Lf+w/yIAB/8U/8MAEQDA/o4AawHp/mMAOwE8/xkAtACS/1H/JQDsAFr/NADCAE7/kf8JAA8AYP+J/0MAfQBw/2AAMQEB/+4AaAL//k0AygFv/9j/6//C/8UAs/8QANP/M/+4ADH/V/7HAKgAsv+v/9H+yQATASkA8/9B/xEArP+h/0X/e/8y/0r/Rv++/hkAh/+//3EAQf/kAAUBXP+wAOoAoQBG/+D/lAHl/wz/QwDM/hH/pAG6/wT/sP9RAPoAtP9/AM4ACv9GAZAA4v4HAZkAkf9TAN//cf/R/33/2QA4/0r+dADw/zf/WAHMAJ3+9v/v/ywAfQDv/8EAvv/Z/9kB6P8mAKAAIP+ZAXAAzf5AAWMALwDaABD/bf/v/8z/BAFMAKj+qv9o/y3/4gCx/93+JgEHAFT/tACw/gT/lP+T/kn/bf/+/5QA3v9T/4IAuwCI/58Az/9P/0YBZgAV/6AAJQEp/13/JgDA/9IAjgFqAEYAsQFlAej/wP8tAFkB5P+S/7AAxf5iAI//sf2j/4P+Fv+m/hT+xwCr/8n/HgFxAX4B6//j/xoA1ACgAhIBKf8zAGYAKgCL/8r+xP9z/9L+Of8n/9T/JQIaAQX+9/8PAfgAXwF+/8EAeQERAPkA8/+Z/5AB/AAN/8f/lwCO/0H/NP75/lQAwf9u/+b9rf7PAQcBO/+A/4z/dQBNAeAAyQBHANEAOwGM/ycAngEnAJj+pv9gAGoA2v+C/1YA1v8NAU4BHv+9/xkBtwCG/3v/ggCwACb/fv6X/wf/PwBxAMj+3P+lAOsAsAAKAC4BiwF//xsA1QBk/77//v/U/vP+iP+i/6n+yf7vAMIAef8oANn/WP+2AOkANQA9/yYARgBj/xYAowDQAKUAxQBy/wP/lABiAE0A0f/Q/6EB9v95/0cA7v46AEsArv7T/vT+cwATAVL/s/+QAGAAuwC/AG4AhA
DKAPP/eP6+/lv/Rf8eALb/x//NAKwAwQC7AH4ASQFDAQj/1/4qAF4AZQDl/wH/0v88AGsA9v+W/owAeQDM/lsAgwDZ/+L/Sv/E/7n/MP93/5r/MQDRAND/Rv8eAEkAWAAkAOL/mABcALv/rQCp/0z/kQB6//X+cv/o/o7/sABiABMAjf8aANkAeABiAUEBSwAUAXcAYv+m//X/9v8EALz/If9H/2AARAB//3sAywDj/6j/bf+b/3IAP//o/TH+LP/d//z/6v98/27/OQCfAKcATgF1AMv+5P6t/7X/RgCt/77+cf/G//r/HQAKAPYA5gE5AT0BOgHdANAAv/4h/u3/IgDT/07/WP7r/jP/T/7q/Tf/IgFXAJv/cQAdAHsAuwHiAHL/tv+t/4f/mP8YAOz/gf4eABUBXwAmAaIAVQD+ACMBpACQ/5X/AABN/xD/kf+i/53/BwCn/6P//gCIAK//4P9n//3/4QAIAH7/yP+C/13/0f/j/3b/p/9IAF3/NP+oAXgCpwCi/yAAKgCW//H/uf/U/iUAcADC/u7+IgALAHP/mf8PAAgBpgGqAHP/lABfAZ4AEgCE/pP9Hv9yAI//fP60/v//cv/2/i0AbADsAKIAXP+f/+MAaAGmAEH/JP+9//n/JgCaAIoA5P/pAKsBLwFvAfwARwDlANUBRwIJASb/cf+5/2//TgAH/+L96/6A/9v+df4R/0z/nv8AAFQAGACT/w0AXgF9Ab0ABQDW/70A0gBuAHsAhwCNAHgAxP/n/gP/9f/d/z//7f5O/j7+Cv/aAJkBpf/f//UAcgCfAvgDbwIJAW8AkACXAOcAhwFTAA3/nf9x/x7/3f9t/2L+kP4a/4v/cf/S/w4Au/4K/wwAGv8Z/1T/cP/RAHEB+wC+/+v+kwAQApQAjv+A/9H+1P0g/i3/rv4u/hz/W//t/vgA1gEAAFUAWQLYAhECSQL1AngBSwHPAiMCiAH3AswCOgCW/wQB5gEHASwBSQEkAJgAgAGBADP/qgCuAS0Aev9wAA8Awv2C/Yb+CP0M/RH/S/62/V7/xP/b/nH+sP7q/h3+ev78/sz8vvxM/o78Ivsn/Xn/e//f/34ACwG+AmEDXQOmArECQgQhBGEC/gGPAhMDrAN5AmUAsgB+AugC+gFmACMAfACl/+/+3P4a/07/hv8h/xH/5P80AMr/AP/k/qj/W/8E/13/DP8K/yf/cf61/Z39pv1T/Xj8Ovw0/NP6FvrW+v/77Pz2/DD+2ABcA2UFjQUxBUQG8AdeCOsG3gWxBIUDowK4ALX/zP5//j/+sf0H/hr+5v6r/2b/KADtAI4AxADzACcCxQIjAkICUQFjAFAALv+W/Rb9Gv5L/u789vsN/Pv8pv35/Rj9ufyt/YP+Ov7r/IT8h/wk/LH7J/3F/5EAyQFMAikDxgUPBu0G4AYjBpwG8QVYBMkCDQHX/hb+//15/bz89fvF+y/9Iv6I/hb/OP8PAFkBXwLSAvcCKwPgAsQBLQHvABEAKv+s/nf9jvsW+5L8qf1u/XX8ZfuT+w79WP6D/Y/8iP1k/oz+U/4U/6QAqwG0ArQDIQVeBnUHsAfmBoYGTQbmBWEDWwHMAJv+x/wk/D37z/og+7z71/wX/QD+rv/I/8cA8AIMA9ECqgPgA/oDcAOXAvUBLwBS/0X/0P3//IX86fqD+Qz5A/pY+6f7nfsW+5n69fvY/dP9Cv6p/0oBDQO1A0AEngQKBdQGKwijBwMH7gZ+BdgDWwJVAMb+sP3A/BH80vrg+l77hftq/BH9lv1I/vr/UgHyAbUCmAOXBC8FzgWBBb4DywIVAooBvQB8/3D+//wO/EH7UfoF+Wr4ePjQ+Pz54frC+uf50vqA/SIAwQE1AoYDRgVjB+EIQwjXB9UHawcaB0MGDAT1AQcAwf07/Pj6KPqA+ZH45fjB+ZH6LPzp/fX+CgDGAXYD4QR3Be0F7QX4BaAGbQWhA5YCZAFeACj/W/4Y/i/98/vw+vv5iv
ma+db4dfdu9+T5EvyZ/KT84v1EAHsCGQUbBhkH8AgiCu0KtAlhCF4HSgXDAxUCvP/s/TL8hvo1+Xr4dfjT+N/4efn3+lz8F/6S/2AB3wIZBOoFSAebB7sGHgZvBakEOwRoAhwAev6V/Ub9Xvyh+1X7tfql+iz7u/pL+lD6Vfoj+hL6UPxQ/5AASwDx/y0BxQN9BrMHdgd5BzQI2AjBB0cGzgTnAoUBRQDW/jv9svtg+hn57Pex9xr4APlJ+ib8Df62//4BsASbBpMHqAguCe4IsAfdBeYDKgInAaP/RP0++1r6I/rI+RL6Ufrn+Tf62vob+4/6oPpc+z37T/wV/3gAyQBEARkCqgMOBs8HPQijCKIIvgibBz8GXAU+A8UAr/6F/cD82fu2+pH5MvgA+HD52vpA/NX9kv+rAdkDxgUGB9UHvwjkCM4HGwZ3BJkC+ABhABL/w/yE+8n6afqk+uv6KvvH+tr6hvuX+zT71frg+h376foV/Eb+9f9EAW0B8gF9AyIGgwgQCacIfgjSCPIHNgaGBBYC5P+R/mf9BfyP+rH5ovjs9zf4d/n5+qn8x/5YAPsBoQOTBV8HDggfCKsHrgZ2BUYExQJHAbIA2P84/pD8cvuq+6j7SfsI+5f65Pp7+5j7jvoz+T35GfqM+sD6u/wz/+sATQJeAnEDvgWeCGoKyQndCHAIFQjLBtAETwKj/xD+uv0j/ar7GPos+XD4qvhG+r37wvwv/sv/vQDuAZgDLgUZBoAGkQbdBeQEfAQVBOgC3AEZAc3/w/5Q/qv92Pwz/Nv7WPvJ+lz6Efp9+WP49PdK+Mj4GPmG+mD9CACSARMCGwPLBMMHigrGCggKZgleCfoIJwczBeQCcACZ/yX/0P0a/H364/g2+Cb5AfpX+lL67PqM/PH97v8gApcDeARUBSUGRwajBmIGcAVlBKwDHgMjAgQBsf+b/k39kfwg/Cz7SfpG+eT4TPg5+Lv4Qvgs+J/4Nfmf+Uz8NgAhApgCOAJLA6IFxAgnC8IK8AjhB/YHzwdSBx4GyQNPAef/Yf/u/SH87fo5+Rn4v/ce+OD4vPlc+9D8Dv6w/7EB5wL1A5AFXAahBuIGuwbPBeMEvwRMBCADuQEkAG3+jf0v/T78qvpE+XH55/nM+R35DPiI9yb4IPlr+Tv5RfqK/er/HgHHAR8C2AOZBgoJ3wlgCRwJggmgCQ0Jmgd2BUgDuAFzAJ/+i/zt+qH5OfhV9533hvjM+Rf7n/vt+xz9Kf8CAV0CaQNWBOoESAWNBTYF1gTpBEgFMgUvBMkCbwGaAJIAIQA2//n9o/yr+7X6Bvoh+Y74s/fp9s/2XPcT+JL4tfok/Qj/1v9uACYCpgT4B80J7gl/CVMJtwmTCbgIDQcMBWIDTwI2AWn/iv3S+9r6OfrR+bz5L/nd+Cn5Wfqx++T88/08/uD+9/9jAWsC6gKpAx0E/QTwBYIGeQbvBbAFVgXsBCoE9QI1AVr/Av44/Gb6FPkN+HP31fZx9hL2s/Vh9rP4NPuK/EH9yv1P/0cChQWTB9MHyQeICKkJvQrYClEJXgdvBiEGjgXaA5MBwP9B/j/9cPxT+wL6DfmP+FL44fiH+Wb5Qfm8+QH7tfx3/g8A8gD4AfEDIQbCB4kIvwiLCGwI4AiVCKsGXASHAs8AWf/S/b77tflA+ND3KPcs9iX1gfSG9Vr3UfkD+tr5rvrq/PT/PgKvA5UEfAU0B+0I8gkPChcKSgo+ChYKGgnJB2AGDwW/A+sB3/+s/d77n/p6+XD4WPcc9on1LPZD91/43fld+938hP6vAFoDdAXcBgUIkQigCOYI/wg2CBIHswXQAwMCfQBG/wX+ovyH+x/6a/jx9lr26/bU90r4sfcL90/34Phs+5T9Pv9tAJ4BjwPtBe4HPwkBCqQKiAvIC1oLpQp2CSUInwaXBDcCRQCd/jn94/sw+tb4lfcV94/3LPix+Fj5Qvpk+9P8nP6MANMBzQKtA2MEcgUwBlYGHg
aFBWYFJAU9BBEDygGNAML/UP///Vf8bvpk+Z75+PnO+az4wfer9+z43fpO/GD9IP4a/6oAigIrBBQFwQW7BpcH0gePBx0HfwYvBhEGJwWNAxgCDQGYAE8AyP+f/vP8A/zw+1j8afwA/HH7Kfv7+zT9Wf4T/1v/f/8uAMwBPAPUA60DeQNqA34DugOWAzIDkALyAX4B9ABtAGX/EP4X/aD8ePxR/Bn8k/vd+pf6uvoI+4X7/vs2/Hf8zvxg/T7+JP8rAPgAZgHwAecCvQNcBM4ExwSWBFUESAQ3BMwD+gL5AUQByQBXAKT/3P5X/kv+ov7B/tX+1P4U/8f/iAAhASEBEQEhAWYBtgGyAWoB2gDGAMoA9QBWATkB7gBPANL/kf+R/3f/Bf+0/lD+F/6c/eD8T/wQ/EP8T/wn/Nb7s/v5+3f8Zf0W/lj+wP5f/zYA8QCJAfABIQJVAp4CpAJZAvEBcwEuAfgAqgA0APn/DgApAF8AgwCoAIwAVgB1AMAAOgGEAUwBnwAbAE8AvwAdARIBywCpAPUAuwEDAskBTgHpAOcA5gD1AMIAWgAfAPL/kf8s/+f+m/6U/pD+WP7a/Wv9T/1f/ZX9oP19/TP9Mv2C/aD9jP1o/YT91/0w/nn+uf4D/y//gP/P/xsAXgCRAPMAOAFuAZYBrAG9Ac8B4wHQAa4BggFiASAB4wDlANAArwCUAH8AVQBgAN8AWQFfAQoBvACFALAAHwEyAf4A1wDwANgA0wARAU0BfAFQAScB4gC1ALwAdwAFAID/Hv+V/hD+lf0U/bP8OvwQ/PH74PsI/EP8mfzy/HD99f1u/rP+Av9x/8n/UADGAP0A9wDHAMgA6QD5AB4BUwFAAQoB4wDKANgAEAFpAXEB9wCKAFIAMgBOAGMALQD2/9f/+f9hAOoAkwHzAfkBBAI9Ao8CyAK9AnsCKgLGAXkBIQG3AFwA7v92//X+nv50/mz+d/5T/hL+6P0m/rT+IP9i/1j/C//R/sz+9P7y/sv+p/6D/m7+jP7H/uz+Gv9X/5H/rf/D/9n/6f8HAB0ABgDJ/6n/k/90/4H/rP+r/3//gf/B/w8AaADKABwBawGgAY4BiwHLAQ4CBwK+AY0BWAE5AWABewFdASoBGQEfAUUBcwFqASIB2gDEAMAAuwCgAFkA9f+c/1D/Jv8e/yn/Jv8E//z+Bv8S/yv/Qf8p/+n+0P4J/1b/Vf/+/of+U/6z/jj/Xv8u/+b+3v4p/4//xP+K/yD/6v71/ib/ef+3/8D/qf+N/7X/BQA/AGcAcACAAJYAtgDyADEBWQFmAWYBbwGYAcYB5gHsAckBmAF4AX4BpQHLAcEBdgEfAfgAAQELAewAkAAUAMb/tv+p/4z/aP81/wj/E/9M/3X/bf9p/3n/m//A/77/kf9T/yv/Fv/p/p7+S/4j/iP+NP47/i/+P/5t/sr+Mf+G/7z/v//B/9T//v8RAPj/yf+e/6f/1/8AAAAA+P8hAGkAuADtAAUBIwFlAbEBxAGcAWoBVgFZAVgBMgHlAJ0AiQCkALwAzwDhAOQA7QABARIBCQH0ANsAqwB1ADwA9P+q/3P/VP82/yP/Fv/+/ur+9f4c/0T/Xv9s/3L/h/+f/5n/cv8//wH/vv6Q/oD+dv5u/mn+Zv5//sL+Hf9w/7T/9P83AG8AiwCJAHgAbABwAG0ARAADAOH/6f8AABkAOABdAIkAwwAFAUIBggGyAbEBhAFOASkBCgHqALoAcQAXAM3/rP+s/8D/2//z//r/BQAnAFwAkQCkAJMAbwBZAFcARgASAMb/hf9d/0r/Qv8y/xr/Ev8m/z7/SP9J/0f/Tv9h/3b/dv9c/zL/Ef/8/vn+A/8P/xL/FP8i/0L/ff+8/+//EwA5AG8AngDAAMgAwAC4AK4AoQCHAGoAUgBHAEsAXQB9AKUAzQDwABIBMAE8ASYB7QCkAGkASQAxAAUAwf98/1//df+l/8//3//s/woAOgBlAHoAfgB0AGgAUg
A0ABQABQACAPT/x/+E/0n/G/8G/wr/Ff8X/w3//f71/gj/Kv9L/1z/Xf92/5H/rf/B/7L/m/+C/43/w//2/xUAKABMAIYAvwDkAAgBGgEQAfcAywCvAIYAWwAvAPr/0P+6/9L/3//0/wMADAAmAD4AZAB9AIAAgAB9AHQArwDUAP8AbgKYA+0CtwEHANH9u/zk/KL9av8zAT4B7/8d/k38wvvd/PD+SgGxAoMCGQEP/2L94vyg/Uf/DgEZAhECHwHh//v+1f5s/08AQAHMAYUB2AD1/0H/nf+KADYBkgEDAY//Sf6F/Zr9wP4RAOsAIQGAAID/2P7H/kn/IgDnAEQBMgHJAC0A8P84AJUAFgGNAYQBRgHhAFcAOgBJAD8AcgCFAD4A/f+p/1r/Xf+M/9L/RgD3AIQBbAHUAPD/1/4f/sH9m/0E/rj+IP8x/+/+e/5L/nz+Cf/2//AAfQG7AboBSQG5AHMAZgDCAGcBsgF9Ae8AGgBL/9D+uP4D/3f/uf+T/x3/jf5U/qL+KP/J/2MAggAgAIr/BP9R/+8A5gIDBPcDsgK+ADr/sf4g/zkAQwFQARUANf5q/EP7h/tP/XP/GQGzAQYB1//x/rz+pf9LAfoCQwROBCkD/wEqAb0AGwHNASoCKQLqATcBMgB2/0b/gv/9/yEAkv+Y/mX9lPxz/H/8xvwt/fb8j/x1/Fj8tPzd/eL+XP+l/4X/Ff/6/i//gP8iANAAJAEnAfQA4gAhAaIBlwJ8A70D1gOlA+QCagJfAmkC1gJRAzIDjgJ7AU4Auf+r/8f//P/i/1b/tv4b/qT9qv0N/n/+F/+u/87/mv+W/+n/iQBQAd8BHwIKAqEBPgEJAeAAzQB8AMf/Lf+k/vf9cf37/F783/uF+zD7Dvs0+4j7A/yh/DL9nf0I/oX+J//q/7UAjAFDAqcC+gJ7A+0DQAScBK8EbQQGBEEDRQKJATMBAgHXAI4A0P/P/gX+mf2e/Rr+jv7O/v/+JP9o/7f/GwC6AGgBHQKrAvUCDQMUAw0D6AKwAo8ChQJVAgoCmgHbAAYAB//q/dP82ftH++z6l/oC+k35yvjH+LD5IvtM/OH8S/3//e/+SwDEAdkCnANJBMkEyATGBA4FbwXLBRgG3gWgBB0DCwJEAboAXQDm/wX/Ev5U/WP8gft4+yL83vzR/Yj+k/6J/sD+gv/HADMCeAMTBO8DawPzAqUCpQIiA1UD3AILAuAALv9X/fD7mfqK+Qb5svgT+Q76z/qs+jj6V/oW+9j8Iv8zAbMCyAPNBI8FCwZpBt4GZQcVCJUI2gdYBt4EeQNHAksBMgDJ/pH9f/xN+zb6Xvn6+An5l/lz+k77CPyp/G79Kf46/+gAkALtA9MEQQVFBSkFMgU5BQwFhQTtAygD2QGJAC7/ff0D/K36PvnT9yX31/f6+Jv5wfn4+aH6F/yR/v4AwQJCBLkFwAYSB2gH3Ac6CNMITAm5CPoGOgXOA2QCPAE7AB//1P3N/BX87vrZ+Xn5cvmI+eD5UPq3+m/7c/yY/eL+igBKApwDkQQMBR0FLQV/BcsFWAVPBDAD5AFpABT/nf2r+7n5Sfjx9rz1KPaE9zT4ofhB+Q/68Pq5/Fz/mwGtA8wFpgeMCNIIYgmqCQEK4QoyC/sJDAhMBiQEHAKvAIP/QP4I/T/8/fpp+Wb4MPjL+Mj52Ppn+7X7XfyC/QL/eABKAv8DGgWxBdUFvgWPBeAFCQaRBYoExQLAAK3+8/yV+zn6qfgN98/1kPSO9NX1w/Zs9x34TPm/+tr8q/8EAhUE7AWBB1wImwhQCSAKuQoeCw4LrwlxB8kFJgRrAvoAsv9C/p38VPsS+sf4F/hs+GP5KfrR+iz7c/tQ/Nv93P/kARkE8QUBB6EHrQeFB5sH8AfqBx0HnQVtAzYBMv9k/Zv7ofm+98H10/Nm8nPyxfP+9Ov1nfbO95v5AfwK//UBrgQAB88IuAn3CXIK6wqMCw8M7QukClcIPgYJBMgBEA
B1/tn8TPu++Tj46fYX9of22/cL+SL6uvof+0H8DP73/9oB/wP/BX4HPwhOCCkICAhnCKII5AcWBpQDSAEd/1D9wPvL+fL3NfbF9AP0Q/SE9e728vdw+Gz5QvuE/XcAiQMaBv4HCAlRCQIJmwiNCJ8IZQjGB3gGSwT5ARoArP67/UP91/z9+9D6zPlb+a35xfp8/Cb+MP+p/xAAlgBvAfsCjwSyBUMG3wXUBIcDowKBAqYCfQKwATEAFP5E/Aj7H/qO+fX4Tvgs9xT2/fX19iP5s/uS/Zf+cP+9AGcCXARfBjIIiAn/CbEJhwjcBrEF/wRcBG0DDAIxAAj+Uvwz+4b6Yfqi+gX7R/ts+5n7gPxR/n0ArQI9BP0ELwUBBfQEPwW9BRQG6AUTBbcDCQJoAGD/EP/v/kv+MP3V+3/6uPlf+Vn5i/ml+Vb5ofgU+Dv4avmX+xb+hf/W/10AWAGwAlYEKQbrB98IAwljCNAG1gRvA7MC8gEeAdL/5f3p+6/6j/rR+mT7IPyU/OX8Uv1a/sb/cQGVA34FtQZFBzEHrQZrBpQGjwYiBgkFUQNJAVv/C/5b/e/8mPw8/Hz7ifrQ+XL5cPm6+XD6Hvsv+8n61vk0+b/5gfs7/gMAZQCCAMQAgQG9AmgE5wXrBrMHkwcvBvQDGgJZAQ8BGwG4ADz/Sf3x+6T71vuH/I79X/7H/jr/EwDrAAsCzQNvBTQGNQa3Be0ENwSmAygDbQKhAQkBNwAx/yj+3/0c/nH+A/8W/5/+Ev7c/af9X/2n/f/9+v0J/VP73vne+Hr5lPsO/S794/z9/Hz9yf6vAMMClQT4BXcGawVPA2MBFQFvAf8BIQLkADH/4P2T/bj9gP7V/+gArwHLAbABnQH4AVgDmwT3BG4EYAMVAqkAt/8F/6P+tf7w/tv+V/7x/QP+uf4XAJoBbgKSAlUCcwFBAJn/yv8qAMj/TP7f+0/5l/fV99f5lfsW/L77Xfu5+/b8Df+xAR4EDAbRBiAGRAR7AhwCbwIZAzkDMQJ3ALD+3v3E/XT+xP8bAfkB7AGkAU0BXQE+AjoDygNsAz4CjwDo/tX9V/1n/eb9hf6m/o/+Vv6Q/rL/RgEBA7EDpAMEA7IBjwAAAHUAuQA1AHz+fvu3+Or2Qvco+fL6o/tP+//6Zvu9/N3+tgF7BJMGSgdQBkoEXgInArkCcQO6A6EC9ABf/8j+EP8FAGoBegILA6UCGwKqAZwBkQKQAwsEYQOQAWX/qv3m/Mj8Gv2l/eT9lv1M/Tr9w/04/x0B9gKBA+YC3gGOAHz/Kf+w/9T/Hf9A/VH62PeZ9mH3rPmN+zv8KfwY/MT8Gf4nACMD3AXdB00I9wa9BJ0CNwLBAl8DagM7AlwAgv7P/fz97f5fAKUBUQIQAsgBfwGtAdICHQTHBBkEZQJcAND+If44/pr+8/5F/+7+hv5n/g3/gQARAoEDkQOeAi4BkP9H/pD9/f37/Qn9+/oh+Pz1EvVx9vX4xfqa+xT8xvy5/TL/cQFgBAgH8QgRCVcHqwRSAtkBLQKFAkUC7gAn/6T9SP2K/Yn+KwB9AVkCFgKYAWUBugFNA88EZAWBBJ4CfADE/kb+kf5S/wkAVwDN//j+r/5c/wgB5wJnBFEEDQNIAWP/9/0m/aL9xv3M/Mj64vd79W70/vXU+O765/td/P785f1z/8wB2QSgB60J9gkbCCwFcQKLAc4BXQJUAg0BDv8l/Ub8RvxL/R7/4gDfAZgB3wBaALQANwL5AyUF2gRQAy0BRv9r/r3+1f8NAdcBOQEYAEf/Z//sAMwCXgRPBPEC8QCt/t38BPyy/ET9CP1s+5T40vU19HD1UPjn+kv84fxP/er9Iv8uAV4EcQcRCtcKDgnbBZECPwErAdkBWQKSAcj/kf1U/Nj7u/y3/rgAKQL2AUUBbwAwAF8BIQPFBAAF8APcAa//ev5O/jj/egBrAUEBTQAp/83+y/9oASoDrgP1AkgB6v7i/LX7UfxC/X/9m/
wP+nL3yPVk9q/4PPso/Tz+2v7z/mr/rABAA10GQwl2CgEJ/QVrAnQA0P97AIoBfAGNAIj+/Pz8+0j8Jf5OAGEC8QKCAnkBqgAhAWMCLAQPBaIEDwP3ADD/Kf5u/p3//gB6ARwBNgBY/1f/QgDTAY4CYgI3AQ7/1fxa+6f7bfzx/JT86vqo+Kj2i/bs9xP6Dvyq/bz+C/9j/xsAOQIKBQsI1AlICQUHnAM2Aen/5f/bAFgBKgHe/13+AP2P/I79Yv+ZAc8CEwN5ApUBWwHkATsDcgTgBBUEdgKYABD/h/78/igAzwD3AIIAoP83/1X/NgC5AOUAjgA6/5L9FPzn+zr8s/zx/Cz8wPoL+Y/4Jvlt+hL8i/2a/gj/bP/d/yABNgOgBXUHlQc+BrcDYwHQ/2b/IADXAGQBBgEoABf/Qf5k/j//6QA9AgwDGQNwAgAC9AGbAnUDIwQkBHYDNwLGALf/Tf+7/zEAtQDAAGwAFQC0/+r/wf+O/zn/Yv5e/Uj8Hvwm/Dj8O/yp++H69fnU+Vj6J/sK/Pf87f2W/iL/rP/gAIgCgAT+BS4GNgU3A4EBNgDK/zsAyQBfAVMBCAE/AGz/RP+e/6oAqAF+AsICZwIDAsUB/wFqAsoCzgJ1As0B9gAPAHP/gf+z/ycAhADSAP8A3gDUAF0A3P87/3n+x/0l/Sv9Rv1Q/QL9QPxN+5j6wvpH+wH8nPw9/cz9F/5y/qb+j/8LAe4CiQTiBEgEpwItASEAtv8kAMIAqQEQAi8C3wEnAc0AuABBAdkBawLHApsCWwLrAZkBTQEjATkBOgEvAfMAjQAyAPL/2////2IADwGdAeMB1gEoAT4AL/9A/of9JP1j/Z39sP1H/XX8gvvJ+sH6Fvu5+y38nvzp/Pn8FP0Z/e39OP//AKgCTwMwAwkCCgFRABwAtgB0AYECNwPRAxUEyAN5A/wCvAJ6Am0ChQJJAgECdAHaADcAr/92/1v/h//W/xoAWwB3AG0AbQCrAEEB5gF4ArYCQAJIAQMA0f7a/WH9gP3N/QH+wP0y/Vn8kPsk+wj7Qftk+7L76Pv++/T71/tJ/Bz9m/41AFUB4QGcAUwB8gAAAZEBUQJvA1YENAW6BccFiwXQBC0EcQPoAowCGQK2AQcBbgC5/xP/sP59/tP+Tv8EAKgA/wAYAewA+AApAZ4BKwKBAmgCqgGhAGz/VP6p/Xz9uf3z/fD9i/3D/AT8U/vg+qf6nPry+iP7RvsB+6P6svpA+7T8Nv62/6kAAgE/ASoBhQEAAtgCAQQdBWwGJAd6Bx0HHQYzBRwEiwP8AnIC9QEhAX4AhP+p/v79oP0J/qT+kv9PALQA4gDJAPEAOwHWAZsCJgNYA8gCvgFlACn/cf4q/lf+dP5n/vX9F/0l/Az7Uvrn+df5D/oK+gL6nvlJ+WD56vlA+8f8Zf6v/2YA/QBOAc4BaQIrAz0ESgWYBqAHMQggCF0HbAZZBY8E7gNbA80CDAI/ATIAH/88/q/9zf1P/g//uf8MACkABwD//zIAuwCZAWIC2QKnAvkB+wD7/0H/0P7D/tD+1P5y/pn9mfx1+6v6CPqp+Yr5YfmA+WD5Uflz+ev5HPt0/BT+Zv9NABIBfQEbApsCQwMjBAAFTAZnBz0IXQi/B9UGrQXQBA8EXwOxAt0BCQEHAAD/Kv6Y/Z/9D/6+/m7/xv/l/8H/oP/C/zEABgHgAYQCqgI5An8BkwDm/3z/Vv9e/0T/3/7+/d78qPu1+hX6rvmE+VH5WPlc+W/5svkV+g37P/zF/TL/OAAOAX8BCQKHAhQD1AOiBM0F8wbvB2EIFwhaB0EGOQVOBIsD4gI9Ao8BrQCg/4j+vv1u/bX9VP72/mb/cf9L/wP/7v5B//r/FAEQArUCvQIrAlQBcADd/5v/jP95/wr/Mf4H/bv7pPrQ+VL5Mfkv+Wr5l/m5+fn5WPo3+138wP0j/y4ACAGKAf0BbwLnApADSgRNBWUGdwcyCE8I7QcGBwoGBQ
UaBEgDdAK8AeUADQAh/1L+z/2v/QL+eP74/iz/If/j/qT+w/41/ygAMgEaApYCagLbAfQAOgDI/6X/rP9e/6T+Wv3f+5L6qPk8+RP5J/lQ+YP5tvne+S/6vPrD+yj9p/7u/78AUAGuAScCxAJ7A1sERQVjBm8HMQiACDgIkwenBr0F3QTyAwcDBQIXAS8AYP+8/jb+8v3t/SL+df6q/r3+of57/oP+0v6E/2wASQHsAQ8CywE0AY0AJQDn/93/oP/5/ur9f/xC+1n60fmn+Y/5ofnB+dn5/fkK+mH6HvtJ/MT9Bv/+/50AIAG9AWwCPwMTBAkFJgY+ByEIgwhgCNQH/wYnBkEFWQRlA1oCZgFrAJr/8/5p/if+9v0W/kr+ef6h/of+g/50/qr+O//8//gAsgEnAiMCsgEyAb8AmgCNAGQA8v/8/qb9OvwK+0364vm4+bL5o/mb+YT5ePmR+fP56/o+/LD93P6k/z8AvABxAUwCUQOABL0FFwceCKwIpQgrCIIHswb1BSAFQQQ8AykCLgExAHb/2v6Q/m7+Vv5q/lz+Yf5L/kL+UP5d/r/+Wf9GACoBxQEMAs8BdQEdAfcA8gDDAGwAqf91/hX9yfvu+mf6HvoP+un5wvmK+VD5NPkz+dj5B/t8/Nj9xv57//L/nACUAbMCBQRYBcsG+QemCM0IeQjzBzoHhga/Bc4EzAOxArMBrQDP/zv/2P6x/nz+bP5d/kb+NP7//eb92/0h/r7+lv+NAFAB3wH1AcEBgAFPAVkBUQEmAasAlv8//s/8qfsF+5X6lvp5+jb6+Pl4+TH52Pj0+Mz5//qa/NT9wv5l/9//vQDCASEDmwQTBnUHVgjOCLQIVAjNBycHiwa5BeAEzAOZAncBXwCd/wv/wP6V/mr+Wf5A/if+6P2j/XX9fP3d/Zj+mP+MAEQBlwGXAWsBSgFgAYkBmAFQAZMAYf/t/Yv8lvsf+wT7AfvG+mP6z/k4+cL4ePjH+J/5CPuK/KX9cv7q/qf/twAhAsoDRQWuBscHjwjsCMQIcAjfBz8HnQbHBeMErANwAlkBXwDV/2P/Nv///qv+hv5A/iT+3f2T/Xv9c/3y/aT+lP90AO0ATAFXAW4BjAGnAdoBuQFeAZoAXf/0/Y/8uftv+1v7SPvn+kr6h/nJ+Fn4Kfhe+DH5c/rg+/X8rv1Z/iH/aQAKAtUDeAW2BsMHeQjbCN4IhggOCHgH2gYoBj0FDwS8ApIBuwAyAN7/mf9G/+P+g/47/vr9pP1d/Uf9gP0C/qz+cP8KAHYAwgD+AEYBdgG5AeMBxgFKAVUAJf/L/bT8HPzv++T7ofss+2j6kvnY+G74aPii+Fj5UPpi+0v89PzE/cH+OgAHAssDWgVeBjgH4gdLCIAISggACIYH7gZJBk4FNQT7AvcBUQHRAHQA+v9p/9v+V/4D/rn9b/0//Tb9fv31/YP+Hv+Z/wEAUwCeAPMARwGiAc8BrgEvAWAAa/9n/o/9/Py5/Jj8UvzE++X6/PlB+eH43fgF+XL5A/q6+nH7HPz9/Bf+pP9uAScDqASiBWsGEwehBx8IOwgjCLsHHgd6BpsFuASsA7ACBAJ0AQ8BegDE/xj/bP4R/s79q/2b/Yb9xP0S/nn+2v4Z/3z/uf8PAHUAywA/AW4BfgEzAYAA0/8I/4P+DP6f/V/9w/wx/GD7ffrX+Tz5RPlQ+XX5qfnA+Vz6+voT/Gb9vf50AOMBeQOtBIMFXwbsBsEHOAhVCCsIgQcFB1QGtAX5BOYDEAM1Aq8BJQFTAIr/o/4v/gP+9v0F/uH9/P07/pb+8P76/gX/Ff9d/+P/XADSABEBJgEcAeQAmABRAAUAtP80/1f+f/2I/ND7NPtn+uX5OvkO+Rf5+vgL+d/4avmA+uH7gP2b/ub/KgGcAjQEOgU9BuAGqgdwCJgIiAjUB1IH1gYyBowFVgReA4AC5gFrAXwAov/E/lX+TP5M/m3+Yf5m/oH+kP6T/mT+VP55/uf+c/
///3YAvAD+AB8BUwF4AYEBfgEMAT0ACP+l/bj89Ptx++/6GPqK+dP4i/h1+DH4YfiQ+Jr58Pot/Iz9Zf6//y8BzQKdBKAFtgZOB/gHpQiWCJII6gdnB+8GIwaBBVIEVgN1AqgBOQF5APn/Z//7/u3+v/7g/rT+Zf4j/sD9yf3O/ff9Tf6c/jX/0/+EAEMByAFIApkCwQKrAgcCLQECAM7+vf2u/Bn8Vvuf+uD5Bvmu+DP4Kvg2+Cr4f/jJ+OL5JPts/O79If/OAFUC6gN2BUsGHweOByEIogiHCFcIoAf/BlwGhAXSBLwD2QIXAn8BQgG8AFEAy/9E/wP/oP5z/iP+t/2I/WP9jP22/eH9Vv70/uL/9gDtAcQCOgNnA1sD8AJgAogBsgDu/+H+5v2m/MH7L/ug+oH6zfk6+XX4q/eY9zf3lPcR+A75w/oP/M39EP9zABsCaAMnBRgG+AaRB9EHZQgyCBsIiQfPBmMGigUyBW8EvANCA48CXQKbAdwADQAT/7/+N/4Q/sv9RP0l/eH8K/2J/fz9w/5j/1cAOAESAr4C9gIHA9YCnQJOAtMBSAGSAO//Nf9z/qH9pfzl+/b6Ifok+QT4Vfei9qn20fYx9xL49Pic+jT8y/1f/4oAGgJtA8YE3gVXBvMGLAenB/sH3gfIBy0H6QaTBhsGqgWuBOkDAAMrAnkBbgCe/6v+A/6u/Vf9Xv04/TX9Sf1S/br9Kf7W/qX/XgAlAckBVwLLAugC6QLEApsClQJjAi0CowHSAPj/xv6y/WL8Dvv3+bD45PcA92D2GfbM9T72vfa49/r4Nfrr+0r9yP4IAP8AOgItA4sEvwW7BqkHBgiSCLMItgijCBoIxwcEB0UGXgUPBO8CnQG7ABUAcf/8/kv+vv1V/RD9JP0l/T79Zf2l/Tv+0v6L/z8A6ACfAU0C+QKLA84D3AOyA1cD7AJTAsgBLwGCAMn/2v69/U/8vfpM+RL4IveB9v71zvW69ez1i/ZG92f4ifnT+k78o/0A/zYAUwGKAqUD6wQpBi0HEAiMCPkIFQnkCIII1AccBzIGNgU1BBkD/gH8ACUAg//q/nH+FP7U/cD9uv3y/ST+aP6//hP/m/8QAKAASQHYAXwC8wJcA7MDugPJA5ADWAMCA3ACAAI5AX4Apf+b/qz9Ufz/+qT5Rvho95b2PfYW9gD2bPbP9o33Sfjw+Oz5v/ry+yH9Pf6F/30AxAH3Ai4EhgWBBpkHRgiwCMcIQgimB6oGxQXmBOEDDAMOAmkBBwHCAMsAlQB0AEIA+P/v/7D/p/+X/5H/x//a/zIAjQANAcUBYQIJA3oDtQPeA7MDfgMXA5ACFwJ5AewARACj/xX/jv4W/n39t/y++5X6cvle+Ib3A/ey9tL2+fZP96r37/d0+Pz41fnN+tv78/zx/f/++f8QAS8CVQOBBG8FLAaPBpgGaQb/BZYFLgXSBIQENgT0A58DWAP1AoQCGwKcAWQBPAFLAYsBtwEMAjECWwJ7AnoCmAKJApcClwKYArwCsAK+ApUCWgIqAsYBewHzAGYA7/+G/1r/JP/d/mn+zf0n/Wr8rvvX+gX6UvnI+IH4T/gu+BD49vfz9/v3BfgK+CD4W/jq+Mj55Pol/GD9qP7x/zsBeAJ7A0AEyAQgBWYFiwWgBZsFhgV0BVsFTwVKBUwFXgVwBYgFnQWdBY0FVgX/BJwEOQT1A8QDnwNwAzQD4wKOAjECzgFxARUB2gCpAIMATADv/5n/Qv8U/wH/6/7i/sz+2P7t/gH/Cf/Q/n3+6/0t/Ur8K/sm+h75VPi39yL3wfZU9gz22vWm9Zz1jvW89S/22vbn9xH5dfrq+1v94f4/AKEB6QIaBEYFPgYfB8oHRwitCPwIWwnFCSsKigqyCqYKZArwCWUJuwgPCGYHxgZFBroFMAWEBL0D9AIcAlUBewCq//D+Vf4C/sz9wP26/bj9y/3V/fT9BP4h/lH+ff7P/vr+JP8g/+D+iP7T/Qv95/
uM+h75lvdo9lf1nvQh9MLztPPG8z706PSj9Xv2Tvdx+ML5KvuY/OX9Pv9zAJcBngKFA2wEKwX7BbsGcQcFCE4IjwikCMUI1wjZCPQI5AjnCMgIgggxCKEHMAfLBpgGjgZNBvkFcAX0BKEEMwThA5YDWwMeA5UC/AFRAcMAOQB//93+LP6X/Rz9kfwb/Kf7Z/s/+xv79PqZ+k768/ly+c/46/cD9yf2SvUn9PXyNvLV8aXxTPHa8Orwr/E/82X11fc0+nz8HP/YAXoE0waHCOQJAAuqC+8L1AuDCzUL3ApRCoUJkAiHB3oGkwWcBNgDkwODA6gD1AP6A1EE4wSdBT8GuwYQB1UHqQeVBwMHZAa5BVAFGwWuBLMDNQK6AKj/FP9s/lf9QPxe+736evo4+hr6avrv+nD7nvt4+zf7Dfvm+pP6P/rN+VT57Pgc+Of2hPUk9LnyHfGy72/um+097UDtMe4a77vxEvaz+ar9LgFXBMcHIwtqDukQnRKWE/gTuBOtEi8RkA/+DYoMDAtRCVQHZwXVA48CmwEhATABvAF6AlUDSgQxBTEGIQe3B+gH2AeTB90GsgUeBEUCjwAX/7z9S/yY+gn5J/ji9yr4d/iM+PP4tPm/+sX7xPzJ/dr+9f+TAHoA6/9K/77+Ef7y/Fn7kfns91j2zfTV8vLwbO/t7dfsOewS7PPr4eyj7oDwLfPm9qr7sACdBe0Jkg27EJcTIxaUF/EXfBcpFiAUohEYD7sMkgrdCC0HUQWBAyMCqwGAAbIBQQI1A4UEwAX/BtMHQQilCKIIDwiiBpcEYQLV/1f9uvoj+Bv2jPTu87rzqPPZ85P0L/bH95b5vvvh/R8A+QFqA0gEnATaBKgE4gOWAv8AXP93/Vb7a/ms9w/25/Qe9Arz1/EK8WDw1e+H7+vvkPCp8XfySvNS9af3E/v9/ZIAVwTLCIINzRC3Eh4UPhUJFlYWIxYpFWsTnxFeD8UMoQoCCQUI4gagBYsEiwMlA0wDiAOqA8QDFAQwBMUDFwMrAjQBHgCN/nn8/vmT9731I/S28kjxlPAn8X7yjvSG9l/4HfrV+/X9EACuAmAFogYGB0YGPgX3BIUEJARzAxkC+P9R/ZD6S/g796/2Lvar9d30J/S283vzuPMh9GP0m/QL9TD1fvWo9hj4gvr1/PT+lwFiBGUImgwnEN0SLxSGFd8WFBjvGL0YpBfGFSUTPhAwDWcKxQdNBfMCPAAO/pn8yft4+zT7U/uq+xP8c/xs/Cz81PuC+/r6yvkr+J32P/Vh9OzzkvN78+LzQ/UC9734nvq0/Bz/UAE/A90EgAZjCEAJRAnKCNcHdwe6BhMFRwM8AUP/Qf03+zr5hfdC9gf1PvTg89zzS/QF9a31KPbn9u/3H/lE+gf7dftM/Pb9wP+MAbEDiQW5CAUN7BAUFBkV3hVGF2oYmhh6F6MVyRJvD7cLJAe3AkT/vvxr+qj3c/Vy9Iz0ZvWi9v73Ffk++oH7BPwI/Bn8Wfxc/Gn70vkQ+L/2HfYH9s/1cPXb9Wf3R/n1+rL8cv7AAE8D1AXGB9YI9gpNDc8N3QzSCnYJ0wivB8sFvAJl/0r88vm/9031C/Tg8/jzJ/Th9Az2dPdr+fH6Efzv/LL9F/9t//H+C/6Z/Xv+dv8bAX8BIwJDBUkJEg7BEIgRiBJCE2MUrBRxE0ERgw6uCzoHMALX/Vn6gfh99kb0nvKr8RDyDPM59JL1XPe8+Zz7jfzt/CD9yv2C/kT+O/3Y+6/6efp7+l76NPpj+jL7Xvwo/sn/4QE3BKQGSQm6CvsLbA0ZD+IP0g7fDD8K2we/BU4DOwBs/Pz4b/Zk9BvzovJH80z0dPVA9+X4nPpi/Hn+HABtAGUA5f88/yj+8vyt/GT8LPx1/Aj9w/2S/7QDkgglDPQNfQ4nDyEQ7xBLEbEPYAzBCNAEkwB+/H/5S/f49OPyePED8U3xh/Ki9G32R/it+k39Qv9TAJUBrQJNA4EDBgPxAY4A3f
/P/yr/AP42/S/99f11/0MBtALnAz8F4AY5CVoL2QxIDvYNQgxtCiQIWgYIBSADNwD7/LL5r/Yd9Y/08vQi9uT2qvec+Kr5I/vz/LD+jf+y/x3/F/7m/FP7g/qU+bj4vPhZ+BH5o/kC+77+sgIuB+8J2wobDKANew9LEGEPKw3/CYoGdgKr/qT7vviU9mn0z/J48gTzhfQv9mX4CPuz/bYAFgPOBGYGuQfaCCYJ/wc1BnoE8QLMASAAFP4g/On6F/si/P39ef/cADECVQMdBacH3wlUC4MMswsFCkwIgAa3BacEVQJp/zf8Pvkv9/X1dPUN9Xn1Gfb19jj4Kfnc+kL8TP0W/sD99fzm+9z61Pkw+Mr2UvYt9s/2R/fi9536mf6YAwEHPgitCYQLEw6bD5oPUw7OC3QJgAbqAtz/Yf2Q+875efjp9z74dPnt+sP8vv4NAdYDKgYEB4YHVAifCJ4IYQcIBbECZgAc/5/9gvvq+dL4+fim+Qv72fxj/jwAyQFDAzgF0gfmCQELpAuKCrgImwc8BhUFrgOxAHz9lPoC+Kz2xfUE9ez0q/V89uP2Cvhe+cL6Tfzf/Ov88PvR+i/6+vjA93H2svVx9Tj1nfWd9r744Pu8/8ADMAb9B3gKXw3yDzgRUxHjD/QN/AukCVkHhAREAmIAL/7d/B38Qvz+/Lr95v7j//AAfAKsAyUEgASzBJUEqgMHAm8AzP7D/Qn94/t5+qf5wPkz+mb73fxD/of/ZgBNAVQCjgPeBZ0HoAeAB2QGUgUsBeoEJQTBAuYA/P47/Wv7Mvpe+fn4mfhg+F74Dvi/+Lb5GvpZ+uj5ePmr+MX3PPf29T31g/Qz9NX0cvX19gv5J/zV/10DngbaCEILOA63EIMStxLhEasQpQ7iDPwK+AgSB68EVAJUANj+N/5i/kv+G/5N/uP+2/9XAIgAdwBYAGEABQAh/779mfyq+/T6Xfp++TT5RfnW+f/6XfzY/QX/BQC3AJgB5QJSBF8FvgX4BekFoAW6Ba0FZgX+BAoEiwIMARoA3P5o/Vb8UvuV+uv5Fvmd+Jn4yPgK+Q75FfgT9wf3rPZW9hr2q/Wz9UH2Wvd3+Lv5yfuw/g0CtwSXBoMImQryDC0PihCSEAMQUw9CDjENvwuGCl8JbQe3BT8E1AK+AfkAIgAL/2H+4P17/fr8TPzV+yX7bvrB+Q/5cvgX+P/34/f190348vgt+rf7fP0+/5AAlQEXApsCdgNjBEEFegU8BTEFAQUQBW8FXwXmBAMEKQN5At8BVQFnAFv/of4u/sr9Ff1U/PL7wPt7+9L6IPpG+XX4ffg0+Nz38PfW98734vcM+I34uPkS+7T8GP4u/5AANgJtBEwGJQhCCakJgAqDCpkKvQqRCp4KJAqgCQAJcAgeCHsHDAf8BX0EkQMiAgYB+P+A/oD9XvyN+5v6hfma+I73Eveg9mX2dPaT9mn3jvgE+sL7E/1k/pv/nwDuAdsCfgPOA/EDNgSFBEwF8wV5Bq0Goga1BqUGqAb6BfsE4wOOArUBsACO/0r+Cv0d/PX6/vks+SD4//YS9kj1ffQ99Fr0aPTn9HH1EPaI9yn5vPoo/Fn9ov53ALQCsgSKBr4H+giaCrULqwxCDU4NNA3zDHsMmAuDCoAJcAg0B8oFuwThA8QC1AGaAC7/JP4l/Rv8uPpO+QX4EfeR9hn29/UZ9sH2BPg6+Zz6rPuU/JH9XP5g/wsAxQBhAaIBXAIhAxgEDQXTBV4GqwYdByQH+AaDBm8FlgTxA0MDmAKYAXMAOv/u/eP8wftM+pf43vZv9WD0rPMj8+TyJPOr83D0RvUh9qf3LPlI+ob7+fzp/isBhQOQBXoHzwkUDAIOGA+oDzIQVhBBEHsP/g1oDLcKUQncBzQG1QR4AzIC2gCE/0T+vvxz+yn66PjP98r2Ofa99ZL1Ava+9qr3p/i0+cf6o/uH/H39OP4R/7f/MgAUAegB+QIoBBoFBwaTBh4HNgcQByUHdw
anBYcEFwNNAn4BpgCd/w7+l/wo++D5TfiL9hr1mfPP8kfyBvKH8try2/Mb9Q72rfcf+aH6Wvz3/ez/zAHbA7oFxQc5CjMMVA7QD8IQoRGeEX8RtRBYD/wNHwxWCoMI6AamBRUEiwLkAE//Fv7G/Gj7u/kz+D/3n/Y49rH1a/W49Tv2APfb94D4RflJ+lX7K/zH/MP90P7q/3gBzgIfBCcFBgbbBkEH1QfkB2sHtAakBewE+QP8AuwBdQBO/wf+2vyD+9j5dPjE9m71b/RS87byR/I/8pryI/ND9Hj1BPfN+Df6yvt7/X3/owFtAzgF+gY+CYYLJw3DDgsQSRFfEskSuRLgEdgQsA8BDjUMQgpdCFUGRgR0ArYAF/8k/Sb7afnL9532bPU/9IXzOfOe8x30u/TC9bT2xfe0+Kz5Cvtf/LT9mv5E/4YAHQLRA1MFSQZTBzgI0ggKCWgIvAfUBrcFjwQUAxMC4ACs/27+tvx/+wD6ivgZ9y314vOF8obxT/Ef8aDxN/L98mz0CvYv+Pv5mvuP/Y7/HAJSBGYGhQhdCsMMug5xEBQS/xLUEwMUvxMqExES7BAvD0MNVgs/CUoH8QTIArYAg/66/MT65/g895T1dvSI8+3yvPKh8gPzyvP99GH2cfeB+IX5mPrs+/f87P3r/vH/cQHeAj0EpwW5Bu0HvwgRCSQJmAjzB9oGgQVBBNkCogEtALz+ZP3s+6v6Dfle98b1KfT28rrxA/G18LbwWvEm8nnzOPVB92b5Kvsj/XT/5AE3BC0GGQgqCocM0w60ED4SkROmFCMVExWZFKsTaBKvELUOtQyuCrgIdAYXBMsBfv9F/fD6wvjI9gf1n/NE8mjxJfFU8e3xn/Kq8/H0Sfa39wD5Qfp6+6/8wP2U/pj/wAAqAp0D6gQlBgoH4weSCPMI8QhjCIcHggZ3BVQEEAO5AT4A2/5b/cL7Gvo3+Jb2wvQf88LxgPD375rv5u+38OXx/PMW9lj4rfqv/Dj/rQEgBKIGngjXCtcM8g77EI0SIxQGFYwVnxUkFXoUJRNrEWsPLg03CyIJ7QbCBJAChQBU/vz7mvlD9y31OvOK8V/wre+d7+3vpfDv8XrzTPXz9nD42fkl+3H8lP2j/qj/ygAEAnED4QQqBj8H1wdVCGYIFQiTB6gG0QXKBLkDpwKHAaAAff9c/hH9jfsB+gz4EPYr9GzyRvF78ALwG/B98JPxR/NN9bT36/lG/LX+IgHLAxYGdwjMCgQNfg9XEf8SVhQDFZUVWxXGFMETDxJgEEcOYAyFCncIrAanBOECAQHL/tb8iPpR+Dr28fMq8rLwwe9V71Pv5e/c8DTy6/Oa9Xn3PvmH+gv8GP1x/u7/+wCQArwDNAWqBnIHVgiICIcIQQg5B1IGAQW3A7YCegG4AMv/4f4E/uP87PuL+gz5W/d59erzZfJf8f3w1PBf8TzygPOC9Xv3yfn9+yH+qQACA6EFAwhJCsoM8w5LESoTgRSaFcAVmhW8FFATuRF6D2MNBwvICO0G6ARRA4wBwP8h/jb8h/qJ+HX2nPS58m7xcvDc7+7vTfBC8Xvy6vOn9Tz3zvg7+oH74Pw+/pj/DAGIAhUExQUcB1kIJAlXCUQJfwivB34GFAXEAw8C4gDK/7b+AP7Q/OD7zvqJ+Xj41/Zz9Qj0v/Ic8pTxzfFI8i3z1vSP9rj41frI/Or+8QBGA6EF8Ad2CtcMPA92EVoT9RT2FWcWKxYUFYcTdhH7DnsMxgk8B/EEyQIIAU7/sP06/Kj6Ifl69731M/S+8r3xC/HH8B7xzfHz8jb0lPXo9jv4c/mZ+r772vwl/l//4QBlAgsE2AVlB/gI5glgCkQKdAl1CAMHfQXhAzACxABj/z3+Gf3P+7P6Yvk/+Pz2YPUY9LDy7fGg8WnxC/LS8lf0Mfbd9wP63Psn/nMAQwJ0BD8GfAj/ChoNpw+oEZ8TWhUQFqYWIRYiFaETNxEKDyoMhAn5BisEPgIGAE/+t/
y/+kb5T/em9S30nvK28crwePDH8FrxqvL083X1IPeD+Ab6KPsY/Pz8u/21/uj/UQH8ArsEZQYaCGIJWAq+Cl8KvAlvCP8GUAVWA7sB8v+m/nX9A/z1+lz5J/jg9jv1HfR68rrxPPH58LDxPfL/8/H16vdE+v37W/6IAG0CfQTvBQEIIQorDIsORRBHEv8TLBU0FhoWohVcFFUSPBCXDSYLkgjkBZUDSAFb/3n9cPuQ+ZX30PUX9GzyKfEn8NLv/e+x8AbymPN/9VX3EPm3+gv8Nv0e/tr+l/9tAGQBkQLYA00F1wYfCD4JxQm/CU0JVAglB7EFFQSDAtcAXP/8/Yr8TfvT+Vv43vYo9aPzC/LW8BPw3O9j8FXx+PIf9Yn3HPqR/Br/ewHBA/oFqwd0CVYLNw01D+4QrBIPFAcVyRWbFewUyRPdEaMPIA2nCiYImgVuAz4BTf+i/bn73Pnn9/71APT58YXwbO/m7grvmu/h8MTy9vQv9y35Hfvl/En+cv9XAAUB7wHjAukDCwUhBlIHOQjsCFwJIwmmCMIHfQYXBXYD9wF9ABn/+/2V/ET76PlP+Mr29vRZ88fxafC170jvre+i8ETydfTf9q35VPwN/8oBWwTEBtsI8goADfEO2xB6EvUTNhXxFTMWrhXIFF0TUxEKD2sM7gl3BxUF5wLKAAL/R/2B+475ifec9aDz4/FS8CPveu5Q7u3u/e+v8cPzy/UC+BL6F/zo/WD/ygDyAScDXwRxBaAGvAfJCJcJ/AkCCpUJ3wi+B08GwAQPA3QB3/9V/uj8dPss+tv4X/fX9Rz0j/Id8fDvUu8r78fv7/CC8m707PbO+Yr8Yv8fArQEOQeKCeILLQ50EJ4SShSlFcAWYRdtF5gWERU1E/AQfA6rC8MIGQa5A50Bbf+X/e37Efoz+EL2XPSS8g7xxO+Z7jHuk+5s76fwOPIq9DT2Nfgp+vz7pv03/6gA+AFPA9wEeAbdBxYJGArDCgULwwoPCvcIpAcLBjcERwJsAMr+Of2L+9j5XPje9lH1pvP08dHwOPDQ73fvLvCf8TDzVvQK9SP4evwO/9oAaQOTBgkK1gzGDh8RMxR8FhUXaRjkGWIZ4BdwFbwSgxBxDnULOQe2BAIDLQCZ/c37NPo8+Hf2p/Ru8kbxsfBr74/uve6r76zwxvEL80n0fPaR+Pr5zvt8/Qz/jwARAlUDxATfBj4IFwkeCuoKNAv+ClcKLgnbB2wGfQRCAkcAgv4x/GP5FvcM9QPz2vCL7vHsZuzS7KDt8O4v8eXzAPdJ+uv9kwFqBMUGsQiWCrAMow5+EMIR1BIUFBoVuxWYFfcUoxO+EcUPgg1kCzQJ1gaYBH4C3gBd/9z9Sfwz+lX4nva09Oryh/G28PbvvO808MHwlPGu8gr0U/Wj9l74//l3++j8jv6KAKgCuwQ7Bm0H1ggeCtgKzQqKCk4KjAlcCOYGkAU1BGwCawDn/TD7bvi09QjzffBx7oXsN+td637sAu5D8GnznvZj+dD88wCYBJkHNgpMDNYNrg+EEWAS6xJqE4UTLROjEuYR4hCrD+oN7wtdCkYJGAjDBqAFRgT4AuIBQAAT/vX76/mr9y31y/Ln8G3vNO6b7a3t5+1q7k3vnfCd8r30BveA+XH70/3QAK8DPQZMCAwK/gqICxIMKQwADFULIAqgCPwGigXVA88BYv97/Gv5Rvag84Dxlu/s7cfsmOz/7IPuVPFX9Fj3C/qU/MX/hAPTBgUJcQp+C1MMBQ2MDUQO/A5ID0QPBQ/lDtkO2Q6WDt8NIQ1hDMgLGQs2Cj8JCgiWBq8EYAL7/4/99Pod+BX1CPJT72Ptf+w17DfscOzK7NDtyO9G8vX00PeS+tf8NP8CAskETAf/CL0J9gnoCdMJVwmRCJgHaQYNBW8DCgLUAMT/fv58/Ob5g/fD9Xf0KPO68RjxYfE+8ovz0PUm+Rf8Pf4UADICogSWBrcHOwhsCHMIJQgNCIUIHwkZCvQKlQ
s9DBANVg5vD0wQtxCtEGwQXQ8PDrgMkgo5CI4FEQI9/pf6ePds9KPxHe/97J3rIOvv60ntq+5G8MzxzfN/9lH5HPw1/pX/vQD8ARMDwAN8BOgEjwTeAy8D+AJ2A9YD1wOHAywD7AKHAh4CFQFn/y/9vfqy+P72fPU/9MjzkPN889n0CvgR+938mv4cAe4DFAZOB7QHswelB2gHnwbgBSgGGwf2B7QI9QmBC+4MTg5mD3kQURGwEXQRAhD5Dd8LyQkdB6oD0v93+5X3p/RT8lDwnO6H7RTtg+0g7xbxw/Jy9D/2NPhD+mb8XP6q/zYAIQDu////KQBZACsAkf8A/9L+df+LAKgBjwIvA7YDpANFA+ECsgG9/0r9NPtm+UL3c/VC9CX0wfRT9iL5hftd/Z7/eQJ4BY4HlQgICfsIigisB84GLAa8BdMF0wU1BpoHKAmbCrwL1gw1DnwPdBCwEOYPQQ4ADHMJgQYeA5P/nPvb9+f0ZfKs8NzvE/DF8JTxPvMm9dT2OfiM+Qr7EPwQ/RD+f/6R/mX+Qv4W/sD9hv1e/Sj95vzM/Fj9dv7K/yMBSwIYA5YDlQPwAnsBQ/9+/MD53fca9qD03vMD9JH1UvjI+9j+HQFSAzwGJwnaCkoLDAv/CVcIngZNBboEkQSLBFkEYARWBTQHVwklC3EM1A1QD1kQcRDHD9wO5Ay6CQAG3gGv/b35SfZ08yLxuu9a7+bvbPGs84j2OPkq+538rv2u/jj/Pv8a/5X+x/12/Bn7ZvpF+qL6B/sC+wf7bvtt/BH+5/9cAU0CCgMmA1YC4wC5/kb8wfp3+Yj3KvXg85f0H/bW+Db8qf98AtgEhgdxCQgLUQy7DKsL/whEBgQEygLJAi4DyANdBIgF3AYDCNwJOwyKDhgQ/RDREJEPTQ62DCIKewY2Ahn+3Pkh9jXzP/FF8MLvwvDZ8p71oviI+0H+AwAjAVEBugADAEf/m/40/T/7Gvlx98P2pfaV97L4qfmY+mT7x/xp/k0ArgGJAgADRwLAAEj+p/ur+Uv4cPc99lv1dvXj9n75RfxM/z8CKAUHCFIK1wueDO8M+gt+CaEGbARoA0IDmgNHBDAFRAZbB3cIhwmCCw4O9A/aEGIQDg/vDG4K2QfcBIoB7v1X+qv2Y/Ni8dnwqvGB80f2F/l1+2X9m/71/1oBLwICApMAff5U/Ir6sPg993z28PUt9sn2ovfT+D/6+/u8/Vn/qACFAZMByABr/0n90vra+FX3SPbb9YD27PeG+Uv77/zf/oIBTQQRB6sJ/wo+C8sKNQkaBwkFiQMDAzIDygOpBNYF1wa1B+kIWAr6C+gN+Q8lEdcQRw8ZDb4KAQgzBTwC+f6Q+1v4wfWy8yHzJvRH9ur4OPs9/ZP+j/9RAIUAIgBi/6D+PP0m+wn5Yfdw9t/1k/Wb9Rv2Efdk+Jf5avqD+xL9c/4U/1L/z/5E/UD7i/m1+BX4hvfR9zD57PrD/Jv+7P8dAYwCcARvBgoIJgmDCYEIUAYfBIUCvAHvAfcCdQTQBQsHJAiMCA0JZAo9DOINCQ+0D3MP6A2QC0QJywYuBAYC2f8o/Zf6rPhB91X2y/Yb+Sj8u/5lAAcB+AB3AKD/u/70/S/9//si+g34jfbU9cn1GvZb9qv2Vvcb+I349/iO+V36Jvt++6H7Ivsg+tX4Wvcf96f3NPiJ+YP7lP1g/7kAtwGhAvMDfAVZB6UI9AgICY4HFgUKA7kBbgGzAQAD3wRlBrkH3wiMCeEJDQu7DCMO7w66DsgNwgsyCc4GkgS4Ah4BmP+4/Xz7nfmd+Ln4Hvqq/Fv/eAG9AvMCUgI2AR8AK/8+/j/97vtO+lz4jPZu9Sv1m/Vc9lX30PfC98f3tve797H3rPek9xP3LPZD9d/0HPUG9h/4GPtW/vAAeAJfA4YDvgNhBGYFegbqBhQHVgZ9BKYCzQEIAucClQSuBoAIggn6CXIKcArgChgMBA1fDewM+wtxCjMI+gUbBFICzQ
C6/53+cf1T/Lv7+vvt/Oz+WAE+A0UEjwQYBLwCWwH2/4f+K/21+2b6HPku+O33Jvg0+G34Gfl++XX5H/mh+NH35/Yv9kn1PfTu8s3xKPEx8Sfy3POz9r356vyb/0oBswKMA70E+gUyBz0IDghKB6AF0QPVAmcC2wIlBFkGaAjmCc4KKQuRC9sLwwzZDVgOOQ4uDVwLCgkIBzgFXwMXAvYAnf8D/r38EvzU+7P8fP52APAB+AKWAwsDKgJhAXQAS/8R/hv9ofsW+jD5+/g7+aD5vfqa+9r7x/s4+276FPkF+AH3ePXQ8/DxPPDp7qruwO/e8bD0yfeS+nn8N/64/yMBygKwBJwGkgeuB74GBgV+A3MCMQK/AmQEeQYSCBoJlwn5CVMKcAsoDa0O/Q+HEN4P7Q2dC3MJTQeMBf8DQwJCAGb+N/27/BT9dP5gAAECWwNBBDYEVwPZAUEAxv5k/Qz8jvrY+En3sPYN9/D3FflQ+rD7s/w//az9xP1n/YL8TvuW+Tr31PSu8hPx7e9s7wnwCfF08j/0nvXj9nL44frs/Y0AtAJWBFQFrgXeBegFlwWbBRsGyQaQB1oIMQm7CSgK6AreCy8Nug4fEOIQqRD8D8UO4gzKCqYIzwYCBRoDRwGO/zv+r/1t/gQAmwHuAsEDDASVA5gCeQHb//X9Mvxo+nD4w/ac9er0kvTL9MT1uvas96r4c/ny+Uj6rvpp+kb5nfee9aPzMfKH8VXxPvFi8WbyMPRf9rv4kvus/mQBGwR8Bj0IQwkGCtMKvgrQCaIIYAfHBYYE/wO1A/gDuATABZUGaAf7CJYKEQyEDW8Oag57DVgMsgqVCJMG+wTiAx0DzQKRAlECbQIAA9oDrgRXBWsFzwS3Az8ClQCy/sb87Prq+M/2wPT98mDx6O/X7k3uh+5c7+XwePKA85P0kfUu9gz2gvUg9QH1qPWk9kb38fcr+Rn7iP1KAFADYAaDCR0MiA3XDW4NlwwcC6UJJwiBBjUF9gPNAqEBNwGdATICRwOrBAQGDwctCBAJWwmuCR8Kggp9CkwK1gmpCFYHNAZFBUwEcwPNAi0CFQJyAvgCTANeA3AD8ALfAT8AF/6l+7D4zPUC81vwQe6v7Ijr4uqQ62rtdO9F8ejyZvQL9uP3Wvn++Wf6Fvul+1b8Kv33/dH+HwC0AegCMgSYBakGSAcBCLAI6Aj9CNEI/AfMBhMGhAXGBC0EuQMjA54CvgIlA5QDaASOBc0GPAjXCfoKawunC6wLSQu3CtEJXwh+BrgETAMAAgcBZAAMAN3/i/8K/1P+df2I/JH7hPoy+cD3Wfac9NTyp/FA8SPx5PD68HrxdPIC9Ib1uvYk+Pv50fsZ/TD+cP+OAJ4BMAJCAu4BYQHGAOj/Wf8s/5n/hQBmAS8C6gLnA8QEYAUIBn4GogaNBlcGwQX5BJgE0wRjBTkGOwcGCNoIvwltCrYKfAoeClQJYwiYB8EGBgYzBW0EcwM1AuwAS/+L/bn7H/rP+KP32PY09sD1sPUp9vD2mvcG+Cb4R/hm+IX4SfgB+Fv48vj4+Tf7Yvx8/cD+SgA6AbYBDAIYAq0BDwGSAIf/Xv6f/fL8Tvzm+zD8s/xM/Xf+rP+NAHgBoAKcA3MEbgVmBlYHPgjJCLcIiQjMCCoJdwmOCS4JiAjyB4UHCQfEBsEGkgYUBkoFLgSTAskAEf9O/bT7avpT+Q74oPbG9Yv10fXI9ur3+/gB+i/7TPyY/Lz8D/05/Yb94/0w/mf+6v7N/ykALABEAGYAUAA+AG0AGQCv/1z/1f4V/mP9TP0W/er8B/3b/Gr8/PsB/C781fxD/vn/5AHUA6MF2ga6B6sIXQntCRsKpQl9CBYH2AW3BBME4gP5Aw0EHgQBBEMDPgL2AJ7/X/5s/cf8yvu++u75XPkj+U75tflO+j/7Yfws/VT9v/1g/g3/9v/GAG0B6AF5ApEC/QEyAXoA8f9n/0b/9v5+/lH+6/2V/V/9if3Z/fv9Qv
4w/tb9hv0p/av8V/yA/PL8rf12/k//MQAqAYEC2gNGBYoGZQftB+MHYQebBvwFTgV9BM4D2QKkAVUASP9D/iT9ePz4+3/7W/t1+5v73/uI/E79wf0n/oL+qf7Q/tL+0P6i/uD+ov8nAAYB9AEhA3QEcAUtBvcFVQWnBKADgwIaAa3/VP4l/XX85/ul+5b7v/sc/E78n/zm/Pz8FP1B/an9M/78/tn/cQDfAEcBmQHeAWYCyALgArACgAJeAiwCYQKGArIC+AIBA8oCHAI/ARoAuf5l/SH8+foF+j75pfix+F75bvqh+/L8aP6j/9QArwEIAlICeAKqApMCdQKLAnACoQLIAvQCOwN1A8QDywO7A3YDJwPKAjMCiAG5AAEAG/9L/pn9w/xN/Ab88/se/JD8U/3y/an+h/9CACQBEALBAkUDqwPQA3cDzgLvAc8A5f9u//X+eP42/vH9nf2Q/eD9J/5a/pv+oP5F/vf9tf0P/Yv8ZfxX/E38Y/yh/Mn8Qv0a/tr+iv+QAHIB7wFmAsIC/wJAA6ADmgMiA5wC+QEwAYEAPADw/+b/SgCxADMBuAEwAmgCawKAAm4CLALfAVoBvABCAOr/wv+//+//OgCoADUBpgH+AUgCeQKLAnoCOwK4ARYBdwCk/7L+xv3d/A/8ePsp+/j68fog+2v7zvtv/Dj97f27/oD/CgBxAKkAlgBJANb/YP/v/nf+H/6p/VH9bP3F/Uz+0P5k//b/fAAFAV4BmQGgAZoBbAEQAdgAvACiAJMAjgCiAOYAUgHjAXcCFAPfA50EKQVsBVsFFAWrBEME2ANbA90CbALyAWkBDgHKAI4AbABVACsA1f+O/x3/eP7d/WD96Pxa/AX8ufuD+5z7+/t7/Nr8ef0U/oT+Cf98/+n/NACSANgAqwBdAPD/Vf+R/s39Kv2c/Eb8F/zp+7j70vtG/OL8oP1u/lD/OQAPAaQB1gHDAa4BnwF4AVIBNQFEAYoBBAKRAkcDKAQCBbwFMwaLBrMGrgZ0BtsFIQVRBJoD4wIzAqcBHAGxAC0Au/9g/wL/0P6b/n/+df6Q/qv+jP5z/kn+E/7j/bv9nv12/Wv9af1g/YP9t/3h/QP+G/46/mD+mP6x/o7+T/72/Yr9Hv23/E/8BfwB/Bn8R/y5/Fj9BP6r/lX/6f9OALMADgE7AVYBhwG6AdYB+wEmAj0CWQKXAucCNAOKA/IDQwSQBN0EDwUoBSYFGAXiBIoEFwR9A9gCPgK8ATwBwQBcABAA9P/y/wUAGAAqAD0AMwAPALn/Vv/c/kb+wv1G/df8VPzh+4z7Tvtc+4n7vPvq+xz8RfxL/G/8nPzK/An9Qf1w/Zv95f0o/k7+hv7G/gj/Vv+0//7/OwCdAAYBYQHHASsCWAJXAlACMwL5AdIBsgF1AVEBXAGSAeEBWwLjAkcDlgPRA+QDygOfA10D/AKZAkkCDALbAcIBtQG0AcMB3gHyAeMBwgGTAVkBFgHOAIAAHgCq/x//fP7I/RT9c/zl+3f7Ifvj+rT6nfq2+vf6Tfuq+wf8Xvy2/Bn9Y/2N/af9vv3V/fL9If5U/pH+7P5h/+b/cgAHAYsB7wFCAn8CmQKSAm0CMQL1AdABxAHKAdoB7AEBAhcCLQJDAmMCgwKYAp0CjwJrAj0CFALsAckBswGjAYoBbAFIASEBAwH4APoAAwEVASYBLgErARwB8ACmAEgA2v9X/8L+Iv6B/e38d/wp/AD8//sm/Gv8v/wR/Vb9gP2M/Yn9ff1n/Uj9Jf0B/fH8Cf1K/aL9CP54/ur+W//L/zUAkQDjACsBYQGJAbIB3gEFAiwCSgJbAmICZwJdAkkCPwJBAksCZAKEApoCqAK0AqUCbgIlAtEBawEKAcAAfQBEACwAKwArADYAYQCSAL8A8AAXARwBDQHwALYAYwAMALX/Vf/1/pn+Qf71/br9lv2H/ZD9sf3k/Rj+Qf5b/mH+U/43/hD+3f2f/WX9Mv0I/e/88PwN/Uz9rv
0o/qv+L/+2/zYAqQAOAWABmgHBAdgB4QHfAdoB1AHSAdYB4gH3ARACLwJMAmYCewKHAoMCbwJJAhUC1QGSAU8BCwHRAJ4AdgBXAEYAOwA6AEMAWABxAI8AqQCyAKYAjQBpAD4ACQDI/3b/Gf+7/mX+Jv4C/vn9CP4p/lb+hf6p/sH+zv7R/sj+r/6F/kf++/2u/W39R/0//Vr9kf3d/T3+pP4K/2z/xf8UAFsAlQC9ANUA3QDhAOcA8AD9AAcBDwEVARsBJgE7AVkBhAG5AfUBMgJqApMCpwKkAooCXAIgAtYBigE+AfkAvwCTAHcAagBuAH4AlgCtALYArACLAFYAFQDL/37/Mv/o/qT+af43/hT+Av4E/hz+Q/50/qX+zv7p/vX+8v7n/tf+xv64/qn+m/6O/oH+dv5v/nH+ff6X/r7+8f4r/2n/pv/c/wgAKgA7AD0ANAAfAAEA5P/L/8D/xP/f/xAAVgCxABsBiQH4AVcCogLRAuAC0QKoAmgCHALLAXsBMwH4AM0ArwCdAJYAlACUAJAAhgB2AFsAOgAUAOr/v/+U/2f/OP8E/8/+m/5q/kD+Iv4U/hb+Kf5M/nn+r/7p/h//T/91/4r/i/97/1v/L/8B/9b+uP6r/rD+xv7r/hn/Sf93/5//vv/R/9v/3P/W/9H/0P/W/+f/AgAmAFEAgAC0AOkAHgFTAYgBuAHjAQICGAIgAhkCBALjAbcBfgFAAQABxACNAGIASQBDAEgAWgB0AIkAlgCWAIUAZwA8AAQAw/98/zX/7/6z/oP+YP5P/kz+WP5x/pP+uP7c/v3+GP8m/y3/Lv8t/yX/Gf8M//n+4v7M/rv+sv6x/r7+1v74/iD/UP+G/7///v88AHEAnAC6AMYAwwCzAKEAkACHAIkAmQC2AN0ADgFBAXIBngHAAdIB1gHLAbABiwFhATcBCgHmAMkAtgCrAKsAsQC3ALsAtQCmAI4AcgBTAC8ADADo/7//l/9s/0X/Hv/7/t/+xP6t/pn+if5//oD+i/6f/rr+2v72/gv/Gf8Z/xD//v7q/tP+wP63/rj+x/7o/hn/Vv+b/9//HwBTAHsAlgCmAK0ArQCnAKEAmgCXAJUAnACnALUAxADUAOEA7gD7AAgBGAEpATsBTQFZAV4BXQFTAUEBKQENAe0AzwCzAJsAigCBAH0AfAB7AHUAawBWADwAHAD0/8j/nf9y/0r/KP8M//X+5v7c/tb+0/7R/tD+zP7J/sT+vv63/rH+rf6s/rH+u/7N/uH++v4T/zD/TP9p/4j/pf/D/+L/AQAgAD0AWwBzAIcAlQCcAJ4AngCbAJgAmgChAK4AvgDXAPEADwEuAUgBXgFuAXQBbgFfAUcBKgEJAeoAzgC0AJ4AjwCBAHUAawBfAFMARQA4ACkAGwAOAAIA9//r/97/0P++/6j/kP9y/1T/M/8T//X+2/7J/r/+v/7H/tb+6P75/gr/F/8g/yX/KP8t/zH/Nv9A/0z/Xf9z/4//rf/J/+X/+/8NABcAIgAtADoASwBfAHMAiACeALMAxgDXAOYA8AD2APYA8gDtAOkA5QDiAN4A2QDRAMYAuACqAJsAjAB+AHIAZQBaAFEASABEAEAAPgA8ADoAMQAlABcABADu/9f/wP+p/5P/fv9p/1b/RP80/yX/F/8N/wP/+v71/vP+9/4A/w//JP87/1P/aP95/4X/jf+S/5P/lf+X/53/pf+z/8X/3f/7/xwAQABjAH8AlQClAK4AtAC2ALgAuAC7AMAAwwDIAM4A0wDVANIAyQC5AKUAjAB2AGUAWQBWAFkAZAByAIAAiwCQAI4AgQBuAFUANQAUAPT/2v/E/7X/rv+q/6r/qf+n/5//kv+E/3D/XP9K/zv/MP8q/yr/Mv8//03/Xv9u/3//if+Q/5P/kP+K/4X/gf9//4H/hv+S/6P/tf/K/+H/+v8SACcAPwBTAGIAcQCAAI0AlwCdAKIArACrAKoApwCeAJUAigB/AG8AZg
BhAGIAZABiAGcAbgB2AIAAewBzAGkAXwBSADsAIgACAOr/3P/Q/8j/sv+m/7D/sv+6/8L/wv/V/+j/+P8hAEYAjADMAFgB5gKwAhEAEP+y/yf/6P0m/o3++f1K/mf+ZP2C/bP+Av/F/o3/kQA3AMj/MADl/27/DwBNAO3/JwHyAX4AIgCgACYAdgDbAL7/tP+kAHn/rv52/5//DAD5AFEBqwB8AEUA2v8WADMAmAHzAT4B0wE/AYIAwQDaADgA+f+AAJr/jv/b/3j+mf7K/pz+1/9mAFAAbwCRAAoAz/94AGsAaAC7AMQAigBuAC4A4f9IAFsAjwDOAJMAlAAhAMP/bv88/4T/dP9q/9b+p/7F/vD9yf3y/Zr9jP3h/aH9D/1D/QT9kfz//Gz9jv3Y/Xv+Bv+v/4sATAEZAuwC7gOQBLsEGgWGBbIF2AURBsoFPwWnBJ0DrwLEAXoAsP9L/5L+7P1c/Zz8sfsD+w37UPu6+6H8Y/27/Sj++f62/y4AKwFCAuECrgNYBH4EjASQBF0E/gOXAw4DjALRAbkAjP+W/sj9oPzZ+xD8i/vg+pT72fsk+z/75vv7+2H8LPyK+1f70PpV+k/6n/qc+hX7H/wK/VX+2f+fAW4DFwVtBjQHugc8CIkIVQgnCMwHyQb5BT8FswPjAfAALwD4/kD+rf2S/Ln7fftF+z77zft9/G79vf6J/2QAnQE5AvMCEwTZBAYFWwXKBcsFogVTBeMEmANiAvUBrwAs/9j+Zv6J/Un9C/1M/Nn76fvK+9/7JvyP/I39S/6e/iP/U/9S/8//CwBCALYAjwAVAHz/UP4O/XT8m/ua+u75wvj295v3M/dC92z3E/jS+W/8Sv+iAX8DTQUxB7oIlwlbCiELuwsvDCUMNguyCSIIUgZ3BL4CSgEUAL3+tv2w/FD7Hvpu+T/5VPn++fT6//tC/VD+cv9kANsAtQG5AjIDuwN+BOwEVwXYBd0FYwWxBO0DFgPpAbYA7/9t/8n+GP5Z/Xz8BPy/+6b72vv3+wr8c/zj/Kr8WvwB/Gf7K/uc+qz5S/mf+J33bfdy9wH37fYN+AT6kvxZ/7EBBwRQBhgIbAmKCqcLjgxNDd4N1g3XDEILoAlYB/gEPwNRAa7/y/6H/d/70/rO+bD4jvhy+K34LvrU+6L9h//sAPsB9wKvAyEEXgSCBB8FkAXIBSsGywWtBJYDZQLXAIv/f/6l/X39SP3D/Jn89fvc+p/6xPq/+lT7XPwx/Sj+Af+f/yoAGgDn/7//C/9H/rj95PxK/Nf7m/pb+XP4BPf99Q/2/fUb9mH3s/g3+ir91wA1BAAHUglfC+0MpQ0WDjsOog0jDa0MkAv3CSMI/gWsA6gBKADp/s79Kf36/OD8f/wR/Or77fsr/M/8wP2d/lL/HgDfAEwBPQHdAG8Atv+V/u/99P0e/rb+e//0/0EAlwC7AKEAwAAHAZABcAIMAxkD7QK+Aj8CaAGNAKP/kv6j/fL8OvwL+6H5jPh19yr2+PQn9JfzTPPH87P0yfUG92X4Jvrn+//9gwBkA4MGcwk+DGEO4g9zEGAQ/A+hDqwM1QoACRoHXgXBAx8ClgCJ/6H+8v1g/f78LP1k/ej96f57/7f/MACNAHcAZgAmAE3/kv7b/Sr9gvyt+yX71/q8+kD7OPw4/VT+h/+mAMoB/gLMA24ErgSEBFEE0AMJAxQCJgEfAD//Yv6V/f/8WPzX+y/7iPo/+hj62/nB+b35Tfns+BH5+fjU+Cj5sPln+v76dfvu+4X8I/0k/qb/YgH9A+cGwQkcDIUNtA7aDh0OTQ0tDNkKLgnkB9IGTAXTAzYChgAX/0r+vv0Z/dT80/wR/YT9K/6r/qX+k/6k/t7+4v5j/uT9Tv2R/OL7Qvuo+gT68fnE+vD7Wv3n/l0A1wFWA4EELAWaBY0FMwXSBDAEIwPzAUgBqwAQAKL/uP6t/av8lPvA+rj5vPgg+MH3yPfo9wP4Lfgx+ED4ePjG+FD5w/
mV+ur76/xC/uv/LAIJBbcHmwrFDIMOrA/3D/gPog72DPQK2ghVB3IFuQMnAqcAp//n/iv+Y/2z/Ev8Mfx2/Ob8g/1X/h//7v88AAQAhP+S/mD9CvzN+p35yviZ+NP4zfkx+9383/6VAPEBGQMJBKgEFwVfBVIF6ARwBAsEnAMVA1QCeAF0AHP/wf7+/Sn9bfyg++b6Y/rX+TL5svhl+Cz4MfiH+L34EvnE+T76rfpQ+wT8+vwW/j3/hQBCAqQEQgceCl4MgQ0bDusNbg2IDN4K3wiSBu0E0QPJAtcBjQCP/y7/ev/3/+L/h/8o///+4v6h/hP+V/0N/Rj9W/1d/an8uvvR+hT6Xvl8+M33wve8+JD64fyU/yoCVQQtBjEHcAdMB+MGoQYHBicFWgSHA+8CWwKzAb0AWf8X/hX9/Pvn+v75VPkM+fr4Tfm5+dj5G/pn+nz6aPov+vb5wvmW+QL60PqP+1X8Ef0c/rL/9QHIBPAH7AolDcEOpQ/SD9MOCw0UC7oImQbeBFgDygFNAFH/1/5x/gz+B/7//Qf+D/4B/gz+0v3J/ez98/2q/S/93Pw4/Er7Q/pY+Zb4Jfhs+Fj5xPrK/E3/wwHrA5gF2wbYB20IhAg6CJMHowbABZ4EXgMCApYAp/+k/nf9avxj+4v6+Plz+cH4LPj992v4//iQ+Tn6vPp3+wT8Tfwz/Jj7N/sC+zX7Xvum+6D8NP6jAFEDQAbRCMsKfgxKDZMN9wzNC5cKLgnnB2YG/ASWAzkCIwFNAND/Pv8Y/xr/7/7l/pX+R/7r/Xr9Dv2b/Eb8y/tt+zD7uvp0+lv6GfoK+nz6O/tZ/P/9+P8bAusDLwU6BvUGUgctB6kG2gXbBCQEYANqAlUBTQCV/+/+Pv5i/Zv8+vuU+3z7KPvm+rr6mfqx+pH6dvpF+s75VPnY+Fj4BvgY+HD4BvmR+Wj6pvtO/cz/gALaBTgJ/QtpDqMPCBBHD/ANUQwPCsoHqQX7AzwCzQC+/+P+iv53/gH/Sv9g/6D/2P8VAPr/t/9l/+b+SP7C/fr8u/tY+jX5Yvis94X3FfgW+dX6N/3I/08CWAQABjcH4gc6CC8IugfmBu8FAQX4A68CPAHj/9D+7P0p/W380vtk+zj7Yfs0+/P68foE+zz7X/s6+//6xfpO+t/5VPm8+Gr4f/jN+Pr4iflF+sv7Ev58AG0DIAb7CL8LvQ0KDxIPeQ5oDf8LPgoJCMYFhQPYAXUAX/+R/gj+VP65/jz/xv8kAJMA0ADZAKEAGgB9/xn/cP5b/Rn8v/qu+bH4/PfJ9wX4IvkO+5f9GQAQAvoDqAUJB+0H8gd0B4sGnQXjBAAE2wKOAYoAyP8p/53+9f2D/Rv9w/yK/Er8BPx++wv7t/qB+lj6B/qx+Rr5ffj296H3dPdu9wD4vfjA+ef6MPwC/gQAtwJdBcoHLQrbC0kNvw2VDRMN6AuiChwJcweMBc0DQAIAARwAX/9f/5H/EwDNADcBfAF6AX8BWwHzAGYAoP+g/mD9IPy2+kL58/cY98v21vbO9335qPs4/rgANQNKBQYHcwgiCQ8JVQhsB04G2QQwA18Bzv9+/oP9uPzp+3P7Q/ti+6P7yfsC/Db8jfzU/Pb85fyH/Af8JPsU+uf4u/fu9n72k/br9rb3+fiu+tz8N//YAV4E9AZcCSELWAyuDLAMagzTCw4L2QlaCNQGcwUKBMACiwHKAIoAkwAOAVYBpwHhAeYB3wFkAdUAHwBc/3j+Q/36+5v6g/l3+L33XPdV9yX4bfk3+wP9tv6jAI0ChAQOBgIHaAdNBxgHpAavBTcEdQLXAH7/XP5A/Uv8uPuQ++T7WPyn/Nz8Jf2G/az9eP3x/FH8uvvp+uv52PjH9xb3zfa79rf2+/bq9435wfsp/tYAxAOlBjUJBgvzCzEMIAzyC3ALfApLCSUIQAdlBlQFQwRjA9YCpgKBAksCBgLTAcgBpQFWAcwARwDN/xf/J/7d/Jn7c/p2+Z74yP
dR92f3TPiq+TX7yvxc/gQAeAHMAsgDggQVBXIFwgWtBUwFiwSEA2wCLwEMAOn+5f0Y/X38JPzh+8/73/sO/GD8qPz8/Db9Mf3V/A/8Jfsh+jf5efjc9333UPeK9xP48/hZ+kD8oP4yAdoDQwY6CLwJwwqEC/ELGAzcC1YLtQroCekIqwd/BmwFdASYA88CMwKoAUYB1QBZAPv/pf9p/xT/r/4Q/iD9LvxA+2T6c/mT+P735vd6+HT5n/rP+xH9a/6+//8A/QHLAmQD0AMpBEkEWgQ8BPIDiwP1AlkCjQGZAIr/gP6r/f38e/wE/KD7Zfta+3b7f/tw+zr75Pp9+vX5ZPnk+Jr4hviK+Kn4+Pio+fH6vPzS/gIBOANBBfsGZQh3CUoK+AqKC9ELuQthC84K6wnNCJkHdQZvBYEEsgPJAtUBAgFWANT/Tv/j/qn+gf5J/r393vzT+8r68vkd+Un4qveY9zX4NPl1+rn7Af1T/pj/6QAJAu8CmAMYBHgElASHBDEEtgMaA2MCvQH0ACEATv+V/vj9bP0W/dz8rPx0/En8NPwQ/Nn7nfss+4j6rvnp+Gf4/ffu99P39Pdh+F35Nfsg/WX/vwEOBBwGrwf7CNIJhQr+CmsLpguqC6ULQAuIClIJ+geuBnUFXQQsAwcC+QA6AMH/Yf8W/8X+mv5g/g3+dP2I/IP7dPqc+dT4M/jc9/X3ovim+fD6JfxK/V/+Yv9lAD4BFALQAn0DCQRbBIIEZAQkBLgDLgOPAtcBHAFVAJL/1/4q/qH9JP2q/CX8pPs8+9L6Z/rm+WL54Phl+BP46PcK+GH41fhZ+fr5Ffur/Lv+8QAWAxIFswYZCAoJognuCRgKSgpoCogKdAouCqQJ4ggVCEAHgAazBc8EvgOOAnMBdQCV/8H+AP5u/ez8bPy8+9f60vnn+EX42Pek96/3OfhN+bf6S/yy/eL+4P/RAMEBfQISA3UDxgMBBBoEHgTpA4wDFAOoAjoCogH+AEkAn//v/lX+1f1f/fj8j/wp/J77C/t9+uf5JflC+IX3HPcp93f38/d3+DP5ZPr2+9D9r/++AegD+gWpB64IQwmaCf0JUwp2CnUKXwppCksK4wkQCRAINQd2BrgFqQRmAyICEQEuADb/LP4t/YH8CPyH+8z66fk1+cD4h/hQ+Db4gPhY+bj6LPx9/Y7+f/9bAAkBmQENAoMC+wJjA6sDxwPnAwkEEATZA3ID9gJjArgB8gAZACz/VP6f/fn8SPyT+/j6Z/rv+Y35P/nc+GD49/e598P3Gfiw+Er53fm6+h788v3i/84BuQOMBTEHfAhLCasJ6wlNCqYK0ArBCpoKUQrJCQgJEwgSByUGWQVwBDEDywGLAI//qv7E/fP8SfzN+2r78fpD+nX52fiT+IH4i/jS+Iz5wvo6/Lj9Bv8OAN4AkQFCAuwCbgPDA/wDGAQfBCAEIgT3A4oDEAOWAhQCawGZAKf/qP7c/Uf9yPwp/JH7H/u++mT65/lR+av4Jfji98z30Pf/92z4+Pi0+an6AfzK/dz/OQJ0BFYG1gf8COYJawqqCrkKswquCqUKewrgCfII3QfnBhgGNgVYBEcDLQItAUoAff+U/sD9Ff2h/Dv8s/sJ+z76kfkH+a/4hfiK+Pv44fk3+7L8Kf59/3wANQHKAXoCHgOXA+4DGQQjBB0EJAQJBKQDKAO/AlUCuQHyAB8AQP9e/o/9zvwU/HT7DvvE+mb69/mV+Uj57fiL+EH4L/hg+MP4MfmM+Qf6CPu0/L/+1wDbAskEmAY2CGEJ6wkBChQKVwqbCpwKTgrHCSEJcgivB78GrQWjBLMDuQKbAXYAbv+a/uf9Tv3P/Gf8A/yC+9j6Evpm+QD53vjg+Ab5j/mm+jf86f1m/3kAJAGvAT8C0wI5A28DkgOjA8ED1QPZA7ADTwPoAnoC+QFRAYgAn/+d/qn93vxC/Lj7P/vl+pz6UvoL+sX5Yfna+Fn4Cfjz9yX4nvg0+bj5Tv
pn+yf9Xf+5AeYDuQUyB5UI2AmNCpYKTAoZCh4KIgrhCTMJJggoB3oG5QUdBSEEGwMwAk0BbwCP/6D+y/0w/cj8dfwG/HP7xPoR+n/5Gfna+M/4EPnL+Qz7pfxg/vD/GwHVAUICpgIUA3QDtwPJA7kDpwOrA7MDhwMVA4IC4gE3AXoAl/+X/pP9v/ws/MH7YPsM+8H6ePow+uv5o/k6+bf4PPjr9/L3W/j7+Jj5L/oH+3T8eP7NAAcD5gR0BuIHKgkVCmIKLgrZCbEJvwm/CXQJ0Qj9BzMHgQbGBd8EzAOmAn0BZQBm/4D+qf3t/GT8DvzX+5b7Mvui+hT6vPmd+av51Pk9+hL7Vvzr/Xz/uQCDAQUCZwLMAjMDdAOZA4IDZQNkA0wDKwO9AikClQH8AIkA6P8o/1D+if31/GP84vs7+7H6Ovri+af5Q/nu+Hn4FfjU98L3Gviu+Gr5JfoL+1j8QP6NANQCygRBBqQH1QjLCUIKBAqYCSoJKAk3CQgJnAjfB1AHzAZSBosFbgRVAyMCJAEXABL/Fv4r/ZP8CPyw+1z7B/uw+j766fmt+bT51vkL+of6cvvm/LX+fQC0AXgCBQOVAyUEWQRCBLIDNQMHA88CkwIBAm0BAAG7AI8AFwB6/8T+E/5t/a/86PsP+2j63flU+ev4ifg8+P73vfdx92H32fem+JD5gfqK+4v8X/4SAVsDqAWwBy4JZwoaC0YLTwocCRII3QZnBkgGHAYNBlgGrwbcBjIH3wYJBiYFIATYAl4BNwCh/vX8vftJ+tj4sffX9gr2rvUL9rb2vfc8+Qz74fw2/7wB9QPLBcQGPwc7B8gGAga2BPwCMAHb/yX/4f7k/lb/9f+tAJ4BNgJAAqwBswB5//j9Sfx3+q74A/e09Zf0wPMp87Xyi/KF8vDyFfQQ9s74HPze/+wDLQhQDIkP9RDjEJkPaA0ZC4kI/AWXA7sBIQFZAQUCJQNyBOcFvwePCbwK0wq3CQ8IyAXaAr//Xvwg+Xr2gPRI84XyV/Lu8iD0vPWV97T59/tc/v8ArQP7BQkItwlhCsQJFwjwBWoD9gAo/8j90fze/Ov9i/9pAQcDHwSGBEUEoANfAosAl/7d/G774flH+LP28PQ98xfyZ/Ew8abxsPIk9J/1efej+db7Q/4GAQ8ETwcmCwgPABIWE2gSexAxDeYJjwZVA/0AeP+y/xgBFQNzBYAHQAmECmcLXwvwCWEHUATzAFP9OfpW99T0+/LU8YnxnfE48nvzO/Vy9xj62vyM/x4CiATCBk0IUwmgCdIIfgfbBRUETQLxAGMAIQBOAOMAsQGtAo4DVAS+BFAENAPUAer/g/0w+/j4z/bh9FXzFPIO8XbwifAc8e3xB/Nd9Oj1ivec+UP8Hf9qAvwFgQkSDW0QehLgEukRnQ/dDM0J9wb0BHIDrgLYAv4DLwVXBs4HyQgtCWQJPgm2B/UEtAH3/RT6m/Yf9Ijyj/GK8X3yvfPo9CT2mPc/+Q/7Tv3N/0kCtQTvBskIoAl5CWoIswYtBQ4EPQPNAuQCFgNLA2gDIgP2Aq0CbwJWAtIB9QDr/77+4vyq+o34O/YV9E3yAfEa8M7vGvAJ8XPyvfNh9TH36/jH+jz9FgAWA4wGEQqjDdgQxRLvEn8R1Q6zC7gIDAZiBIADWwMwBKoFTAd+CG0J3gmoCeAIjgdnBf8BKP4/+rz2xvPH8U7xZ/Eu8orzGPWq9gT4gfkh+/384/7zAPUCdwSvBY8G7QaaBvwFRwXhBP4EXAUQBq4GBQf1BikGHwU8BDgDUQKZAZ0ARP++/d77rvlu9yv1f/NA8mTxBvH98EbxivFt8pTzpPT79VP30fh4+uH82P8HA1oGjQltDeUQ6BJWE/8RpA+UDOIJ4geIBhAG2wVTBhIHige7B4gHKAd7BsgFxgQBA2sAHf3C+Xn2sfME8nTx7PH38n70KPZp93r4k/n4+nn8Af7Z/6cBSAPgBGUGjwcbCCMINwh+CHEIOQ
jbB1AHqAbhBcsEawOIAtIBgwEhAcX/Cv7w+8n5u/fl9XH0c/MG88fyhfIS8nHx5PDX8IPxtPJW9KD2E/mf+7T+9gH3BLcHygobDjgRMxNSEzsSEhDsDAoK2QdcBtYF0QUIBlsGigasBrEGaAb+BWoFiQQDA2UAAP1D+af19vKd8XbxQPKt8z31rPbN93n4QvmC+jX8Vf6OAKwCjwQ7BpUHrQggCcoItwjeCOUIogjnB9IGuQW+BHwDmgKtAbAAewBUAHf/FP6W/Lj6BPmY9/L1PfR38qXw9e7e7Urtae2n7uzwuvN49gj5Ift+/UEAJQNmBl4K8A6JEqsUWBTlEecOgwvMCOgG6AUkBvMGoQfTB6cHPgcoB28H9AdTCH4HhQVzAjT+Y/m79Hzx/u/371TxKfP69G32U/cx+Ob4Bfru+0P+nQBxAssDrASMBWAGKAfGBzEIMgkfCmgK/gm0COwGTwXmA58C4QFEAd4AogDE/3b+5fxC+6j5S/j49l31w/Pn8f7vAO5M7IXrsetx7WvwKfTT9676xP2gADADqQXMCDQNPxFHFI4UnBJPD2YLpQhpBjwFgQUOB8MIswnSCUMJuwiTCD0J8QmeCd4HfATk/0/6jfQi8L3tf+0Z783xwPQG92X4Qfmw+Wn65/v4/RQApwGAArkCPQMSBPIE7gX7Bj0IHQqxC9kL6QoBCe4GRAXZA6YC3gEmAWYAxf+s/iz9uvtj+jD57fd+9sj03fLs8Lnuruw+69PqxOsp7oLxCPVj+Lf7d//CAnAFdghnDLoQJBTeFMUSfA9+C3sIwwZ8BVEFYAYJCHcJMgo2Ch0KSQrACmsLRgtBCacFyAAF+2P1xPBa7jXud++q8RX0CvZP9yX4ufhk+a/62vwx/8gAkQGHAXgBYQICBKIFEAdHCLsJfQsyDLgLMgobCCAGXwTYAncBmAD0/3n/7P49/nP9nPy/+x36Jvj89dPzpvEX77jsxuqQ6dnp7evu7pLyNfYW+r3+6wLWBlUKgQ0KEUIU9xSmEqEOawoACBQGxQQCBfUFdwfZCBgK3gpQCzIMdw1jDqINLgtJB7MBQvsf9XTwou3c7FDuu/Al8y/1Effb+M/5jvrP+3L9Ev/f/wwAo/9H/xMANQLeBLIGHghICaAKwQu6C/AKSQmdB/EFSARYAn8Ajv/4/gT/3P5Y/s/9y/xk+0v53vab9KbyrfAg7mTrQem/6JXp++wL8b301vnl/XECKwZJCP4Kgg6pEu8UNhQ8EMQLDQhMBV4EDQRYBaoHpAk8C50LMAuNC70M3g0zDqQMTwlRBNv9IvdC8XXtcOz+7fPw0/Pt9ZP32PiN+QH6hPrC+zr9aP7H/j3++v2H/qwAzAOiBrMI+gk6C1UMkwy1C+MJ6AfPBeED5wEQAAz/1v5X/5P/af/G/iT+Nf0x+/f4Ufbk87rx/e4L7CDpfOja6L7qTO+681H4p/xAAWIFVgj7CuMNqxGhFAQUPBDIC9cHVwUYBBMELAU3B2gJfAudDEYMowzKDQoPQg9PDaUJVATN/Un33vES7qrsIO5H8ZH07PaA+Kf5//m/+a/5ePpu+0P8wvzH/PX8yP0HAFEDZQaNCBEK4AuHDbsNDgw/Ce8GQwWHA3QBkv+5/pj++v5T/17/Y/9k/07/Pf6y+1r4I/XR8TjudOp05yPmEudT6vjtlPIb+BT9JALbBiIKvgx0EMUU9hX+EqMNWQjuBGECYwFRAkEEoAZ9CTMMcA3zDVMPrBF0E8ISoA+JChoD0vo288/t0+qo6uHtEfKk9cv3efmQ+yD8+/sM/FX8h/zl+zT7MvrN+dL63f0mAowF6geMCa8Lxg0rDuUM8wobCUwHJgVQAn3/xP09/Yr99P0q/pL+hP/2/4D+0/vO+FD2lfMc8N7rHehd5rLmRunj7MTxIPfC/BACKQa0CaYNDRMXF/8W6RLDDPcG+wLDABQAuQFCBBoHMAr/C90M3A0NEHQT4BVDFT8SEwzQAiX5lv
Df6qno2emq7drxPPWd99z5KPx4/Sf+yf7q/gD+BPx++c33eff0+I781wCGBGMH3QltDGwOnA5xDQgMKwoBCD0F3AEx/6z9/vwj/Yf92/2u/q7/qP/1/Vj7yfht9nvzsu9b67DnS+Xf5S/pbu1M87X4aP4BBM8G0ghCDLoRIRYCFtoRHQwzB9kCRAAuAL0B3gSGCOIL3A0wDusOnxFFFIEVKBRvEKkKnQH6993vQupe6G3qI+9Y8y32rfiO+9L9Uv5S/nb+Ff54/Fb6IPhH9vT1D/ja/L4BuQVTCY8M0w7aDmsN/wsMC+8J4gfyBCkBxf11+8z6q/sH/Uf/0wEgA/kBv/7P+kj3/vMc8b3tMOpR5irkNuYi6cHsf/IP+iQClQjLC9YN4BAjFLwU0BF+DHIHQwRkAaX/Gv9aAKoD9AcADIkP/BHQFAkYdhi6F1IUww2pBQP8FPMj7HnoUOgM6/XugfK79WT5lf2GAMYBUQE5AL3+Y/v696f1fvRL9QT5+/3rAYQFSgnjDHgOFg5lDQQN3QxzC80HUAM5//P7X/o4+kP7S/0DAIQCkwIXAKn8Zvl29qDzC/Bl7OnnzeRL5NzkDemQ7lX1Q/1LBMwJgQ5eE3oWUReLFAEPaAnbBIIBBf/z/Rv/2AFOBd4ImAygENkUaBlAHPgb8hjoElAK3/9H9VvtHOld6NfpSOzr7p7xF/W5+dj+agIxBDoE8wH7/ST5afXJ8zT0nvZv+oT+OAI6BsQJNQx+DdYNcA6zDl0NPApKBg0CIv42+4b5qvl6+1T+wgA2AW7/rvyv+V/3+/S58entyOmI5ffjQ+Wg56TsEPMo+6UCTwhqDRkTvhfuF/wTtg4LCu4EMAF4/pP98/5HATEFFwk0DH4Q2BUNG8wd1hxqGvgUtwvLAEP2Ku7N6f7oRuo47O7tC/De85T4gf37Af8E8gWNA5/+6fjQ9N7yjPNV9if5S/yK/8wDKQjoCqcMSg5SEE0RDhBpDKQHugLB/V76Wvge+JT6If5GAdYBuv/6/CT6+/f09fvyq+9a64nmRORf5BXmhOr671f3DABbBw4OoRTlGPgYhhV5DywKxQWeAbL+1vwt/VL/3QLhBnUL0xAlFz8dbSAAILAbFBZYDckBVveZ7vLpvOhs6R7rjexY7rPxpvbN/FcCHgZrB/sE0/+4+fz0zvLp8v302/f8+mn+8QEZBbQHUAo+DYQQrBItElAP/QquBQIAL/sA+MD3Dvps/T8ATAAd/j/7wfj59n31B/Rl8UTtKegR5U3lHeb+6d7v/vZKACsIGhDOFm4ZlBeWEngNUwm7BRQCU//1/Xn9Wf4TAYEF7QufE+cbQSITI+Uf1RsUFawKbf/J9SHvzurz6B7pzukq68LtJfIG+Ez+vANuB4kHbgMW/gz5lvV19IT0jvUS9/X4rfsP/3kC4gUECo8OrxFiEmwRNA+9CwkHwwEO/b/5yPjo+Ub8Tf5h/tL8ufpB+JH1jPPo8YHveuyV6e3mkOZh6L/rPfFn+C8BOgqwEvQXmhjJFbgQmgs3B/4DsAG9/9/+xv6a//0B0gYhDiIXnB8mI0UiYh/fGvISEgi6/Yb1oe+O6/Xon+f05yLq7u2u8+D6ogHEBpoIrgY+ApH86/ec9db0vvTi9Lj1dffO+Yv9+gHiBqYLOg99ETsSdRHkDoYLNAf+AZz9iPo3+ZL5APuK/Pv89/vF+UX3xPSS8iPw8+266z3p3Ofo57/qUu6T8+j7igRaDcAUFxifFgoSUQ1YCc4F7gKvANb/Qv/g/nIAHwRjCswSbxwcI1IkSiH3HIkX+g1+A6767/MT7wfrDujK5u3nOetA8AL3tf4/Ba0ISgggBXMAQ/tY9zz1KfSH86jzr/R+9k/56vxbAkEIVg0uEd4SPRPQEe0O6wpcBYb/vPsO+pL5Jfro+qj7xft3+kz4+PXR88jxu++H7THrb+ma6FXp7+tP8Iz3QwBwCVYRcRVMFuoT+g/EC8YHWAS4AVUARv
+T/u7+pwEcB+EOzxeZIGYk3iGJHu4aNBQdCpcA3Ph18pDtSumQ5v3lv+ce7Gvy9PnGAVcHIQnIBzQE6P+s+xf4nfWB8+nxdvFZ8rX0ivj1/YIEYQpiDtgQZhKZEnsR+g58CvEEjv+7+5L5dPiC+FT5Pvpd+hL5Kfff9E3yFvCa7unt8+wX7LvrQ+z27kr0CfwFBaANAROlFD4TcRA8DSYJvwUMA4IBgQBY/xP/wgD/BCYMtBRYHZUj/SKgH14c7hbNDmkFCv1n9pTwGuvv5jXlTOai6SHvT/ZB/qMEyQczCA8GxAKD/2f8TflQ9XzxZO/27m/wvvP2+KD/NAaJCwcPXxHhEqETrhIDDy0J2QLG/S36F/iF98v3W/jo+K/4OPf59APzZfF/8JbvyO527uvtdO0p7jnynviqAb4KPhAIEg8R8Q9FDpoLcQiiBbYD1QGF/xj+pf5uAuwInBE7GmUgyyL/ILUd1BjiEjELTwMY/FD0Ke1H53HkquQ05+br6/E4+VMAygR+BikGEQVlAyAB7P0X+eDz9e/s7Tbuq/AP9Wn76AEpB/0KPw6AER4UDBWREnINzQc4Amr9+vkB+BT3fvYx9uX1UPVy9CLzy/EA8YjwyPAZ8ZnwsvBr8ST0K/n4/5MHhwx/DjAOxw0wDTwLIwnSBs0EswKFAIj/NABtAw0JYxChF5gdEiFPIPMcQxnCFNkNPwbc/sv29O5v6KHkOeRa5lfqu+8U9lj8LAFbBPIFfAbKBUMEQgHE+wX2FvH97Qft3e1u8fv2Kv3wAg8IqwylEOQTrhVSFA4QvgpyBd0Az/y7+UP3QPX28z/zRvND8xjzhPLX8XbxTfHb8VXyqvKy81b2dPvfAUcHDQojCwUM8AzoDEALKQlDBx4FlQKSAKv/2QBCBMoJABAEFhUbsx4YIHcdGxp5Fc4PtwndAar5S/FK6h/mlOQJ5XfnA+xO8s34Lf7zAaYEtwa7B1EHYATl/sr4tfPT71jtzOzC7ifzj/jp/TsDuQiHDsYTkBYsFpMSpA0jCaAELwAn/Jf4sfVc82jxX/Ar8A/xXfK+8qDydfIM86jzsfOg9FD3WfxxAhUHHQkkCSgJOgpzC1YL6AkdCCYG6wPdAYIABgEPBBkJFg9YFLEYBxzDHbIc5RmAFnIRsgsMBT/9/fSV7dDoj+YD5grnxulg7jH0svkK/lcBAgRRBooHSAY4AqT8h/eo8wvxWO9C73bxK/XM+b3+OQRkCkQQUBTYFNISnw8CDIgIjARcAD/8Nvid9Ljxse/77mrvIfCm8PDwZfFg8knzO/QS9kD5W/4sBFoIpgk1CRMJEgrfClAKIwmeB4wFDAMGAToAFwEABJYIDw46E5QXDBsVHbYcthkiFk4S3Aw8BrX+FveP8JvrWuh35ifm9OcL7G/xqfb3+sr+oQLRBYMH1AbUA5L/Kfsf98XzUvE/8EXxd/Ol9tz6NQC6Bv0MohE5Ez8SKxDEDSoL4gcEBJj/DPuM9pPy5e+N7tTugO/I77/v5u+J8THzVPSo9Tn4Wf15AwQIXAkFCS4JhwrECy0LpAkNCJ4G4wSrAusAkgBlAkgGEQvDDxQUNRggGygcXBrlFs8TIw9xCaUC6fpD9PXuRetj6E/mbeZJ6evtpPKS9kP6eP7RAjwGNwdlBUQCyf5z+1n4QfVE8/zyx/OH9Tb4OPwBAlgISw27D+YPMw+HDl8NHQuaBxcDXv7s+c/1R/LU78Hudu5M7rjtBu6e7zHxNvNC9bX42/3KAmcGHQhICVgK0wv0DH4MEwvcCJUGqAQmAx8CvwHbAkQFqwh+DHwQyhRCGPIZWBkxFxsULxAzCxQFwv7r+N/zO+/26q7nf+bt50nrNu/L8or2y/o2/+gCpAR9BDMDhwF0/4L8MfmI9oH1mfWA9lL4UfvM/94EBAkYC8QLIAyrDOAMpQvnCPQEjgBI/Gn4JPWC8sDwRu9Z7gvuve7077DwjPKd9c75nv49AusEpAZZCHAKIg
w2DXAMvAq7CO4GvAW1BEcELATQBP4F4wehCuINphHZFOIWFxeNFQ8TsA+CC2gGNgEh/AH3AfIk7c7pZOgO6XzrOO5S8bv0tfjl/FUAawL4As4CrwHV/2T9qfrJ+Kb3RPe69yP58PvE/5wDnwazCPEJ1gqTC4ILOAqkB2UE/ABk/ev56fZh9EXywfC670fvy+/z8EzyHfQk9xz70P5HAcsCaQSuBi0JBguBC4gKIAkNCGwHaAdgBzAHUgeXBz4INAm+Ck4NRhClEm0TwRJWEY4PQA0JCjQGLALf/W758fS+8KTtB+xC7JHtIu8e8cbz8PYU+vb8FP9AAJ4AIgC+/vX8T/tN+tj5h/nw+T/7cf1SAEgD0gWVB9oI9QmrCnQK9QiMBqkDjgBz/S764/YH9MDxS/Cj783v1fAh8gX0zvaG+jT+oABIAnwDMgVRB78IWwl/CDsHXQbyBboGmAdRCKQItwhOCe4JFwumDJYOXRBFEUcRFRCXDvwMTAsICbYF5QF5/Uz5ffU18uDvee527jTvcPDk8d/zpPa9+cD8bf71/qX+Bf6R/bn8ovth+rL58Pm/+gb8tP3x/1gCoQRiBsEH+AiZCY0JPggRBp4DygDK/Y/6jPcD9ejyOfH/73zvCPBA8of1A/n5+yL+EwDmAR0EWgbtB7gIfAi7B3QGowUfBmEH4wiqCdAJlgmiCX8KlQvxDDwORQ+DD64Ong1iDEMLEwosCFoF2gFY/qv6+fa884vxofCe8DPxg/HZ8RrzkfW8+GX78fxW/SX9+/zl/Gf8WPt2+jf6tfqj+7P85P1X/w8B/wLnBJYG7AeJCGMIYgfXBbYDDAE7/nr7EPmk9jb00/H/76nvNPEH9Nf2LPku+1H9tf9yAv8ErgaDB40HDwdABgUGogbSB2MJvwpiC0ELVgu3C14MRQ0sDo0O8g0qDUEMQwtWCjwJtAePBWYD9wDv/cH62feO9e7zUPND8w/zDvN984j0GfbX93H5b/oG+1X7P/v2+nf66/m/+Vj6h/up/JT9Wv45/5MAQwLZA/UE1QWHBpcG3gVnBHUCRwBL/ln8DPpP95L0w/If8pjys/NU9V/3vfl0/Or+OgFKA0YFxQZUB5YHdgd5B/QH9QgsCiML+Qs8DB0MEgxfDOgMUQ1rDe4MGwyJCxELPwr+CG0HuQXaA8cBT/9q/MX5lvff9b30TPRO9EX0VvST9C71U/a899r4UPm2+TH6nvq9+m36EPoQ+vj6DPzN/D39w/3D/sz/6wDRAbcCuQNNBDwEZQNNAj0Brf+4/WH7VPnP93P2hvX19F31t/bf+Cf7C/3z/hkBSwMxBbUGqQdJCN8IZQl0CXsJAwrYCp0LFwwnDOML3As4DGsMYgxYDEIM1QsbCx4KvghSB9sFKwTrAY7/WP39+sT41vZ39aT0ePSt9Lv0vvT49HP1MfYN99/3kfhW+fv5IvoJ+gX6IPpV+qL6xPrR+kr7M/wI/cf9gf5T/1wAfAE+AisCqgHvAP3/3P6I/Rf8jPpF+Yb4ffgv+UP6o/s2/Qn/EQEeAwIFyAaYCDkKdgsFDPcLowtOCzoLMQsKC94KpQpWCukJjglTCVoJrwngCaoJ/ggoCDUH5gVzBPICVgHc/2D+gPxs+pz4OPcz9or1JfXU9J/0nfS29PD0ovW39s/3svhp+c/5FfpZ+kX6z/kp+bj4hPie+PX4Wvnu+bz64Pvp/MH9kP5K//D/OwAsAL7/TP8m/xT//P68/sn+Wf9PAIkBjwKSA8AELgaVB68I8Ak9C2YMHQ0VDYsMvQsSC4QK5glnCd8IWAjBBx0HowZgBn4GhgYmBocFvQT+AyIDOwI8ARwAH/8S/u38j/sg+rr4YPdV9oD1//Sn9HH0ZPSM9C71DvYE98v3VPjS+Cj5Vvkx+bj4Efhc9wP3Dvdd9+H3V/jr+KH5yfoj/Dz9OP76/sH/fgBHAQMCjAIzA8UDFwQuBGEE3AR6BTUG3wZxByEI9QjNCZ0Kiw
uEDDENdA1EDbUM/QtBC4IKnQmxCOAHIweBBtkFIwVnBMMDNwOeAgACRwFxAIH/fP6T/bj8CvxI+1D6P/kb+AX3/fUx9a/0YfRI9Df0K/RP9ML0fPVB9vL2f/fZ9x/4Sfhg+F34MPgK+Az4T/i5+EP58Pmy+qT7uPzc/QL/MQBgAWsCYANLBDkFHAbwBoYHxgfeBwMIVQicCNAI+Ag6CbYJPgqlCuYKPQu5CxwMPgwADHcL0gocCk0JVQhNB1QGZQWJBJMDgQJtAYkA4P9K/7L+9P0a/S38P/tw+s75WPnc+Cv4UPdq9p/1CvWw9J30qPS39Lj0rfTP9C/13fWd9kf36PeG+Dr56Pll+p36sPr3+oX7Jfys/AL9TP3F/Y3+m/+kAKYBjwJcAyoEAAXcBZoGOQelB9oHDQhECH4IqQjTCAAJMAl/CccJ/wlECqIK+wogCx8L7AqUCjQKvAkXCT8IRAcjBukExgOjAmQBEQDb/tn9CP1g/LD77fou+n753/hP+OT3h/cX9532FPZ+9ev0gvRT9Gb0uvQq9Zb1//V79gz3uPei+Lz57PoO/PH8iP3p/Tr+lP75/m7/z/8QAFUAqQAYAaUBNwLAAkgD7gOXBC0FqAX6BSoGUAZ9BpwGuwboBv4G/Ab4BgYHGQctB0cHSgdeB5wH4wcSCDUIXghsCFgIGwiOB7cGtgWiBH4DUAIaAbv/Xv5H/Yb8/fuF+wj7b/rQ+Tn5rPg0+Nv3k/cz97j2LPaZ9SD15/T49EP1vvVK9tf2b/cZ+Nf4x/n1+kj8k/2o/nz/IwDEAFIBtQH6ATsCfgLFAhYDagPTA1sE6QRUBakFCgZ1BtAG9gbhBqIGcgZXBjIG9wW1BXgFQwUVBdgEhgQ9BA8E9gPyAwgEPASDBMIE1ASnBFkEAQSTA/wCMwJMAVoAY/9X/j39OPx0+/X6nfpO+vn5pvlb+RD5zPic+If4g/hw+DH4zvdj9xX38fYA90n3xvdv+Cn54PmW+mD7Vvx3/aT+sv+PAEUB5AF8AgEDYgOiA9IDCgRVBKcE+gRHBZMF2wUfBlcGjgbCBuUG6wbNBpgGWAYUBs0FdwUTBacEQwTrA5gDPAPbAoACSAIwAjQCPwJKAkoCMwL4AY8B/wBbAK////5S/qf9/fxM/JP73fpK+u75xfmz+Zj5a/k6+RX5CfkP+SP5PvlY+WT5Wfk6+Rz5HflQ+bf5Rfrr+pz7VPwV/eD9u/6i/4wAZQEdArQCNgOnAwcETQR2BJQEuATzBEAFiwXCBd8F7gUCBh4GOAY/BioG/AW/BXsFKwXSBHEEBQSSAxoDqgJHAuwBkgE0AeAAoQB4AFwAQwArABUA8/+t/zn/pP4J/n/9Bv2f/Ef8+vuz+2P7C/vC+pr6lPqW+oL6Vvor+hf6K/pd+p762/oM+yj7M/s7+1D7evu5+xH8hPwT/bP9WP75/pz/RwD4AKIBOQK9AjkDsQMcBG4EpgTPBPYEIgVOBXcFmQWxBbkFrwWXBXwFaAVXBT0FDgXJBGsE+QN7A/kCfgIFAokBCAGUADoA///Y/67/fv9O/yr/Fv8B/9v+nv5I/t79bv0D/az8bvxL/DT8H/wE/OH7ufuc+5H7lPub+5X7hvt/+5X7zvsc/Gb8nPzB/OH8Af0g/Tj9UP11/bf9GP6M/gj/h/8GAJAAHwGuATQCrwIkA50DFwR/BMIE2wTgBOME7wT7BPcE3wS2BIwEbQRXBEkEPQQrBAwE0QN2AwYDlAIjArABMwGxADsA3/+b/17/If/l/rL+if5n/kz+N/4r/hr+9v28/Xn9Qv0h/Q/9Af32/Ov84fzP/LT8nfyb/LL81/z2/AX9Ef0p/VT9h/23/eP9Cf4r/kr+Yv5+/qD+zv4C/zv/g//c/0MAqwAMAWoBzAE7Aq0CHgONA/MDTwSVBLoEvwSzBKAEigRvBEwEJAT8A9cDswONA2sDSgMmA/ICpwJEAtUBYAHuAH8AEwCx/2D/Hf/h/qX+aP
4w/gL+4P3G/bT9qf2l/aL9mP2I/XP9Yf1T/Uj9Pf07/Ub9Wv1v/YD9kf2q/cv98/0Y/jv+WP5z/o3+qv7S/gH/M/9f/4H/mv+w/8j/5/8SAFEAoQD8AFIBmQHRAQYCQwKGAs8CGgNiA54DxwPYA9oD1APMA8ADrwOXA4ADawNWAzsDFAPiAqoCbwIvAukBlQEzAc4AaQAMALf/av8l/+j+tf6M/mP+Ov4L/t39uf2k/Zf9k/2S/Y/9i/2D/Xr9cf1p/Wj9cf2H/av91f0A/iz+V/6N/sv+DP9E/2v/gf+R/6D/sv/F/9r/9f8XAD4AZQCCAJ0AvADoACcBcwHAAQICMAJRAmQCdAKDApQCpwK6AsoC1ALTAsUCsQKeApMCkQKRAosCfgJkAkQCHQLsAbMBawEbAcgAcAAeANL/if9C//r+uP59/kr+H/71/cn9mP1p/UH9Jv0d/Sf9PP1Z/XP9h/2S/Zj9m/2e/aj9u/3X/fz9K/5h/pz+2v4c/2H/pP/i/xQAOABJAEsARgBFAE4AYwCDAKYAyQDoAAIBHAE4AVoBgQGqAc0B5AHpAd8BzwHJAc8B3QHtAfYB9QHoAdgBygHDAcIBvwG4AaoBmAGDAWkBRwEbAeQAoQBYAA4Ay/+N/1b/JP/y/r7+if5S/h3+6P2y/Xz9Rf0R/er81PzW/O/8Gf1N/X39pf29/cn9z/3V/eT9AP4r/mD+mP7T/gr/Qv97/7b/9/80AGYAiACiAK8AuwDKAN4A/AAbATQBPQE0ASkBGwEfASwBNgFJAUwBQwEsAREB/gDmAOYA7QDuAO8A6ADUAMYAyADLAM0AyQC9ALwAqACKAHIAWQArAPj/u/94/1H/O/8a///+7v7R/qj+jf5p/kP+I/4N/vX9xf2z/Yn9tP2j/Zn++f6R/aL9jf2p/eL9qv2//dv9Q/5q/lD+jf7v/iX/df/1/2MAtADxAOcAFQEPATgBSAFjAcEBqgEGAg4CPQJzA9kC8wFRAcMAMQGaAJQARAAxAMoAqwBZADIAuv+0//z/BwC7AJAA2gDDAGYAjwAIADgAZQAmAF8AFwAIAPD/r/+j/x//cf8X//D+IP+B/tX+zf7t/of/K/8c/6f++/0s/gP+o/3Y/dX97P2p/lP+j/7F/rv+N//Y/tL+CP8W/0z/fP+e/6//2f9VAC4AgQDKAAgBYAFYAbcBYQGlAcIBcwGNAacBoQGYAX0BegFTAWMBngFtAZkBlgFIAeMA+gCxALQAhQCZAJYAVwCCAB8ADwDW/7v/k/8IAFX/x/+t/zn/2P8U/6P/TP+Q/4P/7v55/w//9P5z//j+M/9L/1X/kv/4/vn/3P5p/8j/zf7b/9r+cP9N//n+0P8k/6X/fv/5/ikAAf+i/2oAaP8GAdf/VwCuAPL/EgGuAD4BaAFhAZYBcAHZAX0C5QHDAVQCaQECArAB2QBMAYQAwACHAJQARwAdALgAegDCAPwAxwCeAMgAugBKAFgAQgBi/9L/Mv9+/2v/B/+b/6T+Rf/h/vf+Yf+u/r3/HP/5/n7/VP8s/wv/WP9X/oH/Av+S/oT/W/5N/4T/IP+S/yX/Yv9o/7j/FQCn/w0AHwB6/2UA8f/b/wABqP8WAbIANwA7AYMAfQBpAZ8B/gDFAQQBjwH7APwAyAHCAHQBbwHzAHUBGQHBACcB8wDXAFkAxgA6AKIAYQBDAKcAfv+UAMz/Uf8XAIL/wP9k/+3+s/9j/qT/+f5Q/r7/GP5C/8v+5P5A/z7+kv/A/hn/xf80/q//1f4o/wgARP4hALX/Rf9RAET/KwCE/zz/KwBV/y0Ab//8/2QAtv+3AGgAOgCeAFkAlADkAJcAEwGhAL4AawFqAHYBfAFXAEYC5ACWANQBogDoAKYA+QBIAbUA8gBIAEoA4QCu/6kAPgCv/5IAfv9rAHD/0f99AMD+8v+s//7+4v83/lX/Bv9r/SsAo/1z/qD/Ef5B/4T+Dv+4/+X+Ov/w/zr/7v+R/8r+vv
9+/1T/+//0/uP/agBS/y0Ar/8AAPz/+P8LAOr/XQDuAHsALADyAOP/8gDyAAgBDQElABUBvAC5AIwA3gCGAHMAQQHaAJcAFQGgAPQAMQGpAPgAxABfAcYAjQCzACsA3wBYATsABADz/y8ANgCe/u7/Rf9n/q7/nP7M/jr/Ef91/0/+yf4j/5L+bP/C/vn+Dv/a/of/z/6p/rb/R/+R/+7/vf/n/1v/EwCT/77/1f/J/9T/1f+NAPL/sQB5ABgAhQBqAPAA2gCIAOgA4gCiANcA9wAwAZ0AYQBWAXsAQwDjANv/NQD7ALoAmAAnAIMAtgDyAPMAkgDaACgAbwDH/4n/pACU/2v/g//R/hoADwCC//b/Z/97/4D/mv+D/3z/uf8r/zT/kv+0/qf/T/8Y/9H/HP6J/4j/MP8IAI3/U//i/if/Ev9n/0j/BgDc/7/+gf/I/1AATQA8AMT/0v+PAEQA8gD0AKIA8wBhAPwAkgGNAWoBnwB3AZsAygAwAm4AcQEsARkA9wDp/9MAAAETAMYAjQC0/xEAWgA0AIgAdQD9/6X/m/9GADMAqP/i/2j/Vv+T/0//6P+q/2n/JwC+/uD+vf8q/2r/kf/l/u3+7/46/9//A/8G/0//Ef/A/rv/yP8i/+//yf8U/2f//P9iAA0ADAAwAEL/bABQAFIArADM/6cACgAGANUAhgBWAMUAqgAlAM7/qACZAAkA5QDo/08AmACbAK0AQADyAEoAUACDAP7/zwCXAGgArACZ/1sAtwAcAF8Ay//8/zwA5P8QAFv/0v8AAFP/a/+8/hD/bP+m/20Aqv8u/5f/g/9v/x0Ahf+6/xsAHf8FAJz/jP8zANX/c/93/+n/SgBKAOb/7v94/6r/aABmABAAyv/+/1QAGQD6/1kADAB2AMAAgACEAPz/fABeAHoAFwBAAF0Arf+KAGsAXwB0AJgAnABkAHwAZAA4AFoAdgDs/7f/LQDO//L/HwC8/9r/0P/s/73/t//F/9r/sf8Y/7b/dP8h/wQAlP+T/4f/Mf+c/8T/CwADAJf/Rf83/1gANAC7/zkA0f+Z/+r/oP9i/6H/NP/b/7D/Rv/H/7n/+P/9//n/TgDk/zYAiQAVAHcAMwArAEwAFgAfAHgAMAARAKQAKQCCALwARwBpAMP/7f9uAPD/bADFAPr/AwC6/5v/RgDh/+b/EwAv/xn/3P+F/63/yP/X/0AA/v9XAHMAdgDNACgB/QD8AOIAGgH/AWcBcwFeAcMAnQGUAaABiAF2ANcAigCOAM0ARQCwAFoAxv9B/9r+Gf+k/1v/8P5p/mT+q/6v/vz+NP5R/nP+bv67/qz+mv60/gz/ev4v/mb+XP6a/mL+9P0J/pj9hf3J/fr9Hf6z/Qv+bP5h/rH+e/9j/63/rACRAAMBaQEAAp8C+QIdA08DtwOmAxwEPwTtA/ID+wOZAygDKgPwAisCUwLfAQUBKAFdANP/uP8W/zf/Rf+e/sj+rP6r/nT/h/9y//v/NABQAKMARgCBAIkAkgAYAb8AcgCQAFUAyv9q//D+T/6b/ZP9r/zO+2j7yPoK+/P6Zvpz+nX6OvrT+sf63/pJ+9v78vxV/dL9k/6z/wkBcwI6A4EDGAR2BFsFygXBBeQFNwU2BT8FrARQBJ4DPgMDA1YCuwFnAf0AqADKAGQA6f+//yMArQDwAD4BIwFtAXoBrgHmAfEBPwIKAtABgQEHARQBDgHPAJwA4f9+/zr/+v6I/uP9mf09/eD8wvyZ/Dv8PfwE/C780vsM+/36wvrT+mf6Bvp7+Wf5GfqO+qT7ufs//I/9OP6a/0QAfgBJAb0B3gKDA84D5gT8BOgE4wQNBVQFBwUwBQUFdQQ0BBIEKQTGA38DlwMcA/ECtgKIAmwCMQKHAkwCzwGhAWEBWwEDAeoA4gAEAMj/6v+M/z7/BP/A/oL+Rv5i/j3+HP4w/k3+cP4X/mb+aP47/pL+W/72/Sr9evzJ+/D6cPov+rX57/hb+B/4/P
cJ+O34jPnH+VH6IvtD/KT94/7Q/58AkwGyAsEDcATrBH0FrwUcBpIGxwYpB1UHkAeAB0sHEweeBq4GLQZCBk8GigVaBbgEaQRJBMIDpwNSA1gCowH8AFUA6P9R/7T+F/6+/aT9qP2J/Tb9T/0Y/QX9OP2M/Rr+Qv7P/sP+r/4Q/wX/F/+o/tP9Bv3O+8f66PkQ+Ub4dPft9gv2kfWM9YP1G/aS9jj3Lfg1+eX6d/zi/Vb/xgAdAmkDrgSCBUIGlQYEB3oHuwdFCFgIXAhQCOkH1we9B64HswdeB08HMQcHBwgH2QaQBuYFfwUHBQ8EmgPOAv8BSgFwAOv/N//U/pP+V/7t/aX9gf1h/Z/9hv1h/Wv9Qv2u/SH+J/6h/oL++f3J/Rf9cPzD+6f6x/mW+Ej3nfZD9qP1O/W19GH0mPTn9Bj2Fvf+95P5FPuV/CT+mP8zAXsCvgP6BE0FoAXYBRYGaAagBhoHEgdLB8kHDQhOCFgIeAigCJUIsgjOCLMI1QjLCIcIAAhgB8kGCwY5BesDlgKAAaUALAC4/03/wv5g/lX+Hf4W/uz9y/3H/X39Zf0H/QT9Xv1S/UP99/yK/BX8nftI+3n6g/mR+If3c/Ze9az0ePRR9B70IPTO8/zzpfSj9Sr3f/jn+Tb7rvxd/h8AwwHmAtEDZwTaBGUFxQU4BmYGowYpB5cHWAjACBkJxQk3CqwKGQsTCw0LBgvqChILkwryCT0JagjRB5sGjQVrBLsCjwHKAEcAHgCs/1//C/+H/lv+Vf4m/rX9Lf23/GD86/vb++n70/v7+937m/s9+4f6/flP+YD40/fa9vT1KPV09Dv0+/PW8wr0AfRH9J70QvVc9n/3+/hi+vn7Yf20/gYAJAE/AjsDBgSOBDYFkgUSBvwG5QfhCMYJgQoQC34L5gtXDLoM6QzQDL8MjwweDIkLxQq4CXsIYgdNBioF4gOiAqYBvAAhAOH/2/+t/1n/I/+k/iD+uP1u/Q79ovxX/PD7rfuK+3X7Vvsn+wz7s/oQ+mn5ofgO+IP3Cvel9v31evUD9Xv0PPTz88PzxfOf8//zePQ19Xv2//eU+S37zPxd/gYAYwGfAogDXwSNBZwGugewCGwJJAq0CpYLdwwQDXsNyA31De8N2w2gDWUNKA3zDGMMfwtuCv4IugeBBk8FTwQoAwMC8QATAKL/Jv/j/rn+Yv76/Wf93/w+/Lf7Qfv++tr6pvqJ+kL6I/oS+gb66vmJ+Rb5mfge+JL3G/eN9tr1SvW69DL0yvNy81/zavN987vzLvT29Pf1OveR+BH6tftw/TD/rQAJAhcDRwSfBc4GAwgECdsJlAozCycMFQ0IDgAPZg+ED3UPMg/zDrQORA6lDeAMAgwLC/MJxwigB2kGEQXWA5QCQgE1AGH/tf4M/nz9CP13/P37s/tS+8n6Ufrl+Zv5VPk0+TD5E/kX+T/5aflk+T759vh0+Mr3PPfB9lj29/We9Xj1HPXe9ML0nvSm9K/0CPVs9RH2A/cs+Kv5HPvM/GL+qv/YABgCjAPeBDYGhAeiCNYJ7AriC+gM2g3lDswPMBBKECAQGxD3D4sPGg9IDpENvwzvCxwL7AnWCIUHHQawBCIDrgFUACP/FP43/XT8rPv4+lb6u/lA+Y742fd19xH36vYG9zH3jPfp91L4rvjR+Mf4kfhZ+Nf3Wffn9nz2XfY29vH1rvWR9YD1qPXW9cf1sPWJ9cb1jPZf94f48vl3+/j8aP7v/z0BiwIIBJEFDAdaCLoJQgumDAQOaQ9kEBERpBHxEQYS2BFqEd8QPRCID/UOTA5rDYQMXgtJCgUJsQdGBnkE5gI7AdT/tf6O/aX8tvvN+uT5/vgq+Eb3fPbK9Vr1SfVJ9ZL17/VW9uX2LPeG95T3WfdW9wH3uvZd9vz1+/XX9c/1vvWa9Z71qfWt9Yz1cfVm9cD1evZ399D4LPrQ+1P9xv5HAGcBvAJCBN4FhgcHCX4K7Qs/DaAOwQ9uEAURch
GpEZQRMhHIEDYQoA8sD5QO8w0+DYIMsQvACqUJTwjrBmkF5wN4AgsBv/9z/jn9CPzA+p/5rPjS9+v2CfZT9eX0s/Ta9CH1T/Ww9Sb2k/bT9t320vab9kz2Lfbs9cD1wPWz9bv1w/XY9Qv2JvYy9j72MPY49mn2LPcu+Gf56vpH/Pv9ff/9AHsChwMEBYAGGwivCfwKYQxjDZAOtQ+CEBIRHREXEe8QoRBTEMsPNw+tDk0O/Q1+DRMNcQycC5cKYQkQCHYG5ARNA7oBRADW/pv9Pfz2+t/5sviv96b2xPUP9YD0TvRF9Ez0jvT39GL13vUp9mX2aPYb9tv1e/Uz9QL10fTa9N308PQd9Un1kfXv9Sj2UPaM9u72wve1+Or5P/uj/Eb+8f+lARcDZQSxBRoHqwgkCnQLegx+DZkOsg+1EFERhRGLEXMRShEFEYUQ5g8/D7sOUg7XDUANXgxbCzsK/Qi2Bz4GqgQZA34BCwDX/pX9ZvxS+yz6NPlJ+Fj3cvaP9fX0b/Qr9Cb0PPSZ9Pr0WvWM9ZL1gPVZ9Rz1t/Rn9BT05PMH9CL0XfTF9Cr1t/Uf9mT2oPbT9lj3E/gL+TL6dfv//Kz+eAAsAr4DTAXWBmgI4gkiCz8MSw1tDqgPtRCXEQ4SNRI6EgcSsxEWEVAQew+yDhUOiQ0BDXcMyQvmCuMJmwg7B8cFLQSnAgIBlv9p/lv9g/x4+436m/ma+MD3q/bJ9Qn1hfRa9Cv0NfQ89G/0wvTi9AH1zvSp9H/0TPRK9Cb0G/QP9Br0SvSK9Pv0k/Uo9pX24PYU91b3t/dh+F35jfoA/MD9of+eAWgDHgW2BiUIrwkGC2EMjA2qDuEP7RDhEYYS8BIPE+kSdRLBEdYQuQ/CDt8NLw2dDOULMQtfCocJrwiRBzcGogTrAk4Bzf9o/jb9Gfwf+0D6avmh+NT3/fY49nj1zvRJ9O3z4/Pv8yb0ffTK9Av1P/VM9Tb18fSO9FD0AvTs8w70JfSF9Pv0m/VI9qz2D/dD94f3EPjN+Lz50/ou/Nn9n/9+AVsD6QRxBtsHRwmpCtQLFQ0ZDi8PRxA8EQgScBLBEq0SThLSERARSRBQD0IObA0vDFYLygrhCQUJ3wfPBoIF+ANzAtYAT//L/Yz8f/ti+nL5pPj591j3tvYa9mP1yvRM9AL0zPO388zzGPR59Mb0EvUh9Rz1AfXV9Kj0gfSJ9KX04/RO9cP1Y/YM97/3ZPii+OP4HfmS+UT6Ffst/E/91v6oAI4CVwTEBSIHewjdCUwLbgxKDSoOLA9WEGcRFBJ5EpASfRJSEusRPhFREFwPYg5hDWgMWws7CiEJ6we0Bk8FwAMlAm0AvP4t/cf7mvqz+dD4/Pc695/2H/aS9fn0a/T286jzkvOR857zvvP880P0qvQT9VT1efWX9bX1xvXF9fL1UPa39lH39feg+En57/mA+tn6IPtm+5j75ftu/DH9D/4K/0cAnAENA3wE7QUwB0QIdgmUCroLswykDZUOfQ+BEEgR1hEREhoS9BGIEeoQ+g/bDrINnAytC58KbwkVCKAGQwXJA0MCgQCj/t/8KPup+Tz4BPf09Rv1ifQi9Nvzi/ND8/vywvKr8qPyu/Lt8krz3POI9EH17fWT9i33s/cv+Jz4Cflx+eb5bPoL+8n7gPw0/cf9Q/6v/uX+I/9E/1n/jP+9/ykArABSASkC9QLPA7YEnAWPBmkHOwgXCegJzgqzC40MUg3wDX8O/w5SD4UPhw9JD+wOVw6XDaQMZwseCrwIWgf2BXQEAgN8AQMAkf7+/Gv7vfkc+Kf2XPVa9HXzwvI68t3xwPG08cDxzfHo8THykvIc877zavQ19Qb27/bu9/X4EPoZ+xv8Ef30/d7+pP9OAM4AJwGFAdQBJwJtAqEC5AIkA28DsQPbA/oDCAQdBDQERgReBIIEvwQlBa0FQQbcBm0H+Ad1COgISgmRCb8J4AkMCj4KdgqZCosKUArrCVkJmAiqB5QGWA
UPBMgCfgEmAM3+hf09/AT73fm9+Iv3avZs9Xn0sPMJ84vyNfIK8iryavLO8k3z1vNj9PH0o/Vj9jj3Kfgj+TP6X/ux/AX+Sv9wAF0BLQLsAqEDNgSpBA0FawXNBScGeAazBsAGvAatBpQGbwY5BvsFwgWdBX0FagV2BY0FqAXEBeMFAwYOBg8GAgbsBdMFxAXTBdwF1gXKBbEFfwUhBZgE8ANBA5UC5QEdATAAMv8r/jL9QvxM+0b6PPlW+JD34vZG9rz1U/X/9NH0vfS59Mv09/RO9b/1PvbF9lr3Evjh+L/5pfqY+5/8rf3B/s//xACnAYICXAMnBNsEfgUaBq4GJwd5B6QHrgelB5EHfAdZBx8H1waWBmIGNAYBBsgFjgVXBSgFAAXWBJ4EXwQiBO0DwgOcA38DYgM9Aw4DzwKGAjUC2gF6ARABoAAhAJX//v5d/rD9+Pw7/H37xvob+nj54vhf+PL3l/dK9w/35fbQ9sv23fYB9zP3b/e19wr4dPj6+J35Wvox+xL8/Pzi/cr+rP+KAGQBLQL2ArMDcgQnBcYFWQbPBjwHiQe4B9AHzAfDB6IHdAcvB9sGiQYpBtoFiAU5BewEkgQ1BMgDWAPoAnsCIgLZAacBhQFoAVABMAENAd4AqgBtACIAy/9l//3+l/42/tz9ff0c/a/8Ofy7+z77y/pk+hL6yvmL+VL5IPn++OX43vji+O/4Bvkg+UT5c/mw+QT6afrp+n/7LPzt/Ln9lf5y/0sAGgHZAY8CNgPaA3wEEwWkBR8GiQbfBh8HTAdgB1oHPAcFB78GcQYdBsUFagUNBa8ETgTpA3kD/wJ/Av8BhgEbAcIAeQA7AAcA2v+u/3//Tf8W/9n+m/5f/if++P3L/aD9d/1M/R397fy4/If8V/wq/P370fuj+3f7TPsi+/z62/rE+rr6vPrK+uD69foM+yr7XPum+wz8jvwk/cX9af4M/6r/QADOAFsB6gF+AhkDtgNLBNMERwWmBfAFJgZJBlwGYAZTBj4GIAb6Bc0FlQVTBQYFqgRDBNIDVAPQAkYCvgE/AcwAZwAMALj/Z/8b/9L+jf5K/gn+yP2N/Vv9Mv0U/QT9AP0C/Qf9Ev0c/R/9Ff0A/dv8r/yG/Gf8U/xO/Fj8Zfxt/HD8cPxt/G38cvyA/Jr8wfz8/Er9p/0O/nv+6f5S/7b/GQB5ANMALQGHAeIBPQKfAgcDcQPdA0kErAQEBUsFggWiBa4FpAWJBWMFMgUABckEkARMBP4DnQMnA6ECEAJ5AeoAaAD1/5b/Qv/4/rj+ev49/gb+y/2R/Vv9Jf30/Mz8rPyd/J38qvzB/OH8AP0Z/S/9Pv1I/VH9Xf1o/Xn9i/2f/bT9xP3S/eP99v0O/i/+WP5//qr+1f74/hj/O/9h/4//yf8PAFwArgABAVQBpAHyAT4CkgLnAkEDmwPvAzoEdAScBK8EsAShBIIEYwQ/BBsE9wPPA6ADZQMdA8ICVgLeAV8B3wBnAPX/jf8t/9b+h/45/u/9pv1g/SP98vzN/LX8pfyf/J/8ofyk/Kn8sfy+/NT89Pwa/Uj9ef2r/d39Cf42/mD+hf6r/tf+Bf81/2H/if+r/8T/1//l/+z/9P///xEAKgBMAHcAqADgAB0BWwGWAc4BBAI4Am4CowLWAgIDJAM/A1cDZgNvA24DZANXA0kDOQMtAx0DBAPgAqwCaAITArMBUQHxAJUAOADY/3P/EP+x/lv+Df7D/Xf9Lf3n/Kr8gPxm/GH8afx3/In8nfy1/NP89fwV/TH9S/1s/ZT9y/0R/mD+s/4F/1L/mP/V/wUALABJAGEAegCVALIAzwDqAP8ADgEZAR4BIQEjASYBLQE6AVEBcwGeAckB7wENAh8CJgInAiYCJgImAigCMAI+AksCXAJqAnACbAJfAkYCIwL1AcMBigFLAQkBwQB3ACoA2P+G/zD/2/6L/j7+9f2x/XP9Ov0G/d/8wfyx/Kz8svy8/Mv82/zo/Pn8C/
0h/UH9a/2f/eH9K/56/s3+Hf9s/7L/7/8iAFEAegCfAMQA6QAOATUBWgF8AZ0BtwHIAdABzwHJAcEBuQG1AbcBvAHDAcwB0gHSAc8BwgGwAZoBggFuAWIBWwFZAVsBYgFnAWUBYAFRATgBFwHvAMEAjwBaACYA9P/D/5f/b/9G/xz/8f7B/o/+XP4o/vj9zv2q/ZD9gP11/Xb9ev2A/Yz9lf2f/a79u/3N/ej9Bf4w/mb+ov7n/jD/eP/B/wcAQgB9AK0A1gACAScBTAF4AaEBxwHvAQkCGgIpAigCHQIRAv0B5AHOAbUBpQGaAYoBgQFxAVcBRAEtARMBAgHqANAAvQCpAJkAjwCAAGsAWgBKAC8AFQD9/97/w/+c/2L/Nv8V//X+7/7o/sj+qf6O/mP+Qv4p/iD+Iv4Y/hL+E/4Y/hv+Ff4q/iv+Hf48/lT+a/6f/tX+D/9l/8D/SgCgAGsBpgLNAnsC7AGbACr/U/61/Xz9Tv6Q/6cAzQG/Ar8CfAIvArQBlgGmAbAC3QOuA9oCBAKyAB8A5QCDASUDWwWsBocG8gTwAaX+XPxd+rP50/p0/Av+n/+lANMAXwBO/+39Gf3z/CH9Cf4z/x8AEgFgARUBiwDJ/wz/gv6N/un+d//u/+7/c/9Y/ir9qvyP/Av9c/4DAHQBrAJIA84ChwE3AIf+yfzG+4H7+PsH/Yv+HQAvAaYBZAGoAPD/Wv9X/xYAUAHUAnAEtwX/BZEFqAQ4A9cB6QBvAFsAwgA/AZEBdQHkAOD/lP6b/T39sP3M/nYA8AEBA2QDlgLwABD/Z/1j/GD8Jf2E/iwAmwFNAkcCkgEZAMX+H/6O/YX9JP6+/nf//v/V/y3/nf7//Ur9Jf06/X/9Rv7x/pb/FwAjAPH/lP8U/57+if6Z/sL+UP/e/xQAcABgAMb/Jf8m/kv97PwV/a/91v5WAJgB3QLsA1oEcgRRBBMEOAR+BPgEkgWwBZgFdwXvBKwDVwJAAQoAUf88/07/d/9a//3+fv6s/dj8fvyV/Pj8Df6I/5YATwEEAjEC0AFkAb0A+v9T/5v+9f2b/W/9Pf0//Uz9Bf3R/IH8yfsm+7z6SvoT+vj5+Plb+o/6wfoC+xL7Q/sI/G39R//FAVIEWwb2Bx8JqQnOCc8Jgwn5CDoIIwepBfQD8AHd/x/+hPwf+yb6iflN+YX5Jvr++uz78PwG/jv/tgBqAicE3AVYB60I6wmWCnQKyglKCHYG1AQaA5QBlADd/yH/hv6p/WT8FvvB+bL4Yfix+Hr5xPr9++j8s/35/cH9gP07/db8q/yu/Kn8wfy9/Ej8Vvvp+TH4zPbN9Rb1KPXE9jz6vv4XA4UGOglECzMMfQx5DMYL2wplCh8KcAnGB0EFHwKv/sb7n/kw+GT3fPef+Bf6n/sl/VX+Sv96AEECQwQNBsAHFgkJCqoKwApOChEJEgfxBBoDlwEmAO3+2/3B/AP8avuq+s/51vhv+Mz4UflI+q371fzp/T7/TQDbAPgAhwDJ/+D+q/08/JD6bvht9uT0t/Pc8lfyavOi9vX6VP8zA4QGCAmUCokL8QtTCzUKQAmWCKIHAgbiAxoBPP4v/OT64vlF+VH5Efou+6P8Y/4GAKoBvAN8BksJrwt2DXYOoA7VDTMMrAlpBvICEgAI/tL8Xfw7/Cn87/uz+1j7x/ol+uT5VfpV+8j8Y/74/y8BrAGpAXIBzQCs/5v+yf0Q/VD8hvs7+jH4v/U38x3xke8L7xDvYPBo9Cb6IQALBdcIwAtdDboNqA3JDCAL2AkBCUwIwQYTBMoAUv1N+hL4v/ZQ9mL2pfdw+rr9JgEeBMYGNQkRCwENdw7gDvQOiA5ZDUILDghkBFEAHPzS+FL3kffJ+Mz6Nv04/7MAtwEWAsABKQHtAOYA8wA/AXQBEQEmAPz+ev3B+wX6Tfju9tD1L/Ue9Tb1RvXh9G/0HPQr9MT0wPX296P7GACjBJMIGAsQDAIL3wieBlsDHgCO/eT7If
vk+nf7QPzA/LP9gf+ZASQEIAd4CnANeQ8fEYsRShD6DTELHwjRBLABAf+E/Ez6CfmC+I/4K/l4+un8AgCwA6YHswpQDIEMmwuCCRQGDwI1/qz6HPjj9rn2Ufds+EP6RvzW/R3/0v/b/0P/Vv5g/fL77vmf9/n0PfLq713upO047RXuD/GS9fD6ZwB8BT0JXAszDOsLNgpoB5IERALGANb/gf9G/5z+JP6N/rz/ewHKA38GfAlMDOAOlxCwEHcPdQ0KCxMI5QQCAl3/Tv0U/K370Pst/P/8L/7D/+QBQgRHBp0HKQjYB24GEQQ7AXL+Hvxu+rH5kPkM+h37hfzo/a/+qf4y/kP93fs/+rH4gfc09gf1A/TZ8p3xw/B18MTwB/Lg9Gz5Wf6OAz8IfQucDM8LsAkdBugB0/3z+ln5F/nw+Uz77vyh/r8ARgMNBtAIvwuYDjsRPxMWFKwTvBG8DiULTwdCA3X/ZPw4+gj5lvgj+X/6b/y9/l4BggS0B4QKpwyZDecMzwp/B6EDkf/I+7f4P/a79Gn0RvWB9qX3lfik+b76uPuT/Ob8x/x6/Cb8TfuN+SL3u/Sq8hPxBPA/8B/yqfWN+tX/mgSiB8AIEwg8BnsDOAAl/Rf7gvo/+8H8RP68/y8BFAN5BRsIuAo9DWsPTBGSEn4SERFXDugKVge2A4cA5f0d/Jj79/sA/Tz+sf9WATsDZgXDBw8KDAyEDaANHAzDCCkEIv9Q+mz2uPMC8pHxtfL59KT3CvoJ/Ir9pv6I/zYANACZ/9X+1f1C/LH5YvbK8lDvtOwq6y3rBO3T8Gr2k/x2AtkGUgmbCTsIqAVQAv3+WvxH+177iPwQ/qP/UwFJA6QFSgj8CoMN4Q+3EewSzBI5EXQOAQtgB7gDdQDM/Qf8Ufum+8P8Uf5XALsCewUnCH4KlQz3DZAO1Q1XCxUHrQEm/AH33PLo73juqO6c8Cn0j/jV/FAAAQPMBNcF4wXhBAgDVwA2/aD5e/XH8PrrGugq5SHk3+Sf59rsa/NW+zgD3Al1DpgQVBDTDeoJXAXwADT9sPqG+Ub5ifmF+h/8Xv6QATYFOwlVDeUQxRNBFQMViBP3EJ8NAwpJBs0CAwAv/iz9k/xm/C39mv6HAPMCTgV6B0kJ4QrWCxoLiQjpBLYAlvwV+Wj2uPTi83r0nPaI+X38E/8QAV8CzQJcAhoB7P4O/On4vPVZ8uTurOvP6Kjmy+W75jvph+3J84L7bAOMCh4Q0xJhEh4POgo+BDb+Hfmy9QH0/fNb9UX3gPpI/twC+QfbDIQRWRUNGEEZyxj5FigULhCdC+oGPAIz/ij7DPnd96H32vhR+4H+9gFOBY0IZQvcDWIPVQ9KDbMJRgWoAEb8TfhE9QvzTvJa86z1ovh3+wf+LAB2AaABwADG/iD8S/lf9ovzpPDM7XnrhelB6D3o++mJ7Y3y6fjj/1wGeAt7DrkOVQw7CEQDC/5g+Rf2nvSx9B32qPg3/IIAEwUCCuMOIxNrFnIYABn9F6QVbRJyDgcKsAW2AX3+HvyE+rj5tPmX+oP8cv/WAiUGPwkGDEUOaw9uD7cNLwq4BQkBvPzm+Kb1RfMk8mfyDfSa9nj5Qfyv/tUA+AH2ARYBSf/I/Kv5Tvb28pvvc+zg6SvovOfe6KjrIfCY9ZX7kwGgBuIJywq7CVYH4QPT/+37FvnP9+b3Avnj+n790wDTBEwJyw3IEQEVMxfmFwIXvBSPEegN6gn7BWsCrf/x/fH8evxa/O/8WP6LAB4D0QWBCOcKIw2GDsQOXA1fCpMGSQL+/e/5fPbx847yavKE86D1Rvgk+7395v8tAZMBQAEnADP+Ufv491D0rvBA7T3q7OfS5m3nxOmz7dTy3/gZ/6MEeAgmCuQJGwhQBd4BRP4h+zb5v/hY+Yj6NvzT/lsCoAZVC9UPmxNTFsQXkxfPFdYSXw+0C/IHZgR4AW//Ev4//dv8Ef0Z/gEArwKrBYQICQtHDfoOYA9gDvoLBw
hXA+D+tfoG9yr0VfKg8S/y1/M49iP5DPyw/sUAFAJ9AuUBVADL/XX6dPZV8mTujOqq5wzmd+Yr6ALrSvBI9o38IgIwBm4I0AgBCOwF/QLN/0v95Puh+wP85Pwo/uX/fAKYBQIJSwx7DxgSfBO4E+ESOxH3DnMMqwn3BscEyAJwAY0AvP+c/y4AhQFhA1IFawdjCRALXQz1DGYMkQpzB80DGQBf/Az5RfY19AzzF/MV9OL1SPj6+oz9jP/0AI0BRQHs/4r9P/p/9onyr+4O6yLoHua15Unn/Ont7lL10vt3AWkF6Qe9CEsIxgYhBD4BB/+z/R399Pwn/df9Lv8vAcsDqga7CdMMjQ94EWMSjxIuEhIRPA/oDCQKbgcrBXED4gGHAPH/MwBEAdkCsQSiBoEIMApqC8ILBQs/Cb0GEAR2Ac7+Tfz/+QD4r/bw9cv1QvZ99z75APuV/Or9of6P/t79Mfy5+Yj2APNf7+brEOkh5+nmJejl6ozvfPWL+80AvgRABz0IGQjyBo0EEgL+/7T+4v1e/TH9Yv1F/vX/YgIkBXsI2wv5DmYR2xKNE3MTexKoEEcOXAtmCI8F7AKTAOH+If5q/sH/wAEVBF4GpgiYCs4LCgxBC34JRAfuBGgCHQDo/e77Sfr7+Br4a/cM9zb32/e4+Kn5tPqK+/f7+fs8+8z5VPc/9PzwzO076xrpI+jY6M/rk/AM9lT7av+RAt4EJAb5BZoElALEAND/W/9L/0H/Y/+6/3gAxQFyA8QFiAiCCzUOgBBbEnsTwRMhE5ARKQ9MDCYJ/AXNAur//f1W/eD9Pv9tARQEtAZLCZALsgytDKwL+QkCCL8FggNQAQr/1fzX+gz5gfdK9o71hPUP9g/3Svig+bb6Svts++D6iPl69930//Es75XsWepA6UbqPu2G8S32bPoX/vMA4wLMA2AD2wFgAI7/dv///7MAZwEXAtkC5AMYBWsG7geiCW4LGQ2YDtkPnBCeEPIPdw5qDDAKvQctBaoCpgCR/3f/JACqAeEDKwZ2CJQK+wsmDDQLqwmzB8gFIQRgAnwA2/55/fn7avrJ+Dr3C/ZO9QX1XfUz9kv3d/hn+f75Cfp5+Sz4JvaR87Hwtu1L64bqj+sq7ozxMvXe+D38Gv8TAdgBagG/AIQA9AC8AYUCbwMpBLkEZAULBpIGMQcHCN8I6AkjC2oMkA1EDmYO4w3sDGILlwmqB4oFpANbAvQBNAIeA40ETQY9CP4JCgv+Cv8JQAguBhEEFAJuAC//gf4g/r39N/1z/Hn7VPoJ+cz37PZj9hX2/PUk9oT21fbm9pn2yfVa9HTySvBH7mLt1e087zbxr/O49ur5nvyD/oz/pv+Y/7L/9P+IAGcBtAIUBEMFNgbnBjAHSAc4Bz4HpwdGCFMJlwrBC6AMAQ3ODCIMxwr5CDsHfwU2BIUDXAPYAwUFpAZuCPEJtQqDCncJ6QcPBiUEMwKCAET/lP5j/mP+K/6h/en89fvL+nn5WPhV90L2TvWa9C/0FPQt9Br0BfTP8wbzw/HD8KrwQPEm8nXzUvVw97b58fuF/TH+b/6p/sL+2v58/8IAJwKcAzoFrAaiBygIWQhECBII4gftBzIIswhSCd8JTgpeCvAJIQkkCAEH7wU3BeEE8ARkBUMGfQfkCB4KlQoNCsIIIwdrBYMDnwH//+b+kP7w/pL/2/+8/0T/Yf42/e37a/qI+J727PSY86Hy+vGz8bzxzPGL8S3xFvFu8SzyNPM89E31y/bF+KP64PuV/BP9gf2p/Yj9df3l/fv+gABJAgsEmgXnBu8HlgjrCB0JMgkzCUQJjgn3CVoKdAoLCkIJUgg0B+sF0gQzBAYESAQiBYMGMgjoCUgL2QteCwQKIQgFBswDpAHH/4D+/f04/u7+mf/V/53/Gf9I/iT9o/u/+a/33PVo9BXzyPHW8I3wsPDI8LjwAvEN8qfzKvVC9kb3mPhE+r77l/zO/NT8Af0Z/QT95fwf/fH9bv
9pAWgDNgXOBjsIPgnMCQoK/Qm5CVAJFAn/CO8IqggNCCYHCAYABTAEswOGA5cDEgQlBdcG4gjJChwMhQwODNwKJgkeB+gEiAIqAFX+ev15/d39XP65/tL+q/5n/ub90Pw5+275v/cf9pr0MfPU8cPwB/CI7xzvWO998DvyD/SV9Sj39fgh++v85f1O/oz+3/7x/tX+bf4F/iL+H//DAFECswMFBWwGxwcFCQEKgwqnCpYKcQpDCigKzwnkCHoH3gVWBCsDfwIYAvIBUAKVA64FDAggCpELYQx6DPYL0gr6CJ4GDwSxAZj/Ff5W/Rr9E/0B/ez84fz0/OX8Ufw3++n5yfir91T20PRz813yWfF28OzvFPDs8Dzyq/MX9dX2GPl0+wr9w/0i/o3++v5E/13/M/8N/1D/IgAcAQAC1AKtA7sE/wVPB2AIPAn/CbAKFQsUC9YKTgpKCbEH1AUpBPQCNgLgAd4BUAKWA7EFBwjdCRML1ws2DPoLBgtoCUIHzgRkAlYApv5U/V380ft1+yb7DPsy+1r7M/vZ+mr65/kx+Sb41/Z59Sj0yPJq8WbwBPBP8CTxT/LU89z1jvhL+1f9q/6l/4AADgFVAUcB9wC7AOIAWwHCAQwCSgKhAjcDGgQpBSoGLAdPCH0JcQoUC1YLBwsaCqUI3QYDBXYDYAKtAVkBpAHOAroE6gbDCBgK/wqJC5ML5QpuCXcHcAWGA7sBDwCT/kz9Rfx0+8D6OPoA+vf5z/mY+Xz5Xfnt+Dz4cfd99kT1y/Ng8kLxsPC58C/xAvJe84X1S/j1+gT9mv7w/w4B0QEqAgcCoAGgARgChAKWAngCVgJtAvwCvwNUBNMElAW8BhUIQQkHCm4KkAoyChsJkgfsBVkE/ALYASIBMQETApADaAU7B54IjQlVCqwKCwqyCCoHlwXyA0oCxwB9/zr+9vzd+xP7ePru+YD5IPnh+Pf4GfnW+Db4a/eF9oX1R/Tk8q/x+vDd8DfxD/Jd80f13feq+i39Mf+3AOkB6wJwA20DHQPIAroC1ALdAqECVAJPApoCHAOYA/wDmgSvBfIGPwhZCSMKngqYChAK8whrB9AFXAQSAwUCbwGFAVQCiwPrBCcGOwc8CMQIvQgGCM0GlQWBBIEDYAL/AJH/QP7Q/Hb7P/oI+RP4W/cL9xP3EPcK9+/2xPaG9t71vfSP86HyQvJI8kfyp/Km85D1Lvi6+u78s/5fAP8BTAP5A/YDnQOSA90D2QOMAwwDngJzApEC1wLmAgQDfwNxBK8F9wYsCDkJAgpzCmAKewkhCLIGTQXkA5AC1gHBASYC8wLxA+YE2AXRBpwHzgdSB4YGvAUmBXUEdAM1AtcAev8Q/rj8PPuU+Rj46vY69vf15/XU9cH16/Uq9gz2ZPWm9P/zc/MF87zy6/KV8xb1QveR+dv71P3L/64BLAMvBJgEygQJBUUFUAUBBWYEygNgAycDAQPBApoC1wKHA5sEzwXoBvAH0QiUCf4JyQkdCfAHiwYrBe8DCAN3Am4C5AKDAycE0gRtBb0FngUuBZkEBASoA3sDOAOtAs8B0gC1/2T+vvzN+vf4l/e29hf2o/Vv9Xb1p/Xy9Rb2svXg9Cr02POk81PzPvOr88n0lPah+J36Xfwl/h0A3QFGAzsE1ARUBecFiAbUBqQGQwboBYYFCgVXBI0DBAMCA6QDlgR9BWoGUQcjCMsICgnXCB8IDwf7BfMEHgSVA1cDYwOrAxMEfAS2BLwEoQQjBHoD1QJCAvoBsQF6ATsBpwAKAFz/ef4a/Uf7pPlx+KD3C/fD9qP2iPaf9qn2efbB9bb01fMJ86zyovLL8lzzRPT29S/4Z/p+/C3+yf9wAQMDUgQnBcEFUgYdB/cHfgh4COMHJQdpBsMFJQWJBEMEZwQDBdwFqAZuB/4HaQiiCFYIiAdUBikFRgShA0MDDQMeA28DyAP6A+kDtQNQA8MCIAJ8AewAawAvACcAEADf/4n/Df82/hb9Af
wF+yn6d/kY+QL5CfkL+cj4PfiN9+r2Dvbl9LnztvI28kryAvME9AL1PPaz95j5gftC/cb+CQB3AcwCHgQ3BfwFwQZzBzsIuwjQCJMIEwixB3MHUgdNB2kHzAdYCOQISglRCfgIWQiiB+kGCgYEBfQDGwOgAmcCTgItAgIC0gGfAWgBGwHIAHQAJgDd/5n/V/8G/8D+cf4D/nb95vx2/CH85Pu7+6b7ovuq+4j7JPuH+sX5A/k4+H/34/Zx9ir27vXG9bn1vvXW9RL2h/ZT92P4mfnz+nn8KP6x/w0BTAJpA1cEFAXKBWAGuAbsBjsHzgeaCHcJQwoAC7cLWwyeDFYMiAuCCpEJwggTCE0HdwagBd4EPgSEA5QCXwESAP7+Vf4Y/g7++f0C/jT+iP6//pL+CP42/V/8pPsT+7X6l/q9+i37y/t4/Bf9h/29/Zb9K/2C/MX7Evt2+vz5oflv+V75a/mE+YH5SfkF+dj43/j/+DX5kvkm+vz6APwY/Rz+6v57/+3/UgDOAF4B+AG9AtIDUwUMB9MIhAoADC0N+w1wDokOQA6kDc0M1QvSCrwJnAiEB4oGqQW7BLoDsAK9AeQAFwBE/2v+qP35/Ez8kvvd+jb6mfkB+Yn4R/g/+HP43fh0+Sz6+vq9+1n8wvwG/TT9Tf1Z/V/9af1s/XD9av1P/Rj92vy8/LX8rfyT/I/8vvwE/UH9dv2V/Yz9a/1A/fv8lvwu/PD7+ftZ/B/9OP6Z/z0BCAO1BA8GHAcOCPEIkQniCQcKLApTCnkKmQqgCnUKHQq0CTMJlAjnByEHLgYyBVYEdgNnAkABDgDB/mT9DPym+kD5K/h59wn34vYX94L3APiO+CH5pPn7+Sr6Nfo4+mP6xfpH+8v7RPy2/Cz9rf0r/pb+7f43/4v/7v9GAIoAsAC6ALYAmQBWAOD/WP/W/lH+5v2r/Y39lf3h/Y7+Vv8PAN0AsQF0AhwDsgMhBF0ErAQIBUIFbQWzBSEGhAbWBiwHYgeAB5oHrAeiB20HLAfIBicGXwV2BGkDIQLAAGv/IP7k/M77/Ppm+gj63vnX+cP5mPly+Ur5Cfno+AL5Kfla+dv56PrE+x/8Y/yl/Kj8M/zE+437VPta+9r7pfyw/bv+6f8MAd4BnAIfA4IDfQP8AlsCkAGyABoA+/8qAEMAjwA5AdIBPAKMAugCDwP7AtcCfgILAqoBjAG5ASgC3QKEAxYEzwSEBf0FFgYEBsEFMwWTBPYDYQPtAq0CoQKNAlUC5QE+AYcAuP/J/rL9mvyv+/f6fvo++jT6ZPqu+vv6I/sT+8f6SfrC+Ub57/jR+A/5sfmZ+rD79/xX/m3/PADEAPsAwwCQAOcAJQEdAesA3gC8AEkACwDp/+f/4v/b//r/AADg/6P/aP9K/zD/dv/w/1MA9QAEAmsDuQQVBnIHdwhmCR8KWwodCpUJ9AjyB8YGiwUDBIECIgH1/+3+Lv6z/VL9G/3q/Kz8lvy1/OH8Pf3H/WH+3/5d/9j/7f/D/3f/9/4y/kT9ZvyK+9/6pfqh+rH6z/oK+2b7jvuv+8/76fsI/DL8qfw//df9oP52/yIAoQD8APUAawC//wf/BP7k/Cv8sfsv++76C/tb+9z7Df0c/68BuATTB8UKUA0hDyMQ/g/cDgUNrQorCIcFBAPSAOf+cP1l/NT7i/tt+6r7NfwM/TH+pf9KAecCXwSpBcEGbgd9B/sGGAbZBDYDXgFf/0H9LfuA+Wv41/fQ9x34n/hX+S/6Avt9+9T7G/xR/JD84fxp/eb9SP68/jj/nv/R//7/FgDw/7X/Pf94/qT9z/zR+436ePnG+Fn4UPiS+Cj5OfoJ/ML+PwJSBk8Kqw0+EOwRjhLEEasPmAwUCYgFCwLZ/vr7iPnK9/D2Cvfs96H5/Pu3/tYBNAVFCJcKLgwaDSwNWAzYCsYICgbXAo7/hvzH+R332/Rh89LyRfPb9In3b/oo/br/uAENA6cDkwPWApoBaAAs///98/
wf/NT7pft9+5779fs8/EP8qPxA/Vv9Sv0J/YX8pPty+lL5Kvhp9yD3Ffe99zL5z/uN/wcEJQn8DcQRHhTMFAkUohEKDggKzgUAAsb+F/zk+eT3s/Zy9gn36fjq++H/NwRsCGUMGw9FEOcP9g0zC6QH4AN4ACT9afpK+NH24/X49K30/fQc9k/4Rvt3/6kDzQbtCF4JaQgyBkYDMgAD/bb6Ffk0+AX4sPfN92L4Afnk+Rb7mfz1/Sn/XQCsACkA/f7u/Jn6L/hB9gr1qfRB9RT2X/cf+Wb7j/6xAskH6QyIEegUgxZkFm4U5xAlDMUGnQEw/ZL5qvaR9LbzFfQB9sP5r/4MBCYJxw0qEboSvxIBEboNhAkSBe4ALP0P+pv3s/V+9L/zafPI87H0k/bp+Wr+fgMLCNAL4A1QDRcLSQeGAu/9DfpE9z31d/Rg9ID0Z/VZ9qH3j/ns+8P+yAF1BMkF8AVGBfECnf9B/Nj40PWu87vyXvJM8pby6fL98zz2m/mI/vMEPAxTEwEZLBxqHDca7BXlD3oJcQMf/g36F/cO9cnzjPO59Hr3LvwlAlUIJg6WEh0VcxVaE30PXAq+BKb/W/sp+Nr1JPQH8zHy+PGi8gf0Y/bB+S/+DwOVB/cKOAzECw8KMgfSAwUAPvxS+XT3afbM9fL1gfZA98f4fvpS/Kv+FAFQAw8FzQUVBfsCFQCf/Cn5A/ak84fyBfIC8mTyxPIv88vzePUx+Db8QgKpCekRJhnuHZEfyR1+GUETcwzRBfz/r/uA+C72lvS58zb0bfaE+mMAXAc1DoMTRRZGFowTcQ5ACAQCoPxf+H31zPNt8kHxcPBI8P3wx/IC9m36mP8dBSQK6w2AD8cOXwywCMoE3wAN/bT52/bz9NzzhPMF9DH1WPc6+oL93ACvA98FDgc5B4AG1gSWAtv/sPyK+VT2MvOa8NTuAu7+7VvvZ/HU8xn3DPsTAIsFnQsaEsMX3RswHQMctxhQE1gNgwcUAof9AvrF9xv2HfWq9WP3l/pL/xkFRQsMEPQSXxPjEBUMgQXb/gv5SPSW8bzwDvEO8i/zoPQj9iP4Nvu8/rgCeQY/CQwLSwvoCfAG3wJ+/pH6WfiQ97b3wPjZ+fT6L/w//V7+y//yAWkEhwbuB4kHwgUkA77/mPzq+Xz3tfUp9LLylvG68LLwuvEA9D/3zPpd/iMBBgOMBOwFhQeSCYAMBxBNE00VpBSnEeMMBQfDAbT9Sfvq+tb7wv3B/0IBmAJ3A0gEeQXNBq0Hhwc5BpMDzf/S+yr4jfVk9Fn0W/Xh9lf4wfkU+2H8uf1G/wEBbQJ1AwUE2gMVA/QBaADI/nT9oPws/eP+tAAiAsMCcwLnAasBaQFdAdsBIgI/AgECcADs/WL7F/mi92b3p/ca+IL4uvjx+CH5Ovlt+Tj6dPtY/R7/3v++/+T+RP5s/gQAOwOgB6YMERH2E60UdxKuDrsK4QZxBDMDagJ9Abf/a/2w+iH4mvbF9kb5IP0AATcEjQXZBAYDwwDt/r/9R/1//cn9qv2E/Nb6rfhy9pL1JfYY+Nn64P0HAZ8DhAXXBogHfAfPBhkG6QXvBYwFhgTUArgAx/6N/en8u/z2/IT96v3l/Zj9svzt+4X7bfvA+x/8R/xK/Fn8A/yc+zn7wfrZ+h/7YftU/Ar9+vzF/KX8Uf02/1kCXwZaCxcQnBI7E8kRNg5UCtYGAAQ1An0ApP5c/Iv5xvbD9M/07fZc+zYBYAYgClEL/glBB3sDwf/7/Or6bfka+Mn2j/Va9Cn0YvUi+A/8NgBlBN4HEAr2CgQLQwrKCOwG2gQGAzsBNAD0/1X/ff6g/ez83vyB/aX+/P9BARwCMQJkAWL/x/zX+uX56/n8+lb8Vv0G/hf+Uf34+9f6j/rn+sb73fvb+lv5ZfdV9rP2uPhx/BcCDAn+Dw0VRRfNFukT+g9lC0gHhQO7/+P70Pcs9PDwFO/M70HzT/nlAD8I7g1zEMMP4gyJCPIDy/
+l/Fr6cPgH93b1UPTe89P0IPiu/AwCDwd9CgEMowsVCvUHnQWtAzUCAwHe/5n+FP4x/rn+n/96AC4BoQHrARIC+wF1AUAAqv7U/K/6Hvly+On4fPqs/AP/uAAlAUYAiP5D/Er6Dvlo+Pr3G/fp9aj0wfMP9Gj2zvqzAIoHpA7oFGQYjBhxFbMQkwu1BtkCvP6o+mP2nPLq743u0+/b85/60wINC5QR2RSCFHYR+wzOB7QCJf5r+ij3QPSz8RjwM/Ao8pH25/yvA28JTQ1+Dg8NQgrZBvED3QF3ADD/0v1M/Mf6ifqv+2D+iAHcAyUFQQWCBEoDtQHv/4f+Ff0l/F/7QvqX+Z35zvqW/E7+uv8wAKP/b/7E/BT7ffkx+Gn3gfZu9df0V/Rm9Ln19/gw/ngE1wo3EBIUQhUwFBMRhww4CHEETgHz/Vz6u/Zu84Xx0vGc9HL5EwBwB+4NNRKUE2ASGQ/ICo8GyALg/pv6dvag8vrvUO8d8SD1XfrW/3AEcQcrCFQH+QUeBQ0FbQXUBR8F/wIUAEn9bvuT+uP6aPzw/qIBzQKaAqsBagDy/z0AAgCf/zT/OP5U/Qv81foS+5/8t/6xAJEBOAHv//b98fvV+fv3kPYs9dnzd/Je8SbxnPLC9dn6QgFcByQO2BNcFxsY8xU8EuENgQnGBIAAGPvk9QLyiO8U74LwvPTr+oQCFwpoEIEUwhX7FM8SSQ8jCjEEw/0x94vxeO3S6zTske6p8sv3mfw7AHAD6AU0CJ4KDg18DisOrwvgBh0BF/uO9nX0X/TX9W/4Zvs3/vMAxgIsBKsFFgdCCF0I9gaiBK8BUv7X+xD6+PgW+cD5xvrN+278x/zx/K38vfum+nT5/vfL9qX10PTL9Lr1Cfhk+0z/HgTkCcsO2BE/EycTdhEEDpIJLQWlAOn7pPc19Mnx3/Dt8nv3tP0LBUQMSBIZFi4X4xWMEgQNpgYuAG75GPPK7SnqgegW6XbsvvG496n96gKFB2oLUQ6rEOQRShHCDgYKsAPK/MP2svLV8B/x6PJp9bL4dvx8ASMHWgucDpwQ7BCyD0MM+wZKAQ/89fdf9ZfzwPJV8//0ffdH+uX8G/+fAJkBFwIGAVX+7fpI9w/09PGA8cnyTPUE+Ov7lgFKB8cMRxEbFEwVcxT+ERcOtAjhAsr9Gvkp9cLyO/Kd85v2S/s4ARIH4gvHD94RrRGTD84LIQeeAfj7D/ft8r/vBu4h7s3v5/I09/v7bQCSBHgI1AtlDjsPEg4VC54GugFo/fz5v/cQ96/3HPkl+8b9IgHaBCkJnQyrDeoMWgolB9ADFgCq/Dn6MvjD9s32Gvfc94b5ovss/hAAnABUAGb/wf2R+yv5i/bR8w3yuPFv8i70xvYe+uT+bARTChwQFxThFcQVyxN4D/kJnATP/7n7A/hy9Ub0L/QZ9ef3YPyJAUIHRAyjD+UQCxCzDRsKhQWZANf7b/eP86DwCO/I7l7wnPPR97b8gAHDBYoJFgxeDbANyAycCmoHowPf/7D8YPoc+eL4Uvmi+tr9CAKlBQoIEglvCYMJCwleB6UEHQGq/Qz7bvg59kv1rPUJ99r4+/rT/Bf+Df/H/8H/k/54/BD6lvdj9AnyHvGF8WbzdPb/+Vz9TQKDCFcPdxSLFgYXShWJEZwMvgfAArj99vkw9xv1RfOb8iT0b/ea/JQCNAg6DAYO0A5YDqsMFgpyBsgBf/zS9/jzhPGa8EjxoPN29tD5Vf3YAGUEiAd+CoYMIw3hC/cImAUXApL/6/3c/ET8nfyO/rQAhgJYA7ADuwTPBdQFYQS8AZv+OPzO+pH5lPhA+Lb4Z/pa/NP9f/5i/lf+Lf62/Yj8ifpO+CH2+/MK8rLwj/Af8kL1lflZ/hQEAArWDoQSoBSGFWAVaRNoDwsK9QON/nz66fZJ9PzywfJ09NL3PvxhAc8FhAmYDDUOWg5HDR8LDQgHBGb/sfpj9irz7/G08n70u/b0+FL7/v3AAIoDSg
aPCM4Jzgl9CGMGSwTuAlsCEQJKAVgAIQDH/1L/IP8M/4X/QQCOAP3/zf57/cz8F/0o/VH9Bf58/tb+1P6B/u39UP3n/H78yPtr+tb4Q/d69QH0AvN/8jbz0fSt9xv7Kf7KAdUFdQlnDAgPHBFQEv4Rvw/RCyMH/gI3AD3+2PvM+Sz4KveX9yL5q/uv/sYBowQLB3gIDQlbCc8IowfdBSED4/97/K75P/jR98P37fc0+K/44Pkr/An/rgHGAywFDQZhBpMGGweZB5UHmwYnBccDwwGw/y7+uPzn+/v7jPz3/Ob8nPyl/Ib9jv6P/0IAKwDr/9v/t/8y/2L+g/2y/NL7xPq6+Vb45fbo9ZL1N/V59F30YPXT9+X6wf0LAKUCZgaKCnUOShFWEnMRFQ9YDP0JmgfxBAUCjv7J+p73W/V09JP1VPjE+8H+NAFpA2sFawePCQUL5ApICZIGbgNKAJn9lPu++Uv4oPda9773EPmk+ir81f3b/xYC5AM1BVgGCQdOB2AHOwfYBvkF4ARSA8wAP/7Q/Kn8Jf1p/SL9Nfw9+1D7RPxB/QH+af7x/sv/8P+//63/Zv8x/+7+IP72/D/7R/mu9zj26fTY82LzxvNG9eD32fpa/UQAKgTOCJQNuxAxEvMRmBAYD7oNMgyECQQGfwG9/EH58fb/9f71k/Ym+P756ftW/iYBNgTXBrkIYAl0CNUGVgVDBB0DrQHB/8v9Mvwi+8r6K/ov+XL4fviW+Xv7zv3Q/3YBBgOQBDYGhAf3CAYKyglZCKkFYQMiAkABMQBB/sH7d/k8+Dz4u/he+QP6WPsv/bP+FQANAaMB/gG9AfUAtf/f/QT8c/qi+Kj2CfXK81fzPvSO9ez29PhC+/j++QMBCckNshClET0RLhBiD8wO8Q1wC3gH+AJS/gT7C/nS9xP3dPbn9kf4OvrT/Hn/qAEyA1YEMAWbBd4FIAbxBSgF+wOEAvIAif83/tr8V/vM+cH4YPi1+Oz5YPuo/Aj+i/9jARYERwcSCikL+wkrCN8GggZlBlAFugIy/2v83PoF+iz5gvg9+Hf4TPla+rH7E/2I/vP/mgBXAIP/r/46/s/9vfyK+gP4IvY69Tj1hPVT9hj3X/hh+6b/uQQmCVEMIQ6yDh0Plw/rD2kPzA36CrkGjgJJ/6/8ofoU+SX4ZPcn97X3LvlD+yr9Nf/LAOsBWAPFBK8F8QVsBSYE8wI4Au8BkgFlAMn+/fxf+3P6RPqr+lP7F/yv/ED9Yf4sALAC0AXRB7EHwAY0BsAG5wcRCEgGKgM4AH7+7f0h/dX7svpw+cH48Ph1+T36HvvS+0P8J/y3++37jfz8/Kn8QvuU+Yv4T/iN+Ev5Jvoi+qf6efzr/igCOgXvB9EJ5QonDB4Nrg2ZDfAMJwvMB+cEjgJtAPr+yf20/C779PnL+fb5gvps+zX8uvyS/QP/YwBxAfsBRAJJAmgCVAMdBE4EmwMWAlEA7v4x/rb9Yf3x/Kf8WvwR/NL8Gv54AHsDlwRjBNUDAARtBQwHfgfSBXkDmAHVAJIAiP+a/nL9KPyS+yr7zfpi+kf6Mvq++Qn5UPgi+EX4vPgM+bj4PPj+94v47fmJ+6/8u/06/xEBtANZBqQIdwovC2AL/AqICngKeArECc8HiQUjAyMBOAC4/0r/Yv4S/Tj81vsG/Hj8hvws/O37T/wl/Tn+W/8XAGMAmQAOAcoBvgJ/A78DdwOrAvEBcwHfAEQArP/e/hT+fP1C/cr9Nv8aAScC4QEbAQkBJALdAycFFwUeBBwDvQLlAsEC/AF3AMn+fP2r/BP8P/sq+tb4n/fB9ij28vUg9pf2Hfdv98P3Sfjx+I762vzk/iQA0wBrAroETwcZCoYLugs0C4IKbwpMCnwK1QmxBzQFBgPbAboA5f9e/x/+2/wr/BX8Gfwd/Cb80vts+3L7EPz4/Lv9j/4F/+r+Kf/v/ysBtQLsA2AEIwS6A4UDjwN3Ay0DdAJLAUYAVv8V/6P/Ow
AIAOH+P/7E/kEAlwEMArcBIwFQAfABdgKLAh8CaAGMAJ//2/79/dL80vva+ov5NPg796D2nfbW9qf2EfaB9a31NfcD+Zz6hvzV/aX/0QJYBkwJPQupDKINJg5dDlMO2A2CDLkKjAiHBckC7gBO/7D9Zfwh+9b5BPkM+av5Dfp1+vD6WPvC+5f83f27/nr/KwBWAIMAGwFcAnMD0AMNBBQECAQVBFYERASFA8ACzQGWANT/5/8lANb/Af9W/mf+S/+AAPQAaQCp/5L/FgB4AJsAZAD0/7b/mP9a/7f+Lf7t/ZD9Cv1c/Ev7Avor+cf4J/gm9yX2WfUt9av11fZQ+E/5sfpt/aUAmwTtCDkMKQ7pDswPwBBbEVQR8w8/DbUJ7Qa3BN0BA/9c/KP5lPd79u71ivVq9ej1y/ab96P4C/qA+139h/9CAS4C0ALCA+UEDAb8BiwHxwZMBh0GDwawBe0EjQO2Aaf/Cf57/cD9Av6L/Z/8R/zd/G/+CgCwAKYAcQDaAEoBlgGGAfMAPwCH/zj/w/4G/ov9WP1A/er8Svw++3f6Yfo5+uT5vviT9wz3vPa19vj2mfdX+Fv5b/tL/98DwgdgCg4MhA14D8YRHxOnEv4Qzw4xDHEJfgZxA83/K/xL+ev23/Sn82nzH/M58xX0VPXS9rr4Xfvk/ef/3gGGA8gE6AVFBzYIRAgbCMgHbQcGB84GOgbOBAYDMAGN/5X+dv5Y/l794/sZ+yL75fst/Uv+t/7K/jL/uP9aABwBtAHOAW4BEQGRAOL/mf+I/xP/F/4F/Qf8F/uS+kv6U/kS+FX35fad9pz2mPa69mL3rPjf+jL9WgB3BB0ImwoXDMsN0A/wEX0TKBMzEWIOogtOCYgGYQP6/1H8G/mL9rH0b/OG8jDyc/Jf88/0qPYX+Zr7Mf6NAFUC5AN/BTcHzAiMCYoJKgljCPAHsAdcB48GAQUAA68A3/75/RT+xP1f/PL6yvnn+Wb7//z9/TP+Zv7q/nX/VgBZARwCPQITAsEBDgG6ALEAUQCl/7/+m/1x/Fb7f/rd+Tj5XPhp96H2NPY09lL2WPat9sn3GPow/VAApwOnBm4JnQuaDcIPUBGQEowSIRH+DmkM6gkgBysELwHN/Zn6yPet9Wn04/Op84zz1POk9ED2WPjD+sv8Wf4PAAECOgRBBrwHbghXCEwIoAjhCNQIbgiKBxYGdwS4AtMAsv9I/37+7Pzm+sP50vm/+hf8qPyX/Lr8av2F/q7/6wDhAS0CJwLRAVwBJAEWAdYANgAm//X96PwN/Gz7qvqG+Wn4kPe99mD2LvYy9i32fvYE+N35SPxn/8gCDAZzCC0K0wuQDfAP+hE1ErIQaw4pDPoJCAjdBecCcP82/L/53vdj9p/1KvWf9I30LPVP9v/3/fni+zX9eP5NAD4CKQSkBZoG8AYEB3oH1AcmCCsIuQezBuwERwP7AQsBtgAmAJL+Yfy6+mj6H/ss/Lb8j/xQ/Hv8NP08/mL/ZwANASgBIQH/AAkBVQF4AQoB1f+P/lb9k/wH/Cn7J/oR+fv37fY89iv2aPay9lP3OPhz+UP7//1OARIEcgYfCH8JLws8DUYP0A9GD/MNOwyjCgEJdwc/BbYCagDt/bn7DfoM+U74cvcB97n21vax9yD5kvp7+4T8uv0i/5MA7QEfA88DvgSSBRoGaAaTBuIGrAYOBgEF+QM5A5cCyQE2AIz+Xv3l/Mv8u/y+/KH8g/xi/J38P/0R/gb/vf/w/8L/zv8VAEsAVwAjALH/7/4//t/9VP2z/Af8M/v7+dv4PPiv96r3sPfv93f4BflP+rv7nf0bAGMCggTwBWUHAwlnCuILRAwZDHQLhgoCChYJ1Af2BcMDyAHa/2f+RP00/DX7WPrS+Wz5ZvnV+V/6x/pi+xP81/zo/fn+4P+qAJYBiAJXA9gDUgTCBNcEwwSGBAAEjgM6A4oCrgGgAM7/W/8G///+0v6c/kj+Df4i/ij+k/71/vz+8P
7J/r3+vP7e/gT/B//a/oP+G/6u/Yr9n/1P/Zn82Psi+6P6Pvrx+a75k/nE+Rn6pPoz+378N/60/18BggK9A/EEKgaGB94HOwg3CAgIDQi8B1YHEgbFBOIDyALTAfoACQAI/yH+s/1e/QP98vzs/LD8uPwJ/UP9ff3y/Y3+7f6C/0kA0QA/AdgBWwJuAloCegJaAjECVwJJAsQB+wBQALD/Q/8q/1j/V/9f/3P/XP93/8H/RgB8AFsAIQDL/6//tP+z/3T/+P6T/hP+kv1K/SH9Hf0l/dv8Ivxt+//6+foA+zr7rfuc+7f7B/yv/Nv9qv5o/7//IgBZAXoCQAOhA8IDFgRPBN4EYAVMBR8F5ATOBFUEvwNCA50CCQKjAVcB6QCEADoAzf9C/wH/4/6l/ov+qv7H/hH/XP+c/73/7/9uANAAFQExASIB3QC+AMQAugCBAA4Ar/9a/zL/T/9s/53/2v/u/zUAeACsAA0BXQFLAUABPgEXAUgB5wCwAJAAx/+m/1j/yv68/hn+yf09/Uj8iPzm+4n7h/s6+337i/sC/J381/yA/RT+T/5c/37/IgCMAKoAXQEVAVcBcAGAAR8CFQLpAR0CyAFEAkICbQLSAk4CbAIcAuQB/wG6AbgBqAEvARABjQB7AJgA0gA2Ac8A5wDpANIAFQEfAVwBQAHTALIAQgA1AC4A4f+p/1H/I/8d//P+9/72/vD+Rf9u//T/SgCtAHYBogHGAf0B2wEbAjMCOAI9AusBiQHtAMkAPQD8/6j/q/4m/mX9BP29/G38hfzX+7n7Bfx2+8X7W/zK/Ez9Wf1b/bT9hf4k/3H/sP/H/0cAfACoAEwBLAGeAeEBMgFUAR0B8QBdAf4A8wDNAHUA9wABASoBPwEFASwB7wD1AG8BtAEGAkICTAIoAmIC6ALnAvcCugIyAvUByQFoAfkAcwAiAAAAa//y/nT+Hf5x/q3+2f77/vn+kf8UAJgACQHlADkBbwFsAfsBGQI/Ak4CvgF5AR8BJgFWAY8AKwBd/77+FP44/cv93vyV/Ij8oPu6+6L7wPuR/Cb83fyO/f38Tv7T/Yn+9v7M/vT/d//w/34AMgDPALAALgDOACYAXgBeABYAsgAAABsAAwC7/38AQABpAFYAKQD7AMsAdQH8AfABWgK9AgMD6QIFAzkD5ALhAgQDfwKVAh4CuAEvAa8AeQDx/7P/d/9X/8X+1v7l/gf/Of9v/wIAEwANAFYAqwDrAJQBtQFiAS8BaAHWAbMBOwELAYwACwC7/x3/of5Z/jX+k/1Q/SX9Lv0k/e388fy6/Db9Yv03/Wv9yv1D/hf+Xv7k/p7+M/+5/yEAQgDU/xEA3v+k/xQAvP+O/6j/jP+1/4L/dP+//xgA0/83APz/0P++AHgAjAADAfYAgwGBAc0BVQLaAScCTAJgApUCagJNAmYCqAHNAaMBDwFsAakATgA0AKT/xP/D/1D/s/8t/1b/Zv+I/1oAkv8WAG0AygBzARQBpAGIAVwB1QFLAdYAJwBdAF4Ad/8u/1n+F/7+/W79RP0W/Wf9qv1K/Zv9iv3Z/Vb+Wv6z/nj+x/4o/0D/n/9O/9H/zf9N/6z/5f6X/93/sv8nADr/WP++/2v/3f+z/67/w/9g/wUAcv+W/x4AAwCiADYAPgDmALsAoQHKAZEBAQJrAeoBzgGBASMCuAGWAYcB7wATAdAA7wATAUMAFACh/+f/KQCq/wMA6f/J/y4AJACGAIoAggAbAa0A8ABQATABegEQAVgB6wBFAK0A0P9J/0P/mv4V/4f+Qv7J/gT+lf4p/uP9b/4j/sj+mP6H/m//A/90/7L/W/8fAKr/fP+T/yb/Tf85/6T+gv7H/uv+mf66/vr+2v5j/2T/sv/v/3//4//O/2//z//W//r/6/9NAJgAFgD7ALwACQHBARgBqQEuATEB1QEgAVsBMAHMAO0AUQC2AEQAsv9xAEEAMwAlACgAIgBFAL4AsQDjAAAB/AAyASkB1A
G6AREBMAHbABYBMAHLALAAagCRAHoArf/f/6//W/+c/0r/Yf8i/8b+5v7U/jr/hf8W/yf/Qv88/2H/O/9z/3P/TP+z/lf+vf6J/vj+sP5//rr+gf6q/pv+rf7h/rv+z/4N/6P+4v7M/if/Xv8n/57/q//f/5cAqAB6ANwA3QDtAIcADwFVAUQBrwFIAWABWAEfAXQB9QC7AA4BjACdAFMAMQA4ABIATwA4AEYAogApAVYBUwGRAYUBgwG2AbMBvAF1AWgBTQHaANYA0wB1ABoAuP/G////uf+T/1H/V/+q/y7/Af9R/zT/Vv9C/2z/SP+l/pr+d/5s/o7+0P5n/sj96P0N/k3+Vf5P/mb+Q/61/r3+V/7j/vf+kv52/jj+if6a/of+Ef/W/i3/dv+Q/+z/vP/cAFkBVQG6AXcBGwJ+Am0C4AJcApYCcwLkAVwCtQGnAc4BdAGlAXABNgEwASIBegFtAZsBqwGrAbsBFwEZAS8BKAFuASQB0gCRACMANQDD/7D/AgAAAPT/ff9t/1j/JP8e//r+5v6k/pz+pP6R/pj+NP4y/jf+G/71/Vz9Z/2v/XL9Zv14/Tz9Xf1u/Zv9+/3M/Qj+JP7d/VD+nf6K/pP+bP53/nv+Yf74/j3/q/+rACYB5gEzApECPwOIA9gDEgSJBLwE5AS5BPsDqwOpA4ADRgPZAoECHgKtAVMB3wAcASAB3AAAAXUANgBxAEMAfgCUAMoA5gCKAFUA9//L/6n/2P80AAwAuf89/7r+oP5M/h7+6P1e/VH9zvxy/G78efzL/IH8e/yG/Fr8XvwT/P37Rfxr/JL8ivwW/Er8y/xp/T7+AP8EAO8AfwEQAscCVAPKAz0EmQS4BKcEVgSrAzEDDQPbAp8COQLlAZABDQHSAL4A6gBCAZcBwgHXAQsCWAJQAlUCkgK5Av0C5AKyAmoCDwIbAlsCIQK7AVoB7QCUAPD/p/9g//7++/7L/lr+2v0x/eT8Av0y/TX9Hv3q/IP8EPxL+xL7GfuR++X7zvuc++P61vrj+r/63/r5+kT7t/vD+0r8uPxu/fv+OgCrAdkC3wPdBH8FCwajBusG4ga2BgEGAgUzBGIDyQKaAjkCDALFASkBJQHDAJAAGwGrAZcC6gIYA10DFQMmAw4D8gIjA6kCSwLQARgBtgA/ADgAIwDm/8D/hP9a///+/P4k/y3/Uf8V/+H+uf5c/iz+/f3f/Q7+2P2e/U39xvxV/Lv7lPsq+8v6sPob+q/5Bfli+Fr4RPiH+Or4Bfmx+Xf6m/t//VH/fQGXA6cFXwcMCKwI6wi1CFII2wdEBwAGggQlA9EB5AB/AJoADAGXAWUC9gJUA+IDRASuBOYEEAUlBckEQQQ5AyQC7AD5/0j/aP4Y/tr9n/2U/YP94P1h/tD+kv/Z/xAAWgAxAPH/ov9v/1D/Gf+//mf+0/1p/UP9I/0i/fP80fx0/L77CfuB+jb69/m/+XT5Pvn7+Hz4D/jM99/3gPhb+Y/6U/wo/mAAlQKQBMIGZAj7CT0LhwtOC58KoQkGCCIGPASRAhEBJwDh/9//eQBSAUsCPgMcBN0EmwX3BTMGOQbABSgFCQTkAooBJAD3/sH92/xS/AT85PsN/GP88PyR/W7+PP/o/2AApQC2AEIAsf/b/iD+kv0d/eP8ZPz7+8z7sfu4+9L79vto/MP8yfyE/Bf8u/tv+yv7qfoY+or53fj292n3NveL91/4xPk6/Bj/vAFABNoGOgliC+4MvA2iDeEMtgsLCt0HVgUOAx8Bxv8a/xn/mv/WAFsC3QMnBR8GyAYQBzsH0gZDBlcF8QOCAsAA9f46/Zr7qvoR+pj52/l6+nH7afw5/SX+5P65/54AGwEeAcsAQgCU/4n+nP3h/Cv8yPu/+977wfud+8r7Avxg/K789vxG/S39+/yV/Df8uPsg+8v6KvpV+Yj42/e19wH4V/hj+er7hP9FA4kGPwmsC/wNFQ8uD5MOMQ3RCwEK7gccBSAC6/
9M/sX9F/4d/wwBMANGBQ4HkQd/ByYHkgazBYIEXAPoAWAAmv6t/Oj6OvlS+Pz32vdX+GT5z/pi/N/9Bv/P/4UATwGwAYQB4QAJAFj/af5y/ZH8pfs++z77QPt7+9/7XvwO/b79Mf5W/lT+NP6+/en8FvyG+9H6Gfph+Wr48PfN99/3ZfgO+R/6gfxmAPsEQglTDH8ObRBZEcYQUQ8+DfIKBAniBjQEMAGM/vL8fvwg/W/+wQBNA60Fuge6CKcIrAdbBs0EFANPAZf/A/54/N76ovlp+HP3QPeD90H4XfkN+/f8y/5wAGUBuwHBAXYB/gAQAN/+BP5A/Xv87vuD+0/7a/vZ+2X8Bv2k/U7+Gf+i/8v/eP/S/u39I/1h/Gr7dfpT+T74pvc99wv3VvcS+GX5MvtX/qwCIgc1C0IOoxAnEigS+xDzDkMMiQneBjcEfwHD/p38n/vm+zH9Q//nAbMEFQfqCKkJNgm/B8cFygOyAab/sf3e+zX68vj+90z32fbo9oD3o/hB+jL8Uf7+/1QBDAIVAqQB0wDo/9r+xf3T/B38vvuN+5H7+vuK/DX96/2Z/jL/uf9eAOEAAgGiAL3/gv4l/fD76PrY+c34B/i197v36fdK+Pj44fmW+wb/vwN/CIsMiw+LEboScBJ3EJ0NXwqTB0QF3wI5ALb90PsI+7T7Xv2W/2wCiAUuCCUKvQrgCQYIXgWCAvj/dv0i+5L5cPih91P3Ovc596P3T/hy+Uv7Y/2E/2MBdwLpAuECGgLvAL//af4U/SL8nPtw+5H74ftl/Az9qP1G/vP+gv/r/5cANgFBAdQAtv83/vX8zvut+qz5vfge+O331vf/96v4nPno+rL8xv9GBOMI5QzmD+sRHhO7ErUQwg1TChkHMASoAUP/4vxL+8r6bPv4/Pv+bwFQBAYH7Qi3CQ4JNgfRBB8CjP8o/Qb7ffmQ+Ab43vfk9w34cPgX+S76oPto/Sz/8QBmAgsDDQNwAk0B+v+k/mf9u/yU/Mn8Yv0U/pj+AP9W/2f/lP/k/xUAagChAFQAjv82/qz8a/ts+pL5y/gU+F33EfdV9+b3svhy+fP6of69A6kI0AwPEJESChSkE4ARcA7mCpMHzwQBAvL+C/zr+Qr5iPkX+2n9bQAKBH8H7QnOCiIKkAhHBnYDhAC5/Tf7OPnF98b2QfY+9pf2Z/eA+IH5B/su/Zn/xgErA94DGwTQA78CHAE+/2n9RPwB/Bn8pPyf/bn+3P/CAA8B4wCnAH8AuAARAfQAYwB+/xb+j/wd+4r5d/jy94r3fveq9873QPjm+G/5Tfon/egBGwf4C6wPeBJhFEEUGRLTDukKPQdQBJcBtf4B/PX5Gvm1+Sz7Sf0kAJQD+Aa0Cf0KlQoLCakGuwOiAIv9uPqe+Cj3WfYt9jH2efZ496f4nvkZ+x79V/+eAVoDKwR2BBYEBwOvAc3/xv2c/Dz8S/wF/QT+3/7d/6YA9wAHAcQAdwCmAOcAmQAIAB//sf0z/Mz6dPlm+KD3EvcP9zf3h/dF+Af57flQ/HwAOgW8CZQNxhAGE6ETURLBD2IMtAhpBWQCSf9Z/AH6mvi3+An6Hvz4/lQCrQWeCIcK2wrWCccHDAUZAjz/gPww+lz4C/d09mv2pPZB90r4W/m7+qT83/4DAbUC0wN8BHUEmQNmArsAt/4J/Sj8BPx5/FX9df6z/5UA+AABAc0AfgBnAIsAkwBZALf/p/43/Zb7F/ri+Pj3U/f39tH2CPeq92n46Pjr+eD8fAF4BgoL/g4zEkwUbRTAEtMP8gsKCI0ELgHL/ef6uvjX92H48/lO/FT/4gJ/BoEJCQssCx0KFQgyBfQB0v7b+2L5k/d19ub1yPUP9hj3cvi5+W/7t/0fADQCsgN4BNQEqATQA4wC2wDW/kX9ePwO/Dn8GP1Y/r//6QB8AbUBogFBAfkAzQBZAMf/+P6u/ST8kfoi+fX3+PYo9nv1WvUk9iz3LfgL+d361P7DA30IxA
wrENUSihREFDcS1A7SCj4HGQTZAIP9lPqm+C/4Gfng+iP9HQCiAxAHvQnQCkgKxgiABrYD1QDW/Sn7T/kj+ID3LPcE91r3Nvgz+Vj6yvvR/TIAXgLqA5sEogQeBCgDrwHP/+79zPyL/NL8c/1e/nf/eAAuAWoBXgEdAdgA3ADzANcAcgB3//H9OvxW+ov4/Pa99RH11PQY9f/1Gvcd+D/5Fft3/gMDrgcUDM4PvBKeFNgURBM2EFgMUQhsBLcAVv1r+oj4K/hB+Sr7WP0CACEDHwZlCJsJhglRCHEGNgS1Aej+NPwa+rD44feP96X3K/j6+OX56voG/GT9Av+UAAwCGwN5A3QDHANIAgoBmv9r/tr90P0+/vr+s/9aAL0AzwCtAEsA0/+H/1T/Lv/M/ur9tPxd+w365vjG97b2pvXN9Nn0tfXc9uf3A/lM+0z/BASNCH8M1w/HEocUXRRuEl8PugsOCHME3QBf/XP65vjl+AX6p/va/bUA3AOYBncIKgm2CIQHtwVrA8QAGf7Y+1D6OPmM+F34lPgy+fn5mPo2+zT8jf0R/28AoAGLAg4DIQO7AsoBaADu/vj9zP0M/pb+Zf9GABABfQFVAdoAOACW/1n/Uv8L/2D+UP0Y/NP6X/na94H2cfXG9Hv01PTT9Qv3avjg+eX7D//yAvwG5gpdDl8RqxN2FIUTBhGMDbcJ2AX/AVD+Tvue+Wr5SfrJ+5796/+YAgcF4wa9B20HgAYnBVUDMwHZ/t/8o/us+uz5hvl7+fz5rPoo+6z7Svz8/Nj9t/6g/5QAVwHtAVICQQKqAckAAwCO/27/pv8TAIgA5gADAdMAYwCw/xL/uv5x/hj+cv1n/EL7Mfo2+Ur4QPdH9qP1YPWC9Q726/b69zP5DPvw/Z4BpQWjCXsN8BBgE1YUshOIEWEO4wo4B3wD9f88/bb7Mftd+zT8mP1d/3ABggMWBcsFnQXgBMIDLQJAAGf+Av0c/Iv7UPtb+537HfyR/Lv8vfyu/Mr8KP2c/U3+SP8zAPwAkQGpAUoBuABJAC8ARgB9AO8AeAHTAcEBNQFOAEH/c/4E/q79R/22/P37O/tS+lr5hviR96L2AfaN9XP13fWz9uz3I/mg+kj90wChBIIIRQzND6kSPBRDFKESzQ93DOIIKAWGAYf+mvyp+6b7V/xU/bv+oACHAgUE0gTcBFEEUQMKAqsAJv+6/dn8efxF/BT8DvxK/In8mvyM/HH8YfyF/N38cv1k/pT/xACiAfkB+gHCAVIB3wCpAMcAKAGLAdgB4AFiAW8AVv+D/g3+n/0V/Xf8r/vo+iT6TPmC+J33ufYq9sD1hfXF9Wj2bveh+BP6gfz1//IDHQgPDK8PpxJAFEIUwRI1EB4Nkgn/Bb4C9P/1/cv8T/x9/Bv9P/7u/6gBGAP+AzcE3wP/ArQBXAAf/x7+df0l/R39JP1A/WH9Wf0L/Zb8JPzl++L7Ivyo/GH9gP7Y/+YAXQGAAaIBywG1AYUBjwHXATgCcwJGAnMBKgDd/uz9Qv3B/Fr8B/yy+z/7nvrK+eP4C/hE95T2IPb09R32qPaT97n44fl2+yb+1gHaBcMJeA3REDMTEhRnE2IRcw4oC90H1gQ2AiMArf7T/Yn9qv0U/tD+2v//AOkBUAI0AqABtwCv/7/+Df6r/aj99/1l/sz+Cf/w/nX+r/3f/DP8p/td+5f7Tvxn/an+vv98AO0ARgGcAccBzQH3AV0C0wL7ApcCpQFhACX/Kv5s/dz8bfwQ/KX7/voe+jL5VPie9wj3g/YN9r711fWM9o/3fviT+W/7iv6YAskGmgryDc8Q4hKOE5oScRC+DfEKOAiaBSQDAAFp/3z+Cv7T/en9j/6f/68AbgGtAWQBsADM/+L+/v1S/TP9i/0N/pb+GP9j/zz/ov7O/fn8Svzi+877EPyr/JD9h/5U/+D/PQCMAOQAPQGYAQUCcgLBAsMCYgKQAVsAGf84/rf9TP3o/Jj8Gfw8+zf6Tv
mB+Mr3RPfu9o72JPYZ9p/2bfdB+C/5rfpq/WABmwVlCbQMpA/hEc4SQBKQEEsO8gu+CZMHSgUMAywB1v8E/4T+W/7R/r//pAArAScBhwB//2P+bv24/Gf8qfxu/Ur+6f5Y/4v/W//h/jr+hf3m/HH8Wvyi/BH9f/3f/VD+5P5p/9X/OgCtAEsB7QF2AsQCuAJtAuAB+gABACL/cP7+/aH9J/1Y/Bz70/nT+AH4bPf+9pv2Svbx9en1efZB9xf4K/kG+0/+dAJwBugJ3AyMD3UR4BELEZQP1g35C+UJuQeBBUIDZQEhAFf/6v78/p//VACiAIwAHABH/zr+Sv23/H/8vfxi/SP+y/5I/5b/sf+M/zT/uv42/tT9hv1Y/Vr9Uv02/T39gP3m/VL+2P6K/z4A7gCFAdMB9wEDAvABogEVAYUAEgCc/yH/k/7A/b38j/tk+ln5aPit9x/3k/Yf9tn16vWT9mL3+/fb+CT75/4CA6gGDgouDa8PSBGeEdIQZg8DDoYMqwpuCAcGqwPTAaQAzf89/z//2f9mAHQAAwBK/1n+eP3T/HT8VfyF/Av9qf0y/qL+GP+D/9b/4v+O/wD/lv52/ln+Bf6G/UP9Sf1y/ab97v1Z/vD+lP8oAJQA2QBCAbMB6gHFAWMB7AB2ANf/Hv9N/lv9YPxn+1v6Wvl0+K/3Ffd09gX2/vV39iP3o/cq+OL54vx2AOgDEAclCuAM9Q4RECEQeg/iDiIO/ww1C/EIqQajBCUDHgJmARwBTgFuATcBYgAq/+r9yfzj+zb74PoN+3374vte/Pj8vf2p/pT/OwBqAGYAlwDJAKEA/v8Y/0f+wf1v/SD97Pwl/cH9TP6U/sP+JP+n/zsAyQAcAUYBSgEfAawA9f8o/4b+6P0g/QH8kfo/+RX4J/eC9jP2UPbE9hv3dPc7+L755/tl/lMBjQSdBzwKUQyrDW4Otg6mDlEOmQ1uDNAK9wgnB40FUQSYAy8D8gLAAmgCpwGQAFf/Cv65/KD73/pd+g/6/vk9+sH6mPuT/Ir9gv5r/yUArgARAS4B2AAiAGX/tP4S/pr9c/2N/bD9wv3G/c79Bf6C/g7/if/m/yIAMwAeANX/XP/D/kz++v10/ZD8dPtq+qz5Sfkh+Rj5GPk2+XD50flw+m77wvxZ/isALwIoBMMFDAcjCB4J5QmACtwK5AqHCuMJCgkTCDoHrAZ5BpEGyAbEBmQGugXcBK4DKgJrAKf+A/2f+3X6bfma+Dn4bvju+H35D/rQ+rr7qPx+/SL+bP5U/hj+4/20/Y/9pv0a/rX+Nv+M/9z/OACnAAoBTAFyAYYBjwGBAU4B3wBKALf/S//I/hL+Nf1u/Nn7b/si++D6w/rc+jT7mvsG/IP8G/2//X/+ev+mANsB8gLfA6AETwXlBU0GcQZzBnMGYwYlBrkFUAUiBU0FogXlBQMGEgYCBpIFlwQrA5kBJADg/rj9i/xn+4z6IPoE+gv6Ovqz+nj7TvwB/Wr9fP1H/f/8y/yr/Jv8r/wE/Yv9IP6c/vn+VP/F/0gAwgAgAVcBgQGyAdwByAFwAQMBpQBRAOr/bP/X/lH+8/25/ZH9e/2H/cD9G/57/sr+CP9S/7//UQD1AJsBLgKmAgoDZAO4AwEEPARkBHgEbQQxBMQDRwPyAt4C/gI1A20DmwOsA4AD/QInAiIBGwAn/zj+Pf1A/Gv73vqf+p/62PpL+/f7ufxg/cT92/3F/aj9j/12/Un9Gv0V/U39sP0Y/nb+3v5r/w4AmgDuABQBPAF7AcEB5QHcAboBmwFyASYBsAAgAJ3/Lv/b/qL+if6a/s7+D/9T/5n/7f9XANMAUgHEASoChQLTAhADRAOCA9EDIgRZBGgESgT9A4UD9wJ4AiEC8gHjAeEB2QG9AYsBNwG4AA4ATf+L/sH94vz2+y77q/pv+mj6k/r5+pH7Rvz1/IL93f0O/iX+Jv4H/s79of2f/c/9H/58/uf+Y//v/30A+wBlAcEBGQ
JpAqcCywLXAsgCmgJQAvEBfQH+AH0ADAC2/3z/Xf9U/13/ef+o/+f/JgBfAJoA4AAqAWwBlgGxAc8B+QEzAncCvAL7AikDPwMvA/sCpgJHAvABsQGFAWIBQgEdAe8AsABbAOv/Yf+//gn+Tv2W/PL7cvsf+/f6+Pop+4z7E/ym/Cr9kP3c/RH+L/4t/hH+8f3s/Qn+P/6I/uX+X//w/4MACQF9AeQBPQKIAr8C3gLsAuMCyAKgAm0CKwLUAWkB/wCmAGQAMQAJAOr/5v/6/x4AQwBkAJAAywAMATwBSgE/ATQBOwFRAXIBmQHHAfYBFAIVAu4BqgFaARAB1QCtAI0AbwBMACoACgDp/8L/jv9J/+/+gP4B/n39Af2X/ET8DPz7+xL8Uvyr/Ab9XP2r/fT9LP5K/lL+Tf5Q/mb+k/7U/ir/lf8QAJQAFgGJAeoBNAJtApgCswK8AqwCjQJqAkcCHgLhAY8BNAHmAKsAfgBXADgAIgAYABgAFgATABoALQBLAGgAdgByAGcAZwB2AIwApAC+ANsA9QADAfcA1ACqAIQAYwBGACcACQDx/93/x/+w/5n/hv9p/zH/3f56/hf+uv1o/SH98vzi/PP8Hv1W/Zz98/1Q/qb+6v4V/yr/K/8j/xz/Iv8//3D/tP8DAGAAyQAyAZYB6gEzAnACmwKzArkCtQKrApoCfQJIAvwBowFNAQEBwACCAEkAEADa/6n/gv9u/2//gv+m/8z/7f8AAAUABQAFAAcADwAYACIAKwAvACgAGQAGAPL/3v/J/7P/nf+J/33/eP98/4X/j/+O/3n/Tv8O/8b+ff44/vv9xf2b/Yb9if2n/dz9K/6I/uj+PP98/6T/uv/G/83/1f/f//L/EQA9AHsAyAAhAX4B0gEVAkgCZQJ0AncCegJ6AnQCYwI+AgMCsgFYAfsAowBYABcA4f+u/4D/Xf9H/0P/T/9r/47/rf+//7//rv+S/3f/Yf9S/03/UP9a/2j/dv+D/5D/mf+j/6j/rf+0/8D/z//b/9//3f/X/8n/r/+E/03/Ev/T/pT+Wf4s/hr+JP5G/nv+u/4J/13/rv/z/ycATgBpAHUAcQBeAEoARQBUAHgAswD+AFQBqQH1AS8CVQJsAncCdAJjAkECFgLjAasBbgEsAesArQBwADIA8P+v/3X/Sv8u/yD/H/8l/zD/Ov88/zX/Jv8V/wH/8P7i/tj+1v7e/vL+Ev86/2f/kf+5/93/+/8TACIAKwAsACcAHgASAAIA7f/U/7T/iv9Z/yf/9/7S/rv+t/7G/uf+Ff9L/4X/vf/u/xUAMgBFAE8AUABLAEgASwBXAG4AkwDAAPQAKwFiAZIBvQHdAfMB/gH6AeoBzwGtAYQBVwEnAfYAxACQAFoAJgDz/8P/nf99/2T/UP9A/y7/G/8F/+z+0v64/qT+lf6P/pD+lv6j/rn+1v77/in/XP+S/8j/9/8cADUAQgBBADsAMQAmAB4AGQAXABQADAD8/+X/yv+w/5j/if+I/5T/qv/H/+r/DAApAEMAVwBnAHMAeQB7AHkAcgBnAFwAWABeAGwAiwCyAOAADwE1AVUBZgFpAWIBUAE6ASABBgHoAMgAowB7AE4AHwDv/8X/ov+F/3D/X/9Q/0D/Lv8X//7+5P7P/r3+rv6j/pz+m/6b/qP+sf7J/u3+Gf9L/4H/tf/j/wkAIgAxADgAOgA6AEAASABTAF8AZwBnAF4ASgAvABIA+v/r/+b/7f///xIAJgA2AD8ARwBPAFkAZwB1AIEAiwCOAIoAfwBxAGUAYABlAHMAhQCeALYAyQDVANcAzwC+AKsAlAB/AGkAUwA6ACEAAwDk/8X/q/+Y/4j/fP9y/2j/XP9N/z7/Lf8e/xL/Cf8C//3++v74/vn+Af8O/yL/QP9n/5H/vv/v/xwAQgBiAHcAgACCAHsAdQBxAHIAdwB9AIAAfgByAGAASQA2ACsAJAAqADMAOQA+ADwANAApAB8AGAAVABUAFg
AVABEACgAEAAAAAwATACkARQBhAHcAhQCEAHcAZABKADMAIgAXAA8ACAACAPf/5//U/77/q/+e/5b/k/+W/5r/nf+b/5b/jf9//3L/ZP9Y/1H/TP9M/1D/Vv9d/2v/ff+R/6v/y//x/xQAOQBWAGcAdAB3AHYAcgByAHoAggCJAJEAkwCMAH0AawBcAE4ARAA/AD8APQA4ADEAKAAcABAACAACAAEAAAAAAP/////9//z//P/8////BAAJAAwADgANAAoABAD7//X/7//r/+v/7v/w//L/8//z//H/7f/r/+v/7P/s/+3/7f/p/+T/3f/V/8v/xf/B/77/vP+8/7//wv/G/8z/1P/d/+f/9f8DAA0AGQAkACoAKwArACkAJgAlACUAKgAvADYAPAA9ADsAOQA0AC4AKgAnACcAJQAhABwAFQAMAAIA/P/4//j/+f/8///////+//r/+P/z//L/8v/0//X/+P/5//f/9P/y/+//7v/u/+7/8v/0//b/9f/z//P/8f/s/+3/7v/w//H/8f/x/+//7v/r/+z/7P/u//D/8//3//v//f/9//3//P/6//j/9v/5//z/AwAMABUAHAAgACIAIgAfAB8AIgAmACkALQAuACsAKAAjAB4AGgAYABcAFQATAA8ACwAHAAIA//8AAAAAAgACAAIAAQD9//n/9f/w/+3/6v/p/+n/6v/p/+r/6//q/+n/6v/r/+3/7//x//D/8f/x//D/7//t/+3/6//q/+v/6//r/+v/6//r/+z/7P/u//D/8//2//n//P///wEAAQAAAAIAAQACAAQACAANABAAEwATABMAEAAQAA8AEQASABQAGAAYABYAFQASAA8ADQANAA4AEAARABAADgAMAAYAAwD///3/+//5//n/+P/3//X/8v/u/+3/6v/p/+n/6//u//D/9P/2//f/9//4//j/+P/3//f/9f/0//L/7//w/+//7//x//P/9f/2//j/+f/4//r/+v/6//n/+//9//3//v8AAAEAAgAEAAUABwAIAAkADAANAA8AEAARAA4ADQAOAAwADAANAAwADAALAAsACwAMAAwADAALAAsACwALAAsACAAEAAEA/f/6//j/9//3//f/9//2//f/9P/z//T/9P/z//P/9P/1//P/8//x//D/8f/x//D/8P/y//T/9f/2//f/9v/2//f/9f/1//b/9//3//n/+f/6//v/+//8//z//P/+////AgAFAAcADAANABAAEgAVABcAGAAbAB0AHgAdABwAGwAYABgAGAAYABkAGQAaABsAGgAZABgAFQAUABQAFAATABQAEQARAA8ADAALAAYABQAEAAMAAQAAAAAAAAD+//3//f/9//3/+//9//7//v///wIABAAGAAYABgAGAAUABQAEAAUABAAEAAQABAAEAAIAAgABAP///v/+//z//P/9//v/+//9//3/AAACAAMAAwAEAAQABAAEAAUABQAGAAgADAARABQAFgAVABcAFgAVABQAFAATABIAEwASABIAEQAQAA8ADQALAAoACAAHAAcABwAFAAUABQAGAAMAAgABAP7//v/8//r/+v/6//r/+P/5//j/+P/4//n/+v/7//3///8AAAIABAAFAAYABgAGAAYABQAFAAQAAwACAAIAAQAAAAAAAQABAAEAAgADAAEAAQABAAEAAQABAAAAAwAGAAoADgATABgAGAAdAB8AHgAgAB0AHQAgACIAJwAtADEAMwA1AC4ALAAtACoAIgAmACQAHQAcABoAHgAWABAAEgAJAPT/AgD7/9//0P/H/87/zv/Y/9j/v/+7/77/t/+g/6n/n/+O/47/jP+d/6T/nf+l/6j/wP/m/wsARQAVAOQBMwIVAJv/Dv/L/xcAcP8d/5z+ZwAJAX4AlQBy/wIAl/9P/5j/7P53/+z+nf58/kD+Pf5g/sz+8/4V/6//FQBDAEMAEAD8/wEARgDi//D/3/9/APYAEwFMAXAAVg
ArAAkAQQABAPz/JACOACwB/gDeAMgAfABTAOr/mP+j/4f/xf+j/0v/WP8T/2//aP8v/0D/Nf+E/4//n/+L/y7/hv+w/ysAAgAI/zX/Pf9Z//j/mP+U/9f/bf9l/6n+av4I/zb/m//R/xUAkQDHAHMBewFdAawBgwHZAacBVQFdAScBVQEjAdcAjQBMAFIAAADT/4r/Vf9+/5j/pv9Z/yv/Qv9D/37/ff8+/1H/I/8A/wP/t/5u/kj+H/6l/Rj94/y1/MP8Qv27/Vz+lP6W/sL+0f6P/4YAQgGhAfMBSQJkApMC5gImA5EDygN4AyYDXgLJAX4BHgG5AB0Aw/8A/zL+q/1U/Xn9n/0k/j3+Qv4s/iD+pf7A/jT/xf9eAO4ASgGsAaoBvwFPAsUC9ALsAvkCAgO1AmUC+gHPAY0BNAHEAAoAX/8O/9n+of6h/nj+Tf4S/uD9qP12/Vn9Ev0k/eH8y/wW/fn8G/0z/UD9Xv3+/ez+Jv9c/7j/UwA3ATYC2AIJAxQDbwPhA9ED9gPFA7sDyAPVA9cDAANxAjwC8AGIAScBqwDp/8H/nv9r/3//ev+D/1n/oP/M/7H/AwDK/+T/q/8N//D+S/77/Vn9+/uj+m75/vjS+Rb7/fv5/CT9Tf3b/e7+CgHAAkEEcQUxBjQGMQZaBigGmwYtB1oHvQZ/BeQDPgLzAP3/P//z/Yv8dfs7+ln55fjZ+BD5uvl/+vX6JPtZ+w781/wN/qf/JQGeAsgDVgTGBDAF9wULB0gIKgn2CEMIPQdPBncFsQTVA+IC3AGGAC7/uv2s/CL8qPuE+xT7UPrG+Rj51PjP+C/5cvll+bP5gfmv+S36OfpJ+8T9HAAdAd4A4QAGAtAD9QWTB4gHHQeEB5oHywamBfIEhwR4BFoEKAMJAQ7/V/4e/nr94vwr/FH7cvsQ/Pn73ftG/Gf9z/4nAE0BkwEQAgoD/gOXBNIEZQWSBXAFsQRcA2ICrAGyAS0Bt/8q/nn8UftK+l/5gvix99/2MPbo9Qb36vmw/K3+Cv+b/wYBywJbBaoHNQn+CdUKiArfCBgHvAUvBfYEwQRfA4gAaf0N+6v5ffjq93T36/Zx9sv1cPVX9YP2/Pil+wv+c/9fAJEBUAOpBSoIzQr+DEUOWQ79DOYKWQmnCAUI9gboBAECtf6O+y/5OPfb9Rf1WvQ483Lx8u9h79TvT/KF9o76//zx/Vn/SgEWBIQH2Qq7DawPNRE7EV4P5gwbCzYK4Ak4CRMHJQPt/qH7W/mt92b2mvXS9Cf0QfNd8ubxxPJN9Vb4Wvul/Tj/jQBUAqcEPwcjCnQM3g3tDawMGgvVCVIJxQhhB0cFYAJc/4D8LvqI+Ov2uvWk9P/yNvEq8ATw6/H29NX3LPpn+9r8vP6MAdcEPAgpC1ENxA6ZDskN3AwUDKMLQgsiCu8HNwUyAjL/wPzI+mf5Nfj49ur1v/Tz8+Pzr/QN9gP4NvpL/Gj+NQDoAYoDkAUCCBgK2gubDFkMXgvPCTMIqAYWBWoDeAEx/6z8T/pQ+IL27/RA8/nxNPGZ8O7w6fJa9bH3H/qE/Hb+HQDBAuAF0giZCzkOng93DwIPYA4zDTMM3AsSCwAJbwZuA+j/uPxC+tr4uPe89on1DPSl8t7xe/Lm81j29Phh+1v9w/6cANcCqgWACAkL3QxYDfIMBwwFC94Jtwh5B5UFEgNPAMj9evuj+Qv4Gvb18wPy/PCS8NzwSvIb9N31dPdP+Rf7+/yL/3UCjgVRCNcKpgw5DWsNXg0dDbYMSAyeCxEKGgiUBbECDQDS/fn7avo9+Q/46vbn9XD1l/Vk9v/3Bvrr+4X9KP+5AG4CWAQhBuoHfAlyCn0KtQmwCGcHawZsBQIEcwJzAIX+z/wz+5H5wvfV9ePzufIn8tTxX/KP8yX1B/cT+SX7Av1L/yECGQXaBzAKLgxYDfENPA7dDQQN/wsTC5UJiQc7BVoChv8a/TX7hvni94L2O/Vn9Ovz//O59Pb1L/
h0+nr8Zf5TAFkCdgQaB50JkwvxDJMNeA2SDGcLWwocCZQH0QW+AzgBuP59/C76ufdV9RzzIfHt71bvSe+778jwevKS9DP31fmK/Fv/OQJwBUoI1greDEkOXw/GD68P6Q7BDUsMfQqPCEAGyAMqAYn+/vu2+br3z/Ud9NnyX/J/8lDz3/R99iL4G/p4/Oj+VgEaBO8GfgmuCw4Ndg0pDccMRgxAC/MJeAi1BoAEGAKR/+P8g/o++O71p/P28RnxifBq8IPwC/FK8nr0ZfdA+u78b/8GAr0Edwf5CQMMuw0fD/YP5Q8cD8MNGgyPCv0IDQeDBOYBW/+v/CH6lvcx9VHzWfIu8lryJPNc9Ir14fao+P/6ff1aALIDpQbqCLYKHAzKDAgNNw3QDNELkgopCVIHEgXCAksApP37+mP46PWl8+XxxPAh8MzvyO958AnyT/TJ9oP5U/wJ/+UB9wTUBwIKCQzQDSEP8w88EMoPhw43DdQLGwoJCL0FXQPKAGj+9/s9+ar2wPS380PzXPO+8wf0jvSt9Wb3VvmB+zH+5gBeA5kFZwemCGMJIwq2Cs0KfQqrCYII7wY5BYIDhQFW//P8uPql+Kv2NfVd9AL08vNR9Av1zPW09u73hflX+4f9EQCAAqQEXwaxB6IISQnoCUMKYgpCCuEJTwlPCB4HqQUiBKICCgFd/4X9xvtA+kz54fjC+KD4cPh0+Nn42/lA+/j8yv6dAD8CiwOHBPsEOAV9BdQF+gXWBXoFjwRKAxYC6QCz/5L+uP3U/Mn78vo3+ov5EPkI+Uz5h/nS+SL6XPrG+qj7/PyX/k8A/gFLAy0EwAQoBZ0FHAaXBugGBQfgBmEGogWoBIkDcAKDAYsAc/92/qn98PxZ/A/83fuj+7H7G/yn/EP9Fv4P/9//vwBhAaABmQG0AQoCVgLUAiIDPgMuAzEDJAPiArECeAJIAvMBdgGvAJD/W/4e/Sn8a/vp+pH6LvrO+XH5bfm1+Un6JfsN/PH8s/2A/k7/FQDoAMUBnwJHA90DTgSDBI0EjgSSBFsE6ANWA78CLgKzAUsB3QCBADoABwDD/3H/NP/4/tb+6f4U/y7/Ov95/7P/4v9CAMoAOwFxAa8B3QHWAecBLAJsAnkCdwJUAtoBJwF0AMf/Cv9x/gj+of0v/dr8pPxe/B389Pvg+9P74Psv/KL8MP3W/Yn+I/+U/wgAiQAPAY0BEAKHAsECyAKxAnICCAK0AYkBYAE5ASIB/wCsAFcAIgDy/8P/rf+s/5b/gf+G/5X/t/8KAI8ADwF+AeIBFgIoAj0CdAKmAtQCCwMcA/sCrwJQAscBJQGaACEAtv9L/+r+hv4V/rX9Vf3w/Hb8CfzE+6/75vtH/Lr8Gf1s/bf97/0u/nr+5v5d/9f/RgCcANkA6ADjAN0A8AAcAV4BoQGmAXgBQAEcAfUAwwCTAEMA2v+J/2r/W/9Z/43/4v8pAFgAewB2AGIAiADrAF4BzwE7An8CkAKSAoECSwINAvkBAQINAjACQAIqAvkBwgFqAd0ATQC7/zv/1f6V/mT+MP4T/vX9w/2C/VL9RP1R/X/9tP3g/fz9If5Q/oD+rP7I/vH+Jf9h/47/s//M/9X/7v8WADMAKwAWABQAFQAnAEoAaABYACwAAgDO/5//lf+//wIASACXANMA5wDkAOoA9AABATABewHGAQ0CUAJvAk8CEQK7AU8B7gCnAGsALgAFAOr/z/+6/63/lv+B/33/gP90/2r/Zf9V/0n/Vv9k/2H/av+F/5r/uP/v/yMANgBJAGIAagBlAGIASgAOANT/o/9r/zj/Gf/8/sf+iv5P/hP+9P0C/i/+ZP6e/t7+Gv9R/3z/kv+f/7z/+P9UAMoAOgGRAc4B8QHrAb8BigFfAUIBOwFBATcBHQEOAQwBBQHxANsAtQB7AEAABQDL/6T/q//P//f/IQBBAFMAWQBmAHIAeAB+AIcAjwCCAGcALwDp/5//X/8e/9D+jP5U/i7+G/
4X/hj+JP5O/of+uP7g/gP/Kf9W/3z/kv+M/4P/i/+m/9D/AQAqADkARwBLAD8ALAAiACAAJQBCAGUAbABhAF0AZQB6AJIAngCYAIwAkQCXAIYAkwC5AOcAJAE7AVwBTgGDAdcBlAF8ATgBGwEWAe4A5QBXAD4ATwBOAG0AQwAjAOX/6//e/6D/nP+I/5j/sP+m/1n//v7l/sb+t/6s/o3+gf6P/r3+4/4B/yj/K/8n/yD/+P7a/sX+q/6d/on+hP6T/rP+7f4q/1X/kv/G//T/MwBTAIEAogC8ANcA4AAOATMBUgF8AYoBjAGXAZYBewFOASgBEQHhAMwAsgCJALIA6gAEAfAA4gDjALYAvQC0AIIAsADVANUAxACwAIAANwAvAAIA7P/0/8n/tf+k/4j/V/8P/8v+l/6A/mD+Nf4P/gz+Ff4l/jL+Qv50/pb+wP7Y/tb+C/89/1//of/F/97/+P8TACwAQAByAIAAigC0AMQAyQDZAMsAsgCqAJwAhABWADgAJwAvAFoAbgB0AIMAnQDIAPgAEAEpAUcBagFtAUwBLgEBAfwABAELAf0A8gDwANEAswBpABYAxv+W/37/Rv8k///+6/7u/vf+AP/z/v7+Bf/8/vf+8/71/gz/Kv9A/0b/Tv9f/2b/e/+K/47/n/+h/7L/w/+3/7r/tf+4/7v/uv+r/4j/hf+U/6v/y//8/xoAMABTAGAAdACGAKkA0gAAAScBNgFMAYQBpwEDAsAClQKjAZsAef+X/kP+bP7G/pv/igD9AOgAygBLALT/FQDmAPwBbAIwAjQBof+k/tb9Xv0C/lD/twDfAnYELgS8ApkA0/1L+wT6kPnn+Z772f2j/0UBGAKwAdEAyf+r/gP+Cv6B/ir/EwDQAPoACgG9AB8A3P/V/+3/RgB4AIYAWQDj/3b/+P7I/h3/j/9mAIMBtQL+A0AEgwMqAlkAqP5a/bv89Pzx/V3/ogCaAU4CBgI8AW8Afv/N/o7+t/5T/1oATQH6AVUCOAK+AScBowBVAFcAjQDdADkBYQEOAVYAf/+y/jr+Rv6//nT/OQDwACoB/ABuAHP/fv6c/Qr92Pw1/SH+Kv8lAMcAmADs/zf/KP4g/d/8lv2b/sf/+gCrAbMBKAFXAH//2v5Z/iv+dP4X/6X/JwDCAO0A7AD0AMcAaQA8AFQAdwDlAIYBzwHLAa0B+wAiALX/Xf9F/3P/0v8pAGIAewBzAGcAXQBkAHAAjQB9AHUAeQBqAFoACACq/0z/CP8C/0b/4/+zAHUB9gEOAqABuAC0/8b+8v2b/cT9N/51/qT+3/6v/k/+/v3I/Yz9kP27/c39+v0O/vP98P0N/h7+iv5Q/zEAOwFnAocDOwTaBCgF5wSNBBAElAMqA+UC3gIAAw8DFAPqAl4ChgF2AIn/lv7H/XT9gP3V/WH+0f7r/rL+9v0S/T/8pftk+1z7n/v3+038ffyb/H78Nvzy++r7Zvxk/QX/9wAQAw0F4QZmCC0JBwkTCNYGfAVXBGADXQJdAYQAw//4/in+Df20+3/6rfla+Zn5Qvoi+0b8jP2i/nT/MACnAPwA5AFRA80EKgYxB9cHBwidB7oGZQWtA/0BpQB+/3v+yf2K/Wz9PP08/Rv9p/wJ/FT7bPpJ+Tb4fPcs9w33d/cF+Yz7hv69AfEEkAdiCVYKTwpxCd0HRQYfBUQEfwPEAsABIQA7/in8//kU+Mb2R/ax9uH3VvkN+9X8ev77/3cB9gJhBOcFnAc6CWUKDQsSC1oKAAkPB+sE8AJAAdj/sv7R/fv8W/z4+3D70fpA+qH5Efm/+Kf4vPjL+L34mvhs+Dr4NvgQ+QT7wv0WAcIEdgixC+UN8A7HDpcN7AsgCkEINQYjBCQCIgD4/Zf7K/nf9vX0vvN+8/bzMfUp95j5Nvyz/usAuAJGBK0FEAdQCCoJoQndCbQJ4ghhB1IFRgNbAZf/G/4E/XT8XPy+/CP9Nv0f/cD8KPxV+2P6ZPkn+Pn2A/
Zu9RP1PvW69nD58vzMAM8EwQj4CzEOZw9DDwEOMQw8ChYIowVIAwkBCP9W/an77Pk1+M72/vXb9Xb26PcA+o78Rv/PAd4DRAUUBoAGtwa0BkEGjQULBZ8EBwQkAw4CKwFUAFH/U/6F/eT8rvwD/Wz9uf35/UD+Uv4H/lj9Qfyf+nP4lPZI9TX0JfTh9fD4vPzqADgFLQn9C58NbQ4yDv0MZAvaCRII3AWOAy4Buv5U/BD6CPg/9gT10vR49cr21viA+2b+PQHIA7IFxAYHB+AGigYYBl4FgATPA1MD1QJGAqoBMwHaACYAY//D/iT+xf3Y/Rf+W/6j/tT+1f59/r/9mvzd+o/4RfZU9KbyzPHX8pH1VPmy/U8C6QatCj4N1A46D24O+gxNC1UJ2AYsBJQBCf+h/Ib6zPhQ91X2R/bv9if4DfqK/GD/KgKLBE0GRAdvBwcHWAaXBaEEdQNIAnIB4wAnAI//ev+V/4v/RP/h/mz+/f3t/QP+L/6m/jz/7v9gAE8AyP9Y/tX73fgc9sfz9fHY8ePzWffD+3AAIwVsCV0MGQ64DiEOxwwGCykJ1AYBBD0Bkv4x/Ar6CviF9mn1GfXL9ST3QPnk++/+HgLcBOUGDAhxCFQI2wd6B+8G6AXzBDoElwOxAokBlADH//H+H/5M/Zj8BPwG/J78Ef2S/UD+4f5V/1j/4f6b/Ur7n/gC9rbzvfH78Jry9PVH+hj/CwTSCJEMEg+HEJEQcQ+zDbQLQgkdBtoCtf/X/Fb6Bvgl9rr04PMY9C/1/vZy+VL8hf+WAiYF/wYMCIAIhQh6CFgIgAcyBu8E2AOnAjYBCQBQ/8b+Y/4S/qv9J/3o/Ej9pP3v/Yn+Xv8xALkAyAAHAAD+KvsP+M30zfGm777vLfIl9lX74QBTBlEL/w4+EekRChFjDxENTwr6BkcD1f+t/BD60vfC9UT0YPNZ81b06PU2+Dn7rv5JAmoF3gd+CVAKmwp/CigKPQmgB+8FSQSAAnoAtP6g/fH8gfxg/Gn8gPy9/Fb9Dv6H/g3/w/9bAH4AOQCK/979Sfto+Hz1vvJj8JbvOfGB9Pv4bf4zBO4JtA4JEvQT8ROdEl8QNg08CaIEWQB//Bv5j/aX9FbzzvLu8hj0uvWm90/6gv3SANADdQaSCNoJkwruCvcKTwqpCKIGpARqAgsA+P2j/Av8+/tV/OD8e/0B/rD+ZP+i/63/8/9QAFwAHQCr/5P+j/wj+nT3rvTj8d7vBfD18Uf19/lz/2wFNAvUDxYTZxTaExYSFw8SCw4GEwHb/Ef51/ZT9Wz0SvSu9Lz1O/fB+Ob6kP1iABYDlAXlB7AJ8Qq0C+YLZgvXCYQH+AQ3Alz/x/wR+2r6f/oB+6j7f/xk/Ub+Cf9G/1f/nP8HAGAAUwARAJb/dP64/Gv6xfcG9Uzy0PBS8Wbz3faJ+zcBdwciDXkRRBQPFTAUyRHxDfIITwMv/tr5ifZ09GbzUfP68y71y/ZN+NX5xvv7/VQAnQIYBZkHzgmbC9QMSA1+DHYKxwenBEcB/P1h+wn6wflD+ij7Tfyt/ev+JQDnABMBNwE+ATkB3gAwAJT/q/4z/Vf7/PhX9przMvFJ8NXw0fJ29lr7SwG7B4INGRILFfIVJRVXErMN3AfDAT38mPeF9N7yevJ48y/1VfeQ+WP7Tv0s/78ARwLgA6sFngedCU4Lbwy+DLYLdglyBtQC5f45+6f4ifeu95f4+/nv+/b91v9gARUCYAJwAkAC5gEdATwAX/8h/o78evoJ+KD1EvMv8crw0/Fo9F34jP3CA84JAA8oE4UV+BU8FGYQCguZBGD++fjT9GvykfFe8lT0zPaM+e379P3o/4MBxgLVAxkFvAZrCOAJ7gp0C/IKOwm+Bp4D/v9o/LD5RPjk94H43fm1++39HAARAlkD3AMhBCQEoQOXAlgBMgC//tz8qPoZ+G31x/J68Gvvwe+N8Rz1I/oeAI0GnQylETsV4hZ7FrsT+Q66CBEC2/
tC9l7yUPAE8ILxHfRH95f6m/1zAOYCjwSJBUwGKgf9B+UIoAkKCucJ0wjyBkcE6QBT/R761/fA9uH2+Pel+f37vP5uAZQD4gTBBRsGtAWnBBADcwHC/8f9pfs4+Wn21PN68YjvGO/U7yTyVPam+/QBkQhlDkkTyRYcGA4XkBMxDnQHagDC+SP0QPA/7l/uWPB28yz3HvvS/icCvgRrBl4HAAirCEUJ0Ak5CnoKMgoHCf0GKQS6ABj95vm095H2b/ZG9/f4dPtQ/hgBUQPwBB0GrQaFBnkFygP+Ae3/hv3W+qz3cvSa8QLvbO1b7dvuRPJH93P9RwQCC9kQhRVVGNUY5Ba4EucM8AX+/oX4V/Pb7ybug+5n8EDzwPZ7+jz+1AGgBLEGJwhZCVwK/ApJCzwL4Qr9CUAIwAWgAgj/iPuh+LL2xPW/9ZL2W/gH+wn+4gBMA3QFHAf1B+YH3gZhBYsDRwGK/j37dvfc87rw+u1r7Irshu5z8vD3Zf6HBTYM9hGbFuwY8BhrFr8Rmgt2BIX9PfdO8vnuoO1R7h/w6vJ+9lL6Lv7TAcYE6QacCBwKQwsQDH0MkgxDDBEL9wgUBnYCdf6b+o/3f/Vr9HP0m/UA+Cf7e/6vAYEE1QZHCMcIOwi3Bs8EnwITAAz9lvkR9ujyAfC87c/scO3v7zn05vljACgHgQ02E08XGhmpGJcVgxDmCcUC6/vG9SjxWO6j7bLu+PA99Bj4Avzm/zcDxgXDB1cJpwpxC+8LEgz8C2ELxglaBxIERgBw/On4TPao9A30k/Q29g75bPzZ//kCoQXBB94I9wj1BzEGOgQDAnP/Zfzc+HD1RvI77+rs+evx7MLvbvRu+jIBYQjjDsQUkBjfGfsYXxX9DyEJ9QEQ++/0fvD47Y7ts+4J8W70P/hK/EIAiAM7BkkIGAqBC1YM2Az9DOgM+AsaCl0HuwPG/6P76vc89WfzxPJx81f1h/gt/Pr/mgPCBioJYQpjCicJHwe0BOABhv7I+uv2a/NY8Jjtz+vK64Lt8fAS9hD83wLACe0PMBVRGG4ZOBiVFD4Ptwj0AVH7gPUf8Y7u0O2b7qfw0POh98b71/9KA0AGpwiYCvkL7Ax4DbcNaw09DBwKAwcrA9/+gfqm9tvzDvK08cLyPPXX+Nz8CgHKBPUHMApCC+4KYwlCB7EElwHb/bj5wfU18gTviOwf63vrwu248Sj3Wv1EBPsKBRHLFV0YDRk5F0ETsw09B8UAavoa9UbxJ++t7pTv2vH+9If4cvwpAH8DYAbXCPsKWQxlDQwOOw6YDc4LOQnDBbwBlP16+Sj2uvNs8oPyv/Np9tD5m/11Ae4E6QfOCZ8KLgq/CLsGIwTxAAX93vgI9WfxMu746z7rkOw274DzPPl5/zkGdQwFEvMV2xftF24VERFiC0AFEv9F+bL0g/Hs78/vD/Fb80H2rvlh/c0A9gPHBjIJFQtpDG0N6Q3XDesM8woWCIwEnQCK/LX4v/XK8+/yQPPR9Jn3APu4/lACjgUhCMAJOgp7CeEHugUZA8X/3/vX9yD0nvCx7dbrretW7VnwNPXi+kIBvQejDcMS7RVtF9EWABSHD94J6QPZ/U/4CvQs8ePvFfCc8Tj0U/cC+67+/gHyBFUHZQnPCuALwgwoDQkNDAwuCoMHHgRnAHH8u/j19Rb0S/On81T1Pfi7+3D/GwNYBrkIFQoxCh4JTAf8BCwCuf7P+vT2ZfMu8J3tLexx7Ebur/GJ9iT8dQKUCD8O+RLNFfIW3xXHEiQOdgihAsv8q/fB813xifAD8bzyPPVD+K37Cf8iAtUEGwcPCZAK0gu/DDANGA0gDDoKdQf5Ax8ADfxr+L314fMQ83nzUvVW+NH7jv9EA4wGGAmhCuEK4gkFCKoFpQLK/nv6KvZO8uHuXexK6/DrTe4s8mv3Pf2bA70JPg+QE/MVqxYcFcgREQ2OB98BNPxP96HzY/Gl8C/xyPI29Sr4n/v4/goCzAQsBzcJyg
oTDNUMGw3fDN0L6wkWB6sD8f8N/J/4+fX+8xrzk/N/9Wj4xPuI/zgDcAYXCbUKAQsRClEIGwYWA0P/+vqG9nXy1+4v7AHrkuv07e3xQPdJ/eEDHwqmD/ETPha9FvwUkxHZDD0HhgHt+xf3jfOH8e3wmfE987j1vfgZ/EP/FgKZBLYGlwgbCl0L/gtGDE8Mogv7CU0HCwRQAFT8zvjv9dHzzPIl8wf19Pd4+1f/IwN7Bi8J3AoyC14KqQhZBj0DYv8W+372bfLq7jnsKOvl64nuefLK9/j9dgSKCrQPuxPLFekV9xOUEP0LmwYnAe37fvcz9Gjy8/GR8gf0OfYG+Rf89P6pASAEPQYbCLQJCwu6CwYMBQxdC7cJHwf5A2AAnvw1+XL2bfR189nznPVC+HH7BP99ApYFGgi5CTQKpAlECEUGagPH/5T78fa78uru9uus6lLr/e0F8on39P2gBNUK/w/UE6MVhhVVE9YPWQsvBg4BJ/wV+Cr1rvNZ893zHfUY96L5cPwT/30B0wP3BeIHkAnuCosLxAvCCwkLVgm1BosD+/9Z/D/5t/bt9Cb0pfRt9s/4xPsU/1ECJwVwBw4JoglTCUQIiAb2A7MAyvwy+Mrzm+9Q7HbqsOot7cjwKPam/KcDNQp8D8IT4BXGFZwTFRCVC0YGEwFE/DP4RPXU86LzNvRu9UH3mfku/Jr+7gA7A08FYAdECc4KrQspDG8MpAvhCUcH+ANOAH/8Svm19ub0SfTj9MP2Pvkp/Fv/YQL4BBEHewjkCJQImgf7BZcDiQDJ/FD4/fP675LspOrO6irt+PA69sX8qQPzCQUP4RLtFLAUjBIuD9oK5wUeAdf8KPlu9ir17PQ59SH2nPeq+fb7Rv6SANcCDgVIB0wJzgqmC/8L9QvyChAJdwYzA7H/Rvxy+Tn34PW09Yr2Ovhn+u38g//4AUIELQZsB98H6wdiBxcGAwQrAX799/iQ9G/w9OwE61Drve2S8f72f/04BDIK6w5UEvATeRM5EeQNtAkiBeUAJP3u+Zf3a/YK9jn25/bu93X5ePuW/d7/VgLnBHcHrwlUCykMZAzuC1YK6gfzBJABMv43+/j4eve/9v32B/i2+Y77e/2L/34BPQPhBBMGkQbCBnwGigWoA+gAPP29+FX0VfA17cTrXOz97kLzrfjt/hMFRQpTDvUQCRJfEUIPVAyZCNAEXgE1/o37dvkw+Gf3//YO95732/iy+v/8qf+hApQFXwiVCugLbgwnDPUKuggCBgcD5v8s/QD7pfnj+KL4Evnv+Sf7Wfy4/UD/vwBiAgUERQX9BX4GagZuBVMDPwBK/Nf3kPPo753tFO2W7tLxf/bC+0EBXgZBCiYNlg4DDyoOTQwLCksHpgTVAT//8vzb+if52Pcc9+L2aPff+Cn78P36ABoEIAeDCekKXgsgC0sKjQhJBvIDngFN/1j9APwJ+1j6/vka+oH6Ffv++0P9sf5VAEgCFgQ8BdUFEQaCBcYD/wCH/aP5t/VZ8knwmu9Z8KXyPvZ4+pX+tgIqBr0IiwqjC04M8AvsCnEJmQdZBZ4C4/8p/Zv6hvg596j27vYp+FT6If0GANgCaQWgBwgJcglZCQYJTwjyBj4FhgO1Adf/Lf7g/Ln7vvo2+jH6iPpA+2v82/1d/yYBFAOIBEcFiwVxBXAEVAJk//L7V/j29Izyc/Gg8ffyePXx+ID8yP/KAmQFfQcECVgKOgsxC3UKUAmrB1EFeQKj//b8gvq1+Mn3q/dB+LP58ftj/o8AkgKPBDIGOwfXB10IqQhQCEkH7gUvBAECtv/E/Uf85PoN+sj5/vmh+rH7Ff1Z/sD/XwHcAqgD/AMaBMwDgAIsAEn9Gfr89mT0KvMu8yn09vWz+NT7hf7hAAEDCAWZBiEIkwl3CnEKuwmxCMYGPQRtAdz+lPyp+pD5Mvl2+VD6wfuA/R3/iwAOApMD8gQZBjUHPwi3CGEIUwfABZwDMAEY/5P9b/yF+w37+/
ou+6D7Uvwx/T3+uv+JAS0DOwS9BK0EugORAXP+D/vJ9wz1gfOT88T0k/bN+CH7Gf2e/un/bwFLAz0FnwfdCTELQwtgCooIpQV1Akb/2fxK+3H6dfoN+9X7nPyX/WH++/7P//AAhgJaBBoG0QcSCUIJcQgGBxUFtQKKAO3+2f35/Bj8Tvuw+l36p/qB+738kv7lABwDiAQIBZUETgM9AVH+Hvtm+H/2lvXY9eX2KfhQ+Vv6KPv6+wL9rf5TAYAE6gfUCmwMMAx+CucHwwToAaX/Ev5f/T39N/0D/ZH8Ifzr+xz8tvzq/Z7/hgGCAzAFnQaVB+wHoAfkBvsFxgRgA8cBJABq/rr8DPvE+Wv5Cfqa+179ZP9MAbQCfQNjA84C0gGiADP/Sv1I+4/5E/gl9972Cvd99z/4Qvl5+ij8J/7VANsDAgfMCXELlQssCiIIlAUrA1gB5//1/mr+I/6z/RX9nPyB/ND8c/1A/k//TwBpAZcC3gNzBaAGdwfCB3YHrwYTBRID5wC+/gb9k/t5+hj6g/rY+zv9e/7u/wcB1AEUAtoBawG1ANT/cP6i/OX6e/ls+Of3r/dq92D3pfdT+KD5t/vQ/msC+gU6CUgLpAtECgEItgWFAzICdQHPACoAkv8L/xL+Rf33/Aj9c/3R/T/+rP4p/yIAngHjA0MG6AfjCMAItAfYBXIDZQGU/2T+uf3w/ED81/sN/J/8L/0p/lj/XAA7AaYBlAEnAXMAev8E/mf8+Pqz+ab45/cy94v2YPbP9g74Gvr5/HoADQRgB8wJtQoaCncIlgbmBJwD8AJZAr0BOAGUAM7/0P4U/oL9Df3e/KP8zvxx/ab+aQCJAggFtAZeB0MHXwZSBQIE5ALsAb8A4/+o/l79gvxG/Ar9rP14/l7/9v9nAGwAkQC3AN4A5AAmALv+LP2K+8r5PPj+9uP1LvVV9QT2dPfm+Uf9/wCGBIwHFgkcCRQImgZOBYIEgwTMBMMEfATXA7MCOgHb/8T+z/1E/RL96vwG/X/9bf6c/zkBHANDBNsE9gSfBBEEMAN1AmwBTACi/6/+Kf76/Vb+D/9g/xsAhAD2AGwBnwEeAloCpQJnAnsBMACF/rX8uvqe+Gr2gvQ589PyOvOv9BL34Pnk/Ir/iwGSAu4CIQNaAwcEDAURBqwGswaTBkwGxAU7BboEHAQ5AyICEgHv/zr/MP+h/4EAngHWAmYDbAMFAzwCgQHpALwARQDF/1//6/7F/qb+7v4x/7v/wQCyAbYCWQPRAwgE+APBAwcD/wGiAO3+2fxj+rD3D/Xw8rzxiPEj8ojzWPVu93j5N/vH/Ar+df/9ALICjAQUBjMHngexB6oHkAecB5cHWAeqBqEFmARkA1UCwAGTAawBwwEKAhUCpwE4AZkAAACF/0r/H/9o/rv9+vxg/Eb8bvwG/cX9Ff+rABECaANJBOMEUAWOBYgF/AT9A3ACRQDh/VD7sviR9hD1YvRD9Kb0ZvUc9i33Rvh6+cT6H/zb/YD/ZAE0A4kEdQXgBUYGfgayBhMHCwfWBlYGtQUdBWoEHQTPA5QDagMSA6YC9wE3AXsAvf8a/6r+Sv7r/Vz9o/wW/MT7+/t+/CD9Gf5R/+QAbwLAA9IEfQUlBpgGkgYMBucETwMMAYb+8ftc+X33Rfam9VD1UvWi9cz1Rvbk9q33vfhA+gr8tP2I/z8BqwLGA78EggX5BaEGQQeUB6wHqAetB3wHZgdLB74GNQaLBdEE4APHAgcCAAE8AIP/hP6s/af83PvU+vn5xfnp+cL62/sm/X/+8f+dAe8C/QPoBKUFOAZlBhEGJQWXA6gBZ/87/Xj7DPoT+SD4PPd69vL10/XA9Qr2pvaj9xr5pfpY/OP9V/+/AOwBHAP/A8QEqgVrBiYHrQcSCG0IngjoCM4IWwjRBwQHMAYfBRMEAAP2AS0BIwDu/ob9S/wz+z/6tvlh+Zb5Sfp++7z8yf0F/1oA2AE9A5cEngUnBl8GIgZkBSYErQ
IXAWj/2v1//DL7DPrb+L337fZV9iD27PUF9lz21PYA+HH5EvuQ/O/9g//OAAgCPANCBFYFXgZ6B3QIGQnZCU8KXgoaCngJ1wjoB/EGzAVmBDUD1gGdADT/uf1+/FL7yvpb+iv6IvoY+pr6Kfsh/Ej9g/4dAL8BmAMNBd4FSwYyBsIFBgX5A9oCmQFqAFb/E/72/Kr7d/pn+SH4NPdh9tn1d/VA9bP1UvZJ94b4vvlD+7X8Of7U/w8BawKeA+EEOgZlB/QIGgoNC8ML4AvPCw8L+AmACLoGKwVyAwwCqABJ/1v+QP1f/L37U/vu+rH69/rt+m/7dPxx/dn+XwD7AUwDZQRWBYwFUgXbBAsEMANSAosBygAVAIT/q/7g/Qz9+Pvj+rH5efgu9z72rPUe9fH0EPWi9Zj2y/dT+br6LPyy/Tb/sAAYAuYD0gXMB8gJiwv5DLYN1w0uDdILPApyCJQGuQT7AmQB+f/s/gr+Zv3X/Iv8QvzT+9370Pv4+3b8J/1d/p//BgFFAioD6ANKBHYEUQTmA2gDvQIsAtABmAGNAXUBRwHXAAoA6f5i/bb7IPq0+FT3HPYd9Vb0xvOL89vzZvRa9cb2cvgY+tX71f3M/+YBLgSSBrwIqwpEDAYNNw3gDA0MygpDCbMHAAaBBCMD0wGZAJr/yf4C/nz9HP3U/L/84fwu/XL9uf0Q/lz+5v51/xgAzwB9AUgCzwJMA5ADnQO/A9QD2wO5A5IDXwP+ApMCEwJPAVYABP97/Zr7kPl+93f16vOn8hLy2PEQ8ufyKPQL9gP4L/p4/Lf+6gDYAoYE/gVeB7QI0gmVCicLUQsGC2IKXQkSCJ0GKQWxAzYCDQEdAHL/Lv8H/wf/5v7D/oX+CP6U/Qz9sPyt/PX8nv2J/qz/2QDiAeECrQNjBOYEKgVKBSoFIQUDBcgEeATwA10DfwJVAf//S/5W/A36nfd+9ajzgvLW8Y3xPvJh8/70uvZC+OX5VfvN/C7+Yv+wAPEBFwM4BFUFbwZ3B1kI8gg3CToJ3wggCCMHAwbwBOkD9gIwAmoBwgAdAGn/6P6I/i/+z/1z/Uf9Q/1H/Wf92P2j/q7/+ABQAqoDAAUGBqgG4wblBroGLwaOBeoENQRsA4gCqAGEAEL/4/1w/A373vmX+F72NvU19Xb0NPS99G/1ZPaU96z4u/nt+t77Hvzz/HD+Sv9kAJYBtwIwBKgFxQZQB2MI2wgcCKIH2QbRBVgEjQIoAen/VP/T/ib+Gv4v/iz+QP5f/mL+JP5r/s7+Ov+pAPIBAAM/BLcFIQe+ByQIBQi4B2MHdgafBaQEgAOKArgBIgGSAPb/U/+p/gv+Tv14/Ir7CfrZ+Ff4f/d+9q31SPV69dT1JfZr9gH34PfN+N/5Lvup/Nn9J/+rACsCuwPMBG8F8gV9BuQGzwZOBoIFqgTcA/QC9gH0AA0AWv/d/rz+vP6v/rf+z/5A//f/vQBtAaQB5wGhAoYDHwRvBLYEAwWCBToG1gbiBpMGLQarBTIFpAT2AwsDPgLYAW4BBAFlAHr/m/7N/Qz9Mvwt+wH6+vhr+B74Mvgz+J/3R/c493D38fe09y33/fZl9zT43Pjx+an6Pvui/N396/7A/3sAOwHOATgDmwRYBdYF4QVjBqwGogZaBlkFgQTBA4QDegMHA8sCwALeAlUD/gOIBFIEzAPJA+sDEQTQA8UDJQQ6BIAEqATpBOsEKAQRAwYCbAG5ANb/4f4K/iD+wv43/47/yP8bAIEAvwCOAOf/Dv8G/gP9Cfz8+tX5kvgZ98r1BfVI9DrzI/KB8XLx1/GJ8rzzR/X89vH4CPtp/Wv/DAGFAnUDfgSiBY4GXAe/BwsImAg4CasJbwkfCcIIQwg8CAQIjgcoB+wGBAcgBy4H2AYMBhcFGgRkA9UC9wHdAC4AWgC6AIMAFQAGAEsAlACbAFcAUwCbAMQAxgDDAAQBTQGnAbABNgHkAHEA7f8G/7L9RPzm+tn5p/hf9xj2uf
Rl8zHyUvGY8KXvG+9I7/jvH/FP8rrzq/Un+Lz61Pyy/qUAmwKsBGQGkgfSCDwK1wsODbUNEw79DdcNgg3HDOcL9wojClIJfgjOBxIHjQb2BWUFDAU+BGADjAK9Af0AggA2ALr/qv8PAFQATABFAG4AtADiAKQAQAAVACAAMQAyACQA0P+h/93/2f9z/9H+Bf79/Nn7qPon+af3KPbQ9P3zPPNi8rLxAPF98JvwhfBT8MHwu/Fd8+X0J/b+90/61PxG/1UBMwPiBKIGfggdCi8LswukDOkNBw/UDwQQ6g93D+QOYQ5fDQYMmAozCScITAeEBrQF8ARUBKYDBgNmAr0BMgHQAHoAFQAFACgAGwDv/6D/KP+6/m7+Hf6n/d78M/zZ+6r7xPva+/37JPxJ/Jj8svyx/I/8LPyo+/36Zfq0+cf49vcs92r2ovV99GnzbvKA8dDwR/CG8F7xpvJ99D/2Kvhf+tn8ZP9/AXgDTQUNB+IIrwpuDNgN/A4rEC4RBhKMEnASDhI9ETkQSg8nDgwN0wuhCqEJtQgCCEEHUwYQBXIDCAKvADP/uP1i/GL7x/qJ+nP6U/oh+g76//ns+en51vnL+dH5IvrD+qf7pfxy/S/+mf6l/ov+W/4S/oT94Pw1/Iv7Avt2+vz5Vfl/+KL3i/Yw9aXzR/IA8QbwgO+V72zwnvGV87r1EPiB+rH8Tf+VAd0DJwYyCJAKqgzEDsQQfxJDFGQVERZIFv4VgRVmFAsTghHgD30OEQ2zC1IK3wiMByYGjASxApcAb/54/PL6uPmM+JD32vZ99m/2f/bC9hH3YffU9zr4wvhV+eD5pfp7+278P/35/Z/+7f4V//L+qv5X/tr9bP0P/Zr8KvzT+2D73fou+lz5a/hK9yz25vSq85DyovFK8YzxVPJ+8/H0v/bg+Dj7jf2Y/6sBBgRZBssIOguMDa4PghFoExsVWRbsFhkXRxcOF28WTRW/E+4R7g8wDnwMnwrDCOUGGAU5Ax4B8/6v/Ev6Fvgq9qj0bPOF8h/yFfJU8qvyIPPK84L0I/XT9Zf2gven+Mb5HftR/Eb9Tf5X/38ASwGvAfEBBAIGAsEBUAHIABMAd/+3/sX9oPxT+w36mvgW91/1x/OM8qrxefGW8UHycfMP9Sn3P/k2+yz9Sv+NAQkEmQb3CB8LUw3HD2ISzBSXFvUXCxnOGT8a6RnmGFoXXhWCE1wRBg+LDPAJfQfgBFMCo//3/H766feI9VHzZPHw7+DuQu7s7QDufe447z/wO/FC8mTzq/Qu9pj3FfmS+hj8r/0P/4QA1wEFAwsEyARYBXsFUAXXBAwEDAPnAZcALf/r/dL8yfuI+iL5vfdd9lL1bPSs81fzOvPF89/0T/Y7+AX6+fsv/lYA3gI+BXgHngm7CzAOhRDhEvkUlBZBGDIZvhmpGeQYjBdLFUsTaxCQDT4LbQjlBVwD+wDm/tX8yfqd+EH2HPQ28rPwbu9A7rjtm+0J7t3uz+/v8NTxBfNN9G31ovaJ97/4FfqE+1z99f6hADoCuQMrBesFVgZHBgUGtwUXBXEEdQNSAkABHgAG/7f9MPzN+nn5LfgI99/19vSI9IX0J/Uw9m33B/nG+tD8/P4NAUkDggXXBzcKeQzMDvQQHRP2FEoWVhe6F6sXAReiFecTqhFXDwsNqApOCOYFjgNhAVP/UP1A+yD58vbJ9NTyB/Fu7zDuV+0P7TXtt+2I7oXvyfAp8pzzB/Vs9uD3XPkP+8L8bv4vAOcBswNbBZkGgwcFCDwITwjvB0AHTQYnBRsE7AKXAfv/Sf6p/B/7x/lA+Nb2iPWP9CX0F/Rt9B31Uvbv9/X5/vv9/UAAcALnBEEHVgmLC4kNuQ/GEXUT6BTOFYUWuBZTFmkVrROoEVwP7gyiCh4IswVaAwUB+P7b/LX6jfhI9h30F/JB8LLuhe3G7H3syex+7YPu4u9h8fHyivTy9V734vha+uj7ZP3t/psAfAJ4BC0GhwdoCBEJjA
mZCUsJdAhsB1sGQQUgBKQC+gA8/5L9HvyD+t/4LPej9Zj08PPM8+zze/R/9en20/jM+sb84P7+AGQD2QUYCDsKLQw+DlgQMRK1E68UThWhFXYVuxRBE0wRJQ/xDNYKmQhKBgMEzQHH/8b9ufuS+Uv3IfUS8yzxh+8d7i/tv+zH7E7tJO5Q78HwSvLn82f18Pae+GL6Pfzy/aL/dAFmA2wFIAdoCE8J6wlyCqIKXgqeCX4IYwdGBhUFpgPmARUASv6Y/O/6MPl29/D1svTm83PzVPO183H0s/U69+T4wvqg/Mr+DwFoA80FAwhfCqAMyg6/EDwShhNtFPkUGhWTFIMT9BEeECkOGQztCbYHhwVpA1gBLv/h/IH6JfjP9Z7zkfG871ruWe3f7N7sKu3w7RDvefAd8rvzaPUn9+v4zvqt/JD+cwBQAjQE9wWHB7wIkwkuCoIKlwppCuEJKQk9CB4H9AWNBAcDYgGN/8f94vsS+lX4rfZJ9RH0YfMX8zbzwvNn9H31v/ZF+B/6+vss/noA+wKnBS8IvAobDVsPZRH4EiwU1hQJFdYUBhTMEiERKQ82DSILFAnpBqsEhwJQACj+4/uK+Tn3+fT18jDxqe997rrteu3H7W7ubO+c8PnxkvMk9cr2X/gB+t37wv3I/6oBbAMxBdwGfAi/CZUKFgtNC1QLEAtqCmcJLwjaBocFCwRdAo8Akf6v/Mn65/gi92P1+vPx8lfyNvJg8uvy1fMT9Z/2TvgZ+hz8R/64ADkDvAVICMEKTg2jD7kRWxNzFBwVLxXLFMsTURKHEJAOpgyrCqYIigZdBDsCDwDX/Xz7B/mX9kr0PvKN8D/vbO4a7kfu6e7W7/nwN/KL8/f0YvbO9zb5t/p2/GP+bQBuAmEEUwY5COgJJwvWCw8M+guiCwULCgrGCGEH+QWUBAoDTAFe/1/9Z/t4+Yr3nfXg833yqfFM8VzxvfF18qLzGvXi9qn4gfp3/Ij+7wBQA9gFaQj7CrMNJRBcEvwTDhWdFYIV9hSyExUSOBAjDjQMGgoMCAUG8AP5Ad3/pf1K+9X4fvZH9FXys/Bw77vuhu7k7qbvpPDc8R3zivT29VD3vfgL+p37X/1F/2gBZANzBW4HOwnGCr0LPww/DPELVgtlCj0J0QdwBgsFmAMJAjUAQ/5D/EP6OPg69kP0jfJp8aPwefCu8CjxSPKJ8z31HPe9+Lr6oPzb/lcBvgNyBvoIxguJDvIQCRNWFE0VmhVlFa8UJROKEbEPzg0TDBAKHggvBjsEVAIpALD9J/uP+B729vPy8UvwJe+b7qfuLu8H8AHxQvKb8xP1mfbj90v5s/pS/Dr+HQAeAgUEAAbyB5UJ6AqQC9ELvQtIC6QKkAlQCP0GkwU/BMACEQFE/1H9UftO+UP3VPWC8/Hx3/Av8BXwdPAp8VvyzfOa9X33Y/lq+3b90/9IAtEEdwcUCt8Mew/PEbgT+xTXFRYWwhXQFEQTehGCD48NlguTCaYHzgX9AwoC0v9e/dH6N/i89V3zR/G577nub+6c7jzvRPCP8SPztfQt9nf3ovjb+TL7q/w+/uP/vAHOA+sF6AdpCW4KDgtCCysLiwqMCVEI9wa2BVAE3AJKAZ7/+f0m/FT6Xvhh9oP0rfJR8U/w2+8G8IDwmfH38qf0ovaB+Iv6mvzR/ksBxQNpBhIJvgtxDtoQ9BKMFJQVJRYIFlcVGBRlEpgQpQ64DMYKzAj/BiYFVQNUAQH/ofwV+qL3NvXm8gnxpO/67tbuGO/V79bwQPLD8yb1bvaA96/4/Ple++X8a/4xADMCWgR8BjIIgAlXCssK6wqJCrUJgggsB9wFigQoA6QBCQBr/sr8F/tL+V73dfWp8yHyBfFO8BvwZfAu8X/yHPTt9cz3rvnC+/n9VwDHAjoF1QeOClgNARBHEiEUfBVVFpoWHhYAFWYTkBG1D8ENzgvfCQsIVwaLBJMCWADx/Xr77/hk9ujzt/EP8PzuhO6G7gHv+u
9G8dbyQfSF9aT2qvfp+Cb6k/sW/bf+vADHAvIEywY9CGsJEwp8ClgKswnACH0HTAYCBacDPQKsADv/qf0K/D76MvhK9l701fKX8Z7wVfBd8DHxXvLC84L1Hfcu+UL7gf3x/zMC1QRwB0UKIg2sDw4S7RNxFXMWwBZvFmkV+BNHEmkQhQ6SDKoK4QgiB2UFfQNeARf/p/we+oP37PSi8sXwdu++7nfuyu6U78PwOvKB88X0z/Xa9hf4Ofmk+gj8p/2l/5kBzwOuBU0HrAh9CSEKJQrMCRMJ7gfFBmIFBgScAhABk//+/Wv8u/rg+PX2FfV98zTyWfHk8PTwlvGp8ij0sfVp9zz5Oft7/a//DAJkBPcGyAmKDEgPkRGaE0sVbBYOF8AW1RV0FMQSFRElDy4NPAtnCdQHKAZYBEAC9f+U/Qz7ZPit9SnzF/GZ76fuP+5N7uzu+O9R8bvy8/Mm9S72Xveb+Nz5VvvR/K7+sQDGAtMEeAbrB/oItAkLCtIJOQlCCBkH3QWCBBoDowEkALH+K/2T+9n5/Pcu9ob0OfNG8qzxivHW8bjyA/SK9TP34fjO+tj8FP9kAaQDKwbSCLMLig4bEWoTKRWcFn8Xjxf7Fr0VJRRSElsQUw4jDB8KVgiYBtAE6gLAAG7+//ti+bH2AfSu8czvcO7E7Yrt7O3R7hXwj/Hu8kb0dvWp9vP3MfmL+vL7of2O/6gBzgOvBWQHtQi+CUIKOQrHCc0IwgdtBgoFoQMRArEAIf+t/S38h/r4+Cv3kfUZ9NzyKvKi8dbxdfKD8yX1n/aO+Gn6cvza/vYAeAPFBUAIBguiDU4QfRJjFP8VDxd6FxkX8hWJFOAS8BDbDoMMcQqOCNEGKAUyAyQB7P6F/BL6Yve/9EPyLfDF7tDthu2l7U/ubO+z8DTye/PO9BP2Q/ef+NL5PPvL/ID+kwCRApQEXgbSBxgJ1wkwCgUKUglnCDsH9gWiBCMDowEiAKL+Ov2l+/r5R/h89gf1uPPJ8jHy2vE78gnzZfQD9pj3bPlm+6/9FABKAoME0gZgCTAM1A4yES4TyhQ9FiUXXRfIFlYVuhPXEe4P3g19C2cJZgexBfMD2wG5/1z9+fqD+Nf1SfP08Brv3O0g7f/sZ+1O7pDvCvGB8ujzQ/WD9uP3N/mY+hf8mP1+/4QBrwPJBXEH7AjpCZgK0gpfCooJRwj+Bq8FLgSoAvAAQ//F/TL8svr/+DT3pfUh9AvzPfK88dnxbfKR8wb1nPZg+GP6jfzq/kEBaAPDBUEI7wqMDekP/xG9E1gVghb2FpkWphVDFJQSxhClDlMMHAoWCDAGVARkAkAA/f2/+1P5tvYy9NDxye9N7l/t8ewO7dDt7u5F8NjxTfOz9CL2fvfc+Cz6oPsm/dr+8QAJAxAF4wZtCKQJhwr8CsgKGgodCewHlgYhBY8D4AFDAMn+QP2z+x/6cfjZ9mv1L/Q/86DyfPLJ8onzvPQd9sz3svm2++L9AABEApkEHAfHCTsMrA7UEMkSgRSiFUEWBhZKFSUUjhLWEL4OlQx7CnwIwQbsBBsDGQHn/rP8Pvq39yH1ovKG8Mbupu0N7ffsje157sLvUvHQ8mD0t/UC92j4p/lD+8f8Y/5eAEQCcgRWBvcHVgkoCtMK3gpuCqYJeAhCB90FXATCAgcBdP/f/Tr8k/rJ+DD3xPWK9Jnzz/KK8svyfvOd9Nn1VPcl+TL7df2j/80BEQSLBkEJ3wtRDoUQgxJVFKYVYBZFFnsVYRTMEggRAA+5DKkKrwj4BkoFbAOMAYL/ZP0g+4v45vVc8yTxY+8D7iTt1ewj7RHuS+/F8EXy0vNt9eD2avi9+Tf7xPxb/kYAHQIeBPgFnwcLCQ0KtgrfCpMKzwnGCIEHGwafBPwCSwGW/+n9PvyO+sz4HPeh9Wb0jvMI893yPfPp8/n0OfaX9135K/tO/Wf/dwHLAzIG8wiKCwoOXRBZEkIUnhVXFjsWbBVAFJgSzxDGDoMMcw
qJCOEGMQV5A6YBof+d/Vr7z/gg9pDzPvFg7/3tD+217Ajt/e0177TwH/KK8xn1ofY2+JL5AvuS/ED+OQAdAuEDmgU2B7YI3QmICqgKSAqqCb8IjAcbBocE5wJEAaj/6v0X/Dv6ZPiz9jf1HvRl8xjzV/Pv89/0CPZc9/j4sfqh/IL+UgBHAmAEuwYhCXMLvQ3xDxoSAhRGFdEVnRXbFK0TBxIaEOENpguvCdYHJgZmBK0C5AAG/xT9vvo4+J31G/P28DPv+e1E7Sbtxe3V7krw2/FX89P0NvaZ9934C/pB+6D8Q/4VAOQBpgNfBfYGcghyCf8JCAqoCRoJJQjuBnEF1wM/AqoAGv9j/Zz7z/nu9zr2sPSO8+byy/Ju83v0DvbG96z5fftO/SL/gQApAswDowWzB6sJxAvCDd0PyhFNE04UlRRaFHsTExI/EPsNrwt/CZoH9QVmBO4CXwHb/zL+T/wo+qz3PfXT8tHwM+8e7qbtzu2u7ufvlPE288D0J/Zf95D4q/nd+gT8Xf3i/pMAXAIKBLYFIwdmCFoJ1AnmCXcJywjSB6EGTQXDAzUCkwDy/i79TPth+Wb3rfUv9Dnzu/LW8r3z2vSu9pf4cvp7/Db+EQCZASoDrAQhBvsH0gntC/8N9A/PES8TORScFEEUThOsEa8Pbw0aC+QIvwb5BGUD/AGiABr/cv2H+3X5M/fR9H7ycvDe7urtpe397f3uX/Ah8uXzffXt9gf4MvlM+nz7zfwU/qL/RAEbA/wEpgY0CFYJNgqnCpcKGAoiCfsHqwZJBcgDJgJuAKr+7/wh+0f5X/eC9e7zzPIs8jHyxvIA9Mf10/cZ+iz8Mv4YAMsBfwPwBHcGDQi+CbELgw1cDxERghK7E0oUOxRqE+QR+w+rDUgL6wigBqoE5gJoAQsAlf4Y/U77Wfk89/T0zvLE8CzvI+7F7TTuMO+v8G/yRvQW9r73J/lM+mD7XPx+/an++v+FASQDCAXLBmwIwQmWCigLLgvJCvgJuAhVB7wFKATuAscBAwC9/Tf70fiv9p702fIt8VHwfvA/8cPyp/TX9lD53vt6/twA9ALOBHcGHQjfCZoLVg0hD8IQKRI2E70TvBP9EpwRng85Db4KLQjIBZwDxwFnAHH/xP7q/bz8JPtA+WP3i/XX80nyGfF78HvwKPE38oXz7PRL9qP3zfjJ+aX6b/tu/If90f4zAJoBKAO1BFkGwAfZCJkJ/gkuCvwJfQmTCDsHxwU2BHoCdwAg/rb7UPke9yj1RfO78ZDwDvBT8Dfx2vL99Jv3e/pu/XgAMwONBZ4HWwnxClcMpw3wDgUQBxG2ERASCxKWEcgQWw9oDQQLcgj7BZYDbwGB//D91vwM/H772voD+gL53vfH9qD1fPR887Xyd/K18oPzr/T/9YX3Bfl3+qj7jvxV/ej9c/4n/wsANAGMAhYExAVCB44IgwkPCjoK2QkvCQ8InAYWBV0DoAF5/yL9wPom+Lj1gfOU8R/wC+8K75TvZfB28rX0evex+sD90ACtA2MGywjkCoUM+Q1xD7YQphFlErwSzxKIEswRrBDFDqMMPAqAB+sETQL0/979GfzZ+tn5O/ms+Bb4j/fZ9hv2TvV49NDza/OG8xf08fQs9qH3SPkG+3X8k/1p/u7+Wf+6/0sABwHjARoDaQSqBcQGqgdFCHoIjwhXCL8H3gafBTgEngK0AJH+EPxg+bv2H/QN8mrwdu/b7irvd/Am8ZHzTvZO+HX7X/4vAQUEoAYVCToLHA3JDkQQeRFgEtAS4hKhEvsREhHPDyUOGAykCRIHcAS+AUj/GP1E++b53Pgf+Jf3Pvfr9pP2IvZx9eP0fvRY9Jr0JPUr9nP3BPnT+nz80P3A/n7/+P9SAKYAAQGrAYYCfAOABG0FIwaIBs0G/gYIB9sGfAblBf8ErwMMAvf/hv3S+ir4zPWt8/vxb/CZ7w/vgu958N/xOfSB9n35m/zk/wUDygVTCIMKYgzoDV0PfB
BgEQwSgBK1En8SDhInEbgP9Q2VC/sIOQZfA88AWP5d/M/6kPmu+M33HPdf9sz1ZvXr9In0OvRZ9Mj0hvWg9rj3A/l3+gL8ff2a/q3/egAaAbQBKQK0AjsD0wNYBLME/gQyBV8FjwXGBeYF1gWABc4EjgPhAdz/m/1S+/v4x/bP9CPzwfGz8A3wuO8R8Crx6PJ09WX4kfvA/t8BwgQyB2QJRgsBDZYODhB7EZgSSxOXE2sT8RILEsgQJQ8HDbEKMgiuBS4DewDh/Yn7jvkO+Mv23vUx9cX0uPTW9PX09PTk9B31hvVB9kL3evga+uf7zP1r/5cAfQEcAosCxQLqAh4DfgP8A2sEwwTjBO0E7gTtBO4EsARCBJQDlwJGAYH/jv18+3z5nffU9U702vKI8Xrw1e/s77nwUvK09Kj37/oe/hcB0QNCBncIeApvDGAOGhCEEYESIBNkE0kT7RJZEpQRghAFDwkNmQrGB7wEpgGt/gz82/kz+Pf2//U29Yz0E/TF85jzjPOj8wb0xPTU9RX3Xfi7+Uf7FP3o/l4AaQEpAu4CpwMdBGsEsAQrBaQF1wXCBW8FFQW9BGkEGgS0AyUDVQI1Acb/Av7++9350vcB9m/0DfPd8QHxh/CG8O3w1fFo85j1Tfgv+/79rQBHA+IFfQj6CkYNSQ8CEWMSSRPAE+ET5xPUE5UTFBMzEswQqA7aC6cIUQX6Abv+zPtc+WX3v/VU9C3zUvK58WLxW/GY8QDyf/Ik8xX0WvXk9qP4lvrF/Pj+3AA6AiMD8gPMBLsFiQYcB38HoAeHBxsHdgbRBU4FDAXUBG4EtQOAAtgAyv6T/In6zvh692f2f/Wq9OHzRfPZ8rry+fKl88/0VPYs+Dv6a/yp/swA5gL4BAcHCgnaCoEM4Q3+DuoPsxCKEVMS6hIdE7MSphHhD48N6QoSCEIFjgIkAA3+K/xr+r74PPft9c302vMN83/yLvIa8kHylPId8+Dz6PQ49rr3T/nX+kv8sv0o/7IAPgKzA/wEEwbtBo4H/QdVCK8ICQlRCVsJBwlMCCcHnwXGA70Buv/g/UP82fqV+W34X/d39rf1KPXU9Nf0R/Uc9iz3XPiZ+eX6OPyP/e/+WwDPAUUDswQcBncHwwgLClYLmQykDU4Oig5WDsANxgxuC9EJCAgpBkIEWAJzAJ7+9PyG+1D6QflG+Gb3rPYn9tL1nfV+9YH1xfVb9kL3afi0+Q37Yvyn/dX+5//iAMcBmwJiAxYEsQQ3BbEFKAabBvkGKwcfB8UGEwYKBbwDUwL8ANX/1f7l/fH8+fsM+z76n/k2+Qv5KPmN+ST6zvps+/T7d/wQ/dH9uf6u/5wAawEbAq4CMQO4A1YEHgULBvsGuwchCBkIrAf4Bh4GNwVOBGUDdgJ9AXQAY/9j/o395vxg/Of7aPvi+mL68vmY+WD5Wvma+SL64vq++5n8ZP0Z/sP+cP8hAM4AcQEGAogC8QJBA3oDpwPRAwYESgSFBJoEbQT5A0gDcgKbAeIASQDG/zz/m/7f/Rn9ZPzg+5z7mfvF+wr8Xvy3/Bv9kf0m/s/+gv81ANwAbQHZASICTwJyAp4C4wI/A6ED6gMCBOEDhwP7AlECoQHwAEUAnP/+/mv+5/17/Tb9Hv0i/TD9L/0J/cD8Z/wY/OP7x/vH++v7Pfy9/F39EP7P/pj/bABEARYC0wJ1A/cDUwSLBKMEswTIBOYE9wTjBJoEHQR7A8UCBAJCAYwA7P9p//v+lv4v/sf9aP0f/fP84vzt/A39Pv14/bf9//1U/r/+Qf/T/2AA1wAyAXQBqwHfARsCZgK7AgsDPQNAAwsDpwIgAn4BxgD1/xX/Nf5e/Zj85PtH+9L6kvqM+rb6A/td+7T7//s5/GP8jPzB/BL9gP0H/qH+SP/1/6gAYAEcAtYCgwMbBJUE9QQ4BWEFdgWBBYcFhwVyBTwF3ARSBKED0gLzARcBUwCx/y//uv5D/sf9VP35/Lv8k/yF/Jj8z/
wn/Y799/1i/tv+Yv/q/2cA0AAqAX4BzAEMAjkCWAJ/ArcC+QItA0ADJwPoAowCDwJyAbwA+P82/3b+sf3n/Cj8iPsa+9/6xvrB+sb60Prg+vP6B/sj+0/7kfvo+1X81Pxr/SH+7v7D/5oAagEqAtgCdQMEBIEE7QRLBZwF3QULBhkGAQbCBWAF5gRUBK0D/gJPAp0B5QAqAHH/vP4Z/pD9Jf3U/J78hfyG/Jj8t/zs/DX9kv0E/on+D/+G/+b/MgB7ANsASAGqAQUCYwK/AgUDPANhA2QDSwMjA9kCZwLiAU8BsgAVAGj/vP41/sX9df3J/bT+sf7j/NX6d/qD+/n71/qE+c35T/v9+0H71frk+5H9if7Q/nj/0QD3ARICqQHVAaUCYQO4A/QDUgSXBHwENwQjBBgEqwP2AnICKgK/ARsBgABTAJwA0ABiAGz/m/5P/mD+X/4U/sz95f1b/r3+sv6F/rX+Xv/7/xoADwBEAMgAQQFtAXEBowEWAm8CqQLPArUCeAIaAsABegFVAf0AIwDO/2IAfQAc/8b9TP5l///+Ov1A/Er9RP5L/U/72fo7/Pr8c/wH/MP8wP0G/gH+Mv6p/gH/AP8o/7b/AQD9/0gA5QAGAegAPgHMAeQBhAFAAWsB3wEBAuUByQEKAl8CPQLwAb0BngGGAX0BnAGOAS4BxQBjAGgAdgAtAMD/lf+//5z/SP87/4//5f///+v/0v/i/y8AbAB3AKYABgFNAWMBTQEQAe4ACAE2AR8B0gCoAMwAFwEHAZAADgCt/1j/E//j/pj+Jf7P/bn9l/1X/TX9Sv1j/Xv9mv2N/VL9Nf0+/Tn9J/03/WX9eP1x/VX9QP1c/c79c/7X/tX+5v6V/24A5gA/Ad4BpwJoAzIEpwS4BBUFrAXABYoFawUcBaoEkgRUBGEDlwJAApABmQAFAH3/zv6M/nj+/v2O/az97f0R/iz+FP7o/Qz+XP57/pH+sP7x/oL/+P8iAIoAYgEQAh8CFAI3AmUCmQJ3Ai8CKgI7Av0BeQEmAcYARAA0ADcAt//V/gn+lv1P/Rj9jPyp+0j7Yfta+y/7Ifsx+wv7zPrQ+g37Wftp+0T7Pvtq+7j7d/zR/QX/gv+7/0kAcwENA2gE9AQfBQsGcAdDCIMIdQhRCGUI4AguCasI1gf9BgkGQAWCBE4DsAFNADj/JP5Y/bz83Pvl+i365vkg+ov6o/pu+pL6I/u1+1j8Av16/Qz+Af8rAFkBbALTAoECqgLNA9wEIAXkBJgEuwRuBccF8AS0A/gCmgJIAo4B9/8e/k79Pf2o/Hf7WfqC+QT58/jl+H/4Afi797T3wffH9/33dfjc+Bv5xfkL+z/8J/00/rf/gAH3AvwDJgXcBm0IHgmTCW4KTwvoCykMGAzTC6ELQgsuCt8IoAcpBpoEFgN3Aaf/Cv7A/H37Y/p3+bf4ZPhI+Pb3mvfL91j4xvhI+dz5cfot+xD8D/0T/hj/BwAzAbwC+gPNBFgFqQUUBuMGrAejBxgHsQZqBjgGxQWuBDkDIAJcATwA4f64/ar8s/vQ+vf5M/mY+A/4kPds93f3PPf19vr2JvdG96n3Yfgj+df5kvqK+8j8Nv6y/1MBAgMwBCgFrwafCCIK+Ap7C/ILoAxoDYkN2gwBDHULwwprCbMH0gXwA0wCpQDH/t38KfvN+bX45fcY92j2TvZu9mX2TPaH9mT3i/ie+Xn6ePvk/Fb+n/+3ALcBwgIABDgF7AVZBrgG9wZJBxsI9AhoCCAHpga7BpEGsQUyBGACEwGQAG3/bv3G+/D6gPoK+oP5hPib96P33/fV94f3Ofdb98H3L/hc+Hv43/h3+X76l/ts/Fz9eP6l/8gAEAJ4A8UEJwZeB2YIiwmPCkkLngubC2kLLgsGC3wKYgkLCKIGbQVmBO4CzQC5/iD9tvtV+gf5wPfF9m/2bfZ09rL2GveJ9zv4U/l++pz7pfyG/Yj+6f9WAWECQgMzBBYFAAbkBnQHpA
fMBxIILAj7B6EHDgdKBnAFfASIA7UC2QG1AFP/LP5V/XP8ePuQ+tn5T/kD+ff4yPhb+AT4A/hj+Mr41Pib+KP4KPms+Q/6fPrr+qj7yPzq/a3+Rf84AGkB1AJtBGoF2wXABmQI0glNCkwKOgoqCkkKJApICfYHtgbDBbsEigMuAoEA+/63/WX86fp4+Y747/d490/3QPd89wr4xfiN+VT6fvvH/LD9a/5h/78AEQI0AzcE3ARpBU4GZAcVCBgI2AerB4cHdgcfBxAGqwSwA1ID4ALTAY8AYf+G/iz+uf2e/Ev7nvp2+iP6xfmA+S35Fvlz+cj5lvlO+W35s/nW+a/5cPmV+QL6WPqb+vn6tPv6/D3+w/4A//n/rQFkA9oE3QVNBhcH/gjsCm0L4QqJCpYKmQo4ChEJZwcPBjIFEgRZAngA0P5V/fb71/rF+aL45Per94f3hff79674VPkG+t760vsC/X7+1v+yAIYBvgIjBGQFWQbjBjwHwwdjCL8IjQjQB/0GSAa5BVMFeQT6Aq0BBAGRALz/ef5J/aj8g/wp/E/7SPqe+Zz5y/nH+Y75WPmC+e35Nvr6+ZL5p/n5+fj5s/ma+eP5XPqu+rv65PqS++z8cP45/6L/rgCmAuEElQagB0MIMgncCn8M/wxVDIAL7ApyCtgJoAi4BsEEFwOdAREAZP6f/O76qPnB+BD4f/cV9w73S/e594L4ofnt+h/8MP1r/uf/kQEhAzgE5wS8BdIG0QeFCKwITAgFCBoI6gf8BqQFbgRgA3MC6QEGAVL/6v1u/Wv9OP2i/O77Z/uO++v7qfst+wP7HPtL+4j7t/uM+1f7dPua+1b7zfqY+nL6Ovor+gn62/nt+Vz61foN+0r7MPzE/Tn/ewDPAU8DJgUgB98ICwrOCtIL8gx0DQoNAAy9Cl0J5Ac8BkkEOwJEAI7+9PxK++n55fgI+Fr3/vYR93/3N/g4+U76f/vz/LX+fwDkAeQCygPpBB4G5QYpBxcH8Qb6Bv0GlgaoBYsEpQPlAh0CHAHT/4/+vP2U/bH9dP35/Nz8Z/1J/vz+Kf/3/u3+UP+z/4r/4/5G/uL9df3y/Fr8pPsS+6r6HfpU+bf4j/id+LH4y/jg+DT5BPoD+8r7YvwU/R/+pv9GAXgCSQN3BGQGbAj6CdAK3ArZClELnwv6CosJ2gfzBecDCgJPAKD+Ff3C+6r6qfkR+R35ZfnJ+Vn6Hvs6/KD9Jf9uAH0BiAKLA2UExATABKIEbAQ4BMsDDQMWAhQBfgBYAEQA3P8j/6D+kv7+/mr/e/95/47/7P+lAJwBcQK2At0CNgN3AzgDTQJFAYgABwA4/5b9uvtQ+n35+/iV+HP4afhl+Hj4s/gf+Y/5C/qq+kT73fuB/EH9yf3x/Qj+Sf67/hf/NP9S/8P/kgBtATICHAN5BHgGdAh5CWkJHglMCZwJbwl+COMG4AS9AvMAn/9z/lX9ZPy/+2r7f/vR+/L7Ffy+/Ob9+/6J/7//4f9BAAEB0wE+AvYBdQE9AUIBVwFIAQ4BpABiANwApAEQAhMCIgJvAsQCNQOXA3ID2AJMAkoCjgJ9AvMBAQESAIP/Uf/c/o79BPwc++X6xfo++mn5kvhG+N342fmA+sH6Gfum+zf8rPzx/OD8ufzs/Fn9k/1U/fL8h/wo/Dr8o/wz/X791/26/h4A+wHkA+MFFgiTCtQMvQ1iDY4M3gsDC5gJsgchBQYC7/5y/K36VfmR+HX4x/hg+R/68fri+zP96f6TAO0B3gJDAzgDMwOLA98DpQPvAicCgQEEAacAUgAVACQAuwCqAZYCPAOSA9cDPgTPBB8FwgS6A2YCPgFfAHT/Sf4I/Qz8cvvw+lL6nPlB+Xf5A/qh+gL7W/vZ+2D8Fv3m/bP+Uv+///P/i/+0/tz9Ov3T/IH8OPzH+wv7dPpy+tb6afv1+2j84vy0/Wb/RgHYAjsEvwX9B3QK/QyfDkYOkwydCnYJMAjuBSMDCQBS/fz6Lv
n79yn3dPfj+CL7aP0D/0cAWgG7AmAEqQVLBv0FPwV3BLcD3gKdAWgAWf+Y/lv+aP6R/qX+CP/4/1gBCQOWBKwFEQbfBWcFgAQfA04BOf9J/fL7Hfse+sL40vf990P5Nvv1/BT+yv6g/8wAqgEBAg4CHwIVAowBmQAl/3/9Xvzb+4f71Pr2+Tb5r/ij+DD5K/oo+xT8Df3c/V/+zv4u/5v/QAA+AVgC9gJTAw0EdgVLByMJzAqJC+wKggncB0UGeARrApUAEP/H/XD8P/t7+kT6Nfsk/YT/kgHYApUD4QMSBOsDeQPqAiYCqgEnATQAwv5d/a38yPy//fH+8v+2ACUBlQE4AhwD2QM1BFAE/gMkA5kBpP+9/Sz8Q/s7+8v7PPyG/Of8ov3o/tEA7wIvBH4EHARTA00C2wBf/979mfy9+8r6lPkH+Ar3YPd9+PP5S/tB/ML8JP3z/dz+q/9nABwBfwHjAI7/Iv66/JD7QvsD/Bb9Sv60/30B2gOcBgMKcw1SDx4PoQ3PC4gJlAZiAzoAiv0g+w/5wffE9mH2dvcQ+kP9TgD9AuAE4AUiBu8FcAWhBKcD2wIyAuMAMP9//QT8J/sC+737Gf2+/igAKQEeAswCWAMgBP0EgwUeBd0D8AHB/+L9cvzB+8P7Nvzt/K/9kf5r/zwAKAElAhEDRANzAtUAmP5G/Gv6GPlw+GT4v/iA+Xj6mPvK/CH+zv9rAY4C+QK5AhACCwHi/6D+Mf27+5T65/ls+bD4tPdh92b43/py/iUCuAUSCSwMSg+iESUSnBC7DbcKkQeXA+f+aPrs9n/0a/Po8+30SPau+H38RAGUBewINQvvCz8LsAnEB1cFjQJiAPD+rv0n/Er6kPh393r3EPnU+7f+IQHmAiIEOgVMBiMHdwcmBxgGYQRIAuX/dv1L+8r5ZPnS+cP6xfvV/C/+Yv/gAIMCVANEAzQC6QDl/53+Uf1A/Jr7oPtJ/G79Z/4X//T/8QClAbUBSwFAAHD+nPwc+9b5u/jQ93v3k/e19wP4f/j5+Pv5L/xH/9MCFga8CHMLEQ6ZEJESMxJ6D4ELQAdfA2L/Zfsc+NL1vfSm9HL14vbJ+Pv7pQAGBgMLIw4pD0MOmwsoCKkEZQGE/kb8/voI+pf4yPZl9Rz1MPbc+K/8ZQBLA4AFUQfOCMQJEgqmCW0IlQZtBPYBCv/Z+xL5YvfU9hL35fcs+fb6YP0SAJwC5ASKBjIHHwc6BqoE4wLTAAD/zP3I/Pn7bPsd++36rvr4+rj7UPyv/L/8mfwp/Iv7Rfsy+3P7FPy+/JD9+f2m/SL9tvzU/JH9iv4UAF0CAAXhB7cKYA2UDwUQeA6wCzQI3QTJAeP+fPxX+qD4fvfT9tr2xPcW+hT+/AKJB34KUwsXCm4HdgTPAdj/gv5T/WT8ifs2+r/4ofc59wb44Plb/FL/PQKyBKcGXAj1CdsKwQqiCW8HrgTWAVb/Cv1++j347vae9h33VPgv+pX8kP/8AnYG7AjICagJgAioBpUEygHr/lD8zPkG+KT2jvUz9XH1uPYJ+Zf71/2c//4AvQEKAiACvAEsAV0Ae//w/uX9UfzJ+nv5+/gL+WH5JvqR+0j+MgLzBs4Lvg9zEuUSHhE2DpIK+wZxAwwAfv0E+y74SfXb8vzxEvNA9jX7ZwCmBJ4HeAkJCssIfgZSBKUCYgFAADr/9/0B/Pj5p/gP+Ov3UfjN+XL80/+fA2YHlAqUDD4N4AxcC8wIyAWPAmX/afyn+U/3SvXo85/zk/TT9ln61/5sA1AHNgrkCzIM4QpVCEYFygEw/rT6fffd9O3yGfKj8hn0b/aC+Qr9pwCGA3cFcQZBBjoFnwPYAQEA9f3t++b5HfiU9oj1lvU59m73TfnI+9H/1ATWCWgO0xFzFIEVChTREGAMlQfdAlz+yfqY95/0efJg8arx2fIf9Sv5Dv77AlgHeAoWDKMLvQlqB+EEkQKAAAb/HP7a/Hn7//mF+K73lPfV+IL76P
7bArwGBApdDE0Ntwy+CrUHUAQ7AZD+KvwC+gb4ZvZt9TX15PWe96L65P6iA9YHxAoIDKYL9gkhB2sDVP9K++33jPXQ87DyafIV8w31JPjX+/P/swN+Bh8ITQhTB3AF8QJ+ABX+xfva+U34VveW9gf2cPaR92z59PtS/tMAtAPyBukKjw4TEYASOxIzELsMZAg/BGgAsPyc+Yf3MfYL9S30B/SV9Eb2avnD/WQC6AU2CHAJNQnaBwIGUAQEAxYCvgGTAcIAGv8V/XD7Yvrt+VX6i/tt/fX/2gKmBakHVAjhB68GBAVVA+oBzgDt/w//Gv74/J37I/o3+cX5n/s6/toA2QJCBKYE0wNXAiMAs/3j+9b6kfpf+hL6Y/pa+9T8mP6EAHwCpwP/A8wDyAI+AU7/X/3g+476s/ly+V35hPnX+bL6EfxB/XD+d/8/ADsBLAKIA94FNQhCCvILqgxFDHwKqAf0BIsCNwBr/h/94/uT+lH5U/jK9wf4bvnu+9L+hgHUA08FqwUjBRwENwOmAmUCqgLdApICAQLjAGX/z/0+/HT7dPsJ/If9kP+9AcIDKgXVBZIFmgSUA88CbAIeAs4BXgEPADH+d/wo+8r6JvsW/Lz9V/+PAFoBfgEyAX0AfP+b/tX9Jv3L/Of8fP0//gT/sP8iAF4AJwCD/9r+Df5X/fb8ivxC/CP87fsB/E38ifwJ/bj9X/7w/hr/Kv+T/ygA1wCkAd8CrQSpBqQIBwqKCjUK/Qh9B+8FLAR3AvcApf9X/tj8TvsQ+kX5K/n0+XL7Qf0M/68ArQHnAaEBIgHcANoAQAEEAqACBQPWAvYBwgB3/93+C/+y/9UACwIiAwQEmAS/BB0E4AKCAVwAmP8f/9v+k/4j/pP9E/3k/AT9ev02/i3/YQCSAZQCJQP+Ag0CdQCt/vT8Ufsq+qv50vmC+nr7mvyA/d79A/5N/tX+dv8SAJAA6QASAeUAcgDH/2X/Uv/8/lj+gP3q/KD8Rfxd/AH9DP5x//IA1ALCBEoGogeiCCoJLgmJCK0HkwYWBYgD3wFEALj+Ef27+7n6EPof+tD68fs7/VL+If+o/8D/o//B/ycAAAEEAsYCPQMGA24C3AFRATcBfgEMAu4CtAMcBEsEIgSoA/ACyQGLAIX/o/72/VT9zfxr/Bz86vvL+8z77/tz/Gv9w/4uADQBAwJrAhwCGgG1/0b+r/xR+5r6h/oH+7X7cPxQ/Sf+Bv8EAMQAMwG9AT4CjQJ1AiUC4QFmAb0AAQBF/57+Dv7I/RH+Tv6n/kX/5/90APgAqAFEArwCAgNDAy8D8gKxAjACjwH1AGcACACW///+g/4I/qD9af3C/YT+V/8uAAUBjwHmAQkC7gHoAeQBzwHFAb0BlQFkAQ8BtgBYAAsA//8kAIEA5AAyAYIBmwFbARMBuAA8ANH/h/8//wT/1/6Y/kT+pf3D/Pj7n/vE+078Mf0z/iz/DgCcAM0AwwBqAOL/i/9S/xr/8/7x/jL/jP/w/3wAEQGGAbgBrwF8ARYBpgBYACMA6/+w/4b/Wv/+/nz+I/79/RT+YP6z/g3/Y/+n//z/XgCuAPwASAGhAeYBCQIiAv0BlAEhAbkAVADY/0r/yP5Y/gz+B/5P/sv+WP8EAM8AcwHmASUCGALOAWEB7wCPADIAxv9b//b+s/6T/ov+q/78/on/TAAoAewBZQJtAkgCDQKlAScBsQBDAOD/bP/g/kb+kv35/L382Pw4/d/9uv6z/6YAbQHzARkC9QGpATcBvQA7AKb/JP/K/rT+6f40/5X/DgB1AL8A5wDlALoAggBoAHsAoADFAN4A0gCMABcAlP8F/3T+Ev7z/QL+Mf5z/sf+J/+O/wMAhwACAWIBqgHGAZoBIgGNAPz/d//6/oT+Jf76/QH+Q/7Q/pL/YABDASYC0QIkAwUDkQLtASABQAB4/9D+Qf7A/WP9Pf1O/aD9Pf4w/24ArwHBApgDCgT6A4IDxALbAdcAxv/U/g7+YP
25/Db8Cfw4/Lj8h/1//ov/iQB6AV8CDQNdA0ID1wJAAosBxAD+/zv/n/5N/kj+mP4J/37/+v9hALUA6wD4AP4A/wAAARoBHQH/AKwAGQCB/+T+Sf7P/WT9H/0E/RH9bP3p/Xz+Nf8EAOMAoQEfAmgCbAI/AvEBhgEJAX8AAwCt/2//Sv9B/2b/vP88ANsAhAEYAoMCtwKnAkwCrQHhAAcANv99/uX9a/0M/dP8wPzn/FD99v3X/t7/4wDNAXsC2wLnApoCBQI9AWEAkv/j/mj+Hv7+/Qb+Kv5n/rv+J/+o/zkA0QBbAbsB4QHHAXMB+wB1APD/ev8X/9L+tv6+/uX+Gf9U/5X/2P8YAE0AcwCSAK0AyQDaANUAuwCNAFsAIgDi/6D/X/8h//b+2P7Q/uj+Gf92//X/gAAOAYcB3QERAhYC6wGXASQBnQAYAJb/If/M/pb+hP6V/rn+8v5M/7f/MgCwABABWwGHAXkBRQHjAGEA6v92/wb/s/5t/jv+L/4v/j7+Zf6h/gj/n/9AAN0AYgGtAb8BlAEvAbMASAACAPX/EwA3AEIAKAD4/8n/ov+L/4r/nf+0/9b/+/8HAPj/4P/X/+L/AgAyAHUAswDcAAEBHAERAeMAqgB4AFwAUQBRAFUAOgAHANn/rP9+/1L/M/8t/zj/Qf9J/0X/L/8f/yf/Sv+C/8P/DQBcAJ8A0gDsAOMAugB4ACcA2f+L/z///v7N/rL+sv7N/v/+QP+I/97/RACzAB4BfQG/AdUBswFaAdQAMgCS/wz/qv5q/kT+Nf44/kr+c/64/h3/nv84ANYAWgG0AeAB3QGyAXABJAHfAKsAjAB3AGAAPQANANT/n/93/2f/cP+P/8T/AAAuAEIAQwA9ADkAPABPAGwAgwCIAHoAXAArAPf/0f/G/9D/5v/9/wsACADx/83/pf9//17/S/9K/1X/av+D/5n/r//C/9n/9P8RADQAWgCCAKsAygDWAMkApQBtACUA0v9+/zP/+/7a/tX+6f4P/0T/hf/P/x8AbAC2APYAIgE2ASoB+gCoADsAxP9P/+r+n/51/m3+h/6+/gb/Wf+w/wkAYgC6AA8BVwGRAawBngFrARYBrwBHAPH/tf+Y/4//lv+g/6n/tP/D/9r//P8kAEwAbAB7AHEAVwAvAAQA2/+5/6L/mv+c/6f/tv/G/9j/7P8EACMARABkAH0AiwCGAGwAPwAHAM7/of+L/4//pv/F/+D/8//5//L/5P/X/8//1v/o/wIAHwA2AEQARAAzABgA9v/U/7T/n/+W/5f/oP+v/8T/3v/8/yEASQBuAI0AoACiAJMAcwBFAA0A0/+c/27/Tf8+/0P/Xv+J/7j/4v/8/wUABwANAB0AOwBkAIkAmwCTAG4ANwAAANj/yv/X//v/IgBCAFIATwA8ACEACAD1/+v/6v/q/+v/6P/h/9n/0//Q/9j/7v8NAC8ARgBOAEMAKAAIAO7/4P/k//T/CwAcACMAHAAIAO7/2P/R/9v/8v8KAB0AIQAVAPn/0/+v/5b/jP+U/63/0P/5/yUATwBwAIIAggBzAFcANwAYAPn/3P/E/67/o/+b/5j/m/+k/7L/x//g//n/EQAkADEANAAvACIAFAAKAAMAAAD7/+//2v+7/53/hv+B/5X/vv/3/zUAZwCGAJAAhQByAGQAYwBwAIQAmgCjAJoAewBMABUA3f+v/47/fP91/3n/gv+P/6D/t//T//T/FgA5AFMAYABdAEsALgAKAOf/0f/K/83/3f/1/wkAFgAbABkAEgAKAAMA/P/2/+3/4//V/8T/tf+q/6n/tf/N//L/HgBJAGwAgQCFAHgAWgAyAAQA2f+0/5j/iP+E/4j/kf+f/7D/xP/c//r/HgBDAGYAfgCKAIUAdABYADcAGwD+/+f/0P+4/5//h/93/3T/hf+q/9//HQBYAIoApwCsAJoAeQBUADIAHgAYAB4AKAArACUAEwD3/9n/wP+0/7P/wv/W/+r/9v
/8//v/+f/3//v/AQAFAAMA+P/g/8X/pv+P/4X/i/+m/83//f8qAFEAaQBzAGwAWQA/ACEABADq/9P/wf+0/6r/pv+o/7X/zP/u/xgARgBwAI4AnACUAHwAWAAuAAIA3v/C/6//of+Z/5X/lf+b/6z/yP/s/xQAOABXAGYAZABXAD4AHwAFAPD/4P/Y/9P/0P/M/8j/w//F/9D/5v8FACsAUwBzAIsAkACEAGcAQQAdAPv/5//f/+P/7P/x//H/6P/b/8z/vv+5/7v/yP/a/+P/6v/l/+H/2//U/9H/yf/E/7//tf+t/53/kf+a/6r/2/8HADcAbwCVAMIA0wDPAMMAqQCUAH0AWABCACEACADw/9D/wv+y/7D/uv/O//T/GQA5AFIAVwBNADUAFwD7/97/yv+8/7f/s/+r/5z/jP+A/33/iP+h/7//4f///xYAJQAlAB0AEQAGAAcACgAZAC4APgBQAFMATwBHADMAJAATAAoABwABAP7/8v/i/87/tf+m/5z/nv+t/8T/4v/7/w8AHgAiAB8AFwASAA0ACQAHAAMA///3/+r/4f/b/9r/4f/u////DgAZACUALQA4AEgAWABpAHoAhQCHAH0AbgBXAD0AIwAMAPr/6//d/9P/xf+3/6n/mv+P/43/lP+k/7n/0//r//v/AgD///D/3//N/8D/vv/C/9H/4v/1/wEABwAFAP//+v/7/wIADwAgADAANwAzACUADwD5/+v/6P/0/woAKQBDAFYAWgBWAEMALgAaAAsAAQD7//f/7v/i/9T/w/+2/7H/tP/E/97/+v8YADEAQABIAEMAOgAsABwADQADAPn/8f/p/9//1f/L/8b/xf/M/9z/8P8HAB8AMgBCAE0AVABZAFoAVwBOAEIAMQAbAAMA6//Y/8v/w/++/7z/u/+6/7n/t/+3/7r/v//H/9X/5f/1/wUADwAQAAoA/P/r/9r/zv/L/9T/5v/+/xcAKAAzADcAMgAsACYAJQAmACgAKAAmAB8AFAAHAPz/9f/1//3/CQAXACMAJwAlAB0ADwADAPj/8P/s/+r/5f/i/9//2//Y/9T/0//X/9z/5v/v//z/CQASABgAGQAaABYAEwAPAA4ADgAOAA8ACwAGAAEA/f/8/wEABQAMABEAFAATAA4ACQAEAAEA//8AAAEAAAD+//r/9P/u/+r/5//n/+n/7f/v/+7/7P/p/+f/5f/o/+3/9P/9/wYADAASABMAEwAQAAsACQAHAAcACwAQABQAFwAWABIADAAFAP///P/7//7/AQACAAEA/f/4//L/7//v//L/+P/8/wIABAADAAAA/P/2//L/7//r/+z/7//y//f/+v/+/wEAAgAEAAcACwANABIAFQAVABYAFQARAA4ACgAGAAUABgAIAAcABgAEAP//+//6//r//P/+/wIAAgACAP7/+v/3//T/9P/1//f/+P/4//r/+f/3//b/9f/2//n/+//6//v/+//6//r/+v/7//z//v///wIABgAKAAsADAALAAgABwACAAAA/v8AAAEAAwAFAAQAAgABAP7//f/8//3//v8AAAAA/////////f/9//3///8AAAEAAwAFAAUABgAFAAQAAAAAAAEAAQAEAAQABQAGAAYABgAFAAUABQAFAAUABAAEAAIAAwABAP7//P/6//v//f/+////AAABAAEAAAD/////AAABAAAAAAD+//3//P/9////AAACAAMAAwACAAEAAAABAP7//f////7//v/+//7//v/9//7//v////7//v8AAAAAAwADAAQABQAFAAQAAgACAAEAAAAAAAAAAQABAAIAAgACAAEAAQABAAEAAQACAAMABAAFAAQAAwACAAEAAQACAAMABAAGAAUABAADAAMAAQABAAAA//8AAAIAAgADAAQAAwABAAAAAAD//////f/+//7///8AAAEAAgACAAMAAgAEAAQABAAEAAQAAwACAAAA///9//z//f/9//3///
//////AAD//wAAAQAAAAEAAgACAAIAAgABAAEAAQABAP//AQAAAAAAAQACAAIAAwADAAQABQAFAAYABQAEAAIAAQD+//z/+//7//3//v///wEAAgADAAMABQAFAAYACQAIAAkABgADAAAA/f/6//j/+f/5//n/+//8//3//v/+////AAAAAAIABAAFAAUABQAEAAMABAABAP///v/8//n/+P/4//j//P///wMABgAIAAcABgAFAAYABwAHAAgABwAEAAIA///7//n/9//4//r//P///wIAAwAFAAcABgAHAAYABAADAAEAAQD//////f/8//v/+//8//3//v8BAAEAAgABAAIAAgACAAEA///9//7//P/7//n/+f/3//r//P8AAAQABwAJAAsACwALAAoACAAFAAQAAgD+//r/9v/x/+//7f/s/+7/8f/3//z/AwAKAA8AFAAXABYAFQATAA8ACgAGAAEA/P/6//X/9P/w/+7/7v/y//b//P8DAAgADAANAA4ACwAIAAYABQADAAEAAAD9//r/9f/z//D/8P/x//X/+v8AAAQABwAIAAcABQACAP7//v/6//n/+f/5//j/+P/3//b/9//5//z///8CAAYACgALAAoACQAGAAQAAgABAAEAAgABAAEA///+//3///8CAAQABwAIAAcABgADAAEA///9//3//P/6//n/+f/3//f/9//5//v//f///wEAAwAEAAYABgAGAAUAAgAAAP3/+v/5//n/+f/5//n/+f/6//v//P/+/wAABAAGAAgACQAIAAUAAwABAP///P/9//7//P/9//3//v/9//3//f/+/wEA/////wAA//8AAP/////+//3//f/8//v//f/+////AQADAAQABgAHAAgACAAHAAgABwAEAAIA///+//v/+f/4//n/+f/5//v//P/+/wAAAgADAAUAAgACAAMAAgABAAEA///9//3/+v/6//r/+v/6//z/+//8/////f///wEAAQAEAAMAAgADAAEAAAABAAAAAAAAAP////8AAAAAAQADAAUABQAFAAQAAwACAAAAAAD///7//f/9//r/+//7//z///8AAAIAAgADAAMABAACAAIAAAD///7//f/9//3//f/9//7///8AAAEAAwACAAQABAAFAAUABAAFAAMABQADAAEAAAAAAP7//v/8//z//f/9//z//P/+/wAAAAABAAMAAwADAAQAAwACAAEA///+//3//f/+//z//f/8//z//v////////8AAAEAAgADAAUABQAFAAQAAwADAAIAAQAAAP///v/+/////v/+//3//f/9//7////+/////v////7///8AAP////8AAAAAAQAAAAEAAQACAAIAAQACAAAA/v/+/////v/+//7//////wEAAAAAAAEAAQABAAEAAQABAAEAAwAFAAUABAAEAAIAAgAAAP7//v/9//7//v/+/wAA/////wAA/v8AAAAAAAAAAAEAAAD//wAAAAD///////////7//v/+//3//v/+/////v/+////AAAAAP//AAAAAAAAAQABAAEAAgABAAEAAAAAAAEAAQABAAIAAAABAAMAAwAEAAQAAAABAAMAAgABAAEAAAAAAAAAAgD/////AQABAAQAAgD///z/+//8//v/+//+/wMABQAAAPv/AAD7//b/AwANAAwACwATABgAEQAcABcAEAAbABEABQD8//3/AwD2/+v/8/////v/AQADAAAACwAdACEA7v9XATYChAEkAW8ANQDH/2n/TP88/6P/Tf9b/wkAMAAmAGz/IP9z/0L/Jf8W/wb/vv5i/vz9pf1a/kn/p//0/1AArQDzAAsB0gD2AOEAfgCZAF0AIQD8/0kAoACwAPQAjABPAGAAJADn/8n/AQA7AH0AxgDpABABAAHFAJoAaQBXAHIAXABCAFAAKAAhAGIAogC7AJkATwAZABgAGQDs/8j/u//G/6//ev9t/2P/e/
+p/9n/4f+y/5D/ff8m/6T+PP7j/db9kf1t/Tj95Pwc/eL81fy1/HH8wfzd/Hz96f3w/YP+0v7BADkEBgfyCBIJ3QiVCF8HfwZZBTsEqQOpAukAr/0p+iT4aff394T5TPtA/en+EADsAF4BYgItBEsGHgiRCMAHHgYeBJ0CNQGe/7v+Kv6V/Y/8Kfuf+hX7pPz//sUAyQGNAtsC7gKLAsABVAFKAVcBOADE/Wv7qvnB+JP4Z/hB+FP4lPj6+Oj4rfie+C75SfoE+yj8b/1x/1oCrATfBnUJlQxtDqgNugtWCf8G0QRoAon/P/xE+SD35PVU9b310vdn+7r/vwO1BgIJ1AriC3oMNwzaCjIJ2wbaA4MA6fz0+fP36/ZT9jb2W/eQ+V38S/8cAvIE7QdmCqYLeAu5CRsHaQSUAZL+jvsZ+Zj3VPc691P3CfmB+9v+/AH7A7IFcgbGBkcGggTBAsEA9/6w/Qn8Pvp2+KL3+Pdf+Pr4uvmM+lv7tvsC/NH7nvss/Hb86Pwj/Rr9iP09/Vr9of2W/b/+sf+RALQBtQPMCNwObxJtEuwPgAwiCZAGsAQYAy4BAf+m/C35U/X98nzz//YR/C8BegUMCJIIhQeeBQ8EdgNjAwsD0wGT/6383/kN+AP4cfkS/JD/WQKOAzIDgAJCAz0FYwe7CGUIkAaYA9T/OPzm+Rv5rvk++2/8r/wj/Iv7IPyL/WH/qwFmAwAEUgNmAWT//f1g/V79mf0B/pD9Lvzi+uj5lvnl+ZT6s/tx/IX8dPwS/HP75voc+4P8of6cAfIEcwdqCZQK7gsWDpMNVgvrCMEFAgTsAR//nfxy+fr3EfjV+K364fxHAGYE4wcKClsKEAnNBg4EOgEz/hr7dvhW9sH1NfY+9+z4Kvof/Ir+6AC8A8EFXgeOCKEIYwilB9QFUAOtAG3+5fx7+wv6Qvl++QL7cP3W/74BGgNgBHoFyQV+BU0EuAJgAdP/SP6m/D37vvr2+nL7KfxR/Wv+ef+ZACcB9gAXALX+cf0//Cj7wvq4+h/73PuE/CD9Vv3//Oz8qP1H/uz+m/5T/koBjgURC7EPAA9WDIgIbQTDAt8AQ/9h/vX8jPxo+/f4Ivfu9gr6sv/TBZQKUQxkC3oIlgTwAFn9mPo/+er40fl8+pz6d/r5+d36Nf2uAE0ERAY0B34Hewf3B/IHwgbmBMUCMgEJABz+vfuf+WP4VPkQ/Pr+vgHnA0UFXAYABxAHJwa1BKUDWQLHAE7//PxG+kf4APe49h73nPcG+IP4kvlu+uX6kvtM/KP9PP9yACABLQBN/pn8PvqP+OD48vlr/Mj/PwK7BIgHDgqcC7cL3wpwCb0HgQWyAvv/2fzN+Wv4ePj0+Zb8Zf/qAk8GPgidCPYG9AOtAKn9FPz8+038x/w2/b79Q/6J/gD/d/9uAAoC7QPkBfAGxAaUBYUEhgT4BPsEhQPgAI7+Af13/Lf8Ef33/d/+AADrAd0CSgOkAyIDsgIxArQA8P6o/Zz8Bvwc/DX8G/zx+7f7xPsB/D/8g/zy+9b60/mr+B/4S/h6+FD5CPsQ/e7+8/9AAN7/df+L/7D+Kf0r/ML88f9qBCYJtwxDDRwMCQo1B8AERwI/AHD/O//H/p79P/wK+wD75/x8AFAFUwkUC5kKlwfoAq/94/hj9kL2WfgI/NH+IAD4/xH/+/7k/w4CHQXsByMKdgtEC4MJtAZzA5MAPv4J/Hz6wfkV+a/4afjK+Eb7uv6LAsUGEQkPCe0HgwXMAnIATv4A/UT8YPvh+df30/Xk9AT2NPgC+0b+CQCcAGgA9/7//V79E/0Z/sP+/P6v/rr8h/qP+E33L/gM+yD/EgPzBSkIpArMDbQPFg+4DKYIHASDANz9ePyJ+7f69/oh/Oj9AwA1AvQEDQiCCiULeAm4BXAAYPsN+OP2zPd++dz6Ifx5/df+1/9CALYAuwFtA0YFnwYbB4YGewXABDAEHAPeALL91vpG+e34YPl3+tT7PP
0m/4oBdwMDBQkG1gVkBdsEfwPpAQ0A8/2e/LD7qPrA+ef4ivgJ+SX6wPtW/UH+df5G/hH+Cv7//dX92P0L/oX+Wv9v/4r+c/3g+3T69/nS+Yv6Mf08ATQG+gviEPISvREjDnkJ5wTnAPH9afyU+9b6i/pr+qD6qPtm/e4ACga6CssNwg1kCssE2f3D9y70OvPL9Kj39foH/pn/+v+t/1T/dAARA3gGqglGCwQLHwkcBtsC6v9g/Ub7Nvp5+g37dPvv+xz8Jf0BAA4DaAY4CRoJzgeuBVQC5/+2/er7j/sy+/v6jfoJ+b73p/fq+GP7P/5cAB0BrgBE/6n9TvwC+276sfqE+7j8dP1J/Rv88/qX+pb6LPy7/iMB/QTTCC4MGRDfEO8NtAkGBGH/E/1t+2P7Jvzb+/37Zvyi/Pr9ov//ARYGlQkkC50KTweRAsf9rvmU9y73y/dY+VD7dv1y/50ALgHQAc4CSwQnBqAHWAhtCMsHxQYLBVoCQv/C+yb5xfjg+e37Bv5M/3sAFALgA4UFZwYCBrIEYgMgAoQArv6F/FT6Kvmu+FX4dPiB+Nv4bPpB/AT+nP/n/5f/K/9H/qH9jfxG+8n6dfr/+sX7r/uu+0/7OPsU/I/8n/ye/AP+wAG0BkcMLRGIE2AS5g0DCEkCe/08+qL42Pj5+YD6nPr1+gX8Of7kAQsHNwyUD8UPhgzzBvD/9fjy89jx1vIP9gv62v3YAJkCkwNUBDQFwQZYCC4JagmICGkGqQN6AIz9LvtH+Zb4Wfkh+2X9Rv9LACIBlwIJBNUEBwUJBBICNgBr/vv8UPzX+737/Pvo+7/7LPsb+hj6FPsL/DX9EP5i/pL+Bv4o/YP8rfse+zz74vvn/G799fxK/Kj7Lvv0+wv9xP3M/+kCyQZ0CzMPNxFeEDIMJQeyAh7/q/xD+zb75/ub/Bv9Xf09/qH/2gG7Bb0JrgxIDZoKXwYkAVP78fZh9M7zZfUy+Ob7uv9eAuQDmgTaBHgFBwY9Bl8GxQWtBE4DHAG3/lT8APrP+A35zvqt/XkApQJDBL0FOgYIBZAD0AHB/1/+I/1P/Pv7cftG+2L7ivvU+6z7s/uS/Lf95/76/4IAJwAG/6b9Z/xm+3T6nvmB+TH6LvtM/En97v23/lb/KP/i/lD+MP3q/fwABgUpCrkOLBF/ESMOkAh1A7D+gvtG+ir6lfvq/D39lP3C/X7+zgBJBIAIKww+DXILYQc+AZH6R/U58lHyBvXj+If9hgH6A2UFegUqBUMFMgVXBXcFBAUZBIsCmQCm/t/8k/s7+yD83f35/9YB0AKLA8AEMwWMBIoDiAFu/zr+6/xY/Jr8Xvyh/Ej9Qf3w/Oz71voY+wP8cv0p//T/9/+d/7P+pv3V/Mz7FPsf+0D7wPv2+4j74PtV/Jr8XP26/fX9uf4vADMDhAeiC9QOhhCPDxwMMwfNAbP9Kfu6+eX52/qR+yH8sfyb/TD/fQGjBDkIIwsgDK8KFAfJARn8O/cg9Lbzf/W3+PD89wDoA5UF+gWnBTwF0ARwBEMEEQSpA9YCcAHR/yL+p/wa/KH8SP6bAF0CZgMuBIwEHAQLA5YBqf8E/hX9o/wQ/a794P0z/ib+xf0n/d/7Qvt8++v7Uf2b/uv+1f7g/ef8dfzI+5j72fsc/Or8x/1E/j/+1/2S/Rz9ofx8/EL8ffyt/TYA2QQGChgO4xDPEOcNqglpBAYAIv3t+oz6R/u6+0H8Rfxc/M/98P/uAhkHgwoMDHcLQghpA/j9nfg/9Xr02PUh+Rj9zwCVA5cEjwQuBLoDtQPpAz4ElgRaBIsDRgJpAGb+1Pzt+yH8av0D/4UAtQHlAg0E9QPVAmABdv/6/UD9Ef2a/RD+TP6V/kj+OP2j+0r69PmS+vn7lf20/ib/9v5p/qH9z/w+/On7zPvV++j7+vv/+x78ivw6/dL9N/6w/gL/6P5I/yUBVwQ0CL0L6A1rDsgM/wjSBDwBQP6G/PT7Xvx1/d79tP
3w/Vz+S/92AcQEWgjKChoLUAl2Bf//ffpp9qn0afUb+Ab86v+oAggERgT6A30DFwMxA5oDNASgBFQEbAO3AXX/lv0d/ID7Ifwz/Zn+VwAlAsgDGgT1AkgBhv9u/hf+Pf7V/hn/6f6J/sz9ivyD+rj4cPiG+af77P1+/z8Auv+u/gL+LP1a/NT7j/vE+/n71vuc+4v7oPsA/Az9AP5Y/sD+Z//JAEIDCAb6CMkLWA0+DZALWQhFBGIAVP3G+4f7yPuW/Hv9Jv4V/93/1gCqAuoEmAfZCTIKRAgnBMv+x/ki9pH0VPW291H7eP+fAnEE9gRGBLIDjgO7A2wEqwRUBMoDjgLtAO/+pfwN+3P6Q/tM/WH/OgHOAlIETwXRBI8DzAHc/8r+K/4f/lv+wf0E/Vr8Ofvo+c34wfg/+q78NP9YAT8CVgHP/1/+AP0b/B37aPqW+r36DPuV++b7lvyc/aX+rP8ZAKT/K/93/5sAKQNwBj4JoAuWDK8LlAk8BmcCMv/e/OD7OfzK/BX9L/1a/eb96/63AIUD8Qa0CesKAQp4BisBdfvG9nT0afRo9iD6Sf7BAfIDqARMBIcDGQN2A1IEEAVDBcMEqQP6Ac7/aP1Q+zb6jvo9/IL+lgAzAsADaAUtBpsFEgTDAZr/Mf5Y/Sn9Bf1R/Kz7Lvtw+q75ePmB+tH8gf/ZAR8DsgLqAJP+gPwh+wr6UfmR+Wf6Yvta/PT8Zv0y/gz/9v+VAL3/J/72/Lr8wv5yAmoGTQr7DAwObA2JCjUGuQER/ur7TfvR+2H8WfwX/Bb8F/0R/8MBcwUoCdALhwxQCrYFf/8p+eD0S/NF9CT38voG/1gCbQSBBaMFRAXyBOYECAXUBAMEvQLxAP7+Kv2X+8D6rvqn++H9hAC8AicESwVcBkkGLQVyA8kAOf6b/Kn7kvvF+377Tfs++9X6rPo/+5z82P4cAZ8CIAMiAtb/KP3f+oD5Mfmk+bH6Cvwg/f39j/7U/hb/Tf+C/+L+df08/DX79/pD/DAAJAaaCtgMzA2CDQ4MnAhVBDwBW/7l+//6wfqX+rf5L/lU+9/+zALeBm8KqQy5C28IcQS6/+D6E/fO9e/2+/hD+7n9NQALAngDFwV4BhAHbQYrBf8DggKZAJD+w/yl+2X75fsP/bX+UADXATIDBQTEBEsFIAVmBLoCMQCy/a/7yPrk+iT7l/sN/IT8EP1n/Qv+1f6F/2gACQEEAQgABv7K+zb6yflk+nn7pfy5/aL+TP9y/zX/v/7+/Uv9lfzo+1D7i/qI+rj7Kv4EAnIGCQvvDtsQoxA3DrIJNQTP/nz6GPgQ9zj3wvj2+pf9bQBiA0sGiAj5CTIKHQm5BhUDSf+1+9P4S/fX9qX3pPmL/D0A2QOxBoEI3AjZB6gFxQLv/3z9Avy/+2v8ov2o/mf/EQCnAIIBngLMA90EhAWSBbsEogKS/478WvqM+RP6Gvtm/JX9Sv7D/t7+1/4A/xP/Sv+l/4b/j/77/HX7kvqK+kD7ffz8/RH/mv/d/6//s/5A/TP81/so/F781PuH++P7xvyv/lQBUgTCBzILHw4hEOoP0Qz1B24Cev33+av3E/ci+C76VP2tAHkDiAVNBrEGKgcoB8IGggVmA8EAk/2q+m34H/dX9zP5qfwIARQFyAfSCDwINgaIA6gADf6a/FD8/PxR/mT/5/8WAFUA8gDHAasCdQMSBGwERQRTA4sBOf/k/Ef7o/qM+tv6dPtD/FH9Rv4D/4P/l/97/2f/Nf+a/nT9Vfzd+x38/Pw0/lr/FAAYAKP/Ff85/vj84ftz+8D7dfwU/X79jP03/VX9hf5NAFkC3ATYBzYL1g1cDsYMCgnGA77+/Prv+In4b/mn+4H+BQG1Aj4DSANNA4gDywRPBgcH1gYkBUkC1f4E+y34BfeQ9yn6DP4TAmkFMgdLBwUGugM2ATX/4v1c/aP9QP74/qH/JAC0AAMBDAFqAQUCoAI4A2oDJQM/AowAtP4i/bv7x/
qi+lH7o/wF/h//BwBfAPb/Tf+g/g7+nP1S/bH9sP58/9r/AgDR/xP/Ff4h/Wn8+vub+5b7A/xZ/JX8Ef21/VL+6v55//z/fwAgAf4BDgM3BIcFCQcjCBsIDgcYBWYCuP+d/Yj8hPww/Zj+UgCFAe4B9wErAm4C4gLVA+wExgWZBSIE7gHI/kX73Pgb+Cv5nPuq/vABigSOBS8FAQRYAmoAjv5P/f/8XP05/pD/2AClAf0BLAJ/AskCxgKXAlMCugG/AK//gP4G/bn7G/td+zT8JP05/kr//v9wAJgAcAD2/xz/n/64/vb+Wf+U/4L/HP8Q/uT85/vx+nf6hvo9+7f8Lv6N/98AjgGLAQsBSgCa//f+jf6o/iD/s/9XABIBjgGmAVACBwTGBbMGdwYBBQMD2ADz/iz+F/5s/pv/AQHNAboBAQGdANgAkgHgAhcEcgSUA7YBq/+l/ej7QfvR+1n9T/8FAUcCvgIyAjABLgAy/1v+zP2H/eD99v5+AEkCxgNmBHMEGwRHAwMCXQCa/hD9+vt6+2r7mvvr+3L8UP1S/nv/9QBgAoIDHgTbA/ECfQGz/yD+w/yu+xv76/oA+y77Zfu2+y78DP1b/u7/bQFpAscChwKKAez/P/7k/O/7n/sH/Cf95P6nACACGwNNAw8D/gKLA24EtAQoBCkD+gHQAOL/hf+5/yUA4wDmAaECngL8AXMBXgFtAZgB0wGrAecAzP/D/hH+vP3V/Xj+Xv8EAE8AUgDw/1T/1v6a/rD++f5V/9b/VgDPAIIBRAILA9ADFgTkAxIDbwGS/5f9rvtj+q/5wvm6+h38xv2n/2UB3AL0A5oE3gSVBJwDGQIZAJr9APvV+FL3jfab9nb3+fjb+tX84f7CAB4CBwOhA98DmgOSAtAAvf70/Ez8+/yB/mcAWgI2BIsFwAUNBdEDawJ/ASUBSwGQAUYB0wCzAMUANgHgAbwC0wN+BJYEKQTIAq0AZ/6X/NT7zftG/Dr9O/4e/9z/eQD+ABwBzwBzABYAp/8p/7z+rv4Q/6z/bQAbAV0BMAHSAIAAawB5AJAA7QBfAaABiwHRAJH/Bf5e/Dz71Poa+yj8s/16/zMBfQJQA4kDTQPAAs4BhgDm/jf91vu2+vj5q/nZ+cn6Yvx9/u0AJQPVBOQFTQY4Bo8FTASjAq8Aq/78/O77pfvg+3L8ef31/sQAjgLpA48EbwTJA+QC4QGzAGX/P/56/Sv9Xv3o/bX+w//2ADUCHgNSA98C6gGqAIH/u/5x/nr+of7j/kP/lv/e/1EA+gCkARUCMgLvAT4BQQBl///+BP82/3f/qf+v/43/ZP9S/1L/Y/+0/0EAyQD/ALgABwAV/y7+uP3j/aT+0v9AAaoCsgMGBKMDvQKXAXIAif/k/lf+yf1S/Rn9Jv1u/QL+8v4cAEMBHwJ7Aj8CgwGtACUA/P8GABIAEQD0/5b/BP9n/vL9zv0h/hX/eAC/AZAC3QLMAnoC9QFtAf0ApgBaABUA1v+X/1b/PP9//x8A5gCTAegBrQHVAJr/Z/6B/fj8zPz0/Ff90P1G/sL+Q/+z/xgAfADDANMAkQD8/0H/ev7N/XP9bf2//Wj+EP+g/xYAUAB+AK0A2QAPAT8BwgHQArQD5QNmA4sCywEyAc4AxQDJAKQAdAAnAJ//5f5B/iv+nf5i/3IAgAEpAjICvAEaAX8A+/+x/6P/oP99/1H/O/80/yv/M/9g/5b/mv+D/5H/0/82ALYAQwG7AeYBuAFkAQMBjgAmAPf/5/+q/x7/gP4T/ur9Gf6u/pD/fgBEAdAB8wF+AX0ASf9O/qz9bP2K/cj9Ev5b/rD+Lf+//24AMQHOAS4CKAKkAb0Alf+K/tv9lf3P/XL+T/8tAMoAIgE7ARIBywCRAIUAmgCuAMIA1ADjAOIA2ADQAKgATQDs/5f/Yf82/wf//f4Z/2r/2P8oAFQASAAPAM3/l/+X/8r/EQBgAJAAhAA7AL7/Nv/G/n/+kP7u/mv/5f
89AHIAgABuAGEAZgCDAMMADAErAf0AnABDAPr/yP/J//f/PAB3AKwA6AAIARYBOgFwAYQBTQHkAGkAxP8J/3j+JP79/Q/+fP44/wAAswBVAb0ByQGQASoBmgDZ/xP/iv4u/uX9yP3l/Tz+vP5a/xgAyQBOAa8B0gGnAUoB6gCxAJoAjgCCAGgAPAAQAOT/qf9i/w//1P7Q/gL/Vf+h/8//9P8QACMALwAfAOn/i/8W/7P+Zv5J/mT+mP7N/v/+LP9I/0r/Sf9h/4n/v/8TAHkAyQDrAAcBLgFGAT4BJgH9ALAAWgAwAD0AVQBrAKEA8wBIAZABzgEKAhgC7AGNAfEANQB5/+r+rv6r/tf+Lv+j/ywAnwDoABUBGQH7AMsAhAAiAJX/8v5t/hn+A/4y/or+8v5X/7T/GQBxAKUAwgDMAL0AlQBSAAAApv9H//z+0v6w/o/+eP6A/qv+5P43/6L/AABAAFcAQAAHAK//bf9s/47/vv/o//L/yv9v/xz/Hf9t//X/pwBOAcEB4AGrAVAB4wBvACcAIwBLAIUAuQDqABQBIAEvAVsBiwGyAcIBrgFwAQUBhgAXAMP/lf+X/7L/0v/q//T/8v/R/4L/Gf+m/kf+I/4+/of+6f5T/7r//P/+/9r/mv9C//P+rf5r/ir+6v3N/dP93f0D/l7+5v6X/0IAyAAaASwBIQENAeYArQBrACYA6P+d/z7/2/6Q/oz+3f5u/yYA4gCEAQICQAIvAtQBRAG5AF8ALAAWABwAKAA2ADkAOQA1AB8A/v/1/wwAPABqAIcAkQB7AFAAOgA8AEgASwA5ACIA+//A/4n/af9i/3n/oP/b/xgANQAyAA4A1f+h/3//ev+Q/6j/tv/G/8//wP+W/1r/I//9/uP+3/7s/vf+D/89/3r/uP/j//f//v/l/67/e/9U/zv/Lf8l/zL/Vf+F/9P/OwCqABwBiQHgAQ0C+wHEAXsBGQGmAEMA+f/I/6r/sP/q/0EAogAKAV4BjQGRAXEBOgHkAHoAGQDB/2P/9/6V/mD+Vv5w/qj+9v5H/5L/y//r/+P/wP+e/4f/cv9b/0z/U/9s/4b/of+u/6r/ov+f/6z/xv/n/xYAVwCTAL0AwgCUAEsAAgDS/8v/0P/Q/93/+v8bADUAMgAnACgAKgA4ADoAEADO/5P/c/99/5r/x/8KAEAAaQCQAJwAlQB+AGsAggCrANIA+wAUASMBIgELAfIA1AC+AMkA3wDpANkAsACBAD8A9f/F/7D/vP/p/x4ARABDABgA4f+k/2j/O/8j/yD/HP8A/83+iP5H/iz+Q/6B/sr+Cf82/0n/Qv8t/yT/M/9r/8X/IgBjAGwASAAVANv/sP+o/7v/5P8UADoATwBGADAAKwBAAG4AqgDmABgBNQE+AUIBOQEhAfkAxwCeAHcAUQA4ADIAPwBgAI4AwwDfAM8AnABdACMA9f/X/9j/6/8BABMAIAAqADEANwBKAGYAbwBgADoA/P+p/0z/A//c/sf+yv7l/g//O/9i/4//vf/e//n/EAAZAAkA5/+5/4n/Uf8j/wv/Cf8Z/zX/Wv+E/5//sP/B/9T/5//1/wUAGAAnADoAUABiAHIAgACSAK8AyQDWANYAxwCxAJoAjgCQAKIAxADwAB8BRQFWAVABNgEQAe0A2ADQAMsAvACbAGwAMQDw/7H/e/9X/1H/a/+Z/8r/5v/j/8D/if9I/wD/sf5l/ij+Bv4I/iv+a/68/gv/Xf+j/8T/y//L/8f/w//R/9v/wv+d/3j/Sf8t/0b/e//K/0QAwwAbAVUBbQFUASYBBAHkAL4ArQCiAIYAbQBdAEYANgBGAGcAiwCyAMgAvQCdAHMASwAxACwAOABRAGwAhgCPAIoAeABdAEcAOAAuACQAFAD+/+X/zv/C/8P/0f/s/wwAJAAsAB8A+f/C/4H/Rv8X//n+8f7y/vb+/v4B//7+//4G/xn/N/9X/3T/gf96/2L/Ov8R//P+5f73/iT/Y/
+1/wcAVACcANEA/gAbASMBIAEJAd8AtgCQAHkAewCXAMsAAgEzAVkBZAFeAVEBOgEkAQ8B8gDQAKEAbwA+ABAA9P/p/+v/+/8NABUAEgD9/9j/qv94/0z/KP8U/w3/Fv8s/0z/dP+W/6n/pf+G/1z/Lv8L//3+BP8e/z7/X/9+/5T/q/+//9D/4f/v//X/8P/f/8n/sP+f/5v/p/+9/93/AAAhAD8AVgBnAHYAgACHAI0AjwCOAIgAfQBrAFIAOgAqACYAPABjAJoA0QD/ABkBFwH+ANcAqwCHAHAAaABqAG8AbABjAFUAQgAxACcAIQAdABwAGgAVAAUA7v/S/7X/nP+M/3//cv9f/0r/N/8r/yn/Lv81/zj/Nf8p/xr/EP8P/xr/MP9J/13/bP90/3v/g/+S/6b/vv/Q/9r/2v/T/83/0//r/xcAUgCQAMUA5gDwAOUAyQCqAJQAjgCYALIA1ADsAPQA6wDSALIAmgCNAJQApgC6AMUAuwCXAF8AIgDu/9D/zf/f//z/FQAfABgA///i/8n/vv+9/8L/xP+0/5f/cP9J/y//K/89/2P/kf+9/+D/9P/6//X/5v/V/8D/rv+f/5X/j/+M/4z/jP+N/5D/lf+j/7z/3P/+/xwAJgAcAP7/1/+y/6H/q//J//f/JABDAFIATgBBADQALwA4AEkAXwBzAIEAhgCEAIAAgwCOAKUAxADiAPwABgH+AOUAugCFAEwAGADw/9v/2v/u/wwALAA/AEIAMAANAOb/wP+g/4n/df9h/03/PP8u/yr/Mv9D/1j/av9y/3D/Y/9Q/z//M/8x/z//W/+A/6n/0v/0/wwAHQAnAC0ANAA8AEEARgBJAEoASQBLAEwAUQBXAGMAcgB/AIkAkQCVAJUAkQCKAIAAcgBeAEwAPgAxAC4ALQA3AP3/+//+/////////wAA/v8DAPz/BAD9//z/AwD3/wQA+f8AAAAAAAABAAAAAgD+/wUA/P8FAP//AgAFAPn/BgD/////BQD7/wIAAAAAAAEA//8CAAIA//8DAAEAAgAAAAQA/v8FAP//AwD+/wUA+/8IAPv/CQD8/wQAAwD8/wgA+v8FAP7/AQD+/wQA+/8FAP7//v8EAPf/CAD3/wQA+f8AAPv//v8AAPz/AgD7/wEAAAD6/wYA+P8FAPv/AgAAAAEA//8CAP7/BQD7/wsA9/8JAPf/CAD3/woA9/8HAPf/CQD0/w0A8f8HAPz//f8BAP7//f8GAPv/CAD3/wgA+v8HAPv/AgD//wAAAAD/////AQD9/wMA/P8BAP3//v8AAPn/AAD5/wEA+//+//7//v8BAP////8AAAEAAgD+/wMA//8DAP7/AgD//wIAAAACAP3/AwD/////CAD4/woA/f8DAAMAAAAEAAMA/v8DAAAA/v8CAAAAAQADAP//BAACAAQAAwAEAAQAAwADAP//AgD/////AAD//wEA/f8AAAAA/v8EAP3/AwD/////AgD8/wIA/v/+/wEA/v///wAA/v8CAPj/BAD8/wEAAQD8/wcA+P8IAPn/AwD///7//////////f8DAPr/BgD2/wkA+/8AAAIA/f8DAP//AQD+/wUA+f8IAPz/AwD8/wYA+/8HAPv/BQD9/wEA/f8FAPv/BgD6/wMA//8EAP3/BQD6/wMA/v8CAAEAAgD//wAAAwD7/wgA/f8DAP3/BQD9/wgA+/8EAP7/AQAEAPv/BgD7/wQA/v8FAPz/BwD9/wMABAD9/wgA/v8AAAQA/P8CAP////8AAAEA/v8DAP7/AQABAP7/BQD7/wQA+/8EAPv/BQD8/wMAAAD+/wQA/////wIA/v8AAAMA/f8FAPv/AwD8/wMA/v8BAAEA/v8DAAEAAAAEAP7/AgAAAP//AwD9/wEA/P8CAP3//v8AAPv/AwD//wEAAQAAAAIAAQABAAMAAAACAAEA//8FAPz/BAD9/wIA/f8FAP3/AwD+/wMA+/8JAPj/BgD6/wEAAA
D+////AAD8/wMA/P8GAPn/AwD6/wQA+f8EAPn/AgD9/wEA+/8DAPn/AAABAPv/BAD9/wIAAAAAAAAAAgD9/wcA+P8KAPf/CQD8/wAABQD8/wUA/f8DAAAA//8DAP3/BwD2/wsA8/8JAP3//f8GAPv/AAAEAPr/BQAAAPz/BgD6/wIAAQD9/wgA+P8IAPv/BAAEAP7/BgD+/wMAAgADAP7/BwD7/wcA/f8EAAAAAQADAPz/AgABAPz/BAD8///////+/wEA/f8AAP3/AgD9/wEA///8/wEA/P8BAP7/AQD8/wMA/f//////AAD//wQA+/8HAPv/BgD7/wQA+/8DAPz/AAAAAPz/AgD8/////v/8//v////5/wUA+P8CAP3//v8BAPz/AgD+/////f8EAPv/AAABAPz/AwD9/wMA/v8DAPz/BQD8/wQA/////wIA/v8CAP7/AQD//wAAAgAAAAAAAgAAAP//BAD+/wEA/v8CAP3/AgD6/wYA+f8JAPf/CAD7/wMA/v8DAP3/AwD//wEAAAADAAIAAgACAAEAAwABAAIAAAABAAQA+/8GAP7/AQAGAPv/AwD///3/BwD4/wgA/f8BAAUA+/8FAPj/CAD0/wcA+v///wEA/f8AAAAA/f8FAPz/BAD+/wMAAAAAAAIA+/8GAPj/BQD5/wMA/P8AAP7//v8BAPz/AAD9////AAD8//3/+/////3//v/+/wAA+/8DAP3//f8EAPr/BQD5/wYA+P8GAPn/BAD+/wEA/v8BAAAA/f8EAP3/AQD//wIA/v8CAAEA//8CAAMAAQD+/wUA+v8DAP7/AAD+/wQA+f8GAPv/AwD//wAAAQADAAEAAAAEAP7/BgD9/wYA//8CAAEAAQACAAMA//8BAAEA/v8AAAIA/f///wEA/v8CAAIA/P8GAPj/BwD5/wMA/v/+/wAA+/8CAP//AAD///7//v8BAP//AQD//wEAAAD//wIA//8DAP////8EAPv/BAD+/wIAAAAEAPz/BQD7/wMAAAD8/wIA+/8DAP///f8EAPr/BAD9/wAAAAD//////v/+//v/AwD3/wQA9v8FAPn/AwD///v/BwD6/wQA/v/7/wcA+f8HAPv/BAAAAAUA/P8IAPr/CgD6/wYA+/8EAPj/AwD9//z/BAD6/wQA/f8AAP7/AAD+/wMA+/8CAPz///8AAAIA/P8EAAAA/f8EAP//AwAAAAEAAwD//wQAAgD+/wUAAAD//wUA/P8IAPr/BgD+/wIAAgD8/wIA/v8BAP7//v/+//3/AgD7/wEA/f8DAPz/AwD9/wEAAQAAAAIA//8AAAIA/P8AAP///v8AAP7/AAACAP3/BAD8/wYA/f8CAPz/AwD//wEAAgD7/wYA+f8FAPv/AgD6/wEA/v/9/wQA+v8FAPr/AgD9/wIAAAD+/wMA/P8GAP3/BQD9/wMA//8AAAQA+/8HAP3/AQAAAP7/AwD+/wIA/v8BAP///////wAAAAD+/wQA+f8EAPz/AAACAP3/BAD7/wQA/f8DAAAAAwADAP//AwD8/wQA/v8AAAQA/P8FAAEAAQADAAAA/P8KAPr/BQD//wEAAQD+/wIA/f8EAPf/CQD3/wYA+f8DAP//AQD/////BQD+/wIA/v8DAP7/AwD8/wUA+v8IAPj/CQD4/wMA/v/+/wMA+/8DAP7///8CAPz/AgABAP3/BQD9/wEAAgD9/wMA/P8CAPz/AwD5/wUA+f8FAPz/AgD+////AwD9/wMAAQD//wUA+/8HAP3/AgAEAP3/BAD9/wIA///9/wQA/P8CAPz/BQD9/wMA/v8EAPz/CQD4/wgA/P8DAAAA/v8CAP7/BQD6/wgA+f8HAP3/AQACAPz/AQD7/wIA/f8BAAAA/P8GAPz/AwABAPz/BAAAAP//AwD9/wIAAQAAAAQA/v8EAP//AQAAAAEAAAD+/wIA+/8EAP////8CAP////8FAPv/BwD4/wUA+/8EAPz/AwD7/wYA+f
8DAP3/AAACAPz/AAACAP7/AgADAP3/BQD+/wMAAgD//wQA/f8DAP7/AQACAPr/BgD5/wMA/f8CAP3/AgD8/wIA//8AAAIA+/8HAPv/BgD8/wQA/v8DAAAAAQABAP7/BAD6/wcA+f8GAP7//f8GAPr/BAD+/wAAAgD8/wQA+v8GAPn/AwD+//7/AwD5/wMA/f8CAPn/AwD7/wEA/v////v/AQD8/wEAAQD8/wMA/f8AAP//AwD9/wQA/v8DAP//AQADAP3/BwD5/wcA+/8GAPz/BAD8/wMA/f8EAPz/BQD7/wYA+v8EAPr/BQD9/wMA/f/9/wEA/v8EAPr/BAD6/wYA+v8DAP//AAABAP//AgD//wQA/f8BAAUA+P8KAPj/BwD7////AQD9/wUA+/8CAP3/AgD//wIA+/8FAPr/BAD+/wAAAgD9/wUA/P8EAPz/AQABAP3/AgD///7/AAACAPv/BQD+/wIAAAAAAAAAAQAEAPz/BwD8/wIABAD6/wcA+f8AAAEA/f8CAAAA/v8BAP3/AwD//wQA/f8DAAAA/v8FAPn/BQD6/wMA/f8BAP3/AQD/////AAD8/wAA/v/+/wAA//8AAP///v8BAAAA/v8BAP//AAD+/wEA/f8DAPr/AwD8/wEA/f8BAPz/AgAAAP3/BQD6/wUAAAD+/wkA+P8IAPz/BAAAAAAAAQAAAP//AQABAAMA/P8CAAEA/v8IAPr/BgD6/wYA+/8JAPj/CgD4/wUA/f8AAAMA/P8FAPz/AgAAAAAA//8AAP7/AQAAAP7/AgD//wEAAgABAPz/BwD7/wYA/f8EAP////8DAPv/CQD3/wgA+P8DAP//AAAAAAMA/P8BAAEA/f8FAP3/AQD9/wMA/P8DAP///f8HAPn/BwD7/wcA/v8CAP//AAD+/wQA/P8CAAAA//8EAP7/BAD9/wEAAgD7/wQA/P8CAP3/AAD8/wEA/P8CAP3//////wEA/v8AAP////8AAP7/AwD8/wQA/f8BAAEA/P8EAP3/AgD7/wMA/P8EAP3/AAAAAP7/BAD+/wIA/f8FAPz/AgACAAAABQD6/wYA+/8HAPr/BgD7/wQA/v/+/wIA+/8FAPr/BQD9/wUA/f8AAAQA/v8EAAIA/f8IAPz/BQAAAP//BAD+/wQA//8BAP//AgAAAAUA/P8DAP7/BAD+/wMA/f///wQA+P8IAPb/BgD5/wYA+f8DAP3///8AAP3//v8AAP//AAD9/wUA+f8IAPr/AwAAAP//AgAAAAAAAgAAAP////8BAAEAAgD9/wIA/////wIA+v8FAPz/AgD+/wIA/f8FAPj/BgD4/wUA/f////7/AAD9/wAA/f8BAP////8DAPr/BQD9/wAAAgD9/wAABAD8/wYA+/8IAPr/CAD4/wgA+v8EAP///v8DAP7//v8DAPz/BAD6/wUA+/8EAP7/AAACAP//AwD+/wIA+v8IAPr/CAD5/wUA+v8GAPv/BAD8/wUA//8CAAQA/f8EAAQA/P8EAAEAAwACAAIAAgABAAQAAAACAAQA/f8GAP7//v8DAPv/BAD9/wIA/f8DAPr/AwD9/wIA/f8BAP7/AAD+/wAA/v8CAPn/BAD7/wQA/P8BAPz/BgD7/wMA/P8CAP7///8AAP///P8CAPj/BgD6/wIA+/8BAP7//v8AAP///f8CAPr/AAD9////AAD9/wEA/v///wIA+/8JAPf/BgD+/wAABgD3/woA+f8IAP7///8GAPz/AAACAPv/BAD+/wIAAAABAAEAAwACAAMAAAAEAAEAAwABAAEAAwD9/wIA/f8DAP//AgD//wAAAgD+/wUA/P8CAAMA/P8KAPf/BgD9/wAAAQACAPr/CAD4/wQA///+/wEA/f8DAPz/AgD8/wQA/f8CAP////8FAP3/BAD//wIA//8BAAIA//8BAAAAAQD//wIA/f8DAP7/AQD///////8AAP7/BQD4/wcA+P8FAPz///8CAPz/AQD+//z/BA
D4/wYA+P8EAPv/AgD7/wQA+v8EAPz/AgD4/wcA9f8JAPj/AwD6/wQA+v8EAPz///8BAPv/AwD7/wIA/v8BAAEA/f8DAP7/AwAAAP//BAAAAAEABAD//wIAAgACAP7/BAD7/wcA/P8FAP7/BAD//wIAAgAAAAIAAQABAAEABAD+/wcA/f8HAP//BgD//wcA/v8GAAMAAAAHAP//BAAEAP7/BAABAAAABAD+/wAABQD7/wYA///8/wgA9P8KAPj/BwD7/wAAAwD9/wEAAAD8/wMAAAD8/wYA+f8FAPv/AgD7/wQA/P8CAP////8AAPz/BgD1/woA9/8GAPv/AgD9/wEA/P///wAA+v8EAPj/BAD5/wYA+f8DAP//AAADAP3/AgD//wMAAAAAAAMAAQACAAIAAAAEAP7/BwAAAAIAAwD7/woA+/8JAPv/BAACAP3/CAD8/wcA+/8HAPv/BQD8/wIAAgD8/wMA+/8AAAMA9/8GAPn/AwD8/////v8BAP7/AgD+/wIA/v8CAP3/BAD+/wEAAQD//wUA/v8FAP3/AwAAAP//BAABAAMA//8EAP//AwACAP//AwAAAAUA/v8HAP3/BgD9/wUA/v8EAAIA/f8JAPn/CwD6/wcA+/8IAPj/CQD6/wYA//8CAAAAAgACAAEAAQAEAP7/BQD+/wQA/P8FAPz/AgADAPr/AwABAPz/BwD3/wcA+/8CAAIA/f8EAPz/AwABAP7/BAD8/wIA/v8AAAIA+/8FAPz/AgABAP7/AQD+/wEA///+/wEA+/8BAAAA//8BAAAAAQADAP7/BQD7/wgA+v8HAPv/AgACAP3/AwD7/wIA//8AAP//AAD+/wYA+v8GAPv/AgAAAP7/AgD+/wMA/P8EAPn/BQD9/wQAAAD//wUA/f8EAP////8FAPz/BgD3/wgA9f8LAPf/CAD6/wcA+/8HAPz/AAACAP7/BAAAAP//AAABAP//AAD///7///8CAP3/BAD7/wQA/v/+/wYA/f8DAAIA//8BAAQA//8EAP7/AgAAAAEAAAACAAEA/v///wIAAAD+/wQA/f8DAAAA/////wQA/P8DAAAA/f8DAP//AQD//wEA/P8FAPz/AgD/////AAAAAAAA/v8CAP3/AwD8/wYA+v8EAP7/AgABAAAA//8CAP7///8BAP7/AQD9/wAA/f8CAPv/AgABAPr/AwD7/wIA/P8DAPr/CAD5/wMAAwD5/wkA9/8HAPr/AQAAAPz/AQD+/////v8BAPv/BAD8/wUA+/8GAPv/AwABAPv/BAD8/wEAAgD8/wQA+v8DAP7/AgAEAAAAAAAEAAAAAQABAP7/AgACAP3/BgD6/wYA+P8GAPj/BQD+/wIAAQD9/wIAAQAAAP//AAADAPv/BgD6/wQAAAD8/wQA/f8CAP//BQD4/wgA+v8DAP//AAD//wAAAAAAAP//AQD+/wAA////////AgD9/wMA+/8FAAAA//8EAP3/BQD7/wcA9/8KAPf/CAD8/wIA/v8FAP3/BQD7/wYA/f8BAAEAAQD+/wMA//8DAP7//v/////////+//7//v///////v8CAPv/AgD6/wUA+v8BAP//+f8EAPr/AgD///v/BQD8//7/AQD9/wEA/f////7//f8BAPz/AwD7/wUA+P8GAP3/BgABAAIA//8BAAYA+v8LAPj/CQD7/wMA/f8EAP3/BAD//wEA//8BAAAAAwD+/wMA/f8EAP3/BQD+/wEA/v8CAPv/AwD7/wEA/v8AAP7/AAD/////AwD+/wIA///9/wMA/P8BAAMA+P8IAPf/BQD8/wEA/P8CAP3/AAD9//7/AQD6/wYA+f8EAP3/AgACAAEAAgACAAIAAgACAP7/BwD+/wQAAAAAAAEAAgD/////AgABAP//AwD//wIAAQD+/wMA//8AAAMA/P8EAPv/BgD5/wcA+/8DAP3/AgD8/wIA/P8DAP//AAD9/wMA+/8EAPv/AQAAAPz/AwD8/wMA+/
8AAP///f8BAP///f///wAA+v8AAP7//v8CAP7/AAAAAAAAAAABAAMAAAABAAUA/P8GAAAA/v8GAPz/BwD+/wMAAgAAAAQA//8GAP7/AgACAP7/BAAAAAQA//8EAPz/AgAAAP//BAD4/wcA+f8GAPv/AQAAAP//AQD9/wAAAQD////////9/wUA+/8BAP///v8BAP7////9/wAA/f/+/wMA/P8AAAEA+/8FAP3//f8DAPr/AgABAP3/AgD+/wEA//8CAP7/AgD//wAAAAABAAAAAgD//wEAAgAAAAMAAgACAAQA//8FAP//BAAAAAEABgABAAEABQD+/wIAAgD9/wQA//8CAP7/BQD7/wYA/v8GAP3/BQAAAAIAAAADAPz/BgD5/wkA9/8KAPX/BwD8/wAAAQD/////AAD9/wAA+/8GAPj/CAD5/wEAAQD6/wMA/P8DAP3/AQD9/wAA///7/wMA+f8CAP3/AAD9/wMA/P8FAPr/BAD+/wAABgD5/wYA/f8BAAIA+/8FAPn/BQD9/wAA/v8CAP7/AwD7/wMAAAABAP3/BQD6/wYA/v8AAP//AAD7/wUA+f8IAPj/BAD8/wIAAAABAP3/BAD7/wgA/f/+/wQA+v8LAPn/BwD8/wcAAQABAAQA/P8FAAAAAgABAAAABAD+/wUA+v8JAPn/CAD3/wgA9/8HAPn/BQD7/wAAAAD+/wIA///+/wEA/v/+/wEA/v8AAAEA+/8EAP3/AAADAPv/AgD///7/AAD9/wMA+f8EAPr/AgD7/wIA+/8EAPj/AwD6/wEA+/8BAPr/BgD7////AgD4/wUA+v8AAP///f///wAA/P////////8BAPz/AgD9/wIA/v8AAP7/AQABAAAAAgAAAAIA//8CAAEA//8DAP//AQABAAEAAAABAAAAAgD+////BAD7/wYA/P8BAAAA//8BAP//AQACAP3/AwD+/wIA/f8AAP3/AQABAAAA//8EAP7/AQAFAPz/BQACAP3/BgD7/wUA//8CAAEAAAD+/wQA+/8FAPz/AgD7/wMA/f8CAAIA+f8GAPj/AwAAAPz/AgD8/wIAAAD//wAA/f///wAA/P8CAP////////3/AwD8/wQA/f8AAP7/AQD+/wIA/v8BAP7/AAD+/wEAAAD+/wAA+f8DAPv/AAD9//z/AgD5/wQA+P8FAPf/BQD7/wMA//8BAP//AgD+/wEAAAAAAAAA/f8BAP//AgD9/wQA/P8DAP7/AAABAP3/AQD+/wIA/f8DAP3///8EAP3/AwAAAP3/AQABAP3/AwD+/wEA/v8EAPn/CAD6/wYA/v8BAAAAAwABAP//BQD8/wQAAAABAAQA/v8GAP3/BQD7/wIA/v8BAAAA//8AAAEA+f8IAPr/BAAAAP7/BAD9/wIA//8AAAQA/P8EAP3/AwABAP7/BQD6/wUA/f8BAP7/AAABAP7/AQAAAP//AQD9/wAAAgD+/wEA/v/+/wAA///+//7/+/8AAP7///8BAPv/AgD/////AQD//wMA/v8CAP3/AgD+/wIA/v8BAP7/AAD+/wAA/P8BAPz/AwD6/wQA+v8FAPj/BQD7/wQA/f8CAP3/AAD//wAABAD7/wQA+v8DAPv/BQD8/wAAAAD9/wIAAQD//wMA/P8HAP3/AwADAPv/CQD5/wYAAAD+/wUA+/8FAP7/AAD/////AgD9/wUA+/8DAP//AgABAAEAAQAAAAMAAAD//wEA/f8CAAAA/P8DAPv/AwD9/wEA+/8GAPn/BQD8/wQAAAAFAP//AgAAAAMA/v8FAAIA//8HAPv/BgD7/wgA/P8BAAIA/v8DAAEA/P8FAP3/BgD6/wcA+f8KAPb/CgD2/wcA+P8FAPX/BAD5/wEA/v/8/wEA/f8BAPv/AwD9/wEAAwD8/wUA/P8FAPz/BQD+////AwD9/wMAAAD8/wQA/v///wIA9v8FAPr/AAD///z/AwD7/wQA/P8CAAMA+/8HAPz/BQD//wEAAA
ABAP//AwD9/wEAAQABAAEAAQABAP7/BQD//wIABAAAAAQAAQAFAAEAAwD//wAAAwABAAAA///8/wIA/f8EAP3///8AAAAA/v8GAPr/BwD//wEABAD9/wgA+/8GAP7/BQD//wIAAgD9/wMAAAD+/wQA/P8FAPv/BQABAAEAAwD9/wQA/P8FAP3/AgD//wIA/////wYA9/8JAPb/CAD4/wQA+v8IAPj/CQD7/wMAAAACAP7/BgD//wMAAgABAAEAAQAAAAEAAAACAAEAAwAAAAMA//8DAAAAAwAAAAIA//8AAAMA/v8AAP///v8CAP7//v8DAPz/BAD8/wMA/////wMA//8BAAEAAAD//wAAAQD+/wUA/v8AAAIA/////wQA/f8BAP////8AAP3/AwD7/wIA/P8EAPv/BgD5/wQA/P8DAPz/BAD+/wMA/////wQA/f8FAP7/AAAFAPv/BgD+////BQD6/wkA/f8CAAEA/v8EAAQA//8FAP3/BgD//wMAAAD//wIAAQD//wEA//8DAP3/AwD8/wUA/v8BAP//AwD//wAAAgAAAAQA/P8FAP//AQADAPz/AwD+/wIA/v8AAP7////+/wIA+v8HAPj/BgD8/wMA//8BAAEAAAD//wIA//8DAP//AAADAP3/BAD8/wgA+v8EAP7/BAD+/wUA/P8HAP3/BgD+/wUAAgD//woA+f8LAPv/BgD//wIAAQADAAEAAwD//wEA/v8BAAAA//8AAP//AgD///7/AAD8/wYA+f8FAPv/AQABAP7/AQD9/wMA/P8AAP7//v8DAP3/AgD9/wEA/P8EAPz/AgADAPv/BwD8/wUA/v8BAAAA/f8AAP////////7////+/wAA/f8EAPz/AgD+/wAABAD8/wQA/v8EAAAAAgAAAAIAAQD//wMAAQABAAMA//8DAP7/AgABAAEAAwD+/wYA/P8IAPv/BwD9/wMAAAD//wQA/P8EAPz/BAD9/wIA+v8DAPz///////3/AAD9/wQA+/8EAPz/BQD8/wQA+v8DAPz/AwD9/wEAAQD9/wYA+/8FAPr/BAD+/wAAAQD//wAAAQD//wEA/v8CAAIA+/8IAPb/CQD3/wUA+/8CAP3//////wAA//8AAAEA/f8CAP7/AAADAP7/AAAAAP//AQAAAPr/BAD7/wIAAAD6/wQA+/8DAPz/AAD+/wIA/f8CAPz/AAD9/wAAAAD+/////P8AAP3/AQD9/wEA/P////3//v8AAP3//P////3///8BAP7/AwD///7/AQD8/wUA/P8DAP7/AAAAAAEA/f8BAP//AwD8/wQA/v///wQA/v8BAAQA/f8GAP7/BAAAAAEAAAD+/wMA/P8DAPr/AwD+////AQD8/wYA+f8GAPn/BgD9/wIAAAD7/wcA+f8HAPf/BgD2/wcA+f////7//P/+//7//P////3////8/wAA+f8CAPr/AwD8////AQD7/wQA+/8AAP///v8BAP7/AAD9//7/AgD9/wQA+/8CAP////8DAPz/BgD4/wQA/P8CAP//AAD9/wAA+v8GAPX/BAD7/wAA//8BAPv/AwD9/wIAAQD8/wQAAAD+/wQA+v8HAP3/AwABAP7/BAD9/wIA/f8DAPz/AAD9//3//f8AAPn/BAD8/wAAAAD8/wEA//8CAAEAAAAAAP7/AQD9/wMA/f8BAP3/AAD//wIAAQD8/wYA+v8FAPv/BQD8/wUA/f8BAAAA/v8BAAEAAgD+/wEA/v8BAAEA//8AAP7/AAABAP//AQD//wEAAAD//wQA/v8BAP7/AQAAAAEA/f8AAP///f8CAPz/AAAAAPz/AQD+//3/BAD5/wYA+v8EAPz/AQACAPz/AgAAAP3/BAD7/wEAAwD+/wEA/f8AAP//AgD6/wQA+/8CAAEA+/8FAPr/BwD8/wMAAgACAP//AwD7/wYA+/8FAP3/AAAEAPv/BgD8/wQAAQAAAAEAAQAEAP3/AwD+/wAABAD9/wUA/v
8CAP3/BwD6/wQA/f/9/wQA///8/wUA+v8CAAEA/v8DAPz/BAD9/wUA+/8DAP//AgD/////AgD6/wYA+/8FAPz/AwD//wEA/f8BAAIA/v8FAPr/BwD7/wUA+/8GAPz/BgD//wEAAgD//wMAAQD+/wMA//8DAAMA/f8FAPz/BwD9/wIABQD8/wYA/////wUA/v8DAP7/AQAAAAAA//8EAPr/CAD6/wUA+v8HAPj/CwD3/wcA+v8FAP7/AgAAAAAAAQABAAAAAQD9/wIA/f8CAP3/AwD6/wMA/P/+/wMA+f8FAPz/AAAAAP3/BAD9/wQA/f8BAAEA/v8BAP//BAD8/wUA/v8AAAMA/f8CAAIA/f8FAPr/AgABAPz/BwD3/wcA/f8CAAEAAQADAP//BAD9/wQAAAAAAAUA//8BAAMA//8EAP7/BAD+/wIA//8EAAAABQD9/wMAAgD+/wIAAQD9/wMA//8BAP//AwD9/wEAAgD8/wcA+/8GAP7/AwACAP7/AgABAAEAAAABAP7/AwD/////AQD+//////////7////8/wQA/P8FAPv/BAAAAAEAAwAAAAMAAAD//wQA/v8EAP///f8FAPv/BgD6/wIAAwD6/wgA+/8EAAAA/P8HAPz/BgD5/wYA/P8HAP3/AQADAPr/BQD8/wAABAD5/wQA+/8BAAIA+f8GAPj/BwD8/wIAAAD//wAA//8BAP7/AQD///7/AgAAAAIA/f8EAPv/BQD9/wMA+/8GAPv/BgD8/wEAAwAAAAQA//8EAPz/BgD8/wMA/////wEA/////wAAAQD9/wAAAgD//wUA/f8BAAAAAQACAAIA//8GAP7/AwD9/wQA/f8GAP3/AwD//wIA///+/wQA+P8KAPb/CQD4/wcA/f8DAP//AwD+/wEAAAAAAAAAAgD9/wIA/////wAA/v8CAP7/AQD//wAAAwD+/wEAAAABAP//AAD+/wMA//8BAP//AAADAPr/BQD8/wIA//8BAP//AQD//wEAAQD//wIAAgD+/wIAAQAAAAMA/f8FAP//AwD+////BAD4/wgA+P8EAP7/AgD9/wUA/P8GAP7/AAADAP7/BQD9/wYA//8CAP//AQD//wAAAQD7/wQA/f8CAP3/BAD//wIA/v8EAPn/CAD8/wMA/v8DAP3/BwD3/wcA+P8GAPv/AQD+////AQD+/wMA/P8EAP7/AQACAP//AQAAAP///f8CAAAA/v8FAPr/BgD9/wQA/P8GAPr/CAD+/wAAAAAAAP//AgAAAAAAAgD+/wUA/f8DAP7/AwD+/wMA/////wAAAQD9/wMA/v///wcA+P8KAPr/AgACAP7/AgD9/wUA/f8DAP//AAD//wAA/f8BAP7//v8AAP7//v8AAP7///8AAP///f8DAPz/AQD9/wEA/v8AAP7/BQD9/wMA/f8EAAAAAgD//wQA/f8GAP7/AgADAP3/BQD+/wYA/P8EAP//AAAEAP3/BQD9/wAAAAAAAAIA+/8CAPz/BAD+/wIA/v8DAP3/BAABAP//AgD//wEA/v///wAA/v8AAP///f/+//////8BAPz/AQD+/wQA/P8EAPz/AwD+/wAA//8BAP//AgD+/wEA/v8BAP7/AAAAAAEA///9/wIA+/8GAPv/AgD9/wEAAAAAAAIAAQD//wQAAAAEAP//AgAAAAMA/v8AAAQA+v8HAPf/CAD7/wUA/P8CAAAAAgD//wQA+/8FAPz/AwD//wAAAgD8/wQA+/8DAAAA/f8CAAAAAAAAAAEA/P8GAPj/CAD7/wIAAQD9/wUA/v8BAAIA//8AAAIA/f8BAP///v8AAAAA/P8AAAEA/f8FAP3///8EAPr/BwD4/wUA+////wAAAAD+/wEA/P8CAP////8BAP3/BAD6/wYA/P8GAPz/BAD+/wQA//8BAP7/BQD8/wYA/v8DAAAAAQD//wEAAAADAP7/BQD7/wYA//8BAAIA+v8JAPj/CAD4/wYA/P8CAP3/AA
D9/wIA/P/+/wEA/v8BAP7/AAD9/wUA+v8GAPv/AwD+/wEA/v///wEA/f///wIA+P8JAPn/AgACAPz/BgD9/wEA//8AAAIA/v8DAPz/AQAAAAEA/P8HAPj/CQD1/wsA9v8JAPr/AQD///7/AgD//wAA/v8BAAAAAgD+/wAAAgD8/wYA+v8EAP7/AwAAAAEA//8BAP7/AgD+/wIAAAABAP////8BAP/////+/wEA+/8CAPz/AgD9////AQD+/wIA/P8CAP3/BQD6/wQA+/8CAAIA/f8EAP//AgAAAAEAAAACAAAAAwD9/wQA/P8EAP//AAD//wIA//8AAAMA+v8GAPj/BgD9/wAAAQD8/wgA9/8IAPr/BQD7/wQA/f8DAP3///8AAP7/AQD+/wAAAAD+/wAA/v/9//7/AQD6/wMA+v8DAPv/AwD6/wMA/f8DAPz/AgD//wIAAAD+/wUA+/8KAPn/AwD///7///8AAP3/AAAFAPn/CAD4/wUAAAD9/wEA/f8BAP3/AQD+////AAD//wMA/P8FAPn/CAD8/wUA//8CAAQA/v8EAPz/BwD4/wkA+/8GAAIAAQD//wcA+/8IAPz/AQACAP7/BAD/////AgD8/wAA/v/+/wEA/v//////AAD9/wIA+v8CAP7//v/9/wEA+/8BAPr/BwD5/wYA+P8EAPz/AwD8/wUA+/8FAPj/CAD6/wUA+v8EAPz/AwD6/wMA///8/wQA+/8CAPz/AAD8/wMA/f8AAAEA+v8EAPr/AwD+/wAAAAAAAP///v8EAPv/AwD//wEAAgD+//7/BQD8/wgA/P8BAAQA/v8BAAUA/P8GAP//BQAAAAUAAQD+/wcA/P8BAAQA+v8HAPz/AgABAP3/BwD5/wYA/P8EAP7//P8HAPX/BgD6////AwD3/woA8/8KAPn/AAADAPv/BgD9/wEA/P8BAAAA/f8CAPr/AwD9/wAA/f/+/////P8BAPr/AAD9//3//v/8////+v8DAPn/AAAAAPz///8CAPz/AwD+/wIA/f8EAPz/BgD9/wgA/f8FAP7/BAD//wYA/P8DAP//AwD9/wUA//8EAP//AgACAAEA//8BAP///v8DAPr/CAD5/wYA/f8EAP7/BgD7/wUA/P8CAAAA/P8CAP///f8DAPz/BAD+/wMA+v8EAPr/BAD+/////v///wIA//8BAAEA//8CAP7/AAAAAAAAAQABAAAAAwD9/wUA/f8GAP3/BwD4/wgA/f8DAPz/AwD8/wIA///8/wgA+P8IAPv/AQAFAPj/BwD9/wAAAAACAPr/BgD6/wAAAgD6/wMA+/8BAAAA/f////7///8AAP///v8BAAAA/f8DAP//AQAAAPr/BAD8/wIA/v/8/wUA+f8GAPr///8BAP3/AAABAP//AwD+/wUA/f8EAAAAAgABAAIAAAAGAP//AAAFAPz/BQD+/wMAAgD+/wIAAQAAAAcA+/8GAP3/BQD//wEAAgD9/wEA/v8CAAIA/v8BAP7//v8BAP3/AAAAAAAA+/8EAPn/BAD7////AwD9/wIAAAAAAAIA/////wIA/v8FAPr/AwD9/wIA/v8DAAEAAQADAAEAAAAEAAAAAQABAAAAAAAEAPz/CgD5/wsA+v8HAP//AgABAP3/BAD+/wEAAQAAAP//AgD+/wQA//8CAP//AwD6/wYA+/8EAP///v8CAP7/AwD9/wMA/f8BAP//AAD+/wMA/f8BAP////8BAP//BAD7/wQA/f8DAAAA/f8CAAAAAgD/////AgAAAAMA/P8EAPn/BwD5/wUA/v8BAAIA/v8AAAMA/v8FAPr/CQD4/wkA/P8CAP//AQD+/wQAAAABAAMA+/8IAPz/BwD8/wcA+/8KAPv/CQD7/wUA/v8BAAEAAQABAP//AwD9/wAAAgD///3/BQD6/wQAAAAAAAAAAwD9/wEAAAAAAAEA/v8BAAAAAQADAAAAAAAEAPr/BAD+/wAAAgD9/wIA/v8CAAMA/v
8DAPv/BQD7/wUA/P8EAPv/BAAAAAMA/v8AAP3/AgD+/wMAAAACAP7/AAACAP//AAADAPz/BgD7/wYA+/8GAP////8CAPv/CAD5/wMA///9/wQA+/8GAPr/AwD9/wAAAgAAAAEAAAABAAAAAwD+/wQA+/8HAPb/DAD0/wgA9/8DAPv/AAD9/wIA+/8DAPr/BQD8/wEA//8CAP//AQD+/wAAAwD6/wcA+/8DAP////8BAP//AQD//wIA/v8DAP7/AgABAAQA//8BAAIAAQAEAAAAAgABAAQA/v8GAPr/AwABAP3/BAD9/wIAAgD//wMAAQAAAAEAAwAAAP//AAACAP//AAD+//7/AAAAAP7///8BAPr/BgD6/wQA/f8AAAIAAAADAAIA//8CAAAAAAAEAP7///8AAP3///8AAPz/AgD9/wAAAwD8/wQA/v///wIA+/8DAAAA//8BAP3/BQD9/wQA/v8DAP3/BAD7/wAAAQD7/wQA+f8FAPn/BgD2/wYA/P8AAP//AQAAAAAAAQD//wIA//8CAP3/BAD+/wAAAAAAAAIA+/8EAPr/BAD+/wIA/f8FAP3/AwD/////AgABAAIAAAAAAAAA//8BAAIA/f8CAP7/AgD//wIAAQADAAAAAQADAAMAAAADAAAAAgADAAAAAAAFAP7/AgD//wUA/P8FAP7///8EAP7/AwAAAAIA//8BAAIA//8FAP7/BAD9/wIAAQABAP//AgD9/wUA+v8DAPr/AgD8/wIA/P/+/wEA/v8AAP////8CAP3/BQD6/wIA/v/8/wMA+f8EAP3///8BAPz/AgD8/wIA/P8CAP3/+/8EAPn/AgD9//////8AAP7/AwABAAAAAwD+/wIAAQD/////BAD8/wQA/f8BAP//AwD//wUA/f8EAP//BgD/////BAD//wIA//8DAPz/BQD7/wYA+/8EAP3/BAD//wIA/P8DAPz/BAD8/wQA/f8BAAQA/v8DAAAAAQABAAAAAQD+/wMA/f8EAPz/AgAAAAAA//8DAP7/AgD//wAA/f8GAPn/BQD7////AwD7/wIA/P8AAP//AQD8/wMA/P8DAP3/AwD9/wcA/P8DAAIAAAAEAAIA//8CAAQA+/8FAPz/AAD+/wIA/f8BAP///////wEA+/8FAPz/AQD/////BAD9/wQA/////wIA//8DAPv/AwD6////AAD7/wIA+/////7//v8AAP7/AQD8/wMA/P8AAP7//f8CAPv/AwD9/wAAAAD9/wEA//8EAPr/BAD9/wAAAgD9/wIA/f/+/wIA/v8AAAAA/v8BAAQA+/8GAPz/AQADAP///f8GAPj/CQD4/wUA///+/wgA+v8GAAAAAAADAP///v8DAAAA//8EAPj/BwD8/wMA/f8AAAIA/P8EAPr/AwD9///////7/wQA+f8DAPz/AQAAAP///f8DAPr/BAD9//7/AQD8/wIA+/8DAPv/AQD+/wMA/P8FAPr/BgD9/wMAAQD//wIAAgD+/wIAAAD9/wIA/P8BAP////8AAP7///8AAAAAAQAAAAEAAQD8/wIAAQAAAAAAAgD7/wQA///9/wYA//8BAAEA/v8DAPz/AQAAAP3/BAD9//7/AgD+////AwD8/wAA///+/wAAAAD9//3/AQD4/wQA+P8EAPj/AgD9////AAAAAP//AAD+/wEA/v8AAP///f8FAPn/BQD9/wMAAAABAAAAAwD//wMA//8AAAEA/v8FAPz/AwD/////BgD7/wMA/f8BAP7/AgD+/wIA/v///wEA//8CAPz/BQD4/woA9f8NAPb/CQD8/wIAAQABAAEA//8CAP3/BQD5/wQA+v8BAP///v/+//7/AQD9/wAA/f8AAP7/BQD5/wQA/P8CAAAAAQD//wIA+/8CAP7/AAADAPj/BwD7/wIAAAD9/wMA/f8BAP//AAD//wIAAQAAAAIA//8CAP/////+/wMA/P8CAP7///8BAPv/BAD4/wQA/v////////
/+/wAA/f8AAPr/AgD9//7/AAD8/wEA/P//////+/8CAPv/AgD5/wIA+f8BAPv/BAD6/wQA/f///wUA9/8GAPv/BwD6/wQA/P8BAAQA+v8HAPv/AAAFAPn/BwD8/wIAAAAAAAIA/f8DAPz/AgD+/wAAAgD7/wUA/f///wIA/P8CAP///P8DAPz///8BAPr/BQD7/wEA///+/wMA/v8AAAIA/P8EAP3/AQACAP7/AQD//wAAAQD7/wMA/P8EAAAA+v8BAPz/AgD+///////+/wEA/v/8/wQA+f8GAPr/BQD6/wMA//8AAAEA//8AAAEA/P8FAPr/BwD9/wAAAgD7/wQA/f8BAPr/BAD5/wUA/f8AAAIA/P8AAP//AgD9/wMA/P8AAP7/AAD7/wQA+v8DAPz/AQD9/////f8BAP7/AAD9/wAA/f8AAPv/BAD8/wMA/P8EAP7/BAD//wEABAD+/wUA+/8FAP//AQABAAAAAQD//wQA/P8AAAEA/f8BAAUA9/8JAPr/BAAAAAUA+v8HAPv/BQD9/wQA/P8CAP7/AQACAPr/BQD8/wQA/////wIAAAADAP3/AwD//wEA//8BAP//AgD+/wMA/f8EAPz/BAD//wIA//8DAPz/BAD8/wUA/f8BAAAA/v8BAP//AAD+/wIA/P8DAP7///8AAAEA/f8EAPv/AQD8/wIA/f8BAP3/AQAAAAMA/v8EAPv/BgD7/wUA/P8EAAAAAAADAP//AwD+/wIAAAADAP//AwD//wIA/f8EAP7/AgD//wAABAD//wMA//8EAP//BAAAAAMABAAAAAIA//8DAAAABAABAAEA//8CAP7/BQD+/wIAAQABAAEAAQACAP//AQABAAEAAQD+//3/BAD8/wQA/f8AAP7/AwD6/wUA+v8FAP3/AwD9/wQA/P///wIA+v8FAP3/AAD//wIA+v8IAPr/BQD9/wIAAQAAAAYA/P8IAP3/BgAAAAIAAwACAAMAAAAEAP7/BQABAAUAAAAEAP//BgD+/wUA/v8EAAIA//8GAP7/AwABAAUA/f8HAPn/CQD6/wYA/v8CAAEA+/8IAPf/CAD4/wcA/P8EAPn/BQD5/wkA+v8GAPr/BwD6/wcA+P8GAPf/BwD4/wUA+/8AAAAAAwD7/wcA/f8FAAEAAQAEAAAAAAACAP//AwABAP7/BQAAAAQAAAADAAEAAAAFAPz/CAD6/wkA+f8IAP//AQAEAP7/BwD//wkA+v8JAP3/AwD+/wAAAQD9/wIA/f8BAP3/AgD//wAAAAD+/wEA/P8JAPf/CAD4/wQAAAD8/wQA9/8GAPv/AgAAAP3/AAADAP3/AwD9/wQA/P8FAP//AgAFAAEAAwADAAMABAAAAAcA/f8HAAIAAQAIAP//CAD7/wkA/v8CAAcA/P8HAP//AQAGAPz/BwD8/wYA//8BAP//AwD//wMA/v8DAP//AAAAAPz/BwD4/wUA+/8CAAMA+/8GAPj/BwD8/wQA//8DAP7/BAD8/wMA/f8CAAAAAAD//wEA+/8FAPr/BAD5/wUA+/8AAAEA+/8EAP3/AwD9/wIA/v8CAP3/AQD9/wMA/P8FAPv/BQD+/wAABAD9/wMA/f8FAPz/BAAAAAAAAgAAAAIAAgACAAIAAgACAP//AQADAPr/CAD8//7/AQD7/wIAAQD7//7//v8AAP/////8/wMA+/8BAP7//P8GAPz/AgD+/wEABAD8/wcA+/8EAAAA/v8HAPv/BwD+//3/+/8AAPn/BQD7/wEAAgD8/wMA/f8CAPv/AQD6/wIA+f8CAP3/AwD//wEAAgD+/wQA//8CAAEAAgACAP3/BgD5/woA9/8JAPb/CQD7/wMA/v8DAP//BQD8/wYAAAABAAMA//8EAP//BQD9/wIAAwD8/wgA//8BAAIAAwD9/wkA9/8JAPj/BwD7/wQA/v8BAAAA/v8CAPz/AQD///v/AgD4/wEA+/8AAP///v/8/wQA+/8AAAAA+/8FAPv///
8CAAAAAQAAAP//AwD//wUA///+/wUA+v8FAPr/BgD6/wMA/f8AAAEA/P/////////+/wAA/f8FAP3/BAD+/wAAAQABAP7/AwD8/wMA/v8AAP3/BAD6/wcA+f8AAP///v////z/+//9//7//v/8/wIA9/8HAPn/BQD9/wIAAAABAAAAAAACAAAAAgD9/wQA/f8FAPz/AgABAP7/BwD6/wgA/f8FAAEAAwABAAQA/P8IAPz/AgD/////AgAFAPz/CAD8/wgAAAAHAAIAAwAEAPz/BgD7/wMA/f8BAP///v8BAP7///8EAPz/BgD6/wUA/f///wEA/f8AAP//AAD+/wAA/v8AAPz/AAAAAP3/AwD9/wMA//8AAAEA+/8HAPb/BQD9//3/AQD///3/AwD8/wAABQD4/wgA+v8EAP//AAABAAEA/f8GAPv/BQD5/woA9/8KAPr/BAD/////AAAAAAAAAgD9/wIA//8BAAIA//8BAP3/AgABAP7/CAD5/wYA/f8AAAMAAQACAP3/AwAAAAIAAwD9/wMA//8BAAEAAQD//wAAAwAAAAIAAgAAAAIAAgACAAMAAQAAAAEAAQD+/wEA/v8BAP//AwD8/wQA/f8DAP//AgD+/wEA//8BAP3/BAD8/wQA/v8BAAEAAQD+/wIA/v8BAAEA//8DAP3/AAAAAAEA/v8DAP7///8FAP7/BAAAAAEA/v8FAPz/AgD//wAA/v///////v/+/////v8DAP//AgD//wMAAAACAAMA/v8EAAAA//8GAPv/AgABAP3/AgACAP7/AQACAPz/BQD//wEAAAD9/wEA+/8IAPX/CQD2/wYA+v8IAPf/BQD6/wEA/v8AAPn/BAD6/wQA/P////z//////wEA///+/wYA+f8HAPz/AwD//wIA/v8CAAAAAwD9/wIAAAACAAAAAgD9/wUA/P8FAP3/BAD8/wMA/P8CAAAA//8AAAIA+/8GAPv/AwAAAP7/AgD+/wIA/P8FAP7/AwD9/wMAAQADAAEAAwD+/wgA+/8HAP//AwABAAEAAQACAAEAAQABAP7/AgD9/wQA/P8BAPz/AAD+/wMA+f8FAPn/BQD6/wQA+/8CAPv/AAD+/wEA/f8BAP7/AQD8/wMA+/8FAP3/AQACAAIA/P8HAPj/BQD8//7/AgD9/wAA/f8AAPz////9//r//v/9//3/AgD6/wAA///+/wAA/f8EAPj/BQD8/wAA///9/wMA/v8BAP7/AwD9/wQA/v8BAAIA/////wMA/v8CAPz/BQD7/wUA/v8BAAIA//8CAP//AgAAAAAA/v8DAPv/AgD9/wEAAgD8/wYA+f8IAPr/AgACAP3/BAD8/wUA/P8FAAQA/P8JAPz/BAACAAEAAQAAAAMA/P8GAP7/AgADAP3/AwD+/wEAAAABAP7/BQD+/wMAAAD///7/AgD5/wIAAAD6/wUA+v8CAP7/AQD//wQA/P8EAP7/AwAAAP7/AgD+/wAA///8/wAA//8BAPr/AwD8/wAA///8/wAAAQD5/wIA9/8BAPv/AAD8/wMA+v8CAPz/AQD///7/BAD3/wkA9/8FAPn/BAD//wEA/P8FAPn/BQD9/wIA/v8BAP//AgD9/wYA+/8FAAIA//8CAAEA/v8AAP//AAAAAAEA/v///wAAAAABAAEA//8FAPv/CgD4/wwA+P8IAP//AgADAP7/BAAAAAQA/f8FAPz/AgD9/wYA9/8GAPr/BAAAAP//AQACAPv/AwD9////AgD6/wIA/P8AAAIA+/8FAPn/AgD//wAA//8DAPv/BAD+/wEABAD7/wUA+/8HAPv/AwD9/wQAAAADAP7/AQD//wMA/P8AAP///v8BAP7/AgD8/wQA+P8IAPn/BgD4/wUA+v8BAPr/AQD7/////P////3/AgD8/wEAAAAAAAEA/P8CAP7/AgD//wEAAAADAAMA/f8HAPv/CAD+/wAABAD5/wAAAAD8/wEA/v/8/wUA/P8BAP3/AQ
D+/wMA+v8EAPr/AgD//wAA//8CAAEA/f8FAP3/AwACAP7/CAD7/wUAAQD//wUAAQD8/wkA+f8IAP3/AAAFAP3/BAD7/wMA/f8CAPz/AQD7/wAA///8/wIA/P8FAPn/BQD6/wYA+/8IAPn/BgD7/wUA/P8AAP///f8BAP7/AQAAAAAAAQD+/wYA+v8FAP3///8DAPz/BwD4/wgA+P8FAPr/AwD7/wAA/v/+/wIA//////7/AAD//wEA//8CAP3/AQADAP7/BgD7/wMAAAABAAEA//8BAAIA/////wIA/f8EAPv/BQD8/wIA/v/+/wEAAAAAAP3/BAD6/wIA/v8AAAEAAwD5/wYA+/8GAP3/BgD+/wQAAAD//wIA/P8GAPz/BwD7/wYA//8GAPz/AgACAAEA//8FAPv/BgD7/wQA/P8EAPb/CwD2/wcA+P8DAP7/AgD+/wIAAAACAP//AAAEAPv/BAD+/wAAAwD+/wEAAAABAPz/AwD7/wEAAgD8/wYA9/8FAPz/AgACAPz/BgD8/wMA/v8CAP//AAD///3/AwD6/wQA+f8GAPj/CQD2/wcA/P8BAAIA/v8FAP3/BQD8/wYA+/8IAP3/AAAEAPr/BgD4/wUA/v8BAPz/BgD8/wIAAQAAAAEAAwD+/wIAAAAAAAMA+f8IAPr/BAABAP3/BAD//wAAAQACAPv/AgD7/wEA///9/wQA/P8EAPz/BAD+/wEAAgD//wAAAwD6/wcA/P8EAAIA/v8DAAAAAAACAAAA///+/wEA/P8FAP3/AAACAPz/BAABAP7/BAD6/wUA+f8IAPj/BgD7/wIA/f8BAP7/AgD+//3/AgD//wIA//8CAAAAAgABAAEAAgABAAAAAQABAP7/AgAAAP3/AgD+//3/AgAAAP3/BAD4/wgA+P8IAPz///8EAP7/AgACAP7/AwABAAAAAQABAAAAAAAAAAAAAAABAP//AAABAP//AgD9/wIAAAD+/wIA/v///wAA/v8AAP/////+//////////3/AQD8/wAA/P8BAP7//P8AAP7/AQD//wAA/f8CAP3/BQD8/wMAAAABAAIA/v8EAP3/BgD8/wMAAAD//wMAAAD9/wQA/P8DAP//AgD+/wIA/v8AAP3/AwD9/wYA+P8CAPz/AwD///7/AQD8/wYA+P8HAPj/CAD6/wUA/v8CAAEAAAAAAAIA/v8GAPr/BwD3/wQA/f8BAAAAAAD9/wIA/f8CAAAA//8AAP7/AgD+/wIAAAD+/wUA/P8EAPz/AAAEAPn/BwD4/wUA+v8GAPr/AwABAP//AgD//wAABAD+/wMAAAAAAAQA/////wIA+/8CAAAA/P8DAAAA+/8IAPX/CQD8/wMA//8CAP//AQABAP3/AAD+/wEA//8AAP7/AAD+/wEA///9/wAA/f/+/wEA/f8BAP///f8DAP3/AQD9/wQA+f8GAPn/BgD6/wEA/f8BAP3/AwD4/wcA9/8JAPf/CAD5/wYAAAD//wYA+v8IAPz/BQD9/wQA/f8EAPv/BQD//wEAAAD//wEAAgABAAAAAwD8/wMAAAABAAIAAQD+/wIA/f8DAP//AAABAP7/AwD//wAA//8BAPz/BAD7/wMA//8AAAEAAQACAP7/AwD9/wYA/P8GAPz/AwD+/wIAAAD//wEA/v8AAAAA//8CAP7/AgD7/wYA+/8EAP///////wEA/v8AAAMA+/8DAAAAAQABAP//BgD5/wkA+f8BAAEAAQD9/wUA+/8FAP//BAD8/wUA+/8DAPz/AwD9////AAD8/wAA/v/9/wQA+P8FAPz/AgD///z/AwD9/wIA////////BAD8/wMA/P8DAP7/AQD9/wEA/f8EAPv/BAD+//7/BAD+/wIA//8BAAIA/P8HAP3/BQD+/wEAAAABAAAAAAACAP3/AwD6/wYA9/8IAPv/AQACAAAA//8DAAAAAAAGAPn/CgD9/wQAAQD//wIAAwD+/wMAAAABAP7/BgD8/wcA+f
8GAP7/AwABAP7/AQD9/wQA+f8JAPT/BwD5/wMA///+/wAA/f///wAA/P8EAPj/BwD3/woA9/8GAP3/AgD//wEA//8BAAMA/f8DAP///f8EAP3/BQD9/wEAAAD9/wQA+/8CAAAA/f8DAP//AAACAPr/BAD7/wIAAQD5/wYA9v8HAPf/BAD8/wIA//8AAP7/AgD//wAAAQD8/wQA//8BAAAAAgABAAIA/v8DAP7/AwD+/wAAAAACAPz/AwD9/wAAAQD+/wEA/v8DAP3/BAD9/wQA/v8EAPr/BAD9/wYA/P8DAPv/BQD9/wEA//8BAAAAAwAAAAQAAAD//wYA/f8FAP7/BQAAAAMABAD9/wcA/P8JAPz/BQD//wAAAQD+/wEA/v8BAAEA/P8DAPz/AQD+/wEA/v8AAAAA/P8DAPv/AwD7/wIA/P8BAP//AQD9/wAAAgD+/wEA/v8AAAAA/f8DAPv///////z/AAADAPj/BAD5/wQA+/8BAAAA+/8EAPn/AQD9//7/AAD///3/BAD5/wYA+/8FAP3/AQABAP//BAD9/wMA//8DAAAA//8HAPn/BQD7/wIA//8BAAAA//8GAPr/CwD7/wcA//8DAAIAAgACAAIAAAACAPn/CQD5/wYA/P8DAP7/BAD8/wUA/f8BAAIAAAAEAP3/AgD9/wMAAAD+/wIA/v8AAAEA////////AAD+/wIA/P8DAPz/BQD6/wYA/P8FAP//AQABAAEA//8EAP3/BAD8/wYA/P8CAP//AAACAP///v8CAP7/AAD+/wAAAQAAAP///v///wEA/f8CAPz/AwD7/wAA/v/+/wIA+/8CAPv/AQD+/wAA/v8AAP7///8AAPv/AwD8/wAA/v8AAP3/AgD8/wAAAAD///7//f8DAPv/BAD8/wIAAQD9/wYA+P8KAPn/BwD//wEAAgABAAEABAD//wAAAQAAAAIAAgD9/wcA/P8HAPv/BQAAAAEABAD8/wcA/v8DAAQA//8GAAAABQAAAAYA/v8HAAIAAQAGAAAAAgAFAP7/BQD//wQA/v8BAAIAAAACAP//AgD9/wYA9/8GAPz/AwD+////AgD/////AQD9/wEAAAD//wEAAAD9/wIA/f///wAA/////wMA+/8BAP/////+/wIA/f8DAPz/AAABAP3/AQD6/wMA+f8EAPn/AgD7/wMA/f///wIA//8AAAMA/f8CAAIA//8BAAQA/v8GAP7/AwABAAMAAAAHAP3/BAD//wMAAgADAP//AwABAAAABAD+/wcA+/8IAPr/BAD+////BAD9/wAAAAD7/wMA/P///wEA/v/8/wIA+f8HAPr/BAD///7/AQACAPz/BwD5/wYA/f8DAAEAAwAAAP//BAD+/wIAAgD//wYA//8CAAMA/P8KAPj/CQD8/wcA/f8FAAIAAAAEAP//AAAHAPr/CQD8/wYAAAACAAIA/f8IAPf/CgD5/wcA/v8DAP7/BQD9/wcA/f8DAAQA/P8HAPz/AwD//wEAAAABAP///v8DAP7/AgD+/wIA+/8GAPz/BAD9/wIA//8DAP3/AwD+/wAAAgD7/wQA/P8CAAIA/P8FAPr/BwD3/wgA+f8EAPr/AwD6/wUA+/8FAPv/BwD8/wQAAQD9/wcA+/8EAAEA+/8JAPf/BwD5/wMA/v8BAP///v8BAAMA/v8AAAIA+/8GAPr/BQD9/wEA//8AAP7/AgD+/wMA//8CAAEAAQADAPr/CAD7/wQAAgD5/wUA+v8FAP3/AgAAAAIA//8FAPr/BQD9/wEABAD8/wUA+v8GAPv/AgD+////AAD+/wIA/v8CAP//AAAAAAEAAgD//wUA/v8EAP7/AwACAP//AwD//wAAAgD//wUA+/8CAP//AAACAP//AgD//wEAAAD+/wUA+f8IAPv/AgD9/wMAAAD//wEA/P8FAPr/BwD6/wMA/v8AAAAA/v8EAPr/BgD8/wEAAwD6/wYA/P8HAPv/AQABAP7/AgD+//7/Ag
D7/wQA+v8EAPn/BAD+//7/AAD7/wMA/f///wAAAQD9/wUA/P8EAPz/AQABAPz/BAD5/wMA/f///////f8CAPz/AwD7/wUA/f8DAP////8CAP3/AAAAAP//AgD+/wAA/v///wIAAQAEAP3/BAABAAMAAQD9/wMA/f8FAP//AgD+/wIA+f8HAPr/AgABAP7/AgAAAAAAAQABAP3/AwD//wEA/////wMA/f8BAAAA/v8BAAIAAAAAAAAA//8BAAEA/f8DAPz/AgAAAAAA//////////8BAP3/AgD+/wMA/P8CAAEAAQAEAPz/BQD6/wkA9/8JAPf/CAD7/wQA/v8DAP7/BQD6/wcA/P8CAAAAAgD7/wcA/P8FAPv/AgD6/wUA+v8BAPz///////7/AAD///7/AAD9///////+/wAA/P/+/wAA/v8AAPz/AgD///7/AAAAAPv/BAD3/wcA9/8EAPv/AQD/////AAD9/wUAAAAEAAAAAQACAAAABAD//wMAAgD9/wUA+/8FAP7/AgAAAAIA/v8EAPz/BgD7/wYA+/8FAPz/BQD/////AgD8/wMA+/8BAP///f8CAP3///8CAPz/BAD+/wMA+v8GAPj/BgD7/wQA+/8DAP3///8BAP7///8AAP7////9/wEA/f/9/wQA+v8EAP7/AgD//wUA//8EAAIA//8GAPz/BQAAAAQA//8CAP//AwD+/wEA//8DAP//AwD+/wQA/v8AAAMA/P8GAPz/AQABAPv/CQD0/wwA9/8GAPz/AAD/////AAD+/wMA/v/9/wUA9v8LAPX/BgD7/wIA/f8BAAEA+P8GAPn/AgD///7////+/////f/9/////v8CAP3/AgD+/wEA/v8GAPz/CAD6/wgA//8BAAMA/f8HAPz/BgD+/wQAAQADAAAAAgADAP//BAABAP7/AwABAAQAAAACAP7/AAADAP7/AgD9////AgD8/wUA+/8BAAEA/f8BAP////8DAPv/AwD6/wYA+/8DAP3///8AAP///v/+/wAA+/8CAP///f8CAPv/BQD8/wIA/P8CAPz/AAACAPz/BgD4/wUA/f8DAP3/AgD//wIA/f8DAP3/BQD//wAAAgD//wYA/f8JAPz/BgD+/wUA//8FAP7/BgABAAIABAD//wEAAgD+/wQA/v8DAP3/BAD//wEAAwAAAAMAAAADAP//AgACAPz/BgD5/wgA+P8HAPr/AgABAP3/AgD+/wAA/v8BAP3////+/wAAAQAAAP3/AAD+//////8BAP3/AQAAAPz/AgD7///////8/wIA/f///wAA//8DAPv/AwD//wAABQD7/wIAAgD9/wIAAAD+//7/AgD9/wMA/P///wIAAAD/////AQABAP//AQD//wAABAD8/wEA///7/wUA+v8EAP///P8DAP3/AgAAAP//AgD9/wYA+/8FAPv/BgD8/wYA/P8FAAMAAAAFAP7/AwD//wQA//8EAP//AwD+/wcA+P8KAPn/BwD5/wYA+P8HAPj/BgD6/wEA///+/wMA/f///wMA+/8BAP7///8CAP///P8DAP3/AgAAAP3/AQAAAP7////9/wQA+P8HAPX/BQD8////AQD6/wMA+f8FAPf/AgD9//7/AQD///3/AwD5/wEA/f///////v/+//7/AQD5/wUA+/8BAP////8AAP///////wEA/f8EAAAA//8FAPr/CAD8/wQA/v8BAAIAAQD9/wYA/P8DAAIA/P8EAPr/CAD4/wkA+f8EAPz/BAD9/wEAAgD//wAAAQD/////AgD8//7/BAD6/wcA/v8BAP//BQD9/wcA+v8JAPr/BwD8/wMAAgD//wMA/////wMA/f8BAAEA/P8CAP3/AQD//wMA+/8DAPv/AAACAPv/AwD9/wAAAAABAP7///////3/AgD7/wMA/v//////AAD+/wIA///+/wAA/////wMA+/8DAP7//v8BAAAA/f8EAPn//v8BAP3//f8BAPj/Aw
D7/wIA+v8DAPj/AwD9/wIA//8DAP3/AQABAP7/AgAAAPz/BAD7/wQA/v/+/wQA/f8DAP7///8AAAAA/v///wMA+v8GAPr/AgACAP//AAACAP3/AgD+/wAAAQD//wAAAAABAP7/AAACAP//BQD8/wEABQD8/wUAAAD+/wcA+f8JAP3/AwABAAIAAQD9/wMA/P8DAP7///8DAPv/AwD9/wMA//8AAAEAAQAAAAEA//8AAAIAAAAAAAEA//8CAAAAAwD6/wYA+/8CAAAA/f8DAPz/AwD9/wQA/P///wMA/P8EAPz///8AAAAA/v/8/wEA+P8FAPv//f8FAPb/CQD3/wgA+P8GAP7/AQACAPz/AgD+/wIA/v8BAP/////+/////v8AAPz/BQD3/wcA+P8EAP3/AAD+/wIA//8BAP7//f8DAP7/BQD6/wMA/P8BAP7/AgD9////AgD6/wYA/P8EAP////8FAP7/BgD7/wQAAwD7/wkA+f8GAP//AAABAAAA//8AAAAAAAD+/wQA/f///wYA+f8JAPz/AgACAAEAAQD/////AAAAAAAA//////7/AgD9/wIA/v/+/wIA/f8EAP7/AwAEAP//AgD//wMAAAADAAMA/v8GAPz/BgD9/wUA/v/+/wYA+/8GAP3/AwD9/wQA/////wYA+v8GAP3/AAAAAP7/AQD8/wAA9/8EAPv///////3/AAD///v/BAD9/wAAAwD8/wUA/f8EAP3/BAD+/wAAAwD+/wEAAAD//wAAAwD6/wQA+P8BAP7//f8AAP7////+/wIA/f8CAAIA+/8IAPr/CAD8/wEAAwD+/wEAAgD8/wMA/v8GAPr/CQD4/wYA//8DAAAABAABAAMAAgAFAAAAAgABAP7/BwD7/wMA/v/9/wEA/////wMA+v8FAPr/BQD+/wEAAwD//wUA//8BAAMAAAADAAAAAwAAAAIAAgD8/wQA//8AAAIA/f8DAP7/AgAFAPz/BgD+/wEA//8CAP//AQACAPz/BAD9/wUA/P8AAAEA/P8DAPz///8FAPz/BAD+/wMAAAACAP//AwACAAMA//8EAP7/BAD+/wMA/f8FAP7/BgD//wAAAwABAAEAAgD//wMAAQD//wAAAwD8/wIA/f8CAP3/AQAAAP3/BAD7/wUA/P8EAP7/AAAFAPv/BQD8/wIA//8BAAIA/f8GAPr/BQD9/wIAAQD8/wMA/f8AAP///v8CAP7//v8BAP7/BAD8/wMA+/8CAP//AAACAAEAAAD+/wQA/f8FAP7/AgAAAAIA//8DAP3/BAD+/wQAAQD//wMA//8CAAMAAgACAAEABAD+/wUA///+/wYA+/8FAPz/AwD//wIA/f8CAAAAAAABAP//AwD//wAAAgD//wUA/P8EAAIA/v8DAP7/AQD//wMA/v/+/wAA/v8AAP7/AAAAAP7/AgD/////BAD7/wYA/P8FAPr/BwD8/wMAAAD//wMA//8AAAQA/P8EAPz/BwD7/wcA/v8BAAMAAQADAAIAAgADAAMAAAAFAP7/BwD9/wMAAQAEAP//BQD9/wMA/f8BAP//AQD+/wEAAAD///////8CAPz/AQD///7/AwD9/wEA/f8CAP3/AgD5/wUA+v8FAPz/AQD+////AgD8/wUA+v8FAP//AgD//wUA+/8GAPr/AwD8/wEA/v////7///////7/AQD9/wQA+/8EAPz/BQD8/wUA/P8HAPv/CAD6/wgA/P8BAAMA//8FAP7/BQD7/wcA+/8FAP//BQD7/wsA9v8NAPn/BgAAAP7/BQD8/wUA+v8GAPr/BwD6/wMA+f8GAPf/BQD5/wIA/f/+/wQA+v8FAPz/BAD8/wQA+v8GAPj/BgD6/wMAAQD+/wQA/f8BAAAA/f8GAPn/BgD8/wIA//8CAP7/AQAAAAEA//8CAP7/AAAAAP///f8DAPn/BQD7/wIA/v///wEA/v8DAPv/BAD9/wQA/P8CAP7/AAACAPr/AgD9/wEA//
////z/AQABAPz/AgD9/wAAAAAAAP3/AQD8/wEA/v8AAP7//f8AAPz/AQAAAPz/AQD8//7////+//7//v/6/wMA+v8FAPz/AwD+/wEA/v8AAAAA//8DAPv/BQD8/wIA/v/+/wQA/f8CAP//AAACAP7/AQD5/wIA/P8GAPz/BQD//wEAAAAAAPz/AAD+//f/BQD1/wMA+/8DAAAAAAADAPn/CgD7/wcA/f8BAAMA//8BAP///v8AAP7//v/+//3////7/wMA+P8HAPj/BgD7/wAAAAD7/wQA/P8FAPv/BAD7/wQA/v8DAP7/AwAAAAAAAAD///7/AQAAAP7/AwD7/wYA+v8EAPv/AwD9//z/AQD3/wMA+f/9//3/+/8AAPn//v/+//v/AgD6/wMA9/8HAPv/BQD9/wIA//8FAPz/CgD6/wYA/f8EAP7/AgD//wIA+P8HAPn/AwD3//3//f/9//v/AAD7/wIAAAD/////AQD//wQA//8AAP///f8DAP3////+//7/AAAAAP7///////z/AAD4///////5/wYA9/8AAAIA+v8GAPv/BgD+/wIA//8CAAAAAAABAP7/AQACAP///v8EAP7/AQAEAP//BQD//wMAAQACAAUA/v8BAAAA/v8AAP7/AQD+/wMA/v8CAAAABgD8/wsA/f8IAP7///8FAPv/AQAAAPv/BQD5/wIAAgD7/wUA/P8AAAMA+f8HAPT/CQD1/wYA+f8EAPr/BQD7/wQA+f8KAPb/CAD5/wYA+/8GAP3//v8FAPz/AQD+/wIA/P8GAPn/CAD5/wcA+v8EAP///f8IAPn/CQD7/wMAAQD9/wYA+/8DAPr/CQD2/wsA+f8CAAMA+f8JAPf/BwD9/wMA/P8FAPz/CAD7/wEA///9/wUA/v8GAPr/CAD7/wMA/v8CAAQA/P8FAP//BAABAAAA//8EAP//BgAAAAEAAAADAAQAAQACAAEAAwAEAAIABQACAAEAAwD//wMAAgD//wIAAgD+/wYA/f8FAPz/BAD/////BQD5/wUA/f8CAAEA/f8GAPn/CQD7/wMA//8BAAAAAwD9/wUA/P8FAP7//////wEA/v8DAPz/BgD7/wUA/P8AAAAA/v///wAA/v/8/wIA+v8FAPr///8CAP3/BQD9/wEABgD8/wcAAAABAAIAAwD9/wgA+/8DAAEA/P8AAAQA/f8DAAAA/P8HAP//AgAFAPj/BwD8/wMA//8AAAEAAQAAAAQAAAD+/wEA/v8BAP7//v8CAP7/BQD7/wQA+v8DAPr/BQD9/wIAAAACAPz/CQD7/wAABgD5/wkA/f8GAP//AAAHAPr/CQD9/wEABQD5/wkA+v8HAPr/AwD8/wAA/v8AAPz/AgD5/wYA+v8HAPr/BQD+/wMAAgD//wUA//8DAAIA//8JAPv/BgACAP7/CQD7/wQABAD9/wUAAwD+/wYA/f8EAAEAAAD+/wMAAAABAAAA+/8DAPv/AwD8/////v////7//v////3//P////7///8BAPz/AwD8/wEA+/8EAPv/BQD5/wgA+v8HAPz/BQD3/wkA9f8GAP3//v///wAA/P8BAPr/AAD7/wEAAQD+//3/AgD8/wMA/f/7/wYA+P8CAP///f/+/wAA//8BAAEAAwD7/wgA9/8NAPr/BgACAAAAAQABAAAA//8FAP3/AgAEAP3/BQD5/wkA9/8JAPv/AgD+/wMA///+/wIA//8CAAEA/f8CAAAAAAABAAAA//8AAP7/AwAAAAEABAD//wQABQD8/wkA/P8FAP7/AQD+/wcA+/8IAPz/AgACAPz/AgABAPz/BAABAAAABAD9/wEAAAD///3/BQD5/wQA/P8CAAEA/P8IAPz/BAD//wAAAgABAPz/BAD6/wQA/P8AAP////8BAP///v8AAAEA/P8FAPr/BwD8//7//f/6/wEA+f8CAPn/AwD5/wEAAgD9/wMA+/8EAPn/CQD3/wcA+f8DAAIA//////7///
///wIA+/8DAPr/BAD///7/CAD4/wgAAAACAAMA/v8DAPr/AAAAAP7/BAD9//7/BQD7/wcA/P8EAAIAAwADAP//BgD9/wYA//8EAAIAAAAEAAIABQD9/wQA/v8BAAIAAAD7/wQA+v8EAP7/AgAAAAQA+P8LAPb/BwD7////AAD8/wMA/f8EAPv/AwD5/wIA/P8DAPz///8AAPz/AwD///7/AAD//wEA/v///wAAAQD+/wMA/v/+/wIAAgD+/wEA+/8EAP//AgD//wIA/////wUA/v8CAP//AQD+/wMA+P8FAPb/BwD3/wMA/f8BAPz/AgD/////BAD2/wIAAwD6/wsA9P8KAP//BQABAAAABQAAAAQA+/8BAAAA9/8EAPf/AgD5/wAA/P8DAPv//////wEA/v8CAP3//P8EAPn/BwD6/wMAAwD5/woA9/8LAPb/CgD7/wUAAAABAAAAAAAEAPr/CwD1/woA+/8CAAcA+/8IAPj/CwD7/wQA/f8BAPz/BAD5/wMA/f//////AgD8/wUA/P8CAAIAAwD9/wUA/P8DAPv/AgD8/wMA+/8DAP7/AAADAPv/CAD5/wUA/f8AAAMA/f8FAPb/DAD0/wkA+f8AAAEA+/8BAAEA/f8EAPv/AAD///7///8BAP3///8AAP//BAACAP7/AwD//wAAAgD9/wIAAQD5/wIA///+/wIA+v8EAP7/+/8GAPX/CgD4/wIAAgD+/wEA//8AAAQA/P8HAPf/CQD9/wMABQD+/woA+v8HAP3/AAAFAP3/BQACAP3/DAD6/wkA+v8DAAQAAAD//wQA+v8JAPX/CAD3/wQA+P8AAAEA/P8BAPv/AgD9/wQA/P8FAP////8BAAAAAgD6/wYA+P8DAAIA+f8HAPv/AAACAPn/CAD4/wcA/P/+/wQA+v8DAAEA/P8GAP7//f8FAP3/BQD4/wYA9/8HAPr/AQD5/wQA+/8DAP//+/8EAP3/BQD+/wMA/v8EAP//AwD9/wYAAgD+/wYA+v8FAPv/AQABAP7/AQAEAPz/AwD9/wQA/P8GAPj/BAD8/wEAAgD3/wcA+/8DAAAA/v8DAP3/BAD+/wAA/v/9//7/BAD7/wEAAwD9/wcA/P8EAAAAAgABAAIA/v8EAP3/AwD+/wQAAQACAAAAAAD//wEAAAABAPv/BAD3/wwA9v8FAAAA/P8FAP//AAAEAPn/AQD+/wEA/P8CAP3/AQD8/wAA+v8CAPv////6/wMA+/8CAAAA/////wIA//8DAAEAAAAAAAMA/P8EAAIA+/8JAPf/AgAAAPz/AQD8//7/AQAAAAEAAQD7/wcA/P8EAP7//f8EAPz/AwD9/wMA+/8GAPr/BQD9/wIAAAABAAIAAgAAAAQA/v8DAP//AgD8/wUA+/8FAAAAAgD//wAAAAABAAAA/f/+/wIA/P8DAPr/AAD8//z/AQD6/wIA/P8AAP3/AAAAAP7//v8CAP/////+/wAA//8CAPr/BgD7/wUA/P8CAP3/AQD9/wMAAAAAAP7/AQAAAP///v/+/wIA///9//7////8/wQA+f8CAPv/AAD9////AQD8/wQA+/8DAPz/BQD6/wYA///+/wQA/P8DAP7/+/8GAPv/BwD6////BAD7/wgA/f8BAAIA/f8DAAEAAAAFAP7/CQD9/wYA/v8CAAQA/f8AAAIA+/8GAPz/AgD//wIAAwD//wEA//8EAP//AQABAPv/BQD5/wQA+/////7//v/+/wAAAAD+/wIA/f8DAP//AAD9/wgA9P8KAPb/BAD8//3//f////z//v/+//n/AQD4////+v/9//v/+v8BAPr/AAD9//7////9/wAAAQD9/wQA+f8FAPz/AQADAP7/BQD9/wEAAQD//wYA/f8BAAIA/v8HAPz/CgD+/wMABAD//wYA/P8EAP3/AQD//wIAAAD//wIA//8HAP3/CQD4/wUAAQD+/wYA+P8GAP3/AAD9/wQA+/8GAPv/BAD7/wQA+f
8GAPv//f8DAPr/BgD8/wUA/f8DAAEA/P8EAAAAAAAFAAAAAQAAAAQA/P8LAPf/CQD6/wEABAD+////AwD6/wMAAAD6/wkA+v8FAPv/AgAAAAAA//8AAP//AwACAPn/CQD8//7/BAD4/wMA///7/wMA/f///wAAAAACAP3///8AAAAAAAD////////8/////f/9/wAA+/////7/AQD///z//P8CAPr/BgD4/wcA+/8EAAEA//8DAAEA//8EAP3/AwADAAEA//8FAP3/AAAGAPz/DADz/wwA+/8DAAcA/v8IAPv/BgD+/wQAAAD+/wEA/v8DAP/////7/wQA+v8EAP3/AAAAAP//AwD7/wYA+f8GAP3/BgD9/wQA//8FAP//AgABAAIA/v8EAP3/BQD9/wIA//8DAAMAAQAAAAQA/f8FAAEA+/8DAP3/BAD+/wAABAD6/wkA+v8EAAAA/v8EAPf/DAD2/wkA+/8BAAIA//8CAAIA//8FAP7/AwD4/wgA+/8FAP///v8CAP3/BAD8/wAAAQD8/wIA/////wEA/f/9/wEA/v8AAP///v8AAP3///8AAP7/AgD8/wQA/f8CAAEA/f8HAPj/CAD7/wMAAQD//wQA//8AAAMA/P8FAP3/AAACAP3/AgD//wEA/P8FAPj/BwD7/wQA/f8BAAIAAAACAP///v8BAAEAAwD+/wEA///+/wUA+v8GAPv/BAABAAAABQD9/wQAAgD//wIAAwAAAAQAAQAEAAAAAwAAAAQAAQABAAAAAwD8/wIA/v///wMA/P8CAP7/AAD9/wIA/P8DAP3///8BAPz/AQD///z/AgD9////AQD+/wAA/f8GAPn/BgD6/wIA/////wAA///6/wMA+f8FAPv/AgD6/wEA/v/+/wEA///9/wEA+/8AAPz/AQD+/wAA/f8BAP7/AgD+/wQA+/8FAP3/AQAGAPj/CAD8/wIABAD7/woA+P8DAP7///8EAPv/BAD//wMAAAADAAEAAwACAAAABQD//wUA/f8FAP7//v8CAAEA//8DAP3/AQACAAAAAwD8/wMAAAABAAQA/f8BAAAA/v8DAP////8AAAEA/v8CAP3/AAABAP3/AQD///7/BAD9/wAAAAACAP//BQD9/wMA/v8EAPz/CAD4/wgA+v8GAPz/AwD+/wIA/v8BAP//AQD9/wEA/f8EAPz/AgD///v/BgD4/wcA+P8FAPv///8BAPv/AwD8/wEA/f////////8AAP/////+/////f8AAAIA+P8GAPj/BAD9/////v8AAP/////+/wAA/f8DAP3/AwD+/wAAAwD+/wIAAgD+/wYA/f8FAP//AwACAAAAAgD+/wIAAgAAAAEAAwD//wQA/v8DAAIA//8FAPz/BgAAAAIAAwACAAIABAADAAEABQD//wcAAQAEAAMAAgABAAYA/f8GAP3/BQD//wEAAgD+/wUA+/8GAPz/BQD4/wUA/P8GAPv/AAACAP//AAD///7/AwD8/wMA/f8CAP///f8DAPn/BQD9////AwD6/wMA/v///wAA/v8BAAAA/P8DAPz/AwD8//z/AwD3/wcA9/8FAPj/BAD9/wAAAQD//wIA//8CAP3/BgD7/wcA+/8KAPr/BwD9/wUA//8FAAAABAD//wEABAD+/wgA+v8IAP3/AwAAAAIABAD+/wMA//8CAPv/BgD7/wYA/P/9/wEA/P8EAPr/AwD6/wEA///8/wQA/P8BAAIA+/8FAAAA/P8FAPv/BAABAAAAAAAFAP3/BgD6/wgA+f8IAP7/AgAEAP//BAD9/wUAAAABAAMAAgD+/wgA/v8CAAMAAQD//wUAAAD//wgA+/8IAP7/BAD8/wgA9v8NAPb/CQD8/wQAAAACAP//BwD7/wgA/f8EAP//BQD5/wcA+/8EAP//AAD8/wUA/P8CAAAA//8BAP3/BAD9/wQA/f8BAAIA//8CAP///f8EAPz/BQD6/wQA/f8DAP////8DAPv/BA
D8/wEA/v////7/AgD+/wAAAgAAAAMA//8DAP3/BgD8/wUA/v/+/wYA+v8EAPz/AAABAP7/AwD5/wgA+v8FAP7/AgD+////AQD//wMA/P8FAPj/BwD5/wUA//8CAP//BAD+/wMA/v8DAP//AgAAAP3/AgD8/wQA/v8BAAAAAQACAAAAAgD8/wMA/v8FAPz/BgD3/wsA9v8HAPn/AgD+/wEAAAD//wEA/v8DAPz/BwD6/wkA+/8FAAAAAQACAAIA/v8EAP//AQAAAAIAAAD//wEA/f8FAPz/AQADAP3/BQD6/wUA/f8DAP7/AQD//wEA//8DAPz/AwD8/wMA/v8CAP////8BAP7/AgD7/wcA+P8HAPv/AwD/////AgD//wQA/P8EAPv/BQD7/wIA/v8AAP3/AAAAAP7//v8AAAAA/v8BAPv///8DAPj/CgD3/wUA/f8DAP//AQD//wAA/f8EAPf/CAD5/wEA///8/wAAAQD9/wIAAAD//wEAAgD8/wQA/f///wIA/v8BAP//AAD///7/AgABAAIABAD7/wgA/f8FAPv/BQD8/wcA+/8GAPr/BgD4/wUA/P8BAAEA//8AAAEA/v8EAP7/AAABAP7/AgAAAP3/BQD7/wMA/v8AAAAAAgD//wAAAAABAP7/AwD7/wUA+/8DAP////8DAPv/AQD+/wAAAQD+/wEA/v8BAAAAAwD+/wQA//8CAP3/BQD7/wYA+/8EAPz/BQD9/wQA//8BAP//BAD8/wMAAAABAP//AQABAAEA///+/wAA/f8CAPr/AgD7/wIA/P8BAP7/AAD9///////9/wIA/P/9///////9/wQA9v8JAPn/AQD9/wIA/f////7//v/9/wIA+/8DAP3/AAD+/wEAAgABAAQAAAABAAMA/v8GAP3/BgD9/wQA/v8BAAIA/P8HAPr/CAD6/wQA//8BAAAAAgD//wIA/v8CAAIA/v8CAPz/AQD9/wAA/v8AAP7/AAD///7/AgD+/wQA/f8CAP7//v8CAPz/BgD4/wgA9f8JAPf/BQD7/wMA+v8EAPn/AgD///r/BQD7/wQA+v8KAPj/BwAAAAIAAwACAP//AwACAAAABAD//wIAAAACAP3/BAD8/wQA//8CAP//BAD8/wQA/f8EAPz/BgD5/wYA+/8DAAAA//8BAAAA/v8CAP3///8AAP//AQD///7/AQD9/wIA/P8BAAAA/v///wEA/f8AAP/////9/wEA///9/wIA+f8CAPv/AAD+/wEA/v8AAAIA/v8BAAAAAgADAAAAAwAAAAAABAD+/wUA/v8FAPz/CQD8/wYA/v8EAAMA/v8FAP3/AwABAAEABAD//wMA/f8DAP7/AQACAPr/BAD8/wIA/v8DAPr/BwD4/wQA/f8CAAAA/f8BAPz/BAD9/wEA/P8DAPz/AgD9//z/AwD4/wYA+/8AAAAA/v8AAAIA/v/9/wIA/P8BAAAAAAD//wAA//8AAAQA+v8FAP7///8DAP3/AwABAP//AQACAAEAAQAEAAIAAQAFAPz/CAD9/wQAAgABAAUAAQADAAAAAAACAAAAAAAGAPn/BgD9/wMAAAAFAP3/BgD9/wUA//8CAAEA/P8IAPf/CQD6/wIAAAD9/wQA+/8FAPn/BwD3/wUA+//+/wMA/f8AAAEA/P8CAPz/AQD8/wUA+v8DAP//+/8FAPf/AwD+//r/BQD6/wIA/f8DAP////8AAP//AwAAAP//AQAAAAEA///+/wQA9/8JAPn/AwD9/wEA/v8EAPv/AgABAP7/AwD+/wEAAAACAP7/AQD9/wAA/v8CAP3/AgD8/wMA/v8BAP//AAACAP7/AwD//wAAAQD+/wQAAAAAAAQA//8HAP7/BAD+/wMAAQACAAEAAQACAP//BgD5/wkA+/8EAPz/AwD9////AAAAAP3/AgD6/wUA+/8DAP7///8BAPv/AgD+/wIA/f///wMA+f8IAPr/AwD9/wEA/v////3/BAD6/wEA/f
/9/wEA///8/wIA+////////f/9/wAA/f8AAAIA+v8DAPv//v8CAPr/AwD6/wAA///+//7/////////AAD///////8BAP7/AAD//wIAAAACAAAAAQABAAAAAQABAP//BAD9/wIAAgD9/wYA/P8EAPz///8GAPr/CAD5/wMA/v8CAAEA+/8IAPj/CQD6/wMA/f8BAP7///8BAP3/BAD//wEAAQAAAAMAAAADAP//AgABAP//BQD9/wIAAgD+/wIAAQD7/wYA+v8FAPr/AAABAP//AwD8//7/AwD6/wUA+f8CAP//AAD//wEA/f8BAPr/BQD5/wQA/P8BAP///f8CAP7/AAACAPv/AgD+/wAAAwD5/wYA+/8CAP///v8CAPz/AwD2/wcA9/8CAP3/+/8DAPr/AgD6/wIA+v8EAPv/AgAAAAEAAAAAAP7/AgD+/wUA+f8DAPz/AwD+/wEAAgD7/wcA+P8IAPj/AwD///3/BQD5/wYA+/8CAAAAAQD//wQA+/8DAP3/AAACAP//AQD8/wQA/P8EAP7/AgAAAAEAAQABAAIA/v8HAPr/CAD5/wkA/f8EAAEAAQABAP////8BAP//AAD//wEA///+/wAAAQABAAEA//8BAP//AwD+/wEAAQD//wMA/v8BAAIA/f8HAPn/BQD8/wMA+/8EAP3/AAABAP////8DAPn/CAD5/wQA///9/wEA///9/wAA/f/7/wIA/P///wMA+P8FAPv/BAD+/wAAAgD/////AwD7/wMA//8BAP3/AgD8/wEA/v/+/wAA+v8IAPb/BwD5/wIA/v8AAP//AQAAAP///v///wIA/f8GAPn/BAD9//////8AAAIA+v8FAPn/BwD7/wUA/P8DAAIAAQAAAAMA/v8FAP////8GAPn/CwD2/wkA+v8DAP3/AQAAAP//AwD8/wIAAQD//wQA//8CAAAAAgABAP//AAD+/wEAAQD9/wEA/P8DAP7//////wEA/f8CAAAA/v8HAP7/BQD8/wYA/f8EAAAABAAAAAQA//8CAAEAAQACAP3/BAD+/wMAAQD9/wMAAAD//wQA/v8AAAUA+f8HAPn/BQD6/wEA/P/9//7////9///////8/wMA+v8CAP///v8GAPj/CQD4/wkA+v8FAP7/AQAAAAAAAAABAAAA/v8CAP3/AgD6//7/AQD7/wIA+/8CAP3/AQAAAP//AwD9/wQA//8EAP7/AQACAP////8GAPf/CQD5/wcA/v8CAAAAAAACAAMA/v8GAP//BgD//wcA//8CAAMA/f8DAAMA/P8DAPr/AgD+/wIA///9/wMA/P8CAAIA/f8HAPz/BAACAP//BAABAP//BQD+/wUA//8CAP///v8FAP3/AgD//wAAAAADAAIA//8EAP//AQAAAAEA//8DAP3/AgAAAAAAAwD7/wQA+/8EAPr/AwD9/wQA+/8IAPr/BwD8/wQA//8FAP//BAD//wMAAQAAAAIA/v8CAAIA/v8HAP3/BAAAAAIAAAADAP//AgABAP//AgABAP3///8CAP3/AQAAAP3/AQABAP3/AwD///7/BgD5/wkA+f8IAPj/BgD7/wUA//8AAAMA/f8CAAAA//8FAPr/AgAAAPz/AwD8/wIA///8/wQA+/8GAPv/AwD7/wQA/P8DAP//AwD+/wIAAAD//wUA/P8EAP//AQABAAEA//8CAP7/BgD9/wUA/P8EAAEAAwABAAEAAwADAAAAAgD//wIAAgD+/wIA/v8DAP//AQD//wAAAQAAAAEA/v8FAPz/BAD+/wQA/v8DAP//BQD8/wUA+/8FAPz/BAD9////////////AAD8/wYA9/8IAPr/BAD//wEAAAACAP///v8FAPz/BQD9/wMA/f8GAPj/DAD1/wcA//8CAAAAAQABAAMAAAADAAEABAACAAEABgD9/wgA/f8GAP7/AwAAAAQAAQADAP3/BAD6/wgA+P8GAPr/BQD8/wUA+P8HAPj/BQD7/wMA/v
/+/wMA/f8CAP3/AAAAAP3/AgD7/wQA/f8AAAEA+/8FAPr/BQD7/wUA/v8DAAAAAQAAAP//AwD7/wIA+/8BAAAA/f8BAPv/AQD//wAAAwD4/wcA/P8DAP//AQD//wYA/P8FAAAAAAAEAPv/BgAAAAIAAQABAP//AwD//wMAAAABAAEAAwAAAAQA/f8HAP3/AQACAP3/BgD7/wQA/P8EAP7//f8BAP7//v8BAPn/AwD9////AgD9/wIA//8CAP///v8EAPn/BQD9////AgD+/wIAAQABAPz/BAD7/wUA/f8AAAIA/f8EAP3/AgD9/wMAAAD//wIA/f8CAPz/AwD8/wIA/f/9/wMA/P8DAP3/AAABAP7/AQD+/wMA/v8DAPr/BwD3/woA8f8MAPT/CAD6/wAA/v8AAP/////+/wAA/v8CAP3///8AAPv/AwD9/wAA/f8AAPz///8BAPv/BAD6/wAA/P///////f/+//3//f8AAAAAAQD//wEA/f8EAPn/BgD8/wMA///+/wIAAAD+/wAAAAAAAAIA/f8EAPv/BgD8/wQAAAACAAEAAgAAAAUA+/8HAPj/BQD9/wAA////////AQD8/wMA/v8BAAAA/f8EAPz/BAD///z/BgD6/wQA/f/+//7/AQD7/wIA+P8CAPr/AQD6/wAA/v/9////+/8AAPv/AAD+//7/AAD9/wAAAQD7/wIA+/8AAAMA+/8CAPz//f8CAAEA/v8AAP7/AAADAP3/AQABAPz/AgD+/wAAAQD9///////9/wAA/f/7/wIA/v/9/wQA+f8EAP3/AgAAAP3/BAD+/wMA/f8CAP//AwD//wQA/f8CAAEA/P8FAPv/AgD9//z/AQD6/wEA+v8CAP3/AQD9/wAA/f8EAPv/CAD5/wcA+f8DAP7/AAABAPv/AwD9////AwD+/wEAAgD8/wMA/v8BAAEAAAAAAAAAAAAAAP7/BAD+/wEAAQD///7/AwD9/wEA/v8AAAAAAAABAP////8DAP3/BgD9/wEA/f8EAPv/BQD8/wEA/v/9/wEA/f8AAAAA/f8AAP7///8AAP7/AQD/////AAD//wIA/v///wIA/f8DAPv/BQD9/wMA/f///wAAAQD8/wEA///+/wMA/P8BAAAA//8DAP7/BAD//wMAAgD9/wMA///+/wUA+/8EAP//AAABAAEAAAAEAP3/BAD//wQA/f8FAPz/AAAEAP3/AwADAP3/AgAAAAAAAAAAAP3/AwD+/wAA//8BAP7/AgD9/wQA/P8DAAAA//8CAPz/BAD//wEA//8AAP3/AgAAAAAAAQD//wIA/f8CAPz/BQD///7/BAD+/wEAAAD//wIAAAAEAP//AgAAAAIAAQABAAAAAAADAP7/BQD+/wIAAgD//wMAAAAFAPz/BwD8/wQAAQAAAAQA+v8GAPv/BAD//wEA/v8EAP7/AAAAAAEA//8EAPz/AgD//wMA/v8CAAAA//8EAPz/BwD4/wYA+v8EAPz/AwD8/wIA+v8EAPr/AwD9//7/AgD7/wMA/f8CAAAAAQD+/wEAAAAAAAAA//8CAP7/BQD+/wAAAgD//wIAAAACAPz/BQD8/wEAAQD8/wYA+f8HAP3/AQAFAP3/AwADAP3/BQD+/wIAAwACAP//AwABAAEAAgAAAAEAAgD+/wUA//8FAP//AgABAAEA/v8DAP////8CAP7/AQACAAAA/P8FAPv/BwD7/wcA/P8GAP3/BAD8/wgA+f8HAPz/AAAGAPj/CAD5/wQA/P///wIA+f8DAPz/AQACAPz/BAD9/wUA/v8GAPz/CAD6/wYA//8AAAMA/v8CAAAA/v8DAP7///8EAPz/AQAGAPn/BwD5/wgA+/8FAPv/BQD9/wYA+/8FAP3/AwD8/wMA/P8EAPz/AQD8/wQA+/8DAPr/BAD+/wEA//8CAP3/AwD8/wIA/v8AAP//AAD//wEAAgD+/wEAAQD8/wcA+f8FAP3/AgAAAAEA//8DAP//Bg
D+/wMAAAAAAAIA//8CAPr/CQD2/wcA+v8EAP3///8DAP3/BgD+////AwD9/wYA//8BAAUA/v8DAP7/AwD9/wUA//8CAP//AgD+/wAAAAD+/wIA//8AAAAAAAACAP//AwD//wIA/f8EAPz/AwAAAP//AQD///7/AwD8/wQA+/8EAP3/BAD+/wAAAwD9/wIAAQD7/wQA/v8AAAMA/v8AAAEA/f8DAP7/AAACAPz/AwAAAP3/BwD6/wMAAAACAAAAAQD//wIAAQAAAAIAAQD+/wUA/P8CAP7//v8CAP///v8EAPr/CAD6/wcA/f8BAAIA//8EAP7/BQAAAAIAAAD+/wMA/f8DAPv/AgD//wEA/P8JAPj/CQD4/wUA/v8CAAAAAgD9/wUA/f8CAP////8AAP///v8BAP7/AAD///7/BAD9/wQA/P8DAAAAAAACAAAA/P8CAP3/AgAAAAEA//8DAP3/AwD//wMA/f8GAP3/AgAAAP3/BQD8/wQA/////wMAAQAAAAEA/////wUA/f8DAP7//v8DAP3/AQABAP7/BAD9/wcA+f8GAPz/AwAAAP7/AwD//wEAAQD///7/AQD8/wQA+v8DAPj/BwD3/wYA+f8CAP7/AQD7/wQA+/8DAPv/AgD9/wEA/v8EAP//AAABAAAAAgABAAAABAD8/wYA/v8EAP//AwD+/wYA/v8CAP//AgACAAAA//8CAAEA/v8BAAAA//8CAPr/BAD7/wcA+/8DAP////8GAP3/AgAAAAAAAQD///3/BAD5/wMA/P8CAPr/AQD9/wMA+f8GAPj/CQD5/wUA/f8BAAAA/////wIA/f8FAPr/BgD5/wYA+v8DAP3/BQD6/wMA/f/+/wYA+P8EAP7///8CAP7/AwAAAAAAAgADAAEAAwD+/wIAAgD+/wIAAQD+/wEA//8BAP//AQABAP3/BAAAAAAAAgD9/wIA//8DAPz/BAD8/wMA/v8AAAAAAAABAPz/BQD8/wQA/////wAA/v8DAP//AQD//wEA/v8GAPn/CgD4/wQA///+/wQA+/8BAPz/BAD7/wAAAAD//wMA/////wEA//8BAPz/AwD9//3/AgD8/wIA/v/+/wIA/v/+/wQA+v8GAPr/AwAAAAIA//8DAP3/BQD+/wEAAAACAP//BAD9/wcA+/8GAPv/BAD+/wIAAAADAP//AAAEAP7/AAACAP3/BAD///3/BAD7/wYA9/8HAPX/CQD4/wIA/v/9/wQA+v8GAPn/BAD/////AgD9/wAAAwD9//7/AgD6/wUA+/8AAAAAAAD//wIA/P8HAPv/BAD7/wQA//8BAP7/AQD+/wIA/////wEAAgD9/wAAAwD6/wgA+v8CAP7///8AAAEA///+/wIA/v8EAPz/AgD//wEA/v8CAP3/BgD7/wYA/P8EAP3/AgD+/wEAAQD+/wYA+f8EAP///v8BAP7////+////AAD+/wAA/f8CAP7/AgD+//7/AgD/////AQD8/wMA/v8CAAAAAAADAP7/AwAAAAIA//8DAP7/BAD9/wMA/v8CAP3/BAD+/wAABAD4/wcA+f8DAAIA+v8GAPn/CQD4/wcA+/8CAAAA/f8GAPv/AwD7/wEA/v8BAP7/AgD9////AgD5/wQA9/8FAPn/AwD6/wIA/f///wEA+/8DAAAA/P8FAPr/BwD8/wMA/f8FAAAAAQD///3/AgD9/wAA/v/+/wUA/f8BAP//AQACAPz/AAD+/wEA/f8CAPv/AgD/////AwD9/wMA/f8EAP7/BAAAAAIAAgABAAEA//8DAPz/BgD//wIAAwAAAAMAAQADAP7/AwD+/wMA//8BAAIA+v8JAPb/BAD8//7/AgD8/wEA/v8AAP3/AwD5/wMA/P///////f8BAPz//f8FAPn/BwD6/wEA/v8CAP3/AgABAP3/AgD9/wMA//8BAPv/AwD+////AgD6/wMA/v8AAP3/AAD8/wEA/v8CAPv/AwD7/wEAAAD9/wAAAA
ABAP//AQD9/wEAAgD6/wgA+/8DAP///v8FAP3/BAD//wIAAQACAP3/BwD7/wgA+/8LAPr/CQD9/wMABAD8/wUA/P8CAAMA/f8DAP7///8GAPv/BAD8/wQA///7/wYA9/8FAPr///8BAPz/BAD6/wMA+/8EAPz/AwAAAP7/AwD6/wMA/v////7/AAD//wAA/P8AAPz/AAD+//3////6/wAA/f/8/wIA9f8DAPv/AAD8/wEA+/8CAP3/AgD/////AgD+/wIAAAABAAIAAwAAAAUA/P8HAP3/BgD+/wAAAwD+/wIAAgABAAMA/v8FAP7/BAD/////AgD8/wEAAQD//wIA/v8DAP//AwAAAAEAAAABAP//AAD9////AwD8/wEAAAD9/wYA/P/+/wUA+P8EAP///f8CAP7//v8EAP3/BQD8/wMA/f8CAP3/BAD8/wUA//8BAAAAAgD+/wYA/f8GAPz/AwD//wIA/v8DAPv/AgAAAPr/CgD4/wYA/f8BAAMA/P8DAP////8DAP3///8BAP//+/8GAPb/BgD7/wAA/v8BAPr/BAD7/wEAAAD7/wUA/P8DAP3/BAD7/wUA+v8CAP7////+/wEA/////wAAAAD8/wIA/f8BAP//AgD+/wIAAgABAAIA//8CAAIAAAAEAAEAAgAAAAQA/f8FAPz/BQACAP7/AgAAAAIAAwABAP//BAAAAAIAAAAAAP//AQD+/wIAAAACAPz/AQD9/wIA/f8AAP7/AgD9//////////7///8AAAAAAAABAAAAAQAAAP//AgD//wEA///+/wMA/f8CAP//AgAEAPz/CwD2/w0A+P8IAPv/BQD+/wEABgD8/wYAAQD//wgA+/8FAP7/AAADAP3/AwD+/wAABQD6/wYA/f8DAAAAAQABAP3/AgD+/wMA/P8FAPr/BQD+/wAA//8DAPr/BwD4/wUA/f8BAP7/AQD//wAAAgD+/wEA//8CAP7/BAD7/wIAAAADAPz/BgD5/wkA/P8AAAEA/P8GAPj/BwD6/wUAAAD//wEAAAACAP7/BQD9/wIAAwD+/wMA/P8CAAEA//8FAP7/AgAAAAIAAAAFAP3/BgD+/wUAAAAFAP//AwD9/wIAAgD//wMA//8BAP////8BAAEA/f8CAP///v8GAPv/AwACAP3/AwD9/wQA/P8EAPz/BAD+/wUA/v8CAAMA+v8EAP3/BQD7/wMA/P8CAAQA/////wQA+f8JAPf/BwD6/wYA+/8FAP7/AwD/////AAD//wEA//8DAAIA/f8CAP3/BAD//wIA/v8CAP3/BwD6/wgA+/8DAP7/AAACAAAA/f8DAPv/BQD5/wgA+P8FAPv/AwD8/wcA/P8DAP//AgABAAIA/////wIA/f8FAPn/BQD4/wIA/P////3/AwD5/wQA/P8AAP//AgD8/wYA+/8CAAAA/f8FAPv/AwABAP3/BAD8/wMA/f8EAPz/AwD//wAABAD7/wcA/f8FAP7/AgADAAAABAD//wIABAD+/wUA/P8AAAMA//8AAAAAAAADAP//AwAAAAAAAwAAAAQA+v8DAAAA//8DAPz//P8FAPj/BgD6/wQA+f8GAPn/BgD5/wYA/v8BAAMA//8CAAQA+/8GAP3/AwD8/wIA/P8BAP7//f8BAP3/AQACAP3/BAD8/wEAAQD9/wEAAAD+/wMA/f8DAP//AgAAAP//AwD8/wMA/P8AAP//AAD8/wMA/P8CAPr/AwD8/wMA/P8EAPz/AgACAPz/BgD8/wMA/v8DAPz/BAD9/wMA///+//////8BAP//AQAAAAEAAAD//wMA/P8HAPv/BAABAP//AAADAPn/CQD4/wYA/P8DAP//AAADAAEAAwD//wUA//8FAP7/BgD7/wkA/P8DAAIAAAAAAAQA/v8DAP//AgD//wEAAwD//wEAAgAAAAAAAwD9/wYA//8CAP//AAADAP//AAACAPz/CAD2/wYA+f8CAP7//f8CAPr/AwD9/wAA/f
8FAPr/BgD8/wAAAAD+/////f8AAP3/AwD7/wMA/P8AAP7/AQD+/wAA/f/8/wIA/f////z/AAAAAP7/AQABAP7/CAD7/wMAAAABAAAAAAACAP7/AAAAAAEA//8DAP7/AwADAP7/BQAAAAEAAwD9/wgA+P8JAPn/BgD8/wQA/f8CAAAA//8FAPz/BAD8/wIA/v8BAP//AAACAP3/BgD8/wQAAQAAAAAAAAADAPz/BAD+/wAAAQD+/wEAAgD+/wIA/v8DAP//AAD9/wMA/v8CAPv/AQD//wAA/f//////AQD+////AAD9/wYA+f8GAP3/BAD//wEAAgAEAAAAAwD//wEABQD8/wMA/f8BAPz/AwD9/wAAAQD9/wAAAAD9/wQA/P8DAPr/BQAAAAAABQD5/wYA+/8GAP7//v////3//v8AAP3//f8BAPv/AQD8/wAAAAD//wAA/v8AAP///f///wAA/P8DAPz/AAABAPz/AAADAP3/AgD8/wIAAAAAAAAA/f8CAPr/BgD6/wQA/P8CAAAAAQABAAAAAAAAAAMA/v8AAAIA+/8GAPv/AwAAAP7/BgD9/wUA//8CAAAAAwD9/wMA/v8DAP//AAD//wIA/v8BAP//AAAAAP7/AAD+/wIA+v8CAPn/BQD5/wMA+/8CAP/////+/wMA+f8GAPn/AwD+//3/AgD7/wMA+f8CAP//AgD9/wIA/f8EAP3/BQD+/wMAAAAAAAEAAwD7/wQA+/8CAP3/AQD9/wAAAgD5/wcA+P8IAPv/BQD9//7/BQD7/wUA/P8DAP////8DAPr/CgD6/wcA+/8FAPz/AgD8/wQA+/8EAP3//f8EAP3/AAAAAAAA/f8BAP7//f8EAPv//v8AAPn/AgD8//7//v/+///////+/wIA/f8EAPv/AAACAPn/CAD4/wQA/P8BAAEAAAABAAEAAAADAP7/BAD+/wIA/v8AAAQA/f8DAP7/AQABAAEAAAD+/wIA+v8HAPv/BAD8////BAD9/wIA/f8BAAAAAQD//wMA//8BAAEAAgD+/wQA/v8DAP3/BAD8/wAAAQD6/wMA/f////3/AAD+/wAA/v8AAPv/BgD7/wEAAAD9/wMAAAD+/wQA+/8DAPr/BwD2/wkA+f8CAAAA/f8DAP////8AAAAA/v8DAPz/BAD//wIAAAABAAEA///+/wEA/v8CAPz/BQD5/wUA+f8EAPr/AwD8/wMA+/8DAPz//f8BAPz///////3//v///wAA/P8AAP3////9/wAA+v8EAPf/BAD4/wAA/v/9/wMA+/8EAPz/BQD3/wgA9/8JAPz///8CAP3/BQD9/wEAAAD//wIA//8CAPz/BgD7/wMAAQD9/wEAAAD///7/AAABAP7/BAD8//7/BAD6/wYA+v8CAP3///8AAP3/AgD7/wMA/f8AAP//AQAAAP//AgD9/wEAAAABAP//AQD//wAAAAD///3/AwD7/wUA/v/9/////f8CAP7//f8DAPv/AwD7/wAA/v8CAPz/AgD+/wAAAAD+/wMA/f8DAP///////wMA+/8HAPv/AwD+/wAAAAD//wAA+/8DAPr/BAD+/wAAAgD9//3/BAD8/wIA//////7/AQD6/wMA+v8EAPv/AwD7///////+/wEA+/8EAPj/BwD3/wEA/v///wMA+/8EAPz/BQD+/wQA/f8GAAAA/v8EAP3/BAAAAP7/BAAAAP7/BgD3/wYA///8/wUA/P8CAAIA/v8AAAQAAAAAAAMA/f8EAP7/AQACAP3/AQD//wEA/v8CAPv/CAD6/wQAAAD+/wUA/v8BAAEAAQD8/wUA+/8GAPv/BQD8/wQA/f8DAAEA/v8FAPz/AgAAAP7/BAD9/wIA/v8AAP7/AwD7/wUA+P8HAPv/AgD9/wIA/f8GAPf/BgD6////AwD5/wMA/v/9/wcA+v8HAPv/BQD8/wQA/P8EAAEAAAABAAAAAwD//wMA/f8EAP//BAD9/wMAAQD9/wQA/v
8AAAIA//8EAP7/BQD8/wgA+v8JAPz/CAD8/woA9/8KAPz/AwADAP//BAAAAAAA//8DAAAAAAAEAP//AgAAAAIAAAABAAIA//8BAP///v8EAPr/BwD5/wQA/P8CAPz/AwD9/wEAAAAAAAAAAQD+////AAD/////AAD/////AwD7/wQA/v8BAAEAAAABAAIAAgAAAAYA/f8IAP3/BQD//wYA//8EAP7/BQD//wYAAQABAAQAAgADAAEAAAAEAAAABAD//wMAAQABAAUA/v8EAAAA/v8GAP7/AQADAP3/AwD9/wMA/v8CAPz/BQD7/wgA9v8IAPb/CgD8/wIAAAABAP7/BAD6/wQA+v8DAPz/AgD8/wEA/v8DAP7/BQD+/wMAAwD//wkA+v8EAP//AQADAAEA/v8EAAAABgD+/wUA/v8EAAIA/f8GAP3/BwD8/wQAAQAAAAUA/v8GAAEABQD//wQAAAABAAAA/v8DAPr/BAD7/wUA+v8CAP//AAAAAP//AQD7/wgA+f8FAP3///8DAPv/AwD5/wMA/f8BAAIA+f8FAP3/AwD+/wEAAAAAAAEAAgABAAUAAAAFAP//BwACAAIABQD9/wgAAQAEAAMAAgAFAAEAAgABAAQA//8IAP3/AgAFAP7/BQAAAP//BgD+/wIA//8CAAAAAQACAP3/BgD7/wEA//8CAP3/AQD9/wEABAD7/wQA+/8DAP//AgAAAAMA/v8DAP3/AQABAP//AgD9/wIA///+/wIA/P8BAP3/AQD///z/AwD7/wIAAAD+/wMA/v8BAP7///8BAP7/AQAAAP7/AgAAAAAAAQACAP7/AwD9/wYA+v8HAP3/AQACAAEAAgADAAAAAgADAAEAAwD9/wQA+v8IAPv/AQD9//7/AAABAPr/AgD8////AAD9////AQD8/wEA/f///wIA/f8DAP7/AQADAP3/AwAAAAEAAQD//wMA/v8IAPz//v/7//3/AAD+/wAAAAD+/wMA+/8FAPv/AQD7/////v/+//z/AgD//wAABgD6/wUA//8AAAUA/f8FAP//AAACAP//AgACAPz/AwD+/wMA/f8DAP//AwABAAAAAQAFAPz/CQD6/wcA/f8FAP//AgAAAAEAAQADAAIAAQADAP7/BQD9/wIAAAD//wMA/v8BAAAA/v8DAP3///8BAPz/AQD6/wAA+/8AAPv/AgD9//7/AQD8/wEA///+/wAA/////wIA/v8DAP7/AgAAAAEABAD///7/BAD9/wIA/v8AAAAA/v8CAP7///////z/AwD5/wUA+f8EAP7/AQAEAPv/BQD9/wMA/v8BAP//AQABAPr/BwD3/wYA/v/+/wEA+v8DAPv//P/+//r/AgD4/wIA+v8CAP3/AAABAP3/BgD8/wIAAgD9/wQA//8BAAEAAAD//wMA/P8FAP////8EAP3/BgD//wUA//8EAAIAAQACAAEAAAACAPz/BAD//wMAAgABAAMAAwADAAUAAwAEAAEAAgD+/wQA+v8EAP7///8AAP///////wIA//8CAP7/AQAAAP7/AAD+/wAA/v8DAPn/BgD6/wEA/f/+/wIA/P8EAPz/AwD+/wMA+/8DAP///P8EAPj/BQD6/wQA/P8AAP//AAADAPv/BAD7/wUA/P8EAPz/BQD7/wcA+v8FAPn/CwD3/woA+P8GAP3/AQD+/wIA/v8EAPz/AQADAPz/BwD8/wAAAQD+/wMA//8EAP3/AwAAAP//AgACAAAA//8FAPz/BwD9/wEAAgD+/wUA/P8EAP3/AgACAAAAAgACAAAAAgADAAEAAwACAP7/BAD9/wEA//8AAAEA/f8EAPz/BQD8/wUA+/8GAPv/BQD6/wQA/P8DAP////8DAP7/AQADAPv/BQD8/wIAAQD//wMA/P8FAPj/CAD4/wYA/////wIAAAADAAAAAgD//wEAAAAAAP//AQD8/wEA/v////3//////wEAAgD//wEAAQACAP//BQD+/wQAAA
D+/wcA+v8FAP7///8CAP//AwD9/wMA/v8AAAYA+f8IAPf/AwD+/wAA//8BAPr/BAD8/wUA/P///////f8BAP3//f8CAPr/BAD6/wEA/P8AAP3/AgD8/wMAAAAAAAAAAgD+/wIAAQD9/wYA+/8HAPv/AgABAAAAAwD/////BQD5/woA+P8IAPj/BwD5/wMAAQD7/wcA+v8CAAAA//8CAP//AQD+/wIA/v8BAAEA//8DAP3/BQD//wMAAgAAAAMAAgAAAAUA/v8GAPz/BgD+/wMAAgD+/wQA/P8DAP3/AwD9/wEA+/8BAP3/AwD8////AQD7/wUA+v8DAP3//P8BAP7///8BAP3/AAAAAP3///8CAP7/AwD+/wIAAAADAPz/BgD3/wYA+f8FAPr/AgD8/wAA/v/9//3//f/6/////f8BAPr/AgD8/wIA/P8AAAAA/v///wEA/f///wAA//8AAAAAAQD+/wMA/f8DAAAAAAABAP//AAACAP7/AAABAP7/AwD//wEAAAACAP//BAD7/wgA+P8FAP7///8AAP7/AQD+/wQA/P8FAP3/AQD//wIA//8BAAAA//8DAAAABQD+/wYA/v8DAAQA/v8FAPz/BAD+/wMAAgD+/wUA/P8DAAAA/v8FAPn/BwD8/wYA/v8CAP7///8BAPv/AAD///7///8CAPr/BQD8/wIAAgD9/wQA/v8EAP3/AwD9/wEA/P8DAPr/AwD9//3/AwD4/wYA+/////7/AAD9/wEA+f/9/////P8AAPz/AQD7/wMA+/8BAP7/AAABAPz/AwD6/wQA/P8BAP//AgD8/wQA/P8AAAMA/f8CAP3/AwAAAP//AwD9/wQAAwD+/wQA/f8CAP7///8BAP7/AwD7/wMA/f8DAP//AAACAAAABAD+/wQAAgAAAAMAAQACAAEAAQACAAMAAQD//wIAAAD//wEAAAD9/wEA/v8BAAIA/v8CAAAA/v8AAAAA//////7//v//////AAAAAP///v/9/wMA/f8DAP3/AgD//wIA/f8GAPv/BQD7/wcA+v8EAP7/AQACAAIAAAAAAP//AQAAAPz/AgD8/wMA/P8BAP//AAD/////AAAAAP/////9/////f/+//3//f/+/////P8BAP///f8GAPj/CAD4/wMAAAD+/wQA/P8EAAEAAwD//wMA//8FAAAA//8DAPr/AgD7/wEA/v/+/wEA/f8CAP7/AAD///3/BAD8/wEA/f/+/wIAAAD//wEAAQD+/wQA/v8EAP3/BwD+/wIAAwD//wUA/f8HAPr/BwD+/wIAAQAAAAEAAQACAPz/AwD9/wEA/P8DAPj/BAD6/wIA/P8CAP7/AAAAAP//AAACAP//AwD///7/BgD4/wIAAAD7/wQA+v8EAP3/BAD8/wIAAwD+/wMA+v8DAP//AAAGAPb/CgD1/wcA+/8AAP3////8/wQA+/8DAP3//////wAAAAABAP7/AQD//wQA/P8JAPf/CwD2/woA+P8HAP//AAACAPz/AgAAAAAAAQD9/wMA/P8CAP7///8CAPz/AwD+/////////wEA/v8FAPj/CAD6/wUA/v8FAP//BAD+/wEAAAD//wIA//8EAP7/BQD//wQAAAD+/wUA//8CAAAAAgD+/wEAAQD8/wYA9v8IAPv/AAD/////AQAAAAEA//8BAAQA+/8FAP//AAACAP7/AAABAAIA/P8HAPj/BgD5/wIA/v8CAP7/AQD8/wMA/P8DAP7/AgABAAEA/v8AAAMA/v8BAP7//v8BAP3/AAD+/wAA//8CAPz/AgAAAP7/BAD9/wUA//8AAAMA//8CAAIA//8CAAEA/f8DAPz/AQACAPz/AQADAPz/BQD8/wcA+f8LAPb/CgD5/wcA/P8AAAEAAAAAAAIA//8CAP//AQABAAAA///+//3/AQD+//7/AwD9/wIAAAD//wMA/v8BAAIA/f8GAPj/CAD8/wMAAwD+/wMAAQD9/wUA/P8FAPr/BQ
D4/wcA/P8AAAUA+f8HAP3/AAAFAPn/AwD+/wEA/v8EAPj/BwD6/wAAAgD9/wIA/P8AAAEAAQAAAAIAAQD//wMAAQABAAIA//8EAPz/BAD7/wYA+v8FAPr/AAABAP7/AgD9/wAA//8BAAAAAAAAAAAAAQABAAIA/f8FAP3/BQD+/wIA/f8DAAAA//8BAP7/AQAAAAIA/f8CAP7/AQACAPv/BgD5/wQA/P8BAAAA/f8CAPr/AwD9/wEA+v8DAPv/AQD8/wIA+f8CAPv/AgAAAPz/AwD9/wAAAQD//wEAAAADAP3/BwD4/woA+v8HAPz/AwD//wEAAQACAPz/AwD+/wEA//8EAPn/CgD2/wYA+f8DAAAAAgD8/////////wIA/f8AAP//AQD9/wIAAAD+/wMA/f8EAP//AwD9/wMAAAD//wUA+/8DAP///P8EAPv/BQD7/wMA/P8CAP//AQD//wAA/////wAAAwD9/wIAAAAAAAIA/P8CAAAA/v8BAP7/AAAAAAAA/v8BAAEAAQAAAAEA//8DAAAAAAAFAP3/BQD8/wIAAgD7/wMA/f/+/wMA/v///wQA9/8KAPj/CAD6/wcA/f///wYA9f8JAPj/BAD9/wIA+v8GAPn/BAD+//z/AgD6/wQA+f8FAPv/AQAAAP3/BAD6/wUA/P8CAPz/AwD9/wMA+v8CAP3/AAAAAP3/AAD+/wMA/P8EAPz/BAABAP7/BgD9/wMAAQAAAAEAAQABAP//AQD+/wQAAAD//wAAAQABAAIAAQD+/wMA/v8DAAAAAQACAP//AAD//wIA/v8AAAIA/v8DAP7//v8DAP3/AAABAP7/AQD+/wIA//8EAP//AQD//wEAAgD+/wgA+P8GAP3///8IAPX/CgD3/wUA///8/wcA+v8FAPv/BAD+/wAAAgD+/wEA/f8BAP//AgD9/wMA/f8EAP//AwAAAAEA/v8CAP7/AQAAAP//AQAAAAEAAQAEAPz/AwD+/wAAAAD//wAA///9/////v/+/wEA/v/+/wAA/v8DAPv/AwD8/wEAAQD9/wMA/f8CAP////8BAAAA//8BAPz/AgD8/wcA+P8FAP3//v8GAP3/AAABAP//AgD//wIAAgABAP//AgD+/wUA/f8BAAAAAQD//wAA/f8BAP7/AgD//wEAAgD+/wIAAQACAAEAAAAFAP7/BQD+/wQA/v8FAP3/BAD//wEAAQAAAAIAAgD+/wIAAAABAAEAAAAAAP3/AwD7/wYA+f8DAPn/BwD6/wIA/v/8/wMA/P/+/wIA+/8DAP3/AgD//wEA/v8DAPz/BgD7/wMAAwD6/wkA9/8GAPz/BQD9/wQA+/8EAPz/AwD9/wAAAAD//wAAAQD//wEA+/8DAP3/AAABAPn/BgD4/wQA+f8EAPv/AwD+/wEA/P8HAPj/BwD7/wAAAgD//wMA//8BAAIAAAABAAEA//8BAAEA+/8GAP7//f8FAPn/BAD+/wEA/P8FAPv/BAD9/wMAAAACAP7/AAD//wEAAwD+/wAAAgD7/wcA+P8GAP3/AQAFAPz/CQD4/wgA//8CAAAABAAAAAMABAD//wYA/v8DAAIAAwD+/wUA/f8AAAIA+v8FAPz/AwD+/wEA+/8DAPz/AgD///7/AQD+//////////7/AQD8/wEAAAD9/wIA/f8CAAEA/P8EAPv/AgD+/wEA/f/+/////f8AAAAA/P8BAPz/AgD7/wMA+/8DAPr/BQD1/wYA+f8DAPz/AQAAAPz/BQD7/wQA///+/wMA//8EAPv/BgD6/wsA9/8IAP3/AwD+/wAA/P8FAP3/AwD//wEAAgACAAMAAQADAAEAAwABAAIAAQAEAPv/BAD7/wUA//8AAAEA/v8EAPz/CAD4/wUAAAD//wgA9/8HAPv/AgACAP3///8EAPv/BQD8/wEA/v8AAAEA/v8BAP3/AQACAP7/AQD//wMAAAACAP//AgAAAAEAAQABAP7/BQD6/wcA+v8FAPv/BQ
D8/wMA/P8AAAAA//8DAPv/BQD4/wUA/P8BAP7/AAD+/wEA+v8DAPv/AwD7/wMA+f8DAP3///8AAAAA/P8DAPv//v8BAP///f8DAPj/BQD7/wEA/v///wEA/P8AAP7/AQD+/wMA/P8EAP3/AwD//wIAAAABAAUA+/8IAPz/BQABAAEA//8CAP3/BwD7/wUAAQD+/wgA+f8GAAMA+/8JAPr/BgABAAAABQD//wUAAgADAAIABQD9/wkA//8HAP//BgD9/wgA//8BAAQA/v8DAAAAAQACAP7/AwD//wEAAQD8/wEAAAABAAAA/P8FAP7//v8EAPj/BwD7/wEAAgD8/wQA+f8FAPj/BwD6/wMA/v///wAA//8AAP3/AQD//wEA/f8CAPz/AQD///v/AwD2/wgA9/8DAPv/AAAAAAAA//8CAP7/AgD//wEAAwD+/wIAAgABAAMAAQAAAAQA//8FAAAABAD+/wQAAQAAAAYA/P8GAP//AgABAAAABgD9/wUA/P8DAP7/AQADAPr/BwD2/wYA+/8BAP7//v8AAP3//////wAA/v8CAP7/AQAAAAEA/v8DAP3/BAD//wIAAAAEAP7/BAD+/wIAAAABAAMAAAAEAP//BAD+/wQA//8DAAMA//8DAAEABQD9/wYA/f8EAAAABAD8/wkA/P8FAAMA/v8BAAQA+v8JAPr/BQABAAAAAAAFAPv/CwD4/wkA/f8CAAMAAAD//wIA/f8DAAIA+v8GAPv/AgABAPz/BgD5/wYA/P8EAP3/AgD//wMA/v8CAP////8AAAAA//8BAP//AAABAP//AQAAAP3/AwD9/wAA///9/wAAAAABAP7/AwD//wIAAgAAAAAABAD9/wMA/////wMA/v8AAP7/AQD8/wQA/f///wIA//8AAAIAAAD8/wUA+P8JAPr/BAD8/wEAAAD+/wIAAAAAAAQA/f8FAP7/AQABAP//BAD+/wAA///9/wQA/P8FAPz/BwD5/wkA+f8EAP////8EAP3/AwD+/wEAAQD8/wEA///9/wQA/v///wIA+/8GAPz/BQD9/wQAAAACAAEAAQACAAEAAQD+/wUA/P8EAP//AQAAAP7/AwD9/wMA/v8DAP//AQAAAP7/BQD6/wYA/f8AAAAAAAACAP3/AwD6/wcA+f8GAPz/AQAAAP7/AgD7/wcA+v8EAP7///8DAPz/BAD+/wMAAAD9/wQA/P8CAP3/AwD8////AwD3/wkA9f8GAP7//f8CAPr/AwD8/wEAAQD+/wAAAQAAAAMA+/8EAPz/AQABAPn/BwD3/wUA+/8AAP////8BAP7/AgD+/wIAAAD//wEA/v///wEA/v8CAP3/AgD9/wEA/v8EAAEAAgACAAAAAwABAP7/AwD9/wcA+f8JAPj/BQD7/wIA/f8DAPz/BQD8/wMA/v8DAP7/AQAAAAAAAAAAAAAAAAABAP3/AgD//wAAAgD//wAAAAABAP//AgD7/wUA+/8DAAAA/f8EAPv/AAAAAP7/AQABAPz/BAD7/wQAAAADAP3/BgD7/wQAAAD9/wQA/f8CAAEA//8AAAQA/f8EAP3/BAD+/wIA/f8GAPj/CgD4/wgA+/8AAP///v///wAA+/8CAP3///////7/AQD9/wAA/f//////AAD5/wIA/v/9/wQA9/8GAP3//v8BAP7///8AAPn/BwD0/wgA+P8DAPz/BQD3/wgA+v8JAP3/BQD//wEAAwD//wQA//8DAP//AgD+/wQA+/8HAPv/BwD7/wMAAAABAAAAAQAAAAEA//8CAAMA+/8GAPj/BQD7/wEA/f8AAP///f8DAPv/BAD8/wQA///+/wMA+f8HAPj/CAD3/wcA+P8EAP3/AQD8/wIA+/8EAPj/BQD5/wAAAgD6/wcA+v8DAAIAAAAFAAAAAQAEAAEAAQADAP7/BQACAP7/AwD+/wEAAQAAAAAAAgD9/wcA+v8IAPn/BgD7/wYA/P8CAAAA/f8FAPz///8GAPn/BQ
D8/wAA/v8AAP//AgD///7/AQD7/wUA/f/9/wQA+f8DAAAA/v/+/wAA/P8CAPz/AgD8/wAA/v/9//3///8AAP7/AQAAAP//AAAAAAIAAQADAP//BQD+/wMAAgD+/wcA+/8GAAAAAQADAAIA//8GAP7/BAD+/wQA/f8GAP3/BwD9/wMA//8AAAIA/f8EAPr/BAD9/wAAAQD+/wEA//8AAP7/AQD//wEA/v/+/wAAAgD8/wQA+f8FAPr/BAD6/wEA///6/wQA+/8EAPv/AQAAAP3/BwD0/wkA9v8GAPz/AQAAAP//AAAAAAAAAAABAP7/AgD//wAABAD8/wUA/P8EAAIAAAADAAMAAQAEAP7/BQD+/wcA/P8HAAMA/v8HAP3/AgACAAAA//8DAAEA+/8IAPv/BAABAAMA/P8KAPn/CAD8/wQA/f8EAP3/AgAAAP//AQD+/wIA/v8BAP3/AgD+///////6/wcA9/8IAPr/AQD9/wEA/P8CAP7///8BAP///f8CAPn/AwD8//3/AwD7/wIA/f8CAP//AQD9/wMA/v8HAPj/BgD8/wQA/f8DAPv/AAABAAAA/v8AAP7/AQACAPz/AgABAP7/AgD+/wIAAAABAAAA/f8BAPv/BAD7/wYA+P8EAP7//f8HAPj/BwD8/wEAAgABAP3/BAD9/wIABQD6/wkA/f8GAAEAAAADAP7/BQD9/wcA/P8FAP7/AwAAAAIA//8CAP3/AwD8/wIA/f8CAPz/AQD+/wEA/////wAA//8CAPj/BgD5/wcA+////wAAAAD//wQA+v8CAP///v8AAP7/AAD+/////f8BAPr/BQD7//7/AQD6/wMA+/////3/AAD+/wQA+P8GAPf/AgAAAPr/BQD3/wIA/v/+///////+/wAA//8AAP3/BAD6/wQA/P8BAAIA//8CAAAAAgD+/wQA//8BAAEAAAACAP7/BQD6/wgA+/8FAPr/AwD//wIAAAAAAAAA/v8DAP3/AgABAP//AgD+/wIA/v8BAP3///8BAP//AgAAAAEAAAACAAIAAAABAAUA+f8MAPX/CAAAAP7/BQD+////AwD8/wUA+/8EAPj/BgD7/wQA///8/wMA/P///wMA+f8EAP3/AAACAPz/AgD9//7/AwD6/wEAAAD9/wMA/P8CAP3/AwD8/wIA/v///wIA/P8CAP7/AwD6/wMA/f8CAP7/AAD5/wMA+/8AAP3//v/9///////9/wAA+/8BAP//AAABAAAAAAAAAAEA/v8CAAAA/f8DAPr/BQD9/wEAAQD9/wMA/v8CAPz/AgD7/wYA/P8AAP//AQD9/wgA9/8IAPv/AgD//wAA/v8DAP3/AQAAAAAA/v8DAP3/BAABAP3/BQD9/wYA/P8GAP3/AwAAAAIAAwD//wMAAQACAP7/AAABAPz/BQD6/wUA/f///wEA/v8EAP//AQAAAAAA//8EAPz/BAD+/wEAAAABAAAAAQACAPz/BAD9/wAAAQD9/wQA/P8BAP//AQABAP3/AQD//wAAAQD9/wEA+/8EAPr/AAD7/wAA/f8BAP///P8DAPv/BQD8/wIAAQD//wIA/f8CAP3/BAD9/wAAAAAAAPz/AgD7/wIA+/8FAPf/CQD0/woA9P8JAPj/BQD+/wAA///+////AwABAPz/BQD3/wcA+P8FAP///P8DAPv/AwABAP7/BAD7/wgA/P8GAP3/AwABAAAAAgABAAAAAgD+/wIAAAD+/wAAAQD9/wMA/v8BAP//AwD+/wQA//8CAAAAAwD//wEA/v///wIA/v///wEA+v8HAPn/AgAAAPz/BAD7/wUA/v8DAAIAAgAAAAIA//8DAAEABAD//wUA/f8FAPz/BwD9/wAAAwD7/wkA+f8HAPr/BgD9/wEAAwD+/wQA/P8DAPz/AgD+//3/AAD5/wAA/v/9/wEA+/8DAPr/AgD9/wIA/v8DAP3/AwAAAAAAAQAAAAEA//8CAP3/BAD///7/AgD///7/Aw
D2/wYA+P8DAPv/AQD9/wIA/f8CAP7/BAD8/wUA//8BAAQA+/8FAAAA/f8GAPr/AgACAAAAAAAEAPz/BAAAAAEAAgADAAAABgD+/wcAAgD//wQA/P8GAP7/AwD8/////////wMA/P8CAPz/AgABAAAAAAACAAIAAAADAP//BAAAAAMA/v8GAPz/BwD9/wEAAAABAP//BAD6/wYA/P8DAAUA/P8DAAIAAAD+/wMA/f8EAP////8BAP//BQD6/wQA/P8BAP///f8DAP7/AwD+/wMAAAABAAIA/v8FAAIA//8HAPr/CAD8/wMAAAAAAAEAAgACAAIAAQD//wQAAAADAP//AgD//wMA//8CAPz/AQD//wEA/f8CAP3/AQAAAP7/BAD9////BQD6/wgA/P8CAP7/AgD9/wQA//8BAAAAAgD+/wIA/v8DAPz/BAD8/wAA///+/wIA/f///wEA/v8DAPz/AgD9/wIA/P8EAP3/BwD5/wUA//8AAAQA/f8DAAIA/f8FAP3/AgAAAAAAAwABAP//BAD8/wcA/P8JAPz/BwD//wIAAQAAAAEAAwD8/wcA9v8LAPn/AwAAAP7/BAD9/wIA/f8GAP3/AQACAP7/BgD7/wYAAAD+/wYA+v8DAAAA/v8EAPv/AQD8/wMA+v8FAPz/AQD//wIA/f8EAP//AQD//wQA+P8LAPn/BAD//wAAAgAAAAAAAgD//wAABAD8/wgA+f8IAP7/AQAFAP3/CAD//wIABgD8/woA+v8IAP7/AQAEAAEAAAAFAPz/BgD6/wUA+v8DAAAA//8CAP3//v8DAPv/BQD5/wUA+v8FAPv/BQD6/wQA/P8AAP7///8AAAAA/v8BAP7/AQD9/wIA/v8AAAMAAAD+/wUA/v8DAP3/BAD5/wYA9/8FAPv/AgD+//z/AAD//wEAAQD9/wAAAgD//wIA//8AAAUA/f8FAP//AAAGAPf/CwD7/wQAAgAAAAEAAQD//wMAAQACAP7/BgD9/wcA/f8FAP//AAADAP3/BQD+/wAAAAABAP///v/////////+//z/AgD7/wIA/////wEA/v8EAP3/AQD+/wEA/f8DAPv/BAD//wAAAgD//wEA/v///wQA+v8IAPj/BAABAP3/BQD6/wUA/v8CAP//AAAAAP3/AgD+//7/AAD9/wEAAAD9/wEAAAD+/wMA/P8CAAAAAQD+/wAAAgD6/wgA9f8GAPn/BQD7/wIA+v8EAP3///8BAPv/BQD6/wMA/P8BAPz/AQD+///////8/wEA/P8BAP7///8AAPn/BQD4/wEAAAD3/wUA+f8BAP//AgD+/wMA/P8DAPv/AgABAP//AgD+////AgD+////AQAAAAAAAQD+/wEAAQAAAAEAAgD//wUA//8CAAMA/////wIA/P8CAAEA+f8GAPn/BQD6/wMAAAD//wAA/v8DAP3/BQD7/wIA//8AAAEA/P8DAPn/AwD7/wAA/P8AAPr////9//7/AAD7//7//v/9//7//v/9/wEA/v////7/AQD9/wAA/P8CAP///v8CAPn/AgD//wEA/v8BAP3/AgABAPz/AwD/////AAD+/wAAAgD6/wUA+P8CAP7/+/////7/AAD9/wIA+/8CAP//AgD+/wEAAAABAAAAAAAAAAMA/v8DAP//AgAAAAAAAAD9/wQA+/8CAPr/AAD7/wEA+v8BAAAA+/8FAPj/BAD+/wEAAgD//wIA/P8CAP7/AAABAP3//v8CAPz/BgD9/wAAAQAAAAAA//8CAP7/AwD/////AQD///7/BgD9/wEAAAAAAP//AwD8/wIA/v8AAP//AgD///7/BAD7/wYA/f8DAP7/AAAAAP//AgD8/wMA+v8DAPv/AwD8/wIA+/8AAP//AAD+/wEA/P8BAAAA/v8AAAMA/P8AAAAA//8DAP3/AAACAP//AQD+//7/AgD9/wAA/v8BAP3/AwD8/wIA/v8DAP//AwAAAAMAAAAAAAEA/v8CAAEA/v
8DAP7/AgD//wEAAwD+/wUA/P8FAAAAAAABAP//AgD+/wQA/f8GAP3/AAADAP7/AAABAPv/BAD///3/AgD///7/AwD8/wUA/P8DAP3/BQD6/wYA+v8HAPr/BQD9//z/BQD9/wMA//8AAAAAAQD9/wEAAgD+/wQA/P8FAPz/AwD//wEAAgABAAEAAQABAAEAAwD9/wQA/v8BAAUA/P8GAP///v8HAPv/BgABAP7/BwD7/wUA//8DAAAAAAD//wIA/v8EAP3/AgD+/wQA/P8EAP3/AAAEAPz/AwD//wAAAwD9/wMA/v8EAP7/BAD9//7/AwD7/wUA+/8CAPz/AAD///3/AAAAAPz/BAD6/wMA/f8CAP7/BgD5/wUA/P8DAP3/AwD/////BQD8/wIAAgD8/wYA+/8FAP3/AgD+/wAAAAD//wIA//8AAAEAAAAEAAEA/v8GAPz/BgD9/wMAAgABAAMA/v8EAAAAAQABAAEAAAAAAAMAAQADAAAAAwD//wMA/P8FAP7/AQABAP////8FAPz/AwD/////AgABAAMA//8DAP7/AwD//wQA+/8JAPX/CgD6/wIAAgD9////AQD6/wcA9v8EAPr/BQD8/wQA/P8EAP7/BgD8/wgA+/8HAPr/CAD7/wYA/P8CAP//AQABAP///v8EAPz/AgADAP7/AgD//wIA/v8FAPr/BgD9/wUA/f8DAP7/AQD/////AQD+/wEA///9/wEA///9/wIA/f8DAP3/AwD9/wIAAAD9/wIA/f8CAP7/AAAAAAEAAAD//wIA//8CAP////8AAAIA/v8EAPz/BAD+/wMAAgABAAEAAAACAP7/BAD8/wAAAwD9/wEA/v8AAAAAAAAAAP//BgD7/wUA/f8BAAQAAAAAAAcA/P8DAP7/AwD//wQA/P8FAP3/BAD9/wAAAQD9/wQA/P8DAP7/AgACAP7/BAD+/wIA//8CAPz/BAD9/wQA/f8AAP////8CAP3/AwD7/wcA+v8GAPv/BQD9/wMA/v8AAP7/BAD9/wMA/v8AAAEA/////wAAAAD//wMA/P8DAP3/AwAAAAAAAQADAP3/AwD//wEAAwD+/wMAAAAAAAIA/////wAA/f8EAPv/BAD+/wAAAQABAAEAAwD+/wAABAD9/wYA//8EAP7/AwD7/wYA+/8DAPz/AQD//wEA/f8GAP3/AQACAP3/AwD//wIA//8BAAIA/f8HAPf/BgD7/wIA/v8AAP7/AQD9/wIA/v8EAPv/BgD6/wYA/v8BAAEA/P8AAAEA//8BAAEA/v8EAP7/AQACAP7/BAD//wQA+/8HAPj/BwD7/wUA/v8CAP//BAD//wAAAQD//wIAAgD//wAA/P8FAPn/CQD3/wcA/f8CAAIA/v8DAP7/AwD+/wAABAD7/wYA/f8AAAEA/v/9/wMA/P/+/wIA+/8CAP3/AAD9/wEAAAD7/wQA/P8CAPr/BQD5/wYA+f8HAP7///8DAAAA//8FAP3/BAAAAAAAAwABAAAAAgABAAAABAD+/wAABAD+/wIAAAAAAAIA///+/wIA//8AAP///P8EAP3/BQD8/wMA/v8DAAEAAAAAAAIA///+/wEA/f8BAP7///////z/AAAAAP7/////////AwD9/wIA/////wEA/v8CAP3/AwD+/wEAAQD8/wQA+/8DAP7/AgD9/wAA/////wIA+/8HAPf/BgD8/wEABAD//wAAAwAAAAQAAAABAAEAAQABAP7/BAD9/wEAAAAAAP//BAD7/wUA/f8DAP//AwD9/wIA/////wQA/P8CAP7/AgD+/wEA//8AAP7/BAD+/wEAAgD7/wQA/f8CAAIA/P8EAPv/BwD7/wYA/P8EAP7/AAABAP3/AwD8/wEA/v////3/AwD8/wcA+v8DAAEA+v8JAPb/BgD8//v/BgD4/wYA+v8BAP7/AwD5/wYA+/8DAP3/AgD+/wYA+/8EAP//AgACAP7///8FAP3/BQD+/wMAAQD//wQA+v
8GAP7/AQAFAPv/BQAAAP//AgD//wAAAgD+/wAAAAD//wEA/f8BAP3/AAD+//7/AAABAP3/AAAAAP7/AgD///7/BQD5/wcA+f8GAPj/BQD7/wIA/v/9/wMA/f8DAP3/AQACAP//AQD+/wEAAgD+/wEA/////wEAAQD9/wQA/f8DAPv/BwD3/wsA9f8IAPn/AwD9/wEAAQD+//7/BQD8/wMA///+/wMA/f8CAP7/AwACAPv/CQD3/wcA/f8AAAEA/v8DAAEA/f8BAAAA/f8DAPv/AAAAAPz/AQD///7/AAAAAP////8BAP3/AgAAAPz/BAD5/wgA+P8GAP////8GAPv/BAAAAAIA/v8GAPn/CQD5/wUA/f8DAP3/BAD9/wIAAQD7/wYA+P8GAP7//v8BAP//BQD5/wcA+v8EAAAA/P8FAPv/BAD7/wIA/f8AAP7/BAD7/wMA+/8BAPz//v8CAPn/BAD5/wMA+/8CAP7//v8CAPz/AwD//wAAAQD//wEAAAACAAMA/v8CAPv/AwD8/wIA+/8CAAAAAgD+/wEA/v8CAP7//////wAA/P8DAPr/BAD8/wQA+/8GAPv/BAD+/wMAAQAAAAQAAAACAAIA/P8FAP7/AAAFAPz/CQD+/wAABAAAAAMA//8AAP//BQD8/wUA+/8EAPz/AAD7/wIA/v///wAA/v/+/wAA///+//7/AQD7/wEA/v/8/wEA+/8FAPv/BAD6/wIA///+/wMA/f8DAPz/AgD+/wQA/f8AAP//AAD///////8AAP7/AAD///7//v8AAP3/AQD+//7/AwD7/wEA/v8AAAAA/v8DAPz/BQD6/wQA/f8AAAIA//8EAPr/BAD//wEABAD9/wQA//8CAAEAAAACAAMA/v8JAPz/BwAAAP//BAD//wQA+/8FAPz/BAD+/wMA/v8CAAEA/P8FAP7/AQD8/wMA+////wEA+P8HAPn/AwD8/wAAAAD//wEA/v8EAPz/BAD7/wIA/v////7/AgD7/wQA+P8DAPv/AQD8//7//f/9/////f/8////+//9/wAA/P///////v/8/wQA/P8DAP////8BAAAA//8FAP7/BQAAAAIAAQACAAEAAwD//wEAAQABAAAAAAAGAP3/BgD7/wcA/v8CAP///////wEA/v8CAAAA//8DAAEA/v8HAPr/BgD+/wAAAAD9/wEA//8BAP3/AgD+/wMA/v8BAP7//////wIA/f///wIA+/8GAP3/AgAAAAEA/P8FAPv/BAD+/wIAAgD+/wQA//8BAAIAAQAEAPv/BgD7/wcA+f8FAPz///8DAPr/BQAAAP//AAABAAEA/v8DAP3/AQAAAAIA+v8GAPz//f8DAPr/AgD///3/AQD+//z/AwD6/wUA+f8DAP3/AgD+/wEAAAD//wAA/f8DAPn/CAD0/woA+P8CAP///v//////////////AgAAAAEAAQACAAAAAAAFAP7/AwACAAIAAQABAAMA/v8EAP3/BAACAP7/AwD+/wQAAgAAAAMA//8DAAEA/v8EAPv/BAD7/wUA/v8CAP7//////wAA/v///wAAAQD8/wEA+/8DAPr/AwD9/wIA//8BAAAAAQD//wEAAgD8/wUA/P8BAAIA+v8HAPr/CQD7/wUAAwD9/wgA+/8FAP7/AgABAAIA//8FAPz/CwD4/wwA+v8DAAMA+f8JAPn/BwD6/wYA/P8CAAIA/v8EAP7/AwD/////AAAAAAAAAQD+/wIA/v8CAP//AQD//wAA//8AAP7/AgD9/wEAAQD9/wAAAQAAAAIA/v8AAAAAAwD8/wQA/P8GAPr/BgD8/wYA/P8=\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio 
object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 147_003_1675_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "################################################################################\n", + " SUBJECT: 149\n", + "################################################################################\n", + "\n", + "============================================================\n", + "File ID: 149_003_0927\n", + "Original Audio: 149_003_0927.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 149_003_0927_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 
149_003_2332\n", + "Original Audio: 149_003_2332.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 149_003_2332_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "============================================================\n", + "File ID: 149_003_2621\n", + "Original Audio: 149_003_2621.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Synthesized Audio: 149_003_2621_500000step.wav\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], + "source": [ + "# Display Results\n", + "current_subject = None\n", + "print(\"-\"*60)\n", + "print(\"g_500000 Inference Results\")\n", + "print(\"-\"*60)\n", + "OUTPUT_DIR = os.path.join(PROJECT_DIR, \"output/g_500000\")\n", + "\n", + "\n", + "for pair in file_pairs:\n", + " if pair['subject'] != current_subject:\n", + " current_subject = pair['subject']\n", + " print(\"\\n\" + \"#\"*80)\n", + " print(f\" SUBJECT: {current_subject}\")\n", + " print(\"#\"*80 + \"\\n\")\n", + "\n", + " input_base = os.path.basename(pair['pt']).replace(\"_preprocessed.pt\", \"\")\n", + " # Based on inference_unit2a.py logic, if checkpoint has no numbers, suffix is 'unknown_step'\n", + " output_filename = f\"{input_base}_500000step.wav\"\n", + " output_path = os.path.join(OUTPUT_DIR, output_filename)\n", + " \n", + " print(\"=\"*60)\n", + " print(f\"File ID: {input_base}\")\n", + " \n", + " # 1. Original Audio\n", + " print(f\"Original Audio: {os.path.basename(pair['wav'])}\")\n", + " ipd.display(ipd.Audio(pair['wav'], rate=16000))\n", + " \n", + " # 2. 
Synthesized Audio\n", + " if os.path.exists(output_path):\n", + " print(f\"Synthesized Audio: {output_filename}\")\n", + " ipd.display(ipd.Audio(output_path, rate=16000))\n", + " else:\n", + " print(f\"generated file not found at: {output_path}\")\n", + " print(\"\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gyucheol", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/sample_utut.ipynb b/notebooks/sample_utut.ipynb new file mode 100644 index 0000000..4306543 --- /dev/null +++ b/notebooks/sample_utut.ipynb @@ -0,0 +1,682 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a0b0c0d0", + "metadata": {}, + "source": [ + "# Unit-to-Unit Translation Inference (EN → KO)\n", + "\n", + "**Pipeline**: EN unit (text) → UTUT Translation → KO unit (predicted) → CodeHiFiGAN Vocoder → Waveform\n", + "\n", + "**Ground Truth**: KO unit (text) & KO WAV for comparison\n", + "\n", + "**Data**: `aihub_a2a_unit` (en/ko unit text) + `aihub_a2a_wav` (ko wav)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a1b1c1d1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/2022113135/.conda/envs/gyucheol/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "import json\n", + "import subprocess\n", + "import numpy as np\n", + "import torch\n", + "import soundfile as sf\n", + "import IPython.display as ipd\n", + "from collections import defaultdict\n", + "\n", + "# Project paths\n", + "ROOT_DIR = \"/home/2022113135\"\n", + "PROJECT_DIR = os.path.join(ROOT_DIR, \"gyucheol/NetfLips/av2av-main\")\n", + "JJS_DIR = os.path.join(ROOT_DIR, \"jjs/av2av\")\n", + "\n", + "INFERENCE_SCRIPT = os.path.join(PROJECT_DIR, \"inference_unit2a.py\")\n", + "\n", + "sys.path.insert(0, PROJECT_DIR)\n", + "sys.path.insert(0, JJS_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a2b2c2d2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Device: cuda\n", + "UTUT checkpoint: /home/2022113135/jjs/av2av/unit2unit/utut_finetune/utut_additional_ckpt/unit_mbart_multilingual_ft/en_ko/checkpoint_best.pt\n", + "Vocoder checkpoint: /home/2022113135/gyucheol/NetfLips/av2av-main/unit2av/checkpoint/zeroth-hubert/g_00500000\n" + ] + } + ], + "source": [ + "# ============================================================\n", + "# Configuration\n", + "# ============================================================\n", + "\n", + "# --- Data Paths ---\n", + "EN_WAV_DIR = os.path.join(ROOT_DIR, \"datasets/aihub_a2a_wav/test/en\") # SOURCE\n", + "EN_UNIT_DIR = os.path.join(ROOT_DIR, \"datasets/aihub_a2a_unit/test/en\")\n", + "KO_UNIT_DIR = os.path.join(ROOT_DIR, \"datasets/aihub_a2a_unit/test/ko\") # GT\n", + "KO_WAV_DIR = os.path.join(ROOT_DIR, \"datasets/aihub_a2a_wav/test/ko\") # GT\n", + "\n", + "# --- UTUT (Unit-to-Unit Translation) ---\n", + "UTUT_CHECKPOINT = os.path.join(\n", + " JJS_DIR, \"unit2unit/utut_finetune/utut_additional_ckpt/unit_mbart_multilingual_ft/en_ko/checkpoint_best.pt\"\n", + 
")\n", + "SRC_LANG = \"en\"\n", + "TGT_LANG = \"ko\"\n", + "\n", + "# --- Vocoder (CodeHiFiGAN) ---\n", + "VOCODER_CHECKPOINT = os.path.join(PROJECT_DIR, \"unit2av/checkpoint/zeroth-hubert/g_00500000\")\n", + "VOCODER_CONFIG = os.path.join(PROJECT_DIR, \"unit2av/checkpoint/zeroth-hubert/config.json\")\n", + "\n", + "# --- Speaker Encoder ---\n", + "SPEAKER_ENCODER_PATH = os.path.join(PROJECT_DIR, \"unit2av/encoder.pt\")\n", + "\n", + "# --- Output ---\n", + "OUTPUT_DIR = os.path.join(PROJECT_DIR, \"output/unit2unit_inference\")\n", + "os.makedirs(OUTPUT_DIR, exist_ok=True)\n", + "\n", + "# --- Inference settings ---\n", + "MAX_SAMPLES_PER_SUBJECT = 1 # max samples to display per subject\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "print(f\"Device: {DEVICE}\")\n", + "print(f\"UTUT checkpoint: {UTUT_CHECKPOINT}\")\n", + "print(f\"Vocoder checkpoint: {VOCODER_CHECKPOINT}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a3b3c3d3", + "metadata": {}, + "source": [ + "## 1. 
Find All Subjects & Build File Triplets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a4b4c4d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total triplets found: 9412\n", + "Total subjects: 32\n", + "\n", + "Subjects (count):\n", + " et_c_005: 27 files\n", + " et_c_010: 4 files\n", + " et_c_012: 58 files\n", + " et_c_014: 106 files\n", + " et_k_003: 3173 files\n", + " et_k_004: 408 files\n", + " et_m_008: 328 files\n", + " et_m_010: 714 files\n", + " iv_K_018: 11 files\n", + " iv_k_018: 1939 files\n", + " iv_k_019: 381 files\n", + " iv_m_001: 712 files\n", + " md_c_001: 30 files\n", + " md_c_002: 29 files\n", + " md_c_007: 46 files\n", + " md_c_012: 24 files\n", + " md_c_013: 22 files\n", + " md_c_016: 30 files\n", + " md_c_018: 44 files\n", + " md_k_018: 52 files\n", + " md_p_001: 343 files\n", + " md_s_003: 46 files\n", + " md_s_005: 49 files\n", + " md_s_006: 48 files\n", + " md_s_007: 35 files\n", + " md_s_008: 32 files\n", + " md_s_010: 80 files\n", + " md_s_011: 40 files\n", + " md_s_014: 30 files\n", + " md_s_016: 42 files\n", + " md_t_001: 251 files\n", + " md_t_002: 278 files\n" + ] + } + ], + "source": [ + "# Scan EN unit dir and find matching KO unit + KO wav triplets\n", + "en_files = sorted([f for f in os.listdir(EN_UNIT_DIR) if f.endswith('.txt')])\n", + "\n", + "triplets = [] # list of dicts\n", + "subject_map = defaultdict(list) # subject -> list of triplet indices\n", + "\n", + "for fname in en_files:\n", + " base = fname[:-4] # strip .txt\n", + " en_wav_path = os.path.join(EN_WAV_DIR, base + \"_en.wav\")\n", + " en_unit_path = os.path.join(EN_UNIT_DIR, fname)\n", + " ko_unit_path = os.path.join(KO_UNIT_DIR, fname) # same filename\n", + " ko_wav_path = os.path.join(KO_WAV_DIR, base + \".wav\") # .txt -> .wav\n", + "\n", + " if not os.path.exists(ko_unit_path):\n", + " continue\n", + " if not os.path.exists(ko_wav_path):\n", + " continue\n", + "\n", + " # Extract 
subject: e.g. 'et_c_005' from 'et_c_005_002_009_0026'\n", + " parts = base.split('_')\n", + " subject = '_'.join(parts[:3]) # et_c_005, et_k_003, ...\n", + "\n", + " triplets.append({\n", + " \"id\": base,\n", + " \"subject\": subject,\n", + " \"en_wav_path\": en_wav_path,\n", + " \"en_unit_path\": en_unit_path,\n", + " \"ko_unit_path\": ko_unit_path,\n", + " \"ko_wav_path\": ko_wav_path,\n", + " })\n", + " subject_map[subject].append(len(triplets) - 1)\n", + "\n", + "subjects = sorted(subject_map.keys())\n", + "\n", + "print(f\"Total triplets found: {len(triplets)}\")\n", + "print(f\"Total subjects: {len(subjects)}\")\n", + "print(f\"\\nSubjects (count):\")\n", + "for s in subjects:\n", + " print(f\" {s}: {len(subject_map[s])} files\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a5b5c5d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Selected 32 samples for inference (1 per subject)\n" + ] + } + ], + "source": [ + "# Select a subset: up to MAX_SAMPLES_PER_SUBJECT per subject\n", + "selected_triplets = []\n", + "for s in subjects:\n", + " indices = subject_map[s][:MAX_SAMPLES_PER_SUBJECT]\n", + " for idx in indices:\n", + " selected_triplets.append(triplets[idx])\n", + "\n", + "print(f\"Selected {len(selected_triplets)} samples for inference ({MAX_SAMPLES_PER_SUBJECT} per subject)\")" + ] + }, + { + "cell_type": "markdown", + "id": "a6b6c6d6", + "metadata": {}, + "source": [ + "## 2. 
Load Models\n", + "\n", + "- **UTUT**: loaded in-process (fairseq)\n", + "- **Speaker Encoder**: loaded in-process (to extract speaker embedding → `.pt`)\n", + "- **Vocoder**: called via `inference_unit2a.py` subprocess" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a7b7c7d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading UTUT translation model...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2026-02-04 06:51:25 | INFO | fairseq.tasks.translation | [en] dictionary: 1004 types\n", + "2026-02-04 06:51:25 | INFO | fairseq.tasks.translation | [ko] dictionary: 1004 types\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UTUT model loaded.\n" + ] + } + ], + "source": [ + "# --- 2-1. Load UTUT Translation Model ---\n", + "from unit2unit.inference import load_model as load_utut_model\n", + "from util import process_units\n", + "from fairseq import utils\n", + "from fairseq_cli.generate import get_symbols_to_strip_from_output\n", + "\n", + "print(\"Loading UTUT translation model...\")\n", + "utut_task, utut_generator = load_utut_model(\n", + " UTUT_CHECKPOINT, SRC_LANG, TGT_LANG, use_cuda=(DEVICE == \"cuda\")\n", + ")\n", + "print(\"UTUT model loaded.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a8b8c8d8", + "metadata": {}, + "outputs": [], + "source": [ + "# # --- 2-2. 
Load CodeHiFiGAN Vocoder ---\n", + "# from unit2av.model import CodeHiFiGANModel_spk\n", + "# from unit2av.utils import AttrDict\n", + "\n", + "# print(f\"Loading vocoder config from {VOCODER_CONFIG}...\")\n", + "# with open(VOCODER_CONFIG) as f:\n", + "# h = AttrDict(json.loads(f.read()))\n", + "\n", + "# print(\"Initializing CodeHiFiGAN vocoder...\")\n", + "# vocoder = CodeHiFiGANModel_spk(dict(h)).to(DEVICE)\n", + "\n", + "# state_dict = torch.load(VOCODER_CHECKPOINT, map_location=DEVICE)\n", + "# if 'generator' in state_dict:\n", + "# vocoder.load_state_dict(state_dict['generator'])\n", + "# else:\n", + "# vocoder.load_state_dict(state_dict)\n", + "\n", + "# vocoder.eval()\n", + "# vocoder.remove_weight_norm()\n", + "# print(\"Vocoder loaded.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a9b9c9d9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/2022113135/gyucheol/NetfLips/av2av-main/unit2av/model_speaker_encoder.py:19: UserWarning: Unable to import 'webrtcvad'. This package enables noise removal and is recommended.\n", + " warn(\"Unable to import 'webrtcvad'. This package enables noise removal and is recommended.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading speaker encoder...\n", + "Speaker encoder loaded.\n" + ] + } + ], + "source": [ + "# --- 2-3. Load Speaker Encoder ---\n", + "from unit2av.model_speaker_encoder import SpeakerEncoder\n", + "SPEAKER_ENCODER_PATH=\"/home/2022113135/gyucheol/NetfLips/av2av-main/unit2av/encoder.pt\"\n", + "\n", + "print(\"Loading speaker encoder...\")\n", + "speaker_encoder = SpeakerEncoder(SPEAKER_ENCODER_PATH)\n", + "if DEVICE == \"cuda\":\n", + " speaker_encoder = speaker_encoder.cuda()\n", + "print(\"Speaker encoder loaded.\")" + ] + }, + { + "cell_type": "markdown", + "id": "b0b0c0d0", + "metadata": {}, + "source": [ + "## 3. 
Inference Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "56013a95", + "metadata": {}, + "outputs": [], + "source": [ + "def read_unit_text(path):\n", + " \"\"\"Read a unit text file and return list of int.\"\"\"\n", + " with open(path) as f:\n", + " units = list(map(int, f.readline().strip().split()))\n", + " return units\n", + "\n", + "\n", + "def translate_units(en_units, utut_task, utut_generator, use_cuda=True):\n", + " \"\"\"\n", + " Unit-to-Unit Translation: EN units -> KO units.\n", + " Returns predicted unit string and list of ints.\n", + " \"\"\"\n", + " # Reduce consecutive duplicates and encode\n", + " reduced = process_units(en_units, reduce=True)\n", + " unit_tensor = utut_task.source_dictionary.encode_line(\n", + " \" \".join(map(str, reduced)),\n", + " add_if_not_exist=False,\n", + " append_eos=True,\n", + " ).long()\n", + "\n", + " # Prepend BOS, append source language tag\n", + " unit_tensor = torch.cat([\n", + " unit_tensor.new([utut_task.source_dictionary.bos()]),\n", + " unit_tensor,\n", + " unit_tensor.new([utut_task.source_dictionary.index(f\"[{SRC_LANG}]\")])\n", + " ])\n", + "\n", + " sample = {\"net_input\": {\n", + " \"src_tokens\": unit_tensor.view(1, -1),\n", + " }}\n", + " if use_cuda:\n", + " sample = utils.move_to_cuda(sample)\n", + "\n", + " # Run translation\n", + " pred = utut_task.inference_step(\n", + " utut_generator,\n", + " None,\n", + " sample,\n", + " )[0][0]\n", + "\n", + " # Decode predicted tokens to unit string\n", + " pred_str = utut_task.target_dictionary.string(\n", + " pred[\"tokens\"].int().cpu(),\n", + " extra_symbols_to_ignore=get_symbols_to_strip_from_output(utut_generator)\n", + " )\n", + "\n", + " # Convert to list of int\n", + " pred_units = [int(x) for x in pred_str.strip().split() if x.isdigit()]\n", + " return pred_units, pred_str\n", + "\n", + "\n", + "def save_input_pt(units, ko_wav_path, speaker_encoder, output_pt_path):\n", + " \"\"\"\n", + " Save predicted units + 
speaker embedding as .pt file\n", + " for inference_unit2a.py consumption.\n", + " \"\"\"\n", + " code = torch.LongTensor(units)\n", + " spkr_embed = speaker_encoder.get_embed(ko_wav_path)\n", + " # Before (2D — causes IndexError on transpose(1,2)):\n", + " # spkr_tensor = torch.from_numpy(spkr_embed).float().view(1, -1) # (1, 256)\n", + "\n", + " # After (3D — matches what the model expects):\n", + " spkr_tensor = torch.from_numpy(spkr_embed).float().view(1, 1, -1) # (1, 1, 256)\n", + "\n", + "\n", + " torch.save({\"code\": code, \"spkr\": spkr_tensor}, output_pt_path)\n", + "\n", + "\n", + "def run_vocoder(input_pt_path, output_dir, device=\"cuda\"):\n", + " \"\"\"\n", + " Call inference_unit2a.py via subprocess to synthesize waveform.\n", + " Returns the generated wav path.\n", + " \"\"\"\n", + " command = [\n", + " \"python\", INFERENCE_SCRIPT,\n", + " \"--checkpoint\", VOCODER_CHECKPOINT,\n", + " \"--config\", VOCODER_CONFIG,\n", + " \"--input_file\", input_pt_path,\n", + " \"--output_folder\", output_dir,\n", + " \"--device\", device,\n", + " ]\n", + " result = subprocess.run(command, capture_output=True, text=True, cwd=PROJECT_DIR)\n", + " if result.returncode != 0:\n", + " print(f\" [ERROR] {result.stderr}\")\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "b2b2c2d2", + "metadata": {}, + "source": [ + "## 4. 
Run Inference: Unit-to-Unit Translation + Waveform Synthesis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3b3c3d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1/32] et_c_005_002_009_0026\n", + "[2/32] et_c_010_002_004_0014\n" + ] + } + ], + "source": [ + "results = []\n", + "use_cuda = (DEVICE == \"cuda\")\n", + "\n", + "# Temp dir for .pt files passed to inference_unit2a.py\n", + "PT_DIR = os.path.join(OUTPUT_DIR, \"pt_inputs\")\n", + "os.makedirs(PT_DIR, exist_ok=True)\n", + "\n", + "for i, triplet in enumerate(selected_triplets):\n", + " sample_id = triplet[\"id\"]\n", + " print(f\"[{i+1}/{len(selected_triplets)}] {sample_id}\")\n", + "\n", + " # 1) Read EN units\n", + " en_units = read_unit_text(triplet[\"en_unit_path\"])\n", + "\n", + " # 2) Read GT KO units\n", + " gt_ko_units = read_unit_text(triplet[\"ko_unit_path\"])\n", + "\n", + " # 3) UTUT Translation: EN -> KO (predicted)\n", + " pred_ko_units, pred_ko_str = translate_units(\n", + " en_units, utut_task, utut_generator, use_cuda=use_cuda\n", + " )\n", + "\n", + " # 4) Save predicted units + speaker embedding as .pt\n", + " # inference_unit2a.py strips last 13 chars (\"_preprocessed\") from basename\n", + " pt_path = os.path.join(PT_DIR, f\"{sample_id}_preprocessed.pt\")\n", + " save_input_pt(pred_ko_units, triplet[\"ko_wav_path\"], speaker_encoder, pt_path)\n", + "\n", + " # 5) Run vocoder via inference_unit2a.py subprocess\n", + " run_vocoder(pt_path, OUTPUT_DIR, device=DEVICE)\n", + "\n", + " # 6) Locate generated wav (inference_unit2a.py naming: {base}_{step}step.wav)\n", + " output_wav_path = os.path.join(OUTPUT_DIR, f\"{sample_id}_500000step.wav\")\n", + "\n", + " # 7) Save predicted unit text\n", + " output_unit_path = os.path.join(OUTPUT_DIR, f\"{sample_id}_pred_unit.txt\")\n", + " with open(output_unit_path, 'w') as f:\n", + " f.write(' '.join(map(str, pred_ko_units)))\n", + "\n", + " results.append({\n", + " 
**triplet,\n", + " \"en_units\": en_units,\n", + " \"gt_ko_units\": gt_ko_units,\n", + " \"pred_ko_units\": pred_ko_units,\n", + " \"synth_wav_path\": output_wav_path,\n", + " })\n", + "\n", + "print(f\"\\nDone. {len(results)} samples processed.\")\n", + "print(f\"Output directory: {OUTPUT_DIR}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b4b4c4d4", + "metadata": {}, + "source": [ + "## 5. Display Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5b5c5d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "Unit-to-Unit Translation Inference Results (EN -> KO)\n", + "--------------------------------------------------------------------------------\n", + "\n", + "################################################################################\n", + " SUBJECT: et_c_005\n", + "################################################################################\n", + "\n", + "======================================================================\n", + " File ID: et_c_005_002_009_0026\n", + " EN units (first 20): [501, 501, 501, 501, 501, 501, 991, 991, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501] ...\n", + "\n", + " GT KO units (first 20): [43, 843, 474, 825, 825, 825, 825, 825, 681, 359, 874, 822, 255, 416, 565, 565, 565, 565, 217, 217] ...\n", + "\n", + " Pred KO units (first 20): [501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501, 501] ...\n", + " EN unit length: 334 | GT KO: 140 | Pred KO: 199\n", + "\n", + " [Src] English Audio: et_c_005_002_009_0026.wav\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/home/2022113135/datasets/aihub_a2a_wav/test/en/et_c_005_002_009_0026.wav'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-11-abd3b397a673>\u001b[0m in \u001b[0;36m<cell line: 7>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;31m# Source EN audio\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\" [Src] English Audio: {os.path.basename(r['en_wav_path'])}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mipd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mipd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mAudio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'en_wav_path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/gyucheol/lib/python3.8/site-packages/IPython/lib/display.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, filename, url, embed, rate, autoplay, normalize, element_id)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautoplay\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautoplay\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melement_id\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0melement_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mAudio\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/gyucheol/lib/python3.8/site-packages/IPython/core/display.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, url, filename, metadata)\u001b[0m\n\u001b[1;32m 635\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetadata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 637\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 638\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m~/.conda/envs/gyucheol/lib/python3.8/site-packages/IPython/lib/display.py\u001b[0m in \u001b[0;36mreload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmimetypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mAudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/gyucheol/lib/python3.8/site-packages/IPython/core/display.py\u001b[0m in \u001b[0;36mreload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[0;34m\"\"\"Reload the raw data from file or URL.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 661\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 662\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_flags\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 663\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0murl\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/home/2022113135/datasets/aihub_a2a_wav/test/en/et_c_005_002_009_0026.wav'" + ] + } + ], + "source": [ + "current_subject = None\n", + "\n", + "print(\"-\" * 80)\n", + "print(\"Unit-to-Unit Translation Inference Results (EN -> KO)\")\n", + "print(\"-\" * 80)\n", + "\n", + "for r in results:\n", + " # Subject header\n", + " if r[\"subject\"] != current_subject:\n", + " current_subject = r[\"subject\"]\n", + " print(\"\\n\" + \"#\" * 80)\n", + " print(f\" SUBJECT: {current_subject}\")\n", + " print(\"#\" * 80 + \"\\n\")\n", + "\n", + " print(\"=\" * 70)\n", + " print(f\" File ID: {r['id']}\")\n", + " print(f\" EN units (first 20): {r['en_units'][:20]} ...\")\n", + " print(f\"\\n GT KO units (first 20): {r['gt_ko_units'][:20]} ...\")\n", + " print(f\"\\n Pred KO units (first 20): {r['pred_ko_units'][:20]} ...\")\n", + " print(f\" EN unit length: {len(r['en_units'])} | GT KO: {len(r['gt_ko_units'])} | Pred KO: {len(r['pred_ko_units'])}\")\n", + " print()\n", + "\n", + " # Source EN audio\n", + " print(f\" [Src] English Audio: {os.path.basename(r['en_wav_path'])}\")\n", + " ipd.display(ipd.Audio(filename=r['en_wav_path']))\n", + "\n", + "\n", + " # GT KO audio\n", + " print(f\" [GT] Korean Audio: 
{os.path.basename(r['ko_wav_path'])}\")\n", + " ipd.display(ipd.Audio(r[\"ko_wav_path\"], rate=16000))\n", + "\n", + " # Synthesized audio\n", + " synth_path = r[\"synth_wav_path\"]\n", + " if os.path.exists(synth_path):\n", + " print(f\" [Pred] Synthesized Audio: {os.path.basename(synth_path)}\")\n", + " ipd.display(ipd.Audio(synth_path, rate=16000))\n", + " else:\n", + " print(f\" [Pred] Generated file not found at: {synth_path}\")\n", + "\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6fb0b71", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " <audio controls=\"controls\" >\n", + " <source src=\"data:audio/x-wav;base64,\" type=\"audio/x-wav\" />\n", + " Your browser does not support the audio element.\n", + " </audio>\n", + " " + ], + "text/plain": [ + "<IPython.lib.display.Audio object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ipd.display(ipd.Audio('/home/2022113135/datasets/aihub_a2a_wav/test/en/et_c_005_002_009_0026_en.wav'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "557b5282", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gyucheol", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6b888de --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +# --- Core Libraries (버전 고정 필수) --- +numpy<1.24 +scipy==1.10.0 +librosa==0.8.1 +resampy==0.4.3 +opencv-python==4.5.4.60 +tensorboard +tensorboardX + +# --- Audio & Video Processing --- +python-speech-features==0.6 +soundfile +av 
+ffmpeg-python +amfm_decompy +matplotlib +tqdm + +# --- System & Config --- +omegaconf==2.0.6 +hydra-core==1.0.7 \ No newline at end of file diff --git a/samples/en/TRajLqEaWhQ_00002.bbox.pkl b/samples/en/TRajLqEaWhQ_00002.bbox.pkl new file mode 100644 index 0000000..d337ebf Binary files /dev/null and b/samples/en/TRajLqEaWhQ_00002.bbox.pkl differ diff --git a/scripts/lip_detect/README.md b/scripts/lip_detect/README.md new file mode 100644 index 0000000..fe5bad1 --- /dev/null +++ b/scripts/lip_detect/README.md @@ -0,0 +1,91 @@ +# Lip Detection and Extraction Scripts + +Scripts for detecting faces, extracting lip regions, and managing bounding box metadata for video processing. + +## Environment Setup + +Create and activate a conda environment for the lip extraction scripts: + +```bash +conda env create -f environment.yml +conda activate lip_extraction +``` + + +## Core Extraction Scripts + +These scripts process an input video and produce two outputs: +- A `.lip.mp4` video: Cropped and resized (96x96) lip region. +- A `.bbox.pkl` file: A pickle file containing the bounding box coordinates of the detected face for each frame. + +### 1. `extract_lip_yolo.py` +Uses YOLOv8 for robust face tracking and combines it with landmark detection to isolate lips. +- **Landmark Methods**: Supports both `face-alignment` (S3FD) and `mediapipe`. +- **Speaker Tracking**: Uses Mouth Aspect Ratio (MAR) variance over time to identify and track the active speaker in multi-person videos. +- **Usage**: + ```bash + python extract_lip_yolo.py --input path/to/video.mp4 --output_dir ./output --landmark_method face_alignment --device cuda + ``` + +### 2. `extract_lip_yolo_filtered.py` +An extension of the YOLO script that adds a "speaking threshold." +- **Feature**: If the speaker's MAR variance (speaking activity) falls below `--min_speaking_threshold`, the frame is saved as black and coordinates as zeros. This is useful for pruning silent or inactive segments. 
+- **Usage**: + ```bash + python extract_lip_yolo_filtered.py --input path/to/vid.mp4 --output_dir ./out --min_speaking_threshold 0.01 + ``` + +### 3. `extract_lip_mediapipe.py` +Relies entirely on the MediaPipe Tasks API for face landmarker detection. +- **Feature**: Fast and lightweight. It automatically downloads the necessary `.task` model file. It selects the face with the highest MAR (most active mouth) in each frame. +- **Usage**: + ```bash + python extract_lip_mediapipe.py --input path/to/video.mp4 --output_dir ./output + ``` + +### 4. `extract_lip_s3fd.py` +Uses the `face-alignment` library (S3FD detector) to detect landmarks. +- **Feature**: Highly accurate landmarking, though slower than MediaPipe. It selects the largest detected face. +- **Usage**: + ```bash + python extract_lip_s3fd.py --input path/to/video.mp4 --output_dir ./output --device cuda + ``` + +--- + +## Visualization & Inspection + +Tools to verify the accuracy of the extraction process. + +### `visualize_bbox.py` +Overlays the bounding boxes from a `.pkl` file onto the original video to check if the detection is correct. +- **Usage**: Edit the `video_path` and `pkl_path` variables in the script and run: + ```bash + python visualize_bbox.py + ``` + +### `inspect_bbox.py` +A quick diagnostic script to print the structure and sample data of a `.bbox.pkl` file. +- **Usage**: Update the `pkl_path` in the script and run: + ```bash + python inspect_bbox.py + ``` + +--- + +## Utility & Metadata Management + +Scripts for post-processing and cleaning up metadata. + +### `edit_bbox_pickle.py` +Manually "mute" specific segments of a video by setting their bounding box data to `None`. +- **Use Case**: Removing incorrectly detected frames or segments where the speaker is not actually speaking despite being detected. +- **Usage**: Configure the `target_pickle_file` and `ranges_to_set_none` (tuple of start/end frames) in the script and run. 
def convert_to_list(data):
    """Recursively replace NumPy arrays and scalars with plain Python values.

    The script exists so that rewritten pickles no longer reference NumPy
    types, therefore both containers (``np.ndarray``) and scalar instances
    (``np.float32``, ``np.int64``, ...) are converted.

    Args:
        data: Arbitrary object, typically nested lists / tuples / dicts that
            may contain NumPy values at any depth.

    Returns:
        An equivalent structure in which every ``np.ndarray`` became a
        (nested) list and every NumPy scalar became the matching built-in
        scalar. Lists, tuples and dicts are rebuilt; any other object is
        returned unchanged.
    """
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        # NumPy scalars are not ndarrays; without this branch they would be
        # pickled as NumPy objects and keep the dependency alive.
        return data.item()
    if isinstance(data, list):
        return [convert_to_list(item) for item in data]
    if isinstance(data, tuple):
        return tuple(convert_to_list(item) for item in data)
    if isinstance(data, dict):
        # Only values are converted; keys are kept as they are.
        return {key: convert_to_list(value) for key, value in data.items()}
    return data
def modify_bbox_pickle(pkl_path, modifications):
    """Set selected frame ranges of a ``.bbox.pkl`` file to ``None``.

    The pickle is loaded, every frame index inside the requested ranges is
    replaced by ``None``, and the result is written back to ``pkl_path``.
    Before the first write a copy of the untouched file is stored next to it
    as ``<pkl_path>.bak``. An already existing backup is never overwritten,
    so running the script repeatedly keeps the pristine original. When no
    frame actually changes, nothing is written at all.

    Args:
        pkl_path (str): Path to the .bbox.pkl file.
        modifications (list of tuple): List of (start_frame, end_frame)
            tuples. Frames in the half-open range [start_frame, end_frame)
            are set to None. Ranges are clamped to the valid frame indices;
            empty or inverted ranges are reported and skipped.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    if not os.path.exists(pkl_path):
        print(f"Error: File not found at {pkl_path}")
        return

    # 1. Load the pickle file
    # NOTE(review): pickle.load can execute arbitrary code; only use this
    # tool on bbox files produced by trusted extraction scripts.
    print(f"Loading {pkl_path}...")
    with open(pkl_path, 'rb') as f:
        bbox_data = pickle.load(f)

    # assumes bbox_data is an indexable per-frame sequence — confirm against
    # the extraction scripts that write these files.
    total_frames = len(bbox_data)
    print(f"Total frames: {total_frames}")

    # 2. Apply modifications
    modified_count = 0
    for start, end in modifications:
        # Clamp indices to valid range
        start = max(0, start)
        end = min(total_frames, end)

        if start >= end:
            print(f"Warning: Invalid range ({start}, {end}). Skipping.")
            continue

        print(f"Setting frames {start} to {end-1} -> None")
        for i in range(start, end):
            # Count only frames that really change so the summary is honest.
            if bbox_data[i] is not None:
                bbox_data[i] = None
                modified_count += 1

    print(f"Total frames modified: {modified_count}")

    if modified_count == 0:
        # Nothing changed: leave both the file and any backup untouched.
        print("No frames needed changing; file left untouched.")
        return

    # 3. Create a backup (once) so the original data can always be restored.
    backup_path = pkl_path + ".bak"
    if os.path.exists(backup_path):
        # A previous run already saved the original; copying again would
        # replace it with data that has already been edited.
        print(f"Backup already exists at {backup_path}; keeping it.")
    else:
        shutil.copy2(pkl_path, backup_path)
        print(f"Backup created at {backup_path}")

    # 4. Save the modified data
    with open(pkl_path, 'wb') as f:
        pickle.dump(bbox_data, f)

    print(f"Successfully saved modified pickle to {pkl_path}")
+ - cuda-bindings==12.9.4 + - cuda-pathfinder==1.3.3 + - cycler==0.12.1 + - exceptiongroup==1.3.1 + - face-alignment==1.4.1 + - filelock==3.20.3 + - flatbuffers==25.12.19 + - fonttools==4.61.1 + - fsspec==2026.1.0 + - h11==0.16.0 + - hf-xet==1.2.0 + - httpcore==1.0.9 + - httpx==0.28.1 + - huggingface-hub==1.3.7 + - idna==3.11 + - imageio==2.37.2 + - jinja2==3.1.6 + - kiwisolver==1.4.9 + - lap==0.5.12 + - lazy-loader==0.4 + - llvmlite==0.46.0 + - markupsafe==3.0.3 + - matplotlib==3.10.8 + - mediapipe==0.10.32 + - mpmath==1.3.0 + - networkx==3.4.2 + - numba==0.63.1 + - numpy==2.2.6 + - nvidia-cublas-cu12==12.8.4.1 + - nvidia-cuda-cupti-cu12==12.8.90 + - nvidia-cuda-nvrtc-cu12==12.8.93 + - nvidia-cuda-runtime-cu12==12.8.90 + - nvidia-cudnn-cu12==9.10.2.21 + - nvidia-cufft-cu12==11.3.3.83 + - nvidia-cufile-cu12==1.13.1.3 + - nvidia-curand-cu12==10.3.9.90 + - nvidia-cusolver-cu12==11.7.3.90 + - nvidia-cusparse-cu12==12.5.8.93 + - nvidia-cusparselt-cu12==0.7.1 + - nvidia-nccl-cu12==2.27.5 + - nvidia-nvjitlink-cu12==12.8.93 + - nvidia-nvshmem-cu12==3.4.5 + - nvidia-nvtx-cu12==12.8.90 + - opencv-contrib-python==4.13.0.90 + - opencv-python==4.13.0.90 + - pillow==12.1.0 + - polars==1.37.1 + - polars-runtime-32==1.37.1 + - psutil==7.2.2 + - pycparser==3.0 + - pyparsing==3.3.2 + - python-dateutil==2.9.0.post0 + - pyyaml==6.0.3 + - requests==2.32.5 + - scikit-image==0.25.2 + - scipy==1.15.3 + - shellingham==1.5.4 + - six==1.17.0 + - sounddevice==0.5.5 + - sympy==1.14.0 + - tifffile==2025.5.10 + - torch==2.10.0 + - torchaudio==2.10.0 + - torchvision==0.25.0 + - tqdm==4.67.2 + - triton==3.6.0 + - typer-slim==0.21.1 + - typing-extensions==4.15.0 + - ultralytics==8.4.11 + - ultralytics-thop==2.0.18 + - urllib3==2.6.3 diff --git a/scripts/lip_detect/extract_lip_mediapipe.py b/scripts/lip_detect/extract_lip_mediapipe.py new file mode 100644 index 0000000..c3c5cde --- /dev/null +++ b/scripts/lip_detect/extract_lip_mediapipe.py @@ -0,0 +1,286 @@ + +import cv2 +import mediapipe as mp 
# Landmark indices that outline the lips.
# They follow the mesh topology used by FaceLandmarker (478 landmarks).
LIP_INDICES = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 185]


def get_lip_bbox_and_mar(landmarks, frame_w, frame_h):
    """Return a square lip box and the mouth aspect ratio for one face.

    Args:
        landmarks: Indexable sequence of points exposing normalized ``.x``
            and ``.y`` attributes (scaled here by the frame size).
        frame_w: Frame width in pixels.
        frame_h: Frame height in pixels.

    Returns:
        ``(bbox, mar)`` where ``bbox`` is a float32 array
        ``[x1, y1, x2, y2]`` centred on the lip points with a side of
        1.5x their larger extent, and ``mar`` is height / width of the lip
        point cloud (0.0 when the width is below 1e-5). When no lip index is
        within range of ``landmarks`` the result is ``(None, 0.0)``.
    """
    landmark_count = len(landmarks)

    # Lip indices beyond the end of the landmark list are skipped silently.
    pixel_pts = [
        [landmarks[i].x * frame_w, landmarks[i].y * frame_h]
        for i in LIP_INDICES
        if i < landmark_count
    ]
    if not pixel_pts:
        return None, 0.0

    cloud = np.array(pixel_pts, dtype=np.float32)
    lo_x, lo_y = np.min(cloud, axis=0)
    hi_x, hi_y = np.max(cloud, axis=0)

    span_x = hi_x - lo_x
    span_y = hi_y - lo_y

    # Mouth aspect ratio; a degenerate (zero-width) cloud yields 0.0.
    if span_x > 1e-5:
        mar = span_y / span_x
    else:
        mar = 0.0

    mid_x = (lo_x + hi_x) / 2.0
    mid_y = (lo_y + hi_y) / 2.0

    # Square box: 1.5x the larger extent, split evenly around the centre.
    side = max(span_x, span_y) * 1.5
    half = side / 2.0

    corners = [mid_x - half, mid_y - half, mid_x + half, mid_y + half]
    return np.array(corners, dtype=np.float32), mar
def _select_most_open_face(face_landmarks_list, frame_w, frame_h):
    """Return ``(lip_bbox, face_bbox)`` for the face with the largest MAR.

    Returns ``(None, None)`` when the list is empty/None or no face yields a
    lip box.
    """
    best_lip_bbox = None
    best_face_bbox = None
    best_mar = -1.0
    for face_landmarks in face_landmarks_list or []:
        # face_landmarks is a list of NormalizedLandmark objects
        bbox, mar = get_lip_bbox_and_mar(face_landmarks, frame_w, frame_h)
        if bbox is not None and mar > best_mar:
            best_mar = mar
            best_lip_bbox = bbox
            best_face_bbox = get_face_bbox(face_landmarks, frame_w, frame_h)
    return best_lip_bbox, best_face_bbox


def main():
    """Extract a 96x96 lip-crop video and per-frame face boxes with MediaPipe.

    Command-line arguments:
        --input       Path to the input video file.
        --output_dir  Directory that receives ``<stem>.lip.mp4`` and
                      ``<stem>.bbox.pkl``.

    For every input frame exactly one frame is written to the output video
    and exactly one entry is appended to the pickled list: the face bounding
    box of the selected face, or four zeros when nothing was detected or the
    frame failed to process. Exits with status 1 when the input, the model
    download, the capture or the writer cannot be set up.
    """
    parser = argparse.ArgumentParser(description="Extract lip region and generate bbox coordinates using MediaPipe Tasks.")
    parser.add_argument("--input", type=str, required=True, help="Path to input video file")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save outputs")
    args = parser.parse_args()

    input_path = args.input
    output_dir = args.output_dir

    if not os.path.isfile(input_path):
        print(f"Error: Input file '{input_path}' not found.")
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    # --- 1. Ensure Model Asset Exists ---
    # The Tasks API requires a binary model bundle; it is cached next to this
    # script so later runs can reuse it.
    model_filename = "face_landmarker.task"
    script_dir = os.path.dirname(os.path.abspath(__file__))
    model_asset_path = os.path.join(script_dir, model_filename)

    if not os.path.exists(model_asset_path):
        print(f"Model file '{model_filename}' not found.")
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        print(f"Downloading from {url}...")
        try:
            urllib.request.urlretrieve(url, model_asset_path)
            print("Download complete.")
        except Exception as e:
            print(f"Error downloading model: {e}")
            sys.exit(1)

    # --- 2. Initialize MediaPipe FaceLandmarker ---
    base_options = python.BaseOptions(model_asset_path=model_asset_path)
    options = vision.FaceLandmarkerOptions(
        base_options=base_options,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
        num_faces=5,
        # Confidence thresholds are lowered for better long-range detection.
        min_face_detection_confidence=0.2,
        min_face_presence_confidence=0.2,
        min_tracking_confidence=0.2,
        # Use VIDEO mode for temporal consistency
        running_mode=vision.RunningMode.VIDEO)

    # --- 3. Process Video ---
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Could not open video '{input_path}'.")
        sys.exit(1)

    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    print(f"Processing '{input_path}'")
    print(f"Resolution: {int(width)}x{int(height)}, FPS: {fps}, Frames: {total_frames}")

    # Output names are derived from the input file name without its
    # directory or extension.
    file_stem = os.path.splitext(os.path.basename(input_path))[0]
    out_vid_path = os.path.join(output_dir, f"{file_stem}.lip.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'avc1'
    # A container that reports no frame rate would otherwise hand fps=0 to
    # the writer; use the same 30 fps assumption as the timestamp fallback.
    writer_fps = fps if fps > 0 else 30.0
    out_vid = cv2.VideoWriter(out_vid_path, fourcc, writer_fps, (96, 96))
    if not out_vid.isOpened():
        cap.release()
        print(f"Error: Could not open video writer for '{out_vid_path}'.")
        sys.exit(1)

    coords_list = []

    try:
        with vision.FaceLandmarker.create_from_options(options) as landmarker:
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                lip_bbox, face_bbox = None, None
                try:
                    # MediaPipe Tasks requires an RGB MediaPipe Image
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

                    # Timestamp in ms is required for VIDEO mode.
                    if fps > 0:
                        timestamp_ms = int((frame_idx / fps) * 1000)
                    else:
                        timestamp_ms = frame_idx * 33  # assume 30fps fallback

                    detection_result = landmarker.detect_for_video(mp_image, timestamp_ms)
                    lip_bbox, face_bbox = _select_most_open_face(
                        detection_result.face_landmarks, width, height)
                except Exception as e:
                    # Best effort: a failing frame is logged and recorded as
                    # "no detection" so processing keeps going.
                    print(f"\nError processing frame {frame_idx}: {e}")
                    lip_bbox, face_bbox = None, None

                # Bookkeeping happens once per frame, outside the try, so the
                # coordinate list always stays aligned with the written frames.
                if lip_bbox is not None:
                    coords_list.append(face_bbox)
                    out_frame = crop_and_resize(frame, lip_bbox, (96, 96))
                else:
                    coords_list.append(np.zeros(4, dtype=np.float32))
                    out_frame = np.zeros((96, 96, 3), dtype=np.uint8)

                out_vid.write(out_frame)

                frame_idx += 1
                if frame_idx % 50 == 0:
                    print(f"Processed {frame_idx}/{total_frames} frames", end='\r')
    finally:
        # Always finalize the container and free the decoder, even on errors.
        cap.release()
        out_vid.release()

    # Save Coordinates
    out_pkl_path = os.path.join(output_dir, f"{file_stem}.bbox.pkl")
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(coords_list, f)

    print("\nProcessing complete.")
    print(f"Video saved to: {out_vid_path}")
    print(f"Coords saved to: {out_pkl_path}")
def get_face_bbox(landmarks, frame_w, frame_h):
    """Return the tight box around every landmark of one face.

    Args:
        landmarks: Array of (x, y) landmark coordinates, one row per point.
        frame_w: Accepted for signature parity with the lip helper; not used.
        frame_h: Accepted for signature parity with the lip helper; not used.

    Returns:
        float32 array ``[min_x, min_y, max_x, max_y]``.
    """
    left, top = np.min(landmarks, axis=0)
    right, bottom = np.max(landmarks, axis=0)

    corners = [left, top, right, bottom]
    return np.array(corners, dtype=np.float32)
def _select_largest_lip_face(preds, frame_w, frame_h):
    """Return ``(lip_bbox, face_bbox)`` for the face with the largest lip box.

    Returns ``(None, None)`` when ``preds`` is empty/None or every lip box has
    zero area.
    """
    best_lip_bbox = None
    best_face_bbox = None
    max_area = 0
    for landmarks in preds or []:
        bbox = get_lip_bbox(landmarks, frame_w, frame_h)
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        if area > max_area:
            max_area = area
            best_lip_bbox = bbox
            best_face_bbox = get_face_bbox(landmarks, frame_w, frame_h)
    return best_lip_bbox, best_face_bbox


def main():
    """Extract a 96x96 lip-crop video and per-frame face boxes with face-alignment.

    Command-line arguments:
        --input       Path to the input video file.
        --output_dir  Directory that receives ``<stem>.lip.mp4`` and
                      ``<stem>.bbox.pkl``.
        --device      ``cuda`` (default) or ``cpu``; falls back to CPU when
                      CUDA is requested but unavailable.

    For every input frame exactly one frame is written to the output video
    and exactly one entry is appended to the pickled list: the face bounding
    box of the selected face, or four zeros when nothing was detected or the
    frame failed to process. Exits with status 1 when the input, the
    landmark model, the capture or the writer cannot be set up.
    """
    parser = argparse.ArgumentParser(description="Extract lip region using S3FD (face-alignment).")
    parser.add_argument("--input", type=str, required=True, help="Path to input video file")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save outputs")
    parser.add_argument("--device", type=str, default='cuda', help="Device to use (cuda or cpu)")
    args = parser.parse_args()

    input_path = args.input
    output_dir = args.output_dir
    device = args.device

    if not os.path.isfile(input_path):
        print(f"Error: Input file '{input_path}' not found.")
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    # Check device availability
    if device == 'cuda' and not torch.cuda.is_available():
        print("Warning: CUDA not available, switching to CPU.")
        device = 'cpu'

    print(f"Initializing FaceAlignment on {device}...")
    try:
        # S3FD is the default face detector
        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, flip_input=False, device=device)
    except Exception as e:
        print(f"Error initializing FaceAlignment: {e}")
        sys.exit(1)

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Could not open video '{input_path}'.")
        sys.exit(1)

    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    print(f"Processing '{input_path}'")
    print(f"Resolution: {int(width)}x{int(height)}, FPS: {fps}, Frames: {total_frames}")

    # Output names are derived from the input file name without its
    # directory or extension.
    file_stem = os.path.splitext(os.path.basename(input_path))[0]
    out_vid_path = os.path.join(output_dir, f"{file_stem}.lip.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # A container that reports no frame rate would otherwise hand fps=0 to
    # the writer; assume 30 fps in that case.
    writer_fps = fps if fps > 0 else 30.0
    out_vid = cv2.VideoWriter(out_vid_path, fourcc, writer_fps, (96, 96))
    if not out_vid.isOpened():
        cap.release()
        print(f"Error: Could not open video writer for '{out_vid_path}'.")
        sys.exit(1)

    coords_list = []

    # Frames are processed one at a time (no batching) to keep memory use
    # bounded on long videos.
    frame_idx = 0
    try:
        with tqdm(total=total_frames) as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                lip_bbox, face_bbox = None, None
                try:
                    # S3FD expects RGB
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    # get_landmarks returns a list of ndarrays, one per face,
                    # or None if no face was detected.
                    preds = fa.get_landmarks(frame_rgb)
                    lip_bbox, face_bbox = _select_largest_lip_face(preds, width, height)
                except Exception as e:
                    # Best effort: a failing frame is logged and recorded as
                    # "no detection" so processing keeps going.
                    print(f"Error processing frame {frame_idx}: {e}")
                    lip_bbox, face_bbox = None, None

                # Bookkeeping happens once per frame, outside the try, so the
                # coordinate list always stays aligned with the written frames.
                if lip_bbox is not None:
                    coords_list.append(face_bbox)
                    out_frame = crop_and_resize(frame, lip_bbox, (96, 96))
                else:
                    coords_list.append(np.zeros(4, dtype=np.float32))
                    out_frame = np.zeros((96, 96, 3), dtype=np.uint8)

                out_vid.write(out_frame)

                frame_idx += 1
                pbar.update(1)
    finally:
        # Always finalize the container and free the decoder, even on errors.
        cap.release()
        out_vid.release()

    # Save Coordinates
    out_pkl_path = os.path.join(output_dir, f"{file_stem}.bbox.pkl")
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(coords_list, f)

    print("\nProcessing complete.")
    print(f"Video saved to: {out_vid_path}")
    print(f"Coords saved to: {out_pkl_path}")
def calculate_mar(lip_pts):
    """Return the Mouth Aspect Ratio (height / width) of a lip point set.

    An empty point set, or one narrower than 1e-5, yields 0.0.
    """
    if len(lip_pts) == 0:
        return 0.0

    pts = np.array(lip_pts)
    lo_x, lo_y = pts.min(axis=0)
    hi_x, hi_y = pts.max(axis=0)

    width = hi_x - lo_x
    height = hi_y - lo_y

    # Guard against dividing by a (near) zero width.
    return 0.0 if width < 1e-5 else height / width


def get_lip_bbox(lip_pts):
    """Return a square float32 box ``[x1, y1, x2, y2]`` centred on the lips.

    The side is 1.5x the larger extent of the points. Returns ``None`` for
    an empty point set.
    """
    if len(lip_pts) == 0:
        return None

    pts = np.array(lip_pts)
    lo_x, lo_y = pts.min(axis=0)
    hi_x, hi_y = pts.max(axis=0)

    mid_x = (lo_x + hi_x) / 2.0
    mid_y = (lo_y + hi_y) / 2.0

    # Use 1.5x margin of the largest dimension
    side = max(hi_x - lo_x, hi_y - lo_y) * 1.5
    half = side / 2.0

    corners = [mid_x - half, mid_y - half, mid_x + half, mid_y + half]
    return np.array(corners, dtype=np.float32)
class FaceAlignmentHandler:
    """Landmark backend that runs face-alignment on pre-detected face boxes."""

    def __init__(self, device='cuda'):
        """Create the 2D landmark model on ``device``; exit(1) on failure."""
        print(f"Initializing FaceAlignment on {device}...")
        try:
            # S3FD is accurate but crashes on too small inputs
            self.fa = face_alignment.FaceAlignment(
                face_alignment.LandmarksType.TWO_D,
                flip_input=False,
                device=device,
            )
        except Exception as e:
            print(f"Error initializing FaceAlignment: {e}")
            sys.exit(1)

    def get_landmarks(self, frame_rgb, bbox_xyxy):
        """Return landmarks in full-frame coordinates for one face box.

        The box is widened by 10% per side, clipped to the frame, and the
        crop is passed to face-alignment as an already-detected face.
        Returns ``None`` when the crop is smaller than 32 px in either
        dimension or no landmarks are produced.
        """
        box_l, box_t, box_r, box_b = [int(v) for v in bbox_xyxy]
        frame_h, frame_w, _ = frame_rgb.shape

        # Add some margin
        pad_x = (box_r - box_l) * 0.1
        pad_y = (box_b - box_t) * 0.1
        left = max(0, int(box_l - pad_x))
        top = max(0, int(box_t - pad_y))
        right = min(frame_w, int(box_r + pad_x))
        bottom = min(frame_h, int(box_b + pad_y))

        face_crop = frame_rgb[top:bottom, left:right]
        crop_h, crop_w, _ = face_crop.shape

        # Avoid tiny crops that crash S3FD
        if not (crop_h >= 32 and crop_w >= 32):
            return None

        # Skip the detector inside face_alignment: the crop already is the
        # face, so the whole crop is declared as the detected box.
        # detected_faces format: [(x1, y1, x2, y2)]
        whole_crop = [(0, 0, crop_w, crop_h)]
        preds = self.fa.get_landmarks(face_crop, detected_faces=whole_crop)
        if not preds:
            return None

        # Shift crop-relative coordinates back into the full frame (in place).
        landmarks = preds[0]
        landmarks[:, 0] += left
        landmarks[:, 1] += top
        return landmarks

    def get_lip_points(self, landmarks):
        """Return the lip rows of ``landmarks``, or ``[]`` when it is None."""
        if landmarks is not None:
            return landmarks[LIP_INDICES_FACEALIGNMENT]
        return []
def main():
    """Extract the active speaker's 96x96 lip crop using YOLO face tracking.

    Command-line arguments:
        --input            Path to the input video file.
        --output_dir       Directory that receives ``<stem>.lip.mp4`` and
                           ``<stem>.bbox.pkl``.
        --landmark_method  ``face_alignment`` (default) or ``mediapipe``.
        --device           Device for FaceAlignment/YOLO; a CUDA request
                           falls back to CPU when CUDA is unavailable.

    Per frame, faces are tracked with YOLO, lip landmarks are extracted, and
    the speaker is the tracked face whose recent MAR history (last 10 values)
    has the highest standard deviation; when that deviation is below 0.01 the
    face with the highest current MAR is used instead. One frame and one
    bbox entry (the YOLO face box as a list, or four zeros) are written per
    input frame. Exits with status 1 when the input, the YOLO model, the
    capture or the writer cannot be set up.
    """
    parser = argparse.ArgumentParser(description="Extract lip region using YOLOv8 face detection + Landmarks.")
    parser.add_argument("--input", type=str, required=True, help="Path to input video file")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save outputs")
    parser.add_argument("--landmark_method", type=str, default="face_alignment", choices=["face_alignment", "mediapipe"], help="Landmark detection method")
    parser.add_argument("--device", type=str, default='cuda', help="Device for FaceAlignment/YOLO (cuda or cpu)")
    args = parser.parse_args()

    input_path = args.input
    output_dir = args.output_dir
    device_name = args.device

    if not os.path.isfile(input_path):
        print(f"Error: Input file '{input_path}' not found.")
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    # Resolve the device once so YOLO and the landmark detector agree; without
    # this the landmark model is still asked for CUDA on CPU-only machines.
    if device_name.startswith('cuda') and not torch.cuda.is_available():
        print("Warning: CUDA not available, switching to CPU.")
        device_name = 'cpu'

    # --- 1. Load YOLO Model ---
    print("Loading YOLOv8-Face-Detection model...")
    try:
        model_path = hf_hub_download(repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
        yolo_model = YOLO(model_path)
        yolo_model.to(device_name)
    except Exception as e:
        print(f"Error loading YOLO model: {e}")
        sys.exit(1)

    # --- 2. Initialize Landmark Detector ---
    if args.landmark_method == "face_alignment":
        landmark_detector = FaceAlignmentHandler(device=device_name)
    else:
        landmark_detector = MediaPipeHandler()

    # --- 3. Process Video ---
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Could not open video '{input_path}'.")
        sys.exit(1)

    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    print(f"Processing '{input_path}' with {args.landmark_method}")
    print(f"Resolution: {int(width)}x{int(height)}, FPS: {fps}, Frames: {total_frames}")

    # Output names are derived from the input file name without its
    # directory or extension.
    file_stem = os.path.splitext(os.path.basename(input_path))[0]
    out_vid_path = os.path.join(output_dir, f"{file_stem}.lip.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # A container that reports no frame rate would otherwise hand fps=0 to
    # the writer; assume 30 fps in that case.
    writer_fps = fps if fps > 0 else 30.0
    out_vid = cv2.VideoWriter(out_vid_path, fourcc, writer_fps, (96, 96))
    if not out_vid.isOpened():
        cap.release()
        print(f"Error: Could not open video writer for '{out_vid_path}'.")
        sys.exit(1)

    coords_list = []

    # Speaker identification state: {track_id: deque(maxlen=10)} of MARs.
    mar_histories = {}

    try:
        with tqdm(total=total_frames) as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # --- YOLO Face Tracking ---
                results = yolo_model.track(frame_rgb, persist=True, verbose=False)

                current_frame_mar = {}
                current_frame_bboxes = {}
                current_frame_face_bboxes = {}

                if results and results[0].boxes is not None and len(results[0].boxes) > 0:
                    boxes = results[0].boxes

                    for i in range(len(boxes)):
                        box = boxes[i]
                        xyxy = box.xyxy[0].cpu().numpy()

                        # Persistent tracker id; untracked boxes get a
                        # per-frame surrogate id starting at 1000.
                        track_id = int(box.id[0].cpu().numpy()) if box.id is not None else int(i + 1000)

                        if track_id not in mar_histories:
                            mar_histories[track_id] = deque(maxlen=10)

                        # Extract Landmarks
                        landmarks = landmark_detector.get_landmarks(frame_rgb, xyxy)
                        if landmarks is not None:
                            lip_pts = landmark_detector.get_lip_points(landmarks)
                            mar = calculate_mar(lip_pts)
                            mar_histories[track_id].append(mar)

                            current_frame_mar[track_id] = mar
                            current_frame_bboxes[track_id] = get_lip_bbox(lip_pts)
                            current_frame_face_bboxes[track_id] = xyxy

                # --- Speaker Selection (MAR Variance) ---
                winner_id = None
                max_var = -1.0

                for tid, history in mar_histories.items():
                    if tid not in current_frame_bboxes:
                        continue
                    # Standard deviation of recent MARs is the 'speaking' score.
                    score = np.std(history) if len(history) >= 2 else 0.0
                    if score > max_var:
                        max_var = score
                        winner_id = tid

                # Fallback if no one is clearly talking (low variance):
                # pick the face with the highest current MAR.
                if winner_id is not None and max_var < 0.01 and current_frame_mar:
                    winner_id = max(current_frame_mar, key=current_frame_mar.get)

                best_bbox_coords = current_frame_bboxes.get(winner_id) if winner_id is not None else None
                best_face_bbox = current_frame_face_bboxes.get(winner_id) if winner_id is not None else None

                if best_bbox_coords is not None:
                    coords_list.append(best_face_bbox.tolist())
                    out_frame = crop_and_resize(frame, best_bbox_coords, (96, 96))
                else:
                    coords_list.append([0.0, 0.0, 0.0, 0.0])
                    out_frame = np.zeros((96, 96, 3), dtype=np.uint8)

                out_vid.write(out_frame)
                pbar.update(1)
    finally:
        # Always finalize the container and free the decoder, even on errors.
        cap.release()
        out_vid.release()

    # Save Coordinates
    out_pkl_path = os.path.join(output_dir, f"{file_stem}.bbox.pkl")
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(coords_list, f)

    print("\nProcessing complete.")
    print(f"Video saved to: {out_vid_path}")
    print(f"Coords saved to: {out_pkl_path}")
def get_lip_bbox(lip_pts, margin=1.5):
    """
    Calculates the 1:1 (square) bounding box centered on the lips.

    Args:
        lip_pts: Sequence/array of lip landmark points. Only the first two
            columns (x, y) are used, so (N, 2) and (N, 3) inputs both work.
        margin: Factor applied to the larger of the lip width/height to get
            the side length of the square box. Defaults to 1.5, the value
            that was previously hard-coded.

    Returns:
        ``np.ndarray`` of dtype float32 laid out as ``[x1, y1, x2, y2]``, or
        ``None`` when ``lip_pts`` is empty. The box is not clipped to any
        frame size, so coordinates may be negative or exceed the frame.
    """
    # len() rather than truthiness: lip_pts may be a numpy array.
    if len(lip_pts) == 0:
        return None

    # Drop any extra columns (e.g. z of 3D landmarks) so the unpack below is safe.
    pts = np.asarray(lip_pts)[:, :2]
    min_x, min_y = np.min(pts, axis=0)
    max_x, max_y = np.max(pts, axis=0)

    center_x = (min_x + max_x) / 2.0
    center_y = (min_y + max_y) / 2.0

    # Square side = largest lip extent scaled by the margin factor.
    half_size = max(max_x - min_x, max_y - min_y) * margin / 2.0

    return np.array(
        [
            center_x - half_size,
            center_y - half_size,
            center_x + half_size,
            center_y + half_size,
        ],
        dtype=np.float32,
    )
class FaceAlignmentHandler:
    """Landmark backend that wraps ``face_alignment.FaceAlignment`` (2D landmarks)."""

    def __init__(self, device='cuda'):
        """Create the FaceAlignment model on ``device``; exits the process on failure."""
        print(f"Initializing FaceAlignment on {device}...")
        try:
            # S3FD is accurate but crashes on too small inputs
            self.fa = face_alignment.FaceAlignment(
                face_alignment.LandmarksType.TWO_D,
                flip_input=False,
                device=device,
            )
        except Exception as e:
            print(f"Error initializing FaceAlignment: {e}")
            sys.exit(1)

    def get_landmarks(self, frame_rgb, bbox_xyxy):
        """
        Predict landmarks for the face inside ``bbox_xyxy``.

        The box is grown by 10% per side, clipped to the frame, and the crop
        is handed to face_alignment as the one already-detected face.

        Returns:
            The first prediction, shifted back into full-frame coordinates,
            or ``None`` when the crop is smaller than 32 px on either side
            or nothing was predicted.
        """
        left, top, right, bottom = (int(v) for v in bbox_xyxy)
        frame_h, frame_w, _ = frame_rgb.shape

        # Expand the face box by 10% on every side, staying inside the frame.
        pad_x = (right - left) * 0.1
        pad_y = (bottom - top) * 0.1
        crop_left = max(0, int(left - pad_x))
        crop_top = max(0, int(top - pad_y))
        crop_right = min(frame_w, int(right + pad_x))
        crop_bottom = min(frame_h, int(bottom + pad_y))

        face_crop = frame_rgb[crop_top:crop_bottom, crop_left:crop_right]

        # Tiny crops crash S3FD, so bail out early.
        if face_crop.shape[0] < 32 or face_crop.shape[1] < 32:
            return None

        # Skip face_alignment's own detector: the crop already is the face,
        # so declare the whole crop as the detected face (x1, y1, x2, y2).
        crop_h, crop_w, _ = face_crop.shape
        whole_crop = [(0, 0, crop_w, crop_h)]

        preds = self.fa.get_landmarks(face_crop, detected_faces=whole_crop)
        if not preds:
            return None

        # Translate crop-relative coordinates back to the full frame (in place).
        landmarks = preds[0]
        landmarks[:, 0] += crop_left
        landmarks[:, 1] += crop_top
        return landmarks

    def get_lip_points(self, landmarks):
        """Return the lip subset of ``landmarks``; an empty list when ``landmarks`` is None."""
        return [] if landmarks is None else landmarks[LIP_INDICES_FACEALIGNMENT]
def main():
    """
    CLI entry point: write a 96x96 lip crop of the active speaker per frame.

    Per frame: YOLOv8 face tracking -> landmarks for every tracked face ->
    Mouth Aspect Ratio (MAR) appended to a 10-sample history per track ->
    the visible track with the largest MAR standard deviation wins. When that
    score is below ``--min_speaking_threshold`` nobody is considered to be
    speaking: a black frame is written and an all-zero bbox is recorded.

    Outputs (inside ``--output_dir``):
        <stem>.lip.mp4  - one 96x96 frame per input frame
        <stem>.bbox.pkl - pickled list with one face bbox (x1, y1, x2, y2)
                          numpy array per input frame
    """
    parser = argparse.ArgumentParser(description="Extract lip region using YOLOv8 face detection + Landmarks (Filtered).")
    parser.add_argument("--input", type=str, required=True, help="Path to input video file")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save outputs")
    parser.add_argument("--landmark_method", type=str, default="face_alignment", choices=["face_alignment", "mediapipe"], help="Landmark detection method")
    parser.add_argument("--device", type=str, default='cuda', help="Device for FaceAlignment/YOLO (cuda or cpu)")
    parser.add_argument("--min_speaking_threshold", type=float, default=0.01, help="Minimum MAR variance to consider a face as speaking. Below this, output is black.")
    args = parser.parse_args()

    input_path = args.input
    output_dir = args.output_dir

    # Resolve the device once so YOLO and FaceAlignment always agree.
    # Previously only YOLO fell back to CPU, so the default 'cuda' crashed
    # FaceAlignment on CPU-only hosts, and 'cuda:N' silently forced YOLO to CPU.
    device_name = args.device
    if device_name.startswith('cuda') and not torch.cuda.is_available():
        print("CUDA is not available; falling back to CPU.")
        device_name = 'cpu'

    if not os.path.isfile(input_path):
        print(f"Error: Input file '{input_path}' not found.")
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    # --- 1. Load YOLO Model ---
    print("Loading YOLOv8-Face-Detection model...")
    try:
        model_path = hf_hub_download(repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt")
        yolo_model = YOLO(model_path)
        yolo_model.to(device_name)
    except Exception as e:
        print(f"Error loading YOLO model: {e}")
        sys.exit(1)

    # --- 2. Initialize Landmark Detector ---
    if args.landmark_method == "face_alignment":
        landmark_detector = FaceAlignmentHandler(device=device_name)
    else:
        landmark_detector = MediaPipeHandler()

    # --- 3. Process Video ---
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Could not open video '{input_path}'.")
        sys.exit(1)

    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    if fps <= 0:
        # Some containers report no FPS; a writer opened with fps=0 yields an unusable file.
        print("Warning: could not read FPS from input; assuming 25.0")
        fps = 25.0

    print(f"Processing '{input_path}' with {args.landmark_method}")
    print(f"Resolution: {int(width)}x{int(height)}, FPS: {fps}, Frames: {total_frames}")

    # Output names are derived from the input file name without directory or extension.
    file_stem = os.path.splitext(os.path.basename(input_path))[0]
    out_vid_path = os.path.join(output_dir, f"{file_stem}.lip.mp4")
    out_pkl_path = os.path.join(output_dir, f"{file_stem}.bbox.pkl")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_vid = cv2.VideoWriter(out_vid_path, fourcc, fps, (96, 96))

    coords_list = []

    # Speaker identification state: {track_id: deque(maxlen=10)} of recent MAR values.
    mar_histories = {}

    try:
        # total=None lets tqdm run without a known length when the frame count is unavailable.
        with tqdm(total=total_frames if total_frames > 0 else None) as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Landmark detectors consume RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # --- YOLO Face Tracking ---
                # Ultralytics treats numpy input as BGR (OpenCV convention), so it
                # gets the original frame rather than the RGB copy.
                results = yolo_model.track(frame, persist=True, verbose=False)

                current_frame_bboxes = {}       # track_id -> square lip bbox
                current_frame_face_bboxes = {}  # track_id -> face bbox (xyxy)

                if results and results[0].boxes is not None and len(results[0].boxes) > 0:
                    boxes = results[0].boxes

                    for i in range(len(boxes)):
                        box = boxes[i]
                        xyxy = box.xyxy[0].cpu().numpy()

                        # Persistent tracker id; untracked boxes get a per-frame placeholder id.
                        track_id = int(box.id[0].cpu().numpy()) if box.id is not None else int(i + 1000)

                        if track_id not in mar_histories:
                            mar_histories[track_id] = deque(maxlen=10)

                        # Extract Landmarks
                        landmarks = landmark_detector.get_landmarks(frame_rgb, xyxy)
                        if landmarks is not None:
                            lip_pts = landmark_detector.get_lip_points(landmarks)
                            mar_histories[track_id].append(calculate_mar(lip_pts))

                            current_frame_bboxes[track_id] = get_lip_bbox(lip_pts)
                            current_frame_face_bboxes[track_id] = xyxy

                # --- Speaker Selection (MAR Variance) ---
                winner_id = None
                max_var = -1.0

                # Iterate in first-seen track order so ties resolve as before.
                for tid, history in mar_histories.items():
                    if tid not in current_frame_bboxes:
                        continue
                    # Standard deviation of recent MAR values is the 'speaking' score.
                    score = np.std(history) if len(history) >= 2 else 0.0
                    if score > max_var:
                        max_var = score
                        winner_id = tid

                # Filter: the 'winner' must actually be speaking.
                if max_var < args.min_speaking_threshold:
                    # NO ONE IS SPEAKING (or just reacting/listening)
                    winner_id = None

                best_bbox_coords = current_frame_bboxes.get(winner_id) if winner_id is not None else None

                if best_bbox_coords is not None:
                    # NOTE: the face bbox (not the lip bbox) is what gets recorded.
                    coords_list.append(current_frame_face_bboxes[winner_id])
                    out_frame = crop_and_resize(frame, best_bbox_coords, (96, 96))
                else:
                    coords_list.append(np.zeros(4, dtype=np.float32))
                    out_frame = np.zeros((96, 96, 3), dtype=np.uint8)

                out_vid.write(out_frame)
                pbar.update(1)
    finally:
        # Always finalize the container, even if a frame fails mid-run.
        cap.release()
        out_vid.release()

    # Save Coordinates
    with open(out_pkl_path, 'wb') as f:
        pickle.dump(coords_list, f)

    print("\nProcessing complete.")
    print(f"Video saved to: {out_vid_path}")
    print(f"Coords saved to: {out_pkl_path}")
def visualize_bbox(video_path, pkl_path, output_path):
    """
    Draw the per-frame bounding boxes from a pickle onto a video.

    Every output frame is labelled with its index. A frame is marked
    "(No BBox)" when it has no usable box: the list is shorter than the
    video, the entry is ``None``, it does not have exactly four values, or
    it is the all-zero placeholder the lip extraction scripts record for
    frames without a selected speaker.

    Args:
        video_path: Source video to annotate.
        pkl_path: Pickle holding a sequence of ``[x1, y1, x2, y2]`` entries,
            indexed by frame number.
        output_path: Destination of the annotated MP4 ('mp4v' codec).
    """
    # NOTE: pickle.load can execute arbitrary code; only open pickles you produced yourself.
    with open(pkl_path, 'rb') as f:
        bboxes = pickle.load(f)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        return

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print(f"Video: {width}x{height}, {fps} FPS, {frame_count} frames")
    print(f"BBoxes: {len(bboxes)} items")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    green, red = (0, 255, 0), (0, 0, 255)  # BGR
    frame_idx = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            bbox = bboxes[frame_idx] if frame_idx < len(bboxes) else None
            # All-zero boxes are "no speaker" placeholders, not real detections.
            has_box = (
                bbox is not None
                and len(bbox) == 4
                and any(float(v) != 0.0 for v in bbox)
            )

            if has_box:
                x1, y1, x2, y2 = map(int, bbox)
                cv2.rectangle(frame, (x1, y1), (x2, y2), green, 2)
                cv2.putText(frame, f"Frame: {frame_idx}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2)
            else:
                cv2.putText(frame, f"Frame: {frame_idx} (No BBox)", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, red, 2)

            out.write(frame)
            frame_idx += 1
    finally:
        # Release handles even when annotation of a frame fails.
        cap.release()
        out.release()

    print(f"Saved visualized video to {output_path}")
def extract_tar(tar_path, extract_path):
    """
    Extract a tar archive into the given directory.

    Every member path is validated before anything is written: an archive
    containing an entry that would land outside ``extract_path`` (absolute
    path or ``..`` traversal) is rejected as a whole. Where the running
    Python provides tarfile extraction filters, the ``'data'`` filter is
    applied as well (it additionally guards against unsafe links and
    special files).

    Args:
        tar_path: Path of the tar file.
        extract_path: Directory to extract into (created if missing).

    Returns:
        True on success, False if the archive could not be opened, was
        rejected as unsafe, or extraction failed. Errors are printed, not raised.
    """
    try:
        os.makedirs(extract_path, exist_ok=True)

        print(f"Extracting {tar_path}...")
        with tarfile.open(tar_path, 'r') as tar:
            root = os.path.realpath(extract_path)
            for member in tar.getmembers():
                target = os.path.realpath(os.path.join(root, member.name))
                if os.path.commonpath([root, target]) != root:
                    raise tarfile.ExtractError(
                        f"unsafe member path outside target directory: {member.name}"
                    )

            if hasattr(tarfile, 'data_filter'):
                # Extraction filters exist on this interpreter: use the safe 'data' policy.
                tar.extractall(path=extract_path, filter='data')
            else:
                tar.extractall(path=extract_path)
        print(f"Extracted to {extract_path}")
        return True
    except Exception as e:
        # Best-effort by design: one broken archive must not abort the whole preprocessing run.
        print(f"Error extracting {tar_path}: {e}")
        return False
+ """ + try: + # Robust한 처리를 위해 ffmpeg 사용 + out_path = audio_path.replace(".wav", f"_{target_sr}.wav") + cmd = [ + "ffmpeg", "-y", + "-i", audio_path, + "-ac", "1", # Mono + "-ar", str(target_sr), + "-vn", # No video + "-loglevel", "error", + out_path + ] + subprocess.run(cmd, check=True) + return out_path + except Exception as e: + print(f"Error processing audio {audio_path}: {e}") + return None + +def process_video_frames(video_path, bboxes, start_time, end_time, src_fps=30, tgt_fps=25, crop_size=96): + """ + 비디오를 읽고, 프레임별 BBox 정보를 바탕으로 얼굴 영역을 크롭한 후 FPS를 변환. + + Args: + video_path: 원본 MP4 파일 경로 + bboxes: 프레임별 바운딩 박스 목록 [[y1, x1, y2, x2], ...] + start_time: 시작 시간 (초) + end_time: 종료 시간 (초) + src_fps: 원본 영상의 FPS + tgt_fps: 목표 FPS (기본 25) + crop_size: 출력 이미지 크기 (기본 96x96) + + Returns: + 전처리된 프레임들의 Numpy array (T, H, W) + """ + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print(f"Failed to open video: {video_path}") + return None + + frames = [] + + start_frame = int(start_time * src_fps) + end_frame = int(end_time * src_fps) + + # 시작 프레임 위치로 이동 + cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame) + + curr_frame_idx = start_frame + + while curr_frame_idx <= end_frame: + ret, frame = cap.read() + if not ret: + break + + # 해당 프레임의 BBox 정보 확인 + if curr_frame_idx < len(bboxes): + bbox = bboxes[curr_frame_idx] + try: + # AIHub JSON BBox 형식: [y1, x1, y2, x2] (top, left, bottom, right) + y1, x1, y2, x2 = bbox + + # 영상 범위를 벗어나지 않도록 클리핑 + h, w, _ = frame.shape + x1 = max(0, x1); y1 = max(0, y1) + x2 = min(w, x2); y2 = min(h, y2) + + face_img = frame[y1:y2, x1:x2] + + # 목표 크기로 리사이즈 + face_img = cv2.resize(face_img, (crop_size, crop_size), interpolation=cv2.INTER_LINEAR) + + # AV-HuBERT 호환을 위해 그레이스케일로 변환 + face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY) + + frames.append(face_img) + + except Exception as e: + print(f"Error cropping frame {curr_frame_idx}: {e}") + else: + # BBox 정보가 없는 프레임은 건너뜀 + pass + + curr_frame_idx += 1 + + cap.release() + + if not frames: 
def process_session(json_path, video_path, args, speaker_id):
    """
    Split one session (one video + one label JSON) into per-sentence clips.

    For every entry of ``Sentence_info`` three files are written to
    ``<args.save_dir>/<speaker_id>/``: the cropped video (``.mp4``), the
    audio slice resampled to ``args.sample_rate`` (``.wav``) and the
    transcript (``.txt``). A sentence whose ``.mp4`` already exists is
    skipped. A failing sentence is reported and the remaining sentences are
    still processed.

    Args:
        json_path: Path of the label JSON file.
        video_path: Path of the matching source video.
        args: Parsed CLI namespace; ``save_dir``, ``padding``, ``fps``,
            ``crop_size`` and ``sample_rate`` are read.
        speaker_id: Used as output sub-directory and file-name prefix.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Some label files wrap the session object in a list.
    if isinstance(data, list):
        if len(data) == 0:
            return
        data = data[0]

    # Metadata parsing
    try:
        bbox_data = data.get('Bounding_box_info', {}).get('Face_bounding_box')
        if isinstance(bbox_data, dict):
            # NOTE(review): this key name reads as x-first (xtl, ytl, xbr, ybr), but
            # process_video_frames unpacks each box as (y1, x1, y2, x2) - confirm
            # the real order against the dataset.
            bboxes = bbox_data.get('xtl_ytl_xbr_ybr', [])
        else:
            bboxes = bbox_data
        sentences = data.get('Sentence_info', [])
    except AttributeError as e:
        print(f"Error parsing JSON {json_path}: {e}")
        return

    # Missing/null fields would otherwise fail later on len() / iteration.
    bboxes = bboxes or []
    sentences = sentences or []
    if not sentences:
        return

    # Extract the whole audio track once and slice it in memory per sentence.
    # The temporary wav lives in a private temp directory, so it is removed on
    # every path and the source directory does not need to be writable.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_wav = os.path.join(tmp_dir, "session_audio.wav")
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-i", video_path, "-vn", "-ac", "1", "-ar", "48000", "-loglevel", "error", temp_wav],
                check=True,
            )
            sr, audio_data = wavfile.read(temp_wav)
        except (subprocess.CalledProcessError, OSError, ValueError) as e:
            # Audio extraction failed: skip the session.
            print(f"Error extracting audio from {video_path}: {e}")
            return

    int16_info = np.iinfo(np.int16)

    for sent in sentences:
        # Bound before the try so the error message below can always be formatted,
        # even when the 'ID' lookup itself is what failed.
        sent_id = None
        try:
            sent_id = sent['ID']
            # Pad the clip on both sides, never starting before 0.
            start_t = max(0, sent['start_time'] - args.padding)
            end_t = sent['end_time'] + args.padding
            text = sent['sentence_text']

            # Output location: save_dir/speaker_id/...
            spk_dir = os.path.join(args.save_dir, speaker_id)
            os.makedirs(spk_dir, exist_ok=True)

            # int() also accepts numeric strings; integer IDs format exactly as before.
            clip_stem = f"{speaker_id}_{int(sent_id):04d}"
            output_vid_path = os.path.join(spk_dir, clip_stem + ".mp4")
            output_wav_path = os.path.join(spk_dir, clip_stem + ".wav")
            output_txt_path = os.path.join(spk_dir, clip_stem + ".txt")

            if os.path.exists(output_vid_path):
                continue

            # 1. Video: crop -> resize -> FPS conversion.
            # NOTE(review): the source FPS is assumed to be 30 - confirm for the dataset.
            frames = process_video_frames(video_path, bboxes, start_t, end_t, src_fps=30, tgt_fps=args.fps, crop_size=args.crop_size)
            if frames is None:
                continue

            # 2. Audio: slice -> resample to the target rate (e.g. 48k -> 16k).
            start_sample = int(start_t * sr)
            end_sample = int(end_t * sr)
            sliced_audio = audio_data[start_sample:end_sample]

            if sr != args.sample_rate:
                num_samples = int(len(sliced_audio) * args.sample_rate / sr)
                resampled = signal.resample(sliced_audio, num_samples)
                # FFT resampling can overshoot; clip so the int16 cast cannot wrap around.
                sliced_audio = np.clip(resampled, int16_info.min, int16_info.max).astype(np.int16)

            # 3. Save results.
            save_video(frames, output_vid_path, fps=args.fps)
            wavfile.write(output_wav_path, args.sample_rate, sliced_audio)

            with open(output_txt_path, 'w', encoding='utf-8') as tf:
                tf.write(text)

        except Exception as e:
            # Best-effort per sentence: report and continue with the next one.
            print(f"Error processing sentence {sent_id} in {os.path.basename(video_path)}: {e}")
압축 파일(.tar) 해제 처리 + search_root = args.data_root + tar_files = glob.glob(os.path.join(args.data_root, "**/*.tar"), recursive=True) + + if tar_files and not args.no_tar_extract: + print(f"Found {len(tar_files)} tar files. Extracting...") + for tar_f in tqdm(tar_files): + # 파일명으로 서브폴더 생성하여 중복 방지 + tar_name = os.path.splitext(os.path.basename(tar_f))[0] + extract_to = os.path.join(args.temp_dir, tar_name) + extract_tar(tar_f, extract_to) + + search_root = args.temp_dir + + # 2. JSON(라벨) 및 MP4(원본) 페어 찾기 + json_files = glob.glob(os.path.join(search_root, "**/*.json"), recursive=True) + + print(f"Found {len(json_files)} labeling files. Processing...") + + for json_path in tqdm(json_files): + # 동일한 경로 명에서 .json만 .mp4로 변경하여 비디오 파일 탐색 + base_no_ext = os.path.splitext(json_path)[0] + video_path = base_no_ext + ".mp4" + + if not os.path.exists(video_path): + # AIHub 특성상 '라벨링데이터'와 '원천데이터' 폴더가 나뉜 경우 경로 보정 + # TL(Label) -> TS(Source) 매핑 처리 + video_path = video_path.replace("라벨링데이터", "원천데이터") + video_path = video_path.replace("TL", "TS") + + if not os.path.exists(video_path): + # 비디오를 찾을 수 없는 경우 건너뜀 + continue + + # 화자 ID(Speaker ID) 추출: 파일 경로 또는 JSON 메타데이터에서 확인 + with open(json_path, 'r', encoding='utf-8') as f: + try: + meta = json.load(f) + if isinstance(meta, list): + meta = meta[0] + speaker_id = meta.get('speaker_info', {}).get('speaker_ID', 'Unknown') + except: + speaker_id = "Unknown" + + # 실제 세션 처리 시작 + process_session(json_path, video_path, args, speaker_id) + + print("Preprocessing Complete.") + +if __name__ == "__main__": + main() diff --git a/unit2av/README.md b/unit2av/README.md new file mode 100644 index 0000000..3bb04cd --- /dev/null +++ b/unit2av/README.md @@ -0,0 +1,144 @@ +# 1. UNIT2A +## 2-1. Train + +### 1. 
학습 데이터 경로 포함하는 manifest 생성 + +`make_manifest.py` 실행해서 원본 오디오, 해당하는 유닛코드 파일 묶은 `.txt` 파일 생성(매니페스트) + +```bash +python unit2av/make_manifest.py \ + --audio_root /home/2022113135/datasets/zeroth/train_data_01/003 \ + --unit_root /home/2022113135/jihye/preprocessed_mavhubert_unit2a \ + --output_file train_hubert_new.txt +``` + +### 2. config.json 수정 + +먼저 `config.json` 에 학습/검증 데이터 매니페스트 `.txt` 파일 경로 넣어줘야함 + +```json +{ + "input_training_file": "train_hubert_2000.txt", + "input_validation_file": "train_hubert_2000.txt", + /// ... /// +} +``` + +### 3. 학습 실행 + +```bash +cd gyucheol/NetfLips/av2av-main + +CUDA_VISIBLE_DEVICES=<GPU번호설정> python train_unit2a.py \ + --config unit2av/config_hubert.json \ + --checkpoint_path unit2av/checkpoint/seamless-unit-2000 \ + --validation_interval 20 \ + --training_steps 200000 \ + --checkpoint_interval 1000 +``` + +## 2-2. Inference + +```bash +python inference_unit2a.py \ + --checkpoint "path/to/your/checkpoint" \ + --config "path/to/your/config.json" \ + --input_file "path/to/your/input.pt" \ + --output_folder "path/to/output/folder" +``` + +# 3. UNIT2AV +## Inference +```bash +python inference_unit2av.py \ + --in-unit-path "path/to/your/units.txt" \ + --in-vid-path "path/to/original_video.mp4" \ + --in-bbox-path "path/to/modified.bbox.pkl" \ + --out-vid-path "path/to/output_video.mp4" \ + --tgt-lang "en" \ + --unit2av-path "path/to/unit2av_model.pt" +``` + +## Explanation of Arguments +- `--in-unit-path`: The text file with the number sequence (speech units). +- `--in-vid-path`: Your original input video (used for Speaker Encoder). +- `--in-bbox-path`: Your modified pickle file with the None frames. +- `--unit2av-path`: Path to the .pt checkpoint file you are using. +- `--tgt-lang`: The target language (e.g., en, ko, etc.). + +# 4. 원본 코드 수정한 부분 + +### `unit2av/model.py` + +1. **불필요한 루프 주석 처리 및 삭제** + + ```python + class CodeHiFiGANModel_spk(CodeHiFiGANModel): + def forward(self, **kwargs): + # ... 중략 ... 
+ for k, feat in kwargs.items(): + if k in ["spkr", "code", "f0", "dur_prediction"]: + continue + + feat = self._upsample(feat, x.shape[-1]) + x = torch.cat([x, feat], dim=1) + + return super(CodeHiFiGANModel, self).forward(x), torch.repeat_interleave(kwargs["code"], dur_out.view(-1)) + ``` + + - **원본**: kwargs를 돌며 spkr, code, f0 등을 제외한 나머지 특징량을 모두 업샘플링해서 x에 이어붙이는(concatenate) 로직 + - 목적 : "나머지 처음 보는 데이터(feat)가 들어오면, 무조건 **오디오(x) 길이에 맞춰 늘려서(upsample) 모델 입력에 이어붙여(concat) 버리자!**" + - **수정본**: 이 부분 주석 처리 +2. **학습 시 Duration Loss 계산 로직 추가** + + ```python + if self.dur_predictor and self.training: + # ... 중략 ... + return super(CodeHiFiGANModel,self).forward(x), dur_losses + ``` + + `self.dur_predictor`가 있고 모델이 학습 상태(`self.training`)일 때 실행되는 분기 추가 + + - `process_duration` 함수를 사용해 실제 Duration 값을 추출 + - `self.dur_predictor`를 통해 예측된 값과 실제 값 사이의 **MSE Loss(dur_losses)**를 계산 + - 결과값으로 `super().forward(x)`와 함께 계산된 **`dur_losses`를 반환** +3. **반환값(Return Value)의 세분화** + + 상황에 따라 모델이 반환하는 두 번째 인자값이 달라지도록 변경되었습니다. + + - **학습 시**: `dur_losses` 반환 + - **추론 시 (`dur_prediction=True`)**: `dur_out`에 맞춰 확장된(repeat_interleave)  code 반환 (FaceRenderer가 사용) + - **기본/평가 시**: 확장되지 않은 원본  `kwargs["code"]`반환 (이전에는 무조건 확장을 시도했으나 이제 조건부로 바뀜) + +### Dur_prediction에 관하여… + +**1. 첫 번째 분기: `if self.dur_predictor and self.training:`** + +```python +if self.dur_predictor and self.training: +# ... 코드 ... +return super(CodeHiFiGANModel,self).forward(x), dur_losses + +``` + +- **언제**: 모델을 **학습시킬 때** +- **이유**: + - 오디오 생성(HiFi-GAN)과 길이 예측(Duration Predictor)을 **동시에** 학습하고 있음 + - 오디오 생성은 **forward(x)**로 수행하고, 얼마나 길게 말해야 할지 맞추는 연습은 `dur_losses`로 + - 그래서 학습 결과로 "오디오 신호"와 "길이 예측 오차(Loss)"를 모두 반환해야 학습이 됨 + +**2. 두 번째 분기: `if self.dur_predictor and kwargs.get("dur_prediction", False):`** + +```python +if self.dur_predictor and kwargs.get("dur_prediction",False): +# ... 코드 ... +return..., torch.repeat_interleave(...) 
+ +``` + +- **언제**: 학습이 끝난 후 Inference 단계 +- **조건**: 코드를 호출할 때 `dur_prediction=True`라고 명시했을 때 (UTUT에서 번역된 후 Unit2A 수행할 때) +- **이유**: + - 입력받은 유닛 코드에는 시간 정보가 없음 ← *UTUT로 번역돼서 왔기 때문* + - 그래서 "이 유닛은 3프레임, 저 유닛은 5프레임..." 하고 모델이 직접 길이를 예측해서 유닛을 복사함 + - 그래야 이 늘려진 코드를 받아서 얼굴 생성기(FaceRenderer) 등이 영상의 길이를 맞출 수 있습니다. diff --git a/unit2av/__init__.py b/unit2av/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/unit2av/config.json b/unit2av/config.json new file mode 100644 index 0000000..889820d --- /dev/null +++ b/unit2av/config.json @@ -0,0 +1,53 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 16, + "learning_rate": 0.0002, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.999, + "seed": 1234, + + "upsample_rates": [5,4,4,2,2], + "upsample_kernel_sizes": [11,8,8,4,4], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "num_embeddings": 1000, + "embedding_dim": 128, + "model_in_dim": 256, + + "multispkr": true, + "embedder_params": true, + "embedder_dim": 256, + + "segment_size": 8960, + "code_hop_size": 320, + "f0": false, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "dur_prediction_weight": 1.0, + "dur_predictor_params": { + "encoder_embed_dim": 128, + "var_pred_hidden_dim": 128, + "var_pred_kernel_size": 3, + "var_pred_dropout": 0.5 + }, + + "sampling_rate": 16000, + + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "env://" + } +} diff --git a/unit2av/config_hubert.json b/unit2av/config_hubert.json new file mode 100644 index 0000000..a35707e --- /dev/null +++ b/unit2av/config_hubert.json @@ -0,0 +1,79 @@ +{ + "input_training_file": "train_hubert_2000.txt", + "input_validation_file": "train_hubert_2000.txt", + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0002, + "adam_b1": 0.8, + "adam_b2": 0.99, 
+ "lr_decay": 0.999, + "seed": 1234, + "upsample_rates": [ + 5, + 4, + 4, + 2, + 2 + ], + "upsample_kernel_sizes": [ + 11, + 8, + 8, + 4, + 4 + ], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "num_embeddings": 1024, + "embedding_dim": 128, + "model_in_dim": 256, + "multispkr": true, + "embedder_params": true, + "embedder_dim": 256, + "segment_size": 8960, + "code_hop_size": 320, + "f0": false, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + "dur_prediction_weight": 1.0, + "dur_predictor_params": { + "encoder_embed_dim": 128, + "var_pred_hidden_dim": 128, + "var_pred_kernel_size": 3, + "var_pred_dropout": 0.5 + }, + "sampling_rate": 16000, + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + "num_workers": 4, + "dist_config": { + "dist_backend": "nccl", + "dist_url": "env://" + } +} \ No newline at end of file diff --git a/unit2av/config_seamless.json b/unit2av/config_seamless.json new file mode 100644 index 0000000..bf4843b --- /dev/null +++ b/unit2av/config_seamless.json @@ -0,0 +1,79 @@ +{ + "input_training_file": "train_seamless_2000.txt", + "input_validation_file": "train_seamless_2000.txt", + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0002, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.999, + "seed": 1234, + "upsample_rates": [ + 5, + 4, + 4, + 2, + 2 + ], + "upsample_kernel_sizes": [ + 11, + 8, + 8, + 4, + 4 + ], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "num_embeddings": 10000, + "embedding_dim": 128, + "model_in_dim": 256, + "multispkr": true, + "embedder_params": true, + "embedder_dim": 256, + "segment_size": 8960, + "code_hop_size": 320, + "f0": false, + "num_mels": 80, + 
"num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + "dur_prediction_weight": 1.0, + "dur_predictor_params": { + "encoder_embed_dim": 128, + "var_pred_hidden_dim": 128, + "var_pred_kernel_size": 3, + "var_pred_dropout": 0.5 + }, + "sampling_rate": 16000, + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + "num_workers": 4, + "dist_config": { + "dist_backend": "nccl", + "dist_url": "env://" + } +} \ No newline at end of file diff --git a/unit2av/config_zeroth.json b/unit2av/config_zeroth.json new file mode 100644 index 0000000..1d994ef --- /dev/null +++ b/unit2av/config_zeroth.json @@ -0,0 +1,79 @@ +{ + "input_training_file": "data/zeroth_train.txt", + "input_validation_file": "data/zeroth_val.txt", + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0002, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.999, + "seed": 1234, + "upsample_rates": [ + 5, + 4, + 4, + 2, + 2 + ], + "upsample_kernel_sizes": [ + 11, + 8, + 8, + 4, + 4 + ], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "num_embeddings": 1024, + "embedding_dim": 128, + "model_in_dim": 256, + "multispkr": true, + "embedder_params": true, + "embedder_dim": 256, + "segment_size": 8960, + "code_hop_size": 320, + "f0": false, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + "dur_prediction_weight": 1.0, + "dur_predictor_params": { + "encoder_embed_dim": 128, + "var_pred_hidden_dim": 128, + "var_pred_kernel_size": 3, + "var_pred_dropout": 0.5 + }, + "sampling_rate": 16000, + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + "num_workers": 4, + "dist_config": { + "dist_backend": "nccl", + "dist_url": "env://" + } +} \ No newline at end of file diff --git a/unit2av/dataset.py b/unit2av/dataset.py new file mode 100644 index 0000000..b134e66 --- /dev/null +++ 
MAX_WAV_VALUE = 32768.0

# Caches shared by mel_spectrogram(). They are defined ahead of the function
# so that they always exist by the time it is first called.
mel_basis = {}
hann_window = {}


def get_yaapt_f0(audio, rate=16000, interp=False):
    """Estimate an F0 track for every row of *audio* with the YAAPT tracker.

    Each row is cast to float64, zero-padded by half a 20 ms frame on both
    sides and handed to ``pYAAPT.yaapt``.

    Args:
        audio: numpy array that is iterated row by row (one signal per row).
        rate: sampling rate in Hz used to build the ``SignalObj``.
        interp: when true, use the tracker's interpolated track
            (``samp_interp``) instead of the raw one (``samp_values``).

    Returns:
        The per-row tracks, each expanded with two leading axes, stacked
        with ``np.vstack``.
    """
    frame_length = 20.0
    to_pad = int(frame_length / 1000 * rate) // 2

    f0s = []
    for y in audio.astype(np.float64):
        y_pad = np.pad(y.squeeze(), (to_pad, to_pad), "constant", constant_values=0)
        signal = basic.SignalObj(y_pad, rate)
        pitch = pYAAPT.yaapt(signal, **{'frame_length': frame_length, 'frame_space': 5.0, 'nccf_thresh1': 0.25,
                                        'tda_frame_length': 25.0})
        if interp:
            f0s += [pitch.samp_interp[None, None, :]]
        else:
            f0s += [pitch.samp_values[None, None, :]]

    f0 = np.vstack(f0s)
    return f0


def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
    """Compute a log-compressed mel spectrogram of the waveform batch *y*.

    Args:
        y: float tensor of waveforms; a warning is printed when values fall
            outside [-1, 1].
        n_fft, hop_size, win_size: STFT parameters.
        num_mels, sampling_rate, fmin, fmax: mel filterbank parameters.
        center: forwarded to ``torch.stft``.

    Returns:
        ``spectral_normalize_torch(mel_filterbank @ magnitude)``.

    The mel filterbank and the Hann window are cached in the module-level
    ``mel_basis`` / ``hann_window`` dicts.
    """
    if torch.min(y) < -1.:
        print('min value is ', torch.min(y))
    if torch.max(y) > 1.:
        print('max value is ', torch.max(y))

    device_key = str(y.device)
    mel_key = str(fmax) + '_' + device_key
    # The lookup key must be the same key the entry is stored under; testing
    # the bare ``fmax`` never matched, so the filterbank was rebuilt per call.
    if mel_key not in mel_basis:
        # Keyword arguments work on both old and new librosa releases (the
        # positional form is rejected by librosa >= 0.10).
        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
        mel_basis[mel_key] = torch.from_numpy(mel).float().to(y.device)
    if device_key not in hann_window:
        hann_window[device_key] = torch.hann_window(win_size).to(y.device)

    y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
    y = y.squeeze(1)

    # return_complex=False is deprecated; request the complex result and view
    # it as (..., 2) real/imag pairs, which is numerically the same layout.
    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[device_key],
                      center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
    spec = torch.view_as_real(spec)

    spec = torch.sqrt(spec.pow(2).sum(-1)+(1e-9))

    spec = torch.matmul(mel_basis[mel_key], spec)
    spec = spectral_normalize_torch(spec)

    return spec


def load_audio(full_path):
    """Read *full_path* with soundfile as int16 and return ``(data, sampling_rate)``."""
    data, sampling_rate = sf.read(full_path, dtype='int16')
    return data, sampling_rate


def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` for a numpy input."""
    return np.log(np.clip(x, a_min=clip_val, a_max=None) * C)


def dynamic_range_decompression(x, C=1):
    """Invert :func:`dynamic_range_compression` (ignoring the clipping)."""
    return np.exp(x) / C


def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Return ``log(clamp(x, min=clip_val) * C)`` for a torch input."""
    return torch.log(torch.clamp(x, min=clip_val) * C)


def dynamic_range_decompression_torch(x, C=1):
    """Invert :func:`dynamic_range_compression_torch` (ignoring the clamp)."""
    return torch.exp(x) / C


def spectral_normalize_torch(magnitudes):
    """Log-compress spectrogram magnitudes."""
    output = dynamic_range_compression_torch(magnitudes)
    return output


def spectral_de_normalize_torch(magnitudes):
    """Undo :func:`spectral_normalize_torch`."""
    output = dynamic_range_decompression_torch(magnitudes)
    return output


def parse_manifest(manifest):
    """Parse a training manifest into audio paths and unit codes.

    Every non-blank line is either a Python-literal dict (it starts with
    ``{``) or a bare audio path. Dict lines must contain ``"audio"`` and
    either ``"unit_path"`` (kept as-is) or a space-separated code string
    under ``cpc_km100`` / ``vqvae256`` / ``hubert`` / ``codes``.

    Returns:
        ``(audio_files, codes)``: a list of ``Path`` objects and a list with
        one entry per dict line (a unit path string or an int numpy array).
        Bare-path lines add no entry to ``codes``.
    """
    # Local import: ast is only needed here.
    import ast

    audio_files = []
    codes = []

    with open(manifest) as info:
        for raw_line in info:
            line = raw_line.strip()
            if not line:
                # A blank line used to be turned into Path('.').
                continue
            if line[0] == '{':
                # literal_eval accepts the same dict literals as eval() but
                # cannot execute arbitrary code from the manifest.
                sample = ast.literal_eval(line)
                if 'unit_path' in sample:
                    # Store path directly
                    codes += [sample['unit_path']]
                else:
                    if 'cpc_km100' in sample:
                        k = 'cpc_km100'
                    elif 'vqvae256' in sample:
                        k = 'vqvae256'
                    elif 'hubert' in sample:
                        k = 'hubert'
                    else:
                        k = 'codes'

                    codes += [torch.LongTensor(
                        [int(x) for x in sample[k].split(' ')]
                    ).numpy()]

                audio_files += [Path(sample["audio"])]
            else:
                audio_files += [Path(line)]

    return audio_files, codes
def parse_speaker(path, method):
    """Derive a speaker name from an audio file path.

    Args:
        path: ``str`` or ``pathlib.Path`` of the audio file.
        method: one of ``'parent_name'`` (name of the containing folder),
            ``'parent_parent_name'`` (name of the folder above that),
            ``'_'`` (file name up to the first underscore), ``'single'``
            (always ``'A'``), or a callable that receives the ``Path``.

    Returns:
        The speaker name, or whatever the callable returns.

    Raises:
        NotImplementedError: if *method* is none of the above.
    """
    # isinstance also accepts str subclasses, unlike ``type(path) == str``.
    if isinstance(path, str):
        path = Path(path)

    if method == 'parent_name':
        return path.parent.name
    elif method == 'parent_parent_name':
        return path.parent.parent.name
    elif method == '_':
        return path.name.split('_')[0]
    elif method == 'single':
        return 'A'
    elif callable(method):
        return method(path)
    else:
        raise NotImplementedError(f"Unknown speaker parsing method: {method!r}")
def __getitem__(self, index):
    """Return one example as ``(feats, audio, filename, mel_loss)``.

    ``feats`` always has ``"code"`` and, depending on the dataset flags,
    ``"f0"``, ``"spkr"`` and ``"f0_stats"``. ``audio`` is a random
    ``segment_size`` crop of the clip, ``mel_loss`` its mel spectrogram
    computed with ``fmax_loss``.

    Raises:
        ValueError: if nothing is left of the clip after aligning it with
            the unit codes.
    """
    filename = self.audio_files[index]
    code_data = self.codes[index]

    # A manifest entry is either a path to a .pt file holding 'code' (and
    # optionally 'spkr') or the codes themselves.
    loaded_spkr = None
    if isinstance(code_data, str) and code_data.endswith('.pt'):
        pt_data = torch.load(code_data, map_location='cpu')
        code = pt_data['code'].squeeze()
        if 'spkr' in pt_data:
            loaded_spkr = pt_data['spkr']
    else:
        code = code_data

    # Normalise the codes up front: the length arithmetic below indexes
    # code.shape[0], which fails for a 0-dim (single unit) code.
    if isinstance(code, torch.Tensor):
        code = code.numpy()
    if isinstance(code, np.ndarray) and code.ndim == 0:
        code = code[None]

    # NOTE(review): the cache is not keyed by index, so with
    # n_cache_reuse > 0 the next call pairs the previous file's audio with
    # this index's codes -- confirm callers pass n_cache_reuse=0.
    if self._cache_ref_count == 0:
        audio, sampling_rate = load_audio(filename)
        if sampling_rate != self.sampling_rate:
            # Resample instead of rejecting the file.
            import resampy
            audio = resampy.resample(audio, sampling_rate, self.sampling_rate)

        if self.pad:
            padding = self.pad - (audio.shape[-1] % self.pad)
            audio = np.pad(audio, (0, padding), "constant", constant_values=0)
        audio = audio / MAX_WAV_VALUE
        audio = normalize(audio) * 0.95
        self.cached_wav = audio
        self._cache_ref_count = self.n_cache_reuse
    else:
        audio = self.cached_wav
        self._cache_ref_count -= 1

    # Trim audio ending
    if self.vqvae:
        code_length = audio.shape[0] // self.code_hop_size
    else:
        code_length = min(audio.shape[0] // self.code_hop_size, code.shape[0])
        code = code[:code_length]
    audio = audio[:code_length * self.code_hop_size]
    assert self.vqvae or audio.shape[0] // self.code_hop_size == code.shape[0], "Code audio mismatch"

    # Doubling an empty array never reaches segment_size, so fail loudly
    # instead of looping forever.
    if audio.shape[0] == 0:
        raise ValueError("{}: no audio left after aligning with the unit codes".format(filename))

    # Short clips are repeated until they cover one training segment.
    while audio.shape[0] < self.segment_size:
        audio = np.hstack([audio, audio])
        if not self.vqvae:
            code = np.hstack([code, code])

    audio = torch.FloatTensor(audio)
    audio = audio.unsqueeze(0)

    assert audio.size(1) >= self.segment_size, "Padding not supported!!"
    if self.vqvae:
        audio = self._sample_interval([audio])[0]
    else:
        audio, code = self._sample_interval([audio, code])

    mel_loss = mel_spectrogram(audio, self.n_fft, self.num_mels,
                               self.sampling_rate, self.hop_size, self.win_size, self.fmin, self.fmax_loss,
                               center=False)

    if self.vqvae:
        feats = {
            "code": audio.view(1, -1).numpy()
        }
    else:
        feats = {"code": code.squeeze()}

    if self.f0:
        try:
            f0 = get_yaapt_f0(audio.numpy(), rate=self.sampling_rate, interp=self.f0_interp)
        except Exception:
            # Best effort: fall back to an all-zero contour when the tracker
            # fails, without swallowing KeyboardInterrupt/SystemExit.
            # presumably 80 samples = 5 ms frame spacing at 16 kHz -- TODO confirm
            f0 = np.zeros((1, 1, audio.shape[-1] // 80))
        f0 = f0.astype(np.float32)
        feats['f0'] = f0.squeeze(0)

    if self.multispkr:
        if loaded_spkr is not None:
            # Add dimension: [256] -> [1, 256]
            feats['spkr'] = loaded_spkr.view(1, -1)
        else:
            feats['spkr'] = self._get_spkr(index)

    if self.f0_normalize:
        spkr_id = self._get_spkr(index).item()

        # Per-speaker statistics when available, global ones otherwise.
        if spkr_id not in self.f0_stats:
            mean = self.f0_stats['f0_mean']
            std = self.f0_stats['f0_std']
        else:
            mean = self.f0_stats[spkr_id]['f0_mean']
            std = self.f0_stats[spkr_id]['f0_std']
        ii = feats['f0'] != 0

        if self.f0_median:
            # Zero (unvoiced) frames take the median of the non-zero ones.
            med = np.median(feats['f0'][ii])
            feats['f0'][~ii] = med
            feats['f0'][~ii] = (feats['f0'][~ii] - mean) / std

        feats['f0'][ii] = (feats['f0'][ii] - mean) / std

        if self.f0_feats:
            feats['f0_stats'] = torch.FloatTensor([mean, std]).view(-1).numpy()

    return feats, audio.squeeze(0), str(filename), mel_loss.squeeze()
class F0Dataset(torch.utils.data.Dataset):
    """Dataset that yields F0 contours of random crops of audio files.

    ``training_files`` is an ``(audio_files, codes)`` pair; only the audio
    paths are used. Each item is ``(feats, f0, filename)`` where ``feats``
    holds ``"f0"`` and, depending on the flags, ``"spkr"`` / ``"f0_stats"``.
    """

    def __init__(self, training_files, segment_size, sampling_rate,
                 split=True, n_cache_reuse=1, device=None, multispkr=False,
                 pad=None, f0_stats=None, f0_normalize=False, f0_feats=False,
                 f0_median=False, f0_interp=False, vqvae=False):
        self.audio_files, _ = training_files
        # Seeds the global `random` module so crops are reproducible.
        random.seed(1234)
        self.segment_size = segment_size
        self.sampling_rate = sampling_rate
        self.split = split
        self.cached_wav = None
        self.n_cache_reuse = n_cache_reuse
        self._cache_ref_count = 0
        self.device = device
        self.vqvae = vqvae
        self.f0_normalize = f0_normalize
        self.f0_feats = f0_feats
        self.f0_stats = None
        self.f0_interp = f0_interp
        self.f0_median = f0_median
        if f0_stats:
            self.f0_stats = torch.load(f0_stats)
        self.pad = pad
        self.multispkr = multispkr
        if self.multispkr:
            # `multispkr` doubles as the parse_speaker() method.
            spkrs = [parse_speaker(f, self.multispkr) for f in self.audio_files]
            spkrs = list(set(spkrs))
            spkrs.sort()

            self.id_to_spkr = spkrs
            self.spkr_to_id = {k: v for v, k in enumerate(self.id_to_spkr)}

    def _sample_interval(self, seqs, seq_len=None):
        """Cut the same random time window out of every sequence in *seqs*.

        Sequences may have different rates along their last axis; the window
        is chosen on the coarsest common grid so the crops stay aligned.
        ``seq_len`` is measured on the longest sequence and defaults to
        ``segment_size`` (or the full length when that is not positive).
        """
        N = max([v.shape[-1] for v in seqs])
        if seq_len is None:
            seq_len = self.segment_size if self.segment_size > 0 else N

        # Hop of each sequence relative to the longest one.
        hops = [N // v.shape[-1] for v in seqs]
        lcm = int(np.lcm.reduce(hops))

        # Randomly pickup with the batch_max_steps length of the part
        interval_start = 0
        interval_end = N // lcm - seq_len // lcm

        start_step = random.randint(interval_start, interval_end)

        new_seqs = []
        for i, v in enumerate(seqs):
            start = start_step * (lcm // hops[i])
            end = (start_step + seq_len // lcm) * (lcm // hops[i])
            new_seqs += [v[..., start:end]]

        return new_seqs

    def __getitem__(self, index):
        """Return ``(feats, f0, filename)`` for the audio file at *index*.

        Raises:
            ValueError: if the file's sampling rate differs from the target
                one, or if the file contains no samples.
        """
        filename = self.audio_files[index]
        if self._cache_ref_count == 0:
            audio, sampling_rate = load_audio(filename)
            # Validate before doing any work, so a wrong-rate clip is never
            # normalised and left behind in the cache.
            if sampling_rate != self.sampling_rate:
                raise ValueError("{} SR doesn't match target {} SR".format(
                    sampling_rate, self.sampling_rate))
            if self.pad:
                padding = self.pad - (audio.shape[-1] % self.pad)
                audio = np.pad(audio, (0, padding), "constant", constant_values=0)
            audio = audio / MAX_WAV_VALUE
            audio = normalize(audio) * 0.95
            self.cached_wav = audio
            self._cache_ref_count = self.n_cache_reuse
        else:
            audio = self.cached_wav
            self._cache_ref_count -= 1

        # Doubling an empty array never reaches segment_size.
        if audio.shape[0] == 0:
            raise ValueError("{}: audio file is empty".format(filename))

        # Short clips are repeated until they cover one training segment.
        while audio.shape[0] < self.segment_size:
            audio = np.hstack([audio, audio])

        audio = torch.FloatTensor(audio)
        audio = audio.unsqueeze(0)

        assert audio.size(1) >= self.segment_size, "Padding not supported!!"
        audio = self._sample_interval([audio])[0]

        feats = {}
        try:
            f0 = get_yaapt_f0(audio.numpy(), rate=self.sampling_rate, interp=self.f0_interp)
        except Exception:
            # Best effort: all-zero contour when the tracker fails, without
            # swallowing KeyboardInterrupt/SystemExit.
            # presumably 80 samples = 5 ms frame spacing at 16 kHz -- TODO confirm
            f0 = np.zeros((1, 1, audio.shape[-1] // 80))
        f0 = f0.astype(np.float32)
        feats['f0'] = f0.squeeze(0)

        if self.multispkr:
            feats['spkr'] = self._get_spkr(index)

        if self.f0_normalize:
            spkr_id = self._get_spkr(index).item()

            # Per-speaker statistics when available, global ones otherwise.
            if spkr_id not in self.f0_stats:
                mean = self.f0_stats['f0_mean']
                std = self.f0_stats['f0_std']
            else:
                mean = self.f0_stats[spkr_id]['f0_mean']
                std = self.f0_stats[spkr_id]['f0_std']
            ii = feats['f0'] != 0

            if self.f0_median:
                # Zero (unvoiced) frames take the median of the non-zero ones.
                med = np.median(feats['f0'][ii])
                feats['f0'][~ii] = med
                feats['f0'][~ii] = (feats['f0'][~ii] - mean) / std

            feats['f0'][ii] = (feats['f0'][ii] - mean) / std

            if self.f0_feats:
                feats['f0_stats'] = torch.FloatTensor([mean, std]).view(-1).numpy()

        return feats, feats['f0'], str(filename)

    def _get_spkr(self, idx):
        """Return the integer speaker id of file *idx* as a 1-element array."""
        spkr_name = parse_speaker(self.audio_files[idx], self.multispkr)
        spkr_id = torch.LongTensor([self.spkr_to_id[spkr_name]]).view(1).numpy()
        return spkr_id

    def __len__(self):
        return len(self.audio_files)
class ResBlock1(torch.nn.Module):
    """Residual block made of three (dilated conv -> plain conv) pairs.

    Every pair is applied as ``x = conv2(lrelu(conv1(lrelu(x)))) + x``.
    The first convolution of each pair uses one of the three ``dilation``
    rates; the second always uses dilation 1.
    """

    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
        super(ResBlock1, self).__init__()
        self.h = h

        def make_conv(rate):
            # Channel-preserving, weight-normalised convolution.
            return weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=rate,
                                      padding=get_padding(kernel_size, rate)))

        self.convs1 = nn.ModuleList([
            make_conv(dilation[0]),
            make_conv(dilation[1]),
            make_conv(dilation[2]),
        ])
        self.convs1.apply(init_weights)

        self.convs2 = nn.ModuleList([make_conv(1), make_conv(1), make_conv(1)])
        self.convs2.apply(init_weights)

    def forward(self, x):
        for dilated_conv, plain_conv in zip(self.convs1, self.convs2):
            hidden = dilated_conv(F.leaky_relu(x, LRELU_SLOPE))
            hidden = plain_conv(F.leaky_relu(hidden, LRELU_SLOPE))
            x = hidden + x
        return x

    def remove_weight_norm(self):
        """Strip weight normalisation from every convolution of the block."""
        for layer in list(self.convs1) + list(self.convs2):
            remove_weight_norm(layer)
class Generator(torch.nn.Module):
    """HiFi-GAN style generator: pre-conv, upsampling stages, post-conv.

    Each upsampling stage is a transposed convolution followed by
    ``len(h.resblock_kernel_sizes)`` residual blocks whose outputs are
    averaged. The output goes through ``tanh``.
    """

    def __init__(self, h):
        super(Generator, self).__init__()
        self.h = h
        self.num_kernels = len(h.resblock_kernel_sizes)
        self.num_upsamples = len(h.upsample_rates)

        in_channels = getattr(h, "model_in_dim", 128)
        self.conv_pre = weight_norm(
            Conv1d(in_channels, h.upsample_initial_channel, 7, 1, padding=3))
        resblock = ResBlock1 if h.resblock == '1' else ResBlock2

        # The channel count halves at every upsampling stage.
        self.ups = nn.ModuleList()
        for stage, (rate, kernel) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
            ch_in = h.upsample_initial_channel // (2 ** stage)
            ch_out = h.upsample_initial_channel // (2 ** (stage + 1))
            self.ups.append(weight_norm(
                ConvTranspose1d(ch_in, ch_out, kernel, rate, padding=(kernel - rate) // 2)))

        self.resblocks = nn.ModuleList()
        for stage in range(len(self.ups)):
            ch = h.upsample_initial_channel // (2 ** (stage + 1))
            for kernel, dilations in zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes):
                self.resblocks.append(resblock(h, ch, kernel, dilations))

        # `ch` is the channel count of the last stage.
        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
        self.ups.apply(init_weights)
        self.conv_post.apply(init_weights)

    def forward(self, x):
        x = self.conv_pre(x)
        for stage in range(self.num_upsamples):
            x = self.ups[stage](F.leaky_relu(x, LRELU_SLOPE))
            first_block = stage * self.num_kernels
            xs = None
            for offset in range(self.num_kernels):
                out = self.resblocks[first_block + offset](x)
                if xs is None:
                    xs = out
                else:
                    xs += out
            x = xs / self.num_kernels
        # Default negative slope here, unlike the LRELU_SLOPE used above.
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        return torch.tanh(x)

    def remove_weight_norm(self):
        """Strip weight normalisation from all layers (for inference)."""
        for up in self.ups:
            remove_weight_norm(up)
        for block in self.resblocks:
            block.remove_weight_norm()
        remove_weight_norm(self.conv_pre)
        remove_weight_norm(self.conv_post)
def forward(self, **kwargs):
    """Embed the conditioning inputs, concatenate them and run the generator.

    Reads ``kwargs['code']`` and, depending on the configured modules,
    ``kwargs['f0']`` and ``kwargs['spkr']``; every other keyword is
    upsampled to the current length and concatenated on the channel axis.

    Returns:
        The generator output alone, or -- when an F0 or code VQ module is
        configured -- ``(output, (code_commit_losses, f0_commit_losses),
        (code_metrics, f0_metrics))``.
    """
    units = kwargs['code']
    code_commit_losses = None
    code_metrics = None
    if not self.code_vq:
        # Plain embedding lookup of the discrete units.
        x = self.dict(units).transpose(1, 2)
    elif units.dtype is torch.int64:
        # Integer input indexes the first level's `k` table directly.
        x = self.code_vq.level_blocks[0].k[units].transpose(1, 2)
    else:
        code_h = self.code_encoder(units)
        _, code_h_q, code_commit_losses, code_metrics = self.code_vq(code_h)
        x = code_h_q[0]

    f0_commit_losses = None
    f0_metrics = None
    if self.vq:
        f0_h = self.encoder(kwargs['f0'])
        _, f0_h_q, f0_commit_losses, f0_metrics = self.vq(f0_h)
        kwargs['f0'] = f0_h_q[0]
    elif self.quantizer:
        # The quantizer is kept in eval mode and its outputs are detached.
        self.quantizer.eval()
        assert not self.quantizer.training, "VQ is in training status!!!"
        f0_h = [level.detach() for level in self.quantizer.encoder(kwargs['f0'])]
        zs, _, _, _ = self.quantizer.vq(f0_h)
        zs = [z.detach() for z in zs]
        kwargs['f0'] = self.f0_dict(zs[0].detach()).transpose(1, 2)

    if self.f0:
        # Bring the shorter of (units, f0) up to the longer one's length.
        f0_frames = kwargs['f0'].shape[-1]
        if x.shape[-1] < f0_frames:
            x = self._upsample(x, f0_frames)
        else:
            kwargs['f0'] = self._upsample(kwargs['f0'], x.shape[-1])
        x = torch.cat([x, kwargs['f0']], dim=1)

    if self.multispkr:
        spkr = self.spkr(kwargs['spkr']).transpose(1, 2)
        spkr = self._upsample(spkr, x.shape[-1])
        x = torch.cat([x, spkr], dim=1)

    # Any additional conditioning feature is appended as extra channels.
    for name, feat in kwargs.items():
        if name not in ['spkr', 'code', 'f0']:
            feat = self._upsample(feat, x.shape[-1])
            x = torch.cat([x, feat], dim=1)

    if self.vq or self.code_vq:
        return super().forward(x), (code_commit_losses, f0_commit_losses), (code_metrics, f0_metrics)
    return super().forward(x)
class MultiPeriodDiscriminator(torch.nn.Module):
    """Runs period discriminators (periods 2, 3, 5, 7, 11) on real and generated audio."""

    def __init__(self):
        super(MultiPeriodDiscriminator, self).__init__()
        self.discriminators = nn.ModuleList(
            [DiscriminatorP(period) for period in (2, 3, 5, 7, 11)])

    def forward(self, y, y_hat):
        """Score real audio *y* and generated audio *y_hat*.

        Returns four lists with one entry per sub-discriminator: real
        scores, generated scores, real feature maps, generated feature maps.
        """
        real_scores, fake_scores = [], []
        real_fmaps, fake_fmaps = [], []
        for disc in self.discriminators:
            score_real, fmap_real = disc(y)
            score_fake, fmap_fake = disc(y_hat)
            real_scores.append(score_real)
            real_fmaps.append(fmap_real)
            fake_scores.append(score_fake)
            fake_fmaps.append(fmap_fake)

        return real_scores, fake_scores, real_fmaps, fake_fmaps
def feature_loss(fmap_r, fmap_g):
    """Feature-matching loss between real and generated feature maps.

    Both arguments are lists (one per discriminator) of lists (one per
    layer) of tensors. The result is twice the sum of the per-layer mean
    absolute differences; it is the int ``0`` when there is nothing to
    compare.
    """
    layer_terms = (
        torch.mean(torch.abs(real_layer - fake_layer))
        for real_maps, fake_maps in zip(fmap_r, fmap_g)
        for real_layer, fake_layer in zip(real_maps, fake_maps)
    )
    return sum(layer_terms) * 2
def generator_loss(disc_outputs):
    """Least-squares generator loss over a list of discriminator outputs.

    Returns:
        ``(loss, gen_losses)`` where ``gen_losses`` holds one
        ``mean((1 - output) ** 2)`` tensor per discriminator and ``loss`` is
        their sum (the int ``0`` for an empty list).
    """
    gen_losses = [torch.mean((1 - scores) ** 2) for scores in disc_outputs]
    loss = sum(gen_losses)
    return loss, gen_losses
Initialize Model + print("Initializing model...") + generator = CodeHiFiGANModel_spk(dict(h)).to(device) + + # 3. Load Checkpoint + print(f"Loading checkpoint from {args.checkpoint}...") + state_dict = torch.load(args.checkpoint, map_location=device) + + if 'generator' in state_dict: + generator.load_state_dict(state_dict['generator']) + else: + # Just in case the checkpoint structure is different + generator.load_state_dict(state_dict) + + generator.eval() + generator.remove_weight_norm() + + # 4. Load Input Data + print(f"Loading input units from {args.input_file}...") + # Expecting the .pt file format used in training (containing 'code' and optionally 'spkr') + data = torch.load(args.input_file, map_location='cpu') + + if isinstance(data, dict): + code = data.get('code') + spkr = data.get('spkr') + else: + # Fallback if it's just a code tensor + code = data + spkr = None + + if code is None: + raise ValueError("Could not find 'code' in the input file.") + + # Prepare input for model + # Model expects batch dimension + if code.dim() == 1: + code = code.unsqueeze(0) + + x = {'code': code.to(device)} + + if h.get('multispkr') and spkr is not None: + if spkr.dim() == 1: + spkr = spkr.unsqueeze(0) + x['spkr'] = spkr.to(device) + print("Using speaker embedding from input file.") + elif h.get('multispkr'): + print("Warning: Model expects speaker embedding but none provided in input file. This may cause errors.") + + # 5. Run Inference + print("Generating audio...") + with torch.no_grad(): + # returns (wav, dedup_code) + y_g_hat, _ = generator(**x) + + audio = y_g_hat.squeeze() + audio = audio.cpu().numpy() + + # 6. 
def main(args):
    """Render a translated audio-visual clip from a unit sequence.

    Loads the Unit-AV renderer and the speaker encoder, reads the first line
    of ``args.in_unit_path`` as space-separated integer units, runs the
    renderer with duration prediction on ``args.in_vid_path`` and writes the
    result to ``args.out_vid_path``.

    Raises:
        ValueError: if the unit file's first line contains no units.
    """
    use_cuda = torch.cuda.is_available() and not args.cpu

    cfg_path = os.path.join(os.path.dirname(__file__), "config.json")
    vocoder = load_model(args.unit2av_path, cfg_path, args.tgt_lang, use_cuda=use_cuda)
    speaker_encoder = load_speaker_encoder_model(os.path.join(os.path.dirname(__file__), "encoder.pt"), use_cuda=use_cuda)

    video_stem = os.path.splitext(args.in_vid_path)[0]
    temp_audio_path = video_stem + ".temp.wav"
    # NOTE(review): the bounding-box path is derived from the video path, so
    # the required --in-bbox-path argument is never read -- confirm intent.
    bbox_path = video_stem + ".bbox.pkl"

    try:
        # NOTE(review): the extracted wav is not passed to anything below
        # (the speaker embedding is computed from the video path) -- confirm.
        extract_audio_from_video(args.in_vid_path, temp_audio_path)

        with open(args.in_unit_path) as f:
            unit = list(map(int, f.readline().strip().split()))
        if not unit:
            raise ValueError("No units found in the first line of {}".format(args.in_unit_path))

        sample = {
            "code": torch.LongTensor(unit).view(1,-1),
            "spkr": torch.from_numpy(speaker_encoder.get_embed(args.in_vid_path)).view(1,1,-1),
        }
        sample = utils.move_to_cuda(sample) if use_cuda else sample

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            wav, video, full_video, bbox = vocoder(sample, args.in_vid_path, bbox_path, dur_prediction=True)

        save_video(wav, video, full_video, bbox, args.out_vid_path)
    finally:
        # Always clean up the temporary wav, also when rendering fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
파일(텍스트) 내에 오디오 경로와 코드(Unit) 시퀀스가 텍스트 형태로 구성되어야함 +-> 유닛 코드(.pt 파일) + 원본 오디오 경로 -> 텍스트 파일로 변환 +''' + +import argparse + +# Argument Parser 설정 +parser = argparse.ArgumentParser(description='Create manifest file for unit2av training') +parser.add_argument('--audio_root', type=str, required=True, help='Root directory of audio files') +parser.add_argument('--unit_root', type=str, required=True, help='Root directory of unit (.pt) files') +parser.add_argument('--output_file', type=str, default='train_hubert.txt', help='Output manifest file path') + +args = parser.parse_args() + +# 경로 설정 +audio_root = args.audio_root +unit_root = args.unit_root +output_file = args.output_file + +# 1. 유닛 파일(.pt) 검색 +# 유닛 파일이 "선별된" 데이터이므로, 유닛 파일을 기준으로 오디오를 매칭합니다. +unit_files = sorted(glob.glob(os.path.join(unit_root, '*.pt'))) +print(f"Total unit files found: {len(unit_files)}") + +# 2. 100개만 선별 +target_unit_files = unit_files + +lines = [] +for unit_path in target_unit_files: + # unit_path: .../113_003_0012.pt + fname = os.path.basename(unit_path)[:-16]+ ".pt" + # fname: 113_003_0012.pt + + # 오디오 파일명 추론: 113_003_0012.wav + wav_fname = fname.replace('.pt', '.wav') + + # 폴더 구조 추론: 113_003_0012 -> speaker: 113 + # 오디오 경로는 audio_root + speaker + wav_fname + parts = fname.split('_') + if len(parts) >= 1: + speaker_id = parts[0] + audio_path = os.path.join(audio_root, speaker_id, wav_fname) + + if os.path.exists(audio_path): + try: + # 3. .pt 파일 로드 및 코드 추출 + data = torch.load(unit_path) + # 사용자 데이터 키: 'code' -> 모델이 기대하는 키: 'codes' + code_tensor = data['code'] + + # 텐서를 공백으로 구분된 문자열로 변환 + code_list = code_tensor.squeeze().tolist() + if isinstance(code_list, int): code_list = [code_list] + code_str = ' '.join(map(str, code_list)) + + # 4. 
딕셔너리 포맷으로 저장 + # unit_path를 저장하여 dataset.py에서 직접 .pt를 로드하도록 함 + entry = { + 'audio': audio_path, + 'unit_path': unit_path + } + lines.append(str(entry)) + except Exception as e: + print(f"Error reading {unit_path}: {e}") + else: + print(f"Audio file not found for unit: {audio_path}") + else: + print(f"Cannot parse speaker id from {fname}") + +# 5. 파일 저장 +with open(output_file, 'w') as f: + f.write('\n'.join(lines)) + +print(f"Saved {len(lines)} samples to {output_file}") \ No newline at end of file diff --git a/unit2av/model.py b/unit2av/model.py new file mode 100644 index 0000000..875f846 --- /dev/null +++ b/unit2av/model.py @@ -0,0 +1,374 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Dict +from fairseq.models.text_to_speech.codehifigan import CodeGenerator as CodeHiFiGANModel +from fairseq.models.text_to_speech.vocoder import CodeHiFiGANVocoder + +import torchvision +import pickle +import numpy as np +import cv2 + +def process_duration(code, code_feat): + ''' + 새로 추가한 부분 + from speech-resynthesis의 DurationCodeGenerator + + + ''' + uniq_code_count = [] + uniq_code_feat = [] + for i in range(code.size(0)): + _, count = torch.unique_consecutive(code[i, :], return_counts=True) + + if len(count) > 2: + # remove first and last code as segment sampling may cause incomplete segment length + uniq_code_count.append(count[1:-1]) + uniq_code_idx = count.cumsum(dim=0)[:-2] + else: + uniq_code_count.append(count) + uniq_code_idx = count.cumsum(dim=0) - 1 + uniq_code_feat.append(code_feat[i, uniq_code_idx, :].view(-1, code_feat.size(2))) + + uniq_code_count = torch.cat(uniq_code_count) + + # collate feat + max_len = max(feat.size(0) for feat in uniq_code_feat) + out = uniq_code_feat[0].new_zeros((len(uniq_code_feat), max_len, uniq_code_feat[0].size(1))) + mask = torch.arange(max_len).repeat(len(uniq_code_feat), 1) + for i, v in enumerate(uniq_code_feat): + out[i, : v.size(0)] = v + mask[i, :] = mask[i, :] < v.size(0) + + return 
out, mask.bool(), uniq_code_count.float() + +class UnitAVRenderer(CodeHiFiGANVocoder): + def __init__( + self, checkpoint_path: str, model_cfg: Dict[str, str], lang: str, fp16: bool = False + ) -> None: + super(CodeHiFiGANVocoder, self).__init__() + self.model = CodeHiFiGANModel_spk(model_cfg) + if torch.cuda.is_available(): + state_dict = torch.load(checkpoint_path) + else: + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu")) + self.model.load_state_dict(state_dict["audio"][lang]) + self.model.eval() + + self.face_model = FaceRenderer(unit_num=model_cfg["num_embeddings"]) + face_state_dict = state_dict["video"] + + # unit_embed만 특별 처리 + current_embed = self.face_model.unit_embed.weight.data + checkpoint_embed = face_state_dict['unit_embed.weight'] + + # 기존 1000개는 체크포인트 값 사용 + current_embed[:1000] = checkpoint_embed + + # 새로운 24개는 기존 임베딩의 평균이나 랜덤으로 초기화 + # 옵션 1: 마지막 임베딩 복사 + current_embed[1000:] = checkpoint_embed[-1].unsqueeze(0).repeat(24, 1) + + # unit_embed.weight를 state_dict에서 제거하고 나머지 로드 + face_state_dict.pop('unit_embed.weight') + self.face_model.load_state_dict(face_state_dict, strict=False) + + self.face_model.eval() + + if fp16: + self.model.half() + self.face_model.half() + self.model.remove_weight_norm() + + units_per_second = 50 + frames_per_second = 25 + self.num_frames = 10 + self.code_frame_ratio = units_per_second // frames_per_second + self.num_units = self.num_frames * self.code_frame_ratio + + def get_crops(self, bbox_path): + bbs = pickle.load(open(bbox_path, 'rb')) + return np.array(bbs, dtype=object) + + def read_window(self, frames, crops): + window = [] + for img, crop in zip(frames, crops): + # modified : if bbox is None, write black image + if crop is None: + window.append(np.zeros((96, 96, 3), dtype=np.uint8)) + continue + x1, y1, x2, y2 = crop + img = img[max(int(y1), 0): int(y2), max(int(x1), 0):int(x2)] + if img.size == 0: + img = np.zeros((96, 96, 3), dtype=np.uint8) + else: + img = cv2.resize(img, (96, 96)) + 
window.append(img) + return window + + def prepare_window(self, window): + # 3 x T x H x W + x = np.asarray(window) / 255. + x = np.transpose(x, (3, 0, 1, 2)) + return x + + def forward(self, x: Dict[str, torch.Tensor], video_path: str, bbox_path: str, dur_prediction=False) -> torch.Tensor: + assert "code" in x + x["dur_prediction"] = dur_prediction + + if dur_prediction: + x["code"] = torch.unique_consecutive(x["code"]) + + # remove invalid code + mask = x["code"] >= 0 + x["code"] = x["code"][mask].unsqueeze(dim=0) + if "f0" in x: + f0_up_ratio = x["f0"].size(1) // x["code"].size(1) + mask = mask.unsqueeze(2).repeat(1, 1, f0_up_ratio).view(-1, x["f0"].size(1)) + x["f0"] = x["f0"][mask].unsqueeze(dim=0) + + gen_wav, dedup_code = self.model(**x) + gen_wav = gen_wav.detach().squeeze().cpu().numpy() + + tgt_len = len(dedup_code) // self.code_frame_ratio + remain = len(dedup_code) % self.num_units + if remain != 0: + repeat_num = self.num_units - remain + dedup_code = torch.cat([dedup_code, dedup_code[-1].repeat(repeat_num)]) + padded_tgt_len = len(dedup_code) // self.code_frame_ratio + + frames = torchvision.io.read_video(video_path, pts_unit="sec")[0] + len_frames = len(frames) + reverse_frames = frames.flip(0) + repeated_frames = torch.cat((reverse_frames[1:], frames[1:])) + while len(frames) < padded_tgt_len: + frames = torch.cat([frames, repeated_frames]) + frames = frames[:padded_tgt_len] + frames = frames.flip(-1) + + crops = self.get_crops(bbox_path) + assert len(crops) == len_frames + reverse_crops = crops[::-1] + repeated_crops = np.concatenate([reverse_crops[1:], crops[1:]]) + while len(crops) < padded_tgt_len: + crops = np.concatenate([crops, repeated_crops]) + crops = crops[:padded_tgt_len] + + frames_numpy = np.array(frames) + window = self.read_window(frames_numpy, crops) + wrong_window = window.copy() + + dedup_code_seq = dedup_code.view(-1, self.num_units) + + window = self.prepare_window(window) + window[:, :, window.shape[2] // 2:] = 0. 
+ wrong_window = self.prepare_window(wrong_window) + windows = np.concatenate([window, wrong_window], axis=0) + windows = torch.FloatTensor(windows).to(dedup_code_seq.device) + windows = windows.transpose(1,0) + + gen_vid = self.face_model(dedup_code_seq, windows) + gen_vid = (gen_vid.detach().cpu().numpy().transpose(0,2,3,1)* 255.).astype(np.uint8) + + return gen_wav, gen_vid[:tgt_len], frames_numpy[:tgt_len], crops[:tgt_len] + + +class CodeHiFiGANModel_spk(CodeHiFiGANModel): + def forward(self, **kwargs): + x = self.dict(kwargs["code"]).transpose(1, 2) + + if self.dur_predictor and kwargs.get("dur_prediction", False): + assert x.size(0) == 1, "only support single sample" + log_dur_pred = self.dur_predictor(x.transpose(1, 2)) + dur_out = torch.clamp( + torch.round((torch.exp(log_dur_pred) - 1)).long(), min=1 + ) + # B x C x T + x = torch.repeat_interleave(x, dur_out.view(-1), dim=2) + + if self.f0: + if self.f0_quant_embed: + kwargs["f0"] = self.f0_quant_embed(kwargs["f0"].long()).transpose(1, 2) + else: + kwargs["f0"] = kwargs["f0"].unsqueeze(1) + + if x.shape[-1] < kwargs["f0"].shape[-1]: + x = self._upsample(x, kwargs["f0"].shape[-1]) + elif x.shape[-1] > kwargs["f0"].shape[-1]: + kwargs["f0"] = self._upsample(kwargs["f0"], x.shape[-1]) + x = torch.cat([x, kwargs["f0"]], dim=1) + + if self.multispkr: + assert ( + "spkr" in kwargs + ), 'require "spkr" input for multispeaker CodeHiFiGAN vocoder' + spkr = self.spkr(kwargs["spkr"]).transpose(1, 2) + spkr = self._upsample(spkr, x.shape[-1]) + x = torch.cat([x, spkr], dim=1) +# for k, feat in kwargs.items(): +# if k in ["spkr", "code", "f0", "dur_prediction"]: +# continue +# feat = self._upsample(feat, x.shape[-1]) +# x = torch.cat([x, feat], dim=1) + + dur_losses = None + if self.dur_predictor and self.training: + # Re-calculate unique code features for duration loss calculation + # This is duplicate work if we already did it above but CodeHiFiGANModel_spk + # structure doesn't easily allow passing it down. 
+ # Assuming 'code' in kwargs is the repeated/aligned code suitable for audio gen. + + # We need to extract unique codes to train the predictor. + # (Re-using the logic from DurationCodeGenerator) + x_for_dur = self.dict(kwargs["code"]).transpose(1, 2) + uniq_code_feat, uniq_code_mask, dur = process_duration( + kwargs['code'], x_for_dur.transpose(1, 2)) + log_dur_pred = self.dur_predictor(uniq_code_feat) + log_dur_pred = log_dur_pred[uniq_code_mask] + log_dur = torch.log(dur + 1) + dur_losses = F.mse_loss(log_dur_pred, log_dur, reduction="mean") + + return super(CodeHiFiGANModel, self).forward(x), dur_losses + + if self.dur_predictor and kwargs.get("dur_prediction", False): + # Inference with duration prediction: Return expanded code for FaceRenderer + return super(CodeHiFiGANModel, self).forward(x), torch.repeat_interleave(kwargs["code"], dur_out.view(-1)) + + # Default / Evaluation without Duration Prediction: Return original code + return super(CodeHiFiGANModel, self).forward(x), kwargs["code"] + + +class FaceRenderer(nn.Module): + def __init__(self, unit_num): + super(FaceRenderer, self).__init__() + self.unit_num = unit_num + + self.face_encoder_blocks = nn.ModuleList([ + nn.Sequential(Conv2d(6, 16, kernel_size=7, stride=1, padding=3)), + + nn.Sequential(Conv2d(16, 32, kernel_size=3, stride=2, padding=1), + Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(32, 32, kernel_size=3, stride=1, padding=1, residual=True)), + + nn.Sequential(Conv2d(32, 64, kernel_size=3, stride=2, padding=1), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True)), + + nn.Sequential(Conv2d(64, 128, kernel_size=3, stride=2, padding=1), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True)), + + nn.Sequential(Conv2d(128, 256, 
kernel_size=3, stride=2, padding=1), + Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True)), + + nn.Sequential(Conv2d(256, 512, kernel_size=3, stride=2, padding=1), + Conv2d(512, 512, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2d(512, 512, kernel_size=3, stride=1, padding=0), + Conv2d(512, 512, kernel_size=1, stride=1, padding=0)), ]) + + self.unit_embed = nn.Embedding(self.unit_num, 512) + self.unit2lip = nn.TransformerEncoderLayer(d_model=512, nhead=1, dim_feedforward=1024, dropout=0.1, activation='relu') + + self.face_decoder_blocks = nn.ModuleList([ + nn.Sequential(Conv2d(512, 512, kernel_size=1, stride=1, padding=0), ), + + nn.Sequential(Conv2dTranspose(1024, 512, kernel_size=3, stride=1, padding=0), + Conv2d(512, 512, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2dTranspose(1024, 512, kernel_size=3, stride=2, padding=1, output_padding=1), + Conv2d(512, 512, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(512, 512, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2dTranspose(768, 384, kernel_size=3, stride=2, padding=1, output_padding=1), + Conv2d(384, 384, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(384, 384, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2dTranspose(512, 256, kernel_size=3, stride=2, padding=1, output_padding=1), + Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(256, 256, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2dTranspose(320, 128, kernel_size=3, stride=2, padding=1, output_padding=1), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(128, 128, kernel_size=3, stride=1, padding=1, residual=True), ), + + nn.Sequential(Conv2dTranspose(160, 64, kernel_size=3, stride=2, padding=1, output_padding=1), + Conv2d(64, 
64, kernel_size=3, stride=1, padding=1, residual=True), + Conv2d(64, 64, kernel_size=3, stride=1, padding=1, residual=True), ), ]) + + self.output_block = nn.Sequential(Conv2d(80, 32, kernel_size=3, stride=1, padding=1), + nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0), + nn.Sigmoid()) + + def forward(self, audio_sequences, face_sequences): + audio_sequences = self.unit_embed(audio_sequences) # B,20,512 / T/10,20,512 + audio_sequences = F.interpolate(audio_sequences.permute(0, 2, 1), scale_factor=0.5, mode='linear') # B,512,10 / T/10,512,10 + audio_sequences = audio_sequences.permute(2, 0, 1) # 10,B,512 / 10,T/10,512 + audio_embedding = self.unit2lip(audio_sequences).permute(1,0,2) # B,10,512 + audio_embedding = audio_embedding.contiguous().view(-1, 512).unsqueeze(-1).unsqueeze(-1) + + feats = [] + x = face_sequences + for f in self.face_encoder_blocks: + x = f(x) + feats.append(x) + + x = audio_embedding + for f in self.face_decoder_blocks: + x = f(x) + try: + x = torch.cat((x, feats[-1]), dim=1) + except Exception as e: + print(x.size()) + print(feats[-1].size()) + raise e + + feats.pop() + + outputs = self.output_block(x) + return outputs + +class nonorm_Conv2d(nn.Module): + def __init__(self, cin, cout, kernel_size, stride, padding, residual=False, *args, **kwargs): + super().__init__(*args, **kwargs) + self.conv_block = nn.Sequential( + nn.Conv2d(cin, cout, kernel_size, stride, padding), + ) + self.act = nn.LeakyReLU(0.01, inplace=True) + + def forward(self, x): + out = self.conv_block(x) + return self.act(out) + +class Conv2dTranspose(nn.Module): + def __init__(self, cin, cout, kernel_size, stride, padding, output_padding=0, *args, **kwargs): + super().__init__(*args, **kwargs) + self.conv_block = nn.Sequential( + nn.ConvTranspose2d(cin, cout, kernel_size, stride, padding, output_padding), + nn.BatchNorm2d(cout) + ) + self.act = nn.ReLU() + + def forward(self, x): + out = self.conv_block(x) + return self.act(out) + +class Conv2d(nn.Module): + def 
__init__(self, cin, cout, kernel_size, stride, padding, residual=False, *args, **kwargs): + super().__init__(*args, **kwargs) + self.conv_block = nn.Sequential( + nn.Conv2d(cin, cout, kernel_size, stride, padding), + nn.BatchNorm2d(cout) + ) + self.act = nn.ReLU() + self.residual = residual + + def forward(self, x): + out = self.conv_block(x) + if self.residual: + out = out + x + return self.act(out) + \ No newline at end of file diff --git a/unit2av/model_speaker_encoder.py b/unit2av/model_speaker_encoder.py new file mode 100644 index 0000000..26c25a9 --- /dev/null +++ b/unit2av/model_speaker_encoder.py @@ -0,0 +1,318 @@ +""" +Modified from https://github.com/CorentinJ/Real-Time-Voice-Cloning +""" + +import torch +from torch import nn +from dataclasses import dataclass +from scipy.ndimage.morphology import binary_dilation +from pathlib import Path +from typing import Optional, Union +from warnings import warn +import numpy as np +import librosa +import struct + +try: + import webrtcvad +except: + warn("Unable to import 'webrtcvad'. This package enables noise removal and is recommended.") + webrtcvad=None + +@dataclass +class SpeakerEncoderConfig: + + ## Model parameters + model_hidden_size = 256 + model_embedding_size = 256 + model_num_layers = 3 + + ## Mel-filterbank + mel_window_length = 25 # In milliseconds + mel_window_step = 10 # In milliseconds + mel_n_channels = 40 + + ## Audio + sampling_rate = 16000 + # Number of spectrogram frames in a partial utterance + partials_n_frames = 160 # 1600 ms + # Number of spectrogram frames at inference + inference_n_frames = 80 # 800 ms + + ## Voice Activation Detection + # Window size of the VAD. Must be either 10, 20 or 30 milliseconds. + # This sets the granularity of the VAD. Should not need to be changed. + vad_window_length = 30 # In milliseconds + # Number of frames to average together when performing the moving average smoothing. 
+ # The larger this value, the larger the VAD variations must be to not get smoothed out. + vad_moving_average_width = 8 + # Maximum number of consecutive silent frames a segment can have. + vad_max_silence_length = 6 + + ## Audio volume normalization + audio_norm_target_dBFS = -30 + + int16_max = (2 ** 15) - 1 + + +class SpeakerEncoder(nn.Module): + def __init__(self, checkpoint_path: str): + super().__init__() + + self.cfg = SpeakerEncoderConfig() + + # Network defition + self.lstm = nn.LSTM(input_size=self.cfg.mel_n_channels, + hidden_size=self.cfg.model_hidden_size, + num_layers=self.cfg.model_num_layers, + batch_first=True) + self.linear = nn.Linear(in_features=self.cfg.model_hidden_size, + out_features=self.cfg.model_embedding_size) + self.relu = torch.nn.ReLU() + + # Cosine similarity scaling (with fixed initial parameter values) + self.similarity_weight = nn.Parameter(torch.tensor([10.])) + self.similarity_bias = nn.Parameter(torch.tensor([-5.])) + + if torch.cuda.is_available(): + state_dict = torch.load(checkpoint_path) + else: + state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu")) + self.load_state_dict(state_dict["model_state"]) + self.eval() + + def forward(self, utterances, hidden_init=None): + """ + Computes the embeddings of a batch of utterance spectrograms. + + :param utterances: batch of mel-scale filterbanks of same duration as a tensor of shape + (batch_size, n_frames, n_channels) + :param hidden_init: initial hidden state of the LSTM as a tensor of shape (num_layers, + batch_size, hidden_size). Will default to a tensor of zeros if None. + :return: the embeddings as a tensor of shape (batch_size, embedding_size) + """ + # Pass the input through the LSTM layers and retrieve all outputs, the final hidden state + # and the final cell state. 
+ out, (hidden, cell) = self.lstm(utterances, hidden_init) + + # We take only the hidden state of the last layer + embeds_raw = self.relu(self.linear(hidden[-1])) + + # L2-normalize it + embeds = embeds_raw / (torch.norm(embeds_raw, dim=1, keepdim=True) + 1e-5) + + return embeds + + + def preprocess_wav( + self, + fpath_or_wav: Union[str, Path, np.ndarray], + source_sr: Optional[int] = None, + normalize: Optional[bool] = True, + trim_silence: Optional[bool] = True): + """ + Applies the preprocessing operations used in training the Speaker Encoder to a waveform + either on disk or in memory. The waveform will be resampled to match the data hyperparameters. + + :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not + just .wav), either the waveform as a numpy array of floats. + :param source_sr: if passing an audio waveform, the sampling rate of the waveform before + preprocessing. After preprocessing, the waveform's sampling rate will match the data + hyperparameters. If passing a filepath, the sampling rate will be automatically detected and + this argument will be ignored. + """ + # Load the wav from disk if needed + if isinstance(fpath_or_wav, str) or isinstance(fpath_or_wav, Path): + wav, source_sr = librosa.load(str(fpath_or_wav), sr=None) + else: + wav = fpath_or_wav + + # Resample the wav if needed + if source_sr is not None and source_sr != self.cfg.sampling_rate: + wav = librosa.resample(wav, source_sr, self.cfg.sampling_rate) + + # Apply the preprocessing: normalize volume and shorten long silences + if normalize: + wav = self.normalize_volume(wav, self.cfg.audio_norm_target_dBFS, increase_only=True) + if webrtcvad and trim_silence: + wav = self.trim_long_silences(wav) + + return wav + + + def wav_to_mel_spectrogram(self, wav): + """ + Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform. + Note: this not a log-mel spectrogram. 
+ """ + frames = librosa.feature.melspectrogram( + wav, + self.cfg.sampling_rate, + n_fft=int(self.cfg.sampling_rate * self.cfg.mel_window_length / 1000), + hop_length=int(self.cfg.sampling_rate * self.cfg.mel_window_step / 1000), + n_mels=self.cfg.mel_n_channels + ) + return frames.astype(np.float32).T + + + def trim_long_silences(self, wav): + """ + Ensures that segments without voice in the waveform remain no longer than a + threshold determined by the VAD parameters in params.py. + + :param wav: the raw waveform as a numpy array of floats + :return: the same waveform with silences trimmed away (length <= original wav length) + """ + # Compute the voice detection window size + samples_per_window = (self.cfg.vad_window_length * self.cfg.sampling_rate) // 1000 + + # Trim the end of the audio to have a multiple of the window size + wav = wav[:len(wav) - (len(wav) % samples_per_window)] + + # Convert the float waveform to 16-bit mono PCM + pcm_wave = struct.pack("%dh" % len(wav), *(np.round(wav * self.cfg.int16_max)).astype(np.int16)) + + # Perform voice activation detection + voice_flags = [] + vad = webrtcvad.Vad(mode=3) + for window_start in range(0, len(wav), samples_per_window): + window_end = window_start + samples_per_window + voice_flags.append(vad.is_speech(pcm_wave[window_start * 2:window_end * 2], + sample_rate=self.cfg.sampling_rate)) + voice_flags = np.array(voice_flags) + + # Smooth the voice detection with a moving average + def moving_average(array, width): + array_padded = np.concatenate((np.zeros((width - 1) // 2), array, np.zeros(width // 2))) + ret = np.cumsum(array_padded, dtype=float) + ret[width:] = ret[width:] - ret[:-width] + return ret[width - 1:] / width + + audio_mask = moving_average(voice_flags, self.cfg.vad_moving_average_width) + audio_mask = np.round(audio_mask).astype(np.bool) + + # Dilate the voiced regions + audio_mask = binary_dilation(audio_mask, np.ones(self.cfg.vad_max_silence_length + 1)) + audio_mask = np.repeat(audio_mask, 
samples_per_window) + + return wav[audio_mask == True] + + + def normalize_volume(self, wav, target_dBFS, increase_only=False, decrease_only=False): + if increase_only and decrease_only: + raise ValueError("Both increase only and decrease only are set") + dBFS_change = target_dBFS - 10 * np.log10(np.mean(wav ** 2)) + if (dBFS_change < 0 and increase_only) or (dBFS_change > 0 and decrease_only): + return wav + return wav * (10 ** (dBFS_change / 20)) + + + def embed_frames_batch(self, frames_batch): + """ + Computes embeddings for a batch of mel spectrogram. + + :param frames_batch: a batch mel of spectrogram as a numpy array of float32 of shape + (batch_size, n_frames, n_channels) + :return: the embeddings as a numpy array of float32 of shape (batch_size, model_embedding_size) + """ + frames = torch.from_numpy(frames_batch).to(next(self.parameters()).device) + embed = self.forward(frames).detach().cpu().numpy() + return embed + + + def compute_partial_slices(self, n_samples, partial_utterance_n_frames=None, + min_pad_coverage=0.75, overlap=0.5): + """ + Computes where to split an utterance waveform and its corresponding mel spectrogram to obtain + partial utterances of <partial_utterance_n_frames> each. Both the waveform and the mel + spectrogram slices are returned, so as to make each partial utterance waveform correspond to + its spectrogram. This function assumes that the mel spectrogram parameters used are those + defined in params_data.py. + + The returned ranges may be indexing further than the length of the waveform. It is + recommended that you pad the waveform with zeros up to wave_slices[-1].stop. + + :param n_samples: the number of samples in the waveform + :param partial_utterance_n_frames: the number of mel spectrogram frames in each partial + utterance + :param min_pad_coverage: when reaching the last partial utterance, it may or may not have + enough frames. 
If at least <min_pad_coverage> of <partial_utterance_n_frames> are present, + then the last partial utterance will be considered, as if we padded the audio. Otherwise, + it will be discarded, as if we trimmed the audio. If there aren't enough frames for 1 partial + utterance, this parameter is ignored so that the function always returns at least 1 slice. + :param overlap: by how much the partial utterance should overlap. If set to 0, the partial + utterances are entirely disjoint. + :return: the waveform slices and mel spectrogram slices as lists of array slices. Index + respectively the waveform and the mel spectrogram with these slices to obtain the partial + utterances. + """ + if partial_utterance_n_frames is None: + partial_utterance_n_frames = self.cfg.partials_n_frames + + assert 0 <= overlap < 1 + assert 0 < min_pad_coverage <= 1 + + samples_per_frame = int((self.cfg.sampling_rate * self.cfg.mel_window_step / 1000)) + n_frames = int(np.ceil((n_samples + 1) / samples_per_frame)) + frame_step = max(int(np.round(partial_utterance_n_frames * (1 - overlap))), 1) + + # Compute the slices + wav_slices, mel_slices = [], [] + steps = max(1, n_frames - partial_utterance_n_frames + frame_step + 1) + for i in range(0, steps, frame_step): + mel_range = np.array([i, i + partial_utterance_n_frames]) + wav_range = mel_range * samples_per_frame + mel_slices.append(slice(*mel_range)) + wav_slices.append(slice(*wav_range)) + + # Evaluate whether extra padding is warranted or not + last_wav_range = wav_slices[-1] + coverage = (n_samples - last_wav_range.start) / (last_wav_range.stop - last_wav_range.start) + if coverage < min_pad_coverage and len(mel_slices) > 1: + mel_slices = mel_slices[:-1] + wav_slices = wav_slices[:-1] + + return wav_slices, mel_slices + + + def embed_utterance(self, wav, **kwargs): + """ + Computes an embedding for a single utterance. 
+ + # TODO: handle multiple wavs to benefit from batching on GPU + :param wav: a preprocessed (see audio.py) utterance waveform as a numpy array of float32 + :param using_partials: if True, then the utterance is split in partial utterances of + <partial_utterance_n_frames> frames and the utterance embedding is computed from their + normalized average. If False, the utterance is instead computed from feeding the entire + spectogram to the network. + :param return_partials: if True, the partial embeddings will also be returned along with the + wav slices that correspond to the partial embeddings. + :param kwargs: additional arguments to compute_partial_splits() + :return: the embedding as a numpy array of float32 of shape (model_embedding_size,). If + <return_partials> is True, the partial utterances as a numpy array of float32 of shape + (n_partials, model_embedding_size) and the wav partials as a list of slices will also be + returned. If <using_partials> is simultaneously set to False, both these values will be None + instead. 
+ """ + + # Compute where to split the utterance into partials and pad if necessary + wave_slices, mel_slices = self.compute_partial_slices(len(wav), **kwargs) + max_wave_length = wave_slices[-1].stop + if max_wave_length >= len(wav): + wav = np.pad(wav, (0, max_wave_length - len(wav)), "constant") + + # Split the utterance into partials + frames = self.wav_to_mel_spectrogram(wav) + frames_batch = np.array([frames[s] for s in mel_slices]) + partial_embeds = self.embed_frames_batch(frames_batch) + + # Compute the utterance embedding from the partial embeddings + raw_embed = np.mean(partial_embeds, axis=0) + embed = raw_embed / np.linalg.norm(raw_embed, 2) + + return embed + + def get_embed(self, wav_path): + wav_preprocessed = self.preprocess_wav(wav_path) + embed = self.embed_utterance(wav_preprocessed) + return embed diff --git a/unit2av/modules/__init__.py b/unit2av/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/unit2av/modules/dist.py b/unit2av/modules/dist.py new file mode 100644 index 0000000..d2d4f2e --- /dev/null +++ b/unit2av/modules/dist.py @@ -0,0 +1,108 @@ +# Adapted from https://github.com/openai/jukebox + +from enum import Enum + +import torch.distributed as dist + + +class ReduceOp(Enum): + SUM = 0, + PRODUCT = 1, + MIN = 2, + MAX = 3 + + def ToDistOp(self): + return { + self.SUM: dist.ReduceOp.SUM, + self.PRODUCT: dist.ReduceOp.PRODUCT, + self.MIN: dist.ReduceOp.MIN, + self.MAX: dist.ReduceOp.MAX + }[self] + + +def is_available(): + return dist.is_initialized() + + +def get_rank(): + if is_available(): + return _get_rank() + else: + return 0 + + +def get_world_size(): + if is_available(): + return _get_world_size() + else: + return 1 + + +def barrier(): + if is_available(): + return _barrier() + # else: do nothing + + +def all_gather(tensor_list, tensor): + if is_available(): + return _all_gather(tensor_list, tensor) + else: + tensor_list[0] = tensor + + +def all_reduce(tensor, op=ReduceOp.SUM): + if is_available(): + 
def assert_shape(x, exp_shape):
    """Raise ``AssertionError`` unless ``x.shape`` matches ``exp_shape``.

    Both shapes are normalised to tuples before comparing, so ``exp_shape``
    may be a tuple, a list or any other sequence of ints. The check is an
    explicit ``raise`` rather than an ``assert`` statement so that it still
    runs when Python is started with ``-O``.

    :param x: any object exposing a ``shape`` attribute (tensor or array).
    :param exp_shape: the expected shape as a sequence of ints.
    :raises AssertionError: if the shapes differ.
    """
    if tuple(x.shape) != tuple(exp_shape):
        raise AssertionError(f"Expected {exp_shape} got {x.shape}")
class DecoderConvBock(nn.Module):
    """Decoder stage: a 3-tap input conv followed by (Resnet1D, ConvTranspose1d) pairs.

    ``stride_t`` / ``down_t`` may be scalars (one stride repeated ``down_t``
    times) or tuples/lists of equal length (one group of repeats per stride).
    Only the very last transposed convolution maps ``width`` channels to
    ``input_emb_width``; all earlier ones keep ``width`` channels.
    """

    def __init__(self, input_emb_width, output_emb_width, down_t, stride_t, width, depth, m_conv,
                 dilation_growth_rate=1, dilation_cycle=None, zero_out=False, res_scale=False,
                 reverse_decoder_dilation=False, checkpoint_res=False):
        super().__init__()

        def upsample_stage(out_channels, kernel, stride, pad):
            # Residual stack first, then the transposed conv. Construction
            # order is kept so parameter initialisation order is unchanged.
            return nn.Sequential(
                Resnet1D(width, depth, m_conv, dilation_growth_rate, dilation_cycle,
                         zero_out=zero_out, res_scale=res_scale,
                         reverse_dilation=reverse_decoder_dilation,
                         checkpoint_res=checkpoint_res),
                nn.ConvTranspose1d(width, out_channels, kernel, stride, pad),
            )

        layers = []
        per_group_strides = type(stride_t) is tuple or type(stride_t) is list
        if per_group_strides:
            # The input conv is always present in the multi-stride layout.
            layers.append(nn.Conv1d(output_emb_width, width, 3, 1, 1))
            final_group = len(stride_t) - 1
            for group, (stride, repeats) in enumerate(zip(stride_t, down_t)):
                if not repeats > 0:
                    continue
                # Odd strides use a kernel/padding one larger than even ones.
                if stride % 2 == 0:
                    kernel, pad = stride * 2, stride // 2
                else:
                    kernel, pad = stride * 2 + 1, stride // 2 + 1
                for rep in range(repeats):
                    is_last = rep == (repeats - 1) and group == final_group
                    layers.append(
                        upsample_stage(input_emb_width if is_last else width, kernel, stride, pad))
        elif down_t > 0:
            # Scalar layout: nothing at all is built when down_t is not positive.
            kernel, pad = stride_t * 2, stride_t // 2
            layers.append(nn.Conv1d(output_emb_width, width, 3, 1, 1))
            for rep in range(down_t):
                out_channels = input_emb_width if rep == (down_t - 1) else width
                layers.append(upsample_stage(out_channels, kernel, stride_t, pad))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the result."""
        return self.model(x)
+ iterator = zip(list(range(self.levels)), self.downs_t, self.strides_t) + for level, down_t, stride_t in iterator: + level_block = self.level_blocks[level] + x = level_block(x) + if type(stride_t) is tuple or type(stride_t) is list: + emb, T = self.output_emb_width, T // np.prod([s ** d for s, d in zip(stride_t, down_t)]) + else: + emb, T = self.output_emb_width, T // (stride_t ** down_t) + assert_shape(x, (N, emb, T)) + xs.append(x) + + return xs + + +class Decoder(nn.Module): + def __init__(self, input_emb_width, output_emb_width, levels, downs_t, strides_t, **block_kwargs): + super().__init__() + self.input_emb_width = input_emb_width + self.output_emb_width = output_emb_width + self.levels = levels + + self.downs_t = downs_t + + self.strides_t = strides_t + + level_block = lambda level, down_t, stride_t: DecoderConvBock(output_emb_width, output_emb_width, down_t, + stride_t, **block_kwargs) + self.level_blocks = nn.ModuleList() + iterator = zip(list(range(self.levels)), downs_t, strides_t) + for level, down_t, stride_t in iterator: + self.level_blocks.append(level_block(level, down_t, stride_t)) + + self.out = nn.Conv1d(output_emb_width, input_emb_width, 3, 1, 1) + + def forward(self, xs, all_levels=True): + if all_levels: + assert len(xs) == self.levels + else: + assert len(xs) == 1 + x = xs[-1] + N, T = x.shape[0], x.shape[-1] + emb = self.output_emb_width + assert_shape(x, (N, emb, T)) + + # 32, 64 ... 
class ResConv1DBlock(nn.Module):
    """Residual 1-D block: ``x + res_scale * conv1x1(relu(dilated_conv3(relu(x))))``.

    :param n_in: channel count of the input (and of the output).
    :param n_state: channel count between the two convolutions.
    :param dilation: dilation of the 3-tap convolution; the same value is
        used as its padding, so the time dimension is preserved.
    :param zero_out: if true, the final convolution's weight and bias are
        zero-initialised, making the block start out as the identity.
    :param res_scale: multiplier applied to the residual branch.
    """

    def __init__(self, n_in, n_state, dilation=1, zero_out=False, res_scale=1.0):
        super().__init__()
        dilated_conv = nn.Conv1d(n_in, n_state, kernel_size=3, stride=1,
                                 padding=dilation, dilation=dilation)
        pointwise_conv = nn.Conv1d(n_state, n_in, kernel_size=1, stride=1, padding=0)
        self.model = nn.Sequential(nn.ReLU(), dilated_conv, nn.ReLU(), pointwise_conv)
        if zero_out:
            for param in (pointwise_conv.weight, pointwise_conv.bias):
                nn.init.zeros_(param)
        self.res_scale = res_scale

    def forward(self, x):
        """Return ``x`` plus the scaled residual branch."""
        residual = self.model(x)
        return x + self.res_scale * residual
reverse_dilation=False, checkpoint_res=False): + super().__init__() + + def _get_depth(depth): + if dilation_cycle is None: + return depth + else: + return depth % dilation_cycle + + blocks = [ResConv1DBlock(n_in, int(m_conv * n_in), + dilation=dilation_growth_rate ** _get_depth(depth), + zero_out=zero_out, + res_scale=1.0 if not res_scale else 1.0 / math.sqrt(n_depth)) + for depth in range(n_depth)] + if reverse_dilation: + blocks = blocks[::-1] + self.checkpoint_res = checkpoint_res + if self.checkpoint_res == 1: + if dist.get_rank() == 0: + print("Checkpointing convs") + self.blocks = nn.ModuleList(blocks) + else: + self.model = nn.Sequential(*blocks) + + def forward(self, x): + if self.checkpoint_res == 1: + raise NotImplementedError("Checkpoint not implemented") + else: + return self.model(x) diff --git a/unit2av/modules/vq.py b/unit2av/modules/vq.py new file mode 100644 index 0000000..d3fff84 --- /dev/null +++ b/unit2av/modules/vq.py @@ -0,0 +1,249 @@ +# Adapted from https://github.com/openai/jukebox + +import numpy as np +import torch as t +import torch.nn as nn +import torch.nn.functional as F + +from . 
class BottleneckBlock(nn.Module):
    """Vector-quantisation bottleneck with a codebook ``k`` of ``k_bins`` vectors.

    Inputs are ``(N, C, T)`` tensors where ``C`` is ``emb_width`` or
    ``2 * emb_width`` (see :meth:`preprocess`). During training the codebook
    is updated from running sums (``k_sum`` / ``k_elem``) blended with factor
    ``mu``; entries whose running count drops below ``threshold`` are
    replaced by randomly chosen input vectors.

    The codebook is a registered buffer created on the default device, so it
    follows the module through ``.to(...)`` / ``.cuda()`` and the block can
    be constructed on machines without CUDA.
    """

    def __init__(self, k_bins, emb_width, mu):
        super().__init__()
        self.k_bins = k_bins
        self.emb_width = emb_width
        self.mu = mu
        self.reset_k()
        self.threshold = 1.0

    def reset_k(self):
        """Reset the codebook to zeros and forget the running statistics."""
        self.init = False
        self.k_sum = None
        self.k_elem = None
        # When re-resetting an existing block keep the codebook on the device
        # it already lives on; on first use fall back to the default device
        # instead of forcing CUDA (which crashes on CPU-only hosts).
        previous = getattr(self, 'k', None)
        device = previous.device if previous is not None else None
        self.register_buffer('k', t.zeros(self.k_bins, self.emb_width, device=device))

    def _tile(self, x):
        """Repeat ``x`` (with small noise) until it has at least ``k_bins`` rows."""
        d, ew = x.shape
        if d < self.k_bins:
            n_repeats = (self.k_bins + d - 1) // d
            std = 0.01 / np.sqrt(ew)
            x = x.repeat(n_repeats, 1)
            x = x + t.randn_like(x) * std
        return x

    def init_k(self, x):
        """Initialise the codebook from random rows of ``x`` (broadcast from rank 0)."""
        emb_width, k_bins = self.emb_width, self.k_bins
        self.init = True
        # init k_w using random vectors from x
        y = self._tile(x)
        _k_rand = y[t.randperm(y.shape[0])][:k_bins]
        dist.broadcast(_k_rand, 0)
        self.k = _k_rand
        assert self.k.shape == (k_bins, emb_width)
        self.k_sum = self.k
        self.k_elem = t.ones(k_bins, device=self.k.device)

    def restore_k(self, num_tokens=None, threshold=1.0):
        """Rebuild the running statistics from the current codebook.

        :param num_tokens: if given, the statistics are scaled by
            ``num_tokens / k_bins`` (the expected usage per bin).
        :param threshold: new usage threshold used by :meth:`update_k`.
        """
        emb_width, k_bins = self.emb_width, self.k_bins
        self.init = True
        assert self.k.shape == (k_bins, emb_width)
        self.k_sum = self.k.clone()
        self.k_elem = t.ones(k_bins, device=self.k.device)
        if num_tokens is not None:
            expected_usage = num_tokens / k_bins
            self.k_elem.data.mul_(expected_usage)
            self.k_sum.data.mul_(expected_usage)
        self.threshold = threshold

    def update_k(self, x, x_l):
        """Update the codebook from inputs ``x`` and their codes ``x_l``.

        :return: dict with ``entropy``, ``used_curr``, ``usage`` and ``dk``
            diagnostics for this update.
        """
        mu, emb_width, k_bins = self.mu, self.emb_width, self.k_bins
        with t.no_grad():
            # Calculate new centres
            x_l_onehot = t.zeros(k_bins, x.shape[0], device=x.device)  # k_bins, N * L
            x_l_onehot.scatter_(0, x_l.view(1, x.shape[0]), 1)

            _k_sum = t.matmul(x_l_onehot, x)  # k_bins, w
            _k_elem = x_l_onehot.sum(dim=-1)  # k_bins
            y = self._tile(x)
            _k_rand = y[t.randperm(y.shape[0])][:k_bins]

            dist.broadcast(_k_rand, 0)
            dist.all_reduce(_k_sum)
            dist.all_reduce(_k_elem)

            # Update centres
            old_k = self.k
            self.k_sum = mu * self.k_sum + (1. - mu) * _k_sum  # k_bins, w
            self.k_elem = mu * self.k_elem + (1. - mu) * _k_elem  # k_bins
            # 1.0 where the bin is used often enough to keep its running mean,
            # 0.0 where it is re-seeded from a random input vector.
            usage = (self.k_elem.view(k_bins, 1) >= self.threshold).float()
            self.k = usage * (self.k_sum.view(k_bins, emb_width) / self.k_elem.view(k_bins, 1)) \
                + (1 - usage) * _k_rand
            _k_prob = _k_elem / t.sum(_k_elem)  # prob of each bin in this batch
            entropy = -t.sum(_k_prob * t.log(_k_prob + 1e-8))  # how diverse the codes are
            used_curr = (_k_elem >= self.threshold).sum()
            usage = t.sum(usage)
            dk = t.norm(self.k - old_k) / np.sqrt(np.prod(old_k.shape))
        return dict(entropy=entropy,
                    used_curr=used_curr,
                    usage=usage,
                    dk=dk)

    def preprocess(self, x):
        """Flatten ``(N, C, T)`` to ``(N * T, emb_width)`` and compute a pre-norm statistic.

        When ``C == 2 * emb_width`` the two channel halves are summed.

        :raises AssertionError: if ``C`` is neither ``emb_width`` nor twice it.
        """
        # NCT -> NTC -> [NT, C]
        x = x.permute(0, 2, 1).contiguous()
        x = x.view(-1, x.shape[-1])  # x_en = (N * L, w), k_j = (w, k_bins)

        if x.shape[-1] == self.emb_width:
            prenorm = t.norm(x - t.mean(x)) / np.sqrt(np.prod(x.shape))
        elif x.shape[-1] == 2 * self.emb_width:
            x1, x2 = x[..., :self.emb_width], x[..., self.emb_width:]
            prenorm = (t.norm(x1 - t.mean(x1)) / np.sqrt(np.prod(x1.shape))) + (
                t.norm(x2 - t.mean(x2)) / np.sqrt(np.prod(x2.shape)))

            # Normalise
            x = x1 + x2
        else:
            raise AssertionError(f"Expected {x.shape[-1]} to be (1 or 2) * {self.emb_width}")
        return x, prenorm

    def postprocess(self, x_l, x_d, x_shape):
        """Reshape flat codes / vectors back to ``(N, T)`` and ``(N, C, T)``."""
        # [NT, C] -> NTC -> NCT
        N, T = x_shape
        x_d = x_d.view(N, T, -1).permute(0, 2, 1).contiguous()
        x_l = x_l.view(N, T)
        return x_l, x_d

    def quantise(self, x):
        """Return the nearest-codebook index for each row of ``x`` and the mean distance."""
        # Calculate latent code x_l
        k_w = self.k.t()
        distance = t.sum(x ** 2, dim=-1, keepdim=True) - 2 * t.matmul(x, k_w) \
            + t.sum(k_w ** 2, dim=0, keepdim=True)  # (N * L, b)
        min_distance, x_l = t.min(distance, dim=-1)
        fit = t.mean(min_distance)
        return x_l, fit

    def dequantise(self, x_l):
        """Look up the codebook vectors for the indices ``x_l``."""
        return F.embedding(x_l, self.k)

    def encode(self, x):
        """Map ``(N, C, T)`` features to ``(N, T)`` codebook indices."""
        N, _, T = x.shape

        # Preprocess.
        x, _ = self.preprocess(x)

        # Quantise
        x_l, _ = self.quantise(x)

        # Postprocess.
        x_l = x_l.view(N, T)
        return x_l

    def decode(self, x_l):
        """Map ``(N, T)`` codebook indices to ``(N, emb_width, T)`` vectors."""
        N, T = x_l.shape
        width = self.emb_width

        # Dequantise
        x_d = self.dequantise(x_l)

        # Postprocess
        x_d = x_d.view(N, T, width).permute(0, 2, 1).contiguous()
        return x_d

    def forward(self, x, update_k=True):
        """Quantise ``x`` and return ``(codes, quantised, commit_loss, metrics)``.

        The quantised output uses a straight-through estimator: its value is
        the codebook vector but gradients flow to ``x``. The codebook is
        initialised on the first call with ``update_k`` true, and updated
        only when ``update_k`` is true and the module is in training mode.
        """
        N, _, T = x.shape

        # Preprocess
        x, prenorm = self.preprocess(x)

        # Init k if not inited
        if update_k and not self.init:
            self.init_k(x)

        # Quantise and dequantise through bottleneck
        x_l, fit = self.quantise(x)
        x_d = self.dequantise(x_l)

        # Update embeddings
        if update_k and self.training:
            update_metrics = self.update_k(x, x_l)
        else:
            update_metrics = {}

        # Loss
        commit_loss = t.norm(x_d.detach() - x) ** 2 / np.prod(x.shape)

        # Passthrough
        x_d = x + (x_d - x).detach()

        # Postprocess
        x_l, x_d = self.postprocess(x_l, x_d, (N, T))
        return x_l, x_d, commit_loss, dict(fit=fit,
                                           pn=prenorm,
                                           **update_metrics)
class NoBottleneck(nn.Module):
    """Pass-through stand-in for a quantising bottleneck.

    ``encode`` / ``decode`` return their input unchanged and ``forward``
    reports zero commit losses and zero metrics for each of ``levels``
    levels.
    """

    def __init__(self, levels):
        super().__init__()
        self.level_blocks = nn.ModuleList()
        self.levels = levels
        for level in range(levels):
            self.level_blocks.append(NoBottleneckBlock())

    def encode(self, xs):
        """Return ``xs`` unchanged."""
        return xs

    def decode(self, zs, start_level=0, end_level=None):
        """Return ``zs`` unchanged; the level arguments are accepted but unused."""
        if end_level is None:
            end_level = self.levels
        return zs

    def forward(self, xs):
        """Return ``(xs, xs, commit_losses, metrics)`` with all-zero losses and metrics."""
        # Put the zero scalar on the same device as the inputs instead of
        # hard-coding CUDA, so the module also works on CPU-only hosts.
        device = None
        if len(xs) > 0 and t.is_tensor(xs[0]):
            device = xs[0].device
        zero = t.zeros((), device=device)
        commit_losses = [zero for _ in range(self.levels)]
        metrics = [dict(entropy=zero, usage=zero, used_curr=zero, pn=zero, dk=zero)
                   for _ in range(self.levels)]
        return xs, xs, commit_losses, metrics
def train(rank, local_rank, a, h):
    """Run the GAN vocoder training loop for one process.

    :param rank: global rank of this process; only rank 0 prints, validates,
        writes TensorBoard summaries and saves checkpoints.
    :param local_rank: index of the CUDA device used by this process.
    :param a: parsed command-line arguments (``checkpoint_path``,
        ``training_epochs``, ``training_steps`` and the ``*_interval`` options).
    :param h: ``AttrDict`` with the model / data hyper-parameters, including
        ``num_gpus`` and ``batch_size``.

    Training resumes from the newest ``g_########`` / ``do_########`` pair in
    ``a.checkpoint_path`` when both exist, and stops after
    ``a.training_epochs`` epochs or ``a.training_steps`` steps, whichever
    comes first.
    """
    if h.num_gpus > 1:
        init_process_group(
            backend=h.dist_config['dist_backend'],
            init_method=h.dist_config['dist_url'],
            rank=rank,
            world_size=h.num_gpus,
        )

    torch.cuda.manual_seed(h.seed)
    device = torch.device('cuda:{:d}'.format(local_rank))

    generator = CodeHiFiGANModel_spk(dict(h)).to(device)
    mpd = MultiPeriodDiscriminator().to(device)
    msd = MultiScaleDiscriminator().to(device)

    if rank == 0:
        print(generator)
        os.makedirs(a.checkpoint_path, exist_ok=True)
        print("checkpoints directory : ", a.checkpoint_path)

    # Initialise up front: the checkpoint directory may not exist yet
    # (for example on ranks other than 0).
    cp_g = None
    cp_do = None
    if os.path.isdir(a.checkpoint_path):
        cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
        cp_do = scan_checkpoint(a.checkpoint_path, 'do_')

    steps = 0
    # Best model tracking, based on the validation mel-spectrogram error.
    best_val_error = float('inf')

    if cp_g is None or cp_do is None:
        state_dict_do = None
        last_epoch = -1
    else:
        state_dict_g = load_checkpoint(cp_g, device)
        state_dict_do = load_checkpoint(cp_do, device)
        generator.load_state_dict(state_dict_g['generator'])
        mpd.load_state_dict(state_dict_do['mpd'])
        msd.load_state_dict(state_dict_do['msd'])
        steps = state_dict_do['steps'] + 1
        last_epoch = state_dict_do['epoch']
        # Older checkpoints do not carry the best validation error.
        if 'best_val_error' in state_dict_do:
            best_val_error = state_dict_do['best_val_error']

    if h.num_gpus > 1:
        generator = DistributedDataParallel(
            generator,
            device_ids=[local_rank],
            find_unused_parameters=('f0_quantizer' in h),
        ).to(device)
        mpd = DistributedDataParallel(mpd, device_ids=[local_rank]).to(device)
        msd = DistributedDataParallel(msd, device_ids=[local_rank]).to(device)

    optim_g = torch.optim.AdamW(generator.parameters(), h.learning_rate, betas=[h.adam_b1, h.adam_b2])
    optim_d = torch.optim.AdamW(itertools.chain(msd.parameters(), mpd.parameters()), h.learning_rate,
                                betas=[h.adam_b1, h.adam_b2])

    if state_dict_do is not None:
        optim_g.load_state_dict(state_dict_do['optim_g'])
        optim_d.load_state_dict(state_dict_do['optim_d'])

    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=h.lr_decay, last_epoch=last_epoch)
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=h.lr_decay, last_epoch=last_epoch)

    training_filelist, validation_filelist = get_dataset_filelist(h)

    trainset = CodeDataset(training_filelist, h.segment_size, h.code_hop_size, h.n_fft, h.num_mels, h.hop_size,
                           h.win_size, h.sampling_rate, h.fmin, h.fmax, n_cache_reuse=0, fmax_loss=h.fmax_for_loss,
                           device=device, f0=h.get('f0', None), multispkr=h.get('multispkr', None),
                           f0_stats=h.get('f0_stats', None),
                           f0_normalize=h.get('f0_normalize', False), f0_feats=h.get('f0_feats', False),
                           f0_median=h.get('f0_median', False), f0_interp=h.get('f0_interp', False),
                           vqvae=h.get('code_vq_params', False))

    train_sampler = DistributedSampler(trainset) if h.num_gpus > 1 else None

    train_loader = DataLoader(trainset, num_workers=0, shuffle=False, sampler=train_sampler,
                              batch_size=h.batch_size, pin_memory=True, drop_last=True)

    sw = None
    if rank == 0:
        validset = CodeDataset(validation_filelist, h.segment_size, h.code_hop_size, h.n_fft, h.num_mels, h.hop_size,
                               h.win_size, h.sampling_rate, h.fmin, h.fmax, False, n_cache_reuse=0,
                               fmax_loss=h.fmax_for_loss, device=device, f0=h.get('f0', None),
                               multispkr=h.get('multispkr', None),
                               f0_stats=h.get('f0_stats', None), f0_normalize=h.get('f0_normalize', False),
                               f0_feats=h.get('f0_feats', False), f0_median=h.get('f0_median', False),
                               f0_interp=h.get('f0_interp', False), vqvae=h.get('code_vq_params', False))
        validation_loader = DataLoader(validset, num_workers=0, shuffle=False, sampler=None,
                                       batch_size=h.batch_size, pin_memory=True, drop_last=True)

        sw = SummaryWriter(os.path.join(a.checkpoint_path, 'logs'))

    def unwrap(module):
        # State dicts are always saved from the bare module, not the DDP wrapper.
        return module.module if h.num_gpus > 1 else module

    def save_models(tag, epoch):
        # Writes the generator file and the discriminator/optimizer file for `tag`.
        save_checkpoint("{}/g_{}".format(a.checkpoint_path, tag),
                        {'generator': unwrap(generator).state_dict()})
        save_checkpoint("{}/do_{}".format(a.checkpoint_path, tag),
                        {'mpd': unwrap(mpd).state_dict(),
                         'msd': unwrap(msd).state_dict(),
                         'optim_g': optim_g.state_dict(), 'optim_d': optim_d.state_dict(),
                         'steps': steps, 'epoch': epoch, 'best_val_error': best_val_error})

    generator.train()
    mpd.train()
    msd.train()
    for epoch in range(max(0, last_epoch), a.training_epochs):
        if rank == 0:
            start = time.time()
            print("Epoch: {}".format(epoch + 1))

        if h.num_gpus > 1:
            train_sampler.set_epoch(epoch)

        for i, batch in enumerate(train_loader):
            if rank == 0:
                start_b = time.time()
            x, y, _, y_mel = batch
            y = y.to(device, non_blocking=False)
            y_mel = y_mel.to(device, non_blocking=False)
            y = y.unsqueeze(1)
            x = {k: v.to(device, non_blocking=False) for k, v in x.items()}

            y_g_hat, dur_losses = generator(**x)

            assert y_g_hat.shape == y.shape, f"Mismatch in vocoder output shape - {y_g_hat.shape} != {y.shape}"

            y_g_hat_mel = mel_spectrogram(y_g_hat.squeeze(1), h.n_fft, h.num_mels, h.sampling_rate, h.hop_size,
                                          h.win_size, h.fmin, h.fmax_for_loss)

            optim_d.zero_grad()

            # MPD
            y_df_hat_r, y_df_hat_g, _, _ = mpd(y, y_g_hat.detach())
            loss_disc_f, losses_disc_f_r, losses_disc_f_g = discriminator_loss(y_df_hat_r, y_df_hat_g)

            # MSD
            y_ds_hat_r, y_ds_hat_g, _, _ = msd(y, y_g_hat.detach())
            loss_disc_s, losses_disc_s_r, losses_disc_s_g = discriminator_loss(y_ds_hat_r, y_ds_hat_g)

            loss_disc_all = loss_disc_s + loss_disc_f

            loss_disc_all.backward()
            optim_d.step()

            # Generator
            optim_g.zero_grad()

            # L1 Mel-Spectrogram Loss
            loss_mel = F.l1_loss(y_mel, y_g_hat_mel) * 45

            y_df_hat_r, y_df_hat_g, fmap_f_r, fmap_f_g = mpd(y, y_g_hat)
            y_ds_hat_r, y_ds_hat_g, fmap_s_r, fmap_s_g = msd(y, y_g_hat)
            loss_fm_f = feature_loss(fmap_f_r, fmap_f_g)
            loss_fm_s = feature_loss(fmap_s_r, fmap_s_g)
            loss_gen_f, losses_gen_f = generator_loss(y_df_hat_g)
            loss_gen_s, losses_gen_s = generator_loss(y_ds_hat_g)
            loss_gen_all = loss_gen_s + loss_gen_f + loss_fm_s + loss_fm_f + loss_mel
            if h.get('dur_prediction_weight', None):
                loss_gen_all += dur_losses * h.get('dur_prediction_weight', None)

            loss_gen_all.backward()
            optim_g.step()

            if rank == 0:
                log_stdout = steps % a.stdout_interval == 0
                log_summary = steps % a.summary_interval == 0
                # Compute the mel error whenever either logger needs it; it
                # used to be defined only on stdout steps, so summary logging
                # could hit an undefined (or stale) value.
                if log_stdout or log_summary:
                    with torch.no_grad():
                        mel_error = F.l1_loss(y_mel, y_g_hat_mel).item()

                # STDOUT logging
                if log_stdout:
                    print(
                        'Steps : {:d}, Gen Loss Total : {:4.3f}, Mel-Spec. Error : {:4.3f}, s/b : {:4.3f}'.format(
                            steps, loss_gen_all, mel_error, time.time() - start_b))

                # checkpointing
                if steps % a.checkpoint_interval == 0 and steps != 0:
                    save_models('{:08d}'.format(steps), epoch)

                # Tensorboard summary logging
                if log_summary:
                    sw.add_scalar("training/gen_loss_total", loss_gen_all, steps)
                    sw.add_scalar("training/mel_spec_error", mel_error, steps)
                    sw.add_scalar("training/gen_loss_f", loss_gen_f, steps)
                    sw.add_scalar("training/gen_loss_s", loss_gen_s, steps)
                    sw.add_scalar("training/fm_loss_f", loss_fm_f, steps)
                    sw.add_scalar("training/fm_loss_s", loss_fm_s, steps)

                    # The VQ-VAE logging was removed: f0_commit_loss, f0_metrics,
                    # code_commit_loss and code_metrics are not produced here.
                    # To log them, the generator has to return those values.

                # Validation
                if steps % a.validation_interval == 0:  # and steps != 0:
                    generator.eval()
                    torch.cuda.empty_cache()
                    val_err_tot = 0
                    n_val_batches = 0
                    with torch.no_grad():
                        for j, batch in enumerate(validation_loader):
                            x, y, _, y_mel = batch
                            x = {k: v.to(device, non_blocking=False) for k, v in x.items()}

                            y_g_hat, dur_losses = generator(**x)
                            y_mel = y_mel.to(device, non_blocking=False)
                            y_g_hat_mel = mel_spectrogram(y_g_hat.squeeze(1), h.n_fft, h.num_mels, h.sampling_rate,
                                                          h.hop_size, h.win_size, h.fmin, h.fmax_for_loss)
                            val_err_tot += F.l1_loss(y_mel, y_g_hat_mel).item()
                            n_val_batches += 1

                            if j <= 4:
                                if steps == 0:
                                    sw.add_audio('gt/y_{}'.format(j), y[0], steps, h.sampling_rate)
                                    sw.add_figure('gt/y_spec_{}'.format(j), plot_spectrogram(y_mel[0].cpu()), steps)

                                sw.add_audio('generated/y_hat_{}'.format(j), y_g_hat[0], steps, h.sampling_rate)
                                y_hat_spec = mel_spectrogram(y_g_hat[:1].squeeze(1), h.n_fft, h.num_mels,
                                                             h.sampling_rate, h.hop_size, h.win_size, h.fmin, h.fmax)
                                sw.add_figure('generated/y_hat_spec_{}'.format(j),
                                              plot_spectrogram(y_hat_spec[:1].squeeze(0).cpu().numpy()), steps)

                    if n_val_batches == 0:
                        # drop_last=True yields no batch when the validation
                        # set is smaller than one batch; skip instead of
                        # failing on an undefined batch index.
                        print("Steps : {}, validation skipped (no full validation batch).".format(steps))
                    else:
                        val_err = val_err_tot / n_val_batches
                        sw.add_scalar("validation/mel_spec_error", val_err, steps)

                        # Keep a copy of the best model by validation mel error.
                        if val_err < best_val_error:
                            best_val_error = val_err
                            save_models('best', epoch)
                            print(f"Steps : {steps}, New Best Val Mel Error : {best_val_error:.4f} -> Saved best model.")

                    generator.train()

            steps += 1
            if steps >= a.training_steps:
                break

        scheduler_g.step()
        scheduler_d.step()

        if rank == 0:
            print('Time taken for epoch {} is {} sec\n'.format(epoch + 1, int(time.time() - start)))

        # The inner `break` only leaves the batch loop; without this check
        # every remaining epoch would still train on one more batch.
        if steps >= a.training_steps:
            break

    if rank == 0:
        sw.close()
        print('Finished training')
parser.add_argument('--training_steps', default=500000, type=int) + parser.add_argument('--stdout_interval', default=5, type=int) + parser.add_argument('--checkpoint_interval', default=50000, type=int) + parser.add_argument('--summary_interval', default=100, type=int) + parser.add_argument('--validation_interval', default=5000, type=int) + parser.add_argument('--fine_tuning', default=False, type=bool) + parser.add_argument('--local_rank', default=0, type=int) + parser.add_argument('--distributed-world-size', type=int) + parser.add_argument('--distributed-port', type=int) + + a = parser.parse_args() + + with open(a.config) as f: + data = f.read() + + json_config = json.loads(data) + h = AttrDict(json_config) + build_env(a.config, 'config.json', a.checkpoint_path) + + torch.manual_seed(h.seed) + if torch.cuda.is_available() and 'WORLD_SIZE' in os.environ: + torch.cuda.manual_seed(h.seed) + h.num_gpus = int(os.environ['WORLD_SIZE']) + h.batch_size = int(h.batch_size / h.num_gpus) + local_rank = a.local_rank + rank = a.local_rank + print('Batch size per GPU :', h.batch_size) + else: + # [FIX] 단일 GPU 환경에서 num_gpus 설정 누락 수정 + h.num_gpus = 1 + rank = 0 + local_rank = 0 + + train(rank, local_rank, a, h) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/unit2av/utils.py b/unit2av/utils.py new file mode 100644 index 0000000..a4262be --- /dev/null +++ b/unit2av/utils.py @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
def build_env(config, config_name, path):
    """Copy the config file ``config`` to ``path/config_name``.

    ``path`` is created when missing. Nothing is done when ``config``
    already is the target file. The two paths are compared after resolution,
    so a different spelling of the same file (relative vs. absolute, ``./``
    segments, symlinks) no longer makes ``shutil.copyfile`` raise
    ``SameFileError``.

    :param config: path of the config file to copy.
    :param config_name: file name to use inside ``path``.
    :param path: destination directory.
    """
    t_path = os.path.join(path, config_name)
    if os.path.realpath(config) == os.path.realpath(t_path):
        return
    os.makedirs(path, exist_ok=True)
    shutil.copyfile(config, t_path)
# ---- file: unit2unit/inference.py ----
import argparse
import numpy as np
import torch

from fairseq import checkpoint_utils, utils
from fairseq_cli.generate import get_symbols_to_strip_from_output

from unit2unit.task import UTUTPretrainingTask
from util import process_units, save_unit


def load_model(model_path, src_lang, tgt_lang, use_cuda=False):
    """Load a checkpoint and build a generator for ``src_lang`` -> ``tgt_lang``.

    Returns:
        A ``(task, generator)`` tuple. The task has ``source_language`` and
        ``target_language`` set before the generator is built.
    """
    models, cfg, task = checkpoint_utils.load_model_ensemble_and_task([model_path])

    # Fix seed for stochastic decoding
    seeded = cfg.common.seed is not None and not cfg.generation.no_seed_provided
    if seeded:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    for net in models:
        if cfg.common.fp16:
            net.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            net.cuda()
        net.prepare_for_inference_(cfg)

    task.source_language = src_lang
    task.target_language = tgt_lang

    return task, task.build_generator(models, cfg.generation)


def main(args):
    """Translate the unit sequence on the first line of the input file.

    Only the first line of ``args.in_unit_path`` is read. The decoded
    prediction string is written to ``args.out_unit_path`` via ``save_unit``.
    """
    use_cuda = torch.cuda.is_available() and not args.cpu

    task, generator = load_model(
        args.utut_path, args.src_lang, args.tgt_lang, use_cuda=use_cuda
    )

    with open(args.in_unit_path) as f:
        first_line = f.readline()
    raw_units = [int(tok) for tok in first_line.strip().split()]

    # Collapse consecutive duplicates, then map the unit ids to dictionary indices.
    unit_text = " ".join(str(u) for u in process_units(raw_units, reduce=True))
    encoded = task.source_dictionary.encode_line(
        unit_text,
        add_if_not_exist=False,
        append_eos=True,
    ).long()

    # Layout fed to the model: <bos> units... <eos> [src_lang]
    bos = encoded.new([task.source_dictionary.bos()])
    lang_tag = encoded.new(
        [task.source_dictionary.index("[{}]".format(task.source_language))]
    )
    unit = torch.cat([bos, encoded, lang_tag])

    sample = {"net_input": {
        "src_tokens": torch.LongTensor(unit).view(1,-1),
    }}
    if use_cuda:
        sample = utils.move_to_cuda(sample)

    hypotheses = task.inference_step(generator, None, sample)
    pred = hypotheses[0][0]

    pred_str = task.target_dictionary.string(
        pred["tokens"].int().cpu(),
        extra_symbols_to_ignore=get_symbols_to_strip_from_output(generator)
    )

    save_unit(pred_str, args.out_unit_path)


def cli_main():
    """Parse command-line arguments and run :func:`main`."""
    parser = argparse.ArgumentParser()

    # Required file/model paths.
    parser.add_argument(
        "--in-unit-path", type=str, required=True,
        help="File path of source unit input",
    )
    parser.add_argument(
        "--out-unit-path", type=str, required=True,
        help="File path of target unit output",
    )
    parser.add_argument(
        "--utut-path", type=str, required=True,
        help="path to the UTUT pre-trained model",
    )

    # Language pair.
    parser.add_argument(
        "--src-lang", type=str, required=True,
        choices=["en","es","fr","it","pt"],
        help="source language",
    )
    parser.add_argument(
        "--tgt-lang", type=str, required=True,
        choices=["en","es","fr","it","pt"],
        help="target language",
    )

    parser.add_argument("--cpu", action="store_true", help="run on CPU")

    main(parser.parse_args())


if __name__ == "__main__":
    cli_main()


# ---- file: unit2unit/task.py ----
import logging
from fairseq.tasks import register_task
from fairseq.tasks.multilingual_denoising import MultilingualDenoisingConfig, MultilingualDenoisingTask

logger = logging.getLogger(__name__)


@register_task("utut_pretraining", dataclass=MultilingualDenoisingConfig)
class UTUTPretrainingTask(MultilingualDenoisingTask):
    """Multilingual denoising task whose generator decodes towards one language tag."""

    def build_generator(
        self,
        models,
        args,
        seq_gen_cls=None,
        extra_gen_cls_kwargs=None,
    ):
        """Build a generator configured around the ``[lang]`` tag tokens.

        Adds three entries to ``extra_gen_cls_kwargs`` (the dict passed by the
        caller is updated in place when one is given):

        * ``symbols_to_strip_from_output``: dictionary indices of every
          ``[lang]`` tag listed in ``self.cfg.langs``;
        * ``eos``: the dictionary index of the ``[target_language]`` tag;
        * ``tokens_to_suppress``: the tag strings of all other languages plus
          the symbol stored at ``self.mask_idx``.
        """
        langs = self.cfg.langs.split(",")

        if extra_gen_cls_kwargs is None:
            extra_gen_cls_kwargs = {}

        strip_ids = {self.dictionary.index("[{}]".format(lang)) for lang in langs}
        extra_gen_cls_kwargs["symbols_to_strip_from_output"] = strip_ids

        target_tag = "[{}]".format(self.target_language)
        extra_gen_cls_kwargs["eos"] = self.dictionary.index(target_tag)

        suppressed = []
        for lang in langs:
            if lang != self.target_language:
                suppressed.append("[{}]".format(lang))
        suppressed.append(self.dictionary[self.mask_idx])
        extra_gen_cls_kwargs["tokens_to_suppress"] = suppressed

        return super().build_generator(
            models,
            args,
            seq_gen_cls=seq_gen_cls,
            extra_gen_cls_kwargs=extra_gen_cls_kwargs,
        )
# ---- file: util.py ----
import os
import soundfile as sf
import cv2
import ffmpeg


def _ensure_parent_dir(path):
    """Create the parent directory of *path* if it has one.

    ``os.path.dirname`` returns ``''`` for a bare filename, and
    ``os.makedirs('')`` raises ``FileNotFoundError``; in that case the file
    lives in the current directory and nothing needs creating.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _paste_patch(frame, patch, box):
    """Resize *patch* to *box* and write it into *frame* in place.

    *box* is ``(x1, y1, x2, y2)``; negative coordinates are clamped to 0.
    Degenerate boxes, and boxes that fall entirely outside the frame, leave
    the frame untouched. When the box extends past the frame border the patch
    is resized a second time to the clipped region, matching the previous
    fallback path.
    """
    x1, y1, x2, y2 = (max(int(v), 0) for v in box)
    if x2 - x1 <= 0 or y2 - y1 <= 0:
        return

    region = frame[y1:y2, x1:x2]
    height, width = region.shape[:2]
    if height == 0 or width == 0:
        return

    # cv2.resize takes the target size as (width, height).
    patch = cv2.resize(patch, (x2 - x1, y2 - y1))
    if patch.shape[:2] != (height, width):
        patch = cv2.resize(patch, (width, height))
    frame[y1:y2, x1:x2] = patch


def process_units(units, reduce=False):
    """Return *units*, optionally with consecutive duplicates collapsed.

    With ``reduce=False`` the input object itself is returned. With
    ``reduce=True`` a new list is returned in which each run of equal
    neighbouring values is kept once.
    """
    if not reduce:
        return units

    return [u for i, u in enumerate(units) if i == 0 or u != units[i - 1]]


def save_unit(unit, unit_path):
    """Write the string *unit* to *unit_path*, creating its directory if needed."""
    _ensure_parent_dir(unit_path)
    with open(unit_path, "w") as f:
        f.write(unit)


def save_audio(audio, audio_path, sampling_rate=16000):
    """Write *audio* to *audio_path* with ``soundfile`` at *sampling_rate*."""
    _ensure_parent_dir(audio_path)
    sf.write(
        audio_path,
        audio,
        sampling_rate,
    )


def extract_audio_from_video(video_path, save_audio_path, sampling_rate=16000):
    """Extract the audio track of *video_path* as mono 16-bit PCM.

    The output is written to *save_audio_path* (overwriting any existing
    file) at *sampling_rate*.
    """
    _ensure_parent_dir(save_audio_path)
    (
        ffmpeg.input(video_path)
        .output(
            save_audio_path,
            acodec="pcm_s16le",
            ac=1,
            ar=sampling_rate,
            loglevel="panic",
        )
        .run(overwrite_output=True)
    )


def save_video(audio, video, full_video, bbox, save_video_path, sampling_rate=16000, fps=25, vcodec="libx264"):
    """Paste generated patches into the full frames and mux them with *audio*.

    For each ``(patch, frame, box)`` triple from ``video``, ``full_video`` and
    ``bbox``, the patch is resized into the box region of the frame; a ``None``
    box writes the frame unchanged. Frames are modified in place, so the
    caller's ``full_video`` is altered. The silent video and the audio are
    written to temporary files next to *save_video_path* and combined with
    ffmpeg using *vcodec* for video and AAC for audio.

    Temporary files are removed, and the video writer released, even when a
    step fails.
    """
    _ensure_parent_dir(save_video_path)
    stem = os.path.splitext(save_video_path)[0]
    temp_audio_path = stem + ".temp.wav"
    temp_video_path = stem + ".temp.avi"

    try:
        save_audio(audio, temp_audio_path, sampling_rate)

        frame_h, frame_w = full_video.shape[1], full_video.shape[2]
        out = cv2.VideoWriter(temp_video_path, cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h))
        try:
            for patch, frame, box in zip(video, full_video, bbox):
                # A missing box means: keep the original frame as it is.
                if box is not None:
                    _paste_patch(frame, patch, box)
                out.write(frame)
        finally:
            out.release()

        ffmpeg.output(
            ffmpeg.input(temp_video_path),
            ffmpeg.input(temp_audio_path),
            save_video_path,
            vcodec=vcodec,  # previously hard-coded, which ignored the parameter
            acodec="aac",
            loglevel="panic",
        ).run(overwrite_output=True)
    finally:
        _remove_if_exists(temp_audio_path)
        _remove_if_exists(temp_video_path)